aboutsummaryrefslogtreecommitdiffstats
path: root/fs/nfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/nfs')
-rw-r--r--fs/nfs/blocklayout/blocklayout.c101
-rw-r--r--fs/nfs/callback.c12
-rw-r--r--fs/nfs/client.c107
-rw-r--r--fs/nfs/delegation.c34
-rw-r--r--fs/nfs/delegation.h1
-rw-r--r--fs/nfs/dir.c208
-rw-r--r--fs/nfs/direct.c33
-rw-r--r--fs/nfs/file.c4
-rw-r--r--fs/nfs/filelayout/filelayout.c299
-rw-r--r--fs/nfs/filelayout/filelayoutdev.c6
-rw-r--r--fs/nfs/getroot.c2
-rw-r--r--fs/nfs/inode.c18
-rw-r--r--fs/nfs/internal.h22
-rw-r--r--fs/nfs/netns.h3
-rw-r--r--fs/nfs/nfs3acl.c7
-rw-r--r--fs/nfs/nfs3proc.c21
-rw-r--r--fs/nfs/nfs4_fs.h21
-rw-r--r--fs/nfs/nfs4client.c43
-rw-r--r--fs/nfs/nfs4proc.c292
-rw-r--r--fs/nfs/nfs4state.c49
-rw-r--r--fs/nfs/nfs4trace.h28
-rw-r--r--fs/nfs/nfs4xdr.c2
-rw-r--r--fs/nfs/objlayout/objio_osd.c24
-rw-r--r--fs/nfs/objlayout/objlayout.c81
-rw-r--r--fs/nfs/objlayout/objlayout.h8
-rw-r--r--fs/nfs/pagelist.c330
-rw-r--r--fs/nfs/pnfs.c180
-rw-r--r--fs/nfs/pnfs.h45
-rw-r--r--fs/nfs/proc.c27
-rw-r--r--fs/nfs/read.c54
-rw-r--r--fs/nfs/super.c12
-rw-r--r--fs/nfs/write.c167
32 files changed, 1281 insertions, 960 deletions
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 9b431f44fad9..cbb1797149d5 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -210,8 +210,7 @@ static void bl_end_io_read(struct bio *bio, int err)
210 SetPageUptodate(bvec->bv_page); 210 SetPageUptodate(bvec->bv_page);
211 211
212 if (err) { 212 if (err) {
213 struct nfs_pgio_data *rdata = par->data; 213 struct nfs_pgio_header *header = par->data;
214 struct nfs_pgio_header *header = rdata->header;
215 214
216 if (!header->pnfs_error) 215 if (!header->pnfs_error)
217 header->pnfs_error = -EIO; 216 header->pnfs_error = -EIO;
@@ -224,43 +223,44 @@ static void bl_end_io_read(struct bio *bio, int err)
224static void bl_read_cleanup(struct work_struct *work) 223static void bl_read_cleanup(struct work_struct *work)
225{ 224{
226 struct rpc_task *task; 225 struct rpc_task *task;
227 struct nfs_pgio_data *rdata; 226 struct nfs_pgio_header *hdr;
228 dprintk("%s enter\n", __func__); 227 dprintk("%s enter\n", __func__);
229 task = container_of(work, struct rpc_task, u.tk_work); 228 task = container_of(work, struct rpc_task, u.tk_work);
230 rdata = container_of(task, struct nfs_pgio_data, task); 229 hdr = container_of(task, struct nfs_pgio_header, task);
231 pnfs_ld_read_done(rdata); 230 pnfs_ld_read_done(hdr);
232} 231}
233 232
234static void 233static void
235bl_end_par_io_read(void *data, int unused) 234bl_end_par_io_read(void *data, int unused)
236{ 235{
237 struct nfs_pgio_data *rdata = data; 236 struct nfs_pgio_header *hdr = data;
238 237
239 rdata->task.tk_status = rdata->header->pnfs_error; 238 hdr->task.tk_status = hdr->pnfs_error;
240 INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup); 239 INIT_WORK(&hdr->task.u.tk_work, bl_read_cleanup);
241 schedule_work(&rdata->task.u.tk_work); 240 schedule_work(&hdr->task.u.tk_work);
242} 241}
243 242
244static enum pnfs_try_status 243static enum pnfs_try_status
245bl_read_pagelist(struct nfs_pgio_data *rdata) 244bl_read_pagelist(struct nfs_pgio_header *hdr)
246{ 245{
247 struct nfs_pgio_header *header = rdata->header; 246 struct nfs_pgio_header *header = hdr;
248 int i, hole; 247 int i, hole;
249 struct bio *bio = NULL; 248 struct bio *bio = NULL;
250 struct pnfs_block_extent *be = NULL, *cow_read = NULL; 249 struct pnfs_block_extent *be = NULL, *cow_read = NULL;
251 sector_t isect, extent_length = 0; 250 sector_t isect, extent_length = 0;
252 struct parallel_io *par; 251 struct parallel_io *par;
253 loff_t f_offset = rdata->args.offset; 252 loff_t f_offset = hdr->args.offset;
254 size_t bytes_left = rdata->args.count; 253 size_t bytes_left = hdr->args.count;
255 unsigned int pg_offset, pg_len; 254 unsigned int pg_offset, pg_len;
256 struct page **pages = rdata->args.pages; 255 struct page **pages = hdr->args.pages;
257 int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT; 256 int pg_index = hdr->args.pgbase >> PAGE_CACHE_SHIFT;
258 const bool is_dio = (header->dreq != NULL); 257 const bool is_dio = (header->dreq != NULL);
259 258
260 dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__, 259 dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__,
261 rdata->pages.npages, f_offset, (unsigned int)rdata->args.count); 260 hdr->page_array.npages, f_offset,
261 (unsigned int)hdr->args.count);
262 262
263 par = alloc_parallel(rdata); 263 par = alloc_parallel(hdr);
264 if (!par) 264 if (!par)
265 goto use_mds; 265 goto use_mds;
266 par->pnfs_callback = bl_end_par_io_read; 266 par->pnfs_callback = bl_end_par_io_read;
@@ -268,7 +268,7 @@ bl_read_pagelist(struct nfs_pgio_data *rdata)
268 268
269 isect = (sector_t) (f_offset >> SECTOR_SHIFT); 269 isect = (sector_t) (f_offset >> SECTOR_SHIFT);
270 /* Code assumes extents are page-aligned */ 270 /* Code assumes extents are page-aligned */
271 for (i = pg_index; i < rdata->pages.npages; i++) { 271 for (i = pg_index; i < hdr->page_array.npages; i++) {
272 if (!extent_length) { 272 if (!extent_length) {
273 /* We've used up the previous extent */ 273 /* We've used up the previous extent */
274 bl_put_extent(be); 274 bl_put_extent(be);
@@ -317,7 +317,8 @@ bl_read_pagelist(struct nfs_pgio_data *rdata)
317 struct pnfs_block_extent *be_read; 317 struct pnfs_block_extent *be_read;
318 318
319 be_read = (hole && cow_read) ? cow_read : be; 319 be_read = (hole && cow_read) ? cow_read : be;
320 bio = do_add_page_to_bio(bio, rdata->pages.npages - i, 320 bio = do_add_page_to_bio(bio,
321 hdr->page_array.npages - i,
321 READ, 322 READ,
322 isect, pages[i], be_read, 323 isect, pages[i], be_read,
323 bl_end_io_read, par, 324 bl_end_io_read, par,
@@ -332,10 +333,10 @@ bl_read_pagelist(struct nfs_pgio_data *rdata)
332 extent_length -= PAGE_CACHE_SECTORS; 333 extent_length -= PAGE_CACHE_SECTORS;
333 } 334 }
334 if ((isect << SECTOR_SHIFT) >= header->inode->i_size) { 335 if ((isect << SECTOR_SHIFT) >= header->inode->i_size) {
335 rdata->res.eof = 1; 336 hdr->res.eof = 1;
336 rdata->res.count = header->inode->i_size - rdata->args.offset; 337 hdr->res.count = header->inode->i_size - hdr->args.offset;
337 } else { 338 } else {
338 rdata->res.count = (isect << SECTOR_SHIFT) - rdata->args.offset; 339 hdr->res.count = (isect << SECTOR_SHIFT) - hdr->args.offset;
339 } 340 }
340out: 341out:
341 bl_put_extent(be); 342 bl_put_extent(be);
@@ -390,8 +391,7 @@ static void bl_end_io_write_zero(struct bio *bio, int err)
390 } 391 }
391 392
392 if (unlikely(err)) { 393 if (unlikely(err)) {
393 struct nfs_pgio_data *data = par->data; 394 struct nfs_pgio_header *header = par->data;
394 struct nfs_pgio_header *header = data->header;
395 395
396 if (!header->pnfs_error) 396 if (!header->pnfs_error)
397 header->pnfs_error = -EIO; 397 header->pnfs_error = -EIO;
@@ -405,8 +405,7 @@ static void bl_end_io_write(struct bio *bio, int err)
405{ 405{
406 struct parallel_io *par = bio->bi_private; 406 struct parallel_io *par = bio->bi_private;
407 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 407 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
408 struct nfs_pgio_data *data = par->data; 408 struct nfs_pgio_header *header = par->data;
409 struct nfs_pgio_header *header = data->header;
410 409
411 if (!uptodate) { 410 if (!uptodate) {
412 if (!header->pnfs_error) 411 if (!header->pnfs_error)
@@ -423,32 +422,32 @@ static void bl_end_io_write(struct bio *bio, int err)
423static void bl_write_cleanup(struct work_struct *work) 422static void bl_write_cleanup(struct work_struct *work)
424{ 423{
425 struct rpc_task *task; 424 struct rpc_task *task;
426 struct nfs_pgio_data *wdata; 425 struct nfs_pgio_header *hdr;
427 dprintk("%s enter\n", __func__); 426 dprintk("%s enter\n", __func__);
428 task = container_of(work, struct rpc_task, u.tk_work); 427 task = container_of(work, struct rpc_task, u.tk_work);
429 wdata = container_of(task, struct nfs_pgio_data, task); 428 hdr = container_of(task, struct nfs_pgio_header, task);
430 if (likely(!wdata->header->pnfs_error)) { 429 if (likely(!hdr->pnfs_error)) {
431 /* Marks for LAYOUTCOMMIT */ 430 /* Marks for LAYOUTCOMMIT */
432 mark_extents_written(BLK_LSEG2EXT(wdata->header->lseg), 431 mark_extents_written(BLK_LSEG2EXT(hdr->lseg),
433 wdata->args.offset, wdata->args.count); 432 hdr->args.offset, hdr->args.count);
434 } 433 }
435 pnfs_ld_write_done(wdata); 434 pnfs_ld_write_done(hdr);
436} 435}
437 436
438/* Called when last of bios associated with a bl_write_pagelist call finishes */ 437/* Called when last of bios associated with a bl_write_pagelist call finishes */
439static void bl_end_par_io_write(void *data, int num_se) 438static void bl_end_par_io_write(void *data, int num_se)
440{ 439{
441 struct nfs_pgio_data *wdata = data; 440 struct nfs_pgio_header *hdr = data;
442 441
443 if (unlikely(wdata->header->pnfs_error)) { 442 if (unlikely(hdr->pnfs_error)) {
444 bl_free_short_extents(&BLK_LSEG2EXT(wdata->header->lseg)->bl_inval, 443 bl_free_short_extents(&BLK_LSEG2EXT(hdr->lseg)->bl_inval,
445 num_se); 444 num_se);
446 } 445 }
447 446
448 wdata->task.tk_status = wdata->header->pnfs_error; 447 hdr->task.tk_status = hdr->pnfs_error;
449 wdata->verf.committed = NFS_FILE_SYNC; 448 hdr->verf.committed = NFS_FILE_SYNC;
450 INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup); 449 INIT_WORK(&hdr->task.u.tk_work, bl_write_cleanup);
451 schedule_work(&wdata->task.u.tk_work); 450 schedule_work(&hdr->task.u.tk_work);
452} 451}
453 452
454/* FIXME STUB - mark intersection of layout and page as bad, so is not 453/* FIXME STUB - mark intersection of layout and page as bad, so is not
@@ -673,18 +672,17 @@ check_page:
673} 672}
674 673
675static enum pnfs_try_status 674static enum pnfs_try_status
676bl_write_pagelist(struct nfs_pgio_data *wdata, int sync) 675bl_write_pagelist(struct nfs_pgio_header *header, int sync)
677{ 676{
678 struct nfs_pgio_header *header = wdata->header;
679 int i, ret, npg_zero, pg_index, last = 0; 677 int i, ret, npg_zero, pg_index, last = 0;
680 struct bio *bio = NULL; 678 struct bio *bio = NULL;
681 struct pnfs_block_extent *be = NULL, *cow_read = NULL; 679 struct pnfs_block_extent *be = NULL, *cow_read = NULL;
682 sector_t isect, last_isect = 0, extent_length = 0; 680 sector_t isect, last_isect = 0, extent_length = 0;
683 struct parallel_io *par = NULL; 681 struct parallel_io *par = NULL;
684 loff_t offset = wdata->args.offset; 682 loff_t offset = header->args.offset;
685 size_t count = wdata->args.count; 683 size_t count = header->args.count;
686 unsigned int pg_offset, pg_len, saved_len; 684 unsigned int pg_offset, pg_len, saved_len;
687 struct page **pages = wdata->args.pages; 685 struct page **pages = header->args.pages;
688 struct page *page; 686 struct page *page;
689 pgoff_t index; 687 pgoff_t index;
690 u64 temp; 688 u64 temp;
@@ -699,11 +697,11 @@ bl_write_pagelist(struct nfs_pgio_data *wdata, int sync)
699 dprintk("pnfsblock nonblock aligned DIO writes. Resend MDS\n"); 697 dprintk("pnfsblock nonblock aligned DIO writes. Resend MDS\n");
700 goto out_mds; 698 goto out_mds;
701 } 699 }
702 /* At this point, wdata->pages is a (sequential) list of nfs_pages. 700 /* At this point, header->page_array is a (sequential) list of nfs_pages.
703 * We want to write each, and if there is an error set pnfs_error 701 * We want to write each, and if there is an error set pnfs_error
704 * to have it redone using nfs. 702 * to have it redone using nfs.
705 */ 703 */
706 par = alloc_parallel(wdata); 704 par = alloc_parallel(header);
707 if (!par) 705 if (!par)
708 goto out_mds; 706 goto out_mds;
709 par->pnfs_callback = bl_end_par_io_write; 707 par->pnfs_callback = bl_end_par_io_write;
@@ -790,8 +788,8 @@ next_page:
790 bio = bl_submit_bio(WRITE, bio); 788 bio = bl_submit_bio(WRITE, bio);
791 789
792 /* Middle pages */ 790 /* Middle pages */
793 pg_index = wdata->args.pgbase >> PAGE_CACHE_SHIFT; 791 pg_index = header->args.pgbase >> PAGE_CACHE_SHIFT;
794 for (i = pg_index; i < wdata->pages.npages; i++) { 792 for (i = pg_index; i < header->page_array.npages; i++) {
795 if (!extent_length) { 793 if (!extent_length) {
796 /* We've used up the previous extent */ 794 /* We've used up the previous extent */
797 bl_put_extent(be); 795 bl_put_extent(be);
@@ -862,7 +860,8 @@ next_page:
862 } 860 }
863 861
864 862
865 bio = do_add_page_to_bio(bio, wdata->pages.npages - i, WRITE, 863 bio = do_add_page_to_bio(bio, header->page_array.npages - i,
864 WRITE,
866 isect, pages[i], be, 865 isect, pages[i], be,
867 bl_end_io_write, par, 866 bl_end_io_write, par,
868 pg_offset, pg_len); 867 pg_offset, pg_len);
@@ -890,7 +889,7 @@ next_page:
890 } 889 }
891 890
892write_done: 891write_done:
893 wdata->res.count = wdata->args.count; 892 header->res.count = header->args.count;
894out: 893out:
895 bl_put_extent(be); 894 bl_put_extent(be);
896 bl_put_extent(cow_read); 895 bl_put_extent(cow_read);
@@ -1063,7 +1062,7 @@ nfs4_blk_get_deviceinfo(struct nfs_server *server, const struct nfs_fh *fh,
1063 return ERR_PTR(-ENOMEM); 1062 return ERR_PTR(-ENOMEM);
1064 } 1063 }
1065 1064
1066 pages = kzalloc(max_pages * sizeof(struct page *), GFP_NOFS); 1065 pages = kcalloc(max_pages, sizeof(struct page *), GFP_NOFS);
1067 if (pages == NULL) { 1066 if (pages == NULL) {
1068 kfree(dev); 1067 kfree(dev);
1069 return ERR_PTR(-ENOMEM); 1068 return ERR_PTR(-ENOMEM);
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 073b4cf67ed9..54de482143cc 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -428,6 +428,18 @@ check_gss_callback_principal(struct nfs_client *clp, struct svc_rqst *rqstp)
428 if (p == NULL) 428 if (p == NULL)
429 return 0; 429 return 0;
430 430
431 /*
432 * Did we get the acceptor from userland during the SETCLIENTID
433 * negotiation?
434 */
435 if (clp->cl_acceptor)
436 return !strcmp(p, clp->cl_acceptor);
437
438 /*
439 * Otherwise try to verify it using the cl_hostname. Note that this
440 * doesn't work if a non-canonical hostname was used in the devname.
441 */
442
431 /* Expect a GSS_C_NT_HOSTBASED_NAME like "nfs@serverhostname" */ 443 /* Expect a GSS_C_NT_HOSTBASED_NAME like "nfs@serverhostname" */
432 444
433 if (memcmp(p, "nfs@", 4) != 0) 445 if (memcmp(p, "nfs@", 4) != 0)
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 1d09289c8f0e..6a4f3666e273 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -110,8 +110,8 @@ struct nfs_subversion *get_nfs_version(unsigned int version)
110 mutex_unlock(&nfs_version_mutex); 110 mutex_unlock(&nfs_version_mutex);
111 } 111 }
112 112
113 if (!IS_ERR(nfs)) 113 if (!IS_ERR(nfs) && !try_module_get(nfs->owner))
114 try_module_get(nfs->owner); 114 return ERR_PTR(-EAGAIN);
115 return nfs; 115 return nfs;
116} 116}
117 117
@@ -158,7 +158,8 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
158 goto error_0; 158 goto error_0;
159 159
160 clp->cl_nfs_mod = cl_init->nfs_mod; 160 clp->cl_nfs_mod = cl_init->nfs_mod;
161 try_module_get(clp->cl_nfs_mod->owner); 161 if (!try_module_get(clp->cl_nfs_mod->owner))
162 goto error_dealloc;
162 163
163 clp->rpc_ops = clp->cl_nfs_mod->rpc_ops; 164 clp->rpc_ops = clp->cl_nfs_mod->rpc_ops;
164 165
@@ -190,6 +191,7 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
190 191
191error_cleanup: 192error_cleanup:
192 put_nfs_version(clp->cl_nfs_mod); 193 put_nfs_version(clp->cl_nfs_mod);
194error_dealloc:
193 kfree(clp); 195 kfree(clp);
194error_0: 196error_0:
195 return ERR_PTR(err); 197 return ERR_PTR(err);
@@ -252,6 +254,7 @@ void nfs_free_client(struct nfs_client *clp)
252 put_net(clp->cl_net); 254 put_net(clp->cl_net);
253 put_nfs_version(clp->cl_nfs_mod); 255 put_nfs_version(clp->cl_nfs_mod);
254 kfree(clp->cl_hostname); 256 kfree(clp->cl_hostname);
257 kfree(clp->cl_acceptor);
255 kfree(clp); 258 kfree(clp);
256 259
257 dprintk("<-- nfs_free_client()\n"); 260 dprintk("<-- nfs_free_client()\n");
@@ -482,8 +485,13 @@ nfs_get_client(const struct nfs_client_initdata *cl_init,
482 struct nfs_net *nn = net_generic(cl_init->net, nfs_net_id); 485 struct nfs_net *nn = net_generic(cl_init->net, nfs_net_id);
483 const struct nfs_rpc_ops *rpc_ops = cl_init->nfs_mod->rpc_ops; 486 const struct nfs_rpc_ops *rpc_ops = cl_init->nfs_mod->rpc_ops;
484 487
488 if (cl_init->hostname == NULL) {
489 WARN_ON(1);
490 return NULL;
491 }
492
485 dprintk("--> nfs_get_client(%s,v%u)\n", 493 dprintk("--> nfs_get_client(%s,v%u)\n",
486 cl_init->hostname ?: "", rpc_ops->version); 494 cl_init->hostname, rpc_ops->version);
487 495
488 /* see if the client already exists */ 496 /* see if the client already exists */
489 do { 497 do {
@@ -510,7 +518,7 @@ nfs_get_client(const struct nfs_client_initdata *cl_init,
510 } while (!IS_ERR(new)); 518 } while (!IS_ERR(new));
511 519
512 dprintk("<-- nfs_get_client() Failed to find %s (%ld)\n", 520 dprintk("<-- nfs_get_client() Failed to find %s (%ld)\n",
513 cl_init->hostname ?: "", PTR_ERR(new)); 521 cl_init->hostname, PTR_ERR(new));
514 return new; 522 return new;
515} 523}
516EXPORT_SYMBOL_GPL(nfs_get_client); 524EXPORT_SYMBOL_GPL(nfs_get_client);
@@ -1205,7 +1213,7 @@ static const struct file_operations nfs_server_list_fops = {
1205 .open = nfs_server_list_open, 1213 .open = nfs_server_list_open,
1206 .read = seq_read, 1214 .read = seq_read,
1207 .llseek = seq_lseek, 1215 .llseek = seq_lseek,
1208 .release = seq_release, 1216 .release = seq_release_net,
1209 .owner = THIS_MODULE, 1217 .owner = THIS_MODULE,
1210}; 1218};
1211 1219
@@ -1226,7 +1234,7 @@ static const struct file_operations nfs_volume_list_fops = {
1226 .open = nfs_volume_list_open, 1234 .open = nfs_volume_list_open,
1227 .read = seq_read, 1235 .read = seq_read,
1228 .llseek = seq_lseek, 1236 .llseek = seq_lseek,
1229 .release = seq_release, 1237 .release = seq_release_net,
1230 .owner = THIS_MODULE, 1238 .owner = THIS_MODULE,
1231}; 1239};
1232 1240
@@ -1236,19 +1244,8 @@ static const struct file_operations nfs_volume_list_fops = {
1236 */ 1244 */
1237static int nfs_server_list_open(struct inode *inode, struct file *file) 1245static int nfs_server_list_open(struct inode *inode, struct file *file)
1238{ 1246{
1239 struct seq_file *m; 1247 return seq_open_net(inode, file, &nfs_server_list_ops,
1240 int ret; 1248 sizeof(struct seq_net_private));
1241 struct pid_namespace *pid_ns = file->f_dentry->d_sb->s_fs_info;
1242 struct net *net = pid_ns->child_reaper->nsproxy->net_ns;
1243
1244 ret = seq_open(file, &nfs_server_list_ops);
1245 if (ret < 0)
1246 return ret;
1247
1248 m = file->private_data;
1249 m->private = net;
1250
1251 return 0;
1252} 1249}
1253 1250
1254/* 1251/*
@@ -1256,7 +1253,7 @@ static int nfs_server_list_open(struct inode *inode, struct file *file)
1256 */ 1253 */
1257static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos) 1254static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos)
1258{ 1255{
1259 struct nfs_net *nn = net_generic(m->private, nfs_net_id); 1256 struct nfs_net *nn = net_generic(seq_file_net(m), nfs_net_id);
1260 1257
1261 /* lock the list against modification */ 1258 /* lock the list against modification */
1262 spin_lock(&nn->nfs_client_lock); 1259 spin_lock(&nn->nfs_client_lock);
@@ -1268,7 +1265,7 @@ static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos)
1268 */ 1265 */
1269static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos) 1266static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos)
1270{ 1267{
1271 struct nfs_net *nn = net_generic(p->private, nfs_net_id); 1268 struct nfs_net *nn = net_generic(seq_file_net(p), nfs_net_id);
1272 1269
1273 return seq_list_next(v, &nn->nfs_client_list, pos); 1270 return seq_list_next(v, &nn->nfs_client_list, pos);
1274} 1271}
@@ -1278,7 +1275,7 @@ static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos)
1278 */ 1275 */
1279static void nfs_server_list_stop(struct seq_file *p, void *v) 1276static void nfs_server_list_stop(struct seq_file *p, void *v)
1280{ 1277{
1281 struct nfs_net *nn = net_generic(p->private, nfs_net_id); 1278 struct nfs_net *nn = net_generic(seq_file_net(p), nfs_net_id);
1282 1279
1283 spin_unlock(&nn->nfs_client_lock); 1280 spin_unlock(&nn->nfs_client_lock);
1284} 1281}
@@ -1289,7 +1286,7 @@ static void nfs_server_list_stop(struct seq_file *p, void *v)
1289static int nfs_server_list_show(struct seq_file *m, void *v) 1286static int nfs_server_list_show(struct seq_file *m, void *v)
1290{ 1287{
1291 struct nfs_client *clp; 1288 struct nfs_client *clp;
1292 struct nfs_net *nn = net_generic(m->private, nfs_net_id); 1289 struct nfs_net *nn = net_generic(seq_file_net(m), nfs_net_id);
1293 1290
1294 /* display header on line 1 */ 1291 /* display header on line 1 */
1295 if (v == &nn->nfs_client_list) { 1292 if (v == &nn->nfs_client_list) {
@@ -1321,19 +1318,8 @@ static int nfs_server_list_show(struct seq_file *m, void *v)
1321 */ 1318 */
1322static int nfs_volume_list_open(struct inode *inode, struct file *file) 1319static int nfs_volume_list_open(struct inode *inode, struct file *file)
1323{ 1320{
1324 struct seq_file *m; 1321 return seq_open_net(inode, file, &nfs_server_list_ops,
1325 int ret; 1322 sizeof(struct seq_net_private));
1326 struct pid_namespace *pid_ns = file->f_dentry->d_sb->s_fs_info;
1327 struct net *net = pid_ns->child_reaper->nsproxy->net_ns;
1328
1329 ret = seq_open(file, &nfs_volume_list_ops);
1330 if (ret < 0)
1331 return ret;
1332
1333 m = file->private_data;
1334 m->private = net;
1335
1336 return 0;
1337} 1323}
1338 1324
1339/* 1325/*
@@ -1341,7 +1327,7 @@ static int nfs_volume_list_open(struct inode *inode, struct file *file)
1341 */ 1327 */
1342static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos) 1328static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos)
1343{ 1329{
1344 struct nfs_net *nn = net_generic(m->private, nfs_net_id); 1330 struct nfs_net *nn = net_generic(seq_file_net(m), nfs_net_id);
1345 1331
1346 /* lock the list against modification */ 1332 /* lock the list against modification */
1347 spin_lock(&nn->nfs_client_lock); 1333 spin_lock(&nn->nfs_client_lock);
@@ -1353,7 +1339,7 @@ static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos)
1353 */ 1339 */
1354static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos) 1340static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos)
1355{ 1341{
1356 struct nfs_net *nn = net_generic(p->private, nfs_net_id); 1342 struct nfs_net *nn = net_generic(seq_file_net(p), nfs_net_id);
1357 1343
1358 return seq_list_next(v, &nn->nfs_volume_list, pos); 1344 return seq_list_next(v, &nn->nfs_volume_list, pos);
1359} 1345}
@@ -1363,7 +1349,7 @@ static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos)
1363 */ 1349 */
1364static void nfs_volume_list_stop(struct seq_file *p, void *v) 1350static void nfs_volume_list_stop(struct seq_file *p, void *v)
1365{ 1351{
1366 struct nfs_net *nn = net_generic(p->private, nfs_net_id); 1352 struct nfs_net *nn = net_generic(seq_file_net(p), nfs_net_id);
1367 1353
1368 spin_unlock(&nn->nfs_client_lock); 1354 spin_unlock(&nn->nfs_client_lock);
1369} 1355}
@@ -1376,7 +1362,7 @@ static int nfs_volume_list_show(struct seq_file *m, void *v)
1376 struct nfs_server *server; 1362 struct nfs_server *server;
1377 struct nfs_client *clp; 1363 struct nfs_client *clp;
1378 char dev[8], fsid[17]; 1364 char dev[8], fsid[17];
1379 struct nfs_net *nn = net_generic(m->private, nfs_net_id); 1365 struct nfs_net *nn = net_generic(seq_file_net(m), nfs_net_id);
1380 1366
1381 /* display header on line 1 */ 1367 /* display header on line 1 */
1382 if (v == &nn->nfs_volume_list) { 1368 if (v == &nn->nfs_volume_list) {
@@ -1407,6 +1393,39 @@ static int nfs_volume_list_show(struct seq_file *m, void *v)
1407 return 0; 1393 return 0;
1408} 1394}
1409 1395
1396int nfs_fs_proc_net_init(struct net *net)
1397{
1398 struct nfs_net *nn = net_generic(net, nfs_net_id);
1399 struct proc_dir_entry *p;
1400
1401 nn->proc_nfsfs = proc_net_mkdir(net, "nfsfs", net->proc_net);
1402 if (!nn->proc_nfsfs)
1403 goto error_0;
1404
1405 /* a file of servers with which we're dealing */
1406 p = proc_create("servers", S_IFREG|S_IRUGO,
1407 nn->proc_nfsfs, &nfs_server_list_fops);
1408 if (!p)
1409 goto error_1;
1410
1411 /* a file of volumes that we have mounted */
1412 p = proc_create("volumes", S_IFREG|S_IRUGO,
1413 nn->proc_nfsfs, &nfs_volume_list_fops);
1414 if (!p)
1415 goto error_1;
1416 return 0;
1417
1418error_1:
1419 remove_proc_subtree("nfsfs", net->proc_net);
1420error_0:
1421 return -ENOMEM;
1422}
1423
1424void nfs_fs_proc_net_exit(struct net *net)
1425{
1426 remove_proc_subtree("nfsfs", net->proc_net);
1427}
1428
1410/* 1429/*
1411 * initialise the /proc/fs/nfsfs/ directory 1430 * initialise the /proc/fs/nfsfs/ directory
1412 */ 1431 */
@@ -1419,14 +1438,12 @@ int __init nfs_fs_proc_init(void)
1419 goto error_0; 1438 goto error_0;
1420 1439
1421 /* a file of servers with which we're dealing */ 1440 /* a file of servers with which we're dealing */
1422 p = proc_create("servers", S_IFREG|S_IRUGO, 1441 p = proc_symlink("servers", proc_fs_nfs, "../../net/nfsfs/servers");
1423 proc_fs_nfs, &nfs_server_list_fops);
1424 if (!p) 1442 if (!p)
1425 goto error_1; 1443 goto error_1;
1426 1444
1427 /* a file of volumes that we have mounted */ 1445 /* a file of volumes that we have mounted */
1428 p = proc_create("volumes", S_IFREG|S_IRUGO, 1446 p = proc_symlink("volumes", proc_fs_nfs, "../../net/nfsfs/volumes");
1429 proc_fs_nfs, &nfs_volume_list_fops);
1430 if (!p) 1447 if (!p)
1431 goto error_2; 1448 goto error_2;
1432 return 0; 1449 return 0;
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 5d8ccecf5f5c..5853f53db732 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -41,14 +41,8 @@ void nfs_mark_delegation_referenced(struct nfs_delegation *delegation)
41 set_bit(NFS_DELEGATION_REFERENCED, &delegation->flags); 41 set_bit(NFS_DELEGATION_REFERENCED, &delegation->flags);
42} 42}
43 43
44/** 44static int
45 * nfs_have_delegation - check if inode has a delegation 45nfs4_do_check_delegation(struct inode *inode, fmode_t flags, bool mark)
46 * @inode: inode to check
47 * @flags: delegation types to check for
48 *
49 * Returns one if inode has the indicated delegation, otherwise zero.
50 */
51int nfs4_have_delegation(struct inode *inode, fmode_t flags)
52{ 46{
53 struct nfs_delegation *delegation; 47 struct nfs_delegation *delegation;
54 int ret = 0; 48 int ret = 0;
@@ -58,12 +52,34 @@ int nfs4_have_delegation(struct inode *inode, fmode_t flags)
58 delegation = rcu_dereference(NFS_I(inode)->delegation); 52 delegation = rcu_dereference(NFS_I(inode)->delegation);
59 if (delegation != NULL && (delegation->type & flags) == flags && 53 if (delegation != NULL && (delegation->type & flags) == flags &&
60 !test_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) { 54 !test_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) {
61 nfs_mark_delegation_referenced(delegation); 55 if (mark)
56 nfs_mark_delegation_referenced(delegation);
62 ret = 1; 57 ret = 1;
63 } 58 }
64 rcu_read_unlock(); 59 rcu_read_unlock();
65 return ret; 60 return ret;
66} 61}
62/**
63 * nfs4_have_delegation - check if inode has a delegation, mark it
64 * NFS_DELEGATION_REFERENCED if there is one.
65 * @inode: inode to check
66 * @flags: delegation types to check for
67 *
68 * Returns one if inode has the indicated delegation, otherwise zero.
69 */
70int nfs4_have_delegation(struct inode *inode, fmode_t flags)
71{
72 return nfs4_do_check_delegation(inode, flags, true);
73}
74
75/*
76 * nfs4_check_delegation - check if inode has a delegation, do not mark
77 * NFS_DELEGATION_REFERENCED if it has one.
78 */
79int nfs4_check_delegation(struct inode *inode, fmode_t flags)
80{
81 return nfs4_do_check_delegation(inode, flags, false);
82}
67 83
68static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid) 84static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid)
69{ 85{
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 9a79c7a99d6d..5c1cce39297f 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -59,6 +59,7 @@ bool nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode, fmode_
59 59
60void nfs_mark_delegation_referenced(struct nfs_delegation *delegation); 60void nfs_mark_delegation_referenced(struct nfs_delegation *delegation);
61int nfs4_have_delegation(struct inode *inode, fmode_t flags); 61int nfs4_have_delegation(struct inode *inode, fmode_t flags);
62int nfs4_check_delegation(struct inode *inode, fmode_t flags);
62 63
63#endif 64#endif
64 65
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 4a3d4ef76127..36d921f0c602 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -988,9 +988,13 @@ EXPORT_SYMBOL_GPL(nfs_force_lookup_revalidate);
988 * A check for whether or not the parent directory has changed. 988 * A check for whether or not the parent directory has changed.
989 * In the case it has, we assume that the dentries are untrustworthy 989 * In the case it has, we assume that the dentries are untrustworthy
990 * and may need to be looked up again. 990 * and may need to be looked up again.
991 * If rcu_walk prevents us from performing a full check, return 0.
991 */ 992 */
992static int nfs_check_verifier(struct inode *dir, struct dentry *dentry) 993static int nfs_check_verifier(struct inode *dir, struct dentry *dentry,
994 int rcu_walk)
993{ 995{
996 int ret;
997
994 if (IS_ROOT(dentry)) 998 if (IS_ROOT(dentry))
995 return 1; 999 return 1;
996 if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONE) 1000 if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONE)
@@ -998,7 +1002,11 @@ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry)
998 if (!nfs_verify_change_attribute(dir, dentry->d_time)) 1002 if (!nfs_verify_change_attribute(dir, dentry->d_time))
999 return 0; 1003 return 0;
1000 /* Revalidate nfsi->cache_change_attribute before we declare a match */ 1004 /* Revalidate nfsi->cache_change_attribute before we declare a match */
1001 if (nfs_revalidate_inode(NFS_SERVER(dir), dir) < 0) 1005 if (rcu_walk)
1006 ret = nfs_revalidate_inode_rcu(NFS_SERVER(dir), dir);
1007 else
1008 ret = nfs_revalidate_inode(NFS_SERVER(dir), dir);
1009 if (ret < 0)
1002 return 0; 1010 return 0;
1003 if (!nfs_verify_change_attribute(dir, dentry->d_time)) 1011 if (!nfs_verify_change_attribute(dir, dentry->d_time))
1004 return 0; 1012 return 0;
@@ -1042,6 +1050,8 @@ int nfs_lookup_verify_inode(struct inode *inode, unsigned int flags)
1042out: 1050out:
1043 return (inode->i_nlink == 0) ? -ENOENT : 0; 1051 return (inode->i_nlink == 0) ? -ENOENT : 0;
1044out_force: 1052out_force:
1053 if (flags & LOOKUP_RCU)
1054 return -ECHILD;
1045 ret = __nfs_revalidate_inode(server, inode); 1055 ret = __nfs_revalidate_inode(server, inode);
1046 if (ret != 0) 1056 if (ret != 0)
1047 return ret; 1057 return ret;
@@ -1054,6 +1064,9 @@ out_force:
1054 * 1064 *
1055 * If parent mtime has changed, we revalidate, else we wait for a 1065 * If parent mtime has changed, we revalidate, else we wait for a
1056 * period corresponding to the parent's attribute cache timeout value. 1066 * period corresponding to the parent's attribute cache timeout value.
1067 *
1068 * If LOOKUP_RCU prevents us from performing a full check, return 1
1069 * suggesting a reval is needed.
1057 */ 1070 */
1058static inline 1071static inline
1059int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry, 1072int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
@@ -1064,7 +1077,7 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
1064 return 0; 1077 return 0;
1065 if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG) 1078 if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG)
1066 return 1; 1079 return 1;
1067 return !nfs_check_verifier(dir, dentry); 1080 return !nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU);
1068} 1081}
1069 1082
1070/* 1083/*
@@ -1088,21 +1101,30 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
1088 struct nfs4_label *label = NULL; 1101 struct nfs4_label *label = NULL;
1089 int error; 1102 int error;
1090 1103
1091 if (flags & LOOKUP_RCU) 1104 if (flags & LOOKUP_RCU) {
1092 return -ECHILD; 1105 parent = ACCESS_ONCE(dentry->d_parent);
1093 1106 dir = ACCESS_ONCE(parent->d_inode);
1094 parent = dget_parent(dentry); 1107 if (!dir)
1095 dir = parent->d_inode; 1108 return -ECHILD;
1109 } else {
1110 parent = dget_parent(dentry);
1111 dir = parent->d_inode;
1112 }
1096 nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE); 1113 nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE);
1097 inode = dentry->d_inode; 1114 inode = dentry->d_inode;
1098 1115
1099 if (!inode) { 1116 if (!inode) {
1100 if (nfs_neg_need_reval(dir, dentry, flags)) 1117 if (nfs_neg_need_reval(dir, dentry, flags)) {
1118 if (flags & LOOKUP_RCU)
1119 return -ECHILD;
1101 goto out_bad; 1120 goto out_bad;
1121 }
1102 goto out_valid_noent; 1122 goto out_valid_noent;
1103 } 1123 }
1104 1124
1105 if (is_bad_inode(inode)) { 1125 if (is_bad_inode(inode)) {
1126 if (flags & LOOKUP_RCU)
1127 return -ECHILD;
1106 dfprintk(LOOKUPCACHE, "%s: %pd2 has dud inode\n", 1128 dfprintk(LOOKUPCACHE, "%s: %pd2 has dud inode\n",
1107 __func__, dentry); 1129 __func__, dentry);
1108 goto out_bad; 1130 goto out_bad;
@@ -1112,12 +1134,20 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
1112 goto out_set_verifier; 1134 goto out_set_verifier;
1113 1135
1114 /* Force a full look up iff the parent directory has changed */ 1136 /* Force a full look up iff the parent directory has changed */
1115 if (!nfs_is_exclusive_create(dir, flags) && nfs_check_verifier(dir, dentry)) { 1137 if (!nfs_is_exclusive_create(dir, flags) &&
1116 if (nfs_lookup_verify_inode(inode, flags)) 1138 nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU)) {
1139
1140 if (nfs_lookup_verify_inode(inode, flags)) {
1141 if (flags & LOOKUP_RCU)
1142 return -ECHILD;
1117 goto out_zap_parent; 1143 goto out_zap_parent;
1144 }
1118 goto out_valid; 1145 goto out_valid;
1119 } 1146 }
1120 1147
1148 if (flags & LOOKUP_RCU)
1149 return -ECHILD;
1150
1121 if (NFS_STALE(inode)) 1151 if (NFS_STALE(inode))
1122 goto out_bad; 1152 goto out_bad;
1123 1153
@@ -1153,13 +1183,18 @@ out_set_verifier:
1153 /* Success: notify readdir to use READDIRPLUS */ 1183 /* Success: notify readdir to use READDIRPLUS */
1154 nfs_advise_use_readdirplus(dir); 1184 nfs_advise_use_readdirplus(dir);
1155 out_valid_noent: 1185 out_valid_noent:
1156 dput(parent); 1186 if (flags & LOOKUP_RCU) {
1187 if (parent != ACCESS_ONCE(dentry->d_parent))
1188 return -ECHILD;
1189 } else
1190 dput(parent);
1157 dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is valid\n", 1191 dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is valid\n",
1158 __func__, dentry); 1192 __func__, dentry);
1159 return 1; 1193 return 1;
1160out_zap_parent: 1194out_zap_parent:
1161 nfs_zap_caches(dir); 1195 nfs_zap_caches(dir);
1162 out_bad: 1196 out_bad:
1197 WARN_ON(flags & LOOKUP_RCU);
1163 nfs_free_fattr(fattr); 1198 nfs_free_fattr(fattr);
1164 nfs_free_fhandle(fhandle); 1199 nfs_free_fhandle(fhandle);
1165 nfs4_label_free(label); 1200 nfs4_label_free(label);
@@ -1185,6 +1220,7 @@ out_zap_parent:
1185 __func__, dentry); 1220 __func__, dentry);
1186 return 0; 1221 return 0;
1187out_error: 1222out_error:
1223 WARN_ON(flags & LOOKUP_RCU);
1188 nfs_free_fattr(fattr); 1224 nfs_free_fattr(fattr);
1189 nfs_free_fhandle(fhandle); 1225 nfs_free_fhandle(fhandle);
1190 nfs4_label_free(label); 1226 nfs4_label_free(label);
@@ -1529,14 +1565,9 @@ EXPORT_SYMBOL_GPL(nfs_atomic_open);
1529 1565
1530static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags) 1566static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags)
1531{ 1567{
1532 struct dentry *parent = NULL;
1533 struct inode *inode; 1568 struct inode *inode;
1534 struct inode *dir;
1535 int ret = 0; 1569 int ret = 0;
1536 1570
1537 if (flags & LOOKUP_RCU)
1538 return -ECHILD;
1539
1540 if (!(flags & LOOKUP_OPEN) || (flags & LOOKUP_DIRECTORY)) 1571 if (!(flags & LOOKUP_OPEN) || (flags & LOOKUP_DIRECTORY))
1541 goto no_open; 1572 goto no_open;
1542 if (d_mountpoint(dentry)) 1573 if (d_mountpoint(dentry))
@@ -1545,34 +1576,47 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags)
1545 goto no_open; 1576 goto no_open;
1546 1577
1547 inode = dentry->d_inode; 1578 inode = dentry->d_inode;
1548 parent = dget_parent(dentry);
1549 dir = parent->d_inode;
1550 1579
1551 /* We can't create new files in nfs_open_revalidate(), so we 1580 /* We can't create new files in nfs_open_revalidate(), so we
1552 * optimize away revalidation of negative dentries. 1581 * optimize away revalidation of negative dentries.
1553 */ 1582 */
1554 if (inode == NULL) { 1583 if (inode == NULL) {
1584 struct dentry *parent;
1585 struct inode *dir;
1586
1587 if (flags & LOOKUP_RCU) {
1588 parent = ACCESS_ONCE(dentry->d_parent);
1589 dir = ACCESS_ONCE(parent->d_inode);
1590 if (!dir)
1591 return -ECHILD;
1592 } else {
1593 parent = dget_parent(dentry);
1594 dir = parent->d_inode;
1595 }
1555 if (!nfs_neg_need_reval(dir, dentry, flags)) 1596 if (!nfs_neg_need_reval(dir, dentry, flags))
1556 ret = 1; 1597 ret = 1;
1598 else if (flags & LOOKUP_RCU)
1599 ret = -ECHILD;
1600 if (!(flags & LOOKUP_RCU))
1601 dput(parent);
1602 else if (parent != ACCESS_ONCE(dentry->d_parent))
1603 return -ECHILD;
1557 goto out; 1604 goto out;
1558 } 1605 }
1559 1606
1560 /* NFS only supports OPEN on regular files */ 1607 /* NFS only supports OPEN on regular files */
1561 if (!S_ISREG(inode->i_mode)) 1608 if (!S_ISREG(inode->i_mode))
1562 goto no_open_dput; 1609 goto no_open;
1563 /* We cannot do exclusive creation on a positive dentry */ 1610 /* We cannot do exclusive creation on a positive dentry */
1564 if (flags & LOOKUP_EXCL) 1611 if (flags & LOOKUP_EXCL)
1565 goto no_open_dput; 1612 goto no_open;
1566 1613
1567 /* Let f_op->open() actually open (and revalidate) the file */ 1614 /* Let f_op->open() actually open (and revalidate) the file */
1568 ret = 1; 1615 ret = 1;
1569 1616
1570out: 1617out:
1571 dput(parent);
1572 return ret; 1618 return ret;
1573 1619
1574no_open_dput:
1575 dput(parent);
1576no_open: 1620no_open:
1577 return nfs_lookup_revalidate(dentry, flags); 1621 return nfs_lookup_revalidate(dentry, flags);
1578} 1622}
@@ -2028,10 +2072,14 @@ static DEFINE_SPINLOCK(nfs_access_lru_lock);
2028static LIST_HEAD(nfs_access_lru_list); 2072static LIST_HEAD(nfs_access_lru_list);
2029static atomic_long_t nfs_access_nr_entries; 2073static atomic_long_t nfs_access_nr_entries;
2030 2074
2075static unsigned long nfs_access_max_cachesize = ULONG_MAX;
2076module_param(nfs_access_max_cachesize, ulong, 0644);
2077MODULE_PARM_DESC(nfs_access_max_cachesize, "NFS access maximum total cache length");
2078
2031static void nfs_access_free_entry(struct nfs_access_entry *entry) 2079static void nfs_access_free_entry(struct nfs_access_entry *entry)
2032{ 2080{
2033 put_rpccred(entry->cred); 2081 put_rpccred(entry->cred);
2034 kfree(entry); 2082 kfree_rcu(entry, rcu_head);
2035 smp_mb__before_atomic(); 2083 smp_mb__before_atomic();
2036 atomic_long_dec(&nfs_access_nr_entries); 2084 atomic_long_dec(&nfs_access_nr_entries);
2037 smp_mb__after_atomic(); 2085 smp_mb__after_atomic();
@@ -2048,19 +2096,14 @@ static void nfs_access_free_list(struct list_head *head)
2048 } 2096 }
2049} 2097}
2050 2098
2051unsigned long 2099static unsigned long
2052nfs_access_cache_scan(struct shrinker *shrink, struct shrink_control *sc) 2100nfs_do_access_cache_scan(unsigned int nr_to_scan)
2053{ 2101{
2054 LIST_HEAD(head); 2102 LIST_HEAD(head);
2055 struct nfs_inode *nfsi, *next; 2103 struct nfs_inode *nfsi, *next;
2056 struct nfs_access_entry *cache; 2104 struct nfs_access_entry *cache;
2057 int nr_to_scan = sc->nr_to_scan;
2058 gfp_t gfp_mask = sc->gfp_mask;
2059 long freed = 0; 2105 long freed = 0;
2060 2106
2061 if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL)
2062 return SHRINK_STOP;
2063
2064 spin_lock(&nfs_access_lru_lock); 2107 spin_lock(&nfs_access_lru_lock);
2065 list_for_each_entry_safe(nfsi, next, &nfs_access_lru_list, access_cache_inode_lru) { 2108 list_for_each_entry_safe(nfsi, next, &nfs_access_lru_list, access_cache_inode_lru) {
2066 struct inode *inode; 2109 struct inode *inode;
@@ -2094,11 +2137,39 @@ remove_lru_entry:
2094} 2137}
2095 2138
2096unsigned long 2139unsigned long
2140nfs_access_cache_scan(struct shrinker *shrink, struct shrink_control *sc)
2141{
2142 int nr_to_scan = sc->nr_to_scan;
2143 gfp_t gfp_mask = sc->gfp_mask;
2144
2145 if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL)
2146 return SHRINK_STOP;
2147 return nfs_do_access_cache_scan(nr_to_scan);
2148}
2149
2150
2151unsigned long
2097nfs_access_cache_count(struct shrinker *shrink, struct shrink_control *sc) 2152nfs_access_cache_count(struct shrinker *shrink, struct shrink_control *sc)
2098{ 2153{
2099 return vfs_pressure_ratio(atomic_long_read(&nfs_access_nr_entries)); 2154 return vfs_pressure_ratio(atomic_long_read(&nfs_access_nr_entries));
2100} 2155}
2101 2156
2157static void
2158nfs_access_cache_enforce_limit(void)
2159{
2160 long nr_entries = atomic_long_read(&nfs_access_nr_entries);
2161 unsigned long diff;
2162 unsigned int nr_to_scan;
2163
2164 if (nr_entries < 0 || nr_entries <= nfs_access_max_cachesize)
2165 return;
2166 nr_to_scan = 100;
2167 diff = nr_entries - nfs_access_max_cachesize;
2168 if (diff < nr_to_scan)
2169 nr_to_scan = diff;
2170 nfs_do_access_cache_scan(nr_to_scan);
2171}
2172
2102static void __nfs_access_zap_cache(struct nfs_inode *nfsi, struct list_head *head) 2173static void __nfs_access_zap_cache(struct nfs_inode *nfsi, struct list_head *head)
2103{ 2174{
2104 struct rb_root *root_node = &nfsi->access_cache; 2175 struct rb_root *root_node = &nfsi->access_cache;
@@ -2186,6 +2257,38 @@ out_zap:
2186 return -ENOENT; 2257 return -ENOENT;
2187} 2258}
2188 2259
2260static int nfs_access_get_cached_rcu(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res)
2261{
2262 /* Only check the most recently returned cache entry,
2263 * but do it without locking.
2264 */
2265 struct nfs_inode *nfsi = NFS_I(inode);
2266 struct nfs_access_entry *cache;
2267 int err = -ECHILD;
2268 struct list_head *lh;
2269
2270 rcu_read_lock();
2271 if (nfsi->cache_validity & NFS_INO_INVALID_ACCESS)
2272 goto out;
2273 lh = rcu_dereference(nfsi->access_cache_entry_lru.prev);
2274 cache = list_entry(lh, struct nfs_access_entry, lru);
2275 if (lh == &nfsi->access_cache_entry_lru ||
2276 cred != cache->cred)
2277 cache = NULL;
2278 if (cache == NULL)
2279 goto out;
2280 if (!nfs_have_delegated_attributes(inode) &&
2281 !time_in_range_open(jiffies, cache->jiffies, cache->jiffies + nfsi->attrtimeo))
2282 goto out;
2283 res->jiffies = cache->jiffies;
2284 res->cred = cache->cred;
2285 res->mask = cache->mask;
2286 err = 0;
2287out:
2288 rcu_read_unlock();
2289 return err;
2290}
2291
2189static void nfs_access_add_rbtree(struct inode *inode, struct nfs_access_entry *set) 2292static void nfs_access_add_rbtree(struct inode *inode, struct nfs_access_entry *set)
2190{ 2293{
2191 struct nfs_inode *nfsi = NFS_I(inode); 2294 struct nfs_inode *nfsi = NFS_I(inode);
@@ -2229,6 +2332,11 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
2229 cache->cred = get_rpccred(set->cred); 2332 cache->cred = get_rpccred(set->cred);
2230 cache->mask = set->mask; 2333 cache->mask = set->mask;
2231 2334
2335 /* The above field assignments must be visible
2336 * before this item appears on the lru. We cannot easily
2337 * use rcu_assign_pointer, so just force the memory barrier.
2338 */
2339 smp_wmb();
2232 nfs_access_add_rbtree(inode, cache); 2340 nfs_access_add_rbtree(inode, cache);
2233 2341
2234 /* Update accounting */ 2342 /* Update accounting */
@@ -2244,6 +2352,7 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
2244 &nfs_access_lru_list); 2352 &nfs_access_lru_list);
2245 spin_unlock(&nfs_access_lru_lock); 2353 spin_unlock(&nfs_access_lru_lock);
2246 } 2354 }
2355 nfs_access_cache_enforce_limit();
2247} 2356}
2248EXPORT_SYMBOL_GPL(nfs_access_add_cache); 2357EXPORT_SYMBOL_GPL(nfs_access_add_cache);
2249 2358
@@ -2267,10 +2376,16 @@ static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask)
2267 2376
2268 trace_nfs_access_enter(inode); 2377 trace_nfs_access_enter(inode);
2269 2378
2270 status = nfs_access_get_cached(inode, cred, &cache); 2379 status = nfs_access_get_cached_rcu(inode, cred, &cache);
2380 if (status != 0)
2381 status = nfs_access_get_cached(inode, cred, &cache);
2271 if (status == 0) 2382 if (status == 0)
2272 goto out_cached; 2383 goto out_cached;
2273 2384
2385 status = -ECHILD;
2386 if (mask & MAY_NOT_BLOCK)
2387 goto out;
2388
2274 /* Be clever: ask server to check for all possible rights */ 2389 /* Be clever: ask server to check for all possible rights */
2275 cache.mask = MAY_EXEC | MAY_WRITE | MAY_READ; 2390 cache.mask = MAY_EXEC | MAY_WRITE | MAY_READ;
2276 cache.cred = cred; 2391 cache.cred = cred;
@@ -2321,9 +2436,6 @@ int nfs_permission(struct inode *inode, int mask)
2321 struct rpc_cred *cred; 2436 struct rpc_cred *cred;
2322 int res = 0; 2437 int res = 0;
2323 2438
2324 if (mask & MAY_NOT_BLOCK)
2325 return -ECHILD;
2326
2327 nfs_inc_stats(inode, NFSIOS_VFSACCESS); 2439 nfs_inc_stats(inode, NFSIOS_VFSACCESS);
2328 2440
2329 if ((mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0) 2441 if ((mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0)
@@ -2350,12 +2462,23 @@ force_lookup:
2350 if (!NFS_PROTO(inode)->access) 2462 if (!NFS_PROTO(inode)->access)
2351 goto out_notsup; 2463 goto out_notsup;
2352 2464
2353 cred = rpc_lookup_cred(); 2465 /* Always try fast lookups first */
2354 if (!IS_ERR(cred)) { 2466 rcu_read_lock();
2355 res = nfs_do_access(inode, cred, mask); 2467 cred = rpc_lookup_cred_nonblock();
2356 put_rpccred(cred); 2468 if (!IS_ERR(cred))
2357 } else 2469 res = nfs_do_access(inode, cred, mask|MAY_NOT_BLOCK);
2470 else
2358 res = PTR_ERR(cred); 2471 res = PTR_ERR(cred);
2472 rcu_read_unlock();
2473 if (res == -ECHILD && !(mask & MAY_NOT_BLOCK)) {
2474 /* Fast lookup failed, try the slow way */
2475 cred = rpc_lookup_cred();
2476 if (!IS_ERR(cred)) {
2477 res = nfs_do_access(inode, cred, mask);
2478 put_rpccred(cred);
2479 } else
2480 res = PTR_ERR(cred);
2481 }
2359out: 2482out:
2360 if (!res && (mask & MAY_EXEC) && !execute_ok(inode)) 2483 if (!res && (mask & MAY_EXEC) && !execute_ok(inode))
2361 res = -EACCES; 2484 res = -EACCES;
@@ -2364,6 +2487,9 @@ out:
2364 inode->i_sb->s_id, inode->i_ino, mask, res); 2487 inode->i_sb->s_id, inode->i_ino, mask, res);
2365 return res; 2488 return res;
2366out_notsup: 2489out_notsup:
2490 if (mask & MAY_NOT_BLOCK)
2491 return -ECHILD;
2492
2367 res = nfs_revalidate_inode(NFS_SERVER(inode), inode); 2493 res = nfs_revalidate_inode(NFS_SERVER(inode), inode);
2368 if (res == 0) 2494 if (res == 0)
2369 res = generic_permission(inode, mask); 2495 res = generic_permission(inode, mask);
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index f11b9eed0de1..65ef6e00deee 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -148,8 +148,8 @@ static void nfs_direct_set_hdr_verf(struct nfs_direct_req *dreq,
148{ 148{
149 struct nfs_writeverf *verfp; 149 struct nfs_writeverf *verfp;
150 150
151 verfp = nfs_direct_select_verf(dreq, hdr->data->ds_clp, 151 verfp = nfs_direct_select_verf(dreq, hdr->ds_clp,
152 hdr->data->ds_idx); 152 hdr->ds_idx);
153 WARN_ON_ONCE(verfp->committed >= 0); 153 WARN_ON_ONCE(verfp->committed >= 0);
154 memcpy(verfp, &hdr->verf, sizeof(struct nfs_writeverf)); 154 memcpy(verfp, &hdr->verf, sizeof(struct nfs_writeverf));
155 WARN_ON_ONCE(verfp->committed < 0); 155 WARN_ON_ONCE(verfp->committed < 0);
@@ -169,8 +169,8 @@ static int nfs_direct_set_or_cmp_hdr_verf(struct nfs_direct_req *dreq,
169{ 169{
170 struct nfs_writeverf *verfp; 170 struct nfs_writeverf *verfp;
171 171
172 verfp = nfs_direct_select_verf(dreq, hdr->data->ds_clp, 172 verfp = nfs_direct_select_verf(dreq, hdr->ds_clp,
173 hdr->data->ds_idx); 173 hdr->ds_idx);
174 if (verfp->committed < 0) { 174 if (verfp->committed < 0) {
175 nfs_direct_set_hdr_verf(dreq, hdr); 175 nfs_direct_set_hdr_verf(dreq, hdr);
176 return 0; 176 return 0;
@@ -715,7 +715,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
715{ 715{
716 struct nfs_direct_req *dreq = hdr->dreq; 716 struct nfs_direct_req *dreq = hdr->dreq;
717 struct nfs_commit_info cinfo; 717 struct nfs_commit_info cinfo;
718 int bit = -1; 718 bool request_commit = false;
719 struct nfs_page *req = nfs_list_entry(hdr->pages.next); 719 struct nfs_page *req = nfs_list_entry(hdr->pages.next);
720 720
721 if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) 721 if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
@@ -729,27 +729,20 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
729 dreq->flags = 0; 729 dreq->flags = 0;
730 dreq->error = hdr->error; 730 dreq->error = hdr->error;
731 } 731 }
732 if (dreq->error != 0) 732 if (dreq->error == 0) {
733 bit = NFS_IOHDR_ERROR;
734 else {
735 dreq->count += hdr->good_bytes; 733 dreq->count += hdr->good_bytes;
736 if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) { 734 if (nfs_write_need_commit(hdr)) {
737 dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
738 bit = NFS_IOHDR_NEED_RESCHED;
739 } else if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) {
740 if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) 735 if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES)
741 bit = NFS_IOHDR_NEED_RESCHED; 736 request_commit = true;
742 else if (dreq->flags == 0) { 737 else if (dreq->flags == 0) {
743 nfs_direct_set_hdr_verf(dreq, hdr); 738 nfs_direct_set_hdr_verf(dreq, hdr);
744 bit = NFS_IOHDR_NEED_COMMIT; 739 request_commit = true;
745 dreq->flags = NFS_ODIRECT_DO_COMMIT; 740 dreq->flags = NFS_ODIRECT_DO_COMMIT;
746 } else if (dreq->flags == NFS_ODIRECT_DO_COMMIT) { 741 } else if (dreq->flags == NFS_ODIRECT_DO_COMMIT) {
747 if (nfs_direct_set_or_cmp_hdr_verf(dreq, hdr)) { 742 request_commit = true;
743 if (nfs_direct_set_or_cmp_hdr_verf(dreq, hdr))
748 dreq->flags = 744 dreq->flags =
749 NFS_ODIRECT_RESCHED_WRITES; 745 NFS_ODIRECT_RESCHED_WRITES;
750 bit = NFS_IOHDR_NEED_RESCHED;
751 } else
752 bit = NFS_IOHDR_NEED_COMMIT;
753 } 746 }
754 } 747 }
755 } 748 }
@@ -759,9 +752,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
759 752
760 req = nfs_list_entry(hdr->pages.next); 753 req = nfs_list_entry(hdr->pages.next);
761 nfs_list_remove_request(req); 754 nfs_list_remove_request(req);
762 switch (bit) { 755 if (request_commit) {
763 case NFS_IOHDR_NEED_RESCHED:
764 case NFS_IOHDR_NEED_COMMIT:
765 kref_get(&req->wb_kref); 756 kref_get(&req->wb_kref);
766 nfs_mark_request_commit(req, hdr->lseg, &cinfo); 757 nfs_mark_request_commit(req, hdr->lseg, &cinfo);
767 } 758 }
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 4042ff58fe3f..524dd80d1898 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -361,8 +361,8 @@ start:
361 * Prevent starvation issues if someone is doing a consistency 361 * Prevent starvation issues if someone is doing a consistency
362 * sync-to-disk 362 * sync-to-disk
363 */ 363 */
364 ret = wait_on_bit(&NFS_I(mapping->host)->flags, NFS_INO_FLUSHING, 364 ret = wait_on_bit_action(&NFS_I(mapping->host)->flags, NFS_INO_FLUSHING,
365 nfs_wait_bit_killable, TASK_KILLABLE); 365 nfs_wait_bit_killable, TASK_KILLABLE);
366 if (ret) 366 if (ret)
367 return ret; 367 return ret;
368 368
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index d2eba1c13b7e..90978075f730 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -84,45 +84,37 @@ filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset)
84 BUG(); 84 BUG();
85} 85}
86 86
87static void filelayout_reset_write(struct nfs_pgio_data *data) 87static void filelayout_reset_write(struct nfs_pgio_header *hdr)
88{ 88{
89 struct nfs_pgio_header *hdr = data->header; 89 struct rpc_task *task = &hdr->task;
90 struct rpc_task *task = &data->task;
91 90
92 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { 91 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
93 dprintk("%s Reset task %5u for i/o through MDS " 92 dprintk("%s Reset task %5u for i/o through MDS "
94 "(req %s/%llu, %u bytes @ offset %llu)\n", __func__, 93 "(req %s/%llu, %u bytes @ offset %llu)\n", __func__,
95 data->task.tk_pid, 94 hdr->task.tk_pid,
96 hdr->inode->i_sb->s_id, 95 hdr->inode->i_sb->s_id,
97 (unsigned long long)NFS_FILEID(hdr->inode), 96 (unsigned long long)NFS_FILEID(hdr->inode),
98 data->args.count, 97 hdr->args.count,
99 (unsigned long long)data->args.offset); 98 (unsigned long long)hdr->args.offset);
100 99
101 task->tk_status = pnfs_write_done_resend_to_mds(hdr->inode, 100 task->tk_status = pnfs_write_done_resend_to_mds(hdr);
102 &hdr->pages,
103 hdr->completion_ops,
104 hdr->dreq);
105 } 101 }
106} 102}
107 103
108static void filelayout_reset_read(struct nfs_pgio_data *data) 104static void filelayout_reset_read(struct nfs_pgio_header *hdr)
109{ 105{
110 struct nfs_pgio_header *hdr = data->header; 106 struct rpc_task *task = &hdr->task;
111 struct rpc_task *task = &data->task;
112 107
113 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { 108 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
114 dprintk("%s Reset task %5u for i/o through MDS " 109 dprintk("%s Reset task %5u for i/o through MDS "
115 "(req %s/%llu, %u bytes @ offset %llu)\n", __func__, 110 "(req %s/%llu, %u bytes @ offset %llu)\n", __func__,
116 data->task.tk_pid, 111 hdr->task.tk_pid,
117 hdr->inode->i_sb->s_id, 112 hdr->inode->i_sb->s_id,
118 (unsigned long long)NFS_FILEID(hdr->inode), 113 (unsigned long long)NFS_FILEID(hdr->inode),
119 data->args.count, 114 hdr->args.count,
120 (unsigned long long)data->args.offset); 115 (unsigned long long)hdr->args.offset);
121 116
122 task->tk_status = pnfs_read_done_resend_to_mds(hdr->inode, 117 task->tk_status = pnfs_read_done_resend_to_mds(hdr);
123 &hdr->pages,
124 hdr->completion_ops,
125 hdr->dreq);
126 } 118 }
127} 119}
128 120
@@ -243,18 +235,17 @@ wait_on_recovery:
243/* NFS_PROTO call done callback routines */ 235/* NFS_PROTO call done callback routines */
244 236
245static int filelayout_read_done_cb(struct rpc_task *task, 237static int filelayout_read_done_cb(struct rpc_task *task,
246 struct nfs_pgio_data *data) 238 struct nfs_pgio_header *hdr)
247{ 239{
248 struct nfs_pgio_header *hdr = data->header;
249 int err; 240 int err;
250 241
251 trace_nfs4_pnfs_read(data, task->tk_status); 242 trace_nfs4_pnfs_read(hdr, task->tk_status);
252 err = filelayout_async_handle_error(task, data->args.context->state, 243 err = filelayout_async_handle_error(task, hdr->args.context->state,
253 data->ds_clp, hdr->lseg); 244 hdr->ds_clp, hdr->lseg);
254 245
255 switch (err) { 246 switch (err) {
256 case -NFS4ERR_RESET_TO_MDS: 247 case -NFS4ERR_RESET_TO_MDS:
257 filelayout_reset_read(data); 248 filelayout_reset_read(hdr);
258 return task->tk_status; 249 return task->tk_status;
259 case -EAGAIN: 250 case -EAGAIN:
260 rpc_restart_call_prepare(task); 251 rpc_restart_call_prepare(task);
@@ -270,15 +261,14 @@ static int filelayout_read_done_cb(struct rpc_task *task,
270 * rfc5661 is not clear about which credential should be used. 261 * rfc5661 is not clear about which credential should be used.
271 */ 262 */
272static void 263static void
273filelayout_set_layoutcommit(struct nfs_pgio_data *wdata) 264filelayout_set_layoutcommit(struct nfs_pgio_header *hdr)
274{ 265{
275 struct nfs_pgio_header *hdr = wdata->header;
276 266
277 if (FILELAYOUT_LSEG(hdr->lseg)->commit_through_mds || 267 if (FILELAYOUT_LSEG(hdr->lseg)->commit_through_mds ||
278 wdata->res.verf->committed == NFS_FILE_SYNC) 268 hdr->res.verf->committed == NFS_FILE_SYNC)
279 return; 269 return;
280 270
281 pnfs_set_layoutcommit(wdata); 271 pnfs_set_layoutcommit(hdr);
282 dprintk("%s inode %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino, 272 dprintk("%s inode %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino,
283 (unsigned long) NFS_I(hdr->inode)->layout->plh_lwb); 273 (unsigned long) NFS_I(hdr->inode)->layout->plh_lwb);
284} 274}
@@ -305,83 +295,82 @@ filelayout_reset_to_mds(struct pnfs_layout_segment *lseg)
305 */ 295 */
306static void filelayout_read_prepare(struct rpc_task *task, void *data) 296static void filelayout_read_prepare(struct rpc_task *task, void *data)
307{ 297{
308 struct nfs_pgio_data *rdata = data; 298 struct nfs_pgio_header *hdr = data;
309 299
310 if (unlikely(test_bit(NFS_CONTEXT_BAD, &rdata->args.context->flags))) { 300 if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) {
311 rpc_exit(task, -EIO); 301 rpc_exit(task, -EIO);
312 return; 302 return;
313 } 303 }
314 if (filelayout_reset_to_mds(rdata->header->lseg)) { 304 if (filelayout_reset_to_mds(hdr->lseg)) {
315 dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid); 305 dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid);
316 filelayout_reset_read(rdata); 306 filelayout_reset_read(hdr);
317 rpc_exit(task, 0); 307 rpc_exit(task, 0);
318 return; 308 return;
319 } 309 }
320 rdata->pgio_done_cb = filelayout_read_done_cb; 310 hdr->pgio_done_cb = filelayout_read_done_cb;
321 311
322 if (nfs41_setup_sequence(rdata->ds_clp->cl_session, 312 if (nfs41_setup_sequence(hdr->ds_clp->cl_session,
323 &rdata->args.seq_args, 313 &hdr->args.seq_args,
324 &rdata->res.seq_res, 314 &hdr->res.seq_res,
325 task)) 315 task))
326 return; 316 return;
327 if (nfs4_set_rw_stateid(&rdata->args.stateid, rdata->args.context, 317 if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context,
328 rdata->args.lock_context, FMODE_READ) == -EIO) 318 hdr->args.lock_context, FMODE_READ) == -EIO)
329 rpc_exit(task, -EIO); /* lost lock, terminate I/O */ 319 rpc_exit(task, -EIO); /* lost lock, terminate I/O */
330} 320}
331 321
332static void filelayout_read_call_done(struct rpc_task *task, void *data) 322static void filelayout_read_call_done(struct rpc_task *task, void *data)
333{ 323{
334 struct nfs_pgio_data *rdata = data; 324 struct nfs_pgio_header *hdr = data;
335 325
336 dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status); 326 dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status);
337 327
338 if (test_bit(NFS_IOHDR_REDO, &rdata->header->flags) && 328 if (test_bit(NFS_IOHDR_REDO, &hdr->flags) &&
339 task->tk_status == 0) { 329 task->tk_status == 0) {
340 nfs41_sequence_done(task, &rdata->res.seq_res); 330 nfs41_sequence_done(task, &hdr->res.seq_res);
341 return; 331 return;
342 } 332 }
343 333
344 /* Note this may cause RPC to be resent */ 334 /* Note this may cause RPC to be resent */
345 rdata->header->mds_ops->rpc_call_done(task, data); 335 hdr->mds_ops->rpc_call_done(task, data);
346} 336}
347 337
348static void filelayout_read_count_stats(struct rpc_task *task, void *data) 338static void filelayout_read_count_stats(struct rpc_task *task, void *data)
349{ 339{
350 struct nfs_pgio_data *rdata = data; 340 struct nfs_pgio_header *hdr = data;
351 341
352 rpc_count_iostats(task, NFS_SERVER(rdata->header->inode)->client->cl_metrics); 342 rpc_count_iostats(task, NFS_SERVER(hdr->inode)->client->cl_metrics);
353} 343}
354 344
355static void filelayout_read_release(void *data) 345static void filelayout_read_release(void *data)
356{ 346{
357 struct nfs_pgio_data *rdata = data; 347 struct nfs_pgio_header *hdr = data;
358 struct pnfs_layout_hdr *lo = rdata->header->lseg->pls_layout; 348 struct pnfs_layout_hdr *lo = hdr->lseg->pls_layout;
359 349
360 filelayout_fenceme(lo->plh_inode, lo); 350 filelayout_fenceme(lo->plh_inode, lo);
361 nfs_put_client(rdata->ds_clp); 351 nfs_put_client(hdr->ds_clp);
362 rdata->header->mds_ops->rpc_release(data); 352 hdr->mds_ops->rpc_release(data);
363} 353}
364 354
365static int filelayout_write_done_cb(struct rpc_task *task, 355static int filelayout_write_done_cb(struct rpc_task *task,
366 struct nfs_pgio_data *data) 356 struct nfs_pgio_header *hdr)
367{ 357{
368 struct nfs_pgio_header *hdr = data->header;
369 int err; 358 int err;
370 359
371 trace_nfs4_pnfs_write(data, task->tk_status); 360 trace_nfs4_pnfs_write(hdr, task->tk_status);
372 err = filelayout_async_handle_error(task, data->args.context->state, 361 err = filelayout_async_handle_error(task, hdr->args.context->state,
373 data->ds_clp, hdr->lseg); 362 hdr->ds_clp, hdr->lseg);
374 363
375 switch (err) { 364 switch (err) {
376 case -NFS4ERR_RESET_TO_MDS: 365 case -NFS4ERR_RESET_TO_MDS:
377 filelayout_reset_write(data); 366 filelayout_reset_write(hdr);
378 return task->tk_status; 367 return task->tk_status;
379 case -EAGAIN: 368 case -EAGAIN:
380 rpc_restart_call_prepare(task); 369 rpc_restart_call_prepare(task);
381 return -EAGAIN; 370 return -EAGAIN;
382 } 371 }
383 372
384 filelayout_set_layoutcommit(data); 373 filelayout_set_layoutcommit(hdr);
385 return 0; 374 return 0;
386} 375}
387 376
@@ -419,57 +408,57 @@ static int filelayout_commit_done_cb(struct rpc_task *task,
419 408
420static void filelayout_write_prepare(struct rpc_task *task, void *data) 409static void filelayout_write_prepare(struct rpc_task *task, void *data)
421{ 410{
422 struct nfs_pgio_data *wdata = data; 411 struct nfs_pgio_header *hdr = data;
423 412
424 if (unlikely(test_bit(NFS_CONTEXT_BAD, &wdata->args.context->flags))) { 413 if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) {
425 rpc_exit(task, -EIO); 414 rpc_exit(task, -EIO);
426 return; 415 return;
427 } 416 }
428 if (filelayout_reset_to_mds(wdata->header->lseg)) { 417 if (filelayout_reset_to_mds(hdr->lseg)) {
429 dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid); 418 dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid);
430 filelayout_reset_write(wdata); 419 filelayout_reset_write(hdr);
431 rpc_exit(task, 0); 420 rpc_exit(task, 0);
432 return; 421 return;
433 } 422 }
434 if (nfs41_setup_sequence(wdata->ds_clp->cl_session, 423 if (nfs41_setup_sequence(hdr->ds_clp->cl_session,
435 &wdata->args.seq_args, 424 &hdr->args.seq_args,
436 &wdata->res.seq_res, 425 &hdr->res.seq_res,
437 task)) 426 task))
438 return; 427 return;
439 if (nfs4_set_rw_stateid(&wdata->args.stateid, wdata->args.context, 428 if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context,
440 wdata->args.lock_context, FMODE_WRITE) == -EIO) 429 hdr->args.lock_context, FMODE_WRITE) == -EIO)
441 rpc_exit(task, -EIO); /* lost lock, terminate I/O */ 430 rpc_exit(task, -EIO); /* lost lock, terminate I/O */
442} 431}
443 432
444static void filelayout_write_call_done(struct rpc_task *task, void *data) 433static void filelayout_write_call_done(struct rpc_task *task, void *data)
445{ 434{
446 struct nfs_pgio_data *wdata = data; 435 struct nfs_pgio_header *hdr = data;
447 436
448 if (test_bit(NFS_IOHDR_REDO, &wdata->header->flags) && 437 if (test_bit(NFS_IOHDR_REDO, &hdr->flags) &&
449 task->tk_status == 0) { 438 task->tk_status == 0) {
450 nfs41_sequence_done(task, &wdata->res.seq_res); 439 nfs41_sequence_done(task, &hdr->res.seq_res);
451 return; 440 return;
452 } 441 }
453 442
454 /* Note this may cause RPC to be resent */ 443 /* Note this may cause RPC to be resent */
455 wdata->header->mds_ops->rpc_call_done(task, data); 444 hdr->mds_ops->rpc_call_done(task, data);
456} 445}
457 446
458static void filelayout_write_count_stats(struct rpc_task *task, void *data) 447static void filelayout_write_count_stats(struct rpc_task *task, void *data)
459{ 448{
460 struct nfs_pgio_data *wdata = data; 449 struct nfs_pgio_header *hdr = data;
461 450
462 rpc_count_iostats(task, NFS_SERVER(wdata->header->inode)->client->cl_metrics); 451 rpc_count_iostats(task, NFS_SERVER(hdr->inode)->client->cl_metrics);
463} 452}
464 453
465static void filelayout_write_release(void *data) 454static void filelayout_write_release(void *data)
466{ 455{
467 struct nfs_pgio_data *wdata = data; 456 struct nfs_pgio_header *hdr = data;
468 struct pnfs_layout_hdr *lo = wdata->header->lseg->pls_layout; 457 struct pnfs_layout_hdr *lo = hdr->lseg->pls_layout;
469 458
470 filelayout_fenceme(lo->plh_inode, lo); 459 filelayout_fenceme(lo->plh_inode, lo);
471 nfs_put_client(wdata->ds_clp); 460 nfs_put_client(hdr->ds_clp);
472 wdata->header->mds_ops->rpc_release(data); 461 hdr->mds_ops->rpc_release(data);
473} 462}
474 463
475static void filelayout_commit_prepare(struct rpc_task *task, void *data) 464static void filelayout_commit_prepare(struct rpc_task *task, void *data)
@@ -529,19 +518,18 @@ static const struct rpc_call_ops filelayout_commit_call_ops = {
529}; 518};
530 519
531static enum pnfs_try_status 520static enum pnfs_try_status
532filelayout_read_pagelist(struct nfs_pgio_data *data) 521filelayout_read_pagelist(struct nfs_pgio_header *hdr)
533{ 522{
534 struct nfs_pgio_header *hdr = data->header;
535 struct pnfs_layout_segment *lseg = hdr->lseg; 523 struct pnfs_layout_segment *lseg = hdr->lseg;
536 struct nfs4_pnfs_ds *ds; 524 struct nfs4_pnfs_ds *ds;
537 struct rpc_clnt *ds_clnt; 525 struct rpc_clnt *ds_clnt;
538 loff_t offset = data->args.offset; 526 loff_t offset = hdr->args.offset;
539 u32 j, idx; 527 u32 j, idx;
540 struct nfs_fh *fh; 528 struct nfs_fh *fh;
541 529
542 dprintk("--> %s ino %lu pgbase %u req %Zu@%llu\n", 530 dprintk("--> %s ino %lu pgbase %u req %Zu@%llu\n",
543 __func__, hdr->inode->i_ino, 531 __func__, hdr->inode->i_ino,
544 data->args.pgbase, (size_t)data->args.count, offset); 532 hdr->args.pgbase, (size_t)hdr->args.count, offset);
545 533
546 /* Retrieve the correct rpc_client for the byte range */ 534 /* Retrieve the correct rpc_client for the byte range */
547 j = nfs4_fl_calc_j_index(lseg, offset); 535 j = nfs4_fl_calc_j_index(lseg, offset);
@@ -559,30 +547,29 @@ filelayout_read_pagelist(struct nfs_pgio_data *data)
559 547
560 /* No multipath support. Use first DS */ 548 /* No multipath support. Use first DS */
561 atomic_inc(&ds->ds_clp->cl_count); 549 atomic_inc(&ds->ds_clp->cl_count);
562 data->ds_clp = ds->ds_clp; 550 hdr->ds_clp = ds->ds_clp;
563 data->ds_idx = idx; 551 hdr->ds_idx = idx;
564 fh = nfs4_fl_select_ds_fh(lseg, j); 552 fh = nfs4_fl_select_ds_fh(lseg, j);
565 if (fh) 553 if (fh)
566 data->args.fh = fh; 554 hdr->args.fh = fh;
567 555
568 data->args.offset = filelayout_get_dserver_offset(lseg, offset); 556 hdr->args.offset = filelayout_get_dserver_offset(lseg, offset);
569 data->mds_offset = offset; 557 hdr->mds_offset = offset;
570 558
571 /* Perform an asynchronous read to ds */ 559 /* Perform an asynchronous read to ds */
572 nfs_initiate_pgio(ds_clnt, data, 560 nfs_initiate_pgio(ds_clnt, hdr,
573 &filelayout_read_call_ops, 0, RPC_TASK_SOFTCONN); 561 &filelayout_read_call_ops, 0, RPC_TASK_SOFTCONN);
574 return PNFS_ATTEMPTED; 562 return PNFS_ATTEMPTED;
575} 563}
576 564
577/* Perform async writes. */ 565/* Perform async writes. */
578static enum pnfs_try_status 566static enum pnfs_try_status
579filelayout_write_pagelist(struct nfs_pgio_data *data, int sync) 567filelayout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
580{ 568{
581 struct nfs_pgio_header *hdr = data->header;
582 struct pnfs_layout_segment *lseg = hdr->lseg; 569 struct pnfs_layout_segment *lseg = hdr->lseg;
583 struct nfs4_pnfs_ds *ds; 570 struct nfs4_pnfs_ds *ds;
584 struct rpc_clnt *ds_clnt; 571 struct rpc_clnt *ds_clnt;
585 loff_t offset = data->args.offset; 572 loff_t offset = hdr->args.offset;
586 u32 j, idx; 573 u32 j, idx;
587 struct nfs_fh *fh; 574 struct nfs_fh *fh;
588 575
@@ -598,21 +585,20 @@ filelayout_write_pagelist(struct nfs_pgio_data *data, int sync)
598 return PNFS_NOT_ATTEMPTED; 585 return PNFS_NOT_ATTEMPTED;
599 586
600 dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s cl_count %d\n", 587 dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s cl_count %d\n",
601 __func__, hdr->inode->i_ino, sync, (size_t) data->args.count, 588 __func__, hdr->inode->i_ino, sync, (size_t) hdr->args.count,
602 offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count)); 589 offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count));
603 590
604 data->pgio_done_cb = filelayout_write_done_cb; 591 hdr->pgio_done_cb = filelayout_write_done_cb;
605 atomic_inc(&ds->ds_clp->cl_count); 592 atomic_inc(&ds->ds_clp->cl_count);
606 data->ds_clp = ds->ds_clp; 593 hdr->ds_clp = ds->ds_clp;
607 data->ds_idx = idx; 594 hdr->ds_idx = idx;
608 fh = nfs4_fl_select_ds_fh(lseg, j); 595 fh = nfs4_fl_select_ds_fh(lseg, j);
609 if (fh) 596 if (fh)
610 data->args.fh = fh; 597 hdr->args.fh = fh;
611 598 hdr->args.offset = filelayout_get_dserver_offset(lseg, offset);
612 data->args.offset = filelayout_get_dserver_offset(lseg, offset);
613 599
614 /* Perform an asynchronous write */ 600 /* Perform an asynchronous write */
615 nfs_initiate_pgio(ds_clnt, data, 601 nfs_initiate_pgio(ds_clnt, hdr,
616 &filelayout_write_call_ops, sync, 602 &filelayout_write_call_ops, sync,
617 RPC_TASK_SOFTCONN); 603 RPC_TASK_SOFTCONN);
618 return PNFS_ATTEMPTED; 604 return PNFS_ATTEMPTED;
@@ -1023,6 +1009,7 @@ static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j)
1023 1009
1024/* The generic layer is about to remove the req from the commit list. 1010/* The generic layer is about to remove the req from the commit list.
1025 * If this will make the bucket empty, it will need to put the lseg reference. 1011 * If this will make the bucket empty, it will need to put the lseg reference.
1012 * Note this must be called holding the inode (/cinfo) lock
1026 */ 1013 */
1027static void 1014static void
1028filelayout_clear_request_commit(struct nfs_page *req, 1015filelayout_clear_request_commit(struct nfs_page *req,
@@ -1030,7 +1017,6 @@ filelayout_clear_request_commit(struct nfs_page *req,
1030{ 1017{
1031 struct pnfs_layout_segment *freeme = NULL; 1018 struct pnfs_layout_segment *freeme = NULL;
1032 1019
1033 spin_lock(cinfo->lock);
1034 if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags)) 1020 if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags))
1035 goto out; 1021 goto out;
1036 cinfo->ds->nwritten--; 1022 cinfo->ds->nwritten--;
@@ -1045,22 +1031,25 @@ filelayout_clear_request_commit(struct nfs_page *req,
1045 } 1031 }
1046out: 1032out:
1047 nfs_request_remove_commit_list(req, cinfo); 1033 nfs_request_remove_commit_list(req, cinfo);
1048 spin_unlock(cinfo->lock); 1034 pnfs_put_lseg_async(freeme);
1049 pnfs_put_lseg(freeme);
1050} 1035}
1051 1036
1052static struct list_head * 1037static void
1053filelayout_choose_commit_list(struct nfs_page *req, 1038filelayout_mark_request_commit(struct nfs_page *req,
1054 struct pnfs_layout_segment *lseg, 1039 struct pnfs_layout_segment *lseg,
1055 struct nfs_commit_info *cinfo) 1040 struct nfs_commit_info *cinfo)
1041
1056{ 1042{
1057 struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); 1043 struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
1058 u32 i, j; 1044 u32 i, j;
1059 struct list_head *list; 1045 struct list_head *list;
1060 struct pnfs_commit_bucket *buckets; 1046 struct pnfs_commit_bucket *buckets;
1061 1047
1062 if (fl->commit_through_mds) 1048 if (fl->commit_through_mds) {
1063 return &cinfo->mds->list; 1049 list = &cinfo->mds->list;
1050 spin_lock(cinfo->lock);
1051 goto mds_commit;
1052 }
1064 1053
1065 /* Note that we are calling nfs4_fl_calc_j_index on each page 1054 /* Note that we are calling nfs4_fl_calc_j_index on each page
1066 * that ends up being committed to a data server. An attractive 1055 * that ends up being committed to a data server. An attractive
@@ -1084,19 +1073,22 @@ filelayout_choose_commit_list(struct nfs_page *req,
1084 } 1073 }
1085 set_bit(PG_COMMIT_TO_DS, &req->wb_flags); 1074 set_bit(PG_COMMIT_TO_DS, &req->wb_flags);
1086 cinfo->ds->nwritten++; 1075 cinfo->ds->nwritten++;
1087 spin_unlock(cinfo->lock);
1088 return list;
1089}
1090 1076
1091static void 1077mds_commit:
1092filelayout_mark_request_commit(struct nfs_page *req, 1078 /* nfs_request_add_commit_list(). We need to add req to list without
1093 struct pnfs_layout_segment *lseg, 1079 * dropping cinfo lock.
1094 struct nfs_commit_info *cinfo) 1080 */
1095{ 1081 set_bit(PG_CLEAN, &(req)->wb_flags);
1096 struct list_head *list; 1082 nfs_list_add_request(req, list);
1097 1083 cinfo->mds->ncommit++;
1098 list = filelayout_choose_commit_list(req, lseg, cinfo); 1084 spin_unlock(cinfo->lock);
1099 nfs_request_add_commit_list(req, list, cinfo); 1085 if (!cinfo->dreq) {
1086 inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
1087 inc_bdi_stat(page_file_mapping(req->wb_page)->backing_dev_info,
1088 BDI_RECLAIMABLE);
1089 __mark_inode_dirty(req->wb_context->dentry->d_inode,
1090 I_DIRTY_DATASYNC);
1091 }
1100} 1092}
1101 1093
1102static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i) 1094static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i)
@@ -1244,15 +1236,64 @@ restart:
1244 spin_unlock(cinfo->lock); 1236 spin_unlock(cinfo->lock);
1245} 1237}
1246 1238
1239/* filelayout_search_commit_reqs - Search lists in @cinfo for the head request
1240 * for @page
1241 * @cinfo - commit info for current inode
1242 * @page - page to search for matching head request
1243 *
1244 * Returns the head request if one is found, otherwise returns NULL.
1245 */
1246static struct nfs_page *
1247filelayout_search_commit_reqs(struct nfs_commit_info *cinfo, struct page *page)
1248{
1249 struct nfs_page *freq, *t;
1250 struct pnfs_commit_bucket *b;
1251 int i;
1252
1253 /* Linearly search the commit lists for each bucket until a matching
1254 * request is found */
1255 for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) {
1256 list_for_each_entry_safe(freq, t, &b->written, wb_list) {
1257 if (freq->wb_page == page)
1258 return freq->wb_head;
1259 }
1260 list_for_each_entry_safe(freq, t, &b->committing, wb_list) {
1261 if (freq->wb_page == page)
1262 return freq->wb_head;
1263 }
1264 }
1265
1266 return NULL;
1267}
1268
1269static void filelayout_retry_commit(struct nfs_commit_info *cinfo, int idx)
1270{
1271 struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds;
1272 struct pnfs_commit_bucket *bucket;
1273 struct pnfs_layout_segment *freeme;
1274 int i;
1275
1276 for (i = idx; i < fl_cinfo->nbuckets; i++) {
1277 bucket = &fl_cinfo->buckets[i];
1278 if (list_empty(&bucket->committing))
1279 continue;
1280 nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo);
1281 spin_lock(cinfo->lock);
1282 freeme = bucket->clseg;
1283 bucket->clseg = NULL;
1284 spin_unlock(cinfo->lock);
1285 pnfs_put_lseg(freeme);
1286 }
1287}
1288
1247static unsigned int 1289static unsigned int
1248alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list) 1290alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list)
1249{ 1291{
1250 struct pnfs_ds_commit_info *fl_cinfo; 1292 struct pnfs_ds_commit_info *fl_cinfo;
1251 struct pnfs_commit_bucket *bucket; 1293 struct pnfs_commit_bucket *bucket;
1252 struct nfs_commit_data *data; 1294 struct nfs_commit_data *data;
1253 int i, j; 1295 int i;
1254 unsigned int nreq = 0; 1296 unsigned int nreq = 0;
1255 struct pnfs_layout_segment *freeme;
1256 1297
1257 fl_cinfo = cinfo->ds; 1298 fl_cinfo = cinfo->ds;
1258 bucket = fl_cinfo->buckets; 1299 bucket = fl_cinfo->buckets;
@@ -1272,16 +1313,7 @@ alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list)
1272 } 1313 }
1273 1314
1274 /* Clean up on error */ 1315 /* Clean up on error */
1275 for (j = i; j < fl_cinfo->nbuckets; j++, bucket++) { 1316 filelayout_retry_commit(cinfo, i);
1276 if (list_empty(&bucket->committing))
1277 continue;
1278 nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo);
1279 spin_lock(cinfo->lock);
1280 freeme = bucket->clseg;
1281 bucket->clseg = NULL;
1282 spin_unlock(cinfo->lock);
1283 pnfs_put_lseg(freeme);
1284 }
1285 /* Caller will clean up entries put on list */ 1317 /* Caller will clean up entries put on list */
1286 return nreq; 1318 return nreq;
1287} 1319}
@@ -1301,8 +1333,12 @@ filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
1301 data->lseg = NULL; 1333 data->lseg = NULL;
1302 list_add(&data->pages, &list); 1334 list_add(&data->pages, &list);
1303 nreq++; 1335 nreq++;
1304 } else 1336 } else {
1305 nfs_retry_commit(mds_pages, NULL, cinfo); 1337 nfs_retry_commit(mds_pages, NULL, cinfo);
1338 filelayout_retry_commit(cinfo, 0);
1339 cinfo->completion_ops->error_cleanup(NFS_I(inode));
1340 return -ENOMEM;
1341 }
1306 } 1342 }
1307 1343
1308 nreq += alloc_ds_commits(cinfo, &list); 1344 nreq += alloc_ds_commits(cinfo, &list);
@@ -1380,6 +1416,7 @@ static struct pnfs_layoutdriver_type filelayout_type = {
1380 .clear_request_commit = filelayout_clear_request_commit, 1416 .clear_request_commit = filelayout_clear_request_commit,
1381 .scan_commit_lists = filelayout_scan_commit_lists, 1417 .scan_commit_lists = filelayout_scan_commit_lists,
1382 .recover_commit_reqs = filelayout_recover_commit_reqs, 1418 .recover_commit_reqs = filelayout_recover_commit_reqs,
1419 .search_commit_reqs = filelayout_search_commit_reqs,
1383 .commit_pagelist = filelayout_commit_pagelist, 1420 .commit_pagelist = filelayout_commit_pagelist,
1384 .read_pagelist = filelayout_read_pagelist, 1421 .read_pagelist = filelayout_read_pagelist,
1385 .write_pagelist = filelayout_write_pagelist, 1422 .write_pagelist = filelayout_write_pagelist,
diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c
index 44bf0140a4c7..8540516f4d71 100644
--- a/fs/nfs/filelayout/filelayoutdev.c
+++ b/fs/nfs/filelayout/filelayoutdev.c
@@ -695,7 +695,7 @@ filelayout_get_device_info(struct inode *inode,
695 if (pdev == NULL) 695 if (pdev == NULL)
696 return NULL; 696 return NULL;
697 697
698 pages = kzalloc(max_pages * sizeof(struct page *), gfp_flags); 698 pages = kcalloc(max_pages, sizeof(struct page *), gfp_flags);
699 if (pages == NULL) { 699 if (pages == NULL) {
700 kfree(pdev); 700 kfree(pdev);
701 return NULL; 701 return NULL;
@@ -783,8 +783,8 @@ nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j)
783static void nfs4_wait_ds_connect(struct nfs4_pnfs_ds *ds) 783static void nfs4_wait_ds_connect(struct nfs4_pnfs_ds *ds)
784{ 784{
785 might_sleep(); 785 might_sleep();
786 wait_on_bit(&ds->ds_state, NFS4DS_CONNECTING, 786 wait_on_bit_action(&ds->ds_state, NFS4DS_CONNECTING,
787 nfs_wait_bit_killable, TASK_KILLABLE); 787 nfs_wait_bit_killable, TASK_KILLABLE);
788} 788}
789 789
790static void nfs4_clear_ds_conn_bit(struct nfs4_pnfs_ds *ds) 790static void nfs4_clear_ds_conn_bit(struct nfs4_pnfs_ds *ds)
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index b94f80420a58..880618a8b048 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -112,7 +112,7 @@ struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh,
112 * if the dentry tree reaches them; however if the dentry already 112 * if the dentry tree reaches them; however if the dentry already
113 * exists, we'll pick it up at this point and use it as the root 113 * exists, we'll pick it up at this point and use it as the root
114 */ 114 */
115 ret = d_obtain_alias(inode); 115 ret = d_obtain_root(inode);
116 if (IS_ERR(ret)) { 116 if (IS_ERR(ret)) {
117 dprintk("nfs_get_root: get root dentry failed\n"); 117 dprintk("nfs_get_root: get root dentry failed\n");
118 goto out; 118 goto out;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 9927913c97c2..577a36f0a510 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -75,7 +75,7 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr)
75 * nfs_wait_bit_killable - helper for functions that are sleeping on bit locks 75 * nfs_wait_bit_killable - helper for functions that are sleeping on bit locks
76 * @word: long word containing the bit lock 76 * @word: long word containing the bit lock
77 */ 77 */
78int nfs_wait_bit_killable(void *word) 78int nfs_wait_bit_killable(struct wait_bit_key *key)
79{ 79{
80 if (fatal_signal_pending(current)) 80 if (fatal_signal_pending(current))
81 return -ERESTARTSYS; 81 return -ERESTARTSYS;
@@ -1002,6 +1002,15 @@ int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
1002} 1002}
1003EXPORT_SYMBOL_GPL(nfs_revalidate_inode); 1003EXPORT_SYMBOL_GPL(nfs_revalidate_inode);
1004 1004
1005int nfs_revalidate_inode_rcu(struct nfs_server *server, struct inode *inode)
1006{
1007 if (!(NFS_I(inode)->cache_validity &
1008 (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_LABEL))
1009 && !nfs_attribute_cache_expired(inode))
1010 return NFS_STALE(inode) ? -ESTALE : 0;
1011 return -ECHILD;
1012}
1013
1005static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping) 1014static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping)
1006{ 1015{
1007 struct nfs_inode *nfsi = NFS_I(inode); 1016 struct nfs_inode *nfsi = NFS_I(inode);
@@ -1074,8 +1083,8 @@ int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
1074 * the bit lock here if it looks like we're going to be doing that. 1083 * the bit lock here if it looks like we're going to be doing that.
1075 */ 1084 */
1076 for (;;) { 1085 for (;;) {
1077 ret = wait_on_bit(bitlock, NFS_INO_INVALIDATING, 1086 ret = wait_on_bit_action(bitlock, NFS_INO_INVALIDATING,
1078 nfs_wait_bit_killable, TASK_KILLABLE); 1087 nfs_wait_bit_killable, TASK_KILLABLE);
1079 if (ret) 1088 if (ret)
1080 goto out; 1089 goto out;
1081 spin_lock(&inode->i_lock); 1090 spin_lock(&inode->i_lock);
@@ -1840,11 +1849,12 @@ EXPORT_SYMBOL_GPL(nfs_net_id);
1840static int nfs_net_init(struct net *net) 1849static int nfs_net_init(struct net *net)
1841{ 1850{
1842 nfs_clients_init(net); 1851 nfs_clients_init(net);
1843 return 0; 1852 return nfs_fs_proc_net_init(net);
1844} 1853}
1845 1854
1846static void nfs_net_exit(struct net *net) 1855static void nfs_net_exit(struct net *net)
1847{ 1856{
1857 nfs_fs_proc_net_exit(net);
1848 nfs_cleanup_cb_ident_idr(net); 1858 nfs_cleanup_cb_ident_idr(net);
1849} 1859}
1850 1860
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index f415cbf9f6c3..9056622d2230 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -195,7 +195,16 @@ extern struct rpc_clnt *nfs4_find_or_create_ds_client(struct nfs_client *,
195#ifdef CONFIG_PROC_FS 195#ifdef CONFIG_PROC_FS
196extern int __init nfs_fs_proc_init(void); 196extern int __init nfs_fs_proc_init(void);
197extern void nfs_fs_proc_exit(void); 197extern void nfs_fs_proc_exit(void);
198extern int nfs_fs_proc_net_init(struct net *net);
199extern void nfs_fs_proc_net_exit(struct net *net);
198#else 200#else
201static inline int nfs_fs_proc_net_init(struct net *net)
202{
203 return 0;
204}
205static inline void nfs_fs_proc_net_exit(struct net *net)
206{
207}
199static inline int nfs_fs_proc_init(void) 208static inline int nfs_fs_proc_init(void)
200{ 209{
201 return 0; 210 return 0;
@@ -238,11 +247,11 @@ void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos);
238int nfs_iocounter_wait(struct nfs_io_counter *c); 247int nfs_iocounter_wait(struct nfs_io_counter *c);
239 248
240extern const struct nfs_pageio_ops nfs_pgio_rw_ops; 249extern const struct nfs_pageio_ops nfs_pgio_rw_ops;
241struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *); 250struct nfs_pgio_header *nfs_pgio_header_alloc(const struct nfs_rw_ops *);
242void nfs_rw_header_free(struct nfs_pgio_header *); 251void nfs_pgio_header_free(struct nfs_pgio_header *);
243void nfs_pgio_data_release(struct nfs_pgio_data *); 252void nfs_pgio_data_destroy(struct nfs_pgio_header *);
244int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *); 253int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *);
245int nfs_initiate_pgio(struct rpc_clnt *, struct nfs_pgio_data *, 254int nfs_initiate_pgio(struct rpc_clnt *, struct nfs_pgio_header *,
246 const struct rpc_call_ops *, int, int); 255 const struct rpc_call_ops *, int, int);
247void nfs_free_request(struct nfs_page *req); 256void nfs_free_request(struct nfs_page *req);
248 257
@@ -348,7 +357,7 @@ extern int nfs_drop_inode(struct inode *);
348extern void nfs_clear_inode(struct inode *); 357extern void nfs_clear_inode(struct inode *);
349extern void nfs_evict_inode(struct inode *); 358extern void nfs_evict_inode(struct inode *);
350void nfs_zap_acl_cache(struct inode *inode); 359void nfs_zap_acl_cache(struct inode *inode);
351extern int nfs_wait_bit_killable(void *word); 360extern int nfs_wait_bit_killable(struct wait_bit_key *key);
352 361
353/* super.c */ 362/* super.c */
354extern const struct super_operations nfs_sops; 363extern const struct super_operations nfs_sops;
@@ -442,6 +451,7 @@ int nfs_scan_commit(struct inode *inode, struct list_head *dst,
442void nfs_mark_request_commit(struct nfs_page *req, 451void nfs_mark_request_commit(struct nfs_page *req,
443 struct pnfs_layout_segment *lseg, 452 struct pnfs_layout_segment *lseg,
444 struct nfs_commit_info *cinfo); 453 struct nfs_commit_info *cinfo);
454int nfs_write_need_commit(struct nfs_pgio_header *);
445int nfs_generic_commit_list(struct inode *inode, struct list_head *head, 455int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
446 int how, struct nfs_commit_info *cinfo); 456 int how, struct nfs_commit_info *cinfo);
447void nfs_retry_commit(struct list_head *page_list, 457void nfs_retry_commit(struct list_head *page_list,
@@ -482,7 +492,7 @@ static inline void nfs_inode_dio_wait(struct inode *inode)
482extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq); 492extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq);
483 493
484/* nfs4proc.c */ 494/* nfs4proc.c */
485extern void __nfs4_read_done_cb(struct nfs_pgio_data *); 495extern void __nfs4_read_done_cb(struct nfs_pgio_header *);
486extern struct nfs_client *nfs4_init_client(struct nfs_client *clp, 496extern struct nfs_client *nfs4_init_client(struct nfs_client *clp,
487 const struct rpc_timeout *timeparms, 497 const struct rpc_timeout *timeparms,
488 const char *ip_addr); 498 const char *ip_addr);
diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h
index 8ee1fab83268..ef221fb8a183 100644
--- a/fs/nfs/netns.h
+++ b/fs/nfs/netns.h
@@ -29,6 +29,9 @@ struct nfs_net {
29#endif 29#endif
30 spinlock_t nfs_client_lock; 30 spinlock_t nfs_client_lock;
31 struct timespec boot_time; 31 struct timespec boot_time;
32#ifdef CONFIG_PROC_FS
33 struct proc_dir_entry *proc_nfsfs;
34#endif
32}; 35};
33 36
34extern int nfs_net_id; 37extern int nfs_net_id;
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index 8f854dde4150..24c6898159cc 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -129,7 +129,10 @@ static int __nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
129 .rpc_argp = &args, 129 .rpc_argp = &args,
130 .rpc_resp = &fattr, 130 .rpc_resp = &fattr,
131 }; 131 };
132 int status; 132 int status = 0;
133
134 if (acl == NULL && (!S_ISDIR(inode->i_mode) || dfacl == NULL))
135 goto out;
133 136
134 status = -EOPNOTSUPP; 137 status = -EOPNOTSUPP;
135 if (!nfs_server_capable(inode, NFS_CAP_ACLS)) 138 if (!nfs_server_capable(inode, NFS_CAP_ACLS))
@@ -256,7 +259,7 @@ nfs3_list_one_acl(struct inode *inode, int type, const char *name, void *data,
256 char *p = data + *result; 259 char *p = data + *result;
257 260
258 acl = get_acl(inode, type); 261 acl = get_acl(inode, type);
259 if (!acl) 262 if (IS_ERR_OR_NULL(acl))
260 return 0; 263 return 0;
261 264
262 posix_acl_release(acl); 265 posix_acl_release(acl);
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index f0afa291fd58..809670eba52a 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -795,41 +795,44 @@ nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
795 return status; 795 return status;
796} 796}
797 797
798static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_data *data) 798static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
799{ 799{
800 struct inode *inode = data->header->inode; 800 struct inode *inode = hdr->inode;
801 801
802 if (nfs3_async_handle_jukebox(task, inode)) 802 if (nfs3_async_handle_jukebox(task, inode))
803 return -EAGAIN; 803 return -EAGAIN;
804 804
805 nfs_invalidate_atime(inode); 805 nfs_invalidate_atime(inode);
806 nfs_refresh_inode(inode, &data->fattr); 806 nfs_refresh_inode(inode, &hdr->fattr);
807 return 0; 807 return 0;
808} 808}
809 809
810static void nfs3_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg) 810static void nfs3_proc_read_setup(struct nfs_pgio_header *hdr,
811 struct rpc_message *msg)
811{ 812{
812 msg->rpc_proc = &nfs3_procedures[NFS3PROC_READ]; 813 msg->rpc_proc = &nfs3_procedures[NFS3PROC_READ];
813} 814}
814 815
815static int nfs3_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data) 816static int nfs3_proc_pgio_rpc_prepare(struct rpc_task *task,
817 struct nfs_pgio_header *hdr)
816{ 818{
817 rpc_call_start(task); 819 rpc_call_start(task);
818 return 0; 820 return 0;
819} 821}
820 822
821static int nfs3_write_done(struct rpc_task *task, struct nfs_pgio_data *data) 823static int nfs3_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
822{ 824{
823 struct inode *inode = data->header->inode; 825 struct inode *inode = hdr->inode;
824 826
825 if (nfs3_async_handle_jukebox(task, inode)) 827 if (nfs3_async_handle_jukebox(task, inode))
826 return -EAGAIN; 828 return -EAGAIN;
827 if (task->tk_status >= 0) 829 if (task->tk_status >= 0)
828 nfs_post_op_update_inode_force_wcc(inode, data->res.fattr); 830 nfs_post_op_update_inode_force_wcc(inode, hdr->res.fattr);
829 return 0; 831 return 0;
830} 832}
831 833
832static void nfs3_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg) 834static void nfs3_proc_write_setup(struct nfs_pgio_header *hdr,
835 struct rpc_message *msg)
833{ 836{
834 msg->rpc_proc = &nfs3_procedures[NFS3PROC_WRITE]; 837 msg->rpc_proc = &nfs3_procedures[NFS3PROC_WRITE];
835} 838}
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index ba2affa51941..a8b855ab4e22 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -54,7 +54,7 @@ struct nfs4_minor_version_ops {
54 const nfs4_stateid *); 54 const nfs4_stateid *);
55 int (*find_root_sec)(struct nfs_server *, struct nfs_fh *, 55 int (*find_root_sec)(struct nfs_server *, struct nfs_fh *,
56 struct nfs_fsinfo *); 56 struct nfs_fsinfo *);
57 int (*free_lock_state)(struct nfs_server *, 57 void (*free_lock_state)(struct nfs_server *,
58 struct nfs4_lock_state *); 58 struct nfs4_lock_state *);
59 const struct rpc_call_ops *call_sync_ops; 59 const struct rpc_call_ops *call_sync_ops;
60 const struct nfs4_state_recovery_ops *reboot_recovery_ops; 60 const struct nfs4_state_recovery_ops *reboot_recovery_ops;
@@ -129,17 +129,6 @@ enum {
129 * LOCK: one nfs4_state (LOCK) to hold the lock stateid nfs4_state(OPEN) 129 * LOCK: one nfs4_state (LOCK) to hold the lock stateid nfs4_state(OPEN)
130 */ 130 */
131 131
132struct nfs4_lock_owner {
133 unsigned int lo_type;
134#define NFS4_ANY_LOCK_TYPE (0U)
135#define NFS4_FLOCK_LOCK_TYPE (1U << 0)
136#define NFS4_POSIX_LOCK_TYPE (1U << 1)
137 union {
138 fl_owner_t posix_owner;
139 pid_t flock_owner;
140 } lo_u;
141};
142
143struct nfs4_lock_state { 132struct nfs4_lock_state {
144 struct list_head ls_locks; /* Other lock stateids */ 133 struct list_head ls_locks; /* Other lock stateids */
145 struct nfs4_state * ls_state; /* Pointer to open state */ 134 struct nfs4_state * ls_state; /* Pointer to open state */
@@ -149,7 +138,7 @@ struct nfs4_lock_state {
149 struct nfs_seqid_counter ls_seqid; 138 struct nfs_seqid_counter ls_seqid;
150 nfs4_stateid ls_stateid; 139 nfs4_stateid ls_stateid;
151 atomic_t ls_count; 140 atomic_t ls_count;
152 struct nfs4_lock_owner ls_owner; 141 fl_owner_t ls_owner;
153}; 142};
154 143
155/* bits for nfs4_state->flags */ 144/* bits for nfs4_state->flags */
@@ -337,11 +326,11 @@ nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_mode,
337 */ 326 */
338static inline void 327static inline void
339nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp, 328nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp,
340 struct rpc_message *msg, struct nfs_pgio_data *wdata) 329 struct rpc_message *msg, struct nfs_pgio_header *hdr)
341{ 330{
342 if (_nfs4_state_protect(clp, NFS_SP4_MACH_CRED_WRITE, clntp, msg) && 331 if (_nfs4_state_protect(clp, NFS_SP4_MACH_CRED_WRITE, clntp, msg) &&
343 !test_bit(NFS_SP4_MACH_CRED_COMMIT, &clp->cl_sp4_flags)) 332 !test_bit(NFS_SP4_MACH_CRED_COMMIT, &clp->cl_sp4_flags))
344 wdata->args.stable = NFS_FILE_SYNC; 333 hdr->args.stable = NFS_FILE_SYNC;
345} 334}
346#else /* CONFIG_NFS_v4_1 */ 335#else /* CONFIG_NFS_v4_1 */
347static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server) 336static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server)
@@ -369,7 +358,7 @@ nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_flags,
369 358
370static inline void 359static inline void
371nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp, 360nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp,
372 struct rpc_message *msg, struct nfs_pgio_data *wdata) 361 struct rpc_message *msg, struct nfs_pgio_header *hdr)
373{ 362{
374} 363}
375#endif /* CONFIG_NFS_V4_1 */ 364#endif /* CONFIG_NFS_V4_1 */
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index aa9ef4876046..ffdb28d86cf8 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -482,6 +482,16 @@ int nfs40_walk_client_list(struct nfs_client *new,
482 482
483 spin_lock(&nn->nfs_client_lock); 483 spin_lock(&nn->nfs_client_lock);
484 list_for_each_entry(pos, &nn->nfs_client_list, cl_share_link) { 484 list_for_each_entry(pos, &nn->nfs_client_list, cl_share_link) {
485
486 if (pos->rpc_ops != new->rpc_ops)
487 continue;
488
489 if (pos->cl_proto != new->cl_proto)
490 continue;
491
492 if (pos->cl_minorversion != new->cl_minorversion)
493 continue;
494
485 /* If "pos" isn't marked ready, we can't trust the 495 /* If "pos" isn't marked ready, we can't trust the
486 * remaining fields in "pos" */ 496 * remaining fields in "pos" */
487 if (pos->cl_cons_state > NFS_CS_READY) { 497 if (pos->cl_cons_state > NFS_CS_READY) {
@@ -501,15 +511,6 @@ int nfs40_walk_client_list(struct nfs_client *new,
501 if (pos->cl_cons_state != NFS_CS_READY) 511 if (pos->cl_cons_state != NFS_CS_READY)
502 continue; 512 continue;
503 513
504 if (pos->rpc_ops != new->rpc_ops)
505 continue;
506
507 if (pos->cl_proto != new->cl_proto)
508 continue;
509
510 if (pos->cl_minorversion != new->cl_minorversion)
511 continue;
512
513 if (pos->cl_clientid != new->cl_clientid) 514 if (pos->cl_clientid != new->cl_clientid)
514 continue; 515 continue;
515 516
@@ -622,6 +623,16 @@ int nfs41_walk_client_list(struct nfs_client *new,
622 623
623 spin_lock(&nn->nfs_client_lock); 624 spin_lock(&nn->nfs_client_lock);
624 list_for_each_entry(pos, &nn->nfs_client_list, cl_share_link) { 625 list_for_each_entry(pos, &nn->nfs_client_list, cl_share_link) {
626
627 if (pos->rpc_ops != new->rpc_ops)
628 continue;
629
630 if (pos->cl_proto != new->cl_proto)
631 continue;
632
633 if (pos->cl_minorversion != new->cl_minorversion)
634 continue;
635
625 /* If "pos" isn't marked ready, we can't trust the 636 /* If "pos" isn't marked ready, we can't trust the
626 * remaining fields in "pos", especially the client 637 * remaining fields in "pos", especially the client
627 * ID and serverowner fields. Wait for CREATE_SESSION 638 * ID and serverowner fields. Wait for CREATE_SESSION
@@ -647,15 +658,6 @@ int nfs41_walk_client_list(struct nfs_client *new,
647 if (pos->cl_cons_state != NFS_CS_READY) 658 if (pos->cl_cons_state != NFS_CS_READY)
648 continue; 659 continue;
649 660
650 if (pos->rpc_ops != new->rpc_ops)
651 continue;
652
653 if (pos->cl_proto != new->cl_proto)
654 continue;
655
656 if (pos->cl_minorversion != new->cl_minorversion)
657 continue;
658
659 if (!nfs4_match_clientids(pos, new)) 661 if (!nfs4_match_clientids(pos, new))
660 continue; 662 continue;
661 663
@@ -855,6 +857,11 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
855 }; 857 };
856 struct rpc_timeout ds_timeout; 858 struct rpc_timeout ds_timeout;
857 struct nfs_client *clp; 859 struct nfs_client *clp;
860 char buf[INET6_ADDRSTRLEN + 1];
861
862 if (rpc_ntop(ds_addr, buf, sizeof(buf)) <= 0)
863 return ERR_PTR(-EINVAL);
864 cl_init.hostname = buf;
858 865
859 /* 866 /*
860 * Set an authflavor equal to the MDS value. Use the MDS nfs_client 867 * Set an authflavor equal to the MDS value. Use the MDS nfs_client
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 4bf3d97cc5a0..6ca0c8e7a945 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1952,6 +1952,14 @@ static int _nfs4_recover_proc_open(struct nfs4_opendata *data)
1952 return status; 1952 return status;
1953} 1953}
1954 1954
1955/*
1956 * Additional permission checks in order to distinguish between an
1957 * open for read, and an open for execute. This works around the
1958 * fact that NFSv4 OPEN treats read and execute permissions as being
1959 * the same.
1960 * Note that in the non-execute case, we want to turn off permission
1961 * checking if we just created a new file (POSIX open() semantics).
1962 */
1955static int nfs4_opendata_access(struct rpc_cred *cred, 1963static int nfs4_opendata_access(struct rpc_cred *cred,
1956 struct nfs4_opendata *opendata, 1964 struct nfs4_opendata *opendata,
1957 struct nfs4_state *state, fmode_t fmode, 1965 struct nfs4_state *state, fmode_t fmode,
@@ -1966,14 +1974,14 @@ static int nfs4_opendata_access(struct rpc_cred *cred,
1966 return 0; 1974 return 0;
1967 1975
1968 mask = 0; 1976 mask = 0;
1969 /* don't check MAY_WRITE - a newly created file may not have 1977 /*
1970 * write mode bits, but POSIX allows the creating process to write. 1978 * Use openflags to check for exec, because fmode won't
1971 * use openflags to check for exec, because fmode won't 1979 * always have FMODE_EXEC set when file open for exec.
1972 * always have FMODE_EXEC set when file open for exec. */ 1980 */
1973 if (openflags & __FMODE_EXEC) { 1981 if (openflags & __FMODE_EXEC) {
1974 /* ONLY check for exec rights */ 1982 /* ONLY check for exec rights */
1975 mask = MAY_EXEC; 1983 mask = MAY_EXEC;
1976 } else if (fmode & FMODE_READ) 1984 } else if ((fmode & FMODE_READ) && !opendata->file_created)
1977 mask = MAY_READ; 1985 mask = MAY_READ;
1978 1986
1979 cache.cred = cred; 1987 cache.cred = cred;
@@ -2216,8 +2224,19 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
2216 seq = raw_seqcount_begin(&sp->so_reclaim_seqcount); 2224 seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
2217 2225
2218 ret = _nfs4_proc_open(opendata); 2226 ret = _nfs4_proc_open(opendata);
2219 if (ret != 0) 2227 if (ret != 0) {
2228 if (ret == -ENOENT) {
2229 dentry = opendata->dentry;
2230 if (dentry->d_inode)
2231 d_delete(dentry);
2232 else if (d_unhashed(dentry))
2233 d_add(dentry, NULL);
2234
2235 nfs_set_verifier(dentry,
2236 nfs_save_change_attribute(opendata->dir->d_inode));
2237 }
2220 goto out; 2238 goto out;
2239 }
2221 2240
2222 state = nfs4_opendata_to_nfs4_state(opendata); 2241 state = nfs4_opendata_to_nfs4_state(opendata);
2223 ret = PTR_ERR(state); 2242 ret = PTR_ERR(state);
@@ -2545,6 +2564,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
2545 struct nfs4_closedata *calldata = data; 2564 struct nfs4_closedata *calldata = data;
2546 struct nfs4_state *state = calldata->state; 2565 struct nfs4_state *state = calldata->state;
2547 struct nfs_server *server = NFS_SERVER(calldata->inode); 2566 struct nfs_server *server = NFS_SERVER(calldata->inode);
2567 nfs4_stateid *res_stateid = NULL;
2548 2568
2549 dprintk("%s: begin!\n", __func__); 2569 dprintk("%s: begin!\n", __func__);
2550 if (!nfs4_sequence_done(task, &calldata->res.seq_res)) 2570 if (!nfs4_sequence_done(task, &calldata->res.seq_res))
@@ -2555,12 +2575,12 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
2555 */ 2575 */
2556 switch (task->tk_status) { 2576 switch (task->tk_status) {
2557 case 0: 2577 case 0:
2558 if (calldata->roc) 2578 res_stateid = &calldata->res.stateid;
2579 if (calldata->arg.fmode == 0 && calldata->roc)
2559 pnfs_roc_set_barrier(state->inode, 2580 pnfs_roc_set_barrier(state->inode,
2560 calldata->roc_barrier); 2581 calldata->roc_barrier);
2561 nfs_clear_open_stateid(state, &calldata->res.stateid, 0);
2562 renew_lease(server, calldata->timestamp); 2582 renew_lease(server, calldata->timestamp);
2563 goto out_release; 2583 break;
2564 case -NFS4ERR_ADMIN_REVOKED: 2584 case -NFS4ERR_ADMIN_REVOKED:
2565 case -NFS4ERR_STALE_STATEID: 2585 case -NFS4ERR_STALE_STATEID:
2566 case -NFS4ERR_OLD_STATEID: 2586 case -NFS4ERR_OLD_STATEID:
@@ -2574,7 +2594,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
2574 goto out_release; 2594 goto out_release;
2575 } 2595 }
2576 } 2596 }
2577 nfs_clear_open_stateid(state, NULL, calldata->arg.fmode); 2597 nfs_clear_open_stateid(state, res_stateid, calldata->arg.fmode);
2578out_release: 2598out_release:
2579 nfs_release_seqid(calldata->arg.seqid); 2599 nfs_release_seqid(calldata->arg.seqid);
2580 nfs_refresh_inode(calldata->inode, calldata->res.fattr); 2600 nfs_refresh_inode(calldata->inode, calldata->res.fattr);
@@ -2586,6 +2606,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
2586 struct nfs4_closedata *calldata = data; 2606 struct nfs4_closedata *calldata = data;
2587 struct nfs4_state *state = calldata->state; 2607 struct nfs4_state *state = calldata->state;
2588 struct inode *inode = calldata->inode; 2608 struct inode *inode = calldata->inode;
2609 bool is_rdonly, is_wronly, is_rdwr;
2589 int call_close = 0; 2610 int call_close = 0;
2590 2611
2591 dprintk("%s: begin!\n", __func__); 2612 dprintk("%s: begin!\n", __func__);
@@ -2593,21 +2614,27 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
2593 goto out_wait; 2614 goto out_wait;
2594 2615
2595 task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE]; 2616 task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE];
2596 calldata->arg.fmode = FMODE_READ|FMODE_WRITE;
2597 spin_lock(&state->owner->so_lock); 2617 spin_lock(&state->owner->so_lock);
2618 is_rdwr = test_bit(NFS_O_RDWR_STATE, &state->flags);
2619 is_rdonly = test_bit(NFS_O_RDONLY_STATE, &state->flags);
2620 is_wronly = test_bit(NFS_O_WRONLY_STATE, &state->flags);
2598 /* Calculate the change in open mode */ 2621 /* Calculate the change in open mode */
2622 calldata->arg.fmode = 0;
2599 if (state->n_rdwr == 0) { 2623 if (state->n_rdwr == 0) {
2600 if (state->n_rdonly == 0) { 2624 if (state->n_rdonly == 0)
2601 call_close |= test_bit(NFS_O_RDONLY_STATE, &state->flags); 2625 call_close |= is_rdonly;
2602 call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags); 2626 else if (is_rdonly)
2603 calldata->arg.fmode &= ~FMODE_READ; 2627 calldata->arg.fmode |= FMODE_READ;
2604 } 2628 if (state->n_wronly == 0)
2605 if (state->n_wronly == 0) { 2629 call_close |= is_wronly;
2606 call_close |= test_bit(NFS_O_WRONLY_STATE, &state->flags); 2630 else if (is_wronly)
2607 call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags); 2631 calldata->arg.fmode |= FMODE_WRITE;
2608 calldata->arg.fmode &= ~FMODE_WRITE; 2632 } else if (is_rdwr)
2609 } 2633 calldata->arg.fmode |= FMODE_READ|FMODE_WRITE;
2610 } 2634
2635 if (calldata->arg.fmode == 0)
2636 call_close |= is_rdwr;
2637
2611 if (!nfs4_valid_open_stateid(state)) 2638 if (!nfs4_valid_open_stateid(state))
2612 call_close = 0; 2639 call_close = 0;
2613 spin_unlock(&state->owner->so_lock); 2640 spin_unlock(&state->owner->so_lock);
@@ -2647,6 +2674,48 @@ static const struct rpc_call_ops nfs4_close_ops = {
2647 .rpc_release = nfs4_free_closedata, 2674 .rpc_release = nfs4_free_closedata,
2648}; 2675};
2649 2676
2677static bool nfs4_state_has_opener(struct nfs4_state *state)
2678{
2679 /* first check existing openers */
2680 if (test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0 &&
2681 state->n_rdonly != 0)
2682 return true;
2683
2684 if (test_bit(NFS_O_WRONLY_STATE, &state->flags) != 0 &&
2685 state->n_wronly != 0)
2686 return true;
2687
2688 if (test_bit(NFS_O_RDWR_STATE, &state->flags) != 0 &&
2689 state->n_rdwr != 0)
2690 return true;
2691
2692 return false;
2693}
2694
2695static bool nfs4_roc(struct inode *inode)
2696{
2697 struct nfs_inode *nfsi = NFS_I(inode);
2698 struct nfs_open_context *ctx;
2699 struct nfs4_state *state;
2700
2701 spin_lock(&inode->i_lock);
2702 list_for_each_entry(ctx, &nfsi->open_files, list) {
2703 state = ctx->state;
2704 if (state == NULL)
2705 continue;
2706 if (nfs4_state_has_opener(state)) {
2707 spin_unlock(&inode->i_lock);
2708 return false;
2709 }
2710 }
2711 spin_unlock(&inode->i_lock);
2712
2713 if (nfs4_check_delegation(inode, FMODE_READ))
2714 return false;
2715
2716 return pnfs_roc(inode);
2717}
2718
2650/* 2719/*
2651 * It is possible for data to be read/written from a mem-mapped file 2720 * It is possible for data to be read/written from a mem-mapped file
2652 * after the sys_close call (which hits the vfs layer as a flush). 2721 * after the sys_close call (which hits the vfs layer as a flush).
@@ -2697,7 +2766,7 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait)
2697 calldata->res.fattr = &calldata->fattr; 2766 calldata->res.fattr = &calldata->fattr;
2698 calldata->res.seqid = calldata->arg.seqid; 2767 calldata->res.seqid = calldata->arg.seqid;
2699 calldata->res.server = server; 2768 calldata->res.server = server;
2700 calldata->roc = pnfs_roc(state->inode); 2769 calldata->roc = nfs4_roc(state->inode);
2701 nfs_sb_active(calldata->inode->i_sb); 2770 nfs_sb_active(calldata->inode->i_sb);
2702 2771
2703 msg.rpc_argp = &calldata->arg; 2772 msg.rpc_argp = &calldata->arg;
@@ -4033,24 +4102,25 @@ static bool nfs4_error_stateid_expired(int err)
4033 return false; 4102 return false;
4034} 4103}
4035 4104
4036void __nfs4_read_done_cb(struct nfs_pgio_data *data) 4105void __nfs4_read_done_cb(struct nfs_pgio_header *hdr)
4037{ 4106{
4038 nfs_invalidate_atime(data->header->inode); 4107 nfs_invalidate_atime(hdr->inode);
4039} 4108}
4040 4109
4041static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_pgio_data *data) 4110static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_pgio_header *hdr)
4042{ 4111{
4043 struct nfs_server *server = NFS_SERVER(data->header->inode); 4112 struct nfs_server *server = NFS_SERVER(hdr->inode);
4044 4113
4045 trace_nfs4_read(data, task->tk_status); 4114 trace_nfs4_read(hdr, task->tk_status);
4046 if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) { 4115 if (nfs4_async_handle_error(task, server,
4116 hdr->args.context->state) == -EAGAIN) {
4047 rpc_restart_call_prepare(task); 4117 rpc_restart_call_prepare(task);
4048 return -EAGAIN; 4118 return -EAGAIN;
4049 } 4119 }
4050 4120
4051 __nfs4_read_done_cb(data); 4121 __nfs4_read_done_cb(hdr);
4052 if (task->tk_status > 0) 4122 if (task->tk_status > 0)
4053 renew_lease(server, data->timestamp); 4123 renew_lease(server, hdr->timestamp);
4054 return 0; 4124 return 0;
4055} 4125}
4056 4126
@@ -4068,54 +4138,59 @@ static bool nfs4_read_stateid_changed(struct rpc_task *task,
4068 return true; 4138 return true;
4069} 4139}
4070 4140
4071static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_data *data) 4141static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
4072{ 4142{
4073 4143
4074 dprintk("--> %s\n", __func__); 4144 dprintk("--> %s\n", __func__);
4075 4145
4076 if (!nfs4_sequence_done(task, &data->res.seq_res)) 4146 if (!nfs4_sequence_done(task, &hdr->res.seq_res))
4077 return -EAGAIN; 4147 return -EAGAIN;
4078 if (nfs4_read_stateid_changed(task, &data->args)) 4148 if (nfs4_read_stateid_changed(task, &hdr->args))
4079 return -EAGAIN; 4149 return -EAGAIN;
4080 return data->pgio_done_cb ? data->pgio_done_cb(task, data) : 4150 return hdr->pgio_done_cb ? hdr->pgio_done_cb(task, hdr) :
4081 nfs4_read_done_cb(task, data); 4151 nfs4_read_done_cb(task, hdr);
4082} 4152}
4083 4153
4084static void nfs4_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg) 4154static void nfs4_proc_read_setup(struct nfs_pgio_header *hdr,
4155 struct rpc_message *msg)
4085{ 4156{
4086 data->timestamp = jiffies; 4157 hdr->timestamp = jiffies;
4087 data->pgio_done_cb = nfs4_read_done_cb; 4158 hdr->pgio_done_cb = nfs4_read_done_cb;
4088 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; 4159 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
4089 nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); 4160 nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 0);
4090} 4161}
4091 4162
4092static int nfs4_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data) 4163static int nfs4_proc_pgio_rpc_prepare(struct rpc_task *task,
4164 struct nfs_pgio_header *hdr)
4093{ 4165{
4094 if (nfs4_setup_sequence(NFS_SERVER(data->header->inode), 4166 if (nfs4_setup_sequence(NFS_SERVER(hdr->inode),
4095 &data->args.seq_args, 4167 &hdr->args.seq_args,
4096 &data->res.seq_res, 4168 &hdr->res.seq_res,
4097 task)) 4169 task))
4098 return 0; 4170 return 0;
4099 if (nfs4_set_rw_stateid(&data->args.stateid, data->args.context, 4171 if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context,
4100 data->args.lock_context, data->header->rw_ops->rw_mode) == -EIO) 4172 hdr->args.lock_context,
4173 hdr->rw_ops->rw_mode) == -EIO)
4101 return -EIO; 4174 return -EIO;
4102 if (unlikely(test_bit(NFS_CONTEXT_BAD, &data->args.context->flags))) 4175 if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags)))
4103 return -EIO; 4176 return -EIO;
4104 return 0; 4177 return 0;
4105} 4178}
4106 4179
4107static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_pgio_data *data) 4180static int nfs4_write_done_cb(struct rpc_task *task,
4181 struct nfs_pgio_header *hdr)
4108{ 4182{
4109 struct inode *inode = data->header->inode; 4183 struct inode *inode = hdr->inode;
4110 4184
4111 trace_nfs4_write(data, task->tk_status); 4185 trace_nfs4_write(hdr, task->tk_status);
4112 if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) { 4186 if (nfs4_async_handle_error(task, NFS_SERVER(inode),
4187 hdr->args.context->state) == -EAGAIN) {
4113 rpc_restart_call_prepare(task); 4188 rpc_restart_call_prepare(task);
4114 return -EAGAIN; 4189 return -EAGAIN;
4115 } 4190 }
4116 if (task->tk_status >= 0) { 4191 if (task->tk_status >= 0) {
4117 renew_lease(NFS_SERVER(inode), data->timestamp); 4192 renew_lease(NFS_SERVER(inode), hdr->timestamp);
4118 nfs_post_op_update_inode_force_wcc(inode, &data->fattr); 4193 nfs_post_op_update_inode_force_wcc(inode, &hdr->fattr);
4119 } 4194 }
4120 return 0; 4195 return 0;
4121} 4196}
@@ -4134,23 +4209,21 @@ static bool nfs4_write_stateid_changed(struct rpc_task *task,
4134 return true; 4209 return true;
4135} 4210}
4136 4211
4137static int nfs4_write_done(struct rpc_task *task, struct nfs_pgio_data *data) 4212static int nfs4_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
4138{ 4213{
4139 if (!nfs4_sequence_done(task, &data->res.seq_res)) 4214 if (!nfs4_sequence_done(task, &hdr->res.seq_res))
4140 return -EAGAIN; 4215 return -EAGAIN;
4141 if (nfs4_write_stateid_changed(task, &data->args)) 4216 if (nfs4_write_stateid_changed(task, &hdr->args))
4142 return -EAGAIN; 4217 return -EAGAIN;
4143 return data->pgio_done_cb ? data->pgio_done_cb(task, data) : 4218 return hdr->pgio_done_cb ? hdr->pgio_done_cb(task, hdr) :
4144 nfs4_write_done_cb(task, data); 4219 nfs4_write_done_cb(task, hdr);
4145} 4220}
4146 4221
4147static 4222static
4148bool nfs4_write_need_cache_consistency_data(const struct nfs_pgio_data *data) 4223bool nfs4_write_need_cache_consistency_data(struct nfs_pgio_header *hdr)
4149{ 4224{
4150 const struct nfs_pgio_header *hdr = data->header;
4151
4152 /* Don't request attributes for pNFS or O_DIRECT writes */ 4225 /* Don't request attributes for pNFS or O_DIRECT writes */
4153 if (data->ds_clp != NULL || hdr->dreq != NULL) 4226 if (hdr->ds_clp != NULL || hdr->dreq != NULL)
4154 return false; 4227 return false;
4155 /* Otherwise, request attributes if and only if we don't hold 4228 /* Otherwise, request attributes if and only if we don't hold
4156 * a delegation 4229 * a delegation
@@ -4158,23 +4231,24 @@ bool nfs4_write_need_cache_consistency_data(const struct nfs_pgio_data *data)
4158 return nfs4_have_delegation(hdr->inode, FMODE_READ) == 0; 4231 return nfs4_have_delegation(hdr->inode, FMODE_READ) == 0;
4159} 4232}
4160 4233
4161static void nfs4_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg) 4234static void nfs4_proc_write_setup(struct nfs_pgio_header *hdr,
4235 struct rpc_message *msg)
4162{ 4236{
4163 struct nfs_server *server = NFS_SERVER(data->header->inode); 4237 struct nfs_server *server = NFS_SERVER(hdr->inode);
4164 4238
4165 if (!nfs4_write_need_cache_consistency_data(data)) { 4239 if (!nfs4_write_need_cache_consistency_data(hdr)) {
4166 data->args.bitmask = NULL; 4240 hdr->args.bitmask = NULL;
4167 data->res.fattr = NULL; 4241 hdr->res.fattr = NULL;
4168 } else 4242 } else
4169 data->args.bitmask = server->cache_consistency_bitmask; 4243 hdr->args.bitmask = server->cache_consistency_bitmask;
4170 4244
4171 if (!data->pgio_done_cb) 4245 if (!hdr->pgio_done_cb)
4172 data->pgio_done_cb = nfs4_write_done_cb; 4246 hdr->pgio_done_cb = nfs4_write_done_cb;
4173 data->res.server = server; 4247 hdr->res.server = server;
4174 data->timestamp = jiffies; 4248 hdr->timestamp = jiffies;
4175 4249
4176 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE]; 4250 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE];
4177 nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); 4251 nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 1);
4178} 4252}
4179 4253
4180static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) 4254static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
@@ -4881,6 +4955,18 @@ nfs4_init_callback_netid(const struct nfs_client *clp, char *buf, size_t len)
4881 return scnprintf(buf, len, "tcp"); 4955 return scnprintf(buf, len, "tcp");
4882} 4956}
4883 4957
4958static void nfs4_setclientid_done(struct rpc_task *task, void *calldata)
4959{
4960 struct nfs4_setclientid *sc = calldata;
4961
4962 if (task->tk_status == 0)
4963 sc->sc_cred = get_rpccred(task->tk_rqstp->rq_cred);
4964}
4965
4966static const struct rpc_call_ops nfs4_setclientid_ops = {
4967 .rpc_call_done = nfs4_setclientid_done,
4968};
4969
4884/** 4970/**
4885 * nfs4_proc_setclientid - Negotiate client ID 4971 * nfs4_proc_setclientid - Negotiate client ID
4886 * @clp: state data structure 4972 * @clp: state data structure
@@ -4907,6 +4993,14 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
4907 .rpc_resp = res, 4993 .rpc_resp = res,
4908 .rpc_cred = cred, 4994 .rpc_cred = cred,
4909 }; 4995 };
4996 struct rpc_task *task;
4997 struct rpc_task_setup task_setup_data = {
4998 .rpc_client = clp->cl_rpcclient,
4999 .rpc_message = &msg,
5000 .callback_ops = &nfs4_setclientid_ops,
5001 .callback_data = &setclientid,
5002 .flags = RPC_TASK_TIMEOUT,
5003 };
4910 int status; 5004 int status;
4911 5005
4912 /* nfs_client_id4 */ 5006 /* nfs_client_id4 */
@@ -4933,7 +5027,18 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
4933 dprintk("NFS call setclientid auth=%s, '%.*s'\n", 5027 dprintk("NFS call setclientid auth=%s, '%.*s'\n",
4934 clp->cl_rpcclient->cl_auth->au_ops->au_name, 5028 clp->cl_rpcclient->cl_auth->au_ops->au_name,
4935 setclientid.sc_name_len, setclientid.sc_name); 5029 setclientid.sc_name_len, setclientid.sc_name);
4936 status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); 5030 task = rpc_run_task(&task_setup_data);
5031 if (IS_ERR(task)) {
5032 status = PTR_ERR(task);
5033 goto out;
5034 }
5035 status = task->tk_status;
5036 if (setclientid.sc_cred) {
5037 clp->cl_acceptor = rpcauth_stringify_acceptor(setclientid.sc_cred);
5038 put_rpccred(setclientid.sc_cred);
5039 }
5040 rpc_put_task(task);
5041out:
4937 trace_nfs4_setclientid(clp, status); 5042 trace_nfs4_setclientid(clp, status);
4938 dprintk("NFS reply setclientid: %d\n", status); 5043 dprintk("NFS reply setclientid: %d\n", status);
4939 return status; 5044 return status;
@@ -4975,6 +5080,9 @@ struct nfs4_delegreturndata {
4975 unsigned long timestamp; 5080 unsigned long timestamp;
4976 struct nfs_fattr fattr; 5081 struct nfs_fattr fattr;
4977 int rpc_status; 5082 int rpc_status;
5083 struct inode *inode;
5084 bool roc;
5085 u32 roc_barrier;
4978}; 5086};
4979 5087
4980static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) 5088static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
@@ -4988,7 +5096,6 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
4988 switch (task->tk_status) { 5096 switch (task->tk_status) {
4989 case 0: 5097 case 0:
4990 renew_lease(data->res.server, data->timestamp); 5098 renew_lease(data->res.server, data->timestamp);
4991 break;
4992 case -NFS4ERR_ADMIN_REVOKED: 5099 case -NFS4ERR_ADMIN_REVOKED:
4993 case -NFS4ERR_DELEG_REVOKED: 5100 case -NFS4ERR_DELEG_REVOKED:
4994 case -NFS4ERR_BAD_STATEID: 5101 case -NFS4ERR_BAD_STATEID:
@@ -4996,6 +5103,8 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
4996 case -NFS4ERR_STALE_STATEID: 5103 case -NFS4ERR_STALE_STATEID:
4997 case -NFS4ERR_EXPIRED: 5104 case -NFS4ERR_EXPIRED:
4998 task->tk_status = 0; 5105 task->tk_status = 0;
5106 if (data->roc)
5107 pnfs_roc_set_barrier(data->inode, data->roc_barrier);
4999 break; 5108 break;
5000 default: 5109 default:
5001 if (nfs4_async_handle_error(task, data->res.server, NULL) == 5110 if (nfs4_async_handle_error(task, data->res.server, NULL) ==
@@ -5009,6 +5118,10 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
5009 5118
5010static void nfs4_delegreturn_release(void *calldata) 5119static void nfs4_delegreturn_release(void *calldata)
5011{ 5120{
5121 struct nfs4_delegreturndata *data = calldata;
5122
5123 if (data->roc)
5124 pnfs_roc_release(data->inode);
5012 kfree(calldata); 5125 kfree(calldata);
5013} 5126}
5014 5127
@@ -5018,6 +5131,10 @@ static void nfs4_delegreturn_prepare(struct rpc_task *task, void *data)
5018 5131
5019 d_data = (struct nfs4_delegreturndata *)data; 5132 d_data = (struct nfs4_delegreturndata *)data;
5020 5133
5134 if (d_data->roc &&
5135 pnfs_roc_drain(d_data->inode, &d_data->roc_barrier, task))
5136 return;
5137
5021 nfs4_setup_sequence(d_data->res.server, 5138 nfs4_setup_sequence(d_data->res.server,
5022 &d_data->args.seq_args, 5139 &d_data->args.seq_args,
5023 &d_data->res.seq_res, 5140 &d_data->res.seq_res,
@@ -5061,6 +5178,9 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
5061 nfs_fattr_init(data->res.fattr); 5178 nfs_fattr_init(data->res.fattr);
5062 data->timestamp = jiffies; 5179 data->timestamp = jiffies;
5063 data->rpc_status = 0; 5180 data->rpc_status = 0;
5181 data->inode = inode;
5182 data->roc = list_empty(&NFS_I(inode)->open_files) ?
5183 pnfs_roc(inode) : false;
5064 5184
5065 task_setup_data.callback_data = data; 5185 task_setup_data.callback_data = data;
5066 msg.rpc_argp = &data->args; 5186 msg.rpc_argp = &data->args;
@@ -5834,8 +5954,10 @@ struct nfs_release_lockowner_data {
5834static void nfs4_release_lockowner_prepare(struct rpc_task *task, void *calldata) 5954static void nfs4_release_lockowner_prepare(struct rpc_task *task, void *calldata)
5835{ 5955{
5836 struct nfs_release_lockowner_data *data = calldata; 5956 struct nfs_release_lockowner_data *data = calldata;
5837 nfs40_setup_sequence(data->server, 5957 struct nfs_server *server = data->server;
5838 &data->args.seq_args, &data->res.seq_res, task); 5958 nfs40_setup_sequence(server, &data->args.seq_args,
5959 &data->res.seq_res, task);
5960 data->args.lock_owner.clientid = server->nfs_client->cl_clientid;
5839 data->timestamp = jiffies; 5961 data->timestamp = jiffies;
5840} 5962}
5841 5963
@@ -5852,6 +5974,8 @@ static void nfs4_release_lockowner_done(struct rpc_task *task, void *calldata)
5852 break; 5974 break;
5853 case -NFS4ERR_STALE_CLIENTID: 5975 case -NFS4ERR_STALE_CLIENTID:
5854 case -NFS4ERR_EXPIRED: 5976 case -NFS4ERR_EXPIRED:
5977 nfs4_schedule_lease_recovery(server->nfs_client);
5978 break;
5855 case -NFS4ERR_LEASE_MOVED: 5979 case -NFS4ERR_LEASE_MOVED:
5856 case -NFS4ERR_DELAY: 5980 case -NFS4ERR_DELAY:
5857 if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) 5981 if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN)
@@ -5872,7 +5996,8 @@ static const struct rpc_call_ops nfs4_release_lockowner_ops = {
5872 .rpc_release = nfs4_release_lockowner_release, 5996 .rpc_release = nfs4_release_lockowner_release,
5873}; 5997};
5874 5998
5875static int nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_state *lsp) 5999static void
6000nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_state *lsp)
5876{ 6001{
5877 struct nfs_release_lockowner_data *data; 6002 struct nfs_release_lockowner_data *data;
5878 struct rpc_message msg = { 6003 struct rpc_message msg = {
@@ -5880,11 +6005,11 @@ static int nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_st
5880 }; 6005 };
5881 6006
5882 if (server->nfs_client->cl_mvops->minor_version != 0) 6007 if (server->nfs_client->cl_mvops->minor_version != 0)
5883 return -EINVAL; 6008 return;
5884 6009
5885 data = kmalloc(sizeof(*data), GFP_NOFS); 6010 data = kmalloc(sizeof(*data), GFP_NOFS);
5886 if (!data) 6011 if (!data)
5887 return -ENOMEM; 6012 return;
5888 data->lsp = lsp; 6013 data->lsp = lsp;
5889 data->server = server; 6014 data->server = server;
5890 data->args.lock_owner.clientid = server->nfs_client->cl_clientid; 6015 data->args.lock_owner.clientid = server->nfs_client->cl_clientid;
@@ -5895,7 +6020,6 @@ static int nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_st
5895 msg.rpc_resp = &data->res; 6020 msg.rpc_resp = &data->res;
5896 nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); 6021 nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0);
5897 rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, data); 6022 rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, data);
5898 return 0;
5899} 6023}
5900 6024
5901#define XATTR_NAME_NFSV4_ACL "system.nfs4_acl" 6025#define XATTR_NAME_NFSV4_ACL "system.nfs4_acl"
@@ -8182,7 +8306,8 @@ static int nfs41_free_stateid(struct nfs_server *server,
8182 return ret; 8306 return ret;
8183} 8307}
8184 8308
8185static int nfs41_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp) 8309static void
8310nfs41_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp)
8186{ 8311{
8187 struct rpc_task *task; 8312 struct rpc_task *task;
8188 struct rpc_cred *cred = lsp->ls_state->owner->so_cred; 8313 struct rpc_cred *cred = lsp->ls_state->owner->so_cred;
@@ -8190,9 +8315,8 @@ static int nfs41_free_lock_state(struct nfs_server *server, struct nfs4_lock_sta
8190 task = _nfs41_free_stateid(server, &lsp->ls_stateid, cred, false); 8315 task = _nfs41_free_stateid(server, &lsp->ls_stateid, cred, false);
8191 nfs4_free_lock_state(server, lsp); 8316 nfs4_free_lock_state(server, lsp);
8192 if (IS_ERR(task)) 8317 if (IS_ERR(task))
8193 return PTR_ERR(task); 8318 return;
8194 rpc_put_task(task); 8319 rpc_put_task(task);
8195 return 0;
8196} 8320}
8197 8321
8198static bool nfs41_match_stateid(const nfs4_stateid *s1, 8322static bool nfs41_match_stateid(const nfs4_stateid *s1,
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 848f6853c59e..22fe35104c0c 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -787,21 +787,12 @@ void nfs4_close_sync(struct nfs4_state *state, fmode_t fmode)
787 * that is compatible with current->files 787 * that is compatible with current->files
788 */ 788 */
789static struct nfs4_lock_state * 789static struct nfs4_lock_state *
790__nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid, unsigned int type) 790__nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
791{ 791{
792 struct nfs4_lock_state *pos; 792 struct nfs4_lock_state *pos;
793 list_for_each_entry(pos, &state->lock_states, ls_locks) { 793 list_for_each_entry(pos, &state->lock_states, ls_locks) {
794 if (type != NFS4_ANY_LOCK_TYPE && pos->ls_owner.lo_type != type) 794 if (pos->ls_owner != fl_owner)
795 continue; 795 continue;
796 switch (pos->ls_owner.lo_type) {
797 case NFS4_POSIX_LOCK_TYPE:
798 if (pos->ls_owner.lo_u.posix_owner != fl_owner)
799 continue;
800 break;
801 case NFS4_FLOCK_LOCK_TYPE:
802 if (pos->ls_owner.lo_u.flock_owner != fl_pid)
803 continue;
804 }
805 atomic_inc(&pos->ls_count); 796 atomic_inc(&pos->ls_count);
806 return pos; 797 return pos;
807 } 798 }
@@ -813,7 +804,7 @@ __nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_p
813 * exists, return an uninitialized one. 804 * exists, return an uninitialized one.
814 * 805 *
815 */ 806 */
816static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid, unsigned int type) 807static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
817{ 808{
818 struct nfs4_lock_state *lsp; 809 struct nfs4_lock_state *lsp;
819 struct nfs_server *server = state->owner->so_server; 810 struct nfs_server *server = state->owner->so_server;
@@ -824,17 +815,7 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f
824 nfs4_init_seqid_counter(&lsp->ls_seqid); 815 nfs4_init_seqid_counter(&lsp->ls_seqid);
825 atomic_set(&lsp->ls_count, 1); 816 atomic_set(&lsp->ls_count, 1);
826 lsp->ls_state = state; 817 lsp->ls_state = state;
827 lsp->ls_owner.lo_type = type; 818 lsp->ls_owner = fl_owner;
828 switch (lsp->ls_owner.lo_type) {
829 case NFS4_FLOCK_LOCK_TYPE:
830 lsp->ls_owner.lo_u.flock_owner = fl_pid;
831 break;
832 case NFS4_POSIX_LOCK_TYPE:
833 lsp->ls_owner.lo_u.posix_owner = fl_owner;
834 break;
835 default:
836 goto out_free;
837 }
838 lsp->ls_seqid.owner_id = ida_simple_get(&server->lockowner_id, 0, 0, GFP_NOFS); 819 lsp->ls_seqid.owner_id = ida_simple_get(&server->lockowner_id, 0, 0, GFP_NOFS);
839 if (lsp->ls_seqid.owner_id < 0) 820 if (lsp->ls_seqid.owner_id < 0)
840 goto out_free; 821 goto out_free;
@@ -857,13 +838,13 @@ void nfs4_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp
857 * exists, return an uninitialized one. 838 * exists, return an uninitialized one.
858 * 839 *
859 */ 840 */
860static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner, pid_t pid, unsigned int type) 841static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner)
861{ 842{
862 struct nfs4_lock_state *lsp, *new = NULL; 843 struct nfs4_lock_state *lsp, *new = NULL;
863 844
864 for(;;) { 845 for(;;) {
865 spin_lock(&state->state_lock); 846 spin_lock(&state->state_lock);
866 lsp = __nfs4_find_lock_state(state, owner, pid, type); 847 lsp = __nfs4_find_lock_state(state, owner);
867 if (lsp != NULL) 848 if (lsp != NULL)
868 break; 849 break;
869 if (new != NULL) { 850 if (new != NULL) {
@@ -874,7 +855,7 @@ static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_
874 break; 855 break;
875 } 856 }
876 spin_unlock(&state->state_lock); 857 spin_unlock(&state->state_lock);
877 new = nfs4_alloc_lock_state(state, owner, pid, type); 858 new = nfs4_alloc_lock_state(state, owner);
878 if (new == NULL) 859 if (new == NULL)
879 return NULL; 860 return NULL;
880 } 861 }
@@ -935,13 +916,7 @@ int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl)
935 916
936 if (fl->fl_ops != NULL) 917 if (fl->fl_ops != NULL)
937 return 0; 918 return 0;
938 if (fl->fl_flags & FL_POSIX) 919 lsp = nfs4_get_lock_state(state, fl->fl_owner);
939 lsp = nfs4_get_lock_state(state, fl->fl_owner, 0, NFS4_POSIX_LOCK_TYPE);
940 else if (fl->fl_flags & FL_FLOCK)
941 lsp = nfs4_get_lock_state(state, NULL, fl->fl_pid,
942 NFS4_FLOCK_LOCK_TYPE);
943 else
944 return -EINVAL;
945 if (lsp == NULL) 920 if (lsp == NULL)
946 return -ENOMEM; 921 return -ENOMEM;
947 fl->fl_u.nfs4_fl.owner = lsp; 922 fl->fl_u.nfs4_fl.owner = lsp;
@@ -955,7 +930,6 @@ static int nfs4_copy_lock_stateid(nfs4_stateid *dst,
955{ 930{
956 struct nfs4_lock_state *lsp; 931 struct nfs4_lock_state *lsp;
957 fl_owner_t fl_owner; 932 fl_owner_t fl_owner;
958 pid_t fl_pid;
959 int ret = -ENOENT; 933 int ret = -ENOENT;
960 934
961 935
@@ -966,9 +940,8 @@ static int nfs4_copy_lock_stateid(nfs4_stateid *dst,
966 goto out; 940 goto out;
967 941
968 fl_owner = lockowner->l_owner; 942 fl_owner = lockowner->l_owner;
969 fl_pid = lockowner->l_pid;
970 spin_lock(&state->state_lock); 943 spin_lock(&state->state_lock);
971 lsp = __nfs4_find_lock_state(state, fl_owner, fl_pid, NFS4_ANY_LOCK_TYPE); 944 lsp = __nfs4_find_lock_state(state, fl_owner);
972 if (lsp && test_bit(NFS_LOCK_LOST, &lsp->ls_flags)) 945 if (lsp && test_bit(NFS_LOCK_LOST, &lsp->ls_flags))
973 ret = -EIO; 946 ret = -EIO;
974 else if (lsp != NULL && test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) != 0) { 947 else if (lsp != NULL && test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) != 0) {
@@ -1251,8 +1224,8 @@ int nfs4_wait_clnt_recover(struct nfs_client *clp)
1251 might_sleep(); 1224 might_sleep();
1252 1225
1253 atomic_inc(&clp->cl_count); 1226 atomic_inc(&clp->cl_count);
1254 res = wait_on_bit(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING, 1227 res = wait_on_bit_action(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING,
1255 nfs_wait_bit_killable, TASK_KILLABLE); 1228 nfs_wait_bit_killable, TASK_KILLABLE);
1256 if (res) 1229 if (res)
1257 goto out; 1230 goto out;
1258 if (clp->cl_cons_state < 0) 1231 if (clp->cl_cons_state < 0)
diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
index 0a744f3a86f6..1c32adbe728d 100644
--- a/fs/nfs/nfs4trace.h
+++ b/fs/nfs/nfs4trace.h
@@ -932,11 +932,11 @@ DEFINE_NFS4_IDMAP_EVENT(nfs4_map_gid_to_group);
932 932
933DECLARE_EVENT_CLASS(nfs4_read_event, 933DECLARE_EVENT_CLASS(nfs4_read_event,
934 TP_PROTO( 934 TP_PROTO(
935 const struct nfs_pgio_data *data, 935 const struct nfs_pgio_header *hdr,
936 int error 936 int error
937 ), 937 ),
938 938
939 TP_ARGS(data, error), 939 TP_ARGS(hdr, error),
940 940
941 TP_STRUCT__entry( 941 TP_STRUCT__entry(
942 __field(dev_t, dev) 942 __field(dev_t, dev)
@@ -948,12 +948,12 @@ DECLARE_EVENT_CLASS(nfs4_read_event,
948 ), 948 ),
949 949
950 TP_fast_assign( 950 TP_fast_assign(
951 const struct inode *inode = data->header->inode; 951 const struct inode *inode = hdr->inode;
952 __entry->dev = inode->i_sb->s_dev; 952 __entry->dev = inode->i_sb->s_dev;
953 __entry->fileid = NFS_FILEID(inode); 953 __entry->fileid = NFS_FILEID(inode);
954 __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); 954 __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
955 __entry->offset = data->args.offset; 955 __entry->offset = hdr->args.offset;
956 __entry->count = data->args.count; 956 __entry->count = hdr->args.count;
957 __entry->error = error; 957 __entry->error = error;
958 ), 958 ),
959 959
@@ -972,10 +972,10 @@ DECLARE_EVENT_CLASS(nfs4_read_event,
972#define DEFINE_NFS4_READ_EVENT(name) \ 972#define DEFINE_NFS4_READ_EVENT(name) \
973 DEFINE_EVENT(nfs4_read_event, name, \ 973 DEFINE_EVENT(nfs4_read_event, name, \
974 TP_PROTO( \ 974 TP_PROTO( \
975 const struct nfs_pgio_data *data, \ 975 const struct nfs_pgio_header *hdr, \
976 int error \ 976 int error \
977 ), \ 977 ), \
978 TP_ARGS(data, error)) 978 TP_ARGS(hdr, error))
979DEFINE_NFS4_READ_EVENT(nfs4_read); 979DEFINE_NFS4_READ_EVENT(nfs4_read);
980#ifdef CONFIG_NFS_V4_1 980#ifdef CONFIG_NFS_V4_1
981DEFINE_NFS4_READ_EVENT(nfs4_pnfs_read); 981DEFINE_NFS4_READ_EVENT(nfs4_pnfs_read);
@@ -983,11 +983,11 @@ DEFINE_NFS4_READ_EVENT(nfs4_pnfs_read);
983 983
984DECLARE_EVENT_CLASS(nfs4_write_event, 984DECLARE_EVENT_CLASS(nfs4_write_event,
985 TP_PROTO( 985 TP_PROTO(
986 const struct nfs_pgio_data *data, 986 const struct nfs_pgio_header *hdr,
987 int error 987 int error
988 ), 988 ),
989 989
990 TP_ARGS(data, error), 990 TP_ARGS(hdr, error),
991 991
992 TP_STRUCT__entry( 992 TP_STRUCT__entry(
993 __field(dev_t, dev) 993 __field(dev_t, dev)
@@ -999,12 +999,12 @@ DECLARE_EVENT_CLASS(nfs4_write_event,
999 ), 999 ),
1000 1000
1001 TP_fast_assign( 1001 TP_fast_assign(
1002 const struct inode *inode = data->header->inode; 1002 const struct inode *inode = hdr->inode;
1003 __entry->dev = inode->i_sb->s_dev; 1003 __entry->dev = inode->i_sb->s_dev;
1004 __entry->fileid = NFS_FILEID(inode); 1004 __entry->fileid = NFS_FILEID(inode);
1005 __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); 1005 __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
1006 __entry->offset = data->args.offset; 1006 __entry->offset = hdr->args.offset;
1007 __entry->count = data->args.count; 1007 __entry->count = hdr->args.count;
1008 __entry->error = error; 1008 __entry->error = error;
1009 ), 1009 ),
1010 1010
@@ -1024,10 +1024,10 @@ DECLARE_EVENT_CLASS(nfs4_write_event,
1024#define DEFINE_NFS4_WRITE_EVENT(name) \ 1024#define DEFINE_NFS4_WRITE_EVENT(name) \
1025 DEFINE_EVENT(nfs4_write_event, name, \ 1025 DEFINE_EVENT(nfs4_write_event, name, \
1026 TP_PROTO( \ 1026 TP_PROTO( \
1027 const struct nfs_pgio_data *data, \ 1027 const struct nfs_pgio_header *hdr, \
1028 int error \ 1028 int error \
1029 ), \ 1029 ), \
1030 TP_ARGS(data, error)) 1030 TP_ARGS(hdr, error))
1031DEFINE_NFS4_WRITE_EVENT(nfs4_write); 1031DEFINE_NFS4_WRITE_EVENT(nfs4_write);
1032#ifdef CONFIG_NFS_V4_1 1032#ifdef CONFIG_NFS_V4_1
1033DEFINE_NFS4_WRITE_EVENT(nfs4_pnfs_write); 1033DEFINE_NFS4_WRITE_EVENT(nfs4_pnfs_write);
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 939ae606cfa4..e13b59d8d9aa 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -7092,7 +7092,7 @@ static int nfs4_xdr_dec_reclaim_complete(struct rpc_rqst *rqstp,
7092 if (!status) 7092 if (!status)
7093 status = decode_sequence(xdr, &res->seq_res, rqstp); 7093 status = decode_sequence(xdr, &res->seq_res, rqstp);
7094 if (!status) 7094 if (!status)
7095 status = decode_reclaim_complete(xdr, (void *)NULL); 7095 status = decode_reclaim_complete(xdr, NULL);
7096 return status; 7096 return status;
7097} 7097}
7098 7098
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index 611320753db2..ae05278b3761 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -439,22 +439,21 @@ static void _read_done(struct ore_io_state *ios, void *private)
439 objlayout_read_done(&objios->oir, status, objios->sync); 439 objlayout_read_done(&objios->oir, status, objios->sync);
440} 440}
441 441
442int objio_read_pagelist(struct nfs_pgio_data *rdata) 442int objio_read_pagelist(struct nfs_pgio_header *hdr)
443{ 443{
444 struct nfs_pgio_header *hdr = rdata->header;
445 struct objio_state *objios; 444 struct objio_state *objios;
446 int ret; 445 int ret;
447 446
448 ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, true, 447 ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, true,
449 hdr->lseg, rdata->args.pages, rdata->args.pgbase, 448 hdr->lseg, hdr->args.pages, hdr->args.pgbase,
450 rdata->args.offset, rdata->args.count, rdata, 449 hdr->args.offset, hdr->args.count, hdr,
451 GFP_KERNEL, &objios); 450 GFP_KERNEL, &objios);
452 if (unlikely(ret)) 451 if (unlikely(ret))
453 return ret; 452 return ret;
454 453
455 objios->ios->done = _read_done; 454 objios->ios->done = _read_done;
456 dprintk("%s: offset=0x%llx length=0x%x\n", __func__, 455 dprintk("%s: offset=0x%llx length=0x%x\n", __func__,
457 rdata->args.offset, rdata->args.count); 456 hdr->args.offset, hdr->args.count);
458 ret = ore_read(objios->ios); 457 ret = ore_read(objios->ios);
459 if (unlikely(ret)) 458 if (unlikely(ret))
460 objio_free_result(&objios->oir); 459 objio_free_result(&objios->oir);
@@ -487,11 +486,11 @@ static void _write_done(struct ore_io_state *ios, void *private)
487static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate) 486static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
488{ 487{
489 struct objio_state *objios = priv; 488 struct objio_state *objios = priv;
490 struct nfs_pgio_data *wdata = objios->oir.rpcdata; 489 struct nfs_pgio_header *hdr = objios->oir.rpcdata;
491 struct address_space *mapping = wdata->header->inode->i_mapping; 490 struct address_space *mapping = hdr->inode->i_mapping;
492 pgoff_t index = offset / PAGE_SIZE; 491 pgoff_t index = offset / PAGE_SIZE;
493 struct page *page; 492 struct page *page;
494 loff_t i_size = i_size_read(wdata->header->inode); 493 loff_t i_size = i_size_read(hdr->inode);
495 494
496 if (offset >= i_size) { 495 if (offset >= i_size) {
497 *uptodate = true; 496 *uptodate = true;
@@ -531,15 +530,14 @@ static const struct _ore_r4w_op _r4w_op = {
531 .put_page = &__r4w_put_page, 530 .put_page = &__r4w_put_page,
532}; 531};
533 532
534int objio_write_pagelist(struct nfs_pgio_data *wdata, int how) 533int objio_write_pagelist(struct nfs_pgio_header *hdr, int how)
535{ 534{
536 struct nfs_pgio_header *hdr = wdata->header;
537 struct objio_state *objios; 535 struct objio_state *objios;
538 int ret; 536 int ret;
539 537
540 ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, false, 538 ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, false,
541 hdr->lseg, wdata->args.pages, wdata->args.pgbase, 539 hdr->lseg, hdr->args.pages, hdr->args.pgbase,
542 wdata->args.offset, wdata->args.count, wdata, GFP_NOFS, 540 hdr->args.offset, hdr->args.count, hdr, GFP_NOFS,
543 &objios); 541 &objios);
544 if (unlikely(ret)) 542 if (unlikely(ret))
545 return ret; 543 return ret;
@@ -551,7 +549,7 @@ int objio_write_pagelist(struct nfs_pgio_data *wdata, int how)
551 objios->ios->done = _write_done; 549 objios->ios->done = _write_done;
552 550
553 dprintk("%s: offset=0x%llx length=0x%x\n", __func__, 551 dprintk("%s: offset=0x%llx length=0x%x\n", __func__,
554 wdata->args.offset, wdata->args.count); 552 hdr->args.offset, hdr->args.count);
555 ret = ore_write(objios->ios); 553 ret = ore_write(objios->ios);
556 if (unlikely(ret)) { 554 if (unlikely(ret)) {
557 objio_free_result(&objios->oir); 555 objio_free_result(&objios->oir);
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c
index 765d3f54e986..697a16d11fac 100644
--- a/fs/nfs/objlayout/objlayout.c
+++ b/fs/nfs/objlayout/objlayout.c
@@ -229,36 +229,36 @@ objlayout_io_set_result(struct objlayout_io_res *oir, unsigned index,
229static void _rpc_read_complete(struct work_struct *work) 229static void _rpc_read_complete(struct work_struct *work)
230{ 230{
231 struct rpc_task *task; 231 struct rpc_task *task;
232 struct nfs_pgio_data *rdata; 232 struct nfs_pgio_header *hdr;
233 233
234 dprintk("%s enter\n", __func__); 234 dprintk("%s enter\n", __func__);
235 task = container_of(work, struct rpc_task, u.tk_work); 235 task = container_of(work, struct rpc_task, u.tk_work);
236 rdata = container_of(task, struct nfs_pgio_data, task); 236 hdr = container_of(task, struct nfs_pgio_header, task);
237 237
238 pnfs_ld_read_done(rdata); 238 pnfs_ld_read_done(hdr);
239} 239}
240 240
241void 241void
242objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync) 242objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
243{ 243{
244 struct nfs_pgio_data *rdata = oir->rpcdata; 244 struct nfs_pgio_header *hdr = oir->rpcdata;
245 245
246 oir->status = rdata->task.tk_status = status; 246 oir->status = hdr->task.tk_status = status;
247 if (status >= 0) 247 if (status >= 0)
248 rdata->res.count = status; 248 hdr->res.count = status;
249 else 249 else
250 rdata->header->pnfs_error = status; 250 hdr->pnfs_error = status;
251 objlayout_iodone(oir); 251 objlayout_iodone(oir);
252 /* must not use oir after this point */ 252 /* must not use oir after this point */
253 253
254 dprintk("%s: Return status=%zd eof=%d sync=%d\n", __func__, 254 dprintk("%s: Return status=%zd eof=%d sync=%d\n", __func__,
255 status, rdata->res.eof, sync); 255 status, hdr->res.eof, sync);
256 256
257 if (sync) 257 if (sync)
258 pnfs_ld_read_done(rdata); 258 pnfs_ld_read_done(hdr);
259 else { 259 else {
260 INIT_WORK(&rdata->task.u.tk_work, _rpc_read_complete); 260 INIT_WORK(&hdr->task.u.tk_work, _rpc_read_complete);
261 schedule_work(&rdata->task.u.tk_work); 261 schedule_work(&hdr->task.u.tk_work);
262 } 262 }
263} 263}
264 264
@@ -266,12 +266,11 @@ objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
266 * Perform sync or async reads. 266 * Perform sync or async reads.
267 */ 267 */
268enum pnfs_try_status 268enum pnfs_try_status
269objlayout_read_pagelist(struct nfs_pgio_data *rdata) 269objlayout_read_pagelist(struct nfs_pgio_header *hdr)
270{ 270{
271 struct nfs_pgio_header *hdr = rdata->header;
272 struct inode *inode = hdr->inode; 271 struct inode *inode = hdr->inode;
273 loff_t offset = rdata->args.offset; 272 loff_t offset = hdr->args.offset;
274 size_t count = rdata->args.count; 273 size_t count = hdr->args.count;
275 int err; 274 int err;
276 loff_t eof; 275 loff_t eof;
277 276
@@ -279,23 +278,23 @@ objlayout_read_pagelist(struct nfs_pgio_data *rdata)
279 if (unlikely(offset + count > eof)) { 278 if (unlikely(offset + count > eof)) {
280 if (offset >= eof) { 279 if (offset >= eof) {
281 err = 0; 280 err = 0;
282 rdata->res.count = 0; 281 hdr->res.count = 0;
283 rdata->res.eof = 1; 282 hdr->res.eof = 1;
284 /*FIXME: do we need to call pnfs_ld_read_done() */ 283 /*FIXME: do we need to call pnfs_ld_read_done() */
285 goto out; 284 goto out;
286 } 285 }
287 count = eof - offset; 286 count = eof - offset;
288 } 287 }
289 288
290 rdata->res.eof = (offset + count) >= eof; 289 hdr->res.eof = (offset + count) >= eof;
291 _fix_verify_io_params(hdr->lseg, &rdata->args.pages, 290 _fix_verify_io_params(hdr->lseg, &hdr->args.pages,
292 &rdata->args.pgbase, 291 &hdr->args.pgbase,
293 rdata->args.offset, rdata->args.count); 292 hdr->args.offset, hdr->args.count);
294 293
295 dprintk("%s: inode(%lx) offset 0x%llx count 0x%Zx eof=%d\n", 294 dprintk("%s: inode(%lx) offset 0x%llx count 0x%Zx eof=%d\n",
296 __func__, inode->i_ino, offset, count, rdata->res.eof); 295 __func__, inode->i_ino, offset, count, hdr->res.eof);
297 296
298 err = objio_read_pagelist(rdata); 297 err = objio_read_pagelist(hdr);
299 out: 298 out:
300 if (unlikely(err)) { 299 if (unlikely(err)) {
301 hdr->pnfs_error = err; 300 hdr->pnfs_error = err;
@@ -312,38 +311,38 @@ objlayout_read_pagelist(struct nfs_pgio_data *rdata)
312static void _rpc_write_complete(struct work_struct *work) 311static void _rpc_write_complete(struct work_struct *work)
313{ 312{
314 struct rpc_task *task; 313 struct rpc_task *task;
315 struct nfs_pgio_data *wdata; 314 struct nfs_pgio_header *hdr;
316 315
317 dprintk("%s enter\n", __func__); 316 dprintk("%s enter\n", __func__);
318 task = container_of(work, struct rpc_task, u.tk_work); 317 task = container_of(work, struct rpc_task, u.tk_work);
319 wdata = container_of(task, struct nfs_pgio_data, task); 318 hdr = container_of(task, struct nfs_pgio_header, task);
320 319
321 pnfs_ld_write_done(wdata); 320 pnfs_ld_write_done(hdr);
322} 321}
323 322
324void 323void
325objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync) 324objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
326{ 325{
327 struct nfs_pgio_data *wdata = oir->rpcdata; 326 struct nfs_pgio_header *hdr = oir->rpcdata;
328 327
329 oir->status = wdata->task.tk_status = status; 328 oir->status = hdr->task.tk_status = status;
330 if (status >= 0) { 329 if (status >= 0) {
331 wdata->res.count = status; 330 hdr->res.count = status;
332 wdata->verf.committed = oir->committed; 331 hdr->verf.committed = oir->committed;
333 } else { 332 } else {
334 wdata->header->pnfs_error = status; 333 hdr->pnfs_error = status;
335 } 334 }
336 objlayout_iodone(oir); 335 objlayout_iodone(oir);
337 /* must not use oir after this point */ 336 /* must not use oir after this point */
338 337
339 dprintk("%s: Return status %zd committed %d sync=%d\n", __func__, 338 dprintk("%s: Return status %zd committed %d sync=%d\n", __func__,
340 status, wdata->verf.committed, sync); 339 status, hdr->verf.committed, sync);
341 340
342 if (sync) 341 if (sync)
343 pnfs_ld_write_done(wdata); 342 pnfs_ld_write_done(hdr);
344 else { 343 else {
345 INIT_WORK(&wdata->task.u.tk_work, _rpc_write_complete); 344 INIT_WORK(&hdr->task.u.tk_work, _rpc_write_complete);
346 schedule_work(&wdata->task.u.tk_work); 345 schedule_work(&hdr->task.u.tk_work);
347 } 346 }
348} 347}
349 348
@@ -351,17 +350,15 @@ objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
351 * Perform sync or async writes. 350 * Perform sync or async writes.
352 */ 351 */
353enum pnfs_try_status 352enum pnfs_try_status
354objlayout_write_pagelist(struct nfs_pgio_data *wdata, 353objlayout_write_pagelist(struct nfs_pgio_header *hdr, int how)
355 int how)
356{ 354{
357 struct nfs_pgio_header *hdr = wdata->header;
358 int err; 355 int err;
359 356
360 _fix_verify_io_params(hdr->lseg, &wdata->args.pages, 357 _fix_verify_io_params(hdr->lseg, &hdr->args.pages,
361 &wdata->args.pgbase, 358 &hdr->args.pgbase,
362 wdata->args.offset, wdata->args.count); 359 hdr->args.offset, hdr->args.count);
363 360
364 err = objio_write_pagelist(wdata, how); 361 err = objio_write_pagelist(hdr, how);
365 if (unlikely(err)) { 362 if (unlikely(err)) {
366 hdr->pnfs_error = err; 363 hdr->pnfs_error = err;
367 dprintk("%s: Returned Error %d\n", __func__, err); 364 dprintk("%s: Returned Error %d\n", __func__, err);
diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h
index 01e041029a6c..fd13f1d2f136 100644
--- a/fs/nfs/objlayout/objlayout.h
+++ b/fs/nfs/objlayout/objlayout.h
@@ -119,8 +119,8 @@ extern void objio_free_lseg(struct pnfs_layout_segment *lseg);
119 */ 119 */
120extern void objio_free_result(struct objlayout_io_res *oir); 120extern void objio_free_result(struct objlayout_io_res *oir);
121 121
122extern int objio_read_pagelist(struct nfs_pgio_data *rdata); 122extern int objio_read_pagelist(struct nfs_pgio_header *rdata);
123extern int objio_write_pagelist(struct nfs_pgio_data *wdata, int how); 123extern int objio_write_pagelist(struct nfs_pgio_header *wdata, int how);
124 124
125/* 125/*
126 * callback API 126 * callback API
@@ -168,10 +168,10 @@ extern struct pnfs_layout_segment *objlayout_alloc_lseg(
168extern void objlayout_free_lseg(struct pnfs_layout_segment *); 168extern void objlayout_free_lseg(struct pnfs_layout_segment *);
169 169
170extern enum pnfs_try_status objlayout_read_pagelist( 170extern enum pnfs_try_status objlayout_read_pagelist(
171 struct nfs_pgio_data *); 171 struct nfs_pgio_header *);
172 172
173extern enum pnfs_try_status objlayout_write_pagelist( 173extern enum pnfs_try_status objlayout_write_pagelist(
174 struct nfs_pgio_data *, 174 struct nfs_pgio_header *,
175 int how); 175 int how);
176 176
177extern void objlayout_encode_layoutcommit( 177extern void objlayout_encode_layoutcommit(
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 17fab89f6358..be7cbce6e4c7 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -115,8 +115,8 @@ __nfs_iocounter_wait(struct nfs_io_counter *c)
115 set_bit(NFS_IO_INPROGRESS, &c->flags); 115 set_bit(NFS_IO_INPROGRESS, &c->flags);
116 if (atomic_read(&c->io_count) == 0) 116 if (atomic_read(&c->io_count) == 0)
117 break; 117 break;
118 ret = nfs_wait_bit_killable(&c->flags); 118 ret = nfs_wait_bit_killable(&q.key);
119 } while (atomic_read(&c->io_count) != 0); 119 } while (atomic_read(&c->io_count) != 0 && !ret);
120 finish_wait(wq, &q.wait); 120 finish_wait(wq, &q.wait);
121 return ret; 121 return ret;
122} 122}
@@ -136,28 +136,52 @@ nfs_iocounter_wait(struct nfs_io_counter *c)
136 return __nfs_iocounter_wait(c); 136 return __nfs_iocounter_wait(c);
137} 137}
138 138
139static int nfs_wait_bit_uninterruptible(void *word)
140{
141 io_schedule();
142 return 0;
143}
144
145/* 139/*
146 * nfs_page_group_lock - lock the head of the page group 140 * nfs_page_group_lock - lock the head of the page group
147 * @req - request in group that is to be locked 141 * @req - request in group that is to be locked
142 * @nonblock - if true don't block waiting for lock
148 * 143 *
149 * this lock must be held if modifying the page group list 144 * this lock must be held if modifying the page group list
145 *
146 * return 0 on success, < 0 on error: -EAGAIN if nonblocking or the
147 * result from wait_on_bit_lock
148 *
149 * NOTE: calling with nonblock=false should always have set the
150 * lock bit (see fs/buffer.c and other uses of wait_on_bit_lock
151 * with TASK_UNINTERRUPTIBLE), so there is no need to check the result.
152 */
153int
154nfs_page_group_lock(struct nfs_page *req, bool nonblock)
155{
156 struct nfs_page *head = req->wb_head;
157
158 WARN_ON_ONCE(head != head->wb_head);
159
160 if (!test_and_set_bit(PG_HEADLOCK, &head->wb_flags))
161 return 0;
162
163 if (!nonblock)
164 return wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK,
165 TASK_UNINTERRUPTIBLE);
166
167 return -EAGAIN;
168}
169
170/*
171 * nfs_page_group_lock_wait - wait for the lock to clear, but don't grab it
172 * @req - a request in the group
173 *
174 * This is a blocking call to wait for the group lock to be cleared.
150 */ 175 */
151void 176void
152nfs_page_group_lock(struct nfs_page *req) 177nfs_page_group_lock_wait(struct nfs_page *req)
153{ 178{
154 struct nfs_page *head = req->wb_head; 179 struct nfs_page *head = req->wb_head;
155 180
156 WARN_ON_ONCE(head != head->wb_head); 181 WARN_ON_ONCE(head != head->wb_head);
157 182
158 wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK, 183 wait_on_bit(&head->wb_flags, PG_HEADLOCK,
159 nfs_wait_bit_uninterruptible, 184 TASK_UNINTERRUPTIBLE);
160 TASK_UNINTERRUPTIBLE);
161} 185}
162 186
163/* 187/*
@@ -218,7 +242,7 @@ bool nfs_page_group_sync_on_bit(struct nfs_page *req, unsigned int bit)
218{ 242{
219 bool ret; 243 bool ret;
220 244
221 nfs_page_group_lock(req); 245 nfs_page_group_lock(req, false);
222 ret = nfs_page_group_sync_on_bit_locked(req, bit); 246 ret = nfs_page_group_sync_on_bit_locked(req, bit);
223 nfs_page_group_unlock(req); 247 nfs_page_group_unlock(req);
224 248
@@ -435,9 +459,8 @@ void nfs_release_request(struct nfs_page *req)
435int 459int
436nfs_wait_on_request(struct nfs_page *req) 460nfs_wait_on_request(struct nfs_page *req)
437{ 461{
438 return wait_on_bit(&req->wb_flags, PG_BUSY, 462 return wait_on_bit_io(&req->wb_flags, PG_BUSY,
439 nfs_wait_bit_uninterruptible, 463 TASK_UNINTERRUPTIBLE);
440 TASK_UNINTERRUPTIBLE);
441} 464}
442 465
443/* 466/*
@@ -462,123 +485,72 @@ size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc,
462} 485}
463EXPORT_SYMBOL_GPL(nfs_generic_pg_test); 486EXPORT_SYMBOL_GPL(nfs_generic_pg_test);
464 487
465static inline struct nfs_rw_header *NFS_RW_HEADER(struct nfs_pgio_header *hdr) 488struct nfs_pgio_header *nfs_pgio_header_alloc(const struct nfs_rw_ops *ops)
466{ 489{
467 return container_of(hdr, struct nfs_rw_header, header); 490 struct nfs_pgio_header *hdr = ops->rw_alloc_header();
468}
469
470/**
471 * nfs_rw_header_alloc - Allocate a header for a read or write
472 * @ops: Read or write function vector
473 */
474struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *ops)
475{
476 struct nfs_rw_header *header = ops->rw_alloc_header();
477
478 if (header) {
479 struct nfs_pgio_header *hdr = &header->header;
480 491
492 if (hdr) {
481 INIT_LIST_HEAD(&hdr->pages); 493 INIT_LIST_HEAD(&hdr->pages);
482 spin_lock_init(&hdr->lock); 494 spin_lock_init(&hdr->lock);
483 atomic_set(&hdr->refcnt, 0);
484 hdr->rw_ops = ops; 495 hdr->rw_ops = ops;
485 } 496 }
486 return header; 497 return hdr;
487} 498}
488EXPORT_SYMBOL_GPL(nfs_rw_header_alloc); 499EXPORT_SYMBOL_GPL(nfs_pgio_header_alloc);
489 500
490/* 501/*
491 * nfs_rw_header_free - Free a read or write header 502 * nfs_pgio_header_free - Free a read or write header
492 * @hdr: The header to free 503 * @hdr: The header to free
493 */ 504 */
494void nfs_rw_header_free(struct nfs_pgio_header *hdr) 505void nfs_pgio_header_free(struct nfs_pgio_header *hdr)
495{ 506{
496 hdr->rw_ops->rw_free_header(NFS_RW_HEADER(hdr)); 507 hdr->rw_ops->rw_free_header(hdr);
497} 508}
498EXPORT_SYMBOL_GPL(nfs_rw_header_free); 509EXPORT_SYMBOL_GPL(nfs_pgio_header_free);
499 510
500/** 511/**
501 * nfs_pgio_data_alloc - Allocate pageio data 512 * nfs_pgio_data_destroy - make @hdr suitable for reuse
502 * @hdr: The header making a request 513 *
503 * @pagecount: Number of pages to create 514 * Frees memory and releases refs from nfs_generic_pgio, so that it may
504 */ 515 * be called again.
505static struct nfs_pgio_data *nfs_pgio_data_alloc(struct nfs_pgio_header *hdr, 516 *
506 unsigned int pagecount) 517 * @hdr: A header that has had nfs_generic_pgio called
507{
508 struct nfs_pgio_data *data, *prealloc;
509
510 prealloc = &NFS_RW_HEADER(hdr)->rpc_data;
511 if (prealloc->header == NULL)
512 data = prealloc;
513 else
514 data = kzalloc(sizeof(*data), GFP_KERNEL);
515 if (!data)
516 goto out;
517
518 if (nfs_pgarray_set(&data->pages, pagecount)) {
519 data->header = hdr;
520 atomic_inc(&hdr->refcnt);
521 } else {
522 if (data != prealloc)
523 kfree(data);
524 data = NULL;
525 }
526out:
527 return data;
528}
529
530/**
531 * nfs_pgio_data_release - Properly free pageio data
532 * @data: The data to release
533 */ 518 */
534void nfs_pgio_data_release(struct nfs_pgio_data *data) 519void nfs_pgio_data_destroy(struct nfs_pgio_header *hdr)
535{ 520{
536 struct nfs_pgio_header *hdr = data->header; 521 put_nfs_open_context(hdr->args.context);
537 struct nfs_rw_header *pageio_header = NFS_RW_HEADER(hdr); 522 if (hdr->page_array.pagevec != hdr->page_array.page_array)
538 523 kfree(hdr->page_array.pagevec);
539 put_nfs_open_context(data->args.context);
540 if (data->pages.pagevec != data->pages.page_array)
541 kfree(data->pages.pagevec);
542 if (data == &pageio_header->rpc_data) {
543 data->header = NULL;
544 data = NULL;
545 }
546 if (atomic_dec_and_test(&hdr->refcnt))
547 hdr->completion_ops->completion(hdr);
548 /* Note: we only free the rpc_task after callbacks are done.
549 * See the comment in rpc_free_task() for why
550 */
551 kfree(data);
552} 524}
553EXPORT_SYMBOL_GPL(nfs_pgio_data_release); 525EXPORT_SYMBOL_GPL(nfs_pgio_data_destroy);
554 526
555/** 527/**
556 * nfs_pgio_rpcsetup - Set up arguments for a pageio call 528 * nfs_pgio_rpcsetup - Set up arguments for a pageio call
557 * @data: The pageio data 529 * @hdr: The pageio hdr
558 * @count: Number of bytes to read 530 * @count: Number of bytes to read
559 * @offset: Initial offset 531 * @offset: Initial offset
560 * @how: How to commit data (writes only) 532 * @how: How to commit data (writes only)
561 * @cinfo: Commit information for the call (writes only) 533 * @cinfo: Commit information for the call (writes only)
562 */ 534 */
563static void nfs_pgio_rpcsetup(struct nfs_pgio_data *data, 535static void nfs_pgio_rpcsetup(struct nfs_pgio_header *hdr,
564 unsigned int count, unsigned int offset, 536 unsigned int count, unsigned int offset,
565 int how, struct nfs_commit_info *cinfo) 537 int how, struct nfs_commit_info *cinfo)
566{ 538{
567 struct nfs_page *req = data->header->req; 539 struct nfs_page *req = hdr->req;
568 540
569 /* Set up the RPC argument and reply structs 541 /* Set up the RPC argument and reply structs
570 * NB: take care not to mess about with data->commit et al. */ 542 * NB: take care not to mess about with hdr->commit et al. */
571 543
572 data->args.fh = NFS_FH(data->header->inode); 544 hdr->args.fh = NFS_FH(hdr->inode);
573 data->args.offset = req_offset(req) + offset; 545 hdr->args.offset = req_offset(req) + offset;
574 /* pnfs_set_layoutcommit needs this */ 546 /* pnfs_set_layoutcommit needs this */
575 data->mds_offset = data->args.offset; 547 hdr->mds_offset = hdr->args.offset;
576 data->args.pgbase = req->wb_pgbase + offset; 548 hdr->args.pgbase = req->wb_pgbase + offset;
577 data->args.pages = data->pages.pagevec; 549 hdr->args.pages = hdr->page_array.pagevec;
578 data->args.count = count; 550 hdr->args.count = count;
579 data->args.context = get_nfs_open_context(req->wb_context); 551 hdr->args.context = get_nfs_open_context(req->wb_context);
580 data->args.lock_context = req->wb_lock_context; 552 hdr->args.lock_context = req->wb_lock_context;
581 data->args.stable = NFS_UNSTABLE; 553 hdr->args.stable = NFS_UNSTABLE;
582 switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) { 554 switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) {
583 case 0: 555 case 0:
584 break; 556 break;
@@ -586,59 +558,59 @@ static void nfs_pgio_rpcsetup(struct nfs_pgio_data *data,
586 if (nfs_reqs_to_commit(cinfo)) 558 if (nfs_reqs_to_commit(cinfo))
587 break; 559 break;
588 default: 560 default:
589 data->args.stable = NFS_FILE_SYNC; 561 hdr->args.stable = NFS_FILE_SYNC;
590 } 562 }
591 563
592 data->res.fattr = &data->fattr; 564 hdr->res.fattr = &hdr->fattr;
593 data->res.count = count; 565 hdr->res.count = count;
594 data->res.eof = 0; 566 hdr->res.eof = 0;
595 data->res.verf = &data->verf; 567 hdr->res.verf = &hdr->verf;
596 nfs_fattr_init(&data->fattr); 568 nfs_fattr_init(&hdr->fattr);
597} 569}
598 570
599/** 571/**
600 * nfs_pgio_prepare - Prepare pageio data to go over the wire 572 * nfs_pgio_prepare - Prepare pageio hdr to go over the wire
601 * @task: The current task 573 * @task: The current task
602 * @calldata: pageio data to prepare 574 * @calldata: pageio header to prepare
603 */ 575 */
604static void nfs_pgio_prepare(struct rpc_task *task, void *calldata) 576static void nfs_pgio_prepare(struct rpc_task *task, void *calldata)
605{ 577{
606 struct nfs_pgio_data *data = calldata; 578 struct nfs_pgio_header *hdr = calldata;
607 int err; 579 int err;
608 err = NFS_PROTO(data->header->inode)->pgio_rpc_prepare(task, data); 580 err = NFS_PROTO(hdr->inode)->pgio_rpc_prepare(task, hdr);
609 if (err) 581 if (err)
610 rpc_exit(task, err); 582 rpc_exit(task, err);
611} 583}
612 584
613int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_data *data, 585int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr,
614 const struct rpc_call_ops *call_ops, int how, int flags) 586 const struct rpc_call_ops *call_ops, int how, int flags)
615{ 587{
616 struct rpc_task *task; 588 struct rpc_task *task;
617 struct rpc_message msg = { 589 struct rpc_message msg = {
618 .rpc_argp = &data->args, 590 .rpc_argp = &hdr->args,
619 .rpc_resp = &data->res, 591 .rpc_resp = &hdr->res,
620 .rpc_cred = data->header->cred, 592 .rpc_cred = hdr->cred,
621 }; 593 };
622 struct rpc_task_setup task_setup_data = { 594 struct rpc_task_setup task_setup_data = {
623 .rpc_client = clnt, 595 .rpc_client = clnt,
624 .task = &data->task, 596 .task = &hdr->task,
625 .rpc_message = &msg, 597 .rpc_message = &msg,
626 .callback_ops = call_ops, 598 .callback_ops = call_ops,
627 .callback_data = data, 599 .callback_data = hdr,
628 .workqueue = nfsiod_workqueue, 600 .workqueue = nfsiod_workqueue,
629 .flags = RPC_TASK_ASYNC | flags, 601 .flags = RPC_TASK_ASYNC | flags,
630 }; 602 };
631 int ret = 0; 603 int ret = 0;
632 604
633 data->header->rw_ops->rw_initiate(data, &msg, &task_setup_data, how); 605 hdr->rw_ops->rw_initiate(hdr, &msg, &task_setup_data, how);
634 606
635 dprintk("NFS: %5u initiated pgio call " 607 dprintk("NFS: %5u initiated pgio call "
636 "(req %s/%llu, %u bytes @ offset %llu)\n", 608 "(req %s/%llu, %u bytes @ offset %llu)\n",
637 data->task.tk_pid, 609 hdr->task.tk_pid,
638 data->header->inode->i_sb->s_id, 610 hdr->inode->i_sb->s_id,
639 (unsigned long long)NFS_FILEID(data->header->inode), 611 (unsigned long long)NFS_FILEID(hdr->inode),
640 data->args.count, 612 hdr->args.count,
641 (unsigned long long)data->args.offset); 613 (unsigned long long)hdr->args.offset);
642 614
643 task = rpc_run_task(&task_setup_data); 615 task = rpc_run_task(&task_setup_data);
644 if (IS_ERR(task)) { 616 if (IS_ERR(task)) {
@@ -665,22 +637,23 @@ static int nfs_pgio_error(struct nfs_pageio_descriptor *desc,
665 struct nfs_pgio_header *hdr) 637 struct nfs_pgio_header *hdr)
666{ 638{
667 set_bit(NFS_IOHDR_REDO, &hdr->flags); 639 set_bit(NFS_IOHDR_REDO, &hdr->flags);
668 nfs_pgio_data_release(hdr->data); 640 nfs_pgio_data_destroy(hdr);
669 hdr->data = NULL; 641 hdr->completion_ops->completion(hdr);
670 desc->pg_completion_ops->error_cleanup(&desc->pg_list); 642 desc->pg_completion_ops->error_cleanup(&desc->pg_list);
671 return -ENOMEM; 643 return -ENOMEM;
672} 644}
673 645
674/** 646/**
675 * nfs_pgio_release - Release pageio data 647 * nfs_pgio_release - Release pageio data
676 * @calldata: The pageio data to release 648 * @calldata: The pageio header to release
677 */ 649 */
678static void nfs_pgio_release(void *calldata) 650static void nfs_pgio_release(void *calldata)
679{ 651{
680 struct nfs_pgio_data *data = calldata; 652 struct nfs_pgio_header *hdr = calldata;
681 if (data->header->rw_ops->rw_release) 653 if (hdr->rw_ops->rw_release)
682 data->header->rw_ops->rw_release(data); 654 hdr->rw_ops->rw_release(hdr);
683 nfs_pgio_data_release(data); 655 nfs_pgio_data_destroy(hdr);
656 hdr->completion_ops->completion(hdr);
684} 657}
685 658
686/** 659/**
@@ -721,22 +694,22 @@ EXPORT_SYMBOL_GPL(nfs_pageio_init);
721/** 694/**
722 * nfs_pgio_result - Basic pageio error handling 695 * nfs_pgio_result - Basic pageio error handling
723 * @task: The task that ran 696 * @task: The task that ran
724 * @calldata: Pageio data to check 697 * @calldata: Pageio header to check
725 */ 698 */
726static void nfs_pgio_result(struct rpc_task *task, void *calldata) 699static void nfs_pgio_result(struct rpc_task *task, void *calldata)
727{ 700{
728 struct nfs_pgio_data *data = calldata; 701 struct nfs_pgio_header *hdr = calldata;
729 struct inode *inode = data->header->inode; 702 struct inode *inode = hdr->inode;
730 703
731 dprintk("NFS: %s: %5u, (status %d)\n", __func__, 704 dprintk("NFS: %s: %5u, (status %d)\n", __func__,
732 task->tk_pid, task->tk_status); 705 task->tk_pid, task->tk_status);
733 706
734 if (data->header->rw_ops->rw_done(task, data, inode) != 0) 707 if (hdr->rw_ops->rw_done(task, hdr, inode) != 0)
735 return; 708 return;
736 if (task->tk_status < 0) 709 if (task->tk_status < 0)
737 nfs_set_pgio_error(data->header, task->tk_status, data->args.offset); 710 nfs_set_pgio_error(hdr, task->tk_status, hdr->args.offset);
738 else 711 else
739 data->header->rw_ops->rw_result(task, data); 712 hdr->rw_ops->rw_result(task, hdr);
740} 713}
741 714
742/* 715/*
@@ -751,32 +724,42 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
751 struct nfs_pgio_header *hdr) 724 struct nfs_pgio_header *hdr)
752{ 725{
753 struct nfs_page *req; 726 struct nfs_page *req;
754 struct page **pages; 727 struct page **pages,
755 struct nfs_pgio_data *data; 728 *last_page;
756 struct list_head *head = &desc->pg_list; 729 struct list_head *head = &desc->pg_list;
757 struct nfs_commit_info cinfo; 730 struct nfs_commit_info cinfo;
731 unsigned int pagecount, pageused;
758 732
759 data = nfs_pgio_data_alloc(hdr, nfs_page_array_len(desc->pg_base, 733 pagecount = nfs_page_array_len(desc->pg_base, desc->pg_count);
760 desc->pg_count)); 734 if (!nfs_pgarray_set(&hdr->page_array, pagecount))
761 if (!data)
762 return nfs_pgio_error(desc, hdr); 735 return nfs_pgio_error(desc, hdr);
763 736
764 nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq); 737 nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);
765 pages = data->pages.pagevec; 738 pages = hdr->page_array.pagevec;
739 last_page = NULL;
740 pageused = 0;
766 while (!list_empty(head)) { 741 while (!list_empty(head)) {
767 req = nfs_list_entry(head->next); 742 req = nfs_list_entry(head->next);
768 nfs_list_remove_request(req); 743 nfs_list_remove_request(req);
769 nfs_list_add_request(req, &hdr->pages); 744 nfs_list_add_request(req, &hdr->pages);
770 *pages++ = req->wb_page; 745
746 if (WARN_ON_ONCE(pageused >= pagecount))
747 return nfs_pgio_error(desc, hdr);
748
749 if (!last_page || last_page != req->wb_page) {
750 *pages++ = last_page = req->wb_page;
751 pageused++;
752 }
771 } 753 }
754 if (WARN_ON_ONCE(pageused != pagecount))
755 return nfs_pgio_error(desc, hdr);
772 756
773 if ((desc->pg_ioflags & FLUSH_COND_STABLE) && 757 if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
774 (desc->pg_moreio || nfs_reqs_to_commit(&cinfo))) 758 (desc->pg_moreio || nfs_reqs_to_commit(&cinfo)))
775 desc->pg_ioflags &= ~FLUSH_COND_STABLE; 759 desc->pg_ioflags &= ~FLUSH_COND_STABLE;
776 760
777 /* Set up the argument struct */ 761 /* Set up the argument struct */
778 nfs_pgio_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags, &cinfo); 762 nfs_pgio_rpcsetup(hdr, desc->pg_count, 0, desc->pg_ioflags, &cinfo);
779 hdr->data = data;
780 desc->pg_rpc_callops = &nfs_pgio_common_ops; 763 desc->pg_rpc_callops = &nfs_pgio_common_ops;
781 return 0; 764 return 0;
782} 765}
@@ -784,25 +767,20 @@ EXPORT_SYMBOL_GPL(nfs_generic_pgio);
784 767
785static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc) 768static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc)
786{ 769{
787 struct nfs_rw_header *rw_hdr;
788 struct nfs_pgio_header *hdr; 770 struct nfs_pgio_header *hdr;
789 int ret; 771 int ret;
790 772
791 rw_hdr = nfs_rw_header_alloc(desc->pg_rw_ops); 773 hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);
792 if (!rw_hdr) { 774 if (!hdr) {
793 desc->pg_completion_ops->error_cleanup(&desc->pg_list); 775 desc->pg_completion_ops->error_cleanup(&desc->pg_list);
794 return -ENOMEM; 776 return -ENOMEM;
795 } 777 }
796 hdr = &rw_hdr->header; 778 nfs_pgheader_init(desc, hdr, nfs_pgio_header_free);
797 nfs_pgheader_init(desc, hdr, nfs_rw_header_free);
798 atomic_inc(&hdr->refcnt);
799 ret = nfs_generic_pgio(desc, hdr); 779 ret = nfs_generic_pgio(desc, hdr);
800 if (ret == 0) 780 if (ret == 0)
801 ret = nfs_initiate_pgio(NFS_CLIENT(hdr->inode), 781 ret = nfs_initiate_pgio(NFS_CLIENT(hdr->inode),
802 hdr->data, desc->pg_rpc_callops, 782 hdr, desc->pg_rpc_callops,
803 desc->pg_ioflags, 0); 783 desc->pg_ioflags, 0);
804 if (atomic_dec_and_test(&hdr->refcnt))
805 hdr->completion_ops->completion(hdr);
806 return ret; 784 return ret;
807} 785}
808 786
@@ -845,6 +823,14 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev,
845 return false; 823 return false;
846 if (req_offset(req) != req_offset(prev) + prev->wb_bytes) 824 if (req_offset(req) != req_offset(prev) + prev->wb_bytes)
847 return false; 825 return false;
826 if (req->wb_page == prev->wb_page) {
827 if (req->wb_pgbase != prev->wb_pgbase + prev->wb_bytes)
828 return false;
829 } else {
830 if (req->wb_pgbase != 0 ||
831 prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE)
832 return false;
833 }
848 } 834 }
849 size = pgio->pg_ops->pg_test(pgio, prev, req); 835 size = pgio->pg_ops->pg_test(pgio, prev, req);
850 WARN_ON_ONCE(size > req->wb_bytes); 836 WARN_ON_ONCE(size > req->wb_bytes);
@@ -916,7 +902,7 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
916 unsigned int bytes_left = 0; 902 unsigned int bytes_left = 0;
917 unsigned int offset, pgbase; 903 unsigned int offset, pgbase;
918 904
919 nfs_page_group_lock(req); 905 nfs_page_group_lock(req, false);
920 906
921 subreq = req; 907 subreq = req;
922 bytes_left = subreq->wb_bytes; 908 bytes_left = subreq->wb_bytes;
@@ -938,7 +924,7 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
938 if (desc->pg_recoalesce) 924 if (desc->pg_recoalesce)
939 return 0; 925 return 0;
940 /* retry add_request for this subreq */ 926 /* retry add_request for this subreq */
941 nfs_page_group_lock(req); 927 nfs_page_group_lock(req, false);
942 continue; 928 continue;
943 } 929 }
944 930
@@ -1013,7 +999,38 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
1013 } while (ret); 999 } while (ret);
1014 return ret; 1000 return ret;
1015} 1001}
1016EXPORT_SYMBOL_GPL(nfs_pageio_add_request); 1002
1003/*
1004 * nfs_pageio_resend - Transfer requests to new descriptor and resend
1005 * @hdr - the pgio header to move request from
1006 * @desc - the pageio descriptor to add requests to
1007 *
1008 * Try to move each request (nfs_page) from @hdr to @desc then attempt
1009 * to send them.
1010 *
1011 * Returns 0 on success and < 0 on error.
1012 */
1013int nfs_pageio_resend(struct nfs_pageio_descriptor *desc,
1014 struct nfs_pgio_header *hdr)
1015{
1016 LIST_HEAD(failed);
1017
1018 desc->pg_dreq = hdr->dreq;
1019 while (!list_empty(&hdr->pages)) {
1020 struct nfs_page *req = nfs_list_entry(hdr->pages.next);
1021
1022 nfs_list_remove_request(req);
1023 if (!nfs_pageio_add_request(desc, req))
1024 nfs_list_add_request(req, &failed);
1025 }
1026 nfs_pageio_complete(desc);
1027 if (!list_empty(&failed)) {
1028 list_move(&failed, &hdr->pages);
1029 return -EIO;
1030 }
1031 return 0;
1032}
1033EXPORT_SYMBOL_GPL(nfs_pageio_resend);
1017 1034
1018/** 1035/**
1019 * nfs_pageio_complete - Complete I/O on an nfs_pageio_descriptor 1036 * nfs_pageio_complete - Complete I/O on an nfs_pageio_descriptor
@@ -1029,7 +1046,6 @@ void nfs_pageio_complete(struct nfs_pageio_descriptor *desc)
1029 break; 1046 break;
1030 } 1047 }
1031} 1048}
1032EXPORT_SYMBOL_GPL(nfs_pageio_complete);
1033 1049
1034/** 1050/**
1035 * nfs_pageio_cond_complete - Conditional I/O completion 1051 * nfs_pageio_cond_complete - Conditional I/O completion
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 6fdcd233d6f7..a3851debf8a2 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -361,6 +361,23 @@ pnfs_put_lseg(struct pnfs_layout_segment *lseg)
361} 361}
362EXPORT_SYMBOL_GPL(pnfs_put_lseg); 362EXPORT_SYMBOL_GPL(pnfs_put_lseg);
363 363
364static void pnfs_put_lseg_async_work(struct work_struct *work)
365{
366 struct pnfs_layout_segment *lseg;
367
368 lseg = container_of(work, struct pnfs_layout_segment, pls_work);
369
370 pnfs_put_lseg(lseg);
371}
372
373void
374pnfs_put_lseg_async(struct pnfs_layout_segment *lseg)
375{
376 INIT_WORK(&lseg->pls_work, pnfs_put_lseg_async_work);
377 schedule_work(&lseg->pls_work);
378}
379EXPORT_SYMBOL_GPL(pnfs_put_lseg_async);
380
364static u64 381static u64
365end_offset(u64 start, u64 len) 382end_offset(u64 start, u64 len)
366{ 383{
@@ -1470,41 +1487,19 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
1470} 1487}
1471EXPORT_SYMBOL_GPL(pnfs_generic_pg_test); 1488EXPORT_SYMBOL_GPL(pnfs_generic_pg_test);
1472 1489
1473int pnfs_write_done_resend_to_mds(struct inode *inode, 1490int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *hdr)
1474 struct list_head *head,
1475 const struct nfs_pgio_completion_ops *compl_ops,
1476 struct nfs_direct_req *dreq)
1477{ 1491{
1478 struct nfs_pageio_descriptor pgio; 1492 struct nfs_pageio_descriptor pgio;
1479 LIST_HEAD(failed);
1480 1493
1481 /* Resend all requests through the MDS */ 1494 /* Resend all requests through the MDS */
1482 nfs_pageio_init_write(&pgio, inode, FLUSH_STABLE, true, compl_ops); 1495 nfs_pageio_init_write(&pgio, hdr->inode, FLUSH_STABLE, true,
1483 pgio.pg_dreq = dreq; 1496 hdr->completion_ops);
1484 while (!list_empty(head)) { 1497 return nfs_pageio_resend(&pgio, hdr);
1485 struct nfs_page *req = nfs_list_entry(head->next);
1486
1487 nfs_list_remove_request(req);
1488 if (!nfs_pageio_add_request(&pgio, req))
1489 nfs_list_add_request(req, &failed);
1490 }
1491 nfs_pageio_complete(&pgio);
1492
1493 if (!list_empty(&failed)) {
1494 /* For some reason our attempt to resend pages. Mark the
1495 * overall send request as having failed, and let
1496 * nfs_writeback_release_full deal with the error.
1497 */
1498 list_move(&failed, head);
1499 return -EIO;
1500 }
1501 return 0;
1502} 1498}
1503EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds); 1499EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds);
1504 1500
1505static void pnfs_ld_handle_write_error(struct nfs_pgio_data *data) 1501static void pnfs_ld_handle_write_error(struct nfs_pgio_header *hdr)
1506{ 1502{
1507 struct nfs_pgio_header *hdr = data->header;
1508 1503
1509 dprintk("pnfs write error = %d\n", hdr->pnfs_error); 1504 dprintk("pnfs write error = %d\n", hdr->pnfs_error);
1510 if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags & 1505 if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
@@ -1512,50 +1507,42 @@ static void pnfs_ld_handle_write_error(struct nfs_pgio_data *data)
1512 pnfs_return_layout(hdr->inode); 1507 pnfs_return_layout(hdr->inode);
1513 } 1508 }
1514 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) 1509 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
1515 data->task.tk_status = pnfs_write_done_resend_to_mds(hdr->inode, 1510 hdr->task.tk_status = pnfs_write_done_resend_to_mds(hdr);
1516 &hdr->pages,
1517 hdr->completion_ops,
1518 hdr->dreq);
1519} 1511}
1520 1512
1521/* 1513/*
1522 * Called by non rpc-based layout drivers 1514 * Called by non rpc-based layout drivers
1523 */ 1515 */
1524void pnfs_ld_write_done(struct nfs_pgio_data *data) 1516void pnfs_ld_write_done(struct nfs_pgio_header *hdr)
1525{ 1517{
1526 struct nfs_pgio_header *hdr = data->header; 1518 trace_nfs4_pnfs_write(hdr, hdr->pnfs_error);
1527
1528 trace_nfs4_pnfs_write(data, hdr->pnfs_error);
1529 if (!hdr->pnfs_error) { 1519 if (!hdr->pnfs_error) {
1530 pnfs_set_layoutcommit(data); 1520 pnfs_set_layoutcommit(hdr);
1531 hdr->mds_ops->rpc_call_done(&data->task, data); 1521 hdr->mds_ops->rpc_call_done(&hdr->task, hdr);
1532 } else 1522 } else
1533 pnfs_ld_handle_write_error(data); 1523 pnfs_ld_handle_write_error(hdr);
1534 hdr->mds_ops->rpc_release(data); 1524 hdr->mds_ops->rpc_release(hdr);
1535} 1525}
1536EXPORT_SYMBOL_GPL(pnfs_ld_write_done); 1526EXPORT_SYMBOL_GPL(pnfs_ld_write_done);
1537 1527
1538static void 1528static void
1539pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, 1529pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
1540 struct nfs_pgio_data *data) 1530 struct nfs_pgio_header *hdr)
1541{ 1531{
1542 struct nfs_pgio_header *hdr = data->header;
1543
1544 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { 1532 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
1545 list_splice_tail_init(&hdr->pages, &desc->pg_list); 1533 list_splice_tail_init(&hdr->pages, &desc->pg_list);
1546 nfs_pageio_reset_write_mds(desc); 1534 nfs_pageio_reset_write_mds(desc);
1547 desc->pg_recoalesce = 1; 1535 desc->pg_recoalesce = 1;
1548 } 1536 }
1549 nfs_pgio_data_release(data); 1537 nfs_pgio_data_destroy(hdr);
1550} 1538}
1551 1539
1552static enum pnfs_try_status 1540static enum pnfs_try_status
1553pnfs_try_to_write_data(struct nfs_pgio_data *wdata, 1541pnfs_try_to_write_data(struct nfs_pgio_header *hdr,
1554 const struct rpc_call_ops *call_ops, 1542 const struct rpc_call_ops *call_ops,
1555 struct pnfs_layout_segment *lseg, 1543 struct pnfs_layout_segment *lseg,
1556 int how) 1544 int how)
1557{ 1545{
1558 struct nfs_pgio_header *hdr = wdata->header;
1559 struct inode *inode = hdr->inode; 1546 struct inode *inode = hdr->inode;
1560 enum pnfs_try_status trypnfs; 1547 enum pnfs_try_status trypnfs;
1561 struct nfs_server *nfss = NFS_SERVER(inode); 1548 struct nfs_server *nfss = NFS_SERVER(inode);
@@ -1563,8 +1550,8 @@ pnfs_try_to_write_data(struct nfs_pgio_data *wdata,
1563 hdr->mds_ops = call_ops; 1550 hdr->mds_ops = call_ops;
1564 1551
1565 dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__, 1552 dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__,
1566 inode->i_ino, wdata->args.count, wdata->args.offset, how); 1553 inode->i_ino, hdr->args.count, hdr->args.offset, how);
1567 trypnfs = nfss->pnfs_curr_ld->write_pagelist(wdata, how); 1554 trypnfs = nfss->pnfs_curr_ld->write_pagelist(hdr, how);
1568 if (trypnfs != PNFS_NOT_ATTEMPTED) 1555 if (trypnfs != PNFS_NOT_ATTEMPTED)
1569 nfs_inc_stats(inode, NFSIOS_PNFS_WRITE); 1556 nfs_inc_stats(inode, NFSIOS_PNFS_WRITE);
1570 dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs); 1557 dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
@@ -1575,139 +1562,105 @@ static void
1575pnfs_do_write(struct nfs_pageio_descriptor *desc, 1562pnfs_do_write(struct nfs_pageio_descriptor *desc,
1576 struct nfs_pgio_header *hdr, int how) 1563 struct nfs_pgio_header *hdr, int how)
1577{ 1564{
1578 struct nfs_pgio_data *data = hdr->data;
1579 const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; 1565 const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
1580 struct pnfs_layout_segment *lseg = desc->pg_lseg; 1566 struct pnfs_layout_segment *lseg = desc->pg_lseg;
1581 enum pnfs_try_status trypnfs; 1567 enum pnfs_try_status trypnfs;
1582 1568
1583 desc->pg_lseg = NULL; 1569 desc->pg_lseg = NULL;
1584 trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how); 1570 trypnfs = pnfs_try_to_write_data(hdr, call_ops, lseg, how);
1585 if (trypnfs == PNFS_NOT_ATTEMPTED) 1571 if (trypnfs == PNFS_NOT_ATTEMPTED)
1586 pnfs_write_through_mds(desc, data); 1572 pnfs_write_through_mds(desc, hdr);
1587 pnfs_put_lseg(lseg); 1573 pnfs_put_lseg(lseg);
1588} 1574}
1589 1575
1590static void pnfs_writehdr_free(struct nfs_pgio_header *hdr) 1576static void pnfs_writehdr_free(struct nfs_pgio_header *hdr)
1591{ 1577{
1592 pnfs_put_lseg(hdr->lseg); 1578 pnfs_put_lseg(hdr->lseg);
1593 nfs_rw_header_free(hdr); 1579 nfs_pgio_header_free(hdr);
1594} 1580}
1595EXPORT_SYMBOL_GPL(pnfs_writehdr_free); 1581EXPORT_SYMBOL_GPL(pnfs_writehdr_free);
1596 1582
1597int 1583int
1598pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) 1584pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
1599{ 1585{
1600 struct nfs_rw_header *whdr;
1601 struct nfs_pgio_header *hdr; 1586 struct nfs_pgio_header *hdr;
1602 int ret; 1587 int ret;
1603 1588
1604 whdr = nfs_rw_header_alloc(desc->pg_rw_ops); 1589 hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);
1605 if (!whdr) { 1590 if (!hdr) {
1606 desc->pg_completion_ops->error_cleanup(&desc->pg_list); 1591 desc->pg_completion_ops->error_cleanup(&desc->pg_list);
1607 pnfs_put_lseg(desc->pg_lseg); 1592 pnfs_put_lseg(desc->pg_lseg);
1608 desc->pg_lseg = NULL; 1593 desc->pg_lseg = NULL;
1609 return -ENOMEM; 1594 return -ENOMEM;
1610 } 1595 }
1611 hdr = &whdr->header;
1612 nfs_pgheader_init(desc, hdr, pnfs_writehdr_free); 1596 nfs_pgheader_init(desc, hdr, pnfs_writehdr_free);
1613 hdr->lseg = pnfs_get_lseg(desc->pg_lseg); 1597 hdr->lseg = pnfs_get_lseg(desc->pg_lseg);
1614 atomic_inc(&hdr->refcnt);
1615 ret = nfs_generic_pgio(desc, hdr); 1598 ret = nfs_generic_pgio(desc, hdr);
1616 if (ret != 0) { 1599 if (ret != 0) {
1617 pnfs_put_lseg(desc->pg_lseg); 1600 pnfs_put_lseg(desc->pg_lseg);
1618 desc->pg_lseg = NULL; 1601 desc->pg_lseg = NULL;
1619 } else 1602 } else
1620 pnfs_do_write(desc, hdr, desc->pg_ioflags); 1603 pnfs_do_write(desc, hdr, desc->pg_ioflags);
1621 if (atomic_dec_and_test(&hdr->refcnt))
1622 hdr->completion_ops->completion(hdr);
1623 return ret; 1604 return ret;
1624} 1605}
1625EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages); 1606EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages);
1626 1607
1627int pnfs_read_done_resend_to_mds(struct inode *inode, 1608int pnfs_read_done_resend_to_mds(struct nfs_pgio_header *hdr)
1628 struct list_head *head,
1629 const struct nfs_pgio_completion_ops *compl_ops,
1630 struct nfs_direct_req *dreq)
1631{ 1609{
1632 struct nfs_pageio_descriptor pgio; 1610 struct nfs_pageio_descriptor pgio;
1633 LIST_HEAD(failed);
1634 1611
1635 /* Resend all requests through the MDS */ 1612 /* Resend all requests through the MDS */
1636 nfs_pageio_init_read(&pgio, inode, true, compl_ops); 1613 nfs_pageio_init_read(&pgio, hdr->inode, true, hdr->completion_ops);
1637 pgio.pg_dreq = dreq; 1614 return nfs_pageio_resend(&pgio, hdr);
1638 while (!list_empty(head)) {
1639 struct nfs_page *req = nfs_list_entry(head->next);
1640
1641 nfs_list_remove_request(req);
1642 if (!nfs_pageio_add_request(&pgio, req))
1643 nfs_list_add_request(req, &failed);
1644 }
1645 nfs_pageio_complete(&pgio);
1646
1647 if (!list_empty(&failed)) {
1648 list_move(&failed, head);
1649 return -EIO;
1650 }
1651 return 0;
1652} 1615}
1653EXPORT_SYMBOL_GPL(pnfs_read_done_resend_to_mds); 1616EXPORT_SYMBOL_GPL(pnfs_read_done_resend_to_mds);
1654 1617
1655static void pnfs_ld_handle_read_error(struct nfs_pgio_data *data) 1618static void pnfs_ld_handle_read_error(struct nfs_pgio_header *hdr)
1656{ 1619{
1657 struct nfs_pgio_header *hdr = data->header;
1658
1659 dprintk("pnfs read error = %d\n", hdr->pnfs_error); 1620 dprintk("pnfs read error = %d\n", hdr->pnfs_error);
1660 if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags & 1621 if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
1661 PNFS_LAYOUTRET_ON_ERROR) { 1622 PNFS_LAYOUTRET_ON_ERROR) {
1662 pnfs_return_layout(hdr->inode); 1623 pnfs_return_layout(hdr->inode);
1663 } 1624 }
1664 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) 1625 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
1665 data->task.tk_status = pnfs_read_done_resend_to_mds(hdr->inode, 1626 hdr->task.tk_status = pnfs_read_done_resend_to_mds(hdr);
1666 &hdr->pages,
1667 hdr->completion_ops,
1668 hdr->dreq);
1669} 1627}
1670 1628
1671/* 1629/*
1672 * Called by non rpc-based layout drivers 1630 * Called by non rpc-based layout drivers
1673 */ 1631 */
1674void pnfs_ld_read_done(struct nfs_pgio_data *data) 1632void pnfs_ld_read_done(struct nfs_pgio_header *hdr)
1675{ 1633{
1676 struct nfs_pgio_header *hdr = data->header; 1634 trace_nfs4_pnfs_read(hdr, hdr->pnfs_error);
1677
1678 trace_nfs4_pnfs_read(data, hdr->pnfs_error);
1679 if (likely(!hdr->pnfs_error)) { 1635 if (likely(!hdr->pnfs_error)) {
1680 __nfs4_read_done_cb(data); 1636 __nfs4_read_done_cb(hdr);
1681 hdr->mds_ops->rpc_call_done(&data->task, data); 1637 hdr->mds_ops->rpc_call_done(&hdr->task, hdr);
1682 } else 1638 } else
1683 pnfs_ld_handle_read_error(data); 1639 pnfs_ld_handle_read_error(hdr);
1684 hdr->mds_ops->rpc_release(data); 1640 hdr->mds_ops->rpc_release(hdr);
1685} 1641}
1686EXPORT_SYMBOL_GPL(pnfs_ld_read_done); 1642EXPORT_SYMBOL_GPL(pnfs_ld_read_done);
1687 1643
1688static void 1644static void
1689pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, 1645pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
1690 struct nfs_pgio_data *data) 1646 struct nfs_pgio_header *hdr)
1691{ 1647{
1692 struct nfs_pgio_header *hdr = data->header;
1693
1694 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { 1648 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
1695 list_splice_tail_init(&hdr->pages, &desc->pg_list); 1649 list_splice_tail_init(&hdr->pages, &desc->pg_list);
1696 nfs_pageio_reset_read_mds(desc); 1650 nfs_pageio_reset_read_mds(desc);
1697 desc->pg_recoalesce = 1; 1651 desc->pg_recoalesce = 1;
1698 } 1652 }
1699 nfs_pgio_data_release(data); 1653 nfs_pgio_data_destroy(hdr);
1700} 1654}
1701 1655
1702/* 1656/*
1703 * Call the appropriate parallel I/O subsystem read function. 1657 * Call the appropriate parallel I/O subsystem read function.
1704 */ 1658 */
1705static enum pnfs_try_status 1659static enum pnfs_try_status
1706pnfs_try_to_read_data(struct nfs_pgio_data *rdata, 1660pnfs_try_to_read_data(struct nfs_pgio_header *hdr,
1707 const struct rpc_call_ops *call_ops, 1661 const struct rpc_call_ops *call_ops,
1708 struct pnfs_layout_segment *lseg) 1662 struct pnfs_layout_segment *lseg)
1709{ 1663{
1710 struct nfs_pgio_header *hdr = rdata->header;
1711 struct inode *inode = hdr->inode; 1664 struct inode *inode = hdr->inode;
1712 struct nfs_server *nfss = NFS_SERVER(inode); 1665 struct nfs_server *nfss = NFS_SERVER(inode);
1713 enum pnfs_try_status trypnfs; 1666 enum pnfs_try_status trypnfs;
@@ -1715,9 +1668,9 @@ pnfs_try_to_read_data(struct nfs_pgio_data *rdata,
1715 hdr->mds_ops = call_ops; 1668 hdr->mds_ops = call_ops;
1716 1669
1717 dprintk("%s: Reading ino:%lu %u@%llu\n", 1670 dprintk("%s: Reading ino:%lu %u@%llu\n",
1718 __func__, inode->i_ino, rdata->args.count, rdata->args.offset); 1671 __func__, inode->i_ino, hdr->args.count, hdr->args.offset);
1719 1672
1720 trypnfs = nfss->pnfs_curr_ld->read_pagelist(rdata); 1673 trypnfs = nfss->pnfs_curr_ld->read_pagelist(hdr);
1721 if (trypnfs != PNFS_NOT_ATTEMPTED) 1674 if (trypnfs != PNFS_NOT_ATTEMPTED)
1722 nfs_inc_stats(inode, NFSIOS_PNFS_READ); 1675 nfs_inc_stats(inode, NFSIOS_PNFS_READ);
1723 dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs); 1676 dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
@@ -1727,52 +1680,46 @@ pnfs_try_to_read_data(struct nfs_pgio_data *rdata,
1727static void 1680static void
1728pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) 1681pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr)
1729{ 1682{
1730 struct nfs_pgio_data *data = hdr->data;
1731 const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; 1683 const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
1732 struct pnfs_layout_segment *lseg = desc->pg_lseg; 1684 struct pnfs_layout_segment *lseg = desc->pg_lseg;
1733 enum pnfs_try_status trypnfs; 1685 enum pnfs_try_status trypnfs;
1734 1686
1735 desc->pg_lseg = NULL; 1687 desc->pg_lseg = NULL;
1736 trypnfs = pnfs_try_to_read_data(data, call_ops, lseg); 1688 trypnfs = pnfs_try_to_read_data(hdr, call_ops, lseg);
1737 if (trypnfs == PNFS_NOT_ATTEMPTED) 1689 if (trypnfs == PNFS_NOT_ATTEMPTED)
1738 pnfs_read_through_mds(desc, data); 1690 pnfs_read_through_mds(desc, hdr);
1739 pnfs_put_lseg(lseg); 1691 pnfs_put_lseg(lseg);
1740} 1692}
1741 1693
1742static void pnfs_readhdr_free(struct nfs_pgio_header *hdr) 1694static void pnfs_readhdr_free(struct nfs_pgio_header *hdr)
1743{ 1695{
1744 pnfs_put_lseg(hdr->lseg); 1696 pnfs_put_lseg(hdr->lseg);
1745 nfs_rw_header_free(hdr); 1697 nfs_pgio_header_free(hdr);
1746} 1698}
1747EXPORT_SYMBOL_GPL(pnfs_readhdr_free); 1699EXPORT_SYMBOL_GPL(pnfs_readhdr_free);
1748 1700
1749int 1701int
1750pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) 1702pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
1751{ 1703{
1752 struct nfs_rw_header *rhdr;
1753 struct nfs_pgio_header *hdr; 1704 struct nfs_pgio_header *hdr;
1754 int ret; 1705 int ret;
1755 1706
1756 rhdr = nfs_rw_header_alloc(desc->pg_rw_ops); 1707 hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);
1757 if (!rhdr) { 1708 if (!hdr) {
1758 desc->pg_completion_ops->error_cleanup(&desc->pg_list); 1709 desc->pg_completion_ops->error_cleanup(&desc->pg_list);
1759 ret = -ENOMEM; 1710 ret = -ENOMEM;
1760 pnfs_put_lseg(desc->pg_lseg); 1711 pnfs_put_lseg(desc->pg_lseg);
1761 desc->pg_lseg = NULL; 1712 desc->pg_lseg = NULL;
1762 return ret; 1713 return ret;
1763 } 1714 }
1764 hdr = &rhdr->header;
1765 nfs_pgheader_init(desc, hdr, pnfs_readhdr_free); 1715 nfs_pgheader_init(desc, hdr, pnfs_readhdr_free);
1766 hdr->lseg = pnfs_get_lseg(desc->pg_lseg); 1716 hdr->lseg = pnfs_get_lseg(desc->pg_lseg);
1767 atomic_inc(&hdr->refcnt);
1768 ret = nfs_generic_pgio(desc, hdr); 1717 ret = nfs_generic_pgio(desc, hdr);
1769 if (ret != 0) { 1718 if (ret != 0) {
1770 pnfs_put_lseg(desc->pg_lseg); 1719 pnfs_put_lseg(desc->pg_lseg);
1771 desc->pg_lseg = NULL; 1720 desc->pg_lseg = NULL;
1772 } else 1721 } else
1773 pnfs_do_read(desc, hdr); 1722 pnfs_do_read(desc, hdr);
1774 if (atomic_dec_and_test(&hdr->refcnt))
1775 hdr->completion_ops->completion(hdr);
1776 return ret; 1723 return ret;
1777} 1724}
1778EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages); 1725EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages);
@@ -1820,12 +1767,11 @@ void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg)
1820EXPORT_SYMBOL_GPL(pnfs_set_lo_fail); 1767EXPORT_SYMBOL_GPL(pnfs_set_lo_fail);
1821 1768
1822void 1769void
1823pnfs_set_layoutcommit(struct nfs_pgio_data *wdata) 1770pnfs_set_layoutcommit(struct nfs_pgio_header *hdr)
1824{ 1771{
1825 struct nfs_pgio_header *hdr = wdata->header;
1826 struct inode *inode = hdr->inode; 1772 struct inode *inode = hdr->inode;
1827 struct nfs_inode *nfsi = NFS_I(inode); 1773 struct nfs_inode *nfsi = NFS_I(inode);
1828 loff_t end_pos = wdata->mds_offset + wdata->res.count; 1774 loff_t end_pos = hdr->mds_offset + hdr->res.count;
1829 bool mark_as_dirty = false; 1775 bool mark_as_dirty = false;
1830 1776
1831 spin_lock(&inode->i_lock); 1777 spin_lock(&inode->i_lock);
@@ -1885,7 +1831,7 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync)
1885 if (test_and_set_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags)) { 1831 if (test_and_set_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags)) {
1886 if (!sync) 1832 if (!sync)
1887 goto out; 1833 goto out;
1888 status = wait_on_bit_lock(&nfsi->flags, 1834 status = wait_on_bit_lock_action(&nfsi->flags,
1889 NFS_INO_LAYOUTCOMMITTING, 1835 NFS_INO_LAYOUTCOMMITTING,
1890 nfs_wait_bit_killable, 1836 nfs_wait_bit_killable,
1891 TASK_KILLABLE); 1837 TASK_KILLABLE);
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 4fb309a2b4c4..aca3dff5dae6 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -32,6 +32,7 @@
32 32
33#include <linux/nfs_fs.h> 33#include <linux/nfs_fs.h>
34#include <linux/nfs_page.h> 34#include <linux/nfs_page.h>
35#include <linux/workqueue.h>
35 36
36enum { 37enum {
37 NFS_LSEG_VALID = 0, /* cleared when lseg is recalled/returned */ 38 NFS_LSEG_VALID = 0, /* cleared when lseg is recalled/returned */
@@ -46,6 +47,7 @@ struct pnfs_layout_segment {
46 atomic_t pls_refcount; 47 atomic_t pls_refcount;
47 unsigned long pls_flags; 48 unsigned long pls_flags;
48 struct pnfs_layout_hdr *pls_layout; 49 struct pnfs_layout_hdr *pls_layout;
50 struct work_struct pls_work;
49}; 51};
50 52
51enum pnfs_try_status { 53enum pnfs_try_status {
@@ -104,6 +106,8 @@ struct pnfs_layoutdriver_type {
104 int max); 106 int max);
105 void (*recover_commit_reqs) (struct list_head *list, 107 void (*recover_commit_reqs) (struct list_head *list,
106 struct nfs_commit_info *cinfo); 108 struct nfs_commit_info *cinfo);
109 struct nfs_page * (*search_commit_reqs)(struct nfs_commit_info *cinfo,
110 struct page *page);
107 int (*commit_pagelist)(struct inode *inode, 111 int (*commit_pagelist)(struct inode *inode,
108 struct list_head *mds_pages, 112 struct list_head *mds_pages,
109 int how, 113 int how,
@@ -113,8 +117,8 @@ struct pnfs_layoutdriver_type {
113 * Return PNFS_ATTEMPTED to indicate the layout code has attempted 117 * Return PNFS_ATTEMPTED to indicate the layout code has attempted
114 * I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS 118 * I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS
115 */ 119 */
116 enum pnfs_try_status (*read_pagelist) (struct nfs_pgio_data *nfs_data); 120 enum pnfs_try_status (*read_pagelist)(struct nfs_pgio_header *);
117 enum pnfs_try_status (*write_pagelist) (struct nfs_pgio_data *nfs_data, int how); 121 enum pnfs_try_status (*write_pagelist)(struct nfs_pgio_header *, int);
118 122
119 void (*free_deviceid_node) (struct nfs4_deviceid_node *); 123 void (*free_deviceid_node) (struct nfs4_deviceid_node *);
120 124
@@ -179,6 +183,7 @@ extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp);
179/* pnfs.c */ 183/* pnfs.c */
180void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo); 184void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo);
181void pnfs_put_lseg(struct pnfs_layout_segment *lseg); 185void pnfs_put_lseg(struct pnfs_layout_segment *lseg);
186void pnfs_put_lseg_async(struct pnfs_layout_segment *lseg);
182 187
183void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32); 188void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32);
184void unset_pnfs_layoutdriver(struct nfs_server *); 189void unset_pnfs_layoutdriver(struct nfs_server *);
@@ -213,13 +218,13 @@ bool pnfs_roc(struct inode *ino);
213void pnfs_roc_release(struct inode *ino); 218void pnfs_roc_release(struct inode *ino);
214void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); 219void pnfs_roc_set_barrier(struct inode *ino, u32 barrier);
215bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task); 220bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task);
216void pnfs_set_layoutcommit(struct nfs_pgio_data *wdata); 221void pnfs_set_layoutcommit(struct nfs_pgio_header *);
217void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data); 222void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data);
218int pnfs_layoutcommit_inode(struct inode *inode, bool sync); 223int pnfs_layoutcommit_inode(struct inode *inode, bool sync);
219int _pnfs_return_layout(struct inode *); 224int _pnfs_return_layout(struct inode *);
220int pnfs_commit_and_return_layout(struct inode *); 225int pnfs_commit_and_return_layout(struct inode *);
221void pnfs_ld_write_done(struct nfs_pgio_data *); 226void pnfs_ld_write_done(struct nfs_pgio_header *);
222void pnfs_ld_read_done(struct nfs_pgio_data *); 227void pnfs_ld_read_done(struct nfs_pgio_header *);
223struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, 228struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
224 struct nfs_open_context *ctx, 229 struct nfs_open_context *ctx,
225 loff_t pos, 230 loff_t pos,
@@ -228,12 +233,8 @@ struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
228 gfp_t gfp_flags); 233 gfp_t gfp_flags);
229 234
230void nfs4_deviceid_mark_client_invalid(struct nfs_client *clp); 235void nfs4_deviceid_mark_client_invalid(struct nfs_client *clp);
231int pnfs_read_done_resend_to_mds(struct inode *inode, struct list_head *head, 236int pnfs_read_done_resend_to_mds(struct nfs_pgio_header *);
232 const struct nfs_pgio_completion_ops *compl_ops, 237int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *);
233 struct nfs_direct_req *dreq);
234int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head *head,
235 const struct nfs_pgio_completion_ops *compl_ops,
236 struct nfs_direct_req *dreq);
237struct nfs4_threshold *pnfs_mdsthreshold_alloc(void); 238struct nfs4_threshold *pnfs_mdsthreshold_alloc(void);
238 239
239/* nfs4_deviceid_flags */ 240/* nfs4_deviceid_flags */
@@ -345,6 +346,17 @@ pnfs_recover_commit_reqs(struct inode *inode, struct list_head *list,
345 NFS_SERVER(inode)->pnfs_curr_ld->recover_commit_reqs(list, cinfo); 346 NFS_SERVER(inode)->pnfs_curr_ld->recover_commit_reqs(list, cinfo);
346} 347}
347 348
349static inline struct nfs_page *
350pnfs_search_commit_reqs(struct inode *inode, struct nfs_commit_info *cinfo,
351 struct page *page)
352{
353 struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
354
355 if (ld == NULL || ld->search_commit_reqs == NULL)
356 return NULL;
357 return ld->search_commit_reqs(cinfo, page);
358}
359
348/* Should the pNFS client commit and return the layout upon a setattr */ 360/* Should the pNFS client commit and return the layout upon a setattr */
349static inline bool 361static inline bool
350pnfs_ld_layoutret_on_setattr(struct inode *inode) 362pnfs_ld_layoutret_on_setattr(struct inode *inode)
@@ -410,6 +422,10 @@ static inline void pnfs_put_lseg(struct pnfs_layout_segment *lseg)
410{ 422{
411} 423}
412 424
425static inline void pnfs_put_lseg_async(struct pnfs_layout_segment *lseg)
426{
427}
428
413static inline int pnfs_return_layout(struct inode *ino) 429static inline int pnfs_return_layout(struct inode *ino)
414{ 430{
415 return 0; 431 return 0;
@@ -496,6 +512,13 @@ pnfs_recover_commit_reqs(struct inode *inode, struct list_head *list,
496{ 512{
497} 513}
498 514
515static inline struct nfs_page *
516pnfs_search_commit_reqs(struct inode *inode, struct nfs_commit_info *cinfo,
517 struct page *page)
518{
519 return NULL;
520}
521
499static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync) 522static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync)
500{ 523{
501 return 0; 524 return 0;
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index c171ce1a8a30..b09cc23d6f43 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -578,46 +578,49 @@ nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
578 return 0; 578 return 0;
579} 579}
580 580
581static int nfs_read_done(struct rpc_task *task, struct nfs_pgio_data *data) 581static int nfs_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
582{ 582{
583 struct inode *inode = data->header->inode; 583 struct inode *inode = hdr->inode;
584 584
585 nfs_invalidate_atime(inode); 585 nfs_invalidate_atime(inode);
586 if (task->tk_status >= 0) { 586 if (task->tk_status >= 0) {
587 nfs_refresh_inode(inode, data->res.fattr); 587 nfs_refresh_inode(inode, hdr->res.fattr);
588 /* Emulate the eof flag, which isn't normally needed in NFSv2 588 /* Emulate the eof flag, which isn't normally needed in NFSv2
589 * as it is guaranteed to always return the file attributes 589 * as it is guaranteed to always return the file attributes
590 */ 590 */
591 if (data->args.offset + data->res.count >= data->res.fattr->size) 591 if (hdr->args.offset + hdr->res.count >= hdr->res.fattr->size)
592 data->res.eof = 1; 592 hdr->res.eof = 1;
593 } 593 }
594 return 0; 594 return 0;
595} 595}
596 596
597static void nfs_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg) 597static void nfs_proc_read_setup(struct nfs_pgio_header *hdr,
598 struct rpc_message *msg)
598{ 599{
599 msg->rpc_proc = &nfs_procedures[NFSPROC_READ]; 600 msg->rpc_proc = &nfs_procedures[NFSPROC_READ];
600} 601}
601 602
602static int nfs_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data) 603static int nfs_proc_pgio_rpc_prepare(struct rpc_task *task,
604 struct nfs_pgio_header *hdr)
603{ 605{
604 rpc_call_start(task); 606 rpc_call_start(task);
605 return 0; 607 return 0;
606} 608}
607 609
608static int nfs_write_done(struct rpc_task *task, struct nfs_pgio_data *data) 610static int nfs_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
609{ 611{
610 struct inode *inode = data->header->inode; 612 struct inode *inode = hdr->inode;
611 613
612 if (task->tk_status >= 0) 614 if (task->tk_status >= 0)
613 nfs_post_op_update_inode_force_wcc(inode, data->res.fattr); 615 nfs_post_op_update_inode_force_wcc(inode, hdr->res.fattr);
614 return 0; 616 return 0;
615} 617}
616 618
617static void nfs_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg) 619static void nfs_proc_write_setup(struct nfs_pgio_header *hdr,
620 struct rpc_message *msg)
618{ 621{
619 /* Note: NFSv2 ignores @stable and always uses NFS_FILE_SYNC */ 622 /* Note: NFSv2 ignores @stable and always uses NFS_FILE_SYNC */
620 data->args.stable = NFS_FILE_SYNC; 623 hdr->args.stable = NFS_FILE_SYNC;
621 msg->rpc_proc = &nfs_procedures[NFSPROC_WRITE]; 624 msg->rpc_proc = &nfs_procedures[NFSPROC_WRITE];
622} 625}
623 626
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index e818a475ca64..beff2769c5c5 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -33,12 +33,12 @@ static const struct nfs_rw_ops nfs_rw_read_ops;
33 33
34static struct kmem_cache *nfs_rdata_cachep; 34static struct kmem_cache *nfs_rdata_cachep;
35 35
36static struct nfs_rw_header *nfs_readhdr_alloc(void) 36static struct nfs_pgio_header *nfs_readhdr_alloc(void)
37{ 37{
38 return kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL); 38 return kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL);
39} 39}
40 40
41static void nfs_readhdr_free(struct nfs_rw_header *rhdr) 41static void nfs_readhdr_free(struct nfs_pgio_header *rhdr)
42{ 42{
43 kmem_cache_free(nfs_rdata_cachep, rhdr); 43 kmem_cache_free(nfs_rdata_cachep, rhdr);
44} 44}
@@ -115,12 +115,6 @@ static void nfs_readpage_release(struct nfs_page *req)
115 115
116 unlock_page(req->wb_page); 116 unlock_page(req->wb_page);
117 } 117 }
118
119 dprintk("NFS: read done (%s/%Lu %d@%Ld)\n",
120 req->wb_context->dentry->d_inode->i_sb->s_id,
121 (unsigned long long)NFS_FILEID(req->wb_context->dentry->d_inode),
122 req->wb_bytes,
123 (long long)req_offset(req));
124 nfs_release_request(req); 118 nfs_release_request(req);
125} 119}
126 120
@@ -172,14 +166,15 @@ out:
172 hdr->release(hdr); 166 hdr->release(hdr);
173} 167}
174 168
175static void nfs_initiate_read(struct nfs_pgio_data *data, struct rpc_message *msg, 169static void nfs_initiate_read(struct nfs_pgio_header *hdr,
170 struct rpc_message *msg,
176 struct rpc_task_setup *task_setup_data, int how) 171 struct rpc_task_setup *task_setup_data, int how)
177{ 172{
178 struct inode *inode = data->header->inode; 173 struct inode *inode = hdr->inode;
179 int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0; 174 int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0;
180 175
181 task_setup_data->flags |= swap_flags; 176 task_setup_data->flags |= swap_flags;
182 NFS_PROTO(inode)->read_setup(data, msg); 177 NFS_PROTO(inode)->read_setup(hdr, msg);
183} 178}
184 179
185static void 180static void
@@ -203,14 +198,15 @@ static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops = {
203 * This is the callback from RPC telling us whether a reply was 198 * This is the callback from RPC telling us whether a reply was
204 * received or some error occurred (timeout or socket shutdown). 199 * received or some error occurred (timeout or socket shutdown).
205 */ 200 */
206static int nfs_readpage_done(struct rpc_task *task, struct nfs_pgio_data *data, 201static int nfs_readpage_done(struct rpc_task *task,
202 struct nfs_pgio_header *hdr,
207 struct inode *inode) 203 struct inode *inode)
208{ 204{
209 int status = NFS_PROTO(inode)->read_done(task, data); 205 int status = NFS_PROTO(inode)->read_done(task, hdr);
210 if (status != 0) 206 if (status != 0)
211 return status; 207 return status;
212 208
213 nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, data->res.count); 209 nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, hdr->res.count);
214 210
215 if (task->tk_status == -ESTALE) { 211 if (task->tk_status == -ESTALE) {
216 set_bit(NFS_INO_STALE, &NFS_I(inode)->flags); 212 set_bit(NFS_INO_STALE, &NFS_I(inode)->flags);
@@ -219,34 +215,34 @@ static int nfs_readpage_done(struct rpc_task *task, struct nfs_pgio_data *data,
219 return 0; 215 return 0;
220} 216}
221 217
222static void nfs_readpage_retry(struct rpc_task *task, struct nfs_pgio_data *data) 218static void nfs_readpage_retry(struct rpc_task *task,
219 struct nfs_pgio_header *hdr)
223{ 220{
224 struct nfs_pgio_args *argp = &data->args; 221 struct nfs_pgio_args *argp = &hdr->args;
225 struct nfs_pgio_res *resp = &data->res; 222 struct nfs_pgio_res *resp = &hdr->res;
226 223
227 /* This is a short read! */ 224 /* This is a short read! */
228 nfs_inc_stats(data->header->inode, NFSIOS_SHORTREAD); 225 nfs_inc_stats(hdr->inode, NFSIOS_SHORTREAD);
229 /* Has the server at least made some progress? */ 226 /* Has the server at least made some progress? */
230 if (resp->count == 0) { 227 if (resp->count == 0) {
231 nfs_set_pgio_error(data->header, -EIO, argp->offset); 228 nfs_set_pgio_error(hdr, -EIO, argp->offset);
232 return; 229 return;
233 } 230 }
234 /* Yes, so retry the read at the end of the data */ 231 /* Yes, so retry the read at the end of the data */
235 data->mds_offset += resp->count; 232 hdr->mds_offset += resp->count;
236 argp->offset += resp->count; 233 argp->offset += resp->count;
237 argp->pgbase += resp->count; 234 argp->pgbase += resp->count;
238 argp->count -= resp->count; 235 argp->count -= resp->count;
239 rpc_restart_call_prepare(task); 236 rpc_restart_call_prepare(task);
240} 237}
241 238
242static void nfs_readpage_result(struct rpc_task *task, struct nfs_pgio_data *data) 239static void nfs_readpage_result(struct rpc_task *task,
240 struct nfs_pgio_header *hdr)
243{ 241{
244 struct nfs_pgio_header *hdr = data->header; 242 if (hdr->res.eof) {
245
246 if (data->res.eof) {
247 loff_t bound; 243 loff_t bound;
248 244
249 bound = data->args.offset + data->res.count; 245 bound = hdr->args.offset + hdr->res.count;
250 spin_lock(&hdr->lock); 246 spin_lock(&hdr->lock);
251 if (bound < hdr->io_start + hdr->good_bytes) { 247 if (bound < hdr->io_start + hdr->good_bytes) {
252 set_bit(NFS_IOHDR_EOF, &hdr->flags); 248 set_bit(NFS_IOHDR_EOF, &hdr->flags);
@@ -254,8 +250,8 @@ static void nfs_readpage_result(struct rpc_task *task, struct nfs_pgio_data *dat
254 hdr->good_bytes = bound - hdr->io_start; 250 hdr->good_bytes = bound - hdr->io_start;
255 } 251 }
256 spin_unlock(&hdr->lock); 252 spin_unlock(&hdr->lock);
257 } else if (data->res.count != data->args.count) 253 } else if (hdr->res.count != hdr->args.count)
258 nfs_readpage_retry(task, data); 254 nfs_readpage_retry(task, hdr);
259} 255}
260 256
261/* 257/*
@@ -404,7 +400,7 @@ out:
404int __init nfs_init_readpagecache(void) 400int __init nfs_init_readpagecache(void)
405{ 401{
406 nfs_rdata_cachep = kmem_cache_create("nfs_read_data", 402 nfs_rdata_cachep = kmem_cache_create("nfs_read_data",
407 sizeof(struct nfs_rw_header), 403 sizeof(struct nfs_pgio_header),
408 0, SLAB_HWCACHE_ALIGN, 404 0, SLAB_HWCACHE_ALIGN,
409 NULL); 405 NULL);
410 if (nfs_rdata_cachep == NULL) 406 if (nfs_rdata_cachep == NULL)
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 084af1060d79..e4499d5b51e8 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1027,8 +1027,7 @@ static bool nfs_auth_info_add(struct nfs_auth_info *auth_info,
1027 rpc_authflavor_t flavor) 1027 rpc_authflavor_t flavor)
1028{ 1028{
1029 unsigned int i; 1029 unsigned int i;
1030 unsigned int max_flavor_len = (sizeof(auth_info->flavors) / 1030 unsigned int max_flavor_len = ARRAY_SIZE(auth_info->flavors);
1031 sizeof(auth_info->flavors[0]));
1032 1031
1033 /* make sure this flavor isn't already in the list */ 1032 /* make sure this flavor isn't already in the list */
1034 for (i = 0; i < auth_info->flavor_len; i++) { 1033 for (i = 0; i < auth_info->flavor_len; i++) {
@@ -2180,7 +2179,7 @@ out_no_address:
2180 return -EINVAL; 2179 return -EINVAL;
2181} 2180}
2182 2181
2183#define NFS_MOUNT_CMP_FLAGMASK ~(NFS_MOUNT_INTR \ 2182#define NFS_REMOUNT_CMP_FLAGMASK ~(NFS_MOUNT_INTR \
2184 | NFS_MOUNT_SECURE \ 2183 | NFS_MOUNT_SECURE \
2185 | NFS_MOUNT_TCP \ 2184 | NFS_MOUNT_TCP \
2186 | NFS_MOUNT_VER3 \ 2185 | NFS_MOUNT_VER3 \
@@ -2188,15 +2187,16 @@ out_no_address:
2188 | NFS_MOUNT_NONLM \ 2187 | NFS_MOUNT_NONLM \
2189 | NFS_MOUNT_BROKEN_SUID \ 2188 | NFS_MOUNT_BROKEN_SUID \
2190 | NFS_MOUNT_STRICTLOCK \ 2189 | NFS_MOUNT_STRICTLOCK \
2191 | NFS_MOUNT_UNSHARED \
2192 | NFS_MOUNT_NORESVPORT \
2193 | NFS_MOUNT_LEGACY_INTERFACE) 2190 | NFS_MOUNT_LEGACY_INTERFACE)
2194 2191
2192#define NFS_MOUNT_CMP_FLAGMASK (NFS_REMOUNT_CMP_FLAGMASK & \
2193 ~(NFS_MOUNT_UNSHARED | NFS_MOUNT_NORESVPORT))
2194
2195static int 2195static int
2196nfs_compare_remount_data(struct nfs_server *nfss, 2196nfs_compare_remount_data(struct nfs_server *nfss,
2197 struct nfs_parsed_mount_data *data) 2197 struct nfs_parsed_mount_data *data)
2198{ 2198{
2199 if ((data->flags ^ nfss->flags) & NFS_MOUNT_CMP_FLAGMASK || 2199 if ((data->flags ^ nfss->flags) & NFS_REMOUNT_CMP_FLAGMASK ||
2200 data->rsize != nfss->rsize || 2200 data->rsize != nfss->rsize ||
2201 data->wsize != nfss->wsize || 2201 data->wsize != nfss->wsize ||
2202 data->version != nfss->nfs_client->rpc_ops->version || 2202 data->version != nfss->nfs_client->rpc_ops->version ||
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 5e2f10304548..175d5d073ccf 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -47,6 +47,8 @@ static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops;
47static const struct nfs_commit_completion_ops nfs_commit_completion_ops; 47static const struct nfs_commit_completion_ops nfs_commit_completion_ops;
48static const struct nfs_rw_ops nfs_rw_write_ops; 48static const struct nfs_rw_ops nfs_rw_write_ops;
49static void nfs_clear_request_commit(struct nfs_page *req); 49static void nfs_clear_request_commit(struct nfs_page *req);
50static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo,
51 struct inode *inode);
50 52
51static struct kmem_cache *nfs_wdata_cachep; 53static struct kmem_cache *nfs_wdata_cachep;
52static mempool_t *nfs_wdata_mempool; 54static mempool_t *nfs_wdata_mempool;
@@ -71,18 +73,18 @@ void nfs_commit_free(struct nfs_commit_data *p)
71} 73}
72EXPORT_SYMBOL_GPL(nfs_commit_free); 74EXPORT_SYMBOL_GPL(nfs_commit_free);
73 75
74static struct nfs_rw_header *nfs_writehdr_alloc(void) 76static struct nfs_pgio_header *nfs_writehdr_alloc(void)
75{ 77{
76 struct nfs_rw_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO); 78 struct nfs_pgio_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO);
77 79
78 if (p) 80 if (p)
79 memset(p, 0, sizeof(*p)); 81 memset(p, 0, sizeof(*p));
80 return p; 82 return p;
81} 83}
82 84
83static void nfs_writehdr_free(struct nfs_rw_header *whdr) 85static void nfs_writehdr_free(struct nfs_pgio_header *hdr)
84{ 86{
85 mempool_free(whdr, nfs_wdata_mempool); 87 mempool_free(hdr, nfs_wdata_mempool);
86} 88}
87 89
88static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error) 90static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
@@ -93,6 +95,38 @@ static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
93} 95}
94 96
95/* 97/*
98 * nfs_page_search_commits_for_head_request_locked
99 *
100 * Search through commit lists on @inode for the head request for @page.
101 * Must be called while holding the inode (which is cinfo) lock.
102 *
103 * Returns the head request if found, or NULL if not found.
104 */
105static struct nfs_page *
106nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi,
107 struct page *page)
108{
109 struct nfs_page *freq, *t;
110 struct nfs_commit_info cinfo;
111 struct inode *inode = &nfsi->vfs_inode;
112
113 nfs_init_cinfo_from_inode(&cinfo, inode);
114
115 /* search through pnfs commit lists */
116 freq = pnfs_search_commit_reqs(inode, &cinfo, page);
117 if (freq)
118 return freq->wb_head;
119
120 /* Linearly search the commit list for the correct request */
121 list_for_each_entry_safe(freq, t, &cinfo.mds->list, wb_list) {
122 if (freq->wb_page == page)
123 return freq->wb_head;
124 }
125
126 return NULL;
127}
128
129/*
96 * nfs_page_find_head_request_locked - find head request associated with @page 130 * nfs_page_find_head_request_locked - find head request associated with @page
97 * 131 *
98 * must be called while holding the inode lock. 132 * must be called while holding the inode lock.
@@ -106,21 +140,12 @@ nfs_page_find_head_request_locked(struct nfs_inode *nfsi, struct page *page)
106 140
107 if (PagePrivate(page)) 141 if (PagePrivate(page))
108 req = (struct nfs_page *)page_private(page); 142 req = (struct nfs_page *)page_private(page);
109 else if (unlikely(PageSwapCache(page))) { 143 else if (unlikely(PageSwapCache(page)))
110 struct nfs_page *freq, *t; 144 req = nfs_page_search_commits_for_head_request_locked(nfsi,
111 145 page);
112 /* Linearly search the commit list for the correct req */
113 list_for_each_entry_safe(freq, t, &nfsi->commit_info.list, wb_list) {
114 if (freq->wb_page == page) {
115 req = freq->wb_head;
116 break;
117 }
118 }
119 }
120 146
121 if (req) { 147 if (req) {
122 WARN_ON_ONCE(req->wb_head != req); 148 WARN_ON_ONCE(req->wb_head != req);
123
124 kref_get(&req->wb_kref); 149 kref_get(&req->wb_kref);
125 } 150 }
126 151
@@ -216,7 +241,7 @@ static bool nfs_page_group_covers_page(struct nfs_page *req)
216 unsigned int pos = 0; 241 unsigned int pos = 0;
217 unsigned int len = nfs_page_length(req->wb_page); 242 unsigned int len = nfs_page_length(req->wb_page);
218 243
219 nfs_page_group_lock(req); 244 nfs_page_group_lock(req, false);
220 245
221 do { 246 do {
222 tmp = nfs_page_group_search_locked(req->wb_head, pos); 247 tmp = nfs_page_group_search_locked(req->wb_head, pos);
@@ -379,8 +404,6 @@ nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list,
379 subreq->wb_head = subreq; 404 subreq->wb_head = subreq;
380 subreq->wb_this_page = subreq; 405 subreq->wb_this_page = subreq;
381 406
382 nfs_clear_request_commit(subreq);
383
384 /* subreq is now totally disconnected from page group or any 407 /* subreq is now totally disconnected from page group or any
385 * write / commit lists. last chance to wake any waiters */ 408 * write / commit lists. last chance to wake any waiters */
386 nfs_unlock_request(subreq); 409 nfs_unlock_request(subreq);
@@ -455,8 +478,23 @@ try_again:
455 return NULL; 478 return NULL;
456 } 479 }
457 480
481 /* holding inode lock, so always make a non-blocking call to try the
482 * page group lock */
483 ret = nfs_page_group_lock(head, true);
484 if (ret < 0) {
485 spin_unlock(&inode->i_lock);
486
487 if (!nonblock && ret == -EAGAIN) {
488 nfs_page_group_lock_wait(head);
489 nfs_release_request(head);
490 goto try_again;
491 }
492
493 nfs_release_request(head);
494 return ERR_PTR(ret);
495 }
496
458 /* lock each request in the page group */ 497 /* lock each request in the page group */
459 nfs_page_group_lock(head);
460 subreq = head; 498 subreq = head;
461 do { 499 do {
462 /* 500 /*
@@ -488,7 +526,7 @@ try_again:
488 * Commit list removal accounting is done after locks are dropped */ 526 * Commit list removal accounting is done after locks are dropped */
489 subreq = head; 527 subreq = head;
490 do { 528 do {
491 nfs_list_remove_request(subreq); 529 nfs_clear_request_commit(subreq);
492 subreq = subreq->wb_this_page; 530 subreq = subreq->wb_this_page;
493 } while (subreq != head); 531 } while (subreq != head);
494 532
@@ -518,15 +556,11 @@ try_again:
518 556
519 nfs_page_group_unlock(head); 557 nfs_page_group_unlock(head);
520 558
521 /* drop lock to clear_request_commit the head req and clean up 559 /* drop lock to clean up requests on destroy list */
522 * requests on destroy list */
523 spin_unlock(&inode->i_lock); 560 spin_unlock(&inode->i_lock);
524 561
525 nfs_destroy_unlinked_subrequests(destroy_list, head); 562 nfs_destroy_unlinked_subrequests(destroy_list, head);
526 563
527 /* clean up commit list state */
528 nfs_clear_request_commit(head);
529
530 /* still holds ref on head from nfs_page_find_head_request_locked 564 /* still holds ref on head from nfs_page_find_head_request_locked
531 * and still has lock on head from lock loop */ 565 * and still has lock on head from lock loop */
532 return head; 566 return head;
@@ -623,7 +657,7 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
623 int err; 657 int err;
624 658
625 /* Stop dirtying of new pages while we sync */ 659 /* Stop dirtying of new pages while we sync */
626 err = wait_on_bit_lock(bitlock, NFS_INO_FLUSHING, 660 err = wait_on_bit_lock_action(bitlock, NFS_INO_FLUSHING,
627 nfs_wait_bit_killable, TASK_KILLABLE); 661 nfs_wait_bit_killable, TASK_KILLABLE);
628 if (err) 662 if (err)
629 goto out_err; 663 goto out_err;
@@ -705,6 +739,8 @@ static void nfs_inode_remove_request(struct nfs_page *req)
705 739
706 if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags)) 740 if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags))
707 nfs_release_request(req); 741 nfs_release_request(req);
742 else
743 WARN_ON_ONCE(1);
708} 744}
709 745
710static void 746static void
@@ -808,6 +844,7 @@ nfs_clear_page_commit(struct page *page)
808 dec_bdi_stat(page_file_mapping(page)->backing_dev_info, BDI_RECLAIMABLE); 844 dec_bdi_stat(page_file_mapping(page)->backing_dev_info, BDI_RECLAIMABLE);
809} 845}
810 846
847/* Called holding inode (/cinfo) lock */
811static void 848static void
812nfs_clear_request_commit(struct nfs_page *req) 849nfs_clear_request_commit(struct nfs_page *req)
813{ 850{
@@ -817,20 +854,17 @@ nfs_clear_request_commit(struct nfs_page *req)
817 854
818 nfs_init_cinfo_from_inode(&cinfo, inode); 855 nfs_init_cinfo_from_inode(&cinfo, inode);
819 if (!pnfs_clear_request_commit(req, &cinfo)) { 856 if (!pnfs_clear_request_commit(req, &cinfo)) {
820 spin_lock(cinfo.lock);
821 nfs_request_remove_commit_list(req, &cinfo); 857 nfs_request_remove_commit_list(req, &cinfo);
822 spin_unlock(cinfo.lock);
823 } 858 }
824 nfs_clear_page_commit(req->wb_page); 859 nfs_clear_page_commit(req->wb_page);
825 } 860 }
826} 861}
827 862
828static inline 863int nfs_write_need_commit(struct nfs_pgio_header *hdr)
829int nfs_write_need_commit(struct nfs_pgio_data *data)
830{ 864{
831 if (data->verf.committed == NFS_DATA_SYNC) 865 if (hdr->verf.committed == NFS_DATA_SYNC)
832 return data->header->lseg == NULL; 866 return hdr->lseg == NULL;
833 return data->verf.committed != NFS_FILE_SYNC; 867 return hdr->verf.committed != NFS_FILE_SYNC;
834} 868}
835 869
836#else 870#else
@@ -856,8 +890,7 @@ nfs_clear_request_commit(struct nfs_page *req)
856{ 890{
857} 891}
858 892
859static inline 893int nfs_write_need_commit(struct nfs_pgio_header *hdr)
860int nfs_write_need_commit(struct nfs_pgio_data *data)
861{ 894{
862 return 0; 895 return 0;
863} 896}
@@ -883,11 +916,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr)
883 nfs_context_set_write_error(req->wb_context, hdr->error); 916 nfs_context_set_write_error(req->wb_context, hdr->error);
884 goto remove_req; 917 goto remove_req;
885 } 918 }
886 if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) { 919 if (nfs_write_need_commit(hdr)) {
887 nfs_mark_request_dirty(req);
888 goto next;
889 }
890 if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) {
891 memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf)); 920 memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf));
892 nfs_mark_request_commit(req, hdr->lseg, &cinfo); 921 nfs_mark_request_commit(req, hdr->lseg, &cinfo);
893 goto next; 922 goto next;
@@ -1038,9 +1067,9 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
1038 else 1067 else
1039 req->wb_bytes = rqend - req->wb_offset; 1068 req->wb_bytes = rqend - req->wb_offset;
1040out_unlock: 1069out_unlock:
1041 spin_unlock(&inode->i_lock);
1042 if (req) 1070 if (req)
1043 nfs_clear_request_commit(req); 1071 nfs_clear_request_commit(req);
1072 spin_unlock(&inode->i_lock);
1044 return req; 1073 return req;
1045out_flushme: 1074out_flushme:
1046 spin_unlock(&inode->i_lock); 1075 spin_unlock(&inode->i_lock);
@@ -1241,17 +1270,18 @@ static int flush_task_priority(int how)
1241 return RPC_PRIORITY_NORMAL; 1270 return RPC_PRIORITY_NORMAL;
1242} 1271}
1243 1272
1244static void nfs_initiate_write(struct nfs_pgio_data *data, struct rpc_message *msg, 1273static void nfs_initiate_write(struct nfs_pgio_header *hdr,
1274 struct rpc_message *msg,
1245 struct rpc_task_setup *task_setup_data, int how) 1275 struct rpc_task_setup *task_setup_data, int how)
1246{ 1276{
1247 struct inode *inode = data->header->inode; 1277 struct inode *inode = hdr->inode;
1248 int priority = flush_task_priority(how); 1278 int priority = flush_task_priority(how);
1249 1279
1250 task_setup_data->priority = priority; 1280 task_setup_data->priority = priority;
1251 NFS_PROTO(inode)->write_setup(data, msg); 1281 NFS_PROTO(inode)->write_setup(hdr, msg);
1252 1282
1253 nfs4_state_protect_write(NFS_SERVER(inode)->nfs_client, 1283 nfs4_state_protect_write(NFS_SERVER(inode)->nfs_client,
1254 &task_setup_data->rpc_client, msg, data); 1284 &task_setup_data->rpc_client, msg, hdr);
1255} 1285}
1256 1286
1257/* If a nfs_flush_* function fails, it should remove reqs from @head and 1287/* If a nfs_flush_* function fails, it should remove reqs from @head and
@@ -1313,21 +1343,9 @@ void nfs_commit_prepare(struct rpc_task *task, void *calldata)
1313 NFS_PROTO(data->inode)->commit_rpc_prepare(task, data); 1343 NFS_PROTO(data->inode)->commit_rpc_prepare(task, data);
1314} 1344}
1315 1345
1316static void nfs_writeback_release_common(struct nfs_pgio_data *data) 1346static void nfs_writeback_release_common(struct nfs_pgio_header *hdr)
1317{ 1347{
1318 struct nfs_pgio_header *hdr = data->header; 1348 /* do nothing! */
1319 int status = data->task.tk_status;
1320
1321 if ((status >= 0) && nfs_write_need_commit(data)) {
1322 spin_lock(&hdr->lock);
1323 if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags))
1324 ; /* Do nothing */
1325 else if (!test_and_set_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags))
1326 memcpy(&hdr->verf, &data->verf, sizeof(hdr->verf));
1327 else if (memcmp(&hdr->verf, &data->verf, sizeof(hdr->verf)))
1328 set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags);
1329 spin_unlock(&hdr->lock);
1330 }
1331} 1349}
1332 1350
1333/* 1351/*
@@ -1358,7 +1376,8 @@ static int nfs_should_remove_suid(const struct inode *inode)
1358/* 1376/*
1359 * This function is called when the WRITE call is complete. 1377 * This function is called when the WRITE call is complete.
1360 */ 1378 */
1361static int nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data, 1379static int nfs_writeback_done(struct rpc_task *task,
1380 struct nfs_pgio_header *hdr,
1362 struct inode *inode) 1381 struct inode *inode)
1363{ 1382{
1364 int status; 1383 int status;
@@ -1370,13 +1389,14 @@ static int nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data,
1370 * another writer had changed the file, but some applications 1389 * another writer had changed the file, but some applications
1371 * depend on tighter cache coherency when writing. 1390 * depend on tighter cache coherency when writing.
1372 */ 1391 */
1373 status = NFS_PROTO(inode)->write_done(task, data); 1392 status = NFS_PROTO(inode)->write_done(task, hdr);
1374 if (status != 0) 1393 if (status != 0)
1375 return status; 1394 return status;
1376 nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, data->res.count); 1395 nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, hdr->res.count);
1377 1396
1378#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) 1397#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4)
1379 if (data->res.verf->committed < data->args.stable && task->tk_status >= 0) { 1398 if (hdr->res.verf->committed < hdr->args.stable &&
1399 task->tk_status >= 0) {
1380 /* We tried a write call, but the server did not 1400 /* We tried a write call, but the server did not
1381 * commit data to stable storage even though we 1401 * commit data to stable storage even though we
1382 * requested it. 1402 * requested it.
@@ -1392,7 +1412,7 @@ static int nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data,
1392 dprintk("NFS: faulty NFS server %s:" 1412 dprintk("NFS: faulty NFS server %s:"
1393 " (committed = %d) != (stable = %d)\n", 1413 " (committed = %d) != (stable = %d)\n",
1394 NFS_SERVER(inode)->nfs_client->cl_hostname, 1414 NFS_SERVER(inode)->nfs_client->cl_hostname,
1395 data->res.verf->committed, data->args.stable); 1415 hdr->res.verf->committed, hdr->args.stable);
1396 complain = jiffies + 300 * HZ; 1416 complain = jiffies + 300 * HZ;
1397 } 1417 }
1398 } 1418 }
@@ -1407,16 +1427,17 @@ static int nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data,
1407/* 1427/*
1408 * This function is called when the WRITE call is complete. 1428 * This function is called when the WRITE call is complete.
1409 */ 1429 */
1410static void nfs_writeback_result(struct rpc_task *task, struct nfs_pgio_data *data) 1430static void nfs_writeback_result(struct rpc_task *task,
1431 struct nfs_pgio_header *hdr)
1411{ 1432{
1412 struct nfs_pgio_args *argp = &data->args; 1433 struct nfs_pgio_args *argp = &hdr->args;
1413 struct nfs_pgio_res *resp = &data->res; 1434 struct nfs_pgio_res *resp = &hdr->res;
1414 1435
1415 if (resp->count < argp->count) { 1436 if (resp->count < argp->count) {
1416 static unsigned long complain; 1437 static unsigned long complain;
1417 1438
1418 /* This is a short write! */ 1439 /* This is a short write! */
1419 nfs_inc_stats(data->header->inode, NFSIOS_SHORTWRITE); 1440 nfs_inc_stats(hdr->inode, NFSIOS_SHORTWRITE);
1420 1441
1421 /* Has the server at least made some progress? */ 1442 /* Has the server at least made some progress? */
1422 if (resp->count == 0) { 1443 if (resp->count == 0) {
@@ -1426,14 +1447,14 @@ static void nfs_writeback_result(struct rpc_task *task, struct nfs_pgio_data *da
1426 argp->count); 1447 argp->count);
1427 complain = jiffies + 300 * HZ; 1448 complain = jiffies + 300 * HZ;
1428 } 1449 }
1429 nfs_set_pgio_error(data->header, -EIO, argp->offset); 1450 nfs_set_pgio_error(hdr, -EIO, argp->offset);
1430 task->tk_status = -EIO; 1451 task->tk_status = -EIO;
1431 return; 1452 return;
1432 } 1453 }
1433 /* Was this an NFSv2 write or an NFSv3 stable write? */ 1454 /* Was this an NFSv2 write or an NFSv3 stable write? */
1434 if (resp->verf->committed != NFS_UNSTABLE) { 1455 if (resp->verf->committed != NFS_UNSTABLE) {
1435 /* Resend from where the server left off */ 1456 /* Resend from where the server left off */
1436 data->mds_offset += resp->count; 1457 hdr->mds_offset += resp->count;
1437 argp->offset += resp->count; 1458 argp->offset += resp->count;
1438 argp->pgbase += resp->count; 1459 argp->pgbase += resp->count;
1439 argp->count -= resp->count; 1460 argp->count -= resp->count;
@@ -1703,7 +1724,7 @@ int nfs_commit_inode(struct inode *inode, int how)
1703 return error; 1724 return error;
1704 if (!may_wait) 1725 if (!may_wait)
1705 goto out_mark_dirty; 1726 goto out_mark_dirty;
1706 error = wait_on_bit(&NFS_I(inode)->flags, 1727 error = wait_on_bit_action(&NFS_I(inode)->flags,
1707 NFS_INO_COMMIT, 1728 NFS_INO_COMMIT,
1708 nfs_wait_bit_killable, 1729 nfs_wait_bit_killable,
1709 TASK_KILLABLE); 1730 TASK_KILLABLE);
@@ -1884,7 +1905,7 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
1884int __init nfs_init_writepagecache(void) 1905int __init nfs_init_writepagecache(void)
1885{ 1906{
1886 nfs_wdata_cachep = kmem_cache_create("nfs_write_data", 1907 nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
1887 sizeof(struct nfs_rw_header), 1908 sizeof(struct nfs_pgio_header),
1888 0, SLAB_HWCACHE_ALIGN, 1909 0, SLAB_HWCACHE_ALIGN,
1889 NULL); 1910 NULL);
1890 if (nfs_wdata_cachep == NULL) 1911 if (nfs_wdata_cachep == NULL)