aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/nfs/Kconfig11
-rw-r--r--fs/nfs/Makefile5
-rw-r--r--fs/nfs/blocklayout/blocklayout.c90
-rw-r--r--fs/nfs/client.c30
-rw-r--r--fs/nfs/delegation.c16
-rw-r--r--fs/nfs/delegation.h1
-rw-r--r--fs/nfs/dir.c39
-rw-r--r--fs/nfs/direct.c744
-rw-r--r--fs/nfs/file.c7
-rw-r--r--fs/nfs/fscache.c15
-rw-r--r--fs/nfs/fscache.h10
-rw-r--r--fs/nfs/getroot.c85
-rw-r--r--fs/nfs/inode.c114
-rw-r--r--fs/nfs/internal.h121
-rw-r--r--fs/nfs/namespace.c103
-rw-r--r--fs/nfs/nfs2xdr.c5
-rw-r--r--fs/nfs/nfs3proc.c27
-rw-r--r--fs/nfs/nfs3xdr.c112
-rw-r--r--fs/nfs/nfs4_fs.h4
-rw-r--r--fs/nfs/nfs4filelayout.c693
-rw-r--r--fs/nfs/nfs4filelayout.h63
-rw-r--r--fs/nfs/nfs4filelayoutdev.c77
-rw-r--r--fs/nfs/nfs4namespace.c53
-rw-r--r--fs/nfs/nfs4proc.c177
-rw-r--r--fs/nfs/nfs4xdr.c102
-rw-r--r--fs/nfs/objlayout/objio_osd.c18
-rw-r--r--fs/nfs/objlayout/objlayout.c19
-rw-r--r--fs/nfs/pagelist.c61
-rw-r--r--fs/nfs/pnfs.c258
-rw-r--r--fs/nfs/pnfs.h106
-rw-r--r--fs/nfs/proc.c21
-rw-r--r--fs/nfs/read.c435
-rw-r--r--fs/nfs/super.c756
-rw-r--r--fs/nfs/write.c809
-rw-r--r--include/linux/nfs_fs.h26
-rw-r--r--include/linux/nfs_page.h20
-rw-r--r--include/linux/nfs_xdr.h179
-rw-r--r--net/sunrpc/rpc_pipe.c2
38 files changed, 2871 insertions, 2543 deletions
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index 2a0e6c599147..f90f4f5cd421 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -29,9 +29,20 @@ config NFS_FS
29 29
30 If unsure, say N. 30 If unsure, say N.
31 31
32config NFS_V2
33 bool "NFS client support for NFS version 2"
34 depends on NFS_FS
35 default y
36 help
37 This option enables support for version 2 of the NFS protocol
38 (RFC 1094) in the kernel's NFS client.
39
40 If unsure, say Y.
41
32config NFS_V3 42config NFS_V3
33 bool "NFS client support for NFS version 3" 43 bool "NFS client support for NFS version 3"
34 depends on NFS_FS 44 depends on NFS_FS
45 default y
35 help 46 help
36 This option enables support for version 3 of the NFS protocol 47 This option enables support for version 3 of the NFS protocol
37 (RFC 1813) in the kernel's NFS client. 48 (RFC 1813) in the kernel's NFS client.
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index b58613d0abb3..7ddd45d9f170 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -4,11 +4,12 @@
4 4
5obj-$(CONFIG_NFS_FS) += nfs.o 5obj-$(CONFIG_NFS_FS) += nfs.o
6 6
7nfs-y := client.o dir.o file.o getroot.o inode.o super.o nfs2xdr.o \ 7nfs-y := client.o dir.o file.o getroot.o inode.o super.o \
8 direct.o pagelist.o proc.o read.o symlink.o unlink.o \ 8 direct.o pagelist.o read.o symlink.o unlink.o \
9 write.o namespace.o mount_clnt.o \ 9 write.o namespace.o mount_clnt.o \
10 dns_resolve.o cache_lib.o 10 dns_resolve.o cache_lib.o
11nfs-$(CONFIG_ROOT_NFS) += nfsroot.o 11nfs-$(CONFIG_ROOT_NFS) += nfsroot.o
12nfs-$(CONFIG_NFS_V2) += proc.o nfs2xdr.o
12nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o 13nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o
13nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o 14nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o
14nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ 15nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 7f6a23f0244e..7ae8a608956f 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -187,7 +187,6 @@ static void bl_end_io_read(struct bio *bio, int err)
187 struct parallel_io *par = bio->bi_private; 187 struct parallel_io *par = bio->bi_private;
188 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 188 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
189 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; 189 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
190 struct nfs_read_data *rdata = (struct nfs_read_data *)par->data;
191 190
192 do { 191 do {
193 struct page *page = bvec->bv_page; 192 struct page *page = bvec->bv_page;
@@ -198,9 +197,12 @@ static void bl_end_io_read(struct bio *bio, int err)
198 SetPageUptodate(page); 197 SetPageUptodate(page);
199 } while (bvec >= bio->bi_io_vec); 198 } while (bvec >= bio->bi_io_vec);
200 if (!uptodate) { 199 if (!uptodate) {
201 if (!rdata->pnfs_error) 200 struct nfs_read_data *rdata = par->data;
202 rdata->pnfs_error = -EIO; 201 struct nfs_pgio_header *header = rdata->header;
203 pnfs_set_lo_fail(rdata->lseg); 202
203 if (!header->pnfs_error)
204 header->pnfs_error = -EIO;
205 pnfs_set_lo_fail(header->lseg);
204 } 206 }
205 bio_put(bio); 207 bio_put(bio);
206 put_parallel(par); 208 put_parallel(par);
@@ -221,7 +223,7 @@ bl_end_par_io_read(void *data, int unused)
221{ 223{
222 struct nfs_read_data *rdata = data; 224 struct nfs_read_data *rdata = data;
223 225
224 rdata->task.tk_status = rdata->pnfs_error; 226 rdata->task.tk_status = rdata->header->pnfs_error;
225 INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup); 227 INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup);
226 schedule_work(&rdata->task.u.tk_work); 228 schedule_work(&rdata->task.u.tk_work);
227} 229}
@@ -229,6 +231,7 @@ bl_end_par_io_read(void *data, int unused)
229static enum pnfs_try_status 231static enum pnfs_try_status
230bl_read_pagelist(struct nfs_read_data *rdata) 232bl_read_pagelist(struct nfs_read_data *rdata)
231{ 233{
234 struct nfs_pgio_header *header = rdata->header;
232 int i, hole; 235 int i, hole;
233 struct bio *bio = NULL; 236 struct bio *bio = NULL;
234 struct pnfs_block_extent *be = NULL, *cow_read = NULL; 237 struct pnfs_block_extent *be = NULL, *cow_read = NULL;
@@ -239,7 +242,7 @@ bl_read_pagelist(struct nfs_read_data *rdata)
239 int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT; 242 int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT;
240 243
241 dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__, 244 dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__,
242 rdata->npages, f_offset, (unsigned int)rdata->args.count); 245 rdata->pages.npages, f_offset, (unsigned int)rdata->args.count);
243 246
244 par = alloc_parallel(rdata); 247 par = alloc_parallel(rdata);
245 if (!par) 248 if (!par)
@@ -249,17 +252,17 @@ bl_read_pagelist(struct nfs_read_data *rdata)
249 252
250 isect = (sector_t) (f_offset >> SECTOR_SHIFT); 253 isect = (sector_t) (f_offset >> SECTOR_SHIFT);
251 /* Code assumes extents are page-aligned */ 254 /* Code assumes extents are page-aligned */
252 for (i = pg_index; i < rdata->npages; i++) { 255 for (i = pg_index; i < rdata->pages.npages; i++) {
253 if (!extent_length) { 256 if (!extent_length) {
254 /* We've used up the previous extent */ 257 /* We've used up the previous extent */
255 bl_put_extent(be); 258 bl_put_extent(be);
256 bl_put_extent(cow_read); 259 bl_put_extent(cow_read);
257 bio = bl_submit_bio(READ, bio); 260 bio = bl_submit_bio(READ, bio);
258 /* Get the next one */ 261 /* Get the next one */
259 be = bl_find_get_extent(BLK_LSEG2EXT(rdata->lseg), 262 be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg),
260 isect, &cow_read); 263 isect, &cow_read);
261 if (!be) { 264 if (!be) {
262 rdata->pnfs_error = -EIO; 265 header->pnfs_error = -EIO;
263 goto out; 266 goto out;
264 } 267 }
265 extent_length = be->be_length - 268 extent_length = be->be_length -
@@ -282,11 +285,12 @@ bl_read_pagelist(struct nfs_read_data *rdata)
282 struct pnfs_block_extent *be_read; 285 struct pnfs_block_extent *be_read;
283 286
284 be_read = (hole && cow_read) ? cow_read : be; 287 be_read = (hole && cow_read) ? cow_read : be;
285 bio = bl_add_page_to_bio(bio, rdata->npages - i, READ, 288 bio = bl_add_page_to_bio(bio, rdata->pages.npages - i,
289 READ,
286 isect, pages[i], be_read, 290 isect, pages[i], be_read,
287 bl_end_io_read, par); 291 bl_end_io_read, par);
288 if (IS_ERR(bio)) { 292 if (IS_ERR(bio)) {
289 rdata->pnfs_error = PTR_ERR(bio); 293 header->pnfs_error = PTR_ERR(bio);
290 bio = NULL; 294 bio = NULL;
291 goto out; 295 goto out;
292 } 296 }
@@ -294,9 +298,9 @@ bl_read_pagelist(struct nfs_read_data *rdata)
294 isect += PAGE_CACHE_SECTORS; 298 isect += PAGE_CACHE_SECTORS;
295 extent_length -= PAGE_CACHE_SECTORS; 299 extent_length -= PAGE_CACHE_SECTORS;
296 } 300 }
297 if ((isect << SECTOR_SHIFT) >= rdata->inode->i_size) { 301 if ((isect << SECTOR_SHIFT) >= header->inode->i_size) {
298 rdata->res.eof = 1; 302 rdata->res.eof = 1;
299 rdata->res.count = rdata->inode->i_size - f_offset; 303 rdata->res.count = header->inode->i_size - f_offset;
300 } else { 304 } else {
301 rdata->res.count = (isect << SECTOR_SHIFT) - f_offset; 305 rdata->res.count = (isect << SECTOR_SHIFT) - f_offset;
302 } 306 }
@@ -345,7 +349,6 @@ static void bl_end_io_write_zero(struct bio *bio, int err)
345 struct parallel_io *par = bio->bi_private; 349 struct parallel_io *par = bio->bi_private;
346 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 350 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
347 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; 351 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
348 struct nfs_write_data *wdata = (struct nfs_write_data *)par->data;
349 352
350 do { 353 do {
351 struct page *page = bvec->bv_page; 354 struct page *page = bvec->bv_page;
@@ -358,9 +361,12 @@ static void bl_end_io_write_zero(struct bio *bio, int err)
358 } while (bvec >= bio->bi_io_vec); 361 } while (bvec >= bio->bi_io_vec);
359 362
360 if (unlikely(!uptodate)) { 363 if (unlikely(!uptodate)) {
361 if (!wdata->pnfs_error) 364 struct nfs_write_data *data = par->data;
362 wdata->pnfs_error = -EIO; 365 struct nfs_pgio_header *header = data->header;
363 pnfs_set_lo_fail(wdata->lseg); 366
367 if (!header->pnfs_error)
368 header->pnfs_error = -EIO;
369 pnfs_set_lo_fail(header->lseg);
364 } 370 }
365 bio_put(bio); 371 bio_put(bio);
366 put_parallel(par); 372 put_parallel(par);
@@ -370,12 +376,13 @@ static void bl_end_io_write(struct bio *bio, int err)
370{ 376{
371 struct parallel_io *par = bio->bi_private; 377 struct parallel_io *par = bio->bi_private;
372 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 378 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
373 struct nfs_write_data *wdata = (struct nfs_write_data *)par->data; 379 struct nfs_write_data *data = par->data;
380 struct nfs_pgio_header *header = data->header;
374 381
375 if (!uptodate) { 382 if (!uptodate) {
376 if (!wdata->pnfs_error) 383 if (!header->pnfs_error)
377 wdata->pnfs_error = -EIO; 384 header->pnfs_error = -EIO;
378 pnfs_set_lo_fail(wdata->lseg); 385 pnfs_set_lo_fail(header->lseg);
379 } 386 }
380 bio_put(bio); 387 bio_put(bio);
381 put_parallel(par); 388 put_parallel(par);
@@ -391,9 +398,9 @@ static void bl_write_cleanup(struct work_struct *work)
391 dprintk("%s enter\n", __func__); 398 dprintk("%s enter\n", __func__);
392 task = container_of(work, struct rpc_task, u.tk_work); 399 task = container_of(work, struct rpc_task, u.tk_work);
393 wdata = container_of(task, struct nfs_write_data, task); 400 wdata = container_of(task, struct nfs_write_data, task);
394 if (likely(!wdata->pnfs_error)) { 401 if (likely(!wdata->header->pnfs_error)) {
395 /* Marks for LAYOUTCOMMIT */ 402 /* Marks for LAYOUTCOMMIT */
396 mark_extents_written(BLK_LSEG2EXT(wdata->lseg), 403 mark_extents_written(BLK_LSEG2EXT(wdata->header->lseg),
397 wdata->args.offset, wdata->args.count); 404 wdata->args.offset, wdata->args.count);
398 } 405 }
399 pnfs_ld_write_done(wdata); 406 pnfs_ld_write_done(wdata);
@@ -404,12 +411,12 @@ static void bl_end_par_io_write(void *data, int num_se)
404{ 411{
405 struct nfs_write_data *wdata = data; 412 struct nfs_write_data *wdata = data;
406 413
407 if (unlikely(wdata->pnfs_error)) { 414 if (unlikely(wdata->header->pnfs_error)) {
408 bl_free_short_extents(&BLK_LSEG2EXT(wdata->lseg)->bl_inval, 415 bl_free_short_extents(&BLK_LSEG2EXT(wdata->header->lseg)->bl_inval,
409 num_se); 416 num_se);
410 } 417 }
411 418
412 wdata->task.tk_status = wdata->pnfs_error; 419 wdata->task.tk_status = wdata->header->pnfs_error;
413 wdata->verf.committed = NFS_FILE_SYNC; 420 wdata->verf.committed = NFS_FILE_SYNC;
414 INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup); 421 INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup);
415 schedule_work(&wdata->task.u.tk_work); 422 schedule_work(&wdata->task.u.tk_work);
@@ -540,6 +547,7 @@ check_page:
540static enum pnfs_try_status 547static enum pnfs_try_status
541bl_write_pagelist(struct nfs_write_data *wdata, int sync) 548bl_write_pagelist(struct nfs_write_data *wdata, int sync)
542{ 549{
550 struct nfs_pgio_header *header = wdata->header;
543 int i, ret, npg_zero, pg_index, last = 0; 551 int i, ret, npg_zero, pg_index, last = 0;
544 struct bio *bio = NULL; 552 struct bio *bio = NULL;
545 struct pnfs_block_extent *be = NULL, *cow_read = NULL; 553 struct pnfs_block_extent *be = NULL, *cow_read = NULL;
@@ -552,7 +560,7 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
552 pgoff_t index; 560 pgoff_t index;
553 u64 temp; 561 u64 temp;
554 int npg_per_block = 562 int npg_per_block =
555 NFS_SERVER(wdata->inode)->pnfs_blksize >> PAGE_CACHE_SHIFT; 563 NFS_SERVER(header->inode)->pnfs_blksize >> PAGE_CACHE_SHIFT;
556 564
557 dprintk("%s enter, %Zu@%lld\n", __func__, count, offset); 565 dprintk("%s enter, %Zu@%lld\n", __func__, count, offset);
558 /* At this point, wdata->pages is a (sequential) list of nfs_pages. 566 /* At this point, wdata->pages is a (sequential) list of nfs_pages.
@@ -566,7 +574,7 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
566 /* At this point, have to be more careful with error handling */ 574 /* At this point, have to be more careful with error handling */
567 575
568 isect = (sector_t) ((offset & (long)PAGE_CACHE_MASK) >> SECTOR_SHIFT); 576 isect = (sector_t) ((offset & (long)PAGE_CACHE_MASK) >> SECTOR_SHIFT);
569 be = bl_find_get_extent(BLK_LSEG2EXT(wdata->lseg), isect, &cow_read); 577 be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg), isect, &cow_read);
570 if (!be || !is_writable(be, isect)) { 578 if (!be || !is_writable(be, isect)) {
571 dprintk("%s no matching extents!\n", __func__); 579 dprintk("%s no matching extents!\n", __func__);
572 goto out_mds; 580 goto out_mds;
@@ -597,10 +605,10 @@ fill_invalid_ext:
597 dprintk("%s zero %dth page: index %lu isect %llu\n", 605 dprintk("%s zero %dth page: index %lu isect %llu\n",
598 __func__, npg_zero, index, 606 __func__, npg_zero, index,
599 (unsigned long long)isect); 607 (unsigned long long)isect);
600 page = bl_find_get_zeroing_page(wdata->inode, index, 608 page = bl_find_get_zeroing_page(header->inode, index,
601 cow_read); 609 cow_read);
602 if (unlikely(IS_ERR(page))) { 610 if (unlikely(IS_ERR(page))) {
603 wdata->pnfs_error = PTR_ERR(page); 611 header->pnfs_error = PTR_ERR(page);
604 goto out; 612 goto out;
605 } else if (page == NULL) 613 } else if (page == NULL)
606 goto next_page; 614 goto next_page;
@@ -612,7 +620,7 @@ fill_invalid_ext:
612 __func__, ret); 620 __func__, ret);
613 end_page_writeback(page); 621 end_page_writeback(page);
614 page_cache_release(page); 622 page_cache_release(page);
615 wdata->pnfs_error = ret; 623 header->pnfs_error = ret;
616 goto out; 624 goto out;
617 } 625 }
618 if (likely(!bl_push_one_short_extent(be->be_inval))) 626 if (likely(!bl_push_one_short_extent(be->be_inval)))
@@ -620,11 +628,11 @@ fill_invalid_ext:
620 else { 628 else {
621 end_page_writeback(page); 629 end_page_writeback(page);
622 page_cache_release(page); 630 page_cache_release(page);
623 wdata->pnfs_error = -ENOMEM; 631 header->pnfs_error = -ENOMEM;
624 goto out; 632 goto out;
625 } 633 }
626 /* FIXME: This should be done in bi_end_io */ 634 /* FIXME: This should be done in bi_end_io */
627 mark_extents_written(BLK_LSEG2EXT(wdata->lseg), 635 mark_extents_written(BLK_LSEG2EXT(header->lseg),
628 page->index << PAGE_CACHE_SHIFT, 636 page->index << PAGE_CACHE_SHIFT,
629 PAGE_CACHE_SIZE); 637 PAGE_CACHE_SIZE);
630 638
@@ -632,7 +640,7 @@ fill_invalid_ext:
632 isect, page, be, 640 isect, page, be,
633 bl_end_io_write_zero, par); 641 bl_end_io_write_zero, par);
634 if (IS_ERR(bio)) { 642 if (IS_ERR(bio)) {
635 wdata->pnfs_error = PTR_ERR(bio); 643 header->pnfs_error = PTR_ERR(bio);
636 bio = NULL; 644 bio = NULL;
637 goto out; 645 goto out;
638 } 646 }
@@ -647,16 +655,16 @@ next_page:
647 655
648 /* Middle pages */ 656 /* Middle pages */
649 pg_index = wdata->args.pgbase >> PAGE_CACHE_SHIFT; 657 pg_index = wdata->args.pgbase >> PAGE_CACHE_SHIFT;
650 for (i = pg_index; i < wdata->npages; i++) { 658 for (i = pg_index; i < wdata->pages.npages; i++) {
651 if (!extent_length) { 659 if (!extent_length) {
652 /* We've used up the previous extent */ 660 /* We've used up the previous extent */
653 bl_put_extent(be); 661 bl_put_extent(be);
654 bio = bl_submit_bio(WRITE, bio); 662 bio = bl_submit_bio(WRITE, bio);
655 /* Get the next one */ 663 /* Get the next one */
656 be = bl_find_get_extent(BLK_LSEG2EXT(wdata->lseg), 664 be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg),
657 isect, NULL); 665 isect, NULL);
658 if (!be || !is_writable(be, isect)) { 666 if (!be || !is_writable(be, isect)) {
659 wdata->pnfs_error = -EINVAL; 667 header->pnfs_error = -EINVAL;
660 goto out; 668 goto out;
661 } 669 }
662 if (be->be_state == PNFS_BLOCK_INVALID_DATA) { 670 if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
@@ -664,7 +672,7 @@ next_page:
664 be->be_inval))) 672 be->be_inval)))
665 par->bse_count++; 673 par->bse_count++;
666 else { 674 else {
667 wdata->pnfs_error = -ENOMEM; 675 header->pnfs_error = -ENOMEM;
668 goto out; 676 goto out;
669 } 677 }
670 } 678 }
@@ -677,15 +685,15 @@ next_page:
677 if (unlikely(ret)) { 685 if (unlikely(ret)) {
678 dprintk("%s bl_mark_sectors_init fail %d\n", 686 dprintk("%s bl_mark_sectors_init fail %d\n",
679 __func__, ret); 687 __func__, ret);
680 wdata->pnfs_error = ret; 688 header->pnfs_error = ret;
681 goto out; 689 goto out;
682 } 690 }
683 } 691 }
684 bio = bl_add_page_to_bio(bio, wdata->npages - i, WRITE, 692 bio = bl_add_page_to_bio(bio, wdata->pages.npages - i, WRITE,
685 isect, pages[i], be, 693 isect, pages[i], be,
686 bl_end_io_write, par); 694 bl_end_io_write, par);
687 if (IS_ERR(bio)) { 695 if (IS_ERR(bio)) {
688 wdata->pnfs_error = PTR_ERR(bio); 696 header->pnfs_error = PTR_ERR(bio);
689 bio = NULL; 697 bio = NULL;
690 goto out; 698 goto out;
691 } 699 }
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 60f7e4ec842c..b4e2199c32b3 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -90,7 +90,9 @@ static bool nfs4_disable_idmapping = true;
90 * RPC cruft for NFS 90 * RPC cruft for NFS
91 */ 91 */
92static const struct rpc_version *nfs_version[5] = { 92static const struct rpc_version *nfs_version[5] = {
93#ifdef CONFIG_NFS_V2
93 [2] = &nfs_version2, 94 [2] = &nfs_version2,
95#endif
94#ifdef CONFIG_NFS_V3 96#ifdef CONFIG_NFS_V3
95 [3] = &nfs_version3, 97 [3] = &nfs_version3,
96#endif 98#endif
@@ -847,7 +849,7 @@ static int nfs_init_server(struct nfs_server *server,
847 .hostname = data->nfs_server.hostname, 849 .hostname = data->nfs_server.hostname,
848 .addr = (const struct sockaddr *)&data->nfs_server.address, 850 .addr = (const struct sockaddr *)&data->nfs_server.address,
849 .addrlen = data->nfs_server.addrlen, 851 .addrlen = data->nfs_server.addrlen,
850 .rpc_ops = &nfs_v2_clientops, 852 .rpc_ops = NULL,
851 .proto = data->nfs_server.protocol, 853 .proto = data->nfs_server.protocol,
852 .net = data->net, 854 .net = data->net,
853 }; 855 };
@@ -857,10 +859,20 @@ static int nfs_init_server(struct nfs_server *server,
857 859
858 dprintk("--> nfs_init_server()\n"); 860 dprintk("--> nfs_init_server()\n");
859 861
862 switch (data->version) {
863#ifdef CONFIG_NFS_V2
864 case 2:
865 cl_init.rpc_ops = &nfs_v2_clientops;
866 break;
867#endif
860#ifdef CONFIG_NFS_V3 868#ifdef CONFIG_NFS_V3
861 if (data->version == 3) 869 case 3:
862 cl_init.rpc_ops = &nfs_v3_clientops; 870 cl_init.rpc_ops = &nfs_v3_clientops;
871 break;
863#endif 872#endif
873 default:
874 return -EPROTONOSUPPORT;
875 }
864 876
865 nfs_init_timeout_values(&timeparms, data->nfs_server.protocol, 877 nfs_init_timeout_values(&timeparms, data->nfs_server.protocol,
866 data->timeo, data->retrans); 878 data->timeo, data->retrans);
@@ -880,7 +892,7 @@ static int nfs_init_server(struct nfs_server *server,
880 server->options = data->options; 892 server->options = data->options;
881 server->caps |= NFS_CAP_HARDLINKS|NFS_CAP_SYMLINKS|NFS_CAP_FILEID| 893 server->caps |= NFS_CAP_HARDLINKS|NFS_CAP_SYMLINKS|NFS_CAP_FILEID|
882 NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER|NFS_CAP_OWNER_GROUP| 894 NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER|NFS_CAP_OWNER_GROUP|
883 NFS_CAP_ATIME|NFS_CAP_CTIME|NFS_CAP_MTIME; 895 NFS_CAP_ATIME|NFS_CAP_CTIME|NFS_CAP_MTIME|NFS_CAP_CHANGE_ATTR;
884 896
885 if (data->rsize) 897 if (data->rsize)
886 server->rsize = nfs_block_size(data->rsize, NULL); 898 server->rsize = nfs_block_size(data->rsize, NULL);
@@ -1465,8 +1477,8 @@ error:
1465 * the MDS. 1477 * the MDS.
1466 */ 1478 */
1467struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp, 1479struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
1468 const struct sockaddr *ds_addr, 1480 const struct sockaddr *ds_addr, int ds_addrlen,
1469 int ds_addrlen, int ds_proto) 1481 int ds_proto, unsigned int ds_timeo, unsigned int ds_retrans)
1470{ 1482{
1471 struct nfs_client_initdata cl_init = { 1483 struct nfs_client_initdata cl_init = {
1472 .addr = ds_addr, 1484 .addr = ds_addr,
@@ -1476,12 +1488,7 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
1476 .minorversion = mds_clp->cl_minorversion, 1488 .minorversion = mds_clp->cl_minorversion,
1477 .net = mds_clp->net, 1489 .net = mds_clp->net,
1478 }; 1490 };
1479 struct rpc_timeout ds_timeout = { 1491 struct rpc_timeout ds_timeout;
1480 .to_initval = 15 * HZ,
1481 .to_maxval = 15 * HZ,
1482 .to_retries = 1,
1483 .to_exponential = 1,
1484 };
1485 struct nfs_client *clp; 1492 struct nfs_client *clp;
1486 1493
1487 /* 1494 /*
@@ -1489,6 +1496,7 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
1489 * cl_ipaddr so as to use the same EXCHANGE_ID co_ownerid as the MDS 1496 * cl_ipaddr so as to use the same EXCHANGE_ID co_ownerid as the MDS
1490 * (section 13.1 RFC 5661). 1497 * (section 13.1 RFC 5661).
1491 */ 1498 */
1499 nfs_init_timeout_values(&ds_timeout, ds_proto, ds_timeo, ds_retrans);
1492 clp = nfs_get_client(&cl_init, &ds_timeout, mds_clp->cl_ipaddr, 1500 clp = nfs_get_client(&cl_init, &ds_timeout, mds_clp->cl_ipaddr,
1493 mds_clp->cl_rpcclient->cl_auth->au_flavor, 0); 1501 mds_clp->cl_rpcclient->cl_auth->au_flavor, 0);
1494 1502
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 89af1d269274..bd3a9601d32d 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -316,6 +316,10 @@ out:
316 * nfs_client_return_marked_delegations - return previously marked delegations 316 * nfs_client_return_marked_delegations - return previously marked delegations
317 * @clp: nfs_client to process 317 * @clp: nfs_client to process
318 * 318 *
319 * Note that this function is designed to be called by the state
320 * manager thread. For this reason, it cannot flush the dirty data,
321 * since that could deadlock in case of a state recovery error.
322 *
319 * Returns zero on success, or a negative errno value. 323 * Returns zero on success, or a negative errno value.
320 */ 324 */
321int nfs_client_return_marked_delegations(struct nfs_client *clp) 325int nfs_client_return_marked_delegations(struct nfs_client *clp)
@@ -340,11 +344,9 @@ restart:
340 server); 344 server);
341 rcu_read_unlock(); 345 rcu_read_unlock();
342 346
343 if (delegation != NULL) { 347 if (delegation != NULL)
344 filemap_flush(inode->i_mapping);
345 err = __nfs_inode_return_delegation(inode, 348 err = __nfs_inode_return_delegation(inode,
346 delegation, 0); 349 delegation, 0);
347 }
348 iput(inode); 350 iput(inode);
349 if (!err) 351 if (!err)
350 goto restart; 352 goto restart;
@@ -380,6 +382,10 @@ void nfs_inode_return_delegation_noreclaim(struct inode *inode)
380 * nfs_inode_return_delegation - synchronously return a delegation 382 * nfs_inode_return_delegation - synchronously return a delegation
381 * @inode: inode to process 383 * @inode: inode to process
382 * 384 *
385 * This routine will always flush any dirty data to disk on the
386 * assumption that if we need to return the delegation, then
387 * we should stop caching.
388 *
383 * Returns zero on success, or a negative errno value. 389 * Returns zero on success, or a negative errno value.
384 */ 390 */
385int nfs_inode_return_delegation(struct inode *inode) 391int nfs_inode_return_delegation(struct inode *inode)
@@ -389,10 +395,10 @@ int nfs_inode_return_delegation(struct inode *inode)
389 struct nfs_delegation *delegation; 395 struct nfs_delegation *delegation;
390 int err = 0; 396 int err = 0;
391 397
398 nfs_wb_all(inode);
392 if (rcu_access_pointer(nfsi->delegation) != NULL) { 399 if (rcu_access_pointer(nfsi->delegation) != NULL) {
393 delegation = nfs_detach_delegation(nfsi, server); 400 delegation = nfs_detach_delegation(nfsi, server);
394 if (delegation != NULL) { 401 if (delegation != NULL) {
395 nfs_wb_all(inode);
396 err = __nfs_inode_return_delegation(inode, delegation, 1); 402 err = __nfs_inode_return_delegation(inode, delegation, 1);
397 } 403 }
398 } 404 }
@@ -538,6 +544,8 @@ int nfs_async_inode_return_delegation(struct inode *inode,
538 struct nfs_client *clp = server->nfs_client; 544 struct nfs_client *clp = server->nfs_client;
539 struct nfs_delegation *delegation; 545 struct nfs_delegation *delegation;
540 546
547 filemap_flush(inode->i_mapping);
548
541 rcu_read_lock(); 549 rcu_read_lock();
542 delegation = rcu_dereference(NFS_I(inode)->delegation); 550 delegation = rcu_dereference(NFS_I(inode)->delegation);
543 551
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index cd6a7a8dadae..72709c4193fa 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -66,6 +66,7 @@ static inline int nfs_have_delegation(struct inode *inode, fmode_t flags)
66 66
67static inline int nfs_inode_return_delegation(struct inode *inode) 67static inline int nfs_inode_return_delegation(struct inode *inode)
68{ 68{
69 nfs_wb_all(inode);
69 return 0; 70 return 0;
70} 71}
71#endif 72#endif
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 8789210c6905..d0884c0d9464 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -475,6 +475,29 @@ different:
475} 475}
476 476
477static 477static
478bool nfs_use_readdirplus(struct inode *dir, struct file *filp)
479{
480 if (!nfs_server_capable(dir, NFS_CAP_READDIRPLUS))
481 return false;
482 if (test_and_clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(dir)->flags))
483 return true;
484 if (filp->f_pos == 0)
485 return true;
486 return false;
487}
488
489/*
490 * This function is called by the lookup code to request the use of
491 * readdirplus to accelerate any future lookups in the same
492 * directory.
493 */
494static
495void nfs_advise_use_readdirplus(struct inode *dir)
496{
497 set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(dir)->flags);
498}
499
500static
478void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry) 501void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry)
479{ 502{
480 struct qstr filename = { 503 struct qstr filename = {
@@ -874,7 +897,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
874 desc->file = filp; 897 desc->file = filp;
875 desc->dir_cookie = &dir_ctx->dir_cookie; 898 desc->dir_cookie = &dir_ctx->dir_cookie;
876 desc->decode = NFS_PROTO(inode)->decode_dirent; 899 desc->decode = NFS_PROTO(inode)->decode_dirent;
877 desc->plus = NFS_USE_READDIRPLUS(inode); 900 desc->plus = nfs_use_readdirplus(inode, filp) ? 1 : 0;
878 901
879 nfs_block_sillyrename(dentry); 902 nfs_block_sillyrename(dentry);
880 res = nfs_revalidate_mapping(inode, filp->f_mapping); 903 res = nfs_revalidate_mapping(inode, filp->f_mapping);
@@ -1114,7 +1137,7 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
1114 if (!inode) { 1137 if (!inode) {
1115 if (nfs_neg_need_reval(dir, dentry, nd)) 1138 if (nfs_neg_need_reval(dir, dentry, nd))
1116 goto out_bad; 1139 goto out_bad;
1117 goto out_valid; 1140 goto out_valid_noent;
1118 } 1141 }
1119 1142
1120 if (is_bad_inode(inode)) { 1143 if (is_bad_inode(inode)) {
@@ -1143,7 +1166,7 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
1143 if (fhandle == NULL || fattr == NULL) 1166 if (fhandle == NULL || fattr == NULL)
1144 goto out_error; 1167 goto out_error;
1145 1168
1146 error = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, &dentry->d_name, fhandle, fattr); 1169 error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr);
1147 if (error) 1170 if (error)
1148 goto out_bad; 1171 goto out_bad;
1149 if (nfs_compare_fh(NFS_FH(inode), fhandle)) 1172 if (nfs_compare_fh(NFS_FH(inode), fhandle))
@@ -1156,6 +1179,9 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
1156out_set_verifier: 1179out_set_verifier:
1157 nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); 1180 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
1158 out_valid: 1181 out_valid:
1182 /* Success: notify readdir to use READDIRPLUS */
1183 nfs_advise_use_readdirplus(dir);
1184 out_valid_noent:
1159 dput(parent); 1185 dput(parent);
1160 dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is valid\n", 1186 dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is valid\n",
1161 __func__, dentry->d_parent->d_name.name, 1187 __func__, dentry->d_parent->d_name.name,
@@ -1299,7 +1325,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
1299 parent = dentry->d_parent; 1325 parent = dentry->d_parent;
1300 /* Protect against concurrent sillydeletes */ 1326 /* Protect against concurrent sillydeletes */
1301 nfs_block_sillyrename(parent); 1327 nfs_block_sillyrename(parent);
1302 error = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, &dentry->d_name, fhandle, fattr); 1328 error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr);
1303 if (error == -ENOENT) 1329 if (error == -ENOENT)
1304 goto no_entry; 1330 goto no_entry;
1305 if (error < 0) { 1331 if (error < 0) {
@@ -1311,6 +1337,9 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
1311 if (IS_ERR(res)) 1337 if (IS_ERR(res))
1312 goto out_unblock_sillyrename; 1338 goto out_unblock_sillyrename;
1313 1339
1340 /* Success: notify readdir to use READDIRPLUS */
1341 nfs_advise_use_readdirplus(dir);
1342
1314no_entry: 1343no_entry:
1315 res = d_materialise_unique(dentry, inode); 1344 res = d_materialise_unique(dentry, inode);
1316 if (res != NULL) { 1345 if (res != NULL) {
@@ -1646,7 +1675,7 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
1646 if (dentry->d_inode) 1675 if (dentry->d_inode)
1647 goto out; 1676 goto out;
1648 if (fhandle->size == 0) { 1677 if (fhandle->size == 0) {
1649 error = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, &dentry->d_name, fhandle, fattr); 1678 error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr);
1650 if (error) 1679 if (error)
1651 goto out_error; 1680 goto out_error;
1652 } 1681 }
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 481be7f7bdd3..c47a46eaf905 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -56,6 +56,7 @@
56 56
57#include "internal.h" 57#include "internal.h"
58#include "iostat.h" 58#include "iostat.h"
59#include "pnfs.h"
59 60
60#define NFSDBG_FACILITY NFSDBG_VFS 61#define NFSDBG_FACILITY NFSDBG_VFS
61 62
@@ -81,16 +82,19 @@ struct nfs_direct_req {
81 struct completion completion; /* wait for i/o completion */ 82 struct completion completion; /* wait for i/o completion */
82 83
83 /* commit state */ 84 /* commit state */
84 struct list_head rewrite_list; /* saved nfs_write_data structs */ 85 struct nfs_mds_commit_info mds_cinfo; /* Storage for cinfo */
85 struct nfs_write_data * commit_data; /* special write_data for commits */ 86 struct pnfs_ds_commit_info ds_cinfo; /* Storage for cinfo */
87 struct work_struct work;
86 int flags; 88 int flags;
87#define NFS_ODIRECT_DO_COMMIT (1) /* an unstable reply was received */ 89#define NFS_ODIRECT_DO_COMMIT (1) /* an unstable reply was received */
88#define NFS_ODIRECT_RESCHED_WRITES (2) /* write verification failed */ 90#define NFS_ODIRECT_RESCHED_WRITES (2) /* write verification failed */
89 struct nfs_writeverf verf; /* unstable write verifier */ 91 struct nfs_writeverf verf; /* unstable write verifier */
90}; 92};
91 93
94static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops;
95static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops;
92static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode); 96static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode);
93static const struct rpc_call_ops nfs_write_direct_ops; 97static void nfs_direct_write_schedule_work(struct work_struct *work);
94 98
95static inline void get_dreq(struct nfs_direct_req *dreq) 99static inline void get_dreq(struct nfs_direct_req *dreq)
96{ 100{
@@ -124,22 +128,6 @@ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_
124 return -EINVAL; 128 return -EINVAL;
125} 129}
126 130
127static void nfs_direct_dirty_pages(struct page **pages, unsigned int pgbase, size_t count)
128{
129 unsigned int npages;
130 unsigned int i;
131
132 if (count == 0)
133 return;
134 pages += (pgbase >> PAGE_SHIFT);
135 npages = (count + (pgbase & ~PAGE_MASK) + PAGE_SIZE - 1) >> PAGE_SHIFT;
136 for (i = 0; i < npages; i++) {
137 struct page *page = pages[i];
138 if (!PageCompound(page))
139 set_page_dirty(page);
140 }
141}
142
143static void nfs_direct_release_pages(struct page **pages, unsigned int npages) 131static void nfs_direct_release_pages(struct page **pages, unsigned int npages)
144{ 132{
145 unsigned int i; 133 unsigned int i;
@@ -147,26 +135,30 @@ static void nfs_direct_release_pages(struct page **pages, unsigned int npages)
147 page_cache_release(pages[i]); 135 page_cache_release(pages[i]);
148} 136}
149 137
138void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo,
139 struct nfs_direct_req *dreq)
140{
141 cinfo->lock = &dreq->lock;
142 cinfo->mds = &dreq->mds_cinfo;
143 cinfo->ds = &dreq->ds_cinfo;
144 cinfo->dreq = dreq;
145 cinfo->completion_ops = &nfs_direct_commit_completion_ops;
146}
147
150static inline struct nfs_direct_req *nfs_direct_req_alloc(void) 148static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
151{ 149{
152 struct nfs_direct_req *dreq; 150 struct nfs_direct_req *dreq;
153 151
154 dreq = kmem_cache_alloc(nfs_direct_cachep, GFP_KERNEL); 152 dreq = kmem_cache_zalloc(nfs_direct_cachep, GFP_KERNEL);
155 if (!dreq) 153 if (!dreq)
156 return NULL; 154 return NULL;
157 155
158 kref_init(&dreq->kref); 156 kref_init(&dreq->kref);
159 kref_get(&dreq->kref); 157 kref_get(&dreq->kref);
160 init_completion(&dreq->completion); 158 init_completion(&dreq->completion);
161 INIT_LIST_HEAD(&dreq->rewrite_list); 159 INIT_LIST_HEAD(&dreq->mds_cinfo.list);
162 dreq->iocb = NULL; 160 INIT_WORK(&dreq->work, nfs_direct_write_schedule_work);
163 dreq->ctx = NULL;
164 dreq->l_ctx = NULL;
165 spin_lock_init(&dreq->lock); 161 spin_lock_init(&dreq->lock);
166 atomic_set(&dreq->io_count, 0);
167 dreq->count = 0;
168 dreq->error = 0;
169 dreq->flags = 0;
170 162
171 return dreq; 163 return dreq;
172} 164}
@@ -226,47 +218,80 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq)
226 nfs_direct_req_release(dreq); 218 nfs_direct_req_release(dreq);
227} 219}
228 220
229/* 221static void nfs_direct_readpage_release(struct nfs_page *req)
230 * We must hold a reference to all the pages in this direct read request
231 * until the RPCs complete. This could be long *after* we are woken up in
232 * nfs_direct_wait (for instance, if someone hits ^C on a slow server).
233 */
234static void nfs_direct_read_result(struct rpc_task *task, void *calldata)
235{ 222{
236 struct nfs_read_data *data = calldata; 223 dprintk("NFS: direct read done (%s/%lld %d@%lld)\n",
237 224 req->wb_context->dentry->d_inode->i_sb->s_id,
238 nfs_readpage_result(task, data); 225 (long long)NFS_FILEID(req->wb_context->dentry->d_inode),
226 req->wb_bytes,
227 (long long)req_offset(req));
228 nfs_release_request(req);
239} 229}
240 230
241static void nfs_direct_read_release(void *calldata) 231static void nfs_direct_read_completion(struct nfs_pgio_header *hdr)
242{ 232{
233 unsigned long bytes = 0;
234 struct nfs_direct_req *dreq = hdr->dreq;
243 235
244 struct nfs_read_data *data = calldata; 236 if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
245 struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req; 237 goto out_put;
246 int status = data->task.tk_status;
247 238
248 spin_lock(&dreq->lock); 239 spin_lock(&dreq->lock);
249 if (unlikely(status < 0)) { 240 if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) && (hdr->good_bytes == 0))
250 dreq->error = status; 241 dreq->error = hdr->error;
251 spin_unlock(&dreq->lock); 242 else
252 } else { 243 dreq->count += hdr->good_bytes;
253 dreq->count += data->res.count; 244 spin_unlock(&dreq->lock);
254 spin_unlock(&dreq->lock);
255 nfs_direct_dirty_pages(data->pagevec,
256 data->args.pgbase,
257 data->res.count);
258 }
259 nfs_direct_release_pages(data->pagevec, data->npages);
260 245
246 while (!list_empty(&hdr->pages)) {
247 struct nfs_page *req = nfs_list_entry(hdr->pages.next);
248 struct page *page = req->wb_page;
249
250 if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) {
251 if (bytes > hdr->good_bytes)
252 zero_user(page, 0, PAGE_SIZE);
253 else if (hdr->good_bytes - bytes < PAGE_SIZE)
254 zero_user_segment(page,
255 hdr->good_bytes & ~PAGE_MASK,
256 PAGE_SIZE);
257 }
258 if (!PageCompound(page)) {
259 if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) {
260 if (bytes < hdr->good_bytes)
261 set_page_dirty(page);
262 } else
263 set_page_dirty(page);
264 }
265 bytes += req->wb_bytes;
266 nfs_list_remove_request(req);
267 nfs_direct_readpage_release(req);
268 }
269out_put:
261 if (put_dreq(dreq)) 270 if (put_dreq(dreq))
262 nfs_direct_complete(dreq); 271 nfs_direct_complete(dreq);
263 nfs_readdata_free(data); 272 hdr->release(hdr);
273}
274
275static void nfs_read_sync_pgio_error(struct list_head *head)
276{
277 struct nfs_page *req;
278
279 while (!list_empty(head)) {
280 req = nfs_list_entry(head->next);
281 nfs_list_remove_request(req);
282 nfs_release_request(req);
283 }
264} 284}
265 285
266static const struct rpc_call_ops nfs_read_direct_ops = { 286static void nfs_direct_pgio_init(struct nfs_pgio_header *hdr)
267 .rpc_call_prepare = nfs_read_prepare, 287{
268 .rpc_call_done = nfs_direct_read_result, 288 get_dreq(hdr->dreq);
269 .rpc_release = nfs_direct_read_release, 289}
290
291static const struct nfs_pgio_completion_ops nfs_direct_read_completion_ops = {
292 .error_cleanup = nfs_read_sync_pgio_error,
293 .init_hdr = nfs_direct_pgio_init,
294 .completion = nfs_direct_read_completion,
270}; 295};
271 296
272/* 297/*
@@ -276,107 +301,82 @@ static const struct rpc_call_ops nfs_read_direct_ops = {
276 * handled automatically by nfs_direct_read_result(). Otherwise, if 301 * handled automatically by nfs_direct_read_result(). Otherwise, if
277 * no requests have been sent, just return an error. 302 * no requests have been sent, just return an error.
278 */ 303 */
279static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq, 304static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *desc,
280 const struct iovec *iov, 305 const struct iovec *iov,
281 loff_t pos) 306 loff_t pos)
282{ 307{
308 struct nfs_direct_req *dreq = desc->pg_dreq;
283 struct nfs_open_context *ctx = dreq->ctx; 309 struct nfs_open_context *ctx = dreq->ctx;
284 struct inode *inode = ctx->dentry->d_inode; 310 struct inode *inode = ctx->dentry->d_inode;
285 unsigned long user_addr = (unsigned long)iov->iov_base; 311 unsigned long user_addr = (unsigned long)iov->iov_base;
286 size_t count = iov->iov_len; 312 size_t count = iov->iov_len;
287 size_t rsize = NFS_SERVER(inode)->rsize; 313 size_t rsize = NFS_SERVER(inode)->rsize;
288 struct rpc_task *task;
289 struct rpc_message msg = {
290 .rpc_cred = ctx->cred,
291 };
292 struct rpc_task_setup task_setup_data = {
293 .rpc_client = NFS_CLIENT(inode),
294 .rpc_message = &msg,
295 .callback_ops = &nfs_read_direct_ops,
296 .workqueue = nfsiod_workqueue,
297 .flags = RPC_TASK_ASYNC,
298 };
299 unsigned int pgbase; 314 unsigned int pgbase;
300 int result; 315 int result;
301 ssize_t started = 0; 316 ssize_t started = 0;
317 struct page **pagevec = NULL;
318 unsigned int npages;
302 319
303 do { 320 do {
304 struct nfs_read_data *data;
305 size_t bytes; 321 size_t bytes;
322 int i;
306 323
307 pgbase = user_addr & ~PAGE_MASK; 324 pgbase = user_addr & ~PAGE_MASK;
308 bytes = min(rsize,count); 325 bytes = min(max_t(size_t, rsize, PAGE_SIZE), count);
309 326
310 result = -ENOMEM; 327 result = -ENOMEM;
311 data = nfs_readdata_alloc(nfs_page_array_len(pgbase, bytes)); 328 npages = nfs_page_array_len(pgbase, bytes);
312 if (unlikely(!data)) 329 if (!pagevec)
330 pagevec = kmalloc(npages * sizeof(struct page *),
331 GFP_KERNEL);
332 if (!pagevec)
313 break; 333 break;
314
315 down_read(&current->mm->mmap_sem); 334 down_read(&current->mm->mmap_sem);
316 result = get_user_pages(current, current->mm, user_addr, 335 result = get_user_pages(current, current->mm, user_addr,
317 data->npages, 1, 0, data->pagevec, NULL); 336 npages, 1, 0, pagevec, NULL);
318 up_read(&current->mm->mmap_sem); 337 up_read(&current->mm->mmap_sem);
319 if (result < 0) { 338 if (result < 0)
320 nfs_readdata_free(data);
321 break; 339 break;
322 } 340 if ((unsigned)result < npages) {
323 if ((unsigned)result < data->npages) {
324 bytes = result * PAGE_SIZE; 341 bytes = result * PAGE_SIZE;
325 if (bytes <= pgbase) { 342 if (bytes <= pgbase) {
326 nfs_direct_release_pages(data->pagevec, result); 343 nfs_direct_release_pages(pagevec, result);
327 nfs_readdata_free(data);
328 break; 344 break;
329 } 345 }
330 bytes -= pgbase; 346 bytes -= pgbase;
331 data->npages = result; 347 npages = result;
332 } 348 }
333 349
334 get_dreq(dreq); 350 for (i = 0; i < npages; i++) {
335 351 struct nfs_page *req;
336 data->req = (struct nfs_page *) dreq; 352 unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
337 data->inode = inode; 353 /* XXX do we need to do the eof zeroing found in async_filler? */
338 data->cred = msg.rpc_cred; 354 req = nfs_create_request(dreq->ctx, dreq->inode,
339 data->args.fh = NFS_FH(inode); 355 pagevec[i],
340 data->args.context = ctx; 356 pgbase, req_len);
341 data->args.lock_context = dreq->l_ctx; 357 if (IS_ERR(req)) {
342 data->args.offset = pos; 358 result = PTR_ERR(req);
343 data->args.pgbase = pgbase; 359 break;
344 data->args.pages = data->pagevec; 360 }
345 data->args.count = bytes; 361 req->wb_index = pos >> PAGE_SHIFT;
346 data->res.fattr = &data->fattr; 362 req->wb_offset = pos & ~PAGE_MASK;
347 data->res.eof = 0; 363 if (!nfs_pageio_add_request(desc, req)) {
348 data->res.count = bytes; 364 result = desc->pg_error;
349 nfs_fattr_init(&data->fattr); 365 nfs_release_request(req);
350 msg.rpc_argp = &data->args; 366 break;
351 msg.rpc_resp = &data->res; 367 }
352 368 pgbase = 0;
353 task_setup_data.task = &data->task; 369 bytes -= req_len;
354 task_setup_data.callback_data = data; 370 started += req_len;
355 NFS_PROTO(inode)->read_setup(data, &msg); 371 user_addr += req_len;
356 372 pos += req_len;
357 task = rpc_run_task(&task_setup_data); 373 count -= req_len;
358 if (IS_ERR(task)) 374 }
359 break; 375 /* The nfs_page now hold references to these pages */
360 rpc_put_task(task); 376 nfs_direct_release_pages(pagevec, npages);
361 377 } while (count != 0 && result >= 0);
362 dprintk("NFS: %5u initiated direct read call " 378
363 "(req %s/%Ld, %zu bytes @ offset %Lu)\n", 379 kfree(pagevec);
364 data->task.tk_pid,
365 inode->i_sb->s_id,
366 (long long)NFS_FILEID(inode),
367 bytes,
368 (unsigned long long)data->args.offset);
369
370 started += bytes;
371 user_addr += bytes;
372 pos += bytes;
373 /* FIXME: Remove this unnecessary math from final patch */
374 pgbase += bytes;
375 pgbase &= ~PAGE_MASK;
376 BUG_ON(pgbase != (user_addr & ~PAGE_MASK));
377
378 count -= bytes;
379 } while (count != 0);
380 380
381 if (started) 381 if (started)
382 return started; 382 return started;
@@ -388,15 +388,19 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
388 unsigned long nr_segs, 388 unsigned long nr_segs,
389 loff_t pos) 389 loff_t pos)
390{ 390{
391 struct nfs_pageio_descriptor desc;
391 ssize_t result = -EINVAL; 392 ssize_t result = -EINVAL;
392 size_t requested_bytes = 0; 393 size_t requested_bytes = 0;
393 unsigned long seg; 394 unsigned long seg;
394 395
396 nfs_pageio_init_read(&desc, dreq->inode,
397 &nfs_direct_read_completion_ops);
395 get_dreq(dreq); 398 get_dreq(dreq);
399 desc.pg_dreq = dreq;
396 400
397 for (seg = 0; seg < nr_segs; seg++) { 401 for (seg = 0; seg < nr_segs; seg++) {
398 const struct iovec *vec = &iov[seg]; 402 const struct iovec *vec = &iov[seg];
399 result = nfs_direct_read_schedule_segment(dreq, vec, pos); 403 result = nfs_direct_read_schedule_segment(&desc, vec, pos);
400 if (result < 0) 404 if (result < 0)
401 break; 405 break;
402 requested_bytes += result; 406 requested_bytes += result;
@@ -405,6 +409,8 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
405 pos += vec->iov_len; 409 pos += vec->iov_len;
406 } 410 }
407 411
412 nfs_pageio_complete(&desc);
413
408 /* 414 /*
409 * If no bytes were started, return the error, and let the 415 * If no bytes were started, return the error, and let the
410 * generic layer handle the completion. 416 * generic layer handle the completion.
@@ -447,98 +453,57 @@ out:
447 return result; 453 return result;
448} 454}
449 455
450static void nfs_direct_free_writedata(struct nfs_direct_req *dreq)
451{
452 while (!list_empty(&dreq->rewrite_list)) {
453 struct nfs_write_data *data = list_entry(dreq->rewrite_list.next, struct nfs_write_data, pages);
454 list_del(&data->pages);
455 nfs_direct_release_pages(data->pagevec, data->npages);
456 nfs_writedata_free(data);
457 }
458}
459
460#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) 456#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
461static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) 457static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
462{ 458{
463 struct inode *inode = dreq->inode; 459 struct nfs_pageio_descriptor desc;
464 struct list_head *p; 460 struct nfs_page *req, *tmp;
465 struct nfs_write_data *data; 461 LIST_HEAD(reqs);
466 struct rpc_task *task; 462 struct nfs_commit_info cinfo;
467 struct rpc_message msg = { 463 LIST_HEAD(failed);
468 .rpc_cred = dreq->ctx->cred, 464
469 }; 465 nfs_init_cinfo_from_dreq(&cinfo, dreq);
470 struct rpc_task_setup task_setup_data = { 466 pnfs_recover_commit_reqs(dreq->inode, &reqs, &cinfo);
471 .rpc_client = NFS_CLIENT(inode), 467 spin_lock(cinfo.lock);
472 .rpc_message = &msg, 468 nfs_scan_commit_list(&cinfo.mds->list, &reqs, &cinfo, 0);
473 .callback_ops = &nfs_write_direct_ops, 469 spin_unlock(cinfo.lock);
474 .workqueue = nfsiod_workqueue,
475 .flags = RPC_TASK_ASYNC,
476 };
477 470
478 dreq->count = 0; 471 dreq->count = 0;
479 get_dreq(dreq); 472 get_dreq(dreq);
480 473
481 list_for_each(p, &dreq->rewrite_list) { 474 nfs_pageio_init_write(&desc, dreq->inode, FLUSH_STABLE,
482 data = list_entry(p, struct nfs_write_data, pages); 475 &nfs_direct_write_completion_ops);
483 476 desc.pg_dreq = dreq;
484 get_dreq(dreq); 477
485 478 list_for_each_entry_safe(req, tmp, &reqs, wb_list) {
486 /* Use stable writes */ 479 if (!nfs_pageio_add_request(&desc, req)) {
487 data->args.stable = NFS_FILE_SYNC; 480 nfs_list_add_request(req, &failed);
488 481 spin_lock(cinfo.lock);
489 /* 482 dreq->flags = 0;
490 * Reset data->res. 483 dreq->error = -EIO;
491 */ 484 spin_unlock(cinfo.lock);
492 nfs_fattr_init(&data->fattr); 485 }
493 data->res.count = data->args.count;
494 memset(&data->verf, 0, sizeof(data->verf));
495
496 /*
497 * Reuse data->task; data->args should not have changed
498 * since the original request was sent.
499 */
500 task_setup_data.task = &data->task;
501 task_setup_data.callback_data = data;
502 msg.rpc_argp = &data->args;
503 msg.rpc_resp = &data->res;
504 NFS_PROTO(inode)->write_setup(data, &msg);
505
506 /*
507 * We're called via an RPC callback, so BKL is already held.
508 */
509 task = rpc_run_task(&task_setup_data);
510 if (!IS_ERR(task))
511 rpc_put_task(task);
512
513 dprintk("NFS: %5u rescheduled direct write call (req %s/%Ld, %u bytes @ offset %Lu)\n",
514 data->task.tk_pid,
515 inode->i_sb->s_id,
516 (long long)NFS_FILEID(inode),
517 data->args.count,
518 (unsigned long long)data->args.offset);
519 } 486 }
487 nfs_pageio_complete(&desc);
520 488
521 if (put_dreq(dreq)) 489 while (!list_empty(&failed))
522 nfs_direct_write_complete(dreq, inode); 490 nfs_unlock_and_release_request(req);
523}
524
525static void nfs_direct_commit_result(struct rpc_task *task, void *calldata)
526{
527 struct nfs_write_data *data = calldata;
528 491
529 /* Call the NFS version-specific code */ 492 if (put_dreq(dreq))
530 NFS_PROTO(data->inode)->commit_done(task, data); 493 nfs_direct_write_complete(dreq, dreq->inode);
531} 494}
532 495
533static void nfs_direct_commit_release(void *calldata) 496static void nfs_direct_commit_complete(struct nfs_commit_data *data)
534{ 497{
535 struct nfs_write_data *data = calldata; 498 struct nfs_direct_req *dreq = data->dreq;
536 struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req; 499 struct nfs_commit_info cinfo;
500 struct nfs_page *req;
537 int status = data->task.tk_status; 501 int status = data->task.tk_status;
538 502
503 nfs_init_cinfo_from_dreq(&cinfo, dreq);
539 if (status < 0) { 504 if (status < 0) {
540 dprintk("NFS: %5u commit failed with error %d.\n", 505 dprintk("NFS: %5u commit failed with error %d.\n",
541 data->task.tk_pid, status); 506 data->task.tk_pid, status);
542 dreq->flags = NFS_ODIRECT_RESCHED_WRITES; 507 dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
543 } else if (memcmp(&dreq->verf, &data->verf, sizeof(data->verf))) { 508 } else if (memcmp(&dreq->verf, &data->verf, sizeof(data->verf))) {
544 dprintk("NFS: %5u commit verify failed\n", data->task.tk_pid); 509 dprintk("NFS: %5u commit verify failed\n", data->task.tk_pid);
@@ -546,62 +511,47 @@ static void nfs_direct_commit_release(void *calldata)
546 } 511 }
547 512
548 dprintk("NFS: %5u commit returned %d\n", data->task.tk_pid, status); 513 dprintk("NFS: %5u commit returned %d\n", data->task.tk_pid, status);
549 nfs_direct_write_complete(dreq, data->inode); 514 while (!list_empty(&data->pages)) {
550 nfs_commit_free(data); 515 req = nfs_list_entry(data->pages.next);
516 nfs_list_remove_request(req);
517 if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) {
518 /* Note the rewrite will go through mds */
519 kref_get(&req->wb_kref);
520 nfs_mark_request_commit(req, NULL, &cinfo);
521 }
522 nfs_unlock_and_release_request(req);
523 }
524
525 if (atomic_dec_and_test(&cinfo.mds->rpcs_out))
526 nfs_direct_write_complete(dreq, data->inode);
527}
528
529static void nfs_direct_error_cleanup(struct nfs_inode *nfsi)
530{
531 /* There is no lock to clear */
551} 532}
552 533
553static const struct rpc_call_ops nfs_commit_direct_ops = { 534static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops = {
554 .rpc_call_prepare = nfs_write_prepare, 535 .completion = nfs_direct_commit_complete,
555 .rpc_call_done = nfs_direct_commit_result, 536 .error_cleanup = nfs_direct_error_cleanup,
556 .rpc_release = nfs_direct_commit_release,
557}; 537};
558 538
559static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) 539static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
560{ 540{
561 struct nfs_write_data *data = dreq->commit_data; 541 int res;
562 struct rpc_task *task; 542 struct nfs_commit_info cinfo;
563 struct rpc_message msg = { 543 LIST_HEAD(mds_list);
564 .rpc_argp = &data->args, 544
565 .rpc_resp = &data->res, 545 nfs_init_cinfo_from_dreq(&cinfo, dreq);
566 .rpc_cred = dreq->ctx->cred, 546 nfs_scan_commit(dreq->inode, &mds_list, &cinfo);
567 }; 547 res = nfs_generic_commit_list(dreq->inode, &mds_list, 0, &cinfo);
568 struct rpc_task_setup task_setup_data = { 548 if (res < 0) /* res == -ENOMEM */
569 .task = &data->task, 549 nfs_direct_write_reschedule(dreq);
570 .rpc_client = NFS_CLIENT(dreq->inode),
571 .rpc_message = &msg,
572 .callback_ops = &nfs_commit_direct_ops,
573 .callback_data = data,
574 .workqueue = nfsiod_workqueue,
575 .flags = RPC_TASK_ASYNC,
576 };
577
578 data->inode = dreq->inode;
579 data->cred = msg.rpc_cred;
580
581 data->args.fh = NFS_FH(data->inode);
582 data->args.offset = 0;
583 data->args.count = 0;
584 data->args.context = dreq->ctx;
585 data->args.lock_context = dreq->l_ctx;
586 data->res.count = 0;
587 data->res.fattr = &data->fattr;
588 data->res.verf = &data->verf;
589 nfs_fattr_init(&data->fattr);
590
591 NFS_PROTO(data->inode)->commit_setup(data, &msg);
592
593 /* Note: task.tk_ops->rpc_release will free dreq->commit_data */
594 dreq->commit_data = NULL;
595
596 dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
597
598 task = rpc_run_task(&task_setup_data);
599 if (!IS_ERR(task))
600 rpc_put_task(task);
601} 550}
602 551
603static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode) 552static void nfs_direct_write_schedule_work(struct work_struct *work)
604{ 553{
554 struct nfs_direct_req *dreq = container_of(work, struct nfs_direct_req, work);
605 int flags = dreq->flags; 555 int flags = dreq->flags;
606 556
607 dreq->flags = 0; 557 dreq->flags = 0;
@@ -613,89 +563,32 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode
613 nfs_direct_write_reschedule(dreq); 563 nfs_direct_write_reschedule(dreq);
614 break; 564 break;
615 default: 565 default:
616 if (dreq->commit_data != NULL) 566 nfs_zap_mapping(dreq->inode, dreq->inode->i_mapping);
617 nfs_commit_free(dreq->commit_data);
618 nfs_direct_free_writedata(dreq);
619 nfs_zap_mapping(inode, inode->i_mapping);
620 nfs_direct_complete(dreq); 567 nfs_direct_complete(dreq);
621 } 568 }
622} 569}
623 570
624static void nfs_alloc_commit_data(struct nfs_direct_req *dreq) 571static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
625{ 572{
626 dreq->commit_data = nfs_commitdata_alloc(); 573 schedule_work(&dreq->work); /* Calls nfs_direct_write_schedule_work */
627 if (dreq->commit_data != NULL)
628 dreq->commit_data->req = (struct nfs_page *) dreq;
629} 574}
575
630#else 576#else
631static inline void nfs_alloc_commit_data(struct nfs_direct_req *dreq) 577static void nfs_direct_write_schedule_work(struct work_struct *work)
632{ 578{
633 dreq->commit_data = NULL;
634} 579}
635 580
636static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode) 581static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
637{ 582{
638 nfs_direct_free_writedata(dreq);
639 nfs_zap_mapping(inode, inode->i_mapping); 583 nfs_zap_mapping(inode, inode->i_mapping);
640 nfs_direct_complete(dreq); 584 nfs_direct_complete(dreq);
641} 585}
642#endif 586#endif
643 587
644static void nfs_direct_write_result(struct rpc_task *task, void *calldata)
645{
646 struct nfs_write_data *data = calldata;
647
648 nfs_writeback_done(task, data);
649}
650
651/* 588/*
652 * NB: Return the value of the first error return code. Subsequent 589 * NB: Return the value of the first error return code. Subsequent
653 * errors after the first one are ignored. 590 * errors after the first one are ignored.
654 */ 591 */
655static void nfs_direct_write_release(void *calldata)
656{
657 struct nfs_write_data *data = calldata;
658 struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
659 int status = data->task.tk_status;
660
661 spin_lock(&dreq->lock);
662
663 if (unlikely(status < 0)) {
664 /* An error has occurred, so we should not commit */
665 dreq->flags = 0;
666 dreq->error = status;
667 }
668 if (unlikely(dreq->error != 0))
669 goto out_unlock;
670
671 dreq->count += data->res.count;
672
673 if (data->res.verf->committed != NFS_FILE_SYNC) {
674 switch (dreq->flags) {
675 case 0:
676 memcpy(&dreq->verf, &data->verf, sizeof(dreq->verf));
677 dreq->flags = NFS_ODIRECT_DO_COMMIT;
678 break;
679 case NFS_ODIRECT_DO_COMMIT:
680 if (memcmp(&dreq->verf, &data->verf, sizeof(dreq->verf))) {
681 dprintk("NFS: %5u write verify failed\n", data->task.tk_pid);
682 dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
683 }
684 }
685 }
686out_unlock:
687 spin_unlock(&dreq->lock);
688
689 if (put_dreq(dreq))
690 nfs_direct_write_complete(dreq, data->inode);
691}
692
693static const struct rpc_call_ops nfs_write_direct_ops = {
694 .rpc_call_prepare = nfs_write_prepare,
695 .rpc_call_done = nfs_direct_write_result,
696 .rpc_release = nfs_direct_write_release,
697};
698
699/* 592/*
700 * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE 593 * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE
701 * operation. If nfs_writedata_alloc() or get_user_pages() fails, 594 * operation. If nfs_writedata_alloc() or get_user_pages() fails,
@@ -703,132 +596,187 @@ static const struct rpc_call_ops nfs_write_direct_ops = {
703 * handled automatically by nfs_direct_write_result(). Otherwise, if 596 * handled automatically by nfs_direct_write_result(). Otherwise, if
704 * no requests have been sent, just return an error. 597 * no requests have been sent, just return an error.
705 */ 598 */
706static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq, 599static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *desc,
707 const struct iovec *iov, 600 const struct iovec *iov,
708 loff_t pos, int sync) 601 loff_t pos)
709{ 602{
603 struct nfs_direct_req *dreq = desc->pg_dreq;
710 struct nfs_open_context *ctx = dreq->ctx; 604 struct nfs_open_context *ctx = dreq->ctx;
711 struct inode *inode = ctx->dentry->d_inode; 605 struct inode *inode = ctx->dentry->d_inode;
712 unsigned long user_addr = (unsigned long)iov->iov_base; 606 unsigned long user_addr = (unsigned long)iov->iov_base;
713 size_t count = iov->iov_len; 607 size_t count = iov->iov_len;
714 struct rpc_task *task;
715 struct rpc_message msg = {
716 .rpc_cred = ctx->cred,
717 };
718 struct rpc_task_setup task_setup_data = {
719 .rpc_client = NFS_CLIENT(inode),
720 .rpc_message = &msg,
721 .callback_ops = &nfs_write_direct_ops,
722 .workqueue = nfsiod_workqueue,
723 .flags = RPC_TASK_ASYNC,
724 };
725 size_t wsize = NFS_SERVER(inode)->wsize; 608 size_t wsize = NFS_SERVER(inode)->wsize;
726 unsigned int pgbase; 609 unsigned int pgbase;
727 int result; 610 int result;
728 ssize_t started = 0; 611 ssize_t started = 0;
612 struct page **pagevec = NULL;
613 unsigned int npages;
729 614
730 do { 615 do {
731 struct nfs_write_data *data;
732 size_t bytes; 616 size_t bytes;
617 int i;
733 618
734 pgbase = user_addr & ~PAGE_MASK; 619 pgbase = user_addr & ~PAGE_MASK;
735 bytes = min(wsize,count); 620 bytes = min(max_t(size_t, wsize, PAGE_SIZE), count);
736 621
737 result = -ENOMEM; 622 result = -ENOMEM;
738 data = nfs_writedata_alloc(nfs_page_array_len(pgbase, bytes)); 623 npages = nfs_page_array_len(pgbase, bytes);
739 if (unlikely(!data)) 624 if (!pagevec)
625 pagevec = kmalloc(npages * sizeof(struct page *), GFP_KERNEL);
626 if (!pagevec)
740 break; 627 break;
741 628
742 down_read(&current->mm->mmap_sem); 629 down_read(&current->mm->mmap_sem);
743 result = get_user_pages(current, current->mm, user_addr, 630 result = get_user_pages(current, current->mm, user_addr,
744 data->npages, 0, 0, data->pagevec, NULL); 631 npages, 0, 0, pagevec, NULL);
745 up_read(&current->mm->mmap_sem); 632 up_read(&current->mm->mmap_sem);
746 if (result < 0) { 633 if (result < 0)
747 nfs_writedata_free(data);
748 break; 634 break;
749 } 635
750 if ((unsigned)result < data->npages) { 636 if ((unsigned)result < npages) {
751 bytes = result * PAGE_SIZE; 637 bytes = result * PAGE_SIZE;
752 if (bytes <= pgbase) { 638 if (bytes <= pgbase) {
753 nfs_direct_release_pages(data->pagevec, result); 639 nfs_direct_release_pages(pagevec, result);
754 nfs_writedata_free(data);
755 break; 640 break;
756 } 641 }
757 bytes -= pgbase; 642 bytes -= pgbase;
758 data->npages = result; 643 npages = result;
759 } 644 }
760 645
761 get_dreq(dreq); 646 for (i = 0; i < npages; i++) {
762 647 struct nfs_page *req;
763 list_move_tail(&data->pages, &dreq->rewrite_list); 648 unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
764
765 data->req = (struct nfs_page *) dreq;
766 data->inode = inode;
767 data->cred = msg.rpc_cred;
768 data->args.fh = NFS_FH(inode);
769 data->args.context = ctx;
770 data->args.lock_context = dreq->l_ctx;
771 data->args.offset = pos;
772 data->args.pgbase = pgbase;
773 data->args.pages = data->pagevec;
774 data->args.count = bytes;
775 data->args.stable = sync;
776 data->res.fattr = &data->fattr;
777 data->res.count = bytes;
778 data->res.verf = &data->verf;
779 nfs_fattr_init(&data->fattr);
780
781 task_setup_data.task = &data->task;
782 task_setup_data.callback_data = data;
783 msg.rpc_argp = &data->args;
784 msg.rpc_resp = &data->res;
785 NFS_PROTO(inode)->write_setup(data, &msg);
786
787 task = rpc_run_task(&task_setup_data);
788 if (IS_ERR(task))
789 break;
790 rpc_put_task(task);
791
792 dprintk("NFS: %5u initiated direct write call "
793 "(req %s/%Ld, %zu bytes @ offset %Lu)\n",
794 data->task.tk_pid,
795 inode->i_sb->s_id,
796 (long long)NFS_FILEID(inode),
797 bytes,
798 (unsigned long long)data->args.offset);
799 649
800 started += bytes; 650 req = nfs_create_request(dreq->ctx, dreq->inode,
801 user_addr += bytes; 651 pagevec[i],
802 pos += bytes; 652 pgbase, req_len);
803 653 if (IS_ERR(req)) {
804 /* FIXME: Remove this useless math from the final patch */ 654 result = PTR_ERR(req);
805 pgbase += bytes; 655 break;
806 pgbase &= ~PAGE_MASK; 656 }
807 BUG_ON(pgbase != (user_addr & ~PAGE_MASK)); 657 nfs_lock_request(req);
658 req->wb_index = pos >> PAGE_SHIFT;
659 req->wb_offset = pos & ~PAGE_MASK;
660 if (!nfs_pageio_add_request(desc, req)) {
661 result = desc->pg_error;
662 nfs_unlock_and_release_request(req);
663 break;
664 }
665 pgbase = 0;
666 bytes -= req_len;
667 started += req_len;
668 user_addr += req_len;
669 pos += req_len;
670 count -= req_len;
671 }
672 /* The nfs_page now hold references to these pages */
673 nfs_direct_release_pages(pagevec, npages);
674 } while (count != 0 && result >= 0);
808 675
809 count -= bytes; 676 kfree(pagevec);
810 } while (count != 0);
811 677
812 if (started) 678 if (started)
813 return started; 679 return started;
814 return result < 0 ? (ssize_t) result : -EFAULT; 680 return result < 0 ? (ssize_t) result : -EFAULT;
815} 681}
816 682
683static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
684{
685 struct nfs_direct_req *dreq = hdr->dreq;
686 struct nfs_commit_info cinfo;
687 int bit = -1;
688 struct nfs_page *req = nfs_list_entry(hdr->pages.next);
689
690 if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
691 goto out_put;
692
693 nfs_init_cinfo_from_dreq(&cinfo, dreq);
694
695 spin_lock(&dreq->lock);
696
697 if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) {
698 dreq->flags = 0;
699 dreq->error = hdr->error;
700 }
701 if (dreq->error != 0)
702 bit = NFS_IOHDR_ERROR;
703 else {
704 dreq->count += hdr->good_bytes;
705 if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) {
706 dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
707 bit = NFS_IOHDR_NEED_RESCHED;
708 } else if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) {
709 if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES)
710 bit = NFS_IOHDR_NEED_RESCHED;
711 else if (dreq->flags == 0) {
712 memcpy(&dreq->verf, &req->wb_verf,
713 sizeof(dreq->verf));
714 bit = NFS_IOHDR_NEED_COMMIT;
715 dreq->flags = NFS_ODIRECT_DO_COMMIT;
716 } else if (dreq->flags == NFS_ODIRECT_DO_COMMIT) {
717 if (memcmp(&dreq->verf, &req->wb_verf, sizeof(dreq->verf))) {
718 dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
719 bit = NFS_IOHDR_NEED_RESCHED;
720 } else
721 bit = NFS_IOHDR_NEED_COMMIT;
722 }
723 }
724 }
725 spin_unlock(&dreq->lock);
726
727 while (!list_empty(&hdr->pages)) {
728 req = nfs_list_entry(hdr->pages.next);
729 nfs_list_remove_request(req);
730 switch (bit) {
731 case NFS_IOHDR_NEED_RESCHED:
732 case NFS_IOHDR_NEED_COMMIT:
733 kref_get(&req->wb_kref);
734 nfs_mark_request_commit(req, hdr->lseg, &cinfo);
735 }
736 nfs_unlock_and_release_request(req);
737 }
738
739out_put:
740 if (put_dreq(dreq))
741 nfs_direct_write_complete(dreq, hdr->inode);
742 hdr->release(hdr);
743}
744
745static void nfs_write_sync_pgio_error(struct list_head *head)
746{
747 struct nfs_page *req;
748
749 while (!list_empty(head)) {
750 req = nfs_list_entry(head->next);
751 nfs_list_remove_request(req);
752 nfs_unlock_and_release_request(req);
753 }
754}
755
756static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = {
757 .error_cleanup = nfs_write_sync_pgio_error,
758 .init_hdr = nfs_direct_pgio_init,
759 .completion = nfs_direct_write_completion,
760};
761
817static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, 762static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
818 const struct iovec *iov, 763 const struct iovec *iov,
819 unsigned long nr_segs, 764 unsigned long nr_segs,
820 loff_t pos, int sync) 765 loff_t pos)
821{ 766{
767 struct nfs_pageio_descriptor desc;
822 ssize_t result = 0; 768 ssize_t result = 0;
823 size_t requested_bytes = 0; 769 size_t requested_bytes = 0;
824 unsigned long seg; 770 unsigned long seg;
825 771
772 nfs_pageio_init_write(&desc, dreq->inode, FLUSH_COND_STABLE,
773 &nfs_direct_write_completion_ops);
774 desc.pg_dreq = dreq;
826 get_dreq(dreq); 775 get_dreq(dreq);
827 776
828 for (seg = 0; seg < nr_segs; seg++) { 777 for (seg = 0; seg < nr_segs; seg++) {
829 const struct iovec *vec = &iov[seg]; 778 const struct iovec *vec = &iov[seg];
830 result = nfs_direct_write_schedule_segment(dreq, vec, 779 result = nfs_direct_write_schedule_segment(&desc, vec, pos);
831 pos, sync);
832 if (result < 0) 780 if (result < 0)
833 break; 781 break;
834 requested_bytes += result; 782 requested_bytes += result;
@@ -836,6 +784,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
836 break; 784 break;
837 pos += vec->iov_len; 785 pos += vec->iov_len;
838 } 786 }
787 nfs_pageio_complete(&desc);
839 788
840 /* 789 /*
841 * If no bytes were started, return the error, and let the 790 * If no bytes were started, return the error, and let the
@@ -858,16 +807,10 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
858 ssize_t result = -ENOMEM; 807 ssize_t result = -ENOMEM;
859 struct inode *inode = iocb->ki_filp->f_mapping->host; 808 struct inode *inode = iocb->ki_filp->f_mapping->host;
860 struct nfs_direct_req *dreq; 809 struct nfs_direct_req *dreq;
861 size_t wsize = NFS_SERVER(inode)->wsize;
862 int sync = NFS_UNSTABLE;
863 810
864 dreq = nfs_direct_req_alloc(); 811 dreq = nfs_direct_req_alloc();
865 if (!dreq) 812 if (!dreq)
866 goto out; 813 goto out;
867 nfs_alloc_commit_data(dreq);
868
869 if (dreq->commit_data == NULL || count <= wsize)
870 sync = NFS_FILE_SYNC;
871 814
872 dreq->inode = inode; 815 dreq->inode = inode;
873 dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); 816 dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
@@ -877,7 +820,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
877 if (!is_sync_kiocb(iocb)) 820 if (!is_sync_kiocb(iocb))
878 dreq->iocb = iocb; 821 dreq->iocb = iocb;
879 822
880 result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos, sync); 823 result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos);
881 if (!result) 824 if (!result)
882 result = nfs_direct_wait(dreq); 825 result = nfs_direct_wait(dreq);
883out_release: 826out_release:
@@ -997,10 +940,15 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
997 task_io_account_write(count); 940 task_io_account_write(count);
998 941
999 retval = nfs_direct_write(iocb, iov, nr_segs, pos, count); 942 retval = nfs_direct_write(iocb, iov, nr_segs, pos, count);
943 if (retval > 0) {
944 struct inode *inode = mapping->host;
1000 945
1001 if (retval > 0)
1002 iocb->ki_pos = pos + retval; 946 iocb->ki_pos = pos + retval;
1003 947 spin_lock(&inode->i_lock);
948 if (i_size_read(inode) < iocb->ki_pos)
949 i_size_write(inode, iocb->ki_pos);
950 spin_unlock(&inode->i_lock);
951 }
1004out: 952out:
1005 return retval; 953 return retval;
1006} 954}
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index aa9b709fd328..8eda8a6644c3 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -174,6 +174,13 @@ nfs_file_flush(struct file *file, fl_owner_t id)
174 if ((file->f_mode & FMODE_WRITE) == 0) 174 if ((file->f_mode & FMODE_WRITE) == 0)
175 return 0; 175 return 0;
176 176
177 /*
178 * If we're holding a write delegation, then just start the i/o
179 * but don't wait for completion (or send a commit).
180 */
181 if (nfs_have_delegation(inode, FMODE_WRITE))
182 return filemap_fdatawrite(file->f_mapping);
183
177 /* Flush writes to the server and return any errors */ 184 /* Flush writes to the server and return any errors */
178 return vfs_fsync(file, 0); 185 return vfs_fsync(file, 0);
179} 186}
diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c
index ae65c16b3670..c817787fbdb4 100644
--- a/fs/nfs/fscache.c
+++ b/fs/nfs/fscache.c
@@ -64,23 +64,12 @@ void nfs_fscache_release_client_cookie(struct nfs_client *clp)
64 * either by the 'fsc=xxx' option to mount, or by inheriting it from the parent 64 * either by the 'fsc=xxx' option to mount, or by inheriting it from the parent
65 * superblock across an automount point of some nature. 65 * superblock across an automount point of some nature.
66 */ 66 */
67void nfs_fscache_get_super_cookie(struct super_block *sb, const char *uniq, 67void nfs_fscache_get_super_cookie(struct super_block *sb, const char *uniq, int ulen)
68 struct nfs_clone_mount *mntdata)
69{ 68{
70 struct nfs_fscache_key *key, *xkey; 69 struct nfs_fscache_key *key, *xkey;
71 struct nfs_server *nfss = NFS_SB(sb); 70 struct nfs_server *nfss = NFS_SB(sb);
72 struct rb_node **p, *parent; 71 struct rb_node **p, *parent;
73 int diff, ulen; 72 int diff;
74
75 if (uniq) {
76 ulen = strlen(uniq);
77 } else if (mntdata) {
78 struct nfs_server *mnt_s = NFS_SB(mntdata->sb);
79 if (mnt_s->fscache_key) {
80 uniq = mnt_s->fscache_key->key.uniquifier;
81 ulen = mnt_s->fscache_key->key.uniq_len;
82 }
83 }
84 73
85 if (!uniq) { 74 if (!uniq) {
86 uniq = ""; 75 uniq = "";
diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h
index b9c572d0679f..c5b11b53ff33 100644
--- a/fs/nfs/fscache.h
+++ b/fs/nfs/fscache.h
@@ -73,9 +73,7 @@ extern void nfs_fscache_unregister(void);
73extern void nfs_fscache_get_client_cookie(struct nfs_client *); 73extern void nfs_fscache_get_client_cookie(struct nfs_client *);
74extern void nfs_fscache_release_client_cookie(struct nfs_client *); 74extern void nfs_fscache_release_client_cookie(struct nfs_client *);
75 75
76extern void nfs_fscache_get_super_cookie(struct super_block *, 76extern void nfs_fscache_get_super_cookie(struct super_block *, const char *, int);
77 const char *,
78 struct nfs_clone_mount *);
79extern void nfs_fscache_release_super_cookie(struct super_block *); 77extern void nfs_fscache_release_super_cookie(struct super_block *);
80 78
81extern void nfs_fscache_init_inode_cookie(struct inode *); 79extern void nfs_fscache_init_inode_cookie(struct inode *);
@@ -172,12 +170,6 @@ static inline void nfs_fscache_unregister(void) {}
172static inline void nfs_fscache_get_client_cookie(struct nfs_client *clp) {} 170static inline void nfs_fscache_get_client_cookie(struct nfs_client *clp) {}
173static inline void nfs_fscache_release_client_cookie(struct nfs_client *clp) {} 171static inline void nfs_fscache_release_client_cookie(struct nfs_client *clp) {}
174 172
175static inline void nfs_fscache_get_super_cookie(
176 struct super_block *sb,
177 const char *uniq,
178 struct nfs_clone_mount *mntdata)
179{
180}
181static inline void nfs_fscache_release_super_cookie(struct super_block *sb) {} 173static inline void nfs_fscache_release_super_cookie(struct super_block *sb) {}
182 174
183static inline void nfs_fscache_init_inode_cookie(struct inode *inode) {} 175static inline void nfs_fscache_init_inode_cookie(struct inode *inode) {}
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index 4ca6f5c8038e..8abfb19bd3aa 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -150,7 +150,7 @@ int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh)
150 goto out; 150 goto out;
151 151
152 /* Start by getting the root filehandle from the server */ 152 /* Start by getting the root filehandle from the server */
153 ret = server->nfs_client->rpc_ops->getroot(server, mntfh, &fsinfo); 153 ret = nfs4_proc_get_rootfh(server, mntfh, &fsinfo);
154 if (ret < 0) { 154 if (ret < 0) {
155 dprintk("nfs4_get_rootfh: getroot error = %d\n", -ret); 155 dprintk("nfs4_get_rootfh: getroot error = %d\n", -ret);
156 goto out; 156 goto out;
@@ -178,87 +178,4 @@ out:
178 return ret; 178 return ret;
179} 179}
180 180
181/*
182 * get an NFS4 root dentry from the root filehandle
183 */
184struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh,
185 const char *devname)
186{
187 struct nfs_server *server = NFS_SB(sb);
188 struct nfs_fattr *fattr = NULL;
189 struct dentry *ret;
190 struct inode *inode;
191 void *name = kstrdup(devname, GFP_KERNEL);
192 int error;
193
194 dprintk("--> nfs4_get_root()\n");
195
196 if (!name)
197 return ERR_PTR(-ENOMEM);
198
199 /* get the info about the server and filesystem */
200 error = nfs4_server_capabilities(server, mntfh);
201 if (error < 0) {
202 dprintk("nfs_get_root: getcaps error = %d\n",
203 -error);
204 kfree(name);
205 return ERR_PTR(error);
206 }
207
208 fattr = nfs_alloc_fattr();
209 if (fattr == NULL) {
210 kfree(name);
211 return ERR_PTR(-ENOMEM);
212 }
213
214 /* get the actual root for this mount */
215 error = server->nfs_client->rpc_ops->getattr(server, mntfh, fattr);
216 if (error < 0) {
217 dprintk("nfs_get_root: getattr error = %d\n", -error);
218 ret = ERR_PTR(error);
219 goto out;
220 }
221
222 if (fattr->valid & NFS_ATTR_FATTR_FSID &&
223 !nfs_fsid_equal(&server->fsid, &fattr->fsid))
224 memcpy(&server->fsid, &fattr->fsid, sizeof(server->fsid));
225
226 inode = nfs_fhget(sb, mntfh, fattr);
227 if (IS_ERR(inode)) {
228 dprintk("nfs_get_root: get root inode failed\n");
229 ret = ERR_CAST(inode);
230 goto out;
231 }
232
233 error = nfs_superblock_set_dummy_root(sb, inode);
234 if (error != 0) {
235 ret = ERR_PTR(error);
236 goto out;
237 }
238
239 /* root dentries normally start off anonymous and get spliced in later
240 * if the dentry tree reaches them; however if the dentry already
241 * exists, we'll pick it up at this point and use it as the root
242 */
243 ret = d_obtain_alias(inode);
244 if (IS_ERR(ret)) {
245 dprintk("nfs_get_root: get root dentry failed\n");
246 goto out;
247 }
248
249 security_d_instantiate(ret, inode);
250 spin_lock(&ret->d_lock);
251 if (IS_ROOT(ret) && !(ret->d_flags & DCACHE_NFSFS_RENAMED)) {
252 ret->d_fsdata = name;
253 name = NULL;
254 }
255 spin_unlock(&ret->d_lock);
256out:
257 if (name)
258 kfree(name);
259 nfs_free_fattr(fattr);
260 dprintk("<-- nfs4_get_root()\n");
261 return ret;
262}
263
264#endif /* CONFIG_NFS_V4 */ 181#endif /* CONFIG_NFS_V4 */
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index e8bbfa5b3500..9ad81ce0c40f 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -285,9 +285,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
285 inode->i_mode = fattr->mode; 285 inode->i_mode = fattr->mode;
286 if ((fattr->valid & NFS_ATTR_FATTR_MODE) == 0 286 if ((fattr->valid & NFS_ATTR_FATTR_MODE) == 0
287 && nfs_server_capable(inode, NFS_CAP_MODE)) 287 && nfs_server_capable(inode, NFS_CAP_MODE))
288 nfsi->cache_validity |= NFS_INO_INVALID_ATTR 288 nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
289 | NFS_INO_INVALID_ACCESS
290 | NFS_INO_INVALID_ACL;
291 /* Why so? Because we want revalidate for devices/FIFOs, and 289 /* Why so? Because we want revalidate for devices/FIFOs, and
292 * that's precisely what we have in nfs_file_inode_operations. 290 * that's precisely what we have in nfs_file_inode_operations.
293 */ 291 */
@@ -300,8 +298,6 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
300 inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops; 298 inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops;
301 inode->i_fop = &nfs_dir_operations; 299 inode->i_fop = &nfs_dir_operations;
302 inode->i_data.a_ops = &nfs_dir_aops; 300 inode->i_data.a_ops = &nfs_dir_aops;
303 if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS))
304 set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
305 /* Deal with crossing mountpoints */ 301 /* Deal with crossing mountpoints */
306 if (fattr->valid & NFS_ATTR_FATTR_MOUNTPOINT || 302 if (fattr->valid & NFS_ATTR_FATTR_MOUNTPOINT ||
307 fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) { 303 fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) {
@@ -337,24 +333,19 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
337 if (fattr->valid & NFS_ATTR_FATTR_MTIME) 333 if (fattr->valid & NFS_ATTR_FATTR_MTIME)
338 inode->i_mtime = fattr->mtime; 334 inode->i_mtime = fattr->mtime;
339 else if (nfs_server_capable(inode, NFS_CAP_MTIME)) 335 else if (nfs_server_capable(inode, NFS_CAP_MTIME))
340 nfsi->cache_validity |= NFS_INO_INVALID_ATTR 336 nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
341 | NFS_INO_INVALID_DATA;
342 if (fattr->valid & NFS_ATTR_FATTR_CTIME) 337 if (fattr->valid & NFS_ATTR_FATTR_CTIME)
343 inode->i_ctime = fattr->ctime; 338 inode->i_ctime = fattr->ctime;
344 else if (nfs_server_capable(inode, NFS_CAP_CTIME)) 339 else if (nfs_server_capable(inode, NFS_CAP_CTIME))
345 nfsi->cache_validity |= NFS_INO_INVALID_ATTR 340 nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
346 | NFS_INO_INVALID_ACCESS
347 | NFS_INO_INVALID_ACL;
348 if (fattr->valid & NFS_ATTR_FATTR_CHANGE) 341 if (fattr->valid & NFS_ATTR_FATTR_CHANGE)
349 inode->i_version = fattr->change_attr; 342 inode->i_version = fattr->change_attr;
350 else if (nfs_server_capable(inode, NFS_CAP_CHANGE_ATTR)) 343 else if (nfs_server_capable(inode, NFS_CAP_CHANGE_ATTR))
351 nfsi->cache_validity |= NFS_INO_INVALID_ATTR 344 nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
352 | NFS_INO_INVALID_DATA;
353 if (fattr->valid & NFS_ATTR_FATTR_SIZE) 345 if (fattr->valid & NFS_ATTR_FATTR_SIZE)
354 inode->i_size = nfs_size_to_loff_t(fattr->size); 346 inode->i_size = nfs_size_to_loff_t(fattr->size);
355 else 347 else
356 nfsi->cache_validity |= NFS_INO_INVALID_ATTR 348 nfsi->cache_validity |= NFS_INO_INVALID_ATTR
357 | NFS_INO_INVALID_DATA
358 | NFS_INO_REVAL_PAGECACHE; 349 | NFS_INO_REVAL_PAGECACHE;
359 if (fattr->valid & NFS_ATTR_FATTR_NLINK) 350 if (fattr->valid & NFS_ATTR_FATTR_NLINK)
360 set_nlink(inode, fattr->nlink); 351 set_nlink(inode, fattr->nlink);
@@ -363,15 +354,11 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
363 if (fattr->valid & NFS_ATTR_FATTR_OWNER) 354 if (fattr->valid & NFS_ATTR_FATTR_OWNER)
364 inode->i_uid = fattr->uid; 355 inode->i_uid = fattr->uid;
365 else if (nfs_server_capable(inode, NFS_CAP_OWNER)) 356 else if (nfs_server_capable(inode, NFS_CAP_OWNER))
366 nfsi->cache_validity |= NFS_INO_INVALID_ATTR 357 nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
367 | NFS_INO_INVALID_ACCESS
368 | NFS_INO_INVALID_ACL;
369 if (fattr->valid & NFS_ATTR_FATTR_GROUP) 358 if (fattr->valid & NFS_ATTR_FATTR_GROUP)
370 inode->i_gid = fattr->gid; 359 inode->i_gid = fattr->gid;
371 else if (nfs_server_capable(inode, NFS_CAP_OWNER_GROUP)) 360 else if (nfs_server_capable(inode, NFS_CAP_OWNER_GROUP))
372 nfsi->cache_validity |= NFS_INO_INVALID_ATTR 361 nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
373 | NFS_INO_INVALID_ACCESS
374 | NFS_INO_INVALID_ACL;
375 if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED) 362 if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED)
376 inode->i_blocks = fattr->du.nfs2.blocks; 363 inode->i_blocks = fattr->du.nfs2.blocks;
377 if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) { 364 if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) {
@@ -870,6 +857,15 @@ static int nfs_invalidate_mapping(struct inode *inode, struct address_space *map
870 return 0; 857 return 0;
871} 858}
872 859
860static bool nfs_mapping_need_revalidate_inode(struct inode *inode)
861{
862 if (nfs_have_delegated_attributes(inode))
863 return false;
864 return (NFS_I(inode)->cache_validity & NFS_INO_REVAL_PAGECACHE)
865 || nfs_attribute_timeout(inode)
866 || NFS_STALE(inode);
867}
868
873/** 869/**
874 * nfs_revalidate_mapping - Revalidate the pagecache 870 * nfs_revalidate_mapping - Revalidate the pagecache
875 * @inode - pointer to host inode 871 * @inode - pointer to host inode
@@ -880,9 +876,7 @@ int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
880 struct nfs_inode *nfsi = NFS_I(inode); 876 struct nfs_inode *nfsi = NFS_I(inode);
881 int ret = 0; 877 int ret = 0;
882 878
883 if ((nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE) 879 if (nfs_mapping_need_revalidate_inode(inode)) {
884 || nfs_attribute_cache_expired(inode)
885 || NFS_STALE(inode)) {
886 ret = __nfs_revalidate_inode(NFS_SERVER(inode), inode); 880 ret = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
887 if (ret < 0) 881 if (ret < 0)
888 goto out; 882 goto out;
@@ -948,6 +942,8 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
948 unsigned long invalid = 0; 942 unsigned long invalid = 0;
949 943
950 944
945 if (nfs_have_delegated_attributes(inode))
946 return 0;
951 /* Has the inode gone and changed behind our back? */ 947 /* Has the inode gone and changed behind our back? */
952 if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid) 948 if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid)
953 return -EIO; 949 return -EIO;
@@ -960,7 +956,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
960 956
961 /* Verify a few of the more important attributes */ 957 /* Verify a few of the more important attributes */
962 if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec_equal(&inode->i_mtime, &fattr->mtime)) 958 if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec_equal(&inode->i_mtime, &fattr->mtime))
963 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; 959 invalid |= NFS_INO_INVALID_ATTR;
964 960
965 if (fattr->valid & NFS_ATTR_FATTR_SIZE) { 961 if (fattr->valid & NFS_ATTR_FATTR_SIZE) {
966 cur_size = i_size_read(inode); 962 cur_size = i_size_read(inode);
@@ -1279,14 +1275,26 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1279 nfs_display_fhandle_hash(NFS_FH(inode)), 1275 nfs_display_fhandle_hash(NFS_FH(inode)),
1280 atomic_read(&inode->i_count), fattr->valid); 1276 atomic_read(&inode->i_count), fattr->valid);
1281 1277
1282 if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid) 1278 if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid) {
1283 goto out_fileid; 1279 printk(KERN_ERR "NFS: server %s error: fileid changed\n"
1280 "fsid %s: expected fileid 0x%Lx, got 0x%Lx\n",
1281 NFS_SERVER(inode)->nfs_client->cl_hostname,
1282 inode->i_sb->s_id, (long long)nfsi->fileid,
1283 (long long)fattr->fileid);
1284 goto out_err;
1285 }
1284 1286
1285 /* 1287 /*
1286 * Make sure the inode's type hasn't changed. 1288 * Make sure the inode's type hasn't changed.
1287 */ 1289 */
1288 if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) 1290 if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) {
1289 goto out_changed; 1291 /*
1292 * Big trouble! The inode has become a different object.
1293 */
1294 printk(KERN_DEBUG "NFS: %s: inode %ld mode changed, %07o to %07o\n",
1295 __func__, inode->i_ino, inode->i_mode, fattr->mode);
1296 goto out_err;
1297 }
1290 1298
1291 server = NFS_SERVER(inode); 1299 server = NFS_SERVER(inode);
1292 /* Update the fsid? */ 1300 /* Update the fsid? */
@@ -1314,7 +1322,11 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1314 if (inode->i_version != fattr->change_attr) { 1322 if (inode->i_version != fattr->change_attr) {
1315 dprintk("NFS: change_attr change on server for file %s/%ld\n", 1323 dprintk("NFS: change_attr change on server for file %s/%ld\n",
1316 inode->i_sb->s_id, inode->i_ino); 1324 inode->i_sb->s_id, inode->i_ino);
1317 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; 1325 invalid |= NFS_INO_INVALID_ATTR
1326 | NFS_INO_INVALID_DATA
1327 | NFS_INO_INVALID_ACCESS
1328 | NFS_INO_INVALID_ACL
1329 | NFS_INO_REVAL_PAGECACHE;
1318 if (S_ISDIR(inode->i_mode)) 1330 if (S_ISDIR(inode->i_mode))
1319 nfs_force_lookup_revalidate(inode); 1331 nfs_force_lookup_revalidate(inode);
1320 inode->i_version = fattr->change_attr; 1332 inode->i_version = fattr->change_attr;
@@ -1323,38 +1335,15 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1323 invalid |= save_cache_validity; 1335 invalid |= save_cache_validity;
1324 1336
1325 if (fattr->valid & NFS_ATTR_FATTR_MTIME) { 1337 if (fattr->valid & NFS_ATTR_FATTR_MTIME) {
1326 /* NFSv2/v3: Check if the mtime agrees */ 1338 memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
1327 if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) {
1328 dprintk("NFS: mtime change on server for file %s/%ld\n",
1329 inode->i_sb->s_id, inode->i_ino);
1330 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
1331 if (S_ISDIR(inode->i_mode))
1332 nfs_force_lookup_revalidate(inode);
1333 memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
1334 }
1335 } else if (server->caps & NFS_CAP_MTIME) 1339 } else if (server->caps & NFS_CAP_MTIME)
1336 invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR 1340 invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
1337 | NFS_INO_INVALID_DATA
1338 | NFS_INO_REVAL_PAGECACHE
1339 | NFS_INO_REVAL_FORCED); 1341 | NFS_INO_REVAL_FORCED);
1340 1342
1341 if (fattr->valid & NFS_ATTR_FATTR_CTIME) { 1343 if (fattr->valid & NFS_ATTR_FATTR_CTIME) {
1342 /* If ctime has changed we should definitely clear access+acl caches */ 1344 memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
1343 if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) {
1344 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
1345 /* and probably clear data for a directory too as utimes can cause
1346 * havoc with our cache.
1347 */
1348 if (S_ISDIR(inode->i_mode)) {
1349 invalid |= NFS_INO_INVALID_DATA;
1350 nfs_force_lookup_revalidate(inode);
1351 }
1352 memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
1353 }
1354 } else if (server->caps & NFS_CAP_CTIME) 1345 } else if (server->caps & NFS_CAP_CTIME)
1355 invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR 1346 invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
1356 | NFS_INO_INVALID_ACCESS
1357 | NFS_INO_INVALID_ACL
1358 | NFS_INO_REVAL_FORCED); 1347 | NFS_INO_REVAL_FORCED);
1359 1348
1360 /* Check if our cached file size is stale */ 1349 /* Check if our cached file size is stale */
@@ -1466,12 +1455,6 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1466 nfsi->cache_validity |= invalid; 1455 nfsi->cache_validity |= invalid;
1467 1456
1468 return 0; 1457 return 0;
1469 out_changed:
1470 /*
1471 * Big trouble! The inode has become a different object.
1472 */
1473 printk(KERN_DEBUG "NFS: %s: inode %ld mode changed, %07o to %07o\n",
1474 __func__, inode->i_ino, inode->i_mode, fattr->mode);
1475 out_err: 1458 out_err:
1476 /* 1459 /*
1477 * No need to worry about unhashing the dentry, as the 1460 * No need to worry about unhashing the dentry, as the
@@ -1480,13 +1463,6 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1480 */ 1463 */
1481 nfs_invalidate_inode(inode); 1464 nfs_invalidate_inode(inode);
1482 return -ESTALE; 1465 return -ESTALE;
1483
1484 out_fileid:
1485 printk(KERN_ERR "NFS: server %s error: fileid changed\n"
1486 "fsid %s: expected fileid 0x%Lx, got 0x%Lx\n",
1487 NFS_SERVER(inode)->nfs_client->cl_hostname, inode->i_sb->s_id,
1488 (long long)nfsi->fileid, (long long)fattr->fileid);
1489 goto out_err;
1490} 1466}
1491 1467
1492 1468
@@ -1547,7 +1523,7 @@ static inline void nfs4_init_once(struct nfs_inode *nfsi)
1547 nfsi->delegation_state = 0; 1523 nfsi->delegation_state = 0;
1548 init_rwsem(&nfsi->rwsem); 1524 init_rwsem(&nfsi->rwsem);
1549 nfsi->layout = NULL; 1525 nfsi->layout = NULL;
1550 atomic_set(&nfsi->commits_outstanding, 0); 1526 atomic_set(&nfsi->commit_info.rpcs_out, 0);
1551#endif 1527#endif
1552} 1528}
1553 1529
@@ -1559,9 +1535,9 @@ static void init_once(void *foo)
1559 INIT_LIST_HEAD(&nfsi->open_files); 1535 INIT_LIST_HEAD(&nfsi->open_files);
1560 INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); 1536 INIT_LIST_HEAD(&nfsi->access_cache_entry_lru);
1561 INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); 1537 INIT_LIST_HEAD(&nfsi->access_cache_inode_lru);
1562 INIT_LIST_HEAD(&nfsi->commit_list); 1538 INIT_LIST_HEAD(&nfsi->commit_info.list);
1563 nfsi->npages = 0; 1539 nfsi->npages = 0;
1564 nfsi->ncommit = 0; 1540 nfsi->commit_info.ncommit = 0;
1565 atomic_set(&nfsi->silly_count, 1); 1541 atomic_set(&nfsi->silly_count, 1);
1566 INIT_HLIST_HEAD(&nfsi->silly_list); 1542 INIT_HLIST_HEAD(&nfsi->silly_list);
1567 init_waitqueue_head(&nfsi->waitqueue); 1543 init_waitqueue_head(&nfsi->waitqueue);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index b777bdaba4c5..989959a59f07 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -103,6 +103,7 @@ struct nfs_parsed_mount_data {
103 unsigned int version; 103 unsigned int version;
104 unsigned int minorversion; 104 unsigned int minorversion;
105 char *fscache_uniq; 105 char *fscache_uniq;
106 bool need_mount;
106 107
107 struct { 108 struct {
108 struct sockaddr_storage address; 109 struct sockaddr_storage address;
@@ -171,7 +172,9 @@ extern void nfs_mark_client_ready(struct nfs_client *clp, int state);
171extern int nfs4_check_client_ready(struct nfs_client *clp); 172extern int nfs4_check_client_ready(struct nfs_client *clp);
172extern struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp, 173extern struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
173 const struct sockaddr *ds_addr, 174 const struct sockaddr *ds_addr,
174 int ds_addrlen, int ds_proto); 175 int ds_addrlen, int ds_proto,
176 unsigned int ds_timeo,
177 unsigned int ds_retrans);
175#ifdef CONFIG_PROC_FS 178#ifdef CONFIG_PROC_FS
176extern int __init nfs_fs_proc_init(void); 179extern int __init nfs_fs_proc_init(void);
177extern void nfs_fs_proc_exit(void); 180extern void nfs_fs_proc_exit(void);
@@ -185,21 +188,11 @@ static inline void nfs_fs_proc_exit(void)
185} 188}
186#endif 189#endif
187 190
188/* nfs4namespace.c */
189#ifdef CONFIG_NFS_V4
190extern struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *dentry);
191#else
192static inline
193struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *dentry)
194{
195 return ERR_PTR(-ENOENT);
196}
197#endif
198
199/* callback_xdr.c */ 191/* callback_xdr.c */
200extern struct svc_version nfs4_callback_version1; 192extern struct svc_version nfs4_callback_version1;
201extern struct svc_version nfs4_callback_version4; 193extern struct svc_version nfs4_callback_version4;
202 194
195struct nfs_pageio_descriptor;
203/* pagelist.c */ 196/* pagelist.c */
204extern int __init nfs_init_nfspagecache(void); 197extern int __init nfs_init_nfspagecache(void);
205extern void nfs_destroy_nfspagecache(void); 198extern void nfs_destroy_nfspagecache(void);
@@ -210,9 +203,13 @@ extern void nfs_destroy_writepagecache(void);
210 203
211extern int __init nfs_init_directcache(void); 204extern int __init nfs_init_directcache(void);
212extern void nfs_destroy_directcache(void); 205extern void nfs_destroy_directcache(void);
206extern bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount);
207extern void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
208 struct nfs_pgio_header *hdr,
209 void (*release)(struct nfs_pgio_header *hdr));
210void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos);
213 211
214/* nfs2xdr.c */ 212/* nfs2xdr.c */
215extern int nfs_stat_to_errno(enum nfs_stat);
216extern struct rpc_procinfo nfs_procedures[]; 213extern struct rpc_procinfo nfs_procedures[];
217extern int nfs2_decode_dirent(struct xdr_stream *, 214extern int nfs2_decode_dirent(struct xdr_stream *,
218 struct nfs_entry *, int); 215 struct nfs_entry *, int);
@@ -280,9 +277,10 @@ extern void nfs_sb_deactive(struct super_block *sb);
280extern char *nfs_path(char **p, struct dentry *dentry, 277extern char *nfs_path(char **p, struct dentry *dentry,
281 char *buffer, ssize_t buflen); 278 char *buffer, ssize_t buflen);
282extern struct vfsmount *nfs_d_automount(struct path *path); 279extern struct vfsmount *nfs_d_automount(struct path *path);
283#ifdef CONFIG_NFS_V4 280struct vfsmount *nfs_submount(struct nfs_server *, struct dentry *,
284rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *); 281 struct nfs_fh *, struct nfs_fattr *);
285#endif 282struct vfsmount *nfs_do_submount(struct dentry *, struct nfs_fh *,
283 struct nfs_fattr *, rpc_authflavor_t);
286 284
287/* getroot.c */ 285/* getroot.c */
288extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *, 286extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *,
@@ -294,46 +292,73 @@ extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *,
294extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh); 292extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh);
295#endif 293#endif
296 294
297struct nfs_pageio_descriptor; 295struct nfs_pgio_completion_ops;
298/* read.c */ 296/* read.c */
299extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt, 297extern struct nfs_read_header *nfs_readhdr_alloc(void);
300 const struct rpc_call_ops *call_ops); 298extern void nfs_readhdr_free(struct nfs_pgio_header *hdr);
299extern void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
300 struct inode *inode,
301 const struct nfs_pgio_completion_ops *compl_ops);
302extern int nfs_initiate_read(struct rpc_clnt *clnt,
303 struct nfs_read_data *data,
304 const struct rpc_call_ops *call_ops, int flags);
301extern void nfs_read_prepare(struct rpc_task *task, void *calldata); 305extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
302extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, 306extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
303 struct list_head *head); 307 struct nfs_pgio_header *hdr);
304
305extern void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio, 308extern void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
306 struct inode *inode); 309 struct inode *inode,
310 const struct nfs_pgio_completion_ops *compl_ops);
307extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio); 311extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
308extern void nfs_readdata_release(struct nfs_read_data *rdata); 312extern void nfs_readdata_release(struct nfs_read_data *rdata);
309 313
310/* write.c */ 314/* write.c */
315extern void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
316 struct inode *inode, int ioflags,
317 const struct nfs_pgio_completion_ops *compl_ops);
318extern struct nfs_write_header *nfs_writehdr_alloc(void);
319extern void nfs_writehdr_free(struct nfs_pgio_header *hdr);
311extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc, 320extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
312 struct list_head *head); 321 struct nfs_pgio_header *hdr);
313extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, 322extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
314 struct inode *inode, int ioflags); 323 struct inode *inode, int ioflags,
324 const struct nfs_pgio_completion_ops *compl_ops);
315extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio); 325extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio);
316extern void nfs_writedata_release(struct nfs_write_data *wdata); 326extern void nfs_writedata_release(struct nfs_write_data *wdata);
317extern void nfs_commit_free(struct nfs_write_data *p); 327extern void nfs_commit_free(struct nfs_commit_data *p);
318extern int nfs_initiate_write(struct nfs_write_data *data, 328extern int nfs_initiate_write(struct rpc_clnt *clnt,
319 struct rpc_clnt *clnt, 329 struct nfs_write_data *data,
320 const struct rpc_call_ops *call_ops, 330 const struct rpc_call_ops *call_ops,
321 int how); 331 int how, int flags);
322extern void nfs_write_prepare(struct rpc_task *task, void *calldata); 332extern void nfs_write_prepare(struct rpc_task *task, void *calldata);
323extern int nfs_initiate_commit(struct nfs_write_data *data, 333extern void nfs_commit_prepare(struct rpc_task *task, void *calldata);
324 struct rpc_clnt *clnt, 334extern int nfs_initiate_commit(struct rpc_clnt *clnt,
335 struct nfs_commit_data *data,
325 const struct rpc_call_ops *call_ops, 336 const struct rpc_call_ops *call_ops,
326 int how); 337 int how, int flags);
327extern void nfs_init_commit(struct nfs_write_data *data, 338extern void nfs_init_commit(struct nfs_commit_data *data,
328 struct list_head *head, 339 struct list_head *head,
329 struct pnfs_layout_segment *lseg); 340 struct pnfs_layout_segment *lseg,
341 struct nfs_commit_info *cinfo);
342int nfs_scan_commit_list(struct list_head *src, struct list_head *dst,
343 struct nfs_commit_info *cinfo, int max);
344int nfs_scan_commit(struct inode *inode, struct list_head *dst,
345 struct nfs_commit_info *cinfo);
346void nfs_mark_request_commit(struct nfs_page *req,
347 struct pnfs_layout_segment *lseg,
348 struct nfs_commit_info *cinfo);
349int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
350 int how, struct nfs_commit_info *cinfo);
330void nfs_retry_commit(struct list_head *page_list, 351void nfs_retry_commit(struct list_head *page_list,
331 struct pnfs_layout_segment *lseg); 352 struct pnfs_layout_segment *lseg,
332void nfs_commit_clear_lock(struct nfs_inode *nfsi); 353 struct nfs_commit_info *cinfo);
333void nfs_commitdata_release(void *data); 354void nfs_commitdata_release(struct nfs_commit_data *data);
334void nfs_commit_release_pages(struct nfs_write_data *data); 355void nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst,
335void nfs_request_add_commit_list(struct nfs_page *req, struct list_head *head); 356 struct nfs_commit_info *cinfo);
336void nfs_request_remove_commit_list(struct nfs_page *req); 357void nfs_request_remove_commit_list(struct nfs_page *req,
358 struct nfs_commit_info *cinfo);
359void nfs_init_cinfo(struct nfs_commit_info *cinfo,
360 struct inode *inode,
361 struct nfs_direct_req *dreq);
337 362
338#ifdef CONFIG_MIGRATION 363#ifdef CONFIG_MIGRATION
339extern int nfs_migrate_page(struct address_space *, 364extern int nfs_migrate_page(struct address_space *,
@@ -342,15 +367,17 @@ extern int nfs_migrate_page(struct address_space *,
342#define nfs_migrate_page NULL 367#define nfs_migrate_page NULL
343#endif 368#endif
344 369
370/* direct.c */
371void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo,
372 struct nfs_direct_req *dreq);
373
345/* nfs4proc.c */ 374/* nfs4proc.c */
346extern void __nfs4_read_done_cb(struct nfs_read_data *); 375extern void __nfs4_read_done_cb(struct nfs_read_data *);
347extern void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data);
348extern int nfs4_init_client(struct nfs_client *clp, 376extern int nfs4_init_client(struct nfs_client *clp,
349 const struct rpc_timeout *timeparms, 377 const struct rpc_timeout *timeparms,
350 const char *ip_addr, 378 const char *ip_addr,
351 rpc_authflavor_t authflavour, 379 rpc_authflavor_t authflavour,
352 int noresvport); 380 int noresvport);
353extern void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data);
354extern int _nfs4_call_sync(struct rpc_clnt *clnt, 381extern int _nfs4_call_sync(struct rpc_clnt *clnt,
355 struct nfs_server *server, 382 struct nfs_server *server,
356 struct rpc_message *msg, 383 struct rpc_message *msg,
@@ -466,3 +493,15 @@ unsigned int nfs_page_array_len(unsigned int base, size_t len)
466 PAGE_SIZE - 1) >> PAGE_SHIFT; 493 PAGE_SIZE - 1) >> PAGE_SHIFT;
467} 494}
468 495
496/*
497 * Convert a struct timespec into a 64-bit change attribute
498 *
499 * This does approximately the same thing as timespec_to_ns(),
500 * but for calculation efficiency, we multiply the seconds by
501 * 1024*1024*1024.
502 */
503static inline
504u64 nfs_timespec_to_change_attr(const struct timespec *ts)
505{
506 return ((u64)ts->tv_sec << 30) + ts->tv_nsec;
507}
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index d51868e5683c..08b9c93675da 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -26,11 +26,6 @@ static LIST_HEAD(nfs_automount_list);
26static DECLARE_DELAYED_WORK(nfs_automount_task, nfs_expire_automounts); 26static DECLARE_DELAYED_WORK(nfs_automount_task, nfs_expire_automounts);
27int nfs_mountpoint_expiry_timeout = 500 * HZ; 27int nfs_mountpoint_expiry_timeout = 500 * HZ;
28 28
29static struct vfsmount *nfs_do_submount(struct dentry *dentry,
30 struct nfs_fh *fh,
31 struct nfs_fattr *fattr,
32 rpc_authflavor_t authflavor);
33
34/* 29/*
35 * nfs_path - reconstruct the path given an arbitrary dentry 30 * nfs_path - reconstruct the path given an arbitrary dentry
36 * @base - used to return pointer to the end of devname part of path 31 * @base - used to return pointer to the end of devname part of path
@@ -118,64 +113,6 @@ Elong:
118 return ERR_PTR(-ENAMETOOLONG); 113 return ERR_PTR(-ENAMETOOLONG);
119} 114}
120 115
121#ifdef CONFIG_NFS_V4
122rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors)
123{
124 struct gss_api_mech *mech;
125 struct xdr_netobj oid;
126 int i;
127 rpc_authflavor_t pseudoflavor = RPC_AUTH_UNIX;
128
129 for (i = 0; i < flavors->num_flavors; i++) {
130 struct nfs4_secinfo_flavor *flavor;
131 flavor = &flavors->flavors[i];
132
133 if (flavor->flavor == RPC_AUTH_NULL || flavor->flavor == RPC_AUTH_UNIX) {
134 pseudoflavor = flavor->flavor;
135 break;
136 } else if (flavor->flavor == RPC_AUTH_GSS) {
137 oid.len = flavor->gss.sec_oid4.len;
138 oid.data = flavor->gss.sec_oid4.data;
139 mech = gss_mech_get_by_OID(&oid);
140 if (!mech)
141 continue;
142 pseudoflavor = gss_svc_to_pseudoflavor(mech, flavor->gss.service);
143 gss_mech_put(mech);
144 break;
145 }
146 }
147
148 return pseudoflavor;
149}
150
151static struct rpc_clnt *nfs_lookup_mountpoint(struct inode *dir,
152 struct qstr *name,
153 struct nfs_fh *fh,
154 struct nfs_fattr *fattr)
155{
156 int err;
157
158 if (NFS_PROTO(dir)->version == 4)
159 return nfs4_proc_lookup_mountpoint(dir, name, fh, fattr);
160
161 err = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, name, fh, fattr);
162 if (err)
163 return ERR_PTR(err);
164 return rpc_clone_client(NFS_SERVER(dir)->client);
165}
166#else /* CONFIG_NFS_V4 */
167static inline struct rpc_clnt *nfs_lookup_mountpoint(struct inode *dir,
168 struct qstr *name,
169 struct nfs_fh *fh,
170 struct nfs_fattr *fattr)
171{
172 int err = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, name, fh, fattr);
173 if (err)
174 return ERR_PTR(err);
175 return rpc_clone_client(NFS_SERVER(dir)->client);
176}
177#endif /* CONFIG_NFS_V4 */
178
179/* 116/*
180 * nfs_d_automount - Handle crossing a mountpoint on the server 117 * nfs_d_automount - Handle crossing a mountpoint on the server
181 * @path - The mountpoint 118 * @path - The mountpoint
@@ -191,10 +128,9 @@ static inline struct rpc_clnt *nfs_lookup_mountpoint(struct inode *dir,
191struct vfsmount *nfs_d_automount(struct path *path) 128struct vfsmount *nfs_d_automount(struct path *path)
192{ 129{
193 struct vfsmount *mnt; 130 struct vfsmount *mnt;
194 struct dentry *parent; 131 struct nfs_server *server = NFS_SERVER(path->dentry->d_inode);
195 struct nfs_fh *fh = NULL; 132 struct nfs_fh *fh = NULL;
196 struct nfs_fattr *fattr = NULL; 133 struct nfs_fattr *fattr = NULL;
197 struct rpc_clnt *client;
198 134
199 dprintk("--> nfs_d_automount()\n"); 135 dprintk("--> nfs_d_automount()\n");
200 136
@@ -210,21 +146,7 @@ struct vfsmount *nfs_d_automount(struct path *path)
210 146
211 dprintk("%s: enter\n", __func__); 147 dprintk("%s: enter\n", __func__);
212 148
213 /* Look it up again to get its attributes */ 149 mnt = server->nfs_client->rpc_ops->submount(server, path->dentry, fh, fattr);
214 parent = dget_parent(path->dentry);
215 client = nfs_lookup_mountpoint(parent->d_inode, &path->dentry->d_name, fh, fattr);
216 dput(parent);
217 if (IS_ERR(client)) {
218 mnt = ERR_CAST(client);
219 goto out;
220 }
221
222 if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL)
223 mnt = nfs_do_refmount(client, path->dentry);
224 else
225 mnt = nfs_do_submount(path->dentry, fh, fattr, client->cl_auth->au_flavor);
226 rpc_shutdown_client(client);
227
228 if (IS_ERR(mnt)) 150 if (IS_ERR(mnt))
229 goto out; 151 goto out;
230 152
@@ -297,10 +219,8 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server,
297 * @authflavor - security flavor to use when performing the mount 219 * @authflavor - security flavor to use when performing the mount
298 * 220 *
299 */ 221 */
300static struct vfsmount *nfs_do_submount(struct dentry *dentry, 222struct vfsmount *nfs_do_submount(struct dentry *dentry, struct nfs_fh *fh,
301 struct nfs_fh *fh, 223 struct nfs_fattr *fattr, rpc_authflavor_t authflavor)
302 struct nfs_fattr *fattr,
303 rpc_authflavor_t authflavor)
304{ 224{
305 struct nfs_clone_mount mountdata = { 225 struct nfs_clone_mount mountdata = {
306 .sb = dentry->d_sb, 226 .sb = dentry->d_sb,
@@ -333,3 +253,18 @@ out:
333 dprintk("<-- nfs_do_submount() = %p\n", mnt); 253 dprintk("<-- nfs_do_submount() = %p\n", mnt);
334 return mnt; 254 return mnt;
335} 255}
256
257struct vfsmount *nfs_submount(struct nfs_server *server, struct dentry *dentry,
258 struct nfs_fh *fh, struct nfs_fattr *fattr)
259{
260 int err;
261 struct dentry *parent = dget_parent(dentry);
262
263 /* Look it up again to get its attributes */
264 err = server->nfs_client->rpc_ops->lookup(parent->d_inode, &dentry->d_name, fh, fattr);
265 dput(parent);
266 if (err != 0)
267 return ERR_PTR(err);
268
269 return nfs_do_submount(dentry, fh, fattr, server->client->cl_auth->au_flavor);
270}
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 1f56000fabbd..baf759bccd05 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -61,6 +61,7 @@
61#define NFS_readdirres_sz (1) 61#define NFS_readdirres_sz (1)
62#define NFS_statfsres_sz (1+NFS_info_sz) 62#define NFS_statfsres_sz (1+NFS_info_sz)
63 63
64static int nfs_stat_to_errno(enum nfs_stat);
64 65
65/* 66/*
66 * While encoding arguments, set up the reply buffer in advance to 67 * While encoding arguments, set up the reply buffer in advance to
@@ -313,6 +314,8 @@ static int decode_fattr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
313 p = xdr_decode_time(p, &fattr->atime); 314 p = xdr_decode_time(p, &fattr->atime);
314 p = xdr_decode_time(p, &fattr->mtime); 315 p = xdr_decode_time(p, &fattr->mtime);
315 xdr_decode_time(p, &fattr->ctime); 316 xdr_decode_time(p, &fattr->ctime);
317 fattr->change_attr = nfs_timespec_to_change_attr(&fattr->ctime);
318
316 return 0; 319 return 0;
317out_overflow: 320out_overflow:
318 print_overflow_msg(__func__, xdr); 321 print_overflow_msg(__func__, xdr);
@@ -1109,7 +1112,7 @@ static const struct {
1109 * Returns a local errno value, or -EIO if the NFS status code is 1112 * Returns a local errno value, or -EIO if the NFS status code is
1110 * not recognized. This function is used jointly by NFSv2 and NFSv3. 1113 * not recognized. This function is used jointly by NFSv2 and NFSv3.
1111 */ 1114 */
1112int nfs_stat_to_errno(enum nfs_stat status) 1115static int nfs_stat_to_errno(enum nfs_stat status)
1113{ 1116{
1114 int i; 1117 int i;
1115 1118
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 5242eae6711a..48bcad294161 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -142,7 +142,7 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
142} 142}
143 143
144static int 144static int
145nfs3_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qstr *name, 145nfs3_proc_lookup(struct inode *dir, struct qstr *name,
146 struct nfs_fh *fhandle, struct nfs_fattr *fattr) 146 struct nfs_fh *fhandle, struct nfs_fattr *fattr)
147{ 147{
148 struct nfs3_diropargs arg = { 148 struct nfs3_diropargs arg = {
@@ -811,11 +811,13 @@ nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
811 811
812static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data) 812static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data)
813{ 813{
814 if (nfs3_async_handle_jukebox(task, data->inode)) 814 struct inode *inode = data->header->inode;
815
816 if (nfs3_async_handle_jukebox(task, inode))
815 return -EAGAIN; 817 return -EAGAIN;
816 818
817 nfs_invalidate_atime(data->inode); 819 nfs_invalidate_atime(inode);
818 nfs_refresh_inode(data->inode, &data->fattr); 820 nfs_refresh_inode(inode, &data->fattr);
819 return 0; 821 return 0;
820} 822}
821 823
@@ -831,10 +833,12 @@ static void nfs3_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_da
831 833
832static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data) 834static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data)
833{ 835{
834 if (nfs3_async_handle_jukebox(task, data->inode)) 836 struct inode *inode = data->header->inode;
837
838 if (nfs3_async_handle_jukebox(task, inode))
835 return -EAGAIN; 839 return -EAGAIN;
836 if (task->tk_status >= 0) 840 if (task->tk_status >= 0)
837 nfs_post_op_update_inode_force_wcc(data->inode, data->res.fattr); 841 nfs_post_op_update_inode_force_wcc(inode, data->res.fattr);
838 return 0; 842 return 0;
839} 843}
840 844
@@ -848,7 +852,12 @@ static void nfs3_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_
848 rpc_call_start(task); 852 rpc_call_start(task);
849} 853}
850 854
851static int nfs3_commit_done(struct rpc_task *task, struct nfs_write_data *data) 855static void nfs3_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
856{
857 rpc_call_start(task);
858}
859
860static int nfs3_commit_done(struct rpc_task *task, struct nfs_commit_data *data)
852{ 861{
853 if (nfs3_async_handle_jukebox(task, data->inode)) 862 if (nfs3_async_handle_jukebox(task, data->inode))
854 return -EAGAIN; 863 return -EAGAIN;
@@ -856,7 +865,7 @@ static int nfs3_commit_done(struct rpc_task *task, struct nfs_write_data *data)
856 return 0; 865 return 0;
857} 866}
858 867
859static void nfs3_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg) 868static void nfs3_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg)
860{ 869{
861 msg->rpc_proc = &nfs3_procedures[NFS3PROC_COMMIT]; 870 msg->rpc_proc = &nfs3_procedures[NFS3PROC_COMMIT];
862} 871}
@@ -876,6 +885,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
876 .file_inode_ops = &nfs3_file_inode_operations, 885 .file_inode_ops = &nfs3_file_inode_operations,
877 .file_ops = &nfs_file_operations, 886 .file_ops = &nfs_file_operations,
878 .getroot = nfs3_proc_get_root, 887 .getroot = nfs3_proc_get_root,
888 .submount = nfs_submount,
879 .getattr = nfs3_proc_getattr, 889 .getattr = nfs3_proc_getattr,
880 .setattr = nfs3_proc_setattr, 890 .setattr = nfs3_proc_setattr,
881 .lookup = nfs3_proc_lookup, 891 .lookup = nfs3_proc_lookup,
@@ -907,6 +917,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
907 .write_rpc_prepare = nfs3_proc_write_rpc_prepare, 917 .write_rpc_prepare = nfs3_proc_write_rpc_prepare,
908 .write_done = nfs3_write_done, 918 .write_done = nfs3_write_done,
909 .commit_setup = nfs3_proc_commit_setup, 919 .commit_setup = nfs3_proc_commit_setup,
920 .commit_rpc_prepare = nfs3_proc_commit_rpc_prepare,
910 .commit_done = nfs3_commit_done, 921 .commit_done = nfs3_commit_done,
911 .lock = nfs3_proc_lock, 922 .lock = nfs3_proc_lock,
912 .clear_acl_cache = nfs3_forget_cached_acls, 923 .clear_acl_cache = nfs3_forget_cached_acls,
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index a77cc9a3ce55..902de489ec9b 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -86,6 +86,8 @@
86 XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE)) 86 XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE))
87#define ACL3_setaclres_sz (1+NFS3_post_op_attr_sz) 87#define ACL3_setaclres_sz (1+NFS3_post_op_attr_sz)
88 88
89static int nfs3_stat_to_errno(enum nfs_stat);
90
89/* 91/*
90 * Map file type to S_IFMT bits 92 * Map file type to S_IFMT bits
91 */ 93 */
@@ -675,6 +677,7 @@ static int decode_fattr3(struct xdr_stream *xdr, struct nfs_fattr *fattr)
675 p = xdr_decode_nfstime3(p, &fattr->atime); 677 p = xdr_decode_nfstime3(p, &fattr->atime);
676 p = xdr_decode_nfstime3(p, &fattr->mtime); 678 p = xdr_decode_nfstime3(p, &fattr->mtime);
677 xdr_decode_nfstime3(p, &fattr->ctime); 679 xdr_decode_nfstime3(p, &fattr->ctime);
680 fattr->change_attr = nfs_timespec_to_change_attr(&fattr->ctime);
678 681
679 fattr->valid |= NFS_ATTR_FATTR_V3; 682 fattr->valid |= NFS_ATTR_FATTR_V3;
680 return 0; 683 return 0;
@@ -725,12 +728,14 @@ static int decode_wcc_attr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
725 goto out_overflow; 728 goto out_overflow;
726 729
727 fattr->valid |= NFS_ATTR_FATTR_PRESIZE 730 fattr->valid |= NFS_ATTR_FATTR_PRESIZE
731 | NFS_ATTR_FATTR_PRECHANGE
728 | NFS_ATTR_FATTR_PREMTIME 732 | NFS_ATTR_FATTR_PREMTIME
729 | NFS_ATTR_FATTR_PRECTIME; 733 | NFS_ATTR_FATTR_PRECTIME;
730 734
731 p = xdr_decode_size3(p, &fattr->pre_size); 735 p = xdr_decode_size3(p, &fattr->pre_size);
732 p = xdr_decode_nfstime3(p, &fattr->pre_mtime); 736 p = xdr_decode_nfstime3(p, &fattr->pre_mtime);
733 xdr_decode_nfstime3(p, &fattr->pre_ctime); 737 xdr_decode_nfstime3(p, &fattr->pre_ctime);
738 fattr->pre_change_attr = nfs_timespec_to_change_attr(&fattr->pre_ctime);
734 739
735 return 0; 740 return 0;
736out_overflow: 741out_overflow:
@@ -1287,7 +1292,7 @@ static void nfs3_xdr_enc_readdirplus3args(struct rpc_rqst *req,
1287 * }; 1292 * };
1288 */ 1293 */
1289static void encode_commit3args(struct xdr_stream *xdr, 1294static void encode_commit3args(struct xdr_stream *xdr,
1290 const struct nfs_writeargs *args) 1295 const struct nfs_commitargs *args)
1291{ 1296{
1292 __be32 *p; 1297 __be32 *p;
1293 1298
@@ -1300,7 +1305,7 @@ static void encode_commit3args(struct xdr_stream *xdr,
1300 1305
1301static void nfs3_xdr_enc_commit3args(struct rpc_rqst *req, 1306static void nfs3_xdr_enc_commit3args(struct rpc_rqst *req,
1302 struct xdr_stream *xdr, 1307 struct xdr_stream *xdr,
1303 const struct nfs_writeargs *args) 1308 const struct nfs_commitargs *args)
1304{ 1309{
1305 encode_commit3args(xdr, args); 1310 encode_commit3args(xdr, args);
1306} 1311}
@@ -1385,7 +1390,7 @@ static int nfs3_xdr_dec_getattr3res(struct rpc_rqst *req,
1385out: 1390out:
1386 return error; 1391 return error;
1387out_default: 1392out_default:
1388 return nfs_stat_to_errno(status); 1393 return nfs3_stat_to_errno(status);
1389} 1394}
1390 1395
1391/* 1396/*
@@ -1424,7 +1429,7 @@ static int nfs3_xdr_dec_setattr3res(struct rpc_rqst *req,
1424out: 1429out:
1425 return error; 1430 return error;
1426out_status: 1431out_status:
1427 return nfs_stat_to_errno(status); 1432 return nfs3_stat_to_errno(status);
1428} 1433}
1429 1434
1430/* 1435/*
@@ -1472,7 +1477,7 @@ out_default:
1472 error = decode_post_op_attr(xdr, result->dir_attr); 1477 error = decode_post_op_attr(xdr, result->dir_attr);
1473 if (unlikely(error)) 1478 if (unlikely(error))
1474 goto out; 1479 goto out;
1475 return nfs_stat_to_errno(status); 1480 return nfs3_stat_to_errno(status);
1476} 1481}
1477 1482
1478/* 1483/*
@@ -1513,7 +1518,7 @@ static int nfs3_xdr_dec_access3res(struct rpc_rqst *req,
1513out: 1518out:
1514 return error; 1519 return error;
1515out_default: 1520out_default:
1516 return nfs_stat_to_errno(status); 1521 return nfs3_stat_to_errno(status);
1517} 1522}
1518 1523
1519/* 1524/*
@@ -1554,7 +1559,7 @@ static int nfs3_xdr_dec_readlink3res(struct rpc_rqst *req,
1554out: 1559out:
1555 return error; 1560 return error;
1556out_default: 1561out_default:
1557 return nfs_stat_to_errno(status); 1562 return nfs3_stat_to_errno(status);
1558} 1563}
1559 1564
1560/* 1565/*
@@ -1636,7 +1641,7 @@ static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr,
1636out: 1641out:
1637 return error; 1642 return error;
1638out_status: 1643out_status:
1639 return nfs_stat_to_errno(status); 1644 return nfs3_stat_to_errno(status);
1640} 1645}
1641 1646
1642/* 1647/*
@@ -1706,7 +1711,7 @@ static int nfs3_xdr_dec_write3res(struct rpc_rqst *req, struct xdr_stream *xdr,
1706out: 1711out:
1707 return error; 1712 return error;
1708out_status: 1713out_status:
1709 return nfs_stat_to_errno(status); 1714 return nfs3_stat_to_errno(status);
1710} 1715}
1711 1716
1712/* 1717/*
@@ -1770,7 +1775,7 @@ out_default:
1770 error = decode_wcc_data(xdr, result->dir_attr); 1775 error = decode_wcc_data(xdr, result->dir_attr);
1771 if (unlikely(error)) 1776 if (unlikely(error))
1772 goto out; 1777 goto out;
1773 return nfs_stat_to_errno(status); 1778 return nfs3_stat_to_errno(status);
1774} 1779}
1775 1780
1776/* 1781/*
@@ -1809,7 +1814,7 @@ static int nfs3_xdr_dec_remove3res(struct rpc_rqst *req,
1809out: 1814out:
1810 return error; 1815 return error;
1811out_status: 1816out_status:
1812 return nfs_stat_to_errno(status); 1817 return nfs3_stat_to_errno(status);
1813} 1818}
1814 1819
1815/* 1820/*
@@ -1853,7 +1858,7 @@ static int nfs3_xdr_dec_rename3res(struct rpc_rqst *req,
1853out: 1858out:
1854 return error; 1859 return error;
1855out_status: 1860out_status:
1856 return nfs_stat_to_errno(status); 1861 return nfs3_stat_to_errno(status);
1857} 1862}
1858 1863
1859/* 1864/*
@@ -1896,7 +1901,7 @@ static int nfs3_xdr_dec_link3res(struct rpc_rqst *req, struct xdr_stream *xdr,
1896out: 1901out:
1897 return error; 1902 return error;
1898out_status: 1903out_status:
1899 return nfs_stat_to_errno(status); 1904 return nfs3_stat_to_errno(status);
1900} 1905}
1901 1906
1902/** 1907/**
@@ -2088,7 +2093,7 @@ out_default:
2088 error = decode_post_op_attr(xdr, result->dir_attr); 2093 error = decode_post_op_attr(xdr, result->dir_attr);
2089 if (unlikely(error)) 2094 if (unlikely(error))
2090 goto out; 2095 goto out;
2091 return nfs_stat_to_errno(status); 2096 return nfs3_stat_to_errno(status);
2092} 2097}
2093 2098
2094/* 2099/*
@@ -2156,7 +2161,7 @@ static int nfs3_xdr_dec_fsstat3res(struct rpc_rqst *req,
2156out: 2161out:
2157 return error; 2162 return error;
2158out_status: 2163out_status:
2159 return nfs_stat_to_errno(status); 2164 return nfs3_stat_to_errno(status);
2160} 2165}
2161 2166
2162/* 2167/*
@@ -2232,7 +2237,7 @@ static int nfs3_xdr_dec_fsinfo3res(struct rpc_rqst *req,
2232out: 2237out:
2233 return error; 2238 return error;
2234out_status: 2239out_status:
2235 return nfs_stat_to_errno(status); 2240 return nfs3_stat_to_errno(status);
2236} 2241}
2237 2242
2238/* 2243/*
@@ -2295,7 +2300,7 @@ static int nfs3_xdr_dec_pathconf3res(struct rpc_rqst *req,
2295out: 2300out:
2296 return error; 2301 return error;
2297out_status: 2302out_status:
2298 return nfs_stat_to_errno(status); 2303 return nfs3_stat_to_errno(status);
2299} 2304}
2300 2305
2301/* 2306/*
@@ -2319,7 +2324,7 @@ out_status:
2319 */ 2324 */
2320static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req, 2325static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req,
2321 struct xdr_stream *xdr, 2326 struct xdr_stream *xdr,
2322 struct nfs_writeres *result) 2327 struct nfs_commitres *result)
2323{ 2328{
2324 enum nfs_stat status; 2329 enum nfs_stat status;
2325 int error; 2330 int error;
@@ -2336,7 +2341,7 @@ static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req,
2336out: 2341out:
2337 return error; 2342 return error;
2338out_status: 2343out_status:
2339 return nfs_stat_to_errno(status); 2344 return nfs3_stat_to_errno(status);
2340} 2345}
2341 2346
2342#ifdef CONFIG_NFS_V3_ACL 2347#ifdef CONFIG_NFS_V3_ACL
@@ -2401,7 +2406,7 @@ static int nfs3_xdr_dec_getacl3res(struct rpc_rqst *req,
2401out: 2406out:
2402 return error; 2407 return error;
2403out_default: 2408out_default:
2404 return nfs_stat_to_errno(status); 2409 return nfs3_stat_to_errno(status);
2405} 2410}
2406 2411
2407static int nfs3_xdr_dec_setacl3res(struct rpc_rqst *req, 2412static int nfs3_xdr_dec_setacl3res(struct rpc_rqst *req,
@@ -2420,11 +2425,76 @@ static int nfs3_xdr_dec_setacl3res(struct rpc_rqst *req,
2420out: 2425out:
2421 return error; 2426 return error;
2422out_default: 2427out_default:
2423 return nfs_stat_to_errno(status); 2428 return nfs3_stat_to_errno(status);
2424} 2429}
2425 2430
2426#endif /* CONFIG_NFS_V3_ACL */ 2431#endif /* CONFIG_NFS_V3_ACL */
2427 2432
2433
2434/*
2435 * We need to translate between nfs status return values and
2436 * the local errno values which may not be the same.
2437 */
2438static const struct {
2439 int stat;
2440 int errno;
2441} nfs_errtbl[] = {
2442 { NFS_OK, 0 },
2443 { NFSERR_PERM, -EPERM },
2444 { NFSERR_NOENT, -ENOENT },
2445 { NFSERR_IO, -errno_NFSERR_IO},
2446 { NFSERR_NXIO, -ENXIO },
2447/* { NFSERR_EAGAIN, -EAGAIN }, */
2448 { NFSERR_ACCES, -EACCES },
2449 { NFSERR_EXIST, -EEXIST },
2450 { NFSERR_XDEV, -EXDEV },
2451 { NFSERR_NODEV, -ENODEV },
2452 { NFSERR_NOTDIR, -ENOTDIR },
2453 { NFSERR_ISDIR, -EISDIR },
2454 { NFSERR_INVAL, -EINVAL },
2455 { NFSERR_FBIG, -EFBIG },
2456 { NFSERR_NOSPC, -ENOSPC },
2457 { NFSERR_ROFS, -EROFS },
2458 { NFSERR_MLINK, -EMLINK },
2459 { NFSERR_NAMETOOLONG, -ENAMETOOLONG },
2460 { NFSERR_NOTEMPTY, -ENOTEMPTY },
2461 { NFSERR_DQUOT, -EDQUOT },
2462 { NFSERR_STALE, -ESTALE },
2463 { NFSERR_REMOTE, -EREMOTE },
2464#ifdef EWFLUSH
2465 { NFSERR_WFLUSH, -EWFLUSH },
2466#endif
2467 { NFSERR_BADHANDLE, -EBADHANDLE },
2468 { NFSERR_NOT_SYNC, -ENOTSYNC },
2469 { NFSERR_BAD_COOKIE, -EBADCOOKIE },
2470 { NFSERR_NOTSUPP, -ENOTSUPP },
2471 { NFSERR_TOOSMALL, -ETOOSMALL },
2472 { NFSERR_SERVERFAULT, -EREMOTEIO },
2473 { NFSERR_BADTYPE, -EBADTYPE },
2474 { NFSERR_JUKEBOX, -EJUKEBOX },
2475 { -1, -EIO }
2476};
2477
2478/**
2479 * nfs3_stat_to_errno - convert an NFS status code to a local errno
2480 * @status: NFS status code to convert
2481 *
2482 * Returns a local errno value, or -EIO if the NFS status code is
2483 * not recognized. This function is used jointly by NFSv2 and NFSv3.
2484 */
2485static int nfs3_stat_to_errno(enum nfs_stat status)
2486{
2487 int i;
2488
2489 for (i = 0; nfs_errtbl[i].stat != -1; i++) {
2490 if (nfs_errtbl[i].stat == (int)status)
2491 return nfs_errtbl[i].errno;
2492 }
2493 dprintk("NFS: Unrecognized nfs status value: %u\n", status);
2494 return nfs_errtbl[i].errno;
2495}
2496
2497
2428#define PROC(proc, argtype, restype, timer) \ 2498#define PROC(proc, argtype, restype, timer) \
2429[NFS3PROC_##proc] = { \ 2499[NFS3PROC_##proc] = { \
2430 .p_proc = NFS3PROC_##proc, \ 2500 .p_proc = NFS3PROC_##proc, \
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 8d75021020b3..edeef71f957a 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -206,11 +206,15 @@ extern const struct dentry_operations nfs4_dentry_operations;
206extern const struct inode_operations nfs4_dir_inode_operations; 206extern const struct inode_operations nfs4_dir_inode_operations;
207 207
208/* nfs4namespace.c */ 208/* nfs4namespace.c */
209rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *);
209struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *, struct inode *, struct qstr *); 210struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *, struct inode *, struct qstr *);
211struct vfsmount *nfs4_submount(struct nfs_server *, struct dentry *,
212 struct nfs_fh *, struct nfs_fattr *);
210 213
211/* nfs4proc.c */ 214/* nfs4proc.c */
212extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *); 215extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *);
213extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *); 216extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *);
217extern int nfs4_proc_get_rootfh(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
214extern int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred); 218extern int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred);
215extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *); 219extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *);
216extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *); 220extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *);
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 5acfd9ea8a31..474c6305afd9 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -82,29 +82,84 @@ filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset)
82 BUG(); 82 BUG();
83} 83}
84 84
85static void filelayout_reset_write(struct nfs_write_data *data)
86{
87 struct nfs_pgio_header *hdr = data->header;
88 struct inode *inode = hdr->inode;
89 struct rpc_task *task = &data->task;
90
91 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
92 dprintk("%s Reset task %5u for i/o through MDS "
93 "(req %s/%lld, %u bytes @ offset %llu)\n", __func__,
94 data->task.tk_pid,
95 inode->i_sb->s_id,
96 (long long)NFS_FILEID(inode),
97 data->args.count,
98 (unsigned long long)data->args.offset);
99
100 task->tk_status = pnfs_write_done_resend_to_mds(hdr->inode,
101 &hdr->pages,
102 hdr->completion_ops);
103 }
104 /* balance nfs_get_client in filelayout_write_pagelist */
105 nfs_put_client(data->ds_clp);
106 data->ds_clp = NULL;
107}
108
109static void filelayout_reset_read(struct nfs_read_data *data)
110{
111 struct nfs_pgio_header *hdr = data->header;
112 struct inode *inode = hdr->inode;
113 struct rpc_task *task = &data->task;
114
115 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
116 dprintk("%s Reset task %5u for i/o through MDS "
117 "(req %s/%lld, %u bytes @ offset %llu)\n", __func__,
118 data->task.tk_pid,
119 inode->i_sb->s_id,
120 (long long)NFS_FILEID(inode),
121 data->args.count,
122 (unsigned long long)data->args.offset);
123
124 task->tk_status = pnfs_read_done_resend_to_mds(hdr->inode,
125 &hdr->pages,
126 hdr->completion_ops);
127 }
128 /* balance nfs_get_client in filelayout_read_pagelist */
129 nfs_put_client(data->ds_clp);
130 data->ds_clp = NULL;
131}
132
85static int filelayout_async_handle_error(struct rpc_task *task, 133static int filelayout_async_handle_error(struct rpc_task *task,
86 struct nfs4_state *state, 134 struct nfs4_state *state,
87 struct nfs_client *clp, 135 struct nfs_client *clp,
88 int *reset) 136 struct pnfs_layout_segment *lseg)
89{ 137{
90 struct nfs_server *mds_server = NFS_SERVER(state->inode); 138 struct inode *inode = lseg->pls_layout->plh_inode;
139 struct nfs_server *mds_server = NFS_SERVER(inode);
140 struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg);
91 struct nfs_client *mds_client = mds_server->nfs_client; 141 struct nfs_client *mds_client = mds_server->nfs_client;
142 struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table;
92 143
93 if (task->tk_status >= 0) 144 if (task->tk_status >= 0)
94 return 0; 145 return 0;
95 *reset = 0;
96 146
97 switch (task->tk_status) { 147 switch (task->tk_status) {
98 /* MDS state errors */ 148 /* MDS state errors */
99 case -NFS4ERR_DELEG_REVOKED: 149 case -NFS4ERR_DELEG_REVOKED:
100 case -NFS4ERR_ADMIN_REVOKED: 150 case -NFS4ERR_ADMIN_REVOKED:
101 case -NFS4ERR_BAD_STATEID: 151 case -NFS4ERR_BAD_STATEID:
152 if (state == NULL)
153 break;
102 nfs_remove_bad_delegation(state->inode); 154 nfs_remove_bad_delegation(state->inode);
103 case -NFS4ERR_OPENMODE: 155 case -NFS4ERR_OPENMODE:
156 if (state == NULL)
157 break;
104 nfs4_schedule_stateid_recovery(mds_server, state); 158 nfs4_schedule_stateid_recovery(mds_server, state);
105 goto wait_on_recovery; 159 goto wait_on_recovery;
106 case -NFS4ERR_EXPIRED: 160 case -NFS4ERR_EXPIRED:
107 nfs4_schedule_stateid_recovery(mds_server, state); 161 if (state != NULL)
162 nfs4_schedule_stateid_recovery(mds_server, state);
108 nfs4_schedule_lease_recovery(mds_client); 163 nfs4_schedule_lease_recovery(mds_client);
109 goto wait_on_recovery; 164 goto wait_on_recovery;
110 /* DS session errors */ 165 /* DS session errors */
@@ -127,11 +182,48 @@ static int filelayout_async_handle_error(struct rpc_task *task,
127 break; 182 break;
128 case -NFS4ERR_RETRY_UNCACHED_REP: 183 case -NFS4ERR_RETRY_UNCACHED_REP:
129 break; 184 break;
185 /* Invalidate Layout errors */
186 case -NFS4ERR_PNFS_NO_LAYOUT:
187 case -ESTALE: /* mapped NFS4ERR_STALE */
188 case -EBADHANDLE: /* mapped NFS4ERR_BADHANDLE */
189 case -EISDIR: /* mapped NFS4ERR_ISDIR */
190 case -NFS4ERR_FHEXPIRED:
191 case -NFS4ERR_WRONG_TYPE:
192 dprintk("%s Invalid layout error %d\n", __func__,
193 task->tk_status);
194 /*
195 * Destroy layout so new i/o will get a new layout.
196 * Layout will not be destroyed until all current lseg
197 * references are put. Mark layout as invalid to resend failed
198 * i/o and all i/o waiting on the slot table to the MDS until
199 * layout is destroyed and a new valid layout is obtained.
200 */
201 set_bit(NFS_LAYOUT_INVALID,
202 &NFS_I(state->inode)->layout->plh_flags);
203 pnfs_destroy_layout(NFS_I(state->inode));
204 rpc_wake_up(&tbl->slot_tbl_waitq);
205 goto reset;
206 /* RPC connection errors */
207 case -ECONNREFUSED:
208 case -EHOSTDOWN:
209 case -EHOSTUNREACH:
210 case -ENETUNREACH:
211 case -EIO:
212 case -ETIMEDOUT:
213 case -EPIPE:
214 dprintk("%s DS connection error %d\n", __func__,
215 task->tk_status);
216 if (!filelayout_test_devid_invalid(devid))
217 _pnfs_return_layout(state->inode);
218 filelayout_mark_devid_invalid(devid);
219 rpc_wake_up(&tbl->slot_tbl_waitq);
220 nfs4_ds_disconnect(clp);
221 /* fall through */
130 default: 222 default:
131 dprintk("%s DS error. Retry through MDS %d\n", __func__, 223reset:
224 dprintk("%s Retry through MDS. Error %d\n", __func__,
132 task->tk_status); 225 task->tk_status);
133 *reset = 1; 226 return -NFS4ERR_RESET_TO_MDS;
134 break;
135 } 227 }
136out: 228out:
137 task->tk_status = 0; 229 task->tk_status = 0;
@@ -148,18 +240,17 @@ wait_on_recovery:
148static int filelayout_read_done_cb(struct rpc_task *task, 240static int filelayout_read_done_cb(struct rpc_task *task,
149 struct nfs_read_data *data) 241 struct nfs_read_data *data)
150{ 242{
151 int reset = 0; 243 struct nfs_pgio_header *hdr = data->header;
244 int err;
152 245
153 dprintk("%s DS read\n", __func__); 246 err = filelayout_async_handle_error(task, data->args.context->state,
247 data->ds_clp, hdr->lseg);
154 248
155 if (filelayout_async_handle_error(task, data->args.context->state, 249 switch (err) {
156 data->ds_clp, &reset) == -EAGAIN) { 250 case -NFS4ERR_RESET_TO_MDS:
157 dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n", 251 filelayout_reset_read(data);
158 __func__, data->ds_clp, data->ds_clp->cl_session); 252 return task->tk_status;
159 if (reset) { 253 case -EAGAIN:
160 pnfs_set_lo_fail(data->lseg);
161 nfs4_reset_read(task, data);
162 }
163 rpc_restart_call_prepare(task); 254 rpc_restart_call_prepare(task);
164 return -EAGAIN; 255 return -EAGAIN;
165 } 256 }
@@ -175,13 +266,15 @@ static int filelayout_read_done_cb(struct rpc_task *task,
175static void 266static void
176filelayout_set_layoutcommit(struct nfs_write_data *wdata) 267filelayout_set_layoutcommit(struct nfs_write_data *wdata)
177{ 268{
178 if (FILELAYOUT_LSEG(wdata->lseg)->commit_through_mds || 269 struct nfs_pgio_header *hdr = wdata->header;
270
271 if (FILELAYOUT_LSEG(hdr->lseg)->commit_through_mds ||
179 wdata->res.verf->committed == NFS_FILE_SYNC) 272 wdata->res.verf->committed == NFS_FILE_SYNC)
180 return; 273 return;
181 274
182 pnfs_set_layoutcommit(wdata); 275 pnfs_set_layoutcommit(wdata);
183 dprintk("%s ionde %lu pls_end_pos %lu\n", __func__, wdata->inode->i_ino, 276 dprintk("%s ionde %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino,
184 (unsigned long) NFS_I(wdata->inode)->layout->plh_lwb); 277 (unsigned long) NFS_I(hdr->inode)->layout->plh_lwb);
185} 278}
186 279
187/* 280/*
@@ -191,8 +284,14 @@ filelayout_set_layoutcommit(struct nfs_write_data *wdata)
191 */ 284 */
192static void filelayout_read_prepare(struct rpc_task *task, void *data) 285static void filelayout_read_prepare(struct rpc_task *task, void *data)
193{ 286{
194 struct nfs_read_data *rdata = (struct nfs_read_data *)data; 287 struct nfs_read_data *rdata = data;
195 288
289 if (filelayout_reset_to_mds(rdata->header->lseg)) {
290 dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid);
291 filelayout_reset_read(rdata);
292 rpc_exit(task, 0);
293 return;
294 }
196 rdata->read_done_cb = filelayout_read_done_cb; 295 rdata->read_done_cb = filelayout_read_done_cb;
197 296
198 if (nfs41_setup_sequence(rdata->ds_clp->cl_session, 297 if (nfs41_setup_sequence(rdata->ds_clp->cl_session,
@@ -205,42 +304,47 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data)
205 304
206static void filelayout_read_call_done(struct rpc_task *task, void *data) 305static void filelayout_read_call_done(struct rpc_task *task, void *data)
207{ 306{
208 struct nfs_read_data *rdata = (struct nfs_read_data *)data; 307 struct nfs_read_data *rdata = data;
209 308
210 dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status); 309 dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status);
211 310
311 if (test_bit(NFS_IOHDR_REDO, &rdata->header->flags))
312 return;
313
212 /* Note this may cause RPC to be resent */ 314 /* Note this may cause RPC to be resent */
213 rdata->mds_ops->rpc_call_done(task, data); 315 rdata->header->mds_ops->rpc_call_done(task, data);
214} 316}
215 317
216static void filelayout_read_count_stats(struct rpc_task *task, void *data) 318static void filelayout_read_count_stats(struct rpc_task *task, void *data)
217{ 319{
218 struct nfs_read_data *rdata = (struct nfs_read_data *)data; 320 struct nfs_read_data *rdata = data;
219 321
220 rpc_count_iostats(task, NFS_SERVER(rdata->inode)->client->cl_metrics); 322 rpc_count_iostats(task, NFS_SERVER(rdata->header->inode)->client->cl_metrics);
221} 323}
222 324
223static void filelayout_read_release(void *data) 325static void filelayout_read_release(void *data)
224{ 326{
225 struct nfs_read_data *rdata = (struct nfs_read_data *)data; 327 struct nfs_read_data *rdata = data;
226 328
227 put_lseg(rdata->lseg); 329 if (!test_bit(NFS_IOHDR_REDO, &rdata->header->flags))
228 rdata->mds_ops->rpc_release(data); 330 nfs_put_client(rdata->ds_clp);
331 rdata->header->mds_ops->rpc_release(data);
229} 332}
230 333
231static int filelayout_write_done_cb(struct rpc_task *task, 334static int filelayout_write_done_cb(struct rpc_task *task,
232 struct nfs_write_data *data) 335 struct nfs_write_data *data)
233{ 336{
234 int reset = 0; 337 struct nfs_pgio_header *hdr = data->header;
235 338 int err;
236 if (filelayout_async_handle_error(task, data->args.context->state, 339
237 data->ds_clp, &reset) == -EAGAIN) { 340 err = filelayout_async_handle_error(task, data->args.context->state,
238 dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n", 341 data->ds_clp, hdr->lseg);
239 __func__, data->ds_clp, data->ds_clp->cl_session); 342
240 if (reset) { 343 switch (err) {
241 pnfs_set_lo_fail(data->lseg); 344 case -NFS4ERR_RESET_TO_MDS:
242 nfs4_reset_write(task, data); 345 filelayout_reset_write(data);
243 } 346 return task->tk_status;
347 case -EAGAIN:
244 rpc_restart_call_prepare(task); 348 rpc_restart_call_prepare(task);
245 return -EAGAIN; 349 return -EAGAIN;
246 } 350 }
@@ -250,7 +354,7 @@ static int filelayout_write_done_cb(struct rpc_task *task,
250} 354}
251 355
252/* Fake up some data that will cause nfs_commit_release to retry the writes. */ 356/* Fake up some data that will cause nfs_commit_release to retry the writes. */
253static void prepare_to_resend_writes(struct nfs_write_data *data) 357static void prepare_to_resend_writes(struct nfs_commit_data *data)
254{ 358{
255 struct nfs_page *first = nfs_list_entry(data->pages.next); 359 struct nfs_page *first = nfs_list_entry(data->pages.next);
256 360
@@ -261,19 +365,19 @@ static void prepare_to_resend_writes(struct nfs_write_data *data)
261} 365}
262 366
263static int filelayout_commit_done_cb(struct rpc_task *task, 367static int filelayout_commit_done_cb(struct rpc_task *task,
264 struct nfs_write_data *data) 368 struct nfs_commit_data *data)
265{ 369{
266 int reset = 0; 370 int err;
267 371
268 if (filelayout_async_handle_error(task, data->args.context->state, 372 err = filelayout_async_handle_error(task, NULL, data->ds_clp,
269 data->ds_clp, &reset) == -EAGAIN) { 373 data->lseg);
270 dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n", 374
271 __func__, data->ds_clp, data->ds_clp->cl_session); 375 switch (err) {
272 if (reset) { 376 case -NFS4ERR_RESET_TO_MDS:
273 prepare_to_resend_writes(data); 377 prepare_to_resend_writes(data);
274 pnfs_set_lo_fail(data->lseg); 378 return -EAGAIN;
275 } else 379 case -EAGAIN:
276 rpc_restart_call_prepare(task); 380 rpc_restart_call_prepare(task);
277 return -EAGAIN; 381 return -EAGAIN;
278 } 382 }
279 383
@@ -282,8 +386,14 @@ static int filelayout_commit_done_cb(struct rpc_task *task,
282 386
283static void filelayout_write_prepare(struct rpc_task *task, void *data) 387static void filelayout_write_prepare(struct rpc_task *task, void *data)
284{ 388{
285 struct nfs_write_data *wdata = (struct nfs_write_data *)data; 389 struct nfs_write_data *wdata = data;
286 390
391 if (filelayout_reset_to_mds(wdata->header->lseg)) {
392 dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid);
393 filelayout_reset_write(wdata);
394 rpc_exit(task, 0);
395 return;
396 }
287 if (nfs41_setup_sequence(wdata->ds_clp->cl_session, 397 if (nfs41_setup_sequence(wdata->ds_clp->cl_session,
288 &wdata->args.seq_args, &wdata->res.seq_res, 398 &wdata->args.seq_args, &wdata->res.seq_res,
289 task)) 399 task))
@@ -294,36 +404,66 @@ static void filelayout_write_prepare(struct rpc_task *task, void *data)
294 404
295static void filelayout_write_call_done(struct rpc_task *task, void *data) 405static void filelayout_write_call_done(struct rpc_task *task, void *data)
296{ 406{
297 struct nfs_write_data *wdata = (struct nfs_write_data *)data; 407 struct nfs_write_data *wdata = data;
408
409 if (test_bit(NFS_IOHDR_REDO, &wdata->header->flags))
410 return;
298 411
299 /* Note this may cause RPC to be resent */ 412 /* Note this may cause RPC to be resent */
300 wdata->mds_ops->rpc_call_done(task, data); 413 wdata->header->mds_ops->rpc_call_done(task, data);
301} 414}
302 415
303static void filelayout_write_count_stats(struct rpc_task *task, void *data) 416static void filelayout_write_count_stats(struct rpc_task *task, void *data)
304{ 417{
305 struct nfs_write_data *wdata = (struct nfs_write_data *)data; 418 struct nfs_write_data *wdata = data;
306 419
307 rpc_count_iostats(task, NFS_SERVER(wdata->inode)->client->cl_metrics); 420 rpc_count_iostats(task, NFS_SERVER(wdata->header->inode)->client->cl_metrics);
308} 421}
309 422
310static void filelayout_write_release(void *data) 423static void filelayout_write_release(void *data)
311{ 424{
312 struct nfs_write_data *wdata = (struct nfs_write_data *)data; 425 struct nfs_write_data *wdata = data;
426
427 if (!test_bit(NFS_IOHDR_REDO, &wdata->header->flags))
428 nfs_put_client(wdata->ds_clp);
429 wdata->header->mds_ops->rpc_release(data);
430}
431
432static void filelayout_commit_prepare(struct rpc_task *task, void *data)
433{
434 struct nfs_commit_data *wdata = data;
313 435
314 put_lseg(wdata->lseg); 436 if (nfs41_setup_sequence(wdata->ds_clp->cl_session,
315 wdata->mds_ops->rpc_release(data); 437 &wdata->args.seq_args, &wdata->res.seq_res,
438 task))
439 return;
440
441 rpc_call_start(task);
442}
443
444static void filelayout_write_commit_done(struct rpc_task *task, void *data)
445{
446 struct nfs_commit_data *wdata = data;
447
448 /* Note this may cause RPC to be resent */
449 wdata->mds_ops->rpc_call_done(task, data);
450}
451
452static void filelayout_commit_count_stats(struct rpc_task *task, void *data)
453{
454 struct nfs_commit_data *cdata = data;
455
456 rpc_count_iostats(task, NFS_SERVER(cdata->inode)->client->cl_metrics);
316} 457}
317 458
318static void filelayout_commit_release(void *data) 459static void filelayout_commit_release(void *calldata)
319{ 460{
320 struct nfs_write_data *wdata = (struct nfs_write_data *)data; 461 struct nfs_commit_data *data = calldata;
321 462
322 nfs_commit_release_pages(wdata); 463 data->completion_ops->completion(data);
323 if (atomic_dec_and_test(&NFS_I(wdata->inode)->commits_outstanding)) 464 put_lseg(data->lseg);
324 nfs_commit_clear_lock(NFS_I(wdata->inode)); 465 nfs_put_client(data->ds_clp);
325 put_lseg(wdata->lseg); 466 nfs_commitdata_release(data);
326 nfs_commitdata_release(wdata);
327} 467}
328 468
329static const struct rpc_call_ops filelayout_read_call_ops = { 469static const struct rpc_call_ops filelayout_read_call_ops = {
@@ -341,16 +481,17 @@ static const struct rpc_call_ops filelayout_write_call_ops = {
341}; 481};
342 482
343static const struct rpc_call_ops filelayout_commit_call_ops = { 483static const struct rpc_call_ops filelayout_commit_call_ops = {
344 .rpc_call_prepare = filelayout_write_prepare, 484 .rpc_call_prepare = filelayout_commit_prepare,
345 .rpc_call_done = filelayout_write_call_done, 485 .rpc_call_done = filelayout_write_commit_done,
346 .rpc_count_stats = filelayout_write_count_stats, 486 .rpc_count_stats = filelayout_commit_count_stats,
347 .rpc_release = filelayout_commit_release, 487 .rpc_release = filelayout_commit_release,
348}; 488};
349 489
350static enum pnfs_try_status 490static enum pnfs_try_status
351filelayout_read_pagelist(struct nfs_read_data *data) 491filelayout_read_pagelist(struct nfs_read_data *data)
352{ 492{
353 struct pnfs_layout_segment *lseg = data->lseg; 493 struct nfs_pgio_header *hdr = data->header;
494 struct pnfs_layout_segment *lseg = hdr->lseg;
354 struct nfs4_pnfs_ds *ds; 495 struct nfs4_pnfs_ds *ds;
355 loff_t offset = data->args.offset; 496 loff_t offset = data->args.offset;
356 u32 j, idx; 497 u32 j, idx;
@@ -358,25 +499,20 @@ filelayout_read_pagelist(struct nfs_read_data *data)
358 int status; 499 int status;
359 500
360 dprintk("--> %s ino %lu pgbase %u req %Zu@%llu\n", 501 dprintk("--> %s ino %lu pgbase %u req %Zu@%llu\n",
361 __func__, data->inode->i_ino, 502 __func__, hdr->inode->i_ino,
362 data->args.pgbase, (size_t)data->args.count, offset); 503 data->args.pgbase, (size_t)data->args.count, offset);
363 504
364 if (test_bit(NFS_DEVICEID_INVALID, &FILELAYOUT_DEVID_NODE(lseg)->flags))
365 return PNFS_NOT_ATTEMPTED;
366
367 /* Retrieve the correct rpc_client for the byte range */ 505 /* Retrieve the correct rpc_client for the byte range */
368 j = nfs4_fl_calc_j_index(lseg, offset); 506 j = nfs4_fl_calc_j_index(lseg, offset);
369 idx = nfs4_fl_calc_ds_index(lseg, j); 507 idx = nfs4_fl_calc_ds_index(lseg, j);
370 ds = nfs4_fl_prepare_ds(lseg, idx); 508 ds = nfs4_fl_prepare_ds(lseg, idx);
371 if (!ds) { 509 if (!ds)
372 /* Either layout fh index faulty, or ds connect failed */
373 set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
374 set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
375 return PNFS_NOT_ATTEMPTED; 510 return PNFS_NOT_ATTEMPTED;
376 } 511 dprintk("%s USE DS: %s cl_count %d\n", __func__,
377 dprintk("%s USE DS: %s\n", __func__, ds->ds_remotestr); 512 ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count));
378 513
379 /* No multipath support. Use first DS */ 514 /* No multipath support. Use first DS */
515 atomic_inc(&ds->ds_clp->cl_count);
380 data->ds_clp = ds->ds_clp; 516 data->ds_clp = ds->ds_clp;
381 fh = nfs4_fl_select_ds_fh(lseg, j); 517 fh = nfs4_fl_select_ds_fh(lseg, j);
382 if (fh) 518 if (fh)
@@ -386,8 +522,8 @@ filelayout_read_pagelist(struct nfs_read_data *data)
386 data->mds_offset = offset; 522 data->mds_offset = offset;
387 523
388 /* Perform an asynchronous read to ds */ 524 /* Perform an asynchronous read to ds */
389 status = nfs_initiate_read(data, ds->ds_clp->cl_rpcclient, 525 status = nfs_initiate_read(ds->ds_clp->cl_rpcclient, data,
390 &filelayout_read_call_ops); 526 &filelayout_read_call_ops, RPC_TASK_SOFTCONN);
391 BUG_ON(status != 0); 527 BUG_ON(status != 0);
392 return PNFS_ATTEMPTED; 528 return PNFS_ATTEMPTED;
393} 529}
@@ -396,32 +532,26 @@ filelayout_read_pagelist(struct nfs_read_data *data)
396static enum pnfs_try_status 532static enum pnfs_try_status
397filelayout_write_pagelist(struct nfs_write_data *data, int sync) 533filelayout_write_pagelist(struct nfs_write_data *data, int sync)
398{ 534{
399 struct pnfs_layout_segment *lseg = data->lseg; 535 struct nfs_pgio_header *hdr = data->header;
536 struct pnfs_layout_segment *lseg = hdr->lseg;
400 struct nfs4_pnfs_ds *ds; 537 struct nfs4_pnfs_ds *ds;
401 loff_t offset = data->args.offset; 538 loff_t offset = data->args.offset;
402 u32 j, idx; 539 u32 j, idx;
403 struct nfs_fh *fh; 540 struct nfs_fh *fh;
404 int status; 541 int status;
405 542
406 if (test_bit(NFS_DEVICEID_INVALID, &FILELAYOUT_DEVID_NODE(lseg)->flags))
407 return PNFS_NOT_ATTEMPTED;
408
409 /* Retrieve the correct rpc_client for the byte range */ 543 /* Retrieve the correct rpc_client for the byte range */
410 j = nfs4_fl_calc_j_index(lseg, offset); 544 j = nfs4_fl_calc_j_index(lseg, offset);
411 idx = nfs4_fl_calc_ds_index(lseg, j); 545 idx = nfs4_fl_calc_ds_index(lseg, j);
412 ds = nfs4_fl_prepare_ds(lseg, idx); 546 ds = nfs4_fl_prepare_ds(lseg, idx);
413 if (!ds) { 547 if (!ds)
414 printk(KERN_ERR "NFS: %s: prepare_ds failed, use MDS\n",
415 __func__);
416 set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
417 set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
418 return PNFS_NOT_ATTEMPTED; 548 return PNFS_NOT_ATTEMPTED;
419 } 549 dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s cl_count %d\n",
420 dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s\n", __func__, 550 __func__, hdr->inode->i_ino, sync, (size_t) data->args.count,
421 data->inode->i_ino, sync, (size_t) data->args.count, offset, 551 offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count));
422 ds->ds_remotestr);
423 552
424 data->write_done_cb = filelayout_write_done_cb; 553 data->write_done_cb = filelayout_write_done_cb;
554 atomic_inc(&ds->ds_clp->cl_count);
425 data->ds_clp = ds->ds_clp; 555 data->ds_clp = ds->ds_clp;
426 fh = nfs4_fl_select_ds_fh(lseg, j); 556 fh = nfs4_fl_select_ds_fh(lseg, j);
427 if (fh) 557 if (fh)
@@ -433,8 +563,9 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync)
433 data->args.offset = filelayout_get_dserver_offset(lseg, offset); 563 data->args.offset = filelayout_get_dserver_offset(lseg, offset);
434 564
435 /* Perform an asynchronous write */ 565 /* Perform an asynchronous write */
436 status = nfs_initiate_write(data, ds->ds_clp->cl_rpcclient, 566 status = nfs_initiate_write(ds->ds_clp->cl_rpcclient, data,
437 &filelayout_write_call_ops, sync); 567 &filelayout_write_call_ops, sync,
568 RPC_TASK_SOFTCONN);
438 BUG_ON(status != 0); 569 BUG_ON(status != 0);
439 return PNFS_ATTEMPTED; 570 return PNFS_ATTEMPTED;
440} 571}
@@ -650,10 +781,65 @@ filelayout_free_lseg(struct pnfs_layout_segment *lseg)
650 781
651 dprintk("--> %s\n", __func__); 782 dprintk("--> %s\n", __func__);
652 nfs4_fl_put_deviceid(fl->dsaddr); 783 nfs4_fl_put_deviceid(fl->dsaddr);
653 kfree(fl->commit_buckets); 784 /* This assumes a single RW lseg */
785 if (lseg->pls_range.iomode == IOMODE_RW) {
786 struct nfs4_filelayout *flo;
787
788 flo = FILELAYOUT_FROM_HDR(lseg->pls_layout);
789 flo->commit_info.nbuckets = 0;
790 kfree(flo->commit_info.buckets);
791 flo->commit_info.buckets = NULL;
792 }
654 _filelayout_free_lseg(fl); 793 _filelayout_free_lseg(fl);
655} 794}
656 795
796static int
797filelayout_alloc_commit_info(struct pnfs_layout_segment *lseg,
798 struct nfs_commit_info *cinfo,
799 gfp_t gfp_flags)
800{
801 struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
802 struct pnfs_commit_bucket *buckets;
803 int size;
804
805 if (fl->commit_through_mds)
806 return 0;
807 if (cinfo->ds->nbuckets != 0) {
808 /* This assumes there is only one IOMODE_RW lseg. What
809 * we really want to do is have a layout_hdr level
810 * dictionary of <multipath_list4, fh> keys, each
811 * associated with a struct list_head, populated by calls
812 * to filelayout_write_pagelist().
813 * */
814 return 0;
815 }
816
817 size = (fl->stripe_type == STRIPE_SPARSE) ?
818 fl->dsaddr->ds_num : fl->dsaddr->stripe_count;
819
820 buckets = kcalloc(size, sizeof(struct pnfs_commit_bucket),
821 gfp_flags);
822 if (!buckets)
823 return -ENOMEM;
824 else {
825 int i;
826
827 spin_lock(cinfo->lock);
828 if (cinfo->ds->nbuckets != 0)
829 kfree(buckets);
830 else {
831 cinfo->ds->buckets = buckets;
832 cinfo->ds->nbuckets = size;
833 for (i = 0; i < size; i++) {
834 INIT_LIST_HEAD(&buckets[i].written);
835 INIT_LIST_HEAD(&buckets[i].committing);
836 }
837 }
838 spin_unlock(cinfo->lock);
839 return 0;
840 }
841}
842
657static struct pnfs_layout_segment * 843static struct pnfs_layout_segment *
658filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid, 844filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
659 struct nfs4_layoutget_res *lgr, 845 struct nfs4_layoutget_res *lgr,
@@ -673,29 +859,6 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
673 _filelayout_free_lseg(fl); 859 _filelayout_free_lseg(fl);
674 return NULL; 860 return NULL;
675 } 861 }
676
677 /* This assumes there is only one IOMODE_RW lseg. What
678 * we really want to do is have a layout_hdr level
679 * dictionary of <multipath_list4, fh> keys, each
680 * associated with a struct list_head, populated by calls
681 * to filelayout_write_pagelist().
682 * */
683 if ((!fl->commit_through_mds) && (lgr->range.iomode == IOMODE_RW)) {
684 int i;
685 int size = (fl->stripe_type == STRIPE_SPARSE) ?
686 fl->dsaddr->ds_num : fl->dsaddr->stripe_count;
687
688 fl->commit_buckets = kcalloc(size, sizeof(struct nfs4_fl_commit_bucket), gfp_flags);
689 if (!fl->commit_buckets) {
690 filelayout_free_lseg(&fl->generic_hdr);
691 return NULL;
692 }
693 fl->number_of_buckets = size;
694 for (i = 0; i < size; i++) {
695 INIT_LIST_HEAD(&fl->commit_buckets[i].written);
696 INIT_LIST_HEAD(&fl->commit_buckets[i].committing);
697 }
698 }
699 return &fl->generic_hdr; 862 return &fl->generic_hdr;
700} 863}
701 864
@@ -716,8 +879,8 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
716 !nfs_generic_pg_test(pgio, prev, req)) 879 !nfs_generic_pg_test(pgio, prev, req))
717 return false; 880 return false;
718 881
719 p_stripe = (u64)prev->wb_index << PAGE_CACHE_SHIFT; 882 p_stripe = (u64)req_offset(prev);
720 r_stripe = (u64)req->wb_index << PAGE_CACHE_SHIFT; 883 r_stripe = (u64)req_offset(req);
721 stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit; 884 stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit;
722 885
723 do_div(p_stripe, stripe_unit); 886 do_div(p_stripe, stripe_unit);
@@ -732,6 +895,16 @@ filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio,
732{ 895{
733 BUG_ON(pgio->pg_lseg != NULL); 896 BUG_ON(pgio->pg_lseg != NULL);
734 897
898 if (req->wb_offset != req->wb_pgbase) {
899 /*
900 * Handling unaligned pages is difficult, because have to
901 * somehow split a req in two in certain cases in the
902 * pg.test code. Avoid this by just not using pnfs
903 * in this case.
904 */
905 nfs_pageio_reset_read_mds(pgio);
906 return;
907 }
735 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 908 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
736 req->wb_context, 909 req->wb_context,
737 0, 910 0,
@@ -747,8 +920,13 @@ static void
747filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio, 920filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
748 struct nfs_page *req) 921 struct nfs_page *req)
749{ 922{
923 struct nfs_commit_info cinfo;
924 int status;
925
750 BUG_ON(pgio->pg_lseg != NULL); 926 BUG_ON(pgio->pg_lseg != NULL);
751 927
928 if (req->wb_offset != req->wb_pgbase)
929 goto out_mds;
752 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 930 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
753 req->wb_context, 931 req->wb_context,
754 0, 932 0,
@@ -757,7 +935,17 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
757 GFP_NOFS); 935 GFP_NOFS);
758 /* If no lseg, fall back to write through mds */ 936 /* If no lseg, fall back to write through mds */
759 if (pgio->pg_lseg == NULL) 937 if (pgio->pg_lseg == NULL)
760 nfs_pageio_reset_write_mds(pgio); 938 goto out_mds;
939 nfs_init_cinfo(&cinfo, pgio->pg_inode, pgio->pg_dreq);
940 status = filelayout_alloc_commit_info(pgio->pg_lseg, &cinfo, GFP_NOFS);
941 if (status < 0) {
942 put_lseg(pgio->pg_lseg);
943 pgio->pg_lseg = NULL;
944 goto out_mds;
945 }
946 return;
947out_mds:
948 nfs_pageio_reset_write_mds(pgio);
761} 949}
762 950
763static const struct nfs_pageio_ops filelayout_pg_read_ops = { 951static const struct nfs_pageio_ops filelayout_pg_read_ops = {
@@ -784,43 +972,42 @@ static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j)
784 * If this will make the bucket empty, it will need to put the lseg reference. 972 * If this will make the bucket empty, it will need to put the lseg reference.
785 */ 973 */
786static void 974static void
787filelayout_clear_request_commit(struct nfs_page *req) 975filelayout_clear_request_commit(struct nfs_page *req,
976 struct nfs_commit_info *cinfo)
788{ 977{
789 struct pnfs_layout_segment *freeme = NULL; 978 struct pnfs_layout_segment *freeme = NULL;
790 struct inode *inode = req->wb_context->dentry->d_inode;
791 979
792 spin_lock(&inode->i_lock); 980 spin_lock(cinfo->lock);
793 if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags)) 981 if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags))
794 goto out; 982 goto out;
983 cinfo->ds->nwritten--;
795 if (list_is_singular(&req->wb_list)) { 984 if (list_is_singular(&req->wb_list)) {
796 struct pnfs_layout_segment *lseg; 985 struct pnfs_commit_bucket *bucket;
797 986
798 /* From here we can find the bucket, but for the moment, 987 bucket = list_first_entry(&req->wb_list,
799 * since there is only one relevant lseg... 988 struct pnfs_commit_bucket,
800 */ 989 written);
801 list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) { 990 freeme = bucket->wlseg;
802 if (lseg->pls_range.iomode == IOMODE_RW) { 991 bucket->wlseg = NULL;
803 freeme = lseg;
804 break;
805 }
806 }
807 } 992 }
808out: 993out:
809 nfs_request_remove_commit_list(req); 994 nfs_request_remove_commit_list(req, cinfo);
810 spin_unlock(&inode->i_lock); 995 spin_unlock(cinfo->lock);
811 put_lseg(freeme); 996 put_lseg(freeme);
812} 997}
813 998
814static struct list_head * 999static struct list_head *
815filelayout_choose_commit_list(struct nfs_page *req, 1000filelayout_choose_commit_list(struct nfs_page *req,
816 struct pnfs_layout_segment *lseg) 1001 struct pnfs_layout_segment *lseg,
1002 struct nfs_commit_info *cinfo)
817{ 1003{
818 struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); 1004 struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
819 u32 i, j; 1005 u32 i, j;
820 struct list_head *list; 1006 struct list_head *list;
1007 struct pnfs_commit_bucket *buckets;
821 1008
822 if (fl->commit_through_mds) 1009 if (fl->commit_through_mds)
823 return &NFS_I(req->wb_context->dentry->d_inode)->commit_list; 1010 return &cinfo->mds->list;
824 1011
825 /* Note that we are calling nfs4_fl_calc_j_index on each page 1012 /* Note that we are calling nfs4_fl_calc_j_index on each page
826 * that ends up being committed to a data server. An attractive 1013 * that ends up being committed to a data server. An attractive
@@ -828,31 +1015,33 @@ filelayout_choose_commit_list(struct nfs_page *req,
828 * to store the value calculated in filelayout_write_pagelist 1015 * to store the value calculated in filelayout_write_pagelist
829 * and just use that here. 1016 * and just use that here.
830 */ 1017 */
831 j = nfs4_fl_calc_j_index(lseg, 1018 j = nfs4_fl_calc_j_index(lseg, req_offset(req));
832 (loff_t)req->wb_index << PAGE_CACHE_SHIFT);
833 i = select_bucket_index(fl, j); 1019 i = select_bucket_index(fl, j);
834 list = &fl->commit_buckets[i].written; 1020 buckets = cinfo->ds->buckets;
1021 list = &buckets[i].written;
835 if (list_empty(list)) { 1022 if (list_empty(list)) {
836 /* Non-empty buckets hold a reference on the lseg. That ref 1023 /* Non-empty buckets hold a reference on the lseg. That ref
837 * is normally transferred to the COMMIT call and released 1024 * is normally transferred to the COMMIT call and released
838 * there. It could also be released if the last req is pulled 1025 * there. It could also be released if the last req is pulled
839 * off due to a rewrite, in which case it will be done in 1026 * off due to a rewrite, in which case it will be done in
840 * filelayout_remove_commit_req 1027 * filelayout_clear_request_commit
841 */ 1028 */
842 get_lseg(lseg); 1029 buckets[i].wlseg = get_lseg(lseg);
843 } 1030 }
844 set_bit(PG_COMMIT_TO_DS, &req->wb_flags); 1031 set_bit(PG_COMMIT_TO_DS, &req->wb_flags);
1032 cinfo->ds->nwritten++;
845 return list; 1033 return list;
846} 1034}
847 1035
848static void 1036static void
849filelayout_mark_request_commit(struct nfs_page *req, 1037filelayout_mark_request_commit(struct nfs_page *req,
850 struct pnfs_layout_segment *lseg) 1038 struct pnfs_layout_segment *lseg,
1039 struct nfs_commit_info *cinfo)
851{ 1040{
852 struct list_head *list; 1041 struct list_head *list;
853 1042
854 list = filelayout_choose_commit_list(req, lseg); 1043 list = filelayout_choose_commit_list(req, lseg, cinfo);
855 nfs_request_add_commit_list(req, list); 1044 nfs_request_add_commit_list(req, list, cinfo);
856} 1045}
857 1046
858static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i) 1047static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i)
@@ -880,7 +1069,7 @@ select_ds_fh_from_commit(struct pnfs_layout_segment *lseg, u32 i)
880 return flseg->fh_array[i]; 1069 return flseg->fh_array[i];
881} 1070}
882 1071
883static int filelayout_initiate_commit(struct nfs_write_data *data, int how) 1072static int filelayout_initiate_commit(struct nfs_commit_data *data, int how)
884{ 1073{
885 struct pnfs_layout_segment *lseg = data->lseg; 1074 struct pnfs_layout_segment *lseg = data->lseg;
886 struct nfs4_pnfs_ds *ds; 1075 struct nfs4_pnfs_ds *ds;
@@ -890,135 +1079,137 @@ static int filelayout_initiate_commit(struct nfs_write_data *data, int how)
890 idx = calc_ds_index_from_commit(lseg, data->ds_commit_index); 1079 idx = calc_ds_index_from_commit(lseg, data->ds_commit_index);
891 ds = nfs4_fl_prepare_ds(lseg, idx); 1080 ds = nfs4_fl_prepare_ds(lseg, idx);
892 if (!ds) { 1081 if (!ds) {
893 printk(KERN_ERR "NFS: %s: prepare_ds failed, use MDS\n",
894 __func__);
895 set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
896 set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
897 prepare_to_resend_writes(data); 1082 prepare_to_resend_writes(data);
898 filelayout_commit_release(data); 1083 filelayout_commit_release(data);
899 return -EAGAIN; 1084 return -EAGAIN;
900 } 1085 }
901 dprintk("%s ino %lu, how %d\n", __func__, data->inode->i_ino, how); 1086 dprintk("%s ino %lu, how %d cl_count %d\n", __func__,
902 data->write_done_cb = filelayout_commit_done_cb; 1087 data->inode->i_ino, how, atomic_read(&ds->ds_clp->cl_count));
1088 data->commit_done_cb = filelayout_commit_done_cb;
1089 atomic_inc(&ds->ds_clp->cl_count);
903 data->ds_clp = ds->ds_clp; 1090 data->ds_clp = ds->ds_clp;
904 fh = select_ds_fh_from_commit(lseg, data->ds_commit_index); 1091 fh = select_ds_fh_from_commit(lseg, data->ds_commit_index);
905 if (fh) 1092 if (fh)
906 data->args.fh = fh; 1093 data->args.fh = fh;
907 return nfs_initiate_commit(data, ds->ds_clp->cl_rpcclient, 1094 return nfs_initiate_commit(ds->ds_clp->cl_rpcclient, data,
908 &filelayout_commit_call_ops, how); 1095 &filelayout_commit_call_ops, how,
909} 1096 RPC_TASK_SOFTCONN);
910
911/*
912 * This is only useful while we are using whole file layouts.
913 */
914static struct pnfs_layout_segment *
915find_only_write_lseg_locked(struct inode *inode)
916{
917 struct pnfs_layout_segment *lseg;
918
919 list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list)
920 if (lseg->pls_range.iomode == IOMODE_RW)
921 return lseg;
922 return NULL;
923}
924
925static struct pnfs_layout_segment *find_only_write_lseg(struct inode *inode)
926{
927 struct pnfs_layout_segment *rv;
928
929 spin_lock(&inode->i_lock);
930 rv = find_only_write_lseg_locked(inode);
931 if (rv)
932 get_lseg(rv);
933 spin_unlock(&inode->i_lock);
934 return rv;
935} 1097}
936 1098
937static int 1099static int
938filelayout_scan_ds_commit_list(struct nfs4_fl_commit_bucket *bucket, int max, 1100transfer_commit_list(struct list_head *src, struct list_head *dst,
939 spinlock_t *lock) 1101 struct nfs_commit_info *cinfo, int max)
940{ 1102{
941 struct list_head *src = &bucket->written;
942 struct list_head *dst = &bucket->committing;
943 struct nfs_page *req, *tmp; 1103 struct nfs_page *req, *tmp;
944 int ret = 0; 1104 int ret = 0;
945 1105
946 list_for_each_entry_safe(req, tmp, src, wb_list) { 1106 list_for_each_entry_safe(req, tmp, src, wb_list) {
947 if (!nfs_lock_request(req)) 1107 if (!nfs_lock_request(req))
948 continue; 1108 continue;
949 if (cond_resched_lock(lock)) 1109 if (cond_resched_lock(cinfo->lock))
950 list_safe_reset_next(req, tmp, wb_list); 1110 list_safe_reset_next(req, tmp, wb_list);
951 nfs_request_remove_commit_list(req); 1111 nfs_request_remove_commit_list(req, cinfo);
952 clear_bit(PG_COMMIT_TO_DS, &req->wb_flags); 1112 clear_bit(PG_COMMIT_TO_DS, &req->wb_flags);
953 nfs_list_add_request(req, dst); 1113 nfs_list_add_request(req, dst);
954 ret++; 1114 ret++;
955 if (ret == max) 1115 if ((ret == max) && !cinfo->dreq)
956 break; 1116 break;
957 } 1117 }
958 return ret; 1118 return ret;
959} 1119}
960 1120
1121static int
1122filelayout_scan_ds_commit_list(struct pnfs_commit_bucket *bucket,
1123 struct nfs_commit_info *cinfo,
1124 int max)
1125{
1126 struct list_head *src = &bucket->written;
1127 struct list_head *dst = &bucket->committing;
1128 int ret;
1129
1130 ret = transfer_commit_list(src, dst, cinfo, max);
1131 if (ret) {
1132 cinfo->ds->nwritten -= ret;
1133 cinfo->ds->ncommitting += ret;
1134 bucket->clseg = bucket->wlseg;
1135 if (list_empty(src))
1136 bucket->wlseg = NULL;
1137 else
1138 get_lseg(bucket->clseg);
1139 }
1140 return ret;
1141}
1142
961/* Move reqs from written to committing lists, returning count of number moved. 1143/* Move reqs from written to committing lists, returning count of number moved.
962 * Note called with i_lock held. 1144 * Note called with cinfo->lock held.
963 */ 1145 */
964static int filelayout_scan_commit_lists(struct inode *inode, int max, 1146static int filelayout_scan_commit_lists(struct nfs_commit_info *cinfo,
965 spinlock_t *lock) 1147 int max)
966{ 1148{
967 struct pnfs_layout_segment *lseg;
968 struct nfs4_filelayout_segment *fl;
969 int i, rv = 0, cnt; 1149 int i, rv = 0, cnt;
970 1150
971 lseg = find_only_write_lseg_locked(inode); 1151 for (i = 0; i < cinfo->ds->nbuckets && max != 0; i++) {
972 if (!lseg) 1152 cnt = filelayout_scan_ds_commit_list(&cinfo->ds->buckets[i],
973 goto out_done; 1153 cinfo, max);
974 fl = FILELAYOUT_LSEG(lseg);
975 if (fl->commit_through_mds)
976 goto out_done;
977 for (i = 0; i < fl->number_of_buckets && max != 0; i++) {
978 cnt = filelayout_scan_ds_commit_list(&fl->commit_buckets[i],
979 max, lock);
980 max -= cnt; 1154 max -= cnt;
981 rv += cnt; 1155 rv += cnt;
982 } 1156 }
983out_done:
984 return rv; 1157 return rv;
985} 1158}
986 1159
1160/* Pull everything off the committing lists and dump into @dst */
1161static void filelayout_recover_commit_reqs(struct list_head *dst,
1162 struct nfs_commit_info *cinfo)
1163{
1164 struct pnfs_commit_bucket *b;
1165 int i;
1166
1167 /* NOTE cinfo->lock is NOT held, relying on fact that this is
1168 * only called on single thread per dreq.
1169 * Can't take the lock because need to do put_lseg
1170 */
1171 for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) {
1172 if (transfer_commit_list(&b->written, dst, cinfo, 0)) {
1173 BUG_ON(!list_empty(&b->written));
1174 put_lseg(b->wlseg);
1175 b->wlseg = NULL;
1176 }
1177 }
1178 cinfo->ds->nwritten = 0;
1179}
1180
987static unsigned int 1181static unsigned int
988alloc_ds_commits(struct inode *inode, struct list_head *list) 1182alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list)
989{ 1183{
990 struct pnfs_layout_segment *lseg; 1184 struct pnfs_ds_commit_info *fl_cinfo;
991 struct nfs4_filelayout_segment *fl; 1185 struct pnfs_commit_bucket *bucket;
992 struct nfs_write_data *data; 1186 struct nfs_commit_data *data;
993 int i, j; 1187 int i, j;
994 unsigned int nreq = 0; 1188 unsigned int nreq = 0;
995 1189
996 /* Won't need this when non-whole file layout segments are supported 1190 fl_cinfo = cinfo->ds;
997 * instead we will use a pnfs_layout_hdr structure */ 1191 bucket = fl_cinfo->buckets;
998 lseg = find_only_write_lseg(inode); 1192 for (i = 0; i < fl_cinfo->nbuckets; i++, bucket++) {
999 if (!lseg) 1193 if (list_empty(&bucket->committing))
1000 return 0;
1001 fl = FILELAYOUT_LSEG(lseg);
1002 for (i = 0; i < fl->number_of_buckets; i++) {
1003 if (list_empty(&fl->commit_buckets[i].committing))
1004 continue; 1194 continue;
1005 data = nfs_commitdata_alloc(); 1195 data = nfs_commitdata_alloc();
1006 if (!data) 1196 if (!data)
1007 break; 1197 break;
1008 data->ds_commit_index = i; 1198 data->ds_commit_index = i;
1009 data->lseg = lseg; 1199 data->lseg = bucket->clseg;
1200 bucket->clseg = NULL;
1010 list_add(&data->pages, list); 1201 list_add(&data->pages, list);
1011 nreq++; 1202 nreq++;
1012 } 1203 }
1013 1204
1014 /* Clean up on error */ 1205 /* Clean up on error */
1015 for (j = i; j < fl->number_of_buckets; j++) { 1206 for (j = i; j < fl_cinfo->nbuckets; j++, bucket++) {
1016 if (list_empty(&fl->commit_buckets[i].committing)) 1207 if (list_empty(&bucket->committing))
1017 continue; 1208 continue;
1018 nfs_retry_commit(&fl->commit_buckets[i].committing, lseg); 1209 nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo);
1019 put_lseg(lseg); /* associated with emptying bucket */ 1210 put_lseg(bucket->clseg);
1211 bucket->clseg = NULL;
1020 } 1212 }
1021 put_lseg(lseg);
1022 /* Caller will clean up entries put on list */ 1213 /* Caller will clean up entries put on list */
1023 return nreq; 1214 return nreq;
1024} 1215}
@@ -1026,9 +1217,9 @@ alloc_ds_commits(struct inode *inode, struct list_head *list)
1026/* This follows nfs_commit_list pretty closely */ 1217/* This follows nfs_commit_list pretty closely */
1027static int 1218static int
1028filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages, 1219filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
1029 int how) 1220 int how, struct nfs_commit_info *cinfo)
1030{ 1221{
1031 struct nfs_write_data *data, *tmp; 1222 struct nfs_commit_data *data, *tmp;
1032 LIST_HEAD(list); 1223 LIST_HEAD(list);
1033 unsigned int nreq = 0; 1224 unsigned int nreq = 0;
1034 1225
@@ -1039,30 +1230,34 @@ filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
1039 list_add(&data->pages, &list); 1230 list_add(&data->pages, &list);
1040 nreq++; 1231 nreq++;
1041 } else 1232 } else
1042 nfs_retry_commit(mds_pages, NULL); 1233 nfs_retry_commit(mds_pages, NULL, cinfo);
1043 } 1234 }
1044 1235
1045 nreq += alloc_ds_commits(inode, &list); 1236 nreq += alloc_ds_commits(cinfo, &list);
1046 1237
1047 if (nreq == 0) { 1238 if (nreq == 0) {
1048 nfs_commit_clear_lock(NFS_I(inode)); 1239 cinfo->completion_ops->error_cleanup(NFS_I(inode));
1049 goto out; 1240 goto out;
1050 } 1241 }
1051 1242
1052 atomic_add(nreq, &NFS_I(inode)->commits_outstanding); 1243 atomic_add(nreq, &cinfo->mds->rpcs_out);
1053 1244
1054 list_for_each_entry_safe(data, tmp, &list, pages) { 1245 list_for_each_entry_safe(data, tmp, &list, pages) {
1055 list_del_init(&data->pages); 1246 list_del_init(&data->pages);
1056 if (!data->lseg) { 1247 if (!data->lseg) {
1057 nfs_init_commit(data, mds_pages, NULL); 1248 nfs_init_commit(data, mds_pages, NULL, cinfo);
1058 nfs_initiate_commit(data, NFS_CLIENT(inode), 1249 nfs_initiate_commit(NFS_CLIENT(inode), data,
1059 data->mds_ops, how); 1250 data->mds_ops, how, 0);
1060 } else { 1251 } else {
1061 nfs_init_commit(data, &FILELAYOUT_LSEG(data->lseg)->commit_buckets[data->ds_commit_index].committing, data->lseg); 1252 struct pnfs_commit_bucket *buckets;
1253
1254 buckets = cinfo->ds->buckets;
1255 nfs_init_commit(data, &buckets[data->ds_commit_index].committing, data->lseg, cinfo);
1062 filelayout_initiate_commit(data, how); 1256 filelayout_initiate_commit(data, how);
1063 } 1257 }
1064 } 1258 }
1065out: 1259out:
1260 cinfo->ds->ncommitting = 0;
1066 return PNFS_ATTEMPTED; 1261 return PNFS_ATTEMPTED;
1067} 1262}
1068 1263
@@ -1072,17 +1267,47 @@ filelayout_free_deveiceid_node(struct nfs4_deviceid_node *d)
1072 nfs4_fl_free_deviceid(container_of(d, struct nfs4_file_layout_dsaddr, id_node)); 1267 nfs4_fl_free_deviceid(container_of(d, struct nfs4_file_layout_dsaddr, id_node));
1073} 1268}
1074 1269
1270static struct pnfs_layout_hdr *
1271filelayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)
1272{
1273 struct nfs4_filelayout *flo;
1274
1275 flo = kzalloc(sizeof(*flo), gfp_flags);
1276 return &flo->generic_hdr;
1277}
1278
1279static void
1280filelayout_free_layout_hdr(struct pnfs_layout_hdr *lo)
1281{
1282 kfree(FILELAYOUT_FROM_HDR(lo));
1283}
1284
1285static struct pnfs_ds_commit_info *
1286filelayout_get_ds_info(struct inode *inode)
1287{
1288 struct pnfs_layout_hdr *layout = NFS_I(inode)->layout;
1289
1290 if (layout == NULL)
1291 return NULL;
1292 else
1293 return &FILELAYOUT_FROM_HDR(layout)->commit_info;
1294}
1295
1075static struct pnfs_layoutdriver_type filelayout_type = { 1296static struct pnfs_layoutdriver_type filelayout_type = {
1076 .id = LAYOUT_NFSV4_1_FILES, 1297 .id = LAYOUT_NFSV4_1_FILES,
1077 .name = "LAYOUT_NFSV4_1_FILES", 1298 .name = "LAYOUT_NFSV4_1_FILES",
1078 .owner = THIS_MODULE, 1299 .owner = THIS_MODULE,
1300 .alloc_layout_hdr = filelayout_alloc_layout_hdr,
1301 .free_layout_hdr = filelayout_free_layout_hdr,
1079 .alloc_lseg = filelayout_alloc_lseg, 1302 .alloc_lseg = filelayout_alloc_lseg,
1080 .free_lseg = filelayout_free_lseg, 1303 .free_lseg = filelayout_free_lseg,
1081 .pg_read_ops = &filelayout_pg_read_ops, 1304 .pg_read_ops = &filelayout_pg_read_ops,
1082 .pg_write_ops = &filelayout_pg_write_ops, 1305 .pg_write_ops = &filelayout_pg_write_ops,
1306 .get_ds_info = &filelayout_get_ds_info,
1083 .mark_request_commit = filelayout_mark_request_commit, 1307 .mark_request_commit = filelayout_mark_request_commit,
1084 .clear_request_commit = filelayout_clear_request_commit, 1308 .clear_request_commit = filelayout_clear_request_commit,
1085 .scan_commit_lists = filelayout_scan_commit_lists, 1309 .scan_commit_lists = filelayout_scan_commit_lists,
1310 .recover_commit_reqs = filelayout_recover_commit_reqs,
1086 .commit_pagelist = filelayout_commit_pagelist, 1311 .commit_pagelist = filelayout_commit_pagelist,
1087 .read_pagelist = filelayout_read_pagelist, 1312 .read_pagelist = filelayout_read_pagelist,
1088 .write_pagelist = filelayout_write_pagelist, 1313 .write_pagelist = filelayout_write_pagelist,
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
index 21190bb1f5e3..43fe802dd678 100644
--- a/fs/nfs/nfs4filelayout.h
+++ b/fs/nfs/nfs4filelayout.h
@@ -33,6 +33,13 @@
33#include "pnfs.h" 33#include "pnfs.h"
34 34
35/* 35/*
36 * Default data server connection timeout and retrans values.
37 * Set by module parameters dataserver_timeo and dataserver_retrans.
38 */
39#define NFS4_DEF_DS_TIMEO 60
40#define NFS4_DEF_DS_RETRANS 5
41
42/*
36 * Field testing shows we need to support up to 4096 stripe indices. 43 * Field testing shows we need to support up to 4096 stripe indices.
37 * We store each index as a u8 (u32 on the wire) to keep the memory footprint 44 * We store each index as a u8 (u32 on the wire) to keep the memory footprint
38 * reasonable. This in turn means we support a maximum of 256 45 * reasonable. This in turn means we support a maximum of 256
@@ -41,6 +48,9 @@
41#define NFS4_PNFS_MAX_STRIPE_CNT 4096 48#define NFS4_PNFS_MAX_STRIPE_CNT 4096
42#define NFS4_PNFS_MAX_MULTI_CNT 256 /* 256 fit into a u8 stripe_index */ 49#define NFS4_PNFS_MAX_MULTI_CNT 256 /* 256 fit into a u8 stripe_index */
43 50
51/* error codes for internal use */
52#define NFS4ERR_RESET_TO_MDS 12001
53
44enum stripetype4 { 54enum stripetype4 {
45 STRIPE_SPARSE = 1, 55 STRIPE_SPARSE = 1,
46 STRIPE_DENSE = 2 56 STRIPE_DENSE = 2
@@ -62,23 +72,14 @@ struct nfs4_pnfs_ds {
62 atomic_t ds_count; 72 atomic_t ds_count;
63}; 73};
64 74
65/* nfs4_file_layout_dsaddr flags */
66#define NFS4_DEVICE_ID_NEG_ENTRY 0x00000001
67
68struct nfs4_file_layout_dsaddr { 75struct nfs4_file_layout_dsaddr {
69 struct nfs4_deviceid_node id_node; 76 struct nfs4_deviceid_node id_node;
70 unsigned long flags;
71 u32 stripe_count; 77 u32 stripe_count;
72 u8 *stripe_indices; 78 u8 *stripe_indices;
73 u32 ds_num; 79 u32 ds_num;
74 struct nfs4_pnfs_ds *ds_list[1]; 80 struct nfs4_pnfs_ds *ds_list[1];
75}; 81};
76 82
77struct nfs4_fl_commit_bucket {
78 struct list_head written;
79 struct list_head committing;
80};
81
82struct nfs4_filelayout_segment { 83struct nfs4_filelayout_segment {
83 struct pnfs_layout_segment generic_hdr; 84 struct pnfs_layout_segment generic_hdr;
84 u32 stripe_type; 85 u32 stripe_type;
@@ -89,10 +90,19 @@ struct nfs4_filelayout_segment {
89 struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */ 90 struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */
90 unsigned int num_fh; 91 unsigned int num_fh;
91 struct nfs_fh **fh_array; 92 struct nfs_fh **fh_array;
92 struct nfs4_fl_commit_bucket *commit_buckets; /* Sort commits to ds */
93 int number_of_buckets;
94}; 93};
95 94
95struct nfs4_filelayout {
96 struct pnfs_layout_hdr generic_hdr;
97 struct pnfs_ds_commit_info commit_info;
98};
99
100static inline struct nfs4_filelayout *
101FILELAYOUT_FROM_HDR(struct pnfs_layout_hdr *lo)
102{
103 return container_of(lo, struct nfs4_filelayout, generic_hdr);
104}
105
96static inline struct nfs4_filelayout_segment * 106static inline struct nfs4_filelayout_segment *
97FILELAYOUT_LSEG(struct pnfs_layout_segment *lseg) 107FILELAYOUT_LSEG(struct pnfs_layout_segment *lseg)
98{ 108{
@@ -107,6 +117,36 @@ FILELAYOUT_DEVID_NODE(struct pnfs_layout_segment *lseg)
107 return &FILELAYOUT_LSEG(lseg)->dsaddr->id_node; 117 return &FILELAYOUT_LSEG(lseg)->dsaddr->id_node;
108} 118}
109 119
120static inline void
121filelayout_mark_devid_invalid(struct nfs4_deviceid_node *node)
122{
123 u32 *p = (u32 *)&node->deviceid;
124
125 printk(KERN_WARNING "NFS: Deviceid [%x%x%x%x] marked out of use.\n",
126 p[0], p[1], p[2], p[3]);
127
128 set_bit(NFS_DEVICEID_INVALID, &node->flags);
129}
130
131static inline bool
132filelayout_test_layout_invalid(struct pnfs_layout_hdr *lo)
133{
134 return test_bit(NFS_LAYOUT_INVALID, &lo->plh_flags);
135}
136
137static inline bool
138filelayout_test_devid_invalid(struct nfs4_deviceid_node *node)
139{
140 return test_bit(NFS_DEVICEID_INVALID, &node->flags);
141}
142
143static inline bool
144filelayout_reset_to_mds(struct pnfs_layout_segment *lseg)
145{
146 return filelayout_test_devid_invalid(FILELAYOUT_DEVID_NODE(lseg)) ||
147 filelayout_test_layout_invalid(lseg->pls_layout);
148}
149
110extern struct nfs_fh * 150extern struct nfs_fh *
111nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j); 151nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j);
112 152
@@ -119,5 +159,6 @@ extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr);
119extern void nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); 159extern void nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr);
120struct nfs4_file_layout_dsaddr * 160struct nfs4_file_layout_dsaddr *
121get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags); 161get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags);
162void nfs4_ds_disconnect(struct nfs_client *clp);
122 163
123#endif /* FS_NFS_NFS4FILELAYOUT_H */ 164#endif /* FS_NFS_NFS4FILELAYOUT_H */
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index c9cff9adb2d3..bf49b78db1b3 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -30,12 +30,16 @@
30 30
31#include <linux/nfs_fs.h> 31#include <linux/nfs_fs.h>
32#include <linux/vmalloc.h> 32#include <linux/vmalloc.h>
33#include <linux/module.h>
33 34
34#include "internal.h" 35#include "internal.h"
35#include "nfs4filelayout.h" 36#include "nfs4filelayout.h"
36 37
37#define NFSDBG_FACILITY NFSDBG_PNFS_LD 38#define NFSDBG_FACILITY NFSDBG_PNFS_LD
38 39
40static unsigned int dataserver_timeo = NFS4_DEF_DS_TIMEO;
41static unsigned int dataserver_retrans = NFS4_DEF_DS_RETRANS;
42
39/* 43/*
40 * Data server cache 44 * Data server cache
41 * 45 *
@@ -145,6 +149,28 @@ _data_server_lookup_locked(const struct list_head *dsaddrs)
145} 149}
146 150
147/* 151/*
152 * Lookup DS by nfs_client pointer. Zero data server client pointer
153 */
154void nfs4_ds_disconnect(struct nfs_client *clp)
155{
156 struct nfs4_pnfs_ds *ds;
157 struct nfs_client *found = NULL;
158
159 dprintk("%s clp %p\n", __func__, clp);
160 spin_lock(&nfs4_ds_cache_lock);
161 list_for_each_entry(ds, &nfs4_data_server_cache, ds_node)
162 if (ds->ds_clp && ds->ds_clp == clp) {
163 found = ds->ds_clp;
164 ds->ds_clp = NULL;
165 }
166 spin_unlock(&nfs4_ds_cache_lock);
167 if (found) {
168 set_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state);
169 nfs_put_client(clp);
170 }
171}
172
173/*
148 * Create an rpc connection to the nfs4_pnfs_ds data server 174 * Create an rpc connection to the nfs4_pnfs_ds data server
149 * Currently only supports IPv4 and IPv6 addresses 175 * Currently only supports IPv4 and IPv6 addresses
150 */ 176 */
@@ -165,8 +191,9 @@ nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
165 __func__, ds->ds_remotestr, da->da_remotestr); 191 __func__, ds->ds_remotestr, da->da_remotestr);
166 192
167 clp = nfs4_set_ds_client(mds_srv->nfs_client, 193 clp = nfs4_set_ds_client(mds_srv->nfs_client,
168 (struct sockaddr *)&da->da_addr, 194 (struct sockaddr *)&da->da_addr,
169 da->da_addrlen, IPPROTO_TCP); 195 da->da_addrlen, IPPROTO_TCP,
196 dataserver_timeo, dataserver_retrans);
170 if (!IS_ERR(clp)) 197 if (!IS_ERR(clp))
171 break; 198 break;
172 } 199 }
@@ -791,48 +818,42 @@ nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j)
791 return flseg->fh_array[i]; 818 return flseg->fh_array[i];
792} 819}
793 820
794static void
795filelayout_mark_devid_negative(struct nfs4_file_layout_dsaddr *dsaddr,
796 int err, const char *ds_remotestr)
797{
798 u32 *p = (u32 *)&dsaddr->id_node.deviceid;
799
800 printk(KERN_ERR "NFS: data server %s connection error %d."
801 " Deviceid [%x%x%x%x] marked out of use.\n",
802 ds_remotestr, err, p[0], p[1], p[2], p[3]);
803
804 spin_lock(&nfs4_ds_cache_lock);
805 dsaddr->flags |= NFS4_DEVICE_ID_NEG_ENTRY;
806 spin_unlock(&nfs4_ds_cache_lock);
807}
808
809struct nfs4_pnfs_ds * 821struct nfs4_pnfs_ds *
810nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) 822nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
811{ 823{
812 struct nfs4_file_layout_dsaddr *dsaddr = FILELAYOUT_LSEG(lseg)->dsaddr; 824 struct nfs4_file_layout_dsaddr *dsaddr = FILELAYOUT_LSEG(lseg)->dsaddr;
813 struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx]; 825 struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx];
826 struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg);
827
828 if (filelayout_test_devid_invalid(devid))
829 return NULL;
814 830
815 if (ds == NULL) { 831 if (ds == NULL) {
816 printk(KERN_ERR "NFS: %s: No data server for offset index %d\n", 832 printk(KERN_ERR "NFS: %s: No data server for offset index %d\n",
817 __func__, ds_idx); 833 __func__, ds_idx);
818 return NULL; 834 goto mark_dev_invalid;
819 } 835 }
820 836
821 if (!ds->ds_clp) { 837 if (!ds->ds_clp) {
822 struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode); 838 struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode);
823 int err; 839 int err;
824 840
825 if (dsaddr->flags & NFS4_DEVICE_ID_NEG_ENTRY) {
826 /* Already tried to connect, don't try again */
827 dprintk("%s Deviceid marked out of use\n", __func__);
828 return NULL;
829 }
830 err = nfs4_ds_connect(s, ds); 841 err = nfs4_ds_connect(s, ds);
831 if (err) { 842 if (err)
832 filelayout_mark_devid_negative(dsaddr, err, 843 goto mark_dev_invalid;
833 ds->ds_remotestr);
834 return NULL;
835 }
836 } 844 }
837 return ds; 845 return ds;
846
847mark_dev_invalid:
848 filelayout_mark_devid_invalid(devid);
849 return NULL;
838} 850}
851
852module_param(dataserver_retrans, uint, 0644);
853MODULE_PARM_DESC(dataserver_retrans, "The number of times the NFSv4.1 client "
854 "retries a request before it attempts further "
855 " recovery action.");
856module_param(dataserver_timeo, uint, 0644);
857MODULE_PARM_DESC(dataserver_timeo, "The time (in tenths of a second) the "
858 "NFSv4.1 client waits for a response from a "
859 " data server before it retries an NFS request.");
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index 3f5519b7c8c8..017b4b01a69c 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -132,6 +132,35 @@ static size_t nfs_parse_server_name(char *string, size_t len,
132 return ret; 132 return ret;
133} 133}
134 134
135rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors)
136{
137 struct gss_api_mech *mech;
138 struct xdr_netobj oid;
139 int i;
140 rpc_authflavor_t pseudoflavor = RPC_AUTH_UNIX;
141
142 for (i = 0; i < flavors->num_flavors; i++) {
143 struct nfs4_secinfo_flavor *flavor;
144 flavor = &flavors->flavors[i];
145
146 if (flavor->flavor == RPC_AUTH_NULL || flavor->flavor == RPC_AUTH_UNIX) {
147 pseudoflavor = flavor->flavor;
148 break;
149 } else if (flavor->flavor == RPC_AUTH_GSS) {
150 oid.len = flavor->gss.sec_oid4.len;
151 oid.data = flavor->gss.sec_oid4.data;
152 mech = gss_mech_get_by_OID(&oid);
153 if (!mech)
154 continue;
155 pseudoflavor = gss_svc_to_pseudoflavor(mech, flavor->gss.service);
156 gss_mech_put(mech);
157 break;
158 }
159 }
160
161 return pseudoflavor;
162}
163
135static rpc_authflavor_t nfs4_negotiate_security(struct inode *inode, struct qstr *name) 164static rpc_authflavor_t nfs4_negotiate_security(struct inode *inode, struct qstr *name)
136{ 165{
137 struct page *page; 166 struct page *page;
@@ -300,7 +329,7 @@ out:
300 * @dentry - dentry of referral 329 * @dentry - dentry of referral
301 * 330 *
302 */ 331 */
303struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *dentry) 332static struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *dentry)
304{ 333{
305 struct vfsmount *mnt = ERR_PTR(-ENOMEM); 334 struct vfsmount *mnt = ERR_PTR(-ENOMEM);
306 struct dentry *parent; 335 struct dentry *parent;
@@ -341,3 +370,25 @@ out:
341 dprintk("%s: done\n", __func__); 370 dprintk("%s: done\n", __func__);
342 return mnt; 371 return mnt;
343} 372}
373
374struct vfsmount *nfs4_submount(struct nfs_server *server, struct dentry *dentry,
375 struct nfs_fh *fh, struct nfs_fattr *fattr)
376{
377 struct dentry *parent = dget_parent(dentry);
378 struct rpc_clnt *client;
379 struct vfsmount *mnt;
380
381 /* Look it up again to get its attributes and sec flavor */
382 client = nfs4_proc_lookup_mountpoint(parent->d_inode, &dentry->d_name, fh, fattr);
383 dput(parent);
384 if (IS_ERR(client))
385 return ERR_CAST(client);
386
387 if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL)
388 mnt = nfs_do_refmount(client, dentry);
389 else
390 mnt = nfs_do_submount(dentry, fh, fattr, client->cl_auth->au_flavor);
391
392 rpc_shutdown_client(client);
393 return mnt;
394}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 99650aaf8937..78784e5ca4c1 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -80,6 +80,7 @@ static int _nfs4_recover_proc_open(struct nfs4_opendata *data);
80static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); 80static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
81static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *); 81static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *);
82static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr); 82static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr);
83static int nfs4_proc_getattr(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *);
83static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr); 84static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr);
84static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, 85static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
85 struct nfs_fattr *fattr, struct iattr *sattr, 86 struct nfs_fattr *fattr, struct iattr *sattr,
@@ -788,7 +789,6 @@ struct nfs4_opendata {
788 struct nfs4_string owner_name; 789 struct nfs4_string owner_name;
789 struct nfs4_string group_name; 790 struct nfs4_string group_name;
790 struct nfs_fattr f_attr; 791 struct nfs_fattr f_attr;
791 struct nfs_fattr dir_attr;
792 struct dentry *dir; 792 struct dentry *dir;
793 struct dentry *dentry; 793 struct dentry *dentry;
794 struct nfs4_state_owner *owner; 794 struct nfs4_state_owner *owner;
@@ -804,12 +804,10 @@ struct nfs4_opendata {
804static void nfs4_init_opendata_res(struct nfs4_opendata *p) 804static void nfs4_init_opendata_res(struct nfs4_opendata *p)
805{ 805{
806 p->o_res.f_attr = &p->f_attr; 806 p->o_res.f_attr = &p->f_attr;
807 p->o_res.dir_attr = &p->dir_attr;
808 p->o_res.seqid = p->o_arg.seqid; 807 p->o_res.seqid = p->o_arg.seqid;
809 p->c_res.seqid = p->c_arg.seqid; 808 p->c_res.seqid = p->c_arg.seqid;
810 p->o_res.server = p->o_arg.server; 809 p->o_res.server = p->o_arg.server;
811 nfs_fattr_init(&p->f_attr); 810 nfs_fattr_init(&p->f_attr);
812 nfs_fattr_init(&p->dir_attr);
813 nfs_fattr_init_names(&p->f_attr, &p->owner_name, &p->group_name); 811 nfs_fattr_init_names(&p->f_attr, &p->owner_name, &p->group_name);
814} 812}
815 813
@@ -843,7 +841,6 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
843 p->o_arg.name = &dentry->d_name; 841 p->o_arg.name = &dentry->d_name;
844 p->o_arg.server = server; 842 p->o_arg.server = server;
845 p->o_arg.bitmask = server->attr_bitmask; 843 p->o_arg.bitmask = server->attr_bitmask;
846 p->o_arg.dir_bitmask = server->cache_consistency_bitmask;
847 p->o_arg.claim = NFS4_OPEN_CLAIM_NULL; 844 p->o_arg.claim = NFS4_OPEN_CLAIM_NULL;
848 if (attrs != NULL && attrs->ia_valid != 0) { 845 if (attrs != NULL && attrs->ia_valid != 0) {
849 __be32 verf[2]; 846 __be32 verf[2];
@@ -1611,8 +1608,6 @@ static int _nfs4_recover_proc_open(struct nfs4_opendata *data)
1611 1608
1612 nfs_fattr_map_and_free_names(NFS_SERVER(dir), &data->f_attr); 1609 nfs_fattr_map_and_free_names(NFS_SERVER(dir), &data->f_attr);
1613 1610
1614 nfs_refresh_inode(dir, o_res->dir_attr);
1615
1616 if (o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) { 1611 if (o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) {
1617 status = _nfs4_proc_open_confirm(data); 1612 status = _nfs4_proc_open_confirm(data);
1618 if (status != 0) 1613 if (status != 0)
@@ -1645,11 +1640,8 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
1645 1640
1646 nfs_fattr_map_and_free_names(server, &data->f_attr); 1641 nfs_fattr_map_and_free_names(server, &data->f_attr);
1647 1642
1648 if (o_arg->open_flags & O_CREAT) { 1643 if (o_arg->open_flags & O_CREAT)
1649 update_changeattr(dir, &o_res->cinfo); 1644 update_changeattr(dir, &o_res->cinfo);
1650 nfs_post_op_update_inode(dir, o_res->dir_attr);
1651 } else
1652 nfs_refresh_inode(dir, o_res->dir_attr);
1653 if ((o_res->rflags & NFS4_OPEN_RESULT_LOCKTYPE_POSIX) == 0) 1645 if ((o_res->rflags & NFS4_OPEN_RESULT_LOCKTYPE_POSIX) == 0)
1654 server->caps &= ~NFS_CAP_POSIX_LOCK; 1646 server->caps &= ~NFS_CAP_POSIX_LOCK;
1655 if(o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) { 1647 if(o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) {
@@ -2354,8 +2346,8 @@ static int nfs4_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
2354/* 2346/*
2355 * get the file handle for the "/" directory on the server 2347 * get the file handle for the "/" directory on the server
2356 */ 2348 */
2357static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, 2349int nfs4_proc_get_rootfh(struct nfs_server *server, struct nfs_fh *fhandle,
2358 struct nfs_fsinfo *info) 2350 struct nfs_fsinfo *info)
2359{ 2351{
2360 int minor_version = server->nfs_client->cl_minorversion; 2352 int minor_version = server->nfs_client->cl_minorversion;
2361 int status = nfs4_lookup_root(server, fhandle, info); 2353 int status = nfs4_lookup_root(server, fhandle, info);
@@ -2372,6 +2364,31 @@ static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
2372 return nfs4_map_errors(status); 2364 return nfs4_map_errors(status);
2373} 2365}
2374 2366
2367static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *mntfh,
2368 struct nfs_fsinfo *info)
2369{
2370 int error;
2371 struct nfs_fattr *fattr = info->fattr;
2372
2373 error = nfs4_server_capabilities(server, mntfh);
2374 if (error < 0) {
2375 dprintk("nfs4_get_root: getcaps error = %d\n", -error);
2376 return error;
2377 }
2378
2379 error = nfs4_proc_getattr(server, mntfh, fattr);
2380 if (error < 0) {
2381 dprintk("nfs4_get_root: getattr error = %d\n", -error);
2382 return error;
2383 }
2384
2385 if (fattr->valid & NFS_ATTR_FATTR_FSID &&
2386 !nfs_fsid_equal(&server->fsid, &fattr->fsid))
2387 memcpy(&server->fsid, &fattr->fsid, sizeof(server->fsid));
2388
2389 return error;
2390}
2391
2375/* 2392/*
2376 * Get locations and (maybe) other attributes of a referral. 2393 * Get locations and (maybe) other attributes of a referral.
2377 * Note that we'll actually follow the referral later when 2394 * Note that we'll actually follow the referral later when
@@ -2578,7 +2595,7 @@ out:
2578 return err; 2595 return err;
2579} 2596}
2580 2597
2581static int nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qstr *name, 2598static int nfs4_proc_lookup(struct inode *dir, struct qstr *name,
2582 struct nfs_fh *fhandle, struct nfs_fattr *fattr) 2599 struct nfs_fh *fhandle, struct nfs_fattr *fattr)
2583{ 2600{
2584 int status; 2601 int status;
@@ -2784,7 +2801,6 @@ static int _nfs4_proc_remove(struct inode *dir, struct qstr *name)
2784 .fh = NFS_FH(dir), 2801 .fh = NFS_FH(dir),
2785 .name.len = name->len, 2802 .name.len = name->len,
2786 .name.name = name->name, 2803 .name.name = name->name,
2787 .bitmask = server->attr_bitmask,
2788 }; 2804 };
2789 struct nfs_removeres res = { 2805 struct nfs_removeres res = {
2790 .server = server, 2806 .server = server,
@@ -2794,19 +2810,11 @@ static int _nfs4_proc_remove(struct inode *dir, struct qstr *name)
2794 .rpc_argp = &args, 2810 .rpc_argp = &args,
2795 .rpc_resp = &res, 2811 .rpc_resp = &res,
2796 }; 2812 };
2797 int status = -ENOMEM; 2813 int status;
2798
2799 res.dir_attr = nfs_alloc_fattr();
2800 if (res.dir_attr == NULL)
2801 goto out;
2802 2814
2803 status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 1); 2815 status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 1);
2804 if (status == 0) { 2816 if (status == 0)
2805 update_changeattr(dir, &res.cinfo); 2817 update_changeattr(dir, &res.cinfo);
2806 nfs_post_op_update_inode(dir, res.dir_attr);
2807 }
2808 nfs_free_fattr(res.dir_attr);
2809out:
2810 return status; 2818 return status;
2811} 2819}
2812 2820
@@ -2828,7 +2836,6 @@ static void nfs4_proc_unlink_setup(struct rpc_message *msg, struct inode *dir)
2828 struct nfs_removeargs *args = msg->rpc_argp; 2836 struct nfs_removeargs *args = msg->rpc_argp;
2829 struct nfs_removeres *res = msg->rpc_resp; 2837 struct nfs_removeres *res = msg->rpc_resp;
2830 2838
2831 args->bitmask = server->cache_consistency_bitmask;
2832 res->server = server; 2839 res->server = server;
2833 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE]; 2840 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE];
2834 nfs41_init_sequence(&args->seq_args, &res->seq_res, 1); 2841 nfs41_init_sequence(&args->seq_args, &res->seq_res, 1);
@@ -2853,7 +2860,6 @@ static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir)
2853 if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN) 2860 if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN)
2854 return 0; 2861 return 0;
2855 update_changeattr(dir, &res->cinfo); 2862 update_changeattr(dir, &res->cinfo);
2856 nfs_post_op_update_inode(dir, res->dir_attr);
2857 return 1; 2863 return 1;
2858} 2864}
2859 2865
@@ -2864,7 +2870,6 @@ static void nfs4_proc_rename_setup(struct rpc_message *msg, struct inode *dir)
2864 struct nfs_renameres *res = msg->rpc_resp; 2870 struct nfs_renameres *res = msg->rpc_resp;
2865 2871
2866 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME]; 2872 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME];
2867 arg->bitmask = server->attr_bitmask;
2868 res->server = server; 2873 res->server = server;
2869 nfs41_init_sequence(&arg->seq_args, &res->seq_res, 1); 2874 nfs41_init_sequence(&arg->seq_args, &res->seq_res, 1);
2870} 2875}
@@ -2890,9 +2895,7 @@ static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
2890 return 0; 2895 return 0;
2891 2896
2892 update_changeattr(old_dir, &res->old_cinfo); 2897 update_changeattr(old_dir, &res->old_cinfo);
2893 nfs_post_op_update_inode(old_dir, res->old_fattr);
2894 update_changeattr(new_dir, &res->new_cinfo); 2898 update_changeattr(new_dir, &res->new_cinfo);
2895 nfs_post_op_update_inode(new_dir, res->new_fattr);
2896 return 1; 2899 return 1;
2897} 2900}
2898 2901
@@ -2905,7 +2908,6 @@ static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name,
2905 .new_dir = NFS_FH(new_dir), 2908 .new_dir = NFS_FH(new_dir),
2906 .old_name = old_name, 2909 .old_name = old_name,
2907 .new_name = new_name, 2910 .new_name = new_name,
2908 .bitmask = server->attr_bitmask,
2909 }; 2911 };
2910 struct nfs_renameres res = { 2912 struct nfs_renameres res = {
2911 .server = server, 2913 .server = server,
@@ -2917,21 +2919,11 @@ static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name,
2917 }; 2919 };
2918 int status = -ENOMEM; 2920 int status = -ENOMEM;
2919 2921
2920 res.old_fattr = nfs_alloc_fattr();
2921 res.new_fattr = nfs_alloc_fattr();
2922 if (res.old_fattr == NULL || res.new_fattr == NULL)
2923 goto out;
2924
2925 status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); 2922 status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
2926 if (!status) { 2923 if (!status) {
2927 update_changeattr(old_dir, &res.old_cinfo); 2924 update_changeattr(old_dir, &res.old_cinfo);
2928 nfs_post_op_update_inode(old_dir, res.old_fattr);
2929 update_changeattr(new_dir, &res.new_cinfo); 2925 update_changeattr(new_dir, &res.new_cinfo);
2930 nfs_post_op_update_inode(new_dir, res.new_fattr);
2931 } 2926 }
2932out:
2933 nfs_free_fattr(res.new_fattr);
2934 nfs_free_fattr(res.old_fattr);
2935 return status; 2927 return status;
2936} 2928}
2937 2929
@@ -2969,18 +2961,15 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *
2969 int status = -ENOMEM; 2961 int status = -ENOMEM;
2970 2962
2971 res.fattr = nfs_alloc_fattr(); 2963 res.fattr = nfs_alloc_fattr();
2972 res.dir_attr = nfs_alloc_fattr(); 2964 if (res.fattr == NULL)
2973 if (res.fattr == NULL || res.dir_attr == NULL)
2974 goto out; 2965 goto out;
2975 2966
2976 status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); 2967 status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
2977 if (!status) { 2968 if (!status) {
2978 update_changeattr(dir, &res.cinfo); 2969 update_changeattr(dir, &res.cinfo);
2979 nfs_post_op_update_inode(dir, res.dir_attr);
2980 nfs_post_op_update_inode(inode, res.fattr); 2970 nfs_post_op_update_inode(inode, res.fattr);
2981 } 2971 }
2982out: 2972out:
2983 nfs_free_fattr(res.dir_attr);
2984 nfs_free_fattr(res.fattr); 2973 nfs_free_fattr(res.fattr);
2985 return status; 2974 return status;
2986} 2975}
@@ -3003,7 +2992,6 @@ struct nfs4_createdata {
3003 struct nfs4_create_res res; 2992 struct nfs4_create_res res;
3004 struct nfs_fh fh; 2993 struct nfs_fh fh;
3005 struct nfs_fattr fattr; 2994 struct nfs_fattr fattr;
3006 struct nfs_fattr dir_fattr;
3007}; 2995};
3008 2996
3009static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir, 2997static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir,
@@ -3027,9 +3015,7 @@ static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir,
3027 data->res.server = server; 3015 data->res.server = server;
3028 data->res.fh = &data->fh; 3016 data->res.fh = &data->fh;
3029 data->res.fattr = &data->fattr; 3017 data->res.fattr = &data->fattr;
3030 data->res.dir_fattr = &data->dir_fattr;
3031 nfs_fattr_init(data->res.fattr); 3018 nfs_fattr_init(data->res.fattr);
3032 nfs_fattr_init(data->res.dir_fattr);
3033 } 3019 }
3034 return data; 3020 return data;
3035} 3021}
@@ -3040,7 +3026,6 @@ static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_
3040 &data->arg.seq_args, &data->res.seq_res, 1); 3026 &data->arg.seq_args, &data->res.seq_res, 1);
3041 if (status == 0) { 3027 if (status == 0) {
3042 update_changeattr(dir, &data->res.dir_cinfo); 3028 update_changeattr(dir, &data->res.dir_cinfo);
3043 nfs_post_op_update_inode(dir, data->res.dir_fattr);
3044 status = nfs_instantiate(dentry, data->res.fh, data->res.fattr); 3029 status = nfs_instantiate(dentry, data->res.fh, data->res.fattr);
3045 } 3030 }
3046 return status; 3031 return status;
@@ -3336,12 +3321,12 @@ static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
3336 3321
3337void __nfs4_read_done_cb(struct nfs_read_data *data) 3322void __nfs4_read_done_cb(struct nfs_read_data *data)
3338{ 3323{
3339 nfs_invalidate_atime(data->inode); 3324 nfs_invalidate_atime(data->header->inode);
3340} 3325}
3341 3326
3342static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data) 3327static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data)
3343{ 3328{
3344 struct nfs_server *server = NFS_SERVER(data->inode); 3329 struct nfs_server *server = NFS_SERVER(data->header->inode);
3345 3330
3346 if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) { 3331 if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) {
3347 rpc_restart_call_prepare(task); 3332 rpc_restart_call_prepare(task);
@@ -3376,7 +3361,7 @@ static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message
3376 3361
3377static void nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data) 3362static void nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data)
3378{ 3363{
3379 if (nfs4_setup_sequence(NFS_SERVER(data->inode), 3364 if (nfs4_setup_sequence(NFS_SERVER(data->header->inode),
3380 &data->args.seq_args, 3365 &data->args.seq_args,
3381 &data->res.seq_res, 3366 &data->res.seq_res,
3382 task)) 3367 task))
@@ -3384,25 +3369,9 @@ static void nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_da
3384 rpc_call_start(task); 3369 rpc_call_start(task);
3385} 3370}
3386 3371
3387/* Reset the the nfs_read_data to send the read to the MDS. */
3388void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data)
3389{
3390 dprintk("%s Reset task for i/o through\n", __func__);
3391 put_lseg(data->lseg);
3392 data->lseg = NULL;
3393 /* offsets will differ in the dense stripe case */
3394 data->args.offset = data->mds_offset;
3395 data->ds_clp = NULL;
3396 data->args.fh = NFS_FH(data->inode);
3397 data->read_done_cb = nfs4_read_done_cb;
3398 task->tk_ops = data->mds_ops;
3399 rpc_task_reset_client(task, NFS_CLIENT(data->inode));
3400}
3401EXPORT_SYMBOL_GPL(nfs4_reset_read);
3402
3403static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data) 3372static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data)
3404{ 3373{
3405 struct inode *inode = data->inode; 3374 struct inode *inode = data->header->inode;
3406 3375
3407 if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) { 3376 if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) {
3408 rpc_restart_call_prepare(task); 3377 rpc_restart_call_prepare(task);
@@ -3410,7 +3379,7 @@ static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data
3410 } 3379 }
3411 if (task->tk_status >= 0) { 3380 if (task->tk_status >= 0) {
3412 renew_lease(NFS_SERVER(inode), data->timestamp); 3381 renew_lease(NFS_SERVER(inode), data->timestamp);
3413 nfs_post_op_update_inode_force_wcc(inode, data->res.fattr); 3382 nfs_post_op_update_inode_force_wcc(inode, &data->fattr);
3414 } 3383 }
3415 return 0; 3384 return 0;
3416} 3385}
@@ -3423,32 +3392,30 @@ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
3423 nfs4_write_done_cb(task, data); 3392 nfs4_write_done_cb(task, data);
3424} 3393}
3425 3394
3426/* Reset the the nfs_write_data to send the write to the MDS. */ 3395static
3427void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data) 3396bool nfs4_write_need_cache_consistency_data(const struct nfs_write_data *data)
3428{ 3397{
3429 dprintk("%s Reset task for i/o through\n", __func__); 3398 const struct nfs_pgio_header *hdr = data->header;
3430 put_lseg(data->lseg); 3399
3431 data->lseg = NULL; 3400 /* Don't request attributes for pNFS or O_DIRECT writes */
3432 data->ds_clp = NULL; 3401 if (data->ds_clp != NULL || hdr->dreq != NULL)
3433 data->write_done_cb = nfs4_write_done_cb; 3402 return false;
3434 data->args.fh = NFS_FH(data->inode); 3403 /* Otherwise, request attributes if and only if we don't hold
3435 data->args.bitmask = data->res.server->cache_consistency_bitmask; 3404 * a delegation
3436 data->args.offset = data->mds_offset; 3405 */
3437 data->res.fattr = &data->fattr; 3406 return nfs_have_delegation(hdr->inode, FMODE_READ) == 0;
3438 task->tk_ops = data->mds_ops;
3439 rpc_task_reset_client(task, NFS_CLIENT(data->inode));
3440} 3407}
3441EXPORT_SYMBOL_GPL(nfs4_reset_write);
3442 3408
3443static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg) 3409static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg)
3444{ 3410{
3445 struct nfs_server *server = NFS_SERVER(data->inode); 3411 struct nfs_server *server = NFS_SERVER(data->header->inode);
3446 3412
3447 if (data->lseg) { 3413 if (!nfs4_write_need_cache_consistency_data(data)) {
3448 data->args.bitmask = NULL; 3414 data->args.bitmask = NULL;
3449 data->res.fattr = NULL; 3415 data->res.fattr = NULL;
3450 } else 3416 } else
3451 data->args.bitmask = server->cache_consistency_bitmask; 3417 data->args.bitmask = server->cache_consistency_bitmask;
3418
3452 if (!data->write_done_cb) 3419 if (!data->write_done_cb)
3453 data->write_done_cb = nfs4_write_done_cb; 3420 data->write_done_cb = nfs4_write_done_cb;
3454 data->res.server = server; 3421 data->res.server = server;
@@ -3460,6 +3427,16 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag
3460 3427
3461static void nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data) 3428static void nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data)
3462{ 3429{
3430 if (nfs4_setup_sequence(NFS_SERVER(data->header->inode),
3431 &data->args.seq_args,
3432 &data->res.seq_res,
3433 task))
3434 return;
3435 rpc_call_start(task);
3436}
3437
3438static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
3439{
3463 if (nfs4_setup_sequence(NFS_SERVER(data->inode), 3440 if (nfs4_setup_sequence(NFS_SERVER(data->inode),
3464 &data->args.seq_args, 3441 &data->args.seq_args,
3465 &data->res.seq_res, 3442 &data->res.seq_res,
@@ -3468,7 +3445,7 @@ static void nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_
3468 rpc_call_start(task); 3445 rpc_call_start(task);
3469} 3446}
3470 3447
3471static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_write_data *data) 3448static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_commit_data *data)
3472{ 3449{
3473 struct inode *inode = data->inode; 3450 struct inode *inode = data->inode;
3474 3451
@@ -3476,28 +3453,22 @@ static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_write_data *dat
3476 rpc_restart_call_prepare(task); 3453 rpc_restart_call_prepare(task);
3477 return -EAGAIN; 3454 return -EAGAIN;
3478 } 3455 }
3479 nfs_refresh_inode(inode, data->res.fattr);
3480 return 0; 3456 return 0;
3481} 3457}
3482 3458
3483static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data) 3459static int nfs4_commit_done(struct rpc_task *task, struct nfs_commit_data *data)
3484{ 3460{
3485 if (!nfs4_sequence_done(task, &data->res.seq_res)) 3461 if (!nfs4_sequence_done(task, &data->res.seq_res))
3486 return -EAGAIN; 3462 return -EAGAIN;
3487 return data->write_done_cb(task, data); 3463 return data->commit_done_cb(task, data);
3488} 3464}
3489 3465
3490static void nfs4_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg) 3466static void nfs4_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg)
3491{ 3467{
3492 struct nfs_server *server = NFS_SERVER(data->inode); 3468 struct nfs_server *server = NFS_SERVER(data->inode);
3493 3469
3494 if (data->lseg) { 3470 if (data->commit_done_cb == NULL)
3495 data->args.bitmask = NULL; 3471 data->commit_done_cb = nfs4_commit_done_cb;
3496 data->res.fattr = NULL;
3497 } else
3498 data->args.bitmask = server->cache_consistency_bitmask;
3499 if (!data->write_done_cb)
3500 data->write_done_cb = nfs4_commit_done_cb;
3501 data->res.server = server; 3472 data->res.server = server;
3502 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT]; 3473 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT];
3503 nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); 3474 nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
@@ -4105,7 +4076,7 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
4105 nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); 4076 nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
4106 data->args.fhandle = &data->fh; 4077 data->args.fhandle = &data->fh;
4107 data->args.stateid = &data->stateid; 4078 data->args.stateid = &data->stateid;
4108 data->args.bitmask = server->attr_bitmask; 4079 data->args.bitmask = server->cache_consistency_bitmask;
4109 nfs_copy_fh(&data->fh, NFS_FH(inode)); 4080 nfs_copy_fh(&data->fh, NFS_FH(inode));
4110 nfs4_stateid_copy(&data->stateid, stateid); 4081 nfs4_stateid_copy(&data->stateid, stateid);
4111 data->res.fattr = &data->fattr; 4082 data->res.fattr = &data->fattr;
@@ -4126,9 +4097,10 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
4126 if (status != 0) 4097 if (status != 0)
4127 goto out; 4098 goto out;
4128 status = data->rpc_status; 4099 status = data->rpc_status;
4129 if (status != 0) 4100 if (status == 0)
4130 goto out; 4101 nfs_post_op_update_inode_force_wcc(inode, &data->fattr);
4131 nfs_refresh_inode(inode, &data->fattr); 4102 else
4103 nfs_refresh_inode(inode, &data->fattr);
4132out: 4104out:
4133 rpc_put_task(task); 4105 rpc_put_task(task);
4134 return status; 4106 return status;
@@ -6558,6 +6530,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
6558 .file_inode_ops = &nfs4_file_inode_operations, 6530 .file_inode_ops = &nfs4_file_inode_operations,
6559 .file_ops = &nfs4_file_operations, 6531 .file_ops = &nfs4_file_operations,
6560 .getroot = nfs4_proc_get_root, 6532 .getroot = nfs4_proc_get_root,
6533 .submount = nfs4_submount,
6561 .getattr = nfs4_proc_getattr, 6534 .getattr = nfs4_proc_getattr,
6562 .setattr = nfs4_proc_setattr, 6535 .setattr = nfs4_proc_setattr,
6563 .lookup = nfs4_proc_lookup, 6536 .lookup = nfs4_proc_lookup,
@@ -6590,13 +6563,13 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
6590 .write_rpc_prepare = nfs4_proc_write_rpc_prepare, 6563 .write_rpc_prepare = nfs4_proc_write_rpc_prepare,
6591 .write_done = nfs4_write_done, 6564 .write_done = nfs4_write_done,
6592 .commit_setup = nfs4_proc_commit_setup, 6565 .commit_setup = nfs4_proc_commit_setup,
6566 .commit_rpc_prepare = nfs4_proc_commit_rpc_prepare,
6593 .commit_done = nfs4_commit_done, 6567 .commit_done = nfs4_commit_done,
6594 .lock = nfs4_proc_lock, 6568 .lock = nfs4_proc_lock,
6595 .clear_acl_cache = nfs4_zap_acl_attr, 6569 .clear_acl_cache = nfs4_zap_acl_attr,
6596 .close_context = nfs4_close_context, 6570 .close_context = nfs4_close_context,
6597 .open_context = nfs4_atomic_open, 6571 .open_context = nfs4_atomic_open,
6598 .init_client = nfs4_init_client, 6572 .init_client = nfs4_init_client,
6599 .secinfo = nfs4_proc_secinfo,
6600}; 6573};
6601 6574
6602static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = { 6575static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = {
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index c54aae364bee..db040e971932 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -421,30 +421,22 @@ static int nfs4_stat_to_errno(int);
421#define NFS4_enc_commit_sz (compound_encode_hdr_maxsz + \ 421#define NFS4_enc_commit_sz (compound_encode_hdr_maxsz + \
422 encode_sequence_maxsz + \ 422 encode_sequence_maxsz + \
423 encode_putfh_maxsz + \ 423 encode_putfh_maxsz + \
424 encode_commit_maxsz + \ 424 encode_commit_maxsz)
425 encode_getattr_maxsz)
426#define NFS4_dec_commit_sz (compound_decode_hdr_maxsz + \ 425#define NFS4_dec_commit_sz (compound_decode_hdr_maxsz + \
427 decode_sequence_maxsz + \ 426 decode_sequence_maxsz + \
428 decode_putfh_maxsz + \ 427 decode_putfh_maxsz + \
429 decode_commit_maxsz + \ 428 decode_commit_maxsz)
430 decode_getattr_maxsz)
431#define NFS4_enc_open_sz (compound_encode_hdr_maxsz + \ 429#define NFS4_enc_open_sz (compound_encode_hdr_maxsz + \
432 encode_sequence_maxsz + \ 430 encode_sequence_maxsz + \
433 encode_putfh_maxsz + \ 431 encode_putfh_maxsz + \
434 encode_savefh_maxsz + \
435 encode_open_maxsz + \ 432 encode_open_maxsz + \
436 encode_getfh_maxsz + \ 433 encode_getfh_maxsz + \
437 encode_getattr_maxsz + \
438 encode_restorefh_maxsz + \
439 encode_getattr_maxsz) 434 encode_getattr_maxsz)
440#define NFS4_dec_open_sz (compound_decode_hdr_maxsz + \ 435#define NFS4_dec_open_sz (compound_decode_hdr_maxsz + \
441 decode_sequence_maxsz + \ 436 decode_sequence_maxsz + \
442 decode_putfh_maxsz + \ 437 decode_putfh_maxsz + \
443 decode_savefh_maxsz + \
444 decode_open_maxsz + \ 438 decode_open_maxsz + \
445 decode_getfh_maxsz + \ 439 decode_getfh_maxsz + \
446 decode_getattr_maxsz + \
447 decode_restorefh_maxsz + \
448 decode_getattr_maxsz) 440 decode_getattr_maxsz)
449#define NFS4_enc_open_confirm_sz \ 441#define NFS4_enc_open_confirm_sz \
450 (compound_encode_hdr_maxsz + \ 442 (compound_encode_hdr_maxsz + \
@@ -595,47 +587,37 @@ static int nfs4_stat_to_errno(int);
595#define NFS4_enc_remove_sz (compound_encode_hdr_maxsz + \ 587#define NFS4_enc_remove_sz (compound_encode_hdr_maxsz + \
596 encode_sequence_maxsz + \ 588 encode_sequence_maxsz + \
597 encode_putfh_maxsz + \ 589 encode_putfh_maxsz + \
598 encode_remove_maxsz + \ 590 encode_remove_maxsz)
599 encode_getattr_maxsz)
600#define NFS4_dec_remove_sz (compound_decode_hdr_maxsz + \ 591#define NFS4_dec_remove_sz (compound_decode_hdr_maxsz + \
601 decode_sequence_maxsz + \ 592 decode_sequence_maxsz + \
602 decode_putfh_maxsz + \ 593 decode_putfh_maxsz + \
603 decode_remove_maxsz + \ 594 decode_remove_maxsz)
604 decode_getattr_maxsz)
605#define NFS4_enc_rename_sz (compound_encode_hdr_maxsz + \ 595#define NFS4_enc_rename_sz (compound_encode_hdr_maxsz + \
606 encode_sequence_maxsz + \ 596 encode_sequence_maxsz + \
607 encode_putfh_maxsz + \ 597 encode_putfh_maxsz + \
608 encode_savefh_maxsz + \ 598 encode_savefh_maxsz + \
609 encode_putfh_maxsz + \ 599 encode_putfh_maxsz + \
610 encode_rename_maxsz + \ 600 encode_rename_maxsz)
611 encode_getattr_maxsz + \
612 encode_restorefh_maxsz + \
613 encode_getattr_maxsz)
614#define NFS4_dec_rename_sz (compound_decode_hdr_maxsz + \ 601#define NFS4_dec_rename_sz (compound_decode_hdr_maxsz + \
615 decode_sequence_maxsz + \ 602 decode_sequence_maxsz + \
616 decode_putfh_maxsz + \ 603 decode_putfh_maxsz + \
617 decode_savefh_maxsz + \ 604 decode_savefh_maxsz + \
618 decode_putfh_maxsz + \ 605 decode_putfh_maxsz + \
619 decode_rename_maxsz + \ 606 decode_rename_maxsz)
620 decode_getattr_maxsz + \
621 decode_restorefh_maxsz + \
622 decode_getattr_maxsz)
623#define NFS4_enc_link_sz (compound_encode_hdr_maxsz + \ 607#define NFS4_enc_link_sz (compound_encode_hdr_maxsz + \
624 encode_sequence_maxsz + \ 608 encode_sequence_maxsz + \
625 encode_putfh_maxsz + \ 609 encode_putfh_maxsz + \
626 encode_savefh_maxsz + \ 610 encode_savefh_maxsz + \
627 encode_putfh_maxsz + \ 611 encode_putfh_maxsz + \
628 encode_link_maxsz + \ 612 encode_link_maxsz + \
629 decode_getattr_maxsz + \
630 encode_restorefh_maxsz + \ 613 encode_restorefh_maxsz + \
631 decode_getattr_maxsz) 614 encode_getattr_maxsz)
632#define NFS4_dec_link_sz (compound_decode_hdr_maxsz + \ 615#define NFS4_dec_link_sz (compound_decode_hdr_maxsz + \
633 decode_sequence_maxsz + \ 616 decode_sequence_maxsz + \
634 decode_putfh_maxsz + \ 617 decode_putfh_maxsz + \
635 decode_savefh_maxsz + \ 618 decode_savefh_maxsz + \
636 decode_putfh_maxsz + \ 619 decode_putfh_maxsz + \
637 decode_link_maxsz + \ 620 decode_link_maxsz + \
638 decode_getattr_maxsz + \
639 decode_restorefh_maxsz + \ 621 decode_restorefh_maxsz + \
640 decode_getattr_maxsz) 622 decode_getattr_maxsz)
641#define NFS4_enc_symlink_sz (compound_encode_hdr_maxsz + \ 623#define NFS4_enc_symlink_sz (compound_encode_hdr_maxsz + \
@@ -653,20 +635,14 @@ static int nfs4_stat_to_errno(int);
653#define NFS4_enc_create_sz (compound_encode_hdr_maxsz + \ 635#define NFS4_enc_create_sz (compound_encode_hdr_maxsz + \
654 encode_sequence_maxsz + \ 636 encode_sequence_maxsz + \
655 encode_putfh_maxsz + \ 637 encode_putfh_maxsz + \
656 encode_savefh_maxsz + \
657 encode_create_maxsz + \ 638 encode_create_maxsz + \
658 encode_getfh_maxsz + \ 639 encode_getfh_maxsz + \
659 encode_getattr_maxsz + \
660 encode_restorefh_maxsz + \
661 encode_getattr_maxsz) 640 encode_getattr_maxsz)
662#define NFS4_dec_create_sz (compound_decode_hdr_maxsz + \ 641#define NFS4_dec_create_sz (compound_decode_hdr_maxsz + \
663 decode_sequence_maxsz + \ 642 decode_sequence_maxsz + \
664 decode_putfh_maxsz + \ 643 decode_putfh_maxsz + \
665 decode_savefh_maxsz + \
666 decode_create_maxsz + \ 644 decode_create_maxsz + \
667 decode_getfh_maxsz + \ 645 decode_getfh_maxsz + \
668 decode_getattr_maxsz + \
669 decode_restorefh_maxsz + \
670 decode_getattr_maxsz) 646 decode_getattr_maxsz)
671#define NFS4_enc_pathconf_sz (compound_encode_hdr_maxsz + \ 647#define NFS4_enc_pathconf_sz (compound_encode_hdr_maxsz + \
672 encode_sequence_maxsz + \ 648 encode_sequence_maxsz + \
@@ -1103,7 +1079,7 @@ static void encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg
1103 encode_nfs4_stateid(xdr, arg->stateid); 1079 encode_nfs4_stateid(xdr, arg->stateid);
1104} 1080}
1105 1081
1106static void encode_commit(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr) 1082static void encode_commit(struct xdr_stream *xdr, const struct nfs_commitargs *args, struct compound_hdr *hdr)
1107{ 1083{
1108 __be32 *p; 1084 __be32 *p;
1109 1085
@@ -2064,7 +2040,6 @@ static void nfs4_xdr_enc_remove(struct rpc_rqst *req, struct xdr_stream *xdr,
2064 encode_sequence(xdr, &args->seq_args, &hdr); 2040 encode_sequence(xdr, &args->seq_args, &hdr);
2065 encode_putfh(xdr, args->fh, &hdr); 2041 encode_putfh(xdr, args->fh, &hdr);
2066 encode_remove(xdr, &args->name, &hdr); 2042 encode_remove(xdr, &args->name, &hdr);
2067 encode_getfattr(xdr, args->bitmask, &hdr);
2068 encode_nops(&hdr); 2043 encode_nops(&hdr);
2069} 2044}
2070 2045
@@ -2084,9 +2059,6 @@ static void nfs4_xdr_enc_rename(struct rpc_rqst *req, struct xdr_stream *xdr,
2084 encode_savefh(xdr, &hdr); 2059 encode_savefh(xdr, &hdr);
2085 encode_putfh(xdr, args->new_dir, &hdr); 2060 encode_putfh(xdr, args->new_dir, &hdr);
2086 encode_rename(xdr, args->old_name, args->new_name, &hdr); 2061 encode_rename(xdr, args->old_name, args->new_name, &hdr);
2087 encode_getfattr(xdr, args->bitmask, &hdr);
2088 encode_restorefh(xdr, &hdr);
2089 encode_getfattr(xdr, args->bitmask, &hdr);
2090 encode_nops(&hdr); 2062 encode_nops(&hdr);
2091} 2063}
2092 2064
@@ -2106,7 +2078,6 @@ static void nfs4_xdr_enc_link(struct rpc_rqst *req, struct xdr_stream *xdr,
2106 encode_savefh(xdr, &hdr); 2078 encode_savefh(xdr, &hdr);
2107 encode_putfh(xdr, args->dir_fh, &hdr); 2079 encode_putfh(xdr, args->dir_fh, &hdr);
2108 encode_link(xdr, args->name, &hdr); 2080 encode_link(xdr, args->name, &hdr);
2109 encode_getfattr(xdr, args->bitmask, &hdr);
2110 encode_restorefh(xdr, &hdr); 2081 encode_restorefh(xdr, &hdr);
2111 encode_getfattr(xdr, args->bitmask, &hdr); 2082 encode_getfattr(xdr, args->bitmask, &hdr);
2112 encode_nops(&hdr); 2083 encode_nops(&hdr);
@@ -2125,12 +2096,9 @@ static void nfs4_xdr_enc_create(struct rpc_rqst *req, struct xdr_stream *xdr,
2125 encode_compound_hdr(xdr, req, &hdr); 2096 encode_compound_hdr(xdr, req, &hdr);
2126 encode_sequence(xdr, &args->seq_args, &hdr); 2097 encode_sequence(xdr, &args->seq_args, &hdr);
2127 encode_putfh(xdr, args->dir_fh, &hdr); 2098 encode_putfh(xdr, args->dir_fh, &hdr);
2128 encode_savefh(xdr, &hdr);
2129 encode_create(xdr, args, &hdr); 2099 encode_create(xdr, args, &hdr);
2130 encode_getfh(xdr, &hdr); 2100 encode_getfh(xdr, &hdr);
2131 encode_getfattr(xdr, args->bitmask, &hdr); 2101 encode_getfattr(xdr, args->bitmask, &hdr);
2132 encode_restorefh(xdr, &hdr);
2133 encode_getfattr(xdr, args->bitmask, &hdr);
2134 encode_nops(&hdr); 2102 encode_nops(&hdr);
2135} 2103}
2136 2104
@@ -2191,12 +2159,9 @@ static void nfs4_xdr_enc_open(struct rpc_rqst *req, struct xdr_stream *xdr,
2191 encode_compound_hdr(xdr, req, &hdr); 2159 encode_compound_hdr(xdr, req, &hdr);
2192 encode_sequence(xdr, &args->seq_args, &hdr); 2160 encode_sequence(xdr, &args->seq_args, &hdr);
2193 encode_putfh(xdr, args->fh, &hdr); 2161 encode_putfh(xdr, args->fh, &hdr);
2194 encode_savefh(xdr, &hdr);
2195 encode_open(xdr, args, &hdr); 2162 encode_open(xdr, args, &hdr);
2196 encode_getfh(xdr, &hdr); 2163 encode_getfh(xdr, &hdr);
2197 encode_getfattr(xdr, args->bitmask, &hdr); 2164 encode_getfattr(xdr, args->bitmask, &hdr);
2198 encode_restorefh(xdr, &hdr);
2199 encode_getfattr(xdr, args->dir_bitmask, &hdr);
2200 encode_nops(&hdr); 2165 encode_nops(&hdr);
2201} 2166}
2202 2167
@@ -2448,7 +2413,7 @@ static void nfs4_xdr_enc_write(struct rpc_rqst *req, struct xdr_stream *xdr,
2448 * a COMMIT request 2413 * a COMMIT request
2449 */ 2414 */
2450static void nfs4_xdr_enc_commit(struct rpc_rqst *req, struct xdr_stream *xdr, 2415static void nfs4_xdr_enc_commit(struct rpc_rqst *req, struct xdr_stream *xdr,
2451 struct nfs_writeargs *args) 2416 struct nfs_commitargs *args)
2452{ 2417{
2453 struct compound_hdr hdr = { 2418 struct compound_hdr hdr = {
2454 .minorversion = nfs4_xdr_minorversion(&args->seq_args), 2419 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
@@ -2458,8 +2423,6 @@ static void nfs4_xdr_enc_commit(struct rpc_rqst *req, struct xdr_stream *xdr,
2458 encode_sequence(xdr, &args->seq_args, &hdr); 2423 encode_sequence(xdr, &args->seq_args, &hdr);
2459 encode_putfh(xdr, args->fh, &hdr); 2424 encode_putfh(xdr, args->fh, &hdr);
2460 encode_commit(xdr, args, &hdr); 2425 encode_commit(xdr, args, &hdr);
2461 if (args->bitmask)
2462 encode_getfattr(xdr, args->bitmask, &hdr);
2463 encode_nops(&hdr); 2426 encode_nops(&hdr);
2464} 2427}
2465 2428
@@ -2602,8 +2565,8 @@ static void nfs4_xdr_enc_delegreturn(struct rpc_rqst *req,
2602 encode_compound_hdr(xdr, req, &hdr); 2565 encode_compound_hdr(xdr, req, &hdr);
2603 encode_sequence(xdr, &args->seq_args, &hdr); 2566 encode_sequence(xdr, &args->seq_args, &hdr);
2604 encode_putfh(xdr, args->fhandle, &hdr); 2567 encode_putfh(xdr, args->fhandle, &hdr);
2605 encode_delegreturn(xdr, args->stateid, &hdr);
2606 encode_getfattr(xdr, args->bitmask, &hdr); 2568 encode_getfattr(xdr, args->bitmask, &hdr);
2569 encode_delegreturn(xdr, args->stateid, &hdr);
2607 encode_nops(&hdr); 2570 encode_nops(&hdr);
2608} 2571}
2609 2572
@@ -4102,7 +4065,7 @@ static int decode_verifier(struct xdr_stream *xdr, void *verifier)
4102 return decode_opaque_fixed(xdr, verifier, NFS4_VERIFIER_SIZE); 4065 return decode_opaque_fixed(xdr, verifier, NFS4_VERIFIER_SIZE);
4103} 4066}
4104 4067
4105static int decode_commit(struct xdr_stream *xdr, struct nfs_writeres *res) 4068static int decode_commit(struct xdr_stream *xdr, struct nfs_commitres *res)
4106{ 4069{
4107 int status; 4070 int status;
4108 4071
@@ -5800,9 +5763,6 @@ static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
5800 if (status) 5763 if (status)
5801 goto out; 5764 goto out;
5802 status = decode_remove(xdr, &res->cinfo); 5765 status = decode_remove(xdr, &res->cinfo);
5803 if (status)
5804 goto out;
5805 decode_getfattr(xdr, res->dir_attr, res->server);
5806out: 5766out:
5807 return status; 5767 return status;
5808} 5768}
@@ -5832,15 +5792,6 @@ static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
5832 if (status) 5792 if (status)
5833 goto out; 5793 goto out;
5834 status = decode_rename(xdr, &res->old_cinfo, &res->new_cinfo); 5794 status = decode_rename(xdr, &res->old_cinfo, &res->new_cinfo);
5835 if (status)
5836 goto out;
5837 /* Current FH is target directory */
5838 if (decode_getfattr(xdr, res->new_fattr, res->server))
5839 goto out;
5840 status = decode_restorefh(xdr);
5841 if (status)
5842 goto out;
5843 decode_getfattr(xdr, res->old_fattr, res->server);
5844out: 5795out:
5845 return status; 5796 return status;
5846} 5797}
@@ -5876,8 +5827,6 @@ static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
5876 * Note order: OP_LINK leaves the directory as the current 5827 * Note order: OP_LINK leaves the directory as the current
5877 * filehandle. 5828 * filehandle.
5878 */ 5829 */
5879 if (decode_getfattr(xdr, res->dir_attr, res->server))
5880 goto out;
5881 status = decode_restorefh(xdr); 5830 status = decode_restorefh(xdr);
5882 if (status) 5831 if (status)
5883 goto out; 5832 goto out;
@@ -5904,21 +5853,13 @@ static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
5904 status = decode_putfh(xdr); 5853 status = decode_putfh(xdr);
5905 if (status) 5854 if (status)
5906 goto out; 5855 goto out;
5907 status = decode_savefh(xdr);
5908 if (status)
5909 goto out;
5910 status = decode_create(xdr, &res->dir_cinfo); 5856 status = decode_create(xdr, &res->dir_cinfo);
5911 if (status) 5857 if (status)
5912 goto out; 5858 goto out;
5913 status = decode_getfh(xdr, res->fh); 5859 status = decode_getfh(xdr, res->fh);
5914 if (status) 5860 if (status)
5915 goto out; 5861 goto out;
5916 if (decode_getfattr(xdr, res->fattr, res->server)) 5862 decode_getfattr(xdr, res->fattr, res->server);
5917 goto out;
5918 status = decode_restorefh(xdr);
5919 if (status)
5920 goto out;
5921 decode_getfattr(xdr, res->dir_fattr, res->server);
5922out: 5863out:
5923 return status; 5864 return status;
5924} 5865}
@@ -6075,19 +6016,12 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
6075 status = decode_putfh(xdr); 6016 status = decode_putfh(xdr);
6076 if (status) 6017 if (status)
6077 goto out; 6018 goto out;
6078 status = decode_savefh(xdr);
6079 if (status)
6080 goto out;
6081 status = decode_open(xdr, res); 6019 status = decode_open(xdr, res);
6082 if (status) 6020 if (status)
6083 goto out; 6021 goto out;
6084 if (decode_getfh(xdr, &res->fh) != 0) 6022 if (decode_getfh(xdr, &res->fh) != 0)
6085 goto out; 6023 goto out;
6086 if (decode_getfattr(xdr, res->f_attr, res->server) != 0) 6024 decode_getfattr(xdr, res->f_attr, res->server);
6087 goto out;
6088 if (decode_restorefh(xdr) != 0)
6089 goto out;
6090 decode_getfattr(xdr, res->dir_attr, res->server);
6091out: 6025out:
6092 return status; 6026 return status;
6093} 6027}
@@ -6353,7 +6287,7 @@ out:
6353 * Decode COMMIT response 6287 * Decode COMMIT response
6354 */ 6288 */
6355static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, struct xdr_stream *xdr, 6289static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
6356 struct nfs_writeres *res) 6290 struct nfs_commitres *res)
6357{ 6291{
6358 struct compound_hdr hdr; 6292 struct compound_hdr hdr;
6359 int status; 6293 int status;
@@ -6368,10 +6302,6 @@ static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
6368 if (status) 6302 if (status)
6369 goto out; 6303 goto out;
6370 status = decode_commit(xdr, res); 6304 status = decode_commit(xdr, res);
6371 if (status)
6372 goto out;
6373 if (res->fattr)
6374 decode_getfattr(xdr, res->fattr, res->server);
6375out: 6305out:
6376 return status; 6306 return status;
6377} 6307}
@@ -6527,10 +6457,10 @@ static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp,
6527 status = decode_putfh(xdr); 6457 status = decode_putfh(xdr);
6528 if (status != 0) 6458 if (status != 0)
6529 goto out; 6459 goto out;
6530 status = decode_delegreturn(xdr); 6460 status = decode_getfattr(xdr, res->fattr, res->server);
6531 if (status != 0) 6461 if (status != 0)
6532 goto out; 6462 goto out;
6533 decode_getfattr(xdr, res->fattr, res->server); 6463 status = decode_delegreturn(xdr);
6534out: 6464out:
6535 return status; 6465 return status;
6536} 6466}
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index 4bff4a3dab46..b47277baebab 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -211,7 +211,7 @@ static void copy_single_comp(struct ore_components *oc, unsigned c,
211 memcpy(ocomp->cred, src_comp->oc_cap.cred, sizeof(ocomp->cred)); 211 memcpy(ocomp->cred, src_comp->oc_cap.cred, sizeof(ocomp->cred));
212} 212}
213 213
214int __alloc_objio_seg(unsigned numdevs, gfp_t gfp_flags, 214static int __alloc_objio_seg(unsigned numdevs, gfp_t gfp_flags,
215 struct objio_segment **pseg) 215 struct objio_segment **pseg)
216{ 216{
217/* This is the in memory structure of the objio_segment 217/* This is the in memory structure of the objio_segment
@@ -440,11 +440,12 @@ static void _read_done(struct ore_io_state *ios, void *private)
440 440
441int objio_read_pagelist(struct nfs_read_data *rdata) 441int objio_read_pagelist(struct nfs_read_data *rdata)
442{ 442{
443 struct nfs_pgio_header *hdr = rdata->header;
443 struct objio_state *objios; 444 struct objio_state *objios;
444 int ret; 445 int ret;
445 446
446 ret = objio_alloc_io_state(NFS_I(rdata->inode)->layout, true, 447 ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, true,
447 rdata->lseg, rdata->args.pages, rdata->args.pgbase, 448 hdr->lseg, rdata->args.pages, rdata->args.pgbase,
448 rdata->args.offset, rdata->args.count, rdata, 449 rdata->args.offset, rdata->args.count, rdata,
449 GFP_KERNEL, &objios); 450 GFP_KERNEL, &objios);
450 if (unlikely(ret)) 451 if (unlikely(ret))
@@ -483,12 +484,12 @@ static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
483{ 484{
484 struct objio_state *objios = priv; 485 struct objio_state *objios = priv;
485 struct nfs_write_data *wdata = objios->oir.rpcdata; 486 struct nfs_write_data *wdata = objios->oir.rpcdata;
487 struct address_space *mapping = wdata->header->inode->i_mapping;
486 pgoff_t index = offset / PAGE_SIZE; 488 pgoff_t index = offset / PAGE_SIZE;
487 struct page *page = find_get_page(wdata->inode->i_mapping, index); 489 struct page *page = find_get_page(mapping, index);
488 490
489 if (!page) { 491 if (!page) {
490 page = find_or_create_page(wdata->inode->i_mapping, 492 page = find_or_create_page(mapping, index, GFP_NOFS);
491 index, GFP_NOFS);
492 if (unlikely(!page)) { 493 if (unlikely(!page)) {
493 dprintk("%s: grab_cache_page Failed index=0x%lx\n", 494 dprintk("%s: grab_cache_page Failed index=0x%lx\n",
494 __func__, index); 495 __func__, index);
@@ -518,11 +519,12 @@ static const struct _ore_r4w_op _r4w_op = {
518 519
519int objio_write_pagelist(struct nfs_write_data *wdata, int how) 520int objio_write_pagelist(struct nfs_write_data *wdata, int how)
520{ 521{
522 struct nfs_pgio_header *hdr = wdata->header;
521 struct objio_state *objios; 523 struct objio_state *objios;
522 int ret; 524 int ret;
523 525
524 ret = objio_alloc_io_state(NFS_I(wdata->inode)->layout, false, 526 ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, false,
525 wdata->lseg, wdata->args.pages, wdata->args.pgbase, 527 hdr->lseg, wdata->args.pages, wdata->args.pgbase,
526 wdata->args.offset, wdata->args.count, wdata, GFP_NOFS, 528 wdata->args.offset, wdata->args.count, wdata, GFP_NOFS,
527 &objios); 529 &objios);
528 if (unlikely(ret)) 530 if (unlikely(ret))
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c
index 595c5fc21a19..874613545301 100644
--- a/fs/nfs/objlayout/objlayout.c
+++ b/fs/nfs/objlayout/objlayout.c
@@ -258,7 +258,7 @@ objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
258 if (status >= 0) 258 if (status >= 0)
259 rdata->res.count = status; 259 rdata->res.count = status;
260 else 260 else
261 rdata->pnfs_error = status; 261 rdata->header->pnfs_error = status;
262 objlayout_iodone(oir); 262 objlayout_iodone(oir);
263 /* must not use oir after this point */ 263 /* must not use oir after this point */
264 264
@@ -279,12 +279,14 @@ objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
279enum pnfs_try_status 279enum pnfs_try_status
280objlayout_read_pagelist(struct nfs_read_data *rdata) 280objlayout_read_pagelist(struct nfs_read_data *rdata)
281{ 281{
282 struct nfs_pgio_header *hdr = rdata->header;
283 struct inode *inode = hdr->inode;
282 loff_t offset = rdata->args.offset; 284 loff_t offset = rdata->args.offset;
283 size_t count = rdata->args.count; 285 size_t count = rdata->args.count;
284 int err; 286 int err;
285 loff_t eof; 287 loff_t eof;
286 288
287 eof = i_size_read(rdata->inode); 289 eof = i_size_read(inode);
288 if (unlikely(offset + count > eof)) { 290 if (unlikely(offset + count > eof)) {
289 if (offset >= eof) { 291 if (offset >= eof) {
290 err = 0; 292 err = 0;
@@ -297,17 +299,17 @@ objlayout_read_pagelist(struct nfs_read_data *rdata)
297 } 299 }
298 300
299 rdata->res.eof = (offset + count) >= eof; 301 rdata->res.eof = (offset + count) >= eof;
300 _fix_verify_io_params(rdata->lseg, &rdata->args.pages, 302 _fix_verify_io_params(hdr->lseg, &rdata->args.pages,
301 &rdata->args.pgbase, 303 &rdata->args.pgbase,
302 rdata->args.offset, rdata->args.count); 304 rdata->args.offset, rdata->args.count);
303 305
304 dprintk("%s: inode(%lx) offset 0x%llx count 0x%Zx eof=%d\n", 306 dprintk("%s: inode(%lx) offset 0x%llx count 0x%Zx eof=%d\n",
305 __func__, rdata->inode->i_ino, offset, count, rdata->res.eof); 307 __func__, inode->i_ino, offset, count, rdata->res.eof);
306 308
307 err = objio_read_pagelist(rdata); 309 err = objio_read_pagelist(rdata);
308 out: 310 out:
309 if (unlikely(err)) { 311 if (unlikely(err)) {
310 rdata->pnfs_error = err; 312 hdr->pnfs_error = err;
311 dprintk("%s: Returned Error %d\n", __func__, err); 313 dprintk("%s: Returned Error %d\n", __func__, err);
312 return PNFS_NOT_ATTEMPTED; 314 return PNFS_NOT_ATTEMPTED;
313 } 315 }
@@ -340,7 +342,7 @@ objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
340 wdata->res.count = status; 342 wdata->res.count = status;
341 wdata->verf.committed = oir->committed; 343 wdata->verf.committed = oir->committed;
342 } else { 344 } else {
343 wdata->pnfs_error = status; 345 wdata->header->pnfs_error = status;
344 } 346 }
345 objlayout_iodone(oir); 347 objlayout_iodone(oir);
346 /* must not use oir after this point */ 348 /* must not use oir after this point */
@@ -363,15 +365,16 @@ enum pnfs_try_status
363objlayout_write_pagelist(struct nfs_write_data *wdata, 365objlayout_write_pagelist(struct nfs_write_data *wdata,
364 int how) 366 int how)
365{ 367{
368 struct nfs_pgio_header *hdr = wdata->header;
366 int err; 369 int err;
367 370
368 _fix_verify_io_params(wdata->lseg, &wdata->args.pages, 371 _fix_verify_io_params(hdr->lseg, &wdata->args.pages,
369 &wdata->args.pgbase, 372 &wdata->args.pgbase,
370 wdata->args.offset, wdata->args.count); 373 wdata->args.offset, wdata->args.count);
371 374
372 err = objio_write_pagelist(wdata, how); 375 err = objio_write_pagelist(wdata, how);
373 if (unlikely(err)) { 376 if (unlikely(err)) {
374 wdata->pnfs_error = err; 377 hdr->pnfs_error = err;
375 dprintk("%s: Returned Error %d\n", __func__, err); 378 dprintk("%s: Returned Error %d\n", __func__, err);
376 return PNFS_NOT_ATTEMPTED; 379 return PNFS_NOT_ATTEMPTED;
377 } 380 }
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index d21fceaa9f62..aed913c833f4 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -26,6 +26,47 @@
26 26
27static struct kmem_cache *nfs_page_cachep; 27static struct kmem_cache *nfs_page_cachep;
28 28
29bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount)
30{
31 p->npages = pagecount;
32 if (pagecount <= ARRAY_SIZE(p->page_array))
33 p->pagevec = p->page_array;
34 else {
35 p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL);
36 if (!p->pagevec)
37 p->npages = 0;
38 }
39 return p->pagevec != NULL;
40}
41
42void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
43 struct nfs_pgio_header *hdr,
44 void (*release)(struct nfs_pgio_header *hdr))
45{
46 hdr->req = nfs_list_entry(desc->pg_list.next);
47 hdr->inode = desc->pg_inode;
48 hdr->cred = hdr->req->wb_context->cred;
49 hdr->io_start = req_offset(hdr->req);
50 hdr->good_bytes = desc->pg_count;
51 hdr->dreq = desc->pg_dreq;
52 hdr->release = release;
53 hdr->completion_ops = desc->pg_completion_ops;
54 if (hdr->completion_ops->init_hdr)
55 hdr->completion_ops->init_hdr(hdr);
56}
57
58void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos)
59{
60 spin_lock(&hdr->lock);
61 if (pos < hdr->io_start + hdr->good_bytes) {
62 set_bit(NFS_IOHDR_ERROR, &hdr->flags);
63 clear_bit(NFS_IOHDR_EOF, &hdr->flags);
64 hdr->good_bytes = pos - hdr->io_start;
65 hdr->error = error;
66 }
67 spin_unlock(&hdr->lock);
68}
69
29static inline struct nfs_page * 70static inline struct nfs_page *
30nfs_page_alloc(void) 71nfs_page_alloc(void)
31{ 72{
@@ -76,12 +117,8 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
76 * long write-back delay. This will be adjusted in 117 * long write-back delay. This will be adjusted in
77 * update_nfs_request below if the region is not locked. */ 118 * update_nfs_request below if the region is not locked. */
78 req->wb_page = page; 119 req->wb_page = page;
79 atomic_set(&req->wb_complete, 0);
80 req->wb_index = page->index; 120 req->wb_index = page->index;
81 page_cache_get(page); 121 page_cache_get(page);
82 BUG_ON(PagePrivate(page));
83 BUG_ON(!PageLocked(page));
84 BUG_ON(page->mapping->host != inode);
85 req->wb_offset = offset; 122 req->wb_offset = offset;
86 req->wb_pgbase = offset; 123 req->wb_pgbase = offset;
87 req->wb_bytes = count; 124 req->wb_bytes = count;
@@ -104,6 +141,15 @@ void nfs_unlock_request(struct nfs_page *req)
104 clear_bit(PG_BUSY, &req->wb_flags); 141 clear_bit(PG_BUSY, &req->wb_flags);
105 smp_mb__after_clear_bit(); 142 smp_mb__after_clear_bit();
106 wake_up_bit(&req->wb_flags, PG_BUSY); 143 wake_up_bit(&req->wb_flags, PG_BUSY);
144}
145
146/**
147 * nfs_unlock_and_release_request - Unlock request and release the nfs_page
148 * @req:
149 */
150void nfs_unlock_and_release_request(struct nfs_page *req)
151{
152 nfs_unlock_request(req);
107 nfs_release_request(req); 153 nfs_release_request(req);
108} 154}
109 155
@@ -203,6 +249,7 @@ EXPORT_SYMBOL_GPL(nfs_generic_pg_test);
203void nfs_pageio_init(struct nfs_pageio_descriptor *desc, 249void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
204 struct inode *inode, 250 struct inode *inode,
205 const struct nfs_pageio_ops *pg_ops, 251 const struct nfs_pageio_ops *pg_ops,
252 const struct nfs_pgio_completion_ops *compl_ops,
206 size_t bsize, 253 size_t bsize,
207 int io_flags) 254 int io_flags)
208{ 255{
@@ -215,9 +262,11 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
215 desc->pg_recoalesce = 0; 262 desc->pg_recoalesce = 0;
216 desc->pg_inode = inode; 263 desc->pg_inode = inode;
217 desc->pg_ops = pg_ops; 264 desc->pg_ops = pg_ops;
265 desc->pg_completion_ops = compl_ops;
218 desc->pg_ioflags = io_flags; 266 desc->pg_ioflags = io_flags;
219 desc->pg_error = 0; 267 desc->pg_error = 0;
220 desc->pg_lseg = NULL; 268 desc->pg_lseg = NULL;
269 desc->pg_dreq = NULL;
221} 270}
222 271
223/** 272/**
@@ -241,12 +290,12 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev,
241 return false; 290 return false;
242 if (req->wb_context->state != prev->wb_context->state) 291 if (req->wb_context->state != prev->wb_context->state)
243 return false; 292 return false;
244 if (req->wb_index != (prev->wb_index + 1))
245 return false;
246 if (req->wb_pgbase != 0) 293 if (req->wb_pgbase != 0)
247 return false; 294 return false;
248 if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE) 295 if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE)
249 return false; 296 return false;
297 if (req_offset(req) != req_offset(prev) + prev->wb_bytes)
298 return false;
250 return pgio->pg_ops->pg_test(pgio, prev, req); 299 return pgio->pg_ops->pg_test(pgio, prev, req);
251} 300}
252 301
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 38512bcd2e98..5d09a36b2cd8 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -455,6 +455,7 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
455 spin_unlock(&nfsi->vfs_inode.i_lock); 455 spin_unlock(&nfsi->vfs_inode.i_lock);
456 pnfs_free_lseg_list(&tmp_list); 456 pnfs_free_lseg_list(&tmp_list);
457} 457}
458EXPORT_SYMBOL_GPL(pnfs_destroy_layout);
458 459
459/* 460/*
460 * Called by the state manger to remove all layouts established under an 461 * Called by the state manger to remove all layouts established under an
@@ -692,6 +693,7 @@ out:
692 dprintk("<-- %s status: %d\n", __func__, status); 693 dprintk("<-- %s status: %d\n", __func__, status);
693 return status; 694 return status;
694} 695}
696EXPORT_SYMBOL_GPL(_pnfs_return_layout);
695 697
696bool pnfs_roc(struct inode *ino) 698bool pnfs_roc(struct inode *ino)
697{ 699{
@@ -1082,6 +1084,10 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r
1082{ 1084{
1083 BUG_ON(pgio->pg_lseg != NULL); 1085 BUG_ON(pgio->pg_lseg != NULL);
1084 1086
1087 if (req->wb_offset != req->wb_pgbase) {
1088 nfs_pageio_reset_read_mds(pgio);
1089 return;
1090 }
1085 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 1091 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
1086 req->wb_context, 1092 req->wb_context,
1087 req_offset(req), 1093 req_offset(req),
@@ -1100,6 +1106,10 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *
1100{ 1106{
1101 BUG_ON(pgio->pg_lseg != NULL); 1107 BUG_ON(pgio->pg_lseg != NULL);
1102 1108
1109 if (req->wb_offset != req->wb_pgbase) {
1110 nfs_pageio_reset_write_mds(pgio);
1111 return;
1112 }
1103 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 1113 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
1104 req->wb_context, 1114 req->wb_context,
1105 req_offset(req), 1115 req_offset(req),
@@ -1113,26 +1123,31 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *
1113EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write); 1123EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write);
1114 1124
1115bool 1125bool
1116pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode) 1126pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode,
1127 const struct nfs_pgio_completion_ops *compl_ops)
1117{ 1128{
1118 struct nfs_server *server = NFS_SERVER(inode); 1129 struct nfs_server *server = NFS_SERVER(inode);
1119 struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld; 1130 struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
1120 1131
1121 if (ld == NULL) 1132 if (ld == NULL)
1122 return false; 1133 return false;
1123 nfs_pageio_init(pgio, inode, ld->pg_read_ops, server->rsize, 0); 1134 nfs_pageio_init(pgio, inode, ld->pg_read_ops, compl_ops,
1135 server->rsize, 0);
1124 return true; 1136 return true;
1125} 1137}
1126 1138
1127bool 1139bool
1128pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags) 1140pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode,
1141 int ioflags,
1142 const struct nfs_pgio_completion_ops *compl_ops)
1129{ 1143{
1130 struct nfs_server *server = NFS_SERVER(inode); 1144 struct nfs_server *server = NFS_SERVER(inode);
1131 struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld; 1145 struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
1132 1146
1133 if (ld == NULL) 1147 if (ld == NULL)
1134 return false; 1148 return false;
1135 nfs_pageio_init(pgio, inode, ld->pg_write_ops, server->wsize, ioflags); 1149 nfs_pageio_init(pgio, inode, ld->pg_write_ops, compl_ops,
1150 server->wsize, ioflags);
1136 return true; 1151 return true;
1137} 1152}
1138 1153
@@ -1162,13 +1177,15 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
1162} 1177}
1163EXPORT_SYMBOL_GPL(pnfs_generic_pg_test); 1178EXPORT_SYMBOL_GPL(pnfs_generic_pg_test);
1164 1179
1165static int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head *head) 1180int pnfs_write_done_resend_to_mds(struct inode *inode,
1181 struct list_head *head,
1182 const struct nfs_pgio_completion_ops *compl_ops)
1166{ 1183{
1167 struct nfs_pageio_descriptor pgio; 1184 struct nfs_pageio_descriptor pgio;
1168 LIST_HEAD(failed); 1185 LIST_HEAD(failed);
1169 1186
1170 /* Resend all requests through the MDS */ 1187 /* Resend all requests through the MDS */
1171 nfs_pageio_init_write_mds(&pgio, inode, FLUSH_STABLE); 1188 nfs_pageio_init_write_mds(&pgio, inode, FLUSH_STABLE, compl_ops);
1172 while (!list_empty(head)) { 1189 while (!list_empty(head)) {
1173 struct nfs_page *req = nfs_list_entry(head->next); 1190 struct nfs_page *req = nfs_list_entry(head->next);
1174 1191
@@ -1188,30 +1205,37 @@ static int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head *
1188 } 1205 }
1189 return 0; 1206 return 0;
1190} 1207}
1208EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds);
1209
1210static void pnfs_ld_handle_write_error(struct nfs_write_data *data)
1211{
1212 struct nfs_pgio_header *hdr = data->header;
1213
1214 dprintk("pnfs write error = %d\n", hdr->pnfs_error);
1215 if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
1216 PNFS_LAYOUTRET_ON_ERROR) {
1217 clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags);
1218 pnfs_return_layout(hdr->inode);
1219 }
1220 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
1221 data->task.tk_status = pnfs_write_done_resend_to_mds(hdr->inode,
1222 &hdr->pages,
1223 hdr->completion_ops);
1224}
1191 1225
1192/* 1226/*
1193 * Called by non rpc-based layout drivers 1227 * Called by non rpc-based layout drivers
1194 */ 1228 */
1195void pnfs_ld_write_done(struct nfs_write_data *data) 1229void pnfs_ld_write_done(struct nfs_write_data *data)
1196{ 1230{
1197 if (likely(!data->pnfs_error)) { 1231 struct nfs_pgio_header *hdr = data->header;
1232
1233 if (!hdr->pnfs_error) {
1198 pnfs_set_layoutcommit(data); 1234 pnfs_set_layoutcommit(data);
1199 data->mds_ops->rpc_call_done(&data->task, data); 1235 hdr->mds_ops->rpc_call_done(&data->task, data);
1200 } else { 1236 } else
1201 dprintk("pnfs write error = %d\n", data->pnfs_error); 1237 pnfs_ld_handle_write_error(data);
1202 if (NFS_SERVER(data->inode)->pnfs_curr_ld->flags & 1238 hdr->mds_ops->rpc_release(data);
1203 PNFS_LAYOUTRET_ON_ERROR) {
1204 /* Don't lo_commit on error, Server will needs to
1205 * preform a file recovery.
1206 */
1207 clear_bit(NFS_INO_LAYOUTCOMMIT,
1208 &NFS_I(data->inode)->flags);
1209 pnfs_return_layout(data->inode);
1210 }
1211 data->task.tk_status = pnfs_write_done_resend_to_mds(data->inode, &data->pages);
1212 }
1213 put_lseg(data->lseg);
1214 data->mds_ops->rpc_release(data);
1215} 1239}
1216EXPORT_SYMBOL_GPL(pnfs_ld_write_done); 1240EXPORT_SYMBOL_GPL(pnfs_ld_write_done);
1217 1241
@@ -1219,12 +1243,13 @@ static void
1219pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, 1243pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
1220 struct nfs_write_data *data) 1244 struct nfs_write_data *data)
1221{ 1245{
1222 list_splice_tail_init(&data->pages, &desc->pg_list); 1246 struct nfs_pgio_header *hdr = data->header;
1223 if (data->req && list_empty(&data->req->wb_list)) 1247
1224 nfs_list_add_request(data->req, &desc->pg_list); 1248 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
1225 nfs_pageio_reset_write_mds(desc); 1249 list_splice_tail_init(&hdr->pages, &desc->pg_list);
1226 desc->pg_recoalesce = 1; 1250 nfs_pageio_reset_write_mds(desc);
1227 put_lseg(data->lseg); 1251 desc->pg_recoalesce = 1;
1252 }
1228 nfs_writedata_release(data); 1253 nfs_writedata_release(data);
1229} 1254}
1230 1255
@@ -1234,23 +1259,18 @@ pnfs_try_to_write_data(struct nfs_write_data *wdata,
1234 struct pnfs_layout_segment *lseg, 1259 struct pnfs_layout_segment *lseg,
1235 int how) 1260 int how)
1236{ 1261{
1237 struct inode *inode = wdata->inode; 1262 struct nfs_pgio_header *hdr = wdata->header;
1263 struct inode *inode = hdr->inode;
1238 enum pnfs_try_status trypnfs; 1264 enum pnfs_try_status trypnfs;
1239 struct nfs_server *nfss = NFS_SERVER(inode); 1265 struct nfs_server *nfss = NFS_SERVER(inode);
1240 1266
1241 wdata->mds_ops = call_ops; 1267 hdr->mds_ops = call_ops;
1242 wdata->lseg = get_lseg(lseg);
1243 1268
1244 dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__, 1269 dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__,
1245 inode->i_ino, wdata->args.count, wdata->args.offset, how); 1270 inode->i_ino, wdata->args.count, wdata->args.offset, how);
1246
1247 trypnfs = nfss->pnfs_curr_ld->write_pagelist(wdata, how); 1271 trypnfs = nfss->pnfs_curr_ld->write_pagelist(wdata, how);
1248 if (trypnfs == PNFS_NOT_ATTEMPTED) { 1272 if (trypnfs != PNFS_NOT_ATTEMPTED)
1249 put_lseg(wdata->lseg);
1250 wdata->lseg = NULL;
1251 } else
1252 nfs_inc_stats(inode, NFSIOS_PNFS_WRITE); 1273 nfs_inc_stats(inode, NFSIOS_PNFS_WRITE);
1253
1254 dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs); 1274 dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
1255 return trypnfs; 1275 return trypnfs;
1256} 1276}
@@ -1266,7 +1286,7 @@ pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *he
1266 while (!list_empty(head)) { 1286 while (!list_empty(head)) {
1267 enum pnfs_try_status trypnfs; 1287 enum pnfs_try_status trypnfs;
1268 1288
1269 data = list_entry(head->next, struct nfs_write_data, list); 1289 data = list_first_entry(head, struct nfs_write_data, list);
1270 list_del_init(&data->list); 1290 list_del_init(&data->list);
1271 1291
1272 trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how); 1292 trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how);
@@ -1276,43 +1296,82 @@ pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *he
1276 put_lseg(lseg); 1296 put_lseg(lseg);
1277} 1297}
1278 1298
1299static void pnfs_writehdr_free(struct nfs_pgio_header *hdr)
1300{
1301 put_lseg(hdr->lseg);
1302 nfs_writehdr_free(hdr);
1303}
1304
1279int 1305int
1280pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) 1306pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
1281{ 1307{
1282 LIST_HEAD(head); 1308 struct nfs_write_header *whdr;
1309 struct nfs_pgio_header *hdr;
1283 int ret; 1310 int ret;
1284 1311
1285 ret = nfs_generic_flush(desc, &head); 1312 whdr = nfs_writehdr_alloc();
1286 if (ret != 0) { 1313 if (!whdr) {
1314 desc->pg_completion_ops->error_cleanup(&desc->pg_list);
1287 put_lseg(desc->pg_lseg); 1315 put_lseg(desc->pg_lseg);
1288 desc->pg_lseg = NULL; 1316 desc->pg_lseg = NULL;
1289 return ret; 1317 return -ENOMEM;
1290 } 1318 }
1291 pnfs_do_multiple_writes(desc, &head, desc->pg_ioflags); 1319 hdr = &whdr->header;
1292 return 0; 1320 nfs_pgheader_init(desc, hdr, pnfs_writehdr_free);
1321 hdr->lseg = get_lseg(desc->pg_lseg);
1322 atomic_inc(&hdr->refcnt);
1323 ret = nfs_generic_flush(desc, hdr);
1324 if (ret != 0) {
1325 put_lseg(desc->pg_lseg);
1326 desc->pg_lseg = NULL;
1327 } else
1328 pnfs_do_multiple_writes(desc, &hdr->rpc_list, desc->pg_ioflags);
1329 if (atomic_dec_and_test(&hdr->refcnt))
1330 hdr->completion_ops->completion(hdr);
1331 return ret;
1293} 1332}
1294EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages); 1333EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages);
1295 1334
1296static void pnfs_ld_handle_read_error(struct nfs_read_data *data) 1335int pnfs_read_done_resend_to_mds(struct inode *inode,
1336 struct list_head *head,
1337 const struct nfs_pgio_completion_ops *compl_ops)
1297{ 1338{
1298 struct nfs_pageio_descriptor pgio; 1339 struct nfs_pageio_descriptor pgio;
1340 LIST_HEAD(failed);
1299 1341
1300 put_lseg(data->lseg); 1342 /* Resend all requests through the MDS */
1301 data->lseg = NULL; 1343 nfs_pageio_init_read_mds(&pgio, inode, compl_ops);
1302 dprintk("pnfs write error = %d\n", data->pnfs_error); 1344 while (!list_empty(head)) {
1303 if (NFS_SERVER(data->inode)->pnfs_curr_ld->flags & 1345 struct nfs_page *req = nfs_list_entry(head->next);
1304 PNFS_LAYOUTRET_ON_ERROR)
1305 pnfs_return_layout(data->inode);
1306
1307 nfs_pageio_init_read_mds(&pgio, data->inode);
1308
1309 while (!list_empty(&data->pages)) {
1310 struct nfs_page *req = nfs_list_entry(data->pages.next);
1311 1346
1312 nfs_list_remove_request(req); 1347 nfs_list_remove_request(req);
1313 nfs_pageio_add_request(&pgio, req); 1348 if (!nfs_pageio_add_request(&pgio, req))
1349 nfs_list_add_request(req, &failed);
1314 } 1350 }
1315 nfs_pageio_complete(&pgio); 1351 nfs_pageio_complete(&pgio);
1352
1353 if (!list_empty(&failed)) {
1354 list_move(&failed, head);
1355 return -EIO;
1356 }
1357 return 0;
1358}
1359EXPORT_SYMBOL_GPL(pnfs_read_done_resend_to_mds);
1360
1361static void pnfs_ld_handle_read_error(struct nfs_read_data *data)
1362{
1363 struct nfs_pgio_header *hdr = data->header;
1364
1365 dprintk("pnfs read error = %d\n", hdr->pnfs_error);
1366 if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
1367 PNFS_LAYOUTRET_ON_ERROR) {
1368 clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags);
1369 pnfs_return_layout(hdr->inode);
1370 }
1371 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
1372 data->task.tk_status = pnfs_read_done_resend_to_mds(hdr->inode,
1373 &hdr->pages,
1374 hdr->completion_ops);
1316} 1375}
1317 1376
1318/* 1377/*
@@ -1320,13 +1379,14 @@ static void pnfs_ld_handle_read_error(struct nfs_read_data *data)
1320 */ 1379 */
1321void pnfs_ld_read_done(struct nfs_read_data *data) 1380void pnfs_ld_read_done(struct nfs_read_data *data)
1322{ 1381{
1323 if (likely(!data->pnfs_error)) { 1382 struct nfs_pgio_header *hdr = data->header;
1383
1384 if (likely(!hdr->pnfs_error)) {
1324 __nfs4_read_done_cb(data); 1385 __nfs4_read_done_cb(data);
1325 data->mds_ops->rpc_call_done(&data->task, data); 1386 hdr->mds_ops->rpc_call_done(&data->task, data);
1326 } else 1387 } else
1327 pnfs_ld_handle_read_error(data); 1388 pnfs_ld_handle_read_error(data);
1328 put_lseg(data->lseg); 1389 hdr->mds_ops->rpc_release(data);
1329 data->mds_ops->rpc_release(data);
1330} 1390}
1331EXPORT_SYMBOL_GPL(pnfs_ld_read_done); 1391EXPORT_SYMBOL_GPL(pnfs_ld_read_done);
1332 1392
@@ -1334,11 +1394,13 @@ static void
1334pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, 1394pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
1335 struct nfs_read_data *data) 1395 struct nfs_read_data *data)
1336{ 1396{
1337 list_splice_tail_init(&data->pages, &desc->pg_list); 1397 struct nfs_pgio_header *hdr = data->header;
1338 if (data->req && list_empty(&data->req->wb_list)) 1398
1339 nfs_list_add_request(data->req, &desc->pg_list); 1399 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
1340 nfs_pageio_reset_read_mds(desc); 1400 list_splice_tail_init(&hdr->pages, &desc->pg_list);
1341 desc->pg_recoalesce = 1; 1401 nfs_pageio_reset_read_mds(desc);
1402 desc->pg_recoalesce = 1;
1403 }
1342 nfs_readdata_release(data); 1404 nfs_readdata_release(data);
1343} 1405}
1344 1406
@@ -1350,23 +1412,19 @@ pnfs_try_to_read_data(struct nfs_read_data *rdata,
1350 const struct rpc_call_ops *call_ops, 1412 const struct rpc_call_ops *call_ops,
1351 struct pnfs_layout_segment *lseg) 1413 struct pnfs_layout_segment *lseg)
1352{ 1414{
1353 struct inode *inode = rdata->inode; 1415 struct nfs_pgio_header *hdr = rdata->header;
1416 struct inode *inode = hdr->inode;
1354 struct nfs_server *nfss = NFS_SERVER(inode); 1417 struct nfs_server *nfss = NFS_SERVER(inode);
1355 enum pnfs_try_status trypnfs; 1418 enum pnfs_try_status trypnfs;
1356 1419
1357 rdata->mds_ops = call_ops; 1420 hdr->mds_ops = call_ops;
1358 rdata->lseg = get_lseg(lseg);
1359 1421
1360 dprintk("%s: Reading ino:%lu %u@%llu\n", 1422 dprintk("%s: Reading ino:%lu %u@%llu\n",
1361 __func__, inode->i_ino, rdata->args.count, rdata->args.offset); 1423 __func__, inode->i_ino, rdata->args.count, rdata->args.offset);
1362 1424
1363 trypnfs = nfss->pnfs_curr_ld->read_pagelist(rdata); 1425 trypnfs = nfss->pnfs_curr_ld->read_pagelist(rdata);
1364 if (trypnfs == PNFS_NOT_ATTEMPTED) { 1426 if (trypnfs != PNFS_NOT_ATTEMPTED)
1365 put_lseg(rdata->lseg);
1366 rdata->lseg = NULL;
1367 } else {
1368 nfs_inc_stats(inode, NFSIOS_PNFS_READ); 1427 nfs_inc_stats(inode, NFSIOS_PNFS_READ);
1369 }
1370 dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs); 1428 dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
1371 return trypnfs; 1429 return trypnfs;
1372} 1430}
@@ -1382,7 +1440,7 @@ pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *hea
1382 while (!list_empty(head)) { 1440 while (!list_empty(head)) {
1383 enum pnfs_try_status trypnfs; 1441 enum pnfs_try_status trypnfs;
1384 1442
1385 data = list_entry(head->next, struct nfs_read_data, list); 1443 data = list_first_entry(head, struct nfs_read_data, list);
1386 list_del_init(&data->list); 1444 list_del_init(&data->list);
1387 1445
1388 trypnfs = pnfs_try_to_read_data(data, call_ops, lseg); 1446 trypnfs = pnfs_try_to_read_data(data, call_ops, lseg);
@@ -1392,20 +1450,40 @@ pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *hea
1392 put_lseg(lseg); 1450 put_lseg(lseg);
1393} 1451}
1394 1452
1453static void pnfs_readhdr_free(struct nfs_pgio_header *hdr)
1454{
1455 put_lseg(hdr->lseg);
1456 nfs_readhdr_free(hdr);
1457}
1458
1395int 1459int
1396pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) 1460pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
1397{ 1461{
1398 LIST_HEAD(head); 1462 struct nfs_read_header *rhdr;
1463 struct nfs_pgio_header *hdr;
1399 int ret; 1464 int ret;
1400 1465
1401 ret = nfs_generic_pagein(desc, &head); 1466 rhdr = nfs_readhdr_alloc();
1402 if (ret != 0) { 1467 if (!rhdr) {
1468 desc->pg_completion_ops->error_cleanup(&desc->pg_list);
1469 ret = -ENOMEM;
1403 put_lseg(desc->pg_lseg); 1470 put_lseg(desc->pg_lseg);
1404 desc->pg_lseg = NULL; 1471 desc->pg_lseg = NULL;
1405 return ret; 1472 return ret;
1406 } 1473 }
1407 pnfs_do_multiple_reads(desc, &head); 1474 hdr = &rhdr->header;
1408 return 0; 1475 nfs_pgheader_init(desc, hdr, pnfs_readhdr_free);
1476 hdr->lseg = get_lseg(desc->pg_lseg);
1477 atomic_inc(&hdr->refcnt);
1478 ret = nfs_generic_pagein(desc, hdr);
1479 if (ret != 0) {
1480 put_lseg(desc->pg_lseg);
1481 desc->pg_lseg = NULL;
1482 } else
1483 pnfs_do_multiple_reads(desc, &hdr->rpc_list);
1484 if (atomic_dec_and_test(&hdr->refcnt))
1485 hdr->completion_ops->completion(hdr);
1486 return ret;
1409} 1487}
1410EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages); 1488EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages);
1411 1489
@@ -1438,30 +1516,32 @@ EXPORT_SYMBOL_GPL(pnfs_set_lo_fail);
1438void 1516void
1439pnfs_set_layoutcommit(struct nfs_write_data *wdata) 1517pnfs_set_layoutcommit(struct nfs_write_data *wdata)
1440{ 1518{
1441 struct nfs_inode *nfsi = NFS_I(wdata->inode); 1519 struct nfs_pgio_header *hdr = wdata->header;
1520 struct inode *inode = hdr->inode;
1521 struct nfs_inode *nfsi = NFS_I(inode);
1442 loff_t end_pos = wdata->mds_offset + wdata->res.count; 1522 loff_t end_pos = wdata->mds_offset + wdata->res.count;
1443 bool mark_as_dirty = false; 1523 bool mark_as_dirty = false;
1444 1524
1445 spin_lock(&nfsi->vfs_inode.i_lock); 1525 spin_lock(&inode->i_lock);
1446 if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) { 1526 if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
1447 mark_as_dirty = true; 1527 mark_as_dirty = true;
1448 dprintk("%s: Set layoutcommit for inode %lu ", 1528 dprintk("%s: Set layoutcommit for inode %lu ",
1449 __func__, wdata->inode->i_ino); 1529 __func__, inode->i_ino);
1450 } 1530 }
1451 if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &wdata->lseg->pls_flags)) { 1531 if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &hdr->lseg->pls_flags)) {
1452 /* references matched in nfs4_layoutcommit_release */ 1532 /* references matched in nfs4_layoutcommit_release */
1453 get_lseg(wdata->lseg); 1533 get_lseg(hdr->lseg);
1454 } 1534 }
1455 if (end_pos > nfsi->layout->plh_lwb) 1535 if (end_pos > nfsi->layout->plh_lwb)
1456 nfsi->layout->plh_lwb = end_pos; 1536 nfsi->layout->plh_lwb = end_pos;
1457 spin_unlock(&nfsi->vfs_inode.i_lock); 1537 spin_unlock(&inode->i_lock);
1458 dprintk("%s: lseg %p end_pos %llu\n", 1538 dprintk("%s: lseg %p end_pos %llu\n",
1459 __func__, wdata->lseg, nfsi->layout->plh_lwb); 1539 __func__, hdr->lseg, nfsi->layout->plh_lwb);
1460 1540
1461 /* if pnfs_layoutcommit_inode() runs between inode locks, the next one 1541 /* if pnfs_layoutcommit_inode() runs between inode locks, the next one
1462 * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */ 1542 * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */
1463 if (mark_as_dirty) 1543 if (mark_as_dirty)
1464 mark_inode_dirty_sync(wdata->inode); 1544 mark_inode_dirty_sync(inode);
1465} 1545}
1466EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit); 1546EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit);
1467 1547
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 442ebf68eeec..7980756b2f57 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -63,6 +63,7 @@ enum {
63 NFS_LAYOUT_BULK_RECALL, /* bulk recall affecting layout */ 63 NFS_LAYOUT_BULK_RECALL, /* bulk recall affecting layout */
64 NFS_LAYOUT_ROC, /* some lseg had roc bit set */ 64 NFS_LAYOUT_ROC, /* some lseg had roc bit set */
65 NFS_LAYOUT_DESTROYED, /* no new use of layout allowed */ 65 NFS_LAYOUT_DESTROYED, /* no new use of layout allowed */
66 NFS_LAYOUT_INVALID, /* layout is being destroyed */
66}; 67};
67 68
68enum layoutdriver_policy_flags { 69enum layoutdriver_policy_flags {
@@ -94,11 +95,20 @@ struct pnfs_layoutdriver_type {
94 const struct nfs_pageio_ops *pg_read_ops; 95 const struct nfs_pageio_ops *pg_read_ops;
95 const struct nfs_pageio_ops *pg_write_ops; 96 const struct nfs_pageio_ops *pg_write_ops;
96 97
98 struct pnfs_ds_commit_info *(*get_ds_info) (struct inode *inode);
97 void (*mark_request_commit) (struct nfs_page *req, 99 void (*mark_request_commit) (struct nfs_page *req,
98 struct pnfs_layout_segment *lseg); 100 struct pnfs_layout_segment *lseg,
99 void (*clear_request_commit) (struct nfs_page *req); 101 struct nfs_commit_info *cinfo);
100 int (*scan_commit_lists) (struct inode *inode, int max, spinlock_t *lock); 102 void (*clear_request_commit) (struct nfs_page *req,
101 int (*commit_pagelist)(struct inode *inode, struct list_head *mds_pages, int how); 103 struct nfs_commit_info *cinfo);
104 int (*scan_commit_lists) (struct nfs_commit_info *cinfo,
105 int max);
106 void (*recover_commit_reqs) (struct list_head *list,
107 struct nfs_commit_info *cinfo);
108 int (*commit_pagelist)(struct inode *inode,
109 struct list_head *mds_pages,
110 int how,
111 struct nfs_commit_info *cinfo);
102 112
103 /* 113 /*
104 * Return PNFS_ATTEMPTED to indicate the layout code has attempted 114 * Return PNFS_ATTEMPTED to indicate the layout code has attempted
@@ -168,8 +178,10 @@ extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp);
168void get_layout_hdr(struct pnfs_layout_hdr *lo); 178void get_layout_hdr(struct pnfs_layout_hdr *lo);
169void put_lseg(struct pnfs_layout_segment *lseg); 179void put_lseg(struct pnfs_layout_segment *lseg);
170 180
171bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *); 181bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *,
172bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *, int); 182 const struct nfs_pgio_completion_ops *);
183bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *,
184 int, const struct nfs_pgio_completion_ops *);
173 185
174void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32); 186void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32);
175void unset_pnfs_layoutdriver(struct nfs_server *); 187void unset_pnfs_layoutdriver(struct nfs_server *);
@@ -211,6 +223,10 @@ struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
211 gfp_t gfp_flags); 223 gfp_t gfp_flags);
212 224
213void nfs4_deviceid_mark_client_invalid(struct nfs_client *clp); 225void nfs4_deviceid_mark_client_invalid(struct nfs_client *clp);
226int pnfs_read_done_resend_to_mds(struct inode *inode, struct list_head *head,
227 const struct nfs_pgio_completion_ops *compl_ops);
228int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head *head,
229 const struct nfs_pgio_completion_ops *compl_ops);
214 230
215/* nfs4_deviceid_flags */ 231/* nfs4_deviceid_flags */
216enum { 232enum {
@@ -261,49 +277,66 @@ static inline int pnfs_enabled_sb(struct nfs_server *nfss)
261} 277}
262 278
263static inline int 279static inline int
264pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how) 280pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how,
281 struct nfs_commit_info *cinfo)
265{ 282{
266 if (!test_and_clear_bit(NFS_INO_PNFS_COMMIT, &NFS_I(inode)->flags)) 283 if (cinfo->ds == NULL || cinfo->ds->ncommitting == 0)
267 return PNFS_NOT_ATTEMPTED; 284 return PNFS_NOT_ATTEMPTED;
268 return NFS_SERVER(inode)->pnfs_curr_ld->commit_pagelist(inode, mds_pages, how); 285 return NFS_SERVER(inode)->pnfs_curr_ld->commit_pagelist(inode, mds_pages, how, cinfo);
286}
287
288static inline struct pnfs_ds_commit_info *
289pnfs_get_ds_info(struct inode *inode)
290{
291 struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
292
293 if (ld == NULL || ld->get_ds_info == NULL)
294 return NULL;
295 return ld->get_ds_info(inode);
269} 296}
270 297
271static inline bool 298static inline bool
272pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) 299pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
300 struct nfs_commit_info *cinfo)
273{ 301{
274 struct inode *inode = req->wb_context->dentry->d_inode; 302 struct inode *inode = req->wb_context->dentry->d_inode;
275 struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; 303 struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
276 304
277 if (lseg == NULL || ld->mark_request_commit == NULL) 305 if (lseg == NULL || ld->mark_request_commit == NULL)
278 return false; 306 return false;
279 ld->mark_request_commit(req, lseg); 307 ld->mark_request_commit(req, lseg, cinfo);
280 return true; 308 return true;
281} 309}
282 310
283static inline bool 311static inline bool
284pnfs_clear_request_commit(struct nfs_page *req) 312pnfs_clear_request_commit(struct nfs_page *req, struct nfs_commit_info *cinfo)
285{ 313{
286 struct inode *inode = req->wb_context->dentry->d_inode; 314 struct inode *inode = req->wb_context->dentry->d_inode;
287 struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; 315 struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
288 316
289 if (ld == NULL || ld->clear_request_commit == NULL) 317 if (ld == NULL || ld->clear_request_commit == NULL)
290 return false; 318 return false;
291 ld->clear_request_commit(req); 319 ld->clear_request_commit(req, cinfo);
292 return true; 320 return true;
293} 321}
294 322
295static inline int 323static inline int
296pnfs_scan_commit_lists(struct inode *inode, int max, spinlock_t *lock) 324pnfs_scan_commit_lists(struct inode *inode, struct nfs_commit_info *cinfo,
325 int max)
297{ 326{
298 struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; 327 if (cinfo->ds == NULL || cinfo->ds->nwritten == 0)
299 int ret;
300
301 if (ld == NULL || ld->scan_commit_lists == NULL)
302 return 0; 328 return 0;
303 ret = ld->scan_commit_lists(inode, max, lock); 329 else
304 if (ret != 0) 330 return NFS_SERVER(inode)->pnfs_curr_ld->scan_commit_lists(cinfo, max);
305 set_bit(NFS_INO_PNFS_COMMIT, &NFS_I(inode)->flags); 331}
306 return ret; 332
333static inline void
334pnfs_recover_commit_reqs(struct inode *inode, struct list_head *list,
335 struct nfs_commit_info *cinfo)
336{
337 if (cinfo->ds == NULL || cinfo->ds->nwritten == 0)
338 return;
339 NFS_SERVER(inode)->pnfs_curr_ld->recover_commit_reqs(list, cinfo);
307} 340}
308 341
309/* Should the pNFS client commit and return the layout upon a setattr */ 342/* Should the pNFS client commit and return the layout upon a setattr */
@@ -396,40 +429,57 @@ static inline void unset_pnfs_layoutdriver(struct nfs_server *s)
396{ 429{
397} 430}
398 431
399static inline bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode) 432static inline bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode,
433 const struct nfs_pgio_completion_ops *compl_ops)
400{ 434{
401 return false; 435 return false;
402} 436}
403 437
404static inline bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags) 438static inline bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags,
439 const struct nfs_pgio_completion_ops *compl_ops)
405{ 440{
406 return false; 441 return false;
407} 442}
408 443
409static inline int 444static inline int
410pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how) 445pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how,
446 struct nfs_commit_info *cinfo)
411{ 447{
412 return PNFS_NOT_ATTEMPTED; 448 return PNFS_NOT_ATTEMPTED;
413} 449}
414 450
451static inline struct pnfs_ds_commit_info *
452pnfs_get_ds_info(struct inode *inode)
453{
454 return NULL;
455}
456
415static inline bool 457static inline bool
416pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) 458pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
459 struct nfs_commit_info *cinfo)
417{ 460{
418 return false; 461 return false;
419} 462}
420 463
421static inline bool 464static inline bool
422pnfs_clear_request_commit(struct nfs_page *req) 465pnfs_clear_request_commit(struct nfs_page *req, struct nfs_commit_info *cinfo)
423{ 466{
424 return false; 467 return false;
425} 468}
426 469
427static inline int 470static inline int
428pnfs_scan_commit_lists(struct inode *inode, int max, spinlock_t *lock) 471pnfs_scan_commit_lists(struct inode *inode, struct nfs_commit_info *cinfo,
472 int max)
429{ 473{
430 return 0; 474 return 0;
431} 475}
432 476
477static inline void
478pnfs_recover_commit_reqs(struct inode *inode, struct list_head *list,
479 struct nfs_commit_info *cinfo)
480{
481}
482
433static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync) 483static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync)
434{ 484{
435 return 0; 485 return 0;
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index b63b6f4d14fb..fea9163d6f8e 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -178,7 +178,7 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
178} 178}
179 179
180static int 180static int
181nfs_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qstr *name, 181nfs_proc_lookup(struct inode *dir, struct qstr *name,
182 struct nfs_fh *fhandle, struct nfs_fattr *fattr) 182 struct nfs_fh *fhandle, struct nfs_fattr *fattr)
183{ 183{
184 struct nfs_diropargs arg = { 184 struct nfs_diropargs arg = {
@@ -641,12 +641,14 @@ nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
641 641
642static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data) 642static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data)
643{ 643{
644 struct inode *inode = data->header->inode;
645
644 if (nfs_async_handle_expired_key(task)) 646 if (nfs_async_handle_expired_key(task))
645 return -EAGAIN; 647 return -EAGAIN;
646 648
647 nfs_invalidate_atime(data->inode); 649 nfs_invalidate_atime(inode);
648 if (task->tk_status >= 0) { 650 if (task->tk_status >= 0) {
649 nfs_refresh_inode(data->inode, data->res.fattr); 651 nfs_refresh_inode(inode, data->res.fattr);
650 /* Emulate the eof flag, which isn't normally needed in NFSv2 652 /* Emulate the eof flag, which isn't normally needed in NFSv2
651 * as it is guaranteed to always return the file attributes 653 * as it is guaranteed to always return the file attributes
652 */ 654 */
@@ -668,11 +670,13 @@ static void nfs_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_dat
668 670
669static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data) 671static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data)
670{ 672{
673 struct inode *inode = data->header->inode;
674
671 if (nfs_async_handle_expired_key(task)) 675 if (nfs_async_handle_expired_key(task))
672 return -EAGAIN; 676 return -EAGAIN;
673 677
674 if (task->tk_status >= 0) 678 if (task->tk_status >= 0)
675 nfs_post_op_update_inode_force_wcc(data->inode, data->res.fattr); 679 nfs_post_op_update_inode_force_wcc(inode, data->res.fattr);
676 return 0; 680 return 0;
677} 681}
678 682
@@ -688,8 +692,13 @@ static void nfs_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_d
688 rpc_call_start(task); 692 rpc_call_start(task);
689} 693}
690 694
695static void nfs_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
696{
697 BUG();
698}
699
691static void 700static void
692nfs_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg) 701nfs_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg)
693{ 702{
694 BUG(); 703 BUG();
695} 704}
@@ -733,6 +742,7 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
733 .file_inode_ops = &nfs_file_inode_operations, 742 .file_inode_ops = &nfs_file_inode_operations,
734 .file_ops = &nfs_file_operations, 743 .file_ops = &nfs_file_operations,
735 .getroot = nfs_proc_get_root, 744 .getroot = nfs_proc_get_root,
745 .submount = nfs_submount,
736 .getattr = nfs_proc_getattr, 746 .getattr = nfs_proc_getattr,
737 .setattr = nfs_proc_setattr, 747 .setattr = nfs_proc_setattr,
738 .lookup = nfs_proc_lookup, 748 .lookup = nfs_proc_lookup,
@@ -764,6 +774,7 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
764 .write_rpc_prepare = nfs_proc_write_rpc_prepare, 774 .write_rpc_prepare = nfs_proc_write_rpc_prepare,
765 .write_done = nfs_write_done, 775 .write_done = nfs_write_done,
766 .commit_setup = nfs_proc_commit_setup, 776 .commit_setup = nfs_proc_commit_setup,
777 .commit_rpc_prepare = nfs_proc_commit_rpc_prepare,
767 .lock = nfs_proc_lock, 778 .lock = nfs_proc_lock,
768 .lock_check_bounds = nfs_lock_check_bounds, 779 .lock_check_bounds = nfs_lock_check_bounds,
769 .close_context = nfs_close_context, 780 .close_context = nfs_close_context,
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 0a4be28c2ea3..2cfdd7785411 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -30,43 +30,73 @@
30#define NFSDBG_FACILITY NFSDBG_PAGECACHE 30#define NFSDBG_FACILITY NFSDBG_PAGECACHE
31 31
32static const struct nfs_pageio_ops nfs_pageio_read_ops; 32static const struct nfs_pageio_ops nfs_pageio_read_ops;
33static const struct rpc_call_ops nfs_read_partial_ops; 33static const struct rpc_call_ops nfs_read_common_ops;
34static const struct rpc_call_ops nfs_read_full_ops; 34static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops;
35 35
36static struct kmem_cache *nfs_rdata_cachep; 36static struct kmem_cache *nfs_rdata_cachep;
37 37
38struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount) 38struct nfs_read_header *nfs_readhdr_alloc(void)
39{ 39{
40 struct nfs_read_data *p; 40 struct nfs_read_header *rhdr;
41 41
42 p = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL); 42 rhdr = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL);
43 if (p) { 43 if (rhdr) {
44 INIT_LIST_HEAD(&p->pages); 44 struct nfs_pgio_header *hdr = &rhdr->header;
45 p->npages = pagecount; 45
46 if (pagecount <= ARRAY_SIZE(p->page_array)) 46 INIT_LIST_HEAD(&hdr->pages);
47 p->pagevec = p->page_array; 47 INIT_LIST_HEAD(&hdr->rpc_list);
48 else { 48 spin_lock_init(&hdr->lock);
49 p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL); 49 atomic_set(&hdr->refcnt, 0);
50 if (!p->pagevec) { 50 }
51 kmem_cache_free(nfs_rdata_cachep, p); 51 return rhdr;
52 p = NULL; 52}
53 } 53
54 } 54static struct nfs_read_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr,
55 unsigned int pagecount)
56{
57 struct nfs_read_data *data, *prealloc;
58
59 prealloc = &container_of(hdr, struct nfs_read_header, header)->rpc_data;
60 if (prealloc->header == NULL)
61 data = prealloc;
62 else
63 data = kzalloc(sizeof(*data), GFP_KERNEL);
64 if (!data)
65 goto out;
66
67 if (nfs_pgarray_set(&data->pages, pagecount)) {
68 data->header = hdr;
69 atomic_inc(&hdr->refcnt);
70 } else {
71 if (data != prealloc)
72 kfree(data);
73 data = NULL;
55 } 74 }
56 return p; 75out:
76 return data;
57} 77}
58 78
59void nfs_readdata_free(struct nfs_read_data *p) 79void nfs_readhdr_free(struct nfs_pgio_header *hdr)
60{ 80{
61 if (p && (p->pagevec != &p->page_array[0])) 81 struct nfs_read_header *rhdr = container_of(hdr, struct nfs_read_header, header);
62 kfree(p->pagevec); 82
63 kmem_cache_free(nfs_rdata_cachep, p); 83 kmem_cache_free(nfs_rdata_cachep, rhdr);
64} 84}
65 85
66void nfs_readdata_release(struct nfs_read_data *rdata) 86void nfs_readdata_release(struct nfs_read_data *rdata)
67{ 87{
88 struct nfs_pgio_header *hdr = rdata->header;
89 struct nfs_read_header *read_header = container_of(hdr, struct nfs_read_header, header);
90
68 put_nfs_open_context(rdata->args.context); 91 put_nfs_open_context(rdata->args.context);
69 nfs_readdata_free(rdata); 92 if (rdata->pages.pagevec != rdata->pages.page_array)
93 kfree(rdata->pages.pagevec);
94 if (rdata != &read_header->rpc_data)
95 kfree(rdata);
96 else
97 rdata->header = NULL;
98 if (atomic_dec_and_test(&hdr->refcnt))
99 hdr->completion_ops->completion(hdr);
70} 100}
71 101
72static 102static
@@ -78,39 +108,11 @@ int nfs_return_empty_page(struct page *page)
78 return 0; 108 return 0;
79} 109}
80 110
81static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data)
82{
83 unsigned int remainder = data->args.count - data->res.count;
84 unsigned int base = data->args.pgbase + data->res.count;
85 unsigned int pglen;
86 struct page **pages;
87
88 if (data->res.eof == 0 || remainder == 0)
89 return;
90 /*
91 * Note: "remainder" can never be negative, since we check for
92 * this in the XDR code.
93 */
94 pages = &data->args.pages[base >> PAGE_CACHE_SHIFT];
95 base &= ~PAGE_CACHE_MASK;
96 pglen = PAGE_CACHE_SIZE - base;
97 for (;;) {
98 if (remainder <= pglen) {
99 zero_user(*pages, base, remainder);
100 break;
101 }
102 zero_user(*pages, base, pglen);
103 pages++;
104 remainder -= pglen;
105 pglen = PAGE_CACHE_SIZE;
106 base = 0;
107 }
108}
109
110void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio, 111void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
111 struct inode *inode) 112 struct inode *inode,
113 const struct nfs_pgio_completion_ops *compl_ops)
112{ 114{
113 nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops, 115 nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops, compl_ops,
114 NFS_SERVER(inode)->rsize, 0); 116 NFS_SERVER(inode)->rsize, 0);
115} 117}
116 118
@@ -121,11 +123,12 @@ void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio)
121} 123}
122EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds); 124EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds);
123 125
124static void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, 126void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
125 struct inode *inode) 127 struct inode *inode,
128 const struct nfs_pgio_completion_ops *compl_ops)
126{ 129{
127 if (!pnfs_pageio_init_read(pgio, inode)) 130 if (!pnfs_pageio_init_read(pgio, inode, compl_ops))
128 nfs_pageio_init_read_mds(pgio, inode); 131 nfs_pageio_init_read_mds(pgio, inode, compl_ops);
129} 132}
130 133
131int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, 134int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
@@ -146,7 +149,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
146 if (len < PAGE_CACHE_SIZE) 149 if (len < PAGE_CACHE_SIZE)
147 zero_user_segment(page, len, PAGE_CACHE_SIZE); 150 zero_user_segment(page, len, PAGE_CACHE_SIZE);
148 151
149 nfs_pageio_init_read(&pgio, inode); 152 nfs_pageio_init_read(&pgio, inode, &nfs_async_read_completion_ops);
150 nfs_pageio_add_request(&pgio, new); 153 nfs_pageio_add_request(&pgio, new);
151 nfs_pageio_complete(&pgio); 154 nfs_pageio_complete(&pgio);
152 return 0; 155 return 0;
@@ -169,16 +172,49 @@ static void nfs_readpage_release(struct nfs_page *req)
169 nfs_release_request(req); 172 nfs_release_request(req);
170} 173}
171 174
172int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt, 175/* Note io was page aligned */
173 const struct rpc_call_ops *call_ops) 176static void nfs_read_completion(struct nfs_pgio_header *hdr)
177{
178 unsigned long bytes = 0;
179
180 if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
181 goto out;
182 while (!list_empty(&hdr->pages)) {
183 struct nfs_page *req = nfs_list_entry(hdr->pages.next);
184 struct page *page = req->wb_page;
185
186 if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) {
187 if (bytes > hdr->good_bytes)
188 zero_user(page, 0, PAGE_SIZE);
189 else if (hdr->good_bytes - bytes < PAGE_SIZE)
190 zero_user_segment(page,
191 hdr->good_bytes & ~PAGE_MASK,
192 PAGE_SIZE);
193 }
194 bytes += req->wb_bytes;
195 if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) {
196 if (bytes <= hdr->good_bytes)
197 SetPageUptodate(page);
198 } else
199 SetPageUptodate(page);
200 nfs_list_remove_request(req);
201 nfs_readpage_release(req);
202 }
203out:
204 hdr->release(hdr);
205}
206
207int nfs_initiate_read(struct rpc_clnt *clnt,
208 struct nfs_read_data *data,
209 const struct rpc_call_ops *call_ops, int flags)
174{ 210{
175 struct inode *inode = data->inode; 211 struct inode *inode = data->header->inode;
176 int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0; 212 int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0;
177 struct rpc_task *task; 213 struct rpc_task *task;
178 struct rpc_message msg = { 214 struct rpc_message msg = {
179 .rpc_argp = &data->args, 215 .rpc_argp = &data->args,
180 .rpc_resp = &data->res, 216 .rpc_resp = &data->res,
181 .rpc_cred = data->cred, 217 .rpc_cred = data->header->cred,
182 }; 218 };
183 struct rpc_task_setup task_setup_data = { 219 struct rpc_task_setup task_setup_data = {
184 .task = &data->task, 220 .task = &data->task,
@@ -187,7 +223,7 @@ int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
187 .callback_ops = call_ops, 223 .callback_ops = call_ops,
188 .callback_data = data, 224 .callback_data = data,
189 .workqueue = nfsiod_workqueue, 225 .workqueue = nfsiod_workqueue,
190 .flags = RPC_TASK_ASYNC | swap_flags, 226 .flags = RPC_TASK_ASYNC | swap_flags | flags,
191 }; 227 };
192 228
193 /* Set up the initial task struct. */ 229 /* Set up the initial task struct. */
@@ -212,19 +248,15 @@ EXPORT_SYMBOL_GPL(nfs_initiate_read);
212/* 248/*
213 * Set up the NFS read request struct 249 * Set up the NFS read request struct
214 */ 250 */
215static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, 251static void nfs_read_rpcsetup(struct nfs_read_data *data,
216 unsigned int count, unsigned int offset) 252 unsigned int count, unsigned int offset)
217{ 253{
218 struct inode *inode = req->wb_context->dentry->d_inode; 254 struct nfs_page *req = data->header->req;
219
220 data->req = req;
221 data->inode = inode;
222 data->cred = req->wb_context->cred;
223 255
224 data->args.fh = NFS_FH(inode); 256 data->args.fh = NFS_FH(data->header->inode);
225 data->args.offset = req_offset(req) + offset; 257 data->args.offset = req_offset(req) + offset;
226 data->args.pgbase = req->wb_pgbase + offset; 258 data->args.pgbase = req->wb_pgbase + offset;
227 data->args.pages = data->pagevec; 259 data->args.pages = data->pages.pagevec;
228 data->args.count = count; 260 data->args.count = count;
229 data->args.context = get_nfs_open_context(req->wb_context); 261 data->args.context = get_nfs_open_context(req->wb_context);
230 data->args.lock_context = req->wb_lock_context; 262 data->args.lock_context = req->wb_lock_context;
@@ -238,9 +270,9 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
238static int nfs_do_read(struct nfs_read_data *data, 270static int nfs_do_read(struct nfs_read_data *data,
239 const struct rpc_call_ops *call_ops) 271 const struct rpc_call_ops *call_ops)
240{ 272{
241 struct inode *inode = data->args.context->dentry->d_inode; 273 struct inode *inode = data->header->inode;
242 274
243 return nfs_initiate_read(data, NFS_CLIENT(inode), call_ops); 275 return nfs_initiate_read(NFS_CLIENT(inode), data, call_ops, 0);
244} 276}
245 277
246static int 278static int
@@ -253,7 +285,7 @@ nfs_do_multiple_reads(struct list_head *head,
253 while (!list_empty(head)) { 285 while (!list_empty(head)) {
254 int ret2; 286 int ret2;
255 287
256 data = list_entry(head->next, struct nfs_read_data, list); 288 data = list_first_entry(head, struct nfs_read_data, list);
257 list_del_init(&data->list); 289 list_del_init(&data->list);
258 290
259 ret2 = nfs_do_read(data, call_ops); 291 ret2 = nfs_do_read(data, call_ops);
@@ -275,6 +307,24 @@ nfs_async_read_error(struct list_head *head)
275 } 307 }
276} 308}
277 309
310static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops = {
311 .error_cleanup = nfs_async_read_error,
312 .completion = nfs_read_completion,
313};
314
315static void nfs_pagein_error(struct nfs_pageio_descriptor *desc,
316 struct nfs_pgio_header *hdr)
317{
318 set_bit(NFS_IOHDR_REDO, &hdr->flags);
319 while (!list_empty(&hdr->rpc_list)) {
320 struct nfs_read_data *data = list_first_entry(&hdr->rpc_list,
321 struct nfs_read_data, list);
322 list_del(&data->list);
323 nfs_readdata_release(data);
324 }
325 desc->pg_completion_ops->error_cleanup(&desc->pg_list);
326}
327
278/* 328/*
279 * Generate multiple requests to fill a single page. 329 * Generate multiple requests to fill a single page.
280 * 330 *
@@ -288,93 +338,95 @@ nfs_async_read_error(struct list_head *head)
288 * won't see the new data until our attribute cache is updated. This is more 338 * won't see the new data until our attribute cache is updated. This is more
289 * or less conventional NFS client behavior. 339 * or less conventional NFS client behavior.
290 */ 340 */
291static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head *res) 341static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc,
342 struct nfs_pgio_header *hdr)
292{ 343{
293 struct nfs_page *req = nfs_list_entry(desc->pg_list.next); 344 struct nfs_page *req = hdr->req;
294 struct page *page = req->wb_page; 345 struct page *page = req->wb_page;
295 struct nfs_read_data *data; 346 struct nfs_read_data *data;
296 size_t rsize = desc->pg_bsize, nbytes; 347 size_t rsize = desc->pg_bsize, nbytes;
297 unsigned int offset; 348 unsigned int offset;
298 int requests = 0;
299 int ret = 0;
300
301 nfs_list_remove_request(req);
302 349
303 offset = 0; 350 offset = 0;
304 nbytes = desc->pg_count; 351 nbytes = desc->pg_count;
305 do { 352 do {
306 size_t len = min(nbytes,rsize); 353 size_t len = min(nbytes,rsize);
307 354
308 data = nfs_readdata_alloc(1); 355 data = nfs_readdata_alloc(hdr, 1);
309 if (!data) 356 if (!data) {
310 goto out_bad; 357 nfs_pagein_error(desc, hdr);
311 data->pagevec[0] = page; 358 return -ENOMEM;
312 nfs_read_rpcsetup(req, data, len, offset); 359 }
313 list_add(&data->list, res); 360 data->pages.pagevec[0] = page;
314 requests++; 361 nfs_read_rpcsetup(data, len, offset);
362 list_add(&data->list, &hdr->rpc_list);
315 nbytes -= len; 363 nbytes -= len;
316 offset += len; 364 offset += len;
317 } while(nbytes != 0); 365 } while (nbytes != 0);
318 atomic_set(&req->wb_complete, requests); 366
319 desc->pg_rpc_callops = &nfs_read_partial_ops; 367 nfs_list_remove_request(req);
320 return ret; 368 nfs_list_add_request(req, &hdr->pages);
321out_bad: 369 desc->pg_rpc_callops = &nfs_read_common_ops;
322 while (!list_empty(res)) { 370 return 0;
323 data = list_entry(res->next, struct nfs_read_data, list);
324 list_del(&data->list);
325 nfs_readdata_release(data);
326 }
327 nfs_readpage_release(req);
328 return -ENOMEM;
329} 371}
330 372
331static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, struct list_head *res) 373static int nfs_pagein_one(struct nfs_pageio_descriptor *desc,
374 struct nfs_pgio_header *hdr)
332{ 375{
333 struct nfs_page *req; 376 struct nfs_page *req;
334 struct page **pages; 377 struct page **pages;
335 struct nfs_read_data *data; 378 struct nfs_read_data *data;
336 struct list_head *head = &desc->pg_list; 379 struct list_head *head = &desc->pg_list;
337 int ret = 0;
338 380
339 data = nfs_readdata_alloc(nfs_page_array_len(desc->pg_base, 381 data = nfs_readdata_alloc(hdr, nfs_page_array_len(desc->pg_base,
340 desc->pg_count)); 382 desc->pg_count));
341 if (!data) { 383 if (!data) {
342 nfs_async_read_error(head); 384 nfs_pagein_error(desc, hdr);
343 ret = -ENOMEM; 385 return -ENOMEM;
344 goto out;
345 } 386 }
346 387
347 pages = data->pagevec; 388 pages = data->pages.pagevec;
348 while (!list_empty(head)) { 389 while (!list_empty(head)) {
349 req = nfs_list_entry(head->next); 390 req = nfs_list_entry(head->next);
350 nfs_list_remove_request(req); 391 nfs_list_remove_request(req);
351 nfs_list_add_request(req, &data->pages); 392 nfs_list_add_request(req, &hdr->pages);
352 *pages++ = req->wb_page; 393 *pages++ = req->wb_page;
353 } 394 }
354 req = nfs_list_entry(data->pages.next);
355 395
356 nfs_read_rpcsetup(req, data, desc->pg_count, 0); 396 nfs_read_rpcsetup(data, desc->pg_count, 0);
357 list_add(&data->list, res); 397 list_add(&data->list, &hdr->rpc_list);
358 desc->pg_rpc_callops = &nfs_read_full_ops; 398 desc->pg_rpc_callops = &nfs_read_common_ops;
359out: 399 return 0;
360 return ret;
361} 400}
362 401
363int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, struct list_head *head) 402int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
403 struct nfs_pgio_header *hdr)
364{ 404{
365 if (desc->pg_bsize < PAGE_CACHE_SIZE) 405 if (desc->pg_bsize < PAGE_CACHE_SIZE)
366 return nfs_pagein_multi(desc, head); 406 return nfs_pagein_multi(desc, hdr);
367 return nfs_pagein_one(desc, head); 407 return nfs_pagein_one(desc, hdr);
368} 408}
369 409
370static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) 410static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
371{ 411{
372 LIST_HEAD(head); 412 struct nfs_read_header *rhdr;
413 struct nfs_pgio_header *hdr;
373 int ret; 414 int ret;
374 415
375 ret = nfs_generic_pagein(desc, &head); 416 rhdr = nfs_readhdr_alloc();
417 if (!rhdr) {
418 desc->pg_completion_ops->error_cleanup(&desc->pg_list);
419 return -ENOMEM;
420 }
421 hdr = &rhdr->header;
422 nfs_pgheader_init(desc, hdr, nfs_readhdr_free);
423 atomic_inc(&hdr->refcnt);
424 ret = nfs_generic_pagein(desc, hdr);
376 if (ret == 0) 425 if (ret == 0)
377 ret = nfs_do_multiple_reads(&head, desc->pg_rpc_callops); 426 ret = nfs_do_multiple_reads(&hdr->rpc_list,
427 desc->pg_rpc_callops);
428 if (atomic_dec_and_test(&hdr->refcnt))
429 hdr->completion_ops->completion(hdr);
378 return ret; 430 return ret;
379} 431}
380 432
@@ -389,20 +441,21 @@ static const struct nfs_pageio_ops nfs_pageio_read_ops = {
389 */ 441 */
390int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data) 442int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
391{ 443{
444 struct inode *inode = data->header->inode;
392 int status; 445 int status;
393 446
394 dprintk("NFS: %s: %5u, (status %d)\n", __func__, task->tk_pid, 447 dprintk("NFS: %s: %5u, (status %d)\n", __func__, task->tk_pid,
395 task->tk_status); 448 task->tk_status);
396 449
397 status = NFS_PROTO(data->inode)->read_done(task, data); 450 status = NFS_PROTO(inode)->read_done(task, data);
398 if (status != 0) 451 if (status != 0)
399 return status; 452 return status;
400 453
401 nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, data->res.count); 454 nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, data->res.count);
402 455
403 if (task->tk_status == -ESTALE) { 456 if (task->tk_status == -ESTALE) {
404 set_bit(NFS_INO_STALE, &NFS_I(data->inode)->flags); 457 set_bit(NFS_INO_STALE, &NFS_I(inode)->flags);
405 nfs_mark_for_revalidate(data->inode); 458 nfs_mark_for_revalidate(inode);
406 } 459 }
407 return 0; 460 return 0;
408} 461}
@@ -412,15 +465,13 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data
412 struct nfs_readargs *argp = &data->args; 465 struct nfs_readargs *argp = &data->args;
413 struct nfs_readres *resp = &data->res; 466 struct nfs_readres *resp = &data->res;
414 467
415 if (resp->eof || resp->count == argp->count)
416 return;
417
418 /* This is a short read! */ 468 /* This is a short read! */
419 nfs_inc_stats(data->inode, NFSIOS_SHORTREAD); 469 nfs_inc_stats(data->header->inode, NFSIOS_SHORTREAD);
420 /* Has the server at least made some progress? */ 470 /* Has the server at least made some progress? */
421 if (resp->count == 0) 471 if (resp->count == 0) {
472 nfs_set_pgio_error(data->header, -EIO, argp->offset);
422 return; 473 return;
423 474 }
424 /* Yes, so retry the read at the end of the data */ 475 /* Yes, so retry the read at the end of the data */
425 data->mds_offset += resp->count; 476 data->mds_offset += resp->count;
426 argp->offset += resp->count; 477 argp->offset += resp->count;
@@ -429,114 +480,46 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data
429 rpc_restart_call_prepare(task); 480 rpc_restart_call_prepare(task);
430} 481}
431 482
432/* 483static void nfs_readpage_result_common(struct rpc_task *task, void *calldata)
433 * Handle a read reply that fills part of a page.
434 */
435static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata)
436{ 484{
437 struct nfs_read_data *data = calldata; 485 struct nfs_read_data *data = calldata;
438 486 struct nfs_pgio_header *hdr = data->header;
487
488 /* Note the only returns of nfs_readpage_result are 0 and -EAGAIN */
439 if (nfs_readpage_result(task, data) != 0) 489 if (nfs_readpage_result(task, data) != 0)
440 return; 490 return;
441 if (task->tk_status < 0) 491 if (task->tk_status < 0)
442 return; 492 nfs_set_pgio_error(hdr, task->tk_status, data->args.offset);
443 493 else if (data->res.eof) {
444 nfs_readpage_truncate_uninitialised_page(data); 494 loff_t bound;
445 nfs_readpage_retry(task, data); 495
496 bound = data->args.offset + data->res.count;
497 spin_lock(&hdr->lock);
498 if (bound < hdr->io_start + hdr->good_bytes) {
499 set_bit(NFS_IOHDR_EOF, &hdr->flags);
500 clear_bit(NFS_IOHDR_ERROR, &hdr->flags);
501 hdr->good_bytes = bound - hdr->io_start;
502 }
503 spin_unlock(&hdr->lock);
504 } else if (data->res.count != data->args.count)
505 nfs_readpage_retry(task, data);
446} 506}
447 507
448static void nfs_readpage_release_partial(void *calldata) 508static void nfs_readpage_release_common(void *calldata)
449{ 509{
450 struct nfs_read_data *data = calldata;
451 struct nfs_page *req = data->req;
452 struct page *page = req->wb_page;
453 int status = data->task.tk_status;
454
455 if (status < 0)
456 set_bit(PG_PARTIAL_READ_FAILED, &req->wb_flags);
457
458 if (atomic_dec_and_test(&req->wb_complete)) {
459 if (!test_bit(PG_PARTIAL_READ_FAILED, &req->wb_flags))
460 SetPageUptodate(page);
461 nfs_readpage_release(req);
462 }
463 nfs_readdata_release(calldata); 510 nfs_readdata_release(calldata);
464} 511}
465 512
466void nfs_read_prepare(struct rpc_task *task, void *calldata) 513void nfs_read_prepare(struct rpc_task *task, void *calldata)
467{ 514{
468 struct nfs_read_data *data = calldata; 515 struct nfs_read_data *data = calldata;
469 NFS_PROTO(data->inode)->read_rpc_prepare(task, data); 516 NFS_PROTO(data->header->inode)->read_rpc_prepare(task, data);
470}
471
472static const struct rpc_call_ops nfs_read_partial_ops = {
473 .rpc_call_prepare = nfs_read_prepare,
474 .rpc_call_done = nfs_readpage_result_partial,
475 .rpc_release = nfs_readpage_release_partial,
476};
477
478static void nfs_readpage_set_pages_uptodate(struct nfs_read_data *data)
479{
480 unsigned int count = data->res.count;
481 unsigned int base = data->args.pgbase;
482 struct page **pages;
483
484 if (data->res.eof)
485 count = data->args.count;
486 if (unlikely(count == 0))
487 return;
488 pages = &data->args.pages[base >> PAGE_CACHE_SHIFT];
489 base &= ~PAGE_CACHE_MASK;
490 count += base;
491 for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++)
492 SetPageUptodate(*pages);
493 if (count == 0)
494 return;
495 /* Was this a short read? */
496 if (data->res.eof || data->res.count == data->args.count)
497 SetPageUptodate(*pages);
498}
499
500/*
501 * This is the callback from RPC telling us whether a reply was
502 * received or some error occurred (timeout or socket shutdown).
503 */
504static void nfs_readpage_result_full(struct rpc_task *task, void *calldata)
505{
506 struct nfs_read_data *data = calldata;
507
508 if (nfs_readpage_result(task, data) != 0)
509 return;
510 if (task->tk_status < 0)
511 return;
512 /*
513 * Note: nfs_readpage_retry may change the values of
514 * data->args. In the multi-page case, we therefore need
515 * to ensure that we call nfs_readpage_set_pages_uptodate()
516 * first.
517 */
518 nfs_readpage_truncate_uninitialised_page(data);
519 nfs_readpage_set_pages_uptodate(data);
520 nfs_readpage_retry(task, data);
521}
522
523static void nfs_readpage_release_full(void *calldata)
524{
525 struct nfs_read_data *data = calldata;
526
527 while (!list_empty(&data->pages)) {
528 struct nfs_page *req = nfs_list_entry(data->pages.next);
529
530 nfs_list_remove_request(req);
531 nfs_readpage_release(req);
532 }
533 nfs_readdata_release(calldata);
534} 517}
535 518
536static const struct rpc_call_ops nfs_read_full_ops = { 519static const struct rpc_call_ops nfs_read_common_ops = {
537 .rpc_call_prepare = nfs_read_prepare, 520 .rpc_call_prepare = nfs_read_prepare,
538 .rpc_call_done = nfs_readpage_result_full, 521 .rpc_call_done = nfs_readpage_result_common,
539 .rpc_release = nfs_readpage_release_full, 522 .rpc_release = nfs_readpage_release_common,
540}; 523};
541 524
542/* 525/*
@@ -668,7 +651,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
668 if (ret == 0) 651 if (ret == 0)
669 goto read_complete; /* all pages were read */ 652 goto read_complete; /* all pages were read */
670 653
671 nfs_pageio_init_read(&pgio, inode); 654 nfs_pageio_init_read(&pgio, inode, &nfs_async_read_completion_ops);
672 655
673 ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc); 656 ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);
674 657
@@ -684,7 +667,7 @@ out:
684int __init nfs_init_readpagecache(void) 667int __init nfs_init_readpagecache(void)
685{ 668{
686 nfs_rdata_cachep = kmem_cache_create("nfs_read_data", 669 nfs_rdata_cachep = kmem_cache_create("nfs_read_data",
687 sizeof(struct nfs_read_data), 670 sizeof(struct nfs_read_header),
688 0, SLAB_HWCACHE_ALIGN, 671 0, SLAB_HWCACHE_ALIGN,
689 NULL); 672 NULL);
690 if (nfs_rdata_cachep == NULL) 673 if (nfs_rdata_cachep == NULL)
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 4ac7fca7e4bf..a973eb101a92 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -66,6 +66,7 @@
66#include "pnfs.h" 66#include "pnfs.h"
67 67
68#define NFSDBG_FACILITY NFSDBG_VFS 68#define NFSDBG_FACILITY NFSDBG_VFS
69#define NFS_TEXT_DATA 1
69 70
70#ifdef CONFIG_NFS_V3 71#ifdef CONFIG_NFS_V3
71#define NFS_DEFAULT_VERSION 3 72#define NFS_DEFAULT_VERSION 3
@@ -277,12 +278,22 @@ static match_table_t nfs_vers_tokens = {
277 { Opt_vers_err, NULL } 278 { Opt_vers_err, NULL }
278}; 279};
279 280
281struct nfs_mount_info {
282 void (*fill_super)(struct super_block *, struct nfs_mount_info *);
283 int (*set_security)(struct super_block *, struct dentry *, struct nfs_mount_info *);
284 struct nfs_parsed_mount_data *parsed;
285 struct nfs_clone_mount *cloned;
286 struct nfs_fh *mntfh;
287};
288
280static void nfs_umount_begin(struct super_block *); 289static void nfs_umount_begin(struct super_block *);
281static int nfs_statfs(struct dentry *, struct kstatfs *); 290static int nfs_statfs(struct dentry *, struct kstatfs *);
282static int nfs_show_options(struct seq_file *, struct dentry *); 291static int nfs_show_options(struct seq_file *, struct dentry *);
283static int nfs_show_devname(struct seq_file *, struct dentry *); 292static int nfs_show_devname(struct seq_file *, struct dentry *);
284static int nfs_show_path(struct seq_file *, struct dentry *); 293static int nfs_show_path(struct seq_file *, struct dentry *);
285static int nfs_show_stats(struct seq_file *, struct dentry *); 294static int nfs_show_stats(struct seq_file *, struct dentry *);
295static struct dentry *nfs_fs_mount_common(struct file_system_type *,
296 struct nfs_server *, int, const char *, struct nfs_mount_info *);
286static struct dentry *nfs_fs_mount(struct file_system_type *, 297static struct dentry *nfs_fs_mount(struct file_system_type *,
287 int, const char *, void *); 298 int, const char *, void *);
288static struct dentry *nfs_xdev_mount(struct file_system_type *fs_type, 299static struct dentry *nfs_xdev_mount(struct file_system_type *fs_type,
@@ -323,12 +334,11 @@ static const struct super_operations nfs_sops = {
323}; 334};
324 335
325#ifdef CONFIG_NFS_V4 336#ifdef CONFIG_NFS_V4
326static int nfs4_validate_text_mount_data(void *options, 337static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *);
338static int nfs4_validate_mount_data(void *options,
327 struct nfs_parsed_mount_data *args, const char *dev_name); 339 struct nfs_parsed_mount_data *args, const char *dev_name);
328static struct dentry *nfs4_try_mount(int flags, const char *dev_name, 340static struct dentry *nfs4_try_mount(int flags, const char *dev_name,
329 struct nfs_parsed_mount_data *data); 341 struct nfs_mount_info *mount_info);
330static struct dentry *nfs4_mount(struct file_system_type *fs_type,
331 int flags, const char *dev_name, void *raw_data);
332static struct dentry *nfs4_remote_mount(struct file_system_type *fs_type, 342static struct dentry *nfs4_remote_mount(struct file_system_type *fs_type,
333 int flags, const char *dev_name, void *raw_data); 343 int flags, const char *dev_name, void *raw_data);
334static struct dentry *nfs4_xdev_mount(struct file_system_type *fs_type, 344static struct dentry *nfs4_xdev_mount(struct file_system_type *fs_type,
@@ -342,7 +352,7 @@ static void nfs4_kill_super(struct super_block *sb);
342static struct file_system_type nfs4_fs_type = { 352static struct file_system_type nfs4_fs_type = {
343 .owner = THIS_MODULE, 353 .owner = THIS_MODULE,
344 .name = "nfs4", 354 .name = "nfs4",
345 .mount = nfs4_mount, 355 .mount = nfs_fs_mount,
346 .kill_sb = nfs4_kill_super, 356 .kill_sb = nfs4_kill_super,
347 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, 357 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
348}; 358};
@@ -938,7 +948,7 @@ static void nfs_umount_begin(struct super_block *sb)
938 rpc_killall_tasks(rpc); 948 rpc_killall_tasks(rpc);
939} 949}
940 950
941static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(unsigned int version) 951static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(void)
942{ 952{
943 struct nfs_parsed_mount_data *data; 953 struct nfs_parsed_mount_data *data;
944 954
@@ -953,8 +963,8 @@ static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(unsigned int ve
953 data->nfs_server.protocol = XPRT_TRANSPORT_TCP; 963 data->nfs_server.protocol = XPRT_TRANSPORT_TCP;
954 data->auth_flavors[0] = RPC_AUTH_UNIX; 964 data->auth_flavors[0] = RPC_AUTH_UNIX;
955 data->auth_flavor_len = 1; 965 data->auth_flavor_len = 1;
956 data->version = version;
957 data->minorversion = 0; 966 data->minorversion = 0;
967 data->need_mount = true;
958 data->net = current->nsproxy->net_ns; 968 data->net = current->nsproxy->net_ns;
959 security_init_mnt_opts(&data->lsm_opts); 969 security_init_mnt_opts(&data->lsm_opts);
960 } 970 }
@@ -1674,8 +1684,8 @@ static int nfs_walk_authlist(struct nfs_parsed_mount_data *args,
1674 * Use the remote server's MOUNT service to request the NFS file handle 1684 * Use the remote server's MOUNT service to request the NFS file handle
1675 * corresponding to the provided path. 1685 * corresponding to the provided path.
1676 */ 1686 */
1677static int nfs_try_mount(struct nfs_parsed_mount_data *args, 1687static int nfs_request_mount(struct nfs_parsed_mount_data *args,
1678 struct nfs_fh *root_fh) 1688 struct nfs_fh *root_fh)
1679{ 1689{
1680 rpc_authflavor_t server_authlist[NFS_MAX_SECFLAVORS]; 1690 rpc_authflavor_t server_authlist[NFS_MAX_SECFLAVORS];
1681 unsigned int server_authlist_len = ARRAY_SIZE(server_authlist); 1691 unsigned int server_authlist_len = ARRAY_SIZE(server_authlist);
@@ -1738,6 +1748,26 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args,
1738 return nfs_walk_authlist(args, &request); 1748 return nfs_walk_authlist(args, &request);
1739} 1749}
1740 1750
1751static struct dentry *nfs_try_mount(int flags, const char *dev_name,
1752 struct nfs_mount_info *mount_info)
1753{
1754 int status;
1755 struct nfs_server *server;
1756
1757 if (mount_info->parsed->need_mount) {
1758 status = nfs_request_mount(mount_info->parsed, mount_info->mntfh);
1759 if (status)
1760 return ERR_PTR(status);
1761 }
1762
1763 /* Get a volume representation */
1764 server = nfs_create_server(mount_info->parsed, mount_info->mntfh);
1765 if (IS_ERR(server))
1766 return ERR_CAST(server);
1767
1768 return nfs_fs_mount_common(&nfs_fs_type, server, flags, dev_name, mount_info);
1769}
1770
1741/* 1771/*
1742 * Split "dev_name" into "hostname:export_path". 1772 * Split "dev_name" into "hostname:export_path".
1743 * 1773 *
@@ -1826,10 +1856,10 @@ out_path:
1826 * + breaking back: trying proto=udp after proto=tcp, v2 after v3, 1856 * + breaking back: trying proto=udp after proto=tcp, v2 after v3,
1827 * mountproto=tcp after mountproto=udp, and so on 1857 * mountproto=tcp after mountproto=udp, and so on
1828 */ 1858 */
1829static int nfs_validate_mount_data(void *options, 1859static int nfs23_validate_mount_data(void *options,
1830 struct nfs_parsed_mount_data *args, 1860 struct nfs_parsed_mount_data *args,
1831 struct nfs_fh *mntfh, 1861 struct nfs_fh *mntfh,
1832 const char *dev_name) 1862 const char *dev_name)
1833{ 1863{
1834 struct nfs_mount_data *data = (struct nfs_mount_data *)options; 1864 struct nfs_mount_data *data = (struct nfs_mount_data *)options;
1835 struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address; 1865 struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address;
@@ -1883,6 +1913,7 @@ static int nfs_validate_mount_data(void *options,
1883 args->acregmax = data->acregmax; 1913 args->acregmax = data->acregmax;
1884 args->acdirmin = data->acdirmin; 1914 args->acdirmin = data->acdirmin;
1885 args->acdirmax = data->acdirmax; 1915 args->acdirmax = data->acdirmax;
1916 args->need_mount = false;
1886 1917
1887 memcpy(sap, &data->addr, sizeof(data->addr)); 1918 memcpy(sap, &data->addr, sizeof(data->addr));
1888 args->nfs_server.addrlen = sizeof(data->addr); 1919 args->nfs_server.addrlen = sizeof(data->addr);
@@ -1934,43 +1965,8 @@ static int nfs_validate_mount_data(void *options,
1934 } 1965 }
1935 1966
1936 break; 1967 break;
1937 default: { 1968 default:
1938 int status; 1969 return NFS_TEXT_DATA;
1939
1940 if (nfs_parse_mount_options((char *)options, args) == 0)
1941 return -EINVAL;
1942
1943 if (!nfs_verify_server_address(sap))
1944 goto out_no_address;
1945
1946 if (args->version == 4)
1947#ifdef CONFIG_NFS_V4
1948 return nfs4_validate_text_mount_data(options,
1949 args, dev_name);
1950#else
1951 goto out_v4_not_compiled;
1952#endif
1953
1954 nfs_set_port(sap, &args->nfs_server.port, 0);
1955
1956 nfs_set_mount_transport_protocol(args);
1957
1958 status = nfs_parse_devname(dev_name,
1959 &args->nfs_server.hostname,
1960 PAGE_SIZE,
1961 &args->nfs_server.export_path,
1962 NFS_MAXPATHLEN);
1963 if (!status)
1964 status = nfs_try_mount(args, mntfh);
1965
1966 kfree(args->nfs_server.export_path);
1967 args->nfs_server.export_path = NULL;
1968
1969 if (status)
1970 return status;
1971
1972 break;
1973 }
1974 } 1970 }
1975 1971
1976#ifndef CONFIG_NFS_V3 1972#ifndef CONFIG_NFS_V3
@@ -1999,12 +1995,6 @@ out_v3_not_compiled:
1999 return -EPROTONOSUPPORT; 1995 return -EPROTONOSUPPORT;
2000#endif /* !CONFIG_NFS_V3 */ 1996#endif /* !CONFIG_NFS_V3 */
2001 1997
2002#ifndef CONFIG_NFS_V4
2003out_v4_not_compiled:
2004 dfprintk(MOUNT, "NFS: NFSv4 is not compiled into kernel\n");
2005 return -EPROTONOSUPPORT;
2006#endif /* !CONFIG_NFS_V4 */
2007
2008out_nomem: 1998out_nomem:
2009 dfprintk(MOUNT, "NFS: not enough memory to handle mount options\n"); 1999 dfprintk(MOUNT, "NFS: not enough memory to handle mount options\n");
2010 return -ENOMEM; 2000 return -ENOMEM;
@@ -2018,6 +2008,82 @@ out_invalid_fh:
2018 return -EINVAL; 2008 return -EINVAL;
2019} 2009}
2020 2010
2011#ifdef CONFIG_NFS_V4
2012static int nfs_validate_mount_data(struct file_system_type *fs_type,
2013 void *options,
2014 struct nfs_parsed_mount_data *args,
2015 struct nfs_fh *mntfh,
2016 const char *dev_name)
2017{
2018 if (fs_type == &nfs_fs_type)
2019 return nfs23_validate_mount_data(options, args, mntfh, dev_name);
2020 return nfs4_validate_mount_data(options, args, dev_name);
2021}
2022#else
2023static int nfs_validate_mount_data(struct file_system_type *fs_type,
2024 void *options,
2025 struct nfs_parsed_mount_data *args,
2026 struct nfs_fh *mntfh,
2027 const char *dev_name)
2028{
2029 return nfs23_validate_mount_data(options, args, mntfh, dev_name);
2030}
2031#endif
2032
2033static int nfs_validate_text_mount_data(void *options,
2034 struct nfs_parsed_mount_data *args,
2035 const char *dev_name)
2036{
2037 int port = 0;
2038 int max_namelen = PAGE_SIZE;
2039 int max_pathlen = NFS_MAXPATHLEN;
2040 struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address;
2041
2042 if (nfs_parse_mount_options((char *)options, args) == 0)
2043 return -EINVAL;
2044
2045 if (!nfs_verify_server_address(sap))
2046 goto out_no_address;
2047
2048 if (args->version == 4) {
2049#ifdef CONFIG_NFS_V4
2050 port = NFS_PORT;
2051 max_namelen = NFS4_MAXNAMLEN;
2052 max_pathlen = NFS4_MAXPATHLEN;
2053 nfs_validate_transport_protocol(args);
2054 nfs4_validate_mount_flags(args);
2055#else
2056 goto out_v4_not_compiled;
2057#endif /* CONFIG_NFS_V4 */
2058 } else
2059 nfs_set_mount_transport_protocol(args);
2060
2061 nfs_set_port(sap, &args->nfs_server.port, port);
2062
2063 if (args->auth_flavor_len > 1)
2064 goto out_bad_auth;
2065
2066 return nfs_parse_devname(dev_name,
2067 &args->nfs_server.hostname,
2068 max_namelen,
2069 &args->nfs_server.export_path,
2070 max_pathlen);
2071
2072#ifndef CONFIG_NFS_V4
2073out_v4_not_compiled:
2074 dfprintk(MOUNT, "NFS: NFSv4 is not compiled into kernel\n");
2075 return -EPROTONOSUPPORT;
2076#endif /* !CONFIG_NFS_V4 */
2077
2078out_no_address:
2079 dfprintk(MOUNT, "NFS: mount program didn't pass remote address\n");
2080 return -EINVAL;
2081
2082out_bad_auth:
2083 dfprintk(MOUNT, "NFS: Too many RPC auth flavours specified\n");
2084 return -EINVAL;
2085}
2086
2021static int 2087static int
2022nfs_compare_remount_data(struct nfs_server *nfss, 2088nfs_compare_remount_data(struct nfs_server *nfss,
2023 struct nfs_parsed_mount_data *data) 2089 struct nfs_parsed_mount_data *data)
@@ -2129,8 +2195,9 @@ static inline void nfs_initialise_sb(struct super_block *sb)
2129 * Finish setting up an NFS2/3 superblock 2195 * Finish setting up an NFS2/3 superblock
2130 */ 2196 */
2131static void nfs_fill_super(struct super_block *sb, 2197static void nfs_fill_super(struct super_block *sb,
2132 struct nfs_parsed_mount_data *data) 2198 struct nfs_mount_info *mount_info)
2133{ 2199{
2200 struct nfs_parsed_mount_data *data = mount_info->parsed;
2134 struct nfs_server *server = NFS_SB(sb); 2201 struct nfs_server *server = NFS_SB(sb);
2135 2202
2136 sb->s_blocksize_bits = 0; 2203 sb->s_blocksize_bits = 0;
@@ -2154,8 +2221,9 @@ static void nfs_fill_super(struct super_block *sb,
2154 * Finish setting up a cloned NFS2/3 superblock 2221 * Finish setting up a cloned NFS2/3 superblock
2155 */ 2222 */
2156static void nfs_clone_super(struct super_block *sb, 2223static void nfs_clone_super(struct super_block *sb,
2157 const struct super_block *old_sb) 2224 struct nfs_mount_info *mount_info)
2158{ 2225{
2226 const struct super_block *old_sb = mount_info->cloned->sb;
2159 struct nfs_server *server = NFS_SB(sb); 2227 struct nfs_server *server = NFS_SB(sb);
2160 2228
2161 sb->s_blocksize_bits = old_sb->s_blocksize_bits; 2229 sb->s_blocksize_bits = old_sb->s_blocksize_bits;
@@ -2278,52 +2346,70 @@ static int nfs_compare_super(struct super_block *sb, void *data)
2278 return nfs_compare_mount_options(sb, server, mntflags); 2346 return nfs_compare_mount_options(sb, server, mntflags);
2279} 2347}
2280 2348
2349#ifdef CONFIG_NFS_FSCACHE
2350static void nfs_get_cache_cookie(struct super_block *sb,
2351 struct nfs_parsed_mount_data *parsed,
2352 struct nfs_clone_mount *cloned)
2353{
2354 char *uniq = NULL;
2355 int ulen = 0;
2356
2357 if (parsed && parsed->fscache_uniq) {
2358 uniq = parsed->fscache_uniq;
2359 ulen = strlen(parsed->fscache_uniq);
2360 } else if (cloned) {
2361 struct nfs_server *mnt_s = NFS_SB(cloned->sb);
2362 if (mnt_s->fscache_key) {
2363 uniq = mnt_s->fscache_key->key.uniquifier;
2364 ulen = mnt_s->fscache_key->key.uniq_len;
2365 };
2366 }
2367
2368 nfs_fscache_get_super_cookie(sb, uniq, ulen);
2369}
2370#else
2371static void nfs_get_cache_cookie(struct super_block *sb,
2372 struct nfs_parsed_mount_data *parsed,
2373 struct nfs_clone_mount *cloned)
2374{
2375}
2376#endif
2377
2281static int nfs_bdi_register(struct nfs_server *server) 2378static int nfs_bdi_register(struct nfs_server *server)
2282{ 2379{
2283 return bdi_register_dev(&server->backing_dev_info, server->s_dev); 2380 return bdi_register_dev(&server->backing_dev_info, server->s_dev);
2284} 2381}
2285 2382
2286static struct dentry *nfs_fs_mount(struct file_system_type *fs_type, 2383static int nfs_set_sb_security(struct super_block *s, struct dentry *mntroot,
2287 int flags, const char *dev_name, void *raw_data) 2384 struct nfs_mount_info *mount_info)
2385{
2386 return security_sb_set_mnt_opts(s, &mount_info->parsed->lsm_opts);
2387}
2388
2389static int nfs_clone_sb_security(struct super_block *s, struct dentry *mntroot,
2390 struct nfs_mount_info *mount_info)
2391{
2392 /* clone any lsm security options from the parent to the new sb */
2393 security_sb_clone_mnt_opts(mount_info->cloned->sb, s);
2394 if (mntroot->d_inode->i_op != NFS_SB(s)->nfs_client->rpc_ops->dir_inode_ops)
2395 return -ESTALE;
2396 return 0;
2397}
2398
2399static struct dentry *nfs_fs_mount_common(struct file_system_type *fs_type,
2400 struct nfs_server *server,
2401 int flags, const char *dev_name,
2402 struct nfs_mount_info *mount_info)
2288{ 2403{
2289 struct nfs_server *server = NULL;
2290 struct super_block *s; 2404 struct super_block *s;
2291 struct nfs_parsed_mount_data *data;
2292 struct nfs_fh *mntfh;
2293 struct dentry *mntroot = ERR_PTR(-ENOMEM); 2405 struct dentry *mntroot = ERR_PTR(-ENOMEM);
2294 int (*compare_super)(struct super_block *, void *) = nfs_compare_super; 2406 int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
2295 struct nfs_sb_mountdata sb_mntdata = { 2407 struct nfs_sb_mountdata sb_mntdata = {
2296 .mntflags = flags, 2408 .mntflags = flags,
2409 .server = server,
2297 }; 2410 };
2298 int error; 2411 int error;
2299 2412
2300 data = nfs_alloc_parsed_mount_data(NFS_DEFAULT_VERSION);
2301 mntfh = nfs_alloc_fhandle();
2302 if (data == NULL || mntfh == NULL)
2303 goto out;
2304
2305 /* Validate the mount data */
2306 error = nfs_validate_mount_data(raw_data, data, mntfh, dev_name);
2307 if (error < 0) {
2308 mntroot = ERR_PTR(error);
2309 goto out;
2310 }
2311
2312#ifdef CONFIG_NFS_V4
2313 if (data->version == 4) {
2314 mntroot = nfs4_try_mount(flags, dev_name, data);
2315 goto out;
2316 }
2317#endif /* CONFIG_NFS_V4 */
2318
2319 /* Get a volume representation */
2320 server = nfs_create_server(data, mntfh);
2321 if (IS_ERR(server)) {
2322 mntroot = ERR_CAST(server);
2323 goto out;
2324 }
2325 sb_mntdata.server = server;
2326
2327 if (server->flags & NFS_MOUNT_UNSHARED) 2413 if (server->flags & NFS_MOUNT_UNSHARED)
2328 compare_super = NULL; 2414 compare_super = NULL;
2329 2415
@@ -2351,23 +2437,21 @@ static struct dentry *nfs_fs_mount(struct file_system_type *fs_type,
2351 2437
2352 if (!s->s_root) { 2438 if (!s->s_root) {
2353 /* initial superblock/root creation */ 2439 /* initial superblock/root creation */
2354 nfs_fill_super(s, data); 2440 mount_info->fill_super(s, mount_info);
2355 nfs_fscache_get_super_cookie(s, data->fscache_uniq, NULL); 2441 nfs_get_cache_cookie(s, mount_info->parsed, mount_info->cloned);
2356 } 2442 }
2357 2443
2358 mntroot = nfs_get_root(s, mntfh, dev_name); 2444 mntroot = nfs_get_root(s, mount_info->mntfh, dev_name);
2359 if (IS_ERR(mntroot)) 2445 if (IS_ERR(mntroot))
2360 goto error_splat_super; 2446 goto error_splat_super;
2361 2447
2362 error = security_sb_set_mnt_opts(s, &data->lsm_opts); 2448 error = mount_info->set_security(s, mntroot, mount_info);
2363 if (error) 2449 if (error)
2364 goto error_splat_root; 2450 goto error_splat_root;
2365 2451
2366 s->s_flags |= MS_ACTIVE; 2452 s->s_flags |= MS_ACTIVE;
2367 2453
2368out: 2454out:
2369 nfs_free_parsed_mount_data(data);
2370 nfs_free_fhandle(mntfh);
2371 return mntroot; 2455 return mntroot;
2372 2456
2373out_err_nosb: 2457out_err_nosb:
@@ -2385,6 +2469,43 @@ error_splat_bdi:
2385 goto out; 2469 goto out;
2386} 2470}
2387 2471
2472static struct dentry *nfs_fs_mount(struct file_system_type *fs_type,
2473 int flags, const char *dev_name, void *raw_data)
2474{
2475 struct nfs_mount_info mount_info = {
2476 .fill_super = nfs_fill_super,
2477 .set_security = nfs_set_sb_security,
2478 };
2479 struct dentry *mntroot = ERR_PTR(-ENOMEM);
2480 int error;
2481
2482 mount_info.parsed = nfs_alloc_parsed_mount_data();
2483 mount_info.mntfh = nfs_alloc_fhandle();
2484 if (mount_info.parsed == NULL || mount_info.mntfh == NULL)
2485 goto out;
2486
2487 /* Validate the mount data */
2488 error = nfs_validate_mount_data(fs_type, raw_data, mount_info.parsed, mount_info.mntfh, dev_name);
2489 if (error == NFS_TEXT_DATA)
2490 error = nfs_validate_text_mount_data(raw_data, mount_info.parsed, dev_name);
2491 if (error < 0) {
2492 mntroot = ERR_PTR(error);
2493 goto out;
2494 }
2495
2496#ifdef CONFIG_NFS_V4
2497 if (mount_info.parsed->version == 4)
2498 mntroot = nfs4_try_mount(flags, dev_name, &mount_info);
2499 else
2500#endif /* CONFIG_NFS_V4 */
2501 mntroot = nfs_try_mount(flags, dev_name, &mount_info);
2502
2503out:
2504 nfs_free_parsed_mount_data(mount_info.parsed);
2505 nfs_free_fhandle(mount_info.mntfh);
2506 return mntroot;
2507}
2508
2388/* 2509/*
2389 * Ensure that we unregister the bdi before kill_anon_super 2510 * Ensure that we unregister the bdi before kill_anon_super
2390 * releases the device name 2511 * releases the device name
@@ -2409,93 +2530,51 @@ static void nfs_kill_super(struct super_block *s)
2409} 2530}
2410 2531
2411/* 2532/*
2412 * Clone an NFS2/3 server record on xdev traversal (FSID-change) 2533 * Clone an NFS2/3/4 server record on xdev traversal (FSID-change)
2413 */ 2534 */
2414static struct dentry * 2535static struct dentry *
2415nfs_xdev_mount(struct file_system_type *fs_type, int flags, 2536nfs_xdev_mount_common(struct file_system_type *fs_type, int flags,
2416 const char *dev_name, void *raw_data) 2537 const char *dev_name, struct nfs_mount_info *mount_info)
2417{ 2538{
2418 struct nfs_clone_mount *data = raw_data; 2539 struct nfs_clone_mount *data = mount_info->cloned;
2419 struct super_block *s;
2420 struct nfs_server *server; 2540 struct nfs_server *server;
2421 struct dentry *mntroot; 2541 struct dentry *mntroot = ERR_PTR(-ENOMEM);
2422 int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
2423 struct nfs_sb_mountdata sb_mntdata = {
2424 .mntflags = flags,
2425 };
2426 int error; 2542 int error;
2427 2543
2428 dprintk("--> nfs_xdev_mount()\n"); 2544 dprintk("--> nfs_xdev_mount_common()\n");
2545
2546 mount_info->mntfh = data->fh;
2429 2547
2430 /* create a new volume representation */ 2548 /* create a new volume representation */
2431 server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr, data->authflavor); 2549 server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr, data->authflavor);
2432 if (IS_ERR(server)) { 2550 if (IS_ERR(server)) {
2433 error = PTR_ERR(server); 2551 error = PTR_ERR(server);
2434 goto out_err_noserver; 2552 goto out_err;
2435 }
2436 sb_mntdata.server = server;
2437
2438 if (server->flags & NFS_MOUNT_UNSHARED)
2439 compare_super = NULL;
2440
2441 /* -o noac implies -o sync */
2442 if (server->flags & NFS_MOUNT_NOAC)
2443 sb_mntdata.mntflags |= MS_SYNCHRONOUS;
2444
2445 /* Get a superblock - note that we may end up sharing one that already exists */
2446 s = sget(&nfs_fs_type, compare_super, nfs_set_super, &sb_mntdata);
2447 if (IS_ERR(s)) {
2448 error = PTR_ERR(s);
2449 goto out_err_nosb;
2450 }
2451
2452 if (s->s_fs_info != server) {
2453 nfs_free_server(server);
2454 server = NULL;
2455 } else {
2456 error = nfs_bdi_register(server);
2457 if (error)
2458 goto error_splat_bdi;
2459 }
2460
2461 if (!s->s_root) {
2462 /* initial superblock/root creation */
2463 nfs_clone_super(s, data->sb);
2464 nfs_fscache_get_super_cookie(s, NULL, data);
2465 }
2466
2467 mntroot = nfs_get_root(s, data->fh, dev_name);
2468 if (IS_ERR(mntroot)) {
2469 error = PTR_ERR(mntroot);
2470 goto error_splat_super;
2471 }
2472 if (mntroot->d_inode->i_op != NFS_SB(s)->nfs_client->rpc_ops->dir_inode_ops) {
2473 dput(mntroot);
2474 error = -ESTALE;
2475 goto error_splat_super;
2476 } 2553 }
2477 2554
2478 s->s_flags |= MS_ACTIVE; 2555 mntroot = nfs_fs_mount_common(fs_type, server, flags, dev_name, mount_info);
2479 2556 dprintk("<-- nfs_xdev_mount_common() = 0\n");
2480 /* clone any lsm security options from the parent to the new sb */ 2557out:
2481 security_sb_clone_mnt_opts(data->sb, s);
2482
2483 dprintk("<-- nfs_xdev_mount() = 0\n");
2484 return mntroot; 2558 return mntroot;
2485 2559
2486out_err_nosb: 2560out_err:
2487 nfs_free_server(server); 2561 dprintk("<-- nfs_xdev_mount_common() = %d [error]\n", error);
2488out_err_noserver: 2562 goto out;
2489 dprintk("<-- nfs_xdev_mount() = %d [error]\n", error); 2563}
2490 return ERR_PTR(error);
2491 2564
2492error_splat_super: 2565/*
2493 if (server && !s->s_root) 2566 * Clone an NFS2/3 server record on xdev traversal (FSID-change)
2494 bdi_unregister(&server->backing_dev_info); 2567 */
2495error_splat_bdi: 2568static struct dentry *
2496 deactivate_locked_super(s); 2569nfs_xdev_mount(struct file_system_type *fs_type, int flags,
2497 dprintk("<-- nfs_xdev_mount() = %d [splat]\n", error); 2570 const char *dev_name, void *raw_data)
2498 return ERR_PTR(error); 2571{
2572 struct nfs_mount_info mount_info = {
2573 .fill_super = nfs_clone_super,
2574 .set_security = nfs_clone_sb_security,
2575 .cloned = raw_data,
2576 };
2577 return nfs_xdev_mount_common(&nfs_fs_type, flags, dev_name, &mount_info);
2499} 2578}
2500 2579
2501#ifdef CONFIG_NFS_V4 2580#ifdef CONFIG_NFS_V4
@@ -2504,8 +2583,9 @@ error_splat_bdi:
2504 * Finish setting up a cloned NFS4 superblock 2583 * Finish setting up a cloned NFS4 superblock
2505 */ 2584 */
2506static void nfs4_clone_super(struct super_block *sb, 2585static void nfs4_clone_super(struct super_block *sb,
2507 const struct super_block *old_sb) 2586 struct nfs_mount_info *mount_info)
2508{ 2587{
2588 const struct super_block *old_sb = mount_info->cloned->sb;
2509 sb->s_blocksize_bits = old_sb->s_blocksize_bits; 2589 sb->s_blocksize_bits = old_sb->s_blocksize_bits;
2510 sb->s_blocksize = old_sb->s_blocksize; 2590 sb->s_blocksize = old_sb->s_blocksize;
2511 sb->s_maxbytes = old_sb->s_maxbytes; 2591 sb->s_maxbytes = old_sb->s_maxbytes;
@@ -2523,7 +2603,8 @@ static void nfs4_clone_super(struct super_block *sb,
2523/* 2603/*
2524 * Set up an NFS4 superblock 2604 * Set up an NFS4 superblock
2525 */ 2605 */
2526static void nfs4_fill_super(struct super_block *sb) 2606static void nfs4_fill_super(struct super_block *sb,
2607 struct nfs_mount_info *mount_info)
2527{ 2608{
2528 sb->s_time_gran = 1; 2609 sb->s_time_gran = 1;
2529 sb->s_op = &nfs4_sops; 2610 sb->s_op = &nfs4_sops;
@@ -2542,37 +2623,6 @@ static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *args)
2542 NFS_MOUNT_LOCAL_FLOCK|NFS_MOUNT_LOCAL_FCNTL); 2623 NFS_MOUNT_LOCAL_FLOCK|NFS_MOUNT_LOCAL_FCNTL);
2543} 2624}
2544 2625
2545static int nfs4_validate_text_mount_data(void *options,
2546 struct nfs_parsed_mount_data *args,
2547 const char *dev_name)
2548{
2549 struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address;
2550
2551 nfs_set_port(sap, &args->nfs_server.port, NFS_PORT);
2552
2553 nfs_validate_transport_protocol(args);
2554
2555 nfs4_validate_mount_flags(args);
2556
2557 if (args->version != 4) {
2558 dfprintk(MOUNT,
2559 "NFS4: Illegal mount version\n");
2560 return -EINVAL;
2561 }
2562
2563 if (args->auth_flavor_len > 1) {
2564 dfprintk(MOUNT,
2565 "NFS4: Too many RPC auth flavours specified\n");
2566 return -EINVAL;
2567 }
2568
2569 return nfs_parse_devname(dev_name,
2570 &args->nfs_server.hostname,
2571 NFS4_MAXNAMLEN,
2572 &args->nfs_server.export_path,
2573 NFS4_MAXPATHLEN);
2574}
2575
2576/* 2626/*
2577 * Validate NFSv4 mount options 2627 * Validate NFSv4 mount options
2578 */ 2628 */
@@ -2643,13 +2693,7 @@ static int nfs4_validate_mount_data(void *options,
2643 2693
2644 break; 2694 break;
2645 default: 2695 default:
2646 if (nfs_parse_mount_options((char *)options, args) == 0) 2696 return NFS_TEXT_DATA;
2647 return -EINVAL;
2648
2649 if (!nfs_verify_server_address(sap))
2650 return -EINVAL;
2651
2652 return nfs4_validate_text_mount_data(options, args, dev_name);
2653 } 2697 }
2654 2698
2655 return 0; 2699 return 0;
@@ -2673,91 +2717,26 @@ out_no_address:
2673 */ 2717 */
2674static struct dentry * 2718static struct dentry *
2675nfs4_remote_mount(struct file_system_type *fs_type, int flags, 2719nfs4_remote_mount(struct file_system_type *fs_type, int flags,
2676 const char *dev_name, void *raw_data) 2720 const char *dev_name, void *info)
2677{ 2721{
2678 struct nfs_parsed_mount_data *data = raw_data; 2722 struct nfs_mount_info *mount_info = info;
2679 struct super_block *s;
2680 struct nfs_server *server; 2723 struct nfs_server *server;
2681 struct nfs_fh *mntfh; 2724 struct dentry *mntroot = ERR_PTR(-ENOMEM);
2682 struct dentry *mntroot;
2683 int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
2684 struct nfs_sb_mountdata sb_mntdata = {
2685 .mntflags = flags,
2686 };
2687 int error = -ENOMEM;
2688 2725
2689 mntfh = nfs_alloc_fhandle(); 2726 mount_info->fill_super = nfs4_fill_super;
2690 if (data == NULL || mntfh == NULL) 2727 mount_info->set_security = nfs_set_sb_security;
2691 goto out;
2692 2728
2693 /* Get a volume representation */ 2729 /* Get a volume representation */
2694 server = nfs4_create_server(data, mntfh); 2730 server = nfs4_create_server(mount_info->parsed, mount_info->mntfh);
2695 if (IS_ERR(server)) { 2731 if (IS_ERR(server)) {
2696 error = PTR_ERR(server); 2732 mntroot = ERR_CAST(server);
2697 goto out; 2733 goto out;
2698 } 2734 }
2699 sb_mntdata.server = server;
2700 2735
2701 if (server->flags & NFS4_MOUNT_UNSHARED) 2736 mntroot = nfs_fs_mount_common(fs_type, server, flags, dev_name, mount_info);
2702 compare_super = NULL;
2703
2704 /* -o noac implies -o sync */
2705 if (server->flags & NFS_MOUNT_NOAC)
2706 sb_mntdata.mntflags |= MS_SYNCHRONOUS;
2707
2708 /* Get a superblock - note that we may end up sharing one that already exists */
2709 s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata);
2710 if (IS_ERR(s)) {
2711 error = PTR_ERR(s);
2712 goto out_free;
2713 }
2714
2715 if (s->s_fs_info != server) {
2716 nfs_free_server(server);
2717 server = NULL;
2718 } else {
2719 error = nfs_bdi_register(server);
2720 if (error)
2721 goto error_splat_bdi;
2722 }
2723
2724 if (!s->s_root) {
2725 /* initial superblock/root creation */
2726 nfs4_fill_super(s);
2727 nfs_fscache_get_super_cookie(s, data->fscache_uniq, NULL);
2728 }
2729
2730 mntroot = nfs4_get_root(s, mntfh, dev_name);
2731 if (IS_ERR(mntroot)) {
2732 error = PTR_ERR(mntroot);
2733 goto error_splat_super;
2734 }
2735
2736 error = security_sb_set_mnt_opts(s, &data->lsm_opts);
2737 if (error)
2738 goto error_splat_root;
2739
2740 s->s_flags |= MS_ACTIVE;
2741
2742 nfs_free_fhandle(mntfh);
2743 return mntroot;
2744 2737
2745out: 2738out:
2746 nfs_free_fhandle(mntfh); 2739 return mntroot;
2747 return ERR_PTR(error);
2748
2749out_free:
2750 nfs_free_server(server);
2751 goto out;
2752
2753error_splat_root:
2754 dput(mntroot);
2755error_splat_super:
2756 if (server && !s->s_root)
2757 bdi_unregister(&server->backing_dev_info);
2758error_splat_bdi:
2759 deactivate_locked_super(s);
2760 goto out;
2761} 2740}
2762 2741
2763static struct vfsmount *nfs_do_root_mount(struct file_system_type *fs_type, 2742static struct vfsmount *nfs_do_root_mount(struct file_system_type *fs_type,
@@ -2869,17 +2848,18 @@ static struct dentry *nfs_follow_remote_path(struct vfsmount *root_mnt,
2869} 2848}
2870 2849
2871static struct dentry *nfs4_try_mount(int flags, const char *dev_name, 2850static struct dentry *nfs4_try_mount(int flags, const char *dev_name,
2872 struct nfs_parsed_mount_data *data) 2851 struct nfs_mount_info *mount_info)
2873{ 2852{
2874 char *export_path; 2853 char *export_path;
2875 struct vfsmount *root_mnt; 2854 struct vfsmount *root_mnt;
2876 struct dentry *res; 2855 struct dentry *res;
2856 struct nfs_parsed_mount_data *data = mount_info->parsed;
2877 2857
2878 dfprintk(MOUNT, "--> nfs4_try_mount()\n"); 2858 dfprintk(MOUNT, "--> nfs4_try_mount()\n");
2879 2859
2880 export_path = data->nfs_server.export_path; 2860 export_path = data->nfs_server.export_path;
2881 data->nfs_server.export_path = "/"; 2861 data->nfs_server.export_path = "/";
2882 root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, flags, data, 2862 root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, flags, mount_info,
2883 data->nfs_server.hostname); 2863 data->nfs_server.hostname);
2884 data->nfs_server.export_path = export_path; 2864 data->nfs_server.export_path = export_path;
2885 2865
@@ -2891,38 +2871,6 @@ static struct dentry *nfs4_try_mount(int flags, const char *dev_name,
2891 return res; 2871 return res;
2892} 2872}
2893 2873
2894/*
2895 * Get the superblock for an NFS4 mountpoint
2896 */
2897static struct dentry *nfs4_mount(struct file_system_type *fs_type,
2898 int flags, const char *dev_name, void *raw_data)
2899{
2900 struct nfs_parsed_mount_data *data;
2901 int error = -ENOMEM;
2902 struct dentry *res = ERR_PTR(-ENOMEM);
2903
2904 data = nfs_alloc_parsed_mount_data(4);
2905 if (data == NULL)
2906 goto out;
2907
2908 /* Validate the mount data */
2909 error = nfs4_validate_mount_data(raw_data, data, dev_name);
2910 if (error < 0) {
2911 res = ERR_PTR(error);
2912 goto out;
2913 }
2914
2915 res = nfs4_try_mount(flags, dev_name, data);
2916 if (IS_ERR(res))
2917 error = PTR_ERR(res);
2918
2919out:
2920 nfs_free_parsed_mount_data(data);
2921 dprintk("<-- nfs4_mount() = %d%s\n", error,
2922 error != 0 ? " [error]" : "");
2923 return res;
2924}
2925
2926static void nfs4_kill_super(struct super_block *sb) 2874static void nfs4_kill_super(struct super_block *sb)
2927{ 2875{
2928 struct nfs_server *server = NFS_SB(sb); 2876 struct nfs_server *server = NFS_SB(sb);
@@ -2942,181 +2890,43 @@ static struct dentry *
2942nfs4_xdev_mount(struct file_system_type *fs_type, int flags, 2890nfs4_xdev_mount(struct file_system_type *fs_type, int flags,
2943 const char *dev_name, void *raw_data) 2891 const char *dev_name, void *raw_data)
2944{ 2892{
2945 struct nfs_clone_mount *data = raw_data; 2893 struct nfs_mount_info mount_info = {
2946 struct super_block *s; 2894 .fill_super = nfs4_clone_super,
2947 struct nfs_server *server; 2895 .set_security = nfs_clone_sb_security,
2948 struct dentry *mntroot; 2896 .cloned = raw_data,
2949 int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
2950 struct nfs_sb_mountdata sb_mntdata = {
2951 .mntflags = flags,
2952 }; 2897 };
2953 int error; 2898 return nfs_xdev_mount_common(&nfs4_fs_type, flags, dev_name, &mount_info);
2954
2955 dprintk("--> nfs4_xdev_mount()\n");
2956
2957 /* create a new volume representation */
2958 server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr, data->authflavor);
2959 if (IS_ERR(server)) {
2960 error = PTR_ERR(server);
2961 goto out_err_noserver;
2962 }
2963 sb_mntdata.server = server;
2964
2965 if (server->flags & NFS4_MOUNT_UNSHARED)
2966 compare_super = NULL;
2967
2968 /* -o noac implies -o sync */
2969 if (server->flags & NFS_MOUNT_NOAC)
2970 sb_mntdata.mntflags |= MS_SYNCHRONOUS;
2971
2972 /* Get a superblock - note that we may end up sharing one that already exists */
2973 s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata);
2974 if (IS_ERR(s)) {
2975 error = PTR_ERR(s);
2976 goto out_err_nosb;
2977 }
2978
2979 if (s->s_fs_info != server) {
2980 nfs_free_server(server);
2981 server = NULL;
2982 } else {
2983 error = nfs_bdi_register(server);
2984 if (error)
2985 goto error_splat_bdi;
2986 }
2987
2988 if (!s->s_root) {
2989 /* initial superblock/root creation */
2990 nfs4_clone_super(s, data->sb);
2991 nfs_fscache_get_super_cookie(s, NULL, data);
2992 }
2993
2994 mntroot = nfs4_get_root(s, data->fh, dev_name);
2995 if (IS_ERR(mntroot)) {
2996 error = PTR_ERR(mntroot);
2997 goto error_splat_super;
2998 }
2999 if (mntroot->d_inode->i_op != NFS_SB(s)->nfs_client->rpc_ops->dir_inode_ops) {
3000 dput(mntroot);
3001 error = -ESTALE;
3002 goto error_splat_super;
3003 }
3004
3005 s->s_flags |= MS_ACTIVE;
3006
3007 security_sb_clone_mnt_opts(data->sb, s);
3008
3009 dprintk("<-- nfs4_xdev_mount() = 0\n");
3010 return mntroot;
3011
3012out_err_nosb:
3013 nfs_free_server(server);
3014out_err_noserver:
3015 dprintk("<-- nfs4_xdev_mount() = %d [error]\n", error);
3016 return ERR_PTR(error);
3017
3018error_splat_super:
3019 if (server && !s->s_root)
3020 bdi_unregister(&server->backing_dev_info);
3021error_splat_bdi:
3022 deactivate_locked_super(s);
3023 dprintk("<-- nfs4_xdev_mount() = %d [splat]\n", error);
3024 return ERR_PTR(error);
3025} 2899}
3026 2900
3027static struct dentry * 2901static struct dentry *
3028nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags, 2902nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags,
3029 const char *dev_name, void *raw_data) 2903 const char *dev_name, void *raw_data)
3030{ 2904{
3031 struct nfs_clone_mount *data = raw_data; 2905 struct nfs_mount_info mount_info = {
3032 struct super_block *s; 2906 .fill_super = nfs4_fill_super,
3033 struct nfs_server *server; 2907 .set_security = nfs_clone_sb_security,
3034 struct dentry *mntroot; 2908 .cloned = raw_data,
3035 struct nfs_fh *mntfh;
3036 int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
3037 struct nfs_sb_mountdata sb_mntdata = {
3038 .mntflags = flags,
3039 }; 2909 };
3040 int error = -ENOMEM; 2910 struct nfs_server *server;
2911 struct dentry *mntroot = ERR_PTR(-ENOMEM);
3041 2912
3042 dprintk("--> nfs4_referral_get_sb()\n"); 2913 dprintk("--> nfs4_referral_get_sb()\n");
3043 2914
3044 mntfh = nfs_alloc_fhandle(); 2915 mount_info.mntfh = nfs_alloc_fhandle();
3045 if (mntfh == NULL) 2916 if (mount_info.cloned == NULL || mount_info.mntfh == NULL)
3046 goto out_err_nofh; 2917 goto out;
3047 2918
3048 /* create a new volume representation */ 2919 /* create a new volume representation */
3049 server = nfs4_create_referral_server(data, mntfh); 2920 server = nfs4_create_referral_server(mount_info.cloned, mount_info.mntfh);
3050 if (IS_ERR(server)) { 2921 if (IS_ERR(server)) {
3051 error = PTR_ERR(server); 2922 mntroot = ERR_CAST(server);
3052 goto out_err_noserver; 2923 goto out;
3053 }
3054 sb_mntdata.server = server;
3055
3056 if (server->flags & NFS4_MOUNT_UNSHARED)
3057 compare_super = NULL;
3058
3059 /* -o noac implies -o sync */
3060 if (server->flags & NFS_MOUNT_NOAC)
3061 sb_mntdata.mntflags |= MS_SYNCHRONOUS;
3062
3063 /* Get a superblock - note that we may end up sharing one that already exists */
3064 s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata);
3065 if (IS_ERR(s)) {
3066 error = PTR_ERR(s);
3067 goto out_err_nosb;
3068 }
3069
3070 if (s->s_fs_info != server) {
3071 nfs_free_server(server);
3072 server = NULL;
3073 } else {
3074 error = nfs_bdi_register(server);
3075 if (error)
3076 goto error_splat_bdi;
3077 }
3078
3079 if (!s->s_root) {
3080 /* initial superblock/root creation */
3081 nfs4_fill_super(s);
3082 nfs_fscache_get_super_cookie(s, NULL, data);
3083 }
3084
3085 mntroot = nfs4_get_root(s, mntfh, dev_name);
3086 if (IS_ERR(mntroot)) {
3087 error = PTR_ERR(mntroot);
3088 goto error_splat_super;
3089 }
3090 if (mntroot->d_inode->i_op != NFS_SB(s)->nfs_client->rpc_ops->dir_inode_ops) {
3091 dput(mntroot);
3092 error = -ESTALE;
3093 goto error_splat_super;
3094 } 2924 }
3095 2925
3096 s->s_flags |= MS_ACTIVE; 2926 mntroot = nfs_fs_mount_common(&nfs4_fs_type, server, flags, dev_name, &mount_info);
3097 2927out:
3098 security_sb_clone_mnt_opts(data->sb, s); 2928 nfs_free_fhandle(mount_info.mntfh);
3099
3100 nfs_free_fhandle(mntfh);
3101 dprintk("<-- nfs4_referral_get_sb() = 0\n");
3102 return mntroot; 2929 return mntroot;
3103
3104out_err_nosb:
3105 nfs_free_server(server);
3106out_err_noserver:
3107 nfs_free_fhandle(mntfh);
3108out_err_nofh:
3109 dprintk("<-- nfs4_referral_get_sb() = %d [error]\n", error);
3110 return ERR_PTR(error);
3111
3112error_splat_super:
3113 if (server && !s->s_root)
3114 bdi_unregister(&server->backing_dev_info);
3115error_splat_bdi:
3116 deactivate_locked_super(s);
3117 nfs_free_fhandle(mntfh);
3118 dprintk("<-- nfs4_referral_get_sb() = %d [splat]\n", error);
3119 return ERR_PTR(error);
3120} 2930}
3121 2931
3122/* 2932/*
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index c07462320f6b..e6fe3d69d14c 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -39,20 +39,20 @@
39/* 39/*
40 * Local function declarations 40 * Local function declarations
41 */ 41 */
42static void nfs_pageio_init_write(struct nfs_pageio_descriptor *desc,
43 struct inode *inode, int ioflags);
44static void nfs_redirty_request(struct nfs_page *req); 42static void nfs_redirty_request(struct nfs_page *req);
45static const struct rpc_call_ops nfs_write_partial_ops; 43static const struct rpc_call_ops nfs_write_common_ops;
46static const struct rpc_call_ops nfs_write_full_ops;
47static const struct rpc_call_ops nfs_commit_ops; 44static const struct rpc_call_ops nfs_commit_ops;
45static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops;
46static const struct nfs_commit_completion_ops nfs_commit_completion_ops;
48 47
49static struct kmem_cache *nfs_wdata_cachep; 48static struct kmem_cache *nfs_wdata_cachep;
50static mempool_t *nfs_wdata_mempool; 49static mempool_t *nfs_wdata_mempool;
50static struct kmem_cache *nfs_cdata_cachep;
51static mempool_t *nfs_commit_mempool; 51static mempool_t *nfs_commit_mempool;
52 52
53struct nfs_write_data *nfs_commitdata_alloc(void) 53struct nfs_commit_data *nfs_commitdata_alloc(void)
54{ 54{
55 struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOFS); 55 struct nfs_commit_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOFS);
56 56
57 if (p) { 57 if (p) {
58 memset(p, 0, sizeof(*p)); 58 memset(p, 0, sizeof(*p));
@@ -62,46 +62,73 @@ struct nfs_write_data *nfs_commitdata_alloc(void)
62} 62}
63EXPORT_SYMBOL_GPL(nfs_commitdata_alloc); 63EXPORT_SYMBOL_GPL(nfs_commitdata_alloc);
64 64
65void nfs_commit_free(struct nfs_write_data *p) 65void nfs_commit_free(struct nfs_commit_data *p)
66{ 66{
67 if (p && (p->pagevec != &p->page_array[0]))
68 kfree(p->pagevec);
69 mempool_free(p, nfs_commit_mempool); 67 mempool_free(p, nfs_commit_mempool);
70} 68}
71EXPORT_SYMBOL_GPL(nfs_commit_free); 69EXPORT_SYMBOL_GPL(nfs_commit_free);
72 70
73struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount) 71struct nfs_write_header *nfs_writehdr_alloc(void)
74{ 72{
75 struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, GFP_NOFS); 73 struct nfs_write_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOFS);
76 74
77 if (p) { 75 if (p) {
76 struct nfs_pgio_header *hdr = &p->header;
77
78 memset(p, 0, sizeof(*p)); 78 memset(p, 0, sizeof(*p));
79 INIT_LIST_HEAD(&p->pages); 79 INIT_LIST_HEAD(&hdr->pages);
80 p->npages = pagecount; 80 INIT_LIST_HEAD(&hdr->rpc_list);
81 if (pagecount <= ARRAY_SIZE(p->page_array)) 81 spin_lock_init(&hdr->lock);
82 p->pagevec = p->page_array; 82 atomic_set(&hdr->refcnt, 0);
83 else {
84 p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS);
85 if (!p->pagevec) {
86 mempool_free(p, nfs_wdata_mempool);
87 p = NULL;
88 }
89 }
90 } 83 }
91 return p; 84 return p;
92} 85}
93 86
94void nfs_writedata_free(struct nfs_write_data *p) 87static struct nfs_write_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr,
88 unsigned int pagecount)
89{
90 struct nfs_write_data *data, *prealloc;
91
92 prealloc = &container_of(hdr, struct nfs_write_header, header)->rpc_data;
93 if (prealloc->header == NULL)
94 data = prealloc;
95 else
96 data = kzalloc(sizeof(*data), GFP_KERNEL);
97 if (!data)
98 goto out;
99
100 if (nfs_pgarray_set(&data->pages, pagecount)) {
101 data->header = hdr;
102 atomic_inc(&hdr->refcnt);
103 } else {
104 if (data != prealloc)
105 kfree(data);
106 data = NULL;
107 }
108out:
109 return data;
110}
111
112void nfs_writehdr_free(struct nfs_pgio_header *hdr)
95{ 113{
96 if (p && (p->pagevec != &p->page_array[0])) 114 struct nfs_write_header *whdr = container_of(hdr, struct nfs_write_header, header);
97 kfree(p->pagevec); 115 mempool_free(whdr, nfs_wdata_mempool);
98 mempool_free(p, nfs_wdata_mempool);
99} 116}
100 117
101void nfs_writedata_release(struct nfs_write_data *wdata) 118void nfs_writedata_release(struct nfs_write_data *wdata)
102{ 119{
120 struct nfs_pgio_header *hdr = wdata->header;
121 struct nfs_write_header *write_header = container_of(hdr, struct nfs_write_header, header);
122
103 put_nfs_open_context(wdata->args.context); 123 put_nfs_open_context(wdata->args.context);
104 nfs_writedata_free(wdata); 124 if (wdata->pages.pagevec != wdata->pages.page_array)
125 kfree(wdata->pages.pagevec);
126 if (wdata != &write_header->rpc_data)
127 kfree(wdata);
128 else
129 wdata->header = NULL;
130 if (atomic_dec_and_test(&hdr->refcnt))
131 hdr->completion_ops->completion(hdr);
105} 132}
106 133
107static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error) 134static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
@@ -203,7 +230,6 @@ static int nfs_set_page_writeback(struct page *page)
203 struct inode *inode = page->mapping->host; 230 struct inode *inode = page->mapping->host;
204 struct nfs_server *nfss = NFS_SERVER(inode); 231 struct nfs_server *nfss = NFS_SERVER(inode);
205 232
206 page_cache_get(page);
207 if (atomic_long_inc_return(&nfss->writeback) > 233 if (atomic_long_inc_return(&nfss->writeback) >
208 NFS_CONGESTION_ON_THRESH) { 234 NFS_CONGESTION_ON_THRESH) {
209 set_bdi_congested(&nfss->backing_dev_info, 235 set_bdi_congested(&nfss->backing_dev_info,
@@ -219,7 +245,6 @@ static void nfs_end_page_writeback(struct page *page)
219 struct nfs_server *nfss = NFS_SERVER(inode); 245 struct nfs_server *nfss = NFS_SERVER(inode);
220 246
221 end_page_writeback(page); 247 end_page_writeback(page);
222 page_cache_release(page);
223 if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) 248 if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
224 clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC); 249 clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC);
225} 250}
@@ -235,10 +260,10 @@ static struct nfs_page *nfs_find_and_lock_request(struct page *page, bool nonblo
235 req = nfs_page_find_request_locked(page); 260 req = nfs_page_find_request_locked(page);
236 if (req == NULL) 261 if (req == NULL)
237 break; 262 break;
238 if (nfs_lock_request_dontget(req)) 263 if (nfs_lock_request(req))
239 break; 264 break;
240 /* Note: If we hold the page lock, as is the case in nfs_writepage, 265 /* Note: If we hold the page lock, as is the case in nfs_writepage,
241 * then the call to nfs_lock_request_dontget() will always 266 * then the call to nfs_lock_request() will always
242 * succeed provided that someone hasn't already marked the 267 * succeed provided that someone hasn't already marked the
243 * request as dirty (in which case we don't care). 268 * request as dirty (in which case we don't care).
244 */ 269 */
@@ -310,7 +335,8 @@ static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc
310 struct nfs_pageio_descriptor pgio; 335 struct nfs_pageio_descriptor pgio;
311 int err; 336 int err;
312 337
313 nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc)); 338 nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc),
339 &nfs_async_write_completion_ops);
314 err = nfs_do_writepage(page, wbc, &pgio); 340 err = nfs_do_writepage(page, wbc, &pgio);
315 nfs_pageio_complete(&pgio); 341 nfs_pageio_complete(&pgio);
316 if (err < 0) 342 if (err < 0)
@@ -353,7 +379,8 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
353 379
354 nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES); 380 nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
355 381
356 nfs_pageio_init_write(&pgio, inode, wb_priority(wbc)); 382 nfs_pageio_init_write(&pgio, inode, wb_priority(wbc),
383 &nfs_async_write_completion_ops);
357 err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio); 384 err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio);
358 nfs_pageio_complete(&pgio); 385 nfs_pageio_complete(&pgio);
359 386
@@ -379,7 +406,7 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
379 struct nfs_inode *nfsi = NFS_I(inode); 406 struct nfs_inode *nfsi = NFS_I(inode);
380 407
381 /* Lock the request! */ 408 /* Lock the request! */
382 nfs_lock_request_dontget(req); 409 nfs_lock_request(req);
383 410
384 spin_lock(&inode->i_lock); 411 spin_lock(&inode->i_lock);
385 if (!nfsi->npages && nfs_have_delegation(inode, FMODE_WRITE)) 412 if (!nfsi->npages && nfs_have_delegation(inode, FMODE_WRITE))
@@ -421,65 +448,88 @@ nfs_mark_request_dirty(struct nfs_page *req)
421/** 448/**
422 * nfs_request_add_commit_list - add request to a commit list 449 * nfs_request_add_commit_list - add request to a commit list
423 * @req: pointer to a struct nfs_page 450 * @req: pointer to a struct nfs_page
424 * @head: commit list head 451 * @dst: commit list head
452 * @cinfo: holds list lock and accounting info
425 * 453 *
426 * This sets the PG_CLEAN bit, updates the inode global count of 454 * This sets the PG_CLEAN bit, updates the cinfo count of
427 * number of outstanding requests requiring a commit as well as 455 * number of outstanding requests requiring a commit as well as
428 * the MM page stats. 456 * the MM page stats.
429 * 457 *
430 * The caller must _not_ hold the inode->i_lock, but must be 458 * The caller must _not_ hold the cinfo->lock, but must be
431 * holding the nfs_page lock. 459 * holding the nfs_page lock.
432 */ 460 */
433void 461void
434nfs_request_add_commit_list(struct nfs_page *req, struct list_head *head) 462nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst,
463 struct nfs_commit_info *cinfo)
435{ 464{
436 struct inode *inode = req->wb_context->dentry->d_inode;
437
438 set_bit(PG_CLEAN, &(req)->wb_flags); 465 set_bit(PG_CLEAN, &(req)->wb_flags);
439 spin_lock(&inode->i_lock); 466 spin_lock(cinfo->lock);
440 nfs_list_add_request(req, head); 467 nfs_list_add_request(req, dst);
441 NFS_I(inode)->ncommit++; 468 cinfo->mds->ncommit++;
442 spin_unlock(&inode->i_lock); 469 spin_unlock(cinfo->lock);
443 inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); 470 if (!cinfo->dreq) {
444 inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE); 471 inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
445 __mark_inode_dirty(inode, I_DIRTY_DATASYNC); 472 inc_bdi_stat(req->wb_page->mapping->backing_dev_info,
473 BDI_RECLAIMABLE);
474 __mark_inode_dirty(req->wb_context->dentry->d_inode,
475 I_DIRTY_DATASYNC);
476 }
446} 477}
447EXPORT_SYMBOL_GPL(nfs_request_add_commit_list); 478EXPORT_SYMBOL_GPL(nfs_request_add_commit_list);
448 479
449/** 480/**
450 * nfs_request_remove_commit_list - Remove request from a commit list 481 * nfs_request_remove_commit_list - Remove request from a commit list
451 * @req: pointer to a nfs_page 482 * @req: pointer to a nfs_page
483 * @cinfo: holds list lock and accounting info
452 * 484 *
453 * This clears the PG_CLEAN bit, and updates the inode global count of 485 * This clears the PG_CLEAN bit, and updates the cinfo's count of
454 * number of outstanding requests requiring a commit 486 * number of outstanding requests requiring a commit
455 * It does not update the MM page stats. 487 * It does not update the MM page stats.
456 * 488 *
457 * The caller _must_ hold the inode->i_lock and the nfs_page lock. 489 * The caller _must_ hold the cinfo->lock and the nfs_page lock.
458 */ 490 */
459void 491void
460nfs_request_remove_commit_list(struct nfs_page *req) 492nfs_request_remove_commit_list(struct nfs_page *req,
493 struct nfs_commit_info *cinfo)
461{ 494{
462 struct inode *inode = req->wb_context->dentry->d_inode;
463
464 if (!test_and_clear_bit(PG_CLEAN, &(req)->wb_flags)) 495 if (!test_and_clear_bit(PG_CLEAN, &(req)->wb_flags))
465 return; 496 return;
466 nfs_list_remove_request(req); 497 nfs_list_remove_request(req);
467 NFS_I(inode)->ncommit--; 498 cinfo->mds->ncommit--;
468} 499}
469EXPORT_SYMBOL_GPL(nfs_request_remove_commit_list); 500EXPORT_SYMBOL_GPL(nfs_request_remove_commit_list);
470 501
502static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo,
503 struct inode *inode)
504{
505 cinfo->lock = &inode->i_lock;
506 cinfo->mds = &NFS_I(inode)->commit_info;
507 cinfo->ds = pnfs_get_ds_info(inode);
508 cinfo->dreq = NULL;
509 cinfo->completion_ops = &nfs_commit_completion_ops;
510}
511
512void nfs_init_cinfo(struct nfs_commit_info *cinfo,
513 struct inode *inode,
514 struct nfs_direct_req *dreq)
515{
516 if (dreq)
517 nfs_init_cinfo_from_dreq(cinfo, dreq);
518 else
519 nfs_init_cinfo_from_inode(cinfo, inode);
520}
521EXPORT_SYMBOL_GPL(nfs_init_cinfo);
471 522
472/* 523/*
473 * Add a request to the inode's commit list. 524 * Add a request to the inode's commit list.
474 */ 525 */
475static void 526void
476nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) 527nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
528 struct nfs_commit_info *cinfo)
477{ 529{
478 struct inode *inode = req->wb_context->dentry->d_inode; 530 if (pnfs_mark_request_commit(req, lseg, cinfo))
479
480 if (pnfs_mark_request_commit(req, lseg))
481 return; 531 return;
482 nfs_request_add_commit_list(req, &NFS_I(inode)->commit_list); 532 nfs_request_add_commit_list(req, &cinfo->mds->list, cinfo);
483} 533}
484 534
485static void 535static void
@@ -494,11 +544,13 @@ nfs_clear_request_commit(struct nfs_page *req)
494{ 544{
495 if (test_bit(PG_CLEAN, &req->wb_flags)) { 545 if (test_bit(PG_CLEAN, &req->wb_flags)) {
496 struct inode *inode = req->wb_context->dentry->d_inode; 546 struct inode *inode = req->wb_context->dentry->d_inode;
547 struct nfs_commit_info cinfo;
497 548
498 if (!pnfs_clear_request_commit(req)) { 549 nfs_init_cinfo_from_inode(&cinfo, inode);
499 spin_lock(&inode->i_lock); 550 if (!pnfs_clear_request_commit(req, &cinfo)) {
500 nfs_request_remove_commit_list(req); 551 spin_lock(cinfo.lock);
501 spin_unlock(&inode->i_lock); 552 nfs_request_remove_commit_list(req, &cinfo);
553 spin_unlock(cinfo.lock);
502 } 554 }
503 nfs_clear_page_commit(req->wb_page); 555 nfs_clear_page_commit(req->wb_page);
504 } 556 }
@@ -508,28 +560,25 @@ static inline
508int nfs_write_need_commit(struct nfs_write_data *data) 560int nfs_write_need_commit(struct nfs_write_data *data)
509{ 561{
510 if (data->verf.committed == NFS_DATA_SYNC) 562 if (data->verf.committed == NFS_DATA_SYNC)
511 return data->lseg == NULL; 563 return data->header->lseg == NULL;
512 else 564 return data->verf.committed != NFS_FILE_SYNC;
513 return data->verf.committed != NFS_FILE_SYNC;
514} 565}
515 566
516static inline 567#else
517int nfs_reschedule_unstable_write(struct nfs_page *req, 568static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo,
518 struct nfs_write_data *data) 569 struct inode *inode)
519{ 570{
520 if (test_and_clear_bit(PG_NEED_COMMIT, &req->wb_flags)) {
521 nfs_mark_request_commit(req, data->lseg);
522 return 1;
523 }
524 if (test_and_clear_bit(PG_NEED_RESCHED, &req->wb_flags)) {
525 nfs_mark_request_dirty(req);
526 return 1;
527 }
528 return 0;
529} 571}
530#else 572
531static void 573void nfs_init_cinfo(struct nfs_commit_info *cinfo,
532nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) 574 struct inode *inode,
575 struct nfs_direct_req *dreq)
576{
577}
578
579void
580nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
581 struct nfs_commit_info *cinfo)
533{ 582{
534} 583}
535 584
@@ -544,25 +593,57 @@ int nfs_write_need_commit(struct nfs_write_data *data)
544 return 0; 593 return 0;
545} 594}
546 595
547static inline 596#endif
548int nfs_reschedule_unstable_write(struct nfs_page *req, 597
549 struct nfs_write_data *data) 598static void nfs_write_completion(struct nfs_pgio_header *hdr)
550{ 599{
551 return 0; 600 struct nfs_commit_info cinfo;
601 unsigned long bytes = 0;
602
603 if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
604 goto out;
605 nfs_init_cinfo_from_inode(&cinfo, hdr->inode);
606 while (!list_empty(&hdr->pages)) {
607 struct nfs_page *req = nfs_list_entry(hdr->pages.next);
608
609 bytes += req->wb_bytes;
610 nfs_list_remove_request(req);
611 if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) &&
612 (hdr->good_bytes < bytes)) {
613 nfs_set_pageerror(req->wb_page);
614 nfs_context_set_write_error(req->wb_context, hdr->error);
615 goto remove_req;
616 }
617 if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) {
618 nfs_mark_request_dirty(req);
619 goto next;
620 }
621 if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) {
622 nfs_mark_request_commit(req, hdr->lseg, &cinfo);
623 goto next;
624 }
625remove_req:
626 nfs_inode_remove_request(req);
627next:
628 nfs_unlock_request(req);
629 nfs_end_page_writeback(req->wb_page);
630 nfs_release_request(req);
631 }
632out:
633 hdr->release(hdr);
552} 634}
553#endif
554 635
555#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) 636#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
556static int 637static unsigned long
557nfs_need_commit(struct nfs_inode *nfsi) 638nfs_reqs_to_commit(struct nfs_commit_info *cinfo)
558{ 639{
559 return nfsi->ncommit > 0; 640 return cinfo->mds->ncommit;
560} 641}
561 642
562/* i_lock held by caller */ 643/* cinfo->lock held by caller */
563static int 644int
564nfs_scan_commit_list(struct list_head *src, struct list_head *dst, int max, 645nfs_scan_commit_list(struct list_head *src, struct list_head *dst,
565 spinlock_t *lock) 646 struct nfs_commit_info *cinfo, int max)
566{ 647{
567 struct nfs_page *req, *tmp; 648 struct nfs_page *req, *tmp;
568 int ret = 0; 649 int ret = 0;
@@ -570,12 +651,13 @@ nfs_scan_commit_list(struct list_head *src, struct list_head *dst, int max,
570 list_for_each_entry_safe(req, tmp, src, wb_list) { 651 list_for_each_entry_safe(req, tmp, src, wb_list) {
571 if (!nfs_lock_request(req)) 652 if (!nfs_lock_request(req))
572 continue; 653 continue;
573 if (cond_resched_lock(lock)) 654 kref_get(&req->wb_kref);
655 if (cond_resched_lock(cinfo->lock))
574 list_safe_reset_next(req, tmp, wb_list); 656 list_safe_reset_next(req, tmp, wb_list);
575 nfs_request_remove_commit_list(req); 657 nfs_request_remove_commit_list(req, cinfo);
576 nfs_list_add_request(req, dst); 658 nfs_list_add_request(req, dst);
577 ret++; 659 ret++;
578 if (ret == max) 660 if ((ret == max) && !cinfo->dreq)
579 break; 661 break;
580 } 662 }
581 return ret; 663 return ret;
@@ -584,37 +666,38 @@ nfs_scan_commit_list(struct list_head *src, struct list_head *dst, int max,
584/* 666/*
585 * nfs_scan_commit - Scan an inode for commit requests 667 * nfs_scan_commit - Scan an inode for commit requests
586 * @inode: NFS inode to scan 668 * @inode: NFS inode to scan
587 * @dst: destination list 669 * @dst: mds destination list
670 * @cinfo: mds and ds lists of reqs ready to commit
588 * 671 *
589 * Moves requests from the inode's 'commit' request list. 672 * Moves requests from the inode's 'commit' request list.
590 * The requests are *not* checked to ensure that they form a contiguous set. 673 * The requests are *not* checked to ensure that they form a contiguous set.
591 */ 674 */
592static int 675int
593nfs_scan_commit(struct inode *inode, struct list_head *dst) 676nfs_scan_commit(struct inode *inode, struct list_head *dst,
677 struct nfs_commit_info *cinfo)
594{ 678{
595 struct nfs_inode *nfsi = NFS_I(inode);
596 int ret = 0; 679 int ret = 0;
597 680
598 spin_lock(&inode->i_lock); 681 spin_lock(cinfo->lock);
599 if (nfsi->ncommit > 0) { 682 if (cinfo->mds->ncommit > 0) {
600 const int max = INT_MAX; 683 const int max = INT_MAX;
601 684
602 ret = nfs_scan_commit_list(&nfsi->commit_list, dst, max, 685 ret = nfs_scan_commit_list(&cinfo->mds->list, dst,
603 &inode->i_lock); 686 cinfo, max);
604 ret += pnfs_scan_commit_lists(inode, max - ret, 687 ret += pnfs_scan_commit_lists(inode, cinfo, max - ret);
605 &inode->i_lock);
606 } 688 }
607 spin_unlock(&inode->i_lock); 689 spin_unlock(cinfo->lock);
608 return ret; 690 return ret;
609} 691}
610 692
611#else 693#else
612static inline int nfs_need_commit(struct nfs_inode *nfsi) 694static unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo)
613{ 695{
614 return 0; 696 return 0;
615} 697}
616 698
617static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst) 699int nfs_scan_commit(struct inode *inode, struct list_head *dst,
700 struct nfs_commit_info *cinfo)
618{ 701{
619 return 0; 702 return 0;
620} 703}
@@ -659,7 +742,7 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
659 || end < req->wb_offset) 742 || end < req->wb_offset)
660 goto out_flushme; 743 goto out_flushme;
661 744
662 if (nfs_lock_request_dontget(req)) 745 if (nfs_lock_request(req))
663 break; 746 break;
664 747
665 /* The request is locked, so wait and then retry */ 748 /* The request is locked, so wait and then retry */
@@ -729,7 +812,7 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
729 nfs_grow_file(page, offset, count); 812 nfs_grow_file(page, offset, count);
730 nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes); 813 nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
731 nfs_mark_request_dirty(req); 814 nfs_mark_request_dirty(req);
732 nfs_unlock_request(req); 815 nfs_unlock_and_release_request(req);
733 return 0; 816 return 0;
734} 817}
735 818
@@ -766,10 +849,14 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
766 * the PageUptodate() flag. In this case, we will need to turn off 849 * the PageUptodate() flag. In this case, we will need to turn off
767 * write optimisations that depend on the page contents being correct. 850 * write optimisations that depend on the page contents being correct.
768 */ 851 */
769static int nfs_write_pageuptodate(struct page *page, struct inode *inode) 852static bool nfs_write_pageuptodate(struct page *page, struct inode *inode)
770{ 853{
771 return PageUptodate(page) && 854 if (nfs_have_delegated_attributes(inode))
772 !(NFS_I(inode)->cache_validity & (NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA)); 855 goto out;
856 if (NFS_I(inode)->cache_validity & NFS_INO_REVAL_PAGECACHE)
857 return false;
858out:
859 return PageUptodate(page) != 0;
773} 860}
774 861
775/* 862/*
@@ -815,17 +902,6 @@ int nfs_updatepage(struct file *file, struct page *page,
815 return status; 902 return status;
816} 903}
817 904
818static void nfs_writepage_release(struct nfs_page *req,
819 struct nfs_write_data *data)
820{
821 struct page *page = req->wb_page;
822
823 if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req, data))
824 nfs_inode_remove_request(req);
825 nfs_unlock_request(req);
826 nfs_end_page_writeback(page);
827}
828
829static int flush_task_priority(int how) 905static int flush_task_priority(int how)
830{ 906{
831 switch (how & (FLUSH_HIGHPRI|FLUSH_LOWPRI)) { 907 switch (how & (FLUSH_HIGHPRI|FLUSH_LOWPRI)) {
@@ -837,18 +913,18 @@ static int flush_task_priority(int how)
837 return RPC_PRIORITY_NORMAL; 913 return RPC_PRIORITY_NORMAL;
838} 914}
839 915
840int nfs_initiate_write(struct nfs_write_data *data, 916int nfs_initiate_write(struct rpc_clnt *clnt,
841 struct rpc_clnt *clnt, 917 struct nfs_write_data *data,
842 const struct rpc_call_ops *call_ops, 918 const struct rpc_call_ops *call_ops,
843 int how) 919 int how, int flags)
844{ 920{
845 struct inode *inode = data->inode; 921 struct inode *inode = data->header->inode;
846 int priority = flush_task_priority(how); 922 int priority = flush_task_priority(how);
847 struct rpc_task *task; 923 struct rpc_task *task;
848 struct rpc_message msg = { 924 struct rpc_message msg = {
849 .rpc_argp = &data->args, 925 .rpc_argp = &data->args,
850 .rpc_resp = &data->res, 926 .rpc_resp = &data->res,
851 .rpc_cred = data->cred, 927 .rpc_cred = data->header->cred,
852 }; 928 };
853 struct rpc_task_setup task_setup_data = { 929 struct rpc_task_setup task_setup_data = {
854 .rpc_client = clnt, 930 .rpc_client = clnt,
@@ -857,7 +933,7 @@ int nfs_initiate_write(struct nfs_write_data *data,
857 .callback_ops = call_ops, 933 .callback_ops = call_ops,
858 .callback_data = data, 934 .callback_data = data,
859 .workqueue = nfsiod_workqueue, 935 .workqueue = nfsiod_workqueue,
860 .flags = RPC_TASK_ASYNC, 936 .flags = RPC_TASK_ASYNC | flags,
861 .priority = priority, 937 .priority = priority,
862 }; 938 };
863 int ret = 0; 939 int ret = 0;
@@ -892,26 +968,21 @@ EXPORT_SYMBOL_GPL(nfs_initiate_write);
892/* 968/*
893 * Set up the argument/result storage required for the RPC call. 969 * Set up the argument/result storage required for the RPC call.
894 */ 970 */
895static void nfs_write_rpcsetup(struct nfs_page *req, 971static void nfs_write_rpcsetup(struct nfs_write_data *data,
896 struct nfs_write_data *data,
897 unsigned int count, unsigned int offset, 972 unsigned int count, unsigned int offset,
898 int how) 973 int how, struct nfs_commit_info *cinfo)
899{ 974{
900 struct inode *inode = req->wb_context->dentry->d_inode; 975 struct nfs_page *req = data->header->req;
901 976
902 /* Set up the RPC argument and reply structs 977 /* Set up the RPC argument and reply structs
903 * NB: take care not to mess about with data->commit et al. */ 978 * NB: take care not to mess about with data->commit et al. */
904 979
905 data->req = req; 980 data->args.fh = NFS_FH(data->header->inode);
906 data->inode = inode = req->wb_context->dentry->d_inode;
907 data->cred = req->wb_context->cred;
908
909 data->args.fh = NFS_FH(inode);
910 data->args.offset = req_offset(req) + offset; 981 data->args.offset = req_offset(req) + offset;
911 /* pnfs_set_layoutcommit needs this */ 982 /* pnfs_set_layoutcommit needs this */
912 data->mds_offset = data->args.offset; 983 data->mds_offset = data->args.offset;
913 data->args.pgbase = req->wb_pgbase + offset; 984 data->args.pgbase = req->wb_pgbase + offset;
914 data->args.pages = data->pagevec; 985 data->args.pages = data->pages.pagevec;
915 data->args.count = count; 986 data->args.count = count;
916 data->args.context = get_nfs_open_context(req->wb_context); 987 data->args.context = get_nfs_open_context(req->wb_context);
917 data->args.lock_context = req->wb_lock_context; 988 data->args.lock_context = req->wb_lock_context;
@@ -920,7 +991,7 @@ static void nfs_write_rpcsetup(struct nfs_page *req,
920 case 0: 991 case 0:
921 break; 992 break;
922 case FLUSH_COND_STABLE: 993 case FLUSH_COND_STABLE:
923 if (nfs_need_commit(NFS_I(inode))) 994 if (nfs_reqs_to_commit(cinfo))
924 break; 995 break;
925 default: 996 default:
926 data->args.stable = NFS_FILE_SYNC; 997 data->args.stable = NFS_FILE_SYNC;
@@ -936,9 +1007,9 @@ static int nfs_do_write(struct nfs_write_data *data,
936 const struct rpc_call_ops *call_ops, 1007 const struct rpc_call_ops *call_ops,
937 int how) 1008 int how)
938{ 1009{
939 struct inode *inode = data->args.context->dentry->d_inode; 1010 struct inode *inode = data->header->inode;
940 1011
941 return nfs_initiate_write(data, NFS_CLIENT(inode), call_ops, how); 1012 return nfs_initiate_write(NFS_CLIENT(inode), data, call_ops, how, 0);
942} 1013}
943 1014
944static int nfs_do_multiple_writes(struct list_head *head, 1015static int nfs_do_multiple_writes(struct list_head *head,
@@ -951,7 +1022,7 @@ static int nfs_do_multiple_writes(struct list_head *head,
951 while (!list_empty(head)) { 1022 while (!list_empty(head)) {
952 int ret2; 1023 int ret2;
953 1024
954 data = list_entry(head->next, struct nfs_write_data, list); 1025 data = list_first_entry(head, struct nfs_write_data, list);
955 list_del_init(&data->list); 1026 list_del_init(&data->list);
956 1027
957 ret2 = nfs_do_write(data, call_ops, how); 1028 ret2 = nfs_do_write(data, call_ops, how);
@@ -967,31 +1038,60 @@ static int nfs_do_multiple_writes(struct list_head *head,
967 */ 1038 */
968static void nfs_redirty_request(struct nfs_page *req) 1039static void nfs_redirty_request(struct nfs_page *req)
969{ 1040{
970 struct page *page = req->wb_page;
971
972 nfs_mark_request_dirty(req); 1041 nfs_mark_request_dirty(req);
973 nfs_unlock_request(req); 1042 nfs_unlock_request(req);
974 nfs_end_page_writeback(page); 1043 nfs_end_page_writeback(req->wb_page);
1044 nfs_release_request(req);
1045}
1046
1047static void nfs_async_write_error(struct list_head *head)
1048{
1049 struct nfs_page *req;
1050
1051 while (!list_empty(head)) {
1052 req = nfs_list_entry(head->next);
1053 nfs_list_remove_request(req);
1054 nfs_redirty_request(req);
1055 }
1056}
1057
1058static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops = {
1059 .error_cleanup = nfs_async_write_error,
1060 .completion = nfs_write_completion,
1061};
1062
1063static void nfs_flush_error(struct nfs_pageio_descriptor *desc,
1064 struct nfs_pgio_header *hdr)
1065{
1066 set_bit(NFS_IOHDR_REDO, &hdr->flags);
1067 while (!list_empty(&hdr->rpc_list)) {
1068 struct nfs_write_data *data = list_first_entry(&hdr->rpc_list,
1069 struct nfs_write_data, list);
1070 list_del(&data->list);
1071 nfs_writedata_release(data);
1072 }
1073 desc->pg_completion_ops->error_cleanup(&desc->pg_list);
975} 1074}
976 1075
977/* 1076/*
978 * Generate multiple small requests to write out a single 1077 * Generate multiple small requests to write out a single
979 * contiguous dirty area on one page. 1078 * contiguous dirty area on one page.
980 */ 1079 */
981static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head *res) 1080static int nfs_flush_multi(struct nfs_pageio_descriptor *desc,
1081 struct nfs_pgio_header *hdr)
982{ 1082{
983 struct nfs_page *req = nfs_list_entry(desc->pg_list.next); 1083 struct nfs_page *req = hdr->req;
984 struct page *page = req->wb_page; 1084 struct page *page = req->wb_page;
985 struct nfs_write_data *data; 1085 struct nfs_write_data *data;
986 size_t wsize = desc->pg_bsize, nbytes; 1086 size_t wsize = desc->pg_bsize, nbytes;
987 unsigned int offset; 1087 unsigned int offset;
988 int requests = 0; 1088 int requests = 0;
989 int ret = 0; 1089 struct nfs_commit_info cinfo;
990 1090
991 nfs_list_remove_request(req); 1091 nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);
992 1092
993 if ((desc->pg_ioflags & FLUSH_COND_STABLE) && 1093 if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
994 (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit || 1094 (desc->pg_moreio || nfs_reqs_to_commit(&cinfo) ||
995 desc->pg_count > wsize)) 1095 desc->pg_count > wsize))
996 desc->pg_ioflags &= ~FLUSH_COND_STABLE; 1096 desc->pg_ioflags &= ~FLUSH_COND_STABLE;
997 1097
@@ -1001,28 +1101,22 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head
1001 do { 1101 do {
1002 size_t len = min(nbytes, wsize); 1102 size_t len = min(nbytes, wsize);
1003 1103
1004 data = nfs_writedata_alloc(1); 1104 data = nfs_writedata_alloc(hdr, 1);
1005 if (!data) 1105 if (!data) {
1006 goto out_bad; 1106 nfs_flush_error(desc, hdr);
1007 data->pagevec[0] = page; 1107 return -ENOMEM;
1008 nfs_write_rpcsetup(req, data, len, offset, desc->pg_ioflags); 1108 }
1009 list_add(&data->list, res); 1109 data->pages.pagevec[0] = page;
1110 nfs_write_rpcsetup(data, len, offset, desc->pg_ioflags, &cinfo);
1111 list_add(&data->list, &hdr->rpc_list);
1010 requests++; 1112 requests++;
1011 nbytes -= len; 1113 nbytes -= len;
1012 offset += len; 1114 offset += len;
1013 } while (nbytes != 0); 1115 } while (nbytes != 0);
1014 atomic_set(&req->wb_complete, requests); 1116 nfs_list_remove_request(req);
1015 desc->pg_rpc_callops = &nfs_write_partial_ops; 1117 nfs_list_add_request(req, &hdr->pages);
1016 return ret; 1118 desc->pg_rpc_callops = &nfs_write_common_ops;
1017 1119 return 0;
1018out_bad:
1019 while (!list_empty(res)) {
1020 data = list_entry(res->next, struct nfs_write_data, list);
1021 list_del(&data->list);
1022 nfs_writedata_release(data);
1023 }
1024 nfs_redirty_request(req);
1025 return -ENOMEM;
1026} 1120}
1027 1121
1028/* 1122/*
@@ -1033,62 +1127,71 @@ out_bad:
1033 * This is the case if nfs_updatepage detects a conflicting request 1127 * This is the case if nfs_updatepage detects a conflicting request
1034 * that has been written but not committed. 1128 * that has been written but not committed.
1035 */ 1129 */
1036static int nfs_flush_one(struct nfs_pageio_descriptor *desc, struct list_head *res) 1130static int nfs_flush_one(struct nfs_pageio_descriptor *desc,
1131 struct nfs_pgio_header *hdr)
1037{ 1132{
1038 struct nfs_page *req; 1133 struct nfs_page *req;
1039 struct page **pages; 1134 struct page **pages;
1040 struct nfs_write_data *data; 1135 struct nfs_write_data *data;
1041 struct list_head *head = &desc->pg_list; 1136 struct list_head *head = &desc->pg_list;
1042 int ret = 0; 1137 struct nfs_commit_info cinfo;
1043 1138
1044 data = nfs_writedata_alloc(nfs_page_array_len(desc->pg_base, 1139 data = nfs_writedata_alloc(hdr, nfs_page_array_len(desc->pg_base,
1045 desc->pg_count)); 1140 desc->pg_count));
1046 if (!data) { 1141 if (!data) {
1047 while (!list_empty(head)) { 1142 nfs_flush_error(desc, hdr);
1048 req = nfs_list_entry(head->next); 1143 return -ENOMEM;
1049 nfs_list_remove_request(req);
1050 nfs_redirty_request(req);
1051 }
1052 ret = -ENOMEM;
1053 goto out;
1054 } 1144 }
1055 pages = data->pagevec; 1145
1146 nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);
1147 pages = data->pages.pagevec;
1056 while (!list_empty(head)) { 1148 while (!list_empty(head)) {
1057 req = nfs_list_entry(head->next); 1149 req = nfs_list_entry(head->next);
1058 nfs_list_remove_request(req); 1150 nfs_list_remove_request(req);
1059 nfs_list_add_request(req, &data->pages); 1151 nfs_list_add_request(req, &hdr->pages);
1060 *pages++ = req->wb_page; 1152 *pages++ = req->wb_page;
1061 } 1153 }
1062 req = nfs_list_entry(data->pages.next);
1063 1154
1064 if ((desc->pg_ioflags & FLUSH_COND_STABLE) && 1155 if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
1065 (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit)) 1156 (desc->pg_moreio || nfs_reqs_to_commit(&cinfo)))
1066 desc->pg_ioflags &= ~FLUSH_COND_STABLE; 1157 desc->pg_ioflags &= ~FLUSH_COND_STABLE;
1067 1158
1068 /* Set up the argument struct */ 1159 /* Set up the argument struct */
1069 nfs_write_rpcsetup(req, data, desc->pg_count, 0, desc->pg_ioflags); 1160 nfs_write_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags, &cinfo);
1070 list_add(&data->list, res); 1161 list_add(&data->list, &hdr->rpc_list);
1071 desc->pg_rpc_callops = &nfs_write_full_ops; 1162 desc->pg_rpc_callops = &nfs_write_common_ops;
1072out: 1163 return 0;
1073 return ret;
1074} 1164}
1075 1165
1076int nfs_generic_flush(struct nfs_pageio_descriptor *desc, struct list_head *head) 1166int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
1167 struct nfs_pgio_header *hdr)
1077{ 1168{
1078 if (desc->pg_bsize < PAGE_CACHE_SIZE) 1169 if (desc->pg_bsize < PAGE_CACHE_SIZE)
1079 return nfs_flush_multi(desc, head); 1170 return nfs_flush_multi(desc, hdr);
1080 return nfs_flush_one(desc, head); 1171 return nfs_flush_one(desc, hdr);
1081} 1172}
1082 1173
1083static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) 1174static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
1084{ 1175{
1085 LIST_HEAD(head); 1176 struct nfs_write_header *whdr;
1177 struct nfs_pgio_header *hdr;
1086 int ret; 1178 int ret;
1087 1179
1088 ret = nfs_generic_flush(desc, &head); 1180 whdr = nfs_writehdr_alloc();
1181 if (!whdr) {
1182 desc->pg_completion_ops->error_cleanup(&desc->pg_list);
1183 return -ENOMEM;
1184 }
1185 hdr = &whdr->header;
1186 nfs_pgheader_init(desc, hdr, nfs_writehdr_free);
1187 atomic_inc(&hdr->refcnt);
1188 ret = nfs_generic_flush(desc, hdr);
1089 if (ret == 0) 1189 if (ret == 0)
1090 ret = nfs_do_multiple_writes(&head, desc->pg_rpc_callops, 1190 ret = nfs_do_multiple_writes(&hdr->rpc_list,
1091 desc->pg_ioflags); 1191 desc->pg_rpc_callops,
1192 desc->pg_ioflags);
1193 if (atomic_dec_and_test(&hdr->refcnt))
1194 hdr->completion_ops->completion(hdr);
1092 return ret; 1195 return ret;
1093} 1196}
1094 1197
@@ -1098,9 +1201,10 @@ static const struct nfs_pageio_ops nfs_pageio_write_ops = {
1098}; 1201};
1099 1202
1100void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, 1203void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
1101 struct inode *inode, int ioflags) 1204 struct inode *inode, int ioflags,
1205 const struct nfs_pgio_completion_ops *compl_ops)
1102{ 1206{
1103 nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops, 1207 nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops, compl_ops,
1104 NFS_SERVER(inode)->wsize, ioflags); 1208 NFS_SERVER(inode)->wsize, ioflags);
1105} 1209}
1106 1210
@@ -1111,80 +1215,27 @@ void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio)
1111} 1215}
1112EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds); 1216EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds);
1113 1217
1114static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, 1218void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
1115 struct inode *inode, int ioflags) 1219 struct inode *inode, int ioflags,
1220 const struct nfs_pgio_completion_ops *compl_ops)
1116{ 1221{
1117 if (!pnfs_pageio_init_write(pgio, inode, ioflags)) 1222 if (!pnfs_pageio_init_write(pgio, inode, ioflags, compl_ops))
1118 nfs_pageio_init_write_mds(pgio, inode, ioflags); 1223 nfs_pageio_init_write_mds(pgio, inode, ioflags, compl_ops);
1119} 1224}
1120 1225
1121/* 1226void nfs_write_prepare(struct rpc_task *task, void *calldata)
1122 * Handle a write reply that flushed part of a page.
1123 */
1124static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
1125{ 1227{
1126 struct nfs_write_data *data = calldata; 1228 struct nfs_write_data *data = calldata;
1127 1229 NFS_PROTO(data->header->inode)->write_rpc_prepare(task, data);
1128 dprintk("NFS: %5u write(%s/%lld %d@%lld)",
1129 task->tk_pid,
1130 data->req->wb_context->dentry->d_inode->i_sb->s_id,
1131 (long long)
1132 NFS_FILEID(data->req->wb_context->dentry->d_inode),
1133 data->req->wb_bytes, (long long)req_offset(data->req));
1134
1135 nfs_writeback_done(task, data);
1136} 1230}
1137 1231
1138static void nfs_writeback_release_partial(void *calldata) 1232void nfs_commit_prepare(struct rpc_task *task, void *calldata)
1139{ 1233{
1140 struct nfs_write_data *data = calldata; 1234 struct nfs_commit_data *data = calldata;
1141 struct nfs_page *req = data->req;
1142 struct page *page = req->wb_page;
1143 int status = data->task.tk_status;
1144 1235
1145 if (status < 0) { 1236 NFS_PROTO(data->inode)->commit_rpc_prepare(task, data);
1146 nfs_set_pageerror(page);
1147 nfs_context_set_write_error(req->wb_context, status);
1148 dprintk(", error = %d\n", status);
1149 goto out;
1150 }
1151
1152 if (nfs_write_need_commit(data)) {
1153 struct inode *inode = page->mapping->host;
1154
1155 spin_lock(&inode->i_lock);
1156 if (test_bit(PG_NEED_RESCHED, &req->wb_flags)) {
1157 /* Do nothing we need to resend the writes */
1158 } else if (!test_and_set_bit(PG_NEED_COMMIT, &req->wb_flags)) {
1159 memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
1160 dprintk(" defer commit\n");
1161 } else if (memcmp(&req->wb_verf, &data->verf, sizeof(req->wb_verf))) {
1162 set_bit(PG_NEED_RESCHED, &req->wb_flags);
1163 clear_bit(PG_NEED_COMMIT, &req->wb_flags);
1164 dprintk(" server reboot detected\n");
1165 }
1166 spin_unlock(&inode->i_lock);
1167 } else
1168 dprintk(" OK\n");
1169
1170out:
1171 if (atomic_dec_and_test(&req->wb_complete))
1172 nfs_writepage_release(req, data);
1173 nfs_writedata_release(calldata);
1174} 1237}
1175 1238
1176void nfs_write_prepare(struct rpc_task *task, void *calldata)
1177{
1178 struct nfs_write_data *data = calldata;
1179 NFS_PROTO(data->inode)->write_rpc_prepare(task, data);
1180}
1181
1182static const struct rpc_call_ops nfs_write_partial_ops = {
1183 .rpc_call_prepare = nfs_write_prepare,
1184 .rpc_call_done = nfs_writeback_done_partial,
1185 .rpc_release = nfs_writeback_release_partial,
1186};
1187
1188/* 1239/*
1189 * Handle a write reply that flushes a whole page. 1240 * Handle a write reply that flushes a whole page.
1190 * 1241 *
@@ -1192,59 +1243,37 @@ static const struct rpc_call_ops nfs_write_partial_ops = {
1192 * writebacks since the page->count is kept > 1 for as long 1243 * writebacks since the page->count is kept > 1 for as long
1193 * as the page has a write request pending. 1244 * as the page has a write request pending.
1194 */ 1245 */
1195static void nfs_writeback_done_full(struct rpc_task *task, void *calldata) 1246static void nfs_writeback_done_common(struct rpc_task *task, void *calldata)
1196{ 1247{
1197 struct nfs_write_data *data = calldata; 1248 struct nfs_write_data *data = calldata;
1198 1249
1199 nfs_writeback_done(task, data); 1250 nfs_writeback_done(task, data);
1200} 1251}
1201 1252
1202static void nfs_writeback_release_full(void *calldata) 1253static void nfs_writeback_release_common(void *calldata)
1203{ 1254{
1204 struct nfs_write_data *data = calldata; 1255 struct nfs_write_data *data = calldata;
1256 struct nfs_pgio_header *hdr = data->header;
1205 int status = data->task.tk_status; 1257 int status = data->task.tk_status;
1258 struct nfs_page *req = hdr->req;
1206 1259
1207 /* Update attributes as result of writeback. */ 1260 if ((status >= 0) && nfs_write_need_commit(data)) {
1208 while (!list_empty(&data->pages)) { 1261 spin_lock(&hdr->lock);
1209 struct nfs_page *req = nfs_list_entry(data->pages.next); 1262 if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags))
1210 struct page *page = req->wb_page; 1263 ; /* Do nothing */
1211 1264 else if (!test_and_set_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags))
1212 nfs_list_remove_request(req);
1213
1214 dprintk("NFS: %5u write (%s/%lld %d@%lld)",
1215 data->task.tk_pid,
1216 req->wb_context->dentry->d_inode->i_sb->s_id,
1217 (long long)NFS_FILEID(req->wb_context->dentry->d_inode),
1218 req->wb_bytes,
1219 (long long)req_offset(req));
1220
1221 if (status < 0) {
1222 nfs_set_pageerror(page);
1223 nfs_context_set_write_error(req->wb_context, status);
1224 dprintk(", error = %d\n", status);
1225 goto remove_request;
1226 }
1227
1228 if (nfs_write_need_commit(data)) {
1229 memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf)); 1265 memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
1230 nfs_mark_request_commit(req, data->lseg); 1266 else if (memcmp(&req->wb_verf, &data->verf, sizeof(req->wb_verf)))
1231 dprintk(" marked for commit\n"); 1267 set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags);
1232 goto next; 1268 spin_unlock(&hdr->lock);
1233 }
1234 dprintk(" OK\n");
1235remove_request:
1236 nfs_inode_remove_request(req);
1237 next:
1238 nfs_unlock_request(req);
1239 nfs_end_page_writeback(page);
1240 } 1269 }
1241 nfs_writedata_release(calldata); 1270 nfs_writedata_release(data);
1242} 1271}
1243 1272
1244static const struct rpc_call_ops nfs_write_full_ops = { 1273static const struct rpc_call_ops nfs_write_common_ops = {
1245 .rpc_call_prepare = nfs_write_prepare, 1274 .rpc_call_prepare = nfs_write_prepare,
1246 .rpc_call_done = nfs_writeback_done_full, 1275 .rpc_call_done = nfs_writeback_done_common,
1247 .rpc_release = nfs_writeback_release_full, 1276 .rpc_release = nfs_writeback_release_common,
1248}; 1277};
1249 1278
1250 1279
@@ -1255,6 +1284,7 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1255{ 1284{
1256 struct nfs_writeargs *argp = &data->args; 1285 struct nfs_writeargs *argp = &data->args;
1257 struct nfs_writeres *resp = &data->res; 1286 struct nfs_writeres *resp = &data->res;
1287 struct inode *inode = data->header->inode;
1258 int status; 1288 int status;
1259 1289
1260 dprintk("NFS: %5u nfs_writeback_done (status %d)\n", 1290 dprintk("NFS: %5u nfs_writeback_done (status %d)\n",
@@ -1267,10 +1297,10 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1267 * another writer had changed the file, but some applications 1297 * another writer had changed the file, but some applications
1268 * depend on tighter cache coherency when writing. 1298 * depend on tighter cache coherency when writing.
1269 */ 1299 */
1270 status = NFS_PROTO(data->inode)->write_done(task, data); 1300 status = NFS_PROTO(inode)->write_done(task, data);
1271 if (status != 0) 1301 if (status != 0)
1272 return; 1302 return;
1273 nfs_add_stats(data->inode, NFSIOS_SERVERWRITTENBYTES, resp->count); 1303 nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, resp->count);
1274 1304
1275#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) 1305#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
1276 if (resp->verf->committed < argp->stable && task->tk_status >= 0) { 1306 if (resp->verf->committed < argp->stable && task->tk_status >= 0) {
@@ -1288,46 +1318,47 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1288 if (time_before(complain, jiffies)) { 1318 if (time_before(complain, jiffies)) {
1289 dprintk("NFS: faulty NFS server %s:" 1319 dprintk("NFS: faulty NFS server %s:"
1290 " (committed = %d) != (stable = %d)\n", 1320 " (committed = %d) != (stable = %d)\n",
1291 NFS_SERVER(data->inode)->nfs_client->cl_hostname, 1321 NFS_SERVER(inode)->nfs_client->cl_hostname,
1292 resp->verf->committed, argp->stable); 1322 resp->verf->committed, argp->stable);
1293 complain = jiffies + 300 * HZ; 1323 complain = jiffies + 300 * HZ;
1294 } 1324 }
1295 } 1325 }
1296#endif 1326#endif
1297 /* Is this a short write? */ 1327 if (task->tk_status < 0)
1298 if (task->tk_status >= 0 && resp->count < argp->count) { 1328 nfs_set_pgio_error(data->header, task->tk_status, argp->offset);
1329 else if (resp->count < argp->count) {
1299 static unsigned long complain; 1330 static unsigned long complain;
1300 1331
1301 nfs_inc_stats(data->inode, NFSIOS_SHORTWRITE); 1332 /* This a short write! */
1333 nfs_inc_stats(inode, NFSIOS_SHORTWRITE);
1302 1334
1303 /* Has the server at least made some progress? */ 1335 /* Has the server at least made some progress? */
1304 if (resp->count != 0) { 1336 if (resp->count == 0) {
1305 /* Was this an NFSv2 write or an NFSv3 stable write? */ 1337 if (time_before(complain, jiffies)) {
1306 if (resp->verf->committed != NFS_UNSTABLE) { 1338 printk(KERN_WARNING
1307 /* Resend from where the server left off */ 1339 "NFS: Server wrote zero bytes, expected %u.\n",
1308 data->mds_offset += resp->count; 1340 argp->count);
1309 argp->offset += resp->count; 1341 complain = jiffies + 300 * HZ;
1310 argp->pgbase += resp->count;
1311 argp->count -= resp->count;
1312 } else {
1313 /* Resend as a stable write in order to avoid
1314 * headaches in the case of a server crash.
1315 */
1316 argp->stable = NFS_FILE_SYNC;
1317 } 1342 }
1318 rpc_restart_call_prepare(task); 1343 nfs_set_pgio_error(data->header, -EIO, argp->offset);
1344 task->tk_status = -EIO;
1319 return; 1345 return;
1320 } 1346 }
1321 if (time_before(complain, jiffies)) { 1347 /* Was this an NFSv2 write or an NFSv3 stable write? */
1322 printk(KERN_WARNING 1348 if (resp->verf->committed != NFS_UNSTABLE) {
1323 "NFS: Server wrote zero bytes, expected %u.\n", 1349 /* Resend from where the server left off */
1324 argp->count); 1350 data->mds_offset += resp->count;
1325 complain = jiffies + 300 * HZ; 1351 argp->offset += resp->count;
1352 argp->pgbase += resp->count;
1353 argp->count -= resp->count;
1354 } else {
1355 /* Resend as a stable write in order to avoid
1356 * headaches in the case of a server crash.
1357 */
1358 argp->stable = NFS_FILE_SYNC;
1326 } 1359 }
1327 /* Can't do anything about it except throw an error. */ 1360 rpc_restart_call_prepare(task);
1328 task->tk_status = -EIO;
1329 } 1361 }
1330 return;
1331} 1362}
1332 1363
1333 1364
@@ -1347,26 +1378,23 @@ static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait)
1347 return (ret < 0) ? ret : 1; 1378 return (ret < 0) ? ret : 1;
1348} 1379}
1349 1380
1350void nfs_commit_clear_lock(struct nfs_inode *nfsi) 1381static void nfs_commit_clear_lock(struct nfs_inode *nfsi)
1351{ 1382{
1352 clear_bit(NFS_INO_COMMIT, &nfsi->flags); 1383 clear_bit(NFS_INO_COMMIT, &nfsi->flags);
1353 smp_mb__after_clear_bit(); 1384 smp_mb__after_clear_bit();
1354 wake_up_bit(&nfsi->flags, NFS_INO_COMMIT); 1385 wake_up_bit(&nfsi->flags, NFS_INO_COMMIT);
1355} 1386}
1356EXPORT_SYMBOL_GPL(nfs_commit_clear_lock);
1357 1387
1358void nfs_commitdata_release(void *data) 1388void nfs_commitdata_release(struct nfs_commit_data *data)
1359{ 1389{
1360 struct nfs_write_data *wdata = data; 1390 put_nfs_open_context(data->context);
1361 1391 nfs_commit_free(data);
1362 put_nfs_open_context(wdata->args.context);
1363 nfs_commit_free(wdata);
1364} 1392}
1365EXPORT_SYMBOL_GPL(nfs_commitdata_release); 1393EXPORT_SYMBOL_GPL(nfs_commitdata_release);
1366 1394
1367int nfs_initiate_commit(struct nfs_write_data *data, struct rpc_clnt *clnt, 1395int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data,
1368 const struct rpc_call_ops *call_ops, 1396 const struct rpc_call_ops *call_ops,
1369 int how) 1397 int how, int flags)
1370{ 1398{
1371 struct rpc_task *task; 1399 struct rpc_task *task;
1372 int priority = flush_task_priority(how); 1400 int priority = flush_task_priority(how);
@@ -1382,7 +1410,7 @@ int nfs_initiate_commit(struct nfs_write_data *data, struct rpc_clnt *clnt,
1382 .callback_ops = call_ops, 1410 .callback_ops = call_ops,
1383 .callback_data = data, 1411 .callback_data = data,
1384 .workqueue = nfsiod_workqueue, 1412 .workqueue = nfsiod_workqueue,
1385 .flags = RPC_TASK_ASYNC, 1413 .flags = RPC_TASK_ASYNC | flags,
1386 .priority = priority, 1414 .priority = priority,
1387 }; 1415 };
1388 /* Set up the initial task struct. */ 1416 /* Set up the initial task struct. */
@@ -1403,9 +1431,10 @@ EXPORT_SYMBOL_GPL(nfs_initiate_commit);
1403/* 1431/*
1404 * Set up the argument/result storage required for the RPC call. 1432 * Set up the argument/result storage required for the RPC call.
1405 */ 1433 */
1406void nfs_init_commit(struct nfs_write_data *data, 1434void nfs_init_commit(struct nfs_commit_data *data,
1407 struct list_head *head, 1435 struct list_head *head,
1408 struct pnfs_layout_segment *lseg) 1436 struct pnfs_layout_segment *lseg,
1437 struct nfs_commit_info *cinfo)
1409{ 1438{
1410 struct nfs_page *first = nfs_list_entry(head->next); 1439 struct nfs_page *first = nfs_list_entry(head->next);
1411 struct inode *inode = first->wb_context->dentry->d_inode; 1440 struct inode *inode = first->wb_context->dentry->d_inode;
@@ -1419,13 +1448,14 @@ void nfs_init_commit(struct nfs_write_data *data,
1419 data->cred = first->wb_context->cred; 1448 data->cred = first->wb_context->cred;
1420 data->lseg = lseg; /* reference transferred */ 1449 data->lseg = lseg; /* reference transferred */
1421 data->mds_ops = &nfs_commit_ops; 1450 data->mds_ops = &nfs_commit_ops;
1451 data->completion_ops = cinfo->completion_ops;
1452 data->dreq = cinfo->dreq;
1422 1453
1423 data->args.fh = NFS_FH(data->inode); 1454 data->args.fh = NFS_FH(data->inode);
1424 /* Note: we always request a commit of the entire inode */ 1455 /* Note: we always request a commit of the entire inode */
1425 data->args.offset = 0; 1456 data->args.offset = 0;
1426 data->args.count = 0; 1457 data->args.count = 0;
1427 data->args.context = get_nfs_open_context(first->wb_context); 1458 data->context = get_nfs_open_context(first->wb_context);
1428 data->res.count = 0;
1429 data->res.fattr = &data->fattr; 1459 data->res.fattr = &data->fattr;
1430 data->res.verf = &data->verf; 1460 data->res.verf = &data->verf;
1431 nfs_fattr_init(&data->fattr); 1461 nfs_fattr_init(&data->fattr);
@@ -1433,18 +1463,21 @@ void nfs_init_commit(struct nfs_write_data *data,
1433EXPORT_SYMBOL_GPL(nfs_init_commit); 1463EXPORT_SYMBOL_GPL(nfs_init_commit);
1434 1464
1435void nfs_retry_commit(struct list_head *page_list, 1465void nfs_retry_commit(struct list_head *page_list,
1436 struct pnfs_layout_segment *lseg) 1466 struct pnfs_layout_segment *lseg,
1467 struct nfs_commit_info *cinfo)
1437{ 1468{
1438 struct nfs_page *req; 1469 struct nfs_page *req;
1439 1470
1440 while (!list_empty(page_list)) { 1471 while (!list_empty(page_list)) {
1441 req = nfs_list_entry(page_list->next); 1472 req = nfs_list_entry(page_list->next);
1442 nfs_list_remove_request(req); 1473 nfs_list_remove_request(req);
1443 nfs_mark_request_commit(req, lseg); 1474 nfs_mark_request_commit(req, lseg, cinfo);
1444 dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); 1475 if (!cinfo->dreq) {
1445 dec_bdi_stat(req->wb_page->mapping->backing_dev_info, 1476 dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
1446 BDI_RECLAIMABLE); 1477 dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
1447 nfs_unlock_request(req); 1478 BDI_RECLAIMABLE);
1479 }
1480 nfs_unlock_and_release_request(req);
1448 } 1481 }
1449} 1482}
1450EXPORT_SYMBOL_GPL(nfs_retry_commit); 1483EXPORT_SYMBOL_GPL(nfs_retry_commit);
@@ -1453,9 +1486,10 @@ EXPORT_SYMBOL_GPL(nfs_retry_commit);
1453 * Commit dirty pages 1486 * Commit dirty pages
1454 */ 1487 */
1455static int 1488static int
1456nfs_commit_list(struct inode *inode, struct list_head *head, int how) 1489nfs_commit_list(struct inode *inode, struct list_head *head, int how,
1490 struct nfs_commit_info *cinfo)
1457{ 1491{
1458 struct nfs_write_data *data; 1492 struct nfs_commit_data *data;
1459 1493
1460 data = nfs_commitdata_alloc(); 1494 data = nfs_commitdata_alloc();
1461 1495
@@ -1463,11 +1497,13 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
1463 goto out_bad; 1497 goto out_bad;
1464 1498
1465 /* Set up the argument struct */ 1499 /* Set up the argument struct */
1466 nfs_init_commit(data, head, NULL); 1500 nfs_init_commit(data, head, NULL, cinfo);
1467 return nfs_initiate_commit(data, NFS_CLIENT(inode), data->mds_ops, how); 1501 atomic_inc(&cinfo->mds->rpcs_out);
1502 return nfs_initiate_commit(NFS_CLIENT(inode), data, data->mds_ops,
1503 how, 0);
1468 out_bad: 1504 out_bad:
1469 nfs_retry_commit(head, NULL); 1505 nfs_retry_commit(head, NULL, cinfo);
1470 nfs_commit_clear_lock(NFS_I(inode)); 1506 cinfo->completion_ops->error_cleanup(NFS_I(inode));
1471 return -ENOMEM; 1507 return -ENOMEM;
1472} 1508}
1473 1509
@@ -1476,7 +1512,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
1476 */ 1512 */
1477static void nfs_commit_done(struct rpc_task *task, void *calldata) 1513static void nfs_commit_done(struct rpc_task *task, void *calldata)
1478{ 1514{
1479 struct nfs_write_data *data = calldata; 1515 struct nfs_commit_data *data = calldata;
1480 1516
1481 dprintk("NFS: %5u nfs_commit_done (status %d)\n", 1517 dprintk("NFS: %5u nfs_commit_done (status %d)\n",
1482 task->tk_pid, task->tk_status); 1518 task->tk_pid, task->tk_status);
@@ -1485,10 +1521,11 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
1485 NFS_PROTO(data->inode)->commit_done(task, data); 1521 NFS_PROTO(data->inode)->commit_done(task, data);
1486} 1522}
1487 1523
1488void nfs_commit_release_pages(struct nfs_write_data *data) 1524static void nfs_commit_release_pages(struct nfs_commit_data *data)
1489{ 1525{
1490 struct nfs_page *req; 1526 struct nfs_page *req;
1491 int status = data->task.tk_status; 1527 int status = data->task.tk_status;
1528 struct nfs_commit_info cinfo;
1492 1529
1493 while (!list_empty(&data->pages)) { 1530 while (!list_empty(&data->pages)) {
1494 req = nfs_list_entry(data->pages.next); 1531 req = nfs_list_entry(data->pages.next);
@@ -1519,42 +1556,59 @@ void nfs_commit_release_pages(struct nfs_write_data *data)
1519 dprintk(" mismatch\n"); 1556 dprintk(" mismatch\n");
1520 nfs_mark_request_dirty(req); 1557 nfs_mark_request_dirty(req);
1521 next: 1558 next:
1522 nfs_unlock_request(req); 1559 nfs_unlock_and_release_request(req);
1523 } 1560 }
1561 nfs_init_cinfo(&cinfo, data->inode, data->dreq);
1562 if (atomic_dec_and_test(&cinfo.mds->rpcs_out))
1563 nfs_commit_clear_lock(NFS_I(data->inode));
1524} 1564}
1525EXPORT_SYMBOL_GPL(nfs_commit_release_pages);
1526 1565
1527static void nfs_commit_release(void *calldata) 1566static void nfs_commit_release(void *calldata)
1528{ 1567{
1529 struct nfs_write_data *data = calldata; 1568 struct nfs_commit_data *data = calldata;
1530 1569
1531 nfs_commit_release_pages(data); 1570 data->completion_ops->completion(data);
1532 nfs_commit_clear_lock(NFS_I(data->inode));
1533 nfs_commitdata_release(calldata); 1571 nfs_commitdata_release(calldata);
1534} 1572}
1535 1573
1536static const struct rpc_call_ops nfs_commit_ops = { 1574static const struct rpc_call_ops nfs_commit_ops = {
1537 .rpc_call_prepare = nfs_write_prepare, 1575 .rpc_call_prepare = nfs_commit_prepare,
1538 .rpc_call_done = nfs_commit_done, 1576 .rpc_call_done = nfs_commit_done,
1539 .rpc_release = nfs_commit_release, 1577 .rpc_release = nfs_commit_release,
1540}; 1578};
1541 1579
1580static const struct nfs_commit_completion_ops nfs_commit_completion_ops = {
1581 .completion = nfs_commit_release_pages,
1582 .error_cleanup = nfs_commit_clear_lock,
1583};
1584
1585int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
1586 int how, struct nfs_commit_info *cinfo)
1587{
1588 int status;
1589
1590 status = pnfs_commit_list(inode, head, how, cinfo);
1591 if (status == PNFS_NOT_ATTEMPTED)
1592 status = nfs_commit_list(inode, head, how, cinfo);
1593 return status;
1594}
1595
1542int nfs_commit_inode(struct inode *inode, int how) 1596int nfs_commit_inode(struct inode *inode, int how)
1543{ 1597{
1544 LIST_HEAD(head); 1598 LIST_HEAD(head);
1599 struct nfs_commit_info cinfo;
1545 int may_wait = how & FLUSH_SYNC; 1600 int may_wait = how & FLUSH_SYNC;
1546 int res; 1601 int res;
1547 1602
1548 res = nfs_commit_set_lock(NFS_I(inode), may_wait); 1603 res = nfs_commit_set_lock(NFS_I(inode), may_wait);
1549 if (res <= 0) 1604 if (res <= 0)
1550 goto out_mark_dirty; 1605 goto out_mark_dirty;
1551 res = nfs_scan_commit(inode, &head); 1606 nfs_init_cinfo_from_inode(&cinfo, inode);
1607 res = nfs_scan_commit(inode, &head, &cinfo);
1552 if (res) { 1608 if (res) {
1553 int error; 1609 int error;
1554 1610
1555 error = pnfs_commit_list(inode, &head, how); 1611 error = nfs_generic_commit_list(inode, &head, how, &cinfo);
1556 if (error == PNFS_NOT_ATTEMPTED)
1557 error = nfs_commit_list(inode, &head, how);
1558 if (error < 0) 1612 if (error < 0)
1559 return error; 1613 return error;
1560 if (!may_wait) 1614 if (!may_wait)
@@ -1585,14 +1639,14 @@ static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_contr
1585 int ret = 0; 1639 int ret = 0;
1586 1640
1587 /* no commits means nothing needs to be done */ 1641 /* no commits means nothing needs to be done */
1588 if (!nfsi->ncommit) 1642 if (!nfsi->commit_info.ncommit)
1589 return ret; 1643 return ret;
1590 1644
1591 if (wbc->sync_mode == WB_SYNC_NONE) { 1645 if (wbc->sync_mode == WB_SYNC_NONE) {
1592 /* Don't commit yet if this is a non-blocking flush and there 1646 /* Don't commit yet if this is a non-blocking flush and there
1593 * are a lot of outstanding writes for this mapping. 1647 * are a lot of outstanding writes for this mapping.
1594 */ 1648 */
1595 if (nfsi->ncommit <= (nfsi->npages >> 1)) 1649 if (nfsi->commit_info.ncommit <= (nfsi->npages >> 1))
1596 goto out_mark_dirty; 1650 goto out_mark_dirty;
1597 1651
1598 /* don't wait for the COMMIT response */ 1652 /* don't wait for the COMMIT response */
@@ -1665,7 +1719,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
1665 req = nfs_page_find_request(page); 1719 req = nfs_page_find_request(page);
1666 if (req == NULL) 1720 if (req == NULL)
1667 break; 1721 break;
1668 if (nfs_lock_request_dontget(req)) { 1722 if (nfs_lock_request(req)) {
1669 nfs_clear_request_commit(req); 1723 nfs_clear_request_commit(req);
1670 nfs_inode_remove_request(req); 1724 nfs_inode_remove_request(req);
1671 /* 1725 /*
@@ -1673,7 +1727,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
1673 * page as being dirty 1727 * page as being dirty
1674 */ 1728 */
1675 cancel_dirty_page(page, PAGE_CACHE_SIZE); 1729 cancel_dirty_page(page, PAGE_CACHE_SIZE);
1676 nfs_unlock_request(req); 1730 nfs_unlock_and_release_request(req);
1677 break; 1731 break;
1678 } 1732 }
1679 ret = nfs_wait_on_request(req); 1733 ret = nfs_wait_on_request(req);
@@ -1742,7 +1796,7 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
1742int __init nfs_init_writepagecache(void) 1796int __init nfs_init_writepagecache(void)
1743{ 1797{
1744 nfs_wdata_cachep = kmem_cache_create("nfs_write_data", 1798 nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
1745 sizeof(struct nfs_write_data), 1799 sizeof(struct nfs_write_header),
1746 0, SLAB_HWCACHE_ALIGN, 1800 0, SLAB_HWCACHE_ALIGN,
1747 NULL); 1801 NULL);
1748 if (nfs_wdata_cachep == NULL) 1802 if (nfs_wdata_cachep == NULL)
@@ -1753,6 +1807,13 @@ int __init nfs_init_writepagecache(void)
1753 if (nfs_wdata_mempool == NULL) 1807 if (nfs_wdata_mempool == NULL)
1754 return -ENOMEM; 1808 return -ENOMEM;
1755 1809
1810 nfs_cdata_cachep = kmem_cache_create("nfs_commit_data",
1811 sizeof(struct nfs_commit_data),
1812 0, SLAB_HWCACHE_ALIGN,
1813 NULL);
1814 if (nfs_cdata_cachep == NULL)
1815 return -ENOMEM;
1816
1756 nfs_commit_mempool = mempool_create_slab_pool(MIN_POOL_COMMIT, 1817 nfs_commit_mempool = mempool_create_slab_pool(MIN_POOL_COMMIT,
1757 nfs_wdata_cachep); 1818 nfs_wdata_cachep);
1758 if (nfs_commit_mempool == NULL) 1819 if (nfs_commit_mempool == NULL)
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 52a1bdb4ee2b..6cc7dbaf0695 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -179,8 +179,7 @@ struct nfs_inode {
179 __be32 cookieverf[2]; 179 __be32 cookieverf[2];
180 180
181 unsigned long npages; 181 unsigned long npages;
182 unsigned long ncommit; 182 struct nfs_mds_commit_info commit_info;
183 struct list_head commit_list;
184 183
185 /* Open contexts for shared mmap writes */ 184 /* Open contexts for shared mmap writes */
186 struct list_head open_files; 185 struct list_head open_files;
@@ -201,7 +200,6 @@ struct nfs_inode {
201 200
202 /* pNFS layout information */ 201 /* pNFS layout information */
203 struct pnfs_layout_hdr *layout; 202 struct pnfs_layout_hdr *layout;
204 atomic_t commits_outstanding;
205#endif /* CONFIG_NFS_V4*/ 203#endif /* CONFIG_NFS_V4*/
206#ifdef CONFIG_NFS_FSCACHE 204#ifdef CONFIG_NFS_FSCACHE
207 struct fscache_cookie *fscache; 205 struct fscache_cookie *fscache;
@@ -230,7 +228,6 @@ struct nfs_inode {
230#define NFS_INO_FSCACHE (5) /* inode can be cached by FS-Cache */ 228#define NFS_INO_FSCACHE (5) /* inode can be cached by FS-Cache */
231#define NFS_INO_FSCACHE_LOCK (6) /* FS-Cache cookie management lock */ 229#define NFS_INO_FSCACHE_LOCK (6) /* FS-Cache cookie management lock */
232#define NFS_INO_COMMIT (7) /* inode is committing unstable writes */ 230#define NFS_INO_COMMIT (7) /* inode is committing unstable writes */
233#define NFS_INO_PNFS_COMMIT (8) /* use pnfs code for commit */
234#define NFS_INO_LAYOUTCOMMIT (9) /* layoutcommit required */ 231#define NFS_INO_LAYOUTCOMMIT (9) /* layoutcommit required */
235#define NFS_INO_LAYOUTCOMMITTING (10) /* layoutcommit inflight */ 232#define NFS_INO_LAYOUTCOMMITTING (10) /* layoutcommit inflight */
236 233
@@ -317,11 +314,6 @@ static inline int nfs_server_capable(struct inode *inode, int cap)
317 return NFS_SERVER(inode)->caps & cap; 314 return NFS_SERVER(inode)->caps & cap;
318} 315}
319 316
320static inline int NFS_USE_READDIRPLUS(struct inode *inode)
321{
322 return test_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
323}
324
325static inline void nfs_set_verifier(struct dentry * dentry, unsigned long verf) 317static inline void nfs_set_verifier(struct dentry * dentry, unsigned long verf)
326{ 318{
327 dentry->d_time = verf; 319 dentry->d_time = verf;
@@ -552,8 +544,8 @@ extern int nfs_wb_page(struct inode *inode, struct page* page);
552extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); 544extern int nfs_wb_page_cancel(struct inode *inode, struct page* page);
553#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) 545#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
554extern int nfs_commit_inode(struct inode *, int); 546extern int nfs_commit_inode(struct inode *, int);
555extern struct nfs_write_data *nfs_commitdata_alloc(void); 547extern struct nfs_commit_data *nfs_commitdata_alloc(void);
556extern void nfs_commit_free(struct nfs_write_data *wdata); 548extern void nfs_commit_free(struct nfs_commit_data *data);
557#else 549#else
558static inline int 550static inline int
559nfs_commit_inode(struct inode *inode, int how) 551nfs_commit_inode(struct inode *inode, int how)
@@ -569,12 +561,6 @@ nfs_have_writebacks(struct inode *inode)
569} 561}
570 562
571/* 563/*
572 * Allocate nfs_write_data structures
573 */
574extern struct nfs_write_data *nfs_writedata_alloc(unsigned int npages);
575extern void nfs_writedata_free(struct nfs_write_data *);
576
577/*
578 * linux/fs/nfs/read.c 564 * linux/fs/nfs/read.c
579 */ 565 */
580extern int nfs_readpage(struct file *, struct page *); 566extern int nfs_readpage(struct file *, struct page *);
@@ -585,12 +571,6 @@ extern int nfs_readpage_async(struct nfs_open_context *, struct inode *,
585 struct page *); 571 struct page *);
586 572
587/* 573/*
588 * Allocate nfs_read_data structures
589 */
590extern struct nfs_read_data *nfs_readdata_alloc(unsigned int npages);
591extern void nfs_readdata_free(struct nfs_read_data *);
592
593/*
594 * linux/fs/nfs3proc.c 574 * linux/fs/nfs3proc.c
595 */ 575 */
596#ifdef CONFIG_NFS_V3_ACL 576#ifdef CONFIG_NFS_V3_ACL
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index eac30d6bec17..88d166b555e8 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -27,7 +27,6 @@ enum {
27 PG_CLEAN, 27 PG_CLEAN,
28 PG_NEED_COMMIT, 28 PG_NEED_COMMIT,
29 PG_NEED_RESCHED, 29 PG_NEED_RESCHED,
30 PG_PARTIAL_READ_FAILED,
31 PG_COMMIT_TO_DS, 30 PG_COMMIT_TO_DS,
32}; 31};
33 32
@@ -37,7 +36,6 @@ struct nfs_page {
37 struct page *wb_page; /* page to read in/write out */ 36 struct page *wb_page; /* page to read in/write out */
38 struct nfs_open_context *wb_context; /* File state context info */ 37 struct nfs_open_context *wb_context; /* File state context info */
39 struct nfs_lock_context *wb_lock_context; /* lock context info */ 38 struct nfs_lock_context *wb_lock_context; /* lock context info */
40 atomic_t wb_complete; /* i/os we're waiting for */
41 pgoff_t wb_index; /* Offset >> PAGE_CACHE_SHIFT */ 39 pgoff_t wb_index; /* Offset >> PAGE_CACHE_SHIFT */
42 unsigned int wb_offset, /* Offset & ~PAGE_CACHE_MASK */ 40 unsigned int wb_offset, /* Offset & ~PAGE_CACHE_MASK */
43 wb_pgbase, /* Start of page data */ 41 wb_pgbase, /* Start of page data */
@@ -68,7 +66,9 @@ struct nfs_pageio_descriptor {
68 int pg_ioflags; 66 int pg_ioflags;
69 int pg_error; 67 int pg_error;
70 const struct rpc_call_ops *pg_rpc_callops; 68 const struct rpc_call_ops *pg_rpc_callops;
69 const struct nfs_pgio_completion_ops *pg_completion_ops;
71 struct pnfs_layout_segment *pg_lseg; 70 struct pnfs_layout_segment *pg_lseg;
71 struct nfs_direct_req *pg_dreq;
72}; 72};
73 73
74#define NFS_WBACK_BUSY(req) (test_bit(PG_BUSY,&(req)->wb_flags)) 74#define NFS_WBACK_BUSY(req) (test_bit(PG_BUSY,&(req)->wb_flags))
@@ -84,6 +84,7 @@ extern void nfs_release_request(struct nfs_page *req);
84extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc, 84extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
85 struct inode *inode, 85 struct inode *inode,
86 const struct nfs_pageio_ops *pg_ops, 86 const struct nfs_pageio_ops *pg_ops,
87 const struct nfs_pgio_completion_ops *compl_ops,
87 size_t bsize, 88 size_t bsize,
88 int how); 89 int how);
89extern int nfs_pageio_add_request(struct nfs_pageio_descriptor *, 90extern int nfs_pageio_add_request(struct nfs_pageio_descriptor *,
@@ -95,26 +96,17 @@ extern bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc,
95 struct nfs_page *req); 96 struct nfs_page *req);
96extern int nfs_wait_on_request(struct nfs_page *); 97extern int nfs_wait_on_request(struct nfs_page *);
97extern void nfs_unlock_request(struct nfs_page *req); 98extern void nfs_unlock_request(struct nfs_page *req);
99extern void nfs_unlock_and_release_request(struct nfs_page *req);
98 100
99/* 101/*
100 * Lock the page of an asynchronous request without getting a new reference 102 * Lock the page of an asynchronous request
101 */ 103 */
102static inline int 104static inline int
103nfs_lock_request_dontget(struct nfs_page *req)
104{
105 return !test_and_set_bit(PG_BUSY, &req->wb_flags);
106}
107
108static inline int
109nfs_lock_request(struct nfs_page *req) 105nfs_lock_request(struct nfs_page *req)
110{ 106{
111 if (test_and_set_bit(PG_BUSY, &req->wb_flags)) 107 return !test_and_set_bit(PG_BUSY, &req->wb_flags);
112 return 0;
113 kref_get(&req->wb_kref);
114 return 1;
115} 108}
116 109
117
118/** 110/**
119 * nfs_list_add_request - Insert a request into a list 111 * nfs_list_add_request - Insert a request into a list
120 * @req: request 112 * @req: request
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 7ba3551a0414..2e53a3f1d2ff 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -106,14 +106,14 @@ struct nfs_fattr {
106 | NFS_ATTR_FATTR_FILEID \ 106 | NFS_ATTR_FATTR_FILEID \
107 | NFS_ATTR_FATTR_ATIME \ 107 | NFS_ATTR_FATTR_ATIME \
108 | NFS_ATTR_FATTR_MTIME \ 108 | NFS_ATTR_FATTR_MTIME \
109 | NFS_ATTR_FATTR_CTIME) 109 | NFS_ATTR_FATTR_CTIME \
110 | NFS_ATTR_FATTR_CHANGE)
110#define NFS_ATTR_FATTR_V2 (NFS_ATTR_FATTR \ 111#define NFS_ATTR_FATTR_V2 (NFS_ATTR_FATTR \
111 | NFS_ATTR_FATTR_BLOCKS_USED) 112 | NFS_ATTR_FATTR_BLOCKS_USED)
112#define NFS_ATTR_FATTR_V3 (NFS_ATTR_FATTR \ 113#define NFS_ATTR_FATTR_V3 (NFS_ATTR_FATTR \
113 | NFS_ATTR_FATTR_SPACE_USED) 114 | NFS_ATTR_FATTR_SPACE_USED)
114#define NFS_ATTR_FATTR_V4 (NFS_ATTR_FATTR \ 115#define NFS_ATTR_FATTR_V4 (NFS_ATTR_FATTR \
115 | NFS_ATTR_FATTR_SPACE_USED \ 116 | NFS_ATTR_FATTR_SPACE_USED)
116 | NFS_ATTR_FATTR_CHANGE)
117 117
118/* 118/*
119 * Info on the file system 119 * Info on the file system
@@ -338,7 +338,6 @@ struct nfs_openargs {
338 const struct qstr * name; 338 const struct qstr * name;
339 const struct nfs_server *server; /* Needed for ID mapping */ 339 const struct nfs_server *server; /* Needed for ID mapping */
340 const u32 * bitmask; 340 const u32 * bitmask;
341 const u32 * dir_bitmask;
342 __u32 claim; 341 __u32 claim;
343 struct nfs4_sequence_args seq_args; 342 struct nfs4_sequence_args seq_args;
344}; 343};
@@ -349,7 +348,6 @@ struct nfs_openres {
349 struct nfs4_change_info cinfo; 348 struct nfs4_change_info cinfo;
350 __u32 rflags; 349 __u32 rflags;
351 struct nfs_fattr * f_attr; 350 struct nfs_fattr * f_attr;
352 struct nfs_fattr * dir_attr;
353 struct nfs_seqid * seqid; 351 struct nfs_seqid * seqid;
354 const struct nfs_server *server; 352 const struct nfs_server *server;
355 fmode_t delegation_type; 353 fmode_t delegation_type;
@@ -519,12 +517,29 @@ struct nfs_writeres {
519}; 517};
520 518
521/* 519/*
520 * Arguments to the commit call.
521 */
522struct nfs_commitargs {
523 struct nfs_fh *fh;
524 __u64 offset;
525 __u32 count;
526 const u32 *bitmask;
527 struct nfs4_sequence_args seq_args;
528};
529
530struct nfs_commitres {
531 struct nfs_fattr *fattr;
532 struct nfs_writeverf *verf;
533 const struct nfs_server *server;
534 struct nfs4_sequence_res seq_res;
535};
536
537/*
522 * Common arguments to the unlink call 538 * Common arguments to the unlink call
523 */ 539 */
524struct nfs_removeargs { 540struct nfs_removeargs {
525 const struct nfs_fh *fh; 541 const struct nfs_fh *fh;
526 struct qstr name; 542 struct qstr name;
527 const u32 * bitmask;
528 struct nfs4_sequence_args seq_args; 543 struct nfs4_sequence_args seq_args;
529}; 544};
530 545
@@ -543,7 +558,6 @@ struct nfs_renameargs {
543 const struct nfs_fh *new_dir; 558 const struct nfs_fh *new_dir;
544 const struct qstr *old_name; 559 const struct qstr *old_name;
545 const struct qstr *new_name; 560 const struct qstr *new_name;
546 const u32 *bitmask;
547 struct nfs4_sequence_args seq_args; 561 struct nfs4_sequence_args seq_args;
548}; 562};
549 563
@@ -839,7 +853,6 @@ struct nfs4_create_res {
839 struct nfs_fh * fh; 853 struct nfs_fh * fh;
840 struct nfs_fattr * fattr; 854 struct nfs_fattr * fattr;
841 struct nfs4_change_info dir_cinfo; 855 struct nfs4_change_info dir_cinfo;
842 struct nfs_fattr * dir_fattr;
843 struct nfs4_sequence_res seq_res; 856 struct nfs4_sequence_res seq_res;
844}; 857};
845 858
@@ -1061,6 +1074,21 @@ struct nfstime4 {
1061}; 1074};
1062 1075
1063#ifdef CONFIG_NFS_V4_1 1076#ifdef CONFIG_NFS_V4_1
1077
1078struct pnfs_commit_bucket {
1079 struct list_head written;
1080 struct list_head committing;
1081 struct pnfs_layout_segment *wlseg;
1082 struct pnfs_layout_segment *clseg;
1083};
1084
1085struct pnfs_ds_commit_info {
1086 int nwritten;
1087 int ncommitting;
1088 int nbuckets;
1089 struct pnfs_commit_bucket *buckets;
1090};
1091
1064#define NFS4_EXCHANGE_ID_LEN (48) 1092#define NFS4_EXCHANGE_ID_LEN (48)
1065struct nfs41_exchange_id_args { 1093struct nfs41_exchange_id_args {
1066 struct nfs_client *client; 1094 struct nfs_client *client;
@@ -1143,35 +1171,114 @@ struct nfs41_free_stateid_res {
1143 struct nfs4_sequence_res seq_res; 1171 struct nfs4_sequence_res seq_res;
1144}; 1172};
1145 1173
1174#else
1175
1176struct pnfs_ds_commit_info {
1177};
1178
1146#endif /* CONFIG_NFS_V4_1 */ 1179#endif /* CONFIG_NFS_V4_1 */
1147 1180
1148struct nfs_page; 1181struct nfs_page;
1149 1182
1150#define NFS_PAGEVEC_SIZE (8U) 1183#define NFS_PAGEVEC_SIZE (8U)
1151 1184
1185struct nfs_page_array {
1186 struct page **pagevec;
1187 unsigned int npages; /* Max length of pagevec */
1188 struct page *page_array[NFS_PAGEVEC_SIZE];
1189};
1190
1152struct nfs_read_data { 1191struct nfs_read_data {
1192 struct nfs_pgio_header *header;
1193 struct list_head list;
1153 struct rpc_task task; 1194 struct rpc_task task;
1154 struct inode *inode;
1155 struct rpc_cred *cred;
1156 struct nfs_fattr fattr; /* fattr storage */ 1195 struct nfs_fattr fattr; /* fattr storage */
1157 struct list_head pages; /* Coalesced read requests */
1158 struct list_head list; /* lists of struct nfs_read_data */
1159 struct nfs_page *req; /* multi ops per nfs_page */
1160 struct page **pagevec;
1161 unsigned int npages; /* Max length of pagevec */
1162 struct nfs_readargs args; 1196 struct nfs_readargs args;
1163 struct nfs_readres res; 1197 struct nfs_readres res;
1164 unsigned long timestamp; /* For lease renewal */ 1198 unsigned long timestamp; /* For lease renewal */
1165 struct pnfs_layout_segment *lseg;
1166 struct nfs_client *ds_clp; /* pNFS data server */
1167 const struct rpc_call_ops *mds_ops;
1168 int (*read_done_cb) (struct rpc_task *task, struct nfs_read_data *data); 1199 int (*read_done_cb) (struct rpc_task *task, struct nfs_read_data *data);
1169 __u64 mds_offset; 1200 __u64 mds_offset;
1201 struct nfs_page_array pages;
1202 struct nfs_client *ds_clp; /* pNFS data server */
1203};
1204
1205/* used as flag bits in nfs_pgio_header */
1206enum {
1207 NFS_IOHDR_ERROR = 0,
1208 NFS_IOHDR_EOF,
1209 NFS_IOHDR_REDO,
1210 NFS_IOHDR_NEED_COMMIT,
1211 NFS_IOHDR_NEED_RESCHED,
1212};
1213
1214struct nfs_pgio_header {
1215 struct inode *inode;
1216 struct rpc_cred *cred;
1217 struct list_head pages;
1218 struct list_head rpc_list;
1219 atomic_t refcnt;
1220 struct nfs_page *req;
1221 struct pnfs_layout_segment *lseg;
1222 loff_t io_start;
1223 const struct rpc_call_ops *mds_ops;
1224 void (*release) (struct nfs_pgio_header *hdr);
1225 const struct nfs_pgio_completion_ops *completion_ops;
1226 struct nfs_direct_req *dreq;
1227 spinlock_t lock;
1228 /* fields protected by lock */
1170 int pnfs_error; 1229 int pnfs_error;
1171 struct page *page_array[NFS_PAGEVEC_SIZE]; 1230 int error; /* merge with pnfs_error */
1231 unsigned long good_bytes; /* boundary of good data */
1232 unsigned long flags;
1233};
1234
1235struct nfs_read_header {
1236 struct nfs_pgio_header header;
1237 struct nfs_read_data rpc_data;
1172}; 1238};
1173 1239
1174struct nfs_write_data { 1240struct nfs_write_data {
1241 struct nfs_pgio_header *header;
1242 struct list_head list;
1243 struct rpc_task task;
1244 struct nfs_fattr fattr;
1245 struct nfs_writeverf verf;
1246 struct nfs_writeargs args; /* argument struct */
1247 struct nfs_writeres res; /* result struct */
1248 unsigned long timestamp; /* For lease renewal */
1249 int (*write_done_cb) (struct rpc_task *task, struct nfs_write_data *data);
1250 __u64 mds_offset; /* Filelayout dense stripe */
1251 struct nfs_page_array pages;
1252 struct nfs_client *ds_clp; /* pNFS data server */
1253};
1254
1255struct nfs_write_header {
1256 struct nfs_pgio_header header;
1257 struct nfs_write_data rpc_data;
1258};
1259
1260struct nfs_mds_commit_info {
1261 atomic_t rpcs_out;
1262 unsigned long ncommit;
1263 struct list_head list;
1264};
1265
1266struct nfs_commit_data;
1267struct nfs_inode;
1268struct nfs_commit_completion_ops {
1269 void (*error_cleanup) (struct nfs_inode *nfsi);
1270 void (*completion) (struct nfs_commit_data *data);
1271};
1272
1273struct nfs_commit_info {
1274 spinlock_t *lock;
1275 struct nfs_mds_commit_info *mds;
1276 struct pnfs_ds_commit_info *ds;
1277 struct nfs_direct_req *dreq; /* O_DIRECT request */
1278 const struct nfs_commit_completion_ops *completion_ops;
1279};
1280
1281struct nfs_commit_data {
1175 struct rpc_task task; 1282 struct rpc_task task;
1176 struct inode *inode; 1283 struct inode *inode;
1177 struct rpc_cred *cred; 1284 struct rpc_cred *cred;
@@ -1179,22 +1286,22 @@ struct nfs_write_data {
1179 struct nfs_writeverf verf; 1286 struct nfs_writeverf verf;
1180 struct list_head pages; /* Coalesced requests we wish to flush */ 1287 struct list_head pages; /* Coalesced requests we wish to flush */
1181 struct list_head list; /* lists of struct nfs_write_data */ 1288 struct list_head list; /* lists of struct nfs_write_data */
1182 struct nfs_page *req; /* multi ops per nfs_page */ 1289 struct nfs_direct_req *dreq; /* O_DIRECT request */
1183 struct page **pagevec; 1290 struct nfs_commitargs args; /* argument struct */
1184 unsigned int npages; /* Max length of pagevec */ 1291 struct nfs_commitres res; /* result struct */
1185 struct nfs_writeargs args; /* argument struct */ 1292 struct nfs_open_context *context;
1186 struct nfs_writeres res; /* result struct */
1187 struct pnfs_layout_segment *lseg; 1293 struct pnfs_layout_segment *lseg;
1188 struct nfs_client *ds_clp; /* pNFS data server */ 1294 struct nfs_client *ds_clp; /* pNFS data server */
1189 int ds_commit_index; 1295 int ds_commit_index;
1190 const struct rpc_call_ops *mds_ops; 1296 const struct rpc_call_ops *mds_ops;
1191 int (*write_done_cb) (struct rpc_task *task, struct nfs_write_data *data); 1297 const struct nfs_commit_completion_ops *completion_ops;
1192#ifdef CONFIG_NFS_V4 1298 int (*commit_done_cb) (struct rpc_task *task, struct nfs_commit_data *data);
1193 unsigned long timestamp; /* For lease renewal */ 1299};
1194#endif 1300
1195 __u64 mds_offset; /* Filelayout dense stripe */ 1301struct nfs_pgio_completion_ops {
1196 int pnfs_error; 1302 void (*error_cleanup)(struct list_head *head);
1197 struct page *page_array[NFS_PAGEVEC_SIZE]; 1303 void (*init_hdr)(struct nfs_pgio_header *hdr);
1304 void (*completion)(struct nfs_pgio_header *hdr);
1198}; 1305};
1199 1306
1200struct nfs_unlinkdata { 1307struct nfs_unlinkdata {
@@ -1234,11 +1341,13 @@ struct nfs_rpc_ops {
1234 1341
1235 int (*getroot) (struct nfs_server *, struct nfs_fh *, 1342 int (*getroot) (struct nfs_server *, struct nfs_fh *,
1236 struct nfs_fsinfo *); 1343 struct nfs_fsinfo *);
1344 struct vfsmount *(*submount) (struct nfs_server *, struct dentry *,
1345 struct nfs_fh *, struct nfs_fattr *);
1237 int (*getattr) (struct nfs_server *, struct nfs_fh *, 1346 int (*getattr) (struct nfs_server *, struct nfs_fh *,
1238 struct nfs_fattr *); 1347 struct nfs_fattr *);
1239 int (*setattr) (struct dentry *, struct nfs_fattr *, 1348 int (*setattr) (struct dentry *, struct nfs_fattr *,
1240 struct iattr *); 1349 struct iattr *);
1241 int (*lookup) (struct rpc_clnt *clnt, struct inode *, struct qstr *, 1350 int (*lookup) (struct inode *, struct qstr *,
1242 struct nfs_fh *, struct nfs_fattr *); 1351 struct nfs_fh *, struct nfs_fattr *);
1243 int (*access) (struct inode *, struct nfs_access_entry *); 1352 int (*access) (struct inode *, struct nfs_access_entry *);
1244 int (*readlink)(struct inode *, struct page *, unsigned int, 1353 int (*readlink)(struct inode *, struct page *, unsigned int,
@@ -1277,8 +1386,9 @@ struct nfs_rpc_ops {
1277 void (*write_setup) (struct nfs_write_data *, struct rpc_message *); 1386 void (*write_setup) (struct nfs_write_data *, struct rpc_message *);
1278 void (*write_rpc_prepare)(struct rpc_task *, struct nfs_write_data *); 1387 void (*write_rpc_prepare)(struct rpc_task *, struct nfs_write_data *);
1279 int (*write_done) (struct rpc_task *, struct nfs_write_data *); 1388 int (*write_done) (struct rpc_task *, struct nfs_write_data *);
1280 void (*commit_setup) (struct nfs_write_data *, struct rpc_message *); 1389 void (*commit_setup) (struct nfs_commit_data *, struct rpc_message *);
1281 int (*commit_done) (struct rpc_task *, struct nfs_write_data *); 1390 void (*commit_rpc_prepare)(struct rpc_task *, struct nfs_commit_data *);
1391 int (*commit_done) (struct rpc_task *, struct nfs_commit_data *);
1282 int (*lock)(struct file *, int, struct file_lock *); 1392 int (*lock)(struct file *, int, struct file_lock *);
1283 int (*lock_check_bounds)(const struct file_lock *); 1393 int (*lock_check_bounds)(const struct file_lock *);
1284 void (*clear_acl_cache)(struct inode *); 1394 void (*clear_acl_cache)(struct inode *);
@@ -1289,7 +1399,6 @@ struct nfs_rpc_ops {
1289 struct iattr *iattr); 1399 struct iattr *iattr);
1290 int (*init_client) (struct nfs_client *, const struct rpc_timeout *, 1400 int (*init_client) (struct nfs_client *, const struct rpc_timeout *,
1291 const char *, rpc_authflavor_t, int); 1401 const char *, rpc_authflavor_t, int);
1292 int (*secinfo)(struct inode *, const struct qstr *, struct nfs4_secinfo_flavors *);
1293}; 1402};
1294 1403
1295/* 1404/*
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 7fb49c53ed61..88945d0f7594 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -1116,7 +1116,7 @@ rpc_fill_super(struct super_block *sb, void *data, int silent)
1116 sb->s_op = &s_ops; 1116 sb->s_op = &s_ops;
1117 sb->s_time_gran = 1; 1117 sb->s_time_gran = 1;
1118 1118
1119 inode = rpc_get_inode(sb, S_IFDIR | 0755); 1119 inode = rpc_get_inode(sb, S_IFDIR | S_IRUGO | S_IXUGO);
1120 sb->s_root = root = d_make_root(inode); 1120 sb->s_root = root = d_make_root(inode);
1121 if (!root) 1121 if (!root)
1122 return -ENOMEM; 1122 return -ENOMEM;