aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-05-29 13:43:51 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2012-05-29 13:43:51 -0400
commit53f2c4a8fd882009a2a75c5b72d6898c0808616e (patch)
tree922293a1056c0c2358203cdab832f0e0891e628a /fs
parent8f6576ad476b2a22d05ddafd2ddaee102577a4ed (diff)
parentcc0a98436820b161b595b8cc1d2329bcf7328107 (diff)
Merge tag 'nfs-for-3.5-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
Pull NFS client updates from Trond Myklebust: "New features include: - Rewrite the O_DIRECT code so that it can share the same coalescing and pNFS functionality as the page cache code. - Allow the server to provide hints as to when we should use pNFS, and when it is more efficient to read and write through the metadata server. - NFS cache consistency updates: * Use the ctime to emulate a change attribute for NFSv2/v3 so that all NFS versions can share the same cache management code. * New cache management code will only look at the change attribute and size attribute when deciding whether or not our cached data is still valid or not. * Don't request NFSv4 post-op attributes on writes in cases such as O_DIRECT, where we don't care about data cache consistency, or when we have a write delegation, and know that our cache is still consistent. * Don't request NFSv4 post-op attributes on operations such as COMMIT, where there are no expected metadata updates. * Don't request NFSv4 directory post-op attributes in cases where the operations themselves already return change attribute updates: i.e. operations such as OPEN, CREATE, REMOVE, LINK and RENAME. - Speed up 'ls' and friends by using READDIR rather than READDIRPLUS if we detect no attempts to lookup filenames. - Improve the code sharing between NFSv2/v3 and v4 mounts - NFSv4.1 state management efficiency improvements - More patches in preparation for NFSv4/v4.1 migration functionality." Fix trivial conflict in fs/nfs/nfs4proc.c that was due to the dcache qstr name initialization changes (that made the length/hash a 64-bit union) * tag 'nfs-for-3.5-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (146 commits) NFSv4: Add debugging printks to state manager NFSv4: Map NFS4ERR_SHARE_DENIED into an EACCES error instead of EIO NFSv4: update_changeattr does not need to set NFS_INO_REVAL_PAGECACHE NFSv4.1: nfs4_reset_session should use nfs4_handle_reclaim_lease_error NFSv4.1: Handle other occurrences of NFS4ERR_CONN_NOT_BOUND_TO_SESSION NFSv4.1: Handle NFS4ERR_CONN_NOT_BOUND_TO_SESSION in the state manager NFSv4.1: Handle errors in nfs4_bind_conn_to_session NFSv4.1: nfs4_bind_conn_to_session should drain the session NFSv4.1: Don't clobber the seqid if exchange_id returns a confirmed clientid NFSv4.1: Add DESTROY_CLIENTID NFSv4.1: Ensure we use the correct credentials for bind_conn_to_session NFSv4.1: Ensure we use the correct credentials for session create/destroy NFSv4.1: Move NFSPROC4_CLNT_BIND_CONN_TO_SESSION to the end of the operations NFSv4.1: Handle NFS4ERR_SEQ_MISORDERED when confirming the lease NFSv4: When purging the lease, we must clear NFS4CLNT_LEASE_CONFIRM NFSv4: Clean up the error handling for nfs4_reclaim_lease NFSv4.1: Exchange ID must use GFP_NOFS allocation mode nfs41: Use BIND_CONN_TO_SESSION for CB_PATH_DOWN* nfs4.1: add BIND_CONN_TO_SESSION operation NFSv4.1 test the mdsthreshold hint parameters ...
Diffstat (limited to 'fs')
-rw-r--r--fs/nfs/Kconfig11
-rw-r--r--fs/nfs/Makefile5
-rw-r--r--fs/nfs/blocklayout/blocklayout.c90
-rw-r--r--fs/nfs/blocklayout/blocklayoutdev.c2
-rw-r--r--fs/nfs/client.c268
-rw-r--r--fs/nfs/delegation.c16
-rw-r--r--fs/nfs/delegation.h1
-rw-r--r--fs/nfs/dir.c39
-rw-r--r--fs/nfs/direct.c746
-rw-r--r--fs/nfs/file.c8
-rw-r--r--fs/nfs/fscache.c15
-rw-r--r--fs/nfs/fscache.h10
-rw-r--r--fs/nfs/getroot.c85
-rw-r--r--fs/nfs/idmap.c30
-rw-r--r--fs/nfs/inode.c118
-rw-r--r--fs/nfs/internal.h135
-rw-r--r--fs/nfs/namespace.c103
-rw-r--r--fs/nfs/netns.h5
-rw-r--r--fs/nfs/nfs2xdr.c5
-rw-r--r--fs/nfs/nfs3proc.c27
-rw-r--r--fs/nfs/nfs3xdr.c112
-rw-r--r--fs/nfs/nfs4_fs.h23
-rw-r--r--fs/nfs/nfs4filelayout.c688
-rw-r--r--fs/nfs/nfs4filelayout.h63
-rw-r--r--fs/nfs/nfs4filelayoutdev.c102
-rw-r--r--fs/nfs/nfs4namespace.c55
-rw-r--r--fs/nfs/nfs4proc.c537
-rw-r--r--fs/nfs/nfs4renewd.c2
-rw-r--r--fs/nfs/nfs4state.c225
-rw-r--r--fs/nfs/nfs4xdr.c399
-rw-r--r--fs/nfs/objlayout/objio_osd.c18
-rw-r--r--fs/nfs/objlayout/objlayout.c19
-rw-r--r--fs/nfs/pagelist.c61
-rw-r--r--fs/nfs/pnfs.c352
-rw-r--r--fs/nfs/pnfs.h127
-rw-r--r--fs/nfs/proc.c21
-rw-r--r--fs/nfs/read.c437
-rw-r--r--fs/nfs/super.c760
-rw-r--r--fs/nfs/write.c809
39 files changed, 3743 insertions, 2786 deletions
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index 2a0e6c599147..f90f4f5cd421 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -29,9 +29,20 @@ config NFS_FS
29 29
30 If unsure, say N. 30 If unsure, say N.
31 31
32config NFS_V2
33 bool "NFS client support for NFS version 2"
34 depends on NFS_FS
35 default y
36 help
37 This option enables support for version 2 of the NFS protocol
38 (RFC 1094) in the kernel's NFS client.
39
40 If unsure, say Y.
41
32config NFS_V3 42config NFS_V3
33 bool "NFS client support for NFS version 3" 43 bool "NFS client support for NFS version 3"
34 depends on NFS_FS 44 depends on NFS_FS
45 default y
35 help 46 help
36 This option enables support for version 3 of the NFS protocol 47 This option enables support for version 3 of the NFS protocol
37 (RFC 1813) in the kernel's NFS client. 48 (RFC 1813) in the kernel's NFS client.
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index b58613d0abb3..7ddd45d9f170 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -4,11 +4,12 @@
4 4
5obj-$(CONFIG_NFS_FS) += nfs.o 5obj-$(CONFIG_NFS_FS) += nfs.o
6 6
7nfs-y := client.o dir.o file.o getroot.o inode.o super.o nfs2xdr.o \ 7nfs-y := client.o dir.o file.o getroot.o inode.o super.o \
8 direct.o pagelist.o proc.o read.o symlink.o unlink.o \ 8 direct.o pagelist.o read.o symlink.o unlink.o \
9 write.o namespace.o mount_clnt.o \ 9 write.o namespace.o mount_clnt.o \
10 dns_resolve.o cache_lib.o 10 dns_resolve.o cache_lib.o
11nfs-$(CONFIG_ROOT_NFS) += nfsroot.o 11nfs-$(CONFIG_ROOT_NFS) += nfsroot.o
12nfs-$(CONFIG_NFS_V2) += proc.o nfs2xdr.o
12nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o 13nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o
13nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o 14nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o
14nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ 15nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 7f6a23f0244e..7ae8a608956f 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -187,7 +187,6 @@ static void bl_end_io_read(struct bio *bio, int err)
187 struct parallel_io *par = bio->bi_private; 187 struct parallel_io *par = bio->bi_private;
188 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 188 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
189 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; 189 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
190 struct nfs_read_data *rdata = (struct nfs_read_data *)par->data;
191 190
192 do { 191 do {
193 struct page *page = bvec->bv_page; 192 struct page *page = bvec->bv_page;
@@ -198,9 +197,12 @@ static void bl_end_io_read(struct bio *bio, int err)
198 SetPageUptodate(page); 197 SetPageUptodate(page);
199 } while (bvec >= bio->bi_io_vec); 198 } while (bvec >= bio->bi_io_vec);
200 if (!uptodate) { 199 if (!uptodate) {
201 if (!rdata->pnfs_error) 200 struct nfs_read_data *rdata = par->data;
202 rdata->pnfs_error = -EIO; 201 struct nfs_pgio_header *header = rdata->header;
203 pnfs_set_lo_fail(rdata->lseg); 202
203 if (!header->pnfs_error)
204 header->pnfs_error = -EIO;
205 pnfs_set_lo_fail(header->lseg);
204 } 206 }
205 bio_put(bio); 207 bio_put(bio);
206 put_parallel(par); 208 put_parallel(par);
@@ -221,7 +223,7 @@ bl_end_par_io_read(void *data, int unused)
221{ 223{
222 struct nfs_read_data *rdata = data; 224 struct nfs_read_data *rdata = data;
223 225
224 rdata->task.tk_status = rdata->pnfs_error; 226 rdata->task.tk_status = rdata->header->pnfs_error;
225 INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup); 227 INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup);
226 schedule_work(&rdata->task.u.tk_work); 228 schedule_work(&rdata->task.u.tk_work);
227} 229}
@@ -229,6 +231,7 @@ bl_end_par_io_read(void *data, int unused)
229static enum pnfs_try_status 231static enum pnfs_try_status
230bl_read_pagelist(struct nfs_read_data *rdata) 232bl_read_pagelist(struct nfs_read_data *rdata)
231{ 233{
234 struct nfs_pgio_header *header = rdata->header;
232 int i, hole; 235 int i, hole;
233 struct bio *bio = NULL; 236 struct bio *bio = NULL;
234 struct pnfs_block_extent *be = NULL, *cow_read = NULL; 237 struct pnfs_block_extent *be = NULL, *cow_read = NULL;
@@ -239,7 +242,7 @@ bl_read_pagelist(struct nfs_read_data *rdata)
239 int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT; 242 int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT;
240 243
241 dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__, 244 dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__,
242 rdata->npages, f_offset, (unsigned int)rdata->args.count); 245 rdata->pages.npages, f_offset, (unsigned int)rdata->args.count);
243 246
244 par = alloc_parallel(rdata); 247 par = alloc_parallel(rdata);
245 if (!par) 248 if (!par)
@@ -249,17 +252,17 @@ bl_read_pagelist(struct nfs_read_data *rdata)
249 252
250 isect = (sector_t) (f_offset >> SECTOR_SHIFT); 253 isect = (sector_t) (f_offset >> SECTOR_SHIFT);
251 /* Code assumes extents are page-aligned */ 254 /* Code assumes extents are page-aligned */
252 for (i = pg_index; i < rdata->npages; i++) { 255 for (i = pg_index; i < rdata->pages.npages; i++) {
253 if (!extent_length) { 256 if (!extent_length) {
254 /* We've used up the previous extent */ 257 /* We've used up the previous extent */
255 bl_put_extent(be); 258 bl_put_extent(be);
256 bl_put_extent(cow_read); 259 bl_put_extent(cow_read);
257 bio = bl_submit_bio(READ, bio); 260 bio = bl_submit_bio(READ, bio);
258 /* Get the next one */ 261 /* Get the next one */
259 be = bl_find_get_extent(BLK_LSEG2EXT(rdata->lseg), 262 be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg),
260 isect, &cow_read); 263 isect, &cow_read);
261 if (!be) { 264 if (!be) {
262 rdata->pnfs_error = -EIO; 265 header->pnfs_error = -EIO;
263 goto out; 266 goto out;
264 } 267 }
265 extent_length = be->be_length - 268 extent_length = be->be_length -
@@ -282,11 +285,12 @@ bl_read_pagelist(struct nfs_read_data *rdata)
282 struct pnfs_block_extent *be_read; 285 struct pnfs_block_extent *be_read;
283 286
284 be_read = (hole && cow_read) ? cow_read : be; 287 be_read = (hole && cow_read) ? cow_read : be;
285 bio = bl_add_page_to_bio(bio, rdata->npages - i, READ, 288 bio = bl_add_page_to_bio(bio, rdata->pages.npages - i,
289 READ,
286 isect, pages[i], be_read, 290 isect, pages[i], be_read,
287 bl_end_io_read, par); 291 bl_end_io_read, par);
288 if (IS_ERR(bio)) { 292 if (IS_ERR(bio)) {
289 rdata->pnfs_error = PTR_ERR(bio); 293 header->pnfs_error = PTR_ERR(bio);
290 bio = NULL; 294 bio = NULL;
291 goto out; 295 goto out;
292 } 296 }
@@ -294,9 +298,9 @@ bl_read_pagelist(struct nfs_read_data *rdata)
294 isect += PAGE_CACHE_SECTORS; 298 isect += PAGE_CACHE_SECTORS;
295 extent_length -= PAGE_CACHE_SECTORS; 299 extent_length -= PAGE_CACHE_SECTORS;
296 } 300 }
297 if ((isect << SECTOR_SHIFT) >= rdata->inode->i_size) { 301 if ((isect << SECTOR_SHIFT) >= header->inode->i_size) {
298 rdata->res.eof = 1; 302 rdata->res.eof = 1;
299 rdata->res.count = rdata->inode->i_size - f_offset; 303 rdata->res.count = header->inode->i_size - f_offset;
300 } else { 304 } else {
301 rdata->res.count = (isect << SECTOR_SHIFT) - f_offset; 305 rdata->res.count = (isect << SECTOR_SHIFT) - f_offset;
302 } 306 }
@@ -345,7 +349,6 @@ static void bl_end_io_write_zero(struct bio *bio, int err)
345 struct parallel_io *par = bio->bi_private; 349 struct parallel_io *par = bio->bi_private;
346 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 350 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
347 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; 351 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
348 struct nfs_write_data *wdata = (struct nfs_write_data *)par->data;
349 352
350 do { 353 do {
351 struct page *page = bvec->bv_page; 354 struct page *page = bvec->bv_page;
@@ -358,9 +361,12 @@ static void bl_end_io_write_zero(struct bio *bio, int err)
358 } while (bvec >= bio->bi_io_vec); 361 } while (bvec >= bio->bi_io_vec);
359 362
360 if (unlikely(!uptodate)) { 363 if (unlikely(!uptodate)) {
361 if (!wdata->pnfs_error) 364 struct nfs_write_data *data = par->data;
362 wdata->pnfs_error = -EIO; 365 struct nfs_pgio_header *header = data->header;
363 pnfs_set_lo_fail(wdata->lseg); 366
367 if (!header->pnfs_error)
368 header->pnfs_error = -EIO;
369 pnfs_set_lo_fail(header->lseg);
364 } 370 }
365 bio_put(bio); 371 bio_put(bio);
366 put_parallel(par); 372 put_parallel(par);
@@ -370,12 +376,13 @@ static void bl_end_io_write(struct bio *bio, int err)
370{ 376{
371 struct parallel_io *par = bio->bi_private; 377 struct parallel_io *par = bio->bi_private;
372 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 378 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
373 struct nfs_write_data *wdata = (struct nfs_write_data *)par->data; 379 struct nfs_write_data *data = par->data;
380 struct nfs_pgio_header *header = data->header;
374 381
375 if (!uptodate) { 382 if (!uptodate) {
376 if (!wdata->pnfs_error) 383 if (!header->pnfs_error)
377 wdata->pnfs_error = -EIO; 384 header->pnfs_error = -EIO;
378 pnfs_set_lo_fail(wdata->lseg); 385 pnfs_set_lo_fail(header->lseg);
379 } 386 }
380 bio_put(bio); 387 bio_put(bio);
381 put_parallel(par); 388 put_parallel(par);
@@ -391,9 +398,9 @@ static void bl_write_cleanup(struct work_struct *work)
391 dprintk("%s enter\n", __func__); 398 dprintk("%s enter\n", __func__);
392 task = container_of(work, struct rpc_task, u.tk_work); 399 task = container_of(work, struct rpc_task, u.tk_work);
393 wdata = container_of(task, struct nfs_write_data, task); 400 wdata = container_of(task, struct nfs_write_data, task);
394 if (likely(!wdata->pnfs_error)) { 401 if (likely(!wdata->header->pnfs_error)) {
395 /* Marks for LAYOUTCOMMIT */ 402 /* Marks for LAYOUTCOMMIT */
396 mark_extents_written(BLK_LSEG2EXT(wdata->lseg), 403 mark_extents_written(BLK_LSEG2EXT(wdata->header->lseg),
397 wdata->args.offset, wdata->args.count); 404 wdata->args.offset, wdata->args.count);
398 } 405 }
399 pnfs_ld_write_done(wdata); 406 pnfs_ld_write_done(wdata);
@@ -404,12 +411,12 @@ static void bl_end_par_io_write(void *data, int num_se)
404{ 411{
405 struct nfs_write_data *wdata = data; 412 struct nfs_write_data *wdata = data;
406 413
407 if (unlikely(wdata->pnfs_error)) { 414 if (unlikely(wdata->header->pnfs_error)) {
408 bl_free_short_extents(&BLK_LSEG2EXT(wdata->lseg)->bl_inval, 415 bl_free_short_extents(&BLK_LSEG2EXT(wdata->header->lseg)->bl_inval,
409 num_se); 416 num_se);
410 } 417 }
411 418
412 wdata->task.tk_status = wdata->pnfs_error; 419 wdata->task.tk_status = wdata->header->pnfs_error;
413 wdata->verf.committed = NFS_FILE_SYNC; 420 wdata->verf.committed = NFS_FILE_SYNC;
414 INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup); 421 INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup);
415 schedule_work(&wdata->task.u.tk_work); 422 schedule_work(&wdata->task.u.tk_work);
@@ -540,6 +547,7 @@ check_page:
540static enum pnfs_try_status 547static enum pnfs_try_status
541bl_write_pagelist(struct nfs_write_data *wdata, int sync) 548bl_write_pagelist(struct nfs_write_data *wdata, int sync)
542{ 549{
550 struct nfs_pgio_header *header = wdata->header;
543 int i, ret, npg_zero, pg_index, last = 0; 551 int i, ret, npg_zero, pg_index, last = 0;
544 struct bio *bio = NULL; 552 struct bio *bio = NULL;
545 struct pnfs_block_extent *be = NULL, *cow_read = NULL; 553 struct pnfs_block_extent *be = NULL, *cow_read = NULL;
@@ -552,7 +560,7 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
552 pgoff_t index; 560 pgoff_t index;
553 u64 temp; 561 u64 temp;
554 int npg_per_block = 562 int npg_per_block =
555 NFS_SERVER(wdata->inode)->pnfs_blksize >> PAGE_CACHE_SHIFT; 563 NFS_SERVER(header->inode)->pnfs_blksize >> PAGE_CACHE_SHIFT;
556 564
557 dprintk("%s enter, %Zu@%lld\n", __func__, count, offset); 565 dprintk("%s enter, %Zu@%lld\n", __func__, count, offset);
558 /* At this point, wdata->pages is a (sequential) list of nfs_pages. 566 /* At this point, wdata->pages is a (sequential) list of nfs_pages.
@@ -566,7 +574,7 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
566 /* At this point, have to be more careful with error handling */ 574 /* At this point, have to be more careful with error handling */
567 575
568 isect = (sector_t) ((offset & (long)PAGE_CACHE_MASK) >> SECTOR_SHIFT); 576 isect = (sector_t) ((offset & (long)PAGE_CACHE_MASK) >> SECTOR_SHIFT);
569 be = bl_find_get_extent(BLK_LSEG2EXT(wdata->lseg), isect, &cow_read); 577 be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg), isect, &cow_read);
570 if (!be || !is_writable(be, isect)) { 578 if (!be || !is_writable(be, isect)) {
571 dprintk("%s no matching extents!\n", __func__); 579 dprintk("%s no matching extents!\n", __func__);
572 goto out_mds; 580 goto out_mds;
@@ -597,10 +605,10 @@ fill_invalid_ext:
597 dprintk("%s zero %dth page: index %lu isect %llu\n", 605 dprintk("%s zero %dth page: index %lu isect %llu\n",
598 __func__, npg_zero, index, 606 __func__, npg_zero, index,
599 (unsigned long long)isect); 607 (unsigned long long)isect);
600 page = bl_find_get_zeroing_page(wdata->inode, index, 608 page = bl_find_get_zeroing_page(header->inode, index,
601 cow_read); 609 cow_read);
602 if (unlikely(IS_ERR(page))) { 610 if (unlikely(IS_ERR(page))) {
603 wdata->pnfs_error = PTR_ERR(page); 611 header->pnfs_error = PTR_ERR(page);
604 goto out; 612 goto out;
605 } else if (page == NULL) 613 } else if (page == NULL)
606 goto next_page; 614 goto next_page;
@@ -612,7 +620,7 @@ fill_invalid_ext:
612 __func__, ret); 620 __func__, ret);
613 end_page_writeback(page); 621 end_page_writeback(page);
614 page_cache_release(page); 622 page_cache_release(page);
615 wdata->pnfs_error = ret; 623 header->pnfs_error = ret;
616 goto out; 624 goto out;
617 } 625 }
618 if (likely(!bl_push_one_short_extent(be->be_inval))) 626 if (likely(!bl_push_one_short_extent(be->be_inval)))
@@ -620,11 +628,11 @@ fill_invalid_ext:
620 else { 628 else {
621 end_page_writeback(page); 629 end_page_writeback(page);
622 page_cache_release(page); 630 page_cache_release(page);
623 wdata->pnfs_error = -ENOMEM; 631 header->pnfs_error = -ENOMEM;
624 goto out; 632 goto out;
625 } 633 }
626 /* FIXME: This should be done in bi_end_io */ 634 /* FIXME: This should be done in bi_end_io */
627 mark_extents_written(BLK_LSEG2EXT(wdata->lseg), 635 mark_extents_written(BLK_LSEG2EXT(header->lseg),
628 page->index << PAGE_CACHE_SHIFT, 636 page->index << PAGE_CACHE_SHIFT,
629 PAGE_CACHE_SIZE); 637 PAGE_CACHE_SIZE);
630 638
@@ -632,7 +640,7 @@ fill_invalid_ext:
632 isect, page, be, 640 isect, page, be,
633 bl_end_io_write_zero, par); 641 bl_end_io_write_zero, par);
634 if (IS_ERR(bio)) { 642 if (IS_ERR(bio)) {
635 wdata->pnfs_error = PTR_ERR(bio); 643 header->pnfs_error = PTR_ERR(bio);
636 bio = NULL; 644 bio = NULL;
637 goto out; 645 goto out;
638 } 646 }
@@ -647,16 +655,16 @@ next_page:
647 655
648 /* Middle pages */ 656 /* Middle pages */
649 pg_index = wdata->args.pgbase >> PAGE_CACHE_SHIFT; 657 pg_index = wdata->args.pgbase >> PAGE_CACHE_SHIFT;
650 for (i = pg_index; i < wdata->npages; i++) { 658 for (i = pg_index; i < wdata->pages.npages; i++) {
651 if (!extent_length) { 659 if (!extent_length) {
652 /* We've used up the previous extent */ 660 /* We've used up the previous extent */
653 bl_put_extent(be); 661 bl_put_extent(be);
654 bio = bl_submit_bio(WRITE, bio); 662 bio = bl_submit_bio(WRITE, bio);
655 /* Get the next one */ 663 /* Get the next one */
656 be = bl_find_get_extent(BLK_LSEG2EXT(wdata->lseg), 664 be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg),
657 isect, NULL); 665 isect, NULL);
658 if (!be || !is_writable(be, isect)) { 666 if (!be || !is_writable(be, isect)) {
659 wdata->pnfs_error = -EINVAL; 667 header->pnfs_error = -EINVAL;
660 goto out; 668 goto out;
661 } 669 }
662 if (be->be_state == PNFS_BLOCK_INVALID_DATA) { 670 if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
@@ -664,7 +672,7 @@ next_page:
664 be->be_inval))) 672 be->be_inval)))
665 par->bse_count++; 673 par->bse_count++;
666 else { 674 else {
667 wdata->pnfs_error = -ENOMEM; 675 header->pnfs_error = -ENOMEM;
668 goto out; 676 goto out;
669 } 677 }
670 } 678 }
@@ -677,15 +685,15 @@ next_page:
677 if (unlikely(ret)) { 685 if (unlikely(ret)) {
678 dprintk("%s bl_mark_sectors_init fail %d\n", 686 dprintk("%s bl_mark_sectors_init fail %d\n",
679 __func__, ret); 687 __func__, ret);
680 wdata->pnfs_error = ret; 688 header->pnfs_error = ret;
681 goto out; 689 goto out;
682 } 690 }
683 } 691 }
684 bio = bl_add_page_to_bio(bio, wdata->npages - i, WRITE, 692 bio = bl_add_page_to_bio(bio, wdata->pages.npages - i, WRITE,
685 isect, pages[i], be, 693 isect, pages[i], be,
686 bl_end_io_write, par); 694 bl_end_io_write, par);
687 if (IS_ERR(bio)) { 695 if (IS_ERR(bio)) {
688 wdata->pnfs_error = PTR_ERR(bio); 696 header->pnfs_error = PTR_ERR(bio);
689 bio = NULL; 697 bio = NULL;
690 goto out; 698 goto out;
691 } 699 }
diff --git a/fs/nfs/blocklayout/blocklayoutdev.c b/fs/nfs/blocklayout/blocklayoutdev.c
index a5c88a554d92..c96554245ccf 100644
--- a/fs/nfs/blocklayout/blocklayoutdev.c
+++ b/fs/nfs/blocklayout/blocklayoutdev.c
@@ -123,7 +123,7 @@ nfs4_blk_decode_device(struct nfs_server *server,
123 uint8_t *dataptr; 123 uint8_t *dataptr;
124 DECLARE_WAITQUEUE(wq, current); 124 DECLARE_WAITQUEUE(wq, current);
125 int offset, len, i, rc; 125 int offset, len, i, rc;
126 struct net *net = server->nfs_client->net; 126 struct net *net = server->nfs_client->cl_net;
127 struct nfs_net *nn = net_generic(net, nfs_net_id); 127 struct nfs_net *nn = net_generic(net, nfs_net_id);
128 struct bl_dev_msg *reply = &nn->bl_mount_reply; 128 struct bl_dev_msg *reply = &nn->bl_mount_reply;
129 129
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 60f7e4ec842c..7d108753af81 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -65,7 +65,7 @@ static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq);
65static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion) 65static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion)
66{ 66{
67 int ret = 0; 67 int ret = 0;
68 struct nfs_net *nn = net_generic(clp->net, nfs_net_id); 68 struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id);
69 69
70 if (clp->rpc_ops->version != 4 || minorversion != 0) 70 if (clp->rpc_ops->version != 4 || minorversion != 0)
71 return ret; 71 return ret;
@@ -90,7 +90,9 @@ static bool nfs4_disable_idmapping = true;
90 * RPC cruft for NFS 90 * RPC cruft for NFS
91 */ 91 */
92static const struct rpc_version *nfs_version[5] = { 92static const struct rpc_version *nfs_version[5] = {
93#ifdef CONFIG_NFS_V2
93 [2] = &nfs_version2, 94 [2] = &nfs_version2,
95#endif
94#ifdef CONFIG_NFS_V3 96#ifdef CONFIG_NFS_V3
95 [3] = &nfs_version3, 97 [3] = &nfs_version3,
96#endif 98#endif
@@ -129,6 +131,7 @@ const struct rpc_program nfsacl_program = {
129#endif /* CONFIG_NFS_V3_ACL */ 131#endif /* CONFIG_NFS_V3_ACL */
130 132
131struct nfs_client_initdata { 133struct nfs_client_initdata {
134 unsigned long init_flags;
132 const char *hostname; 135 const char *hostname;
133 const struct sockaddr *addr; 136 const struct sockaddr *addr;
134 size_t addrlen; 137 size_t addrlen;
@@ -172,7 +175,7 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
172 clp->cl_rpcclient = ERR_PTR(-EINVAL); 175 clp->cl_rpcclient = ERR_PTR(-EINVAL);
173 176
174 clp->cl_proto = cl_init->proto; 177 clp->cl_proto = cl_init->proto;
175 clp->net = get_net(cl_init->net); 178 clp->cl_net = get_net(cl_init->net);
176 179
177#ifdef CONFIG_NFS_V4 180#ifdef CONFIG_NFS_V4
178 err = nfs_get_cb_ident_idr(clp, cl_init->minorversion); 181 err = nfs_get_cb_ident_idr(clp, cl_init->minorversion);
@@ -182,7 +185,6 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
182 spin_lock_init(&clp->cl_lock); 185 spin_lock_init(&clp->cl_lock);
183 INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state); 186 INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state);
184 rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client"); 187 rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client");
185 clp->cl_boot_time = CURRENT_TIME;
186 clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED; 188 clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED;
187 clp->cl_minorversion = cl_init->minorversion; 189 clp->cl_minorversion = cl_init->minorversion;
188 clp->cl_mvops = nfs_v4_minor_ops[cl_init->minorversion]; 190 clp->cl_mvops = nfs_v4_minor_ops[cl_init->minorversion];
@@ -207,6 +209,7 @@ static void nfs4_shutdown_session(struct nfs_client *clp)
207 if (nfs4_has_session(clp)) { 209 if (nfs4_has_session(clp)) {
208 nfs4_deviceid_purge_client(clp); 210 nfs4_deviceid_purge_client(clp);
209 nfs4_destroy_session(clp->cl_session); 211 nfs4_destroy_session(clp->cl_session);
212 nfs4_destroy_clientid(clp);
210 } 213 }
211 214
212} 215}
@@ -235,6 +238,9 @@ static void nfs4_shutdown_client(struct nfs_client *clp)
235 nfs_idmap_delete(clp); 238 nfs_idmap_delete(clp);
236 239
237 rpc_destroy_wait_queue(&clp->cl_rpcwaitq); 240 rpc_destroy_wait_queue(&clp->cl_rpcwaitq);
241 kfree(clp->cl_serverowner);
242 kfree(clp->cl_serverscope);
243 kfree(clp->cl_implid);
238} 244}
239 245
240/* idr_remove_all is not needed as all id's are removed by nfs_put_client */ 246/* idr_remove_all is not needed as all id's are removed by nfs_put_client */
@@ -248,7 +254,7 @@ void nfs_cleanup_cb_ident_idr(struct net *net)
248/* nfs_client_lock held */ 254/* nfs_client_lock held */
249static void nfs_cb_idr_remove_locked(struct nfs_client *clp) 255static void nfs_cb_idr_remove_locked(struct nfs_client *clp)
250{ 256{
251 struct nfs_net *nn = net_generic(clp->net, nfs_net_id); 257 struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id);
252 258
253 if (clp->cl_cb_ident) 259 if (clp->cl_cb_ident)
254 idr_remove(&nn->cb_ident_idr, clp->cl_cb_ident); 260 idr_remove(&nn->cb_ident_idr, clp->cl_cb_ident);
@@ -301,10 +307,8 @@ static void nfs_free_client(struct nfs_client *clp)
301 if (clp->cl_machine_cred != NULL) 307 if (clp->cl_machine_cred != NULL)
302 put_rpccred(clp->cl_machine_cred); 308 put_rpccred(clp->cl_machine_cred);
303 309
304 put_net(clp->net); 310 put_net(clp->cl_net);
305 kfree(clp->cl_hostname); 311 kfree(clp->cl_hostname);
306 kfree(clp->server_scope);
307 kfree(clp->impl_id);
308 kfree(clp); 312 kfree(clp);
309 313
310 dprintk("<-- nfs_free_client()\n"); 314 dprintk("<-- nfs_free_client()\n");
@@ -321,7 +325,7 @@ void nfs_put_client(struct nfs_client *clp)
321 return; 325 return;
322 326
323 dprintk("--> nfs_put_client({%d})\n", atomic_read(&clp->cl_count)); 327 dprintk("--> nfs_put_client({%d})\n", atomic_read(&clp->cl_count));
324 nn = net_generic(clp->net, nfs_net_id); 328 nn = net_generic(clp->cl_net, nfs_net_id);
325 329
326 if (atomic_dec_and_lock(&clp->cl_count, &nn->nfs_client_lock)) { 330 if (atomic_dec_and_lock(&clp->cl_count, &nn->nfs_client_lock)) {
327 list_del(&clp->cl_share_link); 331 list_del(&clp->cl_share_link);
@@ -456,6 +460,8 @@ static bool nfs4_cb_match_client(const struct sockaddr *addr,
456 clp->cl_cons_state == NFS_CS_SESSION_INITING)) 460 clp->cl_cons_state == NFS_CS_SESSION_INITING))
457 return false; 461 return false;
458 462
463 smp_rmb();
464
459 /* Match the version and minorversion */ 465 /* Match the version and minorversion */
460 if (clp->rpc_ops->version != 4 || 466 if (clp->rpc_ops->version != 4 ||
461 clp->cl_minorversion != minorversion) 467 clp->cl_minorversion != minorversion)
@@ -504,6 +510,47 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat
504 return NULL; 510 return NULL;
505} 511}
506 512
513static bool nfs_client_init_is_complete(const struct nfs_client *clp)
514{
515 return clp->cl_cons_state != NFS_CS_INITING;
516}
517
518int nfs_wait_client_init_complete(const struct nfs_client *clp)
519{
520 return wait_event_killable(nfs_client_active_wq,
521 nfs_client_init_is_complete(clp));
522}
523
524/*
525 * Found an existing client. Make sure it's ready before returning.
526 */
527static struct nfs_client *
528nfs_found_client(const struct nfs_client_initdata *cl_init,
529 struct nfs_client *clp)
530{
531 int error;
532
533 error = nfs_wait_client_init_complete(clp);
534 if (error < 0) {
535 nfs_put_client(clp);
536 return ERR_PTR(-ERESTARTSYS);
537 }
538
539 if (clp->cl_cons_state < NFS_CS_READY) {
540 error = clp->cl_cons_state;
541 nfs_put_client(clp);
542 return ERR_PTR(error);
543 }
544
545 smp_rmb();
546
547 BUG_ON(clp->cl_cons_state != NFS_CS_READY);
548
549 dprintk("<-- %s found nfs_client %p for %s\n",
550 __func__, clp, cl_init->hostname ?: "");
551 return clp;
552}
553
507/* 554/*
508 * Look up a client by IP address and protocol version 555 * Look up a client by IP address and protocol version
509 * - creates a new record if one doesn't yet exist 556 * - creates a new record if one doesn't yet exist
@@ -512,11 +559,9 @@ static struct nfs_client *
512nfs_get_client(const struct nfs_client_initdata *cl_init, 559nfs_get_client(const struct nfs_client_initdata *cl_init,
513 const struct rpc_timeout *timeparms, 560 const struct rpc_timeout *timeparms,
514 const char *ip_addr, 561 const char *ip_addr,
515 rpc_authflavor_t authflavour, 562 rpc_authflavor_t authflavour)
516 int noresvport)
517{ 563{
518 struct nfs_client *clp, *new = NULL; 564 struct nfs_client *clp, *new = NULL;
519 int error;
520 struct nfs_net *nn = net_generic(cl_init->net, nfs_net_id); 565 struct nfs_net *nn = net_generic(cl_init->net, nfs_net_id);
521 566
522 dprintk("--> nfs_get_client(%s,v%u)\n", 567 dprintk("--> nfs_get_client(%s,v%u)\n",
@@ -527,60 +572,29 @@ nfs_get_client(const struct nfs_client_initdata *cl_init,
527 spin_lock(&nn->nfs_client_lock); 572 spin_lock(&nn->nfs_client_lock);
528 573
529 clp = nfs_match_client(cl_init); 574 clp = nfs_match_client(cl_init);
530 if (clp) 575 if (clp) {
531 goto found_client; 576 spin_unlock(&nn->nfs_client_lock);
532 if (new) 577 if (new)
533 goto install_client; 578 nfs_free_client(new);
579 return nfs_found_client(cl_init, clp);
580 }
581 if (new) {
582 list_add(&new->cl_share_link, &nn->nfs_client_list);
583 spin_unlock(&nn->nfs_client_lock);
584 new->cl_flags = cl_init->init_flags;
585 return cl_init->rpc_ops->init_client(new,
586 timeparms, ip_addr,
587 authflavour);
588 }
534 589
535 spin_unlock(&nn->nfs_client_lock); 590 spin_unlock(&nn->nfs_client_lock);
536 591
537 new = nfs_alloc_client(cl_init); 592 new = nfs_alloc_client(cl_init);
538 } while (!IS_ERR(new)); 593 } while (!IS_ERR(new));
539 594
540 dprintk("--> nfs_get_client() = %ld [failed]\n", PTR_ERR(new)); 595 dprintk("<-- nfs_get_client() Failed to find %s (%ld)\n",
596 cl_init->hostname ?: "", PTR_ERR(new));
541 return new; 597 return new;
542
543 /* install a new client and return with it unready */
544install_client:
545 clp = new;
546 list_add(&clp->cl_share_link, &nn->nfs_client_list);
547 spin_unlock(&nn->nfs_client_lock);
548
549 error = cl_init->rpc_ops->init_client(clp, timeparms, ip_addr,
550 authflavour, noresvport);
551 if (error < 0) {
552 nfs_put_client(clp);
553 return ERR_PTR(error);
554 }
555 dprintk("--> nfs_get_client() = %p [new]\n", clp);
556 return clp;
557
558 /* found an existing client
559 * - make sure it's ready before returning
560 */
561found_client:
562 spin_unlock(&nn->nfs_client_lock);
563
564 if (new)
565 nfs_free_client(new);
566
567 error = wait_event_killable(nfs_client_active_wq,
568 clp->cl_cons_state < NFS_CS_INITING);
569 if (error < 0) {
570 nfs_put_client(clp);
571 return ERR_PTR(-ERESTARTSYS);
572 }
573
574 if (clp->cl_cons_state < NFS_CS_READY) {
575 error = clp->cl_cons_state;
576 nfs_put_client(clp);
577 return ERR_PTR(error);
578 }
579
580 BUG_ON(clp->cl_cons_state != NFS_CS_READY);
581
582 dprintk("--> nfs_get_client() = %p [share]\n", clp);
583 return clp;
584} 598}
585 599
586/* 600/*
@@ -588,27 +602,12 @@ found_client:
588 */ 602 */
589void nfs_mark_client_ready(struct nfs_client *clp, int state) 603void nfs_mark_client_ready(struct nfs_client *clp, int state)
590{ 604{
605 smp_wmb();
591 clp->cl_cons_state = state; 606 clp->cl_cons_state = state;
592 wake_up_all(&nfs_client_active_wq); 607 wake_up_all(&nfs_client_active_wq);
593} 608}
594 609
595/* 610/*
596 * With sessions, the client is not marked ready until after a
597 * successful EXCHANGE_ID and CREATE_SESSION.
598 *
599 * Map errors cl_cons_state errors to EPROTONOSUPPORT to indicate
600 * other versions of NFS can be tried.
601 */
602int nfs4_check_client_ready(struct nfs_client *clp)
603{
604 if (!nfs4_has_session(clp))
605 return 0;
606 if (clp->cl_cons_state < NFS_CS_READY)
607 return -EPROTONOSUPPORT;
608 return 0;
609}
610
611/*
612 * Initialise the timeout values for a connection 611 * Initialise the timeout values for a connection
613 */ 612 */
614static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, 613static void nfs_init_timeout_values(struct rpc_timeout *to, int proto,
@@ -654,12 +653,11 @@ static void nfs_init_timeout_values(struct rpc_timeout *to, int proto,
654 */ 653 */
655static int nfs_create_rpc_client(struct nfs_client *clp, 654static int nfs_create_rpc_client(struct nfs_client *clp,
656 const struct rpc_timeout *timeparms, 655 const struct rpc_timeout *timeparms,
657 rpc_authflavor_t flavor, 656 rpc_authflavor_t flavor)
658 int discrtry, int noresvport)
659{ 657{
660 struct rpc_clnt *clnt = NULL; 658 struct rpc_clnt *clnt = NULL;
661 struct rpc_create_args args = { 659 struct rpc_create_args args = {
662 .net = clp->net, 660 .net = clp->cl_net,
663 .protocol = clp->cl_proto, 661 .protocol = clp->cl_proto,
664 .address = (struct sockaddr *)&clp->cl_addr, 662 .address = (struct sockaddr *)&clp->cl_addr,
665 .addrsize = clp->cl_addrlen, 663 .addrsize = clp->cl_addrlen,
@@ -670,9 +668,9 @@ static int nfs_create_rpc_client(struct nfs_client *clp,
670 .authflavor = flavor, 668 .authflavor = flavor,
671 }; 669 };
672 670
673 if (discrtry) 671 if (test_bit(NFS_CS_DISCRTRY, &clp->cl_flags))
674 args.flags |= RPC_CLNT_CREATE_DISCRTRY; 672 args.flags |= RPC_CLNT_CREATE_DISCRTRY;
675 if (noresvport) 673 if (test_bit(NFS_CS_NORESVPORT, &clp->cl_flags))
676 args.flags |= RPC_CLNT_CREATE_NONPRIVPORT; 674 args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
677 675
678 if (!IS_ERR(clp->cl_rpcclient)) 676 if (!IS_ERR(clp->cl_rpcclient))
@@ -713,7 +711,7 @@ static int nfs_start_lockd(struct nfs_server *server)
713 .nfs_version = clp->rpc_ops->version, 711 .nfs_version = clp->rpc_ops->version,
714 .noresvport = server->flags & NFS_MOUNT_NORESVPORT ? 712 .noresvport = server->flags & NFS_MOUNT_NORESVPORT ?
715 1 : 0, 713 1 : 0,
716 .net = clp->net, 714 .net = clp->cl_net,
717 }; 715 };
718 716
719 if (nlm_init.nfs_version > 3) 717 if (nlm_init.nfs_version > 3)
@@ -805,36 +803,43 @@ static int nfs_init_server_rpcclient(struct nfs_server *server,
805 return 0; 803 return 0;
806} 804}
807 805
808/* 806/**
809 * Initialise an NFS2 or NFS3 client 807 * nfs_init_client - Initialise an NFS2 or NFS3 client
808 *
809 * @clp: nfs_client to initialise
810 * @timeparms: timeout parameters for underlying RPC transport
811 * @ip_addr: IP presentation address (not used)
812 * @authflavor: authentication flavor for underlying RPC transport
813 *
814 * Returns pointer to an NFS client, or an ERR_PTR value.
810 */ 815 */
811int nfs_init_client(struct nfs_client *clp, const struct rpc_timeout *timeparms, 816struct nfs_client *nfs_init_client(struct nfs_client *clp,
812 const char *ip_addr, rpc_authflavor_t authflavour, 817 const struct rpc_timeout *timeparms,
813 int noresvport) 818 const char *ip_addr, rpc_authflavor_t authflavour)
814{ 819{
815 int error; 820 int error;
816 821
817 if (clp->cl_cons_state == NFS_CS_READY) { 822 if (clp->cl_cons_state == NFS_CS_READY) {
818 /* the client is already initialised */ 823 /* the client is already initialised */
819 dprintk("<-- nfs_init_client() = 0 [already %p]\n", clp); 824 dprintk("<-- nfs_init_client() = 0 [already %p]\n", clp);
820 return 0; 825 return clp;
821 } 826 }
822 827
823 /* 828 /*
824 * Create a client RPC handle for doing FSSTAT with UNIX auth only 829 * Create a client RPC handle for doing FSSTAT with UNIX auth only
825 * - RFC 2623, sec 2.3.2 830 * - RFC 2623, sec 2.3.2
826 */ 831 */
827 error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_UNIX, 832 error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_UNIX);
828 0, noresvport);
829 if (error < 0) 833 if (error < 0)
830 goto error; 834 goto error;
831 nfs_mark_client_ready(clp, NFS_CS_READY); 835 nfs_mark_client_ready(clp, NFS_CS_READY);
832 return 0; 836 return clp;
833 837
834error: 838error:
835 nfs_mark_client_ready(clp, error); 839 nfs_mark_client_ready(clp, error);
840 nfs_put_client(clp);
836 dprintk("<-- nfs_init_client() = xerror %d\n", error); 841 dprintk("<-- nfs_init_client() = xerror %d\n", error);
837 return error; 842 return ERR_PTR(error);
838} 843}
839 844
840/* 845/*
@@ -847,7 +852,7 @@ static int nfs_init_server(struct nfs_server *server,
847 .hostname = data->nfs_server.hostname, 852 .hostname = data->nfs_server.hostname,
848 .addr = (const struct sockaddr *)&data->nfs_server.address, 853 .addr = (const struct sockaddr *)&data->nfs_server.address,
849 .addrlen = data->nfs_server.addrlen, 854 .addrlen = data->nfs_server.addrlen,
850 .rpc_ops = &nfs_v2_clientops, 855 .rpc_ops = NULL,
851 .proto = data->nfs_server.protocol, 856 .proto = data->nfs_server.protocol,
852 .net = data->net, 857 .net = data->net,
853 }; 858 };
@@ -857,17 +862,28 @@ static int nfs_init_server(struct nfs_server *server,
857 862
858 dprintk("--> nfs_init_server()\n"); 863 dprintk("--> nfs_init_server()\n");
859 864
865 switch (data->version) {
866#ifdef CONFIG_NFS_V2
867 case 2:
868 cl_init.rpc_ops = &nfs_v2_clientops;
869 break;
870#endif
860#ifdef CONFIG_NFS_V3 871#ifdef CONFIG_NFS_V3
861 if (data->version == 3) 872 case 3:
862 cl_init.rpc_ops = &nfs_v3_clientops; 873 cl_init.rpc_ops = &nfs_v3_clientops;
874 break;
863#endif 875#endif
876 default:
877 return -EPROTONOSUPPORT;
878 }
864 879
865 nfs_init_timeout_values(&timeparms, data->nfs_server.protocol, 880 nfs_init_timeout_values(&timeparms, data->nfs_server.protocol,
866 data->timeo, data->retrans); 881 data->timeo, data->retrans);
882 if (data->flags & NFS_MOUNT_NORESVPORT)
883 set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
867 884
868 /* Allocate or find a client reference we can use */ 885 /* Allocate or find a client reference we can use */
869 clp = nfs_get_client(&cl_init, &timeparms, NULL, RPC_AUTH_UNIX, 886 clp = nfs_get_client(&cl_init, &timeparms, NULL, RPC_AUTH_UNIX);
870 data->flags & NFS_MOUNT_NORESVPORT);
871 if (IS_ERR(clp)) { 887 if (IS_ERR(clp)) {
872 dprintk("<-- nfs_init_server() = error %ld\n", PTR_ERR(clp)); 888 dprintk("<-- nfs_init_server() = error %ld\n", PTR_ERR(clp));
873 return PTR_ERR(clp); 889 return PTR_ERR(clp);
@@ -880,7 +896,7 @@ static int nfs_init_server(struct nfs_server *server,
880 server->options = data->options; 896 server->options = data->options;
881 server->caps |= NFS_CAP_HARDLINKS|NFS_CAP_SYMLINKS|NFS_CAP_FILEID| 897 server->caps |= NFS_CAP_HARDLINKS|NFS_CAP_SYMLINKS|NFS_CAP_FILEID|
882 NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER|NFS_CAP_OWNER_GROUP| 898 NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER|NFS_CAP_OWNER_GROUP|
883 NFS_CAP_ATIME|NFS_CAP_CTIME|NFS_CAP_MTIME; 899 NFS_CAP_ATIME|NFS_CAP_CTIME|NFS_CAP_MTIME|NFS_CAP_CHANGE_ATTR;
884 900
885 if (data->rsize) 901 if (data->rsize)
886 server->rsize = nfs_block_size(data->rsize, NULL); 902 server->rsize = nfs_block_size(data->rsize, NULL);
@@ -1048,7 +1064,7 @@ static void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_serve
1048static void nfs_server_insert_lists(struct nfs_server *server) 1064static void nfs_server_insert_lists(struct nfs_server *server)
1049{ 1065{
1050 struct nfs_client *clp = server->nfs_client; 1066 struct nfs_client *clp = server->nfs_client;
1051 struct nfs_net *nn = net_generic(clp->net, nfs_net_id); 1067 struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id);
1052 1068
1053 spin_lock(&nn->nfs_client_lock); 1069 spin_lock(&nn->nfs_client_lock);
1054 list_add_tail_rcu(&server->client_link, &clp->cl_superblocks); 1070 list_add_tail_rcu(&server->client_link, &clp->cl_superblocks);
@@ -1065,7 +1081,7 @@ static void nfs_server_remove_lists(struct nfs_server *server)
1065 1081
1066 if (clp == NULL) 1082 if (clp == NULL)
1067 return; 1083 return;
1068 nn = net_generic(clp->net, nfs_net_id); 1084 nn = net_generic(clp->cl_net, nfs_net_id);
1069 spin_lock(&nn->nfs_client_lock); 1085 spin_lock(&nn->nfs_client_lock);
1070 list_del_rcu(&server->client_link); 1086 list_del_rcu(&server->client_link);
1071 if (list_empty(&clp->cl_superblocks)) 1087 if (list_empty(&clp->cl_superblocks))
@@ -1333,21 +1349,27 @@ static int nfs4_init_client_minor_version(struct nfs_client *clp)
1333 * so that the client back channel can find the 1349 * so that the client back channel can find the
1334 * nfs_client struct 1350 * nfs_client struct
1335 */ 1351 */
1336 clp->cl_cons_state = NFS_CS_SESSION_INITING; 1352 nfs_mark_client_ready(clp, NFS_CS_SESSION_INITING);
1337 } 1353 }
1338#endif /* CONFIG_NFS_V4_1 */ 1354#endif /* CONFIG_NFS_V4_1 */
1339 1355
1340 return nfs4_init_callback(clp); 1356 return nfs4_init_callback(clp);
1341} 1357}
1342 1358
1343/* 1359/**
1344 * Initialise an NFS4 client record 1360 * nfs4_init_client - Initialise an NFS4 client record
1361 *
1362 * @clp: nfs_client to initialise
1363 * @timeparms: timeout parameters for underlying RPC transport
1364 * @ip_addr: callback IP address in presentation format
1365 * @authflavor: authentication flavor for underlying RPC transport
1366 *
1367 * Returns pointer to an NFS client, or an ERR_PTR value.
1345 */ 1368 */
1346int nfs4_init_client(struct nfs_client *clp, 1369struct nfs_client *nfs4_init_client(struct nfs_client *clp,
1347 const struct rpc_timeout *timeparms, 1370 const struct rpc_timeout *timeparms,
1348 const char *ip_addr, 1371 const char *ip_addr,
1349 rpc_authflavor_t authflavour, 1372 rpc_authflavor_t authflavour)
1350 int noresvport)
1351{ 1373{
1352 char buf[INET6_ADDRSTRLEN + 1]; 1374 char buf[INET6_ADDRSTRLEN + 1];
1353 int error; 1375 int error;
@@ -1355,14 +1377,14 @@ int nfs4_init_client(struct nfs_client *clp,
1355 if (clp->cl_cons_state == NFS_CS_READY) { 1377 if (clp->cl_cons_state == NFS_CS_READY) {
1356 /* the client is initialised already */ 1378 /* the client is initialised already */
1357 dprintk("<-- nfs4_init_client() = 0 [already %p]\n", clp); 1379 dprintk("<-- nfs4_init_client() = 0 [already %p]\n", clp);
1358 return 0; 1380 return clp;
1359 } 1381 }
1360 1382
1361 /* Check NFS protocol revision and initialize RPC op vector */ 1383 /* Check NFS protocol revision and initialize RPC op vector */
1362 clp->rpc_ops = &nfs_v4_clientops; 1384 clp->rpc_ops = &nfs_v4_clientops;
1363 1385
1364 error = nfs_create_rpc_client(clp, timeparms, authflavour, 1386 __set_bit(NFS_CS_DISCRTRY, &clp->cl_flags);
1365 1, noresvport); 1387 error = nfs_create_rpc_client(clp, timeparms, authflavour);
1366 if (error < 0) 1388 if (error < 0)
1367 goto error; 1389 goto error;
1368 1390
@@ -1395,12 +1417,13 @@ int nfs4_init_client(struct nfs_client *clp,
1395 1417
1396 if (!nfs4_has_session(clp)) 1418 if (!nfs4_has_session(clp))
1397 nfs_mark_client_ready(clp, NFS_CS_READY); 1419 nfs_mark_client_ready(clp, NFS_CS_READY);
1398 return 0; 1420 return clp;
1399 1421
1400error: 1422error:
1401 nfs_mark_client_ready(clp, error); 1423 nfs_mark_client_ready(clp, error);
1424 nfs_put_client(clp);
1402 dprintk("<-- nfs4_init_client() = xerror %d\n", error); 1425 dprintk("<-- nfs4_init_client() = xerror %d\n", error);
1403 return error; 1426 return ERR_PTR(error);
1404} 1427}
1405 1428
1406/* 1429/*
@@ -1429,9 +1452,11 @@ static int nfs4_set_client(struct nfs_server *server,
1429 1452
1430 dprintk("--> nfs4_set_client()\n"); 1453 dprintk("--> nfs4_set_client()\n");
1431 1454
1455 if (server->flags & NFS_MOUNT_NORESVPORT)
1456 set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
1457
1432 /* Allocate or find a client reference we can use */ 1458 /* Allocate or find a client reference we can use */
1433 clp = nfs_get_client(&cl_init, timeparms, ip_addr, authflavour, 1459 clp = nfs_get_client(&cl_init, timeparms, ip_addr, authflavour);
1434 server->flags & NFS_MOUNT_NORESVPORT);
1435 if (IS_ERR(clp)) { 1460 if (IS_ERR(clp)) {
1436 error = PTR_ERR(clp); 1461 error = PTR_ERR(clp);
1437 goto error; 1462 goto error;
@@ -1465,8 +1490,8 @@ error:
1465 * the MDS. 1490 * the MDS.
1466 */ 1491 */
1467struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp, 1492struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
1468 const struct sockaddr *ds_addr, 1493 const struct sockaddr *ds_addr, int ds_addrlen,
1469 int ds_addrlen, int ds_proto) 1494 int ds_proto, unsigned int ds_timeo, unsigned int ds_retrans)
1470{ 1495{
1471 struct nfs_client_initdata cl_init = { 1496 struct nfs_client_initdata cl_init = {
1472 .addr = ds_addr, 1497 .addr = ds_addr,
@@ -1474,14 +1499,9 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
1474 .rpc_ops = &nfs_v4_clientops, 1499 .rpc_ops = &nfs_v4_clientops,
1475 .proto = ds_proto, 1500 .proto = ds_proto,
1476 .minorversion = mds_clp->cl_minorversion, 1501 .minorversion = mds_clp->cl_minorversion,
1477 .net = mds_clp->net, 1502 .net = mds_clp->cl_net,
1478 };
1479 struct rpc_timeout ds_timeout = {
1480 .to_initval = 15 * HZ,
1481 .to_maxval = 15 * HZ,
1482 .to_retries = 1,
1483 .to_exponential = 1,
1484 }; 1503 };
1504 struct rpc_timeout ds_timeout;
1485 struct nfs_client *clp; 1505 struct nfs_client *clp;
1486 1506
1487 /* 1507 /*
@@ -1489,8 +1509,9 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
1489 * cl_ipaddr so as to use the same EXCHANGE_ID co_ownerid as the MDS 1509 * cl_ipaddr so as to use the same EXCHANGE_ID co_ownerid as the MDS
1490 * (section 13.1 RFC 5661). 1510 * (section 13.1 RFC 5661).
1491 */ 1511 */
1512 nfs_init_timeout_values(&ds_timeout, ds_proto, ds_timeo, ds_retrans);
1492 clp = nfs_get_client(&cl_init, &ds_timeout, mds_clp->cl_ipaddr, 1513 clp = nfs_get_client(&cl_init, &ds_timeout, mds_clp->cl_ipaddr,
1493 mds_clp->cl_rpcclient->cl_auth->au_flavor, 0); 1514 mds_clp->cl_rpcclient->cl_auth->au_flavor);
1494 1515
1495 dprintk("<-- %s %p\n", __func__, clp); 1516 dprintk("<-- %s %p\n", __func__, clp);
1496 return clp; 1517 return clp;
@@ -1701,7 +1722,7 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
1701 rpc_protocol(parent_server->client), 1722 rpc_protocol(parent_server->client),
1702 parent_server->client->cl_timeout, 1723 parent_server->client->cl_timeout,
1703 parent_client->cl_mvops->minor_version, 1724 parent_client->cl_mvops->minor_version,
1704 parent_client->net); 1725 parent_client->cl_net);
1705 if (error < 0) 1726 if (error < 0)
1706 goto error; 1727 goto error;
1707 1728
@@ -1805,6 +1826,7 @@ void nfs_clients_init(struct net *net)
1805 idr_init(&nn->cb_ident_idr); 1826 idr_init(&nn->cb_ident_idr);
1806#endif 1827#endif
1807 spin_lock_init(&nn->nfs_client_lock); 1828 spin_lock_init(&nn->nfs_client_lock);
1829 nn->boot_time = CURRENT_TIME;
1808} 1830}
1809 1831
1810#ifdef CONFIG_PROC_FS 1832#ifdef CONFIG_PROC_FS
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 89af1d269274..bd3a9601d32d 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -316,6 +316,10 @@ out:
316 * nfs_client_return_marked_delegations - return previously marked delegations 316 * nfs_client_return_marked_delegations - return previously marked delegations
317 * @clp: nfs_client to process 317 * @clp: nfs_client to process
318 * 318 *
319 * Note that this function is designed to be called by the state
320 * manager thread. For this reason, it cannot flush the dirty data,
321 * since that could deadlock in case of a state recovery error.
322 *
319 * Returns zero on success, or a negative errno value. 323 * Returns zero on success, or a negative errno value.
320 */ 324 */
321int nfs_client_return_marked_delegations(struct nfs_client *clp) 325int nfs_client_return_marked_delegations(struct nfs_client *clp)
@@ -340,11 +344,9 @@ restart:
340 server); 344 server);
341 rcu_read_unlock(); 345 rcu_read_unlock();
342 346
343 if (delegation != NULL) { 347 if (delegation != NULL)
344 filemap_flush(inode->i_mapping);
345 err = __nfs_inode_return_delegation(inode, 348 err = __nfs_inode_return_delegation(inode,
346 delegation, 0); 349 delegation, 0);
347 }
348 iput(inode); 350 iput(inode);
349 if (!err) 351 if (!err)
350 goto restart; 352 goto restart;
@@ -380,6 +382,10 @@ void nfs_inode_return_delegation_noreclaim(struct inode *inode)
380 * nfs_inode_return_delegation - synchronously return a delegation 382 * nfs_inode_return_delegation - synchronously return a delegation
381 * @inode: inode to process 383 * @inode: inode to process
382 * 384 *
385 * This routine will always flush any dirty data to disk on the
386 * assumption that if we need to return the delegation, then
387 * we should stop caching.
388 *
383 * Returns zero on success, or a negative errno value. 389 * Returns zero on success, or a negative errno value.
384 */ 390 */
385int nfs_inode_return_delegation(struct inode *inode) 391int nfs_inode_return_delegation(struct inode *inode)
@@ -389,10 +395,10 @@ int nfs_inode_return_delegation(struct inode *inode)
389 struct nfs_delegation *delegation; 395 struct nfs_delegation *delegation;
390 int err = 0; 396 int err = 0;
391 397
398 nfs_wb_all(inode);
392 if (rcu_access_pointer(nfsi->delegation) != NULL) { 399 if (rcu_access_pointer(nfsi->delegation) != NULL) {
393 delegation = nfs_detach_delegation(nfsi, server); 400 delegation = nfs_detach_delegation(nfsi, server);
394 if (delegation != NULL) { 401 if (delegation != NULL) {
395 nfs_wb_all(inode);
396 err = __nfs_inode_return_delegation(inode, delegation, 1); 402 err = __nfs_inode_return_delegation(inode, delegation, 1);
397 } 403 }
398 } 404 }
@@ -538,6 +544,8 @@ int nfs_async_inode_return_delegation(struct inode *inode,
538 struct nfs_client *clp = server->nfs_client; 544 struct nfs_client *clp = server->nfs_client;
539 struct nfs_delegation *delegation; 545 struct nfs_delegation *delegation;
540 546
547 filemap_flush(inode->i_mapping);
548
541 rcu_read_lock(); 549 rcu_read_lock();
542 delegation = rcu_dereference(NFS_I(inode)->delegation); 550 delegation = rcu_dereference(NFS_I(inode)->delegation);
543 551
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index cd6a7a8dadae..72709c4193fa 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -66,6 +66,7 @@ static inline int nfs_have_delegation(struct inode *inode, fmode_t flags)
66 66
67static inline int nfs_inode_return_delegation(struct inode *inode) 67static inline int nfs_inode_return_delegation(struct inode *inode)
68{ 68{
69 nfs_wb_all(inode);
69 return 0; 70 return 0;
70} 71}
71#endif 72#endif
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index eedd24d0ad2e..0989a2099688 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -475,6 +475,29 @@ different:
475} 475}
476 476
477static 477static
478bool nfs_use_readdirplus(struct inode *dir, struct file *filp)
479{
480 if (!nfs_server_capable(dir, NFS_CAP_READDIRPLUS))
481 return false;
482 if (test_and_clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(dir)->flags))
483 return true;
484 if (filp->f_pos == 0)
485 return true;
486 return false;
487}
488
489/*
490 * This function is called by the lookup code to request the use of
491 * readdirplus to accelerate any future lookups in the same
492 * directory.
493 */
494static
495void nfs_advise_use_readdirplus(struct inode *dir)
496{
497 set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(dir)->flags);
498}
499
500static
478void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry) 501void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry)
479{ 502{
480 struct qstr filename = QSTR_INIT(entry->name, entry->len); 503 struct qstr filename = QSTR_INIT(entry->name, entry->len);
@@ -871,7 +894,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
871 desc->file = filp; 894 desc->file = filp;
872 desc->dir_cookie = &dir_ctx->dir_cookie; 895 desc->dir_cookie = &dir_ctx->dir_cookie;
873 desc->decode = NFS_PROTO(inode)->decode_dirent; 896 desc->decode = NFS_PROTO(inode)->decode_dirent;
874 desc->plus = NFS_USE_READDIRPLUS(inode); 897 desc->plus = nfs_use_readdirplus(inode, filp) ? 1 : 0;
875 898
876 nfs_block_sillyrename(dentry); 899 nfs_block_sillyrename(dentry);
877 res = nfs_revalidate_mapping(inode, filp->f_mapping); 900 res = nfs_revalidate_mapping(inode, filp->f_mapping);
@@ -1111,7 +1134,7 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
1111 if (!inode) { 1134 if (!inode) {
1112 if (nfs_neg_need_reval(dir, dentry, nd)) 1135 if (nfs_neg_need_reval(dir, dentry, nd))
1113 goto out_bad; 1136 goto out_bad;
1114 goto out_valid; 1137 goto out_valid_noent;
1115 } 1138 }
1116 1139
1117 if (is_bad_inode(inode)) { 1140 if (is_bad_inode(inode)) {
@@ -1140,7 +1163,7 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
1140 if (fhandle == NULL || fattr == NULL) 1163 if (fhandle == NULL || fattr == NULL)
1141 goto out_error; 1164 goto out_error;
1142 1165
1143 error = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, &dentry->d_name, fhandle, fattr); 1166 error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr);
1144 if (error) 1167 if (error)
1145 goto out_bad; 1168 goto out_bad;
1146 if (nfs_compare_fh(NFS_FH(inode), fhandle)) 1169 if (nfs_compare_fh(NFS_FH(inode), fhandle))
@@ -1153,6 +1176,9 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
1153out_set_verifier: 1176out_set_verifier:
1154 nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); 1177 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
1155 out_valid: 1178 out_valid:
1179 /* Success: notify readdir to use READDIRPLUS */
1180 nfs_advise_use_readdirplus(dir);
1181 out_valid_noent:
1156 dput(parent); 1182 dput(parent);
1157 dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is valid\n", 1183 dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is valid\n",
1158 __func__, dentry->d_parent->d_name.name, 1184 __func__, dentry->d_parent->d_name.name,
@@ -1296,7 +1322,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
1296 parent = dentry->d_parent; 1322 parent = dentry->d_parent;
1297 /* Protect against concurrent sillydeletes */ 1323 /* Protect against concurrent sillydeletes */
1298 nfs_block_sillyrename(parent); 1324 nfs_block_sillyrename(parent);
1299 error = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, &dentry->d_name, fhandle, fattr); 1325 error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr);
1300 if (error == -ENOENT) 1326 if (error == -ENOENT)
1301 goto no_entry; 1327 goto no_entry;
1302 if (error < 0) { 1328 if (error < 0) {
@@ -1308,6 +1334,9 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
1308 if (IS_ERR(res)) 1334 if (IS_ERR(res))
1309 goto out_unblock_sillyrename; 1335 goto out_unblock_sillyrename;
1310 1336
1337 /* Success: notify readdir to use READDIRPLUS */
1338 nfs_advise_use_readdirplus(dir);
1339
1311no_entry: 1340no_entry:
1312 res = d_materialise_unique(dentry, inode); 1341 res = d_materialise_unique(dentry, inode);
1313 if (res != NULL) { 1342 if (res != NULL) {
@@ -1643,7 +1672,7 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
1643 if (dentry->d_inode) 1672 if (dentry->d_inode)
1644 goto out; 1673 goto out;
1645 if (fhandle->size == 0) { 1674 if (fhandle->size == 0) {
1646 error = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, &dentry->d_name, fhandle, fattr); 1675 error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr);
1647 if (error) 1676 if (error)
1648 goto out_error; 1677 goto out_error;
1649 } 1678 }
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 481be7f7bdd3..23d170bc44f4 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -56,6 +56,7 @@
56 56
57#include "internal.h" 57#include "internal.h"
58#include "iostat.h" 58#include "iostat.h"
59#include "pnfs.h"
59 60
60#define NFSDBG_FACILITY NFSDBG_VFS 61#define NFSDBG_FACILITY NFSDBG_VFS
61 62
@@ -81,16 +82,19 @@ struct nfs_direct_req {
81 struct completion completion; /* wait for i/o completion */ 82 struct completion completion; /* wait for i/o completion */
82 83
83 /* commit state */ 84 /* commit state */
84 struct list_head rewrite_list; /* saved nfs_write_data structs */ 85 struct nfs_mds_commit_info mds_cinfo; /* Storage for cinfo */
85 struct nfs_write_data * commit_data; /* special write_data for commits */ 86 struct pnfs_ds_commit_info ds_cinfo; /* Storage for cinfo */
87 struct work_struct work;
86 int flags; 88 int flags;
87#define NFS_ODIRECT_DO_COMMIT (1) /* an unstable reply was received */ 89#define NFS_ODIRECT_DO_COMMIT (1) /* an unstable reply was received */
88#define NFS_ODIRECT_RESCHED_WRITES (2) /* write verification failed */ 90#define NFS_ODIRECT_RESCHED_WRITES (2) /* write verification failed */
89 struct nfs_writeverf verf; /* unstable write verifier */ 91 struct nfs_writeverf verf; /* unstable write verifier */
90}; 92};
91 93
94static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops;
95static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops;
92static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode); 96static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode);
93static const struct rpc_call_ops nfs_write_direct_ops; 97static void nfs_direct_write_schedule_work(struct work_struct *work);
94 98
95static inline void get_dreq(struct nfs_direct_req *dreq) 99static inline void get_dreq(struct nfs_direct_req *dreq)
96{ 100{
@@ -124,22 +128,6 @@ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_
124 return -EINVAL; 128 return -EINVAL;
125} 129}
126 130
127static void nfs_direct_dirty_pages(struct page **pages, unsigned int pgbase, size_t count)
128{
129 unsigned int npages;
130 unsigned int i;
131
132 if (count == 0)
133 return;
134 pages += (pgbase >> PAGE_SHIFT);
135 npages = (count + (pgbase & ~PAGE_MASK) + PAGE_SIZE - 1) >> PAGE_SHIFT;
136 for (i = 0; i < npages; i++) {
137 struct page *page = pages[i];
138 if (!PageCompound(page))
139 set_page_dirty(page);
140 }
141}
142
143static void nfs_direct_release_pages(struct page **pages, unsigned int npages) 131static void nfs_direct_release_pages(struct page **pages, unsigned int npages)
144{ 132{
145 unsigned int i; 133 unsigned int i;
@@ -147,26 +135,30 @@ static void nfs_direct_release_pages(struct page **pages, unsigned int npages)
147 page_cache_release(pages[i]); 135 page_cache_release(pages[i]);
148} 136}
149 137
138void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo,
139 struct nfs_direct_req *dreq)
140{
141 cinfo->lock = &dreq->lock;
142 cinfo->mds = &dreq->mds_cinfo;
143 cinfo->ds = &dreq->ds_cinfo;
144 cinfo->dreq = dreq;
145 cinfo->completion_ops = &nfs_direct_commit_completion_ops;
146}
147
150static inline struct nfs_direct_req *nfs_direct_req_alloc(void) 148static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
151{ 149{
152 struct nfs_direct_req *dreq; 150 struct nfs_direct_req *dreq;
153 151
154 dreq = kmem_cache_alloc(nfs_direct_cachep, GFP_KERNEL); 152 dreq = kmem_cache_zalloc(nfs_direct_cachep, GFP_KERNEL);
155 if (!dreq) 153 if (!dreq)
156 return NULL; 154 return NULL;
157 155
158 kref_init(&dreq->kref); 156 kref_init(&dreq->kref);
159 kref_get(&dreq->kref); 157 kref_get(&dreq->kref);
160 init_completion(&dreq->completion); 158 init_completion(&dreq->completion);
161 INIT_LIST_HEAD(&dreq->rewrite_list); 159 INIT_LIST_HEAD(&dreq->mds_cinfo.list);
162 dreq->iocb = NULL; 160 INIT_WORK(&dreq->work, nfs_direct_write_schedule_work);
163 dreq->ctx = NULL;
164 dreq->l_ctx = NULL;
165 spin_lock_init(&dreq->lock); 161 spin_lock_init(&dreq->lock);
166 atomic_set(&dreq->io_count, 0);
167 dreq->count = 0;
168 dreq->error = 0;
169 dreq->flags = 0;
170 162
171 return dreq; 163 return dreq;
172} 164}
@@ -226,47 +218,80 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq)
226 nfs_direct_req_release(dreq); 218 nfs_direct_req_release(dreq);
227} 219}
228 220
229/* 221static void nfs_direct_readpage_release(struct nfs_page *req)
230 * We must hold a reference to all the pages in this direct read request
231 * until the RPCs complete. This could be long *after* we are woken up in
232 * nfs_direct_wait (for instance, if someone hits ^C on a slow server).
233 */
234static void nfs_direct_read_result(struct rpc_task *task, void *calldata)
235{ 222{
236 struct nfs_read_data *data = calldata; 223 dprintk("NFS: direct read done (%s/%lld %d@%lld)\n",
237 224 req->wb_context->dentry->d_inode->i_sb->s_id,
238 nfs_readpage_result(task, data); 225 (long long)NFS_FILEID(req->wb_context->dentry->d_inode),
226 req->wb_bytes,
227 (long long)req_offset(req));
228 nfs_release_request(req);
239} 229}
240 230
241static void nfs_direct_read_release(void *calldata) 231static void nfs_direct_read_completion(struct nfs_pgio_header *hdr)
242{ 232{
233 unsigned long bytes = 0;
234 struct nfs_direct_req *dreq = hdr->dreq;
243 235
244 struct nfs_read_data *data = calldata; 236 if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
245 struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req; 237 goto out_put;
246 int status = data->task.tk_status;
247 238
248 spin_lock(&dreq->lock); 239 spin_lock(&dreq->lock);
249 if (unlikely(status < 0)) { 240 if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) && (hdr->good_bytes == 0))
250 dreq->error = status; 241 dreq->error = hdr->error;
251 spin_unlock(&dreq->lock); 242 else
252 } else { 243 dreq->count += hdr->good_bytes;
253 dreq->count += data->res.count; 244 spin_unlock(&dreq->lock);
254 spin_unlock(&dreq->lock);
255 nfs_direct_dirty_pages(data->pagevec,
256 data->args.pgbase,
257 data->res.count);
258 }
259 nfs_direct_release_pages(data->pagevec, data->npages);
260 245
246 while (!list_empty(&hdr->pages)) {
247 struct nfs_page *req = nfs_list_entry(hdr->pages.next);
248 struct page *page = req->wb_page;
249
250 if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) {
251 if (bytes > hdr->good_bytes)
252 zero_user(page, 0, PAGE_SIZE);
253 else if (hdr->good_bytes - bytes < PAGE_SIZE)
254 zero_user_segment(page,
255 hdr->good_bytes & ~PAGE_MASK,
256 PAGE_SIZE);
257 }
258 if (!PageCompound(page)) {
259 if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) {
260 if (bytes < hdr->good_bytes)
261 set_page_dirty(page);
262 } else
263 set_page_dirty(page);
264 }
265 bytes += req->wb_bytes;
266 nfs_list_remove_request(req);
267 nfs_direct_readpage_release(req);
268 }
269out_put:
261 if (put_dreq(dreq)) 270 if (put_dreq(dreq))
262 nfs_direct_complete(dreq); 271 nfs_direct_complete(dreq);
263 nfs_readdata_free(data); 272 hdr->release(hdr);
273}
274
275static void nfs_read_sync_pgio_error(struct list_head *head)
276{
277 struct nfs_page *req;
278
279 while (!list_empty(head)) {
280 req = nfs_list_entry(head->next);
281 nfs_list_remove_request(req);
282 nfs_release_request(req);
283 }
264} 284}
265 285
266static const struct rpc_call_ops nfs_read_direct_ops = { 286static void nfs_direct_pgio_init(struct nfs_pgio_header *hdr)
267 .rpc_call_prepare = nfs_read_prepare, 287{
268 .rpc_call_done = nfs_direct_read_result, 288 get_dreq(hdr->dreq);
269 .rpc_release = nfs_direct_read_release, 289}
290
291static const struct nfs_pgio_completion_ops nfs_direct_read_completion_ops = {
292 .error_cleanup = nfs_read_sync_pgio_error,
293 .init_hdr = nfs_direct_pgio_init,
294 .completion = nfs_direct_read_completion,
270}; 295};
271 296
272/* 297/*
@@ -276,107 +301,82 @@ static const struct rpc_call_ops nfs_read_direct_ops = {
276 * handled automatically by nfs_direct_read_result(). Otherwise, if 301 * handled automatically by nfs_direct_read_result(). Otherwise, if
277 * no requests have been sent, just return an error. 302 * no requests have been sent, just return an error.
278 */ 303 */
279static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq, 304static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *desc,
280 const struct iovec *iov, 305 const struct iovec *iov,
281 loff_t pos) 306 loff_t pos)
282{ 307{
308 struct nfs_direct_req *dreq = desc->pg_dreq;
283 struct nfs_open_context *ctx = dreq->ctx; 309 struct nfs_open_context *ctx = dreq->ctx;
284 struct inode *inode = ctx->dentry->d_inode; 310 struct inode *inode = ctx->dentry->d_inode;
285 unsigned long user_addr = (unsigned long)iov->iov_base; 311 unsigned long user_addr = (unsigned long)iov->iov_base;
286 size_t count = iov->iov_len; 312 size_t count = iov->iov_len;
287 size_t rsize = NFS_SERVER(inode)->rsize; 313 size_t rsize = NFS_SERVER(inode)->rsize;
288 struct rpc_task *task;
289 struct rpc_message msg = {
290 .rpc_cred = ctx->cred,
291 };
292 struct rpc_task_setup task_setup_data = {
293 .rpc_client = NFS_CLIENT(inode),
294 .rpc_message = &msg,
295 .callback_ops = &nfs_read_direct_ops,
296 .workqueue = nfsiod_workqueue,
297 .flags = RPC_TASK_ASYNC,
298 };
299 unsigned int pgbase; 314 unsigned int pgbase;
300 int result; 315 int result;
301 ssize_t started = 0; 316 ssize_t started = 0;
317 struct page **pagevec = NULL;
318 unsigned int npages;
302 319
303 do { 320 do {
304 struct nfs_read_data *data;
305 size_t bytes; 321 size_t bytes;
322 int i;
306 323
307 pgbase = user_addr & ~PAGE_MASK; 324 pgbase = user_addr & ~PAGE_MASK;
308 bytes = min(rsize,count); 325 bytes = min(max_t(size_t, rsize, PAGE_SIZE), count);
309 326
310 result = -ENOMEM; 327 result = -ENOMEM;
311 data = nfs_readdata_alloc(nfs_page_array_len(pgbase, bytes)); 328 npages = nfs_page_array_len(pgbase, bytes);
312 if (unlikely(!data)) 329 if (!pagevec)
330 pagevec = kmalloc(npages * sizeof(struct page *),
331 GFP_KERNEL);
332 if (!pagevec)
313 break; 333 break;
314
315 down_read(&current->mm->mmap_sem); 334 down_read(&current->mm->mmap_sem);
316 result = get_user_pages(current, current->mm, user_addr, 335 result = get_user_pages(current, current->mm, user_addr,
317 data->npages, 1, 0, data->pagevec, NULL); 336 npages, 1, 0, pagevec, NULL);
318 up_read(&current->mm->mmap_sem); 337 up_read(&current->mm->mmap_sem);
319 if (result < 0) { 338 if (result < 0)
320 nfs_readdata_free(data);
321 break; 339 break;
322 } 340 if ((unsigned)result < npages) {
323 if ((unsigned)result < data->npages) {
324 bytes = result * PAGE_SIZE; 341 bytes = result * PAGE_SIZE;
325 if (bytes <= pgbase) { 342 if (bytes <= pgbase) {
326 nfs_direct_release_pages(data->pagevec, result); 343 nfs_direct_release_pages(pagevec, result);
327 nfs_readdata_free(data);
328 break; 344 break;
329 } 345 }
330 bytes -= pgbase; 346 bytes -= pgbase;
331 data->npages = result; 347 npages = result;
332 } 348 }
333 349
334 get_dreq(dreq); 350 for (i = 0; i < npages; i++) {
335 351 struct nfs_page *req;
336 data->req = (struct nfs_page *) dreq; 352 unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
337 data->inode = inode; 353 /* XXX do we need to do the eof zeroing found in async_filler? */
338 data->cred = msg.rpc_cred; 354 req = nfs_create_request(dreq->ctx, dreq->inode,
339 data->args.fh = NFS_FH(inode); 355 pagevec[i],
340 data->args.context = ctx; 356 pgbase, req_len);
341 data->args.lock_context = dreq->l_ctx; 357 if (IS_ERR(req)) {
342 data->args.offset = pos; 358 result = PTR_ERR(req);
343 data->args.pgbase = pgbase; 359 break;
344 data->args.pages = data->pagevec; 360 }
345 data->args.count = bytes; 361 req->wb_index = pos >> PAGE_SHIFT;
346 data->res.fattr = &data->fattr; 362 req->wb_offset = pos & ~PAGE_MASK;
347 data->res.eof = 0; 363 if (!nfs_pageio_add_request(desc, req)) {
348 data->res.count = bytes; 364 result = desc->pg_error;
349 nfs_fattr_init(&data->fattr); 365 nfs_release_request(req);
350 msg.rpc_argp = &data->args; 366 break;
351 msg.rpc_resp = &data->res; 367 }
352 368 pgbase = 0;
353 task_setup_data.task = &data->task; 369 bytes -= req_len;
354 task_setup_data.callback_data = data; 370 started += req_len;
355 NFS_PROTO(inode)->read_setup(data, &msg); 371 user_addr += req_len;
356 372 pos += req_len;
357 task = rpc_run_task(&task_setup_data); 373 count -= req_len;
358 if (IS_ERR(task)) 374 }
359 break; 375 /* The nfs_page now hold references to these pages */
360 rpc_put_task(task); 376 nfs_direct_release_pages(pagevec, npages);
361 377 } while (count != 0 && result >= 0);
362 dprintk("NFS: %5u initiated direct read call " 378
363 "(req %s/%Ld, %zu bytes @ offset %Lu)\n", 379 kfree(pagevec);
364 data->task.tk_pid,
365 inode->i_sb->s_id,
366 (long long)NFS_FILEID(inode),
367 bytes,
368 (unsigned long long)data->args.offset);
369
370 started += bytes;
371 user_addr += bytes;
372 pos += bytes;
373 /* FIXME: Remove this unnecessary math from final patch */
374 pgbase += bytes;
375 pgbase &= ~PAGE_MASK;
376 BUG_ON(pgbase != (user_addr & ~PAGE_MASK));
377
378 count -= bytes;
379 } while (count != 0);
380 380
381 if (started) 381 if (started)
382 return started; 382 return started;
@@ -388,15 +388,19 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
388 unsigned long nr_segs, 388 unsigned long nr_segs,
389 loff_t pos) 389 loff_t pos)
390{ 390{
391 struct nfs_pageio_descriptor desc;
391 ssize_t result = -EINVAL; 392 ssize_t result = -EINVAL;
392 size_t requested_bytes = 0; 393 size_t requested_bytes = 0;
393 unsigned long seg; 394 unsigned long seg;
394 395
396 nfs_pageio_init_read(&desc, dreq->inode,
397 &nfs_direct_read_completion_ops);
395 get_dreq(dreq); 398 get_dreq(dreq);
399 desc.pg_dreq = dreq;
396 400
397 for (seg = 0; seg < nr_segs; seg++) { 401 for (seg = 0; seg < nr_segs; seg++) {
398 const struct iovec *vec = &iov[seg]; 402 const struct iovec *vec = &iov[seg];
399 result = nfs_direct_read_schedule_segment(dreq, vec, pos); 403 result = nfs_direct_read_schedule_segment(&desc, vec, pos);
400 if (result < 0) 404 if (result < 0)
401 break; 405 break;
402 requested_bytes += result; 406 requested_bytes += result;
@@ -405,6 +409,8 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
405 pos += vec->iov_len; 409 pos += vec->iov_len;
406 } 410 }
407 411
412 nfs_pageio_complete(&desc);
413
408 /* 414 /*
409 * If no bytes were started, return the error, and let the 415 * If no bytes were started, return the error, and let the
410 * generic layer handle the completion. 416 * generic layer handle the completion.
@@ -441,104 +447,64 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov,
441 result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos); 447 result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos);
442 if (!result) 448 if (!result)
443 result = nfs_direct_wait(dreq); 449 result = nfs_direct_wait(dreq);
450 NFS_I(inode)->read_io += result;
444out_release: 451out_release:
445 nfs_direct_req_release(dreq); 452 nfs_direct_req_release(dreq);
446out: 453out:
447 return result; 454 return result;
448} 455}
449 456
450static void nfs_direct_free_writedata(struct nfs_direct_req *dreq)
451{
452 while (!list_empty(&dreq->rewrite_list)) {
453 struct nfs_write_data *data = list_entry(dreq->rewrite_list.next, struct nfs_write_data, pages);
454 list_del(&data->pages);
455 nfs_direct_release_pages(data->pagevec, data->npages);
456 nfs_writedata_free(data);
457 }
458}
459
460#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) 457#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
461static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) 458static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
462{ 459{
463 struct inode *inode = dreq->inode; 460 struct nfs_pageio_descriptor desc;
464 struct list_head *p; 461 struct nfs_page *req, *tmp;
465 struct nfs_write_data *data; 462 LIST_HEAD(reqs);
466 struct rpc_task *task; 463 struct nfs_commit_info cinfo;
467 struct rpc_message msg = { 464 LIST_HEAD(failed);
468 .rpc_cred = dreq->ctx->cred, 465
469 }; 466 nfs_init_cinfo_from_dreq(&cinfo, dreq);
470 struct rpc_task_setup task_setup_data = { 467 pnfs_recover_commit_reqs(dreq->inode, &reqs, &cinfo);
471 .rpc_client = NFS_CLIENT(inode), 468 spin_lock(cinfo.lock);
472 .rpc_message = &msg, 469 nfs_scan_commit_list(&cinfo.mds->list, &reqs, &cinfo, 0);
473 .callback_ops = &nfs_write_direct_ops, 470 spin_unlock(cinfo.lock);
474 .workqueue = nfsiod_workqueue,
475 .flags = RPC_TASK_ASYNC,
476 };
477 471
478 dreq->count = 0; 472 dreq->count = 0;
479 get_dreq(dreq); 473 get_dreq(dreq);
480 474
481 list_for_each(p, &dreq->rewrite_list) { 475 nfs_pageio_init_write(&desc, dreq->inode, FLUSH_STABLE,
482 data = list_entry(p, struct nfs_write_data, pages); 476 &nfs_direct_write_completion_ops);
483 477 desc.pg_dreq = dreq;
484 get_dreq(dreq); 478
485 479 list_for_each_entry_safe(req, tmp, &reqs, wb_list) {
486 /* Use stable writes */ 480 if (!nfs_pageio_add_request(&desc, req)) {
487 data->args.stable = NFS_FILE_SYNC; 481 nfs_list_add_request(req, &failed);
488 482 spin_lock(cinfo.lock);
489 /* 483 dreq->flags = 0;
490 * Reset data->res. 484 dreq->error = -EIO;
491 */ 485 spin_unlock(cinfo.lock);
492 nfs_fattr_init(&data->fattr); 486 }
493 data->res.count = data->args.count;
494 memset(&data->verf, 0, sizeof(data->verf));
495
496 /*
497 * Reuse data->task; data->args should not have changed
498 * since the original request was sent.
499 */
500 task_setup_data.task = &data->task;
501 task_setup_data.callback_data = data;
502 msg.rpc_argp = &data->args;
503 msg.rpc_resp = &data->res;
504 NFS_PROTO(inode)->write_setup(data, &msg);
505
506 /*
507 * We're called via an RPC callback, so BKL is already held.
508 */
509 task = rpc_run_task(&task_setup_data);
510 if (!IS_ERR(task))
511 rpc_put_task(task);
512
513 dprintk("NFS: %5u rescheduled direct write call (req %s/%Ld, %u bytes @ offset %Lu)\n",
514 data->task.tk_pid,
515 inode->i_sb->s_id,
516 (long long)NFS_FILEID(inode),
517 data->args.count,
518 (unsigned long long)data->args.offset);
519 } 487 }
488 nfs_pageio_complete(&desc);
520 489
521 if (put_dreq(dreq)) 490 while (!list_empty(&failed))
522 nfs_direct_write_complete(dreq, inode); 491 nfs_unlock_and_release_request(req);
523}
524
525static void nfs_direct_commit_result(struct rpc_task *task, void *calldata)
526{
527 struct nfs_write_data *data = calldata;
528 492
529 /* Call the NFS version-specific code */ 493 if (put_dreq(dreq))
530 NFS_PROTO(data->inode)->commit_done(task, data); 494 nfs_direct_write_complete(dreq, dreq->inode);
531} 495}
532 496
533static void nfs_direct_commit_release(void *calldata) 497static void nfs_direct_commit_complete(struct nfs_commit_data *data)
534{ 498{
535 struct nfs_write_data *data = calldata; 499 struct nfs_direct_req *dreq = data->dreq;
536 struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req; 500 struct nfs_commit_info cinfo;
501 struct nfs_page *req;
537 int status = data->task.tk_status; 502 int status = data->task.tk_status;
538 503
504 nfs_init_cinfo_from_dreq(&cinfo, dreq);
539 if (status < 0) { 505 if (status < 0) {
540 dprintk("NFS: %5u commit failed with error %d.\n", 506 dprintk("NFS: %5u commit failed with error %d.\n",
541 data->task.tk_pid, status); 507 data->task.tk_pid, status);
542 dreq->flags = NFS_ODIRECT_RESCHED_WRITES; 508 dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
543 } else if (memcmp(&dreq->verf, &data->verf, sizeof(data->verf))) { 509 } else if (memcmp(&dreq->verf, &data->verf, sizeof(data->verf))) {
544 dprintk("NFS: %5u commit verify failed\n", data->task.tk_pid); 510 dprintk("NFS: %5u commit verify failed\n", data->task.tk_pid);
@@ -546,62 +512,47 @@ static void nfs_direct_commit_release(void *calldata)
546 } 512 }
547 513
548 dprintk("NFS: %5u commit returned %d\n", data->task.tk_pid, status); 514 dprintk("NFS: %5u commit returned %d\n", data->task.tk_pid, status);
549 nfs_direct_write_complete(dreq, data->inode); 515 while (!list_empty(&data->pages)) {
550 nfs_commit_free(data); 516 req = nfs_list_entry(data->pages.next);
517 nfs_list_remove_request(req);
518 if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) {
519 /* Note the rewrite will go through mds */
520 kref_get(&req->wb_kref);
521 nfs_mark_request_commit(req, NULL, &cinfo);
522 }
523 nfs_unlock_and_release_request(req);
524 }
525
526 if (atomic_dec_and_test(&cinfo.mds->rpcs_out))
527 nfs_direct_write_complete(dreq, data->inode);
528}
529
530static void nfs_direct_error_cleanup(struct nfs_inode *nfsi)
531{
532 /* There is no lock to clear */
551} 533}
552 534
553static const struct rpc_call_ops nfs_commit_direct_ops = { 535static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops = {
554 .rpc_call_prepare = nfs_write_prepare, 536 .completion = nfs_direct_commit_complete,
555 .rpc_call_done = nfs_direct_commit_result, 537 .error_cleanup = nfs_direct_error_cleanup,
556 .rpc_release = nfs_direct_commit_release,
557}; 538};
558 539
559static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) 540static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
560{ 541{
561 struct nfs_write_data *data = dreq->commit_data; 542 int res;
562 struct rpc_task *task; 543 struct nfs_commit_info cinfo;
563 struct rpc_message msg = { 544 LIST_HEAD(mds_list);
564 .rpc_argp = &data->args, 545
565 .rpc_resp = &data->res, 546 nfs_init_cinfo_from_dreq(&cinfo, dreq);
566 .rpc_cred = dreq->ctx->cred, 547 nfs_scan_commit(dreq->inode, &mds_list, &cinfo);
567 }; 548 res = nfs_generic_commit_list(dreq->inode, &mds_list, 0, &cinfo);
568 struct rpc_task_setup task_setup_data = { 549 if (res < 0) /* res == -ENOMEM */
569 .task = &data->task, 550 nfs_direct_write_reschedule(dreq);
570 .rpc_client = NFS_CLIENT(dreq->inode),
571 .rpc_message = &msg,
572 .callback_ops = &nfs_commit_direct_ops,
573 .callback_data = data,
574 .workqueue = nfsiod_workqueue,
575 .flags = RPC_TASK_ASYNC,
576 };
577
578 data->inode = dreq->inode;
579 data->cred = msg.rpc_cred;
580
581 data->args.fh = NFS_FH(data->inode);
582 data->args.offset = 0;
583 data->args.count = 0;
584 data->args.context = dreq->ctx;
585 data->args.lock_context = dreq->l_ctx;
586 data->res.count = 0;
587 data->res.fattr = &data->fattr;
588 data->res.verf = &data->verf;
589 nfs_fattr_init(&data->fattr);
590
591 NFS_PROTO(data->inode)->commit_setup(data, &msg);
592
593 /* Note: task.tk_ops->rpc_release will free dreq->commit_data */
594 dreq->commit_data = NULL;
595
596 dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
597
598 task = rpc_run_task(&task_setup_data);
599 if (!IS_ERR(task))
600 rpc_put_task(task);
601} 551}
602 552
603static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode) 553static void nfs_direct_write_schedule_work(struct work_struct *work)
604{ 554{
555 struct nfs_direct_req *dreq = container_of(work, struct nfs_direct_req, work);
605 int flags = dreq->flags; 556 int flags = dreq->flags;
606 557
607 dreq->flags = 0; 558 dreq->flags = 0;
@@ -613,89 +564,32 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode
613 nfs_direct_write_reschedule(dreq); 564 nfs_direct_write_reschedule(dreq);
614 break; 565 break;
615 default: 566 default:
616 if (dreq->commit_data != NULL) 567 nfs_zap_mapping(dreq->inode, dreq->inode->i_mapping);
617 nfs_commit_free(dreq->commit_data);
618 nfs_direct_free_writedata(dreq);
619 nfs_zap_mapping(inode, inode->i_mapping);
620 nfs_direct_complete(dreq); 568 nfs_direct_complete(dreq);
621 } 569 }
622} 570}
623 571
624static void nfs_alloc_commit_data(struct nfs_direct_req *dreq) 572static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
625{ 573{
626 dreq->commit_data = nfs_commitdata_alloc(); 574 schedule_work(&dreq->work); /* Calls nfs_direct_write_schedule_work */
627 if (dreq->commit_data != NULL)
628 dreq->commit_data->req = (struct nfs_page *) dreq;
629} 575}
576
630#else 577#else
631static inline void nfs_alloc_commit_data(struct nfs_direct_req *dreq) 578static void nfs_direct_write_schedule_work(struct work_struct *work)
632{ 579{
633 dreq->commit_data = NULL;
634} 580}
635 581
636static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode) 582static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
637{ 583{
638 nfs_direct_free_writedata(dreq);
639 nfs_zap_mapping(inode, inode->i_mapping); 584 nfs_zap_mapping(inode, inode->i_mapping);
640 nfs_direct_complete(dreq); 585 nfs_direct_complete(dreq);
641} 586}
642#endif 587#endif
643 588
644static void nfs_direct_write_result(struct rpc_task *task, void *calldata)
645{
646 struct nfs_write_data *data = calldata;
647
648 nfs_writeback_done(task, data);
649}
650
651/* 589/*
652 * NB: Return the value of the first error return code. Subsequent 590 * NB: Return the value of the first error return code. Subsequent
653 * errors after the first one are ignored. 591 * errors after the first one are ignored.
654 */ 592 */
655static void nfs_direct_write_release(void *calldata)
656{
657 struct nfs_write_data *data = calldata;
658 struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
659 int status = data->task.tk_status;
660
661 spin_lock(&dreq->lock);
662
663 if (unlikely(status < 0)) {
664 /* An error has occurred, so we should not commit */
665 dreq->flags = 0;
666 dreq->error = status;
667 }
668 if (unlikely(dreq->error != 0))
669 goto out_unlock;
670
671 dreq->count += data->res.count;
672
673 if (data->res.verf->committed != NFS_FILE_SYNC) {
674 switch (dreq->flags) {
675 case 0:
676 memcpy(&dreq->verf, &data->verf, sizeof(dreq->verf));
677 dreq->flags = NFS_ODIRECT_DO_COMMIT;
678 break;
679 case NFS_ODIRECT_DO_COMMIT:
680 if (memcmp(&dreq->verf, &data->verf, sizeof(dreq->verf))) {
681 dprintk("NFS: %5u write verify failed\n", data->task.tk_pid);
682 dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
683 }
684 }
685 }
686out_unlock:
687 spin_unlock(&dreq->lock);
688
689 if (put_dreq(dreq))
690 nfs_direct_write_complete(dreq, data->inode);
691}
692
693static const struct rpc_call_ops nfs_write_direct_ops = {
694 .rpc_call_prepare = nfs_write_prepare,
695 .rpc_call_done = nfs_direct_write_result,
696 .rpc_release = nfs_direct_write_release,
697};
698
699/* 593/*
700 * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE 594 * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE
701 * operation. If nfs_writedata_alloc() or get_user_pages() fails, 595 * operation. If nfs_writedata_alloc() or get_user_pages() fails,
@@ -703,132 +597,187 @@ static const struct rpc_call_ops nfs_write_direct_ops = {
703 * handled automatically by nfs_direct_write_result(). Otherwise, if 597 * handled automatically by nfs_direct_write_result(). Otherwise, if
704 * no requests have been sent, just return an error. 598 * no requests have been sent, just return an error.
705 */ 599 */
706static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq, 600static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *desc,
707 const struct iovec *iov, 601 const struct iovec *iov,
708 loff_t pos, int sync) 602 loff_t pos)
709{ 603{
604 struct nfs_direct_req *dreq = desc->pg_dreq;
710 struct nfs_open_context *ctx = dreq->ctx; 605 struct nfs_open_context *ctx = dreq->ctx;
711 struct inode *inode = ctx->dentry->d_inode; 606 struct inode *inode = ctx->dentry->d_inode;
712 unsigned long user_addr = (unsigned long)iov->iov_base; 607 unsigned long user_addr = (unsigned long)iov->iov_base;
713 size_t count = iov->iov_len; 608 size_t count = iov->iov_len;
714 struct rpc_task *task;
715 struct rpc_message msg = {
716 .rpc_cred = ctx->cred,
717 };
718 struct rpc_task_setup task_setup_data = {
719 .rpc_client = NFS_CLIENT(inode),
720 .rpc_message = &msg,
721 .callback_ops = &nfs_write_direct_ops,
722 .workqueue = nfsiod_workqueue,
723 .flags = RPC_TASK_ASYNC,
724 };
725 size_t wsize = NFS_SERVER(inode)->wsize; 609 size_t wsize = NFS_SERVER(inode)->wsize;
726 unsigned int pgbase; 610 unsigned int pgbase;
727 int result; 611 int result;
728 ssize_t started = 0; 612 ssize_t started = 0;
613 struct page **pagevec = NULL;
614 unsigned int npages;
729 615
730 do { 616 do {
731 struct nfs_write_data *data;
732 size_t bytes; 617 size_t bytes;
618 int i;
733 619
734 pgbase = user_addr & ~PAGE_MASK; 620 pgbase = user_addr & ~PAGE_MASK;
735 bytes = min(wsize,count); 621 bytes = min(max_t(size_t, wsize, PAGE_SIZE), count);
736 622
737 result = -ENOMEM; 623 result = -ENOMEM;
738 data = nfs_writedata_alloc(nfs_page_array_len(pgbase, bytes)); 624 npages = nfs_page_array_len(pgbase, bytes);
739 if (unlikely(!data)) 625 if (!pagevec)
626 pagevec = kmalloc(npages * sizeof(struct page *), GFP_KERNEL);
627 if (!pagevec)
740 break; 628 break;
741 629
742 down_read(&current->mm->mmap_sem); 630 down_read(&current->mm->mmap_sem);
743 result = get_user_pages(current, current->mm, user_addr, 631 result = get_user_pages(current, current->mm, user_addr,
744 data->npages, 0, 0, data->pagevec, NULL); 632 npages, 0, 0, pagevec, NULL);
745 up_read(&current->mm->mmap_sem); 633 up_read(&current->mm->mmap_sem);
746 if (result < 0) { 634 if (result < 0)
747 nfs_writedata_free(data);
748 break; 635 break;
749 } 636
750 if ((unsigned)result < data->npages) { 637 if ((unsigned)result < npages) {
751 bytes = result * PAGE_SIZE; 638 bytes = result * PAGE_SIZE;
752 if (bytes <= pgbase) { 639 if (bytes <= pgbase) {
753 nfs_direct_release_pages(data->pagevec, result); 640 nfs_direct_release_pages(pagevec, result);
754 nfs_writedata_free(data);
755 break; 641 break;
756 } 642 }
757 bytes -= pgbase; 643 bytes -= pgbase;
758 data->npages = result; 644 npages = result;
759 } 645 }
760 646
761 get_dreq(dreq); 647 for (i = 0; i < npages; i++) {
762 648 struct nfs_page *req;
763 list_move_tail(&data->pages, &dreq->rewrite_list); 649 unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
764
765 data->req = (struct nfs_page *) dreq;
766 data->inode = inode;
767 data->cred = msg.rpc_cred;
768 data->args.fh = NFS_FH(inode);
769 data->args.context = ctx;
770 data->args.lock_context = dreq->l_ctx;
771 data->args.offset = pos;
772 data->args.pgbase = pgbase;
773 data->args.pages = data->pagevec;
774 data->args.count = bytes;
775 data->args.stable = sync;
776 data->res.fattr = &data->fattr;
777 data->res.count = bytes;
778 data->res.verf = &data->verf;
779 nfs_fattr_init(&data->fattr);
780
781 task_setup_data.task = &data->task;
782 task_setup_data.callback_data = data;
783 msg.rpc_argp = &data->args;
784 msg.rpc_resp = &data->res;
785 NFS_PROTO(inode)->write_setup(data, &msg);
786
787 task = rpc_run_task(&task_setup_data);
788 if (IS_ERR(task))
789 break;
790 rpc_put_task(task);
791
792 dprintk("NFS: %5u initiated direct write call "
793 "(req %s/%Ld, %zu bytes @ offset %Lu)\n",
794 data->task.tk_pid,
795 inode->i_sb->s_id,
796 (long long)NFS_FILEID(inode),
797 bytes,
798 (unsigned long long)data->args.offset);
799 650
800 started += bytes; 651 req = nfs_create_request(dreq->ctx, dreq->inode,
801 user_addr += bytes; 652 pagevec[i],
802 pos += bytes; 653 pgbase, req_len);
803 654 if (IS_ERR(req)) {
804 /* FIXME: Remove this useless math from the final patch */ 655 result = PTR_ERR(req);
805 pgbase += bytes; 656 break;
806 pgbase &= ~PAGE_MASK; 657 }
807 BUG_ON(pgbase != (user_addr & ~PAGE_MASK)); 658 nfs_lock_request(req);
659 req->wb_index = pos >> PAGE_SHIFT;
660 req->wb_offset = pos & ~PAGE_MASK;
661 if (!nfs_pageio_add_request(desc, req)) {
662 result = desc->pg_error;
663 nfs_unlock_and_release_request(req);
664 break;
665 }
666 pgbase = 0;
667 bytes -= req_len;
668 started += req_len;
669 user_addr += req_len;
670 pos += req_len;
671 count -= req_len;
672 }
673 /* The nfs_page now hold references to these pages */
674 nfs_direct_release_pages(pagevec, npages);
675 } while (count != 0 && result >= 0);
808 676
809 count -= bytes; 677 kfree(pagevec);
810 } while (count != 0);
811 678
812 if (started) 679 if (started)
813 return started; 680 return started;
814 return result < 0 ? (ssize_t) result : -EFAULT; 681 return result < 0 ? (ssize_t) result : -EFAULT;
815} 682}
816 683
684static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
685{
686 struct nfs_direct_req *dreq = hdr->dreq;
687 struct nfs_commit_info cinfo;
688 int bit = -1;
689 struct nfs_page *req = nfs_list_entry(hdr->pages.next);
690
691 if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
692 goto out_put;
693
694 nfs_init_cinfo_from_dreq(&cinfo, dreq);
695
696 spin_lock(&dreq->lock);
697
698 if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) {
699 dreq->flags = 0;
700 dreq->error = hdr->error;
701 }
702 if (dreq->error != 0)
703 bit = NFS_IOHDR_ERROR;
704 else {
705 dreq->count += hdr->good_bytes;
706 if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) {
707 dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
708 bit = NFS_IOHDR_NEED_RESCHED;
709 } else if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) {
710 if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES)
711 bit = NFS_IOHDR_NEED_RESCHED;
712 else if (dreq->flags == 0) {
713 memcpy(&dreq->verf, &req->wb_verf,
714 sizeof(dreq->verf));
715 bit = NFS_IOHDR_NEED_COMMIT;
716 dreq->flags = NFS_ODIRECT_DO_COMMIT;
717 } else if (dreq->flags == NFS_ODIRECT_DO_COMMIT) {
718 if (memcmp(&dreq->verf, &req->wb_verf, sizeof(dreq->verf))) {
719 dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
720 bit = NFS_IOHDR_NEED_RESCHED;
721 } else
722 bit = NFS_IOHDR_NEED_COMMIT;
723 }
724 }
725 }
726 spin_unlock(&dreq->lock);
727
728 while (!list_empty(&hdr->pages)) {
729 req = nfs_list_entry(hdr->pages.next);
730 nfs_list_remove_request(req);
731 switch (bit) {
732 case NFS_IOHDR_NEED_RESCHED:
733 case NFS_IOHDR_NEED_COMMIT:
734 kref_get(&req->wb_kref);
735 nfs_mark_request_commit(req, hdr->lseg, &cinfo);
736 }
737 nfs_unlock_and_release_request(req);
738 }
739
740out_put:
741 if (put_dreq(dreq))
742 nfs_direct_write_complete(dreq, hdr->inode);
743 hdr->release(hdr);
744}
745
746static void nfs_write_sync_pgio_error(struct list_head *head)
747{
748 struct nfs_page *req;
749
750 while (!list_empty(head)) {
751 req = nfs_list_entry(head->next);
752 nfs_list_remove_request(req);
753 nfs_unlock_and_release_request(req);
754 }
755}
756
757static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = {
758 .error_cleanup = nfs_write_sync_pgio_error,
759 .init_hdr = nfs_direct_pgio_init,
760 .completion = nfs_direct_write_completion,
761};
762
817static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, 763static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
818 const struct iovec *iov, 764 const struct iovec *iov,
819 unsigned long nr_segs, 765 unsigned long nr_segs,
820 loff_t pos, int sync) 766 loff_t pos)
821{ 767{
768 struct nfs_pageio_descriptor desc;
822 ssize_t result = 0; 769 ssize_t result = 0;
823 size_t requested_bytes = 0; 770 size_t requested_bytes = 0;
824 unsigned long seg; 771 unsigned long seg;
825 772
773 nfs_pageio_init_write(&desc, dreq->inode, FLUSH_COND_STABLE,
774 &nfs_direct_write_completion_ops);
775 desc.pg_dreq = dreq;
826 get_dreq(dreq); 776 get_dreq(dreq);
827 777
828 for (seg = 0; seg < nr_segs; seg++) { 778 for (seg = 0; seg < nr_segs; seg++) {
829 const struct iovec *vec = &iov[seg]; 779 const struct iovec *vec = &iov[seg];
830 result = nfs_direct_write_schedule_segment(dreq, vec, 780 result = nfs_direct_write_schedule_segment(&desc, vec, pos);
831 pos, sync);
832 if (result < 0) 781 if (result < 0)
833 break; 782 break;
834 requested_bytes += result; 783 requested_bytes += result;
@@ -836,6 +785,8 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
836 break; 785 break;
837 pos += vec->iov_len; 786 pos += vec->iov_len;
838 } 787 }
788 nfs_pageio_complete(&desc);
789 NFS_I(dreq->inode)->write_io += desc.pg_bytes_written;
839 790
840 /* 791 /*
841 * If no bytes were started, return the error, and let the 792 * If no bytes were started, return the error, and let the
@@ -858,16 +809,10 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
858 ssize_t result = -ENOMEM; 809 ssize_t result = -ENOMEM;
859 struct inode *inode = iocb->ki_filp->f_mapping->host; 810 struct inode *inode = iocb->ki_filp->f_mapping->host;
860 struct nfs_direct_req *dreq; 811 struct nfs_direct_req *dreq;
861 size_t wsize = NFS_SERVER(inode)->wsize;
862 int sync = NFS_UNSTABLE;
863 812
864 dreq = nfs_direct_req_alloc(); 813 dreq = nfs_direct_req_alloc();
865 if (!dreq) 814 if (!dreq)
866 goto out; 815 goto out;
867 nfs_alloc_commit_data(dreq);
868
869 if (dreq->commit_data == NULL || count <= wsize)
870 sync = NFS_FILE_SYNC;
871 816
872 dreq->inode = inode; 817 dreq->inode = inode;
873 dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); 818 dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
@@ -877,7 +822,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
877 if (!is_sync_kiocb(iocb)) 822 if (!is_sync_kiocb(iocb))
878 dreq->iocb = iocb; 823 dreq->iocb = iocb;
879 824
880 result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos, sync); 825 result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos);
881 if (!result) 826 if (!result)
882 result = nfs_direct_wait(dreq); 827 result = nfs_direct_wait(dreq);
883out_release: 828out_release:
@@ -997,10 +942,15 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
997 task_io_account_write(count); 942 task_io_account_write(count);
998 943
999 retval = nfs_direct_write(iocb, iov, nr_segs, pos, count); 944 retval = nfs_direct_write(iocb, iov, nr_segs, pos, count);
945 if (retval > 0) {
946 struct inode *inode = mapping->host;
1000 947
1001 if (retval > 0)
1002 iocb->ki_pos = pos + retval; 948 iocb->ki_pos = pos + retval;
1003 949 spin_lock(&inode->i_lock);
950 if (i_size_read(inode) < iocb->ki_pos)
951 i_size_write(inode, iocb->ki_pos);
952 spin_unlock(&inode->i_lock);
953 }
1004out: 954out:
1005 return retval; 955 return retval;
1006} 956}
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index aa9b709fd328..56311ca5f9f8 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -174,6 +174,13 @@ nfs_file_flush(struct file *file, fl_owner_t id)
174 if ((file->f_mode & FMODE_WRITE) == 0) 174 if ((file->f_mode & FMODE_WRITE) == 0)
175 return 0; 175 return 0;
176 176
177 /*
178 * If we're holding a write delegation, then just start the i/o
179 * but don't wait for completion (or send a commit).
180 */
181 if (nfs_have_delegation(inode, FMODE_WRITE))
182 return filemap_fdatawrite(file->f_mapping);
183
177 /* Flush writes to the server and return any errors */ 184 /* Flush writes to the server and return any errors */
178 return vfs_fsync(file, 0); 185 return vfs_fsync(file, 0);
179} 186}
@@ -417,6 +424,7 @@ static int nfs_write_end(struct file *file, struct address_space *mapping,
417 424
418 if (status < 0) 425 if (status < 0)
419 return status; 426 return status;
427 NFS_I(mapping->host)->write_io += copied;
420 return copied; 428 return copied;
421} 429}
422 430
diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c
index ae65c16b3670..c817787fbdb4 100644
--- a/fs/nfs/fscache.c
+++ b/fs/nfs/fscache.c
@@ -64,23 +64,12 @@ void nfs_fscache_release_client_cookie(struct nfs_client *clp)
64 * either by the 'fsc=xxx' option to mount, or by inheriting it from the parent 64 * either by the 'fsc=xxx' option to mount, or by inheriting it from the parent
65 * superblock across an automount point of some nature. 65 * superblock across an automount point of some nature.
66 */ 66 */
67void nfs_fscache_get_super_cookie(struct super_block *sb, const char *uniq, 67void nfs_fscache_get_super_cookie(struct super_block *sb, const char *uniq, int ulen)
68 struct nfs_clone_mount *mntdata)
69{ 68{
70 struct nfs_fscache_key *key, *xkey; 69 struct nfs_fscache_key *key, *xkey;
71 struct nfs_server *nfss = NFS_SB(sb); 70 struct nfs_server *nfss = NFS_SB(sb);
72 struct rb_node **p, *parent; 71 struct rb_node **p, *parent;
73 int diff, ulen; 72 int diff;
74
75 if (uniq) {
76 ulen = strlen(uniq);
77 } else if (mntdata) {
78 struct nfs_server *mnt_s = NFS_SB(mntdata->sb);
79 if (mnt_s->fscache_key) {
80 uniq = mnt_s->fscache_key->key.uniquifier;
81 ulen = mnt_s->fscache_key->key.uniq_len;
82 }
83 }
84 73
85 if (!uniq) { 74 if (!uniq) {
86 uniq = ""; 75 uniq = "";
diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h
index b9c572d0679f..c5b11b53ff33 100644
--- a/fs/nfs/fscache.h
+++ b/fs/nfs/fscache.h
@@ -73,9 +73,7 @@ extern void nfs_fscache_unregister(void);
73extern void nfs_fscache_get_client_cookie(struct nfs_client *); 73extern void nfs_fscache_get_client_cookie(struct nfs_client *);
74extern void nfs_fscache_release_client_cookie(struct nfs_client *); 74extern void nfs_fscache_release_client_cookie(struct nfs_client *);
75 75
76extern void nfs_fscache_get_super_cookie(struct super_block *, 76extern void nfs_fscache_get_super_cookie(struct super_block *, const char *, int);
77 const char *,
78 struct nfs_clone_mount *);
79extern void nfs_fscache_release_super_cookie(struct super_block *); 77extern void nfs_fscache_release_super_cookie(struct super_block *);
80 78
81extern void nfs_fscache_init_inode_cookie(struct inode *); 79extern void nfs_fscache_init_inode_cookie(struct inode *);
@@ -172,12 +170,6 @@ static inline void nfs_fscache_unregister(void) {}
172static inline void nfs_fscache_get_client_cookie(struct nfs_client *clp) {} 170static inline void nfs_fscache_get_client_cookie(struct nfs_client *clp) {}
173static inline void nfs_fscache_release_client_cookie(struct nfs_client *clp) {} 171static inline void nfs_fscache_release_client_cookie(struct nfs_client *clp) {}
174 172
175static inline void nfs_fscache_get_super_cookie(
176 struct super_block *sb,
177 const char *uniq,
178 struct nfs_clone_mount *mntdata)
179{
180}
181static inline void nfs_fscache_release_super_cookie(struct super_block *sb) {} 173static inline void nfs_fscache_release_super_cookie(struct super_block *sb) {}
182 174
183static inline void nfs_fscache_init_inode_cookie(struct inode *inode) {} 175static inline void nfs_fscache_init_inode_cookie(struct inode *inode) {}
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index 4ca6f5c8038e..8abfb19bd3aa 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -150,7 +150,7 @@ int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh)
150 goto out; 150 goto out;
151 151
152 /* Start by getting the root filehandle from the server */ 152 /* Start by getting the root filehandle from the server */
153 ret = server->nfs_client->rpc_ops->getroot(server, mntfh, &fsinfo); 153 ret = nfs4_proc_get_rootfh(server, mntfh, &fsinfo);
154 if (ret < 0) { 154 if (ret < 0) {
155 dprintk("nfs4_get_rootfh: getroot error = %d\n", -ret); 155 dprintk("nfs4_get_rootfh: getroot error = %d\n", -ret);
156 goto out; 156 goto out;
@@ -178,87 +178,4 @@ out:
178 return ret; 178 return ret;
179} 179}
180 180
181/*
182 * get an NFS4 root dentry from the root filehandle
183 */
184struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh,
185 const char *devname)
186{
187 struct nfs_server *server = NFS_SB(sb);
188 struct nfs_fattr *fattr = NULL;
189 struct dentry *ret;
190 struct inode *inode;
191 void *name = kstrdup(devname, GFP_KERNEL);
192 int error;
193
194 dprintk("--> nfs4_get_root()\n");
195
196 if (!name)
197 return ERR_PTR(-ENOMEM);
198
199 /* get the info about the server and filesystem */
200 error = nfs4_server_capabilities(server, mntfh);
201 if (error < 0) {
202 dprintk("nfs_get_root: getcaps error = %d\n",
203 -error);
204 kfree(name);
205 return ERR_PTR(error);
206 }
207
208 fattr = nfs_alloc_fattr();
209 if (fattr == NULL) {
210 kfree(name);
211 return ERR_PTR(-ENOMEM);
212 }
213
214 /* get the actual root for this mount */
215 error = server->nfs_client->rpc_ops->getattr(server, mntfh, fattr);
216 if (error < 0) {
217 dprintk("nfs_get_root: getattr error = %d\n", -error);
218 ret = ERR_PTR(error);
219 goto out;
220 }
221
222 if (fattr->valid & NFS_ATTR_FATTR_FSID &&
223 !nfs_fsid_equal(&server->fsid, &fattr->fsid))
224 memcpy(&server->fsid, &fattr->fsid, sizeof(server->fsid));
225
226 inode = nfs_fhget(sb, mntfh, fattr);
227 if (IS_ERR(inode)) {
228 dprintk("nfs_get_root: get root inode failed\n");
229 ret = ERR_CAST(inode);
230 goto out;
231 }
232
233 error = nfs_superblock_set_dummy_root(sb, inode);
234 if (error != 0) {
235 ret = ERR_PTR(error);
236 goto out;
237 }
238
239 /* root dentries normally start off anonymous and get spliced in later
240 * if the dentry tree reaches them; however if the dentry already
241 * exists, we'll pick it up at this point and use it as the root
242 */
243 ret = d_obtain_alias(inode);
244 if (IS_ERR(ret)) {
245 dprintk("nfs_get_root: get root dentry failed\n");
246 goto out;
247 }
248
249 security_d_instantiate(ret, inode);
250 spin_lock(&ret->d_lock);
251 if (IS_ROOT(ret) && !(ret->d_flags & DCACHE_NFSFS_RENAMED)) {
252 ret->d_fsdata = name;
253 name = NULL;
254 }
255 spin_unlock(&ret->d_lock);
256out:
257 if (name)
258 kfree(name);
259 nfs_free_fattr(fattr);
260 dprintk("<-- nfs4_get_root()\n");
261 return ret;
262}
263
264#endif /* CONFIG_NFS_V4 */ 181#endif /* CONFIG_NFS_V4 */
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index ba3019f5934c..b5b86a05059c 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -415,7 +415,7 @@ static int __nfs_idmap_register(struct dentry *dir,
415static void nfs_idmap_unregister(struct nfs_client *clp, 415static void nfs_idmap_unregister(struct nfs_client *clp,
416 struct rpc_pipe *pipe) 416 struct rpc_pipe *pipe)
417{ 417{
418 struct net *net = clp->net; 418 struct net *net = clp->cl_net;
419 struct super_block *pipefs_sb; 419 struct super_block *pipefs_sb;
420 420
421 pipefs_sb = rpc_get_sb_net(net); 421 pipefs_sb = rpc_get_sb_net(net);
@@ -429,7 +429,7 @@ static int nfs_idmap_register(struct nfs_client *clp,
429 struct idmap *idmap, 429 struct idmap *idmap,
430 struct rpc_pipe *pipe) 430 struct rpc_pipe *pipe)
431{ 431{
432 struct net *net = clp->net; 432 struct net *net = clp->cl_net;
433 struct super_block *pipefs_sb; 433 struct super_block *pipefs_sb;
434 int err = 0; 434 int err = 0;
435 435
@@ -530,9 +530,25 @@ static struct nfs_client *nfs_get_client_for_event(struct net *net, int event)
530 struct nfs_net *nn = net_generic(net, nfs_net_id); 530 struct nfs_net *nn = net_generic(net, nfs_net_id);
531 struct dentry *cl_dentry; 531 struct dentry *cl_dentry;
532 struct nfs_client *clp; 532 struct nfs_client *clp;
533 int err;
533 534
535restart:
534 spin_lock(&nn->nfs_client_lock); 536 spin_lock(&nn->nfs_client_lock);
535 list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) { 537 list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) {
538 /* Wait for initialisation to finish */
539 if (clp->cl_cons_state == NFS_CS_INITING) {
540 atomic_inc(&clp->cl_count);
541 spin_unlock(&nn->nfs_client_lock);
542 err = nfs_wait_client_init_complete(clp);
543 nfs_put_client(clp);
544 if (err)
545 return NULL;
546 goto restart;
547 }
548 /* Skip nfs_clients that failed to initialise */
549 if (clp->cl_cons_state < 0)
550 continue;
551 smp_rmb();
536 if (clp->rpc_ops != &nfs_v4_clientops) 552 if (clp->rpc_ops != &nfs_v4_clientops)
537 continue; 553 continue;
538 cl_dentry = clp->cl_idmap->idmap_pipe->dentry; 554 cl_dentry = clp->cl_idmap->idmap_pipe->dentry;
@@ -640,20 +656,16 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons,
640 struct idmap_msg *im; 656 struct idmap_msg *im;
641 struct idmap *idmap = (struct idmap *)aux; 657 struct idmap *idmap = (struct idmap *)aux;
642 struct key *key = cons->key; 658 struct key *key = cons->key;
643 int ret; 659 int ret = -ENOMEM;
644 660
645 /* msg and im are freed in idmap_pipe_destroy_msg */ 661 /* msg and im are freed in idmap_pipe_destroy_msg */
646 msg = kmalloc(sizeof(*msg), GFP_KERNEL); 662 msg = kmalloc(sizeof(*msg), GFP_KERNEL);
647 if (IS_ERR(msg)) { 663 if (!msg)
648 ret = PTR_ERR(msg);
649 goto out0; 664 goto out0;
650 }
651 665
652 im = kmalloc(sizeof(*im), GFP_KERNEL); 666 im = kmalloc(sizeof(*im), GFP_KERNEL);
653 if (IS_ERR(im)) { 667 if (!im)
654 ret = PTR_ERR(im);
655 goto out1; 668 goto out1;
656 }
657 669
658 ret = nfs_idmap_prepare_message(key->description, im, msg); 670 ret = nfs_idmap_prepare_message(key->description, im, msg);
659 if (ret < 0) 671 if (ret < 0)
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index c6073139b402..2f6f78c4b42d 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -285,9 +285,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
285 inode->i_mode = fattr->mode; 285 inode->i_mode = fattr->mode;
286 if ((fattr->valid & NFS_ATTR_FATTR_MODE) == 0 286 if ((fattr->valid & NFS_ATTR_FATTR_MODE) == 0
287 && nfs_server_capable(inode, NFS_CAP_MODE)) 287 && nfs_server_capable(inode, NFS_CAP_MODE))
288 nfsi->cache_validity |= NFS_INO_INVALID_ATTR 288 nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
289 | NFS_INO_INVALID_ACCESS
290 | NFS_INO_INVALID_ACL;
291 /* Why so? Because we want revalidate for devices/FIFOs, and 289 /* Why so? Because we want revalidate for devices/FIFOs, and
292 * that's precisely what we have in nfs_file_inode_operations. 290 * that's precisely what we have in nfs_file_inode_operations.
293 */ 291 */
@@ -300,8 +298,6 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
300 inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops; 298 inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops;
301 inode->i_fop = &nfs_dir_operations; 299 inode->i_fop = &nfs_dir_operations;
302 inode->i_data.a_ops = &nfs_dir_aops; 300 inode->i_data.a_ops = &nfs_dir_aops;
303 if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS))
304 set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
305 /* Deal with crossing mountpoints */ 301 /* Deal with crossing mountpoints */
306 if (fattr->valid & NFS_ATTR_FATTR_MOUNTPOINT || 302 if (fattr->valid & NFS_ATTR_FATTR_MOUNTPOINT ||
307 fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) { 303 fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) {
@@ -327,6 +323,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
327 inode->i_gid = -2; 323 inode->i_gid = -2;
328 inode->i_blocks = 0; 324 inode->i_blocks = 0;
329 memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); 325 memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
326 nfsi->write_io = 0;
327 nfsi->read_io = 0;
330 328
331 nfsi->read_cache_jiffies = fattr->time_start; 329 nfsi->read_cache_jiffies = fattr->time_start;
332 nfsi->attr_gencount = fattr->gencount; 330 nfsi->attr_gencount = fattr->gencount;
@@ -337,24 +335,19 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
337 if (fattr->valid & NFS_ATTR_FATTR_MTIME) 335 if (fattr->valid & NFS_ATTR_FATTR_MTIME)
338 inode->i_mtime = fattr->mtime; 336 inode->i_mtime = fattr->mtime;
339 else if (nfs_server_capable(inode, NFS_CAP_MTIME)) 337 else if (nfs_server_capable(inode, NFS_CAP_MTIME))
340 nfsi->cache_validity |= NFS_INO_INVALID_ATTR 338 nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
341 | NFS_INO_INVALID_DATA;
342 if (fattr->valid & NFS_ATTR_FATTR_CTIME) 339 if (fattr->valid & NFS_ATTR_FATTR_CTIME)
343 inode->i_ctime = fattr->ctime; 340 inode->i_ctime = fattr->ctime;
344 else if (nfs_server_capable(inode, NFS_CAP_CTIME)) 341 else if (nfs_server_capable(inode, NFS_CAP_CTIME))
345 nfsi->cache_validity |= NFS_INO_INVALID_ATTR 342 nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
346 | NFS_INO_INVALID_ACCESS
347 | NFS_INO_INVALID_ACL;
348 if (fattr->valid & NFS_ATTR_FATTR_CHANGE) 343 if (fattr->valid & NFS_ATTR_FATTR_CHANGE)
349 inode->i_version = fattr->change_attr; 344 inode->i_version = fattr->change_attr;
350 else if (nfs_server_capable(inode, NFS_CAP_CHANGE_ATTR)) 345 else if (nfs_server_capable(inode, NFS_CAP_CHANGE_ATTR))
351 nfsi->cache_validity |= NFS_INO_INVALID_ATTR 346 nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
352 | NFS_INO_INVALID_DATA;
353 if (fattr->valid & NFS_ATTR_FATTR_SIZE) 347 if (fattr->valid & NFS_ATTR_FATTR_SIZE)
354 inode->i_size = nfs_size_to_loff_t(fattr->size); 348 inode->i_size = nfs_size_to_loff_t(fattr->size);
355 else 349 else
356 nfsi->cache_validity |= NFS_INO_INVALID_ATTR 350 nfsi->cache_validity |= NFS_INO_INVALID_ATTR
357 | NFS_INO_INVALID_DATA
358 | NFS_INO_REVAL_PAGECACHE; 351 | NFS_INO_REVAL_PAGECACHE;
359 if (fattr->valid & NFS_ATTR_FATTR_NLINK) 352 if (fattr->valid & NFS_ATTR_FATTR_NLINK)
360 set_nlink(inode, fattr->nlink); 353 set_nlink(inode, fattr->nlink);
@@ -363,15 +356,11 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
363 if (fattr->valid & NFS_ATTR_FATTR_OWNER) 356 if (fattr->valid & NFS_ATTR_FATTR_OWNER)
364 inode->i_uid = fattr->uid; 357 inode->i_uid = fattr->uid;
365 else if (nfs_server_capable(inode, NFS_CAP_OWNER)) 358 else if (nfs_server_capable(inode, NFS_CAP_OWNER))
366 nfsi->cache_validity |= NFS_INO_INVALID_ATTR 359 nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
367 | NFS_INO_INVALID_ACCESS
368 | NFS_INO_INVALID_ACL;
369 if (fattr->valid & NFS_ATTR_FATTR_GROUP) 360 if (fattr->valid & NFS_ATTR_FATTR_GROUP)
370 inode->i_gid = fattr->gid; 361 inode->i_gid = fattr->gid;
371 else if (nfs_server_capable(inode, NFS_CAP_OWNER_GROUP)) 362 else if (nfs_server_capable(inode, NFS_CAP_OWNER_GROUP))
372 nfsi->cache_validity |= NFS_INO_INVALID_ATTR 363 nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
373 | NFS_INO_INVALID_ACCESS
374 | NFS_INO_INVALID_ACL;
375 if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED) 364 if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED)
376 inode->i_blocks = fattr->du.nfs2.blocks; 365 inode->i_blocks = fattr->du.nfs2.blocks;
377 if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) { 366 if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) {
@@ -654,6 +643,7 @@ struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, fmode_t f
654 nfs_init_lock_context(&ctx->lock_context); 643 nfs_init_lock_context(&ctx->lock_context);
655 ctx->lock_context.open_context = ctx; 644 ctx->lock_context.open_context = ctx;
656 INIT_LIST_HEAD(&ctx->list); 645 INIT_LIST_HEAD(&ctx->list);
646 ctx->mdsthreshold = NULL;
657 return ctx; 647 return ctx;
658} 648}
659 649
@@ -682,6 +672,7 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync)
682 put_rpccred(ctx->cred); 672 put_rpccred(ctx->cred);
683 dput(ctx->dentry); 673 dput(ctx->dentry);
684 nfs_sb_deactive(sb); 674 nfs_sb_deactive(sb);
675 kfree(ctx->mdsthreshold);
685 kfree(ctx); 676 kfree(ctx);
686} 677}
687 678
@@ -870,6 +861,15 @@ static int nfs_invalidate_mapping(struct inode *inode, struct address_space *map
870 return 0; 861 return 0;
871} 862}
872 863
864static bool nfs_mapping_need_revalidate_inode(struct inode *inode)
865{
866 if (nfs_have_delegated_attributes(inode))
867 return false;
868 return (NFS_I(inode)->cache_validity & NFS_INO_REVAL_PAGECACHE)
869 || nfs_attribute_timeout(inode)
870 || NFS_STALE(inode);
871}
872
873/** 873/**
874 * nfs_revalidate_mapping - Revalidate the pagecache 874 * nfs_revalidate_mapping - Revalidate the pagecache
875 * @inode - pointer to host inode 875 * @inode - pointer to host inode
@@ -880,9 +880,7 @@ int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
880 struct nfs_inode *nfsi = NFS_I(inode); 880 struct nfs_inode *nfsi = NFS_I(inode);
881 int ret = 0; 881 int ret = 0;
882 882
883 if ((nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE) 883 if (nfs_mapping_need_revalidate_inode(inode)) {
884 || nfs_attribute_cache_expired(inode)
885 || NFS_STALE(inode)) {
886 ret = __nfs_revalidate_inode(NFS_SERVER(inode), inode); 884 ret = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
887 if (ret < 0) 885 if (ret < 0)
888 goto out; 886 goto out;
@@ -948,6 +946,8 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
948 unsigned long invalid = 0; 946 unsigned long invalid = 0;
949 947
950 948
949 if (nfs_have_delegated_attributes(inode))
950 return 0;
951 /* Has the inode gone and changed behind our back? */ 951 /* Has the inode gone and changed behind our back? */
952 if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid) 952 if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid)
953 return -EIO; 953 return -EIO;
@@ -960,7 +960,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
960 960
961 /* Verify a few of the more important attributes */ 961 /* Verify a few of the more important attributes */
962 if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec_equal(&inode->i_mtime, &fattr->mtime)) 962 if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec_equal(&inode->i_mtime, &fattr->mtime))
963 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; 963 invalid |= NFS_INO_INVALID_ATTR;
964 964
965 if (fattr->valid & NFS_ATTR_FATTR_SIZE) { 965 if (fattr->valid & NFS_ATTR_FATTR_SIZE) {
966 cur_size = i_size_read(inode); 966 cur_size = i_size_read(inode);
@@ -1279,14 +1279,26 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1279 nfs_display_fhandle_hash(NFS_FH(inode)), 1279 nfs_display_fhandle_hash(NFS_FH(inode)),
1280 atomic_read(&inode->i_count), fattr->valid); 1280 atomic_read(&inode->i_count), fattr->valid);
1281 1281
1282 if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid) 1282 if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid) {
1283 goto out_fileid; 1283 printk(KERN_ERR "NFS: server %s error: fileid changed\n"
1284 "fsid %s: expected fileid 0x%Lx, got 0x%Lx\n",
1285 NFS_SERVER(inode)->nfs_client->cl_hostname,
1286 inode->i_sb->s_id, (long long)nfsi->fileid,
1287 (long long)fattr->fileid);
1288 goto out_err;
1289 }
1284 1290
1285 /* 1291 /*
1286 * Make sure the inode's type hasn't changed. 1292 * Make sure the inode's type hasn't changed.
1287 */ 1293 */
1288 if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) 1294 if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) {
1289 goto out_changed; 1295 /*
1296 * Big trouble! The inode has become a different object.
1297 */
1298 printk(KERN_DEBUG "NFS: %s: inode %ld mode changed, %07o to %07o\n",
1299 __func__, inode->i_ino, inode->i_mode, fattr->mode);
1300 goto out_err;
1301 }
1290 1302
1291 server = NFS_SERVER(inode); 1303 server = NFS_SERVER(inode);
1292 /* Update the fsid? */ 1304 /* Update the fsid? */
@@ -1314,7 +1326,11 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1314 if (inode->i_version != fattr->change_attr) { 1326 if (inode->i_version != fattr->change_attr) {
1315 dprintk("NFS: change_attr change on server for file %s/%ld\n", 1327 dprintk("NFS: change_attr change on server for file %s/%ld\n",
1316 inode->i_sb->s_id, inode->i_ino); 1328 inode->i_sb->s_id, inode->i_ino);
1317 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; 1329 invalid |= NFS_INO_INVALID_ATTR
1330 | NFS_INO_INVALID_DATA
1331 | NFS_INO_INVALID_ACCESS
1332 | NFS_INO_INVALID_ACL
1333 | NFS_INO_REVAL_PAGECACHE;
1318 if (S_ISDIR(inode->i_mode)) 1334 if (S_ISDIR(inode->i_mode))
1319 nfs_force_lookup_revalidate(inode); 1335 nfs_force_lookup_revalidate(inode);
1320 inode->i_version = fattr->change_attr; 1336 inode->i_version = fattr->change_attr;
@@ -1323,38 +1339,15 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1323 invalid |= save_cache_validity; 1339 invalid |= save_cache_validity;
1324 1340
1325 if (fattr->valid & NFS_ATTR_FATTR_MTIME) { 1341 if (fattr->valid & NFS_ATTR_FATTR_MTIME) {
1326 /* NFSv2/v3: Check if the mtime agrees */ 1342 memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
1327 if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) {
1328 dprintk("NFS: mtime change on server for file %s/%ld\n",
1329 inode->i_sb->s_id, inode->i_ino);
1330 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
1331 if (S_ISDIR(inode->i_mode))
1332 nfs_force_lookup_revalidate(inode);
1333 memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
1334 }
1335 } else if (server->caps & NFS_CAP_MTIME) 1343 } else if (server->caps & NFS_CAP_MTIME)
1336 invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR 1344 invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
1337 | NFS_INO_INVALID_DATA
1338 | NFS_INO_REVAL_PAGECACHE
1339 | NFS_INO_REVAL_FORCED); 1345 | NFS_INO_REVAL_FORCED);
1340 1346
1341 if (fattr->valid & NFS_ATTR_FATTR_CTIME) { 1347 if (fattr->valid & NFS_ATTR_FATTR_CTIME) {
1342 /* If ctime has changed we should definitely clear access+acl caches */ 1348 memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
1343 if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) {
1344 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
1345 /* and probably clear data for a directory too as utimes can cause
1346 * havoc with our cache.
1347 */
1348 if (S_ISDIR(inode->i_mode)) {
1349 invalid |= NFS_INO_INVALID_DATA;
1350 nfs_force_lookup_revalidate(inode);
1351 }
1352 memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
1353 }
1354 } else if (server->caps & NFS_CAP_CTIME) 1349 } else if (server->caps & NFS_CAP_CTIME)
1355 invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR 1350 invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
1356 | NFS_INO_INVALID_ACCESS
1357 | NFS_INO_INVALID_ACL
1358 | NFS_INO_REVAL_FORCED); 1351 | NFS_INO_REVAL_FORCED);
1359 1352
1360 /* Check if our cached file size is stale */ 1353 /* Check if our cached file size is stale */
@@ -1466,12 +1459,6 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1466 nfsi->cache_validity |= invalid; 1459 nfsi->cache_validity |= invalid;
1467 1460
1468 return 0; 1461 return 0;
1469 out_changed:
1470 /*
1471 * Big trouble! The inode has become a different object.
1472 */
1473 printk(KERN_DEBUG "NFS: %s: inode %ld mode changed, %07o to %07o\n",
1474 __func__, inode->i_ino, inode->i_mode, fattr->mode);
1475 out_err: 1462 out_err:
1476 /* 1463 /*
1477 * No need to worry about unhashing the dentry, as the 1464 * No need to worry about unhashing the dentry, as the
@@ -1480,13 +1467,6 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1480 */ 1467 */
1481 nfs_invalidate_inode(inode); 1468 nfs_invalidate_inode(inode);
1482 return -ESTALE; 1469 return -ESTALE;
1483
1484 out_fileid:
1485 printk(KERN_ERR "NFS: server %s error: fileid changed\n"
1486 "fsid %s: expected fileid 0x%Lx, got 0x%Lx\n",
1487 NFS_SERVER(inode)->nfs_client->cl_hostname, inode->i_sb->s_id,
1488 (long long)nfsi->fileid, (long long)fattr->fileid);
1489 goto out_err;
1490} 1470}
1491 1471
1492 1472
@@ -1547,7 +1527,7 @@ static inline void nfs4_init_once(struct nfs_inode *nfsi)
1547 nfsi->delegation_state = 0; 1527 nfsi->delegation_state = 0;
1548 init_rwsem(&nfsi->rwsem); 1528 init_rwsem(&nfsi->rwsem);
1549 nfsi->layout = NULL; 1529 nfsi->layout = NULL;
1550 atomic_set(&nfsi->commits_outstanding, 0); 1530 atomic_set(&nfsi->commit_info.rpcs_out, 0);
1551#endif 1531#endif
1552} 1532}
1553 1533
@@ -1559,9 +1539,9 @@ static void init_once(void *foo)
1559 INIT_LIST_HEAD(&nfsi->open_files); 1539 INIT_LIST_HEAD(&nfsi->open_files);
1560 INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); 1540 INIT_LIST_HEAD(&nfsi->access_cache_entry_lru);
1561 INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); 1541 INIT_LIST_HEAD(&nfsi->access_cache_inode_lru);
1562 INIT_LIST_HEAD(&nfsi->commit_list); 1542 INIT_LIST_HEAD(&nfsi->commit_info.list);
1563 nfsi->npages = 0; 1543 nfsi->npages = 0;
1564 nfsi->ncommit = 0; 1544 nfsi->commit_info.ncommit = 0;
1565 atomic_set(&nfsi->silly_count, 1); 1545 atomic_set(&nfsi->silly_count, 1);
1566 INIT_HLIST_HEAD(&nfsi->silly_list); 1546 INIT_HLIST_HEAD(&nfsi->silly_list);
1567 init_waitqueue_head(&nfsi->waitqueue); 1547 init_waitqueue_head(&nfsi->waitqueue);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index b777bdaba4c5..1848a7275592 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -103,6 +103,7 @@ struct nfs_parsed_mount_data {
103 unsigned int version; 103 unsigned int version;
104 unsigned int minorversion; 104 unsigned int minorversion;
105 char *fscache_uniq; 105 char *fscache_uniq;
106 bool need_mount;
106 107
107 struct { 108 struct {
108 struct sockaddr_storage address; 109 struct sockaddr_storage address;
@@ -167,11 +168,13 @@ extern struct nfs_server *nfs_clone_server(struct nfs_server *,
167 struct nfs_fh *, 168 struct nfs_fh *,
168 struct nfs_fattr *, 169 struct nfs_fattr *,
169 rpc_authflavor_t); 170 rpc_authflavor_t);
171extern int nfs_wait_client_init_complete(const struct nfs_client *clp);
170extern void nfs_mark_client_ready(struct nfs_client *clp, int state); 172extern void nfs_mark_client_ready(struct nfs_client *clp, int state);
171extern int nfs4_check_client_ready(struct nfs_client *clp);
172extern struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp, 173extern struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
173 const struct sockaddr *ds_addr, 174 const struct sockaddr *ds_addr,
174 int ds_addrlen, int ds_proto); 175 int ds_addrlen, int ds_proto,
176 unsigned int ds_timeo,
177 unsigned int ds_retrans);
175#ifdef CONFIG_PROC_FS 178#ifdef CONFIG_PROC_FS
176extern int __init nfs_fs_proc_init(void); 179extern int __init nfs_fs_proc_init(void);
177extern void nfs_fs_proc_exit(void); 180extern void nfs_fs_proc_exit(void);
@@ -185,21 +188,11 @@ static inline void nfs_fs_proc_exit(void)
185} 188}
186#endif 189#endif
187 190
188/* nfs4namespace.c */
189#ifdef CONFIG_NFS_V4
190extern struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *dentry);
191#else
192static inline
193struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *dentry)
194{
195 return ERR_PTR(-ENOENT);
196}
197#endif
198
199/* callback_xdr.c */ 191/* callback_xdr.c */
200extern struct svc_version nfs4_callback_version1; 192extern struct svc_version nfs4_callback_version1;
201extern struct svc_version nfs4_callback_version4; 193extern struct svc_version nfs4_callback_version4;
202 194
195struct nfs_pageio_descriptor;
203/* pagelist.c */ 196/* pagelist.c */
204extern int __init nfs_init_nfspagecache(void); 197extern int __init nfs_init_nfspagecache(void);
205extern void nfs_destroy_nfspagecache(void); 198extern void nfs_destroy_nfspagecache(void);
@@ -210,9 +203,13 @@ extern void nfs_destroy_writepagecache(void);
210 203
211extern int __init nfs_init_directcache(void); 204extern int __init nfs_init_directcache(void);
212extern void nfs_destroy_directcache(void); 205extern void nfs_destroy_directcache(void);
206extern bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount);
207extern void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
208 struct nfs_pgio_header *hdr,
209 void (*release)(struct nfs_pgio_header *hdr));
210void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos);
213 211
214/* nfs2xdr.c */ 212/* nfs2xdr.c */
215extern int nfs_stat_to_errno(enum nfs_stat);
216extern struct rpc_procinfo nfs_procedures[]; 213extern struct rpc_procinfo nfs_procedures[];
217extern int nfs2_decode_dirent(struct xdr_stream *, 214extern int nfs2_decode_dirent(struct xdr_stream *,
218 struct nfs_entry *, int); 215 struct nfs_entry *, int);
@@ -237,14 +234,13 @@ extern const u32 nfs41_maxwrite_overhead;
237extern struct rpc_procinfo nfs4_procedures[]; 234extern struct rpc_procinfo nfs4_procedures[];
238#endif 235#endif
239 236
240extern int nfs4_init_ds_session(struct nfs_client *clp); 237extern int nfs4_init_ds_session(struct nfs_client *, unsigned long);
241 238
242/* proc.c */ 239/* proc.c */
243void nfs_close_context(struct nfs_open_context *ctx, int is_sync); 240void nfs_close_context(struct nfs_open_context *ctx, int is_sync);
244extern int nfs_init_client(struct nfs_client *clp, 241extern struct nfs_client *nfs_init_client(struct nfs_client *clp,
245 const struct rpc_timeout *timeparms, 242 const struct rpc_timeout *timeparms,
246 const char *ip_addr, rpc_authflavor_t authflavour, 243 const char *ip_addr, rpc_authflavor_t authflavour);
247 int noresvport);
248 244
249/* dir.c */ 245/* dir.c */
250extern int nfs_access_cache_shrinker(struct shrinker *shrink, 246extern int nfs_access_cache_shrinker(struct shrinker *shrink,
@@ -280,9 +276,10 @@ extern void nfs_sb_deactive(struct super_block *sb);
280extern char *nfs_path(char **p, struct dentry *dentry, 276extern char *nfs_path(char **p, struct dentry *dentry,
281 char *buffer, ssize_t buflen); 277 char *buffer, ssize_t buflen);
282extern struct vfsmount *nfs_d_automount(struct path *path); 278extern struct vfsmount *nfs_d_automount(struct path *path);
283#ifdef CONFIG_NFS_V4 279struct vfsmount *nfs_submount(struct nfs_server *, struct dentry *,
284rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *); 280 struct nfs_fh *, struct nfs_fattr *);
285#endif 281struct vfsmount *nfs_do_submount(struct dentry *, struct nfs_fh *,
282 struct nfs_fattr *, rpc_authflavor_t);
286 283
287/* getroot.c */ 284/* getroot.c */
288extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *, 285extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *,
@@ -294,46 +291,73 @@ extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *,
294extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh); 291extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh);
295#endif 292#endif
296 293
297struct nfs_pageio_descriptor; 294struct nfs_pgio_completion_ops;
298/* read.c */ 295/* read.c */
299extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt, 296extern struct nfs_read_header *nfs_readhdr_alloc(void);
300 const struct rpc_call_ops *call_ops); 297extern void nfs_readhdr_free(struct nfs_pgio_header *hdr);
298extern void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
299 struct inode *inode,
300 const struct nfs_pgio_completion_ops *compl_ops);
301extern int nfs_initiate_read(struct rpc_clnt *clnt,
302 struct nfs_read_data *data,
303 const struct rpc_call_ops *call_ops, int flags);
301extern void nfs_read_prepare(struct rpc_task *task, void *calldata); 304extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
302extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, 305extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
303 struct list_head *head); 306 struct nfs_pgio_header *hdr);
304
305extern void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio, 307extern void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
306 struct inode *inode); 308 struct inode *inode,
309 const struct nfs_pgio_completion_ops *compl_ops);
307extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio); 310extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
308extern void nfs_readdata_release(struct nfs_read_data *rdata); 311extern void nfs_readdata_release(struct nfs_read_data *rdata);
309 312
310/* write.c */ 313/* write.c */
314extern void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
315 struct inode *inode, int ioflags,
316 const struct nfs_pgio_completion_ops *compl_ops);
317extern struct nfs_write_header *nfs_writehdr_alloc(void);
318extern void nfs_writehdr_free(struct nfs_pgio_header *hdr);
311extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc, 319extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
312 struct list_head *head); 320 struct nfs_pgio_header *hdr);
313extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, 321extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
314 struct inode *inode, int ioflags); 322 struct inode *inode, int ioflags,
323 const struct nfs_pgio_completion_ops *compl_ops);
315extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio); 324extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio);
316extern void nfs_writedata_release(struct nfs_write_data *wdata); 325extern void nfs_writedata_release(struct nfs_write_data *wdata);
317extern void nfs_commit_free(struct nfs_write_data *p); 326extern void nfs_commit_free(struct nfs_commit_data *p);
318extern int nfs_initiate_write(struct nfs_write_data *data, 327extern int nfs_initiate_write(struct rpc_clnt *clnt,
319 struct rpc_clnt *clnt, 328 struct nfs_write_data *data,
320 const struct rpc_call_ops *call_ops, 329 const struct rpc_call_ops *call_ops,
321 int how); 330 int how, int flags);
322extern void nfs_write_prepare(struct rpc_task *task, void *calldata); 331extern void nfs_write_prepare(struct rpc_task *task, void *calldata);
323extern int nfs_initiate_commit(struct nfs_write_data *data, 332extern void nfs_commit_prepare(struct rpc_task *task, void *calldata);
324 struct rpc_clnt *clnt, 333extern int nfs_initiate_commit(struct rpc_clnt *clnt,
334 struct nfs_commit_data *data,
325 const struct rpc_call_ops *call_ops, 335 const struct rpc_call_ops *call_ops,
326 int how); 336 int how, int flags);
327extern void nfs_init_commit(struct nfs_write_data *data, 337extern void nfs_init_commit(struct nfs_commit_data *data,
328 struct list_head *head, 338 struct list_head *head,
329 struct pnfs_layout_segment *lseg); 339 struct pnfs_layout_segment *lseg,
340 struct nfs_commit_info *cinfo);
341int nfs_scan_commit_list(struct list_head *src, struct list_head *dst,
342 struct nfs_commit_info *cinfo, int max);
343int nfs_scan_commit(struct inode *inode, struct list_head *dst,
344 struct nfs_commit_info *cinfo);
345void nfs_mark_request_commit(struct nfs_page *req,
346 struct pnfs_layout_segment *lseg,
347 struct nfs_commit_info *cinfo);
348int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
349 int how, struct nfs_commit_info *cinfo);
330void nfs_retry_commit(struct list_head *page_list, 350void nfs_retry_commit(struct list_head *page_list,
331 struct pnfs_layout_segment *lseg); 351 struct pnfs_layout_segment *lseg,
332void nfs_commit_clear_lock(struct nfs_inode *nfsi); 352 struct nfs_commit_info *cinfo);
333void nfs_commitdata_release(void *data); 353void nfs_commitdata_release(struct nfs_commit_data *data);
334void nfs_commit_release_pages(struct nfs_write_data *data); 354void nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst,
335void nfs_request_add_commit_list(struct nfs_page *req, struct list_head *head); 355 struct nfs_commit_info *cinfo);
336void nfs_request_remove_commit_list(struct nfs_page *req); 356void nfs_request_remove_commit_list(struct nfs_page *req,
357 struct nfs_commit_info *cinfo);
358void nfs_init_cinfo(struct nfs_commit_info *cinfo,
359 struct inode *inode,
360 struct nfs_direct_req *dreq);
337 361
338#ifdef CONFIG_MIGRATION 362#ifdef CONFIG_MIGRATION
339extern int nfs_migrate_page(struct address_space *, 363extern int nfs_migrate_page(struct address_space *,
@@ -342,15 +366,16 @@ extern int nfs_migrate_page(struct address_space *,
342#define nfs_migrate_page NULL 366#define nfs_migrate_page NULL
343#endif 367#endif
344 368
369/* direct.c */
370void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo,
371 struct nfs_direct_req *dreq);
372
345/* nfs4proc.c */ 373/* nfs4proc.c */
346extern void __nfs4_read_done_cb(struct nfs_read_data *); 374extern void __nfs4_read_done_cb(struct nfs_read_data *);
347extern void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data); 375extern struct nfs_client *nfs4_init_client(struct nfs_client *clp,
348extern int nfs4_init_client(struct nfs_client *clp,
349 const struct rpc_timeout *timeparms, 376 const struct rpc_timeout *timeparms,
350 const char *ip_addr, 377 const char *ip_addr,
351 rpc_authflavor_t authflavour, 378 rpc_authflavor_t authflavour);
352 int noresvport);
353extern void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data);
354extern int _nfs4_call_sync(struct rpc_clnt *clnt, 379extern int _nfs4_call_sync(struct rpc_clnt *clnt,
355 struct nfs_server *server, 380 struct nfs_server *server,
356 struct rpc_message *msg, 381 struct rpc_message *msg,
@@ -466,3 +491,15 @@ unsigned int nfs_page_array_len(unsigned int base, size_t len)
466 PAGE_SIZE - 1) >> PAGE_SHIFT; 491 PAGE_SIZE - 1) >> PAGE_SHIFT;
467} 492}
468 493
494/*
495 * Convert a struct timespec into a 64-bit change attribute
496 *
497 * This does approximately the same thing as timespec_to_ns(),
498 * but for calculation efficiency, we multiply the seconds by
499 * 1024*1024*1024.
500 */
501static inline
502u64 nfs_timespec_to_change_attr(const struct timespec *ts)
503{
504 return ((u64)ts->tv_sec << 30) + ts->tv_nsec;
505}
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index d51868e5683c..08b9c93675da 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -26,11 +26,6 @@ static LIST_HEAD(nfs_automount_list);
26static DECLARE_DELAYED_WORK(nfs_automount_task, nfs_expire_automounts); 26static DECLARE_DELAYED_WORK(nfs_automount_task, nfs_expire_automounts);
27int nfs_mountpoint_expiry_timeout = 500 * HZ; 27int nfs_mountpoint_expiry_timeout = 500 * HZ;
28 28
29static struct vfsmount *nfs_do_submount(struct dentry *dentry,
30 struct nfs_fh *fh,
31 struct nfs_fattr *fattr,
32 rpc_authflavor_t authflavor);
33
34/* 29/*
35 * nfs_path - reconstruct the path given an arbitrary dentry 30 * nfs_path - reconstruct the path given an arbitrary dentry
36 * @base - used to return pointer to the end of devname part of path 31 * @base - used to return pointer to the end of devname part of path
@@ -118,64 +113,6 @@ Elong:
118 return ERR_PTR(-ENAMETOOLONG); 113 return ERR_PTR(-ENAMETOOLONG);
119} 114}
120 115
121#ifdef CONFIG_NFS_V4
122rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors)
123{
124 struct gss_api_mech *mech;
125 struct xdr_netobj oid;
126 int i;
127 rpc_authflavor_t pseudoflavor = RPC_AUTH_UNIX;
128
129 for (i = 0; i < flavors->num_flavors; i++) {
130 struct nfs4_secinfo_flavor *flavor;
131 flavor = &flavors->flavors[i];
132
133 if (flavor->flavor == RPC_AUTH_NULL || flavor->flavor == RPC_AUTH_UNIX) {
134 pseudoflavor = flavor->flavor;
135 break;
136 } else if (flavor->flavor == RPC_AUTH_GSS) {
137 oid.len = flavor->gss.sec_oid4.len;
138 oid.data = flavor->gss.sec_oid4.data;
139 mech = gss_mech_get_by_OID(&oid);
140 if (!mech)
141 continue;
142 pseudoflavor = gss_svc_to_pseudoflavor(mech, flavor->gss.service);
143 gss_mech_put(mech);
144 break;
145 }
146 }
147
148 return pseudoflavor;
149}
150
151static struct rpc_clnt *nfs_lookup_mountpoint(struct inode *dir,
152 struct qstr *name,
153 struct nfs_fh *fh,
154 struct nfs_fattr *fattr)
155{
156 int err;
157
158 if (NFS_PROTO(dir)->version == 4)
159 return nfs4_proc_lookup_mountpoint(dir, name, fh, fattr);
160
161 err = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, name, fh, fattr);
162 if (err)
163 return ERR_PTR(err);
164 return rpc_clone_client(NFS_SERVER(dir)->client);
165}
166#else /* CONFIG_NFS_V4 */
167static inline struct rpc_clnt *nfs_lookup_mountpoint(struct inode *dir,
168 struct qstr *name,
169 struct nfs_fh *fh,
170 struct nfs_fattr *fattr)
171{
172 int err = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, name, fh, fattr);
173 if (err)
174 return ERR_PTR(err);
175 return rpc_clone_client(NFS_SERVER(dir)->client);
176}
177#endif /* CONFIG_NFS_V4 */
178
179/* 116/*
180 * nfs_d_automount - Handle crossing a mountpoint on the server 117 * nfs_d_automount - Handle crossing a mountpoint on the server
181 * @path - The mountpoint 118 * @path - The mountpoint
@@ -191,10 +128,9 @@ static inline struct rpc_clnt *nfs_lookup_mountpoint(struct inode *dir,
191struct vfsmount *nfs_d_automount(struct path *path) 128struct vfsmount *nfs_d_automount(struct path *path)
192{ 129{
193 struct vfsmount *mnt; 130 struct vfsmount *mnt;
194 struct dentry *parent; 131 struct nfs_server *server = NFS_SERVER(path->dentry->d_inode);
195 struct nfs_fh *fh = NULL; 132 struct nfs_fh *fh = NULL;
196 struct nfs_fattr *fattr = NULL; 133 struct nfs_fattr *fattr = NULL;
197 struct rpc_clnt *client;
198 134
199 dprintk("--> nfs_d_automount()\n"); 135 dprintk("--> nfs_d_automount()\n");
200 136
@@ -210,21 +146,7 @@ struct vfsmount *nfs_d_automount(struct path *path)
210 146
211 dprintk("%s: enter\n", __func__); 147 dprintk("%s: enter\n", __func__);
212 148
213 /* Look it up again to get its attributes */ 149 mnt = server->nfs_client->rpc_ops->submount(server, path->dentry, fh, fattr);
214 parent = dget_parent(path->dentry);
215 client = nfs_lookup_mountpoint(parent->d_inode, &path->dentry->d_name, fh, fattr);
216 dput(parent);
217 if (IS_ERR(client)) {
218 mnt = ERR_CAST(client);
219 goto out;
220 }
221
222 if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL)
223 mnt = nfs_do_refmount(client, path->dentry);
224 else
225 mnt = nfs_do_submount(path->dentry, fh, fattr, client->cl_auth->au_flavor);
226 rpc_shutdown_client(client);
227
228 if (IS_ERR(mnt)) 150 if (IS_ERR(mnt))
229 goto out; 151 goto out;
230 152
@@ -297,10 +219,8 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server,
297 * @authflavor - security flavor to use when performing the mount 219 * @authflavor - security flavor to use when performing the mount
298 * 220 *
299 */ 221 */
300static struct vfsmount *nfs_do_submount(struct dentry *dentry, 222struct vfsmount *nfs_do_submount(struct dentry *dentry, struct nfs_fh *fh,
301 struct nfs_fh *fh, 223 struct nfs_fattr *fattr, rpc_authflavor_t authflavor)
302 struct nfs_fattr *fattr,
303 rpc_authflavor_t authflavor)
304{ 224{
305 struct nfs_clone_mount mountdata = { 225 struct nfs_clone_mount mountdata = {
306 .sb = dentry->d_sb, 226 .sb = dentry->d_sb,
@@ -333,3 +253,18 @@ out:
333 dprintk("<-- nfs_do_submount() = %p\n", mnt); 253 dprintk("<-- nfs_do_submount() = %p\n", mnt);
334 return mnt; 254 return mnt;
335} 255}
256
257struct vfsmount *nfs_submount(struct nfs_server *server, struct dentry *dentry,
258 struct nfs_fh *fh, struct nfs_fattr *fattr)
259{
260 int err;
261 struct dentry *parent = dget_parent(dentry);
262
263 /* Look it up again to get its attributes */
264 err = server->nfs_client->rpc_ops->lookup(parent->d_inode, &dentry->d_name, fh, fattr);
265 dput(parent);
266 if (err != 0)
267 return ERR_PTR(err);
268
269 return nfs_do_submount(dentry, fh, fattr, server->client->cl_auth->au_flavor);
270}
diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h
index aa14ec303e94..8a6394edb8b0 100644
--- a/fs/nfs/netns.h
+++ b/fs/nfs/netns.h
@@ -1,3 +1,7 @@
1/*
2 * NFS-private data for each "struct net". Accessed with net_generic().
3 */
4
1#ifndef __NFS_NETNS_H__ 5#ifndef __NFS_NETNS_H__
2#define __NFS_NETNS_H__ 6#define __NFS_NETNS_H__
3 7
@@ -20,6 +24,7 @@ struct nfs_net {
20 struct idr cb_ident_idr; /* Protected by nfs_client_lock */ 24 struct idr cb_ident_idr; /* Protected by nfs_client_lock */
21#endif 25#endif
22 spinlock_t nfs_client_lock; 26 spinlock_t nfs_client_lock;
27 struct timespec boot_time;
23}; 28};
24 29
25extern int nfs_net_id; 30extern int nfs_net_id;
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 1f56000fabbd..baf759bccd05 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -61,6 +61,7 @@
61#define NFS_readdirres_sz (1) 61#define NFS_readdirres_sz (1)
62#define NFS_statfsres_sz (1+NFS_info_sz) 62#define NFS_statfsres_sz (1+NFS_info_sz)
63 63
64static int nfs_stat_to_errno(enum nfs_stat);
64 65
65/* 66/*
66 * While encoding arguments, set up the reply buffer in advance to 67 * While encoding arguments, set up the reply buffer in advance to
@@ -313,6 +314,8 @@ static int decode_fattr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
313 p = xdr_decode_time(p, &fattr->atime); 314 p = xdr_decode_time(p, &fattr->atime);
314 p = xdr_decode_time(p, &fattr->mtime); 315 p = xdr_decode_time(p, &fattr->mtime);
315 xdr_decode_time(p, &fattr->ctime); 316 xdr_decode_time(p, &fattr->ctime);
317 fattr->change_attr = nfs_timespec_to_change_attr(&fattr->ctime);
318
316 return 0; 319 return 0;
317out_overflow: 320out_overflow:
318 print_overflow_msg(__func__, xdr); 321 print_overflow_msg(__func__, xdr);
@@ -1109,7 +1112,7 @@ static const struct {
1109 * Returns a local errno value, or -EIO if the NFS status code is 1112 * Returns a local errno value, or -EIO if the NFS status code is
1110 * not recognized. This function is used jointly by NFSv2 and NFSv3. 1113 * not recognized. This function is used jointly by NFSv2 and NFSv3.
1111 */ 1114 */
1112int nfs_stat_to_errno(enum nfs_stat status) 1115static int nfs_stat_to_errno(enum nfs_stat status)
1113{ 1116{
1114 int i; 1117 int i;
1115 1118
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 75c68299358e..2292a0fd2bff 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -142,7 +142,7 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
142} 142}
143 143
144static int 144static int
145nfs3_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qstr *name, 145nfs3_proc_lookup(struct inode *dir, struct qstr *name,
146 struct nfs_fh *fhandle, struct nfs_fattr *fattr) 146 struct nfs_fh *fhandle, struct nfs_fattr *fattr)
147{ 147{
148 struct nfs3_diropargs arg = { 148 struct nfs3_diropargs arg = {
@@ -810,11 +810,13 @@ nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
810 810
811static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data) 811static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data)
812{ 812{
813 if (nfs3_async_handle_jukebox(task, data->inode)) 813 struct inode *inode = data->header->inode;
814
815 if (nfs3_async_handle_jukebox(task, inode))
814 return -EAGAIN; 816 return -EAGAIN;
815 817
816 nfs_invalidate_atime(data->inode); 818 nfs_invalidate_atime(inode);
817 nfs_refresh_inode(data->inode, &data->fattr); 819 nfs_refresh_inode(inode, &data->fattr);
818 return 0; 820 return 0;
819} 821}
820 822
@@ -830,10 +832,12 @@ static void nfs3_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_da
830 832
831static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data) 833static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data)
832{ 834{
833 if (nfs3_async_handle_jukebox(task, data->inode)) 835 struct inode *inode = data->header->inode;
836
837 if (nfs3_async_handle_jukebox(task, inode))
834 return -EAGAIN; 838 return -EAGAIN;
835 if (task->tk_status >= 0) 839 if (task->tk_status >= 0)
836 nfs_post_op_update_inode_force_wcc(data->inode, data->res.fattr); 840 nfs_post_op_update_inode_force_wcc(inode, data->res.fattr);
837 return 0; 841 return 0;
838} 842}
839 843
@@ -847,7 +851,12 @@ static void nfs3_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_
847 rpc_call_start(task); 851 rpc_call_start(task);
848} 852}
849 853
850static int nfs3_commit_done(struct rpc_task *task, struct nfs_write_data *data) 854static void nfs3_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
855{
856 rpc_call_start(task);
857}
858
859static int nfs3_commit_done(struct rpc_task *task, struct nfs_commit_data *data)
851{ 860{
852 if (nfs3_async_handle_jukebox(task, data->inode)) 861 if (nfs3_async_handle_jukebox(task, data->inode))
853 return -EAGAIN; 862 return -EAGAIN;
@@ -855,7 +864,7 @@ static int nfs3_commit_done(struct rpc_task *task, struct nfs_write_data *data)
855 return 0; 864 return 0;
856} 865}
857 866
858static void nfs3_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg) 867static void nfs3_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg)
859{ 868{
860 msg->rpc_proc = &nfs3_procedures[NFS3PROC_COMMIT]; 869 msg->rpc_proc = &nfs3_procedures[NFS3PROC_COMMIT];
861} 870}
@@ -875,6 +884,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
875 .file_inode_ops = &nfs3_file_inode_operations, 884 .file_inode_ops = &nfs3_file_inode_operations,
876 .file_ops = &nfs_file_operations, 885 .file_ops = &nfs_file_operations,
877 .getroot = nfs3_proc_get_root, 886 .getroot = nfs3_proc_get_root,
887 .submount = nfs_submount,
878 .getattr = nfs3_proc_getattr, 888 .getattr = nfs3_proc_getattr,
879 .setattr = nfs3_proc_setattr, 889 .setattr = nfs3_proc_setattr,
880 .lookup = nfs3_proc_lookup, 890 .lookup = nfs3_proc_lookup,
@@ -906,6 +916,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
906 .write_rpc_prepare = nfs3_proc_write_rpc_prepare, 916 .write_rpc_prepare = nfs3_proc_write_rpc_prepare,
907 .write_done = nfs3_write_done, 917 .write_done = nfs3_write_done,
908 .commit_setup = nfs3_proc_commit_setup, 918 .commit_setup = nfs3_proc_commit_setup,
919 .commit_rpc_prepare = nfs3_proc_commit_rpc_prepare,
909 .commit_done = nfs3_commit_done, 920 .commit_done = nfs3_commit_done,
910 .lock = nfs3_proc_lock, 921 .lock = nfs3_proc_lock,
911 .clear_acl_cache = nfs3_forget_cached_acls, 922 .clear_acl_cache = nfs3_forget_cached_acls,
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index a77cc9a3ce55..902de489ec9b 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -86,6 +86,8 @@
86 XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE)) 86 XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE))
87#define ACL3_setaclres_sz (1+NFS3_post_op_attr_sz) 87#define ACL3_setaclres_sz (1+NFS3_post_op_attr_sz)
88 88
89static int nfs3_stat_to_errno(enum nfs_stat);
90
89/* 91/*
90 * Map file type to S_IFMT bits 92 * Map file type to S_IFMT bits
91 */ 93 */
@@ -675,6 +677,7 @@ static int decode_fattr3(struct xdr_stream *xdr, struct nfs_fattr *fattr)
675 p = xdr_decode_nfstime3(p, &fattr->atime); 677 p = xdr_decode_nfstime3(p, &fattr->atime);
676 p = xdr_decode_nfstime3(p, &fattr->mtime); 678 p = xdr_decode_nfstime3(p, &fattr->mtime);
677 xdr_decode_nfstime3(p, &fattr->ctime); 679 xdr_decode_nfstime3(p, &fattr->ctime);
680 fattr->change_attr = nfs_timespec_to_change_attr(&fattr->ctime);
678 681
679 fattr->valid |= NFS_ATTR_FATTR_V3; 682 fattr->valid |= NFS_ATTR_FATTR_V3;
680 return 0; 683 return 0;
@@ -725,12 +728,14 @@ static int decode_wcc_attr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
725 goto out_overflow; 728 goto out_overflow;
726 729
727 fattr->valid |= NFS_ATTR_FATTR_PRESIZE 730 fattr->valid |= NFS_ATTR_FATTR_PRESIZE
731 | NFS_ATTR_FATTR_PRECHANGE
728 | NFS_ATTR_FATTR_PREMTIME 732 | NFS_ATTR_FATTR_PREMTIME
729 | NFS_ATTR_FATTR_PRECTIME; 733 | NFS_ATTR_FATTR_PRECTIME;
730 734
731 p = xdr_decode_size3(p, &fattr->pre_size); 735 p = xdr_decode_size3(p, &fattr->pre_size);
732 p = xdr_decode_nfstime3(p, &fattr->pre_mtime); 736 p = xdr_decode_nfstime3(p, &fattr->pre_mtime);
733 xdr_decode_nfstime3(p, &fattr->pre_ctime); 737 xdr_decode_nfstime3(p, &fattr->pre_ctime);
738 fattr->pre_change_attr = nfs_timespec_to_change_attr(&fattr->pre_ctime);
734 739
735 return 0; 740 return 0;
736out_overflow: 741out_overflow:
@@ -1287,7 +1292,7 @@ static void nfs3_xdr_enc_readdirplus3args(struct rpc_rqst *req,
1287 * }; 1292 * };
1288 */ 1293 */
1289static void encode_commit3args(struct xdr_stream *xdr, 1294static void encode_commit3args(struct xdr_stream *xdr,
1290 const struct nfs_writeargs *args) 1295 const struct nfs_commitargs *args)
1291{ 1296{
1292 __be32 *p; 1297 __be32 *p;
1293 1298
@@ -1300,7 +1305,7 @@ static void encode_commit3args(struct xdr_stream *xdr,
1300 1305
1301static void nfs3_xdr_enc_commit3args(struct rpc_rqst *req, 1306static void nfs3_xdr_enc_commit3args(struct rpc_rqst *req,
1302 struct xdr_stream *xdr, 1307 struct xdr_stream *xdr,
1303 const struct nfs_writeargs *args) 1308 const struct nfs_commitargs *args)
1304{ 1309{
1305 encode_commit3args(xdr, args); 1310 encode_commit3args(xdr, args);
1306} 1311}
@@ -1385,7 +1390,7 @@ static int nfs3_xdr_dec_getattr3res(struct rpc_rqst *req,
1385out: 1390out:
1386 return error; 1391 return error;
1387out_default: 1392out_default:
1388 return nfs_stat_to_errno(status); 1393 return nfs3_stat_to_errno(status);
1389} 1394}
1390 1395
1391/* 1396/*
@@ -1424,7 +1429,7 @@ static int nfs3_xdr_dec_setattr3res(struct rpc_rqst *req,
1424out: 1429out:
1425 return error; 1430 return error;
1426out_status: 1431out_status:
1427 return nfs_stat_to_errno(status); 1432 return nfs3_stat_to_errno(status);
1428} 1433}
1429 1434
1430/* 1435/*
@@ -1472,7 +1477,7 @@ out_default:
1472 error = decode_post_op_attr(xdr, result->dir_attr); 1477 error = decode_post_op_attr(xdr, result->dir_attr);
1473 if (unlikely(error)) 1478 if (unlikely(error))
1474 goto out; 1479 goto out;
1475 return nfs_stat_to_errno(status); 1480 return nfs3_stat_to_errno(status);
1476} 1481}
1477 1482
1478/* 1483/*
@@ -1513,7 +1518,7 @@ static int nfs3_xdr_dec_access3res(struct rpc_rqst *req,
1513out: 1518out:
1514 return error; 1519 return error;
1515out_default: 1520out_default:
1516 return nfs_stat_to_errno(status); 1521 return nfs3_stat_to_errno(status);
1517} 1522}
1518 1523
1519/* 1524/*
@@ -1554,7 +1559,7 @@ static int nfs3_xdr_dec_readlink3res(struct rpc_rqst *req,
1554out: 1559out:
1555 return error; 1560 return error;
1556out_default: 1561out_default:
1557 return nfs_stat_to_errno(status); 1562 return nfs3_stat_to_errno(status);
1558} 1563}
1559 1564
1560/* 1565/*
@@ -1636,7 +1641,7 @@ static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr,
1636out: 1641out:
1637 return error; 1642 return error;
1638out_status: 1643out_status:
1639 return nfs_stat_to_errno(status); 1644 return nfs3_stat_to_errno(status);
1640} 1645}
1641 1646
1642/* 1647/*
@@ -1706,7 +1711,7 @@ static int nfs3_xdr_dec_write3res(struct rpc_rqst *req, struct xdr_stream *xdr,
1706out: 1711out:
1707 return error; 1712 return error;
1708out_status: 1713out_status:
1709 return nfs_stat_to_errno(status); 1714 return nfs3_stat_to_errno(status);
1710} 1715}
1711 1716
1712/* 1717/*
@@ -1770,7 +1775,7 @@ out_default:
1770 error = decode_wcc_data(xdr, result->dir_attr); 1775 error = decode_wcc_data(xdr, result->dir_attr);
1771 if (unlikely(error)) 1776 if (unlikely(error))
1772 goto out; 1777 goto out;
1773 return nfs_stat_to_errno(status); 1778 return nfs3_stat_to_errno(status);
1774} 1779}
1775 1780
1776/* 1781/*
@@ -1809,7 +1814,7 @@ static int nfs3_xdr_dec_remove3res(struct rpc_rqst *req,
1809out: 1814out:
1810 return error; 1815 return error;
1811out_status: 1816out_status:
1812 return nfs_stat_to_errno(status); 1817 return nfs3_stat_to_errno(status);
1813} 1818}
1814 1819
1815/* 1820/*
@@ -1853,7 +1858,7 @@ static int nfs3_xdr_dec_rename3res(struct rpc_rqst *req,
1853out: 1858out:
1854 return error; 1859 return error;
1855out_status: 1860out_status:
1856 return nfs_stat_to_errno(status); 1861 return nfs3_stat_to_errno(status);
1857} 1862}
1858 1863
1859/* 1864/*
@@ -1896,7 +1901,7 @@ static int nfs3_xdr_dec_link3res(struct rpc_rqst *req, struct xdr_stream *xdr,
1896out: 1901out:
1897 return error; 1902 return error;
1898out_status: 1903out_status:
1899 return nfs_stat_to_errno(status); 1904 return nfs3_stat_to_errno(status);
1900} 1905}
1901 1906
1902/** 1907/**
@@ -2088,7 +2093,7 @@ out_default:
2088 error = decode_post_op_attr(xdr, result->dir_attr); 2093 error = decode_post_op_attr(xdr, result->dir_attr);
2089 if (unlikely(error)) 2094 if (unlikely(error))
2090 goto out; 2095 goto out;
2091 return nfs_stat_to_errno(status); 2096 return nfs3_stat_to_errno(status);
2092} 2097}
2093 2098
2094/* 2099/*
@@ -2156,7 +2161,7 @@ static int nfs3_xdr_dec_fsstat3res(struct rpc_rqst *req,
2156out: 2161out:
2157 return error; 2162 return error;
2158out_status: 2163out_status:
2159 return nfs_stat_to_errno(status); 2164 return nfs3_stat_to_errno(status);
2160} 2165}
2161 2166
2162/* 2167/*
@@ -2232,7 +2237,7 @@ static int nfs3_xdr_dec_fsinfo3res(struct rpc_rqst *req,
2232out: 2237out:
2233 return error; 2238 return error;
2234out_status: 2239out_status:
2235 return nfs_stat_to_errno(status); 2240 return nfs3_stat_to_errno(status);
2236} 2241}
2237 2242
2238/* 2243/*
@@ -2295,7 +2300,7 @@ static int nfs3_xdr_dec_pathconf3res(struct rpc_rqst *req,
2295out: 2300out:
2296 return error; 2301 return error;
2297out_status: 2302out_status:
2298 return nfs_stat_to_errno(status); 2303 return nfs3_stat_to_errno(status);
2299} 2304}
2300 2305
2301/* 2306/*
@@ -2319,7 +2324,7 @@ out_status:
2319 */ 2324 */
2320static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req, 2325static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req,
2321 struct xdr_stream *xdr, 2326 struct xdr_stream *xdr,
2322 struct nfs_writeres *result) 2327 struct nfs_commitres *result)
2323{ 2328{
2324 enum nfs_stat status; 2329 enum nfs_stat status;
2325 int error; 2330 int error;
@@ -2336,7 +2341,7 @@ static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req,
2336out: 2341out:
2337 return error; 2342 return error;
2338out_status: 2343out_status:
2339 return nfs_stat_to_errno(status); 2344 return nfs3_stat_to_errno(status);
2340} 2345}
2341 2346
2342#ifdef CONFIG_NFS_V3_ACL 2347#ifdef CONFIG_NFS_V3_ACL
@@ -2401,7 +2406,7 @@ static int nfs3_xdr_dec_getacl3res(struct rpc_rqst *req,
2401out: 2406out:
2402 return error; 2407 return error;
2403out_default: 2408out_default:
2404 return nfs_stat_to_errno(status); 2409 return nfs3_stat_to_errno(status);
2405} 2410}
2406 2411
2407static int nfs3_xdr_dec_setacl3res(struct rpc_rqst *req, 2412static int nfs3_xdr_dec_setacl3res(struct rpc_rqst *req,
@@ -2420,11 +2425,76 @@ static int nfs3_xdr_dec_setacl3res(struct rpc_rqst *req,
2420out: 2425out:
2421 return error; 2426 return error;
2422out_default: 2427out_default:
2423 return nfs_stat_to_errno(status); 2428 return nfs3_stat_to_errno(status);
2424} 2429}
2425 2430
2426#endif /* CONFIG_NFS_V3_ACL */ 2431#endif /* CONFIG_NFS_V3_ACL */
2427 2432
2433
2434/*
2435 * We need to translate between nfs status return values and
2436 * the local errno values which may not be the same.
2437 */
2438static const struct {
2439 int stat;
2440 int errno;
2441} nfs_errtbl[] = {
2442 { NFS_OK, 0 },
2443 { NFSERR_PERM, -EPERM },
2444 { NFSERR_NOENT, -ENOENT },
2445 { NFSERR_IO, -errno_NFSERR_IO},
2446 { NFSERR_NXIO, -ENXIO },
2447/* { NFSERR_EAGAIN, -EAGAIN }, */
2448 { NFSERR_ACCES, -EACCES },
2449 { NFSERR_EXIST, -EEXIST },
2450 { NFSERR_XDEV, -EXDEV },
2451 { NFSERR_NODEV, -ENODEV },
2452 { NFSERR_NOTDIR, -ENOTDIR },
2453 { NFSERR_ISDIR, -EISDIR },
2454 { NFSERR_INVAL, -EINVAL },
2455 { NFSERR_FBIG, -EFBIG },
2456 { NFSERR_NOSPC, -ENOSPC },
2457 { NFSERR_ROFS, -EROFS },
2458 { NFSERR_MLINK, -EMLINK },
2459 { NFSERR_NAMETOOLONG, -ENAMETOOLONG },
2460 { NFSERR_NOTEMPTY, -ENOTEMPTY },
2461 { NFSERR_DQUOT, -EDQUOT },
2462 { NFSERR_STALE, -ESTALE },
2463 { NFSERR_REMOTE, -EREMOTE },
2464#ifdef EWFLUSH
2465 { NFSERR_WFLUSH, -EWFLUSH },
2466#endif
2467 { NFSERR_BADHANDLE, -EBADHANDLE },
2468 { NFSERR_NOT_SYNC, -ENOTSYNC },
2469 { NFSERR_BAD_COOKIE, -EBADCOOKIE },
2470 { NFSERR_NOTSUPP, -ENOTSUPP },
2471 { NFSERR_TOOSMALL, -ETOOSMALL },
2472 { NFSERR_SERVERFAULT, -EREMOTEIO },
2473 { NFSERR_BADTYPE, -EBADTYPE },
2474 { NFSERR_JUKEBOX, -EJUKEBOX },
2475 { -1, -EIO }
2476};
2477
2478/**
2479 * nfs3_stat_to_errno - convert an NFS status code to a local errno
2480 * @status: NFS status code to convert
2481 *
2482 * Returns a local errno value, or -EIO if the NFS status code is
2483 * not recognized. This function is used jointly by NFSv2 and NFSv3.
2484 */
2485static int nfs3_stat_to_errno(enum nfs_stat status)
2486{
2487 int i;
2488
2489 for (i = 0; nfs_errtbl[i].stat != -1; i++) {
2490 if (nfs_errtbl[i].stat == (int)status)
2491 return nfs_errtbl[i].errno;
2492 }
2493 dprintk("NFS: Unrecognized nfs status value: %u\n", status);
2494 return nfs_errtbl[i].errno;
2495}
2496
2497
2428#define PROC(proc, argtype, restype, timer) \ 2498#define PROC(proc, argtype, restype, timer) \
2429[NFS3PROC_##proc] = { \ 2499[NFS3PROC_##proc] = { \
2430 .p_proc = NFS3PROC_##proc, \ 2500 .p_proc = NFS3PROC_##proc, \
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 8d75021020b3..c6827f93ab57 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -24,6 +24,8 @@ enum nfs4_client_state {
24 NFS4CLNT_RECALL_SLOT, 24 NFS4CLNT_RECALL_SLOT,
25 NFS4CLNT_LEASE_CONFIRM, 25 NFS4CLNT_LEASE_CONFIRM,
26 NFS4CLNT_SERVER_SCOPE_MISMATCH, 26 NFS4CLNT_SERVER_SCOPE_MISMATCH,
27 NFS4CLNT_PURGE_STATE,
28 NFS4CLNT_BIND_CONN_TO_SESSION,
27}; 29};
28 30
29enum nfs4_session_state { 31enum nfs4_session_state {
@@ -52,11 +54,6 @@ struct nfs4_minor_version_ops {
52 const struct nfs4_state_maintenance_ops *state_renewal_ops; 54 const struct nfs4_state_maintenance_ops *state_renewal_ops;
53}; 55};
54 56
55struct nfs_unique_id {
56 struct rb_node rb_node;
57 __u64 id;
58};
59
60#define NFS_SEQID_CONFIRMED 1 57#define NFS_SEQID_CONFIRMED 1
61struct nfs_seqid_counter { 58struct nfs_seqid_counter {
62 ktime_t create_time; 59 ktime_t create_time;
@@ -206,12 +203,18 @@ extern const struct dentry_operations nfs4_dentry_operations;
206extern const struct inode_operations nfs4_dir_inode_operations; 203extern const struct inode_operations nfs4_dir_inode_operations;
207 204
208/* nfs4namespace.c */ 205/* nfs4namespace.c */
206rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *);
209struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *, struct inode *, struct qstr *); 207struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *, struct inode *, struct qstr *);
208struct vfsmount *nfs4_submount(struct nfs_server *, struct dentry *,
209 struct nfs_fh *, struct nfs_fattr *);
210 210
211/* nfs4proc.c */ 211/* nfs4proc.c */
212extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *); 212extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *);
213extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *); 213extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *);
214extern int nfs4_proc_get_rootfh(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
215extern int nfs4_proc_bind_conn_to_session(struct nfs_client *, struct rpc_cred *cred);
214extern int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred); 216extern int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred);
217extern int nfs4_destroy_clientid(struct nfs_client *clp);
215extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *); 218extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *);
216extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *); 219extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *);
217extern int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc); 220extern int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc);
@@ -239,8 +242,8 @@ extern int nfs41_setup_sequence(struct nfs4_session *session,
239 struct rpc_task *task); 242 struct rpc_task *task);
240extern void nfs4_destroy_session(struct nfs4_session *session); 243extern void nfs4_destroy_session(struct nfs4_session *session);
241extern struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp); 244extern struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp);
242extern int nfs4_proc_create_session(struct nfs_client *); 245extern int nfs4_proc_create_session(struct nfs_client *, struct rpc_cred *);
243extern int nfs4_proc_destroy_session(struct nfs4_session *); 246extern int nfs4_proc_destroy_session(struct nfs4_session *, struct rpc_cred *);
244extern int nfs4_init_session(struct nfs_server *server); 247extern int nfs4_init_session(struct nfs_server *server);
245extern int nfs4_proc_get_lease_time(struct nfs_client *clp, 248extern int nfs4_proc_get_lease_time(struct nfs_client *clp,
246 struct nfs_fsinfo *fsinfo); 249 struct nfs_fsinfo *fsinfo);
@@ -310,9 +313,9 @@ struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp);
310#if defined(CONFIG_NFS_V4_1) 313#if defined(CONFIG_NFS_V4_1)
311struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp); 314struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp);
312struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp); 315struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp);
313extern void nfs4_schedule_session_recovery(struct nfs4_session *); 316extern void nfs4_schedule_session_recovery(struct nfs4_session *, int);
314#else 317#else
315static inline void nfs4_schedule_session_recovery(struct nfs4_session *session) 318static inline void nfs4_schedule_session_recovery(struct nfs4_session *session, int err)
316{ 319{
317} 320}
318#endif /* CONFIG_NFS_V4_1 */ 321#endif /* CONFIG_NFS_V4_1 */
@@ -334,7 +337,7 @@ extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs
334extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags); 337extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags);
335extern void nfs41_handle_recall_slot(struct nfs_client *clp); 338extern void nfs41_handle_recall_slot(struct nfs_client *clp);
336extern void nfs41_handle_server_scope(struct nfs_client *, 339extern void nfs41_handle_server_scope(struct nfs_client *,
337 struct server_scope **); 340 struct nfs41_server_scope **);
338extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); 341extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
339extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); 342extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
340extern void nfs4_select_rw_stateid(nfs4_stateid *, struct nfs4_state *, 343extern void nfs4_select_rw_stateid(nfs4_stateid *, struct nfs4_state *,
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 5acfd9ea8a31..e1340293872c 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -82,29 +82,76 @@ filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset)
82 BUG(); 82 BUG();
83} 83}
84 84
85static void filelayout_reset_write(struct nfs_write_data *data)
86{
87 struct nfs_pgio_header *hdr = data->header;
88 struct rpc_task *task = &data->task;
89
90 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
91 dprintk("%s Reset task %5u for i/o through MDS "
92 "(req %s/%lld, %u bytes @ offset %llu)\n", __func__,
93 data->task.tk_pid,
94 hdr->inode->i_sb->s_id,
95 (long long)NFS_FILEID(hdr->inode),
96 data->args.count,
97 (unsigned long long)data->args.offset);
98
99 task->tk_status = pnfs_write_done_resend_to_mds(hdr->inode,
100 &hdr->pages,
101 hdr->completion_ops);
102 }
103}
104
105static void filelayout_reset_read(struct nfs_read_data *data)
106{
107 struct nfs_pgio_header *hdr = data->header;
108 struct rpc_task *task = &data->task;
109
110 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
111 dprintk("%s Reset task %5u for i/o through MDS "
112 "(req %s/%lld, %u bytes @ offset %llu)\n", __func__,
113 data->task.tk_pid,
114 hdr->inode->i_sb->s_id,
115 (long long)NFS_FILEID(hdr->inode),
116 data->args.count,
117 (unsigned long long)data->args.offset);
118
119 task->tk_status = pnfs_read_done_resend_to_mds(hdr->inode,
120 &hdr->pages,
121 hdr->completion_ops);
122 }
123}
124
85static int filelayout_async_handle_error(struct rpc_task *task, 125static int filelayout_async_handle_error(struct rpc_task *task,
86 struct nfs4_state *state, 126 struct nfs4_state *state,
87 struct nfs_client *clp, 127 struct nfs_client *clp,
88 int *reset) 128 struct pnfs_layout_segment *lseg)
89{ 129{
90 struct nfs_server *mds_server = NFS_SERVER(state->inode); 130 struct inode *inode = lseg->pls_layout->plh_inode;
131 struct nfs_server *mds_server = NFS_SERVER(inode);
132 struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg);
91 struct nfs_client *mds_client = mds_server->nfs_client; 133 struct nfs_client *mds_client = mds_server->nfs_client;
134 struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table;
92 135
93 if (task->tk_status >= 0) 136 if (task->tk_status >= 0)
94 return 0; 137 return 0;
95 *reset = 0;
96 138
97 switch (task->tk_status) { 139 switch (task->tk_status) {
98 /* MDS state errors */ 140 /* MDS state errors */
99 case -NFS4ERR_DELEG_REVOKED: 141 case -NFS4ERR_DELEG_REVOKED:
100 case -NFS4ERR_ADMIN_REVOKED: 142 case -NFS4ERR_ADMIN_REVOKED:
101 case -NFS4ERR_BAD_STATEID: 143 case -NFS4ERR_BAD_STATEID:
144 if (state == NULL)
145 break;
102 nfs_remove_bad_delegation(state->inode); 146 nfs_remove_bad_delegation(state->inode);
103 case -NFS4ERR_OPENMODE: 147 case -NFS4ERR_OPENMODE:
148 if (state == NULL)
149 break;
104 nfs4_schedule_stateid_recovery(mds_server, state); 150 nfs4_schedule_stateid_recovery(mds_server, state);
105 goto wait_on_recovery; 151 goto wait_on_recovery;
106 case -NFS4ERR_EXPIRED: 152 case -NFS4ERR_EXPIRED:
107 nfs4_schedule_stateid_recovery(mds_server, state); 153 if (state != NULL)
154 nfs4_schedule_stateid_recovery(mds_server, state);
108 nfs4_schedule_lease_recovery(mds_client); 155 nfs4_schedule_lease_recovery(mds_client);
109 goto wait_on_recovery; 156 goto wait_on_recovery;
110 /* DS session errors */ 157 /* DS session errors */
@@ -118,7 +165,7 @@ static int filelayout_async_handle_error(struct rpc_task *task,
118 dprintk("%s ERROR %d, Reset session. Exchangeid " 165 dprintk("%s ERROR %d, Reset session. Exchangeid "
119 "flags 0x%x\n", __func__, task->tk_status, 166 "flags 0x%x\n", __func__, task->tk_status,
120 clp->cl_exchange_flags); 167 clp->cl_exchange_flags);
121 nfs4_schedule_session_recovery(clp->cl_session); 168 nfs4_schedule_session_recovery(clp->cl_session, task->tk_status);
122 break; 169 break;
123 case -NFS4ERR_DELAY: 170 case -NFS4ERR_DELAY:
124 case -NFS4ERR_GRACE: 171 case -NFS4ERR_GRACE:
@@ -127,11 +174,48 @@ static int filelayout_async_handle_error(struct rpc_task *task,
127 break; 174 break;
128 case -NFS4ERR_RETRY_UNCACHED_REP: 175 case -NFS4ERR_RETRY_UNCACHED_REP:
129 break; 176 break;
177 /* Invalidate Layout errors */
178 case -NFS4ERR_PNFS_NO_LAYOUT:
179 case -ESTALE: /* mapped NFS4ERR_STALE */
180 case -EBADHANDLE: /* mapped NFS4ERR_BADHANDLE */
181 case -EISDIR: /* mapped NFS4ERR_ISDIR */
182 case -NFS4ERR_FHEXPIRED:
183 case -NFS4ERR_WRONG_TYPE:
184 dprintk("%s Invalid layout error %d\n", __func__,
185 task->tk_status);
186 /*
187 * Destroy layout so new i/o will get a new layout.
188 * Layout will not be destroyed until all current lseg
189 * references are put. Mark layout as invalid to resend failed
190 * i/o and all i/o waiting on the slot table to the MDS until
191 * layout is destroyed and a new valid layout is obtained.
192 */
193 set_bit(NFS_LAYOUT_INVALID,
194 &NFS_I(inode)->layout->plh_flags);
195 pnfs_destroy_layout(NFS_I(inode));
196 rpc_wake_up(&tbl->slot_tbl_waitq);
197 goto reset;
198 /* RPC connection errors */
199 case -ECONNREFUSED:
200 case -EHOSTDOWN:
201 case -EHOSTUNREACH:
202 case -ENETUNREACH:
203 case -EIO:
204 case -ETIMEDOUT:
205 case -EPIPE:
206 dprintk("%s DS connection error %d\n", __func__,
207 task->tk_status);
208 if (!filelayout_test_devid_invalid(devid))
209 _pnfs_return_layout(inode);
210 filelayout_mark_devid_invalid(devid);
211 rpc_wake_up(&tbl->slot_tbl_waitq);
212 nfs4_ds_disconnect(clp);
213 /* fall through */
130 default: 214 default:
131 dprintk("%s DS error. Retry through MDS %d\n", __func__, 215reset:
216 dprintk("%s Retry through MDS. Error %d\n", __func__,
132 task->tk_status); 217 task->tk_status);
133 *reset = 1; 218 return -NFS4ERR_RESET_TO_MDS;
134 break;
135 } 219 }
136out: 220out:
137 task->tk_status = 0; 221 task->tk_status = 0;
@@ -148,18 +232,17 @@ wait_on_recovery:
148static int filelayout_read_done_cb(struct rpc_task *task, 232static int filelayout_read_done_cb(struct rpc_task *task,
149 struct nfs_read_data *data) 233 struct nfs_read_data *data)
150{ 234{
151 int reset = 0; 235 struct nfs_pgio_header *hdr = data->header;
236 int err;
152 237
153 dprintk("%s DS read\n", __func__); 238 err = filelayout_async_handle_error(task, data->args.context->state,
239 data->ds_clp, hdr->lseg);
154 240
155 if (filelayout_async_handle_error(task, data->args.context->state, 241 switch (err) {
156 data->ds_clp, &reset) == -EAGAIN) { 242 case -NFS4ERR_RESET_TO_MDS:
157 dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n", 243 filelayout_reset_read(data);
158 __func__, data->ds_clp, data->ds_clp->cl_session); 244 return task->tk_status;
159 if (reset) { 245 case -EAGAIN:
160 pnfs_set_lo_fail(data->lseg);
161 nfs4_reset_read(task, data);
162 }
163 rpc_restart_call_prepare(task); 246 rpc_restart_call_prepare(task);
164 return -EAGAIN; 247 return -EAGAIN;
165 } 248 }
@@ -175,13 +258,15 @@ static int filelayout_read_done_cb(struct rpc_task *task,
175static void 258static void
176filelayout_set_layoutcommit(struct nfs_write_data *wdata) 259filelayout_set_layoutcommit(struct nfs_write_data *wdata)
177{ 260{
178 if (FILELAYOUT_LSEG(wdata->lseg)->commit_through_mds || 261 struct nfs_pgio_header *hdr = wdata->header;
262
263 if (FILELAYOUT_LSEG(hdr->lseg)->commit_through_mds ||
179 wdata->res.verf->committed == NFS_FILE_SYNC) 264 wdata->res.verf->committed == NFS_FILE_SYNC)
180 return; 265 return;
181 266
182 pnfs_set_layoutcommit(wdata); 267 pnfs_set_layoutcommit(wdata);
183 dprintk("%s ionde %lu pls_end_pos %lu\n", __func__, wdata->inode->i_ino, 268 dprintk("%s ionde %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino,
184 (unsigned long) NFS_I(wdata->inode)->layout->plh_lwb); 269 (unsigned long) NFS_I(hdr->inode)->layout->plh_lwb);
185} 270}
186 271
187/* 272/*
@@ -191,8 +276,14 @@ filelayout_set_layoutcommit(struct nfs_write_data *wdata)
191 */ 276 */
192static void filelayout_read_prepare(struct rpc_task *task, void *data) 277static void filelayout_read_prepare(struct rpc_task *task, void *data)
193{ 278{
194 struct nfs_read_data *rdata = (struct nfs_read_data *)data; 279 struct nfs_read_data *rdata = data;
195 280
281 if (filelayout_reset_to_mds(rdata->header->lseg)) {
282 dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid);
283 filelayout_reset_read(rdata);
284 rpc_exit(task, 0);
285 return;
286 }
196 rdata->read_done_cb = filelayout_read_done_cb; 287 rdata->read_done_cb = filelayout_read_done_cb;
197 288
198 if (nfs41_setup_sequence(rdata->ds_clp->cl_session, 289 if (nfs41_setup_sequence(rdata->ds_clp->cl_session,
@@ -205,42 +296,47 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data)
205 296
206static void filelayout_read_call_done(struct rpc_task *task, void *data) 297static void filelayout_read_call_done(struct rpc_task *task, void *data)
207{ 298{
208 struct nfs_read_data *rdata = (struct nfs_read_data *)data; 299 struct nfs_read_data *rdata = data;
209 300
210 dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status); 301 dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status);
211 302
303 if (test_bit(NFS_IOHDR_REDO, &rdata->header->flags) &&
304 task->tk_status == 0)
305 return;
306
212 /* Note this may cause RPC to be resent */ 307 /* Note this may cause RPC to be resent */
213 rdata->mds_ops->rpc_call_done(task, data); 308 rdata->header->mds_ops->rpc_call_done(task, data);
214} 309}
215 310
216static void filelayout_read_count_stats(struct rpc_task *task, void *data) 311static void filelayout_read_count_stats(struct rpc_task *task, void *data)
217{ 312{
218 struct nfs_read_data *rdata = (struct nfs_read_data *)data; 313 struct nfs_read_data *rdata = data;
219 314
220 rpc_count_iostats(task, NFS_SERVER(rdata->inode)->client->cl_metrics); 315 rpc_count_iostats(task, NFS_SERVER(rdata->header->inode)->client->cl_metrics);
221} 316}
222 317
223static void filelayout_read_release(void *data) 318static void filelayout_read_release(void *data)
224{ 319{
225 struct nfs_read_data *rdata = (struct nfs_read_data *)data; 320 struct nfs_read_data *rdata = data;
226 321
227 put_lseg(rdata->lseg); 322 nfs_put_client(rdata->ds_clp);
228 rdata->mds_ops->rpc_release(data); 323 rdata->header->mds_ops->rpc_release(data);
229} 324}
230 325
231static int filelayout_write_done_cb(struct rpc_task *task, 326static int filelayout_write_done_cb(struct rpc_task *task,
232 struct nfs_write_data *data) 327 struct nfs_write_data *data)
233{ 328{
234 int reset = 0; 329 struct nfs_pgio_header *hdr = data->header;
235 330 int err;
236 if (filelayout_async_handle_error(task, data->args.context->state, 331
237 data->ds_clp, &reset) == -EAGAIN) { 332 err = filelayout_async_handle_error(task, data->args.context->state,
238 dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n", 333 data->ds_clp, hdr->lseg);
239 __func__, data->ds_clp, data->ds_clp->cl_session); 334
240 if (reset) { 335 switch (err) {
241 pnfs_set_lo_fail(data->lseg); 336 case -NFS4ERR_RESET_TO_MDS:
242 nfs4_reset_write(task, data); 337 filelayout_reset_write(data);
243 } 338 return task->tk_status;
339 case -EAGAIN:
244 rpc_restart_call_prepare(task); 340 rpc_restart_call_prepare(task);
245 return -EAGAIN; 341 return -EAGAIN;
246 } 342 }
@@ -250,7 +346,7 @@ static int filelayout_write_done_cb(struct rpc_task *task,
250} 346}
251 347
252/* Fake up some data that will cause nfs_commit_release to retry the writes. */ 348/* Fake up some data that will cause nfs_commit_release to retry the writes. */
253static void prepare_to_resend_writes(struct nfs_write_data *data) 349static void prepare_to_resend_writes(struct nfs_commit_data *data)
254{ 350{
255 struct nfs_page *first = nfs_list_entry(data->pages.next); 351 struct nfs_page *first = nfs_list_entry(data->pages.next);
256 352
@@ -261,19 +357,19 @@ static void prepare_to_resend_writes(struct nfs_write_data *data)
261} 357}
262 358
263static int filelayout_commit_done_cb(struct rpc_task *task, 359static int filelayout_commit_done_cb(struct rpc_task *task,
264 struct nfs_write_data *data) 360 struct nfs_commit_data *data)
265{ 361{
266 int reset = 0; 362 int err;
267 363
268 if (filelayout_async_handle_error(task, data->args.context->state, 364 err = filelayout_async_handle_error(task, NULL, data->ds_clp,
269 data->ds_clp, &reset) == -EAGAIN) { 365 data->lseg);
270 dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n", 366
271 __func__, data->ds_clp, data->ds_clp->cl_session); 367 switch (err) {
272 if (reset) { 368 case -NFS4ERR_RESET_TO_MDS:
273 prepare_to_resend_writes(data); 369 prepare_to_resend_writes(data);
274 pnfs_set_lo_fail(data->lseg); 370 return -EAGAIN;
275 } else 371 case -EAGAIN:
276 rpc_restart_call_prepare(task); 372 rpc_restart_call_prepare(task);
277 return -EAGAIN; 373 return -EAGAIN;
278 } 374 }
279 375
@@ -282,8 +378,14 @@ static int filelayout_commit_done_cb(struct rpc_task *task,
282 378
283static void filelayout_write_prepare(struct rpc_task *task, void *data) 379static void filelayout_write_prepare(struct rpc_task *task, void *data)
284{ 380{
285 struct nfs_write_data *wdata = (struct nfs_write_data *)data; 381 struct nfs_write_data *wdata = data;
286 382
383 if (filelayout_reset_to_mds(wdata->header->lseg)) {
384 dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid);
385 filelayout_reset_write(wdata);
386 rpc_exit(task, 0);
387 return;
388 }
287 if (nfs41_setup_sequence(wdata->ds_clp->cl_session, 389 if (nfs41_setup_sequence(wdata->ds_clp->cl_session,
288 &wdata->args.seq_args, &wdata->res.seq_res, 390 &wdata->args.seq_args, &wdata->res.seq_res,
289 task)) 391 task))
@@ -294,36 +396,66 @@ static void filelayout_write_prepare(struct rpc_task *task, void *data)
294 396
295static void filelayout_write_call_done(struct rpc_task *task, void *data) 397static void filelayout_write_call_done(struct rpc_task *task, void *data)
296{ 398{
297 struct nfs_write_data *wdata = (struct nfs_write_data *)data; 399 struct nfs_write_data *wdata = data;
400
401 if (test_bit(NFS_IOHDR_REDO, &wdata->header->flags) &&
402 task->tk_status == 0)
403 return;
298 404
299 /* Note this may cause RPC to be resent */ 405 /* Note this may cause RPC to be resent */
300 wdata->mds_ops->rpc_call_done(task, data); 406 wdata->header->mds_ops->rpc_call_done(task, data);
301} 407}
302 408
303static void filelayout_write_count_stats(struct rpc_task *task, void *data) 409static void filelayout_write_count_stats(struct rpc_task *task, void *data)
304{ 410{
305 struct nfs_write_data *wdata = (struct nfs_write_data *)data; 411 struct nfs_write_data *wdata = data;
306 412
307 rpc_count_iostats(task, NFS_SERVER(wdata->inode)->client->cl_metrics); 413 rpc_count_iostats(task, NFS_SERVER(wdata->header->inode)->client->cl_metrics);
308} 414}
309 415
310static void filelayout_write_release(void *data) 416static void filelayout_write_release(void *data)
311{ 417{
312 struct nfs_write_data *wdata = (struct nfs_write_data *)data; 418 struct nfs_write_data *wdata = data;
419
420 nfs_put_client(wdata->ds_clp);
421 wdata->header->mds_ops->rpc_release(data);
422}
423
424static void filelayout_commit_prepare(struct rpc_task *task, void *data)
425{
426 struct nfs_commit_data *wdata = data;
313 427
314 put_lseg(wdata->lseg); 428 if (nfs41_setup_sequence(wdata->ds_clp->cl_session,
315 wdata->mds_ops->rpc_release(data); 429 &wdata->args.seq_args, &wdata->res.seq_res,
430 task))
431 return;
432
433 rpc_call_start(task);
434}
435
436static void filelayout_write_commit_done(struct rpc_task *task, void *data)
437{
438 struct nfs_commit_data *wdata = data;
439
440 /* Note this may cause RPC to be resent */
441 wdata->mds_ops->rpc_call_done(task, data);
442}
443
444static void filelayout_commit_count_stats(struct rpc_task *task, void *data)
445{
446 struct nfs_commit_data *cdata = data;
447
448 rpc_count_iostats(task, NFS_SERVER(cdata->inode)->client->cl_metrics);
316} 449}
317 450
318static void filelayout_commit_release(void *data) 451static void filelayout_commit_release(void *calldata)
319{ 452{
320 struct nfs_write_data *wdata = (struct nfs_write_data *)data; 453 struct nfs_commit_data *data = calldata;
321 454
322 nfs_commit_release_pages(wdata); 455 data->completion_ops->completion(data);
323 if (atomic_dec_and_test(&NFS_I(wdata->inode)->commits_outstanding)) 456 put_lseg(data->lseg);
324 nfs_commit_clear_lock(NFS_I(wdata->inode)); 457 nfs_put_client(data->ds_clp);
325 put_lseg(wdata->lseg); 458 nfs_commitdata_release(data);
326 nfs_commitdata_release(wdata);
327} 459}
328 460
329static const struct rpc_call_ops filelayout_read_call_ops = { 461static const struct rpc_call_ops filelayout_read_call_ops = {
@@ -341,16 +473,17 @@ static const struct rpc_call_ops filelayout_write_call_ops = {
341}; 473};
342 474
343static const struct rpc_call_ops filelayout_commit_call_ops = { 475static const struct rpc_call_ops filelayout_commit_call_ops = {
344 .rpc_call_prepare = filelayout_write_prepare, 476 .rpc_call_prepare = filelayout_commit_prepare,
345 .rpc_call_done = filelayout_write_call_done, 477 .rpc_call_done = filelayout_write_commit_done,
346 .rpc_count_stats = filelayout_write_count_stats, 478 .rpc_count_stats = filelayout_commit_count_stats,
347 .rpc_release = filelayout_commit_release, 479 .rpc_release = filelayout_commit_release,
348}; 480};
349 481
350static enum pnfs_try_status 482static enum pnfs_try_status
351filelayout_read_pagelist(struct nfs_read_data *data) 483filelayout_read_pagelist(struct nfs_read_data *data)
352{ 484{
353 struct pnfs_layout_segment *lseg = data->lseg; 485 struct nfs_pgio_header *hdr = data->header;
486 struct pnfs_layout_segment *lseg = hdr->lseg;
354 struct nfs4_pnfs_ds *ds; 487 struct nfs4_pnfs_ds *ds;
355 loff_t offset = data->args.offset; 488 loff_t offset = data->args.offset;
356 u32 j, idx; 489 u32 j, idx;
@@ -358,25 +491,20 @@ filelayout_read_pagelist(struct nfs_read_data *data)
358 int status; 491 int status;
359 492
360 dprintk("--> %s ino %lu pgbase %u req %Zu@%llu\n", 493 dprintk("--> %s ino %lu pgbase %u req %Zu@%llu\n",
361 __func__, data->inode->i_ino, 494 __func__, hdr->inode->i_ino,
362 data->args.pgbase, (size_t)data->args.count, offset); 495 data->args.pgbase, (size_t)data->args.count, offset);
363 496
364 if (test_bit(NFS_DEVICEID_INVALID, &FILELAYOUT_DEVID_NODE(lseg)->flags))
365 return PNFS_NOT_ATTEMPTED;
366
367 /* Retrieve the correct rpc_client for the byte range */ 497 /* Retrieve the correct rpc_client for the byte range */
368 j = nfs4_fl_calc_j_index(lseg, offset); 498 j = nfs4_fl_calc_j_index(lseg, offset);
369 idx = nfs4_fl_calc_ds_index(lseg, j); 499 idx = nfs4_fl_calc_ds_index(lseg, j);
370 ds = nfs4_fl_prepare_ds(lseg, idx); 500 ds = nfs4_fl_prepare_ds(lseg, idx);
371 if (!ds) { 501 if (!ds)
372 /* Either layout fh index faulty, or ds connect failed */
373 set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
374 set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
375 return PNFS_NOT_ATTEMPTED; 502 return PNFS_NOT_ATTEMPTED;
376 } 503 dprintk("%s USE DS: %s cl_count %d\n", __func__,
377 dprintk("%s USE DS: %s\n", __func__, ds->ds_remotestr); 504 ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count));
378 505
379 /* No multipath support. Use first DS */ 506 /* No multipath support. Use first DS */
507 atomic_inc(&ds->ds_clp->cl_count);
380 data->ds_clp = ds->ds_clp; 508 data->ds_clp = ds->ds_clp;
381 fh = nfs4_fl_select_ds_fh(lseg, j); 509 fh = nfs4_fl_select_ds_fh(lseg, j);
382 if (fh) 510 if (fh)
@@ -386,8 +514,8 @@ filelayout_read_pagelist(struct nfs_read_data *data)
386 data->mds_offset = offset; 514 data->mds_offset = offset;
387 515
388 /* Perform an asynchronous read to ds */ 516 /* Perform an asynchronous read to ds */
389 status = nfs_initiate_read(data, ds->ds_clp->cl_rpcclient, 517 status = nfs_initiate_read(ds->ds_clp->cl_rpcclient, data,
390 &filelayout_read_call_ops); 518 &filelayout_read_call_ops, RPC_TASK_SOFTCONN);
391 BUG_ON(status != 0); 519 BUG_ON(status != 0);
392 return PNFS_ATTEMPTED; 520 return PNFS_ATTEMPTED;
393} 521}
@@ -396,32 +524,26 @@ filelayout_read_pagelist(struct nfs_read_data *data)
396static enum pnfs_try_status 524static enum pnfs_try_status
397filelayout_write_pagelist(struct nfs_write_data *data, int sync) 525filelayout_write_pagelist(struct nfs_write_data *data, int sync)
398{ 526{
399 struct pnfs_layout_segment *lseg = data->lseg; 527 struct nfs_pgio_header *hdr = data->header;
528 struct pnfs_layout_segment *lseg = hdr->lseg;
400 struct nfs4_pnfs_ds *ds; 529 struct nfs4_pnfs_ds *ds;
401 loff_t offset = data->args.offset; 530 loff_t offset = data->args.offset;
402 u32 j, idx; 531 u32 j, idx;
403 struct nfs_fh *fh; 532 struct nfs_fh *fh;
404 int status; 533 int status;
405 534
406 if (test_bit(NFS_DEVICEID_INVALID, &FILELAYOUT_DEVID_NODE(lseg)->flags))
407 return PNFS_NOT_ATTEMPTED;
408
409 /* Retrieve the correct rpc_client for the byte range */ 535 /* Retrieve the correct rpc_client for the byte range */
410 j = nfs4_fl_calc_j_index(lseg, offset); 536 j = nfs4_fl_calc_j_index(lseg, offset);
411 idx = nfs4_fl_calc_ds_index(lseg, j); 537 idx = nfs4_fl_calc_ds_index(lseg, j);
412 ds = nfs4_fl_prepare_ds(lseg, idx); 538 ds = nfs4_fl_prepare_ds(lseg, idx);
413 if (!ds) { 539 if (!ds)
414 printk(KERN_ERR "NFS: %s: prepare_ds failed, use MDS\n",
415 __func__);
416 set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
417 set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
418 return PNFS_NOT_ATTEMPTED; 540 return PNFS_NOT_ATTEMPTED;
419 } 541 dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s cl_count %d\n",
420 dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s\n", __func__, 542 __func__, hdr->inode->i_ino, sync, (size_t) data->args.count,
421 data->inode->i_ino, sync, (size_t) data->args.count, offset, 543 offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count));
422 ds->ds_remotestr);
423 544
424 data->write_done_cb = filelayout_write_done_cb; 545 data->write_done_cb = filelayout_write_done_cb;
546 atomic_inc(&ds->ds_clp->cl_count);
425 data->ds_clp = ds->ds_clp; 547 data->ds_clp = ds->ds_clp;
426 fh = nfs4_fl_select_ds_fh(lseg, j); 548 fh = nfs4_fl_select_ds_fh(lseg, j);
427 if (fh) 549 if (fh)
@@ -433,8 +555,9 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync)
433 data->args.offset = filelayout_get_dserver_offset(lseg, offset); 555 data->args.offset = filelayout_get_dserver_offset(lseg, offset);
434 556
435 /* Perform an asynchronous write */ 557 /* Perform an asynchronous write */
436 status = nfs_initiate_write(data, ds->ds_clp->cl_rpcclient, 558 status = nfs_initiate_write(ds->ds_clp->cl_rpcclient, data,
437 &filelayout_write_call_ops, sync); 559 &filelayout_write_call_ops, sync,
560 RPC_TASK_SOFTCONN);
438 BUG_ON(status != 0); 561 BUG_ON(status != 0);
439 return PNFS_ATTEMPTED; 562 return PNFS_ATTEMPTED;
440} 563}
@@ -650,10 +773,65 @@ filelayout_free_lseg(struct pnfs_layout_segment *lseg)
650 773
651 dprintk("--> %s\n", __func__); 774 dprintk("--> %s\n", __func__);
652 nfs4_fl_put_deviceid(fl->dsaddr); 775 nfs4_fl_put_deviceid(fl->dsaddr);
653 kfree(fl->commit_buckets); 776 /* This assumes a single RW lseg */
777 if (lseg->pls_range.iomode == IOMODE_RW) {
778 struct nfs4_filelayout *flo;
779
780 flo = FILELAYOUT_FROM_HDR(lseg->pls_layout);
781 flo->commit_info.nbuckets = 0;
782 kfree(flo->commit_info.buckets);
783 flo->commit_info.buckets = NULL;
784 }
654 _filelayout_free_lseg(fl); 785 _filelayout_free_lseg(fl);
655} 786}
656 787
788static int
789filelayout_alloc_commit_info(struct pnfs_layout_segment *lseg,
790 struct nfs_commit_info *cinfo,
791 gfp_t gfp_flags)
792{
793 struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
794 struct pnfs_commit_bucket *buckets;
795 int size;
796
797 if (fl->commit_through_mds)
798 return 0;
799 if (cinfo->ds->nbuckets != 0) {
800 /* This assumes there is only one IOMODE_RW lseg. What
801 * we really want to do is have a layout_hdr level
802 * dictionary of <multipath_list4, fh> keys, each
803 * associated with a struct list_head, populated by calls
804 * to filelayout_write_pagelist().
805 * */
806 return 0;
807 }
808
809 size = (fl->stripe_type == STRIPE_SPARSE) ?
810 fl->dsaddr->ds_num : fl->dsaddr->stripe_count;
811
812 buckets = kcalloc(size, sizeof(struct pnfs_commit_bucket),
813 gfp_flags);
814 if (!buckets)
815 return -ENOMEM;
816 else {
817 int i;
818
819 spin_lock(cinfo->lock);
820 if (cinfo->ds->nbuckets != 0)
821 kfree(buckets);
822 else {
823 cinfo->ds->buckets = buckets;
824 cinfo->ds->nbuckets = size;
825 for (i = 0; i < size; i++) {
826 INIT_LIST_HEAD(&buckets[i].written);
827 INIT_LIST_HEAD(&buckets[i].committing);
828 }
829 }
830 spin_unlock(cinfo->lock);
831 return 0;
832 }
833}
834
657static struct pnfs_layout_segment * 835static struct pnfs_layout_segment *
658filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid, 836filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
659 struct nfs4_layoutget_res *lgr, 837 struct nfs4_layoutget_res *lgr,
@@ -673,29 +851,6 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
673 _filelayout_free_lseg(fl); 851 _filelayout_free_lseg(fl);
674 return NULL; 852 return NULL;
675 } 853 }
676
677 /* This assumes there is only one IOMODE_RW lseg. What
678 * we really want to do is have a layout_hdr level
679 * dictionary of <multipath_list4, fh> keys, each
680 * associated with a struct list_head, populated by calls
681 * to filelayout_write_pagelist().
682 * */
683 if ((!fl->commit_through_mds) && (lgr->range.iomode == IOMODE_RW)) {
684 int i;
685 int size = (fl->stripe_type == STRIPE_SPARSE) ?
686 fl->dsaddr->ds_num : fl->dsaddr->stripe_count;
687
688 fl->commit_buckets = kcalloc(size, sizeof(struct nfs4_fl_commit_bucket), gfp_flags);
689 if (!fl->commit_buckets) {
690 filelayout_free_lseg(&fl->generic_hdr);
691 return NULL;
692 }
693 fl->number_of_buckets = size;
694 for (i = 0; i < size; i++) {
695 INIT_LIST_HEAD(&fl->commit_buckets[i].written);
696 INIT_LIST_HEAD(&fl->commit_buckets[i].committing);
697 }
698 }
699 return &fl->generic_hdr; 854 return &fl->generic_hdr;
700} 855}
701 856
@@ -716,8 +871,8 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
716 !nfs_generic_pg_test(pgio, prev, req)) 871 !nfs_generic_pg_test(pgio, prev, req))
717 return false; 872 return false;
718 873
719 p_stripe = (u64)prev->wb_index << PAGE_CACHE_SHIFT; 874 p_stripe = (u64)req_offset(prev);
720 r_stripe = (u64)req->wb_index << PAGE_CACHE_SHIFT; 875 r_stripe = (u64)req_offset(req);
721 stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit; 876 stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit;
722 877
723 do_div(p_stripe, stripe_unit); 878 do_div(p_stripe, stripe_unit);
@@ -732,6 +887,16 @@ filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio,
732{ 887{
733 BUG_ON(pgio->pg_lseg != NULL); 888 BUG_ON(pgio->pg_lseg != NULL);
734 889
890 if (req->wb_offset != req->wb_pgbase) {
891 /*
892 * Handling unaligned pages is difficult, because have to
893 * somehow split a req in two in certain cases in the
894 * pg.test code. Avoid this by just not using pnfs
895 * in this case.
896 */
897 nfs_pageio_reset_read_mds(pgio);
898 return;
899 }
735 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 900 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
736 req->wb_context, 901 req->wb_context,
737 0, 902 0,
@@ -747,8 +912,13 @@ static void
747filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio, 912filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
748 struct nfs_page *req) 913 struct nfs_page *req)
749{ 914{
915 struct nfs_commit_info cinfo;
916 int status;
917
750 BUG_ON(pgio->pg_lseg != NULL); 918 BUG_ON(pgio->pg_lseg != NULL);
751 919
920 if (req->wb_offset != req->wb_pgbase)
921 goto out_mds;
752 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 922 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
753 req->wb_context, 923 req->wb_context,
754 0, 924 0,
@@ -757,7 +927,17 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
757 GFP_NOFS); 927 GFP_NOFS);
758 /* If no lseg, fall back to write through mds */ 928 /* If no lseg, fall back to write through mds */
759 if (pgio->pg_lseg == NULL) 929 if (pgio->pg_lseg == NULL)
760 nfs_pageio_reset_write_mds(pgio); 930 goto out_mds;
931 nfs_init_cinfo(&cinfo, pgio->pg_inode, pgio->pg_dreq);
932 status = filelayout_alloc_commit_info(pgio->pg_lseg, &cinfo, GFP_NOFS);
933 if (status < 0) {
934 put_lseg(pgio->pg_lseg);
935 pgio->pg_lseg = NULL;
936 goto out_mds;
937 }
938 return;
939out_mds:
940 nfs_pageio_reset_write_mds(pgio);
761} 941}
762 942
763static const struct nfs_pageio_ops filelayout_pg_read_ops = { 943static const struct nfs_pageio_ops filelayout_pg_read_ops = {
@@ -784,43 +964,42 @@ static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j)
784 * If this will make the bucket empty, it will need to put the lseg reference. 964 * If this will make the bucket empty, it will need to put the lseg reference.
785 */ 965 */
786static void 966static void
787filelayout_clear_request_commit(struct nfs_page *req) 967filelayout_clear_request_commit(struct nfs_page *req,
968 struct nfs_commit_info *cinfo)
788{ 969{
789 struct pnfs_layout_segment *freeme = NULL; 970 struct pnfs_layout_segment *freeme = NULL;
790 struct inode *inode = req->wb_context->dentry->d_inode;
791 971
792 spin_lock(&inode->i_lock); 972 spin_lock(cinfo->lock);
793 if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags)) 973 if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags))
794 goto out; 974 goto out;
975 cinfo->ds->nwritten--;
795 if (list_is_singular(&req->wb_list)) { 976 if (list_is_singular(&req->wb_list)) {
796 struct pnfs_layout_segment *lseg; 977 struct pnfs_commit_bucket *bucket;
797 978
798 /* From here we can find the bucket, but for the moment, 979 bucket = list_first_entry(&req->wb_list,
799 * since there is only one relevant lseg... 980 struct pnfs_commit_bucket,
800 */ 981 written);
801 list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) { 982 freeme = bucket->wlseg;
802 if (lseg->pls_range.iomode == IOMODE_RW) { 983 bucket->wlseg = NULL;
803 freeme = lseg;
804 break;
805 }
806 }
807 } 984 }
808out: 985out:
809 nfs_request_remove_commit_list(req); 986 nfs_request_remove_commit_list(req, cinfo);
810 spin_unlock(&inode->i_lock); 987 spin_unlock(cinfo->lock);
811 put_lseg(freeme); 988 put_lseg(freeme);
812} 989}
813 990
814static struct list_head * 991static struct list_head *
815filelayout_choose_commit_list(struct nfs_page *req, 992filelayout_choose_commit_list(struct nfs_page *req,
816 struct pnfs_layout_segment *lseg) 993 struct pnfs_layout_segment *lseg,
994 struct nfs_commit_info *cinfo)
817{ 995{
818 struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); 996 struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
819 u32 i, j; 997 u32 i, j;
820 struct list_head *list; 998 struct list_head *list;
999 struct pnfs_commit_bucket *buckets;
821 1000
822 if (fl->commit_through_mds) 1001 if (fl->commit_through_mds)
823 return &NFS_I(req->wb_context->dentry->d_inode)->commit_list; 1002 return &cinfo->mds->list;
824 1003
825 /* Note that we are calling nfs4_fl_calc_j_index on each page 1004 /* Note that we are calling nfs4_fl_calc_j_index on each page
826 * that ends up being committed to a data server. An attractive 1005 * that ends up being committed to a data server. An attractive
@@ -828,31 +1007,33 @@ filelayout_choose_commit_list(struct nfs_page *req,
828 * to store the value calculated in filelayout_write_pagelist 1007 * to store the value calculated in filelayout_write_pagelist
829 * and just use that here. 1008 * and just use that here.
830 */ 1009 */
831 j = nfs4_fl_calc_j_index(lseg, 1010 j = nfs4_fl_calc_j_index(lseg, req_offset(req));
832 (loff_t)req->wb_index << PAGE_CACHE_SHIFT);
833 i = select_bucket_index(fl, j); 1011 i = select_bucket_index(fl, j);
834 list = &fl->commit_buckets[i].written; 1012 buckets = cinfo->ds->buckets;
1013 list = &buckets[i].written;
835 if (list_empty(list)) { 1014 if (list_empty(list)) {
836 /* Non-empty buckets hold a reference on the lseg. That ref 1015 /* Non-empty buckets hold a reference on the lseg. That ref
837 * is normally transferred to the COMMIT call and released 1016 * is normally transferred to the COMMIT call and released
838 * there. It could also be released if the last req is pulled 1017 * there. It could also be released if the last req is pulled
839 * off due to a rewrite, in which case it will be done in 1018 * off due to a rewrite, in which case it will be done in
840 * filelayout_remove_commit_req 1019 * filelayout_clear_request_commit
841 */ 1020 */
842 get_lseg(lseg); 1021 buckets[i].wlseg = get_lseg(lseg);
843 } 1022 }
844 set_bit(PG_COMMIT_TO_DS, &req->wb_flags); 1023 set_bit(PG_COMMIT_TO_DS, &req->wb_flags);
1024 cinfo->ds->nwritten++;
845 return list; 1025 return list;
846} 1026}
847 1027
848static void 1028static void
849filelayout_mark_request_commit(struct nfs_page *req, 1029filelayout_mark_request_commit(struct nfs_page *req,
850 struct pnfs_layout_segment *lseg) 1030 struct pnfs_layout_segment *lseg,
1031 struct nfs_commit_info *cinfo)
851{ 1032{
852 struct list_head *list; 1033 struct list_head *list;
853 1034
854 list = filelayout_choose_commit_list(req, lseg); 1035 list = filelayout_choose_commit_list(req, lseg, cinfo);
855 nfs_request_add_commit_list(req, list); 1036 nfs_request_add_commit_list(req, list, cinfo);
856} 1037}
857 1038
858static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i) 1039static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i)
@@ -880,7 +1061,7 @@ select_ds_fh_from_commit(struct pnfs_layout_segment *lseg, u32 i)
880 return flseg->fh_array[i]; 1061 return flseg->fh_array[i];
881} 1062}
882 1063
883static int filelayout_initiate_commit(struct nfs_write_data *data, int how) 1064static int filelayout_initiate_commit(struct nfs_commit_data *data, int how)
884{ 1065{
885 struct pnfs_layout_segment *lseg = data->lseg; 1066 struct pnfs_layout_segment *lseg = data->lseg;
886 struct nfs4_pnfs_ds *ds; 1067 struct nfs4_pnfs_ds *ds;
@@ -890,135 +1071,138 @@ static int filelayout_initiate_commit(struct nfs_write_data *data, int how)
890 idx = calc_ds_index_from_commit(lseg, data->ds_commit_index); 1071 idx = calc_ds_index_from_commit(lseg, data->ds_commit_index);
891 ds = nfs4_fl_prepare_ds(lseg, idx); 1072 ds = nfs4_fl_prepare_ds(lseg, idx);
892 if (!ds) { 1073 if (!ds) {
893 printk(KERN_ERR "NFS: %s: prepare_ds failed, use MDS\n",
894 __func__);
895 set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
896 set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
897 prepare_to_resend_writes(data); 1074 prepare_to_resend_writes(data);
898 filelayout_commit_release(data); 1075 filelayout_commit_release(data);
899 return -EAGAIN; 1076 return -EAGAIN;
900 } 1077 }
901 dprintk("%s ino %lu, how %d\n", __func__, data->inode->i_ino, how); 1078 dprintk("%s ino %lu, how %d cl_count %d\n", __func__,
902 data->write_done_cb = filelayout_commit_done_cb; 1079 data->inode->i_ino, how, atomic_read(&ds->ds_clp->cl_count));
1080 data->commit_done_cb = filelayout_commit_done_cb;
1081 atomic_inc(&ds->ds_clp->cl_count);
903 data->ds_clp = ds->ds_clp; 1082 data->ds_clp = ds->ds_clp;
904 fh = select_ds_fh_from_commit(lseg, data->ds_commit_index); 1083 fh = select_ds_fh_from_commit(lseg, data->ds_commit_index);
905 if (fh) 1084 if (fh)
906 data->args.fh = fh; 1085 data->args.fh = fh;
907 return nfs_initiate_commit(data, ds->ds_clp->cl_rpcclient, 1086 return nfs_initiate_commit(ds->ds_clp->cl_rpcclient, data,
908 &filelayout_commit_call_ops, how); 1087 &filelayout_commit_call_ops, how,
909} 1088 RPC_TASK_SOFTCONN);
910
911/*
912 * This is only useful while we are using whole file layouts.
913 */
914static struct pnfs_layout_segment *
915find_only_write_lseg_locked(struct inode *inode)
916{
917 struct pnfs_layout_segment *lseg;
918
919 list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list)
920 if (lseg->pls_range.iomode == IOMODE_RW)
921 return lseg;
922 return NULL;
923}
924
925static struct pnfs_layout_segment *find_only_write_lseg(struct inode *inode)
926{
927 struct pnfs_layout_segment *rv;
928
929 spin_lock(&inode->i_lock);
930 rv = find_only_write_lseg_locked(inode);
931 if (rv)
932 get_lseg(rv);
933 spin_unlock(&inode->i_lock);
934 return rv;
935} 1089}
936 1090
937static int 1091static int
938filelayout_scan_ds_commit_list(struct nfs4_fl_commit_bucket *bucket, int max, 1092transfer_commit_list(struct list_head *src, struct list_head *dst,
939 spinlock_t *lock) 1093 struct nfs_commit_info *cinfo, int max)
940{ 1094{
941 struct list_head *src = &bucket->written;
942 struct list_head *dst = &bucket->committing;
943 struct nfs_page *req, *tmp; 1095 struct nfs_page *req, *tmp;
944 int ret = 0; 1096 int ret = 0;
945 1097
946 list_for_each_entry_safe(req, tmp, src, wb_list) { 1098 list_for_each_entry_safe(req, tmp, src, wb_list) {
947 if (!nfs_lock_request(req)) 1099 if (!nfs_lock_request(req))
948 continue; 1100 continue;
949 if (cond_resched_lock(lock)) 1101 kref_get(&req->wb_kref);
1102 if (cond_resched_lock(cinfo->lock))
950 list_safe_reset_next(req, tmp, wb_list); 1103 list_safe_reset_next(req, tmp, wb_list);
951 nfs_request_remove_commit_list(req); 1104 nfs_request_remove_commit_list(req, cinfo);
952 clear_bit(PG_COMMIT_TO_DS, &req->wb_flags); 1105 clear_bit(PG_COMMIT_TO_DS, &req->wb_flags);
953 nfs_list_add_request(req, dst); 1106 nfs_list_add_request(req, dst);
954 ret++; 1107 ret++;
955 if (ret == max) 1108 if ((ret == max) && !cinfo->dreq)
956 break; 1109 break;
957 } 1110 }
958 return ret; 1111 return ret;
959} 1112}
960 1113
1114static int
1115filelayout_scan_ds_commit_list(struct pnfs_commit_bucket *bucket,
1116 struct nfs_commit_info *cinfo,
1117 int max)
1118{
1119 struct list_head *src = &bucket->written;
1120 struct list_head *dst = &bucket->committing;
1121 int ret;
1122
1123 ret = transfer_commit_list(src, dst, cinfo, max);
1124 if (ret) {
1125 cinfo->ds->nwritten -= ret;
1126 cinfo->ds->ncommitting += ret;
1127 bucket->clseg = bucket->wlseg;
1128 if (list_empty(src))
1129 bucket->wlseg = NULL;
1130 else
1131 get_lseg(bucket->clseg);
1132 }
1133 return ret;
1134}
1135
961/* Move reqs from written to committing lists, returning count of number moved. 1136/* Move reqs from written to committing lists, returning count of number moved.
962 * Note called with i_lock held. 1137 * Note called with cinfo->lock held.
963 */ 1138 */
964static int filelayout_scan_commit_lists(struct inode *inode, int max, 1139static int filelayout_scan_commit_lists(struct nfs_commit_info *cinfo,
965 spinlock_t *lock) 1140 int max)
966{ 1141{
967 struct pnfs_layout_segment *lseg;
968 struct nfs4_filelayout_segment *fl;
969 int i, rv = 0, cnt; 1142 int i, rv = 0, cnt;
970 1143
971 lseg = find_only_write_lseg_locked(inode); 1144 for (i = 0; i < cinfo->ds->nbuckets && max != 0; i++) {
972 if (!lseg) 1145 cnt = filelayout_scan_ds_commit_list(&cinfo->ds->buckets[i],
973 goto out_done; 1146 cinfo, max);
974 fl = FILELAYOUT_LSEG(lseg);
975 if (fl->commit_through_mds)
976 goto out_done;
977 for (i = 0; i < fl->number_of_buckets && max != 0; i++) {
978 cnt = filelayout_scan_ds_commit_list(&fl->commit_buckets[i],
979 max, lock);
980 max -= cnt; 1147 max -= cnt;
981 rv += cnt; 1148 rv += cnt;
982 } 1149 }
983out_done:
984 return rv; 1150 return rv;
985} 1151}
986 1152
1153/* Pull everything off the committing lists and dump into @dst */
1154static void filelayout_recover_commit_reqs(struct list_head *dst,
1155 struct nfs_commit_info *cinfo)
1156{
1157 struct pnfs_commit_bucket *b;
1158 int i;
1159
1160 /* NOTE cinfo->lock is NOT held, relying on fact that this is
1161 * only called on single thread per dreq.
1162 * Can't take the lock because need to do put_lseg
1163 */
1164 for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) {
1165 if (transfer_commit_list(&b->written, dst, cinfo, 0)) {
1166 BUG_ON(!list_empty(&b->written));
1167 put_lseg(b->wlseg);
1168 b->wlseg = NULL;
1169 }
1170 }
1171 cinfo->ds->nwritten = 0;
1172}
1173
987static unsigned int 1174static unsigned int
988alloc_ds_commits(struct inode *inode, struct list_head *list) 1175alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list)
989{ 1176{
990 struct pnfs_layout_segment *lseg; 1177 struct pnfs_ds_commit_info *fl_cinfo;
991 struct nfs4_filelayout_segment *fl; 1178 struct pnfs_commit_bucket *bucket;
992 struct nfs_write_data *data; 1179 struct nfs_commit_data *data;
993 int i, j; 1180 int i, j;
994 unsigned int nreq = 0; 1181 unsigned int nreq = 0;
995 1182
996 /* Won't need this when non-whole file layout segments are supported 1183 fl_cinfo = cinfo->ds;
997 * instead we will use a pnfs_layout_hdr structure */ 1184 bucket = fl_cinfo->buckets;
998 lseg = find_only_write_lseg(inode); 1185 for (i = 0; i < fl_cinfo->nbuckets; i++, bucket++) {
999 if (!lseg) 1186 if (list_empty(&bucket->committing))
1000 return 0;
1001 fl = FILELAYOUT_LSEG(lseg);
1002 for (i = 0; i < fl->number_of_buckets; i++) {
1003 if (list_empty(&fl->commit_buckets[i].committing))
1004 continue; 1187 continue;
1005 data = nfs_commitdata_alloc(); 1188 data = nfs_commitdata_alloc();
1006 if (!data) 1189 if (!data)
1007 break; 1190 break;
1008 data->ds_commit_index = i; 1191 data->ds_commit_index = i;
1009 data->lseg = lseg; 1192 data->lseg = bucket->clseg;
1193 bucket->clseg = NULL;
1010 list_add(&data->pages, list); 1194 list_add(&data->pages, list);
1011 nreq++; 1195 nreq++;
1012 } 1196 }
1013 1197
1014 /* Clean up on error */ 1198 /* Clean up on error */
1015 for (j = i; j < fl->number_of_buckets; j++) { 1199 for (j = i; j < fl_cinfo->nbuckets; j++, bucket++) {
1016 if (list_empty(&fl->commit_buckets[i].committing)) 1200 if (list_empty(&bucket->committing))
1017 continue; 1201 continue;
1018 nfs_retry_commit(&fl->commit_buckets[i].committing, lseg); 1202 nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo);
1019 put_lseg(lseg); /* associated with emptying bucket */ 1203 put_lseg(bucket->clseg);
1204 bucket->clseg = NULL;
1020 } 1205 }
1021 put_lseg(lseg);
1022 /* Caller will clean up entries put on list */ 1206 /* Caller will clean up entries put on list */
1023 return nreq; 1207 return nreq;
1024} 1208}
@@ -1026,9 +1210,9 @@ alloc_ds_commits(struct inode *inode, struct list_head *list)
1026/* This follows nfs_commit_list pretty closely */ 1210/* This follows nfs_commit_list pretty closely */
1027static int 1211static int
1028filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages, 1212filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
1029 int how) 1213 int how, struct nfs_commit_info *cinfo)
1030{ 1214{
1031 struct nfs_write_data *data, *tmp; 1215 struct nfs_commit_data *data, *tmp;
1032 LIST_HEAD(list); 1216 LIST_HEAD(list);
1033 unsigned int nreq = 0; 1217 unsigned int nreq = 0;
1034 1218
@@ -1039,30 +1223,34 @@ filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
1039 list_add(&data->pages, &list); 1223 list_add(&data->pages, &list);
1040 nreq++; 1224 nreq++;
1041 } else 1225 } else
1042 nfs_retry_commit(mds_pages, NULL); 1226 nfs_retry_commit(mds_pages, NULL, cinfo);
1043 } 1227 }
1044 1228
1045 nreq += alloc_ds_commits(inode, &list); 1229 nreq += alloc_ds_commits(cinfo, &list);
1046 1230
1047 if (nreq == 0) { 1231 if (nreq == 0) {
1048 nfs_commit_clear_lock(NFS_I(inode)); 1232 cinfo->completion_ops->error_cleanup(NFS_I(inode));
1049 goto out; 1233 goto out;
1050 } 1234 }
1051 1235
1052 atomic_add(nreq, &NFS_I(inode)->commits_outstanding); 1236 atomic_add(nreq, &cinfo->mds->rpcs_out);
1053 1237
1054 list_for_each_entry_safe(data, tmp, &list, pages) { 1238 list_for_each_entry_safe(data, tmp, &list, pages) {
1055 list_del_init(&data->pages); 1239 list_del_init(&data->pages);
1056 if (!data->lseg) { 1240 if (!data->lseg) {
1057 nfs_init_commit(data, mds_pages, NULL); 1241 nfs_init_commit(data, mds_pages, NULL, cinfo);
1058 nfs_initiate_commit(data, NFS_CLIENT(inode), 1242 nfs_initiate_commit(NFS_CLIENT(inode), data,
1059 data->mds_ops, how); 1243 data->mds_ops, how, 0);
1060 } else { 1244 } else {
1061 nfs_init_commit(data, &FILELAYOUT_LSEG(data->lseg)->commit_buckets[data->ds_commit_index].committing, data->lseg); 1245 struct pnfs_commit_bucket *buckets;
1246
1247 buckets = cinfo->ds->buckets;
1248 nfs_init_commit(data, &buckets[data->ds_commit_index].committing, data->lseg, cinfo);
1062 filelayout_initiate_commit(data, how); 1249 filelayout_initiate_commit(data, how);
1063 } 1250 }
1064 } 1251 }
1065out: 1252out:
1253 cinfo->ds->ncommitting = 0;
1066 return PNFS_ATTEMPTED; 1254 return PNFS_ATTEMPTED;
1067} 1255}
1068 1256
@@ -1072,17 +1260,47 @@ filelayout_free_deveiceid_node(struct nfs4_deviceid_node *d)
1072 nfs4_fl_free_deviceid(container_of(d, struct nfs4_file_layout_dsaddr, id_node)); 1260 nfs4_fl_free_deviceid(container_of(d, struct nfs4_file_layout_dsaddr, id_node));
1073} 1261}
1074 1262
1263static struct pnfs_layout_hdr *
1264filelayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)
1265{
1266 struct nfs4_filelayout *flo;
1267
1268 flo = kzalloc(sizeof(*flo), gfp_flags);
1269 return &flo->generic_hdr;
1270}
1271
1272static void
1273filelayout_free_layout_hdr(struct pnfs_layout_hdr *lo)
1274{
1275 kfree(FILELAYOUT_FROM_HDR(lo));
1276}
1277
1278static struct pnfs_ds_commit_info *
1279filelayout_get_ds_info(struct inode *inode)
1280{
1281 struct pnfs_layout_hdr *layout = NFS_I(inode)->layout;
1282
1283 if (layout == NULL)
1284 return NULL;
1285 else
1286 return &FILELAYOUT_FROM_HDR(layout)->commit_info;
1287}
1288
1075static struct pnfs_layoutdriver_type filelayout_type = { 1289static struct pnfs_layoutdriver_type filelayout_type = {
1076 .id = LAYOUT_NFSV4_1_FILES, 1290 .id = LAYOUT_NFSV4_1_FILES,
1077 .name = "LAYOUT_NFSV4_1_FILES", 1291 .name = "LAYOUT_NFSV4_1_FILES",
1078 .owner = THIS_MODULE, 1292 .owner = THIS_MODULE,
1293 .alloc_layout_hdr = filelayout_alloc_layout_hdr,
1294 .free_layout_hdr = filelayout_free_layout_hdr,
1079 .alloc_lseg = filelayout_alloc_lseg, 1295 .alloc_lseg = filelayout_alloc_lseg,
1080 .free_lseg = filelayout_free_lseg, 1296 .free_lseg = filelayout_free_lseg,
1081 .pg_read_ops = &filelayout_pg_read_ops, 1297 .pg_read_ops = &filelayout_pg_read_ops,
1082 .pg_write_ops = &filelayout_pg_write_ops, 1298 .pg_write_ops = &filelayout_pg_write_ops,
1299 .get_ds_info = &filelayout_get_ds_info,
1083 .mark_request_commit = filelayout_mark_request_commit, 1300 .mark_request_commit = filelayout_mark_request_commit,
1084 .clear_request_commit = filelayout_clear_request_commit, 1301 .clear_request_commit = filelayout_clear_request_commit,
1085 .scan_commit_lists = filelayout_scan_commit_lists, 1302 .scan_commit_lists = filelayout_scan_commit_lists,
1303 .recover_commit_reqs = filelayout_recover_commit_reqs,
1086 .commit_pagelist = filelayout_commit_pagelist, 1304 .commit_pagelist = filelayout_commit_pagelist,
1087 .read_pagelist = filelayout_read_pagelist, 1305 .read_pagelist = filelayout_read_pagelist,
1088 .write_pagelist = filelayout_write_pagelist, 1306 .write_pagelist = filelayout_write_pagelist,
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
index 21190bb1f5e3..43fe802dd678 100644
--- a/fs/nfs/nfs4filelayout.h
+++ b/fs/nfs/nfs4filelayout.h
@@ -33,6 +33,13 @@
33#include "pnfs.h" 33#include "pnfs.h"
34 34
35/* 35/*
36 * Default data server connection timeout and retrans vaules.
37 * Set by module paramters dataserver_timeo and dataserver_retrans.
38 */
39#define NFS4_DEF_DS_TIMEO 60
40#define NFS4_DEF_DS_RETRANS 5
41
42/*
36 * Field testing shows we need to support up to 4096 stripe indices. 43 * Field testing shows we need to support up to 4096 stripe indices.
37 * We store each index as a u8 (u32 on the wire) to keep the memory footprint 44 * We store each index as a u8 (u32 on the wire) to keep the memory footprint
38 * reasonable. This in turn means we support a maximum of 256 45 * reasonable. This in turn means we support a maximum of 256
@@ -41,6 +48,9 @@
41#define NFS4_PNFS_MAX_STRIPE_CNT 4096 48#define NFS4_PNFS_MAX_STRIPE_CNT 4096
42#define NFS4_PNFS_MAX_MULTI_CNT 256 /* 256 fit into a u8 stripe_index */ 49#define NFS4_PNFS_MAX_MULTI_CNT 256 /* 256 fit into a u8 stripe_index */
43 50
51/* error codes for internal use */
52#define NFS4ERR_RESET_TO_MDS 12001
53
44enum stripetype4 { 54enum stripetype4 {
45 STRIPE_SPARSE = 1, 55 STRIPE_SPARSE = 1,
46 STRIPE_DENSE = 2 56 STRIPE_DENSE = 2
@@ -62,23 +72,14 @@ struct nfs4_pnfs_ds {
62 atomic_t ds_count; 72 atomic_t ds_count;
63}; 73};
64 74
65/* nfs4_file_layout_dsaddr flags */
66#define NFS4_DEVICE_ID_NEG_ENTRY 0x00000001
67
68struct nfs4_file_layout_dsaddr { 75struct nfs4_file_layout_dsaddr {
69 struct nfs4_deviceid_node id_node; 76 struct nfs4_deviceid_node id_node;
70 unsigned long flags;
71 u32 stripe_count; 77 u32 stripe_count;
72 u8 *stripe_indices; 78 u8 *stripe_indices;
73 u32 ds_num; 79 u32 ds_num;
74 struct nfs4_pnfs_ds *ds_list[1]; 80 struct nfs4_pnfs_ds *ds_list[1];
75}; 81};
76 82
77struct nfs4_fl_commit_bucket {
78 struct list_head written;
79 struct list_head committing;
80};
81
82struct nfs4_filelayout_segment { 83struct nfs4_filelayout_segment {
83 struct pnfs_layout_segment generic_hdr; 84 struct pnfs_layout_segment generic_hdr;
84 u32 stripe_type; 85 u32 stripe_type;
@@ -89,10 +90,19 @@ struct nfs4_filelayout_segment {
89 struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */ 90 struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */
90 unsigned int num_fh; 91 unsigned int num_fh;
91 struct nfs_fh **fh_array; 92 struct nfs_fh **fh_array;
92 struct nfs4_fl_commit_bucket *commit_buckets; /* Sort commits to ds */
93 int number_of_buckets;
94}; 93};
95 94
95struct nfs4_filelayout {
96 struct pnfs_layout_hdr generic_hdr;
97 struct pnfs_ds_commit_info commit_info;
98};
99
100static inline struct nfs4_filelayout *
101FILELAYOUT_FROM_HDR(struct pnfs_layout_hdr *lo)
102{
103 return container_of(lo, struct nfs4_filelayout, generic_hdr);
104}
105
96static inline struct nfs4_filelayout_segment * 106static inline struct nfs4_filelayout_segment *
97FILELAYOUT_LSEG(struct pnfs_layout_segment *lseg) 107FILELAYOUT_LSEG(struct pnfs_layout_segment *lseg)
98{ 108{
@@ -107,6 +117,36 @@ FILELAYOUT_DEVID_NODE(struct pnfs_layout_segment *lseg)
107 return &FILELAYOUT_LSEG(lseg)->dsaddr->id_node; 117 return &FILELAYOUT_LSEG(lseg)->dsaddr->id_node;
108} 118}
109 119
120static inline void
121filelayout_mark_devid_invalid(struct nfs4_deviceid_node *node)
122{
123 u32 *p = (u32 *)&node->deviceid;
124
125 printk(KERN_WARNING "NFS: Deviceid [%x%x%x%x] marked out of use.\n",
126 p[0], p[1], p[2], p[3]);
127
128 set_bit(NFS_DEVICEID_INVALID, &node->flags);
129}
130
131static inline bool
132filelayout_test_layout_invalid(struct pnfs_layout_hdr *lo)
133{
134 return test_bit(NFS_LAYOUT_INVALID, &lo->plh_flags);
135}
136
137static inline bool
138filelayout_test_devid_invalid(struct nfs4_deviceid_node *node)
139{
140 return test_bit(NFS_DEVICEID_INVALID, &node->flags);
141}
142
143static inline bool
144filelayout_reset_to_mds(struct pnfs_layout_segment *lseg)
145{
146 return filelayout_test_devid_invalid(FILELAYOUT_DEVID_NODE(lseg)) ||
147 filelayout_test_layout_invalid(lseg->pls_layout);
148}
149
110extern struct nfs_fh * 150extern struct nfs_fh *
111nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j); 151nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j);
112 152
@@ -119,5 +159,6 @@ extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr);
119extern void nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); 159extern void nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr);
120struct nfs4_file_layout_dsaddr * 160struct nfs4_file_layout_dsaddr *
121get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags); 161get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags);
162void nfs4_ds_disconnect(struct nfs_client *clp);
122 163
123#endif /* FS_NFS_NFS4FILELAYOUT_H */ 164#endif /* FS_NFS_NFS4FILELAYOUT_H */
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index c9cff9adb2d3..a1fab8da7f03 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -30,12 +30,16 @@
30 30
31#include <linux/nfs_fs.h> 31#include <linux/nfs_fs.h>
32#include <linux/vmalloc.h> 32#include <linux/vmalloc.h>
33#include <linux/module.h>
33 34
34#include "internal.h" 35#include "internal.h"
35#include "nfs4filelayout.h" 36#include "nfs4filelayout.h"
36 37
37#define NFSDBG_FACILITY NFSDBG_PNFS_LD 38#define NFSDBG_FACILITY NFSDBG_PNFS_LD
38 39
40static unsigned int dataserver_timeo = NFS4_DEF_DS_TIMEO;
41static unsigned int dataserver_retrans = NFS4_DEF_DS_RETRANS;
42
39/* 43/*
40 * Data server cache 44 * Data server cache
41 * 45 *
@@ -145,6 +149,28 @@ _data_server_lookup_locked(const struct list_head *dsaddrs)
145} 149}
146 150
147/* 151/*
152 * Lookup DS by nfs_client pointer. Zero data server client pointer
153 */
154void nfs4_ds_disconnect(struct nfs_client *clp)
155{
156 struct nfs4_pnfs_ds *ds;
157 struct nfs_client *found = NULL;
158
159 dprintk("%s clp %p\n", __func__, clp);
160 spin_lock(&nfs4_ds_cache_lock);
161 list_for_each_entry(ds, &nfs4_data_server_cache, ds_node)
162 if (ds->ds_clp && ds->ds_clp == clp) {
163 found = ds->ds_clp;
164 ds->ds_clp = NULL;
165 }
166 spin_unlock(&nfs4_ds_cache_lock);
167 if (found) {
168 set_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state);
169 nfs_put_client(clp);
170 }
171}
172
173/*
148 * Create an rpc connection to the nfs4_pnfs_ds data server 174 * Create an rpc connection to the nfs4_pnfs_ds data server
149 * Currently only supports IPv4 and IPv6 addresses 175 * Currently only supports IPv4 and IPv6 addresses
150 */ 176 */
@@ -165,8 +191,9 @@ nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
165 __func__, ds->ds_remotestr, da->da_remotestr); 191 __func__, ds->ds_remotestr, da->da_remotestr);
166 192
167 clp = nfs4_set_ds_client(mds_srv->nfs_client, 193 clp = nfs4_set_ds_client(mds_srv->nfs_client,
168 (struct sockaddr *)&da->da_addr, 194 (struct sockaddr *)&da->da_addr,
169 da->da_addrlen, IPPROTO_TCP); 195 da->da_addrlen, IPPROTO_TCP,
196 dataserver_timeo, dataserver_retrans);
170 if (!IS_ERR(clp)) 197 if (!IS_ERR(clp))
171 break; 198 break;
172 } 199 }
@@ -176,28 +203,7 @@ nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
176 goto out; 203 goto out;
177 } 204 }
178 205
179 if ((clp->cl_exchange_flags & EXCHGID4_FLAG_MASK_PNFS) != 0) { 206 status = nfs4_init_ds_session(clp, mds_srv->nfs_client->cl_lease_time);
180 if (!is_ds_client(clp)) {
181 status = -ENODEV;
182 goto out_put;
183 }
184 ds->ds_clp = clp;
185 dprintk("%s [existing] server=%s\n", __func__,
186 ds->ds_remotestr);
187 goto out;
188 }
189
190 /*
191 * Do not set NFS_CS_CHECK_LEASE_TIME instead set the DS lease to
192 * be equal to the MDS lease. Renewal is scheduled in create_session.
193 */
194 spin_lock(&mds_srv->nfs_client->cl_lock);
195 clp->cl_lease_time = mds_srv->nfs_client->cl_lease_time;
196 spin_unlock(&mds_srv->nfs_client->cl_lock);
197 clp->cl_last_renewal = jiffies;
198
199 /* New nfs_client */
200 status = nfs4_init_ds_session(clp);
201 if (status) 207 if (status)
202 goto out_put; 208 goto out_put;
203 209
@@ -602,7 +608,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags)
602 608
603 mp_count = be32_to_cpup(p); /* multipath count */ 609 mp_count = be32_to_cpup(p); /* multipath count */
604 for (j = 0; j < mp_count; j++) { 610 for (j = 0; j < mp_count; j++) {
605 da = decode_ds_addr(NFS_SERVER(ino)->nfs_client->net, 611 da = decode_ds_addr(NFS_SERVER(ino)->nfs_client->cl_net,
606 &stream, gfp_flags); 612 &stream, gfp_flags);
607 if (da) 613 if (da)
608 list_add_tail(&da->da_node, &dsaddrs); 614 list_add_tail(&da->da_node, &dsaddrs);
@@ -791,48 +797,42 @@ nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j)
791 return flseg->fh_array[i]; 797 return flseg->fh_array[i];
792} 798}
793 799
794static void
795filelayout_mark_devid_negative(struct nfs4_file_layout_dsaddr *dsaddr,
796 int err, const char *ds_remotestr)
797{
798 u32 *p = (u32 *)&dsaddr->id_node.deviceid;
799
800 printk(KERN_ERR "NFS: data server %s connection error %d."
801 " Deviceid [%x%x%x%x] marked out of use.\n",
802 ds_remotestr, err, p[0], p[1], p[2], p[3]);
803
804 spin_lock(&nfs4_ds_cache_lock);
805 dsaddr->flags |= NFS4_DEVICE_ID_NEG_ENTRY;
806 spin_unlock(&nfs4_ds_cache_lock);
807}
808
809struct nfs4_pnfs_ds * 800struct nfs4_pnfs_ds *
810nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) 801nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
811{ 802{
812 struct nfs4_file_layout_dsaddr *dsaddr = FILELAYOUT_LSEG(lseg)->dsaddr; 803 struct nfs4_file_layout_dsaddr *dsaddr = FILELAYOUT_LSEG(lseg)->dsaddr;
813 struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx]; 804 struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx];
805 struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg);
806
807 if (filelayout_test_devid_invalid(devid))
808 return NULL;
814 809
815 if (ds == NULL) { 810 if (ds == NULL) {
816 printk(KERN_ERR "NFS: %s: No data server for offset index %d\n", 811 printk(KERN_ERR "NFS: %s: No data server for offset index %d\n",
817 __func__, ds_idx); 812 __func__, ds_idx);
818 return NULL; 813 goto mark_dev_invalid;
819 } 814 }
820 815
821 if (!ds->ds_clp) { 816 if (!ds->ds_clp) {
822 struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode); 817 struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode);
823 int err; 818 int err;
824 819
825 if (dsaddr->flags & NFS4_DEVICE_ID_NEG_ENTRY) {
826 /* Already tried to connect, don't try again */
827 dprintk("%s Deviceid marked out of use\n", __func__);
828 return NULL;
829 }
830 err = nfs4_ds_connect(s, ds); 820 err = nfs4_ds_connect(s, ds);
831 if (err) { 821 if (err)
832 filelayout_mark_devid_negative(dsaddr, err, 822 goto mark_dev_invalid;
833 ds->ds_remotestr);
834 return NULL;
835 }
836 } 823 }
837 return ds; 824 return ds;
825
826mark_dev_invalid:
827 filelayout_mark_devid_invalid(devid);
828 return NULL;
838} 829}
830
831module_param(dataserver_retrans, uint, 0644);
832MODULE_PARM_DESC(dataserver_retrans, "The number of times the NFSv4.1 client "
833 "retries a request before it attempts further "
834 " recovery action.");
835module_param(dataserver_timeo, uint, 0644);
836MODULE_PARM_DESC(dataserver_timeo, "The time (in tenths of a second) the "
837 "NFSv4.1 client waits for a response from a "
838 " data server before it retries an NFS request.");
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index a7f3dedc4ec7..017b4b01a69c 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -132,6 +132,35 @@ static size_t nfs_parse_server_name(char *string, size_t len,
132 return ret; 132 return ret;
133} 133}
134 134
135rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors)
136{
137 struct gss_api_mech *mech;
138 struct xdr_netobj oid;
139 int i;
140 rpc_authflavor_t pseudoflavor = RPC_AUTH_UNIX;
141
142 for (i = 0; i < flavors->num_flavors; i++) {
143 struct nfs4_secinfo_flavor *flavor;
144 flavor = &flavors->flavors[i];
145
146 if (flavor->flavor == RPC_AUTH_NULL || flavor->flavor == RPC_AUTH_UNIX) {
147 pseudoflavor = flavor->flavor;
148 break;
149 } else if (flavor->flavor == RPC_AUTH_GSS) {
150 oid.len = flavor->gss.sec_oid4.len;
151 oid.data = flavor->gss.sec_oid4.data;
152 mech = gss_mech_get_by_OID(&oid);
153 if (!mech)
154 continue;
155 pseudoflavor = gss_svc_to_pseudoflavor(mech, flavor->gss.service);
156 gss_mech_put(mech);
157 break;
158 }
159 }
160
161 return pseudoflavor;
162}
163
135static rpc_authflavor_t nfs4_negotiate_security(struct inode *inode, struct qstr *name) 164static rpc_authflavor_t nfs4_negotiate_security(struct inode *inode, struct qstr *name)
136{ 165{
137 struct page *page; 166 struct page *page;
@@ -168,7 +197,7 @@ struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *clnt, struct inode *ino
168 rpc_authflavor_t flavor; 197 rpc_authflavor_t flavor;
169 198
170 flavor = nfs4_negotiate_security(inode, name); 199 flavor = nfs4_negotiate_security(inode, name);
171 if (flavor < 0) 200 if ((int)flavor < 0)
172 return ERR_PTR(flavor); 201 return ERR_PTR(flavor);
173 202
174 clone = rpc_clone_client(clnt); 203 clone = rpc_clone_client(clnt);
@@ -300,7 +329,7 @@ out:
300 * @dentry - dentry of referral 329 * @dentry - dentry of referral
301 * 330 *
302 */ 331 */
303struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *dentry) 332static struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *dentry)
304{ 333{
305 struct vfsmount *mnt = ERR_PTR(-ENOMEM); 334 struct vfsmount *mnt = ERR_PTR(-ENOMEM);
306 struct dentry *parent; 335 struct dentry *parent;
@@ -341,3 +370,25 @@ out:
341 dprintk("%s: done\n", __func__); 370 dprintk("%s: done\n", __func__);
342 return mnt; 371 return mnt;
343} 372}
373
374struct vfsmount *nfs4_submount(struct nfs_server *server, struct dentry *dentry,
375 struct nfs_fh *fh, struct nfs_fattr *fattr)
376{
377 struct dentry *parent = dget_parent(dentry);
378 struct rpc_clnt *client;
379 struct vfsmount *mnt;
380
381 /* Look it up again to get its attributes and sec flavor */
382 client = nfs4_proc_lookup_mountpoint(parent->d_inode, &dentry->d_name, fh, fattr);
383 dput(parent);
384 if (IS_ERR(client))
385 return ERR_CAST(client);
386
387 if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL)
388 mnt = nfs_do_refmount(client, dentry);
389 else
390 mnt = nfs_do_submount(dentry, fh, fattr, client->cl_auth->au_flavor);
391
392 rpc_shutdown_client(client);
393 return mnt;
394}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index ab985f6f0da8..d48dbefa0e71 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -64,6 +64,7 @@
64#include "iostat.h" 64#include "iostat.h"
65#include "callback.h" 65#include "callback.h"
66#include "pnfs.h" 66#include "pnfs.h"
67#include "netns.h"
67 68
68#define NFSDBG_FACILITY NFSDBG_PROC 69#define NFSDBG_FACILITY NFSDBG_PROC
69 70
@@ -80,6 +81,7 @@ static int _nfs4_recover_proc_open(struct nfs4_opendata *data);
80static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); 81static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
81static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *); 82static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *);
82static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr); 83static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr);
84static int nfs4_proc_getattr(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *);
83static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr); 85static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr);
84static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, 86static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
85 struct nfs_fattr *fattr, struct iattr *sattr, 87 struct nfs_fattr *fattr, struct iattr *sattr,
@@ -101,6 +103,8 @@ static int nfs4_map_errors(int err)
101 case -NFS4ERR_BADOWNER: 103 case -NFS4ERR_BADOWNER:
102 case -NFS4ERR_BADNAME: 104 case -NFS4ERR_BADNAME:
103 return -EINVAL; 105 return -EINVAL;
106 case -NFS4ERR_SHARE_DENIED:
107 return -EACCES;
104 default: 108 default:
105 dprintk("%s could not handle NFSv4 error %d\n", 109 dprintk("%s could not handle NFSv4 error %d\n",
106 __func__, -err); 110 __func__, -err);
@@ -304,7 +308,7 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc
304 case -NFS4ERR_SEQ_MISORDERED: 308 case -NFS4ERR_SEQ_MISORDERED:
305 dprintk("%s ERROR: %d Reset session\n", __func__, 309 dprintk("%s ERROR: %d Reset session\n", __func__,
306 errorcode); 310 errorcode);
307 nfs4_schedule_session_recovery(clp->cl_session); 311 nfs4_schedule_session_recovery(clp->cl_session, errorcode);
308 exception->retry = 1; 312 exception->retry = 1;
309 break; 313 break;
310#endif /* defined(CONFIG_NFS_V4_1) */ 314#endif /* defined(CONFIG_NFS_V4_1) */
@@ -772,7 +776,7 @@ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo)
772 struct nfs_inode *nfsi = NFS_I(dir); 776 struct nfs_inode *nfsi = NFS_I(dir);
773 777
774 spin_lock(&dir->i_lock); 778 spin_lock(&dir->i_lock);
775 nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA; 779 nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
776 if (!cinfo->atomic || cinfo->before != dir->i_version) 780 if (!cinfo->atomic || cinfo->before != dir->i_version)
777 nfs_force_lookup_revalidate(dir); 781 nfs_force_lookup_revalidate(dir);
778 dir->i_version = cinfo->after; 782 dir->i_version = cinfo->after;
@@ -788,7 +792,6 @@ struct nfs4_opendata {
788 struct nfs4_string owner_name; 792 struct nfs4_string owner_name;
789 struct nfs4_string group_name; 793 struct nfs4_string group_name;
790 struct nfs_fattr f_attr; 794 struct nfs_fattr f_attr;
791 struct nfs_fattr dir_attr;
792 struct dentry *dir; 795 struct dentry *dir;
793 struct dentry *dentry; 796 struct dentry *dentry;
794 struct nfs4_state_owner *owner; 797 struct nfs4_state_owner *owner;
@@ -804,12 +807,10 @@ struct nfs4_opendata {
804static void nfs4_init_opendata_res(struct nfs4_opendata *p) 807static void nfs4_init_opendata_res(struct nfs4_opendata *p)
805{ 808{
806 p->o_res.f_attr = &p->f_attr; 809 p->o_res.f_attr = &p->f_attr;
807 p->o_res.dir_attr = &p->dir_attr;
808 p->o_res.seqid = p->o_arg.seqid; 810 p->o_res.seqid = p->o_arg.seqid;
809 p->c_res.seqid = p->c_arg.seqid; 811 p->c_res.seqid = p->c_arg.seqid;
810 p->o_res.server = p->o_arg.server; 812 p->o_res.server = p->o_arg.server;
811 nfs_fattr_init(&p->f_attr); 813 nfs_fattr_init(&p->f_attr);
812 nfs_fattr_init(&p->dir_attr);
813 nfs_fattr_init_names(&p->f_attr, &p->owner_name, &p->group_name); 814 nfs_fattr_init_names(&p->f_attr, &p->owner_name, &p->group_name);
814} 815}
815 816
@@ -843,7 +844,6 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
843 p->o_arg.name = &dentry->d_name; 844 p->o_arg.name = &dentry->d_name;
844 p->o_arg.server = server; 845 p->o_arg.server = server;
845 p->o_arg.bitmask = server->attr_bitmask; 846 p->o_arg.bitmask = server->attr_bitmask;
846 p->o_arg.dir_bitmask = server->cache_consistency_bitmask;
847 p->o_arg.claim = NFS4_OPEN_CLAIM_NULL; 847 p->o_arg.claim = NFS4_OPEN_CLAIM_NULL;
848 if (attrs != NULL && attrs->ia_valid != 0) { 848 if (attrs != NULL && attrs->ia_valid != 0) {
849 __be32 verf[2]; 849 __be32 verf[2];
@@ -1332,7 +1332,7 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
1332 case -NFS4ERR_BAD_HIGH_SLOT: 1332 case -NFS4ERR_BAD_HIGH_SLOT:
1333 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: 1333 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
1334 case -NFS4ERR_DEADSESSION: 1334 case -NFS4ERR_DEADSESSION:
1335 nfs4_schedule_session_recovery(server->nfs_client->cl_session); 1335 nfs4_schedule_session_recovery(server->nfs_client->cl_session, err);
1336 goto out; 1336 goto out;
1337 case -NFS4ERR_STALE_CLIENTID: 1337 case -NFS4ERR_STALE_CLIENTID:
1338 case -NFS4ERR_STALE_STATEID: 1338 case -NFS4ERR_STALE_STATEID:
@@ -1611,8 +1611,6 @@ static int _nfs4_recover_proc_open(struct nfs4_opendata *data)
1611 1611
1612 nfs_fattr_map_and_free_names(NFS_SERVER(dir), &data->f_attr); 1612 nfs_fattr_map_and_free_names(NFS_SERVER(dir), &data->f_attr);
1613 1613
1614 nfs_refresh_inode(dir, o_res->dir_attr);
1615
1616 if (o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) { 1614 if (o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) {
1617 status = _nfs4_proc_open_confirm(data); 1615 status = _nfs4_proc_open_confirm(data);
1618 if (status != 0) 1616 if (status != 0)
@@ -1645,11 +1643,8 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
1645 1643
1646 nfs_fattr_map_and_free_names(server, &data->f_attr); 1644 nfs_fattr_map_and_free_names(server, &data->f_attr);
1647 1645
1648 if (o_arg->open_flags & O_CREAT) { 1646 if (o_arg->open_flags & O_CREAT)
1649 update_changeattr(dir, &o_res->cinfo); 1647 update_changeattr(dir, &o_res->cinfo);
1650 nfs_post_op_update_inode(dir, o_res->dir_attr);
1651 } else
1652 nfs_refresh_inode(dir, o_res->dir_attr);
1653 if ((o_res->rflags & NFS4_OPEN_RESULT_LOCKTYPE_POSIX) == 0) 1648 if ((o_res->rflags & NFS4_OPEN_RESULT_LOCKTYPE_POSIX) == 0)
1654 server->caps &= ~NFS_CAP_POSIX_LOCK; 1649 server->caps &= ~NFS_CAP_POSIX_LOCK;
1655 if(o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) { 1650 if(o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) {
@@ -1789,7 +1784,14 @@ static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata, struct
1789/* 1784/*
1790 * Returns a referenced nfs4_state 1785 * Returns a referenced nfs4_state
1791 */ 1786 */
1792static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res) 1787static int _nfs4_do_open(struct inode *dir,
1788 struct dentry *dentry,
1789 fmode_t fmode,
1790 int flags,
1791 struct iattr *sattr,
1792 struct rpc_cred *cred,
1793 struct nfs4_state **res,
1794 struct nfs4_threshold **ctx_th)
1793{ 1795{
1794 struct nfs4_state_owner *sp; 1796 struct nfs4_state_owner *sp;
1795 struct nfs4_state *state = NULL; 1797 struct nfs4_state *state = NULL;
@@ -1814,6 +1816,11 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode
1814 if (opendata == NULL) 1816 if (opendata == NULL)
1815 goto err_put_state_owner; 1817 goto err_put_state_owner;
1816 1818
1819 if (ctx_th && server->attr_bitmask[2] & FATTR4_WORD2_MDSTHRESHOLD) {
1820 opendata->f_attr.mdsthreshold = pnfs_mdsthreshold_alloc();
1821 if (!opendata->f_attr.mdsthreshold)
1822 goto err_opendata_put;
1823 }
1817 if (dentry->d_inode != NULL) 1824 if (dentry->d_inode != NULL)
1818 opendata->state = nfs4_get_open_state(dentry->d_inode, sp); 1825 opendata->state = nfs4_get_open_state(dentry->d_inode, sp);
1819 1826
@@ -1839,11 +1846,19 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode
1839 nfs_setattr_update_inode(state->inode, sattr); 1846 nfs_setattr_update_inode(state->inode, sattr);
1840 nfs_post_op_update_inode(state->inode, opendata->o_res.f_attr); 1847 nfs_post_op_update_inode(state->inode, opendata->o_res.f_attr);
1841 } 1848 }
1849
1850 if (pnfs_use_threshold(ctx_th, opendata->f_attr.mdsthreshold, server))
1851 *ctx_th = opendata->f_attr.mdsthreshold;
1852 else
1853 kfree(opendata->f_attr.mdsthreshold);
1854 opendata->f_attr.mdsthreshold = NULL;
1855
1842 nfs4_opendata_put(opendata); 1856 nfs4_opendata_put(opendata);
1843 nfs4_put_state_owner(sp); 1857 nfs4_put_state_owner(sp);
1844 *res = state; 1858 *res = state;
1845 return 0; 1859 return 0;
1846err_opendata_put: 1860err_opendata_put:
1861 kfree(opendata->f_attr.mdsthreshold);
1847 nfs4_opendata_put(opendata); 1862 nfs4_opendata_put(opendata);
1848err_put_state_owner: 1863err_put_state_owner:
1849 nfs4_put_state_owner(sp); 1864 nfs4_put_state_owner(sp);
@@ -1853,14 +1868,21 @@ out_err:
1853} 1868}
1854 1869
1855 1870
1856static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode, int flags, struct iattr *sattr, struct rpc_cred *cred) 1871static struct nfs4_state *nfs4_do_open(struct inode *dir,
1872 struct dentry *dentry,
1873 fmode_t fmode,
1874 int flags,
1875 struct iattr *sattr,
1876 struct rpc_cred *cred,
1877 struct nfs4_threshold **ctx_th)
1857{ 1878{
1858 struct nfs4_exception exception = { }; 1879 struct nfs4_exception exception = { };
1859 struct nfs4_state *res; 1880 struct nfs4_state *res;
1860 int status; 1881 int status;
1861 1882
1862 do { 1883 do {
1863 status = _nfs4_do_open(dir, dentry, fmode, flags, sattr, cred, &res); 1884 status = _nfs4_do_open(dir, dentry, fmode, flags, sattr, cred,
1885 &res, ctx_th);
1864 if (status == 0) 1886 if (status == 0)
1865 break; 1887 break;
1866 /* NOTE: BAD_SEQID means the server and client disagree about the 1888 /* NOTE: BAD_SEQID means the server and client disagree about the
@@ -2184,7 +2206,8 @@ nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx, int open_flags
2184 struct nfs4_state *state; 2206 struct nfs4_state *state;
2185 2207
2186 /* Protect against concurrent sillydeletes */ 2208 /* Protect against concurrent sillydeletes */
2187 state = nfs4_do_open(dir, ctx->dentry, ctx->mode, open_flags, attr, ctx->cred); 2209 state = nfs4_do_open(dir, ctx->dentry, ctx->mode, open_flags, attr,
2210 ctx->cred, &ctx->mdsthreshold);
2188 if (IS_ERR(state)) 2211 if (IS_ERR(state))
2189 return ERR_CAST(state); 2212 return ERR_CAST(state);
2190 ctx->state = state; 2213 ctx->state = state;
@@ -2354,8 +2377,8 @@ static int nfs4_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
2354/* 2377/*
2355 * get the file handle for the "/" directory on the server 2378 * get the file handle for the "/" directory on the server
2356 */ 2379 */
2357static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, 2380int nfs4_proc_get_rootfh(struct nfs_server *server, struct nfs_fh *fhandle,
2358 struct nfs_fsinfo *info) 2381 struct nfs_fsinfo *info)
2359{ 2382{
2360 int minor_version = server->nfs_client->cl_minorversion; 2383 int minor_version = server->nfs_client->cl_minorversion;
2361 int status = nfs4_lookup_root(server, fhandle, info); 2384 int status = nfs4_lookup_root(server, fhandle, info);
@@ -2372,6 +2395,31 @@ static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
2372 return nfs4_map_errors(status); 2395 return nfs4_map_errors(status);
2373} 2396}
2374 2397
2398static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *mntfh,
2399 struct nfs_fsinfo *info)
2400{
2401 int error;
2402 struct nfs_fattr *fattr = info->fattr;
2403
2404 error = nfs4_server_capabilities(server, mntfh);
2405 if (error < 0) {
2406 dprintk("nfs4_get_root: getcaps error = %d\n", -error);
2407 return error;
2408 }
2409
2410 error = nfs4_proc_getattr(server, mntfh, fattr);
2411 if (error < 0) {
2412 dprintk("nfs4_get_root: getattr error = %d\n", -error);
2413 return error;
2414 }
2415
2416 if (fattr->valid & NFS_ATTR_FATTR_FSID &&
2417 !nfs_fsid_equal(&server->fsid, &fattr->fsid))
2418 memcpy(&server->fsid, &fattr->fsid, sizeof(server->fsid));
2419
2420 return error;
2421}
2422
2375/* 2423/*
2376 * Get locations and (maybe) other attributes of a referral. 2424 * Get locations and (maybe) other attributes of a referral.
2377 * Note that we'll actually follow the referral later when 2425 * Note that we'll actually follow the referral later when
@@ -2578,7 +2626,7 @@ out:
2578 return err; 2626 return err;
2579} 2627}
2580 2628
2581static int nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qstr *name, 2629static int nfs4_proc_lookup(struct inode *dir, struct qstr *name,
2582 struct nfs_fh *fhandle, struct nfs_fattr *fattr) 2630 struct nfs_fh *fhandle, struct nfs_fattr *fattr)
2583{ 2631{
2584 int status; 2632 int status;
@@ -2761,7 +2809,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
2761 fmode = ctx->mode; 2809 fmode = ctx->mode;
2762 } 2810 }
2763 sattr->ia_mode &= ~current_umask(); 2811 sattr->ia_mode &= ~current_umask();
2764 state = nfs4_do_open(dir, de, fmode, flags, sattr, cred); 2812 state = nfs4_do_open(dir, de, fmode, flags, sattr, cred, NULL);
2765 d_drop(dentry); 2813 d_drop(dentry);
2766 if (IS_ERR(state)) { 2814 if (IS_ERR(state)) {
2767 status = PTR_ERR(state); 2815 status = PTR_ERR(state);
@@ -2783,7 +2831,6 @@ static int _nfs4_proc_remove(struct inode *dir, struct qstr *name)
2783 struct nfs_removeargs args = { 2831 struct nfs_removeargs args = {
2784 .fh = NFS_FH(dir), 2832 .fh = NFS_FH(dir),
2785 .name = *name, 2833 .name = *name,
2786 .bitmask = server->attr_bitmask,
2787 }; 2834 };
2788 struct nfs_removeres res = { 2835 struct nfs_removeres res = {
2789 .server = server, 2836 .server = server,
@@ -2793,19 +2840,11 @@ static int _nfs4_proc_remove(struct inode *dir, struct qstr *name)
2793 .rpc_argp = &args, 2840 .rpc_argp = &args,
2794 .rpc_resp = &res, 2841 .rpc_resp = &res,
2795 }; 2842 };
2796 int status = -ENOMEM; 2843 int status;
2797
2798 res.dir_attr = nfs_alloc_fattr();
2799 if (res.dir_attr == NULL)
2800 goto out;
2801 2844
2802 status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 1); 2845 status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 1);
2803 if (status == 0) { 2846 if (status == 0)
2804 update_changeattr(dir, &res.cinfo); 2847 update_changeattr(dir, &res.cinfo);
2805 nfs_post_op_update_inode(dir, res.dir_attr);
2806 }
2807 nfs_free_fattr(res.dir_attr);
2808out:
2809 return status; 2848 return status;
2810} 2849}
2811 2850
@@ -2827,7 +2866,6 @@ static void nfs4_proc_unlink_setup(struct rpc_message *msg, struct inode *dir)
2827 struct nfs_removeargs *args = msg->rpc_argp; 2866 struct nfs_removeargs *args = msg->rpc_argp;
2828 struct nfs_removeres *res = msg->rpc_resp; 2867 struct nfs_removeres *res = msg->rpc_resp;
2829 2868
2830 args->bitmask = server->cache_consistency_bitmask;
2831 res->server = server; 2869 res->server = server;
2832 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE]; 2870 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE];
2833 nfs41_init_sequence(&args->seq_args, &res->seq_res, 1); 2871 nfs41_init_sequence(&args->seq_args, &res->seq_res, 1);
@@ -2852,7 +2890,6 @@ static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir)
2852 if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN) 2890 if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN)
2853 return 0; 2891 return 0;
2854 update_changeattr(dir, &res->cinfo); 2892 update_changeattr(dir, &res->cinfo);
2855 nfs_post_op_update_inode(dir, res->dir_attr);
2856 return 1; 2893 return 1;
2857} 2894}
2858 2895
@@ -2863,7 +2900,6 @@ static void nfs4_proc_rename_setup(struct rpc_message *msg, struct inode *dir)
2863 struct nfs_renameres *res = msg->rpc_resp; 2900 struct nfs_renameres *res = msg->rpc_resp;
2864 2901
2865 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME]; 2902 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME];
2866 arg->bitmask = server->attr_bitmask;
2867 res->server = server; 2903 res->server = server;
2868 nfs41_init_sequence(&arg->seq_args, &res->seq_res, 1); 2904 nfs41_init_sequence(&arg->seq_args, &res->seq_res, 1);
2869} 2905}
@@ -2889,9 +2925,7 @@ static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
2889 return 0; 2925 return 0;
2890 2926
2891 update_changeattr(old_dir, &res->old_cinfo); 2927 update_changeattr(old_dir, &res->old_cinfo);
2892 nfs_post_op_update_inode(old_dir, res->old_fattr);
2893 update_changeattr(new_dir, &res->new_cinfo); 2928 update_changeattr(new_dir, &res->new_cinfo);
2894 nfs_post_op_update_inode(new_dir, res->new_fattr);
2895 return 1; 2929 return 1;
2896} 2930}
2897 2931
@@ -2904,7 +2938,6 @@ static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name,
2904 .new_dir = NFS_FH(new_dir), 2938 .new_dir = NFS_FH(new_dir),
2905 .old_name = old_name, 2939 .old_name = old_name,
2906 .new_name = new_name, 2940 .new_name = new_name,
2907 .bitmask = server->attr_bitmask,
2908 }; 2941 };
2909 struct nfs_renameres res = { 2942 struct nfs_renameres res = {
2910 .server = server, 2943 .server = server,
@@ -2916,21 +2949,11 @@ static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name,
2916 }; 2949 };
2917 int status = -ENOMEM; 2950 int status = -ENOMEM;
2918 2951
2919 res.old_fattr = nfs_alloc_fattr();
2920 res.new_fattr = nfs_alloc_fattr();
2921 if (res.old_fattr == NULL || res.new_fattr == NULL)
2922 goto out;
2923
2924 status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); 2952 status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
2925 if (!status) { 2953 if (!status) {
2926 update_changeattr(old_dir, &res.old_cinfo); 2954 update_changeattr(old_dir, &res.old_cinfo);
2927 nfs_post_op_update_inode(old_dir, res.old_fattr);
2928 update_changeattr(new_dir, &res.new_cinfo); 2955 update_changeattr(new_dir, &res.new_cinfo);
2929 nfs_post_op_update_inode(new_dir, res.new_fattr);
2930 } 2956 }
2931out:
2932 nfs_free_fattr(res.new_fattr);
2933 nfs_free_fattr(res.old_fattr);
2934 return status; 2957 return status;
2935} 2958}
2936 2959
@@ -2968,18 +2991,15 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *
2968 int status = -ENOMEM; 2991 int status = -ENOMEM;
2969 2992
2970 res.fattr = nfs_alloc_fattr(); 2993 res.fattr = nfs_alloc_fattr();
2971 res.dir_attr = nfs_alloc_fattr(); 2994 if (res.fattr == NULL)
2972 if (res.fattr == NULL || res.dir_attr == NULL)
2973 goto out; 2995 goto out;
2974 2996
2975 status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); 2997 status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
2976 if (!status) { 2998 if (!status) {
2977 update_changeattr(dir, &res.cinfo); 2999 update_changeattr(dir, &res.cinfo);
2978 nfs_post_op_update_inode(dir, res.dir_attr);
2979 nfs_post_op_update_inode(inode, res.fattr); 3000 nfs_post_op_update_inode(inode, res.fattr);
2980 } 3001 }
2981out: 3002out:
2982 nfs_free_fattr(res.dir_attr);
2983 nfs_free_fattr(res.fattr); 3003 nfs_free_fattr(res.fattr);
2984 return status; 3004 return status;
2985} 3005}
@@ -3002,7 +3022,6 @@ struct nfs4_createdata {
3002 struct nfs4_create_res res; 3022 struct nfs4_create_res res;
3003 struct nfs_fh fh; 3023 struct nfs_fh fh;
3004 struct nfs_fattr fattr; 3024 struct nfs_fattr fattr;
3005 struct nfs_fattr dir_fattr;
3006}; 3025};
3007 3026
3008static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir, 3027static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir,
@@ -3026,9 +3045,7 @@ static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir,
3026 data->res.server = server; 3045 data->res.server = server;
3027 data->res.fh = &data->fh; 3046 data->res.fh = &data->fh;
3028 data->res.fattr = &data->fattr; 3047 data->res.fattr = &data->fattr;
3029 data->res.dir_fattr = &data->dir_fattr;
3030 nfs_fattr_init(data->res.fattr); 3048 nfs_fattr_init(data->res.fattr);
3031 nfs_fattr_init(data->res.dir_fattr);
3032 } 3049 }
3033 return data; 3050 return data;
3034} 3051}
@@ -3039,7 +3056,6 @@ static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_
3039 &data->arg.seq_args, &data->res.seq_res, 1); 3056 &data->arg.seq_args, &data->res.seq_res, 1);
3040 if (status == 0) { 3057 if (status == 0) {
3041 update_changeattr(dir, &data->res.dir_cinfo); 3058 update_changeattr(dir, &data->res.dir_cinfo);
3042 nfs_post_op_update_inode(dir, data->res.dir_fattr);
3043 status = nfs_instantiate(dentry, data->res.fh, data->res.fattr); 3059 status = nfs_instantiate(dentry, data->res.fh, data->res.fattr);
3044 } 3060 }
3045 return status; 3061 return status;
@@ -3335,12 +3351,12 @@ static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
3335 3351
3336void __nfs4_read_done_cb(struct nfs_read_data *data) 3352void __nfs4_read_done_cb(struct nfs_read_data *data)
3337{ 3353{
3338 nfs_invalidate_atime(data->inode); 3354 nfs_invalidate_atime(data->header->inode);
3339} 3355}
3340 3356
3341static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data) 3357static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data)
3342{ 3358{
3343 struct nfs_server *server = NFS_SERVER(data->inode); 3359 struct nfs_server *server = NFS_SERVER(data->header->inode);
3344 3360
3345 if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) { 3361 if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) {
3346 rpc_restart_call_prepare(task); 3362 rpc_restart_call_prepare(task);
@@ -3375,7 +3391,7 @@ static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message
3375 3391
3376static void nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data) 3392static void nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data)
3377{ 3393{
3378 if (nfs4_setup_sequence(NFS_SERVER(data->inode), 3394 if (nfs4_setup_sequence(NFS_SERVER(data->header->inode),
3379 &data->args.seq_args, 3395 &data->args.seq_args,
3380 &data->res.seq_res, 3396 &data->res.seq_res,
3381 task)) 3397 task))
@@ -3383,25 +3399,9 @@ static void nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_da
3383 rpc_call_start(task); 3399 rpc_call_start(task);
3384} 3400}
3385 3401
3386/* Reset the the nfs_read_data to send the read to the MDS. */
3387void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data)
3388{
3389 dprintk("%s Reset task for i/o through\n", __func__);
3390 put_lseg(data->lseg);
3391 data->lseg = NULL;
3392 /* offsets will differ in the dense stripe case */
3393 data->args.offset = data->mds_offset;
3394 data->ds_clp = NULL;
3395 data->args.fh = NFS_FH(data->inode);
3396 data->read_done_cb = nfs4_read_done_cb;
3397 task->tk_ops = data->mds_ops;
3398 rpc_task_reset_client(task, NFS_CLIENT(data->inode));
3399}
3400EXPORT_SYMBOL_GPL(nfs4_reset_read);
3401
3402static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data) 3402static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data)
3403{ 3403{
3404 struct inode *inode = data->inode; 3404 struct inode *inode = data->header->inode;
3405 3405
3406 if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) { 3406 if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) {
3407 rpc_restart_call_prepare(task); 3407 rpc_restart_call_prepare(task);
@@ -3409,7 +3409,7 @@ static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data
3409 } 3409 }
3410 if (task->tk_status >= 0) { 3410 if (task->tk_status >= 0) {
3411 renew_lease(NFS_SERVER(inode), data->timestamp); 3411 renew_lease(NFS_SERVER(inode), data->timestamp);
3412 nfs_post_op_update_inode_force_wcc(inode, data->res.fattr); 3412 nfs_post_op_update_inode_force_wcc(inode, &data->fattr);
3413 } 3413 }
3414 return 0; 3414 return 0;
3415} 3415}
@@ -3422,32 +3422,30 @@ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
3422 nfs4_write_done_cb(task, data); 3422 nfs4_write_done_cb(task, data);
3423} 3423}
3424 3424
3425/* Reset the the nfs_write_data to send the write to the MDS. */ 3425static
3426void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data) 3426bool nfs4_write_need_cache_consistency_data(const struct nfs_write_data *data)
3427{ 3427{
3428 dprintk("%s Reset task for i/o through\n", __func__); 3428 const struct nfs_pgio_header *hdr = data->header;
3429 put_lseg(data->lseg); 3429
3430 data->lseg = NULL; 3430 /* Don't request attributes for pNFS or O_DIRECT writes */
3431 data->ds_clp = NULL; 3431 if (data->ds_clp != NULL || hdr->dreq != NULL)
3432 data->write_done_cb = nfs4_write_done_cb; 3432 return false;
3433 data->args.fh = NFS_FH(data->inode); 3433 /* Otherwise, request attributes if and only if we don't hold
3434 data->args.bitmask = data->res.server->cache_consistency_bitmask; 3434 * a delegation
3435 data->args.offset = data->mds_offset; 3435 */
3436 data->res.fattr = &data->fattr; 3436 return nfs_have_delegation(hdr->inode, FMODE_READ) == 0;
3437 task->tk_ops = data->mds_ops;
3438 rpc_task_reset_client(task, NFS_CLIENT(data->inode));
3439} 3437}
3440EXPORT_SYMBOL_GPL(nfs4_reset_write);
3441 3438
3442static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg) 3439static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg)
3443{ 3440{
3444 struct nfs_server *server = NFS_SERVER(data->inode); 3441 struct nfs_server *server = NFS_SERVER(data->header->inode);
3445 3442
3446 if (data->lseg) { 3443 if (!nfs4_write_need_cache_consistency_data(data)) {
3447 data->args.bitmask = NULL; 3444 data->args.bitmask = NULL;
3448 data->res.fattr = NULL; 3445 data->res.fattr = NULL;
3449 } else 3446 } else
3450 data->args.bitmask = server->cache_consistency_bitmask; 3447 data->args.bitmask = server->cache_consistency_bitmask;
3448
3451 if (!data->write_done_cb) 3449 if (!data->write_done_cb)
3452 data->write_done_cb = nfs4_write_done_cb; 3450 data->write_done_cb = nfs4_write_done_cb;
3453 data->res.server = server; 3451 data->res.server = server;
@@ -3459,6 +3457,16 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag
3459 3457
3460static void nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data) 3458static void nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data)
3461{ 3459{
3460 if (nfs4_setup_sequence(NFS_SERVER(data->header->inode),
3461 &data->args.seq_args,
3462 &data->res.seq_res,
3463 task))
3464 return;
3465 rpc_call_start(task);
3466}
3467
3468static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
3469{
3462 if (nfs4_setup_sequence(NFS_SERVER(data->inode), 3470 if (nfs4_setup_sequence(NFS_SERVER(data->inode),
3463 &data->args.seq_args, 3471 &data->args.seq_args,
3464 &data->res.seq_res, 3472 &data->res.seq_res,
@@ -3467,7 +3475,7 @@ static void nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_
3467 rpc_call_start(task); 3475 rpc_call_start(task);
3468} 3476}
3469 3477
3470static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_write_data *data) 3478static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_commit_data *data)
3471{ 3479{
3472 struct inode *inode = data->inode; 3480 struct inode *inode = data->inode;
3473 3481
@@ -3475,28 +3483,22 @@ static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_write_data *dat
3475 rpc_restart_call_prepare(task); 3483 rpc_restart_call_prepare(task);
3476 return -EAGAIN; 3484 return -EAGAIN;
3477 } 3485 }
3478 nfs_refresh_inode(inode, data->res.fattr);
3479 return 0; 3486 return 0;
3480} 3487}
3481 3488
3482static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data) 3489static int nfs4_commit_done(struct rpc_task *task, struct nfs_commit_data *data)
3483{ 3490{
3484 if (!nfs4_sequence_done(task, &data->res.seq_res)) 3491 if (!nfs4_sequence_done(task, &data->res.seq_res))
3485 return -EAGAIN; 3492 return -EAGAIN;
3486 return data->write_done_cb(task, data); 3493 return data->commit_done_cb(task, data);
3487} 3494}
3488 3495
3489static void nfs4_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg) 3496static void nfs4_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg)
3490{ 3497{
3491 struct nfs_server *server = NFS_SERVER(data->inode); 3498 struct nfs_server *server = NFS_SERVER(data->inode);
3492 3499
3493 if (data->lseg) { 3500 if (data->commit_done_cb == NULL)
3494 data->args.bitmask = NULL; 3501 data->commit_done_cb = nfs4_commit_done_cb;
3495 data->res.fattr = NULL;
3496 } else
3497 data->args.bitmask = server->cache_consistency_bitmask;
3498 if (!data->write_done_cb)
3499 data->write_done_cb = nfs4_commit_done_cb;
3500 data->res.server = server; 3502 data->res.server = server;
3501 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT]; 3503 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT];
3502 nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); 3504 nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
@@ -3905,7 +3907,7 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
3905 case -NFS4ERR_SEQ_MISORDERED: 3907 case -NFS4ERR_SEQ_MISORDERED:
3906 dprintk("%s ERROR %d, Reset session\n", __func__, 3908 dprintk("%s ERROR %d, Reset session\n", __func__,
3907 task->tk_status); 3909 task->tk_status);
3908 nfs4_schedule_session_recovery(clp->cl_session); 3910 nfs4_schedule_session_recovery(clp->cl_session, task->tk_status);
3909 task->tk_status = 0; 3911 task->tk_status = 0;
3910 return -EAGAIN; 3912 return -EAGAIN;
3911#endif /* CONFIG_NFS_V4_1 */ 3913#endif /* CONFIG_NFS_V4_1 */
@@ -3931,13 +3933,21 @@ wait_on_recovery:
3931 return -EAGAIN; 3933 return -EAGAIN;
3932} 3934}
3933 3935
3934static void nfs4_construct_boot_verifier(struct nfs_client *clp, 3936static void nfs4_init_boot_verifier(const struct nfs_client *clp,
3935 nfs4_verifier *bootverf) 3937 nfs4_verifier *bootverf)
3936{ 3938{
3937 __be32 verf[2]; 3939 __be32 verf[2];
3938 3940
3939 verf[0] = htonl((u32)clp->cl_boot_time.tv_sec); 3941 if (test_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state)) {
3940 verf[1] = htonl((u32)clp->cl_boot_time.tv_nsec); 3942 /* An impossible timestamp guarantees this value
3943 * will never match a generated boot time. */
3944 verf[0] = 0;
3945 verf[1] = (__be32)(NSEC_PER_SEC + 1);
3946 } else {
3947 struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id);
3948 verf[0] = (__be32)nn->boot_time.tv_sec;
3949 verf[1] = (__be32)nn->boot_time.tv_nsec;
3950 }
3941 memcpy(bootverf->data, verf, sizeof(bootverf->data)); 3951 memcpy(bootverf->data, verf, sizeof(bootverf->data));
3942} 3952}
3943 3953
@@ -3960,7 +3970,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
3960 int loop = 0; 3970 int loop = 0;
3961 int status; 3971 int status;
3962 3972
3963 nfs4_construct_boot_verifier(clp, &sc_verifier); 3973 nfs4_init_boot_verifier(clp, &sc_verifier);
3964 3974
3965 for(;;) { 3975 for(;;) {
3966 rcu_read_lock(); 3976 rcu_read_lock();
@@ -4104,7 +4114,7 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
4104 nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); 4114 nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
4105 data->args.fhandle = &data->fh; 4115 data->args.fhandle = &data->fh;
4106 data->args.stateid = &data->stateid; 4116 data->args.stateid = &data->stateid;
4107 data->args.bitmask = server->attr_bitmask; 4117 data->args.bitmask = server->cache_consistency_bitmask;
4108 nfs_copy_fh(&data->fh, NFS_FH(inode)); 4118 nfs_copy_fh(&data->fh, NFS_FH(inode));
4109 nfs4_stateid_copy(&data->stateid, stateid); 4119 nfs4_stateid_copy(&data->stateid, stateid);
4110 data->res.fattr = &data->fattr; 4120 data->res.fattr = &data->fattr;
@@ -4125,9 +4135,10 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
4125 if (status != 0) 4135 if (status != 0)
4126 goto out; 4136 goto out;
4127 status = data->rpc_status; 4137 status = data->rpc_status;
4128 if (status != 0) 4138 if (status == 0)
4129 goto out; 4139 nfs_post_op_update_inode_force_wcc(inode, &data->fattr);
4130 nfs_refresh_inode(inode, &data->fattr); 4140 else
4141 nfs_refresh_inode(inode, &data->fattr);
4131out: 4142out:
4132 rpc_put_task(task); 4143 rpc_put_task(task);
4133 return status; 4144 return status;
@@ -4837,7 +4848,7 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
4837 case -NFS4ERR_BAD_HIGH_SLOT: 4848 case -NFS4ERR_BAD_HIGH_SLOT:
4838 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: 4849 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
4839 case -NFS4ERR_DEADSESSION: 4850 case -NFS4ERR_DEADSESSION:
4840 nfs4_schedule_session_recovery(server->nfs_client->cl_session); 4851 nfs4_schedule_session_recovery(server->nfs_client->cl_session, err);
4841 goto out; 4852 goto out;
4842 case -ERESTARTSYS: 4853 case -ERESTARTSYS:
4843 /* 4854 /*
@@ -5079,7 +5090,8 @@ out_inval:
5079} 5090}
5080 5091
5081static bool 5092static bool
5082nfs41_same_server_scope(struct server_scope *a, struct server_scope *b) 5093nfs41_same_server_scope(struct nfs41_server_scope *a,
5094 struct nfs41_server_scope *b)
5083{ 5095{
5084 if (a->server_scope_sz == b->server_scope_sz && 5096 if (a->server_scope_sz == b->server_scope_sz &&
5085 memcmp(a->server_scope, b->server_scope, a->server_scope_sz) == 0) 5097 memcmp(a->server_scope, b->server_scope, a->server_scope_sz) == 0)
@@ -5089,6 +5101,61 @@ nfs41_same_server_scope(struct server_scope *a, struct server_scope *b)
5089} 5101}
5090 5102
5091/* 5103/*
5104 * nfs4_proc_bind_conn_to_session()
5105 *
5106 * The 4.1 client currently uses the same TCP connection for the
5107 * fore and backchannel.
5108 */
5109int nfs4_proc_bind_conn_to_session(struct nfs_client *clp, struct rpc_cred *cred)
5110{
5111 int status;
5112 struct nfs41_bind_conn_to_session_res res;
5113 struct rpc_message msg = {
5114 .rpc_proc =
5115 &nfs4_procedures[NFSPROC4_CLNT_BIND_CONN_TO_SESSION],
5116 .rpc_argp = clp,
5117 .rpc_resp = &res,
5118 .rpc_cred = cred,
5119 };
5120
5121 dprintk("--> %s\n", __func__);
5122 BUG_ON(clp == NULL);
5123
5124 res.session = kzalloc(sizeof(struct nfs4_session), GFP_NOFS);
5125 if (unlikely(res.session == NULL)) {
5126 status = -ENOMEM;
5127 goto out;
5128 }
5129
5130 status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
5131 if (status == 0) {
5132 if (memcmp(res.session->sess_id.data,
5133 clp->cl_session->sess_id.data, NFS4_MAX_SESSIONID_LEN)) {
5134 dprintk("NFS: %s: Session ID mismatch\n", __func__);
5135 status = -EIO;
5136 goto out_session;
5137 }
5138 if (res.dir != NFS4_CDFS4_BOTH) {
5139 dprintk("NFS: %s: Unexpected direction from server\n",
5140 __func__);
5141 status = -EIO;
5142 goto out_session;
5143 }
5144 if (res.use_conn_in_rdma_mode) {
5145 dprintk("NFS: %s: Server returned RDMA mode = true\n",
5146 __func__);
5147 status = -EIO;
5148 goto out_session;
5149 }
5150 }
5151out_session:
5152 kfree(res.session);
5153out:
5154 dprintk("<-- %s status= %d\n", __func__, status);
5155 return status;
5156}
5157
5158/*
5092 * nfs4_proc_exchange_id() 5159 * nfs4_proc_exchange_id()
5093 * 5160 *
5094 * Since the clientid has expired, all compounds using sessions 5161 * Since the clientid has expired, all compounds using sessions
@@ -5105,7 +5172,7 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
5105 .flags = EXCHGID4_FLAG_SUPP_MOVED_REFER, 5172 .flags = EXCHGID4_FLAG_SUPP_MOVED_REFER,
5106 }; 5173 };
5107 struct nfs41_exchange_id_res res = { 5174 struct nfs41_exchange_id_res res = {
5108 .client = clp, 5175 0
5109 }; 5176 };
5110 int status; 5177 int status;
5111 struct rpc_message msg = { 5178 struct rpc_message msg = {
@@ -5118,7 +5185,7 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
5118 dprintk("--> %s\n", __func__); 5185 dprintk("--> %s\n", __func__);
5119 BUG_ON(clp == NULL); 5186 BUG_ON(clp == NULL);
5120 5187
5121 nfs4_construct_boot_verifier(clp, &verifier); 5188 nfs4_init_boot_verifier(clp, &verifier);
5122 5189
5123 args.id_len = scnprintf(args.id, sizeof(args.id), 5190 args.id_len = scnprintf(args.id, sizeof(args.id),
5124 "%s/%s/%u", 5191 "%s/%s/%u",
@@ -5126,59 +5193,135 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
5126 clp->cl_rpcclient->cl_nodename, 5193 clp->cl_rpcclient->cl_nodename,
5127 clp->cl_rpcclient->cl_auth->au_flavor); 5194 clp->cl_rpcclient->cl_auth->au_flavor);
5128 5195
5129 res.server_scope = kzalloc(sizeof(struct server_scope), GFP_KERNEL); 5196 res.server_owner = kzalloc(sizeof(struct nfs41_server_owner),
5130 if (unlikely(!res.server_scope)) { 5197 GFP_NOFS);
5198 if (unlikely(res.server_owner == NULL)) {
5131 status = -ENOMEM; 5199 status = -ENOMEM;
5132 goto out; 5200 goto out;
5133 } 5201 }
5134 5202
5135 res.impl_id = kzalloc(sizeof(struct nfs41_impl_id), GFP_KERNEL); 5203 res.server_scope = kzalloc(sizeof(struct nfs41_server_scope),
5136 if (unlikely(!res.impl_id)) { 5204 GFP_NOFS);
5205 if (unlikely(res.server_scope == NULL)) {
5206 status = -ENOMEM;
5207 goto out_server_owner;
5208 }
5209
5210 res.impl_id = kzalloc(sizeof(struct nfs41_impl_id), GFP_NOFS);
5211 if (unlikely(res.impl_id == NULL)) {
5137 status = -ENOMEM; 5212 status = -ENOMEM;
5138 goto out_server_scope; 5213 goto out_server_scope;
5139 } 5214 }
5140 5215
5141 status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); 5216 status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
5142 if (!status) 5217 if (status == 0)
5143 status = nfs4_check_cl_exchange_flags(clp->cl_exchange_flags); 5218 status = nfs4_check_cl_exchange_flags(res.flags);
5219
5220 if (status == 0) {
5221 clp->cl_clientid = res.clientid;
5222 clp->cl_exchange_flags = (res.flags & ~EXCHGID4_FLAG_CONFIRMED_R);
5223 if (!(res.flags & EXCHGID4_FLAG_CONFIRMED_R))
5224 clp->cl_seqid = res.seqid;
5225
5226 kfree(clp->cl_serverowner);
5227 clp->cl_serverowner = res.server_owner;
5228 res.server_owner = NULL;
5144 5229
5145 if (!status) {
5146 /* use the most recent implementation id */ 5230 /* use the most recent implementation id */
5147 kfree(clp->impl_id); 5231 kfree(clp->cl_implid);
5148 clp->impl_id = res.impl_id; 5232 clp->cl_implid = res.impl_id;
5149 } else
5150 kfree(res.impl_id);
5151 5233
5152 if (!status) { 5234 if (clp->cl_serverscope != NULL &&
5153 if (clp->server_scope && 5235 !nfs41_same_server_scope(clp->cl_serverscope,
5154 !nfs41_same_server_scope(clp->server_scope,
5155 res.server_scope)) { 5236 res.server_scope)) {
5156 dprintk("%s: server_scope mismatch detected\n", 5237 dprintk("%s: server_scope mismatch detected\n",
5157 __func__); 5238 __func__);
5158 set_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH, &clp->cl_state); 5239 set_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH, &clp->cl_state);
5159 kfree(clp->server_scope); 5240 kfree(clp->cl_serverscope);
5160 clp->server_scope = NULL; 5241 clp->cl_serverscope = NULL;
5161 } 5242 }
5162 5243
5163 if (!clp->server_scope) { 5244 if (clp->cl_serverscope == NULL) {
5164 clp->server_scope = res.server_scope; 5245 clp->cl_serverscope = res.server_scope;
5165 goto out; 5246 goto out;
5166 } 5247 }
5167 } 5248 } else
5249 kfree(res.impl_id);
5168 5250
5251out_server_owner:
5252 kfree(res.server_owner);
5169out_server_scope: 5253out_server_scope:
5170 kfree(res.server_scope); 5254 kfree(res.server_scope);
5171out: 5255out:
5172 if (clp->impl_id) 5256 if (clp->cl_implid != NULL)
5173 dprintk("%s: Server Implementation ID: " 5257 dprintk("%s: Server Implementation ID: "
5174 "domain: %s, name: %s, date: %llu,%u\n", 5258 "domain: %s, name: %s, date: %llu,%u\n",
5175 __func__, clp->impl_id->domain, clp->impl_id->name, 5259 __func__, clp->cl_implid->domain, clp->cl_implid->name,
5176 clp->impl_id->date.seconds, 5260 clp->cl_implid->date.seconds,
5177 clp->impl_id->date.nseconds); 5261 clp->cl_implid->date.nseconds);
5178 dprintk("<-- %s status= %d\n", __func__, status); 5262 dprintk("<-- %s status= %d\n", __func__, status);
5179 return status; 5263 return status;
5180} 5264}
5181 5265
5266static int _nfs4_proc_destroy_clientid(struct nfs_client *clp,
5267 struct rpc_cred *cred)
5268{
5269 struct rpc_message msg = {
5270 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DESTROY_CLIENTID],
5271 .rpc_argp = clp,
5272 .rpc_cred = cred,
5273 };
5274 int status;
5275
5276 status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
5277 if (status)
5278 pr_warn("NFS: Got error %d from the server %s on "
5279 "DESTROY_CLIENTID.", status, clp->cl_hostname);
5280 return status;
5281}
5282
5283static int nfs4_proc_destroy_clientid(struct nfs_client *clp,
5284 struct rpc_cred *cred)
5285{
5286 unsigned int loop;
5287 int ret;
5288
5289 for (loop = NFS4_MAX_LOOP_ON_RECOVER; loop != 0; loop--) {
5290 ret = _nfs4_proc_destroy_clientid(clp, cred);
5291 switch (ret) {
5292 case -NFS4ERR_DELAY:
5293 case -NFS4ERR_CLIENTID_BUSY:
5294 ssleep(1);
5295 break;
5296 default:
5297 return ret;
5298 }
5299 }
5300 return 0;
5301}
5302
5303int nfs4_destroy_clientid(struct nfs_client *clp)
5304{
5305 struct rpc_cred *cred;
5306 int ret = 0;
5307
5308 if (clp->cl_mvops->minor_version < 1)
5309 goto out;
5310 if (clp->cl_exchange_flags == 0)
5311 goto out;
5312 cred = nfs4_get_exchange_id_cred(clp);
5313 ret = nfs4_proc_destroy_clientid(clp, cred);
5314 if (cred)
5315 put_rpccred(cred);
5316 switch (ret) {
5317 case 0:
5318 case -NFS4ERR_STALE_CLIENTID:
5319 clp->cl_exchange_flags = 0;
5320 }
5321out:
5322 return ret;
5323}
5324
5182struct nfs4_get_lease_time_data { 5325struct nfs4_get_lease_time_data {
5183 struct nfs4_get_lease_time_args *args; 5326 struct nfs4_get_lease_time_args *args;
5184 struct nfs4_get_lease_time_res *res; 5327 struct nfs4_get_lease_time_res *res;
@@ -5399,8 +5542,12 @@ struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp)
5399void nfs4_destroy_session(struct nfs4_session *session) 5542void nfs4_destroy_session(struct nfs4_session *session)
5400{ 5543{
5401 struct rpc_xprt *xprt; 5544 struct rpc_xprt *xprt;
5545 struct rpc_cred *cred;
5402 5546
5403 nfs4_proc_destroy_session(session); 5547 cred = nfs4_get_exchange_id_cred(session->clp);
5548 nfs4_proc_destroy_session(session, cred);
5549 if (cred)
5550 put_rpccred(cred);
5404 5551
5405 rcu_read_lock(); 5552 rcu_read_lock();
5406 xprt = rcu_dereference(session->clp->cl_rpcclient->cl_xprt); 5553 xprt = rcu_dereference(session->clp->cl_rpcclient->cl_xprt);
@@ -5510,7 +5657,8 @@ static int nfs4_verify_channel_attrs(struct nfs41_create_session_args *args,
5510 return nfs4_verify_back_channel_attrs(args, session); 5657 return nfs4_verify_back_channel_attrs(args, session);
5511} 5658}
5512 5659
5513static int _nfs4_proc_create_session(struct nfs_client *clp) 5660static int _nfs4_proc_create_session(struct nfs_client *clp,
5661 struct rpc_cred *cred)
5514{ 5662{
5515 struct nfs4_session *session = clp->cl_session; 5663 struct nfs4_session *session = clp->cl_session;
5516 struct nfs41_create_session_args args = { 5664 struct nfs41_create_session_args args = {
@@ -5524,6 +5672,7 @@ static int _nfs4_proc_create_session(struct nfs_client *clp)
5524 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE_SESSION], 5672 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE_SESSION],
5525 .rpc_argp = &args, 5673 .rpc_argp = &args,
5526 .rpc_resp = &res, 5674 .rpc_resp = &res,
5675 .rpc_cred = cred,
5527 }; 5676 };
5528 int status; 5677 int status;
5529 5678
@@ -5548,7 +5697,7 @@ static int _nfs4_proc_create_session(struct nfs_client *clp)
5548 * It is the responsibility of the caller to verify the session is 5697 * It is the responsibility of the caller to verify the session is
5549 * expired before calling this routine. 5698 * expired before calling this routine.
5550 */ 5699 */
5551int nfs4_proc_create_session(struct nfs_client *clp) 5700int nfs4_proc_create_session(struct nfs_client *clp, struct rpc_cred *cred)
5552{ 5701{
5553 int status; 5702 int status;
5554 unsigned *ptr; 5703 unsigned *ptr;
@@ -5556,7 +5705,7 @@ int nfs4_proc_create_session(struct nfs_client *clp)
5556 5705
5557 dprintk("--> %s clp=%p session=%p\n", __func__, clp, session); 5706 dprintk("--> %s clp=%p session=%p\n", __func__, clp, session);
5558 5707
5559 status = _nfs4_proc_create_session(clp); 5708 status = _nfs4_proc_create_session(clp, cred);
5560 if (status) 5709 if (status)
5561 goto out; 5710 goto out;
5562 5711
@@ -5578,10 +5727,15 @@ out:
5578 * Issue the over-the-wire RPC DESTROY_SESSION. 5727 * Issue the over-the-wire RPC DESTROY_SESSION.
5579 * The caller must serialize access to this routine. 5728 * The caller must serialize access to this routine.
5580 */ 5729 */
5581int nfs4_proc_destroy_session(struct nfs4_session *session) 5730int nfs4_proc_destroy_session(struct nfs4_session *session,
5731 struct rpc_cred *cred)
5582{ 5732{
5733 struct rpc_message msg = {
5734 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DESTROY_SESSION],
5735 .rpc_argp = session,
5736 .rpc_cred = cred,
5737 };
5583 int status = 0; 5738 int status = 0;
5584 struct rpc_message msg;
5585 5739
5586 dprintk("--> nfs4_proc_destroy_session\n"); 5740 dprintk("--> nfs4_proc_destroy_session\n");
5587 5741
@@ -5589,10 +5743,6 @@ int nfs4_proc_destroy_session(struct nfs4_session *session)
5589 if (session->clp->cl_cons_state != NFS_CS_READY) 5743 if (session->clp->cl_cons_state != NFS_CS_READY)
5590 return status; 5744 return status;
5591 5745
5592 msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DESTROY_SESSION];
5593 msg.rpc_argp = session;
5594 msg.rpc_resp = NULL;
5595 msg.rpc_cred = NULL;
5596 status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); 5746 status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
5597 5747
5598 if (status) 5748 if (status)
@@ -5604,53 +5754,79 @@ int nfs4_proc_destroy_session(struct nfs4_session *session)
5604 return status; 5754 return status;
5605} 5755}
5606 5756
5757/*
5758 * With sessions, the client is not marked ready until after a
5759 * successful EXCHANGE_ID and CREATE_SESSION.
5760 *
5761 * Map errors cl_cons_state errors to EPROTONOSUPPORT to indicate
5762 * other versions of NFS can be tried.
5763 */
5764static int nfs41_check_session_ready(struct nfs_client *clp)
5765{
5766 int ret;
5767
5768 if (clp->cl_cons_state == NFS_CS_SESSION_INITING) {
5769 ret = nfs4_client_recover_expired_lease(clp);
5770 if (ret)
5771 return ret;
5772 }
5773 if (clp->cl_cons_state < NFS_CS_READY)
5774 return -EPROTONOSUPPORT;
5775 smp_rmb();
5776 return 0;
5777}
5778
5607int nfs4_init_session(struct nfs_server *server) 5779int nfs4_init_session(struct nfs_server *server)
5608{ 5780{
5609 struct nfs_client *clp = server->nfs_client; 5781 struct nfs_client *clp = server->nfs_client;
5610 struct nfs4_session *session; 5782 struct nfs4_session *session;
5611 unsigned int rsize, wsize; 5783 unsigned int rsize, wsize;
5612 int ret;
5613 5784
5614 if (!nfs4_has_session(clp)) 5785 if (!nfs4_has_session(clp))
5615 return 0; 5786 return 0;
5616 5787
5617 session = clp->cl_session; 5788 session = clp->cl_session;
5618 if (!test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state)) 5789 spin_lock(&clp->cl_lock);
5619 return 0; 5790 if (test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state)) {
5620 5791
5621 rsize = server->rsize; 5792 rsize = server->rsize;
5622 if (rsize == 0) 5793 if (rsize == 0)
5623 rsize = NFS_MAX_FILE_IO_SIZE; 5794 rsize = NFS_MAX_FILE_IO_SIZE;
5624 wsize = server->wsize; 5795 wsize = server->wsize;
5625 if (wsize == 0) 5796 if (wsize == 0)
5626 wsize = NFS_MAX_FILE_IO_SIZE; 5797 wsize = NFS_MAX_FILE_IO_SIZE;
5627 5798
5628 session->fc_attrs.max_rqst_sz = wsize + nfs41_maxwrite_overhead; 5799 session->fc_attrs.max_rqst_sz = wsize + nfs41_maxwrite_overhead;
5629 session->fc_attrs.max_resp_sz = rsize + nfs41_maxread_overhead; 5800 session->fc_attrs.max_resp_sz = rsize + nfs41_maxread_overhead;
5801 }
5802 spin_unlock(&clp->cl_lock);
5630 5803
5631 ret = nfs4_recover_expired_lease(server); 5804 return nfs41_check_session_ready(clp);
5632 if (!ret)
5633 ret = nfs4_check_client_ready(clp);
5634 return ret;
5635} 5805}
5636 5806
5637int nfs4_init_ds_session(struct nfs_client *clp) 5807int nfs4_init_ds_session(struct nfs_client *clp, unsigned long lease_time)
5638{ 5808{
5639 struct nfs4_session *session = clp->cl_session; 5809 struct nfs4_session *session = clp->cl_session;
5640 int ret; 5810 int ret;
5641 5811
5642 if (!test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state)) 5812 spin_lock(&clp->cl_lock);
5643 return 0; 5813 if (test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state)) {
5644 5814 /*
5645 ret = nfs4_client_recover_expired_lease(clp); 5815 * Do not set NFS_CS_CHECK_LEASE_TIME instead set the
5646 if (!ret) 5816 * DS lease to be equal to the MDS lease.
5647 /* Test for the DS role */ 5817 */
5648 if (!is_ds_client(clp)) 5818 clp->cl_lease_time = lease_time;
5649 ret = -ENODEV; 5819 clp->cl_last_renewal = jiffies;
5650 if (!ret) 5820 }
5651 ret = nfs4_check_client_ready(clp); 5821 spin_unlock(&clp->cl_lock);
5652 return ret;
5653 5822
5823 ret = nfs41_check_session_ready(clp);
5824 if (ret)
5825 return ret;
5826 /* Test for the DS role */
5827 if (!is_ds_client(clp))
5828 return -ENODEV;
5829 return 0;
5654} 5830}
5655EXPORT_SYMBOL_GPL(nfs4_init_ds_session); 5831EXPORT_SYMBOL_GPL(nfs4_init_ds_session);
5656 5832
@@ -6557,6 +6733,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
6557 .file_inode_ops = &nfs4_file_inode_operations, 6733 .file_inode_ops = &nfs4_file_inode_operations,
6558 .file_ops = &nfs4_file_operations, 6734 .file_ops = &nfs4_file_operations,
6559 .getroot = nfs4_proc_get_root, 6735 .getroot = nfs4_proc_get_root,
6736 .submount = nfs4_submount,
6560 .getattr = nfs4_proc_getattr, 6737 .getattr = nfs4_proc_getattr,
6561 .setattr = nfs4_proc_setattr, 6738 .setattr = nfs4_proc_setattr,
6562 .lookup = nfs4_proc_lookup, 6739 .lookup = nfs4_proc_lookup,
@@ -6589,13 +6766,13 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
6589 .write_rpc_prepare = nfs4_proc_write_rpc_prepare, 6766 .write_rpc_prepare = nfs4_proc_write_rpc_prepare,
6590 .write_done = nfs4_write_done, 6767 .write_done = nfs4_write_done,
6591 .commit_setup = nfs4_proc_commit_setup, 6768 .commit_setup = nfs4_proc_commit_setup,
6769 .commit_rpc_prepare = nfs4_proc_commit_rpc_prepare,
6592 .commit_done = nfs4_commit_done, 6770 .commit_done = nfs4_commit_done,
6593 .lock = nfs4_proc_lock, 6771 .lock = nfs4_proc_lock,
6594 .clear_acl_cache = nfs4_zap_acl_attr, 6772 .clear_acl_cache = nfs4_zap_acl_attr,
6595 .close_context = nfs4_close_context, 6773 .close_context = nfs4_close_context,
6596 .open_context = nfs4_atomic_open, 6774 .open_context = nfs4_atomic_open,
6597 .init_client = nfs4_init_client, 6775 .init_client = nfs4_init_client,
6598 .secinfo = nfs4_proc_secinfo,
6599}; 6776};
6600 6777
6601static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = { 6778static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = {
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index dc484c0eae7f..6930bec91bca 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -49,7 +49,7 @@
49#include "nfs4_fs.h" 49#include "nfs4_fs.h"
50#include "delegation.h" 50#include "delegation.h"
51 51
52#define NFSDBG_FACILITY NFSDBG_PROC 52#define NFSDBG_FACILITY NFSDBG_STATE
53 53
54void 54void
55nfs4_renew_state(struct work_struct *work) 55nfs4_renew_state(struct work_struct *work)
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 7f0fcfc1fe9d..c679b9ecef63 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -57,6 +57,8 @@
57#include "internal.h" 57#include "internal.h"
58#include "pnfs.h" 58#include "pnfs.h"
59 59
60#define NFSDBG_FACILITY NFSDBG_STATE
61
60#define OPENOWNER_POOL_SIZE 8 62#define OPENOWNER_POOL_SIZE 8
61 63
62const nfs4_stateid zero_stateid; 64const nfs4_stateid zero_stateid;
@@ -254,7 +256,7 @@ int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
254 goto out; 256 goto out;
255 set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); 257 set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
256do_confirm: 258do_confirm:
257 status = nfs4_proc_create_session(clp); 259 status = nfs4_proc_create_session(clp, cred);
258 if (status != 0) 260 if (status != 0)
259 goto out; 261 goto out;
260 clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); 262 clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
@@ -1106,6 +1108,8 @@ void nfs4_schedule_lease_recovery(struct nfs_client *clp)
1106 return; 1108 return;
1107 if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) 1109 if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
1108 set_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state); 1110 set_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
1111 dprintk("%s: scheduling lease recovery for server %s\n", __func__,
1112 clp->cl_hostname);
1109 nfs4_schedule_state_manager(clp); 1113 nfs4_schedule_state_manager(clp);
1110} 1114}
1111EXPORT_SYMBOL_GPL(nfs4_schedule_lease_recovery); 1115EXPORT_SYMBOL_GPL(nfs4_schedule_lease_recovery);
@@ -1122,6 +1126,8 @@ static void nfs40_handle_cb_pathdown(struct nfs_client *clp)
1122{ 1126{
1123 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); 1127 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
1124 nfs_expire_all_delegations(clp); 1128 nfs_expire_all_delegations(clp);
1129 dprintk("%s: handling CB_PATHDOWN recovery for server %s\n", __func__,
1130 clp->cl_hostname);
1125} 1131}
1126 1132
1127void nfs4_schedule_path_down_recovery(struct nfs_client *clp) 1133void nfs4_schedule_path_down_recovery(struct nfs_client *clp)
@@ -1158,6 +1164,8 @@ void nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4
1158 struct nfs_client *clp = server->nfs_client; 1164 struct nfs_client *clp = server->nfs_client;
1159 1165
1160 nfs4_state_mark_reclaim_nograce(clp, state); 1166 nfs4_state_mark_reclaim_nograce(clp, state);
1167 dprintk("%s: scheduling stateid recovery for server %s\n", __func__,
1168 clp->cl_hostname);
1161 nfs4_schedule_state_manager(clp); 1169 nfs4_schedule_state_manager(clp);
1162} 1170}
1163EXPORT_SYMBOL_GPL(nfs4_schedule_stateid_recovery); 1171EXPORT_SYMBOL_GPL(nfs4_schedule_stateid_recovery);
@@ -1491,19 +1499,25 @@ static int nfs4_recovery_handle_error(struct nfs_client *clp, int error)
1491 case -NFS4ERR_BADSLOT: 1499 case -NFS4ERR_BADSLOT:
1492 case -NFS4ERR_BAD_HIGH_SLOT: 1500 case -NFS4ERR_BAD_HIGH_SLOT:
1493 case -NFS4ERR_DEADSESSION: 1501 case -NFS4ERR_DEADSESSION:
1494 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
1495 case -NFS4ERR_SEQ_FALSE_RETRY: 1502 case -NFS4ERR_SEQ_FALSE_RETRY:
1496 case -NFS4ERR_SEQ_MISORDERED: 1503 case -NFS4ERR_SEQ_MISORDERED:
1497 set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); 1504 set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
1498 /* Zero session reset errors */ 1505 /* Zero session reset errors */
1499 break; 1506 break;
1507 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
1508 set_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state);
1509 break;
1500 case -EKEYEXPIRED: 1510 case -EKEYEXPIRED:
1501 /* Nothing we can do */ 1511 /* Nothing we can do */
1502 nfs4_warn_keyexpired(clp->cl_hostname); 1512 nfs4_warn_keyexpired(clp->cl_hostname);
1503 break; 1513 break;
1504 default: 1514 default:
1515 dprintk("%s: failed to handle error %d for server %s\n",
1516 __func__, error, clp->cl_hostname);
1505 return error; 1517 return error;
1506 } 1518 }
1519 dprintk("%s: handled error %d for server %s\n", __func__, error,
1520 clp->cl_hostname);
1507 return 0; 1521 return 0;
1508} 1522}
1509 1523
@@ -1572,34 +1586,82 @@ out:
1572 return nfs4_recovery_handle_error(clp, status); 1586 return nfs4_recovery_handle_error(clp, status);
1573} 1587}
1574 1588
1589/* Set NFS4CLNT_LEASE_EXPIRED for all v4.0 errors and for recoverable errors
1590 * on EXCHANGE_ID for v4.1
1591 */
1592static int nfs4_handle_reclaim_lease_error(struct nfs_client *clp, int status)
1593{
1594 switch (status) {
1595 case -NFS4ERR_SEQ_MISORDERED:
1596 if (test_and_set_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state))
1597 return -ESERVERFAULT;
1598 /* Lease confirmation error: retry after purging the lease */
1599 ssleep(1);
1600 case -NFS4ERR_CLID_INUSE:
1601 case -NFS4ERR_STALE_CLIENTID:
1602 clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
1603 break;
1604 case -EACCES:
1605 if (clp->cl_machine_cred == NULL)
1606 return -EACCES;
1607 /* Handle case where the user hasn't set up machine creds */
1608 nfs4_clear_machine_cred(clp);
1609 case -NFS4ERR_DELAY:
1610 case -ETIMEDOUT:
1611 case -EAGAIN:
1612 ssleep(1);
1613 break;
1614
1615 case -NFS4ERR_MINOR_VERS_MISMATCH:
1616 if (clp->cl_cons_state == NFS_CS_SESSION_INITING)
1617 nfs_mark_client_ready(clp, -EPROTONOSUPPORT);
1618 dprintk("%s: exit with error %d for server %s\n",
1619 __func__, -EPROTONOSUPPORT, clp->cl_hostname);
1620 return -EPROTONOSUPPORT;
1621 case -EKEYEXPIRED:
1622 nfs4_warn_keyexpired(clp->cl_hostname);
1623 case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery
1624 * in nfs4_exchange_id */
1625 default:
1626 dprintk("%s: exit with error %d for server %s\n", __func__,
1627 status, clp->cl_hostname);
1628 return status;
1629 }
1630 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
1631 dprintk("%s: handled error %d for server %s\n", __func__, status,
1632 clp->cl_hostname);
1633 return 0;
1634}
1635
1575static int nfs4_reclaim_lease(struct nfs_client *clp) 1636static int nfs4_reclaim_lease(struct nfs_client *clp)
1576{ 1637{
1577 struct rpc_cred *cred; 1638 struct rpc_cred *cred;
1578 const struct nfs4_state_recovery_ops *ops = 1639 const struct nfs4_state_recovery_ops *ops =
1579 clp->cl_mvops->reboot_recovery_ops; 1640 clp->cl_mvops->reboot_recovery_ops;
1580 int status = -ENOENT; 1641 int status;
1581 1642
1582 cred = ops->get_clid_cred(clp); 1643 cred = ops->get_clid_cred(clp);
1583 if (cred != NULL) { 1644 if (cred == NULL)
1584 status = ops->establish_clid(clp, cred); 1645 return -ENOENT;
1585 put_rpccred(cred); 1646 status = ops->establish_clid(clp, cred);
1586 /* Handle case where the user hasn't set up machine creds */ 1647 put_rpccred(cred);
1587 if (status == -EACCES && cred == clp->cl_machine_cred) { 1648 if (status != 0)
1588 nfs4_clear_machine_cred(clp); 1649 return nfs4_handle_reclaim_lease_error(clp, status);
1589 status = -EAGAIN; 1650 return 0;
1590 }
1591 if (status == -NFS4ERR_MINOR_VERS_MISMATCH)
1592 status = -EPROTONOSUPPORT;
1593 }
1594 return status;
1595} 1651}
1596 1652
1597#ifdef CONFIG_NFS_V4_1 1653#ifdef CONFIG_NFS_V4_1
1598void nfs4_schedule_session_recovery(struct nfs4_session *session) 1654void nfs4_schedule_session_recovery(struct nfs4_session *session, int err)
1599{ 1655{
1600 struct nfs_client *clp = session->clp; 1656 struct nfs_client *clp = session->clp;
1601 1657
1602 set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); 1658 switch (err) {
1659 default:
1660 set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
1661 break;
1662 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
1663 set_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state);
1664 }
1603 nfs4_schedule_lease_recovery(clp); 1665 nfs4_schedule_lease_recovery(clp);
1604} 1666}
1605EXPORT_SYMBOL_GPL(nfs4_schedule_session_recovery); 1667EXPORT_SYMBOL_GPL(nfs4_schedule_session_recovery);
@@ -1607,14 +1669,19 @@ EXPORT_SYMBOL_GPL(nfs4_schedule_session_recovery);
1607void nfs41_handle_recall_slot(struct nfs_client *clp) 1669void nfs41_handle_recall_slot(struct nfs_client *clp)
1608{ 1670{
1609 set_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state); 1671 set_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state);
1672 dprintk("%s: scheduling slot recall for server %s\n", __func__,
1673 clp->cl_hostname);
1610 nfs4_schedule_state_manager(clp); 1674 nfs4_schedule_state_manager(clp);
1611} 1675}
1612 1676
1613static void nfs4_reset_all_state(struct nfs_client *clp) 1677static void nfs4_reset_all_state(struct nfs_client *clp)
1614{ 1678{
1615 if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) { 1679 if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) {
1616 clp->cl_boot_time = CURRENT_TIME; 1680 set_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state);
1681 clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
1617 nfs4_state_start_reclaim_nograce(clp); 1682 nfs4_state_start_reclaim_nograce(clp);
1683 dprintk("%s: scheduling reset of all state for server %s!\n",
1684 __func__, clp->cl_hostname);
1618 nfs4_schedule_state_manager(clp); 1685 nfs4_schedule_state_manager(clp);
1619 } 1686 }
1620} 1687}
@@ -1623,33 +1690,50 @@ static void nfs41_handle_server_reboot(struct nfs_client *clp)
1623{ 1690{
1624 if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) { 1691 if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) {
1625 nfs4_state_start_reclaim_reboot(clp); 1692 nfs4_state_start_reclaim_reboot(clp);
1693 dprintk("%s: server %s rebooted!\n", __func__,
1694 clp->cl_hostname);
1626 nfs4_schedule_state_manager(clp); 1695 nfs4_schedule_state_manager(clp);
1627 } 1696 }
1628} 1697}
1629 1698
1630static void nfs41_handle_state_revoked(struct nfs_client *clp) 1699static void nfs41_handle_state_revoked(struct nfs_client *clp)
1631{ 1700{
1632 /* Temporary */
1633 nfs4_reset_all_state(clp); 1701 nfs4_reset_all_state(clp);
1702 dprintk("%s: state revoked on server %s\n", __func__, clp->cl_hostname);
1634} 1703}
1635 1704
1636static void nfs41_handle_recallable_state_revoked(struct nfs_client *clp) 1705static void nfs41_handle_recallable_state_revoked(struct nfs_client *clp)
1637{ 1706{
1638 /* This will need to handle layouts too */ 1707 /* This will need to handle layouts too */
1639 nfs_expire_all_delegations(clp); 1708 nfs_expire_all_delegations(clp);
1709 dprintk("%s: Recallable state revoked on server %s!\n", __func__,
1710 clp->cl_hostname);
1640} 1711}
1641 1712
1642static void nfs41_handle_cb_path_down(struct nfs_client *clp) 1713static void nfs41_handle_backchannel_fault(struct nfs_client *clp)
1643{ 1714{
1644 nfs_expire_all_delegations(clp); 1715 nfs_expire_all_delegations(clp);
1645 if (test_and_set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) == 0) 1716 if (test_and_set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) == 0)
1646 nfs4_schedule_state_manager(clp); 1717 nfs4_schedule_state_manager(clp);
1718 dprintk("%s: server %s declared a backchannel fault\n", __func__,
1719 clp->cl_hostname);
1720}
1721
1722static void nfs41_handle_cb_path_down(struct nfs_client *clp)
1723{
1724 if (test_and_set_bit(NFS4CLNT_BIND_CONN_TO_SESSION,
1725 &clp->cl_state) == 0)
1726 nfs4_schedule_state_manager(clp);
1647} 1727}
1648 1728
1649void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags) 1729void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags)
1650{ 1730{
1651 if (!flags) 1731 if (!flags)
1652 return; 1732 return;
1733
1734 dprintk("%s: \"%s\" (client ID %llx) flags=0x%08x\n",
1735 __func__, clp->cl_hostname, clp->cl_clientid, flags);
1736
1653 if (flags & SEQ4_STATUS_RESTART_RECLAIM_NEEDED) 1737 if (flags & SEQ4_STATUS_RESTART_RECLAIM_NEEDED)
1654 nfs41_handle_server_reboot(clp); 1738 nfs41_handle_server_reboot(clp);
1655 if (flags & (SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED | 1739 if (flags & (SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED |
@@ -1659,18 +1743,21 @@ void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags)
1659 nfs41_handle_state_revoked(clp); 1743 nfs41_handle_state_revoked(clp);
1660 if (flags & SEQ4_STATUS_RECALLABLE_STATE_REVOKED) 1744 if (flags & SEQ4_STATUS_RECALLABLE_STATE_REVOKED)
1661 nfs41_handle_recallable_state_revoked(clp); 1745 nfs41_handle_recallable_state_revoked(clp);
1662 if (flags & (SEQ4_STATUS_CB_PATH_DOWN | 1746 if (flags & SEQ4_STATUS_BACKCHANNEL_FAULT)
1663 SEQ4_STATUS_BACKCHANNEL_FAULT | 1747 nfs41_handle_backchannel_fault(clp);
1664 SEQ4_STATUS_CB_PATH_DOWN_SESSION)) 1748 else if (flags & (SEQ4_STATUS_CB_PATH_DOWN |
1749 SEQ4_STATUS_CB_PATH_DOWN_SESSION))
1665 nfs41_handle_cb_path_down(clp); 1750 nfs41_handle_cb_path_down(clp);
1666} 1751}
1667 1752
1668static int nfs4_reset_session(struct nfs_client *clp) 1753static int nfs4_reset_session(struct nfs_client *clp)
1669{ 1754{
1755 struct rpc_cred *cred;
1670 int status; 1756 int status;
1671 1757
1672 nfs4_begin_drain_session(clp); 1758 nfs4_begin_drain_session(clp);
1673 status = nfs4_proc_destroy_session(clp->cl_session); 1759 cred = nfs4_get_exchange_id_cred(clp);
1760 status = nfs4_proc_destroy_session(clp->cl_session, cred);
1674 if (status && status != -NFS4ERR_BADSESSION && 1761 if (status && status != -NFS4ERR_BADSESSION &&
1675 status != -NFS4ERR_DEADSESSION) { 1762 status != -NFS4ERR_DEADSESSION) {
1676 status = nfs4_recovery_handle_error(clp, status); 1763 status = nfs4_recovery_handle_error(clp, status);
@@ -1678,19 +1765,26 @@ static int nfs4_reset_session(struct nfs_client *clp)
1678 } 1765 }
1679 1766
1680 memset(clp->cl_session->sess_id.data, 0, NFS4_MAX_SESSIONID_LEN); 1767 memset(clp->cl_session->sess_id.data, 0, NFS4_MAX_SESSIONID_LEN);
1681 status = nfs4_proc_create_session(clp); 1768 status = nfs4_proc_create_session(clp, cred);
1682 if (status) { 1769 if (status) {
1683 status = nfs4_recovery_handle_error(clp, status); 1770 dprintk("%s: session reset failed with status %d for server %s!\n",
1771 __func__, status, clp->cl_hostname);
1772 status = nfs4_handle_reclaim_lease_error(clp, status);
1684 goto out; 1773 goto out;
1685 } 1774 }
1686 clear_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); 1775 clear_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
1687 /* create_session negotiated new slot table */ 1776 /* create_session negotiated new slot table */
1688 clear_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state); 1777 clear_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state);
1778 clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state);
1779 dprintk("%s: session reset was successful for server %s!\n",
1780 __func__, clp->cl_hostname);
1689 1781
1690 /* Let the state manager reestablish state */ 1782 /* Let the state manager reestablish state */
1691 if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) 1783 if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
1692 nfs41_setup_state_renewal(clp); 1784 nfs41_setup_state_renewal(clp);
1693out: 1785out:
1786 if (cred)
1787 put_rpccred(cred);
1694 return status; 1788 return status;
1695} 1789}
1696 1790
@@ -1722,37 +1816,41 @@ static int nfs4_recall_slot(struct nfs_client *clp)
1722 return 0; 1816 return 0;
1723} 1817}
1724 1818
1725#else /* CONFIG_NFS_V4_1 */ 1819static int nfs4_bind_conn_to_session(struct nfs_client *clp)
1726static int nfs4_reset_session(struct nfs_client *clp) { return 0; }
1727static int nfs4_end_drain_session(struct nfs_client *clp) { return 0; }
1728static int nfs4_recall_slot(struct nfs_client *clp) { return 0; }
1729#endif /* CONFIG_NFS_V4_1 */
1730
1731/* Set NFS4CLNT_LEASE_EXPIRED for all v4.0 errors and for recoverable errors
1732 * on EXCHANGE_ID for v4.1
1733 */
1734static void nfs4_set_lease_expired(struct nfs_client *clp, int status)
1735{ 1820{
1736 switch (status) { 1821 struct rpc_cred *cred;
1737 case -NFS4ERR_CLID_INUSE: 1822 int ret;
1738 case -NFS4ERR_STALE_CLIENTID: 1823
1739 clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); 1824 nfs4_begin_drain_session(clp);
1825 cred = nfs4_get_exchange_id_cred(clp);
1826 ret = nfs4_proc_bind_conn_to_session(clp, cred);
1827 if (cred)
1828 put_rpccred(cred);
1829 clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state);
1830 switch (ret) {
1831 case 0:
1832 dprintk("%s: bind_conn_to_session was successful for server %s!\n",
1833 __func__, clp->cl_hostname);
1740 break; 1834 break;
1741 case -NFS4ERR_DELAY: 1835 case -NFS4ERR_DELAY:
1742 case -ETIMEDOUT:
1743 case -EAGAIN:
1744 ssleep(1); 1836 ssleep(1);
1837 set_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state);
1745 break; 1838 break;
1746
1747 case -EKEYEXPIRED:
1748 nfs4_warn_keyexpired(clp->cl_hostname);
1749 case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery
1750 * in nfs4_exchange_id */
1751 default: 1839 default:
1752 return; 1840 return nfs4_recovery_handle_error(clp, ret);
1753 } 1841 }
1754 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); 1842 return 0;
1755} 1843}
1844#else /* CONFIG_NFS_V4_1 */
1845static int nfs4_reset_session(struct nfs_client *clp) { return 0; }
1846static int nfs4_end_drain_session(struct nfs_client *clp) { return 0; }
1847static int nfs4_recall_slot(struct nfs_client *clp) { return 0; }
1848
1849static int nfs4_bind_conn_to_session(struct nfs_client *clp)
1850{
1851 return 0;
1852}
1853#endif /* CONFIG_NFS_V4_1 */
1756 1854
1757static void nfs4_state_manager(struct nfs_client *clp) 1855static void nfs4_state_manager(struct nfs_client *clp)
1758{ 1856{
@@ -1760,19 +1858,21 @@ static void nfs4_state_manager(struct nfs_client *clp)
1760 1858
1761 /* Ensure exclusive access to NFSv4 state */ 1859 /* Ensure exclusive access to NFSv4 state */
1762 do { 1860 do {
1861 if (test_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state)) {
1862 status = nfs4_reclaim_lease(clp);
1863 if (status < 0)
1864 goto out_error;
1865 clear_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state);
1866 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
1867 }
1868
1763 if (test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) { 1869 if (test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) {
1764 /* We're going to have to re-establish a clientid */ 1870 /* We're going to have to re-establish a clientid */
1765 status = nfs4_reclaim_lease(clp); 1871 status = nfs4_reclaim_lease(clp);
1766 if (status) { 1872 if (status < 0)
1767 nfs4_set_lease_expired(clp, status);
1768 if (test_bit(NFS4CLNT_LEASE_EXPIRED,
1769 &clp->cl_state))
1770 continue;
1771 if (clp->cl_cons_state ==
1772 NFS_CS_SESSION_INITING)
1773 nfs_mark_client_ready(clp, status);
1774 goto out_error; 1873 goto out_error;
1775 } 1874 if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
1875 continue;
1776 clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state); 1876 clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
1777 1877
1778 if (test_and_clear_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH, 1878 if (test_and_clear_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH,
@@ -1803,6 +1903,15 @@ static void nfs4_state_manager(struct nfs_client *clp)
1803 goto out_error; 1903 goto out_error;
1804 } 1904 }
1805 1905
1906 /* Send BIND_CONN_TO_SESSION */
1907 if (test_and_clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION,
1908 &clp->cl_state) && nfs4_has_session(clp)) {
1909 status = nfs4_bind_conn_to_session(clp);
1910 if (status < 0)
1911 goto out_error;
1912 continue;
1913 }
1914
1806 /* First recover reboot state... */ 1915 /* First recover reboot state... */
1807 if (test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) { 1916 if (test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) {
1808 status = nfs4_do_reclaim(clp, 1917 status = nfs4_do_reclaim(clp,
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index c54aae364bee..ee4a74db95d0 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -53,9 +53,11 @@
53#include <linux/nfs4.h> 53#include <linux/nfs4.h>
54#include <linux/nfs_fs.h> 54#include <linux/nfs_fs.h>
55#include <linux/nfs_idmap.h> 55#include <linux/nfs_idmap.h>
56
56#include "nfs4_fs.h" 57#include "nfs4_fs.h"
57#include "internal.h" 58#include "internal.h"
58#include "pnfs.h" 59#include "pnfs.h"
60#include "netns.h"
59 61
60#define NFSDBG_FACILITY NFSDBG_XDR 62#define NFSDBG_FACILITY NFSDBG_XDR
61 63
@@ -99,9 +101,12 @@ static int nfs4_stat_to_errno(int);
99#define nfs4_path_maxsz (1 + ((3 + NFS4_MAXPATHLEN) >> 2)) 101#define nfs4_path_maxsz (1 + ((3 + NFS4_MAXPATHLEN) >> 2))
100#define nfs4_owner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ)) 102#define nfs4_owner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ))
101#define nfs4_group_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ)) 103#define nfs4_group_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ))
104/* We support only one layout type per file system */
105#define decode_mdsthreshold_maxsz (1 + 1 + nfs4_fattr_bitmap_maxsz + 1 + 8)
102/* This is based on getfattr, which uses the most attributes: */ 106/* This is based on getfattr, which uses the most attributes: */
103#define nfs4_fattr_value_maxsz (1 + (1 + 2 + 2 + 4 + 2 + 1 + 1 + 2 + 2 + \ 107#define nfs4_fattr_value_maxsz (1 + (1 + 2 + 2 + 4 + 2 + 1 + 1 + 2 + 2 + \
104 3 + 3 + 3 + nfs4_owner_maxsz + nfs4_group_maxsz)) 108 3 + 3 + 3 + nfs4_owner_maxsz + \
109 nfs4_group_maxsz + decode_mdsthreshold_maxsz))
105#define nfs4_fattr_maxsz (nfs4_fattr_bitmap_maxsz + \ 110#define nfs4_fattr_maxsz (nfs4_fattr_bitmap_maxsz + \
106 nfs4_fattr_value_maxsz) 111 nfs4_fattr_value_maxsz)
107#define decode_getattr_maxsz (op_decode_hdr_maxsz + nfs4_fattr_maxsz) 112#define decode_getattr_maxsz (op_decode_hdr_maxsz + nfs4_fattr_maxsz)
@@ -321,8 +326,20 @@ static int nfs4_stat_to_errno(int);
321 1 /* csr_flags */ + \ 326 1 /* csr_flags */ + \
322 decode_channel_attrs_maxsz + \ 327 decode_channel_attrs_maxsz + \
323 decode_channel_attrs_maxsz) 328 decode_channel_attrs_maxsz)
329#define encode_bind_conn_to_session_maxsz (op_encode_hdr_maxsz + \
330 /* bctsa_sessid */ \
331 XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + \
332 1 /* bctsa_dir */ + \
333 1 /* bctsa_use_conn_in_rdma_mode */)
334#define decode_bind_conn_to_session_maxsz (op_decode_hdr_maxsz + \
335 /* bctsr_sessid */ \
336 XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + \
337 1 /* bctsr_dir */ + \
338 1 /* bctsr_use_conn_in_rdma_mode */)
324#define encode_destroy_session_maxsz (op_encode_hdr_maxsz + 4) 339#define encode_destroy_session_maxsz (op_encode_hdr_maxsz + 4)
325#define decode_destroy_session_maxsz (op_decode_hdr_maxsz) 340#define decode_destroy_session_maxsz (op_decode_hdr_maxsz)
341#define encode_destroy_clientid_maxsz (op_encode_hdr_maxsz + 2)
342#define decode_destroy_clientid_maxsz (op_decode_hdr_maxsz)
326#define encode_sequence_maxsz (op_encode_hdr_maxsz + \ 343#define encode_sequence_maxsz (op_encode_hdr_maxsz + \
327 XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 4) 344 XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 4)
328#define decode_sequence_maxsz (op_decode_hdr_maxsz + \ 345#define decode_sequence_maxsz (op_decode_hdr_maxsz + \
@@ -421,30 +438,22 @@ static int nfs4_stat_to_errno(int);
421#define NFS4_enc_commit_sz (compound_encode_hdr_maxsz + \ 438#define NFS4_enc_commit_sz (compound_encode_hdr_maxsz + \
422 encode_sequence_maxsz + \ 439 encode_sequence_maxsz + \
423 encode_putfh_maxsz + \ 440 encode_putfh_maxsz + \
424 encode_commit_maxsz + \ 441 encode_commit_maxsz)
425 encode_getattr_maxsz)
426#define NFS4_dec_commit_sz (compound_decode_hdr_maxsz + \ 442#define NFS4_dec_commit_sz (compound_decode_hdr_maxsz + \
427 decode_sequence_maxsz + \ 443 decode_sequence_maxsz + \
428 decode_putfh_maxsz + \ 444 decode_putfh_maxsz + \
429 decode_commit_maxsz + \ 445 decode_commit_maxsz)
430 decode_getattr_maxsz)
431#define NFS4_enc_open_sz (compound_encode_hdr_maxsz + \ 446#define NFS4_enc_open_sz (compound_encode_hdr_maxsz + \
432 encode_sequence_maxsz + \ 447 encode_sequence_maxsz + \
433 encode_putfh_maxsz + \ 448 encode_putfh_maxsz + \
434 encode_savefh_maxsz + \
435 encode_open_maxsz + \ 449 encode_open_maxsz + \
436 encode_getfh_maxsz + \ 450 encode_getfh_maxsz + \
437 encode_getattr_maxsz + \
438 encode_restorefh_maxsz + \
439 encode_getattr_maxsz) 451 encode_getattr_maxsz)
440#define NFS4_dec_open_sz (compound_decode_hdr_maxsz + \ 452#define NFS4_dec_open_sz (compound_decode_hdr_maxsz + \
441 decode_sequence_maxsz + \ 453 decode_sequence_maxsz + \
442 decode_putfh_maxsz + \ 454 decode_putfh_maxsz + \
443 decode_savefh_maxsz + \
444 decode_open_maxsz + \ 455 decode_open_maxsz + \
445 decode_getfh_maxsz + \ 456 decode_getfh_maxsz + \
446 decode_getattr_maxsz + \
447 decode_restorefh_maxsz + \
448 decode_getattr_maxsz) 457 decode_getattr_maxsz)
449#define NFS4_enc_open_confirm_sz \ 458#define NFS4_enc_open_confirm_sz \
450 (compound_encode_hdr_maxsz + \ 459 (compound_encode_hdr_maxsz + \
@@ -595,47 +604,37 @@ static int nfs4_stat_to_errno(int);
595#define NFS4_enc_remove_sz (compound_encode_hdr_maxsz + \ 604#define NFS4_enc_remove_sz (compound_encode_hdr_maxsz + \
596 encode_sequence_maxsz + \ 605 encode_sequence_maxsz + \
597 encode_putfh_maxsz + \ 606 encode_putfh_maxsz + \
598 encode_remove_maxsz + \ 607 encode_remove_maxsz)
599 encode_getattr_maxsz)
600#define NFS4_dec_remove_sz (compound_decode_hdr_maxsz + \ 608#define NFS4_dec_remove_sz (compound_decode_hdr_maxsz + \
601 decode_sequence_maxsz + \ 609 decode_sequence_maxsz + \
602 decode_putfh_maxsz + \ 610 decode_putfh_maxsz + \
603 decode_remove_maxsz + \ 611 decode_remove_maxsz)
604 decode_getattr_maxsz)
605#define NFS4_enc_rename_sz (compound_encode_hdr_maxsz + \ 612#define NFS4_enc_rename_sz (compound_encode_hdr_maxsz + \
606 encode_sequence_maxsz + \ 613 encode_sequence_maxsz + \
607 encode_putfh_maxsz + \ 614 encode_putfh_maxsz + \
608 encode_savefh_maxsz + \ 615 encode_savefh_maxsz + \
609 encode_putfh_maxsz + \ 616 encode_putfh_maxsz + \
610 encode_rename_maxsz + \ 617 encode_rename_maxsz)
611 encode_getattr_maxsz + \
612 encode_restorefh_maxsz + \
613 encode_getattr_maxsz)
614#define NFS4_dec_rename_sz (compound_decode_hdr_maxsz + \ 618#define NFS4_dec_rename_sz (compound_decode_hdr_maxsz + \
615 decode_sequence_maxsz + \ 619 decode_sequence_maxsz + \
616 decode_putfh_maxsz + \ 620 decode_putfh_maxsz + \
617 decode_savefh_maxsz + \ 621 decode_savefh_maxsz + \
618 decode_putfh_maxsz + \ 622 decode_putfh_maxsz + \
619 decode_rename_maxsz + \ 623 decode_rename_maxsz)
620 decode_getattr_maxsz + \
621 decode_restorefh_maxsz + \
622 decode_getattr_maxsz)
623#define NFS4_enc_link_sz (compound_encode_hdr_maxsz + \ 624#define NFS4_enc_link_sz (compound_encode_hdr_maxsz + \
624 encode_sequence_maxsz + \ 625 encode_sequence_maxsz + \
625 encode_putfh_maxsz + \ 626 encode_putfh_maxsz + \
626 encode_savefh_maxsz + \ 627 encode_savefh_maxsz + \
627 encode_putfh_maxsz + \ 628 encode_putfh_maxsz + \
628 encode_link_maxsz + \ 629 encode_link_maxsz + \
629 decode_getattr_maxsz + \
630 encode_restorefh_maxsz + \ 630 encode_restorefh_maxsz + \
631 decode_getattr_maxsz) 631 encode_getattr_maxsz)
632#define NFS4_dec_link_sz (compound_decode_hdr_maxsz + \ 632#define NFS4_dec_link_sz (compound_decode_hdr_maxsz + \
633 decode_sequence_maxsz + \ 633 decode_sequence_maxsz + \
634 decode_putfh_maxsz + \ 634 decode_putfh_maxsz + \
635 decode_savefh_maxsz + \ 635 decode_savefh_maxsz + \
636 decode_putfh_maxsz + \ 636 decode_putfh_maxsz + \
637 decode_link_maxsz + \ 637 decode_link_maxsz + \
638 decode_getattr_maxsz + \
639 decode_restorefh_maxsz + \ 638 decode_restorefh_maxsz + \
640 decode_getattr_maxsz) 639 decode_getattr_maxsz)
641#define NFS4_enc_symlink_sz (compound_encode_hdr_maxsz + \ 640#define NFS4_enc_symlink_sz (compound_encode_hdr_maxsz + \
@@ -653,20 +652,14 @@ static int nfs4_stat_to_errno(int);
653#define NFS4_enc_create_sz (compound_encode_hdr_maxsz + \ 652#define NFS4_enc_create_sz (compound_encode_hdr_maxsz + \
654 encode_sequence_maxsz + \ 653 encode_sequence_maxsz + \
655 encode_putfh_maxsz + \ 654 encode_putfh_maxsz + \
656 encode_savefh_maxsz + \
657 encode_create_maxsz + \ 655 encode_create_maxsz + \
658 encode_getfh_maxsz + \ 656 encode_getfh_maxsz + \
659 encode_getattr_maxsz + \
660 encode_restorefh_maxsz + \
661 encode_getattr_maxsz) 657 encode_getattr_maxsz)
662#define NFS4_dec_create_sz (compound_decode_hdr_maxsz + \ 658#define NFS4_dec_create_sz (compound_decode_hdr_maxsz + \
663 decode_sequence_maxsz + \ 659 decode_sequence_maxsz + \
664 decode_putfh_maxsz + \ 660 decode_putfh_maxsz + \
665 decode_savefh_maxsz + \
666 decode_create_maxsz + \ 661 decode_create_maxsz + \
667 decode_getfh_maxsz + \ 662 decode_getfh_maxsz + \
668 decode_getattr_maxsz + \
669 decode_restorefh_maxsz + \
670 decode_getattr_maxsz) 663 decode_getattr_maxsz)
671#define NFS4_enc_pathconf_sz (compound_encode_hdr_maxsz + \ 664#define NFS4_enc_pathconf_sz (compound_encode_hdr_maxsz + \
672 encode_sequence_maxsz + \ 665 encode_sequence_maxsz + \
@@ -738,6 +731,12 @@ static int nfs4_stat_to_errno(int);
738 decode_putfh_maxsz + \ 731 decode_putfh_maxsz + \
739 decode_secinfo_maxsz) 732 decode_secinfo_maxsz)
740#if defined(CONFIG_NFS_V4_1) 733#if defined(CONFIG_NFS_V4_1)
734#define NFS4_enc_bind_conn_to_session_sz \
735 (compound_encode_hdr_maxsz + \
736 encode_bind_conn_to_session_maxsz)
737#define NFS4_dec_bind_conn_to_session_sz \
738 (compound_decode_hdr_maxsz + \
739 decode_bind_conn_to_session_maxsz)
741#define NFS4_enc_exchange_id_sz \ 740#define NFS4_enc_exchange_id_sz \
742 (compound_encode_hdr_maxsz + \ 741 (compound_encode_hdr_maxsz + \
743 encode_exchange_id_maxsz) 742 encode_exchange_id_maxsz)
@@ -754,6 +753,10 @@ static int nfs4_stat_to_errno(int);
754 encode_destroy_session_maxsz) 753 encode_destroy_session_maxsz)
755#define NFS4_dec_destroy_session_sz (compound_decode_hdr_maxsz + \ 754#define NFS4_dec_destroy_session_sz (compound_decode_hdr_maxsz + \
756 decode_destroy_session_maxsz) 755 decode_destroy_session_maxsz)
756#define NFS4_enc_destroy_clientid_sz (compound_encode_hdr_maxsz + \
757 encode_destroy_clientid_maxsz)
758#define NFS4_dec_destroy_clientid_sz (compound_decode_hdr_maxsz + \
759 decode_destroy_clientid_maxsz)
757#define NFS4_enc_sequence_sz \ 760#define NFS4_enc_sequence_sz \
758 (compound_decode_hdr_maxsz + \ 761 (compound_decode_hdr_maxsz + \
759 encode_sequence_maxsz) 762 encode_sequence_maxsz)
@@ -1103,7 +1106,7 @@ static void encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg
1103 encode_nfs4_stateid(xdr, arg->stateid); 1106 encode_nfs4_stateid(xdr, arg->stateid);
1104} 1107}
1105 1108
1106static void encode_commit(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr) 1109static void encode_commit(struct xdr_stream *xdr, const struct nfs_commitargs *args, struct compound_hdr *hdr)
1107{ 1110{
1108 __be32 *p; 1111 __be32 *p;
1109 1112
@@ -1194,6 +1197,16 @@ static void encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct c
1194 bitmask[1] & nfs4_fattr_bitmap[1], hdr); 1197 bitmask[1] & nfs4_fattr_bitmap[1], hdr);
1195} 1198}
1196 1199
1200static void encode_getfattr_open(struct xdr_stream *xdr, const u32 *bitmask,
1201 struct compound_hdr *hdr)
1202{
1203 encode_getattr_three(xdr,
1204 bitmask[0] & nfs4_fattr_bitmap[0],
1205 bitmask[1] & nfs4_fattr_bitmap[1],
1206 bitmask[2] & FATTR4_WORD2_MDSTHRESHOLD,
1207 hdr);
1208}
1209
1197static void encode_fsinfo(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr) 1210static void encode_fsinfo(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr)
1198{ 1211{
1199 encode_getattr_three(xdr, 1212 encode_getattr_three(xdr,
@@ -1678,6 +1691,20 @@ static void encode_secinfo(struct xdr_stream *xdr, const struct qstr *name, stru
1678 1691
1679#if defined(CONFIG_NFS_V4_1) 1692#if defined(CONFIG_NFS_V4_1)
1680/* NFSv4.1 operations */ 1693/* NFSv4.1 operations */
1694static void encode_bind_conn_to_session(struct xdr_stream *xdr,
1695 struct nfs4_session *session,
1696 struct compound_hdr *hdr)
1697{
1698 __be32 *p;
1699
1700 encode_op_hdr(xdr, OP_BIND_CONN_TO_SESSION,
1701 decode_bind_conn_to_session_maxsz, hdr);
1702 encode_opaque_fixed(xdr, session->sess_id.data, NFS4_MAX_SESSIONID_LEN);
1703 p = xdr_reserve_space(xdr, 8);
1704 *p++ = cpu_to_be32(NFS4_CDFC4_BACK_OR_BOTH);
1705 *p = 0; /* use_conn_in_rdma_mode = False */
1706}
1707
1681static void encode_exchange_id(struct xdr_stream *xdr, 1708static void encode_exchange_id(struct xdr_stream *xdr,
1682 struct nfs41_exchange_id_args *args, 1709 struct nfs41_exchange_id_args *args,
1683 struct compound_hdr *hdr) 1710 struct compound_hdr *hdr)
@@ -1726,6 +1753,7 @@ static void encode_create_session(struct xdr_stream *xdr,
1726 char machine_name[NFS4_MAX_MACHINE_NAME_LEN]; 1753 char machine_name[NFS4_MAX_MACHINE_NAME_LEN];
1727 uint32_t len; 1754 uint32_t len;
1728 struct nfs_client *clp = args->client; 1755 struct nfs_client *clp = args->client;
1756 struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id);
1729 u32 max_resp_sz_cached; 1757 u32 max_resp_sz_cached;
1730 1758
1731 /* 1759 /*
@@ -1767,7 +1795,7 @@ static void encode_create_session(struct xdr_stream *xdr,
1767 *p++ = cpu_to_be32(RPC_AUTH_UNIX); /* auth_sys */ 1795 *p++ = cpu_to_be32(RPC_AUTH_UNIX); /* auth_sys */
1768 1796
1769 /* authsys_parms rfc1831 */ 1797 /* authsys_parms rfc1831 */
1770 *p++ = cpu_to_be32((u32)clp->cl_boot_time.tv_nsec); /* stamp */ 1798 *p++ = (__be32)nn->boot_time.tv_nsec; /* stamp */
1771 p = xdr_encode_opaque(p, machine_name, len); 1799 p = xdr_encode_opaque(p, machine_name, len);
1772 *p++ = cpu_to_be32(0); /* UID */ 1800 *p++ = cpu_to_be32(0); /* UID */
1773 *p++ = cpu_to_be32(0); /* GID */ 1801 *p++ = cpu_to_be32(0); /* GID */
@@ -1782,6 +1810,14 @@ static void encode_destroy_session(struct xdr_stream *xdr,
1782 encode_opaque_fixed(xdr, session->sess_id.data, NFS4_MAX_SESSIONID_LEN); 1810 encode_opaque_fixed(xdr, session->sess_id.data, NFS4_MAX_SESSIONID_LEN);
1783} 1811}
1784 1812
1813static void encode_destroy_clientid(struct xdr_stream *xdr,
1814 uint64_t clientid,
1815 struct compound_hdr *hdr)
1816{
1817 encode_op_hdr(xdr, OP_DESTROY_CLIENTID, decode_destroy_clientid_maxsz, hdr);
1818 encode_uint64(xdr, clientid);
1819}
1820
1785static void encode_reclaim_complete(struct xdr_stream *xdr, 1821static void encode_reclaim_complete(struct xdr_stream *xdr,
1786 struct nfs41_reclaim_complete_args *args, 1822 struct nfs41_reclaim_complete_args *args,
1787 struct compound_hdr *hdr) 1823 struct compound_hdr *hdr)
@@ -2064,7 +2100,6 @@ static void nfs4_xdr_enc_remove(struct rpc_rqst *req, struct xdr_stream *xdr,
2064 encode_sequence(xdr, &args->seq_args, &hdr); 2100 encode_sequence(xdr, &args->seq_args, &hdr);
2065 encode_putfh(xdr, args->fh, &hdr); 2101 encode_putfh(xdr, args->fh, &hdr);
2066 encode_remove(xdr, &args->name, &hdr); 2102 encode_remove(xdr, &args->name, &hdr);
2067 encode_getfattr(xdr, args->bitmask, &hdr);
2068 encode_nops(&hdr); 2103 encode_nops(&hdr);
2069} 2104}
2070 2105
@@ -2084,9 +2119,6 @@ static void nfs4_xdr_enc_rename(struct rpc_rqst *req, struct xdr_stream *xdr,
2084 encode_savefh(xdr, &hdr); 2119 encode_savefh(xdr, &hdr);
2085 encode_putfh(xdr, args->new_dir, &hdr); 2120 encode_putfh(xdr, args->new_dir, &hdr);
2086 encode_rename(xdr, args->old_name, args->new_name, &hdr); 2121 encode_rename(xdr, args->old_name, args->new_name, &hdr);
2087 encode_getfattr(xdr, args->bitmask, &hdr);
2088 encode_restorefh(xdr, &hdr);
2089 encode_getfattr(xdr, args->bitmask, &hdr);
2090 encode_nops(&hdr); 2122 encode_nops(&hdr);
2091} 2123}
2092 2124
@@ -2106,7 +2138,6 @@ static void nfs4_xdr_enc_link(struct rpc_rqst *req, struct xdr_stream *xdr,
2106 encode_savefh(xdr, &hdr); 2138 encode_savefh(xdr, &hdr);
2107 encode_putfh(xdr, args->dir_fh, &hdr); 2139 encode_putfh(xdr, args->dir_fh, &hdr);
2108 encode_link(xdr, args->name, &hdr); 2140 encode_link(xdr, args->name, &hdr);
2109 encode_getfattr(xdr, args->bitmask, &hdr);
2110 encode_restorefh(xdr, &hdr); 2141 encode_restorefh(xdr, &hdr);
2111 encode_getfattr(xdr, args->bitmask, &hdr); 2142 encode_getfattr(xdr, args->bitmask, &hdr);
2112 encode_nops(&hdr); 2143 encode_nops(&hdr);
@@ -2125,12 +2156,9 @@ static void nfs4_xdr_enc_create(struct rpc_rqst *req, struct xdr_stream *xdr,
2125 encode_compound_hdr(xdr, req, &hdr); 2156 encode_compound_hdr(xdr, req, &hdr);
2126 encode_sequence(xdr, &args->seq_args, &hdr); 2157 encode_sequence(xdr, &args->seq_args, &hdr);
2127 encode_putfh(xdr, args->dir_fh, &hdr); 2158 encode_putfh(xdr, args->dir_fh, &hdr);
2128 encode_savefh(xdr, &hdr);
2129 encode_create(xdr, args, &hdr); 2159 encode_create(xdr, args, &hdr);
2130 encode_getfh(xdr, &hdr); 2160 encode_getfh(xdr, &hdr);
2131 encode_getfattr(xdr, args->bitmask, &hdr); 2161 encode_getfattr(xdr, args->bitmask, &hdr);
2132 encode_restorefh(xdr, &hdr);
2133 encode_getfattr(xdr, args->bitmask, &hdr);
2134 encode_nops(&hdr); 2162 encode_nops(&hdr);
2135} 2163}
2136 2164
@@ -2191,12 +2219,9 @@ static void nfs4_xdr_enc_open(struct rpc_rqst *req, struct xdr_stream *xdr,
2191 encode_compound_hdr(xdr, req, &hdr); 2219 encode_compound_hdr(xdr, req, &hdr);
2192 encode_sequence(xdr, &args->seq_args, &hdr); 2220 encode_sequence(xdr, &args->seq_args, &hdr);
2193 encode_putfh(xdr, args->fh, &hdr); 2221 encode_putfh(xdr, args->fh, &hdr);
2194 encode_savefh(xdr, &hdr);
2195 encode_open(xdr, args, &hdr); 2222 encode_open(xdr, args, &hdr);
2196 encode_getfh(xdr, &hdr); 2223 encode_getfh(xdr, &hdr);
2197 encode_getfattr(xdr, args->bitmask, &hdr); 2224 encode_getfattr_open(xdr, args->bitmask, &hdr);
2198 encode_restorefh(xdr, &hdr);
2199 encode_getfattr(xdr, args->dir_bitmask, &hdr);
2200 encode_nops(&hdr); 2225 encode_nops(&hdr);
2201} 2226}
2202 2227
@@ -2448,7 +2473,7 @@ static void nfs4_xdr_enc_write(struct rpc_rqst *req, struct xdr_stream *xdr,
2448 * a COMMIT request 2473 * a COMMIT request
2449 */ 2474 */
2450static void nfs4_xdr_enc_commit(struct rpc_rqst *req, struct xdr_stream *xdr, 2475static void nfs4_xdr_enc_commit(struct rpc_rqst *req, struct xdr_stream *xdr,
2451 struct nfs_writeargs *args) 2476 struct nfs_commitargs *args)
2452{ 2477{
2453 struct compound_hdr hdr = { 2478 struct compound_hdr hdr = {
2454 .minorversion = nfs4_xdr_minorversion(&args->seq_args), 2479 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
@@ -2458,8 +2483,6 @@ static void nfs4_xdr_enc_commit(struct rpc_rqst *req, struct xdr_stream *xdr,
2458 encode_sequence(xdr, &args->seq_args, &hdr); 2483 encode_sequence(xdr, &args->seq_args, &hdr);
2459 encode_putfh(xdr, args->fh, &hdr); 2484 encode_putfh(xdr, args->fh, &hdr);
2460 encode_commit(xdr, args, &hdr); 2485 encode_commit(xdr, args, &hdr);
2461 if (args->bitmask)
2462 encode_getfattr(xdr, args->bitmask, &hdr);
2463 encode_nops(&hdr); 2486 encode_nops(&hdr);
2464} 2487}
2465 2488
@@ -2602,8 +2625,8 @@ static void nfs4_xdr_enc_delegreturn(struct rpc_rqst *req,
2602 encode_compound_hdr(xdr, req, &hdr); 2625 encode_compound_hdr(xdr, req, &hdr);
2603 encode_sequence(xdr, &args->seq_args, &hdr); 2626 encode_sequence(xdr, &args->seq_args, &hdr);
2604 encode_putfh(xdr, args->fhandle, &hdr); 2627 encode_putfh(xdr, args->fhandle, &hdr);
2605 encode_delegreturn(xdr, args->stateid, &hdr);
2606 encode_getfattr(xdr, args->bitmask, &hdr); 2628 encode_getfattr(xdr, args->bitmask, &hdr);
2629 encode_delegreturn(xdr, args->stateid, &hdr);
2607 encode_nops(&hdr); 2630 encode_nops(&hdr);
2608} 2631}
2609 2632
@@ -2651,6 +2674,22 @@ static void nfs4_xdr_enc_secinfo(struct rpc_rqst *req,
2651 2674
2652#if defined(CONFIG_NFS_V4_1) 2675#if defined(CONFIG_NFS_V4_1)
2653/* 2676/*
2677 * BIND_CONN_TO_SESSION request
2678 */
2679static void nfs4_xdr_enc_bind_conn_to_session(struct rpc_rqst *req,
2680 struct xdr_stream *xdr,
2681 struct nfs_client *clp)
2682{
2683 struct compound_hdr hdr = {
2684 .minorversion = clp->cl_mvops->minor_version,
2685 };
2686
2687 encode_compound_hdr(xdr, req, &hdr);
2688 encode_bind_conn_to_session(xdr, clp->cl_session, &hdr);
2689 encode_nops(&hdr);
2690}
2691
2692/*
2654 * EXCHANGE_ID request 2693 * EXCHANGE_ID request
2655 */ 2694 */
2656static void nfs4_xdr_enc_exchange_id(struct rpc_rqst *req, 2695static void nfs4_xdr_enc_exchange_id(struct rpc_rqst *req,
@@ -2699,6 +2738,22 @@ static void nfs4_xdr_enc_destroy_session(struct rpc_rqst *req,
2699} 2738}
2700 2739
2701/* 2740/*
2741 * a DESTROY_CLIENTID request
2742 */
2743static void nfs4_xdr_enc_destroy_clientid(struct rpc_rqst *req,
2744 struct xdr_stream *xdr,
2745 struct nfs_client *clp)
2746{
2747 struct compound_hdr hdr = {
2748 .minorversion = clp->cl_mvops->minor_version,
2749 };
2750
2751 encode_compound_hdr(xdr, req, &hdr);
2752 encode_destroy_clientid(xdr, clp->cl_clientid, &hdr);
2753 encode_nops(&hdr);
2754}
2755
2756/*
2702 * a SEQUENCE request 2757 * a SEQUENCE request
2703 */ 2758 */
2704static void nfs4_xdr_enc_sequence(struct rpc_rqst *req, struct xdr_stream *xdr, 2759static void nfs4_xdr_enc_sequence(struct rpc_rqst *req, struct xdr_stream *xdr,
@@ -4102,7 +4157,7 @@ static int decode_verifier(struct xdr_stream *xdr, void *verifier)
4102 return decode_opaque_fixed(xdr, verifier, NFS4_VERIFIER_SIZE); 4157 return decode_opaque_fixed(xdr, verifier, NFS4_VERIFIER_SIZE);
4103} 4158}
4104 4159
4105static int decode_commit(struct xdr_stream *xdr, struct nfs_writeres *res) 4160static int decode_commit(struct xdr_stream *xdr, struct nfs_commitres *res)
4106{ 4161{
4107 int status; 4162 int status;
4108 4163
@@ -4220,6 +4275,110 @@ xdr_error:
4220 return status; 4275 return status;
4221} 4276}
4222 4277
4278static int decode_threshold_hint(struct xdr_stream *xdr,
4279 uint32_t *bitmap,
4280 uint64_t *res,
4281 uint32_t hint_bit)
4282{
4283 __be32 *p;
4284
4285 *res = 0;
4286 if (likely(bitmap[0] & hint_bit)) {
4287 p = xdr_inline_decode(xdr, 8);
4288 if (unlikely(!p))
4289 goto out_overflow;
4290 xdr_decode_hyper(p, res);
4291 }
4292 return 0;
4293out_overflow:
4294 print_overflow_msg(__func__, xdr);
4295 return -EIO;
4296}
4297
4298static int decode_first_threshold_item4(struct xdr_stream *xdr,
4299 struct nfs4_threshold *res)
4300{
4301 __be32 *p, *savep;
4302 uint32_t bitmap[3] = {0,}, attrlen;
4303 int status;
4304
4305 /* layout type */
4306 p = xdr_inline_decode(xdr, 4);
4307 if (unlikely(!p)) {
4308 print_overflow_msg(__func__, xdr);
4309 return -EIO;
4310 }
4311 res->l_type = be32_to_cpup(p);
4312
4313 /* thi_hintset bitmap */
4314 status = decode_attr_bitmap(xdr, bitmap);
4315 if (status < 0)
4316 goto xdr_error;
4317
4318 /* thi_hintlist length */
4319 status = decode_attr_length(xdr, &attrlen, &savep);
4320 if (status < 0)
4321 goto xdr_error;
4322 /* thi_hintlist */
4323 status = decode_threshold_hint(xdr, bitmap, &res->rd_sz, THRESHOLD_RD);
4324 if (status < 0)
4325 goto xdr_error;
4326 status = decode_threshold_hint(xdr, bitmap, &res->wr_sz, THRESHOLD_WR);
4327 if (status < 0)
4328 goto xdr_error;
4329 status = decode_threshold_hint(xdr, bitmap, &res->rd_io_sz,
4330 THRESHOLD_RD_IO);
4331 if (status < 0)
4332 goto xdr_error;
4333 status = decode_threshold_hint(xdr, bitmap, &res->wr_io_sz,
4334 THRESHOLD_WR_IO);
4335 if (status < 0)
4336 goto xdr_error;
4337
4338 status = verify_attr_len(xdr, savep, attrlen);
4339 res->bm = bitmap[0];
4340
4341 dprintk("%s bm=0x%x rd_sz=%llu wr_sz=%llu rd_io=%llu wr_io=%llu\n",
4342 __func__, res->bm, res->rd_sz, res->wr_sz, res->rd_io_sz,
4343 res->wr_io_sz);
4344xdr_error:
4345 dprintk("%s ret=%d!\n", __func__, status);
4346 return status;
4347}
4348
4349/*
4350 * Thresholds on pNFS direct I/O vrs MDS I/O
4351 */
4352static int decode_attr_mdsthreshold(struct xdr_stream *xdr,
4353 uint32_t *bitmap,
4354 struct nfs4_threshold *res)
4355{
4356 __be32 *p;
4357 int status = 0;
4358 uint32_t num;
4359
4360 if (unlikely(bitmap[2] & (FATTR4_WORD2_MDSTHRESHOLD - 1U)))
4361 return -EIO;
4362 if (likely(bitmap[2] & FATTR4_WORD2_MDSTHRESHOLD)) {
4363 p = xdr_inline_decode(xdr, 4);
4364 if (unlikely(!p))
4365 goto out_overflow;
4366 num = be32_to_cpup(p);
4367 if (num == 0)
4368 return 0;
4369 if (num > 1)
4370 printk(KERN_INFO "%s: Warning: Multiple pNFS layout "
4371 "drivers per filesystem not supported\n",
4372 __func__);
4373
4374 status = decode_first_threshold_item4(xdr, res);
4375 }
4376 return status;
4377out_overflow:
4378 print_overflow_msg(__func__, xdr);
4379 return -EIO;
4380}
4381
4223static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, 4382static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
4224 struct nfs_fattr *fattr, struct nfs_fh *fh, 4383 struct nfs_fattr *fattr, struct nfs_fh *fh,
4225 struct nfs4_fs_locations *fs_loc, 4384 struct nfs4_fs_locations *fs_loc,
@@ -4326,6 +4485,10 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
4326 goto xdr_error; 4485 goto xdr_error;
4327 fattr->valid |= status; 4486 fattr->valid |= status;
4328 4487
4488 status = decode_attr_mdsthreshold(xdr, bitmap, fattr->mdsthreshold);
4489 if (status < 0)
4490 goto xdr_error;
4491
4329xdr_error: 4492xdr_error:
4330 dprintk("%s: xdr returned %d\n", __func__, -status); 4493 dprintk("%s: xdr returned %d\n", __func__, -status);
4331 return status; 4494 return status;
@@ -5156,7 +5319,6 @@ static int decode_exchange_id(struct xdr_stream *xdr,
5156 uint32_t dummy; 5319 uint32_t dummy;
5157 char *dummy_str; 5320 char *dummy_str;
5158 int status; 5321 int status;
5159 struct nfs_client *clp = res->client;
5160 uint32_t impl_id_count; 5322 uint32_t impl_id_count;
5161 5323
5162 status = decode_op_hdr(xdr, OP_EXCHANGE_ID); 5324 status = decode_op_hdr(xdr, OP_EXCHANGE_ID);
@@ -5166,36 +5328,39 @@ static int decode_exchange_id(struct xdr_stream *xdr,
5166 p = xdr_inline_decode(xdr, 8); 5328 p = xdr_inline_decode(xdr, 8);
5167 if (unlikely(!p)) 5329 if (unlikely(!p))
5168 goto out_overflow; 5330 goto out_overflow;
5169 xdr_decode_hyper(p, &clp->cl_clientid); 5331 xdr_decode_hyper(p, &res->clientid);
5170 p = xdr_inline_decode(xdr, 12); 5332 p = xdr_inline_decode(xdr, 12);
5171 if (unlikely(!p)) 5333 if (unlikely(!p))
5172 goto out_overflow; 5334 goto out_overflow;
5173 clp->cl_seqid = be32_to_cpup(p++); 5335 res->seqid = be32_to_cpup(p++);
5174 clp->cl_exchange_flags = be32_to_cpup(p++); 5336 res->flags = be32_to_cpup(p++);
5175 5337
5176 /* We ask for SP4_NONE */ 5338 /* We ask for SP4_NONE */
5177 dummy = be32_to_cpup(p); 5339 dummy = be32_to_cpup(p);
5178 if (dummy != SP4_NONE) 5340 if (dummy != SP4_NONE)
5179 return -EIO; 5341 return -EIO;
5180 5342
5181 /* Throw away minor_id */ 5343 /* server_owner4.so_minor_id */
5182 p = xdr_inline_decode(xdr, 8); 5344 p = xdr_inline_decode(xdr, 8);
5183 if (unlikely(!p)) 5345 if (unlikely(!p))
5184 goto out_overflow; 5346 goto out_overflow;
5347 p = xdr_decode_hyper(p, &res->server_owner->minor_id);
5185 5348
5186 /* Throw away Major id */ 5349 /* server_owner4.so_major_id */
5187 status = decode_opaque_inline(xdr, &dummy, &dummy_str); 5350 status = decode_opaque_inline(xdr, &dummy, &dummy_str);
5188 if (unlikely(status)) 5351 if (unlikely(status))
5189 return status; 5352 return status;
5353 if (unlikely(dummy > NFS4_OPAQUE_LIMIT))
5354 return -EIO;
5355 memcpy(res->server_owner->major_id, dummy_str, dummy);
5356 res->server_owner->major_id_sz = dummy;
5190 5357
5191 /* Save server_scope */ 5358 /* server_scope4 */
5192 status = decode_opaque_inline(xdr, &dummy, &dummy_str); 5359 status = decode_opaque_inline(xdr, &dummy, &dummy_str);
5193 if (unlikely(status)) 5360 if (unlikely(status))
5194 return status; 5361 return status;
5195
5196 if (unlikely(dummy > NFS4_OPAQUE_LIMIT)) 5362 if (unlikely(dummy > NFS4_OPAQUE_LIMIT))
5197 return -EIO; 5363 return -EIO;
5198
5199 memcpy(res->server_scope->server_scope, dummy_str, dummy); 5364 memcpy(res->server_scope->server_scope, dummy_str, dummy);
5200 res->server_scope->server_scope_sz = dummy; 5365 res->server_scope->server_scope_sz = dummy;
5201 5366
@@ -5276,6 +5441,37 @@ static int decode_sessionid(struct xdr_stream *xdr, struct nfs4_sessionid *sid)
5276 return decode_opaque_fixed(xdr, sid->data, NFS4_MAX_SESSIONID_LEN); 5441 return decode_opaque_fixed(xdr, sid->data, NFS4_MAX_SESSIONID_LEN);
5277} 5442}
5278 5443
5444static int decode_bind_conn_to_session(struct xdr_stream *xdr,
5445 struct nfs41_bind_conn_to_session_res *res)
5446{
5447 __be32 *p;
5448 int status;
5449
5450 status = decode_op_hdr(xdr, OP_BIND_CONN_TO_SESSION);
5451 if (!status)
5452 status = decode_sessionid(xdr, &res->session->sess_id);
5453 if (unlikely(status))
5454 return status;
5455
5456 /* dir flags, rdma mode bool */
5457 p = xdr_inline_decode(xdr, 8);
5458 if (unlikely(!p))
5459 goto out_overflow;
5460
5461 res->dir = be32_to_cpup(p++);
5462 if (res->dir == 0 || res->dir > NFS4_CDFS4_BOTH)
5463 return -EIO;
5464 if (be32_to_cpup(p) == 0)
5465 res->use_conn_in_rdma_mode = false;
5466 else
5467 res->use_conn_in_rdma_mode = true;
5468
5469 return 0;
5470out_overflow:
5471 print_overflow_msg(__func__, xdr);
5472 return -EIO;
5473}
5474
5279static int decode_create_session(struct xdr_stream *xdr, 5475static int decode_create_session(struct xdr_stream *xdr,
5280 struct nfs41_create_session_res *res) 5476 struct nfs41_create_session_res *res)
5281{ 5477{
@@ -5312,6 +5508,11 @@ static int decode_destroy_session(struct xdr_stream *xdr, void *dummy)
5312 return decode_op_hdr(xdr, OP_DESTROY_SESSION); 5508 return decode_op_hdr(xdr, OP_DESTROY_SESSION);
5313} 5509}
5314 5510
5511static int decode_destroy_clientid(struct xdr_stream *xdr, void *dummy)
5512{
5513 return decode_op_hdr(xdr, OP_DESTROY_CLIENTID);
5514}
5515
5315static int decode_reclaim_complete(struct xdr_stream *xdr, void *dummy) 5516static int decode_reclaim_complete(struct xdr_stream *xdr, void *dummy)
5316{ 5517{
5317 return decode_op_hdr(xdr, OP_RECLAIM_COMPLETE); 5518 return decode_op_hdr(xdr, OP_RECLAIM_COMPLETE);
@@ -5800,9 +6001,6 @@ static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
5800 if (status) 6001 if (status)
5801 goto out; 6002 goto out;
5802 status = decode_remove(xdr, &res->cinfo); 6003 status = decode_remove(xdr, &res->cinfo);
5803 if (status)
5804 goto out;
5805 decode_getfattr(xdr, res->dir_attr, res->server);
5806out: 6004out:
5807 return status; 6005 return status;
5808} 6006}
@@ -5832,15 +6030,6 @@ static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
5832 if (status) 6030 if (status)
5833 goto out; 6031 goto out;
5834 status = decode_rename(xdr, &res->old_cinfo, &res->new_cinfo); 6032 status = decode_rename(xdr, &res->old_cinfo, &res->new_cinfo);
5835 if (status)
5836 goto out;
5837 /* Current FH is target directory */
5838 if (decode_getfattr(xdr, res->new_fattr, res->server))
5839 goto out;
5840 status = decode_restorefh(xdr);
5841 if (status)
5842 goto out;
5843 decode_getfattr(xdr, res->old_fattr, res->server);
5844out: 6033out:
5845 return status; 6034 return status;
5846} 6035}
@@ -5876,8 +6065,6 @@ static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
5876 * Note order: OP_LINK leaves the directory as the current 6065 * Note order: OP_LINK leaves the directory as the current
5877 * filehandle. 6066 * filehandle.
5878 */ 6067 */
5879 if (decode_getfattr(xdr, res->dir_attr, res->server))
5880 goto out;
5881 status = decode_restorefh(xdr); 6068 status = decode_restorefh(xdr);
5882 if (status) 6069 if (status)
5883 goto out; 6070 goto out;
@@ -5904,21 +6091,13 @@ static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
5904 status = decode_putfh(xdr); 6091 status = decode_putfh(xdr);
5905 if (status) 6092 if (status)
5906 goto out; 6093 goto out;
5907 status = decode_savefh(xdr);
5908 if (status)
5909 goto out;
5910 status = decode_create(xdr, &res->dir_cinfo); 6094 status = decode_create(xdr, &res->dir_cinfo);
5911 if (status) 6095 if (status)
5912 goto out; 6096 goto out;
5913 status = decode_getfh(xdr, res->fh); 6097 status = decode_getfh(xdr, res->fh);
5914 if (status) 6098 if (status)
5915 goto out; 6099 goto out;
5916 if (decode_getfattr(xdr, res->fattr, res->server)) 6100 decode_getfattr(xdr, res->fattr, res->server);
5917 goto out;
5918 status = decode_restorefh(xdr);
5919 if (status)
5920 goto out;
5921 decode_getfattr(xdr, res->dir_fattr, res->server);
5922out: 6101out:
5923 return status; 6102 return status;
5924} 6103}
@@ -6075,19 +6254,12 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
6075 status = decode_putfh(xdr); 6254 status = decode_putfh(xdr);
6076 if (status) 6255 if (status)
6077 goto out; 6256 goto out;
6078 status = decode_savefh(xdr);
6079 if (status)
6080 goto out;
6081 status = decode_open(xdr, res); 6257 status = decode_open(xdr, res);
6082 if (status) 6258 if (status)
6083 goto out; 6259 goto out;
6084 if (decode_getfh(xdr, &res->fh) != 0) 6260 if (decode_getfh(xdr, &res->fh) != 0)
6085 goto out; 6261 goto out;
6086 if (decode_getfattr(xdr, res->f_attr, res->server) != 0) 6262 decode_getfattr(xdr, res->f_attr, res->server);
6087 goto out;
6088 if (decode_restorefh(xdr) != 0)
6089 goto out;
6090 decode_getfattr(xdr, res->dir_attr, res->server);
6091out: 6263out:
6092 return status; 6264 return status;
6093} 6265}
@@ -6353,7 +6525,7 @@ out:
6353 * Decode COMMIT response 6525 * Decode COMMIT response
6354 */ 6526 */
6355static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, struct xdr_stream *xdr, 6527static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
6356 struct nfs_writeres *res) 6528 struct nfs_commitres *res)
6357{ 6529{
6358 struct compound_hdr hdr; 6530 struct compound_hdr hdr;
6359 int status; 6531 int status;
@@ -6368,10 +6540,6 @@ static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
6368 if (status) 6540 if (status)
6369 goto out; 6541 goto out;
6370 status = decode_commit(xdr, res); 6542 status = decode_commit(xdr, res);
6371 if (status)
6372 goto out;
6373 if (res->fattr)
6374 decode_getfattr(xdr, res->fattr, res->server);
6375out: 6543out:
6376 return status; 6544 return status;
6377} 6545}
@@ -6527,10 +6695,10 @@ static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp,
6527 status = decode_putfh(xdr); 6695 status = decode_putfh(xdr);
6528 if (status != 0) 6696 if (status != 0)
6529 goto out; 6697 goto out;
6530 status = decode_delegreturn(xdr); 6698 status = decode_getfattr(xdr, res->fattr, res->server);
6531 if (status != 0) 6699 if (status != 0)
6532 goto out; 6700 goto out;
6533 decode_getfattr(xdr, res->fattr, res->server); 6701 status = decode_delegreturn(xdr);
6534out: 6702out:
6535 return status; 6703 return status;
6536} 6704}
@@ -6591,6 +6759,22 @@ out:
6591 6759
6592#if defined(CONFIG_NFS_V4_1) 6760#if defined(CONFIG_NFS_V4_1)
6593/* 6761/*
6762 * Decode BIND_CONN_TO_SESSION response
6763 */
6764static int nfs4_xdr_dec_bind_conn_to_session(struct rpc_rqst *rqstp,
6765 struct xdr_stream *xdr,
6766 void *res)
6767{
6768 struct compound_hdr hdr;
6769 int status;
6770
6771 status = decode_compound_hdr(xdr, &hdr);
6772 if (!status)
6773 status = decode_bind_conn_to_session(xdr, res);
6774 return status;
6775}
6776
6777/*
6594 * Decode EXCHANGE_ID response 6778 * Decode EXCHANGE_ID response
6595 */ 6779 */
6596static int nfs4_xdr_dec_exchange_id(struct rpc_rqst *rqstp, 6780static int nfs4_xdr_dec_exchange_id(struct rpc_rqst *rqstp,
@@ -6639,6 +6823,22 @@ static int nfs4_xdr_dec_destroy_session(struct rpc_rqst *rqstp,
6639} 6823}
6640 6824
6641/* 6825/*
6826 * Decode DESTROY_CLIENTID response
6827 */
6828static int nfs4_xdr_dec_destroy_clientid(struct rpc_rqst *rqstp,
6829 struct xdr_stream *xdr,
6830 void *res)
6831{
6832 struct compound_hdr hdr;
6833 int status;
6834
6835 status = decode_compound_hdr(xdr, &hdr);
6836 if (!status)
6837 status = decode_destroy_clientid(xdr, res);
6838 return status;
6839}
6840
6841/*
6642 * Decode SEQUENCE response 6842 * Decode SEQUENCE response
6643 */ 6843 */
6644static int nfs4_xdr_dec_sequence(struct rpc_rqst *rqstp, 6844static int nfs4_xdr_dec_sequence(struct rpc_rqst *rqstp,
@@ -7085,6 +7285,9 @@ struct rpc_procinfo nfs4_procedures[] = {
7085 PROC(TEST_STATEID, enc_test_stateid, dec_test_stateid), 7285 PROC(TEST_STATEID, enc_test_stateid, dec_test_stateid),
7086 PROC(FREE_STATEID, enc_free_stateid, dec_free_stateid), 7286 PROC(FREE_STATEID, enc_free_stateid, dec_free_stateid),
7087 PROC(GETDEVICELIST, enc_getdevicelist, dec_getdevicelist), 7287 PROC(GETDEVICELIST, enc_getdevicelist, dec_getdevicelist),
7288 PROC(BIND_CONN_TO_SESSION,
7289 enc_bind_conn_to_session, dec_bind_conn_to_session),
7290 PROC(DESTROY_CLIENTID, enc_destroy_clientid, dec_destroy_clientid),
7088#endif /* CONFIG_NFS_V4_1 */ 7291#endif /* CONFIG_NFS_V4_1 */
7089}; 7292};
7090 7293
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index 4bff4a3dab46..b47277baebab 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -211,7 +211,7 @@ static void copy_single_comp(struct ore_components *oc, unsigned c,
211 memcpy(ocomp->cred, src_comp->oc_cap.cred, sizeof(ocomp->cred)); 211 memcpy(ocomp->cred, src_comp->oc_cap.cred, sizeof(ocomp->cred));
212} 212}
213 213
214int __alloc_objio_seg(unsigned numdevs, gfp_t gfp_flags, 214static int __alloc_objio_seg(unsigned numdevs, gfp_t gfp_flags,
215 struct objio_segment **pseg) 215 struct objio_segment **pseg)
216{ 216{
217/* This is the in memory structure of the objio_segment 217/* This is the in memory structure of the objio_segment
@@ -440,11 +440,12 @@ static void _read_done(struct ore_io_state *ios, void *private)
440 440
441int objio_read_pagelist(struct nfs_read_data *rdata) 441int objio_read_pagelist(struct nfs_read_data *rdata)
442{ 442{
443 struct nfs_pgio_header *hdr = rdata->header;
443 struct objio_state *objios; 444 struct objio_state *objios;
444 int ret; 445 int ret;
445 446
446 ret = objio_alloc_io_state(NFS_I(rdata->inode)->layout, true, 447 ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, true,
447 rdata->lseg, rdata->args.pages, rdata->args.pgbase, 448 hdr->lseg, rdata->args.pages, rdata->args.pgbase,
448 rdata->args.offset, rdata->args.count, rdata, 449 rdata->args.offset, rdata->args.count, rdata,
449 GFP_KERNEL, &objios); 450 GFP_KERNEL, &objios);
450 if (unlikely(ret)) 451 if (unlikely(ret))
@@ -483,12 +484,12 @@ static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
483{ 484{
484 struct objio_state *objios = priv; 485 struct objio_state *objios = priv;
485 struct nfs_write_data *wdata = objios->oir.rpcdata; 486 struct nfs_write_data *wdata = objios->oir.rpcdata;
487 struct address_space *mapping = wdata->header->inode->i_mapping;
486 pgoff_t index = offset / PAGE_SIZE; 488 pgoff_t index = offset / PAGE_SIZE;
487 struct page *page = find_get_page(wdata->inode->i_mapping, index); 489 struct page *page = find_get_page(mapping, index);
488 490
489 if (!page) { 491 if (!page) {
490 page = find_or_create_page(wdata->inode->i_mapping, 492 page = find_or_create_page(mapping, index, GFP_NOFS);
491 index, GFP_NOFS);
492 if (unlikely(!page)) { 493 if (unlikely(!page)) {
493 dprintk("%s: grab_cache_page Failed index=0x%lx\n", 494 dprintk("%s: grab_cache_page Failed index=0x%lx\n",
494 __func__, index); 495 __func__, index);
@@ -518,11 +519,12 @@ static const struct _ore_r4w_op _r4w_op = {
518 519
519int objio_write_pagelist(struct nfs_write_data *wdata, int how) 520int objio_write_pagelist(struct nfs_write_data *wdata, int how)
520{ 521{
522 struct nfs_pgio_header *hdr = wdata->header;
521 struct objio_state *objios; 523 struct objio_state *objios;
522 int ret; 524 int ret;
523 525
524 ret = objio_alloc_io_state(NFS_I(wdata->inode)->layout, false, 526 ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, false,
525 wdata->lseg, wdata->args.pages, wdata->args.pgbase, 527 hdr->lseg, wdata->args.pages, wdata->args.pgbase,
526 wdata->args.offset, wdata->args.count, wdata, GFP_NOFS, 528 wdata->args.offset, wdata->args.count, wdata, GFP_NOFS,
527 &objios); 529 &objios);
528 if (unlikely(ret)) 530 if (unlikely(ret))
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c
index 595c5fc21a19..874613545301 100644
--- a/fs/nfs/objlayout/objlayout.c
+++ b/fs/nfs/objlayout/objlayout.c
@@ -258,7 +258,7 @@ objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
258 if (status >= 0) 258 if (status >= 0)
259 rdata->res.count = status; 259 rdata->res.count = status;
260 else 260 else
261 rdata->pnfs_error = status; 261 rdata->header->pnfs_error = status;
262 objlayout_iodone(oir); 262 objlayout_iodone(oir);
263 /* must not use oir after this point */ 263 /* must not use oir after this point */
264 264
@@ -279,12 +279,14 @@ objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
279enum pnfs_try_status 279enum pnfs_try_status
280objlayout_read_pagelist(struct nfs_read_data *rdata) 280objlayout_read_pagelist(struct nfs_read_data *rdata)
281{ 281{
282 struct nfs_pgio_header *hdr = rdata->header;
283 struct inode *inode = hdr->inode;
282 loff_t offset = rdata->args.offset; 284 loff_t offset = rdata->args.offset;
283 size_t count = rdata->args.count; 285 size_t count = rdata->args.count;
284 int err; 286 int err;
285 loff_t eof; 287 loff_t eof;
286 288
287 eof = i_size_read(rdata->inode); 289 eof = i_size_read(inode);
288 if (unlikely(offset + count > eof)) { 290 if (unlikely(offset + count > eof)) {
289 if (offset >= eof) { 291 if (offset >= eof) {
290 err = 0; 292 err = 0;
@@ -297,17 +299,17 @@ objlayout_read_pagelist(struct nfs_read_data *rdata)
297 } 299 }
298 300
299 rdata->res.eof = (offset + count) >= eof; 301 rdata->res.eof = (offset + count) >= eof;
300 _fix_verify_io_params(rdata->lseg, &rdata->args.pages, 302 _fix_verify_io_params(hdr->lseg, &rdata->args.pages,
301 &rdata->args.pgbase, 303 &rdata->args.pgbase,
302 rdata->args.offset, rdata->args.count); 304 rdata->args.offset, rdata->args.count);
303 305
304 dprintk("%s: inode(%lx) offset 0x%llx count 0x%Zx eof=%d\n", 306 dprintk("%s: inode(%lx) offset 0x%llx count 0x%Zx eof=%d\n",
305 __func__, rdata->inode->i_ino, offset, count, rdata->res.eof); 307 __func__, inode->i_ino, offset, count, rdata->res.eof);
306 308
307 err = objio_read_pagelist(rdata); 309 err = objio_read_pagelist(rdata);
308 out: 310 out:
309 if (unlikely(err)) { 311 if (unlikely(err)) {
310 rdata->pnfs_error = err; 312 hdr->pnfs_error = err;
311 dprintk("%s: Returned Error %d\n", __func__, err); 313 dprintk("%s: Returned Error %d\n", __func__, err);
312 return PNFS_NOT_ATTEMPTED; 314 return PNFS_NOT_ATTEMPTED;
313 } 315 }
@@ -340,7 +342,7 @@ objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
340 wdata->res.count = status; 342 wdata->res.count = status;
341 wdata->verf.committed = oir->committed; 343 wdata->verf.committed = oir->committed;
342 } else { 344 } else {
343 wdata->pnfs_error = status; 345 wdata->header->pnfs_error = status;
344 } 346 }
345 objlayout_iodone(oir); 347 objlayout_iodone(oir);
346 /* must not use oir after this point */ 348 /* must not use oir after this point */
@@ -363,15 +365,16 @@ enum pnfs_try_status
363objlayout_write_pagelist(struct nfs_write_data *wdata, 365objlayout_write_pagelist(struct nfs_write_data *wdata,
364 int how) 366 int how)
365{ 367{
368 struct nfs_pgio_header *hdr = wdata->header;
366 int err; 369 int err;
367 370
368 _fix_verify_io_params(wdata->lseg, &wdata->args.pages, 371 _fix_verify_io_params(hdr->lseg, &wdata->args.pages,
369 &wdata->args.pgbase, 372 &wdata->args.pgbase,
370 wdata->args.offset, wdata->args.count); 373 wdata->args.offset, wdata->args.count);
371 374
372 err = objio_write_pagelist(wdata, how); 375 err = objio_write_pagelist(wdata, how);
373 if (unlikely(err)) { 376 if (unlikely(err)) {
374 wdata->pnfs_error = err; 377 hdr->pnfs_error = err;
375 dprintk("%s: Returned Error %d\n", __func__, err); 378 dprintk("%s: Returned Error %d\n", __func__, err);
376 return PNFS_NOT_ATTEMPTED; 379 return PNFS_NOT_ATTEMPTED;
377 } 380 }
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index d21fceaa9f62..aed913c833f4 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -26,6 +26,47 @@
26 26
27static struct kmem_cache *nfs_page_cachep; 27static struct kmem_cache *nfs_page_cachep;
28 28
29bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount)
30{
31 p->npages = pagecount;
32 if (pagecount <= ARRAY_SIZE(p->page_array))
33 p->pagevec = p->page_array;
34 else {
35 p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL);
36 if (!p->pagevec)
37 p->npages = 0;
38 }
39 return p->pagevec != NULL;
40}
41
42void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
43 struct nfs_pgio_header *hdr,
44 void (*release)(struct nfs_pgio_header *hdr))
45{
46 hdr->req = nfs_list_entry(desc->pg_list.next);
47 hdr->inode = desc->pg_inode;
48 hdr->cred = hdr->req->wb_context->cred;
49 hdr->io_start = req_offset(hdr->req);
50 hdr->good_bytes = desc->pg_count;
51 hdr->dreq = desc->pg_dreq;
52 hdr->release = release;
53 hdr->completion_ops = desc->pg_completion_ops;
54 if (hdr->completion_ops->init_hdr)
55 hdr->completion_ops->init_hdr(hdr);
56}
57
58void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos)
59{
60 spin_lock(&hdr->lock);
61 if (pos < hdr->io_start + hdr->good_bytes) {
62 set_bit(NFS_IOHDR_ERROR, &hdr->flags);
63 clear_bit(NFS_IOHDR_EOF, &hdr->flags);
64 hdr->good_bytes = pos - hdr->io_start;
65 hdr->error = error;
66 }
67 spin_unlock(&hdr->lock);
68}
69
29static inline struct nfs_page * 70static inline struct nfs_page *
30nfs_page_alloc(void) 71nfs_page_alloc(void)
31{ 72{
@@ -76,12 +117,8 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
76 * long write-back delay. This will be adjusted in 117 * long write-back delay. This will be adjusted in
77 * update_nfs_request below if the region is not locked. */ 118 * update_nfs_request below if the region is not locked. */
78 req->wb_page = page; 119 req->wb_page = page;
79 atomic_set(&req->wb_complete, 0);
80 req->wb_index = page->index; 120 req->wb_index = page->index;
81 page_cache_get(page); 121 page_cache_get(page);
82 BUG_ON(PagePrivate(page));
83 BUG_ON(!PageLocked(page));
84 BUG_ON(page->mapping->host != inode);
85 req->wb_offset = offset; 122 req->wb_offset = offset;
86 req->wb_pgbase = offset; 123 req->wb_pgbase = offset;
87 req->wb_bytes = count; 124 req->wb_bytes = count;
@@ -104,6 +141,15 @@ void nfs_unlock_request(struct nfs_page *req)
104 clear_bit(PG_BUSY, &req->wb_flags); 141 clear_bit(PG_BUSY, &req->wb_flags);
105 smp_mb__after_clear_bit(); 142 smp_mb__after_clear_bit();
106 wake_up_bit(&req->wb_flags, PG_BUSY); 143 wake_up_bit(&req->wb_flags, PG_BUSY);
144}
145
146/**
147 * nfs_unlock_and_release_request - Unlock request and release the nfs_page
148 * @req:
149 */
150void nfs_unlock_and_release_request(struct nfs_page *req)
151{
152 nfs_unlock_request(req);
107 nfs_release_request(req); 153 nfs_release_request(req);
108} 154}
109 155
@@ -203,6 +249,7 @@ EXPORT_SYMBOL_GPL(nfs_generic_pg_test);
203void nfs_pageio_init(struct nfs_pageio_descriptor *desc, 249void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
204 struct inode *inode, 250 struct inode *inode,
205 const struct nfs_pageio_ops *pg_ops, 251 const struct nfs_pageio_ops *pg_ops,
252 const struct nfs_pgio_completion_ops *compl_ops,
206 size_t bsize, 253 size_t bsize,
207 int io_flags) 254 int io_flags)
208{ 255{
@@ -215,9 +262,11 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
215 desc->pg_recoalesce = 0; 262 desc->pg_recoalesce = 0;
216 desc->pg_inode = inode; 263 desc->pg_inode = inode;
217 desc->pg_ops = pg_ops; 264 desc->pg_ops = pg_ops;
265 desc->pg_completion_ops = compl_ops;
218 desc->pg_ioflags = io_flags; 266 desc->pg_ioflags = io_flags;
219 desc->pg_error = 0; 267 desc->pg_error = 0;
220 desc->pg_lseg = NULL; 268 desc->pg_lseg = NULL;
269 desc->pg_dreq = NULL;
221} 270}
222 271
223/** 272/**
@@ -241,12 +290,12 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev,
241 return false; 290 return false;
242 if (req->wb_context->state != prev->wb_context->state) 291 if (req->wb_context->state != prev->wb_context->state)
243 return false; 292 return false;
244 if (req->wb_index != (prev->wb_index + 1))
245 return false;
246 if (req->wb_pgbase != 0) 293 if (req->wb_pgbase != 0)
247 return false; 294 return false;
248 if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE) 295 if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE)
249 return false; 296 return false;
297 if (req_offset(req) != req_offset(prev) + prev->wb_bytes)
298 return false;
250 return pgio->pg_ops->pg_test(pgio, prev, req); 299 return pgio->pg_ops->pg_test(pgio, prev, req);
251} 300}
252 301
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 38512bcd2e98..b8323aa7b543 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -395,6 +395,9 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
395 dprintk("%s:Begin lo %p\n", __func__, lo); 395 dprintk("%s:Begin lo %p\n", __func__, lo);
396 396
397 if (list_empty(&lo->plh_segs)) { 397 if (list_empty(&lo->plh_segs)) {
398 /* Reset MDS Threshold I/O counters */
399 NFS_I(lo->plh_inode)->write_io = 0;
400 NFS_I(lo->plh_inode)->read_io = 0;
398 if (!test_and_set_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags)) 401 if (!test_and_set_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags))
399 put_layout_hdr_locked(lo); 402 put_layout_hdr_locked(lo);
400 return 0; 403 return 0;
@@ -455,6 +458,7 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
455 spin_unlock(&nfsi->vfs_inode.i_lock); 458 spin_unlock(&nfsi->vfs_inode.i_lock);
456 pnfs_free_lseg_list(&tmp_list); 459 pnfs_free_lseg_list(&tmp_list);
457} 460}
461EXPORT_SYMBOL_GPL(pnfs_destroy_layout);
458 462
459/* 463/*
460 * Called by the state manger to remove all layouts established under an 464 * Called by the state manger to remove all layouts established under an
@@ -692,6 +696,7 @@ out:
692 dprintk("<-- %s status: %d\n", __func__, status); 696 dprintk("<-- %s status: %d\n", __func__, status);
693 return status; 697 return status;
694} 698}
699EXPORT_SYMBOL_GPL(_pnfs_return_layout);
695 700
696bool pnfs_roc(struct inode *ino) 701bool pnfs_roc(struct inode *ino)
697{ 702{
@@ -931,6 +936,81 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo,
931} 936}
932 937
933/* 938/*
939 * Use mdsthreshold hints set at each OPEN to determine if I/O should go
940 * to the MDS or over pNFS
941 *
942 * The nfs_inode read_io and write_io fields are cumulative counters reset
943 * when there are no layout segments. Note that in pnfs_update_layout iomode
944 * is set to IOMODE_READ for a READ request, and set to IOMODE_RW for a
945 * WRITE request.
946 *
947 * A return of true means use MDS I/O.
948 *
949 * From rfc 5661:
950 * If a file's size is smaller than the file size threshold, data accesses
951 * SHOULD be sent to the metadata server. If an I/O request has a length that
952 * is below the I/O size threshold, the I/O SHOULD be sent to the metadata
953 * server. If both file size and I/O size are provided, the client SHOULD
954 * reach or exceed both thresholds before sending its read or write
955 * requests to the data server.
956 */
957static bool pnfs_within_mdsthreshold(struct nfs_open_context *ctx,
958 struct inode *ino, int iomode)
959{
960 struct nfs4_threshold *t = ctx->mdsthreshold;
961 struct nfs_inode *nfsi = NFS_I(ino);
962 loff_t fsize = i_size_read(ino);
963 bool size = false, size_set = false, io = false, io_set = false, ret = false;
964
965 if (t == NULL)
966 return ret;
967
968 dprintk("%s bm=0x%x rd_sz=%llu wr_sz=%llu rd_io=%llu wr_io=%llu\n",
969 __func__, t->bm, t->rd_sz, t->wr_sz, t->rd_io_sz, t->wr_io_sz);
970
971 switch (iomode) {
972 case IOMODE_READ:
973 if (t->bm & THRESHOLD_RD) {
974 dprintk("%s fsize %llu\n", __func__, fsize);
975 size_set = true;
976 if (fsize < t->rd_sz)
977 size = true;
978 }
979 if (t->bm & THRESHOLD_RD_IO) {
980 dprintk("%s nfsi->read_io %llu\n", __func__,
981 nfsi->read_io);
982 io_set = true;
983 if (nfsi->read_io < t->rd_io_sz)
984 io = true;
985 }
986 break;
987 case IOMODE_RW:
988 if (t->bm & THRESHOLD_WR) {
989 dprintk("%s fsize %llu\n", __func__, fsize);
990 size_set = true;
991 if (fsize < t->wr_sz)
992 size = true;
993 }
994 if (t->bm & THRESHOLD_WR_IO) {
995 dprintk("%s nfsi->write_io %llu\n", __func__,
996 nfsi->write_io);
997 io_set = true;
998 if (nfsi->write_io < t->wr_io_sz)
999 io = true;
1000 }
1001 break;
1002 }
1003 if (size_set && io_set) {
1004 if (size && io)
1005 ret = true;
1006 } else if (size || io)
1007 ret = true;
1008
1009 dprintk("<-- %s size %d io %d ret %d\n", __func__, size, io, ret);
1010 return ret;
1011}
1012
1013/*
934 * Layout segment is retreived from the server if not cached. 1014 * Layout segment is retreived from the server if not cached.
935 * The appropriate layout segment is referenced and returned to the caller. 1015 * The appropriate layout segment is referenced and returned to the caller.
936 */ 1016 */
@@ -957,6 +1037,10 @@ pnfs_update_layout(struct inode *ino,
957 1037
958 if (!pnfs_enabled_sb(NFS_SERVER(ino))) 1038 if (!pnfs_enabled_sb(NFS_SERVER(ino)))
959 return NULL; 1039 return NULL;
1040
1041 if (pnfs_within_mdsthreshold(ctx, ino, iomode))
1042 return NULL;
1043
960 spin_lock(&ino->i_lock); 1044 spin_lock(&ino->i_lock);
961 lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags); 1045 lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags);
962 if (lo == NULL) { 1046 if (lo == NULL) {
@@ -1082,6 +1166,10 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r
1082{ 1166{
1083 BUG_ON(pgio->pg_lseg != NULL); 1167 BUG_ON(pgio->pg_lseg != NULL);
1084 1168
1169 if (req->wb_offset != req->wb_pgbase) {
1170 nfs_pageio_reset_read_mds(pgio);
1171 return;
1172 }
1085 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 1173 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
1086 req->wb_context, 1174 req->wb_context,
1087 req_offset(req), 1175 req_offset(req),
@@ -1100,6 +1188,10 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *
1100{ 1188{
1101 BUG_ON(pgio->pg_lseg != NULL); 1189 BUG_ON(pgio->pg_lseg != NULL);
1102 1190
1191 if (req->wb_offset != req->wb_pgbase) {
1192 nfs_pageio_reset_write_mds(pgio);
1193 return;
1194 }
1103 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 1195 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
1104 req->wb_context, 1196 req->wb_context,
1105 req_offset(req), 1197 req_offset(req),
@@ -1113,26 +1205,31 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *
1113EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write); 1205EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write);
1114 1206
1115bool 1207bool
1116pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode) 1208pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode,
1209 const struct nfs_pgio_completion_ops *compl_ops)
1117{ 1210{
1118 struct nfs_server *server = NFS_SERVER(inode); 1211 struct nfs_server *server = NFS_SERVER(inode);
1119 struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld; 1212 struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
1120 1213
1121 if (ld == NULL) 1214 if (ld == NULL)
1122 return false; 1215 return false;
1123 nfs_pageio_init(pgio, inode, ld->pg_read_ops, server->rsize, 0); 1216 nfs_pageio_init(pgio, inode, ld->pg_read_ops, compl_ops,
1217 server->rsize, 0);
1124 return true; 1218 return true;
1125} 1219}
1126 1220
1127bool 1221bool
1128pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags) 1222pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode,
1223 int ioflags,
1224 const struct nfs_pgio_completion_ops *compl_ops)
1129{ 1225{
1130 struct nfs_server *server = NFS_SERVER(inode); 1226 struct nfs_server *server = NFS_SERVER(inode);
1131 struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld; 1227 struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
1132 1228
1133 if (ld == NULL) 1229 if (ld == NULL)
1134 return false; 1230 return false;
1135 nfs_pageio_init(pgio, inode, ld->pg_write_ops, server->wsize, ioflags); 1231 nfs_pageio_init(pgio, inode, ld->pg_write_ops, compl_ops,
1232 server->wsize, ioflags);
1136 return true; 1233 return true;
1137} 1234}
1138 1235
@@ -1162,13 +1259,15 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
1162} 1259}
1163EXPORT_SYMBOL_GPL(pnfs_generic_pg_test); 1260EXPORT_SYMBOL_GPL(pnfs_generic_pg_test);
1164 1261
1165static int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head *head) 1262int pnfs_write_done_resend_to_mds(struct inode *inode,
1263 struct list_head *head,
1264 const struct nfs_pgio_completion_ops *compl_ops)
1166{ 1265{
1167 struct nfs_pageio_descriptor pgio; 1266 struct nfs_pageio_descriptor pgio;
1168 LIST_HEAD(failed); 1267 LIST_HEAD(failed);
1169 1268
1170 /* Resend all requests through the MDS */ 1269 /* Resend all requests through the MDS */
1171 nfs_pageio_init_write_mds(&pgio, inode, FLUSH_STABLE); 1270 nfs_pageio_init_write_mds(&pgio, inode, FLUSH_STABLE, compl_ops);
1172 while (!list_empty(head)) { 1271 while (!list_empty(head)) {
1173 struct nfs_page *req = nfs_list_entry(head->next); 1272 struct nfs_page *req = nfs_list_entry(head->next);
1174 1273
@@ -1188,30 +1287,37 @@ static int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head *
1188 } 1287 }
1189 return 0; 1288 return 0;
1190} 1289}
1290EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds);
1291
1292static void pnfs_ld_handle_write_error(struct nfs_write_data *data)
1293{
1294 struct nfs_pgio_header *hdr = data->header;
1295
1296 dprintk("pnfs write error = %d\n", hdr->pnfs_error);
1297 if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
1298 PNFS_LAYOUTRET_ON_ERROR) {
1299 clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags);
1300 pnfs_return_layout(hdr->inode);
1301 }
1302 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
1303 data->task.tk_status = pnfs_write_done_resend_to_mds(hdr->inode,
1304 &hdr->pages,
1305 hdr->completion_ops);
1306}
1191 1307
1192/* 1308/*
1193 * Called by non rpc-based layout drivers 1309 * Called by non rpc-based layout drivers
1194 */ 1310 */
1195void pnfs_ld_write_done(struct nfs_write_data *data) 1311void pnfs_ld_write_done(struct nfs_write_data *data)
1196{ 1312{
1197 if (likely(!data->pnfs_error)) { 1313 struct nfs_pgio_header *hdr = data->header;
1314
1315 if (!hdr->pnfs_error) {
1198 pnfs_set_layoutcommit(data); 1316 pnfs_set_layoutcommit(data);
1199 data->mds_ops->rpc_call_done(&data->task, data); 1317 hdr->mds_ops->rpc_call_done(&data->task, data);
1200 } else { 1318 } else
1201 dprintk("pnfs write error = %d\n", data->pnfs_error); 1319 pnfs_ld_handle_write_error(data);
1202 if (NFS_SERVER(data->inode)->pnfs_curr_ld->flags & 1320 hdr->mds_ops->rpc_release(data);
1203 PNFS_LAYOUTRET_ON_ERROR) {
1204 /* Don't lo_commit on error, Server will needs to
1205 * preform a file recovery.
1206 */
1207 clear_bit(NFS_INO_LAYOUTCOMMIT,
1208 &NFS_I(data->inode)->flags);
1209 pnfs_return_layout(data->inode);
1210 }
1211 data->task.tk_status = pnfs_write_done_resend_to_mds(data->inode, &data->pages);
1212 }
1213 put_lseg(data->lseg);
1214 data->mds_ops->rpc_release(data);
1215} 1321}
1216EXPORT_SYMBOL_GPL(pnfs_ld_write_done); 1322EXPORT_SYMBOL_GPL(pnfs_ld_write_done);
1217 1323
@@ -1219,12 +1325,13 @@ static void
1219pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, 1325pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
1220 struct nfs_write_data *data) 1326 struct nfs_write_data *data)
1221{ 1327{
1222 list_splice_tail_init(&data->pages, &desc->pg_list); 1328 struct nfs_pgio_header *hdr = data->header;
1223 if (data->req && list_empty(&data->req->wb_list)) 1329
1224 nfs_list_add_request(data->req, &desc->pg_list); 1330 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
1225 nfs_pageio_reset_write_mds(desc); 1331 list_splice_tail_init(&hdr->pages, &desc->pg_list);
1226 desc->pg_recoalesce = 1; 1332 nfs_pageio_reset_write_mds(desc);
1227 put_lseg(data->lseg); 1333 desc->pg_recoalesce = 1;
1334 }
1228 nfs_writedata_release(data); 1335 nfs_writedata_release(data);
1229} 1336}
1230 1337
@@ -1234,23 +1341,18 @@ pnfs_try_to_write_data(struct nfs_write_data *wdata,
1234 struct pnfs_layout_segment *lseg, 1341 struct pnfs_layout_segment *lseg,
1235 int how) 1342 int how)
1236{ 1343{
1237 struct inode *inode = wdata->inode; 1344 struct nfs_pgio_header *hdr = wdata->header;
1345 struct inode *inode = hdr->inode;
1238 enum pnfs_try_status trypnfs; 1346 enum pnfs_try_status trypnfs;
1239 struct nfs_server *nfss = NFS_SERVER(inode); 1347 struct nfs_server *nfss = NFS_SERVER(inode);
1240 1348
1241 wdata->mds_ops = call_ops; 1349 hdr->mds_ops = call_ops;
1242 wdata->lseg = get_lseg(lseg);
1243 1350
1244 dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__, 1351 dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__,
1245 inode->i_ino, wdata->args.count, wdata->args.offset, how); 1352 inode->i_ino, wdata->args.count, wdata->args.offset, how);
1246
1247 trypnfs = nfss->pnfs_curr_ld->write_pagelist(wdata, how); 1353 trypnfs = nfss->pnfs_curr_ld->write_pagelist(wdata, how);
1248 if (trypnfs == PNFS_NOT_ATTEMPTED) { 1354 if (trypnfs != PNFS_NOT_ATTEMPTED)
1249 put_lseg(wdata->lseg);
1250 wdata->lseg = NULL;
1251 } else
1252 nfs_inc_stats(inode, NFSIOS_PNFS_WRITE); 1355 nfs_inc_stats(inode, NFSIOS_PNFS_WRITE);
1253
1254 dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs); 1356 dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
1255 return trypnfs; 1357 return trypnfs;
1256} 1358}
@@ -1266,7 +1368,7 @@ pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *he
1266 while (!list_empty(head)) { 1368 while (!list_empty(head)) {
1267 enum pnfs_try_status trypnfs; 1369 enum pnfs_try_status trypnfs;
1268 1370
1269 data = list_entry(head->next, struct nfs_write_data, list); 1371 data = list_first_entry(head, struct nfs_write_data, list);
1270 list_del_init(&data->list); 1372 list_del_init(&data->list);
1271 1373
1272 trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how); 1374 trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how);
@@ -1276,43 +1378,82 @@ pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *he
1276 put_lseg(lseg); 1378 put_lseg(lseg);
1277} 1379}
1278 1380
1381static void pnfs_writehdr_free(struct nfs_pgio_header *hdr)
1382{
1383 put_lseg(hdr->lseg);
1384 nfs_writehdr_free(hdr);
1385}
1386
1279int 1387int
1280pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) 1388pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
1281{ 1389{
1282 LIST_HEAD(head); 1390 struct nfs_write_header *whdr;
1391 struct nfs_pgio_header *hdr;
1283 int ret; 1392 int ret;
1284 1393
1285 ret = nfs_generic_flush(desc, &head); 1394 whdr = nfs_writehdr_alloc();
1286 if (ret != 0) { 1395 if (!whdr) {
1396 desc->pg_completion_ops->error_cleanup(&desc->pg_list);
1287 put_lseg(desc->pg_lseg); 1397 put_lseg(desc->pg_lseg);
1288 desc->pg_lseg = NULL; 1398 desc->pg_lseg = NULL;
1289 return ret; 1399 return -ENOMEM;
1290 } 1400 }
1291 pnfs_do_multiple_writes(desc, &head, desc->pg_ioflags); 1401 hdr = &whdr->header;
1292 return 0; 1402 nfs_pgheader_init(desc, hdr, pnfs_writehdr_free);
1403 hdr->lseg = get_lseg(desc->pg_lseg);
1404 atomic_inc(&hdr->refcnt);
1405 ret = nfs_generic_flush(desc, hdr);
1406 if (ret != 0) {
1407 put_lseg(desc->pg_lseg);
1408 desc->pg_lseg = NULL;
1409 } else
1410 pnfs_do_multiple_writes(desc, &hdr->rpc_list, desc->pg_ioflags);
1411 if (atomic_dec_and_test(&hdr->refcnt))
1412 hdr->completion_ops->completion(hdr);
1413 return ret;
1293} 1414}
1294EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages); 1415EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages);
1295 1416
1296static void pnfs_ld_handle_read_error(struct nfs_read_data *data) 1417int pnfs_read_done_resend_to_mds(struct inode *inode,
1418 struct list_head *head,
1419 const struct nfs_pgio_completion_ops *compl_ops)
1297{ 1420{
1298 struct nfs_pageio_descriptor pgio; 1421 struct nfs_pageio_descriptor pgio;
1422 LIST_HEAD(failed);
1299 1423
1300 put_lseg(data->lseg); 1424 /* Resend all requests through the MDS */
1301 data->lseg = NULL; 1425 nfs_pageio_init_read_mds(&pgio, inode, compl_ops);
1302 dprintk("pnfs write error = %d\n", data->pnfs_error); 1426 while (!list_empty(head)) {
1303 if (NFS_SERVER(data->inode)->pnfs_curr_ld->flags & 1427 struct nfs_page *req = nfs_list_entry(head->next);
1304 PNFS_LAYOUTRET_ON_ERROR)
1305 pnfs_return_layout(data->inode);
1306
1307 nfs_pageio_init_read_mds(&pgio, data->inode);
1308
1309 while (!list_empty(&data->pages)) {
1310 struct nfs_page *req = nfs_list_entry(data->pages.next);
1311 1428
1312 nfs_list_remove_request(req); 1429 nfs_list_remove_request(req);
1313 nfs_pageio_add_request(&pgio, req); 1430 if (!nfs_pageio_add_request(&pgio, req))
1431 nfs_list_add_request(req, &failed);
1314 } 1432 }
1315 nfs_pageio_complete(&pgio); 1433 nfs_pageio_complete(&pgio);
1434
1435 if (!list_empty(&failed)) {
1436 list_move(&failed, head);
1437 return -EIO;
1438 }
1439 return 0;
1440}
1441EXPORT_SYMBOL_GPL(pnfs_read_done_resend_to_mds);
1442
1443static void pnfs_ld_handle_read_error(struct nfs_read_data *data)
1444{
1445 struct nfs_pgio_header *hdr = data->header;
1446
1447 dprintk("pnfs read error = %d\n", hdr->pnfs_error);
1448 if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
1449 PNFS_LAYOUTRET_ON_ERROR) {
1450 clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags);
1451 pnfs_return_layout(hdr->inode);
1452 }
1453 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
1454 data->task.tk_status = pnfs_read_done_resend_to_mds(hdr->inode,
1455 &hdr->pages,
1456 hdr->completion_ops);
1316} 1457}
1317 1458
1318/* 1459/*
@@ -1320,13 +1461,14 @@ static void pnfs_ld_handle_read_error(struct nfs_read_data *data)
1320 */ 1461 */
1321void pnfs_ld_read_done(struct nfs_read_data *data) 1462void pnfs_ld_read_done(struct nfs_read_data *data)
1322{ 1463{
1323 if (likely(!data->pnfs_error)) { 1464 struct nfs_pgio_header *hdr = data->header;
1465
1466 if (likely(!hdr->pnfs_error)) {
1324 __nfs4_read_done_cb(data); 1467 __nfs4_read_done_cb(data);
1325 data->mds_ops->rpc_call_done(&data->task, data); 1468 hdr->mds_ops->rpc_call_done(&data->task, data);
1326 } else 1469 } else
1327 pnfs_ld_handle_read_error(data); 1470 pnfs_ld_handle_read_error(data);
1328 put_lseg(data->lseg); 1471 hdr->mds_ops->rpc_release(data);
1329 data->mds_ops->rpc_release(data);
1330} 1472}
1331EXPORT_SYMBOL_GPL(pnfs_ld_read_done); 1473EXPORT_SYMBOL_GPL(pnfs_ld_read_done);
1332 1474
@@ -1334,11 +1476,13 @@ static void
1334pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, 1476pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
1335 struct nfs_read_data *data) 1477 struct nfs_read_data *data)
1336{ 1478{
1337 list_splice_tail_init(&data->pages, &desc->pg_list); 1479 struct nfs_pgio_header *hdr = data->header;
1338 if (data->req && list_empty(&data->req->wb_list)) 1480
1339 nfs_list_add_request(data->req, &desc->pg_list); 1481 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
1340 nfs_pageio_reset_read_mds(desc); 1482 list_splice_tail_init(&hdr->pages, &desc->pg_list);
1341 desc->pg_recoalesce = 1; 1483 nfs_pageio_reset_read_mds(desc);
1484 desc->pg_recoalesce = 1;
1485 }
1342 nfs_readdata_release(data); 1486 nfs_readdata_release(data);
1343} 1487}
1344 1488
@@ -1350,23 +1494,19 @@ pnfs_try_to_read_data(struct nfs_read_data *rdata,
1350 const struct rpc_call_ops *call_ops, 1494 const struct rpc_call_ops *call_ops,
1351 struct pnfs_layout_segment *lseg) 1495 struct pnfs_layout_segment *lseg)
1352{ 1496{
1353 struct inode *inode = rdata->inode; 1497 struct nfs_pgio_header *hdr = rdata->header;
1498 struct inode *inode = hdr->inode;
1354 struct nfs_server *nfss = NFS_SERVER(inode); 1499 struct nfs_server *nfss = NFS_SERVER(inode);
1355 enum pnfs_try_status trypnfs; 1500 enum pnfs_try_status trypnfs;
1356 1501
1357 rdata->mds_ops = call_ops; 1502 hdr->mds_ops = call_ops;
1358 rdata->lseg = get_lseg(lseg);
1359 1503
1360 dprintk("%s: Reading ino:%lu %u@%llu\n", 1504 dprintk("%s: Reading ino:%lu %u@%llu\n",
1361 __func__, inode->i_ino, rdata->args.count, rdata->args.offset); 1505 __func__, inode->i_ino, rdata->args.count, rdata->args.offset);
1362 1506
1363 trypnfs = nfss->pnfs_curr_ld->read_pagelist(rdata); 1507 trypnfs = nfss->pnfs_curr_ld->read_pagelist(rdata);
1364 if (trypnfs == PNFS_NOT_ATTEMPTED) { 1508 if (trypnfs != PNFS_NOT_ATTEMPTED)
1365 put_lseg(rdata->lseg);
1366 rdata->lseg = NULL;
1367 } else {
1368 nfs_inc_stats(inode, NFSIOS_PNFS_READ); 1509 nfs_inc_stats(inode, NFSIOS_PNFS_READ);
1369 }
1370 dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs); 1510 dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
1371 return trypnfs; 1511 return trypnfs;
1372} 1512}
@@ -1382,7 +1522,7 @@ pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *hea
1382 while (!list_empty(head)) { 1522 while (!list_empty(head)) {
1383 enum pnfs_try_status trypnfs; 1523 enum pnfs_try_status trypnfs;
1384 1524
1385 data = list_entry(head->next, struct nfs_read_data, list); 1525 data = list_first_entry(head, struct nfs_read_data, list);
1386 list_del_init(&data->list); 1526 list_del_init(&data->list);
1387 1527
1388 trypnfs = pnfs_try_to_read_data(data, call_ops, lseg); 1528 trypnfs = pnfs_try_to_read_data(data, call_ops, lseg);
@@ -1392,20 +1532,40 @@ pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *hea
1392 put_lseg(lseg); 1532 put_lseg(lseg);
1393} 1533}
1394 1534
1535static void pnfs_readhdr_free(struct nfs_pgio_header *hdr)
1536{
1537 put_lseg(hdr->lseg);
1538 nfs_readhdr_free(hdr);
1539}
1540
1395int 1541int
1396pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) 1542pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
1397{ 1543{
1398 LIST_HEAD(head); 1544 struct nfs_read_header *rhdr;
1545 struct nfs_pgio_header *hdr;
1399 int ret; 1546 int ret;
1400 1547
1401 ret = nfs_generic_pagein(desc, &head); 1548 rhdr = nfs_readhdr_alloc();
1402 if (ret != 0) { 1549 if (!rhdr) {
1550 desc->pg_completion_ops->error_cleanup(&desc->pg_list);
1551 ret = -ENOMEM;
1403 put_lseg(desc->pg_lseg); 1552 put_lseg(desc->pg_lseg);
1404 desc->pg_lseg = NULL; 1553 desc->pg_lseg = NULL;
1405 return ret; 1554 return ret;
1406 } 1555 }
1407 pnfs_do_multiple_reads(desc, &head); 1556 hdr = &rhdr->header;
1408 return 0; 1557 nfs_pgheader_init(desc, hdr, pnfs_readhdr_free);
1558 hdr->lseg = get_lseg(desc->pg_lseg);
1559 atomic_inc(&hdr->refcnt);
1560 ret = nfs_generic_pagein(desc, hdr);
1561 if (ret != 0) {
1562 put_lseg(desc->pg_lseg);
1563 desc->pg_lseg = NULL;
1564 } else
1565 pnfs_do_multiple_reads(desc, &hdr->rpc_list);
1566 if (atomic_dec_and_test(&hdr->refcnt))
1567 hdr->completion_ops->completion(hdr);
1568 return ret;
1409} 1569}
1410EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages); 1570EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages);
1411 1571
@@ -1438,30 +1598,32 @@ EXPORT_SYMBOL_GPL(pnfs_set_lo_fail);
1438void 1598void
1439pnfs_set_layoutcommit(struct nfs_write_data *wdata) 1599pnfs_set_layoutcommit(struct nfs_write_data *wdata)
1440{ 1600{
1441 struct nfs_inode *nfsi = NFS_I(wdata->inode); 1601 struct nfs_pgio_header *hdr = wdata->header;
1602 struct inode *inode = hdr->inode;
1603 struct nfs_inode *nfsi = NFS_I(inode);
1442 loff_t end_pos = wdata->mds_offset + wdata->res.count; 1604 loff_t end_pos = wdata->mds_offset + wdata->res.count;
1443 bool mark_as_dirty = false; 1605 bool mark_as_dirty = false;
1444 1606
1445 spin_lock(&nfsi->vfs_inode.i_lock); 1607 spin_lock(&inode->i_lock);
1446 if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) { 1608 if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
1447 mark_as_dirty = true; 1609 mark_as_dirty = true;
1448 dprintk("%s: Set layoutcommit for inode %lu ", 1610 dprintk("%s: Set layoutcommit for inode %lu ",
1449 __func__, wdata->inode->i_ino); 1611 __func__, inode->i_ino);
1450 } 1612 }
1451 if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &wdata->lseg->pls_flags)) { 1613 if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &hdr->lseg->pls_flags)) {
1452 /* references matched in nfs4_layoutcommit_release */ 1614 /* references matched in nfs4_layoutcommit_release */
1453 get_lseg(wdata->lseg); 1615 get_lseg(hdr->lseg);
1454 } 1616 }
1455 if (end_pos > nfsi->layout->plh_lwb) 1617 if (end_pos > nfsi->layout->plh_lwb)
1456 nfsi->layout->plh_lwb = end_pos; 1618 nfsi->layout->plh_lwb = end_pos;
1457 spin_unlock(&nfsi->vfs_inode.i_lock); 1619 spin_unlock(&inode->i_lock);
1458 dprintk("%s: lseg %p end_pos %llu\n", 1620 dprintk("%s: lseg %p end_pos %llu\n",
1459 __func__, wdata->lseg, nfsi->layout->plh_lwb); 1621 __func__, hdr->lseg, nfsi->layout->plh_lwb);
1460 1622
1461 /* if pnfs_layoutcommit_inode() runs between inode locks, the next one 1623 /* if pnfs_layoutcommit_inode() runs between inode locks, the next one
1462 * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */ 1624 * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */
1463 if (mark_as_dirty) 1625 if (mark_as_dirty)
1464 mark_inode_dirty_sync(wdata->inode); 1626 mark_inode_dirty_sync(inode);
1465} 1627}
1466EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit); 1628EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit);
1467 1629
@@ -1550,3 +1712,15 @@ out_free:
1550 kfree(data); 1712 kfree(data);
1551 goto out; 1713 goto out;
1552} 1714}
1715
1716struct nfs4_threshold *pnfs_mdsthreshold_alloc(void)
1717{
1718 struct nfs4_threshold *thp;
1719
1720 thp = kzalloc(sizeof(*thp), GFP_NOFS);
1721 if (!thp) {
1722 dprintk("%s mdsthreshold allocation failed\n", __func__);
1723 return NULL;
1724 }
1725 return thp;
1726}
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 442ebf68eeec..29fd23c0efdc 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -63,6 +63,7 @@ enum {
63 NFS_LAYOUT_BULK_RECALL, /* bulk recall affecting layout */ 63 NFS_LAYOUT_BULK_RECALL, /* bulk recall affecting layout */
64 NFS_LAYOUT_ROC, /* some lseg had roc bit set */ 64 NFS_LAYOUT_ROC, /* some lseg had roc bit set */
65 NFS_LAYOUT_DESTROYED, /* no new use of layout allowed */ 65 NFS_LAYOUT_DESTROYED, /* no new use of layout allowed */
66 NFS_LAYOUT_INVALID, /* layout is being destroyed */
66}; 67};
67 68
68enum layoutdriver_policy_flags { 69enum layoutdriver_policy_flags {
@@ -94,11 +95,20 @@ struct pnfs_layoutdriver_type {
94 const struct nfs_pageio_ops *pg_read_ops; 95 const struct nfs_pageio_ops *pg_read_ops;
95 const struct nfs_pageio_ops *pg_write_ops; 96 const struct nfs_pageio_ops *pg_write_ops;
96 97
98 struct pnfs_ds_commit_info *(*get_ds_info) (struct inode *inode);
97 void (*mark_request_commit) (struct nfs_page *req, 99 void (*mark_request_commit) (struct nfs_page *req,
98 struct pnfs_layout_segment *lseg); 100 struct pnfs_layout_segment *lseg,
99 void (*clear_request_commit) (struct nfs_page *req); 101 struct nfs_commit_info *cinfo);
100 int (*scan_commit_lists) (struct inode *inode, int max, spinlock_t *lock); 102 void (*clear_request_commit) (struct nfs_page *req,
101 int (*commit_pagelist)(struct inode *inode, struct list_head *mds_pages, int how); 103 struct nfs_commit_info *cinfo);
104 int (*scan_commit_lists) (struct nfs_commit_info *cinfo,
105 int max);
106 void (*recover_commit_reqs) (struct list_head *list,
107 struct nfs_commit_info *cinfo);
108 int (*commit_pagelist)(struct inode *inode,
109 struct list_head *mds_pages,
110 int how,
111 struct nfs_commit_info *cinfo);
102 112
103 /* 113 /*
104 * Return PNFS_ATTEMPTED to indicate the layout code has attempted 114 * Return PNFS_ATTEMPTED to indicate the layout code has attempted
@@ -168,8 +178,10 @@ extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp);
168void get_layout_hdr(struct pnfs_layout_hdr *lo); 178void get_layout_hdr(struct pnfs_layout_hdr *lo);
169void put_lseg(struct pnfs_layout_segment *lseg); 179void put_lseg(struct pnfs_layout_segment *lseg);
170 180
171bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *); 181bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *,
172bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *, int); 182 const struct nfs_pgio_completion_ops *);
183bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *,
184 int, const struct nfs_pgio_completion_ops *);
173 185
174void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32); 186void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32);
175void unset_pnfs_layoutdriver(struct nfs_server *); 187void unset_pnfs_layoutdriver(struct nfs_server *);
@@ -211,6 +223,11 @@ struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
211 gfp_t gfp_flags); 223 gfp_t gfp_flags);
212 224
213void nfs4_deviceid_mark_client_invalid(struct nfs_client *clp); 225void nfs4_deviceid_mark_client_invalid(struct nfs_client *clp);
226int pnfs_read_done_resend_to_mds(struct inode *inode, struct list_head *head,
227 const struct nfs_pgio_completion_ops *compl_ops);
228int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head *head,
229 const struct nfs_pgio_completion_ops *compl_ops);
230struct nfs4_threshold *pnfs_mdsthreshold_alloc(void);
214 231
215/* nfs4_deviceid_flags */ 232/* nfs4_deviceid_flags */
216enum { 233enum {
@@ -261,49 +278,66 @@ static inline int pnfs_enabled_sb(struct nfs_server *nfss)
261} 278}
262 279
263static inline int 280static inline int
264pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how) 281pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how,
282 struct nfs_commit_info *cinfo)
265{ 283{
266 if (!test_and_clear_bit(NFS_INO_PNFS_COMMIT, &NFS_I(inode)->flags)) 284 if (cinfo->ds == NULL || cinfo->ds->ncommitting == 0)
267 return PNFS_NOT_ATTEMPTED; 285 return PNFS_NOT_ATTEMPTED;
268 return NFS_SERVER(inode)->pnfs_curr_ld->commit_pagelist(inode, mds_pages, how); 286 return NFS_SERVER(inode)->pnfs_curr_ld->commit_pagelist(inode, mds_pages, how, cinfo);
287}
288
289static inline struct pnfs_ds_commit_info *
290pnfs_get_ds_info(struct inode *inode)
291{
292 struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
293
294 if (ld == NULL || ld->get_ds_info == NULL)
295 return NULL;
296 return ld->get_ds_info(inode);
269} 297}
270 298
271static inline bool 299static inline bool
272pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) 300pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
301 struct nfs_commit_info *cinfo)
273{ 302{
274 struct inode *inode = req->wb_context->dentry->d_inode; 303 struct inode *inode = req->wb_context->dentry->d_inode;
275 struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; 304 struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
276 305
277 if (lseg == NULL || ld->mark_request_commit == NULL) 306 if (lseg == NULL || ld->mark_request_commit == NULL)
278 return false; 307 return false;
279 ld->mark_request_commit(req, lseg); 308 ld->mark_request_commit(req, lseg, cinfo);
280 return true; 309 return true;
281} 310}
282 311
283static inline bool 312static inline bool
284pnfs_clear_request_commit(struct nfs_page *req) 313pnfs_clear_request_commit(struct nfs_page *req, struct nfs_commit_info *cinfo)
285{ 314{
286 struct inode *inode = req->wb_context->dentry->d_inode; 315 struct inode *inode = req->wb_context->dentry->d_inode;
287 struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; 316 struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
288 317
289 if (ld == NULL || ld->clear_request_commit == NULL) 318 if (ld == NULL || ld->clear_request_commit == NULL)
290 return false; 319 return false;
291 ld->clear_request_commit(req); 320 ld->clear_request_commit(req, cinfo);
292 return true; 321 return true;
293} 322}
294 323
295static inline int 324static inline int
296pnfs_scan_commit_lists(struct inode *inode, int max, spinlock_t *lock) 325pnfs_scan_commit_lists(struct inode *inode, struct nfs_commit_info *cinfo,
326 int max)
297{ 327{
298 struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; 328 if (cinfo->ds == NULL || cinfo->ds->nwritten == 0)
299 int ret;
300
301 if (ld == NULL || ld->scan_commit_lists == NULL)
302 return 0; 329 return 0;
303 ret = ld->scan_commit_lists(inode, max, lock); 330 else
304 if (ret != 0) 331 return NFS_SERVER(inode)->pnfs_curr_ld->scan_commit_lists(cinfo, max);
305 set_bit(NFS_INO_PNFS_COMMIT, &NFS_I(inode)->flags); 332}
306 return ret; 333
334static inline void
335pnfs_recover_commit_reqs(struct inode *inode, struct list_head *list,
336 struct nfs_commit_info *cinfo)
337{
338 if (cinfo->ds == NULL || cinfo->ds->nwritten == 0)
339 return;
340 NFS_SERVER(inode)->pnfs_curr_ld->recover_commit_reqs(list, cinfo);
307} 341}
308 342
309/* Should the pNFS client commit and return the layout upon a setattr */ 343/* Should the pNFS client commit and return the layout upon a setattr */
@@ -327,6 +361,14 @@ static inline int pnfs_return_layout(struct inode *ino)
327 return 0; 361 return 0;
328} 362}
329 363
364static inline bool
365pnfs_use_threshold(struct nfs4_threshold **dst, struct nfs4_threshold *src,
366 struct nfs_server *nfss)
367{
368 return (dst && src && src->bm != 0 &&
369 nfss->pnfs_curr_ld->id == src->l_type);
370}
371
330#ifdef NFS_DEBUG 372#ifdef NFS_DEBUG
331void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id); 373void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id);
332#else 374#else
@@ -396,45 +438,74 @@ static inline void unset_pnfs_layoutdriver(struct nfs_server *s)
396{ 438{
397} 439}
398 440
399static inline bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode) 441static inline bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode,
442 const struct nfs_pgio_completion_ops *compl_ops)
400{ 443{
401 return false; 444 return false;
402} 445}
403 446
404static inline bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags) 447static inline bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags,
448 const struct nfs_pgio_completion_ops *compl_ops)
405{ 449{
406 return false; 450 return false;
407} 451}
408 452
409static inline int 453static inline int
410pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how) 454pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how,
455 struct nfs_commit_info *cinfo)
411{ 456{
412 return PNFS_NOT_ATTEMPTED; 457 return PNFS_NOT_ATTEMPTED;
413} 458}
414 459
460static inline struct pnfs_ds_commit_info *
461pnfs_get_ds_info(struct inode *inode)
462{
463 return NULL;
464}
465
415static inline bool 466static inline bool
416pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) 467pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
468 struct nfs_commit_info *cinfo)
417{ 469{
418 return false; 470 return false;
419} 471}
420 472
421static inline bool 473static inline bool
422pnfs_clear_request_commit(struct nfs_page *req) 474pnfs_clear_request_commit(struct nfs_page *req, struct nfs_commit_info *cinfo)
423{ 475{
424 return false; 476 return false;
425} 477}
426 478
427static inline int 479static inline int
428pnfs_scan_commit_lists(struct inode *inode, int max, spinlock_t *lock) 480pnfs_scan_commit_lists(struct inode *inode, struct nfs_commit_info *cinfo,
481 int max)
429{ 482{
430 return 0; 483 return 0;
431} 484}
432 485
486static inline void
487pnfs_recover_commit_reqs(struct inode *inode, struct list_head *list,
488 struct nfs_commit_info *cinfo)
489{
490}
491
433static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync) 492static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync)
434{ 493{
435 return 0; 494 return 0;
436} 495}
437 496
497static inline bool
498pnfs_use_threshold(struct nfs4_threshold **dst, struct nfs4_threshold *src,
499 struct nfs_server *nfss)
500{
501 return false;
502}
503
504static inline struct nfs4_threshold *pnfs_mdsthreshold_alloc(void)
505{
506 return NULL;
507}
508
438#endif /* CONFIG_NFS_V4_1 */ 509#endif /* CONFIG_NFS_V4_1 */
439 510
440#endif /* FS_NFS_PNFS_H */ 511#endif /* FS_NFS_PNFS_H */
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index d6408b6437de..a706b6bcc286 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -178,7 +178,7 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
178} 178}
179 179
180static int 180static int
181nfs_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qstr *name, 181nfs_proc_lookup(struct inode *dir, struct qstr *name,
182 struct nfs_fh *fhandle, struct nfs_fattr *fattr) 182 struct nfs_fh *fhandle, struct nfs_fattr *fattr)
183{ 183{
184 struct nfs_diropargs arg = { 184 struct nfs_diropargs arg = {
@@ -640,12 +640,14 @@ nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
640 640
641static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data) 641static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data)
642{ 642{
643 struct inode *inode = data->header->inode;
644
643 if (nfs_async_handle_expired_key(task)) 645 if (nfs_async_handle_expired_key(task))
644 return -EAGAIN; 646 return -EAGAIN;
645 647
646 nfs_invalidate_atime(data->inode); 648 nfs_invalidate_atime(inode);
647 if (task->tk_status >= 0) { 649 if (task->tk_status >= 0) {
648 nfs_refresh_inode(data->inode, data->res.fattr); 650 nfs_refresh_inode(inode, data->res.fattr);
649 /* Emulate the eof flag, which isn't normally needed in NFSv2 651 /* Emulate the eof flag, which isn't normally needed in NFSv2
650 * as it is guaranteed to always return the file attributes 652 * as it is guaranteed to always return the file attributes
651 */ 653 */
@@ -667,11 +669,13 @@ static void nfs_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_dat
667 669
668static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data) 670static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data)
669{ 671{
672 struct inode *inode = data->header->inode;
673
670 if (nfs_async_handle_expired_key(task)) 674 if (nfs_async_handle_expired_key(task))
671 return -EAGAIN; 675 return -EAGAIN;
672 676
673 if (task->tk_status >= 0) 677 if (task->tk_status >= 0)
674 nfs_post_op_update_inode_force_wcc(data->inode, data->res.fattr); 678 nfs_post_op_update_inode_force_wcc(inode, data->res.fattr);
675 return 0; 679 return 0;
676} 680}
677 681
@@ -687,8 +691,13 @@ static void nfs_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_d
687 rpc_call_start(task); 691 rpc_call_start(task);
688} 692}
689 693
694static void nfs_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
695{
696 BUG();
697}
698
690static void 699static void
691nfs_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg) 700nfs_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg)
692{ 701{
693 BUG(); 702 BUG();
694} 703}
@@ -732,6 +741,7 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
732 .file_inode_ops = &nfs_file_inode_operations, 741 .file_inode_ops = &nfs_file_inode_operations,
733 .file_ops = &nfs_file_operations, 742 .file_ops = &nfs_file_operations,
734 .getroot = nfs_proc_get_root, 743 .getroot = nfs_proc_get_root,
744 .submount = nfs_submount,
735 .getattr = nfs_proc_getattr, 745 .getattr = nfs_proc_getattr,
736 .setattr = nfs_proc_setattr, 746 .setattr = nfs_proc_setattr,
737 .lookup = nfs_proc_lookup, 747 .lookup = nfs_proc_lookup,
@@ -763,6 +773,7 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
763 .write_rpc_prepare = nfs_proc_write_rpc_prepare, 773 .write_rpc_prepare = nfs_proc_write_rpc_prepare,
764 .write_done = nfs_write_done, 774 .write_done = nfs_write_done,
765 .commit_setup = nfs_proc_commit_setup, 775 .commit_setup = nfs_proc_commit_setup,
776 .commit_rpc_prepare = nfs_proc_commit_rpc_prepare,
766 .lock = nfs_proc_lock, 777 .lock = nfs_proc_lock,
767 .lock_check_bounds = nfs_lock_check_bounds, 778 .lock_check_bounds = nfs_lock_check_bounds,
768 .close_context = nfs_close_context, 779 .close_context = nfs_close_context,
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 0a4be28c2ea3..86ced7836214 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -30,43 +30,73 @@
30#define NFSDBG_FACILITY NFSDBG_PAGECACHE 30#define NFSDBG_FACILITY NFSDBG_PAGECACHE
31 31
32static const struct nfs_pageio_ops nfs_pageio_read_ops; 32static const struct nfs_pageio_ops nfs_pageio_read_ops;
33static const struct rpc_call_ops nfs_read_partial_ops; 33static const struct rpc_call_ops nfs_read_common_ops;
34static const struct rpc_call_ops nfs_read_full_ops; 34static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops;
35 35
36static struct kmem_cache *nfs_rdata_cachep; 36static struct kmem_cache *nfs_rdata_cachep;
37 37
38struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount) 38struct nfs_read_header *nfs_readhdr_alloc(void)
39{ 39{
40 struct nfs_read_data *p; 40 struct nfs_read_header *rhdr;
41 41
42 p = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL); 42 rhdr = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL);
43 if (p) { 43 if (rhdr) {
44 INIT_LIST_HEAD(&p->pages); 44 struct nfs_pgio_header *hdr = &rhdr->header;
45 p->npages = pagecount; 45
46 if (pagecount <= ARRAY_SIZE(p->page_array)) 46 INIT_LIST_HEAD(&hdr->pages);
47 p->pagevec = p->page_array; 47 INIT_LIST_HEAD(&hdr->rpc_list);
48 else { 48 spin_lock_init(&hdr->lock);
49 p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL); 49 atomic_set(&hdr->refcnt, 0);
50 if (!p->pagevec) { 50 }
51 kmem_cache_free(nfs_rdata_cachep, p); 51 return rhdr;
52 p = NULL; 52}
53 } 53
54 } 54static struct nfs_read_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr,
55 unsigned int pagecount)
56{
57 struct nfs_read_data *data, *prealloc;
58
59 prealloc = &container_of(hdr, struct nfs_read_header, header)->rpc_data;
60 if (prealloc->header == NULL)
61 data = prealloc;
62 else
63 data = kzalloc(sizeof(*data), GFP_KERNEL);
64 if (!data)
65 goto out;
66
67 if (nfs_pgarray_set(&data->pages, pagecount)) {
68 data->header = hdr;
69 atomic_inc(&hdr->refcnt);
70 } else {
71 if (data != prealloc)
72 kfree(data);
73 data = NULL;
55 } 74 }
56 return p; 75out:
76 return data;
57} 77}
58 78
59void nfs_readdata_free(struct nfs_read_data *p) 79void nfs_readhdr_free(struct nfs_pgio_header *hdr)
60{ 80{
61 if (p && (p->pagevec != &p->page_array[0])) 81 struct nfs_read_header *rhdr = container_of(hdr, struct nfs_read_header, header);
62 kfree(p->pagevec); 82
63 kmem_cache_free(nfs_rdata_cachep, p); 83 kmem_cache_free(nfs_rdata_cachep, rhdr);
64} 84}
65 85
66void nfs_readdata_release(struct nfs_read_data *rdata) 86void nfs_readdata_release(struct nfs_read_data *rdata)
67{ 87{
88 struct nfs_pgio_header *hdr = rdata->header;
89 struct nfs_read_header *read_header = container_of(hdr, struct nfs_read_header, header);
90
68 put_nfs_open_context(rdata->args.context); 91 put_nfs_open_context(rdata->args.context);
69 nfs_readdata_free(rdata); 92 if (rdata->pages.pagevec != rdata->pages.page_array)
93 kfree(rdata->pages.pagevec);
94 if (rdata != &read_header->rpc_data)
95 kfree(rdata);
96 else
97 rdata->header = NULL;
98 if (atomic_dec_and_test(&hdr->refcnt))
99 hdr->completion_ops->completion(hdr);
70} 100}
71 101
72static 102static
@@ -78,39 +108,11 @@ int nfs_return_empty_page(struct page *page)
78 return 0; 108 return 0;
79} 109}
80 110
81static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data)
82{
83 unsigned int remainder = data->args.count - data->res.count;
84 unsigned int base = data->args.pgbase + data->res.count;
85 unsigned int pglen;
86 struct page **pages;
87
88 if (data->res.eof == 0 || remainder == 0)
89 return;
90 /*
91 * Note: "remainder" can never be negative, since we check for
92 * this in the XDR code.
93 */
94 pages = &data->args.pages[base >> PAGE_CACHE_SHIFT];
95 base &= ~PAGE_CACHE_MASK;
96 pglen = PAGE_CACHE_SIZE - base;
97 for (;;) {
98 if (remainder <= pglen) {
99 zero_user(*pages, base, remainder);
100 break;
101 }
102 zero_user(*pages, base, pglen);
103 pages++;
104 remainder -= pglen;
105 pglen = PAGE_CACHE_SIZE;
106 base = 0;
107 }
108}
109
110void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio, 111void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
111 struct inode *inode) 112 struct inode *inode,
113 const struct nfs_pgio_completion_ops *compl_ops)
112{ 114{
113 nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops, 115 nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops, compl_ops,
114 NFS_SERVER(inode)->rsize, 0); 116 NFS_SERVER(inode)->rsize, 0);
115} 117}
116 118
@@ -121,11 +123,12 @@ void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio)
121} 123}
122EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds); 124EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds);
123 125
124static void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, 126void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
125 struct inode *inode) 127 struct inode *inode,
128 const struct nfs_pgio_completion_ops *compl_ops)
126{ 129{
127 if (!pnfs_pageio_init_read(pgio, inode)) 130 if (!pnfs_pageio_init_read(pgio, inode, compl_ops))
128 nfs_pageio_init_read_mds(pgio, inode); 131 nfs_pageio_init_read_mds(pgio, inode, compl_ops);
129} 132}
130 133
131int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, 134int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
@@ -146,9 +149,10 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
146 if (len < PAGE_CACHE_SIZE) 149 if (len < PAGE_CACHE_SIZE)
147 zero_user_segment(page, len, PAGE_CACHE_SIZE); 150 zero_user_segment(page, len, PAGE_CACHE_SIZE);
148 151
149 nfs_pageio_init_read(&pgio, inode); 152 nfs_pageio_init_read(&pgio, inode, &nfs_async_read_completion_ops);
150 nfs_pageio_add_request(&pgio, new); 153 nfs_pageio_add_request(&pgio, new);
151 nfs_pageio_complete(&pgio); 154 nfs_pageio_complete(&pgio);
155 NFS_I(inode)->read_io += pgio.pg_bytes_written;
152 return 0; 156 return 0;
153} 157}
154 158
@@ -169,16 +173,49 @@ static void nfs_readpage_release(struct nfs_page *req)
169 nfs_release_request(req); 173 nfs_release_request(req);
170} 174}
171 175
172int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt, 176/* Note io was page aligned */
173 const struct rpc_call_ops *call_ops) 177static void nfs_read_completion(struct nfs_pgio_header *hdr)
178{
179 unsigned long bytes = 0;
180
181 if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
182 goto out;
183 while (!list_empty(&hdr->pages)) {
184 struct nfs_page *req = nfs_list_entry(hdr->pages.next);
185 struct page *page = req->wb_page;
186
187 if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) {
188 if (bytes > hdr->good_bytes)
189 zero_user(page, 0, PAGE_SIZE);
190 else if (hdr->good_bytes - bytes < PAGE_SIZE)
191 zero_user_segment(page,
192 hdr->good_bytes & ~PAGE_MASK,
193 PAGE_SIZE);
194 }
195 bytes += req->wb_bytes;
196 if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) {
197 if (bytes <= hdr->good_bytes)
198 SetPageUptodate(page);
199 } else
200 SetPageUptodate(page);
201 nfs_list_remove_request(req);
202 nfs_readpage_release(req);
203 }
204out:
205 hdr->release(hdr);
206}
207
208int nfs_initiate_read(struct rpc_clnt *clnt,
209 struct nfs_read_data *data,
210 const struct rpc_call_ops *call_ops, int flags)
174{ 211{
175 struct inode *inode = data->inode; 212 struct inode *inode = data->header->inode;
176 int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0; 213 int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0;
177 struct rpc_task *task; 214 struct rpc_task *task;
178 struct rpc_message msg = { 215 struct rpc_message msg = {
179 .rpc_argp = &data->args, 216 .rpc_argp = &data->args,
180 .rpc_resp = &data->res, 217 .rpc_resp = &data->res,
181 .rpc_cred = data->cred, 218 .rpc_cred = data->header->cred,
182 }; 219 };
183 struct rpc_task_setup task_setup_data = { 220 struct rpc_task_setup task_setup_data = {
184 .task = &data->task, 221 .task = &data->task,
@@ -187,7 +224,7 @@ int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
187 .callback_ops = call_ops, 224 .callback_ops = call_ops,
188 .callback_data = data, 225 .callback_data = data,
189 .workqueue = nfsiod_workqueue, 226 .workqueue = nfsiod_workqueue,
190 .flags = RPC_TASK_ASYNC | swap_flags, 227 .flags = RPC_TASK_ASYNC | swap_flags | flags,
191 }; 228 };
192 229
193 /* Set up the initial task struct. */ 230 /* Set up the initial task struct. */
@@ -212,19 +249,15 @@ EXPORT_SYMBOL_GPL(nfs_initiate_read);
212/* 249/*
213 * Set up the NFS read request struct 250 * Set up the NFS read request struct
214 */ 251 */
215static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, 252static void nfs_read_rpcsetup(struct nfs_read_data *data,
216 unsigned int count, unsigned int offset) 253 unsigned int count, unsigned int offset)
217{ 254{
218 struct inode *inode = req->wb_context->dentry->d_inode; 255 struct nfs_page *req = data->header->req;
219
220 data->req = req;
221 data->inode = inode;
222 data->cred = req->wb_context->cred;
223 256
224 data->args.fh = NFS_FH(inode); 257 data->args.fh = NFS_FH(data->header->inode);
225 data->args.offset = req_offset(req) + offset; 258 data->args.offset = req_offset(req) + offset;
226 data->args.pgbase = req->wb_pgbase + offset; 259 data->args.pgbase = req->wb_pgbase + offset;
227 data->args.pages = data->pagevec; 260 data->args.pages = data->pages.pagevec;
228 data->args.count = count; 261 data->args.count = count;
229 data->args.context = get_nfs_open_context(req->wb_context); 262 data->args.context = get_nfs_open_context(req->wb_context);
230 data->args.lock_context = req->wb_lock_context; 263 data->args.lock_context = req->wb_lock_context;
@@ -238,9 +271,9 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
238static int nfs_do_read(struct nfs_read_data *data, 271static int nfs_do_read(struct nfs_read_data *data,
239 const struct rpc_call_ops *call_ops) 272 const struct rpc_call_ops *call_ops)
240{ 273{
241 struct inode *inode = data->args.context->dentry->d_inode; 274 struct inode *inode = data->header->inode;
242 275
243 return nfs_initiate_read(data, NFS_CLIENT(inode), call_ops); 276 return nfs_initiate_read(NFS_CLIENT(inode), data, call_ops, 0);
244} 277}
245 278
246static int 279static int
@@ -253,7 +286,7 @@ nfs_do_multiple_reads(struct list_head *head,
253 while (!list_empty(head)) { 286 while (!list_empty(head)) {
254 int ret2; 287 int ret2;
255 288
256 data = list_entry(head->next, struct nfs_read_data, list); 289 data = list_first_entry(head, struct nfs_read_data, list);
257 list_del_init(&data->list); 290 list_del_init(&data->list);
258 291
259 ret2 = nfs_do_read(data, call_ops); 292 ret2 = nfs_do_read(data, call_ops);
@@ -275,6 +308,24 @@ nfs_async_read_error(struct list_head *head)
275 } 308 }
276} 309}
277 310
311static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops = {
312 .error_cleanup = nfs_async_read_error,
313 .completion = nfs_read_completion,
314};
315
316static void nfs_pagein_error(struct nfs_pageio_descriptor *desc,
317 struct nfs_pgio_header *hdr)
318{
319 set_bit(NFS_IOHDR_REDO, &hdr->flags);
320 while (!list_empty(&hdr->rpc_list)) {
321 struct nfs_read_data *data = list_first_entry(&hdr->rpc_list,
322 struct nfs_read_data, list);
323 list_del(&data->list);
324 nfs_readdata_release(data);
325 }
326 desc->pg_completion_ops->error_cleanup(&desc->pg_list);
327}
328
278/* 329/*
279 * Generate multiple requests to fill a single page. 330 * Generate multiple requests to fill a single page.
280 * 331 *
@@ -288,93 +339,95 @@ nfs_async_read_error(struct list_head *head)
288 * won't see the new data until our attribute cache is updated. This is more 339 * won't see the new data until our attribute cache is updated. This is more
289 * or less conventional NFS client behavior. 340 * or less conventional NFS client behavior.
290 */ 341 */
291static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head *res) 342static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc,
343 struct nfs_pgio_header *hdr)
292{ 344{
293 struct nfs_page *req = nfs_list_entry(desc->pg_list.next); 345 struct nfs_page *req = hdr->req;
294 struct page *page = req->wb_page; 346 struct page *page = req->wb_page;
295 struct nfs_read_data *data; 347 struct nfs_read_data *data;
296 size_t rsize = desc->pg_bsize, nbytes; 348 size_t rsize = desc->pg_bsize, nbytes;
297 unsigned int offset; 349 unsigned int offset;
298 int requests = 0;
299 int ret = 0;
300
301 nfs_list_remove_request(req);
302 350
303 offset = 0; 351 offset = 0;
304 nbytes = desc->pg_count; 352 nbytes = desc->pg_count;
305 do { 353 do {
306 size_t len = min(nbytes,rsize); 354 size_t len = min(nbytes,rsize);
307 355
308 data = nfs_readdata_alloc(1); 356 data = nfs_readdata_alloc(hdr, 1);
309 if (!data) 357 if (!data) {
310 goto out_bad; 358 nfs_pagein_error(desc, hdr);
311 data->pagevec[0] = page; 359 return -ENOMEM;
312 nfs_read_rpcsetup(req, data, len, offset); 360 }
313 list_add(&data->list, res); 361 data->pages.pagevec[0] = page;
314 requests++; 362 nfs_read_rpcsetup(data, len, offset);
363 list_add(&data->list, &hdr->rpc_list);
315 nbytes -= len; 364 nbytes -= len;
316 offset += len; 365 offset += len;
317 } while(nbytes != 0); 366 } while (nbytes != 0);
318 atomic_set(&req->wb_complete, requests); 367
319 desc->pg_rpc_callops = &nfs_read_partial_ops; 368 nfs_list_remove_request(req);
320 return ret; 369 nfs_list_add_request(req, &hdr->pages);
321out_bad: 370 desc->pg_rpc_callops = &nfs_read_common_ops;
322 while (!list_empty(res)) { 371 return 0;
323 data = list_entry(res->next, struct nfs_read_data, list);
324 list_del(&data->list);
325 nfs_readdata_release(data);
326 }
327 nfs_readpage_release(req);
328 return -ENOMEM;
329} 372}
330 373
331static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, struct list_head *res) 374static int nfs_pagein_one(struct nfs_pageio_descriptor *desc,
375 struct nfs_pgio_header *hdr)
332{ 376{
333 struct nfs_page *req; 377 struct nfs_page *req;
334 struct page **pages; 378 struct page **pages;
335 struct nfs_read_data *data; 379 struct nfs_read_data *data;
336 struct list_head *head = &desc->pg_list; 380 struct list_head *head = &desc->pg_list;
337 int ret = 0;
338 381
339 data = nfs_readdata_alloc(nfs_page_array_len(desc->pg_base, 382 data = nfs_readdata_alloc(hdr, nfs_page_array_len(desc->pg_base,
340 desc->pg_count)); 383 desc->pg_count));
341 if (!data) { 384 if (!data) {
342 nfs_async_read_error(head); 385 nfs_pagein_error(desc, hdr);
343 ret = -ENOMEM; 386 return -ENOMEM;
344 goto out;
345 } 387 }
346 388
347 pages = data->pagevec; 389 pages = data->pages.pagevec;
348 while (!list_empty(head)) { 390 while (!list_empty(head)) {
349 req = nfs_list_entry(head->next); 391 req = nfs_list_entry(head->next);
350 nfs_list_remove_request(req); 392 nfs_list_remove_request(req);
351 nfs_list_add_request(req, &data->pages); 393 nfs_list_add_request(req, &hdr->pages);
352 *pages++ = req->wb_page; 394 *pages++ = req->wb_page;
353 } 395 }
354 req = nfs_list_entry(data->pages.next);
355 396
356 nfs_read_rpcsetup(req, data, desc->pg_count, 0); 397 nfs_read_rpcsetup(data, desc->pg_count, 0);
357 list_add(&data->list, res); 398 list_add(&data->list, &hdr->rpc_list);
358 desc->pg_rpc_callops = &nfs_read_full_ops; 399 desc->pg_rpc_callops = &nfs_read_common_ops;
359out: 400 return 0;
360 return ret;
361} 401}
362 402
363int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, struct list_head *head) 403int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
404 struct nfs_pgio_header *hdr)
364{ 405{
365 if (desc->pg_bsize < PAGE_CACHE_SIZE) 406 if (desc->pg_bsize < PAGE_CACHE_SIZE)
366 return nfs_pagein_multi(desc, head); 407 return nfs_pagein_multi(desc, hdr);
367 return nfs_pagein_one(desc, head); 408 return nfs_pagein_one(desc, hdr);
368} 409}
369 410
370static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) 411static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
371{ 412{
372 LIST_HEAD(head); 413 struct nfs_read_header *rhdr;
414 struct nfs_pgio_header *hdr;
373 int ret; 415 int ret;
374 416
375 ret = nfs_generic_pagein(desc, &head); 417 rhdr = nfs_readhdr_alloc();
418 if (!rhdr) {
419 desc->pg_completion_ops->error_cleanup(&desc->pg_list);
420 return -ENOMEM;
421 }
422 hdr = &rhdr->header;
423 nfs_pgheader_init(desc, hdr, nfs_readhdr_free);
424 atomic_inc(&hdr->refcnt);
425 ret = nfs_generic_pagein(desc, hdr);
376 if (ret == 0) 426 if (ret == 0)
377 ret = nfs_do_multiple_reads(&head, desc->pg_rpc_callops); 427 ret = nfs_do_multiple_reads(&hdr->rpc_list,
428 desc->pg_rpc_callops);
429 if (atomic_dec_and_test(&hdr->refcnt))
430 hdr->completion_ops->completion(hdr);
378 return ret; 431 return ret;
379} 432}
380 433
@@ -389,20 +442,21 @@ static const struct nfs_pageio_ops nfs_pageio_read_ops = {
389 */ 442 */
390int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data) 443int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
391{ 444{
445 struct inode *inode = data->header->inode;
392 int status; 446 int status;
393 447
394 dprintk("NFS: %s: %5u, (status %d)\n", __func__, task->tk_pid, 448 dprintk("NFS: %s: %5u, (status %d)\n", __func__, task->tk_pid,
395 task->tk_status); 449 task->tk_status);
396 450
397 status = NFS_PROTO(data->inode)->read_done(task, data); 451 status = NFS_PROTO(inode)->read_done(task, data);
398 if (status != 0) 452 if (status != 0)
399 return status; 453 return status;
400 454
401 nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, data->res.count); 455 nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, data->res.count);
402 456
403 if (task->tk_status == -ESTALE) { 457 if (task->tk_status == -ESTALE) {
404 set_bit(NFS_INO_STALE, &NFS_I(data->inode)->flags); 458 set_bit(NFS_INO_STALE, &NFS_I(inode)->flags);
405 nfs_mark_for_revalidate(data->inode); 459 nfs_mark_for_revalidate(inode);
406 } 460 }
407 return 0; 461 return 0;
408} 462}
@@ -412,15 +466,13 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data
412 struct nfs_readargs *argp = &data->args; 466 struct nfs_readargs *argp = &data->args;
413 struct nfs_readres *resp = &data->res; 467 struct nfs_readres *resp = &data->res;
414 468
415 if (resp->eof || resp->count == argp->count)
416 return;
417
418 /* This is a short read! */ 469 /* This is a short read! */
419 nfs_inc_stats(data->inode, NFSIOS_SHORTREAD); 470 nfs_inc_stats(data->header->inode, NFSIOS_SHORTREAD);
420 /* Has the server at least made some progress? */ 471 /* Has the server at least made some progress? */
421 if (resp->count == 0) 472 if (resp->count == 0) {
473 nfs_set_pgio_error(data->header, -EIO, argp->offset);
422 return; 474 return;
423 475 }
424 /* Yes, so retry the read at the end of the data */ 476 /* Yes, so retry the read at the end of the data */
425 data->mds_offset += resp->count; 477 data->mds_offset += resp->count;
426 argp->offset += resp->count; 478 argp->offset += resp->count;
@@ -429,114 +481,46 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data
429 rpc_restart_call_prepare(task); 481 rpc_restart_call_prepare(task);
430} 482}
431 483
432/* 484static void nfs_readpage_result_common(struct rpc_task *task, void *calldata)
433 * Handle a read reply that fills part of a page.
434 */
435static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata)
436{ 485{
437 struct nfs_read_data *data = calldata; 486 struct nfs_read_data *data = calldata;
438 487 struct nfs_pgio_header *hdr = data->header;
488
489 /* Note the only returns of nfs_readpage_result are 0 and -EAGAIN */
439 if (nfs_readpage_result(task, data) != 0) 490 if (nfs_readpage_result(task, data) != 0)
440 return; 491 return;
441 if (task->tk_status < 0) 492 if (task->tk_status < 0)
442 return; 493 nfs_set_pgio_error(hdr, task->tk_status, data->args.offset);
443 494 else if (data->res.eof) {
444 nfs_readpage_truncate_uninitialised_page(data); 495 loff_t bound;
445 nfs_readpage_retry(task, data); 496
497 bound = data->args.offset + data->res.count;
498 spin_lock(&hdr->lock);
499 if (bound < hdr->io_start + hdr->good_bytes) {
500 set_bit(NFS_IOHDR_EOF, &hdr->flags);
501 clear_bit(NFS_IOHDR_ERROR, &hdr->flags);
502 hdr->good_bytes = bound - hdr->io_start;
503 }
504 spin_unlock(&hdr->lock);
505 } else if (data->res.count != data->args.count)
506 nfs_readpage_retry(task, data);
446} 507}
447 508
448static void nfs_readpage_release_partial(void *calldata) 509static void nfs_readpage_release_common(void *calldata)
449{ 510{
450 struct nfs_read_data *data = calldata;
451 struct nfs_page *req = data->req;
452 struct page *page = req->wb_page;
453 int status = data->task.tk_status;
454
455 if (status < 0)
456 set_bit(PG_PARTIAL_READ_FAILED, &req->wb_flags);
457
458 if (atomic_dec_and_test(&req->wb_complete)) {
459 if (!test_bit(PG_PARTIAL_READ_FAILED, &req->wb_flags))
460 SetPageUptodate(page);
461 nfs_readpage_release(req);
462 }
463 nfs_readdata_release(calldata); 511 nfs_readdata_release(calldata);
464} 512}
465 513
466void nfs_read_prepare(struct rpc_task *task, void *calldata) 514void nfs_read_prepare(struct rpc_task *task, void *calldata)
467{ 515{
468 struct nfs_read_data *data = calldata; 516 struct nfs_read_data *data = calldata;
469 NFS_PROTO(data->inode)->read_rpc_prepare(task, data); 517 NFS_PROTO(data->header->inode)->read_rpc_prepare(task, data);
470}
471
472static const struct rpc_call_ops nfs_read_partial_ops = {
473 .rpc_call_prepare = nfs_read_prepare,
474 .rpc_call_done = nfs_readpage_result_partial,
475 .rpc_release = nfs_readpage_release_partial,
476};
477
478static void nfs_readpage_set_pages_uptodate(struct nfs_read_data *data)
479{
480 unsigned int count = data->res.count;
481 unsigned int base = data->args.pgbase;
482 struct page **pages;
483
484 if (data->res.eof)
485 count = data->args.count;
486 if (unlikely(count == 0))
487 return;
488 pages = &data->args.pages[base >> PAGE_CACHE_SHIFT];
489 base &= ~PAGE_CACHE_MASK;
490 count += base;
491 for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++)
492 SetPageUptodate(*pages);
493 if (count == 0)
494 return;
495 /* Was this a short read? */
496 if (data->res.eof || data->res.count == data->args.count)
497 SetPageUptodate(*pages);
498}
499
500/*
501 * This is the callback from RPC telling us whether a reply was
502 * received or some error occurred (timeout or socket shutdown).
503 */
504static void nfs_readpage_result_full(struct rpc_task *task, void *calldata)
505{
506 struct nfs_read_data *data = calldata;
507
508 if (nfs_readpage_result(task, data) != 0)
509 return;
510 if (task->tk_status < 0)
511 return;
512 /*
513 * Note: nfs_readpage_retry may change the values of
514 * data->args. In the multi-page case, we therefore need
515 * to ensure that we call nfs_readpage_set_pages_uptodate()
516 * first.
517 */
518 nfs_readpage_truncate_uninitialised_page(data);
519 nfs_readpage_set_pages_uptodate(data);
520 nfs_readpage_retry(task, data);
521}
522
523static void nfs_readpage_release_full(void *calldata)
524{
525 struct nfs_read_data *data = calldata;
526
527 while (!list_empty(&data->pages)) {
528 struct nfs_page *req = nfs_list_entry(data->pages.next);
529
530 nfs_list_remove_request(req);
531 nfs_readpage_release(req);
532 }
533 nfs_readdata_release(calldata);
534} 518}
535 519
536static const struct rpc_call_ops nfs_read_full_ops = { 520static const struct rpc_call_ops nfs_read_common_ops = {
537 .rpc_call_prepare = nfs_read_prepare, 521 .rpc_call_prepare = nfs_read_prepare,
538 .rpc_call_done = nfs_readpage_result_full, 522 .rpc_call_done = nfs_readpage_result_common,
539 .rpc_release = nfs_readpage_release_full, 523 .rpc_release = nfs_readpage_release_common,
540}; 524};
541 525
542/* 526/*
@@ -668,11 +652,12 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
668 if (ret == 0) 652 if (ret == 0)
669 goto read_complete; /* all pages were read */ 653 goto read_complete; /* all pages were read */
670 654
671 nfs_pageio_init_read(&pgio, inode); 655 nfs_pageio_init_read(&pgio, inode, &nfs_async_read_completion_ops);
672 656
673 ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc); 657 ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);
674 658
675 nfs_pageio_complete(&pgio); 659 nfs_pageio_complete(&pgio);
660 NFS_I(inode)->read_io += pgio.pg_bytes_written;
676 npages = (pgio.pg_bytes_written + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 661 npages = (pgio.pg_bytes_written + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
677 nfs_add_stats(inode, NFSIOS_READPAGES, npages); 662 nfs_add_stats(inode, NFSIOS_READPAGES, npages);
678read_complete: 663read_complete:
@@ -684,7 +669,7 @@ out:
684int __init nfs_init_readpagecache(void) 669int __init nfs_init_readpagecache(void)
685{ 670{
686 nfs_rdata_cachep = kmem_cache_create("nfs_read_data", 671 nfs_rdata_cachep = kmem_cache_create("nfs_read_data",
687 sizeof(struct nfs_read_data), 672 sizeof(struct nfs_read_header),
688 0, SLAB_HWCACHE_ALIGN, 673 0, SLAB_HWCACHE_ALIGN,
689 NULL); 674 NULL);
690 if (nfs_rdata_cachep == NULL) 675 if (nfs_rdata_cachep == NULL)
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 4ac7fca7e4bf..ff656c022684 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -66,6 +66,7 @@
66#include "pnfs.h" 66#include "pnfs.h"
67 67
68#define NFSDBG_FACILITY NFSDBG_VFS 68#define NFSDBG_FACILITY NFSDBG_VFS
69#define NFS_TEXT_DATA 1
69 70
70#ifdef CONFIG_NFS_V3 71#ifdef CONFIG_NFS_V3
71#define NFS_DEFAULT_VERSION 3 72#define NFS_DEFAULT_VERSION 3
@@ -277,12 +278,22 @@ static match_table_t nfs_vers_tokens = {
277 { Opt_vers_err, NULL } 278 { Opt_vers_err, NULL }
278}; 279};
279 280
281struct nfs_mount_info {
282 void (*fill_super)(struct super_block *, struct nfs_mount_info *);
283 int (*set_security)(struct super_block *, struct dentry *, struct nfs_mount_info *);
284 struct nfs_parsed_mount_data *parsed;
285 struct nfs_clone_mount *cloned;
286 struct nfs_fh *mntfh;
287};
288
280static void nfs_umount_begin(struct super_block *); 289static void nfs_umount_begin(struct super_block *);
281static int nfs_statfs(struct dentry *, struct kstatfs *); 290static int nfs_statfs(struct dentry *, struct kstatfs *);
282static int nfs_show_options(struct seq_file *, struct dentry *); 291static int nfs_show_options(struct seq_file *, struct dentry *);
283static int nfs_show_devname(struct seq_file *, struct dentry *); 292static int nfs_show_devname(struct seq_file *, struct dentry *);
284static int nfs_show_path(struct seq_file *, struct dentry *); 293static int nfs_show_path(struct seq_file *, struct dentry *);
285static int nfs_show_stats(struct seq_file *, struct dentry *); 294static int nfs_show_stats(struct seq_file *, struct dentry *);
295static struct dentry *nfs_fs_mount_common(struct file_system_type *,
296 struct nfs_server *, int, const char *, struct nfs_mount_info *);
286static struct dentry *nfs_fs_mount(struct file_system_type *, 297static struct dentry *nfs_fs_mount(struct file_system_type *,
287 int, const char *, void *); 298 int, const char *, void *);
288static struct dentry *nfs_xdev_mount(struct file_system_type *fs_type, 299static struct dentry *nfs_xdev_mount(struct file_system_type *fs_type,
@@ -323,12 +334,11 @@ static const struct super_operations nfs_sops = {
323}; 334};
324 335
325#ifdef CONFIG_NFS_V4 336#ifdef CONFIG_NFS_V4
326static int nfs4_validate_text_mount_data(void *options, 337static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *);
338static int nfs4_validate_mount_data(void *options,
327 struct nfs_parsed_mount_data *args, const char *dev_name); 339 struct nfs_parsed_mount_data *args, const char *dev_name);
328static struct dentry *nfs4_try_mount(int flags, const char *dev_name, 340static struct dentry *nfs4_try_mount(int flags, const char *dev_name,
329 struct nfs_parsed_mount_data *data); 341 struct nfs_mount_info *mount_info);
330static struct dentry *nfs4_mount(struct file_system_type *fs_type,
331 int flags, const char *dev_name, void *raw_data);
332static struct dentry *nfs4_remote_mount(struct file_system_type *fs_type, 342static struct dentry *nfs4_remote_mount(struct file_system_type *fs_type,
333 int flags, const char *dev_name, void *raw_data); 343 int flags, const char *dev_name, void *raw_data);
334static struct dentry *nfs4_xdev_mount(struct file_system_type *fs_type, 344static struct dentry *nfs4_xdev_mount(struct file_system_type *fs_type,
@@ -342,7 +352,7 @@ static void nfs4_kill_super(struct super_block *sb);
342static struct file_system_type nfs4_fs_type = { 352static struct file_system_type nfs4_fs_type = {
343 .owner = THIS_MODULE, 353 .owner = THIS_MODULE,
344 .name = "nfs4", 354 .name = "nfs4",
345 .mount = nfs4_mount, 355 .mount = nfs_fs_mount,
346 .kill_sb = nfs4_kill_super, 356 .kill_sb = nfs4_kill_super,
347 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, 357 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
348}; 358};
@@ -786,8 +796,8 @@ static void show_pnfs(struct seq_file *m, struct nfs_server *server)
786 796
787static void show_implementation_id(struct seq_file *m, struct nfs_server *nfss) 797static void show_implementation_id(struct seq_file *m, struct nfs_server *nfss)
788{ 798{
789 if (nfss->nfs_client && nfss->nfs_client->impl_id) { 799 if (nfss->nfs_client && nfss->nfs_client->cl_implid) {
790 struct nfs41_impl_id *impl_id = nfss->nfs_client->impl_id; 800 struct nfs41_impl_id *impl_id = nfss->nfs_client->cl_implid;
791 seq_printf(m, "\n\timpl_id:\tname='%s',domain='%s'," 801 seq_printf(m, "\n\timpl_id:\tname='%s',domain='%s',"
792 "date='%llu,%u'", 802 "date='%llu,%u'",
793 impl_id->name, impl_id->domain, 803 impl_id->name, impl_id->domain,
@@ -938,7 +948,7 @@ static void nfs_umount_begin(struct super_block *sb)
938 rpc_killall_tasks(rpc); 948 rpc_killall_tasks(rpc);
939} 949}
940 950
941static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(unsigned int version) 951static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(void)
942{ 952{
943 struct nfs_parsed_mount_data *data; 953 struct nfs_parsed_mount_data *data;
944 954
@@ -953,8 +963,8 @@ static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(unsigned int ve
953 data->nfs_server.protocol = XPRT_TRANSPORT_TCP; 963 data->nfs_server.protocol = XPRT_TRANSPORT_TCP;
954 data->auth_flavors[0] = RPC_AUTH_UNIX; 964 data->auth_flavors[0] = RPC_AUTH_UNIX;
955 data->auth_flavor_len = 1; 965 data->auth_flavor_len = 1;
956 data->version = version;
957 data->minorversion = 0; 966 data->minorversion = 0;
967 data->need_mount = true;
958 data->net = current->nsproxy->net_ns; 968 data->net = current->nsproxy->net_ns;
959 security_init_mnt_opts(&data->lsm_opts); 969 security_init_mnt_opts(&data->lsm_opts);
960 } 970 }
@@ -1674,8 +1684,8 @@ static int nfs_walk_authlist(struct nfs_parsed_mount_data *args,
1674 * Use the remote server's MOUNT service to request the NFS file handle 1684 * Use the remote server's MOUNT service to request the NFS file handle
1675 * corresponding to the provided path. 1685 * corresponding to the provided path.
1676 */ 1686 */
1677static int nfs_try_mount(struct nfs_parsed_mount_data *args, 1687static int nfs_request_mount(struct nfs_parsed_mount_data *args,
1678 struct nfs_fh *root_fh) 1688 struct nfs_fh *root_fh)
1679{ 1689{
1680 rpc_authflavor_t server_authlist[NFS_MAX_SECFLAVORS]; 1690 rpc_authflavor_t server_authlist[NFS_MAX_SECFLAVORS];
1681 unsigned int server_authlist_len = ARRAY_SIZE(server_authlist); 1691 unsigned int server_authlist_len = ARRAY_SIZE(server_authlist);
@@ -1738,6 +1748,26 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args,
1738 return nfs_walk_authlist(args, &request); 1748 return nfs_walk_authlist(args, &request);
1739} 1749}
1740 1750
1751static struct dentry *nfs_try_mount(int flags, const char *dev_name,
1752 struct nfs_mount_info *mount_info)
1753{
1754 int status;
1755 struct nfs_server *server;
1756
1757 if (mount_info->parsed->need_mount) {
1758 status = nfs_request_mount(mount_info->parsed, mount_info->mntfh);
1759 if (status)
1760 return ERR_PTR(status);
1761 }
1762
1763 /* Get a volume representation */
1764 server = nfs_create_server(mount_info->parsed, mount_info->mntfh);
1765 if (IS_ERR(server))
1766 return ERR_CAST(server);
1767
1768 return nfs_fs_mount_common(&nfs_fs_type, server, flags, dev_name, mount_info);
1769}
1770
1741/* 1771/*
1742 * Split "dev_name" into "hostname:export_path". 1772 * Split "dev_name" into "hostname:export_path".
1743 * 1773 *
@@ -1826,10 +1856,10 @@ out_path:
1826 * + breaking back: trying proto=udp after proto=tcp, v2 after v3, 1856 * + breaking back: trying proto=udp after proto=tcp, v2 after v3,
1827 * mountproto=tcp after mountproto=udp, and so on 1857 * mountproto=tcp after mountproto=udp, and so on
1828 */ 1858 */
1829static int nfs_validate_mount_data(void *options, 1859static int nfs23_validate_mount_data(void *options,
1830 struct nfs_parsed_mount_data *args, 1860 struct nfs_parsed_mount_data *args,
1831 struct nfs_fh *mntfh, 1861 struct nfs_fh *mntfh,
1832 const char *dev_name) 1862 const char *dev_name)
1833{ 1863{
1834 struct nfs_mount_data *data = (struct nfs_mount_data *)options; 1864 struct nfs_mount_data *data = (struct nfs_mount_data *)options;
1835 struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address; 1865 struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address;
@@ -1883,6 +1913,7 @@ static int nfs_validate_mount_data(void *options,
1883 args->acregmax = data->acregmax; 1913 args->acregmax = data->acregmax;
1884 args->acdirmin = data->acdirmin; 1914 args->acdirmin = data->acdirmin;
1885 args->acdirmax = data->acdirmax; 1915 args->acdirmax = data->acdirmax;
1916 args->need_mount = false;
1886 1917
1887 memcpy(sap, &data->addr, sizeof(data->addr)); 1918 memcpy(sap, &data->addr, sizeof(data->addr));
1888 args->nfs_server.addrlen = sizeof(data->addr); 1919 args->nfs_server.addrlen = sizeof(data->addr);
@@ -1934,43 +1965,8 @@ static int nfs_validate_mount_data(void *options,
1934 } 1965 }
1935 1966
1936 break; 1967 break;
1937 default: { 1968 default:
1938 int status; 1969 return NFS_TEXT_DATA;
1939
1940 if (nfs_parse_mount_options((char *)options, args) == 0)
1941 return -EINVAL;
1942
1943 if (!nfs_verify_server_address(sap))
1944 goto out_no_address;
1945
1946 if (args->version == 4)
1947#ifdef CONFIG_NFS_V4
1948 return nfs4_validate_text_mount_data(options,
1949 args, dev_name);
1950#else
1951 goto out_v4_not_compiled;
1952#endif
1953
1954 nfs_set_port(sap, &args->nfs_server.port, 0);
1955
1956 nfs_set_mount_transport_protocol(args);
1957
1958 status = nfs_parse_devname(dev_name,
1959 &args->nfs_server.hostname,
1960 PAGE_SIZE,
1961 &args->nfs_server.export_path,
1962 NFS_MAXPATHLEN);
1963 if (!status)
1964 status = nfs_try_mount(args, mntfh);
1965
1966 kfree(args->nfs_server.export_path);
1967 args->nfs_server.export_path = NULL;
1968
1969 if (status)
1970 return status;
1971
1972 break;
1973 }
1974 } 1970 }
1975 1971
1976#ifndef CONFIG_NFS_V3 1972#ifndef CONFIG_NFS_V3
@@ -1999,12 +1995,6 @@ out_v3_not_compiled:
1999 return -EPROTONOSUPPORT; 1995 return -EPROTONOSUPPORT;
2000#endif /* !CONFIG_NFS_V3 */ 1996#endif /* !CONFIG_NFS_V3 */
2001 1997
2002#ifndef CONFIG_NFS_V4
2003out_v4_not_compiled:
2004 dfprintk(MOUNT, "NFS: NFSv4 is not compiled into kernel\n");
2005 return -EPROTONOSUPPORT;
2006#endif /* !CONFIG_NFS_V4 */
2007
2008out_nomem: 1998out_nomem:
2009 dfprintk(MOUNT, "NFS: not enough memory to handle mount options\n"); 1999 dfprintk(MOUNT, "NFS: not enough memory to handle mount options\n");
2010 return -ENOMEM; 2000 return -ENOMEM;
@@ -2018,6 +2008,82 @@ out_invalid_fh:
2018 return -EINVAL; 2008 return -EINVAL;
2019} 2009}
2020 2010
2011#ifdef CONFIG_NFS_V4
2012static int nfs_validate_mount_data(struct file_system_type *fs_type,
2013 void *options,
2014 struct nfs_parsed_mount_data *args,
2015 struct nfs_fh *mntfh,
2016 const char *dev_name)
2017{
2018 if (fs_type == &nfs_fs_type)
2019 return nfs23_validate_mount_data(options, args, mntfh, dev_name);
2020 return nfs4_validate_mount_data(options, args, dev_name);
2021}
2022#else
2023static int nfs_validate_mount_data(struct file_system_type *fs_type,
2024 void *options,
2025 struct nfs_parsed_mount_data *args,
2026 struct nfs_fh *mntfh,
2027 const char *dev_name)
2028{
2029 return nfs23_validate_mount_data(options, args, mntfh, dev_name);
2030}
2031#endif
2032
2033static int nfs_validate_text_mount_data(void *options,
2034 struct nfs_parsed_mount_data *args,
2035 const char *dev_name)
2036{
2037 int port = 0;
2038 int max_namelen = PAGE_SIZE;
2039 int max_pathlen = NFS_MAXPATHLEN;
2040 struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address;
2041
2042 if (nfs_parse_mount_options((char *)options, args) == 0)
2043 return -EINVAL;
2044
2045 if (!nfs_verify_server_address(sap))
2046 goto out_no_address;
2047
2048 if (args->version == 4) {
2049#ifdef CONFIG_NFS_V4
2050 port = NFS_PORT;
2051 max_namelen = NFS4_MAXNAMLEN;
2052 max_pathlen = NFS4_MAXPATHLEN;
2053 nfs_validate_transport_protocol(args);
2054 nfs4_validate_mount_flags(args);
2055#else
2056 goto out_v4_not_compiled;
2057#endif /* CONFIG_NFS_V4 */
2058 } else
2059 nfs_set_mount_transport_protocol(args);
2060
2061 nfs_set_port(sap, &args->nfs_server.port, port);
2062
2063 if (args->auth_flavor_len > 1)
2064 goto out_bad_auth;
2065
2066 return nfs_parse_devname(dev_name,
2067 &args->nfs_server.hostname,
2068 max_namelen,
2069 &args->nfs_server.export_path,
2070 max_pathlen);
2071
2072#ifndef CONFIG_NFS_V4
2073out_v4_not_compiled:
2074 dfprintk(MOUNT, "NFS: NFSv4 is not compiled into kernel\n");
2075 return -EPROTONOSUPPORT;
2076#endif /* !CONFIG_NFS_V4 */
2077
2078out_no_address:
2079 dfprintk(MOUNT, "NFS: mount program didn't pass remote address\n");
2080 return -EINVAL;
2081
2082out_bad_auth:
2083 dfprintk(MOUNT, "NFS: Too many RPC auth flavours specified\n");
2084 return -EINVAL;
2085}
2086
2021static int 2087static int
2022nfs_compare_remount_data(struct nfs_server *nfss, 2088nfs_compare_remount_data(struct nfs_server *nfss,
2023 struct nfs_parsed_mount_data *data) 2089 struct nfs_parsed_mount_data *data)
@@ -2129,8 +2195,9 @@ static inline void nfs_initialise_sb(struct super_block *sb)
2129 * Finish setting up an NFS2/3 superblock 2195 * Finish setting up an NFS2/3 superblock
2130 */ 2196 */
2131static void nfs_fill_super(struct super_block *sb, 2197static void nfs_fill_super(struct super_block *sb,
2132 struct nfs_parsed_mount_data *data) 2198 struct nfs_mount_info *mount_info)
2133{ 2199{
2200 struct nfs_parsed_mount_data *data = mount_info->parsed;
2134 struct nfs_server *server = NFS_SB(sb); 2201 struct nfs_server *server = NFS_SB(sb);
2135 2202
2136 sb->s_blocksize_bits = 0; 2203 sb->s_blocksize_bits = 0;
@@ -2154,8 +2221,9 @@ static void nfs_fill_super(struct super_block *sb,
2154 * Finish setting up a cloned NFS2/3 superblock 2221 * Finish setting up a cloned NFS2/3 superblock
2155 */ 2222 */
2156static void nfs_clone_super(struct super_block *sb, 2223static void nfs_clone_super(struct super_block *sb,
2157 const struct super_block *old_sb) 2224 struct nfs_mount_info *mount_info)
2158{ 2225{
2226 const struct super_block *old_sb = mount_info->cloned->sb;
2159 struct nfs_server *server = NFS_SB(sb); 2227 struct nfs_server *server = NFS_SB(sb);
2160 2228
2161 sb->s_blocksize_bits = old_sb->s_blocksize_bits; 2229 sb->s_blocksize_bits = old_sb->s_blocksize_bits;
@@ -2278,52 +2346,70 @@ static int nfs_compare_super(struct super_block *sb, void *data)
2278 return nfs_compare_mount_options(sb, server, mntflags); 2346 return nfs_compare_mount_options(sb, server, mntflags);
2279} 2347}
2280 2348
2349#ifdef CONFIG_NFS_FSCACHE
2350static void nfs_get_cache_cookie(struct super_block *sb,
2351 struct nfs_parsed_mount_data *parsed,
2352 struct nfs_clone_mount *cloned)
2353{
2354 char *uniq = NULL;
2355 int ulen = 0;
2356
2357 if (parsed && parsed->fscache_uniq) {
2358 uniq = parsed->fscache_uniq;
2359 ulen = strlen(parsed->fscache_uniq);
2360 } else if (cloned) {
2361 struct nfs_server *mnt_s = NFS_SB(cloned->sb);
2362 if (mnt_s->fscache_key) {
2363 uniq = mnt_s->fscache_key->key.uniquifier;
2364 ulen = mnt_s->fscache_key->key.uniq_len;
2365 };
2366 }
2367
2368 nfs_fscache_get_super_cookie(sb, uniq, ulen);
2369}
2370#else
2371static void nfs_get_cache_cookie(struct super_block *sb,
2372 struct nfs_parsed_mount_data *parsed,
2373 struct nfs_clone_mount *cloned)
2374{
2375}
2376#endif
2377
2281static int nfs_bdi_register(struct nfs_server *server) 2378static int nfs_bdi_register(struct nfs_server *server)
2282{ 2379{
2283 return bdi_register_dev(&server->backing_dev_info, server->s_dev); 2380 return bdi_register_dev(&server->backing_dev_info, server->s_dev);
2284} 2381}
2285 2382
2286static struct dentry *nfs_fs_mount(struct file_system_type *fs_type, 2383static int nfs_set_sb_security(struct super_block *s, struct dentry *mntroot,
2287 int flags, const char *dev_name, void *raw_data) 2384 struct nfs_mount_info *mount_info)
2385{
2386 return security_sb_set_mnt_opts(s, &mount_info->parsed->lsm_opts);
2387}
2388
2389static int nfs_clone_sb_security(struct super_block *s, struct dentry *mntroot,
2390 struct nfs_mount_info *mount_info)
2391{
2392 /* clone any lsm security options from the parent to the new sb */
2393 security_sb_clone_mnt_opts(mount_info->cloned->sb, s);
2394 if (mntroot->d_inode->i_op != NFS_SB(s)->nfs_client->rpc_ops->dir_inode_ops)
2395 return -ESTALE;
2396 return 0;
2397}
2398
2399static struct dentry *nfs_fs_mount_common(struct file_system_type *fs_type,
2400 struct nfs_server *server,
2401 int flags, const char *dev_name,
2402 struct nfs_mount_info *mount_info)
2288{ 2403{
2289 struct nfs_server *server = NULL;
2290 struct super_block *s; 2404 struct super_block *s;
2291 struct nfs_parsed_mount_data *data;
2292 struct nfs_fh *mntfh;
2293 struct dentry *mntroot = ERR_PTR(-ENOMEM); 2405 struct dentry *mntroot = ERR_PTR(-ENOMEM);
2294 int (*compare_super)(struct super_block *, void *) = nfs_compare_super; 2406 int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
2295 struct nfs_sb_mountdata sb_mntdata = { 2407 struct nfs_sb_mountdata sb_mntdata = {
2296 .mntflags = flags, 2408 .mntflags = flags,
2409 .server = server,
2297 }; 2410 };
2298 int error; 2411 int error;
2299 2412
2300 data = nfs_alloc_parsed_mount_data(NFS_DEFAULT_VERSION);
2301 mntfh = nfs_alloc_fhandle();
2302 if (data == NULL || mntfh == NULL)
2303 goto out;
2304
2305 /* Validate the mount data */
2306 error = nfs_validate_mount_data(raw_data, data, mntfh, dev_name);
2307 if (error < 0) {
2308 mntroot = ERR_PTR(error);
2309 goto out;
2310 }
2311
2312#ifdef CONFIG_NFS_V4
2313 if (data->version == 4) {
2314 mntroot = nfs4_try_mount(flags, dev_name, data);
2315 goto out;
2316 }
2317#endif /* CONFIG_NFS_V4 */
2318
2319 /* Get a volume representation */
2320 server = nfs_create_server(data, mntfh);
2321 if (IS_ERR(server)) {
2322 mntroot = ERR_CAST(server);
2323 goto out;
2324 }
2325 sb_mntdata.server = server;
2326
2327 if (server->flags & NFS_MOUNT_UNSHARED) 2413 if (server->flags & NFS_MOUNT_UNSHARED)
2328 compare_super = NULL; 2414 compare_super = NULL;
2329 2415
@@ -2351,23 +2437,21 @@ static struct dentry *nfs_fs_mount(struct file_system_type *fs_type,
2351 2437
2352 if (!s->s_root) { 2438 if (!s->s_root) {
2353 /* initial superblock/root creation */ 2439 /* initial superblock/root creation */
2354 nfs_fill_super(s, data); 2440 mount_info->fill_super(s, mount_info);
2355 nfs_fscache_get_super_cookie(s, data->fscache_uniq, NULL); 2441 nfs_get_cache_cookie(s, mount_info->parsed, mount_info->cloned);
2356 } 2442 }
2357 2443
2358 mntroot = nfs_get_root(s, mntfh, dev_name); 2444 mntroot = nfs_get_root(s, mount_info->mntfh, dev_name);
2359 if (IS_ERR(mntroot)) 2445 if (IS_ERR(mntroot))
2360 goto error_splat_super; 2446 goto error_splat_super;
2361 2447
2362 error = security_sb_set_mnt_opts(s, &data->lsm_opts); 2448 error = mount_info->set_security(s, mntroot, mount_info);
2363 if (error) 2449 if (error)
2364 goto error_splat_root; 2450 goto error_splat_root;
2365 2451
2366 s->s_flags |= MS_ACTIVE; 2452 s->s_flags |= MS_ACTIVE;
2367 2453
2368out: 2454out:
2369 nfs_free_parsed_mount_data(data);
2370 nfs_free_fhandle(mntfh);
2371 return mntroot; 2455 return mntroot;
2372 2456
2373out_err_nosb: 2457out_err_nosb:
@@ -2385,6 +2469,43 @@ error_splat_bdi:
2385 goto out; 2469 goto out;
2386} 2470}
2387 2471
2472static struct dentry *nfs_fs_mount(struct file_system_type *fs_type,
2473 int flags, const char *dev_name, void *raw_data)
2474{
2475 struct nfs_mount_info mount_info = {
2476 .fill_super = nfs_fill_super,
2477 .set_security = nfs_set_sb_security,
2478 };
2479 struct dentry *mntroot = ERR_PTR(-ENOMEM);
2480 int error;
2481
2482 mount_info.parsed = nfs_alloc_parsed_mount_data();
2483 mount_info.mntfh = nfs_alloc_fhandle();
2484 if (mount_info.parsed == NULL || mount_info.mntfh == NULL)
2485 goto out;
2486
2487 /* Validate the mount data */
2488 error = nfs_validate_mount_data(fs_type, raw_data, mount_info.parsed, mount_info.mntfh, dev_name);
2489 if (error == NFS_TEXT_DATA)
2490 error = nfs_validate_text_mount_data(raw_data, mount_info.parsed, dev_name);
2491 if (error < 0) {
2492 mntroot = ERR_PTR(error);
2493 goto out;
2494 }
2495
2496#ifdef CONFIG_NFS_V4
2497 if (mount_info.parsed->version == 4)
2498 mntroot = nfs4_try_mount(flags, dev_name, &mount_info);
2499 else
2500#endif /* CONFIG_NFS_V4 */
2501 mntroot = nfs_try_mount(flags, dev_name, &mount_info);
2502
2503out:
2504 nfs_free_parsed_mount_data(mount_info.parsed);
2505 nfs_free_fhandle(mount_info.mntfh);
2506 return mntroot;
2507}
2508
2388/* 2509/*
2389 * Ensure that we unregister the bdi before kill_anon_super 2510 * Ensure that we unregister the bdi before kill_anon_super
2390 * releases the device name 2511 * releases the device name
@@ -2409,93 +2530,51 @@ static void nfs_kill_super(struct super_block *s)
2409} 2530}
2410 2531
2411/* 2532/*
2412 * Clone an NFS2/3 server record on xdev traversal (FSID-change) 2533 * Clone an NFS2/3/4 server record on xdev traversal (FSID-change)
2413 */ 2534 */
2414static struct dentry * 2535static struct dentry *
2415nfs_xdev_mount(struct file_system_type *fs_type, int flags, 2536nfs_xdev_mount_common(struct file_system_type *fs_type, int flags,
2416 const char *dev_name, void *raw_data) 2537 const char *dev_name, struct nfs_mount_info *mount_info)
2417{ 2538{
2418 struct nfs_clone_mount *data = raw_data; 2539 struct nfs_clone_mount *data = mount_info->cloned;
2419 struct super_block *s;
2420 struct nfs_server *server; 2540 struct nfs_server *server;
2421 struct dentry *mntroot; 2541 struct dentry *mntroot = ERR_PTR(-ENOMEM);
2422 int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
2423 struct nfs_sb_mountdata sb_mntdata = {
2424 .mntflags = flags,
2425 };
2426 int error; 2542 int error;
2427 2543
2428 dprintk("--> nfs_xdev_mount()\n"); 2544 dprintk("--> nfs_xdev_mount_common()\n");
2545
2546 mount_info->mntfh = data->fh;
2429 2547
2430 /* create a new volume representation */ 2548 /* create a new volume representation */
2431 server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr, data->authflavor); 2549 server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr, data->authflavor);
2432 if (IS_ERR(server)) { 2550 if (IS_ERR(server)) {
2433 error = PTR_ERR(server); 2551 error = PTR_ERR(server);
2434 goto out_err_noserver; 2552 goto out_err;
2435 }
2436 sb_mntdata.server = server;
2437
2438 if (server->flags & NFS_MOUNT_UNSHARED)
2439 compare_super = NULL;
2440
2441 /* -o noac implies -o sync */
2442 if (server->flags & NFS_MOUNT_NOAC)
2443 sb_mntdata.mntflags |= MS_SYNCHRONOUS;
2444
2445 /* Get a superblock - note that we may end up sharing one that already exists */
2446 s = sget(&nfs_fs_type, compare_super, nfs_set_super, &sb_mntdata);
2447 if (IS_ERR(s)) {
2448 error = PTR_ERR(s);
2449 goto out_err_nosb;
2450 }
2451
2452 if (s->s_fs_info != server) {
2453 nfs_free_server(server);
2454 server = NULL;
2455 } else {
2456 error = nfs_bdi_register(server);
2457 if (error)
2458 goto error_splat_bdi;
2459 }
2460
2461 if (!s->s_root) {
2462 /* initial superblock/root creation */
2463 nfs_clone_super(s, data->sb);
2464 nfs_fscache_get_super_cookie(s, NULL, data);
2465 }
2466
2467 mntroot = nfs_get_root(s, data->fh, dev_name);
2468 if (IS_ERR(mntroot)) {
2469 error = PTR_ERR(mntroot);
2470 goto error_splat_super;
2471 }
2472 if (mntroot->d_inode->i_op != NFS_SB(s)->nfs_client->rpc_ops->dir_inode_ops) {
2473 dput(mntroot);
2474 error = -ESTALE;
2475 goto error_splat_super;
2476 } 2553 }
2477 2554
2478 s->s_flags |= MS_ACTIVE; 2555 mntroot = nfs_fs_mount_common(fs_type, server, flags, dev_name, mount_info);
2479 2556 dprintk("<-- nfs_xdev_mount_common() = 0\n");
2480 /* clone any lsm security options from the parent to the new sb */ 2557out:
2481 security_sb_clone_mnt_opts(data->sb, s);
2482
2483 dprintk("<-- nfs_xdev_mount() = 0\n");
2484 return mntroot; 2558 return mntroot;
2485 2559
2486out_err_nosb: 2560out_err:
2487 nfs_free_server(server); 2561 dprintk("<-- nfs_xdev_mount_common() = %d [error]\n", error);
2488out_err_noserver: 2562 goto out;
2489 dprintk("<-- nfs_xdev_mount() = %d [error]\n", error); 2563}
2490 return ERR_PTR(error);
2491 2564
2492error_splat_super: 2565/*
2493 if (server && !s->s_root) 2566 * Clone an NFS2/3 server record on xdev traversal (FSID-change)
2494 bdi_unregister(&server->backing_dev_info); 2567 */
2495error_splat_bdi: 2568static struct dentry *
2496 deactivate_locked_super(s); 2569nfs_xdev_mount(struct file_system_type *fs_type, int flags,
2497 dprintk("<-- nfs_xdev_mount() = %d [splat]\n", error); 2570 const char *dev_name, void *raw_data)
2498 return ERR_PTR(error); 2571{
2572 struct nfs_mount_info mount_info = {
2573 .fill_super = nfs_clone_super,
2574 .set_security = nfs_clone_sb_security,
2575 .cloned = raw_data,
2576 };
2577 return nfs_xdev_mount_common(&nfs_fs_type, flags, dev_name, &mount_info);
2499} 2578}
2500 2579
2501#ifdef CONFIG_NFS_V4 2580#ifdef CONFIG_NFS_V4
@@ -2504,8 +2583,9 @@ error_splat_bdi:
2504 * Finish setting up a cloned NFS4 superblock 2583 * Finish setting up a cloned NFS4 superblock
2505 */ 2584 */
2506static void nfs4_clone_super(struct super_block *sb, 2585static void nfs4_clone_super(struct super_block *sb,
2507 const struct super_block *old_sb) 2586 struct nfs_mount_info *mount_info)
2508{ 2587{
2588 const struct super_block *old_sb = mount_info->cloned->sb;
2509 sb->s_blocksize_bits = old_sb->s_blocksize_bits; 2589 sb->s_blocksize_bits = old_sb->s_blocksize_bits;
2510 sb->s_blocksize = old_sb->s_blocksize; 2590 sb->s_blocksize = old_sb->s_blocksize;
2511 sb->s_maxbytes = old_sb->s_maxbytes; 2591 sb->s_maxbytes = old_sb->s_maxbytes;
@@ -2523,7 +2603,8 @@ static void nfs4_clone_super(struct super_block *sb,
2523/* 2603/*
2524 * Set up an NFS4 superblock 2604 * Set up an NFS4 superblock
2525 */ 2605 */
2526static void nfs4_fill_super(struct super_block *sb) 2606static void nfs4_fill_super(struct super_block *sb,
2607 struct nfs_mount_info *mount_info)
2527{ 2608{
2528 sb->s_time_gran = 1; 2609 sb->s_time_gran = 1;
2529 sb->s_op = &nfs4_sops; 2610 sb->s_op = &nfs4_sops;
@@ -2542,37 +2623,6 @@ static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *args)
2542 NFS_MOUNT_LOCAL_FLOCK|NFS_MOUNT_LOCAL_FCNTL); 2623 NFS_MOUNT_LOCAL_FLOCK|NFS_MOUNT_LOCAL_FCNTL);
2543} 2624}
2544 2625
2545static int nfs4_validate_text_mount_data(void *options,
2546 struct nfs_parsed_mount_data *args,
2547 const char *dev_name)
2548{
2549 struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address;
2550
2551 nfs_set_port(sap, &args->nfs_server.port, NFS_PORT);
2552
2553 nfs_validate_transport_protocol(args);
2554
2555 nfs4_validate_mount_flags(args);
2556
2557 if (args->version != 4) {
2558 dfprintk(MOUNT,
2559 "NFS4: Illegal mount version\n");
2560 return -EINVAL;
2561 }
2562
2563 if (args->auth_flavor_len > 1) {
2564 dfprintk(MOUNT,
2565 "NFS4: Too many RPC auth flavours specified\n");
2566 return -EINVAL;
2567 }
2568
2569 return nfs_parse_devname(dev_name,
2570 &args->nfs_server.hostname,
2571 NFS4_MAXNAMLEN,
2572 &args->nfs_server.export_path,
2573 NFS4_MAXPATHLEN);
2574}
2575
2576/* 2626/*
2577 * Validate NFSv4 mount options 2627 * Validate NFSv4 mount options
2578 */ 2628 */
@@ -2643,13 +2693,7 @@ static int nfs4_validate_mount_data(void *options,
2643 2693
2644 break; 2694 break;
2645 default: 2695 default:
2646 if (nfs_parse_mount_options((char *)options, args) == 0) 2696 return NFS_TEXT_DATA;
2647 return -EINVAL;
2648
2649 if (!nfs_verify_server_address(sap))
2650 return -EINVAL;
2651
2652 return nfs4_validate_text_mount_data(options, args, dev_name);
2653 } 2697 }
2654 2698
2655 return 0; 2699 return 0;
@@ -2673,91 +2717,26 @@ out_no_address:
2673 */ 2717 */
2674static struct dentry * 2718static struct dentry *
2675nfs4_remote_mount(struct file_system_type *fs_type, int flags, 2719nfs4_remote_mount(struct file_system_type *fs_type, int flags,
2676 const char *dev_name, void *raw_data) 2720 const char *dev_name, void *info)
2677{ 2721{
2678 struct nfs_parsed_mount_data *data = raw_data; 2722 struct nfs_mount_info *mount_info = info;
2679 struct super_block *s;
2680 struct nfs_server *server; 2723 struct nfs_server *server;
2681 struct nfs_fh *mntfh; 2724 struct dentry *mntroot = ERR_PTR(-ENOMEM);
2682 struct dentry *mntroot;
2683 int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
2684 struct nfs_sb_mountdata sb_mntdata = {
2685 .mntflags = flags,
2686 };
2687 int error = -ENOMEM;
2688 2725
2689 mntfh = nfs_alloc_fhandle(); 2726 mount_info->fill_super = nfs4_fill_super;
2690 if (data == NULL || mntfh == NULL) 2727 mount_info->set_security = nfs_set_sb_security;
2691 goto out;
2692 2728
2693 /* Get a volume representation */ 2729 /* Get a volume representation */
2694 server = nfs4_create_server(data, mntfh); 2730 server = nfs4_create_server(mount_info->parsed, mount_info->mntfh);
2695 if (IS_ERR(server)) { 2731 if (IS_ERR(server)) {
2696 error = PTR_ERR(server); 2732 mntroot = ERR_CAST(server);
2697 goto out; 2733 goto out;
2698 } 2734 }
2699 sb_mntdata.server = server;
2700 2735
2701 if (server->flags & NFS4_MOUNT_UNSHARED) 2736 mntroot = nfs_fs_mount_common(fs_type, server, flags, dev_name, mount_info);
2702 compare_super = NULL;
2703
2704 /* -o noac implies -o sync */
2705 if (server->flags & NFS_MOUNT_NOAC)
2706 sb_mntdata.mntflags |= MS_SYNCHRONOUS;
2707
2708 /* Get a superblock - note that we may end up sharing one that already exists */
2709 s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata);
2710 if (IS_ERR(s)) {
2711 error = PTR_ERR(s);
2712 goto out_free;
2713 }
2714
2715 if (s->s_fs_info != server) {
2716 nfs_free_server(server);
2717 server = NULL;
2718 } else {
2719 error = nfs_bdi_register(server);
2720 if (error)
2721 goto error_splat_bdi;
2722 }
2723
2724 if (!s->s_root) {
2725 /* initial superblock/root creation */
2726 nfs4_fill_super(s);
2727 nfs_fscache_get_super_cookie(s, data->fscache_uniq, NULL);
2728 }
2729
2730 mntroot = nfs4_get_root(s, mntfh, dev_name);
2731 if (IS_ERR(mntroot)) {
2732 error = PTR_ERR(mntroot);
2733 goto error_splat_super;
2734 }
2735
2736 error = security_sb_set_mnt_opts(s, &data->lsm_opts);
2737 if (error)
2738 goto error_splat_root;
2739
2740 s->s_flags |= MS_ACTIVE;
2741
2742 nfs_free_fhandle(mntfh);
2743 return mntroot;
2744 2737
2745out: 2738out:
2746 nfs_free_fhandle(mntfh); 2739 return mntroot;
2747 return ERR_PTR(error);
2748
2749out_free:
2750 nfs_free_server(server);
2751 goto out;
2752
2753error_splat_root:
2754 dput(mntroot);
2755error_splat_super:
2756 if (server && !s->s_root)
2757 bdi_unregister(&server->backing_dev_info);
2758error_splat_bdi:
2759 deactivate_locked_super(s);
2760 goto out;
2761} 2740}
2762 2741
2763static struct vfsmount *nfs_do_root_mount(struct file_system_type *fs_type, 2742static struct vfsmount *nfs_do_root_mount(struct file_system_type *fs_type,
@@ -2869,17 +2848,18 @@ static struct dentry *nfs_follow_remote_path(struct vfsmount *root_mnt,
2869} 2848}
2870 2849
2871static struct dentry *nfs4_try_mount(int flags, const char *dev_name, 2850static struct dentry *nfs4_try_mount(int flags, const char *dev_name,
2872 struct nfs_parsed_mount_data *data) 2851 struct nfs_mount_info *mount_info)
2873{ 2852{
2874 char *export_path; 2853 char *export_path;
2875 struct vfsmount *root_mnt; 2854 struct vfsmount *root_mnt;
2876 struct dentry *res; 2855 struct dentry *res;
2856 struct nfs_parsed_mount_data *data = mount_info->parsed;
2877 2857
2878 dfprintk(MOUNT, "--> nfs4_try_mount()\n"); 2858 dfprintk(MOUNT, "--> nfs4_try_mount()\n");
2879 2859
2880 export_path = data->nfs_server.export_path; 2860 export_path = data->nfs_server.export_path;
2881 data->nfs_server.export_path = "/"; 2861 data->nfs_server.export_path = "/";
2882 root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, flags, data, 2862 root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, flags, mount_info,
2883 data->nfs_server.hostname); 2863 data->nfs_server.hostname);
2884 data->nfs_server.export_path = export_path; 2864 data->nfs_server.export_path = export_path;
2885 2865
@@ -2891,38 +2871,6 @@ static struct dentry *nfs4_try_mount(int flags, const char *dev_name,
2891 return res; 2871 return res;
2892} 2872}
2893 2873
2894/*
2895 * Get the superblock for an NFS4 mountpoint
2896 */
2897static struct dentry *nfs4_mount(struct file_system_type *fs_type,
2898 int flags, const char *dev_name, void *raw_data)
2899{
2900 struct nfs_parsed_mount_data *data;
2901 int error = -ENOMEM;
2902 struct dentry *res = ERR_PTR(-ENOMEM);
2903
2904 data = nfs_alloc_parsed_mount_data(4);
2905 if (data == NULL)
2906 goto out;
2907
2908 /* Validate the mount data */
2909 error = nfs4_validate_mount_data(raw_data, data, dev_name);
2910 if (error < 0) {
2911 res = ERR_PTR(error);
2912 goto out;
2913 }
2914
2915 res = nfs4_try_mount(flags, dev_name, data);
2916 if (IS_ERR(res))
2917 error = PTR_ERR(res);
2918
2919out:
2920 nfs_free_parsed_mount_data(data);
2921 dprintk("<-- nfs4_mount() = %d%s\n", error,
2922 error != 0 ? " [error]" : "");
2923 return res;
2924}
2925
2926static void nfs4_kill_super(struct super_block *sb) 2874static void nfs4_kill_super(struct super_block *sb)
2927{ 2875{
2928 struct nfs_server *server = NFS_SB(sb); 2876 struct nfs_server *server = NFS_SB(sb);
@@ -2942,181 +2890,43 @@ static struct dentry *
2942nfs4_xdev_mount(struct file_system_type *fs_type, int flags, 2890nfs4_xdev_mount(struct file_system_type *fs_type, int flags,
2943 const char *dev_name, void *raw_data) 2891 const char *dev_name, void *raw_data)
2944{ 2892{
2945 struct nfs_clone_mount *data = raw_data; 2893 struct nfs_mount_info mount_info = {
2946 struct super_block *s; 2894 .fill_super = nfs4_clone_super,
2947 struct nfs_server *server; 2895 .set_security = nfs_clone_sb_security,
2948 struct dentry *mntroot; 2896 .cloned = raw_data,
2949 int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
2950 struct nfs_sb_mountdata sb_mntdata = {
2951 .mntflags = flags,
2952 }; 2897 };
2953 int error; 2898 return nfs_xdev_mount_common(&nfs4_fs_type, flags, dev_name, &mount_info);
2954
2955 dprintk("--> nfs4_xdev_mount()\n");
2956
2957 /* create a new volume representation */
2958 server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr, data->authflavor);
2959 if (IS_ERR(server)) {
2960 error = PTR_ERR(server);
2961 goto out_err_noserver;
2962 }
2963 sb_mntdata.server = server;
2964
2965 if (server->flags & NFS4_MOUNT_UNSHARED)
2966 compare_super = NULL;
2967
2968 /* -o noac implies -o sync */
2969 if (server->flags & NFS_MOUNT_NOAC)
2970 sb_mntdata.mntflags |= MS_SYNCHRONOUS;
2971
2972 /* Get a superblock - note that we may end up sharing one that already exists */
2973 s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata);
2974 if (IS_ERR(s)) {
2975 error = PTR_ERR(s);
2976 goto out_err_nosb;
2977 }
2978
2979 if (s->s_fs_info != server) {
2980 nfs_free_server(server);
2981 server = NULL;
2982 } else {
2983 error = nfs_bdi_register(server);
2984 if (error)
2985 goto error_splat_bdi;
2986 }
2987
2988 if (!s->s_root) {
2989 /* initial superblock/root creation */
2990 nfs4_clone_super(s, data->sb);
2991 nfs_fscache_get_super_cookie(s, NULL, data);
2992 }
2993
2994 mntroot = nfs4_get_root(s, data->fh, dev_name);
2995 if (IS_ERR(mntroot)) {
2996 error = PTR_ERR(mntroot);
2997 goto error_splat_super;
2998 }
2999 if (mntroot->d_inode->i_op != NFS_SB(s)->nfs_client->rpc_ops->dir_inode_ops) {
3000 dput(mntroot);
3001 error = -ESTALE;
3002 goto error_splat_super;
3003 }
3004
3005 s->s_flags |= MS_ACTIVE;
3006
3007 security_sb_clone_mnt_opts(data->sb, s);
3008
3009 dprintk("<-- nfs4_xdev_mount() = 0\n");
3010 return mntroot;
3011
3012out_err_nosb:
3013 nfs_free_server(server);
3014out_err_noserver:
3015 dprintk("<-- nfs4_xdev_mount() = %d [error]\n", error);
3016 return ERR_PTR(error);
3017
3018error_splat_super:
3019 if (server && !s->s_root)
3020 bdi_unregister(&server->backing_dev_info);
3021error_splat_bdi:
3022 deactivate_locked_super(s);
3023 dprintk("<-- nfs4_xdev_mount() = %d [splat]\n", error);
3024 return ERR_PTR(error);
3025} 2899}
3026 2900
3027static struct dentry * 2901static struct dentry *
3028nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags, 2902nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags,
3029 const char *dev_name, void *raw_data) 2903 const char *dev_name, void *raw_data)
3030{ 2904{
3031 struct nfs_clone_mount *data = raw_data; 2905 struct nfs_mount_info mount_info = {
3032 struct super_block *s; 2906 .fill_super = nfs4_fill_super,
3033 struct nfs_server *server; 2907 .set_security = nfs_clone_sb_security,
3034 struct dentry *mntroot; 2908 .cloned = raw_data,
3035 struct nfs_fh *mntfh;
3036 int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
3037 struct nfs_sb_mountdata sb_mntdata = {
3038 .mntflags = flags,
3039 }; 2909 };
3040 int error = -ENOMEM; 2910 struct nfs_server *server;
2911 struct dentry *mntroot = ERR_PTR(-ENOMEM);
3041 2912
3042 dprintk("--> nfs4_referral_get_sb()\n"); 2913 dprintk("--> nfs4_referral_get_sb()\n");
3043 2914
3044 mntfh = nfs_alloc_fhandle(); 2915 mount_info.mntfh = nfs_alloc_fhandle();
3045 if (mntfh == NULL) 2916 if (mount_info.cloned == NULL || mount_info.mntfh == NULL)
3046 goto out_err_nofh; 2917 goto out;
3047 2918
3048 /* create a new volume representation */ 2919 /* create a new volume representation */
3049 server = nfs4_create_referral_server(data, mntfh); 2920 server = nfs4_create_referral_server(mount_info.cloned, mount_info.mntfh);
3050 if (IS_ERR(server)) { 2921 if (IS_ERR(server)) {
3051 error = PTR_ERR(server); 2922 mntroot = ERR_CAST(server);
3052 goto out_err_noserver; 2923 goto out;
3053 }
3054 sb_mntdata.server = server;
3055
3056 if (server->flags & NFS4_MOUNT_UNSHARED)
3057 compare_super = NULL;
3058
3059 /* -o noac implies -o sync */
3060 if (server->flags & NFS_MOUNT_NOAC)
3061 sb_mntdata.mntflags |= MS_SYNCHRONOUS;
3062
3063 /* Get a superblock - note that we may end up sharing one that already exists */
3064 s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata);
3065 if (IS_ERR(s)) {
3066 error = PTR_ERR(s);
3067 goto out_err_nosb;
3068 }
3069
3070 if (s->s_fs_info != server) {
3071 nfs_free_server(server);
3072 server = NULL;
3073 } else {
3074 error = nfs_bdi_register(server);
3075 if (error)
3076 goto error_splat_bdi;
3077 }
3078
3079 if (!s->s_root) {
3080 /* initial superblock/root creation */
3081 nfs4_fill_super(s);
3082 nfs_fscache_get_super_cookie(s, NULL, data);
3083 }
3084
3085 mntroot = nfs4_get_root(s, mntfh, dev_name);
3086 if (IS_ERR(mntroot)) {
3087 error = PTR_ERR(mntroot);
3088 goto error_splat_super;
3089 }
3090 if (mntroot->d_inode->i_op != NFS_SB(s)->nfs_client->rpc_ops->dir_inode_ops) {
3091 dput(mntroot);
3092 error = -ESTALE;
3093 goto error_splat_super;
3094 } 2924 }
3095 2925
3096 s->s_flags |= MS_ACTIVE; 2926 mntroot = nfs_fs_mount_common(&nfs4_fs_type, server, flags, dev_name, &mount_info);
3097 2927out:
3098 security_sb_clone_mnt_opts(data->sb, s); 2928 nfs_free_fhandle(mount_info.mntfh);
3099
3100 nfs_free_fhandle(mntfh);
3101 dprintk("<-- nfs4_referral_get_sb() = 0\n");
3102 return mntroot; 2929 return mntroot;
3103
3104out_err_nosb:
3105 nfs_free_server(server);
3106out_err_noserver:
3107 nfs_free_fhandle(mntfh);
3108out_err_nofh:
3109 dprintk("<-- nfs4_referral_get_sb() = %d [error]\n", error);
3110 return ERR_PTR(error);
3111
3112error_splat_super:
3113 if (server && !s->s_root)
3114 bdi_unregister(&server->backing_dev_info);
3115error_splat_bdi:
3116 deactivate_locked_super(s);
3117 nfs_free_fhandle(mntfh);
3118 dprintk("<-- nfs4_referral_get_sb() = %d [splat]\n", error);
3119 return ERR_PTR(error);
3120} 2930}
3121 2931
3122/* 2932/*
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index c07462320f6b..e6fe3d69d14c 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -39,20 +39,20 @@
39/* 39/*
40 * Local function declarations 40 * Local function declarations
41 */ 41 */
42static void nfs_pageio_init_write(struct nfs_pageio_descriptor *desc,
43 struct inode *inode, int ioflags);
44static void nfs_redirty_request(struct nfs_page *req); 42static void nfs_redirty_request(struct nfs_page *req);
45static const struct rpc_call_ops nfs_write_partial_ops; 43static const struct rpc_call_ops nfs_write_common_ops;
46static const struct rpc_call_ops nfs_write_full_ops;
47static const struct rpc_call_ops nfs_commit_ops; 44static const struct rpc_call_ops nfs_commit_ops;
45static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops;
46static const struct nfs_commit_completion_ops nfs_commit_completion_ops;
48 47
49static struct kmem_cache *nfs_wdata_cachep; 48static struct kmem_cache *nfs_wdata_cachep;
50static mempool_t *nfs_wdata_mempool; 49static mempool_t *nfs_wdata_mempool;
50static struct kmem_cache *nfs_cdata_cachep;
51static mempool_t *nfs_commit_mempool; 51static mempool_t *nfs_commit_mempool;
52 52
53struct nfs_write_data *nfs_commitdata_alloc(void) 53struct nfs_commit_data *nfs_commitdata_alloc(void)
54{ 54{
55 struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOFS); 55 struct nfs_commit_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOFS);
56 56
57 if (p) { 57 if (p) {
58 memset(p, 0, sizeof(*p)); 58 memset(p, 0, sizeof(*p));
@@ -62,46 +62,73 @@ struct nfs_write_data *nfs_commitdata_alloc(void)
62} 62}
63EXPORT_SYMBOL_GPL(nfs_commitdata_alloc); 63EXPORT_SYMBOL_GPL(nfs_commitdata_alloc);
64 64
65void nfs_commit_free(struct nfs_write_data *p) 65void nfs_commit_free(struct nfs_commit_data *p)
66{ 66{
67 if (p && (p->pagevec != &p->page_array[0]))
68 kfree(p->pagevec);
69 mempool_free(p, nfs_commit_mempool); 67 mempool_free(p, nfs_commit_mempool);
70} 68}
71EXPORT_SYMBOL_GPL(nfs_commit_free); 69EXPORT_SYMBOL_GPL(nfs_commit_free);
72 70
73struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount) 71struct nfs_write_header *nfs_writehdr_alloc(void)
74{ 72{
75 struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, GFP_NOFS); 73 struct nfs_write_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOFS);
76 74
77 if (p) { 75 if (p) {
76 struct nfs_pgio_header *hdr = &p->header;
77
78 memset(p, 0, sizeof(*p)); 78 memset(p, 0, sizeof(*p));
79 INIT_LIST_HEAD(&p->pages); 79 INIT_LIST_HEAD(&hdr->pages);
80 p->npages = pagecount; 80 INIT_LIST_HEAD(&hdr->rpc_list);
81 if (pagecount <= ARRAY_SIZE(p->page_array)) 81 spin_lock_init(&hdr->lock);
82 p->pagevec = p->page_array; 82 atomic_set(&hdr->refcnt, 0);
83 else {
84 p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS);
85 if (!p->pagevec) {
86 mempool_free(p, nfs_wdata_mempool);
87 p = NULL;
88 }
89 }
90 } 83 }
91 return p; 84 return p;
92} 85}
93 86
94void nfs_writedata_free(struct nfs_write_data *p) 87static struct nfs_write_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr,
88 unsigned int pagecount)
89{
90 struct nfs_write_data *data, *prealloc;
91
92 prealloc = &container_of(hdr, struct nfs_write_header, header)->rpc_data;
93 if (prealloc->header == NULL)
94 data = prealloc;
95 else
96 data = kzalloc(sizeof(*data), GFP_KERNEL);
97 if (!data)
98 goto out;
99
100 if (nfs_pgarray_set(&data->pages, pagecount)) {
101 data->header = hdr;
102 atomic_inc(&hdr->refcnt);
103 } else {
104 if (data != prealloc)
105 kfree(data);
106 data = NULL;
107 }
108out:
109 return data;
110}
111
112void nfs_writehdr_free(struct nfs_pgio_header *hdr)
95{ 113{
96 if (p && (p->pagevec != &p->page_array[0])) 114 struct nfs_write_header *whdr = container_of(hdr, struct nfs_write_header, header);
97 kfree(p->pagevec); 115 mempool_free(whdr, nfs_wdata_mempool);
98 mempool_free(p, nfs_wdata_mempool);
99} 116}
100 117
101void nfs_writedata_release(struct nfs_write_data *wdata) 118void nfs_writedata_release(struct nfs_write_data *wdata)
102{ 119{
120 struct nfs_pgio_header *hdr = wdata->header;
121 struct nfs_write_header *write_header = container_of(hdr, struct nfs_write_header, header);
122
103 put_nfs_open_context(wdata->args.context); 123 put_nfs_open_context(wdata->args.context);
104 nfs_writedata_free(wdata); 124 if (wdata->pages.pagevec != wdata->pages.page_array)
125 kfree(wdata->pages.pagevec);
126 if (wdata != &write_header->rpc_data)
127 kfree(wdata);
128 else
129 wdata->header = NULL;
130 if (atomic_dec_and_test(&hdr->refcnt))
131 hdr->completion_ops->completion(hdr);
105} 132}
106 133
107static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error) 134static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
@@ -203,7 +230,6 @@ static int nfs_set_page_writeback(struct page *page)
203 struct inode *inode = page->mapping->host; 230 struct inode *inode = page->mapping->host;
204 struct nfs_server *nfss = NFS_SERVER(inode); 231 struct nfs_server *nfss = NFS_SERVER(inode);
205 232
206 page_cache_get(page);
207 if (atomic_long_inc_return(&nfss->writeback) > 233 if (atomic_long_inc_return(&nfss->writeback) >
208 NFS_CONGESTION_ON_THRESH) { 234 NFS_CONGESTION_ON_THRESH) {
209 set_bdi_congested(&nfss->backing_dev_info, 235 set_bdi_congested(&nfss->backing_dev_info,
@@ -219,7 +245,6 @@ static void nfs_end_page_writeback(struct page *page)
219 struct nfs_server *nfss = NFS_SERVER(inode); 245 struct nfs_server *nfss = NFS_SERVER(inode);
220 246
221 end_page_writeback(page); 247 end_page_writeback(page);
222 page_cache_release(page);
223 if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) 248 if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
224 clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC); 249 clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC);
225} 250}
@@ -235,10 +260,10 @@ static struct nfs_page *nfs_find_and_lock_request(struct page *page, bool nonblo
235 req = nfs_page_find_request_locked(page); 260 req = nfs_page_find_request_locked(page);
236 if (req == NULL) 261 if (req == NULL)
237 break; 262 break;
238 if (nfs_lock_request_dontget(req)) 263 if (nfs_lock_request(req))
239 break; 264 break;
240 /* Note: If we hold the page lock, as is the case in nfs_writepage, 265 /* Note: If we hold the page lock, as is the case in nfs_writepage,
241 * then the call to nfs_lock_request_dontget() will always 266 * then the call to nfs_lock_request() will always
242 * succeed provided that someone hasn't already marked the 267 * succeed provided that someone hasn't already marked the
243 * request as dirty (in which case we don't care). 268 * request as dirty (in which case we don't care).
244 */ 269 */
@@ -310,7 +335,8 @@ static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc
310 struct nfs_pageio_descriptor pgio; 335 struct nfs_pageio_descriptor pgio;
311 int err; 336 int err;
312 337
313 nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc)); 338 nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc),
339 &nfs_async_write_completion_ops);
314 err = nfs_do_writepage(page, wbc, &pgio); 340 err = nfs_do_writepage(page, wbc, &pgio);
315 nfs_pageio_complete(&pgio); 341 nfs_pageio_complete(&pgio);
316 if (err < 0) 342 if (err < 0)
@@ -353,7 +379,8 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
353 379
354 nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES); 380 nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
355 381
356 nfs_pageio_init_write(&pgio, inode, wb_priority(wbc)); 382 nfs_pageio_init_write(&pgio, inode, wb_priority(wbc),
383 &nfs_async_write_completion_ops);
357 err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio); 384 err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio);
358 nfs_pageio_complete(&pgio); 385 nfs_pageio_complete(&pgio);
359 386
@@ -379,7 +406,7 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
379 struct nfs_inode *nfsi = NFS_I(inode); 406 struct nfs_inode *nfsi = NFS_I(inode);
380 407
381 /* Lock the request! */ 408 /* Lock the request! */
382 nfs_lock_request_dontget(req); 409 nfs_lock_request(req);
383 410
384 spin_lock(&inode->i_lock); 411 spin_lock(&inode->i_lock);
385 if (!nfsi->npages && nfs_have_delegation(inode, FMODE_WRITE)) 412 if (!nfsi->npages && nfs_have_delegation(inode, FMODE_WRITE))
@@ -421,65 +448,88 @@ nfs_mark_request_dirty(struct nfs_page *req)
421/** 448/**
422 * nfs_request_add_commit_list - add request to a commit list 449 * nfs_request_add_commit_list - add request to a commit list
423 * @req: pointer to a struct nfs_page 450 * @req: pointer to a struct nfs_page
424 * @head: commit list head 451 * @dst: commit list head
452 * @cinfo: holds list lock and accounting info
425 * 453 *
426 * This sets the PG_CLEAN bit, updates the inode global count of 454 * This sets the PG_CLEAN bit, updates the cinfo count of
427 * number of outstanding requests requiring a commit as well as 455 * number of outstanding requests requiring a commit as well as
428 * the MM page stats. 456 * the MM page stats.
429 * 457 *
430 * The caller must _not_ hold the inode->i_lock, but must be 458 * The caller must _not_ hold the cinfo->lock, but must be
431 * holding the nfs_page lock. 459 * holding the nfs_page lock.
432 */ 460 */
433void 461void
434nfs_request_add_commit_list(struct nfs_page *req, struct list_head *head) 462nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst,
463 struct nfs_commit_info *cinfo)
435{ 464{
436 struct inode *inode = req->wb_context->dentry->d_inode;
437
438 set_bit(PG_CLEAN, &(req)->wb_flags); 465 set_bit(PG_CLEAN, &(req)->wb_flags);
439 spin_lock(&inode->i_lock); 466 spin_lock(cinfo->lock);
440 nfs_list_add_request(req, head); 467 nfs_list_add_request(req, dst);
441 NFS_I(inode)->ncommit++; 468 cinfo->mds->ncommit++;
442 spin_unlock(&inode->i_lock); 469 spin_unlock(cinfo->lock);
443 inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); 470 if (!cinfo->dreq) {
444 inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE); 471 inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
445 __mark_inode_dirty(inode, I_DIRTY_DATASYNC); 472 inc_bdi_stat(req->wb_page->mapping->backing_dev_info,
473 BDI_RECLAIMABLE);
474 __mark_inode_dirty(req->wb_context->dentry->d_inode,
475 I_DIRTY_DATASYNC);
476 }
446} 477}
447EXPORT_SYMBOL_GPL(nfs_request_add_commit_list); 478EXPORT_SYMBOL_GPL(nfs_request_add_commit_list);
448 479
449/** 480/**
450 * nfs_request_remove_commit_list - Remove request from a commit list 481 * nfs_request_remove_commit_list - Remove request from a commit list
451 * @req: pointer to a nfs_page 482 * @req: pointer to a nfs_page
483 * @cinfo: holds list lock and accounting info
452 * 484 *
453 * This clears the PG_CLEAN bit, and updates the inode global count of 485 * This clears the PG_CLEAN bit, and updates the cinfo's count of
454 * number of outstanding requests requiring a commit 486 * number of outstanding requests requiring a commit
455 * It does not update the MM page stats. 487 * It does not update the MM page stats.
456 * 488 *
457 * The caller _must_ hold the inode->i_lock and the nfs_page lock. 489 * The caller _must_ hold the cinfo->lock and the nfs_page lock.
458 */ 490 */
459void 491void
460nfs_request_remove_commit_list(struct nfs_page *req) 492nfs_request_remove_commit_list(struct nfs_page *req,
493 struct nfs_commit_info *cinfo)
461{ 494{
462 struct inode *inode = req->wb_context->dentry->d_inode;
463
464 if (!test_and_clear_bit(PG_CLEAN, &(req)->wb_flags)) 495 if (!test_and_clear_bit(PG_CLEAN, &(req)->wb_flags))
465 return; 496 return;
466 nfs_list_remove_request(req); 497 nfs_list_remove_request(req);
467 NFS_I(inode)->ncommit--; 498 cinfo->mds->ncommit--;
468} 499}
469EXPORT_SYMBOL_GPL(nfs_request_remove_commit_list); 500EXPORT_SYMBOL_GPL(nfs_request_remove_commit_list);
470 501
502static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo,
503 struct inode *inode)
504{
505 cinfo->lock = &inode->i_lock;
506 cinfo->mds = &NFS_I(inode)->commit_info;
507 cinfo->ds = pnfs_get_ds_info(inode);
508 cinfo->dreq = NULL;
509 cinfo->completion_ops = &nfs_commit_completion_ops;
510}
511
512void nfs_init_cinfo(struct nfs_commit_info *cinfo,
513 struct inode *inode,
514 struct nfs_direct_req *dreq)
515{
516 if (dreq)
517 nfs_init_cinfo_from_dreq(cinfo, dreq);
518 else
519 nfs_init_cinfo_from_inode(cinfo, inode);
520}
521EXPORT_SYMBOL_GPL(nfs_init_cinfo);
471 522
472/* 523/*
473 * Add a request to the inode's commit list. 524 * Add a request to the inode's commit list.
474 */ 525 */
475static void 526void
476nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) 527nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
528 struct nfs_commit_info *cinfo)
477{ 529{
478 struct inode *inode = req->wb_context->dentry->d_inode; 530 if (pnfs_mark_request_commit(req, lseg, cinfo))
479
480 if (pnfs_mark_request_commit(req, lseg))
481 return; 531 return;
482 nfs_request_add_commit_list(req, &NFS_I(inode)->commit_list); 532 nfs_request_add_commit_list(req, &cinfo->mds->list, cinfo);
483} 533}
484 534
485static void 535static void
@@ -494,11 +544,13 @@ nfs_clear_request_commit(struct nfs_page *req)
494{ 544{
495 if (test_bit(PG_CLEAN, &req->wb_flags)) { 545 if (test_bit(PG_CLEAN, &req->wb_flags)) {
496 struct inode *inode = req->wb_context->dentry->d_inode; 546 struct inode *inode = req->wb_context->dentry->d_inode;
547 struct nfs_commit_info cinfo;
497 548
498 if (!pnfs_clear_request_commit(req)) { 549 nfs_init_cinfo_from_inode(&cinfo, inode);
499 spin_lock(&inode->i_lock); 550 if (!pnfs_clear_request_commit(req, &cinfo)) {
500 nfs_request_remove_commit_list(req); 551 spin_lock(cinfo.lock);
501 spin_unlock(&inode->i_lock); 552 nfs_request_remove_commit_list(req, &cinfo);
553 spin_unlock(cinfo.lock);
502 } 554 }
503 nfs_clear_page_commit(req->wb_page); 555 nfs_clear_page_commit(req->wb_page);
504 } 556 }
@@ -508,28 +560,25 @@ static inline
508int nfs_write_need_commit(struct nfs_write_data *data) 560int nfs_write_need_commit(struct nfs_write_data *data)
509{ 561{
510 if (data->verf.committed == NFS_DATA_SYNC) 562 if (data->verf.committed == NFS_DATA_SYNC)
511 return data->lseg == NULL; 563 return data->header->lseg == NULL;
512 else 564 return data->verf.committed != NFS_FILE_SYNC;
513 return data->verf.committed != NFS_FILE_SYNC;
514} 565}
515 566
516static inline 567#else
517int nfs_reschedule_unstable_write(struct nfs_page *req, 568static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo,
518 struct nfs_write_data *data) 569 struct inode *inode)
519{ 570{
520 if (test_and_clear_bit(PG_NEED_COMMIT, &req->wb_flags)) {
521 nfs_mark_request_commit(req, data->lseg);
522 return 1;
523 }
524 if (test_and_clear_bit(PG_NEED_RESCHED, &req->wb_flags)) {
525 nfs_mark_request_dirty(req);
526 return 1;
527 }
528 return 0;
529} 571}
530#else 572
531static void 573void nfs_init_cinfo(struct nfs_commit_info *cinfo,
532nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) 574 struct inode *inode,
575 struct nfs_direct_req *dreq)
576{
577}
578
579void
580nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
581 struct nfs_commit_info *cinfo)
533{ 582{
534} 583}
535 584
@@ -544,25 +593,57 @@ int nfs_write_need_commit(struct nfs_write_data *data)
544 return 0; 593 return 0;
545} 594}
546 595
547static inline 596#endif
548int nfs_reschedule_unstable_write(struct nfs_page *req, 597
549 struct nfs_write_data *data) 598static void nfs_write_completion(struct nfs_pgio_header *hdr)
550{ 599{
551 return 0; 600 struct nfs_commit_info cinfo;
601 unsigned long bytes = 0;
602
603 if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
604 goto out;
605 nfs_init_cinfo_from_inode(&cinfo, hdr->inode);
606 while (!list_empty(&hdr->pages)) {
607 struct nfs_page *req = nfs_list_entry(hdr->pages.next);
608
609 bytes += req->wb_bytes;
610 nfs_list_remove_request(req);
611 if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) &&
612 (hdr->good_bytes < bytes)) {
613 nfs_set_pageerror(req->wb_page);
614 nfs_context_set_write_error(req->wb_context, hdr->error);
615 goto remove_req;
616 }
617 if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) {
618 nfs_mark_request_dirty(req);
619 goto next;
620 }
621 if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) {
622 nfs_mark_request_commit(req, hdr->lseg, &cinfo);
623 goto next;
624 }
625remove_req:
626 nfs_inode_remove_request(req);
627next:
628 nfs_unlock_request(req);
629 nfs_end_page_writeback(req->wb_page);
630 nfs_release_request(req);
631 }
632out:
633 hdr->release(hdr);
552} 634}
553#endif
554 635
555#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) 636#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
556static int 637static unsigned long
557nfs_need_commit(struct nfs_inode *nfsi) 638nfs_reqs_to_commit(struct nfs_commit_info *cinfo)
558{ 639{
559 return nfsi->ncommit > 0; 640 return cinfo->mds->ncommit;
560} 641}
561 642
562/* i_lock held by caller */ 643/* cinfo->lock held by caller */
563static int 644int
564nfs_scan_commit_list(struct list_head *src, struct list_head *dst, int max, 645nfs_scan_commit_list(struct list_head *src, struct list_head *dst,
565 spinlock_t *lock) 646 struct nfs_commit_info *cinfo, int max)
566{ 647{
567 struct nfs_page *req, *tmp; 648 struct nfs_page *req, *tmp;
568 int ret = 0; 649 int ret = 0;
@@ -570,12 +651,13 @@ nfs_scan_commit_list(struct list_head *src, struct list_head *dst, int max,
570 list_for_each_entry_safe(req, tmp, src, wb_list) { 651 list_for_each_entry_safe(req, tmp, src, wb_list) {
571 if (!nfs_lock_request(req)) 652 if (!nfs_lock_request(req))
572 continue; 653 continue;
573 if (cond_resched_lock(lock)) 654 kref_get(&req->wb_kref);
655 if (cond_resched_lock(cinfo->lock))
574 list_safe_reset_next(req, tmp, wb_list); 656 list_safe_reset_next(req, tmp, wb_list);
575 nfs_request_remove_commit_list(req); 657 nfs_request_remove_commit_list(req, cinfo);
576 nfs_list_add_request(req, dst); 658 nfs_list_add_request(req, dst);
577 ret++; 659 ret++;
578 if (ret == max) 660 if ((ret == max) && !cinfo->dreq)
579 break; 661 break;
580 } 662 }
581 return ret; 663 return ret;
@@ -584,37 +666,38 @@ nfs_scan_commit_list(struct list_head *src, struct list_head *dst, int max,
584/* 666/*
585 * nfs_scan_commit - Scan an inode for commit requests 667 * nfs_scan_commit - Scan an inode for commit requests
586 * @inode: NFS inode to scan 668 * @inode: NFS inode to scan
587 * @dst: destination list 669 * @dst: mds destination list
670 * @cinfo: mds and ds lists of reqs ready to commit
588 * 671 *
589 * Moves requests from the inode's 'commit' request list. 672 * Moves requests from the inode's 'commit' request list.
590 * The requests are *not* checked to ensure that they form a contiguous set. 673 * The requests are *not* checked to ensure that they form a contiguous set.
591 */ 674 */
592static int 675int
593nfs_scan_commit(struct inode *inode, struct list_head *dst) 676nfs_scan_commit(struct inode *inode, struct list_head *dst,
677 struct nfs_commit_info *cinfo)
594{ 678{
595 struct nfs_inode *nfsi = NFS_I(inode);
596 int ret = 0; 679 int ret = 0;
597 680
598 spin_lock(&inode->i_lock); 681 spin_lock(cinfo->lock);
599 if (nfsi->ncommit > 0) { 682 if (cinfo->mds->ncommit > 0) {
600 const int max = INT_MAX; 683 const int max = INT_MAX;
601 684
602 ret = nfs_scan_commit_list(&nfsi->commit_list, dst, max, 685 ret = nfs_scan_commit_list(&cinfo->mds->list, dst,
603 &inode->i_lock); 686 cinfo, max);
604 ret += pnfs_scan_commit_lists(inode, max - ret, 687 ret += pnfs_scan_commit_lists(inode, cinfo, max - ret);
605 &inode->i_lock);
606 } 688 }
607 spin_unlock(&inode->i_lock); 689 spin_unlock(cinfo->lock);
608 return ret; 690 return ret;
609} 691}
610 692
611#else 693#else
612static inline int nfs_need_commit(struct nfs_inode *nfsi) 694static unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo)
613{ 695{
614 return 0; 696 return 0;
615} 697}
616 698
617static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst) 699int nfs_scan_commit(struct inode *inode, struct list_head *dst,
700 struct nfs_commit_info *cinfo)
618{ 701{
619 return 0; 702 return 0;
620} 703}
@@ -659,7 +742,7 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
659 || end < req->wb_offset) 742 || end < req->wb_offset)
660 goto out_flushme; 743 goto out_flushme;
661 744
662 if (nfs_lock_request_dontget(req)) 745 if (nfs_lock_request(req))
663 break; 746 break;
664 747
665 /* The request is locked, so wait and then retry */ 748 /* The request is locked, so wait and then retry */
@@ -729,7 +812,7 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
729 nfs_grow_file(page, offset, count); 812 nfs_grow_file(page, offset, count);
730 nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes); 813 nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
731 nfs_mark_request_dirty(req); 814 nfs_mark_request_dirty(req);
732 nfs_unlock_request(req); 815 nfs_unlock_and_release_request(req);
733 return 0; 816 return 0;
734} 817}
735 818
@@ -766,10 +849,14 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
766 * the PageUptodate() flag. In this case, we will need to turn off 849 * the PageUptodate() flag. In this case, we will need to turn off
767 * write optimisations that depend on the page contents being correct. 850 * write optimisations that depend on the page contents being correct.
768 */ 851 */
769static int nfs_write_pageuptodate(struct page *page, struct inode *inode) 852static bool nfs_write_pageuptodate(struct page *page, struct inode *inode)
770{ 853{
771 return PageUptodate(page) && 854 if (nfs_have_delegated_attributes(inode))
772 !(NFS_I(inode)->cache_validity & (NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA)); 855 goto out;
856 if (NFS_I(inode)->cache_validity & NFS_INO_REVAL_PAGECACHE)
857 return false;
858out:
859 return PageUptodate(page) != 0;
773} 860}
774 861
775/* 862/*
@@ -815,17 +902,6 @@ int nfs_updatepage(struct file *file, struct page *page,
815 return status; 902 return status;
816} 903}
817 904
818static void nfs_writepage_release(struct nfs_page *req,
819 struct nfs_write_data *data)
820{
821 struct page *page = req->wb_page;
822
823 if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req, data))
824 nfs_inode_remove_request(req);
825 nfs_unlock_request(req);
826 nfs_end_page_writeback(page);
827}
828
829static int flush_task_priority(int how) 905static int flush_task_priority(int how)
830{ 906{
831 switch (how & (FLUSH_HIGHPRI|FLUSH_LOWPRI)) { 907 switch (how & (FLUSH_HIGHPRI|FLUSH_LOWPRI)) {
@@ -837,18 +913,18 @@ static int flush_task_priority(int how)
837 return RPC_PRIORITY_NORMAL; 913 return RPC_PRIORITY_NORMAL;
838} 914}
839 915
840int nfs_initiate_write(struct nfs_write_data *data, 916int nfs_initiate_write(struct rpc_clnt *clnt,
841 struct rpc_clnt *clnt, 917 struct nfs_write_data *data,
842 const struct rpc_call_ops *call_ops, 918 const struct rpc_call_ops *call_ops,
843 int how) 919 int how, int flags)
844{ 920{
845 struct inode *inode = data->inode; 921 struct inode *inode = data->header->inode;
846 int priority = flush_task_priority(how); 922 int priority = flush_task_priority(how);
847 struct rpc_task *task; 923 struct rpc_task *task;
848 struct rpc_message msg = { 924 struct rpc_message msg = {
849 .rpc_argp = &data->args, 925 .rpc_argp = &data->args,
850 .rpc_resp = &data->res, 926 .rpc_resp = &data->res,
851 .rpc_cred = data->cred, 927 .rpc_cred = data->header->cred,
852 }; 928 };
853 struct rpc_task_setup task_setup_data = { 929 struct rpc_task_setup task_setup_data = {
854 .rpc_client = clnt, 930 .rpc_client = clnt,
@@ -857,7 +933,7 @@ int nfs_initiate_write(struct nfs_write_data *data,
857 .callback_ops = call_ops, 933 .callback_ops = call_ops,
858 .callback_data = data, 934 .callback_data = data,
859 .workqueue = nfsiod_workqueue, 935 .workqueue = nfsiod_workqueue,
860 .flags = RPC_TASK_ASYNC, 936 .flags = RPC_TASK_ASYNC | flags,
861 .priority = priority, 937 .priority = priority,
862 }; 938 };
863 int ret = 0; 939 int ret = 0;
@@ -892,26 +968,21 @@ EXPORT_SYMBOL_GPL(nfs_initiate_write);
892/* 968/*
893 * Set up the argument/result storage required for the RPC call. 969 * Set up the argument/result storage required for the RPC call.
894 */ 970 */
895static void nfs_write_rpcsetup(struct nfs_page *req, 971static void nfs_write_rpcsetup(struct nfs_write_data *data,
896 struct nfs_write_data *data,
897 unsigned int count, unsigned int offset, 972 unsigned int count, unsigned int offset,
898 int how) 973 int how, struct nfs_commit_info *cinfo)
899{ 974{
900 struct inode *inode = req->wb_context->dentry->d_inode; 975 struct nfs_page *req = data->header->req;
901 976
902 /* Set up the RPC argument and reply structs 977 /* Set up the RPC argument and reply structs
903 * NB: take care not to mess about with data->commit et al. */ 978 * NB: take care not to mess about with data->commit et al. */
904 979
905 data->req = req; 980 data->args.fh = NFS_FH(data->header->inode);
906 data->inode = inode = req->wb_context->dentry->d_inode;
907 data->cred = req->wb_context->cred;
908
909 data->args.fh = NFS_FH(inode);
910 data->args.offset = req_offset(req) + offset; 981 data->args.offset = req_offset(req) + offset;
911 /* pnfs_set_layoutcommit needs this */ 982 /* pnfs_set_layoutcommit needs this */
912 data->mds_offset = data->args.offset; 983 data->mds_offset = data->args.offset;
913 data->args.pgbase = req->wb_pgbase + offset; 984 data->args.pgbase = req->wb_pgbase + offset;
914 data->args.pages = data->pagevec; 985 data->args.pages = data->pages.pagevec;
915 data->args.count = count; 986 data->args.count = count;
916 data->args.context = get_nfs_open_context(req->wb_context); 987 data->args.context = get_nfs_open_context(req->wb_context);
917 data->args.lock_context = req->wb_lock_context; 988 data->args.lock_context = req->wb_lock_context;
@@ -920,7 +991,7 @@ static void nfs_write_rpcsetup(struct nfs_page *req,
920 case 0: 991 case 0:
921 break; 992 break;
922 case FLUSH_COND_STABLE: 993 case FLUSH_COND_STABLE:
923 if (nfs_need_commit(NFS_I(inode))) 994 if (nfs_reqs_to_commit(cinfo))
924 break; 995 break;
925 default: 996 default:
926 data->args.stable = NFS_FILE_SYNC; 997 data->args.stable = NFS_FILE_SYNC;
@@ -936,9 +1007,9 @@ static int nfs_do_write(struct nfs_write_data *data,
936 const struct rpc_call_ops *call_ops, 1007 const struct rpc_call_ops *call_ops,
937 int how) 1008 int how)
938{ 1009{
939 struct inode *inode = data->args.context->dentry->d_inode; 1010 struct inode *inode = data->header->inode;
940 1011
941 return nfs_initiate_write(data, NFS_CLIENT(inode), call_ops, how); 1012 return nfs_initiate_write(NFS_CLIENT(inode), data, call_ops, how, 0);
942} 1013}
943 1014
944static int nfs_do_multiple_writes(struct list_head *head, 1015static int nfs_do_multiple_writes(struct list_head *head,
@@ -951,7 +1022,7 @@ static int nfs_do_multiple_writes(struct list_head *head,
951 while (!list_empty(head)) { 1022 while (!list_empty(head)) {
952 int ret2; 1023 int ret2;
953 1024
954 data = list_entry(head->next, struct nfs_write_data, list); 1025 data = list_first_entry(head, struct nfs_write_data, list);
955 list_del_init(&data->list); 1026 list_del_init(&data->list);
956 1027
957 ret2 = nfs_do_write(data, call_ops, how); 1028 ret2 = nfs_do_write(data, call_ops, how);
@@ -967,31 +1038,60 @@ static int nfs_do_multiple_writes(struct list_head *head,
967 */ 1038 */
968static void nfs_redirty_request(struct nfs_page *req) 1039static void nfs_redirty_request(struct nfs_page *req)
969{ 1040{
970 struct page *page = req->wb_page;
971
972 nfs_mark_request_dirty(req); 1041 nfs_mark_request_dirty(req);
973 nfs_unlock_request(req); 1042 nfs_unlock_request(req);
974 nfs_end_page_writeback(page); 1043 nfs_end_page_writeback(req->wb_page);
1044 nfs_release_request(req);
1045}
1046
1047static void nfs_async_write_error(struct list_head *head)
1048{
1049 struct nfs_page *req;
1050
1051 while (!list_empty(head)) {
1052 req = nfs_list_entry(head->next);
1053 nfs_list_remove_request(req);
1054 nfs_redirty_request(req);
1055 }
1056}
1057
1058static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops = {
1059 .error_cleanup = nfs_async_write_error,
1060 .completion = nfs_write_completion,
1061};
1062
1063static void nfs_flush_error(struct nfs_pageio_descriptor *desc,
1064 struct nfs_pgio_header *hdr)
1065{
1066 set_bit(NFS_IOHDR_REDO, &hdr->flags);
1067 while (!list_empty(&hdr->rpc_list)) {
1068 struct nfs_write_data *data = list_first_entry(&hdr->rpc_list,
1069 struct nfs_write_data, list);
1070 list_del(&data->list);
1071 nfs_writedata_release(data);
1072 }
1073 desc->pg_completion_ops->error_cleanup(&desc->pg_list);
975} 1074}
976 1075
977/* 1076/*
978 * Generate multiple small requests to write out a single 1077 * Generate multiple small requests to write out a single
979 * contiguous dirty area on one page. 1078 * contiguous dirty area on one page.
980 */ 1079 */
981static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head *res) 1080static int nfs_flush_multi(struct nfs_pageio_descriptor *desc,
1081 struct nfs_pgio_header *hdr)
982{ 1082{
983 struct nfs_page *req = nfs_list_entry(desc->pg_list.next); 1083 struct nfs_page *req = hdr->req;
984 struct page *page = req->wb_page; 1084 struct page *page = req->wb_page;
985 struct nfs_write_data *data; 1085 struct nfs_write_data *data;
986 size_t wsize = desc->pg_bsize, nbytes; 1086 size_t wsize = desc->pg_bsize, nbytes;
987 unsigned int offset; 1087 unsigned int offset;
988 int requests = 0; 1088 int requests = 0;
989 int ret = 0; 1089 struct nfs_commit_info cinfo;
990 1090
991 nfs_list_remove_request(req); 1091 nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);
992 1092
993 if ((desc->pg_ioflags & FLUSH_COND_STABLE) && 1093 if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
994 (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit || 1094 (desc->pg_moreio || nfs_reqs_to_commit(&cinfo) ||
995 desc->pg_count > wsize)) 1095 desc->pg_count > wsize))
996 desc->pg_ioflags &= ~FLUSH_COND_STABLE; 1096 desc->pg_ioflags &= ~FLUSH_COND_STABLE;
997 1097
@@ -1001,28 +1101,22 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head
1001 do { 1101 do {
1002 size_t len = min(nbytes, wsize); 1102 size_t len = min(nbytes, wsize);
1003 1103
1004 data = nfs_writedata_alloc(1); 1104 data = nfs_writedata_alloc(hdr, 1);
1005 if (!data) 1105 if (!data) {
1006 goto out_bad; 1106 nfs_flush_error(desc, hdr);
1007 data->pagevec[0] = page; 1107 return -ENOMEM;
1008 nfs_write_rpcsetup(req, data, len, offset, desc->pg_ioflags); 1108 }
1009 list_add(&data->list, res); 1109 data->pages.pagevec[0] = page;
1110 nfs_write_rpcsetup(data, len, offset, desc->pg_ioflags, &cinfo);
1111 list_add(&data->list, &hdr->rpc_list);
1010 requests++; 1112 requests++;
1011 nbytes -= len; 1113 nbytes -= len;
1012 offset += len; 1114 offset += len;
1013 } while (nbytes != 0); 1115 } while (nbytes != 0);
1014 atomic_set(&req->wb_complete, requests); 1116 nfs_list_remove_request(req);
1015 desc->pg_rpc_callops = &nfs_write_partial_ops; 1117 nfs_list_add_request(req, &hdr->pages);
1016 return ret; 1118 desc->pg_rpc_callops = &nfs_write_common_ops;
1017 1119 return 0;
1018out_bad:
1019 while (!list_empty(res)) {
1020 data = list_entry(res->next, struct nfs_write_data, list);
1021 list_del(&data->list);
1022 nfs_writedata_release(data);
1023 }
1024 nfs_redirty_request(req);
1025 return -ENOMEM;
1026} 1120}
1027 1121
1028/* 1122/*
@@ -1033,62 +1127,71 @@ out_bad:
1033 * This is the case if nfs_updatepage detects a conflicting request 1127 * This is the case if nfs_updatepage detects a conflicting request
1034 * that has been written but not committed. 1128 * that has been written but not committed.
1035 */ 1129 */
1036static int nfs_flush_one(struct nfs_pageio_descriptor *desc, struct list_head *res) 1130static int nfs_flush_one(struct nfs_pageio_descriptor *desc,
1131 struct nfs_pgio_header *hdr)
1037{ 1132{
1038 struct nfs_page *req; 1133 struct nfs_page *req;
1039 struct page **pages; 1134 struct page **pages;
1040 struct nfs_write_data *data; 1135 struct nfs_write_data *data;
1041 struct list_head *head = &desc->pg_list; 1136 struct list_head *head = &desc->pg_list;
1042 int ret = 0; 1137 struct nfs_commit_info cinfo;
1043 1138
1044 data = nfs_writedata_alloc(nfs_page_array_len(desc->pg_base, 1139 data = nfs_writedata_alloc(hdr, nfs_page_array_len(desc->pg_base,
1045 desc->pg_count)); 1140 desc->pg_count));
1046 if (!data) { 1141 if (!data) {
1047 while (!list_empty(head)) { 1142 nfs_flush_error(desc, hdr);
1048 req = nfs_list_entry(head->next); 1143 return -ENOMEM;
1049 nfs_list_remove_request(req);
1050 nfs_redirty_request(req);
1051 }
1052 ret = -ENOMEM;
1053 goto out;
1054 } 1144 }
1055 pages = data->pagevec; 1145
1146 nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);
1147 pages = data->pages.pagevec;
1056 while (!list_empty(head)) { 1148 while (!list_empty(head)) {
1057 req = nfs_list_entry(head->next); 1149 req = nfs_list_entry(head->next);
1058 nfs_list_remove_request(req); 1150 nfs_list_remove_request(req);
1059 nfs_list_add_request(req, &data->pages); 1151 nfs_list_add_request(req, &hdr->pages);
1060 *pages++ = req->wb_page; 1152 *pages++ = req->wb_page;
1061 } 1153 }
1062 req = nfs_list_entry(data->pages.next);
1063 1154
1064 if ((desc->pg_ioflags & FLUSH_COND_STABLE) && 1155 if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
1065 (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit)) 1156 (desc->pg_moreio || nfs_reqs_to_commit(&cinfo)))
1066 desc->pg_ioflags &= ~FLUSH_COND_STABLE; 1157 desc->pg_ioflags &= ~FLUSH_COND_STABLE;
1067 1158
1068 /* Set up the argument struct */ 1159 /* Set up the argument struct */
1069 nfs_write_rpcsetup(req, data, desc->pg_count, 0, desc->pg_ioflags); 1160 nfs_write_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags, &cinfo);
1070 list_add(&data->list, res); 1161 list_add(&data->list, &hdr->rpc_list);
1071 desc->pg_rpc_callops = &nfs_write_full_ops; 1162 desc->pg_rpc_callops = &nfs_write_common_ops;
1072out: 1163 return 0;
1073 return ret;
1074} 1164}
1075 1165
1076int nfs_generic_flush(struct nfs_pageio_descriptor *desc, struct list_head *head) 1166int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
1167 struct nfs_pgio_header *hdr)
1077{ 1168{
1078 if (desc->pg_bsize < PAGE_CACHE_SIZE) 1169 if (desc->pg_bsize < PAGE_CACHE_SIZE)
1079 return nfs_flush_multi(desc, head); 1170 return nfs_flush_multi(desc, hdr);
1080 return nfs_flush_one(desc, head); 1171 return nfs_flush_one(desc, hdr);
1081} 1172}
1082 1173
1083static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) 1174static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
1084{ 1175{
1085 LIST_HEAD(head); 1176 struct nfs_write_header *whdr;
1177 struct nfs_pgio_header *hdr;
1086 int ret; 1178 int ret;
1087 1179
1088 ret = nfs_generic_flush(desc, &head); 1180 whdr = nfs_writehdr_alloc();
1181 if (!whdr) {
1182 desc->pg_completion_ops->error_cleanup(&desc->pg_list);
1183 return -ENOMEM;
1184 }
1185 hdr = &whdr->header;
1186 nfs_pgheader_init(desc, hdr, nfs_writehdr_free);
1187 atomic_inc(&hdr->refcnt);
1188 ret = nfs_generic_flush(desc, hdr);
1089 if (ret == 0) 1189 if (ret == 0)
1090 ret = nfs_do_multiple_writes(&head, desc->pg_rpc_callops, 1190 ret = nfs_do_multiple_writes(&hdr->rpc_list,
1091 desc->pg_ioflags); 1191 desc->pg_rpc_callops,
1192 desc->pg_ioflags);
1193 if (atomic_dec_and_test(&hdr->refcnt))
1194 hdr->completion_ops->completion(hdr);
1092 return ret; 1195 return ret;
1093} 1196}
1094 1197
@@ -1098,9 +1201,10 @@ static const struct nfs_pageio_ops nfs_pageio_write_ops = {
1098}; 1201};
1099 1202
1100void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, 1203void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
1101 struct inode *inode, int ioflags) 1204 struct inode *inode, int ioflags,
1205 const struct nfs_pgio_completion_ops *compl_ops)
1102{ 1206{
1103 nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops, 1207 nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops, compl_ops,
1104 NFS_SERVER(inode)->wsize, ioflags); 1208 NFS_SERVER(inode)->wsize, ioflags);
1105} 1209}
1106 1210
@@ -1111,80 +1215,27 @@ void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio)
1111} 1215}
1112EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds); 1216EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds);
1113 1217
1114static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, 1218void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
1115 struct inode *inode, int ioflags) 1219 struct inode *inode, int ioflags,
1220 const struct nfs_pgio_completion_ops *compl_ops)
1116{ 1221{
1117 if (!pnfs_pageio_init_write(pgio, inode, ioflags)) 1222 if (!pnfs_pageio_init_write(pgio, inode, ioflags, compl_ops))
1118 nfs_pageio_init_write_mds(pgio, inode, ioflags); 1223 nfs_pageio_init_write_mds(pgio, inode, ioflags, compl_ops);
1119} 1224}
1120 1225
1121/* 1226void nfs_write_prepare(struct rpc_task *task, void *calldata)
1122 * Handle a write reply that flushed part of a page.
1123 */
1124static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
1125{ 1227{
1126 struct nfs_write_data *data = calldata; 1228 struct nfs_write_data *data = calldata;
1127 1229 NFS_PROTO(data->header->inode)->write_rpc_prepare(task, data);
1128 dprintk("NFS: %5u write(%s/%lld %d@%lld)",
1129 task->tk_pid,
1130 data->req->wb_context->dentry->d_inode->i_sb->s_id,
1131 (long long)
1132 NFS_FILEID(data->req->wb_context->dentry->d_inode),
1133 data->req->wb_bytes, (long long)req_offset(data->req));
1134
1135 nfs_writeback_done(task, data);
1136} 1230}
1137 1231
1138static void nfs_writeback_release_partial(void *calldata) 1232void nfs_commit_prepare(struct rpc_task *task, void *calldata)
1139{ 1233{
1140 struct nfs_write_data *data = calldata; 1234 struct nfs_commit_data *data = calldata;
1141 struct nfs_page *req = data->req;
1142 struct page *page = req->wb_page;
1143 int status = data->task.tk_status;
1144 1235
1145 if (status < 0) { 1236 NFS_PROTO(data->inode)->commit_rpc_prepare(task, data);
1146 nfs_set_pageerror(page);
1147 nfs_context_set_write_error(req->wb_context, status);
1148 dprintk(", error = %d\n", status);
1149 goto out;
1150 }
1151
1152 if (nfs_write_need_commit(data)) {
1153 struct inode *inode = page->mapping->host;
1154
1155 spin_lock(&inode->i_lock);
1156 if (test_bit(PG_NEED_RESCHED, &req->wb_flags)) {
1157 /* Do nothing we need to resend the writes */
1158 } else if (!test_and_set_bit(PG_NEED_COMMIT, &req->wb_flags)) {
1159 memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
1160 dprintk(" defer commit\n");
1161 } else if (memcmp(&req->wb_verf, &data->verf, sizeof(req->wb_verf))) {
1162 set_bit(PG_NEED_RESCHED, &req->wb_flags);
1163 clear_bit(PG_NEED_COMMIT, &req->wb_flags);
1164 dprintk(" server reboot detected\n");
1165 }
1166 spin_unlock(&inode->i_lock);
1167 } else
1168 dprintk(" OK\n");
1169
1170out:
1171 if (atomic_dec_and_test(&req->wb_complete))
1172 nfs_writepage_release(req, data);
1173 nfs_writedata_release(calldata);
1174} 1237}
1175 1238
1176void nfs_write_prepare(struct rpc_task *task, void *calldata)
1177{
1178 struct nfs_write_data *data = calldata;
1179 NFS_PROTO(data->inode)->write_rpc_prepare(task, data);
1180}
1181
1182static const struct rpc_call_ops nfs_write_partial_ops = {
1183 .rpc_call_prepare = nfs_write_prepare,
1184 .rpc_call_done = nfs_writeback_done_partial,
1185 .rpc_release = nfs_writeback_release_partial,
1186};
1187
1188/* 1239/*
1189 * Handle a write reply that flushes a whole page. 1240 * Handle a write reply that flushes a whole page.
1190 * 1241 *
@@ -1192,59 +1243,37 @@ static const struct rpc_call_ops nfs_write_partial_ops = {
1192 * writebacks since the page->count is kept > 1 for as long 1243 * writebacks since the page->count is kept > 1 for as long
1193 * as the page has a write request pending. 1244 * as the page has a write request pending.
1194 */ 1245 */
1195static void nfs_writeback_done_full(struct rpc_task *task, void *calldata) 1246static void nfs_writeback_done_common(struct rpc_task *task, void *calldata)
1196{ 1247{
1197 struct nfs_write_data *data = calldata; 1248 struct nfs_write_data *data = calldata;
1198 1249
1199 nfs_writeback_done(task, data); 1250 nfs_writeback_done(task, data);
1200} 1251}
1201 1252
1202static void nfs_writeback_release_full(void *calldata) 1253static void nfs_writeback_release_common(void *calldata)
1203{ 1254{
1204 struct nfs_write_data *data = calldata; 1255 struct nfs_write_data *data = calldata;
1256 struct nfs_pgio_header *hdr = data->header;
1205 int status = data->task.tk_status; 1257 int status = data->task.tk_status;
1258 struct nfs_page *req = hdr->req;
1206 1259
1207 /* Update attributes as result of writeback. */ 1260 if ((status >= 0) && nfs_write_need_commit(data)) {
1208 while (!list_empty(&data->pages)) { 1261 spin_lock(&hdr->lock);
1209 struct nfs_page *req = nfs_list_entry(data->pages.next); 1262 if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags))
1210 struct page *page = req->wb_page; 1263 ; /* Do nothing */
1211 1264 else if (!test_and_set_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags))
1212 nfs_list_remove_request(req);
1213
1214 dprintk("NFS: %5u write (%s/%lld %d@%lld)",
1215 data->task.tk_pid,
1216 req->wb_context->dentry->d_inode->i_sb->s_id,
1217 (long long)NFS_FILEID(req->wb_context->dentry->d_inode),
1218 req->wb_bytes,
1219 (long long)req_offset(req));
1220
1221 if (status < 0) {
1222 nfs_set_pageerror(page);
1223 nfs_context_set_write_error(req->wb_context, status);
1224 dprintk(", error = %d\n", status);
1225 goto remove_request;
1226 }
1227
1228 if (nfs_write_need_commit(data)) {
1229 memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf)); 1265 memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
1230 nfs_mark_request_commit(req, data->lseg); 1266 else if (memcmp(&req->wb_verf, &data->verf, sizeof(req->wb_verf)))
1231 dprintk(" marked for commit\n"); 1267 set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags);
1232 goto next; 1268 spin_unlock(&hdr->lock);
1233 }
1234 dprintk(" OK\n");
1235remove_request:
1236 nfs_inode_remove_request(req);
1237 next:
1238 nfs_unlock_request(req);
1239 nfs_end_page_writeback(page);
1240 } 1269 }
1241 nfs_writedata_release(calldata); 1270 nfs_writedata_release(data);
1242} 1271}
1243 1272
1244static const struct rpc_call_ops nfs_write_full_ops = { 1273static const struct rpc_call_ops nfs_write_common_ops = {
1245 .rpc_call_prepare = nfs_write_prepare, 1274 .rpc_call_prepare = nfs_write_prepare,
1246 .rpc_call_done = nfs_writeback_done_full, 1275 .rpc_call_done = nfs_writeback_done_common,
1247 .rpc_release = nfs_writeback_release_full, 1276 .rpc_release = nfs_writeback_release_common,
1248}; 1277};
1249 1278
1250 1279
@@ -1255,6 +1284,7 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1255{ 1284{
1256 struct nfs_writeargs *argp = &data->args; 1285 struct nfs_writeargs *argp = &data->args;
1257 struct nfs_writeres *resp = &data->res; 1286 struct nfs_writeres *resp = &data->res;
1287 struct inode *inode = data->header->inode;
1258 int status; 1288 int status;
1259 1289
1260 dprintk("NFS: %5u nfs_writeback_done (status %d)\n", 1290 dprintk("NFS: %5u nfs_writeback_done (status %d)\n",
@@ -1267,10 +1297,10 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1267 * another writer had changed the file, but some applications 1297 * another writer had changed the file, but some applications
1268 * depend on tighter cache coherency when writing. 1298 * depend on tighter cache coherency when writing.
1269 */ 1299 */
1270 status = NFS_PROTO(data->inode)->write_done(task, data); 1300 status = NFS_PROTO(inode)->write_done(task, data);
1271 if (status != 0) 1301 if (status != 0)
1272 return; 1302 return;
1273 nfs_add_stats(data->inode, NFSIOS_SERVERWRITTENBYTES, resp->count); 1303 nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, resp->count);
1274 1304
1275#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) 1305#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
1276 if (resp->verf->committed < argp->stable && task->tk_status >= 0) { 1306 if (resp->verf->committed < argp->stable && task->tk_status >= 0) {
@@ -1288,46 +1318,47 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1288 if (time_before(complain, jiffies)) { 1318 if (time_before(complain, jiffies)) {
1289 dprintk("NFS: faulty NFS server %s:" 1319 dprintk("NFS: faulty NFS server %s:"
1290 " (committed = %d) != (stable = %d)\n", 1320 " (committed = %d) != (stable = %d)\n",
1291 NFS_SERVER(data->inode)->nfs_client->cl_hostname, 1321 NFS_SERVER(inode)->nfs_client->cl_hostname,
1292 resp->verf->committed, argp->stable); 1322 resp->verf->committed, argp->stable);
1293 complain = jiffies + 300 * HZ; 1323 complain = jiffies + 300 * HZ;
1294 } 1324 }
1295 } 1325 }
1296#endif 1326#endif
1297 /* Is this a short write? */ 1327 if (task->tk_status < 0)
1298 if (task->tk_status >= 0 && resp->count < argp->count) { 1328 nfs_set_pgio_error(data->header, task->tk_status, argp->offset);
1329 else if (resp->count < argp->count) {
1299 static unsigned long complain; 1330 static unsigned long complain;
1300 1331
1301 nfs_inc_stats(data->inode, NFSIOS_SHORTWRITE); 1332 /* This a short write! */
1333 nfs_inc_stats(inode, NFSIOS_SHORTWRITE);
1302 1334
1303 /* Has the server at least made some progress? */ 1335 /* Has the server at least made some progress? */
1304 if (resp->count != 0) { 1336 if (resp->count == 0) {
1305 /* Was this an NFSv2 write or an NFSv3 stable write? */ 1337 if (time_before(complain, jiffies)) {
1306 if (resp->verf->committed != NFS_UNSTABLE) { 1338 printk(KERN_WARNING
1307 /* Resend from where the server left off */ 1339 "NFS: Server wrote zero bytes, expected %u.\n",
1308 data->mds_offset += resp->count; 1340 argp->count);
1309 argp->offset += resp->count; 1341 complain = jiffies + 300 * HZ;
1310 argp->pgbase += resp->count;
1311 argp->count -= resp->count;
1312 } else {
1313 /* Resend as a stable write in order to avoid
1314 * headaches in the case of a server crash.
1315 */
1316 argp->stable = NFS_FILE_SYNC;
1317 } 1342 }
1318 rpc_restart_call_prepare(task); 1343 nfs_set_pgio_error(data->header, -EIO, argp->offset);
1344 task->tk_status = -EIO;
1319 return; 1345 return;
1320 } 1346 }
1321 if (time_before(complain, jiffies)) { 1347 /* Was this an NFSv2 write or an NFSv3 stable write? */
1322 printk(KERN_WARNING 1348 if (resp->verf->committed != NFS_UNSTABLE) {
1323 "NFS: Server wrote zero bytes, expected %u.\n", 1349 /* Resend from where the server left off */
1324 argp->count); 1350 data->mds_offset += resp->count;
1325 complain = jiffies + 300 * HZ; 1351 argp->offset += resp->count;
1352 argp->pgbase += resp->count;
1353 argp->count -= resp->count;
1354 } else {
1355 /* Resend as a stable write in order to avoid
1356 * headaches in the case of a server crash.
1357 */
1358 argp->stable = NFS_FILE_SYNC;
1326 } 1359 }
1327 /* Can't do anything about it except throw an error. */ 1360 rpc_restart_call_prepare(task);
1328 task->tk_status = -EIO;
1329 } 1361 }
1330 return;
1331} 1362}
1332 1363
1333 1364
@@ -1347,26 +1378,23 @@ static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait)
1347 return (ret < 0) ? ret : 1; 1378 return (ret < 0) ? ret : 1;
1348} 1379}
1349 1380
1350void nfs_commit_clear_lock(struct nfs_inode *nfsi) 1381static void nfs_commit_clear_lock(struct nfs_inode *nfsi)
1351{ 1382{
1352 clear_bit(NFS_INO_COMMIT, &nfsi->flags); 1383 clear_bit(NFS_INO_COMMIT, &nfsi->flags);
1353 smp_mb__after_clear_bit(); 1384 smp_mb__after_clear_bit();
1354 wake_up_bit(&nfsi->flags, NFS_INO_COMMIT); 1385 wake_up_bit(&nfsi->flags, NFS_INO_COMMIT);
1355} 1386}
1356EXPORT_SYMBOL_GPL(nfs_commit_clear_lock);
1357 1387
1358void nfs_commitdata_release(void *data) 1388void nfs_commitdata_release(struct nfs_commit_data *data)
1359{ 1389{
1360 struct nfs_write_data *wdata = data; 1390 put_nfs_open_context(data->context);
1361 1391 nfs_commit_free(data);
1362 put_nfs_open_context(wdata->args.context);
1363 nfs_commit_free(wdata);
1364} 1392}
1365EXPORT_SYMBOL_GPL(nfs_commitdata_release); 1393EXPORT_SYMBOL_GPL(nfs_commitdata_release);
1366 1394
1367int nfs_initiate_commit(struct nfs_write_data *data, struct rpc_clnt *clnt, 1395int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data,
1368 const struct rpc_call_ops *call_ops, 1396 const struct rpc_call_ops *call_ops,
1369 int how) 1397 int how, int flags)
1370{ 1398{
1371 struct rpc_task *task; 1399 struct rpc_task *task;
1372 int priority = flush_task_priority(how); 1400 int priority = flush_task_priority(how);
@@ -1382,7 +1410,7 @@ int nfs_initiate_commit(struct nfs_write_data *data, struct rpc_clnt *clnt,
1382 .callback_ops = call_ops, 1410 .callback_ops = call_ops,
1383 .callback_data = data, 1411 .callback_data = data,
1384 .workqueue = nfsiod_workqueue, 1412 .workqueue = nfsiod_workqueue,
1385 .flags = RPC_TASK_ASYNC, 1413 .flags = RPC_TASK_ASYNC | flags,
1386 .priority = priority, 1414 .priority = priority,
1387 }; 1415 };
1388 /* Set up the initial task struct. */ 1416 /* Set up the initial task struct. */
@@ -1403,9 +1431,10 @@ EXPORT_SYMBOL_GPL(nfs_initiate_commit);
1403/* 1431/*
1404 * Set up the argument/result storage required for the RPC call. 1432 * Set up the argument/result storage required for the RPC call.
1405 */ 1433 */
1406void nfs_init_commit(struct nfs_write_data *data, 1434void nfs_init_commit(struct nfs_commit_data *data,
1407 struct list_head *head, 1435 struct list_head *head,
1408 struct pnfs_layout_segment *lseg) 1436 struct pnfs_layout_segment *lseg,
1437 struct nfs_commit_info *cinfo)
1409{ 1438{
1410 struct nfs_page *first = nfs_list_entry(head->next); 1439 struct nfs_page *first = nfs_list_entry(head->next);
1411 struct inode *inode = first->wb_context->dentry->d_inode; 1440 struct inode *inode = first->wb_context->dentry->d_inode;
@@ -1419,13 +1448,14 @@ void nfs_init_commit(struct nfs_write_data *data,
1419 data->cred = first->wb_context->cred; 1448 data->cred = first->wb_context->cred;
1420 data->lseg = lseg; /* reference transferred */ 1449 data->lseg = lseg; /* reference transferred */
1421 data->mds_ops = &nfs_commit_ops; 1450 data->mds_ops = &nfs_commit_ops;
1451 data->completion_ops = cinfo->completion_ops;
1452 data->dreq = cinfo->dreq;
1422 1453
1423 data->args.fh = NFS_FH(data->inode); 1454 data->args.fh = NFS_FH(data->inode);
1424 /* Note: we always request a commit of the entire inode */ 1455 /* Note: we always request a commit of the entire inode */
1425 data->args.offset = 0; 1456 data->args.offset = 0;
1426 data->args.count = 0; 1457 data->args.count = 0;
1427 data->args.context = get_nfs_open_context(first->wb_context); 1458 data->context = get_nfs_open_context(first->wb_context);
1428 data->res.count = 0;
1429 data->res.fattr = &data->fattr; 1459 data->res.fattr = &data->fattr;
1430 data->res.verf = &data->verf; 1460 data->res.verf = &data->verf;
1431 nfs_fattr_init(&data->fattr); 1461 nfs_fattr_init(&data->fattr);
@@ -1433,18 +1463,21 @@ void nfs_init_commit(struct nfs_write_data *data,
1433EXPORT_SYMBOL_GPL(nfs_init_commit); 1463EXPORT_SYMBOL_GPL(nfs_init_commit);
1434 1464
1435void nfs_retry_commit(struct list_head *page_list, 1465void nfs_retry_commit(struct list_head *page_list,
1436 struct pnfs_layout_segment *lseg) 1466 struct pnfs_layout_segment *lseg,
1467 struct nfs_commit_info *cinfo)
1437{ 1468{
1438 struct nfs_page *req; 1469 struct nfs_page *req;
1439 1470
1440 while (!list_empty(page_list)) { 1471 while (!list_empty(page_list)) {
1441 req = nfs_list_entry(page_list->next); 1472 req = nfs_list_entry(page_list->next);
1442 nfs_list_remove_request(req); 1473 nfs_list_remove_request(req);
1443 nfs_mark_request_commit(req, lseg); 1474 nfs_mark_request_commit(req, lseg, cinfo);
1444 dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); 1475 if (!cinfo->dreq) {
1445 dec_bdi_stat(req->wb_page->mapping->backing_dev_info, 1476 dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
1446 BDI_RECLAIMABLE); 1477 dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
1447 nfs_unlock_request(req); 1478 BDI_RECLAIMABLE);
1479 }
1480 nfs_unlock_and_release_request(req);
1448 } 1481 }
1449} 1482}
1450EXPORT_SYMBOL_GPL(nfs_retry_commit); 1483EXPORT_SYMBOL_GPL(nfs_retry_commit);
@@ -1453,9 +1486,10 @@ EXPORT_SYMBOL_GPL(nfs_retry_commit);
1453 * Commit dirty pages 1486 * Commit dirty pages
1454 */ 1487 */
1455static int 1488static int
1456nfs_commit_list(struct inode *inode, struct list_head *head, int how) 1489nfs_commit_list(struct inode *inode, struct list_head *head, int how,
1490 struct nfs_commit_info *cinfo)
1457{ 1491{
1458 struct nfs_write_data *data; 1492 struct nfs_commit_data *data;
1459 1493
1460 data = nfs_commitdata_alloc(); 1494 data = nfs_commitdata_alloc();
1461 1495
@@ -1463,11 +1497,13 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
1463 goto out_bad; 1497 goto out_bad;
1464 1498
1465 /* Set up the argument struct */ 1499 /* Set up the argument struct */
1466 nfs_init_commit(data, head, NULL); 1500 nfs_init_commit(data, head, NULL, cinfo);
1467 return nfs_initiate_commit(data, NFS_CLIENT(inode), data->mds_ops, how); 1501 atomic_inc(&cinfo->mds->rpcs_out);
1502 return nfs_initiate_commit(NFS_CLIENT(inode), data, data->mds_ops,
1503 how, 0);
1468 out_bad: 1504 out_bad:
1469 nfs_retry_commit(head, NULL); 1505 nfs_retry_commit(head, NULL, cinfo);
1470 nfs_commit_clear_lock(NFS_I(inode)); 1506 cinfo->completion_ops->error_cleanup(NFS_I(inode));
1471 return -ENOMEM; 1507 return -ENOMEM;
1472} 1508}
1473 1509
@@ -1476,7 +1512,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
1476 */ 1512 */
1477static void nfs_commit_done(struct rpc_task *task, void *calldata) 1513static void nfs_commit_done(struct rpc_task *task, void *calldata)
1478{ 1514{
1479 struct nfs_write_data *data = calldata; 1515 struct nfs_commit_data *data = calldata;
1480 1516
1481 dprintk("NFS: %5u nfs_commit_done (status %d)\n", 1517 dprintk("NFS: %5u nfs_commit_done (status %d)\n",
1482 task->tk_pid, task->tk_status); 1518 task->tk_pid, task->tk_status);
@@ -1485,10 +1521,11 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
1485 NFS_PROTO(data->inode)->commit_done(task, data); 1521 NFS_PROTO(data->inode)->commit_done(task, data);
1486} 1522}
1487 1523
1488void nfs_commit_release_pages(struct nfs_write_data *data) 1524static void nfs_commit_release_pages(struct nfs_commit_data *data)
1489{ 1525{
1490 struct nfs_page *req; 1526 struct nfs_page *req;
1491 int status = data->task.tk_status; 1527 int status = data->task.tk_status;
1528 struct nfs_commit_info cinfo;
1492 1529
1493 while (!list_empty(&data->pages)) { 1530 while (!list_empty(&data->pages)) {
1494 req = nfs_list_entry(data->pages.next); 1531 req = nfs_list_entry(data->pages.next);
@@ -1519,42 +1556,59 @@ void nfs_commit_release_pages(struct nfs_write_data *data)
1519 dprintk(" mismatch\n"); 1556 dprintk(" mismatch\n");
1520 nfs_mark_request_dirty(req); 1557 nfs_mark_request_dirty(req);
1521 next: 1558 next:
1522 nfs_unlock_request(req); 1559 nfs_unlock_and_release_request(req);
1523 } 1560 }
1561 nfs_init_cinfo(&cinfo, data->inode, data->dreq);
1562 if (atomic_dec_and_test(&cinfo.mds->rpcs_out))
1563 nfs_commit_clear_lock(NFS_I(data->inode));
1524} 1564}
1525EXPORT_SYMBOL_GPL(nfs_commit_release_pages);
1526 1565
1527static void nfs_commit_release(void *calldata) 1566static void nfs_commit_release(void *calldata)
1528{ 1567{
1529 struct nfs_write_data *data = calldata; 1568 struct nfs_commit_data *data = calldata;
1530 1569
1531 nfs_commit_release_pages(data); 1570 data->completion_ops->completion(data);
1532 nfs_commit_clear_lock(NFS_I(data->inode));
1533 nfs_commitdata_release(calldata); 1571 nfs_commitdata_release(calldata);
1534} 1572}
1535 1573
1536static const struct rpc_call_ops nfs_commit_ops = { 1574static const struct rpc_call_ops nfs_commit_ops = {
1537 .rpc_call_prepare = nfs_write_prepare, 1575 .rpc_call_prepare = nfs_commit_prepare,
1538 .rpc_call_done = nfs_commit_done, 1576 .rpc_call_done = nfs_commit_done,
1539 .rpc_release = nfs_commit_release, 1577 .rpc_release = nfs_commit_release,
1540}; 1578};
1541 1579
1580static const struct nfs_commit_completion_ops nfs_commit_completion_ops = {
1581 .completion = nfs_commit_release_pages,
1582 .error_cleanup = nfs_commit_clear_lock,
1583};
1584
1585int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
1586 int how, struct nfs_commit_info *cinfo)
1587{
1588 int status;
1589
1590 status = pnfs_commit_list(inode, head, how, cinfo);
1591 if (status == PNFS_NOT_ATTEMPTED)
1592 status = nfs_commit_list(inode, head, how, cinfo);
1593 return status;
1594}
1595
1542int nfs_commit_inode(struct inode *inode, int how) 1596int nfs_commit_inode(struct inode *inode, int how)
1543{ 1597{
1544 LIST_HEAD(head); 1598 LIST_HEAD(head);
1599 struct nfs_commit_info cinfo;
1545 int may_wait = how & FLUSH_SYNC; 1600 int may_wait = how & FLUSH_SYNC;
1546 int res; 1601 int res;
1547 1602
1548 res = nfs_commit_set_lock(NFS_I(inode), may_wait); 1603 res = nfs_commit_set_lock(NFS_I(inode), may_wait);
1549 if (res <= 0) 1604 if (res <= 0)
1550 goto out_mark_dirty; 1605 goto out_mark_dirty;
1551 res = nfs_scan_commit(inode, &head); 1606 nfs_init_cinfo_from_inode(&cinfo, inode);
1607 res = nfs_scan_commit(inode, &head, &cinfo);
1552 if (res) { 1608 if (res) {
1553 int error; 1609 int error;
1554 1610
1555 error = pnfs_commit_list(inode, &head, how); 1611 error = nfs_generic_commit_list(inode, &head, how, &cinfo);
1556 if (error == PNFS_NOT_ATTEMPTED)
1557 error = nfs_commit_list(inode, &head, how);
1558 if (error < 0) 1612 if (error < 0)
1559 return error; 1613 return error;
1560 if (!may_wait) 1614 if (!may_wait)
@@ -1585,14 +1639,14 @@ static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_contr
1585 int ret = 0; 1639 int ret = 0;
1586 1640
1587 /* no commits means nothing needs to be done */ 1641 /* no commits means nothing needs to be done */
1588 if (!nfsi->ncommit) 1642 if (!nfsi->commit_info.ncommit)
1589 return ret; 1643 return ret;
1590 1644
1591 if (wbc->sync_mode == WB_SYNC_NONE) { 1645 if (wbc->sync_mode == WB_SYNC_NONE) {
1592 /* Don't commit yet if this is a non-blocking flush and there 1646 /* Don't commit yet if this is a non-blocking flush and there
1593 * are a lot of outstanding writes for this mapping. 1647 * are a lot of outstanding writes for this mapping.
1594 */ 1648 */
1595 if (nfsi->ncommit <= (nfsi->npages >> 1)) 1649 if (nfsi->commit_info.ncommit <= (nfsi->npages >> 1))
1596 goto out_mark_dirty; 1650 goto out_mark_dirty;
1597 1651
1598 /* don't wait for the COMMIT response */ 1652 /* don't wait for the COMMIT response */
@@ -1665,7 +1719,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
1665 req = nfs_page_find_request(page); 1719 req = nfs_page_find_request(page);
1666 if (req == NULL) 1720 if (req == NULL)
1667 break; 1721 break;
1668 if (nfs_lock_request_dontget(req)) { 1722 if (nfs_lock_request(req)) {
1669 nfs_clear_request_commit(req); 1723 nfs_clear_request_commit(req);
1670 nfs_inode_remove_request(req); 1724 nfs_inode_remove_request(req);
1671 /* 1725 /*
@@ -1673,7 +1727,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
1673 * page as being dirty 1727 * page as being dirty
1674 */ 1728 */
1675 cancel_dirty_page(page, PAGE_CACHE_SIZE); 1729 cancel_dirty_page(page, PAGE_CACHE_SIZE);
1676 nfs_unlock_request(req); 1730 nfs_unlock_and_release_request(req);
1677 break; 1731 break;
1678 } 1732 }
1679 ret = nfs_wait_on_request(req); 1733 ret = nfs_wait_on_request(req);
@@ -1742,7 +1796,7 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
1742int __init nfs_init_writepagecache(void) 1796int __init nfs_init_writepagecache(void)
1743{ 1797{
1744 nfs_wdata_cachep = kmem_cache_create("nfs_write_data", 1798 nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
1745 sizeof(struct nfs_write_data), 1799 sizeof(struct nfs_write_header),
1746 0, SLAB_HWCACHE_ALIGN, 1800 0, SLAB_HWCACHE_ALIGN,
1747 NULL); 1801 NULL);
1748 if (nfs_wdata_cachep == NULL) 1802 if (nfs_wdata_cachep == NULL)
@@ -1753,6 +1807,13 @@ int __init nfs_init_writepagecache(void)
1753 if (nfs_wdata_mempool == NULL) 1807 if (nfs_wdata_mempool == NULL)
1754 return -ENOMEM; 1808 return -ENOMEM;
1755 1809
1810 nfs_cdata_cachep = kmem_cache_create("nfs_commit_data",
1811 sizeof(struct nfs_commit_data),
1812 0, SLAB_HWCACHE_ALIGN,
1813 NULL);
1814 if (nfs_cdata_cachep == NULL)
1815 return -ENOMEM;
1816
1756 nfs_commit_mempool = mempool_create_slab_pool(MIN_POOL_COMMIT, 1817 nfs_commit_mempool = mempool_create_slab_pool(MIN_POOL_COMMIT,
1757 nfs_wdata_cachep); 1818 nfs_wdata_cachep);
1758 if (nfs_commit_mempool == NULL) 1819 if (nfs_commit_mempool == NULL)