path: root/fs/nfs
author    Linus Torvalds <torvalds@linux-foundation.org>  2014-06-10 18:02:42 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2014-06-10 18:02:42 -0400
commit    d1e1cda862c16252087374ac75949b0e89a5717e (patch)
tree      544ce467bed23638949a1991b4f7b00e7472baa4 /fs/nfs
parent    07888238f55056605cd23aa4ea3ca97d5e15938f (diff)
parent    a914722f333b3359d2f4f12919380a334176bb89 (diff)
Merge tag 'nfs-for-3.16-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
Pull NFS client updates from Trond Myklebust:
 "Highlights include:

   - massive cleanup of the NFS read/write code by Anna and Dros

   - support multiple NFS read/write requests per page in order to deal
     with non-page aligned pNFS striping.  Also cleans up the r/wsize <
     page size code nicely.

   - stable fix for ensuring inode is declared uptodate only after all
     the attributes have been checked.

   - stable fix for a kernel Oops when remounting

   - NFS over RDMA client fixes

   - move the pNFS files layout driver into its own subdirectory"

* tag 'nfs-for-3.16-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (79 commits)
  NFS: populate ->net in mount data when remounting
  pnfs: fix lockup caused by pnfs_generic_pg_test
  NFSv4.1: Fix typo in dprintk
  NFSv4.1: Comment is now wrong and redundant to code
  NFS: Use raw_write_seqcount_begin/end int nfs4_reclaim_open_state
  xprtrdma: Disconnect on registration failure
  xprtrdma: Remove BUG_ON() call sites
  xprtrdma: Avoid deadlock when credit window is reset
  SUNRPC: Move congestion window constants to header file
  xprtrdma: Reset connection timeout after successful reconnect
  xprtrdma: Use macros for reconnection timeout constants
  xprtrdma: Allocate missing pagelist
  xprtrdma: Remove Tavor MTU setting
  xprtrdma: Ensure ia->ri_id->qp is not NULL when reconnecting
  xprtrdma: Reduce the number of hardway buffer allocations
  xprtrdma: Limit work done by completion handler
  xprtrmda: Reduce calls to ib_poll_cq() in completion handlers
  xprtrmda: Reduce lock contention in completion handlers
  xprtrdma: Split the completion queue
  xprtrdma: Make rpcrdma_ep_destroy() return void
  ...
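The "multiple requests per page" work summarized above shows up in this diff as a change of the pg_test callbacks: they now return the number of bytes of a request that may be coalesced into the current pageio descriptor (0 meaning "do not coalesce") instead of a bool. The standalone sketch below only illustrates the stripe-boundary arithmetic used by filelayout_pg_test() further down; the function and variable names here are hypothetical and this is plain userspace C, not kernel code.

#include <stdint.h>
#include <stdio.h>

/*
 * Illustration only: how many bytes of a request starting at req_offset
 * can be coalesced before the next stripe boundary is crossed.
 * stripe_unit and segment_offset mirror the values filelayout_pg_test()
 * works with; bytes_left_in_stripe() is a made-up name.
 */
static uint32_t bytes_left_in_stripe(uint64_t req_offset, uint64_t segment_offset,
                                     uint32_t stripe_unit, uint32_t req_bytes)
{
        uint32_t stripe_offset = (uint32_t)((req_offset - segment_offset) % stripe_unit);
        uint32_t left = stripe_unit - stripe_offset;

        /* coalesce at most up to the end of the current stripe */
        return left < req_bytes ? left : req_bytes;
}

int main(void)
{
        /* 64KB stripes: a 16KB request starting 60KB into a stripe may only coalesce 4KB */
        printf("%u\n", bytes_left_in_stripe(60 * 1024, 0, 64 * 1024, 16 * 1024));
        return 0;
}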
Diffstat (limited to 'fs/nfs')
-rw-r--r--  fs/nfs/Makefile                                                     4
-rw-r--r--  fs/nfs/blocklayout/blocklayout.c                                   38
-rw-r--r--  fs/nfs/direct.c                                                   117
-rw-r--r--  fs/nfs/filelayout/Makefile                                          5
-rw-r--r--  fs/nfs/filelayout/filelayout.c (renamed from fs/nfs/nfs4filelayout.c)        203
-rw-r--r--  fs/nfs/filelayout/filelayout.h (renamed from fs/nfs/nfs4filelayout.h)          2
-rw-r--r--  fs/nfs/filelayout/filelayoutdev.c (renamed from fs/nfs/nfs4filelayoutdev.c)    6
-rw-r--r--  fs/nfs/getroot.c                                                    3
-rw-r--r--  fs/nfs/inode.c                                                     26
-rw-r--r--  fs/nfs/internal.h                                                  33
-rw-r--r--  fs/nfs/nfs2xdr.c                                                   14
-rw-r--r--  fs/nfs/nfs3proc.c                                                  21
-rw-r--r--  fs/nfs/nfs3xdr.c                                                   16
-rw-r--r--  fs/nfs/nfs4_fs.h                                                    4
-rw-r--r--  fs/nfs/nfs4file.c                                                   3
-rw-r--r--  fs/nfs/nfs4proc.c                                                  56
-rw-r--r--  fs/nfs/nfs4state.c                                                  6
-rw-r--r--  fs/nfs/nfs4trace.h                                                  8
-rw-r--r--  fs/nfs/nfs4xdr.c                                                   19
-rw-r--r--  fs/nfs/objlayout/objio_osd.c                                       24
-rw-r--r--  fs/nfs/objlayout/objlayout.c                                       24
-rw-r--r--  fs/nfs/objlayout/objlayout.h                                        8
-rw-r--r--  fs/nfs/pagelist.c                                                 633
-rw-r--r--  fs/nfs/pnfs.c                                                     166
-rw-r--r--  fs/nfs/pnfs.h                                                      30
-rw-r--r--  fs/nfs/proc.c                                                      21
-rw-r--r--  fs/nfs/read.c                                                     414
-rw-r--r--  fs/nfs/super.c                                                     27
-rw-r--r--  fs/nfs/write.c                                                    588
29 files changed, 1316 insertions, 1203 deletions
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index 03192a66c143..4782e0840dcc 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -29,8 +29,6 @@ nfsv4-$(CONFIG_NFS_USE_LEGACY_DNS) += cache_lib.o
 nfsv4-$(CONFIG_SYSCTL) += nfs4sysctl.o
 nfsv4-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o
 
-obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o
-nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o
-
+obj-$(CONFIG_PNFS_FILE_LAYOUT) += filelayout/
 obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayout/
 obj-$(CONFIG_PNFS_BLOCK) += blocklayout/
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 65d849bdf77a..9b431f44fad9 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -210,7 +210,7 @@ static void bl_end_io_read(struct bio *bio, int err)
 SetPageUptodate(bvec->bv_page);
 
 if (err) {
-struct nfs_read_data *rdata = par->data;
+struct nfs_pgio_data *rdata = par->data;
 struct nfs_pgio_header *header = rdata->header;
 
 if (!header->pnfs_error)
@@ -224,17 +224,17 @@ static void bl_end_io_read(struct bio *bio, int err)
 static void bl_read_cleanup(struct work_struct *work)
 {
 struct rpc_task *task;
-struct nfs_read_data *rdata;
+struct nfs_pgio_data *rdata;
 dprintk("%s enter\n", __func__);
 task = container_of(work, struct rpc_task, u.tk_work);
-rdata = container_of(task, struct nfs_read_data, task);
+rdata = container_of(task, struct nfs_pgio_data, task);
 pnfs_ld_read_done(rdata);
 }
 
 static void
 bl_end_par_io_read(void *data, int unused)
 {
-struct nfs_read_data *rdata = data;
+struct nfs_pgio_data *rdata = data;
 
 rdata->task.tk_status = rdata->header->pnfs_error;
 INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup);
@@ -242,7 +242,7 @@ bl_end_par_io_read(void *data, int unused)
 }
 
 static enum pnfs_try_status
-bl_read_pagelist(struct nfs_read_data *rdata)
+bl_read_pagelist(struct nfs_pgio_data *rdata)
 {
 struct nfs_pgio_header *header = rdata->header;
 int i, hole;
@@ -390,7 +390,7 @@ static void bl_end_io_write_zero(struct bio *bio, int err)
 }
 
 if (unlikely(err)) {
-struct nfs_write_data *data = par->data;
+struct nfs_pgio_data *data = par->data;
 struct nfs_pgio_header *header = data->header;
 
 if (!header->pnfs_error)
@@ -405,7 +405,7 @@ static void bl_end_io_write(struct bio *bio, int err)
 {
 struct parallel_io *par = bio->bi_private;
 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
-struct nfs_write_data *data = par->data;
+struct nfs_pgio_data *data = par->data;
 struct nfs_pgio_header *header = data->header;
 
 if (!uptodate) {
@@ -423,10 +423,10 @@ static void bl_end_io_write(struct bio *bio, int err)
 static void bl_write_cleanup(struct work_struct *work)
 {
 struct rpc_task *task;
-struct nfs_write_data *wdata;
+struct nfs_pgio_data *wdata;
 dprintk("%s enter\n", __func__);
 task = container_of(work, struct rpc_task, u.tk_work);
-wdata = container_of(task, struct nfs_write_data, task);
+wdata = container_of(task, struct nfs_pgio_data, task);
 if (likely(!wdata->header->pnfs_error)) {
 /* Marks for LAYOUTCOMMIT */
 mark_extents_written(BLK_LSEG2EXT(wdata->header->lseg),
@@ -438,7 +438,7 @@ static void bl_write_cleanup(struct work_struct *work)
 /* Called when last of bios associated with a bl_write_pagelist call finishes */
 static void bl_end_par_io_write(void *data, int num_se)
 {
-struct nfs_write_data *wdata = data;
+struct nfs_pgio_data *wdata = data;
 
 if (unlikely(wdata->header->pnfs_error)) {
 bl_free_short_extents(&BLK_LSEG2EXT(wdata->header->lseg)->bl_inval,
@@ -673,7 +673,7 @@ check_page:
 }
 
 static enum pnfs_try_status
-bl_write_pagelist(struct nfs_write_data *wdata, int sync)
+bl_write_pagelist(struct nfs_pgio_data *wdata, int sync)
 {
 struct nfs_pgio_header *header = wdata->header;
 int i, ret, npg_zero, pg_index, last = 0;
@@ -1189,13 +1189,17 @@ bl_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
 pnfs_generic_pg_init_read(pgio, req);
 }
 
-static bool
+/*
+ * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
+ * of bytes (maximum @req->wb_bytes) that can be coalesced.
+ */
+static size_t
 bl_pg_test_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
 struct nfs_page *req)
 {
 if (pgio->pg_dreq != NULL &&
 !is_aligned_req(req, SECTOR_SIZE))
-return false;
+return 0;
 
 return pnfs_generic_pg_test(pgio, prev, req);
 }
@@ -1241,13 +1245,17 @@ bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
 }
 }
 
-static bool
+/*
+ * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
+ * of bytes (maximum @req->wb_bytes) that can be coalesced.
+ */
+static size_t
 bl_pg_test_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
 struct nfs_page *req)
 {
 if (pgio->pg_dreq != NULL &&
 !is_aligned_req(req, PAGE_CACHE_SIZE))
-return false;
+return 0;
 
 return pnfs_generic_pg_test(pgio, prev, req);
 }
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index b8797ae6831f..4ad7bc388679 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -108,6 +108,97 @@ static inline int put_dreq(struct nfs_direct_req *dreq)
 return atomic_dec_and_test(&dreq->io_count);
 }
 
+/*
+ * nfs_direct_select_verf - select the right verifier
+ * @dreq - direct request possibly spanning multiple servers
+ * @ds_clp - nfs_client of data server or NULL if MDS / non-pnfs
+ * @ds_idx - index of data server in data server list, only valid if ds_clp set
+ *
+ * returns the correct verifier to use given the role of the server
+ */
+static struct nfs_writeverf *
+nfs_direct_select_verf(struct nfs_direct_req *dreq,
+struct nfs_client *ds_clp,
+int ds_idx)
+{
+struct nfs_writeverf *verfp = &dreq->verf;
+
+#ifdef CONFIG_NFS_V4_1
+if (ds_clp) {
+/* pNFS is in use, use the DS verf */
+if (ds_idx >= 0 && ds_idx < dreq->ds_cinfo.nbuckets)
+verfp = &dreq->ds_cinfo.buckets[ds_idx].direct_verf;
+else
+WARN_ON_ONCE(1);
+}
+#endif
+return verfp;
+}
+
+
+/*
+ * nfs_direct_set_hdr_verf - set the write/commit verifier
+ * @dreq - direct request possibly spanning multiple servers
+ * @hdr - pageio header to validate against previously seen verfs
+ *
+ * Set the server's (MDS or DS) "seen" verifier
+ */
+static void nfs_direct_set_hdr_verf(struct nfs_direct_req *dreq,
+struct nfs_pgio_header *hdr)
+{
+struct nfs_writeverf *verfp;
+
+verfp = nfs_direct_select_verf(dreq, hdr->data->ds_clp,
+hdr->data->ds_idx);
+WARN_ON_ONCE(verfp->committed >= 0);
+memcpy(verfp, &hdr->verf, sizeof(struct nfs_writeverf));
+WARN_ON_ONCE(verfp->committed < 0);
+}
+
+/*
+ * nfs_direct_cmp_hdr_verf - compare verifier for pgio header
+ * @dreq - direct request possibly spanning multiple servers
+ * @hdr - pageio header to validate against previously seen verf
+ *
+ * set the server's "seen" verf if not initialized.
+ * returns result of comparison between @hdr->verf and the "seen"
+ * verf of the server used by @hdr (DS or MDS)
+ */
+static int nfs_direct_set_or_cmp_hdr_verf(struct nfs_direct_req *dreq,
+struct nfs_pgio_header *hdr)
+{
+struct nfs_writeverf *verfp;
+
+verfp = nfs_direct_select_verf(dreq, hdr->data->ds_clp,
+hdr->data->ds_idx);
+if (verfp->committed < 0) {
+nfs_direct_set_hdr_verf(dreq, hdr);
+return 0;
+}
+return memcmp(verfp, &hdr->verf, sizeof(struct nfs_writeverf));
+}
+
+#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4)
+/*
+ * nfs_direct_cmp_commit_data_verf - compare verifier for commit data
+ * @dreq - direct request possibly spanning multiple servers
+ * @data - commit data to validate against previously seen verf
+ *
+ * returns result of comparison between @data->verf and the verf of
+ * the server used by @data (DS or MDS)
+ */
+static int nfs_direct_cmp_commit_data_verf(struct nfs_direct_req *dreq,
+struct nfs_commit_data *data)
+{
+struct nfs_writeverf *verfp;
+
+verfp = nfs_direct_select_verf(dreq, data->ds_clp,
+data->ds_commit_index);
+WARN_ON_ONCE(verfp->committed < 0);
+return memcmp(verfp, &data->verf, sizeof(struct nfs_writeverf));
+}
+#endif
+
 /**
 * nfs_direct_IO - NFS address space operation for direct I/O
 * @rw: direction (read or write)
@@ -168,6 +259,7 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
 kref_get(&dreq->kref);
 init_completion(&dreq->completion);
 INIT_LIST_HEAD(&dreq->mds_cinfo.list);
+dreq->verf.committed = NFS_INVALID_STABLE_HOW; /* not set yet */
 INIT_WORK(&dreq->work, nfs_direct_write_schedule_work);
 spin_lock_init(&dreq->lock);
 
@@ -380,8 +472,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de
 struct nfs_page *req;
 unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
 /* XXX do we need to do the eof zeroing found in async_filler? */
-req = nfs_create_request(dreq->ctx, dreq->inode,
-pagevec[i],
+req = nfs_create_request(dreq->ctx, pagevec[i], NULL,
 pgbase, req_len);
 if (IS_ERR(req)) {
 result = PTR_ERR(req);
@@ -424,7 +515,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
 size_t requested_bytes = 0;
 unsigned long seg;
 
-NFS_PROTO(dreq->inode)->read_pageio_init(&desc, dreq->inode,
+nfs_pageio_init_read(&desc, dreq->inode, false,
 &nfs_direct_read_completion_ops);
 get_dreq(dreq);
 desc.pg_dreq = dreq;
@@ -564,7 +655,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
 dreq->count = 0;
 get_dreq(dreq);
 
-NFS_PROTO(dreq->inode)->write_pageio_init(&desc, dreq->inode, FLUSH_STABLE,
+nfs_pageio_init_write(&desc, dreq->inode, FLUSH_STABLE, false,
 &nfs_direct_write_completion_ops);
 desc.pg_dreq = dreq;
 
@@ -603,7 +694,7 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data)
 dprintk("NFS: %5u commit failed with error %d.\n",
 data->task.tk_pid, status);
 dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
-} else if (memcmp(&dreq->verf, &data->verf, sizeof(data->verf))) {
+} else if (nfs_direct_cmp_commit_data_verf(dreq, data)) {
 dprintk("NFS: %5u commit verify failed\n", data->task.tk_pid);
 dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
 }
@@ -750,8 +841,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *d
 struct nfs_page *req;
 unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
 
-req = nfs_create_request(dreq->ctx, dreq->inode,
-pagevec[i],
+req = nfs_create_request(dreq->ctx, pagevec[i], NULL,
 pgbase, req_len);
 if (IS_ERR(req)) {
 result = PTR_ERR(req);
@@ -813,13 +903,13 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
 if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES)
 bit = NFS_IOHDR_NEED_RESCHED;
 else if (dreq->flags == 0) {
-memcpy(&dreq->verf, hdr->verf,
-sizeof(dreq->verf));
+nfs_direct_set_hdr_verf(dreq, hdr);
 bit = NFS_IOHDR_NEED_COMMIT;
 dreq->flags = NFS_ODIRECT_DO_COMMIT;
 } else if (dreq->flags == NFS_ODIRECT_DO_COMMIT) {
-if (memcmp(&dreq->verf, hdr->verf, sizeof(dreq->verf))) {
-dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+if (nfs_direct_set_or_cmp_hdr_verf(dreq, hdr)) {
+dreq->flags =
+NFS_ODIRECT_RESCHED_WRITES;
 bit = NFS_IOHDR_NEED_RESCHED;
 } else
 bit = NFS_IOHDR_NEED_COMMIT;
@@ -829,6 +919,8 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
 spin_unlock(&dreq->lock);
 
 while (!list_empty(&hdr->pages)) {
+bool do_destroy = true;
+
 req = nfs_list_entry(hdr->pages.next);
 nfs_list_remove_request(req);
 switch (bit) {
@@ -836,6 +928,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
 case NFS_IOHDR_NEED_COMMIT:
 kref_get(&req->wb_kref);
 nfs_mark_request_commit(req, hdr->lseg, &cinfo);
+do_destroy = false;
 }
 nfs_unlock_and_release_request(req);
 }
@@ -874,7 +967,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
 size_t requested_bytes = 0;
 unsigned long seg;
 
-NFS_PROTO(inode)->write_pageio_init(&desc, inode, FLUSH_COND_STABLE,
+nfs_pageio_init_write(&desc, inode, FLUSH_COND_STABLE, false,
 &nfs_direct_write_completion_ops);
 desc.pg_dreq = dreq;
 get_dreq(dreq);
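The verifier helpers added to direct.c above follow a simple "record on first use, compare thereafter" rule: nfs_direct_set_or_cmp_hdr_verf() records the server's write verifier the first time it is seen (a negative committed field marks it as unset) and memcmp()s later completions against it, forcing a resend if the server rebooted and the verifier changed. The sketch below is a standalone illustration of that rule only; toy_verifier and set_or_cmp_verf are made-up names and the layout is not the kernel's nfs_writeverf.

#include <string.h>
#include <stdio.h>

/* Simplified stand-in for the recorded verifier; committed < 0 plays the
 * role of NFS_INVALID_STABLE_HOW ("not seen yet"). */
struct toy_verifier {
        int committed;
        unsigned char data[8];
};

/* Returns 0 if verf matches the recorded one (or was just recorded),
 * nonzero if the verifier changed and the writes must be resent. */
static int set_or_cmp_verf(struct toy_verifier *seen, const struct toy_verifier *verf)
{
        if (seen->committed < 0) {
                *seen = *verf;          /* first completion: record it */
                return 0;
        }
        return memcmp(seen->data, verf->data, sizeof(seen->data));
}

int main(void)
{
        struct toy_verifier seen = { .committed = -1 };
        struct toy_verifier v1 = { .committed = 0, .data = "aaaaaaa" };
        struct toy_verifier v2 = { .committed = 0, .data = "bbbbbbb" };

        int r1 = set_or_cmp_verf(&seen, &v1);   /* records v1, returns 0 */
        int r2 = set_or_cmp_verf(&seen, &v1);   /* same verifier, returns 0 */
        int r3 = set_or_cmp_verf(&seen, &v2);   /* changed: nonzero, resend */

        printf("%d %d %d\n", r1, r2 != 0, r3 != 0);     /* prints: 0 0 1 */
        return 0;
}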
diff --git a/fs/nfs/filelayout/Makefile b/fs/nfs/filelayout/Makefile
new file mode 100644
index 000000000000..8516cdffb9e9
--- /dev/null
+++ b/fs/nfs/filelayout/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for the pNFS Files Layout Driver kernel module
+#
+obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o
+nfs_layout_nfsv41_files-y := filelayout.o filelayoutdev.o
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/filelayout/filelayout.c
index b9a35c05b60f..d2eba1c13b7e 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -35,11 +35,11 @@
 
 #include <linux/sunrpc/metrics.h>
 
-#include "nfs4session.h"
-#include "internal.h"
-#include "delegation.h"
-#include "nfs4filelayout.h"
-#include "nfs4trace.h"
+#include "../nfs4session.h"
+#include "../internal.h"
+#include "../delegation.h"
+#include "filelayout.h"
+#include "../nfs4trace.h"
 
 #define NFSDBG_FACILITY NFSDBG_PNFS_LD
 
@@ -84,7 +84,7 @@ filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset)
 BUG();
 }
 
-static void filelayout_reset_write(struct nfs_write_data *data)
+static void filelayout_reset_write(struct nfs_pgio_data *data)
 {
 struct nfs_pgio_header *hdr = data->header;
 struct rpc_task *task = &data->task;
@@ -105,7 +105,7 @@ static void filelayout_reset_write(struct nfs_write_data *data)
 }
 }
 
-static void filelayout_reset_read(struct nfs_read_data *data)
+static void filelayout_reset_read(struct nfs_pgio_data *data)
 {
 struct nfs_pgio_header *hdr = data->header;
 struct rpc_task *task = &data->task;
@@ -243,7 +243,7 @@ wait_on_recovery:
 /* NFS_PROTO call done callback routines */
 
 static int filelayout_read_done_cb(struct rpc_task *task,
-struct nfs_read_data *data)
+struct nfs_pgio_data *data)
 {
 struct nfs_pgio_header *hdr = data->header;
 int err;
@@ -270,7 +270,7 @@ static int filelayout_read_done_cb(struct rpc_task *task,
 * rfc5661 is not clear about which credential should be used.
 */
 static void
-filelayout_set_layoutcommit(struct nfs_write_data *wdata)
+filelayout_set_layoutcommit(struct nfs_pgio_data *wdata)
 {
 struct nfs_pgio_header *hdr = wdata->header;
 
@@ -279,7 +279,7 @@ filelayout_set_layoutcommit(struct nfs_write_data *wdata)
 return;
 
 pnfs_set_layoutcommit(wdata);
-dprintk("%s ionde %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino,
+dprintk("%s inode %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino,
 (unsigned long) NFS_I(hdr->inode)->layout->plh_lwb);
 }
 
@@ -305,7 +305,7 @@ filelayout_reset_to_mds(struct pnfs_layout_segment *lseg)
 */
 static void filelayout_read_prepare(struct rpc_task *task, void *data)
 {
-struct nfs_read_data *rdata = data;
+struct nfs_pgio_data *rdata = data;
 
 if (unlikely(test_bit(NFS_CONTEXT_BAD, &rdata->args.context->flags))) {
 rpc_exit(task, -EIO);
@@ -317,7 +317,7 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data)
 rpc_exit(task, 0);
 return;
 }
-rdata->read_done_cb = filelayout_read_done_cb;
+rdata->pgio_done_cb = filelayout_read_done_cb;
 
 if (nfs41_setup_sequence(rdata->ds_clp->cl_session,
 &rdata->args.seq_args,
@@ -331,7 +331,7 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data)
 
 static void filelayout_read_call_done(struct rpc_task *task, void *data)
 {
-struct nfs_read_data *rdata = data;
+struct nfs_pgio_data *rdata = data;
 
 dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status);
 
@@ -347,14 +347,14 @@ static void filelayout_read_call_done(struct rpc_task *task, void *data)
 
 static void filelayout_read_count_stats(struct rpc_task *task, void *data)
 {
-struct nfs_read_data *rdata = data;
+struct nfs_pgio_data *rdata = data;
 
 rpc_count_iostats(task, NFS_SERVER(rdata->header->inode)->client->cl_metrics);
 }
 
 static void filelayout_read_release(void *data)
 {
-struct nfs_read_data *rdata = data;
+struct nfs_pgio_data *rdata = data;
 struct pnfs_layout_hdr *lo = rdata->header->lseg->pls_layout;
 
 filelayout_fenceme(lo->plh_inode, lo);
@@ -363,7 +363,7 @@ static void filelayout_read_release(void *data)
 }
 
 static int filelayout_write_done_cb(struct rpc_task *task,
-struct nfs_write_data *data)
+struct nfs_pgio_data *data)
 {
 struct nfs_pgio_header *hdr = data->header;
 int err;
@@ -419,7 +419,7 @@ static int filelayout_commit_done_cb(struct rpc_task *task,
 
 static void filelayout_write_prepare(struct rpc_task *task, void *data)
 {
-struct nfs_write_data *wdata = data;
+struct nfs_pgio_data *wdata = data;
 
 if (unlikely(test_bit(NFS_CONTEXT_BAD, &wdata->args.context->flags))) {
 rpc_exit(task, -EIO);
@@ -443,7 +443,7 @@ static void filelayout_write_prepare(struct rpc_task *task, void *data)
 
 static void filelayout_write_call_done(struct rpc_task *task, void *data)
 {
-struct nfs_write_data *wdata = data;
+struct nfs_pgio_data *wdata = data;
 
 if (test_bit(NFS_IOHDR_REDO, &wdata->header->flags) &&
 task->tk_status == 0) {
@@ -457,14 +457,14 @@ static void filelayout_write_call_done(struct rpc_task *task, void *data)
 
 static void filelayout_write_count_stats(struct rpc_task *task, void *data)
 {
-struct nfs_write_data *wdata = data;
+struct nfs_pgio_data *wdata = data;
 
 rpc_count_iostats(task, NFS_SERVER(wdata->header->inode)->client->cl_metrics);
 }
 
 static void filelayout_write_release(void *data)
 {
-struct nfs_write_data *wdata = data;
+struct nfs_pgio_data *wdata = data;
 struct pnfs_layout_hdr *lo = wdata->header->lseg->pls_layout;
 
 filelayout_fenceme(lo->plh_inode, lo);
@@ -529,7 +529,7 @@ static const struct rpc_call_ops filelayout_commit_call_ops = {
 };
 
 static enum pnfs_try_status
-filelayout_read_pagelist(struct nfs_read_data *data)
+filelayout_read_pagelist(struct nfs_pgio_data *data)
 {
 struct nfs_pgio_header *hdr = data->header;
 struct pnfs_layout_segment *lseg = hdr->lseg;
@@ -560,6 +560,7 @@ filelayout_read_pagelist(struct nfs_read_data *data)
 /* No multipath support. Use first DS */
 atomic_inc(&ds->ds_clp->cl_count);
 data->ds_clp = ds->ds_clp;
+data->ds_idx = idx;
 fh = nfs4_fl_select_ds_fh(lseg, j);
 if (fh)
 data->args.fh = fh;
@@ -568,14 +569,14 @@ filelayout_read_pagelist(struct nfs_read_data *data)
 data->mds_offset = offset;
 
 /* Perform an asynchronous read to ds */
-nfs_initiate_read(ds_clnt, data,
-&filelayout_read_call_ops, RPC_TASK_SOFTCONN);
+nfs_initiate_pgio(ds_clnt, data,
+&filelayout_read_call_ops, 0, RPC_TASK_SOFTCONN);
 return PNFS_ATTEMPTED;
 }
 
 /* Perform async writes. */
 static enum pnfs_try_status
-filelayout_write_pagelist(struct nfs_write_data *data, int sync)
+filelayout_write_pagelist(struct nfs_pgio_data *data, int sync)
 {
 struct nfs_pgio_header *hdr = data->header;
 struct pnfs_layout_segment *lseg = hdr->lseg;
@@ -600,20 +601,18 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync)
 __func__, hdr->inode->i_ino, sync, (size_t) data->args.count,
 offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count));
 
-data->write_done_cb = filelayout_write_done_cb;
+data->pgio_done_cb = filelayout_write_done_cb;
 atomic_inc(&ds->ds_clp->cl_count);
 data->ds_clp = ds->ds_clp;
+data->ds_idx = idx;
 fh = nfs4_fl_select_ds_fh(lseg, j);
 if (fh)
 data->args.fh = fh;
-/*
- * Get the file offset on the dserver. Set the write offset to
- * this offset and save the original offset.
- */
+
 data->args.offset = filelayout_get_dserver_offset(lseg, offset);
 
 /* Perform an asynchronous write */
-nfs_initiate_write(ds_clnt, data,
+nfs_initiate_pgio(ds_clnt, data,
 &filelayout_write_call_ops, sync,
 RPC_TASK_SOFTCONN);
 return PNFS_ATTEMPTED;
@@ -637,7 +636,6 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
 struct nfs4_deviceid_node *d;
 struct nfs4_file_layout_dsaddr *dsaddr;
 int status = -EINVAL;
-struct nfs_server *nfss = NFS_SERVER(lo->plh_inode);
 
 dprintk("--> %s\n", __func__);
 
@@ -655,7 +653,7 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
 goto out;
 }
 
-if (!fl->stripe_unit || fl->stripe_unit % PAGE_SIZE) {
+if (!fl->stripe_unit) {
 dprintk("%s Invalid stripe unit (%u)\n",
 __func__, fl->stripe_unit);
 goto out;
@@ -692,12 +690,6 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
 goto out_put;
 }
 
-if (fl->stripe_unit % nfss->rsize || fl->stripe_unit % nfss->wsize) {
-dprintk("%s Stripe unit (%u) not aligned with rsize %u "
-"wsize %u\n", __func__, fl->stripe_unit, nfss->rsize,
-nfss->wsize);
-}
-
 status = 0;
 out:
 dprintk("--> %s returns %d\n", __func__, status);
@@ -850,11 +842,15 @@ filelayout_alloc_commit_info(struct pnfs_layout_segment *lseg,
 {
 struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
 struct pnfs_commit_bucket *buckets;
-int size;
+int size, i;
 
 if (fl->commit_through_mds)
 return 0;
-if (cinfo->ds->nbuckets != 0) {
+
+size = (fl->stripe_type == STRIPE_SPARSE) ?
+fl->dsaddr->ds_num : fl->dsaddr->stripe_count;
+
+if (cinfo->ds->nbuckets >= size) {
 /* This assumes there is only one IOMODE_RW lseg. What
 * we really want to do is have a layout_hdr level
 * dictionary of <multipath_list4, fh> keys, each
@@ -864,30 +860,36 @@ filelayout_alloc_commit_info(struct pnfs_layout_segment *lseg,
 return 0;
 }
 
-size = (fl->stripe_type == STRIPE_SPARSE) ?
-fl->dsaddr->ds_num : fl->dsaddr->stripe_count;
-
 buckets = kcalloc(size, sizeof(struct pnfs_commit_bucket),
 gfp_flags);
 if (!buckets)
 return -ENOMEM;
-else {
-int i;
+for (i = 0; i < size; i++) {
+INIT_LIST_HEAD(&buckets[i].written);
+INIT_LIST_HEAD(&buckets[i].committing);
+/* mark direct verifier as unset */
+buckets[i].direct_verf.committed = NFS_INVALID_STABLE_HOW;
+}
 
 spin_lock(cinfo->lock);
-if (cinfo->ds->nbuckets != 0)
-kfree(buckets);
-else {
-cinfo->ds->buckets = buckets;
-cinfo->ds->nbuckets = size;
-for (i = 0; i < size; i++) {
-INIT_LIST_HEAD(&buckets[i].written);
-INIT_LIST_HEAD(&buckets[i].committing);
-}
-}
-spin_unlock(cinfo->lock);
-return 0;
+if (cinfo->ds->nbuckets >= size)
+goto out;
+for (i = 0; i < cinfo->ds->nbuckets; i++) {
+list_splice(&cinfo->ds->buckets[i].written,
+&buckets[i].written);
+list_splice(&cinfo->ds->buckets[i].committing,
+&buckets[i].committing);
+buckets[i].direct_verf.committed =
+cinfo->ds->buckets[i].direct_verf.committed;
+buckets[i].wlseg = cinfo->ds->buckets[i].wlseg;
+buckets[i].clseg = cinfo->ds->buckets[i].clseg;
 }
+swap(cinfo->ds->buckets, buckets);
+cinfo->ds->nbuckets = size;
+out:
+spin_unlock(cinfo->lock);
+kfree(buckets);
+return 0;
 }
 
 static struct pnfs_layout_segment *
@@ -915,47 +917,51 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
 /*
 * filelayout_pg_test(). Called by nfs_can_coalesce_requests()
 *
- * return true : coalesce page
- * return false : don't coalesce page
+ * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
+ * of bytes (maximum @req->wb_bytes) that can be coalesced.
 */
-static bool
+static size_t
 filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
 struct nfs_page *req)
 {
+unsigned int size;
 u64 p_stripe, r_stripe;
-u32 stripe_unit;
+u32 stripe_offset;
+u64 segment_offset = pgio->pg_lseg->pls_range.offset;
+u32 stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit;
 
-if (!pnfs_generic_pg_test(pgio, prev, req) ||
-!nfs_generic_pg_test(pgio, prev, req))
-return false;
+/* calls nfs_generic_pg_test */
+size = pnfs_generic_pg_test(pgio, prev, req);
+if (!size)
+return 0;
 
-p_stripe = (u64)req_offset(prev);
-r_stripe = (u64)req_offset(req);
-stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit;
+/* see if req and prev are in the same stripe */
+if (prev) {
+p_stripe = (u64)req_offset(prev) - segment_offset;
+r_stripe = (u64)req_offset(req) - segment_offset;
+do_div(p_stripe, stripe_unit);
+do_div(r_stripe, stripe_unit);
 
-do_div(p_stripe, stripe_unit);
-do_div(r_stripe, stripe_unit);
+if (p_stripe != r_stripe)
+return 0;
+}
 
-return (p_stripe == r_stripe);
+/* calculate remaining bytes in the current stripe */
+div_u64_rem((u64)req_offset(req) - segment_offset,
+stripe_unit,
+&stripe_offset);
+WARN_ON_ONCE(stripe_offset > stripe_unit);
+if (stripe_offset >= stripe_unit)
+return 0;
+return min(stripe_unit - (unsigned int)stripe_offset, size);
 }
 
 static void
 filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio,
 struct nfs_page *req)
 {
-WARN_ON_ONCE(pgio->pg_lseg != NULL);
-
-if (req->wb_offset != req->wb_pgbase) {
-/*
- * Handling unaligned pages is difficult, because have to
- * somehow split a req in two in certain cases in the
- * pg.test code. Avoid this by just not using pnfs
- * in this case.
- */
-nfs_pageio_reset_read_mds(pgio);
-return;
-}
-pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
+if (!pgio->pg_lseg)
+pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
 req->wb_context,
 0,
 NFS4_MAX_UINT64,
@@ -973,11 +979,8 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
 struct nfs_commit_info cinfo;
 int status;
 
-WARN_ON_ONCE(pgio->pg_lseg != NULL);
-
-if (req->wb_offset != req->wb_pgbase)
-goto out_mds;
-pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
+if (!pgio->pg_lseg)
+pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
 req->wb_context,
 0,
 NFS4_MAX_UINT64,
@@ -1067,6 +1070,7 @@ filelayout_choose_commit_list(struct nfs_page *req,
 */
 j = nfs4_fl_calc_j_index(lseg, req_offset(req));
 i = select_bucket_index(fl, j);
+spin_lock(cinfo->lock);
 buckets = cinfo->ds->buckets;
 list = &buckets[i].written;
 if (list_empty(list)) {
@@ -1080,6 +1084,7 @@ filelayout_choose_commit_list(struct nfs_page *req,
 }
 set_bit(PG_COMMIT_TO_DS, &req->wb_flags);
 cinfo->ds->nwritten++;
+spin_unlock(cinfo->lock);
 return list;
 }
 
@@ -1176,6 +1181,7 @@ transfer_commit_list(struct list_head *src, struct list_head *dst,
 return ret;
 }
 
+/* Note called with cinfo->lock held. */
 static int
 filelayout_scan_ds_commit_list(struct pnfs_commit_bucket *bucket,
 struct nfs_commit_info *cinfo,
@@ -1220,15 +1226,18 @@ static void filelayout_recover_commit_reqs(struct list_head *dst,
 struct nfs_commit_info *cinfo)
 {
 struct pnfs_commit_bucket *b;
+struct pnfs_layout_segment *freeme;
 int i;
 
+restart:
 spin_lock(cinfo->lock);
 for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) {
 if (transfer_commit_list(&b->written, dst, cinfo, 0)) {
-spin_unlock(cinfo->lock);
-pnfs_put_lseg(b->wlseg);
+freeme = b->wlseg;
 b->wlseg = NULL;
-spin_lock(cinfo->lock);
+spin_unlock(cinfo->lock);
+pnfs_put_lseg(freeme);
+goto restart;
 }
 }
 cinfo->ds->nwritten = 0;
@@ -1243,6 +1252,7 @@ alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list)
 struct nfs_commit_data *data;
 int i, j;
 unsigned int nreq = 0;
+struct pnfs_layout_segment *freeme;
 
 fl_cinfo = cinfo->ds;
 bucket = fl_cinfo->buckets;
@@ -1253,8 +1263,10 @@ alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list)
 if (!data)
 break;
 data->ds_commit_index = i;
+spin_lock(cinfo->lock);
 data->lseg = bucket->clseg;
 bucket->clseg = NULL;
+spin_unlock(cinfo->lock);
 list_add(&data->pages, list);
 nreq++;
 }
@@ -1264,8 +1276,11 @@ alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list)
 if (list_empty(&bucket->committing))
 continue;
 nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo);
-pnfs_put_lseg(bucket->clseg);
+spin_lock(cinfo->lock);
+freeme = bucket->clseg;
 bucket->clseg = NULL;
+spin_unlock(cinfo->lock);
+pnfs_put_lseg(freeme);
 }
 /* Caller will clean up entries put on list */
 return nreq;
@@ -1330,7 +1345,7 @@ filelayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)
 struct nfs4_filelayout *flo;
 
 flo = kzalloc(sizeof(*flo), gfp_flags);
-return &flo->generic_hdr;
+return flo != NULL ? &flo->generic_hdr : NULL;
 }
 
 static void
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/filelayout/filelayout.h
index cebd20e7e923..ffbddf2219ea 100644
--- a/fs/nfs/nfs4filelayout.h
+++ b/fs/nfs/filelayout/filelayout.h
@@ -30,7 +30,7 @@
 #ifndef FS_NFS_NFS4FILELAYOUT_H
 #define FS_NFS_NFS4FILELAYOUT_H
 
-#include "pnfs.h"
+#include "../pnfs.h"
 
 /*
 * Default data server connection timeout and retrans vaules.
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c
index b9c61efe9660..44bf0140a4c7 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/filelayout/filelayoutdev.c
@@ -33,9 +33,9 @@
 #include <linux/module.h>
 #include <linux/sunrpc/addr.h>
 
-#include "internal.h"
-#include "nfs4session.h"
-#include "nfs4filelayout.h"
+#include "../internal.h"
+#include "../nfs4session.h"
+#include "filelayout.h"
 
 #define NFSDBG_FACILITY NFSDBG_PNFS_LD
 
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index 66984a9aafaa..b94f80420a58 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -120,7 +120,8 @@ struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh,
 
 security_d_instantiate(ret, inode);
 spin_lock(&ret->d_lock);
-if (IS_ROOT(ret) && !(ret->d_flags & DCACHE_NFSFS_RENAMED)) {
+if (IS_ROOT(ret) && !ret->d_fsdata &&
+!(ret->d_flags & DCACHE_NFSFS_RENAMED)) {
 ret->d_fsdata = name;
 name = NULL;
 }
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index e6f7398d2b3c..c496f8a74639 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1575,18 +1575,20 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 inode->i_version = fattr->change_attr;
 }
 } else if (server->caps & NFS_CAP_CHANGE_ATTR)
-invalid |= save_cache_validity;
+nfsi->cache_validity |= save_cache_validity;
 
 if (fattr->valid & NFS_ATTR_FATTR_MTIME) {
 memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
 } else if (server->caps & NFS_CAP_MTIME)
-invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
+nfsi->cache_validity |= save_cache_validity &
+(NFS_INO_INVALID_ATTR
 | NFS_INO_REVAL_FORCED);
 
 if (fattr->valid & NFS_ATTR_FATTR_CTIME) {
 memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
 } else if (server->caps & NFS_CAP_CTIME)
-invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
+nfsi->cache_validity |= save_cache_validity &
+(NFS_INO_INVALID_ATTR
 | NFS_INO_REVAL_FORCED);
 
 /* Check if our cached file size is stale */
@@ -1608,7 +1610,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 (long long)new_isize);
 }
 } else
-invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
+nfsi->cache_validity |= save_cache_validity &
+(NFS_INO_INVALID_ATTR
 | NFS_INO_REVAL_PAGECACHE
 | NFS_INO_REVAL_FORCED);
 
@@ -1616,7 +1619,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 if (fattr->valid & NFS_ATTR_FATTR_ATIME)
 memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime));
 else if (server->caps & NFS_CAP_ATIME)
-invalid |= save_cache_validity & (NFS_INO_INVALID_ATIME
+nfsi->cache_validity |= save_cache_validity &
+(NFS_INO_INVALID_ATIME
 | NFS_INO_REVAL_FORCED);
 
 if (fattr->valid & NFS_ATTR_FATTR_MODE) {
@@ -1627,7 +1631,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
 }
 } else if (server->caps & NFS_CAP_MODE)
-invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
+nfsi->cache_validity |= save_cache_validity &
+(NFS_INO_INVALID_ATTR
 | NFS_INO_INVALID_ACCESS
 | NFS_INO_INVALID_ACL
 | NFS_INO_REVAL_FORCED);
@@ -1638,7 +1643,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 inode->i_uid = fattr->uid;
 }
 } else if (server->caps & NFS_CAP_OWNER)
-invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
+nfsi->cache_validity |= save_cache_validity &
+(NFS_INO_INVALID_ATTR
 | NFS_INO_INVALID_ACCESS
 | NFS_INO_INVALID_ACL
 | NFS_INO_REVAL_FORCED);
@@ -1649,7 +1655,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 inode->i_gid = fattr->gid;
 }
 } else if (server->caps & NFS_CAP_OWNER_GROUP)
-invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
+nfsi->cache_validity |= save_cache_validity &
+(NFS_INO_INVALID_ATTR
 | NFS_INO_INVALID_ACCESS
 | NFS_INO_INVALID_ACL
 | NFS_INO_REVAL_FORCED);
@@ -1662,7 +1669,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 set_nlink(inode, fattr->nlink);
 }
 } else if (server->caps & NFS_CAP_NLINK)
-invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
+nfsi->cache_validity |= save_cache_validity &
+(NFS_INO_INVALID_ATTR
 | NFS_INO_REVAL_FORCED);
 
 if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) {
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index dd8bfc2e2464..8b69cba1bb04 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -231,13 +231,20 @@ extern void nfs_destroy_writepagecache(void);
 
 extern int __init nfs_init_directcache(void);
 extern void nfs_destroy_directcache(void);
-extern bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount);
 extern void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
 struct nfs_pgio_header *hdr,
 void (*release)(struct nfs_pgio_header *hdr));
 void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos);
 int nfs_iocounter_wait(struct nfs_io_counter *c);
 
+extern const struct nfs_pageio_ops nfs_pgio_rw_ops;
+struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *);
+void nfs_rw_header_free(struct nfs_pgio_header *);
+void nfs_pgio_data_release(struct nfs_pgio_data *);
+int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *);
+int nfs_initiate_pgio(struct rpc_clnt *, struct nfs_pgio_data *,
+const struct rpc_call_ops *, int, int);
+
 static inline void nfs_iocounter_init(struct nfs_io_counter *c)
 {
 c->flags = 0;
@@ -395,19 +402,11 @@ extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh, bool
 
 struct nfs_pgio_completion_ops;
 /* read.c */
-extern struct nfs_read_header *nfs_readhdr_alloc(void);
-extern void nfs_readhdr_free(struct nfs_pgio_header *hdr);
 extern void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
-struct inode *inode,
+struct inode *inode, bool force_mds,
 const struct nfs_pgio_completion_ops *compl_ops);
-extern int nfs_initiate_read(struct rpc_clnt *clnt,
-struct nfs_read_data *data,
-const struct rpc_call_ops *call_ops, int flags);
 extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
-extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
-struct nfs_pgio_header *hdr);
 extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
-extern void nfs_readdata_release(struct nfs_read_data *rdata);
 
 /* super.c */
 void nfs_clone_super(struct super_block *, struct nfs_mount_info *);
@@ -422,19 +421,10 @@ int nfs_remount(struct super_block *sb, int *flags, char *raw_data);
 
 /* write.c */
 extern void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
-struct inode *inode, int ioflags,
+struct inode *inode, int ioflags, bool force_mds,
 const struct nfs_pgio_completion_ops *compl_ops);
-extern struct nfs_write_header *nfs_writehdr_alloc(void);
-extern void nfs_writehdr_free(struct nfs_pgio_header *hdr);
-extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
-struct nfs_pgio_header *hdr);
 extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio);
-extern void nfs_writedata_release(struct nfs_write_data *wdata);
 extern void nfs_commit_free(struct nfs_commit_data *p);
-extern int nfs_initiate_write(struct rpc_clnt *clnt,
-struct nfs_write_data *data,
-const struct rpc_call_ops *call_ops,
-int how, int flags);
 extern void nfs_write_prepare(struct rpc_task *task, void *calldata);
 extern void nfs_commit_prepare(struct rpc_task *task, void *calldata);
 extern int nfs_initiate_commit(struct rpc_clnt *clnt,
@@ -447,6 +437,7 @@ extern void nfs_init_commit(struct nfs_commit_data *data,
 struct nfs_commit_info *cinfo);
 int nfs_scan_commit_list(struct list_head *src, struct list_head *dst,
 struct nfs_commit_info *cinfo, int max);
+unsigned long nfs_reqs_to_commit(struct nfs_commit_info *);
 int nfs_scan_commit(struct inode *inode, struct list_head *dst,
 struct nfs_commit_info *cinfo);
 void nfs_mark_request_commit(struct nfs_page *req,
@@ -492,7 +483,7 @@ static inline void nfs_inode_dio_wait(struct inode *inode)
 extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq);
 
 /* nfs4proc.c */
-extern void __nfs4_read_done_cb(struct nfs_read_data *);
+extern void __nfs4_read_done_cb(struct nfs_pgio_data *);
 extern struct nfs_client *nfs4_init_client(struct nfs_client *clp,
 const struct rpc_timeout *timeparms,
 const char *ip_addr);
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 62db136339ea..5f61b83f4a1c 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -103,7 +103,7 @@ static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
103/* 103/*
104 * typedef opaque nfsdata<>; 104 * typedef opaque nfsdata<>;
105 */ 105 */
106static int decode_nfsdata(struct xdr_stream *xdr, struct nfs_readres *result) 106static int decode_nfsdata(struct xdr_stream *xdr, struct nfs_pgio_res *result)
107{ 107{
108 u32 recvd, count; 108 u32 recvd, count;
109 __be32 *p; 109 __be32 *p;
@@ -613,7 +613,7 @@ static void nfs2_xdr_enc_readlinkargs(struct rpc_rqst *req,
613 * }; 613 * };
614 */ 614 */
615static void encode_readargs(struct xdr_stream *xdr, 615static void encode_readargs(struct xdr_stream *xdr,
616 const struct nfs_readargs *args) 616 const struct nfs_pgio_args *args)
617{ 617{
618 u32 offset = args->offset; 618 u32 offset = args->offset;
619 u32 count = args->count; 619 u32 count = args->count;
@@ -629,7 +629,7 @@ static void encode_readargs(struct xdr_stream *xdr,
629 629
630static void nfs2_xdr_enc_readargs(struct rpc_rqst *req, 630static void nfs2_xdr_enc_readargs(struct rpc_rqst *req,
631 struct xdr_stream *xdr, 631 struct xdr_stream *xdr,
632 const struct nfs_readargs *args) 632 const struct nfs_pgio_args *args)
633{ 633{
634 encode_readargs(xdr, args); 634 encode_readargs(xdr, args);
635 prepare_reply_buffer(req, args->pages, args->pgbase, 635 prepare_reply_buffer(req, args->pages, args->pgbase,
@@ -649,7 +649,7 @@ static void nfs2_xdr_enc_readargs(struct rpc_rqst *req,
649 * }; 649 * };
650 */ 650 */
651static void encode_writeargs(struct xdr_stream *xdr, 651static void encode_writeargs(struct xdr_stream *xdr,
652 const struct nfs_writeargs *args) 652 const struct nfs_pgio_args *args)
653{ 653{
654 u32 offset = args->offset; 654 u32 offset = args->offset;
655 u32 count = args->count; 655 u32 count = args->count;
@@ -669,7 +669,7 @@ static void encode_writeargs(struct xdr_stream *xdr,
669 669
670static void nfs2_xdr_enc_writeargs(struct rpc_rqst *req, 670static void nfs2_xdr_enc_writeargs(struct rpc_rqst *req,
671 struct xdr_stream *xdr, 671 struct xdr_stream *xdr,
672 const struct nfs_writeargs *args) 672 const struct nfs_pgio_args *args)
673{ 673{
674 encode_writeargs(xdr, args); 674 encode_writeargs(xdr, args);
675 xdr->buf->flags |= XDRBUF_WRITE; 675 xdr->buf->flags |= XDRBUF_WRITE;
@@ -857,7 +857,7 @@ out_default:
857 * }; 857 * };
858 */ 858 */
859static int nfs2_xdr_dec_readres(struct rpc_rqst *req, struct xdr_stream *xdr, 859static int nfs2_xdr_dec_readres(struct rpc_rqst *req, struct xdr_stream *xdr,
860 struct nfs_readres *result) 860 struct nfs_pgio_res *result)
861{ 861{
862 enum nfs_stat status; 862 enum nfs_stat status;
863 int error; 863 int error;
@@ -878,7 +878,7 @@ out_default:
878} 878}
879 879
880static int nfs2_xdr_dec_writeres(struct rpc_rqst *req, struct xdr_stream *xdr, 880static int nfs2_xdr_dec_writeres(struct rpc_rqst *req, struct xdr_stream *xdr,
881 struct nfs_writeres *result) 881 struct nfs_pgio_res *result)
882{ 882{
883 /* All NFSv2 writes are "file sync" writes */ 883 /* All NFSv2 writes are "file sync" writes */
884 result->verf->committed = NFS_FILE_SYNC; 884 result->verf->committed = NFS_FILE_SYNC;
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index db60149c4579..e7daa42bbc86 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -795,7 +795,7 @@ nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
795 return status; 795 return status;
796} 796}
797 797
798static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data) 798static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_data *data)
799{ 799{
800 struct inode *inode = data->header->inode; 800 struct inode *inode = data->header->inode;
801 801
@@ -807,18 +807,18 @@ static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data)
807 return 0; 807 return 0;
808} 808}
809 809
810static void nfs3_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg) 810static void nfs3_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
811{ 811{
812 msg->rpc_proc = &nfs3_procedures[NFS3PROC_READ]; 812 msg->rpc_proc = &nfs3_procedures[NFS3PROC_READ];
813} 813}
814 814
815static int nfs3_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data) 815static int nfs3_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data)
816{ 816{
817 rpc_call_start(task); 817 rpc_call_start(task);
818 return 0; 818 return 0;
819} 819}
820 820
821static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data) 821static int nfs3_write_done(struct rpc_task *task, struct nfs_pgio_data *data)
822{ 822{
823 struct inode *inode = data->header->inode; 823 struct inode *inode = data->header->inode;
824 824
@@ -829,17 +829,11 @@ static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data)
829 return 0; 829 return 0;
830} 830}
831 831
832static void nfs3_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg) 832static void nfs3_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
833{ 833{
834 msg->rpc_proc = &nfs3_procedures[NFS3PROC_WRITE]; 834 msg->rpc_proc = &nfs3_procedures[NFS3PROC_WRITE];
835} 835}
836 836
837static int nfs3_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data)
838{
839 rpc_call_start(task);
840 return 0;
841}
842
843static void nfs3_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) 837static void nfs3_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
844{ 838{
845 rpc_call_start(task); 839 rpc_call_start(task);
@@ -946,13 +940,10 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
946 .fsinfo = nfs3_proc_fsinfo, 940 .fsinfo = nfs3_proc_fsinfo,
947 .pathconf = nfs3_proc_pathconf, 941 .pathconf = nfs3_proc_pathconf,
948 .decode_dirent = nfs3_decode_dirent, 942 .decode_dirent = nfs3_decode_dirent,
943 .pgio_rpc_prepare = nfs3_proc_pgio_rpc_prepare,
949 .read_setup = nfs3_proc_read_setup, 944 .read_setup = nfs3_proc_read_setup,
950 .read_pageio_init = nfs_pageio_init_read,
951 .read_rpc_prepare = nfs3_proc_read_rpc_prepare,
952 .read_done = nfs3_read_done, 945 .read_done = nfs3_read_done,
953 .write_setup = nfs3_proc_write_setup, 946 .write_setup = nfs3_proc_write_setup,
954 .write_pageio_init = nfs_pageio_init_write,
955 .write_rpc_prepare = nfs3_proc_write_rpc_prepare,
956 .write_done = nfs3_write_done, 947 .write_done = nfs3_write_done,
957 .commit_setup = nfs3_proc_commit_setup, 948 .commit_setup = nfs3_proc_commit_setup,
958 .commit_rpc_prepare = nfs3_proc_commit_rpc_prepare, 949 .commit_rpc_prepare = nfs3_proc_commit_rpc_prepare,
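
With the pageio paths merged, the NFSv3 method table above registers one pgio_rpc_prepare hook in place of the old read_rpc_prepare/write_rpc_prepare pair and drops the per-direction pageio_init entries. A minimal userspace sketch of that shape, purely illustrative (none of these type or function names are the kernel's):

#include <stdio.h>

struct pgio_data { const char *op; };  /* stands in for struct nfs_pgio_data */

struct rpc_ops {
        int  (*pgio_rpc_prepare)(struct pgio_data *data);  /* one hook for both directions */
        void (*read_setup)(struct pgio_data *data);
        void (*write_setup)(struct pgio_data *data);
};

static int v3_pgio_rpc_prepare(struct pgio_data *data)
{
        printf("start %s call\n", data->op);    /* the real hook just calls rpc_call_start() */
        return 0;
}

static void v3_read_setup(struct pgio_data *data)  { data->op = "READ"; }
static void v3_write_setup(struct pgio_data *data) { data->op = "WRITE"; }

static const struct rpc_ops v3_ops = {
        .pgio_rpc_prepare = v3_pgio_rpc_prepare,
        .read_setup       = v3_read_setup,
        .write_setup      = v3_write_setup,
};

int main(void)
{
        struct pgio_data rd, wr;

        v3_ops.read_setup(&rd);
        v3_ops.write_setup(&wr);
        v3_ops.pgio_rpc_prepare(&rd);   /* same prepare routine serves both */
        v3_ops.pgio_rpc_prepare(&wr);
        return 0;
}

The NFSv4 client ops table later in this diff gets the same treatment.
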
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index fa6d72131c19..8f4cbe7f4aa8 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -953,7 +953,7 @@ static void nfs3_xdr_enc_readlink3args(struct rpc_rqst *req,
953 * }; 953 * };
954 */ 954 */
955static void encode_read3args(struct xdr_stream *xdr, 955static void encode_read3args(struct xdr_stream *xdr,
956 const struct nfs_readargs *args) 956 const struct nfs_pgio_args *args)
957{ 957{
958 __be32 *p; 958 __be32 *p;
959 959
@@ -966,7 +966,7 @@ static void encode_read3args(struct xdr_stream *xdr,
966 966
967static void nfs3_xdr_enc_read3args(struct rpc_rqst *req, 967static void nfs3_xdr_enc_read3args(struct rpc_rqst *req,
968 struct xdr_stream *xdr, 968 struct xdr_stream *xdr,
969 const struct nfs_readargs *args) 969 const struct nfs_pgio_args *args)
970{ 970{
971 encode_read3args(xdr, args); 971 encode_read3args(xdr, args);
972 prepare_reply_buffer(req, args->pages, args->pgbase, 972 prepare_reply_buffer(req, args->pages, args->pgbase,
@@ -992,7 +992,7 @@ static void nfs3_xdr_enc_read3args(struct rpc_rqst *req,
992 * }; 992 * };
993 */ 993 */
994static void encode_write3args(struct xdr_stream *xdr, 994static void encode_write3args(struct xdr_stream *xdr,
995 const struct nfs_writeargs *args) 995 const struct nfs_pgio_args *args)
996{ 996{
997 __be32 *p; 997 __be32 *p;
998 998
@@ -1008,7 +1008,7 @@ static void encode_write3args(struct xdr_stream *xdr,
1008 1008
1009static void nfs3_xdr_enc_write3args(struct rpc_rqst *req, 1009static void nfs3_xdr_enc_write3args(struct rpc_rqst *req,
1010 struct xdr_stream *xdr, 1010 struct xdr_stream *xdr,
1011 const struct nfs_writeargs *args) 1011 const struct nfs_pgio_args *args)
1012{ 1012{
1013 encode_write3args(xdr, args); 1013 encode_write3args(xdr, args);
1014 xdr->buf->flags |= XDRBUF_WRITE; 1014 xdr->buf->flags |= XDRBUF_WRITE;
@@ -1589,7 +1589,7 @@ out_default:
1589 * }; 1589 * };
1590 */ 1590 */
1591static int decode_read3resok(struct xdr_stream *xdr, 1591static int decode_read3resok(struct xdr_stream *xdr,
1592 struct nfs_readres *result) 1592 struct nfs_pgio_res *result)
1593{ 1593{
1594 u32 eof, count, ocount, recvd; 1594 u32 eof, count, ocount, recvd;
1595 __be32 *p; 1595 __be32 *p;
@@ -1625,7 +1625,7 @@ out_overflow:
1625} 1625}
1626 1626
1627static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr, 1627static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr,
1628 struct nfs_readres *result) 1628 struct nfs_pgio_res *result)
1629{ 1629{
1630 enum nfs_stat status; 1630 enum nfs_stat status;
1631 int error; 1631 int error;
@@ -1673,7 +1673,7 @@ out_status:
1673 * }; 1673 * };
1674 */ 1674 */
1675static int decode_write3resok(struct xdr_stream *xdr, 1675static int decode_write3resok(struct xdr_stream *xdr,
1676 struct nfs_writeres *result) 1676 struct nfs_pgio_res *result)
1677{ 1677{
1678 __be32 *p; 1678 __be32 *p;
1679 1679
@@ -1697,7 +1697,7 @@ out_eio:
1697} 1697}
1698 1698
1699static int nfs3_xdr_dec_write3res(struct rpc_rqst *req, struct xdr_stream *xdr, 1699static int nfs3_xdr_dec_write3res(struct rpc_rqst *req, struct xdr_stream *xdr,
1700 struct nfs_writeres *result) 1700 struct nfs_pgio_res *result)
1701{ 1701{
1702 enum nfs_stat status; 1702 enum nfs_stat status;
1703 int error; 1703 int error;
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index e1d1badbe53c..f63cb87cd730 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -337,7 +337,7 @@ nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_mode,
337 */ 337 */
338static inline void 338static inline void
339nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp, 339nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp,
340 struct rpc_message *msg, struct nfs_write_data *wdata) 340 struct rpc_message *msg, struct nfs_pgio_data *wdata)
341{ 341{
342 if (_nfs4_state_protect(clp, NFS_SP4_MACH_CRED_WRITE, clntp, msg) && 342 if (_nfs4_state_protect(clp, NFS_SP4_MACH_CRED_WRITE, clntp, msg) &&
343 !test_bit(NFS_SP4_MACH_CRED_COMMIT, &clp->cl_sp4_flags)) 343 !test_bit(NFS_SP4_MACH_CRED_COMMIT, &clp->cl_sp4_flags))
@@ -369,7 +369,7 @@ nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_flags,
369 369
370static inline void 370static inline void
371nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp, 371nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp,
372 struct rpc_message *msg, struct nfs_write_data *wdata) 372 struct rpc_message *msg, struct nfs_pgio_data *wdata)
373{ 373{
374} 374}
375#endif /* CONFIG_NFS_V4_1 */ 375#endif /* CONFIG_NFS_V4_1 */
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 8de3407e0360..464db9dd6318 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -100,8 +100,7 @@ nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
100 break; 100 break;
101 mutex_lock(&inode->i_mutex); 101 mutex_lock(&inode->i_mutex);
102 ret = nfs_file_fsync_commit(file, start, end, datasync); 102 ret = nfs_file_fsync_commit(file, start, end, datasync);
103 if (!ret && !datasync) 103 if (!ret)
104 /* application has asked for meta-data sync */
105 ret = pnfs_layoutcommit_inode(inode, true); 104 ret = pnfs_layoutcommit_inode(inode, true);
106 mutex_unlock(&inode->i_mutex); 105 mutex_unlock(&inode->i_mutex);
107 /* 106 /*
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 7f55fed8dc64..285ad5334018 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2027,7 +2027,7 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
2027 return status; 2027 return status;
2028 } 2028 }
2029 if (!(o_res->f_attr->valid & NFS_ATTR_FATTR)) 2029 if (!(o_res->f_attr->valid & NFS_ATTR_FATTR))
2030 _nfs4_proc_getattr(server, &o_res->fh, o_res->f_attr, o_res->f_label); 2030 nfs4_proc_getattr(server, &o_res->fh, o_res->f_attr, o_res->f_label);
2031 return 0; 2031 return 0;
2032} 2032}
2033 2033
@@ -4033,12 +4033,12 @@ static bool nfs4_error_stateid_expired(int err)
4033 return false; 4033 return false;
4034} 4034}
4035 4035
4036void __nfs4_read_done_cb(struct nfs_read_data *data) 4036void __nfs4_read_done_cb(struct nfs_pgio_data *data)
4037{ 4037{
4038 nfs_invalidate_atime(data->header->inode); 4038 nfs_invalidate_atime(data->header->inode);
4039} 4039}
4040 4040
4041static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data) 4041static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_pgio_data *data)
4042{ 4042{
4043 struct nfs_server *server = NFS_SERVER(data->header->inode); 4043 struct nfs_server *server = NFS_SERVER(data->header->inode);
4044 4044
@@ -4055,7 +4055,7 @@ static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data)
4055} 4055}
4056 4056
4057static bool nfs4_read_stateid_changed(struct rpc_task *task, 4057static bool nfs4_read_stateid_changed(struct rpc_task *task,
4058 struct nfs_readargs *args) 4058 struct nfs_pgio_args *args)
4059{ 4059{
4060 4060
4061 if (!nfs4_error_stateid_expired(task->tk_status) || 4061 if (!nfs4_error_stateid_expired(task->tk_status) ||
@@ -4068,7 +4068,7 @@ static bool nfs4_read_stateid_changed(struct rpc_task *task,
4068 return true; 4068 return true;
4069} 4069}
4070 4070
4071static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data) 4071static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_data *data)
4072{ 4072{
4073 4073
4074 dprintk("--> %s\n", __func__); 4074 dprintk("--> %s\n", __func__);
@@ -4077,19 +4077,19 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
4077 return -EAGAIN; 4077 return -EAGAIN;
4078 if (nfs4_read_stateid_changed(task, &data->args)) 4078 if (nfs4_read_stateid_changed(task, &data->args))
4079 return -EAGAIN; 4079 return -EAGAIN;
4080 return data->read_done_cb ? data->read_done_cb(task, data) : 4080 return data->pgio_done_cb ? data->pgio_done_cb(task, data) :
4081 nfs4_read_done_cb(task, data); 4081 nfs4_read_done_cb(task, data);
4082} 4082}
4083 4083
4084static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg) 4084static void nfs4_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
4085{ 4085{
4086 data->timestamp = jiffies; 4086 data->timestamp = jiffies;
4087 data->read_done_cb = nfs4_read_done_cb; 4087 data->pgio_done_cb = nfs4_read_done_cb;
4088 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; 4088 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
4089 nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); 4089 nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0);
4090} 4090}
4091 4091
4092static int nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data) 4092static int nfs4_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data)
4093{ 4093{
4094 if (nfs4_setup_sequence(NFS_SERVER(data->header->inode), 4094 if (nfs4_setup_sequence(NFS_SERVER(data->header->inode),
4095 &data->args.seq_args, 4095 &data->args.seq_args,
@@ -4097,14 +4097,14 @@ static int nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data)
4097 task)) 4097 task))
4098 return 0; 4098 return 0;
4099 if (nfs4_set_rw_stateid(&data->args.stateid, data->args.context, 4099 if (nfs4_set_rw_stateid(&data->args.stateid, data->args.context,
4100 data->args.lock_context, FMODE_READ) == -EIO) 4100 data->args.lock_context, data->header->rw_ops->rw_mode) == -EIO)
4101 return -EIO; 4101 return -EIO;
4102 if (unlikely(test_bit(NFS_CONTEXT_BAD, &data->args.context->flags))) 4102 if (unlikely(test_bit(NFS_CONTEXT_BAD, &data->args.context->flags)))
4103 return -EIO; 4103 return -EIO;
4104 return 0; 4104 return 0;
4105} 4105}
4106 4106
4107static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data) 4107static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_pgio_data *data)
4108{ 4108{
4109 struct inode *inode = data->header->inode; 4109 struct inode *inode = data->header->inode;
4110 4110
@@ -4121,7 +4121,7 @@ static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data)
4121} 4121}
4122 4122
4123static bool nfs4_write_stateid_changed(struct rpc_task *task, 4123static bool nfs4_write_stateid_changed(struct rpc_task *task,
4124 struct nfs_writeargs *args) 4124 struct nfs_pgio_args *args)
4125{ 4125{
4126 4126
4127 if (!nfs4_error_stateid_expired(task->tk_status) || 4127 if (!nfs4_error_stateid_expired(task->tk_status) ||
@@ -4134,18 +4134,18 @@ static bool nfs4_write_stateid_changed(struct rpc_task *task,
4134 return true; 4134 return true;
4135} 4135}
4136 4136
4137static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data) 4137static int nfs4_write_done(struct rpc_task *task, struct nfs_pgio_data *data)
4138{ 4138{
4139 if (!nfs4_sequence_done(task, &data->res.seq_res)) 4139 if (!nfs4_sequence_done(task, &data->res.seq_res))
4140 return -EAGAIN; 4140 return -EAGAIN;
4141 if (nfs4_write_stateid_changed(task, &data->args)) 4141 if (nfs4_write_stateid_changed(task, &data->args))
4142 return -EAGAIN; 4142 return -EAGAIN;
4143 return data->write_done_cb ? data->write_done_cb(task, data) : 4143 return data->pgio_done_cb ? data->pgio_done_cb(task, data) :
4144 nfs4_write_done_cb(task, data); 4144 nfs4_write_done_cb(task, data);
4145} 4145}
4146 4146
4147static 4147static
4148bool nfs4_write_need_cache_consistency_data(const struct nfs_write_data *data) 4148bool nfs4_write_need_cache_consistency_data(const struct nfs_pgio_data *data)
4149{ 4149{
4150 const struct nfs_pgio_header *hdr = data->header; 4150 const struct nfs_pgio_header *hdr = data->header;
4151 4151
@@ -4158,7 +4158,7 @@ bool nfs4_write_need_cache_consistency_data(const struct nfs_write_data *data)
4158 return nfs4_have_delegation(hdr->inode, FMODE_READ) == 0; 4158 return nfs4_have_delegation(hdr->inode, FMODE_READ) == 0;
4159} 4159}
4160 4160
4161static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg) 4161static void nfs4_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
4162{ 4162{
4163 struct nfs_server *server = NFS_SERVER(data->header->inode); 4163 struct nfs_server *server = NFS_SERVER(data->header->inode);
4164 4164
@@ -4168,8 +4168,8 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg)
4168 } else 4168 } else
4169 data->args.bitmask = server->cache_consistency_bitmask; 4169 data->args.bitmask = server->cache_consistency_bitmask;
4170 4170
4171 if (!data->write_done_cb) 4171 if (!data->pgio_done_cb)
4172 data->write_done_cb = nfs4_write_done_cb; 4172 data->pgio_done_cb = nfs4_write_done_cb;
4173 data->res.server = server; 4173 data->res.server = server;
4174 data->timestamp = jiffies; 4174 data->timestamp = jiffies;
4175 4175
@@ -4177,21 +4177,6 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg)
4177 nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); 4177 nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
4178} 4178}
4179 4179
4180static int nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data)
4181{
4182 if (nfs4_setup_sequence(NFS_SERVER(data->header->inode),
4183 &data->args.seq_args,
4184 &data->res.seq_res,
4185 task))
4186 return 0;
4187 if (nfs4_set_rw_stateid(&data->args.stateid, data->args.context,
4188 data->args.lock_context, FMODE_WRITE) == -EIO)
4189 return -EIO;
4190 if (unlikely(test_bit(NFS_CONTEXT_BAD, &data->args.context->flags)))
4191 return -EIO;
4192 return 0;
4193}
4194
4195static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) 4180static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
4196{ 4181{
4197 nfs4_setup_sequence(NFS_SERVER(data->inode), 4182 nfs4_setup_sequence(NFS_SERVER(data->inode),
@@ -8432,13 +8417,10 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
8432 .pathconf = nfs4_proc_pathconf, 8417 .pathconf = nfs4_proc_pathconf,
8433 .set_capabilities = nfs4_server_capabilities, 8418 .set_capabilities = nfs4_server_capabilities,
8434 .decode_dirent = nfs4_decode_dirent, 8419 .decode_dirent = nfs4_decode_dirent,
8420 .pgio_rpc_prepare = nfs4_proc_pgio_rpc_prepare,
8435 .read_setup = nfs4_proc_read_setup, 8421 .read_setup = nfs4_proc_read_setup,
8436 .read_pageio_init = pnfs_pageio_init_read,
8437 .read_rpc_prepare = nfs4_proc_read_rpc_prepare,
8438 .read_done = nfs4_read_done, 8422 .read_done = nfs4_read_done,
8439 .write_setup = nfs4_proc_write_setup, 8423 .write_setup = nfs4_proc_write_setup,
8440 .write_pageio_init = pnfs_pageio_init_write,
8441 .write_rpc_prepare = nfs4_proc_write_rpc_prepare,
8442 .write_done = nfs4_write_done, 8424 .write_done = nfs4_write_done,
8443 .commit_setup = nfs4_proc_commit_setup, 8425 .commit_setup = nfs4_proc_commit_setup,
8444 .commit_rpc_prepare = nfs4_proc_commit_rpc_prepare, 8426 .commit_rpc_prepare = nfs4_proc_commit_rpc_prepare,
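
nfs4proc.c follows the same consolidation: reads and writes now share nfs4_proc_pgio_rpc_prepare(), one pgio_done_cb pointer replaces the separate read/write done callbacks, and the stateid mode comes from the header's rw_ops->rw_mode rather than a hard-coded FMODE_READ or FMODE_WRITE. A rough sketch of that dispatch shape; for brevity this model keeps the per-direction default callback in the ops table, which the patch instead assigns in the setup functions, and every name here is invented:

#include <stdio.h>

enum rw_mode { MODE_READ = 1, MODE_WRITE = 2 };   /* stands in for FMODE_READ/FMODE_WRITE */

struct pgio_data;
typedef int (*done_cb_t)(struct pgio_data *data);

struct rw_ops     { enum rw_mode rw_mode; done_cb_t default_done; };
struct pgio_header { const struct rw_ops *rw_ops; };
struct pgio_data {
        struct pgio_header *header;
        done_cb_t pgio_done_cb;         /* optional override, e.g. from a pNFS layout */
};

static int read_done_cb(struct pgio_data *data)  { (void)data; printf("read done\n");  return 0; }
static int write_done_cb(struct pgio_data *data) { (void)data; printf("write done\n"); return 0; }

static const struct rw_ops read_ops  = { MODE_READ,  read_done_cb  };
static const struct rw_ops write_ops = { MODE_WRITE, write_done_cb };

/* shared completion path: use the override when set, else the per-direction default */
static int pgio_done(struct pgio_data *data)
{
        done_cb_t cb = data->pgio_done_cb ? data->pgio_done_cb
                                          : data->header->rw_ops->default_done;
        return cb(data);
}

int main(void)
{
        struct pgio_header rh = { &read_ops }, wh = { &write_ops };
        struct pgio_data rd = { &rh, NULL }, wr = { &wh, NULL };

        printf("stateid mode for reads:  %d\n", rd.header->rw_ops->rw_mode);
        printf("stateid mode for writes: %d\n", wr.header->rw_ops->rw_mode);
        pgio_done(&rd);
        pgio_done(&wr);
        return 0;
}
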
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index c0583b9bef71..848f6853c59e 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1456,7 +1456,7 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs
1456 * server that doesn't support a grace period. 1456 * server that doesn't support a grace period.
1457 */ 1457 */
1458 spin_lock(&sp->so_lock); 1458 spin_lock(&sp->so_lock);
1459 write_seqcount_begin(&sp->so_reclaim_seqcount); 1459 raw_write_seqcount_begin(&sp->so_reclaim_seqcount);
1460restart: 1460restart:
1461 list_for_each_entry(state, &sp->so_states, open_states) { 1461 list_for_each_entry(state, &sp->so_states, open_states) {
1462 if (!test_and_clear_bit(ops->state_flag_bit, &state->flags)) 1462 if (!test_and_clear_bit(ops->state_flag_bit, &state->flags))
@@ -1519,13 +1519,13 @@ restart:
1519 spin_lock(&sp->so_lock); 1519 spin_lock(&sp->so_lock);
1520 goto restart; 1520 goto restart;
1521 } 1521 }
1522 write_seqcount_end(&sp->so_reclaim_seqcount); 1522 raw_write_seqcount_end(&sp->so_reclaim_seqcount);
1523 spin_unlock(&sp->so_lock); 1523 spin_unlock(&sp->so_lock);
1524 return 0; 1524 return 0;
1525out_err: 1525out_err:
1526 nfs4_put_open_state(state); 1526 nfs4_put_open_state(state);
1527 spin_lock(&sp->so_lock); 1527 spin_lock(&sp->so_lock);
1528 write_seqcount_end(&sp->so_reclaim_seqcount); 1528 raw_write_seqcount_end(&sp->so_reclaim_seqcount);
1529 spin_unlock(&sp->so_lock); 1529 spin_unlock(&sp->so_lock);
1530 return status; 1530 return status;
1531} 1531}
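
The reclaim loop above switches to raw_write_seqcount_begin()/raw_write_seqcount_end(); in the kernel the raw variants bump the sequence count without the extra lockdep annotation done by the plain ones, which reads as the point of the change given that so_lock already serializes writers here (the hunk itself does not state the motivation). The write side of a sequence counter is just an odd/even bump around the update, as in this toy model (the _model suffix marks everything as illustration, not the kernel API):

#include <stdio.h>

/* toy sequence counter: odd value = write in progress */
struct seqcount { unsigned seq; };

static void raw_write_seqcount_begin_model(struct seqcount *s) { s->seq++; /* now odd  */ }
static void raw_write_seqcount_end_model(struct seqcount *s)   { s->seq++; /* even again */ }

static unsigned read_seqcount_begin_model(const struct seqcount *s) { return s->seq; }
static int read_seqcount_retry_model(const struct seqcount *s, unsigned start)
{
        return (start & 1) || s->seq != start;  /* retry if a write was (or is) in flight */
}

int main(void)
{
        struct seqcount sc = { 0 };
        int value = 0;

        unsigned start = read_seqcount_begin_model(&sc);

        raw_write_seqcount_begin_model(&sc);    /* writer opens the critical section */
        value = 42;                             /* ... reclaim open state ...        */
        raw_write_seqcount_end_model(&sc);

        if (read_seqcount_retry_model(&sc, start))
                printf("reader must retry, value=%d\n", value);
        return 0;
}
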
diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
index 849cf146db30..0a744f3a86f6 100644
--- a/fs/nfs/nfs4trace.h
+++ b/fs/nfs/nfs4trace.h
@@ -932,7 +932,7 @@ DEFINE_NFS4_IDMAP_EVENT(nfs4_map_gid_to_group);
932 932
933DECLARE_EVENT_CLASS(nfs4_read_event, 933DECLARE_EVENT_CLASS(nfs4_read_event,
934 TP_PROTO( 934 TP_PROTO(
935 const struct nfs_read_data *data, 935 const struct nfs_pgio_data *data,
936 int error 936 int error
937 ), 937 ),
938 938
@@ -972,7 +972,7 @@ DECLARE_EVENT_CLASS(nfs4_read_event,
972#define DEFINE_NFS4_READ_EVENT(name) \ 972#define DEFINE_NFS4_READ_EVENT(name) \
973 DEFINE_EVENT(nfs4_read_event, name, \ 973 DEFINE_EVENT(nfs4_read_event, name, \
974 TP_PROTO( \ 974 TP_PROTO( \
975 const struct nfs_read_data *data, \ 975 const struct nfs_pgio_data *data, \
976 int error \ 976 int error \
977 ), \ 977 ), \
978 TP_ARGS(data, error)) 978 TP_ARGS(data, error))
@@ -983,7 +983,7 @@ DEFINE_NFS4_READ_EVENT(nfs4_pnfs_read);
983 983
984DECLARE_EVENT_CLASS(nfs4_write_event, 984DECLARE_EVENT_CLASS(nfs4_write_event,
985 TP_PROTO( 985 TP_PROTO(
986 const struct nfs_write_data *data, 986 const struct nfs_pgio_data *data,
987 int error 987 int error
988 ), 988 ),
989 989
@@ -1024,7 +1024,7 @@ DECLARE_EVENT_CLASS(nfs4_write_event,
1024#define DEFINE_NFS4_WRITE_EVENT(name) \ 1024#define DEFINE_NFS4_WRITE_EVENT(name) \
1025 DEFINE_EVENT(nfs4_write_event, name, \ 1025 DEFINE_EVENT(nfs4_write_event, name, \
1026 TP_PROTO( \ 1026 TP_PROTO( \
1027 const struct nfs_write_data *data, \ 1027 const struct nfs_pgio_data *data, \
1028 int error \ 1028 int error \
1029 ), \ 1029 ), \
1030 TP_ARGS(data, error)) 1030 TP_ARGS(data, error))
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 73ce8d4fe2c8..939ae606cfa4 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1556,7 +1556,8 @@ static void encode_putrootfh(struct xdr_stream *xdr, struct compound_hdr *hdr)
1556 encode_op_hdr(xdr, OP_PUTROOTFH, decode_putrootfh_maxsz, hdr); 1556 encode_op_hdr(xdr, OP_PUTROOTFH, decode_putrootfh_maxsz, hdr);
1557} 1557}
1558 1558
1559static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, struct compound_hdr *hdr) 1559static void encode_read(struct xdr_stream *xdr, const struct nfs_pgio_args *args,
1560 struct compound_hdr *hdr)
1560{ 1561{
1561 __be32 *p; 1562 __be32 *p;
1562 1563
@@ -1701,7 +1702,8 @@ static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs4
1701 encode_nfs4_verifier(xdr, &arg->confirm); 1702 encode_nfs4_verifier(xdr, &arg->confirm);
1702} 1703}
1703 1704
1704static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr) 1705static void encode_write(struct xdr_stream *xdr, const struct nfs_pgio_args *args,
1706 struct compound_hdr *hdr)
1705{ 1707{
1706 __be32 *p; 1708 __be32 *p;
1707 1709
@@ -2451,7 +2453,7 @@ static void nfs4_xdr_enc_readdir(struct rpc_rqst *req, struct xdr_stream *xdr,
2451 * Encode a READ request 2453 * Encode a READ request
2452 */ 2454 */
2453static void nfs4_xdr_enc_read(struct rpc_rqst *req, struct xdr_stream *xdr, 2455static void nfs4_xdr_enc_read(struct rpc_rqst *req, struct xdr_stream *xdr,
2454 struct nfs_readargs *args) 2456 struct nfs_pgio_args *args)
2455{ 2457{
2456 struct compound_hdr hdr = { 2458 struct compound_hdr hdr = {
2457 .minorversion = nfs4_xdr_minorversion(&args->seq_args), 2459 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
@@ -2513,7 +2515,7 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr,
2513 * Encode a WRITE request 2515 * Encode a WRITE request
2514 */ 2516 */
2515static void nfs4_xdr_enc_write(struct rpc_rqst *req, struct xdr_stream *xdr, 2517static void nfs4_xdr_enc_write(struct rpc_rqst *req, struct xdr_stream *xdr,
2516 struct nfs_writeargs *args) 2518 struct nfs_pgio_args *args)
2517{ 2519{
2518 struct compound_hdr hdr = { 2520 struct compound_hdr hdr = {
2519 .minorversion = nfs4_xdr_minorversion(&args->seq_args), 2521 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
@@ -5085,7 +5087,8 @@ static int decode_putrootfh(struct xdr_stream *xdr)
5085 return decode_op_hdr(xdr, OP_PUTROOTFH); 5087 return decode_op_hdr(xdr, OP_PUTROOTFH);
5086} 5088}
5087 5089
5088static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs_readres *res) 5090static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req,
5091 struct nfs_pgio_res *res)
5089{ 5092{
5090 __be32 *p; 5093 __be32 *p;
5091 uint32_t count, eof, recvd; 5094 uint32_t count, eof, recvd;
@@ -5339,7 +5342,7 @@ static int decode_setclientid_confirm(struct xdr_stream *xdr)
5339 return decode_op_hdr(xdr, OP_SETCLIENTID_CONFIRM); 5342 return decode_op_hdr(xdr, OP_SETCLIENTID_CONFIRM);
5340} 5343}
5341 5344
5342static int decode_write(struct xdr_stream *xdr, struct nfs_writeres *res) 5345static int decode_write(struct xdr_stream *xdr, struct nfs_pgio_res *res)
5343{ 5346{
5344 __be32 *p; 5347 __be32 *p;
5345 int status; 5348 int status;
@@ -6636,7 +6639,7 @@ out:
6636 * Decode Read response 6639 * Decode Read response
6637 */ 6640 */
6638static int nfs4_xdr_dec_read(struct rpc_rqst *rqstp, struct xdr_stream *xdr, 6641static int nfs4_xdr_dec_read(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
6639 struct nfs_readres *res) 6642 struct nfs_pgio_res *res)
6640{ 6643{
6641 struct compound_hdr hdr; 6644 struct compound_hdr hdr;
6642 int status; 6645 int status;
@@ -6661,7 +6664,7 @@ out:
6661 * Decode WRITE response 6664 * Decode WRITE response
6662 */ 6665 */
6663static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, struct xdr_stream *xdr, 6666static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
6664 struct nfs_writeres *res) 6667 struct nfs_pgio_res *res)
6665{ 6668{
6666 struct compound_hdr hdr; 6669 struct compound_hdr hdr;
6667 int status; 6670 int status;
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index 5457745dd4f1..611320753db2 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -439,7 +439,7 @@ static void _read_done(struct ore_io_state *ios, void *private)
439 objlayout_read_done(&objios->oir, status, objios->sync); 439 objlayout_read_done(&objios->oir, status, objios->sync);
440} 440}
441 441
442int objio_read_pagelist(struct nfs_read_data *rdata) 442int objio_read_pagelist(struct nfs_pgio_data *rdata)
443{ 443{
444 struct nfs_pgio_header *hdr = rdata->header; 444 struct nfs_pgio_header *hdr = rdata->header;
445 struct objio_state *objios; 445 struct objio_state *objios;
@@ -487,7 +487,7 @@ static void _write_done(struct ore_io_state *ios, void *private)
487static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate) 487static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
488{ 488{
489 struct objio_state *objios = priv; 489 struct objio_state *objios = priv;
490 struct nfs_write_data *wdata = objios->oir.rpcdata; 490 struct nfs_pgio_data *wdata = objios->oir.rpcdata;
491 struct address_space *mapping = wdata->header->inode->i_mapping; 491 struct address_space *mapping = wdata->header->inode->i_mapping;
492 pgoff_t index = offset / PAGE_SIZE; 492 pgoff_t index = offset / PAGE_SIZE;
493 struct page *page; 493 struct page *page;
@@ -531,7 +531,7 @@ static const struct _ore_r4w_op _r4w_op = {
531 .put_page = &__r4w_put_page, 531 .put_page = &__r4w_put_page,
532}; 532};
533 533
534int objio_write_pagelist(struct nfs_write_data *wdata, int how) 534int objio_write_pagelist(struct nfs_pgio_data *wdata, int how)
535{ 535{
536 struct nfs_pgio_header *hdr = wdata->header; 536 struct nfs_pgio_header *hdr = wdata->header;
537 struct objio_state *objios; 537 struct objio_state *objios;
@@ -564,14 +564,22 @@ int objio_write_pagelist(struct nfs_write_data *wdata, int how)
564 return 0; 564 return 0;
565} 565}
566 566
567static bool objio_pg_test(struct nfs_pageio_descriptor *pgio, 567/*
568 * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
569 * of bytes (maximum @req->wb_bytes) that can be coalesced.
570 */
571static size_t objio_pg_test(struct nfs_pageio_descriptor *pgio,
568 struct nfs_page *prev, struct nfs_page *req) 572 struct nfs_page *prev, struct nfs_page *req)
569{ 573{
570 if (!pnfs_generic_pg_test(pgio, prev, req)) 574 unsigned int size;
571 return false; 575
576 size = pnfs_generic_pg_test(pgio, prev, req);
577
578 if (!size || pgio->pg_count + req->wb_bytes >
579 (unsigned long)pgio->pg_layout_private)
580 return 0;
572 581
573 return pgio->pg_count + req->wb_bytes <= 582 return min(size, req->wb_bytes);
574 (unsigned long)pgio->pg_layout_private;
575} 583}
576 584
577static void objio_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) 585static void objio_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
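
The comment added above pins down the reworked pg_test contract: instead of a true/false answer, the callback now reports how many bytes of @req may be coalesced, with 0 meaning none and req->wb_bytes as the ceiling. A small standalone sketch of objio's version of that check, with invented names and a worked example using a 16 KiB layout budget:

#include <stdio.h>

/* how many bytes of a new request still fit under the layout's byte budget */
static unsigned int pg_test_budget(unsigned long budget,    /* pg_layout_private      */
                                   unsigned int queued,      /* pgio->pg_count         */
                                   unsigned int upper,       /* generic pg_test result */
                                   unsigned int req_bytes)   /* req->wb_bytes          */
{
        if (!upper || queued + req_bytes > budget)
                return 0;                               /* cannot coalesce at all */
        return upper < req_bytes ? upper : req_bytes;   /* min(size, req->wb_bytes) */
}

int main(void)
{
        /* 16 KiB budget, 12 KiB already queued: a 4 KiB request still fits ... */
        printf("%u\n", pg_test_budget(16384, 12288, 4096, 4096));   /* -> 4096 */
        /* ... but an 8 KiB request does not */
        printf("%u\n", pg_test_budget(16384, 12288, 4096, 8192));   /* -> 0 */
        return 0;
}

The generic nfs_generic_pg_test() added in pagelist.c below applies the same idea against the descriptor's pg_bsize budget.
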
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c
index e4f9cbfec67b..765d3f54e986 100644
--- a/fs/nfs/objlayout/objlayout.c
+++ b/fs/nfs/objlayout/objlayout.c
@@ -53,10 +53,10 @@ objlayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)
53 struct objlayout *objlay; 53 struct objlayout *objlay;
54 54
55 objlay = kzalloc(sizeof(struct objlayout), gfp_flags); 55 objlay = kzalloc(sizeof(struct objlayout), gfp_flags);
56 if (objlay) { 56 if (!objlay)
57 spin_lock_init(&objlay->lock); 57 return NULL;
58 INIT_LIST_HEAD(&objlay->err_list); 58 spin_lock_init(&objlay->lock);
59 } 59 INIT_LIST_HEAD(&objlay->err_list);
60 dprintk("%s: Return %p\n", __func__, objlay); 60 dprintk("%s: Return %p\n", __func__, objlay);
61 return &objlay->pnfs_layout; 61 return &objlay->pnfs_layout;
62} 62}
@@ -229,11 +229,11 @@ objlayout_io_set_result(struct objlayout_io_res *oir, unsigned index,
229static void _rpc_read_complete(struct work_struct *work) 229static void _rpc_read_complete(struct work_struct *work)
230{ 230{
231 struct rpc_task *task; 231 struct rpc_task *task;
232 struct nfs_read_data *rdata; 232 struct nfs_pgio_data *rdata;
233 233
234 dprintk("%s enter\n", __func__); 234 dprintk("%s enter\n", __func__);
235 task = container_of(work, struct rpc_task, u.tk_work); 235 task = container_of(work, struct rpc_task, u.tk_work);
236 rdata = container_of(task, struct nfs_read_data, task); 236 rdata = container_of(task, struct nfs_pgio_data, task);
237 237
238 pnfs_ld_read_done(rdata); 238 pnfs_ld_read_done(rdata);
239} 239}
@@ -241,7 +241,7 @@ static void _rpc_read_complete(struct work_struct *work)
241void 241void
242objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync) 242objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
243{ 243{
244 struct nfs_read_data *rdata = oir->rpcdata; 244 struct nfs_pgio_data *rdata = oir->rpcdata;
245 245
246 oir->status = rdata->task.tk_status = status; 246 oir->status = rdata->task.tk_status = status;
247 if (status >= 0) 247 if (status >= 0)
@@ -266,7 +266,7 @@ objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
266 * Perform sync or async reads. 266 * Perform sync or async reads.
267 */ 267 */
268enum pnfs_try_status 268enum pnfs_try_status
269objlayout_read_pagelist(struct nfs_read_data *rdata) 269objlayout_read_pagelist(struct nfs_pgio_data *rdata)
270{ 270{
271 struct nfs_pgio_header *hdr = rdata->header; 271 struct nfs_pgio_header *hdr = rdata->header;
272 struct inode *inode = hdr->inode; 272 struct inode *inode = hdr->inode;
@@ -312,11 +312,11 @@ objlayout_read_pagelist(struct nfs_read_data *rdata)
312static void _rpc_write_complete(struct work_struct *work) 312static void _rpc_write_complete(struct work_struct *work)
313{ 313{
314 struct rpc_task *task; 314 struct rpc_task *task;
315 struct nfs_write_data *wdata; 315 struct nfs_pgio_data *wdata;
316 316
317 dprintk("%s enter\n", __func__); 317 dprintk("%s enter\n", __func__);
318 task = container_of(work, struct rpc_task, u.tk_work); 318 task = container_of(work, struct rpc_task, u.tk_work);
319 wdata = container_of(task, struct nfs_write_data, task); 319 wdata = container_of(task, struct nfs_pgio_data, task);
320 320
321 pnfs_ld_write_done(wdata); 321 pnfs_ld_write_done(wdata);
322} 322}
@@ -324,7 +324,7 @@ static void _rpc_write_complete(struct work_struct *work)
324void 324void
325objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync) 325objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
326{ 326{
327 struct nfs_write_data *wdata = oir->rpcdata; 327 struct nfs_pgio_data *wdata = oir->rpcdata;
328 328
329 oir->status = wdata->task.tk_status = status; 329 oir->status = wdata->task.tk_status = status;
330 if (status >= 0) { 330 if (status >= 0) {
@@ -351,7 +351,7 @@ objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
351 * Perform sync or async writes. 351 * Perform sync or async writes.
352 */ 352 */
353enum pnfs_try_status 353enum pnfs_try_status
354objlayout_write_pagelist(struct nfs_write_data *wdata, 354objlayout_write_pagelist(struct nfs_pgio_data *wdata,
355 int how) 355 int how)
356{ 356{
357 struct nfs_pgio_header *hdr = wdata->header; 357 struct nfs_pgio_header *hdr = wdata->header;
diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h
index 87aa1dec6120..01e041029a6c 100644
--- a/fs/nfs/objlayout/objlayout.h
+++ b/fs/nfs/objlayout/objlayout.h
@@ -119,8 +119,8 @@ extern void objio_free_lseg(struct pnfs_layout_segment *lseg);
119 */ 119 */
120extern void objio_free_result(struct objlayout_io_res *oir); 120extern void objio_free_result(struct objlayout_io_res *oir);
121 121
122extern int objio_read_pagelist(struct nfs_read_data *rdata); 122extern int objio_read_pagelist(struct nfs_pgio_data *rdata);
123extern int objio_write_pagelist(struct nfs_write_data *wdata, int how); 123extern int objio_write_pagelist(struct nfs_pgio_data *wdata, int how);
124 124
125/* 125/*
126 * callback API 126 * callback API
@@ -168,10 +168,10 @@ extern struct pnfs_layout_segment *objlayout_alloc_lseg(
168extern void objlayout_free_lseg(struct pnfs_layout_segment *); 168extern void objlayout_free_lseg(struct pnfs_layout_segment *);
169 169
170extern enum pnfs_try_status objlayout_read_pagelist( 170extern enum pnfs_try_status objlayout_read_pagelist(
171 struct nfs_read_data *); 171 struct nfs_pgio_data *);
172 172
173extern enum pnfs_try_status objlayout_write_pagelist( 173extern enum pnfs_try_status objlayout_write_pagelist(
174 struct nfs_write_data *, 174 struct nfs_pgio_data *,
175 int how); 175 int how);
176 176
177extern void objlayout_encode_layoutcommit( 177extern void objlayout_encode_layoutcommit(
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 03ed984ab4d8..b6ee3a6ee96d 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -24,9 +24,14 @@
24#include "internal.h" 24#include "internal.h"
25#include "pnfs.h" 25#include "pnfs.h"
26 26
27#define NFSDBG_FACILITY NFSDBG_PAGECACHE
28
27static struct kmem_cache *nfs_page_cachep; 29static struct kmem_cache *nfs_page_cachep;
30static const struct rpc_call_ops nfs_pgio_common_ops;
31
32static void nfs_free_request(struct nfs_page *);
28 33
29bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount) 34static bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount)
30{ 35{
31 p->npages = pagecount; 36 p->npages = pagecount;
32 if (pagecount <= ARRAY_SIZE(p->page_array)) 37 if (pagecount <= ARRAY_SIZE(p->page_array))
@@ -133,11 +138,156 @@ nfs_iocounter_wait(struct nfs_io_counter *c)
133 return __nfs_iocounter_wait(c); 138 return __nfs_iocounter_wait(c);
134} 139}
135 140
141static int nfs_wait_bit_uninterruptible(void *word)
142{
143 io_schedule();
144 return 0;
145}
146
147/*
148 * nfs_page_group_lock - lock the head of the page group
149 * @req - request in group that is to be locked
150 *
151 * this lock must be held if modifying the page group list
152 */
153void
154nfs_page_group_lock(struct nfs_page *req)
155{
156 struct nfs_page *head = req->wb_head;
157
158 WARN_ON_ONCE(head != head->wb_head);
159
160 wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK,
161 nfs_wait_bit_uninterruptible,
162 TASK_UNINTERRUPTIBLE);
163}
164
165/*
166 * nfs_page_group_unlock - unlock the head of the page group
167 * @req - request in group that is to be unlocked
168 */
169void
170nfs_page_group_unlock(struct nfs_page *req)
171{
172 struct nfs_page *head = req->wb_head;
173
174 WARN_ON_ONCE(head != head->wb_head);
175
176 smp_mb__before_atomic();
177 clear_bit(PG_HEADLOCK, &head->wb_flags);
178 smp_mb__after_atomic();
179 wake_up_bit(&head->wb_flags, PG_HEADLOCK);
180}
181
182/*
183 * nfs_page_group_sync_on_bit_locked
184 *
185 * must be called with page group lock held
186 */
187static bool
188nfs_page_group_sync_on_bit_locked(struct nfs_page *req, unsigned int bit)
189{
190 struct nfs_page *head = req->wb_head;
191 struct nfs_page *tmp;
192
193 WARN_ON_ONCE(!test_bit(PG_HEADLOCK, &head->wb_flags));
194 WARN_ON_ONCE(test_and_set_bit(bit, &req->wb_flags));
195
196 tmp = req->wb_this_page;
197 while (tmp != req) {
198 if (!test_bit(bit, &tmp->wb_flags))
199 return false;
200 tmp = tmp->wb_this_page;
201 }
202
203 /* true! reset all bits */
204 tmp = req;
205 do {
206 clear_bit(bit, &tmp->wb_flags);
207 tmp = tmp->wb_this_page;
208 } while (tmp != req);
209
210 return true;
211}
212
213/*
214 * nfs_page_group_sync_on_bit - set bit on current request, but only
215 * return true if the bit is set for all requests in page group
216 * @req - request in page group
217 * @bit - PG_* bit that is used to sync page group
218 */
219bool nfs_page_group_sync_on_bit(struct nfs_page *req, unsigned int bit)
220{
221 bool ret;
222
223 nfs_page_group_lock(req);
224 ret = nfs_page_group_sync_on_bit_locked(req, bit);
225 nfs_page_group_unlock(req);
226
227 return ret;
228}
229
230/*
231 * nfs_page_group_init - Initialize the page group linkage for @req
232 * @req - a new nfs request
233 * @prev - the previous request in page group, or NULL if @req is the first
234 * or only request in the group (the head).
235 */
236static inline void
237nfs_page_group_init(struct nfs_page *req, struct nfs_page *prev)
238{
239 WARN_ON_ONCE(prev == req);
240
241 if (!prev) {
242 req->wb_head = req;
243 req->wb_this_page = req;
244 } else {
245 WARN_ON_ONCE(prev->wb_this_page != prev->wb_head);
246 WARN_ON_ONCE(!test_bit(PG_HEADLOCK, &prev->wb_head->wb_flags));
247 req->wb_head = prev->wb_head;
248 req->wb_this_page = prev->wb_this_page;
249 prev->wb_this_page = req;
250
251 /* grab extra ref if head request has extra ref from
252 * the write/commit path to handle handoff between write
253 * and commit lists */
254 if (test_bit(PG_INODE_REF, &prev->wb_head->wb_flags))
255 kref_get(&req->wb_kref);
256 }
257}
258
259/*
260 * nfs_page_group_destroy - sync the destruction of page groups
261 * @req - request that no longer needs the page group
262 *
263 * releases the page group reference from each member once all
264 * members have called this function.
265 */
266static void
267nfs_page_group_destroy(struct kref *kref)
268{
269 struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref);
270 struct nfs_page *tmp, *next;
271
272 if (!nfs_page_group_sync_on_bit(req, PG_TEARDOWN))
273 return;
274
275 tmp = req;
276 do {
277 next = tmp->wb_this_page;
278 /* unlink and free */
279 tmp->wb_this_page = tmp;
280 tmp->wb_head = tmp;
281 nfs_free_request(tmp);
282 tmp = next;
283 } while (tmp != req);
284}
285
136/** 286/**
137 * nfs_create_request - Create an NFS read/write request. 287 * nfs_create_request - Create an NFS read/write request.
138 * @ctx: open context to use 288 * @ctx: open context to use
139 * @inode: inode to which the request is attached
140 * @page: page to write 289 * @page: page to write
290 * @last: last nfs request created for this page group or NULL if head
141 * @offset: starting offset within the page for the write 291 * @offset: starting offset within the page for the write
142 * @count: number of bytes to read/write 292 * @count: number of bytes to read/write
143 * 293 *
@@ -146,9 +296,9 @@ nfs_iocounter_wait(struct nfs_io_counter *c)
146 * User should ensure it is safe to sleep in this function. 296 * User should ensure it is safe to sleep in this function.
147 */ 297 */
148struct nfs_page * 298struct nfs_page *
149nfs_create_request(struct nfs_open_context *ctx, struct inode *inode, 299nfs_create_request(struct nfs_open_context *ctx, struct page *page,
150 struct page *page, 300 struct nfs_page *last, unsigned int offset,
151 unsigned int offset, unsigned int count) 301 unsigned int count)
152{ 302{
153 struct nfs_page *req; 303 struct nfs_page *req;
154 struct nfs_lock_context *l_ctx; 304 struct nfs_lock_context *l_ctx;
@@ -180,6 +330,7 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
180 req->wb_bytes = count; 330 req->wb_bytes = count;
181 req->wb_context = get_nfs_open_context(ctx); 331 req->wb_context = get_nfs_open_context(ctx);
182 kref_init(&req->wb_kref); 332 kref_init(&req->wb_kref);
333 nfs_page_group_init(req, last);
183 return req; 334 return req;
184} 335}
185 336
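
The helpers added in this hunk give every sub-page request a place in a circular, singly linked page group headed by wb_head: nfs_page_group_init() splices a new member in behind the head, the PG_HEADLOCK bit on the head serializes list changes, and nfs_page_group_sync_on_bit() reports true only once every member of the group has set the given bit (clearing it everywhere in that case), which is how per-page events such as unlock and teardown are deferred until the whole group is ready. A compact userspace model of the linkage and the sync-on-bit rule (field names follow the patch loosely; locking and kref handling are left out):

#include <stdio.h>
#include <stdbool.h>

struct req {
        struct req *wb_head;        /* first request of the group */
        struct req *wb_this_page;   /* next member, circular      */
        unsigned long wb_flags;     /* per-request sync bits      */
};

/* link @req into the group after @prev, or make it a group of one */
static void group_init(struct req *req, struct req *prev)
{
        if (!prev) {
                req->wb_head = req;
                req->wb_this_page = req;
        } else {
                req->wb_head = prev->wb_head;
                req->wb_this_page = prev->wb_this_page;
                prev->wb_this_page = req;
        }
}

/* set @bit on @req; return true only when every member has it,
 * clearing the bit on all members in that case */
static bool group_sync_on_bit(struct req *req, unsigned bit)
{
        struct req *tmp;

        req->wb_flags |= 1UL << bit;
        for (tmp = req->wb_this_page; tmp != req; tmp = tmp->wb_this_page)
                if (!(tmp->wb_flags & (1UL << bit)))
                        return false;
        tmp = req;
        do {
                tmp->wb_flags &= ~(1UL << bit);
                tmp = tmp->wb_this_page;
        } while (tmp != req);
        return true;
}

int main(void)
{
        struct req a = {0}, b = {0}, c = {0};

        group_init(&a, NULL);       /* head */
        group_init(&b, &a);
        group_init(&c, &b);

        printf("%d\n", group_sync_on_bit(&a, 0));   /* 0: b and c not there yet */
        printf("%d\n", group_sync_on_bit(&b, 0));   /* 0 */
        printf("%d\n", group_sync_on_bit(&c, 0));   /* 1: whole group reached the bit */
        return 0;
}

nfs_page_group_destroy() uses the same mechanism with PG_TEARDOWN so the group is freed only after every member has dropped its reference.
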
@@ -237,16 +388,22 @@ static void nfs_clear_request(struct nfs_page *req)
237 } 388 }
238} 389}
239 390
240
241/** 391/**
242 * nfs_release_request - Release the count on an NFS read/write request 392 * nfs_release_request - Release the count on an NFS read/write request
243 * @req: request to release 393 * @req: request to release
244 * 394 *
245 * Note: Should never be called with the spinlock held! 395 * Note: Should never be called with the spinlock held!
246 */ 396 */
247static void nfs_free_request(struct kref *kref) 397static void nfs_free_request(struct nfs_page *req)
248{ 398{
249 struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref); 399 WARN_ON_ONCE(req->wb_this_page != req);
400
401 /* extra debug: make sure no sync bits are still set */
402 WARN_ON_ONCE(test_bit(PG_TEARDOWN, &req->wb_flags));
403 WARN_ON_ONCE(test_bit(PG_UNLOCKPAGE, &req->wb_flags));
404 WARN_ON_ONCE(test_bit(PG_UPTODATE, &req->wb_flags));
405 WARN_ON_ONCE(test_bit(PG_WB_END, &req->wb_flags));
406 WARN_ON_ONCE(test_bit(PG_REMOVE, &req->wb_flags));
250 407
251 /* Release struct file and open context */ 408 /* Release struct file and open context */
252 nfs_clear_request(req); 409 nfs_clear_request(req);
@@ -255,13 +412,7 @@ static void nfs_free_request(struct kref *kref)
255 412
256void nfs_release_request(struct nfs_page *req) 413void nfs_release_request(struct nfs_page *req)
257{ 414{
258 kref_put(&req->wb_kref, nfs_free_request); 415 kref_put(&req->wb_kref, nfs_page_group_destroy);
259}
260
261static int nfs_wait_bit_uninterruptible(void *word)
262{
263 io_schedule();
264 return 0;
265} 416}
266 417
267/** 418/**
@@ -279,22 +430,249 @@ nfs_wait_on_request(struct nfs_page *req)
279 TASK_UNINTERRUPTIBLE); 430 TASK_UNINTERRUPTIBLE);
280} 431}
281 432
282bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req) 433/*
434 * nfs_generic_pg_test - determine if requests can be coalesced
435 * @desc: pointer to descriptor
436 * @prev: previous request in desc, or NULL
437 * @req: this request
438 *
 439 * Returns zero if @req cannot be coalesced into @desc, otherwise it returns
440 * the size of the request.
441 */
442size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc,
443 struct nfs_page *prev, struct nfs_page *req)
283{ 444{
284 /* 445 if (desc->pg_count > desc->pg_bsize) {
285 * FIXME: ideally we should be able to coalesce all requests 446 /* should never happen */
286 * that are not block boundary aligned, but currently this 447 WARN_ON_ONCE(1);
287 * is problematic for the case of bsize < PAGE_CACHE_SIZE,
288 * since nfs_flush_multi and nfs_pagein_multi assume you
289 * can have only one struct nfs_page.
290 */
291 if (desc->pg_bsize < PAGE_SIZE)
292 return 0; 448 return 0;
449 }
293 450
294 return desc->pg_count + req->wb_bytes <= desc->pg_bsize; 451 return min(desc->pg_bsize - desc->pg_count, (size_t)req->wb_bytes);
295} 452}
296EXPORT_SYMBOL_GPL(nfs_generic_pg_test); 453EXPORT_SYMBOL_GPL(nfs_generic_pg_test);
297 454
455static inline struct nfs_rw_header *NFS_RW_HEADER(struct nfs_pgio_header *hdr)
456{
457 return container_of(hdr, struct nfs_rw_header, header);
458}
459
460/**
461 * nfs_rw_header_alloc - Allocate a header for a read or write
462 * @ops: Read or write function vector
463 */
464struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *ops)
465{
466 struct nfs_rw_header *header = ops->rw_alloc_header();
467
468 if (header) {
469 struct nfs_pgio_header *hdr = &header->header;
470
471 INIT_LIST_HEAD(&hdr->pages);
472 spin_lock_init(&hdr->lock);
473 atomic_set(&hdr->refcnt, 0);
474 hdr->rw_ops = ops;
475 }
476 return header;
477}
478EXPORT_SYMBOL_GPL(nfs_rw_header_alloc);
479
480/*
481 * nfs_rw_header_free - Free a read or write header
482 * @hdr: The header to free
483 */
484void nfs_rw_header_free(struct nfs_pgio_header *hdr)
485{
486 hdr->rw_ops->rw_free_header(NFS_RW_HEADER(hdr));
487}
488EXPORT_SYMBOL_GPL(nfs_rw_header_free);
489
490/**
491 * nfs_pgio_data_alloc - Allocate pageio data
492 * @hdr: The header making a request
493 * @pagecount: Number of pages to create
494 */
495static struct nfs_pgio_data *nfs_pgio_data_alloc(struct nfs_pgio_header *hdr,
496 unsigned int pagecount)
497{
498 struct nfs_pgio_data *data, *prealloc;
499
500 prealloc = &NFS_RW_HEADER(hdr)->rpc_data;
501 if (prealloc->header == NULL)
502 data = prealloc;
503 else
504 data = kzalloc(sizeof(*data), GFP_KERNEL);
505 if (!data)
506 goto out;
507
508 if (nfs_pgarray_set(&data->pages, pagecount)) {
509 data->header = hdr;
510 atomic_inc(&hdr->refcnt);
511 } else {
512 if (data != prealloc)
513 kfree(data);
514 data = NULL;
515 }
516out:
517 return data;
518}
519
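
nfs_rw_header_alloc() and nfs_pgio_data_alloc() above rely on a small allocation trick: each nfs_rw_header embeds one nfs_pgio_data (rpc_data), the allocator hands that slot out first and only falls back to kzalloc() when it is already taken, and container_of() (wrapped as NFS_RW_HEADER()) recovers the outer header from the inner struct. A rough userspace equivalent, with invented type names:

#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>

struct pgio_hdr  { int refcnt; };
struct pgio_data { struct pgio_hdr *header; };

struct rw_header {
        struct pgio_hdr  header;
        struct pgio_data rpc_data;      /* preallocated first data slot */
};

/* container_of(hdr, struct rw_header, header) in kernel terms */
#define rw_header_of(hdr) \
        ((struct rw_header *)((char *)(hdr) - offsetof(struct rw_header, header)))

static struct pgio_data *pgio_data_alloc(struct pgio_hdr *hdr)
{
        struct pgio_data *prealloc = &rw_header_of(hdr)->rpc_data;
        struct pgio_data *data;

        if (prealloc->header == NULL)   /* embedded slot still free: use it */
                data = prealloc;
        else                            /* otherwise fall back to the heap  */
                data = calloc(1, sizeof(*data));
        if (data) {
                data->header = hdr;
                hdr->refcnt++;          /* each data holds a reference on the header */
        }
        return data;
}

int main(void)
{
        struct rw_header *rw = calloc(1, sizeof(*rw));
        struct pgio_data *first  = pgio_data_alloc(&rw->header);
        struct pgio_data *second = pgio_data_alloc(&rw->header);

        printf("first uses embedded slot: %d\n", first == &rw->rpc_data);   /* 1 */
        printf("second heap allocated:    %d\n", second != &rw->rpc_data);  /* 1 */
        printf("header refcount:          %d\n", rw->header.refcnt);        /* 2 */
        free(second);
        free(rw);
        return 0;
}
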
520/**
521 * nfs_pgio_data_release - Properly free pageio data
522 * @data: The data to release
523 */
524void nfs_pgio_data_release(struct nfs_pgio_data *data)
525{
526 struct nfs_pgio_header *hdr = data->header;
527 struct nfs_rw_header *pageio_header = NFS_RW_HEADER(hdr);
528
529 put_nfs_open_context(data->args.context);
530 if (data->pages.pagevec != data->pages.page_array)
531 kfree(data->pages.pagevec);
532 if (data == &pageio_header->rpc_data) {
533 data->header = NULL;
534 data = NULL;
535 }
536 if (atomic_dec_and_test(&hdr->refcnt))
537 hdr->completion_ops->completion(hdr);
538 /* Note: we only free the rpc_task after callbacks are done.
539 * See the comment in rpc_free_task() for why
540 */
541 kfree(data);
542}
543EXPORT_SYMBOL_GPL(nfs_pgio_data_release);
544
545/**
546 * nfs_pgio_rpcsetup - Set up arguments for a pageio call
547 * @data: The pageio data
548 * @count: Number of bytes to read
549 * @offset: Initial offset
550 * @how: How to commit data (writes only)
551 * @cinfo: Commit information for the call (writes only)
552 */
553static void nfs_pgio_rpcsetup(struct nfs_pgio_data *data,
554 unsigned int count, unsigned int offset,
555 int how, struct nfs_commit_info *cinfo)
556{
557 struct nfs_page *req = data->header->req;
558
559 /* Set up the RPC argument and reply structs
560 * NB: take care not to mess about with data->commit et al. */
561
562 data->args.fh = NFS_FH(data->header->inode);
563 data->args.offset = req_offset(req) + offset;
564 /* pnfs_set_layoutcommit needs this */
565 data->mds_offset = data->args.offset;
566 data->args.pgbase = req->wb_pgbase + offset;
567 data->args.pages = data->pages.pagevec;
568 data->args.count = count;
569 data->args.context = get_nfs_open_context(req->wb_context);
570 data->args.lock_context = req->wb_lock_context;
571 data->args.stable = NFS_UNSTABLE;
572 switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) {
573 case 0:
574 break;
575 case FLUSH_COND_STABLE:
576 if (nfs_reqs_to_commit(cinfo))
577 break;
578 default:
579 data->args.stable = NFS_FILE_SYNC;
580 }
581
582 data->res.fattr = &data->fattr;
583 data->res.count = count;
584 data->res.eof = 0;
585 data->res.verf = &data->verf;
586 nfs_fattr_init(&data->fattr);
587}
588
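
The switch at the end of nfs_pgio_rpcsetup() above picks the write stability level: NFS_UNSTABLE by default, upgraded to NFS_FILE_SYNC either when the caller asked for a stable flush or when FLUSH_COND_STABLE is set and no requests are waiting on a later COMMIT; the fall-through out of the FLUSH_COND_STABLE case is deliberate. The same decision table pulled out as a tiny function (the flag values here are placeholders, not the kernel's):

#include <stdio.h>

#define FLUSH_STABLE       4        /* illustrative values only */
#define FLUSH_COND_STABLE  8

enum stable_how { NFS_UNSTABLE, NFS_DATA_SYNC, NFS_FILE_SYNC };

static enum stable_how pick_stable(int how, int reqs_to_commit)
{
        switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) {
        case 0:
                return NFS_UNSTABLE;                /* plain async write          */
        case FLUSH_COND_STABLE:
                if (reqs_to_commit)
                        return NFS_UNSTABLE;        /* a COMMIT will follow anyway */
                /* fall through */
        default:
                return NFS_FILE_SYNC;               /* stable flush requested, or
                                                       nothing left to commit     */
        }
}

int main(void)
{
        printf("%d\n", pick_stable(0, 0));                    /* 0: NFS_UNSTABLE   */
        printf("%d\n", pick_stable(FLUSH_COND_STABLE, 3));    /* 0: commit pending */
        printf("%d\n", pick_stable(FLUSH_COND_STABLE, 0));    /* 2: NFS_FILE_SYNC  */
        printf("%d\n", pick_stable(FLUSH_STABLE, 5));         /* 2: NFS_FILE_SYNC  */
        return 0;
}
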
589/**
590 * nfs_pgio_prepare - Prepare pageio data to go over the wire
591 * @task: The current task
592 * @calldata: pageio data to prepare
593 */
594static void nfs_pgio_prepare(struct rpc_task *task, void *calldata)
595{
596 struct nfs_pgio_data *data = calldata;
597 int err;
598 err = NFS_PROTO(data->header->inode)->pgio_rpc_prepare(task, data);
599 if (err)
600 rpc_exit(task, err);
601}
602
603int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_data *data,
604 const struct rpc_call_ops *call_ops, int how, int flags)
605{
606 struct rpc_task *task;
607 struct rpc_message msg = {
608 .rpc_argp = &data->args,
609 .rpc_resp = &data->res,
610 .rpc_cred = data->header->cred,
611 };
612 struct rpc_task_setup task_setup_data = {
613 .rpc_client = clnt,
614 .task = &data->task,
615 .rpc_message = &msg,
616 .callback_ops = call_ops,
617 .callback_data = data,
618 .workqueue = nfsiod_workqueue,
619 .flags = RPC_TASK_ASYNC | flags,
620 };
621 int ret = 0;
622
623 data->header->rw_ops->rw_initiate(data, &msg, &task_setup_data, how);
624
625 dprintk("NFS: %5u initiated pgio call "
626 "(req %s/%llu, %u bytes @ offset %llu)\n",
627 data->task.tk_pid,
628 data->header->inode->i_sb->s_id,
629 (unsigned long long)NFS_FILEID(data->header->inode),
630 data->args.count,
631 (unsigned long long)data->args.offset);
632
633 task = rpc_run_task(&task_setup_data);
634 if (IS_ERR(task)) {
635 ret = PTR_ERR(task);
636 goto out;
637 }
638 if (how & FLUSH_SYNC) {
639 ret = rpc_wait_for_completion_task(task);
640 if (ret == 0)
641 ret = task->tk_status;
642 }
643 rpc_put_task(task);
644out:
645 return ret;
646}
647EXPORT_SYMBOL_GPL(nfs_initiate_pgio);
648
649/**
650 * nfs_pgio_error - Clean up from a pageio error
651 * @desc: IO descriptor
652 * @hdr: pageio header
653 */
654static int nfs_pgio_error(struct nfs_pageio_descriptor *desc,
655 struct nfs_pgio_header *hdr)
656{
657 set_bit(NFS_IOHDR_REDO, &hdr->flags);
658 nfs_pgio_data_release(hdr->data);
659 hdr->data = NULL;
660 desc->pg_completion_ops->error_cleanup(&desc->pg_list);
661 return -ENOMEM;
662}
663
664/**
665 * nfs_pgio_release - Release pageio data
666 * @calldata: The pageio data to release
667 */
668static void nfs_pgio_release(void *calldata)
669{
670 struct nfs_pgio_data *data = calldata;
671 if (data->header->rw_ops->rw_release)
672 data->header->rw_ops->rw_release(data);
673 nfs_pgio_data_release(data);
674}
675
298/** 676/**
299 * nfs_pageio_init - initialise a page io descriptor 677 * nfs_pageio_init - initialise a page io descriptor
300 * @desc: pointer to descriptor 678 * @desc: pointer to descriptor
@@ -307,6 +685,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
307 struct inode *inode, 685 struct inode *inode,
308 const struct nfs_pageio_ops *pg_ops, 686 const struct nfs_pageio_ops *pg_ops,
309 const struct nfs_pgio_completion_ops *compl_ops, 687 const struct nfs_pgio_completion_ops *compl_ops,
688 const struct nfs_rw_ops *rw_ops,
310 size_t bsize, 689 size_t bsize,
311 int io_flags) 690 int io_flags)
312{ 691{
@@ -320,6 +699,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
320 desc->pg_inode = inode; 699 desc->pg_inode = inode;
321 desc->pg_ops = pg_ops; 700 desc->pg_ops = pg_ops;
322 desc->pg_completion_ops = compl_ops; 701 desc->pg_completion_ops = compl_ops;
702 desc->pg_rw_ops = rw_ops;
323 desc->pg_ioflags = io_flags; 703 desc->pg_ioflags = io_flags;
324 desc->pg_error = 0; 704 desc->pg_error = 0;
325 desc->pg_lseg = NULL; 705 desc->pg_lseg = NULL;
@@ -328,6 +708,94 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
328} 708}
329EXPORT_SYMBOL_GPL(nfs_pageio_init); 709EXPORT_SYMBOL_GPL(nfs_pageio_init);
330 710
711/**
712 * nfs_pgio_result - Basic pageio error handling
713 * @task: The task that ran
714 * @calldata: Pageio data to check
715 */
716static void nfs_pgio_result(struct rpc_task *task, void *calldata)
717{
718 struct nfs_pgio_data *data = calldata;
719 struct inode *inode = data->header->inode;
720
721 dprintk("NFS: %s: %5u, (status %d)\n", __func__,
722 task->tk_pid, task->tk_status);
723
724 if (data->header->rw_ops->rw_done(task, data, inode) != 0)
725 return;
726 if (task->tk_status < 0)
727 nfs_set_pgio_error(data->header, task->tk_status, data->args.offset);
728 else
729 data->header->rw_ops->rw_result(task, data);
730}
731
732/*
733 * Create an RPC task for the given read or write request and kick it.
734 * The page must have been locked by the caller.
735 *
736 * It may happen that the page we're passed is not marked dirty.
737 * This is the case if nfs_updatepage detects a conflicting request
738 * that has been written but not committed.
739 */
740int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
741 struct nfs_pgio_header *hdr)
742{
743 struct nfs_page *req;
744 struct page **pages;
745 struct nfs_pgio_data *data;
746 struct list_head *head = &desc->pg_list;
747 struct nfs_commit_info cinfo;
748
749 data = nfs_pgio_data_alloc(hdr, nfs_page_array_len(desc->pg_base,
750 desc->pg_count));
751 if (!data)
752 return nfs_pgio_error(desc, hdr);
753
754 nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);
755 pages = data->pages.pagevec;
756 while (!list_empty(head)) {
757 req = nfs_list_entry(head->next);
758 nfs_list_remove_request(req);
759 nfs_list_add_request(req, &hdr->pages);
760 *pages++ = req->wb_page;
761 }
762
763 if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
764 (desc->pg_moreio || nfs_reqs_to_commit(&cinfo)))
765 desc->pg_ioflags &= ~FLUSH_COND_STABLE;
766
767 /* Set up the argument struct */
768 nfs_pgio_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags, &cinfo);
769 hdr->data = data;
770 desc->pg_rpc_callops = &nfs_pgio_common_ops;
771 return 0;
772}
773EXPORT_SYMBOL_GPL(nfs_generic_pgio);
774
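
For illustration: nfs_generic_pgio() above takes over from the per-size helpers (the read-side nfs_pagein_one()/nfs_pagein_multi() pair is deleted from read.c later in this diff) and builds a single nfs_pgio_data whose page vector covers the whole coalesced list. The list-to-vector step can be sketched in standalone C; toy_req and toy_page are simplified stand-ins, not the kernel types:

#include <stddef.h>
#include <stdio.h>

struct toy_page { int id; };

struct toy_req {
	struct toy_page *wb_page;
	struct toy_req *next;
};

/* Drain the descriptor's request list (here a plain singly linked list)
 * and record each request's page in the RPC page vector, in the spirit
 * of the while loop in nfs_generic_pgio(). */
static size_t drain_to_pagevec(struct toy_req **head, struct toy_page **pages)
{
	size_t n = 0;

	while (*head) {
		struct toy_req *req = *head;

		*head = req->next;		/* nfs_list_remove_request() */
		pages[n++] = req->wb_page;	/* *pages++ = req->wb_page; */
	}
	return n;
}

int main(void)
{
	struct toy_page p0 = { 0 }, p1 = { 1 };
	struct toy_req r1 = { &p1, NULL }, r0 = { &p0, &r1 };
	struct toy_req *head = &r0;
	struct toy_page *vec[2];
	size_t n = drain_to_pagevec(&head, vec);

	printf("%zu pages, first id %d\n", n, vec[0]->id);
	return 0;
}
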
775static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc)
776{
777 struct nfs_rw_header *rw_hdr;
778 struct nfs_pgio_header *hdr;
779 int ret;
780
781 rw_hdr = nfs_rw_header_alloc(desc->pg_rw_ops);
782 if (!rw_hdr) {
783 desc->pg_completion_ops->error_cleanup(&desc->pg_list);
784 return -ENOMEM;
785 }
786 hdr = &rw_hdr->header;
787 nfs_pgheader_init(desc, hdr, nfs_rw_header_free);
788 atomic_inc(&hdr->refcnt);
789 ret = nfs_generic_pgio(desc, hdr);
790 if (ret == 0)
791 ret = nfs_initiate_pgio(NFS_CLIENT(hdr->inode),
792 hdr->data, desc->pg_rpc_callops,
793 desc->pg_ioflags, 0);
794 if (atomic_dec_and_test(&hdr->refcnt))
795 hdr->completion_ops->completion(hdr);
796 return ret;
797}
798
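
Both nfs_generic_pg_pgios() above and the pNFS read/write variants later in this diff follow the same header lifetime rule: take an extra reference before issuing the I/O, then whichever path drops the last reference runs ->completion() exactly once. A compilable sketch of that rule with C11 atomics; the initial count of 1 stands in for the reference taken when the pgio data was attached to the header (an assumption of the sketch, not kernel code):

#include <stdatomic.h>
#include <stdio.h>

struct toy_hdr {
	atomic_int refcnt;
};

static void toy_completion(struct toy_hdr *hdr)
{
	printf("completion called exactly once\n");
}

/* Mirrors "if (atomic_dec_and_test(&hdr->refcnt)) ->completion(hdr);". */
static void toy_put(struct toy_hdr *hdr)
{
	if (atomic_fetch_sub(&hdr->refcnt, 1) == 1)
		toy_completion(hdr);
}

int main(void)
{
	/* One reference assumed to be held on behalf of the attached
	 * pgio data. */
	struct toy_hdr hdr = { .refcnt = 1 };

	atomic_fetch_add(&hdr->refcnt, 1);	/* atomic_inc() before the I/O */
	toy_put(&hdr);	/* the data release path drops its reference */
	toy_put(&hdr);	/* atomic_dec_and_test() after issuing the I/O */
	return 0;
}
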
331static bool nfs_match_open_context(const struct nfs_open_context *ctx1, 799static bool nfs_match_open_context(const struct nfs_open_context *ctx1,
332 const struct nfs_open_context *ctx2) 800 const struct nfs_open_context *ctx2)
333{ 801{
@@ -356,18 +824,23 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev,
356 struct nfs_page *req, 824 struct nfs_page *req,
357 struct nfs_pageio_descriptor *pgio) 825 struct nfs_pageio_descriptor *pgio)
358{ 826{
359 if (!nfs_match_open_context(req->wb_context, prev->wb_context)) 827 size_t size;
360 return false; 828
361 if (req->wb_context->dentry->d_inode->i_flock != NULL && 829 if (prev) {
362 !nfs_match_lock_context(req->wb_lock_context, prev->wb_lock_context)) 830 if (!nfs_match_open_context(req->wb_context, prev->wb_context))
363 return false; 831 return false;
364 if (req->wb_pgbase != 0) 832 if (req->wb_context->dentry->d_inode->i_flock != NULL &&
365 return false; 833 !nfs_match_lock_context(req->wb_lock_context,
366 if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE) 834 prev->wb_lock_context))
367 return false; 835 return false;
368 if (req_offset(req) != req_offset(prev) + prev->wb_bytes) 836 if (req_offset(req) != req_offset(prev) + prev->wb_bytes)
369 return false; 837 return false;
370 return pgio->pg_ops->pg_test(pgio, prev, req); 838 }
839 size = pgio->pg_ops->pg_test(pgio, prev, req);
840 WARN_ON_ONCE(size > req->wb_bytes);
841 if (size && size < req->wb_bytes)
842 req->wb_bytes = size;
843 return size > 0;
371} 844}
372 845
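
The coalescing contract changes here: pg_test now returns a size_t, where 0 means the request cannot be added and any other value (at most req->wb_bytes) is how many bytes fit, and nfs_can_coalesce_requests() trims wb_bytes down to that value. A minimal userspace sketch of the contract, using stand-in types rather than struct nfs_page and struct nfs_pageio_descriptor:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct toy_req {
	size_t wb_bytes;	/* bytes requested */
};

struct toy_desc {
	size_t bsize;		/* rsize/wsize */
	size_t count;		/* bytes already coalesced */
};

/* The new pg_test contract: return 0 if nothing can be coalesced,
 * otherwise the number of bytes (never more than req->wb_bytes). */
static size_t toy_pg_test(struct toy_desc *d, struct toy_req *req)
{
	if (d->count >= d->bsize)
		return 0;
	return req->wb_bytes < d->bsize - d->count ?
		req->wb_bytes : d->bsize - d->count;
}

/* Mirrors nfs_can_coalesce_requests(): trim the request when pg_test
 * accepts less than was asked for. */
static bool toy_can_coalesce(struct toy_desc *d, struct toy_req *req)
{
	size_t size = toy_pg_test(d, req);

	if (size && size < req->wb_bytes)
		req->wb_bytes = size;
	return size > 0;
}

int main(void)
{
	struct toy_desc d = { .bsize = 4096, .count = 3072 };
	struct toy_req r = { .wb_bytes = 2048 };

	if (toy_can_coalesce(&d, &r))
		printf("coalesced %zu bytes\n", r.wb_bytes);	/* 1024 */
	return 0;
}
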
373/** 846/**
@@ -381,17 +854,16 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev,
381static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, 854static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
382 struct nfs_page *req) 855 struct nfs_page *req)
383{ 856{
857 struct nfs_page *prev = NULL;
384 if (desc->pg_count != 0) { 858 if (desc->pg_count != 0) {
385 struct nfs_page *prev;
386
387 prev = nfs_list_entry(desc->pg_list.prev); 859 prev = nfs_list_entry(desc->pg_list.prev);
388 if (!nfs_can_coalesce_requests(prev, req, desc))
389 return 0;
390 } else { 860 } else {
391 if (desc->pg_ops->pg_init) 861 if (desc->pg_ops->pg_init)
392 desc->pg_ops->pg_init(desc, req); 862 desc->pg_ops->pg_init(desc, req);
393 desc->pg_base = req->wb_pgbase; 863 desc->pg_base = req->wb_pgbase;
394 } 864 }
865 if (!nfs_can_coalesce_requests(prev, req, desc))
866 return 0;
395 nfs_list_remove_request(req); 867 nfs_list_remove_request(req);
396 nfs_list_add_request(req, &desc->pg_list); 868 nfs_list_add_request(req, &desc->pg_list);
397 desc->pg_count += req->wb_bytes; 869 desc->pg_count += req->wb_bytes;
@@ -421,22 +893,73 @@ static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc)
421 * @desc: destination io descriptor 893 * @desc: destination io descriptor
422 * @req: request 894 * @req: request
423 * 895 *
896 * This may split a request into subrequests which are all part of the
897 * same page group.
898 *
424 * Returns true if the request 'req' was successfully coalesced into the 899 * Returns true if the request 'req' was successfully coalesced into the
425 * existing list of pages 'desc'. 900 * existing list of pages 'desc'.
426 */ 901 */
427static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, 902static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
428 struct nfs_page *req) 903 struct nfs_page *req)
429{ 904{
430 while (!nfs_pageio_do_add_request(desc, req)) { 905 struct nfs_page *subreq;
431 desc->pg_moreio = 1; 906 unsigned int bytes_left = 0;
432 nfs_pageio_doio(desc); 907 unsigned int offset, pgbase;
433 if (desc->pg_error < 0) 908
434 return 0; 909 nfs_page_group_lock(req);
435 desc->pg_moreio = 0; 910
436 if (desc->pg_recoalesce) 911 subreq = req;
437 return 0; 912 bytes_left = subreq->wb_bytes;
438 } 913 offset = subreq->wb_offset;
914 pgbase = subreq->wb_pgbase;
915
916 do {
917 if (!nfs_pageio_do_add_request(desc, subreq)) {
918 /* make sure pg_test call(s) did nothing */
919 WARN_ON_ONCE(subreq->wb_bytes != bytes_left);
920 WARN_ON_ONCE(subreq->wb_offset != offset);
921 WARN_ON_ONCE(subreq->wb_pgbase != pgbase);
922
923 nfs_page_group_unlock(req);
924 desc->pg_moreio = 1;
925 nfs_pageio_doio(desc);
926 if (desc->pg_error < 0)
927 return 0;
928 desc->pg_moreio = 0;
929 if (desc->pg_recoalesce)
930 return 0;
931 /* retry add_request for this subreq */
932 nfs_page_group_lock(req);
933 continue;
934 }
935
936 /* check for buggy pg_test call(s) */
937 WARN_ON_ONCE(subreq->wb_bytes + subreq->wb_pgbase > PAGE_SIZE);
938 WARN_ON_ONCE(subreq->wb_bytes > bytes_left);
939 WARN_ON_ONCE(subreq->wb_bytes == 0);
940
941 bytes_left -= subreq->wb_bytes;
942 offset += subreq->wb_bytes;
943 pgbase += subreq->wb_bytes;
944
945 if (bytes_left) {
946 subreq = nfs_create_request(req->wb_context,
947 req->wb_page,
948 subreq, pgbase, bytes_left);
949 if (IS_ERR(subreq))
950 goto err_ptr;
951 nfs_lock_request(subreq);
952 subreq->wb_offset = offset;
953 subreq->wb_index = req->wb_index;
954 }
955 } while (bytes_left > 0);
956
957 nfs_page_group_unlock(req);
439 return 1; 958 return 1;
959err_ptr:
960 desc->pg_error = PTR_ERR(subreq);
961 nfs_page_group_unlock(req);
962 return 0;
440} 963}
441 964
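
With requests no longer required to cover a whole page, __nfs_pageio_add_request() walks a (pgbase, offset, bytes_left) triple and creates a subrequest in the same page group for whatever the descriptor did not accept. Stripped of page-group locking, nfs_create_request() and the doio/recoalesce retry paths, the arithmetic of that loop looks roughly like this (the 1024-byte limit is an arbitrary stand-in for a trimmed pg_test result):

#include <stddef.h>
#include <stdio.h>

/* Bytes the descriptor accepts this round: a stand-in for
 * nfs_pageio_do_add_request() after pg_test has trimmed the request. */
static size_t accept_bytes(size_t wanted, size_t limit)
{
	return wanted < limit ? wanted : limit;
}

int main(void)
{
	size_t offset = 0, pgbase = 512, bytes_left = 3000;
	const size_t limit = 1024;	/* e.g. a small rsize or a stripe boundary */

	/* Mirrors the do/while in __nfs_pageio_add_request(): consume what
	 * was accepted, then carry on with a subrequest for the rest. */
	do {
		size_t done = accept_bytes(bytes_left, limit);

		printf("subreq: pgbase=%zu offset=%zu bytes=%zu\n",
		       pgbase, offset, done);
		bytes_left -= done;
		offset += done;
		pgbase += done;
	} while (bytes_left > 0);
	return 0;
}
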
442static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc) 965static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc)
@@ -535,3 +1058,13 @@ void nfs_destroy_nfspagecache(void)
535 kmem_cache_destroy(nfs_page_cachep); 1058 kmem_cache_destroy(nfs_page_cachep);
536} 1059}
537 1060
1061static const struct rpc_call_ops nfs_pgio_common_ops = {
1062 .rpc_call_prepare = nfs_pgio_prepare,
1063 .rpc_call_done = nfs_pgio_result,
1064 .rpc_release = nfs_pgio_release,
1065};
1066
1067const struct nfs_pageio_ops nfs_pgio_rw_ops = {
1068 .pg_test = nfs_generic_pg_test,
1069 .pg_doio = nfs_generic_pg_pgios,
1070};
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index fd9536e494bc..6fdcd233d6f7 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1388,11 +1388,6 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r
1388 1388
1389 WARN_ON_ONCE(pgio->pg_lseg != NULL); 1389 WARN_ON_ONCE(pgio->pg_lseg != NULL);
1390 1390
1391 if (req->wb_offset != req->wb_pgbase) {
1392 nfs_pageio_reset_read_mds(pgio);
1393 return;
1394 }
1395
1396 if (pgio->pg_dreq == NULL) 1391 if (pgio->pg_dreq == NULL)
1397 rd_size = i_size_read(pgio->pg_inode) - req_offset(req); 1392 rd_size = i_size_read(pgio->pg_inode) - req_offset(req);
1398 else 1393 else
@@ -1417,11 +1412,6 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
1417{ 1412{
1418 WARN_ON_ONCE(pgio->pg_lseg != NULL); 1413 WARN_ON_ONCE(pgio->pg_lseg != NULL);
1419 1414
1420 if (req->wb_offset != req->wb_pgbase) {
1421 nfs_pageio_reset_write_mds(pgio);
1422 return;
1423 }
1424
1425 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 1415 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
1426 req->wb_context, 1416 req->wb_context,
1427 req_offset(req), 1417 req_offset(req),
@@ -1434,56 +1424,49 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
1434} 1424}
1435EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write); 1425EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write);
1436 1426
1437void 1427/*
1438pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode, 1428 * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
1439 const struct nfs_pgio_completion_ops *compl_ops) 1429 * of bytes (maximum @req->wb_bytes) that can be coalesced.
1440{ 1430 */
1441 struct nfs_server *server = NFS_SERVER(inode); 1431size_t
1442 struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
1443
1444 if (ld == NULL)
1445 nfs_pageio_init_read(pgio, inode, compl_ops);
1446 else
1447 nfs_pageio_init(pgio, inode, ld->pg_read_ops, compl_ops, server->rsize, 0);
1448}
1449
1450void
1451pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode,
1452 int ioflags,
1453 const struct nfs_pgio_completion_ops *compl_ops)
1454{
1455 struct nfs_server *server = NFS_SERVER(inode);
1456 struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
1457
1458 if (ld == NULL)
1459 nfs_pageio_init_write(pgio, inode, ioflags, compl_ops);
1460 else
1461 nfs_pageio_init(pgio, inode, ld->pg_write_ops, compl_ops, server->wsize, ioflags);
1462}
1463
1464bool
1465pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, 1432pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
1466 struct nfs_page *req) 1433 struct nfs_page *req)
1467{ 1434{
1468 if (pgio->pg_lseg == NULL) 1435 unsigned int size;
1469 return nfs_generic_pg_test(pgio, prev, req); 1436 u64 seg_end, req_start, seg_left;
1437
1438 size = nfs_generic_pg_test(pgio, prev, req);
1439 if (!size)
1440 return 0;
1470 1441
1471 /* 1442 /*
1472 * Test if a nfs_page is fully contained in the pnfs_layout_range. 1443 * 'size' contains the number of bytes left in the current page (up
1473 * Note that this test makes several assumptions: 1444 * to the original size asked for in @req->wb_bytes).
1474 * - that the previous nfs_page in the struct nfs_pageio_descriptor 1445 *
1475 * is known to lie within the range. 1446 * Calculate how many bytes are left in the layout segment
1476 * - that the nfs_page being tested is known to be contiguous with the 1447 * and if there are less bytes than 'size', return that instead.
1477 * previous nfs_page.
1478 * - Layout ranges are page aligned, so we only have to test the
1479 * start offset of the request.
1480 * 1448 *
1481 * Please also note that 'end_offset' is actually the offset of the 1449 * Please also note that 'end_offset' is actually the offset of the
1482 * first byte that lies outside the pnfs_layout_range. FIXME? 1450 * first byte that lies outside the pnfs_layout_range. FIXME?
1483 * 1451 *
1484 */ 1452 */
1485 return req_offset(req) < end_offset(pgio->pg_lseg->pls_range.offset, 1453 if (pgio->pg_lseg) {
1486 pgio->pg_lseg->pls_range.length); 1454 seg_end = end_offset(pgio->pg_lseg->pls_range.offset,
1455 pgio->pg_lseg->pls_range.length);
1456 req_start = req_offset(req);
1457 WARN_ON_ONCE(req_start > seg_end);
1458 /* start of request is past the last byte of this segment */
1459 if (req_start >= seg_end)
1460 return 0;
1461
1462 /* adjust 'size' iff there are fewer bytes left in the
1463 * segment than what nfs_generic_pg_test returned */
1464 seg_left = seg_end - req_start;
1465 if (seg_left < size)
1466 size = (unsigned int)seg_left;
1467 }
1468
1469 return size;
1487} 1470}
1488EXPORT_SYMBOL_GPL(pnfs_generic_pg_test); 1471EXPORT_SYMBOL_GPL(pnfs_generic_pg_test);
1489 1472
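
pnfs_generic_pg_test() now composes with the size-returning generic test: start from whatever nfs_generic_pg_test() allows, then clamp that to the bytes left in the layout segment, returning 0 once the request starts at or beyond the segment end. The clamping reduces to the following standalone C, with plain integers instead of struct pnfs_layout_segment (the kernel's end_offset() also guards against u64 overflow, which is elided here):

#include <stdint.h>
#include <stdio.h>

/* Bytes of @size that still fall inside a segment covering
 * [seg_offset, seg_offset + seg_length). */
static unsigned int clamp_to_segment(unsigned int size, uint64_t req_start,
				     uint64_t seg_offset, uint64_t seg_length)
{
	uint64_t seg_end = seg_offset + seg_length;	/* end_offset() */
	uint64_t seg_left;

	if (req_start >= seg_end)	/* request begins past the segment */
		return 0;
	seg_left = seg_end - req_start;
	return seg_left < size ? (unsigned int)seg_left : size;
}

int main(void)
{
	/* 4 KiB allowed generically, but only 1 KiB left in the segment. */
	printf("%u\n", clamp_to_segment(4096, 7168, 0, 8192));	/* 1024 */
	return 0;
}
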
@@ -1496,7 +1479,7 @@ int pnfs_write_done_resend_to_mds(struct inode *inode,
1496 LIST_HEAD(failed); 1479 LIST_HEAD(failed);
1497 1480
1498 /* Resend all requests through the MDS */ 1481 /* Resend all requests through the MDS */
1499 nfs_pageio_init_write(&pgio, inode, FLUSH_STABLE, compl_ops); 1482 nfs_pageio_init_write(&pgio, inode, FLUSH_STABLE, true, compl_ops);
1500 pgio.pg_dreq = dreq; 1483 pgio.pg_dreq = dreq;
1501 while (!list_empty(head)) { 1484 while (!list_empty(head)) {
1502 struct nfs_page *req = nfs_list_entry(head->next); 1485 struct nfs_page *req = nfs_list_entry(head->next);
@@ -1519,7 +1502,7 @@ int pnfs_write_done_resend_to_mds(struct inode *inode,
1519} 1502}
1520EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds); 1503EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds);
1521 1504
1522static void pnfs_ld_handle_write_error(struct nfs_write_data *data) 1505static void pnfs_ld_handle_write_error(struct nfs_pgio_data *data)
1523{ 1506{
1524 struct nfs_pgio_header *hdr = data->header; 1507 struct nfs_pgio_header *hdr = data->header;
1525 1508
@@ -1538,7 +1521,7 @@ static void pnfs_ld_handle_write_error(struct nfs_write_data *data)
1538/* 1521/*
1539 * Called by non rpc-based layout drivers 1522 * Called by non rpc-based layout drivers
1540 */ 1523 */
1541void pnfs_ld_write_done(struct nfs_write_data *data) 1524void pnfs_ld_write_done(struct nfs_pgio_data *data)
1542{ 1525{
1543 struct nfs_pgio_header *hdr = data->header; 1526 struct nfs_pgio_header *hdr = data->header;
1544 1527
@@ -1554,7 +1537,7 @@ EXPORT_SYMBOL_GPL(pnfs_ld_write_done);
1554 1537
1555static void 1538static void
1556pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, 1539pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
1557 struct nfs_write_data *data) 1540 struct nfs_pgio_data *data)
1558{ 1541{
1559 struct nfs_pgio_header *hdr = data->header; 1542 struct nfs_pgio_header *hdr = data->header;
1560 1543
@@ -1563,11 +1546,11 @@ pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
1563 nfs_pageio_reset_write_mds(desc); 1546 nfs_pageio_reset_write_mds(desc);
1564 desc->pg_recoalesce = 1; 1547 desc->pg_recoalesce = 1;
1565 } 1548 }
1566 nfs_writedata_release(data); 1549 nfs_pgio_data_release(data);
1567} 1550}
1568 1551
1569static enum pnfs_try_status 1552static enum pnfs_try_status
1570pnfs_try_to_write_data(struct nfs_write_data *wdata, 1553pnfs_try_to_write_data(struct nfs_pgio_data *wdata,
1571 const struct rpc_call_ops *call_ops, 1554 const struct rpc_call_ops *call_ops,
1572 struct pnfs_layout_segment *lseg, 1555 struct pnfs_layout_segment *lseg,
1573 int how) 1556 int how)
@@ -1589,41 +1572,36 @@ pnfs_try_to_write_data(struct nfs_write_data *wdata,
1589} 1572}
1590 1573
1591static void 1574static void
1592pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *head, int how) 1575pnfs_do_write(struct nfs_pageio_descriptor *desc,
1576 struct nfs_pgio_header *hdr, int how)
1593{ 1577{
1594 struct nfs_write_data *data; 1578 struct nfs_pgio_data *data = hdr->data;
1595 const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; 1579 const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
1596 struct pnfs_layout_segment *lseg = desc->pg_lseg; 1580 struct pnfs_layout_segment *lseg = desc->pg_lseg;
1581 enum pnfs_try_status trypnfs;
1597 1582
1598 desc->pg_lseg = NULL; 1583 desc->pg_lseg = NULL;
1599 while (!list_empty(head)) { 1584 trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how);
1600 enum pnfs_try_status trypnfs; 1585 if (trypnfs == PNFS_NOT_ATTEMPTED)
1601 1586 pnfs_write_through_mds(desc, data);
1602 data = list_first_entry(head, struct nfs_write_data, list);
1603 list_del_init(&data->list);
1604
1605 trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how);
1606 if (trypnfs == PNFS_NOT_ATTEMPTED)
1607 pnfs_write_through_mds(desc, data);
1608 }
1609 pnfs_put_lseg(lseg); 1587 pnfs_put_lseg(lseg);
1610} 1588}
1611 1589
1612static void pnfs_writehdr_free(struct nfs_pgio_header *hdr) 1590static void pnfs_writehdr_free(struct nfs_pgio_header *hdr)
1613{ 1591{
1614 pnfs_put_lseg(hdr->lseg); 1592 pnfs_put_lseg(hdr->lseg);
1615 nfs_writehdr_free(hdr); 1593 nfs_rw_header_free(hdr);
1616} 1594}
1617EXPORT_SYMBOL_GPL(pnfs_writehdr_free); 1595EXPORT_SYMBOL_GPL(pnfs_writehdr_free);
1618 1596
1619int 1597int
1620pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) 1598pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
1621{ 1599{
1622 struct nfs_write_header *whdr; 1600 struct nfs_rw_header *whdr;
1623 struct nfs_pgio_header *hdr; 1601 struct nfs_pgio_header *hdr;
1624 int ret; 1602 int ret;
1625 1603
1626 whdr = nfs_writehdr_alloc(); 1604 whdr = nfs_rw_header_alloc(desc->pg_rw_ops);
1627 if (!whdr) { 1605 if (!whdr) {
1628 desc->pg_completion_ops->error_cleanup(&desc->pg_list); 1606 desc->pg_completion_ops->error_cleanup(&desc->pg_list);
1629 pnfs_put_lseg(desc->pg_lseg); 1607 pnfs_put_lseg(desc->pg_lseg);
@@ -1634,12 +1612,12 @@ pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
1634 nfs_pgheader_init(desc, hdr, pnfs_writehdr_free); 1612 nfs_pgheader_init(desc, hdr, pnfs_writehdr_free);
1635 hdr->lseg = pnfs_get_lseg(desc->pg_lseg); 1613 hdr->lseg = pnfs_get_lseg(desc->pg_lseg);
1636 atomic_inc(&hdr->refcnt); 1614 atomic_inc(&hdr->refcnt);
1637 ret = nfs_generic_flush(desc, hdr); 1615 ret = nfs_generic_pgio(desc, hdr);
1638 if (ret != 0) { 1616 if (ret != 0) {
1639 pnfs_put_lseg(desc->pg_lseg); 1617 pnfs_put_lseg(desc->pg_lseg);
1640 desc->pg_lseg = NULL; 1618 desc->pg_lseg = NULL;
1641 } else 1619 } else
1642 pnfs_do_multiple_writes(desc, &hdr->rpc_list, desc->pg_ioflags); 1620 pnfs_do_write(desc, hdr, desc->pg_ioflags);
1643 if (atomic_dec_and_test(&hdr->refcnt)) 1621 if (atomic_dec_and_test(&hdr->refcnt))
1644 hdr->completion_ops->completion(hdr); 1622 hdr->completion_ops->completion(hdr);
1645 return ret; 1623 return ret;
@@ -1655,7 +1633,7 @@ int pnfs_read_done_resend_to_mds(struct inode *inode,
1655 LIST_HEAD(failed); 1633 LIST_HEAD(failed);
1656 1634
1657 /* Resend all requests through the MDS */ 1635 /* Resend all requests through the MDS */
1658 nfs_pageio_init_read(&pgio, inode, compl_ops); 1636 nfs_pageio_init_read(&pgio, inode, true, compl_ops);
1659 pgio.pg_dreq = dreq; 1637 pgio.pg_dreq = dreq;
1660 while (!list_empty(head)) { 1638 while (!list_empty(head)) {
1661 struct nfs_page *req = nfs_list_entry(head->next); 1639 struct nfs_page *req = nfs_list_entry(head->next);
@@ -1674,7 +1652,7 @@ int pnfs_read_done_resend_to_mds(struct inode *inode,
1674} 1652}
1675EXPORT_SYMBOL_GPL(pnfs_read_done_resend_to_mds); 1653EXPORT_SYMBOL_GPL(pnfs_read_done_resend_to_mds);
1676 1654
1677static void pnfs_ld_handle_read_error(struct nfs_read_data *data) 1655static void pnfs_ld_handle_read_error(struct nfs_pgio_data *data)
1678{ 1656{
1679 struct nfs_pgio_header *hdr = data->header; 1657 struct nfs_pgio_header *hdr = data->header;
1680 1658
@@ -1693,7 +1671,7 @@ static void pnfs_ld_handle_read_error(struct nfs_read_data *data)
1693/* 1671/*
1694 * Called by non rpc-based layout drivers 1672 * Called by non rpc-based layout drivers
1695 */ 1673 */
1696void pnfs_ld_read_done(struct nfs_read_data *data) 1674void pnfs_ld_read_done(struct nfs_pgio_data *data)
1697{ 1675{
1698 struct nfs_pgio_header *hdr = data->header; 1676 struct nfs_pgio_header *hdr = data->header;
1699 1677
@@ -1709,7 +1687,7 @@ EXPORT_SYMBOL_GPL(pnfs_ld_read_done);
1709 1687
1710static void 1688static void
1711pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, 1689pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
1712 struct nfs_read_data *data) 1690 struct nfs_pgio_data *data)
1713{ 1691{
1714 struct nfs_pgio_header *hdr = data->header; 1692 struct nfs_pgio_header *hdr = data->header;
1715 1693
@@ -1718,14 +1696,14 @@ pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
1718 nfs_pageio_reset_read_mds(desc); 1696 nfs_pageio_reset_read_mds(desc);
1719 desc->pg_recoalesce = 1; 1697 desc->pg_recoalesce = 1;
1720 } 1698 }
1721 nfs_readdata_release(data); 1699 nfs_pgio_data_release(data);
1722} 1700}
1723 1701
1724/* 1702/*
1725 * Call the appropriate parallel I/O subsystem read function. 1703 * Call the appropriate parallel I/O subsystem read function.
1726 */ 1704 */
1727static enum pnfs_try_status 1705static enum pnfs_try_status
1728pnfs_try_to_read_data(struct nfs_read_data *rdata, 1706pnfs_try_to_read_data(struct nfs_pgio_data *rdata,
1729 const struct rpc_call_ops *call_ops, 1707 const struct rpc_call_ops *call_ops,
1730 struct pnfs_layout_segment *lseg) 1708 struct pnfs_layout_segment *lseg)
1731{ 1709{
@@ -1747,41 +1725,35 @@ pnfs_try_to_read_data(struct nfs_read_data *rdata,
1747} 1725}
1748 1726
1749static void 1727static void
1750pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *head) 1728pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr)
1751{ 1729{
1752 struct nfs_read_data *data; 1730 struct nfs_pgio_data *data = hdr->data;
1753 const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; 1731 const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
1754 struct pnfs_layout_segment *lseg = desc->pg_lseg; 1732 struct pnfs_layout_segment *lseg = desc->pg_lseg;
1733 enum pnfs_try_status trypnfs;
1755 1734
1756 desc->pg_lseg = NULL; 1735 desc->pg_lseg = NULL;
1757 while (!list_empty(head)) { 1736 trypnfs = pnfs_try_to_read_data(data, call_ops, lseg);
1758 enum pnfs_try_status trypnfs; 1737 if (trypnfs == PNFS_NOT_ATTEMPTED)
1759 1738 pnfs_read_through_mds(desc, data);
1760 data = list_first_entry(head, struct nfs_read_data, list);
1761 list_del_init(&data->list);
1762
1763 trypnfs = pnfs_try_to_read_data(data, call_ops, lseg);
1764 if (trypnfs == PNFS_NOT_ATTEMPTED)
1765 pnfs_read_through_mds(desc, data);
1766 }
1767 pnfs_put_lseg(lseg); 1739 pnfs_put_lseg(lseg);
1768} 1740}
1769 1741
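
Since a header now carries exactly one nfs_pgio_data, pnfs_do_read() above and pnfs_do_write() earlier in this file no longer walk an rpc_list; each is a single "try the layout driver, otherwise resend through the MDS" decision. That control flow, reduced to a compilable toy where the enum mirrors pnfs_try_status and everything else is a stand-in:

#include <stdbool.h>
#include <stdio.h>

enum try_status { TRY_ATTEMPTED, TRY_NOT_ATTEMPTED };

static enum try_status try_layout_io(bool driver_accepts)
{
	return driver_accepts ? TRY_ATTEMPTED : TRY_NOT_ATTEMPTED;
}

static void resend_through_mds(void)
{
	printf("falling back to the MDS path\n");
}

/* One decision per header, in the spirit of pnfs_do_read()/pnfs_do_write(). */
static void do_one_pgio(bool driver_accepts)
{
	if (try_layout_io(driver_accepts) == TRY_NOT_ATTEMPTED)
		resend_through_mds();
	else
		printf("layout driver handled the I/O\n");
}

int main(void)
{
	do_one_pgio(true);
	do_one_pgio(false);
	return 0;
}
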
1770static void pnfs_readhdr_free(struct nfs_pgio_header *hdr) 1742static void pnfs_readhdr_free(struct nfs_pgio_header *hdr)
1771{ 1743{
1772 pnfs_put_lseg(hdr->lseg); 1744 pnfs_put_lseg(hdr->lseg);
1773 nfs_readhdr_free(hdr); 1745 nfs_rw_header_free(hdr);
1774} 1746}
1775EXPORT_SYMBOL_GPL(pnfs_readhdr_free); 1747EXPORT_SYMBOL_GPL(pnfs_readhdr_free);
1776 1748
1777int 1749int
1778pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) 1750pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
1779{ 1751{
1780 struct nfs_read_header *rhdr; 1752 struct nfs_rw_header *rhdr;
1781 struct nfs_pgio_header *hdr; 1753 struct nfs_pgio_header *hdr;
1782 int ret; 1754 int ret;
1783 1755
1784 rhdr = nfs_readhdr_alloc(); 1756 rhdr = nfs_rw_header_alloc(desc->pg_rw_ops);
1785 if (!rhdr) { 1757 if (!rhdr) {
1786 desc->pg_completion_ops->error_cleanup(&desc->pg_list); 1758 desc->pg_completion_ops->error_cleanup(&desc->pg_list);
1787 ret = -ENOMEM; 1759 ret = -ENOMEM;
@@ -1793,12 +1765,12 @@ pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
1793 nfs_pgheader_init(desc, hdr, pnfs_readhdr_free); 1765 nfs_pgheader_init(desc, hdr, pnfs_readhdr_free);
1794 hdr->lseg = pnfs_get_lseg(desc->pg_lseg); 1766 hdr->lseg = pnfs_get_lseg(desc->pg_lseg);
1795 atomic_inc(&hdr->refcnt); 1767 atomic_inc(&hdr->refcnt);
1796 ret = nfs_generic_pagein(desc, hdr); 1768 ret = nfs_generic_pgio(desc, hdr);
1797 if (ret != 0) { 1769 if (ret != 0) {
1798 pnfs_put_lseg(desc->pg_lseg); 1770 pnfs_put_lseg(desc->pg_lseg);
1799 desc->pg_lseg = NULL; 1771 desc->pg_lseg = NULL;
1800 } else 1772 } else
1801 pnfs_do_multiple_reads(desc, &hdr->rpc_list); 1773 pnfs_do_read(desc, hdr);
1802 if (atomic_dec_and_test(&hdr->refcnt)) 1774 if (atomic_dec_and_test(&hdr->refcnt))
1803 hdr->completion_ops->completion(hdr); 1775 hdr->completion_ops->completion(hdr);
1804 return ret; 1776 return ret;
@@ -1848,7 +1820,7 @@ void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg)
1848EXPORT_SYMBOL_GPL(pnfs_set_lo_fail); 1820EXPORT_SYMBOL_GPL(pnfs_set_lo_fail);
1849 1821
1850void 1822void
1851pnfs_set_layoutcommit(struct nfs_write_data *wdata) 1823pnfs_set_layoutcommit(struct nfs_pgio_data *wdata)
1852{ 1824{
1853 struct nfs_pgio_header *hdr = wdata->header; 1825 struct nfs_pgio_header *hdr = wdata->header;
1854 struct inode *inode = hdr->inode; 1826 struct inode *inode = hdr->inode;
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index c3058a076596..4fb309a2b4c4 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -113,8 +113,8 @@ struct pnfs_layoutdriver_type {
113 * Return PNFS_ATTEMPTED to indicate the layout code has attempted 113 * Return PNFS_ATTEMPTED to indicate the layout code has attempted
114 * I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS 114 * I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS
115 */ 115 */
116 enum pnfs_try_status (*read_pagelist) (struct nfs_read_data *nfs_data); 116 enum pnfs_try_status (*read_pagelist) (struct nfs_pgio_data *nfs_data);
117 enum pnfs_try_status (*write_pagelist) (struct nfs_write_data *nfs_data, int how); 117 enum pnfs_try_status (*write_pagelist) (struct nfs_pgio_data *nfs_data, int how);
118 118
119 void (*free_deviceid_node) (struct nfs4_deviceid_node *); 119 void (*free_deviceid_node) (struct nfs4_deviceid_node *);
120 120
@@ -180,11 +180,6 @@ extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp);
180void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo); 180void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo);
181void pnfs_put_lseg(struct pnfs_layout_segment *lseg); 181void pnfs_put_lseg(struct pnfs_layout_segment *lseg);
182 182
183void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *,
184 const struct nfs_pgio_completion_ops *);
185void pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *,
186 int, const struct nfs_pgio_completion_ops *);
187
188void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32); 183void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32);
189void unset_pnfs_layoutdriver(struct nfs_server *); 184void unset_pnfs_layoutdriver(struct nfs_server *);
190void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *); 185void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *);
@@ -192,7 +187,8 @@ int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc);
192void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, 187void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
193 struct nfs_page *req, u64 wb_size); 188 struct nfs_page *req, u64 wb_size);
194int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc); 189int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc);
195bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req); 190size_t pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio,
191 struct nfs_page *prev, struct nfs_page *req);
196void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg); 192void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg);
197struct pnfs_layout_segment *pnfs_layout_process(struct nfs4_layoutget *lgp); 193struct pnfs_layout_segment *pnfs_layout_process(struct nfs4_layoutget *lgp);
198void pnfs_free_lseg_list(struct list_head *tmp_list); 194void pnfs_free_lseg_list(struct list_head *tmp_list);
@@ -217,13 +213,13 @@ bool pnfs_roc(struct inode *ino);
217void pnfs_roc_release(struct inode *ino); 213void pnfs_roc_release(struct inode *ino);
218void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); 214void pnfs_roc_set_barrier(struct inode *ino, u32 barrier);
219bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task); 215bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task);
220void pnfs_set_layoutcommit(struct nfs_write_data *wdata); 216void pnfs_set_layoutcommit(struct nfs_pgio_data *wdata);
221void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data); 217void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data);
222int pnfs_layoutcommit_inode(struct inode *inode, bool sync); 218int pnfs_layoutcommit_inode(struct inode *inode, bool sync);
223int _pnfs_return_layout(struct inode *); 219int _pnfs_return_layout(struct inode *);
224int pnfs_commit_and_return_layout(struct inode *); 220int pnfs_commit_and_return_layout(struct inode *);
225void pnfs_ld_write_done(struct nfs_write_data *); 221void pnfs_ld_write_done(struct nfs_pgio_data *);
226void pnfs_ld_read_done(struct nfs_read_data *); 222void pnfs_ld_read_done(struct nfs_pgio_data *);
227struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, 223struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
228 struct nfs_open_context *ctx, 224 struct nfs_open_context *ctx,
229 loff_t pos, 225 loff_t pos,
@@ -461,18 +457,6 @@ static inline void unset_pnfs_layoutdriver(struct nfs_server *s)
461{ 457{
462} 458}
463 459
464static inline void pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode,
465 const struct nfs_pgio_completion_ops *compl_ops)
466{
467 nfs_pageio_init_read(pgio, inode, compl_ops);
468}
469
470static inline void pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags,
471 const struct nfs_pgio_completion_ops *compl_ops)
472{
473 nfs_pageio_init_write(pgio, inode, ioflags, compl_ops);
474}
475
476static inline int 460static inline int
477pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how, 461pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how,
478 struct nfs_commit_info *cinfo) 462 struct nfs_commit_info *cinfo)
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index e55ce9e8b034..c171ce1a8a30 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -578,7 +578,7 @@ nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
578 return 0; 578 return 0;
579} 579}
580 580
581static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data) 581static int nfs_read_done(struct rpc_task *task, struct nfs_pgio_data *data)
582{ 582{
583 struct inode *inode = data->header->inode; 583 struct inode *inode = data->header->inode;
584 584
@@ -594,18 +594,18 @@ static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data)
594 return 0; 594 return 0;
595} 595}
596 596
597static void nfs_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg) 597static void nfs_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
598{ 598{
599 msg->rpc_proc = &nfs_procedures[NFSPROC_READ]; 599 msg->rpc_proc = &nfs_procedures[NFSPROC_READ];
600} 600}
601 601
602static int nfs_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data) 602static int nfs_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data)
603{ 603{
604 rpc_call_start(task); 604 rpc_call_start(task);
605 return 0; 605 return 0;
606} 606}
607 607
608static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data) 608static int nfs_write_done(struct rpc_task *task, struct nfs_pgio_data *data)
609{ 609{
610 struct inode *inode = data->header->inode; 610 struct inode *inode = data->header->inode;
611 611
@@ -614,19 +614,13 @@ static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data)
614 return 0; 614 return 0;
615} 615}
616 616
617static void nfs_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg) 617static void nfs_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
618{ 618{
619 /* Note: NFSv2 ignores @stable and always uses NFS_FILE_SYNC */ 619 /* Note: NFSv2 ignores @stable and always uses NFS_FILE_SYNC */
620 data->args.stable = NFS_FILE_SYNC; 620 data->args.stable = NFS_FILE_SYNC;
621 msg->rpc_proc = &nfs_procedures[NFSPROC_WRITE]; 621 msg->rpc_proc = &nfs_procedures[NFSPROC_WRITE];
622} 622}
623 623
624static int nfs_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data)
625{
626 rpc_call_start(task);
627 return 0;
628}
629
630static void nfs_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) 624static void nfs_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
631{ 625{
632 BUG(); 626 BUG();
@@ -734,13 +728,10 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
734 .fsinfo = nfs_proc_fsinfo, 728 .fsinfo = nfs_proc_fsinfo,
735 .pathconf = nfs_proc_pathconf, 729 .pathconf = nfs_proc_pathconf,
736 .decode_dirent = nfs2_decode_dirent, 730 .decode_dirent = nfs2_decode_dirent,
731 .pgio_rpc_prepare = nfs_proc_pgio_rpc_prepare,
737 .read_setup = nfs_proc_read_setup, 732 .read_setup = nfs_proc_read_setup,
738 .read_pageio_init = nfs_pageio_init_read,
739 .read_rpc_prepare = nfs_proc_read_rpc_prepare,
740 .read_done = nfs_read_done, 733 .read_done = nfs_read_done,
741 .write_setup = nfs_proc_write_setup, 734 .write_setup = nfs_proc_write_setup,
742 .write_pageio_init = nfs_pageio_init_write,
743 .write_rpc_prepare = nfs_proc_write_rpc_prepare,
744 .write_done = nfs_write_done, 735 .write_done = nfs_write_done,
745 .commit_setup = nfs_proc_commit_setup, 736 .commit_setup = nfs_proc_commit_setup,
746 .commit_rpc_prepare = nfs_proc_commit_rpc_prepare, 737 .commit_rpc_prepare = nfs_proc_commit_rpc_prepare,
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 411aedda14bb..e818a475ca64 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -24,85 +24,24 @@
24#include "internal.h" 24#include "internal.h"
25#include "iostat.h" 25#include "iostat.h"
26#include "fscache.h" 26#include "fscache.h"
27#include "pnfs.h"
27 28
28#define NFSDBG_FACILITY NFSDBG_PAGECACHE 29#define NFSDBG_FACILITY NFSDBG_PAGECACHE
29 30
30static const struct nfs_pageio_ops nfs_pageio_read_ops;
31static const struct rpc_call_ops nfs_read_common_ops;
32static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops; 31static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops;
32static const struct nfs_rw_ops nfs_rw_read_ops;
33 33
34static struct kmem_cache *nfs_rdata_cachep; 34static struct kmem_cache *nfs_rdata_cachep;
35 35
36struct nfs_read_header *nfs_readhdr_alloc(void) 36static struct nfs_rw_header *nfs_readhdr_alloc(void)
37{ 37{
38 struct nfs_read_header *rhdr; 38 return kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL);
39
40 rhdr = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL);
41 if (rhdr) {
42 struct nfs_pgio_header *hdr = &rhdr->header;
43
44 INIT_LIST_HEAD(&hdr->pages);
45 INIT_LIST_HEAD(&hdr->rpc_list);
46 spin_lock_init(&hdr->lock);
47 atomic_set(&hdr->refcnt, 0);
48 }
49 return rhdr;
50} 39}
51EXPORT_SYMBOL_GPL(nfs_readhdr_alloc);
52 40
53static struct nfs_read_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr, 41static void nfs_readhdr_free(struct nfs_rw_header *rhdr)
54 unsigned int pagecount)
55{ 42{
56 struct nfs_read_data *data, *prealloc;
57
58 prealloc = &container_of(hdr, struct nfs_read_header, header)->rpc_data;
59 if (prealloc->header == NULL)
60 data = prealloc;
61 else
62 data = kzalloc(sizeof(*data), GFP_KERNEL);
63 if (!data)
64 goto out;
65
66 if (nfs_pgarray_set(&data->pages, pagecount)) {
67 data->header = hdr;
68 atomic_inc(&hdr->refcnt);
69 } else {
70 if (data != prealloc)
71 kfree(data);
72 data = NULL;
73 }
74out:
75 return data;
76}
77
78void nfs_readhdr_free(struct nfs_pgio_header *hdr)
79{
80 struct nfs_read_header *rhdr = container_of(hdr, struct nfs_read_header, header);
81
82 kmem_cache_free(nfs_rdata_cachep, rhdr); 43 kmem_cache_free(nfs_rdata_cachep, rhdr);
83} 44}
84EXPORT_SYMBOL_GPL(nfs_readhdr_free);
85
86void nfs_readdata_release(struct nfs_read_data *rdata)
87{
88 struct nfs_pgio_header *hdr = rdata->header;
89 struct nfs_read_header *read_header = container_of(hdr, struct nfs_read_header, header);
90
91 put_nfs_open_context(rdata->args.context);
92 if (rdata->pages.pagevec != rdata->pages.page_array)
93 kfree(rdata->pages.pagevec);
94 if (rdata == &read_header->rpc_data) {
95 rdata->header = NULL;
96 rdata = NULL;
97 }
98 if (atomic_dec_and_test(&hdr->refcnt))
99 hdr->completion_ops->completion(hdr);
100 /* Note: we only free the rpc_task after callbacks are done.
101 * See the comment in rpc_free_task() for why
102 */
103 kfree(rdata);
104}
105EXPORT_SYMBOL_GPL(nfs_readdata_release);
106 45
107static 46static
108int nfs_return_empty_page(struct page *page) 47int nfs_return_empty_page(struct page *page)
@@ -114,17 +53,24 @@ int nfs_return_empty_page(struct page *page)
114} 53}
115 54
116void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, 55void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
117 struct inode *inode, 56 struct inode *inode, bool force_mds,
118 const struct nfs_pgio_completion_ops *compl_ops) 57 const struct nfs_pgio_completion_ops *compl_ops)
119{ 58{
120 nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops, compl_ops, 59 struct nfs_server *server = NFS_SERVER(inode);
121 NFS_SERVER(inode)->rsize, 0); 60 const struct nfs_pageio_ops *pg_ops = &nfs_pgio_rw_ops;
61
62#ifdef CONFIG_NFS_V4_1
63 if (server->pnfs_curr_ld && !force_mds)
64 pg_ops = server->pnfs_curr_ld->pg_read_ops;
65#endif
66 nfs_pageio_init(pgio, inode, pg_ops, compl_ops, &nfs_rw_read_ops,
67 server->rsize, 0);
122} 68}
123EXPORT_SYMBOL_GPL(nfs_pageio_init_read); 69EXPORT_SYMBOL_GPL(nfs_pageio_init_read);
124 70
125void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio) 71void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio)
126{ 72{
127 pgio->pg_ops = &nfs_pageio_read_ops; 73 pgio->pg_ops = &nfs_pgio_rw_ops;
128 pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize; 74 pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize;
129} 75}
130EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds); 76EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds);
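
nfs_pageio_init_read() (and its write counterpart) now chooses the pg_ops table up front: the layout driver's read ops when a pNFS layout driver is registered and the caller is not forcing MDS I/O, otherwise the generic nfs_pgio_rw_ops. The selection is a one-liner; as a standalone sketch with stand-in ops structures:

#include <stdbool.h>
#include <stdio.h>

struct toy_pg_ops { const char *name; };

static const struct toy_pg_ops generic_rw_ops = { "generic MDS I/O" };
static const struct toy_pg_ops layout_read_ops = { "pNFS layout driver" };

/* Mirrors the new init logic: prefer the layout driver's ops unless
 * the caller forces I/O through the MDS. */
static const struct toy_pg_ops *pick_pg_ops(bool have_layout_driver,
					    bool force_mds)
{
	if (have_layout_driver && !force_mds)
		return &layout_read_ops;
	return &generic_rw_ops;
}

int main(void)
{
	printf("%s\n", pick_pg_ops(true, false)->name);	/* pNFS layout driver */
	printf("%s\n", pick_pg_ops(true, true)->name);	/* generic MDS I/O */
	return 0;
}
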
@@ -139,7 +85,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
139 len = nfs_page_length(page); 85 len = nfs_page_length(page);
140 if (len == 0) 86 if (len == 0)
141 return nfs_return_empty_page(page); 87 return nfs_return_empty_page(page);
142 new = nfs_create_request(ctx, inode, page, 0, len); 88 new = nfs_create_request(ctx, page, NULL, 0, len);
143 if (IS_ERR(new)) { 89 if (IS_ERR(new)) {
144 unlock_page(page); 90 unlock_page(page);
145 return PTR_ERR(new); 91 return PTR_ERR(new);
@@ -147,7 +93,8 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
147 if (len < PAGE_CACHE_SIZE) 93 if (len < PAGE_CACHE_SIZE)
148 zero_user_segment(page, len, PAGE_CACHE_SIZE); 94 zero_user_segment(page, len, PAGE_CACHE_SIZE);
149 95
150 NFS_PROTO(inode)->read_pageio_init(&pgio, inode, &nfs_async_read_completion_ops); 96 nfs_pageio_init_read(&pgio, inode, false,
97 &nfs_async_read_completion_ops);
151 nfs_pageio_add_request(&pgio, new); 98 nfs_pageio_add_request(&pgio, new);
152 nfs_pageio_complete(&pgio); 99 nfs_pageio_complete(&pgio);
153 NFS_I(inode)->read_io += pgio.pg_bytes_written; 100 NFS_I(inode)->read_io += pgio.pg_bytes_written;
@@ -158,10 +105,16 @@ static void nfs_readpage_release(struct nfs_page *req)
158{ 105{
159 struct inode *d_inode = req->wb_context->dentry->d_inode; 106 struct inode *d_inode = req->wb_context->dentry->d_inode;
160 107
161 if (PageUptodate(req->wb_page)) 108 dprintk("NFS: read done (%s/%llu %d@%lld)\n", d_inode->i_sb->s_id,
162 nfs_readpage_to_fscache(d_inode, req->wb_page, 0); 109 (unsigned long long)NFS_FILEID(d_inode), req->wb_bytes,
110 (long long)req_offset(req));
163 111
164 unlock_page(req->wb_page); 112 if (nfs_page_group_sync_on_bit(req, PG_UNLOCKPAGE)) {
113 if (PageUptodate(req->wb_page))
114 nfs_readpage_to_fscache(d_inode, req->wb_page, 0);
115
116 unlock_page(req->wb_page);
117 }
165 118
166 dprintk("NFS: read done (%s/%Lu %d@%Ld)\n", 119 dprintk("NFS: read done (%s/%Lu %d@%Ld)\n",
167 req->wb_context->dentry->d_inode->i_sb->s_id, 120 req->wb_context->dentry->d_inode->i_sb->s_id,
@@ -171,7 +124,12 @@ static void nfs_readpage_release(struct nfs_page *req)
171 nfs_release_request(req); 124 nfs_release_request(req);
172} 125}
173 126
174/* Note io was page aligned */ 127static void nfs_page_group_set_uptodate(struct nfs_page *req)
128{
129 if (nfs_page_group_sync_on_bit(req, PG_UPTODATE))
130 SetPageUptodate(req->wb_page);
131}
132
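
Because several subrequests may now share one page, page-level side effects — unlocking the page, marking it uptodate, pushing it into fscache — are gated on nfs_page_group_sync_on_bit(), which, as used here, only reports true once every request in the group has reached that point. One way to picture the gate is a shared countdown, sketched below with C11 atomics; the real implementation uses per-request flag bits under a group lock, so treat this strictly as an illustration:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct toy_group {
	atomic_int remaining;	/* subrequests that have not reached the bit yet */
};

/* Returns true only for the last subrequest in the group. */
static bool toy_group_sync(struct toy_group *grp)
{
	return atomic_fetch_sub(&grp->remaining, 1) == 1;
}

int main(void)
{
	struct toy_group grp = { .remaining = 3 };

	for (int i = 0; i < 3; i++)
		if (toy_group_sync(&grp))
			printf("subreq %d unlocks the page\n", i);	/* only i == 2 */
	return 0;
}
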
175static void nfs_read_completion(struct nfs_pgio_header *hdr) 133static void nfs_read_completion(struct nfs_pgio_header *hdr)
176{ 134{
177 unsigned long bytes = 0; 135 unsigned long bytes = 0;
@@ -181,21 +139,32 @@ static void nfs_read_completion(struct nfs_pgio_header *hdr)
181 while (!list_empty(&hdr->pages)) { 139 while (!list_empty(&hdr->pages)) {
182 struct nfs_page *req = nfs_list_entry(hdr->pages.next); 140 struct nfs_page *req = nfs_list_entry(hdr->pages.next);
183 struct page *page = req->wb_page; 141 struct page *page = req->wb_page;
142 unsigned long start = req->wb_pgbase;
143 unsigned long end = req->wb_pgbase + req->wb_bytes;
184 144
185 if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) { 145 if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) {
186 if (bytes > hdr->good_bytes) 146 /* note: regions of the page not covered by a
187 zero_user(page, 0, PAGE_SIZE); 147 * request are zeroed in nfs_readpage_async /
188 else if (hdr->good_bytes - bytes < PAGE_SIZE) 148 * readpage_async_filler */
189 zero_user_segment(page, 149 if (bytes > hdr->good_bytes) {
190 hdr->good_bytes & ~PAGE_MASK, 150 /* nothing in this request was good, so zero
191 PAGE_SIZE); 151 * the full extent of the request */
152 zero_user_segment(page, start, end);
153
154 } else if (hdr->good_bytes - bytes < req->wb_bytes) {
155 /* part of this request has good bytes, but
156 * not all. zero the bad bytes */
157 start += hdr->good_bytes - bytes;
158 WARN_ON(start < req->wb_pgbase);
159 zero_user_segment(page, start, end);
160 }
192 } 161 }
193 bytes += req->wb_bytes; 162 bytes += req->wb_bytes;
194 if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) { 163 if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) {
195 if (bytes <= hdr->good_bytes) 164 if (bytes <= hdr->good_bytes)
196 SetPageUptodate(page); 165 nfs_page_group_set_uptodate(req);
197 } else 166 } else
198 SetPageUptodate(page); 167 nfs_page_group_set_uptodate(req);
199 nfs_list_remove_request(req); 168 nfs_list_remove_request(req);
200 nfs_readpage_release(req); 169 nfs_readpage_release(req);
201 } 170 }
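
nfs_read_completion() now zeroes at sub-page granularity on EOF: a request whose data all lies past hdr->good_bytes is zeroed over its whole [wb_pgbase, wb_pgbase + wb_bytes) extent, while a request that is only partially good is zeroed from its first bad byte. The offset arithmetic, pulled out into plain C with zero_user_segment() replaced by a printf:

#include <stdio.h>

/* Decide which part of one request's page region needs zeroing, given
 * how many bytes of the header's requests precede it (bytes_so_far)
 * and how many bytes in total came back good (good_bytes). */
static void zero_bad_region(unsigned long pgbase, unsigned long nbytes,
			    unsigned long bytes_so_far, unsigned long good_bytes)
{
	unsigned long start = pgbase;
	unsigned long end = pgbase + nbytes;

	if (bytes_so_far > good_bytes) {
		/* nothing in this request was good: zero it all */
		printf("zero [%lu, %lu)\n", start, end);
	} else if (good_bytes - bytes_so_far < nbytes) {
		/* only the tail is bad: zero from the first bad byte */
		start += good_bytes - bytes_so_far;
		printf("zero [%lu, %lu)\n", start, end);
	} else {
		printf("nothing to zero\n");	/* request is entirely good */
	}
}

int main(void)
{
	/* 512-byte request at pgbase 1024; only 256 of its bytes were good. */
	zero_bad_region(1024, 512, 4096, 4096 + 256);	/* zero [1280, 1536) */
	return 0;
}
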
@@ -203,95 +172,14 @@ out:
203 hdr->release(hdr); 172 hdr->release(hdr);
204} 173}
205 174
206int nfs_initiate_read(struct rpc_clnt *clnt, 175static void nfs_initiate_read(struct nfs_pgio_data *data, struct rpc_message *msg,
207 struct nfs_read_data *data, 176 struct rpc_task_setup *task_setup_data, int how)
208 const struct rpc_call_ops *call_ops, int flags)
209{ 177{
210 struct inode *inode = data->header->inode; 178 struct inode *inode = data->header->inode;
211 int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0; 179 int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0;
212 struct rpc_task *task;
213 struct rpc_message msg = {
214 .rpc_argp = &data->args,
215 .rpc_resp = &data->res,
216 .rpc_cred = data->header->cred,
217 };
218 struct rpc_task_setup task_setup_data = {
219 .task = &data->task,
220 .rpc_client = clnt,
221 .rpc_message = &msg,
222 .callback_ops = call_ops,
223 .callback_data = data,
224 .workqueue = nfsiod_workqueue,
225 .flags = RPC_TASK_ASYNC | swap_flags | flags,
226 };
227 180
228 /* Set up the initial task struct. */ 181 task_setup_data->flags |= swap_flags;
229 NFS_PROTO(inode)->read_setup(data, &msg); 182 NFS_PROTO(inode)->read_setup(data, msg);
230
231 dprintk("NFS: %5u initiated read call (req %s/%llu, %u bytes @ "
232 "offset %llu)\n",
233 data->task.tk_pid,
234 inode->i_sb->s_id,
235 (unsigned long long)NFS_FILEID(inode),
236 data->args.count,
237 (unsigned long long)data->args.offset);
238
239 task = rpc_run_task(&task_setup_data);
240 if (IS_ERR(task))
241 return PTR_ERR(task);
242 rpc_put_task(task);
243 return 0;
244}
245EXPORT_SYMBOL_GPL(nfs_initiate_read);
246
247/*
248 * Set up the NFS read request struct
249 */
250static void nfs_read_rpcsetup(struct nfs_read_data *data,
251 unsigned int count, unsigned int offset)
252{
253 struct nfs_page *req = data->header->req;
254
255 data->args.fh = NFS_FH(data->header->inode);
256 data->args.offset = req_offset(req) + offset;
257 data->args.pgbase = req->wb_pgbase + offset;
258 data->args.pages = data->pages.pagevec;
259 data->args.count = count;
260 data->args.context = get_nfs_open_context(req->wb_context);
261 data->args.lock_context = req->wb_lock_context;
262
263 data->res.fattr = &data->fattr;
264 data->res.count = count;
265 data->res.eof = 0;
266 nfs_fattr_init(&data->fattr);
267}
268
269static int nfs_do_read(struct nfs_read_data *data,
270 const struct rpc_call_ops *call_ops)
271{
272 struct inode *inode = data->header->inode;
273
274 return nfs_initiate_read(NFS_CLIENT(inode), data, call_ops, 0);
275}
276
277static int
278nfs_do_multiple_reads(struct list_head *head,
279 const struct rpc_call_ops *call_ops)
280{
281 struct nfs_read_data *data;
282 int ret = 0;
283
284 while (!list_empty(head)) {
285 int ret2;
286
287 data = list_first_entry(head, struct nfs_read_data, list);
288 list_del_init(&data->list);
289
290 ret2 = nfs_do_read(data, call_ops);
291 if (ret == 0)
292 ret = ret2;
293 }
294 return ret;
295} 183}
296 184
297static void 185static void
@@ -311,143 +199,14 @@ static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops = {
311 .completion = nfs_read_completion, 199 .completion = nfs_read_completion,
312}; 200};
313 201
314static void nfs_pagein_error(struct nfs_pageio_descriptor *desc,
315 struct nfs_pgio_header *hdr)
316{
317 set_bit(NFS_IOHDR_REDO, &hdr->flags);
318 while (!list_empty(&hdr->rpc_list)) {
319 struct nfs_read_data *data = list_first_entry(&hdr->rpc_list,
320 struct nfs_read_data, list);
321 list_del(&data->list);
322 nfs_readdata_release(data);
323 }
324 desc->pg_completion_ops->error_cleanup(&desc->pg_list);
325}
326
327/*
328 * Generate multiple requests to fill a single page.
329 *
330 * We optimize to reduce the number of read operations on the wire. If we
331 * detect that we're reading a page, or an area of a page, that is past the
332 * end of file, we do not generate NFS read operations but just clear the
333 * parts of the page that would have come back zero from the server anyway.
334 *
335 * We rely on the cached value of i_size to make this determination; another
336 * client can fill pages on the server past our cached end-of-file, but we
337 * won't see the new data until our attribute cache is updated. This is more
338 * or less conventional NFS client behavior.
339 */
340static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc,
341 struct nfs_pgio_header *hdr)
342{
343 struct nfs_page *req = hdr->req;
344 struct page *page = req->wb_page;
345 struct nfs_read_data *data;
346 size_t rsize = desc->pg_bsize, nbytes;
347 unsigned int offset;
348
349 offset = 0;
350 nbytes = desc->pg_count;
351 do {
352 size_t len = min(nbytes,rsize);
353
354 data = nfs_readdata_alloc(hdr, 1);
355 if (!data) {
356 nfs_pagein_error(desc, hdr);
357 return -ENOMEM;
358 }
359 data->pages.pagevec[0] = page;
360 nfs_read_rpcsetup(data, len, offset);
361 list_add(&data->list, &hdr->rpc_list);
362 nbytes -= len;
363 offset += len;
364 } while (nbytes != 0);
365
366 nfs_list_remove_request(req);
367 nfs_list_add_request(req, &hdr->pages);
368 desc->pg_rpc_callops = &nfs_read_common_ops;
369 return 0;
370}
371
372static int nfs_pagein_one(struct nfs_pageio_descriptor *desc,
373 struct nfs_pgio_header *hdr)
374{
375 struct nfs_page *req;
376 struct page **pages;
377 struct nfs_read_data *data;
378 struct list_head *head = &desc->pg_list;
379
380 data = nfs_readdata_alloc(hdr, nfs_page_array_len(desc->pg_base,
381 desc->pg_count));
382 if (!data) {
383 nfs_pagein_error(desc, hdr);
384 return -ENOMEM;
385 }
386
387 pages = data->pages.pagevec;
388 while (!list_empty(head)) {
389 req = nfs_list_entry(head->next);
390 nfs_list_remove_request(req);
391 nfs_list_add_request(req, &hdr->pages);
392 *pages++ = req->wb_page;
393 }
394
395 nfs_read_rpcsetup(data, desc->pg_count, 0);
396 list_add(&data->list, &hdr->rpc_list);
397 desc->pg_rpc_callops = &nfs_read_common_ops;
398 return 0;
399}
400
401int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
402 struct nfs_pgio_header *hdr)
403{
404 if (desc->pg_bsize < PAGE_CACHE_SIZE)
405 return nfs_pagein_multi(desc, hdr);
406 return nfs_pagein_one(desc, hdr);
407}
408EXPORT_SYMBOL_GPL(nfs_generic_pagein);
409
410static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
411{
412 struct nfs_read_header *rhdr;
413 struct nfs_pgio_header *hdr;
414 int ret;
415
416 rhdr = nfs_readhdr_alloc();
417 if (!rhdr) {
418 desc->pg_completion_ops->error_cleanup(&desc->pg_list);
419 return -ENOMEM;
420 }
421 hdr = &rhdr->header;
422 nfs_pgheader_init(desc, hdr, nfs_readhdr_free);
423 atomic_inc(&hdr->refcnt);
424 ret = nfs_generic_pagein(desc, hdr);
425 if (ret == 0)
426 ret = nfs_do_multiple_reads(&hdr->rpc_list,
427 desc->pg_rpc_callops);
428 if (atomic_dec_and_test(&hdr->refcnt))
429 hdr->completion_ops->completion(hdr);
430 return ret;
431}
432
433static const struct nfs_pageio_ops nfs_pageio_read_ops = {
434 .pg_test = nfs_generic_pg_test,
435 .pg_doio = nfs_generic_pg_readpages,
436};
437
438/* 202/*
439 * This is the callback from RPC telling us whether a reply was 203 * This is the callback from RPC telling us whether a reply was
440 * received or some error occurred (timeout or socket shutdown). 204 * received or some error occurred (timeout or socket shutdown).
441 */ 205 */
442int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data) 206static int nfs_readpage_done(struct rpc_task *task, struct nfs_pgio_data *data,
207 struct inode *inode)
443{ 208{
444 struct inode *inode = data->header->inode; 209 int status = NFS_PROTO(inode)->read_done(task, data);
445 int status;
446
447 dprintk("NFS: %s: %5u, (status %d)\n", __func__, task->tk_pid,
448 task->tk_status);
449
450 status = NFS_PROTO(inode)->read_done(task, data);
451 if (status != 0) 210 if (status != 0)
452 return status; 211 return status;
453 212
@@ -460,10 +219,10 @@ int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
460 return 0; 219 return 0;
461} 220}
462 221
463static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data) 222static void nfs_readpage_retry(struct rpc_task *task, struct nfs_pgio_data *data)
464{ 223{
465 struct nfs_readargs *argp = &data->args; 224 struct nfs_pgio_args *argp = &data->args;
466 struct nfs_readres *resp = &data->res; 225 struct nfs_pgio_res *resp = &data->res;
467 226
468 /* This is a short read! */ 227 /* This is a short read! */
469 nfs_inc_stats(data->header->inode, NFSIOS_SHORTREAD); 228 nfs_inc_stats(data->header->inode, NFSIOS_SHORTREAD);
@@ -480,17 +239,11 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data
480 rpc_restart_call_prepare(task); 239 rpc_restart_call_prepare(task);
481} 240}
482 241
483static void nfs_readpage_result_common(struct rpc_task *task, void *calldata) 242static void nfs_readpage_result(struct rpc_task *task, struct nfs_pgio_data *data)
484{ 243{
485 struct nfs_read_data *data = calldata;
486 struct nfs_pgio_header *hdr = data->header; 244 struct nfs_pgio_header *hdr = data->header;
487 245
488 /* Note the only returns of nfs_readpage_result are 0 and -EAGAIN */ 246 if (data->res.eof) {
489 if (nfs_readpage_result(task, data) != 0)
490 return;
491 if (task->tk_status < 0)
492 nfs_set_pgio_error(hdr, task->tk_status, data->args.offset);
493 else if (data->res.eof) {
494 loff_t bound; 247 loff_t bound;
495 248
496 bound = data->args.offset + data->res.count; 249 bound = data->args.offset + data->res.count;
@@ -505,26 +258,6 @@ static void nfs_readpage_result_common(struct rpc_task *task, void *calldata)
505 nfs_readpage_retry(task, data); 258 nfs_readpage_retry(task, data);
506} 259}
507 260
508static void nfs_readpage_release_common(void *calldata)
509{
510 nfs_readdata_release(calldata);
511}
512
513void nfs_read_prepare(struct rpc_task *task, void *calldata)
514{
515 struct nfs_read_data *data = calldata;
516 int err;
517 err = NFS_PROTO(data->header->inode)->read_rpc_prepare(task, data);
518 if (err)
519 rpc_exit(task, err);
520}
521
522static const struct rpc_call_ops nfs_read_common_ops = {
523 .rpc_call_prepare = nfs_read_prepare,
524 .rpc_call_done = nfs_readpage_result_common,
525 .rpc_release = nfs_readpage_release_common,
526};
527
528/* 261/*
529 * Read a page over NFS. 262 * Read a page over NFS.
530 * We read the page synchronously in the following case: 263 * We read the page synchronously in the following case:
@@ -592,7 +325,6 @@ static int
592readpage_async_filler(void *data, struct page *page) 325readpage_async_filler(void *data, struct page *page)
593{ 326{
594 struct nfs_readdesc *desc = (struct nfs_readdesc *)data; 327 struct nfs_readdesc *desc = (struct nfs_readdesc *)data;
595 struct inode *inode = page_file_mapping(page)->host;
596 struct nfs_page *new; 328 struct nfs_page *new;
597 unsigned int len; 329 unsigned int len;
598 int error; 330 int error;
@@ -601,7 +333,7 @@ readpage_async_filler(void *data, struct page *page)
601 if (len == 0) 333 if (len == 0)
602 return nfs_return_empty_page(page); 334 return nfs_return_empty_page(page);
603 335
604 new = nfs_create_request(desc->ctx, inode, page, 0, len); 336 new = nfs_create_request(desc->ctx, page, NULL, 0, len);
605 if (IS_ERR(new)) 337 if (IS_ERR(new))
606 goto out_error; 338 goto out_error;
607 339
@@ -654,7 +386,8 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
654 if (ret == 0) 386 if (ret == 0)
655 goto read_complete; /* all pages were read */ 387 goto read_complete; /* all pages were read */
656 388
657 NFS_PROTO(inode)->read_pageio_init(&pgio, inode, &nfs_async_read_completion_ops); 389 nfs_pageio_init_read(&pgio, inode, false,
390 &nfs_async_read_completion_ops);
658 391
659 ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc); 392 ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);
660 393
@@ -671,7 +404,7 @@ out:
671int __init nfs_init_readpagecache(void) 404int __init nfs_init_readpagecache(void)
672{ 405{
673 nfs_rdata_cachep = kmem_cache_create("nfs_read_data", 406 nfs_rdata_cachep = kmem_cache_create("nfs_read_data",
674 sizeof(struct nfs_read_header), 407 sizeof(struct nfs_rw_header),
675 0, SLAB_HWCACHE_ALIGN, 408 0, SLAB_HWCACHE_ALIGN,
676 NULL); 409 NULL);
677 if (nfs_rdata_cachep == NULL) 410 if (nfs_rdata_cachep == NULL)
@@ -684,3 +417,12 @@ void nfs_destroy_readpagecache(void)
684{ 417{
685 kmem_cache_destroy(nfs_rdata_cachep); 418 kmem_cache_destroy(nfs_rdata_cachep);
686} 419}
420
421static const struct nfs_rw_ops nfs_rw_read_ops = {
422 .rw_mode = FMODE_READ,
423 .rw_alloc_header = nfs_readhdr_alloc,
424 .rw_free_header = nfs_readhdr_free,
425 .rw_done = nfs_readpage_done,
426 .rw_result = nfs_readpage_result,
427 .rw_initiate = nfs_initiate_read,
428};
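
nfs_rw_read_ops is the read half of the new nfs_rw_ops split: header allocation, protocol completion (rw_done/rw_result) and RPC initiation sit behind per-direction hooks so the generic code in pagelist.c can drive reads and writes through one engine (write.c declares its nfs_rw_write_ops counterpart in the next file of this diff). The shape of that split, as a self-contained C sketch with invented hook names rather than the kernel signatures:

#include <stdio.h>

/* Per-direction hooks, in the spirit of struct nfs_rw_ops. */
struct toy_rw_ops {
	const char *name;
	void (*initiate)(void);
	void (*done)(void);
};

static void read_initiate(void) { printf("set up a READ rpc\n"); }
static void read_done(void)     { printf("check READ reply, maybe retry a short read\n"); }

static const struct toy_rw_ops toy_read_ops = {
	.name     = "read",
	.initiate = read_initiate,
	.done     = read_done,
};

/* Generic engine: knows nothing about the direction it is driving. */
static void toy_generic_pgio(const struct toy_rw_ops *ops)
{
	printf("generic pgio using %s ops\n", ops->name);
	ops->initiate();
	ops->done();
}

int main(void)
{
	toy_generic_pgio(&toy_read_ops);
	return 0;
}
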
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 2cb56943e232..084af1060d79 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2180,11 +2180,23 @@ out_no_address:
2180 return -EINVAL; 2180 return -EINVAL;
2181} 2181}
2182 2182
2183#define NFS_MOUNT_CMP_FLAGMASK ~(NFS_MOUNT_INTR \
2184 | NFS_MOUNT_SECURE \
2185 | NFS_MOUNT_TCP \
2186 | NFS_MOUNT_VER3 \
2187 | NFS_MOUNT_KERBEROS \
2188 | NFS_MOUNT_NONLM \
2189 | NFS_MOUNT_BROKEN_SUID \
2190 | NFS_MOUNT_STRICTLOCK \
2191 | NFS_MOUNT_UNSHARED \
2192 | NFS_MOUNT_NORESVPORT \
2193 | NFS_MOUNT_LEGACY_INTERFACE)
2194
2183static int 2195static int
2184nfs_compare_remount_data(struct nfs_server *nfss, 2196nfs_compare_remount_data(struct nfs_server *nfss,
2185 struct nfs_parsed_mount_data *data) 2197 struct nfs_parsed_mount_data *data)
2186{ 2198{
2187 if (data->flags != nfss->flags || 2199 if ((data->flags ^ nfss->flags) & NFS_MOUNT_CMP_FLAGMASK ||
2188 data->rsize != nfss->rsize || 2200 data->rsize != nfss->rsize ||
2189 data->wsize != nfss->wsize || 2201 data->wsize != nfss->wsize ||
2190 data->version != nfss->nfs_client->rpc_ops->version || 2202 data->version != nfss->nfs_client->rpc_ops->version ||
@@ -2248,6 +2260,7 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data)
2248 data->nfs_server.addrlen = nfss->nfs_client->cl_addrlen; 2260 data->nfs_server.addrlen = nfss->nfs_client->cl_addrlen;
2249 data->version = nfsvers; 2261 data->version = nfsvers;
2250 data->minorversion = nfss->nfs_client->cl_minorversion; 2262 data->minorversion = nfss->nfs_client->cl_minorversion;
2263 data->net = current->nsproxy->net_ns;
2251 memcpy(&data->nfs_server.address, &nfss->nfs_client->cl_addr, 2264 memcpy(&data->nfs_server.address, &nfss->nfs_client->cl_addr,
2252 data->nfs_server.addrlen); 2265 data->nfs_server.addrlen);
2253 2266
@@ -2347,18 +2360,6 @@ void nfs_clone_super(struct super_block *sb, struct nfs_mount_info *mount_info)
2347 nfs_initialise_sb(sb); 2360 nfs_initialise_sb(sb);
2348} 2361}
2349 2362
2350#define NFS_MOUNT_CMP_FLAGMASK ~(NFS_MOUNT_INTR \
2351 | NFS_MOUNT_SECURE \
2352 | NFS_MOUNT_TCP \
2353 | NFS_MOUNT_VER3 \
2354 | NFS_MOUNT_KERBEROS \
2355 | NFS_MOUNT_NONLM \
2356 | NFS_MOUNT_BROKEN_SUID \
2357 | NFS_MOUNT_STRICTLOCK \
2358 | NFS_MOUNT_UNSHARED \
2359 | NFS_MOUNT_NORESVPORT \
2360 | NFS_MOUNT_LEGACY_INTERFACE)
2361
2362static int nfs_compare_mount_options(const struct super_block *s, const struct nfs_server *b, int flags) 2363static int nfs_compare_mount_options(const struct super_block *s, const struct nfs_server *b, int flags)
2363{ 2364{
2364 const struct nfs_server *a = s->s_fs_info; 2365 const struct nfs_server *a = s->s_fs_info;
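The remount fix replaces the strict data->flags != nfss->flags test with an XOR under NFS_MOUNT_CMP_FLAGMASK, so flag bits excluded from the mask may differ without triggering -EINVAL. A standalone sketch of the XOR-and-mask idiom (flag names and values here are made up for the example):

/* XOR-under-mask comparison: only bits inside CMP_MASK are significant.
 * Flag values are invented for the sketch. */
#include <stdio.h>

#define FLAG_TCP        0x0001  /* ignored on compare */
#define FLAG_NORESVPORT 0x0002  /* ignored on compare */
#define FLAG_SOFT       0x0100  /* significant */

#define CMP_MASK        (~(FLAG_TCP | FLAG_NORESVPORT))

static int flags_differ(unsigned int a, unsigned int b)
{
        /* a ^ b has a 1 for every differing bit; mask off the don't-cares */
        return (a ^ b) & CMP_MASK;
}

int main(void)
{
        unsigned int mounted = FLAG_TCP | FLAG_SOFT;
        unsigned int remount = FLAG_SOFT;       /* differs only in an ignored bit */
        unsigned int changed = FLAG_TCP;        /* FLAG_SOFT dropped: significant */

        printf("ignored-only diff: %s\n", flags_differ(mounted, remount) ? "mismatch" : "ok");
        printf("significant diff:  %s\n", flags_differ(mounted, changed) ? "mismatch" : "ok");
        return 0;
}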
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index ffb9459f180b..3ee5af4e738e 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -42,10 +42,10 @@
42 * Local function declarations 42 * Local function declarations
43 */ 43 */
44static void nfs_redirty_request(struct nfs_page *req); 44static void nfs_redirty_request(struct nfs_page *req);
45static const struct rpc_call_ops nfs_write_common_ops;
46static const struct rpc_call_ops nfs_commit_ops; 45static const struct rpc_call_ops nfs_commit_ops;
47static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops; 46static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops;
48static const struct nfs_commit_completion_ops nfs_commit_completion_ops; 47static const struct nfs_commit_completion_ops nfs_commit_completion_ops;
48static const struct nfs_rw_ops nfs_rw_write_ops;
49 49
50static struct kmem_cache *nfs_wdata_cachep; 50static struct kmem_cache *nfs_wdata_cachep;
51static mempool_t *nfs_wdata_mempool; 51static mempool_t *nfs_wdata_mempool;
@@ -70,76 +70,19 @@ void nfs_commit_free(struct nfs_commit_data *p)
70} 70}
71EXPORT_SYMBOL_GPL(nfs_commit_free); 71EXPORT_SYMBOL_GPL(nfs_commit_free);
72 72
73struct nfs_write_header *nfs_writehdr_alloc(void) 73static struct nfs_rw_header *nfs_writehdr_alloc(void)
74{ 74{
75 struct nfs_write_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO); 75 struct nfs_rw_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO);
76
77 if (p) {
78 struct nfs_pgio_header *hdr = &p->header;
79 76
77 if (p)
80 memset(p, 0, sizeof(*p)); 78 memset(p, 0, sizeof(*p));
81 INIT_LIST_HEAD(&hdr->pages);
82 INIT_LIST_HEAD(&hdr->rpc_list);
83 spin_lock_init(&hdr->lock);
84 atomic_set(&hdr->refcnt, 0);
85 hdr->verf = &p->verf;
86 }
87 return p; 79 return p;
88} 80}
89EXPORT_SYMBOL_GPL(nfs_writehdr_alloc);
90
91static struct nfs_write_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr,
92 unsigned int pagecount)
93{
94 struct nfs_write_data *data, *prealloc;
95
96 prealloc = &container_of(hdr, struct nfs_write_header, header)->rpc_data;
97 if (prealloc->header == NULL)
98 data = prealloc;
99 else
100 data = kzalloc(sizeof(*data), GFP_KERNEL);
101 if (!data)
102 goto out;
103
104 if (nfs_pgarray_set(&data->pages, pagecount)) {
105 data->header = hdr;
106 atomic_inc(&hdr->refcnt);
107 } else {
108 if (data != prealloc)
109 kfree(data);
110 data = NULL;
111 }
112out:
113 return data;
114}
115 81
116void nfs_writehdr_free(struct nfs_pgio_header *hdr) 82static void nfs_writehdr_free(struct nfs_rw_header *whdr)
117{ 83{
118 struct nfs_write_header *whdr = container_of(hdr, struct nfs_write_header, header);
119 mempool_free(whdr, nfs_wdata_mempool); 84 mempool_free(whdr, nfs_wdata_mempool);
120} 85}
121EXPORT_SYMBOL_GPL(nfs_writehdr_free);
122
123void nfs_writedata_release(struct nfs_write_data *wdata)
124{
125 struct nfs_pgio_header *hdr = wdata->header;
126 struct nfs_write_header *write_header = container_of(hdr, struct nfs_write_header, header);
127
128 put_nfs_open_context(wdata->args.context);
129 if (wdata->pages.pagevec != wdata->pages.page_array)
130 kfree(wdata->pages.pagevec);
131 if (wdata == &write_header->rpc_data) {
132 wdata->header = NULL;
133 wdata = NULL;
134 }
135 if (atomic_dec_and_test(&hdr->refcnt))
136 hdr->completion_ops->completion(hdr);
137 /* Note: we only free the rpc_task after callbacks are done.
138 * See the comment in rpc_free_task() for why
139 */
140 kfree(wdata);
141}
142EXPORT_SYMBOL_GPL(nfs_writedata_release);
143 86
144static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error) 87static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
145{ 88{
@@ -211,18 +154,78 @@ static void nfs_set_pageerror(struct page *page)
211 nfs_zap_mapping(page_file_mapping(page)->host, page_file_mapping(page)); 154 nfs_zap_mapping(page_file_mapping(page)->host, page_file_mapping(page));
212} 155}
213 156
157/*
158 * nfs_page_group_search_locked
159 * @head - head request of page group
160 * @page_offset - offset into page
161 *
162 * Search page group with head @head to find a request that contains the
163 * page offset @page_offset.
164 *
165 * Returns a pointer to the first matching nfs request, or NULL if no
166 * match is found.
167 *
168 * Must be called with the page group lock held
169 */
170static struct nfs_page *
171nfs_page_group_search_locked(struct nfs_page *head, unsigned int page_offset)
172{
173 struct nfs_page *req;
174
175 WARN_ON_ONCE(head != head->wb_head);
176 WARN_ON_ONCE(!test_bit(PG_HEADLOCK, &head->wb_head->wb_flags));
177
178 req = head;
179 do {
180 if (page_offset >= req->wb_pgbase &&
181 page_offset < (req->wb_pgbase + req->wb_bytes))
182 return req;
183
184 req = req->wb_this_page;
185 } while (req != head);
186
187 return NULL;
188}
189
190/*
191 * nfs_page_group_covers_page
192 * @head - head request of page group
193 *
194 * Return true if the page group with head @head covers the whole page,
195 * returns false otherwise
196 */
197static bool nfs_page_group_covers_page(struct nfs_page *req)
198{
199 struct nfs_page *tmp;
200 unsigned int pos = 0;
201 unsigned int len = nfs_page_length(req->wb_page);
202
203 nfs_page_group_lock(req);
204
205 do {
206 tmp = nfs_page_group_search_locked(req->wb_head, pos);
207 if (tmp) {
208 /* no way this should happen */
209 WARN_ON_ONCE(tmp->wb_pgbase != pos);
210 pos += tmp->wb_bytes - (pos - tmp->wb_pgbase);
211 }
212 } while (tmp && pos < len);
213
214 nfs_page_group_unlock(req);
215 WARN_ON_ONCE(pos > len);
216 return pos == len;
217}
218
214/* We can set the PG_uptodate flag if we see that a write request 219/* We can set the PG_uptodate flag if we see that a write request
215 * covers the full page. 220 * covers the full page.
216 */ 221 */
217static void nfs_mark_uptodate(struct page *page, unsigned int base, unsigned int count) 222static void nfs_mark_uptodate(struct nfs_page *req)
218{ 223{
219 if (PageUptodate(page)) 224 if (PageUptodate(req->wb_page))
220 return;
221 if (base != 0)
222 return; 225 return;
223 if (count != nfs_page_length(page)) 226 if (!nfs_page_group_covers_page(req))
224 return; 227 return;
225 SetPageUptodate(page); 228 SetPageUptodate(req->wb_page);
226} 229}
227 230
228static int wb_priority(struct writeback_control *wbc) 231static int wb_priority(struct writeback_control *wbc)
@@ -258,12 +261,15 @@ static void nfs_set_page_writeback(struct page *page)
258 } 261 }
259} 262}
260 263
261static void nfs_end_page_writeback(struct page *page) 264static void nfs_end_page_writeback(struct nfs_page *req)
262{ 265{
263 struct inode *inode = page_file_mapping(page)->host; 266 struct inode *inode = page_file_mapping(req->wb_page)->host;
264 struct nfs_server *nfss = NFS_SERVER(inode); 267 struct nfs_server *nfss = NFS_SERVER(inode);
265 268
266 end_page_writeback(page); 269 if (!nfs_page_group_sync_on_bit(req, PG_WB_END))
270 return;
271
272 end_page_writeback(req->wb_page);
267 if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) 273 if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
268 clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC); 274 clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC);
269} 275}
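nfs_end_page_writeback() now returns early unless nfs_page_group_sync_on_bit() reports that this is the last sub-request of the group to reach PG_WB_END, which keeps end_page_writeback() to one call per page however the page was split. The helper itself lives in pagelist.c and is not shown here; the sketch below only models the last-one-out idea with a counter, which is an assumption about its semantics, not the kernel implementation:

/* "Last one out turns off the light": the per-page action runs only when
 * the final sub-request of the group completes.  A plain counter stands in
 * for the kernel's per-request flag bits. */
#include <stdbool.h>
#include <stdio.h>

struct page_group {
        int nr_subreqs;         /* requests the page was split into */
        int nr_done;            /* how many have finished so far */
};

static bool subreq_done(struct page_group *grp)
{
        grp->nr_done++;
        return grp->nr_done == grp->nr_subreqs;   /* true exactly once */
}

static void end_page_writeback_once(struct page_group *grp, int subreq)
{
        if (!subreq_done(grp))
                return;                 /* other sub-requests still pending */
        printf("subreq %d was last: end_page_writeback()\n", subreq);
}

int main(void)
{
        struct page_group grp = { .nr_subreqs = 3 };

        for (int i = 0; i < 3; i++)
                end_page_writeback_once(&grp, i);
        return 0;
}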
@@ -354,10 +360,8 @@ static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc
354 struct nfs_pageio_descriptor pgio; 360 struct nfs_pageio_descriptor pgio;
355 int err; 361 int err;
356 362
357 NFS_PROTO(page_file_mapping(page)->host)->write_pageio_init(&pgio, 363 nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc),
358 page->mapping->host, 364 false, &nfs_async_write_completion_ops);
359 wb_priority(wbc),
360 &nfs_async_write_completion_ops);
361 err = nfs_do_writepage(page, wbc, &pgio); 365 err = nfs_do_writepage(page, wbc, &pgio);
362 nfs_pageio_complete(&pgio); 366 nfs_pageio_complete(&pgio);
363 if (err < 0) 367 if (err < 0)
@@ -400,7 +404,8 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
400 404
401 nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES); 405 nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
402 406
403 NFS_PROTO(inode)->write_pageio_init(&pgio, inode, wb_priority(wbc), &nfs_async_write_completion_ops); 407 nfs_pageio_init_write(&pgio, inode, wb_priority(wbc), false,
408 &nfs_async_write_completion_ops);
404 err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio); 409 err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio);
405 nfs_pageio_complete(&pgio); 410 nfs_pageio_complete(&pgio);
406 411
@@ -425,6 +430,8 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
425{ 430{
426 struct nfs_inode *nfsi = NFS_I(inode); 431 struct nfs_inode *nfsi = NFS_I(inode);
427 432
433 WARN_ON_ONCE(req->wb_this_page != req);
434
428 /* Lock the request! */ 435 /* Lock the request! */
429 nfs_lock_request(req); 436 nfs_lock_request(req);
430 437
@@ -441,6 +448,7 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
441 set_page_private(req->wb_page, (unsigned long)req); 448 set_page_private(req->wb_page, (unsigned long)req);
442 } 449 }
443 nfsi->npages++; 450 nfsi->npages++;
451 set_bit(PG_INODE_REF, &req->wb_flags);
444 kref_get(&req->wb_kref); 452 kref_get(&req->wb_kref);
445 spin_unlock(&inode->i_lock); 453 spin_unlock(&inode->i_lock);
446} 454}
@@ -452,15 +460,20 @@ static void nfs_inode_remove_request(struct nfs_page *req)
452{ 460{
453 struct inode *inode = req->wb_context->dentry->d_inode; 461 struct inode *inode = req->wb_context->dentry->d_inode;
454 struct nfs_inode *nfsi = NFS_I(inode); 462 struct nfs_inode *nfsi = NFS_I(inode);
463 struct nfs_page *head;
455 464
456 spin_lock(&inode->i_lock); 465 if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) {
457 if (likely(!PageSwapCache(req->wb_page))) { 466 head = req->wb_head;
458 set_page_private(req->wb_page, 0); 467
459 ClearPagePrivate(req->wb_page); 468 spin_lock(&inode->i_lock);
460 clear_bit(PG_MAPPED, &req->wb_flags); 469 if (likely(!PageSwapCache(head->wb_page))) {
470 set_page_private(head->wb_page, 0);
471 ClearPagePrivate(head->wb_page);
472 clear_bit(PG_MAPPED, &head->wb_flags);
473 }
474 nfsi->npages--;
475 spin_unlock(&inode->i_lock);
461 } 476 }
462 nfsi->npages--;
463 spin_unlock(&inode->i_lock);
464 nfs_release_request(req); 477 nfs_release_request(req);
465} 478}
466 479
@@ -583,7 +596,7 @@ nfs_clear_request_commit(struct nfs_page *req)
583} 596}
584 597
585static inline 598static inline
586int nfs_write_need_commit(struct nfs_write_data *data) 599int nfs_write_need_commit(struct nfs_pgio_data *data)
587{ 600{
588 if (data->verf.committed == NFS_DATA_SYNC) 601 if (data->verf.committed == NFS_DATA_SYNC)
589 return data->header->lseg == NULL; 602 return data->header->lseg == NULL;
@@ -614,7 +627,7 @@ nfs_clear_request_commit(struct nfs_page *req)
614} 627}
615 628
616static inline 629static inline
617int nfs_write_need_commit(struct nfs_write_data *data) 630int nfs_write_need_commit(struct nfs_pgio_data *data)
618{ 631{
619 return 0; 632 return 0;
620} 633}
@@ -625,6 +638,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr)
625{ 638{
626 struct nfs_commit_info cinfo; 639 struct nfs_commit_info cinfo;
627 unsigned long bytes = 0; 640 unsigned long bytes = 0;
641 bool do_destroy;
628 642
629 if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) 643 if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
630 goto out; 644 goto out;
@@ -645,7 +659,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr)
645 goto next; 659 goto next;
646 } 660 }
647 if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) { 661 if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) {
648 memcpy(&req->wb_verf, &hdr->verf->verifier, sizeof(req->wb_verf)); 662 memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf));
649 nfs_mark_request_commit(req, hdr->lseg, &cinfo); 663 nfs_mark_request_commit(req, hdr->lseg, &cinfo);
650 goto next; 664 goto next;
651 } 665 }
@@ -653,7 +667,8 @@ remove_req:
653 nfs_inode_remove_request(req); 667 nfs_inode_remove_request(req);
654next: 668next:
655 nfs_unlock_request(req); 669 nfs_unlock_request(req);
656 nfs_end_page_writeback(req->wb_page); 670 nfs_end_page_writeback(req);
671 do_destroy = !test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags);
657 nfs_release_request(req); 672 nfs_release_request(req);
658 } 673 }
659out: 674out:
@@ -661,7 +676,7 @@ out:
661} 676}
662 677
663#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) 678#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4)
664static unsigned long 679unsigned long
665nfs_reqs_to_commit(struct nfs_commit_info *cinfo) 680nfs_reqs_to_commit(struct nfs_commit_info *cinfo)
666{ 681{
667 return cinfo->mds->ncommit; 682 return cinfo->mds->ncommit;
@@ -718,7 +733,7 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst,
718} 733}
719 734
720#else 735#else
721static unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo) 736unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo)
722{ 737{
723 return 0; 738 return 0;
724} 739}
@@ -758,6 +773,10 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
758 if (req == NULL) 773 if (req == NULL)
759 goto out_unlock; 774 goto out_unlock;
760 775
776 /* should be handled by nfs_flush_incompatible */
777 WARN_ON_ONCE(req->wb_head != req);
778 WARN_ON_ONCE(req->wb_this_page != req);
779
761 rqend = req->wb_offset + req->wb_bytes; 780 rqend = req->wb_offset + req->wb_bytes;
762 /* 781 /*
763 * Tell the caller to flush out the request if 782 * Tell the caller to flush out the request if
@@ -819,7 +838,7 @@ static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx,
819 req = nfs_try_to_update_request(inode, page, offset, bytes); 838 req = nfs_try_to_update_request(inode, page, offset, bytes);
820 if (req != NULL) 839 if (req != NULL)
821 goto out; 840 goto out;
822 req = nfs_create_request(ctx, inode, page, offset, bytes); 841 req = nfs_create_request(ctx, page, NULL, offset, bytes);
823 if (IS_ERR(req)) 842 if (IS_ERR(req))
824 goto out; 843 goto out;
825 nfs_inode_add_request(inode, req); 844 nfs_inode_add_request(inode, req);
@@ -837,7 +856,7 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
837 return PTR_ERR(req); 856 return PTR_ERR(req);
838 /* Update file length */ 857 /* Update file length */
839 nfs_grow_file(page, offset, count); 858 nfs_grow_file(page, offset, count);
840 nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes); 859 nfs_mark_uptodate(req);
841 nfs_mark_request_dirty(req); 860 nfs_mark_request_dirty(req);
842 nfs_unlock_and_release_request(req); 861 nfs_unlock_and_release_request(req);
843 return 0; 862 return 0;
@@ -863,6 +882,8 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
863 return 0; 882 return 0;
864 l_ctx = req->wb_lock_context; 883 l_ctx = req->wb_lock_context;
865 do_flush = req->wb_page != page || req->wb_context != ctx; 884 do_flush = req->wb_page != page || req->wb_context != ctx;
885 /* for now, flush if more than 1 request in page_group */
886 do_flush |= req->wb_this_page != req;
866 if (l_ctx && ctx->dentry->d_inode->i_flock != NULL) { 887 if (l_ctx && ctx->dentry->d_inode->i_flock != NULL) {
867 do_flush |= l_ctx->lockowner.l_owner != current->files 888 do_flush |= l_ctx->lockowner.l_owner != current->files
868 || l_ctx->lockowner.l_pid != current->tgid; 889 || l_ctx->lockowner.l_pid != current->tgid;
@@ -990,126 +1011,17 @@ static int flush_task_priority(int how)
990 return RPC_PRIORITY_NORMAL; 1011 return RPC_PRIORITY_NORMAL;
991} 1012}
992 1013
993int nfs_initiate_write(struct rpc_clnt *clnt, 1014static void nfs_initiate_write(struct nfs_pgio_data *data, struct rpc_message *msg,
994 struct nfs_write_data *data, 1015 struct rpc_task_setup *task_setup_data, int how)
995 const struct rpc_call_ops *call_ops,
996 int how, int flags)
997{ 1016{
998 struct inode *inode = data->header->inode; 1017 struct inode *inode = data->header->inode;
999 int priority = flush_task_priority(how); 1018 int priority = flush_task_priority(how);
1000 struct rpc_task *task;
1001 struct rpc_message msg = {
1002 .rpc_argp = &data->args,
1003 .rpc_resp = &data->res,
1004 .rpc_cred = data->header->cred,
1005 };
1006 struct rpc_task_setup task_setup_data = {
1007 .rpc_client = clnt,
1008 .task = &data->task,
1009 .rpc_message = &msg,
1010 .callback_ops = call_ops,
1011 .callback_data = data,
1012 .workqueue = nfsiod_workqueue,
1013 .flags = RPC_TASK_ASYNC | flags,
1014 .priority = priority,
1015 };
1016 int ret = 0;
1017
1018 /* Set up the initial task struct. */
1019 NFS_PROTO(inode)->write_setup(data, &msg);
1020 1019
1021 dprintk("NFS: %5u initiated write call " 1020 task_setup_data->priority = priority;
1022 "(req %s/%llu, %u bytes @ offset %llu)\n", 1021 NFS_PROTO(inode)->write_setup(data, msg);
1023 data->task.tk_pid,
1024 inode->i_sb->s_id,
1025 (unsigned long long)NFS_FILEID(inode),
1026 data->args.count,
1027 (unsigned long long)data->args.offset);
1028 1022
1029 nfs4_state_protect_write(NFS_SERVER(inode)->nfs_client, 1023 nfs4_state_protect_write(NFS_SERVER(inode)->nfs_client,
1030 &task_setup_data.rpc_client, &msg, data); 1024 &task_setup_data->rpc_client, msg, data);
1031
1032 task = rpc_run_task(&task_setup_data);
1033 if (IS_ERR(task)) {
1034 ret = PTR_ERR(task);
1035 goto out;
1036 }
1037 if (how & FLUSH_SYNC) {
1038 ret = rpc_wait_for_completion_task(task);
1039 if (ret == 0)
1040 ret = task->tk_status;
1041 }
1042 rpc_put_task(task);
1043out:
1044 return ret;
1045}
1046EXPORT_SYMBOL_GPL(nfs_initiate_write);
1047
1048/*
1049 * Set up the argument/result storage required for the RPC call.
1050 */
1051static void nfs_write_rpcsetup(struct nfs_write_data *data,
1052 unsigned int count, unsigned int offset,
1053 int how, struct nfs_commit_info *cinfo)
1054{
1055 struct nfs_page *req = data->header->req;
1056
1057 /* Set up the RPC argument and reply structs
1058 * NB: take care not to mess about with data->commit et al. */
1059
1060 data->args.fh = NFS_FH(data->header->inode);
1061 data->args.offset = req_offset(req) + offset;
1062 /* pnfs_set_layoutcommit needs this */
1063 data->mds_offset = data->args.offset;
1064 data->args.pgbase = req->wb_pgbase + offset;
1065 data->args.pages = data->pages.pagevec;
1066 data->args.count = count;
1067 data->args.context = get_nfs_open_context(req->wb_context);
1068 data->args.lock_context = req->wb_lock_context;
1069 data->args.stable = NFS_UNSTABLE;
1070 switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) {
1071 case 0:
1072 break;
1073 case FLUSH_COND_STABLE:
1074 if (nfs_reqs_to_commit(cinfo))
1075 break;
1076 default:
1077 data->args.stable = NFS_FILE_SYNC;
1078 }
1079
1080 data->res.fattr = &data->fattr;
1081 data->res.count = count;
1082 data->res.verf = &data->verf;
1083 nfs_fattr_init(&data->fattr);
1084}
1085
1086static int nfs_do_write(struct nfs_write_data *data,
1087 const struct rpc_call_ops *call_ops,
1088 int how)
1089{
1090 struct inode *inode = data->header->inode;
1091
1092 return nfs_initiate_write(NFS_CLIENT(inode), data, call_ops, how, 0);
1093}
1094
1095static int nfs_do_multiple_writes(struct list_head *head,
1096 const struct rpc_call_ops *call_ops,
1097 int how)
1098{
1099 struct nfs_write_data *data;
1100 int ret = 0;
1101
1102 while (!list_empty(head)) {
1103 int ret2;
1104
1105 data = list_first_entry(head, struct nfs_write_data, list);
1106 list_del_init(&data->list);
1107
1108 ret2 = nfs_do_write(data, call_ops, how);
1109 if (ret == 0)
1110 ret = ret2;
1111 }
1112 return ret;
1113} 1025}
1114 1026
1115/* If a nfs_flush_* function fails, it should remove reqs from @head and 1027/* If a nfs_flush_* function fails, it should remove reqs from @head and
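The deleted nfs_write_rpcsetup() carried the stable-write policy: default to NFS_UNSTABLE, but upgrade to NFS_FILE_SYNC when FLUSH_STABLE is set, or when FLUSH_COND_STABLE is set and no requests are queued for a later COMMIT. A compact restatement of that decision as a standalone helper (flag names follow the kernel, numeric values are invented for the sketch):

/* Stable-write policy from the removed nfs_write_rpcsetup(), restated as a
 * standalone helper.  Flag values are made up for this sketch. */
#include <stdio.h>

#define FLUSH_STABLE        0x04
#define FLUSH_COND_STABLE   0x08

enum stable_how { NFS_UNSTABLE, NFS_DATA_SYNC, NFS_FILE_SYNC };

static enum stable_how choose_stable(int ioflags, unsigned long reqs_to_commit)
{
        switch (ioflags & (FLUSH_STABLE | FLUSH_COND_STABLE)) {
        case 0:
                return NFS_UNSTABLE;            /* a COMMIT will follow */
        case FLUSH_COND_STABLE:
                if (reqs_to_commit)
                        return NFS_UNSTABLE;    /* COMMIT is coming anyway */
                /* fall through */
        default:
                return NFS_FILE_SYNC;           /* ask the server to sync now */
        }
}

int main(void)
{
        printf("plain async write : %d\n", choose_stable(0, 0));
        printf("cond, no commits  : %d\n", choose_stable(FLUSH_COND_STABLE, 0));
        printf("cond, commits due : %d\n", choose_stable(FLUSH_COND_STABLE, 5));
        return 0;
}

With the flush paths consolidated, nfs_initiate_write() is reduced to setting the task priority and calling the protocol's write_setup(); building the rpc_message and rpc_task_setup is now the generic code's job.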
@@ -1120,7 +1032,7 @@ static void nfs_redirty_request(struct nfs_page *req)
1120{ 1032{
1121 nfs_mark_request_dirty(req); 1033 nfs_mark_request_dirty(req);
1122 nfs_unlock_request(req); 1034 nfs_unlock_request(req);
1123 nfs_end_page_writeback(req->wb_page); 1035 nfs_end_page_writeback(req);
1124 nfs_release_request(req); 1036 nfs_release_request(req);
1125} 1037}
1126 1038
@@ -1140,173 +1052,30 @@ static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops = {
1140 .completion = nfs_write_completion, 1052 .completion = nfs_write_completion,
1141}; 1053};
1142 1054
1143static void nfs_flush_error(struct nfs_pageio_descriptor *desc,
1144 struct nfs_pgio_header *hdr)
1145{
1146 set_bit(NFS_IOHDR_REDO, &hdr->flags);
1147 while (!list_empty(&hdr->rpc_list)) {
1148 struct nfs_write_data *data = list_first_entry(&hdr->rpc_list,
1149 struct nfs_write_data, list);
1150 list_del(&data->list);
1151 nfs_writedata_release(data);
1152 }
1153 desc->pg_completion_ops->error_cleanup(&desc->pg_list);
1154}
1155
1156/*
1157 * Generate multiple small requests to write out a single
1158 * contiguous dirty area on one page.
1159 */
1160static int nfs_flush_multi(struct nfs_pageio_descriptor *desc,
1161 struct nfs_pgio_header *hdr)
1162{
1163 struct nfs_page *req = hdr->req;
1164 struct page *page = req->wb_page;
1165 struct nfs_write_data *data;
1166 size_t wsize = desc->pg_bsize, nbytes;
1167 unsigned int offset;
1168 int requests = 0;
1169 struct nfs_commit_info cinfo;
1170
1171 nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);
1172
1173 if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
1174 (desc->pg_moreio || nfs_reqs_to_commit(&cinfo) ||
1175 desc->pg_count > wsize))
1176 desc->pg_ioflags &= ~FLUSH_COND_STABLE;
1177
1178
1179 offset = 0;
1180 nbytes = desc->pg_count;
1181 do {
1182 size_t len = min(nbytes, wsize);
1183
1184 data = nfs_writedata_alloc(hdr, 1);
1185 if (!data) {
1186 nfs_flush_error(desc, hdr);
1187 return -ENOMEM;
1188 }
1189 data->pages.pagevec[0] = page;
1190 nfs_write_rpcsetup(data, len, offset, desc->pg_ioflags, &cinfo);
1191 list_add(&data->list, &hdr->rpc_list);
1192 requests++;
1193 nbytes -= len;
1194 offset += len;
1195 } while (nbytes != 0);
1196 nfs_list_remove_request(req);
1197 nfs_list_add_request(req, &hdr->pages);
1198 desc->pg_rpc_callops = &nfs_write_common_ops;
1199 return 0;
1200}
1201
1202/*
1203 * Create an RPC task for the given write request and kick it.
1204 * The page must have been locked by the caller.
1205 *
1206 * It may happen that the page we're passed is not marked dirty.
1207 * This is the case if nfs_updatepage detects a conflicting request
1208 * that has been written but not committed.
1209 */
1210static int nfs_flush_one(struct nfs_pageio_descriptor *desc,
1211 struct nfs_pgio_header *hdr)
1212{
1213 struct nfs_page *req;
1214 struct page **pages;
1215 struct nfs_write_data *data;
1216 struct list_head *head = &desc->pg_list;
1217 struct nfs_commit_info cinfo;
1218
1219 data = nfs_writedata_alloc(hdr, nfs_page_array_len(desc->pg_base,
1220 desc->pg_count));
1221 if (!data) {
1222 nfs_flush_error(desc, hdr);
1223 return -ENOMEM;
1224 }
1225
1226 nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);
1227 pages = data->pages.pagevec;
1228 while (!list_empty(head)) {
1229 req = nfs_list_entry(head->next);
1230 nfs_list_remove_request(req);
1231 nfs_list_add_request(req, &hdr->pages);
1232 *pages++ = req->wb_page;
1233 }
1234
1235 if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
1236 (desc->pg_moreio || nfs_reqs_to_commit(&cinfo)))
1237 desc->pg_ioflags &= ~FLUSH_COND_STABLE;
1238
1239 /* Set up the argument struct */
1240 nfs_write_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags, &cinfo);
1241 list_add(&data->list, &hdr->rpc_list);
1242 desc->pg_rpc_callops = &nfs_write_common_ops;
1243 return 0;
1244}
1245
1246int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
1247 struct nfs_pgio_header *hdr)
1248{
1249 if (desc->pg_bsize < PAGE_CACHE_SIZE)
1250 return nfs_flush_multi(desc, hdr);
1251 return nfs_flush_one(desc, hdr);
1252}
1253EXPORT_SYMBOL_GPL(nfs_generic_flush);
1254
1255static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
1256{
1257 struct nfs_write_header *whdr;
1258 struct nfs_pgio_header *hdr;
1259 int ret;
1260
1261 whdr = nfs_writehdr_alloc();
1262 if (!whdr) {
1263 desc->pg_completion_ops->error_cleanup(&desc->pg_list);
1264 return -ENOMEM;
1265 }
1266 hdr = &whdr->header;
1267 nfs_pgheader_init(desc, hdr, nfs_writehdr_free);
1268 atomic_inc(&hdr->refcnt);
1269 ret = nfs_generic_flush(desc, hdr);
1270 if (ret == 0)
1271 ret = nfs_do_multiple_writes(&hdr->rpc_list,
1272 desc->pg_rpc_callops,
1273 desc->pg_ioflags);
1274 if (atomic_dec_and_test(&hdr->refcnt))
1275 hdr->completion_ops->completion(hdr);
1276 return ret;
1277}
1278
1279static const struct nfs_pageio_ops nfs_pageio_write_ops = {
1280 .pg_test = nfs_generic_pg_test,
1281 .pg_doio = nfs_generic_pg_writepages,
1282};
1283
1284void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, 1055void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
1285 struct inode *inode, int ioflags, 1056 struct inode *inode, int ioflags, bool force_mds,
1286 const struct nfs_pgio_completion_ops *compl_ops) 1057 const struct nfs_pgio_completion_ops *compl_ops)
1287{ 1058{
1288 nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops, compl_ops, 1059 struct nfs_server *server = NFS_SERVER(inode);
1289 NFS_SERVER(inode)->wsize, ioflags); 1060 const struct nfs_pageio_ops *pg_ops = &nfs_pgio_rw_ops;
1061
1062#ifdef CONFIG_NFS_V4_1
1063 if (server->pnfs_curr_ld && !force_mds)
1064 pg_ops = server->pnfs_curr_ld->pg_write_ops;
1065#endif
1066 nfs_pageio_init(pgio, inode, pg_ops, compl_ops, &nfs_rw_write_ops,
1067 server->wsize, ioflags);
1290} 1068}
1291EXPORT_SYMBOL_GPL(nfs_pageio_init_write); 1069EXPORT_SYMBOL_GPL(nfs_pageio_init_write);
1292 1070
1293void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio) 1071void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio)
1294{ 1072{
1295 pgio->pg_ops = &nfs_pageio_write_ops; 1073 pgio->pg_ops = &nfs_pgio_rw_ops;
1296 pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize; 1074 pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize;
1297} 1075}
1298EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds); 1076EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds);
1299 1077
1300 1078
1301void nfs_write_prepare(struct rpc_task *task, void *calldata)
1302{
1303 struct nfs_write_data *data = calldata;
1304 int err;
1305 err = NFS_PROTO(data->header->inode)->write_rpc_prepare(task, data);
1306 if (err)
1307 rpc_exit(task, err);
1308}
1309
1310void nfs_commit_prepare(struct rpc_task *task, void *calldata) 1079void nfs_commit_prepare(struct rpc_task *task, void *calldata)
1311{ 1080{
1312 struct nfs_commit_data *data = calldata; 1081 struct nfs_commit_data *data = calldata;
@@ -1314,23 +1083,8 @@ void nfs_commit_prepare(struct rpc_task *task, void *calldata)
1314 NFS_PROTO(data->inode)->commit_rpc_prepare(task, data); 1083 NFS_PROTO(data->inode)->commit_rpc_prepare(task, data);
1315} 1084}
1316 1085
1317/* 1086static void nfs_writeback_release_common(struct nfs_pgio_data *data)
1318 * Handle a write reply that flushes a whole page.
1319 *
1320 * FIXME: There is an inherent race with invalidate_inode_pages and
1321 * writebacks since the page->count is kept > 1 for as long
1322 * as the page has a write request pending.
1323 */
1324static void nfs_writeback_done_common(struct rpc_task *task, void *calldata)
1325{
1326 struct nfs_write_data *data = calldata;
1327
1328 nfs_writeback_done(task, data);
1329}
1330
1331static void nfs_writeback_release_common(void *calldata)
1332{ 1087{
1333 struct nfs_write_data *data = calldata;
1334 struct nfs_pgio_header *hdr = data->header; 1088 struct nfs_pgio_header *hdr = data->header;
1335 int status = data->task.tk_status; 1089 int status = data->task.tk_status;
1336 1090
@@ -1339,34 +1093,46 @@ static void nfs_writeback_release_common(void *calldata)
1339 if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) 1093 if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags))
1340 ; /* Do nothing */ 1094 ; /* Do nothing */
1341 else if (!test_and_set_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) 1095 else if (!test_and_set_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags))
1342 memcpy(hdr->verf, &data->verf, sizeof(*hdr->verf)); 1096 memcpy(&hdr->verf, &data->verf, sizeof(hdr->verf));
1343 else if (memcmp(hdr->verf, &data->verf, sizeof(*hdr->verf))) 1097 else if (memcmp(&hdr->verf, &data->verf, sizeof(hdr->verf)))
1344 set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags); 1098 set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags);
1345 spin_unlock(&hdr->lock); 1099 spin_unlock(&hdr->lock);
1346 } 1100 }
1347 nfs_writedata_release(data);
1348} 1101}
1349 1102
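The verifier bookkeeping in nfs_writeback_release_common() decides what happens after an unstable WRITE completes: the first completion records the server's write verifier in the header, later completions with the same verifier just leave NFS_IOHDR_NEED_COMMIT set, and a completion whose verifier differs marks the header NFS_IOHDR_NEED_RESCHED so the data is rewritten. A small model of that three-way check (a plain struct and bools standing in for the header flags, not the kernel types):

/* Model of the verifier check: record the first verifier, reschedule if a
 * later completion disagrees (the verifier changes when the server restarts). */
#include <stdbool.h>
#include <stdio.h>

struct hdr {
        bool need_commit;       /* NFS_IOHDR_NEED_COMMIT  */
        bool need_resched;      /* NFS_IOHDR_NEED_RESCHED */
        unsigned long long verf;
};

static void write_completed(struct hdr *hdr, unsigned long long verf)
{
        if (hdr->need_resched)
                return;                         /* already being redone */
        if (!hdr->need_commit) {
                hdr->need_commit = true;        /* first completion: remember verifier */
                hdr->verf = verf;
        } else if (hdr->verf != verf) {
                hdr->need_resched = true;       /* verifier changed: rewrite the data */
        }
}

int main(void)
{
        struct hdr hdr = { 0 };

        write_completed(&hdr, 0xabcdULL);
        write_completed(&hdr, 0xabcdULL);       /* same verifier: still just COMMIT */
        write_completed(&hdr, 0xbeefULL);       /* changed: must rewrite */

        printf("need_commit=%d need_resched=%d\n", hdr.need_commit, hdr.need_resched);
        return 0;
}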
1350static const struct rpc_call_ops nfs_write_common_ops = { 1103/*
1351 .rpc_call_prepare = nfs_write_prepare, 1104 * Special version of should_remove_suid() that ignores capabilities.
1352 .rpc_call_done = nfs_writeback_done_common, 1105 */
1353 .rpc_release = nfs_writeback_release_common, 1106static int nfs_should_remove_suid(const struct inode *inode)
1354}; 1107{
1108 umode_t mode = inode->i_mode;
1109 int kill = 0;
1110
1111 /* suid always must be killed */
1112 if (unlikely(mode & S_ISUID))
1113 kill = ATTR_KILL_SUID;
1355 1114
1115 /*
1116 * sgid without any exec bits is just a mandatory locking mark; leave
1117 * it alone. If some exec bits are set, it's a real sgid; kill it.
1118 */
1119 if (unlikely((mode & S_ISGID) && (mode & S_IXGRP)))
1120 kill |= ATTR_KILL_SGID;
1121
1122 if (unlikely(kill && S_ISREG(mode)))
1123 return kill;
1124
1125 return 0;
1126}
1356 1127
1357/* 1128/*
1358 * This function is called when the WRITE call is complete. 1129 * This function is called when the WRITE call is complete.
1359 */ 1130 */
1360void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) 1131static int nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data,
1132 struct inode *inode)
1361{ 1133{
1362 struct nfs_writeargs *argp = &data->args;
1363 struct nfs_writeres *resp = &data->res;
1364 struct inode *inode = data->header->inode;
1365 int status; 1134 int status;
1366 1135
1367 dprintk("NFS: %5u nfs_writeback_done (status %d)\n",
1368 task->tk_pid, task->tk_status);
1369
1370 /* 1136 /*
1371 * ->write_done will attempt to use post-op attributes to detect 1137 * ->write_done will attempt to use post-op attributes to detect
1372 * conflicting writes by other clients. A strict interpretation 1138 * conflicting writes by other clients. A strict interpretation
@@ -1376,11 +1142,11 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1376 */ 1142 */
1377 status = NFS_PROTO(inode)->write_done(task, data); 1143 status = NFS_PROTO(inode)->write_done(task, data);
1378 if (status != 0) 1144 if (status != 0)
1379 return; 1145 return status;
1380 nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, resp->count); 1146 nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, data->res.count);
1381 1147
1382#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) 1148#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4)
1383 if (resp->verf->committed < argp->stable && task->tk_status >= 0) { 1149 if (data->res.verf->committed < data->args.stable && task->tk_status >= 0) {
1384 /* We tried a write call, but the server did not 1150 /* We tried a write call, but the server did not
1385 * commit data to stable storage even though we 1151 * commit data to stable storage even though we
1386 * requested it. 1152 * requested it.
@@ -1396,18 +1162,31 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1396 dprintk("NFS: faulty NFS server %s:" 1162 dprintk("NFS: faulty NFS server %s:"
1397 " (committed = %d) != (stable = %d)\n", 1163 " (committed = %d) != (stable = %d)\n",
1398 NFS_SERVER(inode)->nfs_client->cl_hostname, 1164 NFS_SERVER(inode)->nfs_client->cl_hostname,
1399 resp->verf->committed, argp->stable); 1165 data->res.verf->committed, data->args.stable);
1400 complain = jiffies + 300 * HZ; 1166 complain = jiffies + 300 * HZ;
1401 } 1167 }
1402 } 1168 }
1403#endif 1169#endif
1404 if (task->tk_status < 0) 1170
1405 nfs_set_pgio_error(data->header, task->tk_status, argp->offset); 1171 /* Deal with the suid/sgid bit corner case */
1406 else if (resp->count < argp->count) { 1172 if (nfs_should_remove_suid(inode))
1173 nfs_mark_for_revalidate(inode);
1174 return 0;
1175}
1176
1177/*
1178 * This function is called when the WRITE call is complete.
1179 */
1180static void nfs_writeback_result(struct rpc_task *task, struct nfs_pgio_data *data)
1181{
1182 struct nfs_pgio_args *argp = &data->args;
1183 struct nfs_pgio_res *resp = &data->res;
1184
1185 if (resp->count < argp->count) {
1407 static unsigned long complain; 1186 static unsigned long complain;
1408 1187
1409 /* This a short write! */ 1188 /* This a short write! */
1410 nfs_inc_stats(inode, NFSIOS_SHORTWRITE); 1189 nfs_inc_stats(data->header->inode, NFSIOS_SHORTWRITE);
1411 1190
1412 /* Has the server at least made some progress? */ 1191 /* Has the server at least made some progress? */
1413 if (resp->count == 0) { 1192 if (resp->count == 0) {
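The short-write branch is truncated by this hunk, but its shape is visible: a WRITE that returned fewer bytes than requested is broadly either retried for the remaining range when the server made some progress, or treated as an error when resp->count is 0. A toy model of the resend arithmetic, with invented fields rather than nfs_pgio_args:

/* Toy model of short-write handling: trim the argument window to the bytes
 * the server did not take and retry.  Fields are invented for the sketch. */
#include <stdio.h>

struct write_args {
        unsigned long long offset;  /* file offset of this RPC */
        unsigned int pgbase;        /* offset into the first page */
        unsigned int count;         /* bytes requested */
};

/* Returns 1 if the caller should reissue the RPC, 0 if it is complete,
 * -1 if the server made no progress at all. */
static int handle_write_reply(struct write_args *args, unsigned int written)
{
        if (written >= args->count)
                return 0;                       /* everything was accepted */
        if (written == 0)
                return -1;                      /* no progress: treat as error */

        /* Advance past what the server took and retry the remainder. */
        args->offset += written;
        args->pgbase += written;
        args->count  -= written;
        return 1;
}

int main(void)
{
        struct write_args args = { .offset = 8192, .pgbase = 0, .count = 4096 };

        while (handle_write_reply(&args, 1024) == 1)
                printf("retrying %u bytes at offset %llu\n",
                       args.count, args.offset);
        return 0;
}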
@@ -1874,7 +1653,7 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
1874int __init nfs_init_writepagecache(void) 1653int __init nfs_init_writepagecache(void)
1875{ 1654{
1876 nfs_wdata_cachep = kmem_cache_create("nfs_write_data", 1655 nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
1877 sizeof(struct nfs_write_header), 1656 sizeof(struct nfs_rw_header),
1878 0, SLAB_HWCACHE_ALIGN, 1657 0, SLAB_HWCACHE_ALIGN,
1879 NULL); 1658 NULL);
1880 if (nfs_wdata_cachep == NULL) 1659 if (nfs_wdata_cachep == NULL)
@@ -1936,3 +1715,12 @@ void nfs_destroy_writepagecache(void)
1936 kmem_cache_destroy(nfs_wdata_cachep); 1715 kmem_cache_destroy(nfs_wdata_cachep);
1937} 1716}
1938 1717
1718static const struct nfs_rw_ops nfs_rw_write_ops = {
1719 .rw_mode = FMODE_WRITE,
1720 .rw_alloc_header = nfs_writehdr_alloc,
1721 .rw_free_header = nfs_writehdr_free,
1722 .rw_release = nfs_writeback_release_common,
1723 .rw_done = nfs_writeback_done,
1724 .rw_result = nfs_writeback_result,
1725 .rw_initiate = nfs_initiate_write,
1726};