aboutsummaryrefslogtreecommitdiffstats
path: root/fs/nfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/nfs')
-rw-r--r--fs/nfs/Makefile4
-rw-r--r--fs/nfs/blocklayout/blocklayout.c38
-rw-r--r--fs/nfs/dir.c12
-rw-r--r--fs/nfs/direct.c439
-rw-r--r--fs/nfs/file.c69
-rw-r--r--fs/nfs/filelayout/Makefile5
-rw-r--r--fs/nfs/filelayout/filelayout.c (renamed from fs/nfs/nfs4filelayout.c)203
-rw-r--r--fs/nfs/filelayout/filelayout.h (renamed from fs/nfs/nfs4filelayout.h)2
-rw-r--r--fs/nfs/filelayout/filelayoutdev.c (renamed from fs/nfs/nfs4filelayoutdev.c)10
-rw-r--r--fs/nfs/getroot.c3
-rw-r--r--fs/nfs/inode.c104
-rw-r--r--fs/nfs/internal.h39
-rw-r--r--fs/nfs/nfs2xdr.c14
-rw-r--r--fs/nfs/nfs3proc.c21
-rw-r--r--fs/nfs/nfs3xdr.c16
-rw-r--r--fs/nfs/nfs4_fs.h6
-rw-r--r--fs/nfs/nfs4file.c13
-rw-r--r--fs/nfs/nfs4namespace.c102
-rw-r--r--fs/nfs/nfs4proc.c60
-rw-r--r--fs/nfs/nfs4state.c10
-rw-r--r--fs/nfs/nfs4sysctl.c6
-rw-r--r--fs/nfs/nfs4trace.h8
-rw-r--r--fs/nfs/nfs4xdr.c19
-rw-r--r--fs/nfs/objlayout/objio_osd.c24
-rw-r--r--fs/nfs/objlayout/objlayout.c24
-rw-r--r--fs/nfs/objlayout/objlayout.h8
-rw-r--r--fs/nfs/pagelist.c639
-rw-r--r--fs/nfs/pnfs.c168
-rw-r--r--fs/nfs/pnfs.h32
-rw-r--r--fs/nfs/proc.c21
-rw-r--r--fs/nfs/read.c414
-rw-r--r--fs/nfs/super.c27
-rw-r--r--fs/nfs/sysctl.c6
-rw-r--r--fs/nfs/write.c596
34 files changed, 1568 insertions, 1594 deletions
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index 03192a66c143..4782e0840dcc 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -29,8 +29,6 @@ nfsv4-$(CONFIG_NFS_USE_LEGACY_DNS) += cache_lib.o
29nfsv4-$(CONFIG_SYSCTL) += nfs4sysctl.o 29nfsv4-$(CONFIG_SYSCTL) += nfs4sysctl.o
30nfsv4-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o 30nfsv4-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o
31 31
32obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o 32obj-$(CONFIG_PNFS_FILE_LAYOUT) += filelayout/
33nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o
34
35obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayout/ 33obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayout/
36obj-$(CONFIG_PNFS_BLOCK) += blocklayout/ 34obj-$(CONFIG_PNFS_BLOCK) += blocklayout/
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 65d849bdf77a..9b431f44fad9 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -210,7 +210,7 @@ static void bl_end_io_read(struct bio *bio, int err)
210 SetPageUptodate(bvec->bv_page); 210 SetPageUptodate(bvec->bv_page);
211 211
212 if (err) { 212 if (err) {
213 struct nfs_read_data *rdata = par->data; 213 struct nfs_pgio_data *rdata = par->data;
214 struct nfs_pgio_header *header = rdata->header; 214 struct nfs_pgio_header *header = rdata->header;
215 215
216 if (!header->pnfs_error) 216 if (!header->pnfs_error)
@@ -224,17 +224,17 @@ static void bl_end_io_read(struct bio *bio, int err)
224static void bl_read_cleanup(struct work_struct *work) 224static void bl_read_cleanup(struct work_struct *work)
225{ 225{
226 struct rpc_task *task; 226 struct rpc_task *task;
227 struct nfs_read_data *rdata; 227 struct nfs_pgio_data *rdata;
228 dprintk("%s enter\n", __func__); 228 dprintk("%s enter\n", __func__);
229 task = container_of(work, struct rpc_task, u.tk_work); 229 task = container_of(work, struct rpc_task, u.tk_work);
230 rdata = container_of(task, struct nfs_read_data, task); 230 rdata = container_of(task, struct nfs_pgio_data, task);
231 pnfs_ld_read_done(rdata); 231 pnfs_ld_read_done(rdata);
232} 232}
233 233
234static void 234static void
235bl_end_par_io_read(void *data, int unused) 235bl_end_par_io_read(void *data, int unused)
236{ 236{
237 struct nfs_read_data *rdata = data; 237 struct nfs_pgio_data *rdata = data;
238 238
239 rdata->task.tk_status = rdata->header->pnfs_error; 239 rdata->task.tk_status = rdata->header->pnfs_error;
240 INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup); 240 INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup);
@@ -242,7 +242,7 @@ bl_end_par_io_read(void *data, int unused)
242} 242}
243 243
244static enum pnfs_try_status 244static enum pnfs_try_status
245bl_read_pagelist(struct nfs_read_data *rdata) 245bl_read_pagelist(struct nfs_pgio_data *rdata)
246{ 246{
247 struct nfs_pgio_header *header = rdata->header; 247 struct nfs_pgio_header *header = rdata->header;
248 int i, hole; 248 int i, hole;
@@ -390,7 +390,7 @@ static void bl_end_io_write_zero(struct bio *bio, int err)
390 } 390 }
391 391
392 if (unlikely(err)) { 392 if (unlikely(err)) {
393 struct nfs_write_data *data = par->data; 393 struct nfs_pgio_data *data = par->data;
394 struct nfs_pgio_header *header = data->header; 394 struct nfs_pgio_header *header = data->header;
395 395
396 if (!header->pnfs_error) 396 if (!header->pnfs_error)
@@ -405,7 +405,7 @@ static void bl_end_io_write(struct bio *bio, int err)
405{ 405{
406 struct parallel_io *par = bio->bi_private; 406 struct parallel_io *par = bio->bi_private;
407 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 407 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
408 struct nfs_write_data *data = par->data; 408 struct nfs_pgio_data *data = par->data;
409 struct nfs_pgio_header *header = data->header; 409 struct nfs_pgio_header *header = data->header;
410 410
411 if (!uptodate) { 411 if (!uptodate) {
@@ -423,10 +423,10 @@ static void bl_end_io_write(struct bio *bio, int err)
423static void bl_write_cleanup(struct work_struct *work) 423static void bl_write_cleanup(struct work_struct *work)
424{ 424{
425 struct rpc_task *task; 425 struct rpc_task *task;
426 struct nfs_write_data *wdata; 426 struct nfs_pgio_data *wdata;
427 dprintk("%s enter\n", __func__); 427 dprintk("%s enter\n", __func__);
428 task = container_of(work, struct rpc_task, u.tk_work); 428 task = container_of(work, struct rpc_task, u.tk_work);
429 wdata = container_of(task, struct nfs_write_data, task); 429 wdata = container_of(task, struct nfs_pgio_data, task);
430 if (likely(!wdata->header->pnfs_error)) { 430 if (likely(!wdata->header->pnfs_error)) {
431 /* Marks for LAYOUTCOMMIT */ 431 /* Marks for LAYOUTCOMMIT */
432 mark_extents_written(BLK_LSEG2EXT(wdata->header->lseg), 432 mark_extents_written(BLK_LSEG2EXT(wdata->header->lseg),
@@ -438,7 +438,7 @@ static void bl_write_cleanup(struct work_struct *work)
438/* Called when last of bios associated with a bl_write_pagelist call finishes */ 438/* Called when last of bios associated with a bl_write_pagelist call finishes */
439static void bl_end_par_io_write(void *data, int num_se) 439static void bl_end_par_io_write(void *data, int num_se)
440{ 440{
441 struct nfs_write_data *wdata = data; 441 struct nfs_pgio_data *wdata = data;
442 442
443 if (unlikely(wdata->header->pnfs_error)) { 443 if (unlikely(wdata->header->pnfs_error)) {
444 bl_free_short_extents(&BLK_LSEG2EXT(wdata->header->lseg)->bl_inval, 444 bl_free_short_extents(&BLK_LSEG2EXT(wdata->header->lseg)->bl_inval,
@@ -673,7 +673,7 @@ check_page:
673} 673}
674 674
675static enum pnfs_try_status 675static enum pnfs_try_status
676bl_write_pagelist(struct nfs_write_data *wdata, int sync) 676bl_write_pagelist(struct nfs_pgio_data *wdata, int sync)
677{ 677{
678 struct nfs_pgio_header *header = wdata->header; 678 struct nfs_pgio_header *header = wdata->header;
679 int i, ret, npg_zero, pg_index, last = 0; 679 int i, ret, npg_zero, pg_index, last = 0;
@@ -1189,13 +1189,17 @@ bl_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
1189 pnfs_generic_pg_init_read(pgio, req); 1189 pnfs_generic_pg_init_read(pgio, req);
1190} 1190}
1191 1191
1192static bool 1192/*
1193 * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
1194 * of bytes (maximum @req->wb_bytes) that can be coalesced.
1195 */
1196static size_t
1193bl_pg_test_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, 1197bl_pg_test_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
1194 struct nfs_page *req) 1198 struct nfs_page *req)
1195{ 1199{
1196 if (pgio->pg_dreq != NULL && 1200 if (pgio->pg_dreq != NULL &&
1197 !is_aligned_req(req, SECTOR_SIZE)) 1201 !is_aligned_req(req, SECTOR_SIZE))
1198 return false; 1202 return 0;
1199 1203
1200 return pnfs_generic_pg_test(pgio, prev, req); 1204 return pnfs_generic_pg_test(pgio, prev, req);
1201} 1205}
@@ -1241,13 +1245,17 @@ bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
1241 } 1245 }
1242} 1246}
1243 1247
1244static bool 1248/*
1249 * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
1250 * of bytes (maximum @req->wb_bytes) that can be coalesced.
1251 */
1252static size_t
1245bl_pg_test_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, 1253bl_pg_test_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
1246 struct nfs_page *req) 1254 struct nfs_page *req)
1247{ 1255{
1248 if (pgio->pg_dreq != NULL && 1256 if (pgio->pg_dreq != NULL &&
1249 !is_aligned_req(req, PAGE_CACHE_SIZE)) 1257 !is_aligned_req(req, PAGE_CACHE_SIZE))
1250 return false; 1258 return 0;
1251 1259
1252 return pnfs_generic_pg_test(pgio, prev, req); 1260 return pnfs_generic_pg_test(pgio, prev, req);
1253} 1261}
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index d9f3d067cd15..4a3d4ef76127 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -2032,9 +2032,9 @@ static void nfs_access_free_entry(struct nfs_access_entry *entry)
2032{ 2032{
2033 put_rpccred(entry->cred); 2033 put_rpccred(entry->cred);
2034 kfree(entry); 2034 kfree(entry);
2035 smp_mb__before_atomic_dec(); 2035 smp_mb__before_atomic();
2036 atomic_long_dec(&nfs_access_nr_entries); 2036 atomic_long_dec(&nfs_access_nr_entries);
2037 smp_mb__after_atomic_dec(); 2037 smp_mb__after_atomic();
2038} 2038}
2039 2039
2040static void nfs_access_free_list(struct list_head *head) 2040static void nfs_access_free_list(struct list_head *head)
@@ -2082,9 +2082,9 @@ nfs_access_cache_scan(struct shrinker *shrink, struct shrink_control *sc)
2082 else { 2082 else {
2083remove_lru_entry: 2083remove_lru_entry:
2084 list_del_init(&nfsi->access_cache_inode_lru); 2084 list_del_init(&nfsi->access_cache_inode_lru);
2085 smp_mb__before_clear_bit(); 2085 smp_mb__before_atomic();
2086 clear_bit(NFS_INO_ACL_LRU_SET, &nfsi->flags); 2086 clear_bit(NFS_INO_ACL_LRU_SET, &nfsi->flags);
2087 smp_mb__after_clear_bit(); 2087 smp_mb__after_atomic();
2088 } 2088 }
2089 spin_unlock(&inode->i_lock); 2089 spin_unlock(&inode->i_lock);
2090 } 2090 }
@@ -2232,9 +2232,9 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
2232 nfs_access_add_rbtree(inode, cache); 2232 nfs_access_add_rbtree(inode, cache);
2233 2233
2234 /* Update accounting */ 2234 /* Update accounting */
2235 smp_mb__before_atomic_inc(); 2235 smp_mb__before_atomic();
2236 atomic_long_inc(&nfs_access_nr_entries); 2236 atomic_long_inc(&nfs_access_nr_entries);
2237 smp_mb__after_atomic_inc(); 2237 smp_mb__after_atomic();
2238 2238
2239 /* Add inode to global LRU list */ 2239 /* Add inode to global LRU list */
2240 if (!test_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags)) { 2240 if (!test_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags)) {
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index b8797ae6831f..8f98138cbc43 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -108,6 +108,97 @@ static inline int put_dreq(struct nfs_direct_req *dreq)
108 return atomic_dec_and_test(&dreq->io_count); 108 return atomic_dec_and_test(&dreq->io_count);
109} 109}
110 110
111/*
112 * nfs_direct_select_verf - select the right verifier
113 * @dreq - direct request possibly spanning multiple servers
114 * @ds_clp - nfs_client of data server or NULL if MDS / non-pnfs
115 * @ds_idx - index of data server in data server list, only valid if ds_clp set
116 *
117 * returns the correct verifier to use given the role of the server
118 */
119static struct nfs_writeverf *
120nfs_direct_select_verf(struct nfs_direct_req *dreq,
121 struct nfs_client *ds_clp,
122 int ds_idx)
123{
124 struct nfs_writeverf *verfp = &dreq->verf;
125
126#ifdef CONFIG_NFS_V4_1
127 if (ds_clp) {
128 /* pNFS is in use, use the DS verf */
129 if (ds_idx >= 0 && ds_idx < dreq->ds_cinfo.nbuckets)
130 verfp = &dreq->ds_cinfo.buckets[ds_idx].direct_verf;
131 else
132 WARN_ON_ONCE(1);
133 }
134#endif
135 return verfp;
136}
137
138
139/*
140 * nfs_direct_set_hdr_verf - set the write/commit verifier
141 * @dreq - direct request possibly spanning multiple servers
142 * @hdr - pageio header to validate against previously seen verfs
143 *
144 * Set the server's (MDS or DS) "seen" verifier
145 */
146static void nfs_direct_set_hdr_verf(struct nfs_direct_req *dreq,
147 struct nfs_pgio_header *hdr)
148{
149 struct nfs_writeverf *verfp;
150
151 verfp = nfs_direct_select_verf(dreq, hdr->data->ds_clp,
152 hdr->data->ds_idx);
153 WARN_ON_ONCE(verfp->committed >= 0);
154 memcpy(verfp, &hdr->verf, sizeof(struct nfs_writeverf));
155 WARN_ON_ONCE(verfp->committed < 0);
156}
157
158/*
159 * nfs_direct_cmp_hdr_verf - compare verifier for pgio header
160 * @dreq - direct request possibly spanning multiple servers
161 * @hdr - pageio header to validate against previously seen verf
162 *
163 * set the server's "seen" verf if not initialized.
164 * returns result of comparison between @hdr->verf and the "seen"
165 * verf of the server used by @hdr (DS or MDS)
166 */
167static int nfs_direct_set_or_cmp_hdr_verf(struct nfs_direct_req *dreq,
168 struct nfs_pgio_header *hdr)
169{
170 struct nfs_writeverf *verfp;
171
172 verfp = nfs_direct_select_verf(dreq, hdr->data->ds_clp,
173 hdr->data->ds_idx);
174 if (verfp->committed < 0) {
175 nfs_direct_set_hdr_verf(dreq, hdr);
176 return 0;
177 }
178 return memcmp(verfp, &hdr->verf, sizeof(struct nfs_writeverf));
179}
180
181#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4)
182/*
183 * nfs_direct_cmp_commit_data_verf - compare verifier for commit data
184 * @dreq - direct request possibly spanning multiple servers
185 * @data - commit data to validate against previously seen verf
186 *
187 * returns result of comparison between @data->verf and the verf of
188 * the server used by @data (DS or MDS)
189 */
190static int nfs_direct_cmp_commit_data_verf(struct nfs_direct_req *dreq,
191 struct nfs_commit_data *data)
192{
193 struct nfs_writeverf *verfp;
194
195 verfp = nfs_direct_select_verf(dreq, data->ds_clp,
196 data->ds_commit_index);
197 WARN_ON_ONCE(verfp->committed < 0);
198 return memcmp(verfp, &data->verf, sizeof(struct nfs_writeverf));
199}
200#endif
201
111/** 202/**
112 * nfs_direct_IO - NFS address space operation for direct I/O 203 * nfs_direct_IO - NFS address space operation for direct I/O
113 * @rw: direction (read or write) 204 * @rw: direction (read or write)
@@ -121,20 +212,20 @@ static inline int put_dreq(struct nfs_direct_req *dreq)
121 * shunt off direct read and write requests before the VFS gets them, 212 * shunt off direct read and write requests before the VFS gets them,
122 * so this method is only ever called for swap. 213 * so this method is only ever called for swap.
123 */ 214 */
124ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos, unsigned long nr_segs) 215ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t pos)
125{ 216{
126#ifndef CONFIG_NFS_SWAP 217#ifndef CONFIG_NFS_SWAP
127 dprintk("NFS: nfs_direct_IO (%pD) off/no(%Ld/%lu) EINVAL\n", 218 dprintk("NFS: nfs_direct_IO (%pD) off/no(%Ld/%lu) EINVAL\n",
128 iocb->ki_filp, (long long) pos, nr_segs); 219 iocb->ki_filp, (long long) pos, iter->nr_segs);
129 220
130 return -EINVAL; 221 return -EINVAL;
131#else 222#else
132 VM_BUG_ON(iocb->ki_nbytes != PAGE_SIZE); 223 VM_BUG_ON(iocb->ki_nbytes != PAGE_SIZE);
133 224
134 if (rw == READ || rw == KERNEL_READ) 225 if (rw == READ || rw == KERNEL_READ)
135 return nfs_file_direct_read(iocb, iov, nr_segs, pos, 226 return nfs_file_direct_read(iocb, iter, pos,
136 rw == READ ? true : false); 227 rw == READ ? true : false);
137 return nfs_file_direct_write(iocb, iov, nr_segs, pos, 228 return nfs_file_direct_write(iocb, iter, pos,
138 rw == WRITE ? true : false); 229 rw == WRITE ? true : false);
139#endif /* CONFIG_NFS_SWAP */ 230#endif /* CONFIG_NFS_SWAP */
140} 231}
@@ -168,6 +259,7 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
168 kref_get(&dreq->kref); 259 kref_get(&dreq->kref);
169 init_completion(&dreq->completion); 260 init_completion(&dreq->completion);
170 INIT_LIST_HEAD(&dreq->mds_cinfo.list); 261 INIT_LIST_HEAD(&dreq->mds_cinfo.list);
262 dreq->verf.committed = NFS_INVALID_STABLE_HOW; /* not set yet */
171 INIT_WORK(&dreq->work, nfs_direct_write_schedule_work); 263 INIT_WORK(&dreq->work, nfs_direct_write_schedule_work);
172 spin_lock_init(&dreq->lock); 264 spin_lock_init(&dreq->lock);
173 265
@@ -322,66 +414,42 @@ static const struct nfs_pgio_completion_ops nfs_direct_read_completion_ops = {
322 * handled automatically by nfs_direct_read_result(). Otherwise, if 414 * handled automatically by nfs_direct_read_result(). Otherwise, if
323 * no requests have been sent, just return an error. 415 * no requests have been sent, just return an error.
324 */ 416 */
325static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *desc,
326 const struct iovec *iov,
327 loff_t pos, bool uio)
328{
329 struct nfs_direct_req *dreq = desc->pg_dreq;
330 struct nfs_open_context *ctx = dreq->ctx;
331 struct inode *inode = ctx->dentry->d_inode;
332 unsigned long user_addr = (unsigned long)iov->iov_base;
333 size_t count = iov->iov_len;
334 size_t rsize = NFS_SERVER(inode)->rsize;
335 unsigned int pgbase;
336 int result;
337 ssize_t started = 0;
338 struct page **pagevec = NULL;
339 unsigned int npages;
340
341 do {
342 size_t bytes;
343 int i;
344 417
345 pgbase = user_addr & ~PAGE_MASK; 418static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
346 bytes = min(max_t(size_t, rsize, PAGE_SIZE), count); 419 struct iov_iter *iter,
420 loff_t pos)
421{
422 struct nfs_pageio_descriptor desc;
423 struct inode *inode = dreq->inode;
424 ssize_t result = -EINVAL;
425 size_t requested_bytes = 0;
426 size_t rsize = max_t(size_t, NFS_SERVER(inode)->rsize, PAGE_SIZE);
347 427
348 result = -ENOMEM; 428 nfs_pageio_init_read(&desc, dreq->inode, false,
349 npages = nfs_page_array_len(pgbase, bytes); 429 &nfs_direct_read_completion_ops);
350 if (!pagevec) 430 get_dreq(dreq);
351 pagevec = kmalloc(npages * sizeof(struct page *), 431 desc.pg_dreq = dreq;
352 GFP_KERNEL); 432 atomic_inc(&inode->i_dio_count);
353 if (!pagevec)
354 break;
355 if (uio) {
356 down_read(&current->mm->mmap_sem);
357 result = get_user_pages(current, current->mm, user_addr,
358 npages, 1, 0, pagevec, NULL);
359 up_read(&current->mm->mmap_sem);
360 if (result < 0)
361 break;
362 } else {
363 WARN_ON(npages != 1);
364 result = get_kernel_page(user_addr, 1, pagevec);
365 if (WARN_ON(result != 1))
366 break;
367 }
368 433
369 if ((unsigned)result < npages) { 434 while (iov_iter_count(iter)) {
370 bytes = result * PAGE_SIZE; 435 struct page **pagevec;
371 if (bytes <= pgbase) { 436 size_t bytes;
372 nfs_direct_release_pages(pagevec, result); 437 size_t pgbase;
373 break; 438 unsigned npages, i;
374 }
375 bytes -= pgbase;
376 npages = result;
377 }
378 439
440 result = iov_iter_get_pages_alloc(iter, &pagevec,
441 rsize, &pgbase);
442 if (result < 0)
443 break;
444
445 bytes = result;
446 iov_iter_advance(iter, bytes);
447 npages = (result + pgbase + PAGE_SIZE - 1) / PAGE_SIZE;
379 for (i = 0; i < npages; i++) { 448 for (i = 0; i < npages; i++) {
380 struct nfs_page *req; 449 struct nfs_page *req;
381 unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase); 450 unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
382 /* XXX do we need to do the eof zeroing found in async_filler? */ 451 /* XXX do we need to do the eof zeroing found in async_filler? */
383 req = nfs_create_request(dreq->ctx, dreq->inode, 452 req = nfs_create_request(dreq->ctx, pagevec[i], NULL,
384 pagevec[i],
385 pgbase, req_len); 453 pgbase, req_len);
386 if (IS_ERR(req)) { 454 if (IS_ERR(req)) {
387 result = PTR_ERR(req); 455 result = PTR_ERR(req);
@@ -389,56 +457,21 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de
389 } 457 }
390 req->wb_index = pos >> PAGE_SHIFT; 458 req->wb_index = pos >> PAGE_SHIFT;
391 req->wb_offset = pos & ~PAGE_MASK; 459 req->wb_offset = pos & ~PAGE_MASK;
392 if (!nfs_pageio_add_request(desc, req)) { 460 if (!nfs_pageio_add_request(&desc, req)) {
393 result = desc->pg_error; 461 result = desc.pg_error;
394 nfs_release_request(req); 462 nfs_release_request(req);
395 break; 463 break;
396 } 464 }
397 pgbase = 0; 465 pgbase = 0;
398 bytes -= req_len; 466 bytes -= req_len;
399 started += req_len; 467 requested_bytes += req_len;
400 user_addr += req_len;
401 pos += req_len; 468 pos += req_len;
402 count -= req_len;
403 dreq->bytes_left -= req_len; 469 dreq->bytes_left -= req_len;
404 } 470 }
405 /* The nfs_page now hold references to these pages */
406 nfs_direct_release_pages(pagevec, npages); 471 nfs_direct_release_pages(pagevec, npages);
407 } while (count != 0 && result >= 0); 472 kvfree(pagevec);
408
409 kfree(pagevec);
410
411 if (started)
412 return started;
413 return result < 0 ? (ssize_t) result : -EFAULT;
414}
415
416static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
417 const struct iovec *iov,
418 unsigned long nr_segs,
419 loff_t pos, bool uio)
420{
421 struct nfs_pageio_descriptor desc;
422 struct inode *inode = dreq->inode;
423 ssize_t result = -EINVAL;
424 size_t requested_bytes = 0;
425 unsigned long seg;
426
427 NFS_PROTO(dreq->inode)->read_pageio_init(&desc, dreq->inode,
428 &nfs_direct_read_completion_ops);
429 get_dreq(dreq);
430 desc.pg_dreq = dreq;
431 atomic_inc(&inode->i_dio_count);
432
433 for (seg = 0; seg < nr_segs; seg++) {
434 const struct iovec *vec = &iov[seg];
435 result = nfs_direct_read_schedule_segment(&desc, vec, pos, uio);
436 if (result < 0) 473 if (result < 0)
437 break; 474 break;
438 requested_bytes += result;
439 if ((size_t)result < vec->iov_len)
440 break;
441 pos += vec->iov_len;
442 } 475 }
443 476
444 nfs_pageio_complete(&desc); 477 nfs_pageio_complete(&desc);
@@ -461,8 +494,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
461/** 494/**
462 * nfs_file_direct_read - file direct read operation for NFS files 495 * nfs_file_direct_read - file direct read operation for NFS files
463 * @iocb: target I/O control block 496 * @iocb: target I/O control block
464 * @iov: vector of user buffers into which to read data 497 * @iter: vector of user buffers into which to read data
465 * @nr_segs: size of iov vector
466 * @pos: byte offset in file where reading starts 498 * @pos: byte offset in file where reading starts
467 * 499 *
468 * We use this function for direct reads instead of calling 500 * We use this function for direct reads instead of calling
@@ -479,8 +511,8 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
479 * client must read the updated atime from the server back into its 511 * client must read the updated atime from the server back into its
480 * cache. 512 * cache.
481 */ 513 */
482ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov, 514ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter,
483 unsigned long nr_segs, loff_t pos, bool uio) 515 loff_t pos, bool uio)
484{ 516{
485 struct file *file = iocb->ki_filp; 517 struct file *file = iocb->ki_filp;
486 struct address_space *mapping = file->f_mapping; 518 struct address_space *mapping = file->f_mapping;
@@ -488,9 +520,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
488 struct nfs_direct_req *dreq; 520 struct nfs_direct_req *dreq;
489 struct nfs_lock_context *l_ctx; 521 struct nfs_lock_context *l_ctx;
490 ssize_t result = -EINVAL; 522 ssize_t result = -EINVAL;
491 size_t count; 523 size_t count = iov_iter_count(iter);
492
493 count = iov_length(iov, nr_segs);
494 nfs_add_stats(mapping->host, NFSIOS_DIRECTREADBYTES, count); 524 nfs_add_stats(mapping->host, NFSIOS_DIRECTREADBYTES, count);
495 525
496 dfprintk(FILE, "NFS: direct read(%pD2, %zd@%Ld)\n", 526 dfprintk(FILE, "NFS: direct read(%pD2, %zd@%Ld)\n",
@@ -513,7 +543,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
513 goto out_unlock; 543 goto out_unlock;
514 544
515 dreq->inode = inode; 545 dreq->inode = inode;
516 dreq->bytes_left = iov_length(iov, nr_segs); 546 dreq->bytes_left = count;
517 dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); 547 dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
518 l_ctx = nfs_get_lock_context(dreq->ctx); 548 l_ctx = nfs_get_lock_context(dreq->ctx);
519 if (IS_ERR(l_ctx)) { 549 if (IS_ERR(l_ctx)) {
@@ -524,8 +554,8 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
524 if (!is_sync_kiocb(iocb)) 554 if (!is_sync_kiocb(iocb))
525 dreq->iocb = iocb; 555 dreq->iocb = iocb;
526 556
527 NFS_I(inode)->read_io += iov_length(iov, nr_segs); 557 NFS_I(inode)->read_io += count;
528 result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos, uio); 558 result = nfs_direct_read_schedule_iovec(dreq, iter, pos);
529 559
530 mutex_unlock(&inode->i_mutex); 560 mutex_unlock(&inode->i_mutex);
531 561
@@ -564,7 +594,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
564 dreq->count = 0; 594 dreq->count = 0;
565 get_dreq(dreq); 595 get_dreq(dreq);
566 596
567 NFS_PROTO(dreq->inode)->write_pageio_init(&desc, dreq->inode, FLUSH_STABLE, 597 nfs_pageio_init_write(&desc, dreq->inode, FLUSH_STABLE, false,
568 &nfs_direct_write_completion_ops); 598 &nfs_direct_write_completion_ops);
569 desc.pg_dreq = dreq; 599 desc.pg_dreq = dreq;
570 600
@@ -603,7 +633,7 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data)
603 dprintk("NFS: %5u commit failed with error %d.\n", 633 dprintk("NFS: %5u commit failed with error %d.\n",
604 data->task.tk_pid, status); 634 data->task.tk_pid, status);
605 dreq->flags = NFS_ODIRECT_RESCHED_WRITES; 635 dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
606 } else if (memcmp(&dreq->verf, &data->verf, sizeof(data->verf))) { 636 } else if (nfs_direct_cmp_commit_data_verf(dreq, data)) {
607 dprintk("NFS: %5u commit verify failed\n", data->task.tk_pid); 637 dprintk("NFS: %5u commit verify failed\n", data->task.tk_pid);
608 dreq->flags = NFS_ODIRECT_RESCHED_WRITES; 638 dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
609 } 639 }
@@ -681,109 +711,6 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode
681} 711}
682#endif 712#endif
683 713
684/*
685 * NB: Return the value of the first error return code. Subsequent
686 * errors after the first one are ignored.
687 */
688/*
689 * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE
690 * operation. If nfs_writedata_alloc() or get_user_pages() fails,
691 * bail and stop sending more writes. Write length accounting is
692 * handled automatically by nfs_direct_write_result(). Otherwise, if
693 * no requests have been sent, just return an error.
694 */
695static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *desc,
696 const struct iovec *iov,
697 loff_t pos, bool uio)
698{
699 struct nfs_direct_req *dreq = desc->pg_dreq;
700 struct nfs_open_context *ctx = dreq->ctx;
701 struct inode *inode = ctx->dentry->d_inode;
702 unsigned long user_addr = (unsigned long)iov->iov_base;
703 size_t count = iov->iov_len;
704 size_t wsize = NFS_SERVER(inode)->wsize;
705 unsigned int pgbase;
706 int result;
707 ssize_t started = 0;
708 struct page **pagevec = NULL;
709 unsigned int npages;
710
711 do {
712 size_t bytes;
713 int i;
714
715 pgbase = user_addr & ~PAGE_MASK;
716 bytes = min(max_t(size_t, wsize, PAGE_SIZE), count);
717
718 result = -ENOMEM;
719 npages = nfs_page_array_len(pgbase, bytes);
720 if (!pagevec)
721 pagevec = kmalloc(npages * sizeof(struct page *), GFP_KERNEL);
722 if (!pagevec)
723 break;
724
725 if (uio) {
726 down_read(&current->mm->mmap_sem);
727 result = get_user_pages(current, current->mm, user_addr,
728 npages, 0, 0, pagevec, NULL);
729 up_read(&current->mm->mmap_sem);
730 if (result < 0)
731 break;
732 } else {
733 WARN_ON(npages != 1);
734 result = get_kernel_page(user_addr, 0, pagevec);
735 if (WARN_ON(result != 1))
736 break;
737 }
738
739 if ((unsigned)result < npages) {
740 bytes = result * PAGE_SIZE;
741 if (bytes <= pgbase) {
742 nfs_direct_release_pages(pagevec, result);
743 break;
744 }
745 bytes -= pgbase;
746 npages = result;
747 }
748
749 for (i = 0; i < npages; i++) {
750 struct nfs_page *req;
751 unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
752
753 req = nfs_create_request(dreq->ctx, dreq->inode,
754 pagevec[i],
755 pgbase, req_len);
756 if (IS_ERR(req)) {
757 result = PTR_ERR(req);
758 break;
759 }
760 nfs_lock_request(req);
761 req->wb_index = pos >> PAGE_SHIFT;
762 req->wb_offset = pos & ~PAGE_MASK;
763 if (!nfs_pageio_add_request(desc, req)) {
764 result = desc->pg_error;
765 nfs_unlock_and_release_request(req);
766 break;
767 }
768 pgbase = 0;
769 bytes -= req_len;
770 started += req_len;
771 user_addr += req_len;
772 pos += req_len;
773 count -= req_len;
774 dreq->bytes_left -= req_len;
775 }
776 /* The nfs_page now hold references to these pages */
777 nfs_direct_release_pages(pagevec, npages);
778 } while (count != 0 && result >= 0);
779
780 kfree(pagevec);
781
782 if (started)
783 return started;
784 return result < 0 ? (ssize_t) result : -EFAULT;
785}
786
787static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) 714static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
788{ 715{
789 struct nfs_direct_req *dreq = hdr->dreq; 716 struct nfs_direct_req *dreq = hdr->dreq;
@@ -813,13 +740,13 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
813 if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) 740 if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES)
814 bit = NFS_IOHDR_NEED_RESCHED; 741 bit = NFS_IOHDR_NEED_RESCHED;
815 else if (dreq->flags == 0) { 742 else if (dreq->flags == 0) {
816 memcpy(&dreq->verf, hdr->verf, 743 nfs_direct_set_hdr_verf(dreq, hdr);
817 sizeof(dreq->verf));
818 bit = NFS_IOHDR_NEED_COMMIT; 744 bit = NFS_IOHDR_NEED_COMMIT;
819 dreq->flags = NFS_ODIRECT_DO_COMMIT; 745 dreq->flags = NFS_ODIRECT_DO_COMMIT;
820 } else if (dreq->flags == NFS_ODIRECT_DO_COMMIT) { 746 } else if (dreq->flags == NFS_ODIRECT_DO_COMMIT) {
821 if (memcmp(&dreq->verf, hdr->verf, sizeof(dreq->verf))) { 747 if (nfs_direct_set_or_cmp_hdr_verf(dreq, hdr)) {
822 dreq->flags = NFS_ODIRECT_RESCHED_WRITES; 748 dreq->flags =
749 NFS_ODIRECT_RESCHED_WRITES;
823 bit = NFS_IOHDR_NEED_RESCHED; 750 bit = NFS_IOHDR_NEED_RESCHED;
824 } else 751 } else
825 bit = NFS_IOHDR_NEED_COMMIT; 752 bit = NFS_IOHDR_NEED_COMMIT;
@@ -829,6 +756,8 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
829 spin_unlock(&dreq->lock); 756 spin_unlock(&dreq->lock);
830 757
831 while (!list_empty(&hdr->pages)) { 758 while (!list_empty(&hdr->pages)) {
759 bool do_destroy = true;
760
832 req = nfs_list_entry(hdr->pages.next); 761 req = nfs_list_entry(hdr->pages.next);
833 nfs_list_remove_request(req); 762 nfs_list_remove_request(req);
834 switch (bit) { 763 switch (bit) {
@@ -836,6 +765,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
836 case NFS_IOHDR_NEED_COMMIT: 765 case NFS_IOHDR_NEED_COMMIT:
837 kref_get(&req->wb_kref); 766 kref_get(&req->wb_kref);
838 nfs_mark_request_commit(req, hdr->lseg, &cinfo); 767 nfs_mark_request_commit(req, hdr->lseg, &cinfo);
768 do_destroy = false;
839 } 769 }
840 nfs_unlock_and_release_request(req); 770 nfs_unlock_and_release_request(req);
841 } 771 }
@@ -863,33 +793,77 @@ static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = {
863 .completion = nfs_direct_write_completion, 793 .completion = nfs_direct_write_completion,
864}; 794};
865 795
796
797/*
798 * NB: Return the value of the first error return code. Subsequent
799 * errors after the first one are ignored.
800 */
801/*
802 * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE
803 * operation. If nfs_writedata_alloc() or get_user_pages() fails,
804 * bail and stop sending more writes. Write length accounting is
805 * handled automatically by nfs_direct_write_result(). Otherwise, if
806 * no requests have been sent, just return an error.
807 */
866static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, 808static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
867 const struct iovec *iov, 809 struct iov_iter *iter,
868 unsigned long nr_segs, 810 loff_t pos)
869 loff_t pos, bool uio)
870{ 811{
871 struct nfs_pageio_descriptor desc; 812 struct nfs_pageio_descriptor desc;
872 struct inode *inode = dreq->inode; 813 struct inode *inode = dreq->inode;
873 ssize_t result = 0; 814 ssize_t result = 0;
874 size_t requested_bytes = 0; 815 size_t requested_bytes = 0;
875 unsigned long seg; 816 size_t wsize = max_t(size_t, NFS_SERVER(inode)->wsize, PAGE_SIZE);
876 817
877 NFS_PROTO(inode)->write_pageio_init(&desc, inode, FLUSH_COND_STABLE, 818 nfs_pageio_init_write(&desc, inode, FLUSH_COND_STABLE, false,
878 &nfs_direct_write_completion_ops); 819 &nfs_direct_write_completion_ops);
879 desc.pg_dreq = dreq; 820 desc.pg_dreq = dreq;
880 get_dreq(dreq); 821 get_dreq(dreq);
881 atomic_inc(&inode->i_dio_count); 822 atomic_inc(&inode->i_dio_count);
882 823
883 NFS_I(dreq->inode)->write_io += iov_length(iov, nr_segs); 824 NFS_I(inode)->write_io += iov_iter_count(iter);
884 for (seg = 0; seg < nr_segs; seg++) { 825 while (iov_iter_count(iter)) {
885 const struct iovec *vec = &iov[seg]; 826 struct page **pagevec;
886 result = nfs_direct_write_schedule_segment(&desc, vec, pos, uio); 827 size_t bytes;
828 size_t pgbase;
829 unsigned npages, i;
830
831 result = iov_iter_get_pages_alloc(iter, &pagevec,
832 wsize, &pgbase);
887 if (result < 0) 833 if (result < 0)
888 break; 834 break;
889 requested_bytes += result; 835
890 if ((size_t)result < vec->iov_len) 836 bytes = result;
837 iov_iter_advance(iter, bytes);
838 npages = (result + pgbase + PAGE_SIZE - 1) / PAGE_SIZE;
839 for (i = 0; i < npages; i++) {
840 struct nfs_page *req;
841 unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
842
843 req = nfs_create_request(dreq->ctx, pagevec[i], NULL,
844 pgbase, req_len);
845 if (IS_ERR(req)) {
846 result = PTR_ERR(req);
847 break;
848 }
849 nfs_lock_request(req);
850 req->wb_index = pos >> PAGE_SHIFT;
851 req->wb_offset = pos & ~PAGE_MASK;
852 if (!nfs_pageio_add_request(&desc, req)) {
853 result = desc.pg_error;
854 nfs_unlock_and_release_request(req);
855 break;
856 }
857 pgbase = 0;
858 bytes -= req_len;
859 requested_bytes += req_len;
860 pos += req_len;
861 dreq->bytes_left -= req_len;
862 }
863 nfs_direct_release_pages(pagevec, npages);
864 kvfree(pagevec);
865 if (result < 0)
891 break; 866 break;
892 pos += vec->iov_len;
893 } 867 }
894 nfs_pageio_complete(&desc); 868 nfs_pageio_complete(&desc);
895 869
@@ -911,8 +885,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
911/** 885/**
912 * nfs_file_direct_write - file direct write operation for NFS files 886 * nfs_file_direct_write - file direct write operation for NFS files
913 * @iocb: target I/O control block 887 * @iocb: target I/O control block
914 * @iov: vector of user buffers from which to write data 888 * @iter: vector of user buffers from which to write data
915 * @nr_segs: size of iov vector
916 * @pos: byte offset in file where writing starts 889 * @pos: byte offset in file where writing starts
917 * 890 *
918 * We use this function for direct writes instead of calling 891 * We use this function for direct writes instead of calling
@@ -930,8 +903,8 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
930 * Note that O_APPEND is not supported for NFS direct writes, as there 903 * Note that O_APPEND is not supported for NFS direct writes, as there
931 * is no atomic O_APPEND write facility in the NFS protocol. 904 * is no atomic O_APPEND write facility in the NFS protocol.
932 */ 905 */
933ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, 906ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter,
934 unsigned long nr_segs, loff_t pos, bool uio) 907 loff_t pos, bool uio)
935{ 908{
936 ssize_t result = -EINVAL; 909 ssize_t result = -EINVAL;
937 struct file *file = iocb->ki_filp; 910 struct file *file = iocb->ki_filp;
@@ -940,9 +913,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
940 struct nfs_direct_req *dreq; 913 struct nfs_direct_req *dreq;
941 struct nfs_lock_context *l_ctx; 914 struct nfs_lock_context *l_ctx;
942 loff_t end; 915 loff_t end;
943 size_t count; 916 size_t count = iov_iter_count(iter);
944
945 count = iov_length(iov, nr_segs);
946 end = (pos + count - 1) >> PAGE_CACHE_SHIFT; 917 end = (pos + count - 1) >> PAGE_CACHE_SHIFT;
947 918
948 nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES, count); 919 nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES, count);
@@ -993,7 +964,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
993 if (!is_sync_kiocb(iocb)) 964 if (!is_sync_kiocb(iocb))
994 dreq->iocb = iocb; 965 dreq->iocb = iocb;
995 966
996 result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos, uio); 967 result = nfs_direct_write_schedule_iovec(dreq, iter, pos);
997 968
998 if (mapping->nrpages) { 969 if (mapping->nrpages) {
999 invalidate_inode_pages2_range(mapping, 970 invalidate_inode_pages2_range(mapping,
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 284ca901fe16..4042ff58fe3f 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -165,22 +165,21 @@ nfs_file_flush(struct file *file, fl_owner_t id)
165EXPORT_SYMBOL_GPL(nfs_file_flush); 165EXPORT_SYMBOL_GPL(nfs_file_flush);
166 166
167ssize_t 167ssize_t
168nfs_file_read(struct kiocb *iocb, const struct iovec *iov, 168nfs_file_read(struct kiocb *iocb, struct iov_iter *to)
169 unsigned long nr_segs, loff_t pos)
170{ 169{
171 struct inode *inode = file_inode(iocb->ki_filp); 170 struct inode *inode = file_inode(iocb->ki_filp);
172 ssize_t result; 171 ssize_t result;
173 172
174 if (iocb->ki_filp->f_flags & O_DIRECT) 173 if (iocb->ki_filp->f_flags & O_DIRECT)
175 return nfs_file_direct_read(iocb, iov, nr_segs, pos, true); 174 return nfs_file_direct_read(iocb, to, iocb->ki_pos, true);
176 175
177 dprintk("NFS: read(%pD2, %lu@%lu)\n", 176 dprintk("NFS: read(%pD2, %zu@%lu)\n",
178 iocb->ki_filp, 177 iocb->ki_filp,
179 (unsigned long) iov_length(iov, nr_segs), (unsigned long) pos); 178 iov_iter_count(to), (unsigned long) iocb->ki_pos);
180 179
181 result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping); 180 result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping);
182 if (!result) { 181 if (!result) {
183 result = generic_file_aio_read(iocb, iov, nr_segs, pos); 182 result = generic_file_read_iter(iocb, to);
184 if (result > 0) 183 if (result > 0)
185 nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, result); 184 nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, result);
186 } 185 }
@@ -635,24 +634,24 @@ static int nfs_need_sync_write(struct file *filp, struct inode *inode)
635 return 0; 634 return 0;
636} 635}
637 636
638ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, 637ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
639 unsigned long nr_segs, loff_t pos)
640{ 638{
641 struct file *file = iocb->ki_filp; 639 struct file *file = iocb->ki_filp;
642 struct inode *inode = file_inode(file); 640 struct inode *inode = file_inode(file);
643 unsigned long written = 0; 641 unsigned long written = 0;
644 ssize_t result; 642 ssize_t result;
645 size_t count = iov_length(iov, nr_segs); 643 size_t count = iov_iter_count(from);
644 loff_t pos = iocb->ki_pos;
646 645
647 result = nfs_key_timeout_notify(file, inode); 646 result = nfs_key_timeout_notify(file, inode);
648 if (result) 647 if (result)
649 return result; 648 return result;
650 649
651 if (file->f_flags & O_DIRECT) 650 if (file->f_flags & O_DIRECT)
652 return nfs_file_direct_write(iocb, iov, nr_segs, pos, true); 651 return nfs_file_direct_write(iocb, from, pos, true);
653 652
654 dprintk("NFS: write(%pD2, %lu@%Ld)\n", 653 dprintk("NFS: write(%pD2, %zu@%Ld)\n",
655 file, (unsigned long) count, (long long) pos); 654 file, count, (long long) pos);
656 655
657 result = -EBUSY; 656 result = -EBUSY;
658 if (IS_SWAPFILE(inode)) 657 if (IS_SWAPFILE(inode))
@@ -670,7 +669,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
670 if (!count) 669 if (!count)
671 goto out; 670 goto out;
672 671
673 result = generic_file_aio_write(iocb, iov, nr_segs, pos); 672 result = generic_file_write_iter(iocb, from);
674 if (result > 0) 673 if (result > 0)
675 written = result; 674 written = result;
676 675
@@ -691,36 +690,6 @@ out_swapfile:
691} 690}
692EXPORT_SYMBOL_GPL(nfs_file_write); 691EXPORT_SYMBOL_GPL(nfs_file_write);
693 692
694ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe,
695 struct file *filp, loff_t *ppos,
696 size_t count, unsigned int flags)
697{
698 struct inode *inode = file_inode(filp);
699 unsigned long written = 0;
700 ssize_t ret;
701
702 dprintk("NFS splice_write(%pD2, %lu@%llu)\n",
703 filp, (unsigned long) count, (unsigned long long) *ppos);
704
705 /*
706 * The combination of splice and an O_APPEND destination is disallowed.
707 */
708
709 ret = generic_file_splice_write(pipe, filp, ppos, count, flags);
710 if (ret > 0)
711 written = ret;
712
713 if (ret >= 0 && nfs_need_sync_write(filp, inode)) {
714 int err = vfs_fsync(filp, 0);
715 if (err < 0)
716 ret = err;
717 }
718 if (ret > 0)
719 nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written);
720 return ret;
721}
722EXPORT_SYMBOL_GPL(nfs_file_splice_write);
723
724static int 693static int
725do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) 694do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
726{ 695{
@@ -916,10 +885,6 @@ int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
916 is_local = 1; 885 is_local = 1;
917 886
918 /* We're simulating flock() locks using posix locks on the server */ 887 /* We're simulating flock() locks using posix locks on the server */
919 fl->fl_owner = (fl_owner_t)filp;
920 fl->fl_start = 0;
921 fl->fl_end = OFFSET_MAX;
922
923 if (fl->fl_type == F_UNLCK) 888 if (fl->fl_type == F_UNLCK)
924 return do_unlk(filp, cmd, fl, is_local); 889 return do_unlk(filp, cmd, fl, is_local);
925 return do_setlk(filp, cmd, fl, is_local); 890 return do_setlk(filp, cmd, fl, is_local);
@@ -939,10 +904,10 @@ EXPORT_SYMBOL_GPL(nfs_setlease);
939 904
940const struct file_operations nfs_file_operations = { 905const struct file_operations nfs_file_operations = {
941 .llseek = nfs_file_llseek, 906 .llseek = nfs_file_llseek,
942 .read = do_sync_read, 907 .read = new_sync_read,
943 .write = do_sync_write, 908 .write = new_sync_write,
944 .aio_read = nfs_file_read, 909 .read_iter = nfs_file_read,
945 .aio_write = nfs_file_write, 910 .write_iter = nfs_file_write,
946 .mmap = nfs_file_mmap, 911 .mmap = nfs_file_mmap,
947 .open = nfs_file_open, 912 .open = nfs_file_open,
948 .flush = nfs_file_flush, 913 .flush = nfs_file_flush,
@@ -951,7 +916,7 @@ const struct file_operations nfs_file_operations = {
951 .lock = nfs_lock, 916 .lock = nfs_lock,
952 .flock = nfs_flock, 917 .flock = nfs_flock,
953 .splice_read = nfs_file_splice_read, 918 .splice_read = nfs_file_splice_read,
954 .splice_write = nfs_file_splice_write, 919 .splice_write = iter_file_splice_write,
955 .check_flags = nfs_check_flags, 920 .check_flags = nfs_check_flags,
956 .setlease = nfs_setlease, 921 .setlease = nfs_setlease,
957}; 922};
diff --git a/fs/nfs/filelayout/Makefile b/fs/nfs/filelayout/Makefile
new file mode 100644
index 000000000000..8516cdffb9e9
--- /dev/null
+++ b/fs/nfs/filelayout/Makefile
@@ -0,0 +1,5 @@
1#
2# Makefile for the pNFS Files Layout Driver kernel module
3#
4obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o
5nfs_layout_nfsv41_files-y := filelayout.o filelayoutdev.o
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/filelayout/filelayout.c
index b9a35c05b60f..d2eba1c13b7e 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -35,11 +35,11 @@
35 35
36#include <linux/sunrpc/metrics.h> 36#include <linux/sunrpc/metrics.h>
37 37
38#include "nfs4session.h" 38#include "../nfs4session.h"
39#include "internal.h" 39#include "../internal.h"
40#include "delegation.h" 40#include "../delegation.h"
41#include "nfs4filelayout.h" 41#include "filelayout.h"
42#include "nfs4trace.h" 42#include "../nfs4trace.h"
43 43
44#define NFSDBG_FACILITY NFSDBG_PNFS_LD 44#define NFSDBG_FACILITY NFSDBG_PNFS_LD
45 45
@@ -84,7 +84,7 @@ filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset)
84 BUG(); 84 BUG();
85} 85}
86 86
87static void filelayout_reset_write(struct nfs_write_data *data) 87static void filelayout_reset_write(struct nfs_pgio_data *data)
88{ 88{
89 struct nfs_pgio_header *hdr = data->header; 89 struct nfs_pgio_header *hdr = data->header;
90 struct rpc_task *task = &data->task; 90 struct rpc_task *task = &data->task;
@@ -105,7 +105,7 @@ static void filelayout_reset_write(struct nfs_write_data *data)
105 } 105 }
106} 106}
107 107
108static void filelayout_reset_read(struct nfs_read_data *data) 108static void filelayout_reset_read(struct nfs_pgio_data *data)
109{ 109{
110 struct nfs_pgio_header *hdr = data->header; 110 struct nfs_pgio_header *hdr = data->header;
111 struct rpc_task *task = &data->task; 111 struct rpc_task *task = &data->task;
@@ -243,7 +243,7 @@ wait_on_recovery:
243/* NFS_PROTO call done callback routines */ 243/* NFS_PROTO call done callback routines */
244 244
245static int filelayout_read_done_cb(struct rpc_task *task, 245static int filelayout_read_done_cb(struct rpc_task *task,
246 struct nfs_read_data *data) 246 struct nfs_pgio_data *data)
247{ 247{
248 struct nfs_pgio_header *hdr = data->header; 248 struct nfs_pgio_header *hdr = data->header;
249 int err; 249 int err;
@@ -270,7 +270,7 @@ static int filelayout_read_done_cb(struct rpc_task *task,
270 * rfc5661 is not clear about which credential should be used. 270 * rfc5661 is not clear about which credential should be used.
271 */ 271 */
272static void 272static void
273filelayout_set_layoutcommit(struct nfs_write_data *wdata) 273filelayout_set_layoutcommit(struct nfs_pgio_data *wdata)
274{ 274{
275 struct nfs_pgio_header *hdr = wdata->header; 275 struct nfs_pgio_header *hdr = wdata->header;
276 276
@@ -279,7 +279,7 @@ filelayout_set_layoutcommit(struct nfs_write_data *wdata)
279 return; 279 return;
280 280
281 pnfs_set_layoutcommit(wdata); 281 pnfs_set_layoutcommit(wdata);
282 dprintk("%s ionde %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino, 282 dprintk("%s inode %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino,
283 (unsigned long) NFS_I(hdr->inode)->layout->plh_lwb); 283 (unsigned long) NFS_I(hdr->inode)->layout->plh_lwb);
284} 284}
285 285
@@ -305,7 +305,7 @@ filelayout_reset_to_mds(struct pnfs_layout_segment *lseg)
305 */ 305 */
306static void filelayout_read_prepare(struct rpc_task *task, void *data) 306static void filelayout_read_prepare(struct rpc_task *task, void *data)
307{ 307{
308 struct nfs_read_data *rdata = data; 308 struct nfs_pgio_data *rdata = data;
309 309
310 if (unlikely(test_bit(NFS_CONTEXT_BAD, &rdata->args.context->flags))) { 310 if (unlikely(test_bit(NFS_CONTEXT_BAD, &rdata->args.context->flags))) {
311 rpc_exit(task, -EIO); 311 rpc_exit(task, -EIO);
@@ -317,7 +317,7 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data)
317 rpc_exit(task, 0); 317 rpc_exit(task, 0);
318 return; 318 return;
319 } 319 }
320 rdata->read_done_cb = filelayout_read_done_cb; 320 rdata->pgio_done_cb = filelayout_read_done_cb;
321 321
322 if (nfs41_setup_sequence(rdata->ds_clp->cl_session, 322 if (nfs41_setup_sequence(rdata->ds_clp->cl_session,
323 &rdata->args.seq_args, 323 &rdata->args.seq_args,
@@ -331,7 +331,7 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data)
331 331
332static void filelayout_read_call_done(struct rpc_task *task, void *data) 332static void filelayout_read_call_done(struct rpc_task *task, void *data)
333{ 333{
334 struct nfs_read_data *rdata = data; 334 struct nfs_pgio_data *rdata = data;
335 335
336 dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status); 336 dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status);
337 337
@@ -347,14 +347,14 @@ static void filelayout_read_call_done(struct rpc_task *task, void *data)
347 347
348static void filelayout_read_count_stats(struct rpc_task *task, void *data) 348static void filelayout_read_count_stats(struct rpc_task *task, void *data)
349{ 349{
350 struct nfs_read_data *rdata = data; 350 struct nfs_pgio_data *rdata = data;
351 351
352 rpc_count_iostats(task, NFS_SERVER(rdata->header->inode)->client->cl_metrics); 352 rpc_count_iostats(task, NFS_SERVER(rdata->header->inode)->client->cl_metrics);
353} 353}
354 354
355static void filelayout_read_release(void *data) 355static void filelayout_read_release(void *data)
356{ 356{
357 struct nfs_read_data *rdata = data; 357 struct nfs_pgio_data *rdata = data;
358 struct pnfs_layout_hdr *lo = rdata->header->lseg->pls_layout; 358 struct pnfs_layout_hdr *lo = rdata->header->lseg->pls_layout;
359 359
360 filelayout_fenceme(lo->plh_inode, lo); 360 filelayout_fenceme(lo->plh_inode, lo);
@@ -363,7 +363,7 @@ static void filelayout_read_release(void *data)
363} 363}
364 364
365static int filelayout_write_done_cb(struct rpc_task *task, 365static int filelayout_write_done_cb(struct rpc_task *task,
366 struct nfs_write_data *data) 366 struct nfs_pgio_data *data)
367{ 367{
368 struct nfs_pgio_header *hdr = data->header; 368 struct nfs_pgio_header *hdr = data->header;
369 int err; 369 int err;
@@ -419,7 +419,7 @@ static int filelayout_commit_done_cb(struct rpc_task *task,
419 419
420static void filelayout_write_prepare(struct rpc_task *task, void *data) 420static void filelayout_write_prepare(struct rpc_task *task, void *data)
421{ 421{
422 struct nfs_write_data *wdata = data; 422 struct nfs_pgio_data *wdata = data;
423 423
424 if (unlikely(test_bit(NFS_CONTEXT_BAD, &wdata->args.context->flags))) { 424 if (unlikely(test_bit(NFS_CONTEXT_BAD, &wdata->args.context->flags))) {
425 rpc_exit(task, -EIO); 425 rpc_exit(task, -EIO);
@@ -443,7 +443,7 @@ static void filelayout_write_prepare(struct rpc_task *task, void *data)
443 443
444static void filelayout_write_call_done(struct rpc_task *task, void *data) 444static void filelayout_write_call_done(struct rpc_task *task, void *data)
445{ 445{
446 struct nfs_write_data *wdata = data; 446 struct nfs_pgio_data *wdata = data;
447 447
448 if (test_bit(NFS_IOHDR_REDO, &wdata->header->flags) && 448 if (test_bit(NFS_IOHDR_REDO, &wdata->header->flags) &&
449 task->tk_status == 0) { 449 task->tk_status == 0) {
@@ -457,14 +457,14 @@ static void filelayout_write_call_done(struct rpc_task *task, void *data)
457 457
458static void filelayout_write_count_stats(struct rpc_task *task, void *data) 458static void filelayout_write_count_stats(struct rpc_task *task, void *data)
459{ 459{
460 struct nfs_write_data *wdata = data; 460 struct nfs_pgio_data *wdata = data;
461 461
462 rpc_count_iostats(task, NFS_SERVER(wdata->header->inode)->client->cl_metrics); 462 rpc_count_iostats(task, NFS_SERVER(wdata->header->inode)->client->cl_metrics);
463} 463}
464 464
465static void filelayout_write_release(void *data) 465static void filelayout_write_release(void *data)
466{ 466{
467 struct nfs_write_data *wdata = data; 467 struct nfs_pgio_data *wdata = data;
468 struct pnfs_layout_hdr *lo = wdata->header->lseg->pls_layout; 468 struct pnfs_layout_hdr *lo = wdata->header->lseg->pls_layout;
469 469
470 filelayout_fenceme(lo->plh_inode, lo); 470 filelayout_fenceme(lo->plh_inode, lo);
@@ -529,7 +529,7 @@ static const struct rpc_call_ops filelayout_commit_call_ops = {
529}; 529};
530 530
531static enum pnfs_try_status 531static enum pnfs_try_status
532filelayout_read_pagelist(struct nfs_read_data *data) 532filelayout_read_pagelist(struct nfs_pgio_data *data)
533{ 533{
534 struct nfs_pgio_header *hdr = data->header; 534 struct nfs_pgio_header *hdr = data->header;
535 struct pnfs_layout_segment *lseg = hdr->lseg; 535 struct pnfs_layout_segment *lseg = hdr->lseg;
@@ -560,6 +560,7 @@ filelayout_read_pagelist(struct nfs_read_data *data)
560 /* No multipath support. Use first DS */ 560 /* No multipath support. Use first DS */
561 atomic_inc(&ds->ds_clp->cl_count); 561 atomic_inc(&ds->ds_clp->cl_count);
562 data->ds_clp = ds->ds_clp; 562 data->ds_clp = ds->ds_clp;
563 data->ds_idx = idx;
563 fh = nfs4_fl_select_ds_fh(lseg, j); 564 fh = nfs4_fl_select_ds_fh(lseg, j);
564 if (fh) 565 if (fh)
565 data->args.fh = fh; 566 data->args.fh = fh;
@@ -568,14 +569,14 @@ filelayout_read_pagelist(struct nfs_read_data *data)
568 data->mds_offset = offset; 569 data->mds_offset = offset;
569 570
570 /* Perform an asynchronous read to ds */ 571 /* Perform an asynchronous read to ds */
571 nfs_initiate_read(ds_clnt, data, 572 nfs_initiate_pgio(ds_clnt, data,
572 &filelayout_read_call_ops, RPC_TASK_SOFTCONN); 573 &filelayout_read_call_ops, 0, RPC_TASK_SOFTCONN);
573 return PNFS_ATTEMPTED; 574 return PNFS_ATTEMPTED;
574} 575}
575 576
576/* Perform async writes. */ 577/* Perform async writes. */
577static enum pnfs_try_status 578static enum pnfs_try_status
578filelayout_write_pagelist(struct nfs_write_data *data, int sync) 579filelayout_write_pagelist(struct nfs_pgio_data *data, int sync)
579{ 580{
580 struct nfs_pgio_header *hdr = data->header; 581 struct nfs_pgio_header *hdr = data->header;
581 struct pnfs_layout_segment *lseg = hdr->lseg; 582 struct pnfs_layout_segment *lseg = hdr->lseg;
@@ -600,20 +601,18 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync)
600 __func__, hdr->inode->i_ino, sync, (size_t) data->args.count, 601 __func__, hdr->inode->i_ino, sync, (size_t) data->args.count,
601 offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count)); 602 offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count));
602 603
603 data->write_done_cb = filelayout_write_done_cb; 604 data->pgio_done_cb = filelayout_write_done_cb;
604 atomic_inc(&ds->ds_clp->cl_count); 605 atomic_inc(&ds->ds_clp->cl_count);
605 data->ds_clp = ds->ds_clp; 606 data->ds_clp = ds->ds_clp;
607 data->ds_idx = idx;
606 fh = nfs4_fl_select_ds_fh(lseg, j); 608 fh = nfs4_fl_select_ds_fh(lseg, j);
607 if (fh) 609 if (fh)
608 data->args.fh = fh; 610 data->args.fh = fh;
609 /* 611
610 * Get the file offset on the dserver. Set the write offset to
611 * this offset and save the original offset.
612 */
613 data->args.offset = filelayout_get_dserver_offset(lseg, offset); 612 data->args.offset = filelayout_get_dserver_offset(lseg, offset);
614 613
615 /* Perform an asynchronous write */ 614 /* Perform an asynchronous write */
616 nfs_initiate_write(ds_clnt, data, 615 nfs_initiate_pgio(ds_clnt, data,
617 &filelayout_write_call_ops, sync, 616 &filelayout_write_call_ops, sync,
618 RPC_TASK_SOFTCONN); 617 RPC_TASK_SOFTCONN);
619 return PNFS_ATTEMPTED; 618 return PNFS_ATTEMPTED;
@@ -637,7 +636,6 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
637 struct nfs4_deviceid_node *d; 636 struct nfs4_deviceid_node *d;
638 struct nfs4_file_layout_dsaddr *dsaddr; 637 struct nfs4_file_layout_dsaddr *dsaddr;
639 int status = -EINVAL; 638 int status = -EINVAL;
640 struct nfs_server *nfss = NFS_SERVER(lo->plh_inode);
641 639
642 dprintk("--> %s\n", __func__); 640 dprintk("--> %s\n", __func__);
643 641
@@ -655,7 +653,7 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
655 goto out; 653 goto out;
656 } 654 }
657 655
658 if (!fl->stripe_unit || fl->stripe_unit % PAGE_SIZE) { 656 if (!fl->stripe_unit) {
659 dprintk("%s Invalid stripe unit (%u)\n", 657 dprintk("%s Invalid stripe unit (%u)\n",
660 __func__, fl->stripe_unit); 658 __func__, fl->stripe_unit);
661 goto out; 659 goto out;
@@ -692,12 +690,6 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
692 goto out_put; 690 goto out_put;
693 } 691 }
694 692
695 if (fl->stripe_unit % nfss->rsize || fl->stripe_unit % nfss->wsize) {
696 dprintk("%s Stripe unit (%u) not aligned with rsize %u "
697 "wsize %u\n", __func__, fl->stripe_unit, nfss->rsize,
698 nfss->wsize);
699 }
700
701 status = 0; 693 status = 0;
702out: 694out:
703 dprintk("--> %s returns %d\n", __func__, status); 695 dprintk("--> %s returns %d\n", __func__, status);
@@ -850,11 +842,15 @@ filelayout_alloc_commit_info(struct pnfs_layout_segment *lseg,
850{ 842{
851 struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); 843 struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
852 struct pnfs_commit_bucket *buckets; 844 struct pnfs_commit_bucket *buckets;
853 int size; 845 int size, i;
854 846
855 if (fl->commit_through_mds) 847 if (fl->commit_through_mds)
856 return 0; 848 return 0;
857 if (cinfo->ds->nbuckets != 0) { 849
850 size = (fl->stripe_type == STRIPE_SPARSE) ?
851 fl->dsaddr->ds_num : fl->dsaddr->stripe_count;
852
853 if (cinfo->ds->nbuckets >= size) {
858 /* This assumes there is only one IOMODE_RW lseg. What 854 /* This assumes there is only one IOMODE_RW lseg. What
859 * we really want to do is have a layout_hdr level 855 * we really want to do is have a layout_hdr level
860 * dictionary of <multipath_list4, fh> keys, each 856 * dictionary of <multipath_list4, fh> keys, each
@@ -864,30 +860,36 @@ filelayout_alloc_commit_info(struct pnfs_layout_segment *lseg,
864 return 0; 860 return 0;
865 } 861 }
866 862
867 size = (fl->stripe_type == STRIPE_SPARSE) ?
868 fl->dsaddr->ds_num : fl->dsaddr->stripe_count;
869
870 buckets = kcalloc(size, sizeof(struct pnfs_commit_bucket), 863 buckets = kcalloc(size, sizeof(struct pnfs_commit_bucket),
871 gfp_flags); 864 gfp_flags);
872 if (!buckets) 865 if (!buckets)
873 return -ENOMEM; 866 return -ENOMEM;
874 else { 867 for (i = 0; i < size; i++) {
875 int i; 868 INIT_LIST_HEAD(&buckets[i].written);
869 INIT_LIST_HEAD(&buckets[i].committing);
870 /* mark direct verifier as unset */
871 buckets[i].direct_verf.committed = NFS_INVALID_STABLE_HOW;
872 }
876 873
877 spin_lock(cinfo->lock); 874 spin_lock(cinfo->lock);
878 if (cinfo->ds->nbuckets != 0) 875 if (cinfo->ds->nbuckets >= size)
879 kfree(buckets); 876 goto out;
880 else { 877 for (i = 0; i < cinfo->ds->nbuckets; i++) {
881 cinfo->ds->buckets = buckets; 878 list_splice(&cinfo->ds->buckets[i].written,
882 cinfo->ds->nbuckets = size; 879 &buckets[i].written);
883 for (i = 0; i < size; i++) { 880 list_splice(&cinfo->ds->buckets[i].committing,
884 INIT_LIST_HEAD(&buckets[i].written); 881 &buckets[i].committing);
885 INIT_LIST_HEAD(&buckets[i].committing); 882 buckets[i].direct_verf.committed =
886 } 883 cinfo->ds->buckets[i].direct_verf.committed;
887 } 884 buckets[i].wlseg = cinfo->ds->buckets[i].wlseg;
888 spin_unlock(cinfo->lock); 885 buckets[i].clseg = cinfo->ds->buckets[i].clseg;
889 return 0;
890 } 886 }
887 swap(cinfo->ds->buckets, buckets);
888 cinfo->ds->nbuckets = size;
889out:
890 spin_unlock(cinfo->lock);
891 kfree(buckets);
892 return 0;
891} 893}
892 894
893static struct pnfs_layout_segment * 895static struct pnfs_layout_segment *
@@ -915,47 +917,51 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
915/* 917/*
916 * filelayout_pg_test(). Called by nfs_can_coalesce_requests() 918 * filelayout_pg_test(). Called by nfs_can_coalesce_requests()
917 * 919 *
918 * return true : coalesce page 920 * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
919 * return false : don't coalesce page 921 * of bytes (maximum @req->wb_bytes) that can be coalesced.
920 */ 922 */
921static bool 923static size_t
922filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, 924filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
923 struct nfs_page *req) 925 struct nfs_page *req)
924{ 926{
927 unsigned int size;
925 u64 p_stripe, r_stripe; 928 u64 p_stripe, r_stripe;
926 u32 stripe_unit; 929 u32 stripe_offset;
930 u64 segment_offset = pgio->pg_lseg->pls_range.offset;
931 u32 stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit;
927 932
928 if (!pnfs_generic_pg_test(pgio, prev, req) || 933 /* calls nfs_generic_pg_test */
929 !nfs_generic_pg_test(pgio, prev, req)) 934 size = pnfs_generic_pg_test(pgio, prev, req);
930 return false; 935 if (!size)
936 return 0;
931 937
932 p_stripe = (u64)req_offset(prev); 938 /* see if req and prev are in the same stripe */
933 r_stripe = (u64)req_offset(req); 939 if (prev) {
934 stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit; 940 p_stripe = (u64)req_offset(prev) - segment_offset;
941 r_stripe = (u64)req_offset(req) - segment_offset;
942 do_div(p_stripe, stripe_unit);
943 do_div(r_stripe, stripe_unit);
935 944
936 do_div(p_stripe, stripe_unit); 945 if (p_stripe != r_stripe)
937 do_div(r_stripe, stripe_unit); 946 return 0;
947 }
938 948
939 return (p_stripe == r_stripe); 949 /* calculate remaining bytes in the current stripe */
950 div_u64_rem((u64)req_offset(req) - segment_offset,
951 stripe_unit,
952 &stripe_offset);
953 WARN_ON_ONCE(stripe_offset > stripe_unit);
954 if (stripe_offset >= stripe_unit)
955 return 0;
956 return min(stripe_unit - (unsigned int)stripe_offset, size);
940} 957}
941 958
942static void 959static void
943filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio, 960filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio,
944 struct nfs_page *req) 961 struct nfs_page *req)
945{ 962{
946 WARN_ON_ONCE(pgio->pg_lseg != NULL); 963 if (!pgio->pg_lseg)
947 964 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
948 if (req->wb_offset != req->wb_pgbase) {
949 /*
950 * Handling unaligned pages is difficult, because have to
951 * somehow split a req in two in certain cases in the
952 * pg.test code. Avoid this by just not using pnfs
953 * in this case.
954 */
955 nfs_pageio_reset_read_mds(pgio);
956 return;
957 }
958 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
959 req->wb_context, 965 req->wb_context,
960 0, 966 0,
961 NFS4_MAX_UINT64, 967 NFS4_MAX_UINT64,
@@ -973,11 +979,8 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
973 struct nfs_commit_info cinfo; 979 struct nfs_commit_info cinfo;
974 int status; 980 int status;
975 981
976 WARN_ON_ONCE(pgio->pg_lseg != NULL); 982 if (!pgio->pg_lseg)
977 983 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
978 if (req->wb_offset != req->wb_pgbase)
979 goto out_mds;
980 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
981 req->wb_context, 984 req->wb_context,
982 0, 985 0,
983 NFS4_MAX_UINT64, 986 NFS4_MAX_UINT64,
@@ -1067,6 +1070,7 @@ filelayout_choose_commit_list(struct nfs_page *req,
1067 */ 1070 */
1068 j = nfs4_fl_calc_j_index(lseg, req_offset(req)); 1071 j = nfs4_fl_calc_j_index(lseg, req_offset(req));
1069 i = select_bucket_index(fl, j); 1072 i = select_bucket_index(fl, j);
1073 spin_lock(cinfo->lock);
1070 buckets = cinfo->ds->buckets; 1074 buckets = cinfo->ds->buckets;
1071 list = &buckets[i].written; 1075 list = &buckets[i].written;
1072 if (list_empty(list)) { 1076 if (list_empty(list)) {
@@ -1080,6 +1084,7 @@ filelayout_choose_commit_list(struct nfs_page *req,
1080 } 1084 }
1081 set_bit(PG_COMMIT_TO_DS, &req->wb_flags); 1085 set_bit(PG_COMMIT_TO_DS, &req->wb_flags);
1082 cinfo->ds->nwritten++; 1086 cinfo->ds->nwritten++;
1087 spin_unlock(cinfo->lock);
1083 return list; 1088 return list;
1084} 1089}
1085 1090
@@ -1176,6 +1181,7 @@ transfer_commit_list(struct list_head *src, struct list_head *dst,
1176 return ret; 1181 return ret;
1177} 1182}
1178 1183
1184/* Note called with cinfo->lock held. */
1179static int 1185static int
1180filelayout_scan_ds_commit_list(struct pnfs_commit_bucket *bucket, 1186filelayout_scan_ds_commit_list(struct pnfs_commit_bucket *bucket,
1181 struct nfs_commit_info *cinfo, 1187 struct nfs_commit_info *cinfo,
@@ -1220,15 +1226,18 @@ static void filelayout_recover_commit_reqs(struct list_head *dst,
1220 struct nfs_commit_info *cinfo) 1226 struct nfs_commit_info *cinfo)
1221{ 1227{
1222 struct pnfs_commit_bucket *b; 1228 struct pnfs_commit_bucket *b;
1229 struct pnfs_layout_segment *freeme;
1223 int i; 1230 int i;
1224 1231
1232restart:
1225 spin_lock(cinfo->lock); 1233 spin_lock(cinfo->lock);
1226 for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) { 1234 for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) {
1227 if (transfer_commit_list(&b->written, dst, cinfo, 0)) { 1235 if (transfer_commit_list(&b->written, dst, cinfo, 0)) {
1228 spin_unlock(cinfo->lock); 1236 freeme = b->wlseg;
1229 pnfs_put_lseg(b->wlseg);
1230 b->wlseg = NULL; 1237 b->wlseg = NULL;
1231 spin_lock(cinfo->lock); 1238 spin_unlock(cinfo->lock);
1239 pnfs_put_lseg(freeme);
1240 goto restart;
1232 } 1241 }
1233 } 1242 }
1234 cinfo->ds->nwritten = 0; 1243 cinfo->ds->nwritten = 0;
@@ -1243,6 +1252,7 @@ alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list)
1243 struct nfs_commit_data *data; 1252 struct nfs_commit_data *data;
1244 int i, j; 1253 int i, j;
1245 unsigned int nreq = 0; 1254 unsigned int nreq = 0;
1255 struct pnfs_layout_segment *freeme;
1246 1256
1247 fl_cinfo = cinfo->ds; 1257 fl_cinfo = cinfo->ds;
1248 bucket = fl_cinfo->buckets; 1258 bucket = fl_cinfo->buckets;
@@ -1253,8 +1263,10 @@ alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list)
1253 if (!data) 1263 if (!data)
1254 break; 1264 break;
1255 data->ds_commit_index = i; 1265 data->ds_commit_index = i;
1266 spin_lock(cinfo->lock);
1256 data->lseg = bucket->clseg; 1267 data->lseg = bucket->clseg;
1257 bucket->clseg = NULL; 1268 bucket->clseg = NULL;
1269 spin_unlock(cinfo->lock);
1258 list_add(&data->pages, list); 1270 list_add(&data->pages, list);
1259 nreq++; 1271 nreq++;
1260 } 1272 }
@@ -1264,8 +1276,11 @@ alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list)
1264 if (list_empty(&bucket->committing)) 1276 if (list_empty(&bucket->committing))
1265 continue; 1277 continue;
1266 nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo); 1278 nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo);
1267 pnfs_put_lseg(bucket->clseg); 1279 spin_lock(cinfo->lock);
1280 freeme = bucket->clseg;
1268 bucket->clseg = NULL; 1281 bucket->clseg = NULL;
1282 spin_unlock(cinfo->lock);
1283 pnfs_put_lseg(freeme);
1269 } 1284 }
1270 /* Caller will clean up entries put on list */ 1285 /* Caller will clean up entries put on list */
1271 return nreq; 1286 return nreq;
@@ -1330,7 +1345,7 @@ filelayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)
1330 struct nfs4_filelayout *flo; 1345 struct nfs4_filelayout *flo;
1331 1346
1332 flo = kzalloc(sizeof(*flo), gfp_flags); 1347 flo = kzalloc(sizeof(*flo), gfp_flags);
1333 return &flo->generic_hdr; 1348 return flo != NULL ? &flo->generic_hdr : NULL;
1334} 1349}
1335 1350
1336static void 1351static void
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/filelayout/filelayout.h
index cebd20e7e923..ffbddf2219ea 100644
--- a/fs/nfs/nfs4filelayout.h
+++ b/fs/nfs/filelayout/filelayout.h
@@ -30,7 +30,7 @@
30#ifndef FS_NFS_NFS4FILELAYOUT_H 30#ifndef FS_NFS_NFS4FILELAYOUT_H
31#define FS_NFS_NFS4FILELAYOUT_H 31#define FS_NFS_NFS4FILELAYOUT_H
32 32
33#include "pnfs.h" 33#include "../pnfs.h"
34 34
35/* 35/*
36 * Default data server connection timeout and retrans vaules. 36 * Default data server connection timeout and retrans vaules.
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c
index efac602edb37..44bf0140a4c7 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/filelayout/filelayoutdev.c
@@ -33,9 +33,9 @@
33#include <linux/module.h> 33#include <linux/module.h>
34#include <linux/sunrpc/addr.h> 34#include <linux/sunrpc/addr.h>
35 35
36#include "internal.h" 36#include "../internal.h"
37#include "nfs4session.h" 37#include "../nfs4session.h"
38#include "nfs4filelayout.h" 38#include "filelayout.h"
39 39
40#define NFSDBG_FACILITY NFSDBG_PNFS_LD 40#define NFSDBG_FACILITY NFSDBG_PNFS_LD
41 41
@@ -789,9 +789,9 @@ static void nfs4_wait_ds_connect(struct nfs4_pnfs_ds *ds)
789 789
790static void nfs4_clear_ds_conn_bit(struct nfs4_pnfs_ds *ds) 790static void nfs4_clear_ds_conn_bit(struct nfs4_pnfs_ds *ds)
791{ 791{
792 smp_mb__before_clear_bit(); 792 smp_mb__before_atomic();
793 clear_bit(NFS4DS_CONNECTING, &ds->ds_state); 793 clear_bit(NFS4DS_CONNECTING, &ds->ds_state);
794 smp_mb__after_clear_bit(); 794 smp_mb__after_atomic();
795 wake_up_bit(&ds->ds_state, NFS4DS_CONNECTING); 795 wake_up_bit(&ds->ds_state, NFS4DS_CONNECTING);
796} 796}
797 797
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index 66984a9aafaa..b94f80420a58 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -120,7 +120,8 @@ struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh,
120 120
121 security_d_instantiate(ret, inode); 121 security_d_instantiate(ret, inode);
122 spin_lock(&ret->d_lock); 122 spin_lock(&ret->d_lock);
123 if (IS_ROOT(ret) && !(ret->d_flags & DCACHE_NFSFS_RENAMED)) { 123 if (IS_ROOT(ret) && !ret->d_fsdata &&
124 !(ret->d_flags & DCACHE_NFSFS_RENAMED)) {
124 ret->d_fsdata = name; 125 ret->d_fsdata = name;
125 name = NULL; 126 name = NULL;
126 } 127 }
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 0c438973f3c8..9927913c97c2 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -147,6 +147,17 @@ int nfs_sync_mapping(struct address_space *mapping)
147 return ret; 147 return ret;
148} 148}
149 149
150static void nfs_set_cache_invalid(struct inode *inode, unsigned long flags)
151{
152 struct nfs_inode *nfsi = NFS_I(inode);
153
154 if (inode->i_mapping->nrpages == 0)
155 flags &= ~NFS_INO_INVALID_DATA;
156 nfsi->cache_validity |= flags;
157 if (flags & NFS_INO_INVALID_DATA)
158 nfs_fscache_invalidate(inode);
159}
160
150/* 161/*
151 * Invalidate the local caches 162 * Invalidate the local caches
152 */ 163 */
@@ -162,17 +173,16 @@ static void nfs_zap_caches_locked(struct inode *inode)
162 173
163 memset(NFS_I(inode)->cookieverf, 0, sizeof(NFS_I(inode)->cookieverf)); 174 memset(NFS_I(inode)->cookieverf, 0, sizeof(NFS_I(inode)->cookieverf));
164 if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) { 175 if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) {
165 nfs_fscache_invalidate(inode); 176 nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR
166 nfsi->cache_validity |= NFS_INO_INVALID_ATTR
167 | NFS_INO_INVALID_DATA 177 | NFS_INO_INVALID_DATA
168 | NFS_INO_INVALID_ACCESS 178 | NFS_INO_INVALID_ACCESS
169 | NFS_INO_INVALID_ACL 179 | NFS_INO_INVALID_ACL
170 | NFS_INO_REVAL_PAGECACHE; 180 | NFS_INO_REVAL_PAGECACHE);
171 } else 181 } else
172 nfsi->cache_validity |= NFS_INO_INVALID_ATTR 182 nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR
173 | NFS_INO_INVALID_ACCESS 183 | NFS_INO_INVALID_ACCESS
174 | NFS_INO_INVALID_ACL 184 | NFS_INO_INVALID_ACL
175 | NFS_INO_REVAL_PAGECACHE; 185 | NFS_INO_REVAL_PAGECACHE);
176 nfs_zap_label_cache_locked(nfsi); 186 nfs_zap_label_cache_locked(nfsi);
177} 187}
178 188
@@ -187,8 +197,7 @@ void nfs_zap_mapping(struct inode *inode, struct address_space *mapping)
187{ 197{
188 if (mapping->nrpages != 0) { 198 if (mapping->nrpages != 0) {
189 spin_lock(&inode->i_lock); 199 spin_lock(&inode->i_lock);
190 NFS_I(inode)->cache_validity |= NFS_INO_INVALID_DATA; 200 nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA);
191 nfs_fscache_invalidate(inode);
192 spin_unlock(&inode->i_lock); 201 spin_unlock(&inode->i_lock);
193 } 202 }
194} 203}
@@ -209,7 +218,7 @@ EXPORT_SYMBOL_GPL(nfs_zap_acl_cache);
209void nfs_invalidate_atime(struct inode *inode) 218void nfs_invalidate_atime(struct inode *inode)
210{ 219{
211 spin_lock(&inode->i_lock); 220 spin_lock(&inode->i_lock);
212 NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME; 221 nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATIME);
213 spin_unlock(&inode->i_lock); 222 spin_unlock(&inode->i_lock);
214} 223}
215EXPORT_SYMBOL_GPL(nfs_invalidate_atime); 224EXPORT_SYMBOL_GPL(nfs_invalidate_atime);
@@ -369,7 +378,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st
369 inode->i_mode = fattr->mode; 378 inode->i_mode = fattr->mode;
370 if ((fattr->valid & NFS_ATTR_FATTR_MODE) == 0 379 if ((fattr->valid & NFS_ATTR_FATTR_MODE) == 0
371 && nfs_server_capable(inode, NFS_CAP_MODE)) 380 && nfs_server_capable(inode, NFS_CAP_MODE))
372 nfsi->cache_validity |= NFS_INO_INVALID_ATTR; 381 nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
373 /* Why so? Because we want revalidate for devices/FIFOs, and 382 /* Why so? Because we want revalidate for devices/FIFOs, and
374 * that's precisely what we have in nfs_file_inode_operations. 383 * that's precisely what we have in nfs_file_inode_operations.
375 */ 384 */
@@ -415,36 +424,36 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st
415 if (fattr->valid & NFS_ATTR_FATTR_ATIME) 424 if (fattr->valid & NFS_ATTR_FATTR_ATIME)
416 inode->i_atime = fattr->atime; 425 inode->i_atime = fattr->atime;
417 else if (nfs_server_capable(inode, NFS_CAP_ATIME)) 426 else if (nfs_server_capable(inode, NFS_CAP_ATIME))
418 nfsi->cache_validity |= NFS_INO_INVALID_ATTR; 427 nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
419 if (fattr->valid & NFS_ATTR_FATTR_MTIME) 428 if (fattr->valid & NFS_ATTR_FATTR_MTIME)
420 inode->i_mtime = fattr->mtime; 429 inode->i_mtime = fattr->mtime;
421 else if (nfs_server_capable(inode, NFS_CAP_MTIME)) 430 else if (nfs_server_capable(inode, NFS_CAP_MTIME))
422 nfsi->cache_validity |= NFS_INO_INVALID_ATTR; 431 nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
423 if (fattr->valid & NFS_ATTR_FATTR_CTIME) 432 if (fattr->valid & NFS_ATTR_FATTR_CTIME)
424 inode->i_ctime = fattr->ctime; 433 inode->i_ctime = fattr->ctime;
425 else if (nfs_server_capable(inode, NFS_CAP_CTIME)) 434 else if (nfs_server_capable(inode, NFS_CAP_CTIME))
426 nfsi->cache_validity |= NFS_INO_INVALID_ATTR; 435 nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
427 if (fattr->valid & NFS_ATTR_FATTR_CHANGE) 436 if (fattr->valid & NFS_ATTR_FATTR_CHANGE)
428 inode->i_version = fattr->change_attr; 437 inode->i_version = fattr->change_attr;
429 else if (nfs_server_capable(inode, NFS_CAP_CHANGE_ATTR)) 438 else if (nfs_server_capable(inode, NFS_CAP_CHANGE_ATTR))
430 nfsi->cache_validity |= NFS_INO_INVALID_ATTR; 439 nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
431 if (fattr->valid & NFS_ATTR_FATTR_SIZE) 440 if (fattr->valid & NFS_ATTR_FATTR_SIZE)
432 inode->i_size = nfs_size_to_loff_t(fattr->size); 441 inode->i_size = nfs_size_to_loff_t(fattr->size);
433 else 442 else
434 nfsi->cache_validity |= NFS_INO_INVALID_ATTR 443 nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR
435 | NFS_INO_REVAL_PAGECACHE; 444 | NFS_INO_REVAL_PAGECACHE);
436 if (fattr->valid & NFS_ATTR_FATTR_NLINK) 445 if (fattr->valid & NFS_ATTR_FATTR_NLINK)
437 set_nlink(inode, fattr->nlink); 446 set_nlink(inode, fattr->nlink);
438 else if (nfs_server_capable(inode, NFS_CAP_NLINK)) 447 else if (nfs_server_capable(inode, NFS_CAP_NLINK))
439 nfsi->cache_validity |= NFS_INO_INVALID_ATTR; 448 nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
440 if (fattr->valid & NFS_ATTR_FATTR_OWNER) 449 if (fattr->valid & NFS_ATTR_FATTR_OWNER)
441 inode->i_uid = fattr->uid; 450 inode->i_uid = fattr->uid;
442 else if (nfs_server_capable(inode, NFS_CAP_OWNER)) 451 else if (nfs_server_capable(inode, NFS_CAP_OWNER))
443 nfsi->cache_validity |= NFS_INO_INVALID_ATTR; 452 nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
444 if (fattr->valid & NFS_ATTR_FATTR_GROUP) 453 if (fattr->valid & NFS_ATTR_FATTR_GROUP)
445 inode->i_gid = fattr->gid; 454 inode->i_gid = fattr->gid;
446 else if (nfs_server_capable(inode, NFS_CAP_OWNER_GROUP)) 455 else if (nfs_server_capable(inode, NFS_CAP_OWNER_GROUP))
447 nfsi->cache_validity |= NFS_INO_INVALID_ATTR; 456 nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
448 if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED) 457 if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED)
449 inode->i_blocks = fattr->du.nfs2.blocks; 458 inode->i_blocks = fattr->du.nfs2.blocks;
450 if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) { 459 if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) {
@@ -550,6 +559,9 @@ static int nfs_vmtruncate(struct inode * inode, loff_t offset)
550 559
551 spin_lock(&inode->i_lock); 560 spin_lock(&inode->i_lock);
552 i_size_write(inode, offset); 561 i_size_write(inode, offset);
562 /* Optimisation */
563 if (offset == 0)
564 NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_DATA;
553 spin_unlock(&inode->i_lock); 565 spin_unlock(&inode->i_lock);
554 566
555 truncate_pagecache(inode, offset); 567 truncate_pagecache(inode, offset);
@@ -578,7 +590,8 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr)
578 inode->i_uid = attr->ia_uid; 590 inode->i_uid = attr->ia_uid;
579 if ((attr->ia_valid & ATTR_GID) != 0) 591 if ((attr->ia_valid & ATTR_GID) != 0)
580 inode->i_gid = attr->ia_gid; 592 inode->i_gid = attr->ia_gid;
581 NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; 593 nfs_set_cache_invalid(inode, NFS_INO_INVALID_ACCESS
594 | NFS_INO_INVALID_ACL);
582 spin_unlock(&inode->i_lock); 595 spin_unlock(&inode->i_lock);
583 } 596 }
584 if ((attr->ia_valid & ATTR_SIZE) != 0) { 597 if ((attr->ia_valid & ATTR_SIZE) != 0) {
@@ -1085,7 +1098,7 @@ int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
1085 trace_nfs_invalidate_mapping_exit(inode, ret); 1098 trace_nfs_invalidate_mapping_exit(inode, ret);
1086 1099
1087 clear_bit_unlock(NFS_INO_INVALIDATING, bitlock); 1100 clear_bit_unlock(NFS_INO_INVALIDATING, bitlock);
1088 smp_mb__after_clear_bit(); 1101 smp_mb__after_atomic();
1089 wake_up_bit(bitlock, NFS_INO_INVALIDATING); 1102 wake_up_bit(bitlock, NFS_INO_INVALIDATING);
1090out: 1103out:
1091 return ret; 1104 return ret;
@@ -1101,7 +1114,7 @@ static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr
1101 && inode->i_version == fattr->pre_change_attr) { 1114 && inode->i_version == fattr->pre_change_attr) {
1102 inode->i_version = fattr->change_attr; 1115 inode->i_version = fattr->change_attr;
1103 if (S_ISDIR(inode->i_mode)) 1116 if (S_ISDIR(inode->i_mode))
1104 nfsi->cache_validity |= NFS_INO_INVALID_DATA; 1117 nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA);
1105 ret |= NFS_INO_INVALID_ATTR; 1118 ret |= NFS_INO_INVALID_ATTR;
1106 } 1119 }
1107 /* If we have atomic WCC data, we may update some attributes */ 1120 /* If we have atomic WCC data, we may update some attributes */
@@ -1117,7 +1130,7 @@ static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr
1117 && timespec_equal(&inode->i_mtime, &fattr->pre_mtime)) { 1130 && timespec_equal(&inode->i_mtime, &fattr->pre_mtime)) {
1118 memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); 1131 memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
1119 if (S_ISDIR(inode->i_mode)) 1132 if (S_ISDIR(inode->i_mode))
1120 nfsi->cache_validity |= NFS_INO_INVALID_DATA; 1133 nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA);
1121 ret |= NFS_INO_INVALID_ATTR; 1134 ret |= NFS_INO_INVALID_ATTR;
1122 } 1135 }
1123 if ((fattr->valid & NFS_ATTR_FATTR_PRESIZE) 1136 if ((fattr->valid & NFS_ATTR_FATTR_PRESIZE)
@@ -1128,9 +1141,6 @@ static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr
1128 ret |= NFS_INO_INVALID_ATTR; 1141 ret |= NFS_INO_INVALID_ATTR;
1129 } 1142 }
1130 1143
1131 if (nfsi->cache_validity & NFS_INO_INVALID_DATA)
1132 nfs_fscache_invalidate(inode);
1133
1134 return ret; 1144 return ret;
1135} 1145}
1136 1146
@@ -1189,7 +1199,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
1189 invalid |= NFS_INO_INVALID_ATIME; 1199 invalid |= NFS_INO_INVALID_ATIME;
1190 1200
1191 if (invalid != 0) 1201 if (invalid != 0)
1192 nfsi->cache_validity |= invalid; 1202 nfs_set_cache_invalid(inode, invalid);
1193 1203
1194 nfsi->read_cache_jiffies = fattr->time_start; 1204 nfsi->read_cache_jiffies = fattr->time_start;
1195 return 0; 1205 return 0;
@@ -1402,13 +1412,11 @@ EXPORT_SYMBOL_GPL(nfs_refresh_inode);
1402 1412
1403static int nfs_post_op_update_inode_locked(struct inode *inode, struct nfs_fattr *fattr) 1413static int nfs_post_op_update_inode_locked(struct inode *inode, struct nfs_fattr *fattr)
1404{ 1414{
1405 struct nfs_inode *nfsi = NFS_I(inode); 1415 unsigned long invalid = NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
1406 1416
1407 nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; 1417 if (S_ISDIR(inode->i_mode))
1408 if (S_ISDIR(inode->i_mode)) { 1418 invalid |= NFS_INO_INVALID_DATA;
1409 nfsi->cache_validity |= NFS_INO_INVALID_DATA; 1419 nfs_set_cache_invalid(inode, invalid);
1410 nfs_fscache_invalidate(inode);
1411 }
1412 if ((fattr->valid & NFS_ATTR_FATTR) == 0) 1420 if ((fattr->valid & NFS_ATTR_FATTR) == 0)
1413 return 0; 1421 return 0;
1414 return nfs_refresh_inode_locked(inode, fattr); 1422 return nfs_refresh_inode_locked(inode, fattr);
@@ -1575,18 +1583,20 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1575 inode->i_version = fattr->change_attr; 1583 inode->i_version = fattr->change_attr;
1576 } 1584 }
1577 } else if (server->caps & NFS_CAP_CHANGE_ATTR) 1585 } else if (server->caps & NFS_CAP_CHANGE_ATTR)
1578 invalid |= save_cache_validity; 1586 nfsi->cache_validity |= save_cache_validity;
1579 1587
1580 if (fattr->valid & NFS_ATTR_FATTR_MTIME) { 1588 if (fattr->valid & NFS_ATTR_FATTR_MTIME) {
1581 memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); 1589 memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
1582 } else if (server->caps & NFS_CAP_MTIME) 1590 } else if (server->caps & NFS_CAP_MTIME)
1583 invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR 1591 nfsi->cache_validity |= save_cache_validity &
1592 (NFS_INO_INVALID_ATTR
1584 | NFS_INO_REVAL_FORCED); 1593 | NFS_INO_REVAL_FORCED);
1585 1594
1586 if (fattr->valid & NFS_ATTR_FATTR_CTIME) { 1595 if (fattr->valid & NFS_ATTR_FATTR_CTIME) {
1587 memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); 1596 memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
1588 } else if (server->caps & NFS_CAP_CTIME) 1597 } else if (server->caps & NFS_CAP_CTIME)
1589 invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR 1598 nfsi->cache_validity |= save_cache_validity &
1599 (NFS_INO_INVALID_ATTR
1590 | NFS_INO_REVAL_FORCED); 1600 | NFS_INO_REVAL_FORCED);
1591 1601
1592 /* Check if our cached file size is stale */ 1602 /* Check if our cached file size is stale */
@@ -1599,6 +1609,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1599 if ((nfsi->npages == 0) || new_isize > cur_isize) { 1609 if ((nfsi->npages == 0) || new_isize > cur_isize) {
1600 i_size_write(inode, new_isize); 1610 i_size_write(inode, new_isize);
1601 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; 1611 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
1612 invalid &= ~NFS_INO_REVAL_PAGECACHE;
1602 } 1613 }
1603 dprintk("NFS: isize change on server for file %s/%ld " 1614 dprintk("NFS: isize change on server for file %s/%ld "
1604 "(%Ld to %Ld)\n", 1615 "(%Ld to %Ld)\n",
@@ -1608,7 +1619,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1608 (long long)new_isize); 1619 (long long)new_isize);
1609 } 1620 }
1610 } else 1621 } else
1611 invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR 1622 nfsi->cache_validity |= save_cache_validity &
1623 (NFS_INO_INVALID_ATTR
1612 | NFS_INO_REVAL_PAGECACHE 1624 | NFS_INO_REVAL_PAGECACHE
1613 | NFS_INO_REVAL_FORCED); 1625 | NFS_INO_REVAL_FORCED);
1614 1626
@@ -1616,7 +1628,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1616 if (fattr->valid & NFS_ATTR_FATTR_ATIME) 1628 if (fattr->valid & NFS_ATTR_FATTR_ATIME)
1617 memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime)); 1629 memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime));
1618 else if (server->caps & NFS_CAP_ATIME) 1630 else if (server->caps & NFS_CAP_ATIME)
1619 invalid |= save_cache_validity & (NFS_INO_INVALID_ATIME 1631 nfsi->cache_validity |= save_cache_validity &
1632 (NFS_INO_INVALID_ATIME
1620 | NFS_INO_REVAL_FORCED); 1633 | NFS_INO_REVAL_FORCED);
1621 1634
1622 if (fattr->valid & NFS_ATTR_FATTR_MODE) { 1635 if (fattr->valid & NFS_ATTR_FATTR_MODE) {
@@ -1627,7 +1640,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1627 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; 1640 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
1628 } 1641 }
1629 } else if (server->caps & NFS_CAP_MODE) 1642 } else if (server->caps & NFS_CAP_MODE)
1630 invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR 1643 nfsi->cache_validity |= save_cache_validity &
1644 (NFS_INO_INVALID_ATTR
1631 | NFS_INO_INVALID_ACCESS 1645 | NFS_INO_INVALID_ACCESS
1632 | NFS_INO_INVALID_ACL 1646 | NFS_INO_INVALID_ACL
1633 | NFS_INO_REVAL_FORCED); 1647 | NFS_INO_REVAL_FORCED);
@@ -1638,7 +1652,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1638 inode->i_uid = fattr->uid; 1652 inode->i_uid = fattr->uid;
1639 } 1653 }
1640 } else if (server->caps & NFS_CAP_OWNER) 1654 } else if (server->caps & NFS_CAP_OWNER)
1641 invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR 1655 nfsi->cache_validity |= save_cache_validity &
1656 (NFS_INO_INVALID_ATTR
1642 | NFS_INO_INVALID_ACCESS 1657 | NFS_INO_INVALID_ACCESS
1643 | NFS_INO_INVALID_ACL 1658 | NFS_INO_INVALID_ACL
1644 | NFS_INO_REVAL_FORCED); 1659 | NFS_INO_REVAL_FORCED);
@@ -1649,7 +1664,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1649 inode->i_gid = fattr->gid; 1664 inode->i_gid = fattr->gid;
1650 } 1665 }
1651 } else if (server->caps & NFS_CAP_OWNER_GROUP) 1666 } else if (server->caps & NFS_CAP_OWNER_GROUP)
1652 invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR 1667 nfsi->cache_validity |= save_cache_validity &
1668 (NFS_INO_INVALID_ATTR
1653 | NFS_INO_INVALID_ACCESS 1669 | NFS_INO_INVALID_ACCESS
1654 | NFS_INO_INVALID_ACL 1670 | NFS_INO_INVALID_ACL
1655 | NFS_INO_REVAL_FORCED); 1671 | NFS_INO_REVAL_FORCED);
@@ -1662,7 +1678,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1662 set_nlink(inode, fattr->nlink); 1678 set_nlink(inode, fattr->nlink);
1663 } 1679 }
1664 } else if (server->caps & NFS_CAP_NLINK) 1680 } else if (server->caps & NFS_CAP_NLINK)
1665 invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR 1681 nfsi->cache_validity |= save_cache_validity &
1682 (NFS_INO_INVALID_ATTR
1666 | NFS_INO_REVAL_FORCED); 1683 | NFS_INO_REVAL_FORCED);
1667 1684
1668 if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) { 1685 if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) {
@@ -1694,10 +1711,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1694 invalid &= ~NFS_INO_INVALID_DATA; 1711 invalid &= ~NFS_INO_INVALID_DATA;
1695 if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ) || 1712 if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ) ||
1696 (save_cache_validity & NFS_INO_REVAL_FORCED)) 1713 (save_cache_validity & NFS_INO_REVAL_FORCED))
1697 nfsi->cache_validity |= invalid; 1714 nfs_set_cache_invalid(inode, invalid);
1698
1699 if (invalid & NFS_INO_INVALID_DATA)
1700 nfs_fscache_invalidate(inode);
1701 1715
1702 return 0; 1716 return 0;
1703 out_err: 1717 out_err:
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index dd8bfc2e2464..82ddbf46660e 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -231,13 +231,20 @@ extern void nfs_destroy_writepagecache(void);
231 231
232extern int __init nfs_init_directcache(void); 232extern int __init nfs_init_directcache(void);
233extern void nfs_destroy_directcache(void); 233extern void nfs_destroy_directcache(void);
234extern bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount);
235extern void nfs_pgheader_init(struct nfs_pageio_descriptor *desc, 234extern void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
236 struct nfs_pgio_header *hdr, 235 struct nfs_pgio_header *hdr,
237 void (*release)(struct nfs_pgio_header *hdr)); 236 void (*release)(struct nfs_pgio_header *hdr));
238void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos); 237void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos);
239int nfs_iocounter_wait(struct nfs_io_counter *c); 238int nfs_iocounter_wait(struct nfs_io_counter *c);
240 239
240extern const struct nfs_pageio_ops nfs_pgio_rw_ops;
241struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *);
242void nfs_rw_header_free(struct nfs_pgio_header *);
243void nfs_pgio_data_release(struct nfs_pgio_data *);
244int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *);
245int nfs_initiate_pgio(struct rpc_clnt *, struct nfs_pgio_data *,
246 const struct rpc_call_ops *, int, int);
247
241static inline void nfs_iocounter_init(struct nfs_io_counter *c) 248static inline void nfs_iocounter_init(struct nfs_io_counter *c)
242{ 249{
243 c->flags = 0; 250 c->flags = 0;
@@ -320,16 +327,14 @@ int nfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *)
320int nfs_file_fsync_commit(struct file *, loff_t, loff_t, int); 327int nfs_file_fsync_commit(struct file *, loff_t, loff_t, int);
321loff_t nfs_file_llseek(struct file *, loff_t, int); 328loff_t nfs_file_llseek(struct file *, loff_t, int);
322int nfs_file_flush(struct file *, fl_owner_t); 329int nfs_file_flush(struct file *, fl_owner_t);
323ssize_t nfs_file_read(struct kiocb *, const struct iovec *, unsigned long, loff_t); 330ssize_t nfs_file_read(struct kiocb *, struct iov_iter *);
324ssize_t nfs_file_splice_read(struct file *, loff_t *, struct pipe_inode_info *, 331ssize_t nfs_file_splice_read(struct file *, loff_t *, struct pipe_inode_info *,
325 size_t, unsigned int); 332 size_t, unsigned int);
326int nfs_file_mmap(struct file *, struct vm_area_struct *); 333int nfs_file_mmap(struct file *, struct vm_area_struct *);
327ssize_t nfs_file_write(struct kiocb *, const struct iovec *, unsigned long, loff_t); 334ssize_t nfs_file_write(struct kiocb *, struct iov_iter *);
328int nfs_file_release(struct inode *, struct file *); 335int nfs_file_release(struct inode *, struct file *);
329int nfs_lock(struct file *, int, struct file_lock *); 336int nfs_lock(struct file *, int, struct file_lock *);
330int nfs_flock(struct file *, int, struct file_lock *); 337int nfs_flock(struct file *, int, struct file_lock *);
331ssize_t nfs_file_splice_write(struct pipe_inode_info *, struct file *, loff_t *,
332 size_t, unsigned int);
333int nfs_check_flags(int); 338int nfs_check_flags(int);
334int nfs_setlease(struct file *, long, struct file_lock **); 339int nfs_setlease(struct file *, long, struct file_lock **);
335 340
@@ -395,19 +400,11 @@ extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh, bool
395 400
396struct nfs_pgio_completion_ops; 401struct nfs_pgio_completion_ops;
397/* read.c */ 402/* read.c */
398extern struct nfs_read_header *nfs_readhdr_alloc(void);
399extern void nfs_readhdr_free(struct nfs_pgio_header *hdr);
400extern void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, 403extern void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
401 struct inode *inode, 404 struct inode *inode, bool force_mds,
402 const struct nfs_pgio_completion_ops *compl_ops); 405 const struct nfs_pgio_completion_ops *compl_ops);
403extern int nfs_initiate_read(struct rpc_clnt *clnt,
404 struct nfs_read_data *data,
405 const struct rpc_call_ops *call_ops, int flags);
406extern void nfs_read_prepare(struct rpc_task *task, void *calldata); 406extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
407extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
408 struct nfs_pgio_header *hdr);
409extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio); 407extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
410extern void nfs_readdata_release(struct nfs_read_data *rdata);
411 408
412/* super.c */ 409/* super.c */
413void nfs_clone_super(struct super_block *, struct nfs_mount_info *); 410void nfs_clone_super(struct super_block *, struct nfs_mount_info *);
@@ -422,19 +419,10 @@ int nfs_remount(struct super_block *sb, int *flags, char *raw_data);
422 419
423/* write.c */ 420/* write.c */
424extern void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, 421extern void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
425 struct inode *inode, int ioflags, 422 struct inode *inode, int ioflags, bool force_mds,
426 const struct nfs_pgio_completion_ops *compl_ops); 423 const struct nfs_pgio_completion_ops *compl_ops);
427extern struct nfs_write_header *nfs_writehdr_alloc(void);
428extern void nfs_writehdr_free(struct nfs_pgio_header *hdr);
429extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
430 struct nfs_pgio_header *hdr);
431extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio); 424extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio);
432extern void nfs_writedata_release(struct nfs_write_data *wdata);
433extern void nfs_commit_free(struct nfs_commit_data *p); 425extern void nfs_commit_free(struct nfs_commit_data *p);
434extern int nfs_initiate_write(struct rpc_clnt *clnt,
435 struct nfs_write_data *data,
436 const struct rpc_call_ops *call_ops,
437 int how, int flags);
438extern void nfs_write_prepare(struct rpc_task *task, void *calldata); 426extern void nfs_write_prepare(struct rpc_task *task, void *calldata);
439extern void nfs_commit_prepare(struct rpc_task *task, void *calldata); 427extern void nfs_commit_prepare(struct rpc_task *task, void *calldata);
440extern int nfs_initiate_commit(struct rpc_clnt *clnt, 428extern int nfs_initiate_commit(struct rpc_clnt *clnt,
@@ -447,6 +435,7 @@ extern void nfs_init_commit(struct nfs_commit_data *data,
447 struct nfs_commit_info *cinfo); 435 struct nfs_commit_info *cinfo);
448int nfs_scan_commit_list(struct list_head *src, struct list_head *dst, 436int nfs_scan_commit_list(struct list_head *src, struct list_head *dst,
449 struct nfs_commit_info *cinfo, int max); 437 struct nfs_commit_info *cinfo, int max);
438unsigned long nfs_reqs_to_commit(struct nfs_commit_info *);
450int nfs_scan_commit(struct inode *inode, struct list_head *dst, 439int nfs_scan_commit(struct inode *inode, struct list_head *dst,
451 struct nfs_commit_info *cinfo); 440 struct nfs_commit_info *cinfo);
452void nfs_mark_request_commit(struct nfs_page *req, 441void nfs_mark_request_commit(struct nfs_page *req,
@@ -492,7 +481,7 @@ static inline void nfs_inode_dio_wait(struct inode *inode)
492extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq); 481extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq);
493 482
494/* nfs4proc.c */ 483/* nfs4proc.c */
495extern void __nfs4_read_done_cb(struct nfs_read_data *); 484extern void __nfs4_read_done_cb(struct nfs_pgio_data *);
496extern struct nfs_client *nfs4_init_client(struct nfs_client *clp, 485extern struct nfs_client *nfs4_init_client(struct nfs_client *clp,
497 const struct rpc_timeout *timeparms, 486 const struct rpc_timeout *timeparms,
498 const char *ip_addr); 487 const char *ip_addr);
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 62db136339ea..5f61b83f4a1c 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -103,7 +103,7 @@ static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
103/* 103/*
104 * typedef opaque nfsdata<>; 104 * typedef opaque nfsdata<>;
105 */ 105 */
106static int decode_nfsdata(struct xdr_stream *xdr, struct nfs_readres *result) 106static int decode_nfsdata(struct xdr_stream *xdr, struct nfs_pgio_res *result)
107{ 107{
108 u32 recvd, count; 108 u32 recvd, count;
109 __be32 *p; 109 __be32 *p;
@@ -613,7 +613,7 @@ static void nfs2_xdr_enc_readlinkargs(struct rpc_rqst *req,
613 * }; 613 * };
614 */ 614 */
615static void encode_readargs(struct xdr_stream *xdr, 615static void encode_readargs(struct xdr_stream *xdr,
616 const struct nfs_readargs *args) 616 const struct nfs_pgio_args *args)
617{ 617{
618 u32 offset = args->offset; 618 u32 offset = args->offset;
619 u32 count = args->count; 619 u32 count = args->count;
@@ -629,7 +629,7 @@ static void encode_readargs(struct xdr_stream *xdr,
629 629
630static void nfs2_xdr_enc_readargs(struct rpc_rqst *req, 630static void nfs2_xdr_enc_readargs(struct rpc_rqst *req,
631 struct xdr_stream *xdr, 631 struct xdr_stream *xdr,
632 const struct nfs_readargs *args) 632 const struct nfs_pgio_args *args)
633{ 633{
634 encode_readargs(xdr, args); 634 encode_readargs(xdr, args);
635 prepare_reply_buffer(req, args->pages, args->pgbase, 635 prepare_reply_buffer(req, args->pages, args->pgbase,
@@ -649,7 +649,7 @@ static void nfs2_xdr_enc_readargs(struct rpc_rqst *req,
649 * }; 649 * };
650 */ 650 */
651static void encode_writeargs(struct xdr_stream *xdr, 651static void encode_writeargs(struct xdr_stream *xdr,
652 const struct nfs_writeargs *args) 652 const struct nfs_pgio_args *args)
653{ 653{
654 u32 offset = args->offset; 654 u32 offset = args->offset;
655 u32 count = args->count; 655 u32 count = args->count;
@@ -669,7 +669,7 @@ static void encode_writeargs(struct xdr_stream *xdr,
669 669
670static void nfs2_xdr_enc_writeargs(struct rpc_rqst *req, 670static void nfs2_xdr_enc_writeargs(struct rpc_rqst *req,
671 struct xdr_stream *xdr, 671 struct xdr_stream *xdr,
672 const struct nfs_writeargs *args) 672 const struct nfs_pgio_args *args)
673{ 673{
674 encode_writeargs(xdr, args); 674 encode_writeargs(xdr, args);
675 xdr->buf->flags |= XDRBUF_WRITE; 675 xdr->buf->flags |= XDRBUF_WRITE;
@@ -857,7 +857,7 @@ out_default:
857 * }; 857 * };
858 */ 858 */
859static int nfs2_xdr_dec_readres(struct rpc_rqst *req, struct xdr_stream *xdr, 859static int nfs2_xdr_dec_readres(struct rpc_rqst *req, struct xdr_stream *xdr,
860 struct nfs_readres *result) 860 struct nfs_pgio_res *result)
861{ 861{
862 enum nfs_stat status; 862 enum nfs_stat status;
863 int error; 863 int error;
@@ -878,7 +878,7 @@ out_default:
878} 878}
879 879
880static int nfs2_xdr_dec_writeres(struct rpc_rqst *req, struct xdr_stream *xdr, 880static int nfs2_xdr_dec_writeres(struct rpc_rqst *req, struct xdr_stream *xdr,
881 struct nfs_writeres *result) 881 struct nfs_pgio_res *result)
882{ 882{
883 /* All NFSv2 writes are "file sync" writes */ 883 /* All NFSv2 writes are "file sync" writes */
884 result->verf->committed = NFS_FILE_SYNC; 884 result->verf->committed = NFS_FILE_SYNC;
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index db60149c4579..e7daa42bbc86 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -795,7 +795,7 @@ nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
795 return status; 795 return status;
796} 796}
797 797
798static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data) 798static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_data *data)
799{ 799{
800 struct inode *inode = data->header->inode; 800 struct inode *inode = data->header->inode;
801 801
@@ -807,18 +807,18 @@ static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data)
807 return 0; 807 return 0;
808} 808}
809 809
810static void nfs3_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg) 810static void nfs3_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
811{ 811{
812 msg->rpc_proc = &nfs3_procedures[NFS3PROC_READ]; 812 msg->rpc_proc = &nfs3_procedures[NFS3PROC_READ];
813} 813}
814 814
815static int nfs3_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data) 815static int nfs3_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data)
816{ 816{
817 rpc_call_start(task); 817 rpc_call_start(task);
818 return 0; 818 return 0;
819} 819}
820 820
821static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data) 821static int nfs3_write_done(struct rpc_task *task, struct nfs_pgio_data *data)
822{ 822{
823 struct inode *inode = data->header->inode; 823 struct inode *inode = data->header->inode;
824 824
@@ -829,17 +829,11 @@ static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data)
829 return 0; 829 return 0;
830} 830}
831 831
832static void nfs3_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg) 832static void nfs3_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
833{ 833{
834 msg->rpc_proc = &nfs3_procedures[NFS3PROC_WRITE]; 834 msg->rpc_proc = &nfs3_procedures[NFS3PROC_WRITE];
835} 835}
836 836
837static int nfs3_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data)
838{
839 rpc_call_start(task);
840 return 0;
841}
842
843static void nfs3_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) 837static void nfs3_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
844{ 838{
845 rpc_call_start(task); 839 rpc_call_start(task);
@@ -946,13 +940,10 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
946 .fsinfo = nfs3_proc_fsinfo, 940 .fsinfo = nfs3_proc_fsinfo,
947 .pathconf = nfs3_proc_pathconf, 941 .pathconf = nfs3_proc_pathconf,
948 .decode_dirent = nfs3_decode_dirent, 942 .decode_dirent = nfs3_decode_dirent,
943 .pgio_rpc_prepare = nfs3_proc_pgio_rpc_prepare,
949 .read_setup = nfs3_proc_read_setup, 944 .read_setup = nfs3_proc_read_setup,
950 .read_pageio_init = nfs_pageio_init_read,
951 .read_rpc_prepare = nfs3_proc_read_rpc_prepare,
952 .read_done = nfs3_read_done, 945 .read_done = nfs3_read_done,
953 .write_setup = nfs3_proc_write_setup, 946 .write_setup = nfs3_proc_write_setup,
954 .write_pageio_init = nfs_pageio_init_write,
955 .write_rpc_prepare = nfs3_proc_write_rpc_prepare,
956 .write_done = nfs3_write_done, 947 .write_done = nfs3_write_done,
957 .commit_setup = nfs3_proc_commit_setup, 948 .commit_setup = nfs3_proc_commit_setup,
958 .commit_rpc_prepare = nfs3_proc_commit_rpc_prepare, 949 .commit_rpc_prepare = nfs3_proc_commit_rpc_prepare,
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index fa6d72131c19..8f4cbe7f4aa8 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -953,7 +953,7 @@ static void nfs3_xdr_enc_readlink3args(struct rpc_rqst *req,
953 * }; 953 * };
954 */ 954 */
955static void encode_read3args(struct xdr_stream *xdr, 955static void encode_read3args(struct xdr_stream *xdr,
956 const struct nfs_readargs *args) 956 const struct nfs_pgio_args *args)
957{ 957{
958 __be32 *p; 958 __be32 *p;
959 959
@@ -966,7 +966,7 @@ static void encode_read3args(struct xdr_stream *xdr,
966 966
967static void nfs3_xdr_enc_read3args(struct rpc_rqst *req, 967static void nfs3_xdr_enc_read3args(struct rpc_rqst *req,
968 struct xdr_stream *xdr, 968 struct xdr_stream *xdr,
969 const struct nfs_readargs *args) 969 const struct nfs_pgio_args *args)
970{ 970{
971 encode_read3args(xdr, args); 971 encode_read3args(xdr, args);
972 prepare_reply_buffer(req, args->pages, args->pgbase, 972 prepare_reply_buffer(req, args->pages, args->pgbase,
@@ -992,7 +992,7 @@ static void nfs3_xdr_enc_read3args(struct rpc_rqst *req,
992 * }; 992 * };
993 */ 993 */
994static void encode_write3args(struct xdr_stream *xdr, 994static void encode_write3args(struct xdr_stream *xdr,
995 const struct nfs_writeargs *args) 995 const struct nfs_pgio_args *args)
996{ 996{
997 __be32 *p; 997 __be32 *p;
998 998
@@ -1008,7 +1008,7 @@ static void encode_write3args(struct xdr_stream *xdr,
1008 1008
1009static void nfs3_xdr_enc_write3args(struct rpc_rqst *req, 1009static void nfs3_xdr_enc_write3args(struct rpc_rqst *req,
1010 struct xdr_stream *xdr, 1010 struct xdr_stream *xdr,
1011 const struct nfs_writeargs *args) 1011 const struct nfs_pgio_args *args)
1012{ 1012{
1013 encode_write3args(xdr, args); 1013 encode_write3args(xdr, args);
1014 xdr->buf->flags |= XDRBUF_WRITE; 1014 xdr->buf->flags |= XDRBUF_WRITE;
@@ -1589,7 +1589,7 @@ out_default:
1589 * }; 1589 * };
1590 */ 1590 */
1591static int decode_read3resok(struct xdr_stream *xdr, 1591static int decode_read3resok(struct xdr_stream *xdr,
1592 struct nfs_readres *result) 1592 struct nfs_pgio_res *result)
1593{ 1593{
1594 u32 eof, count, ocount, recvd; 1594 u32 eof, count, ocount, recvd;
1595 __be32 *p; 1595 __be32 *p;
@@ -1625,7 +1625,7 @@ out_overflow:
1625} 1625}
1626 1626
1627static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr, 1627static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr,
1628 struct nfs_readres *result) 1628 struct nfs_pgio_res *result)
1629{ 1629{
1630 enum nfs_stat status; 1630 enum nfs_stat status;
1631 int error; 1631 int error;
@@ -1673,7 +1673,7 @@ out_status:
1673 * }; 1673 * };
1674 */ 1674 */
1675static int decode_write3resok(struct xdr_stream *xdr, 1675static int decode_write3resok(struct xdr_stream *xdr,
1676 struct nfs_writeres *result) 1676 struct nfs_pgio_res *result)
1677{ 1677{
1678 __be32 *p; 1678 __be32 *p;
1679 1679
@@ -1697,7 +1697,7 @@ out_eio:
1697} 1697}
1698 1698
1699static int nfs3_xdr_dec_write3res(struct rpc_rqst *req, struct xdr_stream *xdr, 1699static int nfs3_xdr_dec_write3res(struct rpc_rqst *req, struct xdr_stream *xdr,
1700 struct nfs_writeres *result) 1700 struct nfs_pgio_res *result)
1701{ 1701{
1702 enum nfs_stat status; 1702 enum nfs_stat status;
1703 int error; 1703 int error;
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index e1d1badbe53c..ba2affa51941 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -230,7 +230,7 @@ int nfs_atomic_open(struct inode *, struct dentry *, struct file *,
230extern struct file_system_type nfs4_fs_type; 230extern struct file_system_type nfs4_fs_type;
231 231
232/* nfs4namespace.c */ 232/* nfs4namespace.c */
233struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *, struct inode *, struct qstr *); 233struct rpc_clnt *nfs4_negotiate_security(struct rpc_clnt *, struct inode *, struct qstr *);
234struct vfsmount *nfs4_submount(struct nfs_server *, struct dentry *, 234struct vfsmount *nfs4_submount(struct nfs_server *, struct dentry *,
235 struct nfs_fh *, struct nfs_fattr *); 235 struct nfs_fh *, struct nfs_fattr *);
236int nfs4_replace_transport(struct nfs_server *server, 236int nfs4_replace_transport(struct nfs_server *server,
@@ -337,7 +337,7 @@ nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_mode,
337 */ 337 */
338static inline void 338static inline void
339nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp, 339nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp,
340 struct rpc_message *msg, struct nfs_write_data *wdata) 340 struct rpc_message *msg, struct nfs_pgio_data *wdata)
341{ 341{
342 if (_nfs4_state_protect(clp, NFS_SP4_MACH_CRED_WRITE, clntp, msg) && 342 if (_nfs4_state_protect(clp, NFS_SP4_MACH_CRED_WRITE, clntp, msg) &&
343 !test_bit(NFS_SP4_MACH_CRED_COMMIT, &clp->cl_sp4_flags)) 343 !test_bit(NFS_SP4_MACH_CRED_COMMIT, &clp->cl_sp4_flags))
@@ -369,7 +369,7 @@ nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_flags,
369 369
370static inline void 370static inline void
371nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp, 371nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp,
372 struct rpc_message *msg, struct nfs_write_data *wdata) 372 struct rpc_message *msg, struct nfs_pgio_data *wdata)
373{ 373{
374} 374}
375#endif /* CONFIG_NFS_V4_1 */ 375#endif /* CONFIG_NFS_V4_1 */
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 8de3407e0360..a816f0627a6c 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -100,8 +100,7 @@ nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
100 break; 100 break;
101 mutex_lock(&inode->i_mutex); 101 mutex_lock(&inode->i_mutex);
102 ret = nfs_file_fsync_commit(file, start, end, datasync); 102 ret = nfs_file_fsync_commit(file, start, end, datasync);
103 if (!ret && !datasync) 103 if (!ret)
104 /* application has asked for meta-data sync */
105 ret = pnfs_layoutcommit_inode(inode, true); 104 ret = pnfs_layoutcommit_inode(inode, true);
106 mutex_unlock(&inode->i_mutex); 105 mutex_unlock(&inode->i_mutex);
107 /* 106 /*
@@ -118,10 +117,10 @@ nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
118 117
119const struct file_operations nfs4_file_operations = { 118const struct file_operations nfs4_file_operations = {
120 .llseek = nfs_file_llseek, 119 .llseek = nfs_file_llseek,
121 .read = do_sync_read, 120 .read = new_sync_read,
122 .write = do_sync_write, 121 .write = new_sync_write,
123 .aio_read = nfs_file_read, 122 .read_iter = nfs_file_read,
124 .aio_write = nfs_file_write, 123 .write_iter = nfs_file_write,
125 .mmap = nfs_file_mmap, 124 .mmap = nfs_file_mmap,
126 .open = nfs4_file_open, 125 .open = nfs4_file_open,
127 .flush = nfs_file_flush, 126 .flush = nfs_file_flush,
@@ -130,7 +129,7 @@ const struct file_operations nfs4_file_operations = {
130 .lock = nfs_lock, 129 .lock = nfs_lock,
131 .flock = nfs_flock, 130 .flock = nfs_flock,
132 .splice_read = nfs_file_splice_read, 131 .splice_read = nfs_file_splice_read,
133 .splice_write = nfs_file_splice_write, 132 .splice_write = iter_file_splice_write,
134 .check_flags = nfs_check_flags, 133 .check_flags = nfs_check_flags,
135 .setlease = nfs_setlease, 134 .setlease = nfs_setlease,
136}; 135};
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index 3d5dbf80d46a..3d83cb1fdc70 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -139,16 +139,22 @@ static size_t nfs_parse_server_name(char *string, size_t len,
139 * @server: NFS server struct 139 * @server: NFS server struct
140 * @flavors: List of security tuples returned by SECINFO procedure 140 * @flavors: List of security tuples returned by SECINFO procedure
141 * 141 *
142 * Return the pseudoflavor of the first security mechanism in 142 * Return an rpc client that uses the first security mechanism in
143 * "flavors" that is locally supported. Return RPC_AUTH_UNIX if 143 * "flavors" that is locally supported. The "flavors" array
144 * no matching flavor is found in the array. The "flavors" array
145 * is searched in the order returned from the server, per RFC 3530 144 * is searched in the order returned from the server, per RFC 3530
146 * recommendation. 145 * recommendation and each flavor is checked for membership in the
146 * sec= mount option list if it exists.
147 *
148 * Return -EPERM if no matching flavor is found in the array.
149 *
150 * Please call rpc_shutdown_client() when you are done with this rpc client.
151 *
147 */ 152 */
148static rpc_authflavor_t nfs_find_best_sec(struct nfs_server *server, 153static struct rpc_clnt *nfs_find_best_sec(struct rpc_clnt *clnt,
154 struct nfs_server *server,
149 struct nfs4_secinfo_flavors *flavors) 155 struct nfs4_secinfo_flavors *flavors)
150{ 156{
151 rpc_authflavor_t pseudoflavor; 157 rpc_authflavor_t pflavor;
152 struct nfs4_secinfo4 *secinfo; 158 struct nfs4_secinfo4 *secinfo;
153 unsigned int i; 159 unsigned int i;
154 160
@@ -159,62 +165,73 @@ static rpc_authflavor_t nfs_find_best_sec(struct nfs_server *server,
159 case RPC_AUTH_NULL: 165 case RPC_AUTH_NULL:
160 case RPC_AUTH_UNIX: 166 case RPC_AUTH_UNIX:
161 case RPC_AUTH_GSS: 167 case RPC_AUTH_GSS:
162 pseudoflavor = rpcauth_get_pseudoflavor(secinfo->flavor, 168 pflavor = rpcauth_get_pseudoflavor(secinfo->flavor,
163 &secinfo->flavor_info); 169 &secinfo->flavor_info);
164 /* make sure pseudoflavor matches sec= mount opt */ 170 /* does the pseudoflavor match a sec= mount opt? */
165 if (pseudoflavor != RPC_AUTH_MAXFLAVOR && 171 if (pflavor != RPC_AUTH_MAXFLAVOR &&
166 nfs_auth_info_match(&server->auth_info, 172 nfs_auth_info_match(&server->auth_info, pflavor)) {
167 pseudoflavor)) 173 struct rpc_clnt *new;
168 return pseudoflavor; 174 struct rpc_cred *cred;
169 break; 175
176 /* Cloning creates an rpc_auth for the flavor */
177 new = rpc_clone_client_set_auth(clnt, pflavor);
178 if (IS_ERR(new))
179 continue;
180 /**
181 * Check that the user actually can use the
182 * flavor. This is mostly for RPC_AUTH_GSS
183 * where cr_init obtains a gss context
184 */
185 cred = rpcauth_lookupcred(new->cl_auth, 0);
186 if (IS_ERR(cred)) {
187 rpc_shutdown_client(new);
188 continue;
189 }
190 put_rpccred(cred);
191 return new;
192 }
170 } 193 }
171 } 194 }
172 195 return ERR_PTR(-EPERM);
173 /* if there were any sec= options then nothing matched */
174 if (server->auth_info.flavor_len > 0)
175 return -EPERM;
176
177 return RPC_AUTH_UNIX;
178} 196}
179 197
180static rpc_authflavor_t nfs4_negotiate_security(struct inode *inode, struct qstr *name) 198/**
199 * nfs4_negotiate_security - in response to an NFS4ERR_WRONGSEC on lookup,
200 * return an rpc_clnt that uses the best available security flavor with
201 * respect to the secinfo flavor list and the sec= mount options.
202 *
203 * @clnt: RPC client to clone
204 * @inode: directory inode
205 * @name: lookup name
206 *
207 * Please call rpc_shutdown_client() when you are done with this rpc client.
208 */
209struct rpc_clnt *
210nfs4_negotiate_security(struct rpc_clnt *clnt, struct inode *inode,
211 struct qstr *name)
181{ 212{
182 struct page *page; 213 struct page *page;
183 struct nfs4_secinfo_flavors *flavors; 214 struct nfs4_secinfo_flavors *flavors;
184 rpc_authflavor_t flavor; 215 struct rpc_clnt *new;
185 int err; 216 int err;
186 217
187 page = alloc_page(GFP_KERNEL); 218 page = alloc_page(GFP_KERNEL);
188 if (!page) 219 if (!page)
189 return -ENOMEM; 220 return ERR_PTR(-ENOMEM);
221
190 flavors = page_address(page); 222 flavors = page_address(page);
191 223
192 err = nfs4_proc_secinfo(inode, name, flavors); 224 err = nfs4_proc_secinfo(inode, name, flavors);
193 if (err < 0) { 225 if (err < 0) {
194 flavor = err; 226 new = ERR_PTR(err);
195 goto out; 227 goto out;
196 } 228 }
197 229
198 flavor = nfs_find_best_sec(NFS_SERVER(inode), flavors); 230 new = nfs_find_best_sec(clnt, NFS_SERVER(inode), flavors);
199 231
200out: 232out:
201 put_page(page); 233 put_page(page);
202 return flavor; 234 return new;
203}
204
205/*
206 * Please call rpc_shutdown_client() when you are done with this client.
207 */
208struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *clnt, struct inode *inode,
209 struct qstr *name)
210{
211 rpc_authflavor_t flavor;
212
213 flavor = nfs4_negotiate_security(inode, name);
214 if ((int)flavor < 0)
215 return ERR_PTR((int)flavor);
216
217 return rpc_clone_client_set_auth(clnt, flavor);
218} 235}
219 236
220static struct vfsmount *try_location(struct nfs_clone_mount *mountdata, 237static struct vfsmount *try_location(struct nfs_clone_mount *mountdata,
@@ -397,11 +414,6 @@ struct vfsmount *nfs4_submount(struct nfs_server *server, struct dentry *dentry,
397 414
398 if (client->cl_auth->au_flavor != flavor) 415 if (client->cl_auth->au_flavor != flavor)
399 flavor = client->cl_auth->au_flavor; 416 flavor = client->cl_auth->au_flavor;
400 else {
401 rpc_authflavor_t new = nfs4_negotiate_security(dir, name);
402 if ((int)new >= 0)
403 flavor = new;
404 }
405 mnt = nfs_do_submount(dentry, fh, fattr, flavor); 417 mnt = nfs_do_submount(dentry, fh, fattr, flavor);
406out: 418out:
407 rpc_shutdown_client(client); 419 rpc_shutdown_client(client);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 397be39c6dc8..4bf3d97cc5a0 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2027,7 +2027,7 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
2027 return status; 2027 return status;
2028 } 2028 }
2029 if (!(o_res->f_attr->valid & NFS_ATTR_FATTR)) 2029 if (!(o_res->f_attr->valid & NFS_ATTR_FATTR))
2030 _nfs4_proc_getattr(server, &o_res->fh, o_res->f_attr, o_res->f_label); 2030 nfs4_proc_getattr(server, &o_res->fh, o_res->f_attr, o_res->f_label);
2031 return 0; 2031 return 0;
2032} 2032}
2033 2033
@@ -2750,7 +2750,7 @@ static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync)
2750 2750
2751#define FATTR4_WORD1_NFS40_MASK (2*FATTR4_WORD1_MOUNTED_ON_FILEID - 1UL) 2751#define FATTR4_WORD1_NFS40_MASK (2*FATTR4_WORD1_MOUNTED_ON_FILEID - 1UL)
2752#define FATTR4_WORD2_NFS41_MASK (2*FATTR4_WORD2_SUPPATTR_EXCLCREAT - 1UL) 2752#define FATTR4_WORD2_NFS41_MASK (2*FATTR4_WORD2_SUPPATTR_EXCLCREAT - 1UL)
2753#define FATTR4_WORD2_NFS42_MASK (2*FATTR4_WORD2_CHANGE_SECURITY_LABEL - 1UL) 2753#define FATTR4_WORD2_NFS42_MASK (2*FATTR4_WORD2_SECURITY_LABEL - 1UL)
2754 2754
2755static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) 2755static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle)
2756{ 2756{
@@ -3247,7 +3247,7 @@ static int nfs4_proc_lookup_common(struct rpc_clnt **clnt, struct inode *dir,
3247 err = -EPERM; 3247 err = -EPERM;
3248 if (client != *clnt) 3248 if (client != *clnt)
3249 goto out; 3249 goto out;
3250 client = nfs4_create_sec_client(client, dir, name); 3250 client = nfs4_negotiate_security(client, dir, name);
3251 if (IS_ERR(client)) 3251 if (IS_ERR(client))
3252 return PTR_ERR(client); 3252 return PTR_ERR(client);
3253 3253
@@ -4033,12 +4033,12 @@ static bool nfs4_error_stateid_expired(int err)
4033 return false; 4033 return false;
4034} 4034}
4035 4035
4036void __nfs4_read_done_cb(struct nfs_read_data *data) 4036void __nfs4_read_done_cb(struct nfs_pgio_data *data)
4037{ 4037{
4038 nfs_invalidate_atime(data->header->inode); 4038 nfs_invalidate_atime(data->header->inode);
4039} 4039}
4040 4040
4041static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data) 4041static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_pgio_data *data)
4042{ 4042{
4043 struct nfs_server *server = NFS_SERVER(data->header->inode); 4043 struct nfs_server *server = NFS_SERVER(data->header->inode);
4044 4044
@@ -4055,7 +4055,7 @@ static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data)
4055} 4055}
4056 4056
4057static bool nfs4_read_stateid_changed(struct rpc_task *task, 4057static bool nfs4_read_stateid_changed(struct rpc_task *task,
4058 struct nfs_readargs *args) 4058 struct nfs_pgio_args *args)
4059{ 4059{
4060 4060
4061 if (!nfs4_error_stateid_expired(task->tk_status) || 4061 if (!nfs4_error_stateid_expired(task->tk_status) ||
@@ -4068,7 +4068,7 @@ static bool nfs4_read_stateid_changed(struct rpc_task *task,
4068 return true; 4068 return true;
4069} 4069}
4070 4070
4071static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data) 4071static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_data *data)
4072{ 4072{
4073 4073
4074 dprintk("--> %s\n", __func__); 4074 dprintk("--> %s\n", __func__);
@@ -4077,19 +4077,19 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
4077 return -EAGAIN; 4077 return -EAGAIN;
4078 if (nfs4_read_stateid_changed(task, &data->args)) 4078 if (nfs4_read_stateid_changed(task, &data->args))
4079 return -EAGAIN; 4079 return -EAGAIN;
4080 return data->read_done_cb ? data->read_done_cb(task, data) : 4080 return data->pgio_done_cb ? data->pgio_done_cb(task, data) :
4081 nfs4_read_done_cb(task, data); 4081 nfs4_read_done_cb(task, data);
4082} 4082}
4083 4083
4084static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg) 4084static void nfs4_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
4085{ 4085{
4086 data->timestamp = jiffies; 4086 data->timestamp = jiffies;
4087 data->read_done_cb = nfs4_read_done_cb; 4087 data->pgio_done_cb = nfs4_read_done_cb;
4088 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; 4088 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
4089 nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); 4089 nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0);
4090} 4090}
4091 4091
4092static int nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data) 4092static int nfs4_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data)
4093{ 4093{
4094 if (nfs4_setup_sequence(NFS_SERVER(data->header->inode), 4094 if (nfs4_setup_sequence(NFS_SERVER(data->header->inode),
4095 &data->args.seq_args, 4095 &data->args.seq_args,
@@ -4097,14 +4097,14 @@ static int nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_dat
4097 task)) 4097 task))
4098 return 0; 4098 return 0;
4099 if (nfs4_set_rw_stateid(&data->args.stateid, data->args.context, 4099 if (nfs4_set_rw_stateid(&data->args.stateid, data->args.context,
4100 data->args.lock_context, FMODE_READ) == -EIO) 4100 data->args.lock_context, data->header->rw_ops->rw_mode) == -EIO)
4101 return -EIO; 4101 return -EIO;
4102 if (unlikely(test_bit(NFS_CONTEXT_BAD, &data->args.context->flags))) 4102 if (unlikely(test_bit(NFS_CONTEXT_BAD, &data->args.context->flags)))
4103 return -EIO; 4103 return -EIO;
4104 return 0; 4104 return 0;
4105} 4105}
4106 4106
4107static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data) 4107static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_pgio_data *data)
4108{ 4108{
4109 struct inode *inode = data->header->inode; 4109 struct inode *inode = data->header->inode;
4110 4110
@@ -4121,7 +4121,7 @@ static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data
4121} 4121}
4122 4122
4123static bool nfs4_write_stateid_changed(struct rpc_task *task, 4123static bool nfs4_write_stateid_changed(struct rpc_task *task,
4124 struct nfs_writeargs *args) 4124 struct nfs_pgio_args *args)
4125{ 4125{
4126 4126
4127 if (!nfs4_error_stateid_expired(task->tk_status) || 4127 if (!nfs4_error_stateid_expired(task->tk_status) ||
@@ -4134,18 +4134,18 @@ static bool nfs4_write_stateid_changed(struct rpc_task *task,
4134 return true; 4134 return true;
4135} 4135}
4136 4136
4137static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data) 4137static int nfs4_write_done(struct rpc_task *task, struct nfs_pgio_data *data)
4138{ 4138{
4139 if (!nfs4_sequence_done(task, &data->res.seq_res)) 4139 if (!nfs4_sequence_done(task, &data->res.seq_res))
4140 return -EAGAIN; 4140 return -EAGAIN;
4141 if (nfs4_write_stateid_changed(task, &data->args)) 4141 if (nfs4_write_stateid_changed(task, &data->args))
4142 return -EAGAIN; 4142 return -EAGAIN;
4143 return data->write_done_cb ? data->write_done_cb(task, data) : 4143 return data->pgio_done_cb ? data->pgio_done_cb(task, data) :
4144 nfs4_write_done_cb(task, data); 4144 nfs4_write_done_cb(task, data);
4145} 4145}
4146 4146
4147static 4147static
4148bool nfs4_write_need_cache_consistency_data(const struct nfs_write_data *data) 4148bool nfs4_write_need_cache_consistency_data(const struct nfs_pgio_data *data)
4149{ 4149{
4150 const struct nfs_pgio_header *hdr = data->header; 4150 const struct nfs_pgio_header *hdr = data->header;
4151 4151
@@ -4158,7 +4158,7 @@ bool nfs4_write_need_cache_consistency_data(const struct nfs_write_data *data)
4158 return nfs4_have_delegation(hdr->inode, FMODE_READ) == 0; 4158 return nfs4_have_delegation(hdr->inode, FMODE_READ) == 0;
4159} 4159}
4160 4160
4161static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg) 4161static void nfs4_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
4162{ 4162{
4163 struct nfs_server *server = NFS_SERVER(data->header->inode); 4163 struct nfs_server *server = NFS_SERVER(data->header->inode);
4164 4164
@@ -4168,8 +4168,8 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag
4168 } else 4168 } else
4169 data->args.bitmask = server->cache_consistency_bitmask; 4169 data->args.bitmask = server->cache_consistency_bitmask;
4170 4170
4171 if (!data->write_done_cb) 4171 if (!data->pgio_done_cb)
4172 data->write_done_cb = nfs4_write_done_cb; 4172 data->pgio_done_cb = nfs4_write_done_cb;
4173 data->res.server = server; 4173 data->res.server = server;
4174 data->timestamp = jiffies; 4174 data->timestamp = jiffies;
4175 4175
@@ -4177,21 +4177,6 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag
4177 nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); 4177 nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
4178} 4178}
4179 4179
4180static int nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data)
4181{
4182 if (nfs4_setup_sequence(NFS_SERVER(data->header->inode),
4183 &data->args.seq_args,
4184 &data->res.seq_res,
4185 task))
4186 return 0;
4187 if (nfs4_set_rw_stateid(&data->args.stateid, data->args.context,
4188 data->args.lock_context, FMODE_WRITE) == -EIO)
4189 return -EIO;
4190 if (unlikely(test_bit(NFS_CONTEXT_BAD, &data->args.context->flags)))
4191 return -EIO;
4192 return 0;
4193}
4194
4195static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) 4180static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
4196{ 4181{
4197 nfs4_setup_sequence(NFS_SERVER(data->inode), 4182 nfs4_setup_sequence(NFS_SERVER(data->inode),
@@ -8432,13 +8417,10 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
8432 .pathconf = nfs4_proc_pathconf, 8417 .pathconf = nfs4_proc_pathconf,
8433 .set_capabilities = nfs4_server_capabilities, 8418 .set_capabilities = nfs4_server_capabilities,
8434 .decode_dirent = nfs4_decode_dirent, 8419 .decode_dirent = nfs4_decode_dirent,
8420 .pgio_rpc_prepare = nfs4_proc_pgio_rpc_prepare,
8435 .read_setup = nfs4_proc_read_setup, 8421 .read_setup = nfs4_proc_read_setup,
8436 .read_pageio_init = pnfs_pageio_init_read,
8437 .read_rpc_prepare = nfs4_proc_read_rpc_prepare,
8438 .read_done = nfs4_read_done, 8422 .read_done = nfs4_read_done,
8439 .write_setup = nfs4_proc_write_setup, 8423 .write_setup = nfs4_proc_write_setup,
8440 .write_pageio_init = pnfs_pageio_init_write,
8441 .write_rpc_prepare = nfs4_proc_write_rpc_prepare,
8442 .write_done = nfs4_write_done, 8424 .write_done = nfs4_write_done,
8443 .commit_setup = nfs4_proc_commit_setup, 8425 .commit_setup = nfs4_proc_commit_setup,
8444 .commit_rpc_prepare = nfs4_proc_commit_rpc_prepare, 8426 .commit_rpc_prepare = nfs4_proc_commit_rpc_prepare,
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 2349518eef2c..848f6853c59e 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1140,9 +1140,9 @@ static int nfs4_run_state_manager(void *);
1140 1140
1141static void nfs4_clear_state_manager_bit(struct nfs_client *clp) 1141static void nfs4_clear_state_manager_bit(struct nfs_client *clp)
1142{ 1142{
1143 smp_mb__before_clear_bit(); 1143 smp_mb__before_atomic();
1144 clear_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state); 1144 clear_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state);
1145 smp_mb__after_clear_bit(); 1145 smp_mb__after_atomic();
1146 wake_up_bit(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING); 1146 wake_up_bit(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING);
1147 rpc_wake_up(&clp->cl_rpcwaitq); 1147 rpc_wake_up(&clp->cl_rpcwaitq);
1148} 1148}
@@ -1456,7 +1456,7 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs
1456 * server that doesn't support a grace period. 1456 * server that doesn't support a grace period.
1457 */ 1457 */
1458 spin_lock(&sp->so_lock); 1458 spin_lock(&sp->so_lock);
1459 write_seqcount_begin(&sp->so_reclaim_seqcount); 1459 raw_write_seqcount_begin(&sp->so_reclaim_seqcount);
1460restart: 1460restart:
1461 list_for_each_entry(state, &sp->so_states, open_states) { 1461 list_for_each_entry(state, &sp->so_states, open_states) {
1462 if (!test_and_clear_bit(ops->state_flag_bit, &state->flags)) 1462 if (!test_and_clear_bit(ops->state_flag_bit, &state->flags))
@@ -1519,13 +1519,13 @@ restart:
1519 spin_lock(&sp->so_lock); 1519 spin_lock(&sp->so_lock);
1520 goto restart; 1520 goto restart;
1521 } 1521 }
1522 write_seqcount_end(&sp->so_reclaim_seqcount); 1522 raw_write_seqcount_end(&sp->so_reclaim_seqcount);
1523 spin_unlock(&sp->so_lock); 1523 spin_unlock(&sp->so_lock);
1524 return 0; 1524 return 0;
1525out_err: 1525out_err:
1526 nfs4_put_open_state(state); 1526 nfs4_put_open_state(state);
1527 spin_lock(&sp->so_lock); 1527 spin_lock(&sp->so_lock);
1528 write_seqcount_end(&sp->so_reclaim_seqcount); 1528 raw_write_seqcount_end(&sp->so_reclaim_seqcount);
1529 spin_unlock(&sp->so_lock); 1529 spin_unlock(&sp->so_lock);
1530 return status; 1530 return status;
1531} 1531}
diff --git a/fs/nfs/nfs4sysctl.c b/fs/nfs/nfs4sysctl.c
index 2628d921b7e3..b6ebe7e445f6 100644
--- a/fs/nfs/nfs4sysctl.c
+++ b/fs/nfs/nfs4sysctl.c
@@ -16,7 +16,7 @@ static const int nfs_set_port_min = 0;
16static const int nfs_set_port_max = 65535; 16static const int nfs_set_port_max = 65535;
17static struct ctl_table_header *nfs4_callback_sysctl_table; 17static struct ctl_table_header *nfs4_callback_sysctl_table;
18 18
19static ctl_table nfs4_cb_sysctls[] = { 19static struct ctl_table nfs4_cb_sysctls[] = {
20 { 20 {
21 .procname = "nfs_callback_tcpport", 21 .procname = "nfs_callback_tcpport",
22 .data = &nfs_callback_set_tcpport, 22 .data = &nfs_callback_set_tcpport,
@@ -36,7 +36,7 @@ static ctl_table nfs4_cb_sysctls[] = {
36 { } 36 { }
37}; 37};
38 38
39static ctl_table nfs4_cb_sysctl_dir[] = { 39static struct ctl_table nfs4_cb_sysctl_dir[] = {
40 { 40 {
41 .procname = "nfs", 41 .procname = "nfs",
42 .mode = 0555, 42 .mode = 0555,
@@ -45,7 +45,7 @@ static ctl_table nfs4_cb_sysctl_dir[] = {
45 { } 45 { }
46}; 46};
47 47
48static ctl_table nfs4_cb_sysctl_root[] = { 48static struct ctl_table nfs4_cb_sysctl_root[] = {
49 { 49 {
50 .procname = "fs", 50 .procname = "fs",
51 .mode = 0555, 51 .mode = 0555,
diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
index 849cf146db30..0a744f3a86f6 100644
--- a/fs/nfs/nfs4trace.h
+++ b/fs/nfs/nfs4trace.h
@@ -932,7 +932,7 @@ DEFINE_NFS4_IDMAP_EVENT(nfs4_map_gid_to_group);
932 932
933DECLARE_EVENT_CLASS(nfs4_read_event, 933DECLARE_EVENT_CLASS(nfs4_read_event,
934 TP_PROTO( 934 TP_PROTO(
935 const struct nfs_read_data *data, 935 const struct nfs_pgio_data *data,
936 int error 936 int error
937 ), 937 ),
938 938
@@ -972,7 +972,7 @@ DECLARE_EVENT_CLASS(nfs4_read_event,
972#define DEFINE_NFS4_READ_EVENT(name) \ 972#define DEFINE_NFS4_READ_EVENT(name) \
973 DEFINE_EVENT(nfs4_read_event, name, \ 973 DEFINE_EVENT(nfs4_read_event, name, \
974 TP_PROTO( \ 974 TP_PROTO( \
975 const struct nfs_read_data *data, \ 975 const struct nfs_pgio_data *data, \
976 int error \ 976 int error \
977 ), \ 977 ), \
978 TP_ARGS(data, error)) 978 TP_ARGS(data, error))
@@ -983,7 +983,7 @@ DEFINE_NFS4_READ_EVENT(nfs4_pnfs_read);
983 983
984DECLARE_EVENT_CLASS(nfs4_write_event, 984DECLARE_EVENT_CLASS(nfs4_write_event,
985 TP_PROTO( 985 TP_PROTO(
986 const struct nfs_write_data *data, 986 const struct nfs_pgio_data *data,
987 int error 987 int error
988 ), 988 ),
989 989
@@ -1024,7 +1024,7 @@ DECLARE_EVENT_CLASS(nfs4_write_event,
1024#define DEFINE_NFS4_WRITE_EVENT(name) \ 1024#define DEFINE_NFS4_WRITE_EVENT(name) \
1025 DEFINE_EVENT(nfs4_write_event, name, \ 1025 DEFINE_EVENT(nfs4_write_event, name, \
1026 TP_PROTO( \ 1026 TP_PROTO( \
1027 const struct nfs_write_data *data, \ 1027 const struct nfs_pgio_data *data, \
1028 int error \ 1028 int error \
1029 ), \ 1029 ), \
1030 TP_ARGS(data, error)) 1030 TP_ARGS(data, error))
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 73ce8d4fe2c8..939ae606cfa4 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1556,7 +1556,8 @@ static void encode_putrootfh(struct xdr_stream *xdr, struct compound_hdr *hdr)
1556 encode_op_hdr(xdr, OP_PUTROOTFH, decode_putrootfh_maxsz, hdr); 1556 encode_op_hdr(xdr, OP_PUTROOTFH, decode_putrootfh_maxsz, hdr);
1557} 1557}
1558 1558
1559static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, struct compound_hdr *hdr) 1559static void encode_read(struct xdr_stream *xdr, const struct nfs_pgio_args *args,
1560 struct compound_hdr *hdr)
1560{ 1561{
1561 __be32 *p; 1562 __be32 *p;
1562 1563
@@ -1701,7 +1702,8 @@ static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs4
1701 encode_nfs4_verifier(xdr, &arg->confirm); 1702 encode_nfs4_verifier(xdr, &arg->confirm);
1702} 1703}
1703 1704
1704static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr) 1705static void encode_write(struct xdr_stream *xdr, const struct nfs_pgio_args *args,
1706 struct compound_hdr *hdr)
1705{ 1707{
1706 __be32 *p; 1708 __be32 *p;
1707 1709
@@ -2451,7 +2453,7 @@ static void nfs4_xdr_enc_readdir(struct rpc_rqst *req, struct xdr_stream *xdr,
2451 * Encode a READ request 2453 * Encode a READ request
2452 */ 2454 */
2453static void nfs4_xdr_enc_read(struct rpc_rqst *req, struct xdr_stream *xdr, 2455static void nfs4_xdr_enc_read(struct rpc_rqst *req, struct xdr_stream *xdr,
2454 struct nfs_readargs *args) 2456 struct nfs_pgio_args *args)
2455{ 2457{
2456 struct compound_hdr hdr = { 2458 struct compound_hdr hdr = {
2457 .minorversion = nfs4_xdr_minorversion(&args->seq_args), 2459 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
@@ -2513,7 +2515,7 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr,
2513 * Encode a WRITE request 2515 * Encode a WRITE request
2514 */ 2516 */
2515static void nfs4_xdr_enc_write(struct rpc_rqst *req, struct xdr_stream *xdr, 2517static void nfs4_xdr_enc_write(struct rpc_rqst *req, struct xdr_stream *xdr,
2516 struct nfs_writeargs *args) 2518 struct nfs_pgio_args *args)
2517{ 2519{
2518 struct compound_hdr hdr = { 2520 struct compound_hdr hdr = {
2519 .minorversion = nfs4_xdr_minorversion(&args->seq_args), 2521 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
@@ -5085,7 +5087,8 @@ static int decode_putrootfh(struct xdr_stream *xdr)
5085 return decode_op_hdr(xdr, OP_PUTROOTFH); 5087 return decode_op_hdr(xdr, OP_PUTROOTFH);
5086} 5088}
5087 5089
5088static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs_readres *res) 5090static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req,
5091 struct nfs_pgio_res *res)
5089{ 5092{
5090 __be32 *p; 5093 __be32 *p;
5091 uint32_t count, eof, recvd; 5094 uint32_t count, eof, recvd;
@@ -5339,7 +5342,7 @@ static int decode_setclientid_confirm(struct xdr_stream *xdr)
5339 return decode_op_hdr(xdr, OP_SETCLIENTID_CONFIRM); 5342 return decode_op_hdr(xdr, OP_SETCLIENTID_CONFIRM);
5340} 5343}
5341 5344
5342static int decode_write(struct xdr_stream *xdr, struct nfs_writeres *res) 5345static int decode_write(struct xdr_stream *xdr, struct nfs_pgio_res *res)
5343{ 5346{
5344 __be32 *p; 5347 __be32 *p;
5345 int status; 5348 int status;
@@ -6636,7 +6639,7 @@ out:
6636 * Decode Read response 6639 * Decode Read response
6637 */ 6640 */
6638static int nfs4_xdr_dec_read(struct rpc_rqst *rqstp, struct xdr_stream *xdr, 6641static int nfs4_xdr_dec_read(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
6639 struct nfs_readres *res) 6642 struct nfs_pgio_res *res)
6640{ 6643{
6641 struct compound_hdr hdr; 6644 struct compound_hdr hdr;
6642 int status; 6645 int status;
@@ -6661,7 +6664,7 @@ out:
6661 * Decode WRITE response 6664 * Decode WRITE response
6662 */ 6665 */
6663static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, struct xdr_stream *xdr, 6666static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
6664 struct nfs_writeres *res) 6667 struct nfs_pgio_res *res)
6665{ 6668{
6666 struct compound_hdr hdr; 6669 struct compound_hdr hdr;
6667 int status; 6670 int status;
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index 5457745dd4f1..611320753db2 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -439,7 +439,7 @@ static void _read_done(struct ore_io_state *ios, void *private)
439 objlayout_read_done(&objios->oir, status, objios->sync); 439 objlayout_read_done(&objios->oir, status, objios->sync);
440} 440}
441 441
442int objio_read_pagelist(struct nfs_read_data *rdata) 442int objio_read_pagelist(struct nfs_pgio_data *rdata)
443{ 443{
444 struct nfs_pgio_header *hdr = rdata->header; 444 struct nfs_pgio_header *hdr = rdata->header;
445 struct objio_state *objios; 445 struct objio_state *objios;
@@ -487,7 +487,7 @@ static void _write_done(struct ore_io_state *ios, void *private)
487static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate) 487static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
488{ 488{
489 struct objio_state *objios = priv; 489 struct objio_state *objios = priv;
490 struct nfs_write_data *wdata = objios->oir.rpcdata; 490 struct nfs_pgio_data *wdata = objios->oir.rpcdata;
491 struct address_space *mapping = wdata->header->inode->i_mapping; 491 struct address_space *mapping = wdata->header->inode->i_mapping;
492 pgoff_t index = offset / PAGE_SIZE; 492 pgoff_t index = offset / PAGE_SIZE;
493 struct page *page; 493 struct page *page;
@@ -531,7 +531,7 @@ static const struct _ore_r4w_op _r4w_op = {
531 .put_page = &__r4w_put_page, 531 .put_page = &__r4w_put_page,
532}; 532};
533 533
534int objio_write_pagelist(struct nfs_write_data *wdata, int how) 534int objio_write_pagelist(struct nfs_pgio_data *wdata, int how)
535{ 535{
536 struct nfs_pgio_header *hdr = wdata->header; 536 struct nfs_pgio_header *hdr = wdata->header;
537 struct objio_state *objios; 537 struct objio_state *objios;
@@ -564,14 +564,22 @@ int objio_write_pagelist(struct nfs_write_data *wdata, int how)
564 return 0; 564 return 0;
565} 565}
566 566
567static bool objio_pg_test(struct nfs_pageio_descriptor *pgio, 567/*
568 * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
569 * of bytes (maximum @req->wb_bytes) that can be coalesced.
570 */
571static size_t objio_pg_test(struct nfs_pageio_descriptor *pgio,
568 struct nfs_page *prev, struct nfs_page *req) 572 struct nfs_page *prev, struct nfs_page *req)
569{ 573{
570 if (!pnfs_generic_pg_test(pgio, prev, req)) 574 unsigned int size;
571 return false; 575
576 size = pnfs_generic_pg_test(pgio, prev, req);
577
578 if (!size || pgio->pg_count + req->wb_bytes >
579 (unsigned long)pgio->pg_layout_private)
580 return 0;
572 581
573 return pgio->pg_count + req->wb_bytes <= 582 return min(size, req->wb_bytes);
574 (unsigned long)pgio->pg_layout_private;
575} 583}
576 584
577static void objio_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) 585static void objio_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c
index e4f9cbfec67b..765d3f54e986 100644
--- a/fs/nfs/objlayout/objlayout.c
+++ b/fs/nfs/objlayout/objlayout.c
@@ -53,10 +53,10 @@ objlayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)
53 struct objlayout *objlay; 53 struct objlayout *objlay;
54 54
55 objlay = kzalloc(sizeof(struct objlayout), gfp_flags); 55 objlay = kzalloc(sizeof(struct objlayout), gfp_flags);
56 if (objlay) { 56 if (!objlay)
57 spin_lock_init(&objlay->lock); 57 return NULL;
58 INIT_LIST_HEAD(&objlay->err_list); 58 spin_lock_init(&objlay->lock);
59 } 59 INIT_LIST_HEAD(&objlay->err_list);
60 dprintk("%s: Return %p\n", __func__, objlay); 60 dprintk("%s: Return %p\n", __func__, objlay);
61 return &objlay->pnfs_layout; 61 return &objlay->pnfs_layout;
62} 62}
@@ -229,11 +229,11 @@ objlayout_io_set_result(struct objlayout_io_res *oir, unsigned index,
229static void _rpc_read_complete(struct work_struct *work) 229static void _rpc_read_complete(struct work_struct *work)
230{ 230{
231 struct rpc_task *task; 231 struct rpc_task *task;
232 struct nfs_read_data *rdata; 232 struct nfs_pgio_data *rdata;
233 233
234 dprintk("%s enter\n", __func__); 234 dprintk("%s enter\n", __func__);
235 task = container_of(work, struct rpc_task, u.tk_work); 235 task = container_of(work, struct rpc_task, u.tk_work);
236 rdata = container_of(task, struct nfs_read_data, task); 236 rdata = container_of(task, struct nfs_pgio_data, task);
237 237
238 pnfs_ld_read_done(rdata); 238 pnfs_ld_read_done(rdata);
239} 239}
@@ -241,7 +241,7 @@ static void _rpc_read_complete(struct work_struct *work)
241void 241void
242objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync) 242objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
243{ 243{
244 struct nfs_read_data *rdata = oir->rpcdata; 244 struct nfs_pgio_data *rdata = oir->rpcdata;
245 245
246 oir->status = rdata->task.tk_status = status; 246 oir->status = rdata->task.tk_status = status;
247 if (status >= 0) 247 if (status >= 0)
@@ -266,7 +266,7 @@ objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
266 * Perform sync or async reads. 266 * Perform sync or async reads.
267 */ 267 */
268enum pnfs_try_status 268enum pnfs_try_status
269objlayout_read_pagelist(struct nfs_read_data *rdata) 269objlayout_read_pagelist(struct nfs_pgio_data *rdata)
270{ 270{
271 struct nfs_pgio_header *hdr = rdata->header; 271 struct nfs_pgio_header *hdr = rdata->header;
272 struct inode *inode = hdr->inode; 272 struct inode *inode = hdr->inode;
@@ -312,11 +312,11 @@ objlayout_read_pagelist(struct nfs_read_data *rdata)
312static void _rpc_write_complete(struct work_struct *work) 312static void _rpc_write_complete(struct work_struct *work)
313{ 313{
314 struct rpc_task *task; 314 struct rpc_task *task;
315 struct nfs_write_data *wdata; 315 struct nfs_pgio_data *wdata;
316 316
317 dprintk("%s enter\n", __func__); 317 dprintk("%s enter\n", __func__);
318 task = container_of(work, struct rpc_task, u.tk_work); 318 task = container_of(work, struct rpc_task, u.tk_work);
319 wdata = container_of(task, struct nfs_write_data, task); 319 wdata = container_of(task, struct nfs_pgio_data, task);
320 320
321 pnfs_ld_write_done(wdata); 321 pnfs_ld_write_done(wdata);
322} 322}
@@ -324,7 +324,7 @@ static void _rpc_write_complete(struct work_struct *work)
324void 324void
325objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync) 325objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
326{ 326{
327 struct nfs_write_data *wdata = oir->rpcdata; 327 struct nfs_pgio_data *wdata = oir->rpcdata;
328 328
329 oir->status = wdata->task.tk_status = status; 329 oir->status = wdata->task.tk_status = status;
330 if (status >= 0) { 330 if (status >= 0) {
@@ -351,7 +351,7 @@ objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
351 * Perform sync or async writes. 351 * Perform sync or async writes.
352 */ 352 */
353enum pnfs_try_status 353enum pnfs_try_status
354objlayout_write_pagelist(struct nfs_write_data *wdata, 354objlayout_write_pagelist(struct nfs_pgio_data *wdata,
355 int how) 355 int how)
356{ 356{
357 struct nfs_pgio_header *hdr = wdata->header; 357 struct nfs_pgio_header *hdr = wdata->header;
diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h
index 87aa1dec6120..01e041029a6c 100644
--- a/fs/nfs/objlayout/objlayout.h
+++ b/fs/nfs/objlayout/objlayout.h
@@ -119,8 +119,8 @@ extern void objio_free_lseg(struct pnfs_layout_segment *lseg);
119 */ 119 */
120extern void objio_free_result(struct objlayout_io_res *oir); 120extern void objio_free_result(struct objlayout_io_res *oir);
121 121
122extern int objio_read_pagelist(struct nfs_read_data *rdata); 122extern int objio_read_pagelist(struct nfs_pgio_data *rdata);
123extern int objio_write_pagelist(struct nfs_write_data *wdata, int how); 123extern int objio_write_pagelist(struct nfs_pgio_data *wdata, int how);
124 124
125/* 125/*
126 * callback API 126 * callback API
@@ -168,10 +168,10 @@ extern struct pnfs_layout_segment *objlayout_alloc_lseg(
168extern void objlayout_free_lseg(struct pnfs_layout_segment *); 168extern void objlayout_free_lseg(struct pnfs_layout_segment *);
169 169
170extern enum pnfs_try_status objlayout_read_pagelist( 170extern enum pnfs_try_status objlayout_read_pagelist(
171 struct nfs_read_data *); 171 struct nfs_pgio_data *);
172 172
173extern enum pnfs_try_status objlayout_write_pagelist( 173extern enum pnfs_try_status objlayout_write_pagelist(
174 struct nfs_write_data *, 174 struct nfs_pgio_data *,
175 int how); 175 int how);
176 176
177extern void objlayout_encode_layoutcommit( 177extern void objlayout_encode_layoutcommit(
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 2ffebf2081ce..b6ee3a6ee96d 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -24,9 +24,14 @@
24#include "internal.h" 24#include "internal.h"
25#include "pnfs.h" 25#include "pnfs.h"
26 26
27#define NFSDBG_FACILITY NFSDBG_PAGECACHE
28
27static struct kmem_cache *nfs_page_cachep; 29static struct kmem_cache *nfs_page_cachep;
30static const struct rpc_call_ops nfs_pgio_common_ops;
31
32static void nfs_free_request(struct nfs_page *);
28 33
29bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount) 34static bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount)
30{ 35{
31 p->npages = pagecount; 36 p->npages = pagecount;
32 if (pagecount <= ARRAY_SIZE(p->page_array)) 37 if (pagecount <= ARRAY_SIZE(p->page_array))
@@ -95,7 +100,7 @@ nfs_iocounter_dec(struct nfs_io_counter *c)
95{ 100{
96 if (atomic_dec_and_test(&c->io_count)) { 101 if (atomic_dec_and_test(&c->io_count)) {
97 clear_bit(NFS_IO_INPROGRESS, &c->flags); 102 clear_bit(NFS_IO_INPROGRESS, &c->flags);
98 smp_mb__after_clear_bit(); 103 smp_mb__after_atomic();
99 wake_up_bit(&c->flags, NFS_IO_INPROGRESS); 104 wake_up_bit(&c->flags, NFS_IO_INPROGRESS);
100 } 105 }
101} 106}
@@ -133,11 +138,156 @@ nfs_iocounter_wait(struct nfs_io_counter *c)
133 return __nfs_iocounter_wait(c); 138 return __nfs_iocounter_wait(c);
134} 139}
135 140
141static int nfs_wait_bit_uninterruptible(void *word)
142{
143 io_schedule();
144 return 0;
145}
146
147/*
148 * nfs_page_group_lock - lock the head of the page group
149 * @req - request in group that is to be locked
150 *
151 * this lock must be held if modifying the page group list
152 */
153void
154nfs_page_group_lock(struct nfs_page *req)
155{
156 struct nfs_page *head = req->wb_head;
157
158 WARN_ON_ONCE(head != head->wb_head);
159
160 wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK,
161 nfs_wait_bit_uninterruptible,
162 TASK_UNINTERRUPTIBLE);
163}
164
165/*
166 * nfs_page_group_unlock - unlock the head of the page group
167 * @req - request in group that is to be unlocked
168 */
169void
170nfs_page_group_unlock(struct nfs_page *req)
171{
172 struct nfs_page *head = req->wb_head;
173
174 WARN_ON_ONCE(head != head->wb_head);
175
176 smp_mb__before_atomic();
177 clear_bit(PG_HEADLOCK, &head->wb_flags);
178 smp_mb__after_atomic();
179 wake_up_bit(&head->wb_flags, PG_HEADLOCK);
180}
181
182/*
183 * nfs_page_group_sync_on_bit_locked
184 *
185 * must be called with page group lock held
186 */
187static bool
188nfs_page_group_sync_on_bit_locked(struct nfs_page *req, unsigned int bit)
189{
190 struct nfs_page *head = req->wb_head;
191 struct nfs_page *tmp;
192
193 WARN_ON_ONCE(!test_bit(PG_HEADLOCK, &head->wb_flags));
194 WARN_ON_ONCE(test_and_set_bit(bit, &req->wb_flags));
195
196 tmp = req->wb_this_page;
197 while (tmp != req) {
198 if (!test_bit(bit, &tmp->wb_flags))
199 return false;
200 tmp = tmp->wb_this_page;
201 }
202
203 /* true! reset all bits */
204 tmp = req;
205 do {
206 clear_bit(bit, &tmp->wb_flags);
207 tmp = tmp->wb_this_page;
208 } while (tmp != req);
209
210 return true;
211}
212
213/*
214 * nfs_page_group_sync_on_bit - set bit on current request, but only
215 * return true if the bit is set for all requests in page group
216 * @req - request in page group
217 * @bit - PG_* bit that is used to sync page group
218 */
219bool nfs_page_group_sync_on_bit(struct nfs_page *req, unsigned int bit)
220{
221 bool ret;
222
223 nfs_page_group_lock(req);
224 ret = nfs_page_group_sync_on_bit_locked(req, bit);
225 nfs_page_group_unlock(req);
226
227 return ret;
228}
229
230/*
231 * nfs_page_group_init - Initialize the page group linkage for @req
232 * @req - a new nfs request
233 * @prev - the previous request in page group, or NULL if @req is the first
234 * or only request in the group (the head).
235 */
236static inline void
237nfs_page_group_init(struct nfs_page *req, struct nfs_page *prev)
238{
239 WARN_ON_ONCE(prev == req);
240
241 if (!prev) {
242 req->wb_head = req;
243 req->wb_this_page = req;
244 } else {
245 WARN_ON_ONCE(prev->wb_this_page != prev->wb_head);
246 WARN_ON_ONCE(!test_bit(PG_HEADLOCK, &prev->wb_head->wb_flags));
247 req->wb_head = prev->wb_head;
248 req->wb_this_page = prev->wb_this_page;
249 prev->wb_this_page = req;
250
251 /* grab extra ref if head request has extra ref from
252 * the write/commit path to handle handoff between write
253 * and commit lists */
254 if (test_bit(PG_INODE_REF, &prev->wb_head->wb_flags))
255 kref_get(&req->wb_kref);
256 }
257}
258
259/*
260 * nfs_page_group_destroy - sync the destruction of page groups
262 * @kref - kref embedded in the request that no longer needs the page group
262 *
263 * releases the page group reference from each member once all
264 * members have called this function.
265 */
266static void
267nfs_page_group_destroy(struct kref *kref)
268{
269 struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref);
270 struct nfs_page *tmp, *next;
271
272 if (!nfs_page_group_sync_on_bit(req, PG_TEARDOWN))
273 return;
274
275 tmp = req;
276 do {
277 next = tmp->wb_this_page;
278 /* unlink and free */
279 tmp->wb_this_page = tmp;
280 tmp->wb_head = tmp;
281 nfs_free_request(tmp);
282 tmp = next;
283 } while (tmp != req);
284}
285
136/** 286/**
137 * nfs_create_request - Create an NFS read/write request. 287 * nfs_create_request - Create an NFS read/write request.
138 * @ctx: open context to use 288 * @ctx: open context to use
139 * @inode: inode to which the request is attached
140 * @page: page to write 289 * @page: page to write
290 * @last: last nfs request created for this page group or NULL if head
141 * @offset: starting offset within the page for the write 291 * @offset: starting offset within the page for the write
142 * @count: number of bytes to read/write 292 * @count: number of bytes to read/write
143 * 293 *
@@ -146,9 +296,9 @@ nfs_iocounter_wait(struct nfs_io_counter *c)
146 * User should ensure it is safe to sleep in this function. 296 * User should ensure it is safe to sleep in this function.
147 */ 297 */
148struct nfs_page * 298struct nfs_page *
149nfs_create_request(struct nfs_open_context *ctx, struct inode *inode, 299nfs_create_request(struct nfs_open_context *ctx, struct page *page,
150 struct page *page, 300 struct nfs_page *last, unsigned int offset,
151 unsigned int offset, unsigned int count) 301 unsigned int count)
152{ 302{
153 struct nfs_page *req; 303 struct nfs_page *req;
154 struct nfs_lock_context *l_ctx; 304 struct nfs_lock_context *l_ctx;
@@ -180,6 +330,7 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
180 req->wb_bytes = count; 330 req->wb_bytes = count;
181 req->wb_context = get_nfs_open_context(ctx); 331 req->wb_context = get_nfs_open_context(ctx);
182 kref_init(&req->wb_kref); 332 kref_init(&req->wb_kref);
333 nfs_page_group_init(req, last);
183 return req; 334 return req;
184} 335}
185 336
@@ -193,9 +344,9 @@ void nfs_unlock_request(struct nfs_page *req)
193 printk(KERN_ERR "NFS: Invalid unlock attempted\n"); 344 printk(KERN_ERR "NFS: Invalid unlock attempted\n");
194 BUG(); 345 BUG();
195 } 346 }
196 smp_mb__before_clear_bit(); 347 smp_mb__before_atomic();
197 clear_bit(PG_BUSY, &req->wb_flags); 348 clear_bit(PG_BUSY, &req->wb_flags);
198 smp_mb__after_clear_bit(); 349 smp_mb__after_atomic();
199 wake_up_bit(&req->wb_flags, PG_BUSY); 350 wake_up_bit(&req->wb_flags, PG_BUSY);
200} 351}
201 352
@@ -237,16 +388,22 @@ static void nfs_clear_request(struct nfs_page *req)
237 } 388 }
238} 389}
239 390
240
241/** 391/**
242 * nfs_release_request - Release the count on an NFS read/write request 392 * nfs_release_request - Release the count on an NFS read/write request
243 * @req: request to release 393 * @req: request to release
244 * 394 *
245 * Note: Should never be called with the spinlock held! 395 * Note: Should never be called with the spinlock held!
246 */ 396 */
247static void nfs_free_request(struct kref *kref) 397static void nfs_free_request(struct nfs_page *req)
248{ 398{
249 struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref); 399 WARN_ON_ONCE(req->wb_this_page != req);
400
401 /* extra debug: make sure no sync bits are still set */
402 WARN_ON_ONCE(test_bit(PG_TEARDOWN, &req->wb_flags));
403 WARN_ON_ONCE(test_bit(PG_UNLOCKPAGE, &req->wb_flags));
404 WARN_ON_ONCE(test_bit(PG_UPTODATE, &req->wb_flags));
405 WARN_ON_ONCE(test_bit(PG_WB_END, &req->wb_flags));
406 WARN_ON_ONCE(test_bit(PG_REMOVE, &req->wb_flags));
250 407
251 /* Release struct file and open context */ 408 /* Release struct file and open context */
252 nfs_clear_request(req); 409 nfs_clear_request(req);
@@ -255,13 +412,7 @@ static void nfs_free_request(struct kref *kref)
255 412
256void nfs_release_request(struct nfs_page *req) 413void nfs_release_request(struct nfs_page *req)
257{ 414{
258 kref_put(&req->wb_kref, nfs_free_request); 415 kref_put(&req->wb_kref, nfs_page_group_destroy);
259}
260
261static int nfs_wait_bit_uninterruptible(void *word)
262{
263 io_schedule();
264 return 0;
265} 416}
266 417
267/** 418/**
@@ -279,22 +430,249 @@ nfs_wait_on_request(struct nfs_page *req)
279 TASK_UNINTERRUPTIBLE); 430 TASK_UNINTERRUPTIBLE);
280} 431}
281 432
282bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req) 433/*
434 * nfs_generic_pg_test - determine if requests can be coalesced
435 * @desc: pointer to descriptor
436 * @prev: previous request in desc, or NULL
437 * @req: this request
438 *
439 * Returns zero if @req cannot be coalesced into @desc, otherwise it returns
440 * the number of bytes of @req (at most @req->wb_bytes) that can be coalesced.
441 */
442size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc,
443 struct nfs_page *prev, struct nfs_page *req)
283{ 444{
284 /* 445 if (desc->pg_count > desc->pg_bsize) {
285 * FIXME: ideally we should be able to coalesce all requests 446 /* should never happen */
286 * that are not block boundary aligned, but currently this 447 WARN_ON_ONCE(1);
287 * is problematic for the case of bsize < PAGE_CACHE_SIZE,
288 * since nfs_flush_multi and nfs_pagein_multi assume you
289 * can have only one struct nfs_page.
290 */
291 if (desc->pg_bsize < PAGE_SIZE)
292 return 0; 448 return 0;
449 }
293 450
294 return desc->pg_count + req->wb_bytes <= desc->pg_bsize; 451 return min(desc->pg_bsize - desc->pg_count, (size_t)req->wb_bytes);
295} 452}
296EXPORT_SYMBOL_GPL(nfs_generic_pg_test); 453EXPORT_SYMBOL_GPL(nfs_generic_pg_test);
297 454
455static inline struct nfs_rw_header *NFS_RW_HEADER(struct nfs_pgio_header *hdr)
456{
457 return container_of(hdr, struct nfs_rw_header, header);
458}
459
460/**
461 * nfs_rw_header_alloc - Allocate a header for a read or write
462 * @ops: Read or write function vector
463 */
464struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *ops)
465{
466 struct nfs_rw_header *header = ops->rw_alloc_header();
467
468 if (header) {
469 struct nfs_pgio_header *hdr = &header->header;
470
471 INIT_LIST_HEAD(&hdr->pages);
472 spin_lock_init(&hdr->lock);
473 atomic_set(&hdr->refcnt, 0);
474 hdr->rw_ops = ops;
475 }
476 return header;
477}
478EXPORT_SYMBOL_GPL(nfs_rw_header_alloc);
479
480/*
481 * nfs_rw_header_free - Free a read or write header
482 * @hdr: The header to free
483 */
484void nfs_rw_header_free(struct nfs_pgio_header *hdr)
485{
486 hdr->rw_ops->rw_free_header(NFS_RW_HEADER(hdr));
487}
488EXPORT_SYMBOL_GPL(nfs_rw_header_free);
489
490/**
491 * nfs_pgio_data_alloc - Allocate pageio data
492 * @hdr: The header making a request
493 * @pagecount: Number of pages to create
494 */
495static struct nfs_pgio_data *nfs_pgio_data_alloc(struct nfs_pgio_header *hdr,
496 unsigned int pagecount)
497{
498 struct nfs_pgio_data *data, *prealloc;
499
500 prealloc = &NFS_RW_HEADER(hdr)->rpc_data;
501 if (prealloc->header == NULL)
502 data = prealloc;
503 else
504 data = kzalloc(sizeof(*data), GFP_KERNEL);
505 if (!data)
506 goto out;
507
508 if (nfs_pgarray_set(&data->pages, pagecount)) {
509 data->header = hdr;
510 atomic_inc(&hdr->refcnt);
511 } else {
512 if (data != prealloc)
513 kfree(data);
514 data = NULL;
515 }
516out:
517 return data;
518}
519
520/**
521 * nfs_pgio_data_release - Properly free pageio data
522 * @data: The data to release
523 */
524void nfs_pgio_data_release(struct nfs_pgio_data *data)
525{
526 struct nfs_pgio_header *hdr = data->header;
527 struct nfs_rw_header *pageio_header = NFS_RW_HEADER(hdr);
528
529 put_nfs_open_context(data->args.context);
530 if (data->pages.pagevec != data->pages.page_array)
531 kfree(data->pages.pagevec);
532 if (data == &pageio_header->rpc_data) {
533 data->header = NULL;
534 data = NULL;
535 }
536 if (atomic_dec_and_test(&hdr->refcnt))
537 hdr->completion_ops->completion(hdr);
538 /* Note: we only free the rpc_task after callbacks are done.
539 * See the comment in rpc_free_task() for why
540 */
541 kfree(data);
542}
543EXPORT_SYMBOL_GPL(nfs_pgio_data_release);
544
545/**
546 * nfs_pgio_rpcsetup - Set up arguments for a pageio call
547 * @data: The pageio data
548 * @count: Number of bytes to read or write
549 * @offset: Initial offset
550 * @how: How to commit data (writes only)
551 * @cinfo: Commit information for the call (writes only)
552 */
553static void nfs_pgio_rpcsetup(struct nfs_pgio_data *data,
554 unsigned int count, unsigned int offset,
555 int how, struct nfs_commit_info *cinfo)
556{
557 struct nfs_page *req = data->header->req;
558
559 /* Set up the RPC argument and reply structs
560 * NB: take care not to mess about with data->commit et al. */
561
562 data->args.fh = NFS_FH(data->header->inode);
563 data->args.offset = req_offset(req) + offset;
564 /* pnfs_set_layoutcommit needs this */
565 data->mds_offset = data->args.offset;
566 data->args.pgbase = req->wb_pgbase + offset;
567 data->args.pages = data->pages.pagevec;
568 data->args.count = count;
569 data->args.context = get_nfs_open_context(req->wb_context);
570 data->args.lock_context = req->wb_lock_context;
571 data->args.stable = NFS_UNSTABLE;
572 switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) {
573 case 0:
574 break;
575 case FLUSH_COND_STABLE:
576 if (nfs_reqs_to_commit(cinfo))
577 break;
578 default:
579 data->args.stable = NFS_FILE_SYNC;
580 }
581
582 data->res.fattr = &data->fattr;
583 data->res.count = count;
584 data->res.eof = 0;
585 data->res.verf = &data->verf;
586 nfs_fattr_init(&data->fattr);
587}
588
589/**
590 * nfs_pgio_prepare - Prepare pageio data to go over the wire
591 * @task: The current task
592 * @calldata: pageio data to prepare
593 */
594static void nfs_pgio_prepare(struct rpc_task *task, void *calldata)
595{
596 struct nfs_pgio_data *data = calldata;
597 int err;
598 err = NFS_PROTO(data->header->inode)->pgio_rpc_prepare(task, data);
599 if (err)
600 rpc_exit(task, err);
601}
602
603int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_data *data,
604 const struct rpc_call_ops *call_ops, int how, int flags)
605{
606 struct rpc_task *task;
607 struct rpc_message msg = {
608 .rpc_argp = &data->args,
609 .rpc_resp = &data->res,
610 .rpc_cred = data->header->cred,
611 };
612 struct rpc_task_setup task_setup_data = {
613 .rpc_client = clnt,
614 .task = &data->task,
615 .rpc_message = &msg,
616 .callback_ops = call_ops,
617 .callback_data = data,
618 .workqueue = nfsiod_workqueue,
619 .flags = RPC_TASK_ASYNC | flags,
620 };
621 int ret = 0;
622
623 data->header->rw_ops->rw_initiate(data, &msg, &task_setup_data, how);
624
625 dprintk("NFS: %5u initiated pgio call "
626 "(req %s/%llu, %u bytes @ offset %llu)\n",
627 data->task.tk_pid,
628 data->header->inode->i_sb->s_id,
629 (unsigned long long)NFS_FILEID(data->header->inode),
630 data->args.count,
631 (unsigned long long)data->args.offset);
632
633 task = rpc_run_task(&task_setup_data);
634 if (IS_ERR(task)) {
635 ret = PTR_ERR(task);
636 goto out;
637 }
638 if (how & FLUSH_SYNC) {
639 ret = rpc_wait_for_completion_task(task);
640 if (ret == 0)
641 ret = task->tk_status;
642 }
643 rpc_put_task(task);
644out:
645 return ret;
646}
647EXPORT_SYMBOL_GPL(nfs_initiate_pgio);
648
649/**
650 * nfs_pgio_error - Clean up from a pageio error
651 * @desc: IO descriptor
652 * @hdr: pageio header
653 */
654static int nfs_pgio_error(struct nfs_pageio_descriptor *desc,
655 struct nfs_pgio_header *hdr)
656{
657 set_bit(NFS_IOHDR_REDO, &hdr->flags);
658 nfs_pgio_data_release(hdr->data);
659 hdr->data = NULL;
660 desc->pg_completion_ops->error_cleanup(&desc->pg_list);
661 return -ENOMEM;
662}
663
664/**
665 * nfs_pgio_release - Release pageio data
666 * @calldata: The pageio data to release
667 */
668static void nfs_pgio_release(void *calldata)
669{
670 struct nfs_pgio_data *data = calldata;
671 if (data->header->rw_ops->rw_release)
672 data->header->rw_ops->rw_release(data);
673 nfs_pgio_data_release(data);
674}
675
298/** 676/**
299 * nfs_pageio_init - initialise a page io descriptor 677 * nfs_pageio_init - initialise a page io descriptor
300 * @desc: pointer to descriptor 678 * @desc: pointer to descriptor
@@ -307,6 +685,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
307 struct inode *inode, 685 struct inode *inode,
308 const struct nfs_pageio_ops *pg_ops, 686 const struct nfs_pageio_ops *pg_ops,
309 const struct nfs_pgio_completion_ops *compl_ops, 687 const struct nfs_pgio_completion_ops *compl_ops,
688 const struct nfs_rw_ops *rw_ops,
310 size_t bsize, 689 size_t bsize,
311 int io_flags) 690 int io_flags)
312{ 691{
@@ -320,6 +699,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
320 desc->pg_inode = inode; 699 desc->pg_inode = inode;
321 desc->pg_ops = pg_ops; 700 desc->pg_ops = pg_ops;
322 desc->pg_completion_ops = compl_ops; 701 desc->pg_completion_ops = compl_ops;
702 desc->pg_rw_ops = rw_ops;
323 desc->pg_ioflags = io_flags; 703 desc->pg_ioflags = io_flags;
324 desc->pg_error = 0; 704 desc->pg_error = 0;
325 desc->pg_lseg = NULL; 705 desc->pg_lseg = NULL;
@@ -328,6 +708,94 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
328} 708}
329EXPORT_SYMBOL_GPL(nfs_pageio_init); 709EXPORT_SYMBOL_GPL(nfs_pageio_init);
330 710
711/**
712 * nfs_pgio_result - Basic pageio error handling
713 * @task: The task that ran
714 * @calldata: Pageio data to check
715 */
716static void nfs_pgio_result(struct rpc_task *task, void *calldata)
717{
718 struct nfs_pgio_data *data = calldata;
719 struct inode *inode = data->header->inode;
720
721 dprintk("NFS: %s: %5u, (status %d)\n", __func__,
722 task->tk_pid, task->tk_status);
723
724 if (data->header->rw_ops->rw_done(task, data, inode) != 0)
725 return;
726 if (task->tk_status < 0)
727 nfs_set_pgio_error(data->header, task->tk_status, data->args.offset);
728 else
729 data->header->rw_ops->rw_result(task, data);
730}
731
732/*
733 * Create an RPC task for the given read or write request and kick it.
734 * The page must have been locked by the caller.
735 *
736 * It may happen that the page we're passed is not marked dirty.
737 * This is the case if nfs_updatepage detects a conflicting request
738 * that has been written but not committed.
739 */
740int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
741 struct nfs_pgio_header *hdr)
742{
743 struct nfs_page *req;
744 struct page **pages;
745 struct nfs_pgio_data *data;
746 struct list_head *head = &desc->pg_list;
747 struct nfs_commit_info cinfo;
748
749 data = nfs_pgio_data_alloc(hdr, nfs_page_array_len(desc->pg_base,
750 desc->pg_count));
751 if (!data)
752 return nfs_pgio_error(desc, hdr);
753
754 nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);
755 pages = data->pages.pagevec;
756 while (!list_empty(head)) {
757 req = nfs_list_entry(head->next);
758 nfs_list_remove_request(req);
759 nfs_list_add_request(req, &hdr->pages);
760 *pages++ = req->wb_page;
761 }
762
763 if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
764 (desc->pg_moreio || nfs_reqs_to_commit(&cinfo)))
765 desc->pg_ioflags &= ~FLUSH_COND_STABLE;
766
767 /* Set up the argument struct */
768 nfs_pgio_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags, &cinfo);
769 hdr->data = data;
770 desc->pg_rpc_callops = &nfs_pgio_common_ops;
771 return 0;
772}
773EXPORT_SYMBOL_GPL(nfs_generic_pgio);
774
775static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc)
776{
777 struct nfs_rw_header *rw_hdr;
778 struct nfs_pgio_header *hdr;
779 int ret;
780
781 rw_hdr = nfs_rw_header_alloc(desc->pg_rw_ops);
782 if (!rw_hdr) {
783 desc->pg_completion_ops->error_cleanup(&desc->pg_list);
784 return -ENOMEM;
785 }
786 hdr = &rw_hdr->header;
787 nfs_pgheader_init(desc, hdr, nfs_rw_header_free);
788 atomic_inc(&hdr->refcnt);
789 ret = nfs_generic_pgio(desc, hdr);
790 if (ret == 0)
791 ret = nfs_initiate_pgio(NFS_CLIENT(hdr->inode),
792 hdr->data, desc->pg_rpc_callops,
793 desc->pg_ioflags, 0);
794 if (atomic_dec_and_test(&hdr->refcnt))
795 hdr->completion_ops->completion(hdr);
796 return ret;
797}
798
331static bool nfs_match_open_context(const struct nfs_open_context *ctx1, 799static bool nfs_match_open_context(const struct nfs_open_context *ctx1,
332 const struct nfs_open_context *ctx2) 800 const struct nfs_open_context *ctx2)
333{ 801{
@@ -356,18 +824,23 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev,
356 struct nfs_page *req, 824 struct nfs_page *req,
357 struct nfs_pageio_descriptor *pgio) 825 struct nfs_pageio_descriptor *pgio)
358{ 826{
359 if (!nfs_match_open_context(req->wb_context, prev->wb_context)) 827 size_t size;
360 return false; 828
361 if (req->wb_context->dentry->d_inode->i_flock != NULL && 829 if (prev) {
362 !nfs_match_lock_context(req->wb_lock_context, prev->wb_lock_context)) 830 if (!nfs_match_open_context(req->wb_context, prev->wb_context))
363 return false; 831 return false;
364 if (req->wb_pgbase != 0) 832 if (req->wb_context->dentry->d_inode->i_flock != NULL &&
365 return false; 833 !nfs_match_lock_context(req->wb_lock_context,
366 if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE) 834 prev->wb_lock_context))
367 return false; 835 return false;
368 if (req_offset(req) != req_offset(prev) + prev->wb_bytes) 836 if (req_offset(req) != req_offset(prev) + prev->wb_bytes)
369 return false; 837 return false;
370 return pgio->pg_ops->pg_test(pgio, prev, req); 838 }
839 size = pgio->pg_ops->pg_test(pgio, prev, req);
840 WARN_ON_ONCE(size > req->wb_bytes);
841 if (size && size < req->wb_bytes)
842 req->wb_bytes = size;
843 return size > 0;
371} 844}
372 845
373/** 846/**
@@ -381,17 +854,16 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev,
381static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, 854static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
382 struct nfs_page *req) 855 struct nfs_page *req)
383{ 856{
857 struct nfs_page *prev = NULL;
384 if (desc->pg_count != 0) { 858 if (desc->pg_count != 0) {
385 struct nfs_page *prev;
386
387 prev = nfs_list_entry(desc->pg_list.prev); 859 prev = nfs_list_entry(desc->pg_list.prev);
388 if (!nfs_can_coalesce_requests(prev, req, desc))
389 return 0;
390 } else { 860 } else {
391 if (desc->pg_ops->pg_init) 861 if (desc->pg_ops->pg_init)
392 desc->pg_ops->pg_init(desc, req); 862 desc->pg_ops->pg_init(desc, req);
393 desc->pg_base = req->wb_pgbase; 863 desc->pg_base = req->wb_pgbase;
394 } 864 }
865 if (!nfs_can_coalesce_requests(prev, req, desc))
866 return 0;
395 nfs_list_remove_request(req); 867 nfs_list_remove_request(req);
396 nfs_list_add_request(req, &desc->pg_list); 868 nfs_list_add_request(req, &desc->pg_list);
397 desc->pg_count += req->wb_bytes; 869 desc->pg_count += req->wb_bytes;
@@ -421,22 +893,73 @@ static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc)
421 * @desc: destination io descriptor 893 * @desc: destination io descriptor
422 * @req: request 894 * @req: request
423 * 895 *
896 * This may split a request into subrequests which are all part of the
897 * same page group.
898 *
424 * Returns true if the request 'req' was successfully coalesced into the 899 * Returns true if the request 'req' was successfully coalesced into the
425 * existing list of pages 'desc'. 900 * existing list of pages 'desc'.
426 */ 901 */
427static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, 902static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
428 struct nfs_page *req) 903 struct nfs_page *req)
429{ 904{
430 while (!nfs_pageio_do_add_request(desc, req)) { 905 struct nfs_page *subreq;
431 desc->pg_moreio = 1; 906 unsigned int bytes_left = 0;
432 nfs_pageio_doio(desc); 907 unsigned int offset, pgbase;
433 if (desc->pg_error < 0) 908
434 return 0; 909 nfs_page_group_lock(req);
435 desc->pg_moreio = 0; 910
436 if (desc->pg_recoalesce) 911 subreq = req;
437 return 0; 912 bytes_left = subreq->wb_bytes;
438 } 913 offset = subreq->wb_offset;
914 pgbase = subreq->wb_pgbase;
915
916 do {
917 if (!nfs_pageio_do_add_request(desc, subreq)) {
918 /* make sure pg_test call(s) did nothing */
919 WARN_ON_ONCE(subreq->wb_bytes != bytes_left);
920 WARN_ON_ONCE(subreq->wb_offset != offset);
921 WARN_ON_ONCE(subreq->wb_pgbase != pgbase);
922
923 nfs_page_group_unlock(req);
924 desc->pg_moreio = 1;
925 nfs_pageio_doio(desc);
926 if (desc->pg_error < 0)
927 return 0;
928 desc->pg_moreio = 0;
929 if (desc->pg_recoalesce)
930 return 0;
931 /* retry add_request for this subreq */
932 nfs_page_group_lock(req);
933 continue;
934 }
935
936 /* check for buggy pg_test call(s) */
937 WARN_ON_ONCE(subreq->wb_bytes + subreq->wb_pgbase > PAGE_SIZE);
938 WARN_ON_ONCE(subreq->wb_bytes > bytes_left);
939 WARN_ON_ONCE(subreq->wb_bytes == 0);
940
941 bytes_left -= subreq->wb_bytes;
942 offset += subreq->wb_bytes;
943 pgbase += subreq->wb_bytes;
944
945 if (bytes_left) {
946 subreq = nfs_create_request(req->wb_context,
947 req->wb_page,
948 subreq, pgbase, bytes_left);
949 if (IS_ERR(subreq))
950 goto err_ptr;
951 nfs_lock_request(subreq);
952 subreq->wb_offset = offset;
953 subreq->wb_index = req->wb_index;
954 }
955 } while (bytes_left > 0);
956
957 nfs_page_group_unlock(req);
439 return 1; 958 return 1;
959err_ptr:
960 desc->pg_error = PTR_ERR(subreq);
961 nfs_page_group_unlock(req);
962 return 0;
440} 963}
441 964
442static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc) 965static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc)
@@ -535,3 +1058,13 @@ void nfs_destroy_nfspagecache(void)
535 kmem_cache_destroy(nfs_page_cachep); 1058 kmem_cache_destroy(nfs_page_cachep);
536} 1059}
537 1060
1061static const struct rpc_call_ops nfs_pgio_common_ops = {
1062 .rpc_call_prepare = nfs_pgio_prepare,
1063 .rpc_call_done = nfs_pgio_result,
1064 .rpc_release = nfs_pgio_release,
1065};
1066
1067const struct nfs_pageio_ops nfs_pgio_rw_ops = {
1068 .pg_test = nfs_generic_pg_test,
1069 .pg_doio = nfs_generic_pg_pgios,
1070};
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index cb53d450ae32..6fdcd233d6f7 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1388,11 +1388,6 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r
1388 1388
1389 WARN_ON_ONCE(pgio->pg_lseg != NULL); 1389 WARN_ON_ONCE(pgio->pg_lseg != NULL);
1390 1390
1391 if (req->wb_offset != req->wb_pgbase) {
1392 nfs_pageio_reset_read_mds(pgio);
1393 return;
1394 }
1395
1396 if (pgio->pg_dreq == NULL) 1391 if (pgio->pg_dreq == NULL)
1397 rd_size = i_size_read(pgio->pg_inode) - req_offset(req); 1392 rd_size = i_size_read(pgio->pg_inode) - req_offset(req);
1398 else 1393 else
@@ -1417,11 +1412,6 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
1417{ 1412{
1418 WARN_ON_ONCE(pgio->pg_lseg != NULL); 1413 WARN_ON_ONCE(pgio->pg_lseg != NULL);
1419 1414
1420 if (req->wb_offset != req->wb_pgbase) {
1421 nfs_pageio_reset_write_mds(pgio);
1422 return;
1423 }
1424
1425 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 1415 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
1426 req->wb_context, 1416 req->wb_context,
1427 req_offset(req), 1417 req_offset(req),
@@ -1434,56 +1424,49 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
1434} 1424}
1435EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write); 1425EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write);
1436 1426
1437void 1427/*
1438pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode, 1428 * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
1439 const struct nfs_pgio_completion_ops *compl_ops) 1429 * of bytes (maximum @req->wb_bytes) that can be coalesced.
1440{ 1430 */
1441 struct nfs_server *server = NFS_SERVER(inode); 1431size_t
1442 struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
1443
1444 if (ld == NULL)
1445 nfs_pageio_init_read(pgio, inode, compl_ops);
1446 else
1447 nfs_pageio_init(pgio, inode, ld->pg_read_ops, compl_ops, server->rsize, 0);
1448}
1449
1450void
1451pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode,
1452 int ioflags,
1453 const struct nfs_pgio_completion_ops *compl_ops)
1454{
1455 struct nfs_server *server = NFS_SERVER(inode);
1456 struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
1457
1458 if (ld == NULL)
1459 nfs_pageio_init_write(pgio, inode, ioflags, compl_ops);
1460 else
1461 nfs_pageio_init(pgio, inode, ld->pg_write_ops, compl_ops, server->wsize, ioflags);
1462}
1463
1464bool
1465pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, 1432pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
1466 struct nfs_page *req) 1433 struct nfs_page *req)
1467{ 1434{
1468 if (pgio->pg_lseg == NULL) 1435 unsigned int size;
1469 return nfs_generic_pg_test(pgio, prev, req); 1436 u64 seg_end, req_start, seg_left;
1437
1438 size = nfs_generic_pg_test(pgio, prev, req);
1439 if (!size)
1440 return 0;
1470 1441
1471 /* 1442 /*
1472 * Test if a nfs_page is fully contained in the pnfs_layout_range. 1443 * 'size' contains the number of bytes left in the current page (up
1473 * Note that this test makes several assumptions: 1444 * to the original size asked for in @req->wb_bytes).
1474 * - that the previous nfs_page in the struct nfs_pageio_descriptor 1445 *
1475 * is known to lie within the range. 1446 * Calculate how many bytes are left in the layout segment
1476 * - that the nfs_page being tested is known to be contiguous with the 1447 * and if there are less bytes than 'size', return that instead.
1477 * previous nfs_page.
1478 * - Layout ranges are page aligned, so we only have to test the
1479 * start offset of the request.
1480 * 1448 *
1481 * Please also note that 'end_offset' is actually the offset of the 1449 * Please also note that 'end_offset' is actually the offset of the
1482 * first byte that lies outside the pnfs_layout_range. FIXME? 1450 * first byte that lies outside the pnfs_layout_range. FIXME?
1483 * 1451 *
1484 */ 1452 */
1485 return req_offset(req) < end_offset(pgio->pg_lseg->pls_range.offset, 1453 if (pgio->pg_lseg) {
1486 pgio->pg_lseg->pls_range.length); 1454 seg_end = end_offset(pgio->pg_lseg->pls_range.offset,
1455 pgio->pg_lseg->pls_range.length);
1456 req_start = req_offset(req);
1457 WARN_ON_ONCE(req_start > seg_end);
1458 /* start of request is past the last byte of this segment */
1459 if (req_start >= seg_end)
1460 return 0;
1461
1462 /* adjust 'size' iff there are fewer bytes left in the
1463 * segment than what nfs_generic_pg_test returned */
1464 seg_left = seg_end - req_start;
1465 if (seg_left < size)
1466 size = (unsigned int)seg_left;
1467 }
1468
1469 return size;
1487} 1470}
1488EXPORT_SYMBOL_GPL(pnfs_generic_pg_test); 1471EXPORT_SYMBOL_GPL(pnfs_generic_pg_test);
1489 1472
@@ -1496,7 +1479,7 @@ int pnfs_write_done_resend_to_mds(struct inode *inode,
1496 LIST_HEAD(failed); 1479 LIST_HEAD(failed);
1497 1480
1498 /* Resend all requests through the MDS */ 1481 /* Resend all requests through the MDS */
1499 nfs_pageio_init_write(&pgio, inode, FLUSH_STABLE, compl_ops); 1482 nfs_pageio_init_write(&pgio, inode, FLUSH_STABLE, true, compl_ops);
1500 pgio.pg_dreq = dreq; 1483 pgio.pg_dreq = dreq;
1501 while (!list_empty(head)) { 1484 while (!list_empty(head)) {
1502 struct nfs_page *req = nfs_list_entry(head->next); 1485 struct nfs_page *req = nfs_list_entry(head->next);
@@ -1519,7 +1502,7 @@ int pnfs_write_done_resend_to_mds(struct inode *inode,
1519} 1502}
1520EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds); 1503EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds);
1521 1504
1522static void pnfs_ld_handle_write_error(struct nfs_write_data *data) 1505static void pnfs_ld_handle_write_error(struct nfs_pgio_data *data)
1523{ 1506{
1524 struct nfs_pgio_header *hdr = data->header; 1507 struct nfs_pgio_header *hdr = data->header;
1525 1508
@@ -1538,7 +1521,7 @@ static void pnfs_ld_handle_write_error(struct nfs_write_data *data)
1538/* 1521/*
1539 * Called by non rpc-based layout drivers 1522 * Called by non rpc-based layout drivers
1540 */ 1523 */
1541void pnfs_ld_write_done(struct nfs_write_data *data) 1524void pnfs_ld_write_done(struct nfs_pgio_data *data)
1542{ 1525{
1543 struct nfs_pgio_header *hdr = data->header; 1526 struct nfs_pgio_header *hdr = data->header;
1544 1527
@@ -1554,7 +1537,7 @@ EXPORT_SYMBOL_GPL(pnfs_ld_write_done);
1554 1537
1555static void 1538static void
1556pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, 1539pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
1557 struct nfs_write_data *data) 1540 struct nfs_pgio_data *data)
1558{ 1541{
1559 struct nfs_pgio_header *hdr = data->header; 1542 struct nfs_pgio_header *hdr = data->header;
1560 1543
@@ -1563,11 +1546,11 @@ pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
1563 nfs_pageio_reset_write_mds(desc); 1546 nfs_pageio_reset_write_mds(desc);
1564 desc->pg_recoalesce = 1; 1547 desc->pg_recoalesce = 1;
1565 } 1548 }
1566 nfs_writedata_release(data); 1549 nfs_pgio_data_release(data);
1567} 1550}
1568 1551
1569static enum pnfs_try_status 1552static enum pnfs_try_status
1570pnfs_try_to_write_data(struct nfs_write_data *wdata, 1553pnfs_try_to_write_data(struct nfs_pgio_data *wdata,
1571 const struct rpc_call_ops *call_ops, 1554 const struct rpc_call_ops *call_ops,
1572 struct pnfs_layout_segment *lseg, 1555 struct pnfs_layout_segment *lseg,
1573 int how) 1556 int how)
@@ -1589,41 +1572,36 @@ pnfs_try_to_write_data(struct nfs_write_data *wdata,
1589} 1572}
1590 1573
1591static void 1574static void
1592pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *head, int how) 1575pnfs_do_write(struct nfs_pageio_descriptor *desc,
1576 struct nfs_pgio_header *hdr, int how)
1593{ 1577{
1594 struct nfs_write_data *data; 1578 struct nfs_pgio_data *data = hdr->data;
1595 const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; 1579 const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
1596 struct pnfs_layout_segment *lseg = desc->pg_lseg; 1580 struct pnfs_layout_segment *lseg = desc->pg_lseg;
1581 enum pnfs_try_status trypnfs;
1597 1582
1598 desc->pg_lseg = NULL; 1583 desc->pg_lseg = NULL;
1599 while (!list_empty(head)) { 1584 trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how);
1600 enum pnfs_try_status trypnfs; 1585 if (trypnfs == PNFS_NOT_ATTEMPTED)
1601 1586 pnfs_write_through_mds(desc, data);
1602 data = list_first_entry(head, struct nfs_write_data, list);
1603 list_del_init(&data->list);
1604
1605 trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how);
1606 if (trypnfs == PNFS_NOT_ATTEMPTED)
1607 pnfs_write_through_mds(desc, data);
1608 }
1609 pnfs_put_lseg(lseg); 1587 pnfs_put_lseg(lseg);
1610} 1588}
1611 1589
1612static void pnfs_writehdr_free(struct nfs_pgio_header *hdr) 1590static void pnfs_writehdr_free(struct nfs_pgio_header *hdr)
1613{ 1591{
1614 pnfs_put_lseg(hdr->lseg); 1592 pnfs_put_lseg(hdr->lseg);
1615 nfs_writehdr_free(hdr); 1593 nfs_rw_header_free(hdr);
1616} 1594}
1617EXPORT_SYMBOL_GPL(pnfs_writehdr_free); 1595EXPORT_SYMBOL_GPL(pnfs_writehdr_free);
1618 1596
1619int 1597int
1620pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) 1598pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
1621{ 1599{
1622 struct nfs_write_header *whdr; 1600 struct nfs_rw_header *whdr;
1623 struct nfs_pgio_header *hdr; 1601 struct nfs_pgio_header *hdr;
1624 int ret; 1602 int ret;
1625 1603
1626 whdr = nfs_writehdr_alloc(); 1604 whdr = nfs_rw_header_alloc(desc->pg_rw_ops);
1627 if (!whdr) { 1605 if (!whdr) {
1628 desc->pg_completion_ops->error_cleanup(&desc->pg_list); 1606 desc->pg_completion_ops->error_cleanup(&desc->pg_list);
1629 pnfs_put_lseg(desc->pg_lseg); 1607 pnfs_put_lseg(desc->pg_lseg);
@@ -1634,12 +1612,12 @@ pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
1634 nfs_pgheader_init(desc, hdr, pnfs_writehdr_free); 1612 nfs_pgheader_init(desc, hdr, pnfs_writehdr_free);
1635 hdr->lseg = pnfs_get_lseg(desc->pg_lseg); 1613 hdr->lseg = pnfs_get_lseg(desc->pg_lseg);
1636 atomic_inc(&hdr->refcnt); 1614 atomic_inc(&hdr->refcnt);
1637 ret = nfs_generic_flush(desc, hdr); 1615 ret = nfs_generic_pgio(desc, hdr);
1638 if (ret != 0) { 1616 if (ret != 0) {
1639 pnfs_put_lseg(desc->pg_lseg); 1617 pnfs_put_lseg(desc->pg_lseg);
1640 desc->pg_lseg = NULL; 1618 desc->pg_lseg = NULL;
1641 } else 1619 } else
1642 pnfs_do_multiple_writes(desc, &hdr->rpc_list, desc->pg_ioflags); 1620 pnfs_do_write(desc, hdr, desc->pg_ioflags);
1643 if (atomic_dec_and_test(&hdr->refcnt)) 1621 if (atomic_dec_and_test(&hdr->refcnt))
1644 hdr->completion_ops->completion(hdr); 1622 hdr->completion_ops->completion(hdr);
1645 return ret; 1623 return ret;
@@ -1655,7 +1633,7 @@ int pnfs_read_done_resend_to_mds(struct inode *inode,
1655 LIST_HEAD(failed); 1633 LIST_HEAD(failed);
1656 1634
1657 /* Resend all requests through the MDS */ 1635 /* Resend all requests through the MDS */
1658 nfs_pageio_init_read(&pgio, inode, compl_ops); 1636 nfs_pageio_init_read(&pgio, inode, true, compl_ops);
1659 pgio.pg_dreq = dreq; 1637 pgio.pg_dreq = dreq;
1660 while (!list_empty(head)) { 1638 while (!list_empty(head)) {
1661 struct nfs_page *req = nfs_list_entry(head->next); 1639 struct nfs_page *req = nfs_list_entry(head->next);
@@ -1674,7 +1652,7 @@ int pnfs_read_done_resend_to_mds(struct inode *inode,
1674} 1652}
1675EXPORT_SYMBOL_GPL(pnfs_read_done_resend_to_mds); 1653EXPORT_SYMBOL_GPL(pnfs_read_done_resend_to_mds);
1676 1654
1677static void pnfs_ld_handle_read_error(struct nfs_read_data *data) 1655static void pnfs_ld_handle_read_error(struct nfs_pgio_data *data)
1678{ 1656{
1679 struct nfs_pgio_header *hdr = data->header; 1657 struct nfs_pgio_header *hdr = data->header;
1680 1658
@@ -1693,7 +1671,7 @@ static void pnfs_ld_handle_read_error(struct nfs_read_data *data)
1693/* 1671/*
1694 * Called by non rpc-based layout drivers 1672 * Called by non rpc-based layout drivers
1695 */ 1673 */
1696void pnfs_ld_read_done(struct nfs_read_data *data) 1674void pnfs_ld_read_done(struct nfs_pgio_data *data)
1697{ 1675{
1698 struct nfs_pgio_header *hdr = data->header; 1676 struct nfs_pgio_header *hdr = data->header;
1699 1677
@@ -1709,7 +1687,7 @@ EXPORT_SYMBOL_GPL(pnfs_ld_read_done);
1709 1687
1710static void 1688static void
1711pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, 1689pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
1712 struct nfs_read_data *data) 1690 struct nfs_pgio_data *data)
1713{ 1691{
1714 struct nfs_pgio_header *hdr = data->header; 1692 struct nfs_pgio_header *hdr = data->header;
1715 1693
@@ -1718,14 +1696,14 @@ pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
1718 nfs_pageio_reset_read_mds(desc); 1696 nfs_pageio_reset_read_mds(desc);
1719 desc->pg_recoalesce = 1; 1697 desc->pg_recoalesce = 1;
1720 } 1698 }
1721 nfs_readdata_release(data); 1699 nfs_pgio_data_release(data);
1722} 1700}
1723 1701
1724/* 1702/*
1725 * Call the appropriate parallel I/O subsystem read function. 1703 * Call the appropriate parallel I/O subsystem read function.
1726 */ 1704 */
1727static enum pnfs_try_status 1705static enum pnfs_try_status
1728pnfs_try_to_read_data(struct nfs_read_data *rdata, 1706pnfs_try_to_read_data(struct nfs_pgio_data *rdata,
1729 const struct rpc_call_ops *call_ops, 1707 const struct rpc_call_ops *call_ops,
1730 struct pnfs_layout_segment *lseg) 1708 struct pnfs_layout_segment *lseg)
1731{ 1709{
@@ -1747,41 +1725,35 @@ pnfs_try_to_read_data(struct nfs_read_data *rdata,
1747} 1725}
1748 1726
1749static void 1727static void
1750pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *head) 1728pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr)
1751{ 1729{
1752 struct nfs_read_data *data; 1730 struct nfs_pgio_data *data = hdr->data;
1753 const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; 1731 const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
1754 struct pnfs_layout_segment *lseg = desc->pg_lseg; 1732 struct pnfs_layout_segment *lseg = desc->pg_lseg;
1733 enum pnfs_try_status trypnfs;
1755 1734
1756 desc->pg_lseg = NULL; 1735 desc->pg_lseg = NULL;
1757 while (!list_empty(head)) { 1736 trypnfs = pnfs_try_to_read_data(data, call_ops, lseg);
1758 enum pnfs_try_status trypnfs; 1737 if (trypnfs == PNFS_NOT_ATTEMPTED)
1759 1738 pnfs_read_through_mds(desc, data);
1760 data = list_first_entry(head, struct nfs_read_data, list);
1761 list_del_init(&data->list);
1762
1763 trypnfs = pnfs_try_to_read_data(data, call_ops, lseg);
1764 if (trypnfs == PNFS_NOT_ATTEMPTED)
1765 pnfs_read_through_mds(desc, data);
1766 }
1767 pnfs_put_lseg(lseg); 1739 pnfs_put_lseg(lseg);
1768} 1740}
1769 1741
1770static void pnfs_readhdr_free(struct nfs_pgio_header *hdr) 1742static void pnfs_readhdr_free(struct nfs_pgio_header *hdr)
1771{ 1743{
1772 pnfs_put_lseg(hdr->lseg); 1744 pnfs_put_lseg(hdr->lseg);
1773 nfs_readhdr_free(hdr); 1745 nfs_rw_header_free(hdr);
1774} 1746}
1775EXPORT_SYMBOL_GPL(pnfs_readhdr_free); 1747EXPORT_SYMBOL_GPL(pnfs_readhdr_free);
1776 1748
1777int 1749int
1778pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) 1750pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
1779{ 1751{
1780 struct nfs_read_header *rhdr; 1752 struct nfs_rw_header *rhdr;
1781 struct nfs_pgio_header *hdr; 1753 struct nfs_pgio_header *hdr;
1782 int ret; 1754 int ret;
1783 1755
1784 rhdr = nfs_readhdr_alloc(); 1756 rhdr = nfs_rw_header_alloc(desc->pg_rw_ops);
1785 if (!rhdr) { 1757 if (!rhdr) {
1786 desc->pg_completion_ops->error_cleanup(&desc->pg_list); 1758 desc->pg_completion_ops->error_cleanup(&desc->pg_list);
1787 ret = -ENOMEM; 1759 ret = -ENOMEM;
@@ -1793,12 +1765,12 @@ pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
1793 nfs_pgheader_init(desc, hdr, pnfs_readhdr_free); 1765 nfs_pgheader_init(desc, hdr, pnfs_readhdr_free);
1794 hdr->lseg = pnfs_get_lseg(desc->pg_lseg); 1766 hdr->lseg = pnfs_get_lseg(desc->pg_lseg);
1795 atomic_inc(&hdr->refcnt); 1767 atomic_inc(&hdr->refcnt);
1796 ret = nfs_generic_pagein(desc, hdr); 1768 ret = nfs_generic_pgio(desc, hdr);
1797 if (ret != 0) { 1769 if (ret != 0) {
1798 pnfs_put_lseg(desc->pg_lseg); 1770 pnfs_put_lseg(desc->pg_lseg);
1799 desc->pg_lseg = NULL; 1771 desc->pg_lseg = NULL;
1800 } else 1772 } else
1801 pnfs_do_multiple_reads(desc, &hdr->rpc_list); 1773 pnfs_do_read(desc, hdr);
1802 if (atomic_dec_and_test(&hdr->refcnt)) 1774 if (atomic_dec_and_test(&hdr->refcnt))
1803 hdr->completion_ops->completion(hdr); 1775 hdr->completion_ops->completion(hdr);
1804 return ret; 1776 return ret;
@@ -1810,7 +1782,7 @@ static void pnfs_clear_layoutcommitting(struct inode *inode)
1810 unsigned long *bitlock = &NFS_I(inode)->flags; 1782 unsigned long *bitlock = &NFS_I(inode)->flags;
1811 1783
1812 clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock); 1784 clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock);
1813 smp_mb__after_clear_bit(); 1785 smp_mb__after_atomic();
1814 wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING); 1786 wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING);
1815} 1787}
1816 1788
@@ -1848,7 +1820,7 @@ void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg)
1848EXPORT_SYMBOL_GPL(pnfs_set_lo_fail); 1820EXPORT_SYMBOL_GPL(pnfs_set_lo_fail);
1849 1821
1850void 1822void
1851pnfs_set_layoutcommit(struct nfs_write_data *wdata) 1823pnfs_set_layoutcommit(struct nfs_pgio_data *wdata)
1852{ 1824{
1853 struct nfs_pgio_header *hdr = wdata->header; 1825 struct nfs_pgio_header *hdr = wdata->header;
1854 struct inode *inode = hdr->inode; 1826 struct inode *inode = hdr->inode;
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 023793909778..4fb309a2b4c4 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -113,8 +113,8 @@ struct pnfs_layoutdriver_type {
113 * Return PNFS_ATTEMPTED to indicate the layout code has attempted 113 * Return PNFS_ATTEMPTED to indicate the layout code has attempted
114 * I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS 114 * I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS
115 */ 115 */
116 enum pnfs_try_status (*read_pagelist) (struct nfs_read_data *nfs_data); 116 enum pnfs_try_status (*read_pagelist) (struct nfs_pgio_data *nfs_data);
117 enum pnfs_try_status (*write_pagelist) (struct nfs_write_data *nfs_data, int how); 117 enum pnfs_try_status (*write_pagelist) (struct nfs_pgio_data *nfs_data, int how);
118 118
119 void (*free_deviceid_node) (struct nfs4_deviceid_node *); 119 void (*free_deviceid_node) (struct nfs4_deviceid_node *);
120 120
@@ -180,11 +180,6 @@ extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp);
180void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo); 180void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo);
181void pnfs_put_lseg(struct pnfs_layout_segment *lseg); 181void pnfs_put_lseg(struct pnfs_layout_segment *lseg);
182 182
183void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *,
184 const struct nfs_pgio_completion_ops *);
185void pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *,
186 int, const struct nfs_pgio_completion_ops *);
187
188void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32); 183void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32);
189void unset_pnfs_layoutdriver(struct nfs_server *); 184void unset_pnfs_layoutdriver(struct nfs_server *);
190void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *); 185void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *);
@@ -192,7 +187,8 @@ int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc);
192void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, 187void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
193 struct nfs_page *req, u64 wb_size); 188 struct nfs_page *req, u64 wb_size);
194int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc); 189int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc);
195bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req); 190size_t pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio,
191 struct nfs_page *prev, struct nfs_page *req);
196void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg); 192void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg);
197struct pnfs_layout_segment *pnfs_layout_process(struct nfs4_layoutget *lgp); 193struct pnfs_layout_segment *pnfs_layout_process(struct nfs4_layoutget *lgp);
198void pnfs_free_lseg_list(struct list_head *tmp_list); 194void pnfs_free_lseg_list(struct list_head *tmp_list);
@@ -217,13 +213,13 @@ bool pnfs_roc(struct inode *ino);
217void pnfs_roc_release(struct inode *ino); 213void pnfs_roc_release(struct inode *ino);
218void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); 214void pnfs_roc_set_barrier(struct inode *ino, u32 barrier);
219bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task); 215bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task);
220void pnfs_set_layoutcommit(struct nfs_write_data *wdata); 216void pnfs_set_layoutcommit(struct nfs_pgio_data *wdata);
221void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data); 217void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data);
222int pnfs_layoutcommit_inode(struct inode *inode, bool sync); 218int pnfs_layoutcommit_inode(struct inode *inode, bool sync);
223int _pnfs_return_layout(struct inode *); 219int _pnfs_return_layout(struct inode *);
224int pnfs_commit_and_return_layout(struct inode *); 220int pnfs_commit_and_return_layout(struct inode *);
225void pnfs_ld_write_done(struct nfs_write_data *); 221void pnfs_ld_write_done(struct nfs_pgio_data *);
226void pnfs_ld_read_done(struct nfs_read_data *); 222void pnfs_ld_read_done(struct nfs_pgio_data *);
227struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, 223struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
228 struct nfs_open_context *ctx, 224 struct nfs_open_context *ctx,
229 loff_t pos, 225 loff_t pos,
@@ -275,7 +271,7 @@ pnfs_get_lseg(struct pnfs_layout_segment *lseg)
275{ 271{
276 if (lseg) { 272 if (lseg) {
277 atomic_inc(&lseg->pls_refcount); 273 atomic_inc(&lseg->pls_refcount);
278 smp_mb__after_atomic_inc(); 274 smp_mb__after_atomic();
279 } 275 }
280 return lseg; 276 return lseg;
281} 277}
@@ -461,18 +457,6 @@ static inline void unset_pnfs_layoutdriver(struct nfs_server *s)
461{ 457{
462} 458}
463 459
464static inline void pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode,
465 const struct nfs_pgio_completion_ops *compl_ops)
466{
467 nfs_pageio_init_read(pgio, inode, compl_ops);
468}
469
470static inline void pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags,
471 const struct nfs_pgio_completion_ops *compl_ops)
472{
473 nfs_pageio_init_write(pgio, inode, ioflags, compl_ops);
474}
475
476static inline int 460static inline int
477pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how, 461pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how,
478 struct nfs_commit_info *cinfo) 462 struct nfs_commit_info *cinfo)
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index e55ce9e8b034..c171ce1a8a30 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -578,7 +578,7 @@ nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
578 return 0; 578 return 0;
579} 579}
580 580
581static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data) 581static int nfs_read_done(struct rpc_task *task, struct nfs_pgio_data *data)
582{ 582{
583 struct inode *inode = data->header->inode; 583 struct inode *inode = data->header->inode;
584 584
@@ -594,18 +594,18 @@ static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data)
594 return 0; 594 return 0;
595} 595}
596 596
597static void nfs_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg) 597static void nfs_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
598{ 598{
599 msg->rpc_proc = &nfs_procedures[NFSPROC_READ]; 599 msg->rpc_proc = &nfs_procedures[NFSPROC_READ];
600} 600}
601 601
602static int nfs_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data) 602static int nfs_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data)
603{ 603{
604 rpc_call_start(task); 604 rpc_call_start(task);
605 return 0; 605 return 0;
606} 606}
607 607
608static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data) 608static int nfs_write_done(struct rpc_task *task, struct nfs_pgio_data *data)
609{ 609{
610 struct inode *inode = data->header->inode; 610 struct inode *inode = data->header->inode;
611 611
@@ -614,19 +614,13 @@ static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data)
614 return 0; 614 return 0;
615} 615}
616 616
617static void nfs_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg) 617static void nfs_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
618{ 618{
619 /* Note: NFSv2 ignores @stable and always uses NFS_FILE_SYNC */ 619 /* Note: NFSv2 ignores @stable and always uses NFS_FILE_SYNC */
620 data->args.stable = NFS_FILE_SYNC; 620 data->args.stable = NFS_FILE_SYNC;
621 msg->rpc_proc = &nfs_procedures[NFSPROC_WRITE]; 621 msg->rpc_proc = &nfs_procedures[NFSPROC_WRITE];
622} 622}
623 623
624static int nfs_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data)
625{
626 rpc_call_start(task);
627 return 0;
628}
629
630static void nfs_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) 624static void nfs_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
631{ 625{
632 BUG(); 626 BUG();
@@ -734,13 +728,10 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
734 .fsinfo = nfs_proc_fsinfo, 728 .fsinfo = nfs_proc_fsinfo,
735 .pathconf = nfs_proc_pathconf, 729 .pathconf = nfs_proc_pathconf,
736 .decode_dirent = nfs2_decode_dirent, 730 .decode_dirent = nfs2_decode_dirent,
731 .pgio_rpc_prepare = nfs_proc_pgio_rpc_prepare,
737 .read_setup = nfs_proc_read_setup, 732 .read_setup = nfs_proc_read_setup,
738 .read_pageio_init = nfs_pageio_init_read,
739 .read_rpc_prepare = nfs_proc_read_rpc_prepare,
740 .read_done = nfs_read_done, 733 .read_done = nfs_read_done,
741 .write_setup = nfs_proc_write_setup, 734 .write_setup = nfs_proc_write_setup,
742 .write_pageio_init = nfs_pageio_init_write,
743 .write_rpc_prepare = nfs_proc_write_rpc_prepare,
744 .write_done = nfs_write_done, 735 .write_done = nfs_write_done,
745 .commit_setup = nfs_proc_commit_setup, 736 .commit_setup = nfs_proc_commit_setup,
746 .commit_rpc_prepare = nfs_proc_commit_rpc_prepare, 737 .commit_rpc_prepare = nfs_proc_commit_rpc_prepare,
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 411aedda14bb..e818a475ca64 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -24,85 +24,24 @@
24#include "internal.h" 24#include "internal.h"
25#include "iostat.h" 25#include "iostat.h"
26#include "fscache.h" 26#include "fscache.h"
27#include "pnfs.h"
27 28
28#define NFSDBG_FACILITY NFSDBG_PAGECACHE 29#define NFSDBG_FACILITY NFSDBG_PAGECACHE
29 30
30static const struct nfs_pageio_ops nfs_pageio_read_ops;
31static const struct rpc_call_ops nfs_read_common_ops;
32static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops; 31static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops;
32static const struct nfs_rw_ops nfs_rw_read_ops;
33 33
34static struct kmem_cache *nfs_rdata_cachep; 34static struct kmem_cache *nfs_rdata_cachep;
35 35
36struct nfs_read_header *nfs_readhdr_alloc(void) 36static struct nfs_rw_header *nfs_readhdr_alloc(void)
37{ 37{
38 struct nfs_read_header *rhdr; 38 return kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL);
39
40 rhdr = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL);
41 if (rhdr) {
42 struct nfs_pgio_header *hdr = &rhdr->header;
43
44 INIT_LIST_HEAD(&hdr->pages);
45 INIT_LIST_HEAD(&hdr->rpc_list);
46 spin_lock_init(&hdr->lock);
47 atomic_set(&hdr->refcnt, 0);
48 }
49 return rhdr;
50} 39}
51EXPORT_SYMBOL_GPL(nfs_readhdr_alloc);
52 40
53static struct nfs_read_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr, 41static void nfs_readhdr_free(struct nfs_rw_header *rhdr)
54 unsigned int pagecount)
55{ 42{
56 struct nfs_read_data *data, *prealloc;
57
58 prealloc = &container_of(hdr, struct nfs_read_header, header)->rpc_data;
59 if (prealloc->header == NULL)
60 data = prealloc;
61 else
62 data = kzalloc(sizeof(*data), GFP_KERNEL);
63 if (!data)
64 goto out;
65
66 if (nfs_pgarray_set(&data->pages, pagecount)) {
67 data->header = hdr;
68 atomic_inc(&hdr->refcnt);
69 } else {
70 if (data != prealloc)
71 kfree(data);
72 data = NULL;
73 }
74out:
75 return data;
76}
77
78void nfs_readhdr_free(struct nfs_pgio_header *hdr)
79{
80 struct nfs_read_header *rhdr = container_of(hdr, struct nfs_read_header, header);
81
82 kmem_cache_free(nfs_rdata_cachep, rhdr); 43 kmem_cache_free(nfs_rdata_cachep, rhdr);
83} 44}
84EXPORT_SYMBOL_GPL(nfs_readhdr_free);
85
86void nfs_readdata_release(struct nfs_read_data *rdata)
87{
88 struct nfs_pgio_header *hdr = rdata->header;
89 struct nfs_read_header *read_header = container_of(hdr, struct nfs_read_header, header);
90
91 put_nfs_open_context(rdata->args.context);
92 if (rdata->pages.pagevec != rdata->pages.page_array)
93 kfree(rdata->pages.pagevec);
94 if (rdata == &read_header->rpc_data) {
95 rdata->header = NULL;
96 rdata = NULL;
97 }
98 if (atomic_dec_and_test(&hdr->refcnt))
99 hdr->completion_ops->completion(hdr);
100 /* Note: we only free the rpc_task after callbacks are done.
101 * See the comment in rpc_free_task() for why
102 */
103 kfree(rdata);
104}
105EXPORT_SYMBOL_GPL(nfs_readdata_release);
106 45
107static 46static
108int nfs_return_empty_page(struct page *page) 47int nfs_return_empty_page(struct page *page)
@@ -114,17 +53,24 @@ int nfs_return_empty_page(struct page *page)
114} 53}
115 54
116void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, 55void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
117 struct inode *inode, 56 struct inode *inode, bool force_mds,
118 const struct nfs_pgio_completion_ops *compl_ops) 57 const struct nfs_pgio_completion_ops *compl_ops)
119{ 58{
120 nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops, compl_ops, 59 struct nfs_server *server = NFS_SERVER(inode);
121 NFS_SERVER(inode)->rsize, 0); 60 const struct nfs_pageio_ops *pg_ops = &nfs_pgio_rw_ops;
61
62#ifdef CONFIG_NFS_V4_1
63 if (server->pnfs_curr_ld && !force_mds)
64 pg_ops = server->pnfs_curr_ld->pg_read_ops;
65#endif
66 nfs_pageio_init(pgio, inode, pg_ops, compl_ops, &nfs_rw_read_ops,
67 server->rsize, 0);
122} 68}
123EXPORT_SYMBOL_GPL(nfs_pageio_init_read); 69EXPORT_SYMBOL_GPL(nfs_pageio_init_read);
124 70
125void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio) 71void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio)
126{ 72{
127 pgio->pg_ops = &nfs_pageio_read_ops; 73 pgio->pg_ops = &nfs_pgio_rw_ops;
128 pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize; 74 pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize;
129} 75}
130EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds); 76EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds);
@@ -139,7 +85,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
139 len = nfs_page_length(page); 85 len = nfs_page_length(page);
140 if (len == 0) 86 if (len == 0)
141 return nfs_return_empty_page(page); 87 return nfs_return_empty_page(page);
142 new = nfs_create_request(ctx, inode, page, 0, len); 88 new = nfs_create_request(ctx, page, NULL, 0, len);
143 if (IS_ERR(new)) { 89 if (IS_ERR(new)) {
144 unlock_page(page); 90 unlock_page(page);
145 return PTR_ERR(new); 91 return PTR_ERR(new);
@@ -147,7 +93,8 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
147 if (len < PAGE_CACHE_SIZE) 93 if (len < PAGE_CACHE_SIZE)
148 zero_user_segment(page, len, PAGE_CACHE_SIZE); 94 zero_user_segment(page, len, PAGE_CACHE_SIZE);
149 95
150 NFS_PROTO(inode)->read_pageio_init(&pgio, inode, &nfs_async_read_completion_ops); 96 nfs_pageio_init_read(&pgio, inode, false,
97 &nfs_async_read_completion_ops);
151 nfs_pageio_add_request(&pgio, new); 98 nfs_pageio_add_request(&pgio, new);
152 nfs_pageio_complete(&pgio); 99 nfs_pageio_complete(&pgio);
153 NFS_I(inode)->read_io += pgio.pg_bytes_written; 100 NFS_I(inode)->read_io += pgio.pg_bytes_written;
@@ -158,10 +105,16 @@ static void nfs_readpage_release(struct nfs_page *req)
158{ 105{
159 struct inode *d_inode = req->wb_context->dentry->d_inode; 106 struct inode *d_inode = req->wb_context->dentry->d_inode;
160 107
161 if (PageUptodate(req->wb_page)) 108 dprintk("NFS: read done (%s/%llu %d@%lld)\n", d_inode->i_sb->s_id,
162 nfs_readpage_to_fscache(d_inode, req->wb_page, 0); 109 (unsigned long long)NFS_FILEID(d_inode), req->wb_bytes,
110 (long long)req_offset(req));
163 111
164 unlock_page(req->wb_page); 112 if (nfs_page_group_sync_on_bit(req, PG_UNLOCKPAGE)) {
113 if (PageUptodate(req->wb_page))
114 nfs_readpage_to_fscache(d_inode, req->wb_page, 0);
115
116 unlock_page(req->wb_page);
117 }
165 118
166 dprintk("NFS: read done (%s/%Lu %d@%Ld)\n", 119 dprintk("NFS: read done (%s/%Lu %d@%Ld)\n",
167 req->wb_context->dentry->d_inode->i_sb->s_id, 120 req->wb_context->dentry->d_inode->i_sb->s_id,
@@ -171,7 +124,12 @@ static void nfs_readpage_release(struct nfs_page *req)
171 nfs_release_request(req); 124 nfs_release_request(req);
172} 125}
173 126
174/* Note io was page aligned */ 127static void nfs_page_group_set_uptodate(struct nfs_page *req)
128{
129 if (nfs_page_group_sync_on_bit(req, PG_UPTODATE))
130 SetPageUptodate(req->wb_page);
131}
132
175static void nfs_read_completion(struct nfs_pgio_header *hdr) 133static void nfs_read_completion(struct nfs_pgio_header *hdr)
176{ 134{
177 unsigned long bytes = 0; 135 unsigned long bytes = 0;
@@ -181,21 +139,32 @@ static void nfs_read_completion(struct nfs_pgio_header *hdr)
181 while (!list_empty(&hdr->pages)) { 139 while (!list_empty(&hdr->pages)) {
182 struct nfs_page *req = nfs_list_entry(hdr->pages.next); 140 struct nfs_page *req = nfs_list_entry(hdr->pages.next);
183 struct page *page = req->wb_page; 141 struct page *page = req->wb_page;
142 unsigned long start = req->wb_pgbase;
143 unsigned long end = req->wb_pgbase + req->wb_bytes;
184 144
185 if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) { 145 if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) {
186 if (bytes > hdr->good_bytes) 146 /* note: regions of the page not covered by a
187 zero_user(page, 0, PAGE_SIZE); 147 * request are zeroed in nfs_readpage_async /
188 else if (hdr->good_bytes - bytes < PAGE_SIZE) 148 * readpage_async_filler */
189 zero_user_segment(page, 149 if (bytes > hdr->good_bytes) {
190 hdr->good_bytes & ~PAGE_MASK, 150 /* nothing in this request was good, so zero
191 PAGE_SIZE); 151 * the full extent of the request */
152 zero_user_segment(page, start, end);
153
154 } else if (hdr->good_bytes - bytes < req->wb_bytes) {
155 /* part of this request has good bytes, but
156 * not all. zero the bad bytes */
157 start += hdr->good_bytes - bytes;
158 WARN_ON(start < req->wb_pgbase);
159 zero_user_segment(page, start, end);
160 }
192 } 161 }
193 bytes += req->wb_bytes; 162 bytes += req->wb_bytes;
194 if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) { 163 if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) {
195 if (bytes <= hdr->good_bytes) 164 if (bytes <= hdr->good_bytes)
196 SetPageUptodate(page); 165 nfs_page_group_set_uptodate(req);
197 } else 166 } else
198 SetPageUptodate(page); 167 nfs_page_group_set_uptodate(req);
199 nfs_list_remove_request(req); 168 nfs_list_remove_request(req);
200 nfs_readpage_release(req); 169 nfs_readpage_release(req);
201 } 170 }
@@ -203,95 +172,14 @@ out:
203 hdr->release(hdr); 172 hdr->release(hdr);
204} 173}
205 174
206int nfs_initiate_read(struct rpc_clnt *clnt, 175static void nfs_initiate_read(struct nfs_pgio_data *data, struct rpc_message *msg,
207 struct nfs_read_data *data, 176 struct rpc_task_setup *task_setup_data, int how)
208 const struct rpc_call_ops *call_ops, int flags)
209{ 177{
210 struct inode *inode = data->header->inode; 178 struct inode *inode = data->header->inode;
211 int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0; 179 int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0;
212 struct rpc_task *task;
213 struct rpc_message msg = {
214 .rpc_argp = &data->args,
215 .rpc_resp = &data->res,
216 .rpc_cred = data->header->cred,
217 };
218 struct rpc_task_setup task_setup_data = {
219 .task = &data->task,
220 .rpc_client = clnt,
221 .rpc_message = &msg,
222 .callback_ops = call_ops,
223 .callback_data = data,
224 .workqueue = nfsiod_workqueue,
225 .flags = RPC_TASK_ASYNC | swap_flags | flags,
226 };
227 180
228 /* Set up the initial task struct. */ 181 task_setup_data->flags |= swap_flags;
229 NFS_PROTO(inode)->read_setup(data, &msg); 182 NFS_PROTO(inode)->read_setup(data, msg);
230
231 dprintk("NFS: %5u initiated read call (req %s/%llu, %u bytes @ "
232 "offset %llu)\n",
233 data->task.tk_pid,
234 inode->i_sb->s_id,
235 (unsigned long long)NFS_FILEID(inode),
236 data->args.count,
237 (unsigned long long)data->args.offset);
238
239 task = rpc_run_task(&task_setup_data);
240 if (IS_ERR(task))
241 return PTR_ERR(task);
242 rpc_put_task(task);
243 return 0;
244}
245EXPORT_SYMBOL_GPL(nfs_initiate_read);
246
247/*
248 * Set up the NFS read request struct
249 */
250static void nfs_read_rpcsetup(struct nfs_read_data *data,
251 unsigned int count, unsigned int offset)
252{
253 struct nfs_page *req = data->header->req;
254
255 data->args.fh = NFS_FH(data->header->inode);
256 data->args.offset = req_offset(req) + offset;
257 data->args.pgbase = req->wb_pgbase + offset;
258 data->args.pages = data->pages.pagevec;
259 data->args.count = count;
260 data->args.context = get_nfs_open_context(req->wb_context);
261 data->args.lock_context = req->wb_lock_context;
262
263 data->res.fattr = &data->fattr;
264 data->res.count = count;
265 data->res.eof = 0;
266 nfs_fattr_init(&data->fattr);
267}
268
269static int nfs_do_read(struct nfs_read_data *data,
270 const struct rpc_call_ops *call_ops)
271{
272 struct inode *inode = data->header->inode;
273
274 return nfs_initiate_read(NFS_CLIENT(inode), data, call_ops, 0);
275}
276
277static int
278nfs_do_multiple_reads(struct list_head *head,
279 const struct rpc_call_ops *call_ops)
280{
281 struct nfs_read_data *data;
282 int ret = 0;
283
284 while (!list_empty(head)) {
285 int ret2;
286
287 data = list_first_entry(head, struct nfs_read_data, list);
288 list_del_init(&data->list);
289
290 ret2 = nfs_do_read(data, call_ops);
291 if (ret == 0)
292 ret = ret2;
293 }
294 return ret;
295} 183}
296 184
297static void 185static void
@@ -311,143 +199,14 @@ static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops = {
311 .completion = nfs_read_completion, 199 .completion = nfs_read_completion,
312}; 200};
313 201
314static void nfs_pagein_error(struct nfs_pageio_descriptor *desc,
315 struct nfs_pgio_header *hdr)
316{
317 set_bit(NFS_IOHDR_REDO, &hdr->flags);
318 while (!list_empty(&hdr->rpc_list)) {
319 struct nfs_read_data *data = list_first_entry(&hdr->rpc_list,
320 struct nfs_read_data, list);
321 list_del(&data->list);
322 nfs_readdata_release(data);
323 }
324 desc->pg_completion_ops->error_cleanup(&desc->pg_list);
325}
326
327/*
328 * Generate multiple requests to fill a single page.
329 *
330 * We optimize to reduce the number of read operations on the wire. If we
331 * detect that we're reading a page, or an area of a page, that is past the
332 * end of file, we do not generate NFS read operations but just clear the
333 * parts of the page that would have come back zero from the server anyway.
334 *
335 * We rely on the cached value of i_size to make this determination; another
336 * client can fill pages on the server past our cached end-of-file, but we
337 * won't see the new data until our attribute cache is updated. This is more
338 * or less conventional NFS client behavior.
339 */
340static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc,
341 struct nfs_pgio_header *hdr)
342{
343 struct nfs_page *req = hdr->req;
344 struct page *page = req->wb_page;
345 struct nfs_read_data *data;
346 size_t rsize = desc->pg_bsize, nbytes;
347 unsigned int offset;
348
349 offset = 0;
350 nbytes = desc->pg_count;
351 do {
352 size_t len = min(nbytes,rsize);
353
354 data = nfs_readdata_alloc(hdr, 1);
355 if (!data) {
356 nfs_pagein_error(desc, hdr);
357 return -ENOMEM;
358 }
359 data->pages.pagevec[0] = page;
360 nfs_read_rpcsetup(data, len, offset);
361 list_add(&data->list, &hdr->rpc_list);
362 nbytes -= len;
363 offset += len;
364 } while (nbytes != 0);
365
366 nfs_list_remove_request(req);
367 nfs_list_add_request(req, &hdr->pages);
368 desc->pg_rpc_callops = &nfs_read_common_ops;
369 return 0;
370}
371
372static int nfs_pagein_one(struct nfs_pageio_descriptor *desc,
373 struct nfs_pgio_header *hdr)
374{
375 struct nfs_page *req;
376 struct page **pages;
377 struct nfs_read_data *data;
378 struct list_head *head = &desc->pg_list;
379
380 data = nfs_readdata_alloc(hdr, nfs_page_array_len(desc->pg_base,
381 desc->pg_count));
382 if (!data) {
383 nfs_pagein_error(desc, hdr);
384 return -ENOMEM;
385 }
386
387 pages = data->pages.pagevec;
388 while (!list_empty(head)) {
389 req = nfs_list_entry(head->next);
390 nfs_list_remove_request(req);
391 nfs_list_add_request(req, &hdr->pages);
392 *pages++ = req->wb_page;
393 }
394
395 nfs_read_rpcsetup(data, desc->pg_count, 0);
396 list_add(&data->list, &hdr->rpc_list);
397 desc->pg_rpc_callops = &nfs_read_common_ops;
398 return 0;
399}
400
401int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
402 struct nfs_pgio_header *hdr)
403{
404 if (desc->pg_bsize < PAGE_CACHE_SIZE)
405 return nfs_pagein_multi(desc, hdr);
406 return nfs_pagein_one(desc, hdr);
407}
408EXPORT_SYMBOL_GPL(nfs_generic_pagein);
409
410static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
411{
412 struct nfs_read_header *rhdr;
413 struct nfs_pgio_header *hdr;
414 int ret;
415
416 rhdr = nfs_readhdr_alloc();
417 if (!rhdr) {
418 desc->pg_completion_ops->error_cleanup(&desc->pg_list);
419 return -ENOMEM;
420 }
421 hdr = &rhdr->header;
422 nfs_pgheader_init(desc, hdr, nfs_readhdr_free);
423 atomic_inc(&hdr->refcnt);
424 ret = nfs_generic_pagein(desc, hdr);
425 if (ret == 0)
426 ret = nfs_do_multiple_reads(&hdr->rpc_list,
427 desc->pg_rpc_callops);
428 if (atomic_dec_and_test(&hdr->refcnt))
429 hdr->completion_ops->completion(hdr);
430 return ret;
431}
432
433static const struct nfs_pageio_ops nfs_pageio_read_ops = {
434 .pg_test = nfs_generic_pg_test,
435 .pg_doio = nfs_generic_pg_readpages,
436};
437
438/* 202/*
439 * This is the callback from RPC telling us whether a reply was 203 * This is the callback from RPC telling us whether a reply was
440 * received or some error occurred (timeout or socket shutdown). 204 * received or some error occurred (timeout or socket shutdown).
441 */ 205 */
442int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data) 206static int nfs_readpage_done(struct rpc_task *task, struct nfs_pgio_data *data,
207 struct inode *inode)
443{ 208{
444 struct inode *inode = data->header->inode; 209 int status = NFS_PROTO(inode)->read_done(task, data);
445 int status;
446
447 dprintk("NFS: %s: %5u, (status %d)\n", __func__, task->tk_pid,
448 task->tk_status);
449
450 status = NFS_PROTO(inode)->read_done(task, data);
451 if (status != 0) 210 if (status != 0)
452 return status; 211 return status;
453 212
@@ -460,10 +219,10 @@ int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
460 return 0; 219 return 0;
461} 220}
462 221
463static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data) 222static void nfs_readpage_retry(struct rpc_task *task, struct nfs_pgio_data *data)
464{ 223{
465 struct nfs_readargs *argp = &data->args; 224 struct nfs_pgio_args *argp = &data->args;
466 struct nfs_readres *resp = &data->res; 225 struct nfs_pgio_res *resp = &data->res;
467 226
468 /* This is a short read! */ 227 /* This is a short read! */
469 nfs_inc_stats(data->header->inode, NFSIOS_SHORTREAD); 228 nfs_inc_stats(data->header->inode, NFSIOS_SHORTREAD);
@@ -480,17 +239,11 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data
480 rpc_restart_call_prepare(task); 239 rpc_restart_call_prepare(task);
481} 240}
482 241
483static void nfs_readpage_result_common(struct rpc_task *task, void *calldata) 242static void nfs_readpage_result(struct rpc_task *task, struct nfs_pgio_data *data)
484{ 243{
485 struct nfs_read_data *data = calldata;
486 struct nfs_pgio_header *hdr = data->header; 244 struct nfs_pgio_header *hdr = data->header;
487 245
488 /* Note the only returns of nfs_readpage_result are 0 and -EAGAIN */ 246 if (data->res.eof) {
489 if (nfs_readpage_result(task, data) != 0)
490 return;
491 if (task->tk_status < 0)
492 nfs_set_pgio_error(hdr, task->tk_status, data->args.offset);
493 else if (data->res.eof) {
494 loff_t bound; 247 loff_t bound;
495 248
496 bound = data->args.offset + data->res.count; 249 bound = data->args.offset + data->res.count;
@@ -505,26 +258,6 @@ static void nfs_readpage_result_common(struct rpc_task *task, void *calldata)
505 nfs_readpage_retry(task, data); 258 nfs_readpage_retry(task, data);
506} 259}
507 260
508static void nfs_readpage_release_common(void *calldata)
509{
510 nfs_readdata_release(calldata);
511}
512
513void nfs_read_prepare(struct rpc_task *task, void *calldata)
514{
515 struct nfs_read_data *data = calldata;
516 int err;
517 err = NFS_PROTO(data->header->inode)->read_rpc_prepare(task, data);
518 if (err)
519 rpc_exit(task, err);
520}
521
522static const struct rpc_call_ops nfs_read_common_ops = {
523 .rpc_call_prepare = nfs_read_prepare,
524 .rpc_call_done = nfs_readpage_result_common,
525 .rpc_release = nfs_readpage_release_common,
526};
527
528/* 261/*
529 * Read a page over NFS. 262 * Read a page over NFS.
530 * We read the page synchronously in the following case: 263 * We read the page synchronously in the following case:
@@ -592,7 +325,6 @@ static int
592readpage_async_filler(void *data, struct page *page) 325readpage_async_filler(void *data, struct page *page)
593{ 326{
594 struct nfs_readdesc *desc = (struct nfs_readdesc *)data; 327 struct nfs_readdesc *desc = (struct nfs_readdesc *)data;
595 struct inode *inode = page_file_mapping(page)->host;
596 struct nfs_page *new; 328 struct nfs_page *new;
597 unsigned int len; 329 unsigned int len;
598 int error; 330 int error;
@@ -601,7 +333,7 @@ readpage_async_filler(void *data, struct page *page)
601 if (len == 0) 333 if (len == 0)
602 return nfs_return_empty_page(page); 334 return nfs_return_empty_page(page);
603 335
604 new = nfs_create_request(desc->ctx, inode, page, 0, len); 336 new = nfs_create_request(desc->ctx, page, NULL, 0, len);
605 if (IS_ERR(new)) 337 if (IS_ERR(new))
606 goto out_error; 338 goto out_error;
607 339
@@ -654,7 +386,8 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
654 if (ret == 0) 386 if (ret == 0)
655 goto read_complete; /* all pages were read */ 387 goto read_complete; /* all pages were read */
656 388
657 NFS_PROTO(inode)->read_pageio_init(&pgio, inode, &nfs_async_read_completion_ops); 389 nfs_pageio_init_read(&pgio, inode, false,
390 &nfs_async_read_completion_ops);
658 391
659 ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc); 392 ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);
660 393
@@ -671,7 +404,7 @@ out:
671int __init nfs_init_readpagecache(void) 404int __init nfs_init_readpagecache(void)
672{ 405{
673 nfs_rdata_cachep = kmem_cache_create("nfs_read_data", 406 nfs_rdata_cachep = kmem_cache_create("nfs_read_data",
674 sizeof(struct nfs_read_header), 407 sizeof(struct nfs_rw_header),
675 0, SLAB_HWCACHE_ALIGN, 408 0, SLAB_HWCACHE_ALIGN,
676 NULL); 409 NULL);
677 if (nfs_rdata_cachep == NULL) 410 if (nfs_rdata_cachep == NULL)
@@ -684,3 +417,12 @@ void nfs_destroy_readpagecache(void)
684{ 417{
685 kmem_cache_destroy(nfs_rdata_cachep); 418 kmem_cache_destroy(nfs_rdata_cachep);
686} 419}
420
421static const struct nfs_rw_ops nfs_rw_read_ops = {
422 .rw_mode = FMODE_READ,
423 .rw_alloc_header = nfs_readhdr_alloc,
424 .rw_free_header = nfs_readhdr_free,
425 .rw_done = nfs_readpage_done,
426 .rw_result = nfs_readpage_result,
427 .rw_initiate = nfs_initiate_read,
428};
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 2cb56943e232..084af1060d79 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2180,11 +2180,23 @@ out_no_address:
2180 return -EINVAL; 2180 return -EINVAL;
2181} 2181}
2182 2182
2183#define NFS_MOUNT_CMP_FLAGMASK ~(NFS_MOUNT_INTR \
2184 | NFS_MOUNT_SECURE \
2185 | NFS_MOUNT_TCP \
2186 | NFS_MOUNT_VER3 \
2187 | NFS_MOUNT_KERBEROS \
2188 | NFS_MOUNT_NONLM \
2189 | NFS_MOUNT_BROKEN_SUID \
2190 | NFS_MOUNT_STRICTLOCK \
2191 | NFS_MOUNT_UNSHARED \
2192 | NFS_MOUNT_NORESVPORT \
2193 | NFS_MOUNT_LEGACY_INTERFACE)
2194
2183static int 2195static int
2184nfs_compare_remount_data(struct nfs_server *nfss, 2196nfs_compare_remount_data(struct nfs_server *nfss,
2185 struct nfs_parsed_mount_data *data) 2197 struct nfs_parsed_mount_data *data)
2186{ 2198{
2187 if (data->flags != nfss->flags || 2199 if ((data->flags ^ nfss->flags) & NFS_MOUNT_CMP_FLAGMASK ||
2188 data->rsize != nfss->rsize || 2200 data->rsize != nfss->rsize ||
2189 data->wsize != nfss->wsize || 2201 data->wsize != nfss->wsize ||
2190 data->version != nfss->nfs_client->rpc_ops->version || 2202 data->version != nfss->nfs_client->rpc_ops->version ||
@@ -2248,6 +2260,7 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data)
2248 data->nfs_server.addrlen = nfss->nfs_client->cl_addrlen; 2260 data->nfs_server.addrlen = nfss->nfs_client->cl_addrlen;
2249 data->version = nfsvers; 2261 data->version = nfsvers;
2250 data->minorversion = nfss->nfs_client->cl_minorversion; 2262 data->minorversion = nfss->nfs_client->cl_minorversion;
2263 data->net = current->nsproxy->net_ns;
2251 memcpy(&data->nfs_server.address, &nfss->nfs_client->cl_addr, 2264 memcpy(&data->nfs_server.address, &nfss->nfs_client->cl_addr,
2252 data->nfs_server.addrlen); 2265 data->nfs_server.addrlen);
2253 2266
@@ -2347,18 +2360,6 @@ void nfs_clone_super(struct super_block *sb, struct nfs_mount_info *mount_info)
2347 nfs_initialise_sb(sb); 2360 nfs_initialise_sb(sb);
2348} 2361}
2349 2362
2350#define NFS_MOUNT_CMP_FLAGMASK ~(NFS_MOUNT_INTR \
2351 | NFS_MOUNT_SECURE \
2352 | NFS_MOUNT_TCP \
2353 | NFS_MOUNT_VER3 \
2354 | NFS_MOUNT_KERBEROS \
2355 | NFS_MOUNT_NONLM \
2356 | NFS_MOUNT_BROKEN_SUID \
2357 | NFS_MOUNT_STRICTLOCK \
2358 | NFS_MOUNT_UNSHARED \
2359 | NFS_MOUNT_NORESVPORT \
2360 | NFS_MOUNT_LEGACY_INTERFACE)
2361
2362static int nfs_compare_mount_options(const struct super_block *s, const struct nfs_server *b, int flags) 2363static int nfs_compare_mount_options(const struct super_block *s, const struct nfs_server *b, int flags)
2363{ 2364{
2364 const struct nfs_server *a = s->s_fs_info; 2365 const struct nfs_server *a = s->s_fs_info;
diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c
index 6b3f2535a3ec..bb6ed810fa6f 100644
--- a/fs/nfs/sysctl.c
+++ b/fs/nfs/sysctl.c
@@ -13,7 +13,7 @@
13 13
14static struct ctl_table_header *nfs_callback_sysctl_table; 14static struct ctl_table_header *nfs_callback_sysctl_table;
15 15
16static ctl_table nfs_cb_sysctls[] = { 16static struct ctl_table nfs_cb_sysctls[] = {
17 { 17 {
18 .procname = "nfs_mountpoint_timeout", 18 .procname = "nfs_mountpoint_timeout",
19 .data = &nfs_mountpoint_expiry_timeout, 19 .data = &nfs_mountpoint_expiry_timeout,
@@ -31,7 +31,7 @@ static ctl_table nfs_cb_sysctls[] = {
31 { } 31 { }
32}; 32};
33 33
34static ctl_table nfs_cb_sysctl_dir[] = { 34static struct ctl_table nfs_cb_sysctl_dir[] = {
35 { 35 {
36 .procname = "nfs", 36 .procname = "nfs",
37 .mode = 0555, 37 .mode = 0555,
@@ -40,7 +40,7 @@ static ctl_table nfs_cb_sysctl_dir[] = {
40 { } 40 { }
41}; 41};
42 42
43static ctl_table nfs_cb_sysctl_root[] = { 43static struct ctl_table nfs_cb_sysctl_root[] = {
44 { 44 {
45 .procname = "fs", 45 .procname = "fs",
46 .mode = 0555, 46 .mode = 0555,
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 9a3b6a4cd6b9..98ff061ccaf3 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -42,10 +42,10 @@
42 * Local function declarations 42 * Local function declarations
43 */ 43 */
44static void nfs_redirty_request(struct nfs_page *req); 44static void nfs_redirty_request(struct nfs_page *req);
45static const struct rpc_call_ops nfs_write_common_ops;
46static const struct rpc_call_ops nfs_commit_ops; 45static const struct rpc_call_ops nfs_commit_ops;
47static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops; 46static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops;
48static const struct nfs_commit_completion_ops nfs_commit_completion_ops; 47static const struct nfs_commit_completion_ops nfs_commit_completion_ops;
48static const struct nfs_rw_ops nfs_rw_write_ops;
49 49
50static struct kmem_cache *nfs_wdata_cachep; 50static struct kmem_cache *nfs_wdata_cachep;
51static mempool_t *nfs_wdata_mempool; 51static mempool_t *nfs_wdata_mempool;
@@ -70,76 +70,19 @@ void nfs_commit_free(struct nfs_commit_data *p)
70} 70}
71EXPORT_SYMBOL_GPL(nfs_commit_free); 71EXPORT_SYMBOL_GPL(nfs_commit_free);
72 72
73struct nfs_write_header *nfs_writehdr_alloc(void) 73static struct nfs_rw_header *nfs_writehdr_alloc(void)
74{ 74{
75 struct nfs_write_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO); 75 struct nfs_rw_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO);
76
77 if (p) {
78 struct nfs_pgio_header *hdr = &p->header;
79 76
77 if (p)
80 memset(p, 0, sizeof(*p)); 78 memset(p, 0, sizeof(*p));
81 INIT_LIST_HEAD(&hdr->pages);
82 INIT_LIST_HEAD(&hdr->rpc_list);
83 spin_lock_init(&hdr->lock);
84 atomic_set(&hdr->refcnt, 0);
85 hdr->verf = &p->verf;
86 }
87 return p; 79 return p;
88} 80}
89EXPORT_SYMBOL_GPL(nfs_writehdr_alloc);
90
91static struct nfs_write_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr,
92 unsigned int pagecount)
93{
94 struct nfs_write_data *data, *prealloc;
95
96 prealloc = &container_of(hdr, struct nfs_write_header, header)->rpc_data;
97 if (prealloc->header == NULL)
98 data = prealloc;
99 else
100 data = kzalloc(sizeof(*data), GFP_KERNEL);
101 if (!data)
102 goto out;
103
104 if (nfs_pgarray_set(&data->pages, pagecount)) {
105 data->header = hdr;
106 atomic_inc(&hdr->refcnt);
107 } else {
108 if (data != prealloc)
109 kfree(data);
110 data = NULL;
111 }
112out:
113 return data;
114}
115 81
116void nfs_writehdr_free(struct nfs_pgio_header *hdr) 82static void nfs_writehdr_free(struct nfs_rw_header *whdr)
117{ 83{
118 struct nfs_write_header *whdr = container_of(hdr, struct nfs_write_header, header);
119 mempool_free(whdr, nfs_wdata_mempool); 84 mempool_free(whdr, nfs_wdata_mempool);
120} 85}
121EXPORT_SYMBOL_GPL(nfs_writehdr_free);
122
123void nfs_writedata_release(struct nfs_write_data *wdata)
124{
125 struct nfs_pgio_header *hdr = wdata->header;
126 struct nfs_write_header *write_header = container_of(hdr, struct nfs_write_header, header);
127
128 put_nfs_open_context(wdata->args.context);
129 if (wdata->pages.pagevec != wdata->pages.page_array)
130 kfree(wdata->pages.pagevec);
131 if (wdata == &write_header->rpc_data) {
132 wdata->header = NULL;
133 wdata = NULL;
134 }
135 if (atomic_dec_and_test(&hdr->refcnt))
136 hdr->completion_ops->completion(hdr);
137 /* Note: we only free the rpc_task after callbacks are done.
138 * See the comment in rpc_free_task() for why
139 */
140 kfree(wdata);
141}
142EXPORT_SYMBOL_GPL(nfs_writedata_release);
143 86
144static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error) 87static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
145{ 88{
@@ -211,18 +154,78 @@ static void nfs_set_pageerror(struct page *page)
211 nfs_zap_mapping(page_file_mapping(page)->host, page_file_mapping(page)); 154 nfs_zap_mapping(page_file_mapping(page)->host, page_file_mapping(page));
212} 155}
213 156
157/*
158 * nfs_page_group_search_locked
159 * @head - head request of page group
160 * @page_offset - offset into page
161 *
162 * Search page group with head @head to find a request that contains the
163 * page offset @page_offset.
164 *
165 * Returns a pointer to the first matching nfs request, or NULL if no
166 * match is found.
167 *
168 * Must be called with the page group lock held
169 */
170static struct nfs_page *
171nfs_page_group_search_locked(struct nfs_page *head, unsigned int page_offset)
172{
173 struct nfs_page *req;
174
175 WARN_ON_ONCE(head != head->wb_head);
176 WARN_ON_ONCE(!test_bit(PG_HEADLOCK, &head->wb_head->wb_flags));
177
178 req = head;
179 do {
180 if (page_offset >= req->wb_pgbase &&
181 page_offset < (req->wb_pgbase + req->wb_bytes))
182 return req;
183
184 req = req->wb_this_page;
185 } while (req != head);
186
187 return NULL;
188}
189
190/*
191 * nfs_page_group_covers_page
192 * @head - head request of page group
193 *
194 * Return true if the page group with head @head covers the whole page,
195 * returns false otherwise
196 */
197static bool nfs_page_group_covers_page(struct nfs_page *req)
198{
199 struct nfs_page *tmp;
200 unsigned int pos = 0;
201 unsigned int len = nfs_page_length(req->wb_page);
202
203 nfs_page_group_lock(req);
204
205 do {
206 tmp = nfs_page_group_search_locked(req->wb_head, pos);
207 if (tmp) {
208 /* no way this should happen */
209 WARN_ON_ONCE(tmp->wb_pgbase != pos);
210 pos += tmp->wb_bytes - (pos - tmp->wb_pgbase);
211 }
212 } while (tmp && pos < len);
213
214 nfs_page_group_unlock(req);
215 WARN_ON_ONCE(pos > len);
216 return pos == len;
217}
218
214/* We can set the PG_uptodate flag if we see that a write request 219/* We can set the PG_uptodate flag if we see that a write request
215 * covers the full page. 220 * covers the full page.
216 */ 221 */
217static void nfs_mark_uptodate(struct page *page, unsigned int base, unsigned int count) 222static void nfs_mark_uptodate(struct nfs_page *req)
218{ 223{
219 if (PageUptodate(page)) 224 if (PageUptodate(req->wb_page))
220 return; 225 return;
221 if (base != 0) 226 if (!nfs_page_group_covers_page(req))
222 return; 227 return;
223 if (count != nfs_page_length(page)) 228 SetPageUptodate(req->wb_page);
224 return;
225 SetPageUptodate(page);
226} 229}
227 230
228static int wb_priority(struct writeback_control *wbc) 231static int wb_priority(struct writeback_control *wbc)
@@ -258,12 +261,15 @@ static void nfs_set_page_writeback(struct page *page)
258 } 261 }
259} 262}
260 263
261static void nfs_end_page_writeback(struct page *page) 264static void nfs_end_page_writeback(struct nfs_page *req)
262{ 265{
263 struct inode *inode = page_file_mapping(page)->host; 266 struct inode *inode = page_file_mapping(req->wb_page)->host;
264 struct nfs_server *nfss = NFS_SERVER(inode); 267 struct nfs_server *nfss = NFS_SERVER(inode);
265 268
266 end_page_writeback(page); 269 if (!nfs_page_group_sync_on_bit(req, PG_WB_END))
270 return;
271
272 end_page_writeback(req->wb_page);
267 if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) 273 if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
268 clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC); 274 clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC);
269} 275}
@@ -354,10 +360,8 @@ static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc
354 struct nfs_pageio_descriptor pgio; 360 struct nfs_pageio_descriptor pgio;
355 int err; 361 int err;
356 362
357 NFS_PROTO(page_file_mapping(page)->host)->write_pageio_init(&pgio, 363 nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc),
358 page->mapping->host, 364 false, &nfs_async_write_completion_ops);
359 wb_priority(wbc),
360 &nfs_async_write_completion_ops);
361 err = nfs_do_writepage(page, wbc, &pgio); 365 err = nfs_do_writepage(page, wbc, &pgio);
362 nfs_pageio_complete(&pgio); 366 nfs_pageio_complete(&pgio);
363 if (err < 0) 367 if (err < 0)
@@ -400,12 +404,13 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
400 404
401 nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES); 405 nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
402 406
403 NFS_PROTO(inode)->write_pageio_init(&pgio, inode, wb_priority(wbc), &nfs_async_write_completion_ops); 407 nfs_pageio_init_write(&pgio, inode, wb_priority(wbc), false,
408 &nfs_async_write_completion_ops);
404 err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio); 409 err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio);
405 nfs_pageio_complete(&pgio); 410 nfs_pageio_complete(&pgio);
406 411
407 clear_bit_unlock(NFS_INO_FLUSHING, bitlock); 412 clear_bit_unlock(NFS_INO_FLUSHING, bitlock);
408 smp_mb__after_clear_bit(); 413 smp_mb__after_atomic();
409 wake_up_bit(bitlock, NFS_INO_FLUSHING); 414 wake_up_bit(bitlock, NFS_INO_FLUSHING);
410 415
411 if (err < 0) 416 if (err < 0)
@@ -425,6 +430,8 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
425{ 430{
426 struct nfs_inode *nfsi = NFS_I(inode); 431 struct nfs_inode *nfsi = NFS_I(inode);
427 432
433 WARN_ON_ONCE(req->wb_this_page != req);
434
428 /* Lock the request! */ 435 /* Lock the request! */
429 nfs_lock_request(req); 436 nfs_lock_request(req);
430 437
@@ -441,6 +448,7 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
441 set_page_private(req->wb_page, (unsigned long)req); 448 set_page_private(req->wb_page, (unsigned long)req);
442 } 449 }
443 nfsi->npages++; 450 nfsi->npages++;
451 set_bit(PG_INODE_REF, &req->wb_flags);
444 kref_get(&req->wb_kref); 452 kref_get(&req->wb_kref);
445 spin_unlock(&inode->i_lock); 453 spin_unlock(&inode->i_lock);
446} 454}
@@ -452,15 +460,20 @@ static void nfs_inode_remove_request(struct nfs_page *req)
452{ 460{
453 struct inode *inode = req->wb_context->dentry->d_inode; 461 struct inode *inode = req->wb_context->dentry->d_inode;
454 struct nfs_inode *nfsi = NFS_I(inode); 462 struct nfs_inode *nfsi = NFS_I(inode);
463 struct nfs_page *head;
455 464
456 spin_lock(&inode->i_lock); 465 if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) {
457 if (likely(!PageSwapCache(req->wb_page))) { 466 head = req->wb_head;
458 set_page_private(req->wb_page, 0); 467
459 ClearPagePrivate(req->wb_page); 468 spin_lock(&inode->i_lock);
460 clear_bit(PG_MAPPED, &req->wb_flags); 469 if (likely(!PageSwapCache(head->wb_page))) {
470 set_page_private(head->wb_page, 0);
471 ClearPagePrivate(head->wb_page);
472 clear_bit(PG_MAPPED, &head->wb_flags);
473 }
474 nfsi->npages--;
475 spin_unlock(&inode->i_lock);
461 } 476 }
462 nfsi->npages--;
463 spin_unlock(&inode->i_lock);
464 nfs_release_request(req); 477 nfs_release_request(req);
465} 478}
466 479
@@ -583,7 +596,7 @@ nfs_clear_request_commit(struct nfs_page *req)
583} 596}
584 597
585static inline 598static inline
586int nfs_write_need_commit(struct nfs_write_data *data) 599int nfs_write_need_commit(struct nfs_pgio_data *data)
587{ 600{
588 if (data->verf.committed == NFS_DATA_SYNC) 601 if (data->verf.committed == NFS_DATA_SYNC)
589 return data->header->lseg == NULL; 602 return data->header->lseg == NULL;
@@ -614,7 +627,7 @@ nfs_clear_request_commit(struct nfs_page *req)
614} 627}
615 628
616static inline 629static inline
617int nfs_write_need_commit(struct nfs_write_data *data) 630int nfs_write_need_commit(struct nfs_pgio_data *data)
618{ 631{
619 return 0; 632 return 0;
620} 633}
@@ -625,6 +638,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr)
625{ 638{
626 struct nfs_commit_info cinfo; 639 struct nfs_commit_info cinfo;
627 unsigned long bytes = 0; 640 unsigned long bytes = 0;
641 bool do_destroy;
628 642
629 if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) 643 if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
630 goto out; 644 goto out;
@@ -645,7 +659,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr)
645 goto next; 659 goto next;
646 } 660 }
647 if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) { 661 if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) {
648 memcpy(&req->wb_verf, &hdr->verf->verifier, sizeof(req->wb_verf)); 662 memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf));
649 nfs_mark_request_commit(req, hdr->lseg, &cinfo); 663 nfs_mark_request_commit(req, hdr->lseg, &cinfo);
650 goto next; 664 goto next;
651 } 665 }
@@ -653,7 +667,8 @@ remove_req:
653 nfs_inode_remove_request(req); 667 nfs_inode_remove_request(req);
654next: 668next:
655 nfs_unlock_request(req); 669 nfs_unlock_request(req);
656 nfs_end_page_writeback(req->wb_page); 670 nfs_end_page_writeback(req);
671 do_destroy = !test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags);
657 nfs_release_request(req); 672 nfs_release_request(req);
658 } 673 }
659out: 674out:
@@ -661,7 +676,7 @@ out:
661} 676}
662 677
663#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) 678#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4)
664static unsigned long 679unsigned long
665nfs_reqs_to_commit(struct nfs_commit_info *cinfo) 680nfs_reqs_to_commit(struct nfs_commit_info *cinfo)
666{ 681{
667 return cinfo->mds->ncommit; 682 return cinfo->mds->ncommit;
@@ -718,7 +733,7 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst,
718} 733}
719 734
720#else 735#else
721static unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo) 736unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo)
722{ 737{
723 return 0; 738 return 0;
724} 739}
@@ -758,6 +773,10 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
758 if (req == NULL) 773 if (req == NULL)
759 goto out_unlock; 774 goto out_unlock;
760 775
776 /* should be handled by nfs_flush_incompatible */
777 WARN_ON_ONCE(req->wb_head != req);
778 WARN_ON_ONCE(req->wb_this_page != req);
779
761 rqend = req->wb_offset + req->wb_bytes; 780 rqend = req->wb_offset + req->wb_bytes;
762 /* 781 /*
763 * Tell the caller to flush out the request if 782 * Tell the caller to flush out the request if
@@ -819,7 +838,7 @@ static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx,
819 req = nfs_try_to_update_request(inode, page, offset, bytes); 838 req = nfs_try_to_update_request(inode, page, offset, bytes);
820 if (req != NULL) 839 if (req != NULL)
821 goto out; 840 goto out;
822 req = nfs_create_request(ctx, inode, page, offset, bytes); 841 req = nfs_create_request(ctx, page, NULL, offset, bytes);
823 if (IS_ERR(req)) 842 if (IS_ERR(req))
824 goto out; 843 goto out;
825 nfs_inode_add_request(inode, req); 844 nfs_inode_add_request(inode, req);
@@ -837,7 +856,7 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
837 return PTR_ERR(req); 856 return PTR_ERR(req);
838 /* Update file length */ 857 /* Update file length */
839 nfs_grow_file(page, offset, count); 858 nfs_grow_file(page, offset, count);
840 nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes); 859 nfs_mark_uptodate(req);
841 nfs_mark_request_dirty(req); 860 nfs_mark_request_dirty(req);
842 nfs_unlock_and_release_request(req); 861 nfs_unlock_and_release_request(req);
843 return 0; 862 return 0;
@@ -863,6 +882,8 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
863 return 0; 882 return 0;
864 l_ctx = req->wb_lock_context; 883 l_ctx = req->wb_lock_context;
865 do_flush = req->wb_page != page || req->wb_context != ctx; 884 do_flush = req->wb_page != page || req->wb_context != ctx;
885 /* for now, flush if more than 1 request in page_group */
886 do_flush |= req->wb_this_page != req;
866 if (l_ctx && ctx->dentry->d_inode->i_flock != NULL) { 887 if (l_ctx && ctx->dentry->d_inode->i_flock != NULL) {
867 do_flush |= l_ctx->lockowner.l_owner != current->files 888 do_flush |= l_ctx->lockowner.l_owner != current->files
868 || l_ctx->lockowner.l_pid != current->tgid; 889 || l_ctx->lockowner.l_pid != current->tgid;
@@ -913,12 +934,14 @@ static bool nfs_write_pageuptodate(struct page *page, struct inode *inode)
913 934
914 if (nfs_have_delegated_attributes(inode)) 935 if (nfs_have_delegated_attributes(inode))
915 goto out; 936 goto out;
916 if (nfsi->cache_validity & (NFS_INO_INVALID_DATA|NFS_INO_REVAL_PAGECACHE)) 937 if (nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE)
917 return false; 938 return false;
918 smp_rmb(); 939 smp_rmb();
919 if (test_bit(NFS_INO_INVALIDATING, &nfsi->flags)) 940 if (test_bit(NFS_INO_INVALIDATING, &nfsi->flags))
920 return false; 941 return false;
921out: 942out:
943 if (nfsi->cache_validity & NFS_INO_INVALID_DATA)
944 return false;
922 return PageUptodate(page) != 0; 945 return PageUptodate(page) != 0;
923} 946}
924 947
@@ -990,126 +1013,17 @@ static int flush_task_priority(int how)
990 return RPC_PRIORITY_NORMAL; 1013 return RPC_PRIORITY_NORMAL;
991} 1014}
992 1015
993int nfs_initiate_write(struct rpc_clnt *clnt, 1016static void nfs_initiate_write(struct nfs_pgio_data *data, struct rpc_message *msg,
994 struct nfs_write_data *data, 1017 struct rpc_task_setup *task_setup_data, int how)
995 const struct rpc_call_ops *call_ops,
996 int how, int flags)
997{ 1018{
998 struct inode *inode = data->header->inode; 1019 struct inode *inode = data->header->inode;
999 int priority = flush_task_priority(how); 1020 int priority = flush_task_priority(how);
1000 struct rpc_task *task;
1001 struct rpc_message msg = {
1002 .rpc_argp = &data->args,
1003 .rpc_resp = &data->res,
1004 .rpc_cred = data->header->cred,
1005 };
1006 struct rpc_task_setup task_setup_data = {
1007 .rpc_client = clnt,
1008 .task = &data->task,
1009 .rpc_message = &msg,
1010 .callback_ops = call_ops,
1011 .callback_data = data,
1012 .workqueue = nfsiod_workqueue,
1013 .flags = RPC_TASK_ASYNC | flags,
1014 .priority = priority,
1015 };
1016 int ret = 0;
1017
1018 /* Set up the initial task struct. */
1019 NFS_PROTO(inode)->write_setup(data, &msg);
1020 1021
1021 dprintk("NFS: %5u initiated write call " 1022 task_setup_data->priority = priority;
1022 "(req %s/%llu, %u bytes @ offset %llu)\n", 1023 NFS_PROTO(inode)->write_setup(data, msg);
1023 data->task.tk_pid,
1024 inode->i_sb->s_id,
1025 (unsigned long long)NFS_FILEID(inode),
1026 data->args.count,
1027 (unsigned long long)data->args.offset);
1028 1024
1029 nfs4_state_protect_write(NFS_SERVER(inode)->nfs_client, 1025 nfs4_state_protect_write(NFS_SERVER(inode)->nfs_client,
1030 &task_setup_data.rpc_client, &msg, data); 1026 &task_setup_data->rpc_client, msg, data);
1031
1032 task = rpc_run_task(&task_setup_data);
1033 if (IS_ERR(task)) {
1034 ret = PTR_ERR(task);
1035 goto out;
1036 }
1037 if (how & FLUSH_SYNC) {
1038 ret = rpc_wait_for_completion_task(task);
1039 if (ret == 0)
1040 ret = task->tk_status;
1041 }
1042 rpc_put_task(task);
1043out:
1044 return ret;
1045}
1046EXPORT_SYMBOL_GPL(nfs_initiate_write);
1047
1048/*
1049 * Set up the argument/result storage required for the RPC call.
1050 */
1051static void nfs_write_rpcsetup(struct nfs_write_data *data,
1052 unsigned int count, unsigned int offset,
1053 int how, struct nfs_commit_info *cinfo)
1054{
1055 struct nfs_page *req = data->header->req;
1056
1057 /* Set up the RPC argument and reply structs
1058 * NB: take care not to mess about with data->commit et al. */
1059
1060 data->args.fh = NFS_FH(data->header->inode);
1061 data->args.offset = req_offset(req) + offset;
1062 /* pnfs_set_layoutcommit needs this */
1063 data->mds_offset = data->args.offset;
1064 data->args.pgbase = req->wb_pgbase + offset;
1065 data->args.pages = data->pages.pagevec;
1066 data->args.count = count;
1067 data->args.context = get_nfs_open_context(req->wb_context);
1068 data->args.lock_context = req->wb_lock_context;
1069 data->args.stable = NFS_UNSTABLE;
1070 switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) {
1071 case 0:
1072 break;
1073 case FLUSH_COND_STABLE:
1074 if (nfs_reqs_to_commit(cinfo))
1075 break;
1076 default:
1077 data->args.stable = NFS_FILE_SYNC;
1078 }
1079
1080 data->res.fattr = &data->fattr;
1081 data->res.count = count;
1082 data->res.verf = &data->verf;
1083 nfs_fattr_init(&data->fattr);
1084}
1085
1086static int nfs_do_write(struct nfs_write_data *data,
1087 const struct rpc_call_ops *call_ops,
1088 int how)
1089{
1090 struct inode *inode = data->header->inode;
1091
1092 return nfs_initiate_write(NFS_CLIENT(inode), data, call_ops, how, 0);
1093}
1094
1095static int nfs_do_multiple_writes(struct list_head *head,
1096 const struct rpc_call_ops *call_ops,
1097 int how)
1098{
1099 struct nfs_write_data *data;
1100 int ret = 0;
1101
1102 while (!list_empty(head)) {
1103 int ret2;
1104
1105 data = list_first_entry(head, struct nfs_write_data, list);
1106 list_del_init(&data->list);
1107
1108 ret2 = nfs_do_write(data, call_ops, how);
1109 if (ret == 0)
1110 ret = ret2;
1111 }
1112 return ret;
1113} 1027}
1114 1028
1115/* If a nfs_flush_* function fails, it should remove reqs from @head and 1029/* If a nfs_flush_* function fails, it should remove reqs from @head and
@@ -1120,7 +1034,7 @@ static void nfs_redirty_request(struct nfs_page *req)
1120{ 1034{
1121 nfs_mark_request_dirty(req); 1035 nfs_mark_request_dirty(req);
1122 nfs_unlock_request(req); 1036 nfs_unlock_request(req);
1123 nfs_end_page_writeback(req->wb_page); 1037 nfs_end_page_writeback(req);
1124 nfs_release_request(req); 1038 nfs_release_request(req);
1125} 1039}
1126 1040
@@ -1140,173 +1054,30 @@ static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops = {
1140 .completion = nfs_write_completion, 1054 .completion = nfs_write_completion,
1141}; 1055};
1142 1056
1143static void nfs_flush_error(struct nfs_pageio_descriptor *desc,
1144 struct nfs_pgio_header *hdr)
1145{
1146 set_bit(NFS_IOHDR_REDO, &hdr->flags);
1147 while (!list_empty(&hdr->rpc_list)) {
1148 struct nfs_write_data *data = list_first_entry(&hdr->rpc_list,
1149 struct nfs_write_data, list);
1150 list_del(&data->list);
1151 nfs_writedata_release(data);
1152 }
1153 desc->pg_completion_ops->error_cleanup(&desc->pg_list);
1154}
1155
1156/*
1157 * Generate multiple small requests to write out a single
1158 * contiguous dirty area on one page.
1159 */
1160static int nfs_flush_multi(struct nfs_pageio_descriptor *desc,
1161 struct nfs_pgio_header *hdr)
1162{
1163 struct nfs_page *req = hdr->req;
1164 struct page *page = req->wb_page;
1165 struct nfs_write_data *data;
1166 size_t wsize = desc->pg_bsize, nbytes;
1167 unsigned int offset;
1168 int requests = 0;
1169 struct nfs_commit_info cinfo;
1170
1171 nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);
1172
1173 if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
1174 (desc->pg_moreio || nfs_reqs_to_commit(&cinfo) ||
1175 desc->pg_count > wsize))
1176 desc->pg_ioflags &= ~FLUSH_COND_STABLE;
1177
1178
1179 offset = 0;
1180 nbytes = desc->pg_count;
1181 do {
1182 size_t len = min(nbytes, wsize);
1183
1184 data = nfs_writedata_alloc(hdr, 1);
1185 if (!data) {
1186 nfs_flush_error(desc, hdr);
1187 return -ENOMEM;
1188 }
1189 data->pages.pagevec[0] = page;
1190 nfs_write_rpcsetup(data, len, offset, desc->pg_ioflags, &cinfo);
1191 list_add(&data->list, &hdr->rpc_list);
1192 requests++;
1193 nbytes -= len;
1194 offset += len;
1195 } while (nbytes != 0);
1196 nfs_list_remove_request(req);
1197 nfs_list_add_request(req, &hdr->pages);
1198 desc->pg_rpc_callops = &nfs_write_common_ops;
1199 return 0;
1200}
1201
1202/*
1203 * Create an RPC task for the given write request and kick it.
1204 * The page must have been locked by the caller.
1205 *
1206 * It may happen that the page we're passed is not marked dirty.
1207 * This is the case if nfs_updatepage detects a conflicting request
1208 * that has been written but not committed.
1209 */
1210static int nfs_flush_one(struct nfs_pageio_descriptor *desc,
1211 struct nfs_pgio_header *hdr)
1212{
1213 struct nfs_page *req;
1214 struct page **pages;
1215 struct nfs_write_data *data;
1216 struct list_head *head = &desc->pg_list;
1217 struct nfs_commit_info cinfo;
1218
1219 data = nfs_writedata_alloc(hdr, nfs_page_array_len(desc->pg_base,
1220 desc->pg_count));
1221 if (!data) {
1222 nfs_flush_error(desc, hdr);
1223 return -ENOMEM;
1224 }
1225
1226 nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);
1227 pages = data->pages.pagevec;
1228 while (!list_empty(head)) {
1229 req = nfs_list_entry(head->next);
1230 nfs_list_remove_request(req);
1231 nfs_list_add_request(req, &hdr->pages);
1232 *pages++ = req->wb_page;
1233 }
1234
1235 if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
1236 (desc->pg_moreio || nfs_reqs_to_commit(&cinfo)))
1237 desc->pg_ioflags &= ~FLUSH_COND_STABLE;
1238
1239 /* Set up the argument struct */
1240 nfs_write_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags, &cinfo);
1241 list_add(&data->list, &hdr->rpc_list);
1242 desc->pg_rpc_callops = &nfs_write_common_ops;
1243 return 0;
1244}
1245
1246int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
1247 struct nfs_pgio_header *hdr)
1248{
1249 if (desc->pg_bsize < PAGE_CACHE_SIZE)
1250 return nfs_flush_multi(desc, hdr);
1251 return nfs_flush_one(desc, hdr);
1252}
1253EXPORT_SYMBOL_GPL(nfs_generic_flush);
1254
1255static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
1256{
1257 struct nfs_write_header *whdr;
1258 struct nfs_pgio_header *hdr;
1259 int ret;
1260
1261 whdr = nfs_writehdr_alloc();
1262 if (!whdr) {
1263 desc->pg_completion_ops->error_cleanup(&desc->pg_list);
1264 return -ENOMEM;
1265 }
1266 hdr = &whdr->header;
1267 nfs_pgheader_init(desc, hdr, nfs_writehdr_free);
1268 atomic_inc(&hdr->refcnt);
1269 ret = nfs_generic_flush(desc, hdr);
1270 if (ret == 0)
1271 ret = nfs_do_multiple_writes(&hdr->rpc_list,
1272 desc->pg_rpc_callops,
1273 desc->pg_ioflags);
1274 if (atomic_dec_and_test(&hdr->refcnt))
1275 hdr->completion_ops->completion(hdr);
1276 return ret;
1277}
1278
1279static const struct nfs_pageio_ops nfs_pageio_write_ops = {
1280 .pg_test = nfs_generic_pg_test,
1281 .pg_doio = nfs_generic_pg_writepages,
1282};
1283
1284void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, 1057void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
1285 struct inode *inode, int ioflags, 1058 struct inode *inode, int ioflags, bool force_mds,
1286 const struct nfs_pgio_completion_ops *compl_ops) 1059 const struct nfs_pgio_completion_ops *compl_ops)
1287{ 1060{
1288 nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops, compl_ops, 1061 struct nfs_server *server = NFS_SERVER(inode);
1289 NFS_SERVER(inode)->wsize, ioflags); 1062 const struct nfs_pageio_ops *pg_ops = &nfs_pgio_rw_ops;
1063
1064#ifdef CONFIG_NFS_V4_1
1065 if (server->pnfs_curr_ld && !force_mds)
1066 pg_ops = server->pnfs_curr_ld->pg_write_ops;
1067#endif
1068 nfs_pageio_init(pgio, inode, pg_ops, compl_ops, &nfs_rw_write_ops,
1069 server->wsize, ioflags);
1290} 1070}
1291EXPORT_SYMBOL_GPL(nfs_pageio_init_write); 1071EXPORT_SYMBOL_GPL(nfs_pageio_init_write);
1292 1072
1293void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio) 1073void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio)
1294{ 1074{
1295 pgio->pg_ops = &nfs_pageio_write_ops; 1075 pgio->pg_ops = &nfs_pgio_rw_ops;
1296 pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize; 1076 pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize;
1297} 1077}
1298EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds); 1078EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds);
1299 1079
1300 1080
1301void nfs_write_prepare(struct rpc_task *task, void *calldata)
1302{
1303 struct nfs_write_data *data = calldata;
1304 int err;
1305 err = NFS_PROTO(data->header->inode)->write_rpc_prepare(task, data);
1306 if (err)
1307 rpc_exit(task, err);
1308}
1309
1310void nfs_commit_prepare(struct rpc_task *task, void *calldata) 1081void nfs_commit_prepare(struct rpc_task *task, void *calldata)
1311{ 1082{
1312 struct nfs_commit_data *data = calldata; 1083 struct nfs_commit_data *data = calldata;
@@ -1314,23 +1085,8 @@ void nfs_commit_prepare(struct rpc_task *task, void *calldata)
1314 NFS_PROTO(data->inode)->commit_rpc_prepare(task, data); 1085 NFS_PROTO(data->inode)->commit_rpc_prepare(task, data);
1315} 1086}
1316 1087
1317/* 1088static void nfs_writeback_release_common(struct nfs_pgio_data *data)
1318 * Handle a write reply that flushes a whole page.
1319 *
1320 * FIXME: There is an inherent race with invalidate_inode_pages and
1321 * writebacks since the page->count is kept > 1 for as long
1322 * as the page has a write request pending.
1323 */
1324static void nfs_writeback_done_common(struct rpc_task *task, void *calldata)
1325{
1326 struct nfs_write_data *data = calldata;
1327
1328 nfs_writeback_done(task, data);
1329}
1330
1331static void nfs_writeback_release_common(void *calldata)
1332{ 1089{
1333 struct nfs_write_data *data = calldata;
1334 struct nfs_pgio_header *hdr = data->header; 1090 struct nfs_pgio_header *hdr = data->header;
1335 int status = data->task.tk_status; 1091 int status = data->task.tk_status;
1336 1092
@@ -1339,34 +1095,46 @@ static void nfs_writeback_release_common(void *calldata)
1339 if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) 1095 if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags))
1340 ; /* Do nothing */ 1096 ; /* Do nothing */
1341 else if (!test_and_set_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) 1097 else if (!test_and_set_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags))
1342 memcpy(hdr->verf, &data->verf, sizeof(*hdr->verf)); 1098 memcpy(&hdr->verf, &data->verf, sizeof(hdr->verf));
1343 else if (memcmp(hdr->verf, &data->verf, sizeof(*hdr->verf))) 1099 else if (memcmp(&hdr->verf, &data->verf, sizeof(hdr->verf)))
1344 set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags); 1100 set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags);
1345 spin_unlock(&hdr->lock); 1101 spin_unlock(&hdr->lock);
1346 } 1102 }
1347 nfs_writedata_release(data);
1348} 1103}
1349 1104
1350static const struct rpc_call_ops nfs_write_common_ops = { 1105/*
1351 .rpc_call_prepare = nfs_write_prepare, 1106 * Special version of should_remove_suid() that ignores capabilities.
1352 .rpc_call_done = nfs_writeback_done_common, 1107 */
1353 .rpc_release = nfs_writeback_release_common, 1108static int nfs_should_remove_suid(const struct inode *inode)
1354}; 1109{
1110 umode_t mode = inode->i_mode;
1111 int kill = 0;
1112
1113 /* suid always must be killed */
1114 if (unlikely(mode & S_ISUID))
1115 kill = ATTR_KILL_SUID;
1355 1116
1117 /*
1118 * sgid without any exec bits is just a mandatory locking mark; leave
1119 * it alone. If some exec bits are set, it's a real sgid; kill it.
1120 */
1121 if (unlikely((mode & S_ISGID) && (mode & S_IXGRP)))
1122 kill |= ATTR_KILL_SGID;
1123
1124 if (unlikely(kill && S_ISREG(mode)))
1125 return kill;
1126
1127 return 0;
1128}
1356 1129
1357/* 1130/*
1358 * This function is called when the WRITE call is complete. 1131 * This function is called when the WRITE call is complete.
1359 */ 1132 */
1360void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) 1133static int nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data,
1134 struct inode *inode)
1361{ 1135{
1362 struct nfs_writeargs *argp = &data->args;
1363 struct nfs_writeres *resp = &data->res;
1364 struct inode *inode = data->header->inode;
1365 int status; 1136 int status;
1366 1137
1367 dprintk("NFS: %5u nfs_writeback_done (status %d)\n",
1368 task->tk_pid, task->tk_status);
1369
1370 /* 1138 /*
1371 * ->write_done will attempt to use post-op attributes to detect 1139 * ->write_done will attempt to use post-op attributes to detect
1372 * conflicting writes by other clients. A strict interpretation 1140 * conflicting writes by other clients. A strict interpretation
@@ -1376,11 +1144,11 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1376 */ 1144 */
1377 status = NFS_PROTO(inode)->write_done(task, data); 1145 status = NFS_PROTO(inode)->write_done(task, data);
1378 if (status != 0) 1146 if (status != 0)
1379 return; 1147 return status;
1380 nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, resp->count); 1148 nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, data->res.count);
1381 1149
1382#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) 1150#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4)
1383 if (resp->verf->committed < argp->stable && task->tk_status >= 0) { 1151 if (data->res.verf->committed < data->args.stable && task->tk_status >= 0) {
1384 /* We tried a write call, but the server did not 1152 /* We tried a write call, but the server did not
1385 * commit data to stable storage even though we 1153 * commit data to stable storage even though we
1386 * requested it. 1154 * requested it.
@@ -1396,18 +1164,31 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1396 dprintk("NFS: faulty NFS server %s:" 1164 dprintk("NFS: faulty NFS server %s:"
1397 " (committed = %d) != (stable = %d)\n", 1165 " (committed = %d) != (stable = %d)\n",
1398 NFS_SERVER(inode)->nfs_client->cl_hostname, 1166 NFS_SERVER(inode)->nfs_client->cl_hostname,
1399 resp->verf->committed, argp->stable); 1167 data->res.verf->committed, data->args.stable);
1400 complain = jiffies + 300 * HZ; 1168 complain = jiffies + 300 * HZ;
1401 } 1169 }
1402 } 1170 }
1403#endif 1171#endif
1404 if (task->tk_status < 0) 1172
1405 nfs_set_pgio_error(data->header, task->tk_status, argp->offset); 1173 /* Deal with the suid/sgid bit corner case */
1406 else if (resp->count < argp->count) { 1174 if (nfs_should_remove_suid(inode))
1175 nfs_mark_for_revalidate(inode);
1176 return 0;
1177}
1178
1179/*
1180 * This function is called when the WRITE call is complete.
1181 */
1182static void nfs_writeback_result(struct rpc_task *task, struct nfs_pgio_data *data)
1183{
1184 struct nfs_pgio_args *argp = &data->args;
1185 struct nfs_pgio_res *resp = &data->res;
1186
1187 if (resp->count < argp->count) {
1407 static unsigned long complain; 1188 static unsigned long complain;
1408 1189
1409 /* This a short write! */ 1190 /* This a short write! */
1410 nfs_inc_stats(inode, NFSIOS_SHORTWRITE); 1191 nfs_inc_stats(data->header->inode, NFSIOS_SHORTWRITE);
1411 1192
1412 /* Has the server at least made some progress? */ 1193 /* Has the server at least made some progress? */
1413 if (resp->count == 0) { 1194 if (resp->count == 0) {
@@ -1458,7 +1239,7 @@ static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait)
1458static void nfs_commit_clear_lock(struct nfs_inode *nfsi) 1239static void nfs_commit_clear_lock(struct nfs_inode *nfsi)
1459{ 1240{
1460 clear_bit(NFS_INO_COMMIT, &nfsi->flags); 1241 clear_bit(NFS_INO_COMMIT, &nfsi->flags);
1461 smp_mb__after_clear_bit(); 1242 smp_mb__after_atomic();
1462 wake_up_bit(&nfsi->flags, NFS_INO_COMMIT); 1243 wake_up_bit(&nfsi->flags, NFS_INO_COMMIT);
1463} 1244}
1464 1245
@@ -1874,7 +1655,7 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
1874int __init nfs_init_writepagecache(void) 1655int __init nfs_init_writepagecache(void)
1875{ 1656{
1876 nfs_wdata_cachep = kmem_cache_create("nfs_write_data", 1657 nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
1877 sizeof(struct nfs_write_header), 1658 sizeof(struct nfs_rw_header),
1878 0, SLAB_HWCACHE_ALIGN, 1659 0, SLAB_HWCACHE_ALIGN,
1879 NULL); 1660 NULL);
1880 if (nfs_wdata_cachep == NULL) 1661 if (nfs_wdata_cachep == NULL)
@@ -1936,3 +1717,12 @@ void nfs_destroy_writepagecache(void)
1936 kmem_cache_destroy(nfs_wdata_cachep); 1717 kmem_cache_destroy(nfs_wdata_cachep);
1937} 1718}
1938 1719
1720static const struct nfs_rw_ops nfs_rw_write_ops = {
1721 .rw_mode = FMODE_WRITE,
1722 .rw_alloc_header = nfs_writehdr_alloc,
1723 .rw_free_header = nfs_writehdr_free,
1724 .rw_release = nfs_writeback_release_common,
1725 .rw_done = nfs_writeback_done,
1726 .rw_result = nfs_writeback_result,
1727 .rw_initiate = nfs_initiate_write,
1728};