From 1f2edbe3fe2111a59fcd1bb3b9725066bc9ed686 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 13 Apr 2014 11:11:31 -0400 Subject: NFS: Don't ignore suid/sgid bit changes after a successful write If we suspect that the server may have cleared the suid/sgid bit, then mark the inode for revalidation. Reported-by: Kinglong Mee Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 9a3b6a4cd6b9..cd7c651f9b84 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1353,6 +1353,30 @@ static const struct rpc_call_ops nfs_write_common_ops = { .rpc_release = nfs_writeback_release_common, }; +/* + * Special version of should_remove_suid() that ignores capabilities. + */ +static int nfs_should_remove_suid(const struct inode *inode) +{ + umode_t mode = inode->i_mode; + int kill = 0; + + /* suid always must be killed */ + if (unlikely(mode & S_ISUID)) + kill = ATTR_KILL_SUID; + + /* + * sgid without any exec bits is just a mandatory locking mark; leave + * it alone. If some exec bits are set, it's a real sgid; kill it. + */ + if (unlikely((mode & S_ISGID) && (mode & S_IXGRP))) + kill |= ATTR_KILL_SGID; + + if (unlikely(kill && S_ISREG(mode))) + return kill; + + return 0; +} /* * This function is called when the WRITE call is complete. @@ -1401,9 +1425,16 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) } } #endif - if (task->tk_status < 0) + if (task->tk_status < 0) { nfs_set_pgio_error(data->header, task->tk_status, argp->offset); - else if (resp->count < argp->count) { + return; + } + + /* Deal with the suid/sgid bit corner case */ + if (nfs_should_remove_suid(inode)) + nfs_mark_for_revalidate(inode); + + if (resp->count < argp->count) { static unsigned long complain; /* This a short write! 
*/ -- cgit v1.2.2 From a20c93e3160e37ecccc738d8eef085c8507949ed Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 16 Apr 2014 15:07:21 +0200 Subject: nfs: remove ->write_pageio_init from rpc ops The write_pageio_init method is just a very convoluted way to grab the right nfs_pageio_ops vector. The vector to choose is not a choice of protocol version, but just a pNFS vs MDS I/O choice that can simply be done inside nfs_pageio_init_write based on the presence of a layout driver, and a new force_mds flag for the special case of falling back to MDS I/O on a pNFS-capable volume. Signed-off-by: Christoph Hellwig Tested-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index cd7c651f9b84..ee6d46fde76c 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -354,10 +354,8 @@ static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc struct nfs_pageio_descriptor pgio; int err; - NFS_PROTO(page_file_mapping(page)->host)->write_pageio_init(&pgio, - page->mapping->host, - wb_priority(wbc), - &nfs_async_write_completion_ops); + nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc), + false, &nfs_async_write_completion_ops); err = nfs_do_writepage(page, wbc, &pgio); nfs_pageio_complete(&pgio); if (err < 0) @@ -400,7 +398,8 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES); - NFS_PROTO(inode)->write_pageio_init(&pgio, inode, wb_priority(wbc), &nfs_async_write_completion_ops); + nfs_pageio_init_write(&pgio, inode, wb_priority(wbc), false, + &nfs_async_write_completion_ops); err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio); nfs_pageio_complete(&pgio); @@ -1282,11 +1281,17 @@ static const struct nfs_pageio_ops nfs_pageio_write_ops = { }; void nfs_pageio_init_write(struct 
nfs_pageio_descriptor *pgio, - struct inode *inode, int ioflags, + struct inode *inode, int ioflags, bool force_mds, const struct nfs_pgio_completion_ops *compl_ops) { - nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops, compl_ops, - NFS_SERVER(inode)->wsize, ioflags); + struct nfs_server *server = NFS_SERVER(inode); + const struct nfs_pageio_ops *pg_ops = &nfs_pageio_write_ops; + +#ifdef CONFIG_NFS_V4_1 + if (server->pnfs_curr_ld && !force_mds) + pg_ops = server->pnfs_curr_ld->pg_write_ops; +#endif + nfs_pageio_init(pgio, inode, pg_ops, compl_ops, server->wsize, ioflags); } EXPORT_SYMBOL_GPL(nfs_pageio_init_write); -- cgit v1.2.2 From 3c6b899c49e5e9c2803b59ee553eddaf69cea7f6 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 6 May 2014 09:12:24 -0400 Subject: NFS: Create a common argument structure for reads and writes Reads and writes have very similar arguments. This patch combines them together and documents the few fields used only by write. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index ee6d46fde76c..25ba3830ec8b 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1388,7 +1388,7 @@ static int nfs_should_remove_suid(const struct inode *inode) */ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) { - struct nfs_writeargs *argp = &data->args; + struct nfs_pgio_args *argp = &data->args; struct nfs_writeres *resp = &data->res; struct inode *inode = data->header->inode; int status; -- cgit v1.2.2 From 9137bdf3d241fc2cbeb2a8ced51d1546150aa6a1 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 6 May 2014 09:12:25 -0400 Subject: NFS: Create a common results structure for reads and writes Reads and writes have very similar results. This patch combines the two structs together with comments to show where the differing fields are used. 
Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 25ba3830ec8b..d392a70092fe 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1389,7 +1389,7 @@ static int nfs_should_remove_suid(const struct inode *inode) void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) { struct nfs_pgio_args *argp = &data->args; - struct nfs_writeres *resp = &data->res; + struct nfs_pgio_res *resp = &data->res; struct inode *inode = data->header->inode; int status; -- cgit v1.2.2 From 9c7e1b3d50b56b8d8f6237ed232350b7c6476cd5 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 6 May 2014 09:12:26 -0400 Subject: NFS: Create a common read and write data struct At this point, the only difference between nfs_read_data and nfs_write_data is the write verifier. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index d392a70092fe..3a2fc5c4c79a 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -88,10 +88,10 @@ struct nfs_write_header *nfs_writehdr_alloc(void) } EXPORT_SYMBOL_GPL(nfs_writehdr_alloc); -static struct nfs_write_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr, +static struct nfs_pgio_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr, unsigned int pagecount) { - struct nfs_write_data *data, *prealloc; + struct nfs_pgio_data *data, *prealloc; prealloc = &container_of(hdr, struct nfs_write_header, header)->rpc_data; if (prealloc->header == NULL) @@ -120,7 +120,7 @@ void nfs_writehdr_free(struct nfs_pgio_header *hdr) } EXPORT_SYMBOL_GPL(nfs_writehdr_free); -void nfs_writedata_release(struct nfs_write_data *wdata) +void nfs_writedata_release(struct nfs_pgio_data *wdata) { struct nfs_pgio_header 
*hdr = wdata->header; struct nfs_write_header *write_header = container_of(hdr, struct nfs_write_header, header); @@ -582,7 +582,7 @@ nfs_clear_request_commit(struct nfs_page *req) } static inline -int nfs_write_need_commit(struct nfs_write_data *data) +int nfs_write_need_commit(struct nfs_pgio_data *data) { if (data->verf.committed == NFS_DATA_SYNC) return data->header->lseg == NULL; @@ -613,7 +613,7 @@ nfs_clear_request_commit(struct nfs_page *req) } static inline -int nfs_write_need_commit(struct nfs_write_data *data) +int nfs_write_need_commit(struct nfs_pgio_data *data) { return 0; } @@ -990,7 +990,7 @@ static int flush_task_priority(int how) } int nfs_initiate_write(struct rpc_clnt *clnt, - struct nfs_write_data *data, + struct nfs_pgio_data *data, const struct rpc_call_ops *call_ops, int how, int flags) { @@ -1047,7 +1047,7 @@ EXPORT_SYMBOL_GPL(nfs_initiate_write); /* * Set up the argument/result storage required for the RPC call. */ -static void nfs_write_rpcsetup(struct nfs_write_data *data, +static void nfs_write_rpcsetup(struct nfs_pgio_data *data, unsigned int count, unsigned int offset, int how, struct nfs_commit_info *cinfo) { @@ -1082,7 +1082,7 @@ static void nfs_write_rpcsetup(struct nfs_write_data *data, nfs_fattr_init(&data->fattr); } -static int nfs_do_write(struct nfs_write_data *data, +static int nfs_do_write(struct nfs_pgio_data *data, const struct rpc_call_ops *call_ops, int how) { @@ -1095,13 +1095,13 @@ static int nfs_do_multiple_writes(struct list_head *head, const struct rpc_call_ops *call_ops, int how) { - struct nfs_write_data *data; + struct nfs_pgio_data *data; int ret = 0; while (!list_empty(head)) { int ret2; - data = list_first_entry(head, struct nfs_write_data, list); + data = list_first_entry(head, struct nfs_pgio_data, list); list_del_init(&data->list); ret2 = nfs_do_write(data, call_ops, how); @@ -1144,8 +1144,8 @@ static void nfs_flush_error(struct nfs_pageio_descriptor *desc, { set_bit(NFS_IOHDR_REDO, &hdr->flags); while 
(!list_empty(&hdr->rpc_list)) { - struct nfs_write_data *data = list_first_entry(&hdr->rpc_list, - struct nfs_write_data, list); + struct nfs_pgio_data *data = list_first_entry(&hdr->rpc_list, + struct nfs_pgio_data, list); list_del(&data->list); nfs_writedata_release(data); } @@ -1161,7 +1161,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, { struct nfs_page *req = hdr->req; struct page *page = req->wb_page; - struct nfs_write_data *data; + struct nfs_pgio_data *data; size_t wsize = desc->pg_bsize, nbytes; unsigned int offset; int requests = 0; @@ -1211,7 +1211,7 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc, { struct nfs_page *req; struct page **pages; - struct nfs_write_data *data; + struct nfs_pgio_data *data; struct list_head *head = &desc->pg_list; struct nfs_commit_info cinfo; @@ -1305,7 +1305,7 @@ EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds); void nfs_write_prepare(struct rpc_task *task, void *calldata) { - struct nfs_write_data *data = calldata; + struct nfs_pgio_data *data = calldata; int err; err = NFS_PROTO(data->header->inode)->write_rpc_prepare(task, data); if (err) @@ -1328,14 +1328,14 @@ void nfs_commit_prepare(struct rpc_task *task, void *calldata) */ static void nfs_writeback_done_common(struct rpc_task *task, void *calldata) { - struct nfs_write_data *data = calldata; + struct nfs_pgio_data *data = calldata; nfs_writeback_done(task, data); } static void nfs_writeback_release_common(void *calldata) { - struct nfs_write_data *data = calldata; + struct nfs_pgio_data *data = calldata; struct nfs_pgio_header *hdr = data->header; int status = data->task.tk_status; @@ -1386,7 +1386,7 @@ static int nfs_should_remove_suid(const struct inode *inode) /* * This function is called when the WRITE call is complete. 
*/ -void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) +void nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data) { struct nfs_pgio_args *argp = &data->args; struct nfs_pgio_res *resp = &data->res; -- cgit v1.2.2 From c0752cdfbbb691cfe98812f7aed8ce1e766823c4 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 6 May 2014 09:12:27 -0400 Subject: NFS: Create a common read and write header struct The only difference is the write verifier field, but we can keep that for a little bit longer. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 3a2fc5c4c79a..37c4c988519c 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -70,9 +70,9 @@ void nfs_commit_free(struct nfs_commit_data *p) } EXPORT_SYMBOL_GPL(nfs_commit_free); -struct nfs_write_header *nfs_writehdr_alloc(void) +struct nfs_rw_header *nfs_writehdr_alloc(void) { - struct nfs_write_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO); + struct nfs_rw_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO); if (p) { struct nfs_pgio_header *hdr = &p->header; @@ -93,7 +93,7 @@ static struct nfs_pgio_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr, { struct nfs_pgio_data *data, *prealloc; - prealloc = &container_of(hdr, struct nfs_write_header, header)->rpc_data; + prealloc = &container_of(hdr, struct nfs_rw_header, header)->rpc_data; if (prealloc->header == NULL) data = prealloc; else @@ -115,7 +115,7 @@ out: void nfs_writehdr_free(struct nfs_pgio_header *hdr) { - struct nfs_write_header *whdr = container_of(hdr, struct nfs_write_header, header); + struct nfs_rw_header *whdr = container_of(hdr, struct nfs_rw_header, header); mempool_free(whdr, nfs_wdata_mempool); } EXPORT_SYMBOL_GPL(nfs_writehdr_free); @@ -123,7 +123,7 @@ EXPORT_SYMBOL_GPL(nfs_writehdr_free); void nfs_writedata_release(struct 
nfs_pgio_data *wdata) { struct nfs_pgio_header *hdr = wdata->header; - struct nfs_write_header *write_header = container_of(hdr, struct nfs_write_header, header); + struct nfs_rw_header *write_header = container_of(hdr, struct nfs_rw_header, header); put_nfs_open_context(wdata->args.context); if (wdata->pages.pagevec != wdata->pages.page_array) @@ -1253,7 +1253,7 @@ EXPORT_SYMBOL_GPL(nfs_generic_flush); static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) { - struct nfs_write_header *whdr; + struct nfs_rw_header *whdr; struct nfs_pgio_header *hdr; int ret; @@ -1910,7 +1910,7 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage, int __init nfs_init_writepagecache(void) { nfs_wdata_cachep = kmem_cache_create("nfs_write_data", - sizeof(struct nfs_write_header), + sizeof(struct nfs_rw_header), 0, SLAB_HWCACHE_ALIGN, NULL); if (nfs_wdata_cachep == NULL) -- cgit v1.2.2 From f79d06f544a797d75cbf5256a5d06c4b3d2759cc Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 6 May 2014 09:12:28 -0400 Subject: NFS: Move the write verifier into the nfs_pgio_header The header had a pointer to the verifier that was set from the old write data struct. We don't need to keep the pointer around now that we have shared structures. 
Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 37c4c988519c..321a791c72bf 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -82,7 +82,6 @@ struct nfs_rw_header *nfs_writehdr_alloc(void) INIT_LIST_HEAD(&hdr->rpc_list); spin_lock_init(&hdr->lock); atomic_set(&hdr->refcnt, 0); - hdr->verf = &p->verf; } return p; } @@ -644,7 +643,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr) goto next; } if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) { - memcpy(&req->wb_verf, &hdr->verf->verifier, sizeof(req->wb_verf)); + memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf)); nfs_mark_request_commit(req, hdr->lseg, &cinfo); goto next; } @@ -1344,8 +1343,8 @@ static void nfs_writeback_release_common(void *calldata) if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) ; /* Do nothing */ else if (!test_and_set_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) - memcpy(hdr->verf, &data->verf, sizeof(*hdr->verf)); - else if (memcmp(hdr->verf, &data->verf, sizeof(*hdr->verf))) + memcpy(&hdr->verf, &data->verf, sizeof(hdr->verf)); + else if (memcmp(&hdr->verf, &data->verf, sizeof(hdr->verf))) set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags); spin_unlock(&hdr->lock); } -- cgit v1.2.2 From 00bfa30abe86982ce1929e9cabd703e5546106bd Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 6 May 2014 09:12:29 -0400 Subject: NFS: Create a common pgio_alloc and pgio_release function These functions are identical for the read and write paths so they can be combined. 
Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 54 ++++-------------------------------------------------- 1 file changed, 4 insertions(+), 50 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 321a791c72bf..0dc4d6a28bd0 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -87,31 +87,6 @@ struct nfs_rw_header *nfs_writehdr_alloc(void) } EXPORT_SYMBOL_GPL(nfs_writehdr_alloc); -static struct nfs_pgio_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr, - unsigned int pagecount) -{ - struct nfs_pgio_data *data, *prealloc; - - prealloc = &container_of(hdr, struct nfs_rw_header, header)->rpc_data; - if (prealloc->header == NULL) - data = prealloc; - else - data = kzalloc(sizeof(*data), GFP_KERNEL); - if (!data) - goto out; - - if (nfs_pgarray_set(&data->pages, pagecount)) { - data->header = hdr; - atomic_inc(&hdr->refcnt); - } else { - if (data != prealloc) - kfree(data); - data = NULL; - } -out: - return data; -} - void nfs_writehdr_free(struct nfs_pgio_header *hdr) { struct nfs_rw_header *whdr = container_of(hdr, struct nfs_rw_header, header); @@ -119,27 +94,6 @@ void nfs_writehdr_free(struct nfs_pgio_header *hdr) } EXPORT_SYMBOL_GPL(nfs_writehdr_free); -void nfs_writedata_release(struct nfs_pgio_data *wdata) -{ - struct nfs_pgio_header *hdr = wdata->header; - struct nfs_rw_header *write_header = container_of(hdr, struct nfs_rw_header, header); - - put_nfs_open_context(wdata->args.context); - if (wdata->pages.pagevec != wdata->pages.page_array) - kfree(wdata->pages.pagevec); - if (wdata == &write_header->rpc_data) { - wdata->header = NULL; - wdata = NULL; - } - if (atomic_dec_and_test(&hdr->refcnt)) - hdr->completion_ops->completion(hdr); - /* Note: we only free the rpc_task after callbacks are done. 
- * See the comment in rpc_free_task() for why - */ - kfree(wdata); -} -EXPORT_SYMBOL_GPL(nfs_writedata_release); - static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error) { ctx->error = error; @@ -1146,7 +1100,7 @@ static void nfs_flush_error(struct nfs_pageio_descriptor *desc, struct nfs_pgio_data *data = list_first_entry(&hdr->rpc_list, struct nfs_pgio_data, list); list_del(&data->list); - nfs_writedata_release(data); + nfs_pgio_data_release(data); } desc->pg_completion_ops->error_cleanup(&desc->pg_list); } @@ -1179,7 +1133,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, do { size_t len = min(nbytes, wsize); - data = nfs_writedata_alloc(hdr, 1); + data = nfs_pgio_data_alloc(hdr, 1); if (!data) { nfs_flush_error(desc, hdr); return -ENOMEM; @@ -1214,7 +1168,7 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc, struct list_head *head = &desc->pg_list; struct nfs_commit_info cinfo; - data = nfs_writedata_alloc(hdr, nfs_page_array_len(desc->pg_base, + data = nfs_pgio_data_alloc(hdr, nfs_page_array_len(desc->pg_base, desc->pg_count)); if (!data) { nfs_flush_error(desc, hdr); @@ -1348,7 +1302,7 @@ static void nfs_writeback_release_common(void *calldata) set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags); spin_unlock(&hdr->lock); } - nfs_writedata_release(data); + nfs_pgio_data_release(data); } static const struct rpc_call_ops nfs_write_common_ops = { -- cgit v1.2.2 From 4a0de55c565a36cac8422b76a948c4634a90781e Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 6 May 2014 09:12:30 -0400 Subject: NFS: Create a common rw_header_alloc and rw_header_free function I create a new struct nfs_rw_ops to decide the differences between reads and writes. This struct will be set when initializing a new nfs_pgio_descriptor, and then passed on to the nfs_rw_header when a new header is allocated. 
Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 0dc4d6a28bd0..9c5cde38da45 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -46,6 +46,7 @@ static const struct rpc_call_ops nfs_write_common_ops; static const struct rpc_call_ops nfs_commit_ops; static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops; static const struct nfs_commit_completion_ops nfs_commit_completion_ops; +static const struct nfs_rw_ops nfs_rw_write_ops; static struct kmem_cache *nfs_wdata_cachep; static mempool_t *nfs_wdata_mempool; @@ -70,29 +71,19 @@ void nfs_commit_free(struct nfs_commit_data *p) } EXPORT_SYMBOL_GPL(nfs_commit_free); -struct nfs_rw_header *nfs_writehdr_alloc(void) +static struct nfs_rw_header *nfs_writehdr_alloc(void) { struct nfs_rw_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO); - if (p) { - struct nfs_pgio_header *hdr = &p->header; - + if (p) memset(p, 0, sizeof(*p)); - INIT_LIST_HEAD(&hdr->pages); - INIT_LIST_HEAD(&hdr->rpc_list); - spin_lock_init(&hdr->lock); - atomic_set(&hdr->refcnt, 0); - } return p; } -EXPORT_SYMBOL_GPL(nfs_writehdr_alloc); -void nfs_writehdr_free(struct nfs_pgio_header *hdr) +static void nfs_writehdr_free(struct nfs_rw_header *whdr) { - struct nfs_rw_header *whdr = container_of(hdr, struct nfs_rw_header, header); mempool_free(whdr, nfs_wdata_mempool); } -EXPORT_SYMBOL_GPL(nfs_writehdr_free); static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error) { @@ -1210,13 +1201,13 @@ static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) struct nfs_pgio_header *hdr; int ret; - whdr = nfs_writehdr_alloc(); + whdr = nfs_rw_header_alloc(desc->pg_rw_ops); if (!whdr) { desc->pg_completion_ops->error_cleanup(&desc->pg_list); return -ENOMEM; } hdr = &whdr->header; - nfs_pgheader_init(desc, hdr, 
nfs_writehdr_free); + nfs_pgheader_init(desc, hdr, nfs_rw_header_free); atomic_inc(&hdr->refcnt); ret = nfs_generic_flush(desc, hdr); if (ret == 0) @@ -1244,7 +1235,8 @@ void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, if (server->pnfs_curr_ld && !force_mds) pg_ops = server->pnfs_curr_ld->pg_write_ops; #endif - nfs_pageio_init(pgio, inode, pg_ops, compl_ops, server->wsize, ioflags); + nfs_pageio_init(pgio, inode, pg_ops, compl_ops, &nfs_rw_write_ops, + server->wsize, ioflags); } EXPORT_SYMBOL_GPL(nfs_pageio_init_write); @@ -1925,3 +1917,7 @@ void nfs_destroy_writepagecache(void) kmem_cache_destroy(nfs_wdata_cachep); } +static const struct nfs_rw_ops nfs_rw_write_ops = { + .rw_alloc_header = nfs_writehdr_alloc, + .rw_free_header = nfs_writehdr_free, +}; -- cgit v1.2.2 From a4cdda59111f92000297e0d3edb1e0e08ba3549b Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 6 May 2014 09:12:31 -0400 Subject: NFS: Create a common pgio_rpc_prepare function The read and write paths do exactly the same thing for the rpc_prepare rpc_op. This patch combines them together into a single function. 
Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 9c5cde38da45..ae799c96ec2b 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1248,15 +1248,6 @@ void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio) EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds); -void nfs_write_prepare(struct rpc_task *task, void *calldata) -{ - struct nfs_pgio_data *data = calldata; - int err; - err = NFS_PROTO(data->header->inode)->write_rpc_prepare(task, data); - if (err) - rpc_exit(task, err); -} - void nfs_commit_prepare(struct rpc_task *task, void *calldata) { struct nfs_commit_data *data = calldata; @@ -1278,9 +1269,8 @@ static void nfs_writeback_done_common(struct rpc_task *task, void *calldata) nfs_writeback_done(task, data); } -static void nfs_writeback_release_common(void *calldata) +static void nfs_writeback_release_common(struct nfs_pgio_data *data) { - struct nfs_pgio_data *data = calldata; struct nfs_pgio_header *hdr = data->header; int status = data->task.tk_status; @@ -1294,13 +1284,12 @@ static void nfs_writeback_release_common(void *calldata) set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags); spin_unlock(&hdr->lock); } - nfs_pgio_data_release(data); } static const struct rpc_call_ops nfs_write_common_ops = { - .rpc_call_prepare = nfs_write_prepare, + .rpc_call_prepare = nfs_pgio_prepare, .rpc_call_done = nfs_writeback_done_common, - .rpc_release = nfs_writeback_release_common, + .rpc_release = nfs_pgio_release, }; /* @@ -1918,6 +1907,8 @@ void nfs_destroy_writepagecache(void) } static const struct nfs_rw_ops nfs_rw_write_ops = { + .rw_mode = FMODE_WRITE, .rw_alloc_header = nfs_writehdr_alloc, .rw_free_header = nfs_writehdr_free, + .rw_release = nfs_writeback_release_common, }; -- cgit v1.2.2 From 0eecb2145c1ce18e36617008424a93836ad0a3bd Mon Sep 17 00:00:00 2001 From: Anna Schumaker 
Date: Tue, 6 May 2014 09:12:32 -0400 Subject: NFS: Create a common nfs_pgio_result_common function Combining these functions will let me make a single nfs_rw_common_ops struct (see the next patch). Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 51 ++++++++++++++++++++------------------------------- 1 file changed, 20 insertions(+), 31 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index ae799c96ec2b..1d3e1d75c8c5 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1255,20 +1255,6 @@ void nfs_commit_prepare(struct rpc_task *task, void *calldata) NFS_PROTO(data->inode)->commit_rpc_prepare(task, data); } -/* - * Handle a write reply that flushes a whole page. - * - * FIXME: There is an inherent race with invalidate_inode_pages and - * writebacks since the page->count is kept > 1 for as long - * as the page has a write request pending. - */ -static void nfs_writeback_done_common(struct rpc_task *task, void *calldata) -{ - struct nfs_pgio_data *data = calldata; - - nfs_writeback_done(task, data); -} - static void nfs_writeback_release_common(struct nfs_pgio_data *data) { struct nfs_pgio_header *hdr = data->header; @@ -1288,7 +1274,7 @@ static void nfs_writeback_release_common(struct nfs_pgio_data *data) static const struct rpc_call_ops nfs_write_common_ops = { .rpc_call_prepare = nfs_pgio_prepare, - .rpc_call_done = nfs_writeback_done_common, + .rpc_call_done = nfs_pgio_result, .rpc_release = nfs_pgio_release, }; @@ -1320,16 +1306,11 @@ static int nfs_should_remove_suid(const struct inode *inode) /* * This function is called when the WRITE call is complete. 
*/ -void nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data) +static int nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data, + struct inode *inode) { - struct nfs_pgio_args *argp = &data->args; - struct nfs_pgio_res *resp = &data->res; - struct inode *inode = data->header->inode; int status; - dprintk("NFS: %5u nfs_writeback_done (status %d)\n", - task->tk_pid, task->tk_status); - /* * ->write_done will attempt to use post-op attributes to detect * conflicting writes by other clients. A strict interpretation @@ -1339,11 +1320,11 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data) */ status = NFS_PROTO(inode)->write_done(task, data); if (status != 0) - return; - nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, resp->count); + return status; + nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, data->res.count); #if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) - if (resp->verf->committed < argp->stable && task->tk_status >= 0) { + if (data->res.verf->committed < data->args.stable && task->tk_status >= 0) { /* We tried a write call, but the server did not * commit data to stable storage even though we * requested it. @@ -1359,25 +1340,31 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data) dprintk("NFS: faulty NFS server %s:" " (committed = %d) != (stable = %d)\n", NFS_SERVER(inode)->nfs_client->cl_hostname, - resp->verf->committed, argp->stable); + data->res.verf->committed, data->args.stable); complain = jiffies + 300 * HZ; } } #endif - if (task->tk_status < 0) { - nfs_set_pgio_error(data->header, task->tk_status, argp->offset); - return; - } /* Deal with the suid/sgid bit corner case */ if (nfs_should_remove_suid(inode)) nfs_mark_for_revalidate(inode); + return 0; +} + +/* + * This function is called when the WRITE call is complete. 
+ */ +static void nfs_writeback_result(struct rpc_task *task, struct nfs_pgio_data *data) +{ + struct nfs_pgio_args *argp = &data->args; + struct nfs_pgio_res *resp = &data->res; if (resp->count < argp->count) { static unsigned long complain; /* This a short write! */ - nfs_inc_stats(inode, NFSIOS_SHORTWRITE); + nfs_inc_stats(data->header->inode, NFSIOS_SHORTWRITE); /* Has the server at least made some progress? */ if (resp->count == 0) { @@ -1911,4 +1898,6 @@ static const struct nfs_rw_ops nfs_rw_write_ops = { .rw_alloc_header = nfs_writehdr_alloc, .rw_free_header = nfs_writehdr_free, .rw_release = nfs_writeback_release_common, + .rw_done = nfs_writeback_done, + .rw_result = nfs_writeback_result, }; -- cgit v1.2.2 From 6f92fa4581f1c26562f80dc686b3c9ea76556911 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 6 May 2014 09:12:33 -0400 Subject: NFS: Create a common rpc_call_ops struct The read and write paths set up this struct in exactly the same way, so create a single shared struct. 
Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 1d3e1d75c8c5..d877f15fb31a 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -42,7 +42,6 @@ * Local function declarations */ static void nfs_redirty_request(struct nfs_page *req); -static const struct rpc_call_ops nfs_write_common_ops; static const struct rpc_call_ops nfs_commit_ops; static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops; static const struct nfs_commit_completion_ops nfs_commit_completion_ops; @@ -1138,7 +1137,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, } while (nbytes != 0); nfs_list_remove_request(req); nfs_list_add_request(req, &hdr->pages); - desc->pg_rpc_callops = &nfs_write_common_ops; + desc->pg_rpc_callops = &nfs_pgio_common_ops; return 0; } @@ -1182,7 +1181,7 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc, /* Set up the argument struct */ nfs_write_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags, &cinfo); list_add(&data->list, &hdr->rpc_list); - desc->pg_rpc_callops = &nfs_write_common_ops; + desc->pg_rpc_callops = &nfs_pgio_common_ops; return 0; } @@ -1272,12 +1271,6 @@ static void nfs_writeback_release_common(struct nfs_pgio_data *data) } } -static const struct rpc_call_ops nfs_write_common_ops = { - .rpc_call_prepare = nfs_pgio_prepare, - .rpc_call_done = nfs_pgio_result, - .rpc_release = nfs_pgio_release, -}; - /* * Special version of should_remove_suid() that ignores capabilities. */ -- cgit v1.2.2 From ce59515c1484d3a01bc2f3e7043dc488d25efe34 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 6 May 2014 09:12:34 -0400 Subject: NFS: Create a common rpcsetup function for reads and writes Write adds a little bit of code dealing with flush flags, but since "how" will always be 0 when reading we can share the code. 
Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 46 ++++------------------------------------------ 1 file changed, 4 insertions(+), 42 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index d877f15fb31a..0d367aa87814 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -603,7 +603,7 @@ out: } #if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) -static unsigned long +unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo) { return cinfo->mds->ncommit; @@ -660,7 +660,7 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, } #else -static unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo) +unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo) { return 0; } @@ -987,44 +987,6 @@ out: } EXPORT_SYMBOL_GPL(nfs_initiate_write); -/* - * Set up the argument/result storage required for the RPC call. - */ -static void nfs_write_rpcsetup(struct nfs_pgio_data *data, - unsigned int count, unsigned int offset, - int how, struct nfs_commit_info *cinfo) -{ - struct nfs_page *req = data->header->req; - - /* Set up the RPC argument and reply structs - * NB: take care not to mess about with data->commit et al. 
*/ - - data->args.fh = NFS_FH(data->header->inode); - data->args.offset = req_offset(req) + offset; - /* pnfs_set_layoutcommit needs this */ - data->mds_offset = data->args.offset; - data->args.pgbase = req->wb_pgbase + offset; - data->args.pages = data->pages.pagevec; - data->args.count = count; - data->args.context = get_nfs_open_context(req->wb_context); - data->args.lock_context = req->wb_lock_context; - data->args.stable = NFS_UNSTABLE; - switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) { - case 0: - break; - case FLUSH_COND_STABLE: - if (nfs_reqs_to_commit(cinfo)) - break; - default: - data->args.stable = NFS_FILE_SYNC; - } - - data->res.fattr = &data->fattr; - data->res.count = count; - data->res.verf = &data->verf; - nfs_fattr_init(&data->fattr); -} - static int nfs_do_write(struct nfs_pgio_data *data, const struct rpc_call_ops *call_ops, int how) @@ -1129,7 +1091,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, return -ENOMEM; } data->pages.pagevec[0] = page; - nfs_write_rpcsetup(data, len, offset, desc->pg_ioflags, &cinfo); + nfs_pgio_rpcsetup(data, len, offset, desc->pg_ioflags, &cinfo); list_add(&data->list, &hdr->rpc_list); requests++; nbytes -= len; @@ -1179,7 +1141,7 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc, desc->pg_ioflags &= ~FLUSH_COND_STABLE; /* Set up the argument struct */ - nfs_write_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags, &cinfo); + nfs_pgio_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags, &cinfo); list_add(&data->list, &hdr->rpc_list); desc->pg_rpc_callops = &nfs_pgio_common_ops; return 0; -- cgit v1.2.2 From 844c9e691d8723853ca8f2de0207683538645824 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 6 May 2014 09:12:35 -0400 Subject: NFS: Create a common pgio_error function At this point, the read and write versions of this function look identical so both should use the same function. 
Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 25 ++++--------------------- 1 file changed, 4 insertions(+), 21 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 0d367aa87814..02d088b1d8e4 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1044,19 +1044,6 @@ static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops = { .completion = nfs_write_completion, }; -static void nfs_flush_error(struct nfs_pageio_descriptor *desc, - struct nfs_pgio_header *hdr) -{ - set_bit(NFS_IOHDR_REDO, &hdr->flags); - while (!list_empty(&hdr->rpc_list)) { - struct nfs_pgio_data *data = list_first_entry(&hdr->rpc_list, - struct nfs_pgio_data, list); - list_del(&data->list); - nfs_pgio_data_release(data); - } - desc->pg_completion_ops->error_cleanup(&desc->pg_list); -} - /* * Generate multiple small requests to write out a single * contiguous dirty area on one page. @@ -1086,10 +1073,8 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, size_t len = min(nbytes, wsize); data = nfs_pgio_data_alloc(hdr, 1); - if (!data) { - nfs_flush_error(desc, hdr); - return -ENOMEM; - } + if (!data) + return nfs_pgio_error(desc, hdr); data->pages.pagevec[0] = page; nfs_pgio_rpcsetup(data, len, offset, desc->pg_ioflags, &cinfo); list_add(&data->list, &hdr->rpc_list); @@ -1122,10 +1107,8 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc, data = nfs_pgio_data_alloc(hdr, nfs_page_array_len(desc->pg_base, desc->pg_count)); - if (!data) { - nfs_flush_error(desc, hdr); - return -ENOMEM; - } + if (!data) + return nfs_pgio_error(desc, hdr); nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq); pages = data->pages.pagevec; -- cgit v1.2.2 From ef2c488c073f4f0b3a200745dd8d608c01d69c39 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 6 May 2014 09:12:36 -0400 Subject: NFS: Create a generic_pgio function These functions are almost identical on both the read and write side. 
FLUSH_COND_STABLE will never be set for the read path, so leaving it in the generic code won't hurt anything. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 97 +--------------------------------------------------------- 1 file changed, 1 insertion(+), 96 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 02d088b1d8e4..0e34c7024195 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1044,101 +1044,6 @@ static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops = { .completion = nfs_write_completion, }; -/* - * Generate multiple small requests to write out a single - * contiguous dirty area on one page. - */ -static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, - struct nfs_pgio_header *hdr) -{ - struct nfs_page *req = hdr->req; - struct page *page = req->wb_page; - struct nfs_pgio_data *data; - size_t wsize = desc->pg_bsize, nbytes; - unsigned int offset; - int requests = 0; - struct nfs_commit_info cinfo; - - nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq); - - if ((desc->pg_ioflags & FLUSH_COND_STABLE) && - (desc->pg_moreio || nfs_reqs_to_commit(&cinfo) || - desc->pg_count > wsize)) - desc->pg_ioflags &= ~FLUSH_COND_STABLE; - - - offset = 0; - nbytes = desc->pg_count; - do { - size_t len = min(nbytes, wsize); - - data = nfs_pgio_data_alloc(hdr, 1); - if (!data) - return nfs_pgio_error(desc, hdr); - data->pages.pagevec[0] = page; - nfs_pgio_rpcsetup(data, len, offset, desc->pg_ioflags, &cinfo); - list_add(&data->list, &hdr->rpc_list); - requests++; - nbytes -= len; - offset += len; - } while (nbytes != 0); - nfs_list_remove_request(req); - nfs_list_add_request(req, &hdr->pages); - desc->pg_rpc_callops = &nfs_pgio_common_ops; - return 0; -} - -/* - * Create an RPC task for the given write request and kick it. - * The page must have been locked by the caller. - * - * It may happen that the page we're passed is not marked dirty. 
- * This is the case if nfs_updatepage detects a conflicting request - * that has been written but not committed. - */ -static int nfs_flush_one(struct nfs_pageio_descriptor *desc, - struct nfs_pgio_header *hdr) -{ - struct nfs_page *req; - struct page **pages; - struct nfs_pgio_data *data; - struct list_head *head = &desc->pg_list; - struct nfs_commit_info cinfo; - - data = nfs_pgio_data_alloc(hdr, nfs_page_array_len(desc->pg_base, - desc->pg_count)); - if (!data) - return nfs_pgio_error(desc, hdr); - - nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq); - pages = data->pages.pagevec; - while (!list_empty(head)) { - req = nfs_list_entry(head->next); - nfs_list_remove_request(req); - nfs_list_add_request(req, &hdr->pages); - *pages++ = req->wb_page; - } - - if ((desc->pg_ioflags & FLUSH_COND_STABLE) && - (desc->pg_moreio || nfs_reqs_to_commit(&cinfo))) - desc->pg_ioflags &= ~FLUSH_COND_STABLE; - - /* Set up the argument struct */ - nfs_pgio_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags, &cinfo); - list_add(&data->list, &hdr->rpc_list); - desc->pg_rpc_callops = &nfs_pgio_common_ops; - return 0; -} - -int nfs_generic_flush(struct nfs_pageio_descriptor *desc, - struct nfs_pgio_header *hdr) -{ - if (desc->pg_bsize < PAGE_CACHE_SIZE) - return nfs_flush_multi(desc, hdr); - return nfs_flush_one(desc, hdr); -} -EXPORT_SYMBOL_GPL(nfs_generic_flush); - static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) { struct nfs_rw_header *whdr; @@ -1153,7 +1058,7 @@ static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) hdr = &whdr->header; nfs_pgheader_init(desc, hdr, nfs_rw_header_free); atomic_inc(&hdr->refcnt); - ret = nfs_generic_flush(desc, hdr); + ret = nfs_generic_pgio(desc, hdr); if (ret == 0) ret = nfs_do_multiple_writes(&hdr->rpc_list, desc->pg_rpc_callops, -- cgit v1.2.2 From 1ed26f33008e954a8e91d26f97d4380dea8145db Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 6 May 2014 09:12:37 -0400 Subject: NFS: Create a common 
initiate_pgio() function Most of this code is the same for both the read and write paths, so combine everything and use the rw_ops when necessary. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 55 +++++++------------------------------------------------ 1 file changed, 7 insertions(+), 48 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 0e34c7024195..e46a1fc6c1fe 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -932,60 +932,18 @@ static int flush_task_priority(int how) return RPC_PRIORITY_NORMAL; } -int nfs_initiate_write(struct rpc_clnt *clnt, - struct nfs_pgio_data *data, - const struct rpc_call_ops *call_ops, - int how, int flags) +static void nfs_initiate_write(struct nfs_pgio_data *data, struct rpc_message *msg, + struct rpc_task_setup *task_setup_data, int how) { struct inode *inode = data->header->inode; int priority = flush_task_priority(how); - struct rpc_task *task; - struct rpc_message msg = { - .rpc_argp = &data->args, - .rpc_resp = &data->res, - .rpc_cred = data->header->cred, - }; - struct rpc_task_setup task_setup_data = { - .rpc_client = clnt, - .task = &data->task, - .rpc_message = &msg, - .callback_ops = call_ops, - .callback_data = data, - .workqueue = nfsiod_workqueue, - .flags = RPC_TASK_ASYNC | flags, - .priority = priority, - }; - int ret = 0; - /* Set up the initial task struct. 
*/ - NFS_PROTO(inode)->write_setup(data, &msg); - - dprintk("NFS: %5u initiated write call " - "(req %s/%llu, %u bytes @ offset %llu)\n", - data->task.tk_pid, - inode->i_sb->s_id, - (unsigned long long)NFS_FILEID(inode), - data->args.count, - (unsigned long long)data->args.offset); + task_setup_data->priority = priority; + NFS_PROTO(inode)->write_setup(data, msg); nfs4_state_protect_write(NFS_SERVER(inode)->nfs_client, - &task_setup_data.rpc_client, &msg, data); - - task = rpc_run_task(&task_setup_data); - if (IS_ERR(task)) { - ret = PTR_ERR(task); - goto out; - } - if (how & FLUSH_SYNC) { - ret = rpc_wait_for_completion_task(task); - if (ret == 0) - ret = task->tk_status; - } - rpc_put_task(task); -out: - return ret; + &task_setup_data->rpc_client, msg, data); } -EXPORT_SYMBOL_GPL(nfs_initiate_write); static int nfs_do_write(struct nfs_pgio_data *data, const struct rpc_call_ops *call_ops, @@ -993,7 +951,7 @@ static int nfs_do_write(struct nfs_pgio_data *data, { struct inode *inode = data->header->inode; - return nfs_initiate_write(NFS_CLIENT(inode), data, call_ops, how, 0); + return nfs_initiate_pgio(NFS_CLIENT(inode), data, call_ops, how, 0); } static int nfs_do_multiple_writes(struct list_head *head, @@ -1743,4 +1701,5 @@ static const struct nfs_rw_ops nfs_rw_write_ops = { .rw_release = nfs_writeback_release_common, .rw_done = nfs_writeback_done, .rw_result = nfs_writeback_result, + .rw_initiate = nfs_initiate_write, }; -- cgit v1.2.2 From c3766276f26090f4459329839cdcc8506dfbced5 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 6 May 2014 09:12:38 -0400 Subject: NFS: Create a common multiple_pgios() function Once again, these two functions look identical in the read and write case. Time to combine them together! 
Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 31 +------------------------------ 1 file changed, 1 insertion(+), 30 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index e46a1fc6c1fe..d3fa181053ad 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -945,35 +945,6 @@ static void nfs_initiate_write(struct nfs_pgio_data *data, struct rpc_message *m &task_setup_data->rpc_client, msg, data); } -static int nfs_do_write(struct nfs_pgio_data *data, - const struct rpc_call_ops *call_ops, - int how) -{ - struct inode *inode = data->header->inode; - - return nfs_initiate_pgio(NFS_CLIENT(inode), data, call_ops, how, 0); -} - -static int nfs_do_multiple_writes(struct list_head *head, - const struct rpc_call_ops *call_ops, - int how) -{ - struct nfs_pgio_data *data; - int ret = 0; - - while (!list_empty(head)) { - int ret2; - - data = list_first_entry(head, struct nfs_pgio_data, list); - list_del_init(&data->list); - - ret2 = nfs_do_write(data, call_ops, how); - if (ret == 0) - ret = ret2; - } - return ret; -} - /* If a nfs_flush_* function fails, it should remove reqs from @head and * call this on each, which will prepare them to be retried on next * writeback using standard nfs. @@ -1018,7 +989,7 @@ static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) atomic_inc(&hdr->refcnt); ret = nfs_generic_pgio(desc, hdr); if (ret == 0) - ret = nfs_do_multiple_writes(&hdr->rpc_list, + ret = nfs_do_multiple_pgios(&hdr->rpc_list, desc->pg_rpc_callops, desc->pg_ioflags); if (atomic_dec_and_test(&hdr->refcnt)) -- cgit v1.2.2 From cf485fcd68bc2dd91258e844ba4649404fff3235 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 6 May 2014 09:12:39 -0400 Subject: NFS: Create a common generic_pg_pgios() What we have here is two functions that look identical. Let's share some more code! 
Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 26 +------------------------- 1 file changed, 1 insertion(+), 25 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index d3fa181053ad..31a8b29e4026 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -973,33 +973,9 @@ static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops = { .completion = nfs_write_completion, }; -static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) -{ - struct nfs_rw_header *whdr; - struct nfs_pgio_header *hdr; - int ret; - - whdr = nfs_rw_header_alloc(desc->pg_rw_ops); - if (!whdr) { - desc->pg_completion_ops->error_cleanup(&desc->pg_list); - return -ENOMEM; - } - hdr = &whdr->header; - nfs_pgheader_init(desc, hdr, nfs_rw_header_free); - atomic_inc(&hdr->refcnt); - ret = nfs_generic_pgio(desc, hdr); - if (ret == 0) - ret = nfs_do_multiple_pgios(&hdr->rpc_list, - desc->pg_rpc_callops, - desc->pg_ioflags); - if (atomic_dec_and_test(&hdr->refcnt)) - hdr->completion_ops->completion(hdr); - return ret; -} - static const struct nfs_pageio_ops nfs_pageio_write_ops = { .pg_test = nfs_generic_pg_test, - .pg_doio = nfs_generic_pg_writepages, + .pg_doio = nfs_generic_pg_pgios, }; void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, -- cgit v1.2.2 From 41d8d5b7a559a9bfbf9680d1e4777e1a7b0149d5 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 6 May 2014 09:12:40 -0400 Subject: NFS: Create a common nfs_pageio_ops struct At this point the read and write structures look identical, so combine them into something shared by both. 
Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 31a8b29e4026..2680f29f8a51 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -973,17 +973,12 @@ static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops = { .completion = nfs_write_completion, }; -static const struct nfs_pageio_ops nfs_pageio_write_ops = { - .pg_test = nfs_generic_pg_test, - .pg_doio = nfs_generic_pg_pgios, -}; - void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags, bool force_mds, const struct nfs_pgio_completion_ops *compl_ops) { struct nfs_server *server = NFS_SERVER(inode); - const struct nfs_pageio_ops *pg_ops = &nfs_pageio_write_ops; + const struct nfs_pageio_ops *pg_ops = &nfs_pgio_rw_ops; #ifdef CONFIG_NFS_V4_1 if (server->pnfs_curr_ld && !force_mds) @@ -996,7 +991,7 @@ EXPORT_SYMBOL_GPL(nfs_pageio_init_write); void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio) { - pgio->pg_ops = &nfs_pageio_write_ops; + pgio->pg_ops = &nfs_pgio_rw_ops; pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize; } EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds); -- cgit v1.2.2 From 8c8f1ac109726e4ed44a920f5c962c84610d4a17 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 15 May 2014 11:56:42 -0400 Subject: nfs: remove unused arg from nfs_create_request @inode is passed but not used. 
Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 2680f29f8a51..e773df207c05 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -761,7 +761,7 @@ static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx, req = nfs_try_to_update_request(inode, page, offset, bytes); if (req != NULL) goto out; - req = nfs_create_request(ctx, inode, page, offset, bytes); + req = nfs_create_request(ctx, page, offset, bytes); if (IS_ERR(req)) goto out; nfs_inode_add_request(inode, req); -- cgit v1.2.2 From 2bfc6e566daa8386c9cffef2f7de17fc330d3835 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 15 May 2014 11:56:45 -0400 Subject: nfs: add support for multiple nfs reqs per page Add "page groups" - a circular list of nfs requests (struct nfs_page) that all reference the same page. This gives nfs read and write paths the ability to account for sub-page regions independently. This somewhat follows the design of struct buffer_head's sub-page accounting. Only "head" requests are ever added/removed from the inode list in the buffered write path. "head" and "sub" requests are treated the same through the read path and the rest of the write/commit path. Requests are given an extra reference across the life of the list. Page groups are never rejoined after being split. If the read/write request fails and the client falls back to another path (ie revert to MDS in PNFS case), the already split requests are pushed through the recoalescing code again, which may split them further and then coalesce them into properly sized requests on the wire. Fragmentation shouldn't be a problem with the current design, because we flush all requests in page group when a non-contiguous request is added, so the only time resplitting should occur is on a resend of a read or write. 
This patch lays the groundwork for sub-page splitting, but does not actually do any splitting. For now all page groups have one request as pg_test functions don't yet split pages. There are several related patches that are needed to support multiple requests per page group. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index e773df207c05..d0f30f12a8b3 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -367,6 +367,8 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) { struct nfs_inode *nfsi = NFS_I(inode); + WARN_ON_ONCE(req->wb_this_page != req); + /* Lock the request! */ nfs_lock_request(req); @@ -383,6 +385,7 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) set_page_private(req->wb_page, (unsigned long)req); } nfsi->npages++; + set_bit(PG_INODE_REF, &req->wb_flags); kref_get(&req->wb_kref); spin_unlock(&inode->i_lock); } @@ -567,6 +570,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr) { struct nfs_commit_info cinfo; unsigned long bytes = 0; + bool do_destroy; if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) goto out; @@ -596,6 +600,7 @@ remove_req: next: nfs_unlock_request(req); nfs_end_page_writeback(req->wb_page); + do_destroy = !test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags); nfs_release_request(req); } out: @@ -700,6 +705,10 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode, if (req == NULL) goto out_unlock; + /* should be handled by nfs_flush_incompatible */ + WARN_ON_ONCE(req->wb_head != req); + WARN_ON_ONCE(req->wb_this_page != req); + rqend = req->wb_offset + req->wb_bytes; /* * Tell the caller to flush out the request if @@ -761,7 +770,7 @@ static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx, req = nfs_try_to_update_request(inode, page, offset, bytes); if (req
!= NULL) goto out; - req = nfs_create_request(ctx, page, offset, bytes); + req = nfs_create_request(ctx, page, NULL, offset, bytes); if (IS_ERR(req)) goto out; nfs_inode_add_request(inode, req); @@ -805,6 +814,8 @@ int nfs_flush_incompatible(struct file *file, struct page *page) return 0; l_ctx = req->wb_lock_context; do_flush = req->wb_page != page || req->wb_context != ctx; + /* for now, flush if more than 1 request in page_group */ + do_flush |= req->wb_this_page != req; if (l_ctx && ctx->dentry->d_inode->i_flock != NULL) { do_flush |= l_ctx->lockowner.l_owner != current->files || l_ctx->lockowner.l_pid != current->tgid; -- cgit v1.2.2 From 20633f042fd0907300069714b98aaf607a8b5bf8 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 15 May 2014 11:56:47 -0400 Subject: nfs: page group syncing in write path Operations that modify state for a whole page must be synchronized across all requests within a page group. In the write path, this is calling end_page_writeback and removing the head request from an inode. Both of these operations should not be called until all requests in a page group have reached the point where they would call them. This patch should have no effect yet since all page groups currently have one request, but will come into play when pg_test functions are modified to split pages into sub-page regions.
Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index d0f30f12a8b3..5d752766139d 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -201,12 +201,15 @@ static void nfs_set_page_writeback(struct page *page) } } -static void nfs_end_page_writeback(struct page *page) +static void nfs_end_page_writeback(struct nfs_page *req) { - struct inode *inode = page_file_mapping(page)->host; + struct inode *inode = page_file_mapping(req->wb_page)->host; struct nfs_server *nfss = NFS_SERVER(inode); - end_page_writeback(page); + if (!nfs_page_group_sync_on_bit(req, PG_WB_END)) + return; + + end_page_writeback(req->wb_page); if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC); } @@ -397,15 +400,20 @@ static void nfs_inode_remove_request(struct nfs_page *req) { struct inode *inode = req->wb_context->dentry->d_inode; struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_page *head; - spin_lock(&inode->i_lock); - if (likely(!PageSwapCache(req->wb_page))) { - set_page_private(req->wb_page, 0); - ClearPagePrivate(req->wb_page); - clear_bit(PG_MAPPED, &req->wb_flags); + if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) { + head = req->wb_head; + + spin_lock(&inode->i_lock); + if (likely(!PageSwapCache(head->wb_page))) { + set_page_private(head->wb_page, 0); + ClearPagePrivate(head->wb_page); + clear_bit(PG_MAPPED, &head->wb_flags); + } + nfsi->npages--; + spin_unlock(&inode->i_lock); } - nfsi->npages--; - spin_unlock(&inode->i_lock); nfs_release_request(req); } @@ -599,7 +607,7 @@ remove_req: nfs_inode_remove_request(req); next: nfs_unlock_request(req); - nfs_end_page_writeback(req->wb_page); + nfs_end_page_writeback(req); do_destroy = !test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags); nfs_release_request(req); } @@ 
-964,7 +972,7 @@ static void nfs_redirty_request(struct nfs_page *req) { nfs_mark_request_dirty(req); nfs_unlock_request(req); - nfs_end_page_writeback(req->wb_page); + nfs_end_page_writeback(req); nfs_release_request(req); } -- cgit v1.2.2 From d72ddcbab60a70258d0cd5752db3f53824df78d6 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 15 May 2014 11:56:48 -0400 Subject: nfs: page group support in nfs_mark_uptodate Change how nfs_mark_uptodate checks to see if writes cover a whole page. This patch should have no effect yet since all page groups currently have one request, but will come into play when pg_test functions are modified to split pages into sub-page regions. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 67 insertions(+), 7 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 5d752766139d..17b98952f7bd 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -154,18 +154,78 @@ static void nfs_set_pageerror(struct page *page) nfs_zap_mapping(page_file_mapping(page)->host, page_file_mapping(page)); } +/* + * nfs_page_group_search_locked + * @head - head request of page group + * @page_offset - offset into page + * + * Search page group with head @head to find a request that contains the + * page offset @page_offset. + * + * Returns a pointer to the first matching nfs request, or NULL if no + * match is found. 
+ * + * Must be called with the page group lock held + */ +static struct nfs_page * +nfs_page_group_search_locked(struct nfs_page *head, unsigned int page_offset) +{ + struct nfs_page *req; + + WARN_ON_ONCE(head != head->wb_head); + WARN_ON_ONCE(!test_bit(PG_HEADLOCK, &head->wb_head->wb_flags)); + + req = head; + do { + if (page_offset >= req->wb_pgbase && + page_offset < (req->wb_pgbase + req->wb_bytes)) + return req; + + req = req->wb_this_page; + } while (req != head); + + return NULL; +} + +/* + * nfs_page_group_covers_page + * @head - head request of page group + * + * Return true if the page group with head @head covers the whole page, + * returns false otherwise + */ +static bool nfs_page_group_covers_page(struct nfs_page *req) +{ + struct nfs_page *tmp; + unsigned int pos = 0; + unsigned int len = nfs_page_length(req->wb_page); + + nfs_page_group_lock(req); + + do { + tmp = nfs_page_group_search_locked(req->wb_head, pos); + if (tmp) { + /* no way this should happen */ + WARN_ON_ONCE(tmp->wb_pgbase != pos); + pos += tmp->wb_bytes - (pos - tmp->wb_pgbase); + } + } while (tmp && pos < len); + + nfs_page_group_unlock(req); + WARN_ON_ONCE(pos > len); + return pos == len; +} + /* We can set the PG_uptodate flag if we see that a write request * covers the full page. 
*/ -static void nfs_mark_uptodate(struct page *page, unsigned int base, unsigned int count) +static void nfs_mark_uptodate(struct nfs_page *req) { - if (PageUptodate(page)) - return; - if (base != 0) + if (PageUptodate(req->wb_page)) return; - if (count != nfs_page_length(page)) + if (!nfs_page_group_covers_page(req)) return; - SetPageUptodate(page); + SetPageUptodate(req->wb_page); } static int wb_priority(struct writeback_control *wbc) @@ -796,7 +856,7 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page, return PTR_ERR(req); /* Update file length */ nfs_grow_file(page, offset, count); - nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes); + nfs_mark_uptodate(req); nfs_mark_request_dirty(req); nfs_unlock_and_release_request(req); return 0; -- cgit v1.2.2