aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorWeston Andros Adamson <dros@primarydata.com>2014-11-12 12:08:00 -0500
committerTrond Myklebust <trond.myklebust@primarydata.com>2014-11-24 17:00:42 -0500
commitcb1410c71e0b6b2eba8c1890645a76ff86169d24 (patch)
tree22fb6770fed66df7d51676e25e5a525bdd352abe
parent6a74c0c9402b85647793da70edc9d6b097d54472 (diff)
NFS: fix subtle change in COMMIT behavior
Recent work in the pgio layer made it possible for there to be more than one request per page. This caused a subtle change in commit behavior, because write.c:nfs_commit_unstable_pages compares the number of *pages* waiting for writeback against the number of requests on a commit list to choose when to send a COMMIT in a non-blocking flush. This is probably hard to hit in normal operation - you have to be using rsize/wsize < PAGE_SIZE, or pnfs with lots of boundaries that are not page aligned to have a noticeable change in behavior. Signed-off-by: Weston Andros Adamson <dros@primarydata.com> Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
-rw-r--r--fs/nfs/callback_proc.c2
-rw-r--r--fs/nfs/inode.c8
-rw-r--r--fs/nfs/pagelist.c11
-rw-r--r--fs/nfs/write.c17
-rw-r--r--include/linux/nfs_fs.h4
5 files changed, 27 insertions, 15 deletions
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 73466b934090..e36a9d78ea49 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -49,7 +49,7 @@ __be32 nfs4_callback_getattr(struct cb_getattrargs *args,
49 goto out_iput; 49 goto out_iput;
50 res->size = i_size_read(inode); 50 res->size = i_size_read(inode);
51 res->change_attr = delegation->change_attr; 51 res->change_attr = delegation->change_attr;
52 if (nfsi->npages != 0) 52 if (nfsi->nrequests != 0)
53 res->change_attr++; 53 res->change_attr++;
54 res->ctime = inode->i_ctime; 54 res->ctime = inode->i_ctime;
55 res->mtime = inode->i_mtime; 55 res->mtime = inode->i_mtime;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 00689a8a85e4..2b48ce58a584 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1149,7 +1149,7 @@ static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr
1149 if ((fattr->valid & NFS_ATTR_FATTR_PRESIZE) 1149 if ((fattr->valid & NFS_ATTR_FATTR_PRESIZE)
1150 && (fattr->valid & NFS_ATTR_FATTR_SIZE) 1150 && (fattr->valid & NFS_ATTR_FATTR_SIZE)
1151 && i_size_read(inode) == nfs_size_to_loff_t(fattr->pre_size) 1151 && i_size_read(inode) == nfs_size_to_loff_t(fattr->pre_size)
1152 && nfsi->npages == 0) { 1152 && nfsi->nrequests == 0) {
1153 i_size_write(inode, nfs_size_to_loff_t(fattr->size)); 1153 i_size_write(inode, nfs_size_to_loff_t(fattr->size));
1154 ret |= NFS_INO_INVALID_ATTR; 1154 ret |= NFS_INO_INVALID_ATTR;
1155 } 1155 }
@@ -1192,7 +1192,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
1192 if (fattr->valid & NFS_ATTR_FATTR_SIZE) { 1192 if (fattr->valid & NFS_ATTR_FATTR_SIZE) {
1193 cur_size = i_size_read(inode); 1193 cur_size = i_size_read(inode);
1194 new_isize = nfs_size_to_loff_t(fattr->size); 1194 new_isize = nfs_size_to_loff_t(fattr->size);
1195 if (cur_size != new_isize && nfsi->npages == 0) 1195 if (cur_size != new_isize && nfsi->nrequests == 0)
1196 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; 1196 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
1197 } 1197 }
1198 1198
@@ -1619,7 +1619,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1619 if (new_isize != cur_isize) { 1619 if (new_isize != cur_isize) {
1620 /* Do we perhaps have any outstanding writes, or has 1620 /* Do we perhaps have any outstanding writes, or has
1621 * the file grown beyond our last write? */ 1621 * the file grown beyond our last write? */
1622 if ((nfsi->npages == 0) || new_isize > cur_isize) { 1622 if ((nfsi->nrequests == 0) || new_isize > cur_isize) {
1623 i_size_write(inode, new_isize); 1623 i_size_write(inode, new_isize);
1624 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; 1624 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
1625 invalid &= ~NFS_INO_REVAL_PAGECACHE; 1625 invalid &= ~NFS_INO_REVAL_PAGECACHE;
@@ -1784,7 +1784,7 @@ static void init_once(void *foo)
1784 INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); 1784 INIT_LIST_HEAD(&nfsi->access_cache_entry_lru);
1785 INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); 1785 INIT_LIST_HEAD(&nfsi->access_cache_inode_lru);
1786 INIT_LIST_HEAD(&nfsi->commit_info.list); 1786 INIT_LIST_HEAD(&nfsi->commit_info.list);
1787 nfsi->npages = 0; 1787 nfsi->nrequests = 0;
1788 nfsi->commit_info.ncommit = 0; 1788 nfsi->commit_info.ncommit = 0;
1789 atomic_set(&nfsi->commit_info.rpcs_out, 0); 1789 atomic_set(&nfsi->commit_info.rpcs_out, 0);
1790 atomic_set(&nfsi->silly_count, 1); 1790 atomic_set(&nfsi->silly_count, 1);
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index ed0db61f8543..2b5e769beb16 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -258,6 +258,7 @@ bool nfs_page_group_sync_on_bit(struct nfs_page *req, unsigned int bit)
258static inline void 258static inline void
259nfs_page_group_init(struct nfs_page *req, struct nfs_page *prev) 259nfs_page_group_init(struct nfs_page *req, struct nfs_page *prev)
260{ 260{
261 struct inode *inode;
261 WARN_ON_ONCE(prev == req); 262 WARN_ON_ONCE(prev == req);
262 263
263 if (!prev) { 264 if (!prev) {
@@ -276,12 +277,16 @@ nfs_page_group_init(struct nfs_page *req, struct nfs_page *prev)
276 * nfs_page_group_destroy is called */ 277 * nfs_page_group_destroy is called */
277 kref_get(&req->wb_head->wb_kref); 278 kref_get(&req->wb_head->wb_kref);
278 279
279 /* grab extra ref if head request has extra ref from 280 /* grab extra ref and bump the request count if head request
280 * the write/commit path to handle handoff between write 281 * has extra ref from the write/commit path to handle handoff
281 * and commit lists */ 282 * between write and commit lists. */
282 if (test_bit(PG_INODE_REF, &prev->wb_head->wb_flags)) { 283 if (test_bit(PG_INODE_REF, &prev->wb_head->wb_flags)) {
284 inode = page_file_mapping(req->wb_page)->host;
283 set_bit(PG_INODE_REF, &req->wb_flags); 285 set_bit(PG_INODE_REF, &req->wb_flags);
284 kref_get(&req->wb_kref); 286 kref_get(&req->wb_kref);
287 spin_lock(&inode->i_lock);
288 NFS_I(inode)->nrequests++;
289 spin_unlock(&inode->i_lock);
285 } 290 }
286 } 291 }
287} 292}
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index f83b02dc9166..d489ff3f438f 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -670,7 +670,8 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
670 nfs_lock_request(req); 670 nfs_lock_request(req);
671 671
672 spin_lock(&inode->i_lock); 672 spin_lock(&inode->i_lock);
673 if (!nfsi->npages && NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE)) 673 if (!nfsi->nrequests &&
674 NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE))
674 inode->i_version++; 675 inode->i_version++;
675 /* 676 /*
676 * Swap-space should not get truncated. Hence no need to plug the race 677 * Swap-space should not get truncated. Hence no need to plug the race
@@ -681,9 +682,11 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
681 SetPagePrivate(req->wb_page); 682 SetPagePrivate(req->wb_page);
682 set_page_private(req->wb_page, (unsigned long)req); 683 set_page_private(req->wb_page, (unsigned long)req);
683 } 684 }
684 nfsi->npages++; 685 nfsi->nrequests++;
685 /* this a head request for a page group - mark it as having an 686 /* this a head request for a page group - mark it as having an
686 * extra reference so sub groups can follow suit */ 687 * extra reference so sub groups can follow suit.
688 * This flag also informs pgio layer when to bump nrequests when
689 * adding subrequests. */
687 WARN_ON(test_and_set_bit(PG_INODE_REF, &req->wb_flags)); 690 WARN_ON(test_and_set_bit(PG_INODE_REF, &req->wb_flags));
688 kref_get(&req->wb_kref); 691 kref_get(&req->wb_kref);
689 spin_unlock(&inode->i_lock); 692 spin_unlock(&inode->i_lock);
@@ -709,7 +712,11 @@ static void nfs_inode_remove_request(struct nfs_page *req)
709 wake_up_page(head->wb_page, PG_private); 712 wake_up_page(head->wb_page, PG_private);
710 clear_bit(PG_MAPPED, &head->wb_flags); 713 clear_bit(PG_MAPPED, &head->wb_flags);
711 } 714 }
712 nfsi->npages--; 715 nfsi->nrequests--;
716 spin_unlock(&inode->i_lock);
717 } else {
718 spin_lock(&inode->i_lock);
719 nfsi->nrequests--;
713 spin_unlock(&inode->i_lock); 720 spin_unlock(&inode->i_lock);
714 } 721 }
715 722
@@ -1735,7 +1742,7 @@ static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_contr
1735 /* Don't commit yet if this is a non-blocking flush and there 1742 /* Don't commit yet if this is a non-blocking flush and there
1736 * are a lot of outstanding writes for this mapping. 1743 * are a lot of outstanding writes for this mapping.
1737 */ 1744 */
1738 if (nfsi->commit_info.ncommit <= (nfsi->npages >> 1)) 1745 if (nfsi->commit_info.ncommit <= (nfsi->nrequests >> 1))
1739 goto out_mark_dirty; 1746 goto out_mark_dirty;
1740 1747
1741 /* don't wait for the COMMIT response */ 1748 /* don't wait for the COMMIT response */
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index c72d1ad41ad4..6d627b92df53 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -163,7 +163,7 @@ struct nfs_inode {
163 */ 163 */
164 __be32 cookieverf[2]; 164 __be32 cookieverf[2];
165 165
166 unsigned long npages; 166 unsigned long nrequests;
167 struct nfs_mds_commit_info commit_info; 167 struct nfs_mds_commit_info commit_info;
168 168
169 /* Open contexts for shared mmap writes */ 169 /* Open contexts for shared mmap writes */
@@ -520,7 +520,7 @@ extern void nfs_commit_free(struct nfs_commit_data *data);
520static inline int 520static inline int
521nfs_have_writebacks(struct inode *inode) 521nfs_have_writebacks(struct inode *inode)
522{ 522{
523 return NFS_I(inode)->npages != 0; 523 return NFS_I(inode)->nrequests != 0;
524} 524}
525 525
526/* 526/*