diff options
author | Weston Andros Adamson <dros@primarydata.com> | 2014-11-12 12:08:00 -0500 |
---|---|---|
committer | Trond Myklebust <trond.myklebust@primarydata.com> | 2014-11-24 17:00:42 -0500 |
commit | cb1410c71e0b6b2eba8c1890645a76ff86169d24 (patch) | |
tree | 22fb6770fed66df7d51676e25e5a525bdd352abe | |
parent | 6a74c0c9402b85647793da70edc9d6b097d54472 (diff) |
NFS: fix subtle change in COMMIT behavior
Recent work in the pgio layer made it possible for there to be more than one
request per page. This caused a subtle change in commit behavior, because
write.c:nfs_commit_unstable_pages compares the number of *pages* waiting for
writeback against the number of requests on a commit list to choose when to
send a COMMIT in a non-blocking flush.
This is probably hard to hit in normal operation - you have to be using
rsize/wsize < PAGE_SIZE, or pnfs with lots of boundaries that are not page
aligned to have a noticeable change in behavior.
Signed-off-by: Weston Andros Adamson <dros@primarydata.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
-rw-r--r-- | fs/nfs/callback_proc.c | 2 | ||||
-rw-r--r-- | fs/nfs/inode.c | 8 | ||||
-rw-r--r-- | fs/nfs/pagelist.c | 11 | ||||
-rw-r--r-- | fs/nfs/write.c | 17 | ||||
-rw-r--r-- | include/linux/nfs_fs.h | 4 |
5 files changed, 27 insertions, 15 deletions
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 73466b934090..e36a9d78ea49 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c | |||
@@ -49,7 +49,7 @@ __be32 nfs4_callback_getattr(struct cb_getattrargs *args, | |||
49 | goto out_iput; | 49 | goto out_iput; |
50 | res->size = i_size_read(inode); | 50 | res->size = i_size_read(inode); |
51 | res->change_attr = delegation->change_attr; | 51 | res->change_attr = delegation->change_attr; |
52 | if (nfsi->npages != 0) | 52 | if (nfsi->nrequests != 0) |
53 | res->change_attr++; | 53 | res->change_attr++; |
54 | res->ctime = inode->i_ctime; | 54 | res->ctime = inode->i_ctime; |
55 | res->mtime = inode->i_mtime; | 55 | res->mtime = inode->i_mtime; |
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 00689a8a85e4..2b48ce58a584 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c | |||
@@ -1149,7 +1149,7 @@ static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr | |||
1149 | if ((fattr->valid & NFS_ATTR_FATTR_PRESIZE) | 1149 | if ((fattr->valid & NFS_ATTR_FATTR_PRESIZE) |
1150 | && (fattr->valid & NFS_ATTR_FATTR_SIZE) | 1150 | && (fattr->valid & NFS_ATTR_FATTR_SIZE) |
1151 | && i_size_read(inode) == nfs_size_to_loff_t(fattr->pre_size) | 1151 | && i_size_read(inode) == nfs_size_to_loff_t(fattr->pre_size) |
1152 | && nfsi->npages == 0) { | 1152 | && nfsi->nrequests == 0) { |
1153 | i_size_write(inode, nfs_size_to_loff_t(fattr->size)); | 1153 | i_size_write(inode, nfs_size_to_loff_t(fattr->size)); |
1154 | ret |= NFS_INO_INVALID_ATTR; | 1154 | ret |= NFS_INO_INVALID_ATTR; |
1155 | } | 1155 | } |
@@ -1192,7 +1192,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat | |||
1192 | if (fattr->valid & NFS_ATTR_FATTR_SIZE) { | 1192 | if (fattr->valid & NFS_ATTR_FATTR_SIZE) { |
1193 | cur_size = i_size_read(inode); | 1193 | cur_size = i_size_read(inode); |
1194 | new_isize = nfs_size_to_loff_t(fattr->size); | 1194 | new_isize = nfs_size_to_loff_t(fattr->size); |
1195 | if (cur_size != new_isize && nfsi->npages == 0) | 1195 | if (cur_size != new_isize && nfsi->nrequests == 0) |
1196 | invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; | 1196 | invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; |
1197 | } | 1197 | } |
1198 | 1198 | ||
@@ -1619,7 +1619,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
1619 | if (new_isize != cur_isize) { | 1619 | if (new_isize != cur_isize) { |
1620 | /* Do we perhaps have any outstanding writes, or has | 1620 | /* Do we perhaps have any outstanding writes, or has |
1621 | * the file grown beyond our last write? */ | 1621 | * the file grown beyond our last write? */ |
1622 | if ((nfsi->npages == 0) || new_isize > cur_isize) { | 1622 | if ((nfsi->nrequests == 0) || new_isize > cur_isize) { |
1623 | i_size_write(inode, new_isize); | 1623 | i_size_write(inode, new_isize); |
1624 | invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; | 1624 | invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; |
1625 | invalid &= ~NFS_INO_REVAL_PAGECACHE; | 1625 | invalid &= ~NFS_INO_REVAL_PAGECACHE; |
@@ -1784,7 +1784,7 @@ static void init_once(void *foo) | |||
1784 | INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); | 1784 | INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); |
1785 | INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); | 1785 | INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); |
1786 | INIT_LIST_HEAD(&nfsi->commit_info.list); | 1786 | INIT_LIST_HEAD(&nfsi->commit_info.list); |
1787 | nfsi->npages = 0; | 1787 | nfsi->nrequests = 0; |
1788 | nfsi->commit_info.ncommit = 0; | 1788 | nfsi->commit_info.ncommit = 0; |
1789 | atomic_set(&nfsi->commit_info.rpcs_out, 0); | 1789 | atomic_set(&nfsi->commit_info.rpcs_out, 0); |
1790 | atomic_set(&nfsi->silly_count, 1); | 1790 | atomic_set(&nfsi->silly_count, 1); |
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index ed0db61f8543..2b5e769beb16 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c | |||
@@ -258,6 +258,7 @@ bool nfs_page_group_sync_on_bit(struct nfs_page *req, unsigned int bit) | |||
258 | static inline void | 258 | static inline void |
259 | nfs_page_group_init(struct nfs_page *req, struct nfs_page *prev) | 259 | nfs_page_group_init(struct nfs_page *req, struct nfs_page *prev) |
260 | { | 260 | { |
261 | struct inode *inode; | ||
261 | WARN_ON_ONCE(prev == req); | 262 | WARN_ON_ONCE(prev == req); |
262 | 263 | ||
263 | if (!prev) { | 264 | if (!prev) { |
@@ -276,12 +277,16 @@ nfs_page_group_init(struct nfs_page *req, struct nfs_page *prev) | |||
276 | * nfs_page_group_destroy is called */ | 277 | * nfs_page_group_destroy is called */ |
277 | kref_get(&req->wb_head->wb_kref); | 278 | kref_get(&req->wb_head->wb_kref); |
278 | 279 | ||
279 | /* grab extra ref if head request has extra ref from | 280 | /* grab extra ref and bump the request count if head request |
280 | * the write/commit path to handle handoff between write | 281 | * has extra ref from the write/commit path to handle handoff |
281 | * and commit lists */ | 282 | * between write and commit lists. */ |
282 | if (test_bit(PG_INODE_REF, &prev->wb_head->wb_flags)) { | 283 | if (test_bit(PG_INODE_REF, &prev->wb_head->wb_flags)) { |
284 | inode = page_file_mapping(req->wb_page)->host; | ||
283 | set_bit(PG_INODE_REF, &req->wb_flags); | 285 | set_bit(PG_INODE_REF, &req->wb_flags); |
284 | kref_get(&req->wb_kref); | 286 | kref_get(&req->wb_kref); |
287 | spin_lock(&inode->i_lock); | ||
288 | NFS_I(inode)->nrequests++; | ||
289 | spin_unlock(&inode->i_lock); | ||
285 | } | 290 | } |
286 | } | 291 | } |
287 | } | 292 | } |
diff --git a/fs/nfs/write.c b/fs/nfs/write.c index f83b02dc9166..d489ff3f438f 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c | |||
@@ -670,7 +670,8 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) | |||
670 | nfs_lock_request(req); | 670 | nfs_lock_request(req); |
671 | 671 | ||
672 | spin_lock(&inode->i_lock); | 672 | spin_lock(&inode->i_lock); |
673 | if (!nfsi->npages && NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE)) | 673 | if (!nfsi->nrequests && |
674 | NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE)) | ||
674 | inode->i_version++; | 675 | inode->i_version++; |
675 | /* | 676 | /* |
676 | * Swap-space should not get truncated. Hence no need to plug the race | 677 | * Swap-space should not get truncated. Hence no need to plug the race |
@@ -681,9 +682,11 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) | |||
681 | SetPagePrivate(req->wb_page); | 682 | SetPagePrivate(req->wb_page); |
682 | set_page_private(req->wb_page, (unsigned long)req); | 683 | set_page_private(req->wb_page, (unsigned long)req); |
683 | } | 684 | } |
684 | nfsi->npages++; | 685 | nfsi->nrequests++; |
685 | /* this a head request for a page group - mark it as having an | 686 | /* this a head request for a page group - mark it as having an |
686 | * extra reference so sub groups can follow suit */ | 687 | * extra reference so sub groups can follow suit. |
688 | * This flag also informs pgio layer when to bump nrequests when | ||
689 | * adding subrequests. */ | ||
687 | WARN_ON(test_and_set_bit(PG_INODE_REF, &req->wb_flags)); | 690 | WARN_ON(test_and_set_bit(PG_INODE_REF, &req->wb_flags)); |
688 | kref_get(&req->wb_kref); | 691 | kref_get(&req->wb_kref); |
689 | spin_unlock(&inode->i_lock); | 692 | spin_unlock(&inode->i_lock); |
@@ -709,7 +712,11 @@ static void nfs_inode_remove_request(struct nfs_page *req) | |||
709 | wake_up_page(head->wb_page, PG_private); | 712 | wake_up_page(head->wb_page, PG_private); |
710 | clear_bit(PG_MAPPED, &head->wb_flags); | 713 | clear_bit(PG_MAPPED, &head->wb_flags); |
711 | } | 714 | } |
712 | nfsi->npages--; | 715 | nfsi->nrequests--; |
716 | spin_unlock(&inode->i_lock); | ||
717 | } else { | ||
718 | spin_lock(&inode->i_lock); | ||
719 | nfsi->nrequests--; | ||
713 | spin_unlock(&inode->i_lock); | 720 | spin_unlock(&inode->i_lock); |
714 | } | 721 | } |
715 | 722 | ||
@@ -1735,7 +1742,7 @@ static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_contr | |||
1735 | /* Don't commit yet if this is a non-blocking flush and there | 1742 | /* Don't commit yet if this is a non-blocking flush and there |
1736 | * are a lot of outstanding writes for this mapping. | 1743 | * are a lot of outstanding writes for this mapping. |
1737 | */ | 1744 | */ |
1738 | if (nfsi->commit_info.ncommit <= (nfsi->npages >> 1)) | 1745 | if (nfsi->commit_info.ncommit <= (nfsi->nrequests >> 1)) |
1739 | goto out_mark_dirty; | 1746 | goto out_mark_dirty; |
1740 | 1747 | ||
1741 | /* don't wait for the COMMIT response */ | 1748 | /* don't wait for the COMMIT response */ |
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index c72d1ad41ad4..6d627b92df53 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h | |||
@@ -163,7 +163,7 @@ struct nfs_inode { | |||
163 | */ | 163 | */ |
164 | __be32 cookieverf[2]; | 164 | __be32 cookieverf[2]; |
165 | 165 | ||
166 | unsigned long npages; | 166 | unsigned long nrequests; |
167 | struct nfs_mds_commit_info commit_info; | 167 | struct nfs_mds_commit_info commit_info; |
168 | 168 | ||
169 | /* Open contexts for shared mmap writes */ | 169 | /* Open contexts for shared mmap writes */ |
@@ -520,7 +520,7 @@ extern void nfs_commit_free(struct nfs_commit_data *data); | |||
520 | static inline int | 520 | static inline int |
521 | nfs_have_writebacks(struct inode *inode) | 521 | nfs_have_writebacks(struct inode *inode) |
522 | { | 522 | { |
523 | return NFS_I(inode)->npages != 0; | 523 | return NFS_I(inode)->nrequests != 0; |
524 | } | 524 | } |
525 | 525 | ||
526 | /* | 526 | /* |