diff options
author | Konstantin Khlebnikov <khlebnikov@yandex-team.ru> | 2015-04-14 18:45:27 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-04-14 19:49:01 -0400 |
commit | b9ea25152e56365ce149b9a39637cd7a16eec556 (patch) | |
tree | e8ab83d71a0bea5330d38e77e948170c0054d2a3 | |
parent | ca0984caa8235762dc4e22c1c47ae6719dcc4064 (diff) |
page_writeback: clean up mess around cancel_dirty_page()
This patch replaces cancel_dirty_page() with a helper function
account_page_cleaned() which only updates counters. It's called from
truncate_complete_page() and from try_to_free_buffers() (hack for ext3).
Page is locked in both cases, page-lock protects against concurrent
dirtiers: see commit 2d6d7f982846 ("mm: protect set_page_dirty() from
ongoing truncation").
Delete_from_page_cache() shouldn't be called for dirty pages, they must
be handled by caller (either written or truncated). This patch treats
final dirty accounting fixup at the end of __delete_from_page_cache() as
a debug check and adds WARN_ON_ONCE() around it. If something removes
dirty pages without proper handling that might be a bug and unwritten
data might be lost.
Hugetlbfs has no dirty pages accounting, ClearPageDirty() is enough
here.
cancel_dirty_page() in nfs_wb_page_cancel() is redundant. This is
helper for nfs_invalidate_page() and it's called only in case complete
invalidation.
The mess was started in v2.6.20 after commits 46d2277c796f ("Clean up
and make try_to_free_buffers() not race with dirty pages") and
3e67c0987d75 ("truncate: clear page dirtiness before running
try_to_free_buffers()") first was reverted right in v2.6.20 in commit
ecdfc9787fe5 ("Resurrect 'try_to_free_buffers()' VM hackery"), second in
v2.6.25 commit a2b345642f53 ("Fix dirty page accounting leak with ext3
data=journal").
Custom fixes were introduced between these points. NFS in v2.6.23, commit
1b3b4a1a2deb ("NFS: Fix a write request leak in nfs_invalidate_page()").
Kludge in __delete_from_page_cache() in v2.6.24, commit 3a6927906f1b ("Do
dirty page accounting when removing a page from the page cache"). Since
v2.6.25 all of them are redundant.
[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Cc: Tejun Heo <tj@kernel.org>
Cc: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | drivers/staging/lustre/lustre/include/linux/lustre_patchless_compat.h | 4 | ||||
-rw-r--r-- | fs/buffer.c | 4 | ||||
-rw-r--r-- | fs/hugetlbfs/inode.c | 2 | ||||
-rw-r--r-- | fs/nfs/write.c | 5 | ||||
-rw-r--r-- | include/linux/mm.h | 2 | ||||
-rw-r--r-- | include/linux/page-flags.h | 2 | ||||
-rw-r--r-- | mm/filemap.c | 15 | ||||
-rw-r--r-- | mm/page-writeback.c | 19 | ||||
-rw-r--r-- | mm/truncate.c | 37 |
9 files changed, 41 insertions, 49 deletions
diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_patchless_compat.h b/drivers/staging/lustre/lustre/include/linux/lustre_patchless_compat.h index a260e99a4447..d72605864b0a 100644 --- a/drivers/staging/lustre/lustre/include/linux/lustre_patchless_compat.h +++ b/drivers/staging/lustre/lustre/include/linux/lustre_patchless_compat.h | |||
@@ -55,7 +55,9 @@ truncate_complete_page(struct address_space *mapping, struct page *page) | |||
55 | if (PagePrivate(page)) | 55 | if (PagePrivate(page)) |
56 | page->mapping->a_ops->invalidatepage(page, 0, PAGE_CACHE_SIZE); | 56 | page->mapping->a_ops->invalidatepage(page, 0, PAGE_CACHE_SIZE); |
57 | 57 | ||
58 | cancel_dirty_page(page, PAGE_SIZE); | 58 | if (TestClearPageDirty(page)) |
59 | account_page_cleaned(page, mapping); | ||
60 | |||
59 | ClearPageMappedToDisk(page); | 61 | ClearPageMappedToDisk(page); |
60 | ll_delete_from_page_cache(page); | 62 | ll_delete_from_page_cache(page); |
61 | } | 63 | } |
diff --git a/fs/buffer.c b/fs/buffer.c index 20805db2c987..c7a5602d01ee 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
@@ -3243,8 +3243,8 @@ int try_to_free_buffers(struct page *page) | |||
3243 | * to synchronise against __set_page_dirty_buffers and prevent the | 3243 | * to synchronise against __set_page_dirty_buffers and prevent the |
3244 | * dirty bit from being lost. | 3244 | * dirty bit from being lost. |
3245 | */ | 3245 | */ |
3246 | if (ret) | 3246 | if (ret && TestClearPageDirty(page)) |
3247 | cancel_dirty_page(page, PAGE_CACHE_SIZE); | 3247 | account_page_cleaned(page, mapping); |
3248 | spin_unlock(&mapping->private_lock); | 3248 | spin_unlock(&mapping->private_lock); |
3249 | out: | 3249 | out: |
3250 | if (buffers_to_free) { | 3250 | if (buffers_to_free) { |
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index c274aca8e8dc..db76cec3ce21 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c | |||
@@ -319,7 +319,7 @@ static int hugetlbfs_write_end(struct file *file, struct address_space *mapping, | |||
319 | 319 | ||
320 | static void truncate_huge_page(struct page *page) | 320 | static void truncate_huge_page(struct page *page) |
321 | { | 321 | { |
322 | cancel_dirty_page(page, /* No IO accounting for huge pages? */0); | 322 | ClearPageDirty(page); |
323 | ClearPageUptodate(page); | 323 | ClearPageUptodate(page); |
324 | delete_from_page_cache(page); | 324 | delete_from_page_cache(page); |
325 | } | 325 | } |
diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 849ed784d6ac..759931088094 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c | |||
@@ -1876,11 +1876,6 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page) | |||
1876 | * request from the inode / page_private pointer and | 1876 | * request from the inode / page_private pointer and |
1877 | * release it */ | 1877 | * release it */ |
1878 | nfs_inode_remove_request(req); | 1878 | nfs_inode_remove_request(req); |
1879 | /* | ||
1880 | * In case nfs_inode_remove_request has marked the | ||
1881 | * page as being dirty | ||
1882 | */ | ||
1883 | cancel_dirty_page(page, PAGE_CACHE_SIZE); | ||
1884 | nfs_unlock_and_release_request(req); | 1879 | nfs_unlock_and_release_request(req); |
1885 | } | 1880 | } |
1886 | 1881 | ||
diff --git a/include/linux/mm.h b/include/linux/mm.h index cccbbba12b9d..6571dd78e984 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h | |||
@@ -1294,9 +1294,11 @@ int __set_page_dirty_no_writeback(struct page *page); | |||
1294 | int redirty_page_for_writepage(struct writeback_control *wbc, | 1294 | int redirty_page_for_writepage(struct writeback_control *wbc, |
1295 | struct page *page); | 1295 | struct page *page); |
1296 | void account_page_dirtied(struct page *page, struct address_space *mapping); | 1296 | void account_page_dirtied(struct page *page, struct address_space *mapping); |
1297 | void account_page_cleaned(struct page *page, struct address_space *mapping); | ||
1297 | int set_page_dirty(struct page *page); | 1298 | int set_page_dirty(struct page *page); |
1298 | int set_page_dirty_lock(struct page *page); | 1299 | int set_page_dirty_lock(struct page *page); |
1299 | int clear_page_dirty_for_io(struct page *page); | 1300 | int clear_page_dirty_for_io(struct page *page); |
1301 | |||
1300 | int get_cmdline(struct task_struct *task, char *buffer, int buflen); | 1302 | int get_cmdline(struct task_struct *task, char *buffer, int buflen); |
1301 | 1303 | ||
1302 | /* Is the vma a continuation of the stack vma above it? */ | 1304 | /* Is the vma a continuation of the stack vma above it? */ |
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 5ed7bdaf22d5..c851ff92d5b3 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h | |||
@@ -328,8 +328,6 @@ static inline void SetPageUptodate(struct page *page) | |||
328 | 328 | ||
329 | CLEARPAGEFLAG(Uptodate, uptodate) | 329 | CLEARPAGEFLAG(Uptodate, uptodate) |
330 | 330 | ||
331 | extern void cancel_dirty_page(struct page *page, unsigned int account_size); | ||
332 | |||
333 | int test_clear_page_writeback(struct page *page); | 331 | int test_clear_page_writeback(struct page *page); |
334 | int __test_set_page_writeback(struct page *page, bool keep_write); | 332 | int __test_set_page_writeback(struct page *page, bool keep_write); |
335 | 333 | ||
diff --git a/mm/filemap.c b/mm/filemap.c index ad7242043bdb..434dba317400 100644 --- a/mm/filemap.c +++ b/mm/filemap.c | |||
@@ -203,16 +203,15 @@ void __delete_from_page_cache(struct page *page, void *shadow) | |||
203 | BUG_ON(page_mapped(page)); | 203 | BUG_ON(page_mapped(page)); |
204 | 204 | ||
205 | /* | 205 | /* |
206 | * Some filesystems seem to re-dirty the page even after | 206 | * At this point page must be either written or cleaned by truncate. |
207 | * the VM has canceled the dirty bit (eg ext3 journaling). | 207 | * Dirty page here signals a bug and loss of unwritten data. |
208 | * | 208 | * |
209 | * Fix it up by doing a final dirty accounting check after | 209 | * This fixes dirty accounting after removing the page entirely but |
210 | * having removed the page entirely. | 210 | * leaves PageDirty set: it has no effect for truncated page and |
211 | * anyway will be cleared before returning page into buddy allocator. | ||
211 | */ | 212 | */ |
212 | if (PageDirty(page) && mapping_cap_account_dirty(mapping)) { | 213 | if (WARN_ON_ONCE(PageDirty(page))) |
213 | dec_zone_page_state(page, NR_FILE_DIRTY); | 214 | account_page_cleaned(page, mapping); |
214 | dec_bdi_stat(inode_to_bdi(mapping->host), BDI_RECLAIMABLE); | ||
215 | } | ||
216 | } | 215 | } |
217 | 216 | ||
218 | /** | 217 | /** |
diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 644bcb665773..0372411f38fc 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c | |||
@@ -2111,6 +2111,25 @@ void account_page_dirtied(struct page *page, struct address_space *mapping) | |||
2111 | EXPORT_SYMBOL(account_page_dirtied); | 2111 | EXPORT_SYMBOL(account_page_dirtied); |
2112 | 2112 | ||
2113 | /* | 2113 | /* |
2114 | * Helper function for deaccounting dirty page without writeback. | ||
2115 | * | ||
2116 | * Doing this should *normally* only ever be done when a page | ||
2117 | * is truncated, and is not actually mapped anywhere at all. However, | ||
2118 | * fs/buffer.c does this when it notices that somebody has cleaned | ||
2119 | * out all the buffers on a page without actually doing it through | ||
2120 | * the VM. Can you say "ext3 is horribly ugly"? Thought you could. | ||
2121 | */ | ||
2122 | void account_page_cleaned(struct page *page, struct address_space *mapping) | ||
2123 | { | ||
2124 | if (mapping_cap_account_dirty(mapping)) { | ||
2125 | dec_zone_page_state(page, NR_FILE_DIRTY); | ||
2126 | dec_bdi_stat(inode_to_bdi(mapping->host), BDI_RECLAIMABLE); | ||
2127 | task_io_account_cancelled_write(PAGE_CACHE_SIZE); | ||
2128 | } | ||
2129 | } | ||
2130 | EXPORT_SYMBOL(account_page_cleaned); | ||
2131 | |||
2132 | /* | ||
2114 | * For address_spaces which do not use buffers. Just tag the page as dirty in | 2133 | * For address_spaces which do not use buffers. Just tag the page as dirty in |
2115 | * its radix tree. | 2134 | * its radix tree. |
2116 | * | 2135 | * |
diff --git a/mm/truncate.c b/mm/truncate.c index ddec5a5966d7..7a9d8a3cb143 100644 --- a/mm/truncate.c +++ b/mm/truncate.c | |||
@@ -93,35 +93,6 @@ void do_invalidatepage(struct page *page, unsigned int offset, | |||
93 | } | 93 | } |
94 | 94 | ||
95 | /* | 95 | /* |
96 | * This cancels just the dirty bit on the kernel page itself, it | ||
97 | * does NOT actually remove dirty bits on any mmap's that may be | ||
98 | * around. It also leaves the page tagged dirty, so any sync | ||
99 | * activity will still find it on the dirty lists, and in particular, | ||
100 | * clear_page_dirty_for_io() will still look at the dirty bits in | ||
101 | * the VM. | ||
102 | * | ||
103 | * Doing this should *normally* only ever be done when a page | ||
104 | * is truncated, and is not actually mapped anywhere at all. However, | ||
105 | * fs/buffer.c does this when it notices that somebody has cleaned | ||
106 | * out all the buffers on a page without actually doing it through | ||
107 | * the VM. Can you say "ext3 is horribly ugly"? Tought you could. | ||
108 | */ | ||
109 | void cancel_dirty_page(struct page *page, unsigned int account_size) | ||
110 | { | ||
111 | if (TestClearPageDirty(page)) { | ||
112 | struct address_space *mapping = page->mapping; | ||
113 | if (mapping && mapping_cap_account_dirty(mapping)) { | ||
114 | dec_zone_page_state(page, NR_FILE_DIRTY); | ||
115 | dec_bdi_stat(inode_to_bdi(mapping->host), | ||
116 | BDI_RECLAIMABLE); | ||
117 | if (account_size) | ||
118 | task_io_account_cancelled_write(account_size); | ||
119 | } | ||
120 | } | ||
121 | } | ||
122 | EXPORT_SYMBOL(cancel_dirty_page); | ||
123 | |||
124 | /* | ||
125 | * If truncate cannot remove the fs-private metadata from the page, the page | 96 | * If truncate cannot remove the fs-private metadata from the page, the page |
126 | * becomes orphaned. It will be left on the LRU and may even be mapped into | 97 | * becomes orphaned. It will be left on the LRU and may even be mapped into |
127 | * user pagetables if we're racing with filemap_fault(). | 98 | * user pagetables if we're racing with filemap_fault(). |
@@ -140,7 +111,13 @@ truncate_complete_page(struct address_space *mapping, struct page *page) | |||
140 | if (page_has_private(page)) | 111 | if (page_has_private(page)) |
141 | do_invalidatepage(page, 0, PAGE_CACHE_SIZE); | 112 | do_invalidatepage(page, 0, PAGE_CACHE_SIZE); |
142 | 113 | ||
143 | cancel_dirty_page(page, PAGE_CACHE_SIZE); | 114 | /* |
115 | * Some filesystems seem to re-dirty the page even after | ||
116 | * the VM has canceled the dirty bit (eg ext3 journaling). | ||
117 | * Hence dirty accounting check is placed after invalidation. | ||
118 | */ | ||
119 | if (TestClearPageDirty(page)) | ||
120 | account_page_cleaned(page, mapping); | ||
144 | 121 | ||
145 | ClearPageMappedToDisk(page); | 122 | ClearPageMappedToDisk(page); |
146 | delete_from_page_cache(page); | 123 | delete_from_page_cache(page); |