path: root/mm
author     Johannes Weiner <hannes@cmpxchg.org>        2015-01-08 17:32:18 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>  2015-01-08 18:10:51 -0500
commit     2d6d7f98284648c5ed113fe22a132148950b140f (patch)
tree       71cb2c508fa8eca681139ed84e123bd445ed48e3 /mm
parent     7a3ef208e662f4b63d43a23f61a64a129c525bbc (diff)
mm: protect set_page_dirty() from ongoing truncation
Tejun, while reviewing the code, spotted the following race condition
between the dirtying and truncation of a page:

__set_page_dirty_nobuffers()       __delete_from_page_cache()
  if (TestSetPageDirty(page))
                                     page->mapping = NULL
                                     if (PageDirty())
                                       dec_zone_page_state(page, NR_FILE_DIRTY);
                                       dec_bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE);
    if (page->mapping)
      account_page_dirtied(page)
        __inc_zone_page_state(page, NR_FILE_DIRTY);
        __inc_bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE);

which results in an imbalance of NR_FILE_DIRTY and BDI_RECLAIMABLE.

Dirtiers usually lock out truncation, either by holding the page lock
directly, or in case of zap_pte_range(), by pinning the mapcount with
the page table lock held.  The notable exception to this rule, though,
is do_wp_page(), for which this race exists.  However, do_wp_page()
already waits for a locked page to unlock before setting the dirty bit,
in order to prevent a race where clear_page_dirty() misses the page bit
in the presence of dirty ptes.  Upgrade that wait to a fully locked
set_page_dirty() to also cover the situation explained above.

Afterwards, the code in set_page_dirty() dealing with a truncation race
is no longer needed.  Remove it.

Reported-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
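In short, the patch makes the do_wp_page() dirty path take the page lock
across set_page_dirty(), so truncation cannot clear page->mapping between
the dirty-bit test and the accounting.  A minimal sketch of the resulting
pattern, condensed from the mm/memory.c hunk below (not a drop-in
replacement for the full function):

        if (!page_mkwrite) {
                struct address_space *mapping;
                int dirtied;

                lock_page(dirty_page);                  /* blocks __delete_from_page_cache() */
                dirtied = set_page_dirty(dirty_page);   /* accounting sees a stable ->mapping */
                mapping = dirty_page->mapping;          /* sample while still locked */
                unlock_page(dirty_page);

                if (dirtied && mapping)                 /* some drivers dirty pages without a mapping */
                        balance_dirty_pages_ratelimited(mapping);
        }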
Diffstat (limited to 'mm')
-rw-r--r--  mm/memory.c          27
-rw-r--r--  mm/page-writeback.c  43
2 files changed, 29 insertions, 41 deletions
diff --git a/mm/memory.c b/mm/memory.c
index d7e497e98f46..c6565f00fb38 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2137,17 +2137,24 @@ reuse:
         if (!dirty_page)
                 return ret;
 
-        /*
-         * Yes, Virginia, this is actually required to prevent a race
-         * with clear_page_dirty_for_io() from clearing the page dirty
-         * bit after it clear all dirty ptes, but before a racing
-         * do_wp_page installs a dirty pte.
-         *
-         * do_shared_fault is protected similarly.
-         */
         if (!page_mkwrite) {
-                wait_on_page_locked(dirty_page);
-                set_page_dirty_balance(dirty_page);
+                struct address_space *mapping;
+                int dirtied;
+
+                lock_page(dirty_page);
+                dirtied = set_page_dirty(dirty_page);
+                VM_BUG_ON_PAGE(PageAnon(dirty_page), dirty_page);
+                mapping = dirty_page->mapping;
+                unlock_page(dirty_page);
+
+                if (dirtied && mapping) {
+                        /*
+                         * Some device drivers do not set page.mapping
+                         * but still dirty their pages
+                         */
+                        balance_dirty_pages_ratelimited(mapping);
+                }
+
                 /* file_update_time outside page_lock */
                 if (vma->vm_file)
                         file_update_time(vma->vm_file);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index d5d81f5384d1..6f4335238e33 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1541,16 +1541,6 @@ pause:
         bdi_start_background_writeback(bdi);
 }
 
-void set_page_dirty_balance(struct page *page)
-{
-        if (set_page_dirty(page)) {
-                struct address_space *mapping = page_mapping(page);
-
-                if (mapping)
-                        balance_dirty_pages_ratelimited(mapping);
-        }
-}
-
 static DEFINE_PER_CPU(int, bdp_ratelimits);
 
 /*
@@ -2123,32 +2113,25 @@ EXPORT_SYMBOL(account_page_dirtied);
  * page dirty in that case, but not all the buffers.  This is a "bottom-up"
  * dirtying, whereas __set_page_dirty_buffers() is a "top-down" dirtying.
  *
- * Most callers have locked the page, which pins the address_space in memory.
- * But zap_pte_range() does not lock the page, however in that case the
- * mapping is pinned by the vma's ->vm_file reference.
- *
- * We take care to handle the case where the page was truncated from the
- * mapping by re-checking page_mapping() inside tree_lock.
+ * The caller must ensure this doesn't race with truncation.  Most will simply
+ * hold the page lock, but e.g. zap_pte_range() calls with the page mapped and
+ * the pte lock held, which also locks out truncation.
  */
 int __set_page_dirty_nobuffers(struct page *page)
 {
         if (!TestSetPageDirty(page)) {
                 struct address_space *mapping = page_mapping(page);
-                struct address_space *mapping2;
                 unsigned long flags;
 
                 if (!mapping)
                         return 1;
 
                 spin_lock_irqsave(&mapping->tree_lock, flags);
-                mapping2 = page_mapping(page);
-                if (mapping2) { /* Race with truncate? */
-                        BUG_ON(mapping2 != mapping);
-                        WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page));
-                        account_page_dirtied(page, mapping);
-                        radix_tree_tag_set(&mapping->page_tree,
-                                page_index(page), PAGECACHE_TAG_DIRTY);
-                }
+                BUG_ON(page_mapping(page) != mapping);
+                WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page));
+                account_page_dirtied(page, mapping);
+                radix_tree_tag_set(&mapping->page_tree, page_index(page),
+                                   PAGECACHE_TAG_DIRTY);
                 spin_unlock_irqrestore(&mapping->tree_lock, flags);
                 if (mapping->host) {
                         /* !PageAnon && !swapper_space */
@@ -2305,12 +2288,10 @@ int clear_page_dirty_for_io(struct page *page)
                 /*
                  * We carefully synchronise fault handlers against
                  * installing a dirty pte and marking the page dirty
                  * at this point.  We do this by having them hold the
-                 * page lock at some point after installing their
-                 * pte, but before marking the page dirty.
-                 * Pages are always locked coming in here, so we get
-                 * the desired exclusion. See mm/memory.c:do_wp_page()
-                 * for more comments.
+                 * page lock while dirtying the page, and pages are
+                 * always locked coming in here, so we get the desired
+                 * exclusion.
                  */
                 if (TestClearPageDirty(page)) {
                         dec_zone_page_state(page, NR_FILE_DIRTY);
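The updated comment above __set_page_dirty_nobuffers() shifts the truncation
check to the caller.  A hypothetical caller honoring that contract
(example_mark_dirty() is illustrative only, not part of this patch) might
look like:

        static void example_mark_dirty(struct page *page)
        {
                lock_page(page);                /* excludes truncation for the duration */
                if (page->mapping)              /* still attached to a file? */
                        set_page_dirty(page);
                unlock_page(page);
        }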