From d7365e783edb858279be1d03f61bc8d5d3383d90 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 29 Oct 2014 14:50:48 -0700 Subject: mm: memcontrol: fix missed end-writeback page accounting Commit 0a31bc97c80c ("mm: memcontrol: rewrite uncharge API") changed page migration to uncharge the old page right away. The page is locked, unmapped, truncated, and off the LRU, but it could race with writeback ending, which then doesn't unaccount the page properly: test_clear_page_writeback() migration wait_on_page_writeback() TestClearPageWriteback() mem_cgroup_migrate() clear PCG_USED mem_cgroup_update_page_stat() if (PageCgroupUsed(pc)) decrease memcg pages under writeback release pc->mem_cgroup->move_lock The per-page statistics interface is heavily optimized to avoid a function call and a lookup_page_cgroup() in the file unmap fast path, which means it doesn't verify whether a page is still charged before clearing PageWriteback() and it has to do it in the stat update later. Rework it so that it looks up the page's memcg once at the beginning of the transaction and then uses it throughout. The charge will be verified before clearing PageWriteback() and migration can't uncharge the page as long as that is still set. The RCU lock will protect the memcg past uncharge. As far as losing the optimization goes, the following test results are from a microbenchmark that maps, faults, and unmaps a 4GB sparse file three times in a nested fashion, so that there are two negative passes that don't account but still go through the new transaction overhead. There is no actual difference: old: 33.195102545 seconds time elapsed ( +- 0.01% ) new: 33.199231369 seconds time elapsed ( +- 0.03% ) The time spent in page_remove_rmap()'s callees still adds up to the same, but the time spent in the function itself seems reduced: # Children Self Command Shared Object Symbol old: 0.12% 0.11% filemapstress [kernel.kallsyms] [k] page_remove_rmap new: 0.12% 0.08% filemapstress [kernel.kallsyms] [k] page_remove_rmap Signed-off-by: Johannes Weiner Acked-by: Michal Hocko Cc: Vladimir Davydov Cc: [3.17.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/rmap.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'mm/rmap.c') diff --git a/mm/rmap.c b/mm/rmap.c index 116a5053415b..f574046f77d4 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1042,15 +1042,16 @@ void page_add_new_anon_rmap(struct page *page, */ void page_add_file_rmap(struct page *page) { - bool locked; + struct mem_cgroup *memcg; unsigned long flags; + bool locked; - mem_cgroup_begin_update_page_stat(page, &locked, &flags); + memcg = mem_cgroup_begin_page_stat(page, &locked, &flags); if (atomic_inc_and_test(&page->_mapcount)) { __inc_zone_page_state(page, NR_FILE_MAPPED); - mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED); + mem_cgroup_inc_page_stat(memcg, MEM_CGROUP_STAT_FILE_MAPPED); } - mem_cgroup_end_update_page_stat(page, &locked, &flags); + mem_cgroup_end_page_stat(memcg, locked, flags); } /** @@ -1061,9 +1062,10 @@ void page_add_file_rmap(struct page *page) */ void page_remove_rmap(struct page *page) { + struct mem_cgroup *uninitialized_var(memcg); bool anon = PageAnon(page); - bool locked; unsigned long flags; + bool locked; /* * The anon case has no mem_cgroup page_stat to update; but may @@ -1071,7 +1073,7 @@ void page_remove_rmap(struct page *page) * we hold the lock against page_stat move: so avoid it on anon. */ if (!anon) - mem_cgroup_begin_update_page_stat(page, &locked, &flags); + memcg = mem_cgroup_begin_page_stat(page, &locked, &flags); /* page still mapped by someone else? */ if (!atomic_add_negative(-1, &page->_mapcount)) @@ -1096,8 +1098,7 @@ void page_remove_rmap(struct page *page) -hpage_nr_pages(page)); } else { __dec_zone_page_state(page, NR_FILE_MAPPED); - mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED); - mem_cgroup_end_update_page_stat(page, &locked, &flags); + mem_cgroup_dec_page_stat(memcg, MEM_CGROUP_STAT_FILE_MAPPED); } if (unlikely(PageMlocked(page))) clear_page_mlock(page); @@ -1110,10 +1111,9 @@ void page_remove_rmap(struct page *page) * Leaving it set also helps swapoff to reinstate ptes * faster for those pages still in swapcache. */ - return; out: if (!anon) - mem_cgroup_end_update_page_stat(page, &locked, &flags); + mem_cgroup_end_page_stat(memcg, locked, flags); } /* -- cgit v1.2.2 From 8186eb6a799e4e32f984b55858d8e393938be0c1 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 29 Oct 2014 14:50:51 -0700 Subject: mm: rmap: split out page_remove_file_rmap() page_remove_rmap() has too many branches on PageAnon() and is hard to follow. Move the file part into a separate function. Signed-off-by: Johannes Weiner Reviewed-by: Michal Hocko Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/rmap.c | 78 +++++++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 46 insertions(+), 32 deletions(-) (limited to 'mm/rmap.c') diff --git a/mm/rmap.c b/mm/rmap.c index f574046f77d4..19886fb2f13a 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1054,6 +1054,36 @@ void page_add_file_rmap(struct page *page) mem_cgroup_end_page_stat(memcg, locked, flags); } +static void page_remove_file_rmap(struct page *page) +{ + struct mem_cgroup *memcg; + unsigned long flags; + bool locked; + + memcg = mem_cgroup_begin_page_stat(page, &locked, &flags); + + /* page still mapped by someone else? */ + if (!atomic_add_negative(-1, &page->_mapcount)) + goto out; + + /* Hugepages are not counted in NR_FILE_MAPPED for now. */ + if (unlikely(PageHuge(page))) + goto out; + + /* + * We use the irq-unsafe __{inc|mod}_zone_page_stat because + * these counters are not modified in interrupt context, and + * pte lock(a spinlock) is held, which implies preemption disabled. + */ + __dec_zone_page_state(page, NR_FILE_MAPPED); + mem_cgroup_dec_page_stat(memcg, MEM_CGROUP_STAT_FILE_MAPPED); + + if (unlikely(PageMlocked(page))) + clear_page_mlock(page); +out: + mem_cgroup_end_page_stat(memcg, locked, flags); +} + /** * page_remove_rmap - take down pte mapping from a page * @page: page to remove mapping from @@ -1062,46 +1092,33 @@ void page_add_file_rmap(struct page *page) */ void page_remove_rmap(struct page *page) { - struct mem_cgroup *uninitialized_var(memcg); - bool anon = PageAnon(page); - unsigned long flags; - bool locked; - - /* - * The anon case has no mem_cgroup page_stat to update; but may - * uncharge_page() below, where the lock ordering can deadlock if - * we hold the lock against page_stat move: so avoid it on anon. - */ - if (!anon) - memcg = mem_cgroup_begin_page_stat(page, &locked, &flags); + if (!PageAnon(page)) { + page_remove_file_rmap(page); + return; + } /* page still mapped by someone else? */ if (!atomic_add_negative(-1, &page->_mapcount)) - goto out; + return; + + /* Hugepages are not counted in NR_ANON_PAGES for now. */ + if (unlikely(PageHuge(page))) + return; /* - * Hugepages are not counted in NR_ANON_PAGES nor NR_FILE_MAPPED - * and not charged by memcg for now. - * * We use the irq-unsafe __{inc|mod}_zone_page_stat because * these counters are not modified in interrupt context, and - * these counters are not modified in interrupt context, and * pte lock(a spinlock) is held, which implies preemption disabled. */ - if (unlikely(PageHuge(page))) - goto out; - if (anon) { - if (PageTransHuge(page)) - __dec_zone_page_state(page, - NR_ANON_TRANSPARENT_HUGEPAGES); - __mod_zone_page_state(page_zone(page), NR_ANON_PAGES, - -hpage_nr_pages(page)); - } else { - __dec_zone_page_state(page, NR_FILE_MAPPED); - mem_cgroup_dec_page_stat(memcg, MEM_CGROUP_STAT_FILE_MAPPED); - } + if (PageTransHuge(page)) + __dec_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES); + + __mod_zone_page_state(page_zone(page), NR_ANON_PAGES, + -hpage_nr_pages(page)); + if (unlikely(PageMlocked(page))) clear_page_mlock(page); + /* * It would be tidy to reset the PageAnon mapping here, * but that might overwrite a racing page_add_anon_rmap @@ -1111,9 +1128,6 @@ void page_remove_rmap(struct page *page) * Leaving it set also helps swapoff to reinstate ptes * faster for those pages still in swapcache. */ -out: - if (!anon) - mem_cgroup_end_page_stat(memcg, locked, flags); } /* -- cgit v1.2.2