author     Johannes Weiner <hannes@cmpxchg.org>    2014-10-29 17:50:48 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>    2014-10-29 19:33:15 -0400
commit     d7365e783edb858279be1d03f61bc8d5d3383d90 (patch)
tree       ca8c1aea5763cace1eb63022cfea83c480eef487 /include/linux/memcontrol.h
parent     3a3c02ecf7f2852f122d6d16fb9b3d9cb0c6f201 (diff)
mm: memcontrol: fix missed end-writeback page accounting
Commit 0a31bc97c80c ("mm: memcontrol: rewrite uncharge API") changed
page migration to uncharge the old page right away.  The page is
locked, unmapped, truncated, and off the LRU, but it could race with
writeback ending, which then doesn't unaccount the page properly:

test_clear_page_writeback()              migration
                                           wait_on_page_writeback()
  TestClearPageWriteback()
                                           mem_cgroup_migrate()
                                             clear PCG_USED
  mem_cgroup_update_page_stat()
    if (PageCgroupUsed(pc))
      decrease memcg pages under writeback

  release pc->mem_cgroup->move_lock

The per-page statistics interface is heavily optimized to avoid a
function call and a lookup_page_cgroup() in the file unmap fast path,
which means it doesn't verify whether a page is still charged before
clearing PageWriteback() and it has to do it in the stat update later.

Rework it so that it looks up the page's memcg once at the beginning
of the transaction and then uses it throughout.  The charge will be
verified before clearing PageWriteback() and migration can't uncharge
the page as long as that is still set.  The RCU lock will protect the
memcg past uncharge.

As far as losing the optimization goes, the following test results are
from a microbenchmark that maps, faults, and unmaps a 4GB sparse file
three times in a nested fashion, so that there are two negative passes
that don't account but still go through the new transaction overhead.
There is no actual difference:

old: 33.195102545 seconds time elapsed ( +- 0.01% )
new: 33.199231369 seconds time elapsed ( +- 0.03% )

The time spent in page_remove_rmap()'s callees still adds up to the
same, but the time spent in the function itself seems reduced:

     # Children      Self  Command        Shared Object      Symbol
old:      0.12%     0.11%  filemapstress  [kernel.kallsyms]  [k] page_remove_rmap
new:      0.12%     0.08%  filemapstress  [kernel.kallsyms]  [k] page_remove_rmap

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: Vladimir Davydov <vdavydov@parallels.com>
Cc: <stable@vger.kernel.org>    [3.17.x]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
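To make the new transaction contract concrete, here is a minimal sketch of the caller pattern this patch introduces, modeled on the end-writeback path described above. It is illustrative only: the real conversions live in mm/page-writeback.c and mm/memcontrol.c rather than this header, and the helper name end_writeback_accounting_sketch() and the choice of MEM_CGROUP_STAT_WRITEBACK as the stat index are assumptions for the example, not quotes from the patch.

#include <linux/memcontrol.h>
#include <linux/page-flags.h>

/*
 * Sketch only: look up the page's memcg once, under protection that pins
 * it for the whole transaction, then clear PageWriteback() and account
 * against that same memcg.  Migration cannot uncharge the page while
 * PageWriteback() is still set, and RCU keeps the memcg valid past a
 * racing uncharge.
 */
static void end_writeback_accounting_sketch(struct page *page)
{
        struct mem_cgroup *memcg;
        unsigned long flags;
        bool locked;

        memcg = mem_cgroup_begin_page_stat(page, &locked, &flags);
        if (TestClearPageWriteback(page))
                mem_cgroup_dec_page_stat(memcg, MEM_CGROUP_STAT_WRITEBACK);
        mem_cgroup_end_page_stat(memcg, locked, flags);
}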
Diffstat (limited to 'include/linux/memcontrol.h')
-rw-r--r--    include/linux/memcontrol.h    58
1 file changed, 17 insertions(+), 41 deletions(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 19df5d857411..6b75640ef5ab 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -139,48 +139,23 @@ static inline bool mem_cgroup_disabled(void)
 	return false;
 }
 
-void __mem_cgroup_begin_update_page_stat(struct page *page, bool *locked,
-					 unsigned long *flags);
-
-extern atomic_t memcg_moving;
-
-static inline void mem_cgroup_begin_update_page_stat(struct page *page,
-					bool *locked, unsigned long *flags)
-{
-	if (mem_cgroup_disabled())
-		return;
-	rcu_read_lock();
-	*locked = false;
-	if (atomic_read(&memcg_moving))
-		__mem_cgroup_begin_update_page_stat(page, locked, flags);
-}
-
-void __mem_cgroup_end_update_page_stat(struct page *page,
-				unsigned long *flags);
-static inline void mem_cgroup_end_update_page_stat(struct page *page,
-					bool *locked, unsigned long *flags)
-{
-	if (mem_cgroup_disabled())
-		return;
-	if (*locked)
-		__mem_cgroup_end_update_page_stat(page, flags);
-	rcu_read_unlock();
-}
-
-void mem_cgroup_update_page_stat(struct page *page,
-				 enum mem_cgroup_stat_index idx,
-				 int val);
-
-static inline void mem_cgroup_inc_page_stat(struct page *page,
+struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page, bool *locked,
+					      unsigned long *flags);
+void mem_cgroup_end_page_stat(struct mem_cgroup *memcg, bool locked,
+			      unsigned long flags);
+void mem_cgroup_update_page_stat(struct mem_cgroup *memcg,
+				 enum mem_cgroup_stat_index idx, int val);
+
+static inline void mem_cgroup_inc_page_stat(struct mem_cgroup *memcg,
 					    enum mem_cgroup_stat_index idx)
 {
-	mem_cgroup_update_page_stat(page, idx, 1);
+	mem_cgroup_update_page_stat(memcg, idx, 1);
 }
 
-static inline void mem_cgroup_dec_page_stat(struct page *page,
+static inline void mem_cgroup_dec_page_stat(struct mem_cgroup *memcg,
 					    enum mem_cgroup_stat_index idx)
 {
-	mem_cgroup_update_page_stat(page, idx, -1);
+	mem_cgroup_update_page_stat(memcg, idx, -1);
 }
 
 unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
@@ -315,13 +290,14 @@ mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p)
 {
 }
 
-static inline void mem_cgroup_begin_update_page_stat(struct page *page,
+static inline struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page,
 					bool *locked, unsigned long *flags)
 {
+	return NULL;
 }
 
-static inline void mem_cgroup_end_update_page_stat(struct page *page,
-					bool *locked, unsigned long *flags)
+static inline void mem_cgroup_end_page_stat(struct mem_cgroup *memcg,
+					bool locked, unsigned long flags)
 {
 }
 
@@ -343,12 +319,12 @@ static inline bool mem_cgroup_oom_synchronize(bool wait)
 	return false;
 }
 
-static inline void mem_cgroup_inc_page_stat(struct page *page,
+static inline void mem_cgroup_inc_page_stat(struct mem_cgroup *memcg,
 					    enum mem_cgroup_stat_index idx)
 {
 }
 
-static inline void mem_cgroup_dec_page_stat(struct page *page,
+static inline void mem_cgroup_dec_page_stat(struct mem_cgroup *memcg,
 					    enum mem_cgroup_stat_index idx)
 {
 }
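The !CONFIG_MEMCG stubs in the last two hunks keep the same caller shape working when memcg is compiled out: the begin stub returns NULL and the inc/dec/end stubs are empty inlines. A similarly hedged sketch of a file-unmap style caller, assuming MEM_CGROUP_STAT_FILE_MAPPED as the stat index; page_remove_rmap() in mm/rmap.c does the real equivalent, and the helper name below is made up for illustration.

#include <linux/memcontrol.h>

/*
 * Illustrative only: the same caller compiles with or without CONFIG_MEMCG.
 * With memcg compiled out, memcg is NULL and every call below is an empty
 * stub, so the unmap fast path keeps its shape without #ifdefs.
 */
static void unmap_file_page_accounting_sketch(struct page *page)
{
        struct mem_cgroup *memcg;
        unsigned long flags;
        bool locked;

        memcg = mem_cgroup_begin_page_stat(page, &locked, &flags);
        mem_cgroup_dec_page_stat(memcg, MEM_CGROUP_STAT_FILE_MAPPED);
        mem_cgroup_end_page_stat(memcg, locked, flags);
}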