diff options
author | Sha Zhengju <handai.szj@taobao.com> | 2013-09-12 18:13:53 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-09-12 18:38:02 -0400 |
commit | 3ea67d06e4679a16f69f66f43a8d6ee4778985fc (patch) | |
tree | 0ec35a312de85ce91bf0bf6e4c5b88440f3d0f1d /mm | |
parent | 658b72c5a7a033f0dde61b15dff86bf423ce425e (diff) |
memcg: add per cgroup writeback pages accounting
Add memcg routines to count writeback pages; dirty pages will also be
accounted later.
After Kame's commit 89c06bd52fb9 ("memcg: use new logic for page stat
accounting"), we can use the 'struct page' flags to test page state
instead of the per-page_cgroup flags. But memcg has a feature to move a
page from one cgroup to another, and there may be a race between "move"
and "page stat accounting". So, in order to avoid the race, we have
designed a new lock:
    mem_cgroup_begin_update_page_stat()
    modify page information           -->(a)
    mem_cgroup_update_page_stat()     -->(b)
    mem_cgroup_end_update_page_stat()
It requires both (a) and (b) (writeback pages accounting) to be protected
by mem_cgroup_{begin/end}_update_page_stat(). It's a full no-op for
!CONFIG_MEMCG, almost a no-op if memcg is disabled (but compiled in), an
rcu read lock in most cases (no task is moving), and spin_lock_irqsave
on top in the slow path.
There are two writeback interfaces to modify: test_{clear/set}_page_writeback().
And the lock order is:
    --> memcg->move_lock
      --> mapping->tree_lock
Signed-off-by: Sha Zhengju <handai.szj@taobao.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Reviewed-by: Greg Thelen <gthelen@google.com>
Cc: Fengguang Wu <fengguang.wu@intel.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/memcontrol.c | 30 | ||||
-rw-r--r-- | mm/page-writeback.c | 15 |
2 files changed, 38 insertions, 7 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 0093bc36c5fc..d5ff3ce13029 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -89,6 +89,7 @@ static const char * const mem_cgroup_stat_names[] = { | |||
89 | "rss", | 89 | "rss", |
90 | "rss_huge", | 90 | "rss_huge", |
91 | "mapped_file", | 91 | "mapped_file", |
92 | "writeback", | ||
92 | "swap", | 93 | "swap", |
93 | }; | 94 | }; |
94 | 95 | ||
@@ -3654,6 +3655,20 @@ void mem_cgroup_split_huge_fixup(struct page *head) | |||
3654 | } | 3655 | } |
3655 | #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ | 3656 | #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ |
3656 | 3657 | ||
3658 | static inline | ||
3659 | void mem_cgroup_move_account_page_stat(struct mem_cgroup *from, | ||
3660 | struct mem_cgroup *to, | ||
3661 | unsigned int nr_pages, | ||
3662 | enum mem_cgroup_stat_index idx) | ||
3663 | { | ||
3664 | /* Update stat data for mem_cgroup */ | ||
3665 | preempt_disable(); | ||
3666 | WARN_ON_ONCE(from->stat->count[idx] < nr_pages); | ||
3667 | __this_cpu_add(from->stat->count[idx], -nr_pages); | ||
3668 | __this_cpu_add(to->stat->count[idx], nr_pages); | ||
3669 | preempt_enable(); | ||
3670 | } | ||
3671 | |||
3657 | /** | 3672 | /** |
3658 | * mem_cgroup_move_account - move account of the page | 3673 | * mem_cgroup_move_account - move account of the page |
3659 | * @page: the page | 3674 | * @page: the page |
@@ -3699,13 +3714,14 @@ static int mem_cgroup_move_account(struct page *page, | |||
3699 | 3714 | ||
3700 | move_lock_mem_cgroup(from, &flags); | 3715 | move_lock_mem_cgroup(from, &flags); |
3701 | 3716 | ||
3702 | if (!anon && page_mapped(page)) { | 3717 | if (!anon && page_mapped(page)) |
3703 | /* Update mapped_file data for mem_cgroup */ | 3718 | mem_cgroup_move_account_page_stat(from, to, nr_pages, |
3704 | preempt_disable(); | 3719 | MEM_CGROUP_STAT_FILE_MAPPED); |
3705 | __this_cpu_dec(from->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]); | 3720 | |
3706 | __this_cpu_inc(to->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]); | 3721 | if (PageWriteback(page)) |
3707 | preempt_enable(); | 3722 | mem_cgroup_move_account_page_stat(from, to, nr_pages, |
3708 | } | 3723 | MEM_CGROUP_STAT_WRITEBACK); |
3724 | |||
3709 | mem_cgroup_charge_statistics(from, page, anon, -nr_pages); | 3725 | mem_cgroup_charge_statistics(from, page, anon, -nr_pages); |
3710 | 3726 | ||
3711 | /* caller should have done css_get */ | 3727 | /* caller should have done css_get */ |
diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 6c7b0187be8e..f5236f804aa6 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c | |||
@@ -2143,11 +2143,17 @@ EXPORT_SYMBOL(account_page_dirtied); | |||
2143 | 2143 | ||
2144 | /* | 2144 | /* |
2145 | * Helper function for set_page_writeback family. | 2145 | * Helper function for set_page_writeback family. |
2146 | * | ||
2147 | * The caller must hold mem_cgroup_begin/end_update_page_stat() lock | ||
2148 | * while calling this function. | ||
2149 | * See test_set_page_writeback for example. | ||
2150 | * | ||
2146 | * NOTE: Unlike account_page_dirtied this does not rely on being atomic | 2151 | * NOTE: Unlike account_page_dirtied this does not rely on being atomic |
2147 | * wrt interrupts. | 2152 | * wrt interrupts. |
2148 | */ | 2153 | */ |
2149 | void account_page_writeback(struct page *page) | 2154 | void account_page_writeback(struct page *page) |
2150 | { | 2155 | { |
2156 | mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_WRITEBACK); | ||
2151 | inc_zone_page_state(page, NR_WRITEBACK); | 2157 | inc_zone_page_state(page, NR_WRITEBACK); |
2152 | } | 2158 | } |
2153 | EXPORT_SYMBOL(account_page_writeback); | 2159 | EXPORT_SYMBOL(account_page_writeback); |
@@ -2364,7 +2370,10 @@ int test_clear_page_writeback(struct page *page) | |||
2364 | { | 2370 | { |
2365 | struct address_space *mapping = page_mapping(page); | 2371 | struct address_space *mapping = page_mapping(page); |
2366 | int ret; | 2372 | int ret; |
2373 | bool locked; | ||
2374 | unsigned long memcg_flags; | ||
2367 | 2375 | ||
2376 | mem_cgroup_begin_update_page_stat(page, &locked, &memcg_flags); | ||
2368 | if (mapping) { | 2377 | if (mapping) { |
2369 | struct backing_dev_info *bdi = mapping->backing_dev_info; | 2378 | struct backing_dev_info *bdi = mapping->backing_dev_info; |
2370 | unsigned long flags; | 2379 | unsigned long flags; |
@@ -2385,9 +2394,11 @@ int test_clear_page_writeback(struct page *page) | |||
2385 | ret = TestClearPageWriteback(page); | 2394 | ret = TestClearPageWriteback(page); |
2386 | } | 2395 | } |
2387 | if (ret) { | 2396 | if (ret) { |
2397 | mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_WRITEBACK); | ||
2388 | dec_zone_page_state(page, NR_WRITEBACK); | 2398 | dec_zone_page_state(page, NR_WRITEBACK); |
2389 | inc_zone_page_state(page, NR_WRITTEN); | 2399 | inc_zone_page_state(page, NR_WRITTEN); |
2390 | } | 2400 | } |
2401 | mem_cgroup_end_update_page_stat(page, &locked, &memcg_flags); | ||
2391 | return ret; | 2402 | return ret; |
2392 | } | 2403 | } |
2393 | 2404 | ||
@@ -2395,7 +2406,10 @@ int test_set_page_writeback(struct page *page) | |||
2395 | { | 2406 | { |
2396 | struct address_space *mapping = page_mapping(page); | 2407 | struct address_space *mapping = page_mapping(page); |
2397 | int ret; | 2408 | int ret; |
2409 | bool locked; | ||
2410 | unsigned long memcg_flags; | ||
2398 | 2411 | ||
2412 | mem_cgroup_begin_update_page_stat(page, &locked, &memcg_flags); | ||
2399 | if (mapping) { | 2413 | if (mapping) { |
2400 | struct backing_dev_info *bdi = mapping->backing_dev_info; | 2414 | struct backing_dev_info *bdi = mapping->backing_dev_info; |
2401 | unsigned long flags; | 2415 | unsigned long flags; |
@@ -2422,6 +2436,7 @@ int test_set_page_writeback(struct page *page) | |||
2422 | } | 2436 | } |
2423 | if (!ret) | 2437 | if (!ret) |
2424 | account_page_writeback(page); | 2438 | account_page_writeback(page); |
2439 | mem_cgroup_end_update_page_stat(page, &locked, &memcg_flags); | ||
2425 | return ret; | 2440 | return ret; |
2426 | 2441 | ||
2427 | } | 2442 | } |