aboutsummaryrefslogtreecommitdiffstats
path: root/mm/memcontrol.c
diff options
context:
space:
mode:
authorKAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>2012-03-21 19:34:25 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2012-03-21 20:55:01 -0400
commit89c06bd52fb9ffceddf84f7309d2e8c9f1666216 (patch)
tree43ec3d97a89988bc143bed5796bcd7bef64212dc /mm/memcontrol.c
parent312734c04e2fecc58429aec98194e4ff12d8f7d6 (diff)
memcg: use new logic for page stat accounting
Now, page-stat-per-memcg is recorded into per page_cgroup flag by
duplicating page's status into the flag.  The reason is that memcg has a
feature to move a page from a group to another group and we have a race
between "move" and "page stat accounting".

Under current logic, assume CPU-A and CPU-B.  CPU-A does "move" and CPU-B
does "page stat accounting".

When CPU-A goes first:

	    CPU-A                           CPU-B
	                                update "struct page" info.
	move_lock_mem_cgroup(memcg)
	see pc->flags
	copy page stat to new group
	overwrite pc->mem_cgroup.
	move_unlock_mem_cgroup(memcg)
	                                move_lock_mem_cgroup(mem)
	                                set pc->flags
	                                update page stat accounting
	                                move_unlock_mem_cgroup(mem)

stat accounting is guarded by move_lock_mem_cgroup() and "move" logic
(CPU-A) doesn't see changes in "struct page" information.

But it's costly to have the same information both in 'struct page' and
'struct page_cgroup'.  And, there is a potential problem.

For example, assume we have PG_dirty accounting in memcg.  PG_ is a flag
for struct page; PCG_ is a flag for struct page_cgroup.  (This is just an
example.  The same problem can be found in any kind of page stat
accounting.)

	    CPU-A                           CPU-B
	TestSet PG_dirty
	(delay)                         TestClear PG_dirty
	                                if (TestClear(PCG_dirty))
	                                    memcg->nr_dirty--
	if (TestSet(PCG_dirty))
	    memcg->nr_dirty++

Here, memcg->nr_dirty = +1; this is wrong.  This race was reported by
Greg Thelen <gthelen@google.com>.  Now, only FILE_MAPPED is supported,
but fortunately it's serialized by the page table lock and this is not a
real bug, _now_.

If this potential problem is caused by having duplicated information in
struct page and struct page_cgroup, we may be able to fix it by using the
original 'struct page' information.  But then we'll have a problem in
"move account".

Assume we use only PG_dirty.
	    CPU-A                           CPU-B
	TestSet PG_dirty
	(delay)                         move_lock_mem_cgroup()
	                                if (PageDirty(page))
	                                    new_memcg->nr_dirty++
	                                pc->mem_cgroup = new_memcg;
	                                move_unlock_mem_cgroup()
	move_lock_mem_cgroup()
	memcg = pc->mem_cgroup
	new_memcg->nr_dirty++

Accounting information may be double-counted.  This was the original
reason to have PCG_xxx flags, but it seems PCG_xxx has another problem.

I think we need a bigger lock, as:

	move_lock_mem_cgroup(page)
	TestSetPageDirty(page)
	update page stats (without any checks)
	move_unlock_mem_cgroup(page)

This fixes both problems and we don't have to duplicate the page flag
into page_cgroup.  Please note: move_lock_mem_cgroup() is held only when
there is a possibility of "account move" in the system.  So, in most
paths, status updates will go without atomic locks.

This patch introduces mem_cgroup_begin_update_page_stat() and
mem_cgroup_end_update_page_stat(); both should be called when modifying
'struct page' information if memcg takes care of it, as:

	mem_cgroup_begin_update_page_stat()
	modify page information
	mem_cgroup_update_page_stat()
	=> never check any 'struct page' info, just update counters.
	mem_cgroup_end_update_page_stat()

This patch is slow because we need to call begin_update_page_stat()/
end_update_page_stat() regardless of whether the accounted value will be
changed or not.  A following patch adds an easy optimization and reduces
the cost.

[akpm@linux-foundation.org: s/lock/locked/]
[hughd@google.com: fix deadlock by avoiding stat lock when anon]
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Greg Thelen <gthelen@google.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Ying Han <yinghan@google.com>
Signed-off-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--mm/memcontrol.c62
1 files changed, 42 insertions, 20 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 8afed2819b8f..df1e180f6c30 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1910,32 +1910,59 @@ bool mem_cgroup_handle_oom(struct mem_cgroup *memcg, gfp_t mask, int order)
1910 * If there is, we take a lock. 1910 * If there is, we take a lock.
1911 */ 1911 */
1912 1912
1913void __mem_cgroup_begin_update_page_stat(struct page *page,
1914 bool *locked, unsigned long *flags)
1915{
1916 struct mem_cgroup *memcg;
1917 struct page_cgroup *pc;
1918
1919 pc = lookup_page_cgroup(page);
1920again:
1921 memcg = pc->mem_cgroup;
1922 if (unlikely(!memcg || !PageCgroupUsed(pc)))
1923 return;
1924 /*
1925 * If this memory cgroup is not under account moving, we don't
1926 * need to take move_lock_page_cgroup(). Because we already hold
1927 * rcu_read_lock(), any calls to move_account will be delayed until
1928 * rcu_read_unlock() if mem_cgroup_stealed() == true.
1929 */
1930 if (!mem_cgroup_stealed(memcg))
1931 return;
1932
1933 move_lock_mem_cgroup(memcg, flags);
1934 if (memcg != pc->mem_cgroup || !PageCgroupUsed(pc)) {
1935 move_unlock_mem_cgroup(memcg, flags);
1936 goto again;
1937 }
1938 *locked = true;
1939}
1940
1941void __mem_cgroup_end_update_page_stat(struct page *page, unsigned long *flags)
1942{
1943 struct page_cgroup *pc = lookup_page_cgroup(page);
1944
1945 /*
1946 * It's guaranteed that pc->mem_cgroup never changes while
1947 * lock is held because a routine modifies pc->mem_cgroup
1948 * should take move_lock_page_cgroup().
1949 */
1950 move_unlock_mem_cgroup(pc->mem_cgroup, flags);
1951}
1952
1913void mem_cgroup_update_page_stat(struct page *page, 1953void mem_cgroup_update_page_stat(struct page *page,
1914 enum mem_cgroup_page_stat_item idx, int val) 1954 enum mem_cgroup_page_stat_item idx, int val)
1915{ 1955{
1916 struct mem_cgroup *memcg; 1956 struct mem_cgroup *memcg;
1917 struct page_cgroup *pc = lookup_page_cgroup(page); 1957 struct page_cgroup *pc = lookup_page_cgroup(page);
1918 bool need_unlock = false;
1919 unsigned long uninitialized_var(flags); 1958 unsigned long uninitialized_var(flags);
1920 1959
1921 if (mem_cgroup_disabled()) 1960 if (mem_cgroup_disabled())
1922 return; 1961 return;
1923again: 1962
1924 rcu_read_lock();
1925 memcg = pc->mem_cgroup; 1963 memcg = pc->mem_cgroup;
1926 if (unlikely(!memcg || !PageCgroupUsed(pc))) 1964 if (unlikely(!memcg || !PageCgroupUsed(pc)))
1927 goto out; 1965 return;
1928 /* pc->mem_cgroup is unstable ? */
1929 if (unlikely(mem_cgroup_stealed(memcg))) {
1930 /* take a lock against to access pc->mem_cgroup */
1931 move_lock_mem_cgroup(memcg, &flags);
1932 if (memcg != pc->mem_cgroup || !PageCgroupUsed(pc)) {
1933 move_unlock_mem_cgroup(memcg, &flags);
1934 rcu_read_unlock();
1935 goto again;
1936 }
1937 need_unlock = true;
1938 }
1939 1966
1940 switch (idx) { 1967 switch (idx) {
1941 case MEMCG_NR_FILE_MAPPED: 1968 case MEMCG_NR_FILE_MAPPED:
@@ -1950,11 +1977,6 @@ again:
1950 } 1977 }
1951 1978
1952 this_cpu_add(memcg->stat->count[idx], val); 1979 this_cpu_add(memcg->stat->count[idx], val);
1953
1954out:
1955 if (unlikely(need_unlock))
1956 move_unlock_mem_cgroup(memcg, &flags);
1957 rcu_read_unlock();
1958} 1980}
1959 1981
1960/* 1982/*