aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> 2012-03-21 19:34:24 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2012-03-21 20:55:01 -0400
commit 312734c04e2fecc58429aec98194e4ff12d8f7d6 (patch)
tree c1195cd46733b6a3909c11b2b5abcdee4412b99b
parent 619d094b5872a5af153f1af77a8b7f7326faf0d0 (diff)
memcg: remove PCG_MOVE_LOCK flag from page_cgroup
PCG_MOVE_LOCK is used as a bit spinlock to avoid a race between overwriting pc->mem_cgroup and per-memcg page statistics accounting. This lock helps to avoid the race, but the race is very rare because moving tasks between cgroups is not a common operation. So using 1 bit per page seems too costly. This patch changes the lock to a per-memcg spinlock and removes PCG_MOVE_LOCK. If a smaller (finer-grained) lock is required, we'll be able to add some hashes, but I'd like to start from this.
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Greg Thelen <gthelen@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Ying Han <yinghan@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- include/linux/page_cgroup.h 19
-rw-r--r-- mm/memcontrol.c 42
2 files changed, 32 insertions, 29 deletions
diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
index 106029243ff4..7a3af748f32b 100644
--- a/include/linux/page_cgroup.h
+++ b/include/linux/page_cgroup.h
@@ -7,7 +7,6 @@ enum {
7 PCG_USED, /* this object is in use. */ 7 PCG_USED, /* this object is in use. */
8 PCG_MIGRATION, /* under page migration */ 8 PCG_MIGRATION, /* under page migration */
9 /* flags for mem_cgroup and file and I/O status */ 9 /* flags for mem_cgroup and file and I/O status */
10 PCG_MOVE_LOCK, /* For race between move_account v.s. following bits */
11 PCG_FILE_MAPPED, /* page is accounted as "mapped" */ 10 PCG_FILE_MAPPED, /* page is accounted as "mapped" */
12 __NR_PCG_FLAGS, 11 __NR_PCG_FLAGS,
13}; 12};
@@ -89,24 +88,6 @@ static inline void unlock_page_cgroup(struct page_cgroup *pc)
89 bit_spin_unlock(PCG_LOCK, &pc->flags); 88 bit_spin_unlock(PCG_LOCK, &pc->flags);
90} 89}
91 90
92static inline void move_lock_page_cgroup(struct page_cgroup *pc,
93 unsigned long *flags)
94{
95 /*
96 * We know updates to pc->flags of page cache's stats are from both of
97 * usual context or IRQ context. Disable IRQ to avoid deadlock.
98 */
99 local_irq_save(*flags);
100 bit_spin_lock(PCG_MOVE_LOCK, &pc->flags);
101}
102
103static inline void move_unlock_page_cgroup(struct page_cgroup *pc,
104 unsigned long *flags)
105{
106 bit_spin_unlock(PCG_MOVE_LOCK, &pc->flags);
107 local_irq_restore(*flags);
108}
109
110#else /* CONFIG_CGROUP_MEM_RES_CTLR */ 91#else /* CONFIG_CGROUP_MEM_RES_CTLR */
111struct page_cgroup; 92struct page_cgroup;
112 93
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index cfd2db08cfe1..8afed2819b8f 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -300,6 +300,8 @@ struct mem_cgroup {
300 * set > 0 if pages under this cgroup are moving to other cgroup. 300 * set > 0 if pages under this cgroup are moving to other cgroup.
301 */ 301 */
302 atomic_t moving_account; 302 atomic_t moving_account;
303 /* taken only while moving_account > 0 */
304 spinlock_t move_lock;
303 /* 305 /*
304 * percpu counter. 306 * percpu counter.
305 */ 307 */
@@ -1376,6 +1378,24 @@ static bool mem_cgroup_wait_acct_move(struct mem_cgroup *memcg)
1376 return false; 1378 return false;
1377} 1379}
1378 1380
1381/*
1382 * Take this lock when
1383 * - a code tries to modify page's memcg while it's USED.
1384 * - a code tries to modify page state accounting in a memcg.
1385 * see mem_cgroup_stealed(), too.
1386 */
1387static void move_lock_mem_cgroup(struct mem_cgroup *memcg,
1388 unsigned long *flags)
1389{
1390 spin_lock_irqsave(&memcg->move_lock, *flags);
1391}
1392
1393static void move_unlock_mem_cgroup(struct mem_cgroup *memcg,
1394 unsigned long *flags)
1395{
1396 spin_unlock_irqrestore(&memcg->move_lock, *flags);
1397}
1398
1379/** 1399/**
1380 * mem_cgroup_print_oom_info: Called from OOM with tasklist_lock held in read mode. 1400 * mem_cgroup_print_oom_info: Called from OOM with tasklist_lock held in read mode.
1381 * @memcg: The memory cgroup that went over limit 1401 * @memcg: The memory cgroup that went over limit
@@ -1900,7 +1920,7 @@ void mem_cgroup_update_page_stat(struct page *page,
1900 1920
1901 if (mem_cgroup_disabled()) 1921 if (mem_cgroup_disabled())
1902 return; 1922 return;
1903 1923again:
1904 rcu_read_lock(); 1924 rcu_read_lock();
1905 memcg = pc->mem_cgroup; 1925 memcg = pc->mem_cgroup;
1906 if (unlikely(!memcg || !PageCgroupUsed(pc))) 1926 if (unlikely(!memcg || !PageCgroupUsed(pc)))
@@ -1908,11 +1928,13 @@ void mem_cgroup_update_page_stat(struct page *page,
1908 /* pc->mem_cgroup is unstable ? */ 1928 /* pc->mem_cgroup is unstable ? */
1909 if (unlikely(mem_cgroup_stealed(memcg))) { 1929 if (unlikely(mem_cgroup_stealed(memcg))) {
1910 /* take a lock against to access pc->mem_cgroup */ 1930 /* take a lock against to access pc->mem_cgroup */
1911 move_lock_page_cgroup(pc, &flags); 1931 move_lock_mem_cgroup(memcg, &flags);
1932 if (memcg != pc->mem_cgroup || !PageCgroupUsed(pc)) {
1933 move_unlock_mem_cgroup(memcg, &flags);
1934 rcu_read_unlock();
1935 goto again;
1936 }
1912 need_unlock = true; 1937 need_unlock = true;
1913 memcg = pc->mem_cgroup;
1914 if (!memcg || !PageCgroupUsed(pc))
1915 goto out;
1916 } 1938 }
1917 1939
1918 switch (idx) { 1940 switch (idx) {
@@ -1931,7 +1953,7 @@ void mem_cgroup_update_page_stat(struct page *page,
1931 1953
1932out: 1954out:
1933 if (unlikely(need_unlock)) 1955 if (unlikely(need_unlock))
1934 move_unlock_page_cgroup(pc, &flags); 1956 move_unlock_mem_cgroup(memcg, &flags);
1935 rcu_read_unlock(); 1957 rcu_read_unlock();
1936} 1958}
1937 1959
@@ -2500,8 +2522,7 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
2500 2522
2501#ifdef CONFIG_TRANSPARENT_HUGEPAGE 2523#ifdef CONFIG_TRANSPARENT_HUGEPAGE
2502 2524
2503#define PCGF_NOCOPY_AT_SPLIT ((1 << PCG_LOCK) | (1 << PCG_MOVE_LOCK) |\ 2525#define PCGF_NOCOPY_AT_SPLIT ((1 << PCG_LOCK) | (1 << PCG_MIGRATION))
2504 (1 << PCG_MIGRATION))
2505/* 2526/*
2506 * Because tail pages are not marked as "used", set it. We're under 2527 * Because tail pages are not marked as "used", set it. We're under
2507 * zone->lru_lock, 'splitting on pmd' and compound_lock. 2528 * zone->lru_lock, 'splitting on pmd' and compound_lock.
@@ -2572,7 +2593,7 @@ static int mem_cgroup_move_account(struct page *page,
2572 if (!PageCgroupUsed(pc) || pc->mem_cgroup != from) 2593 if (!PageCgroupUsed(pc) || pc->mem_cgroup != from)
2573 goto unlock; 2594 goto unlock;
2574 2595
2575 move_lock_page_cgroup(pc, &flags); 2596 move_lock_mem_cgroup(from, &flags);
2576 2597
2577 if (PageCgroupFileMapped(pc)) { 2598 if (PageCgroupFileMapped(pc)) {
2578 /* Update mapped_file data for mem_cgroup */ 2599 /* Update mapped_file data for mem_cgroup */
@@ -2596,7 +2617,7 @@ static int mem_cgroup_move_account(struct page *page,
2596 * guaranteed that "to" is never removed. So, we don't check rmdir 2617 * guaranteed that "to" is never removed. So, we don't check rmdir
2597 * status here. 2618 * status here.
2598 */ 2619 */
2599 move_unlock_page_cgroup(pc, &flags); 2620 move_unlock_mem_cgroup(from, &flags);
2600 ret = 0; 2621 ret = 0;
2601unlock: 2622unlock:
2602 unlock_page_cgroup(pc); 2623 unlock_page_cgroup(pc);
@@ -4971,6 +4992,7 @@ mem_cgroup_create(struct cgroup *cont)
4971 atomic_set(&memcg->refcnt, 1); 4992 atomic_set(&memcg->refcnt, 1);
4972 memcg->move_charge_at_immigrate = 0; 4993 memcg->move_charge_at_immigrate = 0;
4973 mutex_init(&memcg->thresholds_lock); 4994 mutex_init(&memcg->thresholds_lock);
4995 spin_lock_init(&memcg->move_lock);
4974 return &memcg->css; 4996 return &memcg->css;
4975free_out: 4997free_out:
4976 __mem_cgroup_free(memcg); 4998 __mem_cgroup_free(memcg);