diff options
author | KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> | 2012-03-21 19:34:24 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-03-21 20:55:01 -0400 |
commit | 312734c04e2fecc58429aec98194e4ff12d8f7d6 (patch) | |
tree | c1195cd46733b6a3909c11b2b5abcdee4412b99b | |
parent | 619d094b5872a5af153f1af77a8b7f7326faf0d0 (diff) |
memcg: remove PCG_MOVE_LOCK flag from page_cgroup
PCG_MOVE_LOCK is used as a bit spinlock to avoid a race between overwriting
pc->mem_cgroup and per-memcg page statistics accounting. This lock helps
to avoid the race, but the race is very rare because moving tasks between
cgroups is not a common operation. So using 1 bit per page for it seems too
costly.
This patch replaces this lock with a per-memcg spinlock and removes
PCG_MOVE_LOCK.
If a finer-grained lock is required, we'll be able to add some hashing, but
I'd like to start from this.
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Greg Thelen <gthelen@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Ying Han <yinghan@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | include/linux/page_cgroup.h | 19 | ||||
-rw-r--r-- | mm/memcontrol.c | 42 |
2 files changed, 32 insertions, 29 deletions
diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h index 106029243ff4..7a3af748f32b 100644 --- a/include/linux/page_cgroup.h +++ b/include/linux/page_cgroup.h | |||
@@ -7,7 +7,6 @@ enum { | |||
7 | PCG_USED, /* this object is in use. */ | 7 | PCG_USED, /* this object is in use. */ |
8 | PCG_MIGRATION, /* under page migration */ | 8 | PCG_MIGRATION, /* under page migration */ |
9 | /* flags for mem_cgroup and file and I/O status */ | 9 | /* flags for mem_cgroup and file and I/O status */ |
10 | PCG_MOVE_LOCK, /* For race between move_account v.s. following bits */ | ||
11 | PCG_FILE_MAPPED, /* page is accounted as "mapped" */ | 10 | PCG_FILE_MAPPED, /* page is accounted as "mapped" */ |
12 | __NR_PCG_FLAGS, | 11 | __NR_PCG_FLAGS, |
13 | }; | 12 | }; |
@@ -89,24 +88,6 @@ static inline void unlock_page_cgroup(struct page_cgroup *pc) | |||
89 | bit_spin_unlock(PCG_LOCK, &pc->flags); | 88 | bit_spin_unlock(PCG_LOCK, &pc->flags); |
90 | } | 89 | } |
91 | 90 | ||
92 | static inline void move_lock_page_cgroup(struct page_cgroup *pc, | ||
93 | unsigned long *flags) | ||
94 | { | ||
95 | /* | ||
96 | * We know updates to pc->flags of page cache's stats are from both of | ||
97 | * usual context or IRQ context. Disable IRQ to avoid deadlock. | ||
98 | */ | ||
99 | local_irq_save(*flags); | ||
100 | bit_spin_lock(PCG_MOVE_LOCK, &pc->flags); | ||
101 | } | ||
102 | |||
103 | static inline void move_unlock_page_cgroup(struct page_cgroup *pc, | ||
104 | unsigned long *flags) | ||
105 | { | ||
106 | bit_spin_unlock(PCG_MOVE_LOCK, &pc->flags); | ||
107 | local_irq_restore(*flags); | ||
108 | } | ||
109 | |||
110 | #else /* CONFIG_CGROUP_MEM_RES_CTLR */ | 91 | #else /* CONFIG_CGROUP_MEM_RES_CTLR */ |
111 | struct page_cgroup; | 92 | struct page_cgroup; |
112 | 93 | ||
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index cfd2db08cfe1..8afed2819b8f 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -300,6 +300,8 @@ struct mem_cgroup { | |||
300 | * set > 0 if pages under this cgroup are moving to other cgroup. | 300 | * set > 0 if pages under this cgroup are moving to other cgroup. |
301 | */ | 301 | */ |
302 | atomic_t moving_account; | 302 | atomic_t moving_account; |
303 | /* taken only while moving_account > 0 */ | ||
304 | spinlock_t move_lock; | ||
303 | /* | 305 | /* |
304 | * percpu counter. | 306 | * percpu counter. |
305 | */ | 307 | */ |
@@ -1376,6 +1378,24 @@ static bool mem_cgroup_wait_acct_move(struct mem_cgroup *memcg) | |||
1376 | return false; | 1378 | return false; |
1377 | } | 1379 | } |
1378 | 1380 | ||
1381 | /* | ||
1382 | * Take this lock when | ||
1383 | * - a code tries to modify page's memcg while it's USED. | ||
1384 | * - a code tries to modify page state accounting in a memcg. | ||
1385 | * see mem_cgroup_stealed(), too. | ||
1386 | */ | ||
1387 | static void move_lock_mem_cgroup(struct mem_cgroup *memcg, | ||
1388 | unsigned long *flags) | ||
1389 | { | ||
1390 | spin_lock_irqsave(&memcg->move_lock, *flags); | ||
1391 | } | ||
1392 | |||
1393 | static void move_unlock_mem_cgroup(struct mem_cgroup *memcg, | ||
1394 | unsigned long *flags) | ||
1395 | { | ||
1396 | spin_unlock_irqrestore(&memcg->move_lock, *flags); | ||
1397 | } | ||
1398 | |||
1379 | /** | 1399 | /** |
1380 | * mem_cgroup_print_oom_info: Called from OOM with tasklist_lock held in read mode. | 1400 | * mem_cgroup_print_oom_info: Called from OOM with tasklist_lock held in read mode. |
1381 | * @memcg: The memory cgroup that went over limit | 1401 | * @memcg: The memory cgroup that went over limit |
@@ -1900,7 +1920,7 @@ void mem_cgroup_update_page_stat(struct page *page, | |||
1900 | 1920 | ||
1901 | if (mem_cgroup_disabled()) | 1921 | if (mem_cgroup_disabled()) |
1902 | return; | 1922 | return; |
1903 | 1923 | again: | |
1904 | rcu_read_lock(); | 1924 | rcu_read_lock(); |
1905 | memcg = pc->mem_cgroup; | 1925 | memcg = pc->mem_cgroup; |
1906 | if (unlikely(!memcg || !PageCgroupUsed(pc))) | 1926 | if (unlikely(!memcg || !PageCgroupUsed(pc))) |
@@ -1908,11 +1928,13 @@ void mem_cgroup_update_page_stat(struct page *page, | |||
1908 | /* pc->mem_cgroup is unstable ? */ | 1928 | /* pc->mem_cgroup is unstable ? */ |
1909 | if (unlikely(mem_cgroup_stealed(memcg))) { | 1929 | if (unlikely(mem_cgroup_stealed(memcg))) { |
1910 | /* take a lock against to access pc->mem_cgroup */ | 1930 | /* take a lock against to access pc->mem_cgroup */ |
1911 | move_lock_page_cgroup(pc, &flags); | 1931 | move_lock_mem_cgroup(memcg, &flags); |
1932 | if (memcg != pc->mem_cgroup || !PageCgroupUsed(pc)) { | ||
1933 | move_unlock_mem_cgroup(memcg, &flags); | ||
1934 | rcu_read_unlock(); | ||
1935 | goto again; | ||
1936 | } | ||
1912 | need_unlock = true; | 1937 | need_unlock = true; |
1913 | memcg = pc->mem_cgroup; | ||
1914 | if (!memcg || !PageCgroupUsed(pc)) | ||
1915 | goto out; | ||
1916 | } | 1938 | } |
1917 | 1939 | ||
1918 | switch (idx) { | 1940 | switch (idx) { |
@@ -1931,7 +1953,7 @@ void mem_cgroup_update_page_stat(struct page *page, | |||
1931 | 1953 | ||
1932 | out: | 1954 | out: |
1933 | if (unlikely(need_unlock)) | 1955 | if (unlikely(need_unlock)) |
1934 | move_unlock_page_cgroup(pc, &flags); | 1956 | move_unlock_mem_cgroup(memcg, &flags); |
1935 | rcu_read_unlock(); | 1957 | rcu_read_unlock(); |
1936 | } | 1958 | } |
1937 | 1959 | ||
@@ -2500,8 +2522,7 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg, | |||
2500 | 2522 | ||
2501 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | 2523 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
2502 | 2524 | ||
2503 | #define PCGF_NOCOPY_AT_SPLIT ((1 << PCG_LOCK) | (1 << PCG_MOVE_LOCK) |\ | 2525 | #define PCGF_NOCOPY_AT_SPLIT ((1 << PCG_LOCK) | (1 << PCG_MIGRATION)) |
2504 | (1 << PCG_MIGRATION)) | ||
2505 | /* | 2526 | /* |
2506 | * Because tail pages are not marked as "used", set it. We're under | 2527 | * Because tail pages are not marked as "used", set it. We're under |
2507 | * zone->lru_lock, 'splitting on pmd' and compound_lock. | 2528 | * zone->lru_lock, 'splitting on pmd' and compound_lock. |
@@ -2572,7 +2593,7 @@ static int mem_cgroup_move_account(struct page *page, | |||
2572 | if (!PageCgroupUsed(pc) || pc->mem_cgroup != from) | 2593 | if (!PageCgroupUsed(pc) || pc->mem_cgroup != from) |
2573 | goto unlock; | 2594 | goto unlock; |
2574 | 2595 | ||
2575 | move_lock_page_cgroup(pc, &flags); | 2596 | move_lock_mem_cgroup(from, &flags); |
2576 | 2597 | ||
2577 | if (PageCgroupFileMapped(pc)) { | 2598 | if (PageCgroupFileMapped(pc)) { |
2578 | /* Update mapped_file data for mem_cgroup */ | 2599 | /* Update mapped_file data for mem_cgroup */ |
@@ -2596,7 +2617,7 @@ static int mem_cgroup_move_account(struct page *page, | |||
2596 | * guaranteed that "to" is never removed. So, we don't check rmdir | 2617 | * guaranteed that "to" is never removed. So, we don't check rmdir |
2597 | * status here. | 2618 | * status here. |
2598 | */ | 2619 | */ |
2599 | move_unlock_page_cgroup(pc, &flags); | 2620 | move_unlock_mem_cgroup(from, &flags); |
2600 | ret = 0; | 2621 | ret = 0; |
2601 | unlock: | 2622 | unlock: |
2602 | unlock_page_cgroup(pc); | 2623 | unlock_page_cgroup(pc); |
@@ -4971,6 +4992,7 @@ mem_cgroup_create(struct cgroup *cont) | |||
4971 | atomic_set(&memcg->refcnt, 1); | 4992 | atomic_set(&memcg->refcnt, 1); |
4972 | memcg->move_charge_at_immigrate = 0; | 4993 | memcg->move_charge_at_immigrate = 0; |
4973 | mutex_init(&memcg->thresholds_lock); | 4994 | mutex_init(&memcg->thresholds_lock); |
4995 | spin_lock_init(&memcg->move_lock); | ||
4974 | return &memcg->css; | 4996 | return &memcg->css; |
4975 | free_out: | 4997 | free_out: |
4976 | __mem_cgroup_free(memcg); | 4998 | __mem_cgroup_free(memcg); |