diff options
author | KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> | 2011-01-20 17:44:24 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-01-20 20:02:06 -0500 |
commit | ca3e021417eed30ec2b64ce88eb0acf64aa9bc29 (patch) | |
tree | 710590e51764d56c12d00744c402ab15c0d14a6b | |
parent | e401f1761c0b01966e36e41e2c385d455a7b44ee (diff) |
memcg: fix USED bit handling at uncharge in THP
Now, under THP:
at charge:
- PageCgroupUsed bit is set on every page_cgroup of a hugepage,
i.e. it is set on all 512 pages.
at uncharge
- PageCgroupUsed bit is cleared only on the head page.
So, the tail pages are left with the "Used" bit still set.
This patch fixes this by setting the Used bit only on the head page.
Used bits for tail pages will be set when the hugepage is split, if necessary.
This patch adds this lock order:
compound_lock() -> page_cgroup_move_lock().
[akpm@linux-foundation.org: fix warning]
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | include/linux/memcontrol.h | 9 | ||||
-rw-r--r-- | mm/huge_memory.c | 2 | ||||
-rw-r--r-- | mm/memcontrol.c | 91 |
3 files changed, 62 insertions, 40 deletions
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 6a576f989437..f512e189be5a 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h | |||
@@ -146,6 +146,10 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, | |||
146 | gfp_t gfp_mask); | 146 | gfp_t gfp_mask); |
147 | u64 mem_cgroup_get_limit(struct mem_cgroup *mem); | 147 | u64 mem_cgroup_get_limit(struct mem_cgroup *mem); |
148 | 148 | ||
149 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||
150 | void mem_cgroup_split_huge_fixup(struct page *head, struct page *tail); | ||
151 | #endif | ||
152 | |||
149 | #else /* CONFIG_CGROUP_MEM_RES_CTLR */ | 153 | #else /* CONFIG_CGROUP_MEM_RES_CTLR */ |
150 | struct mem_cgroup; | 154 | struct mem_cgroup; |
151 | 155 | ||
@@ -335,6 +339,11 @@ u64 mem_cgroup_get_limit(struct mem_cgroup *mem) | |||
335 | return 0; | 339 | return 0; |
336 | } | 340 | } |
337 | 341 | ||
342 | static inline void mem_cgroup_split_huge_fixup(struct page *head, | ||
343 | struct page *tail) | ||
344 | { | ||
345 | } | ||
346 | |||
338 | #endif /* CONFIG_CGROUP_MEM_CONT */ | 347 | #endif /* CONFIG_CGROUP_MEM_CONT */ |
339 | 348 | ||
340 | #endif /* _LINUX_MEMCONTROL_H */ | 349 | #endif /* _LINUX_MEMCONTROL_H */ |
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index c4f634b3a48e..e187454d82f6 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c | |||
@@ -1203,6 +1203,8 @@ static void __split_huge_page_refcount(struct page *page) | |||
1203 | BUG_ON(!PageDirty(page_tail)); | 1203 | BUG_ON(!PageDirty(page_tail)); |
1204 | BUG_ON(!PageSwapBacked(page_tail)); | 1204 | BUG_ON(!PageSwapBacked(page_tail)); |
1205 | 1205 | ||
1206 | mem_cgroup_split_huge_fixup(page, page_tail); | ||
1207 | |||
1206 | lru_add_page_tail(zone, page, page_tail); | 1208 | lru_add_page_tail(zone, page, page_tail); |
1207 | } | 1209 | } |
1208 | 1210 | ||
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 6d59a2bd520a..848b42195e5b 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -1614,7 +1614,7 @@ void mem_cgroup_update_page_stat(struct page *page, | |||
1614 | if (unlikely(!mem || !PageCgroupUsed(pc))) | 1614 | if (unlikely(!mem || !PageCgroupUsed(pc))) |
1615 | goto out; | 1615 | goto out; |
1616 | /* pc->mem_cgroup is unstable ? */ | 1616 | /* pc->mem_cgroup is unstable ? */ |
1617 | if (unlikely(mem_cgroup_stealed(mem))) { | 1617 | if (unlikely(mem_cgroup_stealed(mem)) || PageTransHuge(page)) { |
1618 | /* take a lock against to access pc->mem_cgroup */ | 1618 | /* take a lock against to access pc->mem_cgroup */ |
1619 | move_lock_page_cgroup(pc, &flags); | 1619 | move_lock_page_cgroup(pc, &flags); |
1620 | need_unlock = true; | 1620 | need_unlock = true; |
@@ -2083,14 +2083,27 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page) | |||
2083 | return mem; | 2083 | return mem; |
2084 | } | 2084 | } |
2085 | 2085 | ||
2086 | /* | 2086 | static void __mem_cgroup_commit_charge(struct mem_cgroup *mem, |
2087 | * commit a charge got by __mem_cgroup_try_charge() and makes page_cgroup to be | 2087 | struct page_cgroup *pc, |
2088 | * USED state. If already USED, uncharge and return. | 2088 | enum charge_type ctype, |
2089 | */ | 2089 | int page_size) |
2090 | static void ____mem_cgroup_commit_charge(struct mem_cgroup *mem, | ||
2091 | struct page_cgroup *pc, | ||
2092 | enum charge_type ctype) | ||
2093 | { | 2090 | { |
2091 | int nr_pages = page_size >> PAGE_SHIFT; | ||
2092 | |||
2093 | /* try_charge() can return NULL to *memcg, taking care of it. */ | ||
2094 | if (!mem) | ||
2095 | return; | ||
2096 | |||
2097 | lock_page_cgroup(pc); | ||
2098 | if (unlikely(PageCgroupUsed(pc))) { | ||
2099 | unlock_page_cgroup(pc); | ||
2100 | mem_cgroup_cancel_charge(mem, page_size); | ||
2101 | return; | ||
2102 | } | ||
2103 | /* | ||
2104 | * we don't need page_cgroup_lock about tail pages, becase they are not | ||
2105 | * accessed by any other context at this point. | ||
2106 | */ | ||
2094 | pc->mem_cgroup = mem; | 2107 | pc->mem_cgroup = mem; |
2095 | /* | 2108 | /* |
2096 | * We access a page_cgroup asynchronously without lock_page_cgroup(). | 2109 | * We access a page_cgroup asynchronously without lock_page_cgroup(). |
@@ -2114,35 +2127,7 @@ static void ____mem_cgroup_commit_charge(struct mem_cgroup *mem, | |||
2114 | break; | 2127 | break; |
2115 | } | 2128 | } |
2116 | 2129 | ||
2117 | mem_cgroup_charge_statistics(mem, PageCgroupCache(pc), 1); | 2130 | mem_cgroup_charge_statistics(mem, PageCgroupCache(pc), nr_pages); |
2118 | } | ||
2119 | |||
2120 | static void __mem_cgroup_commit_charge(struct mem_cgroup *mem, | ||
2121 | struct page_cgroup *pc, | ||
2122 | enum charge_type ctype, | ||
2123 | int page_size) | ||
2124 | { | ||
2125 | int i; | ||
2126 | int count = page_size >> PAGE_SHIFT; | ||
2127 | |||
2128 | /* try_charge() can return NULL to *memcg, taking care of it. */ | ||
2129 | if (!mem) | ||
2130 | return; | ||
2131 | |||
2132 | lock_page_cgroup(pc); | ||
2133 | if (unlikely(PageCgroupUsed(pc))) { | ||
2134 | unlock_page_cgroup(pc); | ||
2135 | mem_cgroup_cancel_charge(mem, page_size); | ||
2136 | return; | ||
2137 | } | ||
2138 | |||
2139 | /* | ||
2140 | * we don't need page_cgroup_lock about tail pages, becase they are not | ||
2141 | * accessed by any other context at this point. | ||
2142 | */ | ||
2143 | for (i = 0; i < count; i++) | ||
2144 | ____mem_cgroup_commit_charge(mem, pc + i, ctype); | ||
2145 | |||
2146 | unlock_page_cgroup(pc); | 2131 | unlock_page_cgroup(pc); |
2147 | /* | 2132 | /* |
2148 | * "charge_statistics" updated event counter. Then, check it. | 2133 | * "charge_statistics" updated event counter. Then, check it. |
@@ -2152,6 +2137,34 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem, | |||
2152 | memcg_check_events(mem, pc->page); | 2137 | memcg_check_events(mem, pc->page); |
2153 | } | 2138 | } |
2154 | 2139 | ||
2140 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||
2141 | |||
2142 | #define PCGF_NOCOPY_AT_SPLIT ((1 << PCG_LOCK) | (1 << PCG_MOVE_LOCK) |\ | ||
2143 | (1 << PCG_ACCT_LRU) | (1 << PCG_MIGRATION)) | ||
2144 | /* | ||
2145 | * Because tail pages are not marked as "used", set it. We're under | ||
2146 | * zone->lru_lock, 'splitting on pmd' and compund_lock. | ||
2147 | */ | ||
2148 | void mem_cgroup_split_huge_fixup(struct page *head, struct page *tail) | ||
2149 | { | ||
2150 | struct page_cgroup *head_pc = lookup_page_cgroup(head); | ||
2151 | struct page_cgroup *tail_pc = lookup_page_cgroup(tail); | ||
2152 | unsigned long flags; | ||
2153 | |||
2154 | /* | ||
2155 | * We have no races witch charge/uncharge but will have races with | ||
2156 | * page state accounting. | ||
2157 | */ | ||
2158 | move_lock_page_cgroup(head_pc, &flags); | ||
2159 | |||
2160 | tail_pc->mem_cgroup = head_pc->mem_cgroup; | ||
2161 | smp_wmb(); /* see __commit_charge() */ | ||
2162 | /* we don't need to copy all flags...*/ | ||
2163 | tail_pc->flags = head_pc->flags & ~PCGF_NOCOPY_AT_SPLIT; | ||
2164 | move_unlock_page_cgroup(head_pc, &flags); | ||
2165 | } | ||
2166 | #endif | ||
2167 | |||
2155 | /** | 2168 | /** |
2156 | * __mem_cgroup_move_account - move account of the page | 2169 | * __mem_cgroup_move_account - move account of the page |
2157 | * @pc: page_cgroup of the page. | 2170 | * @pc: page_cgroup of the page. |
@@ -2545,7 +2558,6 @@ direct_uncharge: | |||
2545 | static struct mem_cgroup * | 2558 | static struct mem_cgroup * |
2546 | __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) | 2559 | __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) |
2547 | { | 2560 | { |
2548 | int i; | ||
2549 | int count; | 2561 | int count; |
2550 | struct page_cgroup *pc; | 2562 | struct page_cgroup *pc; |
2551 | struct mem_cgroup *mem = NULL; | 2563 | struct mem_cgroup *mem = NULL; |
@@ -2595,8 +2607,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) | |||
2595 | break; | 2607 | break; |
2596 | } | 2608 | } |
2597 | 2609 | ||
2598 | for (i = 0; i < count; i++) | 2610 | mem_cgroup_charge_statistics(mem, PageCgroupCache(pc), -count); |
2599 | mem_cgroup_charge_statistics(mem, PageCgroupCache(pc), -1); | ||
2600 | 2611 | ||
2601 | ClearPageCgroupUsed(pc); | 2612 | ClearPageCgroupUsed(pc); |
2602 | /* | 2613 | /* |