aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> 2011-01-20 17:44:24 -0500
committer Linus Torvalds <torvalds@linux-foundation.org> 2011-01-20 20:02:06 -0500
commit ca3e021417eed30ec2b64ce88eb0acf64aa9bc29 (patch)
tree 710590e51764d56c12d00744c402ab15c0d14a6b
parent e401f1761c0b01966e36e41e2c385d455a7b44ee (diff)
memcg: fix USED bit handling at uncharge in THP
Now, under THP:

at charge:
  - PageCgroupUsed bit is set to all page_cgroup on a hugepage.
    ....set to 512 pages.
at uncharge
  - PageCgroupUsed bit is unset on the head page.

So, some pages will remain with "Used" bit.

This patch fixes that Used bit is set only to the head page.
Used bits for tail pages will be set at splitting if necessary.

This patch adds this lock order:
   compound_lock() -> page_cgroup_move_lock().

[akpm@linux-foundation.org: fix warning]
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- include/linux/memcontrol.h 9
-rw-r--r-- mm/huge_memory.c 2
-rw-r--r-- mm/memcontrol.c 91
3 files changed, 62 insertions, 40 deletions
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 6a576f989437..f512e189be5a 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -146,6 +146,10 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
146 gfp_t gfp_mask); 146 gfp_t gfp_mask);
147u64 mem_cgroup_get_limit(struct mem_cgroup *mem); 147u64 mem_cgroup_get_limit(struct mem_cgroup *mem);
148 148
149#ifdef CONFIG_TRANSPARENT_HUGEPAGE
150void mem_cgroup_split_huge_fixup(struct page *head, struct page *tail);
151#endif
152
149#else /* CONFIG_CGROUP_MEM_RES_CTLR */ 153#else /* CONFIG_CGROUP_MEM_RES_CTLR */
150struct mem_cgroup; 154struct mem_cgroup;
151 155
@@ -335,6 +339,11 @@ u64 mem_cgroup_get_limit(struct mem_cgroup *mem)
335 return 0; 339 return 0;
336} 340}
337 341
342static inline void mem_cgroup_split_huge_fixup(struct page *head,
343 struct page *tail)
344{
345}
346
338#endif /* CONFIG_CGROUP_MEM_CONT */ 347#endif /* CONFIG_CGROUP_MEM_CONT */
339 348
340#endif /* _LINUX_MEMCONTROL_H */ 349#endif /* _LINUX_MEMCONTROL_H */
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index c4f634b3a48e..e187454d82f6 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1203,6 +1203,8 @@ static void __split_huge_page_refcount(struct page *page)
1203 BUG_ON(!PageDirty(page_tail)); 1203 BUG_ON(!PageDirty(page_tail));
1204 BUG_ON(!PageSwapBacked(page_tail)); 1204 BUG_ON(!PageSwapBacked(page_tail));
1205 1205
1206 mem_cgroup_split_huge_fixup(page, page_tail);
1207
1206 lru_add_page_tail(zone, page, page_tail); 1208 lru_add_page_tail(zone, page, page_tail);
1207 } 1209 }
1208 1210
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 6d59a2bd520a..848b42195e5b 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1614,7 +1614,7 @@ void mem_cgroup_update_page_stat(struct page *page,
1614 if (unlikely(!mem || !PageCgroupUsed(pc))) 1614 if (unlikely(!mem || !PageCgroupUsed(pc)))
1615 goto out; 1615 goto out;
1616 /* pc->mem_cgroup is unstable ? */ 1616 /* pc->mem_cgroup is unstable ? */
1617 if (unlikely(mem_cgroup_stealed(mem))) { 1617 if (unlikely(mem_cgroup_stealed(mem)) || PageTransHuge(page)) {
1618 /* take a lock against to access pc->mem_cgroup */ 1618 /* take a lock against to access pc->mem_cgroup */
1619 move_lock_page_cgroup(pc, &flags); 1619 move_lock_page_cgroup(pc, &flags);
1620 need_unlock = true; 1620 need_unlock = true;
@@ -2083,14 +2083,27 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
2083 return mem; 2083 return mem;
2084} 2084}
2085 2085
2086/* 2086static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
2087 * commit a charge got by __mem_cgroup_try_charge() and makes page_cgroup to be 2087 struct page_cgroup *pc,
2088 * USED state. If already USED, uncharge and return. 2088 enum charge_type ctype,
2089 */ 2089 int page_size)
2090static void ____mem_cgroup_commit_charge(struct mem_cgroup *mem,
2091 struct page_cgroup *pc,
2092 enum charge_type ctype)
2093{ 2090{
2091 int nr_pages = page_size >> PAGE_SHIFT;
2092
2093 /* try_charge() can return NULL to *memcg, taking care of it. */
2094 if (!mem)
2095 return;
2096
2097 lock_page_cgroup(pc);
2098 if (unlikely(PageCgroupUsed(pc))) {
2099 unlock_page_cgroup(pc);
2100 mem_cgroup_cancel_charge(mem, page_size);
2101 return;
2102 }
2103 /*
2104 * we don't need page_cgroup_lock about tail pages, becase they are not
2105 * accessed by any other context at this point.
2106 */
2094 pc->mem_cgroup = mem; 2107 pc->mem_cgroup = mem;
2095 /* 2108 /*
2096 * We access a page_cgroup asynchronously without lock_page_cgroup(). 2109 * We access a page_cgroup asynchronously without lock_page_cgroup().
@@ -2114,35 +2127,7 @@ static void ____mem_cgroup_commit_charge(struct mem_cgroup *mem,
2114 break; 2127 break;
2115 } 2128 }
2116 2129
2117 mem_cgroup_charge_statistics(mem, PageCgroupCache(pc), 1); 2130 mem_cgroup_charge_statistics(mem, PageCgroupCache(pc), nr_pages);
2118}
2119
2120static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
2121 struct page_cgroup *pc,
2122 enum charge_type ctype,
2123 int page_size)
2124{
2125 int i;
2126 int count = page_size >> PAGE_SHIFT;
2127
2128 /* try_charge() can return NULL to *memcg, taking care of it. */
2129 if (!mem)
2130 return;
2131
2132 lock_page_cgroup(pc);
2133 if (unlikely(PageCgroupUsed(pc))) {
2134 unlock_page_cgroup(pc);
2135 mem_cgroup_cancel_charge(mem, page_size);
2136 return;
2137 }
2138
2139 /*
2140 * we don't need page_cgroup_lock about tail pages, becase they are not
2141 * accessed by any other context at this point.
2142 */
2143 for (i = 0; i < count; i++)
2144 ____mem_cgroup_commit_charge(mem, pc + i, ctype);
2145
2146 unlock_page_cgroup(pc); 2131 unlock_page_cgroup(pc);
2147 /* 2132 /*
2148 * "charge_statistics" updated event counter. Then, check it. 2133 * "charge_statistics" updated event counter. Then, check it.
@@ -2152,6 +2137,34 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
2152 memcg_check_events(mem, pc->page); 2137 memcg_check_events(mem, pc->page);
2153} 2138}
2154 2139
2140#ifdef CONFIG_TRANSPARENT_HUGEPAGE
2141
2142#define PCGF_NOCOPY_AT_SPLIT ((1 << PCG_LOCK) | (1 << PCG_MOVE_LOCK) |\
2143 (1 << PCG_ACCT_LRU) | (1 << PCG_MIGRATION))
2144/*
2145 * Because tail pages are not marked as "used", set it. We're under
2146 * zone->lru_lock, 'splitting on pmd' and compund_lock.
2147 */
2148void mem_cgroup_split_huge_fixup(struct page *head, struct page *tail)
2149{
2150 struct page_cgroup *head_pc = lookup_page_cgroup(head);
2151 struct page_cgroup *tail_pc = lookup_page_cgroup(tail);
2152 unsigned long flags;
2153
2154 /*
2155 * We have no races witch charge/uncharge but will have races with
2156 * page state accounting.
2157 */
2158 move_lock_page_cgroup(head_pc, &flags);
2159
2160 tail_pc->mem_cgroup = head_pc->mem_cgroup;
2161 smp_wmb(); /* see __commit_charge() */
2162 /* we don't need to copy all flags...*/
2163 tail_pc->flags = head_pc->flags & ~PCGF_NOCOPY_AT_SPLIT;
2164 move_unlock_page_cgroup(head_pc, &flags);
2165}
2166#endif
2167
2155/** 2168/**
2156 * __mem_cgroup_move_account - move account of the page 2169 * __mem_cgroup_move_account - move account of the page
2157 * @pc: page_cgroup of the page. 2170 * @pc: page_cgroup of the page.
@@ -2545,7 +2558,6 @@ direct_uncharge:
2545static struct mem_cgroup * 2558static struct mem_cgroup *
2546__mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) 2559__mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
2547{ 2560{
2548 int i;
2549 int count; 2561 int count;
2550 struct page_cgroup *pc; 2562 struct page_cgroup *pc;
2551 struct mem_cgroup *mem = NULL; 2563 struct mem_cgroup *mem = NULL;
@@ -2595,8 +2607,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
2595 break; 2607 break;
2596 } 2608 }
2597 2609
2598 for (i = 0; i < count; i++) 2610 mem_cgroup_charge_statistics(mem, PageCgroupCache(pc), -count);
2599 mem_cgroup_charge_statistics(mem, PageCgroupCache(pc), -1);
2600 2611
2601 ClearPageCgroupUsed(pc); 2612 ClearPageCgroupUsed(pc);
2602 /* 2613 /*