Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--  mm/memcontrol.c | 190
1 file changed, 110 insertions(+), 80 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 8ab841031436..db76ef726293 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -600,23 +600,22 @@ static void mem_cgroup_swap_statistics(struct mem_cgroup *mem,
 }
 
 static void mem_cgroup_charge_statistics(struct mem_cgroup *mem,
-					 struct page_cgroup *pc,
-					 bool charge)
+					 bool file, int nr_pages)
 {
-	int val = (charge) ? 1 : -1;
-
 	preempt_disable();
 
-	if (PageCgroupCache(pc))
-		__this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_CACHE], val);
+	if (file)
+		__this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_CACHE], nr_pages);
 	else
-		__this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_RSS], val);
+		__this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_RSS], nr_pages);
 
-	if (charge)
+	/* pagein of a big page is an event. So, ignore page size */
+	if (nr_pages > 0)
 		__this_cpu_inc(mem->stat->count[MEM_CGROUP_STAT_PGPGIN_COUNT]);
 	else
 		__this_cpu_inc(mem->stat->count[MEM_CGROUP_STAT_PGPGOUT_COUNT]);
-	__this_cpu_inc(mem->stat->count[MEM_CGROUP_EVENTS]);
+
+	__this_cpu_add(mem->stat->count[MEM_CGROUP_EVENTS], nr_pages);
 
 	preempt_enable();
 }
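Note: the old bool charge flag and the per-call val = +/-1 are replaced by a
signed int nr_pages, so a single call can account a whole huge page and the
sign distinguishes charge from uncharge. A minimal userspace model of the
arithmetic (illustrative only, not kernel code; the constant 512 assumes a
2MB THP with 4K base pages on x86-64):

    #include <stdio.h>

    static long stat_cache, stat_rss, events;

    /* Mirrors mem_cgroup_charge_statistics(): nr_pages > 0 charges,
     * nr_pages < 0 uncharges; |nr_pages| counts base pages. */
    static void charge_statistics(int file, int nr_pages)
    {
            if (file)
                    stat_cache += nr_pages;
            else
                    stat_rss += nr_pages;
            events += nr_pages;     /* as MEM_CGROUP_EVENTS in the patch */
    }

    int main(void)
    {
            charge_statistics(0, 512);      /* charge one 2MB THP */
            charge_statistics(0, -512);     /* uncharge it at free */
            printf("rss=%ld events=%ld\n", stat_rss, events);
            return 0;
    }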
@@ -815,7 +814,8 @@ void mem_cgroup_del_lru_list(struct page *page, enum lru_list lru)
 	 * removed from global LRU.
 	 */
 	mz = page_cgroup_zoneinfo(pc);
-	MEM_CGROUP_ZSTAT(mz, lru) -= 1;
+	/* huge page split is done under lru_lock. so, we have no races. */
+	MEM_CGROUP_ZSTAT(mz, lru) -= 1 << compound_order(page);
 	if (mem_cgroup_is_root(pc->mem_cgroup))
 		return;
 	VM_BUG_ON(list_empty(&pc->lru));
@@ -836,13 +836,12 @@ void mem_cgroup_rotate_lru_list(struct page *page, enum lru_list lru)
 		return;
 
 	pc = lookup_page_cgroup(page);
-	/*
-	 * Used bit is set without atomic ops but after smp_wmb().
-	 * For making pc->mem_cgroup visible, insert smp_rmb() here.
-	 */
-	smp_rmb();
 	/* unused or root page is not rotated. */
-	if (!PageCgroupUsed(pc) || mem_cgroup_is_root(pc->mem_cgroup))
+	if (!PageCgroupUsed(pc))
+		return;
+	/* Ensure pc->mem_cgroup is visible after reading PCG_USED. */
+	smp_rmb();
+	if (mem_cgroup_is_root(pc->mem_cgroup))
 		return;
 	mz = page_cgroup_zoneinfo(pc);
 	list_move(&pc->lru, &mz->lists[lru]);
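Note: the reordering narrows the barrier's job. PCG_USED is tested first, and
smp_rmb() is only needed before dereferencing pc->mem_cgroup, pairing with the
smp_wmb() on the commit side (see __mem_cgroup_commit_charge below). A
userspace sketch of that pairing, with C11 fences standing in for the kernel
barriers (illustrative, not kernel code):

    #include <stdatomic.h>
    #include <stdio.h>

    struct pc_model {
            void *mem_cgroup;
            atomic_int used;                /* models the PCG_USED bit */
    };

    /* Commit side: publish mem_cgroup, then set the Used bit. */
    static void commit(struct pc_model *pc, void *mem)
    {
            pc->mem_cgroup = mem;
            atomic_thread_fence(memory_order_release);  /* smp_wmb() */
            atomic_store_explicit(&pc->used, 1, memory_order_relaxed);
    }

    /* Reader side: test Used first; after the fence, mem_cgroup is safe. */
    static void *lookup(struct pc_model *pc)
    {
            if (!atomic_load_explicit(&pc->used, memory_order_relaxed))
                    return NULL;    /* unused: never touch mem_cgroup */
            atomic_thread_fence(memory_order_acquire);  /* smp_rmb() */
            return pc->mem_cgroup;
    }

    int main(void)
    {
            static struct pc_model pc;
            int mem;

            commit(&pc, &mem);
            printf("mem_cgroup=%p\n", lookup(&pc));
            return 0;
    }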
@@ -857,16 +856,13 @@ void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru)
 		return;
 	pc = lookup_page_cgroup(page);
 	VM_BUG_ON(PageCgroupAcctLRU(pc));
-	/*
-	 * Used bit is set without atomic ops but after smp_wmb().
-	 * For making pc->mem_cgroup visible, insert smp_rmb() here.
-	 */
-	smp_rmb();
 	if (!PageCgroupUsed(pc))
 		return;
-
+	/* Ensure pc->mem_cgroup is visible after reading PCG_USED. */
+	smp_rmb();
 	mz = page_cgroup_zoneinfo(pc);
-	MEM_CGROUP_ZSTAT(mz, lru) += 1;
+	/* huge page split is done under lru_lock. so, we have no races. */
+	MEM_CGROUP_ZSTAT(mz, lru) += 1 << compound_order(page);
 	SetPageCgroupAcctLRU(pc);
 	if (mem_cgroup_is_root(pc->mem_cgroup))
 		return;
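Note: with the two hunks above, the per-zone LRU counter moves in units of
1 << compound_order(page): 1 for a base page, 512 for a 2MB THP (order 9 on
x86-64). Together with the "-= 1" in mem_cgroup_split_huge_fixup further down,
the counter stays equal to the number of base pages on the LRU across a split.
A userspace model of that bookkeeping (illustrative; assumes order 9, that the
fixup runs once per tail, and that the generic LRU code re-adds every tail
through the add hook):

    #include <stdio.h>

    static long zstat;      /* models MEM_CGROUP_ZSTAT(mz, lru) */

    static void lru_add(int order) { zstat += 1L << order; }
    static void lru_del(int order) { zstat -= 1L << order; }
    /* split fixup: pre-subtract one per tail, since the generic code
     * will re-add each tail via lru_add(0) right after the split */
    static void split_fixup_tail(void) { zstat -= 1; }

    int main(void)
    {
            lru_add(9);                     /* huge page on LRU: 512 */
            for (int i = 0; i < 511; i++) { /* split into 512 base pages */
                    split_fixup_tail();
                    lru_add(0);
            }
            printf("after split: %ld\n", zstat);    /* still 512 */
            lru_del(0);                     /* head, now order 0 */
            printf("after one free: %ld\n", zstat); /* 511 */
            return 0;
    }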
@@ -1030,14 +1026,10 @@ mem_cgroup_get_reclaim_stat_from_page(struct page *page)
 		return NULL;
 
 	pc = lookup_page_cgroup(page);
-	/*
-	 * Used bit is set without atomic ops but after smp_wmb().
-	 * For making pc->mem_cgroup visible, insert smp_rmb() here.
-	 */
-	smp_rmb();
 	if (!PageCgroupUsed(pc))
 		return NULL;
-
+	/* Ensure pc->mem_cgroup is visible after reading PCG_USED. */
+	smp_rmb();
 	mz = page_cgroup_zoneinfo(pc);
 	if (!mz)
 		return NULL;
@@ -1615,7 +1607,7 @@ void mem_cgroup_update_page_stat(struct page *page,
 	if (unlikely(!mem || !PageCgroupUsed(pc)))
 		goto out;
 	/* pc->mem_cgroup is unstable ? */
-	if (unlikely(mem_cgroup_stealed(mem))) {
+	if (unlikely(mem_cgroup_stealed(mem)) || PageTransHuge(page)) {
 		/* take a lock against to access pc->mem_cgroup */
 		move_lock_page_cgroup(pc, &flags);
 		need_unlock = true;
@@ -2084,14 +2076,27 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
 	return mem;
 }
 
-/*
- * commit a charge got by __mem_cgroup_try_charge() and makes page_cgroup to be
- * USED state. If already USED, uncharge and return.
- */
-static void ____mem_cgroup_commit_charge(struct mem_cgroup *mem,
-				     struct page_cgroup *pc,
-				     enum charge_type ctype)
+static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
+				       struct page_cgroup *pc,
+				       enum charge_type ctype,
+				       int page_size)
 {
+	int nr_pages = page_size >> PAGE_SHIFT;
+
+	/* try_charge() can return NULL to *memcg, taking care of it. */
+	if (!mem)
+		return;
+
+	lock_page_cgroup(pc);
+	if (unlikely(PageCgroupUsed(pc))) {
+		unlock_page_cgroup(pc);
+		mem_cgroup_cancel_charge(mem, page_size);
+		return;
+	}
+	/*
+	 * we don't need page_cgroup_lock about tail pages, because they are
+	 * not accessed by any other context at this point.
+	 */
 	pc->mem_cgroup = mem;
 	/*
 	 * We access a page_cgroup asynchronously without lock_page_cgroup().
@@ -2115,35 +2120,7 @@ static void ____mem_cgroup_commit_charge(struct mem_cgroup *mem,
 		break;
 	}
 
-	mem_cgroup_charge_statistics(mem, pc, true);
-}
-
-static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
-				       struct page_cgroup *pc,
-				       enum charge_type ctype,
-				       int page_size)
-{
-	int i;
-	int count = page_size >> PAGE_SHIFT;
-
-	/* try_charge() can return NULL to *memcg, taking care of it. */
-	if (!mem)
-		return;
-
-	lock_page_cgroup(pc);
-	if (unlikely(PageCgroupUsed(pc))) {
-		unlock_page_cgroup(pc);
-		mem_cgroup_cancel_charge(mem, page_size);
-		return;
-	}
-
-	/*
-	 * we don't need page_cgroup_lock about tail pages, becase they are not
-	 * accessed by any other context at this point.
-	 */
-	for (i = 0; i < count; i++)
-		____mem_cgroup_commit_charge(mem, pc + i, ctype);
-
+	mem_cgroup_charge_statistics(mem, PageCgroupCache(pc), nr_pages);
 	unlock_page_cgroup(pc);
 	/*
 	 * "charge_statistics" updated event counter. Then, check it.
@@ -2153,6 +2130,46 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
 	memcg_check_events(mem, pc->page);
 }
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+
+#define PCGF_NOCOPY_AT_SPLIT ((1 << PCG_LOCK) | (1 << PCG_MOVE_LOCK) |\
+			(1 << PCG_ACCT_LRU) | (1 << PCG_MIGRATION))
+/*
+ * Because tail pages are not marked as "used", set it. We're under
+ * zone->lru_lock, 'splitting on pmd' and compound_lock.
+ */
+void mem_cgroup_split_huge_fixup(struct page *head, struct page *tail)
+{
+	struct page_cgroup *head_pc = lookup_page_cgroup(head);
+	struct page_cgroup *tail_pc = lookup_page_cgroup(tail);
+	unsigned long flags;
+
+	/*
+	 * We have no races with charge/uncharge but will have races with
+	 * page state accounting.
+	 */
+	move_lock_page_cgroup(head_pc, &flags);
+
+	tail_pc->mem_cgroup = head_pc->mem_cgroup;
+	smp_wmb(); /* see __commit_charge() */
+	if (PageCgroupAcctLRU(head_pc)) {
+		enum lru_list lru;
+		struct mem_cgroup_per_zone *mz;
+
+		/*
+		 * LRU flags cannot be copied because we need to add tail
+		 * page to LRU by generic call and our hook will be called.
+		 * We hold lru_lock, then, reduce counter directly.
+		 */
+		lru = page_lru(head);
+		mz = page_cgroup_zoneinfo(head_pc);
+		MEM_CGROUP_ZSTAT(mz, lru) -= 1;
+	}
+	tail_pc->flags = head_pc->flags & ~PCGF_NOCOPY_AT_SPLIT;
+	move_unlock_page_cgroup(head_pc, &flags);
+}
+#endif
+
 /**
  * __mem_cgroup_move_account - move account of the page
  * @pc: page_cgroup of the page.
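Note on PCGF_NOCOPY_AT_SPLIT: the tail inherits the head's flags (notably
Used and Cache) minus the four bits describing state the tail must establish
for itself: the two per-page_cgroup locks, LRU accounting, and migration. A
userspace model (illustrative; the bit positions here are invented, the real
ones live in page_cgroup.h):

    #include <stdio.h>

    /* invented bit positions, for illustration only */
    enum { PCG_LOCK, PCG_CACHE, PCG_USED, PCG_ACCT_LRU,
           PCG_MIGRATION, PCG_MOVE_LOCK };

    #define PCGF_NOCOPY_AT_SPLIT ((1 << PCG_LOCK) | (1 << PCG_MOVE_LOCK) | \
                                  (1 << PCG_ACCT_LRU) | (1 << PCG_MIGRATION))

    int main(void)
    {
            unsigned long head = (1 << PCG_USED) | (1 << PCG_CACHE) |
                                 (1 << PCG_ACCT_LRU) | (1 << PCG_LOCK);
            unsigned long tail = head & ~PCGF_NOCOPY_AT_SPLIT;

            /* tail keeps USED and CACHE; LOCK and ACCT_LRU are dropped */
            printf("head=%#lx tail=%#lx\n", head, tail);
            return 0;
    }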
@@ -2171,8 +2188,11 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
  */
 
 static void __mem_cgroup_move_account(struct page_cgroup *pc,
-	struct mem_cgroup *from, struct mem_cgroup *to, bool uncharge)
+	struct mem_cgroup *from, struct mem_cgroup *to, bool uncharge,
+	int charge_size)
 {
+	int nr_pages = charge_size >> PAGE_SHIFT;
+
 	VM_BUG_ON(from == to);
 	VM_BUG_ON(PageLRU(pc->page));
 	VM_BUG_ON(!page_is_cgroup_locked(pc));
@@ -2186,14 +2206,14 @@ static void __mem_cgroup_move_account(struct page_cgroup *pc,
 		__this_cpu_inc(to->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]);
 		preempt_enable();
 	}
-	mem_cgroup_charge_statistics(from, pc, false);
+	mem_cgroup_charge_statistics(from, PageCgroupCache(pc), -nr_pages);
 	if (uncharge)
 		/* This is not "cancel", but cancel_charge does all we need. */
-		mem_cgroup_cancel_charge(from, PAGE_SIZE);
+		mem_cgroup_cancel_charge(from, charge_size);
 
 	/* caller should have done css_get */
 	pc->mem_cgroup = to;
-	mem_cgroup_charge_statistics(to, pc, true);
+	mem_cgroup_charge_statistics(to, PageCgroupCache(pc), nr_pages);
 	/*
 	 * We charges against "to" which may not have any tasks. Then, "to"
 	 * can be under rmdir(). But in current implementation, caller of
@@ -2208,15 +2228,19 @@ static void __mem_cgroup_move_account(struct page_cgroup *pc,
  * __mem_cgroup_move_account()
  */
 static int mem_cgroup_move_account(struct page_cgroup *pc,
-		struct mem_cgroup *from, struct mem_cgroup *to, bool uncharge)
+		struct mem_cgroup *from, struct mem_cgroup *to,
+		bool uncharge, int charge_size)
 {
 	int ret = -EINVAL;
 	unsigned long flags;
 
+	if ((charge_size > PAGE_SIZE) && !PageTransHuge(pc->page))
+		return -EBUSY;
+
 	lock_page_cgroup(pc);
 	if (PageCgroupUsed(pc) && pc->mem_cgroup == from) {
 		move_lock_page_cgroup(pc, &flags);
-		__mem_cgroup_move_account(pc, from, to, uncharge);
+		__mem_cgroup_move_account(pc, from, to, uncharge, charge_size);
 		move_unlock_page_cgroup(pc, &flags);
 		ret = 0;
 	}
@@ -2241,6 +2265,8 @@ static int mem_cgroup_move_parent(struct page_cgroup *pc,
 	struct cgroup *cg = child->css.cgroup;
 	struct cgroup *pcg = cg->parent;
 	struct mem_cgroup *parent;
+	int charge = PAGE_SIZE;
+	unsigned long flags;
 	int ret;
 
 	/* Is ROOT ? */
@@ -2252,17 +2278,23 @@ static int mem_cgroup_move_parent(struct page_cgroup *pc,
 		goto out;
 	if (isolate_lru_page(page))
 		goto put;
+	/* The page is isolated from LRU and we have no race with splitting */
+	charge = PAGE_SIZE << compound_order(page);
 
 	parent = mem_cgroup_from_cont(pcg);
-	ret = __mem_cgroup_try_charge(NULL, gfp_mask, &parent, false,
-				      PAGE_SIZE);
+	ret = __mem_cgroup_try_charge(NULL, gfp_mask, &parent, false, charge);
 	if (ret || !parent)
 		goto put_back;
 
-	ret = mem_cgroup_move_account(pc, child, parent, true);
+	if (charge > PAGE_SIZE)
+		flags = compound_lock_irqsave(page);
+
+	ret = mem_cgroup_move_account(pc, child, parent, true, charge);
 	if (ret)
-		mem_cgroup_cancel_charge(parent, PAGE_SIZE);
+		mem_cgroup_cancel_charge(parent, charge);
 put_back:
+	if (charge > PAGE_SIZE)
+		compound_unlock_irqrestore(page, flags);
 	putback_lru_page(page);
 put:
 	put_page(page);
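Note: the charge moved to the parent is computed after isolate_lru_page(),
when per the patch comment the order can no longer change under us, as
PAGE_SIZE << compound_order(page); compound_lock_irqsave() then keeps the
page together while the account moves, and the new charge_size > PAGE_SIZE
&& !PageTransHuge() check in mem_cgroup_move_account() turns any remaining
mismatch into -EBUSY. The size arithmetic, for reference (illustrative;
assumes 4K base pages):

    #include <stdio.h>

    #define PAGE_SHIFT 12                   /* assumes 4K base pages */
    #define PAGE_SIZE  (1UL << PAGE_SHIFT)

    int main(void)
    {
            /* charge = PAGE_SIZE << compound_order(page) */
            printf("order 0: %lu bytes\n", PAGE_SIZE << 0); /* 4096 */
            printf("order 9: %lu bytes\n", PAGE_SIZE << 9); /* 2MB */
            return 0;
    }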
@@ -2546,7 +2578,6 @@ direct_uncharge:
 static struct mem_cgroup *
 __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 {
-	int i;
 	int count;
 	struct page_cgroup *pc;
 	struct mem_cgroup *mem = NULL;
@@ -2596,8 +2627,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 		break;
 	}
 
-	for (i = 0; i < count; i++)
-		mem_cgroup_charge_statistics(mem, pc + i, false);
+	mem_cgroup_charge_statistics(mem, PageCgroupCache(pc), -count);
 
 	ClearPageCgroupUsed(pc);
 	/*
@@ -4844,7 +4874,7 @@ retry:
 			goto put;
 		pc = lookup_page_cgroup(page);
 		if (!mem_cgroup_move_account(pc,
-					mc.from, mc.to, false)) {
+				mc.from, mc.to, false, PAGE_SIZE)) {
 			mc.precharge--;
 			/* we uncharge from mc.from later. */
 			mc.moved_charge++;