path: root/mm/memcontrol.c
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--  mm/memcontrol.c  199
1 files changed, 120 insertions, 79 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 8ab841031436..3878cfe399dc 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -600,23 +600,22 @@ static void mem_cgroup_swap_statistics(struct mem_cgroup *mem,
 }
 
 static void mem_cgroup_charge_statistics(struct mem_cgroup *mem,
-                                         struct page_cgroup *pc,
-                                         bool charge)
+                                         bool file, int nr_pages)
 {
-        int val = (charge) ? 1 : -1;
-
         preempt_disable();
 
-        if (PageCgroupCache(pc))
-                __this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_CACHE], val);
+        if (file)
+                __this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_CACHE], nr_pages);
         else
-                __this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_RSS], val);
+                __this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_RSS], nr_pages);
 
-        if (charge)
+        /* pagein of a big page is an event. So, ignore page size */
+        if (nr_pages > 0)
                 __this_cpu_inc(mem->stat->count[MEM_CGROUP_STAT_PGPGIN_COUNT]);
         else
                 __this_cpu_inc(mem->stat->count[MEM_CGROUP_STAT_PGPGOUT_COUNT]);
-        __this_cpu_inc(mem->stat->count[MEM_CGROUP_EVENTS]);
+
+        __this_cpu_add(mem->stat->count[MEM_CGROUP_EVENTS], nr_pages);
 
         preempt_enable();
 }
@@ -815,7 +814,8 @@ void mem_cgroup_del_lru_list(struct page *page, enum lru_list lru)
          * removed from global LRU.
          */
         mz = page_cgroup_zoneinfo(pc);
-        MEM_CGROUP_ZSTAT(mz, lru) -= 1;
+        /* huge page split is done under lru_lock. so, we have no races. */
+        MEM_CGROUP_ZSTAT(mz, lru) -= 1 << compound_order(page);
         if (mem_cgroup_is_root(pc->mem_cgroup))
                 return;
         VM_BUG_ON(list_empty(&pc->lru));
@@ -836,13 +836,12 @@ void mem_cgroup_rotate_lru_list(struct page *page, enum lru_list lru)
                 return;
 
         pc = lookup_page_cgroup(page);
-        /*
-         * Used bit is set without atomic ops but after smp_wmb().
-         * For making pc->mem_cgroup visible, insert smp_rmb() here.
-         */
-        smp_rmb();
         /* unused or root page is not rotated. */
-        if (!PageCgroupUsed(pc) || mem_cgroup_is_root(pc->mem_cgroup))
+        if (!PageCgroupUsed(pc))
+                return;
+        /* Ensure pc->mem_cgroup is visible after reading PCG_USED. */
+        smp_rmb();
+        if (mem_cgroup_is_root(pc->mem_cgroup))
                 return;
         mz = page_cgroup_zoneinfo(pc);
         list_move(&pc->lru, &mz->lists[lru]);
@@ -857,16 +856,13 @@ void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru)
                 return;
         pc = lookup_page_cgroup(page);
         VM_BUG_ON(PageCgroupAcctLRU(pc));
-        /*
-         * Used bit is set without atomic ops but after smp_wmb().
-         * For making pc->mem_cgroup visible, insert smp_rmb() here.
-         */
-        smp_rmb();
         if (!PageCgroupUsed(pc))
                 return;
-
+        /* Ensure pc->mem_cgroup is visible after reading PCG_USED. */
+        smp_rmb();
         mz = page_cgroup_zoneinfo(pc);
-        MEM_CGROUP_ZSTAT(mz, lru) += 1;
+        /* huge page split is done under lru_lock. so, we have no races. */
+        MEM_CGROUP_ZSTAT(mz, lru) += 1 << compound_order(page);
         SetPageCgroupAcctLRU(pc);
         if (mem_cgroup_is_root(pc->mem_cgroup))
                 return;
@@ -1030,14 +1026,10 @@ mem_cgroup_get_reclaim_stat_from_page(struct page *page)
                 return NULL;
 
         pc = lookup_page_cgroup(page);
-        /*
-         * Used bit is set without atomic ops but after smp_wmb().
-         * For making pc->mem_cgroup visible, insert smp_rmb() here.
-         */
-        smp_rmb();
         if (!PageCgroupUsed(pc))
                 return NULL;
-
+        /* Ensure pc->mem_cgroup is visible after reading PCG_USED. */
+        smp_rmb();
         mz = page_cgroup_zoneinfo(pc);
         if (!mz)
                 return NULL;
@@ -1615,7 +1607,7 @@ void mem_cgroup_update_page_stat(struct page *page,
         if (unlikely(!mem || !PageCgroupUsed(pc)))
                 goto out;
         /* pc->mem_cgroup is unstable ? */
-        if (unlikely(mem_cgroup_stealed(mem))) {
+        if (unlikely(mem_cgroup_stealed(mem)) || PageTransHuge(page)) {
                 /* take a lock against to access pc->mem_cgroup */
                 move_lock_page_cgroup(pc, &flags);
                 need_unlock = true;
@@ -1840,6 +1832,7 @@ static int __mem_cgroup_do_charge(struct mem_cgroup *mem, gfp_t gfp_mask,
                 if (likely(!ret))
                         return CHARGE_OK;
 
+                res_counter_uncharge(&mem->res, csize);
                 mem_over_limit = mem_cgroup_from_res_counter(fail_res, memsw);
                 flags |= MEM_CGROUP_RECLAIM_NOSWAP;
         } else
@@ -2084,14 +2077,27 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
         return mem;
 }
 
-/*
- * commit a charge got by __mem_cgroup_try_charge() and makes page_cgroup to be
- * USED state. If already USED, uncharge and return.
- */
-static void ____mem_cgroup_commit_charge(struct mem_cgroup *mem,
-                                         struct page_cgroup *pc,
-                                         enum charge_type ctype)
+static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
+                                       struct page_cgroup *pc,
+                                       enum charge_type ctype,
+                                       int page_size)
 {
+        int nr_pages = page_size >> PAGE_SHIFT;
+
+        /* try_charge() can return NULL to *memcg, taking care of it. */
+        if (!mem)
+                return;
+
+        lock_page_cgroup(pc);
+        if (unlikely(PageCgroupUsed(pc))) {
+                unlock_page_cgroup(pc);
+                mem_cgroup_cancel_charge(mem, page_size);
+                return;
+        }
+        /*
+         * we don't need page_cgroup_lock about tail pages, becase they are not
+         * accessed by any other context at this point.
+         */
         pc->mem_cgroup = mem;
         /*
          * We access a page_cgroup asynchronously without lock_page_cgroup().
@@ -2115,43 +2121,57 @@ static void ____mem_cgroup_commit_charge(struct mem_cgroup *mem,
                 break;
         }
 
-        mem_cgroup_charge_statistics(mem, pc, true);
+        mem_cgroup_charge_statistics(mem, PageCgroupCache(pc), nr_pages);
+        unlock_page_cgroup(pc);
+        /*
+         * "charge_statistics" updated event counter. Then, check it.
+         * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree.
+         * if they exceeds softlimit.
+         */
+        memcg_check_events(mem, pc->page);
 }
 
-static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
-                                       struct page_cgroup *pc,
-                                       enum charge_type ctype,
-                                       int page_size)
-{
-        int i;
-        int count = page_size >> PAGE_SHIFT;
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 
-        /* try_charge() can return NULL to *memcg, taking care of it. */
-        if (!mem)
-                return;
+#define PCGF_NOCOPY_AT_SPLIT ((1 << PCG_LOCK) | (1 << PCG_MOVE_LOCK) |\
+                        (1 << PCG_ACCT_LRU) | (1 << PCG_MIGRATION))
+/*
+ * Because tail pages are not marked as "used", set it. We're under
+ * zone->lru_lock, 'splitting on pmd' and compund_lock.
+ */
+void mem_cgroup_split_huge_fixup(struct page *head, struct page *tail)
+{
+        struct page_cgroup *head_pc = lookup_page_cgroup(head);
+        struct page_cgroup *tail_pc = lookup_page_cgroup(tail);
+        unsigned long flags;
 
-        lock_page_cgroup(pc);
-        if (unlikely(PageCgroupUsed(pc))) {
-                unlock_page_cgroup(pc);
-                mem_cgroup_cancel_charge(mem, page_size);
+        if (mem_cgroup_disabled())
                 return;
-        }
-
         /*
-         * we don't need page_cgroup_lock about tail pages, becase they are not
-         * accessed by any other context at this point.
+         * We have no races with charge/uncharge but will have races with
+         * page state accounting.
          */
-        for (i = 0; i < count; i++)
-                ____mem_cgroup_commit_charge(mem, pc + i, ctype);
+        move_lock_page_cgroup(head_pc, &flags);
 
-        unlock_page_cgroup(pc);
-        /*
-         * "charge_statistics" updated event counter. Then, check it.
-         * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree.
-         * if they exceeds softlimit.
-         */
-        memcg_check_events(mem, pc->page);
+        tail_pc->mem_cgroup = head_pc->mem_cgroup;
+        smp_wmb(); /* see __commit_charge() */
+        if (PageCgroupAcctLRU(head_pc)) {
+                enum lru_list lru;
+                struct mem_cgroup_per_zone *mz;
+
+                /*
+                 * LRU flags cannot be copied because we need to add tail
+                 *.page to LRU by generic call and our hook will be called.
+                 * We hold lru_lock, then, reduce counter directly.
+                 */
+                lru = page_lru(head);
+                mz = page_cgroup_zoneinfo(head_pc);
+                MEM_CGROUP_ZSTAT(mz, lru) -= 1;
+        }
+        tail_pc->flags = head_pc->flags & ~PCGF_NOCOPY_AT_SPLIT;
+        move_unlock_page_cgroup(head_pc, &flags);
 }
+#endif
 
 /**
  * __mem_cgroup_move_account - move account of the page
@@ -2171,8 +2191,11 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
  */
 
 static void __mem_cgroup_move_account(struct page_cgroup *pc,
-        struct mem_cgroup *from, struct mem_cgroup *to, bool uncharge)
+        struct mem_cgroup *from, struct mem_cgroup *to, bool uncharge,
+        int charge_size)
 {
+        int nr_pages = charge_size >> PAGE_SHIFT;
+
         VM_BUG_ON(from == to);
         VM_BUG_ON(PageLRU(pc->page));
         VM_BUG_ON(!page_is_cgroup_locked(pc));
@@ -2186,14 +2209,14 @@ static void __mem_cgroup_move_account(struct page_cgroup *pc,
                 __this_cpu_inc(to->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]);
                 preempt_enable();
         }
-        mem_cgroup_charge_statistics(from, pc, false);
+        mem_cgroup_charge_statistics(from, PageCgroupCache(pc), -nr_pages);
         if (uncharge)
                 /* This is not "cancel", but cancel_charge does all we need. */
-                mem_cgroup_cancel_charge(from, PAGE_SIZE);
+                mem_cgroup_cancel_charge(from, charge_size);
 
         /* caller should have done css_get */
         pc->mem_cgroup = to;
-        mem_cgroup_charge_statistics(to, pc, true);
+        mem_cgroup_charge_statistics(to, PageCgroupCache(pc), nr_pages);
         /*
          * We charges against "to" which may not have any tasks. Then, "to"
          * can be under rmdir(). But in current implementation, caller of
@@ -2208,15 +2231,24 @@ static void __mem_cgroup_move_account(struct page_cgroup *pc,
  * __mem_cgroup_move_account()
  */
 static int mem_cgroup_move_account(struct page_cgroup *pc,
-        struct mem_cgroup *from, struct mem_cgroup *to, bool uncharge)
+                struct mem_cgroup *from, struct mem_cgroup *to,
+                bool uncharge, int charge_size)
 {
         int ret = -EINVAL;
         unsigned long flags;
+        /*
+         * The page is isolated from LRU. So, collapse function
+         * will not handle this page. But page splitting can happen.
+         * Do this check under compound_page_lock(). The caller should
+         * hold it.
+         */
+        if ((charge_size > PAGE_SIZE) && !PageTransHuge(pc->page))
+                return -EBUSY;
 
         lock_page_cgroup(pc);
         if (PageCgroupUsed(pc) && pc->mem_cgroup == from) {
                 move_lock_page_cgroup(pc, &flags);
-                __mem_cgroup_move_account(pc, from, to, uncharge);
+                __mem_cgroup_move_account(pc, from, to, uncharge, charge_size);
                 move_unlock_page_cgroup(pc, &flags);
                 ret = 0;
         }
@@ -2241,6 +2273,8 @@ static int mem_cgroup_move_parent(struct page_cgroup *pc,
         struct cgroup *cg = child->css.cgroup;
         struct cgroup *pcg = cg->parent;
         struct mem_cgroup *parent;
+        int page_size = PAGE_SIZE;
+        unsigned long flags;
         int ret;
 
         /* Is ROOT ? */
@@ -2253,15 +2287,24 @@ static int mem_cgroup_move_parent(struct page_cgroup *pc,
         if (isolate_lru_page(page))
                 goto put;
 
+        if (PageTransHuge(page))
+                page_size = HPAGE_SIZE;
+
         parent = mem_cgroup_from_cont(pcg);
-        ret = __mem_cgroup_try_charge(NULL, gfp_mask, &parent, false,
-                                      PAGE_SIZE);
+        ret = __mem_cgroup_try_charge(NULL, gfp_mask,
+                                &parent, false, page_size);
         if (ret || !parent)
                 goto put_back;
 
-        ret = mem_cgroup_move_account(pc, child, parent, true);
+        if (page_size > PAGE_SIZE)
+                flags = compound_lock_irqsave(page);
+
+        ret = mem_cgroup_move_account(pc, child, parent, true, page_size);
         if (ret)
-                mem_cgroup_cancel_charge(parent, PAGE_SIZE);
+                mem_cgroup_cancel_charge(parent, page_size);
+
+        if (page_size > PAGE_SIZE)
+                compound_unlock_irqrestore(page, flags);
 put_back:
         putback_lru_page(page);
 put:
@@ -2546,7 +2589,6 @@ direct_uncharge:
 static struct mem_cgroup *
 __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 {
-        int i;
         int count;
         struct page_cgroup *pc;
         struct mem_cgroup *mem = NULL;
@@ -2596,8 +2638,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
                 break;
         }
 
-        for (i = 0; i < count; i++)
-                mem_cgroup_charge_statistics(mem, pc + i, false);
+        mem_cgroup_charge_statistics(mem, PageCgroupCache(pc), -count);
 
         ClearPageCgroupUsed(pc);
         /*
@@ -4844,7 +4885,7 @@ retry:
                                 goto put;
                         pc = lookup_page_cgroup(page);
                         if (!mem_cgroup_move_account(pc,
-                                                mc.from, mc.to, false)) {
+                                                mc.from, mc.to, false, PAGE_SIZE)) {
                                 mc.precharge--;
                                 /* we uncharge from mc.from later. */
                                 mc.moved_charge++;