about summary refs log tree commit diff stats
path: root/mm/memcontrol.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--  mm/memcontrol.c  98
1 files changed, 73 insertions, 25 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index db76ef726293..da53a252b259 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -612,8 +612,10 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *mem,
612 /* pagein of a big page is an event. So, ignore page size */ 612 /* pagein of a big page is an event. So, ignore page size */
613 if (nr_pages > 0) 613 if (nr_pages > 0)
614 __this_cpu_inc(mem->stat->count[MEM_CGROUP_STAT_PGPGIN_COUNT]); 614 __this_cpu_inc(mem->stat->count[MEM_CGROUP_STAT_PGPGIN_COUNT]);
615 else 615 else {
616 __this_cpu_inc(mem->stat->count[MEM_CGROUP_STAT_PGPGOUT_COUNT]); 616 __this_cpu_inc(mem->stat->count[MEM_CGROUP_STAT_PGPGOUT_COUNT]);
617 nr_pages = -nr_pages; /* for event */
618 }
617 619
618 __this_cpu_add(mem->stat->count[MEM_CGROUP_EVENTS], nr_pages); 620 __this_cpu_add(mem->stat->count[MEM_CGROUP_EVENTS], nr_pages);
619 621
@@ -1111,6 +1113,23 @@ static bool mem_cgroup_check_under_limit(struct mem_cgroup *mem)
1111 return false; 1113 return false;
1112} 1114}
1113 1115
1116/**
1117 * mem_cgroup_check_margin - check if the memory cgroup allows charging
1118 * @mem: memory cgroup to check
1119 * @bytes: the number of bytes the caller intends to charge
1120 *
1121 * Returns a boolean value on whether @mem can be charged @bytes or
1122 * whether this would exceed the limit.
1123 */
1124static bool mem_cgroup_check_margin(struct mem_cgroup *mem, unsigned long bytes)
1125{
1126 if (!res_counter_check_margin(&mem->res, bytes))
1127 return false;
1128 if (do_swap_account && !res_counter_check_margin(&mem->memsw, bytes))
1129 return false;
1130 return true;
1131}
1132
1114static unsigned int get_swappiness(struct mem_cgroup *memcg) 1133static unsigned int get_swappiness(struct mem_cgroup *memcg)
1115{ 1134{
1116 struct cgroup *cgrp = memcg->css.cgroup; 1135 struct cgroup *cgrp = memcg->css.cgroup;
@@ -1832,27 +1851,39 @@ static int __mem_cgroup_do_charge(struct mem_cgroup *mem, gfp_t gfp_mask,
1832 if (likely(!ret)) 1851 if (likely(!ret))
1833 return CHARGE_OK; 1852 return CHARGE_OK;
1834 1853
1854 res_counter_uncharge(&mem->res, csize);
1835 mem_over_limit = mem_cgroup_from_res_counter(fail_res, memsw); 1855 mem_over_limit = mem_cgroup_from_res_counter(fail_res, memsw);
1836 flags |= MEM_CGROUP_RECLAIM_NOSWAP; 1856 flags |= MEM_CGROUP_RECLAIM_NOSWAP;
1837 } else 1857 } else
1838 mem_over_limit = mem_cgroup_from_res_counter(fail_res, res); 1858 mem_over_limit = mem_cgroup_from_res_counter(fail_res, res);
1839 1859 /*
1840 if (csize > PAGE_SIZE) /* change csize and retry */ 1860 * csize can be either a huge page (HPAGE_SIZE), a batch of
1861 * regular pages (CHARGE_SIZE), or a single regular page
1862 * (PAGE_SIZE).
1863 *
1864 * Never reclaim on behalf of optional batching, retry with a
1865 * single page instead.
1866 */
1867 if (csize == CHARGE_SIZE)
1841 return CHARGE_RETRY; 1868 return CHARGE_RETRY;
1842 1869
1843 if (!(gfp_mask & __GFP_WAIT)) 1870 if (!(gfp_mask & __GFP_WAIT))
1844 return CHARGE_WOULDBLOCK; 1871 return CHARGE_WOULDBLOCK;
1845 1872
1846 ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, NULL, 1873 ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, NULL,
1847 gfp_mask, flags); 1874 gfp_mask, flags);
1875 if (mem_cgroup_check_margin(mem_over_limit, csize))
1876 return CHARGE_RETRY;
1848 /* 1877 /*
1849 * try_to_free_mem_cgroup_pages() might not give us a full 1878 * Even though the limit is exceeded at this point, reclaim
1850 * picture of reclaim. Some pages are reclaimed and might be 1879 * may have been able to free some pages. Retry the charge
1851 * moved to swap cache or just unmapped from the cgroup. 1880 * before killing the task.
1852 * Check the limit again to see if the reclaim reduced the 1881 *
1853 * current usage of the cgroup before giving up 1882 * Only for regular pages, though: huge pages are rather
1883 * unlikely to succeed so close to the limit, and we fall back
1884 * to regular pages anyway in case of failure.
1854 */ 1885 */
1855 if (ret || mem_cgroup_check_under_limit(mem_over_limit)) 1886 if (csize == PAGE_SIZE && ret)
1856 return CHARGE_RETRY; 1887 return CHARGE_RETRY;
1857 1888
1858 /* 1889 /*
@@ -2144,6 +2175,8 @@ void mem_cgroup_split_huge_fixup(struct page *head, struct page *tail)
2144 struct page_cgroup *tail_pc = lookup_page_cgroup(tail); 2175 struct page_cgroup *tail_pc = lookup_page_cgroup(tail);
2145 unsigned long flags; 2176 unsigned long flags;
2146 2177
2178 if (mem_cgroup_disabled())
2179 return;
2147 /* 2180 /*
2148 * We have no races with charge/uncharge but will have races with 2181 * We have no races with charge/uncharge but will have races with
2149 * page state accounting. 2182 * page state accounting.
@@ -2233,7 +2266,12 @@ static int mem_cgroup_move_account(struct page_cgroup *pc,
2233{ 2266{
2234 int ret = -EINVAL; 2267 int ret = -EINVAL;
2235 unsigned long flags; 2268 unsigned long flags;
2236 2269 /*
2270 * The page is isolated from LRU. So, collapse function
2271 * will not handle this page. But page splitting can happen.
2272 * Do this check under compound_page_lock(). The caller should
2273 * hold it.
2274 */
2237 if ((charge_size > PAGE_SIZE) && !PageTransHuge(pc->page)) 2275 if ((charge_size > PAGE_SIZE) && !PageTransHuge(pc->page))
2238 return -EBUSY; 2276 return -EBUSY;
2239 2277
@@ -2265,7 +2303,7 @@ static int mem_cgroup_move_parent(struct page_cgroup *pc,
2265 struct cgroup *cg = child->css.cgroup; 2303 struct cgroup *cg = child->css.cgroup;
2266 struct cgroup *pcg = cg->parent; 2304 struct cgroup *pcg = cg->parent;
2267 struct mem_cgroup *parent; 2305 struct mem_cgroup *parent;
2268 int charge = PAGE_SIZE; 2306 int page_size = PAGE_SIZE;
2269 unsigned long flags; 2307 unsigned long flags;
2270 int ret; 2308 int ret;
2271 2309
@@ -2278,23 +2316,26 @@ static int mem_cgroup_move_parent(struct page_cgroup *pc,
2278 goto out; 2316 goto out;
2279 if (isolate_lru_page(page)) 2317 if (isolate_lru_page(page))
2280 goto put; 2318 goto put;
2281 /* The page is isolated from LRU and we have no race with splitting */ 2319
2282 charge = PAGE_SIZE << compound_order(page); 2320 if (PageTransHuge(page))
2321 page_size = HPAGE_SIZE;
2283 2322
2284 parent = mem_cgroup_from_cont(pcg); 2323 parent = mem_cgroup_from_cont(pcg);
2285 ret = __mem_cgroup_try_charge(NULL, gfp_mask, &parent, false, charge); 2324 ret = __mem_cgroup_try_charge(NULL, gfp_mask,
2325 &parent, false, page_size);
2286 if (ret || !parent) 2326 if (ret || !parent)
2287 goto put_back; 2327 goto put_back;
2288 2328
2289 if (charge > PAGE_SIZE) 2329 if (page_size > PAGE_SIZE)
2290 flags = compound_lock_irqsave(page); 2330 flags = compound_lock_irqsave(page);
2291 2331
2292 ret = mem_cgroup_move_account(pc, child, parent, true, charge); 2332 ret = mem_cgroup_move_account(pc, child, parent, true, page_size);
2293 if (ret) 2333 if (ret)
2294 mem_cgroup_cancel_charge(parent, charge); 2334 mem_cgroup_cancel_charge(parent, page_size);
2295put_back: 2335
2296 if (charge > PAGE_SIZE) 2336 if (page_size > PAGE_SIZE)
2297 compound_unlock_irqrestore(page, flags); 2337 compound_unlock_irqrestore(page, flags);
2338put_back:
2298 putback_lru_page(page); 2339 putback_lru_page(page);
2299put: 2340put:
2300 put_page(page); 2341 put_page(page);
@@ -2312,13 +2353,19 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
2312 gfp_t gfp_mask, enum charge_type ctype) 2353 gfp_t gfp_mask, enum charge_type ctype)
2313{ 2354{
2314 struct mem_cgroup *mem = NULL; 2355 struct mem_cgroup *mem = NULL;
2356 int page_size = PAGE_SIZE;
2315 struct page_cgroup *pc; 2357 struct page_cgroup *pc;
2358 bool oom = true;
2316 int ret; 2359 int ret;
2317 int page_size = PAGE_SIZE;
2318 2360
2319 if (PageTransHuge(page)) { 2361 if (PageTransHuge(page)) {
2320 page_size <<= compound_order(page); 2362 page_size <<= compound_order(page);
2321 VM_BUG_ON(!PageTransHuge(page)); 2363 VM_BUG_ON(!PageTransHuge(page));
2364 /*
2365 * Never OOM-kill a process for a huge page. The
2366 * fault handler will fall back to regular pages.
2367 */
2368 oom = false;
2322 } 2369 }
2323 2370
2324 pc = lookup_page_cgroup(page); 2371 pc = lookup_page_cgroup(page);
@@ -2327,7 +2374,7 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
2327 return 0; 2374 return 0;
2328 prefetchw(pc); 2375 prefetchw(pc);
2329 2376
2330 ret = __mem_cgroup_try_charge(mm, gfp_mask, &mem, true, page_size); 2377 ret = __mem_cgroup_try_charge(mm, gfp_mask, &mem, oom, page_size);
2331 if (ret || !mem) 2378 if (ret || !mem)
2332 return ret; 2379 return ret;
2333 2380
@@ -5013,9 +5060,9 @@ struct cgroup_subsys mem_cgroup_subsys = {
5013static int __init enable_swap_account(char *s) 5060static int __init enable_swap_account(char *s)
5014{ 5061{
5015 /* consider enabled if no parameter or 1 is given */ 5062 /* consider enabled if no parameter or 1 is given */
5016 if (!s || !strcmp(s, "1")) 5063 if (!(*s) || !strcmp(s, "=1"))
5017 really_do_swap_account = 1; 5064 really_do_swap_account = 1;
5018 else if (!strcmp(s, "0")) 5065 else if (!strcmp(s, "=0"))
5019 really_do_swap_account = 0; 5066 really_do_swap_account = 0;
5020 return 1; 5067 return 1;
5021} 5068}
@@ -5023,7 +5070,8 @@ __setup("swapaccount", enable_swap_account);
5023 5070
5024static int __init disable_swap_account(char *s) 5071static int __init disable_swap_account(char *s)
5025{ 5072{
5026 enable_swap_account("0"); 5073 printk_once("noswapaccount is deprecated and will be removed in 2.6.40. Use swapaccount=0 instead\n");
5074 enable_swap_account("=0");
5027 return 1; 5075 return 1;
5028} 5076}
5029__setup("noswapaccount", disable_swap_account); 5077__setup("noswapaccount", disable_swap_account);