Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--  mm/memcontrol.c | 98
1 file changed, 73 insertions(+), 25 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index db76ef726293..da53a252b259 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -612,8 +612,10 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *mem,
 	/* pagein of a big page is an event. So, ignore page size */
 	if (nr_pages > 0)
 		__this_cpu_inc(mem->stat->count[MEM_CGROUP_STAT_PGPGIN_COUNT]);
-	else
+	else {
 		__this_cpu_inc(mem->stat->count[MEM_CGROUP_STAT_PGPGOUT_COUNT]);
+		nr_pages = -nr_pages; /* for event */
+	}
 
 	__this_cpu_add(mem->stat->count[MEM_CGROUP_EVENTS], nr_pages);
 
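The new braces negate nr_pages on the pageout path so that MEM_CGROUP_EVENTS always advances by the number of pages involved, whether they were charged or uncharged. A standalone sketch of the effect (illustration only, not kernel code):

	/* The event counter grows by the page count of a charge or an
	 * uncharge alike, so negative deltas are flipped first. */
	static long event_counter;

	static void account_event_sketch(long nr_pages)
	{
		if (nr_pages < 0)		/* uncharge/pageout path */
			nr_pages = -nr_pages;	/* count the magnitude only */
		event_counter += nr_pages;	/* __this_cpu_add() in the kernel */
	}
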
@@ -1111,6 +1113,23 @@ static bool mem_cgroup_check_under_limit(struct mem_cgroup *mem)
 	return false;
 }
 
+/**
+ * mem_cgroup_check_margin - check if the memory cgroup allows charging
+ * @mem: memory cgroup to check
+ * @bytes: the number of bytes the caller intends to charge
+ *
+ * Returns a boolean value on whether @mem can be charged @bytes or
+ * whether this would exceed the limit.
+ */
+static bool mem_cgroup_check_margin(struct mem_cgroup *mem, unsigned long bytes)
+{
+	if (!res_counter_check_margin(&mem->res, bytes))
+		return false;
+	if (do_swap_account && !res_counter_check_margin(&mem->memsw, bytes))
+		return false;
+	return true;
+}
+
 static unsigned int get_swappiness(struct mem_cgroup *memcg)
 {
 	struct cgroup *cgrp = memcg->css.cgroup;
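mem_cgroup_check_margin() delegates to res_counter_check_margin(), which is not part of this diff. As a rough sketch of what such a margin check amounts to (field and function names below are illustrative assumptions, not the kernel's res_counter API):

	struct counter_sketch {
		unsigned long long usage;	/* bytes currently charged */
		unsigned long long limit;	/* hard limit in bytes */
	};

	/* True if charging 'bytes' more would still fit under the limit.
	 * The real counter would take its spinlock around this check. */
	static int counter_has_margin(const struct counter_sketch *cnt,
				      unsigned long long bytes)
	{
		return cnt->usage + bytes <= cnt->limit;
	}
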
@@ -1832,27 +1851,39 @@ static int __mem_cgroup_do_charge(struct mem_cgroup *mem, gfp_t gfp_mask,
 		if (likely(!ret))
 			return CHARGE_OK;
 
+		res_counter_uncharge(&mem->res, csize);
 		mem_over_limit = mem_cgroup_from_res_counter(fail_res, memsw);
 		flags |= MEM_CGROUP_RECLAIM_NOSWAP;
 	} else
 		mem_over_limit = mem_cgroup_from_res_counter(fail_res, res);
-
-	if (csize > PAGE_SIZE) /* change csize and retry */
+	/*
+	 * csize can be either a huge page (HPAGE_SIZE), a batch of
+	 * regular pages (CHARGE_SIZE), or a single regular page
+	 * (PAGE_SIZE).
+	 *
+	 * Never reclaim on behalf of optional batching, retry with a
+	 * single page instead.
+	 */
+	if (csize == CHARGE_SIZE)
 		return CHARGE_RETRY;
 
 	if (!(gfp_mask & __GFP_WAIT))
 		return CHARGE_WOULDBLOCK;
 
 	ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, NULL,
 					      gfp_mask, flags);
+	if (mem_cgroup_check_margin(mem_over_limit, csize))
+		return CHARGE_RETRY;
 	/*
-	 * try_to_free_mem_cgroup_pages() might not give us a full
-	 * picture of reclaim. Some pages are reclaimed and might be
-	 * moved to swap cache or just unmapped from the cgroup.
-	 * Check the limit again to see if the reclaim reduced the
-	 * current usage of the cgroup before giving up
+	 * Even though the limit is exceeded at this point, reclaim
+	 * may have been able to free some pages. Retry the charge
+	 * before killing the task.
+	 *
+	 * Only for regular pages, though: huge pages are rather
+	 * unlikely to succeed so close to the limit, and we fall back
+	 * to regular pages anyway in case of failure.
 	 */
-	if (ret || mem_cgroup_check_under_limit(mem_over_limit))
+	if (csize == PAGE_SIZE && ret)
 		return CHARGE_RETRY;
 
 	/*
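Taken together, the retry policy of __mem_cgroup_do_charge() after this hunk can be condensed into the ladder below. This is a simplified standalone sketch; the enum and the boolean inputs are stand-ins for the kernel's CHARGE_* results, gfp checks and reclaim outcome, not the actual definitions:

	enum charge_sketch { SK_RETRY, SK_WOULDBLOCK, SK_NOMEM };

	static enum charge_sketch charge_failure_sketch(int is_batch, int is_huge,
							int can_wait,
							int margin_fits,
							int reclaim_made_progress)
	{
		if (is_batch)			/* never reclaim for optional batching */
			return SK_RETRY;	/* retry with a single page */
		if (!can_wait)			/* atomic allocation, no reclaim */
			return SK_WOULDBLOCK;
		/* reclaim has run at this point */
		if (margin_fits)		/* mem_cgroup_check_margin() succeeded */
			return SK_RETRY;
		if (!is_huge && reclaim_made_progress)
			return SK_RETRY;	/* regular page: give it another try */
		return SK_NOMEM;		/* huge pages fall back; else head for OOM */
	}
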
@@ -2144,6 +2175,8 @@ void mem_cgroup_split_huge_fixup(struct page *head, struct page *tail)
 	struct page_cgroup *tail_pc = lookup_page_cgroup(tail);
 	unsigned long flags;
 
+	if (mem_cgroup_disabled())
+		return;
 	/*
 	 * We have no races with charge/uncharge but will have races with
 	 * page state accounting.
@@ -2233,7 +2266,12 @@ static int mem_cgroup_move_account(struct page_cgroup *pc,
 {
 	int ret = -EINVAL;
 	unsigned long flags;
-
+	/*
+	 * The page is isolated from LRU. So, collapse function
+	 * will not handle this page. But page splitting can happen.
+	 * Do this check under compound_page_lock(). The caller should
+	 * hold it.
+	 */
 	if ((charge_size > PAGE_SIZE) && !PageTransHuge(pc->page))
 		return -EBUSY;
 
@@ -2265,7 +2303,7 @@ static int mem_cgroup_move_parent(struct page_cgroup *pc,
 	struct cgroup *cg = child->css.cgroup;
 	struct cgroup *pcg = cg->parent;
 	struct mem_cgroup *parent;
-	int charge = PAGE_SIZE;
+	int page_size = PAGE_SIZE;
 	unsigned long flags;
 	int ret;
 
@@ -2278,23 +2316,26 @@ static int mem_cgroup_move_parent(struct page_cgroup *pc,
 		goto out;
 	if (isolate_lru_page(page))
 		goto put;
-	/* The page is isolated from LRU and we have no race with splitting */
-	charge = PAGE_SIZE << compound_order(page);
+
+	if (PageTransHuge(page))
+		page_size = HPAGE_SIZE;
 
 	parent = mem_cgroup_from_cont(pcg);
-	ret = __mem_cgroup_try_charge(NULL, gfp_mask, &parent, false, charge);
+	ret = __mem_cgroup_try_charge(NULL, gfp_mask,
+				&parent, false, page_size);
 	if (ret || !parent)
 		goto put_back;
 
-	if (charge > PAGE_SIZE)
+	if (page_size > PAGE_SIZE)
 		flags = compound_lock_irqsave(page);
 
-	ret = mem_cgroup_move_account(pc, child, parent, true, charge);
+	ret = mem_cgroup_move_account(pc, child, parent, true, page_size);
 	if (ret)
-		mem_cgroup_cancel_charge(parent, charge);
-put_back:
-	if (charge > PAGE_SIZE)
+		mem_cgroup_cancel_charge(parent, page_size);
+
+	if (page_size > PAGE_SIZE)
 		compound_unlock_irqrestore(page, flags);
+put_back:
 	putback_lru_page(page);
 put:
 	put_page(page);
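Besides the charge/page_size rename, note that the put_back: label moves below the conditional unlock: a failed __mem_cgroup_try_charge() jumps straight to putting the page back and must not release a compound lock it never took. A minimal standalone sketch of that corrected error-path layout (illustration only, not kernel code):

	#include <stdbool.h>

	static bool locked;	/* stand-in for the compound page lock */

	static int move_parent_sketch(bool is_huge, bool charge_fails)
	{
		int ret = 0;

		if (charge_fails) {
			ret = -1;
			goto put_back;		/* never touched the lock */
		}
		if (is_huge)
			locked = true;		/* compound_lock_irqsave() */
		/* ... move the accounting to the parent here ... */
		if (is_huge)
			locked = false;		/* compound_unlock_irqrestore() */
	put_back:
		return ret;			/* putback_lru_page()/put_page() follow */
	}
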
@@ -2312,13 +2353,19 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
 				gfp_t gfp_mask, enum charge_type ctype)
 {
 	struct mem_cgroup *mem = NULL;
+	int page_size = PAGE_SIZE;
 	struct page_cgroup *pc;
+	bool oom = true;
 	int ret;
-	int page_size = PAGE_SIZE;
 
 	if (PageTransHuge(page)) {
 		page_size <<= compound_order(page);
 		VM_BUG_ON(!PageTransHuge(page));
+		/*
+		 * Never OOM-kill a process for a huge page. The
+		 * fault handler will fall back to regular pages.
+		 */
+		oom = false;
 	}
 
 	pc = lookup_page_cgroup(page);
@@ -2327,7 +2374,7 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
 		return 0;
 	prefetchw(pc);
 
-	ret = __mem_cgroup_try_charge(mm, gfp_mask, &mem, true, page_size);
+	ret = __mem_cgroup_try_charge(mm, gfp_mask, &mem, oom, page_size);
 	if (ret || !mem)
 		return ret;
 
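With oom now false for transparent huge pages, a failed huge-page charge simply returns an error to the fault path, which is expected to retry with regular pages where OOM handling still applies. A rough sketch of that fallback shape (hypothetical helper names and sizes, not the kernel's THP fault path):

	static unsigned long margin_sketch = 64 * 4096UL;	/* headroom left under the limit */

	/* Hypothetical charge helper: 0 on success, -1 when nothing fits. */
	static int try_charge_sketch(unsigned long bytes, int allow_oom_kill)
	{
		if (bytes <= margin_sketch) {
			margin_sketch -= bytes;
			return 0;
		}
		(void)allow_oom_kill;	/* only the small-page retry may OOM-kill */
		return -1;
	}

	static int fault_sketch(void)
	{
		if (try_charge_sketch(512 * 4096UL, 0) == 0)	/* huge page, no OOM kill */
			return 0;
		return try_charge_sketch(4096UL, 1);		/* fall back to one page */
	}
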
@@ -5013,9 +5060,9 @@ struct cgroup_subsys mem_cgroup_subsys = {
 static int __init enable_swap_account(char *s)
 {
 	/* consider enabled if no parameter or 1 is given */
-	if (!s || !strcmp(s, "1"))
+	if (!(*s) || !strcmp(s, "=1"))
 		really_do_swap_account = 1;
-	else if (!strcmp(s, "0"))
+	else if (!strcmp(s, "=0"))
 		really_do_swap_account = 0;
 	return 1;
 }
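The string checks change because, with __setup("swapaccount", ...) and no trailing '=', the handler receives everything that follows the parameter name: a bare "swapaccount" arrives as an empty string and "swapaccount=0" arrives as "=0", never as NULL. A small userspace harness (illustrative test strings only) exercising the three accepted forms:

	#include <stdio.h>
	#include <string.h>

	static int really_do_swap_account = -1;

	static int enable_swap_account(char *s)
	{
		/* consider enabled if no parameter or 1 is given */
		if (!(*s) || !strcmp(s, "=1"))
			really_do_swap_account = 1;
		else if (!strcmp(s, "=0"))
			really_do_swap_account = 0;
		return 1;
	}

	int main(void)
	{
		char *suffixes[] = { "", "=1", "=0" };	/* swapaccount, swapaccount=1, swapaccount=0 */

		for (int i = 0; i < 3; i++) {
			enable_swap_account(suffixes[i]);
			printf("\"swapaccount%s\" -> %d\n", suffixes[i], really_do_swap_account);
		}
		return 0;
	}
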
@@ -5023,7 +5070,8 @@ __setup("swapaccount", enable_swap_account);
 
 static int __init disable_swap_account(char *s)
 {
-	enable_swap_account("0");
+	printk_once("noswapaccount is deprecated and will be removed in 2.6.40. Use swapaccount=0 instead\n");
+	enable_swap_account("=0");
 	return 1;
 }
 __setup("noswapaccount", disable_swap_account);