author		Andrea Arcangeli <aarcange@redhat.com>		2011-01-13 18:46:56 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2011-01-13 20:32:43 -0500
commit		ec1685109f1314a30919489ef2800ed626a38c1e (patch)
tree		48ac32238182623af24b6c25be456c0ca432047b /mm/memcontrol.c
parent		500d65d471018d9a13b0d51b7e141ed2a3555c1d (diff)
thp: memcg compound
Teach memcg to charge/uncharge compound pages.
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Acked-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
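
The sizing idiom the patch repeats at each charge and uncharge site is to widen the charge from a single page to the whole compound page. A minimal standalone sketch of that idiom follows; charge_size() is a hypothetical helper name for illustration only — the patch open-codes this computation at each site:

	/* Bytes to charge for @page: PAGE_SIZE for a normal page, or
	 * PAGE_SIZE << compound_order(page) for a transparent hugepage
	 * (e.g. 2MB for an order-9 THP with 4KB base pages on x86-64). */
	static inline int charge_size(struct page *page)
	{
		int page_size = PAGE_SIZE;

		if (PageTransHuge(page))
			page_size <<= compound_order(page);
		return page_size;
	}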
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--	mm/memcontrol.c	|  83
1 file changed, 53 insertions(+), 30 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 00bb8a64d028..356d4964fe95 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1027,6 +1027,10 @@ mem_cgroup_get_reclaim_stat_from_page(struct page *page)
 {
 	struct page_cgroup *pc;
 	struct mem_cgroup_per_zone *mz;
+	int page_size = PAGE_SIZE;
+
+	if (PageTransHuge(page))
+		page_size <<= compound_order(page);
 
 	if (mem_cgroup_disabled())
 		return NULL;
@@ -1887,12 +1891,14 @@ static int __mem_cgroup_do_charge(struct mem_cgroup *mem, gfp_t gfp_mask,
  * oom-killer can be invoked.
  */
 static int __mem_cgroup_try_charge(struct mm_struct *mm,
-		gfp_t gfp_mask, struct mem_cgroup **memcg, bool oom)
+				   gfp_t gfp_mask,
+				   struct mem_cgroup **memcg, bool oom,
+				   int page_size)
 {
 	int nr_oom_retries = MEM_CGROUP_RECLAIM_RETRIES;
 	struct mem_cgroup *mem = NULL;
 	int ret;
-	int csize = CHARGE_SIZE;
+	int csize = max(CHARGE_SIZE, (unsigned long) page_size);
 
 	/*
 	 * Unlike gloval-vm's OOM-kill, we're not in memory shortage
@@ -1917,7 +1923,7 @@ again:
 		VM_BUG_ON(css_is_removed(&mem->css));
 		if (mem_cgroup_is_root(mem))
 			goto done;
-		if (consume_stock(mem))
+		if (page_size == PAGE_SIZE && consume_stock(mem))
 			goto done;
 		css_get(&mem->css);
 	} else {
@@ -1940,7 +1946,7 @@ again:
 			rcu_read_unlock();
 			goto done;
 		}
-		if (consume_stock(mem)) {
+		if (page_size == PAGE_SIZE && consume_stock(mem)) {
 			/*
 			 * It seems dagerous to access memcg without css_get().
 			 * But considering how consume_stok works, it's not
@@ -1981,7 +1987,7 @@ again:
 		case CHARGE_OK:
 			break;
 		case CHARGE_RETRY: /* not in OOM situation but retry */
-			csize = PAGE_SIZE;
+			csize = page_size;
 			css_put(&mem->css);
 			mem = NULL;
 			goto again;
@@ -2002,8 +2008,8 @@ again:
 		}
 	} while (ret != CHARGE_OK);
 
-	if (csize > PAGE_SIZE)
-		refill_stock(mem, csize - PAGE_SIZE);
+	if (csize > page_size)
+		refill_stock(mem, csize - page_size);
 	css_put(&mem->css);
 done:
 	*memcg = mem;
@@ -2031,9 +2037,10 @@ static void __mem_cgroup_cancel_charge(struct mem_cgroup *mem,
 	}
 }
 
-static void mem_cgroup_cancel_charge(struct mem_cgroup *mem)
+static void mem_cgroup_cancel_charge(struct mem_cgroup *mem,
+				     int page_size)
 {
-	__mem_cgroup_cancel_charge(mem, 1);
+	__mem_cgroup_cancel_charge(mem, page_size >> PAGE_SHIFT);
 }
 
 /*
@@ -2089,8 +2096,9 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
  */
 
 static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
 				       struct page_cgroup *pc,
-				       enum charge_type ctype)
+				       enum charge_type ctype,
+				       int page_size)
 {
 	/* try_charge() can return NULL to *memcg, taking care of it. */
 	if (!mem)
@@ -2099,7 +2107,7 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
 	lock_page_cgroup(pc);
 	if (unlikely(PageCgroupUsed(pc))) {
 		unlock_page_cgroup(pc);
-		mem_cgroup_cancel_charge(mem);
+		mem_cgroup_cancel_charge(mem, page_size);
 		return;
 	}
 
@@ -2173,7 +2181,7 @@ static void __mem_cgroup_move_account(struct page_cgroup *pc,
 	mem_cgroup_charge_statistics(from, pc, false);
 	if (uncharge)
 		/* This is not "cancel", but cancel_charge does all we need. */
-		mem_cgroup_cancel_charge(from);
+		mem_cgroup_cancel_charge(from, PAGE_SIZE);
 
 	/* caller should have done css_get */
 	pc->mem_cgroup = to;
@@ -2234,13 +2242,14 @@ static int mem_cgroup_move_parent(struct page_cgroup *pc,
 		goto put;
 
 	parent = mem_cgroup_from_cont(pcg);
-	ret = __mem_cgroup_try_charge(NULL, gfp_mask, &parent, false);
+	ret = __mem_cgroup_try_charge(NULL, gfp_mask, &parent, false,
+				      PAGE_SIZE);
 	if (ret || !parent)
 		goto put_back;
 
 	ret = mem_cgroup_move_account(pc, child, parent, true);
 	if (ret)
-		mem_cgroup_cancel_charge(parent);
+		mem_cgroup_cancel_charge(parent, PAGE_SIZE);
put_back:
 	putback_lru_page(page);
put:
@@ -2261,6 +2270,10 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
 	struct mem_cgroup *mem = NULL;
 	struct page_cgroup *pc;
 	int ret;
+	int page_size = PAGE_SIZE;
+
+	if (PageTransHuge(page))
+		page_size <<= compound_order(page);
 
 	pc = lookup_page_cgroup(page);
 	/* can happen at boot */
@@ -2268,11 +2281,11 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
 		return 0;
 	prefetchw(pc);
 
-	ret = __mem_cgroup_try_charge(mm, gfp_mask, &mem, true);
+	ret = __mem_cgroup_try_charge(mm, gfp_mask, &mem, true, page_size);
 	if (ret || !mem)
 		return ret;
 
-	__mem_cgroup_commit_charge(mem, pc, ctype);
+	__mem_cgroup_commit_charge(mem, pc, ctype, page_size);
 	return 0;
 }
 
@@ -2281,8 +2294,6 @@ int mem_cgroup_newpage_charge(struct page *page,
 {
 	if (mem_cgroup_disabled())
 		return 0;
-	if (PageCompound(page))
-		return 0;
 	/*
 	 * If already mapped, we don't have to account.
 	 * If page cache, page->mapping has address_space.
@@ -2388,13 +2399,13 @@ int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
 	if (!mem)
 		goto charge_cur_mm;
 	*ptr = mem;
-	ret = __mem_cgroup_try_charge(NULL, mask, ptr, true);
+	ret = __mem_cgroup_try_charge(NULL, mask, ptr, true, PAGE_SIZE);
 	css_put(&mem->css);
 	return ret;
charge_cur_mm:
 	if (unlikely(!mm))
 		mm = &init_mm;
-	return __mem_cgroup_try_charge(mm, mask, ptr, true);
+	return __mem_cgroup_try_charge(mm, mask, ptr, true, PAGE_SIZE);
 }
 
 static void
@@ -2410,7 +2421,7 @@ __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr,
 	cgroup_exclude_rmdir(&ptr->css);
 	pc = lookup_page_cgroup(page);
 	mem_cgroup_lru_del_before_commit_swapcache(page);
-	__mem_cgroup_commit_charge(ptr, pc, ctype);
+	__mem_cgroup_commit_charge(ptr, pc, ctype, PAGE_SIZE);
 	mem_cgroup_lru_add_after_commit_swapcache(page);
 	/*
 	 * Now swap is on-memory. This means this page may be
@@ -2459,11 +2470,12 @@ void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *mem)
 		return;
 	if (!mem)
 		return;
-	mem_cgroup_cancel_charge(mem);
+	mem_cgroup_cancel_charge(mem, PAGE_SIZE);
 }
 
 static void
-__do_uncharge(struct mem_cgroup *mem, const enum charge_type ctype)
+__do_uncharge(struct mem_cgroup *mem, const enum charge_type ctype,
+	      int page_size)
 {
 	struct memcg_batch_info *batch = NULL;
 	bool uncharge_memsw = true;
@@ -2490,6 +2502,9 @@ __do_uncharge(struct mem_cgroup *mem, const enum charge_type ctype)
 	if (!batch->do_batch || test_thread_flag(TIF_MEMDIE))
 		goto direct_uncharge;
 
+	if (page_size != PAGE_SIZE)
+		goto direct_uncharge;
+
 	/*
 	 * In typical case, batch->memcg == mem. This means we can
 	 * merge a series of uncharges to an uncharge of res_counter.
@@ -2503,9 +2518,9 @@ __do_uncharge(struct mem_cgroup *mem, const enum charge_type ctype)
 	batch->memsw_bytes += PAGE_SIZE;
 	return;
direct_uncharge:
-	res_counter_uncharge(&mem->res, PAGE_SIZE);
+	res_counter_uncharge(&mem->res, page_size);
 	if (uncharge_memsw)
-		res_counter_uncharge(&mem->memsw, PAGE_SIZE);
+		res_counter_uncharge(&mem->memsw, page_size);
 	if (unlikely(batch->memcg != mem))
 		memcg_oom_recover(mem);
 	return;
@@ -2519,6 +2534,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 {
 	struct page_cgroup *pc;
 	struct mem_cgroup *mem = NULL;
+	int page_size = PAGE_SIZE;
 
 	if (mem_cgroup_disabled())
 		return NULL;
@@ -2526,6 +2542,9 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 	if (PageSwapCache(page))
 		return NULL;
 
+	if (PageTransHuge(page))
+		page_size <<= compound_order(page);
+
 	/*
 	 * Check if our page_cgroup is valid
 	 */
@@ -2579,7 +2598,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 		mem_cgroup_get(mem);
 	}
 	if (!mem_cgroup_is_root(mem))
-		__do_uncharge(mem, ctype);
+		__do_uncharge(mem, ctype, page_size);
 
 	return mem;
 
@@ -2774,6 +2793,7 @@ int mem_cgroup_prepare_migration(struct page *page,
 	enum charge_type ctype;
 	int ret = 0;
 
+	VM_BUG_ON(PageTransHuge(page));
 	if (mem_cgroup_disabled())
 		return 0;
 
@@ -2823,7 +2843,7 @@ int mem_cgroup_prepare_migration(struct page *page,
 		return 0;
 
 	*ptr = mem;
-	ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, ptr, false);
+	ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, ptr, false, PAGE_SIZE);
 	css_put(&mem->css);/* drop extra refcnt */
 	if (ret || *ptr == NULL) {
 		if (PageAnon(page)) {
@@ -2850,7 +2870,7 @@ int mem_cgroup_prepare_migration(struct page *page,
 		ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
 	else
 		ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM;
-	__mem_cgroup_commit_charge(mem, pc, ctype);
+	__mem_cgroup_commit_charge(mem, pc, ctype, PAGE_SIZE);
 	return ret;
 }
 
@@ -4461,7 +4481,8 @@ one_by_one:
 			batch_count = PRECHARGE_COUNT_AT_ONCE;
 			cond_resched();
 		}
-		ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, &mem, false);
+		ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, &mem, false,
+					      PAGE_SIZE);
 		if (ret || !mem)
 			/* mem_cgroup_clear_mc() will do uncharge later */
 			return -ENOMEM;
@@ -4623,6 +4644,7 @@ static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd,
 	pte_t *pte;
 	spinlock_t *ptl;
 
+	VM_BUG_ON(pmd_trans_huge(*pmd));
 	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
 	for (; addr != end; pte++, addr += PAGE_SIZE)
 		if (is_target_pte_for_mc(vma, addr, *pte, NULL))
@@ -4789,6 +4811,7 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
 	spinlock_t *ptl;
 
retry:
+	VM_BUG_ON(pmd_trans_huge(*pmd));
 	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
 	for (; addr != end; addr += PAGE_SIZE) {
 		pte_t ptent = *(pte++);
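
Taken together, the charge path now behaves roughly as follows: only PAGE_SIZE charges may be served from the per-CPU stock; a compound page is charged against the res_counter in one shot, and any batching overshoot is handed back via refill_stock(). A condensed paraphrase of the patched __mem_cgroup_try_charge() — the retry/OOM machinery is elided, and do_charge() is a hypothetical stand-in for the real __mem_cgroup_do_charge() loop:

	static int try_charge_sketch(struct mem_cgroup *mem, int page_size)
	{
		/* Batch at least CHARGE_SIZE, but never less than the page. */
		unsigned long csize = max(CHARGE_SIZE, (unsigned long) page_size);

		/* The per-CPU stock is kept in PAGE_SIZE units, so compound
		 * pages bypass it and hit the res_counter directly. */
		if (page_size == PAGE_SIZE && consume_stock(mem))
			return 0;

		if (do_charge(mem, csize))	/* stand-in; may reclaim/OOM */
			return -ENOMEM;

		/* Keep any overshoot cached for later single-page charges. */
		if (csize > page_size)
			refill_stock(mem, csize - page_size);
		return 0;
	}

The same asymmetry shows up on the uncharge side: __do_uncharge() only batches PAGE_SIZE uncharges, so a compound page jumps straight to direct_uncharge and releases page_size bytes from both res and memsw counters at once.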