author    Johannes Weiner <hannes@cmpxchg.org>  2012-07-31 19:45:25 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2012-07-31 21:42:48 -0400
commit    0030f535a5cf9b1841d2088c10a0b2f8f2987460 (patch)
tree      5a71a5057fb0d3e1b5f22c550c4374ea709ac3bc /mm
parent    737449236240e30a7bbe99f4d5586b8ed1416763 (diff)
mm: memcg: fix compaction/migration failing due to memcg limits
Compaction (and page migration in general) can currently be hindered
by pages owned by memory cgroups that are at their limits and
unreclaimable.

The reason is that the replacement page is charged against the limit
while the page being replaced is still charged as well.  But this seems
unnecessary, given that only one of the two pages will still be in use
after migration finishes.

This patch changes the memcg migration sequence so that the replacement
page is not charged.  Whatever page is still in use after successful or
failed migration gets to keep the charge of the page that was going to
be replaced.

The replacement page will still show up temporarily in the rss/cache
statistics; this can be fixed in a later patch, as it's less urgent.

Reported-by: David Rientjes <rientjes@google.com>
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: Hugh Dickins <hughd@google.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Wanpeng Li <liwp.linux@gmail.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
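[Editor's note: the core of this change is an accounting invariant: during
migration, a single res_counter charge covers both the old and the new page,
and whichever page ends up unused is released without uncharging the counter.
The toy userspace model below illustrates that invariant only; res_counter,
charge(), and uncharge() are simplified stand-ins for the kernel's
res_counter API and __mem_cgroup_uncharge_common(), not the actual kernel
code.]

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

/* Simplified stand-in for the kernel's res_counter. */
struct res_counter { long usage; long limit; };

static bool charge(struct res_counter *rc, long n)
{
	if (rc->usage + n > rc->limit)
		return false;	/* group at its limit: charge fails */
	rc->usage += n;
	return true;
}

static void uncharge(struct res_counter *rc, long n, bool end_migration)
{
	/*
	 * After this patch, the unused post-migration page releases its
	 * page_cgroup state but leaves the counter alone: the surviving
	 * page keeps the original charge.
	 */
	if (!end_migration)
		rc->usage -= n;
}

int main(void)
{
	struct res_counter memcg = { .usage = 1, .limit = 1 };

	/*
	 * Old behaviour: the replacement page needed its own charge,
	 * which fails when the group is already at its limit -- this
	 * is what made compaction/migration fail.
	 */
	assert(!charge(&memcg, 1));

	/*
	 * New behaviour: the replacement page is committed without
	 * charging; the unused page is uncharged with end_migration
	 * set, so the counter never moves and is never doubled.
	 */
	uncharge(&memcg, 1, true);
	assert(memcg.usage == 1);

	printf("usage=%ld limit=%ld after migration\n",
	       memcg.usage, memcg.limit);
	return 0;
}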
Diffstat (limited to 'mm')
-rw-r--r--  mm/memcontrol.c  67
-rw-r--r--  mm/migrate.c     11
2 files changed, 38 insertions, 40 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 0f692a2dbfcb..7eadcdad06f3 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2976,7 +2976,8 @@ direct_uncharge:
  * uncharge if !page_mapped(page)
  */
 static struct mem_cgroup *
-__mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
+__mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype,
+			     bool end_migration)
 {
 	struct mem_cgroup *memcg = NULL;
 	unsigned int nr_pages = 1;
@@ -3020,7 +3021,16 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 		/* fallthrough */
 	case MEM_CGROUP_CHARGE_TYPE_DROP:
 		/* See mem_cgroup_prepare_migration() */
-		if (page_mapped(page) || PageCgroupMigration(pc))
+		if (page_mapped(page))
+			goto unlock_out;
+		/*
+		 * Pages under migration may not be uncharged. But
+		 * end_migration() /must/ be the one uncharging the
+		 * unused post-migration page and so it has to call
+		 * here with the migration bit still set. See the
+		 * res_counter handling below.
+		 */
+		if (!end_migration && PageCgroupMigration(pc))
 			goto unlock_out;
 		break;
 	case MEM_CGROUP_CHARGE_TYPE_SWAPOUT:
@@ -3054,7 +3064,12 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 		mem_cgroup_swap_statistics(memcg, true);
 		mem_cgroup_get(memcg);
 	}
-	if (!mem_cgroup_is_root(memcg))
+	/*
+	 * Migration does not charge the res_counter for the
+	 * replacement page, so leave it alone when phasing out the
+	 * page that is unused after the migration.
+	 */
+	if (!end_migration && !mem_cgroup_is_root(memcg))
 		mem_cgroup_do_uncharge(memcg, nr_pages, ctype);

 	return memcg;
@@ -3070,14 +3085,14 @@ void mem_cgroup_uncharge_page(struct page *page)
 	if (page_mapped(page))
 		return;
 	VM_BUG_ON(page->mapping && !PageAnon(page));
-	__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_ANON);
+	__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_ANON, false);
 }

 void mem_cgroup_uncharge_cache_page(struct page *page)
 {
 	VM_BUG_ON(page_mapped(page));
 	VM_BUG_ON(page->mapping);
-	__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE);
+	__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE, false);
 }

 /*
@@ -3141,7 +3156,7 @@ mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout)
 	if (!swapout) /* this was a swap cache but the swap is unused ! */
 		ctype = MEM_CGROUP_CHARGE_TYPE_DROP;

-	memcg = __mem_cgroup_uncharge_common(page, ctype);
+	memcg = __mem_cgroup_uncharge_common(page, ctype, false);

 	/*
 	 * record memcg information, if swapout && memcg != NULL,
@@ -3231,19 +3246,18 @@ static inline int mem_cgroup_move_swap_account(swp_entry_t entry,
  * Before starting migration, account PAGE_SIZE to mem_cgroup that the old
  * page belongs to.
  */
-int mem_cgroup_prepare_migration(struct page *page,
-	struct page *newpage, struct mem_cgroup **memcgp, gfp_t gfp_mask)
+void mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
+				  struct mem_cgroup **memcgp)
 {
 	struct mem_cgroup *memcg = NULL;
 	struct page_cgroup *pc;
 	enum charge_type ctype;
-	int ret = 0;

 	*memcgp = NULL;

 	VM_BUG_ON(PageTransHuge(page));
 	if (mem_cgroup_disabled())
-		return 0;
+		return;

 	pc = lookup_page_cgroup(page);
 	lock_page_cgroup(pc);
@@ -3288,24 +3302,9 @@ int mem_cgroup_prepare_migration(struct page *page,
 	 * we return here.
 	 */
 	if (!memcg)
-		return 0;
+		return;

 	*memcgp = memcg;
-	ret = __mem_cgroup_try_charge(NULL, gfp_mask, 1, memcgp, false);
-	css_put(&memcg->css);/* drop extra refcnt */
-	if (ret) {
-		if (PageAnon(page)) {
-			lock_page_cgroup(pc);
-			ClearPageCgroupMigration(pc);
-			unlock_page_cgroup(pc);
-			/*
-			 * The old page may be fully unmapped while we kept it.
-			 */
-			mem_cgroup_uncharge_page(page);
-		}
-		/* we'll need to revisit this error code (we have -EINTR) */
-		return -ENOMEM;
-	}
 	/*
 	 * We charge new page before it's used/mapped. So, even if unlock_page()
 	 * is called before end_migration, we can catch all events on this new
@@ -3318,8 +3317,12 @@ int mem_cgroup_prepare_migration(struct page *page,
 		ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
 	else
 		ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM;
+	/*
+	 * The page is committed to the memcg, but it's not actually
+	 * charged to the res_counter since we plan on replacing the
+	 * old one and only one page is going to be left afterwards.
+	 */
 	__mem_cgroup_commit_charge(memcg, newpage, 1, ctype, false);
-	return ret;
 }

 /* remove redundant charge if migration failed*/
@@ -3341,6 +3344,12 @@ void mem_cgroup_end_migration(struct mem_cgroup *memcg,
 		used = newpage;
 		unused = oldpage;
 	}
+	anon = PageAnon(used);
+	__mem_cgroup_uncharge_common(unused,
+				     anon ? MEM_CGROUP_CHARGE_TYPE_ANON
+					  : MEM_CGROUP_CHARGE_TYPE_CACHE,
+				     true);
+	css_put(&memcg->css);
 	/*
 	 * We disallowed uncharge of pages under migration because mapcount
 	 * of the page goes down to zero, temporarly.
@@ -3350,10 +3359,6 @@ void mem_cgroup_end_migration(struct mem_cgroup *memcg,
 	lock_page_cgroup(pc);
 	ClearPageCgroupMigration(pc);
 	unlock_page_cgroup(pc);
-	anon = PageAnon(used);
-	__mem_cgroup_uncharge_common(unused,
-				     anon ? MEM_CGROUP_CHARGE_TYPE_ANON
-					  : MEM_CGROUP_CHARGE_TYPE_CACHE);

 	/*
 	 * If a page is a file cache, radix-tree replacement is very atomic
diff --git a/mm/migrate.c b/mm/migrate.c
index 6c37c51565e5..77ed2d773705 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -683,7 +683,6 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
 {
 	int rc = -EAGAIN;
 	int remap_swapcache = 1;
-	int charge = 0;
 	struct mem_cgroup *mem;
 	struct anon_vma *anon_vma = NULL;

@@ -725,12 +724,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
 	}

 	/* charge against new page */
-	charge = mem_cgroup_prepare_migration(page, newpage, &mem, GFP_KERNEL);
-	if (charge == -ENOMEM) {
-		rc = -ENOMEM;
-		goto unlock;
-	}
-	BUG_ON(charge);
+	mem_cgroup_prepare_migration(page, newpage, &mem);

 	if (PageWriteback(page)) {
 		/*
@@ -820,8 +814,7 @@ skip_unmap:
 	put_anon_vma(anon_vma);

 uncharge:
-	if (!charge)
-		mem_cgroup_end_migration(mem, page, newpage, rc == 0);
+	mem_cgroup_end_migration(mem, page, newpage, rc == 0);
 unlock:
 	unlock_page(page);
 out: