aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/linux/memcontrol.h6
-rw-r--r--include/linux/page_cgroup.h5
-rw-r--r--mm/memcontrol.c135
-rw-r--r--mm/migrate.c2
4 files changed, 107 insertions, 41 deletions
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 05894795fdc1..9411d32840b0 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -90,7 +90,8 @@ int mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *cgroup)
90extern struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *mem); 90extern struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *mem);
91 91
92extern int 92extern int
93mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr); 93mem_cgroup_prepare_migration(struct page *page,
94 struct page *newpage, struct mem_cgroup **ptr);
94extern void mem_cgroup_end_migration(struct mem_cgroup *mem, 95extern void mem_cgroup_end_migration(struct mem_cgroup *mem,
95 struct page *oldpage, struct page *newpage); 96 struct page *oldpage, struct page *newpage);
96 97
@@ -227,7 +228,8 @@ static inline struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *mem)
227} 228}
228 229
229static inline int 230static inline int
230mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr) 231mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
232 struct mem_cgroup **ptr)
231{ 233{
232 return 0; 234 return 0;
233} 235}
diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
index aef22ae2af47..5bb13b3db84d 100644
--- a/include/linux/page_cgroup.h
+++ b/include/linux/page_cgroup.h
@@ -40,6 +40,7 @@ enum {
40 PCG_USED, /* this object is in use. */ 40 PCG_USED, /* this object is in use. */
41 PCG_ACCT_LRU, /* page has been accounted for */ 41 PCG_ACCT_LRU, /* page has been accounted for */
42 PCG_FILE_MAPPED, /* page is accounted as "mapped" */ 42 PCG_FILE_MAPPED, /* page is accounted as "mapped" */
43 PCG_MIGRATION, /* under page migration */
43}; 44};
44 45
45#define TESTPCGFLAG(uname, lname) \ 46#define TESTPCGFLAG(uname, lname) \
@@ -79,6 +80,10 @@ SETPCGFLAG(FileMapped, FILE_MAPPED)
79CLEARPCGFLAG(FileMapped, FILE_MAPPED) 80CLEARPCGFLAG(FileMapped, FILE_MAPPED)
80TESTPCGFLAG(FileMapped, FILE_MAPPED) 81TESTPCGFLAG(FileMapped, FILE_MAPPED)
81 82
83SETPCGFLAG(Migration, MIGRATION)
84CLEARPCGFLAG(Migration, MIGRATION)
85TESTPCGFLAG(Migration, MIGRATION)
86
82static inline int page_cgroup_nid(struct page_cgroup *pc) 87static inline int page_cgroup_nid(struct page_cgroup *pc)
83{ 88{
84 return page_to_nid(pc->page); 89 return page_to_nid(pc->page);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 8c200e86da4c..df1234c0dac3 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2258,7 +2258,8 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
2258 switch (ctype) { 2258 switch (ctype) {
2259 case MEM_CGROUP_CHARGE_TYPE_MAPPED: 2259 case MEM_CGROUP_CHARGE_TYPE_MAPPED:
2260 case MEM_CGROUP_CHARGE_TYPE_DROP: 2260 case MEM_CGROUP_CHARGE_TYPE_DROP:
2261 if (page_mapped(page)) 2261 /* See mem_cgroup_prepare_migration() */
2262 if (page_mapped(page) || PageCgroupMigration(pc))
2262 goto unlock_out; 2263 goto unlock_out;
2263 break; 2264 break;
2264 case MEM_CGROUP_CHARGE_TYPE_SWAPOUT: 2265 case MEM_CGROUP_CHARGE_TYPE_SWAPOUT:
@@ -2481,10 +2482,12 @@ static inline int mem_cgroup_move_swap_account(swp_entry_t entry,
2481 * Before starting migration, account PAGE_SIZE to mem_cgroup that the old 2482 * Before starting migration, account PAGE_SIZE to mem_cgroup that the old
2482 * page belongs to. 2483 * page belongs to.
2483 */ 2484 */
2484int mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr) 2485int mem_cgroup_prepare_migration(struct page *page,
2486 struct page *newpage, struct mem_cgroup **ptr)
2485{ 2487{
2486 struct page_cgroup *pc; 2488 struct page_cgroup *pc;
2487 struct mem_cgroup *mem = NULL; 2489 struct mem_cgroup *mem = NULL;
2490 enum charge_type ctype;
2488 int ret = 0; 2491 int ret = 0;
2489 2492
2490 if (mem_cgroup_disabled()) 2493 if (mem_cgroup_disabled())
@@ -2495,69 +2498,125 @@ int mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr)
2495 if (PageCgroupUsed(pc)) { 2498 if (PageCgroupUsed(pc)) {
2496 mem = pc->mem_cgroup; 2499 mem = pc->mem_cgroup;
2497 css_get(&mem->css); 2500 css_get(&mem->css);
2501 /*
2502 * At migrating an anonymous page, its mapcount goes down
2503 * to 0 and uncharge() will be called. But, even if it's fully
2504 * unmapped, migration may fail and this page has to be
2505 * charged again. We set MIGRATION flag here and delay uncharge
2506 * until end_migration() is called
2507 *
2508 * Corner Case Thinking
2509 * A)
2510 * When the old page was mapped as Anon and it's unmap-and-freed
2511 * while migration was ongoing.
2512 * If unmap finds the old page, uncharge() of it will be delayed
2513 * until end_migration(). If unmap finds a new page, it's
2514 * uncharged when it make mapcount to be 1->0. If unmap code
2515 * finds swap_migration_entry, the new page will not be mapped
2516 * and end_migration() will find it(mapcount==0).
2517 *
2518 * B)
2519 * When the old page was mapped but migraion fails, the kernel
2520 * remaps it. A charge for it is kept by MIGRATION flag even
2521 * if mapcount goes down to 0. We can do remap successfully
2522 * without charging it again.
2523 *
2524 * C)
2525 * The "old" page is under lock_page() until the end of
2526 * migration, so, the old page itself will not be swapped-out.
2527 * If the new page is swapped out before end_migraton, our
2528 * hook to usual swap-out path will catch the event.
2529 */
2530 if (PageAnon(page))
2531 SetPageCgroupMigration(pc);
2498 } 2532 }
2499 unlock_page_cgroup(pc); 2533 unlock_page_cgroup(pc);
2534 /*
2535 * If the page is not charged at this point,
2536 * we return here.
2537 */
2538 if (!mem)
2539 return 0;
2500 2540
2501 *ptr = mem; 2541 *ptr = mem;
2502 if (mem) { 2542 ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, ptr, false);
2503 ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, ptr, false); 2543 css_put(&mem->css);/* drop extra refcnt */
2504 css_put(&mem->css); 2544 if (ret || *ptr == NULL) {
2545 if (PageAnon(page)) {
2546 lock_page_cgroup(pc);
2547 ClearPageCgroupMigration(pc);
2548 unlock_page_cgroup(pc);
2549 /*
2550 * The old page may be fully unmapped while we kept it.
2551 */
2552 mem_cgroup_uncharge_page(page);
2553 }
2554 return -ENOMEM;
2505 } 2555 }
2556 /*
2557 * We charge new page before it's used/mapped. So, even if unlock_page()
2558 * is called before end_migration, we can catch all events on this new
2559 * page. In the case new page is migrated but not remapped, new page's
2560 * mapcount will be finally 0 and we call uncharge in end_migration().
2561 */
2562 pc = lookup_page_cgroup(newpage);
2563 if (PageAnon(page))
2564 ctype = MEM_CGROUP_CHARGE_TYPE_MAPPED;
2565 else if (page_is_file_cache(page))
2566 ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
2567 else
2568 ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM;
2569 __mem_cgroup_commit_charge(mem, pc, ctype);
2506 return ret; 2570 return ret;
2507} 2571}
2508 2572
2509/* remove redundant charge if migration failed*/ 2573/* remove redundant charge if migration failed*/
2510void mem_cgroup_end_migration(struct mem_cgroup *mem, 2574void mem_cgroup_end_migration(struct mem_cgroup *mem,
2511 struct page *oldpage, struct page *newpage) 2575 struct page *oldpage, struct page *newpage)
2512{ 2576{
2513 struct page *target, *unused; 2577 struct page *used, *unused;
2514 struct page_cgroup *pc; 2578 struct page_cgroup *pc;
2515 enum charge_type ctype;
2516 2579
2517 if (!mem) 2580 if (!mem)
2518 return; 2581 return;
2582 /* blocks rmdir() */
2519 cgroup_exclude_rmdir(&mem->css); 2583 cgroup_exclude_rmdir(&mem->css);
2520 /* at migration success, oldpage->mapping is NULL. */ 2584 /* at migration success, oldpage->mapping is NULL. */
2521 if (oldpage->mapping) { 2585 if (oldpage->mapping) {
2522 target = oldpage; 2586 used = oldpage;
2523 unused = NULL; 2587 unused = newpage;
2524 } else { 2588 } else {
2525 target = newpage; 2589 used = newpage;
2526 unused = oldpage; 2590 unused = oldpage;
2527 } 2591 }
2528
2529 if (PageAnon(target))
2530 ctype = MEM_CGROUP_CHARGE_TYPE_MAPPED;
2531 else if (page_is_file_cache(target))
2532 ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
2533 else
2534 ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM;
2535
2536 /* unused page is not on radix-tree now. */
2537 if (unused)
2538 __mem_cgroup_uncharge_common(unused, ctype);
2539
2540 pc = lookup_page_cgroup(target);
2541 /* 2592 /*
2542 * __mem_cgroup_commit_charge() check PCG_USED bit of page_cgroup. 2593 * We disallowed uncharge of pages under migration because mapcount
2543 * So, double-counting is effectively avoided. 2594 * of the page goes down to zero, temporarly.
2595 * Clear the flag and check the page should be charged.
2544 */ 2596 */
2545 __mem_cgroup_commit_charge(mem, pc, ctype); 2597 pc = lookup_page_cgroup(oldpage);
2598 lock_page_cgroup(pc);
2599 ClearPageCgroupMigration(pc);
2600 unlock_page_cgroup(pc);
2546 2601
2602 if (unused != oldpage)
2603 pc = lookup_page_cgroup(unused);
2604 __mem_cgroup_uncharge_common(unused, MEM_CGROUP_CHARGE_TYPE_FORCE);
2605
2606 pc = lookup_page_cgroup(used);
2547 /* 2607 /*
2548 * Both of oldpage and newpage are still under lock_page(). 2608 * If a page is a file cache, radix-tree replacement is very atomic
2549 * Then, we don't have to care about race in radix-tree. 2609 * and we can skip this check. When it was an Anon page, its mapcount
2550 * But we have to be careful that this page is unmapped or not. 2610 * goes down to 0. But because we added MIGRATION flage, it's not
2551 * 2611 * uncharged yet. There are several case but page->mapcount check
2552 * There is a case for !page_mapped(). At the start of 2612 * and USED bit check in mem_cgroup_uncharge_page() will do enough
2553 * migration, oldpage was mapped. But now, it's zapped. 2613 * check. (see prepare_charge() also)
2554 * But we know *target* page is not freed/reused under us.
2555 * mem_cgroup_uncharge_page() does all necessary checks.
2556 */ 2614 */
2557 if (ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED) 2615 if (PageAnon(used))
2558 mem_cgroup_uncharge_page(target); 2616 mem_cgroup_uncharge_page(used);
2559 /* 2617 /*
2560 * At migration, we may charge account against cgroup which has no tasks 2618 * At migration, we may charge account against cgroup which has no
2619 * tasks.
2561 * So, rmdir()->pre_destroy() can be called while we do this charge. 2620 * So, rmdir()->pre_destroy() can be called while we do this charge.
2562 * In that case, we need to call pre_destroy() again. check it here. 2621 * In that case, we need to call pre_destroy() again. check it here.
2563 */ 2622 */
diff --git a/mm/migrate.c b/mm/migrate.c
index 09e2471afa0f..4205b1d6049e 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -590,7 +590,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
590 } 590 }
591 591
592 /* charge against new page */ 592 /* charge against new page */
593 charge = mem_cgroup_prepare_migration(page, &mem); 593 charge = mem_cgroup_prepare_migration(page, newpage, &mem);
594 if (charge == -ENOMEM) { 594 if (charge == -ENOMEM) {
595 rc = -ENOMEM; 595 rc = -ENOMEM;
596 goto unlock; 596 goto unlock;