Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--  mm/memcontrol.c  135
1 file changed, 97 insertions(+), 38 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 8c200e86da4c..df1234c0dac3 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2258,7 +2258,8 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 	switch (ctype) {
 	case MEM_CGROUP_CHARGE_TYPE_MAPPED:
 	case MEM_CGROUP_CHARGE_TYPE_DROP:
-		if (page_mapped(page))
+		/* See mem_cgroup_prepare_migration() */
+		if (page_mapped(page) || PageCgroupMigration(pc))
 			goto unlock_out;
 		break;
 	case MEM_CGROUP_CHARGE_TYPE_SWAPOUT:
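The hunk above is the heart of the fix: a fully unmapped page is still not uncharged while its page_cgroup carries the MIGRATION flag. A minimal, runnable user-space model of that rule follows; the stand-in types and names are assumptions for illustration only, not the kernel's.

#include <stdbool.h>
#include <stdio.h>

/* stand-in for struct page_cgroup; only the bit we care about here */
struct pc_model {
	bool migration;		/* models PageCgroupMigration(pc) */
};

/* models the post-patch guard in __mem_cgroup_uncharge_common() */
static bool uncharge_deferred(bool page_mapped, const struct pc_model *pc)
{
	/* before the patch, the guard was page_mapped alone */
	return page_mapped || pc->migration;
}

int main(void)
{
	struct pc_model pc = { .migration = true };

	/* fully unmapped but mid-migration: uncharge must wait */
	printf("deferred = %d\n", uncharge_deferred(false, &pc));
	return 0;
}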
@@ -2481,10 +2482,12 @@ static inline int mem_cgroup_move_swap_account(swp_entry_t entry,
  * Before starting migration, account PAGE_SIZE to mem_cgroup that the old
  * page belongs to.
  */
-int mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr)
+int mem_cgroup_prepare_migration(struct page *page,
+	struct page *newpage, struct mem_cgroup **ptr)
 {
 	struct page_cgroup *pc;
 	struct mem_cgroup *mem = NULL;
+	enum charge_type ctype;
 	int ret = 0;
 
 	if (mem_cgroup_disabled())
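mem_cgroup_prepare_migration() now also takes the pre-allocated target page, so the charge can be committed to newpage before migration runs. The matching caller update lives in mm/migrate.c and is filtered out of this diffstat; a compile-only sketch of the new contract, with stub declarations and a hypothetical caller:

struct page;
struct mem_cgroup;

/* new prototype, as changed by the hunk above */
int mem_cgroup_prepare_migration(struct page *page,
	struct page *newpage, struct mem_cgroup **ptr);

/* hypothetical caller: a failed charge fails the migration early */
static int charge_before_migration(struct page *page, struct page *newpage)
{
	struct mem_cgroup *mem = NULL;

	/* returns -ENOMEM if the charge against mem's cgroup fails */
	return mem_cgroup_prepare_migration(page, newpage, &mem);
}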
@@ -2495,69 +2498,125 @@ int mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr)
 	if (PageCgroupUsed(pc)) {
 		mem = pc->mem_cgroup;
 		css_get(&mem->css);
+		/*
+		 * When migrating an anonymous page, its mapcount goes down
+		 * to 0 and uncharge() will be called. But, even if it's fully
+		 * unmapped, migration may fail and this page has to be
+		 * charged again. We set MIGRATION flag here and delay uncharge
+		 * until end_migration() is called.
+		 *
+		 * Corner Case Thinking
+		 * A)
+		 * The old page was mapped as Anon and is unmapped and freed
+		 * while migration was ongoing.
+		 * If unmap finds the old page, uncharge() of it will be delayed
+		 * until end_migration(). If unmap finds the new page, it's
+		 * uncharged when its mapcount goes 1->0. If the unmap code
+		 * finds a swap migration entry, the new page will not be mapped
+		 * and end_migration() will find it (mapcount==0).
+		 *
+		 * B)
+		 * The old page was mapped but migration fails; the kernel
+		 * remaps it. A charge for it is kept by the MIGRATION flag even
+		 * if mapcount goes down to 0. We can do the remap successfully
+		 * without charging it again.
+		 *
+		 * C)
+		 * The "old" page is under lock_page() until the end of
+		 * migration, so the old page itself will not be swapped out.
+		 * If the new page is swapped out before end_migration(), our
+		 * hook into the usual swap-out path will catch the event.
+		 */
+		if (PageAnon(page))
+			SetPageCgroupMigration(pc);
 	}
 	unlock_page_cgroup(pc);
+	/*
+	 * If the page is not charged at this point,
+	 * we return here.
+	 */
+	if (!mem)
+		return 0;
 
 	*ptr = mem;
-	if (mem) {
-		ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, ptr, false);
-		css_put(&mem->css);
-	}
+	ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, ptr, false);
+	css_put(&mem->css);	/* drop extra refcnt */
+	if (ret || *ptr == NULL) {
+		if (PageAnon(page)) {
+			lock_page_cgroup(pc);
+			ClearPageCgroupMigration(pc);
+			unlock_page_cgroup(pc);
+			/*
+			 * The old page may be fully unmapped while we kept it.
+			 */
+			mem_cgroup_uncharge_page(page);
+		}
+		return -ENOMEM;
+	}
+	/*
+	 * We charge the new page before it's used/mapped. So, even if unlock_page()
+	 * is called before end_migration, we can catch all events on this new
+	 * page. In case the new page is migrated but not remapped, the new page's
+	 * mapcount will finally be 0 and we call uncharge in end_migration().
+	 */
+	pc = lookup_page_cgroup(newpage);
+	if (PageAnon(page))
+		ctype = MEM_CGROUP_CHARGE_TYPE_MAPPED;
+	else if (page_is_file_cache(page))
+		ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
+	else
+		ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM;
+	__mem_cgroup_commit_charge(mem, pc, ctype);
 	return ret;
 }
 
 /* remove redundant charge if migration failed*/
 void mem_cgroup_end_migration(struct mem_cgroup *mem,
 	struct page *oldpage, struct page *newpage)
 {
-	struct page *target, *unused;
+	struct page *used, *unused;
 	struct page_cgroup *pc;
-	enum charge_type ctype;
 
 	if (!mem)
 		return;
+	/* blocks rmdir() */
 	cgroup_exclude_rmdir(&mem->css);
 	/* at migration success, oldpage->mapping is NULL. */
 	if (oldpage->mapping) {
-		target = oldpage;
-		unused = NULL;
+		used = oldpage;
+		unused = newpage;
 	} else {
-		target = newpage;
-		unused = oldpage;
+		used = newpage;
+		unused = oldpage;
 	}
-
-	if (PageAnon(target))
-		ctype = MEM_CGROUP_CHARGE_TYPE_MAPPED;
-	else if (page_is_file_cache(target))
-		ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
-	else
-		ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM;
-
-	/* unused page is not on radix-tree now. */
-	if (unused)
-		__mem_cgroup_uncharge_common(unused, ctype);
-
-	pc = lookup_page_cgroup(target);
 	/*
-	 * __mem_cgroup_commit_charge() check PCG_USED bit of page_cgroup.
-	 * So, double-counting is effectively avoided.
+	 * We disallowed uncharge of pages under migration because mapcount
+	 * of the page goes down to zero, temporarily.
+	 * Clear the flag and check whether the page should be charged.
 	 */
-	__mem_cgroup_commit_charge(mem, pc, ctype);
+	pc = lookup_page_cgroup(oldpage);
+	lock_page_cgroup(pc);
+	ClearPageCgroupMigration(pc);
+	unlock_page_cgroup(pc);
 
+	if (unused != oldpage)
+		pc = lookup_page_cgroup(unused);
+	__mem_cgroup_uncharge_common(unused, MEM_CGROUP_CHARGE_TYPE_FORCE);
+
+	pc = lookup_page_cgroup(used);
 	/*
-	 * Both of oldpage and newpage are still under lock_page().
-	 * Then, we don't have to care about race in radix-tree.
-	 * But we have to be careful that this page is unmapped or not.
-	 *
-	 * There is a case for !page_mapped(). At the start of
-	 * migration, oldpage was mapped. But now, it's zapped.
-	 * But we know *target* page is not freed/reused under us.
-	 * mem_cgroup_uncharge_page() does all necessary checks.
+	 * If a page is a file cache, radix-tree replacement is atomic
+	 * and we can skip this check. When it was an Anon page, its mapcount
+	 * goes down to 0. But because we added the MIGRATION flag, it's not
+	 * uncharged yet. There are several cases, but the page->mapcount check
+	 * and USED bit check in mem_cgroup_uncharge_page() will do enough
+	 * checking. (see mem_cgroup_prepare_migration() also)
 	 */
-	if (ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED)
-		mem_cgroup_uncharge_page(target);
+	if (PageAnon(used))
+		mem_cgroup_uncharge_page(used);
 	/*
-	 * At migration, we may charge account against cgroup which has no tasks
+	 * At migration, we may charge against a cgroup which has no
+	 * tasks.
 	 * So, rmdir()->pre_destroy() can be called while we do this charge.
 	 * In that case, we need to call pre_destroy() again. check it here.
 	 */
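Taken together, mem_cgroup_prepare_migration() charges the new page up front and marks anonymous old pages with the MIGRATION flag, while mem_cgroup_end_migration() clears the flag, force-uncharges whichever page lost, and re-checks the anonymous survivor. A runnable user-space model of the used/unused selection follows; the simplified types and the absence of locking are assumptions for illustration, not kernel code.

#include <stdio.h>

struct page_model {
	void *mapping;	/* NULL once migration has succeeded */
	int anon;
};

static void end_migration_model(struct page_model *oldpage,
	struct page_model *newpage)
{
	struct page_model *used, *unused;

	if (oldpage->mapping) {		/* migration failed */
		used = oldpage;
		unused = newpage;
	} else {			/* migration succeeded */
		used = newpage;
		unused = oldpage;
	}

	/* MIGRATION is cleared on the old page; the loser is force-uncharged */
	printf("force-uncharge the %s page\n",
	       unused == oldpage ? "old" : "new");

	/* anon survivors get their deferred uncharge re-checked */
	if (used->anon)
		printf("re-check uncharge on the %s page\n",
		       used == oldpage ? "old" : "new");
}

int main(void)
{
	struct page_model newp = { .mapping = NULL, .anon = 1 };
	struct page_model oldp = { .mapping = &newp, .anon = 1 };

	end_migration_model(&oldp, &newp);	/* failure path: old page keeps the charge */
	oldp.mapping = NULL;
	end_migration_model(&oldp, &newp);	/* success path: new page keeps the charge */
	return 0;
}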