Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--  mm/memcontrol.c | 135
1 file changed, 97 insertions(+), 38 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 8c200e86da4c..df1234c0dac3 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2258,7 +2258,8 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 	switch (ctype) {
 	case MEM_CGROUP_CHARGE_TYPE_MAPPED:
 	case MEM_CGROUP_CHARGE_TYPE_DROP:
-		if (page_mapped(page))
+		/* See mem_cgroup_prepare_migration() */
+		if (page_mapped(page) || PageCgroupMigration(pc))
 			goto unlock_out;
 		break;
 	case MEM_CGROUP_CHARGE_TYPE_SWAPOUT:
@@ -2481,10 +2482,12 @@ static inline int mem_cgroup_move_swap_account(swp_entry_t entry,
  * Before starting migration, account PAGE_SIZE to mem_cgroup that the old
  * page belongs to.
  */
-int mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr)
+int mem_cgroup_prepare_migration(struct page *page,
+	struct page *newpage, struct mem_cgroup **ptr)
 {
 	struct page_cgroup *pc;
 	struct mem_cgroup *mem = NULL;
+	enum charge_type ctype;
 	int ret = 0;

 	if (mem_cgroup_disabled())
@@ -2495,69 +2498,125 @@ int mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr)
 	if (PageCgroupUsed(pc)) {
 		mem = pc->mem_cgroup;
 		css_get(&mem->css);
+		/*
+		 * When migrating an anonymous page, its mapcount goes down
+		 * to 0 and uncharge() would be called. But, even if it's fully
+		 * unmapped, migration may fail and the page would have to be
+		 * charged again. We set the MIGRATION flag here and delay
+		 * uncharge until end_migration() is called.
+		 *
+		 * Corner cases:
+		 * A)
+		 * The old page was mapped as Anon and was unmapped and freed
+		 * while migration was in progress.
+		 * If unmap finds the old page, its uncharge() is delayed
+		 * until end_migration(). If unmap finds the new page, it is
+		 * uncharged when its mapcount drops from 1 to 0. If the unmap
+		 * code finds a swap migration entry, the new page will not be
+		 * mapped and end_migration() will find it (mapcount == 0).
+		 *
+		 * B)
+		 * The old page was mapped but migration fails, so the kernel
+		 * remaps it. Its charge is kept by the MIGRATION flag even
+		 * if its mapcount goes down to 0, so the remap succeeds
+		 * without charging it again.
+		 *
+		 * C)
+		 * The "old" page is under lock_page() until the end of
+		 * migration, so the old page itself will not be swapped out.
+		 * If the new page is swapped out before end_migration(), our
+		 * hook into the usual swap-out path will catch the event.
+		 */
+		if (PageAnon(page))
+			SetPageCgroupMigration(pc);
 	}
 	unlock_page_cgroup(pc);
+	/*
+	 * If the page is not charged at this point,
+	 * we return here.
+	 */
+	if (!mem)
+		return 0;

 	*ptr = mem;
-	if (mem) {
-		ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, ptr, false);
-		css_put(&mem->css);
+	ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, ptr, false);
+	css_put(&mem->css);	/* drop the extra refcount */
+	if (ret || *ptr == NULL) {
+		if (PageAnon(page)) {
+			lock_page_cgroup(pc);
+			ClearPageCgroupMigration(pc);
+			unlock_page_cgroup(pc);
+			/*
+			 * The old page may have been fully unmapped while we kept it.
+			 */
+			mem_cgroup_uncharge_page(page);
+		}
+		return -ENOMEM;
 	}
+	/*
+	 * We charge the new page before it's used/mapped, so even if
+	 * unlock_page() is called before end_migration() we can catch
+	 * every event on this new page. If it is migrated but never
+	 * remapped, its mapcount drops to 0 and we uncharge it in end_migration().
+	 */
+	pc = lookup_page_cgroup(newpage);
+	if (PageAnon(page))
+		ctype = MEM_CGROUP_CHARGE_TYPE_MAPPED;
+	else if (page_is_file_cache(page))
+		ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
+	else
+		ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM;
+	__mem_cgroup_commit_charge(mem, pc, ctype);
 	return ret;
 }

 /* remove redundant charge if migration failed*/
 void mem_cgroup_end_migration(struct mem_cgroup *mem,
 	struct page *oldpage, struct page *newpage)
 {
-	struct page *target, *unused;
+	struct page *used, *unused;
 	struct page_cgroup *pc;
-	enum charge_type ctype;

 	if (!mem)
 		return;
+	/* blocks rmdir() */
 	cgroup_exclude_rmdir(&mem->css);
 	/* at migration success, oldpage->mapping is NULL. */
 	if (oldpage->mapping) {
-		target = oldpage;
-		unused = NULL;
+		used = oldpage;
+		unused = newpage;
 	} else {
-		target = newpage;
-		unused = oldpage;
+		used = newpage;
+		unused = oldpage;
 	}
-
-	if (PageAnon(target))
-		ctype = MEM_CGROUP_CHARGE_TYPE_MAPPED;
-	else if (page_is_file_cache(target))
-		ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
-	else
-		ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM;
-
-	/* unused page is not on radix-tree now. */
-	if (unused)
-		__mem_cgroup_uncharge_common(unused, ctype);
-
-	pc = lookup_page_cgroup(target);
 	/*
-	 * __mem_cgroup_commit_charge() check PCG_USED bit of page_cgroup.
-	 * So, double-counting is effectively avoided.
+	 * We disallowed uncharge of pages under migration because the
+	 * page's mapcount temporarily drops to zero.
+	 * Clear the flag and check whether the page should stay charged.
 	 */
-	__mem_cgroup_commit_charge(mem, pc, ctype);
+	pc = lookup_page_cgroup(oldpage);
+	lock_page_cgroup(pc);
+	ClearPageCgroupMigration(pc);
+	unlock_page_cgroup(pc);

+	if (unused != oldpage)
+		pc = lookup_page_cgroup(unused);
+	__mem_cgroup_uncharge_common(unused, MEM_CGROUP_CHARGE_TYPE_FORCE);
+
+	pc = lookup_page_cgroup(used);
 	/*
-	 * Both of oldpage and newpage are still under lock_page().
-	 * Then, we don't have to care about race in radix-tree.
-	 * But we have to be careful that this page is unmapped or not.
-	 *
-	 * There is a case for !page_mapped(). At the start of
-	 * migration, oldpage was mapped. But now, it's zapped.
-	 * But we know *target* page is not freed/reused under us.
-	 * mem_cgroup_uncharge_page() does all necessary checks.
+	 * If the page is a file cache, radix-tree replacement is atomic and
+	 * we can skip this check. When it was an Anon page, its mapcount
+	 * went down to 0, but because we set the MIGRATION flag it is not
+	 * uncharged yet. There are several cases, but the page->mapcount
+	 * and USED bit checks in mem_cgroup_uncharge_page() are sufficient.
+	 * (See mem_cgroup_prepare_migration() as well.)
 	 */
-	if (ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED)
-		mem_cgroup_uncharge_page(target);
+	if (PageAnon(used))
+		mem_cgroup_uncharge_page(used);
 	/*
-	 * At migration, we may charge account against cgroup which has no tasks
+	 * At migration, we may charge against a cgroup which has no
+	 * tasks.
 	 * So, rmdir()->pre_destroy() can be called while we do this charge.
 	 * In that case, we need to call pre_destroy() again. check it here.
 	 */
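
For context on how the reworked API is meant to be driven: mem_cgroup_prepare_migration() now charges the new page up front, and mem_cgroup_end_migration() settles whichever page ends up unused. Below is a minimal caller-side sketch, loosely modeled on the unmap_and_move() path in mm/migrate.c; do_the_actual_migration() is a hypothetical stand-in for the real move logic, and locking and error handling are simplified.

/* Illustrative sketch only -- not the verbatim mm/migrate.c caller. */
static int sketch_migrate_one(struct page *page, struct page *newpage)
{
	struct mem_cgroup *mem = NULL;	/* stays NULL if page was uncharged */
	int rc;

	lock_page(page);

	/*
	 * Charges newpage and, for an anon page, sets the MIGRATION flag on
	 * the old page so a racing unmap cannot uncharge it under us.
	 */
	rc = mem_cgroup_prepare_migration(page, newpage, &mem);
	if (rc) {
		/* -ENOMEM: the flag was already cleared and the charge undone */
		unlock_page(page);
		return rc;
	}

	rc = do_the_actual_migration(page, newpage);	/* hypothetical helper */

	/*
	 * On success or failure: clears the MIGRATION flag on the old page,
	 * force-uncharges whichever page ended up unused, and uncharges an
	 * anon "used" page whose mapcount never came back from 0.
	 */
	mem_cgroup_end_migration(mem, page, newpage);

	unlock_page(page);
	return rc;
}

Note that the Set/Clear/PageCgroupMigration() helpers used by this patch are not defined in mm/memcontrol.c; the diffstat above is limited to that file, so the companion PCG_MIGRATION bit added in include/linux/page_cgroup.h is not shown here.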