diff options
-rw-r--r-- | include/linux/memcontrol.h | 6 | ||||
-rw-r--r-- | include/linux/page_cgroup.h | 5 | ||||
-rw-r--r-- | mm/memcontrol.c | 135 | ||||
-rw-r--r-- | mm/migrate.c | 2 |
4 files changed, 107 insertions, 41 deletions
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 05894795fdc1..9411d32840b0 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h | |||
@@ -90,7 +90,8 @@ int mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *cgroup) | |||
90 | extern struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *mem); | 90 | extern struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *mem); |
91 | 91 | ||
92 | extern int | 92 | extern int |
93 | mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr); | 93 | mem_cgroup_prepare_migration(struct page *page, |
94 | struct page *newpage, struct mem_cgroup **ptr); | ||
94 | extern void mem_cgroup_end_migration(struct mem_cgroup *mem, | 95 | extern void mem_cgroup_end_migration(struct mem_cgroup *mem, |
95 | struct page *oldpage, struct page *newpage); | 96 | struct page *oldpage, struct page *newpage); |
96 | 97 | ||
@@ -227,7 +228,8 @@ static inline struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *mem) | |||
227 | } | 228 | } |
228 | 229 | ||
229 | static inline int | 230 | static inline int |
230 | mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr) | 231 | mem_cgroup_prepare_migration(struct page *page, struct page *newpage, |
232 | struct mem_cgroup **ptr) | ||
231 | { | 233 | { |
232 | return 0; | 234 | return 0; |
233 | } | 235 | } |
diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h index aef22ae2af47..5bb13b3db84d 100644 --- a/include/linux/page_cgroup.h +++ b/include/linux/page_cgroup.h | |||
@@ -40,6 +40,7 @@ enum { | |||
40 | PCG_USED, /* this object is in use. */ | 40 | PCG_USED, /* this object is in use. */ |
41 | PCG_ACCT_LRU, /* page has been accounted for */ | 41 | PCG_ACCT_LRU, /* page has been accounted for */ |
42 | PCG_FILE_MAPPED, /* page is accounted as "mapped" */ | 42 | PCG_FILE_MAPPED, /* page is accounted as "mapped" */ |
43 | PCG_MIGRATION, /* under page migration */ | ||
43 | }; | 44 | }; |
44 | 45 | ||
45 | #define TESTPCGFLAG(uname, lname) \ | 46 | #define TESTPCGFLAG(uname, lname) \ |
@@ -79,6 +80,10 @@ SETPCGFLAG(FileMapped, FILE_MAPPED) | |||
79 | CLEARPCGFLAG(FileMapped, FILE_MAPPED) | 80 | CLEARPCGFLAG(FileMapped, FILE_MAPPED) |
80 | TESTPCGFLAG(FileMapped, FILE_MAPPED) | 81 | TESTPCGFLAG(FileMapped, FILE_MAPPED) |
81 | 82 | ||
83 | SETPCGFLAG(Migration, MIGRATION) | ||
84 | CLEARPCGFLAG(Migration, MIGRATION) | ||
85 | TESTPCGFLAG(Migration, MIGRATION) | ||
86 | |||
82 | static inline int page_cgroup_nid(struct page_cgroup *pc) | 87 | static inline int page_cgroup_nid(struct page_cgroup *pc) |
83 | { | 88 | { |
84 | return page_to_nid(pc->page); | 89 | return page_to_nid(pc->page); |
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 8c200e86da4c..df1234c0dac3 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -2258,7 +2258,8 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) | |||
2258 | switch (ctype) { | 2258 | switch (ctype) { |
2259 | case MEM_CGROUP_CHARGE_TYPE_MAPPED: | 2259 | case MEM_CGROUP_CHARGE_TYPE_MAPPED: |
2260 | case MEM_CGROUP_CHARGE_TYPE_DROP: | 2260 | case MEM_CGROUP_CHARGE_TYPE_DROP: |
2261 | if (page_mapped(page)) | 2261 | /* See mem_cgroup_prepare_migration() */ |
2262 | if (page_mapped(page) || PageCgroupMigration(pc)) | ||
2262 | goto unlock_out; | 2263 | goto unlock_out; |
2263 | break; | 2264 | break; |
2264 | case MEM_CGROUP_CHARGE_TYPE_SWAPOUT: | 2265 | case MEM_CGROUP_CHARGE_TYPE_SWAPOUT: |
@@ -2481,10 +2482,12 @@ static inline int mem_cgroup_move_swap_account(swp_entry_t entry, | |||
2481 | * Before starting migration, account PAGE_SIZE to mem_cgroup that the old | 2482 | * Before starting migration, account PAGE_SIZE to mem_cgroup that the old |
2482 | * page belongs to. | 2483 | * page belongs to. |
2483 | */ | 2484 | */ |
2484 | int mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr) | 2485 | int mem_cgroup_prepare_migration(struct page *page, |
2486 | struct page *newpage, struct mem_cgroup **ptr) | ||
2485 | { | 2487 | { |
2486 | struct page_cgroup *pc; | 2488 | struct page_cgroup *pc; |
2487 | struct mem_cgroup *mem = NULL; | 2489 | struct mem_cgroup *mem = NULL; |
2490 | enum charge_type ctype; | ||
2488 | int ret = 0; | 2491 | int ret = 0; |
2489 | 2492 | ||
2490 | if (mem_cgroup_disabled()) | 2493 | if (mem_cgroup_disabled()) |
@@ -2495,69 +2498,125 @@ int mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr) | |||
2495 | if (PageCgroupUsed(pc)) { | 2498 | if (PageCgroupUsed(pc)) { |
2496 | mem = pc->mem_cgroup; | 2499 | mem = pc->mem_cgroup; |
2497 | css_get(&mem->css); | 2500 | css_get(&mem->css); |
2501 | /* | ||
2502 | * At migrating an anonymous page, its mapcount goes down | ||
2503 | * to 0 and uncharge() will be called. But, even if it's fully | ||
2504 | * unmapped, migration may fail and this page has to be | ||
2505 | * charged again. We set MIGRATION flag here and delay uncharge | ||
2506 | * until end_migration() is called | ||
2507 | * | ||
2508 | * Corner Case Thinking | ||
2509 | * A) | ||
2510 | * When the old page was mapped as Anon and it's unmap-and-freed | ||
2511 | * while migration was ongoing. | ||
2512 | * If unmap finds the old page, uncharge() of it will be delayed | ||
2513 | * until end_migration(). If unmap finds a new page, it's | ||
2514 | * uncharged when it makes mapcount to be 1->0. If unmap code | ||
2515 | * finds swap_migration_entry, the new page will not be mapped | ||
2516 | * and end_migration() will find it(mapcount==0). | ||
2517 | * | ||
2518 | * B) | ||
2519 | * When the old page was mapped but migration fails, the kernel | ||
2520 | * remaps it. A charge for it is kept by MIGRATION flag even | ||
2521 | * if mapcount goes down to 0. We can do remap successfully | ||
2522 | * without charging it again. | ||
2523 | * | ||
2524 | * C) | ||
2525 | * The "old" page is under lock_page() until the end of | ||
2526 | * migration, so, the old page itself will not be swapped-out. | ||
2527 | * If the new page is swapped out before end_migration, our | ||
2528 | * hook to usual swap-out path will catch the event. | ||
2529 | */ | ||
2530 | if (PageAnon(page)) | ||
2531 | SetPageCgroupMigration(pc); | ||
2498 | } | 2532 | } |
2499 | unlock_page_cgroup(pc); | 2533 | unlock_page_cgroup(pc); |
2534 | /* | ||
2535 | * If the page is not charged at this point, | ||
2536 | * we return here. | ||
2537 | */ | ||
2538 | if (!mem) | ||
2539 | return 0; | ||
2500 | 2540 | ||
2501 | *ptr = mem; | 2541 | *ptr = mem; |
2502 | if (mem) { | 2542 | ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, ptr, false); |
2503 | ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, ptr, false); | 2543 | css_put(&mem->css);/* drop extra refcnt */ |
2504 | css_put(&mem->css); | 2544 | if (ret || *ptr == NULL) { |
2545 | if (PageAnon(page)) { | ||
2546 | lock_page_cgroup(pc); | ||
2547 | ClearPageCgroupMigration(pc); | ||
2548 | unlock_page_cgroup(pc); | ||
2549 | /* | ||
2550 | * The old page may be fully unmapped while we kept it. | ||
2551 | */ | ||
2552 | mem_cgroup_uncharge_page(page); | ||
2553 | } | ||
2554 | return -ENOMEM; | ||
2505 | } | 2555 | } |
2556 | /* | ||
2557 | * We charge new page before it's used/mapped. So, even if unlock_page() | ||
2558 | * is called before end_migration, we can catch all events on this new | ||
2559 | * page. In the case new page is migrated but not remapped, new page's | ||
2560 | * mapcount will be finally 0 and we call uncharge in end_migration(). | ||
2561 | */ | ||
2562 | pc = lookup_page_cgroup(newpage); | ||
2563 | if (PageAnon(page)) | ||
2564 | ctype = MEM_CGROUP_CHARGE_TYPE_MAPPED; | ||
2565 | else if (page_is_file_cache(page)) | ||
2566 | ctype = MEM_CGROUP_CHARGE_TYPE_CACHE; | ||
2567 | else | ||
2568 | ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM; | ||
2569 | __mem_cgroup_commit_charge(mem, pc, ctype); | ||
2506 | return ret; | 2570 | return ret; |
2507 | } | 2571 | } |
2508 | 2572 | ||
2509 | /* remove redundant charge if migration failed*/ | 2573 | /* remove redundant charge if migration failed*/ |
2510 | void mem_cgroup_end_migration(struct mem_cgroup *mem, | 2574 | void mem_cgroup_end_migration(struct mem_cgroup *mem, |
2511 | struct page *oldpage, struct page *newpage) | 2575 | struct page *oldpage, struct page *newpage) |
2512 | { | 2576 | { |
2513 | struct page *target, *unused; | 2577 | struct page *used, *unused; |
2514 | struct page_cgroup *pc; | 2578 | struct page_cgroup *pc; |
2515 | enum charge_type ctype; | ||
2516 | 2579 | ||
2517 | if (!mem) | 2580 | if (!mem) |
2518 | return; | 2581 | return; |
2582 | /* blocks rmdir() */ | ||
2519 | cgroup_exclude_rmdir(&mem->css); | 2583 | cgroup_exclude_rmdir(&mem->css); |
2520 | /* at migration success, oldpage->mapping is NULL. */ | 2584 | /* at migration success, oldpage->mapping is NULL. */ |
2521 | if (oldpage->mapping) { | 2585 | if (oldpage->mapping) { |
2522 | target = oldpage; | 2586 | used = oldpage; |
2523 | unused = NULL; | 2587 | unused = newpage; |
2524 | } else { | 2588 | } else { |
2525 | target = newpage; | 2589 | used = newpage; |
2526 | unused = oldpage; | 2590 | unused = oldpage; |
2527 | } | 2591 | } |
2528 | |||
2529 | if (PageAnon(target)) | ||
2530 | ctype = MEM_CGROUP_CHARGE_TYPE_MAPPED; | ||
2531 | else if (page_is_file_cache(target)) | ||
2532 | ctype = MEM_CGROUP_CHARGE_TYPE_CACHE; | ||
2533 | else | ||
2534 | ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM; | ||
2535 | |||
2536 | /* unused page is not on radix-tree now. */ | ||
2537 | if (unused) | ||
2538 | __mem_cgroup_uncharge_common(unused, ctype); | ||
2539 | |||
2540 | pc = lookup_page_cgroup(target); | ||
2541 | /* | 2592 | /* |
2542 | * __mem_cgroup_commit_charge() check PCG_USED bit of page_cgroup. | 2593 | * We disallowed uncharge of pages under migration because mapcount |
2543 | * So, double-counting is effectively avoided. | 2594 | * of the page goes down to zero, temporarily. |
2595 | * Clear the flag and check the page should be charged. | ||
2544 | */ | 2596 | */ |
2545 | __mem_cgroup_commit_charge(mem, pc, ctype); | 2597 | pc = lookup_page_cgroup(oldpage); |
2598 | lock_page_cgroup(pc); | ||
2599 | ClearPageCgroupMigration(pc); | ||
2600 | unlock_page_cgroup(pc); | ||
2546 | 2601 | ||
2602 | if (unused != oldpage) | ||
2603 | pc = lookup_page_cgroup(unused); | ||
2604 | __mem_cgroup_uncharge_common(unused, MEM_CGROUP_CHARGE_TYPE_FORCE); | ||
2605 | |||
2606 | pc = lookup_page_cgroup(used); | ||
2547 | /* | 2607 | /* |
2548 | * Both of oldpage and newpage are still under lock_page(). | 2608 | * If a page is a file cache, radix-tree replacement is very atomic |
2549 | * Then, we don't have to care about race in radix-tree. | 2609 | * and we can skip this check. When it was an Anon page, its mapcount |
2550 | * But we have to be careful that this page is unmapped or not. | 2610 | * goes down to 0. But because we added MIGRATION flag, it's not |
2551 | * | 2611 | * uncharged yet. There are several cases but page->mapcount check |
2552 | * There is a case for !page_mapped(). At the start of | 2612 | * and USED bit check in mem_cgroup_uncharge_page() will do enough |
2553 | * migration, oldpage was mapped. But now, it's zapped. | 2613 | * check. (see prepare_charge() also) |
2554 | * But we know *target* page is not freed/reused under us. | ||
2555 | * mem_cgroup_uncharge_page() does all necessary checks. | ||
2556 | */ | 2614 | */ |
2557 | if (ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED) | 2615 | if (PageAnon(used)) |
2558 | mem_cgroup_uncharge_page(target); | 2616 | mem_cgroup_uncharge_page(used); |
2559 | /* | 2617 | /* |
2560 | * At migration, we may charge account against cgroup which has no tasks | 2618 | * At migration, we may charge account against cgroup which has no |
2619 | * tasks. | ||
2561 | * So, rmdir()->pre_destroy() can be called while we do this charge. | 2620 | * So, rmdir()->pre_destroy() can be called while we do this charge. |
2562 | * In that case, we need to call pre_destroy() again. check it here. | 2621 | * In that case, we need to call pre_destroy() again. check it here. |
2563 | */ | 2622 | */ |
diff --git a/mm/migrate.c b/mm/migrate.c index 09e2471afa0f..4205b1d6049e 100644 --- a/mm/migrate.c +++ b/mm/migrate.c | |||
@@ -590,7 +590,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private, | |||
590 | } | 590 | } |
591 | 591 | ||
592 | /* charge against new page */ | 592 | /* charge against new page */ |
593 | charge = mem_cgroup_prepare_migration(page, &mem); | 593 | charge = mem_cgroup_prepare_migration(page, newpage, &mem); |
594 | if (charge == -ENOMEM) { | 594 | if (charge == -ENOMEM) { |
595 | rc = -ENOMEM; | 595 | rc = -ENOMEM; |
596 | goto unlock; | 596 | goto unlock; |