author     KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>  2009-01-07 21:07:50 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>      2009-01-08 11:31:04 -0500
commit     01b1ae63c2270cbacfd43fea94578c17950eb548
tree       ab0275f32e8548c4413014d43cab1f52f03c9c5c
parent     bced0520fe462bb94021dcabd32e99630c171be2
memcg: simple migration handling
Now, management of "charge" under page migration is done under following
manner. (Assume migrate page contents from oldpage to newpage)
before
- "newpage" is charged before migration.
at success.
- "oldpage" is uncharged at somewhere(unmap, radix-tree-replace)
at failure
- "newpage" is uncharged.
- "oldpage" is charged if necessary (*1)
But (*1) is not reliable....because of GFP_ATOMIC.
This patch tries to change behavior as following by charge/commit/cancel ops.
before
- charge PAGE_SIZE (no target page)
success
- commit charge against "newpage".
failure
- commit charge against "oldpage".
(PCG_USED bit works effectively to avoid double-counting)
- if "oldpage" is obsolete, cancel charge of PAGE_SIZE.
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Reviewed-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <balbir@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--  include/linux/memcontrol.h |  19
-rw-r--r--  mm/memcontrol.c            | 108
-rw-r--r--  mm/migrate.c               |  42
3 files changed, 73 insertions(+), 96 deletions(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index c592f315cd02..b095f5f6ecf7 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -29,8 +29,6 @@ struct mm_struct;
 
 extern int mem_cgroup_newpage_charge(struct page *page, struct mm_struct *mm,
 				gfp_t gfp_mask);
-extern int mem_cgroup_charge_migrate_fixup(struct page *page,
-				struct mm_struct *mm, gfp_t gfp_mask);
 /* for swap handling */
 extern int mem_cgroup_try_charge(struct mm_struct *mm,
 		gfp_t gfp_mask, struct mem_cgroup **ptr);
@@ -60,8 +58,9 @@ extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
 	((cgroup) == mem_cgroup_from_task((mm)->owner))
 
 extern int
-mem_cgroup_prepare_migration(struct page *page, struct page *newpage);
-extern void mem_cgroup_end_migration(struct page *page);
+mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr);
+extern void mem_cgroup_end_migration(struct mem_cgroup *mem,
+	struct page *oldpage, struct page *newpage);
 
 /*
  * For memory reclaim.
@@ -94,12 +93,6 @@ static inline int mem_cgroup_cache_charge(struct page *page,
 	return 0;
 }
 
-static inline int mem_cgroup_charge_migrate_fixup(struct page *page,
-					struct mm_struct *mm, gfp_t gfp_mask)
-{
-	return 0;
-}
-
 static inline int mem_cgroup_try_charge(struct mm_struct *mm,
 			gfp_t gfp_mask, struct mem_cgroup **ptr)
 {
@@ -144,12 +137,14 @@ static inline int task_in_mem_cgroup(struct task_struct *task,
 }
 
 static inline int
-mem_cgroup_prepare_migration(struct page *page, struct page *newpage)
+mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr)
 {
 	return 0;
 }
 
-static inline void mem_cgroup_end_migration(struct page *page)
+static inline void mem_cgroup_end_migration(struct mem_cgroup *mem,
+					struct page *oldpage,
+					struct page *newpage)
 {
 }
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c34eb52bdc3f..b71195e8198b 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -627,34 +627,6 @@ int mem_cgroup_newpage_charge(struct page *page,
 				MEM_CGROUP_CHARGE_TYPE_MAPPED, NULL);
 }
 
-/*
- * same as mem_cgroup_newpage_charge(), now.
- * But what we assume is different from newpage, and this is special case.
- * treat this in special function. easy for maintenance.
- */
-
-int mem_cgroup_charge_migrate_fixup(struct page *page,
-				struct mm_struct *mm, gfp_t gfp_mask)
-{
-	if (mem_cgroup_subsys.disabled)
-		return 0;
-
-	if (PageCompound(page))
-		return 0;
-
-	if (page_mapped(page) || (page->mapping && !PageAnon(page)))
-		return 0;
-
-	if (unlikely(!mm))
-		mm = &init_mm;
-
-	return mem_cgroup_charge_common(page, mm, gfp_mask,
-				MEM_CGROUP_CHARGE_TYPE_MAPPED, NULL);
-}
-
-
-
-
 int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 				gfp_t gfp_mask)
 {
@@ -697,7 +669,6 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 				MEM_CGROUP_CHARGE_TYPE_SHMEM, NULL);
 }
 
-
 void mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr)
 {
 	struct page_cgroup *pc;
@@ -782,13 +753,13 @@ void mem_cgroup_uncharge_cache_page(struct page *page)
 }
 
 /*
- * Before starting migration, account against new page.
+ * Before starting migration, account PAGE_SIZE to mem_cgroup that the old
+ * page belongs to.
  */
-int mem_cgroup_prepare_migration(struct page *page, struct page *newpage)
+int mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr)
 {
 	struct page_cgroup *pc;
 	struct mem_cgroup *mem = NULL;
-	enum charge_type ctype = MEM_CGROUP_CHARGE_TYPE_MAPPED;
 	int ret = 0;
 
 	if (mem_cgroup_subsys.disabled)
@@ -799,42 +770,67 @@ int mem_cgroup_prepare_migration(struct page *page, struct page *newpage)
 	if (PageCgroupUsed(pc)) {
 		mem = pc->mem_cgroup;
 		css_get(&mem->css);
-		if (PageCgroupCache(pc)) {
-			if (page_is_file_cache(page))
-				ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
-			else
-				ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM;
-		}
 	}
 	unlock_page_cgroup(pc);
+
 	if (mem) {
-		ret = mem_cgroup_charge_common(newpage, NULL,
-					GFP_HIGHUSER_MOVABLE,
-					ctype, mem);
+		ret = mem_cgroup_try_charge(NULL, GFP_HIGHUSER_MOVABLE, &mem);
 		css_put(&mem->css);
 	}
+	*ptr = mem;
 	return ret;
 }
 
 /* remove redundant charge if migration failed*/
-void mem_cgroup_end_migration(struct page *newpage)
+void mem_cgroup_end_migration(struct mem_cgroup *mem,
+		struct page *oldpage, struct page *newpage)
 {
+	struct page *target, *unused;
+	struct page_cgroup *pc;
+	enum charge_type ctype;
+
+	if (!mem)
+		return;
+
+	/* at migration success, oldpage->mapping is NULL. */
+	if (oldpage->mapping) {
+		target = oldpage;
+		unused = NULL;
+	} else {
+		target = newpage;
+		unused = oldpage;
+	}
+
+	if (PageAnon(target))
+		ctype = MEM_CGROUP_CHARGE_TYPE_MAPPED;
+	else if (page_is_file_cache(target))
+		ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
+	else
+		ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM;
+
+	/* unused page is not on radix-tree now. */
+	if (unused && ctype != MEM_CGROUP_CHARGE_TYPE_MAPPED)
+		__mem_cgroup_uncharge_common(unused, ctype);
+
+	pc = lookup_page_cgroup(target);
 	/*
-	 * At success, page->mapping is not NULL.
-	 * special rollback care is necessary when
-	 * 1. at migration failure. (newpage->mapping is cleared in this case)
-	 * 2. the newpage was moved but not remapped again because the task
-	 *    exits and the newpage is obsolete. In this case, the new page
-	 *    may be a swapcache. So, we just call mem_cgroup_uncharge_page()
-	 *    always for avoiding mess. The page_cgroup will be removed if
-	 *    unnecessary. File cache pages is still on radix-tree. Don't
-	 *    care it.
+	 * __mem_cgroup_commit_charge() check PCG_USED bit of page_cgroup.
+	 * So, double-counting is effectively avoided.
+	 */
+	__mem_cgroup_commit_charge(mem, pc, ctype);
+
+	/*
+	 * Both of oldpage and newpage are still under lock_page().
+	 * Then, we don't have to care about race in radix-tree.
+	 * But we have to be careful that this page is unmapped or not.
+	 *
+	 * There is a case for !page_mapped(). At the start of
+	 * migration, oldpage was mapped. But now, it's zapped.
+	 * But we know *target* page is not freed/reused under us.
+	 * mem_cgroup_uncharge_page() does all necessary checks.
 	 */
-	if (!newpage->mapping)
-		__mem_cgroup_uncharge_common(newpage,
-				 MEM_CGROUP_CHARGE_TYPE_FORCE);
-	else if (PageAnon(newpage))
-		mem_cgroup_uncharge_page(newpage);
+	if (ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED)
+		mem_cgroup_uncharge_page(target);
 }
 
 /*
diff --git a/mm/migrate.c b/mm/migrate.c
index 246dcb973ae7..a30ea5fcf9f1 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -121,20 +121,6 @@ static void remove_migration_pte(struct vm_area_struct *vma,
 	if (!is_migration_entry(entry) || migration_entry_to_page(entry) != old)
 		goto out;
 
-	/*
-	 * Yes, ignore the return value from a GFP_ATOMIC mem_cgroup_charge.
-	 * Failure is not an option here: we're now expected to remove every
-	 * migration pte, and will cause crashes otherwise. Normally this
-	 * is not an issue: mem_cgroup_prepare_migration bumped up the old
-	 * page_cgroup count for safety, that's now attached to the new page,
-	 * so this charge should just be another incrementation of the count,
-	 * to keep in balance with rmap.c's mem_cgroup_uncharging. But if
-	 * there's been a force_empty, those reference counts may no longer
-	 * be reliable, and this charge can actually fail: oh well, we don't
-	 * make the situation any worse by proceeding as if it had succeeded.
-	 */
-	mem_cgroup_charge_migrate_fixup(new, mm, GFP_ATOMIC);
-
 	get_page(new);
 	pte = pte_mkold(mk_pte(new, vma->vm_page_prot));
 	if (is_write_migration_entry(entry))
@@ -378,9 +364,6 @@ static void migrate_page_copy(struct page *newpage, struct page *page)
 	anon = PageAnon(page);
 	page->mapping = NULL;
 
-	if (!anon) /* This page was removed from radix-tree. */
-		mem_cgroup_uncharge_cache_page(page);
-
 	/*
 	 * If any waiters have accumulated on the new page then
 	 * wake them up.
@@ -614,6 +597,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 	struct page *newpage = get_new_page(page, private, &result);
 	int rcu_locked = 0;
 	int charge = 0;
+	struct mem_cgroup *mem;
 
 	if (!newpage)
 		return -ENOMEM;
@@ -623,24 +607,26 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 		goto move_newpage;
 	}
 
-	charge = mem_cgroup_prepare_migration(page, newpage);
-	if (charge == -ENOMEM) {
-		rc = -ENOMEM;
-		goto move_newpage;
-	}
 	/* prepare cgroup just returns 0 or -ENOMEM */
-	BUG_ON(charge);
-
 	rc = -EAGAIN;
+
 	if (!trylock_page(page)) {
 		if (!force)
 			goto move_newpage;
 		lock_page(page);
 	}
 
+	/* charge against new page */
+	charge = mem_cgroup_prepare_migration(page, &mem);
+	if (charge == -ENOMEM) {
+		rc = -ENOMEM;
+		goto unlock;
+	}
+	BUG_ON(charge);
+
 	if (PageWriteback(page)) {
 		if (!force)
-			goto unlock;
+			goto uncharge;
 		wait_on_page_writeback(page);
 	}
 	/*
@@ -693,7 +679,9 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 rcu_unlock:
 	if (rcu_locked)
 		rcu_read_unlock();
-
+uncharge:
+	if (!charge)
+		mem_cgroup_end_migration(mem, page, newpage);
 unlock:
 	unlock_page(page);
 
@@ -709,8 +697,6 @@ unlock:
 	}
 
 move_newpage:
-	if (!charge)
-		mem_cgroup_end_migration(newpage);
 
 	/*
 	 * Move the new page to the LRU. If migration was not successful