author	KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>	2009-01-07 21:07:50 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2009-01-08 11:31:04 -0500
commit	01b1ae63c2270cbacfd43fea94578c17950eb548 (patch)
tree	ab0275f32e8548c4413014d43cab1f52f03c9c5c
parent	bced0520fe462bb94021dcabd32e99630c171be2 (diff)
memcg: simple migration handling
Currently, management of the "charge" under page migration is done in the
following manner (assume page contents are migrated from oldpage to newpage):

 before
  - "newpage" is charged before migration.
 at success
  - "oldpage" is uncharged somewhere (unmap, radix-tree replacement).
 at failure
  - "newpage" is uncharged.
  - "oldpage" is charged if necessary. (*1)

But (*1) is not reliable, because it has to use GFP_ATOMIC.

This patch changes the behavior as follows, using charge/commit/cancel ops:

 before
  - charge PAGE_SIZE (no target page).
 success
  - commit the charge against "newpage".
 failure
  - commit the charge against "oldpage". (the PCG_USED bit works effectively
    to avoid double-counting)
  - if "oldpage" is obsolete, cancel the charge of PAGE_SIZE.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Reviewed-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <balbir@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--	include/linux/memcontrol.h	19
-rw-r--r--	mm/memcontrol.c	108
-rw-r--r--	mm/migrate.c	42
3 files changed, 73 insertions(+), 96 deletions(-)
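As context for the diff that follows, here is a minimal sketch (not part of the patch) of how the new charge/commit/cancel protocol is driven from the migration path. The flow condenses unmap_and_move() in mm/migrate.c after this change; the helper name sketch_migrate_one() is hypothetical, and locking and error handling are trimmed for illustration.

#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/memcontrol.h>

/*
 * Illustrative caller flow only -- a condensed version of unmap_and_move().
 * mem_cgroup_prepare_migration() pre-charges PAGE_SIZE against the memcg
 * that owns "page" (no target page yet); mem_cgroup_end_migration() later
 * commits that charge against newpage on success, or back against oldpage
 * on failure, relying on the PCG_USED bit to avoid double-counting.
 */
static int sketch_migrate_one(struct page *page, struct page *newpage)
{
	struct mem_cgroup *mem;
	int charge;

	lock_page(page);

	/* "before": charge PAGE_SIZE, no target page yet */
	charge = mem_cgroup_prepare_migration(page, &mem);
	if (charge == -ENOMEM) {
		unlock_page(page);
		return -ENOMEM;		/* the only possible failure */
	}

	/* ... unmap oldpage, copy contents, try to remap to newpage ... */

	/*
	 * "success"/"failure": commit the pre-charge against whichever page
	 * ends up in use; the cancel case (obsolete oldpage) is also handled
	 * inside mem_cgroup_end_migration().
	 */
	mem_cgroup_end_migration(mem, page, newpage);

	unlock_page(page);
	return 0;
}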
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index c592f315cd02..b095f5f6ecf7 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -29,8 +29,6 @@ struct mm_struct;
 
 extern int mem_cgroup_newpage_charge(struct page *page, struct mm_struct *mm,
 				gfp_t gfp_mask);
-extern int mem_cgroup_charge_migrate_fixup(struct page *page,
-				struct mm_struct *mm, gfp_t gfp_mask);
 /* for swap handling */
 extern int mem_cgroup_try_charge(struct mm_struct *mm,
 		gfp_t gfp_mask, struct mem_cgroup **ptr);
@@ -60,8 +58,9 @@ extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
 	((cgroup) == mem_cgroup_from_task((mm)->owner))
 
 extern int
-mem_cgroup_prepare_migration(struct page *page, struct page *newpage);
-extern void mem_cgroup_end_migration(struct page *page);
+mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr);
+extern void mem_cgroup_end_migration(struct mem_cgroup *mem,
+	struct page *oldpage, struct page *newpage);
 
 /*
  * For memory reclaim.
@@ -94,12 +93,6 @@ static inline int mem_cgroup_cache_charge(struct page *page,
 	return 0;
 }
 
-static inline int mem_cgroup_charge_migrate_fixup(struct page *page,
-					struct mm_struct *mm, gfp_t gfp_mask)
-{
-	return 0;
-}
-
 static inline int mem_cgroup_try_charge(struct mm_struct *mm,
 			gfp_t gfp_mask, struct mem_cgroup **ptr)
 {
@@ -144,12 +137,14 @@ static inline int task_in_mem_cgroup(struct task_struct *task,
 }
 
 static inline int
-mem_cgroup_prepare_migration(struct page *page, struct page *newpage)
+mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr)
 {
 	return 0;
 }
 
-static inline void mem_cgroup_end_migration(struct page *page)
+static inline void mem_cgroup_end_migration(struct mem_cgroup *mem,
+					struct page *oldpage,
+					struct page *newpage)
 {
 }
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c34eb52bdc3f..b71195e8198b 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -627,34 +627,6 @@ int mem_cgroup_newpage_charge(struct page *page,
 				MEM_CGROUP_CHARGE_TYPE_MAPPED, NULL);
 }
 
-/*
- * same as mem_cgroup_newpage_charge(), now.
- * But what we assume is different from newpage, and this is special case.
- * treat this in special function. easy for maintenance.
- */
-
-int mem_cgroup_charge_migrate_fixup(struct page *page,
-				struct mm_struct *mm, gfp_t gfp_mask)
-{
-	if (mem_cgroup_subsys.disabled)
-		return 0;
-
-	if (PageCompound(page))
-		return 0;
-
-	if (page_mapped(page) || (page->mapping && !PageAnon(page)))
-		return 0;
-
-	if (unlikely(!mm))
-		mm = &init_mm;
-
-	return mem_cgroup_charge_common(page, mm, gfp_mask,
-				MEM_CGROUP_CHARGE_TYPE_MAPPED, NULL);
-}
-
-
-
-
 int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 			gfp_t gfp_mask)
 {
@@ -697,7 +669,6 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 				MEM_CGROUP_CHARGE_TYPE_SHMEM, NULL);
 }
 
-
 void mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr)
 {
 	struct page_cgroup *pc;
@@ -782,13 +753,13 @@ void mem_cgroup_uncharge_cache_page(struct page *page)
 }
 
 /*
- * Before starting migration, account against new page.
+ * Before starting migration, account PAGE_SIZE to mem_cgroup that the old
+ * page belongs to.
  */
-int mem_cgroup_prepare_migration(struct page *page, struct page *newpage)
+int mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr)
 {
 	struct page_cgroup *pc;
 	struct mem_cgroup *mem = NULL;
-	enum charge_type ctype = MEM_CGROUP_CHARGE_TYPE_MAPPED;
 	int ret = 0;
 
 	if (mem_cgroup_subsys.disabled)
@@ -799,42 +770,67 @@ int mem_cgroup_prepare_migration(struct page *page, struct page *newpage)
 	if (PageCgroupUsed(pc)) {
 		mem = pc->mem_cgroup;
 		css_get(&mem->css);
-		if (PageCgroupCache(pc)) {
-			if (page_is_file_cache(page))
-				ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
-			else
-				ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM;
-		}
 	}
 	unlock_page_cgroup(pc);
+
 	if (mem) {
-		ret = mem_cgroup_charge_common(newpage, NULL,
-					GFP_HIGHUSER_MOVABLE,
-					ctype, mem);
+		ret = mem_cgroup_try_charge(NULL, GFP_HIGHUSER_MOVABLE, &mem);
 		css_put(&mem->css);
 	}
+	*ptr = mem;
 	return ret;
 }
 
 /* remove redundant charge if migration failed*/
-void mem_cgroup_end_migration(struct page *newpage)
+void mem_cgroup_end_migration(struct mem_cgroup *mem,
+		struct page *oldpage, struct page *newpage)
 {
+	struct page *target, *unused;
+	struct page_cgroup *pc;
+	enum charge_type ctype;
+
+	if (!mem)
+		return;
+
+	/* at migration success, oldpage->mapping is NULL. */
+	if (oldpage->mapping) {
+		target = oldpage;
+		unused = NULL;
+	} else {
+		target = newpage;
+		unused = oldpage;
+	}
+
+	if (PageAnon(target))
+		ctype = MEM_CGROUP_CHARGE_TYPE_MAPPED;
+	else if (page_is_file_cache(target))
+		ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
+	else
+		ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM;
+
+	/* unused page is not on radix-tree now. */
+	if (unused && ctype != MEM_CGROUP_CHARGE_TYPE_MAPPED)
+		__mem_cgroup_uncharge_common(unused, ctype);
+
+	pc = lookup_page_cgroup(target);
 	/*
-	 * At success, page->mapping is not NULL.
-	 * special rollback care is necessary when
-	 * 1. at migration failure. (newpage->mapping is cleared in this case)
-	 * 2. the newpage was moved but not remapped again because the task
-	 *    exits and the newpage is obsolete. In this case, the new page
-	 *    may be a swapcache. So, we just call mem_cgroup_uncharge_page()
-	 *    always for avoiding mess. The page_cgroup will be removed if
-	 *    unnecessary. File cache pages is still on radix-tree. Don't
-	 *    care it.
+	 * __mem_cgroup_commit_charge() check PCG_USED bit of page_cgroup.
+	 * So, double-counting is effectively avoided.
+	 */
+	__mem_cgroup_commit_charge(mem, pc, ctype);
+
+	/*
+	 * Both of oldpage and newpage are still under lock_page().
+	 * Then, we don't have to care about race in radix-tree.
+	 * But we have to be careful that this page is unmapped or not.
+	 *
+	 * There is a case for !page_mapped(). At the start of
+	 * migration, oldpage was mapped. But now, it's zapped.
+	 * But we know *target* page is not freed/reused under us.
+	 * mem_cgroup_uncharge_page() does all necessary checks.
 	 */
-	if (!newpage->mapping)
-		__mem_cgroup_uncharge_common(newpage,
-				MEM_CGROUP_CHARGE_TYPE_FORCE);
-	else if (PageAnon(newpage))
-		mem_cgroup_uncharge_page(newpage);
+	if (ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED)
+		mem_cgroup_uncharge_page(target);
 }
 
 /*
diff --git a/mm/migrate.c b/mm/migrate.c
index 246dcb973ae7..a30ea5fcf9f1 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -121,20 +121,6 @@ static void remove_migration_pte(struct vm_area_struct *vma,
 	if (!is_migration_entry(entry) || migration_entry_to_page(entry) != old)
 		goto out;
 
-	/*
-	 * Yes, ignore the return value from a GFP_ATOMIC mem_cgroup_charge.
-	 * Failure is not an option here: we're now expected to remove every
-	 * migration pte, and will cause crashes otherwise. Normally this
-	 * is not an issue: mem_cgroup_prepare_migration bumped up the old
-	 * page_cgroup count for safety, that's now attached to the new page,
-	 * so this charge should just be another incrementation of the count,
-	 * to keep in balance with rmap.c's mem_cgroup_uncharging. But if
-	 * there's been a force_empty, those reference counts may no longer
-	 * be reliable, and this charge can actually fail: oh well, we don't
-	 * make the situation any worse by proceeding as if it had succeeded.
-	 */
-	mem_cgroup_charge_migrate_fixup(new, mm, GFP_ATOMIC);
-
 	get_page(new);
 	pte = pte_mkold(mk_pte(new, vma->vm_page_prot));
 	if (is_write_migration_entry(entry))
@@ -378,9 +364,6 @@ static void migrate_page_copy(struct page *newpage, struct page *page)
 	anon = PageAnon(page);
 	page->mapping = NULL;
 
-	if (!anon) /* This page was removed from radix-tree. */
-		mem_cgroup_uncharge_cache_page(page);
-
 	/*
 	 * If any waiters have accumulated on the new page then
 	 * wake them up.
@@ -614,6 +597,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 	struct page *newpage = get_new_page(page, private, &result);
 	int rcu_locked = 0;
 	int charge = 0;
+	struct mem_cgroup *mem;
 
 	if (!newpage)
 		return -ENOMEM;
@@ -623,24 +607,26 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 		goto move_newpage;
 	}
 
-	charge = mem_cgroup_prepare_migration(page, newpage);
-	if (charge == -ENOMEM) {
-		rc = -ENOMEM;
-		goto move_newpage;
-	}
 	/* prepare cgroup just returns 0 or -ENOMEM */
-	BUG_ON(charge);
-
 	rc = -EAGAIN;
+
 	if (!trylock_page(page)) {
 		if (!force)
 			goto move_newpage;
 		lock_page(page);
 	}
 
+	/* charge against new page */
+	charge = mem_cgroup_prepare_migration(page, &mem);
+	if (charge == -ENOMEM) {
+		rc = -ENOMEM;
+		goto unlock;
+	}
+	BUG_ON(charge);
+
 	if (PageWriteback(page)) {
 		if (!force)
-			goto unlock;
+			goto uncharge;
 		wait_on_page_writeback(page);
 	}
 	/*
@@ -693,7 +679,9 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 rcu_unlock:
 	if (rcu_locked)
 		rcu_read_unlock();
-
+uncharge:
+	if (!charge)
+		mem_cgroup_end_migration(mem, page, newpage);
 unlock:
 	unlock_page(page);
 
@@ -709,8 +697,6 @@ unlock:
 	}
 
 move_newpage:
-	if (!charge)
-		mem_cgroup_end_migration(newpage);
 
 	/*
 	 * Move the new page to the LRU. If migration was not successful