memcg: simple migration handling

Currently, the memcg "charge" during page migration is managed in the
following manner (assume page contents are migrated from oldpage to newpage):

  before
    - "newpage" is charged before migration.
  at success
    - "oldpage" is uncharged somewhere (unmap, radix-tree-replace).
  at failure
    - "newpage" is uncharged.
    - "oldpage" is charged if necessary (*1)

But (*1) is not reliable, because it is done with GFP_ATOMIC.

This patch changes the behavior as follows, using charge/commit/cancel ops:

  before
    - charge PAGE_SIZE (no target page)
  success
    - commit charge against "newpage".
  failure
    - commit charge against "oldpage".
      (PCG_USED bit works effectively to avoid double-counting)
    - if "oldpage" is obsolete, cancel charge of PAGE_SIZE.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Reviewed-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <balbir@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
author: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> 2009-01-07 21:07:50 -0500
committer: Linus Torvalds <torvalds@linux-foundation.org> 2009-01-08 11:31:04 -0500
commit: 01b1ae63c2270cbacfd43fea94578c17950eb548 (patch)
tree: ab0275f32e8548c4413014d43cab1f52f03c9c5c /mm
parent: bced0520fe462bb94021dcabd32e99630c171be2 (diff)
2 files changed, 66 insertions, 84 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c34eb52bdc3f..b71195e8198b 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -627,34 +627,6 @@ int mem_cgroup_newpage_charge(struct page *page,
                                MEM_CGROUP_CHARGE_TYPE_MAPPED, NULL);
 }
-/*
- * same as mem_cgroup_newpage_charge(), now.
- * But what we assume is different from newpage, and this is special case.
- * treat this in special function. easy for maintenance.
- */
-int mem_cgroup_charge_migrate_fixup(struct page *page,
-                                struct mm_struct *mm, gfp_t gfp_mask)
-{
-        if (mem_cgroup_subsys.disabled)
-                return 0;
-        if (PageCompound(page))
-                return 0;
-        if (page_mapped(page) || (page->mapping && !PageAnon(page)))
-                return 0;
-        if (unlikely(!mm))
-                mm = &init_mm;
-        return mem_cgroup_charge_common(page, mm, gfp_mask,
-                                MEM_CGROUP_CHARGE_TYPE_MAPPED, NULL);
-}
 int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
                                gfp_t gfp_mask)
 {
@@ -697,7 +669,6 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
                                MEM_CGROUP_CHARGE_TYPE_SHMEM, NULL);
 }
 void mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr)
 {
        struct page_cgroup *pc;
@@ -782,13 +753,13 @@ void mem_cgroup_uncharge_cache_page(struct page *page)
 }
 /*
- * Before starting migration, account against new page.
+ * Before starting migration, account PAGE_SIZE to mem_cgroup that the old
+ * page belongs to.
 */
-int mem_cgroup_prepare_migration(struct page *page, struct page *newpage)
+int mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr)
 {
        struct page_cgroup *pc;
        struct mem_cgroup *mem = NULL;
-        enum charge_type ctype = MEM_CGROUP_CHARGE_TYPE_MAPPED;
        int ret = 0;
        if (mem_cgroup_subsys.disabled)
@@ -799,42 +770,67 @@ int mem_cgroup_prepare_migration(struct page *page, struct page *newpage)
        if (PageCgroupUsed(pc)) {
                mem = pc->mem_cgroup;
                css_get(&mem->css);
-                if (PageCgroupCache(pc)) {
-                        if (page_is_file_cache(page))
-                                ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
-                        else
-                                ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM;
-                }
        }
        unlock_page_cgroup(pc);
        if (mem) {
-                ret = mem_cgroup_charge_common(newpage, NULL,
+                ret = mem_cgroup_try_charge(NULL, GFP_HIGHUSER_MOVABLE, &mem);
-                                        GFP_HIGHUSER_MOVABLE,
-                                        ctype, mem);
                css_put(&mem->css);
        }
+        *ptr = mem;
        return ret;
 }
 /* remove redundant charge if migration failed*/
-void mem_cgroup_end_migration(struct page *newpage)
+void mem_cgroup_end_migration(struct mem_cgroup *mem,
+                struct page *oldpage, struct page *newpage)
 {
+        struct page *target, *unused;
+        struct page_cgroup *pc;
+        enum charge_type ctype;
+        if (!mem)
+                return;
+        /* at migration success, oldpage->mapping is NULL. */
+        if (oldpage->mapping) {
+                target = oldpage;
+                unused = NULL;
+        } else {
+                target = newpage;
+                unused = oldpage;
+        }
+        if (PageAnon(target))
+                ctype = MEM_CGROUP_CHARGE_TYPE_MAPPED;
+        else if (page_is_file_cache(target))
+                ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
+        else
+                ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM;
+        /* unused page is not on radix-tree now. */
+        if (unused && ctype != MEM_CGROUP_CHARGE_TYPE_MAPPED)
+                __mem_cgroup_uncharge_common(unused, ctype);
+        pc = lookup_page_cgroup(target);
        /*
-         * At success, page->mapping is not NULL.
+         * __mem_cgroup_commit_charge() check PCG_USED bit of page_cgroup.
-         * special rollback care is necessary when
+         * So, double-counting is effectively avoided.
-         * 1. at migration failure. (newpage->mapping is cleared in this case)
+         */
-         * 2. the newpage was moved but not remapped again because the task
+        __mem_cgroup_commit_charge(mem, pc, ctype);
-         *    exits and the newpage is obsolete. In this case, the new page
-         *    may be a swapcache. So, we just call mem_cgroup_uncharge_page()
+        /*
-         *    always for avoiding mess. The  page_cgroup will be removed if
+         * Both of oldpage and newpage are still under lock_page().
-         *    unnecessary. File cache pages is still on radix-tree. Don't
+         * Then, we don't have to care about race in radix-tree.
-         *    care it.
+         * But we have to be careful that this page is unmapped or not.
+         *
+         * There is a case for !page_mapped(). At the start of
+         * migration, oldpage was mapped. But now, it's zapped.
+         * But we know *target* page is not freed/reused under us.
+         * mem_cgroup_uncharge_page() does all necessary checks.
         */
-        if (!newpage->mapping)
+        if (ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED)
-                __mem_cgroup_uncharge_common(newpage,
+                mem_cgroup_uncharge_page(target);
-                                MEM_CGROUP_CHARGE_TYPE_FORCE);
-        else if (PageAnon(newpage))
-                mem_cgroup_uncharge_page(newpage);
 }
 /*
diff --git a/mm/migrate.c b/mm/migrate.c
index 246dcb973ae7..a30ea5fcf9f1 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -121,20 +121,6 @@ static void remove_migration_pte(struct vm_area_struct *vma,
        if (!is_migration_entry(entry) || migration_entry_to_page(entry) != old)
                goto out;
-        /*
-         * Yes, ignore the return value from a GFP_ATOMIC mem_cgroup_charge.
-         * Failure is not an option here: we're now expected to remove every
-         * migration pte, and will cause crashes otherwise.  Normally this
-         * is not an issue: mem_cgroup_prepare_migration bumped up the old
-         * page_cgroup count for safety, that's now attached to the new page,
-         * so this charge should just be another incrementation of the count,
-         * to keep in balance with rmap.c's mem_cgroup_uncharging.  But if
-         * there's been a force_empty, those reference counts may no longer
-         * be reliable, and this charge can actually fail: oh well, we don't
-         * make the situation any worse by proceeding as if it had succeeded.
-         */
-        mem_cgroup_charge_migrate_fixup(new, mm, GFP_ATOMIC);
        get_page(new);
        pte = pte_mkold(mk_pte(new, vma->vm_page_prot));
        if (is_write_migration_entry(entry))
@@ -378,9 +364,6 @@ static void migrate_page_copy(struct page *newpage, struct page *page)
        anon = PageAnon(page);
        page->mapping = NULL;
-        if (!anon) /* This page was removed from radix-tree. */
-                mem_cgroup_uncharge_cache_page(page);
        /*
         * If any waiters have accumulated on the new page then
         * wake them up.
@@ -614,6 +597,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
        struct page *newpage = get_new_page(page, private, &result);
        int rcu_locked = 0;
        int charge = 0;
+        struct mem_cgroup *mem;
        if (!newpage)
                return -ENOMEM;
@@ -623,24 +607,26 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
                goto move_newpage;
        }
-        charge = mem_cgroup_prepare_migration(page, newpage);
-        if (charge == -ENOMEM) {
-                rc = -ENOMEM;
-                goto move_newpage;
-        }
        /* prepare cgroup just returns 0 or -ENOMEM */
-        BUG_ON(charge);
        rc = -EAGAIN;
        if (!trylock_page(page)) {
                if (!force)
                        goto move_newpage;
                lock_page(page);
        }
+        /* charge against new page */
+        charge = mem_cgroup_prepare_migration(page, &mem);
+        if (charge == -ENOMEM) {
+                rc = -ENOMEM;
+                goto unlock;
+        }
+        BUG_ON(charge);
        if (PageWriteback(page)) {
                if (!force)
-                        goto unlock;
+                        goto uncharge;
                wait_on_page_writeback(page);
        }
        /*
@@ -693,7 +679,9 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 rcu_unlock:
        if (rcu_locked)
                rcu_read_unlock();
+uncharge:
+        if (!charge)
+                mem_cgroup_end_migration(mem, page, newpage);
 unlock:
        unlock_page(page);
@@ -709,8 +697,6 @@ unlock:
        }
 move_newpage:
-        if (!charge)
-                mem_cgroup_end_migration(newpage);
        /*
         * Move the new page to the LRU. If migration was not successful
author	KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>	2009-01-07 21:07:50 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2009-01-08 11:31:04 -0500
commit	01b1ae63c2270cbacfd43fea94578c17950eb548 (patch)
tree	ab0275f32e8548c4413014d43cab1f52f03c9c5c /mm
parent	bced0520fe462bb94021dcabd32e99630c171be2 (diff)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c34eb52bdc3f..b71195e8198b 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -627,34 +627,6 @@ int mem_cgroup_newpage_charge(struct page *page,
627	MEM_CGROUP_CHARGE_TYPE_MAPPED, NULL);	627	MEM_CGROUP_CHARGE_TYPE_MAPPED, NULL);
628	}	628	}
629		629
630	/*
631	* same as mem_cgroup_newpage_charge(), now.
632	* But what we assume is different from newpage, and this is special case.
633	* treat this in special function. easy for maintenance.
634	*/
635
636	int mem_cgroup_charge_migrate_fixup(struct page *page,
637	struct mm_struct *mm, gfp_t gfp_mask)
638	{
639	if (mem_cgroup_subsys.disabled)
640	return 0;
641
642	if (PageCompound(page))
643	return 0;
644
645	if (page_mapped(page) || (page->mapping && !PageAnon(page)))
646	return 0;
647
648	if (unlikely(!mm))
649	mm = &init_mm;
650
651	return mem_cgroup_charge_common(page, mm, gfp_mask,
652	MEM_CGROUP_CHARGE_TYPE_MAPPED, NULL);
653	}
654
655
656
657
658	int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,	630	int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
659	gfp_t gfp_mask)	631	gfp_t gfp_mask)
660	{	632	{
@@ -697,7 +669,6 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
697	MEM_CGROUP_CHARGE_TYPE_SHMEM, NULL);	669	MEM_CGROUP_CHARGE_TYPE_SHMEM, NULL);
698	}	670	}
699		671
700
701	void mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr)	672	void mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr)
702	{	673	{
703	struct page_cgroup *pc;	674	struct page_cgroup *pc;
@@ -782,13 +753,13 @@ void mem_cgroup_uncharge_cache_page(struct page *page)
782	}	753	}
783		754
784	/*	755	/*
785	* Before starting migration, account against new page.	756	* Before starting migration, account PAGE_SIZE to mem_cgroup that the old
		757	* page belongs to.
786	*/	758	*/
787	int mem_cgroup_prepare_migration(struct page *page, struct page *newpage)	759	int mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr)
788	{	760	{
789	struct page_cgroup *pc;	761	struct page_cgroup *pc;
790	struct mem_cgroup *mem = NULL;	762	struct mem_cgroup *mem = NULL;
791	enum charge_type ctype = MEM_CGROUP_CHARGE_TYPE_MAPPED;
792	int ret = 0;	763	int ret = 0;
793		764
794	if (mem_cgroup_subsys.disabled)	765	if (mem_cgroup_subsys.disabled)
@@ -799,42 +770,67 @@ int mem_cgroup_prepare_migration(struct page *page, struct page *newpage)
799	if (PageCgroupUsed(pc)) {	770	if (PageCgroupUsed(pc)) {
800	mem = pc->mem_cgroup;	771	mem = pc->mem_cgroup;
801	css_get(&mem->css);	772	css_get(&mem->css);
802	if (PageCgroupCache(pc)) {
803	if (page_is_file_cache(page))
804	ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
805	else
806	ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM;
807	}
808	}	773	}
809	unlock_page_cgroup(pc);	774	unlock_page_cgroup(pc);
		775
810	if (mem) {	776	if (mem) {
811	ret = mem_cgroup_charge_common(newpage, NULL,	777	ret = mem_cgroup_try_charge(NULL, GFP_HIGHUSER_MOVABLE, &mem);
812	GFP_HIGHUSER_MOVABLE,
813	ctype, mem);
814	css_put(&mem->css);	778	css_put(&mem->css);
815	}	779	}
		780	*ptr = mem;
816	return ret;	781	return ret;
817	}	782	}
818		783
819	/* remove redundant charge if migration failed*/	784	/* remove redundant charge if migration failed*/
820	void mem_cgroup_end_migration(struct page *newpage)	785	void mem_cgroup_end_migration(struct mem_cgroup *mem,
		786	struct page *oldpage, struct page *newpage)
821	{	787	{
		788	struct page *target, *unused;
		789	struct page_cgroup *pc;
		790	enum charge_type ctype;
		791
		792	if (!mem)
		793	return;
		794
		795	/* at migration success, oldpage->mapping is NULL. */
		796	if (oldpage->mapping) {
		797	target = oldpage;
		798	unused = NULL;
		799	} else {
		800	target = newpage;
		801	unused = oldpage;
		802	}
		803
		804	if (PageAnon(target))
		805	ctype = MEM_CGROUP_CHARGE_TYPE_MAPPED;
		806	else if (page_is_file_cache(target))
		807	ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
		808	else
		809	ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM;
		810
		811	/* unused page is not on radix-tree now. */
		812	if (unused && ctype != MEM_CGROUP_CHARGE_TYPE_MAPPED)
		813	__mem_cgroup_uncharge_common(unused, ctype);
		814
		815	pc = lookup_page_cgroup(target);
822	/*	816	/*
823	* At success, page->mapping is not NULL.	817	* __mem_cgroup_commit_charge() check PCG_USED bit of page_cgroup.
824	* special rollback care is necessary when	818	* So, double-counting is effectively avoided.
825	* 1. at migration failure. (newpage->mapping is cleared in this case)	819	*/
826	* 2. the newpage was moved but not remapped again because the task	820	__mem_cgroup_commit_charge(mem, pc, ctype);
827	* exits and the newpage is obsolete. In this case, the new page	821
828	* may be a swapcache. So, we just call mem_cgroup_uncharge_page()	822	/*
829	* always for avoiding mess. The page_cgroup will be removed if	823	* Both of oldpage and newpage are still under lock_page().
830	* unnecessary. File cache pages is still on radix-tree. Don't	824	* Then, we don't have to care about race in radix-tree.
831	* care it.	825	* But we have to be careful that this page is unmapped or not.
		826	*
		827	* There is a case for !page_mapped(). At the start of
		828	* migration, oldpage was mapped. But now, it's zapped.
		829	* But we know *target* page is not freed/reused under us.
		830	* mem_cgroup_uncharge_page() does all necessary checks.
832	*/	831	*/
833	if (!newpage->mapping)	832	if (ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED)
834	__mem_cgroup_uncharge_common(newpage,	833	mem_cgroup_uncharge_page(target);
835	MEM_CGROUP_CHARGE_TYPE_FORCE);
836	else if (PageAnon(newpage))
837	mem_cgroup_uncharge_page(newpage);
838	}	834	}
839		835
840	/*	836	/*


diff --git a/mm/migrate.c b/mm/migrate.c
index 246dcb973ae7..a30ea5fcf9f1 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -121,20 +121,6 @@ static void remove_migration_pte(struct vm_area_struct *vma,
121	if (!is_migration_entry(entry) || migration_entry_to_page(entry) != old)	121	if (!is_migration_entry(entry) || migration_entry_to_page(entry) != old)
122	goto out;	122	goto out;
123		123
124	/*
125	* Yes, ignore the return value from a GFP_ATOMIC mem_cgroup_charge.
126	* Failure is not an option here: we're now expected to remove every
127	* migration pte, and will cause crashes otherwise. Normally this
128	* is not an issue: mem_cgroup_prepare_migration bumped up the old
129	* page_cgroup count for safety, that's now attached to the new page,
130	* so this charge should just be another incrementation of the count,
131	* to keep in balance with rmap.c's mem_cgroup_uncharging. But if
132	* there's been a force_empty, those reference counts may no longer
133	* be reliable, and this charge can actually fail: oh well, we don't
134	* make the situation any worse by proceeding as if it had succeeded.
135	*/
136	mem_cgroup_charge_migrate_fixup(new, mm, GFP_ATOMIC);
137
138	get_page(new);	124	get_page(new);
139	pte = pte_mkold(mk_pte(new, vma->vm_page_prot));	125	pte = pte_mkold(mk_pte(new, vma->vm_page_prot));
140	if (is_write_migration_entry(entry))	126	if (is_write_migration_entry(entry))
@@ -378,9 +364,6 @@ static void migrate_page_copy(struct page *newpage, struct page *page)
378	anon = PageAnon(page);	364	anon = PageAnon(page);
379	page->mapping = NULL;	365	page->mapping = NULL;
380		366
381	if (!anon) /* This page was removed from radix-tree. */
382	mem_cgroup_uncharge_cache_page(page);
383
384	/*	367	/*
385	* If any waiters have accumulated on the new page then	368	* If any waiters have accumulated on the new page then
386	* wake them up.	369	* wake them up.
@@ -614,6 +597,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
614	struct page *newpage = get_new_page(page, private, &result);	597	struct page *newpage = get_new_page(page, private, &result);
615	int rcu_locked = 0;	598	int rcu_locked = 0;
616	int charge = 0;	599	int charge = 0;
		600	struct mem_cgroup *mem;
617		601
618	if (!newpage)	602	if (!newpage)
619	return -ENOMEM;	603	return -ENOMEM;
@@ -623,24 +607,26 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
623	goto move_newpage;	607	goto move_newpage;
624	}	608	}
625		609
626	charge = mem_cgroup_prepare_migration(page, newpage);
627	if (charge == -ENOMEM) {
628	rc = -ENOMEM;
629	goto move_newpage;
630	}
631	/* prepare cgroup just returns 0 or -ENOMEM */	610	/* prepare cgroup just returns 0 or -ENOMEM */
632	BUG_ON(charge);
633
634	rc = -EAGAIN;	611	rc = -EAGAIN;
		612
635	if (!trylock_page(page)) {	613	if (!trylock_page(page)) {
636	if (!force)	614	if (!force)
637	goto move_newpage;	615	goto move_newpage;
638	lock_page(page);	616	lock_page(page);
639	}	617	}
640		618
		619	/* charge against new page */
		620	charge = mem_cgroup_prepare_migration(page, &mem);
		621	if (charge == -ENOMEM) {
		622	rc = -ENOMEM;
		623	goto unlock;
		624	}
		625	BUG_ON(charge);
		626
641	if (PageWriteback(page)) {	627	if (PageWriteback(page)) {
642	if (!force)	628	if (!force)
643	goto unlock;	629	goto uncharge;
644	wait_on_page_writeback(page);	630	wait_on_page_writeback(page);
645	}	631	}
646	/*	632	/*
@@ -693,7 +679,9 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
693	rcu_unlock:	679	rcu_unlock:
694	if (rcu_locked)	680	if (rcu_locked)
695	rcu_read_unlock();	681	rcu_read_unlock();
696		682	uncharge:
		683	if (!charge)
		684	mem_cgroup_end_migration(mem, page, newpage);
697	unlock:	685	unlock:
698	unlock_page(page);	686	unlock_page(page);
699		687
@@ -709,8 +697,6 @@ unlock:
709	}	697	}
710		698
711	move_newpage:	699	move_newpage:
712	if (!charge)
713	mem_cgroup_end_migration(newpage);
714		700
715	/*	701	/*
716	* Move the new page to the LRU. If migration was not successful	702	* Move the new page to the LRU. If migration was not successful