memcg: simplify force_empty and move_lists

As for force_empty, though this may not be the main topic here, mem_cgroup_force_empty_list() can be implemented simpler. It is possible to make the function just call mem_cgroup_uncharge_page() instead of releasing page_cgroups by itself. The tip is to call get_page() before invoking mem_cgroup_uncharge_page(), so the page won't be released during this function. Kamezawa-san points out that by the time mem_cgroup_uncharge_page() uncharges, the page might have been reassigned to an lru of a different mem_cgroup, and now be emptied from that; but Hugh claims that's okay, the end state is the same as when it hasn't gone to another list. And once force_empty stops taking lock_page_cgroup within mz->lru_lock, mem_cgroup_move_lists() can be simplified to take mz->lru_lock directly while holding page_cgroup lock (but still has to use try_lock_page_cgroup). Signed-off-by: Hirokazu Takahashi <taka@valinux.co.jp> Signed-off-by: Hugh Dickins <hugh@veritas.com> Cc: David Rientjes <rientjes@google.com> Cc: Balbir Singh <balbir@linux.vnet.ibm.com> Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: YAMAMOTO Takashi <yamamoto@valinux.co.jp> Cc: Paul Menage <menage@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
author: Hirokazu Takahashi <taka@valinux.co.jp> 2008-03-04 17:29:15 -0500
committer: Linus Torvalds <torvalds@woody.linux-foundation.org> 2008-03-04 19:35:15 -0500
commit: 9b3c0a07e0fca35e36751680de3e4c76dbff5df3 (patch)
tree: 3dd6355a88282ea5577a05b7f2d2d3ae216ab9a8
parent: 2680eed723b664d83e6181ae275fac0ec8fa05ff (diff)
1 files changed, 13 insertions, 49 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index dcbe30aad1da..f72067094e05 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -353,7 +353,6 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem)
 void mem_cgroup_move_lists(struct page *page, bool active)
 {
        struct page_cgroup *pc;
-        struct mem_cgroup *mem;
        struct mem_cgroup_per_zone *mz;
        unsigned long flags;
@@ -367,35 +366,14 @@ void mem_cgroup_move_lists(struct page *page, bool active)
        if (!try_lock_page_cgroup(page))
                return;
-        /*
-         * Now page_cgroup is stable, but we cannot acquire mz->lru_lock
-         * while holding it, because mem_cgroup_force_empty_list does the
-         * reverse.  Get a hold on the mem_cgroup before unlocking, so that
-         * the zoneinfo remains stable, then take mz->lru_lock; then check
-         * that page still points to pc and pc (even if freed and reassigned
-         * to that same page meanwhile) still points to the same mem_cgroup.
-         * Then we know mz still points to the right spinlock, so it's safe
-         * to move_lists (page->page_cgroup might be reset while we do so, but
-         * that doesn't matter: pc->page is stable till we drop mz->lru_lock).
-         * We're being a little naughty not to try_lock_page_cgroup again
-         * inside there, but we are safe, aren't we?  Aren't we?  Whistle...
-         */
        pc = page_get_page_cgroup(page);
        if (pc) {
-                mem = pc->mem_cgroup;
                mz = page_cgroup_zoneinfo(pc);
-                css_get(&mem->css);
-                unlock_page_cgroup(page);
                spin_lock_irqsave(&mz->lru_lock, flags);
-                if (page_get_page_cgroup(page) == pc && pc->mem_cgroup == mem)
+                __mem_cgroup_move_lists(pc, active);
-                        __mem_cgroup_move_lists(pc, active);
                spin_unlock_irqrestore(&mz->lru_lock, flags);
+        }
-                css_put(&mem->css);
+        unlock_page_cgroup(page);
-        } else
-                unlock_page_cgroup(page);
 }
 /*
@@ -789,7 +767,7 @@ static void mem_cgroup_force_empty_list(struct mem_cgroup *mem,
 {
        struct page_cgroup *pc;
        struct page *page;
-        int count;
+        int count = FORCE_UNCHARGE_BATCH;
        unsigned long flags;
        struct list_head *list;
@@ -798,35 +776,21 @@ static void mem_cgroup_force_empty_list(struct mem_cgroup *mem,
        else
                list = &mz->inactive_list;
-        if (list_empty(list))
-                return;
-retry:
-        count = FORCE_UNCHARGE_BATCH;
        spin_lock_irqsave(&mz->lru_lock, flags);
+        while (!list_empty(list)) {
-        while (--count && !list_empty(list)) {
                pc = list_entry(list->prev, struct page_cgroup, lru);
                page = pc->page;
-                lock_page_cgroup(page);
+                get_page(page);
-                if (page_get_page_cgroup(page) == pc) {
+                spin_unlock_irqrestore(&mz->lru_lock, flags);
-                        page_assign_page_cgroup(page, NULL);
+                mem_cgroup_uncharge_page(page);
-                        unlock_page_cgroup(page);
+                put_page(page);
-                        __mem_cgroup_remove_list(pc);
+                if (--count <= 0) {
-                        res_counter_uncharge(&mem->res, PAGE_SIZE);
+                        count = FORCE_UNCHARGE_BATCH;
-                        css_put(&mem->css);
+                        cond_resched();
-                        kfree(pc);
-                } else {
-                        /* racing uncharge: let page go then retry */
-                        unlock_page_cgroup(page);
-                        break;
                }
+                spin_lock_irqsave(&mz->lru_lock, flags);
        }
        spin_unlock_irqrestore(&mz->lru_lock, flags);
-        if (!list_empty(list)) {
-                cond_resched();
-                goto retry;
-        }
 }
 /*
author	Hirokazu Takahashi <taka@valinux.co.jp>	2008-03-04 17:29:15 -0500
committer	Linus Torvalds <torvalds@woody.linux-foundation.org>	2008-03-04 19:35:15 -0500
commit	9b3c0a07e0fca35e36751680de3e4c76dbff5df3 (patch)
tree	3dd6355a88282ea5577a05b7f2d2d3ae216ab9a8
parent	2680eed723b664d83e6181ae275fac0ec8fa05ff (diff)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c index dcbe30aad1da..f72067094e05 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c
@@ -353,7 +353,6 @@ int task_in_mem_cgroup(struct task_struct task, const struct mem_cgroup mem)
353	void mem_cgroup_move_lists(struct page *page, bool active)	353	void mem_cgroup_move_lists(struct page *page, bool active)
354	{	354	{
355	struct page_cgroup *pc;	355	struct page_cgroup *pc;
356	struct mem_cgroup *mem;
357	struct mem_cgroup_per_zone *mz;	356	struct mem_cgroup_per_zone *mz;
358	unsigned long flags;	357	unsigned long flags;
359		358
@@ -367,35 +366,14 @@ void mem_cgroup_move_lists(struct page *page, bool active)
367	if (!try_lock_page_cgroup(page))	366	if (!try_lock_page_cgroup(page))
368	return;	367	return;
369		368
370	/*
371	* Now page_cgroup is stable, but we cannot acquire mz->lru_lock
372	* while holding it, because mem_cgroup_force_empty_list does the
373	* reverse. Get a hold on the mem_cgroup before unlocking, so that
374	* the zoneinfo remains stable, then take mz->lru_lock; then check
375	* that page still points to pc and pc (even if freed and reassigned
376	* to that same page meanwhile) still points to the same mem_cgroup.
377	* Then we know mz still points to the right spinlock, so it's safe
378	* to move_lists (page->page_cgroup might be reset while we do so, but
379	* that doesn't matter: pc->page is stable till we drop mz->lru_lock).
380	* We're being a little naughty not to try_lock_page_cgroup again
381	* inside there, but we are safe, aren't we? Aren't we? Whistle...
382	*/
383	pc = page_get_page_cgroup(page);	369	pc = page_get_page_cgroup(page);
384	if (pc) {	370	if (pc) {
385	mem = pc->mem_cgroup;
386	mz = page_cgroup_zoneinfo(pc);	371	mz = page_cgroup_zoneinfo(pc);
387	css_get(&mem->css);
388
389	unlock_page_cgroup(page);
390
391	spin_lock_irqsave(&mz->lru_lock, flags);	372	spin_lock_irqsave(&mz->lru_lock, flags);
392	if (page_get_page_cgroup(page) == pc && pc->mem_cgroup == mem)	373	__mem_cgroup_move_lists(pc, active);
393	__mem_cgroup_move_lists(pc, active);
394	spin_unlock_irqrestore(&mz->lru_lock, flags);	374	spin_unlock_irqrestore(&mz->lru_lock, flags);
395		375	}
396	css_put(&mem->css);	376	unlock_page_cgroup(page);
397	} else
398	unlock_page_cgroup(page);
399	}	377	}
400		378
401	/*	379	/*
@@ -789,7 +767,7 @@ static void mem_cgroup_force_empty_list(struct mem_cgroup *mem,
789	{	767	{
790	struct page_cgroup *pc;	768	struct page_cgroup *pc;
791	struct page *page;	769	struct page *page;
792	int count;	770	int count = FORCE_UNCHARGE_BATCH;
793	unsigned long flags;	771	unsigned long flags;
794	struct list_head *list;	772	struct list_head *list;
795		773
@@ -798,35 +776,21 @@ static void mem_cgroup_force_empty_list(struct mem_cgroup *mem,
798	else	776	else
799	list = &mz->inactive_list;	777	list = &mz->inactive_list;
800		778
801	if (list_empty(list))
802	return;
803	retry:
804	count = FORCE_UNCHARGE_BATCH;
805	spin_lock_irqsave(&mz->lru_lock, flags);	779	spin_lock_irqsave(&mz->lru_lock, flags);
806		780	while (!list_empty(list)) {
807	while (--count && !list_empty(list)) {
808	pc = list_entry(list->prev, struct page_cgroup, lru);	781	pc = list_entry(list->prev, struct page_cgroup, lru);
809	page = pc->page;	782	page = pc->page;
810	lock_page_cgroup(page);	783	get_page(page);
811	if (page_get_page_cgroup(page) == pc) {	784	spin_unlock_irqrestore(&mz->lru_lock, flags);
812	page_assign_page_cgroup(page, NULL);	785	mem_cgroup_uncharge_page(page);
813	unlock_page_cgroup(page);	786	put_page(page);
814	__mem_cgroup_remove_list(pc);	787	if (--count <= 0) {
815	res_counter_uncharge(&mem->res, PAGE_SIZE);	788	count = FORCE_UNCHARGE_BATCH;
816	css_put(&mem->css);	789	cond_resched();
817	kfree(pc);
818	} else {
819	/* racing uncharge: let page go then retry */
820	unlock_page_cgroup(page);
821	break;
822	}	790	}
		791	spin_lock_irqsave(&mz->lru_lock, flags);
823	}	792	}
824
825	spin_unlock_irqrestore(&mz->lru_lock, flags);	793	spin_unlock_irqrestore(&mz->lru_lock, flags);
826	if (!list_empty(list)) {
827	cond_resched();
828	goto retry;
829	}
830	}	794	}
831		795
832	/*	796	/*