cgroups: add an owner to the mm_struct

Remove the mem_cgroup member from mm_struct and instead add an owner. This approach was suggested by Paul Menage. The advantage of this approach is that, once the mm->owner is known, using the subsystem id, the cgroup can be determined. It also allows several control groups that are virtually grouped by mm_struct to exist independently of the memory controller, i.e., without adding mem_cgroup's for each controller to mm_struct. A new config option CONFIG_MM_OWNER is added and the memory resource controller selects this config option. This patch also adds cgroup callbacks to notify subsystems when mm->owner changes. The mm_cgroup_changed callback is called with the task_lock() of the new task held and is called just prior to changing the mm->owner. I am indebted to Paul Menage for the several reviews of this patchset and helping me make it lighter and simpler. This patch was tested on a powerpc box; it was compiled with both the MM_OWNER config turned on and off. After the thread group leader exits, it's moved to init_css_set by cgroup_exit(), thus all future charges from running threads would be redirected to the init_css_set's subsystem. Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com> Cc: Pavel Emelianov <xemul@openvz.org> Cc: Hugh Dickins <hugh@veritas.com> Cc: Sudhir Kumar <skumar@linux.vnet.ibm.com> Cc: YAMAMOTO Takashi <yamamoto@valinux.co.jp> Cc: Hirokazu Takahashi <taka@valinux.co.jp> Cc: David Rientjes <rientjes@google.com> Cc: Balbir Singh <balbir@linux.vnet.ibm.com> Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Acked-by: Pekka Enberg <penberg@cs.helsinki.fi> Reviewed-by: Paul Menage <menage@google.com> Cc: Oleg Nesterov <oleg@tv-sign.ru> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
author: Balbir Singh <balbir@linux.vnet.ibm.com> 2008-04-29 04:00:16 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2008-04-29 11:06:10 -0400
commit: cf475ad28ac35cc9ba612d67158f29b73b38b05d (patch)
tree: 2c7cd568d00357bd42643ea602884e731cc24f26 /kernel/exit.c
parent: 29486df325e1fe6e1764afcb19e3370804c2b002 (diff)
1 files changed, 83 insertions, 0 deletions
diff --git a/kernel/exit.c b/kernel/exit.c
index 2a9d98c641ac..ae0f2c4e452b 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -557,6 +557,88 @@ void exit_fs(struct task_struct *tsk)
 EXPORT_SYMBOL_GPL(exit_fs);
+#ifdef CONFIG_MM_OWNER
+/*
+ * Decide whether the exiting task p must hand ownership of mm to another
+ * task. Returns 1 only when mm is non-NULL, mm->mm_users is greater than 1
+ * and mm->owner is p; returns 0 in every other case.
+ */
+static inline int
+mm_need_new_owner(struct mm_struct *mm, struct task_struct *p)
+{
+        /*
+         * If there are other users of the mm and the owner (us) is exiting
+         * we need to find a new owner to take on the responsibility.
+         */
+        if (!mm)
+                return 0; /* no mm, nothing to hand over */
+        if (atomic_read(&mm->mm_users) <= 1)
+                return 0; /* no other user left to take over */
+        if (mm->owner != p)
+                return 0; /* p is not the owner */
+        return 1;
+}
+void mm_update_next_owner(struct mm_struct *mm)
+{
+        struct task_struct *c, *g, *p = current;
+retry:
+        if (!mm_need_new_owner(mm, p))
+                return;
+        read_lock(&tasklist_lock);
+        /*
+         * current (p) owns mm and mm still has other users, so pick
+         * another task whose ->mm is mm to become mm->owner. Candidates
+         * are scanned with tasklist_lock read-held, nearest relatives
+         * first.
+         *
+         * Search in the children
+         */
+        list_for_each_entry(c, &p->children, sibling) {
+                if (c->mm == mm)
+                        goto assign_new_owner;
+        }
+        /*
+         * Search in the siblings, i.e. the children of p's parent
+         */
+        list_for_each_entry(c, &p->parent->children, sibling) {
+                if (c->mm == mm)
+                        goto assign_new_owner;
+        }
+        /*
+         * Search through everything else. We should not get
+         * here often
+         *
+         * If no task matches, we drop the lock and return with
+         * mm->owner unchanged.
+         * NOTE(review): that leaves the exiting task recorded as
+         * owner - confirm this is acceptable for users of mm->owner.
+         */
+        do_each_thread(g, c) {
+                if (c->mm == mm)
+                        goto assign_new_owner;
+        } while_each_thread(g, c);
+        read_unlock(&tasklist_lock);
+        return;
+assign_new_owner:
+        BUG_ON(c == p); /* the new owner must not be the exiting task */
+        get_task_struct(c); /* paired with put_task_struct() below */
+        /*
+         * The task_lock protects c->mm from changing.
+         * We always want mm->owner->mm == mm
+         * The lock stays held while the cgroup callbacks run and
+         * mm->owner is updated.
+         */
+        task_lock(c);
+        /*
+         * Delay read_unlock() till we have the task_lock()
+         * to ensure that c does not slip away underneath us
+         */
+        read_unlock(&tasklist_lock);
+        if (c->mm != mm) { /* c no longer uses mm; start over */
+                task_unlock(c);
+                put_task_struct(c);
+                goto retry;
+        }
+        cgroup_mm_owner_callbacks(mm->owner, c); /* (old owner, new owner), called before the switch */
+        mm->owner = c;
+        task_unlock(c);
+        put_task_struct(c);
+}
+#endif /* CONFIG_MM_OWNER */
 /*
 * Turn us into a lazy TLB process if we
 * aren't already..
@@ -596,6 +678,7 @@ static void exit_mm(struct task_struct * tsk)
        /* We don't want this task to be frozen prematurely */
        clear_freeze_flag(tsk);
        task_unlock(tsk);
+        mm_update_next_owner(mm);
        mmput(mm);
 }
author	Balbir Singh <balbir@linux.vnet.ibm.com>	2008-04-29 04:00:16 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2008-04-29 11:06:10 -0400
commit	cf475ad28ac35cc9ba612d67158f29b73b38b05d (patch)
tree	2c7cd568d00357bd42643ea602884e731cc24f26 /kernel/exit.c
parent	29486df325e1fe6e1764afcb19e3370804c2b002 (diff)

diff --git a/kernel/exit.c b/kernel/exit.c index 2a9d98c641ac..ae0f2c4e452b 100644 --- a/kernel/exit.c +++ b/kernel/exit.c
@@ -557,6 +557,88 @@ void exit_fs(struct task_struct *tsk)
557		557
558	EXPORT_SYMBOL_GPL(exit_fs);	558	EXPORT_SYMBOL_GPL(exit_fs);
559		559
		560	#ifdef CONFIG_MM_OWNER
		561	/*
		562	* Task p is exiting and it owned mm, lets find a new owner for it
		563	*/
		564	static inline int
		565	mm_need_new_owner(struct mm_struct *mm, struct task_struct *p)
		566	{
		567	/*
		568	* If there are other users of the mm and the owner (us) is exiting
		569	* we need to find a new owner to take on the responsibility.
		570	*/
		571	if (!mm)
		572	return 0;
		573	if (atomic_read(&mm->mm_users) <= 1)
		574	return 0;
		575	if (mm->owner != p)
		576	return 0;
		577	return 1;
		578	}
		579
		580	void mm_update_next_owner(struct mm_struct *mm)
		581	{
		582	struct task_struct *c, *g, *p = current;
		583
		584	retry:
		585	if (!mm_need_new_owner(mm, p))
		586	return;
		587
		588	read_lock(&tasklist_lock);
		589	/*
		590	* Search in the children
		591	*/
		592	list_for_each_entry(c, &p->children, sibling) {
		593	if (c->mm == mm)
		594	goto assign_new_owner;
		595	}
		596
		597	/*
		598	* Search in the siblings
		599	*/
		600	list_for_each_entry(c, &p->parent->children, sibling) {
		601	if (c->mm == mm)
		602	goto assign_new_owner;
		603	}
		604
		605	/*
		606	* Search through everything else. We should not get
		607	* here often
		608	*/
		609	do_each_thread(g, c) {
		610	if (c->mm == mm)
		611	goto assign_new_owner;
		612	} while_each_thread(g, c);
		613
		614	read_unlock(&tasklist_lock);
		615	return;
		616
		617	assign_new_owner:
		618	BUG_ON(c == p);
		619	get_task_struct(c);
		620	/*
		621	* The task_lock protects c->mm from changing.
		622	* We always want mm->owner->mm == mm
		623	*/
		624	task_lock(c);
		625	/*
		626	* Delay read_unlock() till we have the task_lock()
		627	* to ensure that c does not slip away underneath us
		628	*/
		629	read_unlock(&tasklist_lock);
		630	if (c->mm != mm) {
		631	task_unlock(c);
		632	put_task_struct(c);
		633	goto retry;
		634	}
		635	cgroup_mm_owner_callbacks(mm->owner, c);
		636	mm->owner = c;
		637	task_unlock(c);
		638	put_task_struct(c);
		639	}
		640	#endif /* CONFIG_MM_OWNER */
		641
560	/*	642	/*
561	* Turn us into a lazy TLB process if we	643	* Turn us into a lazy TLB process if we
562	* aren't already..	644	* aren't already..
@@ -596,6 +678,7 @@ static void exit_mm(struct task_struct * tsk)
596	/* We don't want this task to be frozen prematurely */	678	/* We don't want this task to be frozen prematurely */
597	clear_freeze_flag(tsk);	679	clear_freeze_flag(tsk);
598	task_unlock(tsk);	680	task_unlock(tsk);
		681	mm_update_next_owner(mm);
599	mmput(mm);	682	mmput(mm);
600	}	683	}
601		684