aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorBalbir Singh <balbir@linux.vnet.ibm.com>2008-04-29 04:00:16 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2008-04-29 11:06:10 -0400
commitcf475ad28ac35cc9ba612d67158f29b73b38b05d (patch)
tree2c7cd568d00357bd42643ea602884e731cc24f26 /kernel
parent29486df325e1fe6e1764afcb19e3370804c2b002 (diff)
cgroups: add an owner to the mm_struct
Remove the mem_cgroup member from mm_struct and add an owner instead. This approach was suggested by Paul Menage. The advantage of this approach is that, once the mm->owner is known, the cgroup can be determined using the subsystem id. It also allows several control groups that are virtually grouped by mm_struct to exist independently of the memory controller, i.e., without adding mem_cgroup's for each controller to mm_struct. A new config option CONFIG_MM_OWNER is added and the memory resource controller selects this config option. This patch also adds cgroup callbacks to notify subsystems when mm->owner changes. The mm_owner_changed callback is called with the task_lock() of the new task held and is called just prior to changing the mm->owner. I am indebted to Paul Menage for the several reviews of this patchset and for helping me make it lighter and simpler. This patch was tested on a powerpc box; it was compiled with the MM_OWNER config turned both on and off. After the thread group leader exits, it's moved to init_css_set by cgroup_exit(), thus all future charges from running threads would be redirected to the init_css_set's subsystem. Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com> Cc: Pavel Emelianov <xemul@openvz.org> Cc: Hugh Dickins <hugh@veritas.com> Cc: Sudhir Kumar <skumar@linux.vnet.ibm.com> Cc: YAMAMOTO Takashi <yamamoto@valinux.co.jp> Cc: Hirokazu Takahashi <taka@valinux.co.jp> Cc: David Rientjes <rientjes@google.com>, Cc: Balbir Singh <balbir@linux.vnet.ibm.com> Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Acked-by: Pekka Enberg <penberg@cs.helsinki.fi> Reviewed-by: Paul Menage <menage@google.com> Cc: Oleg Nesterov <oleg@tv-sign.ru> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/cgroup.c30
-rw-r--r--kernel/exit.c83
-rw-r--r--kernel/fork.c11
3 files changed, 121 insertions, 3 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index abc433772e5a..b9d467d83fc1 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -119,6 +119,7 @@ static int root_count;
119 * be called. 119 * be called.
120 */ 120 */
121static int need_forkexit_callback; 121static int need_forkexit_callback;
122static int need_mm_owner_callback __read_mostly;
122 123
123/* convenient tests for these bits */ 124/* convenient tests for these bits */
124inline int cgroup_is_removed(const struct cgroup *cgrp) 125inline int cgroup_is_removed(const struct cgroup *cgrp)
@@ -2498,6 +2499,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
2498 init_css_set.subsys[ss->subsys_id] = dummytop->subsys[ss->subsys_id]; 2499 init_css_set.subsys[ss->subsys_id] = dummytop->subsys[ss->subsys_id];
2499 2500
2500 need_forkexit_callback |= ss->fork || ss->exit; 2501 need_forkexit_callback |= ss->fork || ss->exit;
2502 need_mm_owner_callback |= !!ss->mm_owner_changed;
2501 2503
2502 /* At system boot, before all subsystems have been 2504 /* At system boot, before all subsystems have been
2503 * registered, no tasks have been forked, so we don't 2505 * registered, no tasks have been forked, so we don't
@@ -2748,6 +2750,34 @@ void cgroup_fork_callbacks(struct task_struct *child)
2748 } 2750 }
2749} 2751}
2750 2752
2753#ifdef CONFIG_MM_OWNER
2754/**
2755 * cgroup_mm_owner_callbacks - run callbacks when the mm->owner changes
2756 * @old: the task that owns the mm before the change
2757 * @new: the task that is about to become the owner
2758 *
2759 * Called on every change to mm->owner. mm_init_owner() does not invoke
2760 * this routine, since it only assigns the mm->owner for the first time.
2761 */
2762void cgroup_mm_owner_callbacks(struct task_struct *old, struct task_struct *new)
2763{
2764 struct cgroup *oldcgrp, *newcgrp;
2765
2766 if (need_mm_owner_callback) {
2767 int i;
2768 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
2769 struct cgroup_subsys *ss = subsys[i];
2770 oldcgrp = task_cgroup(old, ss->subsys_id);
2771 newcgrp = task_cgroup(new, ss->subsys_id);
2772 if (oldcgrp == newcgrp)
2773 continue;
2774 if (ss->mm_owner_changed)
2775 ss->mm_owner_changed(ss, oldcgrp, newcgrp);
2776 }
2777 }
2778}
2779#endif /* CONFIG_MM_OWNER */
2780
2751/** 2781/**
2752 * cgroup_post_fork - called on a new task after adding it to the task list 2782 * cgroup_post_fork - called on a new task after adding it to the task list
2753 * @child: the task in question 2783 * @child: the task in question
diff --git a/kernel/exit.c b/kernel/exit.c
index 2a9d98c641ac..ae0f2c4e452b 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -557,6 +557,88 @@ void exit_fs(struct task_struct *tsk)
557 557
558EXPORT_SYMBOL_GPL(exit_fs); 558EXPORT_SYMBOL_GPL(exit_fs);
559 559
560#ifdef CONFIG_MM_OWNER
561/*
562 * Task p is exiting and it owned mm, so let's find a new owner for it
563 */
564static inline int
565mm_need_new_owner(struct mm_struct *mm, struct task_struct *p)
566{
567 /*
568 * If there are other users of the mm and the owner (us) is exiting
569 * we need to find a new owner to take on the responsibility.
570 */
571 if (!mm)
572 return 0;
573 if (atomic_read(&mm->mm_users) <= 1)
574 return 0;
575 if (mm->owner != p)
576 return 0;
577 return 1;
578}
579
580void mm_update_next_owner(struct mm_struct *mm)
581{
582 struct task_struct *c, *g, *p = current;
583
584retry:
585 if (!mm_need_new_owner(mm, p))
586 return;
587
588 read_lock(&tasklist_lock);
589 /*
590 * Search in the children
591 */
592 list_for_each_entry(c, &p->children, sibling) {
593 if (c->mm == mm)
594 goto assign_new_owner;
595 }
596
597 /*
598 * Search in the siblings
599 */
600 list_for_each_entry(c, &p->parent->children, sibling) {
601 if (c->mm == mm)
602 goto assign_new_owner;
603 }
604
605 /*
606 * Search through everything else. We should not get
607 * here often
608 */
609 do_each_thread(g, c) {
610 if (c->mm == mm)
611 goto assign_new_owner;
612 } while_each_thread(g, c);
613
614 read_unlock(&tasklist_lock);
615 return;
616
617assign_new_owner:
618 BUG_ON(c == p);
619 get_task_struct(c);
620 /*
621 * The task_lock protects c->mm from changing.
622 * We always want mm->owner->mm == mm
623 */
624 task_lock(c);
625 /*
626 * Delay read_unlock() till we have the task_lock()
627 * to ensure that c does not slip away underneath us
628 */
629 read_unlock(&tasklist_lock);
630 if (c->mm != mm) {
631 task_unlock(c);
632 put_task_struct(c);
633 goto retry;
634 }
635 cgroup_mm_owner_callbacks(mm->owner, c);
636 mm->owner = c;
637 task_unlock(c);
638 put_task_struct(c);
639}
640#endif /* CONFIG_MM_OWNER */
641
560/* 642/*
561 * Turn us into a lazy TLB process if we 643 * Turn us into a lazy TLB process if we
562 * aren't already.. 644 * aren't already..
@@ -596,6 +678,7 @@ static void exit_mm(struct task_struct * tsk)
596 /* We don't want this task to be frozen prematurely */ 678 /* We don't want this task to be frozen prematurely */
597 clear_freeze_flag(tsk); 679 clear_freeze_flag(tsk);
598 task_unlock(tsk); 680 task_unlock(tsk);
681 mm_update_next_owner(mm);
599 mmput(mm); 682 mmput(mm);
600} 683}
601 684
diff --git a/kernel/fork.c b/kernel/fork.c
index 6067e429f281..156db96ff754 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -381,14 +381,13 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
381 mm->ioctx_list = NULL; 381 mm->ioctx_list = NULL;
382 mm->free_area_cache = TASK_UNMAPPED_BASE; 382 mm->free_area_cache = TASK_UNMAPPED_BASE;
383 mm->cached_hole_size = ~0UL; 383 mm->cached_hole_size = ~0UL;
384 mm_init_cgroup(mm, p); 384 mm_init_owner(mm, p);
385 385
386 if (likely(!mm_alloc_pgd(mm))) { 386 if (likely(!mm_alloc_pgd(mm))) {
387 mm->def_flags = 0; 387 mm->def_flags = 0;
388 return mm; 388 return mm;
389 } 389 }
390 390
391 mm_free_cgroup(mm);
392 free_mm(mm); 391 free_mm(mm);
393 return NULL; 392 return NULL;
394} 393}
@@ -438,7 +437,6 @@ void mmput(struct mm_struct *mm)
438 spin_unlock(&mmlist_lock); 437 spin_unlock(&mmlist_lock);
439 } 438 }
440 put_swap_token(mm); 439 put_swap_token(mm);
441 mm_free_cgroup(mm);
442 mmdrop(mm); 440 mmdrop(mm);
443 } 441 }
444} 442}
@@ -982,6 +980,13 @@ static void rt_mutex_init_task(struct task_struct *p)
982#endif 980#endif
983} 981}
984 982
983#ifdef CONFIG_MM_OWNER
984void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
985{
986 mm->owner = p;
987}
988#endif /* CONFIG_MM_OWNER */
989
985/* 990/*
986 * This creates a new process as a copy of the old one, 991 * This creates a new process as a copy of the old one,
987 * but does not actually start it yet. 992 * but does not actually start it yet.