diff options
author | Balbir Singh <balbir@linux.vnet.ibm.com> | 2008-04-29 04:00:16 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-04-29 11:06:10 -0400 |
commit | cf475ad28ac35cc9ba612d67158f29b73b38b05d (patch) | |
tree | 2c7cd568d00357bd42643ea602884e731cc24f26 /kernel/exit.c | |
parent | 29486df325e1fe6e1764afcb19e3370804c2b002 (diff) |
cgroups: add an owner to the mm_struct
Remove the mem_cgroup member from mm_struct and instead add an owner.
This approach was suggested by Paul Menage. The advantage of this approach
is that, once the mm->owner is known, using the subsystem id, the cgroup
can be determined. It also allows several control groups that are
virtually grouped by mm_struct to exist independently of the memory
controller, i.e., without adding a mem_cgroup for each controller to
mm_struct.
A new config option CONFIG_MM_OWNER is added and the memory resource
controller selects this config option.
This patch also adds cgroup callbacks to notify subsystems when mm->owner
changes. The mm_cgroup_changed callback is called with the task_lock() of
the new task held and is called just prior to changing the mm->owner.
I am indebted to Paul Menage for the several reviews of this patchset and
helping me make it lighter and simpler.
This patch was tested on a powerpc box; it was compiled with the
MM_OWNER config turned both on and off.
After the thread group leader exits, it's moved to init_css_set by
cgroup_exit(), thus all future charges from running threads would be
redirected to the init_css_set's subsystem.
Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Pavel Emelianov <xemul@openvz.org>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Sudhir Kumar <skumar@linux.vnet.ibm.com>
Cc: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Cc: Hirokazu Takahashi <taka@valinux.co.jp>
Cc: David Rientjes <rientjes@google.com>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Pekka Enberg <penberg@cs.helsinki.fi>
Reviewed-by: Paul Menage <menage@google.com>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel/exit.c')
-rw-r--r-- | kernel/exit.c | 83 |
1 files changed, 83 insertions, 0 deletions
diff --git a/kernel/exit.c b/kernel/exit.c index 2a9d98c641ac..ae0f2c4e452b 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
@@ -557,6 +557,88 @@ void exit_fs(struct task_struct *tsk) | |||
557 | 557 | ||
558 | EXPORT_SYMBOL_GPL(exit_fs); | 558 | EXPORT_SYMBOL_GPL(exit_fs); |
559 | 559 | ||
560 | #ifdef CONFIG_MM_OWNER | ||
561 | /* | |
562 | * Task p is exiting: decide whether a new owner must be found for mm (returns 1 if so, 0 otherwise) | |
563 | */ | |
564 | static inline int | |
565 | mm_need_new_owner(struct mm_struct *mm, struct task_struct *p) | |
566 | { | |
567 | /* | |
568 | * A new owner is needed only when the mm exists, it still has other | |
569 | * users (mm_users > 1) and the exiting task p is its current owner. | |
570 | */ | |
571 | if (!mm) | |
572 | return 0; | |
573 | if (atomic_read(&mm->mm_users) <= 1) | |
574 | return 0; | |
575 | if (mm->owner != p) | |
576 | return 0; | |
577 | return 1; | |
578 | } | |
579 | |||
580 | void mm_update_next_owner(struct mm_struct *mm) | |
581 | { | |
582 | struct task_struct *c, *g, *p = current; | |
583 | |||
584 | retry: | |
585 | if (!mm_need_new_owner(mm, p)) | |
586 | return; | |
587 | |||
588 | read_lock(&tasklist_lock); | |
589 | /* | |
590 | * current still owns mm and must pass it on; look for a task using mm, starting with current's children | |
591 | */ | |
592 | list_for_each_entry(c, &p->children, sibling) { | |
593 | if (c->mm == mm) | |
594 | goto assign_new_owner; | |
595 | } | |
596 | |||
597 | /* | |
598 | * Next, search the parent's children list, i.e. the siblings | |
599 | */ | |
600 | list_for_each_entry(c, &p->parent->children, sibling) { | |
601 | if (c->mm == mm) | |
602 | goto assign_new_owner; | |
603 | } | |
604 | |||
605 | /* | |
606 | * Search through everything else. We should not get here often. | |
607 | * If no thread uses mm, we return and mm->owner is left unchanged. | |
608 | */ | |
609 | do_each_thread(g, c) { | |
610 | if (c->mm == mm) | |
611 | goto assign_new_owner; | |
612 | } while_each_thread(g, c); | |
613 | |||
614 | read_unlock(&tasklist_lock); | |
615 | return; | |
616 | |||
617 | assign_new_owner: | |
618 | BUG_ON(c == p); | |
619 | get_task_struct(c); | |
620 | /* | |
621 | * The task_lock protects c->mm from changing. | |
622 | * We always want mm->owner->mm == mm | |
623 | */ | |
624 | task_lock(c); | |
625 | /* | |
626 | * Delay read_unlock() till we have the task_lock() to ensure that c does | |
627 | * not slip away underneath us; c->mm is rechecked below, retrying on change | |
628 | */ | |
629 | read_unlock(&tasklist_lock); | |
630 | if (c->mm != mm) { | |
631 | task_unlock(c); | |
632 | put_task_struct(c); | |
633 | goto retry; | |
634 | } | |
635 | cgroup_mm_owner_callbacks(mm->owner, c); | |
636 | mm->owner = c; | |
637 | task_unlock(c); | |
638 | put_task_struct(c); | |
639 | } | |
640 | #endif /* CONFIG_MM_OWNER */ | ||
641 | |||
560 | /* | 642 | /* |
561 | * Turn us into a lazy TLB process if we | 643 | * Turn us into a lazy TLB process if we |
562 | * aren't already.. | 644 | * aren't already.. |
@@ -596,6 +678,7 @@ static void exit_mm(struct task_struct * tsk) | |||
596 | /* We don't want this task to be frozen prematurely */ | 678 | /* We don't want this task to be frozen prematurely */ |
597 | clear_freeze_flag(tsk); | 679 | clear_freeze_flag(tsk); |
598 | task_unlock(tsk); | 680 | task_unlock(tsk); |
681 | mm_update_next_owner(mm); | ||
599 | mmput(mm); | 682 | mmput(mm); |
600 | } | 683 | } |
601 | 684 | ||