diff options
author | Balbir Singh <balbir@linux.vnet.ibm.com> | 2008-04-29 04:00:16 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-04-29 11:06:10 -0400 |
commit | cf475ad28ac35cc9ba612d67158f29b73b38b05d (patch) | |
tree | 2c7cd568d00357bd42643ea602884e731cc24f26 | |
parent | 29486df325e1fe6e1764afcb19e3370804c2b002 (diff) |
cgroups: add an owner to the mm_struct
Remove the mem_cgroup member from mm_struct and add an owner instead.
This approach was suggested by Paul Menage. The advantage of this approach
is that, once the mm->owner is known, the cgroup can be determined using
the subsystem id. It also allows several control groups that are
virtually grouped by mm_struct to exist independently of the memory
controller, i.e., without adding a mem_cgroup-like member to mm_struct
for each controller.
A new config option CONFIG_MM_OWNER is added and the memory resource
controller selects this config option.
This patch also adds cgroup callbacks to notify subsystems when mm->owner
changes. The mm_owner_changed callback is called with the task_lock() of
the new owner task held, just prior to changing mm->owner.
I am indebted to Paul Menage for the several reviews of this patchset and
helping me make it lighter and simpler.
This patch was tested on a powerpc box, it was compiled with both the
MM_OWNER config turned on and off.
After the thread group leader exits, it is moved to init_css_set by
cgroup_exit(), thus all future charges from running threads would be
redirected to the init_css_set's subsystem.
Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Pavel Emelianov <xemul@openvz.org>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Sudhir Kumar <skumar@linux.vnet.ibm.com>
Cc: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Cc: Hirokazu Takahashi <taka@valinux.co.jp>
Cc: David Rientjes <rientjes@google.com>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Pekka Enberg <penberg@cs.helsinki.fi>
Reviewed-by: Paul Menage <menage@google.com>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | fs/exec.c | 1 | ||||
-rw-r--r-- | include/linux/cgroup.h | 15 | ||||
-rw-r--r-- | include/linux/memcontrol.h | 16 | ||||
-rw-r--r-- | include/linux/mm_types.h | 5 | ||||
-rw-r--r-- | include/linux/sched.h | 13 | ||||
-rw-r--r-- | init/Kconfig | 7 | ||||
-rw-r--r-- | init/main.c | 1 | ||||
-rw-r--r-- | kernel/cgroup.c | 30 | ||||
-rw-r--r-- | kernel/exit.c | 83 | ||||
-rw-r--r-- | kernel/fork.c | 11 | ||||
-rw-r--r-- | mm/memcontrol.c | 28 |
11 files changed, 169 insertions, 41 deletions
@@ -735,6 +735,7 @@ static int exec_mmap(struct mm_struct *mm) | |||
735 | tsk->active_mm = mm; | 735 | tsk->active_mm = mm; |
736 | activate_mm(active_mm, mm); | 736 | activate_mm(active_mm, mm); |
737 | task_unlock(tsk); | 737 | task_unlock(tsk); |
738 | mm_update_next_owner(mm); | ||
738 | arch_pick_mmap_layout(mm); | 739 | arch_pick_mmap_layout(mm); |
739 | if (old_mm) { | 740 | if (old_mm) { |
740 | up_read(&old_mm->mmap_sem); | 741 | up_read(&old_mm->mmap_sem); |
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 095248082b7e..e155aa78d859 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h | |||
@@ -305,6 +305,12 @@ struct cgroup_subsys { | |||
305 | struct cgroup *cgrp); | 305 | struct cgroup *cgrp); |
306 | void (*post_clone)(struct cgroup_subsys *ss, struct cgroup *cgrp); | 306 | void (*post_clone)(struct cgroup_subsys *ss, struct cgroup *cgrp); |
307 | void (*bind)(struct cgroup_subsys *ss, struct cgroup *root); | 307 | void (*bind)(struct cgroup_subsys *ss, struct cgroup *root); |
308 | /* | ||
309 | * This routine is called with the task_lock of mm->owner held | ||
310 | */ | ||
311 | void (*mm_owner_changed)(struct cgroup_subsys *ss, | ||
312 | struct cgroup *old, | ||
313 | struct cgroup *new); | ||
308 | int subsys_id; | 314 | int subsys_id; |
309 | int active; | 315 | int active; |
310 | int disabled; | 316 | int disabled; |
@@ -390,4 +396,13 @@ static inline int cgroupstats_build(struct cgroupstats *stats, | |||
390 | 396 | ||
391 | #endif /* !CONFIG_CGROUPS */ | 397 | #endif /* !CONFIG_CGROUPS */ |
392 | 398 | ||
399 | #ifdef CONFIG_MM_OWNER | ||
400 | extern void | ||
401 | cgroup_mm_owner_callbacks(struct task_struct *old, struct task_struct *new); | ||
402 | #else /* !CONFIG_MM_OWNER */ | ||
403 | static inline void | ||
404 | cgroup_mm_owner_callbacks(struct task_struct *old, struct task_struct *new) | ||
405 | { | ||
406 | } | ||
407 | #endif /* CONFIG_MM_OWNER */ | ||
393 | #endif /* _LINUX_CGROUP_H */ | 408 | #endif /* _LINUX_CGROUP_H */ |
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 8b1c4295848b..e6608776bc96 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h | |||
@@ -27,9 +27,6 @@ struct mm_struct; | |||
27 | 27 | ||
28 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR | 28 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR |
29 | 29 | ||
30 | extern void mm_init_cgroup(struct mm_struct *mm, struct task_struct *p); | ||
31 | extern void mm_free_cgroup(struct mm_struct *mm); | ||
32 | |||
33 | #define page_reset_bad_cgroup(page) ((page)->page_cgroup = 0) | 30 | #define page_reset_bad_cgroup(page) ((page)->page_cgroup = 0) |
34 | 31 | ||
35 | extern struct page_cgroup *page_get_page_cgroup(struct page *page); | 32 | extern struct page_cgroup *page_get_page_cgroup(struct page *page); |
@@ -48,8 +45,10 @@ extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, | |||
48 | extern void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask); | 45 | extern void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask); |
49 | int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem); | 46 | int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem); |
50 | 47 | ||
48 | extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p); | ||
49 | |||
51 | #define mm_match_cgroup(mm, cgroup) \ | 50 | #define mm_match_cgroup(mm, cgroup) \ |
52 | ((cgroup) == rcu_dereference((mm)->mem_cgroup)) | 51 | ((cgroup) == mem_cgroup_from_task((mm)->owner)) |
53 | 52 | ||
54 | extern int mem_cgroup_prepare_migration(struct page *page); | 53 | extern int mem_cgroup_prepare_migration(struct page *page); |
55 | extern void mem_cgroup_end_migration(struct page *page); | 54 | extern void mem_cgroup_end_migration(struct page *page); |
@@ -73,15 +72,6 @@ extern long mem_cgroup_calc_reclaim_inactive(struct mem_cgroup *mem, | |||
73 | struct zone *zone, int priority); | 72 | struct zone *zone, int priority); |
74 | 73 | ||
75 | #else /* CONFIG_CGROUP_MEM_RES_CTLR */ | 74 | #else /* CONFIG_CGROUP_MEM_RES_CTLR */ |
76 | static inline void mm_init_cgroup(struct mm_struct *mm, | ||
77 | struct task_struct *p) | ||
78 | { | ||
79 | } | ||
80 | |||
81 | static inline void mm_free_cgroup(struct mm_struct *mm) | ||
82 | { | ||
83 | } | ||
84 | |||
85 | static inline void page_reset_bad_cgroup(struct page *page) | 75 | static inline void page_reset_bad_cgroup(struct page *page) |
86 | { | 76 | { |
87 | } | 77 | } |
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index e2bae8dde35a..bc97bd54f606 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h | |||
@@ -225,8 +225,9 @@ struct mm_struct { | |||
225 | /* aio bits */ | 225 | /* aio bits */ |
226 | rwlock_t ioctx_list_lock; /* aio lock */ | 226 | rwlock_t ioctx_list_lock; /* aio lock */ |
227 | struct kioctx *ioctx_list; | 227 | struct kioctx *ioctx_list; |
228 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR | 228 | #ifdef CONFIG_MM_OWNER |
229 | struct mem_cgroup *mem_cgroup; | 229 | struct task_struct *owner; /* The thread group leader that */ |
230 | /* owns the mm_struct. */ | ||
230 | #endif | 231 | #endif |
231 | }; | 232 | }; |
232 | 233 | ||
diff --git a/include/linux/sched.h b/include/linux/sched.h index 024d72b47a0c..1d02babdb2c7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -2148,6 +2148,19 @@ static inline void migration_init(void) | |||
2148 | #define TASK_SIZE_OF(tsk) TASK_SIZE | 2148 | #define TASK_SIZE_OF(tsk) TASK_SIZE |
2149 | #endif | 2149 | #endif |
2150 | 2150 | ||
2151 | #ifdef CONFIG_MM_OWNER | ||
2152 | extern void mm_update_next_owner(struct mm_struct *mm); | ||
2153 | extern void mm_init_owner(struct mm_struct *mm, struct task_struct *p); | ||
2154 | #else | ||
2155 | static inline void mm_update_next_owner(struct mm_struct *mm) | ||
2156 | { | ||
2157 | } | ||
2158 | |||
2159 | static inline void mm_init_owner(struct mm_struct *mm, struct task_struct *p) | ||
2160 | { | ||
2161 | } | ||
2162 | #endif /* CONFIG_MM_OWNER */ | ||
2163 | |||
2151 | #endif /* __KERNEL__ */ | 2164 | #endif /* __KERNEL__ */ |
2152 | 2165 | ||
2153 | #endif | 2166 | #endif |
diff --git a/init/Kconfig b/init/Kconfig index a3457926342a..98fa96eac415 100644 --- a/init/Kconfig +++ b/init/Kconfig | |||
@@ -378,9 +378,13 @@ config RESOURCE_COUNTERS | |||
378 | infrastructure that works with cgroups | 378 | infrastructure that works with cgroups |
379 | depends on CGROUPS | 379 | depends on CGROUPS |
380 | 380 | ||
381 | config MM_OWNER | ||
382 | bool | ||
383 | |||
381 | config CGROUP_MEM_RES_CTLR | 384 | config CGROUP_MEM_RES_CTLR |
382 | bool "Memory Resource Controller for Control Groups" | 385 | bool "Memory Resource Controller for Control Groups" |
383 | depends on CGROUPS && RESOURCE_COUNTERS | 386 | depends on CGROUPS && RESOURCE_COUNTERS |
387 | select MM_OWNER | ||
384 | help | 388 | help |
385 | Provides a memory resource controller that manages both page cache and | 389 | Provides a memory resource controller that manages both page cache and |
386 | RSS memory. | 390 | RSS memory. |
@@ -393,6 +397,9 @@ config CGROUP_MEM_RES_CTLR | |||
393 | Only enable when you're ok with these trade offs and really | 397 | Only enable when you're ok with these trade offs and really |
394 | sure you need the memory resource controller. | 398 | sure you need the memory resource controller. |
395 | 399 | ||
400 | This config option also selects MM_OWNER config option, which | ||
401 | could in turn add some fork/exit overhead. | ||
402 | |||
396 | config SYSFS_DEPRECATED | 403 | config SYSFS_DEPRECATED |
397 | bool | 404 | bool |
398 | 405 | ||
diff --git a/init/main.c b/init/main.c index 1116d2f40cc1..c62c98f381f2 100644 --- a/init/main.c +++ b/init/main.c | |||
@@ -559,6 +559,7 @@ asmlinkage void __init start_kernel(void) | |||
559 | printk(KERN_NOTICE); | 559 | printk(KERN_NOTICE); |
560 | printk(linux_banner); | 560 | printk(linux_banner); |
561 | setup_arch(&command_line); | 561 | setup_arch(&command_line); |
562 | mm_init_owner(&init_mm, &init_task); | ||
562 | setup_command_line(command_line); | 563 | setup_command_line(command_line); |
563 | unwind_setup(); | 564 | unwind_setup(); |
564 | setup_per_cpu_areas(); | 565 | setup_per_cpu_areas(); |
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index abc433772e5a..b9d467d83fc1 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
@@ -119,6 +119,7 @@ static int root_count; | |||
119 | * be called. | 119 | * be called. |
120 | */ | 120 | */ |
121 | static int need_forkexit_callback; | 121 | static int need_forkexit_callback; |
122 | static int need_mm_owner_callback __read_mostly; | ||
122 | 123 | ||
123 | /* convenient tests for these bits */ | 124 | /* convenient tests for these bits */ |
124 | inline int cgroup_is_removed(const struct cgroup *cgrp) | 125 | inline int cgroup_is_removed(const struct cgroup *cgrp) |
@@ -2498,6 +2499,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss) | |||
2498 | init_css_set.subsys[ss->subsys_id] = dummytop->subsys[ss->subsys_id]; | 2499 | init_css_set.subsys[ss->subsys_id] = dummytop->subsys[ss->subsys_id]; |
2499 | 2500 | ||
2500 | need_forkexit_callback |= ss->fork || ss->exit; | 2501 | need_forkexit_callback |= ss->fork || ss->exit; |
2502 | need_mm_owner_callback |= !!ss->mm_owner_changed; | ||
2501 | 2503 | ||
2502 | /* At system boot, before all subsystems have been | 2504 | /* At system boot, before all subsystems have been |
2503 | * registered, no tasks have been forked, so we don't | 2505 | * registered, no tasks have been forked, so we don't |
@@ -2748,6 +2750,34 @@ void cgroup_fork_callbacks(struct task_struct *child) | |||
2748 | } | 2750 | } |
2749 | } | 2751 | } |
2750 | 2752 | ||
2753 | #ifdef CONFIG_MM_OWNER | ||
2754 | /** | ||
2755 | * cgroup_mm_owner_callbacks - run callbacks when the mm->owner changes | ||
2756 | * @p: the new owner | ||
2757 | * | ||
2758 | * Called on every change to mm->owner. mm_init_owner() does not | ||
2759 | * invoke this routine, since it assigns the mm->owner the first time | ||
2760 | * and does not change it. | ||
2761 | */ | ||
2762 | void cgroup_mm_owner_callbacks(struct task_struct *old, struct task_struct *new) | ||
2763 | { | ||
2764 | struct cgroup *oldcgrp, *newcgrp; | ||
2765 | |||
2766 | if (need_mm_owner_callback) { | ||
2767 | int i; | ||
2768 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | ||
2769 | struct cgroup_subsys *ss = subsys[i]; | ||
2770 | oldcgrp = task_cgroup(old, ss->subsys_id); | ||
2771 | newcgrp = task_cgroup(new, ss->subsys_id); | ||
2772 | if (oldcgrp == newcgrp) | ||
2773 | continue; | ||
2774 | if (ss->mm_owner_changed) | ||
2775 | ss->mm_owner_changed(ss, oldcgrp, newcgrp); | ||
2776 | } | ||
2777 | } | ||
2778 | } | ||
2779 | #endif /* CONFIG_MM_OWNER */ | ||
2780 | |||
2751 | /** | 2781 | /** |
2752 | * cgroup_post_fork - called on a new task after adding it to the task list | 2782 | * cgroup_post_fork - called on a new task after adding it to the task list |
2753 | * @child: the task in question | 2783 | * @child: the task in question |
diff --git a/kernel/exit.c b/kernel/exit.c index 2a9d98c641ac..ae0f2c4e452b 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
@@ -557,6 +557,88 @@ void exit_fs(struct task_struct *tsk) | |||
557 | 557 | ||
558 | EXPORT_SYMBOL_GPL(exit_fs); | 558 | EXPORT_SYMBOL_GPL(exit_fs); |
559 | 559 | ||
560 | #ifdef CONFIG_MM_OWNER | ||
561 | /* | ||
562 | * Task p is exiting and it owned mm, lets find a new owner for it | ||
563 | */ | ||
564 | static inline int | ||
565 | mm_need_new_owner(struct mm_struct *mm, struct task_struct *p) | ||
566 | { | ||
567 | /* | ||
568 | * If there are other users of the mm and the owner (us) is exiting | ||
569 | * we need to find a new owner to take on the responsibility. | ||
570 | */ | ||
571 | if (!mm) | ||
572 | return 0; | ||
573 | if (atomic_read(&mm->mm_users) <= 1) | ||
574 | return 0; | ||
575 | if (mm->owner != p) | ||
576 | return 0; | ||
577 | return 1; | ||
578 | } | ||
579 | |||
580 | void mm_update_next_owner(struct mm_struct *mm) | ||
581 | { | ||
582 | struct task_struct *c, *g, *p = current; | ||
583 | |||
584 | retry: | ||
585 | if (!mm_need_new_owner(mm, p)) | ||
586 | return; | ||
587 | |||
588 | read_lock(&tasklist_lock); | ||
589 | /* | ||
590 | * Search in the children | ||
591 | */ | ||
592 | list_for_each_entry(c, &p->children, sibling) { | ||
593 | if (c->mm == mm) | ||
594 | goto assign_new_owner; | ||
595 | } | ||
596 | |||
597 | /* | ||
598 | * Search in the siblings | ||
599 | */ | ||
600 | list_for_each_entry(c, &p->parent->children, sibling) { | ||
601 | if (c->mm == mm) | ||
602 | goto assign_new_owner; | ||
603 | } | ||
604 | |||
605 | /* | ||
606 | * Search through everything else. We should not get | ||
607 | * here often | ||
608 | */ | ||
609 | do_each_thread(g, c) { | ||
610 | if (c->mm == mm) | ||
611 | goto assign_new_owner; | ||
612 | } while_each_thread(g, c); | ||
613 | |||
614 | read_unlock(&tasklist_lock); | ||
615 | return; | ||
616 | |||
617 | assign_new_owner: | ||
618 | BUG_ON(c == p); | ||
619 | get_task_struct(c); | ||
620 | /* | ||
621 | * The task_lock protects c->mm from changing. | ||
622 | * We always want mm->owner->mm == mm | ||
623 | */ | ||
624 | task_lock(c); | ||
625 | /* | ||
626 | * Delay read_unlock() till we have the task_lock() | ||
627 | * to ensure that c does not slip away underneath us | ||
628 | */ | ||
629 | read_unlock(&tasklist_lock); | ||
630 | if (c->mm != mm) { | ||
631 | task_unlock(c); | ||
632 | put_task_struct(c); | ||
633 | goto retry; | ||
634 | } | ||
635 | cgroup_mm_owner_callbacks(mm->owner, c); | ||
636 | mm->owner = c; | ||
637 | task_unlock(c); | ||
638 | put_task_struct(c); | ||
639 | } | ||
640 | #endif /* CONFIG_MM_OWNER */ | ||
641 | |||
560 | /* | 642 | /* |
561 | * Turn us into a lazy TLB process if we | 643 | * Turn us into a lazy TLB process if we |
562 | * aren't already.. | 644 | * aren't already.. |
@@ -596,6 +678,7 @@ static void exit_mm(struct task_struct * tsk) | |||
596 | /* We don't want this task to be frozen prematurely */ | 678 | /* We don't want this task to be frozen prematurely */ |
597 | clear_freeze_flag(tsk); | 679 | clear_freeze_flag(tsk); |
598 | task_unlock(tsk); | 680 | task_unlock(tsk); |
681 | mm_update_next_owner(mm); | ||
599 | mmput(mm); | 682 | mmput(mm); |
600 | } | 683 | } |
601 | 684 | ||
diff --git a/kernel/fork.c b/kernel/fork.c index 6067e429f281..156db96ff754 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -381,14 +381,13 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) | |||
381 | mm->ioctx_list = NULL; | 381 | mm->ioctx_list = NULL; |
382 | mm->free_area_cache = TASK_UNMAPPED_BASE; | 382 | mm->free_area_cache = TASK_UNMAPPED_BASE; |
383 | mm->cached_hole_size = ~0UL; | 383 | mm->cached_hole_size = ~0UL; |
384 | mm_init_cgroup(mm, p); | 384 | mm_init_owner(mm, p); |
385 | 385 | ||
386 | if (likely(!mm_alloc_pgd(mm))) { | 386 | if (likely(!mm_alloc_pgd(mm))) { |
387 | mm->def_flags = 0; | 387 | mm->def_flags = 0; |
388 | return mm; | 388 | return mm; |
389 | } | 389 | } |
390 | 390 | ||
391 | mm_free_cgroup(mm); | ||
392 | free_mm(mm); | 391 | free_mm(mm); |
393 | return NULL; | 392 | return NULL; |
394 | } | 393 | } |
@@ -438,7 +437,6 @@ void mmput(struct mm_struct *mm) | |||
438 | spin_unlock(&mmlist_lock); | 437 | spin_unlock(&mmlist_lock); |
439 | } | 438 | } |
440 | put_swap_token(mm); | 439 | put_swap_token(mm); |
441 | mm_free_cgroup(mm); | ||
442 | mmdrop(mm); | 440 | mmdrop(mm); |
443 | } | 441 | } |
444 | } | 442 | } |
@@ -982,6 +980,13 @@ static void rt_mutex_init_task(struct task_struct *p) | |||
982 | #endif | 980 | #endif |
983 | } | 981 | } |
984 | 982 | ||
983 | #ifdef CONFIG_MM_OWNER | ||
984 | void mm_init_owner(struct mm_struct *mm, struct task_struct *p) | ||
985 | { | ||
986 | mm->owner = p; | ||
987 | } | ||
988 | #endif /* CONFIG_MM_OWNER */ | ||
989 | |||
985 | /* | 990 | /* |
986 | * This creates a new process as a copy of the old one, | 991 | * This creates a new process as a copy of the old one, |
987 | * but does not actually start it yet. | 992 | * but does not actually start it yet. |
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index d12795cc7622..49d80814798b 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -236,26 +236,12 @@ static struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont) | |||
236 | css); | 236 | css); |
237 | } | 237 | } |
238 | 238 | ||
239 | static struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p) | 239 | struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p) |
240 | { | 240 | { |
241 | return container_of(task_subsys_state(p, mem_cgroup_subsys_id), | 241 | return container_of(task_subsys_state(p, mem_cgroup_subsys_id), |
242 | struct mem_cgroup, css); | 242 | struct mem_cgroup, css); |
243 | } | 243 | } |
244 | 244 | ||
245 | void mm_init_cgroup(struct mm_struct *mm, struct task_struct *p) | ||
246 | { | ||
247 | struct mem_cgroup *mem; | ||
248 | |||
249 | mem = mem_cgroup_from_task(p); | ||
250 | css_get(&mem->css); | ||
251 | mm->mem_cgroup = mem; | ||
252 | } | ||
253 | |||
254 | void mm_free_cgroup(struct mm_struct *mm) | ||
255 | { | ||
256 | css_put(&mm->mem_cgroup->css); | ||
257 | } | ||
258 | |||
259 | static inline int page_cgroup_locked(struct page *page) | 245 | static inline int page_cgroup_locked(struct page *page) |
260 | { | 246 | { |
261 | return bit_spin_is_locked(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup); | 247 | return bit_spin_is_locked(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup); |
@@ -476,6 +462,7 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, | |||
476 | int zid = zone_idx(z); | 462 | int zid = zone_idx(z); |
477 | struct mem_cgroup_per_zone *mz; | 463 | struct mem_cgroup_per_zone *mz; |
478 | 464 | ||
465 | BUG_ON(!mem_cont); | ||
479 | mz = mem_cgroup_zoneinfo(mem_cont, nid, zid); | 466 | mz = mem_cgroup_zoneinfo(mem_cont, nid, zid); |
480 | if (active) | 467 | if (active) |
481 | src = &mz->active_list; | 468 | src = &mz->active_list; |
@@ -574,7 +561,7 @@ retry: | |||
574 | mm = &init_mm; | 561 | mm = &init_mm; |
575 | 562 | ||
576 | rcu_read_lock(); | 563 | rcu_read_lock(); |
577 | mem = rcu_dereference(mm->mem_cgroup); | 564 | mem = mem_cgroup_from_task(rcu_dereference(mm->owner)); |
578 | /* | 565 | /* |
579 | * For every charge from the cgroup, increment reference count | 566 | * For every charge from the cgroup, increment reference count |
580 | */ | 567 | */ |
@@ -985,10 +972,9 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) | |||
985 | struct mem_cgroup *mem; | 972 | struct mem_cgroup *mem; |
986 | int node; | 973 | int node; |
987 | 974 | ||
988 | if (unlikely((cont->parent) == NULL)) { | 975 | if (unlikely((cont->parent) == NULL)) |
989 | mem = &init_mem_cgroup; | 976 | mem = &init_mem_cgroup; |
990 | init_mm.mem_cgroup = mem; | 977 | else |
991 | } else | ||
992 | mem = kzalloc(sizeof(struct mem_cgroup), GFP_KERNEL); | 978 | mem = kzalloc(sizeof(struct mem_cgroup), GFP_KERNEL); |
993 | 979 | ||
994 | if (mem == NULL) | 980 | if (mem == NULL) |
@@ -1067,10 +1053,6 @@ static void mem_cgroup_move_task(struct cgroup_subsys *ss, | |||
1067 | if (!thread_group_leader(p)) | 1053 | if (!thread_group_leader(p)) |
1068 | goto out; | 1054 | goto out; |
1069 | 1055 | ||
1070 | css_get(&mem->css); | ||
1071 | rcu_assign_pointer(mm->mem_cgroup, mem); | ||
1072 | css_put(&old_mem->css); | ||
1073 | |||
1074 | out: | 1056 | out: |
1075 | mmput(mm); | 1057 | mmput(mm); |
1076 | } | 1058 | } |