diff options
author | Paul Menage <menage@google.com> | 2007-10-19 02:39:36 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-10-19 14:53:36 -0400 |
commit | 817929ec274bcfe771586d338bb31d1659615686 (patch) | |
tree | 5a96ed1afd308016e8720437a00bf2f114e907cb /include | |
parent | a424316ca154317367c7ddf89997d1c80e4a8051 (diff) |
Task Control Groups: shared cgroup subsystem group arrays
Replace the struct css_set embedded in task_struct with a pointer; all tasks
that have the same set of memberships across all hierarchies will share a
css_set object, and will be linked via their cg_list field to the "tasks"
list_head in the css_set.
Assuming that many tasks share the same cgroup assignments, this reduces
overall space usage and keeps the size of the task_struct down (three pointers
added to task_struct compared to a non-cgroups kernel, no matter how many
subsystems are registered).
[akpm@linux-foundation.org: fix a printk]
[akpm@linux-foundation.org: build fix]
Signed-off-by: Paul Menage <menage@google.com>
Cc: Serge E. Hallyn <serue@us.ibm.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Dave Hansen <haveblue@us.ibm.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Paul Jackson <pj@sgi.com>
Cc: Kirill Korotaev <dev@openvz.org>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: Srivatsa Vaddagiri <vatsa@in.ibm.com>
Cc: Cedric Le Goater <clg@fr.ibm.com>
Cc: Serge E. Hallyn <serue@us.ibm.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Dave Hansen <haveblue@us.ibm.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Paul Jackson <pj@sgi.com>
Cc: Kirill Korotaev <dev@openvz.org>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: Srivatsa Vaddagiri <vatsa@in.ibm.com>
Cc: Cedric Le Goater <clg@fr.ibm.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/cgroup.h | 89 | ||||
-rw-r--r-- | include/linux/sched.h | 33 |
2 files changed, 83 insertions, 39 deletions
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index a9553568118f..836b3557bb76 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h | |||
@@ -27,10 +27,19 @@ extern void cgroup_lock(void); | |||
27 | extern void cgroup_unlock(void); | 27 | extern void cgroup_unlock(void); |
28 | extern void cgroup_fork(struct task_struct *p); | 28 | extern void cgroup_fork(struct task_struct *p); |
29 | extern void cgroup_fork_callbacks(struct task_struct *p); | 29 | extern void cgroup_fork_callbacks(struct task_struct *p); |
30 | extern void cgroup_post_fork(struct task_struct *p); | ||
30 | extern void cgroup_exit(struct task_struct *p, int run_callbacks); | 31 | extern void cgroup_exit(struct task_struct *p, int run_callbacks); |
31 | 32 | ||
32 | extern struct file_operations proc_cgroup_operations; | 33 | extern struct file_operations proc_cgroup_operations; |
33 | 34 | ||
35 | /* Define the enumeration of all cgroup subsystems */ | ||
36 | #define SUBSYS(_x) _x ## _subsys_id, | ||
37 | enum cgroup_subsys_id { | ||
38 | #include <linux/cgroup_subsys.h> | ||
39 | CGROUP_SUBSYS_COUNT | ||
40 | }; | ||
41 | #undef SUBSYS | ||
42 | |||
34 | /* Per-subsystem/per-cgroup state maintained by the system. */ | 43 | /* Per-subsystem/per-cgroup state maintained by the system. */ |
35 | struct cgroup_subsys_state { | 44 | struct cgroup_subsys_state { |
36 | /* The cgroup that this subsystem is attached to. Useful | 45 | /* The cgroup that this subsystem is attached to. Useful |
@@ -97,6 +106,52 @@ struct cgroup { | |||
97 | 106 | ||
98 | struct cgroupfs_root *root; | 107 | struct cgroupfs_root *root; |
99 | struct cgroup *top_cgroup; | 108 | struct cgroup *top_cgroup; |
109 | |||
110 | /* | ||
111 | * List of cg_cgroup_links pointing at css_sets with | ||
112 | * tasks in this cgroup. Protected by css_set_lock | ||
113 | */ | ||
114 | struct list_head css_sets; | ||
115 | }; | ||
116 | |||
117 | /* A css_set is a structure holding pointers to a set of | ||
118 | * cgroup_subsys_state objects. This saves space in the task struct | ||
119 | * object and speeds up fork()/exit(), since a single inc/dec and a | ||
120 | * list_add()/del() can bump the reference count on the entire | ||
121 | * cgroup set for a task. | ||
122 | */ | ||
123 | |||
124 | struct css_set { | ||
125 | |||
126 | /* Reference count */ | ||
127 | struct kref ref; | ||
128 | |||
129 | /* | ||
130 | * List running through all cgroup groups. Protected by | ||
131 | * css_set_lock | ||
132 | */ | ||
133 | struct list_head list; | ||
134 | |||
135 | /* | ||
136 | * List running through all tasks using this cgroup | ||
137 | * group. Protected by css_set_lock | ||
138 | */ | ||
139 | struct list_head tasks; | ||
140 | |||
141 | /* | ||
142 | * List of cg_cgroup_link objects on link chains from | ||
143 | * cgroups referenced from this css_set. Protected by | ||
144 | * css_set_lock | ||
145 | */ | ||
146 | struct list_head cg_links; | ||
147 | |||
148 | /* | ||
149 | * Set of subsystem states, one for each subsystem. This array | ||
150 | * is immutable after creation apart from the init_css_set | ||
151 | * during subsystem registration (at boot time). | ||
152 | */ | ||
153 | struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; | ||
154 | |||
100 | }; | 155 | }; |
101 | 156 | ||
102 | /* struct cftype: | 157 | /* struct cftype: |
@@ -157,15 +212,7 @@ int cgroup_is_removed(const struct cgroup *cont); | |||
157 | 212 | ||
158 | int cgroup_path(const struct cgroup *cont, char *buf, int buflen); | 213 | int cgroup_path(const struct cgroup *cont, char *buf, int buflen); |
159 | 214 | ||
160 | int __cgroup_task_count(const struct cgroup *cont); | 215 | int cgroup_task_count(const struct cgroup *cont); |
161 | static inline int cgroup_task_count(const struct cgroup *cont) | ||
162 | { | ||
163 | int task_count; | ||
164 | rcu_read_lock(); | ||
165 | task_count = __cgroup_task_count(cont); | ||
166 | rcu_read_unlock(); | ||
167 | return task_count; | ||
168 | } | ||
169 | 216 | ||
170 | /* Return true if the cgroup is a descendant of the current cgroup */ | 217 | /* Return true if the cgroup is a descendant of the current cgroup */ |
171 | int cgroup_is_descendant(const struct cgroup *cont); | 218 | int cgroup_is_descendant(const struct cgroup *cont); |
@@ -213,7 +260,7 @@ static inline struct cgroup_subsys_state *cgroup_subsys_state( | |||
213 | static inline struct cgroup_subsys_state *task_subsys_state( | 260 | static inline struct cgroup_subsys_state *task_subsys_state( |
214 | struct task_struct *task, int subsys_id) | 261 | struct task_struct *task, int subsys_id) |
215 | { | 262 | { |
216 | return rcu_dereference(task->cgroups.subsys[subsys_id]); | 263 | return rcu_dereference(task->cgroups->subsys[subsys_id]); |
217 | } | 264 | } |
218 | 265 | ||
219 | static inline struct cgroup* task_cgroup(struct task_struct *task, | 266 | static inline struct cgroup* task_cgroup(struct task_struct *task, |
@@ -226,6 +273,27 @@ int cgroup_path(const struct cgroup *cont, char *buf, int buflen); | |||
226 | 273 | ||
227 | int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *ss); | 274 | int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *ss); |
228 | 275 | ||
276 | /* A cgroup_iter should be treated as an opaque object */ | ||
277 | struct cgroup_iter { | ||
278 | struct list_head *cg_link; | ||
279 | struct list_head *task; | ||
280 | }; | ||
281 | |||
282 | /* To iterate across the tasks in a cgroup: | ||
283 | * | ||
284 | * 1) call cgroup_iter_start() to initialize an iterator | |
285 | * | ||
286 | * 2) call cgroup_iter_next() to retrieve member tasks until it | ||
287 | * returns NULL or until you want to end the iteration | ||
288 | * | ||
289 | * 3) call cgroup_iter_end() to destroy the iterator. | ||
290 | */ | ||
291 | void cgroup_iter_start(struct cgroup *cont, struct cgroup_iter *it); | ||
292 | struct task_struct *cgroup_iter_next(struct cgroup *cont, | ||
293 | struct cgroup_iter *it); | ||
294 | void cgroup_iter_end(struct cgroup *cont, struct cgroup_iter *it); | ||
295 | |||
296 | |||
229 | #else /* !CONFIG_CGROUPS */ | 297 | #else /* !CONFIG_CGROUPS */ |
230 | 298 | ||
231 | static inline int cgroup_init_early(void) { return 0; } | 299 | static inline int cgroup_init_early(void) { return 0; } |
@@ -233,6 +301,7 @@ static inline int cgroup_init(void) { return 0; } | |||
233 | static inline void cgroup_init_smp(void) {} | 301 | static inline void cgroup_init_smp(void) {} |
234 | static inline void cgroup_fork(struct task_struct *p) {} | 302 | static inline void cgroup_fork(struct task_struct *p) {} |
235 | static inline void cgroup_fork_callbacks(struct task_struct *p) {} | 303 | static inline void cgroup_fork_callbacks(struct task_struct *p) {} |
304 | static inline void cgroup_post_fork(struct task_struct *p) {} | ||
236 | static inline void cgroup_exit(struct task_struct *p, int callbacks) {} | 305 | static inline void cgroup_exit(struct task_struct *p, int callbacks) {} |
237 | 306 | ||
238 | static inline void cgroup_lock(void) {} | 307 | static inline void cgroup_lock(void) {} |
diff --git a/include/linux/sched.h b/include/linux/sched.h index af2ed4bae678..1aa1cfa63b37 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -894,34 +894,6 @@ struct sched_entity { | |||
894 | #endif | 894 | #endif |
895 | }; | 895 | }; |
896 | 896 | ||
897 | #ifdef CONFIG_CGROUPS | ||
898 | |||
899 | #define SUBSYS(_x) _x ## _subsys_id, | ||
900 | enum cgroup_subsys_id { | ||
901 | #include <linux/cgroup_subsys.h> | ||
902 | CGROUP_SUBSYS_COUNT | ||
903 | }; | ||
904 | #undef SUBSYS | ||
905 | |||
906 | /* A css_set is a structure holding pointers to a set of | ||
907 | * cgroup_subsys_state objects. | ||
908 | */ | ||
909 | |||
910 | struct css_set { | ||
911 | |||
912 | /* Set of subsystem states, one for each subsystem. NULL for | ||
913 | * subsystems that aren't part of this hierarchy. These | ||
914 | * pointers reduce the number of dereferences required to get | ||
915 | * from a task to its state for a given cgroup, but result | ||
916 | * in increased space usage if tasks are in wildly different | ||
917 | * groupings across different hierarchies. This array is | ||
918 | * immutable after creation */ | ||
919 | struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; | ||
920 | |||
921 | }; | ||
922 | |||
923 | #endif /* CONFIG_CGROUPS */ | ||
924 | |||
925 | struct task_struct { | 897 | struct task_struct { |
926 | volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ | 898 | volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ |
927 | void *stack; | 899 | void *stack; |
@@ -1159,7 +1131,10 @@ struct task_struct { | |||
1159 | int cpuset_mem_spread_rotor; | 1131 | int cpuset_mem_spread_rotor; |
1160 | #endif | 1132 | #endif |
1161 | #ifdef CONFIG_CGROUPS | 1133 | #ifdef CONFIG_CGROUPS |
1162 | struct css_set cgroups; | 1134 | /* Control Group info protected by css_set_lock */ |
1135 | struct css_set *cgroups; | ||
1136 | /* cg_list protected by css_set_lock and tsk->alloc_lock */ | ||
1137 | struct list_head cg_list; | ||
1163 | #endif | 1138 | #endif |
1164 | #ifdef CONFIG_FUTEX | 1139 | #ifdef CONFIG_FUTEX |
1165 | struct robust_list_head __user *robust_list; | 1140 | struct robust_list_head __user *robust_list; |