diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-11-05 17:51:32 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-11-05 17:51:32 -0500 |
commit | 69234acee54407962a20bedf90ef9c96326994b5 (patch) | |
tree | 5e979b1a489d866691c2c65ac3f46b4f29feef68 /include/linux/cgroup-defs.h | |
parent | 11eaaadb3ea376c6c194491c2e9bddd647f9d253 (diff) | |
parent | d57456753787ab158f906f1f8eb58d54a2ccd9f4 (diff) |
Merge branch 'for-4.4' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup updates from Tejun Heo:
"The cgroup core saw several significant updates this cycle:
- percpu_rwsem for threadgroup locking is reinstated. This was
temporarily dropped due to down_write latency issues. Oleg's
rework of percpu_rwsem which is scheduled to be merged in this
merge window resolves the issue.
- On the v2 hierarchy, when controllers are enabled and disabled, all
operations are atomic and can fail and revert cleanly. This allows
->can_attach() failure which is necessary for cpu RT slices.
- Tasks now stay associated with the original cgroups after exit
until released. This allows tracking resources held by zombies
(e.g. pids) and makes it easy to find out where zombies came from
on the v2 hierarchy. The pids controller was broken before these
changes as zombies escaped the limits; unfortunately, updating this
behavior required too many invasive changes and I don't think it's
a good idea to backport them, so the pids controller on 4.3, the
first version which included the pids controller, will stay broken
at least until I'm sure about the cgroup core changes.
- Optimization of a couple common tests using static_key"
* 'for-4.4' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup: (38 commits)
cgroup: fix race condition around termination check in css_task_iter_next()
blkcg: don't create "io.stat" on the root cgroup
cgroup: drop cgroup__DEVEL__legacy_files_on_dfl
cgroup: replace error handling in cgroup_init() with WARN_ON()s
cgroup: add cgroup_subsys->free() method and use it to fix pids controller
cgroup: keep zombies associated with their original cgroups
cgroup: make css_set_rwsem a spinlock and rename it to css_set_lock
cgroup: don't hold css_set_rwsem across css task iteration
cgroup: reorganize css_task_iter functions
cgroup: factor out css_set_move_task()
cgroup: keep css_set and task lists in chronological order
cgroup: make cgroup_destroy_locked() test cgroup_is_populated()
cgroup: make css_sets pin the associated cgroups
cgroup: relocate cgroup_[try]get/put()
cgroup: move check_for_release() invocation
cgroup: replace cgroup_has_tasks() with cgroup_is_populated()
cgroup: make cgroup->nr_populated count the number of populated css_sets
cgroup: remove an unused parameter from cgroup_task_migrate()
cgroup: fix too early usage of static_branch_disable()
cgroup: make cgroup_update_dfl_csses() migrate all target processes atomically
...
Diffstat (limited to 'include/linux/cgroup-defs.h')
-rw-r--r-- | include/linux/cgroup-defs.h | 76 |
1 files changed, 59 insertions, 17 deletions
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 8492721b39be..60d44b26276d 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h | |||
@@ -76,6 +76,7 @@ enum { | |||
76 | CFTYPE_ONLY_ON_ROOT = (1 << 0), /* only create on root cgrp */ | 76 | CFTYPE_ONLY_ON_ROOT = (1 << 0), /* only create on root cgrp */ |
77 | CFTYPE_NOT_ON_ROOT = (1 << 1), /* don't create on root cgrp */ | 77 | CFTYPE_NOT_ON_ROOT = (1 << 1), /* don't create on root cgrp */ |
78 | CFTYPE_NO_PREFIX = (1 << 3), /* (DON'T USE FOR NEW FILES) no subsys prefix */ | 78 | CFTYPE_NO_PREFIX = (1 << 3), /* (DON'T USE FOR NEW FILES) no subsys prefix */ |
79 | CFTYPE_WORLD_WRITABLE = (1 << 4), /* (DON'T USE FOR NEW FILES) S_IWUGO */ | ||
79 | 80 | ||
80 | /* internal flags, do not use outside cgroup core proper */ | 81 | /* internal flags, do not use outside cgroup core proper */ |
81 | __CFTYPE_ONLY_ON_DFL = (1 << 16), /* only on default hierarchy */ | 82 | __CFTYPE_ONLY_ON_DFL = (1 << 16), /* only on default hierarchy */ |
@@ -83,6 +84,17 @@ enum { | |||
83 | }; | 84 | }; |
84 | 85 | ||
85 | /* | 86 | /* |
87 | * cgroup_file is the handle for a file instance created in a cgroup which | ||
88 | * is used, for example, to generate file changed notifications. This can | ||
89 | * be obtained by setting cftype->file_offset. | ||
90 | */ | ||
91 | struct cgroup_file { | ||
92 | /* do not access any fields from outside cgroup core */ | ||
93 | struct list_head node; /* anchored at css->files */ | ||
94 | struct kernfs_node *kn; | ||
95 | }; | ||
96 | |||
97 | /* | ||
86 | * Per-subsystem/per-cgroup state maintained by the system. This is the | 98 | * Per-subsystem/per-cgroup state maintained by the system. This is the |
87 | * fundamental structural building block that controllers deal with. | 99 | * fundamental structural building block that controllers deal with. |
88 | * | 100 | * |
@@ -122,6 +134,9 @@ struct cgroup_subsys_state { | |||
122 | */ | 134 | */ |
123 | u64 serial_nr; | 135 | u64 serial_nr; |
124 | 136 | ||
137 | /* all cgroup_files associated with this css */ | ||
138 | struct list_head files; | ||
139 | |||
125 | /* percpu_ref killing and RCU release */ | 140 | /* percpu_ref killing and RCU release */ |
126 | struct rcu_head rcu_head; | 141 | struct rcu_head rcu_head; |
127 | struct work_struct destroy_work; | 142 | struct work_struct destroy_work; |
@@ -196,6 +211,9 @@ struct css_set { | |||
196 | */ | 211 | */ |
197 | struct list_head e_cset_node[CGROUP_SUBSYS_COUNT]; | 212 | struct list_head e_cset_node[CGROUP_SUBSYS_COUNT]; |
198 | 213 | ||
214 | /* all css_task_iters currently walking this cset */ | ||
215 | struct list_head task_iters; | ||
216 | |||
199 | /* For RCU-protected deletion */ | 217 | /* For RCU-protected deletion */ |
200 | struct rcu_head rcu_head; | 218 | struct rcu_head rcu_head; |
201 | }; | 219 | }; |
@@ -217,16 +235,16 @@ struct cgroup { | |||
217 | int id; | 235 | int id; |
218 | 236 | ||
219 | /* | 237 | /* |
220 | * If this cgroup contains any tasks, it contributes one to | 238 | * Each non-empty css_set associated with this cgroup contributes |
221 | * populated_cnt. All children with non-zero populated_cnt of | 239 | * one to populated_cnt. All children with non-zero populated_cnt |
222 | * their own contribute one. The count is zero iff there's no task | 240 | * of their own contribute one. The count is zero iff there's no |
223 | * in this cgroup or its subtree. | 241 | * task in this cgroup or its subtree. |
224 | */ | 242 | */ |
225 | int populated_cnt; | 243 | int populated_cnt; |
226 | 244 | ||
227 | struct kernfs_node *kn; /* cgroup kernfs entry */ | 245 | struct kernfs_node *kn; /* cgroup kernfs entry */ |
228 | struct kernfs_node *procs_kn; /* kn for "cgroup.procs" */ | 246 | struct cgroup_file procs_file; /* handle for "cgroup.procs" */ |
229 | struct kernfs_node *populated_kn; /* kn for "cgroup.subtree_populated" */ | 247 | struct cgroup_file events_file; /* handle for "cgroup.events" */ |
230 | 248 | ||
231 | /* | 249 | /* |
232 | * The bitmask of subsystems enabled on the child cgroups. | 250 | * The bitmask of subsystems enabled on the child cgroups. |
@@ -324,11 +342,6 @@ struct cftype { | |||
324 | */ | 342 | */ |
325 | char name[MAX_CFTYPE_NAME]; | 343 | char name[MAX_CFTYPE_NAME]; |
326 | unsigned long private; | 344 | unsigned long private; |
327 | /* | ||
328 | * If not 0, file mode is set to this value, otherwise it will | ||
329 | * be figured out automatically | ||
330 | */ | ||
331 | umode_t mode; | ||
332 | 345 | ||
333 | /* | 346 | /* |
334 | * The maximum length of string, excluding trailing nul, that can | 347 | * The maximum length of string, excluding trailing nul, that can |
@@ -340,6 +353,14 @@ struct cftype { | |||
340 | unsigned int flags; | 353 | unsigned int flags; |
341 | 354 | ||
342 | /* | 355 | /* |
356 | * If non-zero, should contain the offset from the start of css to | ||
357 | * a struct cgroup_file field. cgroup will record the handle of | ||
358 | * the created file into it. The recorded handle can be used as | ||
359 | * long as the containing css remains accessible. | ||
360 | */ | ||
361 | unsigned int file_offset; | ||
362 | |||
363 | /* | ||
343 | * Fields used for internal bookkeeping. Initialized automatically | 364 | * Fields used for internal bookkeeping. Initialized automatically |
344 | * during registration. | 365 | * during registration. |
345 | */ | 366 | */ |
@@ -414,12 +435,10 @@ struct cgroup_subsys { | |||
414 | int (*can_fork)(struct task_struct *task, void **priv_p); | 435 | int (*can_fork)(struct task_struct *task, void **priv_p); |
415 | void (*cancel_fork)(struct task_struct *task, void *priv); | 436 | void (*cancel_fork)(struct task_struct *task, void *priv); |
416 | void (*fork)(struct task_struct *task, void *priv); | 437 | void (*fork)(struct task_struct *task, void *priv); |
417 | void (*exit)(struct cgroup_subsys_state *css, | 438 | void (*exit)(struct task_struct *task); |
418 | struct cgroup_subsys_state *old_css, | 439 | void (*free)(struct task_struct *task); |
419 | struct task_struct *task); | ||
420 | void (*bind)(struct cgroup_subsys_state *root_css); | 440 | void (*bind)(struct cgroup_subsys_state *root_css); |
421 | 441 | ||
422 | int disabled; | ||
423 | int early_init; | 442 | int early_init; |
424 | 443 | ||
425 | /* | 444 | /* |
@@ -473,8 +492,31 @@ struct cgroup_subsys { | |||
473 | unsigned int depends_on; | 492 | unsigned int depends_on; |
474 | }; | 493 | }; |
475 | 494 | ||
476 | void cgroup_threadgroup_change_begin(struct task_struct *tsk); | 495 | extern struct percpu_rw_semaphore cgroup_threadgroup_rwsem; |
477 | void cgroup_threadgroup_change_end(struct task_struct *tsk); | 496 | |
497 | /** | ||
498 | * cgroup_threadgroup_change_begin - threadgroup exclusion for cgroups | ||
499 | * @tsk: target task | ||
500 | * | ||
501 | * Called from threadgroup_change_begin() and allows cgroup operations to | ||
502 | * synchronize against threadgroup changes using a percpu_rw_semaphore. | ||
503 | */ | ||
504 | static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk) | ||
505 | { | ||
506 | percpu_down_read(&cgroup_threadgroup_rwsem); | ||
507 | } | ||
508 | |||
509 | /** | ||
510 | * cgroup_threadgroup_change_end - threadgroup exclusion for cgroups | ||
511 | * @tsk: target task | ||
512 | * | ||
513 | * Called from threadgroup_change_end(). Counterpart of | ||
514 | * cgroup_threadgroup_change_begin(). | |
515 | */ | ||
516 | static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) | ||
517 | { | ||
518 | percpu_up_read(&cgroup_threadgroup_rwsem); | ||
519 | } | ||
478 | 520 | ||
479 | #else /* CONFIG_CGROUPS */ | 521 | #else /* CONFIG_CGROUPS */ |
480 | 522 | ||