diff options
Diffstat (limited to 'include/linux/cgroup.h')
-rw-r--r-- | include/linux/cgroup.h | 167 |
1 files changed, 114 insertions, 53 deletions
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index f8a030ced0c7..7d73905dcba2 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h | |||
@@ -12,6 +12,7 @@ | |||
12 | #include <linux/cpumask.h> | 12 | #include <linux/cpumask.h> |
13 | #include <linux/nodemask.h> | 13 | #include <linux/nodemask.h> |
14 | #include <linux/rcupdate.h> | 14 | #include <linux/rcupdate.h> |
15 | #include <linux/rculist.h> | ||
15 | #include <linux/cgroupstats.h> | 16 | #include <linux/cgroupstats.h> |
16 | #include <linux/prio_heap.h> | 17 | #include <linux/prio_heap.h> |
17 | #include <linux/rwsem.h> | 18 | #include <linux/rwsem.h> |
@@ -34,7 +35,6 @@ extern int cgroup_lock_is_held(void); | |||
34 | extern bool cgroup_lock_live_group(struct cgroup *cgrp); | 35 | extern bool cgroup_lock_live_group(struct cgroup *cgrp); |
35 | extern void cgroup_unlock(void); | 36 | extern void cgroup_unlock(void); |
36 | extern void cgroup_fork(struct task_struct *p); | 37 | extern void cgroup_fork(struct task_struct *p); |
37 | extern void cgroup_fork_callbacks(struct task_struct *p); | ||
38 | extern void cgroup_post_fork(struct task_struct *p); | 38 | extern void cgroup_post_fork(struct task_struct *p); |
39 | extern void cgroup_exit(struct task_struct *p, int run_callbacks); | 39 | extern void cgroup_exit(struct task_struct *p, int run_callbacks); |
40 | extern int cgroupstats_build(struct cgroupstats *stats, | 40 | extern int cgroupstats_build(struct cgroupstats *stats, |
@@ -66,7 +66,7 @@ struct cgroup_subsys_state { | |||
66 | /* | 66 | /* |
67 | * State maintained by the cgroup system to allow subsystems | 67 | * State maintained by the cgroup system to allow subsystems |
68 | * to be "busy". Should be accessed via css_get(), | 68 | * to be "busy". Should be accessed via css_get(), |
69 | * css_tryget() and and css_put(). | 69 | * css_tryget() and css_put(). |
70 | */ | 70 | */ |
71 | 71 | ||
72 | atomic_t refcnt; | 72 | atomic_t refcnt; |
@@ -81,9 +81,8 @@ struct cgroup_subsys_state { | |||
81 | 81 | ||
82 | /* bits in struct cgroup_subsys_state flags field */ | 82 | /* bits in struct cgroup_subsys_state flags field */ |
83 | enum { | 83 | enum { |
84 | CSS_ROOT, /* This CSS is the root of the subsystem */ | 84 | CSS_ROOT = (1 << 0), /* this CSS is the root of the subsystem */ |
85 | CSS_REMOVED, /* This CSS is dead */ | 85 | CSS_ONLINE = (1 << 1), /* between ->css_online() and ->css_offline() */ |
86 | CSS_CLEAR_CSS_REFS, /* @ss->__DEPRECATED_clear_css_refs */ | ||
87 | }; | 86 | }; |
88 | 87 | ||
89 | /* Caller must verify that the css is not for root cgroup */ | 88 | /* Caller must verify that the css is not for root cgroup */ |
@@ -102,15 +101,10 @@ static inline void __css_get(struct cgroup_subsys_state *css, int count) | |||
102 | static inline void css_get(struct cgroup_subsys_state *css) | 101 | static inline void css_get(struct cgroup_subsys_state *css) |
103 | { | 102 | { |
104 | /* We don't need to reference count the root state */ | 103 | /* We don't need to reference count the root state */ |
105 | if (!test_bit(CSS_ROOT, &css->flags)) | 104 | if (!(css->flags & CSS_ROOT)) |
106 | __css_get(css, 1); | 105 | __css_get(css, 1); |
107 | } | 106 | } |
108 | 107 | ||
109 | static inline bool css_is_removed(struct cgroup_subsys_state *css) | ||
110 | { | ||
111 | return test_bit(CSS_REMOVED, &css->flags); | ||
112 | } | ||
113 | |||
114 | /* | 108 | /* |
115 | * Call css_tryget() to take a reference on a css if your existing | 109 | * Call css_tryget() to take a reference on a css if your existing |
116 | * (known-valid) reference isn't already ref-counted. Returns false if | 110 | * (known-valid) reference isn't already ref-counted. Returns false if |
@@ -120,7 +114,7 @@ static inline bool css_is_removed(struct cgroup_subsys_state *css) | |||
120 | extern bool __css_tryget(struct cgroup_subsys_state *css); | 114 | extern bool __css_tryget(struct cgroup_subsys_state *css); |
121 | static inline bool css_tryget(struct cgroup_subsys_state *css) | 115 | static inline bool css_tryget(struct cgroup_subsys_state *css) |
122 | { | 116 | { |
123 | if (test_bit(CSS_ROOT, &css->flags)) | 117 | if (css->flags & CSS_ROOT) |
124 | return true; | 118 | return true; |
125 | return __css_tryget(css); | 119 | return __css_tryget(css); |
126 | } | 120 | } |
@@ -133,7 +127,7 @@ static inline bool css_tryget(struct cgroup_subsys_state *css) | |||
133 | extern void __css_put(struct cgroup_subsys_state *css); | 127 | extern void __css_put(struct cgroup_subsys_state *css); |
134 | static inline void css_put(struct cgroup_subsys_state *css) | 128 | static inline void css_put(struct cgroup_subsys_state *css) |
135 | { | 129 | { |
136 | if (!test_bit(CSS_ROOT, &css->flags)) | 130 | if (!(css->flags & CSS_ROOT)) |
137 | __css_put(css); | 131 | __css_put(css); |
138 | } | 132 | } |
139 | 133 | ||
@@ -149,13 +143,11 @@ enum { | |||
149 | /* Control Group requires release notifications to userspace */ | 143 | /* Control Group requires release notifications to userspace */ |
150 | CGRP_NOTIFY_ON_RELEASE, | 144 | CGRP_NOTIFY_ON_RELEASE, |
151 | /* | 145 | /* |
152 | * A thread in rmdir() is wating for this cgroup. | 146 | * Clone the parent's configuration when creating a new child |
153 | */ | 147 | * cpuset cgroup. For historical reasons, this option can be |
154 | CGRP_WAIT_ON_RMDIR, | 148 | * specified at mount time and thus is implemented here. |
155 | /* | ||
156 | * Clone cgroup values when creating a new child cgroup | ||
157 | */ | 149 | */ |
158 | CGRP_CLONE_CHILDREN, | 150 | CGRP_CPUSET_CLONE_CHILDREN, |
159 | }; | 151 | }; |
160 | 152 | ||
161 | struct cgroup { | 153 | struct cgroup { |
@@ -167,6 +159,8 @@ struct cgroup { | |||
167 | */ | 159 | */ |
168 | atomic_t count; | 160 | atomic_t count; |
169 | 161 | ||
162 | int id; /* ida allocated in-hierarchy ID */ | ||
163 | |||
170 | /* | 164 | /* |
171 | * We link our 'sibling' struct into our parent's 'children'. | 165 | * We link our 'sibling' struct into our parent's 'children'. |
172 | * Our children link their 'sibling' into our 'children'. | 166 | * Our children link their 'sibling' into our 'children'. |
@@ -176,7 +170,7 @@ struct cgroup { | |||
176 | struct list_head files; /* my files */ | 170 | struct list_head files; /* my files */ |
177 | 171 | ||
178 | struct cgroup *parent; /* my parent */ | 172 | struct cgroup *parent; /* my parent */ |
179 | struct dentry __rcu *dentry; /* cgroup fs entry, RCU protected */ | 173 | struct dentry *dentry; /* cgroup fs entry, RCU protected */ |
180 | 174 | ||
181 | /* Private pointers for each registered subsystem */ | 175 | /* Private pointers for each registered subsystem */ |
182 | struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; | 176 | struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; |
@@ -282,7 +276,7 @@ struct cgroup_map_cb { | |||
282 | 276 | ||
283 | /* cftype->flags */ | 277 | /* cftype->flags */ |
284 | #define CFTYPE_ONLY_ON_ROOT (1U << 0) /* only create on root cg */ | 278 | #define CFTYPE_ONLY_ON_ROOT (1U << 0) /* only create on root cg */ |
285 | #define CFTYPE_NOT_ON_ROOT (1U << 1) /* don't create onp root cg */ | 279 | #define CFTYPE_NOT_ON_ROOT (1U << 1) /* don't create on root cg */ |
286 | 280 | ||
287 | #define MAX_CFTYPE_NAME 64 | 281 | #define MAX_CFTYPE_NAME 64 |
288 | 282 | ||
@@ -422,23 +416,6 @@ int cgroup_task_count(const struct cgroup *cgrp); | |||
422 | int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task); | 416 | int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task); |
423 | 417 | ||
424 | /* | 418 | /* |
425 | * When the subsys has to access css and may add permanent refcnt to css, | ||
426 | * it should take care of racy conditions with rmdir(). Following set of | ||
427 | * functions, is for stop/restart rmdir if necessary. | ||
428 | * Because these will call css_get/put, "css" should be alive css. | ||
429 | * | ||
430 | * cgroup_exclude_rmdir(); | ||
431 | * ...do some jobs which may access arbitrary empty cgroup | ||
432 | * cgroup_release_and_wakeup_rmdir(); | ||
433 | * | ||
434 | * When someone removes a cgroup while cgroup_exclude_rmdir() holds it, | ||
435 | * it sleeps and cgroup_release_and_wakeup_rmdir() will wake him up. | ||
436 | */ | ||
437 | |||
438 | void cgroup_exclude_rmdir(struct cgroup_subsys_state *css); | ||
439 | void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css); | ||
440 | |||
441 | /* | ||
442 | * Control Group taskset, used to pass around set of tasks to cgroup_subsys | 419 | * Control Group taskset, used to pass around set of tasks to cgroup_subsys |
443 | * methods. | 420 | * methods. |
444 | */ | 421 | */ |
@@ -466,16 +443,17 @@ int cgroup_taskset_size(struct cgroup_taskset *tset); | |||
466 | */ | 443 | */ |
467 | 444 | ||
468 | struct cgroup_subsys { | 445 | struct cgroup_subsys { |
469 | struct cgroup_subsys_state *(*create)(struct cgroup *cgrp); | 446 | struct cgroup_subsys_state *(*css_alloc)(struct cgroup *cgrp); |
470 | int (*pre_destroy)(struct cgroup *cgrp); | 447 | int (*css_online)(struct cgroup *cgrp); |
471 | void (*destroy)(struct cgroup *cgrp); | 448 | void (*css_offline)(struct cgroup *cgrp); |
449 | void (*css_free)(struct cgroup *cgrp); | ||
450 | |||
472 | int (*can_attach)(struct cgroup *cgrp, struct cgroup_taskset *tset); | 451 | int (*can_attach)(struct cgroup *cgrp, struct cgroup_taskset *tset); |
473 | void (*cancel_attach)(struct cgroup *cgrp, struct cgroup_taskset *tset); | 452 | void (*cancel_attach)(struct cgroup *cgrp, struct cgroup_taskset *tset); |
474 | void (*attach)(struct cgroup *cgrp, struct cgroup_taskset *tset); | 453 | void (*attach)(struct cgroup *cgrp, struct cgroup_taskset *tset); |
475 | void (*fork)(struct task_struct *task); | 454 | void (*fork)(struct task_struct *task); |
476 | void (*exit)(struct cgroup *cgrp, struct cgroup *old_cgrp, | 455 | void (*exit)(struct cgroup *cgrp, struct cgroup *old_cgrp, |
477 | struct task_struct *task); | 456 | struct task_struct *task); |
478 | void (*post_clone)(struct cgroup *cgrp); | ||
479 | void (*bind)(struct cgroup *root); | 457 | void (*bind)(struct cgroup *root); |
480 | 458 | ||
481 | int subsys_id; | 459 | int subsys_id; |
@@ -489,17 +467,6 @@ struct cgroup_subsys { | |||
489 | bool use_id; | 467 | bool use_id; |
490 | 468 | ||
491 | /* | 469 | /* |
492 | * If %true, cgroup removal will try to clear css refs by retrying | ||
493 | * ss->pre_destroy() until there's no css ref left. This behavior | ||
494 | * is strictly for backward compatibility and will be removed as | ||
495 | * soon as the current user (memcg) is updated. | ||
496 | * | ||
497 | * If %false, ss->pre_destroy() can't fail and cgroup removal won't | ||
498 | * wait for css refs to drop to zero before proceeding. | ||
499 | */ | ||
500 | bool __DEPRECATED_clear_css_refs; | ||
501 | |||
502 | /* | ||
503 | * If %false, this subsystem is properly hierarchical - | 470 | * If %false, this subsystem is properly hierarchical - |
504 | * configuration, resource accounting and restriction on a parent | 471 | * configuration, resource accounting and restriction on a parent |
505 | * cgroup cover those of its children. If %true, hierarchy support | 472 | * cgroup cover those of its children. If %true, hierarchy support |
@@ -572,6 +539,100 @@ static inline struct cgroup* task_cgroup(struct task_struct *task, | |||
572 | return task_subsys_state(task, subsys_id)->cgroup; | 539 | return task_subsys_state(task, subsys_id)->cgroup; |
573 | } | 540 | } |
574 | 541 | ||
542 | /** | ||
543 | * cgroup_for_each_child - iterate through children of a cgroup | ||
544 | * @pos: the cgroup * to use as the loop cursor | ||
545 | * @cgroup: cgroup whose children to walk | ||
546 | * | ||
547 | * Walk @cgroup's children. Must be called under rcu_read_lock(). A child | ||
548 | * cgroup which hasn't finished ->css_online() or already has finished | ||
549 | * ->css_offline() may show up during traversal and it's each subsystem's | ||
550 | * responsibility to verify that each @pos is alive. | ||
551 | * | ||
552 | * If a subsystem synchronizes against the parent in its ->css_online() and | ||
553 | * before starting iterating, a cgroup which finished ->css_online() is | ||
554 | * guaranteed to be visible in the future iterations. | ||
555 | */ | ||
556 | #define cgroup_for_each_child(pos, cgroup) \ | ||
557 | list_for_each_entry_rcu(pos, &(cgroup)->children, sibling) | ||
558 | |||
559 | struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos, | ||
560 | struct cgroup *cgroup); | ||
561 | |||
562 | /** | ||
563 | * cgroup_for_each_descendant_pre - pre-order walk of a cgroup's descendants | ||
564 | * @pos: the cgroup * to use as the loop cursor | ||
565 | * @cgroup: cgroup whose descendants to walk | ||
566 | * | ||
567 | * Walk @cgroup's descendants. Must be called under rcu_read_lock(). A | ||
568 | * descendant cgroup which hasn't finished ->css_online() or already has | ||
569 | * finished ->css_offline() may show up during traversal and it's each | ||
570 | * subsystem's responsibility to verify that each @pos is alive. | ||
571 | * | ||
572 | * If a subsystem synchronizes against the parent in its ->css_online() and | ||
573 | * before starting iterating, and synchronizes against @pos on each | ||
574 | * iteration, any descendant cgroup which finished ->css_online() is | ||
575 | * guaranteed to be visible in the future iterations. | ||
576 | * | ||
577 | * In other words, the following guarantees that a descendant can't escape | ||
578 | * state updates of its ancestors. | ||
579 | * | ||
580 | * my_online(@cgrp) | ||
581 | * { | ||
582 | * Lock @cgrp->parent and @cgrp; | ||
583 | * Inherit state from @cgrp->parent; | ||
584 | * Unlock both. | ||
585 | * } | ||
586 | * | ||
587 | * my_update_state(@cgrp) | ||
588 | * { | ||
589 | * Lock @cgrp; | ||
590 | * Update @cgrp's state; | ||
591 | * Unlock @cgrp; | ||
592 | * | ||
593 | * cgroup_for_each_descendant_pre(@pos, @cgrp) { | ||
594 | * Lock @pos; | ||
595 | * Verify @pos is alive and inherit state from @pos->parent; | ||
596 | * Unlock @pos; | ||
597 | * } | ||
598 | * } | ||
599 | * | ||
600 | * As long as the inheriting step, including checking the parent state, is | ||
601 | * enclosed inside @pos locking, double-locking the parent isn't necessary | ||
602 | * while inheriting. The state update to the parent is guaranteed to be | ||
603 | * visible by walking order and, as long as inheriting operations to the | ||
604 | * same @pos are atomic to each other, multiple updates racing each other | ||
605 | * still result in the correct state. It's guaranteed that at least one | ||
606 | * inheritance happens for any cgroup after the latest update to its | ||
607 | * parent. | ||
608 | * | ||
609 | * If checking parent's state requires locking the parent, each inheriting | ||
610 | * iteration should lock and unlock both @pos->parent and @pos. | ||
611 | * | ||
612 | * Alternatively, a subsystem may choose to use a single global lock to | ||
613 | * synchronize ->css_online() and ->css_offline() against tree-walking | ||
614 | * operations. | ||
615 | */ | ||
616 | #define cgroup_for_each_descendant_pre(pos, cgroup) \ | ||
617 | for (pos = cgroup_next_descendant_pre(NULL, (cgroup)); (pos); \ | ||
618 | pos = cgroup_next_descendant_pre((pos), (cgroup))) | ||
619 | |||
620 | struct cgroup *cgroup_next_descendant_post(struct cgroup *pos, | ||
621 | struct cgroup *cgroup); | ||
622 | |||
623 | /** | ||
624 | * cgroup_for_each_descendant_post - post-order walk of a cgroup's descendants | ||
625 | * @pos: the cgroup * to use as the loop cursor | ||
626 | * @cgroup: cgroup whose descendants to walk | ||
627 | * | ||
628 | * Similar to cgroup_for_each_descendant_pre() but performs post-order | ||
629 | * traversal instead. Note that the walk visibility guarantee described in | ||
630 | * pre-order walk doesn't apply the same to post-order walks. | ||
631 | */ | ||
632 | #define cgroup_for_each_descendant_post(pos, cgroup) \ | ||
633 | for (pos = cgroup_next_descendant_post(NULL, (cgroup)); (pos); \ | ||
634 | pos = cgroup_next_descendant_post((pos), (cgroup))) | ||
635 | |||
575 | /* A cgroup_iter should be treated as an opaque object */ | 636 | /* A cgroup_iter should be treated as an opaque object */ |
576 | struct cgroup_iter { | 637 | struct cgroup_iter { |
577 | struct list_head *cg_link; | 638 | struct list_head *cg_link; |