aboutsummaryrefslogtreecommitdiffstats
path: root/include/linux/cgroup.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux/cgroup.h')
-rw-r--r--include/linux/cgroup.h170
1 files changed, 116 insertions, 54 deletions
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index f8a030ced0c7..900af5964f55 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -12,6 +12,7 @@
12#include <linux/cpumask.h> 12#include <linux/cpumask.h>
13#include <linux/nodemask.h> 13#include <linux/nodemask.h>
14#include <linux/rcupdate.h> 14#include <linux/rcupdate.h>
15#include <linux/rculist.h>
15#include <linux/cgroupstats.h> 16#include <linux/cgroupstats.h>
16#include <linux/prio_heap.h> 17#include <linux/prio_heap.h>
17#include <linux/rwsem.h> 18#include <linux/rwsem.h>
@@ -34,7 +35,6 @@ extern int cgroup_lock_is_held(void);
34extern bool cgroup_lock_live_group(struct cgroup *cgrp); 35extern bool cgroup_lock_live_group(struct cgroup *cgrp);
35extern void cgroup_unlock(void); 36extern void cgroup_unlock(void);
36extern void cgroup_fork(struct task_struct *p); 37extern void cgroup_fork(struct task_struct *p);
37extern void cgroup_fork_callbacks(struct task_struct *p);
38extern void cgroup_post_fork(struct task_struct *p); 38extern void cgroup_post_fork(struct task_struct *p);
39extern void cgroup_exit(struct task_struct *p, int run_callbacks); 39extern void cgroup_exit(struct task_struct *p, int run_callbacks);
40extern int cgroupstats_build(struct cgroupstats *stats, 40extern int cgroupstats_build(struct cgroupstats *stats,
@@ -66,7 +66,7 @@ struct cgroup_subsys_state {
66 /* 66 /*
67 * State maintained by the cgroup system to allow subsystems 67 * State maintained by the cgroup system to allow subsystems
68 * to be "busy". Should be accessed via css_get(), 68 * to be "busy". Should be accessed via css_get(),
69 * css_tryget() and and css_put(). 69 * css_tryget() and css_put().
70 */ 70 */
71 71
72 atomic_t refcnt; 72 atomic_t refcnt;
@@ -81,9 +81,8 @@ struct cgroup_subsys_state {
81 81
82/* bits in struct cgroup_subsys_state flags field */ 82/* bits in struct cgroup_subsys_state flags field */
83enum { 83enum {
84 CSS_ROOT, /* This CSS is the root of the subsystem */ 84 CSS_ROOT = (1 << 0), /* this CSS is the root of the subsystem */
85 CSS_REMOVED, /* This CSS is dead */ 85 CSS_ONLINE = (1 << 1), /* between ->css_online() and ->css_offline() */
86 CSS_CLEAR_CSS_REFS, /* @ss->__DEPRECATED_clear_css_refs */
87}; 86};
88 87
89/* Caller must verify that the css is not for root cgroup */ 88/* Caller must verify that the css is not for root cgroup */
@@ -102,15 +101,10 @@ static inline void __css_get(struct cgroup_subsys_state *css, int count)
102static inline void css_get(struct cgroup_subsys_state *css) 101static inline void css_get(struct cgroup_subsys_state *css)
103{ 102{
104 /* We don't need to reference count the root state */ 103 /* We don't need to reference count the root state */
105 if (!test_bit(CSS_ROOT, &css->flags)) 104 if (!(css->flags & CSS_ROOT))
106 __css_get(css, 1); 105 __css_get(css, 1);
107} 106}
108 107
109static inline bool css_is_removed(struct cgroup_subsys_state *css)
110{
111 return test_bit(CSS_REMOVED, &css->flags);
112}
113
114/* 108/*
115 * Call css_tryget() to take a reference on a css if your existing 109 * Call css_tryget() to take a reference on a css if your existing
116 * (known-valid) reference isn't already ref-counted. Returns false if 110 * (known-valid) reference isn't already ref-counted. Returns false if
@@ -120,7 +114,7 @@ static inline bool css_is_removed(struct cgroup_subsys_state *css)
120extern bool __css_tryget(struct cgroup_subsys_state *css); 114extern bool __css_tryget(struct cgroup_subsys_state *css);
121static inline bool css_tryget(struct cgroup_subsys_state *css) 115static inline bool css_tryget(struct cgroup_subsys_state *css)
122{ 116{
123 if (test_bit(CSS_ROOT, &css->flags)) 117 if (css->flags & CSS_ROOT)
124 return true; 118 return true;
125 return __css_tryget(css); 119 return __css_tryget(css);
126} 120}
@@ -133,7 +127,7 @@ static inline bool css_tryget(struct cgroup_subsys_state *css)
133extern void __css_put(struct cgroup_subsys_state *css); 127extern void __css_put(struct cgroup_subsys_state *css);
134static inline void css_put(struct cgroup_subsys_state *css) 128static inline void css_put(struct cgroup_subsys_state *css)
135{ 129{
136 if (!test_bit(CSS_ROOT, &css->flags)) 130 if (!(css->flags & CSS_ROOT))
137 __css_put(css); 131 __css_put(css);
138} 132}
139 133
@@ -149,13 +143,11 @@ enum {
149 /* Control Group requires release notifications to userspace */ 143 /* Control Group requires release notifications to userspace */
150 CGRP_NOTIFY_ON_RELEASE, 144 CGRP_NOTIFY_ON_RELEASE,
151 /* 145 /*
152 * A thread in rmdir() is wating for this cgroup. 146 * Clone the parent's configuration when creating a new child
153 */ 147 * cpuset cgroup. For historical reasons, this option can be
154 CGRP_WAIT_ON_RMDIR, 148 * specified at mount time and thus is implemented here.
155 /*
156 * Clone cgroup values when creating a new child cgroup
157 */ 149 */
158 CGRP_CLONE_CHILDREN, 150 CGRP_CPUSET_CLONE_CHILDREN,
159}; 151};
160 152
161struct cgroup { 153struct cgroup {
@@ -167,6 +159,8 @@ struct cgroup {
167 */ 159 */
168 atomic_t count; 160 atomic_t count;
169 161
162 int id; /* ida allocated in-hierarchy ID */
163
170 /* 164 /*
171 * We link our 'sibling' struct into our parent's 'children'. 165 * We link our 'sibling' struct into our parent's 'children'.
172 * Our children link their 'sibling' into our 'children'. 166 * Our children link their 'sibling' into our 'children'.
@@ -176,7 +170,7 @@ struct cgroup {
176 struct list_head files; /* my files */ 170 struct list_head files; /* my files */
177 171
178 struct cgroup *parent; /* my parent */ 172 struct cgroup *parent; /* my parent */
179 struct dentry __rcu *dentry; /* cgroup fs entry, RCU protected */ 173 struct dentry *dentry; /* cgroup fs entry, RCU protected */
180 174
181 /* Private pointers for each registered subsystem */ 175 /* Private pointers for each registered subsystem */
182 struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; 176 struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];
@@ -209,6 +203,7 @@ struct cgroup {
209 203
210 /* For RCU-protected deletion */ 204 /* For RCU-protected deletion */
211 struct rcu_head rcu_head; 205 struct rcu_head rcu_head;
206 struct work_struct free_work;
212 207
213 /* List of events which userspace want to receive */ 208 /* List of events which userspace want to receive */
214 struct list_head event_list; 209 struct list_head event_list;
@@ -282,7 +277,7 @@ struct cgroup_map_cb {
282 277
283/* cftype->flags */ 278/* cftype->flags */
284#define CFTYPE_ONLY_ON_ROOT (1U << 0) /* only create on root cg */ 279#define CFTYPE_ONLY_ON_ROOT (1U << 0) /* only create on root cg */
285#define CFTYPE_NOT_ON_ROOT (1U << 1) /* don't create onp root cg */ 280#define CFTYPE_NOT_ON_ROOT (1U << 1) /* don't create on root cg */
286 281
287#define MAX_CFTYPE_NAME 64 282#define MAX_CFTYPE_NAME 64
288 283
@@ -422,23 +417,6 @@ int cgroup_task_count(const struct cgroup *cgrp);
422int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task); 417int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task);
423 418
424/* 419/*
425 * When the subsys has to access css and may add permanent refcnt to css,
426 * it should take care of racy conditions with rmdir(). Following set of
427 * functions, is for stop/restart rmdir if necessary.
428 * Because these will call css_get/put, "css" should be alive css.
429 *
430 * cgroup_exclude_rmdir();
431 * ...do some jobs which may access arbitrary empty cgroup
432 * cgroup_release_and_wakeup_rmdir();
433 *
434 * When someone removes a cgroup while cgroup_exclude_rmdir() holds it,
435 * it sleeps and cgroup_release_and_wakeup_rmdir() will wake him up.
436 */
437
438void cgroup_exclude_rmdir(struct cgroup_subsys_state *css);
439void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css);
440
441/*
442 * Control Group taskset, used to pass around set of tasks to cgroup_subsys 420 * Control Group taskset, used to pass around set of tasks to cgroup_subsys
443 * methods. 421 * methods.
444 */ 422 */
@@ -466,16 +444,17 @@ int cgroup_taskset_size(struct cgroup_taskset *tset);
466 */ 444 */
467 445
468struct cgroup_subsys { 446struct cgroup_subsys {
469 struct cgroup_subsys_state *(*create)(struct cgroup *cgrp); 447 struct cgroup_subsys_state *(*css_alloc)(struct cgroup *cgrp);
470 int (*pre_destroy)(struct cgroup *cgrp); 448 int (*css_online)(struct cgroup *cgrp);
471 void (*destroy)(struct cgroup *cgrp); 449 void (*css_offline)(struct cgroup *cgrp);
450 void (*css_free)(struct cgroup *cgrp);
451
472 int (*can_attach)(struct cgroup *cgrp, struct cgroup_taskset *tset); 452 int (*can_attach)(struct cgroup *cgrp, struct cgroup_taskset *tset);
473 void (*cancel_attach)(struct cgroup *cgrp, struct cgroup_taskset *tset); 453 void (*cancel_attach)(struct cgroup *cgrp, struct cgroup_taskset *tset);
474 void (*attach)(struct cgroup *cgrp, struct cgroup_taskset *tset); 454 void (*attach)(struct cgroup *cgrp, struct cgroup_taskset *tset);
475 void (*fork)(struct task_struct *task); 455 void (*fork)(struct task_struct *task);
476 void (*exit)(struct cgroup *cgrp, struct cgroup *old_cgrp, 456 void (*exit)(struct cgroup *cgrp, struct cgroup *old_cgrp,
477 struct task_struct *task); 457 struct task_struct *task);
478 void (*post_clone)(struct cgroup *cgrp);
479 void (*bind)(struct cgroup *root); 458 void (*bind)(struct cgroup *root);
480 459
481 int subsys_id; 460 int subsys_id;
@@ -489,17 +468,6 @@ struct cgroup_subsys {
489 bool use_id; 468 bool use_id;
490 469
491 /* 470 /*
492 * If %true, cgroup removal will try to clear css refs by retrying
493 * ss->pre_destroy() until there's no css ref left. This behavior
494 * is strictly for backward compatibility and will be removed as
495 * soon as the current user (memcg) is updated.
496 *
497 * If %false, ss->pre_destroy() can't fail and cgroup removal won't
498 * wait for css refs to drop to zero before proceeding.
499 */
500 bool __DEPRECATED_clear_css_refs;
501
502 /*
503 * If %false, this subsystem is properly hierarchical - 471 * If %false, this subsystem is properly hierarchical -
504 * configuration, resource accounting and restriction on a parent 472 * configuration, resource accounting and restriction on a parent
505 * cgroup cover those of its children. If %true, hierarchy support 473 * cgroup cover those of its children. If %true, hierarchy support
@@ -572,6 +540,101 @@ static inline struct cgroup* task_cgroup(struct task_struct *task,
572 return task_subsys_state(task, subsys_id)->cgroup; 540 return task_subsys_state(task, subsys_id)->cgroup;
573} 541}
574 542
543/**
544 * cgroup_for_each_child - iterate through children of a cgroup
545 * @pos: the cgroup * to use as the loop cursor
546 * @cgroup: cgroup whose children to walk
547 *
548 * Walk @cgroup's children. Must be called under rcu_read_lock(). A child
549 * cgroup which hasn't finished ->css_online() or already has finished
550 * ->css_offline() may show up during traversal and it's each subsystem's
551 * responsibility to verify that each @pos is alive.
552 *
553 * If a subsystem synchronizes against the parent in its ->css_online() and
554 * before starting iterating, a cgroup which finished ->css_online() is
555 * guaranteed to be visible in the future iterations.
556 */
557#define cgroup_for_each_child(pos, cgroup) \
558 list_for_each_entry_rcu(pos, &(cgroup)->children, sibling)
559
560struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos,
561 struct cgroup *cgroup);
562struct cgroup *cgroup_rightmost_descendant(struct cgroup *pos);
563
564/**
565 * cgroup_for_each_descendant_pre - pre-order walk of a cgroup's descendants
566 * @pos: the cgroup * to use as the loop cursor
567 * @cgroup: cgroup whose descendants to walk
568 *
569 * Walk @cgroup's descendants. Must be called under rcu_read_lock(). A
570 * descendant cgroup which hasn't finished ->css_online() or already has
571 * finished ->css_offline() may show up during traversal and it's each
572 * subsystem's responsibility to verify that each @pos is alive.
573 *
574 * If a subsystem synchronizes against the parent in its ->css_online() and
575 * before starting iterating, and synchronizes against @pos on each
576 * iteration, any descendant cgroup which finished ->css_offline() is
577 * guaranteed to be visible in the future iterations.
578 *
579 * In other words, the following guarantees that a descendant can't escape
580 * state updates of its ancestors.
581 *
582 * my_online(@cgrp)
583 * {
584 * Lock @cgrp->parent and @cgrp;
585 * Inherit state from @cgrp->parent;
586 * Unlock both.
587 * }
588 *
589 * my_update_state(@cgrp)
590 * {
591 * Lock @cgrp;
592 * Update @cgrp's state;
593 * Unlock @cgrp;
594 *
595 * cgroup_for_each_descendant_pre(@pos, @cgrp) {
596 * Lock @pos;
597 * Verify @pos is alive and inherit state from @pos->parent;
598 * Unlock @pos;
599 * }
600 * }
601 *
602 * As long as the inheriting step, including checking the parent state, is
603 * enclosed inside @pos locking, double-locking the parent isn't necessary
604 * while inheriting. The state update to the parent is guaranteed to be
605 * visible by walking order and, as long as inheriting operations to the
606 * same @pos are atomic to each other, multiple updates racing each other
607 * still result in the correct state. It's guaranteed that at least one
608 * inheritance happens for any cgroup after the latest update to its
609 * parent.
610 *
611 * If checking parent's state requires locking the parent, each inheriting
612 * iteration should lock and unlock both @pos->parent and @pos.
613 *
614 * Alternatively, a subsystem may choose to use a single global lock to
615 * synchronize ->css_online() and ->css_offline() against tree-walking
616 * operations.
617 */
618#define cgroup_for_each_descendant_pre(pos, cgroup) \
619 for (pos = cgroup_next_descendant_pre(NULL, (cgroup)); (pos); \
620 pos = cgroup_next_descendant_pre((pos), (cgroup)))
621
622struct cgroup *cgroup_next_descendant_post(struct cgroup *pos,
623 struct cgroup *cgroup);
624
625/**
626 * cgroup_for_each_descendant_post - post-order walk of a cgroup's descendants
627 * @pos: the cgroup * to use as the loop cursor
628 * @cgroup: cgroup whose descendants to walk
629 *
630 * Similar to cgroup_for_each_descendant_pre() but performs post-order
631 * traversal instead. Note that the walk visibility guarantee described in
632 * pre-order walk doesn't apply the same to post-order walks.
633 */
634#define cgroup_for_each_descendant_post(pos, cgroup) \
635 for (pos = cgroup_next_descendant_post(NULL, (cgroup)); (pos); \
636 pos = cgroup_next_descendant_post((pos), (cgroup)))
637
575/* A cgroup_iter should be treated as an opaque object */ 638/* A cgroup_iter should be treated as an opaque object */
576struct cgroup_iter { 639struct cgroup_iter {
577 struct list_head *cg_link; 640 struct list_head *cg_link;
@@ -645,7 +708,6 @@ struct cgroup_subsys_state *cgroup_css_from_dir(struct file *f, int id);
645static inline int cgroup_init_early(void) { return 0; } 708static inline int cgroup_init_early(void) { return 0; }
646static inline int cgroup_init(void) { return 0; } 709static inline int cgroup_init(void) { return 0; }
647static inline void cgroup_fork(struct task_struct *p) {} 710static inline void cgroup_fork(struct task_struct *p) {}
648static inline void cgroup_fork_callbacks(struct task_struct *p) {}
649static inline void cgroup_post_fork(struct task_struct *p) {} 711static inline void cgroup_post_fork(struct task_struct *p) {}
650static inline void cgroup_exit(struct task_struct *p, int callbacks) {} 712static inline void cgroup_exit(struct task_struct *p, int callbacks) {}
651 713