path: root/include/linux/cgroup.h
Diffstat (limited to 'include/linux/cgroup.h')
-rw-r--r--  include/linux/cgroup.h | 167
1 file changed, 114 insertions(+), 53 deletions(-)
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index f8a030ced0c7..7d73905dcba2 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -12,6 +12,7 @@
 #include <linux/cpumask.h>
 #include <linux/nodemask.h>
 #include <linux/rcupdate.h>
+#include <linux/rculist.h>
 #include <linux/cgroupstats.h>
 #include <linux/prio_heap.h>
 #include <linux/rwsem.h>
@@ -34,7 +35,6 @@ extern int cgroup_lock_is_held(void);
 extern bool cgroup_lock_live_group(struct cgroup *cgrp);
 extern void cgroup_unlock(void);
 extern void cgroup_fork(struct task_struct *p);
-extern void cgroup_fork_callbacks(struct task_struct *p);
 extern void cgroup_post_fork(struct task_struct *p);
 extern void cgroup_exit(struct task_struct *p, int run_callbacks);
 extern int cgroupstats_build(struct cgroupstats *stats,
@@ -66,7 +66,7 @@ struct cgroup_subsys_state {
 	/*
 	 * State maintained by the cgroup system to allow subsystems
 	 * to be "busy". Should be accessed via css_get(),
-	 * css_tryget() and and css_put().
+	 * css_tryget() and css_put().
 	 */
 
 	atomic_t refcnt;
@@ -81,9 +81,8 @@ struct cgroup_subsys_state {
 
 /* bits in struct cgroup_subsys_state flags field */
 enum {
-	CSS_ROOT, /* This CSS is the root of the subsystem */
-	CSS_REMOVED, /* This CSS is dead */
-	CSS_CLEAR_CSS_REFS, /* @ss->__DEPRECATED_clear_css_refs */
+	CSS_ROOT	= (1 << 0), /* this CSS is the root of the subsystem */
+	CSS_ONLINE	= (1 << 1), /* between ->css_online() and ->css_offline() */
 };
 
 /* Caller must verify that the css is not for root cgroup */
@@ -102,15 +101,10 @@ static inline void __css_get(struct cgroup_subsys_state *css, int count)
 static inline void css_get(struct cgroup_subsys_state *css)
 {
 	/* We don't need to reference count the root state */
-	if (!test_bit(CSS_ROOT, &css->flags))
+	if (!(css->flags & CSS_ROOT))
 		__css_get(css, 1);
 }
 
-static inline bool css_is_removed(struct cgroup_subsys_state *css)
-{
-	return test_bit(CSS_REMOVED, &css->flags);
-}
-
 /*
  * Call css_tryget() to take a reference on a css if your existing
  * (known-valid) reference isn't already ref-counted. Returns false if
@@ -120,7 +114,7 @@ static inline bool css_is_removed(struct cgroup_subsys_state *css)
 extern bool __css_tryget(struct cgroup_subsys_state *css);
 static inline bool css_tryget(struct cgroup_subsys_state *css)
 {
-	if (test_bit(CSS_ROOT, &css->flags))
+	if (css->flags & CSS_ROOT)
 		return true;
 	return __css_tryget(css);
 }
@@ -133,7 +127,7 @@ static inline bool css_tryget(struct cgroup_subsys_state *css)
 extern void __css_put(struct cgroup_subsys_state *css);
 static inline void css_put(struct cgroup_subsys_state *css)
 {
-	if (!test_bit(CSS_ROOT, &css->flags))
+	if (!(css->flags & CSS_ROOT))
 		__css_put(css);
 }
 
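For context, the helpers above form a get/tryget/put pattern: css_get() pins a css you already hold a valid reference to, css_tryget() opportunistically pins one that may be on its way out, and css_put() drops the pin. A minimal illustrative sketch, not part of this header (my_do_work() is a hypothetical helper):

	/* Hypothetical: pin @css while work is in flight. */
	static int my_pin_and_work(struct cgroup_subsys_state *css)
	{
		if (!css_tryget(css))	/* css is being destroyed */
			return -ENOENT;
		my_do_work(css);	/* hypothetical helper */
		css_put(css);		/* drop the temporary reference */
		return 0;
	}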
@@ -149,13 +143,11 @@ enum {
 	/* Control Group requires release notifications to userspace */
 	CGRP_NOTIFY_ON_RELEASE,
 	/*
-	 * A thread in rmdir() is wating for this cgroup.
-	 */
-	CGRP_WAIT_ON_RMDIR,
-	/*
-	 * Clone cgroup values when creating a new child cgroup
+	 * Clone the parent's configuration when creating a new child
+	 * cpuset cgroup.  For historical reasons, this option can be
+	 * specified at mount time and thus is implemented here.
 	 */
-	CGRP_CLONE_CHILDREN,
+	CGRP_CPUSET_CLONE_CHILDREN,
 };
 
 struct cgroup {
@@ -167,6 +159,8 @@ struct cgroup {
 	 */
 	atomic_t count;
 
+	int id;		/* ida allocated in-hierarchy ID */
+
 	/*
 	 * We link our 'sibling' struct into our parent's 'children'.
 	 * Our children link their 'sibling' into our 'children'.
@@ -176,7 +170,7 @@ struct cgroup {
 	struct list_head files;		/* my files */
 
 	struct cgroup *parent;		/* my parent */
-	struct dentry __rcu *dentry;	/* cgroup fs entry, RCU protected */
+	struct dentry *dentry;		/* cgroup fs entry, RCU protected */
 
 	/* Private pointers for each registered subsystem */
 	struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];
@@ -282,7 +276,7 @@ struct cgroup_map_cb {
 
 /* cftype->flags */
 #define CFTYPE_ONLY_ON_ROOT	(1U << 0)	/* only create on root cg */
-#define CFTYPE_NOT_ON_ROOT	(1U << 1)	/* don't create onp root cg */
+#define CFTYPE_NOT_ON_ROOT	(1U << 1)	/* don't create on root cg */
 
 #define MAX_CFTYPE_NAME		64
 
@@ -422,23 +416,6 @@ int cgroup_task_count(const struct cgroup *cgrp);
 int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task);
 
 /*
- * When the subsys has to access css and may add permanent refcnt to css,
- * it should take care of racy conditions with rmdir(). Following set of
- * functions, is for stop/restart rmdir if necessary.
- * Because these will call css_get/put, "css" should be alive css.
- *
- *  cgroup_exclude_rmdir();
- *    ...do some jobs which may access arbitrary empty cgroup
- *  cgroup_release_and_wakeup_rmdir();
- *
- *  When someone removes a cgroup while cgroup_exclude_rmdir() holds it,
- *  it sleeps and cgroup_release_and_wakeup_rmdir() will wake him up.
- */
-
-void cgroup_exclude_rmdir(struct cgroup_subsys_state *css);
-void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css);
-
-/*
  * Control Group taskset, used to pass around set of tasks to cgroup_subsys
  * methods.
  */
@@ -466,16 +443,17 @@ int cgroup_taskset_size(struct cgroup_taskset *tset);
  */
 
 struct cgroup_subsys {
-	struct cgroup_subsys_state *(*create)(struct cgroup *cgrp);
-	int (*pre_destroy)(struct cgroup *cgrp);
-	void (*destroy)(struct cgroup *cgrp);
+	struct cgroup_subsys_state *(*css_alloc)(struct cgroup *cgrp);
+	int (*css_online)(struct cgroup *cgrp);
+	void (*css_offline)(struct cgroup *cgrp);
+	void (*css_free)(struct cgroup *cgrp);
+
 	int (*can_attach)(struct cgroup *cgrp, struct cgroup_taskset *tset);
 	void (*cancel_attach)(struct cgroup *cgrp, struct cgroup_taskset *tset);
 	void (*attach)(struct cgroup *cgrp, struct cgroup_taskset *tset);
 	void (*fork)(struct task_struct *task);
 	void (*exit)(struct cgroup *cgrp, struct cgroup *old_cgrp,
 		     struct task_struct *task);
-	void (*post_clone)(struct cgroup *cgrp);
 	void (*bind)(struct cgroup *root);
 
 	int subsys_id;
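As an illustration of the new callback split (allocation in ->css_alloc(), making the css live in ->css_online(), teardown in ->css_offline(), final freeing in ->css_free()), a subsystem might wire them up roughly as below. This is a hypothetical sketch, not part of this header: struct my_state, my_subsys_id and the my_* functions are illustrative only.

	struct my_state {
		struct cgroup_subsys_state css;
		int val;
	};

	static struct cgroup_subsys_state *my_css_alloc(struct cgroup *cgrp)
	{
		struct my_state *ms = kzalloc(sizeof(*ms), GFP_KERNEL);

		if (!ms)
			return ERR_PTR(-ENOMEM);
		return &ms->css;	/* not yet visible to iteration */
	}

	static int my_css_online(struct cgroup *cgrp)
	{
		/* inherit configuration from the parent; may fail */
		return 0;
	}

	static void my_css_offline(struct cgroup *cgrp)
	{
		/* undo ->css_online(); css refs may still be held here */
	}

	static void my_css_free(struct cgroup *cgrp)
	{
		kfree(container_of(cgrp->subsys[my_subsys_id],
				   struct my_state, css));
	}

	struct cgroup_subsys my_subsys = {
		.name		= "my",
		.css_alloc	= my_css_alloc,
		.css_online	= my_css_online,
		.css_offline	= my_css_offline,
		.css_free	= my_css_free,
		.subsys_id	= my_subsys_id,
	};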
@@ -489,17 +467,6 @@ struct cgroup_subsys {
 	bool use_id;
 
 	/*
-	 * If %true, cgroup removal will try to clear css refs by retrying
-	 * ss->pre_destroy() until there's no css ref left.  This behavior
-	 * is strictly for backward compatibility and will be removed as
-	 * soon as the current user (memcg) is updated.
-	 *
-	 * If %false, ss->pre_destroy() can't fail and cgroup removal won't
-	 * wait for css refs to drop to zero before proceeding.
-	 */
-	bool __DEPRECATED_clear_css_refs;
-
-	/*
 	 * If %false, this subsystem is properly hierarchical -
 	 * configuration, resource accounting and restriction on a parent
 	 * cgroup cover those of its children.  If %true, hierarchy support
@@ -572,6 +539,100 @@ static inline struct cgroup* task_cgroup(struct task_struct *task,
 	return task_subsys_state(task, subsys_id)->cgroup;
 }
 
+/**
+ * cgroup_for_each_child - iterate through children of a cgroup
+ * @pos: the cgroup * to use as the loop cursor
+ * @cgroup: cgroup whose children to walk
+ *
+ * Walk @cgroup's children.  Must be called under rcu_read_lock().  A child
+ * cgroup which hasn't finished ->css_online() or already has finished
+ * ->css_offline() may show up during traversal and it's each subsystem's
+ * responsibility to verify that each @pos is alive.
+ *
+ * If a subsystem synchronizes against the parent in its ->css_online() and
+ * before starting iterating, a cgroup which finished ->css_online() is
+ * guaranteed to be visible in the future iterations.
+ */
+#define cgroup_for_each_child(pos, cgroup)				\
+	list_for_each_entry_rcu(pos, &(cgroup)->children, sibling)
+
+struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos,
+					  struct cgroup *cgroup);
+
+/**
+ * cgroup_for_each_descendant_pre - pre-order walk of a cgroup's descendants
+ * @pos: the cgroup * to use as the loop cursor
+ * @cgroup: cgroup whose descendants to walk
+ *
+ * Walk @cgroup's descendants.  Must be called under rcu_read_lock().  A
+ * descendant cgroup which hasn't finished ->css_online() or already has
+ * finished ->css_offline() may show up during traversal and it's each
+ * subsystem's responsibility to verify that each @pos is alive.
+ *
+ * If a subsystem synchronizes against the parent in its ->css_online() and
+ * before starting iterating, and synchronizes against @pos on each
+ * iteration, any descendant cgroup which finished ->css_offline() is
+ * guaranteed to be visible in the future iterations.
+ *
+ * In other words, the following guarantees that a descendant can't escape
+ * state updates of its ancestors.
+ *
+ * my_online(@cgrp)
+ * {
+ *	Lock @cgrp->parent and @cgrp;
+ *	Inherit state from @cgrp->parent;
+ *	Unlock both.
+ * }
+ *
+ * my_update_state(@cgrp)
+ * {
+ *	Lock @cgrp;
+ *	Update @cgrp's state;
+ *	Unlock @cgrp;
+ *
+ *	cgroup_for_each_descendant_pre(@pos, @cgrp) {
+ *		Lock @pos;
+ *		Verify @pos is alive and inherit state from @pos->parent;
+ *		Unlock @pos;
+ *	}
+ * }
+ *
+ * As long as the inheriting step, including checking the parent state, is
+ * enclosed inside @pos locking, double-locking the parent isn't necessary
+ * while inheriting.  The state update to the parent is guaranteed to be
+ * visible by walking order and, as long as inheriting operations to the
+ * same @pos are atomic to each other, multiple updates racing each other
+ * still result in the correct state.  It's guaranteed that at least one
+ * inheritance happens for any cgroup after the latest update to its
+ * parent.
+ *
+ * If checking parent's state requires locking the parent, each inheriting
+ * iteration should lock and unlock both @pos->parent and @pos.
+ *
+ * Alternatively, a subsystem may choose to use a single global lock to
+ * synchronize ->css_online() and ->css_offline() against tree-walking
+ * operations.
+ */
+#define cgroup_for_each_descendant_pre(pos, cgroup)			\
+	for (pos = cgroup_next_descendant_pre(NULL, (cgroup)); (pos);	\
+	     pos = cgroup_next_descendant_pre((pos), (cgroup)))
+
+struct cgroup *cgroup_next_descendant_post(struct cgroup *pos,
+					   struct cgroup *cgroup);
+
+/**
+ * cgroup_for_each_descendant_post - post-order walk of a cgroup's descendants
+ * @pos: the cgroup * to use as the loop cursor
+ * @cgroup: cgroup whose descendants to walk
+ *
+ * Similar to cgroup_for_each_descendant_pre() but performs post-order
+ * traversal instead.  Note that the walk visibility guarantee described in
+ * pre-order walk doesn't apply the same to post-order walks.
+ */
+#define cgroup_for_each_descendant_post(pos, cgroup)			\
+	for (pos = cgroup_next_descendant_post(NULL, (cgroup)); (pos);	\
+	     pos = cgroup_next_descendant_post((pos), (cgroup)))
+
 /* A cgroup_iter should be treated as an opaque object */
 struct cgroup_iter {
 	struct list_head *cg_link;
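Putting the my_update_state() pseudocode above into concrete terms, a subsystem that propagates a value down the hierarchy might do something like the following purely hypothetical sketch, where my_state() and my_lock stand in for the subsystem's own per-cgroup state and locking:

	/* Hypothetical: push a state change from @cgrp down to live descendants. */
	static void my_update_state(struct cgroup *cgrp, int new_val)
	{
		struct cgroup *pos;

		spin_lock(&my_lock);
		my_state(cgrp)->val = new_val;
		spin_unlock(&my_lock);

		rcu_read_lock();
		cgroup_for_each_descendant_pre(pos, cgrp) {
			spin_lock(&my_lock);
			/* only touch cgroups between ->css_online() and ->css_offline() */
			if (my_state(pos)->online)
				my_state(pos)->val = my_state(pos->parent)->val;
			spin_unlock(&my_lock);
		}
		rcu_read_unlock();
	}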