diff options
Diffstat (limited to 'include/linux/cgroup.h')
| -rw-r--r-- | include/linux/cgroup.h | 170 |
1 files changed, 116 insertions, 54 deletions
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index f8a030ced0c7..900af5964f55 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h | |||
| @@ -12,6 +12,7 @@ | |||
| 12 | #include <linux/cpumask.h> | 12 | #include <linux/cpumask.h> |
| 13 | #include <linux/nodemask.h> | 13 | #include <linux/nodemask.h> |
| 14 | #include <linux/rcupdate.h> | 14 | #include <linux/rcupdate.h> |
| 15 | #include <linux/rculist.h> | ||
| 15 | #include <linux/cgroupstats.h> | 16 | #include <linux/cgroupstats.h> |
| 16 | #include <linux/prio_heap.h> | 17 | #include <linux/prio_heap.h> |
| 17 | #include <linux/rwsem.h> | 18 | #include <linux/rwsem.h> |
| @@ -34,7 +35,6 @@ extern int cgroup_lock_is_held(void); | |||
| 34 | extern bool cgroup_lock_live_group(struct cgroup *cgrp); | 35 | extern bool cgroup_lock_live_group(struct cgroup *cgrp); |
| 35 | extern void cgroup_unlock(void); | 36 | extern void cgroup_unlock(void); |
| 36 | extern void cgroup_fork(struct task_struct *p); | 37 | extern void cgroup_fork(struct task_struct *p); |
| 37 | extern void cgroup_fork_callbacks(struct task_struct *p); | ||
| 38 | extern void cgroup_post_fork(struct task_struct *p); | 38 | extern void cgroup_post_fork(struct task_struct *p); |
| 39 | extern void cgroup_exit(struct task_struct *p, int run_callbacks); | 39 | extern void cgroup_exit(struct task_struct *p, int run_callbacks); |
| 40 | extern int cgroupstats_build(struct cgroupstats *stats, | 40 | extern int cgroupstats_build(struct cgroupstats *stats, |
| @@ -66,7 +66,7 @@ struct cgroup_subsys_state { | |||
| 66 | /* | 66 | /* |
| 67 | * State maintained by the cgroup system to allow subsystems | 67 | * State maintained by the cgroup system to allow subsystems |
| 68 | * to be "busy". Should be accessed via css_get(), | 68 | * to be "busy". Should be accessed via css_get(), |
| 69 | * css_tryget() and and css_put(). | 69 | * css_tryget() and css_put(). |
| 70 | */ | 70 | */ |
| 71 | 71 | ||
| 72 | atomic_t refcnt; | 72 | atomic_t refcnt; |
| @@ -81,9 +81,8 @@ struct cgroup_subsys_state { | |||
| 81 | 81 | ||
| 82 | /* bits in struct cgroup_subsys_state flags field */ | 82 | /* bits in struct cgroup_subsys_state flags field */ |
| 83 | enum { | 83 | enum { |
| 84 | CSS_ROOT, /* This CSS is the root of the subsystem */ | 84 | CSS_ROOT = (1 << 0), /* this CSS is the root of the subsystem */ |
| 85 | CSS_REMOVED, /* This CSS is dead */ | 85 | CSS_ONLINE = (1 << 1), /* between ->css_online() and ->css_offline() */ |
| 86 | CSS_CLEAR_CSS_REFS, /* @ss->__DEPRECATED_clear_css_refs */ | ||
| 87 | }; | 86 | }; |
| 88 | 87 | ||
| 89 | /* Caller must verify that the css is not for root cgroup */ | 88 | /* Caller must verify that the css is not for root cgroup */ |
| @@ -102,15 +101,10 @@ static inline void __css_get(struct cgroup_subsys_state *css, int count) | |||
| 102 | static inline void css_get(struct cgroup_subsys_state *css) | 101 | static inline void css_get(struct cgroup_subsys_state *css) |
| 103 | { | 102 | { |
| 104 | /* We don't need to reference count the root state */ | 103 | /* We don't need to reference count the root state */ |
| 105 | if (!test_bit(CSS_ROOT, &css->flags)) | 104 | if (!(css->flags & CSS_ROOT)) |
| 106 | __css_get(css, 1); | 105 | __css_get(css, 1); |
| 107 | } | 106 | } |
| 108 | 107 | ||
| 109 | static inline bool css_is_removed(struct cgroup_subsys_state *css) | ||
| 110 | { | ||
| 111 | return test_bit(CSS_REMOVED, &css->flags); | ||
| 112 | } | ||
| 113 | |||
| 114 | /* | 108 | /* |
| 115 | * Call css_tryget() to take a reference on a css if your existing | 109 | * Call css_tryget() to take a reference on a css if your existing |
| 116 | * (known-valid) reference isn't already ref-counted. Returns false if | 110 | * (known-valid) reference isn't already ref-counted. Returns false if |
| @@ -120,7 +114,7 @@ static inline bool css_is_removed(struct cgroup_subsys_state *css) | |||
| 120 | extern bool __css_tryget(struct cgroup_subsys_state *css); | 114 | extern bool __css_tryget(struct cgroup_subsys_state *css); |
| 121 | static inline bool css_tryget(struct cgroup_subsys_state *css) | 115 | static inline bool css_tryget(struct cgroup_subsys_state *css) |
| 122 | { | 116 | { |
| 123 | if (test_bit(CSS_ROOT, &css->flags)) | 117 | if (css->flags & CSS_ROOT) |
| 124 | return true; | 118 | return true; |
| 125 | return __css_tryget(css); | 119 | return __css_tryget(css); |
| 126 | } | 120 | } |
| @@ -133,7 +127,7 @@ static inline bool css_tryget(struct cgroup_subsys_state *css) | |||
| 133 | extern void __css_put(struct cgroup_subsys_state *css); | 127 | extern void __css_put(struct cgroup_subsys_state *css); |
| 134 | static inline void css_put(struct cgroup_subsys_state *css) | 128 | static inline void css_put(struct cgroup_subsys_state *css) |
| 135 | { | 129 | { |
| 136 | if (!test_bit(CSS_ROOT, &css->flags)) | 130 | if (!(css->flags & CSS_ROOT)) |
| 137 | __css_put(css); | 131 | __css_put(css); |
| 138 | } | 132 | } |
| 139 | 133 | ||
| @@ -149,13 +143,11 @@ enum { | |||
| 149 | /* Control Group requires release notifications to userspace */ | 143 | /* Control Group requires release notifications to userspace */ |
| 150 | CGRP_NOTIFY_ON_RELEASE, | 144 | CGRP_NOTIFY_ON_RELEASE, |
| 151 | /* | 145 | /* |
| 152 | * A thread in rmdir() is wating for this cgroup. | 146 | * Clone the parent's configuration when creating a new child |
| 153 | */ | 147 | * cpuset cgroup. For historical reasons, this option can be |
| 154 | CGRP_WAIT_ON_RMDIR, | 148 | * specified at mount time and thus is implemented here. |
| 155 | /* | ||
| 156 | * Clone cgroup values when creating a new child cgroup | ||
| 157 | */ | 149 | */ |
| 158 | CGRP_CLONE_CHILDREN, | 150 | CGRP_CPUSET_CLONE_CHILDREN, |
| 159 | }; | 151 | }; |
| 160 | 152 | ||
| 161 | struct cgroup { | 153 | struct cgroup { |
| @@ -167,6 +159,8 @@ struct cgroup { | |||
| 167 | */ | 159 | */ |
| 168 | atomic_t count; | 160 | atomic_t count; |
| 169 | 161 | ||
| 162 | int id; /* ida allocated in-hierarchy ID */ | ||
| 163 | |||
| 170 | /* | 164 | /* |
| 171 | * We link our 'sibling' struct into our parent's 'children'. | 165 | * We link our 'sibling' struct into our parent's 'children'. |
| 172 | * Our children link their 'sibling' into our 'children'. | 166 | * Our children link their 'sibling' into our 'children'. |
| @@ -176,7 +170,7 @@ struct cgroup { | |||
| 176 | struct list_head files; /* my files */ | 170 | struct list_head files; /* my files */ |
| 177 | 171 | ||
| 178 | struct cgroup *parent; /* my parent */ | 172 | struct cgroup *parent; /* my parent */ |
| 179 | struct dentry __rcu *dentry; /* cgroup fs entry, RCU protected */ | 173 | struct dentry *dentry; /* cgroup fs entry, RCU protected */ |
| 180 | 174 | ||
| 181 | /* Private pointers for each registered subsystem */ | 175 | /* Private pointers for each registered subsystem */ |
| 182 | struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; | 176 | struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; |
| @@ -209,6 +203,7 @@ struct cgroup { | |||
| 209 | 203 | ||
| 210 | /* For RCU-protected deletion */ | 204 | /* For RCU-protected deletion */ |
| 211 | struct rcu_head rcu_head; | 205 | struct rcu_head rcu_head; |
| 206 | struct work_struct free_work; | ||
| 212 | 207 | ||
| 213 | /* List of events which userspace want to receive */ | 208 | /* List of events which userspace want to receive */ |
| 214 | struct list_head event_list; | 209 | struct list_head event_list; |
| @@ -282,7 +277,7 @@ struct cgroup_map_cb { | |||
| 282 | 277 | ||
| 283 | /* cftype->flags */ | 278 | /* cftype->flags */ |
| 284 | #define CFTYPE_ONLY_ON_ROOT (1U << 0) /* only create on root cg */ | 279 | #define CFTYPE_ONLY_ON_ROOT (1U << 0) /* only create on root cg */ |
| 285 | #define CFTYPE_NOT_ON_ROOT (1U << 1) /* don't create onp root cg */ | 280 | #define CFTYPE_NOT_ON_ROOT (1U << 1) /* don't create on root cg */ |
| 286 | 281 | ||
| 287 | #define MAX_CFTYPE_NAME 64 | 282 | #define MAX_CFTYPE_NAME 64 |
| 288 | 283 | ||
| @@ -422,23 +417,6 @@ int cgroup_task_count(const struct cgroup *cgrp); | |||
| 422 | int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task); | 417 | int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task); |
| 423 | 418 | ||
| 424 | /* | 419 | /* |
| 425 | * When the subsys has to access css and may add permanent refcnt to css, | ||
| 426 | * it should take care of racy conditions with rmdir(). Following set of | ||
| 427 | * functions, is for stop/restart rmdir if necessary. | ||
| 428 | * Because these will call css_get/put, "css" should be alive css. | ||
| 429 | * | ||
| 430 | * cgroup_exclude_rmdir(); | ||
| 431 | * ...do some jobs which may access arbitrary empty cgroup | ||
| 432 | * cgroup_release_and_wakeup_rmdir(); | ||
| 433 | * | ||
| 434 | * When someone removes a cgroup while cgroup_exclude_rmdir() holds it, | ||
| 435 | * it sleeps and cgroup_release_and_wakeup_rmdir() will wake him up. | ||
| 436 | */ | ||
| 437 | |||
| 438 | void cgroup_exclude_rmdir(struct cgroup_subsys_state *css); | ||
| 439 | void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css); | ||
| 440 | |||
| 441 | /* | ||
| 442 | * Control Group taskset, used to pass around set of tasks to cgroup_subsys | 420 | * Control Group taskset, used to pass around set of tasks to cgroup_subsys |
| 443 | * methods. | 421 | * methods. |
| 444 | */ | 422 | */ |
| @@ -466,16 +444,17 @@ int cgroup_taskset_size(struct cgroup_taskset *tset); | |||
| 466 | */ | 444 | */ |
| 467 | 445 | ||
| 468 | struct cgroup_subsys { | 446 | struct cgroup_subsys { |
| 469 | struct cgroup_subsys_state *(*create)(struct cgroup *cgrp); | 447 | struct cgroup_subsys_state *(*css_alloc)(struct cgroup *cgrp); |
| 470 | int (*pre_destroy)(struct cgroup *cgrp); | 448 | int (*css_online)(struct cgroup *cgrp); |
| 471 | void (*destroy)(struct cgroup *cgrp); | 449 | void (*css_offline)(struct cgroup *cgrp); |
| 450 | void (*css_free)(struct cgroup *cgrp); | ||
| 451 | |||
| 472 | int (*can_attach)(struct cgroup *cgrp, struct cgroup_taskset *tset); | 452 | int (*can_attach)(struct cgroup *cgrp, struct cgroup_taskset *tset); |
| 473 | void (*cancel_attach)(struct cgroup *cgrp, struct cgroup_taskset *tset); | 453 | void (*cancel_attach)(struct cgroup *cgrp, struct cgroup_taskset *tset); |
| 474 | void (*attach)(struct cgroup *cgrp, struct cgroup_taskset *tset); | 454 | void (*attach)(struct cgroup *cgrp, struct cgroup_taskset *tset); |
| 475 | void (*fork)(struct task_struct *task); | 455 | void (*fork)(struct task_struct *task); |
| 476 | void (*exit)(struct cgroup *cgrp, struct cgroup *old_cgrp, | 456 | void (*exit)(struct cgroup *cgrp, struct cgroup *old_cgrp, |
| 477 | struct task_struct *task); | 457 | struct task_struct *task); |
| 478 | void (*post_clone)(struct cgroup *cgrp); | ||
| 479 | void (*bind)(struct cgroup *root); | 458 | void (*bind)(struct cgroup *root); |
| 480 | 459 | ||
| 481 | int subsys_id; | 460 | int subsys_id; |
| @@ -489,17 +468,6 @@ struct cgroup_subsys { | |||
| 489 | bool use_id; | 468 | bool use_id; |
| 490 | 469 | ||
| 491 | /* | 470 | /* |
| 492 | * If %true, cgroup removal will try to clear css refs by retrying | ||
| 493 | * ss->pre_destroy() until there's no css ref left. This behavior | ||
| 494 | * is strictly for backward compatibility and will be removed as | ||
| 495 | * soon as the current user (memcg) is updated. | ||
| 496 | * | ||
| 497 | * If %false, ss->pre_destroy() can't fail and cgroup removal won't | ||
| 498 | * wait for css refs to drop to zero before proceeding. | ||
| 499 | */ | ||
| 500 | bool __DEPRECATED_clear_css_refs; | ||
| 501 | |||
| 502 | /* | ||
| 503 | * If %false, this subsystem is properly hierarchical - | 471 | * If %false, this subsystem is properly hierarchical - |
| 504 | * configuration, resource accounting and restriction on a parent | 472 | * configuration, resource accounting and restriction on a parent |
| 505 | * cgroup cover those of its children. If %true, hierarchy support | 473 | * cgroup cover those of its children. If %true, hierarchy support |
| @@ -572,6 +540,101 @@ static inline struct cgroup* task_cgroup(struct task_struct *task, | |||
| 572 | return task_subsys_state(task, subsys_id)->cgroup; | 540 | return task_subsys_state(task, subsys_id)->cgroup; |
| 573 | } | 541 | } |
| 574 | 542 | ||
| 543 | /** | ||
| 544 | * cgroup_for_each_child - iterate through children of a cgroup | ||
| 545 | * @pos: the cgroup * to use as the loop cursor | ||
| 546 | * @cgroup: cgroup whose children to walk | ||
| 547 | * | ||
| 548 | * Walk @cgroup's children. Must be called under rcu_read_lock(). A child | ||
| 549 | * cgroup which hasn't finished ->css_online() or already has finished | ||
| 550 | * ->css_offline() may show up during traversal and it's each subsystem's | ||
| 551 | * responsibility to verify that each @pos is alive. | ||
| 552 | * | ||
| 553 | * If a subsystem synchronizes against the parent in its ->css_online() and | ||
| 554 | * before starting iterating, a cgroup which finished ->css_online() is | ||
| 555 | * guaranteed to be visible in the future iterations. | ||
| 556 | */ | ||
| 557 | #define cgroup_for_each_child(pos, cgroup) \ | ||
| 558 | list_for_each_entry_rcu(pos, &(cgroup)->children, sibling) | ||
| 559 | |||
| 560 | struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos, | ||
| 561 | struct cgroup *cgroup); | ||
| 562 | struct cgroup *cgroup_rightmost_descendant(struct cgroup *pos); | ||
| 563 | |||
| 564 | /** | ||
| 565 | * cgroup_for_each_descendant_pre - pre-order walk of a cgroup's descendants | ||
| 566 | * @pos: the cgroup * to use as the loop cursor | ||
| 567 | * @cgroup: cgroup whose descendants to walk | ||
| 568 | * | ||
| 569 | * Walk @cgroup's descendants. Must be called under rcu_read_lock(). A | ||
| 570 | * descendant cgroup which hasn't finished ->css_online() or already has | ||
| 571 | * finished ->css_offline() may show up during traversal and it's each | ||
| 572 | * subsystem's responsibility to verify that each @pos is alive. | ||
| 573 | * | ||
| 574 | * If a subsystem synchronizes against the parent in its ->css_online() and | ||
| 575 | * before starting iterating, and synchronizes against @pos on each | ||
| 576 | * iteration, any descendant cgroup which finished ->css_online() is | ||
| 577 | * guaranteed to be visible in the future iterations. | ||
| 578 | * | ||
| 579 | * In other words, the following guarantees that a descendant can't escape | ||
| 580 | * state updates of its ancestors. | ||
| 581 | * | ||
| 582 | * my_online(@cgrp) | ||
| 583 | * { | ||
| 584 | * Lock @cgrp->parent and @cgrp; | ||
| 585 | * Inherit state from @cgrp->parent; | ||
| 586 | * Unlock both. | ||
| 587 | * } | ||
| 588 | * | ||
| 589 | * my_update_state(@cgrp) | ||
| 590 | * { | ||
| 591 | * Lock @cgrp; | ||
| 592 | * Update @cgrp's state; | ||
| 593 | * Unlock @cgrp; | ||
| 594 | * | ||
| 595 | * cgroup_for_each_descendant_pre(@pos, @cgrp) { | ||
| 596 | * Lock @pos; | ||
| 597 | * Verify @pos is alive and inherit state from @pos->parent; | ||
| 598 | * Unlock @pos; | ||
| 599 | * } | ||
| 600 | * } | ||
| 601 | * | ||
| 602 | * As long as the inheriting step, including checking the parent state, is | ||
| 603 | * enclosed inside @pos locking, double-locking the parent isn't necessary | ||
| 604 | * while inheriting. The state update to the parent is guaranteed to be | ||
| 605 | * visible by walking order and, as long as inheriting operations to the | ||
| 606 | * same @pos are atomic to each other, multiple updates racing each other | ||
| 607 | * still result in the correct state. It's guaranteed that at least one | ||
| 608 | * inheritance happens for any cgroup after the latest update to its | ||
| 609 | * parent. | ||
| 610 | * | ||
| 611 | * If checking parent's state requires locking the parent, each inheriting | ||
| 612 | * iteration should lock and unlock both @pos->parent and @pos. | ||
| 613 | * | ||
| 614 | * Alternatively, a subsystem may choose to use a single global lock to | ||
| 615 | * synchronize ->css_online() and ->css_offline() against tree-walking | ||
| 616 | * operations. | ||
| 617 | */ | ||
| 618 | #define cgroup_for_each_descendant_pre(pos, cgroup) \ | ||
| 619 | for (pos = cgroup_next_descendant_pre(NULL, (cgroup)); (pos); \ | ||
| 620 | pos = cgroup_next_descendant_pre((pos), (cgroup))) | ||
| 621 | |||
| 622 | struct cgroup *cgroup_next_descendant_post(struct cgroup *pos, | ||
| 623 | struct cgroup *cgroup); | ||
| 624 | |||
| 625 | /** | ||
| 626 | * cgroup_for_each_descendant_post - post-order walk of a cgroup's descendants | ||
| 627 | * @pos: the cgroup * to use as the loop cursor | ||
| 628 | * @cgroup: cgroup whose descendants to walk | ||
| 629 | * | ||
| 630 | * Similar to cgroup_for_each_descendant_pre() but performs post-order | ||
| 631 | * traversal instead. Note that the walk visibility guarantee described in | ||
| 632 | * pre-order walk doesn't apply the same to post-order walks. | ||
| 633 | */ | ||
| 634 | #define cgroup_for_each_descendant_post(pos, cgroup) \ | ||
| 635 | for (pos = cgroup_next_descendant_post(NULL, (cgroup)); (pos); \ | ||
| 636 | pos = cgroup_next_descendant_post((pos), (cgroup))) | ||
| 637 | |||
| 575 | /* A cgroup_iter should be treated as an opaque object */ | 638 | /* A cgroup_iter should be treated as an opaque object */ |
| 576 | struct cgroup_iter { | 639 | struct cgroup_iter { |
| 577 | struct list_head *cg_link; | 640 | struct list_head *cg_link; |
| @@ -645,7 +708,6 @@ struct cgroup_subsys_state *cgroup_css_from_dir(struct file *f, int id); | |||
| 645 | static inline int cgroup_init_early(void) { return 0; } | 708 | static inline int cgroup_init_early(void) { return 0; } |
| 646 | static inline int cgroup_init(void) { return 0; } | 709 | static inline int cgroup_init(void) { return 0; } |
| 647 | static inline void cgroup_fork(struct task_struct *p) {} | 710 | static inline void cgroup_fork(struct task_struct *p) {} |
| 648 | static inline void cgroup_fork_callbacks(struct task_struct *p) {} | ||
| 649 | static inline void cgroup_post_fork(struct task_struct *p) {} | 711 | static inline void cgroup_post_fork(struct task_struct *p) {} |
| 650 | static inline void cgroup_exit(struct task_struct *p, int callbacks) {} | 712 | static inline void cgroup_exit(struct task_struct *p, int callbacks) {} |
| 651 | 713 | ||
