aboutsummaryrefslogtreecommitdiffstats
path: root/include/linux
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2013-09-03 21:25:03 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2013-09-03 21:25:03 -0400
commit32dad03d164206ea886885d0740284ba215b0970 (patch)
tree5fd89fe27295bfbe47dce5f274aa645099741a71 /include/linux
parent357397a14117f0c2eeafcac06a1f8412a02aa6af (diff)
parentd1625964da51bda61306ad3ec45307a799c21f08 (diff)
Merge branch 'for-3.12' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup updates from Tejun Heo: "A lot of activities on the cgroup front. Most changes aren't visible to userland at all at this point and are laying foundation for the planned unified hierarchy. - The biggest change is decoupling the lifetime management of css (cgroup_subsys_state) from that of cgroup's. Because controllers (cpu, memory, block and so on) will need to be dynamically enabled and disabled, css which is the association point between a cgroup and a controller may come and go dynamically across the lifetime of a cgroup. Till now, css's were created when the associated cgroup was created and stayed till the cgroup got destroyed. Assumptions around this tight coupling permeated through cgroup core and controllers. These assumptions are gradually removed, which consists bulk of patches, and css destruction path is completely decoupled from cgroup destruction path. Note that decoupling of creation path is relatively easy on top of these changes and the patchset is pending for the next window. - cgroup has its own event mechanism cgroup.event_control, which is only used by memcg. It is overly complex trying to achieve high flexibility whose benefits seem dubious at best. Going forward, new events will simply generate file modified event and the existing mechanism is being made specific to memcg. This pull request contains prepatory patches for such change. - Various fixes and cleanups" Fixed up conflict in kernel/cgroup.c as per Tejun. * 'for-3.12' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup: (69 commits) cgroup: fix cgroup_css() invocation in css_from_id() cgroup: make cgroup_write_event_control() use css_from_dir() instead of __d_cgrp() cgroup: make cgroup_event hold onto cgroup_subsys_state instead of cgroup cgroup: implement CFTYPE_NO_PREFIX cgroup: make cgroup_css() take cgroup_subsys * instead and allow NULL subsys cgroup: rename cgroup_css_from_dir() to css_from_dir() and update its syntax cgroup: fix cgroup_write_event_control() cgroup: fix subsystem file accesses on the root cgroup cgroup: change cgroup_from_id() to css_from_id() cgroup: use css_get() in cgroup_create() to check CSS_ROOT cpuset: remove an unncessary forward declaration cgroup: RCU protect each cgroup_subsys_state release cgroup: move subsys file removal to kill_css() cgroup: factor out kill_css() cgroup: decouple cgroup_subsys_state destruction from cgroup destruction cgroup: replace cgroup->css_kill_cnt with ->nr_css cgroup: bounce cgroup_subsys_state ref kill confirmation to a work item cgroup: move cgroup->subsys[] assignment to online_css() cgroup: reorganize css init / exit paths cgroup: add __rcu modifier to cgroup->subsys[] ...
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/cgroup.h303
-rw-r--r--include/linux/memcontrol.h2
-rw-r--r--include/linux/vmpressure.h6
3 files changed, 170 insertions, 141 deletions
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index e9ac882868c0..3561d305b1e0 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -66,22 +66,25 @@ enum cgroup_subsys_id {
66 66
67/* Per-subsystem/per-cgroup state maintained by the system. */ 67/* Per-subsystem/per-cgroup state maintained by the system. */
68struct cgroup_subsys_state { 68struct cgroup_subsys_state {
69 /* 69 /* the cgroup that this css is attached to */
70 * The cgroup that this subsystem is attached to. Useful
71 * for subsystems that want to know about the cgroup
72 * hierarchy structure
73 */
74 struct cgroup *cgroup; 70 struct cgroup *cgroup;
75 71
72 /* the cgroup subsystem that this css is attached to */
73 struct cgroup_subsys *ss;
74
76 /* reference count - access via css_[try]get() and css_put() */ 75 /* reference count - access via css_[try]get() and css_put() */
77 struct percpu_ref refcnt; 76 struct percpu_ref refcnt;
78 77
78 /* the parent css */
79 struct cgroup_subsys_state *parent;
80
79 unsigned long flags; 81 unsigned long flags;
80 /* ID for this css, if possible */ 82 /* ID for this css, if possible */
81 struct css_id __rcu *id; 83 struct css_id __rcu *id;
82 84
83 /* Used to put @cgroup->dentry on the last css_put() */ 85 /* percpu_ref killing and RCU release */
84 struct work_struct dput_work; 86 struct rcu_head rcu_head;
87 struct work_struct destroy_work;
85}; 88};
86 89
87/* bits in struct cgroup_subsys_state flags field */ 90/* bits in struct cgroup_subsys_state flags field */
@@ -161,7 +164,16 @@ struct cgroup_name {
161struct cgroup { 164struct cgroup {
162 unsigned long flags; /* "unsigned long" so bitops work */ 165 unsigned long flags; /* "unsigned long" so bitops work */
163 166
164 int id; /* ida allocated in-hierarchy ID */ 167 /*
168 * idr allocated in-hierarchy ID.
169 *
170 * The ID of the root cgroup is always 0, and a new cgroup
171 * will be assigned with a smallest available ID.
172 */
173 int id;
174
175 /* the number of attached css's */
176 int nr_css;
165 177
166 /* 178 /*
167 * We link our 'sibling' struct into our parent's 'children'. 179 * We link our 'sibling' struct into our parent's 'children'.
@@ -196,7 +208,7 @@ struct cgroup {
196 struct cgroup_name __rcu *name; 208 struct cgroup_name __rcu *name;
197 209
198 /* Private pointers for each registered subsystem */ 210 /* Private pointers for each registered subsystem */
199 struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; 211 struct cgroup_subsys_state __rcu *subsys[CGROUP_SUBSYS_COUNT];
200 212
201 struct cgroupfs_root *root; 213 struct cgroupfs_root *root;
202 214
@@ -220,10 +232,12 @@ struct cgroup {
220 struct list_head pidlists; 232 struct list_head pidlists;
221 struct mutex pidlist_mutex; 233 struct mutex pidlist_mutex;
222 234
235 /* dummy css with NULL ->ss, points back to this cgroup */
236 struct cgroup_subsys_state dummy_css;
237
223 /* For css percpu_ref killing and RCU-protected deletion */ 238 /* For css percpu_ref killing and RCU-protected deletion */
224 struct rcu_head rcu_head; 239 struct rcu_head rcu_head;
225 struct work_struct destroy_work; 240 struct work_struct destroy_work;
226 atomic_t css_kill_cnt;
227 241
228 /* List of events which userspace want to receive */ 242 /* List of events which userspace want to receive */
229 struct list_head event_list; 243 struct list_head event_list;
@@ -322,7 +336,7 @@ struct cgroupfs_root {
322 unsigned long flags; 336 unsigned long flags;
323 337
324 /* IDs for cgroups in this hierarchy */ 338 /* IDs for cgroups in this hierarchy */
325 struct ida cgroup_ida; 339 struct idr cgroup_idr;
326 340
327 /* The path to use for release notifications. */ 341 /* The path to use for release notifications. */
328 char release_agent_path[PATH_MAX]; 342 char release_agent_path[PATH_MAX];
@@ -394,9 +408,10 @@ struct cgroup_map_cb {
394 408
395/* cftype->flags */ 409/* cftype->flags */
396enum { 410enum {
397 CFTYPE_ONLY_ON_ROOT = (1 << 0), /* only create on root cg */ 411 CFTYPE_ONLY_ON_ROOT = (1 << 0), /* only create on root cgrp */
398 CFTYPE_NOT_ON_ROOT = (1 << 1), /* don't create on root cg */ 412 CFTYPE_NOT_ON_ROOT = (1 << 1), /* don't create on root cgrp */
399 CFTYPE_INSANE = (1 << 2), /* don't create if sane_behavior */ 413 CFTYPE_INSANE = (1 << 2), /* don't create if sane_behavior */
414 CFTYPE_NO_PREFIX = (1 << 3), /* (DON'T USE FOR NEW FILES) no subsys prefix */
400}; 415};
401 416
402#define MAX_CFTYPE_NAME 64 417#define MAX_CFTYPE_NAME 64
@@ -424,35 +439,41 @@ struct cftype {
424 /* CFTYPE_* flags */ 439 /* CFTYPE_* flags */
425 unsigned int flags; 440 unsigned int flags;
426 441
442 /*
443 * The subsys this file belongs to. Initialized automatically
444 * during registration. NULL for cgroup core files.
445 */
446 struct cgroup_subsys *ss;
447
427 int (*open)(struct inode *inode, struct file *file); 448 int (*open)(struct inode *inode, struct file *file);
428 ssize_t (*read)(struct cgroup *cgrp, struct cftype *cft, 449 ssize_t (*read)(struct cgroup_subsys_state *css, struct cftype *cft,
429 struct file *file, 450 struct file *file,
430 char __user *buf, size_t nbytes, loff_t *ppos); 451 char __user *buf, size_t nbytes, loff_t *ppos);
431 /* 452 /*
432 * read_u64() is a shortcut for the common case of returning a 453 * read_u64() is a shortcut for the common case of returning a
433 * single integer. Use it in place of read() 454 * single integer. Use it in place of read()
434 */ 455 */
435 u64 (*read_u64)(struct cgroup *cgrp, struct cftype *cft); 456 u64 (*read_u64)(struct cgroup_subsys_state *css, struct cftype *cft);
436 /* 457 /*
437 * read_s64() is a signed version of read_u64() 458 * read_s64() is a signed version of read_u64()
438 */ 459 */
439 s64 (*read_s64)(struct cgroup *cgrp, struct cftype *cft); 460 s64 (*read_s64)(struct cgroup_subsys_state *css, struct cftype *cft);
440 /* 461 /*
441 * read_map() is used for defining a map of key/value 462 * read_map() is used for defining a map of key/value
442 * pairs. It should call cb->fill(cb, key, value) for each 463 * pairs. It should call cb->fill(cb, key, value) for each
443 * entry. The key/value pairs (and their ordering) should not 464 * entry. The key/value pairs (and their ordering) should not
444 * change between reboots. 465 * change between reboots.
445 */ 466 */
446 int (*read_map)(struct cgroup *cgrp, struct cftype *cft, 467 int (*read_map)(struct cgroup_subsys_state *css, struct cftype *cft,
447 struct cgroup_map_cb *cb); 468 struct cgroup_map_cb *cb);
448 /* 469 /*
449 * read_seq_string() is used for outputting a simple sequence 470 * read_seq_string() is used for outputting a simple sequence
450 * using seqfile. 471 * using seqfile.
451 */ 472 */
452 int (*read_seq_string)(struct cgroup *cgrp, struct cftype *cft, 473 int (*read_seq_string)(struct cgroup_subsys_state *css,
453 struct seq_file *m); 474 struct cftype *cft, struct seq_file *m);
454 475
455 ssize_t (*write)(struct cgroup *cgrp, struct cftype *cft, 476 ssize_t (*write)(struct cgroup_subsys_state *css, struct cftype *cft,
456 struct file *file, 477 struct file *file,
457 const char __user *buf, size_t nbytes, loff_t *ppos); 478 const char __user *buf, size_t nbytes, loff_t *ppos);
458 479
@@ -461,18 +482,20 @@ struct cftype {
461 * a single integer (as parsed by simple_strtoull) from 482 * a single integer (as parsed by simple_strtoull) from
462 * userspace. Use in place of write(); return 0 or error. 483 * userspace. Use in place of write(); return 0 or error.
463 */ 484 */
464 int (*write_u64)(struct cgroup *cgrp, struct cftype *cft, u64 val); 485 int (*write_u64)(struct cgroup_subsys_state *css, struct cftype *cft,
486 u64 val);
465 /* 487 /*
466 * write_s64() is a signed version of write_u64() 488 * write_s64() is a signed version of write_u64()
467 */ 489 */
468 int (*write_s64)(struct cgroup *cgrp, struct cftype *cft, s64 val); 490 int (*write_s64)(struct cgroup_subsys_state *css, struct cftype *cft,
491 s64 val);
469 492
470 /* 493 /*
471 * write_string() is passed a nul-terminated kernelspace 494 * write_string() is passed a nul-terminated kernelspace
472 * buffer of maximum length determined by max_write_len. 495 * buffer of maximum length determined by max_write_len.
473 * Returns 0 or -ve error code. 496 * Returns 0 or -ve error code.
474 */ 497 */
475 int (*write_string)(struct cgroup *cgrp, struct cftype *cft, 498 int (*write_string)(struct cgroup_subsys_state *css, struct cftype *cft,
476 const char *buffer); 499 const char *buffer);
477 /* 500 /*
478 * trigger() callback can be used to get some kick from the 501 * trigger() callback can be used to get some kick from the
@@ -480,7 +503,7 @@ struct cftype {
480 * at all. The private field can be used to determine the 503 * at all. The private field can be used to determine the
481 * kick type for multiplexing. 504 * kick type for multiplexing.
482 */ 505 */
483 int (*trigger)(struct cgroup *cgrp, unsigned int event); 506 int (*trigger)(struct cgroup_subsys_state *css, unsigned int event);
484 507
485 int (*release)(struct inode *inode, struct file *file); 508 int (*release)(struct inode *inode, struct file *file);
486 509
@@ -490,16 +513,18 @@ struct cftype {
490 * you want to provide this functionality. Use eventfd_signal() 513 * you want to provide this functionality. Use eventfd_signal()
491 * on eventfd to send notification to userspace. 514 * on eventfd to send notification to userspace.
492 */ 515 */
493 int (*register_event)(struct cgroup *cgrp, struct cftype *cft, 516 int (*register_event)(struct cgroup_subsys_state *css,
494 struct eventfd_ctx *eventfd, const char *args); 517 struct cftype *cft, struct eventfd_ctx *eventfd,
518 const char *args);
495 /* 519 /*
496 * unregister_event() callback will be called when userspace 520 * unregister_event() callback will be called when userspace
497 * closes the eventfd or on cgroup removing. 521 * closes the eventfd or on cgroup removing.
498 * This callback must be implemented, if you want provide 522 * This callback must be implemented, if you want provide
499 * notification functionality. 523 * notification functionality.
500 */ 524 */
501 void (*unregister_event)(struct cgroup *cgrp, struct cftype *cft, 525 void (*unregister_event)(struct cgroup_subsys_state *css,
502 struct eventfd_ctx *eventfd); 526 struct cftype *cft,
527 struct eventfd_ctx *eventfd);
503}; 528};
504 529
505/* 530/*
@@ -512,15 +537,6 @@ struct cftype_set {
512 struct cftype *cfts; 537 struct cftype *cfts;
513}; 538};
514 539
515struct cgroup_scanner {
516 struct cgroup *cg;
517 int (*test_task)(struct task_struct *p, struct cgroup_scanner *scan);
518 void (*process_task)(struct task_struct *p,
519 struct cgroup_scanner *scan);
520 struct ptr_heap *heap;
521 void *data;
522};
523
524/* 540/*
525 * See the comment above CGRP_ROOT_SANE_BEHAVIOR for details. This 541 * See the comment above CGRP_ROOT_SANE_BEHAVIOR for details. This
526 * function can be called as long as @cgrp is accessible. 542 * function can be called as long as @cgrp is accessible.
@@ -537,7 +553,7 @@ static inline const char *cgroup_name(const struct cgroup *cgrp)
537} 553}
538 554
539int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); 555int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
540int cgroup_rm_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); 556int cgroup_rm_cftypes(struct cftype *cfts);
541 557
542bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor); 558bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor);
543 559
@@ -553,20 +569,22 @@ int cgroup_task_count(const struct cgroup *cgrp);
553struct cgroup_taskset; 569struct cgroup_taskset;
554struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset); 570struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset);
555struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset); 571struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset);
556struct cgroup *cgroup_taskset_cur_cgroup(struct cgroup_taskset *tset); 572struct cgroup_subsys_state *cgroup_taskset_cur_css(struct cgroup_taskset *tset,
573 int subsys_id);
557int cgroup_taskset_size(struct cgroup_taskset *tset); 574int cgroup_taskset_size(struct cgroup_taskset *tset);
558 575
559/** 576/**
560 * cgroup_taskset_for_each - iterate cgroup_taskset 577 * cgroup_taskset_for_each - iterate cgroup_taskset
561 * @task: the loop cursor 578 * @task: the loop cursor
562 * @skip_cgrp: skip if task's cgroup matches this, %NULL to iterate through all 579 * @skip_css: skip if task's css matches this, %NULL to iterate through all
563 * @tset: taskset to iterate 580 * @tset: taskset to iterate
564 */ 581 */
565#define cgroup_taskset_for_each(task, skip_cgrp, tset) \ 582#define cgroup_taskset_for_each(task, skip_css, tset) \
566 for ((task) = cgroup_taskset_first((tset)); (task); \ 583 for ((task) = cgroup_taskset_first((tset)); (task); \
567 (task) = cgroup_taskset_next((tset))) \ 584 (task) = cgroup_taskset_next((tset))) \
568 if (!(skip_cgrp) || \ 585 if (!(skip_css) || \
569 cgroup_taskset_cur_cgroup((tset)) != (skip_cgrp)) 586 cgroup_taskset_cur_css((tset), \
587 (skip_css)->ss->subsys_id) != (skip_css))
570 588
571/* 589/*
572 * Control Group subsystem type. 590 * Control Group subsystem type.
@@ -574,18 +592,22 @@ int cgroup_taskset_size(struct cgroup_taskset *tset);
574 */ 592 */
575 593
576struct cgroup_subsys { 594struct cgroup_subsys {
577 struct cgroup_subsys_state *(*css_alloc)(struct cgroup *cgrp); 595 struct cgroup_subsys_state *(*css_alloc)(struct cgroup_subsys_state *parent_css);
578 int (*css_online)(struct cgroup *cgrp); 596 int (*css_online)(struct cgroup_subsys_state *css);
579 void (*css_offline)(struct cgroup *cgrp); 597 void (*css_offline)(struct cgroup_subsys_state *css);
580 void (*css_free)(struct cgroup *cgrp); 598 void (*css_free)(struct cgroup_subsys_state *css);
581 599
582 int (*can_attach)(struct cgroup *cgrp, struct cgroup_taskset *tset); 600 int (*can_attach)(struct cgroup_subsys_state *css,
583 void (*cancel_attach)(struct cgroup *cgrp, struct cgroup_taskset *tset); 601 struct cgroup_taskset *tset);
584 void (*attach)(struct cgroup *cgrp, struct cgroup_taskset *tset); 602 void (*cancel_attach)(struct cgroup_subsys_state *css,
603 struct cgroup_taskset *tset);
604 void (*attach)(struct cgroup_subsys_state *css,
605 struct cgroup_taskset *tset);
585 void (*fork)(struct task_struct *task); 606 void (*fork)(struct task_struct *task);
586 void (*exit)(struct cgroup *cgrp, struct cgroup *old_cgrp, 607 void (*exit)(struct cgroup_subsys_state *css,
608 struct cgroup_subsys_state *old_css,
587 struct task_struct *task); 609 struct task_struct *task);
588 void (*bind)(struct cgroup *root); 610 void (*bind)(struct cgroup_subsys_state *root_css);
589 611
590 int subsys_id; 612 int subsys_id;
591 int disabled; 613 int disabled;
@@ -641,10 +663,17 @@ struct cgroup_subsys {
641#undef IS_SUBSYS_ENABLED 663#undef IS_SUBSYS_ENABLED
642#undef SUBSYS 664#undef SUBSYS
643 665
644static inline struct cgroup_subsys_state *cgroup_subsys_state( 666/**
645 struct cgroup *cgrp, int subsys_id) 667 * css_parent - find the parent css
668 * @css: the target cgroup_subsys_state
669 *
670 * Return the parent css of @css. This function is guaranteed to return
671 * non-NULL parent as long as @css isn't the root.
672 */
673static inline
674struct cgroup_subsys_state *css_parent(struct cgroup_subsys_state *css)
646{ 675{
647 return cgrp->subsys[subsys_id]; 676 return css->parent;
648} 677}
649 678
650/** 679/**
@@ -672,7 +701,7 @@ extern struct mutex cgroup_mutex;
672#endif 701#endif
673 702
674/** 703/**
675 * task_subsys_state_check - obtain css for (task, subsys) w/ extra access conds 704 * task_css_check - obtain css for (task, subsys) w/ extra access conds
676 * @task: the target task 705 * @task: the target task
677 * @subsys_id: the target subsystem ID 706 * @subsys_id: the target subsystem ID
678 * @__c: extra condition expression to be passed to rcu_dereference_check() 707 * @__c: extra condition expression to be passed to rcu_dereference_check()
@@ -680,7 +709,7 @@ extern struct mutex cgroup_mutex;
680 * Return the cgroup_subsys_state for the (@task, @subsys_id) pair. The 709 * Return the cgroup_subsys_state for the (@task, @subsys_id) pair. The
681 * synchronization rules are the same as task_css_set_check(). 710 * synchronization rules are the same as task_css_set_check().
682 */ 711 */
683#define task_subsys_state_check(task, subsys_id, __c) \ 712#define task_css_check(task, subsys_id, __c) \
684 task_css_set_check((task), (__c))->subsys[(subsys_id)] 713 task_css_set_check((task), (__c))->subsys[(subsys_id)]
685 714
686/** 715/**
@@ -695,87 +724,92 @@ static inline struct css_set *task_css_set(struct task_struct *task)
695} 724}
696 725
697/** 726/**
698 * task_subsys_state - obtain css for (task, subsys) 727 * task_css - obtain css for (task, subsys)
699 * @task: the target task 728 * @task: the target task
700 * @subsys_id: the target subsystem ID 729 * @subsys_id: the target subsystem ID
701 * 730 *
702 * See task_subsys_state_check(). 731 * See task_css_check().
703 */ 732 */
704static inline struct cgroup_subsys_state * 733static inline struct cgroup_subsys_state *task_css(struct task_struct *task,
705task_subsys_state(struct task_struct *task, int subsys_id) 734 int subsys_id)
706{ 735{
707 return task_subsys_state_check(task, subsys_id, false); 736 return task_css_check(task, subsys_id, false);
708} 737}
709 738
710static inline struct cgroup* task_cgroup(struct task_struct *task, 739static inline struct cgroup *task_cgroup(struct task_struct *task,
711 int subsys_id) 740 int subsys_id)
712{ 741{
713 return task_subsys_state(task, subsys_id)->cgroup; 742 return task_css(task, subsys_id)->cgroup;
714} 743}
715 744
716struct cgroup *cgroup_next_sibling(struct cgroup *pos); 745struct cgroup_subsys_state *css_next_child(struct cgroup_subsys_state *pos,
746 struct cgroup_subsys_state *parent);
747
748struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss);
717 749
718/** 750/**
719 * cgroup_for_each_child - iterate through children of a cgroup 751 * css_for_each_child - iterate through children of a css
720 * @pos: the cgroup * to use as the loop cursor 752 * @pos: the css * to use as the loop cursor
721 * @cgrp: cgroup whose children to walk 753 * @parent: css whose children to walk
722 * 754 *
723 * Walk @cgrp's children. Must be called under rcu_read_lock(). A child 755 * Walk @parent's children. Must be called under rcu_read_lock(). A child
724 * cgroup which hasn't finished ->css_online() or already has finished 756 * css which hasn't finished ->css_online() or already has finished
725 * ->css_offline() may show up during traversal and it's each subsystem's 757 * ->css_offline() may show up during traversal and it's each subsystem's
726 * responsibility to verify that each @pos is alive. 758 * responsibility to verify that each @pos is alive.
727 * 759 *
728 * If a subsystem synchronizes against the parent in its ->css_online() and 760 * If a subsystem synchronizes against the parent in its ->css_online() and
729 * before starting iterating, a cgroup which finished ->css_online() is 761 * before starting iterating, a css which finished ->css_online() is
730 * guaranteed to be visible in the future iterations. 762 * guaranteed to be visible in the future iterations.
731 * 763 *
732 * It is allowed to temporarily drop RCU read lock during iteration. The 764 * It is allowed to temporarily drop RCU read lock during iteration. The
733 * caller is responsible for ensuring that @pos remains accessible until 765 * caller is responsible for ensuring that @pos remains accessible until
734 * the start of the next iteration by, for example, bumping the css refcnt. 766 * the start of the next iteration by, for example, bumping the css refcnt.
735 */ 767 */
736#define cgroup_for_each_child(pos, cgrp) \ 768#define css_for_each_child(pos, parent) \
737 for ((pos) = list_first_or_null_rcu(&(cgrp)->children, \ 769 for ((pos) = css_next_child(NULL, (parent)); (pos); \
738 struct cgroup, sibling); \ 770 (pos) = css_next_child((pos), (parent)))
739 (pos); (pos) = cgroup_next_sibling((pos))) 771
772struct cgroup_subsys_state *
773css_next_descendant_pre(struct cgroup_subsys_state *pos,
774 struct cgroup_subsys_state *css);
740 775
741struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos, 776struct cgroup_subsys_state *
742 struct cgroup *cgroup); 777css_rightmost_descendant(struct cgroup_subsys_state *pos);
743struct cgroup *cgroup_rightmost_descendant(struct cgroup *pos);
744 778
745/** 779/**
746 * cgroup_for_each_descendant_pre - pre-order walk of a cgroup's descendants 780 * css_for_each_descendant_pre - pre-order walk of a css's descendants
747 * @pos: the cgroup * to use as the loop cursor 781 * @pos: the css * to use as the loop cursor
748 * @cgroup: cgroup whose descendants to walk 782 * @root: css whose descendants to walk
749 * 783 *
750 * Walk @cgroup's descendants. Must be called under rcu_read_lock(). A 784 * Walk @root's descendants. @root is included in the iteration and the
751 * descendant cgroup which hasn't finished ->css_online() or already has 785 * first node to be visited. Must be called under rcu_read_lock(). A
786 * descendant css which hasn't finished ->css_online() or already has
752 * finished ->css_offline() may show up during traversal and it's each 787 * finished ->css_offline() may show up during traversal and it's each
753 * subsystem's responsibility to verify that each @pos is alive. 788 * subsystem's responsibility to verify that each @pos is alive.
754 * 789 *
755 * If a subsystem synchronizes against the parent in its ->css_online() and 790 * If a subsystem synchronizes against the parent in its ->css_online() and
756 * before starting iterating, and synchronizes against @pos on each 791 * before starting iterating, and synchronizes against @pos on each
757 * iteration, any descendant cgroup which finished ->css_online() is 792 * iteration, any descendant css which finished ->css_online() is
758 * guaranteed to be visible in the future iterations. 793 * guaranteed to be visible in the future iterations.
759 * 794 *
760 * In other words, the following guarantees that a descendant can't escape 795 * In other words, the following guarantees that a descendant can't escape
761 * state updates of its ancestors. 796 * state updates of its ancestors.
762 * 797 *
763 * my_online(@cgrp) 798 * my_online(@css)
764 * { 799 * {
765 * Lock @cgrp->parent and @cgrp; 800 * Lock @css's parent and @css;
766 * Inherit state from @cgrp->parent; 801 * Inherit state from the parent;
767 * Unlock both. 802 * Unlock both.
768 * } 803 * }
769 * 804 *
770 * my_update_state(@cgrp) 805 * my_update_state(@css)
771 * { 806 * {
772 * Lock @cgrp; 807 * css_for_each_descendant_pre(@pos, @css) {
773 * Update @cgrp's state;
774 * Unlock @cgrp;
775 *
776 * cgroup_for_each_descendant_pre(@pos, @cgrp) {
777 * Lock @pos; 808 * Lock @pos;
778 * Verify @pos is alive and inherit state from @pos->parent; 809 * if (@pos == @css)
810 * Update @css's state;
811 * else
812 * Verify @pos is alive and inherit state from its parent;
779 * Unlock @pos; 813 * Unlock @pos;
780 * } 814 * }
781 * } 815 * }
@@ -786,8 +820,7 @@ struct cgroup *cgroup_rightmost_descendant(struct cgroup *pos);
786 * visible by walking order and, as long as inheriting operations to the 820 * visible by walking order and, as long as inheriting operations to the
787 * same @pos are atomic to each other, multiple updates racing each other 821 * same @pos are atomic to each other, multiple updates racing each other
788 * still result in the correct state. It's guaranateed that at least one 822 * still result in the correct state. It's guaranateed that at least one
789 * inheritance happens for any cgroup after the latest update to its 823 * inheritance happens for any css after the latest update to its parent.
790 * parent.
791 * 824 *
792 * If checking parent's state requires locking the parent, each inheriting 825 * If checking parent's state requires locking the parent, each inheriting
793 * iteration should lock and unlock both @pos->parent and @pos. 826 * iteration should lock and unlock both @pos->parent and @pos.
@@ -800,52 +833,45 @@ struct cgroup *cgroup_rightmost_descendant(struct cgroup *pos);
800 * caller is responsible for ensuring that @pos remains accessible until 833 * caller is responsible for ensuring that @pos remains accessible until
801 * the start of the next iteration by, for example, bumping the css refcnt. 834 * the start of the next iteration by, for example, bumping the css refcnt.
802 */ 835 */
803#define cgroup_for_each_descendant_pre(pos, cgroup) \ 836#define css_for_each_descendant_pre(pos, css) \
804 for (pos = cgroup_next_descendant_pre(NULL, (cgroup)); (pos); \ 837 for ((pos) = css_next_descendant_pre(NULL, (css)); (pos); \
805 pos = cgroup_next_descendant_pre((pos), (cgroup))) 838 (pos) = css_next_descendant_pre((pos), (css)))
806 839
807struct cgroup *cgroup_next_descendant_post(struct cgroup *pos, 840struct cgroup_subsys_state *
808 struct cgroup *cgroup); 841css_next_descendant_post(struct cgroup_subsys_state *pos,
842 struct cgroup_subsys_state *css);
809 843
810/** 844/**
811 * cgroup_for_each_descendant_post - post-order walk of a cgroup's descendants 845 * css_for_each_descendant_post - post-order walk of a css's descendants
812 * @pos: the cgroup * to use as the loop cursor 846 * @pos: the css * to use as the loop cursor
813 * @cgroup: cgroup whose descendants to walk 847 * @css: css whose descendants to walk
814 * 848 *
815 * Similar to cgroup_for_each_descendant_pre() but performs post-order 849 * Similar to css_for_each_descendant_pre() but performs post-order
816 * traversal instead. Note that the walk visibility guarantee described in 850 * traversal instead. @root is included in the iteration and the last
817 * pre-order walk doesn't apply the same to post-order walks. 851 * node to be visited. Note that the walk visibility guarantee described
852 * in pre-order walk doesn't apply the same to post-order walks.
818 */ 853 */
819#define cgroup_for_each_descendant_post(pos, cgroup) \ 854#define css_for_each_descendant_post(pos, css) \
820 for (pos = cgroup_next_descendant_post(NULL, (cgroup)); (pos); \ 855 for ((pos) = css_next_descendant_post(NULL, (css)); (pos); \
821 pos = cgroup_next_descendant_post((pos), (cgroup))) 856 (pos) = css_next_descendant_post((pos), (css)))
822 857
823/* A cgroup_iter should be treated as an opaque object */ 858/* A css_task_iter should be treated as an opaque object */
824struct cgroup_iter { 859struct css_task_iter {
825 struct list_head *cset_link; 860 struct cgroup_subsys_state *origin_css;
826 struct list_head *task; 861 struct list_head *cset_link;
862 struct list_head *task;
827}; 863};
828 864
829/* 865void css_task_iter_start(struct cgroup_subsys_state *css,
830 * To iterate across the tasks in a cgroup: 866 struct css_task_iter *it);
831 * 867struct task_struct *css_task_iter_next(struct css_task_iter *it);
832 * 1) call cgroup_iter_start to initialize an iterator 868void css_task_iter_end(struct css_task_iter *it);
833 * 869
834 * 2) call cgroup_iter_next() to retrieve member tasks until it 870int css_scan_tasks(struct cgroup_subsys_state *css,
835 * returns NULL or until you want to end the iteration 871 bool (*test)(struct task_struct *, void *),
836 * 872 void (*process)(struct task_struct *, void *),
837 * 3) call cgroup_iter_end() to destroy the iterator. 873 void *data, struct ptr_heap *heap);
838 * 874
839 * Or, call cgroup_scan_tasks() to iterate through every task in a
840 * cgroup - cgroup_scan_tasks() holds the css_set_lock when calling
841 * the test_task() callback, but not while calling the process_task()
842 * callback.
843 */
844void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it);
845struct task_struct *cgroup_iter_next(struct cgroup *cgrp,
846 struct cgroup_iter *it);
847void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it);
848int cgroup_scan_tasks(struct cgroup_scanner *scan);
849int cgroup_attach_task_all(struct task_struct *from, struct task_struct *); 875int cgroup_attach_task_all(struct task_struct *from, struct task_struct *);
850int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from); 876int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from);
851 877
@@ -878,7 +904,8 @@ bool css_is_ancestor(struct cgroup_subsys_state *cg,
878 904
879/* Get id and depth of css */ 905/* Get id and depth of css */
880unsigned short css_id(struct cgroup_subsys_state *css); 906unsigned short css_id(struct cgroup_subsys_state *css);
881struct cgroup_subsys_state *cgroup_css_from_dir(struct file *f, int id); 907struct cgroup_subsys_state *css_from_dir(struct dentry *dentry,
908 struct cgroup_subsys *ss);
882 909
883#else /* !CONFIG_CGROUPS */ 910#else /* !CONFIG_CGROUPS */
884 911
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 7b4d9d79570b..6c416092e324 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -85,7 +85,7 @@ extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
85extern struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm); 85extern struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm);
86 86
87extern struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg); 87extern struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg);
88extern struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont); 88extern struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css);
89 89
90static inline 90static inline
91bool mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *memcg) 91bool mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *memcg)
diff --git a/include/linux/vmpressure.h b/include/linux/vmpressure.h
index 7dc17e2456de..3f3788d49362 100644
--- a/include/linux/vmpressure.h
+++ b/include/linux/vmpressure.h
@@ -34,10 +34,12 @@ extern void vmpressure_cleanup(struct vmpressure *vmpr);
34extern struct vmpressure *memcg_to_vmpressure(struct mem_cgroup *memcg); 34extern struct vmpressure *memcg_to_vmpressure(struct mem_cgroup *memcg);
35extern struct cgroup_subsys_state *vmpressure_to_css(struct vmpressure *vmpr); 35extern struct cgroup_subsys_state *vmpressure_to_css(struct vmpressure *vmpr);
36extern struct vmpressure *css_to_vmpressure(struct cgroup_subsys_state *css); 36extern struct vmpressure *css_to_vmpressure(struct cgroup_subsys_state *css);
37extern int vmpressure_register_event(struct cgroup *cg, struct cftype *cft, 37extern int vmpressure_register_event(struct cgroup_subsys_state *css,
38 struct cftype *cft,
38 struct eventfd_ctx *eventfd, 39 struct eventfd_ctx *eventfd,
39 const char *args); 40 const char *args);
40extern void vmpressure_unregister_event(struct cgroup *cg, struct cftype *cft, 41extern void vmpressure_unregister_event(struct cgroup_subsys_state *css,
42 struct cftype *cft,
41 struct eventfd_ctx *eventfd); 43 struct eventfd_ctx *eventfd);
42#else 44#else
43static inline void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, 45static inline void vmpressure(gfp_t gfp, struct mem_cgroup *memcg,