Diffstat (limited to 'include/linux')
-rw-r--r--  include/linux/cgroup.h      | 303
-rw-r--r--  include/linux/memcontrol.h  |   2
-rw-r--r--  include/linux/vmpressure.h  |   6
3 files changed, 170 insertions(+), 141 deletions(-)
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index e9ac882868c0..3561d305b1e0 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -66,22 +66,25 @@ enum cgroup_subsys_id {
 
 /* Per-subsystem/per-cgroup state maintained by the system. */
 struct cgroup_subsys_state {
-	/*
-	 * The cgroup that this subsystem is attached to. Useful
-	 * for subsystems that want to know about the cgroup
-	 * hierarchy structure
-	 */
+	/* the cgroup that this css is attached to */
 	struct cgroup *cgroup;
 
+	/* the cgroup subsystem that this css is attached to */
+	struct cgroup_subsys *ss;
+
 	/* reference count - access via css_[try]get() and css_put() */
 	struct percpu_ref refcnt;
 
+	/* the parent css */
+	struct cgroup_subsys_state *parent;
+
 	unsigned long flags;
 	/* ID for this css, if possible */
 	struct css_id __rcu *id;
 
-	/* Used to put @cgroup->dentry on the last css_put() */
-	struct work_struct dput_work;
+	/* percpu_ref killing and RCU release */
+	struct rcu_head rcu_head;
+	struct work_struct destroy_work;
 };
 
 /* bits in struct cgroup_subsys_state flags field */
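For illustration only: with ->parent now cached in the css itself, an ancestry walk can stay entirely within cgroup_subsys_state, in the spirit of the existing css_is_ancestor(). A minimal sketch; my_css_is_ancestor() is a hypothetical helper, not part of this patch:

	/* sketch: walk toward the root; ->parent is NULL at the root css */
	static bool my_css_is_ancestor(struct cgroup_subsys_state *css,
				       struct cgroup_subsys_state *ancestor)
	{
		while (css) {
			if (css == ancestor)
				return true;
			css = css->parent;
		}
		return false;
	}
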
@@ -161,7 +164,16 @@ struct cgroup_name {
 struct cgroup {
 	unsigned long flags;		/* "unsigned long" so bitops work */
 
-	int id;				/* ida allocated in-hierarchy ID */
+	/*
+	 * idr allocated in-hierarchy ID.
+	 *
+	 * The ID of the root cgroup is always 0, and a new cgroup
+	 * will be assigned with a smallest available ID.
+	 */
+	int id;
+
+	/* the number of attached css's */
+	int nr_css;
 
 	/*
 	 * We link our 'sibling' struct into our parent's 'children'.
@@ -196,7 +208,7 @@ struct cgroup {
 	struct cgroup_name __rcu *name;
 
 	/* Private pointers for each registered subsystem */
-	struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];
+	struct cgroup_subsys_state __rcu *subsys[CGROUP_SUBSYS_COUNT];
 
 	struct cgroupfs_root *root;
 
@@ -220,10 +232,12 @@ struct cgroup {
 	struct list_head pidlists;
 	struct mutex pidlist_mutex;
 
+	/* dummy css with NULL ->ss, points back to this cgroup */
+	struct cgroup_subsys_state dummy_css;
+
 	/* For css percpu_ref killing and RCU-protected deletion */
 	struct rcu_head rcu_head;
 	struct work_struct destroy_work;
-	atomic_t css_kill_cnt;
 
 	/* List of events which userspace want to receive */
 	struct list_head event_list;
@@ -322,7 +336,7 @@ struct cgroupfs_root {
 	unsigned long flags;
 
 	/* IDs for cgroups in this hierarchy */
-	struct ida cgroup_ida;
+	struct idr cgroup_idr;
 
 	/* The path to use for release notifications. */
 	char release_agent_path[PATH_MAX];
@@ -394,9 +408,10 @@ struct cgroup_map_cb {
 
 /* cftype->flags */
 enum {
-	CFTYPE_ONLY_ON_ROOT	= (1 << 0),	/* only create on root cg */
-	CFTYPE_NOT_ON_ROOT	= (1 << 1),	/* don't create on root cg */
+	CFTYPE_ONLY_ON_ROOT	= (1 << 0),	/* only create on root cgrp */
+	CFTYPE_NOT_ON_ROOT	= (1 << 1),	/* don't create on root cgrp */
 	CFTYPE_INSANE		= (1 << 2),	/* don't create if sane_behavior */
+	CFTYPE_NO_PREFIX	= (1 << 3),	/* (DON'T USE FOR NEW FILES) no subsys prefix */
 };
 
 #define MAX_CFTYPE_NAME		64
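For illustration, a hedged sketch of how these flags typically appear in a cftype table (file name and handler are hypothetical; my_read_u64() is sketched after the next hunk). CFTYPE_NOT_ON_ROOT suppresses the file on the root cgroup:

	static struct cftype my_files[] = {
		{
			.name = "usage",		/* hypothetical file */
			.flags = CFTYPE_NOT_ON_ROOT,	/* skip the root cgroup */
			.read_u64 = my_read_u64,
		},
		{ }	/* zeroed entry terminates the array */
	};
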
@@ -424,35 +439,41 @@ struct cftype {
 	/* CFTYPE_* flags */
 	unsigned int flags;
 
+	/*
+	 * The subsys this file belongs to.  Initialized automatically
+	 * during registration.  NULL for cgroup core files.
+	 */
+	struct cgroup_subsys *ss;
+
 	int (*open)(struct inode *inode, struct file *file);
-	ssize_t (*read)(struct cgroup *cgrp, struct cftype *cft,
+	ssize_t (*read)(struct cgroup_subsys_state *css, struct cftype *cft,
 			struct file *file,
 			char __user *buf, size_t nbytes, loff_t *ppos);
 	/*
 	 * read_u64() is a shortcut for the common case of returning a
 	 * single integer. Use it in place of read()
 	 */
-	u64 (*read_u64)(struct cgroup *cgrp, struct cftype *cft);
+	u64 (*read_u64)(struct cgroup_subsys_state *css, struct cftype *cft);
 	/*
 	 * read_s64() is a signed version of read_u64()
 	 */
-	s64 (*read_s64)(struct cgroup *cgrp, struct cftype *cft);
+	s64 (*read_s64)(struct cgroup_subsys_state *css, struct cftype *cft);
 	/*
 	 * read_map() is used for defining a map of key/value
 	 * pairs. It should call cb->fill(cb, key, value) for each
 	 * entry. The key/value pairs (and their ordering) should not
 	 * change between reboots.
 	 */
-	int (*read_map)(struct cgroup *cgrp, struct cftype *cft,
+	int (*read_map)(struct cgroup_subsys_state *css, struct cftype *cft,
 			struct cgroup_map_cb *cb);
 	/*
 	 * read_seq_string() is used for outputting a simple sequence
 	 * using seqfile.
 	 */
-	int (*read_seq_string)(struct cgroup *cgrp, struct cftype *cft,
-			       struct seq_file *m);
+	int (*read_seq_string)(struct cgroup_subsys_state *css,
+			       struct cftype *cft, struct seq_file *m);
 
-	ssize_t (*write)(struct cgroup *cgrp, struct cftype *cft,
+	ssize_t (*write)(struct cgroup_subsys_state *css, struct cftype *cft,
 			 struct file *file,
 			 const char __user *buf, size_t nbytes, loff_t *ppos);
 
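Under the new signatures a handler receives the css directly and recovers its per-controller state with container_of(), the same embedding pattern mem_cgroup uses. A minimal sketch, assuming a hypothetical struct my_state that embeds its css:

	struct my_state {
		struct cgroup_subsys_state css;	/* embedded css */
		u64 usage;			/* hypothetical counter */
	};

	static u64 my_read_u64(struct cgroup_subsys_state *css, struct cftype *cft)
	{
		struct my_state *ms = container_of(css, struct my_state, css);

		return ms->usage;
	}
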
@@ -461,18 +482,20 @@ struct cftype {
 	 * a single integer (as parsed by simple_strtoull) from
 	 * userspace. Use in place of write(); return 0 or error.
 	 */
-	int (*write_u64)(struct cgroup *cgrp, struct cftype *cft, u64 val);
+	int (*write_u64)(struct cgroup_subsys_state *css, struct cftype *cft,
+			 u64 val);
 	/*
 	 * write_s64() is a signed version of write_u64()
 	 */
-	int (*write_s64)(struct cgroup *cgrp, struct cftype *cft, s64 val);
+	int (*write_s64)(struct cgroup_subsys_state *css, struct cftype *cft,
+			 s64 val);
 
 	/*
 	 * write_string() is passed a nul-terminated kernelspace
 	 * buffer of maximum length determined by max_write_len.
 	 * Returns 0 or -ve error code.
 	 */
-	int (*write_string)(struct cgroup *cgrp, struct cftype *cft,
+	int (*write_string)(struct cgroup_subsys_state *css, struct cftype *cft,
 			    const char *buffer);
 	/*
 	 * trigger() callback can be used to get some kick from the
@@ -480,7 +503,7 @@ struct cftype {
 	 * at all. The private field can be used to determine the
 	 * kick type for multiplexing.
 	 */
-	int (*trigger)(struct cgroup *cgrp, unsigned int event);
+	int (*trigger)(struct cgroup_subsys_state *css, unsigned int event);
 
 	int (*release)(struct inode *inode, struct file *file);
 
@@ -490,16 +513,18 @@ struct cftype {
 	 * you want to provide this functionality. Use eventfd_signal()
 	 * on eventfd to send notification to userspace.
 	 */
-	int (*register_event)(struct cgroup *cgrp, struct cftype *cft,
-			struct eventfd_ctx *eventfd, const char *args);
+	int (*register_event)(struct cgroup_subsys_state *css,
+			      struct cftype *cft, struct eventfd_ctx *eventfd,
+			      const char *args);
 	/*
 	 * unregister_event() callback will be called when userspace
 	 * closes the eventfd or on cgroup removing.
 	 * This callback must be implemented, if you want provide
 	 * notification functionality.
 	 */
-	void (*unregister_event)(struct cgroup *cgrp, struct cftype *cft,
-			struct eventfd_ctx *eventfd);
+	void (*unregister_event)(struct cgroup_subsys_state *css,
+				 struct cftype *cft,
+				 struct eventfd_ctx *eventfd);
 };
 
 /*
@@ -512,15 +537,6 @@ struct cftype_set {
 	struct cftype *cfts;
 };
 
-struct cgroup_scanner {
-	struct cgroup *cg;
-	int (*test_task)(struct task_struct *p, struct cgroup_scanner *scan);
-	void (*process_task)(struct task_struct *p,
-			struct cgroup_scanner *scan);
-	struct ptr_heap *heap;
-	void *data;
-};
-
 /*
  * See the comment above CGRP_ROOT_SANE_BEHAVIOR for details.  This
  * function can be called as long as @cgrp is accessible.
@@ -537,7 +553,7 @@ static inline const char *cgroup_name(const struct cgroup *cgrp)
 }
 
 int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
-int cgroup_rm_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
+int cgroup_rm_cftypes(struct cftype *cfts);
 
 bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor);
 
@@ -553,20 +569,22 @@ int cgroup_task_count(const struct cgroup *cgrp);
 struct cgroup_taskset;
 struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset);
 struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset);
-struct cgroup *cgroup_taskset_cur_cgroup(struct cgroup_taskset *tset);
+struct cgroup_subsys_state *cgroup_taskset_cur_css(struct cgroup_taskset *tset,
+						   int subsys_id);
 int cgroup_taskset_size(struct cgroup_taskset *tset);
 
 /**
  * cgroup_taskset_for_each - iterate cgroup_taskset
  * @task: the loop cursor
- * @skip_cgrp: skip if task's cgroup matches this, %NULL to iterate through all
+ * @skip_css: skip if task's css matches this, %NULL to iterate through all
  * @tset: taskset to iterate
  */
-#define cgroup_taskset_for_each(task, skip_cgrp, tset)			\
+#define cgroup_taskset_for_each(task, skip_css, tset)			\
 	for ((task) = cgroup_taskset_first((tset)); (task);		\
 	     (task) = cgroup_taskset_next((tset)))			\
-		if (!(skip_cgrp) ||					\
-		    cgroup_taskset_cur_cgroup((tset)) != (skip_cgrp))
+		if (!(skip_css) ||					\
+		    cgroup_taskset_cur_css((tset),			\
+			(skip_css)->ss->subsys_id) != (skip_css))
 
 /*
  * Control Group subsystem type.
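A sketch of an ->attach() method written against the css-based taskset API (controller and accounting are hypothetical); passing %NULL as skip_css visits every task in the set:

	static void my_attach(struct cgroup_subsys_state *css,
			      struct cgroup_taskset *tset)
	{
		struct task_struct *task;

		cgroup_taskset_for_each(task, NULL, tset) {
			/* re-account @task against the state embedded in @css */
		}
	}
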
@@ -574,18 +592,22 @@ int cgroup_taskset_size(struct cgroup_taskset *tset);
574 */ 592 */
575 593
576struct cgroup_subsys { 594struct cgroup_subsys {
577 struct cgroup_subsys_state *(*css_alloc)(struct cgroup *cgrp); 595 struct cgroup_subsys_state *(*css_alloc)(struct cgroup_subsys_state *parent_css);
578 int (*css_online)(struct cgroup *cgrp); 596 int (*css_online)(struct cgroup_subsys_state *css);
579 void (*css_offline)(struct cgroup *cgrp); 597 void (*css_offline)(struct cgroup_subsys_state *css);
580 void (*css_free)(struct cgroup *cgrp); 598 void (*css_free)(struct cgroup_subsys_state *css);
581 599
582 int (*can_attach)(struct cgroup *cgrp, struct cgroup_taskset *tset); 600 int (*can_attach)(struct cgroup_subsys_state *css,
583 void (*cancel_attach)(struct cgroup *cgrp, struct cgroup_taskset *tset); 601 struct cgroup_taskset *tset);
584 void (*attach)(struct cgroup *cgrp, struct cgroup_taskset *tset); 602 void (*cancel_attach)(struct cgroup_subsys_state *css,
603 struct cgroup_taskset *tset);
604 void (*attach)(struct cgroup_subsys_state *css,
605 struct cgroup_taskset *tset);
585 void (*fork)(struct task_struct *task); 606 void (*fork)(struct task_struct *task);
586 void (*exit)(struct cgroup *cgrp, struct cgroup *old_cgrp, 607 void (*exit)(struct cgroup_subsys_state *css,
608 struct cgroup_subsys_state *old_css,
587 struct task_struct *task); 609 struct task_struct *task);
588 void (*bind)(struct cgroup *root); 610 void (*bind)(struct cgroup_subsys_state *root_css);
589 611
590 int subsys_id; 612 int subsys_id;
591 int disabled; 613 int disabled;
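Against the new signatures, allocating and freeing per-controller state might look like the following sketch (struct my_state as above; as of this series the cgroup core initializes the embedded css, including ->parent and ->ss, after ->css_alloc() returns):

	static struct cgroup_subsys_state *
	my_css_alloc(struct cgroup_subsys_state *parent_css)
	{
		struct my_state *ms;

		ms = kzalloc(sizeof(*ms), GFP_KERNEL);
		if (!ms)
			return ERR_PTR(-ENOMEM);
		return &ms->css;
	}

	static void my_css_free(struct cgroup_subsys_state *css)
	{
		kfree(container_of(css, struct my_state, css));
	}
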
@@ -641,10 +663,17 @@ struct cgroup_subsys {
 #undef IS_SUBSYS_ENABLED
 #undef SUBSYS
 
-static inline struct cgroup_subsys_state *cgroup_subsys_state(
-	struct cgroup *cgrp, int subsys_id)
+/**
+ * css_parent - find the parent css
+ * @css: the target cgroup_subsys_state
+ *
+ * Return the parent css of @css.  This function is guaranteed to return
+ * non-NULL parent as long as @css isn't the root.
+ */
+static inline
+struct cgroup_subsys_state *css_parent(struct cgroup_subsys_state *css)
 {
-	return cgrp->subsys[subsys_id];
+	return css->parent;
 }
 
 /**
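Controllers that used to climb the tree via cgrp->parent and the subsys[] array can now go through css_parent(); for example, a parent accessor in the style of parent_mem_cgroup() (sketch, hypothetical my_state):

	static struct my_state *parent_my_state(struct my_state *ms)
	{
		struct cgroup_subsys_state *parent = css_parent(&ms->css);

		return parent ? container_of(parent, struct my_state, css) : NULL;
	}
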
@@ -672,7 +701,7 @@ extern struct mutex cgroup_mutex;
 #endif
 
 /**
- * task_subsys_state_check - obtain css for (task, subsys) w/ extra access conds
+ * task_css_check - obtain css for (task, subsys) w/ extra access conds
  * @task: the target task
  * @subsys_id: the target subsystem ID
  * @__c: extra condition expression to be passed to rcu_dereference_check()
@@ -680,7 +709,7 @@ extern struct mutex cgroup_mutex;
  * Return the cgroup_subsys_state for the (@task, @subsys_id) pair.  The
  * synchronization rules are the same as task_css_set_check().
  */
-#define task_subsys_state_check(task, subsys_id, __c)			\
+#define task_css_check(task, subsys_id, __c)				\
 	task_css_set_check((task), (__c))->subsys[(subsys_id)]
 
 /**
@@ -695,87 +724,92 @@ static inline struct css_set *task_css_set(struct task_struct *task)
 }
 
 /**
- * task_subsys_state - obtain css for (task, subsys)
+ * task_css - obtain css for (task, subsys)
  * @task: the target task
  * @subsys_id: the target subsystem ID
  *
- * See task_subsys_state_check().
+ * See task_css_check().
  */
-static inline struct cgroup_subsys_state *
-task_subsys_state(struct task_struct *task, int subsys_id)
+static inline struct cgroup_subsys_state *task_css(struct task_struct *task,
+						   int subsys_id)
 {
-	return task_subsys_state_check(task, subsys_id, false);
+	return task_css_check(task, subsys_id, false);
 }
 
-static inline struct cgroup* task_cgroup(struct task_struct *task,
+static inline struct cgroup *task_cgroup(struct task_struct *task,
 					  int subsys_id)
 {
-	return task_subsys_state(task, subsys_id)->cgroup;
+	return task_css(task, subsys_id)->cgroup;
 }
 
-struct cgroup *cgroup_next_sibling(struct cgroup *pos);
+struct cgroup_subsys_state *css_next_child(struct cgroup_subsys_state *pos,
+					   struct cgroup_subsys_state *parent);
+
+struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss);
 
 /**
- * cgroup_for_each_child - iterate through children of a cgroup
- * @pos: the cgroup * to use as the loop cursor
- * @cgrp: cgroup whose children to walk
+ * css_for_each_child - iterate through children of a css
+ * @pos: the css * to use as the loop cursor
+ * @parent: css whose children to walk
  *
- * Walk @cgrp's children.  Must be called under rcu_read_lock().  A child
- * cgroup which hasn't finished ->css_online() or already has finished
+ * Walk @parent's children.  Must be called under rcu_read_lock().  A child
+ * css which hasn't finished ->css_online() or already has finished
  * ->css_offline() may show up during traversal and it's each subsystem's
  * responsibility to verify that each @pos is alive.
  *
  * If a subsystem synchronizes against the parent in its ->css_online() and
- * before starting iterating, a cgroup which finished ->css_online() is
+ * before starting iterating, a css which finished ->css_online() is
  * guaranteed to be visible in the future iterations.
  *
  * It is allowed to temporarily drop RCU read lock during iteration.  The
  * caller is responsible for ensuring that @pos remains accessible until
  * the start of the next iteration by, for example, bumping the css refcnt.
  */
-#define cgroup_for_each_child(pos, cgrp)				\
-	for ((pos) = list_first_or_null_rcu(&(cgrp)->children,		\
-					    struct cgroup, sibling);	\
-	     (pos); (pos) = cgroup_next_sibling((pos)))
+#define css_for_each_child(pos, parent)					\
+	for ((pos) = css_next_child(NULL, (parent)); (pos);		\
+	     (pos) = css_next_child((pos), (parent)))
+
+struct cgroup_subsys_state *
+css_next_descendant_pre(struct cgroup_subsys_state *pos,
+			struct cgroup_subsys_state *css);
 
-struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos,
-					  struct cgroup *cgroup);
-struct cgroup *cgroup_rightmost_descendant(struct cgroup *pos);
+struct cgroup_subsys_state *
+css_rightmost_descendant(struct cgroup_subsys_state *pos);
 
 /**
- * cgroup_for_each_descendant_pre - pre-order walk of a cgroup's descendants
- * @pos: the cgroup * to use as the loop cursor
- * @cgroup: cgroup whose descendants to walk
+ * css_for_each_descendant_pre - pre-order walk of a css's descendants
+ * @pos: the css * to use as the loop cursor
+ * @root: css whose descendants to walk
  *
- * Walk @cgroup's descendants.  Must be called under rcu_read_lock().  A
- * descendant cgroup which hasn't finished ->css_online() or already has
+ * Walk @root's descendants.  @root is included in the iteration and the
+ * first node to be visited.  Must be called under rcu_read_lock().  A
+ * descendant css which hasn't finished ->css_online() or already has
  * finished ->css_offline() may show up during traversal and it's each
  * subsystem's responsibility to verify that each @pos is alive.
  *
  * If a subsystem synchronizes against the parent in its ->css_online() and
  * before starting iterating, and synchronizes against @pos on each
- * iteration, any descendant cgroup which finished ->css_online() is
+ * iteration, any descendant css which finished ->css_online() is
  * guaranteed to be visible in the future iterations.
  *
  * In other words, the following guarantees that a descendant can't escape
  * state updates of its ancestors.
  *
- * my_online(@cgrp)
+ * my_online(@css)
  * {
- *	Lock @cgrp->parent and @cgrp;
- *	Inherit state from @cgrp->parent;
+ *	Lock @css's parent and @css;
+ *	Inherit state from the parent;
  *	Unlock both.
  * }
  *
- * my_update_state(@cgrp)
+ * my_update_state(@css)
  * {
- *	Lock @cgrp;
- *	Update @cgrp's state;
- *	Unlock @cgrp;
- *
- *	cgroup_for_each_descendant_pre(@pos, @cgrp) {
+ *	css_for_each_descendant_pre(@pos, @css) {
  *		Lock @pos;
- *		Verify @pos is alive and inherit state from @pos->parent;
+ *		if (@pos == @css)
+ *			Update @css's state;
+ *		else
+ *			Verify @pos is alive and inherit state from its parent;
  *		Unlock @pos;
  *	}
  * }
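The my_update_state() pseudocode above, spelled out as a hedged C sketch; the ->lock, ->online, and ->limit fields of my_state are hypothetical, and parent_my_state() is the accessor sketched earlier:

	static void my_update_limit(struct my_state *ms, u64 new_limit)
	{
		struct cgroup_subsys_state *pos;

		rcu_read_lock();
		css_for_each_descendant_pre(pos, &ms->css) {
			struct my_state *c = container_of(pos, struct my_state, css);

			spin_lock(&c->lock);
			if (pos == &ms->css)
				c->limit = new_limit;
			else if (c->online)	/* verify @pos is alive */
				c->limit = min(c->limit,
					       parent_my_state(c)->limit);
			spin_unlock(&c->lock);
		}
		rcu_read_unlock();
	}
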
@@ -786,8 +820,7 @@ struct cgroup *cgroup_rightmost_descendant(struct cgroup *pos);
  * visible by walking order and, as long as inheriting operations to the
  * same @pos are atomic to each other, multiple updates racing each other
  * still result in the correct state.  It's guaranateed that at least one
- * inheritance happens for any cgroup after the latest update to its
- * parent.
+ * inheritance happens for any css after the latest update to its parent.
  *
  * If checking parent's state requires locking the parent, each inheriting
  * iteration should lock and unlock both @pos->parent and @pos.
@@ -800,52 +833,45 @@ struct cgroup *cgroup_rightmost_descendant(struct cgroup *pos);
  * caller is responsible for ensuring that @pos remains accessible until
  * the start of the next iteration by, for example, bumping the css refcnt.
  */
-#define cgroup_for_each_descendant_pre(pos, cgroup)			\
-	for (pos = cgroup_next_descendant_pre(NULL, (cgroup)); (pos);	\
-	     pos = cgroup_next_descendant_pre((pos), (cgroup)))
+#define css_for_each_descendant_pre(pos, css)				\
+	for ((pos) = css_next_descendant_pre(NULL, (css)); (pos);	\
+	     (pos) = css_next_descendant_pre((pos), (css)))
 
-struct cgroup *cgroup_next_descendant_post(struct cgroup *pos,
-					   struct cgroup *cgroup);
+struct cgroup_subsys_state *
+css_next_descendant_post(struct cgroup_subsys_state *pos,
+			 struct cgroup_subsys_state *css);
 
 /**
- * cgroup_for_each_descendant_post - post-order walk of a cgroup's descendants
- * @pos: the cgroup * to use as the loop cursor
- * @cgroup: cgroup whose descendants to walk
+ * css_for_each_descendant_post - post-order walk of a css's descendants
+ * @pos: the css * to use as the loop cursor
+ * @css: css whose descendants to walk
  *
- * Similar to cgroup_for_each_descendant_pre() but performs post-order
- * traversal instead.  Note that the walk visibility guarantee described in
- * pre-order walk doesn't apply the same to post-order walks.
+ * Similar to css_for_each_descendant_pre() but performs post-order
+ * traversal instead.  @root is included in the iteration and the last
+ * node to be visited.  Note that the walk visibility guarantee described
+ * in pre-order walk doesn't apply the same to post-order walks.
  */
-#define cgroup_for_each_descendant_post(pos, cgroup)			\
-	for (pos = cgroup_next_descendant_post(NULL, (cgroup)); (pos);	\
-	     pos = cgroup_next_descendant_post((pos), (cgroup)))
+#define css_for_each_descendant_post(pos, css)				\
+	for ((pos) = css_next_descendant_post(NULL, (css)); (pos);	\
+	     (pos) = css_next_descendant_post((pos), (css)))
 
-/* A cgroup_iter should be treated as an opaque object */
-struct cgroup_iter {
-	struct list_head *cset_link;
-	struct list_head *task;
+/* A css_task_iter should be treated as an opaque object */
+struct css_task_iter {
+	struct cgroup_subsys_state *origin_css;
+	struct list_head *cset_link;
+	struct list_head *task;
 };
 
-/*
- * To iterate across the tasks in a cgroup:
- *
- * 1) call cgroup_iter_start to initialize an iterator
- *
- * 2) call cgroup_iter_next() to retrieve member tasks until it
- *    returns NULL or until you want to end the iteration
- *
- * 3) call cgroup_iter_end() to destroy the iterator.
- *
- * Or, call cgroup_scan_tasks() to iterate through every task in a
- * cgroup - cgroup_scan_tasks() holds the css_set_lock when calling
- * the test_task() callback, but not while calling the process_task()
- * callback.
- */
-void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it);
-struct task_struct *cgroup_iter_next(struct cgroup *cgrp,
-				     struct cgroup_iter *it);
-void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it);
-int cgroup_scan_tasks(struct cgroup_scanner *scan);
+void css_task_iter_start(struct cgroup_subsys_state *css,
+			 struct css_task_iter *it);
+struct task_struct *css_task_iter_next(struct css_task_iter *it);
+void css_task_iter_end(struct css_task_iter *it);
+
+int css_scan_tasks(struct cgroup_subsys_state *css,
+		   bool (*test)(struct task_struct *, void *),
+		   void (*process)(struct task_struct *, void *),
+		   void *data, struct ptr_heap *heap);
+
 int cgroup_attach_task_all(struct task_struct *from, struct task_struct *);
 int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from);
 
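The removed cgroup_scanner's test/process split now lives in css_scan_tasks(), while plain traversal uses the start/next/end triple. A minimal usage sketch (hypothetical helper):

	static int my_count_tasks(struct cgroup_subsys_state *css)
	{
		struct css_task_iter it;
		struct task_struct *task;
		int count = 0;

		css_task_iter_start(css, &it);
		while ((task = css_task_iter_next(&it)))
			count++;
		css_task_iter_end(&it);

		return count;
	}
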
@@ -878,7 +904,8 @@ bool css_is_ancestor(struct cgroup_subsys_state *cg,
 
 /* Get id and depth of css */
 unsigned short css_id(struct cgroup_subsys_state *css);
-struct cgroup_subsys_state *cgroup_css_from_dir(struct file *f, int id);
+struct cgroup_subsys_state *css_from_dir(struct dentry *dentry,
+					 struct cgroup_subsys *ss);
 
 #else /* !CONFIG_CGROUPS */
 
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 7b4d9d79570b..6c416092e324 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -85,7 +85,7 @@ extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
 extern struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm);
 
 extern struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg);
-extern struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont);
+extern struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css);
 
 static inline
 bool mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *memcg)
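With mem_cgroup_from_cont() replaced by mem_cgroup_from_css(), resolving a task's memcg composes with the renamed task_css() accessor. A sketch (hypothetical helper; the caller must be in an RCU read-side section):

	static struct mem_cgroup *my_task_memcg(struct task_struct *p)
	{
		return mem_cgroup_from_css(task_css(p, mem_cgroup_subsys_id));
	}
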
diff --git a/include/linux/vmpressure.h b/include/linux/vmpressure.h
index 7dc17e2456de..3f3788d49362 100644
--- a/include/linux/vmpressure.h
+++ b/include/linux/vmpressure.h
@@ -34,10 +34,12 @@ extern void vmpressure_cleanup(struct vmpressure *vmpr);
 extern struct vmpressure *memcg_to_vmpressure(struct mem_cgroup *memcg);
 extern struct cgroup_subsys_state *vmpressure_to_css(struct vmpressure *vmpr);
 extern struct vmpressure *css_to_vmpressure(struct cgroup_subsys_state *css);
-extern int vmpressure_register_event(struct cgroup *cg, struct cftype *cft,
+extern int vmpressure_register_event(struct cgroup_subsys_state *css,
+				     struct cftype *cft,
 				     struct eventfd_ctx *eventfd,
 				     const char *args);
-extern void vmpressure_unregister_event(struct cgroup *cg, struct cftype *cft,
+extern void vmpressure_unregister_event(struct cgroup_subsys_state *css,
+					struct cftype *cft,
 					struct eventfd_ctx *eventfd);
 #else
 static inline void vmpressure(gfp_t gfp, struct mem_cgroup *memcg,