Diffstat (limited to 'kernel/cgroup.c')
 kernel/cgroup.c | 1666
 1 file changed, 931 insertions(+), 735 deletions(-)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index e91963302c0d..2418b6e71a85 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -60,6 +60,7 @@
 #include <linux/poll.h>
 #include <linux/flex_array.h> /* used in cgroup_attach_task */
 #include <linux/kthread.h>
+#include <linux/file.h>
 
 #include <linux/atomic.h>
 
@@ -81,7 +82,7 @@
  */
 #ifdef CONFIG_PROVE_RCU
 DEFINE_MUTEX(cgroup_mutex);
-EXPORT_SYMBOL_GPL(cgroup_mutex); /* only for task_subsys_state_check() */
+EXPORT_SYMBOL_GPL(cgroup_mutex); /* only for lockdep */
 #else
 static DEFINE_MUTEX(cgroup_mutex);
 #endif
@@ -117,6 +118,7 @@ struct cfent {
 	struct list_head node;
 	struct dentry *dentry;
 	struct cftype *type;
+	struct cgroup_subsys_state *css;
 
 	/* file xattrs */
 	struct simple_xattrs xattrs;
@@ -159,9 +161,9 @@ struct css_id {
  */
 struct cgroup_event {
 	/*
-	 * Cgroup which the event belongs to.
+	 * css which the event belongs to.
 	 */
-	struct cgroup *cgrp;
+	struct cgroup_subsys_state *css;
 	/*
 	 * Control file which the event associated.
 	 */
@@ -215,10 +217,33 @@ static u64 cgroup_serial_nr_next = 1;
  */
 static int need_forkexit_callback __read_mostly;
 
-static void cgroup_offline_fn(struct work_struct *work);
+static struct cftype cgroup_base_files[];
+
+static void cgroup_destroy_css_killed(struct cgroup *cgrp);
 static int cgroup_destroy_locked(struct cgroup *cgrp);
-static int cgroup_addrm_files(struct cgroup *cgrp, struct cgroup_subsys *subsys,
-			      struct cftype cfts[], bool is_add);
+static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[],
+			      bool is_add);
+
+/**
+ * cgroup_css - obtain a cgroup's css for the specified subsystem
+ * @cgrp: the cgroup of interest
+ * @ss: the subsystem of interest (%NULL returns the dummy_css)
+ *
+ * Return @cgrp's css (cgroup_subsys_state) associated with @ss.  This
+ * function must be called either under cgroup_mutex or rcu_read_lock() and
+ * the caller is responsible for pinning the returned css if it wants to
+ * keep accessing it outside the said locks.  This function may return
+ * %NULL if @cgrp doesn't have @ss enabled.
+ */
+static struct cgroup_subsys_state *cgroup_css(struct cgroup *cgrp,
+					      struct cgroup_subsys *ss)
+{
+	if (ss)
+		return rcu_dereference_check(cgrp->subsys[ss->subsys_id],
+					     lockdep_is_held(&cgroup_mutex));
+	else
+		return &cgrp->dummy_css;
+}
 
 /* convenient tests for these bits */
 static inline bool cgroup_is_dead(const struct cgroup *cgrp)
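The new cgroup_css() accessor above encodes a lookup contract: the subsys pointer may only be dereferenced under rcu_read_lock() or cgroup_mutex (rcu_dereference_check() lets lockdep verify this), and a caller that wants the css past the locked region must pin it first. Below is a minimal userspace sketch of that look-up-then-pin discipline, with a plain mutex and refcount standing in for RCU and css refcounting; all names are illustrative, none of this is kernel API.

/*
 * Userspace sketch of the cgroup_css() contract: a lookup that is only
 * valid under a lock, plus an explicit "pin" (refcount) the caller must
 * take before dropping that lock.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct css {
	int refcnt;		/* pinned while > 0 */
	int subsys_id;
};

static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;
static struct css *subsys_table[4];	/* stands in for cgrp->subsys[] */

/* valid only while table_lock is held, like cgroup_css() under RCU */
static struct css *lookup_css(int subsys_id)
{
	return subsys_table[subsys_id];
}

/* pin the css so it may be used after the lock is dropped */
static struct css *get_css(int subsys_id)
{
	struct css *css;

	pthread_mutex_lock(&table_lock);
	css = lookup_css(subsys_id);
	if (css)
		css->refcnt++;		/* the "pin before unlock" step */
	pthread_mutex_unlock(&table_lock);
	return css;
}

static void put_css(struct css *css)
{
	pthread_mutex_lock(&table_lock);
	if (--css->refcnt == 0)
		free(css);
	pthread_mutex_unlock(&table_lock);
}

int main(void)
{
	struct css *css = calloc(1, sizeof(*css));

	css->refcnt = 1;		/* the table's own reference */
	subsys_table[0] = css;

	struct css *pinned = get_css(0);
	printf("pinned subsys %d, refcnt %d\n", pinned->subsys_id, pinned->refcnt);
	put_css(pinned);		/* drop our pin */
	put_css(css);			/* drop the table's reference; frees */
	return 0;
}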
@@ -365,9 +390,11 @@ static struct cgrp_cset_link init_cgrp_cset_link;
 static int cgroup_init_idr(struct cgroup_subsys *ss,
 			   struct cgroup_subsys_state *css);
 
-/* css_set_lock protects the list of css_set objects, and the
- * chain of tasks off each css_set.  Nests outside task->alloc_lock
- * due to cgroup_iter_start() */
+/*
+ * css_set_lock protects the list of css_set objects, and the chain of
+ * tasks off each css_set.  Nests outside task->alloc_lock due to
+ * css_task_iter_start().
+ */
 static DEFINE_RWLOCK(css_set_lock);
 static int css_set_count;
 
@@ -392,10 +419,12 @@ static unsigned long css_set_hash(struct cgroup_subsys_state *css[])
 	return key;
 }
 
-/* We don't maintain the lists running through each css_set to its
- * task until after the first call to cgroup_iter_start(). This
- * reduces the fork()/exit() overhead for people who have cgroups
- * compiled into their kernel but not actually in use */
+/*
+ * We don't maintain the lists running through each css_set to its task
+ * until after the first call to css_task_iter_start().  This reduces the
+ * fork()/exit() overhead for people who have cgroups compiled into their
+ * kernel but not actually in use.
+ */
 static int use_task_css_set_links __read_mostly;
 
 static void __put_css_set(struct css_set *cset, int taskexit)
@@ -464,7 +493,7 @@ static inline void put_css_set_taskexit(struct css_set *cset)
  * @new_cgrp: cgroup that's being entered by the task
  * @template: desired set of css pointers in css_set (pre-calculated)
  *
- * Returns true if "cg" matches "old_cg" except for the hierarchy
+ * Returns true if "cset" matches "old_cset" except for the hierarchy
  * which "new_cgrp" belongs to, for which it should match "new_cgrp".
  */
 static bool compare_css_sets(struct css_set *cset,
@@ -555,7 +584,7 @@ static struct css_set *find_existing_css_set(struct css_set *old_cset,
 			/* Subsystem is in this hierarchy. So we want
 			 * the subsystem state from the new
 			 * cgroup */
-			template[i] = cgrp->subsys[i];
+			template[i] = cgroup_css(cgrp, ss);
 		} else {
 			/* Subsystem is not in this hierarchy, so we
 			 * don't want to change the subsystem state */
@@ -803,8 +832,7 @@ static struct cgroup *task_cgroup_from_root(struct task_struct *task,
 
 static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode);
 static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry);
-static int cgroup_populate_dir(struct cgroup *cgrp, bool base_files,
-			       unsigned long subsys_mask);
+static int cgroup_populate_dir(struct cgroup *cgrp, unsigned long subsys_mask);
 static const struct inode_operations cgroup_dir_inode_operations;
 static const struct file_operations proc_cgroupstats_operations;
 
@@ -813,8 +841,7 @@ static struct backing_dev_info cgroup_backing_dev_info = {
 	.capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
 };
 
-static int alloc_css_id(struct cgroup_subsys *ss,
-			struct cgroup *parent, struct cgroup *child);
+static int alloc_css_id(struct cgroup_subsys_state *child_css);
 
 static struct inode *cgroup_new_inode(umode_t mode, struct super_block *sb)
 {
@@ -845,15 +872,8 @@ static struct cgroup_name *cgroup_alloc_name(struct dentry *dentry)
 static void cgroup_free_fn(struct work_struct *work)
 {
 	struct cgroup *cgrp = container_of(work, struct cgroup, destroy_work);
-	struct cgroup_subsys *ss;
 
 	mutex_lock(&cgroup_mutex);
-	/*
-	 * Release the subsystem state objects.
-	 */
-	for_each_root_subsys(cgrp->root, ss)
-		ss->css_free(cgrp);
-
 	cgrp->root->number_of_cgroups--;
 	mutex_unlock(&cgroup_mutex);
 
@@ -864,8 +884,6 @@ static void cgroup_free_fn(struct work_struct *work)
 	 */
 	dput(cgrp->parent->dentry);
 
-	ida_simple_remove(&cgrp->root->cgroup_ida, cgrp->id);
-
 	/*
 	 * Drop the active superblock reference that we took when we
 	 * created the cgroup.  This will free cgrp->root, if we are
@@ -956,27 +974,22 @@ static void cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft)
 }
 
 /**
- * cgroup_clear_directory - selective removal of base and subsystem files
- * @dir: directory containing the files
- * @base_files: true if the base files should be removed
+ * cgroup_clear_dir - remove subsys files in a cgroup directory
+ * @cgrp: target cgroup
  * @subsys_mask: mask of the subsystem ids whose files should be removed
  */
-static void cgroup_clear_directory(struct dentry *dir, bool base_files,
-				   unsigned long subsys_mask)
+static void cgroup_clear_dir(struct cgroup *cgrp, unsigned long subsys_mask)
 {
-	struct cgroup *cgrp = __d_cgrp(dir);
 	struct cgroup_subsys *ss;
+	int i;
 
-	for_each_root_subsys(cgrp->root, ss) {
+	for_each_subsys(ss, i) {
 		struct cftype_set *set;
-		if (!test_bit(ss->subsys_id, &subsys_mask))
+
+		if (!test_bit(i, &subsys_mask))
 			continue;
 		list_for_each_entry(set, &ss->cftsets, node)
-			cgroup_addrm_files(cgrp, NULL, set->cfts, false);
-	}
-	if (base_files) {
-		while (!list_empty(&cgrp->files))
-			cgroup_rm_file(cgrp, NULL);
+			cgroup_addrm_files(cgrp, set->cfts, false);
 	}
 }
 
@@ -986,9 +999,6 @@ static void cgroup_clear_directory(struct dentry *dir, bool base_files,
 static void cgroup_d_remove_dir(struct dentry *dentry)
 {
 	struct dentry *parent;
-	struct cgroupfs_root *root = dentry->d_sb->s_fs_info;
-
-	cgroup_clear_directory(dentry, true, root->subsys_mask);
 
 	parent = dentry->d_parent;
 	spin_lock(&parent->d_lock);
@@ -1009,79 +1019,84 @@ static int rebind_subsystems(struct cgroupfs_root *root,
 {
 	struct cgroup *cgrp = &root->top_cgroup;
 	struct cgroup_subsys *ss;
-	int i;
+	unsigned long pinned = 0;
+	int i, ret;
 
 	BUG_ON(!mutex_is_locked(&cgroup_mutex));
 	BUG_ON(!mutex_is_locked(&cgroup_root_mutex));
 
 	/* Check that any added subsystems are currently free */
 	for_each_subsys(ss, i) {
-		unsigned long bit = 1UL << i;
-
-		if (!(bit & added_mask))
+		if (!(added_mask & (1 << i)))
 			continue;
 
+		/* is the subsystem mounted elsewhere? */
 		if (ss->root != &cgroup_dummy_root) {
-			/* Subsystem isn't free */
-			return -EBUSY;
+			ret = -EBUSY;
+			goto out_put;
+		}
+
+		/* pin the module */
+		if (!try_module_get(ss->module)) {
+			ret = -ENOENT;
+			goto out_put;
 		}
+		pinned |= 1 << i;
 	}
 
-	/* Currently we don't handle adding/removing subsystems when
-	 * any child cgroups exist. This is theoretically supportable
-	 * but involves complex error handling, so it's being left until
-	 * later */
-	if (root->number_of_cgroups > 1)
-		return -EBUSY;
+	/* subsys could be missing if unloaded between parsing and here */
+	if (added_mask != pinned) {
+		ret = -ENOENT;
+		goto out_put;
+	}
+
+	ret = cgroup_populate_dir(cgrp, added_mask);
+	if (ret)
+		goto out_put;
+
+	/*
+	 * Nothing can fail from this point on.  Remove files for the
+	 * removed subsystems and rebind each subsystem.
+	 */
+	cgroup_clear_dir(cgrp, removed_mask);
 
-	/* Process each subsystem */
 	for_each_subsys(ss, i) {
 		unsigned long bit = 1UL << i;
 
 		if (bit & added_mask) {
 			/* We're binding this subsystem to this hierarchy */
-			BUG_ON(cgrp->subsys[i]);
-			BUG_ON(!cgroup_dummy_top->subsys[i]);
-			BUG_ON(cgroup_dummy_top->subsys[i]->cgroup != cgroup_dummy_top);
+			BUG_ON(cgroup_css(cgrp, ss));
+			BUG_ON(!cgroup_css(cgroup_dummy_top, ss));
+			BUG_ON(cgroup_css(cgroup_dummy_top, ss)->cgroup != cgroup_dummy_top);
+
+			rcu_assign_pointer(cgrp->subsys[i],
+					   cgroup_css(cgroup_dummy_top, ss));
+			cgroup_css(cgrp, ss)->cgroup = cgrp;
 
-			cgrp->subsys[i] = cgroup_dummy_top->subsys[i];
-			cgrp->subsys[i]->cgroup = cgrp;
 			list_move(&ss->sibling, &root->subsys_list);
 			ss->root = root;
 			if (ss->bind)
-				ss->bind(cgrp);
+				ss->bind(cgroup_css(cgrp, ss));
 
 			/* refcount was already taken, and we're keeping it */
 			root->subsys_mask |= bit;
 		} else if (bit & removed_mask) {
 			/* We're removing this subsystem */
-			BUG_ON(cgrp->subsys[i] != cgroup_dummy_top->subsys[i]);
-			BUG_ON(cgrp->subsys[i]->cgroup != cgrp);
+			BUG_ON(cgroup_css(cgrp, ss) != cgroup_css(cgroup_dummy_top, ss));
+			BUG_ON(cgroup_css(cgrp, ss)->cgroup != cgrp);
 
 			if (ss->bind)
-				ss->bind(cgroup_dummy_top);
-			cgroup_dummy_top->subsys[i]->cgroup = cgroup_dummy_top;
-			cgrp->subsys[i] = NULL;
+				ss->bind(cgroup_css(cgroup_dummy_top, ss));
+
+			cgroup_css(cgroup_dummy_top, ss)->cgroup = cgroup_dummy_top;
+			RCU_INIT_POINTER(cgrp->subsys[i], NULL);
+
 			cgroup_subsys[i]->root = &cgroup_dummy_root;
 			list_move(&ss->sibling, &cgroup_dummy_root.subsys_list);
 
 			/* subsystem is now free - drop reference on module */
 			module_put(ss->module);
 			root->subsys_mask &= ~bit;
-		} else if (bit & root->subsys_mask) {
-			/* Subsystem state should already exist */
-			BUG_ON(!cgrp->subsys[i]);
-			/*
-			 * a refcount was taken, but we already had one, so
-			 * drop the extra reference.
-			 */
-			module_put(ss->module);
-#ifdef CONFIG_MODULE_UNLOAD
-			BUG_ON(ss->module && !module_refcount(ss->module));
-#endif
-		} else {
-			/* Subsystem state shouldn't exist */
-			BUG_ON(cgrp->subsys[i]);
 		}
 	}
 
@@ -1092,6 +1107,12 @@ static int rebind_subsystems(struct cgroupfs_root *root,
 		root->flags |= CGRP_ROOT_SUBSYS_BOUND;
 
 	return 0;
+
+out_put:
+	for_each_subsys(ss, i)
+		if (pinned & (1 << i))
+			module_put(ss->module);
+	return ret;
 }
 
 static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry)
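rebind_subsystems() now owns module pinning end to end: it takes a reference on every module it will need, records each success in a `pinned` bitmask, and the single out_put label unwinds exactly what was taken. A hedged userspace sketch of that acquire-all-or-unwind idiom follows; malloc stands in for try_module_get(), and nothing here is kernel API.

/*
 * Sketch of the "pin everything, record what you pinned, unwind on
 * failure" idiom used by rebind_subsystems().  The resources here are
 * plain mallocs standing in for module references.
 */
#include <stdio.h>
#include <stdlib.h>

#define NRES 8

static void *res[NRES];

static int acquire_all(unsigned long want_mask)
{
	unsigned long pinned = 0;
	int i, ret = 0;

	for (i = 0; i < NRES; i++) {
		if (!(want_mask & (1UL << i)))
			continue;
		res[i] = malloc(64);	/* stands in for try_module_get() */
		if (!res[i]) {
			ret = -1;
			goto out_put;
		}
		pinned |= 1UL << i;
	}
	return 0;	/* success: caller keeps all the references */

out_put:
	/* drop only the references we actually took */
	for (i = 0; i < NRES; i++) {
		if (pinned & (1UL << i)) {
			free(res[i]);
			res[i] = NULL;
		}
	}
	return ret;
}

int main(void)
{
	printf("acquire_all: %d\n", acquire_all(0x0b));
	return 0;
}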
@@ -1142,7 +1163,6 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
 	char *token, *o = data;
 	bool all_ss = false, one_ss = false;
 	unsigned long mask = (unsigned long)-1;
-	bool module_pin_failed = false;
 	struct cgroup_subsys *ss;
 	int i;
 
@@ -1285,52 +1305,9 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
 	if (!opts->subsys_mask && !opts->name)
 		return -EINVAL;
 
-	/*
-	 * Grab references on all the modules we'll need, so the subsystems
-	 * don't dance around before rebind_subsystems attaches them. This may
-	 * take duplicate reference counts on a subsystem that's already used,
-	 * but rebind_subsystems handles this case.
-	 */
-	for_each_subsys(ss, i) {
-		if (!(opts->subsys_mask & (1UL << i)))
-			continue;
-		if (!try_module_get(cgroup_subsys[i]->module)) {
-			module_pin_failed = true;
-			break;
-		}
-	}
-	if (module_pin_failed) {
-		/*
-		 * oops, one of the modules was going away. this means that we
-		 * raced with a module_delete call, and to the user this is
-		 * essentially a "subsystem doesn't exist" case.
-		 */
-		for (i--; i >= 0; i--) {
-			/* drop refcounts only on the ones we took */
-			unsigned long bit = 1UL << i;
-
-			if (!(bit & opts->subsys_mask))
-				continue;
-			module_put(cgroup_subsys[i]->module);
-		}
-		return -ENOENT;
-	}
-
 	return 0;
 }
 
-static void drop_parsed_module_refcounts(unsigned long subsys_mask)
-{
-	struct cgroup_subsys *ss;
-	int i;
-
-	mutex_lock(&cgroup_mutex);
-	for_each_subsys(ss, i)
-		if (subsys_mask & (1UL << i))
-			module_put(cgroup_subsys[i]->module);
-	mutex_unlock(&cgroup_mutex);
-}
-
 static int cgroup_remount(struct super_block *sb, int *flags, char *data)
 {
 	int ret = 0;
@@ -1370,22 +1347,15 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
 		goto out_unlock;
 	}
 
-	/*
-	 * Clear out the files of subsystems that should be removed, do
-	 * this before rebind_subsystems, since rebind_subsystems may
-	 * change this hierarchy's subsys_list.
-	 */
-	cgroup_clear_directory(cgrp->dentry, false, removed_mask);
-
-	ret = rebind_subsystems(root, added_mask, removed_mask);
-	if (ret) {
-		/* rebind_subsystems failed, re-populate the removed files */
-		cgroup_populate_dir(cgrp, false, removed_mask);
+	/* remounting is not allowed for populated hierarchies */
+	if (root->number_of_cgroups > 1) {
+		ret = -EBUSY;
 		goto out_unlock;
 	}
 
-	/* re-populate subsystem files */
-	cgroup_populate_dir(cgrp, false, added_mask);
+	ret = rebind_subsystems(root, added_mask, removed_mask);
+	if (ret)
+		goto out_unlock;
 
 	if (opts.release_agent)
 		strcpy(root->release_agent_path, opts.release_agent);
@@ -1395,8 +1365,6 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
 	mutex_unlock(&cgroup_root_mutex);
 	mutex_unlock(&cgroup_mutex);
 	mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
-	if (ret)
-		drop_parsed_module_refcounts(opts.subsys_mask);
 	return ret;
 }
 
@@ -1416,6 +1384,7 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
 	INIT_LIST_HEAD(&cgrp->release_list);
 	INIT_LIST_HEAD(&cgrp->pidlists);
 	mutex_init(&cgrp->pidlist_mutex);
+	cgrp->dummy_css.cgroup = cgrp;
 	INIT_LIST_HEAD(&cgrp->event_list);
 	spin_lock_init(&cgrp->event_list_lock);
 	simple_xattrs_init(&cgrp->xattrs);
@@ -1431,6 +1400,7 @@ static void init_cgroup_root(struct cgroupfs_root *root)
 	cgrp->root = root;
 	RCU_INIT_POINTER(cgrp->name, &root_cgroup_name);
 	init_cgroup_housekeeping(cgrp);
+	idr_init(&root->cgroup_idr);
 }
 
 static int cgroup_init_root_id(struct cgroupfs_root *root, int start, int end)
@@ -1503,7 +1473,6 @@ static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts)
 	 */
 	root->subsys_mask = opts->subsys_mask;
 	root->flags = opts->flags;
-	ida_init(&root->cgroup_ida);
 	if (opts->release_agent)
 		strcpy(root->release_agent_path, opts->release_agent);
 	if (opts->name)
@@ -1519,7 +1488,7 @@ static void cgroup_free_root(struct cgroupfs_root *root)
 		/* hierarchy ID should already have been released */
 		WARN_ON_ONCE(root->hierarchy_id);
 
-		ida_destroy(&root->cgroup_ida);
+		idr_destroy(&root->cgroup_idr);
 		kfree(root);
 	}
 }
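Switching the root from an ida to an idr is not just a rename: an idr binds each allocated id to a pointer, so the id can later be mapped back to its cgroup, which is what the idr_alloc() call in cgroup_mount() below relies on. Here is a simplified userspace model of that contract, allocating the lowest free id in [start, end); the kernel's idr is a radix tree, this fixed-size table is only a sketch.

/*
 * Minimal userspace model of the idr contract: allocate the lowest
 * free id in [start, end) and remember the pointer for later lookup.
 */
#include <stdio.h>

#define IDR_SLOTS 16

struct idr_model { void *slot[IDR_SLOTS]; };

static int idr_alloc_model(struct idr_model *idr, void *ptr, int start, int end)
{
	int id;

	if (end <= 0 || end > IDR_SLOTS)
		end = IDR_SLOTS;
	for (id = start; id < end; id++) {
		if (!idr->slot[id]) {
			idr->slot[id] = ptr;
			return id;
		}
	}
	return -1;	/* the kernel returns -ENOSPC here */
}

static void *idr_find_model(struct idr_model *idr, int id)
{
	return (id >= 0 && id < IDR_SLOTS) ? idr->slot[id] : NULL;
}

int main(void)
{
	struct idr_model idr = { { 0 } };
	/* range [0, 1) forces id 0, as done for the root cgroup */
	int root_id = idr_alloc_model(&idr, &idr, 0, 1);

	printf("root id = %d, lookup ok = %d\n",
	       root_id, idr_find_model(&idr, root_id) == (void *)&idr);
	return 0;
}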
@@ -1584,7 +1553,9 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
 	int ret = 0;
 	struct super_block *sb;
 	struct cgroupfs_root *new_root;
+	struct list_head tmp_links;
 	struct inode *inode;
+	const struct cred *cred;
 
 	/* First find the desired set of subsystems */
 	mutex_lock(&cgroup_mutex);
@@ -1600,7 +1571,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
 	new_root = cgroup_root_from_opts(&opts);
 	if (IS_ERR(new_root)) {
 		ret = PTR_ERR(new_root);
-		goto drop_modules;
+		goto out_err;
 	}
 	opts.new_root = new_root;
 
@@ -1609,17 +1580,15 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
 	if (IS_ERR(sb)) {
 		ret = PTR_ERR(sb);
 		cgroup_free_root(opts.new_root);
-		goto drop_modules;
+		goto out_err;
 	}
 
 	root = sb->s_fs_info;
 	BUG_ON(!root);
 	if (root == opts.new_root) {
 		/* We used the new root structure, so this is a new hierarchy */
-		struct list_head tmp_links;
 		struct cgroup *root_cgrp = &root->top_cgroup;
 		struct cgroupfs_root *existing_root;
-		const struct cred *cred;
 		int i;
 		struct css_set *cset;
 
@@ -1634,6 +1603,11 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
 		mutex_lock(&cgroup_mutex);
 		mutex_lock(&cgroup_root_mutex);
 
+		root_cgrp->id = idr_alloc(&root->cgroup_idr, root_cgrp,
+					  0, 1, GFP_KERNEL);
+		if (root_cgrp->id < 0)
+			goto unlock_drop;
+
 		/* Check for name clashes with existing mounts */
 		ret = -EBUSY;
 		if (strlen(root->name))
@@ -1657,26 +1631,37 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
 		if (ret)
 			goto unlock_drop;
 
+		sb->s_root->d_fsdata = root_cgrp;
+		root_cgrp->dentry = sb->s_root;
+
+		/*
+		 * We're inside get_sb() and will call lookup_one_len() to
+		 * create the root files, which doesn't work if SELinux is
+		 * in use.  The following cred dancing somehow works around
+		 * it.  See 2ce9738ba ("cgroupfs: use init_cred when
+		 * populating new cgroupfs mount") for more details.
+		 */
+		cred = override_creds(&init_cred);
+
+		ret = cgroup_addrm_files(root_cgrp, cgroup_base_files, true);
+		if (ret)
+			goto rm_base_files;
+
 		ret = rebind_subsystems(root, root->subsys_mask, 0);
-		if (ret == -EBUSY) {
-			free_cgrp_cset_links(&tmp_links);
-			goto unlock_drop;
-		}
+		if (ret)
+			goto rm_base_files;
+
+		revert_creds(cred);
+
 		/*
 		 * There must be no failure case after here, since rebinding
 		 * takes care of subsystems' refcounts, which are explicitly
 		 * dropped in the failure exit path.
 		 */
 
-		/* EBUSY should be the only error here */
-		BUG_ON(ret);
-
 		list_add(&root->root_list, &cgroup_roots);
 		cgroup_root_count++;
 
-		sb->s_root->d_fsdata = root_cgrp;
-		root->top_cgroup.dentry = sb->s_root;
-
 		/* Link the top cgroup in this hierarchy into all
 		 * the css_set objects */
 		write_lock(&css_set_lock);
@@ -1689,9 +1674,6 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
 		BUG_ON(!list_empty(&root_cgrp->children));
 		BUG_ON(root->number_of_cgroups != 1);
 
-		cred = override_creds(&init_cred);
-		cgroup_populate_dir(root_cgrp, true, root->subsys_mask);
-		revert_creds(cred);
 		mutex_unlock(&cgroup_root_mutex);
 		mutex_unlock(&cgroup_mutex);
 		mutex_unlock(&inode->i_mutex);
@@ -1711,15 +1693,16 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
 				pr_warning("cgroup: new mount options do not match the existing superblock, will be ignored\n");
 			}
 		}
-
-		/* no subsys rebinding, so refcounts don't change */
-		drop_parsed_module_refcounts(opts.subsys_mask);
 	}
 
 	kfree(opts.release_agent);
 	kfree(opts.name);
 	return dget(sb->s_root);
 
+rm_base_files:
+	free_cgrp_cset_links(&tmp_links);
+	cgroup_addrm_files(&root->top_cgroup, cgroup_base_files, false);
+	revert_creds(cred);
 unlock_drop:
 	cgroup_exit_root_id(root);
 	mutex_unlock(&cgroup_root_mutex);
@@ -1727,8 +1710,6 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
 	mutex_unlock(&inode->i_mutex);
 drop_new_super:
 	deactivate_locked_super(sb);
-drop_modules:
-	drop_parsed_module_refcounts(opts.subsys_mask);
 out_err:
 	kfree(opts.release_agent);
 	kfree(opts.name);
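The override_creds()/revert_creds() bracket now covers both base-file creation and rebinding, and the rm_base_files error path restores the saved credentials before unwinding. A compact userspace stand-in for that save/override/restore shape is sketched below; the struct and function names are made up for illustration and only mimic the kernel helpers.

/*
 * Shape of the override_creds()/revert_creds() bracket in
 * cgroup_mount(): save the current identity, switch to a privileged
 * one for a short window, and restore on every exit path.
 */
#include <stdio.h>

struct cred { const char *who; };

static const struct cred init_cred = { "init" };
static const struct cred mounter_cred = { "mounter" };
static const struct cred *current_cred = &mounter_cred;

static const struct cred *override_creds_model(const struct cred *new)
{
	const struct cred *old = current_cred;

	current_cred = new;
	return old;
}

static void revert_creds_model(const struct cred *old)
{
	current_cred = old;
}

static int populate(int fail)
{
	printf("creating files as '%s'\n", current_cred->who);
	return fail ? -1 : 0;
}

static int mount_like(int fail)
{
	const struct cred *cred = override_creds_model(&init_cred);
	int ret = populate(fail);

	revert_creds_model(cred);	/* restored on success and failure */
	return ret;
}

int main(void)
{
	int ok = mount_like(0);
	int err = mount_like(1);

	printf("ok: %d, err: %d, back to '%s'\n", ok, err, current_cred->who);
	return 0;
}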
@@ -1746,6 +1727,7 @@ static void cgroup_kill_sb(struct super_block *sb) {
 	BUG_ON(root->number_of_cgroups != 1);
 	BUG_ON(!list_empty(&cgrp->children));
 
+	mutex_lock(&cgrp->dentry->d_inode->i_mutex);
 	mutex_lock(&cgroup_mutex);
 	mutex_lock(&cgroup_root_mutex);
 
@@ -1778,6 +1760,7 @@ static void cgroup_kill_sb(struct super_block *sb) {
 
 	mutex_unlock(&cgroup_root_mutex);
 	mutex_unlock(&cgroup_mutex);
+	mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
 
 	simple_xattrs_free(&cgrp->xattrs);
 
@@ -1889,7 +1872,7 @@ EXPORT_SYMBOL_GPL(task_cgroup_path);
 struct task_and_cgroup {
 	struct task_struct *task;
 	struct cgroup *cgrp;
-	struct css_set *cg;
+	struct css_set *cset;
 };
 
 struct cgroup_taskset {
@@ -1939,18 +1922,20 @@ struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset)
 EXPORT_SYMBOL_GPL(cgroup_taskset_next);
 
 /**
- * cgroup_taskset_cur_cgroup - return the matching cgroup for the current task
+ * cgroup_taskset_cur_css - return the matching css for the current task
  * @tset: taskset of interest
+ * @subsys_id: the ID of the target subsystem
  *
- * Return the cgroup for the current (last returned) task of @tset.  This
- * function must be preceded by either cgroup_taskset_first() or
- * cgroup_taskset_next().
+ * Return the css for the current (last returned) task of @tset for
+ * subsystem specified by @subsys_id.  This function must be preceded by
+ * either cgroup_taskset_first() or cgroup_taskset_next().
  */
-struct cgroup *cgroup_taskset_cur_cgroup(struct cgroup_taskset *tset)
+struct cgroup_subsys_state *cgroup_taskset_cur_css(struct cgroup_taskset *tset,
+						   int subsys_id)
 {
-	return tset->cur_cgrp;
+	return cgroup_css(tset->cur_cgrp, cgroup_subsys[subsys_id]);
 }
-EXPORT_SYMBOL_GPL(cgroup_taskset_cur_cgroup);
+EXPORT_SYMBOL_GPL(cgroup_taskset_cur_css);
 
 /**
  * cgroup_taskset_size - return the number of tasks in taskset
@@ -2089,8 +2074,10 @@ static int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk,
 	 * step 1: check that we can legitimately attach to the cgroup.
 	 */
 	for_each_root_subsys(root, ss) {
+		struct cgroup_subsys_state *css = cgroup_css(cgrp, ss);
+
 		if (ss->can_attach) {
-			retval = ss->can_attach(cgrp, &tset);
+			retval = ss->can_attach(css, &tset);
 			if (retval) {
 				failed_ss = ss;
 				goto out_cancel_attach;
@@ -2107,8 +2094,8 @@ static int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk,
 
 		tc = flex_array_get(group, i);
 		old_cset = task_css_set(tc->task);
-		tc->cg = find_css_set(old_cset, cgrp);
-		if (!tc->cg) {
+		tc->cset = find_css_set(old_cset, cgrp);
+		if (!tc->cset) {
 			retval = -ENOMEM;
 			goto out_put_css_set_refs;
 		}
@@ -2121,7 +2108,7 @@ static int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk,
 	 */
 	for (i = 0; i < group_size; i++) {
 		tc = flex_array_get(group, i);
-		cgroup_task_migrate(tc->cgrp, tc->task, tc->cg);
+		cgroup_task_migrate(tc->cgrp, tc->task, tc->cset);
 	}
 	/* nothing is sensitive to fork() after this point. */
 
@@ -2129,8 +2116,10 @@ static int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk,
 	 * step 4: do subsystem attach callbacks.
 	 */
 	for_each_root_subsys(root, ss) {
+		struct cgroup_subsys_state *css = cgroup_css(cgrp, ss);
+
 		if (ss->attach)
-			ss->attach(cgrp, &tset);
+			ss->attach(css, &tset);
 	}
 
 	/*
@@ -2141,18 +2130,20 @@ out_put_css_set_refs:
 	if (retval) {
 		for (i = 0; i < group_size; i++) {
 			tc = flex_array_get(group, i);
-			if (!tc->cg)
+			if (!tc->cset)
 				break;
-			put_css_set(tc->cg);
+			put_css_set(tc->cset);
 		}
 	}
 out_cancel_attach:
 	if (retval) {
 		for_each_root_subsys(root, ss) {
+			struct cgroup_subsys_state *css = cgroup_css(cgrp, ss);
+
 			if (ss == failed_ss)
 				break;
			if (ss->cancel_attach)
-				ss->cancel_attach(cgrp, &tset);
+				ss->cancel_attach(css, &tset);
 		}
 	}
 out_free_group_list:
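Throughout the attach path, subsystem callbacks now receive the subsystem's own css rather than the cgroup, so each callback starts from its own state instead of looking it up. The hypothetical ops table below shows the shape of such css-first callbacks and the can_attach/attach/cancel_attach protocol; it is a sketch, not the kernel's cgroup_subsys definition.

/*
 * Illustrative css-first callback table: can_attach may veto, attach
 * commits, cancel_attach undoes a veto'd attempt.
 */
#include <stdio.h>

struct css_model { int id; };

struct subsys_ops {
	int  (*can_attach)(struct css_model *css);
	void (*attach)(struct css_model *css);
	void (*cancel_attach)(struct css_model *css);
};

static int demo_can_attach(struct css_model *css)
{
	return css->id == 0 ? -1 : 0;	/* refuse css 0, for the demo */
}

static void demo_attach(struct css_model *css)
{
	printf("attached to css %d\n", css->id);
}

static void demo_cancel(struct css_model *css)
{
	printf("cancelled on css %d\n", css->id);
}

static const struct subsys_ops ops = {
	.can_attach	= demo_can_attach,
	.attach		= demo_attach,
	.cancel_attach	= demo_cancel,
};

int main(void)
{
	struct css_model one = { 1 }, zero = { 0 };

	/* mirror the can_attach -> attach / cancel_attach protocol */
	if (ops.can_attach(&one) == 0)
		ops.attach(&one);
	if (ops.can_attach(&zero) != 0)
		ops.cancel_attach(&zero);
	return 0;
}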
@@ -2253,9 +2244,9 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
 
 	mutex_lock(&cgroup_mutex);
 	for_each_active_root(root) {
-		struct cgroup *from_cg = task_cgroup_from_root(from, root);
+		struct cgroup *from_cgrp = task_cgroup_from_root(from, root);
 
-		retval = cgroup_attach_task(from_cg, tsk, false);
+		retval = cgroup_attach_task(from_cgrp, tsk, false);
 		if (retval)
 			break;
 	}
@@ -2265,34 +2256,38 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
 }
 EXPORT_SYMBOL_GPL(cgroup_attach_task_all);
 
-static int cgroup_tasks_write(struct cgroup *cgrp, struct cftype *cft, u64 pid)
+static int cgroup_tasks_write(struct cgroup_subsys_state *css,
+			      struct cftype *cft, u64 pid)
 {
-	return attach_task_by_pid(cgrp, pid, false);
+	return attach_task_by_pid(css->cgroup, pid, false);
 }
 
-static int cgroup_procs_write(struct cgroup *cgrp, struct cftype *cft, u64 tgid)
+static int cgroup_procs_write(struct cgroup_subsys_state *css,
+			      struct cftype *cft, u64 tgid)
 {
-	return attach_task_by_pid(cgrp, tgid, true);
+	return attach_task_by_pid(css->cgroup, tgid, true);
 }
 
-static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft,
-				      const char *buffer)
+static int cgroup_release_agent_write(struct cgroup_subsys_state *css,
+				      struct cftype *cft, const char *buffer)
 {
-	BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
+	BUILD_BUG_ON(sizeof(css->cgroup->root->release_agent_path) < PATH_MAX);
 	if (strlen(buffer) >= PATH_MAX)
 		return -EINVAL;
-	if (!cgroup_lock_live_group(cgrp))
+	if (!cgroup_lock_live_group(css->cgroup))
 		return -ENODEV;
 	mutex_lock(&cgroup_root_mutex);
-	strcpy(cgrp->root->release_agent_path, buffer);
+	strcpy(css->cgroup->root->release_agent_path, buffer);
 	mutex_unlock(&cgroup_root_mutex);
 	mutex_unlock(&cgroup_mutex);
 	return 0;
 }
 
-static int cgroup_release_agent_show(struct cgroup *cgrp, struct cftype *cft,
-				     struct seq_file *seq)
+static int cgroup_release_agent_show(struct cgroup_subsys_state *css,
+				     struct cftype *cft, struct seq_file *seq)
 {
+	struct cgroup *cgrp = css->cgroup;
+
 	if (!cgroup_lock_live_group(cgrp))
 		return -ENODEV;
 	seq_puts(seq, cgrp->root->release_agent_path);
@@ -2301,20 +2296,20 @@ static int cgroup_release_agent_show(struct cgroup *cgrp, struct cftype *cft,
 	return 0;
 }
 
-static int cgroup_sane_behavior_show(struct cgroup *cgrp, struct cftype *cft,
-				     struct seq_file *seq)
+static int cgroup_sane_behavior_show(struct cgroup_subsys_state *css,
+				     struct cftype *cft, struct seq_file *seq)
 {
-	seq_printf(seq, "%d\n", cgroup_sane_behavior(cgrp));
+	seq_printf(seq, "%d\n", cgroup_sane_behavior(css->cgroup));
 	return 0;
 }
 
 /* A buffer size big enough for numbers or short strings */
 #define CGROUP_LOCAL_BUFFER_SIZE 64
 
-static ssize_t cgroup_write_X64(struct cgroup *cgrp, struct cftype *cft,
-				struct file *file,
-				const char __user *userbuf,
-				size_t nbytes, loff_t *unused_ppos)
+static ssize_t cgroup_write_X64(struct cgroup_subsys_state *css,
+				struct cftype *cft, struct file *file,
+				const char __user *userbuf, size_t nbytes,
+				loff_t *unused_ppos)
 {
 	char buffer[CGROUP_LOCAL_BUFFER_SIZE];
 	int retval = 0;
@@ -2332,22 +2327,22 @@ static ssize_t cgroup_write_X64(struct cgroup *cgrp, struct cftype *cft,
 		u64 val = simple_strtoull(strstrip(buffer), &end, 0);
 		if (*end)
 			return -EINVAL;
-		retval = cft->write_u64(cgrp, cft, val);
+		retval = cft->write_u64(css, cft, val);
 	} else {
 		s64 val = simple_strtoll(strstrip(buffer), &end, 0);
 		if (*end)
 			return -EINVAL;
-		retval = cft->write_s64(cgrp, cft, val);
+		retval = cft->write_s64(css, cft, val);
 	}
 	if (!retval)
 		retval = nbytes;
 	return retval;
 }
 
-static ssize_t cgroup_write_string(struct cgroup *cgrp, struct cftype *cft,
-				   struct file *file,
-				   const char __user *userbuf,
-				   size_t nbytes, loff_t *unused_ppos)
+static ssize_t cgroup_write_string(struct cgroup_subsys_state *css,
+				   struct cftype *cft, struct file *file,
+				   const char __user *userbuf, size_t nbytes,
+				   loff_t *unused_ppos)
 {
 	char local_buffer[CGROUP_LOCAL_BUFFER_SIZE];
 	int retval = 0;
@@ -2370,7 +2365,7 @@ static ssize_t cgroup_write_string(struct cgroup *cgrp, struct cftype *cft,
 	}
 
 	buffer[nbytes] = 0;	/* nul-terminate */
-	retval = cft->write_string(cgrp, cft, strstrip(buffer));
+	retval = cft->write_string(css, cft, strstrip(buffer));
 	if (!retval)
 		retval = nbytes;
 out:
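cgroup_write_X64() accepts a value only if the whole stripped buffer parses: simple_strtoull()/simple_strtoll() return an end pointer, and anything left before the terminating NUL yields -EINVAL. The same strict-parse rule can be expressed in portable userspace C with strtoull() in place of the kernel helper:

/*
 * Userspace equivalent of the cgroup_write_X64() parse: strip
 * whitespace, convert, and reject trailing garbage by checking the
 * end pointer, as the kernel does with simple_strtoull().
 */
#include <ctype.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int parse_u64_strict(const char *buf, unsigned long long *out)
{
	char tmp[64];
	char *s = tmp, *end;
	size_t len;

	strncpy(tmp, buf, sizeof(tmp) - 1);
	tmp[sizeof(tmp) - 1] = '\0';

	while (isspace((unsigned char)*s))	/* strip leading space */
		s++;
	len = strlen(s);
	while (len && isspace((unsigned char)s[len - 1]))
		s[--len] = '\0';		/* strip trailing space */

	errno = 0;
	*out = strtoull(s, &end, 0);
	if (errno || end == s || *end)
		return -EINVAL;			/* trailing junk rejected */
	return 0;
}

int main(void)
{
	unsigned long long v;

	printf("'42\\n'  -> %d\n", parse_u64_strict("42\n", &v));  /* 0 */
	printf("'42abc' -> %d\n", parse_u64_strict("42abc", &v)); /* -EINVAL */
	return 0;
}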
@@ -2380,65 +2375,60 @@ out:
 }
 
 static ssize_t cgroup_file_write(struct file *file, const char __user *buf,
 				 size_t nbytes, loff_t *ppos)
 {
+	struct cfent *cfe = __d_cfe(file->f_dentry);
 	struct cftype *cft = __d_cft(file->f_dentry);
-	struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
+	struct cgroup_subsys_state *css = cfe->css;
 
-	if (cgroup_is_dead(cgrp))
-		return -ENODEV;
 	if (cft->write)
-		return cft->write(cgrp, cft, file, buf, nbytes, ppos);
+		return cft->write(css, cft, file, buf, nbytes, ppos);
 	if (cft->write_u64 || cft->write_s64)
-		return cgroup_write_X64(cgrp, cft, file, buf, nbytes, ppos);
+		return cgroup_write_X64(css, cft, file, buf, nbytes, ppos);
 	if (cft->write_string)
-		return cgroup_write_string(cgrp, cft, file, buf, nbytes, ppos);
+		return cgroup_write_string(css, cft, file, buf, nbytes, ppos);
 	if (cft->trigger) {
-		int ret = cft->trigger(cgrp, (unsigned int)cft->private);
+		int ret = cft->trigger(css, (unsigned int)cft->private);
 		return ret ? ret : nbytes;
 	}
 	return -EINVAL;
 }
 
-static ssize_t cgroup_read_u64(struct cgroup *cgrp, struct cftype *cft,
-			       struct file *file,
-			       char __user *buf, size_t nbytes,
-			       loff_t *ppos)
+static ssize_t cgroup_read_u64(struct cgroup_subsys_state *css,
+			       struct cftype *cft, struct file *file,
+			       char __user *buf, size_t nbytes, loff_t *ppos)
 {
 	char tmp[CGROUP_LOCAL_BUFFER_SIZE];
-	u64 val = cft->read_u64(cgrp, cft);
+	u64 val = cft->read_u64(css, cft);
 	int len = sprintf(tmp, "%llu\n", (unsigned long long) val);
 
 	return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
 }
 
-static ssize_t cgroup_read_s64(struct cgroup *cgrp, struct cftype *cft,
-			       struct file *file,
-			       char __user *buf, size_t nbytes,
-			       loff_t *ppos)
+static ssize_t cgroup_read_s64(struct cgroup_subsys_state *css,
+			       struct cftype *cft, struct file *file,
+			       char __user *buf, size_t nbytes, loff_t *ppos)
 {
 	char tmp[CGROUP_LOCAL_BUFFER_SIZE];
-	s64 val = cft->read_s64(cgrp, cft);
+	s64 val = cft->read_s64(css, cft);
 	int len = sprintf(tmp, "%lld\n", (long long) val);
 
 	return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
 }
 
 static ssize_t cgroup_file_read(struct file *file, char __user *buf,
 				size_t nbytes, loff_t *ppos)
 {
+	struct cfent *cfe = __d_cfe(file->f_dentry);
 	struct cftype *cft = __d_cft(file->f_dentry);
-	struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
-
-	if (cgroup_is_dead(cgrp))
-		return -ENODEV;
+	struct cgroup_subsys_state *css = cfe->css;
 
 	if (cft->read)
-		return cft->read(cgrp, cft, file, buf, nbytes, ppos);
+		return cft->read(css, cft, file, buf, nbytes, ppos);
 	if (cft->read_u64)
-		return cgroup_read_u64(cgrp, cft, file, buf, nbytes, ppos);
+		return cgroup_read_u64(css, cft, file, buf, nbytes, ppos);
 	if (cft->read_s64)
-		return cgroup_read_s64(cgrp, cft, file, buf, nbytes, ppos);
+		return cgroup_read_s64(css, cft, file, buf, nbytes, ppos);
 	return -EINVAL;
 }
 
@@ -2447,11 +2437,6 @@ static ssize_t cgroup_file_read(struct file *file, char __user *buf, | |||
2447 | * supports string->u64 maps, but can be extended in future. | 2437 | * supports string->u64 maps, but can be extended in future. |
2448 | */ | 2438 | */ |
2449 | 2439 | ||
2450 | struct cgroup_seqfile_state { | ||
2451 | struct cftype *cft; | ||
2452 | struct cgroup *cgroup; | ||
2453 | }; | ||
2454 | |||
2455 | static int cgroup_map_add(struct cgroup_map_cb *cb, const char *key, u64 value) | 2440 | static int cgroup_map_add(struct cgroup_map_cb *cb, const char *key, u64 value) |
2456 | { | 2441 | { |
2457 | struct seq_file *sf = cb->state; | 2442 | struct seq_file *sf = cb->state; |
@@ -2460,69 +2445,86 @@ static int cgroup_map_add(struct cgroup_map_cb *cb, const char *key, u64 value) | |||
2460 | 2445 | ||
2461 | static int cgroup_seqfile_show(struct seq_file *m, void *arg) | 2446 | static int cgroup_seqfile_show(struct seq_file *m, void *arg) |
2462 | { | 2447 | { |
2463 | struct cgroup_seqfile_state *state = m->private; | 2448 | struct cfent *cfe = m->private; |
2464 | struct cftype *cft = state->cft; | 2449 | struct cftype *cft = cfe->type; |
2450 | struct cgroup_subsys_state *css = cfe->css; | ||
2451 | |||
2465 | if (cft->read_map) { | 2452 | if (cft->read_map) { |
2466 | struct cgroup_map_cb cb = { | 2453 | struct cgroup_map_cb cb = { |
2467 | .fill = cgroup_map_add, | 2454 | .fill = cgroup_map_add, |
2468 | .state = m, | 2455 | .state = m, |
2469 | }; | 2456 | }; |
2470 | return cft->read_map(state->cgroup, cft, &cb); | 2457 | return cft->read_map(css, cft, &cb); |
2471 | } | 2458 | } |
2472 | return cft->read_seq_string(state->cgroup, cft, m); | 2459 | return cft->read_seq_string(css, cft, m); |
2473 | } | ||
2474 | |||
2475 | static int cgroup_seqfile_release(struct inode *inode, struct file *file) | ||
2476 | { | ||
2477 | struct seq_file *seq = file->private_data; | ||
2478 | kfree(seq->private); | ||
2479 | return single_release(inode, file); | ||
2480 | } | 2460 | } |
2481 | 2461 | ||
2482 | static const struct file_operations cgroup_seqfile_operations = { | 2462 | static const struct file_operations cgroup_seqfile_operations = { |
2483 | .read = seq_read, | 2463 | .read = seq_read, |
2484 | .write = cgroup_file_write, | 2464 | .write = cgroup_file_write, |
2485 | .llseek = seq_lseek, | 2465 | .llseek = seq_lseek, |
2486 | .release = cgroup_seqfile_release, | 2466 | .release = single_release, |
2487 | }; | 2467 | }; |
2488 | 2468 | ||
2489 | static int cgroup_file_open(struct inode *inode, struct file *file) | 2469 | static int cgroup_file_open(struct inode *inode, struct file *file) |
2490 | { | 2470 | { |
2471 | struct cfent *cfe = __d_cfe(file->f_dentry); | ||
2472 | struct cftype *cft = __d_cft(file->f_dentry); | ||
2473 | struct cgroup *cgrp = __d_cgrp(cfe->dentry->d_parent); | ||
2474 | struct cgroup_subsys_state *css; | ||
2491 | int err; | 2475 | int err; |
2492 | struct cftype *cft; | ||
2493 | 2476 | ||
2494 | err = generic_file_open(inode, file); | 2477 | err = generic_file_open(inode, file); |
2495 | if (err) | 2478 | if (err) |
2496 | return err; | 2479 | return err; |
2497 | cft = __d_cft(file->f_dentry); | ||
2498 | 2480 | ||
2499 | if (cft->read_map || cft->read_seq_string) { | 2481 | /* |
2500 | struct cgroup_seqfile_state *state; | 2482 | * If the file belongs to a subsystem, pin the css. Will be |
2483 | * unpinned either on open failure or release. This ensures that | ||
2484 | * @css stays alive for all file operations. | ||
2485 | */ | ||
2486 | rcu_read_lock(); | ||
2487 | css = cgroup_css(cgrp, cft->ss); | ||
2488 | if (cft->ss && !css_tryget(css)) | ||
2489 | css = NULL; | ||
2490 | rcu_read_unlock(); | ||
2501 | 2491 | ||
2502 | state = kzalloc(sizeof(*state), GFP_USER); | 2492 | if (!css) |
2503 | if (!state) | 2493 | return -ENODEV; |
2504 | return -ENOMEM; | 2494 | |
2495 | /* | ||
2496 | * @cfe->css is used by read/write/close to determine the | ||
2497 | * associated css. @file->private_data would be a better place but | ||
2498 | * that's already used by seqfile. Multiple accessors may use it | ||
2499 | * simultaneously which is okay as the association never changes. | ||
2500 | */ | ||
2501 | WARN_ON_ONCE(cfe->css && cfe->css != css); | ||
2502 | cfe->css = css; | ||
2505 | 2503 | ||
2506 | state->cft = cft; | 2504 | if (cft->read_map || cft->read_seq_string) { |
2507 | state->cgroup = __d_cgrp(file->f_dentry->d_parent); | ||
2508 | file->f_op = &cgroup_seqfile_operations; | 2505 | file->f_op = &cgroup_seqfile_operations; |
2509 | err = single_open(file, cgroup_seqfile_show, state); | 2506 | err = single_open(file, cgroup_seqfile_show, cfe); |
2510 | if (err < 0) | 2507 | } else if (cft->open) { |
2511 | kfree(state); | ||
2512 | } else if (cft->open) | ||
2513 | err = cft->open(inode, file); | 2508 | err = cft->open(inode, file); |
2514 | else | 2509 | } |
2515 | err = 0; | ||
2516 | 2510 | ||
2511 | if (css->ss && err) | ||
2512 | css_put(css); | ||
2517 | return err; | 2513 | return err; |
2518 | } | 2514 | } |
2519 | 2515 | ||
2520 | static int cgroup_file_release(struct inode *inode, struct file *file) | 2516 | static int cgroup_file_release(struct inode *inode, struct file *file) |
2521 | { | 2517 | { |
2518 | struct cfent *cfe = __d_cfe(file->f_dentry); | ||
2522 | struct cftype *cft = __d_cft(file->f_dentry); | 2519 | struct cftype *cft = __d_cft(file->f_dentry); |
2520 | struct cgroup_subsys_state *css = cfe->css; | ||
2521 | int ret = 0; | ||
2522 | |||
2523 | if (cft->release) | 2523 | if (cft->release) |
2524 | return cft->release(inode, file); | 2524 | ret = cft->release(inode, file); |
2525 | return 0; | 2525 | if (css->ss) |
2526 | css_put(css); | ||
2527 | return ret; | ||
2526 | } | 2528 | } |
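
The open/release pair above follows the usual RCU lookup-then-pin pattern: look the css up under rcu_read_lock(), take a reference with css_tryget() only while the object is still live, and drop that reference in release. A minimal sketch of the same shape, using only helpers visible in this diff; the example_* names are illustrative, not kernel API:

        static int example_pin_css(struct cgroup *cgrp, struct cgroup_subsys *ss,
                                   struct cgroup_subsys_state **pinned)
        {
                struct cgroup_subsys_state *css;

                rcu_read_lock();
                css = cgroup_css(cgrp, ss);     /* RCU-protected lookup */
                if (css && !css_tryget(css))    /* fails if the css is dying */
                        css = NULL;
                rcu_read_unlock();

                if (!css)
                        return -ENODEV;         /* raced with css offlining */
                *pinned = css;                  /* stays valid until css_put() */
                return 0;
        }

Pinning once at open time rather than per operation is what lets read/write/close dereference cfe->css without taking any further locks.
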
2527 | 2529 | ||
2528 | /* | 2530 | /* |
@@ -2736,8 +2738,7 @@ static umode_t cgroup_file_mode(const struct cftype *cft) | |||
2736 | return mode; | 2738 | return mode; |
2737 | } | 2739 | } |
2738 | 2740 | ||
2739 | static int cgroup_add_file(struct cgroup *cgrp, struct cgroup_subsys *subsys, | 2741 | static int cgroup_add_file(struct cgroup *cgrp, struct cftype *cft) |
2740 | struct cftype *cft) | ||
2741 | { | 2742 | { |
2742 | struct dentry *dir = cgrp->dentry; | 2743 | struct dentry *dir = cgrp->dentry; |
2743 | struct cgroup *parent = __d_cgrp(dir); | 2744 | struct cgroup *parent = __d_cgrp(dir); |
@@ -2747,8 +2748,9 @@ static int cgroup_add_file(struct cgroup *cgrp, struct cgroup_subsys *subsys, | |||
2747 | umode_t mode; | 2748 | umode_t mode; |
2748 | char name[MAX_CGROUP_TYPE_NAMELEN + MAX_CFTYPE_NAME + 2] = { 0 }; | 2749 | char name[MAX_CGROUP_TYPE_NAMELEN + MAX_CFTYPE_NAME + 2] = { 0 }; |
2749 | 2750 | ||
2750 | if (subsys && !(cgrp->root->flags & CGRP_ROOT_NOPREFIX)) { | 2751 | if (cft->ss && !(cft->flags & CFTYPE_NO_PREFIX) && |
2751 | strcpy(name, subsys->name); | 2752 | !(cgrp->root->flags & CGRP_ROOT_NOPREFIX)) { |
2753 | strcpy(name, cft->ss->name); | ||
2752 | strcat(name, "."); | 2754 | strcat(name, "."); |
2753 | } | 2755 | } |
2754 | strcat(name, cft->name); | 2756 | strcat(name, cft->name); |
@@ -2782,11 +2784,25 @@ out: | |||
2782 | return error; | 2784 | return error; |
2783 | } | 2785 | } |
2784 | 2786 | ||
2785 | static int cgroup_addrm_files(struct cgroup *cgrp, struct cgroup_subsys *subsys, | 2787 | /** |
2786 | struct cftype cfts[], bool is_add) | 2788 | * cgroup_addrm_files - add or remove files to a cgroup directory |
2789 | * @cgrp: the target cgroup | ||
2790 | * @cfts: array of cftypes to be added | ||
2791 | * @is_add: whether to add or remove | ||
2792 | * | ||
2793 | * Depending on @is_add, add or remove files defined by @cfts on @cgrp. | ||
2794 | * For removals, this function never fails. If addition fails, this | ||
2795 | * function doesn't remove files already added. The caller is responsible | ||
2796 | * for cleaning up. | ||
2797 | */ | ||
2798 | static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[], | ||
2799 | bool is_add) | ||
2787 | { | 2800 | { |
2788 | struct cftype *cft; | 2801 | struct cftype *cft; |
2789 | int err, ret = 0; | 2802 | int ret; |
2803 | |||
2804 | lockdep_assert_held(&cgrp->dentry->d_inode->i_mutex); | ||
2805 | lockdep_assert_held(&cgroup_mutex); | ||
2790 | 2806 | ||
2791 | for (cft = cfts; cft->name[0] != '\0'; cft++) { | 2807 | for (cft = cfts; cft->name[0] != '\0'; cft++) { |
2792 | /* does cft->flags tell us to skip this file on @cgrp? */ | 2808 | /* does cft->flags tell us to skip this file on @cgrp? */ |
@@ -2798,16 +2814,17 @@ static int cgroup_addrm_files(struct cgroup *cgrp, struct cgroup_subsys *subsys, | |||
2798 | continue; | 2814 | continue; |
2799 | 2815 | ||
2800 | if (is_add) { | 2816 | if (is_add) { |
2801 | err = cgroup_add_file(cgrp, subsys, cft); | 2817 | ret = cgroup_add_file(cgrp, cft); |
2802 | if (err) | 2818 | if (ret) { |
2803 | pr_warn("cgroup_addrm_files: failed to add %s, err=%d\n", | 2819 | pr_warn("cgroup_addrm_files: failed to add %s, err=%d\n", |
2804 | cft->name, err); | 2820 | cft->name, ret); |
2805 | ret = err; | 2821 | return ret; |
2822 | } | ||
2806 | } else { | 2823 | } else { |
2807 | cgroup_rm_file(cgrp, cft); | 2824 | cgroup_rm_file(cgrp, cft); |
2808 | } | 2825 | } |
2809 | } | 2826 | } |
2810 | return ret; | 2827 | return 0; |
2811 | } | 2828 | } |
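
Because addition stops at the first failure and never rolls back, a caller wanting all-or-nothing behavior must remove with the same @cfts array on error. A hedged sketch of that caller-side cleanup, assuming the locking required by the lockdep asserts above:

        /* illustrative caller; both locks held per the lockdep_assert_held()s */
        mutex_lock(&inode->i_mutex);
        mutex_lock(&cgroup_mutex);
        ret = cgroup_addrm_files(cgrp, cfts, true);
        if (ret)
                /* partial additions are not rolled back; undo them ourselves */
                cgroup_addrm_files(cgrp, cfts, false);  /* removal never fails */
        mutex_unlock(&cgroup_mutex);
        mutex_unlock(&inode->i_mutex);
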
2812 | 2829 | ||
2813 | static void cgroup_cfts_prepare(void) | 2830 | static void cgroup_cfts_prepare(void) |
@@ -2816,28 +2833,30 @@ static void cgroup_cfts_prepare(void) | |||
2816 | /* | 2833 | /* |
2817 | * Thanks to the entanglement with vfs inode locking, we can't walk | 2834 | * Thanks to the entanglement with vfs inode locking, we can't walk |
2818 | * the existing cgroups under cgroup_mutex and create files. | 2835 | * the existing cgroups under cgroup_mutex and create files. |
2819 | * Instead, we use cgroup_for_each_descendant_pre() and drop RCU | 2836 | * Instead, we use css_for_each_descendant_pre() and drop RCU read |
2820 | * read lock before calling cgroup_addrm_files(). | 2837 | * lock before calling cgroup_addrm_files(). |
2821 | */ | 2838 | */ |
2822 | mutex_lock(&cgroup_mutex); | 2839 | mutex_lock(&cgroup_mutex); |
2823 | } | 2840 | } |
2824 | 2841 | ||
2825 | static void cgroup_cfts_commit(struct cgroup_subsys *ss, | 2842 | static int cgroup_cfts_commit(struct cftype *cfts, bool is_add) |
2826 | struct cftype *cfts, bool is_add) | ||
2827 | __releases(&cgroup_mutex) | 2843 | __releases(&cgroup_mutex) |
2828 | { | 2844 | { |
2829 | LIST_HEAD(pending); | 2845 | LIST_HEAD(pending); |
2830 | struct cgroup *cgrp, *root = &ss->root->top_cgroup; | 2846 | struct cgroup_subsys *ss = cfts[0].ss; |
2847 | struct cgroup *root = &ss->root->top_cgroup; | ||
2831 | struct super_block *sb = ss->root->sb; | 2848 | struct super_block *sb = ss->root->sb; |
2832 | struct dentry *prev = NULL; | 2849 | struct dentry *prev = NULL; |
2833 | struct inode *inode; | 2850 | struct inode *inode; |
2851 | struct cgroup_subsys_state *css; | ||
2834 | u64 update_before; | 2852 | u64 update_before; |
2853 | int ret = 0; | ||
2835 | 2854 | ||
2836 | /* %NULL @cfts indicates abort and don't bother if @ss isn't attached */ | 2855 | /* %NULL @cfts indicates abort and don't bother if @ss isn't attached */ |
2837 | if (!cfts || ss->root == &cgroup_dummy_root || | 2856 | if (!cfts || ss->root == &cgroup_dummy_root || |
2838 | !atomic_inc_not_zero(&sb->s_active)) { | 2857 | !atomic_inc_not_zero(&sb->s_active)) { |
2839 | mutex_unlock(&cgroup_mutex); | 2858 | mutex_unlock(&cgroup_mutex); |
2840 | return; | 2859 | return 0; |
2841 | } | 2860 | } |
2842 | 2861 | ||
2843 | /* | 2862 | /* |
@@ -2849,17 +2868,11 @@ static void cgroup_cfts_commit(struct cgroup_subsys *ss, | |||
2849 | 2868 | ||
2850 | mutex_unlock(&cgroup_mutex); | 2869 | mutex_unlock(&cgroup_mutex); |
2851 | 2870 | ||
2852 | /* @root always needs to be updated */ | ||
2853 | inode = root->dentry->d_inode; | ||
2854 | mutex_lock(&inode->i_mutex); | ||
2855 | mutex_lock(&cgroup_mutex); | ||
2856 | cgroup_addrm_files(root, ss, cfts, is_add); | ||
2857 | mutex_unlock(&cgroup_mutex); | ||
2858 | mutex_unlock(&inode->i_mutex); | ||
2859 | |||
2860 | /* add/rm files for all cgroups created before */ | 2871 | /* add/rm files for all cgroups created before */ |
2861 | rcu_read_lock(); | 2872 | rcu_read_lock(); |
2862 | cgroup_for_each_descendant_pre(cgrp, root) { | 2873 | css_for_each_descendant_pre(css, cgroup_css(root, ss)) { |
2874 | struct cgroup *cgrp = css->cgroup; | ||
2875 | |||
2863 | if (cgroup_is_dead(cgrp)) | 2876 | if (cgroup_is_dead(cgrp)) |
2864 | continue; | 2877 | continue; |
2865 | 2878 | ||
@@ -2873,15 +2886,18 @@ static void cgroup_cfts_commit(struct cgroup_subsys *ss, | |||
2873 | mutex_lock(&inode->i_mutex); | 2886 | mutex_lock(&inode->i_mutex); |
2874 | mutex_lock(&cgroup_mutex); | 2887 | mutex_lock(&cgroup_mutex); |
2875 | if (cgrp->serial_nr < update_before && !cgroup_is_dead(cgrp)) | 2888 | if (cgrp->serial_nr < update_before && !cgroup_is_dead(cgrp)) |
2876 | cgroup_addrm_files(cgrp, ss, cfts, is_add); | 2889 | ret = cgroup_addrm_files(cgrp, cfts, is_add); |
2877 | mutex_unlock(&cgroup_mutex); | 2890 | mutex_unlock(&cgroup_mutex); |
2878 | mutex_unlock(&inode->i_mutex); | 2891 | mutex_unlock(&inode->i_mutex); |
2879 | 2892 | ||
2880 | rcu_read_lock(); | 2893 | rcu_read_lock(); |
2894 | if (ret) | ||
2895 | break; | ||
2881 | } | 2896 | } |
2882 | rcu_read_unlock(); | 2897 | rcu_read_unlock(); |
2883 | dput(prev); | 2898 | dput(prev); |
2884 | deactivate_super(sb); | 2899 | deactivate_super(sb); |
2900 | return ret; | ||
2885 | } | 2901 | } |
2886 | 2902 | ||
2887 | /** | 2903 | /** |
@@ -2901,49 +2917,56 @@ static void cgroup_cfts_commit(struct cgroup_subsys *ss, | |||
2901 | int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts) | 2917 | int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts) |
2902 | { | 2918 | { |
2903 | struct cftype_set *set; | 2919 | struct cftype_set *set; |
2920 | struct cftype *cft; | ||
2921 | int ret; | ||
2904 | 2922 | ||
2905 | set = kzalloc(sizeof(*set), GFP_KERNEL); | 2923 | set = kzalloc(sizeof(*set), GFP_KERNEL); |
2906 | if (!set) | 2924 | if (!set) |
2907 | return -ENOMEM; | 2925 | return -ENOMEM; |
2908 | 2926 | ||
2927 | for (cft = cfts; cft->name[0] != '\0'; cft++) | ||
2928 | cft->ss = ss; | ||
2929 | |||
2909 | cgroup_cfts_prepare(); | 2930 | cgroup_cfts_prepare(); |
2910 | set->cfts = cfts; | 2931 | set->cfts = cfts; |
2911 | list_add_tail(&set->node, &ss->cftsets); | 2932 | list_add_tail(&set->node, &ss->cftsets); |
2912 | cgroup_cfts_commit(ss, cfts, true); | 2933 | ret = cgroup_cfts_commit(cfts, true); |
2913 | 2934 | if (ret) | |
2914 | return 0; | 2935 | cgroup_rm_cftypes(cfts); |
2936 | return ret; | ||
2915 | } | 2937 | } |
2916 | EXPORT_SYMBOL_GPL(cgroup_add_cftypes); | 2938 | EXPORT_SYMBOL_GPL(cgroup_add_cftypes); |
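
For reference, registration after this change looks roughly like the sketch below. The controller, css_to_foo() and foo_subsys are hypothetical, but the cftype layout, the zero-length-name terminator, and the css-taking handler signature match the converted API in this patch:

        static u64 foo_weight_read(struct cgroup_subsys_state *css,
                                   struct cftype *cft)
        {
                return css_to_foo(css)->weight;   /* css_to_foo() is made up */
        }

        static struct cftype foo_files[] = {
                {
                        .name = "weight",
                        .read_u64 = foo_weight_read,
                },
                { }     /* zero-length name terminates the array */
        };

        /* cgroup_add_cftypes() now sets cft->ss and self-cleans on failure */
        ret = cgroup_add_cftypes(&foo_subsys, foo_files);
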
2917 | 2939 | ||
2918 | /** | 2940 | /** |
2919 | * cgroup_rm_cftypes - remove an array of cftypes from a subsystem | 2941 | * cgroup_rm_cftypes - remove an array of cftypes from a subsystem |
2920 | * @ss: target cgroup subsystem | ||
2921 | * @cfts: zero-length name terminated array of cftypes | 2942 | * @cfts: zero-length name terminated array of cftypes |
2922 | * | 2943 | * |
2923 | * Unregister @cfts from @ss. Files described by @cfts are removed from | 2944 | * Unregister @cfts. Files described by @cfts are removed from all |
2924 | * all existing cgroups to which @ss is attached and all future cgroups | 2945 | * existing cgroups and all future cgroups won't have them either. This |
2925 | * won't have them either. This function can be called anytime whether @ss | 2946 | * function can be called anytime whether @cfts' subsys is attached or not. |
2926 | * is attached or not. | ||
2927 | * | 2947 | * |
2928 | * Returns 0 on successful unregistration, -ENOENT if @cfts is not | 2948 | * Returns 0 on successful unregistration, -ENOENT if @cfts is not |
2929 | * registered with @ss. | 2949 | * registered. |
2930 | */ | 2950 | */ |
2931 | int cgroup_rm_cftypes(struct cgroup_subsys *ss, struct cftype *cfts) | 2951 | int cgroup_rm_cftypes(struct cftype *cfts) |
2932 | { | 2952 | { |
2933 | struct cftype_set *set; | 2953 | struct cftype_set *set; |
2934 | 2954 | ||
2955 | if (!cfts || !cfts[0].ss) | ||
2956 | return -ENOENT; | ||
2957 | |||
2935 | cgroup_cfts_prepare(); | 2958 | cgroup_cfts_prepare(); |
2936 | 2959 | ||
2937 | list_for_each_entry(set, &ss->cftsets, node) { | 2960 | list_for_each_entry(set, &cfts[0].ss->cftsets, node) { |
2938 | if (set->cfts == cfts) { | 2961 | if (set->cfts == cfts) { |
2939 | list_del(&set->node); | 2962 | list_del(&set->node); |
2940 | kfree(set); | 2963 | kfree(set); |
2941 | cgroup_cfts_commit(ss, cfts, false); | 2964 | cgroup_cfts_commit(cfts, false); |
2942 | return 0; | 2965 | return 0; |
2943 | } | 2966 | } |
2944 | } | 2967 | } |
2945 | 2968 | ||
2946 | cgroup_cfts_commit(ss, NULL, false); | 2969 | cgroup_cfts_commit(NULL, false); |
2947 | return -ENOENT; | 2970 | return -ENOENT; |
2948 | } | 2971 | } |
2949 | 2972 | ||
@@ -2966,34 +2989,10 @@ int cgroup_task_count(const struct cgroup *cgrp) | |||
2966 | } | 2989 | } |
2967 | 2990 | ||
2968 | /* | 2991 | /* |
2969 | * Advance a list_head iterator. The iterator should be positioned at | 2992 | * To reduce the fork() overhead for systems that are not actually using |
2970 | * the start of a css_set | 2993 | * their cgroups capability, we don't maintain the lists running through |
2971 | */ | 2994 | * each css_set to its tasks until we see the list actually used - in other |
2972 | static void cgroup_advance_iter(struct cgroup *cgrp, struct cgroup_iter *it) | 2995 | * words after the first call to css_task_iter_start(). |
2973 | { | ||
2974 | struct list_head *l = it->cset_link; | ||
2975 | struct cgrp_cset_link *link; | ||
2976 | struct css_set *cset; | ||
2977 | |||
2978 | /* Advance to the next non-empty css_set */ | ||
2979 | do { | ||
2980 | l = l->next; | ||
2981 | if (l == &cgrp->cset_links) { | ||
2982 | it->cset_link = NULL; | ||
2983 | return; | ||
2984 | } | ||
2985 | link = list_entry(l, struct cgrp_cset_link, cset_link); | ||
2986 | cset = link->cset; | ||
2987 | } while (list_empty(&cset->tasks)); | ||
2988 | it->cset_link = l; | ||
2989 | it->task = cset->tasks.next; | ||
2990 | } | ||
2991 | |||
2992 | /* | ||
2993 | * To reduce the fork() overhead for systems that are not actually | ||
2994 | * using their cgroups capability, we don't maintain the lists running | ||
2995 | * through each css_set to its tasks until we see the list actually | ||
2996 | * used - in other words after the first call to cgroup_iter_start(). | ||
2997 | */ | 2996 | */ |
2998 | static void cgroup_enable_task_cg_lists(void) | 2997 | static void cgroup_enable_task_cg_lists(void) |
2999 | { | 2998 | { |
@@ -3024,16 +3023,21 @@ static void cgroup_enable_task_cg_lists(void) | |||
3024 | } | 3023 | } |
3025 | 3024 | ||
3026 | /** | 3025 | /** |
3027 | * cgroup_next_sibling - find the next sibling of a given cgroup | 3026 | * css_next_child - find the next child of a given css |
3028 | * @pos: the current cgroup | 3027 | * @pos_css: the current position (%NULL to initiate traversal) |
3028 | * @parent_css: css whose children to walk | ||
3029 | * | 3029 | * |
3030 | * This function returns the next sibling of @pos and should be called | 3030 | * This function returns the next child of @parent_css and should be called |
3031 | * under RCU read lock. The only requirement is that @pos is accessible. | 3031 | * under RCU read lock. The only requirement is that @parent_css and |
3032 | * The next sibling is guaranteed to be returned regardless of @pos's | 3032 | * @pos_css are accessible. The next sibling is guaranteed to be returned |
3033 | * state. | 3033 | * regardless of their states. |
3034 | */ | 3034 | */ |
3035 | struct cgroup *cgroup_next_sibling(struct cgroup *pos) | 3035 | struct cgroup_subsys_state * |
3036 | css_next_child(struct cgroup_subsys_state *pos_css, | ||
3037 | struct cgroup_subsys_state *parent_css) | ||
3036 | { | 3038 | { |
3039 | struct cgroup *pos = pos_css ? pos_css->cgroup : NULL; | ||
3040 | struct cgroup *cgrp = parent_css->cgroup; | ||
3037 | struct cgroup *next; | 3041 | struct cgroup *next; |
3038 | 3042 | ||
3039 | WARN_ON_ONCE(!rcu_read_lock_held()); | 3043 | WARN_ON_ONCE(!rcu_read_lock_held()); |
@@ -3048,78 +3052,81 @@ struct cgroup *cgroup_next_sibling(struct cgroup *pos) | |||
3048 | * safe to dereference from this RCU critical section. If | 3052 | * safe to dereference from this RCU critical section. If |
3049 | * ->sibling.next is inaccessible, cgroup_is_dead() is guaranteed | 3053 | * ->sibling.next is inaccessible, cgroup_is_dead() is guaranteed |
3050 | * to be visible as %true here. | 3054 | * to be visible as %true here. |
3055 | * | ||
3056 | * If @pos is dead, its next pointer can't be dereferenced; | ||
3057 | * however, as each cgroup is given a monotonically increasing | ||
3058 | * unique serial number and always appended to the sibling list, | ||
3059 | * the next one can be found by walking the parent's children until | ||
3060 | * we see a cgroup with higher serial number than @pos's. While | ||
3061 | * this path can be slower, it's taken only when either the current | ||
3062 | * cgroup is removed or iteration and removal race. | ||
3051 | */ | 3063 | */ |
3052 | if (likely(!cgroup_is_dead(pos))) { | 3064 | if (!pos) { |
3065 | next = list_entry_rcu(cgrp->children.next, struct cgroup, sibling); | ||
3066 | } else if (likely(!cgroup_is_dead(pos))) { | ||
3053 | next = list_entry_rcu(pos->sibling.next, struct cgroup, sibling); | 3067 | next = list_entry_rcu(pos->sibling.next, struct cgroup, sibling); |
3054 | if (&next->sibling != &pos->parent->children) | 3068 | } else { |
3055 | return next; | 3069 | list_for_each_entry_rcu(next, &cgrp->children, sibling) |
3056 | return NULL; | 3070 | if (next->serial_nr > pos->serial_nr) |
3071 | break; | ||
3057 | } | 3072 | } |
3058 | 3073 | ||
3059 | /* | 3074 | if (&next->sibling == &cgrp->children) |
3060 | * Can't dereference the next pointer. Each cgroup is given a | 3075 | return NULL; |
3061 | * monotonically increasing unique serial number and always | 3076 | |
3062 | * appended to the sibling list, so the next one can be found by | 3077 | return cgroup_css(next, parent_css->ss); |
3063 | * walking the parent's children until we see a cgroup with higher | ||
3064 | * serial number than @pos's. | ||
3065 | * | ||
3066 | * While this path can be slow, it's taken only when either the | ||
3067 | * current cgroup is removed or iteration and removal race. | ||
3068 | */ | ||
3069 | list_for_each_entry_rcu(next, &pos->parent->children, sibling) | ||
3070 | if (next->serial_nr > pos->serial_nr) | ||
3071 | return next; | ||
3072 | return NULL; | ||
3073 | } | 3078 | } |
3074 | EXPORT_SYMBOL_GPL(cgroup_next_sibling); | 3079 | EXPORT_SYMBOL_GPL(css_next_child); |
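
Most users reach css_next_child() through the css_for_each_child() wrapper (used by css_rightmost_descendant() below). A minimal sketch, with parent_css and the counter illustrative:

        struct cgroup_subsys_state *child;
        int n = 0;

        rcu_read_lock();
        css_for_each_child(child, parent_css)   /* safe even if a child dies */
                n++;
        rcu_read_unlock();

The serial-number fallback above is what lets iteration and removal race safely: if the css we stopped at goes away, the walk resumes at the next-higher serial number instead of dereferencing a dead pointer.
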
3075 | 3080 | ||
3076 | /** | 3081 | /** |
3077 | * cgroup_next_descendant_pre - find the next descendant for pre-order walk | 3082 | * css_next_descendant_pre - find the next descendant for pre-order walk |
3078 | * @pos: the current position (%NULL to initiate traversal) | 3083 | * @pos: the current position (%NULL to initiate traversal) |
3079 | * @cgroup: cgroup whose descendants to walk | 3084 | * @root: css whose descendants to walk |
3080 | * | 3085 | * |
3081 | * To be used by cgroup_for_each_descendant_pre(). Find the next | 3086 | * To be used by css_for_each_descendant_pre(). Find the next descendant |
3082 | * descendant to visit for pre-order traversal of @cgroup's descendants. | 3087 | * to visit for pre-order traversal of @root's descendants. @root is |
3088 | * included in the iteration and the first node to be visited. | ||
3083 | * | 3089 | * |
3084 | * While this function requires RCU read locking, it doesn't require the | 3090 | * While this function requires RCU read locking, it doesn't require the |
3085 | * whole traversal to be contained in a single RCU critical section. This | 3091 | * whole traversal to be contained in a single RCU critical section. This |
3086 | * function will return the correct next descendant as long as both @pos | 3092 | * function will return the correct next descendant as long as both @pos |
3087 | * and @cgroup are accessible and @pos is a descendant of @cgroup. | 3093 | * and @root are accessible and @pos is a descendant of @root. |
3088 | */ | 3094 | */ |
3089 | struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos, | 3095 | struct cgroup_subsys_state * |
3090 | struct cgroup *cgroup) | 3096 | css_next_descendant_pre(struct cgroup_subsys_state *pos, |
3097 | struct cgroup_subsys_state *root) | ||
3091 | { | 3098 | { |
3092 | struct cgroup *next; | 3099 | struct cgroup_subsys_state *next; |
3093 | 3100 | ||
3094 | WARN_ON_ONCE(!rcu_read_lock_held()); | 3101 | WARN_ON_ONCE(!rcu_read_lock_held()); |
3095 | 3102 | ||
3096 | /* if first iteration, pretend we just visited @cgroup */ | 3103 | /* if first iteration, visit @root */ |
3097 | if (!pos) | 3104 | if (!pos) |
3098 | pos = cgroup; | 3105 | return root; |
3099 | 3106 | ||
3100 | /* visit the first child if exists */ | 3107 | /* visit the first child if exists */ |
3101 | next = list_first_or_null_rcu(&pos->children, struct cgroup, sibling); | 3108 | next = css_next_child(NULL, pos); |
3102 | if (next) | 3109 | if (next) |
3103 | return next; | 3110 | return next; |
3104 | 3111 | ||
3105 | /* no child, visit my or the closest ancestor's next sibling */ | 3112 | /* no child, visit my or the closest ancestor's next sibling */ |
3106 | while (pos != cgroup) { | 3113 | while (pos != root) { |
3107 | next = cgroup_next_sibling(pos); | 3114 | next = css_next_child(pos, css_parent(pos)); |
3108 | if (next) | 3115 | if (next) |
3109 | return next; | 3116 | return next; |
3110 | pos = pos->parent; | 3117 | pos = css_parent(pos); |
3111 | } | 3118 | } |
3112 | 3119 | ||
3113 | return NULL; | 3120 | return NULL; |
3114 | } | 3121 | } |
3115 | EXPORT_SYMBOL_GPL(cgroup_next_descendant_pre); | 3122 | EXPORT_SYMBOL_GPL(css_next_descendant_pre); |
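
Usage keeps the same shape as before the conversion, only css-typed; a sketch (process_css() is hypothetical):

        struct cgroup_subsys_state *pos;

        rcu_read_lock();
        css_for_each_descendant_pre(pos, root_css) {
                /* @root_css itself is now visited first, then its children */
                process_css(pos);
        }
        rcu_read_unlock();
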
3116 | 3123 | ||
3117 | /** | 3124 | /** |
3118 | * cgroup_rightmost_descendant - return the rightmost descendant of a cgroup | 3125 | * css_rightmost_descendant - return the rightmost descendant of a css |
3119 | * @pos: cgroup of interest | 3126 | * @pos: css of interest |
3120 | * | 3127 | * |
3121 | * Return the rightmost descendant of @pos. If there's no descendant, | 3128 | * Return the rightmost descendant of @pos. If there's no descendant, @pos |
3122 | * @pos is returned. This can be used during pre-order traversal to skip | 3129 | * is returned. This can be used during pre-order traversal to skip |
3123 | * subtree of @pos. | 3130 | * subtree of @pos. |
3124 | * | 3131 | * |
3125 | * While this function requires RCU read locking, it doesn't require the | 3132 | * While this function requires RCU read locking, it doesn't require the |
@@ -3127,9 +3134,10 @@ EXPORT_SYMBOL_GPL(cgroup_next_descendant_pre); | |||
3127 | * function will return the correct rightmost descendant as long as @pos is | 3134 | * function will return the correct rightmost descendant as long as @pos is |
3128 | * accessible. | 3135 | * accessible. |
3129 | */ | 3136 | */ |
3130 | struct cgroup *cgroup_rightmost_descendant(struct cgroup *pos) | 3137 | struct cgroup_subsys_state * |
3138 | css_rightmost_descendant(struct cgroup_subsys_state *pos) | ||
3131 | { | 3139 | { |
3132 | struct cgroup *last, *tmp; | 3140 | struct cgroup_subsys_state *last, *tmp; |
3133 | 3141 | ||
3134 | WARN_ON_ONCE(!rcu_read_lock_held()); | 3142 | WARN_ON_ONCE(!rcu_read_lock_held()); |
3135 | 3143 | ||
@@ -3137,82 +3145,138 @@ struct cgroup *cgroup_rightmost_descendant(struct cgroup *pos) | |||
3137 | last = pos; | 3145 | last = pos; |
3138 | /* ->prev isn't RCU safe, walk ->next till the end */ | 3146 | /* ->prev isn't RCU safe, walk ->next till the end */ |
3139 | pos = NULL; | 3147 | pos = NULL; |
3140 | list_for_each_entry_rcu(tmp, &last->children, sibling) | 3148 | css_for_each_child(tmp, last) |
3141 | pos = tmp; | 3149 | pos = tmp; |
3142 | } while (pos); | 3150 | } while (pos); |
3143 | 3151 | ||
3144 | return last; | 3152 | return last; |
3145 | } | 3153 | } |
3146 | EXPORT_SYMBOL_GPL(cgroup_rightmost_descendant); | 3154 | EXPORT_SYMBOL_GPL(css_rightmost_descendant); |
3147 | 3155 | ||
3148 | static struct cgroup *cgroup_leftmost_descendant(struct cgroup *pos) | 3156 | static struct cgroup_subsys_state * |
3157 | css_leftmost_descendant(struct cgroup_subsys_state *pos) | ||
3149 | { | 3158 | { |
3150 | struct cgroup *last; | 3159 | struct cgroup_subsys_state *last; |
3151 | 3160 | ||
3152 | do { | 3161 | do { |
3153 | last = pos; | 3162 | last = pos; |
3154 | pos = list_first_or_null_rcu(&pos->children, struct cgroup, | 3163 | pos = css_next_child(NULL, pos); |
3155 | sibling); | ||
3156 | } while (pos); | 3164 | } while (pos); |
3157 | 3165 | ||
3158 | return last; | 3166 | return last; |
3159 | } | 3167 | } |
3160 | 3168 | ||
3161 | /** | 3169 | /** |
3162 | * cgroup_next_descendant_post - find the next descendant for post-order walk | 3170 | * css_next_descendant_post - find the next descendant for post-order walk |
3163 | * @pos: the current position (%NULL to initiate traversal) | 3171 | * @pos: the current position (%NULL to initiate traversal) |
3164 | * @cgroup: cgroup whose descendants to walk | 3172 | * @root: css whose descendants to walk |
3165 | * | 3173 | * |
3166 | * To be used by cgroup_for_each_descendant_post(). Find the next | 3174 | * To be used by css_for_each_descendant_post(). Find the next descendant |
3167 | * descendant to visit for post-order traversal of @cgroup's descendants. | 3175 | * to visit for post-order traversal of @root's descendants. @root is |
3176 | * included in the iteration and the last node to be visited. | ||
3168 | * | 3177 | * |
3169 | * While this function requires RCU read locking, it doesn't require the | 3178 | * While this function requires RCU read locking, it doesn't require the |
3170 | * whole traversal to be contained in a single RCU critical section. This | 3179 | * whole traversal to be contained in a single RCU critical section. This |
3171 | * function will return the correct next descendant as long as both @pos | 3180 | * function will return the correct next descendant as long as both @pos |
3172 | * and @cgroup are accessible and @pos is a descendant of @cgroup. | 3181 | * and @root are accessible and @pos is a descendant of @root. |
3173 | */ | 3182 | */ |
3174 | struct cgroup *cgroup_next_descendant_post(struct cgroup *pos, | 3183 | struct cgroup_subsys_state * |
3175 | struct cgroup *cgroup) | 3184 | css_next_descendant_post(struct cgroup_subsys_state *pos, |
3185 | struct cgroup_subsys_state *root) | ||
3176 | { | 3186 | { |
3177 | struct cgroup *next; | 3187 | struct cgroup_subsys_state *next; |
3178 | 3188 | ||
3179 | WARN_ON_ONCE(!rcu_read_lock_held()); | 3189 | WARN_ON_ONCE(!rcu_read_lock_held()); |
3180 | 3190 | ||
3181 | /* if first iteration, visit the leftmost descendant */ | 3191 | /* if first iteration, visit the leftmost descendant */ |
3182 | if (!pos) { | 3192 | if (!pos) { |
3183 | next = cgroup_leftmost_descendant(cgroup); | 3193 | next = css_leftmost_descendant(root); |
3184 | return next != cgroup ? next : NULL; | 3194 | return next != root ? next : NULL; |
3185 | } | 3195 | } |
3186 | 3196 | ||
3197 | /* if we visited @root, we're done */ | ||
3198 | if (pos == root) | ||
3199 | return NULL; | ||
3200 | |||
3187 | /* if there's an unvisited sibling, visit its leftmost descendant */ | 3201 | /* if there's an unvisited sibling, visit its leftmost descendant */ |
3188 | next = cgroup_next_sibling(pos); | 3202 | next = css_next_child(pos, css_parent(pos)); |
3189 | if (next) | 3203 | if (next) |
3190 | return cgroup_leftmost_descendant(next); | 3204 | return css_leftmost_descendant(next); |
3191 | 3205 | ||
3192 | /* no sibling left, visit parent */ | 3206 | /* no sibling left, visit parent */ |
3193 | next = pos->parent; | 3207 | return css_parent(pos); |
3194 | return next != cgroup ? next : NULL; | ||
3195 | } | 3208 | } |
3196 | EXPORT_SYMBOL_GPL(cgroup_next_descendant_post); | 3209 | EXPORT_SYMBOL_GPL(css_next_descendant_post); |
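
Post-order is the natural order for teardown, since every child is visited before its parent. A sketch mirroring the pre-order example (teardown_css() is hypothetical):

        struct cgroup_subsys_state *pos;

        rcu_read_lock();
        css_for_each_descendant_post(pos, root_css) {
                /* children first, @root_css last */
                teardown_css(pos);
        }
        rcu_read_unlock();
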
3197 | 3210 | ||
3198 | void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it) | 3211 | /** |
3212 | * css_advance_task_iter - advance a task iterator to the next css_set | ||
3213 | * @it: the iterator to advance | ||
3214 | * | ||
3215 | * Advance @it to the next css_set to walk. | ||
3216 | */ | ||
3217 | static void css_advance_task_iter(struct css_task_iter *it) | ||
3218 | { | ||
3219 | struct list_head *l = it->cset_link; | ||
3220 | struct cgrp_cset_link *link; | ||
3221 | struct css_set *cset; | ||
3222 | |||
3223 | /* Advance to the next non-empty css_set */ | ||
3224 | do { | ||
3225 | l = l->next; | ||
3226 | if (l == &it->origin_css->cgroup->cset_links) { | ||
3227 | it->cset_link = NULL; | ||
3228 | return; | ||
3229 | } | ||
3230 | link = list_entry(l, struct cgrp_cset_link, cset_link); | ||
3231 | cset = link->cset; | ||
3232 | } while (list_empty(&cset->tasks)); | ||
3233 | it->cset_link = l; | ||
3234 | it->task = cset->tasks.next; | ||
3235 | } | ||
3236 | |||
3237 | /** | ||
3238 | * css_task_iter_start - initiate task iteration | ||
3239 | * @css: the css to walk tasks of | ||
3240 | * @it: the task iterator to use | ||
3241 | * | ||
3242 | * Initiate iteration through the tasks of @css. The caller can call | ||
3243 | * css_task_iter_next() to walk through the tasks until the function | ||
3244 | * returns NULL. On completion of iteration, css_task_iter_end() must be | ||
3245 | * called. | ||
3246 | * | ||
3247 | * Note that this function acquires a lock which is released when the | ||
3248 | * iteration finishes. The caller can't sleep while iteration is in | ||
3249 | * progress. | ||
3250 | */ | ||
3251 | void css_task_iter_start(struct cgroup_subsys_state *css, | ||
3252 | struct css_task_iter *it) | ||
3199 | __acquires(css_set_lock) | 3253 | __acquires(css_set_lock) |
3200 | { | 3254 | { |
3201 | /* | 3255 | /* |
3202 | * The first time anyone tries to iterate across a cgroup, | 3256 | * The first time anyone tries to iterate across a css, we need to |
3203 | * we need to enable the list linking each css_set to its | 3257 | * enable the list linking each css_set to its tasks, and fix up |
3204 | * tasks, and fix up all existing tasks. | 3258 | * all existing tasks. |
3205 | */ | 3259 | */ |
3206 | if (!use_task_css_set_links) | 3260 | if (!use_task_css_set_links) |
3207 | cgroup_enable_task_cg_lists(); | 3261 | cgroup_enable_task_cg_lists(); |
3208 | 3262 | ||
3209 | read_lock(&css_set_lock); | 3263 | read_lock(&css_set_lock); |
3210 | it->cset_link = &cgrp->cset_links; | 3264 | |
3211 | cgroup_advance_iter(cgrp, it); | 3265 | it->origin_css = css; |
3266 | it->cset_link = &css->cgroup->cset_links; | ||
3267 | |||
3268 | css_advance_task_iter(it); | ||
3212 | } | 3269 | } |
3213 | 3270 | ||
3214 | struct task_struct *cgroup_iter_next(struct cgroup *cgrp, | 3271 | /** |
3215 | struct cgroup_iter *it) | 3272 | * css_task_iter_next - return the next task for the iterator |
3273 | * @it: the task iterator being iterated | ||
3274 | * | ||
3275 | * The "next" function for task iteration. @it should have been | ||
3276 | * initialized via css_task_iter_start(). Returns NULL when the iteration | ||
3277 | * reaches the end. | ||
3278 | */ | ||
3279 | struct task_struct *css_task_iter_next(struct css_task_iter *it) | ||
3216 | { | 3280 | { |
3217 | struct task_struct *res; | 3281 | struct task_struct *res; |
3218 | struct list_head *l = it->task; | 3282 | struct list_head *l = it->task; |
@@ -3226,16 +3290,24 @@ struct task_struct *cgroup_iter_next(struct cgroup *cgrp, | |||
3226 | l = l->next; | 3290 | l = l->next; |
3227 | link = list_entry(it->cset_link, struct cgrp_cset_link, cset_link); | 3291 | link = list_entry(it->cset_link, struct cgrp_cset_link, cset_link); |
3228 | if (l == &link->cset->tasks) { | 3292 | if (l == &link->cset->tasks) { |
3229 | /* We reached the end of this task list - move on to | 3293 | /* |
3230 | * the next cg_cgroup_link */ | 3294 | * We reached the end of this task list - move on to the |
3231 | cgroup_advance_iter(cgrp, it); | 3295 | * next cgrp_cset_link. |
3296 | */ | ||
3297 | css_advance_task_iter(it); | ||
3232 | } else { | 3298 | } else { |
3233 | it->task = l; | 3299 | it->task = l; |
3234 | } | 3300 | } |
3235 | return res; | 3301 | return res; |
3236 | } | 3302 | } |
3237 | 3303 | ||
3238 | void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it) | 3304 | /** |
3305 | * css_task_iter_end - finish task iteration | ||
3306 | * @it: the task iterator to finish | ||
3307 | * | ||
3308 | * Finish task iteration started by css_task_iter_start(). | ||
3309 | */ | ||
3310 | void css_task_iter_end(struct css_task_iter *it) | ||
3239 | __releases(css_set_lock) | 3311 | __releases(css_set_lock) |
3240 | { | 3312 | { |
3241 | read_unlock(&css_set_lock); | 3313 | read_unlock(&css_set_lock); |
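
Putting the three together, counting the tasks attached to a css looks like the sketch below; the loop body must not sleep because css_set_lock stays read-held for the whole walk:

        struct css_task_iter it;
        struct task_struct *task;
        int nr = 0;

        css_task_iter_start(css, &it);          /* takes css_set_lock */
        while ((task = css_task_iter_next(&it)))
                nr++;                           /* no sleeping in here */
        css_task_iter_end(&it);                 /* drops css_set_lock */
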
@@ -3276,46 +3348,49 @@ static inline int started_after(void *p1, void *p2) | |||
3276 | } | 3348 | } |
3277 | 3349 | ||
3278 | /** | 3350 | /** |
3279 | * cgroup_scan_tasks - iterate though all the tasks in a cgroup | 3351 | * css_scan_tasks - iterate through all the tasks in a css |
3280 | * @scan: struct cgroup_scanner containing arguments for the scan | 3352 | * @css: the css to iterate tasks of |
3353 | * @test: optional test callback | ||
3354 | * @process: process callback | ||
3355 | * @data: data passed to @test and @process | ||
3356 | * @heap: optional pre-allocated heap used for task iteration | ||
3357 | * | ||
3358 | * Iterate through all the tasks in @css, calling @test for each, and if it | ||
3359 | * returns %true, call @process for it also. | ||
3360 | * | ||
3361 | * @test may be NULL, meaning always true (select all tasks), which | ||
3362 | * effectively duplicates css_task_iter_{start,next,end}() but does not | ||
3363 | * lock css_set_lock for the call to @process. | ||
3364 | * | ||
3365 | * It is guaranteed that @process will act on every task that is a member | ||
3366 | * of @css for the duration of this call. This function may or may not | ||
3367 | * call @process for tasks that exit or move to a different css during the | ||
3368 | * call, or are forked or move into the css during the call. | ||
3281 | * | 3369 | * |
3282 | * Arguments include pointers to callback functions test_task() and | 3370 | * Note that @test may be called with locks held, and may in some |
3283 | * process_task(). | 3371 | * situations be called multiple times for the same task, so it should be |
3284 | * Iterate through all the tasks in a cgroup, calling test_task() for each, | 3372 | * cheap. |
3285 | * and if it returns true, call process_task() for it also. | ||
3286 | * The test_task pointer may be NULL, meaning always true (select all tasks). | ||
3287 | * Effectively duplicates cgroup_iter_{start,next,end}() | ||
3288 | * but does not lock css_set_lock for the call to process_task(). | ||
3289 | * The struct cgroup_scanner may be embedded in any structure of the caller's | ||
3290 | * creation. | ||
3291 | * It is guaranteed that process_task() will act on every task that | ||
3292 | * is a member of the cgroup for the duration of this call. This | ||
3293 | * function may or may not call process_task() for tasks that exit | ||
3294 | * or move to a different cgroup during the call, or are forked or | ||
3295 | * move into the cgroup during the call. | ||
3296 | * | 3373 | * |
3297 | * Note that test_task() may be called with locks held, and may in some | 3374 | * If @heap is non-NULL, a heap has been pre-allocated and will be used for |
3298 | * situations be called multiple times for the same task, so it should | 3375 | * heap operations (and its "gt" member will be overwritten), else a |
3299 | * be cheap. | 3376 | * temporary heap will be used (allocation of which may cause this function |
3300 | * If the heap pointer in the struct cgroup_scanner is non-NULL, a heap has been | 3377 | * to fail). |
3301 | * pre-allocated and will be used for heap operations (and its "gt" member will | ||
3302 | * be overwritten), else a temporary heap will be used (allocation of which | ||
3303 | * may cause this function to fail). | ||
3304 | */ | 3378 | */ |
3305 | int cgroup_scan_tasks(struct cgroup_scanner *scan) | 3379 | int css_scan_tasks(struct cgroup_subsys_state *css, |
3380 | bool (*test)(struct task_struct *, void *), | ||
3381 | void (*process)(struct task_struct *, void *), | ||
3382 | void *data, struct ptr_heap *heap) | ||
3306 | { | 3383 | { |
3307 | int retval, i; | 3384 | int retval, i; |
3308 | struct cgroup_iter it; | 3385 | struct css_task_iter it; |
3309 | struct task_struct *p, *dropped; | 3386 | struct task_struct *p, *dropped; |
3310 | /* Never dereference latest_task, since it's not refcounted */ | 3387 | /* Never dereference latest_task, since it's not refcounted */ |
3311 | struct task_struct *latest_task = NULL; | 3388 | struct task_struct *latest_task = NULL; |
3312 | struct ptr_heap tmp_heap; | 3389 | struct ptr_heap tmp_heap; |
3313 | struct ptr_heap *heap; | ||
3314 | struct timespec latest_time = { 0, 0 }; | 3390 | struct timespec latest_time = { 0, 0 }; |
3315 | 3391 | ||
3316 | if (scan->heap) { | 3392 | if (heap) { |
3317 | /* The caller supplied our heap and pre-allocated its memory */ | 3393 | /* The caller supplied our heap and pre-allocated its memory */ |
3318 | heap = scan->heap; | ||
3319 | heap->gt = &started_after; | 3394 | heap->gt = &started_after; |
3320 | } else { | 3395 | } else { |
3321 | /* We need to allocate our own heap memory */ | 3396 | /* We need to allocate our own heap memory */ |
@@ -3328,25 +3403,24 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan) | |||
3328 | 3403 | ||
3329 | again: | 3404 | again: |
3330 | /* | 3405 | /* |
3331 | * Scan tasks in the cgroup, using the scanner's "test_task" callback | 3406 | * Scan tasks in the css, using the @test callback to determine |
3332 | * to determine which are of interest, and using the scanner's | 3407 | * which are of interest, and invoking @process callback on the |
3333 | * "process_task" callback to process any of them that need an update. | 3408 | * ones which need an update. Since we don't want to hold any |
3334 | * Since we don't want to hold any locks during the task updates, | 3409 | * locks during the task updates, gather tasks to be processed in a |
3335 | * gather tasks to be processed in a heap structure. | 3410 | * heap structure. The heap is sorted by descending task start |
3336 | * The heap is sorted by descending task start time. | 3411 | * time. If the statically-sized heap fills up, we overflow tasks |
3337 | * If the statically-sized heap fills up, we overflow tasks that | 3412 | * that started later, and in future iterations only consider tasks |
3338 | * started later, and in future iterations only consider tasks that | 3413 | * that started after the latest task in the previous pass. This |
3339 | * started after the latest task in the previous pass. This | ||
3340 | * guarantees forward progress and that we don't miss any tasks. | 3414 | * guarantees forward progress and that we don't miss any tasks. |
3341 | */ | 3415 | */ |
3342 | heap->size = 0; | 3416 | heap->size = 0; |
3343 | cgroup_iter_start(scan->cg, &it); | 3417 | css_task_iter_start(css, &it); |
3344 | while ((p = cgroup_iter_next(scan->cg, &it))) { | 3418 | while ((p = css_task_iter_next(&it))) { |
3345 | /* | 3419 | /* |
3346 | * Only affect tasks that qualify per the caller's callback, | 3420 | * Only affect tasks that qualify per the caller's callback, |
3347 | * if he provided one | 3421 | * if one was provided |
3348 | */ | 3422 | */ |
3349 | if (scan->test_task && !scan->test_task(p, scan)) | 3423 | if (test && !test(p, data)) |
3350 | continue; | 3424 | continue; |
3351 | /* | 3425 | /* |
3352 | * Only process tasks that started after the last task | 3426 | * Only process tasks that started after the last task |
@@ -3374,7 +3448,7 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan) | |||
3374 | * the heap and wasn't inserted | 3448 | * the heap and wasn't inserted |
3375 | */ | 3449 | */ |
3376 | } | 3450 | } |
3377 | cgroup_iter_end(scan->cg, &it); | 3451 | css_task_iter_end(&it); |
3378 | 3452 | ||
3379 | if (heap->size) { | 3453 | if (heap->size) { |
3380 | for (i = 0; i < heap->size; i++) { | 3454 | for (i = 0; i < heap->size; i++) { |
@@ -3384,7 +3458,7 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan) | |||
3384 | latest_task = q; | 3458 | latest_task = q; |
3385 | } | 3459 | } |
3386 | /* Process the task per the caller's callback */ | 3460 | /* Process the task per the caller's callback */ |
3387 | scan->process_task(q, scan); | 3461 | process(q, data); |
3388 | put_task_struct(q); | 3462 | put_task_struct(q); |
3389 | } | 3463 | } |
3390 | /* | 3464 | /* |
@@ -3401,10 +3475,9 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan) | |||
3401 | return 0; | 3475 | return 0; |
3402 | } | 3476 | } |
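
The callback-plus-@data form replaces the old struct cgroup_scanner embedding. A hedged usage sketch; both callbacks are illustrative and assume normal kernel context:

        static bool test_kthread(struct task_struct *task, void *data)
        {
                /* @test may run with locks held and repeat per task; keep it cheap */
                return task->flags & PF_KTHREAD;
        }

        static void report_one(struct task_struct *task, void *data)
        {
                /* @process runs without css_set_lock, so heavier work is fine */
                pr_info("kthread %d attached\n", task_pid_nr(task));
        }

        /* somewhere in controller code: */
        ret = css_scan_tasks(css, test_kthread, report_one, NULL, NULL);
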
3403 | 3477 | ||
3404 | static void cgroup_transfer_one_task(struct task_struct *task, | 3478 | static void cgroup_transfer_one_task(struct task_struct *task, void *data) |
3405 | struct cgroup_scanner *scan) | ||
3406 | { | 3479 | { |
3407 | struct cgroup *new_cgroup = scan->data; | 3480 | struct cgroup *new_cgroup = data; |
3408 | 3481 | ||
3409 | mutex_lock(&cgroup_mutex); | 3482 | mutex_lock(&cgroup_mutex); |
3410 | cgroup_attach_task(new_cgroup, task, false); | 3483 | cgroup_attach_task(new_cgroup, task, false); |
@@ -3418,15 +3491,8 @@ static void cgroup_transfer_one_task(struct task_struct *task, | |||
3418 | */ | 3491 | */ |
3419 | int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from) | 3492 | int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from) |
3420 | { | 3493 | { |
3421 | struct cgroup_scanner scan; | 3494 | return css_scan_tasks(&from->dummy_css, NULL, cgroup_transfer_one_task, |
3422 | 3495 | to, NULL); | |
3423 | scan.cg = from; | ||
3424 | scan.test_task = NULL; /* select all tasks in cgroup */ | ||
3425 | scan.process_task = cgroup_transfer_one_task; | ||
3426 | scan.heap = NULL; | ||
3427 | scan.data = to; | ||
3428 | |||
3429 | return cgroup_scan_tasks(&scan); | ||
3430 | } | 3496 | } |
3431 | 3497 | ||
3432 | /* | 3498 | /* |
@@ -3468,7 +3534,7 @@ struct cgroup_pidlist { | |||
3468 | /* pointer to the cgroup we belong to, for list removal purposes */ | 3534 | /* pointer to the cgroup we belong to, for list removal purposes */ |
3469 | struct cgroup *owner; | 3535 | struct cgroup *owner; |
3470 | /* protects the other fields */ | 3536 | /* protects the other fields */ |
3471 | struct rw_semaphore mutex; | 3537 | struct rw_semaphore rwsem; |
3472 | }; | 3538 | }; |
3473 | 3539 | ||
3474 | /* | 3540 | /* |
@@ -3541,7 +3607,7 @@ static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp, | |||
3541 | struct pid_namespace *ns = task_active_pid_ns(current); | 3607 | struct pid_namespace *ns = task_active_pid_ns(current); |
3542 | 3608 | ||
3543 | /* | 3609 | /* |
3544 | * We can't drop the pidlist_mutex before taking the l->mutex in case | 3610 | * We can't drop the pidlist_mutex before taking the l->rwsem in case |
3545 | * the last ref-holder is trying to remove l from the list at the same | 3611 | * the last ref-holder is trying to remove l from the list at the same |
3546 | * time. Holding the pidlist_mutex precludes somebody taking whichever | 3612 | * time. Holding the pidlist_mutex precludes somebody taking whichever |
3547 | * list we find out from under us - compare release_pid_array(). | 3613 | * list we find out from under us - compare release_pid_array(). |
@@ -3550,7 +3616,7 @@ static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp, | |||
3550 | list_for_each_entry(l, &cgrp->pidlists, links) { | 3616 | list_for_each_entry(l, &cgrp->pidlists, links) { |
3551 | if (l->key.type == type && l->key.ns == ns) { | 3617 | if (l->key.type == type && l->key.ns == ns) { |
3552 | /* make sure l doesn't vanish out from under us */ | 3618 | /* make sure l doesn't vanish out from under us */ |
3553 | down_write(&l->mutex); | 3619 | down_write(&l->rwsem); |
3554 | mutex_unlock(&cgrp->pidlist_mutex); | 3620 | mutex_unlock(&cgrp->pidlist_mutex); |
3555 | return l; | 3621 | return l; |
3556 | } | 3622 | } |
@@ -3561,8 +3627,8 @@ static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp, | |||
3561 | mutex_unlock(&cgrp->pidlist_mutex); | 3627 | mutex_unlock(&cgrp->pidlist_mutex); |
3562 | return l; | 3628 | return l; |
3563 | } | 3629 | } |
3564 | init_rwsem(&l->mutex); | 3630 | init_rwsem(&l->rwsem); |
3565 | down_write(&l->mutex); | 3631 | down_write(&l->rwsem); |
3566 | l->key.type = type; | 3632 | l->key.type = type; |
3567 | l->key.ns = get_pid_ns(ns); | 3633 | l->key.ns = get_pid_ns(ns); |
3568 | l->owner = cgrp; | 3634 | l->owner = cgrp; |
@@ -3580,7 +3646,7 @@ static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type, | |||
3580 | pid_t *array; | 3646 | pid_t *array; |
3581 | int length; | 3647 | int length; |
3582 | int pid, n = 0; /* used for populating the array */ | 3648 | int pid, n = 0; /* used for populating the array */ |
3583 | struct cgroup_iter it; | 3649 | struct css_task_iter it; |
3584 | struct task_struct *tsk; | 3650 | struct task_struct *tsk; |
3585 | struct cgroup_pidlist *l; | 3651 | struct cgroup_pidlist *l; |
3586 | 3652 | ||
@@ -3595,8 +3661,8 @@ static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type, | |||
3595 | if (!array) | 3661 | if (!array) |
3596 | return -ENOMEM; | 3662 | return -ENOMEM; |
3597 | /* now, populate the array */ | 3663 | /* now, populate the array */ |
3598 | cgroup_iter_start(cgrp, &it); | 3664 | css_task_iter_start(&cgrp->dummy_css, &it); |
3599 | while ((tsk = cgroup_iter_next(cgrp, &it))) { | 3665 | while ((tsk = css_task_iter_next(&it))) { |
3600 | if (unlikely(n == length)) | 3666 | if (unlikely(n == length)) |
3601 | break; | 3667 | break; |
3602 | /* get tgid or pid for procs or tasks file respectively */ | 3668 | /* get tgid or pid for procs or tasks file respectively */ |
@@ -3607,7 +3673,7 @@ static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type, | |||
3607 | if (pid > 0) /* make sure to only use valid results */ | 3673 | if (pid > 0) /* make sure to only use valid results */ |
3608 | array[n++] = pid; | 3674 | array[n++] = pid; |
3609 | } | 3675 | } |
3610 | cgroup_iter_end(cgrp, &it); | 3676 | css_task_iter_end(&it); |
3611 | length = n; | 3677 | length = n; |
3612 | /* now sort & (if procs) strip out duplicates */ | 3678 | /* now sort & (if procs) strip out duplicates */ |
3613 | sort(array, length, sizeof(pid_t), cmppid, NULL); | 3679 | sort(array, length, sizeof(pid_t), cmppid, NULL); |
@@ -3623,7 +3689,7 @@ static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type, | |||
3623 | l->list = array; | 3689 | l->list = array; |
3624 | l->length = length; | 3690 | l->length = length; |
3625 | l->use_count++; | 3691 | l->use_count++; |
3626 | up_write(&l->mutex); | 3692 | up_write(&l->rwsem); |
3627 | *lp = l; | 3693 | *lp = l; |
3628 | return 0; | 3694 | return 0; |
3629 | } | 3695 | } |
@@ -3641,7 +3707,7 @@ int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry) | |||
3641 | { | 3707 | { |
3642 | int ret = -EINVAL; | 3708 | int ret = -EINVAL; |
3643 | struct cgroup *cgrp; | 3709 | struct cgroup *cgrp; |
3644 | struct cgroup_iter it; | 3710 | struct css_task_iter it; |
3645 | struct task_struct *tsk; | 3711 | struct task_struct *tsk; |
3646 | 3712 | ||
3647 | /* | 3713 | /* |
@@ -3655,8 +3721,8 @@ int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry) | |||
3655 | ret = 0; | 3721 | ret = 0; |
3656 | cgrp = dentry->d_fsdata; | 3722 | cgrp = dentry->d_fsdata; |
3657 | 3723 | ||
3658 | cgroup_iter_start(cgrp, &it); | 3724 | css_task_iter_start(&cgrp->dummy_css, &it); |
3659 | while ((tsk = cgroup_iter_next(cgrp, &it))) { | 3725 | while ((tsk = css_task_iter_next(&it))) { |
3660 | switch (tsk->state) { | 3726 | switch (tsk->state) { |
3661 | case TASK_RUNNING: | 3727 | case TASK_RUNNING: |
3662 | stats->nr_running++; | 3728 | stats->nr_running++; |
@@ -3676,7 +3742,7 @@ int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry) | |||
3676 | break; | 3742 | break; |
3677 | } | 3743 | } |
3678 | } | 3744 | } |
3679 | cgroup_iter_end(cgrp, &it); | 3745 | css_task_iter_end(&it); |
3680 | 3746 | ||
3681 | err: | 3747 | err: |
3682 | return ret; | 3748 | return ret; |
@@ -3701,7 +3767,7 @@ static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos) | |||
3701 | int index = 0, pid = *pos; | 3767 | int index = 0, pid = *pos; |
3702 | int *iter; | 3768 | int *iter; |
3703 | 3769 | ||
3704 | down_read(&l->mutex); | 3770 | down_read(&l->rwsem); |
3705 | if (pid) { | 3771 | if (pid) { |
3706 | int end = l->length; | 3772 | int end = l->length; |
3707 | 3773 | ||
@@ -3728,7 +3794,7 @@ static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos) | |||
3728 | static void cgroup_pidlist_stop(struct seq_file *s, void *v) | 3794 | static void cgroup_pidlist_stop(struct seq_file *s, void *v) |
3729 | { | 3795 | { |
3730 | struct cgroup_pidlist *l = s->private; | 3796 | struct cgroup_pidlist *l = s->private; |
3731 | up_read(&l->mutex); | 3797 | up_read(&l->rwsem); |
3732 | } | 3798 | } |
3733 | 3799 | ||
3734 | static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos) | 3800 | static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos) |
@@ -3774,7 +3840,7 @@ static void cgroup_release_pid_array(struct cgroup_pidlist *l) | |||
3774 | * pidlist_mutex, we have to take pidlist_mutex first. | 3840 | * pidlist_mutex, we have to take pidlist_mutex first. |
3775 | */ | 3841 | */ |
3776 | mutex_lock(&l->owner->pidlist_mutex); | 3842 | mutex_lock(&l->owner->pidlist_mutex); |
3777 | down_write(&l->mutex); | 3843 | down_write(&l->rwsem); |
3778 | BUG_ON(!l->use_count); | 3844 | BUG_ON(!l->use_count); |
3779 | if (!--l->use_count) { | 3845 | if (!--l->use_count) { |
3780 | /* we're the last user if refcount is 0; remove and free */ | 3846 | /* we're the last user if refcount is 0; remove and free */ |
@@ -3782,12 +3848,12 @@ static void cgroup_release_pid_array(struct cgroup_pidlist *l) | |||
3782 | mutex_unlock(&l->owner->pidlist_mutex); | 3848 | mutex_unlock(&l->owner->pidlist_mutex); |
3783 | pidlist_free(l->list); | 3849 | pidlist_free(l->list); |
3784 | put_pid_ns(l->key.ns); | 3850 | put_pid_ns(l->key.ns); |
3785 | up_write(&l->mutex); | 3851 | up_write(&l->rwsem); |
3786 | kfree(l); | 3852 | kfree(l); |
3787 | return; | 3853 | return; |
3788 | } | 3854 | } |
3789 | mutex_unlock(&l->owner->pidlist_mutex); | 3855 | mutex_unlock(&l->owner->pidlist_mutex); |
3790 | up_write(&l->mutex); | 3856 | up_write(&l->rwsem); |
3791 | } | 3857 | } |
3792 | 3858 | ||
3793 | static int cgroup_pidlist_release(struct inode *inode, struct file *file) | 3859 | static int cgroup_pidlist_release(struct inode *inode, struct file *file) |
@@ -3851,21 +3917,20 @@ static int cgroup_procs_open(struct inode *unused, struct file *file) | |||
3851 | return cgroup_pidlist_open(file, CGROUP_FILE_PROCS); | 3917 | return cgroup_pidlist_open(file, CGROUP_FILE_PROCS); |
3852 | } | 3918 | } |
3853 | 3919 | ||
3854 | static u64 cgroup_read_notify_on_release(struct cgroup *cgrp, | 3920 | static u64 cgroup_read_notify_on_release(struct cgroup_subsys_state *css, |
3855 | struct cftype *cft) | 3921 | struct cftype *cft) |
3856 | { | 3922 | { |
3857 | return notify_on_release(cgrp); | 3923 | return notify_on_release(css->cgroup); |
3858 | } | 3924 | } |
3859 | 3925 | ||
3860 | static int cgroup_write_notify_on_release(struct cgroup *cgrp, | 3926 | static int cgroup_write_notify_on_release(struct cgroup_subsys_state *css, |
3861 | struct cftype *cft, | 3927 | struct cftype *cft, u64 val) |
3862 | u64 val) | ||
3863 | { | 3928 | { |
3864 | clear_bit(CGRP_RELEASABLE, &cgrp->flags); | 3929 | clear_bit(CGRP_RELEASABLE, &css->cgroup->flags); |
3865 | if (val) | 3930 | if (val) |
3866 | set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); | 3931 | set_bit(CGRP_NOTIFY_ON_RELEASE, &css->cgroup->flags); |
3867 | else | 3932 | else |
3868 | clear_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); | 3933 | clear_bit(CGRP_NOTIFY_ON_RELEASE, &css->cgroup->flags); |
3869 | return 0; | 3934 | return 0; |
3870 | } | 3935 | } |
3871 | 3936 | ||
@@ -3895,18 +3960,18 @@ static void cgroup_event_remove(struct work_struct *work) | |||
3895 | { | 3960 | { |
3896 | struct cgroup_event *event = container_of(work, struct cgroup_event, | 3961 | struct cgroup_event *event = container_of(work, struct cgroup_event, |
3897 | remove); | 3962 | remove); |
3898 | struct cgroup *cgrp = event->cgrp; | 3963 | struct cgroup_subsys_state *css = event->css; |
3899 | 3964 | ||
3900 | remove_wait_queue(event->wqh, &event->wait); | 3965 | remove_wait_queue(event->wqh, &event->wait); |
3901 | 3966 | ||
3902 | event->cft->unregister_event(cgrp, event->cft, event->eventfd); | 3967 | event->cft->unregister_event(css, event->cft, event->eventfd); |
3903 | 3968 | ||
3904 | /* Notify userspace the event is going away. */ | 3969 | /* Notify userspace the event is going away. */ |
3905 | eventfd_signal(event->eventfd, 1); | 3970 | eventfd_signal(event->eventfd, 1); |
3906 | 3971 | ||
3907 | eventfd_ctx_put(event->eventfd); | 3972 | eventfd_ctx_put(event->eventfd); |
3908 | kfree(event); | 3973 | kfree(event); |
3909 | cgroup_dput(cgrp); | 3974 | css_put(css); |
3910 | } | 3975 | } |
3911 | 3976 | ||
3912 | /* | 3977 | /* |
@@ -3919,7 +3984,7 @@ static int cgroup_event_wake(wait_queue_t *wait, unsigned mode, | |||
3919 | { | 3984 | { |
3920 | struct cgroup_event *event = container_of(wait, | 3985 | struct cgroup_event *event = container_of(wait, |
3921 | struct cgroup_event, wait); | 3986 | struct cgroup_event, wait); |
3922 | struct cgroup *cgrp = event->cgrp; | 3987 | struct cgroup *cgrp = event->css->cgroup; |
3923 | unsigned long flags = (unsigned long)key; | 3988 | unsigned long flags = (unsigned long)key; |
3924 | 3989 | ||
3925 | if (flags & POLLHUP) { | 3990 | if (flags & POLLHUP) { |
@@ -3963,14 +4028,15 @@ static void cgroup_event_ptable_queue_proc(struct file *file, | |||
3963 | * Input must be in format '<event_fd> <control_fd> <args>'. | 4028 | * Input must be in format '<event_fd> <control_fd> <args>'. |
3964 | * Interpretation of args is defined by control file implementation. | 4029 | * Interpretation of args is defined by control file implementation. |
3965 | */ | 4030 | */ |
3966 | static int cgroup_write_event_control(struct cgroup *cgrp, struct cftype *cft, | 4031 | static int cgroup_write_event_control(struct cgroup_subsys_state *dummy_css, |
3967 | const char *buffer) | 4032 | struct cftype *cft, const char *buffer) |
3968 | { | 4033 | { |
3969 | struct cgroup_event *event = NULL; | 4034 | struct cgroup *cgrp = dummy_css->cgroup; |
3970 | struct cgroup *cgrp_cfile; | 4035 | struct cgroup_event *event; |
4036 | struct cgroup_subsys_state *cfile_css; | ||
3971 | unsigned int efd, cfd; | 4037 | unsigned int efd, cfd; |
3972 | struct file *efile = NULL; | 4038 | struct fd efile; |
3973 | struct file *cfile = NULL; | 4039 | struct fd cfile; |
3974 | char *endp; | 4040 | char *endp; |
3975 | int ret; | 4041 | int ret; |
3976 | 4042 | ||
@@ -3987,109 +4053,113 @@ static int cgroup_write_event_control(struct cgroup *cgrp, struct cftype *cft, | |||
3987 | event = kzalloc(sizeof(*event), GFP_KERNEL); | 4053 | event = kzalloc(sizeof(*event), GFP_KERNEL); |
3988 | if (!event) | 4054 | if (!event) |
3989 | return -ENOMEM; | 4055 | return -ENOMEM; |
3990 | event->cgrp = cgrp; | 4056 | |
3991 | INIT_LIST_HEAD(&event->list); | 4057 | INIT_LIST_HEAD(&event->list); |
3992 | init_poll_funcptr(&event->pt, cgroup_event_ptable_queue_proc); | 4058 | init_poll_funcptr(&event->pt, cgroup_event_ptable_queue_proc); |
3993 | init_waitqueue_func_entry(&event->wait, cgroup_event_wake); | 4059 | init_waitqueue_func_entry(&event->wait, cgroup_event_wake); |
3994 | INIT_WORK(&event->remove, cgroup_event_remove); | 4060 | INIT_WORK(&event->remove, cgroup_event_remove); |
3995 | 4061 | ||
3996 | efile = eventfd_fget(efd); | 4062 | efile = fdget(efd); |
3997 | if (IS_ERR(efile)) { | 4063 | if (!efile.file) { |
3998 | ret = PTR_ERR(efile); | 4064 | ret = -EBADF; |
3999 | goto fail; | 4065 | goto out_kfree; |
4000 | } | 4066 | } |
4001 | 4067 | ||
4002 | event->eventfd = eventfd_ctx_fileget(efile); | 4068 | event->eventfd = eventfd_ctx_fileget(efile.file); |
4003 | if (IS_ERR(event->eventfd)) { | 4069 | if (IS_ERR(event->eventfd)) { |
4004 | ret = PTR_ERR(event->eventfd); | 4070 | ret = PTR_ERR(event->eventfd); |
4005 | goto fail; | 4071 | goto out_put_efile; |
4006 | } | 4072 | } |
4007 | 4073 | ||
4008 | cfile = fget(cfd); | 4074 | cfile = fdget(cfd); |
4009 | if (!cfile) { | 4075 | if (!cfile.file) { |
4010 | ret = -EBADF; | 4076 | ret = -EBADF; |
4011 | goto fail; | 4077 | goto out_put_eventfd; |
4012 | } | 4078 | } |
4013 | 4079 | ||
4014 | /* the process needs read permission on the control file */ | 4080 | /* the process needs read permission on the control file */ |

4015 | /* AV: shouldn't we check that it's been opened for read instead? */ | 4081 | /* AV: shouldn't we check that it's been opened for read instead? */ |
4016 | ret = inode_permission(file_inode(cfile), MAY_READ); | 4082 | ret = inode_permission(file_inode(cfile.file), MAY_READ); |
4017 | if (ret < 0) | 4083 | if (ret < 0) |
4018 | goto fail; | 4084 | goto out_put_cfile; |
4019 | 4085 | ||
4020 | event->cft = __file_cft(cfile); | 4086 | event->cft = __file_cft(cfile.file); |
4021 | if (IS_ERR(event->cft)) { | 4087 | if (IS_ERR(event->cft)) { |
4022 | ret = PTR_ERR(event->cft); | 4088 | ret = PTR_ERR(event->cft); |
4023 | goto fail; | 4089 | goto out_put_cfile; |
4090 | } | ||
4091 | |||
4092 | if (!event->cft->ss) { | ||
4093 | ret = -EBADF; | ||
4094 | goto out_put_cfile; | ||
4024 | } | 4095 | } |
4025 | 4096 | ||
4026 | /* | 4097 | /* |
4027 | * The file to be monitored must be in the same cgroup as | 4098 | * Determine the css of @cfile, verify it belongs to the same |
4028 | * cgroup.event_control is. | 4099 | * cgroup as cgroup.event_control, and associate @event with it. |
4100 | * Remaining events are automatically removed on cgroup destruction | ||
4101 | * but the removal is asynchronous, so take an extra ref. | ||
4029 | */ | 4102 | */ |
4030 | cgrp_cfile = __d_cgrp(cfile->f_dentry->d_parent); | 4103 | rcu_read_lock(); |
4031 | if (cgrp_cfile != cgrp) { | 4104 | |
4032 | ret = -EINVAL; | 4105 | ret = -EINVAL; |
4033 | goto fail; | 4106 | event->css = cgroup_css(cgrp, event->cft->ss); |
4034 | } | 4107 | cfile_css = css_from_dir(cfile.file->f_dentry->d_parent, event->cft->ss); |
4108 | if (event->css && event->css == cfile_css && css_tryget(event->css)) | ||
4109 | ret = 0; | ||
4110 | |||
4111 | rcu_read_unlock(); | ||
4112 | if (ret) | ||
4113 | goto out_put_cfile; | ||
4035 | 4114 | ||
4036 | if (!event->cft->register_event || !event->cft->unregister_event) { | 4115 | if (!event->cft->register_event || !event->cft->unregister_event) { |
4037 | ret = -EINVAL; | 4116 | ret = -EINVAL; |
4038 | goto fail; | 4117 | goto out_put_css; |
4039 | } | 4118 | } |
4040 | 4119 | ||
4041 | ret = event->cft->register_event(cgrp, event->cft, | 4120 | ret = event->cft->register_event(event->css, event->cft, |
4042 | event->eventfd, buffer); | 4121 | event->eventfd, buffer); |
4043 | if (ret) | 4122 | if (ret) |
4044 | goto fail; | 4123 | goto out_put_css; |
4045 | 4124 | ||
4046 | efile->f_op->poll(efile, &event->pt); | 4125 | efile.file->f_op->poll(efile.file, &event->pt); |
4047 | |||
4048 | /* | ||
4049 | * Events should be removed after rmdir of cgroup directory, but before | ||
4050 | * destroying subsystem state objects. Let's take reference to cgroup | ||
4051 | * directory dentry to do that. | ||
4052 | */ | ||
4053 | dget(cgrp->dentry); | ||
4054 | 4126 | ||
4055 | spin_lock(&cgrp->event_list_lock); | 4127 | spin_lock(&cgrp->event_list_lock); |
4056 | list_add(&event->list, &cgrp->event_list); | 4128 | list_add(&event->list, &cgrp->event_list); |
4057 | spin_unlock(&cgrp->event_list_lock); | 4129 | spin_unlock(&cgrp->event_list_lock); |
4058 | 4130 | ||
4059 | fput(cfile); | 4131 | fdput(cfile); |
4060 | fput(efile); | 4132 | fdput(efile); |
4061 | 4133 | ||
4062 | return 0; | 4134 | return 0; |
4063 | 4135 | ||
4064 | fail: | 4136 | out_put_css: |
4065 | if (cfile) | 4137 | css_put(event->css); |
4066 | fput(cfile); | 4138 | out_put_cfile: |
4067 | 4139 | fdput(cfile); | |
4068 | if (event && event->eventfd && !IS_ERR(event->eventfd)) | 4140 | out_put_eventfd: |
4069 | eventfd_ctx_put(event->eventfd); | 4141 | eventfd_ctx_put(event->eventfd); |
4070 | 4142 | out_put_efile: | |
4071 | if (!IS_ERR_OR_NULL(efile)) | 4143 | fdput(efile); |
4072 | fput(efile); | 4144 | out_kfree: |
4073 | |||
4074 | kfree(event); | 4145 | kfree(event); |
4075 | 4146 | ||
4076 | return ret; | 4147 | return ret; |
4077 | } | 4148 | } |
4078 | 4149 | ||
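For reference, the interface above is driven from userspace by writing "<event_fd> <control_fd> <args>" to cgroup.event_control. A minimal sketch of a client arming a memcg usage threshold follows; the mount point, group name and 5 MiB threshold are illustrative assumptions, and the threshold argument is parsed by the memory controller, not by cgroup core:

/* Sketch: arm an eventfd notification through cgroup.event_control. */
#include <sys/eventfd.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
        const char *base = "/sys/fs/cgroup/memory/grp";      /* assumed path */
        char path[256], buf[64];
        uint64_t count;
        int efd, cfd, ctl;

        efd = eventfd(0, 0);                            /* <event_fd> */
        snprintf(path, sizeof(path), "%s/memory.usage_in_bytes", base);
        cfd = open(path, O_RDONLY);                     /* <control_fd> */
        snprintf(path, sizeof(path), "%s/cgroup.event_control", base);
        ctl = open(path, O_WRONLY);

        snprintf(buf, sizeof(buf), "%d %d 5242880", efd, cfd);
        if (write(ctl, buf, strlen(buf)) < 0)           /* registers the event */
                return 1;
        read(efd, &count, sizeof(count));               /* blocks until it fires */
        return 0;
}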
4079 | static u64 cgroup_clone_children_read(struct cgroup *cgrp, | 4150 | static u64 cgroup_clone_children_read(struct cgroup_subsys_state *css, |
4080 | struct cftype *cft) | 4151 | struct cftype *cft) |
4081 | { | 4152 | { |
4082 | return test_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags); | 4153 | return test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags); |
4083 | } | 4154 | } |
4084 | 4155 | ||
4085 | static int cgroup_clone_children_write(struct cgroup *cgrp, | 4156 | static int cgroup_clone_children_write(struct cgroup_subsys_state *css, |
4086 | struct cftype *cft, | 4157 | struct cftype *cft, u64 val) |
4087 | u64 val) | ||
4088 | { | 4158 | { |
4089 | if (val) | 4159 | if (val) |
4090 | set_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags); | 4160 | set_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags); |
4091 | else | 4161 | else |
4092 | clear_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags); | 4162 | clear_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags); |
4093 | return 0; | 4163 | return 0; |
4094 | } | 4164 | } |
4095 | 4165 | ||
@@ -4148,36 +4218,34 @@ static struct cftype cgroup_base_files[] = { | |||
4148 | }; | 4218 | }; |
4149 | 4219 | ||
4150 | /** | 4220 | /** |
4151 | * cgroup_populate_dir - selectively creation of files in a directory | 4221 | * cgroup_populate_dir - create subsys files in a cgroup directory |
4152 | * @cgrp: target cgroup | 4222 | * @cgrp: target cgroup |
4153 | * @base_files: true if the base files should be added | ||
4154 | * @subsys_mask: mask of the subsystem ids whose files should be added | 4223 | * @subsys_mask: mask of the subsystem ids whose files should be added |
4224 | * | ||
4225 | * On failure, no file is added. | ||
4155 | */ | 4226 | */ |
4156 | static int cgroup_populate_dir(struct cgroup *cgrp, bool base_files, | 4227 | static int cgroup_populate_dir(struct cgroup *cgrp, unsigned long subsys_mask) |
4157 | unsigned long subsys_mask) | ||
4158 | { | 4228 | { |
4159 | int err; | ||
4160 | struct cgroup_subsys *ss; | 4229 | struct cgroup_subsys *ss; |
4161 | 4230 | int i, ret = 0; | |
4162 | if (base_files) { | ||
4163 | err = cgroup_addrm_files(cgrp, NULL, cgroup_base_files, true); | ||
4164 | if (err < 0) | ||
4165 | return err; | ||
4166 | } | ||
4167 | 4231 | ||
4168 | /* process cftsets of each subsystem */ | 4232 | /* process cftsets of each subsystem */ |
4169 | for_each_root_subsys(cgrp->root, ss) { | 4233 | for_each_subsys(ss, i) { |
4170 | struct cftype_set *set; | 4234 | struct cftype_set *set; |
4171 | if (!test_bit(ss->subsys_id, &subsys_mask)) | 4235 | |
4236 | if (!test_bit(i, &subsys_mask)) | ||
4172 | continue; | 4237 | continue; |
4173 | 4238 | ||
4174 | list_for_each_entry(set, &ss->cftsets, node) | 4239 | list_for_each_entry(set, &ss->cftsets, node) { |
4175 | cgroup_addrm_files(cgrp, ss, set->cfts, true); | 4240 | ret = cgroup_addrm_files(cgrp, set->cfts, true); |
4241 | if (ret < 0) | ||
4242 | goto err; | ||
4243 | } | ||
4176 | } | 4244 | } |
4177 | 4245 | ||
4178 | /* This cgroup is ready now */ | 4246 | /* This cgroup is ready now */ |
4179 | for_each_root_subsys(cgrp->root, ss) { | 4247 | for_each_root_subsys(cgrp->root, ss) { |
4180 | struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id]; | 4248 | struct cgroup_subsys_state *css = cgroup_css(cgrp, ss); |
4181 | struct css_id *id = rcu_dereference_protected(css->id, true); | 4249 | struct css_id *id = rcu_dereference_protected(css->id, true); |
4182 | 4250 | ||
4183 | /* | 4251 | /* |
@@ -4190,14 +4258,57 @@ static int cgroup_populate_dir(struct cgroup *cgrp, bool base_files, | |||
4190 | } | 4258 | } |
4191 | 4259 | ||
4192 | return 0; | 4260 | return 0; |
4261 | err: | ||
4262 | cgroup_clear_dir(cgrp, subsys_mask); | ||
4263 | return ret; | ||
4264 | } | ||
4265 | |||
4266 | /* | ||
4267 | * css destruction is a four-stage process. | ||
4268 | * | ||
4269 | * 1. Destruction starts. Killing of the percpu_ref is initiated. | ||
4270 | * Implemented in kill_css(). | ||
4271 | * | ||
4272 | * 2. When the percpu_ref is confirmed to be visible as killed on all CPUs | ||
4273 | * and thus css_tryget() is guaranteed to fail, the css can be offlined | ||
4274 | * by invoking offline_css(). After offlining, the base ref is put. | ||
4275 | * Implemented in css_killed_work_fn(). | ||
4276 | * | ||
4277 | * 3. When the percpu_ref reaches zero, the only possible remaining | ||
4278 | * accessors are inside RCU read sections. css_release() schedules the | ||
4279 | * RCU callback. | ||
4280 | * | ||
4281 | * 4. After the grace period, the css can be freed. Implemented in | ||
4282 | * css_free_work_fn(). | ||
4283 | * | ||
4284 | * It is actually hairier because both steps 2 and 4 require process context | ||
4285 | * and thus involve punting to css->destroy_work, adding two additional | ||
4286 | * steps to the already complex sequence. | ||
4287 | */ | ||
4288 | static void css_free_work_fn(struct work_struct *work) | ||
4289 | { | ||
4290 | struct cgroup_subsys_state *css = | ||
4291 | container_of(work, struct cgroup_subsys_state, destroy_work); | ||
4292 | struct cgroup *cgrp = css->cgroup; | ||
4293 | |||
4294 | if (css->parent) | ||
4295 | css_put(css->parent); | ||
4296 | |||
4297 | css->ss->css_free(css); | ||
4298 | cgroup_dput(cgrp); | ||
4193 | } | 4299 | } |
4194 | 4300 | ||
4195 | static void css_dput_fn(struct work_struct *work) | 4301 | static void css_free_rcu_fn(struct rcu_head *rcu_head) |
4196 | { | 4302 | { |
4197 | struct cgroup_subsys_state *css = | 4303 | struct cgroup_subsys_state *css = |
4198 | container_of(work, struct cgroup_subsys_state, dput_work); | 4304 | container_of(rcu_head, struct cgroup_subsys_state, rcu_head); |
4199 | 4305 | ||
4200 | cgroup_dput(css->cgroup); | 4306 | /* |
4307 | * css holds an extra ref to @cgrp->dentry which is put on the last | ||
4308 | * css_put(). dput() requires process context which we don't have. | ||
4309 | */ | ||
4310 | INIT_WORK(&css->destroy_work, css_free_work_fn); | ||
4311 | schedule_work(&css->destroy_work); | ||
4201 | } | 4312 | } |
4202 | 4313 | ||
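The css_free_rcu_fn()/css_free_work_fn() pair above is an instance of a generic shape: an RCU callback runs in softirq context, so any teardown that may sleep has to be bounced to a workqueue. A stand-alone sketch of the same shape, with hypothetical "foo" names:

#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

struct foo {
        struct rcu_head rcu_head;
        struct work_struct destroy_work;
};

static void foo_free_work_fn(struct work_struct *work)
{
        struct foo *f = container_of(work, struct foo, destroy_work);

        /* process context: sleeping operations such as dput() are safe */
        kfree(f);
}

static void foo_free_rcu_fn(struct rcu_head *head)
{
        struct foo *f = container_of(head, struct foo, rcu_head);

        /* softirq context: can't sleep, so punt to a work item */
        INIT_WORK(&f->destroy_work, foo_free_work_fn);
        schedule_work(&f->destroy_work);
}

/* teardown entry point elsewhere: call_rcu(&f->rcu_head, foo_free_rcu_fn); */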
4203 | static void css_release(struct percpu_ref *ref) | 4314 | static void css_release(struct percpu_ref *ref) |
@@ -4205,49 +4316,47 @@ static void css_release(struct percpu_ref *ref) | |||
4205 | struct cgroup_subsys_state *css = | 4316 | struct cgroup_subsys_state *css = |
4206 | container_of(ref, struct cgroup_subsys_state, refcnt); | 4317 | container_of(ref, struct cgroup_subsys_state, refcnt); |
4207 | 4318 | ||
4208 | schedule_work(&css->dput_work); | 4319 | call_rcu(&css->rcu_head, css_free_rcu_fn); |
4209 | } | 4320 | } |
4210 | 4321 | ||
4211 | static void init_cgroup_css(struct cgroup_subsys_state *css, | 4322 | static void init_css(struct cgroup_subsys_state *css, struct cgroup_subsys *ss, |
4212 | struct cgroup_subsys *ss, | 4323 | struct cgroup *cgrp) |
4213 | struct cgroup *cgrp) | ||
4214 | { | 4324 | { |
4215 | css->cgroup = cgrp; | 4325 | css->cgroup = cgrp; |
4326 | css->ss = ss; | ||
4216 | css->flags = 0; | 4327 | css->flags = 0; |
4217 | css->id = NULL; | 4328 | css->id = NULL; |
4218 | if (cgrp == cgroup_dummy_top) | 4329 | |
4330 | if (cgrp->parent) | ||
4331 | css->parent = cgroup_css(cgrp->parent, ss); | ||
4332 | else | ||
4219 | css->flags |= CSS_ROOT; | 4333 | css->flags |= CSS_ROOT; |
4220 | BUG_ON(cgrp->subsys[ss->subsys_id]); | ||
4221 | cgrp->subsys[ss->subsys_id] = css; | ||
4222 | 4334 | ||
4223 | /* | 4335 | BUG_ON(cgroup_css(cgrp, ss)); |
4224 | * css holds an extra ref to @cgrp->dentry which is put on the last | ||
4225 | * css_put(). dput() requires process context, which css_put() may | ||
4226 | * be called without. @css->dput_work will be used to invoke | ||
4227 | * dput() asynchronously from css_put(). | ||
4228 | */ | ||
4229 | INIT_WORK(&css->dput_work, css_dput_fn); | ||
4230 | } | 4336 | } |
4231 | 4337 | ||
4232 | /* invoke ->post_create() on a new CSS and mark it online if successful */ | 4338 | /* invoke ->css_online() on a new CSS and mark it online if successful */ |
4233 | static int online_css(struct cgroup_subsys *ss, struct cgroup *cgrp) | 4339 | static int online_css(struct cgroup_subsys_state *css) |
4234 | { | 4340 | { |
4341 | struct cgroup_subsys *ss = css->ss; | ||
4235 | int ret = 0; | 4342 | int ret = 0; |
4236 | 4343 | ||
4237 | lockdep_assert_held(&cgroup_mutex); | 4344 | lockdep_assert_held(&cgroup_mutex); |
4238 | 4345 | ||
4239 | if (ss->css_online) | 4346 | if (ss->css_online) |
4240 | ret = ss->css_online(cgrp); | 4347 | ret = ss->css_online(css); |
4241 | if (!ret) | 4348 | if (!ret) { |
4242 | cgrp->subsys[ss->subsys_id]->flags |= CSS_ONLINE; | 4349 | css->flags |= CSS_ONLINE; |
4350 | css->cgroup->nr_css++; | ||
4351 | rcu_assign_pointer(css->cgroup->subsys[ss->subsys_id], css); | ||
4352 | } | ||
4243 | return ret; | 4353 | return ret; |
4244 | } | 4354 | } |
4245 | 4355 | ||
4246 | /* if the CSS is online, invoke ->pre_destory() on it and mark it offline */ | 4356 | /* if the CSS is online, invoke ->css_offline() on it and mark it offline */ |
4247 | static void offline_css(struct cgroup_subsys *ss, struct cgroup *cgrp) | 4357 | static void offline_css(struct cgroup_subsys_state *css) |
4248 | __releases(&cgroup_mutex) __acquires(&cgroup_mutex) | ||
4249 | { | 4358 | { |
4250 | struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id]; | 4359 | struct cgroup_subsys *ss = css->ss; |
4251 | 4360 | ||
4252 | lockdep_assert_held(&cgroup_mutex); | 4361 | lockdep_assert_held(&cgroup_mutex); |
4253 | 4362 | ||
@@ -4255,9 +4364,11 @@ static void offline_css(struct cgroup_subsys *ss, struct cgroup *cgrp) | |||
4255 | return; | 4364 | return; |
4256 | 4365 | ||
4257 | if (ss->css_offline) | 4366 | if (ss->css_offline) |
4258 | ss->css_offline(cgrp); | 4367 | ss->css_offline(css); |
4259 | 4368 | ||
4260 | cgrp->subsys[ss->subsys_id]->flags &= ~CSS_ONLINE; | 4369 | css->flags &= ~CSS_ONLINE; |
4370 | css->cgroup->nr_css--; | ||
4371 | RCU_INIT_POINTER(css->cgroup->subsys[ss->subsys_id], css); | ||
4261 | } | 4372 | } |
4262 | 4373 | ||
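online_css()/offline_css() now drive the per-subsystem callbacks with a css argument rather than a cgroup. A hypothetical minimal subsystem written against the refactored signatures (all "foo" names are invented for illustration):

#include <linux/atomic.h>
#include <linux/cgroup.h>
#include <linux/err.h>
#include <linux/slab.h>

struct foo_css {
        struct cgroup_subsys_state css;
        atomic_t nr_widgets;
};

static inline struct foo_css *foo_css(struct cgroup_subsys_state *css)
{
        return css ? container_of(css, struct foo_css, css) : NULL;
}

static struct cgroup_subsys_state *
foo_css_alloc(struct cgroup_subsys_state *parent_css)
{
        struct foo_css *fc = kzalloc(sizeof(*fc), GFP_KERNEL);

        return fc ? &fc->css : ERR_PTR(-ENOMEM);
}

static int foo_css_online(struct cgroup_subsys_state *css)
{
        atomic_set(&foo_css(css)->nr_widgets, 0);
        return 0;               /* a nonzero return aborts cgroup creation */
}

static void foo_css_offline(struct cgroup_subsys_state *css)
{
        /* css_tryget() is already guaranteed to fail when this runs */
}

static void foo_css_free(struct cgroup_subsys_state *css)
{
        kfree(foo_css(css));
}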
4263 | /* | 4374 | /* |
@@ -4271,6 +4382,7 @@ static void offline_css(struct cgroup_subsys *ss, struct cgroup *cgrp) | |||
4271 | static long cgroup_create(struct cgroup *parent, struct dentry *dentry, | 4382 | static long cgroup_create(struct cgroup *parent, struct dentry *dentry, |
4272 | umode_t mode) | 4383 | umode_t mode) |
4273 | { | 4384 | { |
4385 | struct cgroup_subsys_state *css_ar[CGROUP_SUBSYS_COUNT] = { }; | ||
4274 | struct cgroup *cgrp; | 4386 | struct cgroup *cgrp; |
4275 | struct cgroup_name *name; | 4387 | struct cgroup_name *name; |
4276 | struct cgroupfs_root *root = parent->root; | 4388 | struct cgroupfs_root *root = parent->root; |
@@ -4288,7 +4400,11 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, | |||
4288 | goto err_free_cgrp; | 4400 | goto err_free_cgrp; |
4289 | rcu_assign_pointer(cgrp->name, name); | 4401 | rcu_assign_pointer(cgrp->name, name); |
4290 | 4402 | ||
4291 | cgrp->id = ida_simple_get(&root->cgroup_ida, 1, 0, GFP_KERNEL); | 4403 | /* |
4404 | * Temporarily set the pointer to NULL, so idr_find() won't return | ||
4405 | * a half-baked cgroup. | ||
4406 | */ | ||
4407 | cgrp->id = idr_alloc(&root->cgroup_idr, NULL, 1, 0, GFP_KERNEL); | ||
4292 | if (cgrp->id < 0) | 4408 | if (cgrp->id < 0) |
4293 | goto err_free_name; | 4409 | goto err_free_name; |
4294 | 4410 | ||
@@ -4317,6 +4433,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, | |||
4317 | cgrp->dentry = dentry; | 4433 | cgrp->dentry = dentry; |
4318 | 4434 | ||
4319 | cgrp->parent = parent; | 4435 | cgrp->parent = parent; |
4436 | cgrp->dummy_css.parent = &parent->dummy_css; | ||
4320 | cgrp->root = parent->root; | 4437 | cgrp->root = parent->root; |
4321 | 4438 | ||
4322 | if (notify_on_release(parent)) | 4439 | if (notify_on_release(parent)) |
@@ -4328,22 +4445,21 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, | |||
4328 | for_each_root_subsys(root, ss) { | 4445 | for_each_root_subsys(root, ss) { |
4329 | struct cgroup_subsys_state *css; | 4446 | struct cgroup_subsys_state *css; |
4330 | 4447 | ||
4331 | css = ss->css_alloc(cgrp); | 4448 | css = ss->css_alloc(cgroup_css(parent, ss)); |
4332 | if (IS_ERR(css)) { | 4449 | if (IS_ERR(css)) { |
4333 | err = PTR_ERR(css); | 4450 | err = PTR_ERR(css); |
4334 | goto err_free_all; | 4451 | goto err_free_all; |
4335 | } | 4452 | } |
4453 | css_ar[ss->subsys_id] = css; | ||
4336 | 4454 | ||
4337 | err = percpu_ref_init(&css->refcnt, css_release); | 4455 | err = percpu_ref_init(&css->refcnt, css_release); |
4338 | if (err) { | 4456 | if (err) |
4339 | ss->css_free(cgrp); | ||
4340 | goto err_free_all; | 4457 | goto err_free_all; |
4341 | } | ||
4342 | 4458 | ||
4343 | init_cgroup_css(css, ss, cgrp); | 4459 | init_css(css, ss, cgrp); |
4344 | 4460 | ||
4345 | if (ss->use_id) { | 4461 | if (ss->use_id) { |
4346 | err = alloc_css_id(ss, parent, cgrp); | 4462 | err = alloc_css_id(css); |
4347 | if (err) | 4463 | if (err) |
4348 | goto err_free_all; | 4464 | goto err_free_all; |
4349 | } | 4465 | } |
@@ -4365,16 +4481,22 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, | |||
4365 | list_add_tail_rcu(&cgrp->sibling, &cgrp->parent->children); | 4481 | list_add_tail_rcu(&cgrp->sibling, &cgrp->parent->children); |
4366 | root->number_of_cgroups++; | 4482 | root->number_of_cgroups++; |
4367 | 4483 | ||
4368 | /* each css holds a ref to the cgroup's dentry */ | 4484 | /* each css holds a ref to the cgroup's dentry and the parent css */ |
4369 | for_each_root_subsys(root, ss) | 4485 | for_each_root_subsys(root, ss) { |
4486 | struct cgroup_subsys_state *css = css_ar[ss->subsys_id]; | ||
4487 | |||
4370 | dget(dentry); | 4488 | dget(dentry); |
4489 | css_get(css->parent); | ||
4490 | } | ||
4371 | 4491 | ||
4372 | /* hold a ref to the parent's dentry */ | 4492 | /* hold a ref to the parent's dentry */ |
4373 | dget(parent->dentry); | 4493 | dget(parent->dentry); |
4374 | 4494 | ||
4375 | /* creation succeeded, notify subsystems */ | 4495 | /* creation succeeded, notify subsystems */ |
4376 | for_each_root_subsys(root, ss) { | 4496 | for_each_root_subsys(root, ss) { |
4377 | err = online_css(ss, cgrp); | 4497 | struct cgroup_subsys_state *css = css_ar[ss->subsys_id]; |
4498 | |||
4499 | err = online_css(css); | ||
4378 | if (err) | 4500 | if (err) |
4379 | goto err_destroy; | 4501 | goto err_destroy; |
4380 | 4502 | ||
@@ -4388,7 +4510,13 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, | |||
4388 | } | 4510 | } |
4389 | } | 4511 | } |
4390 | 4512 | ||
4391 | err = cgroup_populate_dir(cgrp, true, root->subsys_mask); | 4513 | idr_replace(&root->cgroup_idr, cgrp, cgrp->id); |
4514 | |||
4515 | err = cgroup_addrm_files(cgrp, cgroup_base_files, true); | ||
4516 | if (err) | ||
4517 | goto err_destroy; | ||
4518 | |||
4519 | err = cgroup_populate_dir(cgrp, root->subsys_mask); | ||
4392 | if (err) | 4520 | if (err) |
4393 | goto err_destroy; | 4521 | goto err_destroy; |
4394 | 4522 | ||
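The idr_alloc(..., NULL, ...) earlier in cgroup_create() paired with the idr_replace() above is a reserve-then-publish idiom: the ID exists early so error unwinding stays simple, while concurrent idr_find() sees NULL until the object is complete. The idiom in isolation, with a hypothetical object:

#include <linux/gfp.h>
#include <linux/idr.h>

struct obj {
        int id;
};

static int publish_obj(struct idr *idr, struct obj *o)
{
        /* reserve an ID but map it to NULL so lookups can't see @o yet */
        int id = idr_alloc(idr, NULL, 1, 0, GFP_KERNEL);

        if (id < 0)
                return id;
        o->id = id;

        /* ... failure-prone construction; idr_find(idr, id) returns NULL
         * for the whole window, so nobody observes a half-baked @o ... */

        idr_replace(idr, o, id);        /* publish the finished object */
        return 0;
}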
@@ -4399,18 +4527,18 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, | |||
4399 | 4527 | ||
4400 | err_free_all: | 4528 | err_free_all: |
4401 | for_each_root_subsys(root, ss) { | 4529 | for_each_root_subsys(root, ss) { |
4402 | struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id]; | 4530 | struct cgroup_subsys_state *css = css_ar[ss->subsys_id]; |
4403 | 4531 | ||
4404 | if (css) { | 4532 | if (css) { |
4405 | percpu_ref_cancel_init(&css->refcnt); | 4533 | percpu_ref_cancel_init(&css->refcnt); |
4406 | ss->css_free(cgrp); | 4534 | ss->css_free(css); |
4407 | } | 4535 | } |
4408 | } | 4536 | } |
4409 | mutex_unlock(&cgroup_mutex); | 4537 | mutex_unlock(&cgroup_mutex); |
4410 | /* Release the reference count that we took on the superblock */ | 4538 | /* Release the reference count that we took on the superblock */ |
4411 | deactivate_super(sb); | 4539 | deactivate_super(sb); |
4412 | err_free_id: | 4540 | err_free_id: |
4413 | ida_simple_remove(&root->cgroup_ida, cgrp->id); | 4541 | idr_remove(&root->cgroup_idr, cgrp->id); |
4414 | err_free_name: | 4542 | err_free_name: |
4415 | kfree(rcu_dereference_raw(cgrp->name)); | 4543 | kfree(rcu_dereference_raw(cgrp->name)); |
4416 | err_free_cgrp: | 4544 | err_free_cgrp: |
@@ -4432,22 +4560,84 @@ static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) | |||
4432 | return cgroup_create(c_parent, dentry, mode | S_IFDIR); | 4560 | return cgroup_create(c_parent, dentry, mode | S_IFDIR); |
4433 | } | 4561 | } |
4434 | 4562 | ||
4435 | static void cgroup_css_killed(struct cgroup *cgrp) | 4563 | /* |
4564 | * This is called when the refcnt of a css is confirmed to be killed. | ||
4565 | * css_tryget() is now guaranteed to fail. | ||
4566 | */ | ||
4567 | static void css_killed_work_fn(struct work_struct *work) | ||
4436 | { | 4568 | { |
4437 | if (!atomic_dec_and_test(&cgrp->css_kill_cnt)) | 4569 | struct cgroup_subsys_state *css = |
4438 | return; | 4570 | container_of(work, struct cgroup_subsys_state, destroy_work); |
4571 | struct cgroup *cgrp = css->cgroup; | ||
4439 | 4572 | ||
4440 | /* percpu ref's of all css's are killed, kick off the next step */ | 4573 | mutex_lock(&cgroup_mutex); |
4441 | INIT_WORK(&cgrp->destroy_work, cgroup_offline_fn); | 4574 | |
4442 | schedule_work(&cgrp->destroy_work); | 4575 | /* |
4576 | * css_tryget() is guaranteed to fail now. Tell subsystems to | ||
4577 | * initiate destruction. | ||
4578 | */ | ||
4579 | offline_css(css); | ||
4580 | |||
4581 | /* | ||
4582 | * If @cgrp is marked dead, it's waiting for refs of all css's to | ||
4583 | * be disabled before proceeding to the second phase of cgroup | ||
4584 | * destruction. If we are the last one, kick it off. | ||
4585 | */ | ||
4586 | if (!cgrp->nr_css && cgroup_is_dead(cgrp)) | ||
4587 | cgroup_destroy_css_killed(cgrp); | ||
4588 | |||
4589 | mutex_unlock(&cgroup_mutex); | ||
4590 | |||
4591 | /* | ||
4592 | * Put the css refs from kill_css(). Each css holds an extra | ||
4593 | * reference to the cgroup's dentry and cgroup removal proceeds | ||
4594 | * regardless of css refs. On the last put of each css, whenever | ||
4595 | * that may be, the extra dentry ref is put so that dentry | ||
4596 | * destruction happens only after all css's are released. | ||
4597 | */ | ||
4598 | css_put(css); | ||
4443 | } | 4599 | } |
4444 | 4600 | ||
4445 | static void css_ref_killed_fn(struct percpu_ref *ref) | 4601 | /* css kill confirmation processing requires process context, bounce */ |
4602 | static void css_killed_ref_fn(struct percpu_ref *ref) | ||
4446 | { | 4603 | { |
4447 | struct cgroup_subsys_state *css = | 4604 | struct cgroup_subsys_state *css = |
4448 | container_of(ref, struct cgroup_subsys_state, refcnt); | 4605 | container_of(ref, struct cgroup_subsys_state, refcnt); |
4449 | 4606 | ||
4450 | cgroup_css_killed(css->cgroup); | 4607 | INIT_WORK(&css->destroy_work, css_killed_work_fn); |
4608 | schedule_work(&css->destroy_work); | ||
4609 | } | ||
4610 | |||
4611 | /** | ||
4612 | * kill_css - destroy a css | ||
4613 | * @css: css to destroy | ||
4614 | * | ||
4615 | * This function initiates destruction of @css by removing cgroup interface | ||
4616 | * files and putting its base reference. ->css_offline() will be invoked | ||
4617 | * asynchronously once css_tryget() is guaranteed to fail; when the | ||
4618 | * reference count reaches zero, @css will be released. | ||
4619 | */ | ||
4620 | static void kill_css(struct cgroup_subsys_state *css) | ||
4621 | { | ||
4622 | cgroup_clear_dir(css->cgroup, 1 << css->ss->subsys_id); | ||
4623 | |||
4624 | /* | ||
4625 | * Killing would put the base ref, but we need to keep it alive | ||
4626 | * until after ->css_offline(). | ||
4627 | */ | ||
4628 | css_get(css); | ||
4629 | |||
4630 | /* | ||
4631 | * cgroup core guarantees that, by the time ->css_offline() is | ||
4632 | * invoked, no new css reference will be given out via | ||
4633 | * css_tryget(). We can't simply call percpu_ref_kill() and | ||
4634 | * proceed to offlining css's because percpu_ref_kill() doesn't | ||
4635 | * guarantee that the ref is seen as killed on all CPUs on return. | ||
4636 | * | ||
4637 | * Use percpu_ref_kill_and_confirm() to get notifications as each | ||
4638 | * css is confirmed to be seen as killed on all CPUs. | ||
4639 | */ | ||
4640 | percpu_ref_kill_and_confirm(&css->refcnt, css_killed_ref_fn); | ||
4451 | } | 4641 | } |
4452 | 4642 | ||
4453 | /** | 4643 | /** |
@@ -4513,41 +4703,19 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) | |||
4513 | return -EBUSY; | 4703 | return -EBUSY; |
4514 | 4704 | ||
4515 | /* | 4705 | /* |
4516 | * Block new css_tryget() by killing css refcnts. cgroup core | 4706 | * Initiate massacre of all css's. cgroup_destroy_css_killed() |
4517 | * guarantees that, by the time ->css_offline() is invoked, no new | 4707 | * will be invoked to perform the rest of destruction once the |
4518 | * css reference will be given out via css_tryget(). We can't | 4708 | * percpu refs of all css's are confirmed to be killed. |
4519 | * simply call percpu_ref_kill() and proceed to offlining css's | ||
4520 | * because percpu_ref_kill() doesn't guarantee that the ref is seen | ||
4521 | * as killed on all CPUs on return. | ||
4522 | * | ||
4523 | * Use percpu_ref_kill_and_confirm() to get notifications as each | ||
4524 | * css is confirmed to be seen as killed on all CPUs. The | ||
4525 | * notification callback keeps track of the number of css's to be | ||
4526 | * killed and schedules cgroup_offline_fn() to perform the rest of | ||
4527 | * destruction once the percpu refs of all css's are confirmed to | ||
4528 | * be killed. | ||
4529 | */ | 4709 | */ |
4530 | atomic_set(&cgrp->css_kill_cnt, 1); | 4710 | for_each_root_subsys(cgrp->root, ss) |
4531 | for_each_root_subsys(cgrp->root, ss) { | 4711 | kill_css(cgroup_css(cgrp, ss)); |
4532 | struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id]; | ||
4533 | |||
4534 | /* | ||
4535 | * Killing would put the base ref, but we need to keep it | ||
4536 | * alive until after ->css_offline. | ||
4537 | */ | ||
4538 | percpu_ref_get(&css->refcnt); | ||
4539 | |||
4540 | atomic_inc(&cgrp->css_kill_cnt); | ||
4541 | percpu_ref_kill_and_confirm(&css->refcnt, css_ref_killed_fn); | ||
4542 | } | ||
4543 | cgroup_css_killed(cgrp); | ||
4544 | 4712 | ||
4545 | /* | 4713 | /* |
4546 | * Mark @cgrp dead. This prevents further task migration and child | 4714 | * Mark @cgrp dead. This prevents further task migration and child |
4547 | * creation by disabling cgroup_lock_live_group(). Note that | 4715 | * creation by disabling cgroup_lock_live_group(). Note that |
4548 | * CGRP_DEAD assertion is depended upon by cgroup_next_sibling() to | 4716 | * CGRP_DEAD assertion is depended upon by css_next_child() to |
4549 | * resume iteration after dropping RCU read lock. See | 4717 | * resume iteration after dropping RCU read lock. See |
4550 | * cgroup_next_sibling() for details. | 4718 | * css_next_child() for details. |
4551 | */ | 4719 | */ |
4552 | set_bit(CGRP_DEAD, &cgrp->flags); | 4720 | set_bit(CGRP_DEAD, &cgrp->flags); |
4553 | 4721 | ||
@@ -4558,9 +4726,20 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) | |||
4558 | raw_spin_unlock(&release_list_lock); | 4726 | raw_spin_unlock(&release_list_lock); |
4559 | 4727 | ||
4560 | /* | 4728 | /* |
4561 | * Remove @cgrp directory. The removal puts the base ref but we | 4729 | * If @cgrp has css's attached, the second stage of cgroup |
4562 | * aren't quite done with @cgrp yet, so hold onto it. | 4730 | * destruction is kicked off from css_killed_work_fn() after the |
4731 | * refs of all attached css's are killed. If @cgrp doesn't have | ||
4732 | * any css, we kick it off here. | ||
4733 | */ | ||
4734 | if (!cgrp->nr_css) | ||
4735 | cgroup_destroy_css_killed(cgrp); | ||
4736 | |||
4737 | /* | ||
4738 | * Clear the base files and remove @cgrp directory. The removal | ||
4739 | * puts the base ref but we aren't quite done with @cgrp yet, so | ||
4740 | * hold onto it. | ||
4563 | */ | 4741 | */ |
4742 | cgroup_addrm_files(cgrp, cgroup_base_files, false); | ||
4564 | dget(d); | 4743 | dget(d); |
4565 | cgroup_d_remove_dir(d); | 4744 | cgroup_d_remove_dir(d); |
4566 | 4745 | ||
@@ -4580,50 +4759,36 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) | |||
4580 | }; | 4759 | }; |
4581 | 4760 | ||
4582 | /** | 4761 | /** |
4583 | * cgroup_offline_fn - the second step of cgroup destruction | 4762 | * cgroup_destroy_css_killed - the second step of cgroup destruction |
4584 | * @work: cgroup->destroy_free_work | 4763 | * @cgrp: cgroup which is being destroyed |
4585 | * | 4764 | * |
4586 | * This function is invoked from a work item for a cgroup which is being | 4765 | * This function is invoked from a work item for a cgroup which is being |
4587 | * destroyed after the percpu refcnts of all css's are guaranteed to be | 4766 | * destroyed after all css's are offlined and performs the rest of |
4588 | * seen as killed on all CPUs, and performs the rest of destruction. This | 4767 | * destruction. This is the second step of destruction described in the |
4589 | * is the second step of destruction described in the comment above | 4768 | * comment above cgroup_destroy_locked(). |
4590 | * cgroup_destroy_locked(). | ||
4591 | */ | 4769 | */ |
4592 | static void cgroup_offline_fn(struct work_struct *work) | 4770 | static void cgroup_destroy_css_killed(struct cgroup *cgrp) |
4593 | { | 4771 | { |
4594 | struct cgroup *cgrp = container_of(work, struct cgroup, destroy_work); | ||
4595 | struct cgroup *parent = cgrp->parent; | 4772 | struct cgroup *parent = cgrp->parent; |
4596 | struct dentry *d = cgrp->dentry; | 4773 | struct dentry *d = cgrp->dentry; |
4597 | struct cgroup_subsys *ss; | ||
4598 | 4774 | ||
4599 | mutex_lock(&cgroup_mutex); | 4775 | lockdep_assert_held(&cgroup_mutex); |
4600 | 4776 | ||
4601 | /* | 4777 | /* delete this cgroup from parent->children */ |
4602 | * css_tryget() is guaranteed to fail now. Tell subsystems to | 4778 | list_del_rcu(&cgrp->sibling); |
4603 | * initate destruction. | ||
4604 | */ | ||
4605 | for_each_root_subsys(cgrp->root, ss) | ||
4606 | offline_css(ss, cgrp); | ||
4607 | 4779 | ||
4608 | /* | 4780 | /* |
4609 | * Put the css refs from cgroup_destroy_locked(). Each css holds | 4781 | * We should remove the cgroup object from idr before its grace |
4610 | * an extra reference to the cgroup's dentry and cgroup removal | 4782 | * period starts, so we won't be looking up a cgroup while the |
4611 | * proceeds regardless of css refs. On the last put of each css, | 4783 | * cgroup is being freed. |
4612 | * whenever that may be, the extra dentry ref is put so that dentry | ||
4613 | * destruction happens only after all css's are released. | ||
4614 | */ | 4784 | */ |
4615 | for_each_root_subsys(cgrp->root, ss) | 4785 | idr_remove(&cgrp->root->cgroup_idr, cgrp->id); |
4616 | css_put(cgrp->subsys[ss->subsys_id]); | 4786 | cgrp->id = -1; |
4617 | |||
4618 | /* delete this cgroup from parent->children */ | ||
4619 | list_del_rcu(&cgrp->sibling); | ||
4620 | 4787 | ||
4621 | dput(d); | 4788 | dput(d); |
4622 | 4789 | ||
4623 | set_bit(CGRP_RELEASABLE, &parent->flags); | 4790 | set_bit(CGRP_RELEASABLE, &parent->flags); |
4624 | check_for_release(parent); | 4791 | check_for_release(parent); |
4625 | |||
4626 | mutex_unlock(&cgroup_mutex); | ||
4627 | } | 4792 | } |
4628 | 4793 | ||
4629 | static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry) | 4794 | static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry) |
@@ -4646,6 +4811,11 @@ static void __init_or_module cgroup_init_cftsets(struct cgroup_subsys *ss) | |||
4646 | * deregistration. | 4811 | * deregistration. |
4647 | */ | 4812 | */ |
4648 | if (ss->base_cftypes) { | 4813 | if (ss->base_cftypes) { |
4814 | struct cftype *cft; | ||
4815 | |||
4816 | for (cft = ss->base_cftypes; cft->name[0] != '\0'; cft++) | ||
4817 | cft->ss = ss; | ||
4818 | |||
4649 | ss->base_cftset.cfts = ss->base_cftypes; | 4819 | ss->base_cftset.cfts = ss->base_cftypes; |
4650 | list_add_tail(&ss->base_cftset.node, &ss->cftsets); | 4820 | list_add_tail(&ss->base_cftset.node, &ss->cftsets); |
4651 | } | 4821 | } |
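The loop stamping cft->ss works because a cftype array ends with a zeroed sentinel entry, i.e. one whose name is empty. A hypothetical base_cftypes array in the post-patch, css-based style:

#include <linux/cgroup.h>

static u64 foo_count_read(struct cgroup_subsys_state *css,
                          struct cftype *cft)
{
        return 0;       /* illustrative value */
}

static struct cftype foo_base_files[] = {
        {
                .name = "foo.count",
                .read_u64 = foo_count_read,
        },
        { }     /* sentinel: name[0] == '\0' terminates the array */
};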
@@ -4665,10 +4835,10 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss) | |||
4665 | /* Create the top cgroup state for this subsystem */ | 4835 | /* Create the top cgroup state for this subsystem */ |
4666 | list_add(&ss->sibling, &cgroup_dummy_root.subsys_list); | 4836 | list_add(&ss->sibling, &cgroup_dummy_root.subsys_list); |
4667 | ss->root = &cgroup_dummy_root; | 4837 | ss->root = &cgroup_dummy_root; |
4668 | css = ss->css_alloc(cgroup_dummy_top); | 4838 | css = ss->css_alloc(cgroup_css(cgroup_dummy_top, ss)); |
4669 | /* We don't handle early failures gracefully */ | 4839 | /* We don't handle early failures gracefully */ |
4670 | BUG_ON(IS_ERR(css)); | 4840 | BUG_ON(IS_ERR(css)); |
4671 | init_cgroup_css(css, ss, cgroup_dummy_top); | 4841 | init_css(css, ss, cgroup_dummy_top); |
4672 | 4842 | ||
4673 | /* Update the init_css_set to contain a subsys | 4843 | /* Update the init_css_set to contain a subsys |
4674 | * pointer to this state - since the subsystem is | 4844 | * pointer to this state - since the subsystem is |
@@ -4683,7 +4853,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss) | |||
4683 | * need to invoke fork callbacks here. */ | 4853 | * need to invoke fork callbacks here. */ |
4684 | BUG_ON(!list_empty(&init_task.tasks)); | 4854 | BUG_ON(!list_empty(&init_task.tasks)); |
4685 | 4855 | ||
4686 | BUG_ON(online_css(ss, cgroup_dummy_top)); | 4856 | BUG_ON(online_css(css)); |
4687 | 4857 | ||
4688 | mutex_unlock(&cgroup_mutex); | 4858 | mutex_unlock(&cgroup_mutex); |
4689 | 4859 | ||
@@ -4744,7 +4914,7 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss) | |||
4744 | * struct, so this can happen first (i.e. before the dummy root | 4914 | * struct, so this can happen first (i.e. before the dummy root |
4745 | * attachment). | 4915 | * attachment). |
4746 | */ | 4916 | */ |
4747 | css = ss->css_alloc(cgroup_dummy_top); | 4917 | css = ss->css_alloc(cgroup_css(cgroup_dummy_top, ss)); |
4748 | if (IS_ERR(css)) { | 4918 | if (IS_ERR(css)) { |
4749 | /* failure case - need to deassign the cgroup_subsys[] slot. */ | 4919 | /* failure case - need to deassign the cgroup_subsys[] slot. */ |
4750 | cgroup_subsys[ss->subsys_id] = NULL; | 4920 | cgroup_subsys[ss->subsys_id] = NULL; |
@@ -4756,8 +4926,8 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss) | |||
4756 | ss->root = &cgroup_dummy_root; | 4926 | ss->root = &cgroup_dummy_root; |
4757 | 4927 | ||
4758 | /* our new subsystem will be attached to the dummy hierarchy. */ | 4928 | /* our new subsystem will be attached to the dummy hierarchy. */ |
4759 | init_cgroup_css(css, ss, cgroup_dummy_top); | 4929 | init_css(css, ss, cgroup_dummy_top); |
4760 | /* init_idr must be after init_cgroup_css because it sets css->id. */ | 4930 | /* init_idr must be after init_css() because it sets css->id. */ |
4761 | if (ss->use_id) { | 4931 | if (ss->use_id) { |
4762 | ret = cgroup_init_idr(ss, css); | 4932 | ret = cgroup_init_idr(ss, css); |
4763 | if (ret) | 4933 | if (ret) |
@@ -4787,7 +4957,7 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss) | |||
4787 | } | 4957 | } |
4788 | write_unlock(&css_set_lock); | 4958 | write_unlock(&css_set_lock); |
4789 | 4959 | ||
4790 | ret = online_css(ss, cgroup_dummy_top); | 4960 | ret = online_css(css); |
4791 | if (ret) | 4961 | if (ret) |
4792 | goto err_unload; | 4962 | goto err_unload; |
4793 | 4963 | ||
@@ -4819,14 +4989,14 @@ void cgroup_unload_subsys(struct cgroup_subsys *ss) | |||
4819 | 4989 | ||
4820 | /* | 4990 | /* |
4821 | * we shouldn't be called if the subsystem is in use, and the use of | 4991 | * we shouldn't be called if the subsystem is in use, and the use of |
4822 | * try_module_get in parse_cgroupfs_options should ensure that it | 4992 | * try_module_get() in rebind_subsystems() should ensure that it |
4823 | * doesn't start being used while we're killing it off. | 4993 | * doesn't start being used while we're killing it off. |
4824 | */ | 4994 | */ |
4825 | BUG_ON(ss->root != &cgroup_dummy_root); | 4995 | BUG_ON(ss->root != &cgroup_dummy_root); |
4826 | 4996 | ||
4827 | mutex_lock(&cgroup_mutex); | 4997 | mutex_lock(&cgroup_mutex); |
4828 | 4998 | ||
4829 | offline_css(ss, cgroup_dummy_top); | 4999 | offline_css(cgroup_css(cgroup_dummy_top, ss)); |
4830 | 5000 | ||
4831 | if (ss->use_id) | 5001 | if (ss->use_id) |
4832 | idr_destroy(&ss->idr); | 5002 | idr_destroy(&ss->idr); |
@@ -4860,8 +5030,8 @@ void cgroup_unload_subsys(struct cgroup_subsys *ss) | |||
4860 | * the cgrp->subsys pointer to find their state. note that this | 5030 | * the cgrp->subsys pointer to find their state. note that this |
4861 | * also takes care of freeing the css_id. | 5031 | * also takes care of freeing the css_id. |
4862 | */ | 5032 | */ |
4863 | ss->css_free(cgroup_dummy_top); | 5033 | ss->css_free(cgroup_css(cgroup_dummy_top, ss)); |
4864 | cgroup_dummy_top->subsys[ss->subsys_id] = NULL; | 5034 | RCU_INIT_POINTER(cgroup_dummy_top->subsys[ss->subsys_id], NULL); |
4865 | 5035 | ||
4866 | mutex_unlock(&cgroup_mutex); | 5036 | mutex_unlock(&cgroup_mutex); |
4867 | } | 5037 | } |
@@ -4943,6 +5113,10 @@ int __init cgroup_init(void) | |||
4943 | 5113 | ||
4944 | BUG_ON(cgroup_init_root_id(&cgroup_dummy_root, 0, 1)); | 5114 | BUG_ON(cgroup_init_root_id(&cgroup_dummy_root, 0, 1)); |
4945 | 5115 | ||
5116 | err = idr_alloc(&cgroup_dummy_root.cgroup_idr, cgroup_dummy_top, | ||
5117 | 0, 1, GFP_KERNEL); | ||
5118 | BUG_ON(err < 0); | ||
5119 | |||
4946 | mutex_unlock(&cgroup_root_mutex); | 5120 | mutex_unlock(&cgroup_root_mutex); |
4947 | mutex_unlock(&cgroup_mutex); | 5121 | mutex_unlock(&cgroup_mutex); |
4948 | 5122 | ||
@@ -5099,7 +5273,7 @@ void cgroup_fork(struct task_struct *child) | |||
5099 | * Adds the task to the list running through its css_set if necessary and | 5273 | * Adds the task to the list running through its css_set if necessary and |
5100 | * call the subsystem fork() callbacks. Has to be after the task is | 5274 | * call the subsystem fork() callbacks. Has to be after the task is |
5101 | * visible on the task list in case we race with the first call to | 5275 | * visible on the task list in case we race with the first call to |
5102 | * cgroup_iter_start() - to guarantee that the new task ends up on its | 5276 | * cgroup_task_iter_start() - to guarantee that the new task ends up on its |
5103 | * list. | 5277 | * list. |
5104 | */ | 5278 | */ |
5105 | void cgroup_post_fork(struct task_struct *child) | 5279 | void cgroup_post_fork(struct task_struct *child) |
@@ -5212,10 +5386,10 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks) | |||
5212 | */ | 5386 | */ |
5213 | for_each_builtin_subsys(ss, i) { | 5387 | for_each_builtin_subsys(ss, i) { |
5214 | if (ss->exit) { | 5388 | if (ss->exit) { |
5215 | struct cgroup *old_cgrp = cset->subsys[i]->cgroup; | 5389 | struct cgroup_subsys_state *old_css = cset->subsys[i]; |
5216 | struct cgroup *cgrp = task_cgroup(tsk, i); | 5390 | struct cgroup_subsys_state *css = task_css(tsk, i); |
5217 | 5391 | ||
5218 | ss->exit(cgrp, old_cgrp, tsk); | 5392 | ss->exit(css, old_css, tsk); |
5219 | } | 5393 | } |
5220 | } | 5394 | } |
5221 | } | 5395 | } |
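Under the new signature a subsystem's exit hook receives both css's directly instead of their cgroups. A hypothetical callback, shown only to illustrate the arguments:

#include <linux/cgroup.h>
#include <linux/sched.h>

/* Hypothetical ->exit callback matching ss->exit(css, old_css, tsk). */
static void foo_exit(struct cgroup_subsys_state *css,
                     struct cgroup_subsys_state *old_css,
                     struct task_struct *task)
{
        /* settle per-task accounting against @old_css as @task exits */
}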
@@ -5474,20 +5648,16 @@ static int __init_or_module cgroup_init_idr(struct cgroup_subsys *ss, | |||
5474 | return 0; | 5648 | return 0; |
5475 | } | 5649 | } |
5476 | 5650 | ||
5477 | static int alloc_css_id(struct cgroup_subsys *ss, struct cgroup *parent, | 5651 | static int alloc_css_id(struct cgroup_subsys_state *child_css) |
5478 | struct cgroup *child) | ||
5479 | { | 5652 | { |
5480 | int subsys_id, i, depth = 0; | 5653 | struct cgroup_subsys_state *parent_css = css_parent(child_css); |
5481 | struct cgroup_subsys_state *parent_css, *child_css; | ||
5482 | struct css_id *child_id, *parent_id; | 5654 | struct css_id *child_id, *parent_id; |
5655 | int i, depth; | ||
5483 | 5656 | ||
5484 | subsys_id = ss->subsys_id; | ||
5485 | parent_css = parent->subsys[subsys_id]; | ||
5486 | child_css = child->subsys[subsys_id]; | ||
5487 | parent_id = rcu_dereference_protected(parent_css->id, true); | 5657 | parent_id = rcu_dereference_protected(parent_css->id, true); |
5488 | depth = parent_id->depth + 1; | 5658 | depth = parent_id->depth + 1; |
5489 | 5659 | ||
5490 | child_id = get_new_cssid(ss, depth); | 5660 | child_id = get_new_cssid(child_css->ss, depth); |
5491 | if (IS_ERR(child_id)) | 5661 | if (IS_ERR(child_id)) |
5492 | return PTR_ERR(child_id); | 5662 | return PTR_ERR(child_id); |
5493 | 5663 | ||
@@ -5525,31 +5695,56 @@ struct cgroup_subsys_state *css_lookup(struct cgroup_subsys *ss, int id) | |||
5525 | } | 5695 | } |
5526 | EXPORT_SYMBOL_GPL(css_lookup); | 5696 | EXPORT_SYMBOL_GPL(css_lookup); |
5527 | 5697 | ||
5528 | /* | 5698 | /** |
5529 | * get corresponding css from file open on cgroupfs directory | 5699 | * css_from_dir - get corresponding css from the dentry of a cgroup dir |
5700 | * @dentry: directory dentry of interest | ||
5701 | * @ss: subsystem of interest | ||
5702 | * | ||
5703 | * Must be called under RCU read lock. The caller is responsible for | ||
5704 | * pinning the returned css if it needs to be accessed outside the RCU | ||
5705 | * critical section. | ||
5530 | */ | 5706 | */ |
5531 | struct cgroup_subsys_state *cgroup_css_from_dir(struct file *f, int id) | 5707 | struct cgroup_subsys_state *css_from_dir(struct dentry *dentry, |
5708 | struct cgroup_subsys *ss) | ||
5532 | { | 5709 | { |
5533 | struct cgroup *cgrp; | 5710 | struct cgroup *cgrp; |
5534 | struct inode *inode; | ||
5535 | struct cgroup_subsys_state *css; | ||
5536 | 5711 | ||
5537 | inode = file_inode(f); | 5712 | WARN_ON_ONCE(!rcu_read_lock_held()); |
5538 | /* check in cgroup filesystem dir */ | 5713 | |
5539 | if (inode->i_op != &cgroup_dir_inode_operations) | 5714 | /* is @dentry a cgroup dir? */ |
5715 | if (!dentry->d_inode || | ||
5716 | dentry->d_inode->i_op != &cgroup_dir_inode_operations) | ||
5540 | return ERR_PTR(-EBADF); | 5717 | return ERR_PTR(-EBADF); |
5541 | 5718 | ||
5542 | if (id < 0 || id >= CGROUP_SUBSYS_COUNT) | 5719 | cgrp = __d_cgrp(dentry); |
5543 | return ERR_PTR(-EINVAL); | 5720 | return cgroup_css(cgrp, ss) ?: ERR_PTR(-ENOENT); |
5721 | } | ||
5544 | 5722 | ||
5545 | /* get cgroup */ | 5723 | /** |
5546 | cgrp = __d_cgrp(f->f_dentry); | 5724 | * css_from_id - lookup css by id |
5547 | css = cgrp->subsys[id]; | 5725 | * @id: the cgroup id |
5548 | return css ? css : ERR_PTR(-ENOENT); | 5726 | * @ss: cgroup subsys to be looked into |
5727 | * | ||
5728 | * Returns the css if there's valid one with @id, otherwise returns NULL. | ||
5729 | * Should be called under rcu_read_lock(). | ||
5730 | */ | ||
5731 | struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss) | ||
5732 | { | ||
5733 | struct cgroup *cgrp; | ||
5734 | |||
5735 | rcu_lockdep_assert(rcu_read_lock_held() || | ||
5736 | lockdep_is_held(&cgroup_mutex), | ||
5737 | "css_from_id() needs proper protection"); | ||
5738 | |||
5739 | cgrp = idr_find(&ss->root->cgroup_idr, id); | ||
5740 | if (cgrp) | ||
5741 | return cgroup_css(cgrp, ss); | ||
5742 | return NULL; | ||
5549 | } | 5743 | } |
5550 | 5744 | ||
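Both lookup helpers return unpinned pointers, so a caller that wants the css to outlive the RCU section must css_tryget() it first. A sketch of the expected calling convention ("my_subsys" is a placeholder for a real cgroup_subsys):

#include <linux/cgroup.h>
#include <linux/rcupdate.h>

extern struct cgroup_subsys my_subsys;  /* hypothetical, registered elsewhere */

static struct cgroup_subsys_state *get_css_by_id(int id)
{
        struct cgroup_subsys_state *css;

        rcu_read_lock();
        css = css_from_id(id, &my_subsys);
        if (css && !css_tryget(css))    /* pin it before leaving RCU */
                css = NULL;
        rcu_read_unlock();

        return css;     /* caller must css_put() when done */
}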
5551 | #ifdef CONFIG_CGROUP_DEBUG | 5745 | #ifdef CONFIG_CGROUP_DEBUG |
5552 | static struct cgroup_subsys_state *debug_css_alloc(struct cgroup *cgrp) | 5746 | static struct cgroup_subsys_state * |
5747 | debug_css_alloc(struct cgroup_subsys_state *parent_css) | ||
5553 | { | 5748 | { |
5554 | struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL); | 5749 | struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL); |
5555 | 5750 | ||
@@ -5559,22 +5754,24 @@ static struct cgroup_subsys_state *debug_css_alloc(struct cgroup *cgrp) | |||
5559 | return css; | 5754 | return css; |
5560 | } | 5755 | } |
5561 | 5756 | ||
5562 | static void debug_css_free(struct cgroup *cgrp) | 5757 | static void debug_css_free(struct cgroup_subsys_state *css) |
5563 | { | 5758 | { |
5564 | kfree(cgrp->subsys[debug_subsys_id]); | 5759 | kfree(css); |
5565 | } | 5760 | } |
5566 | 5761 | ||
5567 | static u64 debug_taskcount_read(struct cgroup *cgrp, struct cftype *cft) | 5762 | static u64 debug_taskcount_read(struct cgroup_subsys_state *css, |
5763 | struct cftype *cft) | ||
5568 | { | 5764 | { |
5569 | return cgroup_task_count(cgrp); | 5765 | return cgroup_task_count(css->cgroup); |
5570 | } | 5766 | } |
5571 | 5767 | ||
5572 | static u64 current_css_set_read(struct cgroup *cgrp, struct cftype *cft) | 5768 | static u64 current_css_set_read(struct cgroup_subsys_state *css, |
5769 | struct cftype *cft) | ||
5573 | { | 5770 | { |
5574 | return (u64)(unsigned long)current->cgroups; | 5771 | return (u64)(unsigned long)current->cgroups; |
5575 | } | 5772 | } |
5576 | 5773 | ||
5577 | static u64 current_css_set_refcount_read(struct cgroup *cgrp, | 5774 | static u64 current_css_set_refcount_read(struct cgroup_subsys_state *css, |
5578 | struct cftype *cft) | 5775 | struct cftype *cft) |
5579 | { | 5776 | { |
5580 | u64 count; | 5777 | u64 count; |
@@ -5585,7 +5782,7 @@ static u64 current_css_set_refcount_read(struct cgroup *cgrp, | |||
5585 | return count; | 5782 | return count; |
5586 | } | 5783 | } |
5587 | 5784 | ||
5588 | static int current_css_set_cg_links_read(struct cgroup *cgrp, | 5785 | static int current_css_set_cg_links_read(struct cgroup_subsys_state *css, |
5589 | struct cftype *cft, | 5786 | struct cftype *cft, |
5590 | struct seq_file *seq) | 5787 | struct seq_file *seq) |
5591 | { | 5788 | { |
@@ -5612,14 +5809,13 @@ static int current_css_set_cg_links_read(struct cgroup *cgrp, | |||
5612 | } | 5809 | } |
5613 | 5810 | ||
5614 | #define MAX_TASKS_SHOWN_PER_CSS 25 | 5811 | #define MAX_TASKS_SHOWN_PER_CSS 25 |
5615 | static int cgroup_css_links_read(struct cgroup *cgrp, | 5812 | static int cgroup_css_links_read(struct cgroup_subsys_state *css, |
5616 | struct cftype *cft, | 5813 | struct cftype *cft, struct seq_file *seq) |
5617 | struct seq_file *seq) | ||
5618 | { | 5814 | { |
5619 | struct cgrp_cset_link *link; | 5815 | struct cgrp_cset_link *link; |
5620 | 5816 | ||
5621 | read_lock(&css_set_lock); | 5817 | read_lock(&css_set_lock); |
5622 | list_for_each_entry(link, &cgrp->cset_links, cset_link) { | 5818 | list_for_each_entry(link, &css->cgroup->cset_links, cset_link) { |
5623 | struct css_set *cset = link->cset; | 5819 | struct css_set *cset = link->cset; |
5624 | struct task_struct *task; | 5820 | struct task_struct *task; |
5625 | int count = 0; | 5821 | int count = 0; |
@@ -5638,9 +5834,9 @@ static int cgroup_css_links_read(struct cgroup *cgrp, | |||
5638 | return 0; | 5834 | return 0; |
5639 | } | 5835 | } |
5640 | 5836 | ||
5641 | static u64 releasable_read(struct cgroup *cgrp, struct cftype *cft) | 5837 | static u64 releasable_read(struct cgroup_subsys_state *css, struct cftype *cft) |
5642 | { | 5838 | { |
5643 | return test_bit(CGRP_RELEASABLE, &cgrp->flags); | 5839 | return test_bit(CGRP_RELEASABLE, &css->cgroup->flags); |
5644 | } | 5840 | } |
5645 | 5841 | ||
5646 | static struct cftype debug_files[] = { | 5842 | static struct cftype debug_files[] = { |