Diffstat
-rw-r--r--	block/blk-cgroup.c		49
-rw-r--r--	block/blk-cgroup.h		38
-rw-r--r--	block/blk-throttle.c		43
-rw-r--r--	block/cfq-iosched.c		90
-rw-r--r--	fs/bio.c			2
-rw-r--r--	include/linux/cgroup.h		303
-rw-r--r--	include/linux/memcontrol.h	2
-rw-r--r--	include/linux/vmpressure.h	6
-rw-r--r--	include/net/cls_cgroup.h	4
-rw-r--r--	include/net/netprio_cgroup.h	8
-rw-r--r--	kernel/cgroup.c			1643
-rw-r--r--	kernel/cgroup_freezer.c		155
-rw-r--r--	kernel/cpuset.c			317
-rw-r--r--	kernel/events/core.c		27
-rw-r--r--	kernel/sched/core.c		113
-rw-r--r--	kernel/sched/cpuacct.c		51
-rw-r--r--	kernel/sched/sched.h		6
-rw-r--r--	mm/hugetlb_cgroup.c		69
-rw-r--r--	mm/memcontrol.c			223
-rw-r--r--	mm/vmpressure.c			25
-rw-r--r--	net/core/netprio_cgroup.c	72
-rw-r--r--	net/ipv4/tcp_memcontrol.c	12
-rw-r--r--	net/sched/cls_cgroup.c		39
-rw-r--r--	security/device_cgroup.c	65
24 files changed, 1751 insertions(+), 1611 deletions(-)
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 290792a13e3c..e90c7c164c83 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -437,10 +437,10 @@ struct request_list *__blk_queue_next_rl(struct request_list *rl,
 	return &blkg->rl;
 }
 
-static int blkcg_reset_stats(struct cgroup *cgroup, struct cftype *cftype,
-			     u64 val)
+static int blkcg_reset_stats(struct cgroup_subsys_state *css,
+			     struct cftype *cftype, u64 val)
 {
-	struct blkcg *blkcg = cgroup_to_blkcg(cgroup);
+	struct blkcg *blkcg = css_to_blkcg(css);
 	struct blkcg_gq *blkg;
 	int i;
 
@@ -614,15 +614,13 @@ u64 blkg_stat_recursive_sum(struct blkg_policy_data *pd, int off)
 {
 	struct blkcg_policy *pol = blkcg_policy[pd->plid];
 	struct blkcg_gq *pos_blkg;
-	struct cgroup *pos_cgrp;
-	u64 sum;
+	struct cgroup_subsys_state *pos_css;
+	u64 sum = 0;
 
 	lockdep_assert_held(pd->blkg->q->queue_lock);
 
-	sum = blkg_stat_read((void *)pd + off);
-
 	rcu_read_lock();
-	blkg_for_each_descendant_pre(pos_blkg, pos_cgrp, pd_to_blkg(pd)) {
+	blkg_for_each_descendant_pre(pos_blkg, pos_css, pd_to_blkg(pd)) {
 		struct blkg_policy_data *pos_pd = blkg_to_pd(pos_blkg, pol);
 		struct blkg_stat *stat = (void *)pos_pd + off;
 
@@ -649,16 +647,14 @@ struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkg_policy_data *pd,
 {
 	struct blkcg_policy *pol = blkcg_policy[pd->plid];
 	struct blkcg_gq *pos_blkg;
-	struct cgroup *pos_cgrp;
-	struct blkg_rwstat sum;
+	struct cgroup_subsys_state *pos_css;
+	struct blkg_rwstat sum = { };
 	int i;
 
 	lockdep_assert_held(pd->blkg->q->queue_lock);
 
-	sum = blkg_rwstat_read((void *)pd + off);
-
 	rcu_read_lock();
-	blkg_for_each_descendant_pre(pos_blkg, pos_cgrp, pd_to_blkg(pd)) {
+	blkg_for_each_descendant_pre(pos_blkg, pos_css, pd_to_blkg(pd)) {
 		struct blkg_policy_data *pos_pd = blkg_to_pd(pos_blkg, pol);
 		struct blkg_rwstat *rwstat = (void *)pos_pd + off;
 		struct blkg_rwstat tmp;
@@ -765,18 +761,18 @@ struct cftype blkcg_files[] = {
 
 /**
  * blkcg_css_offline - cgroup css_offline callback
- * @cgroup: cgroup of interest
+ * @css: css of interest
  *
- * This function is called when @cgroup is about to go away and responsible
- * for shooting down all blkgs associated with @cgroup.  blkgs should be
+ * This function is called when @css is about to go away and responsible
+ * for shooting down all blkgs associated with @css.  blkgs should be
  * removed while holding both q and blkcg locks.  As blkcg lock is nested
  * inside q lock, this function performs reverse double lock dancing.
  *
  * This is the blkcg counterpart of ioc_release_fn().
  */
-static void blkcg_css_offline(struct cgroup *cgroup)
+static void blkcg_css_offline(struct cgroup_subsys_state *css)
 {
-	struct blkcg *blkcg = cgroup_to_blkcg(cgroup);
+	struct blkcg *blkcg = css_to_blkcg(css);
 
 	spin_lock_irq(&blkcg->lock);
 
@@ -798,21 +794,21 @@ static void blkcg_css_offline(struct cgroup *cgroup)
 	spin_unlock_irq(&blkcg->lock);
 }
 
-static void blkcg_css_free(struct cgroup *cgroup)
+static void blkcg_css_free(struct cgroup_subsys_state *css)
 {
-	struct blkcg *blkcg = cgroup_to_blkcg(cgroup);
+	struct blkcg *blkcg = css_to_blkcg(css);
 
 	if (blkcg != &blkcg_root)
 		kfree(blkcg);
 }
 
-static struct cgroup_subsys_state *blkcg_css_alloc(struct cgroup *cgroup)
+static struct cgroup_subsys_state *
+blkcg_css_alloc(struct cgroup_subsys_state *parent_css)
 {
 	static atomic64_t id_seq = ATOMIC64_INIT(0);
 	struct blkcg *blkcg;
-	struct cgroup *parent = cgroup->parent;
 
-	if (!parent) {
+	if (!parent_css) {
 		blkcg = &blkcg_root;
 		goto done;
 	}
@@ -883,14 +879,15 @@ void blkcg_exit_queue(struct request_queue *q)
  * of the main cic data structures.  For now we allow a task to change
  * its cgroup only if it's the only owner of its ioc.
  */
-static int blkcg_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
+static int blkcg_can_attach(struct cgroup_subsys_state *css,
+			    struct cgroup_taskset *tset)
 {
 	struct task_struct *task;
 	struct io_context *ioc;
 	int ret = 0;
 
 	/* task_lock() is needed to avoid races with exit_io_context() */
-	cgroup_taskset_for_each(task, cgrp, tset) {
+	cgroup_taskset_for_each(task, css, tset) {
 		task_lock(task);
 		ioc = task->io_context;
 		if (ioc && atomic_read(&ioc->nr_tasks) > 1)
@@ -1127,7 +1124,7 @@ void blkcg_policy_unregister(struct blkcg_policy *pol)
 
 	/* kill the intf files first */
 	if (pol->cftypes)
-		cgroup_rm_cftypes(&blkio_subsys, pol->cftypes);
+		cgroup_rm_cftypes(pol->cftypes);
 
 	/* unregister and update blkgs */
 	blkcg_policy[pol->plid] = NULL;
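Every change in this file follows one pattern: subsystem callbacks now take the subsystem's own cgroup_subsys_state (css) instead of a struct cgroup, and blkcg_css_alloc() receives the parent css directly instead of chasing cgroup->parent. A minimal sketch of an alloc/free pair under the new signatures (struct my_state and its weight field are illustrative, not part of this patch):

	struct my_state {
		struct cgroup_subsys_state css;	/* embedded; container_of() maps back */
		u64 weight;			/* hypothetical per-group setting */
	};

	static struct cgroup_subsys_state *
	my_css_alloc(struct cgroup_subsys_state *parent_css)
	{
		struct my_state *ms = kzalloc(sizeof(*ms), GFP_KERNEL);

		if (!ms)
			return ERR_PTR(-ENOMEM);
		/* a NULL parent_css means this css is the root of the hierarchy */
		if (parent_css)
			ms->weight = container_of(parent_css, struct my_state,
						  css)->weight;
		return &ms->css;
	}

	static void my_css_free(struct cgroup_subsys_state *css)
	{
		kfree(container_of(css, struct my_state, css));
	}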
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 8056c03a3382..ae6969a7ffd4 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -179,22 +179,20 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
 void blkg_conf_finish(struct blkg_conf_ctx *ctx);
 
 
-static inline struct blkcg *cgroup_to_blkcg(struct cgroup *cgroup)
+static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css)
 {
-	return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id),
-			    struct blkcg, css);
+	return css ? container_of(css, struct blkcg, css) : NULL;
 }
 
 static inline struct blkcg *task_blkcg(struct task_struct *tsk)
 {
-	return container_of(task_subsys_state(tsk, blkio_subsys_id),
-			    struct blkcg, css);
+	return css_to_blkcg(task_css(tsk, blkio_subsys_id));
 }
 
 static inline struct blkcg *bio_blkcg(struct bio *bio)
 {
 	if (bio && bio->bi_css)
-		return container_of(bio->bi_css, struct blkcg, css);
+		return css_to_blkcg(bio->bi_css);
 	return task_blkcg(current);
 }
 
@@ -206,9 +204,7 @@ static inline struct blkcg *bio_blkcg(struct bio *bio)
 */
 static inline struct blkcg *blkcg_parent(struct blkcg *blkcg)
 {
-	struct cgroup *pcg = blkcg->css.cgroup->parent;
-
-	return pcg ? cgroup_to_blkcg(pcg) : NULL;
+	return css_to_blkcg(css_parent(&blkcg->css));
 }
 
 /**
@@ -288,32 +284,33 @@ struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, struct request_queue *q,
 /**
  * blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants
  * @d_blkg: loop cursor pointing to the current descendant
- * @pos_cgrp: used for iteration
+ * @pos_css: used for iteration
  * @p_blkg: target blkg to walk descendants of
 *
 * Walk @c_blkg through the descendants of @p_blkg.  Must be used with RCU
 * read locked.  If called under either blkcg or queue lock, the iteration
 * is guaranteed to include all and only online blkgs.  The caller may
- * update @pos_cgrp by calling cgroup_rightmost_descendant() to skip
- * subtree.
+ * update @pos_css by calling css_rightmost_descendant() to skip subtree.
+ * @p_blkg is included in the iteration and the first node to be visited.
 */
-#define blkg_for_each_descendant_pre(d_blkg, pos_cgrp, p_blkg)		\
-	cgroup_for_each_descendant_pre((pos_cgrp), (p_blkg)->blkcg->css.cgroup) \
-		if (((d_blkg) = __blkg_lookup(cgroup_to_blkcg(pos_cgrp),	\
+#define blkg_for_each_descendant_pre(d_blkg, pos_css, p_blkg)		\
+	css_for_each_descendant_pre((pos_css), &(p_blkg)->blkcg->css)	\
+		if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css),	\
 					      (p_blkg)->q, false)))
 
 /**
  * blkg_for_each_descendant_post - post-order walk of a blkg's descendants
  * @d_blkg: loop cursor pointing to the current descendant
- * @pos_cgrp: used for iteration
+ * @pos_css: used for iteration
  * @p_blkg: target blkg to walk descendants of
 *
 * Similar to blkg_for_each_descendant_pre() but performs post-order
- * traversal instead.  Synchronization rules are the same.
+ * traversal instead.  Synchronization rules are the same.  @p_blkg is
+ * included in the iteration and the last node to be visited.
 */
-#define blkg_for_each_descendant_post(d_blkg, pos_cgrp, p_blkg)	\
-	cgroup_for_each_descendant_post((pos_cgrp), (p_blkg)->blkcg->css.cgroup) \
-		if (((d_blkg) = __blkg_lookup(cgroup_to_blkcg(pos_cgrp),	\
+#define blkg_for_each_descendant_post(d_blkg, pos_css, p_blkg)		\
+	css_for_each_descendant_post((pos_css), &(p_blkg)->blkcg->css)	\
+		if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css),	\
 					      (p_blkg)->q, false)))
 
 /**
@@ -576,7 +573,6 @@ static inline int blkcg_activate_policy(struct request_queue *q,
 static inline void blkcg_deactivate_policy(struct request_queue *q,
 					   const struct blkcg_policy *pol) { }
 
-static inline struct blkcg *cgroup_to_blkcg(struct cgroup *cgroup) { return NULL; }
 static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; }
 
 static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
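The rewritten iterators also change semantics, which explains the blk-cgroup.c hunks above: css_for_each_descendant_pre() now visits the origin css itself as the first node, so blkg_stat_recursive_sum() and blkg_rwstat_recursive_sum() no longer have to pre-seed the sum with their own counters before walking. A sketch of the usage pattern these macros document (the some_stat field is hypothetical):

	static u64 sum_some_stat(struct blkcg_gq *p_blkg)
	{
		struct cgroup_subsys_state *pos_css;
		struct blkcg_gq *blkg;
		u64 sum = 0;

		lockdep_assert_held(p_blkg->q->queue_lock);

		rcu_read_lock();
		/* visits p_blkg itself first, then its online descendants */
		blkg_for_each_descendant_pre(blkg, pos_css, p_blkg)
			sum += blkg->some_stat;	/* hypothetical field */
		rcu_read_unlock();

		return sum;
	}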
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 08a32dfd3844..8331aba9426f 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -1293,10 +1293,10 @@ static u64 tg_prfill_cpu_rwstat(struct seq_file *sf,
 	return __blkg_prfill_rwstat(sf, pd, &rwstat);
 }
 
-static int tg_print_cpu_rwstat(struct cgroup *cgrp, struct cftype *cft,
-			       struct seq_file *sf)
+static int tg_print_cpu_rwstat(struct cgroup_subsys_state *css,
+			       struct cftype *cft, struct seq_file *sf)
 {
-	struct blkcg *blkcg = cgroup_to_blkcg(cgrp);
+	struct blkcg *blkcg = css_to_blkcg(css);
 
 	blkcg_print_blkgs(sf, blkcg, tg_prfill_cpu_rwstat, &blkcg_policy_throtl,
 			  cft->private, true);
@@ -1325,31 +1325,31 @@ static u64 tg_prfill_conf_uint(struct seq_file *sf, struct blkg_policy_data *pd,
 	return __blkg_prfill_u64(sf, pd, v);
 }
 
-static int tg_print_conf_u64(struct cgroup *cgrp, struct cftype *cft,
-			     struct seq_file *sf)
+static int tg_print_conf_u64(struct cgroup_subsys_state *css,
+			     struct cftype *cft, struct seq_file *sf)
 {
-	blkcg_print_blkgs(sf, cgroup_to_blkcg(cgrp), tg_prfill_conf_u64,
+	blkcg_print_blkgs(sf, css_to_blkcg(css), tg_prfill_conf_u64,
 			  &blkcg_policy_throtl, cft->private, false);
 	return 0;
 }
 
-static int tg_print_conf_uint(struct cgroup *cgrp, struct cftype *cft,
-			      struct seq_file *sf)
+static int tg_print_conf_uint(struct cgroup_subsys_state *css,
+			      struct cftype *cft, struct seq_file *sf)
 {
-	blkcg_print_blkgs(sf, cgroup_to_blkcg(cgrp), tg_prfill_conf_uint,
+	blkcg_print_blkgs(sf, css_to_blkcg(css), tg_prfill_conf_uint,
 			  &blkcg_policy_throtl, cft->private, false);
 	return 0;
 }
 
-static int tg_set_conf(struct cgroup *cgrp, struct cftype *cft, const char *buf,
-		       bool is_u64)
+static int tg_set_conf(struct cgroup_subsys_state *css, struct cftype *cft,
+		       const char *buf, bool is_u64)
 {
-	struct blkcg *blkcg = cgroup_to_blkcg(cgrp);
+	struct blkcg *blkcg = css_to_blkcg(css);
 	struct blkg_conf_ctx ctx;
 	struct throtl_grp *tg;
 	struct throtl_service_queue *sq;
 	struct blkcg_gq *blkg;
-	struct cgroup *pos_cgrp;
+	struct cgroup_subsys_state *pos_css;
 	int ret;
 
 	ret = blkg_conf_prep(blkcg, &blkcg_policy_throtl, buf, &ctx);
@@ -1379,8 +1379,7 @@ static int tg_set_conf(struct cgroup *cgrp, struct cftype *cft, const char *buf,
 	 * restrictions in the whole hierarchy and allows them to bypass
 	 * blk-throttle.
 	 */
-	tg_update_has_rules(tg);
-	blkg_for_each_descendant_pre(blkg, pos_cgrp, ctx.blkg)
+	blkg_for_each_descendant_pre(blkg, pos_css, ctx.blkg)
 		tg_update_has_rules(blkg_to_tg(blkg));
 
 	/*
@@ -1403,16 +1402,16 @@ static int tg_set_conf(struct cgroup *cgrp, struct cftype *cft, const char *buf,
 	return 0;
 }
 
-static int tg_set_conf_u64(struct cgroup *cgrp, struct cftype *cft,
+static int tg_set_conf_u64(struct cgroup_subsys_state *css, struct cftype *cft,
 			   const char *buf)
 {
-	return tg_set_conf(cgrp, cft, buf, true);
+	return tg_set_conf(css, cft, buf, true);
 }
 
-static int tg_set_conf_uint(struct cgroup *cgrp, struct cftype *cft,
+static int tg_set_conf_uint(struct cgroup_subsys_state *css, struct cftype *cft,
 			    const char *buf)
 {
-	return tg_set_conf(cgrp, cft, buf, false);
+	return tg_set_conf(css, cft, buf, false);
 }
 
 static struct cftype throtl_files[] = {
@@ -1623,7 +1622,7 @@ void blk_throtl_drain(struct request_queue *q)
 {
 	struct throtl_data *td = q->td;
 	struct blkcg_gq *blkg;
-	struct cgroup *pos_cgrp;
+	struct cgroup_subsys_state *pos_css;
 	struct bio *bio;
 	int rw;
 
@@ -1636,11 +1635,9 @@
 	 * better to walk service_queue tree directly but blkg walk is
 	 * easier.
 	 */
-	blkg_for_each_descendant_post(blkg, pos_cgrp, td->queue->root_blkg)
+	blkg_for_each_descendant_post(blkg, pos_css, td->queue->root_blkg)
 		tg_drain_bios(&blkg_to_tg(blkg)->service_queue);
 
-	tg_drain_bios(&td_root_tg(td)->service_queue);
-
 	/* finally, transfer bios from top-level tg's into the td */
 	tg_drain_bios(&td->service_queue);
 
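Both blk-throttle hunks rely on the new self-inclusive iteration: tg_set_conf() drops its separate tg_update_has_rules(tg) call and blk_throtl_drain() drops the explicit td_root_tg() drain, because the pre- and post-order walks now cover the origin blkg themselves. Side by side, roughly:

	/* before: descendants only, origin handled by hand */
	tg_update_has_rules(tg);
	blkg_for_each_descendant_pre(blkg, pos_cgrp, ctx.blkg)
		tg_update_has_rules(blkg_to_tg(blkg));

	/* after: the walk starts at ctx.blkg itself, so one loop covers both */
	blkg_for_each_descendant_pre(blkg, pos_css, ctx.blkg)
		tg_update_has_rules(blkg_to_tg(blkg));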
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index d5bbdcfd0dab..dabb9d02cf9a 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -1607,12 +1607,11 @@ static u64 cfqg_prfill_weight_device(struct seq_file *sf,
 	return __blkg_prfill_u64(sf, pd, cfqg->dev_weight);
 }
 
-static int cfqg_print_weight_device(struct cgroup *cgrp, struct cftype *cft,
-				    struct seq_file *sf)
+static int cfqg_print_weight_device(struct cgroup_subsys_state *css,
+				    struct cftype *cft, struct seq_file *sf)
 {
-	blkcg_print_blkgs(sf, cgroup_to_blkcg(cgrp),
-			  cfqg_prfill_weight_device, &blkcg_policy_cfq, 0,
-			  false);
+	blkcg_print_blkgs(sf, css_to_blkcg(css), cfqg_prfill_weight_device,
+			  &blkcg_policy_cfq, 0, false);
 	return 0;
 }
 
@@ -1626,35 +1625,34 @@ static u64 cfqg_prfill_leaf_weight_device(struct seq_file *sf,
 	return __blkg_prfill_u64(sf, pd, cfqg->dev_leaf_weight);
 }
 
-static int cfqg_print_leaf_weight_device(struct cgroup *cgrp,
+static int cfqg_print_leaf_weight_device(struct cgroup_subsys_state *css,
 					 struct cftype *cft,
 					 struct seq_file *sf)
 {
-	blkcg_print_blkgs(sf, cgroup_to_blkcg(cgrp),
-			  cfqg_prfill_leaf_weight_device, &blkcg_policy_cfq, 0,
-			  false);
+	blkcg_print_blkgs(sf, css_to_blkcg(css), cfqg_prfill_leaf_weight_device,
+			  &blkcg_policy_cfq, 0, false);
 	return 0;
 }
 
-static int cfq_print_weight(struct cgroup *cgrp, struct cftype *cft,
+static int cfq_print_weight(struct cgroup_subsys_state *css, struct cftype *cft,
 			    struct seq_file *sf)
 {
-	seq_printf(sf, "%u\n", cgroup_to_blkcg(cgrp)->cfq_weight);
+	seq_printf(sf, "%u\n", css_to_blkcg(css)->cfq_weight);
 	return 0;
 }
 
-static int cfq_print_leaf_weight(struct cgroup *cgrp, struct cftype *cft,
-				 struct seq_file *sf)
+static int cfq_print_leaf_weight(struct cgroup_subsys_state *css,
+				 struct cftype *cft, struct seq_file *sf)
 {
-	seq_printf(sf, "%u\n",
-		   cgroup_to_blkcg(cgrp)->cfq_leaf_weight);
+	seq_printf(sf, "%u\n", css_to_blkcg(css)->cfq_leaf_weight);
 	return 0;
 }
 
-static int __cfqg_set_weight_device(struct cgroup *cgrp, struct cftype *cft,
-				    const char *buf, bool is_leaf_weight)
+static int __cfqg_set_weight_device(struct cgroup_subsys_state *css,
+				    struct cftype *cft, const char *buf,
+				    bool is_leaf_weight)
 {
-	struct blkcg *blkcg = cgroup_to_blkcg(cgrp);
+	struct blkcg *blkcg = css_to_blkcg(css);
 	struct blkg_conf_ctx ctx;
 	struct cfq_group *cfqg;
 	int ret;
@@ -1680,22 +1678,22 @@ static int __cfqg_set_weight_device(struct cgroup *cgrp, struct cftype *cft,
 	return ret;
 }
 
-static int cfqg_set_weight_device(struct cgroup *cgrp, struct cftype *cft,
-				  const char *buf)
+static int cfqg_set_weight_device(struct cgroup_subsys_state *css,
+				  struct cftype *cft, const char *buf)
 {
-	return __cfqg_set_weight_device(cgrp, cft, buf, false);
+	return __cfqg_set_weight_device(css, cft, buf, false);
 }
 
-static int cfqg_set_leaf_weight_device(struct cgroup *cgrp, struct cftype *cft,
-				       const char *buf)
+static int cfqg_set_leaf_weight_device(struct cgroup_subsys_state *css,
+				       struct cftype *cft, const char *buf)
 {
-	return __cfqg_set_weight_device(cgrp, cft, buf, true);
+	return __cfqg_set_weight_device(css, cft, buf, true);
 }
 
-static int __cfq_set_weight(struct cgroup *cgrp, struct cftype *cft, u64 val,
-			    bool is_leaf_weight)
+static int __cfq_set_weight(struct cgroup_subsys_state *css, struct cftype *cft,
+			    u64 val, bool is_leaf_weight)
 {
-	struct blkcg *blkcg = cgroup_to_blkcg(cgrp);
+	struct blkcg *blkcg = css_to_blkcg(css);
 	struct blkcg_gq *blkg;
 
 	if (val < CFQ_WEIGHT_MIN || val > CFQ_WEIGHT_MAX)
@@ -1727,30 +1725,32 @@ static int __cfq_set_weight(struct cgroup *cgrp, struct cftype *cft, u64 val,
 	return 0;
 }
 
-static int cfq_set_weight(struct cgroup *cgrp, struct cftype *cft, u64 val)
+static int cfq_set_weight(struct cgroup_subsys_state *css, struct cftype *cft,
+			  u64 val)
 {
-	return __cfq_set_weight(cgrp, cft, val, false);
+	return __cfq_set_weight(css, cft, val, false);
 }
 
-static int cfq_set_leaf_weight(struct cgroup *cgrp, struct cftype *cft, u64 val)
+static int cfq_set_leaf_weight(struct cgroup_subsys_state *css,
+			       struct cftype *cft, u64 val)
 {
-	return __cfq_set_weight(cgrp, cft, val, true);
+	return __cfq_set_weight(css, cft, val, true);
 }
 
-static int cfqg_print_stat(struct cgroup *cgrp, struct cftype *cft,
+static int cfqg_print_stat(struct cgroup_subsys_state *css, struct cftype *cft,
 			   struct seq_file *sf)
 {
-	struct blkcg *blkcg = cgroup_to_blkcg(cgrp);
+	struct blkcg *blkcg = css_to_blkcg(css);
 
 	blkcg_print_blkgs(sf, blkcg, blkg_prfill_stat, &blkcg_policy_cfq,
 			  cft->private, false);
 	return 0;
 }
 
-static int cfqg_print_rwstat(struct cgroup *cgrp, struct cftype *cft,
-			     struct seq_file *sf)
+static int cfqg_print_rwstat(struct cgroup_subsys_state *css,
+			     struct cftype *cft, struct seq_file *sf)
 {
-	struct blkcg *blkcg = cgroup_to_blkcg(cgrp);
+	struct blkcg *blkcg = css_to_blkcg(css);
 
 	blkcg_print_blkgs(sf, blkcg, blkg_prfill_rwstat, &blkcg_policy_cfq,
 			  cft->private, true);
@@ -1773,20 +1773,20 @@ static u64 cfqg_prfill_rwstat_recursive(struct seq_file *sf,
 	return __blkg_prfill_rwstat(sf, pd, &sum);
 }
 
-static int cfqg_print_stat_recursive(struct cgroup *cgrp, struct cftype *cft,
-				     struct seq_file *sf)
+static int cfqg_print_stat_recursive(struct cgroup_subsys_state *css,
+				     struct cftype *cft, struct seq_file *sf)
 {
-	struct blkcg *blkcg = cgroup_to_blkcg(cgrp);
+	struct blkcg *blkcg = css_to_blkcg(css);
 
 	blkcg_print_blkgs(sf, blkcg, cfqg_prfill_stat_recursive,
 			  &blkcg_policy_cfq, cft->private, false);
 	return 0;
 }
 
-static int cfqg_print_rwstat_recursive(struct cgroup *cgrp, struct cftype *cft,
-				       struct seq_file *sf)
+static int cfqg_print_rwstat_recursive(struct cgroup_subsys_state *css,
+				       struct cftype *cft, struct seq_file *sf)
 {
-	struct blkcg *blkcg = cgroup_to_blkcg(cgrp);
+	struct blkcg *blkcg = css_to_blkcg(css);
 
 	blkcg_print_blkgs(sf, blkcg, cfqg_prfill_rwstat_recursive,
 			  &blkcg_policy_cfq, cft->private, true);
@@ -1810,10 +1810,10 @@ static u64 cfqg_prfill_avg_queue_size(struct seq_file *sf,
 }
 
 /* print avg_queue_size */
-static int cfqg_print_avg_queue_size(struct cgroup *cgrp, struct cftype *cft,
-				     struct seq_file *sf)
+static int cfqg_print_avg_queue_size(struct cgroup_subsys_state *css,
+				     struct cftype *cft, struct seq_file *sf)
 {
-	struct blkcg *blkcg = cgroup_to_blkcg(cgrp);
+	struct blkcg *blkcg = css_to_blkcg(css);
 
 	blkcg_print_blkgs(sf, blkcg, cfqg_prfill_avg_queue_size,
 			  &blkcg_policy_cfq, 0, false);
@@ -1956,7 +1956,7 @@ int bio_associate_current(struct bio *bio)
 
 	/* associate blkcg if exists */
 	rcu_read_lock();
-	css = task_subsys_state(current, blkio_subsys_id);
+	css = task_css(current, blkio_subsys_id);
 	if (css && css_tryget(css))
 		bio->bi_css = css;
 	rcu_read_unlock();
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index e9ac882868c0..3561d305b1e0 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -66,22 +66,25 @@ enum cgroup_subsys_id {
 
 /* Per-subsystem/per-cgroup state maintained by the system. */
 struct cgroup_subsys_state {
-	/*
-	 * The cgroup that this subsystem is attached to. Useful
-	 * for subsystems that want to know about the cgroup
-	 * hierarchy structure
-	 */
+	/* the cgroup that this css is attached to */
 	struct cgroup *cgroup;
 
+	/* the cgroup subsystem that this css is attached to */
+	struct cgroup_subsys *ss;
+
 	/* reference count - access via css_[try]get() and css_put() */
 	struct percpu_ref refcnt;
 
+	/* the parent css */
+	struct cgroup_subsys_state *parent;
+
 	unsigned long flags;
 	/* ID for this css, if possible */
 	struct css_id __rcu *id;
 
-	/* Used to put @cgroup->dentry on the last css_put() */
-	struct work_struct dput_work;
+	/* percpu_ref killing and RCU release */
+	struct rcu_head rcu_head;
+	struct work_struct destroy_work;
 };
 
 /* bits in struct cgroup_subsys_state flags field */
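The two new backlinks are what the rest of the series builds on: ->ss lets generic code find the owning subsystem from a bare css, and ->parent makes css_parent() a plain pointer read instead of a walk through css->cgroup->parent->subsys[...]. For a subsystem, the resulting access pattern is the container_of() idiom already shown in the blk-cgroup.h hunk, here restated with an illustrative struct:

	static inline struct my_state *css_to_my_state(struct cgroup_subsys_state *css)
	{
		return css ? container_of(css, struct my_state, css) : NULL;
	}

	static inline struct my_state *my_state_parent(struct my_state *ms)
	{
		/* css_parent() just reads the new ->parent field */
		return css_to_my_state(css_parent(&ms->css));
	}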
@@ -161,7 +164,16 @@ struct cgroup_name {
 struct cgroup {
 	unsigned long flags;		/* "unsigned long" so bitops work */
 
-	int id;				/* ida allocated in-hierarchy ID */
+	/*
+	 * idr allocated in-hierarchy ID.
+	 *
+	 * The ID of the root cgroup is always 0, and a new cgroup
+	 * will be assigned with a smallest available ID.
+	 */
+	int id;
+
+	/* the number of attached css's */
+	int nr_css;
 
 	/*
 	 * We link our 'sibling' struct into our parent's 'children'.
@@ -196,7 +208,7 @@
 	struct cgroup_name __rcu *name;
 
 	/* Private pointers for each registered subsystem */
-	struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];
+	struct cgroup_subsys_state __rcu *subsys[CGROUP_SUBSYS_COUNT];
 
 	struct cgroupfs_root *root;
 
@@ -220,10 +232,12 @@
 	struct list_head pidlists;
 	struct mutex pidlist_mutex;
 
+	/* dummy css with NULL ->ss, points back to this cgroup */
+	struct cgroup_subsys_state dummy_css;
+
 	/* For css percpu_ref killing and RCU-protected deletion */
 	struct rcu_head rcu_head;
 	struct work_struct destroy_work;
-	atomic_t css_kill_cnt;
 
 	/* List of events which userspace want to receive */
 	struct list_head event_list;
@@ -322,7 +336,7 @@ struct cgroupfs_root {
 	unsigned long flags;
 
 	/* IDs for cgroups in this hierarchy */
-	struct ida cgroup_ida;
+	struct idr cgroup_idr;
 
 	/* The path to use for release notifications. */
 	char release_agent_path[PATH_MAX];
@@ -394,9 +408,10 @@ struct cgroup_map_cb {
 
 /* cftype->flags */
 enum {
-	CFTYPE_ONLY_ON_ROOT	= (1 << 0),	/* only create on root cg */
-	CFTYPE_NOT_ON_ROOT	= (1 << 1),	/* don't create on root cg */
+	CFTYPE_ONLY_ON_ROOT	= (1 << 0),	/* only create on root cgrp */
+	CFTYPE_NOT_ON_ROOT	= (1 << 1),	/* don't create on root cgrp */
 	CFTYPE_INSANE		= (1 << 2),	/* don't create if sane_behavior */
+	CFTYPE_NO_PREFIX	= (1 << 3),	/* (DON'T USE FOR NEW FILES) no subsys prefix */
 };
 
 #define MAX_CFTYPE_NAME		64
@@ -424,35 +439,41 @@ struct cftype {
 	/* CFTYPE_* flags */
 	unsigned int flags;
 
+	/*
+	 * The subsys this file belongs to.  Initialized automatically
+	 * during registration.  NULL for cgroup core files.
+	 */
+	struct cgroup_subsys *ss;
+
 	int (*open)(struct inode *inode, struct file *file);
-	ssize_t (*read)(struct cgroup *cgrp, struct cftype *cft,
+	ssize_t (*read)(struct cgroup_subsys_state *css, struct cftype *cft,
 			struct file *file,
 			char __user *buf, size_t nbytes, loff_t *ppos);
 	/*
 	 * read_u64() is a shortcut for the common case of returning a
 	 * single integer. Use it in place of read()
 	 */
-	u64 (*read_u64)(struct cgroup *cgrp, struct cftype *cft);
+	u64 (*read_u64)(struct cgroup_subsys_state *css, struct cftype *cft);
 	/*
 	 * read_s64() is a signed version of read_u64()
 	 */
-	s64 (*read_s64)(struct cgroup *cgrp, struct cftype *cft);
+	s64 (*read_s64)(struct cgroup_subsys_state *css, struct cftype *cft);
 	/*
 	 * read_map() is used for defining a map of key/value
 	 * pairs. It should call cb->fill(cb, key, value) for each
 	 * entry. The key/value pairs (and their ordering) should not
 	 * change between reboots.
 	 */
-	int (*read_map)(struct cgroup *cgrp, struct cftype *cft,
+	int (*read_map)(struct cgroup_subsys_state *css, struct cftype *cft,
 			struct cgroup_map_cb *cb);
 	/*
 	 * read_seq_string() is used for outputting a simple sequence
 	 * using seqfile.
 	 */
-	int (*read_seq_string)(struct cgroup *cgrp, struct cftype *cft,
-			       struct seq_file *m);
+	int (*read_seq_string)(struct cgroup_subsys_state *css,
+			       struct cftype *cft, struct seq_file *m);
 
-	ssize_t (*write)(struct cgroup *cgrp, struct cftype *cft,
+	ssize_t (*write)(struct cgroup_subsys_state *css, struct cftype *cft,
 			 struct file *file,
 			 const char __user *buf, size_t nbytes, loff_t *ppos);
 
@@ -461,18 +482,20 @@ struct cftype {
 	 * a single integer (as parsed by simple_strtoull) from
 	 * userspace. Use in place of write(); return 0 or error.
 	 */
-	int (*write_u64)(struct cgroup *cgrp, struct cftype *cft, u64 val);
+	int (*write_u64)(struct cgroup_subsys_state *css, struct cftype *cft,
+			 u64 val);
 	/*
 	 * write_s64() is a signed version of write_u64()
 	 */
-	int (*write_s64)(struct cgroup *cgrp, struct cftype *cft, s64 val);
+	int (*write_s64)(struct cgroup_subsys_state *css, struct cftype *cft,
+			 s64 val);
 
 	/*
 	 * write_string() is passed a nul-terminated kernelspace
 	 * buffer of maximum length determined by max_write_len.
 	 * Returns 0 or -ve error code.
 	 */
-	int (*write_string)(struct cgroup *cgrp, struct cftype *cft,
+	int (*write_string)(struct cgroup_subsys_state *css, struct cftype *cft,
 			    const char *buffer);
 	/*
 	 * trigger() callback can be used to get some kick from the
@@ -480,7 +503,7 @@ struct cftype {
 	 * at all. The private field can be used to determine the
 	 * kick type for multiplexing.
 	 */
-	int (*trigger)(struct cgroup *cgrp, unsigned int event);
+	int (*trigger)(struct cgroup_subsys_state *css, unsigned int event);
 
 	int (*release)(struct inode *inode, struct file *file);
 
@@ -490,16 +513,18 @@ struct cftype {
 	 * you want to provide this functionality. Use eventfd_signal()
 	 * on eventfd to send notification to userspace.
 	 */
-	int (*register_event)(struct cgroup *cgrp, struct cftype *cft,
-			struct eventfd_ctx *eventfd, const char *args);
+	int (*register_event)(struct cgroup_subsys_state *css,
+			      struct cftype *cft, struct eventfd_ctx *eventfd,
+			      const char *args);
 	/*
 	 * unregister_event() callback will be called when userspace
 	 * closes the eventfd or on cgroup removing.
 	 * This callback must be implemented, if you want provide
 	 * notification functionality.
 	 */
-	void (*unregister_event)(struct cgroup *cgrp, struct cftype *cft,
-			struct eventfd_ctx *eventfd);
+	void (*unregister_event)(struct cgroup_subsys_state *css,
+				 struct cftype *cft,
+				 struct eventfd_ctx *eventfd);
 };
 
 /*
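With every file method keyed off a css, a subsystem's control files end up declared like this (the handler names, file name, and my_state are illustrative; the signatures are the ones defined in the hunk above):

	static u64 my_read_u64(struct cgroup_subsys_state *css, struct cftype *cft)
	{
		return css_to_my_state(css)->weight;
	}

	static int my_write_u64(struct cgroup_subsys_state *css, struct cftype *cft,
				u64 val)
	{
		css_to_my_state(css)->weight = val;
		return 0;
	}

	static struct cftype my_files[] = {
		{
			.name = "weight",
			.read_u64 = my_read_u64,
			.write_u64 = my_write_u64,
		},
		{ }	/* terminator */
	};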
@@ -512,15 +537,6 @@ struct cftype_set {
 	struct cftype			*cfts;
 };
 
-struct cgroup_scanner {
-	struct cgroup *cg;
-	int (*test_task)(struct task_struct *p, struct cgroup_scanner *scan);
-	void (*process_task)(struct task_struct *p,
-			struct cgroup_scanner *scan);
-	struct ptr_heap *heap;
-	void *data;
-};
-
 /*
  * See the comment above CGRP_ROOT_SANE_BEHAVIOR for details.  This
  * function can be called as long as @cgrp is accessible.
@@ -537,7 +553,7 @@ static inline const char *cgroup_name(const struct cgroup *cgrp)
 }
 
 int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
-int cgroup_rm_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
+int cgroup_rm_cftypes(struct cftype *cfts);
 
 bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor);
 
@@ -553,20 +569,22 @@ int cgroup_task_count(const struct cgroup *cgrp);
 struct cgroup_taskset;
 struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset);
 struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset);
-struct cgroup *cgroup_taskset_cur_cgroup(struct cgroup_taskset *tset);
+struct cgroup_subsys_state *cgroup_taskset_cur_css(struct cgroup_taskset *tset,
+						   int subsys_id);
 int cgroup_taskset_size(struct cgroup_taskset *tset);
 
 /**
  * cgroup_taskset_for_each - iterate cgroup_taskset
  * @task: the loop cursor
- * @skip_cgrp: skip if task's cgroup matches this, %NULL to iterate through all
+ * @skip_css: skip if task's css matches this, %NULL to iterate through all
  * @tset: taskset to iterate
 */
-#define cgroup_taskset_for_each(task, skip_cgrp, tset)			\
+#define cgroup_taskset_for_each(task, skip_css, tset)			\
 	for ((task) = cgroup_taskset_first((tset)); (task);		\
 	     (task) = cgroup_taskset_next((tset)))			\
-		if (!(skip_cgrp) ||					\
-		    cgroup_taskset_cur_cgroup((tset)) != (skip_cgrp))
+		if (!(skip_css) ||					\
+		    cgroup_taskset_cur_css((tset),			\
+			(skip_css)->ss->subsys_id) != (skip_css))
 
 /*
  * Control Group subsystem type.
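Note how the skip test now derives the subsystem ID from the skipped css itself via (skip_css)->ss->subsys_id, which is one reason cgroup_subsys_state grew the ->ss backlink. Typical use in a can_attach method, modeled on the blkcg conversion earlier in this diff (the per-task check is hypothetical):

	static int my_can_attach(struct cgroup_subsys_state *css,
				 struct cgroup_taskset *tset)
	{
		struct task_struct *task;

		/* pass NULL instead of css to visit every task in the set */
		cgroup_taskset_for_each(task, css, tset) {
			if (!my_task_is_attachable(task))
				return -EINVAL;
		}
		return 0;
	}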
@@ -574,18 +592,22 @@ int cgroup_taskset_size(struct cgroup_taskset *tset);
 */
 
 struct cgroup_subsys {
-	struct cgroup_subsys_state *(*css_alloc)(struct cgroup *cgrp);
-	int (*css_online)(struct cgroup *cgrp);
-	void (*css_offline)(struct cgroup *cgrp);
-	void (*css_free)(struct cgroup *cgrp);
+	struct cgroup_subsys_state *(*css_alloc)(struct cgroup_subsys_state *parent_css);
+	int (*css_online)(struct cgroup_subsys_state *css);
+	void (*css_offline)(struct cgroup_subsys_state *css);
+	void (*css_free)(struct cgroup_subsys_state *css);
 
-	int (*can_attach)(struct cgroup *cgrp, struct cgroup_taskset *tset);
-	void (*cancel_attach)(struct cgroup *cgrp, struct cgroup_taskset *tset);
-	void (*attach)(struct cgroup *cgrp, struct cgroup_taskset *tset);
+	int (*can_attach)(struct cgroup_subsys_state *css,
+			  struct cgroup_taskset *tset);
+	void (*cancel_attach)(struct cgroup_subsys_state *css,
+			      struct cgroup_taskset *tset);
+	void (*attach)(struct cgroup_subsys_state *css,
+		       struct cgroup_taskset *tset);
 	void (*fork)(struct task_struct *task);
-	void (*exit)(struct cgroup *cgrp, struct cgroup *old_cgrp,
+	void (*exit)(struct cgroup_subsys_state *css,
+		     struct cgroup_subsys_state *old_css,
 		     struct task_struct *task);
-	void (*bind)(struct cgroup *root);
+	void (*bind)(struct cgroup_subsys_state *root_css);
 
 	int subsys_id;
 	int disabled;
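Wired together, a subsystem definition after this series keeps the same ops-table shape, just with css-typed methods throughout; a sketch only (blkio_subsys elsewhere in this diff is the real example):

	struct cgroup_subsys my_subsys = {
		.name		= "my",
		.css_alloc	= my_css_alloc,
		.css_free	= my_css_free,
		.can_attach	= my_can_attach,
		.subsys_id	= my_subsys_id,	/* assumes a SUBSYS() registration */
	};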
@@ -641,10 +663,17 @@
 #undef IS_SUBSYS_ENABLED
 #undef SUBSYS
 
-static inline struct cgroup_subsys_state *cgroup_subsys_state(
-	struct cgroup *cgrp, int subsys_id)
+/**
+ * css_parent - find the parent css
+ * @css: the target cgroup_subsys_state
+ *
+ * Return the parent css of @css.  This function is guaranteed to return
+ * non-NULL parent as long as @css isn't the root.
+ */
+static inline
+struct cgroup_subsys_state *css_parent(struct cgroup_subsys_state *css)
 {
-	return cgrp->subsys[subsys_id];
+	return css->parent;
 }
 
 /**
@@ -672,7 +701,7 @@ extern struct mutex cgroup_mutex;
 #endif
 
 /**
- * task_subsys_state_check - obtain css for (task, subsys) w/ extra access conds
+ * task_css_check - obtain css for (task, subsys) w/ extra access conds
  * @task: the target task
  * @subsys_id: the target subsystem ID
  * @__c: extra condition expression to be passed to rcu_dereference_check()
@@ -680,7 +709,7 @@ extern struct mutex cgroup_mutex;
 * Return the cgroup_subsys_state for the (@task, @subsys_id) pair.  The
 * synchronization rules are the same as task_css_set_check().
 */
-#define task_subsys_state_check(task, subsys_id, __c)			\
+#define task_css_check(task, subsys_id, __c)				\
 	task_css_set_check((task), (__c))->subsys[(subsys_id)]
 
 /**
@@ -695,87 +724,92 @@ static inline struct css_set *task_css_set(struct task_struct *task)
 }
 
 /**
- * task_subsys_state - obtain css for (task, subsys)
+ * task_css - obtain css for (task, subsys)
  * @task: the target task
  * @subsys_id: the target subsystem ID
 *
- * See task_subsys_state_check().
+ * See task_css_check().
 */
-static inline struct cgroup_subsys_state *
-task_subsys_state(struct task_struct *task, int subsys_id)
+static inline struct cgroup_subsys_state *task_css(struct task_struct *task,
+						   int subsys_id)
 {
-	return task_subsys_state_check(task, subsys_id, false);
+	return task_css_check(task, subsys_id, false);
 }
 
-static inline struct cgroup* task_cgroup(struct task_struct *task,
+static inline struct cgroup *task_cgroup(struct task_struct *task,
 					 int subsys_id)
 {
-	return task_subsys_state(task, subsys_id)->cgroup;
+	return task_css(task, subsys_id)->cgroup;
 }
 
-struct cgroup *cgroup_next_sibling(struct cgroup *pos);
+struct cgroup_subsys_state *css_next_child(struct cgroup_subsys_state *pos,
+					   struct cgroup_subsys_state *parent);
+
+struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss);
 
 /**
- * cgroup_for_each_child - iterate through children of a cgroup
- * @pos: the cgroup * to use as the loop cursor
- * @cgrp: cgroup whose children to walk
+ * css_for_each_child - iterate through children of a css
+ * @pos: the css * to use as the loop cursor
+ * @parent: css whose children to walk
 *
- * Walk @cgrp's children.  Must be called under rcu_read_lock().  A child
- * cgroup which hasn't finished ->css_online() or already has finished
+ * Walk @parent's children.  Must be called under rcu_read_lock().  A child
+ * css which hasn't finished ->css_online() or already has finished
 * ->css_offline() may show up during traversal and it's each subsystem's
 * responsibility to verify that each @pos is alive.
 *
 * If a subsystem synchronizes against the parent in its ->css_online() and
- * before starting iterating, a cgroup which finished ->css_online() is
+ * before starting iterating, a css which finished ->css_online() is
 * guaranteed to be visible in the future iterations.
 *
 * It is allowed to temporarily drop RCU read lock during iteration.  The
 * caller is responsible for ensuring that @pos remains accessible until
 * the start of the next iteration by, for example, bumping the css refcnt.
 */
-#define cgroup_for_each_child(pos, cgrp)				\
-	for ((pos) = list_first_or_null_rcu(&(cgrp)->children,		\
-					    struct cgroup, sibling);	\
-	     (pos); (pos) = cgroup_next_sibling((pos)))
+#define css_for_each_child(pos, parent)					\
+	for ((pos) = css_next_child(NULL, (parent)); (pos);		\
+	     (pos) = css_next_child((pos), (parent)))
+
+struct cgroup_subsys_state *
+css_next_descendant_pre(struct cgroup_subsys_state *pos,
+			struct cgroup_subsys_state *css);
 
-struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos,
-					  struct cgroup *cgroup);
-struct cgroup *cgroup_rightmost_descendant(struct cgroup *pos);
+struct cgroup_subsys_state *
+css_rightmost_descendant(struct cgroup_subsys_state *pos);
 
 /**
- * cgroup_for_each_descendant_pre - pre-order walk of a cgroup's descendants
- * @pos: the cgroup * to use as the loop cursor
- * @cgroup: cgroup whose descendants to walk
+ * css_for_each_descendant_pre - pre-order walk of a css's descendants
+ * @pos: the css * to use as the loop cursor
+ * @root: css whose descendants to walk
 *
- * Walk @cgroup's descendants.  Must be called under rcu_read_lock().  A
- * descendant cgroup which hasn't finished ->css_online() or already has
+ * Walk @root's descendants.  @root is included in the iteration and the
+ * first node to be visited.  Must be called under rcu_read_lock().  A
+ * descendant css which hasn't finished ->css_online() or already has
 * finished ->css_offline() may show up during traversal and it's each
 * subsystem's responsibility to verify that each @pos is alive.
 *
 * If a subsystem synchronizes against the parent in its ->css_online() and
 * before starting iterating, and synchronizes against @pos on each
- * iteration, any descendant cgroup which finished ->css_online() is
+ * iteration, any descendant css which finished ->css_online() is
 * guaranteed to be visible in the future iterations.
 *
 * In other words, the following guarantees that a descendant can't escape
 * state updates of its ancestors.
 *
- * my_online(@cgrp)
+ * my_online(@css)
 * {
- *	Lock @cgrp->parent and @cgrp;
- *	Inherit state from @cgrp->parent;
+ *	Lock @css's parent and @css;
+ *	Inherit state from the parent;
 *	Unlock both.
 * }
 *
- * my_update_state(@cgrp)
+ * my_update_state(@css)
 * {
- *	Lock @cgrp;
- *	Update @cgrp's state;
- *	Unlock @cgrp;
- *
- *	cgroup_for_each_descendant_pre(@pos, @cgrp) {
+ *	css_for_each_descendant_pre(@pos, @css) {
 *		Lock @pos;
- *		Verify @pos is alive and inherit state from @pos->parent;
+ *		if (@pos == @css)
+ *			Update @css's state;
+ *		else
+ *			Verify @pos is alive and inherit state from its parent;
 *		Unlock @pos;
 *	}
 * }
@@ -786,8 +820,7 @@ struct cgroup *cgroup_rightmost_descendant(struct cgroup *pos); | |||
786 | * visible by walking order and, as long as inheriting operations to the | 820 | * visible by walking order and, as long as inheriting operations to the |
787 | * same @pos are atomic to each other, multiple updates racing each other | 821 | * same @pos are atomic to each other, multiple updates racing each other |
788 | * still result in the correct state. It's guaranteed that at least one | 822 | * still result in the correct state. It's guaranteed that at least one |
789 | * inheritance happens for any cgroup after the latest update to its | 823 | * inheritance happens for any css after the latest update to its parent. |
790 | * parent. | ||
791 | * | 824 | * |
792 | * If checking parent's state requires locking the parent, each inheriting | 825 | * If checking parent's state requires locking the parent, each inheriting |
793 | * iteration should lock and unlock both @pos->parent and @pos. | 826 | * iteration should lock and unlock both @pos->parent and @pos. |
@@ -800,52 +833,45 @@ struct cgroup *cgroup_rightmost_descendant(struct cgroup *pos); | |||
800 | * caller is responsible for ensuring that @pos remains accessible until | 833 | * caller is responsible for ensuring that @pos remains accessible until |
801 | * the start of the next iteration by, for example, bumping the css refcnt. | 834 | * the start of the next iteration by, for example, bumping the css refcnt. |
802 | */ | 835 | */ |
803 | #define cgroup_for_each_descendant_pre(pos, cgroup) \ | 836 | #define css_for_each_descendant_pre(pos, css) \ |
804 | for (pos = cgroup_next_descendant_pre(NULL, (cgroup)); (pos); \ | 837 | for ((pos) = css_next_descendant_pre(NULL, (css)); (pos); \ |
805 | pos = cgroup_next_descendant_pre((pos), (cgroup))) | 838 | (pos) = css_next_descendant_pre((pos), (css))) |
806 | 839 | ||
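For illustration, a minimal sketch of the state-propagation pattern described in the comment above, written against the new css-based iterator; struct my_state, css_to_my_state() and the ->frozen field are hypothetical, not part of this patch:

	static void my_propagate_frozen(struct cgroup_subsys_state *root_css,
					bool frozen)
	{
		struct cgroup_subsys_state *pos;

		rcu_read_lock();
		css_for_each_descendant_pre(pos, root_css) {
			/* @root_css itself is visited first, then its subtree */
			struct my_state *s = css_to_my_state(pos);

			spin_lock(&s->lock);
			s->frozen = frozen;
			spin_unlock(&s->lock);
		}
		rcu_read_unlock();
	}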
807 | struct cgroup *cgroup_next_descendant_post(struct cgroup *pos, | 840 | struct cgroup_subsys_state * |
808 | struct cgroup *cgroup); | 841 | css_next_descendant_post(struct cgroup_subsys_state *pos, |
842 | struct cgroup_subsys_state *css); | ||
809 | 843 | ||
810 | /** | 844 | /** |
811 | * cgroup_for_each_descendant_post - post-order walk of a cgroup's descendants | 845 | * css_for_each_descendant_post - post-order walk of a css's descendants |
812 | * @pos: the cgroup * to use as the loop cursor | 846 | * @pos: the css * to use as the loop cursor |
813 | * @cgroup: cgroup whose descendants to walk | 847 | * @css: css whose descendants to walk |
814 | * | 848 | * |
815 | * Similar to cgroup_for_each_descendant_pre() but performs post-order | 849 | * Similar to css_for_each_descendant_pre() but performs post-order |
816 | * traversal instead. Note that the walk visibility guarantee described in | 850 | * traversal instead. @root is included in the iteration and the last |
817 | * pre-order walk doesn't apply the same to post-order walks. | 851 | * node to be visited. Note that the walk visibility guarantee described |
852 | * in pre-order walk doesn't apply the same to post-order walks. | ||
818 | */ | 853 | */ |
819 | #define cgroup_for_each_descendant_post(pos, cgroup) \ | 854 | #define css_for_each_descendant_post(pos, css) \ |
820 | for (pos = cgroup_next_descendant_post(NULL, (cgroup)); (pos); \ | 855 | for ((pos) = css_next_descendant_post(NULL, (css)); (pos); \ |
821 | pos = cgroup_next_descendant_post((pos), (cgroup))) | 856 | (pos) = css_next_descendant_post((pos), (css))) |
822 | 857 | ||
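The post-order variant visits every descendant before its parent, which suits bottom-up teardown; again a hedged sketch, with my_flush_one() purely illustrative:

	static void my_drain_subtree(struct cgroup_subsys_state *root_css)
	{
		struct cgroup_subsys_state *pos;

		rcu_read_lock();
		css_for_each_descendant_post(pos, root_css) {
			/* each @pos is seen only after all of its descendants */
			my_flush_one(pos);
		}
		rcu_read_unlock();
	}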
823 | /* A cgroup_iter should be treated as an opaque object */ | 858 | /* A css_task_iter should be treated as an opaque object */ |
824 | struct cgroup_iter { | 859 | struct css_task_iter { |
825 | struct list_head *cset_link; | 860 | struct cgroup_subsys_state *origin_css; |
826 | struct list_head *task; | 861 | struct list_head *cset_link; |
862 | struct list_head *task; | ||
827 | }; | 863 | }; |
828 | 864 | ||
829 | /* | 865 | void css_task_iter_start(struct cgroup_subsys_state *css, |
830 | * To iterate across the tasks in a cgroup: | 866 | struct css_task_iter *it); |
831 | * | 867 | struct task_struct *css_task_iter_next(struct css_task_iter *it); |
832 | * 1) call cgroup_iter_start to initialize an iterator | 868 | void css_task_iter_end(struct css_task_iter *it); |
833 | * | 869 | |
834 | * 2) call cgroup_iter_next() to retrieve member tasks until it | 870 | int css_scan_tasks(struct cgroup_subsys_state *css, |
835 | * returns NULL or until you want to end the iteration | 871 | bool (*test)(struct task_struct *, void *), |
836 | * | 872 | void (*process)(struct task_struct *, void *), |
837 | * 3) call cgroup_iter_end() to destroy the iterator. | 873 | void *data, struct ptr_heap *heap); |
838 | * | 874 | |
839 | * Or, call cgroup_scan_tasks() to iterate through every task in a | ||
840 | * cgroup - cgroup_scan_tasks() holds the css_set_lock when calling | ||
841 | * the test_task() callback, but not while calling the process_task() | ||
842 | * callback. | ||
843 | */ | ||
844 | void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it); | ||
845 | struct task_struct *cgroup_iter_next(struct cgroup *cgrp, | ||
846 | struct cgroup_iter *it); | ||
847 | void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it); | ||
848 | int cgroup_scan_tasks(struct cgroup_scanner *scan); | ||
849 | int cgroup_attach_task_all(struct task_struct *from, struct task_struct *); | 875 | int cgroup_attach_task_all(struct task_struct *from, struct task_struct *); |
850 | int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from); | 876 | int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from); |
851 | 877 | ||
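A short sketch of the replacement task-iteration API declared above, counting the member tasks of a css (the wrapper function itself is hypothetical):

	static unsigned int my_count_tasks(struct cgroup_subsys_state *css)
	{
		struct css_task_iter it;
		struct task_struct *task;
		unsigned int n = 0;

		css_task_iter_start(css, &it);
		while ((task = css_task_iter_next(&it)))
			n++;
		css_task_iter_end(&it);
		return n;
	}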
@@ -878,7 +904,8 @@ bool css_is_ancestor(struct cgroup_subsys_state *cg, | |||
878 | 904 | ||
879 | /* Get id and depth of css */ | 905 | /* Get id and depth of css */ |
880 | unsigned short css_id(struct cgroup_subsys_state *css); | 906 | unsigned short css_id(struct cgroup_subsys_state *css); |
881 | struct cgroup_subsys_state *cgroup_css_from_dir(struct file *f, int id); | 907 | struct cgroup_subsys_state *css_from_dir(struct dentry *dentry, |
908 | struct cgroup_subsys *ss); | ||
882 | 909 | ||
883 | #else /* !CONFIG_CGROUPS */ | 910 | #else /* !CONFIG_CGROUPS */ |
884 | 911 | ||
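css_from_dir() lets callers such as perf resolve an open cgroupfs directory straight to a subsystem's css; roughly, and assuming the function returns an ERR_PTR on failure as its perf caller expects:

	css = css_from_dir(f.file->f_dentry, &perf_subsys);
	if (IS_ERR(css))
		return PTR_ERR(css);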
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 7b4d9d79570b..6c416092e324 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h | |||
@@ -85,7 +85,7 @@ extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p); | |||
85 | extern struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm); | 85 | extern struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm); |
86 | 86 | ||
87 | extern struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg); | 87 | extern struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg); |
88 | extern struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont); | 88 | extern struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css); |
89 | 89 | ||
90 | static inline | 90 | static inline |
91 | bool mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *memcg) | 91 | bool mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *memcg) |
diff --git a/include/linux/vmpressure.h b/include/linux/vmpressure.h index 7dc17e2456de..3f3788d49362 100644 --- a/include/linux/vmpressure.h +++ b/include/linux/vmpressure.h | |||
@@ -34,10 +34,12 @@ extern void vmpressure_cleanup(struct vmpressure *vmpr); | |||
34 | extern struct vmpressure *memcg_to_vmpressure(struct mem_cgroup *memcg); | 34 | extern struct vmpressure *memcg_to_vmpressure(struct mem_cgroup *memcg); |
35 | extern struct cgroup_subsys_state *vmpressure_to_css(struct vmpressure *vmpr); | 35 | extern struct cgroup_subsys_state *vmpressure_to_css(struct vmpressure *vmpr); |
36 | extern struct vmpressure *css_to_vmpressure(struct cgroup_subsys_state *css); | 36 | extern struct vmpressure *css_to_vmpressure(struct cgroup_subsys_state *css); |
37 | extern int vmpressure_register_event(struct cgroup *cg, struct cftype *cft, | 37 | extern int vmpressure_register_event(struct cgroup_subsys_state *css, |
38 | struct cftype *cft, | ||
38 | struct eventfd_ctx *eventfd, | 39 | struct eventfd_ctx *eventfd, |
39 | const char *args); | 40 | const char *args); |
40 | extern void vmpressure_unregister_event(struct cgroup *cg, struct cftype *cft, | 41 | extern void vmpressure_unregister_event(struct cgroup_subsys_state *css, |
42 | struct cftype *cft, | ||
41 | struct eventfd_ctx *eventfd); | 43 | struct eventfd_ctx *eventfd); |
42 | #else | 44 | #else |
43 | static inline void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, | 45 | static inline void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, |
diff --git a/include/net/cls_cgroup.h b/include/net/cls_cgroup.h index 0fee0617fb7d..52adaa75dac9 100644 --- a/include/net/cls_cgroup.h +++ b/include/net/cls_cgroup.h | |||
@@ -35,7 +35,7 @@ static inline u32 task_cls_classid(struct task_struct *p) | |||
35 | return 0; | 35 | return 0; |
36 | 36 | ||
37 | rcu_read_lock(); | 37 | rcu_read_lock(); |
38 | classid = container_of(task_subsys_state(p, net_cls_subsys_id), | 38 | classid = container_of(task_css(p, net_cls_subsys_id), |
39 | struct cgroup_cls_state, css)->classid; | 39 | struct cgroup_cls_state, css)->classid; |
40 | rcu_read_unlock(); | 40 | rcu_read_unlock(); |
41 | 41 | ||
@@ -51,7 +51,7 @@ static inline u32 task_cls_classid(struct task_struct *p) | |||
51 | return 0; | 51 | return 0; |
52 | 52 | ||
53 | rcu_read_lock(); | 53 | rcu_read_lock(); |
54 | css = task_subsys_state(p, net_cls_subsys_id); | 54 | css = task_css(p, net_cls_subsys_id); |
55 | if (css) | 55 | if (css) |
56 | classid = container_of(css, | 56 | classid = container_of(css, |
57 | struct cgroup_cls_state, css)->classid; | 57 | struct cgroup_cls_state, css)->classid; |
diff --git a/include/net/netprio_cgroup.h b/include/net/netprio_cgroup.h index 50ab8c26ab59..a24f8bb3ca47 100644 --- a/include/net/netprio_cgroup.h +++ b/include/net/netprio_cgroup.h | |||
@@ -25,10 +25,6 @@ struct netprio_map { | |||
25 | u32 priomap[]; | 25 | u32 priomap[]; |
26 | }; | 26 | }; |
27 | 27 | ||
28 | struct cgroup_netprio_state { | ||
29 | struct cgroup_subsys_state css; | ||
30 | }; | ||
31 | |||
32 | extern void sock_update_netprioidx(struct sock *sk); | 28 | extern void sock_update_netprioidx(struct sock *sk); |
33 | 29 | ||
34 | #if IS_BUILTIN(CONFIG_NETPRIO_CGROUP) | 30 | #if IS_BUILTIN(CONFIG_NETPRIO_CGROUP) |
@@ -39,7 +35,7 @@ static inline u32 task_netprioidx(struct task_struct *p) | |||
39 | u32 idx; | 35 | u32 idx; |
40 | 36 | ||
41 | rcu_read_lock(); | 37 | rcu_read_lock(); |
42 | css = task_subsys_state(p, net_prio_subsys_id); | 38 | css = task_css(p, net_prio_subsys_id); |
43 | idx = css->cgroup->id; | 39 | idx = css->cgroup->id; |
44 | rcu_read_unlock(); | 40 | rcu_read_unlock(); |
45 | return idx; | 41 | return idx; |
@@ -53,7 +49,7 @@ static inline u32 task_netprioidx(struct task_struct *p) | |||
53 | u32 idx = 0; | 49 | u32 idx = 0; |
54 | 50 | ||
55 | rcu_read_lock(); | 51 | rcu_read_lock(); |
56 | css = task_subsys_state(p, net_prio_subsys_id); | 52 | css = task_css(p, net_prio_subsys_id); |
57 | if (css) | 53 | if (css) |
58 | idx = css->cgroup->id; | 54 | idx = css->cgroup->id; |
59 | rcu_read_unlock(); | 55 | rcu_read_unlock(); |
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index e91963302c0d..e0aeb32415ff 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
@@ -81,7 +81,7 @@ | |||
81 | */ | 81 | */ |
82 | #ifdef CONFIG_PROVE_RCU | 82 | #ifdef CONFIG_PROVE_RCU |
83 | DEFINE_MUTEX(cgroup_mutex); | 83 | DEFINE_MUTEX(cgroup_mutex); |
84 | EXPORT_SYMBOL_GPL(cgroup_mutex); /* only for task_subsys_state_check() */ | 84 | EXPORT_SYMBOL_GPL(cgroup_mutex); /* only for lockdep */ |
85 | #else | 85 | #else |
86 | static DEFINE_MUTEX(cgroup_mutex); | 86 | static DEFINE_MUTEX(cgroup_mutex); |
87 | #endif | 87 | #endif |
@@ -117,6 +117,7 @@ struct cfent { | |||
117 | struct list_head node; | 117 | struct list_head node; |
118 | struct dentry *dentry; | 118 | struct dentry *dentry; |
119 | struct cftype *type; | 119 | struct cftype *type; |
120 | struct cgroup_subsys_state *css; | ||
120 | 121 | ||
121 | /* file xattrs */ | 122 | /* file xattrs */ |
122 | struct simple_xattrs xattrs; | 123 | struct simple_xattrs xattrs; |
@@ -159,9 +160,9 @@ struct css_id { | |||
159 | */ | 160 | */ |
160 | struct cgroup_event { | 161 | struct cgroup_event { |
161 | /* | 162 | /* |
162 | * Cgroup which the event belongs to. | 163 | * css which the event belongs to. |
163 | */ | 164 | */ |
164 | struct cgroup *cgrp; | 165 | struct cgroup_subsys_state *css; |
165 | /* | 166 | /* |
166 | * Control file with which the event is associated. | 167 | * Control file with which the event is associated. |
167 | */ | 168 | */ |
@@ -215,10 +216,33 @@ static u64 cgroup_serial_nr_next = 1; | |||
215 | */ | 216 | */ |
216 | static int need_forkexit_callback __read_mostly; | 217 | static int need_forkexit_callback __read_mostly; |
217 | 218 | ||
218 | static void cgroup_offline_fn(struct work_struct *work); | 219 | static struct cftype cgroup_base_files[]; |
220 | |||
221 | static void cgroup_destroy_css_killed(struct cgroup *cgrp); | ||
219 | static int cgroup_destroy_locked(struct cgroup *cgrp); | 222 | static int cgroup_destroy_locked(struct cgroup *cgrp); |
220 | static int cgroup_addrm_files(struct cgroup *cgrp, struct cgroup_subsys *subsys, | 223 | static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[], |
221 | struct cftype cfts[], bool is_add); | 224 | bool is_add); |
225 | |||
226 | /** | ||
227 | * cgroup_css - obtain a cgroup's css for the specified subsystem | ||
228 | * @cgrp: the cgroup of interest | ||
229 | * @ss: the subsystem of interest (%NULL returns the dummy_css) | ||
230 | * | ||
231 | * Return @cgrp's css (cgroup_subsys_state) associated with @ss. This | ||
232 | * function must be called either under cgroup_mutex or rcu_read_lock() and | ||
233 | * the caller is responsible for pinning the returned css if it wants to | ||
234 | * keep accessing it outside the said locks. This function may return | ||
235 | * %NULL if @cgrp doesn't have @ss enabled. | ||
236 | */ | ||
237 | static struct cgroup_subsys_state *cgroup_css(struct cgroup *cgrp, | ||
238 | struct cgroup_subsys *ss) | ||
239 | { | ||
240 | if (ss) | ||
241 | return rcu_dereference_check(cgrp->subsys[ss->subsys_id], | ||
242 | lockdep_is_held(&cgroup_mutex)); | ||
243 | else | ||
244 | return &cgrp->dummy_css; | ||
245 | } | ||
222 | 246 | ||
223 | /* convenient tests for these bits */ | 247 | /* convenient tests for these bits */ |
224 | static inline bool cgroup_is_dead(const struct cgroup *cgrp) | 248 | static inline bool cgroup_is_dead(const struct cgroup *cgrp) |
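Within kernel/cgroup.c (the helper is static), a typical use follows the locking comment above; my_get_css() below is a hypothetical sketch that pins the css with css_tryget() before the RCU read lock is dropped:

	static struct cgroup_subsys_state *my_get_css(struct cgroup *cgrp,
						      struct cgroup_subsys *ss)
	{
		struct cgroup_subsys_state *css;

		rcu_read_lock();
		css = cgroup_css(cgrp, ss);
		if (css && !css_tryget(css))
			css = NULL;	/* css is already on its way out */
		rcu_read_unlock();
		return css;
	}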
@@ -365,9 +389,11 @@ static struct cgrp_cset_link init_cgrp_cset_link; | |||
365 | static int cgroup_init_idr(struct cgroup_subsys *ss, | 389 | static int cgroup_init_idr(struct cgroup_subsys *ss, |
366 | struct cgroup_subsys_state *css); | 390 | struct cgroup_subsys_state *css); |
367 | 391 | ||
368 | /* css_set_lock protects the list of css_set objects, and the | 392 | /* |
369 | * chain of tasks off each css_set. Nests outside task->alloc_lock | 393 | * css_set_lock protects the list of css_set objects, and the chain of |
370 | * due to cgroup_iter_start() */ | 394 | * tasks off each css_set. Nests outside task->alloc_lock due to |
395 | * css_task_iter_start(). | ||
396 | */ | ||
371 | static DEFINE_RWLOCK(css_set_lock); | 397 | static DEFINE_RWLOCK(css_set_lock); |
372 | static int css_set_count; | 398 | static int css_set_count; |
373 | 399 | ||
@@ -392,10 +418,12 @@ static unsigned long css_set_hash(struct cgroup_subsys_state *css[]) | |||
392 | return key; | 418 | return key; |
393 | } | 419 | } |
394 | 420 | ||
395 | /* We don't maintain the lists running through each css_set to its | 421 | /* |
396 | * task until after the first call to cgroup_iter_start(). This | 422 | * We don't maintain the lists running through each css_set to its task |
397 | * reduces the fork()/exit() overhead for people who have cgroups | 423 | * until after the first call to css_task_iter_start(). This reduces the |
398 | * compiled into their kernel but not actually in use */ | 424 | * fork()/exit() overhead for people who have cgroups compiled into their |
425 | * kernel but not actually in use. | ||
426 | */ | ||
399 | static int use_task_css_set_links __read_mostly; | 427 | static int use_task_css_set_links __read_mostly; |
400 | 428 | ||
401 | static void __put_css_set(struct css_set *cset, int taskexit) | 429 | static void __put_css_set(struct css_set *cset, int taskexit) |
@@ -464,7 +492,7 @@ static inline void put_css_set_taskexit(struct css_set *cset) | |||
464 | * @new_cgrp: cgroup that's being entered by the task | 492 | * @new_cgrp: cgroup that's being entered by the task |
465 | * @template: desired set of css pointers in css_set (pre-calculated) | 493 | * @template: desired set of css pointers in css_set (pre-calculated) |
466 | * | 494 | * |
467 | * Returns true if "cg" matches "old_cg" except for the hierarchy | 495 | * Returns true if "cset" matches "old_cset" except for the hierarchy |
468 | * which "new_cgrp" belongs to, for which it should match "new_cgrp". | 496 | * which "new_cgrp" belongs to, for which it should match "new_cgrp". |
469 | */ | 497 | */ |
470 | static bool compare_css_sets(struct css_set *cset, | 498 | static bool compare_css_sets(struct css_set *cset, |
@@ -555,7 +583,7 @@ static struct css_set *find_existing_css_set(struct css_set *old_cset, | |||
555 | /* Subsystem is in this hierarchy. So we want | 583 | /* Subsystem is in this hierarchy. So we want |
556 | * the subsystem state from the new | 584 | * the subsystem state from the new |
557 | * cgroup */ | 585 | * cgroup */ |
558 | template[i] = cgrp->subsys[i]; | 586 | template[i] = cgroup_css(cgrp, ss); |
559 | } else { | 587 | } else { |
560 | /* Subsystem is not in this hierarchy, so we | 588 | /* Subsystem is not in this hierarchy, so we |
561 | * don't want to change the subsystem state */ | 589 | * don't want to change the subsystem state */ |
@@ -803,8 +831,7 @@ static struct cgroup *task_cgroup_from_root(struct task_struct *task, | |||
803 | 831 | ||
804 | static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode); | 832 | static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode); |
805 | static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry); | 833 | static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry); |
806 | static int cgroup_populate_dir(struct cgroup *cgrp, bool base_files, | 834 | static int cgroup_populate_dir(struct cgroup *cgrp, unsigned long subsys_mask); |
807 | unsigned long subsys_mask); | ||
808 | static const struct inode_operations cgroup_dir_inode_operations; | 835 | static const struct inode_operations cgroup_dir_inode_operations; |
809 | static const struct file_operations proc_cgroupstats_operations; | 836 | static const struct file_operations proc_cgroupstats_operations; |
810 | 837 | ||
@@ -813,8 +840,7 @@ static struct backing_dev_info cgroup_backing_dev_info = { | |||
813 | .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, | 840 | .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, |
814 | }; | 841 | }; |
815 | 842 | ||
816 | static int alloc_css_id(struct cgroup_subsys *ss, | 843 | static int alloc_css_id(struct cgroup_subsys_state *child_css); |
817 | struct cgroup *parent, struct cgroup *child); | ||
818 | 844 | ||
819 | static struct inode *cgroup_new_inode(umode_t mode, struct super_block *sb) | 845 | static struct inode *cgroup_new_inode(umode_t mode, struct super_block *sb) |
820 | { | 846 | { |
@@ -845,15 +871,8 @@ static struct cgroup_name *cgroup_alloc_name(struct dentry *dentry) | |||
845 | static void cgroup_free_fn(struct work_struct *work) | 871 | static void cgroup_free_fn(struct work_struct *work) |
846 | { | 872 | { |
847 | struct cgroup *cgrp = container_of(work, struct cgroup, destroy_work); | 873 | struct cgroup *cgrp = container_of(work, struct cgroup, destroy_work); |
848 | struct cgroup_subsys *ss; | ||
849 | 874 | ||
850 | mutex_lock(&cgroup_mutex); | 875 | mutex_lock(&cgroup_mutex); |
851 | /* | ||
852 | * Release the subsystem state objects. | ||
853 | */ | ||
854 | for_each_root_subsys(cgrp->root, ss) | ||
855 | ss->css_free(cgrp); | ||
856 | |||
857 | cgrp->root->number_of_cgroups--; | 876 | cgrp->root->number_of_cgroups--; |
858 | mutex_unlock(&cgroup_mutex); | 877 | mutex_unlock(&cgroup_mutex); |
859 | 878 | ||
@@ -864,8 +883,6 @@ static void cgroup_free_fn(struct work_struct *work) | |||
864 | */ | 883 | */ |
865 | dput(cgrp->parent->dentry); | 884 | dput(cgrp->parent->dentry); |
866 | 885 | ||
867 | ida_simple_remove(&cgrp->root->cgroup_ida, cgrp->id); | ||
868 | |||
869 | /* | 886 | /* |
870 | * Drop the active superblock reference that we took when we | 887 | * Drop the active superblock reference that we took when we |
871 | * created the cgroup. This will free cgrp->root, if we are | 888 | * created the cgroup. This will free cgrp->root, if we are |
@@ -956,27 +973,22 @@ static void cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft) | |||
956 | } | 973 | } |
957 | 974 | ||
958 | /** | 975 | /** |
959 | * cgroup_clear_directory - selective removal of base and subsystem files | 976 | * cgroup_clear_dir - remove subsys files in a cgroup directory |
960 | * @dir: directory containing the files | 977 | * @cgrp: target cgroup |
961 | * @base_files: true if the base files should be removed | ||
962 | * @subsys_mask: mask of the subsystem ids whose files should be removed | 978 | * @subsys_mask: mask of the subsystem ids whose files should be removed |
963 | */ | 979 | */ |
964 | static void cgroup_clear_directory(struct dentry *dir, bool base_files, | 980 | static void cgroup_clear_dir(struct cgroup *cgrp, unsigned long subsys_mask) |
965 | unsigned long subsys_mask) | ||
966 | { | 981 | { |
967 | struct cgroup *cgrp = __d_cgrp(dir); | ||
968 | struct cgroup_subsys *ss; | 982 | struct cgroup_subsys *ss; |
983 | int i; | ||
969 | 984 | ||
970 | for_each_root_subsys(cgrp->root, ss) { | 985 | for_each_subsys(ss, i) { |
971 | struct cftype_set *set; | 986 | struct cftype_set *set; |
972 | if (!test_bit(ss->subsys_id, &subsys_mask)) | 987 | |
988 | if (!test_bit(i, &subsys_mask)) | ||
973 | continue; | 989 | continue; |
974 | list_for_each_entry(set, &ss->cftsets, node) | 990 | list_for_each_entry(set, &ss->cftsets, node) |
975 | cgroup_addrm_files(cgrp, NULL, set->cfts, false); | 991 | cgroup_addrm_files(cgrp, set->cfts, false); |
976 | } | ||
977 | if (base_files) { | ||
978 | while (!list_empty(&cgrp->files)) | ||
979 | cgroup_rm_file(cgrp, NULL); | ||
980 | } | 992 | } |
981 | } | 993 | } |
982 | 994 | ||
@@ -986,9 +998,6 @@ static void cgroup_clear_directory(struct dentry *dir, bool base_files, | |||
986 | static void cgroup_d_remove_dir(struct dentry *dentry) | 998 | static void cgroup_d_remove_dir(struct dentry *dentry) |
987 | { | 999 | { |
988 | struct dentry *parent; | 1000 | struct dentry *parent; |
989 | struct cgroupfs_root *root = dentry->d_sb->s_fs_info; | ||
990 | |||
991 | cgroup_clear_directory(dentry, true, root->subsys_mask); | ||
992 | 1001 | ||
993 | parent = dentry->d_parent; | 1002 | parent = dentry->d_parent; |
994 | spin_lock(&parent->d_lock); | 1003 | spin_lock(&parent->d_lock); |
@@ -1009,79 +1018,84 @@ static int rebind_subsystems(struct cgroupfs_root *root, | |||
1009 | { | 1018 | { |
1010 | struct cgroup *cgrp = &root->top_cgroup; | 1019 | struct cgroup *cgrp = &root->top_cgroup; |
1011 | struct cgroup_subsys *ss; | 1020 | struct cgroup_subsys *ss; |
1012 | int i; | 1021 | unsigned long pinned = 0; |
1022 | int i, ret; | ||
1013 | 1023 | ||
1014 | BUG_ON(!mutex_is_locked(&cgroup_mutex)); | 1024 | BUG_ON(!mutex_is_locked(&cgroup_mutex)); |
1015 | BUG_ON(!mutex_is_locked(&cgroup_root_mutex)); | 1025 | BUG_ON(!mutex_is_locked(&cgroup_root_mutex)); |
1016 | 1026 | ||
1017 | /* Check that any added subsystems are currently free */ | 1027 | /* Check that any added subsystems are currently free */ |
1018 | for_each_subsys(ss, i) { | 1028 | for_each_subsys(ss, i) { |
1019 | unsigned long bit = 1UL << i; | 1029 | if (!(added_mask & (1 << i))) |
1020 | |||
1021 | if (!(bit & added_mask)) | ||
1022 | continue; | 1030 | continue; |
1023 | 1031 | ||
1032 | /* is the subsystem mounted elsewhere? */ | ||
1024 | if (ss->root != &cgroup_dummy_root) { | 1033 | if (ss->root != &cgroup_dummy_root) { |
1025 | /* Subsystem isn't free */ | 1034 | ret = -EBUSY; |
1026 | return -EBUSY; | 1035 | goto out_put; |
1036 | } | ||
1037 | |||
1038 | /* pin the module */ | ||
1039 | if (!try_module_get(ss->module)) { | ||
1040 | ret = -ENOENT; | ||
1041 | goto out_put; | ||
1027 | } | 1042 | } |
1043 | pinned |= 1 << i; | ||
1028 | } | 1044 | } |
1029 | 1045 | ||
1030 | /* Currently we don't handle adding/removing subsystems when | 1046 | /* subsys could be missing if unloaded between parsing and here */ |
1031 | * any child cgroups exist. This is theoretically supportable | 1047 | if (added_mask != pinned) { |
1032 | * but involves complex error handling, so it's being left until | 1048 | ret = -ENOENT; |
1033 | * later */ | 1049 | goto out_put; |
1034 | if (root->number_of_cgroups > 1) | 1050 | } |
1035 | return -EBUSY; | 1051 | |
1052 | ret = cgroup_populate_dir(cgrp, added_mask); | ||
1053 | if (ret) | ||
1054 | goto out_put; | ||
1055 | |||
1056 | /* | ||
1057 | * Nothing can fail from this point on. Remove files for the | ||
1058 | * removed subsystems and rebind each subsystem. | ||
1059 | */ | ||
1060 | cgroup_clear_dir(cgrp, removed_mask); | ||
1036 | 1061 | ||
1037 | /* Process each subsystem */ | ||
1038 | for_each_subsys(ss, i) { | 1062 | for_each_subsys(ss, i) { |
1039 | unsigned long bit = 1UL << i; | 1063 | unsigned long bit = 1UL << i; |
1040 | 1064 | ||
1041 | if (bit & added_mask) { | 1065 | if (bit & added_mask) { |
1042 | /* We're binding this subsystem to this hierarchy */ | 1066 | /* We're binding this subsystem to this hierarchy */ |
1043 | BUG_ON(cgrp->subsys[i]); | 1067 | BUG_ON(cgroup_css(cgrp, ss)); |
1044 | BUG_ON(!cgroup_dummy_top->subsys[i]); | 1068 | BUG_ON(!cgroup_css(cgroup_dummy_top, ss)); |
1045 | BUG_ON(cgroup_dummy_top->subsys[i]->cgroup != cgroup_dummy_top); | 1069 | BUG_ON(cgroup_css(cgroup_dummy_top, ss)->cgroup != cgroup_dummy_top); |
1070 | |||
1071 | rcu_assign_pointer(cgrp->subsys[i], | ||
1072 | cgroup_css(cgroup_dummy_top, ss)); | ||
1073 | cgroup_css(cgrp, ss)->cgroup = cgrp; | ||
1046 | 1074 | ||
1047 | cgrp->subsys[i] = cgroup_dummy_top->subsys[i]; | ||
1048 | cgrp->subsys[i]->cgroup = cgrp; | ||
1049 | list_move(&ss->sibling, &root->subsys_list); | 1075 | list_move(&ss->sibling, &root->subsys_list); |
1050 | ss->root = root; | 1076 | ss->root = root; |
1051 | if (ss->bind) | 1077 | if (ss->bind) |
1052 | ss->bind(cgrp); | 1078 | ss->bind(cgroup_css(cgrp, ss)); |
1053 | 1079 | ||
1054 | /* refcount was already taken, and we're keeping it */ | 1080 | /* refcount was already taken, and we're keeping it */ |
1055 | root->subsys_mask |= bit; | 1081 | root->subsys_mask |= bit; |
1056 | } else if (bit & removed_mask) { | 1082 | } else if (bit & removed_mask) { |
1057 | /* We're removing this subsystem */ | 1083 | /* We're removing this subsystem */ |
1058 | BUG_ON(cgrp->subsys[i] != cgroup_dummy_top->subsys[i]); | 1084 | BUG_ON(cgroup_css(cgrp, ss) != cgroup_css(cgroup_dummy_top, ss)); |
1059 | BUG_ON(cgrp->subsys[i]->cgroup != cgrp); | 1085 | BUG_ON(cgroup_css(cgrp, ss)->cgroup != cgrp); |
1060 | 1086 | ||
1061 | if (ss->bind) | 1087 | if (ss->bind) |
1062 | ss->bind(cgroup_dummy_top); | 1088 | ss->bind(cgroup_css(cgroup_dummy_top, ss)); |
1063 | cgroup_dummy_top->subsys[i]->cgroup = cgroup_dummy_top; | 1089 | |
1064 | cgrp->subsys[i] = NULL; | 1090 | cgroup_css(cgroup_dummy_top, ss)->cgroup = cgroup_dummy_top; |
1091 | RCU_INIT_POINTER(cgrp->subsys[i], NULL); | ||
1092 | |||
1065 | cgroup_subsys[i]->root = &cgroup_dummy_root; | 1093 | cgroup_subsys[i]->root = &cgroup_dummy_root; |
1066 | list_move(&ss->sibling, &cgroup_dummy_root.subsys_list); | 1094 | list_move(&ss->sibling, &cgroup_dummy_root.subsys_list); |
1067 | 1095 | ||
1068 | /* subsystem is now free - drop reference on module */ | 1096 | /* subsystem is now free - drop reference on module */ |
1069 | module_put(ss->module); | 1097 | module_put(ss->module); |
1070 | root->subsys_mask &= ~bit; | 1098 | root->subsys_mask &= ~bit; |
1071 | } else if (bit & root->subsys_mask) { | ||
1072 | /* Subsystem state should already exist */ | ||
1073 | BUG_ON(!cgrp->subsys[i]); | ||
1074 | /* | ||
1075 | * a refcount was taken, but we already had one, so | ||
1076 | * drop the extra reference. | ||
1077 | */ | ||
1078 | module_put(ss->module); | ||
1079 | #ifdef CONFIG_MODULE_UNLOAD | ||
1080 | BUG_ON(ss->module && !module_refcount(ss->module)); | ||
1081 | #endif | ||
1082 | } else { | ||
1083 | /* Subsystem state shouldn't exist */ | ||
1084 | BUG_ON(cgrp->subsys[i]); | ||
1085 | } | 1099 | } |
1086 | } | 1100 | } |
1087 | 1101 | ||
@@ -1092,6 +1106,12 @@ static int rebind_subsystems(struct cgroupfs_root *root, | |||
1092 | root->flags |= CGRP_ROOT_SUBSYS_BOUND; | 1106 | root->flags |= CGRP_ROOT_SUBSYS_BOUND; |
1093 | 1107 | ||
1094 | return 0; | 1108 | return 0; |
1109 | |||
1110 | out_put: | ||
1111 | for_each_subsys(ss, i) | ||
1112 | if (pinned & (1 << i)) | ||
1113 | module_put(ss->module); | ||
1114 | return ret; | ||
1095 | } | 1115 | } |
1096 | 1116 | ||
1097 | static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry) | 1117 | static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry) |
@@ -1142,7 +1162,6 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) | |||
1142 | char *token, *o = data; | 1162 | char *token, *o = data; |
1143 | bool all_ss = false, one_ss = false; | 1163 | bool all_ss = false, one_ss = false; |
1144 | unsigned long mask = (unsigned long)-1; | 1164 | unsigned long mask = (unsigned long)-1; |
1145 | bool module_pin_failed = false; | ||
1146 | struct cgroup_subsys *ss; | 1165 | struct cgroup_subsys *ss; |
1147 | int i; | 1166 | int i; |
1148 | 1167 | ||
@@ -1285,52 +1304,9 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) | |||
1285 | if (!opts->subsys_mask && !opts->name) | 1304 | if (!opts->subsys_mask && !opts->name) |
1286 | return -EINVAL; | 1305 | return -EINVAL; |
1287 | 1306 | ||
1288 | /* | ||
1289 | * Grab references on all the modules we'll need, so the subsystems | ||
1290 | * don't dance around before rebind_subsystems attaches them. This may | ||
1291 | * take duplicate reference counts on a subsystem that's already used, | ||
1292 | * but rebind_subsystems handles this case. | ||
1293 | */ | ||
1294 | for_each_subsys(ss, i) { | ||
1295 | if (!(opts->subsys_mask & (1UL << i))) | ||
1296 | continue; | ||
1297 | if (!try_module_get(cgroup_subsys[i]->module)) { | ||
1298 | module_pin_failed = true; | ||
1299 | break; | ||
1300 | } | ||
1301 | } | ||
1302 | if (module_pin_failed) { | ||
1303 | /* | ||
1304 | * oops, one of the modules was going away. this means that we | ||
1305 | * raced with a module_delete call, and to the user this is | ||
1306 | * essentially a "subsystem doesn't exist" case. | ||
1307 | */ | ||
1308 | for (i--; i >= 0; i--) { | ||
1309 | /* drop refcounts only on the ones we took */ | ||
1310 | unsigned long bit = 1UL << i; | ||
1311 | |||
1312 | if (!(bit & opts->subsys_mask)) | ||
1313 | continue; | ||
1314 | module_put(cgroup_subsys[i]->module); | ||
1315 | } | ||
1316 | return -ENOENT; | ||
1317 | } | ||
1318 | |||
1319 | return 0; | 1307 | return 0; |
1320 | } | 1308 | } |
1321 | 1309 | ||
1322 | static void drop_parsed_module_refcounts(unsigned long subsys_mask) | ||
1323 | { | ||
1324 | struct cgroup_subsys *ss; | ||
1325 | int i; | ||
1326 | |||
1327 | mutex_lock(&cgroup_mutex); | ||
1328 | for_each_subsys(ss, i) | ||
1329 | if (subsys_mask & (1UL << i)) | ||
1330 | module_put(cgroup_subsys[i]->module); | ||
1331 | mutex_unlock(&cgroup_mutex); | ||
1332 | } | ||
1333 | |||
1334 | static int cgroup_remount(struct super_block *sb, int *flags, char *data) | 1310 | static int cgroup_remount(struct super_block *sb, int *flags, char *data) |
1335 | { | 1311 | { |
1336 | int ret = 0; | 1312 | int ret = 0; |
@@ -1370,22 +1346,15 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data) | |||
1370 | goto out_unlock; | 1346 | goto out_unlock; |
1371 | } | 1347 | } |
1372 | 1348 | ||
1373 | /* | 1349 | /* remounting is not allowed for populated hierarchies */ |
1374 | * Clear out the files of subsystems that should be removed, do | 1350 | if (root->number_of_cgroups > 1) { |
1375 | * this before rebind_subsystems, since rebind_subsystems may | 1351 | ret = -EBUSY; |
1376 | * change this hierarchy's subsys_list. | ||
1377 | */ | ||
1378 | cgroup_clear_directory(cgrp->dentry, false, removed_mask); | ||
1379 | |||
1380 | ret = rebind_subsystems(root, added_mask, removed_mask); | ||
1381 | if (ret) { | ||
1382 | /* rebind_subsystems failed, re-populate the removed files */ | ||
1383 | cgroup_populate_dir(cgrp, false, removed_mask); | ||
1384 | goto out_unlock; | 1352 | goto out_unlock; |
1385 | } | 1353 | } |
1386 | 1354 | ||
1387 | /* re-populate subsystem files */ | 1355 | ret = rebind_subsystems(root, added_mask, removed_mask); |
1388 | cgroup_populate_dir(cgrp, false, added_mask); | 1356 | if (ret) |
1357 | goto out_unlock; | ||
1389 | 1358 | ||
1390 | if (opts.release_agent) | 1359 | if (opts.release_agent) |
1391 | strcpy(root->release_agent_path, opts.release_agent); | 1360 | strcpy(root->release_agent_path, opts.release_agent); |
@@ -1395,8 +1364,6 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data) | |||
1395 | mutex_unlock(&cgroup_root_mutex); | 1364 | mutex_unlock(&cgroup_root_mutex); |
1396 | mutex_unlock(&cgroup_mutex); | 1365 | mutex_unlock(&cgroup_mutex); |
1397 | mutex_unlock(&cgrp->dentry->d_inode->i_mutex); | 1366 | mutex_unlock(&cgrp->dentry->d_inode->i_mutex); |
1398 | if (ret) | ||
1399 | drop_parsed_module_refcounts(opts.subsys_mask); | ||
1400 | return ret; | 1367 | return ret; |
1401 | } | 1368 | } |
1402 | 1369 | ||
@@ -1416,6 +1383,7 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp) | |||
1416 | INIT_LIST_HEAD(&cgrp->release_list); | 1383 | INIT_LIST_HEAD(&cgrp->release_list); |
1417 | INIT_LIST_HEAD(&cgrp->pidlists); | 1384 | INIT_LIST_HEAD(&cgrp->pidlists); |
1418 | mutex_init(&cgrp->pidlist_mutex); | 1385 | mutex_init(&cgrp->pidlist_mutex); |
1386 | cgrp->dummy_css.cgroup = cgrp; | ||
1419 | INIT_LIST_HEAD(&cgrp->event_list); | 1387 | INIT_LIST_HEAD(&cgrp->event_list); |
1420 | spin_lock_init(&cgrp->event_list_lock); | 1388 | spin_lock_init(&cgrp->event_list_lock); |
1421 | simple_xattrs_init(&cgrp->xattrs); | 1389 | simple_xattrs_init(&cgrp->xattrs); |
@@ -1431,6 +1399,7 @@ static void init_cgroup_root(struct cgroupfs_root *root) | |||
1431 | cgrp->root = root; | 1399 | cgrp->root = root; |
1432 | RCU_INIT_POINTER(cgrp->name, &root_cgroup_name); | 1400 | RCU_INIT_POINTER(cgrp->name, &root_cgroup_name); |
1433 | init_cgroup_housekeeping(cgrp); | 1401 | init_cgroup_housekeeping(cgrp); |
1402 | idr_init(&root->cgroup_idr); | ||
1434 | } | 1403 | } |
1435 | 1404 | ||
1436 | static int cgroup_init_root_id(struct cgroupfs_root *root, int start, int end) | 1405 | static int cgroup_init_root_id(struct cgroupfs_root *root, int start, int end) |
@@ -1503,7 +1472,6 @@ static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts) | |||
1503 | */ | 1472 | */ |
1504 | root->subsys_mask = opts->subsys_mask; | 1473 | root->subsys_mask = opts->subsys_mask; |
1505 | root->flags = opts->flags; | 1474 | root->flags = opts->flags; |
1506 | ida_init(&root->cgroup_ida); | ||
1507 | if (opts->release_agent) | 1475 | if (opts->release_agent) |
1508 | strcpy(root->release_agent_path, opts->release_agent); | 1476 | strcpy(root->release_agent_path, opts->release_agent); |
1509 | if (opts->name) | 1477 | if (opts->name) |
@@ -1519,7 +1487,7 @@ static void cgroup_free_root(struct cgroupfs_root *root) | |||
1519 | /* hierarchy ID should already have been released */ | 1487 | /* hierarchy ID should already have been released */ |
1520 | WARN_ON_ONCE(root->hierarchy_id); | 1488 | WARN_ON_ONCE(root->hierarchy_id); |
1521 | 1489 | ||
1522 | ida_destroy(&root->cgroup_ida); | 1490 | idr_destroy(&root->cgroup_idr); |
1523 | kfree(root); | 1491 | kfree(root); |
1524 | } | 1492 | } |
1525 | } | 1493 | } |
@@ -1584,7 +1552,9 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
1584 | int ret = 0; | 1552 | int ret = 0; |
1585 | struct super_block *sb; | 1553 | struct super_block *sb; |
1586 | struct cgroupfs_root *new_root; | 1554 | struct cgroupfs_root *new_root; |
1555 | struct list_head tmp_links; | ||
1587 | struct inode *inode; | 1556 | struct inode *inode; |
1557 | const struct cred *cred; | ||
1588 | 1558 | ||
1589 | /* First find the desired set of subsystems */ | 1559 | /* First find the desired set of subsystems */ |
1590 | mutex_lock(&cgroup_mutex); | 1560 | mutex_lock(&cgroup_mutex); |
@@ -1600,7 +1570,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
1600 | new_root = cgroup_root_from_opts(&opts); | 1570 | new_root = cgroup_root_from_opts(&opts); |
1601 | if (IS_ERR(new_root)) { | 1571 | if (IS_ERR(new_root)) { |
1602 | ret = PTR_ERR(new_root); | 1572 | ret = PTR_ERR(new_root); |
1603 | goto drop_modules; | 1573 | goto out_err; |
1604 | } | 1574 | } |
1605 | opts.new_root = new_root; | 1575 | opts.new_root = new_root; |
1606 | 1576 | ||
@@ -1609,17 +1579,15 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
1609 | if (IS_ERR(sb)) { | 1579 | if (IS_ERR(sb)) { |
1610 | ret = PTR_ERR(sb); | 1580 | ret = PTR_ERR(sb); |
1611 | cgroup_free_root(opts.new_root); | 1581 | cgroup_free_root(opts.new_root); |
1612 | goto drop_modules; | 1582 | goto out_err; |
1613 | } | 1583 | } |
1614 | 1584 | ||
1615 | root = sb->s_fs_info; | 1585 | root = sb->s_fs_info; |
1616 | BUG_ON(!root); | 1586 | BUG_ON(!root); |
1617 | if (root == opts.new_root) { | 1587 | if (root == opts.new_root) { |
1618 | /* We used the new root structure, so this is a new hierarchy */ | 1588 | /* We used the new root structure, so this is a new hierarchy */ |
1619 | struct list_head tmp_links; | ||
1620 | struct cgroup *root_cgrp = &root->top_cgroup; | 1589 | struct cgroup *root_cgrp = &root->top_cgroup; |
1621 | struct cgroupfs_root *existing_root; | 1590 | struct cgroupfs_root *existing_root; |
1622 | const struct cred *cred; | ||
1623 | int i; | 1591 | int i; |
1624 | struct css_set *cset; | 1592 | struct css_set *cset; |
1625 | 1593 | ||
@@ -1634,6 +1602,11 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
1634 | mutex_lock(&cgroup_mutex); | 1602 | mutex_lock(&cgroup_mutex); |
1635 | mutex_lock(&cgroup_root_mutex); | 1603 | mutex_lock(&cgroup_root_mutex); |
1636 | 1604 | ||
1605 | root_cgrp->id = idr_alloc(&root->cgroup_idr, root_cgrp, | ||
1606 | 0, 1, GFP_KERNEL); | ||
1607 | if (root_cgrp->id < 0) | ||
1608 | goto unlock_drop; | ||
1609 | |||
1637 | /* Check for name clashes with existing mounts */ | 1610 | /* Check for name clashes with existing mounts */ |
1638 | ret = -EBUSY; | 1611 | ret = -EBUSY; |
1639 | if (strlen(root->name)) | 1612 | if (strlen(root->name)) |
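The switch from ida to idr above is what lets a cgroup id be mapped back to its cgroup later; a hedged sketch of such a lookup (not part of this hunk):

	/* under cgroup_mutex or RCU, depending on the caller's rules */
	struct cgroup *cgrp = idr_find(&root->cgroup_idr, id);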
@@ -1657,26 +1630,37 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
1657 | if (ret) | 1630 | if (ret) |
1658 | goto unlock_drop; | 1631 | goto unlock_drop; |
1659 | 1632 | ||
1633 | sb->s_root->d_fsdata = root_cgrp; | ||
1634 | root_cgrp->dentry = sb->s_root; | ||
1635 | |||
1636 | /* | ||
1637 | * We're inside get_sb() and will call lookup_one_len() to | ||
1638 | * create the root files, which doesn't work if SELinux is | ||
1639 | * in use. The following cred dancing somehow works around | ||
1640 | * it. See 2ce9738ba ("cgroupfs: use init_cred when | ||
1641 | * populating new cgroupfs mount") for more details. | ||
1642 | */ | ||
1643 | cred = override_creds(&init_cred); | ||
1644 | |||
1645 | ret = cgroup_addrm_files(root_cgrp, cgroup_base_files, true); | ||
1646 | if (ret) | ||
1647 | goto rm_base_files; | ||
1648 | |||
1660 | ret = rebind_subsystems(root, root->subsys_mask, 0); | 1649 | ret = rebind_subsystems(root, root->subsys_mask, 0); |
1661 | if (ret == -EBUSY) { | 1650 | if (ret) |
1662 | free_cgrp_cset_links(&tmp_links); | 1651 | goto rm_base_files; |
1663 | goto unlock_drop; | 1652 | |
1664 | } | 1653 | revert_creds(cred); |
1654 | |||
1665 | /* | 1655 | /* |
1666 | * There must be no failure case after here, since rebinding | 1656 | * There must be no failure case after here, since rebinding |
1667 | * takes care of subsystems' refcounts, which are explicitly | 1657 | * takes care of subsystems' refcounts, which are explicitly |
1668 | * dropped in the failure exit path. | 1658 | * dropped in the failure exit path. |
1669 | */ | 1659 | */ |
1670 | 1660 | ||
1671 | /* EBUSY should be the only error here */ | ||
1672 | BUG_ON(ret); | ||
1673 | |||
1674 | list_add(&root->root_list, &cgroup_roots); | 1661 | list_add(&root->root_list, &cgroup_roots); |
1675 | cgroup_root_count++; | 1662 | cgroup_root_count++; |
1676 | 1663 | ||
1677 | sb->s_root->d_fsdata = root_cgrp; | ||
1678 | root->top_cgroup.dentry = sb->s_root; | ||
1679 | |||
1680 | /* Link the top cgroup in this hierarchy into all | 1664 | /* Link the top cgroup in this hierarchy into all |
1681 | * the css_set objects */ | 1665 | * the css_set objects */ |
1682 | write_lock(&css_set_lock); | 1666 | write_lock(&css_set_lock); |
@@ -1689,9 +1673,6 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
1689 | BUG_ON(!list_empty(&root_cgrp->children)); | 1673 | BUG_ON(!list_empty(&root_cgrp->children)); |
1690 | BUG_ON(root->number_of_cgroups != 1); | 1674 | BUG_ON(root->number_of_cgroups != 1); |
1691 | 1675 | ||
1692 | cred = override_creds(&init_cred); | ||
1693 | cgroup_populate_dir(root_cgrp, true, root->subsys_mask); | ||
1694 | revert_creds(cred); | ||
1695 | mutex_unlock(&cgroup_root_mutex); | 1676 | mutex_unlock(&cgroup_root_mutex); |
1696 | mutex_unlock(&cgroup_mutex); | 1677 | mutex_unlock(&cgroup_mutex); |
1697 | mutex_unlock(&inode->i_mutex); | 1678 | mutex_unlock(&inode->i_mutex); |
@@ -1711,15 +1692,16 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
1711 | pr_warning("cgroup: new mount options do not match the existing superblock, will be ignored\n"); | 1692 | pr_warning("cgroup: new mount options do not match the existing superblock, will be ignored\n"); |
1712 | } | 1693 | } |
1713 | } | 1694 | } |
1714 | |||
1715 | /* no subsys rebinding, so refcounts don't change */ | ||
1716 | drop_parsed_module_refcounts(opts.subsys_mask); | ||
1717 | } | 1695 | } |
1718 | 1696 | ||
1719 | kfree(opts.release_agent); | 1697 | kfree(opts.release_agent); |
1720 | kfree(opts.name); | 1698 | kfree(opts.name); |
1721 | return dget(sb->s_root); | 1699 | return dget(sb->s_root); |
1722 | 1700 | ||
1701 | rm_base_files: | ||
1702 | free_cgrp_cset_links(&tmp_links); | ||
1703 | cgroup_addrm_files(&root->top_cgroup, cgroup_base_files, false); | ||
1704 | revert_creds(cred); | ||
1723 | unlock_drop: | 1705 | unlock_drop: |
1724 | cgroup_exit_root_id(root); | 1706 | cgroup_exit_root_id(root); |
1725 | mutex_unlock(&cgroup_root_mutex); | 1707 | mutex_unlock(&cgroup_root_mutex); |
@@ -1727,8 +1709,6 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
1727 | mutex_unlock(&inode->i_mutex); | 1709 | mutex_unlock(&inode->i_mutex); |
1728 | drop_new_super: | 1710 | drop_new_super: |
1729 | deactivate_locked_super(sb); | 1711 | deactivate_locked_super(sb); |
1730 | drop_modules: | ||
1731 | drop_parsed_module_refcounts(opts.subsys_mask); | ||
1732 | out_err: | 1712 | out_err: |
1733 | kfree(opts.release_agent); | 1713 | kfree(opts.release_agent); |
1734 | kfree(opts.name); | 1714 | kfree(opts.name); |
@@ -1746,6 +1726,7 @@ static void cgroup_kill_sb(struct super_block *sb) { | |||
1746 | BUG_ON(root->number_of_cgroups != 1); | 1726 | BUG_ON(root->number_of_cgroups != 1); |
1747 | BUG_ON(!list_empty(&cgrp->children)); | 1727 | BUG_ON(!list_empty(&cgrp->children)); |
1748 | 1728 | ||
1729 | mutex_lock(&cgrp->dentry->d_inode->i_mutex); | ||
1749 | mutex_lock(&cgroup_mutex); | 1730 | mutex_lock(&cgroup_mutex); |
1750 | mutex_lock(&cgroup_root_mutex); | 1731 | mutex_lock(&cgroup_root_mutex); |
1751 | 1732 | ||
@@ -1778,6 +1759,7 @@ static void cgroup_kill_sb(struct super_block *sb) { | |||
1778 | 1759 | ||
1779 | mutex_unlock(&cgroup_root_mutex); | 1760 | mutex_unlock(&cgroup_root_mutex); |
1780 | mutex_unlock(&cgroup_mutex); | 1761 | mutex_unlock(&cgroup_mutex); |
1762 | mutex_unlock(&cgrp->dentry->d_inode->i_mutex); | ||
1781 | 1763 | ||
1782 | simple_xattrs_free(&cgrp->xattrs); | 1764 | simple_xattrs_free(&cgrp->xattrs); |
1783 | 1765 | ||
@@ -1889,7 +1871,7 @@ EXPORT_SYMBOL_GPL(task_cgroup_path); | |||
1889 | struct task_and_cgroup { | 1871 | struct task_and_cgroup { |
1890 | struct task_struct *task; | 1872 | struct task_struct *task; |
1891 | struct cgroup *cgrp; | 1873 | struct cgroup *cgrp; |
1892 | struct css_set *cg; | 1874 | struct css_set *cset; |
1893 | }; | 1875 | }; |
1894 | 1876 | ||
1895 | struct cgroup_taskset { | 1877 | struct cgroup_taskset { |
@@ -1939,18 +1921,20 @@ struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset) | |||
1939 | EXPORT_SYMBOL_GPL(cgroup_taskset_next); | 1921 | EXPORT_SYMBOL_GPL(cgroup_taskset_next); |
1940 | 1922 | ||
1941 | /** | 1923 | /** |
1942 | * cgroup_taskset_cur_cgroup - return the matching cgroup for the current task | 1924 | * cgroup_taskset_cur_css - return the matching css for the current task |
1943 | * @tset: taskset of interest | 1925 | * @tset: taskset of interest |
1926 | * @subsys_id: the ID of the target subsystem | ||
1944 | * | 1927 | * |
1945 | * Return the cgroup for the current (last returned) task of @tset. This | 1928 | * Return the css for the current (last returned) task of @tset for |
1946 | * function must be preceded by either cgroup_taskset_first() or | 1929 | * subsystem specified by @subsys_id. This function must be preceded by |
1947 | * cgroup_taskset_next(). | 1930 | * either cgroup_taskset_first() or cgroup_taskset_next(). |
1948 | */ | 1931 | */ |
1949 | struct cgroup *cgroup_taskset_cur_cgroup(struct cgroup_taskset *tset) | 1932 | struct cgroup_subsys_state *cgroup_taskset_cur_css(struct cgroup_taskset *tset, |
1933 | int subsys_id) | ||
1950 | { | 1934 | { |
1951 | return tset->cur_cgrp; | 1935 | return cgroup_css(tset->cur_cgrp, cgroup_subsys[subsys_id]); |
1952 | } | 1936 | } |
1953 | EXPORT_SYMBOL_GPL(cgroup_taskset_cur_cgroup); | 1937 | EXPORT_SYMBOL_GPL(cgroup_taskset_cur_css); |
1954 | 1938 | ||
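A converted ->can_attach() receives the css directly and can still walk the taskset with the first/next pair; a hypothetical policy check for illustration:

	static int my_can_attach(struct cgroup_subsys_state *css,
				 struct cgroup_taskset *tset)
	{
		struct task_struct *task;

		for (task = cgroup_taskset_first(tset); task;
		     task = cgroup_taskset_next(tset)) {
			if (!my_task_allowed(css, task))	/* hypothetical */
				return -EINVAL;
		}
		return 0;
	}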
1955 | /** | 1939 | /** |
1956 | * cgroup_taskset_size - return the number of tasks in taskset | 1940 | * cgroup_taskset_size - return the number of tasks in taskset |
@@ -2089,8 +2073,10 @@ static int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk, | |||
2089 | * step 1: check that we can legitimately attach to the cgroup. | 2073 | * step 1: check that we can legitimately attach to the cgroup. |
2090 | */ | 2074 | */ |
2091 | for_each_root_subsys(root, ss) { | 2075 | for_each_root_subsys(root, ss) { |
2076 | struct cgroup_subsys_state *css = cgroup_css(cgrp, ss); | ||
2077 | |||
2092 | if (ss->can_attach) { | 2078 | if (ss->can_attach) { |
2093 | retval = ss->can_attach(cgrp, &tset); | 2079 | retval = ss->can_attach(css, &tset); |
2094 | if (retval) { | 2080 | if (retval) { |
2095 | failed_ss = ss; | 2081 | failed_ss = ss; |
2096 | goto out_cancel_attach; | 2082 | goto out_cancel_attach; |
@@ -2107,8 +2093,8 @@ static int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk, | |||
2107 | 2093 | ||
2108 | tc = flex_array_get(group, i); | 2094 | tc = flex_array_get(group, i); |
2109 | old_cset = task_css_set(tc->task); | 2095 | old_cset = task_css_set(tc->task); |
2110 | tc->cg = find_css_set(old_cset, cgrp); | 2096 | tc->cset = find_css_set(old_cset, cgrp); |
2111 | if (!tc->cg) { | 2097 | if (!tc->cset) { |
2112 | retval = -ENOMEM; | 2098 | retval = -ENOMEM; |
2113 | goto out_put_css_set_refs; | 2099 | goto out_put_css_set_refs; |
2114 | } | 2100 | } |
@@ -2121,7 +2107,7 @@ static int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk, | |||
2121 | */ | 2107 | */ |
2122 | for (i = 0; i < group_size; i++) { | 2108 | for (i = 0; i < group_size; i++) { |
2123 | tc = flex_array_get(group, i); | 2109 | tc = flex_array_get(group, i); |
2124 | cgroup_task_migrate(tc->cgrp, tc->task, tc->cg); | 2110 | cgroup_task_migrate(tc->cgrp, tc->task, tc->cset); |
2125 | } | 2111 | } |
2126 | /* nothing is sensitive to fork() after this point. */ | 2112 | /* nothing is sensitive to fork() after this point. */ |
2127 | 2113 | ||
@@ -2129,8 +2115,10 @@ static int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk, | |||
2129 | * step 4: do subsystem attach callbacks. | 2115 | * step 4: do subsystem attach callbacks. |
2130 | */ | 2116 | */ |
2131 | for_each_root_subsys(root, ss) { | 2117 | for_each_root_subsys(root, ss) { |
2118 | struct cgroup_subsys_state *css = cgroup_css(cgrp, ss); | ||
2119 | |||
2132 | if (ss->attach) | 2120 | if (ss->attach) |
2133 | ss->attach(cgrp, &tset); | 2121 | ss->attach(css, &tset); |
2134 | } | 2122 | } |
2135 | 2123 | ||
2136 | /* | 2124 | /* |
@@ -2141,18 +2129,20 @@ out_put_css_set_refs: | |||
2141 | if (retval) { | 2129 | if (retval) { |
2142 | for (i = 0; i < group_size; i++) { | 2130 | for (i = 0; i < group_size; i++) { |
2143 | tc = flex_array_get(group, i); | 2131 | tc = flex_array_get(group, i); |
2144 | if (!tc->cg) | 2132 | if (!tc->cset) |
2145 | break; | 2133 | break; |
2146 | put_css_set(tc->cg); | 2134 | put_css_set(tc->cset); |
2147 | } | 2135 | } |
2148 | } | 2136 | } |
2149 | out_cancel_attach: | 2137 | out_cancel_attach: |
2150 | if (retval) { | 2138 | if (retval) { |
2151 | for_each_root_subsys(root, ss) { | 2139 | for_each_root_subsys(root, ss) { |
2140 | struct cgroup_subsys_state *css = cgroup_css(cgrp, ss); | ||
2141 | |||
2152 | if (ss == failed_ss) | 2142 | if (ss == failed_ss) |
2153 | break; | 2143 | break; |
2154 | if (ss->cancel_attach) | 2144 | if (ss->cancel_attach) |
2155 | ss->cancel_attach(cgrp, &tset); | 2145 | ss->cancel_attach(css, &tset); |
2156 | } | 2146 | } |
2157 | } | 2147 | } |
2158 | out_free_group_list: | 2148 | out_free_group_list: |
@@ -2253,9 +2243,9 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk) | |||
2253 | 2243 | ||
2254 | mutex_lock(&cgroup_mutex); | 2244 | mutex_lock(&cgroup_mutex); |
2255 | for_each_active_root(root) { | 2245 | for_each_active_root(root) { |
2256 | struct cgroup *from_cg = task_cgroup_from_root(from, root); | 2246 | struct cgroup *from_cgrp = task_cgroup_from_root(from, root); |
2257 | 2247 | ||
2258 | retval = cgroup_attach_task(from_cg, tsk, false); | 2248 | retval = cgroup_attach_task(from_cgrp, tsk, false); |
2259 | if (retval) | 2249 | if (retval) |
2260 | break; | 2250 | break; |
2261 | } | 2251 | } |
@@ -2265,34 +2255,38 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk) | |||
2265 | } | 2255 | } |
2266 | EXPORT_SYMBOL_GPL(cgroup_attach_task_all); | 2256 | EXPORT_SYMBOL_GPL(cgroup_attach_task_all); |
2267 | 2257 | ||
2268 | static int cgroup_tasks_write(struct cgroup *cgrp, struct cftype *cft, u64 pid) | 2258 | static int cgroup_tasks_write(struct cgroup_subsys_state *css, |
2259 | struct cftype *cft, u64 pid) | ||
2269 | { | 2260 | { |
2270 | return attach_task_by_pid(cgrp, pid, false); | 2261 | return attach_task_by_pid(css->cgroup, pid, false); |
2271 | } | 2262 | } |
2272 | 2263 | ||
2273 | static int cgroup_procs_write(struct cgroup *cgrp, struct cftype *cft, u64 tgid) | 2264 | static int cgroup_procs_write(struct cgroup_subsys_state *css, |
2265 | struct cftype *cft, u64 tgid) | ||
2274 | { | 2266 | { |
2275 | return attach_task_by_pid(cgrp, tgid, true); | 2267 | return attach_task_by_pid(css->cgroup, tgid, true); |
2276 | } | 2268 | } |
2277 | 2269 | ||
2278 | static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft, | 2270 | static int cgroup_release_agent_write(struct cgroup_subsys_state *css, |
2279 | const char *buffer) | 2271 | struct cftype *cft, const char *buffer) |
2280 | { | 2272 | { |
2281 | BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX); | 2273 | BUILD_BUG_ON(sizeof(css->cgroup->root->release_agent_path) < PATH_MAX); |
2282 | if (strlen(buffer) >= PATH_MAX) | 2274 | if (strlen(buffer) >= PATH_MAX) |
2283 | return -EINVAL; | 2275 | return -EINVAL; |
2284 | if (!cgroup_lock_live_group(cgrp)) | 2276 | if (!cgroup_lock_live_group(css->cgroup)) |
2285 | return -ENODEV; | 2277 | return -ENODEV; |
2286 | mutex_lock(&cgroup_root_mutex); | 2278 | mutex_lock(&cgroup_root_mutex); |
2287 | strcpy(cgrp->root->release_agent_path, buffer); | 2279 | strcpy(css->cgroup->root->release_agent_path, buffer); |
2288 | mutex_unlock(&cgroup_root_mutex); | 2280 | mutex_unlock(&cgroup_root_mutex); |
2289 | mutex_unlock(&cgroup_mutex); | 2281 | mutex_unlock(&cgroup_mutex); |
2290 | return 0; | 2282 | return 0; |
2291 | } | 2283 | } |
2292 | 2284 | ||
2293 | static int cgroup_release_agent_show(struct cgroup *cgrp, struct cftype *cft, | 2285 | static int cgroup_release_agent_show(struct cgroup_subsys_state *css, |
2294 | struct seq_file *seq) | 2286 | struct cftype *cft, struct seq_file *seq) |
2295 | { | 2287 | { |
2288 | struct cgroup *cgrp = css->cgroup; | ||
2289 | |||
2296 | if (!cgroup_lock_live_group(cgrp)) | 2290 | if (!cgroup_lock_live_group(cgrp)) |
2297 | return -ENODEV; | 2291 | return -ENODEV; |
2298 | seq_puts(seq, cgrp->root->release_agent_path); | 2292 | seq_puts(seq, cgrp->root->release_agent_path); |
@@ -2301,20 +2295,20 @@ static int cgroup_release_agent_show(struct cgroup *cgrp, struct cftype *cft, | |||
2301 | return 0; | 2295 | return 0; |
2302 | } | 2296 | } |
2303 | 2297 | ||
2304 | static int cgroup_sane_behavior_show(struct cgroup *cgrp, struct cftype *cft, | 2298 | static int cgroup_sane_behavior_show(struct cgroup_subsys_state *css, |
2305 | struct seq_file *seq) | 2299 | struct cftype *cft, struct seq_file *seq) |
2306 | { | 2300 | { |
2307 | seq_printf(seq, "%d\n", cgroup_sane_behavior(cgrp)); | 2301 | seq_printf(seq, "%d\n", cgroup_sane_behavior(css->cgroup)); |
2308 | return 0; | 2302 | return 0; |
2309 | } | 2303 | } |
2310 | 2304 | ||
2311 | /* A buffer size big enough for numbers or short strings */ | 2305 | /* A buffer size big enough for numbers or short strings */ |
2312 | #define CGROUP_LOCAL_BUFFER_SIZE 64 | 2306 | #define CGROUP_LOCAL_BUFFER_SIZE 64 |
2313 | 2307 | ||
2314 | static ssize_t cgroup_write_X64(struct cgroup *cgrp, struct cftype *cft, | 2308 | static ssize_t cgroup_write_X64(struct cgroup_subsys_state *css, |
2315 | struct file *file, | 2309 | struct cftype *cft, struct file *file, |
2316 | const char __user *userbuf, | 2310 | const char __user *userbuf, size_t nbytes, |
2317 | size_t nbytes, loff_t *unused_ppos) | 2311 | loff_t *unused_ppos) |
2318 | { | 2312 | { |
2319 | char buffer[CGROUP_LOCAL_BUFFER_SIZE]; | 2313 | char buffer[CGROUP_LOCAL_BUFFER_SIZE]; |
2320 | int retval = 0; | 2314 | int retval = 0; |
@@ -2332,22 +2326,22 @@ static ssize_t cgroup_write_X64(struct cgroup *cgrp, struct cftype *cft, | |||
2332 | u64 val = simple_strtoull(strstrip(buffer), &end, 0); | 2326 | u64 val = simple_strtoull(strstrip(buffer), &end, 0); |
2333 | if (*end) | 2327 | if (*end) |
2334 | return -EINVAL; | 2328 | return -EINVAL; |
2335 | retval = cft->write_u64(cgrp, cft, val); | 2329 | retval = cft->write_u64(css, cft, val); |
2336 | } else { | 2330 | } else { |
2337 | s64 val = simple_strtoll(strstrip(buffer), &end, 0); | 2331 | s64 val = simple_strtoll(strstrip(buffer), &end, 0); |
2338 | if (*end) | 2332 | if (*end) |
2339 | return -EINVAL; | 2333 | return -EINVAL; |
2340 | retval = cft->write_s64(cgrp, cft, val); | 2334 | retval = cft->write_s64(css, cft, val); |
2341 | } | 2335 | } |
2342 | if (!retval) | 2336 | if (!retval) |
2343 | retval = nbytes; | 2337 | retval = nbytes; |
2344 | return retval; | 2338 | return retval; |
2345 | } | 2339 | } |
2346 | 2340 | ||
2347 | static ssize_t cgroup_write_string(struct cgroup *cgrp, struct cftype *cft, | 2341 | static ssize_t cgroup_write_string(struct cgroup_subsys_state *css, |
2348 | struct file *file, | 2342 | struct cftype *cft, struct file *file, |
2349 | const char __user *userbuf, | 2343 | const char __user *userbuf, size_t nbytes, |
2350 | size_t nbytes, loff_t *unused_ppos) | 2344 | loff_t *unused_ppos) |
2351 | { | 2345 | { |
2352 | char local_buffer[CGROUP_LOCAL_BUFFER_SIZE]; | 2346 | char local_buffer[CGROUP_LOCAL_BUFFER_SIZE]; |
2353 | int retval = 0; | 2347 | int retval = 0; |
@@ -2370,7 +2364,7 @@ static ssize_t cgroup_write_string(struct cgroup *cgrp, struct cftype *cft, | |||
2370 | } | 2364 | } |
2371 | 2365 | ||
2372 | buffer[nbytes] = 0; /* nul-terminate */ | 2366 | buffer[nbytes] = 0; /* nul-terminate */ |
2373 | retval = cft->write_string(cgrp, cft, strstrip(buffer)); | 2367 | retval = cft->write_string(css, cft, strstrip(buffer)); |
2374 | if (!retval) | 2368 | if (!retval) |
2375 | retval = nbytes; | 2369 | retval = nbytes; |
2376 | out: | 2370 | out: |
@@ -2380,65 +2374,60 @@ out: | |||
2380 | } | 2374 | } |
2381 | 2375 | ||
2382 | static ssize_t cgroup_file_write(struct file *file, const char __user *buf, | 2376 | static ssize_t cgroup_file_write(struct file *file, const char __user *buf, |
2383 | size_t nbytes, loff_t *ppos) | 2377 | size_t nbytes, loff_t *ppos) |
2384 | { | 2378 | { |
2379 | struct cfent *cfe = __d_cfe(file->f_dentry); | ||
2385 | struct cftype *cft = __d_cft(file->f_dentry); | 2380 | struct cftype *cft = __d_cft(file->f_dentry); |
2386 | struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent); | 2381 | struct cgroup_subsys_state *css = cfe->css; |
2387 | 2382 | ||
2388 | if (cgroup_is_dead(cgrp)) | ||
2389 | return -ENODEV; | ||
2390 | if (cft->write) | 2383 | if (cft->write) |
2391 | return cft->write(cgrp, cft, file, buf, nbytes, ppos); | 2384 | return cft->write(css, cft, file, buf, nbytes, ppos); |
2392 | if (cft->write_u64 || cft->write_s64) | 2385 | if (cft->write_u64 || cft->write_s64) |
2393 | return cgroup_write_X64(cgrp, cft, file, buf, nbytes, ppos); | 2386 | return cgroup_write_X64(css, cft, file, buf, nbytes, ppos); |
2394 | if (cft->write_string) | 2387 | if (cft->write_string) |
2395 | return cgroup_write_string(cgrp, cft, file, buf, nbytes, ppos); | 2388 | return cgroup_write_string(css, cft, file, buf, nbytes, ppos); |
2396 | if (cft->trigger) { | 2389 | if (cft->trigger) { |
2397 | int ret = cft->trigger(cgrp, (unsigned int)cft->private); | 2390 | int ret = cft->trigger(css, (unsigned int)cft->private); |
2398 | return ret ? ret : nbytes; | 2391 | return ret ? ret : nbytes; |
2399 | } | 2392 | } |
2400 | return -EINVAL; | 2393 | return -EINVAL; |
2401 | } | 2394 | } |
2402 | 2395 | ||
2403 | static ssize_t cgroup_read_u64(struct cgroup *cgrp, struct cftype *cft, | 2396 | static ssize_t cgroup_read_u64(struct cgroup_subsys_state *css, |
2404 | struct file *file, | 2397 | struct cftype *cft, struct file *file, |
2405 | char __user *buf, size_t nbytes, | 2398 | char __user *buf, size_t nbytes, loff_t *ppos) |
2406 | loff_t *ppos) | ||
2407 | { | 2399 | { |
2408 | char tmp[CGROUP_LOCAL_BUFFER_SIZE]; | 2400 | char tmp[CGROUP_LOCAL_BUFFER_SIZE]; |
2409 | u64 val = cft->read_u64(cgrp, cft); | 2401 | u64 val = cft->read_u64(css, cft); |
2410 | int len = sprintf(tmp, "%llu\n", (unsigned long long) val); | 2402 | int len = sprintf(tmp, "%llu\n", (unsigned long long) val); |
2411 | 2403 | ||
2412 | return simple_read_from_buffer(buf, nbytes, ppos, tmp, len); | 2404 | return simple_read_from_buffer(buf, nbytes, ppos, tmp, len); |
2413 | } | 2405 | } |
2414 | 2406 | ||
2415 | static ssize_t cgroup_read_s64(struct cgroup *cgrp, struct cftype *cft, | 2407 | static ssize_t cgroup_read_s64(struct cgroup_subsys_state *css, |
2416 | struct file *file, | 2408 | struct cftype *cft, struct file *file, |
2417 | char __user *buf, size_t nbytes, | 2409 | char __user *buf, size_t nbytes, loff_t *ppos) |
2418 | loff_t *ppos) | ||
2419 | { | 2410 | { |
2420 | char tmp[CGROUP_LOCAL_BUFFER_SIZE]; | 2411 | char tmp[CGROUP_LOCAL_BUFFER_SIZE]; |
2421 | s64 val = cft->read_s64(cgrp, cft); | 2412 | s64 val = cft->read_s64(css, cft); |
2422 | int len = sprintf(tmp, "%lld\n", (long long) val); | 2413 | int len = sprintf(tmp, "%lld\n", (long long) val); |
2423 | 2414 | ||
2424 | return simple_read_from_buffer(buf, nbytes, ppos, tmp, len); | 2415 | return simple_read_from_buffer(buf, nbytes, ppos, tmp, len); |
2425 | } | 2416 | } |
2426 | 2417 | ||
2427 | static ssize_t cgroup_file_read(struct file *file, char __user *buf, | 2418 | static ssize_t cgroup_file_read(struct file *file, char __user *buf, |
2428 | size_t nbytes, loff_t *ppos) | 2419 | size_t nbytes, loff_t *ppos) |
2429 | { | 2420 | { |
2421 | struct cfent *cfe = __d_cfe(file->f_dentry); | ||
2430 | struct cftype *cft = __d_cft(file->f_dentry); | 2422 | struct cftype *cft = __d_cft(file->f_dentry); |
2431 | struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent); | 2423 | struct cgroup_subsys_state *css = cfe->css; |
2432 | |||
2433 | if (cgroup_is_dead(cgrp)) | ||
2434 | return -ENODEV; | ||
2435 | 2424 | ||
2436 | if (cft->read) | 2425 | if (cft->read) |
2437 | return cft->read(cgrp, cft, file, buf, nbytes, ppos); | 2426 | return cft->read(css, cft, file, buf, nbytes, ppos); |
2438 | if (cft->read_u64) | 2427 | if (cft->read_u64) |
2439 | return cgroup_read_u64(cgrp, cft, file, buf, nbytes, ppos); | 2428 | return cgroup_read_u64(css, cft, file, buf, nbytes, ppos); |
2440 | if (cft->read_s64) | 2429 | if (cft->read_s64) |
2441 | return cgroup_read_s64(cgrp, cft, file, buf, nbytes, ppos); | 2430 | return cgroup_read_s64(css, cft, file, buf, nbytes, ppos); |
2442 | return -EINVAL; | 2431 | return -EINVAL; |
2443 | } | 2432 | } |
2444 | 2433 | ||
@@ -2447,11 +2436,6 @@ static ssize_t cgroup_file_read(struct file *file, char __user *buf, | |||
2447 | * supports string->u64 maps, but can be extended in future. | 2436 | * supports string->u64 maps, but can be extended in future. |
2448 | */ | 2437 | */ |
2449 | 2438 | ||
2450 | struct cgroup_seqfile_state { | ||
2451 | struct cftype *cft; | ||
2452 | struct cgroup *cgroup; | ||
2453 | }; | ||
2454 | |||
2455 | static int cgroup_map_add(struct cgroup_map_cb *cb, const char *key, u64 value) | 2439 | static int cgroup_map_add(struct cgroup_map_cb *cb, const char *key, u64 value) |
2456 | { | 2440 | { |
2457 | struct seq_file *sf = cb->state; | 2441 | struct seq_file *sf = cb->state; |
@@ -2460,69 +2444,86 @@ static int cgroup_map_add(struct cgroup_map_cb *cb, const char *key, u64 value) | |||
2460 | 2444 | ||
2461 | static int cgroup_seqfile_show(struct seq_file *m, void *arg) | 2445 | static int cgroup_seqfile_show(struct seq_file *m, void *arg) |
2462 | { | 2446 | { |
2463 | struct cgroup_seqfile_state *state = m->private; | 2447 | struct cfent *cfe = m->private; |
2464 | struct cftype *cft = state->cft; | 2448 | struct cftype *cft = cfe->type; |
2449 | struct cgroup_subsys_state *css = cfe->css; | ||
2450 | |||
2465 | if (cft->read_map) { | 2451 | if (cft->read_map) { |
2466 | struct cgroup_map_cb cb = { | 2452 | struct cgroup_map_cb cb = { |
2467 | .fill = cgroup_map_add, | 2453 | .fill = cgroup_map_add, |
2468 | .state = m, | 2454 | .state = m, |
2469 | }; | 2455 | }; |
2470 | return cft->read_map(state->cgroup, cft, &cb); | 2456 | return cft->read_map(css, cft, &cb); |
2471 | } | 2457 | } |
2472 | return cft->read_seq_string(state->cgroup, cft, m); | 2458 | return cft->read_seq_string(css, cft, m); |
2473 | } | ||
2474 | |||
2475 | static int cgroup_seqfile_release(struct inode *inode, struct file *file) | ||
2476 | { | ||
2477 | struct seq_file *seq = file->private_data; | ||
2478 | kfree(seq->private); | ||
2479 | return single_release(inode, file); | ||
2480 | } | 2459 | } |
2481 | 2460 | ||
2482 | static const struct file_operations cgroup_seqfile_operations = { | 2461 | static const struct file_operations cgroup_seqfile_operations = { |
2483 | .read = seq_read, | 2462 | .read = seq_read, |
2484 | .write = cgroup_file_write, | 2463 | .write = cgroup_file_write, |
2485 | .llseek = seq_lseek, | 2464 | .llseek = seq_lseek, |
2486 | .release = cgroup_seqfile_release, | 2465 | .release = single_release, |
2487 | }; | 2466 | }; |
2488 | 2467 | ||
2489 | static int cgroup_file_open(struct inode *inode, struct file *file) | 2468 | static int cgroup_file_open(struct inode *inode, struct file *file) |
2490 | { | 2469 | { |
2470 | struct cfent *cfe = __d_cfe(file->f_dentry); | ||
2471 | struct cftype *cft = __d_cft(file->f_dentry); | ||
2472 | struct cgroup *cgrp = __d_cgrp(cfe->dentry->d_parent); | ||
2473 | struct cgroup_subsys_state *css; | ||
2491 | int err; | 2474 | int err; |
2492 | struct cftype *cft; | ||
2493 | 2475 | ||
2494 | err = generic_file_open(inode, file); | 2476 | err = generic_file_open(inode, file); |
2495 | if (err) | 2477 | if (err) |
2496 | return err; | 2478 | return err; |
2497 | cft = __d_cft(file->f_dentry); | ||
2498 | 2479 | ||
2499 | if (cft->read_map || cft->read_seq_string) { | 2480 | /* |
2500 | struct cgroup_seqfile_state *state; | 2481 | * If the file belongs to a subsystem, pin the css. Will be |
2482 | * unpinned either on open failure or release. This ensures that | ||
2483 | * @css stays alive for all file operations. | ||
2484 | */ | ||
2485 | rcu_read_lock(); | ||
2486 | css = cgroup_css(cgrp, cft->ss); | ||
2487 | if (cft->ss && !css_tryget(css)) | ||
2488 | css = NULL; | ||
2489 | rcu_read_unlock(); | ||
2501 | 2490 | ||
2502 | state = kzalloc(sizeof(*state), GFP_USER); | 2491 | if (!css) |
2503 | if (!state) | 2492 | return -ENODEV; |
2504 | return -ENOMEM; | 2493 | |
2494 | /* | ||
2495 | * @cfe->css is used by read/write/close to determine the | ||
2496 | * associated css. @file->private_data would be a better place but | ||
2497 | * that's already used by seqfile. Multiple accessors may use it | ||
2498 | * simultaneously which is okay as the association never changes. | ||
2499 | */ | ||
2500 | WARN_ON_ONCE(cfe->css && cfe->css != css); | ||
2501 | cfe->css = css; | ||
2505 | 2502 | ||
2506 | state->cft = cft; | 2503 | if (cft->read_map || cft->read_seq_string) { |
2507 | state->cgroup = __d_cgrp(file->f_dentry->d_parent); | ||
2508 | file->f_op = &cgroup_seqfile_operations; | 2504 | file->f_op = &cgroup_seqfile_operations; |
2509 | err = single_open(file, cgroup_seqfile_show, state); | 2505 | err = single_open(file, cgroup_seqfile_show, cfe); |
2510 | if (err < 0) | 2506 | } else if (cft->open) { |
2511 | kfree(state); | ||
2512 | } else if (cft->open) | ||
2513 | err = cft->open(inode, file); | 2507 | err = cft->open(inode, file); |
2514 | else | 2508 | } |
2515 | err = 0; | ||
2516 | 2509 | ||
2510 | if (css->ss && err) | ||
2511 | css_put(css); | ||
2517 | return err; | 2512 | return err; |
2518 | } | 2513 | } |
2519 | 2514 | ||
2520 | static int cgroup_file_release(struct inode *inode, struct file *file) | 2515 | static int cgroup_file_release(struct inode *inode, struct file *file) |
2521 | { | 2516 | { |
2517 | struct cfent *cfe = __d_cfe(file->f_dentry); | ||
2522 | struct cftype *cft = __d_cft(file->f_dentry); | 2518 | struct cftype *cft = __d_cft(file->f_dentry); |
2519 | struct cgroup_subsys_state *css = cfe->css; | ||
2520 | int ret = 0; | ||
2521 | |||
2523 | if (cft->release) | 2522 | if (cft->release) |
2524 | return cft->release(inode, file); | 2523 | ret = cft->release(inode, file); |
2525 | return 0; | 2524 | if (css->ss) |
2525 | css_put(css); | ||
2526 | return ret; | ||
2526 | } | 2527 | } |
2527 | 2528 | ||
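For readers of the patch: the open/release pair above pins the css with css_tryget() on open and drops it with css_put() on release, so every read/write sees a live css. A minimal userspace analogue of that tryget/put lifetime pattern, assuming only C11 atomics (all names here are illustrative, not kernel API):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct obj { atomic_int refcnt; };		/* stand-in for a css */

static bool obj_tryget(struct obj *o)
{
	int old = atomic_load(&o->refcnt);
	/* fail once the object has started dying (refcnt reached 0) */
	while (old > 0)
		if (atomic_compare_exchange_weak(&o->refcnt, &old, old + 1))
			return true;
	return false;
}

static void obj_put(struct obj *o)
{
	if (atomic_fetch_sub(&o->refcnt, 1) == 1)
		printf("last ref dropped, free object\n");
}

int main(void)
{
	struct obj o = { .refcnt = ATOMIC_VAR_INIT(1) };

	if (obj_tryget(&o)) {	/* "open": pin for the file's lifetime */
		/* ... read/write may safely use the object here ... */
		obj_put(&o);	/* "release": unpin */
	}
	obj_put(&o);		/* drop the initial reference */
	return 0;
}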
2528 | /* | 2529 | /* |
@@ -2736,8 +2737,7 @@ static umode_t cgroup_file_mode(const struct cftype *cft) | |||
2736 | return mode; | 2737 | return mode; |
2737 | } | 2738 | } |
2738 | 2739 | ||
2739 | static int cgroup_add_file(struct cgroup *cgrp, struct cgroup_subsys *subsys, | 2740 | static int cgroup_add_file(struct cgroup *cgrp, struct cftype *cft) |
2740 | struct cftype *cft) | ||
2741 | { | 2741 | { |
2742 | struct dentry *dir = cgrp->dentry; | 2742 | struct dentry *dir = cgrp->dentry; |
2743 | struct cgroup *parent = __d_cgrp(dir); | 2743 | struct cgroup *parent = __d_cgrp(dir); |
@@ -2747,8 +2747,9 @@ static int cgroup_add_file(struct cgroup *cgrp, struct cgroup_subsys *subsys, | |||
2747 | umode_t mode; | 2747 | umode_t mode; |
2748 | char name[MAX_CGROUP_TYPE_NAMELEN + MAX_CFTYPE_NAME + 2] = { 0 }; | 2748 | char name[MAX_CGROUP_TYPE_NAMELEN + MAX_CFTYPE_NAME + 2] = { 0 }; |
2749 | 2749 | ||
2750 | if (subsys && !(cgrp->root->flags & CGRP_ROOT_NOPREFIX)) { | 2750 | if (cft->ss && !(cft->flags & CFTYPE_NO_PREFIX) && |
2751 | strcpy(name, subsys->name); | 2751 | !(cgrp->root->flags & CGRP_ROOT_NOPREFIX)) { |
2752 | strcpy(name, cft->ss->name); | ||
2752 | strcat(name, "."); | 2753 | strcat(name, "."); |
2753 | } | 2754 | } |
2754 | strcat(name, cft->name); | 2755 | strcat(name, cft->name); |
@@ -2782,11 +2783,25 @@ out: | |||
2782 | return error; | 2783 | return error; |
2783 | } | 2784 | } |
2784 | 2785 | ||
2785 | static int cgroup_addrm_files(struct cgroup *cgrp, struct cgroup_subsys *subsys, | 2786 | /** |
2786 | struct cftype cfts[], bool is_add) | 2787 | * cgroup_addrm_files - add or remove files to a cgroup directory |
2788 | * @cgrp: the target cgroup | ||
2789 | * @cfts: array of cftypes to be added | ||
2790 | * @is_add: whether to add or remove | ||
2791 | * | ||
2792 | * Depending on @is_add, add or remove files defined by @cfts on @cgrp. | ||
2793 | * For removals, this function never fails. If addition fails, this | ||
2794 | * function doesn't remove files already added. The caller is responsible | ||
2795 | * for cleaning up. | ||
2796 | */ | ||
2797 | static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[], | ||
2798 | bool is_add) | ||
2787 | { | 2799 | { |
2788 | struct cftype *cft; | 2800 | struct cftype *cft; |
2789 | int err, ret = 0; | 2801 | int ret; |
2802 | |||
2803 | lockdep_assert_held(&cgrp->dentry->d_inode->i_mutex); | ||
2804 | lockdep_assert_held(&cgroup_mutex); | ||
2790 | 2805 | ||
2791 | for (cft = cfts; cft->name[0] != '\0'; cft++) { | 2806 | for (cft = cfts; cft->name[0] != '\0'; cft++) { |
2792 | /* does cft->flags tell us to skip this file on @cgrp? */ | 2807 | /* does cft->flags tell us to skip this file on @cgrp? */ |
@@ -2798,16 +2813,17 @@ static int cgroup_addrm_files(struct cgroup *cgrp, struct cgroup_subsys *subsys, | |||
2798 | continue; | 2813 | continue; |
2799 | 2814 | ||
2800 | if (is_add) { | 2815 | if (is_add) { |
2801 | err = cgroup_add_file(cgrp, subsys, cft); | 2816 | ret = cgroup_add_file(cgrp, cft); |
2802 | if (err) | 2817 | if (ret) { |
2803 | pr_warn("cgroup_addrm_files: failed to add %s, err=%d\n", | 2818 | pr_warn("cgroup_addrm_files: failed to add %s, err=%d\n", |
2804 | cft->name, err); | 2819 | cft->name, ret); |
2805 | ret = err; | 2820 | return ret; |
2821 | } | ||
2806 | } else { | 2822 | } else { |
2807 | cgroup_rm_file(cgrp, cft); | 2823 | cgroup_rm_file(cgrp, cft); |
2808 | } | 2824 | } |
2809 | } | 2825 | } |
2810 | return ret; | 2826 | return 0; |
2811 | } | 2827 | } |
2812 | 2828 | ||
2813 | static void cgroup_cfts_prepare(void) | 2829 | static void cgroup_cfts_prepare(void) |
@@ -2816,28 +2832,30 @@ static void cgroup_cfts_prepare(void) | |||
2816 | /* | 2832 | /* |
2817 | * Thanks to the entanglement with vfs inode locking, we can't walk | 2833 | * Thanks to the entanglement with vfs inode locking, we can't walk |
2818 | * the existing cgroups under cgroup_mutex and create files. | 2834 | * the existing cgroups under cgroup_mutex and create files. |
2819 | * Instead, we use cgroup_for_each_descendant_pre() and drop RCU | 2835 | * Instead, we use css_for_each_descendant_pre() and drop RCU read |
2820 | * read lock before calling cgroup_addrm_files(). | 2836 | * lock before calling cgroup_addrm_files(). |
2821 | */ | 2837 | */ |
2822 | mutex_lock(&cgroup_mutex); | 2838 | mutex_lock(&cgroup_mutex); |
2823 | } | 2839 | } |
2824 | 2840 | ||
2825 | static void cgroup_cfts_commit(struct cgroup_subsys *ss, | 2841 | static int cgroup_cfts_commit(struct cftype *cfts, bool is_add) |
2826 | struct cftype *cfts, bool is_add) | ||
2827 | __releases(&cgroup_mutex) | 2842 | __releases(&cgroup_mutex) |
2828 | { | 2843 | { |
2829 | LIST_HEAD(pending); | 2844 | LIST_HEAD(pending); |
2830 | struct cgroup *cgrp, *root = &ss->root->top_cgroup; | 2845 | struct cgroup_subsys *ss = cfts[0].ss; |
2846 | struct cgroup *root = &ss->root->top_cgroup; | ||
2831 | struct super_block *sb = ss->root->sb; | 2847 | struct super_block *sb = ss->root->sb; |
2832 | struct dentry *prev = NULL; | 2848 | struct dentry *prev = NULL; |
2833 | struct inode *inode; | 2849 | struct inode *inode; |
2850 | struct cgroup_subsys_state *css; | ||
2834 | u64 update_before; | 2851 | u64 update_before; |
2852 | int ret = 0; | ||
2835 | 2853 | ||
2836 | /* %NULL @cfts indicates abort and don't bother if @ss isn't attached */ | 2854 | /* %NULL @cfts indicates abort and don't bother if @ss isn't attached */ |
2837 | if (!cfts || ss->root == &cgroup_dummy_root || | 2855 | if (!cfts || ss->root == &cgroup_dummy_root || |
2838 | !atomic_inc_not_zero(&sb->s_active)) { | 2856 | !atomic_inc_not_zero(&sb->s_active)) { |
2839 | mutex_unlock(&cgroup_mutex); | 2857 | mutex_unlock(&cgroup_mutex); |
2840 | return; | 2858 | return 0; |
2841 | } | 2859 | } |
2842 | 2860 | ||
2843 | /* | 2861 | /* |
@@ -2849,17 +2867,11 @@ static void cgroup_cfts_commit(struct cgroup_subsys *ss, | |||
2849 | 2867 | ||
2850 | mutex_unlock(&cgroup_mutex); | 2868 | mutex_unlock(&cgroup_mutex); |
2851 | 2869 | ||
2852 | /* @root always needs to be updated */ | ||
2853 | inode = root->dentry->d_inode; | ||
2854 | mutex_lock(&inode->i_mutex); | ||
2855 | mutex_lock(&cgroup_mutex); | ||
2856 | cgroup_addrm_files(root, ss, cfts, is_add); | ||
2857 | mutex_unlock(&cgroup_mutex); | ||
2858 | mutex_unlock(&inode->i_mutex); | ||
2859 | |||
2860 | /* add/rm files for all cgroups created before */ | 2870 | /* add/rm files for all cgroups created before */ |
2861 | rcu_read_lock(); | 2871 | rcu_read_lock(); |
2862 | cgroup_for_each_descendant_pre(cgrp, root) { | 2872 | css_for_each_descendant_pre(css, cgroup_css(root, ss)) { |
2873 | struct cgroup *cgrp = css->cgroup; | ||
2874 | |||
2863 | if (cgroup_is_dead(cgrp)) | 2875 | if (cgroup_is_dead(cgrp)) |
2864 | continue; | 2876 | continue; |
2865 | 2877 | ||
@@ -2873,15 +2885,18 @@ static void cgroup_cfts_commit(struct cgroup_subsys *ss, | |||
2873 | mutex_lock(&inode->i_mutex); | 2885 | mutex_lock(&inode->i_mutex); |
2874 | mutex_lock(&cgroup_mutex); | 2886 | mutex_lock(&cgroup_mutex); |
2875 | if (cgrp->serial_nr < update_before && !cgroup_is_dead(cgrp)) | 2887 | if (cgrp->serial_nr < update_before && !cgroup_is_dead(cgrp)) |
2876 | cgroup_addrm_files(cgrp, ss, cfts, is_add); | 2888 | ret = cgroup_addrm_files(cgrp, cfts, is_add); |
2877 | mutex_unlock(&cgroup_mutex); | 2889 | mutex_unlock(&cgroup_mutex); |
2878 | mutex_unlock(&inode->i_mutex); | 2890 | mutex_unlock(&inode->i_mutex); |
2879 | 2891 | ||
2880 | rcu_read_lock(); | 2892 | rcu_read_lock(); |
2893 | if (ret) | ||
2894 | break; | ||
2881 | } | 2895 | } |
2882 | rcu_read_unlock(); | 2896 | rcu_read_unlock(); |
2883 | dput(prev); | 2897 | dput(prev); |
2884 | deactivate_super(sb); | 2898 | deactivate_super(sb); |
2899 | return ret; | ||
2885 | } | 2900 | } |
2886 | 2901 | ||
2887 | /** | 2902 | /** |
@@ -2901,49 +2916,56 @@ static void cgroup_cfts_commit(struct cgroup_subsys *ss, | |||
2901 | int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts) | 2916 | int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts) |
2902 | { | 2917 | { |
2903 | struct cftype_set *set; | 2918 | struct cftype_set *set; |
2919 | struct cftype *cft; | ||
2920 | int ret; | ||
2904 | 2921 | ||
2905 | set = kzalloc(sizeof(*set), GFP_KERNEL); | 2922 | set = kzalloc(sizeof(*set), GFP_KERNEL); |
2906 | if (!set) | 2923 | if (!set) |
2907 | return -ENOMEM; | 2924 | return -ENOMEM; |
2908 | 2925 | ||
2926 | for (cft = cfts; cft->name[0] != '\0'; cft++) | ||
2927 | cft->ss = ss; | ||
2928 | |||
2909 | cgroup_cfts_prepare(); | 2929 | cgroup_cfts_prepare(); |
2910 | set->cfts = cfts; | 2930 | set->cfts = cfts; |
2911 | list_add_tail(&set->node, &ss->cftsets); | 2931 | list_add_tail(&set->node, &ss->cftsets); |
2912 | cgroup_cfts_commit(ss, cfts, true); | 2932 | ret = cgroup_cfts_commit(cfts, true); |
2913 | 2933 | if (ret) | |
2914 | return 0; | 2934 | cgroup_rm_cftypes(cfts); |
2935 | return ret; | ||
2915 | } | 2936 | } |
2916 | EXPORT_SYMBOL_GPL(cgroup_add_cftypes); | 2937 | EXPORT_SYMBOL_GPL(cgroup_add_cftypes); |
2917 | 2938 | ||
2918 | /** | 2939 | /** |
2919 | * cgroup_rm_cftypes - remove an array of cftypes from a subsystem | 2940 | * cgroup_rm_cftypes - remove an array of cftypes from a subsystem |
2920 | * @ss: target cgroup subsystem | ||
2921 | * @cfts: zero-length name terminated array of cftypes | 2941 | * @cfts: zero-length name terminated array of cftypes |
2922 | * | 2942 | * |
2923 | * Unregister @cfts from @ss. Files described by @cfts are removed from | 2943 | * Unregister @cfts. Files described by @cfts are removed from all |
2924 | * all existing cgroups to which @ss is attached and all future cgroups | 2944 | * existing cgroups and all future cgroups won't have them either. This |
2925 | * won't have them either. This function can be called anytime whether @ss | 2945 | * function can be called anytime whether @cfts' subsys is attached or not. |
2926 | * is attached or not. | ||
2927 | * | 2946 | * |
2928 | * Returns 0 on successful unregistration, -ENOENT if @cfts is not | 2947 | * Returns 0 on successful unregistration, -ENOENT if @cfts is not |
2929 | * registered with @ss. | 2948 | * registered. |
2930 | */ | 2949 | */ |
2931 | int cgroup_rm_cftypes(struct cgroup_subsys *ss, struct cftype *cfts) | 2950 | int cgroup_rm_cftypes(struct cftype *cfts) |
2932 | { | 2951 | { |
2933 | struct cftype_set *set; | 2952 | struct cftype_set *set; |
2934 | 2953 | ||
2954 | if (!cfts || !cfts[0].ss) | ||
2955 | return -ENOENT; | ||
2956 | |||
2935 | cgroup_cfts_prepare(); | 2957 | cgroup_cfts_prepare(); |
2936 | 2958 | ||
2937 | list_for_each_entry(set, &ss->cftsets, node) { | 2959 | list_for_each_entry(set, &cfts[0].ss->cftsets, node) { |
2938 | if (set->cfts == cfts) { | 2960 | if (set->cfts == cfts) { |
2939 | list_del(&set->node); | 2961 | list_del(&set->node); |
2940 | kfree(set); | 2962 | kfree(set); |
2941 | cgroup_cfts_commit(ss, cfts, false); | 2963 | cgroup_cfts_commit(cfts, false); |
2942 | return 0; | 2964 | return 0; |
2943 | } | 2965 | } |
2944 | } | 2966 | } |
2945 | 2967 | ||
2946 | cgroup_cfts_commit(ss, NULL, false); | 2968 | cgroup_cfts_commit(NULL, false); |
2947 | return -ENOENT; | 2969 | return -ENOENT; |
2948 | } | 2970 | } |
2949 | 2971 | ||
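A hedged sketch, not taken from the patch, of what a controller's file registration looks like after this change: handlers take a css instead of a cgroup, and cgroup_rm_cftypes() needs only the cfts array because each cft now caches its subsystem in cft->ss. The ex_* names and the ex_subsys subsystem are hypothetical.

static u64 ex_weight_read(struct cgroup_subsys_state *css, struct cftype *cft)
{
	return 100;		/* would derive the value from state off @css */
}

static int ex_weight_write(struct cgroup_subsys_state *css,
			   struct cftype *cft, u64 val)
{
	return 0;		/* would store @val in state off @css */
}

static struct cftype ex_files[] = {
	{
		.name = "weight",
		.read_u64 = ex_weight_read,
		.write_u64 = ex_weight_write,
	},
	{ }	/* zero-length name terminates the array */
};

static int __init ex_init(void)
{
	/* on failure, cgroup_add_cftypes() already rolled the files back */
	return cgroup_add_cftypes(&ex_subsys, ex_files);
}

Teardown would be the symmetric cgroup_rm_cftypes(ex_files), with no subsystem argument anymore.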
@@ -2966,34 +2988,10 @@ int cgroup_task_count(const struct cgroup *cgrp) | |||
2966 | } | 2988 | } |
2967 | 2989 | ||
2968 | /* | 2990 | /* |
2969 | * Advance a list_head iterator. The iterator should be positioned at | 2991 | * To reduce the fork() overhead for systems that are not actually using |
2970 | * the start of a css_set | 2992 | * their cgroups capability, we don't maintain the lists running through |
2971 | */ | 2993 | * each css_set to its tasks until we see the list actually used - in other |
2972 | static void cgroup_advance_iter(struct cgroup *cgrp, struct cgroup_iter *it) | 2994 | * words after the first call to css_task_iter_start(). |
2973 | { | ||
2974 | struct list_head *l = it->cset_link; | ||
2975 | struct cgrp_cset_link *link; | ||
2976 | struct css_set *cset; | ||
2977 | |||
2978 | /* Advance to the next non-empty css_set */ | ||
2979 | do { | ||
2980 | l = l->next; | ||
2981 | if (l == &cgrp->cset_links) { | ||
2982 | it->cset_link = NULL; | ||
2983 | return; | ||
2984 | } | ||
2985 | link = list_entry(l, struct cgrp_cset_link, cset_link); | ||
2986 | cset = link->cset; | ||
2987 | } while (list_empty(&cset->tasks)); | ||
2988 | it->cset_link = l; | ||
2989 | it->task = cset->tasks.next; | ||
2990 | } | ||
2991 | |||
2992 | /* | ||
2993 | * To reduce the fork() overhead for systems that are not actually | ||
2994 | * using their cgroups capability, we don't maintain the lists running | ||
2995 | * through each css_set to its tasks until we see the list actually | ||
2996 | * used - in other words after the first call to cgroup_iter_start(). | ||
2997 | */ | 2995 | */ |
2998 | static void cgroup_enable_task_cg_lists(void) | 2996 | static void cgroup_enable_task_cg_lists(void) |
2999 | { | 2997 | { |
@@ -3024,16 +3022,21 @@ static void cgroup_enable_task_cg_lists(void) | |||
3024 | } | 3022 | } |
3025 | 3023 | ||
3026 | /** | 3024 | /** |
3027 | * cgroup_next_sibling - find the next sibling of a given cgroup | 3025 | * css_next_child - find the next child of a given css |
3028 | * @pos: the current cgroup | 3026 | * @pos_css: the current position (%NULL to initiate traversal) |
3027 | * @parent_css: css whose children to walk | ||
3029 | * | 3028 | * |
3030 | * This function returns the next sibling of @pos and should be called | 3029 | * This function returns the next child of @parent_css and should be called |
3031 | * under RCU read lock. The only requirement is that @pos is accessible. | 3030 | * under RCU read lock. The only requirement is that @parent_css and |
3032 | * The next sibling is guaranteed to be returned regardless of @pos's | 3031 | * @pos_css are accessible. The next sibling is guaranteed to be returned |
3033 | * state. | 3032 | * regardless of their states. |
3034 | */ | 3033 | */ |
3035 | struct cgroup *cgroup_next_sibling(struct cgroup *pos) | 3034 | struct cgroup_subsys_state * |
3035 | css_next_child(struct cgroup_subsys_state *pos_css, | ||
3036 | struct cgroup_subsys_state *parent_css) | ||
3036 | { | 3037 | { |
3038 | struct cgroup *pos = pos_css ? pos_css->cgroup : NULL; | ||
3039 | struct cgroup *cgrp = parent_css->cgroup; | ||
3037 | struct cgroup *next; | 3040 | struct cgroup *next; |
3038 | 3041 | ||
3039 | WARN_ON_ONCE(!rcu_read_lock_held()); | 3042 | WARN_ON_ONCE(!rcu_read_lock_held()); |
@@ -3048,78 +3051,81 @@ struct cgroup *cgroup_next_sibling(struct cgroup *pos) | |||
3048 | * safe to dereference from this RCU critical section. If | 3051 | * safe to dereference from this RCU critical section. If |
3049 | * ->sibling.next is inaccessible, cgroup_is_dead() is guaranteed | 3052 | * ->sibling.next is inaccessible, cgroup_is_dead() is guaranteed |
3050 | * to be visible as %true here. | 3053 | * to be visible as %true here. |
3054 | * | ||
3055 | * If @pos is dead, its next pointer can't be dereferenced; | ||
3056 | * however, as each cgroup is given a monotonically increasing | ||
3057 | * unique serial number and always appended to the sibling list, | ||
3058 | * the next one can be found by walking the parent's children until | ||
3059 | * we see a cgroup with higher serial number than @pos's. While | ||
3060 | * this path can be slower, it's taken only when either the current | ||
3061 | * cgroup is removed or iteration and removal race. | ||
3051 | */ | 3062 | */ |
3052 | if (likely(!cgroup_is_dead(pos))) { | 3063 | if (!pos) { |
3064 | next = list_entry_rcu(cgrp->children.next, struct cgroup, sibling); | ||
3065 | } else if (likely(!cgroup_is_dead(pos))) { | ||
3053 | next = list_entry_rcu(pos->sibling.next, struct cgroup, sibling); | 3066 | next = list_entry_rcu(pos->sibling.next, struct cgroup, sibling); |
3054 | if (&next->sibling != &pos->parent->children) | 3067 | } else { |
3055 | return next; | 3068 | list_for_each_entry_rcu(next, &cgrp->children, sibling) |
3056 | return NULL; | 3069 | if (next->serial_nr > pos->serial_nr) |
3070 | break; | ||
3057 | } | 3071 | } |
3058 | 3072 | ||
3059 | /* | 3073 | if (&next->sibling == &cgrp->children) |
3060 | * Can't dereference the next pointer. Each cgroup is given a | 3074 | return NULL; |
3061 | * monotonically increasing unique serial number and always | 3075 | |
3062 | * appended to the sibling list, so the next one can be found by | 3076 | return cgroup_css(next, parent_css->ss); |
3063 | * walking the parent's children until we see a cgroup with higher | ||
3064 | * serial number than @pos's. | ||
3065 | * | ||
3066 | * While this path can be slow, it's taken only when either the | ||
3067 | * current cgroup is removed or iteration and removal race. | ||
3068 | */ | ||
3069 | list_for_each_entry_rcu(next, &pos->parent->children, sibling) | ||
3070 | if (next->serial_nr > pos->serial_nr) | ||
3071 | return next; | ||
3072 | return NULL; | ||
3073 | } | 3077 | } |
3074 | EXPORT_SYMBOL_GPL(cgroup_next_sibling); | 3078 | EXPORT_SYMBOL_GPL(css_next_child); |
3075 | 3079 | ||
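A sketch of how a caller is expected to use css_next_child(), assuming @parent is a css the caller already holds: the walk needs only rcu_read_lock(), and the serial-number fallback above keeps it going even if the current position is removed mid-walk.

	struct cgroup_subsys_state *child = NULL;

	rcu_read_lock();
	while ((child = css_next_child(child, parent))) {
		/* inspect @child; must not sleep inside the RCU section */
	}
	rcu_read_unlock();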
3076 | /** | 3080 | /** |
3077 | * cgroup_next_descendant_pre - find the next descendant for pre-order walk | 3081 | * css_next_descendant_pre - find the next descendant for pre-order walk |
3078 | * @pos: the current position (%NULL to initiate traversal) | 3082 | * @pos: the current position (%NULL to initiate traversal) |
3079 | * @cgroup: cgroup whose descendants to walk | 3083 | * @root: css whose descendants to walk |
3080 | * | 3084 | * |
3081 | * To be used by cgroup_for_each_descendant_pre(). Find the next | 3085 | * To be used by css_for_each_descendant_pre(). Find the next descendant |
3082 | * descendant to visit for pre-order traversal of @cgroup's descendants. | 3086 | * to visit for pre-order traversal of @root's descendants. @root is |
3087 | * included in the iteration and the first node to be visited. | ||
3083 | * | 3088 | * |
3084 | * While this function requires RCU read locking, it doesn't require the | 3089 | * While this function requires RCU read locking, it doesn't require the |
3085 | * whole traversal to be contained in a single RCU critical section. This | 3090 | * whole traversal to be contained in a single RCU critical section. This |
3086 | * function will return the correct next descendant as long as both @pos | 3091 | * function will return the correct next descendant as long as both @pos |
3087 | * and @cgroup are accessible and @pos is a descendant of @cgroup. | 3092 | * and @root are accessible and @pos is a descendant of @root. |
3088 | */ | 3093 | */ |
3089 | struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos, | 3094 | struct cgroup_subsys_state * |
3090 | struct cgroup *cgroup) | 3095 | css_next_descendant_pre(struct cgroup_subsys_state *pos, |
3096 | struct cgroup_subsys_state *root) | ||
3091 | { | 3097 | { |
3092 | struct cgroup *next; | 3098 | struct cgroup_subsys_state *next; |
3093 | 3099 | ||
3094 | WARN_ON_ONCE(!rcu_read_lock_held()); | 3100 | WARN_ON_ONCE(!rcu_read_lock_held()); |
3095 | 3101 | ||
3096 | /* if first iteration, pretend we just visited @cgroup */ | 3102 | /* if first iteration, visit @root */ |
3097 | if (!pos) | 3103 | if (!pos) |
3098 | pos = cgroup; | 3104 | return root; |
3099 | 3105 | ||
3100 | /* visit the first child if exists */ | 3106 | /* visit the first child if exists */ |
3101 | next = list_first_or_null_rcu(&pos->children, struct cgroup, sibling); | 3107 | next = css_next_child(NULL, pos); |
3102 | if (next) | 3108 | if (next) |
3103 | return next; | 3109 | return next; |
3104 | 3110 | ||
3105 | /* no child, visit my or the closest ancestor's next sibling */ | 3111 | /* no child, visit my or the closest ancestor's next sibling */ |
3106 | while (pos != cgroup) { | 3112 | while (pos != root) { |
3107 | next = cgroup_next_sibling(pos); | 3113 | next = css_next_child(pos, css_parent(pos)); |
3108 | if (next) | 3114 | if (next) |
3109 | return next; | 3115 | return next; |
3110 | pos = pos->parent; | 3116 | pos = css_parent(pos); |
3111 | } | 3117 | } |
3112 | 3118 | ||
3113 | return NULL; | 3119 | return NULL; |
3114 | } | 3120 | } |
3115 | EXPORT_SYMBOL_GPL(cgroup_next_descendant_pre); | 3121 | EXPORT_SYMBOL_GPL(css_next_descendant_pre); |
3116 | 3122 | ||
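A sketch of the pre-order walk this enables, assuming @root is a held css; the css_for_each_descendant_pre() wrapper used elsewhere in the series expands to roughly this:

	struct cgroup_subsys_state *pos = NULL;

	rcu_read_lock();
	while ((pos = css_next_descendant_pre(pos, root))) {
		/* @root itself comes first, then its subtree in pre-order */
	}
	rcu_read_unlock();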
3117 | /** | 3123 | /** |
3118 | * cgroup_rightmost_descendant - return the rightmost descendant of a cgroup | 3124 | * css_rightmost_descendant - return the rightmost descendant of a css |
3119 | * @pos: cgroup of interest | 3125 | * @pos: css of interest |
3120 | * | 3126 | * |
3121 | * Return the rightmost descendant of @pos. If there's no descendant, | 3127 | * Return the rightmost descendant of @pos. If there's no descendant, @pos |
3122 | * @pos is returned. This can be used during pre-order traversal to skip | 3128 | * is returned. This can be used during pre-order traversal to skip |
3123 | * subtree of @pos. | 3129 | * subtree of @pos. |
3124 | * | 3130 | * |
3125 | * While this function requires RCU read locking, it doesn't require the | 3131 | * While this function requires RCU read locking, it doesn't require the |
@@ -3127,9 +3133,10 @@ EXPORT_SYMBOL_GPL(cgroup_next_descendant_pre); | |||
3127 | * function will return the correct rightmost descendant as long as @pos is | 3133 | * function will return the correct rightmost descendant as long as @pos is |
3128 | * accessible. | 3134 | * accessible. |
3129 | */ | 3135 | */ |
3130 | struct cgroup *cgroup_rightmost_descendant(struct cgroup *pos) | 3136 | struct cgroup_subsys_state * |
3137 | css_rightmost_descendant(struct cgroup_subsys_state *pos) | ||
3131 | { | 3138 | { |
3132 | struct cgroup *last, *tmp; | 3139 | struct cgroup_subsys_state *last, *tmp; |
3133 | 3140 | ||
3134 | WARN_ON_ONCE(!rcu_read_lock_held()); | 3141 | WARN_ON_ONCE(!rcu_read_lock_held()); |
3135 | 3142 | ||
@@ -3137,82 +3144,138 @@ struct cgroup *cgroup_rightmost_descendant(struct cgroup *pos) | |||
3137 | last = pos; | 3144 | last = pos; |
3138 | /* ->prev isn't RCU safe, walk ->next till the end */ | 3145 | /* ->prev isn't RCU safe, walk ->next till the end */ |
3139 | pos = NULL; | 3146 | pos = NULL; |
3140 | list_for_each_entry_rcu(tmp, &last->children, sibling) | 3147 | css_for_each_child(tmp, last) |
3141 | pos = tmp; | 3148 | pos = tmp; |
3142 | } while (pos); | 3149 | } while (pos); |
3143 | 3150 | ||
3144 | return last; | 3151 | return last; |
3145 | } | 3152 | } |
3146 | EXPORT_SYMBOL_GPL(cgroup_rightmost_descendant); | 3153 | EXPORT_SYMBOL_GPL(css_rightmost_descendant); |
3147 | 3154 | ||
3148 | static struct cgroup *cgroup_leftmost_descendant(struct cgroup *pos) | 3155 | static struct cgroup_subsys_state * |
3156 | css_leftmost_descendant(struct cgroup_subsys_state *pos) | ||
3149 | { | 3157 | { |
3150 | struct cgroup *last; | 3158 | struct cgroup_subsys_state *last; |
3151 | 3159 | ||
3152 | do { | 3160 | do { |
3153 | last = pos; | 3161 | last = pos; |
3154 | pos = list_first_or_null_rcu(&pos->children, struct cgroup, | 3162 | pos = css_next_child(NULL, pos); |
3155 | sibling); | ||
3156 | } while (pos); | 3163 | } while (pos); |
3157 | 3164 | ||
3158 | return last; | 3165 | return last; |
3159 | } | 3166 | } |
3160 | 3167 | ||
3161 | /** | 3168 | /** |
3162 | * cgroup_next_descendant_post - find the next descendant for post-order walk | 3169 | * css_next_descendant_post - find the next descendant for post-order walk |
3163 | * @pos: the current position (%NULL to initiate traversal) | 3170 | * @pos: the current position (%NULL to initiate traversal) |
3164 | * @cgroup: cgroup whose descendants to walk | 3171 | * @root: css whose descendants to walk |
3165 | * | 3172 | * |
3166 | * To be used by cgroup_for_each_descendant_post(). Find the next | 3173 | * To be used by css_for_each_descendant_post(). Find the next descendant |
3167 | * descendant to visit for post-order traversal of @cgroup's descendants. | 3174 | * to visit for post-order traversal of @root's descendants. @root is |
3175 | * included in the iteration and the last node to be visited. | ||
3168 | * | 3176 | * |
3169 | * While this function requires RCU read locking, it doesn't require the | 3177 | * While this function requires RCU read locking, it doesn't require the |
3170 | * whole traversal to be contained in a single RCU critical section. This | 3178 | * whole traversal to be contained in a single RCU critical section. This |
3171 | * function will return the correct next descendant as long as both @pos | 3179 | * function will return the correct next descendant as long as both @pos |
3172 | * and @cgroup are accessible and @pos is a descendant of @cgroup. | 3180 | * and @root are accessible and @pos is a descendant of @root. |
3173 | */ | 3181 | */ |
3174 | struct cgroup *cgroup_next_descendant_post(struct cgroup *pos, | 3182 | struct cgroup_subsys_state * |
3175 | struct cgroup *cgroup) | 3183 | css_next_descendant_post(struct cgroup_subsys_state *pos, |
3184 | struct cgroup_subsys_state *root) | ||
3176 | { | 3185 | { |
3177 | struct cgroup *next; | 3186 | struct cgroup_subsys_state *next; |
3178 | 3187 | ||
3179 | WARN_ON_ONCE(!rcu_read_lock_held()); | 3188 | WARN_ON_ONCE(!rcu_read_lock_held()); |
3180 | 3189 | ||
3181 | /* if first iteration, visit the leftmost descendant */ | 3190 | /* if first iteration, visit the leftmost descendant */ |
3182 | if (!pos) { | 3191 | if (!pos) { |
3183 | next = cgroup_leftmost_descendant(cgroup); | 3192 | next = css_leftmost_descendant(root); |
3184 | return next != cgroup ? next : NULL; | 3193 | return next != root ? next : NULL; |
3185 | } | 3194 | } |
3186 | 3195 | ||
3196 | /* if we visited @root, we're done */ | ||
3197 | if (pos == root) | ||
3198 | return NULL; | ||
3199 | |||
3187 | /* if there's an unvisited sibling, visit its leftmost descendant */ | 3200 | /* if there's an unvisited sibling, visit its leftmost descendant */ |
3188 | next = cgroup_next_sibling(pos); | 3201 | next = css_next_child(pos, css_parent(pos)); |
3189 | if (next) | 3202 | if (next) |
3190 | return cgroup_leftmost_descendant(next); | 3203 | return css_leftmost_descendant(next); |
3191 | 3204 | ||
3192 | /* no sibling left, visit parent */ | 3205 | /* no sibling left, visit parent */ |
3193 | next = pos->parent; | 3206 | return css_parent(pos); |
3194 | return next != cgroup ? next : NULL; | ||
3195 | } | 3207 | } |
3196 | EXPORT_SYMBOL_GPL(cgroup_next_descendant_post); | 3208 | EXPORT_SYMBOL_GPL(css_next_descendant_post); |
3197 | 3209 | ||
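The post-order variant mirrors the sketch above but visits children before their parent, with @root last, which is the natural shape for teardown work; a minimal sketch under the same assumptions:

	struct cgroup_subsys_state *pos = NULL;

	rcu_read_lock();
	while ((pos = css_next_descendant_post(pos, root))) {
		/* @pos's whole subtree has already been visited here */
	}
	rcu_read_unlock();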
3198 | void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it) | 3210 | /** |
3211 | * css_advance_task_iter - advance a task iterator to the next css_set | ||
3212 | * @it: the iterator to advance | ||
3213 | * | ||
3214 | * Advance @it to the next css_set to walk. | ||
3215 | */ | ||
3216 | static void css_advance_task_iter(struct css_task_iter *it) | ||
3217 | { | ||
3218 | struct list_head *l = it->cset_link; | ||
3219 | struct cgrp_cset_link *link; | ||
3220 | struct css_set *cset; | ||
3221 | |||
3222 | /* Advance to the next non-empty css_set */ | ||
3223 | do { | ||
3224 | l = l->next; | ||
3225 | if (l == &it->origin_css->cgroup->cset_links) { | ||
3226 | it->cset_link = NULL; | ||
3227 | return; | ||
3228 | } | ||
3229 | link = list_entry(l, struct cgrp_cset_link, cset_link); | ||
3230 | cset = link->cset; | ||
3231 | } while (list_empty(&cset->tasks)); | ||
3232 | it->cset_link = l; | ||
3233 | it->task = cset->tasks.next; | ||
3234 | } | ||
3235 | |||
3236 | /** | ||
3237 | * css_task_iter_start - initiate task iteration | ||
3238 | * @css: the css to walk tasks of | ||
3239 | * @it: the task iterator to use | ||
3240 | * | ||
3241 | * Initiate iteration through the tasks of @css. The caller can call | ||
3242 | * css_task_iter_next() to walk through the tasks until the function | ||
3243 | * returns NULL. On completion of iteration, css_task_iter_end() must be | ||
3244 | * called. | ||
3245 | * | ||
3246 | * Note that this function acquires a lock which is released when the | ||
3247 | * iteration finishes. The caller can't sleep while iteration is in | ||
3248 | * progress. | ||
3249 | */ | ||
3250 | void css_task_iter_start(struct cgroup_subsys_state *css, | ||
3251 | struct css_task_iter *it) | ||
3199 | __acquires(css_set_lock) | 3252 | __acquires(css_set_lock) |
3200 | { | 3253 | { |
3201 | /* | 3254 | /* |
3202 | * The first time anyone tries to iterate across a cgroup, | 3255 | * The first time anyone tries to iterate across a css, we need to |
3203 | * we need to enable the list linking each css_set to its | 3256 | * enable the list linking each css_set to its tasks, and fix up |
3204 | * tasks, and fix up all existing tasks. | 3257 | * all existing tasks. |
3205 | */ | 3258 | */ |
3206 | if (!use_task_css_set_links) | 3259 | if (!use_task_css_set_links) |
3207 | cgroup_enable_task_cg_lists(); | 3260 | cgroup_enable_task_cg_lists(); |
3208 | 3261 | ||
3209 | read_lock(&css_set_lock); | 3262 | read_lock(&css_set_lock); |
3210 | it->cset_link = &cgrp->cset_links; | 3263 | |
3211 | cgroup_advance_iter(cgrp, it); | 3264 | it->origin_css = css; |
3265 | it->cset_link = &css->cgroup->cset_links; | ||
3266 | |||
3267 | css_advance_task_iter(it); | ||
3212 | } | 3268 | } |
3213 | 3269 | ||
3214 | struct task_struct *cgroup_iter_next(struct cgroup *cgrp, | 3270 | /** |
3215 | struct cgroup_iter *it) | 3271 | * css_task_iter_next - return the next task for the iterator |
3272 | * @it: the task iterator being iterated | ||
3273 | * | ||
3274 | * The "next" function for task iteration. @it should have been | ||
3275 | * initialized via css_task_iter_start(). Returns NULL when the iteration | ||
3276 | * reaches the end. | ||
3277 | */ | ||
3278 | struct task_struct *css_task_iter_next(struct css_task_iter *it) | ||
3216 | { | 3279 | { |
3217 | struct task_struct *res; | 3280 | struct task_struct *res; |
3218 | struct list_head *l = it->task; | 3281 | struct list_head *l = it->task; |
@@ -3226,16 +3289,24 @@ struct task_struct *cgroup_iter_next(struct cgroup *cgrp, | |||
3226 | l = l->next; | 3289 | l = l->next; |
3227 | link = list_entry(it->cset_link, struct cgrp_cset_link, cset_link); | 3290 | link = list_entry(it->cset_link, struct cgrp_cset_link, cset_link); |
3228 | if (l == &link->cset->tasks) { | 3291 | if (l == &link->cset->tasks) { |
3229 | /* We reached the end of this task list - move on to | 3292 | /* |
3230 | * the next cg_cgroup_link */ | 3293 | * We reached the end of this task list - move on to the |
3231 | cgroup_advance_iter(cgrp, it); | 3294 | * next cgrp_cset_link. |
3295 | */ | ||
3296 | css_advance_task_iter(it); | ||
3232 | } else { | 3297 | } else { |
3233 | it->task = l; | 3298 | it->task = l; |
3234 | } | 3299 | } |
3235 | return res; | 3300 | return res; |
3236 | } | 3301 | } |
3237 | 3302 | ||
3238 | void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it) | 3303 | /** |
3304 | * css_task_iter_end - finish task iteration | ||
3305 | * @it: the task iterator to finish | ||
3306 | * | ||
3307 | * Finish task iteration started by css_task_iter_start(). | ||
3308 | */ | ||
3309 | void css_task_iter_end(struct css_task_iter *it) | ||
3239 | __releases(css_set_lock) | 3310 | __releases(css_set_lock) |
3240 | { | 3311 | { |
3241 | read_unlock(&css_set_lock); | 3312 | read_unlock(&css_set_lock); |
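Putting the three task-iterator entry points together, a hedged sketch of the contract documented above, assuming @css is a css the caller holds: start/next/end bracket the walk, and css_set_lock stays read-held throughout, so the loop body must not sleep.

	struct css_task_iter it;
	struct task_struct *task;

	css_task_iter_start(css, &it);
	while ((task = css_task_iter_next(&it)))
		pr_info("pid %d\n", task_pid_nr(task));
	css_task_iter_end(&it);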
@@ -3276,46 +3347,49 @@ static inline int started_after(void *p1, void *p2) | |||
3276 | } | 3347 | } |
3277 | 3348 | ||
3278 | /** | 3349 | /** |
3279 | * cgroup_scan_tasks - iterate through all the tasks in a cgroup | 3350 | * css_scan_tasks - iterate through all the tasks in a css |
3280 | * @scan: struct cgroup_scanner containing arguments for the scan | 3351 | * @css: the css to iterate tasks of |
3352 | * @test: optional test callback | ||
3353 | * @process: process callback | ||
3354 | * @data: data passed to @test and @process | ||
3355 | * @heap: optional pre-allocated heap used for task iteration | ||
3356 | * | ||
3357 | * Iterate through all the tasks in @css, calling @test for each, and if it | ||
3358 | * returns %true, call @process for it also. | ||
3359 | * | ||
3360 | * @test may be NULL, meaning always true (select all tasks), which | ||
3361 | * effectively duplicates css_task_iter_{start,next,end}() but does not | ||
3362 | * lock css_set_lock for the call to @process. | ||
3363 | * | ||
3364 | * It is guaranteed that @process will act on every task that is a member | ||
3365 | * of @css for the duration of this call. This function may or may not | ||
3366 | * call @process for tasks that exit or move to a different css during the | ||
3367 | * call, or are forked or move into the css during the call. | ||
3281 | * | 3368 | * |
3282 | * Arguments include pointers to callback functions test_task() and | 3369 | * Note that @test may be called with locks held, and may in some |
3283 | * process_task(). | 3370 | * situations be called multiple times for the same task, so it should be |
3284 | * Iterate through all the tasks in a cgroup, calling test_task() for each, | 3371 | * cheap. |
3285 | * and if it returns true, call process_task() for it also. | ||
3286 | * The test_task pointer may be NULL, meaning always true (select all tasks). | ||
3287 | * Effectively duplicates cgroup_iter_{start,next,end}() | ||
3288 | * but does not lock css_set_lock for the call to process_task(). | ||
3289 | * The struct cgroup_scanner may be embedded in any structure of the caller's | ||
3290 | * creation. | ||
3291 | * It is guaranteed that process_task() will act on every task that | ||
3292 | * is a member of the cgroup for the duration of this call. This | ||
3293 | * function may or may not call process_task() for tasks that exit | ||
3294 | * or move to a different cgroup during the call, or are forked or | ||
3295 | * move into the cgroup during the call. | ||
3296 | * | 3372 | * |
3297 | * Note that test_task() may be called with locks held, and may in some | 3373 | * If @heap is non-NULL, a heap has been pre-allocated and will be used for |
3298 | * situations be called multiple times for the same task, so it should | 3374 | * heap operations (and its "gt" member will be overwritten), else a |
3299 | * be cheap. | 3375 | * temporary heap will be used (allocation of which may cause this function |
3300 | * If the heap pointer in the struct cgroup_scanner is non-NULL, a heap has been | 3376 | * to fail). |
3301 | * pre-allocated and will be used for heap operations (and its "gt" member will | ||
3302 | * be overwritten), else a temporary heap will be used (allocation of which | ||
3303 | * may cause this function to fail). | ||
3304 | */ | 3377 | */ |
3305 | int cgroup_scan_tasks(struct cgroup_scanner *scan) | 3378 | int css_scan_tasks(struct cgroup_subsys_state *css, |
3379 | bool (*test)(struct task_struct *, void *), | ||
3380 | void (*process)(struct task_struct *, void *), | ||
3381 | void *data, struct ptr_heap *heap) | ||
3306 | { | 3382 | { |
3307 | int retval, i; | 3383 | int retval, i; |
3308 | struct cgroup_iter it; | 3384 | struct css_task_iter it; |
3309 | struct task_struct *p, *dropped; | 3385 | struct task_struct *p, *dropped; |
3310 | /* Never dereference latest_task, since it's not refcounted */ | 3386 | /* Never dereference latest_task, since it's not refcounted */ |
3311 | struct task_struct *latest_task = NULL; | 3387 | struct task_struct *latest_task = NULL; |
3312 | struct ptr_heap tmp_heap; | 3388 | struct ptr_heap tmp_heap; |
3313 | struct ptr_heap *heap; | ||
3314 | struct timespec latest_time = { 0, 0 }; | 3389 | struct timespec latest_time = { 0, 0 }; |
3315 | 3390 | ||
3316 | if (scan->heap) { | 3391 | if (heap) { |
3317 | /* The caller supplied our heap and pre-allocated its memory */ | 3392 | /* The caller supplied our heap and pre-allocated its memory */ |
3318 | heap = scan->heap; | ||
3319 | heap->gt = &started_after; | 3393 | heap->gt = &started_after; |
3320 | } else { | 3394 | } else { |
3321 | /* We need to allocate our own heap memory */ | 3395 | /* We need to allocate our own heap memory */ |
@@ -3328,25 +3402,24 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan) | |||
3328 | 3402 | ||
3329 | again: | 3403 | again: |
3330 | /* | 3404 | /* |
3331 | * Scan tasks in the cgroup, using the scanner's "test_task" callback | 3405 | * Scan tasks in the css, using the @test callback to determine |
3332 | * to determine which are of interest, and using the scanner's | 3406 | * which are of interest, and invoking @process callback on the |
3333 | * "process_task" callback to process any of them that need an update. | 3407 | * ones which need an update. Since we don't want to hold any |
3334 | * Since we don't want to hold any locks during the task updates, | 3408 | * locks during the task updates, gather tasks to be processed in a |
3335 | * gather tasks to be processed in a heap structure. | 3409 | * heap structure. The heap is sorted by descending task start |
3336 | * The heap is sorted by descending task start time. | 3410 | * time. If the statically-sized heap fills up, we overflow tasks |
3337 | * If the statically-sized heap fills up, we overflow tasks that | 3411 | * that started later, and in future iterations only consider tasks |
3338 | * started later, and in future iterations only consider tasks that | 3412 | * that started after the latest task in the previous pass. This |
3339 | * started after the latest task in the previous pass. This | ||
3340 | * guarantees forward progress and that we don't miss any tasks. | 3413 | * guarantees forward progress and that we don't miss any tasks. |
3341 | */ | 3414 | */ |
3342 | heap->size = 0; | 3415 | heap->size = 0; |
3343 | cgroup_iter_start(scan->cg, &it); | 3416 | css_task_iter_start(css, &it); |
3344 | while ((p = cgroup_iter_next(scan->cg, &it))) { | 3417 | while ((p = css_task_iter_next(&it))) { |
3345 | /* | 3418 | /* |
3346 | * Only affect tasks that qualify per the caller's callback, | 3419 | * Only affect tasks that qualify per the caller's callback, |
3347 | * if one was provided | 3420 | * if one was provided
3348 | */ | 3421 | */ |
3349 | if (scan->test_task && !scan->test_task(p, scan)) | 3422 | if (test && !test(p, data)) |
3350 | continue; | 3423 | continue; |
3351 | /* | 3424 | /* |
3352 | * Only process tasks that started after the last task | 3425 | * Only process tasks that started after the last task |
@@ -3374,7 +3447,7 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan) | |||
3374 | * the heap and wasn't inserted | 3447 | * the heap and wasn't inserted |
3375 | */ | 3448 | */ |
3376 | } | 3449 | } |
3377 | cgroup_iter_end(scan->cg, &it); | 3450 | css_task_iter_end(&it); |
3378 | 3451 | ||
3379 | if (heap->size) { | 3452 | if (heap->size) { |
3380 | for (i = 0; i < heap->size; i++) { | 3453 | for (i = 0; i < heap->size; i++) { |
@@ -3384,7 +3457,7 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan) | |||
3384 | latest_task = q; | 3457 | latest_task = q; |
3385 | } | 3458 | } |
3386 | /* Process the task per the caller's callback */ | 3459 | /* Process the task per the caller's callback */ |
3387 | scan->process_task(q, scan); | 3460 | process(q, data); |
3388 | put_task_struct(q); | 3461 | put_task_struct(q); |
3389 | } | 3462 | } |
3390 | /* | 3463 | /* |
@@ -3401,10 +3474,9 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan) | |||
3401 | return 0; | 3474 | return 0; |
3402 | } | 3475 | } |
3403 | 3476 | ||
3404 | static void cgroup_transfer_one_task(struct task_struct *task, | 3477 | static void cgroup_transfer_one_task(struct task_struct *task, void *data) |
3405 | struct cgroup_scanner *scan) | ||
3406 | { | 3478 | { |
3407 | struct cgroup *new_cgroup = scan->data; | 3479 | struct cgroup *new_cgroup = data; |
3408 | 3480 | ||
3409 | mutex_lock(&cgroup_mutex); | 3481 | mutex_lock(&cgroup_mutex); |
3410 | cgroup_attach_task(new_cgroup, task, false); | 3482 | cgroup_attach_task(new_cgroup, task, false); |
@@ -3418,15 +3490,8 @@ static void cgroup_transfer_one_task(struct task_struct *task, | |||
3418 | */ | 3490 | */ |
3419 | int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from) | 3491 | int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from) |
3420 | { | 3492 | { |
3421 | struct cgroup_scanner scan; | 3493 | return css_scan_tasks(&from->dummy_css, NULL, cgroup_transfer_one_task, |
3422 | 3494 | to, NULL); | |
3423 | scan.cg = from; | ||
3424 | scan.test_task = NULL; /* select all tasks in cgroup */ | ||
3425 | scan.process_task = cgroup_transfer_one_task; | ||
3426 | scan.heap = NULL; | ||
3427 | scan.data = to; | ||
3428 | |||
3429 | return cgroup_scan_tasks(&scan); | ||
3430 | } | 3495 | } |
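cgroup_transfer_tasks() above is the NULL-@test case of the new css_scan_tasks() interface. A hedged sketch of a caller that does filter, with hypothetical ex_* callbacks matching the function-pointer signatures introduced by this patch:

static bool ex_is_stopped(struct task_struct *task, void *data)
{
	return task->state == TASK_STOPPED;	/* cheap; may run under locks */
}

static void ex_bump(struct task_struct *task, void *data)
{
	(*(unsigned long *)data)++;	/* runs without css_set_lock held */
}

static unsigned long ex_count_stopped(struct cgroup_subsys_state *css)
{
	unsigned long nr = 0;

	/* may fail with -ENOMEM if the temporary heap can't be allocated */
	if (css_scan_tasks(css, ex_is_stopped, ex_bump, &nr, NULL))
		return 0;
	return nr;
}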
3431 | 3496 | ||
3432 | /* | 3497 | /* |
@@ -3468,7 +3533,7 @@ struct cgroup_pidlist { | |||
3468 | /* pointer to the cgroup we belong to, for list removal purposes */ | 3533 | /* pointer to the cgroup we belong to, for list removal purposes */ |
3469 | struct cgroup *owner; | 3534 | struct cgroup *owner; |
3470 | /* protects the other fields */ | 3535 | /* protects the other fields */ |
3471 | struct rw_semaphore mutex; | 3536 | struct rw_semaphore rwsem; |
3472 | }; | 3537 | }; |
3473 | 3538 | ||
3474 | /* | 3539 | /* |
@@ -3541,7 +3606,7 @@ static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp, | |||
3541 | struct pid_namespace *ns = task_active_pid_ns(current); | 3606 | struct pid_namespace *ns = task_active_pid_ns(current); |
3542 | 3607 | ||
3543 | /* | 3608 | /* |
3544 | * We can't drop the pidlist_mutex before taking the l->mutex in case | 3609 | * We can't drop the pidlist_mutex before taking the l->rwsem in case |
3545 | * the last ref-holder is trying to remove l from the list at the same | 3610 | * the last ref-holder is trying to remove l from the list at the same |
3546 | * time. Holding the pidlist_mutex precludes somebody taking whichever | 3611 | * time. Holding the pidlist_mutex precludes somebody taking whichever |
3547 | * list we find out from under us - compare release_pid_array(). | 3612 | * list we find out from under us - compare release_pid_array(). |
@@ -3550,7 +3615,7 @@ static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp, | |||
3550 | list_for_each_entry(l, &cgrp->pidlists, links) { | 3615 | list_for_each_entry(l, &cgrp->pidlists, links) { |
3551 | if (l->key.type == type && l->key.ns == ns) { | 3616 | if (l->key.type == type && l->key.ns == ns) { |
3552 | /* make sure l doesn't vanish out from under us */ | 3617 | /* make sure l doesn't vanish out from under us */ |
3553 | down_write(&l->mutex); | 3618 | down_write(&l->rwsem); |
3554 | mutex_unlock(&cgrp->pidlist_mutex); | 3619 | mutex_unlock(&cgrp->pidlist_mutex); |
3555 | return l; | 3620 | return l; |
3556 | } | 3621 | } |
@@ -3561,8 +3626,8 @@ static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp, | |||
3561 | mutex_unlock(&cgrp->pidlist_mutex); | 3626 | mutex_unlock(&cgrp->pidlist_mutex); |
3562 | return l; | 3627 | return l; |
3563 | } | 3628 | } |
3564 | init_rwsem(&l->mutex); | 3629 | init_rwsem(&l->rwsem); |
3565 | down_write(&l->mutex); | 3630 | down_write(&l->rwsem); |
3566 | l->key.type = type; | 3631 | l->key.type = type; |
3567 | l->key.ns = get_pid_ns(ns); | 3632 | l->key.ns = get_pid_ns(ns); |
3568 | l->owner = cgrp; | 3633 | l->owner = cgrp; |
@@ -3580,7 +3645,7 @@ static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type, | |||
3580 | pid_t *array; | 3645 | pid_t *array; |
3581 | int length; | 3646 | int length; |
3582 | int pid, n = 0; /* used for populating the array */ | 3647 | int pid, n = 0; /* used for populating the array */ |
3583 | struct cgroup_iter it; | 3648 | struct css_task_iter it; |
3584 | struct task_struct *tsk; | 3649 | struct task_struct *tsk; |
3585 | struct cgroup_pidlist *l; | 3650 | struct cgroup_pidlist *l; |
3586 | 3651 | ||
@@ -3595,8 +3660,8 @@ static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type, | |||
3595 | if (!array) | 3660 | if (!array) |
3596 | return -ENOMEM; | 3661 | return -ENOMEM; |
3597 | /* now, populate the array */ | 3662 | /* now, populate the array */ |
3598 | cgroup_iter_start(cgrp, &it); | 3663 | css_task_iter_start(&cgrp->dummy_css, &it); |
3599 | while ((tsk = cgroup_iter_next(cgrp, &it))) { | 3664 | while ((tsk = css_task_iter_next(&it))) { |
3600 | if (unlikely(n == length)) | 3665 | if (unlikely(n == length)) |
3601 | break; | 3666 | break; |
3602 | /* get tgid or pid for procs or tasks file respectively */ | 3667 | /* get tgid or pid for procs or tasks file respectively */ |
@@ -3607,7 +3672,7 @@ static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type, | |||
3607 | if (pid > 0) /* make sure to only use valid results */ | 3672 | if (pid > 0) /* make sure to only use valid results */ |
3608 | array[n++] = pid; | 3673 | array[n++] = pid; |
3609 | } | 3674 | } |
3610 | cgroup_iter_end(cgrp, &it); | 3675 | css_task_iter_end(&it); |
3611 | length = n; | 3676 | length = n; |
3612 | /* now sort & (if procs) strip out duplicates */ | 3677 | /* now sort & (if procs) strip out duplicates */ |
3613 | sort(array, length, sizeof(pid_t), cmppid, NULL); | 3678 | sort(array, length, sizeof(pid_t), cmppid, NULL); |
@@ -3623,7 +3688,7 @@ static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type, | |||
3623 | l->list = array; | 3688 | l->list = array; |
3624 | l->length = length; | 3689 | l->length = length; |
3625 | l->use_count++; | 3690 | l->use_count++; |
3626 | up_write(&l->mutex); | 3691 | up_write(&l->rwsem); |
3627 | *lp = l; | 3692 | *lp = l; |
3628 | return 0; | 3693 | return 0; |
3629 | } | 3694 | } |
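The conversion above is the new iteration idiom in miniature: css_task_iter_* replaces the cgroup_iter_* API, taking a css rather than a cgroup, and a whole-cgroup walk now goes through cgrp->dummy_css. A minimal sketch built only from the signatures visible in this hunk (kernel context; count_tasks() is an invented name):

        static int count_tasks(struct cgroup *cgrp)
        {
                struct css_task_iter it;
                struct task_struct *tsk;
                int n = 0;

                /* was: cgroup_iter_start(cgrp, &it) */
                css_task_iter_start(&cgrp->dummy_css, &it);
                while ((tsk = css_task_iter_next(&it)))
                        n++;
                css_task_iter_end(&it);
                return n;
        }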
@@ -3641,7 +3706,7 @@ int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry) | |||
3641 | { | 3706 | { |
3642 | int ret = -EINVAL; | 3707 | int ret = -EINVAL; |
3643 | struct cgroup *cgrp; | 3708 | struct cgroup *cgrp; |
3644 | struct cgroup_iter it; | 3709 | struct css_task_iter it; |
3645 | struct task_struct *tsk; | 3710 | struct task_struct *tsk; |
3646 | 3711 | ||
3647 | /* | 3712 | /* |
@@ -3655,8 +3720,8 @@ int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry) | |||
3655 | ret = 0; | 3720 | ret = 0; |
3656 | cgrp = dentry->d_fsdata; | 3721 | cgrp = dentry->d_fsdata; |
3657 | 3722 | ||
3658 | cgroup_iter_start(cgrp, &it); | 3723 | css_task_iter_start(&cgrp->dummy_css, &it); |
3659 | while ((tsk = cgroup_iter_next(cgrp, &it))) { | 3724 | while ((tsk = css_task_iter_next(&it))) { |
3660 | switch (tsk->state) { | 3725 | switch (tsk->state) { |
3661 | case TASK_RUNNING: | 3726 | case TASK_RUNNING: |
3662 | stats->nr_running++; | 3727 | stats->nr_running++; |
@@ -3676,7 +3741,7 @@ int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry) | |||
3676 | break; | 3741 | break; |
3677 | } | 3742 | } |
3678 | } | 3743 | } |
3679 | cgroup_iter_end(cgrp, &it); | 3744 | css_task_iter_end(&it); |
3680 | 3745 | ||
3681 | err: | 3746 | err: |
3682 | return ret; | 3747 | return ret; |
@@ -3701,7 +3766,7 @@ static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos) | |||
3701 | int index = 0, pid = *pos; | 3766 | int index = 0, pid = *pos; |
3702 | int *iter; | 3767 | int *iter; |
3703 | 3768 | ||
3704 | down_read(&l->mutex); | 3769 | down_read(&l->rwsem); |
3705 | if (pid) { | 3770 | if (pid) { |
3706 | int end = l->length; | 3771 | int end = l->length; |
3707 | 3772 | ||
@@ -3728,7 +3793,7 @@ static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos) | |||
3728 | static void cgroup_pidlist_stop(struct seq_file *s, void *v) | 3793 | static void cgroup_pidlist_stop(struct seq_file *s, void *v) |
3729 | { | 3794 | { |
3730 | struct cgroup_pidlist *l = s->private; | 3795 | struct cgroup_pidlist *l = s->private; |
3731 | up_read(&l->mutex); | 3796 | up_read(&l->rwsem); |
3732 | } | 3797 | } |
3733 | 3798 | ||
3734 | static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos) | 3799 | static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos) |
@@ -3774,7 +3839,7 @@ static void cgroup_release_pid_array(struct cgroup_pidlist *l) | |||
3774 | * pidlist_mutex, we have to take pidlist_mutex first. | 3839 | * pidlist_mutex, we have to take pidlist_mutex first. |
3775 | */ | 3840 | */ |
3776 | mutex_lock(&l->owner->pidlist_mutex); | 3841 | mutex_lock(&l->owner->pidlist_mutex); |
3777 | down_write(&l->mutex); | 3842 | down_write(&l->rwsem); |
3778 | BUG_ON(!l->use_count); | 3843 | BUG_ON(!l->use_count); |
3779 | if (!--l->use_count) { | 3844 | if (!--l->use_count) { |
3780 | /* we're the last user if refcount is 0; remove and free */ | 3845 | /* we're the last user if refcount is 0; remove and free */ |
@@ -3782,12 +3847,12 @@ static void cgroup_release_pid_array(struct cgroup_pidlist *l) | |||
3782 | mutex_unlock(&l->owner->pidlist_mutex); | 3847 | mutex_unlock(&l->owner->pidlist_mutex); |
3783 | pidlist_free(l->list); | 3848 | pidlist_free(l->list); |
3784 | put_pid_ns(l->key.ns); | 3849 | put_pid_ns(l->key.ns); |
3785 | up_write(&l->mutex); | 3850 | up_write(&l->rwsem); |
3786 | kfree(l); | 3851 | kfree(l); |
3787 | return; | 3852 | return; |
3788 | } | 3853 | } |
3789 | mutex_unlock(&l->owner->pidlist_mutex); | 3854 | mutex_unlock(&l->owner->pidlist_mutex); |
3790 | up_write(&l->mutex); | 3855 | up_write(&l->rwsem); |
3791 | } | 3856 | } |
3792 | 3857 | ||
3793 | static int cgroup_pidlist_release(struct inode *inode, struct file *file) | 3858 | static int cgroup_pidlist_release(struct inode *inode, struct file *file) |
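The member renamed throughout these hunks was an rw_semaphore all along; calling it "mutex" hid the read/write split between the seq_file callbacks and the load/release paths. A rough sketch of a reader under the new name (pidlist_walk() is hypothetical; l->list and l->length are the fields shown above):

        static void pidlist_walk(struct cgroup_pidlist *l, void (*fn)(pid_t))
        {
                int i;

                down_read(&l->rwsem);           /* readers can share the lock */
                for (i = 0; i < l->length; i++)
                        fn(l->list[i]);
                up_read(&l->rwsem);             /* writers take down_write() */
        }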
@@ -3851,21 +3916,20 @@ static int cgroup_procs_open(struct inode *unused, struct file *file) | |||
3851 | return cgroup_pidlist_open(file, CGROUP_FILE_PROCS); | 3916 | return cgroup_pidlist_open(file, CGROUP_FILE_PROCS); |
3852 | } | 3917 | } |
3853 | 3918 | ||
3854 | static u64 cgroup_read_notify_on_release(struct cgroup *cgrp, | 3919 | static u64 cgroup_read_notify_on_release(struct cgroup_subsys_state *css, |
3855 | struct cftype *cft) | 3920 | struct cftype *cft) |
3856 | { | 3921 | { |
3857 | return notify_on_release(cgrp); | 3922 | return notify_on_release(css->cgroup); |
3858 | } | 3923 | } |
3859 | 3924 | ||
3860 | static int cgroup_write_notify_on_release(struct cgroup *cgrp, | 3925 | static int cgroup_write_notify_on_release(struct cgroup_subsys_state *css, |
3861 | struct cftype *cft, | 3926 | struct cftype *cft, u64 val) |
3862 | u64 val) | ||
3863 | { | 3927 | { |
3864 | clear_bit(CGRP_RELEASABLE, &cgrp->flags); | 3928 | clear_bit(CGRP_RELEASABLE, &css->cgroup->flags); |
3865 | if (val) | 3929 | if (val) |
3866 | set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); | 3930 | set_bit(CGRP_NOTIFY_ON_RELEASE, &css->cgroup->flags); |
3867 | else | 3931 | else |
3868 | clear_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); | 3932 | clear_bit(CGRP_NOTIFY_ON_RELEASE, &css->cgroup->flags); |
3869 | return 0; | 3933 | return 0; |
3870 | } | 3934 | } |
3871 | 3935 | ||
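These two handlers show the general shape of the converted cftype callbacks: they receive a css and reach the cgroup through css->cgroup. For a hypothetical controller bit the pair would look like the following (EXAMPLE_FLAG and both function names are invented):

        static u64 example_flag_read(struct cgroup_subsys_state *css,
                                     struct cftype *cft)
        {
                return test_bit(EXAMPLE_FLAG, &css->cgroup->flags);
        }

        static int example_flag_write(struct cgroup_subsys_state *css,
                                      struct cftype *cft, u64 val)
        {
                if (val)
                        set_bit(EXAMPLE_FLAG, &css->cgroup->flags);
                else
                        clear_bit(EXAMPLE_FLAG, &css->cgroup->flags);
                return 0;
        }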
@@ -3895,18 +3959,18 @@ static void cgroup_event_remove(struct work_struct *work) | |||
3895 | { | 3959 | { |
3896 | struct cgroup_event *event = container_of(work, struct cgroup_event, | 3960 | struct cgroup_event *event = container_of(work, struct cgroup_event, |
3897 | remove); | 3961 | remove); |
3898 | struct cgroup *cgrp = event->cgrp; | 3962 | struct cgroup_subsys_state *css = event->css; |
3899 | 3963 | ||
3900 | remove_wait_queue(event->wqh, &event->wait); | 3964 | remove_wait_queue(event->wqh, &event->wait); |
3901 | 3965 | ||
3902 | event->cft->unregister_event(cgrp, event->cft, event->eventfd); | 3966 | event->cft->unregister_event(css, event->cft, event->eventfd); |
3903 | 3967 | ||
3904 | /* Notify userspace the event is going away. */ | 3968 | /* Notify userspace the event is going away. */ |
3905 | eventfd_signal(event->eventfd, 1); | 3969 | eventfd_signal(event->eventfd, 1); |
3906 | 3970 | ||
3907 | eventfd_ctx_put(event->eventfd); | 3971 | eventfd_ctx_put(event->eventfd); |
3908 | kfree(event); | 3972 | kfree(event); |
3909 | cgroup_dput(cgrp); | 3973 | css_put(css); |
3910 | } | 3974 | } |
3911 | 3975 | ||
3912 | /* | 3976 | /* |
@@ -3919,7 +3983,7 @@ static int cgroup_event_wake(wait_queue_t *wait, unsigned mode, | |||
3919 | { | 3983 | { |
3920 | struct cgroup_event *event = container_of(wait, | 3984 | struct cgroup_event *event = container_of(wait, |
3921 | struct cgroup_event, wait); | 3985 | struct cgroup_event, wait); |
3922 | struct cgroup *cgrp = event->cgrp; | 3986 | struct cgroup *cgrp = event->css->cgroup; |
3923 | unsigned long flags = (unsigned long)key; | 3987 | unsigned long flags = (unsigned long)key; |
3924 | 3988 | ||
3925 | if (flags & POLLHUP) { | 3989 | if (flags & POLLHUP) { |
@@ -3963,14 +4027,15 @@ static void cgroup_event_ptable_queue_proc(struct file *file, | |||
3963 | * Input must be in format '<event_fd> <control_fd> <args>'. | 4027 | * Input must be in format '<event_fd> <control_fd> <args>'. |
3964 | * Interpretation of args is defined by control file implementation. | 4028 | * Interpretation of args is defined by control file implementation. |
3965 | */ | 4029 | */ |
3966 | static int cgroup_write_event_control(struct cgroup *cgrp, struct cftype *cft, | 4030 | static int cgroup_write_event_control(struct cgroup_subsys_state *dummy_css, |
3967 | const char *buffer) | 4031 | struct cftype *cft, const char *buffer) |
3968 | { | 4032 | { |
3969 | struct cgroup_event *event = NULL; | 4033 | struct cgroup *cgrp = dummy_css->cgroup; |
3970 | struct cgroup *cgrp_cfile; | 4034 | struct cgroup_event *event; |
4035 | struct cgroup_subsys_state *cfile_css; | ||
3971 | unsigned int efd, cfd; | 4036 | unsigned int efd, cfd; |
3972 | struct file *efile = NULL; | 4037 | struct file *efile; |
3973 | struct file *cfile = NULL; | 4038 | struct file *cfile; |
3974 | char *endp; | 4039 | char *endp; |
3975 | int ret; | 4040 | int ret; |
3976 | 4041 | ||
@@ -3987,7 +4052,7 @@ static int cgroup_write_event_control(struct cgroup *cgrp, struct cftype *cft, | |||
3987 | event = kzalloc(sizeof(*event), GFP_KERNEL); | 4052 | event = kzalloc(sizeof(*event), GFP_KERNEL); |
3988 | if (!event) | 4053 | if (!event) |
3989 | return -ENOMEM; | 4054 | return -ENOMEM; |
3990 | event->cgrp = cgrp; | 4055 | |
3991 | INIT_LIST_HEAD(&event->list); | 4056 | INIT_LIST_HEAD(&event->list); |
3992 | init_poll_funcptr(&event->pt, cgroup_event_ptable_queue_proc); | 4057 | init_poll_funcptr(&event->pt, cgroup_event_ptable_queue_proc); |
3993 | init_waitqueue_func_entry(&event->wait, cgroup_event_wake); | 4058 | init_waitqueue_func_entry(&event->wait, cgroup_event_wake); |
@@ -3996,62 +4061,68 @@ static int cgroup_write_event_control(struct cgroup *cgrp, struct cftype *cft, | |||
3996 | efile = eventfd_fget(efd); | 4061 | efile = eventfd_fget(efd); |
3997 | if (IS_ERR(efile)) { | 4062 | if (IS_ERR(efile)) { |
3998 | ret = PTR_ERR(efile); | 4063 | ret = PTR_ERR(efile); |
3999 | goto fail; | 4064 | goto out_kfree; |
4000 | } | 4065 | } |
4001 | 4066 | ||
4002 | event->eventfd = eventfd_ctx_fileget(efile); | 4067 | event->eventfd = eventfd_ctx_fileget(efile); |
4003 | if (IS_ERR(event->eventfd)) { | 4068 | if (IS_ERR(event->eventfd)) { |
4004 | ret = PTR_ERR(event->eventfd); | 4069 | ret = PTR_ERR(event->eventfd); |
4005 | goto fail; | 4070 | goto out_put_efile; |
4006 | } | 4071 | } |
4007 | 4072 | ||
4008 | cfile = fget(cfd); | 4073 | cfile = fget(cfd); |
4009 | if (!cfile) { | 4074 | if (!cfile) { |
4010 | ret = -EBADF; | 4075 | ret = -EBADF; |
4011 | goto fail; | 4076 | goto out_put_eventfd; |
4012 | } | 4077 | } |
4013 | 4078 | ||
4014 | /* the process needs read permission on control file */ | 4079 | /* the process needs read permission on control file */ |
4015 | /* AV: shouldn't we check that it's been opened for read instead? */ | 4080 | /* AV: shouldn't we check that it's been opened for read instead? */ |
4016 | ret = inode_permission(file_inode(cfile), MAY_READ); | 4081 | ret = inode_permission(file_inode(cfile), MAY_READ); |
4017 | if (ret < 0) | 4082 | if (ret < 0) |
4018 | goto fail; | 4083 | goto out_put_cfile; |
4019 | 4084 | ||
4020 | event->cft = __file_cft(cfile); | 4085 | event->cft = __file_cft(cfile); |
4021 | if (IS_ERR(event->cft)) { | 4086 | if (IS_ERR(event->cft)) { |
4022 | ret = PTR_ERR(event->cft); | 4087 | ret = PTR_ERR(event->cft); |
4023 | goto fail; | 4088 | goto out_put_cfile; |
4089 | } | ||
4090 | |||
4091 | if (!event->cft->ss) { | ||
4092 | ret = -EBADF; | ||
4093 | goto out_put_cfile; | ||
4024 | } | 4094 | } |
4025 | 4095 | ||
4026 | /* | 4096 | /* |
4027 | * The file to be monitored must be in the same cgroup as | 4097 | * Determine the css of @cfile, verify it belongs to the same |
4028 | * cgroup.event_control is. | 4098 | * cgroup as cgroup.event_control, and associate @event with it. |
4099 | * Remaining events are automatically removed on cgroup destruction | ||
4100 | * but the removal is asynchronous, so take an extra ref. | ||
4029 | */ | 4101 | */ |
4030 | cgrp_cfile = __d_cgrp(cfile->f_dentry->d_parent); | 4102 | rcu_read_lock(); |
4031 | if (cgrp_cfile != cgrp) { | 4103 | |
4032 | ret = -EINVAL; | 4104 | ret = -EINVAL; |
4033 | goto fail; | 4105 | event->css = cgroup_css(cgrp, event->cft->ss); |
4034 | } | 4106 | cfile_css = css_from_dir(cfile->f_dentry->d_parent, event->cft->ss); |
4107 | if (event->css && event->css == cfile_css && css_tryget(event->css)) | ||
4108 | ret = 0; | ||
4109 | |||
4110 | rcu_read_unlock(); | ||
4111 | if (ret) | ||
4112 | goto out_put_cfile; | ||
4035 | 4113 | ||
4036 | if (!event->cft->register_event || !event->cft->unregister_event) { | 4114 | if (!event->cft->register_event || !event->cft->unregister_event) { |
4037 | ret = -EINVAL; | 4115 | ret = -EINVAL; |
4038 | goto fail; | 4116 | goto out_put_css; |
4039 | } | 4117 | } |
4040 | 4118 | ||
4041 | ret = event->cft->register_event(cgrp, event->cft, | 4119 | ret = event->cft->register_event(event->css, event->cft, |
4042 | event->eventfd, buffer); | 4120 | event->eventfd, buffer); |
4043 | if (ret) | 4121 | if (ret) |
4044 | goto fail; | 4122 | goto out_put_css; |
4045 | 4123 | ||
4046 | efile->f_op->poll(efile, &event->pt); | 4124 | efile->f_op->poll(efile, &event->pt); |
4047 | 4125 | ||
4048 | /* | ||
4049 | * Events should be removed after rmdir of cgroup directory, but before | ||
4050 | * destroying subsystem state objects. Let's take reference to cgroup | ||
4051 | * directory dentry to do that. | ||
4052 | */ | ||
4053 | dget(cgrp->dentry); | ||
4054 | |||
4055 | spin_lock(&cgrp->event_list_lock); | 4126 | spin_lock(&cgrp->event_list_lock); |
4056 | list_add(&event->list, &cgrp->event_list); | 4127 | list_add(&event->list, &cgrp->event_list); |
4057 | spin_unlock(&cgrp->event_list_lock); | 4128 | spin_unlock(&cgrp->event_list_lock); |
@@ -4061,35 +4132,33 @@ static int cgroup_write_event_control(struct cgroup *cgrp, struct cftype *cft, | |||
4061 | 4132 | ||
4062 | return 0; | 4133 | return 0; |
4063 | 4134 | ||
4064 | fail: | 4135 | out_put_css: |
4065 | if (cfile) | 4136 | css_put(event->css); |
4066 | fput(cfile); | 4137 | out_put_cfile: |
4067 | 4138 | fput(cfile); | |
4068 | if (event && event->eventfd && !IS_ERR(event->eventfd)) | 4139 | out_put_eventfd: |
4069 | eventfd_ctx_put(event->eventfd); | 4140 | eventfd_ctx_put(event->eventfd); |
4070 | 4141 | out_put_efile: | |
4071 | if (!IS_ERR_OR_NULL(efile)) | 4142 | fput(efile); |
4072 | fput(efile); | 4143 | out_kfree: |
4073 | |||
4074 | kfree(event); | 4144 | kfree(event); |
4075 | 4145 | ||
4076 | return ret; | 4146 | return ret; |
4077 | } | 4147 | } |
4078 | 4148 | ||
4079 | static u64 cgroup_clone_children_read(struct cgroup *cgrp, | 4149 | static u64 cgroup_clone_children_read(struct cgroup_subsys_state *css, |
4080 | struct cftype *cft) | 4150 | struct cftype *cft) |
4081 | { | 4151 | { |
4082 | return test_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags); | 4152 | return test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags); |
4083 | } | 4153 | } |
4084 | 4154 | ||
4085 | static int cgroup_clone_children_write(struct cgroup *cgrp, | 4155 | static int cgroup_clone_children_write(struct cgroup_subsys_state *css, |
4086 | struct cftype *cft, | 4156 | struct cftype *cft, u64 val) |
4087 | u64 val) | ||
4088 | { | 4157 | { |
4089 | if (val) | 4158 | if (val) |
4090 | set_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags); | 4159 | set_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags); |
4091 | else | 4160 | else |
4092 | clear_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags); | 4161 | clear_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags); |
4093 | return 0; | 4162 | return 0; |
4094 | } | 4163 | } |
4095 | 4164 | ||
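The rewrite above trades one catch-all "fail:" label, which had to re-check each pointer with IS_ERR/NULL, for a label per acquired resource, unwound in reverse order of acquisition. The same idiom in a self-contained userspace sketch (setup() and the two buffers are invented):

        #include <errno.h>
        #include <stdlib.h>

        static int setup(char **pa, char **pb)
        {
                char *a, *b;
                int ret;

                a = malloc(16);
                if (!a)
                        return -ENOMEM;

                b = malloc(16);
                if (!b) {
                        ret = -ENOMEM;
                        goto out_free_a;        /* undo only what succeeded */
                }

                *pa = a;
                *pb = b;
                return 0;

        out_free_a:
                free(a);
                return ret;
        }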
@@ -4148,36 +4217,34 @@ static struct cftype cgroup_base_files[] = { | |||
4148 | }; | 4217 | }; |
4149 | 4218 | ||
4150 | /** | 4219 | /** |
4151 | * cgroup_populate_dir - selectively creation of files in a directory | 4220 | * cgroup_populate_dir - create subsys files in a cgroup directory |
4152 | * @cgrp: target cgroup | 4221 | * @cgrp: target cgroup |
4153 | * @base_files: true if the base files should be added | ||
4154 | * @subsys_mask: mask of the subsystem ids whose files should be added | 4222 | * @subsys_mask: mask of the subsystem ids whose files should be added |
4223 | * | ||
4224 | * On failure, no file is added. | ||
4155 | */ | 4225 | */ |
4156 | static int cgroup_populate_dir(struct cgroup *cgrp, bool base_files, | 4226 | static int cgroup_populate_dir(struct cgroup *cgrp, unsigned long subsys_mask) |
4157 | unsigned long subsys_mask) | ||
4158 | { | 4227 | { |
4159 | int err; | ||
4160 | struct cgroup_subsys *ss; | 4228 | struct cgroup_subsys *ss; |
4161 | 4229 | int i, ret = 0; | |
4162 | if (base_files) { | ||
4163 | err = cgroup_addrm_files(cgrp, NULL, cgroup_base_files, true); | ||
4164 | if (err < 0) | ||
4165 | return err; | ||
4166 | } | ||
4167 | 4230 | ||
4168 | /* process cftsets of each subsystem */ | 4231 | /* process cftsets of each subsystem */ |
4169 | for_each_root_subsys(cgrp->root, ss) { | 4232 | for_each_subsys(ss, i) { |
4170 | struct cftype_set *set; | 4233 | struct cftype_set *set; |
4171 | if (!test_bit(ss->subsys_id, &subsys_mask)) | 4234 | |
4235 | if (!test_bit(i, &subsys_mask)) | ||
4172 | continue; | 4236 | continue; |
4173 | 4237 | ||
4174 | list_for_each_entry(set, &ss->cftsets, node) | 4238 | list_for_each_entry(set, &ss->cftsets, node) { |
4175 | cgroup_addrm_files(cgrp, ss, set->cfts, true); | 4239 | ret = cgroup_addrm_files(cgrp, set->cfts, true); |
4240 | if (ret < 0) | ||
4241 | goto err; | ||
4242 | } | ||
4176 | } | 4243 | } |
4177 | 4244 | ||
4178 | /* This cgroup is ready now */ | 4245 | /* This cgroup is ready now */ |
4179 | for_each_root_subsys(cgrp->root, ss) { | 4246 | for_each_root_subsys(cgrp->root, ss) { |
4180 | struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id]; | 4247 | struct cgroup_subsys_state *css = cgroup_css(cgrp, ss); |
4181 | struct css_id *id = rcu_dereference_protected(css->id, true); | 4248 | struct css_id *id = rcu_dereference_protected(css->id, true); |
4182 | 4249 | ||
4183 | /* | 4250 | /* |
@@ -4190,14 +4257,57 @@ static int cgroup_populate_dir(struct cgroup *cgrp, bool base_files, | |||
4190 | } | 4257 | } |
4191 | 4258 | ||
4192 | return 0; | 4259 | return 0; |
4260 | err: | ||
4261 | cgroup_clear_dir(cgrp, subsys_mask); | ||
4262 | return ret; | ||
4193 | } | 4263 | } |
4194 | 4264 | ||
4195 | static void css_dput_fn(struct work_struct *work) | 4265 | /* |
4266 | * css destruction is four-stage process. | ||
4267 | * | ||
4268 | * 1. Destruction starts. Killing of the percpu_ref is initiated. | ||
4269 | * Implemented in kill_css(). | ||
4270 | * | ||
4271 | * 2. When the percpu_ref is confirmed to be visible as killed on all CPUs | ||
4272 | * and thus css_tryget() is guaranteed to fail, the css can be offlined | ||
4273 | * by invoking offline_css(). After offlining, the base ref is put. | ||
4274 | * Implemented in css_killed_work_fn(). | ||
4275 | * | ||
4276 | * 3. When the percpu_ref reaches zero, the only possible remaining | ||
4277 | * accessors are inside RCU read sections. css_release() schedules the | ||
4278 | * RCU callback. | ||
4279 | * | ||
4280 | * 4. After the grace period, the css can be freed. Implemented in | ||
4281 | * css_free_work_fn(). | ||
4282 | * | ||
4283 | * It is actually hairier because both steps 2 and 4 require process context | ||
4284 | * and thus involve punting to css->destroy_work adding two additional | ||
4285 | * steps to the already complex sequence. | ||
4286 | */ | ||
4287 | static void css_free_work_fn(struct work_struct *work) | ||
4196 | { | 4288 | { |
4197 | struct cgroup_subsys_state *css = | 4289 | struct cgroup_subsys_state *css = |
4198 | container_of(work, struct cgroup_subsys_state, dput_work); | 4290 | container_of(work, struct cgroup_subsys_state, destroy_work); |
4291 | struct cgroup *cgrp = css->cgroup; | ||
4199 | 4292 | ||
4200 | cgroup_dput(css->cgroup); | 4293 | if (css->parent) |
4294 | css_put(css->parent); | ||
4295 | |||
4296 | css->ss->css_free(css); | ||
4297 | cgroup_dput(cgrp); | ||
4298 | } | ||
4299 | |||
4300 | static void css_free_rcu_fn(struct rcu_head *rcu_head) | ||
4301 | { | ||
4302 | struct cgroup_subsys_state *css = | ||
4303 | container_of(rcu_head, struct cgroup_subsys_state, rcu_head); | ||
4304 | |||
4305 | /* | ||
4306 | * css holds an extra ref to @cgrp->dentry which is put on the last | ||
4307 | * css_put(). dput() requires process context which we don't have. | ||
4308 | */ | ||
4309 | INIT_WORK(&css->destroy_work, css_free_work_fn); | ||
4310 | schedule_work(&css->destroy_work); | ||
4201 | } | 4311 | } |
4202 | 4312 | ||
4203 | static void css_release(struct percpu_ref *ref) | 4313 | static void css_release(struct percpu_ref *ref) |
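Steps 3 and 4 of the sequence documented above follow a reusable pattern: the percpu_ref release callback fires in atomic context, call_rcu() waits out any remaining RCU readers, and the RCU callback punts to a workqueue for the parts that must sleep. A stripped-down sketch of that chain ('struct obj' and every name in it are illustrative):

        #include <linux/percpu-refcount.h>
        #include <linux/rcupdate.h>
        #include <linux/slab.h>
        #include <linux/workqueue.h>

        struct obj {
                struct percpu_ref refcnt;
                struct rcu_head rcu_head;
                struct work_struct destroy_work;
        };

        static void obj_free_work_fn(struct work_struct *work)
        {
                struct obj *o = container_of(work, struct obj, destroy_work);

                /* finally in process context: free, dput(), anything that sleeps */
                kfree(o);
        }

        static void obj_free_rcu_fn(struct rcu_head *head)
        {
                struct obj *o = container_of(head, struct obj, rcu_head);

                /* RCU callbacks run in atomic context; bounce to a workqueue */
                INIT_WORK(&o->destroy_work, obj_free_work_fn);
                schedule_work(&o->destroy_work);
        }

        static void obj_release(struct percpu_ref *ref)
        {
                struct obj *o = container_of(ref, struct obj, refcnt);

                /* last reference dropped; wait out RCU readers before freeing */
                call_rcu(&o->rcu_head, obj_free_rcu_fn);
        }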
@@ -4205,49 +4315,47 @@ static void css_release(struct percpu_ref *ref) | |||
4205 | struct cgroup_subsys_state *css = | 4315 | struct cgroup_subsys_state *css = |
4206 | container_of(ref, struct cgroup_subsys_state, refcnt); | 4316 | container_of(ref, struct cgroup_subsys_state, refcnt); |
4207 | 4317 | ||
4208 | schedule_work(&css->dput_work); | 4318 | call_rcu(&css->rcu_head, css_free_rcu_fn); |
4209 | } | 4319 | } |
4210 | 4320 | ||
4211 | static void init_cgroup_css(struct cgroup_subsys_state *css, | 4321 | static void init_css(struct cgroup_subsys_state *css, struct cgroup_subsys *ss, |
4212 | struct cgroup_subsys *ss, | 4322 | struct cgroup *cgrp) |
4213 | struct cgroup *cgrp) | ||
4214 | { | 4323 | { |
4215 | css->cgroup = cgrp; | 4324 | css->cgroup = cgrp; |
4325 | css->ss = ss; | ||
4216 | css->flags = 0; | 4326 | css->flags = 0; |
4217 | css->id = NULL; | 4327 | css->id = NULL; |
4218 | if (cgrp == cgroup_dummy_top) | 4328 | |
4329 | if (cgrp->parent) | ||
4330 | css->parent = cgroup_css(cgrp->parent, ss); | ||
4331 | else | ||
4219 | css->flags |= CSS_ROOT; | 4332 | css->flags |= CSS_ROOT; |
4220 | BUG_ON(cgrp->subsys[ss->subsys_id]); | ||
4221 | cgrp->subsys[ss->subsys_id] = css; | ||
4222 | 4333 | ||
4223 | /* | 4334 | BUG_ON(cgroup_css(cgrp, ss)); |
4224 | * css holds an extra ref to @cgrp->dentry which is put on the last | ||
4225 | * css_put(). dput() requires process context, which css_put() may | ||
4226 | * be called without. @css->dput_work will be used to invoke | ||
4227 | * dput() asynchronously from css_put(). | ||
4228 | */ | ||
4229 | INIT_WORK(&css->dput_work, css_dput_fn); | ||
4230 | } | 4335 | } |
4231 | 4336 | ||
4232 | /* invoke ->post_create() on a new CSS and mark it online if successful */ | 4337 | /* invoke ->css_online() on a new CSS and mark it online if successful */ |
4233 | static int online_css(struct cgroup_subsys *ss, struct cgroup *cgrp) | 4338 | static int online_css(struct cgroup_subsys_state *css) |
4234 | { | 4339 | { |
4340 | struct cgroup_subsys *ss = css->ss; | ||
4235 | int ret = 0; | 4341 | int ret = 0; |
4236 | 4342 | ||
4237 | lockdep_assert_held(&cgroup_mutex); | 4343 | lockdep_assert_held(&cgroup_mutex); |
4238 | 4344 | ||
4239 | if (ss->css_online) | 4345 | if (ss->css_online) |
4240 | ret = ss->css_online(cgrp); | 4346 | ret = ss->css_online(css); |
4241 | if (!ret) | 4347 | if (!ret) { |
4242 | cgrp->subsys[ss->subsys_id]->flags |= CSS_ONLINE; | 4348 | css->flags |= CSS_ONLINE; |
4349 | css->cgroup->nr_css++; | ||
4350 | rcu_assign_pointer(css->cgroup->subsys[ss->subsys_id], css); | ||
4351 | } | ||
4243 | return ret; | 4352 | return ret; |
4244 | } | 4353 | } |
4245 | 4354 | ||
4246 | /* if the CSS is online, invoke ->pre_destory() on it and mark it offline */ | 4355 | /* if the CSS is online, invoke ->css_offline() on it and mark it offline */ |
4247 | static void offline_css(struct cgroup_subsys *ss, struct cgroup *cgrp) | 4356 | static void offline_css(struct cgroup_subsys_state *css) |
4248 | __releases(&cgroup_mutex) __acquires(&cgroup_mutex) | ||
4249 | { | 4357 | { |
4250 | struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id]; | 4358 | struct cgroup_subsys *ss = css->ss; |
4251 | 4359 | ||
4252 | lockdep_assert_held(&cgroup_mutex); | 4360 | lockdep_assert_held(&cgroup_mutex); |
4253 | 4361 | ||
@@ -4255,9 +4363,11 @@ static void offline_css(struct cgroup_subsys *ss, struct cgroup *cgrp) | |||
4255 | return; | 4363 | return; |
4256 | 4364 | ||
4257 | if (ss->css_offline) | 4365 | if (ss->css_offline) |
4258 | ss->css_offline(cgrp); | 4366 | ss->css_offline(css); |
4259 | 4367 | ||
4260 | cgrp->subsys[ss->subsys_id]->flags &= ~CSS_ONLINE; | 4368 | css->flags &= ~CSS_ONLINE; |
4369 | css->cgroup->nr_css--; | ||
4370 | RCU_INIT_POINTER(css->cgroup->subsys[ss->subsys_id], css); | ||
4261 | } | 4371 | } |
4262 | 4372 | ||
4263 | /* | 4373 | /* |
@@ -4271,6 +4381,7 @@ static void offline_css(struct cgroup_subsys *ss, struct cgroup *cgrp) | |||
4271 | static long cgroup_create(struct cgroup *parent, struct dentry *dentry, | 4381 | static long cgroup_create(struct cgroup *parent, struct dentry *dentry, |
4272 | umode_t mode) | 4382 | umode_t mode) |
4273 | { | 4383 | { |
4384 | struct cgroup_subsys_state *css_ar[CGROUP_SUBSYS_COUNT] = { }; | ||
4274 | struct cgroup *cgrp; | 4385 | struct cgroup *cgrp; |
4275 | struct cgroup_name *name; | 4386 | struct cgroup_name *name; |
4276 | struct cgroupfs_root *root = parent->root; | 4387 | struct cgroupfs_root *root = parent->root; |
@@ -4288,7 +4399,11 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, | |||
4288 | goto err_free_cgrp; | 4399 | goto err_free_cgrp; |
4289 | rcu_assign_pointer(cgrp->name, name); | 4400 | rcu_assign_pointer(cgrp->name, name); |
4290 | 4401 | ||
4291 | cgrp->id = ida_simple_get(&root->cgroup_ida, 1, 0, GFP_KERNEL); | 4402 | /* |
4403 | * Temporarily set the pointer to NULL, so idr_find() won't return | ||
4404 | * a half-baked cgroup. | ||
4405 | */ | ||
4406 | cgrp->id = idr_alloc(&root->cgroup_idr, NULL, 1, 0, GFP_KERNEL); | ||
4292 | if (cgrp->id < 0) | 4407 | if (cgrp->id < 0) |
4293 | goto err_free_name; | 4408 | goto err_free_name; |
4294 | 4409 | ||
@@ -4317,6 +4432,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, | |||
4317 | cgrp->dentry = dentry; | 4432 | cgrp->dentry = dentry; |
4318 | 4433 | ||
4319 | cgrp->parent = parent; | 4434 | cgrp->parent = parent; |
4435 | cgrp->dummy_css.parent = &parent->dummy_css; | ||
4320 | cgrp->root = parent->root; | 4436 | cgrp->root = parent->root; |
4321 | 4437 | ||
4322 | if (notify_on_release(parent)) | 4438 | if (notify_on_release(parent)) |
@@ -4328,22 +4444,21 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, | |||
4328 | for_each_root_subsys(root, ss) { | 4444 | for_each_root_subsys(root, ss) { |
4329 | struct cgroup_subsys_state *css; | 4445 | struct cgroup_subsys_state *css; |
4330 | 4446 | ||
4331 | css = ss->css_alloc(cgrp); | 4447 | css = ss->css_alloc(cgroup_css(parent, ss)); |
4332 | if (IS_ERR(css)) { | 4448 | if (IS_ERR(css)) { |
4333 | err = PTR_ERR(css); | 4449 | err = PTR_ERR(css); |
4334 | goto err_free_all; | 4450 | goto err_free_all; |
4335 | } | 4451 | } |
4452 | css_ar[ss->subsys_id] = css; | ||
4336 | 4453 | ||
4337 | err = percpu_ref_init(&css->refcnt, css_release); | 4454 | err = percpu_ref_init(&css->refcnt, css_release); |
4338 | if (err) { | 4455 | if (err) |
4339 | ss->css_free(cgrp); | ||
4340 | goto err_free_all; | 4456 | goto err_free_all; |
4341 | } | ||
4342 | 4457 | ||
4343 | init_cgroup_css(css, ss, cgrp); | 4458 | init_css(css, ss, cgrp); |
4344 | 4459 | ||
4345 | if (ss->use_id) { | 4460 | if (ss->use_id) { |
4346 | err = alloc_css_id(ss, parent, cgrp); | 4461 | err = alloc_css_id(css); |
4347 | if (err) | 4462 | if (err) |
4348 | goto err_free_all; | 4463 | goto err_free_all; |
4349 | } | 4464 | } |
@@ -4365,16 +4480,22 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, | |||
4365 | list_add_tail_rcu(&cgrp->sibling, &cgrp->parent->children); | 4480 | list_add_tail_rcu(&cgrp->sibling, &cgrp->parent->children); |
4366 | root->number_of_cgroups++; | 4481 | root->number_of_cgroups++; |
4367 | 4482 | ||
4368 | /* each css holds a ref to the cgroup's dentry */ | 4483 | /* each css holds a ref to the cgroup's dentry and the parent css */ |
4369 | for_each_root_subsys(root, ss) | 4484 | for_each_root_subsys(root, ss) { |
4485 | struct cgroup_subsys_state *css = css_ar[ss->subsys_id]; | ||
4486 | |||
4370 | dget(dentry); | 4487 | dget(dentry); |
4488 | css_get(css->parent); | ||
4489 | } | ||
4371 | 4490 | ||
4372 | /* hold a ref to the parent's dentry */ | 4491 | /* hold a ref to the parent's dentry */ |
4373 | dget(parent->dentry); | 4492 | dget(parent->dentry); |
4374 | 4493 | ||
4375 | /* creation succeeded, notify subsystems */ | 4494 | /* creation succeeded, notify subsystems */ |
4376 | for_each_root_subsys(root, ss) { | 4495 | for_each_root_subsys(root, ss) { |
4377 | err = online_css(ss, cgrp); | 4496 | struct cgroup_subsys_state *css = css_ar[ss->subsys_id]; |
4497 | |||
4498 | err = online_css(css); | ||
4378 | if (err) | 4499 | if (err) |
4379 | goto err_destroy; | 4500 | goto err_destroy; |
4380 | 4501 | ||
@@ -4388,7 +4509,13 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, | |||
4388 | } | 4509 | } |
4389 | } | 4510 | } |
4390 | 4511 | ||
4391 | err = cgroup_populate_dir(cgrp, true, root->subsys_mask); | 4512 | idr_replace(&root->cgroup_idr, cgrp, cgrp->id); |
4513 | |||
4514 | err = cgroup_addrm_files(cgrp, cgroup_base_files, true); | ||
4515 | if (err) | ||
4516 | goto err_destroy; | ||
4517 | |||
4518 | err = cgroup_populate_dir(cgrp, root->subsys_mask); | ||
4392 | if (err) | 4519 | if (err) |
4393 | goto err_destroy; | 4520 | goto err_destroy; |
4394 | 4521 | ||
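Together with the idr_alloc(..., NULL, ...) earlier in cgroup_create(), this idr_replace() completes a reserve-then-publish pattern: the id is allocated up front so the error path can free it, but concurrent idr_find() sees NULL until the object is fully built. In isolation (kernel context; publish() and 'struct my_obj' are invented):

        #include <linux/gfp.h>
        #include <linux/idr.h>

        static int publish(struct idr *idr, struct my_obj *obj)
        {
                int id;

                /* reserve: lookups return NULL, never a half-baked object */
                id = idr_alloc(idr, NULL, 1, 0, GFP_KERNEL);
                if (id < 0)
                        return id;

                /* ... finish initializing obj; on failure: idr_remove(idr, id) ... */

                idr_replace(idr, obj, id);      /* publish the real pointer */
                return id;
        }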
@@ -4399,18 +4526,18 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, | |||
4399 | 4526 | ||
4400 | err_free_all: | 4527 | err_free_all: |
4401 | for_each_root_subsys(root, ss) { | 4528 | for_each_root_subsys(root, ss) { |
4402 | struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id]; | 4529 | struct cgroup_subsys_state *css = css_ar[ss->subsys_id]; |
4403 | 4530 | ||
4404 | if (css) { | 4531 | if (css) { |
4405 | percpu_ref_cancel_init(&css->refcnt); | 4532 | percpu_ref_cancel_init(&css->refcnt); |
4406 | ss->css_free(cgrp); | 4533 | ss->css_free(css); |
4407 | } | 4534 | } |
4408 | } | 4535 | } |
4409 | mutex_unlock(&cgroup_mutex); | 4536 | mutex_unlock(&cgroup_mutex); |
4410 | /* Release the reference count that we took on the superblock */ | 4537 | /* Release the reference count that we took on the superblock */ |
4411 | deactivate_super(sb); | 4538 | deactivate_super(sb); |
4412 | err_free_id: | 4539 | err_free_id: |
4413 | ida_simple_remove(&root->cgroup_ida, cgrp->id); | 4540 | idr_remove(&root->cgroup_idr, cgrp->id); |
4414 | err_free_name: | 4541 | err_free_name: |
4415 | kfree(rcu_dereference_raw(cgrp->name)); | 4542 | kfree(rcu_dereference_raw(cgrp->name)); |
4416 | err_free_cgrp: | 4543 | err_free_cgrp: |
@@ -4432,22 +4559,84 @@ static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) | |||
4432 | return cgroup_create(c_parent, dentry, mode | S_IFDIR); | 4559 | return cgroup_create(c_parent, dentry, mode | S_IFDIR); |
4433 | } | 4560 | } |
4434 | 4561 | ||
4435 | static void cgroup_css_killed(struct cgroup *cgrp) | 4562 | /* |
4563 | * This is called when the refcnt of a css is confirmed to be killed. | ||
4564 | * css_tryget() is now guaranteed to fail. | ||
4565 | */ | ||
4566 | static void css_killed_work_fn(struct work_struct *work) | ||
4436 | { | 4567 | { |
4437 | if (!atomic_dec_and_test(&cgrp->css_kill_cnt)) | 4568 | struct cgroup_subsys_state *css = |
4438 | return; | 4569 | container_of(work, struct cgroup_subsys_state, destroy_work); |
4570 | struct cgroup *cgrp = css->cgroup; | ||
4439 | 4571 | ||
4440 | /* percpu ref's of all css's are killed, kick off the next step */ | 4572 | mutex_lock(&cgroup_mutex); |
4441 | INIT_WORK(&cgrp->destroy_work, cgroup_offline_fn); | 4573 | |
4442 | schedule_work(&cgrp->destroy_work); | 4574 | /* |
4575 | * css_tryget() is guaranteed to fail now. Tell subsystems to | ||
4576 | * initate destruction. | ||
4577 | */ | ||
4578 | offline_css(css); | ||
4579 | |||
4580 | /* | ||
4581 | * If @cgrp is marked dead, it's waiting for refs of all css's to | ||
4582 | * be disabled before proceeding to the second phase of cgroup | ||
4583 | * destruction. If we are the last one, kick it off. | ||
4584 | */ | ||
4585 | if (!cgrp->nr_css && cgroup_is_dead(cgrp)) | ||
4586 | cgroup_destroy_css_killed(cgrp); | ||
4587 | |||
4588 | mutex_unlock(&cgroup_mutex); | ||
4589 | |||
4590 | /* | ||
4591 | * Put the css refs from kill_css(). Each css holds an extra | ||
4592 | * reference to the cgroup's dentry and cgroup removal proceeds | ||
4593 | * regardless of css refs. On the last put of each css, whenever | ||
4594 | * that may be, the extra dentry ref is put so that dentry | ||
4595 | * destruction happens only after all css's are released. | ||
4596 | */ | ||
4597 | css_put(css); | ||
4443 | } | 4598 | } |
4444 | 4599 | ||
4445 | static void css_ref_killed_fn(struct percpu_ref *ref) | 4600 | /* css kill confirmation processing requires process context, bounce */ |
4601 | static void css_killed_ref_fn(struct percpu_ref *ref) | ||
4446 | { | 4602 | { |
4447 | struct cgroup_subsys_state *css = | 4603 | struct cgroup_subsys_state *css = |
4448 | container_of(ref, struct cgroup_subsys_state, refcnt); | 4604 | container_of(ref, struct cgroup_subsys_state, refcnt); |
4449 | 4605 | ||
4450 | cgroup_css_killed(css->cgroup); | 4606 | INIT_WORK(&css->destroy_work, css_killed_work_fn); |
4607 | schedule_work(&css->destroy_work); | ||
4608 | } | ||
4609 | |||
4610 | /** | ||
4611 | * kill_css - destroy a css | ||
4612 | * @css: css to destroy | ||
4613 | * | ||
4614 | * This function initiates destruction of @css by removing cgroup interface | ||
4615 | * files and putting its base reference. ->css_offline() will be invoked | ||
4616 | * asynchronously once css_tryget() is guaranteed to fail and when the | ||
4617 | * reference count reaches zero, @css will be released. | ||
4618 | */ | ||
4619 | static void kill_css(struct cgroup_subsys_state *css) | ||
4620 | { | ||
4621 | cgroup_clear_dir(css->cgroup, 1 << css->ss->subsys_id); | ||
4622 | |||
4623 | /* | ||
4624 | * Killing would put the base ref, but we need to keep it alive | ||
4625 | * until after ->css_offline(). | ||
4626 | */ | ||
4627 | css_get(css); | ||
4628 | |||
4629 | /* | ||
4630 | * cgroup core guarantees that, by the time ->css_offline() is | ||
4631 | * invoked, no new css reference will be given out via | ||
4632 | * css_tryget(). We can't simply call percpu_ref_kill() and | ||
4633 | * proceed to offlining css's because percpu_ref_kill() doesn't | ||
4634 | * guarantee that the ref is seen as killed on all CPUs on return. | ||
4635 | * | ||
4636 | * Use percpu_ref_kill_and_confirm() to get notifications as each | ||
4637 | * css is confirmed to be seen as killed on all CPUs. | ||
4638 | */ | ||
4639 | percpu_ref_kill_and_confirm(&css->refcnt, css_killed_ref_fn); | ||
4451 | } | 4640 | } |
4452 | 4641 | ||
4453 | /** | 4642 | /** |
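What the confirmation handshake in kill_css() buys is visible on the reader side: once css_killed_ref_fn() has been called, css_tryget() can no longer succeed on any CPU, so no new reference can slip past offlining. A sketch of that reader pattern using helpers from this patch (pin_css() itself is hypothetical):

        static struct cgroup_subsys_state *pin_css(struct cgroup *cgrp,
                                                   struct cgroup_subsys *ss)
        {
                struct cgroup_subsys_state *css;

                rcu_read_lock();
                css = cgroup_css(cgrp, ss);
                /* fails on every CPU once the kill is confirmed */
                if (css && !css_tryget(css))
                        css = NULL;
                rcu_read_unlock();

                return css;     /* caller must css_put() if non-NULL */
        }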
@@ -4513,41 +4702,19 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) | |||
4513 | return -EBUSY; | 4702 | return -EBUSY; |
4514 | 4703 | ||
4515 | /* | 4704 | /* |
4516 | * Block new css_tryget() by killing css refcnts. cgroup core | 4705 | * Initiate massacre of all css's. cgroup_destroy_css_killed() |
4517 | * guarantees that, by the time ->css_offline() is invoked, no new | 4706 | * will be invoked to perform the rest of destruction once the |
4518 | * css reference will be given out via css_tryget(). We can't | 4707 | * percpu refs of all css's are confirmed to be killed. |
4519 | * simply call percpu_ref_kill() and proceed to offlining css's | ||
4520 | * because percpu_ref_kill() doesn't guarantee that the ref is seen | ||
4521 | * as killed on all CPUs on return. | ||
4522 | * | ||
4523 | * Use percpu_ref_kill_and_confirm() to get notifications as each | ||
4524 | * css is confirmed to be seen as killed on all CPUs. The | ||
4525 | * notification callback keeps track of the number of css's to be | ||
4526 | * killed and schedules cgroup_offline_fn() to perform the rest of | ||
4527 | * destruction once the percpu refs of all css's are confirmed to | ||
4528 | * be killed. | ||
4529 | */ | 4708 | */ |
4530 | atomic_set(&cgrp->css_kill_cnt, 1); | 4709 | for_each_root_subsys(cgrp->root, ss) |
4531 | for_each_root_subsys(cgrp->root, ss) { | 4710 | kill_css(cgroup_css(cgrp, ss)); |
4532 | struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id]; | ||
4533 | |||
4534 | /* | ||
4535 | * Killing would put the base ref, but we need to keep it | ||
4536 | * alive until after ->css_offline. | ||
4537 | */ | ||
4538 | percpu_ref_get(&css->refcnt); | ||
4539 | |||
4540 | atomic_inc(&cgrp->css_kill_cnt); | ||
4541 | percpu_ref_kill_and_confirm(&css->refcnt, css_ref_killed_fn); | ||
4542 | } | ||
4543 | cgroup_css_killed(cgrp); | ||
4544 | 4711 | ||
4545 | /* | 4712 | /* |
4546 | * Mark @cgrp dead. This prevents further task migration and child | 4713 | * Mark @cgrp dead. This prevents further task migration and child |
4547 | * creation by disabling cgroup_lock_live_group(). Note that | 4714 | * creation by disabling cgroup_lock_live_group(). Note that |
4548 | * CGRP_DEAD assertion is depended upon by cgroup_next_sibling() to | 4715 | * CGRP_DEAD assertion is depended upon by css_next_child() to |
4549 | * resume iteration after dropping RCU read lock. See | 4716 | * resume iteration after dropping RCU read lock. See |
4550 | * cgroup_next_sibling() for details. | 4717 | * css_next_child() for details. |
4551 | */ | 4718 | */ |
4552 | set_bit(CGRP_DEAD, &cgrp->flags); | 4719 | set_bit(CGRP_DEAD, &cgrp->flags); |
4553 | 4720 | ||
@@ -4558,9 +4725,20 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) | |||
4558 | raw_spin_unlock(&release_list_lock); | 4725 | raw_spin_unlock(&release_list_lock); |
4559 | 4726 | ||
4560 | /* | 4727 | /* |
4561 | * Remove @cgrp directory. The removal puts the base ref but we | 4728 | * If @cgrp has css's attached, the second stage of cgroup |
4562 | * aren't quite done with @cgrp yet, so hold onto it. | 4729 | * destruction is kicked off from css_killed_work_fn() after the |
4730 | * refs of all attached css's are killed. If @cgrp doesn't have | ||
4731 | * any css, we kick it off here. | ||
4732 | */ | ||
4733 | if (!cgrp->nr_css) | ||
4734 | cgroup_destroy_css_killed(cgrp); | ||
4735 | |||
4736 | /* | ||
4737 | * Clear the base files and remove @cgrp directory. The removal | ||
4738 | * puts the base ref but we aren't quite done with @cgrp yet, so | ||
4739 | * hold onto it. | ||
4563 | */ | 4740 | */ |
4741 | cgroup_addrm_files(cgrp, cgroup_base_files, false); | ||
4564 | dget(d); | 4742 | dget(d); |
4565 | cgroup_d_remove_dir(d); | 4743 | cgroup_d_remove_dir(d); |
4566 | 4744 | ||
@@ -4580,50 +4758,36 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) | |||
4580 | }; | 4758 | }; |
4581 | 4759 | ||
4582 | /** | 4760 | /** |
4583 | * cgroup_offline_fn - the second step of cgroup destruction | 4761 | * cgroup_destroy_css_killed - the second step of cgroup destruction |
4584 | * @work: cgroup->destroy_free_work | 4762 | * @cgrp: cgroup whose css's have been killed |
4585 | * | 4763 | * |
4586 | * This function is invoked from a work item for a cgroup which is being | 4764 | * This function is invoked from a work item for a cgroup which is being |
4587 | * destroyed after the percpu refcnts of all css's are guaranteed to be | 4765 | * destroyed after all css's are offlined and performs the rest of |
4588 | * seen as killed on all CPUs, and performs the rest of destruction. This | 4766 | * destruction. This is the second step of destruction described in the |
4589 | * is the second step of destruction described in the comment above | 4767 | * comment above cgroup_destroy_locked(). |
4590 | * cgroup_destroy_locked(). | ||
4591 | */ | 4768 | */ |
4592 | static void cgroup_offline_fn(struct work_struct *work) | 4769 | static void cgroup_destroy_css_killed(struct cgroup *cgrp) |
4593 | { | 4770 | { |
4594 | struct cgroup *cgrp = container_of(work, struct cgroup, destroy_work); | ||
4595 | struct cgroup *parent = cgrp->parent; | 4771 | struct cgroup *parent = cgrp->parent; |
4596 | struct dentry *d = cgrp->dentry; | 4772 | struct dentry *d = cgrp->dentry; |
4597 | struct cgroup_subsys *ss; | ||
4598 | 4773 | ||
4599 | mutex_lock(&cgroup_mutex); | 4774 | lockdep_assert_held(&cgroup_mutex); |
4600 | 4775 | ||
4601 | /* | 4776 | /* delete this cgroup from parent->children */ |
4602 | * css_tryget() is guaranteed to fail now. Tell subsystems to | 4777 | list_del_rcu(&cgrp->sibling); |
4603 | * initate destruction. | ||
4604 | */ | ||
4605 | for_each_root_subsys(cgrp->root, ss) | ||
4606 | offline_css(ss, cgrp); | ||
4607 | 4778 | ||
4608 | /* | 4779 | /* |
4609 | * Put the css refs from cgroup_destroy_locked(). Each css holds | 4780 | * We should remove the cgroup object from idr before its grace |
4610 | * an extra reference to the cgroup's dentry and cgroup removal | 4781 | * period starts, so we won't be looking up a cgroup while the |
4611 | * proceeds regardless of css refs. On the last put of each css, | 4782 | * cgroup is being freed. |
4612 | * whenever that may be, the extra dentry ref is put so that dentry | ||
4613 | * destruction happens only after all css's are released. | ||
4614 | */ | 4783 | */ |
4615 | for_each_root_subsys(cgrp->root, ss) | 4784 | idr_remove(&cgrp->root->cgroup_idr, cgrp->id); |
4616 | css_put(cgrp->subsys[ss->subsys_id]); | 4785 | cgrp->id = -1; |
4617 | |||
4618 | /* delete this cgroup from parent->children */ | ||
4619 | list_del_rcu(&cgrp->sibling); | ||
4620 | 4786 | ||
4621 | dput(d); | 4787 | dput(d); |
4622 | 4788 | ||
4623 | set_bit(CGRP_RELEASABLE, &parent->flags); | 4789 | set_bit(CGRP_RELEASABLE, &parent->flags); |
4624 | check_for_release(parent); | 4790 | check_for_release(parent); |
4625 | |||
4626 | mutex_unlock(&cgroup_mutex); | ||
4627 | } | 4791 | } |
4628 | 4792 | ||
4629 | static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry) | 4793 | static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry) |
@@ -4646,6 +4810,11 @@ static void __init_or_module cgroup_init_cftsets(struct cgroup_subsys *ss) | |||
4646 | * deregistration. | 4810 | * deregistration. |
4647 | */ | 4811 | */ |
4648 | if (ss->base_cftypes) { | 4812 | if (ss->base_cftypes) { |
4813 | struct cftype *cft; | ||
4814 | |||
4815 | for (cft = ss->base_cftypes; cft->name[0] != '\0'; cft++) | ||
4816 | cft->ss = ss; | ||
4817 | |||
4649 | ss->base_cftset.cfts = ss->base_cftypes; | 4818 | ss->base_cftset.cfts = ss->base_cftypes; |
4650 | list_add_tail(&ss->base_cftset.node, &ss->cftsets); | 4819 | list_add_tail(&ss->base_cftset.node, &ss->cftsets); |
4651 | } | 4820 | } |
@@ -4665,10 +4834,10 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss) | |||
4665 | /* Create the top cgroup state for this subsystem */ | 4834 | /* Create the top cgroup state for this subsystem */ |
4666 | list_add(&ss->sibling, &cgroup_dummy_root.subsys_list); | 4835 | list_add(&ss->sibling, &cgroup_dummy_root.subsys_list); |
4667 | ss->root = &cgroup_dummy_root; | 4836 | ss->root = &cgroup_dummy_root; |
4668 | css = ss->css_alloc(cgroup_dummy_top); | 4837 | css = ss->css_alloc(cgroup_css(cgroup_dummy_top, ss)); |
4669 | /* We don't handle early failures gracefully */ | 4838 | /* We don't handle early failures gracefully */ |
4670 | BUG_ON(IS_ERR(css)); | 4839 | BUG_ON(IS_ERR(css)); |
4671 | init_cgroup_css(css, ss, cgroup_dummy_top); | 4840 | init_css(css, ss, cgroup_dummy_top); |
4672 | 4841 | ||
4673 | /* Update the init_css_set to contain a subsys | 4842 | /* Update the init_css_set to contain a subsys |
4674 | * pointer to this state - since the subsystem is | 4843 | * pointer to this state - since the subsystem is |
@@ -4683,7 +4852,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss) | |||
4683 | * need to invoke fork callbacks here. */ | 4852 | * need to invoke fork callbacks here. */ |
4684 | BUG_ON(!list_empty(&init_task.tasks)); | 4853 | BUG_ON(!list_empty(&init_task.tasks)); |
4685 | 4854 | ||
4686 | BUG_ON(online_css(ss, cgroup_dummy_top)); | 4855 | BUG_ON(online_css(css)); |
4687 | 4856 | ||
4688 | mutex_unlock(&cgroup_mutex); | 4857 | mutex_unlock(&cgroup_mutex); |
4689 | 4858 | ||
@@ -4744,7 +4913,7 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss) | |||
4744 | * struct, so this can happen first (i.e. before the dummy root | 4913 | * struct, so this can happen first (i.e. before the dummy root |
4745 | * attachment). | 4914 | * attachment). |
4746 | */ | 4915 | */ |
4747 | css = ss->css_alloc(cgroup_dummy_top); | 4916 | css = ss->css_alloc(cgroup_css(cgroup_dummy_top, ss)); |
4748 | if (IS_ERR(css)) { | 4917 | if (IS_ERR(css)) { |
4749 | /* failure case - need to deassign the cgroup_subsys[] slot. */ | 4918 | /* failure case - need to deassign the cgroup_subsys[] slot. */ |
4750 | cgroup_subsys[ss->subsys_id] = NULL; | 4919 | cgroup_subsys[ss->subsys_id] = NULL; |
@@ -4756,8 +4925,8 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss) | |||
4756 | ss->root = &cgroup_dummy_root; | 4925 | ss->root = &cgroup_dummy_root; |
4757 | 4926 | ||
4758 | /* our new subsystem will be attached to the dummy hierarchy. */ | 4927 | /* our new subsystem will be attached to the dummy hierarchy. */ |
4759 | init_cgroup_css(css, ss, cgroup_dummy_top); | 4928 | init_css(css, ss, cgroup_dummy_top); |
4760 | /* init_idr must be after init_cgroup_css because it sets css->id. */ | 4929 | /* init_idr must be after init_css() because it sets css->id. */ |
4761 | if (ss->use_id) { | 4930 | if (ss->use_id) { |
4762 | ret = cgroup_init_idr(ss, css); | 4931 | ret = cgroup_init_idr(ss, css); |
4763 | if (ret) | 4932 | if (ret) |
@@ -4787,7 +4956,7 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss) | |||
4787 | } | 4956 | } |
4788 | write_unlock(&css_set_lock); | 4957 | write_unlock(&css_set_lock); |
4789 | 4958 | ||
4790 | ret = online_css(ss, cgroup_dummy_top); | 4959 | ret = online_css(css); |
4791 | if (ret) | 4960 | if (ret) |
4792 | goto err_unload; | 4961 | goto err_unload; |
4793 | 4962 | ||
@@ -4819,14 +4988,14 @@ void cgroup_unload_subsys(struct cgroup_subsys *ss) | |||
4819 | 4988 | ||
4820 | /* | 4989 | /* |
4821 | * we shouldn't be called if the subsystem is in use, and the use of | 4990 | * we shouldn't be called if the subsystem is in use, and the use of |
4822 | * try_module_get in parse_cgroupfs_options should ensure that it | 4991 | * try_module_get() in rebind_subsystems() should ensure that it |
4823 | * doesn't start being used while we're killing it off. | 4992 | * doesn't start being used while we're killing it off. |
4824 | */ | 4993 | */ |
4825 | BUG_ON(ss->root != &cgroup_dummy_root); | 4994 | BUG_ON(ss->root != &cgroup_dummy_root); |
4826 | 4995 | ||
4827 | mutex_lock(&cgroup_mutex); | 4996 | mutex_lock(&cgroup_mutex); |
4828 | 4997 | ||
4829 | offline_css(ss, cgroup_dummy_top); | 4998 | offline_css(cgroup_css(cgroup_dummy_top, ss)); |
4830 | 4999 | ||
4831 | if (ss->use_id) | 5000 | if (ss->use_id) |
4832 | idr_destroy(&ss->idr); | 5001 | idr_destroy(&ss->idr); |
@@ -4860,8 +5029,8 @@ void cgroup_unload_subsys(struct cgroup_subsys *ss) | |||
4860 | * the cgrp->subsys pointer to find their state. note that this | 5029 | * the cgrp->subsys pointer to find their state. note that this |
4861 | * also takes care of freeing the css_id. | 5030 | * also takes care of freeing the css_id. |
4862 | */ | 5031 | */ |
4863 | ss->css_free(cgroup_dummy_top); | 5032 | ss->css_free(cgroup_css(cgroup_dummy_top, ss)); |
4864 | cgroup_dummy_top->subsys[ss->subsys_id] = NULL; | 5033 | RCU_INIT_POINTER(cgroup_dummy_top->subsys[ss->subsys_id], NULL); |
4865 | 5034 | ||
4866 | mutex_unlock(&cgroup_mutex); | 5035 | mutex_unlock(&cgroup_mutex); |
4867 | } | 5036 | } |
@@ -4943,6 +5112,10 @@ int __init cgroup_init(void) | |||
4943 | 5112 | ||
4944 | BUG_ON(cgroup_init_root_id(&cgroup_dummy_root, 0, 1)); | 5113 | BUG_ON(cgroup_init_root_id(&cgroup_dummy_root, 0, 1)); |
4945 | 5114 | ||
5115 | err = idr_alloc(&cgroup_dummy_root.cgroup_idr, cgroup_dummy_top, | ||
5116 | 0, 1, GFP_KERNEL); | ||
5117 | BUG_ON(err < 0); | ||
5118 | |||
4946 | mutex_unlock(&cgroup_root_mutex); | 5119 | mutex_unlock(&cgroup_root_mutex); |
4947 | mutex_unlock(&cgroup_mutex); | 5120 | mutex_unlock(&cgroup_mutex); |
4948 | 5121 | ||
@@ -5099,7 +5272,7 @@ void cgroup_fork(struct task_struct *child) | |||
5099 | * Adds the task to the list running through its css_set if necessary and | 5272 | * Adds the task to the list running through its css_set if necessary and |
5100 | * call the subsystem fork() callbacks. Has to be after the task is | 5273 | * call the subsystem fork() callbacks. Has to be after the task is |
5101 | * visible on the task list in case we race with the first call to | 5274 | * visible on the task list in case we race with the first call to |
5102 | * cgroup_iter_start() - to guarantee that the new task ends up on its | 5275 | * css_task_iter_start() - to guarantee that the new task ends up on its |
5103 | * list. | 5276 | * list. |
5104 | */ | 5277 | */ |
5105 | void cgroup_post_fork(struct task_struct *child) | 5278 | void cgroup_post_fork(struct task_struct *child) |
@@ -5212,10 +5385,10 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks) | |||
5212 | */ | 5385 | */ |
5213 | for_each_builtin_subsys(ss, i) { | 5386 | for_each_builtin_subsys(ss, i) { |
5214 | if (ss->exit) { | 5387 | if (ss->exit) { |
5215 | struct cgroup *old_cgrp = cset->subsys[i]->cgroup; | 5388 | struct cgroup_subsys_state *old_css = cset->subsys[i]; |
5216 | struct cgroup *cgrp = task_cgroup(tsk, i); | 5389 | struct cgroup_subsys_state *css = task_css(tsk, i); |
5217 | 5390 | ||
5218 | ss->exit(cgrp, old_cgrp, tsk); | 5391 | ss->exit(css, old_css, tsk); |
5219 | } | 5392 | } |
5220 | } | 5393 | } |
5221 | } | 5394 | } |
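The ->exit conversion gives the callback the task's previous and current subsystem states directly as css pointers. A skeleton for a hypothetical controller (mysubsys_exit() is invented):

        static void mysubsys_exit(struct cgroup_subsys_state *css,
                                  struct cgroup_subsys_state *old_css,
                                  struct task_struct *task)
        {
                /* settle per-task accounting against @old_css; @css is
                 * where the exiting @task belongs now */
        }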
@@ -5474,20 +5647,16 @@ static int __init_or_module cgroup_init_idr(struct cgroup_subsys *ss, | |||
5474 | return 0; | 5647 | return 0; |
5475 | } | 5648 | } |
5476 | 5649 | ||
5477 | static int alloc_css_id(struct cgroup_subsys *ss, struct cgroup *parent, | 5650 | static int alloc_css_id(struct cgroup_subsys_state *child_css) |
5478 | struct cgroup *child) | ||
5479 | { | 5651 | { |
5480 | int subsys_id, i, depth = 0; | 5652 | struct cgroup_subsys_state *parent_css = css_parent(child_css); |
5481 | struct cgroup_subsys_state *parent_css, *child_css; | ||
5482 | struct css_id *child_id, *parent_id; | 5653 | struct css_id *child_id, *parent_id; |
5654 | int i, depth; | ||
5483 | 5655 | ||
5484 | subsys_id = ss->subsys_id; | ||
5485 | parent_css = parent->subsys[subsys_id]; | ||
5486 | child_css = child->subsys[subsys_id]; | ||
5487 | parent_id = rcu_dereference_protected(parent_css->id, true); | 5656 | parent_id = rcu_dereference_protected(parent_css->id, true); |
5488 | depth = parent_id->depth + 1; | 5657 | depth = parent_id->depth + 1; |
5489 | 5658 | ||
5490 | child_id = get_new_cssid(ss, depth); | 5659 | child_id = get_new_cssid(child_css->ss, depth); |
5491 | if (IS_ERR(child_id)) | 5660 | if (IS_ERR(child_id)) |
5492 | return PTR_ERR(child_id); | 5661 | return PTR_ERR(child_id); |
5493 | 5662 | ||
@@ -5525,31 +5694,56 @@ struct cgroup_subsys_state *css_lookup(struct cgroup_subsys *ss, int id) | |||
5525 | } | 5694 | } |
5526 | EXPORT_SYMBOL_GPL(css_lookup); | 5695 | EXPORT_SYMBOL_GPL(css_lookup); |
5527 | 5696 | ||
5528 | /* | 5697 | /** |
5529 | * get corresponding css from file open on cgroupfs directory | 5698 | * css_from_dir - get corresponding css from the dentry of a cgroup dir |
5699 | * @dentry: directory dentry of interest | ||
5700 | * @ss: subsystem of interest | ||
5701 | * | ||
5702 | * Must be called under RCU read lock. The caller is responsible for | ||
5703 | * pinning the returned css if it needs to be accessed outside the RCU | ||
5704 | * critical section. | ||
5530 | */ | 5705 | */ |
5531 | struct cgroup_subsys_state *cgroup_css_from_dir(struct file *f, int id) | 5706 | struct cgroup_subsys_state *css_from_dir(struct dentry *dentry, |
5707 | struct cgroup_subsys *ss) | ||
5532 | { | 5708 | { |
5533 | struct cgroup *cgrp; | 5709 | struct cgroup *cgrp; |
5534 | struct inode *inode; | ||
5535 | struct cgroup_subsys_state *css; | ||
5536 | 5710 | ||
5537 | inode = file_inode(f); | 5711 | WARN_ON_ONCE(!rcu_read_lock_held()); |
5538 | /* check in cgroup filesystem dir */ | 5712 | |
5539 | if (inode->i_op != &cgroup_dir_inode_operations) | 5713 | /* is @dentry a cgroup dir? */ |
5714 | if (!dentry->d_inode || | ||
5715 | dentry->d_inode->i_op != &cgroup_dir_inode_operations) | ||
5540 | return ERR_PTR(-EBADF); | 5716 | return ERR_PTR(-EBADF); |
5541 | 5717 | ||
5542 | if (id < 0 || id >= CGROUP_SUBSYS_COUNT) | 5718 | cgrp = __d_cgrp(dentry); |
5543 | return ERR_PTR(-EINVAL); | 5719 | return cgroup_css(cgrp, ss) ?: ERR_PTR(-ENOENT); |
5720 | } | ||
5544 | 5721 | ||
5545 | /* get cgroup */ | 5722 | /** |
5546 | cgrp = __d_cgrp(f->f_dentry); | 5723 | * css_from_id - lookup css by id |
5547 | css = cgrp->subsys[id]; | 5724 | * @id: the cgroup id |
5548 | return css ? css : ERR_PTR(-ENOENT); | 5725 | * @ss: cgroup subsys to be looked into |
5726 | * | ||
5727 | * Returns the css if there's valid one with @id, otherwise returns NULL. | ||
5728 | * Should be called under rcu_read_lock(). | ||
5729 | */ | ||
5730 | struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss) | ||
5731 | { | ||
5732 | struct cgroup *cgrp; | ||
5733 | |||
5734 | rcu_lockdep_assert(rcu_read_lock_held() || | ||
5735 | lockdep_is_held(&cgroup_mutex), | ||
5736 | "css_from_id() needs proper protection"); | ||
5737 | |||
5738 | cgrp = idr_find(&ss->root->cgroup_idr, id); | ||
5739 | if (cgrp) | ||
5740 | return cgroup_css(cgrp, ss); | ||
5741 | return NULL; | ||
5549 | } | 5742 | } |
5550 | 5743 | ||
5551 | #ifdef CONFIG_CGROUP_DEBUG | 5744 | #ifdef CONFIG_CGROUP_DEBUG |
5552 | static struct cgroup_subsys_state *debug_css_alloc(struct cgroup *cgrp) | 5745 | static struct cgroup_subsys_state * |
5746 | debug_css_alloc(struct cgroup_subsys_state *parent_css) | ||
5553 | { | 5747 | { |
5554 | struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL); | 5748 | struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL); |
5555 | 5749 | ||
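A plausible caller of the new css_from_id(), pairing it with css_tryget() so the css stays valid beyond the RCU section, per the locking rule in the comment above (get_css_by_id() is a made-up wrapper):

        static struct cgroup_subsys_state *get_css_by_id(int id,
                                                struct cgroup_subsys *ss)
        {
                struct cgroup_subsys_state *css;

                rcu_read_lock();
                css = css_from_id(id, ss);
                if (css && !css_tryget(css))
                        css = NULL;     /* raced with css destruction */
                rcu_read_unlock();

                return css;             /* holds a ref; css_put() when done */
        }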
@@ -5559,22 +5753,24 @@ static struct cgroup_subsys_state *debug_css_alloc(struct cgroup *cgrp) | |||
5559 | return css; | 5753 | return css; |
5560 | } | 5754 | } |
5561 | 5755 | ||
5562 | static void debug_css_free(struct cgroup *cgrp) | 5756 | static void debug_css_free(struct cgroup_subsys_state *css) |
5563 | { | 5757 | { |
5564 | kfree(cgrp->subsys[debug_subsys_id]); | 5758 | kfree(css); |
5565 | } | 5759 | } |
5566 | 5760 | ||
5567 | static u64 debug_taskcount_read(struct cgroup *cgrp, struct cftype *cft) | 5761 | static u64 debug_taskcount_read(struct cgroup_subsys_state *css, |
5762 | struct cftype *cft) | ||
5568 | { | 5763 | { |
5569 | return cgroup_task_count(cgrp); | 5764 | return cgroup_task_count(css->cgroup); |
5570 | } | 5765 | } |
5571 | 5766 | ||
5572 | static u64 current_css_set_read(struct cgroup *cgrp, struct cftype *cft) | 5767 | static u64 current_css_set_read(struct cgroup_subsys_state *css, |
5768 | struct cftype *cft) | ||
5573 | { | 5769 | { |
5574 | return (u64)(unsigned long)current->cgroups; | 5770 | return (u64)(unsigned long)current->cgroups; |
5575 | } | 5771 | } |
5576 | 5772 | ||
5577 | static u64 current_css_set_refcount_read(struct cgroup *cgrp, | 5773 | static u64 current_css_set_refcount_read(struct cgroup_subsys_state *css, |
5578 | struct cftype *cft) | 5774 | struct cftype *cft) |
5579 | { | 5775 | { |
5580 | u64 count; | 5776 | u64 count; |
@@ -5585,7 +5781,7 @@ static u64 current_css_set_refcount_read(struct cgroup *cgrp, | |||
5585 | return count; | 5781 | return count; |
5586 | } | 5782 | } |
5587 | 5783 | ||
5588 | static int current_css_set_cg_links_read(struct cgroup *cgrp, | 5784 | static int current_css_set_cg_links_read(struct cgroup_subsys_state *css, |
5589 | struct cftype *cft, | 5785 | struct cftype *cft, |
5590 | struct seq_file *seq) | 5786 | struct seq_file *seq) |
5591 | { | 5787 | { |
@@ -5612,14 +5808,13 @@ static int current_css_set_cg_links_read(struct cgroup *cgrp, | |||
5612 | } | 5808 | } |
5613 | 5809 | ||
5614 | #define MAX_TASKS_SHOWN_PER_CSS 25 | 5810 | #define MAX_TASKS_SHOWN_PER_CSS 25 |
5615 | static int cgroup_css_links_read(struct cgroup *cgrp, | 5811 | static int cgroup_css_links_read(struct cgroup_subsys_state *css, |
5616 | struct cftype *cft, | 5812 | struct cftype *cft, struct seq_file *seq) |
5617 | struct seq_file *seq) | ||
5618 | { | 5813 | { |
5619 | struct cgrp_cset_link *link; | 5814 | struct cgrp_cset_link *link; |
5620 | 5815 | ||
5621 | read_lock(&css_set_lock); | 5816 | read_lock(&css_set_lock); |
5622 | list_for_each_entry(link, &cgrp->cset_links, cset_link) { | 5817 | list_for_each_entry(link, &css->cgroup->cset_links, cset_link) { |
5623 | struct css_set *cset = link->cset; | 5818 | struct css_set *cset = link->cset; |
5624 | struct task_struct *task; | 5819 | struct task_struct *task; |
5625 | int count = 0; | 5820 | int count = 0; |
@@ -5638,9 +5833,9 @@ static int cgroup_css_links_read(struct cgroup *cgrp, | |||
5638 | return 0; | 5833 | return 0; |
5639 | } | 5834 | } |
5640 | 5835 | ||
5641 | static u64 releasable_read(struct cgroup *cgrp, struct cftype *cft) | 5836 | static u64 releasable_read(struct cgroup_subsys_state *css, struct cftype *cft) |
5642 | { | 5837 | { |
5643 | return test_bit(CGRP_RELEASABLE, &cgrp->flags); | 5838 | return test_bit(CGRP_RELEASABLE, &css->cgroup->flags); |
5644 | } | 5839 | } |
5645 | 5840 | ||
5646 | static struct cftype debug_files[] = { | 5841 | static struct cftype debug_files[] = { |
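
The debug handlers above show the conversion this series applies to every cftype method: handlers receive the css directly instead of a cgroup. A hedged sketch of the converted u64 read-handler shape, with struct my_state and its counter as placeholder names:

    static u64 my_val_read(struct cgroup_subsys_state *css, struct cftype *cft)
    {
            /* the css is embedded in the subsystem state; no cgroup hop needed */
            struct my_state *s = container_of(css, struct my_state, css);

            return s->some_counter;
    }
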
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index 75dda1ea5026..f0ff64d0ebaa 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c | |||
@@ -45,25 +45,19 @@ struct freezer { | |||
45 | spinlock_t lock; | 45 | spinlock_t lock; |
46 | }; | 46 | }; |
47 | 47 | ||
48 | static inline struct freezer *cgroup_freezer(struct cgroup *cgroup) | 48 | static inline struct freezer *css_freezer(struct cgroup_subsys_state *css) |
49 | { | 49 | { |
50 | return container_of(cgroup_subsys_state(cgroup, freezer_subsys_id), | 50 | return css ? container_of(css, struct freezer, css) : NULL; |
51 | struct freezer, css); | ||
52 | } | 51 | } |
53 | 52 | ||
54 | static inline struct freezer *task_freezer(struct task_struct *task) | 53 | static inline struct freezer *task_freezer(struct task_struct *task) |
55 | { | 54 | { |
56 | return container_of(task_subsys_state(task, freezer_subsys_id), | 55 | return css_freezer(task_css(task, freezer_subsys_id)); |
57 | struct freezer, css); | ||
58 | } | 56 | } |
59 | 57 | ||
60 | static struct freezer *parent_freezer(struct freezer *freezer) | 58 | static struct freezer *parent_freezer(struct freezer *freezer) |
61 | { | 59 | { |
62 | struct cgroup *pcg = freezer->css.cgroup->parent; | 60 | return css_freezer(css_parent(&freezer->css)); |
63 | |||
64 | if (pcg) | ||
65 | return cgroup_freezer(pcg); | ||
66 | return NULL; | ||
67 | } | 61 | } |
68 | 62 | ||
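
The three freezer helpers above follow the pattern each converted subsystem now repeats; a generic sketch, assuming placeholder names struct my_state (embedding a css member) and my_subsys_id:

    static inline struct my_state *css_my(struct cgroup_subsys_state *css)
    {
            return css ? container_of(css, struct my_state, css) : NULL;
    }

    static inline struct my_state *task_my(struct task_struct *task)
    {
            return css_my(task_css(task, my_subsys_id));
    }

    static inline struct my_state *parent_my(struct my_state *s)
    {
            return css_my(css_parent(&s->css));     /* NULL at the root */
    }
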
69 | bool cgroup_freezing(struct task_struct *task) | 63 | bool cgroup_freezing(struct task_struct *task) |
@@ -92,7 +86,8 @@ static const char *freezer_state_strs(unsigned int state) | |||
92 | 86 | ||
93 | struct cgroup_subsys freezer_subsys; | 87 | struct cgroup_subsys freezer_subsys; |
94 | 88 | ||
95 | static struct cgroup_subsys_state *freezer_css_alloc(struct cgroup *cgroup) | 89 | static struct cgroup_subsys_state * |
90 | freezer_css_alloc(struct cgroup_subsys_state *parent_css) | ||
96 | { | 91 | { |
97 | struct freezer *freezer; | 92 | struct freezer *freezer; |
98 | 93 | ||
@@ -105,22 +100,22 @@ static struct cgroup_subsys_state *freezer_css_alloc(struct cgroup *cgroup) | |||
105 | } | 100 | } |
106 | 101 | ||
107 | /** | 102 | /** |
108 | * freezer_css_online - commit creation of a freezer cgroup | 103 | * freezer_css_online - commit creation of a freezer css |
109 | * @cgroup: cgroup being created | 104 | * @css: css being created |
110 | * | 105 | * |
111 | * We're committing to creation of @cgroup. Mark it online and inherit | 106 | * We're committing to creation of @css. Mark it online and inherit |
112 | * parent's freezing state while holding both parent's and our | 107 | * parent's freezing state while holding both parent's and our |
113 | * freezer->lock. | 108 | * freezer->lock. |
114 | */ | 109 | */ |
115 | static int freezer_css_online(struct cgroup *cgroup) | 110 | static int freezer_css_online(struct cgroup_subsys_state *css) |
116 | { | 111 | { |
117 | struct freezer *freezer = cgroup_freezer(cgroup); | 112 | struct freezer *freezer = css_freezer(css); |
118 | struct freezer *parent = parent_freezer(freezer); | 113 | struct freezer *parent = parent_freezer(freezer); |
119 | 114 | ||
120 | /* | 115 | /* |
121 | * The following double locking and freezing state inheritance | 116 | * The following double locking and freezing state inheritance |
122 | * guarantee that @cgroup can never escape ancestors' freezing | 117 | * guarantee that @cgroup can never escape ancestors' freezing |
123 | * states. See cgroup_for_each_descendant_pre() for details. | 118 | * states. See css_for_each_descendant_pre() for details. |
124 | */ | 119 | */ |
125 | if (parent) | 120 | if (parent) |
126 | spin_lock_irq(&parent->lock); | 121 | spin_lock_irq(&parent->lock); |
@@ -141,15 +136,15 @@ static int freezer_css_online(struct cgroup *cgroup) | |||
141 | } | 136 | } |
142 | 137 | ||
143 | /** | 138 | /** |
144 | * freezer_css_offline - initiate destruction of @cgroup | 139 | * freezer_css_offline - initiate destruction of a freezer css |
145 | * @cgroup: cgroup being destroyed | 140 | * @css: css being destroyed |
146 | * | 141 | * |
147 | * @cgroup is going away. Mark it dead and decrement system_freezing_count | 142 | * @css is going away. Mark it dead and decrement system_freezing_count if |
148 | * if it was holding one. | 143 | * it was holding one. |
149 | */ | 144 | */ |
150 | static void freezer_css_offline(struct cgroup *cgroup) | 145 | static void freezer_css_offline(struct cgroup_subsys_state *css) |
151 | { | 146 | { |
152 | struct freezer *freezer = cgroup_freezer(cgroup); | 147 | struct freezer *freezer = css_freezer(css); |
153 | 148 | ||
154 | spin_lock_irq(&freezer->lock); | 149 | spin_lock_irq(&freezer->lock); |
155 | 150 | ||
@@ -161,9 +156,9 @@ static void freezer_css_offline(struct cgroup *cgroup) | |||
161 | spin_unlock_irq(&freezer->lock); | 156 | spin_unlock_irq(&freezer->lock); |
162 | } | 157 | } |
163 | 158 | ||
164 | static void freezer_css_free(struct cgroup *cgroup) | 159 | static void freezer_css_free(struct cgroup_subsys_state *css) |
165 | { | 160 | { |
166 | kfree(cgroup_freezer(cgroup)); | 161 | kfree(css_freezer(css)); |
167 | } | 162 | } |
168 | 163 | ||
169 | /* | 164 | /* |
@@ -175,25 +170,26 @@ static void freezer_css_free(struct cgroup *cgroup) | |||
175 | * @freezer->lock. freezer_attach() makes the new tasks conform to the | 170 | * @freezer->lock. freezer_attach() makes the new tasks conform to the |
176 | * current state and all following state changes can see the new tasks. | 171 | * current state and all following state changes can see the new tasks. |
177 | */ | 172 | */ |
178 | static void freezer_attach(struct cgroup *new_cgrp, struct cgroup_taskset *tset) | 173 | static void freezer_attach(struct cgroup_subsys_state *new_css, |
174 | struct cgroup_taskset *tset) | ||
179 | { | 175 | { |
180 | struct freezer *freezer = cgroup_freezer(new_cgrp); | 176 | struct freezer *freezer = css_freezer(new_css); |
181 | struct task_struct *task; | 177 | struct task_struct *task; |
182 | bool clear_frozen = false; | 178 | bool clear_frozen = false; |
183 | 179 | ||
184 | spin_lock_irq(&freezer->lock); | 180 | spin_lock_irq(&freezer->lock); |
185 | 181 | ||
186 | /* | 182 | /* |
187 | * Make the new tasks conform to the current state of @new_cgrp. | 183 | * Make the new tasks conform to the current state of @new_css. |
188 | * For simplicity, when migrating any task to a FROZEN cgroup, we | 184 | * For simplicity, when migrating any task to a FROZEN cgroup, we |
189 | * revert it to FREEZING and let update_if_frozen() determine the | 185 | * revert it to FREEZING and let update_if_frozen() determine the |
190 | * correct state later. | 186 | * correct state later. |
191 | * | 187 | * |
192 | * Tasks in @tset are on @new_cgrp but may not conform to its | 188 | * Tasks in @tset are on @new_css but may not conform to its |
193 | * current state before executing the following - !frozen tasks may | 189 | * current state before executing the following - !frozen tasks may |
194 | * be visible in a FROZEN cgroup and frozen tasks in a THAWED one. | 190 | * be visible in a FROZEN cgroup and frozen tasks in a THAWED one. |
195 | */ | 191 | */ |
196 | cgroup_taskset_for_each(task, new_cgrp, tset) { | 192 | cgroup_taskset_for_each(task, new_css, tset) { |
197 | if (!(freezer->state & CGROUP_FREEZING)) { | 193 | if (!(freezer->state & CGROUP_FREEZING)) { |
198 | __thaw_task(task); | 194 | __thaw_task(task); |
199 | } else { | 195 | } else { |
@@ -231,7 +227,7 @@ static void freezer_fork(struct task_struct *task) | |||
231 | * The root cgroup is non-freezable, so we can skip the | 227 | * The root cgroup is non-freezable, so we can skip the |
232 | * following check. | 228 | * following check. |
233 | */ | 229 | */ |
234 | if (!freezer->css.cgroup->parent) | 230 | if (!parent_freezer(freezer)) |
235 | goto out; | 231 | goto out; |
236 | 232 | ||
237 | spin_lock_irq(&freezer->lock); | 233 | spin_lock_irq(&freezer->lock); |
@@ -244,7 +240,7 @@ out: | |||
244 | 240 | ||
245 | /** | 241 | /** |
246 | * update_if_frozen - update whether a cgroup finished freezing | 242 | * update_if_frozen - update whether a cgroup finished freezing |
247 | * @cgroup: cgroup of interest | 243 | * @css: css of interest |
248 | * | 244 | * |
249 | * Once FREEZING is initiated, transition to FROZEN is lazily updated by | 245 | * Once FREEZING is initiated, transition to FROZEN is lazily updated by |
250 | * calling this function. If the current state is FREEZING but not FROZEN, | 246 | * calling this function. If the current state is FREEZING but not FROZEN, |
@@ -255,14 +251,14 @@ out: | |||
255 | * update_if_frozen() on all descendants prior to invoking this function. | 251 | * update_if_frozen() on all descendants prior to invoking this function. |
256 | * | 252 | * |
257 | * Task states and freezer state might disagree while tasks are being | 253 | * Task states and freezer state might disagree while tasks are being |
258 | * migrated into or out of @cgroup, so we can't verify task states against | 254 | * migrated into or out of @css, so we can't verify task states against |
259 | * @freezer state here. See freezer_attach() for details. | 255 | * @freezer state here. See freezer_attach() for details. |
260 | */ | 256 | */ |
261 | static void update_if_frozen(struct cgroup *cgroup) | 257 | static void update_if_frozen(struct cgroup_subsys_state *css) |
262 | { | 258 | { |
263 | struct freezer *freezer = cgroup_freezer(cgroup); | 259 | struct freezer *freezer = css_freezer(css); |
264 | struct cgroup *pos; | 260 | struct cgroup_subsys_state *pos; |
265 | struct cgroup_iter it; | 261 | struct css_task_iter it; |
266 | struct task_struct *task; | 262 | struct task_struct *task; |
267 | 263 | ||
268 | WARN_ON_ONCE(!rcu_read_lock_held()); | 264 | WARN_ON_ONCE(!rcu_read_lock_held()); |
@@ -274,8 +270,8 @@ static void update_if_frozen(struct cgroup *cgroup) | |||
274 | goto out_unlock; | 270 | goto out_unlock; |
275 | 271 | ||
276 | /* are all (live) children frozen? */ | 272 | /* are all (live) children frozen? */ |
277 | cgroup_for_each_child(pos, cgroup) { | 273 | css_for_each_child(pos, css) { |
278 | struct freezer *child = cgroup_freezer(pos); | 274 | struct freezer *child = css_freezer(pos); |
279 | 275 | ||
280 | if ((child->state & CGROUP_FREEZER_ONLINE) && | 276 | if ((child->state & CGROUP_FREEZER_ONLINE) && |
281 | !(child->state & CGROUP_FROZEN)) | 277 | !(child->state & CGROUP_FROZEN)) |
@@ -283,9 +279,9 @@ static void update_if_frozen(struct cgroup *cgroup) | |||
283 | } | 279 | } |
284 | 280 | ||
285 | /* are all tasks frozen? */ | 281 | /* are all tasks frozen? */ |
286 | cgroup_iter_start(cgroup, &it); | 282 | css_task_iter_start(css, &it); |
287 | 283 | ||
288 | while ((task = cgroup_iter_next(cgroup, &it))) { | 284 | while ((task = css_task_iter_next(&it))) { |
289 | if (freezing(task)) { | 285 | if (freezing(task)) { |
290 | /* | 286 | /* |
291 | * freezer_should_skip() indicates that the task | 287 | * freezer_should_skip() indicates that the task |
@@ -300,52 +296,49 @@ static void update_if_frozen(struct cgroup *cgroup) | |||
300 | 296 | ||
301 | freezer->state |= CGROUP_FROZEN; | 297 | freezer->state |= CGROUP_FROZEN; |
302 | out_iter_end: | 298 | out_iter_end: |
303 | cgroup_iter_end(cgroup, &it); | 299 | css_task_iter_end(&it); |
304 | out_unlock: | 300 | out_unlock: |
305 | spin_unlock_irq(&freezer->lock); | 301 | spin_unlock_irq(&freezer->lock); |
306 | } | 302 | } |
307 | 303 | ||
308 | static int freezer_read(struct cgroup *cgroup, struct cftype *cft, | 304 | static int freezer_read(struct cgroup_subsys_state *css, struct cftype *cft, |
309 | struct seq_file *m) | 305 | struct seq_file *m) |
310 | { | 306 | { |
311 | struct cgroup *pos; | 307 | struct cgroup_subsys_state *pos; |
312 | 308 | ||
313 | rcu_read_lock(); | 309 | rcu_read_lock(); |
314 | 310 | ||
315 | /* update states bottom-up */ | 311 | /* update states bottom-up */ |
316 | cgroup_for_each_descendant_post(pos, cgroup) | 312 | css_for_each_descendant_post(pos, css) |
317 | update_if_frozen(pos); | 313 | update_if_frozen(pos); |
318 | update_if_frozen(cgroup); | ||
319 | 314 | ||
320 | rcu_read_unlock(); | 315 | rcu_read_unlock(); |
321 | 316 | ||
322 | seq_puts(m, freezer_state_strs(cgroup_freezer(cgroup)->state)); | 317 | seq_puts(m, freezer_state_strs(css_freezer(css)->state)); |
323 | seq_putc(m, '\n'); | 318 | seq_putc(m, '\n'); |
324 | return 0; | 319 | return 0; |
325 | } | 320 | } |
326 | 321 | ||
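
The dropped update_if_frozen(cgroup) call is deliberate, not a loss: unlike the old cgroup walk, css_for_each_descendant_post() visits @css itself as the last node, so one loop now covers the subtree and its root. A sketch of the resulting shape:

    struct cgroup_subsys_state *pos;

    rcu_read_lock();
    css_for_each_descendant_post(pos, css)
            update_if_frozen(pos);  /* children first, @css itself last */
    rcu_read_unlock();
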
327 | static void freeze_cgroup(struct freezer *freezer) | 322 | static void freeze_cgroup(struct freezer *freezer) |
328 | { | 323 | { |
329 | struct cgroup *cgroup = freezer->css.cgroup; | 324 | struct css_task_iter it; |
330 | struct cgroup_iter it; | ||
331 | struct task_struct *task; | 325 | struct task_struct *task; |
332 | 326 | ||
333 | cgroup_iter_start(cgroup, &it); | 327 | css_task_iter_start(&freezer->css, &it); |
334 | while ((task = cgroup_iter_next(cgroup, &it))) | 328 | while ((task = css_task_iter_next(&it))) |
335 | freeze_task(task); | 329 | freeze_task(task); |
336 | cgroup_iter_end(cgroup, &it); | 330 | css_task_iter_end(&it); |
337 | } | 331 | } |
338 | 332 | ||
339 | static void unfreeze_cgroup(struct freezer *freezer) | 333 | static void unfreeze_cgroup(struct freezer *freezer) |
340 | { | 334 | { |
341 | struct cgroup *cgroup = freezer->css.cgroup; | 335 | struct css_task_iter it; |
342 | struct cgroup_iter it; | ||
343 | struct task_struct *task; | 336 | struct task_struct *task; |
344 | 337 | ||
345 | cgroup_iter_start(cgroup, &it); | 338 | css_task_iter_start(&freezer->css, &it); |
346 | while ((task = cgroup_iter_next(cgroup, &it))) | 339 | while ((task = css_task_iter_next(&it))) |
347 | __thaw_task(task); | 340 | __thaw_task(task); |
348 | cgroup_iter_end(cgroup, &it); | 341 | css_task_iter_end(&it); |
349 | } | 342 | } |
350 | 343 | ||
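
freeze_cgroup() and unfreeze_cgroup() above show the new task iterator, keyed on the css and the iterator alone rather than a cgroup. A self-contained sketch that counts the tasks in a css (count_css_tasks() is illustrative only, not part of this patch):

    static int count_css_tasks(struct cgroup_subsys_state *css)
    {
            struct css_task_iter it;
            struct task_struct *task;
            int n = 0;

            css_task_iter_start(css, &it);
            while ((task = css_task_iter_next(&it)))
                    n++;
            css_task_iter_end(&it);
            return n;
    }
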
351 | /** | 344 | /** |
@@ -395,12 +388,7 @@ static void freezer_apply_state(struct freezer *freezer, bool freeze, | |||
395 | */ | 388 | */ |
396 | static void freezer_change_state(struct freezer *freezer, bool freeze) | 389 | static void freezer_change_state(struct freezer *freezer, bool freeze) |
397 | { | 390 | { |
398 | struct cgroup *pos; | 391 | struct cgroup_subsys_state *pos; |
399 | |||
400 | /* update @freezer */ | ||
401 | spin_lock_irq(&freezer->lock); | ||
402 | freezer_apply_state(freezer, freeze, CGROUP_FREEZING_SELF); | ||
403 | spin_unlock_irq(&freezer->lock); | ||
404 | 392 | ||
405 | /* | 393 | /* |
406 | * Update all its descendants in pre-order traversal. Each | 394 | * Update all its descendants in pre-order traversal. Each |
@@ -408,24 +396,33 @@ static void freezer_change_state(struct freezer *freezer, bool freeze) | |||
408 | * CGROUP_FREEZING_PARENT. | 396 | * CGROUP_FREEZING_PARENT. |
409 | */ | 397 | */ |
410 | rcu_read_lock(); | 398 | rcu_read_lock(); |
411 | cgroup_for_each_descendant_pre(pos, freezer->css.cgroup) { | 399 | css_for_each_descendant_pre(pos, &freezer->css) { |
412 | struct freezer *pos_f = cgroup_freezer(pos); | 400 | struct freezer *pos_f = css_freezer(pos); |
413 | struct freezer *parent = parent_freezer(pos_f); | 401 | struct freezer *parent = parent_freezer(pos_f); |
414 | 402 | ||
415 | /* | ||
416 | * Our update to @parent->state is already visible which is | ||
417 | * all we need. No need to lock @parent. For more info on | ||
418 | * synchronization, see freezer_post_create(). | ||
419 | */ | ||
420 | spin_lock_irq(&pos_f->lock); | 403 | spin_lock_irq(&pos_f->lock); |
421 | freezer_apply_state(pos_f, parent->state & CGROUP_FREEZING, | 404 | |
422 | CGROUP_FREEZING_PARENT); | 405 | if (pos_f == freezer) { |
406 | freezer_apply_state(pos_f, freeze, | ||
407 | CGROUP_FREEZING_SELF); | ||
408 | } else { | ||
409 | /* | ||
410 | * Our update to @parent->state is already visible | ||
411 | * which is all we need. No need to lock @parent. | ||
412 | * For more info on synchronization, see | ||
413 | * freezer_post_create(). | ||
414 | */ | ||
415 | freezer_apply_state(pos_f, | ||
416 | parent->state & CGROUP_FREEZING, | ||
417 | CGROUP_FREEZING_PARENT); | ||
418 | } | ||
419 | |||
423 | spin_unlock_irq(&pos_f->lock); | 420 | spin_unlock_irq(&pos_f->lock); |
424 | } | 421 | } |
425 | rcu_read_unlock(); | 422 | rcu_read_unlock(); |
426 | } | 423 | } |
427 | 424 | ||
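
Folding the CGROUP_FREEZING_SELF update into the loop follows from the same iterator change: css_for_each_descendant_pre() now starts at @css itself, so the old locked self-update ahead of the walk would be applied twice. A generic sketch of the shape, with apply_self()/inherit_parent() as hypothetical helpers and css_my() as in the helper sketch earlier:

    rcu_read_lock();
    css_for_each_descendant_pre(pos, &s->css) {
            if (pos == &s->css)
                    apply_self(css_my(pos));        /* root of the walk */
            else
                    inherit_parent(css_my(pos));    /* descendants */
    }
    rcu_read_unlock();
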
428 | static int freezer_write(struct cgroup *cgroup, struct cftype *cft, | 425 | static int freezer_write(struct cgroup_subsys_state *css, struct cftype *cft, |
429 | const char *buffer) | 426 | const char *buffer) |
430 | { | 427 | { |
431 | bool freeze; | 428 | bool freeze; |
@@ -437,20 +434,22 @@ static int freezer_write(struct cgroup *cgroup, struct cftype *cft, | |||
437 | else | 434 | else |
438 | return -EINVAL; | 435 | return -EINVAL; |
439 | 436 | ||
440 | freezer_change_state(cgroup_freezer(cgroup), freeze); | 437 | freezer_change_state(css_freezer(css), freeze); |
441 | return 0; | 438 | return 0; |
442 | } | 439 | } |
443 | 440 | ||
444 | static u64 freezer_self_freezing_read(struct cgroup *cgroup, struct cftype *cft) | 441 | static u64 freezer_self_freezing_read(struct cgroup_subsys_state *css, |
442 | struct cftype *cft) | ||
445 | { | 443 | { |
446 | struct freezer *freezer = cgroup_freezer(cgroup); | 444 | struct freezer *freezer = css_freezer(css); |
447 | 445 | ||
448 | return (bool)(freezer->state & CGROUP_FREEZING_SELF); | 446 | return (bool)(freezer->state & CGROUP_FREEZING_SELF); |
449 | } | 447 | } |
450 | 448 | ||
451 | static u64 freezer_parent_freezing_read(struct cgroup *cgroup, struct cftype *cft) | 449 | static u64 freezer_parent_freezing_read(struct cgroup_subsys_state *css, |
450 | struct cftype *cft) | ||
452 | { | 451 | { |
453 | struct freezer *freezer = cgroup_freezer(cgroup); | 452 | struct freezer *freezer = css_freezer(css); |
454 | 453 | ||
455 | return (bool)(freezer->state & CGROUP_FREEZING_PARENT); | 454 | return (bool)(freezer->state & CGROUP_FREEZING_PARENT); |
456 | } | 455 | } |
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index ea1966db34f2..6bf981e13c43 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
@@ -68,10 +68,6 @@ | |||
68 | */ | 68 | */ |
69 | int number_of_cpusets __read_mostly; | 69 | int number_of_cpusets __read_mostly; |
70 | 70 | ||
71 | /* Forward declare cgroup structures */ | ||
72 | struct cgroup_subsys cpuset_subsys; | ||
73 | struct cpuset; | ||
74 | |||
75 | /* See "Frequency meter" comments, below. */ | 71 | /* See "Frequency meter" comments, below. */ |
76 | 72 | ||
77 | struct fmeter { | 73 | struct fmeter { |
@@ -115,27 +111,20 @@ struct cpuset { | |||
115 | int relax_domain_level; | 111 | int relax_domain_level; |
116 | }; | 112 | }; |
117 | 113 | ||
118 | /* Retrieve the cpuset for a cgroup */ | 114 | static inline struct cpuset *css_cs(struct cgroup_subsys_state *css) |
119 | static inline struct cpuset *cgroup_cs(struct cgroup *cgrp) | ||
120 | { | 115 | { |
121 | return container_of(cgroup_subsys_state(cgrp, cpuset_subsys_id), | 116 | return css ? container_of(css, struct cpuset, css) : NULL; |
122 | struct cpuset, css); | ||
123 | } | 117 | } |
124 | 118 | ||
125 | /* Retrieve the cpuset for a task */ | 119 | /* Retrieve the cpuset for a task */ |
126 | static inline struct cpuset *task_cs(struct task_struct *task) | 120 | static inline struct cpuset *task_cs(struct task_struct *task) |
127 | { | 121 | { |
128 | return container_of(task_subsys_state(task, cpuset_subsys_id), | 122 | return css_cs(task_css(task, cpuset_subsys_id)); |
129 | struct cpuset, css); | ||
130 | } | 123 | } |
131 | 124 | ||
132 | static inline struct cpuset *parent_cs(const struct cpuset *cs) | 125 | static inline struct cpuset *parent_cs(struct cpuset *cs) |
133 | { | 126 | { |
134 | struct cgroup *pcgrp = cs->css.cgroup->parent; | 127 | return css_cs(css_parent(&cs->css)); |
135 | |||
136 | if (pcgrp) | ||
137 | return cgroup_cs(pcgrp); | ||
138 | return NULL; | ||
139 | } | 128 | } |
140 | 129 | ||
141 | #ifdef CONFIG_NUMA | 130 | #ifdef CONFIG_NUMA |
@@ -212,29 +201,30 @@ static struct cpuset top_cpuset = { | |||
212 | /** | 201 | /** |
213 | * cpuset_for_each_child - traverse online children of a cpuset | 202 | * cpuset_for_each_child - traverse online children of a cpuset |
214 | * @child_cs: loop cursor pointing to the current child | 203 | * @child_cs: loop cursor pointing to the current child |
215 | * @pos_cgrp: used for iteration | 204 | * @pos_css: used for iteration |
216 | * @parent_cs: target cpuset to walk children of | 205 | * @parent_cs: target cpuset to walk children of |
217 | * | 206 | * |
218 | * Walk @child_cs through the online children of @parent_cs. Must be used | 207 | * Walk @child_cs through the online children of @parent_cs. Must be used |
219 | * with RCU read locked. | 208 | * with RCU read locked. |
220 | */ | 209 | */ |
221 | #define cpuset_for_each_child(child_cs, pos_cgrp, parent_cs) \ | 210 | #define cpuset_for_each_child(child_cs, pos_css, parent_cs) \ |
222 | cgroup_for_each_child((pos_cgrp), (parent_cs)->css.cgroup) \ | 211 | css_for_each_child((pos_css), &(parent_cs)->css) \ |
223 | if (is_cpuset_online(((child_cs) = cgroup_cs((pos_cgrp))))) | 212 | if (is_cpuset_online(((child_cs) = css_cs((pos_css))))) |
224 | 213 | ||
225 | /** | 214 | /** |
226 | * cpuset_for_each_descendant_pre - pre-order walk of a cpuset's descendants | 215 | * cpuset_for_each_descendant_pre - pre-order walk of a cpuset's descendants |
227 | * @des_cs: loop cursor pointing to the current descendant | 216 | * @des_cs: loop cursor pointing to the current descendant |
228 | * @pos_cgrp: used for iteration | 217 | * @pos_css: used for iteration |
229 | * @root_cs: target cpuset to walk descendants of | 218 | * @root_cs: target cpuset to walk descendants of |
230 | * | 219 | * |
231 | * Walk @des_cs through the online descendants of @root_cs. Must be used | 220 | * Walk @des_cs through the online descendants of @root_cs. Must be used |
232 | * with RCU read locked. The caller may modify @pos_cgrp by calling | 221 | * with RCU read locked. The caller may modify @pos_css by calling |
233 | * cgroup_rightmost_descendant() to skip subtree. | 222 | * css_rightmost_descendant() to skip subtree. @root_cs is included in the |
223 | * iteration and the first node to be visited. | ||
234 | */ | 224 | */ |
235 | #define cpuset_for_each_descendant_pre(des_cs, pos_cgrp, root_cs) \ | 225 | #define cpuset_for_each_descendant_pre(des_cs, pos_css, root_cs) \ |
236 | cgroup_for_each_descendant_pre((pos_cgrp), (root_cs)->css.cgroup) \ | 226 | css_for_each_descendant_pre((pos_css), &(root_cs)->css) \ |
237 | if (is_cpuset_online(((des_cs) = cgroup_cs((pos_cgrp))))) | 227 | if (is_cpuset_online(((des_cs) = css_cs((pos_css))))) |
238 | 228 | ||
239 | /* | 229 | /* |
240 | * There are two global mutexes guarding cpuset structures - cpuset_mutex | 230 | * There are two global mutexes guarding cpuset structures - cpuset_mutex |
@@ -320,8 +310,7 @@ static struct file_system_type cpuset_fs_type = { | |||
320 | * | 310 | * |
321 | * Call with callback_mutex held. | 311 | * Call with callback_mutex held. |
322 | */ | 312 | */ |
323 | static void guarantee_online_cpus(const struct cpuset *cs, | 313 | static void guarantee_online_cpus(struct cpuset *cs, struct cpumask *pmask) |
324 | struct cpumask *pmask) | ||
325 | { | 314 | { |
326 | while (!cpumask_intersects(cs->cpus_allowed, cpu_online_mask)) | 315 | while (!cpumask_intersects(cs->cpus_allowed, cpu_online_mask)) |
327 | cs = parent_cs(cs); | 316 | cs = parent_cs(cs); |
@@ -339,7 +328,7 @@ static void guarantee_online_cpus(const struct cpuset *cs, | |||
339 | * | 328 | * |
340 | * Call with callback_mutex held. | 329 | * Call with callback_mutex held. |
341 | */ | 330 | */ |
342 | static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask) | 331 | static void guarantee_online_mems(struct cpuset *cs, nodemask_t *pmask) |
343 | { | 332 | { |
344 | while (!nodes_intersects(cs->mems_allowed, node_states[N_MEMORY])) | 333 | while (!nodes_intersects(cs->mems_allowed, node_states[N_MEMORY])) |
345 | cs = parent_cs(cs); | 334 | cs = parent_cs(cs); |
@@ -384,7 +373,7 @@ static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q) | |||
384 | * alloc_trial_cpuset - allocate a trial cpuset | 373 | * alloc_trial_cpuset - allocate a trial cpuset |
385 | * @cs: the cpuset that the trial cpuset duplicates | 374 | * @cs: the cpuset that the trial cpuset duplicates |
386 | */ | 375 | */ |
387 | static struct cpuset *alloc_trial_cpuset(const struct cpuset *cs) | 376 | static struct cpuset *alloc_trial_cpuset(struct cpuset *cs) |
388 | { | 377 | { |
389 | struct cpuset *trial; | 378 | struct cpuset *trial; |
390 | 379 | ||
@@ -431,9 +420,9 @@ static void free_trial_cpuset(struct cpuset *trial) | |||
431 | * Return 0 if valid, -errno if not. | 420 | * Return 0 if valid, -errno if not. |
432 | */ | 421 | */ |
433 | 422 | ||
434 | static int validate_change(const struct cpuset *cur, const struct cpuset *trial) | 423 | static int validate_change(struct cpuset *cur, struct cpuset *trial) |
435 | { | 424 | { |
436 | struct cgroup *cgrp; | 425 | struct cgroup_subsys_state *css; |
437 | struct cpuset *c, *par; | 426 | struct cpuset *c, *par; |
438 | int ret; | 427 | int ret; |
439 | 428 | ||
@@ -441,7 +430,7 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial) | |||
441 | 430 | ||
442 | /* Each of our child cpusets must be a subset of us */ | 431 | /* Each of our child cpusets must be a subset of us */ |
443 | ret = -EBUSY; | 432 | ret = -EBUSY; |
444 | cpuset_for_each_child(c, cgrp, cur) | 433 | cpuset_for_each_child(c, css, cur) |
445 | if (!is_cpuset_subset(c, trial)) | 434 | if (!is_cpuset_subset(c, trial)) |
446 | goto out; | 435 | goto out; |
447 | 436 | ||
@@ -462,7 +451,7 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial) | |||
462 | * overlap | 451 | * overlap |
463 | */ | 452 | */ |
464 | ret = -EINVAL; | 453 | ret = -EINVAL; |
465 | cpuset_for_each_child(c, cgrp, par) { | 454 | cpuset_for_each_child(c, css, par) { |
466 | if ((is_cpu_exclusive(trial) || is_cpu_exclusive(c)) && | 455 | if ((is_cpu_exclusive(trial) || is_cpu_exclusive(c)) && |
467 | c != cur && | 456 | c != cur && |
468 | cpumask_intersects(trial->cpus_allowed, c->cpus_allowed)) | 457 | cpumask_intersects(trial->cpus_allowed, c->cpus_allowed)) |
@@ -515,13 +504,16 @@ static void update_domain_attr_tree(struct sched_domain_attr *dattr, | |||
515 | struct cpuset *root_cs) | 504 | struct cpuset *root_cs) |
516 | { | 505 | { |
517 | struct cpuset *cp; | 506 | struct cpuset *cp; |
518 | struct cgroup *pos_cgrp; | 507 | struct cgroup_subsys_state *pos_css; |
519 | 508 | ||
520 | rcu_read_lock(); | 509 | rcu_read_lock(); |
521 | cpuset_for_each_descendant_pre(cp, pos_cgrp, root_cs) { | 510 | cpuset_for_each_descendant_pre(cp, pos_css, root_cs) { |
511 | if (cp == root_cs) | ||
512 | continue; | ||
513 | |||
522 | /* skip the whole subtree if @cp doesn't have any CPU */ | 514 | /* skip the whole subtree if @cp doesn't have any CPU */ |
523 | if (cpumask_empty(cp->cpus_allowed)) { | 515 | if (cpumask_empty(cp->cpus_allowed)) { |
524 | pos_cgrp = cgroup_rightmost_descendant(pos_cgrp); | 516 | pos_css = css_rightmost_descendant(pos_css); |
525 | continue; | 517 | continue; |
526 | } | 518 | } |
527 | 519 | ||
@@ -596,7 +588,7 @@ static int generate_sched_domains(cpumask_var_t **domains, | |||
596 | struct sched_domain_attr *dattr; /* attributes for custom domains */ | 588 | struct sched_domain_attr *dattr; /* attributes for custom domains */ |
597 | int ndoms = 0; /* number of sched domains in result */ | 589 | int ndoms = 0; /* number of sched domains in result */ |
598 | int nslot; /* next empty doms[] struct cpumask slot */ | 590 | int nslot; /* next empty doms[] struct cpumask slot */ |
599 | struct cgroup *pos_cgrp; | 591 | struct cgroup_subsys_state *pos_css; |
600 | 592 | ||
601 | doms = NULL; | 593 | doms = NULL; |
602 | dattr = NULL; | 594 | dattr = NULL; |
@@ -625,7 +617,9 @@ static int generate_sched_domains(cpumask_var_t **domains, | |||
625 | csn = 0; | 617 | csn = 0; |
626 | 618 | ||
627 | rcu_read_lock(); | 619 | rcu_read_lock(); |
628 | cpuset_for_each_descendant_pre(cp, pos_cgrp, &top_cpuset) { | 620 | cpuset_for_each_descendant_pre(cp, pos_css, &top_cpuset) { |
621 | if (cp == &top_cpuset) | ||
622 | continue; | ||
629 | /* | 623 | /* |
630 | * Continue traversing beyond @cp iff @cp has some CPUs and | 624 | * Continue traversing beyond @cp iff @cp has some CPUs and |
631 | * isn't load balancing. The former is obvious. The | 625 | * isn't load balancing. The former is obvious. The |
@@ -642,7 +636,7 @@ static int generate_sched_domains(cpumask_var_t **domains, | |||
642 | csa[csn++] = cp; | 636 | csa[csn++] = cp; |
643 | 637 | ||
644 | /* skip @cp's subtree */ | 638 | /* skip @cp's subtree */ |
645 | pos_cgrp = cgroup_rightmost_descendant(pos_cgrp); | 639 | pos_css = css_rightmost_descendant(pos_css); |
646 | } | 640 | } |
647 | rcu_read_unlock(); | 641 | rcu_read_unlock(); |
648 | 642 | ||
@@ -837,52 +831,45 @@ static struct cpuset *effective_nodemask_cpuset(struct cpuset *cs) | |||
837 | /** | 831 | /** |
838 | * cpuset_change_cpumask - make a task's cpus_allowed the same as its cpuset's | 832 | * cpuset_change_cpumask - make a task's cpus_allowed the same as its cpuset's |
839 | * @tsk: task to test | 833 | * @tsk: task to test |
840 | * @scan: struct cgroup_scanner containing the cgroup of the task | 834 | * @data: cpuset @tsk belongs to |
841 | * | 835 | * |
842 | * Called by cgroup_scan_tasks() for each task in a cgroup whose | 836 | * Called by css_scan_tasks() for each task in a cgroup whose cpus_allowed |
843 | * cpus_allowed mask needs to be changed. | 837 | * mask needs to be changed. |
844 | * | 838 | * |
845 | * We don't need to re-check for the cgroup/cpuset membership, since we're | 839 | * We don't need to re-check for the cgroup/cpuset membership, since we're |
846 | * holding cpuset_mutex at this point. | 840 | * holding cpuset_mutex at this point. |
847 | */ | 841 | */ |
848 | static void cpuset_change_cpumask(struct task_struct *tsk, | 842 | static void cpuset_change_cpumask(struct task_struct *tsk, void *data) |
849 | struct cgroup_scanner *scan) | ||
850 | { | 843 | { |
851 | struct cpuset *cpus_cs; | 844 | struct cpuset *cs = data; |
845 | struct cpuset *cpus_cs = effective_cpumask_cpuset(cs); | ||
852 | 846 | ||
853 | cpus_cs = effective_cpumask_cpuset(cgroup_cs(scan->cg)); | ||
854 | set_cpus_allowed_ptr(tsk, cpus_cs->cpus_allowed); | 847 | set_cpus_allowed_ptr(tsk, cpus_cs->cpus_allowed); |
855 | } | 848 | } |
856 | 849 | ||
857 | /** | 850 | /** |
858 | * update_tasks_cpumask - Update the cpumasks of tasks in the cpuset. | 851 | * update_tasks_cpumask - Update the cpumasks of tasks in the cpuset. |
859 | * @cs: the cpuset in which each task's cpus_allowed mask needs to be changed | 852 | * @cs: the cpuset in which each task's cpus_allowed mask needs to be changed |
860 | * @heap: if NULL, defer allocating heap memory to cgroup_scan_tasks() | 853 | * @heap: if NULL, defer allocating heap memory to css_scan_tasks() |
861 | * | 854 | * |
862 | * Called with cpuset_mutex held | 855 | * Called with cpuset_mutex held |
863 | * | 856 | * |
864 | * The cgroup_scan_tasks() function will scan all the tasks in a cgroup, | 857 | * The css_scan_tasks() function will scan all the tasks in a cgroup, |
865 | * calling callback functions for each. | 858 | * calling callback functions for each. |
866 | * | 859 | * |
867 | * No return value. It's guaranteed that cgroup_scan_tasks() always returns 0 | 860 | * No return value. It's guaranteed that css_scan_tasks() always returns 0 |
868 | * if @heap != NULL. | 861 | * if @heap != NULL. |
869 | */ | 862 | */ |
870 | static void update_tasks_cpumask(struct cpuset *cs, struct ptr_heap *heap) | 863 | static void update_tasks_cpumask(struct cpuset *cs, struct ptr_heap *heap) |
871 | { | 864 | { |
872 | struct cgroup_scanner scan; | 865 | css_scan_tasks(&cs->css, NULL, cpuset_change_cpumask, cs, heap); |
873 | |||
874 | scan.cg = cs->css.cgroup; | ||
875 | scan.test_task = NULL; | ||
876 | scan.process_task = cpuset_change_cpumask; | ||
877 | scan.heap = heap; | ||
878 | cgroup_scan_tasks(&scan); | ||
879 | } | 866 | } |
880 | 867 | ||
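
css_scan_tasks() keeps cgroup_scan_tasks()'s contract but takes the scanner fields as plain arguments instead of a struct cgroup_scanner; a sketch of the call as used here. A NULL test callback means every task is processed, and per the comment above the scan is guaranteed to return 0 when a pre-allocated heap is passed.

    int ret = css_scan_tasks(&cs->css,
                             NULL,                   /* test_task */
                             cpuset_change_cpumask,  /* process_task */
                             cs,                     /* void *data */
                             heap);
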
881 | /* | 868 | /* |
882 | * update_tasks_cpumask_hier - Update the cpumasks of tasks in the hierarchy. | 869 | * update_tasks_cpumask_hier - Update the cpumasks of tasks in the hierarchy. |
883 | * @root_cs: the root cpuset of the hierarchy | 870 | * @root_cs: the root cpuset of the hierarchy |
884 | * @update_root: update root cpuset or not? | 871 | * @update_root: update root cpuset or not? |
885 | * @heap: the heap used by cgroup_scan_tasks() | 872 | * @heap: the heap used by css_scan_tasks() |
886 | * | 873 | * |
887 | * This will update cpumasks of tasks in @root_cs and all other empty cpusets | 874 | * This will update cpumasks of tasks in @root_cs and all other empty cpusets |
888 | * which take on cpumask of @root_cs. | 875 | * which take on cpumask of @root_cs. |
@@ -893,17 +880,19 @@ static void update_tasks_cpumask_hier(struct cpuset *root_cs, | |||
893 | bool update_root, struct ptr_heap *heap) | 880 | bool update_root, struct ptr_heap *heap) |
894 | { | 881 | { |
895 | struct cpuset *cp; | 882 | struct cpuset *cp; |
896 | struct cgroup *pos_cgrp; | 883 | struct cgroup_subsys_state *pos_css; |
897 | |||
898 | if (update_root) | ||
899 | update_tasks_cpumask(root_cs, heap); | ||
900 | 884 | ||
901 | rcu_read_lock(); | 885 | rcu_read_lock(); |
902 | cpuset_for_each_descendant_pre(cp, pos_cgrp, root_cs) { | 886 | cpuset_for_each_descendant_pre(cp, pos_css, root_cs) { |
903 | /* skip the whole subtree if @cp have some CPU */ | 887 | if (cp == root_cs) { |
904 | if (!cpumask_empty(cp->cpus_allowed)) { | 888 | if (!update_root) |
905 | pos_cgrp = cgroup_rightmost_descendant(pos_cgrp); | 889 | continue; |
906 | continue; | 890 | } else { |
891 | /* skip the whole subtree if @cp has some CPU */ ||
892 | if (!cpumask_empty(cp->cpus_allowed)) { | ||
893 | pos_css = css_rightmost_descendant(pos_css); | ||
894 | continue; | ||
895 | } | ||
907 | } | 896 | } |
908 | if (!css_tryget(&cp->css)) | 897 | if (!css_tryget(&cp->css)) |
909 | continue; | 898 | continue; |
@@ -1059,20 +1048,24 @@ static void cpuset_change_task_nodemask(struct task_struct *tsk, | |||
1059 | task_unlock(tsk); | 1048 | task_unlock(tsk); |
1060 | } | 1049 | } |
1061 | 1050 | ||
1051 | struct cpuset_change_nodemask_arg { | ||
1052 | struct cpuset *cs; | ||
1053 | nodemask_t *newmems; | ||
1054 | }; | ||
1055 | |||
1062 | /* | 1056 | /* |
1063 | * Update task's mems_allowed and rebind its mempolicy and vmas' mempolicy | 1057 | * Update task's mems_allowed and rebind its mempolicy and vmas' mempolicy |
1064 | * of it to cpuset's new mems_allowed, and migrate pages to new nodes if | 1058 | * of it to cpuset's new mems_allowed, and migrate pages to new nodes if |
1065 | * memory_migrate flag is set. Called with cpuset_mutex held. | 1059 | * memory_migrate flag is set. Called with cpuset_mutex held. |
1066 | */ | 1060 | */ |
1067 | static void cpuset_change_nodemask(struct task_struct *p, | 1061 | static void cpuset_change_nodemask(struct task_struct *p, void *data) |
1068 | struct cgroup_scanner *scan) | ||
1069 | { | 1062 | { |
1070 | struct cpuset *cs = cgroup_cs(scan->cg); | 1063 | struct cpuset_change_nodemask_arg *arg = data; |
1064 | struct cpuset *cs = arg->cs; | ||
1071 | struct mm_struct *mm; | 1065 | struct mm_struct *mm; |
1072 | int migrate; | 1066 | int migrate; |
1073 | nodemask_t *newmems = scan->data; | ||
1074 | 1067 | ||
1075 | cpuset_change_task_nodemask(p, newmems); | 1068 | cpuset_change_task_nodemask(p, arg->newmems); |
1076 | 1069 | ||
1077 | mm = get_task_mm(p); | 1070 | mm = get_task_mm(p); |
1078 | if (!mm) | 1071 | if (!mm) |
@@ -1082,7 +1075,7 @@ static void cpuset_change_nodemask(struct task_struct *p, | |||
1082 | 1075 | ||
1083 | mpol_rebind_mm(mm, &cs->mems_allowed); | 1076 | mpol_rebind_mm(mm, &cs->mems_allowed); |
1084 | if (migrate) | 1077 | if (migrate) |
1085 | cpuset_migrate_mm(mm, &cs->old_mems_allowed, newmems); | 1078 | cpuset_migrate_mm(mm, &cs->old_mems_allowed, arg->newmems); |
1086 | mmput(mm); | 1079 | mmput(mm); |
1087 | } | 1080 | } |
1088 | 1081 | ||
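
Since css_scan_tasks() forwards only a single void *data, callbacks that used to pull several fields off the cgroup_scanner now bundle them in a stack-allocated struct, as the nodemask path does:

    static nodemask_t newmems;      /* protected by cpuset_mutex */
    struct cpuset_change_nodemask_arg arg = { .cs = cs, .newmems = &newmems };

    css_scan_tasks(&cs->css, NULL, cpuset_change_nodemask, &arg, heap);
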
@@ -1091,28 +1084,22 @@ static void *cpuset_being_rebound; | |||
1091 | /** | 1084 | /** |
1092 | * update_tasks_nodemask - Update the nodemasks of tasks in the cpuset. | 1085 | * update_tasks_nodemask - Update the nodemasks of tasks in the cpuset. |
1093 | * @cs: the cpuset in which each task's mems_allowed mask needs to be changed | 1086 | * @cs: the cpuset in which each task's mems_allowed mask needs to be changed |
1094 | * @heap: if NULL, defer allocating heap memory to cgroup_scan_tasks() | 1087 | * @heap: if NULL, defer allocating heap memory to css_scan_tasks() |
1095 | * | 1088 | * |
1096 | * Called with cpuset_mutex held | 1089 | * Called with cpuset_mutex held. No return value. It's guaranteed that |
1097 | * No return value. It's guaranteed that cgroup_scan_tasks() always returns 0 | 1090 | * css_scan_tasks() always returns 0 if @heap != NULL. |
1098 | * if @heap != NULL. | ||
1099 | */ | 1091 | */ |
1100 | static void update_tasks_nodemask(struct cpuset *cs, struct ptr_heap *heap) | 1092 | static void update_tasks_nodemask(struct cpuset *cs, struct ptr_heap *heap) |
1101 | { | 1093 | { |
1102 | static nodemask_t newmems; /* protected by cpuset_mutex */ | 1094 | static nodemask_t newmems; /* protected by cpuset_mutex */ |
1103 | struct cgroup_scanner scan; | ||
1104 | struct cpuset *mems_cs = effective_nodemask_cpuset(cs); | 1095 | struct cpuset *mems_cs = effective_nodemask_cpuset(cs); |
1096 | struct cpuset_change_nodemask_arg arg = { .cs = cs, | ||
1097 | .newmems = &newmems }; | ||
1105 | 1098 | ||
1106 | cpuset_being_rebound = cs; /* causes mpol_dup() rebind */ | 1099 | cpuset_being_rebound = cs; /* causes mpol_dup() rebind */ |
1107 | 1100 | ||
1108 | guarantee_online_mems(mems_cs, &newmems); | 1101 | guarantee_online_mems(mems_cs, &newmems); |
1109 | 1102 | ||
1110 | scan.cg = cs->css.cgroup; | ||
1111 | scan.test_task = NULL; | ||
1112 | scan.process_task = cpuset_change_nodemask; | ||
1113 | scan.heap = heap; | ||
1114 | scan.data = &newmems; | ||
1115 | |||
1116 | /* | 1103 | /* |
1117 | * The mpol_rebind_mm() call takes mmap_sem, which we couldn't | 1104 | * The mpol_rebind_mm() call takes mmap_sem, which we couldn't |
1118 | * take while holding tasklist_lock. Forks can happen - the | 1105 | * take while holding tasklist_lock. Forks can happen - the |
@@ -1123,7 +1110,7 @@ static void update_tasks_nodemask(struct cpuset *cs, struct ptr_heap *heap) | |||
1123 | * It's ok if we rebind the same mm twice; mpol_rebind_mm() | 1110 | * It's ok if we rebind the same mm twice; mpol_rebind_mm() |
1124 | * is idempotent. Also migrate pages in each mm to new nodes. | 1111 | * is idempotent. Also migrate pages in each mm to new nodes. |
1125 | */ | 1112 | */ |
1126 | cgroup_scan_tasks(&scan); | 1113 | css_scan_tasks(&cs->css, NULL, cpuset_change_nodemask, &arg, heap); |
1127 | 1114 | ||
1128 | /* | 1115 | /* |
1129 | * All the tasks' nodemasks have been updated, update | 1116 | * All the tasks' nodemasks have been updated, update |
@@ -1139,7 +1126,7 @@ static void update_tasks_nodemask(struct cpuset *cs, struct ptr_heap *heap) | |||
1139 | * update_tasks_nodemask_hier - Update the nodemasks of tasks in the hierarchy. | 1126 | * update_tasks_nodemask_hier - Update the nodemasks of tasks in the hierarchy. |
1140 | * @root_cs: the root cpuset of the hierarchy | 1127 | * @root_cs: the root cpuset of the hierarchy |
1141 | * @update_root: update the root cpuset or not? | 1128 | * @update_root: update the root cpuset or not? |
1142 | * @heap: the heap used by cgroup_scan_tasks() | 1129 | * @heap: the heap used by css_scan_tasks() |
1143 | * | 1130 | * |
1144 | * This will update nodemasks of tasks in @root_cs and all other empty cpusets | 1131 | * This will update nodemasks of tasks in @root_cs and all other empty cpusets |
1145 | * which take on nodemask of @root_cs. | 1132 | * which take on nodemask of @root_cs. |
@@ -1150,17 +1137,19 @@ static void update_tasks_nodemask_hier(struct cpuset *root_cs, | |||
1150 | bool update_root, struct ptr_heap *heap) | 1137 | bool update_root, struct ptr_heap *heap) |
1151 | { | 1138 | { |
1152 | struct cpuset *cp; | 1139 | struct cpuset *cp; |
1153 | struct cgroup *pos_cgrp; | 1140 | struct cgroup_subsys_state *pos_css; |
1154 | |||
1155 | if (update_root) | ||
1156 | update_tasks_nodemask(root_cs, heap); | ||
1157 | 1141 | ||
1158 | rcu_read_lock(); | 1142 | rcu_read_lock(); |
1159 | cpuset_for_each_descendant_pre(cp, pos_cgrp, root_cs) { | 1143 | cpuset_for_each_descendant_pre(cp, pos_css, root_cs) { |
1160 | /* skip the whole subtree if @cp have some CPU */ | 1144 | if (cp == root_cs) { |
1161 | if (!nodes_empty(cp->mems_allowed)) { | 1145 | if (!update_root) |
1162 | pos_cgrp = cgroup_rightmost_descendant(pos_cgrp); | 1146 | continue; |
1163 | continue; | 1147 | } else { |
1148 | /* skip the whole subtree if @cp has some mems */ ||
1149 | if (!nodes_empty(cp->mems_allowed)) { | ||
1150 | pos_css = css_rightmost_descendant(pos_css); | ||
1151 | continue; | ||
1152 | } | ||
1164 | } | 1153 | } |
1165 | if (!css_tryget(&cp->css)) | 1154 | if (!css_tryget(&cp->css)) |
1166 | continue; | 1155 | continue; |
@@ -1267,44 +1256,39 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val) | |||
1267 | return 0; | 1256 | return 0; |
1268 | } | 1257 | } |
1269 | 1258 | ||
1270 | /* | 1259 | /** |
1271 | * cpuset_change_flag - make a task's spread flags the same as its cpuset's | 1260 | * cpuset_change_flag - make a task's spread flags the same as its cpuset's |
1272 | * @tsk: task to be updated | 1261 | * @tsk: task to be updated |
1273 | * @scan: struct cgroup_scanner containing the cgroup of the task | 1262 | * @data: cpuset @tsk belongs to |
1274 | * | 1263 | * |
1275 | * Called by cgroup_scan_tasks() for each task in a cgroup. | 1264 | * Called by css_scan_tasks() for each task in a cgroup. |
1276 | * | 1265 | * |
1277 | * We don't need to re-check for the cgroup/cpuset membership, since we're | 1266 | * We don't need to re-check for the cgroup/cpuset membership, since we're |
1278 | * holding cpuset_mutex at this point. | 1267 | * holding cpuset_mutex at this point. |
1279 | */ | 1268 | */ |
1280 | static void cpuset_change_flag(struct task_struct *tsk, | 1269 | static void cpuset_change_flag(struct task_struct *tsk, void *data) |
1281 | struct cgroup_scanner *scan) | ||
1282 | { | 1270 | { |
1283 | cpuset_update_task_spread_flag(cgroup_cs(scan->cg), tsk); | 1271 | struct cpuset *cs = data; |
1272 | |||
1273 | cpuset_update_task_spread_flag(cs, tsk); | ||
1284 | } | 1274 | } |
1285 | 1275 | ||
1286 | /* | 1276 | /** |
1287 | * update_tasks_flags - update the spread flags of tasks in the cpuset. | 1277 | * update_tasks_flags - update the spread flags of tasks in the cpuset. |
1288 | * @cs: the cpuset in which each task's spread flags need to be changed | 1278 | * @cs: the cpuset in which each task's spread flags need to be changed |
1289 | * @heap: if NULL, defer allocating heap memory to cgroup_scan_tasks() | 1279 | * @heap: if NULL, defer allocating heap memory to css_scan_tasks() |
1290 | * | 1280 | * |
1291 | * Called with cpuset_mutex held | 1281 | * Called with cpuset_mutex held |
1292 | * | 1282 | * |
1293 | * The cgroup_scan_tasks() function will scan all the tasks in a cgroup, | 1283 | * The css_scan_tasks() function will scan all the tasks in a cgroup, |
1294 | * calling callback functions for each. | 1284 | * calling callback functions for each. |
1295 | * | 1285 | * |
1296 | * No return value. It's guaranteed that cgroup_scan_tasks() always returns 0 | 1286 | * No return value. It's guaranteed that css_scan_tasks() always returns 0 |
1297 | * if @heap != NULL. | 1287 | * if @heap != NULL. |
1298 | */ | 1288 | */ |
1299 | static void update_tasks_flags(struct cpuset *cs, struct ptr_heap *heap) | 1289 | static void update_tasks_flags(struct cpuset *cs, struct ptr_heap *heap) |
1300 | { | 1290 | { |
1301 | struct cgroup_scanner scan; | 1291 | css_scan_tasks(&cs->css, NULL, cpuset_change_flag, cs, heap); |
1302 | |||
1303 | scan.cg = cs->css.cgroup; | ||
1304 | scan.test_task = NULL; | ||
1305 | scan.process_task = cpuset_change_flag; | ||
1306 | scan.heap = heap; | ||
1307 | cgroup_scan_tasks(&scan); | ||
1308 | } | 1292 | } |
1309 | 1293 | ||
1310 | /* | 1294 | /* |
@@ -1462,9 +1446,10 @@ static int fmeter_getrate(struct fmeter *fmp) | |||
1462 | } | 1446 | } |
1463 | 1447 | ||
1464 | /* Called by cgroups to determine if a cpuset is usable; cpuset_mutex held */ | 1448 | /* Called by cgroups to determine if a cpuset is usable; cpuset_mutex held */ |
1465 | static int cpuset_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) | 1449 | static int cpuset_can_attach(struct cgroup_subsys_state *css, |
1450 | struct cgroup_taskset *tset) | ||
1466 | { | 1451 | { |
1467 | struct cpuset *cs = cgroup_cs(cgrp); | 1452 | struct cpuset *cs = css_cs(css); |
1468 | struct task_struct *task; | 1453 | struct task_struct *task; |
1469 | int ret; | 1454 | int ret; |
1470 | 1455 | ||
@@ -1475,11 +1460,11 @@ static int cpuset_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) | |||
1475 | * flag is set. | 1460 | * flag is set. |
1476 | */ | 1461 | */ |
1477 | ret = -ENOSPC; | 1462 | ret = -ENOSPC; |
1478 | if (!cgroup_sane_behavior(cgrp) && | 1463 | if (!cgroup_sane_behavior(css->cgroup) && |
1479 | (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))) | 1464 | (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))) |
1480 | goto out_unlock; | 1465 | goto out_unlock; |
1481 | 1466 | ||
1482 | cgroup_taskset_for_each(task, cgrp, tset) { | 1467 | cgroup_taskset_for_each(task, css, tset) { |
1483 | /* | 1468 | /* |
1484 | * Kthreads which disallow setaffinity shouldn't be moved | 1469 | * Kthreads which disallow setaffinity shouldn't be moved |
1485 | * to a new cpuset; we don't want to change their cpu | 1470 | * to a new cpuset; we don't want to change their cpu |
@@ -1508,11 +1493,11 @@ out_unlock: | |||
1508 | return ret; | 1493 | return ret; |
1509 | } | 1494 | } |
1510 | 1495 | ||
1511 | static void cpuset_cancel_attach(struct cgroup *cgrp, | 1496 | static void cpuset_cancel_attach(struct cgroup_subsys_state *css, |
1512 | struct cgroup_taskset *tset) | 1497 | struct cgroup_taskset *tset) |
1513 | { | 1498 | { |
1514 | mutex_lock(&cpuset_mutex); | 1499 | mutex_lock(&cpuset_mutex); |
1515 | cgroup_cs(cgrp)->attach_in_progress--; | 1500 | css_cs(css)->attach_in_progress--; |
1516 | mutex_unlock(&cpuset_mutex); | 1501 | mutex_unlock(&cpuset_mutex); |
1517 | } | 1502 | } |
1518 | 1503 | ||
@@ -1523,16 +1508,18 @@ static void cpuset_cancel_attach(struct cgroup *cgrp, | |||
1523 | */ | 1508 | */ |
1524 | static cpumask_var_t cpus_attach; | 1509 | static cpumask_var_t cpus_attach; |
1525 | 1510 | ||
1526 | static void cpuset_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) | 1511 | static void cpuset_attach(struct cgroup_subsys_state *css, |
1512 | struct cgroup_taskset *tset) | ||
1527 | { | 1513 | { |
1528 | /* static buf protected by cpuset_mutex */ | 1514 | /* static buf protected by cpuset_mutex */ |
1529 | static nodemask_t cpuset_attach_nodemask_to; | 1515 | static nodemask_t cpuset_attach_nodemask_to; |
1530 | struct mm_struct *mm; | 1516 | struct mm_struct *mm; |
1531 | struct task_struct *task; | 1517 | struct task_struct *task; |
1532 | struct task_struct *leader = cgroup_taskset_first(tset); | 1518 | struct task_struct *leader = cgroup_taskset_first(tset); |
1533 | struct cgroup *oldcgrp = cgroup_taskset_cur_cgroup(tset); | 1519 | struct cgroup_subsys_state *oldcss = cgroup_taskset_cur_css(tset, |
1534 | struct cpuset *cs = cgroup_cs(cgrp); | 1520 | cpuset_subsys_id); |
1535 | struct cpuset *oldcs = cgroup_cs(oldcgrp); | 1521 | struct cpuset *cs = css_cs(css); |
1522 | struct cpuset *oldcs = css_cs(oldcss); | ||
1536 | struct cpuset *cpus_cs = effective_cpumask_cpuset(cs); | 1523 | struct cpuset *cpus_cs = effective_cpumask_cpuset(cs); |
1537 | struct cpuset *mems_cs = effective_nodemask_cpuset(cs); | 1524 | struct cpuset *mems_cs = effective_nodemask_cpuset(cs); |
1538 | 1525 | ||
@@ -1546,7 +1533,7 @@ static void cpuset_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) | |||
1546 | 1533 | ||
1547 | guarantee_online_mems(mems_cs, &cpuset_attach_nodemask_to); | 1534 | guarantee_online_mems(mems_cs, &cpuset_attach_nodemask_to); |
1548 | 1535 | ||
1549 | cgroup_taskset_for_each(task, cgrp, tset) { | 1536 | cgroup_taskset_for_each(task, css, tset) { |
1550 | /* | 1537 | /* |
1551 | * can_attach beforehand should guarantee that this doesn't | 1538 | * can_attach beforehand should guarantee that this doesn't |
1552 | * fail. TODO: have a better way to handle failure here | 1539 | * fail. TODO: have a better way to handle failure here |
@@ -1608,9 +1595,10 @@ typedef enum { | |||
1608 | FILE_SPREAD_SLAB, | 1595 | FILE_SPREAD_SLAB, |
1609 | } cpuset_filetype_t; | 1596 | } cpuset_filetype_t; |
1610 | 1597 | ||
1611 | static int cpuset_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val) | 1598 | static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft, |
1599 | u64 val) | ||
1612 | { | 1600 | { |
1613 | struct cpuset *cs = cgroup_cs(cgrp); | 1601 | struct cpuset *cs = css_cs(css); |
1614 | cpuset_filetype_t type = cft->private; | 1602 | cpuset_filetype_t type = cft->private; |
1615 | int retval = 0; | 1603 | int retval = 0; |
1616 | 1604 | ||
@@ -1657,9 +1645,10 @@ out_unlock: | |||
1657 | return retval; | 1645 | return retval; |
1658 | } | 1646 | } |
1659 | 1647 | ||
1660 | static int cpuset_write_s64(struct cgroup *cgrp, struct cftype *cft, s64 val) | 1648 | static int cpuset_write_s64(struct cgroup_subsys_state *css, struct cftype *cft, |
1649 | s64 val) | ||
1661 | { | 1650 | { |
1662 | struct cpuset *cs = cgroup_cs(cgrp); | 1651 | struct cpuset *cs = css_cs(css); |
1663 | cpuset_filetype_t type = cft->private; | 1652 | cpuset_filetype_t type = cft->private; |
1664 | int retval = -ENODEV; | 1653 | int retval = -ENODEV; |
1665 | 1654 | ||
@@ -1683,10 +1672,10 @@ out_unlock: | |||
1683 | /* | 1672 | /* |
1684 | * Common handling for a write to a "cpus" or "mems" file. | 1673 | * Common handling for a write to a "cpus" or "mems" file. |
1685 | */ | 1674 | */ |
1686 | static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft, | 1675 | static int cpuset_write_resmask(struct cgroup_subsys_state *css, |
1687 | const char *buf) | 1676 | struct cftype *cft, const char *buf) |
1688 | { | 1677 | { |
1689 | struct cpuset *cs = cgroup_cs(cgrp); | 1678 | struct cpuset *cs = css_cs(css); |
1690 | struct cpuset *trialcs; | 1679 | struct cpuset *trialcs; |
1691 | int retval = -ENODEV; | 1680 | int retval = -ENODEV; |
1692 | 1681 | ||
@@ -1765,13 +1754,12 @@ static size_t cpuset_sprintf_memlist(char *page, struct cpuset *cs) | |||
1765 | return count; | 1754 | return count; |
1766 | } | 1755 | } |
1767 | 1756 | ||
1768 | static ssize_t cpuset_common_file_read(struct cgroup *cgrp, | 1757 | static ssize_t cpuset_common_file_read(struct cgroup_subsys_state *css, |
1769 | struct cftype *cft, | 1758 | struct cftype *cft, struct file *file, |
1770 | struct file *file, | 1759 | char __user *buf, size_t nbytes, |
1771 | char __user *buf, | 1760 | loff_t *ppos) |
1772 | size_t nbytes, loff_t *ppos) | ||
1773 | { | 1761 | { |
1774 | struct cpuset *cs = cgroup_cs(cgrp); | 1762 | struct cpuset *cs = css_cs(css); |
1775 | cpuset_filetype_t type = cft->private; | 1763 | cpuset_filetype_t type = cft->private; |
1776 | char *page; | 1764 | char *page; |
1777 | ssize_t retval = 0; | 1765 | ssize_t retval = 0; |
@@ -1801,9 +1789,9 @@ out: | |||
1801 | return retval; | 1789 | return retval; |
1802 | } | 1790 | } |
1803 | 1791 | ||
1804 | static u64 cpuset_read_u64(struct cgroup *cgrp, struct cftype *cft) | 1792 | static u64 cpuset_read_u64(struct cgroup_subsys_state *css, struct cftype *cft) |
1805 | { | 1793 | { |
1806 | struct cpuset *cs = cgroup_cs(cgrp); | 1794 | struct cpuset *cs = css_cs(css); |
1807 | cpuset_filetype_t type = cft->private; | 1795 | cpuset_filetype_t type = cft->private; |
1808 | switch (type) { | 1796 | switch (type) { |
1809 | case FILE_CPU_EXCLUSIVE: | 1797 | case FILE_CPU_EXCLUSIVE: |
@@ -1832,9 +1820,9 @@ static u64 cpuset_read_u64(struct cgroup *cgrp, struct cftype *cft) | |||
1832 | return 0; | 1820 | return 0; |
1833 | } | 1821 | } |
1834 | 1822 | ||
1835 | static s64 cpuset_read_s64(struct cgroup *cgrp, struct cftype *cft) | 1823 | static s64 cpuset_read_s64(struct cgroup_subsys_state *css, struct cftype *cft) |
1836 | { | 1824 | { |
1837 | struct cpuset *cs = cgroup_cs(cgrp); | 1825 | struct cpuset *cs = css_cs(css); |
1838 | cpuset_filetype_t type = cft->private; | 1826 | cpuset_filetype_t type = cft->private; |
1839 | switch (type) { | 1827 | switch (type) { |
1840 | case FILE_SCHED_RELAX_DOMAIN_LEVEL: | 1828 | case FILE_SCHED_RELAX_DOMAIN_LEVEL: |
@@ -1949,11 +1937,12 @@ static struct cftype files[] = { | |||
1949 | * cgrp: control group that the new cpuset will be part of | 1937 | * parent_css: css of the new cpuset's parent |
1950 | */ | 1938 | */ |
1951 | 1939 | ||
1952 | static struct cgroup_subsys_state *cpuset_css_alloc(struct cgroup *cgrp) | 1940 | static struct cgroup_subsys_state * |
1941 | cpuset_css_alloc(struct cgroup_subsys_state *parent_css) | ||
1953 | { | 1942 | { |
1954 | struct cpuset *cs; | 1943 | struct cpuset *cs; |
1955 | 1944 | ||
1956 | if (!cgrp->parent) | 1945 | if (!parent_css) |
1957 | return &top_cpuset.css; | 1946 | return &top_cpuset.css; |
1958 | 1947 | ||
1959 | cs = kzalloc(sizeof(*cs), GFP_KERNEL); | 1948 | cs = kzalloc(sizeof(*cs), GFP_KERNEL); |
@@ -1973,12 +1962,12 @@ static struct cgroup_subsys_state *cpuset_css_alloc(struct cgroup *cgrp) | |||
1973 | return &cs->css; | 1962 | return &cs->css; |
1974 | } | 1963 | } |
1975 | 1964 | ||
1976 | static int cpuset_css_online(struct cgroup *cgrp) | 1965 | static int cpuset_css_online(struct cgroup_subsys_state *css) |
1977 | { | 1966 | { |
1978 | struct cpuset *cs = cgroup_cs(cgrp); | 1967 | struct cpuset *cs = css_cs(css); |
1979 | struct cpuset *parent = parent_cs(cs); | 1968 | struct cpuset *parent = parent_cs(cs); |
1980 | struct cpuset *tmp_cs; | 1969 | struct cpuset *tmp_cs; |
1981 | struct cgroup *pos_cg; | 1970 | struct cgroup_subsys_state *pos_css; |
1982 | 1971 | ||
1983 | if (!parent) | 1972 | if (!parent) |
1984 | return 0; | 1973 | return 0; |
@@ -1993,7 +1982,7 @@ static int cpuset_css_online(struct cgroup *cgrp) | |||
1993 | 1982 | ||
1994 | number_of_cpusets++; | 1983 | number_of_cpusets++; |
1995 | 1984 | ||
1996 | if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags)) | 1985 | if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags)) |
1997 | goto out_unlock; | 1986 | goto out_unlock; |
1998 | 1987 | ||
1999 | /* | 1988 | /* |
@@ -2010,7 +1999,7 @@ static int cpuset_css_online(struct cgroup *cgrp) | |||
2010 | * (and likewise for mems) to the new cgroup. | 1999 | * (and likewise for mems) to the new cgroup. |
2011 | */ | 2000 | */ |
2012 | rcu_read_lock(); | 2001 | rcu_read_lock(); |
2013 | cpuset_for_each_child(tmp_cs, pos_cg, parent) { | 2002 | cpuset_for_each_child(tmp_cs, pos_css, parent) { |
2014 | if (is_mem_exclusive(tmp_cs) || is_cpu_exclusive(tmp_cs)) { | 2003 | if (is_mem_exclusive(tmp_cs) || is_cpu_exclusive(tmp_cs)) { |
2015 | rcu_read_unlock(); | 2004 | rcu_read_unlock(); |
2016 | goto out_unlock; | 2005 | goto out_unlock; |
@@ -2027,9 +2016,15 @@ out_unlock: | |||
2027 | return 0; | 2016 | return 0; |
2028 | } | 2017 | } |
2029 | 2018 | ||
2030 | static void cpuset_css_offline(struct cgroup *cgrp) | 2019 | /* |
2020 | * If the cpuset being removed has its flag 'sched_load_balance' | ||
2021 | * enabled, then simulate turning sched_load_balance off, which | ||
2022 | * will call rebuild_sched_domains_locked(). | ||
2023 | */ | ||
2024 | |||
2025 | static void cpuset_css_offline(struct cgroup_subsys_state *css) | ||
2031 | { | 2026 | { |
2032 | struct cpuset *cs = cgroup_cs(cgrp); | 2027 | struct cpuset *cs = css_cs(css); |
2033 | 2028 | ||
2034 | mutex_lock(&cpuset_mutex); | 2029 | mutex_lock(&cpuset_mutex); |
2035 | 2030 | ||
@@ -2042,15 +2037,9 @@ static void cpuset_css_offline(struct cgroup *cgrp) | |||
2042 | mutex_unlock(&cpuset_mutex); | 2037 | mutex_unlock(&cpuset_mutex); |
2043 | } | 2038 | } |
2044 | 2039 | ||
2045 | /* | 2040 | static void cpuset_css_free(struct cgroup_subsys_state *css) |
2046 | * If the cpuset being removed has its flag 'sched_load_balance' | ||
2047 | * enabled, then simulate turning sched_load_balance off, which | ||
2048 | * will call rebuild_sched_domains_locked(). | ||
2049 | */ | ||
2050 | |||
2051 | static void cpuset_css_free(struct cgroup *cgrp) | ||
2052 | { | 2041 | { |
2053 | struct cpuset *cs = cgroup_cs(cgrp); | 2042 | struct cpuset *cs = css_cs(css); |
2054 | 2043 | ||
2055 | free_cpumask_var(cs->cpus_allowed); | 2044 | free_cpumask_var(cs->cpus_allowed); |
2056 | kfree(cs); | 2045 | kfree(cs); |
@@ -2257,11 +2246,11 @@ static void cpuset_hotplug_workfn(struct work_struct *work) | |||
2257 | /* if cpus or mems changed, we need to propagate to descendants */ | 2246 | /* if cpus or mems changed, we need to propagate to descendants */ |
2258 | if (cpus_updated || mems_updated) { | 2247 | if (cpus_updated || mems_updated) { |
2259 | struct cpuset *cs; | 2248 | struct cpuset *cs; |
2260 | struct cgroup *pos_cgrp; | 2249 | struct cgroup_subsys_state *pos_css; |
2261 | 2250 | ||
2262 | rcu_read_lock(); | 2251 | rcu_read_lock(); |
2263 | cpuset_for_each_descendant_pre(cs, pos_cgrp, &top_cpuset) { | 2252 | cpuset_for_each_descendant_pre(cs, pos_css, &top_cpuset) { |
2264 | if (!css_tryget(&cs->css)) | 2253 | if (cs == &top_cpuset || !css_tryget(&cs->css)) |
2265 | continue; | 2254 | continue; |
2266 | rcu_read_unlock(); | 2255 | rcu_read_unlock(); |
2267 | 2256 | ||
@@ -2350,7 +2339,7 @@ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask) | |||
2350 | 2339 | ||
2351 | void cpuset_cpus_allowed_fallback(struct task_struct *tsk) | 2340 | void cpuset_cpus_allowed_fallback(struct task_struct *tsk) |
2352 | { | 2341 | { |
2353 | const struct cpuset *cpus_cs; | 2342 | struct cpuset *cpus_cs; |
2354 | 2343 | ||
2355 | rcu_read_lock(); | 2344 | rcu_read_lock(); |
2356 | cpus_cs = effective_cpumask_cpuset(task_cs(tsk)); | 2345 | cpus_cs = effective_cpumask_cpuset(task_cs(tsk)); |
@@ -2423,7 +2412,7 @@ int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask) | |||
2423 | * callback_mutex. If no ancestor is mem_exclusive or mem_hardwall | 2412 | * callback_mutex. If no ancestor is mem_exclusive or mem_hardwall |
2424 | * (an unusual configuration), then returns the root cpuset. | 2413 | * (an unusual configuration), then returns the root cpuset. |
2425 | */ | 2414 | */ |
2426 | static const struct cpuset *nearest_hardwall_ancestor(const struct cpuset *cs) | 2415 | static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs) |
2427 | { | 2416 | { |
2428 | while (!(is_mem_exclusive(cs) || is_mem_hardwall(cs)) && parent_cs(cs)) | 2417 | while (!(is_mem_exclusive(cs) || is_mem_hardwall(cs)) && parent_cs(cs)) |
2429 | cs = parent_cs(cs); | 2418 | cs = parent_cs(cs); |
@@ -2493,7 +2482,7 @@ static const struct cpuset *nearest_hardwall_ancestor(const struct cpuset *cs) | |||
2493 | */ | 2482 | */ |
2494 | int __cpuset_node_allowed_softwall(int node, gfp_t gfp_mask) | 2483 | int __cpuset_node_allowed_softwall(int node, gfp_t gfp_mask) |
2495 | { | 2484 | { |
2496 | const struct cpuset *cs; /* current cpuset ancestors */ | 2485 | struct cpuset *cs; /* current cpuset ancestors */ |
2497 | int allowed; /* is allocation in zone z allowed? */ | 2486 | int allowed; /* is allocation in zone z allowed? */ |
2498 | 2487 | ||
2499 | if (in_interrupt() || (gfp_mask & __GFP_THISNODE)) | 2488 | if (in_interrupt() || (gfp_mask & __GFP_THISNODE)) |
@@ -2731,7 +2720,7 @@ int proc_cpuset_show(struct seq_file *m, void *unused_v) | |||
2731 | goto out_free; | 2720 | goto out_free; |
2732 | 2721 | ||
2733 | rcu_read_lock(); | 2722 | rcu_read_lock(); |
2734 | css = task_subsys_state(tsk, cpuset_subsys_id); | 2723 | css = task_css(tsk, cpuset_subsys_id); |
2735 | retval = cgroup_path(css->cgroup, buf, PAGE_SIZE); | 2724 | retval = cgroup_path(css->cgroup, buf, PAGE_SIZE); |
2736 | rcu_read_unlock(); | 2725 | rcu_read_unlock(); |
2737 | if (retval < 0) | 2726 | if (retval < 0) |
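The cpuset conversions above all follow one pattern: every cftype handler and css lifecycle callback now receives a struct cgroup_subsys_state * directly and recovers its private state with css_cs(), instead of dereferencing a struct cgroup. A minimal sketch of that idiom for a hypothetical subsystem; demo_state, css_demo and knob are invented for illustration and are not part of the patch:

    #include <linux/cgroup.h>
    #include <linux/slab.h>

    /* hypothetical per-cgroup state, embedding the css as cpuset does */
    struct demo_state {
            struct cgroup_subsys_state css;
            u64 knob;
    };

    /* container_of-or-NULL cast, mirroring css_cs()/css_tg()/css_ca() */
    static inline struct demo_state *css_demo(struct cgroup_subsys_state *css)
    {
            return css ? container_of(css, struct demo_state, css) : NULL;
    }

    /* after this series, cftype handlers take the css directly */
    static u64 demo_knob_read(struct cgroup_subsys_state *css,
                              struct cftype *cft)
    {
            return css_demo(css)->knob;
    }

    static int demo_knob_write(struct cgroup_subsys_state *css,
                               struct cftype *cft, u64 val)
    {
            css_demo(css)->knob = val;
            return 0;
    }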
diff --git a/kernel/events/core.c b/kernel/events/core.c index f86599e8c123..9300f5226077 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c | |||
@@ -340,8 +340,8 @@ struct perf_cgroup { | |||
340 | static inline struct perf_cgroup * | 340 | static inline struct perf_cgroup * |
341 | perf_cgroup_from_task(struct task_struct *task) | 341 | perf_cgroup_from_task(struct task_struct *task) |
342 | { | 342 | { |
343 | return container_of(task_subsys_state(task, perf_subsys_id), | 343 | return container_of(task_css(task, perf_subsys_id), |
344 | struct perf_cgroup, css); | 344 | struct perf_cgroup, css); |
345 | } | 345 | } |
346 | 346 | ||
347 | static inline bool | 347 | static inline bool |
@@ -591,7 +591,9 @@ static inline int perf_cgroup_connect(int fd, struct perf_event *event, | |||
591 | if (!f.file) | 591 | if (!f.file) |
592 | return -EBADF; | 592 | return -EBADF; |
593 | 593 | ||
594 | css = cgroup_css_from_dir(f.file, perf_subsys_id); | 594 | rcu_read_lock(); |
595 | |||
596 | css = css_from_dir(f.file->f_dentry, &perf_subsys); | ||
595 | if (IS_ERR(css)) { | 597 | if (IS_ERR(css)) { |
596 | ret = PTR_ERR(css); | 598 | ret = PTR_ERR(css); |
597 | goto out; | 599 | goto out; |
@@ -617,6 +619,7 @@ static inline int perf_cgroup_connect(int fd, struct perf_event *event, | |||
617 | ret = -EINVAL; | 619 | ret = -EINVAL; |
618 | } | 620 | } |
619 | out: | 621 | out: |
622 | rcu_read_unlock(); | ||
620 | fdput(f); | 623 | fdput(f); |
621 | return ret; | 624 | return ret; |
622 | } | 625 | } |
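The new rcu_read_lock() around the lookup is the point of this hunk: css_from_dir() returns a css that is only guaranteed to stay alive while the RCU read lock is held. A hedged sketch of the general pattern, pinning the css with css_tryget() before the lock is dropped; perf_cgroup_connect() itself keeps the lock held across its use instead of taking a separate reference:

    struct cgroup_subsys_state *css;

    /* f was obtained via fdget(fd), as in perf_cgroup_connect() */
    rcu_read_lock();
    css = css_from_dir(f.file->f_dentry, &perf_subsys);
    if (IS_ERR(css)) {
            rcu_read_unlock();
            return PTR_ERR(css);
    }
    if (!css_tryget(css))           /* the group may already be dying */
            css = ERR_PTR(-ENOENT);
    rcu_read_unlock();
    /* on success the css is pinned; drop it later with css_put(css) */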
@@ -7798,7 +7801,8 @@ unlock: | |||
7798 | device_initcall(perf_event_sysfs_init); | 7801 | device_initcall(perf_event_sysfs_init); |
7799 | 7802 | ||
7800 | #ifdef CONFIG_CGROUP_PERF | 7803 | #ifdef CONFIG_CGROUP_PERF |
7801 | static struct cgroup_subsys_state *perf_cgroup_css_alloc(struct cgroup *cont) | 7804 | static struct cgroup_subsys_state * |
7805 | perf_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) | ||
7802 | { | 7806 | { |
7803 | struct perf_cgroup *jc; | 7807 | struct perf_cgroup *jc; |
7804 | 7808 | ||
@@ -7815,11 +7819,10 @@ static struct cgroup_subsys_state *perf_cgroup_css_alloc(struct cgroup *cont) | |||
7815 | return &jc->css; | 7819 | return &jc->css; |
7816 | } | 7820 | } |
7817 | 7821 | ||
7818 | static void perf_cgroup_css_free(struct cgroup *cont) | 7822 | static void perf_cgroup_css_free(struct cgroup_subsys_state *css) |
7819 | { | 7823 | { |
7820 | struct perf_cgroup *jc; | 7824 | struct perf_cgroup *jc = container_of(css, struct perf_cgroup, css); |
7821 | jc = container_of(cgroup_subsys_state(cont, perf_subsys_id), | 7825 | |
7822 | struct perf_cgroup, css); | ||
7823 | free_percpu(jc->info); | 7826 | free_percpu(jc->info); |
7824 | kfree(jc); | 7827 | kfree(jc); |
7825 | } | 7828 | } |
@@ -7831,15 +7834,17 @@ static int __perf_cgroup_move(void *info) | |||
7831 | return 0; | 7834 | return 0; |
7832 | } | 7835 | } |
7833 | 7836 | ||
7834 | static void perf_cgroup_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) | 7837 | static void perf_cgroup_attach(struct cgroup_subsys_state *css, |
7838 | struct cgroup_taskset *tset) | ||
7835 | { | 7839 | { |
7836 | struct task_struct *task; | 7840 | struct task_struct *task; |
7837 | 7841 | ||
7838 | cgroup_taskset_for_each(task, cgrp, tset) | 7842 | cgroup_taskset_for_each(task, css, tset) |
7839 | task_function_call(task, __perf_cgroup_move, task); | 7843 | task_function_call(task, __perf_cgroup_move, task); |
7840 | } | 7844 | } |
7841 | 7845 | ||
7842 | static void perf_cgroup_exit(struct cgroup *cgrp, struct cgroup *old_cgrp, | 7846 | static void perf_cgroup_exit(struct cgroup_subsys_state *css, |
7847 | struct cgroup_subsys_state *old_css, | ||
7843 | struct task_struct *task) | 7848 | struct task_struct *task) |
7844 | { | 7849 | { |
7845 | /* | 7850 | /* |
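The attach and exit conversions change cgroup_taskset_for_each() to key on the css rather than the cgroup; the loop bodies are untouched. A minimal sketch, with do_something_per_task() as a hypothetical stand-in for the subsystem-specific work:

    static void demo_attach(struct cgroup_subsys_state *css,
                            struct cgroup_taskset *tset)
    {
            struct task_struct *task;

            /* walk every task being migrated into this css's cgroup */
            cgroup_taskset_for_each(task, css, tset)
                    do_something_per_task(task);
    }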
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 05c39f030314..e53bda3ff2f1 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
@@ -6815,7 +6815,7 @@ void sched_move_task(struct task_struct *tsk) | |||
6815 | if (unlikely(running)) | 6815 | if (unlikely(running)) |
6816 | tsk->sched_class->put_prev_task(rq, tsk); | 6816 | tsk->sched_class->put_prev_task(rq, tsk); |
6817 | 6817 | ||
6818 | tg = container_of(task_subsys_state_check(tsk, cpu_cgroup_subsys_id, | 6818 | tg = container_of(task_css_check(tsk, cpu_cgroup_subsys_id, |
6819 | lockdep_is_held(&tsk->sighand->siglock)), | 6819 | lockdep_is_held(&tsk->sighand->siglock)), |
6820 | struct task_group, css); | 6820 | struct task_group, css); |
6821 | tg = autogroup_task_group(tsk, tg); | 6821 | tg = autogroup_task_group(tsk, tg); |
@@ -7137,23 +7137,22 @@ int sched_rt_handler(struct ctl_table *table, int write, | |||
7137 | 7137 | ||
7138 | #ifdef CONFIG_CGROUP_SCHED | 7138 | #ifdef CONFIG_CGROUP_SCHED |
7139 | 7139 | ||
7140 | /* return corresponding task_group object of a cgroup */ | 7140 | static inline struct task_group *css_tg(struct cgroup_subsys_state *css) |
7141 | static inline struct task_group *cgroup_tg(struct cgroup *cgrp) | ||
7142 | { | 7141 | { |
7143 | return container_of(cgroup_subsys_state(cgrp, cpu_cgroup_subsys_id), | 7142 | return css ? container_of(css, struct task_group, css) : NULL; |
7144 | struct task_group, css); | ||
7145 | } | 7143 | } |
7146 | 7144 | ||
7147 | static struct cgroup_subsys_state *cpu_cgroup_css_alloc(struct cgroup *cgrp) | 7145 | static struct cgroup_subsys_state * |
7146 | cpu_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) | ||
7148 | { | 7147 | { |
7149 | struct task_group *tg, *parent; | 7148 | struct task_group *parent = css_tg(parent_css); |
7149 | struct task_group *tg; | ||
7150 | 7150 | ||
7151 | if (!cgrp->parent) { | 7151 | if (!parent) { |
7152 | /* This is early initialization for the top cgroup */ | 7152 | /* This is early initialization for the top cgroup */ |
7153 | return &root_task_group.css; | 7153 | return &root_task_group.css; |
7154 | } | 7154 | } |
7155 | 7155 | ||
7156 | parent = cgroup_tg(cgrp->parent); | ||
7157 | tg = sched_create_group(parent); | 7156 | tg = sched_create_group(parent); |
7158 | if (IS_ERR(tg)) | 7157 | if (IS_ERR(tg)) |
7159 | return ERR_PTR(-ENOMEM); | 7158 | return ERR_PTR(-ENOMEM); |
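Because css_tg() maps a NULL css to a NULL task_group, css_alloc() can detect the root group simply by testing parent_css. A sketch of the same shape, assuming a statically allocated root; root_demo_state is hypothetical:

    static struct demo_state root_demo_state;      /* root is never freed */

    static struct cgroup_subsys_state *
    demo_css_alloc(struct cgroup_subsys_state *parent_css)
    {
            struct demo_state *ds;

            if (!parent_css)                /* NULL parent == root cgroup */
                    return &root_demo_state.css;

            ds = kzalloc(sizeof(*ds), GFP_KERNEL);
            if (!ds)
                    return ERR_PTR(-ENOMEM);
            return &ds->css;
    }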
@@ -7161,41 +7160,38 @@ static struct cgroup_subsys_state *cpu_cgroup_css_alloc(struct cgroup *cgrp) | |||
7161 | return &tg->css; | 7160 | return &tg->css; |
7162 | } | 7161 | } |
7163 | 7162 | ||
7164 | static int cpu_cgroup_css_online(struct cgroup *cgrp) | 7163 | static int cpu_cgroup_css_online(struct cgroup_subsys_state *css) |
7165 | { | 7164 | { |
7166 | struct task_group *tg = cgroup_tg(cgrp); | 7165 | struct task_group *tg = css_tg(css); |
7167 | struct task_group *parent; | 7166 | struct task_group *parent = css_tg(css_parent(css)); |
7168 | |||
7169 | if (!cgrp->parent) | ||
7170 | return 0; | ||
7171 | 7167 | ||
7172 | parent = cgroup_tg(cgrp->parent); | 7168 | if (parent) |
7173 | sched_online_group(tg, parent); | 7169 | sched_online_group(tg, parent); |
7174 | return 0; | 7170 | return 0; |
7175 | } | 7171 | } |
7176 | 7172 | ||
7177 | static void cpu_cgroup_css_free(struct cgroup *cgrp) | 7173 | static void cpu_cgroup_css_free(struct cgroup_subsys_state *css) |
7178 | { | 7174 | { |
7179 | struct task_group *tg = cgroup_tg(cgrp); | 7175 | struct task_group *tg = css_tg(css); |
7180 | 7176 | ||
7181 | sched_destroy_group(tg); | 7177 | sched_destroy_group(tg); |
7182 | } | 7178 | } |
7183 | 7179 | ||
7184 | static void cpu_cgroup_css_offline(struct cgroup *cgrp) | 7180 | static void cpu_cgroup_css_offline(struct cgroup_subsys_state *css) |
7185 | { | 7181 | { |
7186 | struct task_group *tg = cgroup_tg(cgrp); | 7182 | struct task_group *tg = css_tg(css); |
7187 | 7183 | ||
7188 | sched_offline_group(tg); | 7184 | sched_offline_group(tg); |
7189 | } | 7185 | } |
7190 | 7186 | ||
7191 | static int cpu_cgroup_can_attach(struct cgroup *cgrp, | 7187 | static int cpu_cgroup_can_attach(struct cgroup_subsys_state *css, |
7192 | struct cgroup_taskset *tset) | 7188 | struct cgroup_taskset *tset) |
7193 | { | 7189 | { |
7194 | struct task_struct *task; | 7190 | struct task_struct *task; |
7195 | 7191 | ||
7196 | cgroup_taskset_for_each(task, cgrp, tset) { | 7192 | cgroup_taskset_for_each(task, css, tset) { |
7197 | #ifdef CONFIG_RT_GROUP_SCHED | 7193 | #ifdef CONFIG_RT_GROUP_SCHED |
7198 | if (!sched_rt_can_attach(cgroup_tg(cgrp), task)) | 7194 | if (!sched_rt_can_attach(css_tg(css), task)) |
7199 | return -EINVAL; | 7195 | return -EINVAL; |
7200 | #else | 7196 | #else |
7201 | /* We don't support RT-tasks being in separate groups */ | 7197 | /* We don't support RT-tasks being in separate groups */ |
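css_online() likewise loses its explicit root check: css_parent() yields NULL at the root, and the NULL-tolerant cast turns that into a NULL parent pointer the body can test. Sketch, with link_into_parent() as a hypothetical stand-in:

    static int demo_css_online(struct cgroup_subsys_state *css)
    {
            struct demo_state *ds = css_demo(css);
            struct demo_state *parent = css_demo(css_parent(css));

            if (parent)                     /* root: nothing to link */
                    link_into_parent(ds, parent);
            return 0;
    }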
@@ -7206,18 +7202,18 @@ static int cpu_cgroup_can_attach(struct cgroup *cgrp, | |||
7206 | return 0; | 7202 | return 0; |
7207 | } | 7203 | } |
7208 | 7204 | ||
7209 | static void cpu_cgroup_attach(struct cgroup *cgrp, | 7205 | static void cpu_cgroup_attach(struct cgroup_subsys_state *css, |
7210 | struct cgroup_taskset *tset) | 7206 | struct cgroup_taskset *tset) |
7211 | { | 7207 | { |
7212 | struct task_struct *task; | 7208 | struct task_struct *task; |
7213 | 7209 | ||
7214 | cgroup_taskset_for_each(task, cgrp, tset) | 7210 | cgroup_taskset_for_each(task, css, tset) |
7215 | sched_move_task(task); | 7211 | sched_move_task(task); |
7216 | } | 7212 | } |
7217 | 7213 | ||
7218 | static void | 7214 | static void cpu_cgroup_exit(struct cgroup_subsys_state *css, |
7219 | cpu_cgroup_exit(struct cgroup *cgrp, struct cgroup *old_cgrp, | 7215 | struct cgroup_subsys_state *old_css, |
7220 | struct task_struct *task) | 7216 | struct task_struct *task) |
7221 | { | 7217 | { |
7222 | /* | 7218 | /* |
7223 | * cgroup_exit() is called in the copy_process() failure path. | 7219 | * cgroup_exit() is called in the copy_process() failure path. |
@@ -7231,15 +7227,16 @@ cpu_cgroup_exit(struct cgroup *cgrp, struct cgroup *old_cgrp, | |||
7231 | } | 7227 | } |
7232 | 7228 | ||
7233 | #ifdef CONFIG_FAIR_GROUP_SCHED | 7229 | #ifdef CONFIG_FAIR_GROUP_SCHED |
7234 | static int cpu_shares_write_u64(struct cgroup *cgrp, struct cftype *cftype, | 7230 | static int cpu_shares_write_u64(struct cgroup_subsys_state *css, |
7235 | u64 shareval) | 7231 | struct cftype *cftype, u64 shareval) |
7236 | { | 7232 | { |
7237 | return sched_group_set_shares(cgroup_tg(cgrp), scale_load(shareval)); | 7233 | return sched_group_set_shares(css_tg(css), scale_load(shareval)); |
7238 | } | 7234 | } |
7239 | 7235 | ||
7240 | static u64 cpu_shares_read_u64(struct cgroup *cgrp, struct cftype *cft) | 7236 | static u64 cpu_shares_read_u64(struct cgroup_subsys_state *css, |
7237 | struct cftype *cft) | ||
7241 | { | 7238 | { |
7242 | struct task_group *tg = cgroup_tg(cgrp); | 7239 | struct task_group *tg = css_tg(css); |
7243 | 7240 | ||
7244 | return (u64) scale_load_down(tg->shares); | 7241 | return (u64) scale_load_down(tg->shares); |
7245 | } | 7242 | } |
@@ -7361,26 +7358,28 @@ long tg_get_cfs_period(struct task_group *tg) | |||
7361 | return cfs_period_us; | 7358 | return cfs_period_us; |
7362 | } | 7359 | } |
7363 | 7360 | ||
7364 | static s64 cpu_cfs_quota_read_s64(struct cgroup *cgrp, struct cftype *cft) | 7361 | static s64 cpu_cfs_quota_read_s64(struct cgroup_subsys_state *css, |
7362 | struct cftype *cft) | ||
7365 | { | 7363 | { |
7366 | return tg_get_cfs_quota(cgroup_tg(cgrp)); | 7364 | return tg_get_cfs_quota(css_tg(css)); |
7367 | } | 7365 | } |
7368 | 7366 | ||
7369 | static int cpu_cfs_quota_write_s64(struct cgroup *cgrp, struct cftype *cftype, | 7367 | static int cpu_cfs_quota_write_s64(struct cgroup_subsys_state *css, |
7370 | s64 cfs_quota_us) | 7368 | struct cftype *cftype, s64 cfs_quota_us) |
7371 | { | 7369 | { |
7372 | return tg_set_cfs_quota(cgroup_tg(cgrp), cfs_quota_us); | 7370 | return tg_set_cfs_quota(css_tg(css), cfs_quota_us); |
7373 | } | 7371 | } |
7374 | 7372 | ||
7375 | static u64 cpu_cfs_period_read_u64(struct cgroup *cgrp, struct cftype *cft) | 7373 | static u64 cpu_cfs_period_read_u64(struct cgroup_subsys_state *css, |
7374 | struct cftype *cft) | ||
7376 | { | 7375 | { |
7377 | return tg_get_cfs_period(cgroup_tg(cgrp)); | 7376 | return tg_get_cfs_period(css_tg(css)); |
7378 | } | 7377 | } |
7379 | 7378 | ||
7380 | static int cpu_cfs_period_write_u64(struct cgroup *cgrp, struct cftype *cftype, | 7379 | static int cpu_cfs_period_write_u64(struct cgroup_subsys_state *css, |
7381 | u64 cfs_period_us) | 7380 | struct cftype *cftype, u64 cfs_period_us) |
7382 | { | 7381 | { |
7383 | return tg_set_cfs_period(cgroup_tg(cgrp), cfs_period_us); | 7382 | return tg_set_cfs_period(css_tg(css), cfs_period_us); |
7384 | } | 7383 | } |
7385 | 7384 | ||
7386 | struct cfs_schedulable_data { | 7385 | struct cfs_schedulable_data { |
@@ -7461,10 +7460,10 @@ static int __cfs_schedulable(struct task_group *tg, u64 period, u64 quota) | |||
7461 | return ret; | 7460 | return ret; |
7462 | } | 7461 | } |
7463 | 7462 | ||
7464 | static int cpu_stats_show(struct cgroup *cgrp, struct cftype *cft, | 7463 | static int cpu_stats_show(struct cgroup_subsys_state *css, struct cftype *cft, |
7465 | struct cgroup_map_cb *cb) | 7464 | struct cgroup_map_cb *cb) |
7466 | { | 7465 | { |
7467 | struct task_group *tg = cgroup_tg(cgrp); | 7466 | struct task_group *tg = css_tg(css); |
7468 | struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth; | 7467 | struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth; |
7469 | 7468 | ||
7470 | cb->fill(cb, "nr_periods", cfs_b->nr_periods); | 7469 | cb->fill(cb, "nr_periods", cfs_b->nr_periods); |
@@ -7477,26 +7476,28 @@ static int cpu_stats_show(struct cgroup *cgrp, struct cftype *cft, | |||
7477 | #endif /* CONFIG_FAIR_GROUP_SCHED */ | 7476 | #endif /* CONFIG_FAIR_GROUP_SCHED */ |
7478 | 7477 | ||
7479 | #ifdef CONFIG_RT_GROUP_SCHED | 7478 | #ifdef CONFIG_RT_GROUP_SCHED |
7480 | static int cpu_rt_runtime_write(struct cgroup *cgrp, struct cftype *cft, | 7479 | static int cpu_rt_runtime_write(struct cgroup_subsys_state *css, |
7481 | s64 val) | 7480 | struct cftype *cft, s64 val) |
7482 | { | 7481 | { |
7483 | return sched_group_set_rt_runtime(cgroup_tg(cgrp), val); | 7482 | return sched_group_set_rt_runtime(css_tg(css), val); |
7484 | } | 7483 | } |
7485 | 7484 | ||
7486 | static s64 cpu_rt_runtime_read(struct cgroup *cgrp, struct cftype *cft) | 7485 | static s64 cpu_rt_runtime_read(struct cgroup_subsys_state *css, |
7486 | struct cftype *cft) | ||
7487 | { | 7487 | { |
7488 | return sched_group_rt_runtime(cgroup_tg(cgrp)); | 7488 | return sched_group_rt_runtime(css_tg(css)); |
7489 | } | 7489 | } |
7490 | 7490 | ||
7491 | static int cpu_rt_period_write_uint(struct cgroup *cgrp, struct cftype *cftype, | 7491 | static int cpu_rt_period_write_uint(struct cgroup_subsys_state *css, |
7492 | u64 rt_period_us) | 7492 | struct cftype *cftype, u64 rt_period_us) |
7493 | { | 7493 | { |
7494 | return sched_group_set_rt_period(cgroup_tg(cgrp), rt_period_us); | 7494 | return sched_group_set_rt_period(css_tg(css), rt_period_us); |
7495 | } | 7495 | } |
7496 | 7496 | ||
7497 | static u64 cpu_rt_period_read_uint(struct cgroup *cgrp, struct cftype *cft) | 7497 | static u64 cpu_rt_period_read_uint(struct cgroup_subsys_state *css, |
7498 | struct cftype *cft) | ||
7498 | { | 7499 | { |
7499 | return sched_group_rt_period(cgroup_tg(cgrp)); | 7500 | return sched_group_rt_period(css_tg(css)); |
7500 | } | 7501 | } |
7501 | #endif /* CONFIG_RT_GROUP_SCHED */ | 7502 | #endif /* CONFIG_RT_GROUP_SCHED */ |
7502 | 7503 | ||
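None of these signature changes show up in the cftype tables; the function pointers simply gain the new prototypes. A sketch of how the handlers from the earlier demo would be wired up:

    static struct cftype demo_files[] = {
            {
                    .name = "knob",
                    .read_u64 = demo_knob_read,   /* css-based, as above */
                    .write_u64 = demo_knob_write,
            },
            { }     /* terminating entry */
    };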
diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c index dbb7e2cd95eb..f64722ff0299 100644 --- a/kernel/sched/cpuacct.c +++ b/kernel/sched/cpuacct.c | |||
@@ -33,30 +33,20 @@ struct cpuacct { | |||
33 | struct kernel_cpustat __percpu *cpustat; | 33 | struct kernel_cpustat __percpu *cpustat; |
34 | }; | 34 | }; |
35 | 35 | ||
36 | /* return cpu accounting group corresponding to this container */ | 36 | static inline struct cpuacct *css_ca(struct cgroup_subsys_state *css) |
37 | static inline struct cpuacct *cgroup_ca(struct cgroup *cgrp) | ||
38 | { | 37 | { |
39 | return container_of(cgroup_subsys_state(cgrp, cpuacct_subsys_id), | 38 | return css ? container_of(css, struct cpuacct, css) : NULL; |
40 | struct cpuacct, css); | ||
41 | } | 39 | } |
42 | 40 | ||
43 | /* return cpu accounting group to which this task belongs */ | 41 | /* return cpu accounting group to which this task belongs */ |
44 | static inline struct cpuacct *task_ca(struct task_struct *tsk) | 42 | static inline struct cpuacct *task_ca(struct task_struct *tsk) |
45 | { | 43 | { |
46 | return container_of(task_subsys_state(tsk, cpuacct_subsys_id), | 44 | return css_ca(task_css(tsk, cpuacct_subsys_id)); |
47 | struct cpuacct, css); | ||
48 | } | ||
49 | |||
50 | static inline struct cpuacct *__parent_ca(struct cpuacct *ca) | ||
51 | { | ||
52 | return cgroup_ca(ca->css.cgroup->parent); | ||
53 | } | 45 | } |
54 | 46 | ||
55 | static inline struct cpuacct *parent_ca(struct cpuacct *ca) | 47 | static inline struct cpuacct *parent_ca(struct cpuacct *ca) |
56 | { | 48 | { |
57 | if (!ca->css.cgroup->parent) | 49 | return css_ca(css_parent(&ca->css)); |
58 | return NULL; | ||
59 | return cgroup_ca(ca->css.cgroup->parent); | ||
60 | } | 50 | } |
61 | 51 | ||
62 | static DEFINE_PER_CPU(u64, root_cpuacct_cpuusage); | 52 | static DEFINE_PER_CPU(u64, root_cpuacct_cpuusage); |
@@ -66,11 +56,12 @@ static struct cpuacct root_cpuacct = { | |||
66 | }; | 56 | }; |
67 | 57 | ||
68 | /* create a new cpu accounting group */ | 58 | /* create a new cpu accounting group */ |
69 | static struct cgroup_subsys_state *cpuacct_css_alloc(struct cgroup *cgrp) | 59 | static struct cgroup_subsys_state * |
60 | cpuacct_css_alloc(struct cgroup_subsys_state *parent_css) | ||
70 | { | 61 | { |
71 | struct cpuacct *ca; | 62 | struct cpuacct *ca; |
72 | 63 | ||
73 | if (!cgrp->parent) | 64 | if (!parent_css) |
74 | return &root_cpuacct.css; | 65 | return &root_cpuacct.css; |
75 | 66 | ||
76 | ca = kzalloc(sizeof(*ca), GFP_KERNEL); | 67 | ca = kzalloc(sizeof(*ca), GFP_KERNEL); |
@@ -96,9 +87,9 @@ out: | |||
96 | } | 87 | } |
97 | 88 | ||
98 | /* destroy an existing cpu accounting group */ | 89 | /* destroy an existing cpu accounting group */ |
99 | static void cpuacct_css_free(struct cgroup *cgrp) | 90 | static void cpuacct_css_free(struct cgroup_subsys_state *css) |
100 | { | 91 | { |
101 | struct cpuacct *ca = cgroup_ca(cgrp); | 92 | struct cpuacct *ca = css_ca(css); |
102 | 93 | ||
103 | free_percpu(ca->cpustat); | 94 | free_percpu(ca->cpustat); |
104 | free_percpu(ca->cpuusage); | 95 | free_percpu(ca->cpuusage); |
@@ -141,9 +132,9 @@ static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val) | |||
141 | } | 132 | } |
142 | 133 | ||
143 | /* return total cpu usage (in nanoseconds) of a group */ | 134 | /* return total cpu usage (in nanoseconds) of a group */ |
144 | static u64 cpuusage_read(struct cgroup *cgrp, struct cftype *cft) | 135 | static u64 cpuusage_read(struct cgroup_subsys_state *css, struct cftype *cft) |
145 | { | 136 | { |
146 | struct cpuacct *ca = cgroup_ca(cgrp); | 137 | struct cpuacct *ca = css_ca(css); |
147 | u64 totalcpuusage = 0; | 138 | u64 totalcpuusage = 0; |
148 | int i; | 139 | int i; |
149 | 140 | ||
@@ -153,10 +144,10 @@ static u64 cpuusage_read(struct cgroup *cgrp, struct cftype *cft) | |||
153 | return totalcpuusage; | 144 | return totalcpuusage; |
154 | } | 145 | } |
155 | 146 | ||
156 | static int cpuusage_write(struct cgroup *cgrp, struct cftype *cftype, | 147 | static int cpuusage_write(struct cgroup_subsys_state *css, struct cftype *cft, |
157 | u64 reset) | 148 | u64 reset) |
158 | { | 149 | { |
159 | struct cpuacct *ca = cgroup_ca(cgrp); | 150 | struct cpuacct *ca = css_ca(css); |
160 | int err = 0; | 151 | int err = 0; |
161 | int i; | 152 | int i; |
162 | 153 | ||
@@ -172,10 +163,10 @@ out: | |||
172 | return err; | 163 | return err; |
173 | } | 164 | } |
174 | 165 | ||
175 | static int cpuacct_percpu_seq_read(struct cgroup *cgroup, struct cftype *cft, | 166 | static int cpuacct_percpu_seq_read(struct cgroup_subsys_state *css, |
176 | struct seq_file *m) | 167 | struct cftype *cft, struct seq_file *m) |
177 | { | 168 | { |
178 | struct cpuacct *ca = cgroup_ca(cgroup); | 169 | struct cpuacct *ca = css_ca(css); |
179 | u64 percpu; | 170 | u64 percpu; |
180 | int i; | 171 | int i; |
181 | 172 | ||
@@ -192,10 +183,10 @@ static const char * const cpuacct_stat_desc[] = { | |||
192 | [CPUACCT_STAT_SYSTEM] = "system", | 183 | [CPUACCT_STAT_SYSTEM] = "system", |
193 | }; | 184 | }; |
194 | 185 | ||
195 | static int cpuacct_stats_show(struct cgroup *cgrp, struct cftype *cft, | 186 | static int cpuacct_stats_show(struct cgroup_subsys_state *css, |
196 | struct cgroup_map_cb *cb) | 187 | struct cftype *cft, struct cgroup_map_cb *cb) |
197 | { | 188 | { |
198 | struct cpuacct *ca = cgroup_ca(cgrp); | 189 | struct cpuacct *ca = css_ca(css); |
199 | int cpu; | 190 | int cpu; |
200 | s64 val = 0; | 191 | s64 val = 0; |
201 | 192 | ||
@@ -281,7 +272,7 @@ void cpuacct_account_field(struct task_struct *p, int index, u64 val) | |||
281 | while (ca != &root_cpuacct) { | 272 | while (ca != &root_cpuacct) { |
282 | kcpustat = this_cpu_ptr(ca->cpustat); | 273 | kcpustat = this_cpu_ptr(ca->cpustat); |
283 | kcpustat->cpustat[index] += val; | 274 | kcpustat->cpustat[index] += val; |
284 | ca = __parent_ca(ca); | 275 | ca = parent_ca(ca); |
285 | } | 276 | } |
286 | rcu_read_unlock(); | 277 | rcu_read_unlock(); |
287 | } | 278 | } |
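With __parent_ca() folded away, cpuacct_account_field() climbs the hierarchy through the single parent_ca() helper, and its loop condition stops the walk at the root before the NULL parent is ever dereferenced. A hedged sketch of the same upward walk in the demo terms used earlier; note the real code stops at &root_cpuacct rather than at NULL:

    static void demo_charge_hierarchy(struct demo_state *ds, u64 val)
    {
            rcu_read_lock();
            while (ds) {    /* css_demo(css_parent()) is NULL past the root */
                    ds->knob += val;        /* stand-in for real accounting */
                    ds = css_demo(css_parent(&ds->css));
            }
            rcu_read_unlock();
    }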
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index ef0a7b2439dd..471a56db05ea 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h | |||
@@ -665,9 +665,9 @@ extern int group_balance_cpu(struct sched_group *sg); | |||
665 | /* | 665 | /* |
666 | * Return the group to which this task belongs. | 666 | * Return the group to which this task belongs. |
667 | * | 667 | * |
668 | * We cannot use task_subsys_state() and friends because the cgroup | 668 | * We cannot use task_css() and friends because the cgroup subsystem |
669 | * subsystem changes that value before the cgroup_subsys::attach() method | 669 | * changes that value before the cgroup_subsys::attach() method is called, |
670 | * is called, therefore we cannot pin it and might observe the wrong value. | 670 | * therefore we cannot pin it and might observe the wrong value. |
671 | * | 671 | * |
672 | * The same is true for autogroup's p->signal->autogroup->tg, the autogroup | 672 | * The same is true for autogroup's p->signal->autogroup->tg, the autogroup |
673 | * core changes this before calling sched_move_task(). | 673 | * core changes this before calling sched_move_task(). |
diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c index 9cea7de22ffb..bda8e44f6fde 100644 --- a/mm/hugetlb_cgroup.c +++ b/mm/hugetlb_cgroup.c | |||
@@ -36,21 +36,13 @@ static struct hugetlb_cgroup *root_h_cgroup __read_mostly; | |||
36 | static inline | 36 | static inline |
37 | struct hugetlb_cgroup *hugetlb_cgroup_from_css(struct cgroup_subsys_state *s) | 37 | struct hugetlb_cgroup *hugetlb_cgroup_from_css(struct cgroup_subsys_state *s) |
38 | { | 38 | { |
39 | return container_of(s, struct hugetlb_cgroup, css); | 39 | return s ? container_of(s, struct hugetlb_cgroup, css) : NULL; |
40 | } | ||
41 | |||
42 | static inline | ||
43 | struct hugetlb_cgroup *hugetlb_cgroup_from_cgroup(struct cgroup *cgroup) | ||
44 | { | ||
45 | return hugetlb_cgroup_from_css(cgroup_subsys_state(cgroup, | ||
46 | hugetlb_subsys_id)); | ||
47 | } | 40 | } |
48 | 41 | ||
49 | static inline | 42 | static inline |
50 | struct hugetlb_cgroup *hugetlb_cgroup_from_task(struct task_struct *task) | 43 | struct hugetlb_cgroup *hugetlb_cgroup_from_task(struct task_struct *task) |
51 | { | 44 | { |
52 | return hugetlb_cgroup_from_css(task_subsys_state(task, | 45 | return hugetlb_cgroup_from_css(task_css(task, hugetlb_subsys_id)); |
53 | hugetlb_subsys_id)); | ||
54 | } | 46 | } |
55 | 47 | ||
56 | static inline bool hugetlb_cgroup_is_root(struct hugetlb_cgroup *h_cg) | 48 | static inline bool hugetlb_cgroup_is_root(struct hugetlb_cgroup *h_cg) |
@@ -58,17 +50,15 @@ static inline bool hugetlb_cgroup_is_root(struct hugetlb_cgroup *h_cg) | |||
58 | return (h_cg == root_h_cgroup); | 50 | return (h_cg == root_h_cgroup); |
59 | } | 51 | } |
60 | 52 | ||
61 | static inline struct hugetlb_cgroup *parent_hugetlb_cgroup(struct cgroup *cg) | 53 | static inline struct hugetlb_cgroup * |
54 | parent_hugetlb_cgroup(struct hugetlb_cgroup *h_cg) | ||
62 | { | 55 | { |
63 | if (!cg->parent) | 56 | return hugetlb_cgroup_from_css(css_parent(&h_cg->css)); |
64 | return NULL; | ||
65 | return hugetlb_cgroup_from_cgroup(cg->parent); | ||
66 | } | 57 | } |
67 | 58 | ||
68 | static inline bool hugetlb_cgroup_have_usage(struct cgroup *cg) | 59 | static inline bool hugetlb_cgroup_have_usage(struct hugetlb_cgroup *h_cg) |
69 | { | 60 | { |
70 | int idx; | 61 | int idx; |
71 | struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_cgroup(cg); | ||
72 | 62 | ||
73 | for (idx = 0; idx < hugetlb_max_hstate; idx++) { | 63 | for (idx = 0; idx < hugetlb_max_hstate; idx++) { |
74 | if ((res_counter_read_u64(&h_cg->hugepage[idx], RES_USAGE)) > 0) | 64 | if ((res_counter_read_u64(&h_cg->hugepage[idx], RES_USAGE)) > 0) |
@@ -77,19 +67,18 @@ static inline bool hugetlb_cgroup_have_usage(struct cgroup *cg) | |||
77 | return false; | 67 | return false; |
78 | } | 68 | } |
79 | 69 | ||
80 | static struct cgroup_subsys_state *hugetlb_cgroup_css_alloc(struct cgroup *cgroup) | 70 | static struct cgroup_subsys_state * |
71 | hugetlb_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) | ||
81 | { | 72 | { |
73 | struct hugetlb_cgroup *parent_h_cgroup = hugetlb_cgroup_from_css(parent_css); | ||
74 | struct hugetlb_cgroup *h_cgroup; | ||
82 | int idx; | 75 | int idx; |
83 | struct cgroup *parent_cgroup; | ||
84 | struct hugetlb_cgroup *h_cgroup, *parent_h_cgroup; | ||
85 | 76 | ||
86 | h_cgroup = kzalloc(sizeof(*h_cgroup), GFP_KERNEL); | 77 | h_cgroup = kzalloc(sizeof(*h_cgroup), GFP_KERNEL); |
87 | if (!h_cgroup) | 78 | if (!h_cgroup) |
88 | return ERR_PTR(-ENOMEM); | 79 | return ERR_PTR(-ENOMEM); |
89 | 80 | ||
90 | parent_cgroup = cgroup->parent; | 81 | if (parent_h_cgroup) { |
91 | if (parent_cgroup) { | ||
92 | parent_h_cgroup = hugetlb_cgroup_from_cgroup(parent_cgroup); | ||
93 | for (idx = 0; idx < HUGE_MAX_HSTATE; idx++) | 82 | for (idx = 0; idx < HUGE_MAX_HSTATE; idx++) |
94 | res_counter_init(&h_cgroup->hugepage[idx], | 83 | res_counter_init(&h_cgroup->hugepage[idx], |
95 | &parent_h_cgroup->hugepage[idx]); | 84 | &parent_h_cgroup->hugepage[idx]); |
@@ -101,11 +90,11 @@ static struct cgroup_subsys_state *hugetlb_cgroup_css_alloc(struct cgroup *cgrou | |||
101 | return &h_cgroup->css; | 90 | return &h_cgroup->css; |
102 | } | 91 | } |
103 | 92 | ||
104 | static void hugetlb_cgroup_css_free(struct cgroup *cgroup) | 93 | static void hugetlb_cgroup_css_free(struct cgroup_subsys_state *css) |
105 | { | 94 | { |
106 | struct hugetlb_cgroup *h_cgroup; | 95 | struct hugetlb_cgroup *h_cgroup; |
107 | 96 | ||
108 | h_cgroup = hugetlb_cgroup_from_cgroup(cgroup); | 97 | h_cgroup = hugetlb_cgroup_from_css(css); |
109 | kfree(h_cgroup); | 98 | kfree(h_cgroup); |
110 | } | 99 | } |
111 | 100 | ||
@@ -117,15 +106,14 @@ static void hugetlb_cgroup_css_free(struct cgroup *cgroup) | |||
117 | * page reference and test for page active here. This function | 106 | * page reference and test for page active here. This function |
118 | * cannot fail. | 107 | * cannot fail. |
119 | */ | 108 | */ |
120 | static void hugetlb_cgroup_move_parent(int idx, struct cgroup *cgroup, | 109 | static void hugetlb_cgroup_move_parent(int idx, struct hugetlb_cgroup *h_cg, |
121 | struct page *page) | 110 | struct page *page) |
122 | { | 111 | { |
123 | int csize; | 112 | int csize; |
124 | struct res_counter *counter; | 113 | struct res_counter *counter; |
125 | struct res_counter *fail_res; | 114 | struct res_counter *fail_res; |
126 | struct hugetlb_cgroup *page_hcg; | 115 | struct hugetlb_cgroup *page_hcg; |
127 | struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_cgroup(cgroup); | 116 | struct hugetlb_cgroup *parent = parent_hugetlb_cgroup(h_cg); |
128 | struct hugetlb_cgroup *parent = parent_hugetlb_cgroup(cgroup); | ||
129 | 117 | ||
130 | page_hcg = hugetlb_cgroup_from_page(page); | 118 | page_hcg = hugetlb_cgroup_from_page(page); |
131 | /* | 119 | /* |
@@ -155,8 +143,9 @@ out: | |||
155 | * Force the hugetlb cgroup to empty the hugetlb resources by moving them to | 143 | * Force the hugetlb cgroup to empty the hugetlb resources by moving them to |
156 | * the parent cgroup. | 144 | * the parent cgroup. |
157 | */ | 145 | */ |
158 | static void hugetlb_cgroup_css_offline(struct cgroup *cgroup) | 146 | static void hugetlb_cgroup_css_offline(struct cgroup_subsys_state *css) |
159 | { | 147 | { |
148 | struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css); | ||
160 | struct hstate *h; | 149 | struct hstate *h; |
161 | struct page *page; | 150 | struct page *page; |
162 | int idx = 0; | 151 | int idx = 0; |
@@ -165,13 +154,13 @@ static void hugetlb_cgroup_css_offline(struct cgroup *cgroup) | |||
165 | for_each_hstate(h) { | 154 | for_each_hstate(h) { |
166 | spin_lock(&hugetlb_lock); | 155 | spin_lock(&hugetlb_lock); |
167 | list_for_each_entry(page, &h->hugepage_activelist, lru) | 156 | list_for_each_entry(page, &h->hugepage_activelist, lru) |
168 | hugetlb_cgroup_move_parent(idx, cgroup, page); | 157 | hugetlb_cgroup_move_parent(idx, h_cg, page); |
169 | 158 | ||
170 | spin_unlock(&hugetlb_lock); | 159 | spin_unlock(&hugetlb_lock); |
171 | idx++; | 160 | idx++; |
172 | } | 161 | } |
173 | cond_resched(); | 162 | cond_resched(); |
174 | } while (hugetlb_cgroup_have_usage(cgroup)); | 163 | } while (hugetlb_cgroup_have_usage(h_cg)); |
175 | } | 164 | } |
176 | 165 | ||
177 | int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages, | 166 | int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages, |
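The hugetlb offline path shows the usual drain shape for a dying group: keep moving charges to the parent until usage reaches zero, rescheduling between passes. Sketched generically; reparent_some_charges() and demo_has_usage() are hypothetical:

    static void demo_css_offline(struct cgroup_subsys_state *css)
    {
            struct demo_state *ds = css_demo(css);

            do {
                    reparent_some_charges(ds);  /* move charges upward */
                    cond_resched();             /* this can be a long loop */
            } while (demo_has_usage(ds));
    }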
@@ -253,14 +242,15 @@ void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages, | |||
253 | return; | 242 | return; |
254 | } | 243 | } |
255 | 244 | ||
256 | static ssize_t hugetlb_cgroup_read(struct cgroup *cgroup, struct cftype *cft, | 245 | static ssize_t hugetlb_cgroup_read(struct cgroup_subsys_state *css, |
257 | struct file *file, char __user *buf, | 246 | struct cftype *cft, struct file *file, |
258 | size_t nbytes, loff_t *ppos) | 247 | char __user *buf, size_t nbytes, |
248 | loff_t *ppos) | ||
259 | { | 249 | { |
260 | u64 val; | 250 | u64 val; |
261 | char str[64]; | 251 | char str[64]; |
262 | int idx, name, len; | 252 | int idx, name, len; |
263 | struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_cgroup(cgroup); | 253 | struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css); |
264 | 254 | ||
265 | idx = MEMFILE_IDX(cft->private); | 255 | idx = MEMFILE_IDX(cft->private); |
266 | name = MEMFILE_ATTR(cft->private); | 256 | name = MEMFILE_ATTR(cft->private); |
@@ -270,12 +260,12 @@ static ssize_t hugetlb_cgroup_read(struct cgroup *cgroup, struct cftype *cft, | |||
270 | return simple_read_from_buffer(buf, nbytes, ppos, str, len); | 260 | return simple_read_from_buffer(buf, nbytes, ppos, str, len); |
271 | } | 261 | } |
272 | 262 | ||
273 | static int hugetlb_cgroup_write(struct cgroup *cgroup, struct cftype *cft, | 263 | static int hugetlb_cgroup_write(struct cgroup_subsys_state *css, |
274 | const char *buffer) | 264 | struct cftype *cft, const char *buffer) |
275 | { | 265 | { |
276 | int idx, name, ret; | 266 | int idx, name, ret; |
277 | unsigned long long val; | 267 | unsigned long long val; |
278 | struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_cgroup(cgroup); | 268 | struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css); |
279 | 269 | ||
280 | idx = MEMFILE_IDX(cft->private); | 270 | idx = MEMFILE_IDX(cft->private); |
281 | name = MEMFILE_ATTR(cft->private); | 271 | name = MEMFILE_ATTR(cft->private); |
@@ -300,10 +290,11 @@ static int hugetlb_cgroup_write(struct cgroup *cgroup, struct cftype *cft, | |||
300 | return ret; | 290 | return ret; |
301 | } | 291 | } |
302 | 292 | ||
303 | static int hugetlb_cgroup_reset(struct cgroup *cgroup, unsigned int event) | 293 | static int hugetlb_cgroup_reset(struct cgroup_subsys_state *css, |
294 | unsigned int event) | ||
304 | { | 295 | { |
305 | int idx, name, ret = 0; | 296 | int idx, name, ret = 0; |
306 | struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_cgroup(cgroup); | 297 | struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css); |
307 | 298 | ||
308 | idx = MEMFILE_IDX(event); | 299 | idx = MEMFILE_IDX(event); |
309 | name = MEMFILE_ATTR(event); | 300 | name = MEMFILE_ATTR(event); |
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 0878ff7c26a9..3b83957b6439 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -483,10 +483,9 @@ enum res_type { | |||
483 | */ | 483 | */ |
484 | static DEFINE_MUTEX(memcg_create_mutex); | 484 | static DEFINE_MUTEX(memcg_create_mutex); |
485 | 485 | ||
486 | static inline | ||
487 | struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *s) | 486 | struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *s) |
488 | { | 487 | { |
489 | return container_of(s, struct mem_cgroup, css); | 488 | return s ? container_of(s, struct mem_cgroup, css) : NULL; |
490 | } | 489 | } |
491 | 490 | ||
492 | /* Some nice accessors for the vmpressure. */ | 491 | /* Some nice accessors for the vmpressure. */ |
@@ -1035,12 +1034,6 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page) | |||
1035 | preempt_enable(); | 1034 | preempt_enable(); |
1036 | } | 1035 | } |
1037 | 1036 | ||
1038 | struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont) | ||
1039 | { | ||
1040 | return mem_cgroup_from_css( | ||
1041 | cgroup_subsys_state(cont, mem_cgroup_subsys_id)); | ||
1042 | } | ||
1043 | |||
1044 | struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p) | 1037 | struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p) |
1045 | { | 1038 | { |
1046 | /* | 1039 | /* |
@@ -1051,7 +1044,7 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p) | |||
1051 | if (unlikely(!p)) | 1044 | if (unlikely(!p)) |
1052 | return NULL; | 1045 | return NULL; |
1053 | 1046 | ||
1054 | return mem_cgroup_from_css(task_subsys_state(p, mem_cgroup_subsys_id)); | 1047 | return mem_cgroup_from_css(task_css(p, mem_cgroup_subsys_id)); |
1055 | } | 1048 | } |
1056 | 1049 | ||
1057 | struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm) | 1050 | struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm) |
@@ -1084,20 +1077,11 @@ struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm) | |||
1084 | static struct mem_cgroup *__mem_cgroup_iter_next(struct mem_cgroup *root, | 1077 | static struct mem_cgroup *__mem_cgroup_iter_next(struct mem_cgroup *root, |
1085 | struct mem_cgroup *last_visited) | 1078 | struct mem_cgroup *last_visited) |
1086 | { | 1079 | { |
1087 | struct cgroup *prev_cgroup, *next_cgroup; | 1080 | struct cgroup_subsys_state *prev_css, *next_css; |
1088 | 1081 | ||
1089 | /* | 1082 | prev_css = last_visited ? &last_visited->css : NULL; |
1090 | * Root is not visited by cgroup iterators so it needs an | ||
1091 | * explicit visit. | ||
1092 | */ | ||
1093 | if (!last_visited) | ||
1094 | return root; | ||
1095 | |||
1096 | prev_cgroup = (last_visited == root) ? NULL | ||
1097 | : last_visited->css.cgroup; | ||
1098 | skip_node: | 1083 | skip_node: |
1099 | next_cgroup = cgroup_next_descendant_pre( | 1084 | next_css = css_next_descendant_pre(prev_css, &root->css); |
1100 | prev_cgroup, root->css.cgroup); | ||
1101 | 1085 | ||
1102 | /* | 1086 | /* |
1103 | * Even if we found a group we have to make sure it is | 1087 | * Even if we found a group we have to make sure it is |
@@ -1106,13 +1090,13 @@ skip_node: | |||
1106 | * last_visited css is safe to use because it is | 1090 | * last_visited css is safe to use because it is |
1107 | * protected by css_get and the tree walk is rcu safe. | 1091 | * protected by css_get and the tree walk is rcu safe. |
1108 | */ | 1092 | */ |
1109 | if (next_cgroup) { | 1093 | if (next_css) { |
1110 | struct mem_cgroup *mem = mem_cgroup_from_cont( | 1094 | struct mem_cgroup *mem = mem_cgroup_from_css(next_css); |
1111 | next_cgroup); | 1095 | |
1112 | if (css_tryget(&mem->css)) | 1096 | if (css_tryget(&mem->css)) |
1113 | return mem; | 1097 | return mem; |
1114 | else { | 1098 | else { |
1115 | prev_cgroup = next_cgroup; | 1099 | prev_css = next_css; |
1116 | goto skip_node; | 1100 | goto skip_node; |
1117 | } | 1101 | } |
1118 | } | 1102 | } |
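The iterator rewrite also drops memcg's explicit root visit: unlike cgroup_next_descendant_pre(), css_next_descendant_pre() starting from NULL returns the root css itself as the first position. A sketch of the resulting pre-order walk, skipping groups that are already being torn down:

    struct cgroup_subsys_state *pos = NULL;

    rcu_read_lock();
    while ((pos = css_next_descendant_pre(pos, &root->css))) {
            struct demo_state *ds = css_demo(pos);

            if (!css_tryget(pos))   /* dying group: skip, keep walking */
                    continue;
            /* ds is pinned; the RCU lock can be dropped and retaken
             * around blocking work, as the memcg code above does */
            css_put(pos);
    }
    rcu_read_unlock();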
@@ -1525,10 +1509,8 @@ static unsigned long mem_cgroup_margin(struct mem_cgroup *memcg) | |||
1525 | 1509 | ||
1526 | int mem_cgroup_swappiness(struct mem_cgroup *memcg) | 1510 | int mem_cgroup_swappiness(struct mem_cgroup *memcg) |
1527 | { | 1511 | { |
1528 | struct cgroup *cgrp = memcg->css.cgroup; | ||
1529 | |||
1530 | /* root ? */ | 1512 | /* root ? */ |
1531 | if (cgrp->parent == NULL) | 1513 | if (!css_parent(&memcg->css)) |
1532 | return vm_swappiness; | 1514 | return vm_swappiness; |
1533 | 1515 | ||
1534 | return memcg->swappiness; | 1516 | return memcg->swappiness; |
@@ -1805,12 +1787,11 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask, | |||
1805 | check_panic_on_oom(CONSTRAINT_MEMCG, gfp_mask, order, NULL); | 1787 | check_panic_on_oom(CONSTRAINT_MEMCG, gfp_mask, order, NULL); |
1806 | totalpages = mem_cgroup_get_limit(memcg) >> PAGE_SHIFT ? : 1; | 1788 | totalpages = mem_cgroup_get_limit(memcg) >> PAGE_SHIFT ? : 1; |
1807 | for_each_mem_cgroup_tree(iter, memcg) { | 1789 | for_each_mem_cgroup_tree(iter, memcg) { |
1808 | struct cgroup *cgroup = iter->css.cgroup; | 1790 | struct css_task_iter it; |
1809 | struct cgroup_iter it; | ||
1810 | struct task_struct *task; | 1791 | struct task_struct *task; |
1811 | 1792 | ||
1812 | cgroup_iter_start(cgroup, &it); | 1793 | css_task_iter_start(&iter->css, &it); |
1813 | while ((task = cgroup_iter_next(cgroup, &it))) { | 1794 | while ((task = css_task_iter_next(&it))) { |
1814 | switch (oom_scan_process_thread(task, totalpages, NULL, | 1795 | switch (oom_scan_process_thread(task, totalpages, NULL, |
1815 | false)) { | 1796 | false)) { |
1816 | case OOM_SCAN_SELECT: | 1797 | case OOM_SCAN_SELECT: |
@@ -1823,7 +1804,7 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask, | |||
1823 | case OOM_SCAN_CONTINUE: | 1804 | case OOM_SCAN_CONTINUE: |
1824 | continue; | 1805 | continue; |
1825 | case OOM_SCAN_ABORT: | 1806 | case OOM_SCAN_ABORT: |
1826 | cgroup_iter_end(cgroup, &it); | 1807 | css_task_iter_end(&it); |
1827 | mem_cgroup_iter_break(memcg, iter); | 1808 | mem_cgroup_iter_break(memcg, iter); |
1828 | if (chosen) | 1809 | if (chosen) |
1829 | put_task_struct(chosen); | 1810 | put_task_struct(chosen); |
@@ -1840,7 +1821,7 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask, | |||
1840 | get_task_struct(chosen); | 1821 | get_task_struct(chosen); |
1841 | } | 1822 | } |
1842 | } | 1823 | } |
1843 | cgroup_iter_end(cgroup, &it); | 1824 | css_task_iter_end(&it); |
1844 | } | 1825 | } |
1845 | 1826 | ||
1846 | if (!chosen) | 1827 | if (!chosen) |
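cgroup_iter_* becomes css_task_iter_*, anchored on the css rather than the cgroup, and the protocol is unchanged: start, loop on next, and always end, even on early exit, as the OOM_SCAN_ABORT branch above does. Minimal sketch:

    struct css_task_iter it;
    struct task_struct *task;

    css_task_iter_start(&memcg->css, &it);
    while ((task = css_task_iter_next(&it))) {
            /* inspect each task attached to this css */
    }
    css_task_iter_end(&it);         /* mandatory: releases iterator state */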
@@ -2954,10 +2935,10 @@ static struct kmem_cache *memcg_params_to_cache(struct memcg_cache_params *p) | |||
2954 | } | 2935 | } |
2955 | 2936 | ||
2956 | #ifdef CONFIG_SLABINFO | 2937 | #ifdef CONFIG_SLABINFO |
2957 | static int mem_cgroup_slabinfo_read(struct cgroup *cont, struct cftype *cft, | 2938 | static int mem_cgroup_slabinfo_read(struct cgroup_subsys_state *css, |
2958 | struct seq_file *m) | 2939 | struct cftype *cft, struct seq_file *m) |
2959 | { | 2940 | { |
2960 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); | 2941 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); |
2961 | struct memcg_cache_params *params; | 2942 | struct memcg_cache_params *params; |
2962 | 2943 | ||
2963 | if (!memcg_can_account_kmem(memcg)) | 2944 | if (!memcg_can_account_kmem(memcg)) |
@@ -4943,10 +4924,10 @@ static void mem_cgroup_reparent_charges(struct mem_cgroup *memcg) | |||
4943 | */ | 4924 | */ |
4944 | static inline bool __memcg_has_children(struct mem_cgroup *memcg) | 4925 | static inline bool __memcg_has_children(struct mem_cgroup *memcg) |
4945 | { | 4926 | { |
4946 | struct cgroup *pos; | 4927 | struct cgroup_subsys_state *pos; |
4947 | 4928 | ||
4948 | /* bounce at first found */ | 4929 | /* bounce at first found */ |
4949 | cgroup_for_each_child(pos, memcg->css.cgroup) | 4930 | css_for_each_child(pos, &memcg->css) |
4950 | return true; | 4931 | return true; |
4951 | return false; | 4932 | return false; |
4952 | } | 4933 | } |
@@ -5002,9 +4983,10 @@ static int mem_cgroup_force_empty(struct mem_cgroup *memcg) | |||
5002 | return 0; | 4983 | return 0; |
5003 | } | 4984 | } |
5004 | 4985 | ||
5005 | static int mem_cgroup_force_empty_write(struct cgroup *cont, unsigned int event) | 4986 | static int mem_cgroup_force_empty_write(struct cgroup_subsys_state *css, |
4987 | unsigned int event) | ||
5006 | { | 4988 | { |
5007 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); | 4989 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); |
5008 | int ret; | 4990 | int ret; |
5009 | 4991 | ||
5010 | if (mem_cgroup_is_root(memcg)) | 4992 | if (mem_cgroup_is_root(memcg)) |
@@ -5017,21 +4999,18 @@ static int mem_cgroup_force_empty_write(struct cgroup *cont, unsigned int event) | |||
5017 | } | 4999 | } |
5018 | 5000 | ||
5019 | 5001 | ||
5020 | static u64 mem_cgroup_hierarchy_read(struct cgroup *cont, struct cftype *cft) | 5002 | static u64 mem_cgroup_hierarchy_read(struct cgroup_subsys_state *css, |
5003 | struct cftype *cft) | ||
5021 | { | 5004 | { |
5022 | return mem_cgroup_from_cont(cont)->use_hierarchy; | 5005 | return mem_cgroup_from_css(css)->use_hierarchy; |
5023 | } | 5006 | } |
5024 | 5007 | ||
5025 | static int mem_cgroup_hierarchy_write(struct cgroup *cont, struct cftype *cft, | 5008 | static int mem_cgroup_hierarchy_write(struct cgroup_subsys_state *css, |
5026 | u64 val) | 5009 | struct cftype *cft, u64 val) |
5027 | { | 5010 | { |
5028 | int retval = 0; | 5011 | int retval = 0; |
5029 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); | 5012 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); |
5030 | struct cgroup *parent = cont->parent; | 5013 | struct mem_cgroup *parent_memcg = mem_cgroup_from_css(css_parent(&memcg->css)); |
5031 | struct mem_cgroup *parent_memcg = NULL; | ||
5032 | |||
5033 | if (parent) | ||
5034 | parent_memcg = mem_cgroup_from_cont(parent); | ||
5035 | 5014 | ||
5036 | mutex_lock(&memcg_create_mutex); | 5015 | mutex_lock(&memcg_create_mutex); |
5037 | 5016 | ||
@@ -5101,11 +5080,11 @@ static inline u64 mem_cgroup_usage(struct mem_cgroup *memcg, bool swap) | |||
5101 | return val << PAGE_SHIFT; | 5080 | return val << PAGE_SHIFT; |
5102 | } | 5081 | } |
5103 | 5082 | ||
5104 | static ssize_t mem_cgroup_read(struct cgroup *cont, struct cftype *cft, | 5083 | static ssize_t mem_cgroup_read(struct cgroup_subsys_state *css, |
5105 | struct file *file, char __user *buf, | 5084 | struct cftype *cft, struct file *file, |
5106 | size_t nbytes, loff_t *ppos) | 5085 | char __user *buf, size_t nbytes, loff_t *ppos) |
5107 | { | 5086 | { |
5108 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); | 5087 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); |
5109 | char str[64]; | 5088 | char str[64]; |
5110 | u64 val; | 5089 | u64 val; |
5111 | int name, len; | 5090 | int name, len; |
@@ -5138,11 +5117,11 @@ static ssize_t mem_cgroup_read(struct cgroup *cont, struct cftype *cft, | |||
5138 | return simple_read_from_buffer(buf, nbytes, ppos, str, len); | 5117 | return simple_read_from_buffer(buf, nbytes, ppos, str, len); |
5139 | } | 5118 | } |
5140 | 5119 | ||
5141 | static int memcg_update_kmem_limit(struct cgroup *cont, u64 val) | 5120 | static int memcg_update_kmem_limit(struct cgroup_subsys_state *css, u64 val) |
5142 | { | 5121 | { |
5143 | int ret = -EINVAL; | 5122 | int ret = -EINVAL; |
5144 | #ifdef CONFIG_MEMCG_KMEM | 5123 | #ifdef CONFIG_MEMCG_KMEM |
5145 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); | 5124 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); |
5146 | /* | 5125 | /* |
5147 | * For simplicity, we won't allow this to be disabled. It also can't | 5126 | * For simplicity, we won't allow this to be disabled. It also can't |
5148 | * be changed if the cgroup has children already, or if tasks had | 5127 | * be changed if the cgroup has children already, or if tasks had |
@@ -5158,7 +5137,7 @@ static int memcg_update_kmem_limit(struct cgroup *cont, u64 val) | |||
5158 | mutex_lock(&memcg_create_mutex); | 5137 | mutex_lock(&memcg_create_mutex); |
5159 | mutex_lock(&set_limit_mutex); | 5138 | mutex_lock(&set_limit_mutex); |
5160 | if (!memcg->kmem_account_flags && val != RESOURCE_MAX) { | 5139 | if (!memcg->kmem_account_flags && val != RESOURCE_MAX) { |
5161 | if (cgroup_task_count(cont) || memcg_has_children(memcg)) { | 5140 | if (cgroup_task_count(css->cgroup) || memcg_has_children(memcg)) { |
5162 | ret = -EBUSY; | 5141 | ret = -EBUSY; |
5163 | goto out; | 5142 | goto out; |
5164 | } | 5143 | } |
@@ -5228,10 +5207,10 @@ out: | |||
5228 | * The user of this function is... | 5207 | * The user of this function is... |
5229 | * RES_LIMIT. | 5208 | * RES_LIMIT. |
5230 | */ | 5209 | */ |
5231 | static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft, | 5210 | static int mem_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft, |
5232 | const char *buffer) | 5211 | const char *buffer) |
5233 | { | 5212 | { |
5234 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); | 5213 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); |
5235 | enum res_type type; | 5214 | enum res_type type; |
5236 | int name; | 5215 | int name; |
5237 | unsigned long long val; | 5216 | unsigned long long val; |
@@ -5255,7 +5234,7 @@ static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft, | |||
5255 | else if (type == _MEMSWAP) | 5234 | else if (type == _MEMSWAP) |
5256 | ret = mem_cgroup_resize_memsw_limit(memcg, val); | 5235 | ret = mem_cgroup_resize_memsw_limit(memcg, val); |
5257 | else if (type == _KMEM) | 5236 | else if (type == _KMEM) |
5258 | ret = memcg_update_kmem_limit(cont, val); | 5237 | ret = memcg_update_kmem_limit(css, val); |
5259 | else | 5238 | else |
5260 | return -EINVAL; | 5239 | return -EINVAL; |
5261 | break; | 5240 | break; |
@@ -5283,18 +5262,15 @@ static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft, | |||
5283 | static void memcg_get_hierarchical_limit(struct mem_cgroup *memcg, | 5262 | static void memcg_get_hierarchical_limit(struct mem_cgroup *memcg, |
5284 | unsigned long long *mem_limit, unsigned long long *memsw_limit) | 5263 | unsigned long long *mem_limit, unsigned long long *memsw_limit) |
5285 | { | 5264 | { |
5286 | struct cgroup *cgroup; | ||
5287 | unsigned long long min_limit, min_memsw_limit, tmp; | 5265 | unsigned long long min_limit, min_memsw_limit, tmp; |
5288 | 5266 | ||
5289 | min_limit = res_counter_read_u64(&memcg->res, RES_LIMIT); | 5267 | min_limit = res_counter_read_u64(&memcg->res, RES_LIMIT); |
5290 | min_memsw_limit = res_counter_read_u64(&memcg->memsw, RES_LIMIT); | 5268 | min_memsw_limit = res_counter_read_u64(&memcg->memsw, RES_LIMIT); |
5291 | cgroup = memcg->css.cgroup; | ||
5292 | if (!memcg->use_hierarchy) | 5269 | if (!memcg->use_hierarchy) |
5293 | goto out; | 5270 | goto out; |
5294 | 5271 | ||
5295 | while (cgroup->parent) { | 5272 | while (css_parent(&memcg->css)) { |
5296 | cgroup = cgroup->parent; | 5273 | memcg = mem_cgroup_from_css(css_parent(&memcg->css)); |
5297 | memcg = mem_cgroup_from_cont(cgroup); | ||
5298 | if (!memcg->use_hierarchy) | 5274 | if (!memcg->use_hierarchy) |
5299 | break; | 5275 | break; |
5300 | tmp = res_counter_read_u64(&memcg->res, RES_LIMIT); | 5276 | tmp = res_counter_read_u64(&memcg->res, RES_LIMIT); |
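memcg_get_hierarchical_limit() now climbs ancestors purely through css_parent(), dropping the cached struct cgroup pointer. The same ancestor walk, sketched with a hypothetical read_limit() accessor in place of the res_counter reads:

    u64 min_limit = read_limit(memcg);

    while (css_parent(&memcg->css)) {
            memcg = mem_cgroup_from_css(css_parent(&memcg->css));
            if (!memcg->use_hierarchy)      /* hierarchy scope ends here */
                    break;
            min_limit = min(min_limit, read_limit(memcg));
    }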
@@ -5307,9 +5283,9 @@ out: | |||
5307 | *memsw_limit = min_memsw_limit; | 5283 | *memsw_limit = min_memsw_limit; |
5308 | } | 5284 | } |
5309 | 5285 | ||
5310 | static int mem_cgroup_reset(struct cgroup *cont, unsigned int event) | 5286 | static int mem_cgroup_reset(struct cgroup_subsys_state *css, unsigned int event) |
5311 | { | 5287 | { |
5312 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); | 5288 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); |
5313 | int name; | 5289 | int name; |
5314 | enum res_type type; | 5290 | enum res_type type; |
5315 | 5291 | ||
@@ -5342,17 +5318,17 @@ static int mem_cgroup_reset(struct cgroup *cont, unsigned int event) | |||
5342 | return 0; | 5318 | return 0; |
5343 | } | 5319 | } |
5344 | 5320 | ||
5345 | static u64 mem_cgroup_move_charge_read(struct cgroup *cgrp, | 5321 | static u64 mem_cgroup_move_charge_read(struct cgroup_subsys_state *css, |
5346 | struct cftype *cft) | 5322 | struct cftype *cft) |
5347 | { | 5323 | { |
5348 | return mem_cgroup_from_cont(cgrp)->move_charge_at_immigrate; | 5324 | return mem_cgroup_from_css(css)->move_charge_at_immigrate; |
5349 | } | 5325 | } |
5350 | 5326 | ||
5351 | #ifdef CONFIG_MMU | 5327 | #ifdef CONFIG_MMU |
5352 | static int mem_cgroup_move_charge_write(struct cgroup *cgrp, | 5328 | static int mem_cgroup_move_charge_write(struct cgroup_subsys_state *css, |
5353 | struct cftype *cft, u64 val) | 5329 | struct cftype *cft, u64 val) |
5354 | { | 5330 | { |
5355 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp); | 5331 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); |
5356 | 5332 | ||
5357 | if (val >= (1 << NR_MOVE_TYPE)) | 5333 | if (val >= (1 << NR_MOVE_TYPE)) |
5358 | return -EINVAL; | 5334 | return -EINVAL; |
@@ -5367,7 +5343,7 @@ static int mem_cgroup_move_charge_write(struct cgroup *cgrp, | |||
5367 | return 0; | 5343 | return 0; |
5368 | } | 5344 | } |
5369 | #else | 5345 | #else |
5370 | static int mem_cgroup_move_charge_write(struct cgroup *cgrp, | 5346 | static int mem_cgroup_move_charge_write(struct cgroup_subsys_state *css, |
5371 | struct cftype *cft, u64 val) | 5347 | struct cftype *cft, u64 val) |
5372 | { | 5348 | { |
5373 | return -ENOSYS; | 5349 | return -ENOSYS; |
@@ -5375,13 +5351,13 @@ static int mem_cgroup_move_charge_write(struct cgroup *cgrp, | |||
5375 | #endif | 5351 | #endif |
5376 | 5352 | ||
5377 | #ifdef CONFIG_NUMA | 5353 | #ifdef CONFIG_NUMA |
5378 | static int memcg_numa_stat_show(struct cgroup *cont, struct cftype *cft, | 5354 | static int memcg_numa_stat_show(struct cgroup_subsys_state *css, |
5379 | struct seq_file *m) | 5355 | struct cftype *cft, struct seq_file *m) |
5380 | { | 5356 | { |
5381 | int nid; | 5357 | int nid; |
5382 | unsigned long total_nr, file_nr, anon_nr, unevictable_nr; | 5358 | unsigned long total_nr, file_nr, anon_nr, unevictable_nr; |
5383 | unsigned long node_nr; | 5359 | unsigned long node_nr; |
5384 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); | 5360 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); |
5385 | 5361 | ||
5386 | total_nr = mem_cgroup_nr_lru_pages(memcg, LRU_ALL); | 5362 | total_nr = mem_cgroup_nr_lru_pages(memcg, LRU_ALL); |
5387 | seq_printf(m, "total=%lu", total_nr); | 5363 | seq_printf(m, "total=%lu", total_nr); |
@@ -5426,10 +5402,10 @@ static inline void mem_cgroup_lru_names_not_uptodate(void) | |||
5426 | BUILD_BUG_ON(ARRAY_SIZE(mem_cgroup_lru_names) != NR_LRU_LISTS); | 5402 | BUILD_BUG_ON(ARRAY_SIZE(mem_cgroup_lru_names) != NR_LRU_LISTS); |
5427 | } | 5403 | } |
5428 | 5404 | ||
5429 | static int memcg_stat_show(struct cgroup *cont, struct cftype *cft, | 5405 | static int memcg_stat_show(struct cgroup_subsys_state *css, struct cftype *cft, |
5430 | struct seq_file *m) | 5406 | struct seq_file *m) |
5431 | { | 5407 | { |
5432 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); | 5408 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); |
5433 | struct mem_cgroup *mi; | 5409 | struct mem_cgroup *mi; |
5434 | unsigned int i; | 5410 | unsigned int i; |
5435 | 5411 | ||
@@ -5513,27 +5489,23 @@ static int memcg_stat_show(struct cgroup *cont, struct cftype *cft, | |||
5513 | return 0; | 5489 | return 0; |
5514 | } | 5490 | } |
5515 | 5491 | ||
5516 | static u64 mem_cgroup_swappiness_read(struct cgroup *cgrp, struct cftype *cft) | 5492 | static u64 mem_cgroup_swappiness_read(struct cgroup_subsys_state *css, |
5493 | struct cftype *cft) | ||
5517 | { | 5494 | { |
5518 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp); | 5495 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); |
5519 | 5496 | ||
5520 | return mem_cgroup_swappiness(memcg); | 5497 | return mem_cgroup_swappiness(memcg); |
5521 | } | 5498 | } |
5522 | 5499 | ||
5523 | static int mem_cgroup_swappiness_write(struct cgroup *cgrp, struct cftype *cft, | 5500 | static int mem_cgroup_swappiness_write(struct cgroup_subsys_state *css, |
5524 | u64 val) | 5501 | struct cftype *cft, u64 val) |
5525 | { | 5502 | { |
5526 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp); | 5503 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); |
5527 | struct mem_cgroup *parent; | 5504 | struct mem_cgroup *parent = mem_cgroup_from_css(css_parent(&memcg->css)); |
5528 | |||
5529 | if (val > 100) | ||
5530 | return -EINVAL; | ||
5531 | 5505 | ||
5532 | if (cgrp->parent == NULL) | 5506 | if (val > 100 || !parent) |
5533 | return -EINVAL; | 5507 | return -EINVAL; |
5534 | 5508 | ||
5535 | parent = mem_cgroup_from_cont(cgrp->parent); | ||
5536 | |||
5537 | mutex_lock(&memcg_create_mutex); | 5509 | mutex_lock(&memcg_create_mutex); |
5538 | 5510 | ||
5539 | /* If under hierarchy, only empty-root can set this value */ | 5511 | /* If under hierarchy, only empty-root can set this value */ |
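This hunk shows the series' recurring simplification in miniature: the old handler needed a dedicated cgrp->parent == NULL branch plus a second mem_cgroup_from_cont() call, while the css-based version resolves the parent once via css_parent() and folds the root test into the range check. A minimal user-space sketch of the resulting shape (struct and field names are illustrative, not the kernel's):

#include <stdio.h>
#include <errno.h>

struct memcg {
	struct memcg *parent;		/* NULL for the root group */
	unsigned int swappiness;
};

static int swappiness_write(struct memcg *memcg, unsigned long long val)
{
	struct memcg *parent = memcg->parent;

	/* the range check and the root check fold into one condition */
	if (val > 100 || !parent)
		return -EINVAL;

	memcg->swappiness = (unsigned int)val;
	return 0;
}

int main(void)
{
	struct memcg root  = { .parent = NULL,  .swappiness = 60 };
	struct memcg child = { .parent = &root, .swappiness = 60 };

	printf("write to root:  %d\n", swappiness_write(&root, 40));  /* -EINVAL */
	printf("write to child: %d\n", swappiness_write(&child, 40)); /* 0 */
	return 0;
}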
@@ -5636,10 +5608,10 @@ static void mem_cgroup_oom_notify(struct mem_cgroup *memcg) | |||
5636 | mem_cgroup_oom_notify_cb(iter); | 5608 | mem_cgroup_oom_notify_cb(iter); |
5637 | } | 5609 | } |
5638 | 5610 | ||
5639 | static int mem_cgroup_usage_register_event(struct cgroup *cgrp, | 5611 | static int mem_cgroup_usage_register_event(struct cgroup_subsys_state *css, |
5640 | struct cftype *cft, struct eventfd_ctx *eventfd, const char *args) | 5612 | struct cftype *cft, struct eventfd_ctx *eventfd, const char *args) |
5641 | { | 5613 | { |
5642 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp); | 5614 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); |
5643 | struct mem_cgroup_thresholds *thresholds; | 5615 | struct mem_cgroup_thresholds *thresholds; |
5644 | struct mem_cgroup_threshold_ary *new; | 5616 | struct mem_cgroup_threshold_ary *new; |
5645 | enum res_type type = MEMFILE_TYPE(cft->private); | 5617 | enum res_type type = MEMFILE_TYPE(cft->private); |
@@ -5719,10 +5691,10 @@ unlock: | |||
5719 | return ret; | 5691 | return ret; |
5720 | } | 5692 | } |
5721 | 5693 | ||
5722 | static void mem_cgroup_usage_unregister_event(struct cgroup *cgrp, | 5694 | static void mem_cgroup_usage_unregister_event(struct cgroup_subsys_state *css, |
5723 | struct cftype *cft, struct eventfd_ctx *eventfd) | 5695 | struct cftype *cft, struct eventfd_ctx *eventfd) |
5724 | { | 5696 | { |
5725 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp); | 5697 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); |
5726 | struct mem_cgroup_thresholds *thresholds; | 5698 | struct mem_cgroup_thresholds *thresholds; |
5727 | struct mem_cgroup_threshold_ary *new; | 5699 | struct mem_cgroup_threshold_ary *new; |
5728 | enum res_type type = MEMFILE_TYPE(cft->private); | 5700 | enum res_type type = MEMFILE_TYPE(cft->private); |
@@ -5798,10 +5770,10 @@ unlock: | |||
5798 | mutex_unlock(&memcg->thresholds_lock); | 5770 | mutex_unlock(&memcg->thresholds_lock); |
5799 | } | 5771 | } |
5800 | 5772 | ||
5801 | static int mem_cgroup_oom_register_event(struct cgroup *cgrp, | 5773 | static int mem_cgroup_oom_register_event(struct cgroup_subsys_state *css, |
5802 | struct cftype *cft, struct eventfd_ctx *eventfd, const char *args) | 5774 | struct cftype *cft, struct eventfd_ctx *eventfd, const char *args) |
5803 | { | 5775 | { |
5804 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp); | 5776 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); |
5805 | struct mem_cgroup_eventfd_list *event; | 5777 | struct mem_cgroup_eventfd_list *event; |
5806 | enum res_type type = MEMFILE_TYPE(cft->private); | 5778 | enum res_type type = MEMFILE_TYPE(cft->private); |
5807 | 5779 | ||
@@ -5823,10 +5795,10 @@ static int mem_cgroup_oom_register_event(struct cgroup *cgrp, | |||
5823 | return 0; | 5795 | return 0; |
5824 | } | 5796 | } |
5825 | 5797 | ||
5826 | static void mem_cgroup_oom_unregister_event(struct cgroup *cgrp, | 5798 | static void mem_cgroup_oom_unregister_event(struct cgroup_subsys_state *css, |
5827 | struct cftype *cft, struct eventfd_ctx *eventfd) | 5799 | struct cftype *cft, struct eventfd_ctx *eventfd) |
5828 | { | 5800 | { |
5829 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp); | 5801 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); |
5830 | struct mem_cgroup_eventfd_list *ev, *tmp; | 5802 | struct mem_cgroup_eventfd_list *ev, *tmp; |
5831 | enum res_type type = MEMFILE_TYPE(cft->private); | 5803 | enum res_type type = MEMFILE_TYPE(cft->private); |
5832 | 5804 | ||
@@ -5844,10 +5816,10 @@ static void mem_cgroup_oom_unregister_event(struct cgroup *cgrp, | |||
5844 | spin_unlock(&memcg_oom_lock); | 5816 | spin_unlock(&memcg_oom_lock); |
5845 | } | 5817 | } |
5846 | 5818 | ||
5847 | static int mem_cgroup_oom_control_read(struct cgroup *cgrp, | 5819 | static int mem_cgroup_oom_control_read(struct cgroup_subsys_state *css, |
5848 | struct cftype *cft, struct cgroup_map_cb *cb) | 5820 | struct cftype *cft, struct cgroup_map_cb *cb) |
5849 | { | 5821 | { |
5850 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp); | 5822 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); |
5851 | 5823 | ||
5852 | cb->fill(cb, "oom_kill_disable", memcg->oom_kill_disable); | 5824 | cb->fill(cb, "oom_kill_disable", memcg->oom_kill_disable); |
5853 | 5825 | ||
@@ -5858,18 +5830,16 @@ static int mem_cgroup_oom_control_read(struct cgroup *cgrp, | |||
5858 | return 0; | 5830 | return 0; |
5859 | } | 5831 | } |
5860 | 5832 | ||
5861 | static int mem_cgroup_oom_control_write(struct cgroup *cgrp, | 5833 | static int mem_cgroup_oom_control_write(struct cgroup_subsys_state *css, |
5862 | struct cftype *cft, u64 val) | 5834 | struct cftype *cft, u64 val) |
5863 | { | 5835 | { |
5864 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp); | 5836 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); |
5865 | struct mem_cgroup *parent; | 5837 | struct mem_cgroup *parent = mem_cgroup_from_css(css_parent(&memcg->css)); |
5866 | 5838 | ||
5867 | /* cannot set to root cgroup and only 0 and 1 are allowed */ | 5839 | /* cannot set to root cgroup and only 0 and 1 are allowed */ |
5868 | if (!cgrp->parent || !((val == 0) || (val == 1))) | 5840 | if (!parent || !((val == 0) || (val == 1))) |
5869 | return -EINVAL; | 5841 | return -EINVAL; |
5870 | 5842 | ||
5871 | parent = mem_cgroup_from_cont(cgrp->parent); | ||
5872 | |||
5873 | mutex_lock(&memcg_create_mutex); | 5843 | mutex_lock(&memcg_create_mutex); |
5874 | /* oom-kill-disable is a flag for subhierarchy. */ | 5844 | /* oom-kill-disable is a flag for subhierarchy. */ |
5875 | if ((parent->use_hierarchy) || memcg_has_children(memcg)) { | 5845 | if ((parent->use_hierarchy) || memcg_has_children(memcg)) { |
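mem_cgroup_oom_control_write() gets the same treatment, and both writers share a second guard: a hierarchical flag may only change where the change cannot be observed part-way down a subtree. A condensed model of that check (toy types; the kernel holds memcg_create_mutex around the test, as the hunk shows):

#include <stdio.h>
#include <errno.h>
#include <stdbool.h>

struct group {
	struct group *parent;		/* NULL at the root */
	bool use_hierarchy;
	bool has_children;
	bool oom_kill_disable;
};

static int oom_control_write(struct group *g, unsigned long long val)
{
	struct group *parent = g->parent;

	/* cannot set on the root cgroup; only 0 and 1 are allowed */
	if (!parent || (val != 0 && val != 1))
		return -EINVAL;

	/* under hierarchy, only a childless group may flip the flag */
	if (parent->use_hierarchy || g->has_children)
		return -EINVAL;

	g->oom_kill_disable = val;
	return 0;
}

int main(void)
{
	struct group root  = { 0 };
	struct group child = { .parent = &root };

	printf("child: %d\n", oom_control_write(&child, 1));	/* 0 */
	printf("root:  %d\n", oom_control_write(&root, 1));	/* -EINVAL */
	return 0;
}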
@@ -6228,7 +6198,7 @@ static void __init mem_cgroup_soft_limit_tree_init(void) | |||
6228 | } | 6198 | } |
6229 | 6199 | ||
6230 | static struct cgroup_subsys_state * __ref | 6200 | static struct cgroup_subsys_state * __ref |
6231 | mem_cgroup_css_alloc(struct cgroup *cont) | 6201 | mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) |
6232 | { | 6202 | { |
6233 | struct mem_cgroup *memcg; | 6203 | struct mem_cgroup *memcg; |
6234 | long error = -ENOMEM; | 6204 | long error = -ENOMEM; |
@@ -6243,7 +6213,7 @@ mem_cgroup_css_alloc(struct cgroup *cont) | |||
6243 | goto free_out; | 6213 | goto free_out; |
6244 | 6214 | ||
6245 | /* root ? */ | 6215 | /* root ? */ |
6246 | if (cont->parent == NULL) { | 6216 | if (parent_css == NULL) { |
6247 | root_mem_cgroup = memcg; | 6217 | root_mem_cgroup = memcg; |
6248 | res_counter_init(&memcg->res, NULL); | 6218 | res_counter_init(&memcg->res, NULL); |
6249 | res_counter_init(&memcg->memsw, NULL); | 6219 | res_counter_init(&memcg->memsw, NULL); |
@@ -6265,17 +6235,16 @@ free_out: | |||
6265 | } | 6235 | } |
6266 | 6236 | ||
6267 | static int | 6237 | static int |
6268 | mem_cgroup_css_online(struct cgroup *cont) | 6238 | mem_cgroup_css_online(struct cgroup_subsys_state *css) |
6269 | { | 6239 | { |
6270 | struct mem_cgroup *memcg, *parent; | 6240 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); |
6241 | struct mem_cgroup *parent = mem_cgroup_from_css(css_parent(css)); | ||
6271 | int error = 0; | 6242 | int error = 0; |
6272 | 6243 | ||
6273 | if (!cont->parent) | 6244 | if (!parent) |
6274 | return 0; | 6245 | return 0; |
6275 | 6246 | ||
6276 | mutex_lock(&memcg_create_mutex); | 6247 | mutex_lock(&memcg_create_mutex); |
6277 | memcg = mem_cgroup_from_cont(cont); | ||
6278 | parent = mem_cgroup_from_cont(cont->parent); | ||
6279 | 6248 | ||
6280 | memcg->use_hierarchy = parent->use_hierarchy; | 6249 | memcg->use_hierarchy = parent->use_hierarchy; |
6281 | memcg->oom_kill_disable = parent->oom_kill_disable; | 6250 | memcg->oom_kill_disable = parent->oom_kill_disable; |
@@ -6326,9 +6295,9 @@ static void mem_cgroup_invalidate_reclaim_iterators(struct mem_cgroup *memcg) | |||
6326 | mem_cgroup_iter_invalidate(root_mem_cgroup); | 6295 | mem_cgroup_iter_invalidate(root_mem_cgroup); |
6327 | } | 6296 | } |
6328 | 6297 | ||
6329 | static void mem_cgroup_css_offline(struct cgroup *cont) | 6298 | static void mem_cgroup_css_offline(struct cgroup_subsys_state *css) |
6330 | { | 6299 | { |
6331 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); | 6300 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); |
6332 | 6301 | ||
6333 | kmem_cgroup_css_offline(memcg); | 6302 | kmem_cgroup_css_offline(memcg); |
6334 | 6303 | ||
@@ -6338,9 +6307,9 @@ static void mem_cgroup_css_offline(struct cgroup *cont) | |||
6338 | vmpressure_cleanup(&memcg->vmpressure); | 6307 | vmpressure_cleanup(&memcg->vmpressure); |
6339 | } | 6308 | } |
6340 | 6309 | ||
6341 | static void mem_cgroup_css_free(struct cgroup *cont) | 6310 | static void mem_cgroup_css_free(struct cgroup_subsys_state *css) |
6342 | { | 6311 | { |
6343 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); | 6312 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); |
6344 | 6313 | ||
6345 | memcg_destroy_kmem(memcg); | 6314 | memcg_destroy_kmem(memcg); |
6346 | __mem_cgroup_free(memcg); | 6315 | __mem_cgroup_free(memcg); |
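With the lifecycle callbacks converted, ->css_alloc() receives the parent's css (NULL for the root), ->css_online() inherits settings from the parent resolved via css_parent(), and ->css_free() releases the state. A compact user-space model of that flow, with illustrative names:

#include <stdlib.h>
#include <stdbool.h>

struct state {
	struct state *parent;		/* NULL marks the root */
	bool use_hierarchy;
};

static struct state *css_alloc(struct state *parent)	/* <- parent_css */
{
	struct state *s = calloc(1, sizeof(*s));

	if (s)
		s->parent = parent;
	return s;
}

static int css_online(struct state *s)
{
	if (!s->parent)			/* root: nothing to inherit */
		return 0;
	s->use_hierarchy = s->parent->use_hierarchy;
	return 0;
}

static void css_free(struct state *s)
{
	free(s);
}

int main(void)
{
	struct state *root = css_alloc(NULL);
	struct state *child;

	if (!root)
		return 1;
	root->use_hierarchy = true;

	child = css_alloc(root);
	if (child) {
		css_online(child);	/* inherits use_hierarchy from root */
		css_free(child);
	}
	css_free(root);
	return 0;
}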
@@ -6710,12 +6679,12 @@ static void mem_cgroup_clear_mc(void) | |||
6710 | mem_cgroup_end_move(from); | 6679 | mem_cgroup_end_move(from); |
6711 | } | 6680 | } |
6712 | 6681 | ||
6713 | static int mem_cgroup_can_attach(struct cgroup *cgroup, | 6682 | static int mem_cgroup_can_attach(struct cgroup_subsys_state *css, |
6714 | struct cgroup_taskset *tset) | 6683 | struct cgroup_taskset *tset) |
6715 | { | 6684 | { |
6716 | struct task_struct *p = cgroup_taskset_first(tset); | 6685 | struct task_struct *p = cgroup_taskset_first(tset); |
6717 | int ret = 0; | 6686 | int ret = 0; |
6718 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cgroup); | 6687 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); |
6719 | unsigned long move_charge_at_immigrate; | 6688 | unsigned long move_charge_at_immigrate; |
6720 | 6689 | ||
6721 | /* | 6690 | /* |
@@ -6757,7 +6726,7 @@ static int mem_cgroup_can_attach(struct cgroup *cgroup, | |||
6757 | return ret; | 6726 | return ret; |
6758 | } | 6727 | } |
6759 | 6728 | ||
6760 | static void mem_cgroup_cancel_attach(struct cgroup *cgroup, | 6729 | static void mem_cgroup_cancel_attach(struct cgroup_subsys_state *css, |
6761 | struct cgroup_taskset *tset) | 6730 | struct cgroup_taskset *tset) |
6762 | { | 6731 | { |
6763 | mem_cgroup_clear_mc(); | 6732 | mem_cgroup_clear_mc(); |
@@ -6905,7 +6874,7 @@ retry: | |||
6905 | up_read(&mm->mmap_sem); | 6874 | up_read(&mm->mmap_sem); |
6906 | } | 6875 | } |
6907 | 6876 | ||
6908 | static void mem_cgroup_move_task(struct cgroup *cont, | 6877 | static void mem_cgroup_move_task(struct cgroup_subsys_state *css, |
6909 | struct cgroup_taskset *tset) | 6878 | struct cgroup_taskset *tset) |
6910 | { | 6879 | { |
6911 | struct task_struct *p = cgroup_taskset_first(tset); | 6880 | struct task_struct *p = cgroup_taskset_first(tset); |
@@ -6920,16 +6889,16 @@ static void mem_cgroup_move_task(struct cgroup *cont, | |||
6920 | mem_cgroup_clear_mc(); | 6889 | mem_cgroup_clear_mc(); |
6921 | } | 6890 | } |
6922 | #else /* !CONFIG_MMU */ | 6891 | #else /* !CONFIG_MMU */ |
6923 | static int mem_cgroup_can_attach(struct cgroup *cgroup, | 6892 | static int mem_cgroup_can_attach(struct cgroup_subsys_state *css, |
6924 | struct cgroup_taskset *tset) | 6893 | struct cgroup_taskset *tset) |
6925 | { | 6894 | { |
6926 | return 0; | 6895 | return 0; |
6927 | } | 6896 | } |
6928 | static void mem_cgroup_cancel_attach(struct cgroup *cgroup, | 6897 | static void mem_cgroup_cancel_attach(struct cgroup_subsys_state *css, |
6929 | struct cgroup_taskset *tset) | 6898 | struct cgroup_taskset *tset) |
6930 | { | 6899 | { |
6931 | } | 6900 | } |
6932 | static void mem_cgroup_move_task(struct cgroup *cont, | 6901 | static void mem_cgroup_move_task(struct cgroup_subsys_state *css, |
6933 | struct cgroup_taskset *tset) | 6902 | struct cgroup_taskset *tset) |
6934 | { | 6903 | { |
6935 | } | 6904 | } |
@@ -6939,15 +6908,15 @@ static void mem_cgroup_move_task(struct cgroup *cont, | |||
6939 | * Cgroup retains root cgroups across [un]mount cycles making it necessary | 6908 | * Cgroup retains root cgroups across [un]mount cycles making it necessary |
6940 | * to verify sane_behavior flag on each mount attempt. | 6909 | * to verify sane_behavior flag on each mount attempt. |
6941 | */ | 6910 | */ |
6942 | static void mem_cgroup_bind(struct cgroup *root) | 6911 | static void mem_cgroup_bind(struct cgroup_subsys_state *root_css) |
6943 | { | 6912 | { |
6944 | /* | 6913 | /* |
6945 | * use_hierarchy is forced with sane_behavior. cgroup core | 6914 | * use_hierarchy is forced with sane_behavior. cgroup core |
6946 | * guarantees that @root doesn't have any children, so turning it | 6915 | * guarantees that @root doesn't have any children, so turning it |
6947 | * on for the root memcg is enough. | 6916 | * on for the root memcg is enough. |
6948 | */ | 6917 | */ |
6949 | if (cgroup_sane_behavior(root)) | 6918 | if (cgroup_sane_behavior(root_css->cgroup)) |
6950 | mem_cgroup_from_cont(root)->use_hierarchy = true; | 6919 | mem_cgroup_from_css(root_css)->use_hierarchy = true; |
6951 | } | 6920 | } |
6952 | 6921 | ||
6953 | struct cgroup_subsys mem_cgroup_subsys = { | 6922 | struct cgroup_subsys mem_cgroup_subsys = { |
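The whole memcontrol conversion leans on one idiom: mem_cgroup_from_css() is a container_of() downcast that, like css_cls_state() and css_to_devcgroup() further down, tolerates a NULL css so callers can chain it with css_parent() without a separate root check. A self-contained sketch of the idiom (the struct names are stand-ins):

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct css {				/* stands in for cgroup_subsys_state */
	struct css *parent;
};

struct dev_state {			/* stands in for struct dev_cgroup */
	int behavior;
	struct css css;			/* embedded: container_of() recovers it */
};

/* tolerating NULL lets callers chain css_to_state(css_parent(css)) */
static struct dev_state *css_to_state(struct css *css)
{
	return css ? container_of(css, struct dev_state, css) : NULL;
}

int main(void)
{
	struct dev_state root = { .behavior = 1, .css = { .parent = NULL } };
	struct dev_state child = { .behavior = 0,
				   .css = { .parent = &root.css } };
	struct dev_state *parent = css_to_state(child.css.parent);

	printf("parent behavior: %d\n", parent ? parent->behavior : -1);
	printf("root has parent: %s\n",
	       css_to_state(root.css.parent) ? "yes" : "no");
	return 0;
}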
diff --git a/mm/vmpressure.c b/mm/vmpressure.c index 0c1e37d829fa..e0f62837c3f4 100644 --- a/mm/vmpressure.c +++ b/mm/vmpressure.c | |||
@@ -74,15 +74,10 @@ static struct vmpressure *work_to_vmpressure(struct work_struct *work) | |||
74 | return container_of(work, struct vmpressure, work); | 74 | return container_of(work, struct vmpressure, work); |
75 | } | 75 | } |
76 | 76 | ||
77 | static struct vmpressure *cg_to_vmpressure(struct cgroup *cg) | ||
78 | { | ||
79 | return css_to_vmpressure(cgroup_subsys_state(cg, mem_cgroup_subsys_id)); | ||
80 | } | ||
81 | |||
82 | static struct vmpressure *vmpressure_parent(struct vmpressure *vmpr) | 77 | static struct vmpressure *vmpressure_parent(struct vmpressure *vmpr) |
83 | { | 78 | { |
84 | struct cgroup *cg = vmpressure_to_css(vmpr)->cgroup; | 79 | struct cgroup_subsys_state *css = vmpressure_to_css(vmpr); |
85 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cg); | 80 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); |
86 | 81 | ||
87 | memcg = parent_mem_cgroup(memcg); | 82 | memcg = parent_mem_cgroup(memcg); |
88 | if (!memcg) | 83 | if (!memcg) |
@@ -283,7 +278,7 @@ void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, int prio) | |||
283 | 278 | ||
284 | /** | 279 | /** |
285 | * vmpressure_register_event() - Bind vmpressure notifications to an eventfd | 280 | * vmpressure_register_event() - Bind vmpressure notifications to an eventfd |
286 | * @cg: cgroup that is interested in vmpressure notifications | 281 | * @css: css that is interested in vmpressure notifications |
287 | * @cft: cgroup control files handle | 282 | * @cft: cgroup control files handle |
288 | * @eventfd: eventfd context to link notifications with | 283 | * @eventfd: eventfd context to link notifications with |
289 | * @args: event arguments (used to set up a pressure level threshold) | 284 | * @args: event arguments (used to set up a pressure level threshold) |
@@ -298,10 +293,11 @@ void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, int prio) | |||
298 | * cftype).register_event, and then cgroup core will handle everything by | 293 | * cftype).register_event, and then cgroup core will handle everything by |
299 | * itself. | 294 | * itself. |
300 | */ | 295 | */ |
301 | int vmpressure_register_event(struct cgroup *cg, struct cftype *cft, | 296 | int vmpressure_register_event(struct cgroup_subsys_state *css, |
302 | struct eventfd_ctx *eventfd, const char *args) | 297 | struct cftype *cft, struct eventfd_ctx *eventfd, |
298 | const char *args) | ||
303 | { | 299 | { |
304 | struct vmpressure *vmpr = cg_to_vmpressure(cg); | 300 | struct vmpressure *vmpr = css_to_vmpressure(css); |
305 | struct vmpressure_event *ev; | 301 | struct vmpressure_event *ev; |
306 | int level; | 302 | int level; |
307 | 303 | ||
@@ -329,7 +325,7 @@ int vmpressure_register_event(struct cgroup *cg, struct cftype *cft, | |||
329 | 325 | ||
330 | /** | 326 | /** |
331 | * vmpressure_unregister_event() - Unbind eventfd from vmpressure | 327 | * vmpressure_unregister_event() - Unbind eventfd from vmpressure |
332 | * @cg: cgroup handle | 328 | * @css: css handle |
333 | * @cft: cgroup control files handle | 329 | * @cft: cgroup control files handle |
334 | * @eventfd: eventfd context that was used to link vmpressure with the @cg | 330 | * @eventfd: eventfd context that was used to link vmpressure with the @css |
335 | * | 331 | * |
@@ -341,10 +337,11 @@ int vmpressure_register_event(struct cgroup *cg, struct cftype *cft, | |||
341 | * cftype).unregister_event, and then cgroup core will handle everything | 337 | * cftype).unregister_event, and then cgroup core will handle everything |
342 | * by itself. | 338 | * by itself. |
343 | */ | 339 | */ |
344 | void vmpressure_unregister_event(struct cgroup *cg, struct cftype *cft, | 340 | void vmpressure_unregister_event(struct cgroup_subsys_state *css, |
341 | struct cftype *cft, | ||
345 | struct eventfd_ctx *eventfd) | 342 | struct eventfd_ctx *eventfd) |
346 | { | 343 | { |
347 | struct vmpressure *vmpr = cg_to_vmpressure(cg); | 344 | struct vmpressure *vmpr = css_to_vmpressure(css); |
348 | struct vmpressure_event *ev; | 345 | struct vmpressure_event *ev; |
349 | 346 | ||
350 | mutex_lock(&vmpr->events_lock); | 347 | mutex_lock(&vmpr->events_lock); |
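vmpressure's (un)registration now receives the css directly, so the cg_to_vmpressure() hop is deleted. Stripped of level parsing and locking, the registration list it maintains looks roughly like this (toy types; the real code guards the list with vmpr->events_lock as the hunks above show):

#include <stdlib.h>
#include <errno.h>

struct event {
	int eventfd;			/* stands in for struct eventfd_ctx * */
	int level;
	struct event *next;
};

struct vmpressure {
	struct event *events;		/* events_lock-protected in the kernel */
};

static int register_event(struct vmpressure *vmpr, int eventfd, int level)
{
	struct event *ev = malloc(sizeof(*ev));

	if (!ev)
		return -ENOMEM;
	ev->eventfd = eventfd;
	ev->level = level;
	ev->next = vmpr->events;
	vmpr->events = ev;
	return 0;
}

static void unregister_event(struct vmpressure *vmpr, int eventfd)
{
	struct event **p = &vmpr->events;

	while (*p) {
		if ((*p)->eventfd == eventfd) {
			struct event *dead = *p;

			*p = dead->next;
			free(dead);
			return;
		}
		p = &(*p)->next;
	}
}

In the kernel, the level comes from parsing @args, which per the kernel-doc above carries the pressure-level threshold the eventfd should fire at.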
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index e533259dce3c..d9cd627e6a16 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c | |||
@@ -29,12 +29,6 @@ | |||
29 | 29 | ||
30 | #define PRIOMAP_MIN_SZ 128 | 30 | #define PRIOMAP_MIN_SZ 128 |
31 | 31 | ||
32 | static inline struct cgroup_netprio_state *cgrp_netprio_state(struct cgroup *cgrp) | ||
33 | { | ||
34 | return container_of(cgroup_subsys_state(cgrp, net_prio_subsys_id), | ||
35 | struct cgroup_netprio_state, css); | ||
36 | } | ||
37 | |||
38 | /* | 32 | /* |
39 | * Extend @dev->priomap so that it's large enough to accommodate | 33 | * Extend @dev->priomap so that it's large enough to accommodate |
40 | * @target_idx. @dev->priomap.priomap_len > @target_idx after successful | 34 | * @target_idx. @dev->priomap.priomap_len > @target_idx after successful |
@@ -87,67 +81,70 @@ static int extend_netdev_table(struct net_device *dev, u32 target_idx) | |||
87 | 81 | ||
88 | /** | 82 | /** |
89 | * netprio_prio - return the effective netprio of a cgroup-net_device pair | 83 | * netprio_prio - return the effective netprio of a cgroup-net_device pair |
90 | * @cgrp: cgroup part of the target pair | 84 | * @css: css part of the target pair |
91 | * @dev: net_device part of the target pair | 85 | * @dev: net_device part of the target pair |
92 | * | 86 | * |
93 | * Should be called under RCU read or rtnl lock. | 87 | * Should be called under RCU read or rtnl lock. |
94 | */ | 88 | */ |
95 | static u32 netprio_prio(struct cgroup *cgrp, struct net_device *dev) | 89 | static u32 netprio_prio(struct cgroup_subsys_state *css, struct net_device *dev) |
96 | { | 90 | { |
97 | struct netprio_map *map = rcu_dereference_rtnl(dev->priomap); | 91 | struct netprio_map *map = rcu_dereference_rtnl(dev->priomap); |
92 | int id = css->cgroup->id; | ||
98 | 93 | ||
99 | if (map && cgrp->id < map->priomap_len) | 94 | if (map && id < map->priomap_len) |
100 | return map->priomap[cgrp->id]; | 95 | return map->priomap[id]; |
101 | return 0; | 96 | return 0; |
102 | } | 97 | } |
103 | 98 | ||
104 | /** | 99 | /** |
105 | * netprio_set_prio - set netprio on a cgroup-net_device pair | 100 | * netprio_set_prio - set netprio on a cgroup-net_device pair |
106 | * @cgrp: cgroup part of the target pair | 101 | * @css: css part of the target pair |
107 | * @dev: net_device part of the target pair | 102 | * @dev: net_device part of the target pair |
108 | * @prio: prio to set | 103 | * @prio: prio to set |
109 | * | 104 | * |
110 | * Set netprio to @prio on @cgrp-@dev pair. Should be called under rtnl | 105 | * Set netprio to @prio on @css-@dev pair. Should be called under rtnl |
111 | * lock and may fail under memory pressure for non-zero @prio. | 106 | * lock and may fail under memory pressure for non-zero @prio. |
112 | */ | 107 | */ |
113 | static int netprio_set_prio(struct cgroup *cgrp, struct net_device *dev, | 108 | static int netprio_set_prio(struct cgroup_subsys_state *css, |
114 | u32 prio) | 109 | struct net_device *dev, u32 prio) |
115 | { | 110 | { |
116 | struct netprio_map *map; | 111 | struct netprio_map *map; |
112 | int id = css->cgroup->id; | ||
117 | int ret; | 113 | int ret; |
118 | 114 | ||
119 | /* avoid extending priomap for zero writes */ | 115 | /* avoid extending priomap for zero writes */ |
120 | map = rtnl_dereference(dev->priomap); | 116 | map = rtnl_dereference(dev->priomap); |
121 | if (!prio && (!map || map->priomap_len <= cgrp->id)) | 117 | if (!prio && (!map || map->priomap_len <= id)) |
122 | return 0; | 118 | return 0; |
123 | 119 | ||
124 | ret = extend_netdev_table(dev, cgrp->id); | 120 | ret = extend_netdev_table(dev, id); |
125 | if (ret) | 121 | if (ret) |
126 | return ret; | 122 | return ret; |
127 | 123 | ||
128 | map = rtnl_dereference(dev->priomap); | 124 | map = rtnl_dereference(dev->priomap); |
129 | map->priomap[cgrp->id] = prio; | 125 | map->priomap[id] = prio; |
130 | return 0; | 126 | return 0; |
131 | } | 127 | } |
132 | 128 | ||
133 | static struct cgroup_subsys_state *cgrp_css_alloc(struct cgroup *cgrp) | 129 | static struct cgroup_subsys_state * |
130 | cgrp_css_alloc(struct cgroup_subsys_state *parent_css) | ||
134 | { | 131 | { |
135 | struct cgroup_netprio_state *cs; | 132 | struct cgroup_subsys_state *css; |
136 | 133 | ||
137 | cs = kzalloc(sizeof(*cs), GFP_KERNEL); | 134 | css = kzalloc(sizeof(*css), GFP_KERNEL); |
138 | if (!cs) | 135 | if (!css) |
139 | return ERR_PTR(-ENOMEM); | 136 | return ERR_PTR(-ENOMEM); |
140 | 137 | ||
141 | return &cs->css; | 138 | return css; |
142 | } | 139 | } |
143 | 140 | ||
144 | static int cgrp_css_online(struct cgroup *cgrp) | 141 | static int cgrp_css_online(struct cgroup_subsys_state *css) |
145 | { | 142 | { |
146 | struct cgroup *parent = cgrp->parent; | 143 | struct cgroup_subsys_state *parent_css = css_parent(css); |
147 | struct net_device *dev; | 144 | struct net_device *dev; |
148 | int ret = 0; | 145 | int ret = 0; |
149 | 146 | ||
150 | if (!parent) | 147 | if (!parent_css) |
151 | return 0; | 148 | return 0; |
152 | 149 | ||
153 | rtnl_lock(); | 150 | rtnl_lock(); |
@@ -156,9 +153,9 @@ static int cgrp_css_online(struct cgroup *cgrp) | |||
156 | * onlining, there is no need to clear them on offline. | 153 | * onlining, there is no need to clear them on offline. |
157 | */ | 154 | */ |
158 | for_each_netdev(&init_net, dev) { | 155 | for_each_netdev(&init_net, dev) { |
159 | u32 prio = netprio_prio(parent, dev); | 156 | u32 prio = netprio_prio(parent_css, dev); |
160 | 157 | ||
161 | ret = netprio_set_prio(cgrp, dev, prio); | 158 | ret = netprio_set_prio(css, dev, prio); |
162 | if (ret) | 159 | if (ret) |
163 | break; | 160 | break; |
164 | } | 161 | } |
@@ -166,29 +163,29 @@ static int cgrp_css_online(struct cgroup *cgrp) | |||
166 | return ret; | 163 | return ret; |
167 | } | 164 | } |
168 | 165 | ||
169 | static void cgrp_css_free(struct cgroup *cgrp) | 166 | static void cgrp_css_free(struct cgroup_subsys_state *css) |
170 | { | 167 | { |
171 | kfree(cgrp_netprio_state(cgrp)); | 168 | kfree(css); |
172 | } | 169 | } |
173 | 170 | ||
174 | static u64 read_prioidx(struct cgroup *cgrp, struct cftype *cft) | 171 | static u64 read_prioidx(struct cgroup_subsys_state *css, struct cftype *cft) |
175 | { | 172 | { |
176 | return cgrp->id; | 173 | return css->cgroup->id; |
177 | } | 174 | } |
178 | 175 | ||
179 | static int read_priomap(struct cgroup *cont, struct cftype *cft, | 176 | static int read_priomap(struct cgroup_subsys_state *css, struct cftype *cft, |
180 | struct cgroup_map_cb *cb) | 177 | struct cgroup_map_cb *cb) |
181 | { | 178 | { |
182 | struct net_device *dev; | 179 | struct net_device *dev; |
183 | 180 | ||
184 | rcu_read_lock(); | 181 | rcu_read_lock(); |
185 | for_each_netdev_rcu(&init_net, dev) | 182 | for_each_netdev_rcu(&init_net, dev) |
186 | cb->fill(cb, dev->name, netprio_prio(cont, dev)); | 183 | cb->fill(cb, dev->name, netprio_prio(css, dev)); |
187 | rcu_read_unlock(); | 184 | rcu_read_unlock(); |
188 | return 0; | 185 | return 0; |
189 | } | 186 | } |
190 | 187 | ||
191 | static int write_priomap(struct cgroup *cgrp, struct cftype *cft, | 188 | static int write_priomap(struct cgroup_subsys_state *css, struct cftype *cft, |
192 | const char *buffer) | 189 | const char *buffer) |
193 | { | 190 | { |
194 | char devname[IFNAMSIZ + 1]; | 191 | char devname[IFNAMSIZ + 1]; |
@@ -205,7 +202,7 @@ static int write_priomap(struct cgroup *cgrp, struct cftype *cft, | |||
205 | 202 | ||
206 | rtnl_lock(); | 203 | rtnl_lock(); |
207 | 204 | ||
208 | ret = netprio_set_prio(cgrp, dev, prio); | 205 | ret = netprio_set_prio(css, dev, prio); |
209 | 206 | ||
210 | rtnl_unlock(); | 207 | rtnl_unlock(); |
211 | dev_put(dev); | 208 | dev_put(dev); |
@@ -221,12 +218,13 @@ static int update_netprio(const void *v, struct file *file, unsigned n) | |||
221 | return 0; | 218 | return 0; |
222 | } | 219 | } |
223 | 220 | ||
224 | static void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) | 221 | static void net_prio_attach(struct cgroup_subsys_state *css, |
222 | struct cgroup_taskset *tset) | ||
225 | { | 223 | { |
226 | struct task_struct *p; | 224 | struct task_struct *p; |
227 | void *v; | 225 | void *v; |
228 | 226 | ||
229 | cgroup_taskset_for_each(p, cgrp, tset) { | 227 | cgroup_taskset_for_each(p, css, tset) { |
230 | task_lock(p); | 228 | task_lock(p); |
231 | v = (void *)(unsigned long)task_netprioidx(p); | 229 | v = (void *)(unsigned long)task_netprioidx(p); |
232 | iterate_fd(p->files, 0, update_netprio, v); | 230 | iterate_fd(p->files, 0, update_netprio, v); |
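netprio's table logic survives the conversion unchanged apart from indexing by css->cgroup->id: reads past the end of the priomap report priority 0, and zero writes skip extending the table. A user-space model of exactly that read/write pair (the kernel's table hangs off struct net_device instead):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>

struct priomap {
	unsigned int *prio;
	int len;
};

/* reads past the table end report priority 0 */
static unsigned int map_read(const struct priomap *map, int id)
{
	return (map->prio && id < map->len) ? map->prio[id] : 0;
}

static int map_write(struct priomap *map, int id, unsigned int prio)
{
	/* avoid extending the table for zero writes, as the hunk does */
	if (!prio && id >= map->len)
		return 0;

	if (id >= map->len) {
		int new_len = id + 1;
		unsigned int *p = realloc(map->prio, new_len * sizeof(*p));

		if (!p)
			return -ENOMEM;
		memset(p + map->len, 0, (new_len - map->len) * sizeof(*p));
		map->prio = p;
		map->len = new_len;
	}
	map->prio[id] = prio;
	return 0;
}

int main(void)
{
	struct priomap map = { NULL, 0 };

	printf("before: %u\n", map_read(&map, 5));	/* 0, no table yet */
	map_write(&map, 5, 7);				/* extends to len 6 */
	printf("after:  %u\n", map_read(&map, 5));	/* 7 */
	free(map.prio);
	return 0;
}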
diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index da14436c1735..8a57d79b0b16 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c | |||
@@ -132,10 +132,10 @@ static int tcp_update_limit(struct mem_cgroup *memcg, u64 val) | |||
132 | return 0; | 132 | return 0; |
133 | } | 133 | } |
134 | 134 | ||
135 | static int tcp_cgroup_write(struct cgroup *cont, struct cftype *cft, | 135 | static int tcp_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft, |
136 | const char *buffer) | 136 | const char *buffer) |
137 | { | 137 | { |
138 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); | 138 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); |
139 | unsigned long long val; | 139 | unsigned long long val; |
140 | int ret = 0; | 140 | int ret = 0; |
141 | 141 | ||
@@ -180,9 +180,9 @@ static u64 tcp_read_usage(struct mem_cgroup *memcg) | |||
180 | return res_counter_read_u64(&tcp->tcp_memory_allocated, RES_USAGE); | 180 | return res_counter_read_u64(&tcp->tcp_memory_allocated, RES_USAGE); |
181 | } | 181 | } |
182 | 182 | ||
183 | static u64 tcp_cgroup_read(struct cgroup *cont, struct cftype *cft) | 183 | static u64 tcp_cgroup_read(struct cgroup_subsys_state *css, struct cftype *cft) |
184 | { | 184 | { |
185 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); | 185 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); |
186 | u64 val; | 186 | u64 val; |
187 | 187 | ||
188 | switch (cft->private) { | 188 | switch (cft->private) { |
@@ -202,13 +202,13 @@ static u64 tcp_cgroup_read(struct cgroup *cont, struct cftype *cft) | |||
202 | return val; | 202 | return val; |
203 | } | 203 | } |
204 | 204 | ||
205 | static int tcp_cgroup_reset(struct cgroup *cont, unsigned int event) | 205 | static int tcp_cgroup_reset(struct cgroup_subsys_state *css, unsigned int event) |
206 | { | 206 | { |
207 | struct mem_cgroup *memcg; | 207 | struct mem_cgroup *memcg; |
208 | struct tcp_memcontrol *tcp; | 208 | struct tcp_memcontrol *tcp; |
209 | struct cg_proto *cg_proto; | 209 | struct cg_proto *cg_proto; |
210 | 210 | ||
211 | memcg = mem_cgroup_from_cont(cont); | 211 | memcg = mem_cgroup_from_css(css); |
212 | cg_proto = tcp_prot.proto_cgroup(memcg); | 212 | cg_proto = tcp_prot.proto_cgroup(memcg); |
213 | if (!cg_proto) | 213 | if (!cg_proto) |
214 | return 0; | 214 | return 0; |
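tcp_memcontrol, like memcontrol's MEMFILE_TYPE() above, serves several control files from one handler by packing an identifier into cftype->private and decoding it at read/write time. A standalone sketch of that packing scheme (the bit layout and names here are illustrative):

#include <stdio.h>

enum res_type { _MEM, _MEMSWAP, _KMEM, NR_RES };	/* counter addressed */
enum res_attr { RES_USAGE, RES_LIMIT, NR_ATTR };	/* attribute of it */

#define MEMFILE_PRIVATE(type, attr)	(((type) << 16) | (attr))
#define MEMFILE_TYPE(val)		(((val) >> 16) & 0xffff)
#define MEMFILE_ATTR(val)		((val) & 0xffff)

/* toy counter matrix standing in for res_counter state */
static unsigned long long counters[NR_RES][NR_ATTR] = {
	[_MEM] = { 4096, 1ULL << 20 },
};

/* one handler serves every file; cft->private picks the cell */
static unsigned long long cgroup_read(int private)
{
	return counters[MEMFILE_TYPE(private)][MEMFILE_ATTR(private)];
}

int main(void)
{
	printf("memory usage: %llu\n",
	       cgroup_read(MEMFILE_PRIVATE(_MEM, RES_USAGE)));
	printf("memory limit: %llu\n",
	       cgroup_read(MEMFILE_PRIVATE(_MEM, RES_LIMIT)));
	return 0;
}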
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index 3a294eb98d61..867b4a3e3980 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c | |||
@@ -23,19 +23,18 @@ | |||
23 | #include <net/sock.h> | 23 | #include <net/sock.h> |
24 | #include <net/cls_cgroup.h> | 24 | #include <net/cls_cgroup.h> |
25 | 25 | ||
26 | static inline struct cgroup_cls_state *cgrp_cls_state(struct cgroup *cgrp) | 26 | static inline struct cgroup_cls_state *css_cls_state(struct cgroup_subsys_state *css) |
27 | { | 27 | { |
28 | return container_of(cgroup_subsys_state(cgrp, net_cls_subsys_id), | 28 | return css ? container_of(css, struct cgroup_cls_state, css) : NULL; |
29 | struct cgroup_cls_state, css); | ||
30 | } | 29 | } |
31 | 30 | ||
32 | static inline struct cgroup_cls_state *task_cls_state(struct task_struct *p) | 31 | static inline struct cgroup_cls_state *task_cls_state(struct task_struct *p) |
33 | { | 32 | { |
34 | return container_of(task_subsys_state(p, net_cls_subsys_id), | 33 | return css_cls_state(task_css(p, net_cls_subsys_id)); |
35 | struct cgroup_cls_state, css); | ||
36 | } | 34 | } |
37 | 35 | ||
38 | static struct cgroup_subsys_state *cgrp_css_alloc(struct cgroup *cgrp) | 36 | static struct cgroup_subsys_state * |
37 | cgrp_css_alloc(struct cgroup_subsys_state *parent_css) | ||
39 | { | 38 | { |
40 | struct cgroup_cls_state *cs; | 39 | struct cgroup_cls_state *cs; |
41 | 40 | ||
@@ -45,17 +44,19 @@ static struct cgroup_subsys_state *cgrp_css_alloc(struct cgroup *cgrp) | |||
45 | return &cs->css; | 44 | return &cs->css; |
46 | } | 45 | } |
47 | 46 | ||
48 | static int cgrp_css_online(struct cgroup *cgrp) | 47 | static int cgrp_css_online(struct cgroup_subsys_state *css) |
49 | { | 48 | { |
50 | if (cgrp->parent) | 49 | struct cgroup_cls_state *cs = css_cls_state(css); |
51 | cgrp_cls_state(cgrp)->classid = | 50 | struct cgroup_cls_state *parent = css_cls_state(css_parent(css)); |
52 | cgrp_cls_state(cgrp->parent)->classid; | 51 | |
52 | if (parent) | ||
53 | cs->classid = parent->classid; | ||
53 | return 0; | 54 | return 0; |
54 | } | 55 | } |
55 | 56 | ||
56 | static void cgrp_css_free(struct cgroup *cgrp) | 57 | static void cgrp_css_free(struct cgroup_subsys_state *css) |
57 | { | 58 | { |
58 | kfree(cgrp_cls_state(cgrp)); | 59 | kfree(css_cls_state(css)); |
59 | } | 60 | } |
60 | 61 | ||
61 | static int update_classid(const void *v, struct file *file, unsigned n) | 62 | static int update_classid(const void *v, struct file *file, unsigned n) |
@@ -67,12 +68,13 @@ static int update_classid(const void *v, struct file *file, unsigned n) | |||
67 | return 0; | 68 | return 0; |
68 | } | 69 | } |
69 | 70 | ||
70 | static void cgrp_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) | 71 | static void cgrp_attach(struct cgroup_subsys_state *css, |
72 | struct cgroup_taskset *tset) | ||
71 | { | 73 | { |
72 | struct task_struct *p; | 74 | struct task_struct *p; |
73 | void *v; | 75 | void *v; |
74 | 76 | ||
75 | cgroup_taskset_for_each(p, cgrp, tset) { | 77 | cgroup_taskset_for_each(p, css, tset) { |
76 | task_lock(p); | 78 | task_lock(p); |
77 | v = (void *)(unsigned long)task_cls_classid(p); | 79 | v = (void *)(unsigned long)task_cls_classid(p); |
78 | iterate_fd(p->files, 0, update_classid, v); | 80 | iterate_fd(p->files, 0, update_classid, v); |
@@ -80,14 +82,15 @@ static void cgrp_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) | |||
80 | } | 82 | } |
81 | } | 83 | } |
82 | 84 | ||
83 | static u64 read_classid(struct cgroup *cgrp, struct cftype *cft) | 85 | static u64 read_classid(struct cgroup_subsys_state *css, struct cftype *cft) |
84 | { | 86 | { |
85 | return cgrp_cls_state(cgrp)->classid; | 87 | return css_cls_state(css)->classid; |
86 | } | 88 | } |
87 | 89 | ||
88 | static int write_classid(struct cgroup *cgrp, struct cftype *cft, u64 value) | 90 | static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft, |
91 | u64 value) | ||
89 | { | 92 | { |
90 | cgrp_cls_state(cgrp)->classid = (u32) value; | 93 | css_cls_state(css)->classid = (u32) value; |
91 | return 0; | 94 | return 0; |
92 | } | 95 | } |
93 | 96 | ||
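With ->attach() now css-based, cgroup_taskset_for_each() filters the migrating tasks against the destination css before cls_cgroup walks each task's open files via iterate_fd(). A toy model of that iteration (all types and the macro are stand-ins for the kernel's):

#include <stdio.h>

struct css { unsigned int classid; };

struct task {
	const char *name;
	struct css *dst;		/* css this task is migrating into */
	int nfds;
};

/* toy stand-in for cgroup_taskset_for_each(p, css, tset) */
#define taskset_for_each(p, css, set, n)			\
	for ((p) = (set); (p) < (set) + (n); (p)++)		\
		if ((p)->dst == (css))

static void attach(struct css *css, struct task *tset, int n)
{
	struct task *p;

	taskset_for_each(p, css, tset, n) {
		/* stands in for iterate_fd(p->files, 0, update_classid, v) */
		for (int fd = 0; fd < p->nfds; fd++)
			printf("%s: fd %d -> classid %u\n",
			       p->name, fd, css->classid);
	}
}

int main(void)
{
	struct css net_cls = { .classid = 0x100001 };
	struct task tset[] = {
		{ "worker", &net_cls, 2 },
		{ "other",  NULL,     1 },	/* migrating elsewhere */
	};

	attach(&net_cls, tset, 2);
	return 0;
}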
diff --git a/security/device_cgroup.c b/security/device_cgroup.c index e8aad69f0d69..c123628d3f84 100644 --- a/security/device_cgroup.c +++ b/security/device_cgroup.c | |||
@@ -53,22 +53,17 @@ struct dev_cgroup { | |||
53 | 53 | ||
54 | static inline struct dev_cgroup *css_to_devcgroup(struct cgroup_subsys_state *s) | 54 | static inline struct dev_cgroup *css_to_devcgroup(struct cgroup_subsys_state *s) |
55 | { | 55 | { |
56 | return container_of(s, struct dev_cgroup, css); | 56 | return s ? container_of(s, struct dev_cgroup, css) : NULL; |
57 | } | ||
58 | |||
59 | static inline struct dev_cgroup *cgroup_to_devcgroup(struct cgroup *cgroup) | ||
60 | { | ||
61 | return css_to_devcgroup(cgroup_subsys_state(cgroup, devices_subsys_id)); | ||
62 | } | 57 | } |
63 | 58 | ||
64 | static inline struct dev_cgroup *task_devcgroup(struct task_struct *task) | 59 | static inline struct dev_cgroup *task_devcgroup(struct task_struct *task) |
65 | { | 60 | { |
66 | return css_to_devcgroup(task_subsys_state(task, devices_subsys_id)); | 61 | return css_to_devcgroup(task_css(task, devices_subsys_id)); |
67 | } | 62 | } |
68 | 63 | ||
69 | struct cgroup_subsys devices_subsys; | 64 | struct cgroup_subsys devices_subsys; |
70 | 65 | ||
71 | static int devcgroup_can_attach(struct cgroup *new_cgrp, | 66 | static int devcgroup_can_attach(struct cgroup_subsys_state *new_css, |
72 | struct cgroup_taskset *set) | 67 | struct cgroup_taskset *set) |
73 | { | 68 | { |
74 | struct task_struct *task = cgroup_taskset_first(set); | 69 | struct task_struct *task = cgroup_taskset_first(set); |
@@ -193,18 +188,16 @@ static inline bool is_devcg_online(const struct dev_cgroup *devcg) | |||
193 | /** | 188 | /** |
194 | * devcgroup_online - initializes devcgroup's behavior and exceptions based on | 189 | * devcgroup_online - initializes devcgroup's behavior and exceptions based on |
195 | * parent's | 190 | * parent's |
196 | * @cgroup: cgroup getting online | 191 | * @css: css getting online |
197 | * returns 0 in case of success, error code otherwise | 192 | * returns 0 in case of success, error code otherwise |
198 | */ | 193 | */ |
199 | static int devcgroup_online(struct cgroup *cgroup) | 194 | static int devcgroup_online(struct cgroup_subsys_state *css) |
200 | { | 195 | { |
201 | struct dev_cgroup *dev_cgroup, *parent_dev_cgroup = NULL; | 196 | struct dev_cgroup *dev_cgroup = css_to_devcgroup(css); |
197 | struct dev_cgroup *parent_dev_cgroup = css_to_devcgroup(css_parent(css)); | ||
202 | int ret = 0; | 198 | int ret = 0; |
203 | 199 | ||
204 | mutex_lock(&devcgroup_mutex); | 200 | mutex_lock(&devcgroup_mutex); |
205 | dev_cgroup = cgroup_to_devcgroup(cgroup); | ||
206 | if (cgroup->parent) | ||
207 | parent_dev_cgroup = cgroup_to_devcgroup(cgroup->parent); | ||
208 | 201 | ||
209 | if (parent_dev_cgroup == NULL) | 202 | if (parent_dev_cgroup == NULL) |
210 | dev_cgroup->behavior = DEVCG_DEFAULT_ALLOW; | 203 | dev_cgroup->behavior = DEVCG_DEFAULT_ALLOW; |
@@ -219,9 +212,9 @@ static int devcgroup_online(struct cgroup *cgroup) | |||
219 | return ret; | 212 | return ret; |
220 | } | 213 | } |
221 | 214 | ||
222 | static void devcgroup_offline(struct cgroup *cgroup) | 215 | static void devcgroup_offline(struct cgroup_subsys_state *css) |
223 | { | 216 | { |
224 | struct dev_cgroup *dev_cgroup = cgroup_to_devcgroup(cgroup); | 217 | struct dev_cgroup *dev_cgroup = css_to_devcgroup(css); |
225 | 218 | ||
226 | mutex_lock(&devcgroup_mutex); | 219 | mutex_lock(&devcgroup_mutex); |
227 | dev_cgroup->behavior = DEVCG_DEFAULT_NONE; | 220 | dev_cgroup->behavior = DEVCG_DEFAULT_NONE; |
@@ -231,7 +224,8 @@ static void devcgroup_offline(struct cgroup *cgroup) | |||
231 | /* | 224 | /* |
232 | * called from kernel/cgroup.c with cgroup_lock() held. | 225 | * called from kernel/cgroup.c with cgroup_lock() held. |
233 | */ | 226 | */ |
234 | static struct cgroup_subsys_state *devcgroup_css_alloc(struct cgroup *cgroup) | 227 | static struct cgroup_subsys_state * |
228 | devcgroup_css_alloc(struct cgroup_subsys_state *parent_css) | ||
235 | { | 229 | { |
236 | struct dev_cgroup *dev_cgroup; | 230 | struct dev_cgroup *dev_cgroup; |
237 | 231 | ||
@@ -244,11 +238,10 @@ static struct cgroup_subsys_state *devcgroup_css_alloc(struct cgroup *cgroup) | |||
244 | return &dev_cgroup->css; | 238 | return &dev_cgroup->css; |
245 | } | 239 | } |
246 | 240 | ||
247 | static void devcgroup_css_free(struct cgroup *cgroup) | 241 | static void devcgroup_css_free(struct cgroup_subsys_state *css) |
248 | { | 242 | { |
249 | struct dev_cgroup *dev_cgroup; | 243 | struct dev_cgroup *dev_cgroup = css_to_devcgroup(css); |
250 | 244 | ||
251 | dev_cgroup = cgroup_to_devcgroup(cgroup); | ||
252 | __dev_exception_clean(dev_cgroup); | 245 | __dev_exception_clean(dev_cgroup); |
253 | kfree(dev_cgroup); | 246 | kfree(dev_cgroup); |
254 | } | 247 | } |
@@ -291,10 +284,10 @@ static void set_majmin(char *str, unsigned m) | |||
291 | sprintf(str, "%u", m); | 284 | sprintf(str, "%u", m); |
292 | } | 285 | } |
293 | 286 | ||
294 | static int devcgroup_seq_read(struct cgroup *cgroup, struct cftype *cft, | 287 | static int devcgroup_seq_read(struct cgroup_subsys_state *css, |
295 | struct seq_file *m) | 288 | struct cftype *cft, struct seq_file *m) |
296 | { | 289 | { |
297 | struct dev_cgroup *devcgroup = cgroup_to_devcgroup(cgroup); | 290 | struct dev_cgroup *devcgroup = css_to_devcgroup(css); |
298 | struct dev_exception_item *ex; | 291 | struct dev_exception_item *ex; |
299 | char maj[MAJMINLEN], min[MAJMINLEN], acc[ACCLEN]; | 292 | char maj[MAJMINLEN], min[MAJMINLEN], acc[ACCLEN]; |
300 | 293 | ||
@@ -394,12 +387,10 @@ static bool may_access(struct dev_cgroup *dev_cgroup, | |||
394 | static int parent_has_perm(struct dev_cgroup *childcg, | 387 | static int parent_has_perm(struct dev_cgroup *childcg, |
395 | struct dev_exception_item *ex) | 388 | struct dev_exception_item *ex) |
396 | { | 389 | { |
397 | struct cgroup *pcg = childcg->css.cgroup->parent; | 390 | struct dev_cgroup *parent = css_to_devcgroup(css_parent(&childcg->css)); |
398 | struct dev_cgroup *parent; | ||
399 | 391 | ||
400 | if (!pcg) | 392 | if (!parent) |
401 | return 1; | 393 | return 1; |
402 | parent = cgroup_to_devcgroup(pcg); | ||
403 | return may_access(parent, ex, childcg->behavior); | 394 | return may_access(parent, ex, childcg->behavior); |
404 | } | 395 | } |
405 | 396 | ||
@@ -451,13 +442,13 @@ static void revalidate_active_exceptions(struct dev_cgroup *devcg) | |||
451 | static int propagate_exception(struct dev_cgroup *devcg_root, | 442 | static int propagate_exception(struct dev_cgroup *devcg_root, |
452 | struct dev_exception_item *ex) | 443 | struct dev_exception_item *ex) |
453 | { | 444 | { |
454 | struct cgroup *root = devcg_root->css.cgroup, *pos; | 445 | struct cgroup_subsys_state *pos; |
455 | int rc = 0; | 446 | int rc = 0; |
456 | 447 | ||
457 | rcu_read_lock(); | 448 | rcu_read_lock(); |
458 | 449 | ||
459 | cgroup_for_each_descendant_pre(pos, root) { | 450 | css_for_each_descendant_pre(pos, &devcg_root->css) { |
460 | struct dev_cgroup *devcg = cgroup_to_devcgroup(pos); | 451 | struct dev_cgroup *devcg = css_to_devcgroup(pos); |
461 | 452 | ||
462 | /* | 453 | /* |
463 | * Because devcgroup_mutex is held, no devcg will become | 454 | * Because devcgroup_mutex is held, no devcg will become |
@@ -465,7 +456,7 @@ static int propagate_exception(struct dev_cgroup *devcg_root, | |||
465 | * methods), and online ones are safe to access outside RCU | 456 | * methods), and online ones are safe to access outside RCU |
466 | * read lock without bumping refcnt. | 457 | * read lock without bumping refcnt. |
467 | */ | 458 | */ |
468 | if (!is_devcg_online(devcg)) | 459 | if (pos == &devcg_root->css || !is_devcg_online(devcg)) |
469 | continue; | 460 | continue; |
470 | 461 | ||
471 | rcu_read_unlock(); | 462 | rcu_read_unlock(); |
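One behavioral detail hides in this hunk: css_for_each_descendant_pre() visits the root css itself, unlike the old cgroup_for_each_descendant_pre(), hence the new pos == &devcg_root->css skip. A small model of that pre-order visit with the self-skip:

#include <stdio.h>

struct node {
	const char *name;
	struct node *child[2];		/* toy fixed fan-out */
};

static void visit_pre(struct node *pos, struct node *root)
{
	if (!pos)
		return;
	if (pos != root)		/* self-skip, as in propagate_exception() */
		printf("propagate exception to %s\n", pos->name);
	for (int i = 0; i < 2; i++)
		visit_pre(pos->child[i], root);
}

int main(void)
{
	struct node b = { "B", { NULL, NULL } };
	struct node c = { "C", { NULL, NULL } };
	struct node a = { "A", { &b, &c } };	/* root of the walked subtree */

	visit_pre(&a, &a);		/* visits B and C, never A itself */
	return 0;
}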
@@ -524,15 +515,11 @@ static int devcgroup_update_access(struct dev_cgroup *devcgroup, | |||
524 | char temp[12]; /* 11 + 1 characters needed for a u32 */ | 515 | char temp[12]; /* 11 + 1 characters needed for a u32 */ |
525 | int count, rc = 0; | 516 | int count, rc = 0; |
526 | struct dev_exception_item ex; | 517 | struct dev_exception_item ex; |
527 | struct cgroup *p = devcgroup->css.cgroup; | 518 | struct dev_cgroup *parent = css_to_devcgroup(css_parent(&devcgroup->css)); |
528 | struct dev_cgroup *parent = NULL; | ||
529 | 519 | ||
530 | if (!capable(CAP_SYS_ADMIN)) | 520 | if (!capable(CAP_SYS_ADMIN)) |
531 | return -EPERM; | 521 | return -EPERM; |
532 | 522 | ||
533 | if (p->parent) | ||
534 | parent = cgroup_to_devcgroup(p->parent); | ||
535 | |||
536 | memset(&ex, 0, sizeof(ex)); | 523 | memset(&ex, 0, sizeof(ex)); |
537 | b = buffer; | 524 | b = buffer; |
538 | 525 | ||
@@ -677,13 +664,13 @@ static int devcgroup_update_access(struct dev_cgroup *devcgroup, | |||
677 | return rc; | 664 | return rc; |
678 | } | 665 | } |
679 | 666 | ||
680 | static int devcgroup_access_write(struct cgroup *cgrp, struct cftype *cft, | 667 | static int devcgroup_access_write(struct cgroup_subsys_state *css, |
681 | const char *buffer) | 668 | struct cftype *cft, const char *buffer) |
682 | { | 669 | { |
683 | int retval; | 670 | int retval; |
684 | 671 | ||
685 | mutex_lock(&devcgroup_mutex); | 672 | mutex_lock(&devcgroup_mutex); |
686 | retval = devcgroup_update_access(cgroup_to_devcgroup(cgrp), | 673 | retval = devcgroup_update_access(css_to_devcgroup(css), |
687 | cft->private, buffer); | 674 | cft->private, buffer); |
688 | mutex_unlock(&devcgroup_mutex); | 675 | mutex_unlock(&devcgroup_mutex); |
689 | return retval; | 676 | return retval; |
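Collected from the hunks above, the device_cgroup entry points after this commit all key on a css rather than a cgroup; the prototypes below are copied from the converted signatures (static qualifiers dropped so the list stands alone as a translation unit):

struct cgroup_subsys_state;
struct cgroup_taskset;
struct cftype;
struct seq_file;

int devcgroup_can_attach(struct cgroup_subsys_state *new_css,
			 struct cgroup_taskset *set);
struct cgroup_subsys_state *
devcgroup_css_alloc(struct cgroup_subsys_state *parent_css);
void devcgroup_css_free(struct cgroup_subsys_state *css);
int devcgroup_online(struct cgroup_subsys_state *css);
void devcgroup_offline(struct cgroup_subsys_state *css);
int devcgroup_seq_read(struct cgroup_subsys_state *css,
		       struct cftype *cft, struct seq_file *m);
int devcgroup_access_write(struct cgroup_subsys_state *css,
			   struct cftype *cft, const char *buffer);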