author		Linus Torvalds <torvalds@linux-foundation.org>	2016-03-18 23:25:49 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2016-03-18 23:25:49 -0400
commit		6b5f04b6cf8ebab9a65d9c0026c650bb2538fd0f
tree		faf2bb2e77bfe20f5a116f87c620a99c54febe4f
parent		fcab86add71623e3963d7565c0d61bb9d99aea7c
parent		cfe02a8a973e7e5f66926b8ae38dfce404b19e29

Merge branch 'for-4.6' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup updates from Tejun Heo:
 "cgroup changes for v4.6-rc1.  No userland visible behavior changes in
  this pull request.  I'll send out a separate pull request for the
  addition of cgroup namespace support.

   - The biggest change is the revamping of cgroup core task migration
     and controller handling logic.  There are quite a few places where
     controllers and tasks are manipulated.  Previously, many of those
     places implemented custom operations for each specific use case
     assuming specific starting conditions.  While this worked, it makes
     the code fragile and difficult to follow.

     The bulk of this pull request restructures these operations so
     that most related operations are performed through common helpers
     which are recursive (subtrees are always processed consistently)
     and idempotent (they make the cgroup hierarchy converge to the
     target state rather than performing operations assuming specific
     starting conditions).  This makes the code a lot easier to
     understand, verify and extend.

   - Implicit controller support is added.  This is primarily for using
     perf_event on the v2 hierarchy so that perf can match cgroup v2
     paths without requiring the user to do anything special.  The
     kernel portion of the perf_event changes is acked but the userland
     changes are still pending review.

   - The cgroup_no_v1= boot parameter is added to ease testing cgroup
     v2 in certain environments.

   - There is a regression introduced during the v4.4 devel cycle where
     attempts to migrate zombie tasks can mess up internal object
     management.  This was fixed earlier this week and is included in
     this pull request w/ stable cc'd.

   - Misc non-critical fixes and improvements"

* 'for-4.6' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup: (44 commits)
  cgroup: avoid false positive gcc-6 warning
  cgroup: ignore css_sets associated with dead cgroups during migration
  Documentation: cgroup v2: Trivial heading correction.
  cgroup: implement cgroup_subsys->implicit_on_dfl
  cgroup: use css_set->mg_dst_cgrp for the migration target cgroup
  cgroup: make cgroup[_taskset]_migrate() take cgroup_root instead of cgroup
  cgroup: move migration destination verification out of cgroup_migrate_prepare_dst()
  cgroup: fix incorrect destination cgroup in cgroup_update_dfl_csses()
  cgroup: Trivial correction to reflect controller.
  cgroup: remove stale item in cgroup-v1 document INDEX file.
  cgroup: update css iteration in cgroup_update_dfl_csses()
  cgroup: allocate 2x cgrp_cset_links when setting up a new root
  cgroup: make cgroup_calc_subtree_ss_mask() take @this_ss_mask
  cgroup: reimplement rebind_subsystems() using cgroup_apply_control() and friends
  cgroup: use cgroup_apply_enable_control() in cgroup creation path
  cgroup: combine cgroup_mutex locking and offline css draining
  cgroup: factor out cgroup_{apply|finalize}_control() from cgroup_subtree_control_write()
  cgroup: introduce cgroup_{save|propagate|restore}_control()
  cgroup: make cgroup_drain_offline() and cgroup_apply_control_{disable|enable}() recursive
  cgroup: factor out cgroup_apply_control_enable() from cgroup_subtree_control_write()
  ...
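The recursive/idempotent helpers referred to above land in kernel/cgroup.c as cgroup_save_control(), cgroup_propagate_control(), cgroup_restore_control(), cgroup_apply_control() and cgroup_finalize_control() (see the forward declarations and the rebind_subsystems() rewrite in the diff below). A minimal sketch of the calling pattern, pieced together from this diff rather than quoted verbatim from the tree:

	/* converge @cgrp's subtree onto a new subtree_control value */
	cgroup_save_control(cgrp);		/* snapshot masks into old_subtree_* */
	cgrp->subtree_control = new_sc;		/* state the target, not the steps */
	ret = cgroup_apply_control(cgrp);	/* recursively create/kill csses */
	cgroup_finalize_control(cgrp, ret);	/* keep result, or restore snapshot */
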
-rw-r--r--	Documentation/cgroup-v1/00-INDEX	   2
-rw-r--r--	Documentation/cgroup-v2.txt		   8
-rw-r--r--	Documentation/kernel-parameters.txt	   5
-rw-r--r--	include/linux/cgroup-defs.h		  46
-rw-r--r--	init/Kconfig				   4
-rw-r--r--	kernel/Makefile				   3
-rw-r--r--	kernel/cgroup.c				1168
-rw-r--r--	kernel/cpuset.c				   2
-rw-r--r--	kernel/sched/core.c			   2
-rw-r--r--	kernel/sched/cpuacct.c			   2
10 files changed, 738 insertions, 504 deletions
diff --git a/Documentation/cgroup-v1/00-INDEX b/Documentation/cgroup-v1/00-INDEX
index 6ad425f7cf56..106885ad670d 100644
--- a/Documentation/cgroup-v1/00-INDEX
+++ b/Documentation/cgroup-v1/00-INDEX
@@ -24,5 +24,3 @@ net_prio.txt
   - Network priority cgroups details and usages.
 pids.txt
   - Process number cgroups details and usages.
-unified-hierarchy.txt
-  - Description the new/next cgroup interface.
diff --git a/Documentation/cgroup-v2.txt b/Documentation/cgroup-v2.txt
index 8f1329a5f700..bdc6773277be 100644
--- a/Documentation/cgroup-v2.txt
+++ b/Documentation/cgroup-v2.txt
@@ -132,6 +132,12 @@ strongly discouraged for production use. It is recommended to decide
 the hierarchies and controller associations before starting using the
 controllers after system boot.
 
+During transition to v2, system management software might still
+automount the v1 cgroup filesystem and so hijack all controllers
+during boot, before manual intervention is possible.  To make testing
+and experimenting easier, the kernel parameter cgroup_no_v1= allows
+disabling controllers in v1 and making them always available in v2.
+
 
 2-2. Organizing Processes
 
@@ -915,7 +921,7 @@ PAGE_SIZE multiple when read back.
 limit, anonymous memory of the cgroup will not be swapped out.
 
 
-5-2-2. General Usage
+5-2-2. Usage Guidelines
 
 "memory.high" is the main mechanism to control memory usage.
 Over-committing on high limit (sum of high limits > available memory)
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 0ee46a8f6401..eef242ee576b 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -614,6 +614,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			cut the overhead, others just disable the usage. So
 			only cgroup_disable=memory is actually worthy}
 
+	cgroup_no_v1=	[KNL] Disable one, multiple, all cgroup controllers in v1
+			Format: { controller[,controller...] | "all" }
+			Like cgroup_disable, but only applies to cgroup v1;
+			the blacklisted controllers remain available in cgroup2.
+
 	cgroup.memory=	[KNL] Pass options to the cgroup memory controller.
 			Format: <string>
 			nosocket -- Disable socket memory accounting.
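As an illustration (hypothetical command lines, not part of the patch), booting with either of the following keeps the named controllers off every v1 mount while cgroup2's "cgroup.controllers" still lists them once the v2 filesystem is mounted:

	cgroup_no_v1=memory,blkio
	cgroup_no_v1=all
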
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 789471dba6fb..3e39ae5bc799 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -45,6 +45,7 @@ enum {
 	CSS_NO_REF	= (1 << 0), /* no reference counting for this css */
 	CSS_ONLINE	= (1 << 1), /* between ->css_online() and ->css_offline() */
 	CSS_RELEASED	= (1 << 2), /* refcnt reached zero, released */
+	CSS_VISIBLE	= (1 << 3), /* css is visible to userland */
 };
 
 /* bits in struct cgroup flags field */
@@ -190,12 +191,13 @@ struct css_set {
 
 	/*
 	 * If this cset is acting as the source of migration the following
-	 * two fields are set.  mg_src_cgrp is the source cgroup of the
-	 * on-going migration and mg_dst_cset is the destination cset the
-	 * target tasks on this cset should be migrated to.  Protected by
-	 * cgroup_mutex.
+	 * two fields are set.  mg_src_cgrp and mg_dst_cgrp are
+	 * respectively the source and destination cgroups of the on-going
+	 * migration.  mg_dst_cset is the destination cset the target tasks
+	 * on this cset should be migrated to.  Protected by cgroup_mutex.
 	 */
 	struct cgroup *mg_src_cgrp;
+	struct cgroup *mg_dst_cgrp;
 	struct css_set *mg_dst_cset;
 
 	/*
@@ -210,6 +212,9 @@ struct css_set {
 	/* all css_task_iters currently walking this cset */
 	struct list_head task_iters;
 
+	/* dead and being drained, ignore for migration */
+	bool dead;
+
 	/* For RCU-protected deletion */
 	struct rcu_head rcu_head;
 };
@@ -253,13 +258,14 @@ struct cgroup {
 	/*
 	 * The bitmask of subsystems enabled on the child cgroups.
 	 * ->subtree_control is the one configured through
-	 * "cgroup.subtree_control" while ->child_subsys_mask is the
-	 * effective one which may have more subsystems enabled.
-	 * Controller knobs are made available iff it's enabled in
-	 * ->subtree_control.
+	 * "cgroup.subtree_control" while ->child_ss_mask is the effective
+	 * one which may have more subsystems enabled.  Controller knobs
+	 * are made available iff it's enabled in ->subtree_control.
 	 */
-	unsigned int subtree_control;
-	unsigned int child_subsys_mask;
+	u16 subtree_control;
+	u16 subtree_ss_mask;
+	u16 old_subtree_control;
+	u16 old_subtree_ss_mask;
 
 	/* Private pointers for each registered subsystem */
 	struct cgroup_subsys_state __rcu *subsys[CGROUP_SUBSYS_COUNT];
@@ -434,7 +440,6 @@ struct cgroup_subsys {
 	void (*css_released)(struct cgroup_subsys_state *css);
 	void (*css_free)(struct cgroup_subsys_state *css);
 	void (*css_reset)(struct cgroup_subsys_state *css);
-	void (*css_e_css_changed)(struct cgroup_subsys_state *css);
 
 	int (*can_attach)(struct cgroup_taskset *tset);
 	void (*cancel_attach)(struct cgroup_taskset *tset);
@@ -446,7 +451,20 @@ struct cgroup_subsys {
 	void (*free)(struct task_struct *task);
 	void (*bind)(struct cgroup_subsys_state *root_css);
 
-	int early_init;
+	bool early_init:1;
+
+	/*
+	 * If %true, the controller, on the default hierarchy, doesn't show
+	 * up in "cgroup.controllers" or "cgroup.subtree_control", is
+	 * implicitly enabled on all cgroups on the default hierarchy, and
+	 * bypasses the "no internal process" constraint.  This is for
+	 * utility type controllers which are transparent to userland.
+	 *
+	 * An implicit controller can be stolen from the default hierarchy
+	 * anytime and thus must be okay with offline csses from previous
+	 * hierarchies coexisting with csses for the current one.
+	 */
+	bool implicit_on_dfl:1;
 
 	/*
 	 * If %false, this subsystem is properly hierarchical -
@@ -460,8 +478,8 @@ struct cgroup_subsys {
 	 * cases.  Eventually, all subsystems will be made properly
 	 * hierarchical and this will go away.
 	 */
-	bool broken_hierarchy;
-	bool warned_broken_hierarchy;
+	bool broken_hierarchy:1;
+	bool warned_broken_hierarchy:1;
 
 	/* the following two fields are initialized automatically during boot */
 	int id;
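For illustration, a utility controller would opt in by setting the new flag in its cgroup_subsys definition. The sketch below is hypothetical (the example_* names are made up); per the merge message, perf_event is the intended first user:

	struct cgroup_subsys example_cgrp_subsys = {
		.css_alloc	 = example_css_alloc,	/* hypothetical callbacks */
		.css_free	 = example_css_free,
		/* never listed in cgroup.controllers/cgroup.subtree_control,
		 * enabled on every v2 cgroup, and exempt from the
		 * no-internal-process constraint */
		.implicit_on_dfl = true,
	};
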
diff --git a/init/Kconfig b/init/Kconfig
index 2d70c8c4b1d8..e0d26162432e 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1047,10 +1047,10 @@ config CGROUP_PIDS
 	  is fairly trivial to reach PID exhaustion before you reach even a
 	  conservative kmemcg limit. As a result, it is possible to grind a
 	  system to halt without being limited by other cgroup policies. The
-	  PIDs cgroup subsystem is designed to stop this from happening.
+	  PIDs controller is designed to stop this from happening.
 
 	  It should be noted that organisational operations (such as attaching
-	  to a cgroup hierarchy will *not* be blocked by the PIDs subsystem),
+	  to a cgroup hierarchy will *not* be blocked by the PIDs controller),
 	  since the PIDs limit only affects a process's ability to fork, not to
 	  attach to a cgroup.
 
diff --git a/kernel/Makefile b/kernel/Makefile
index 53abf008ecb3..baa55e50a315 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -14,8 +14,7 @@ obj-y = fork.o exec_domain.o panic.o \
 obj-$(CONFIG_MULTIUSER) += groups.o
 
 ifdef CONFIG_FUNCTION_TRACER
-# Do not trace debug files and internal ftrace files
-CFLAGS_REMOVE_cgroup-debug.o = $(CC_FLAGS_FTRACE)
+# Do not trace internal ftrace files
 CFLAGS_REMOVE_irq_work.o = $(CC_FLAGS_FTRACE)
 endif
 
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index d27904c193da..3fe02c152799 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -178,10 +178,16 @@ EXPORT_SYMBOL_GPL(cgrp_dfl_root);
  * The default hierarchy always exists but is hidden until mounted for the
  * first time.  This is for backward compatibility.
  */
-static bool cgrp_dfl_root_visible;
+static bool cgrp_dfl_visible;
+
+/* Controllers blocked by the commandline in v1 */
+static u16 cgroup_no_v1_mask;
 
 /* some controllers are not supported in the default hierarchy */
-static unsigned long cgrp_dfl_root_inhibit_ss_mask;
+static u16 cgrp_dfl_inhibit_ss_mask;
+
+/* some controllers are implicitly enabled on the default hierarchy */
+static unsigned long cgrp_dfl_implicit_ss_mask;
 
 /* The list of hierarchy roots */
 
@@ -205,23 +211,25 @@ static u64 css_serial_nr_next = 1;
  * fork/exit handlers to call. This avoids us having to do extra work in the
  * fork/exit path to check which subsystems have fork/exit callbacks.
  */
-static unsigned long have_fork_callback __read_mostly;
-static unsigned long have_exit_callback __read_mostly;
-static unsigned long have_free_callback __read_mostly;
+static u16 have_fork_callback __read_mostly;
+static u16 have_exit_callback __read_mostly;
+static u16 have_free_callback __read_mostly;
 
 /* Ditto for the can_fork callback. */
-static unsigned long have_canfork_callback __read_mostly;
+static u16 have_canfork_callback __read_mostly;
 
 static struct file_system_type cgroup2_fs_type;
 static struct cftype cgroup_dfl_base_files[];
 static struct cftype cgroup_legacy_base_files[];
 
-static int rebind_subsystems(struct cgroup_root *dst_root,
-			     unsigned long ss_mask);
+static int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask);
+static void cgroup_lock_and_drain_offline(struct cgroup *cgrp);
+static int cgroup_apply_control(struct cgroup *cgrp);
+static void cgroup_finalize_control(struct cgroup *cgrp, int ret);
 static void css_task_iter_advance(struct css_task_iter *it);
 static int cgroup_destroy_locked(struct cgroup *cgrp);
-static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss,
-		      bool visible);
+static struct cgroup_subsys_state *css_create(struct cgroup *cgrp,
+					      struct cgroup_subsys *ss);
 static void css_release(struct percpu_ref *ref);
 static void kill_css(struct cgroup_subsys_state *css);
 static int cgroup_addrm_files(struct cgroup_subsys_state *css,
@@ -238,9 +246,17 @@ static int cgroup_addrm_files(struct cgroup_subsys_state *css,
  */
 static bool cgroup_ssid_enabled(int ssid)
 {
+	if (CGROUP_SUBSYS_COUNT == 0)
+		return false;
+
 	return static_key_enabled(cgroup_subsys_enabled_key[ssid]);
 }
 
+static bool cgroup_ssid_no_v1(int ssid)
+{
+	return cgroup_no_v1_mask & (1 << ssid);
+}
+
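cgroup_no_v1_mask itself is filled in by an early-boot parameter handler added elsewhere in this commit (not shown in this excerpt). Roughly, its shape is:

	static int __init cgroup_no_v1(char *str)
	{
		struct cgroup_subsys *ss;
		char *token;
		int i;

		while ((token = strsep(&str, ",")) != NULL) {
			if (!*token)
				continue;
			if (!strcmp(token, "all")) {
				cgroup_no_v1_mask = U16_MAX;
				break;
			}
			/* match either the v2 name or the legacy (v1) name */
			for_each_subsys(ss, i)
				if (!strcmp(token, ss->name) ||
				    !strcmp(token, ss->legacy_name))
					cgroup_no_v1_mask |= 1 << i;
		}
		return 1;
	}
	__setup("cgroup_no_v1=", cgroup_no_v1);
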
 /**
  * cgroup_on_dfl - test whether a cgroup is on the default hierarchy
  * @cgrp: the cgroup of interest
@@ -339,6 +355,32 @@ static struct cgroup *cgroup_parent(struct cgroup *cgrp)
 	return NULL;
 }
 
+/* subsystems visibly enabled on a cgroup */
+static u16 cgroup_control(struct cgroup *cgrp)
+{
+	struct cgroup *parent = cgroup_parent(cgrp);
+	u16 root_ss_mask = cgrp->root->subsys_mask;
+
+	if (parent)
+		return parent->subtree_control;
+
+	if (cgroup_on_dfl(cgrp))
+		root_ss_mask &= ~(cgrp_dfl_inhibit_ss_mask |
+				  cgrp_dfl_implicit_ss_mask);
+	return root_ss_mask;
+}
+
+/* subsystems enabled on a cgroup */
+static u16 cgroup_ss_mask(struct cgroup *cgrp)
+{
+	struct cgroup *parent = cgroup_parent(cgrp);
+
+	if (parent)
+		return parent->subtree_ss_mask;
+
+	return cgrp->root->subsys_mask;
+}
+
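The split mirrors subtree_control vs subtree_ss_mask one level up: cgroup_control() is what userland is shown, cgroup_ss_mask() is what is actually enabled, possibly larger because of ->depends_on and implicit_on_dfl. A hypothetical caller:

	/* does @cgrp expose interface knobs for @ss? */
	bool visible = cgroup_control(cgrp) & (1 << ss->id);

	/* does @cgrp have an enabled css for @ss, shown or not? */
	bool enabled = cgroup_ss_mask(cgrp) & (1 << ss->id);
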
 /**
  * cgroup_css - obtain a cgroup's css for the specified subsystem
  * @cgrp: the cgroup of interest
@@ -378,16 +420,15 @@ static struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
 	if (!ss)
 		return &cgrp->self;
 
-	if (!(cgrp->root->subsys_mask & (1 << ss->id)))
-		return NULL;
-
 	/*
 	 * This function is used while updating css associations and thus
-	 * can't test the csses directly.  Use ->child_subsys_mask.
+	 * can't test the csses directly.  Test ss_mask.
 	 */
-	while (cgroup_parent(cgrp) &&
-	       !(cgroup_parent(cgrp)->child_subsys_mask & (1 << ss->id)))
+	while (!(cgroup_ss_mask(cgrp) & (1 << ss->id))) {
 		cgrp = cgroup_parent(cgrp);
+		if (!cgrp)
+			return NULL;
+	}
 
 	return cgroup_css(cgrp, ss);
 }
@@ -506,22 +547,28 @@ static int notify_on_release(const struct cgroup *cgrp)
 		(((ss) = cgroup_subsys[ssid]) || true); (ssid)++)
 
 /**
- * for_each_subsys_which - filter for_each_subsys with a bitmask
+ * do_each_subsys_mask - filter for_each_subsys with a bitmask
  * @ss: the iteration cursor
  * @ssid: the index of @ss, CGROUP_SUBSYS_COUNT after reaching the end
- * @ss_maskp: a pointer to the bitmask
+ * @ss_mask: the bitmask
  *
  * The block will only run for cases where the ssid-th bit (1 << ssid) of
- * mask is set to 1.
+ * @ss_mask is set.
  */
-#define for_each_subsys_which(ss, ssid, ss_maskp)			\
-	if (!CGROUP_SUBSYS_COUNT) /* to avoid spurious gcc warning */	\
+#define do_each_subsys_mask(ss, ssid, ss_mask) do {			\
+	unsigned long __ss_mask = (ss_mask);				\
+	if (!CGROUP_SUBSYS_COUNT) { /* to avoid spurious gcc warning */ \
 		(ssid) = 0;						\
-	else								\
-		for_each_set_bit(ssid, ss_maskp, CGROUP_SUBSYS_COUNT)	\
-			if (((ss) = cgroup_subsys[ssid]) && false)	\
-				break;					\
-			else
+		break;							\
+	}								\
+	for_each_set_bit(ssid, &__ss_mask, CGROUP_SUBSYS_COUNT) {	\
+		(ss) = cgroup_subsys[ssid];				\
+		{
+
+#define while_each_subsys_mask()					\
+		}							\
+	}								\
+} while (false)
 
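Unlike the single-statement for_each_subsys_which() it replaces, the new pair brackets an arbitrary block. Usage follows the pattern visible in cgroup_print_ss_mask() further down in this diff:

	u16 ss_mask = cgrp->root->subsys_mask;	/* any u16 mask works */

	do_each_subsys_mask(ss, ssid, ss_mask) {
		pr_info("subsys %d: %s\n", ssid, ss->name);
	} while_each_subsys_mask();
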
 /* iterate across the hierarchies */
 #define for_each_root(root)						\
@@ -535,6 +582,24 @@ static int notify_on_release(const struct cgroup *cgrp)
 			;						\
 		else
 
+/* walk live descendants in preorder */
+#define cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp)		\
+	css_for_each_descendant_pre((d_css), cgroup_css((cgrp), NULL))	\
+		if (({ lockdep_assert_held(&cgroup_mutex);		\
+		       (dsct) = (d_css)->cgroup;			\
+		       cgroup_is_dead(dsct); }))			\
+			;						\
+		else
+
+/* walk live descendants in postorder */
+#define cgroup_for_each_live_descendant_post(dsct, d_css, cgrp)	\
+	css_for_each_descendant_post((d_css), cgroup_css((cgrp), NULL)) \
+		if (({ lockdep_assert_held(&cgroup_mutex);		\
+		       (dsct) = (d_css)->cgroup;			\
+		       cgroup_is_dead(dsct); }))			\
+			;						\
+		else
+
 static void cgroup_release_agent(struct work_struct *work);
 static void check_for_release(struct cgroup *cgrp);
 
@@ -665,6 +730,9 @@ static void css_set_move_task(struct task_struct *task,
 {
 	lockdep_assert_held(&css_set_lock);
 
+	if (to_cset && !css_set_populated(to_cset))
+		css_set_update_populated(to_cset, true);
+
 	if (from_cset) {
 		struct css_task_iter *it, *pos;
 
@@ -698,8 +766,6 @@ static void css_set_move_task(struct task_struct *task,
 		 */
 		WARN_ON_ONCE(task->flags & PF_EXITING);
 
-		if (!css_set_populated(to_cset))
-			css_set_update_populated(to_cset, true);
 		rcu_assign_pointer(task->cgroups, to_cset);
 		list_add_tail(&task->cg_list, use_mg_tasks ? &to_cset->mg_tasks :
 							     &to_cset->tasks);
@@ -1102,13 +1168,13 @@ static void cgroup_destroy_root(struct cgroup_root *root)
 	struct cgroup *cgrp = &root->cgrp;
 	struct cgrp_cset_link *link, *tmp_link;
 
-	mutex_lock(&cgroup_mutex);
+	cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp);
 
 	BUG_ON(atomic_read(&root->nr_cgrps));
 	BUG_ON(!list_empty(&cgrp->self.children));
 
 	/* Rebind all subsystems back to the default hierarchy */
-	rebind_subsystems(&cgrp_dfl_root, root->subsys_mask);
+	WARN_ON(rebind_subsystems(&cgrp_dfl_root, root->subsys_mask));
 
 	/*
 	 * Release all the links from cset_links to this hierarchy's
@@ -1248,46 +1314,40 @@ static umode_t cgroup_file_mode(const struct cftype *cft)
 }
 
 /**
- * cgroup_calc_child_subsys_mask - calculate child_subsys_mask
- * @cgrp: the target cgroup
+ * cgroup_calc_subtree_ss_mask - calculate subtree_ss_mask
  * @subtree_control: the new subtree_control mask to consider
+ * @this_ss_mask: available subsystems
  *
  * On the default hierarchy, a subsystem may request other subsystems to be
  * enabled together through its ->depends_on mask.  In such cases, more
  * subsystems than specified in "cgroup.subtree_control" may be enabled.
  *
  * This function calculates which subsystems need to be enabled if
- * @subtree_control is to be applied to @cgrp.  The returned mask is always
- * a superset of @subtree_control and follows the usual hierarchy rules.
+ * @subtree_control is to be applied while restricted to @this_ss_mask.
  */
-static unsigned long cgroup_calc_child_subsys_mask(struct cgroup *cgrp,
-						   unsigned long subtree_control)
+static u16 cgroup_calc_subtree_ss_mask(u16 subtree_control, u16 this_ss_mask)
 {
-	struct cgroup *parent = cgroup_parent(cgrp);
-	unsigned long cur_ss_mask = subtree_control;
+	u16 cur_ss_mask = subtree_control;
 	struct cgroup_subsys *ss;
 	int ssid;
 
 	lockdep_assert_held(&cgroup_mutex);
 
-	if (!cgroup_on_dfl(cgrp))
-		return cur_ss_mask;
+	cur_ss_mask |= cgrp_dfl_implicit_ss_mask;
 
 	while (true) {
-		unsigned long new_ss_mask = cur_ss_mask;
+		u16 new_ss_mask = cur_ss_mask;
 
-		for_each_subsys_which(ss, ssid, &cur_ss_mask)
+		do_each_subsys_mask(ss, ssid, cur_ss_mask) {
 			new_ss_mask |= ss->depends_on;
+		} while_each_subsys_mask();
 
 		/*
 		 * Mask out subsystems which aren't available.  This can
 		 * happen only if some depended-upon subsystems were bound
 		 * to non-default hierarchies.
 		 */
-		if (parent)
-			new_ss_mask &= parent->child_subsys_mask;
-		else
-			new_ss_mask &= cgrp->root->subsys_mask;
+		new_ss_mask &= this_ss_mask;
 
 		if (new_ss_mask == cur_ss_mask)
 			break;
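A worked example of the fixed point, assuming (as with cgroup writeback in current kernels) that the io controller declares .depends_on = 1 << memory_cgrp_id:

	u16 sc = 1 << io_cgrp_id;	/* user enabled only "io" */
	u16 mask = cgroup_calc_subtree_ss_mask(sc, this_ss_mask);
	/* the first pass ORs in memory via ->depends_on, the second pass
	 * adds nothing new, so the loop exits with io and memory both set,
	 * ANDed against @this_ss_mask in case memory is unavailable */
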
@@ -1298,19 +1358,6 @@ static unsigned long cgroup_calc_child_subsys_mask(struct cgroup *cgrp,
 }
 
 /**
- * cgroup_refresh_child_subsys_mask - update child_subsys_mask
- * @cgrp: the target cgroup
- *
- * Update @cgrp->child_subsys_mask according to the current
- * @cgrp->subtree_control using cgroup_calc_child_subsys_mask().
- */
-static void cgroup_refresh_child_subsys_mask(struct cgroup *cgrp)
-{
-	cgrp->child_subsys_mask =
-		cgroup_calc_child_subsys_mask(cgrp, cgrp->subtree_control);
-}
-
-/**
  * cgroup_kn_unlock - unlocking helper for cgroup kernfs methods
  * @kn: the kernfs_node being serviced
  *
@@ -1338,19 +1385,22 @@ static void cgroup_kn_unlock(struct kernfs_node *kn)
 /**
  * cgroup_kn_lock_live - locking helper for cgroup kernfs methods
  * @kn: the kernfs_node being serviced
+ * @drain_offline: perform offline draining on the cgroup
  *
  * This helper is to be used by a cgroup kernfs method currently servicing
  * @kn.  It breaks the active protection, performs cgroup locking and
  * verifies that the associated cgroup is alive.  Returns the cgroup if
  * alive; otherwise, %NULL.  A successful return should be undone by a
- * matching cgroup_kn_unlock() invocation.
+ * matching cgroup_kn_unlock() invocation.  If @drain_offline is %true, the
+ * cgroup is drained of offlining csses before return.
  *
  * Any cgroup kernfs method implementation which requires locking the
  * associated cgroup should use this helper.  It avoids nesting cgroup
  * locking under kernfs active protection and allows all kernfs operations
  * including self-removal.
  */
-static struct cgroup *cgroup_kn_lock_live(struct kernfs_node *kn)
+static struct cgroup *cgroup_kn_lock_live(struct kernfs_node *kn,
+					  bool drain_offline)
 {
 	struct cgroup *cgrp;
 
@@ -1369,7 +1419,10 @@ static struct cgroup *cgroup_kn_lock_live(struct kernfs_node *kn)
 		return NULL;
 	kernfs_break_active_protection(kn);
 
-	mutex_lock(&cgroup_mutex);
+	if (drain_offline)
+		cgroup_lock_and_drain_offline(cgrp);
+	else
+		mutex_lock(&cgroup_mutex);
 
 	if (!cgroup_is_dead(cgrp))
 		return cgrp;
@@ -1399,14 +1452,17 @@ static void cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft)
 /**
  * css_clear_dir - remove subsys files in a cgroup directory
  * @css: target css
- * @cgrp_override: specify if target cgroup is different from css->cgroup
  */
-static void css_clear_dir(struct cgroup_subsys_state *css,
-			  struct cgroup *cgrp_override)
+static void css_clear_dir(struct cgroup_subsys_state *css)
 {
-	struct cgroup *cgrp = cgrp_override ?: css->cgroup;
+	struct cgroup *cgrp = css->cgroup;
 	struct cftype *cfts;
 
+	if (!(css->flags & CSS_VISIBLE))
+		return;
+
+	css->flags &= ~CSS_VISIBLE;
+
 	list_for_each_entry(cfts, &css->ss->cfts, node)
 		cgroup_addrm_files(css, cgrp, cfts, false);
 }
@@ -1414,17 +1470,18 @@ static void css_clear_dir(struct cgroup_subsys_state *css,
 /**
  * css_populate_dir - create subsys files in a cgroup directory
  * @css: target css
- * @cgrp_overried: specify if target cgroup is different from css->cgroup
  *
  * On failure, no file is added.
  */
-static int css_populate_dir(struct cgroup_subsys_state *css,
-			    struct cgroup *cgrp_override)
+static int css_populate_dir(struct cgroup_subsys_state *css)
 {
-	struct cgroup *cgrp = cgrp_override ?: css->cgroup;
+	struct cgroup *cgrp = css->cgroup;
 	struct cftype *cfts, *failed_cfts;
 	int ret;
 
+	if ((css->flags & CSS_VISIBLE) || !cgrp->kn)
+		return 0;
+
 	if (!css->ss) {
 		if (cgroup_on_dfl(cgrp))
 			cfts = cgroup_dfl_base_files;
@@ -1441,6 +1498,9 @@ static int css_populate_dir(struct cgroup_subsys_state *css,
 			goto err;
 		}
 	}
+
+	css->flags |= CSS_VISIBLE;
+
 	return 0;
 err:
 	list_for_each_entry(cfts, &css->ss->cfts, node) {
@@ -1451,67 +1511,30 @@ err:
 	return ret;
 }
 
-static int rebind_subsystems(struct cgroup_root *dst_root,
-			     unsigned long ss_mask)
+static int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask)
 {
 	struct cgroup *dcgrp = &dst_root->cgrp;
 	struct cgroup_subsys *ss;
-	unsigned long tmp_ss_mask;
 	int ssid, i, ret;
 
 	lockdep_assert_held(&cgroup_mutex);
 
-	for_each_subsys_which(ss, ssid, &ss_mask) {
-		/* if @ss has non-root csses attached to it, can't move */
-		if (css_next_child(NULL, cgroup_css(&ss->root->cgrp, ss)))
+	do_each_subsys_mask(ss, ssid, ss_mask) {
+		/*
+		 * If @ss has non-root csses attached to it, can't move.
+		 * If @ss is an implicit controller, it is exempt from this
+		 * rule and can be stolen.
+		 */
+		if (css_next_child(NULL, cgroup_css(&ss->root->cgrp, ss)) &&
+		    !ss->implicit_on_dfl)
 			return -EBUSY;
 
 		/* can't move between two non-dummy roots either */
 		if (ss->root != &cgrp_dfl_root && dst_root != &cgrp_dfl_root)
 			return -EBUSY;
-	}
-
-	/* skip creating root files on dfl_root for inhibited subsystems */
-	tmp_ss_mask = ss_mask;
-	if (dst_root == &cgrp_dfl_root)
-		tmp_ss_mask &= ~cgrp_dfl_root_inhibit_ss_mask;
-
-	for_each_subsys_which(ss, ssid, &tmp_ss_mask) {
-		struct cgroup *scgrp = &ss->root->cgrp;
-		int tssid;
-
-		ret = css_populate_dir(cgroup_css(scgrp, ss), dcgrp);
-		if (!ret)
-			continue;
-
-		/*
-		 * Rebinding back to the default root is not allowed to
-		 * fail.  Using both default and non-default roots should
-		 * be rare.  Moving subsystems back and forth even more so.
-		 * Just warn about it and continue.
-		 */
-		if (dst_root == &cgrp_dfl_root) {
-			if (cgrp_dfl_root_visible) {
-				pr_warn("failed to create files (%d) while rebinding 0x%lx to default root\n",
-					ret, ss_mask);
-				pr_warn("you may retry by moving them to a different hierarchy and unbinding\n");
-			}
-			continue;
-		}
-
-		for_each_subsys_which(ss, tssid, &tmp_ss_mask) {
-			if (tssid == ssid)
-				break;
-			css_clear_dir(cgroup_css(scgrp, ss), dcgrp);
-		}
-		return ret;
-	}
+	} while_each_subsys_mask();
 
-	/*
-	 * Nothing can fail from this point on.  Remove files for the
-	 * removed subsystems and rebind each subsystem.
-	 */
-	for_each_subsys_which(ss, ssid, &ss_mask) {
+	do_each_subsys_mask(ss, ssid, ss_mask) {
 		struct cgroup_root *src_root = ss->root;
 		struct cgroup *scgrp = &src_root->cgrp;
 		struct cgroup_subsys_state *css = cgroup_css(scgrp, ss);
@@ -1519,8 +1542,12 @@ static int rebind_subsystems(struct cgroup_root *dst_root,
 
 		WARN_ON(!css || cgroup_css(dcgrp, ss));
 
-		css_clear_dir(css, NULL);
+		/* disable from the source */
+		src_root->subsys_mask &= ~(1 << ssid);
+		WARN_ON(cgroup_apply_control(scgrp));
+		cgroup_finalize_control(scgrp, 0);
 
+		/* rebind */
 		RCU_INIT_POINTER(scgrp->subsys[ssid], NULL);
 		rcu_assign_pointer(dcgrp->subsys[ssid], css);
 		ss->root = dst_root;
@@ -1532,23 +1559,23 @@ static int rebind_subsystems(struct cgroup_root *dst_root,
 				   &dcgrp->e_csets[ss->id]);
 		spin_unlock_bh(&css_set_lock);
 
-		src_root->subsys_mask &= ~(1 << ssid);
-		scgrp->subtree_control &= ~(1 << ssid);
-		cgroup_refresh_child_subsys_mask(scgrp);
-
 		/* default hierarchy doesn't enable controllers by default */
 		dst_root->subsys_mask |= 1 << ssid;
 		if (dst_root == &cgrp_dfl_root) {
 			static_branch_enable(cgroup_subsys_on_dfl_key[ssid]);
 		} else {
 			dcgrp->subtree_control |= 1 << ssid;
-			cgroup_refresh_child_subsys_mask(dcgrp);
 			static_branch_disable(cgroup_subsys_on_dfl_key[ssid]);
 		}
 
+		ret = cgroup_apply_control(dcgrp);
+		if (ret)
+			pr_warn("partial failure to rebind %s controller (err=%d)\n",
+				ss->name, ret);
+
 		if (ss->bind)
 			ss->bind(css);
-	}
+	} while_each_subsys_mask();
 
 	kernfs_activate(dcgrp->kn);
 	return 0;
@@ -1584,7 +1611,7 @@ static int cgroup_show_options(struct seq_file *seq,
 }
 
 struct cgroup_sb_opts {
-	unsigned long subsys_mask;
+	u16 subsys_mask;
 	unsigned int flags;
 	char *release_agent;
 	bool cpuset_clone_children;
@@ -1597,13 +1624,13 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
 {
 	char *token, *o = data;
 	bool all_ss = false, one_ss = false;
-	unsigned long mask = -1UL;
+	u16 mask = U16_MAX;
 	struct cgroup_subsys *ss;
 	int nr_opts = 0;
 	int i;
 
 #ifdef CONFIG_CPUSETS
-	mask = ~(1U << cpuset_cgrp_id);
+	mask = ~((u16)1 << cpuset_cgrp_id);
 #endif
 
 	memset(opts, 0, sizeof(*opts));
@@ -1678,6 +1705,8 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
 				continue;
 			if (!cgroup_ssid_enabled(i))
 				continue;
+			if (cgroup_ssid_no_v1(i))
+				continue;
 
 			/* Mutually exclusive option 'all' + subsystem name */
 			if (all_ss)
@@ -1698,7 +1727,7 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
 	 */
 	if (all_ss || (!one_ss && !opts->none && !opts->name))
 		for_each_subsys(ss, i)
-			if (cgroup_ssid_enabled(i))
+			if (cgroup_ssid_enabled(i) && !cgroup_ssid_no_v1(i))
 				opts->subsys_mask |= (1 << i);
 
 	/*
@@ -1728,14 +1757,14 @@ static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data)
 	int ret = 0;
 	struct cgroup_root *root = cgroup_root_from_kf(kf_root);
 	struct cgroup_sb_opts opts;
-	unsigned long added_mask, removed_mask;
+	u16 added_mask, removed_mask;
 
 	if (root == &cgrp_dfl_root) {
 		pr_err("remount is not allowed\n");
 		return -EINVAL;
 	}
 
-	mutex_lock(&cgroup_mutex);
+	cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp);
 
 	/* See what subsystems are wanted */
 	ret = parse_cgroupfs_options(data, &opts);
@@ -1768,7 +1797,7 @@ static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data)
 	if (ret)
 		goto out_unlock;
 
-	rebind_subsystems(&cgrp_dfl_root, removed_mask);
+	WARN_ON(rebind_subsystems(&cgrp_dfl_root, removed_mask));
 
 	if (opts.release_agent) {
 		spin_lock(&release_agent_path_lock);
@@ -1876,7 +1905,7 @@ static void init_cgroup_root(struct cgroup_root *root,
 		set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags);
 }
 
-static int cgroup_setup_root(struct cgroup_root *root, unsigned long ss_mask)
+static int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask)
 {
 	LIST_HEAD(tmp_links);
 	struct cgroup *root_cgrp = &root->cgrp;
@@ -1899,10 +1928,11 @@ static int cgroup_setup_root(struct cgroup_root *root, unsigned long ss_mask)
 	/*
 	 * We're accessing css_set_count without locking css_set_lock here,
 	 * but that's OK - it can only be increased by someone holding
-	 * cgroup_lock, and that's us. The worst that can happen is that we
-	 * have some link structures left over
+	 * cgroup_lock, and that's us.  Later rebinding may disable
+	 * controllers on the default hierarchy and thus create new csets,
+	 * which can't be more than the existing ones.  Allocate 2x.
 	 */
-	ret = allocate_cgrp_cset_links(css_set_count, &tmp_links);
+	ret = allocate_cgrp_cset_links(2 * css_set_count, &tmp_links);
 	if (ret)
 		goto cancel_ref;
 
@@ -1919,7 +1949,7 @@ static int cgroup_setup_root(struct cgroup_root *root, unsigned long ss_mask)
 	}
 	root_cgrp->kn = root->kf_root->kn;
 
-	ret = css_populate_dir(&root_cgrp->self, NULL);
+	ret = css_populate_dir(&root_cgrp->self);
 	if (ret)
 		goto destroy_root;
 
@@ -1992,13 +2022,13 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
 			pr_err("cgroup2: unknown option \"%s\"\n", (char *)data);
 			return ERR_PTR(-EINVAL);
 		}
-		cgrp_dfl_root_visible = true;
+		cgrp_dfl_visible = true;
 		root = &cgrp_dfl_root;
 		cgroup_get(&root->cgrp);
 		goto out_mount;
 	}
 
-	mutex_lock(&cgroup_mutex);
+	cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp);
 
 	/* First find the desired set of subsystems */
 	ret = parse_cgroupfs_options(data, &opts);
@@ -2338,38 +2368,38 @@ struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset,
 }
 
 /**
- * cgroup_taskset_migrate - migrate a taskset to a cgroup
+ * cgroup_taskset_migrate - migrate a taskset
  * @tset: target taskset
- * @dst_cgrp: destination cgroup
+ * @root: cgroup root the migration is taking place on
  *
- * Migrate tasks in @tset to @dst_cgrp.  This function fails iff one of the
- * ->can_attach callbacks fails and guarantees that either all or none of
- * the tasks in @tset are migrated.  @tset is consumed regardless of
- * success.
+ * Migrate tasks in @tset as setup by migration preparation functions.
+ * This function fails iff one of the ->can_attach callbacks fails and
+ * guarantees that either all or none of the tasks in @tset are migrated.
+ * @tset is consumed regardless of success.
  */
 static int cgroup_taskset_migrate(struct cgroup_taskset *tset,
-				  struct cgroup *dst_cgrp)
+				  struct cgroup_root *root)
 {
-	struct cgroup_subsys_state *css, *failed_css = NULL;
+	struct cgroup_subsys *ss;
 	struct task_struct *task, *tmp_task;
 	struct css_set *cset, *tmp_cset;
-	int i, ret;
+	int ssid, failed_ssid, ret;
 
 	/* methods shouldn't be called if no task is actually migrating */
 	if (list_empty(&tset->src_csets))
 		return 0;
 
 	/* check that we can legitimately attach to the cgroup */
-	for_each_e_css(css, i, dst_cgrp) {
-		if (css->ss->can_attach) {
-			tset->ssid = i;
-			ret = css->ss->can_attach(tset);
-			if (ret) {
-				failed_css = css;
-				goto out_cancel_attach;
-			}
-		}
-	}
+	do_each_subsys_mask(ss, ssid, root->subsys_mask) {
+		if (ss->can_attach) {
+			tset->ssid = ssid;
+			ret = ss->can_attach(tset);
+			if (ret) {
+				failed_ssid = ssid;
+				goto out_cancel_attach;
+			}
+		}
+	} while_each_subsys_mask();
 
 	/*
 	 * Now that we're guaranteed success, proceed to move all tasks to
@@ -2396,25 +2426,25 @@ static int cgroup_taskset_migrate(struct cgroup_taskset *tset,
 	 */
 	tset->csets = &tset->dst_csets;
 
-	for_each_e_css(css, i, dst_cgrp) {
-		if (css->ss->attach) {
-			tset->ssid = i;
-			css->ss->attach(tset);
+	do_each_subsys_mask(ss, ssid, root->subsys_mask) {
+		if (ss->attach) {
+			tset->ssid = ssid;
+			ss->attach(tset);
 		}
-	}
+	} while_each_subsys_mask();
 
 	ret = 0;
 	goto out_release_tset;
 
 out_cancel_attach:
-	for_each_e_css(css, i, dst_cgrp) {
-		if (css == failed_css)
+	do_each_subsys_mask(ss, ssid, root->subsys_mask) {
+		if (ssid == failed_ssid)
 			break;
-		if (css->ss->cancel_attach) {
-			tset->ssid = i;
-			css->ss->cancel_attach(tset);
+		if (ss->cancel_attach) {
+			tset->ssid = ssid;
+			ss->cancel_attach(tset);
 		}
-	}
+	} while_each_subsys_mask();
 out_release_tset:
 	spin_lock_bh(&css_set_lock);
 	list_splice_init(&tset->dst_csets, &tset->src_csets);
@@ -2427,6 +2457,20 @@ out_release_tset:
 }
 
 /**
+ * cgroup_may_migrate_to - verify whether a cgroup can be migration destination
+ * @dst_cgrp: destination cgroup to test
+ *
+ * On the default hierarchy, except for the root, subtree_control must be
+ * zero for migration destination cgroups with tasks so that child cgroups
+ * don't compete against tasks.
+ */
+static bool cgroup_may_migrate_to(struct cgroup *dst_cgrp)
+{
+	return !cgroup_on_dfl(dst_cgrp) || !cgroup_parent(dst_cgrp) ||
+		!dst_cgrp->subtree_control;
+}
+
+/**
  * cgroup_migrate_finish - cleanup after attach
  * @preloaded_csets: list of preloaded css_sets
  *
@@ -2442,6 +2486,7 @@ static void cgroup_migrate_finish(struct list_head *preloaded_csets)
 	spin_lock_bh(&css_set_lock);
 	list_for_each_entry_safe(cset, tmp_cset, preloaded_csets, mg_preload_node) {
 		cset->mg_src_cgrp = NULL;
+		cset->mg_dst_cgrp = NULL;
 		cset->mg_dst_cset = NULL;
 		list_del_init(&cset->mg_preload_node);
 		put_css_set_locked(cset);
@@ -2474,58 +2519,56 @@ static void cgroup_migrate_add_src(struct css_set *src_cset,
 	lockdep_assert_held(&cgroup_mutex);
 	lockdep_assert_held(&css_set_lock);
 
+	/*
+	 * If ->dead, @src_cset is associated with one or more dead cgroups
+	 * and doesn't contain any migratable tasks.  Ignore it early so
+	 * that the rest of migration path doesn't get confused by it.
+	 */
+	if (src_cset->dead)
+		return;
+
 	src_cgrp = cset_cgroup_from_root(src_cset, dst_cgrp->root);
 
 	if (!list_empty(&src_cset->mg_preload_node))
 		return;
 
 	WARN_ON(src_cset->mg_src_cgrp);
+	WARN_ON(src_cset->mg_dst_cgrp);
 	WARN_ON(!list_empty(&src_cset->mg_tasks));
 	WARN_ON(!list_empty(&src_cset->mg_node));
 
 	src_cset->mg_src_cgrp = src_cgrp;
+	src_cset->mg_dst_cgrp = dst_cgrp;
 	get_css_set(src_cset);
 	list_add(&src_cset->mg_preload_node, preloaded_csets);
 }
 
 /**
  * cgroup_migrate_prepare_dst - prepare destination css_sets for migration
- * @dst_cgrp: the destination cgroup (may be %NULL)
  * @preloaded_csets: list of preloaded source css_sets
  *
- * Tasks are about to be moved to @dst_cgrp and all the source css_sets
- * have been preloaded to @preloaded_csets.  This function looks up and
- * pins all destination css_sets, links each to its source, and append them
- * to @preloaded_csets.  If @dst_cgrp is %NULL, the destination of each
- * source css_set is assumed to be its cgroup on the default hierarchy.
+ * Tasks are about to be moved and all the source css_sets have been
+ * preloaded to @preloaded_csets.  This function looks up and pins all
+ * destination css_sets, links each to its source, and append them to
+ * @preloaded_csets.
  *
  * This function must be called after cgroup_migrate_add_src() has been
  * called on each migration source css_set.  After migration is performed
  * using cgroup_migrate(), cgroup_migrate_finish() must be called on
  * @preloaded_csets.
  */
-static int cgroup_migrate_prepare_dst(struct cgroup *dst_cgrp,
-				      struct list_head *preloaded_csets)
+static int cgroup_migrate_prepare_dst(struct list_head *preloaded_csets)
 {
 	LIST_HEAD(csets);
 	struct css_set *src_cset, *tmp_cset;
 
 	lockdep_assert_held(&cgroup_mutex);
 
-	/*
-	 * Except for the root, child_subsys_mask must be zero for a cgroup
-	 * with tasks so that child cgroups don't compete against tasks.
-	 */
-	if (dst_cgrp && cgroup_on_dfl(dst_cgrp) && cgroup_parent(dst_cgrp) &&
-	    dst_cgrp->child_subsys_mask)
-		return -EBUSY;
-
 	/* look up the dst cset for each src cset and link it to src */
 	list_for_each_entry_safe(src_cset, tmp_cset, preloaded_csets, mg_preload_node) {
 		struct css_set *dst_cset;
 
-		dst_cset = find_css_set(src_cset,
-					dst_cgrp ?: src_cset->dfl_cgrp);
+		dst_cset = find_css_set(src_cset, src_cset->mg_dst_cgrp);
 		if (!dst_cset)
 			goto err;
 
@@ -2538,6 +2581,7 @@ static int cgroup_migrate_prepare_dst(struct cgroup *dst_cgrp,
 		 */
 		if (src_cset == dst_cset) {
 			src_cset->mg_src_cgrp = NULL;
+			src_cset->mg_dst_cgrp = NULL;
 			list_del_init(&src_cset->mg_preload_node);
 			put_css_set(src_cset);
 			put_css_set(dst_cset);
@@ -2563,11 +2607,11 @@ err:
  * cgroup_migrate - migrate a process or task to a cgroup
  * @leader: the leader of the process or the task to migrate
  * @threadgroup: whether @leader points to the whole process or a single task
- * @cgrp: the destination cgroup
+ * @root: cgroup root migration is taking place on
  *
- * Migrate a process or task denoted by @leader to @cgrp.  If migrating a
- * process, the caller must be holding cgroup_threadgroup_rwsem.  The
- * caller is also responsible for invoking cgroup_migrate_add_src() and
+ * Migrate a process or task denoted by @leader.  If migrating a process,
+ * the caller must be holding cgroup_threadgroup_rwsem.  The caller is also
+ * responsible for invoking cgroup_migrate_add_src() and
  * cgroup_migrate_prepare_dst() on the targets before invoking this
  * function and following up with cgroup_migrate_finish().
  *
@@ -2578,7 +2622,7 @@ err:
  * actually starting migrating.
  */
 static int cgroup_migrate(struct task_struct *leader, bool threadgroup,
-			  struct cgroup *cgrp)
+			  struct cgroup_root *root)
 {
 	struct cgroup_taskset tset = CGROUP_TASKSET_INIT(tset);
 	struct task_struct *task;
@@ -2599,7 +2643,7 @@ static int cgroup_migrate(struct task_struct *leader, bool threadgroup,
 	rcu_read_unlock();
 	spin_unlock_bh(&css_set_lock);
 
-	return cgroup_taskset_migrate(&tset, cgrp);
+	return cgroup_taskset_migrate(&tset, root);
 }
 
 /**
@@ -2617,6 +2661,9 @@ static int cgroup_attach_task(struct cgroup *dst_cgrp,
 	struct task_struct *task;
 	int ret;

+	if (!cgroup_may_migrate_to(dst_cgrp))
+		return -EBUSY;
+
 	/* look up all src csets */
 	spin_lock_bh(&css_set_lock);
 	rcu_read_lock();
@@ -2631,9 +2678,9 @@ static int cgroup_attach_task(struct cgroup *dst_cgrp,
 	spin_unlock_bh(&css_set_lock);

 	/* prepare dst csets and commit */
-	ret = cgroup_migrate_prepare_dst(dst_cgrp, &preloaded_csets);
+	ret = cgroup_migrate_prepare_dst(&preloaded_csets);
 	if (!ret)
-		ret = cgroup_migrate(leader, threadgroup, dst_cgrp);
+		ret = cgroup_migrate(leader, threadgroup, dst_cgrp->root);

 	cgroup_migrate_finish(&preloaded_csets);
 	return ret;
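
With cgroup_migrate_prepare_dst() taking only the preloaded list and cgroup_migrate() taking the root, every migration caller now follows the same sequence. A condensed sketch of that sequence, mirroring cgroup_attach_task() above (the wrapper function itself is hypothetical, for illustration only; locking and error paths are simplified):

	/* sketch: the canonical migration sequence after this refactor */
	static int migrate_leader(struct task_struct *leader, bool threadgroup,
				  struct cgroup *dst_cgrp)
	{
		LIST_HEAD(preloaded_csets);
		struct task_struct *task;
		int ret;

		/* populated v2 cgroups refuse new tasks */
		if (!cgroup_may_migrate_to(dst_cgrp))
			return -EBUSY;

		/* 1. record each source css_set with its destination cgroup */
		spin_lock_bh(&css_set_lock);
		rcu_read_lock();
		task = leader;
		do {
			cgroup_migrate_add_src(task_css_set(task), dst_cgrp,
					       &preloaded_csets);
			if (!threadgroup)
				break;
		} while_each_thread(leader, task);
		rcu_read_unlock();
		spin_unlock_bh(&css_set_lock);

		/* 2. look up/create destination css_sets, 3. commit the move */
		ret = cgroup_migrate_prepare_dst(&preloaded_csets);
		if (!ret)
			ret = cgroup_migrate(leader, threadgroup, dst_cgrp->root);

		/* 4. always undo the preloading, success or failure */
		cgroup_migrate_finish(&preloaded_csets);
		return ret;
	}
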
@@ -2696,7 +2743,7 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
 	if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
 		return -EINVAL;

-	cgrp = cgroup_kn_lock_live(of->kn);
+	cgrp = cgroup_kn_lock_live(of->kn, false);
 	if (!cgrp)
 		return -ENODEV;

@@ -2794,7 +2841,7 @@ static ssize_t cgroup_release_agent_write(struct kernfs_open_file *of,

 	BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);

-	cgrp = cgroup_kn_lock_live(of->kn);
+	cgrp = cgroup_kn_lock_live(of->kn, false);
 	if (!cgrp)
 		return -ENODEV;
 	spin_lock(&release_agent_path_lock);
@@ -2822,38 +2869,28 @@ static int cgroup_sane_behavior_show(struct seq_file *seq, void *v)
 	return 0;
 }

-static void cgroup_print_ss_mask(struct seq_file *seq, unsigned long ss_mask)
+static void cgroup_print_ss_mask(struct seq_file *seq, u16 ss_mask)
 {
 	struct cgroup_subsys *ss;
 	bool printed = false;
 	int ssid;

-	for_each_subsys_which(ss, ssid, &ss_mask) {
+	do_each_subsys_mask(ss, ssid, ss_mask) {
 		if (printed)
 			seq_putc(seq, ' ');
 		seq_printf(seq, "%s", ss->name);
 		printed = true;
-	}
+	} while_each_subsys_mask();
 	if (printed)
 		seq_putc(seq, '\n');
 }

-/* show controllers which are currently attached to the default hierarchy */
-static int cgroup_root_controllers_show(struct seq_file *seq, void *v)
-{
-	struct cgroup *cgrp = seq_css(seq)->cgroup;
-
-	cgroup_print_ss_mask(seq, cgrp->root->subsys_mask &
-			     ~cgrp_dfl_root_inhibit_ss_mask);
-	return 0;
-}
-
 /* show controllers which are enabled from the parent */
 static int cgroup_controllers_show(struct seq_file *seq, void *v)
 {
 	struct cgroup *cgrp = seq_css(seq)->cgroup;

-	cgroup_print_ss_mask(seq, cgroup_parent(cgrp)->subtree_control);
+	cgroup_print_ss_mask(seq, cgroup_control(cgrp));
 	return 0;
 }

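
The do_each_subsys_mask()/while_each_subsys_mask() pair replacing for_each_subsys_which() is introduced earlier in the series and is not visible in this excerpt. Reconstructed from the series (treat this as a sketch rather than the authoritative definition), it is a two-part macro so the mask no longer has to be an addressable unsigned long variable:

	#define do_each_subsys_mask(ss, ssid, ss_mask) do {		\
		unsigned long __ss_mask = (ss_mask);			\
		if (!CGROUP_SUBSYS_COUNT) { /* avoid unused var warning */ \
			(ssid) = 0;					\
			break;						\
		}							\
		for_each_set_bit(ssid, &__ss_mask, CGROUP_SUBSYS_COUNT) { \
			(ss) = cgroup_subsys[ssid];			\
			{

	#define while_each_subsys_mask()				\
			}						\
		}							\
	} while (false)

Where the old form needed a pointer (note the old `&ss_mask` and the `tmp_ss_mask` temporary further down), the new one accepts any u16 expression, e.g. `~cgrp_dfl_inhibit_ss_mask`, by copying it into a local.
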
@@ -2870,16 +2907,17 @@ static int cgroup_subtree_control_show(struct seq_file *seq, void *v)
  * cgroup_update_dfl_csses - update css assoc of a subtree in default hierarchy
  * @cgrp: root of the subtree to update csses for
  *
- * @cgrp's child_subsys_mask has changed and its subtree's (self excluded)
- * css associations need to be updated accordingly.  This function looks up
- * all css_sets which are attached to the subtree, creates the matching
- * updated css_sets and migrates the tasks to the new ones.
+ * @cgrp's control masks have changed and its subtree's css associations
+ * need to be updated accordingly.  This function looks up all css_sets
+ * which are attached to the subtree, creates the matching updated css_sets
+ * and migrates the tasks to the new ones.
  */
 static int cgroup_update_dfl_csses(struct cgroup *cgrp)
 {
 	LIST_HEAD(preloaded_csets);
 	struct cgroup_taskset tset = CGROUP_TASKSET_INIT(tset);
-	struct cgroup_subsys_state *css;
+	struct cgroup_subsys_state *d_css;
+	struct cgroup *dsct;
 	struct css_set *src_cset;
 	int ret;

@@ -2889,21 +2927,17 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)

 	/* look up all csses currently attached to @cgrp's subtree */
 	spin_lock_bh(&css_set_lock);
-	css_for_each_descendant_pre(css, cgroup_css(cgrp, NULL)) {
+	cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
 		struct cgrp_cset_link *link;

-		/* self is not affected by child_subsys_mask change */
-		if (css->cgroup == cgrp)
-			continue;
-
-		list_for_each_entry(link, &css->cgroup->cset_links, cset_link)
-			cgroup_migrate_add_src(link->cset, cgrp,
+		list_for_each_entry(link, &dsct->cset_links, cset_link)
+			cgroup_migrate_add_src(link->cset, dsct,
 					       &preloaded_csets);
 	}
 	spin_unlock_bh(&css_set_lock);

 	/* NULL dst indicates self on default hierarchy */
-	ret = cgroup_migrate_prepare_dst(NULL, &preloaded_csets);
+	ret = cgroup_migrate_prepare_dst(&preloaded_csets);
 	if (ret)
 		goto out_finish;

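
cgroup_for_each_live_descendant_pre() (and the _post sibling used further down) also comes from earlier in the series. Roughly, from the same series (a sketch from memory, not the authoritative text), it wraps css_for_each_descendant_pre() on the self css and skips dead cgroups while asserting cgroup_mutex:

	/* walk live descendants in preorder */
	#define cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp)	\
		css_for_each_descendant_pre((d_css), cgroup_css((cgrp), NULL)) \
			if (({ lockdep_assert_held(&cgroup_mutex);	\
			       (dsct) = (d_css)->cgroup;		\
			       cgroup_is_dead(dsct); }))		\
				;					\
			else

Because the new walk visits @cgrp itself as well, the explicit self-skip in the old loop above is gone.
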
@@ -2921,20 +2955,272 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
 	}
 	spin_unlock_bh(&css_set_lock);

-	ret = cgroup_taskset_migrate(&tset, cgrp);
+	ret = cgroup_taskset_migrate(&tset, cgrp->root);
 out_finish:
 	cgroup_migrate_finish(&preloaded_csets);
 	percpu_up_write(&cgroup_threadgroup_rwsem);
 	return ret;
 }

+/**
+ * cgroup_lock_and_drain_offline - lock cgroup_mutex and drain offlined csses
+ * @cgrp: root of the target subtree
+ *
+ * Because css offlining is asynchronous, userland may try to re-enable a
+ * controller while the previous css is still around.  This function grabs
+ * cgroup_mutex and drains the previous css instances of @cgrp's subtree.
+ */
+static void cgroup_lock_and_drain_offline(struct cgroup *cgrp)
+	__acquires(&cgroup_mutex)
+{
+	struct cgroup *dsct;
+	struct cgroup_subsys_state *d_css;
+	struct cgroup_subsys *ss;
+	int ssid;
+
+restart:
+	mutex_lock(&cgroup_mutex);
+
+	cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) {
+		for_each_subsys(ss, ssid) {
+			struct cgroup_subsys_state *css = cgroup_css(dsct, ss);
+			DEFINE_WAIT(wait);
+
+			if (!css || !percpu_ref_is_dying(&css->refcnt))
+				continue;
+
+			cgroup_get(dsct);
+			prepare_to_wait(&dsct->offline_waitq, &wait,
+					TASK_UNINTERRUPTIBLE);
+
+			mutex_unlock(&cgroup_mutex);
+			schedule();
+			finish_wait(&dsct->offline_waitq, &wait);
+
+			cgroup_put(dsct);
+			goto restart;
+		}
+	}
+}
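
The second argument that cgroup_kn_lock_live() grows in this pull (false almost everywhere, true in cgroup_subtree_control_write() below) selects between a plain mutex_lock() and the draining lock above. The wrapper itself is outside this excerpt; reconstructed from the series, roughly (a sketch from memory):

	static struct cgroup *cgroup_kn_lock_live(struct kernfs_node *kn,
						  bool drain_offline)
	{
		struct cgroup *cgrp;

		if (kernfs_type(kn) == KERNFS_DIR)
			cgrp = kn->priv;
		else
			cgrp = kn->parent->priv;

		/* break active protection before taking cgroup_mutex */
		kernfs_break_active_protection(kn);

		if (drain_offline)
			cgroup_lock_and_drain_offline(cgrp);
		else
			mutex_lock(&cgroup_mutex);

		if (!cgroup_is_dead(cgrp))
			return cgrp;

		cgroup_kn_unlock(kn);
		return NULL;
	}

This is what lets the restart_syscall() dance disappear from cgroup_subtree_control_write() further down: the draining now happens before the write-side work starts.
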
+
+/**
+ * cgroup_save_control - save control masks of a subtree
+ * @cgrp: root of the target subtree
+ *
+ * Save ->subtree_control and ->subtree_ss_mask to the respective old_
+ * prefixed fields for @cgrp's subtree including @cgrp itself.
+ */
+static void cgroup_save_control(struct cgroup *cgrp)
+{
+	struct cgroup *dsct;
+	struct cgroup_subsys_state *d_css;
+
+	cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
+		dsct->old_subtree_control = dsct->subtree_control;
+		dsct->old_subtree_ss_mask = dsct->subtree_ss_mask;
+	}
+}
+
+/**
+ * cgroup_propagate_control - refresh control masks of a subtree
+ * @cgrp: root of the target subtree
+ *
+ * For @cgrp and its subtree, ensure ->subtree_ss_mask matches
+ * ->subtree_control and propagate controller availability through the
+ * subtree so that descendants don't have unavailable controllers enabled.
+ */
+static void cgroup_propagate_control(struct cgroup *cgrp)
+{
+	struct cgroup *dsct;
+	struct cgroup_subsys_state *d_css;
+
+	cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
+		dsct->subtree_control &= cgroup_control(dsct);
+		dsct->subtree_ss_mask =
+			cgroup_calc_subtree_ss_mask(dsct->subtree_control,
+						    cgroup_ss_mask(dsct));
+	}
+}
+
+/**
+ * cgroup_restore_control - restore control masks of a subtree
+ * @cgrp: root of the target subtree
+ *
+ * Restore ->subtree_control and ->subtree_ss_mask from the respective old_
+ * prefixed fields for @cgrp's subtree including @cgrp itself.
+ */
+static void cgroup_restore_control(struct cgroup *cgrp)
+{
+	struct cgroup *dsct;
+	struct cgroup_subsys_state *d_css;
+
+	cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) {
+		dsct->subtree_control = dsct->old_subtree_control;
+		dsct->subtree_ss_mask = dsct->old_subtree_ss_mask;
+	}
+}
+
+static bool css_visible(struct cgroup_subsys_state *css)
+{
+	struct cgroup_subsys *ss = css->ss;
+	struct cgroup *cgrp = css->cgroup;
+
+	if (cgroup_control(cgrp) & (1 << ss->id))
+		return true;
+	if (!(cgroup_ss_mask(cgrp) & (1 << ss->id)))
+		return false;
+	return cgroup_on_dfl(cgrp) && ss->implicit_on_dfl;
+}
+
+/**
+ * cgroup_apply_control_enable - enable or show csses according to control
+ * @cgrp: root of the target subtree
+ *
+ * Walk @cgrp's subtree and create new csses or make the existing ones
+ * visible.  A css is created invisible if it's being implicitly enabled
+ * through dependency.  An invisible css is made visible when the userland
+ * explicitly enables it.
+ *
+ * Returns 0 on success, -errno on failure.  On failure, csses which have
+ * been processed already aren't cleaned up.  The caller is responsible for
+ * cleaning up with cgroup_apply_control_disable().
+ */
+static int cgroup_apply_control_enable(struct cgroup *cgrp)
+{
+	struct cgroup *dsct;
+	struct cgroup_subsys_state *d_css;
+	struct cgroup_subsys *ss;
+	int ssid, ret;
+
+	cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
+		for_each_subsys(ss, ssid) {
+			struct cgroup_subsys_state *css = cgroup_css(dsct, ss);
+
+			WARN_ON_ONCE(css && percpu_ref_is_dying(&css->refcnt));
+
+			if (!(cgroup_ss_mask(dsct) & (1 << ss->id)))
+				continue;
+
+			if (!css) {
+				css = css_create(dsct, ss);
+				if (IS_ERR(css))
+					return PTR_ERR(css);
+			}
+
+			if (css_visible(css)) {
+				ret = css_populate_dir(css);
+				if (ret)
+					return ret;
+			}
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * cgroup_apply_control_disable - kill or hide csses according to control
+ * @cgrp: root of the target subtree
+ *
+ * Walk @cgrp's subtree and kill and hide csses so that they match
+ * cgroup_ss_mask() and cgroup_visible_mask().
+ *
+ * A css is hidden when the userland requests it to be disabled while other
+ * subsystems are still depending on it.  The css must not actively control
+ * resources and be in the vanilla state if it's made visible again later.
+ * Controllers which may be depended upon should provide ->css_reset() for
+ * this purpose.
+ */
+static void cgroup_apply_control_disable(struct cgroup *cgrp)
+{
+	struct cgroup *dsct;
+	struct cgroup_subsys_state *d_css;
+	struct cgroup_subsys *ss;
+	int ssid;
+
+	cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) {
+		for_each_subsys(ss, ssid) {
+			struct cgroup_subsys_state *css = cgroup_css(dsct, ss);
+
+			WARN_ON_ONCE(css && percpu_ref_is_dying(&css->refcnt));
+
+			if (!css)
+				continue;
+
+			if (css->parent &&
+			    !(cgroup_ss_mask(dsct) & (1 << ss->id))) {
+				kill_css(css);
+			} else if (!css_visible(css)) {
+				css_clear_dir(css);
+				if (ss->css_reset)
+					ss->css_reset(css);
+			}
+		}
+	}
+}
+
+/**
+ * cgroup_apply_control - apply control mask updates to the subtree
+ * @cgrp: root of the target subtree
+ *
+ * Subsystems can be enabled and disabled in a subtree using the following
+ * steps.
+ *
+ * 1. Call cgroup_save_control() to stash the current state.
+ * 2. Update ->subtree_control masks in the subtree as desired.
+ * 3. Call cgroup_apply_control() to apply the changes.
+ * 4. Optionally perform other related operations.
+ * 5. Call cgroup_finalize_control() to finish up.
+ *
+ * This function implements step 3 and propagates the mask changes
+ * throughout @cgrp's subtree, updates csses accordingly and performs
+ * process migrations.
+ */
+static int cgroup_apply_control(struct cgroup *cgrp)
+{
+	int ret;
+
+	cgroup_propagate_control(cgrp);
+
+	ret = cgroup_apply_control_enable(cgrp);
+	if (ret)
+		return ret;
+
+	/*
+	 * At this point, cgroup_e_css() results reflect the new csses
+	 * making the following cgroup_update_dfl_csses() properly update
+	 * css associations of all tasks in the subtree.
+	 */
+	ret = cgroup_update_dfl_csses(cgrp);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+/**
+ * cgroup_finalize_control - finalize control mask update
+ * @cgrp: root of the target subtree
+ * @ret: the result of the update
+ *
+ * Finalize control mask update.  See cgroup_apply_control() for more info.
+ */
+static void cgroup_finalize_control(struct cgroup *cgrp, int ret)
+{
+	if (ret) {
+		cgroup_restore_control(cgrp);
+		cgroup_propagate_control(cgrp);
+	}
+
+	cgroup_apply_control_disable(cgrp);
+}
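
Read together, the helpers above define one protocol for any control-mask change. A condensed hypothetical caller following the five steps documented at cgroup_apply_control() (the function name here is illustrative; cgroup_subtree_control_write() below is the real user):

	/* sketch: the save/apply/finalize protocol described above */
	static int update_subtree(struct cgroup *cgrp, u16 enable, u16 disable)
	{
		int ret;

		/* 1. stash current masks so a failed update can roll back */
		cgroup_save_control(cgrp);

		/* 2. edit the masks as desired */
		cgrp->subtree_control |= enable;
		cgrp->subtree_control &= ~disable;

		/* 3. converge the subtree: propagate masks, create/show
		 *    csses, migrate tasks to the updated css_sets */
		ret = cgroup_apply_control(cgrp);
		/* 4. (optionally do other related work here) */

		/* 5. restore masks on failure, then kill/hide csses */
		cgroup_finalize_control(cgrp, ret);
		return ret;
	}
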
+
 /* change the enabled child controllers for a cgroup in the default hierarchy */
 static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
 					    char *buf, size_t nbytes,
 					    loff_t off)
 {
-	unsigned long enable = 0, disable = 0;
-	unsigned long css_enable, css_disable, old_sc, new_sc, old_ss, new_ss;
+	u16 enable = 0, disable = 0;
 	struct cgroup *cgrp, *child;
 	struct cgroup_subsys *ss;
 	char *tok;
@@ -2946,11 +3232,9 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
 	 */
 	buf = strstrip(buf);
 	while ((tok = strsep(&buf, " "))) {
-		unsigned long tmp_ss_mask = ~cgrp_dfl_root_inhibit_ss_mask;
-
 		if (tok[0] == '\0')
 			continue;
-		for_each_subsys_which(ss, ssid, &tmp_ss_mask) {
+		do_each_subsys_mask(ss, ssid, ~cgrp_dfl_inhibit_ss_mask) {
 			if (!cgroup_ssid_enabled(ssid) ||
 			    strcmp(tok + 1, ss->name))
 				continue;
@@ -2965,12 +3249,12 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
 				return -EINVAL;
 			}
 			break;
-		}
+		} while_each_subsys_mask();
 		if (ssid == CGROUP_SUBSYS_COUNT)
 			return -EINVAL;
 	}

-	cgrp = cgroup_kn_lock_live(of->kn);
+	cgrp = cgroup_kn_lock_live(of->kn, true);
 	if (!cgrp)
 		return -ENODEV;

@@ -2981,10 +3265,7 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
 			continue;
 		}

-		/* unavailable or not enabled on the parent? */
-		if (!(cgrp_dfl_root.subsys_mask & (1 << ssid)) ||
-		    (cgroup_parent(cgrp) &&
-		     !(cgroup_parent(cgrp)->subtree_control & (1 << ssid)))) {
+		if (!(cgroup_control(cgrp) & (1 << ssid))) {
 			ret = -ENOENT;
 			goto out_unlock;
 		}
@@ -3018,150 +3299,21 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
 		goto out_unlock;
 	}

-	/*
-	 * Update subsys masks and calculate what needs to be done.  More
-	 * subsystems than specified may need to be enabled or disabled
-	 * depending on subsystem dependencies.
-	 */
-	old_sc = cgrp->subtree_control;
-	old_ss = cgrp->child_subsys_mask;
-	new_sc = (old_sc | enable) & ~disable;
-	new_ss = cgroup_calc_child_subsys_mask(cgrp, new_sc);
-
-	css_enable = ~old_ss & new_ss;
-	css_disable = old_ss & ~new_ss;
-	enable |= css_enable;
-	disable |= css_disable;
-
-	/*
-	 * Because css offlining is asynchronous, userland might try to
-	 * re-enable the same controller while the previous instance is
-	 * still around.  In such cases, wait till it's gone using
-	 * offline_waitq.
-	 */
-	for_each_subsys_which(ss, ssid, &css_enable) {
-		cgroup_for_each_live_child(child, cgrp) {
-			DEFINE_WAIT(wait);
-
-			if (!cgroup_css(child, ss))
-				continue;
-
-			cgroup_get(child);
-			prepare_to_wait(&child->offline_waitq, &wait,
-					TASK_UNINTERRUPTIBLE);
-			cgroup_kn_unlock(of->kn);
-			schedule();
-			finish_wait(&child->offline_waitq, &wait);
-			cgroup_put(child);
-
-			return restart_syscall();
-		}
-	}
-
-	cgrp->subtree_control = new_sc;
-	cgrp->child_subsys_mask = new_ss;
-
-	/*
-	 * Create new csses or make the existing ones visible.  A css is
-	 * created invisible if it's being implicitly enabled through
-	 * dependency.  An invisible css is made visible when the userland
-	 * explicitly enables it.
-	 */
-	for_each_subsys(ss, ssid) {
-		if (!(enable & (1 << ssid)))
-			continue;
-
-		cgroup_for_each_live_child(child, cgrp) {
-			if (css_enable & (1 << ssid))
-				ret = create_css(child, ss,
-					cgrp->subtree_control & (1 << ssid));
-			else
-				ret = css_populate_dir(cgroup_css(child, ss),
-						       NULL);
-			if (ret)
-				goto err_undo_css;
-		}
-	}
-
-	/*
-	 * At this point, cgroup_e_css() results reflect the new csses
-	 * making the following cgroup_update_dfl_csses() properly update
-	 * css associations of all tasks in the subtree.
-	 */
-	ret = cgroup_update_dfl_csses(cgrp);
-	if (ret)
-		goto err_undo_css;
+	/* save and update control masks and prepare csses */
+	cgroup_save_control(cgrp);

-	/*
-	 * All tasks are migrated out of disabled csses.  Kill or hide
-	 * them.  A css is hidden when the userland requests it to be
-	 * disabled while other subsystems are still depending on it.  The
-	 * css must not actively control resources and be in the vanilla
-	 * state if it's made visible again later.  Controllers which may
-	 * be depended upon should provide ->css_reset() for this purpose.
-	 */
-	for_each_subsys(ss, ssid) {
-		if (!(disable & (1 << ssid)))
-			continue;
+	cgrp->subtree_control |= enable;
+	cgrp->subtree_control &= ~disable;

-		cgroup_for_each_live_child(child, cgrp) {
-			struct cgroup_subsys_state *css = cgroup_css(child, ss);
+	ret = cgroup_apply_control(cgrp);

-			if (css_disable & (1 << ssid)) {
-				kill_css(css);
-			} else {
-				css_clear_dir(css, NULL);
-				if (ss->css_reset)
-					ss->css_reset(css);
-			}
-		}
-	}
-
-	/*
-	 * The effective csses of all the descendants (excluding @cgrp) may
-	 * have changed.  Subsystems can optionally subscribe to this event
-	 * by implementing ->css_e_css_changed() which is invoked if any of
-	 * the effective csses seen from the css's cgroup may have changed.
-	 */
-	for_each_subsys(ss, ssid) {
-		struct cgroup_subsys_state *this_css = cgroup_css(cgrp, ss);
-		struct cgroup_subsys_state *css;
-
-		if (!ss->css_e_css_changed || !this_css)
-			continue;
-
-		css_for_each_descendant_pre(css, this_css)
-			if (css != this_css)
-				ss->css_e_css_changed(css);
-	}
+	cgroup_finalize_control(cgrp, ret);

 	kernfs_activate(cgrp->kn);
 	ret = 0;
 out_unlock:
 	cgroup_kn_unlock(of->kn);
 	return ret ?: nbytes;
-
-err_undo_css:
-	cgrp->subtree_control = old_sc;
-	cgrp->child_subsys_mask = old_ss;
-
-	for_each_subsys(ss, ssid) {
-		if (!(enable & (1 << ssid)))
-			continue;
-
-		cgroup_for_each_live_child(child, cgrp) {
-			struct cgroup_subsys_state *css = cgroup_css(child, ss);
-
-			if (!css)
-				continue;
-
-			if (css_enable & (1 << ssid))
-				kill_css(css);
-			else
-				css_clear_dir(css, NULL);
-		}
-	}
-	goto out_unlock;
 }

 static int cgroup_events_show(struct seq_file *seq, void *v)
@@ -3359,7 +3511,7 @@ static int cgroup_addrm_files(struct cgroup_subsys_state *css,
 			      bool is_add)
 {
 	struct cftype *cft, *cft_end = NULL;
-	int ret;
+	int ret = 0;

 	lockdep_assert_held(&cgroup_mutex);

@@ -3388,7 +3540,7 @@ restart:
 			cgroup_rm_file(cgrp, cft);
 		}
 	}
-	return 0;
+	return ret;
 }

 static int cgroup_apply_cftypes(struct cftype *cfts, bool is_add)
@@ -3405,7 +3557,7 @@ static int cgroup_apply_cftypes(struct cftype *cfts, bool is_add)
 	css_for_each_descendant_pre(css, cgroup_css(root, ss)) {
 		struct cgroup *cgrp = css->cgroup;

-		if (cgroup_is_dead(cgrp))
+		if (!(css->flags & CSS_VISIBLE))
 			continue;

 		ret = cgroup_addrm_files(css, cgrp, cfts, is_add);
@@ -4026,6 +4178,9 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
 	struct task_struct *task;
 	int ret;

+	if (!cgroup_may_migrate_to(to))
+		return -EBUSY;
+
 	mutex_lock(&cgroup_mutex);

 	/* all tasks in @from are being moved, all csets are source */
@@ -4034,7 +4189,7 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
 		cgroup_migrate_add_src(link->cset, to, &preloaded_csets);
 	spin_unlock_bh(&css_set_lock);

-	ret = cgroup_migrate_prepare_dst(to, &preloaded_csets);
+	ret = cgroup_migrate_prepare_dst(&preloaded_csets);
 	if (ret)
 		goto out_err;

@@ -4050,7 +4205,7 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
 		css_task_iter_end(&it);

 		if (task) {
-			ret = cgroup_migrate(task, false, to);
+			ret = cgroup_migrate(task, false, to->root);
 			put_task_struct(task);
 		}
 	} while (task && !ret);
@@ -4557,12 +4712,6 @@ static struct cftype cgroup_dfl_base_files[] = {
 	},
 	{
 		.name = "cgroup.controllers",
-		.flags = CFTYPE_ONLY_ON_ROOT,
-		.seq_show = cgroup_root_controllers_show,
-	},
-	{
-		.name = "cgroup.controllers",
-		.flags = CFTYPE_NOT_ON_ROOT,
 		.seq_show = cgroup_controllers_show,
 	},
 	{
@@ -4731,7 +4880,9 @@ static void css_release_work_fn(struct work_struct *work)
 		 * Those are supported by RCU protecting clearing of
 		 * cgrp->kn->priv backpointer.
 		 */
-		RCU_INIT_POINTER(*(void __rcu __force **)&cgrp->kn->priv, NULL);
+		if (cgrp->kn)
+			RCU_INIT_POINTER(*(void __rcu __force **)&cgrp->kn->priv,
+					 NULL);
 	}

 	mutex_unlock(&cgroup_mutex);
@@ -4802,6 +4953,9 @@ static void offline_css(struct cgroup_subsys_state *css)
 	if (!(css->flags & CSS_ONLINE))
 		return;

+	if (ss->css_reset)
+		ss->css_reset(css);
+
 	if (ss->css_offline)
 		ss->css_offline(css);

@@ -4812,17 +4966,16 @@ static void offline_css(struct cgroup_subsys_state *css)
 }

 /**
- * create_css - create a cgroup_subsys_state
+ * css_create - create a cgroup_subsys_state
  * @cgrp: the cgroup new css will be associated with
  * @ss: the subsys of new css
- * @visible: whether to create control knobs for the new css or not
  *
  * Create a new css associated with @cgrp - @ss pair.  On success, the new
- * css is online and installed in @cgrp with all interface files created if
- * @visible.  Returns 0 on success, -errno on failure.
+ * css is online and installed in @cgrp.  This function doesn't create the
+ * interface files.  Returns 0 on success, -errno on failure.
  */
-static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss,
-		      bool visible)
+static struct cgroup_subsys_state *css_create(struct cgroup *cgrp,
+					      struct cgroup_subsys *ss)
 {
 	struct cgroup *parent = cgroup_parent(cgrp);
 	struct cgroup_subsys_state *parent_css = cgroup_css(parent, ss);
@@ -4833,7 +4986,7 @@ static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss,

 	css = ss->css_alloc(parent_css);
 	if (IS_ERR(css))
-		return PTR_ERR(css);
+		return css;

 	init_and_link_css(css, ss, cgrp);

@@ -4846,12 +4999,6 @@ static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss,
 		goto err_free_percpu_ref;
 	css->id = err;

-	if (visible) {
-		err = css_populate_dir(css, NULL);
-		if (err)
-			goto err_free_id;
-	}
-
 	/* @css is ready to be brought online now, make it visible */
 	list_add_tail_rcu(&css->sibling, &parent_css->children);
 	cgroup_idr_replace(&ss->css_idr, css, css->id);
@@ -4869,47 +5016,30 @@ static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss,
 		ss->warned_broken_hierarchy = true;
 	}

-	return 0;
+	return css;

 err_list_del:
 	list_del_rcu(&css->sibling);
-	css_clear_dir(css, NULL);
-err_free_id:
 	cgroup_idr_remove(&ss->css_idr, css->id);
 err_free_percpu_ref:
 	percpu_ref_exit(&css->refcnt);
 err_free_css:
 	call_rcu(&css->rcu_head, css_free_rcu_fn);
-	return err;
+	return ERR_PTR(err);
 }

-static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
-			umode_t mode)
+static struct cgroup *cgroup_create(struct cgroup *parent)
 {
-	struct cgroup *parent, *cgrp, *tcgrp;
-	struct cgroup_root *root;
-	struct cgroup_subsys *ss;
-	struct kernfs_node *kn;
-	int level, ssid, ret;
-
-	/* Do not accept '\n' to prevent making /proc/<pid>/cgroup unparsable.
-	 */
-	if (strchr(name, '\n'))
-		return -EINVAL;
-
-	parent = cgroup_kn_lock_live(parent_kn);
-	if (!parent)
-		return -ENODEV;
-	root = parent->root;
-	level = parent->level + 1;
+	struct cgroup_root *root = parent->root;
+	struct cgroup *cgrp, *tcgrp;
+	int level = parent->level + 1;
+	int ret;

 	/* allocate the cgroup and its ID, 0 is reserved for the root */
 	cgrp = kzalloc(sizeof(*cgrp) +
 		       sizeof(cgrp->ancestor_ids[0]) * (level + 1), GFP_KERNEL);
-	if (!cgrp) {
-		ret = -ENOMEM;
-		goto out_unlock;
-	}
+	if (!cgrp)
+		return ERR_PTR(-ENOMEM);

 	ret = percpu_ref_init(&cgrp->self.refcnt, css_release, 0, GFP_KERNEL);
 	if (ret)
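
css_create() and cgroup_create() now report failure through the pointer itself, using the kernel's standard ERR_PTR()/IS_ERR()/PTR_ERR() encoding rather than an int return. The consuming pattern, simplified from cgroup_mkdir() below (the error handling there additionally unwinds via goto):

	struct cgroup *cgrp = cgroup_create(parent);

	if (IS_ERR(cgrp))
		return PTR_ERR(cgrp);	/* decode -errno from the pointer */
	/* ... otherwise cgrp is a live, committed cgroup ... */
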
@@ -4940,20 +5070,6 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
 	if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &parent->flags))
 		set_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags);

-	/* create the directory */
-	kn = kernfs_create_dir(parent->kn, name, mode, cgrp);
-	if (IS_ERR(kn)) {
-		ret = PTR_ERR(kn);
-		goto out_free_id;
-	}
-	cgrp->kn = kn;
-
-	/*
-	 * This extra ref will be put in cgroup_free_fn() and guarantees
-	 * that @cgrp->kn is always accessible.
-	 */
-	kernfs_get(kn);
-
 	cgrp->self.serial_nr = css_serial_nr_next++;

 	/* allocation complete, commit to creation */
@@ -4967,51 +5083,90 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
 	 */
 	cgroup_idr_replace(&root->cgroup_idr, cgrp, cgrp->id);

-	ret = cgroup_kn_set_ugid(kn);
-	if (ret)
-		goto out_destroy;
+	/*
+	 * On the default hierarchy, a child doesn't automatically inherit
+	 * subtree_control from the parent.  Each is configured manually.
+	 */
+	if (!cgroup_on_dfl(cgrp))
+		cgrp->subtree_control = cgroup_control(cgrp);

-	ret = css_populate_dir(&cgrp->self, NULL);
+	cgroup_propagate_control(cgrp);
+
+	/* @cgrp doesn't have dir yet so the following will only create csses */
+	ret = cgroup_apply_control_enable(cgrp);
 	if (ret)
 		goto out_destroy;

-	/* let's create and online css's */
-	for_each_subsys(ss, ssid) {
-		if (parent->child_subsys_mask & (1 << ssid)) {
-			ret = create_css(cgrp, ss,
-					 parent->subtree_control & (1 << ssid));
-			if (ret)
-				goto out_destroy;
-		}
+	return cgrp;
+
+out_cancel_ref:
+	percpu_ref_exit(&cgrp->self.refcnt);
+out_free_cgrp:
+	kfree(cgrp);
+	return ERR_PTR(ret);
+out_destroy:
+	cgroup_destroy_locked(cgrp);
+	return ERR_PTR(ret);
+}
+
+static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
+			umode_t mode)
+{
+	struct cgroup *parent, *cgrp;
+	struct kernfs_node *kn;
+	int ret;
+
+	/* do not accept '\n' to prevent making /proc/<pid>/cgroup unparsable */
+	if (strchr(name, '\n'))
+		return -EINVAL;
+
+	parent = cgroup_kn_lock_live(parent_kn, false);
+	if (!parent)
+		return -ENODEV;
+
+	cgrp = cgroup_create(parent);
+	if (IS_ERR(cgrp)) {
+		ret = PTR_ERR(cgrp);
+		goto out_unlock;
+	}
+
+	/* create the directory */
+	kn = kernfs_create_dir(parent->kn, name, mode, cgrp);
+	if (IS_ERR(kn)) {
+		ret = PTR_ERR(kn);
+		goto out_destroy;
 	}
+	cgrp->kn = kn;

 	/*
-	 * On the default hierarchy, a child doesn't automatically inherit
-	 * subtree_control from the parent.  Each is configured manually.
+	 * This extra ref will be put in cgroup_free_fn() and guarantees
+	 * that @cgrp->kn is always accessible.
 	 */
-	if (!cgroup_on_dfl(cgrp)) {
-		cgrp->subtree_control = parent->subtree_control;
-		cgroup_refresh_child_subsys_mask(cgrp);
-	}
+	kernfs_get(kn);
+
+	ret = cgroup_kn_set_ugid(kn);
+	if (ret)
+		goto out_destroy;
+
+	ret = css_populate_dir(&cgrp->self);
+	if (ret)
+		goto out_destroy;

+	ret = cgroup_apply_control_enable(cgrp);
+	if (ret)
+		goto out_destroy;
+
+	/* let's create and online css's */
 	kernfs_activate(kn);

 	ret = 0;
 	goto out_unlock;

-out_free_id:
-	cgroup_idr_remove(&root->cgroup_idr, cgrp->id);
-out_cancel_ref:
-	percpu_ref_exit(&cgrp->self.refcnt);
-out_free_cgrp:
-	kfree(cgrp);
+out_destroy:
+	cgroup_destroy_locked(cgrp);
 out_unlock:
 	cgroup_kn_unlock(parent_kn);
 	return ret;
-
-out_destroy:
-	cgroup_destroy_locked(cgrp);
-	goto out_unlock;
 }

 /*
@@ -5065,7 +5220,7 @@ static void kill_css(struct cgroup_subsys_state *css)
 	 * This must happen before css is disassociated with its cgroup.
 	 * See seq_css() for details.
 	 */
-	css_clear_dir(css, NULL);
+	css_clear_dir(css);

 	/*
 	 * Killing would put the base ref, but we need to keep it alive
@@ -5114,6 +5269,7 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
 	__releases(&cgroup_mutex) __acquires(&cgroup_mutex)
 {
 	struct cgroup_subsys_state *css;
+	struct cgrp_cset_link *link;
 	int ssid;

 	lockdep_assert_held(&cgroup_mutex);
@@ -5134,11 +5290,18 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
 		return -EBUSY;

 	/*
-	 * Mark @cgrp dead.  This prevents further task migration and child
-	 * creation by disabling cgroup_lock_live_group().
+	 * Mark @cgrp and the associated csets dead.  The former prevents
+	 * further task migration and child creation by disabling
+	 * cgroup_lock_live_group().  The latter makes the csets ignored by
+	 * the migration path.
 	 */
 	cgrp->self.flags &= ~CSS_ONLINE;

+	spin_lock_bh(&css_set_lock);
+	list_for_each_entry(link, &cgrp->cset_links, cset_link)
+		link->cset->dead = true;
+	spin_unlock_bh(&css_set_lock);
+
 	/* initiate massacre of all css's */
 	for_each_css(css, ssid, cgrp)
 		kill_css(css);
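
Per the comment above, the migration path now ignores dead csets; the consumer of the new ->dead flag is cgroup_migrate_add_src(), whose hunk falls outside this excerpt. Roughly, it gains an early exit like the following (a sketch from memory, not the authoritative hunk):

	/* sketch: early-exit added to cgroup_migrate_add_src() */
	if (src_cset->dead)
		return;		/* csets of dead cgroups have no migratable tasks */
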
@@ -5162,7 +5325,7 @@ static int cgroup_rmdir(struct kernfs_node *kn)
 	struct cgroup *cgrp;
 	int ret = 0;

-	cgrp = cgroup_kn_lock_live(kn);
+	cgrp = cgroup_kn_lock_live(kn, false);
 	if (!cgrp)
 		return 0;

@@ -5252,7 +5415,7 @@ int __init cgroup_init_early(void)

 	for_each_subsys(ss, i) {
 		WARN(!ss->css_alloc || !ss->css_free || ss->name || ss->id,
-		     "invalid cgroup_subsys %d:%s css_alloc=%p css_free=%p name:id=%d:%s\n",
+		     "invalid cgroup_subsys %d:%s css_alloc=%p css_free=%p id:name=%d:%s\n",
 		     i, cgroup_subsys_name[i], ss->css_alloc, ss->css_free,
 		     ss->id, ss->name);
 		WARN(strlen(cgroup_subsys_name[i]) > MAX_CGROUP_TYPE_NAMELEN,
@@ -5269,7 +5432,7 @@ int __init cgroup_init_early(void)
 	return 0;
 }

-static unsigned long cgroup_disable_mask __initdata;
+static u16 cgroup_disable_mask __initdata;

 /**
  * cgroup_init - cgroup initialization
@@ -5280,18 +5443,21 @@ static unsigned long cgroup_disable_mask __initdata;
 int __init cgroup_init(void)
 {
 	struct cgroup_subsys *ss;
-	unsigned long key;
 	int ssid;

+	BUILD_BUG_ON(CGROUP_SUBSYS_COUNT > 16);
 	BUG_ON(percpu_init_rwsem(&cgroup_threadgroup_rwsem));
 	BUG_ON(cgroup_init_cftypes(NULL, cgroup_dfl_base_files));
 	BUG_ON(cgroup_init_cftypes(NULL, cgroup_legacy_base_files));

 	mutex_lock(&cgroup_mutex);

-	/* Add init_css_set to the hash table */
-	key = css_set_hash(init_css_set.subsys);
-	hash_add(css_set_table, &init_css_set.hlist, key);
+	/*
+	 * Add init_css_set to the hash table so that dfl_root can link to
+	 * it during init.
+	 */
+	hash_add(css_set_table, &init_css_set.hlist,
+		 css_set_hash(init_css_set.subsys));

 	BUG_ON(cgroup_setup_root(&cgrp_dfl_root, 0));

@@ -5324,10 +5490,16 @@ int __init cgroup_init(void)
 			continue;
 		}

+		if (cgroup_ssid_no_v1(ssid))
+			printk(KERN_INFO "Disabling %s control group subsystem in v1 mounts\n",
+			       ss->name);
+
 		cgrp_dfl_root.subsys_mask |= 1 << ss->id;

-		if (!ss->dfl_cftypes)
-			cgrp_dfl_root_inhibit_ss_mask |= 1 << ss->id;
+		if (ss->implicit_on_dfl)
+			cgrp_dfl_implicit_ss_mask |= 1 << ss->id;
+		else if (!ss->dfl_cftypes)
+			cgrp_dfl_inhibit_ss_mask |= 1 << ss->id;

 		if (ss->dfl_cftypes == ss->legacy_cftypes) {
 			WARN_ON(cgroup_add_cftypes(ss, ss->dfl_cftypes));
@@ -5340,6 +5512,11 @@ int __init cgroup_init(void)
 			ss->bind(init_css_set.subsys[ssid]);
 	}

+	/* init_css_set.subsys[] has been updated, re-hash */
+	hash_del(&init_css_set.hlist);
+	hash_add(css_set_table, &init_css_set.hlist,
+		 css_set_hash(init_css_set.subsys));
+
 	WARN_ON(sysfs_create_mount_point(fs_kobj, "cgroup"));
 	WARN_ON(register_filesystem(&cgroup_fs_type));
 	WARN_ON(register_filesystem(&cgroup2_fs_type));
@@ -5398,7 +5575,7 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
 		struct cgroup *cgrp;
 		int ssid, count = 0;

-		if (root == &cgrp_dfl_root && !cgrp_dfl_root_visible)
+		if (root == &cgrp_dfl_root && !cgrp_dfl_visible)
 			continue;

 		seq_printf(m, "%d:", root->hierarchy_id);
@@ -5513,11 +5690,11 @@ int cgroup_can_fork(struct task_struct *child)
 	struct cgroup_subsys *ss;
 	int i, j, ret;

-	for_each_subsys_which(ss, i, &have_canfork_callback) {
+	do_each_subsys_mask(ss, i, have_canfork_callback) {
 		ret = ss->can_fork(child);
 		if (ret)
 			goto out_revert;
-	}
+	} while_each_subsys_mask();

 	return 0;

@@ -5602,8 +5779,9 @@ void cgroup_post_fork(struct task_struct *child)
 	 * css_set; otherwise, @child might change state between ->fork()
 	 * and addition to css_set.
 	 */
-	for_each_subsys_which(ss, i, &have_fork_callback)
+	do_each_subsys_mask(ss, i, have_fork_callback) {
 		ss->fork(child);
+	} while_each_subsys_mask();
 }

 /**
@@ -5646,8 +5824,9 @@ void cgroup_exit(struct task_struct *tsk)
 	}

 	/* see cgroup_post_fork() for details */
-	for_each_subsys_which(ss, i, &have_exit_callback)
+	do_each_subsys_mask(ss, i, have_exit_callback) {
 		ss->exit(tsk);
+	} while_each_subsys_mask();
 }

 void cgroup_free(struct task_struct *task)
@@ -5656,8 +5835,9 @@ void cgroup_free(struct task_struct *task)
 	struct cgroup_subsys *ss;
 	int ssid;

-	for_each_subsys_which(ss, ssid, &have_free_callback)
+	do_each_subsys_mask(ss, ssid, have_free_callback) {
 		ss->free(task);
+	} while_each_subsys_mask();

 	put_css_set(cset);
 }
@@ -5750,6 +5930,33 @@ static int __init cgroup_disable(char *str)
 }
 __setup("cgroup_disable=", cgroup_disable);

+static int __init cgroup_no_v1(char *str)
+{
+	struct cgroup_subsys *ss;
+	char *token;
+	int i;
+
+	while ((token = strsep(&str, ",")) != NULL) {
+		if (!*token)
+			continue;
+
+		if (!strcmp(token, "all")) {
+			cgroup_no_v1_mask = U16_MAX;
+			break;
+		}
+
+		for_each_subsys(ss, i) {
+			if (strcmp(token, ss->name) &&
+			    strcmp(token, ss->legacy_name))
+				continue;
+
+			cgroup_no_v1_mask |= 1 << i;
+		}
+	}
+	return 1;
+}
+__setup("cgroup_no_v1=", cgroup_no_v1);
+
 /**
  * css_tryget_online_from_dir - get corresponding css from a cgroup dentry
  * @dentry: directory dentry of interest
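
From the parser added in the hunk above, cgroup_no_v1= takes a comma-separated list of controller names (current or legacy spellings both match) or the keyword all, and the resulting mask keeps those controllers off v1 hierarchies. Illustrative kernel command lines (hypothetical values, derived from the code rather than any documented example):

	cgroup_no_v1=blkio,memory	# keep these two out of v1 mounts
	cgroup_no_v1=all		# reserve every controller for cgroup v2
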
@@ -5763,12 +5970,13 @@ struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry,
 					       struct cgroup_subsys *ss)
 {
 	struct kernfs_node *kn = kernfs_node_from_dentry(dentry);
+	struct file_system_type *s_type = dentry->d_sb->s_type;
 	struct cgroup_subsys_state *css = NULL;
 	struct cgroup *cgrp;

 	/* is @dentry a cgroup dir? */
-	if (dentry->d_sb->s_type != &cgroup_fs_type || !kn ||
-	    kernfs_type(kn) != KERNFS_DIR)
+	if ((s_type != &cgroup_fs_type && s_type != &cgroup2_fs_type) ||
+	    !kn || kernfs_type(kn) != KERNFS_DIR)
 		return ERR_PTR(-EBADF);

 	rcu_read_lock();
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 41989ab4db57..90899837ea78 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2089,7 +2089,7 @@ struct cgroup_subsys cpuset_cgrp_subsys = {
 	.attach = cpuset_attach,
 	.bind = cpuset_bind,
 	.legacy_cftypes = files,
-	.early_init = 1,
+	.early_init = true,
 };

 /**
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 4edecc15177d..4ee3ce7ec78d 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -8441,7 +8441,7 @@ struct cgroup_subsys cpu_cgrp_subsys = {
 	.can_attach = cpu_cgroup_can_attach,
 	.attach = cpu_cgroup_attach,
 	.legacy_cftypes = cpu_files,
-	.early_init = 1,
+	.early_init = true,
 };

 #endif /* CONFIG_CGROUP_SCHED */
diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c
index dd7cbb55bbf2..2ddaebf7469a 100644
--- a/kernel/sched/cpuacct.c
+++ b/kernel/sched/cpuacct.c
@@ -279,5 +279,5 @@ struct cgroup_subsys cpuacct_cgrp_subsys = {
 	.css_alloc = cpuacct_css_alloc,
 	.css_free = cpuacct_css_free,
 	.legacy_cftypes = files,
-	.early_init = 1,
+	.early_init = true,
 };