 Documentation/cgroups/cgroups.txt           |  14
 Documentation/cgroups/unified-hierarchy.txt |  35
 block/blk-cgroup.c                          |  13
 block/blk-throttle.c                        |   6
 include/linux/cgroup.h                      | 165
 kernel/cgroup.c                             | 453
 kernel/cgroup_freezer.c                     |   2
 kernel/cpuset.c                             | 500
 kernel/sched/core.c                         |   2
 kernel/sched/cpuacct.c                      |   2
 mm/hugetlb_cgroup.c                         |   5
 mm/memcontrol.c                             |  37
 net/core/netclassid_cgroup.c                |   2
 net/core/netprio_cgroup.c                   |   2
 net/ipv4/tcp_memcontrol.c                   |   2
 security/device_cgroup.c                    |   2
 16 files changed, 806 insertions(+), 436 deletions(-)
diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt
index 821de56d1580..10c949b293e4 100644
--- a/Documentation/cgroups/cgroups.txt
+++ b/Documentation/cgroups/cgroups.txt
@@ -599,6 +599,20 @@ fork. If this method returns 0 (success) then this should remain valid
 while the caller holds cgroup_mutex and it is ensured that either
 attach() or cancel_attach() will be called in future.
 
+void css_reset(struct cgroup_subsys_state *css)
+(cgroup_mutex held by caller)
+
+An optional operation which should restore @css's configuration to the
+initial state.  This is currently only used on the unified hierarchy
+when a subsystem is disabled on a cgroup through
+"cgroup.subtree_control" but should remain enabled because other
+subsystems depend on it.  cgroup core makes such a css invisible by
+removing the associated interface files and invokes this callback so
+that the hidden subsystem can return to the initial neutral state.
+This prevents unexpected resource control from a hidden css and
+ensures that the configuration is in the initial state when it is made
+visible again later.
+
 void cancel_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
 (cgroup_mutex held by caller)
 
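The contract described above is easy to model outside the kernel.  The
following is a minimal userspace sketch of the hide-reset-reshow cycle,
with an invented controller whose whole configuration is a single
"limit" knob; it illustrates the intended semantics and is not kernel
code.

  /* Minimal userspace model of the css_reset() contract described above.
   * Hypothetical names; not kernel code.  The controller's only state is
   * one "limit" knob whose initial (neutral) value is 0.
   */
  #include <stdio.h>

  struct model_css {
  	int visible;		/* interface files present? */
  	unsigned long limit;	/* 0 == no limit, the neutral state */
  };

  static void model_css_reset(struct model_css *css)
  {
  	css->limit = 0;		/* back to the initial configuration */
  }

  /* what cgroup core does when a controller is disabled in
   * "cgroup.subtree_control" but must stay enabled for a dependent */
  static void hide_css(struct model_css *css)
  {
  	css->visible = 0;	/* remove the interface files */
  	model_css_reset(css);	/* stop exerting resource control */
  }

  int main(void)
  {
  	struct model_css css = { .visible = 1, .limit = 16 << 20 };

  	hide_css(&css);
  	printf("hidden, limit=%lu (neutral)\n", css.limit);

  	css.visible = 1;	/* made visible again later */
  	printf("visible again, limit=%lu, as if freshly created\n",
  	       css.limit);
  	return 0;
  }
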
diff --git a/Documentation/cgroups/unified-hierarchy.txt b/Documentation/cgroups/unified-hierarchy.txt
index 324b182e6000..4f4563277864 100644
--- a/Documentation/cgroups/unified-hierarchy.txt
+++ b/Documentation/cgroups/unified-hierarchy.txt
@@ -94,12 +94,35 @@ change soon.
 
   mount -t cgroup -o __DEVEL__sane_behavior cgroup $MOUNT_POINT
 
-All controllers which are not bound to other hierarchies are
-automatically bound to unified hierarchy and show up at the root of
-it.  Controllers which are enabled only in the root of unified
-hierarchy can be bound to other hierarchies at any time.  This allows
-mixing unified hierarchy with the traditional multiple hierarchies in
-a fully backward compatible way.
+All controllers which support the unified hierarchy and are not bound
+to other hierarchies are automatically bound to unified hierarchy and
+show up at the root of it.  Controllers which are enabled only in the
+root of unified hierarchy can be bound to other hierarchies.  This
+allows mixing unified hierarchy with the traditional multiple
+hierarchies in a fully backward compatible way.
+
+For development purposes, the following boot parameter makes all
+controllers appear on the unified hierarchy whether supported or
+not.
+
+  cgroup__DEVEL__legacy_files_on_dfl
+
+A controller can be moved across hierarchies only after the controller
+is no longer referenced in its current hierarchy.  Because per-cgroup
+controller states are destroyed asynchronously and controllers may
+have lingering references, a controller may not show up immediately on
+the unified hierarchy after the final umount of the previous
+hierarchy.  Similarly, a controller should be fully disabled to be
+moved out of the unified hierarchy and it may take some time for the
+disabled controller to become available for other hierarchies;
+furthermore, due to dependencies among controllers, other controllers
+may need to be disabled too.
+
+While useful for development and manual configurations, dynamically
+moving controllers between the unified and other hierarchies is
+strongly discouraged for production use.  It is recommended to decide
+the hierarchies and controller associations before starting to use the
+controllers.
 
 
 2-2. cgroup.subtree_control
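The mount command above can equally be issued from C through mount(2).
A small sketch, assuming the mount point already exists and the caller
has CAP_SYS_ADMIN:

  /* Sketch: mounting the development-time unified hierarchy from C,
   * equivalent to "mount -t cgroup -o __DEVEL__sane_behavior cgroup $MNT".
   * Assumes /mnt/cgroup exists and the caller has sufficient privileges.
   */
  #include <stdio.h>
  #include <sys/mount.h>

  int main(void)
  {
  	if (mount("cgroup", "/mnt/cgroup", "cgroup", 0,
  		  "__DEVEL__sane_behavior") < 0) {
  		perror("mount");
  		return 1;
  	}
  	puts("unified hierarchy mounted at /mnt/cgroup");
  	return 0;
  }
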
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 28d227c5ca77..e17da947f6bd 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -928,7 +928,15 @@ struct cgroup_subsys blkio_cgrp_subsys = {
 	.css_offline = blkcg_css_offline,
 	.css_free = blkcg_css_free,
 	.can_attach = blkcg_can_attach,
-	.base_cftypes = blkcg_files,
+	.legacy_cftypes = blkcg_files,
+#ifdef CONFIG_MEMCG
+	/*
+	 * This ensures that, if available, memcg is automatically enabled
+	 * together on the default hierarchy so that the owner cgroup can
+	 * be retrieved from writeback pages.
+	 */
+	.depends_on = 1 << memory_cgrp_id,
+#endif
 };
 EXPORT_SYMBOL_GPL(blkio_cgrp_subsys);
 
@@ -1120,7 +1128,8 @@ int blkcg_policy_register(struct blkcg_policy *pol)
 
 	/* everything is in place, add intf files for the new policy */
 	if (pol->cftypes)
-		WARN_ON(cgroup_add_cftypes(&blkio_cgrp_subsys, pol->cftypes));
+		WARN_ON(cgroup_add_legacy_cftypes(&blkio_cgrp_subsys,
+						  pol->cftypes));
 	ret = 0;
 out_unlock:
 	mutex_unlock(&blkcg_pol_mutex);
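The .depends_on field set above is an ordinary bitmask keyed by
controller ID.  A standalone sketch of the effect, where enabling one
controller pulls its dependency into the enabled set (the IDs and the
dependency table here are invented for the demo; in the kernel they
come from the SUBSYS() enumeration, e.g. memory_cgrp_id):

  #include <stdio.h>

  enum { memory_id, blkio_id, nr_ids };

  static const char * const names[] = { "memory", "blkio" };
  static const unsigned int depends_on[] = {
  	[memory_id] = 0,
  	[blkio_id]  = 1 << memory_id,	/* mirrors .depends_on above */
  };

  int main(void)
  {
  	unsigned int requested = 1 << blkio_id;
  	unsigned int effective = requested;
  	int i;

  	/* fold each requested controller's dependency mask in */
  	for (i = 0; i < nr_ids; i++)
  		if (requested & (1 << i))
  			effective |= depends_on[i];

  	for (i = 0; i < nr_ids; i++)
  		if (effective & (1 << i))
  			printf("%s enabled%s\n", names[i],
  			       (requested & (1 << i)) ?
  			       "" : " (by dependency, hidden)");
  	return 0;
  }
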
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 3fdb21a390c1..9273d0969ebd 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -412,13 +412,13 @@ static void throtl_pd_init(struct blkcg_gq *blkg)
 	int rw;
 
 	/*
-	 * If sane_hierarchy is enabled, we switch to properly hierarchical
+	 * If on the default hierarchy, we switch to properly hierarchical
 	 * behavior where limits on a given throtl_grp are applied to the
 	 * whole subtree rather than just the group itself.  e.g. If 16M
 	 * read_bps limit is set on the root group, the whole system can't
 	 * exceed 16M for the device.
 	 *
-	 * If sane_hierarchy is not enabled, the broken flat hierarchy
+	 * If not on the default hierarchy, the broken flat hierarchy
 	 * behavior is retained where all throtl_grps are treated as if
 	 * they're all separate root groups right below throtl_data.
 	 * Limits of a group don't interact with limits of other groups
@@ -426,7 +426,7 @@ static void throtl_pd_init(struct blkcg_gq *blkg)
 	 */
 	parent_sq = &td->service_queue;
 
-	if (cgroup_sane_behavior(blkg->blkcg->css.cgroup) && blkg->parent)
+	if (cgroup_on_dfl(blkg->blkcg->css.cgroup) && blkg->parent)
 		parent_sq = &blkg_to_tg(blkg->parent)->service_queue;
 
 	throtl_service_queue_init(&tg->service_queue, parent_sq);
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 8a111dd42d7a..b5223c570eba 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -203,7 +203,15 @@ struct cgroup {
 	struct kernfs_node *kn;		/* cgroup kernfs entry */
 	struct kernfs_node *populated_kn; /* kn for "cgroup.subtree_populated" */
 
-	/* the bitmask of subsystems enabled on the child cgroups */
+	/*
+	 * The bitmask of subsystems enabled on the child cgroups.
+	 * ->subtree_control is the one configured through
+	 * "cgroup.subtree_control" while ->child_subsys_mask is the
+	 * effective one which may have more subsystems enabled.
+	 * Controller knobs are made available iff it's enabled in
+	 * ->subtree_control.
+	 */
+	unsigned int subtree_control;
 	unsigned int child_subsys_mask;
 
 	/* Private pointers for each registered subsystem */
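The relation described in that comment reduces to two derived masks; a
compilable sketch with arbitrary demo values (field names as in the
struct above):

  /* ->child_subsys_mask is always a superset of ->subtree_control, and
   * only bits in ->subtree_control get visible knobs.  Demo values are
   * invented.
   */
  #include <stdio.h>

  int main(void)
  {
  	unsigned int subtree_control   = 0x05;	/* configured by userland */
  	unsigned int child_subsys_mask = 0x07;	/* effective, deps added */

  	unsigned int visible = subtree_control;
  	unsigned int hidden  = child_subsys_mask & ~subtree_control;

  	printf("visible knobs: 0x%x, enabled but hidden: 0x%x\n",
  	       visible, hidden);
  	return 0;
  }
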
@@ -248,73 +256,9 @@ struct cgroup {
 
 /* cgroup_root->flags */
 enum {
-	/*
-	 * Unfortunately, cgroup core and various controllers are riddled
-	 * with idiosyncrasies and pointless options.  The following flag,
-	 * when set, will force sane behavior - some options are forced on,
-	 * others are disallowed, and some controllers will change their
-	 * hierarchical or other behaviors.
-	 *
-	 * The set of behaviors affected by this flag are still being
-	 * determined and developed and the mount option for this flag is
-	 * prefixed with __DEVEL__.  The prefix will be dropped once we
-	 * reach the point where all behaviors are compatible with the
-	 * planned unified hierarchy, which will automatically turn on this
-	 * flag.
-	 *
-	 * The followings are the behaviors currently affected this flag.
-	 *
-	 * - Mount options "noprefix", "xattr", "clone_children",
-	 *   "release_agent" and "name" are disallowed.
-	 *
-	 * - When mounting an existing superblock, mount options should
-	 *   match.
-	 *
-	 * - Remount is disallowed.
-	 *
-	 * - rename(2) is disallowed.
-	 *
-	 * - "tasks" is removed.  Everything should be at process
-	 *   granularity.  Use "cgroup.procs" instead.
-	 *
-	 * - "cgroup.procs" is not sorted.  pids will be unique unless they
-	 *   got recycled inbetween reads.
-	 *
-	 * - "release_agent" and "notify_on_release" are removed.
-	 *   Replacement notification mechanism will be implemented.
-	 *
-	 * - "cgroup.clone_children" is removed.
-	 *
-	 * - "cgroup.subtree_populated" is available.  Its value is 0 if
-	 *   the cgroup and its descendants contain no task; otherwise, 1.
-	 *   The file also generates kernfs notification which can be
-	 *   monitored through poll and [di]notify when the value of the
-	 *   file changes.
-	 *
-	 * - If mount is requested with sane_behavior but without any
-	 *   subsystem, the default unified hierarchy is mounted.
-	 *
-	 * - cpuset: tasks will be kept in empty cpusets when hotplug happens
-	 *   and take masks of ancestors with non-empty cpus/mems, instead of
-	 *   being moved to an ancestor.
-	 *
-	 * - cpuset: a task can be moved into an empty cpuset, and again it
-	 *   takes masks of ancestors.
-	 *
-	 * - memcg: use_hierarchy is on by default and the cgroup file for
-	 *   the flag is not created.
-	 *
-	 * - blkcg: blk-throttle becomes properly hierarchical.
-	 *
-	 * - debug: disallowed on the default hierarchy.
-	 */
-	CGRP_ROOT_SANE_BEHAVIOR = (1 << 0),
-
+	CGRP_ROOT_SANE_BEHAVIOR = (1 << 0), /* __DEVEL__sane_behavior specified */
 	CGRP_ROOT_NOPREFIX = (1 << 1), /* mounted subsystems have no named prefix */
 	CGRP_ROOT_XATTR = (1 << 2), /* supports extended attributes */
-
-	/* mount options live below bit 16 */
-	CGRP_ROOT_OPTION_MASK = (1 << 16) - 1,
 };
 
 /*
@@ -440,9 +384,11 @@ struct css_set {
 enum {
 	CFTYPE_ONLY_ON_ROOT = (1 << 0),	/* only create on root cgrp */
 	CFTYPE_NOT_ON_ROOT = (1 << 1),	/* don't create on root cgrp */
-	CFTYPE_INSANE = (1 << 2),	/* don't create if sane_behavior */
 	CFTYPE_NO_PREFIX = (1 << 3),	/* (DON'T USE FOR NEW FILES) no subsys prefix */
-	CFTYPE_ONLY_ON_DFL = (1 << 4),	/* only on default hierarchy */
+
+	/* internal flags, do not use outside cgroup core proper */
+	__CFTYPE_ONLY_ON_DFL = (1 << 16),	/* only on default hierarchy */
+	__CFTYPE_NOT_ON_DFL = (1 << 17),	/* not on default hierarchy */
 };
 
 #define MAX_CFTYPE_NAME 64
@@ -526,20 +472,64 @@
 extern struct cgroup_root cgrp_dfl_root;
 extern struct css_set init_css_set;
 
+/**
+ * cgroup_on_dfl - test whether a cgroup is on the default hierarchy
+ * @cgrp: the cgroup of interest
+ *
+ * The default hierarchy is the v2 interface of cgroup and this function
+ * can be used to test whether a cgroup is on the default hierarchy for
+ * cases where a subsystem should behave differently depending on the
+ * interface version.
+ *
+ * The set of behaviors which change on the default hierarchy are still
+ * being determined and the mount option is prefixed with __DEVEL__.
+ *
+ * List of changed behaviors:
+ *
+ * - Mount options "noprefix", "xattr", "clone_children", "release_agent"
+ *   and "name" are disallowed.
+ *
+ * - When mounting an existing superblock, mount options should match.
+ *
+ * - Remount is disallowed.
+ *
+ * - rename(2) is disallowed.
+ *
+ * - "tasks" is removed.  Everything should be at process granularity.  Use
+ *   "cgroup.procs" instead.
+ *
+ * - "cgroup.procs" is not sorted.  pids will be unique unless they got
+ *   recycled in between reads.
+ *
+ * - "release_agent" and "notify_on_release" are removed.  Replacement
+ *   notification mechanism will be implemented.
+ *
+ * - "cgroup.clone_children" is removed.
+ *
+ * - "cgroup.subtree_populated" is available.  Its value is 0 if the cgroup
+ *   and its descendants contain no task; otherwise, 1.  The file also
+ *   generates kernfs notification which can be monitored through poll and
+ *   [di]notify when the value of the file changes.
+ *
+ * - cpuset: tasks will be kept in empty cpusets when hotplug happens and
+ *   take masks of ancestors with non-empty cpus/mems, instead of being
+ *   moved to an ancestor.
+ *
+ * - cpuset: a task can be moved into an empty cpuset, and again it takes
+ *   masks of ancestors.
+ *
+ * - memcg: use_hierarchy is on by default and the cgroup file for the flag
+ *   is not created.
+ *
+ * - blkcg: blk-throttle becomes properly hierarchical.
+ *
+ * - debug: disallowed on the default hierarchy.
+ */
 static inline bool cgroup_on_dfl(const struct cgroup *cgrp)
 {
 	return cgrp->root == &cgrp_dfl_root;
 }
 
-/*
- * See the comment above CGRP_ROOT_SANE_BEHAVIOR for details.  This
- * function can be called as long as @cgrp is accessible.
- */
-static inline bool cgroup_sane_behavior(const struct cgroup *cgrp)
-{
-	return cgrp->root->flags & CGRP_ROOT_SANE_BEHAVIOR;
-}
-
 /* no synchronization, the result can only be used as a hint */
 static inline bool cgroup_has_tasks(struct cgroup *cgrp)
 {
@@ -602,7 +592,8 @@ static inline void pr_cont_cgroup_path(struct cgroup *cgrp)
 
 char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen);
 
-int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
+int cgroup_add_dfl_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
+int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
 int cgroup_rm_cftypes(struct cftype *cfts);
 
 bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor);
@@ -634,6 +625,7 @@ struct cgroup_subsys {
 	int (*css_online)(struct cgroup_subsys_state *css);
 	void (*css_offline)(struct cgroup_subsys_state *css);
 	void (*css_free)(struct cgroup_subsys_state *css);
+	void (*css_reset)(struct cgroup_subsys_state *css);
 
 	int (*can_attach)(struct cgroup_subsys_state *css,
 			  struct cgroup_taskset *tset);
@@ -682,8 +674,21 @@ struct cgroup_subsys {
 	 */
 	struct list_head cfts;
 
-	/* base cftypes, automatically registered with subsys itself */
-	struct cftype *base_cftypes;
+	/*
+	 * Base cftypes which are automatically registered.  The two can
+	 * point to the same array.
+	 */
+	struct cftype *dfl_cftypes;	/* for the default hierarchy */
+	struct cftype *legacy_cftypes;	/* for the legacy hierarchies */
+
+	/*
+	 * A subsystem may depend on other subsystems.  When such a subsystem
+	 * is enabled on a cgroup, the depended-upon subsystems are enabled
+	 * together if available.  Subsystems enabled due to dependency are
+	 * not visible to userland until explicitly enabled.  The following
+	 * specifies the mask of subsystems that this one depends on.
+	 */
+	unsigned int depends_on;
 };
 
 #define SUBSYS(_x) extern struct cgroup_subsys _x ## _cgrp_subsys;
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index aad41f06901b..7dc8788cfd52 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -149,12 +149,14 @@ struct cgroup_root cgrp_dfl_root;
  */
 static bool cgrp_dfl_root_visible;
 
+/*
+ * Set by the boot param of the same name and makes subsystems with NULL
+ * ->dfl_files to use ->legacy_files on the default hierarchy.
+ */
+static bool cgroup_legacy_files_on_dfl;
+
 /* some controllers are not supported in the default hierarchy */
-static const unsigned int cgrp_dfl_root_inhibit_ss_mask = 0
-#ifdef CONFIG_CGROUP_DEBUG
-	| (1 << debug_cgrp_id)
-#endif
-	;
+static unsigned int cgrp_dfl_root_inhibit_ss_mask;
 
 /* The list of hierarchy roots */
 
@@ -180,13 +182,15 @@ static u64 css_serial_nr_next = 1;
  */
 static int need_forkexit_callback __read_mostly;
 
-static struct cftype cgroup_base_files[];
+static struct cftype cgroup_dfl_base_files[];
+static struct cftype cgroup_legacy_base_files[];
 
 static void cgroup_put(struct cgroup *cgrp);
 static int rebind_subsystems(struct cgroup_root *dst_root,
 			     unsigned int ss_mask);
 static int cgroup_destroy_locked(struct cgroup *cgrp);
-static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss);
+static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss,
+		      bool visible);
 static void css_release(struct percpu_ref *ref);
 static void kill_css(struct cgroup_subsys_state *css);
 static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[],
@@ -1037,6 +1041,58 @@ static void cgroup_put(struct cgroup *cgrp)
 }
 
 /**
+ * cgroup_refresh_child_subsys_mask - update child_subsys_mask
+ * @cgrp: the target cgroup
+ *
+ * On the default hierarchy, a subsystem may request other subsystems to be
+ * enabled together through its ->depends_on mask.  In such cases, more
+ * subsystems than specified in "cgroup.subtree_control" may be enabled.
+ *
+ * This function determines which subsystems need to be enabled given the
+ * current @cgrp->subtree_control and records it in
+ * @cgrp->child_subsys_mask.  The resulting mask is always a superset of
+ * @cgrp->subtree_control and follows the usual hierarchy rules.
+ */
+static void cgroup_refresh_child_subsys_mask(struct cgroup *cgrp)
+{
+	struct cgroup *parent = cgroup_parent(cgrp);
+	unsigned int cur_ss_mask = cgrp->subtree_control;
+	struct cgroup_subsys *ss;
+	int ssid;
+
+	lockdep_assert_held(&cgroup_mutex);
+
+	if (!cgroup_on_dfl(cgrp)) {
+		cgrp->child_subsys_mask = cur_ss_mask;
+		return;
+	}
+
+	while (true) {
+		unsigned int new_ss_mask = cur_ss_mask;
+
+		for_each_subsys(ss, ssid)
+			if (cur_ss_mask & (1 << ssid))
+				new_ss_mask |= ss->depends_on;
+
+		/*
+		 * Mask out subsystems which aren't available.  This can
+		 * happen only if some depended-upon subsystems were bound
+		 * to non-default hierarchies.
+		 */
+		if (parent)
+			new_ss_mask &= parent->child_subsys_mask;
+		else
+			new_ss_mask &= cgrp->root->subsys_mask;
+
+		if (new_ss_mask == cur_ss_mask)
+			break;
+		cur_ss_mask = new_ss_mask;
+	}
+
+	cgrp->child_subsys_mask = cur_ss_mask;
+}
+
+/**
  * cgroup_kn_unlock - unlocking helper for cgroup kernfs methods
  * @kn: the kernfs_node being serviced
  *
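Because a depended-upon subsystem may itself have dependencies, the
function above iterates to a fixed point.  The same algorithm, rendered
as a standalone C program with three invented controllers (this is a
sketch of the logic, not the kernel function):

  #include <stdio.h>

  #define NR_SS 3

  static const unsigned int depends_on[NR_SS] = {
  	[0] = 1 << 1,	/* ss0 needs ss1 */
  	[1] = 1 << 2,	/* ss1 needs ss2 */
  	[2] = 0,
  };

  static unsigned int refresh(unsigned int subtree_control,
  			    unsigned int parent_mask)
  {
  	unsigned int cur = subtree_control;

  	while (1) {
  		unsigned int new = cur;
  		int i;

  		/* fold in the dependencies of every enabled subsystem */
  		for (i = 0; i < NR_SS; i++)
  			if (cur & (1 << i))
  				new |= depends_on[i];

  		new &= parent_mask;	/* drop unavailable subsystems */
  		if (new == cur)
  			return cur;	/* fixed point reached */
  		cur = new;
  	}
  }

  int main(void)
  {
  	/* enabling only ss0 transitively pulls in ss1 and ss2 */
  	printf("effective mask: 0x%x\n", refresh(1 << 0, 0x7));
  	/* if the parent cannot offer ss2, the chain is clipped */
  	printf("with ss2 unavailable: 0x%x\n", refresh(1 << 0, 0x3));
  	return 0;
  }
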
@@ -1208,12 +1264,15 @@ static int rebind_subsystems(struct cgroup_root *dst_root, unsigned int ss_mask)
 		up_write(&css_set_rwsem);
 
 		src_root->subsys_mask &= ~(1 << ssid);
-		src_root->cgrp.child_subsys_mask &= ~(1 << ssid);
+		src_root->cgrp.subtree_control &= ~(1 << ssid);
+		cgroup_refresh_child_subsys_mask(&src_root->cgrp);
 
 		/* default hierarchy doesn't enable controllers by default */
 		dst_root->subsys_mask |= 1 << ssid;
-		if (dst_root != &cgrp_dfl_root)
-			dst_root->cgrp.child_subsys_mask |= 1 << ssid;
+		if (dst_root != &cgrp_dfl_root) {
+			dst_root->cgrp.subtree_control |= 1 << ssid;
+			cgroup_refresh_child_subsys_mask(&dst_root->cgrp);
+		}
 
 		if (ss->bind)
 			ss->bind(css);
@@ -1233,8 +1292,6 @@ static int cgroup_show_options(struct seq_file *seq,
 	for_each_subsys(ss, ssid)
 		if (root->subsys_mask & (1 << ssid))
 			seq_printf(seq, ",%s", ss->name);
-	if (root->flags & CGRP_ROOT_SANE_BEHAVIOR)
-		seq_puts(seq, ",sane_behavior");
 	if (root->flags & CGRP_ROOT_NOPREFIX)
 		seq_puts(seq, ",noprefix");
 	if (root->flags & CGRP_ROOT_XATTR)
@@ -1268,6 +1325,7 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
 	bool all_ss = false, one_ss = false;
 	unsigned int mask = -1U;
 	struct cgroup_subsys *ss;
+	int nr_opts = 0;
 	int i;
 
 #ifdef CONFIG_CPUSETS
@@ -1277,6 +1335,8 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
 	memset(opts, 0, sizeof(*opts));
 
 	while ((token = strsep(&o, ",")) != NULL) {
+		nr_opts++;
+
 		if (!*token)
 			return -EINVAL;
 		if (!strcmp(token, "none")) {
@@ -1361,37 +1421,33 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
 		return -ENOENT;
 	}
 
-	/* Consistency checks */
-
 	if (opts->flags & CGRP_ROOT_SANE_BEHAVIOR) {
 		pr_warn("sane_behavior: this is still under development and its behaviors will change, proceed at your own risk\n");
-
-		if ((opts->flags & (CGRP_ROOT_NOPREFIX | CGRP_ROOT_XATTR)) ||
-		    opts->cpuset_clone_children || opts->release_agent ||
-		    opts->name) {
-			pr_err("sane_behavior: noprefix, xattr, clone_children, release_agent and name are not allowed\n");
+		if (nr_opts != 1) {
+			pr_err("sane_behavior: no other mount options allowed\n");
 			return -EINVAL;
 		}
-	} else {
-		/*
-		 * If the 'all' option was specified select all the
-		 * subsystems, otherwise if 'none', 'name=' and a subsystem
-		 * name options were not specified, let's default to 'all'
-		 */
-		if (all_ss || (!one_ss && !opts->none && !opts->name))
-			for_each_subsys(ss, i)
-				if (!ss->disabled)
-					opts->subsys_mask |= (1 << i);
-
-		/*
-		 * We either have to specify by name or by subsystems. (So
-		 * all empty hierarchies must have a name).
-		 */
-		if (!opts->subsys_mask && !opts->name)
-			return -EINVAL;
+		return 0;
 	}
 
 	/*
+	 * If the 'all' option was specified select all the subsystems,
+	 * otherwise if 'none', 'name=' and a subsystem name options were
+	 * not specified, let's default to 'all'
+	 */
+	if (all_ss || (!one_ss && !opts->none && !opts->name))
+		for_each_subsys(ss, i)
+			if (!ss->disabled)
+				opts->subsys_mask |= (1 << i);
+
+	/*
+	 * We either have to specify by name or by subsystems. (So all
+	 * empty hierarchies must have a name).
+	 */
+	if (!opts->subsys_mask && !opts->name)
+		return -EINVAL;
+
+	/*
 	 * Option noprefix was introduced just for backward compatibility
 	 * with the old cpuset, so we allow noprefix only if mounting just
 	 * the cpuset subsystem.
@@ -1399,7 +1455,6 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
 	if ((opts->flags & CGRP_ROOT_NOPREFIX) && (opts->subsys_mask & mask))
 		return -EINVAL;
 
-
 	/* Can't specify "none" and some subsystems */
 	if (opts->subsys_mask && opts->none)
 		return -EINVAL;
@@ -1414,8 +1469,8 @@ static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data)
 	struct cgroup_sb_opts opts;
 	unsigned int added_mask, removed_mask;
 
-	if (root->flags & CGRP_ROOT_SANE_BEHAVIOR) {
-		pr_err("sane_behavior: remount is not allowed\n");
+	if (root == &cgrp_dfl_root) {
+		pr_err("remount is not allowed\n");
 		return -EINVAL;
 	}
 
@@ -1434,11 +1489,10 @@ static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data)
 	removed_mask = root->subsys_mask & ~opts.subsys_mask;
 
 	/* Don't allow flags or name to change at remount */
-	if (((opts.flags ^ root->flags) & CGRP_ROOT_OPTION_MASK) ||
+	if ((opts.flags ^ root->flags) ||
 	    (opts.name && strcmp(opts.name, root->name))) {
 		pr_err("option or name mismatch, new: 0x%x \"%s\", old: 0x%x \"%s\"\n",
-		       opts.flags & CGRP_ROOT_OPTION_MASK, opts.name ?: "",
-		       root->flags & CGRP_ROOT_OPTION_MASK, root->name);
+		       opts.flags, opts.name ?: "", root->flags, root->name);
 		ret = -EINVAL;
 		goto out_unlock;
 	}
@@ -1563,6 +1617,7 @@ static int cgroup_setup_root(struct cgroup_root *root, unsigned int ss_mask)
 {
 	LIST_HEAD(tmp_links);
 	struct cgroup *root_cgrp = &root->cgrp;
+	struct cftype *base_files;
 	struct css_set *cset;
 	int i, ret;
 
@@ -1600,7 +1655,12 @@ static int cgroup_setup_root(struct cgroup_root *root, unsigned int ss_mask)
 	}
 	root_cgrp->kn = root->kf_root->kn;
 
-	ret = cgroup_addrm_files(root_cgrp, cgroup_base_files, true);
+	if (root == &cgrp_dfl_root)
+		base_files = cgroup_dfl_base_files;
+	else
+		base_files = cgroup_legacy_base_files;
+
+	ret = cgroup_addrm_files(root_cgrp, base_files, true);
 	if (ret)
 		goto destroy_root;
 
@@ -1672,7 +1732,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
 		goto out_unlock;
 
 	/* look for a matching existing root */
-	if (!opts.subsys_mask && !opts.none && !opts.name) {
+	if (opts.flags & CGRP_ROOT_SANE_BEHAVIOR) {
 		cgrp_dfl_root_visible = true;
 		root = &cgrp_dfl_root;
 		cgroup_get(&root->cgrp);
@@ -1730,15 +1790,8 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
 		goto out_unlock;
 	}
 
-	if ((root->flags ^ opts.flags) & CGRP_ROOT_OPTION_MASK) {
-		if ((root->flags | opts.flags) & CGRP_ROOT_SANE_BEHAVIOR) {
-			pr_err("sane_behavior: new mount options should match the existing superblock\n");
-			ret = -EINVAL;
-			goto out_unlock;
-		} else {
-			pr_warn("new mount options do not match the existing superblock, will be ignored\n");
-		}
-	}
+	if (root->flags ^ opts.flags)
+		pr_warn("new mount options do not match the existing superblock, will be ignored\n");
 
 	/*
 	 * We want to reuse @root whose lifetime is governed by its
@@ -2457,9 +2510,7 @@ static int cgroup_release_agent_show(struct seq_file *seq, void *v)
 
 static int cgroup_sane_behavior_show(struct seq_file *seq, void *v)
 {
-	struct cgroup *cgrp = seq_css(seq)->cgroup;
-
-	seq_printf(seq, "%d\n", cgroup_sane_behavior(cgrp));
+	seq_puts(seq, "0\n");
 	return 0;
 }
 
@@ -2496,7 +2547,7 @@ static int cgroup_controllers_show(struct seq_file *seq, void *v)
 {
 	struct cgroup *cgrp = seq_css(seq)->cgroup;
 
-	cgroup_print_ss_mask(seq, cgroup_parent(cgrp)->child_subsys_mask);
+	cgroup_print_ss_mask(seq, cgroup_parent(cgrp)->subtree_control);
 	return 0;
 }
 
@@ -2505,7 +2556,7 @@ static int cgroup_subtree_control_show(struct seq_file *seq, void *v)
 {
 	struct cgroup *cgrp = seq_css(seq)->cgroup;
 
-	cgroup_print_ss_mask(seq, cgrp->child_subsys_mask);
+	cgroup_print_ss_mask(seq, cgrp->subtree_control);
 	return 0;
 }
 
@@ -2611,6 +2662,7 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
 					    loff_t off)
 {
 	unsigned int enable = 0, disable = 0;
+	unsigned int css_enable, css_disable, old_ctrl, new_ctrl;
 	struct cgroup *cgrp, *child;
 	struct cgroup_subsys *ss;
 	char *tok;
@@ -2650,11 +2702,26 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
 
 	for_each_subsys(ss, ssid) {
 		if (enable & (1 << ssid)) {
-			if (cgrp->child_subsys_mask & (1 << ssid)) {
+			if (cgrp->subtree_control & (1 << ssid)) {
 				enable &= ~(1 << ssid);
 				continue;
 			}
 
+			/* unavailable or not enabled on the parent? */
+			if (!(cgrp_dfl_root.subsys_mask & (1 << ssid)) ||
+			    (cgroup_parent(cgrp) &&
+			     !(cgroup_parent(cgrp)->subtree_control & (1 << ssid)))) {
+				ret = -ENOENT;
+				goto out_unlock;
+			}
+
+			/*
+			 * @ss is already enabled through dependency and
+			 * we'll just make it visible.  Skip draining.
+			 */
+			if (cgrp->child_subsys_mask & (1 << ssid))
+				continue;
+
 			/*
 			 * Because css offlining is asynchronous, userland
 			 * might try to re-enable the same controller while
@@ -2677,23 +2744,15 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
 
 				return restart_syscall();
 			}
-
-			/* unavailable or not enabled on the parent? */
-			if (!(cgrp_dfl_root.subsys_mask & (1 << ssid)) ||
-			    (cgroup_parent(cgrp) &&
-			     !(cgroup_parent(cgrp)->child_subsys_mask & (1 << ssid)))) {
-				ret = -ENOENT;
-				goto out_unlock;
-			}
 		} else if (disable & (1 << ssid)) {
-			if (!(cgrp->child_subsys_mask & (1 << ssid))) {
+			if (!(cgrp->subtree_control & (1 << ssid))) {
 				disable &= ~(1 << ssid);
 				continue;
 			}
 
 			/* a child has it enabled? */
 			cgroup_for_each_live_child(child, cgrp) {
-				if (child->child_subsys_mask & (1 << ssid)) {
+				if (child->subtree_control & (1 << ssid)) {
 					ret = -EBUSY;
 					goto out_unlock;
 				}
@@ -2707,7 +2766,7 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
 	}
 
 	/*
-	 * Except for the root, child_subsys_mask must be zero for a cgroup
+	 * Except for the root, subtree_control must be zero for a cgroup
 	 * with tasks so that child cgroups don't compete against tasks.
 	 */
 	if (enable && cgroup_parent(cgrp) && !list_empty(&cgrp->cset_links)) {
@@ -2716,36 +2775,75 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
 	}
 
 	/*
-	 * Create csses for enables and update child_subsys_mask.  This
-	 * changes cgroup_e_css() results which in turn makes the
-	 * subsequent cgroup_update_dfl_csses() associate all tasks in the
-	 * subtree to the updated csses.
+	 * Update subsys masks and calculate what needs to be done.  More
+	 * subsystems than specified may need to be enabled or disabled
+	 * depending on subsystem dependencies.
+	 */
+	cgrp->subtree_control |= enable;
+	cgrp->subtree_control &= ~disable;
+
+	old_ctrl = cgrp->child_subsys_mask;
+	cgroup_refresh_child_subsys_mask(cgrp);
+	new_ctrl = cgrp->child_subsys_mask;
+
+	css_enable = ~old_ctrl & new_ctrl;
+	css_disable = old_ctrl & ~new_ctrl;
+	enable |= css_enable;
+	disable |= css_disable;
+
+	/*
+	 * Create new csses or make the existing ones visible.  A css is
+	 * created invisible if it's being implicitly enabled through
+	 * dependency.  An invisible css is made visible when the userland
+	 * explicitly enables it.
 	 */
 	for_each_subsys(ss, ssid) {
 		if (!(enable & (1 << ssid)))
 			continue;
 
 		cgroup_for_each_live_child(child, cgrp) {
-			ret = create_css(child, ss);
+			if (css_enable & (1 << ssid))
+				ret = create_css(child, ss,
+					cgrp->subtree_control & (1 << ssid));
+			else
+				ret = cgroup_populate_dir(child, 1 << ssid);
 			if (ret)
 				goto err_undo_css;
 		}
 	}
 
-	cgrp->child_subsys_mask |= enable;
-	cgrp->child_subsys_mask &= ~disable;
-
+	/*
+	 * At this point, cgroup_e_css() results reflect the new csses
+	 * making the following cgroup_update_dfl_csses() properly update
+	 * css associations of all tasks in the subtree.
+	 */
 	ret = cgroup_update_dfl_csses(cgrp);
 	if (ret)
 		goto err_undo_css;
 
-	/* all tasks are now migrated away from the old csses, kill them */
+	/*
+	 * All tasks are migrated out of disabled csses.  Kill or hide
+	 * them.  A css is hidden when the userland requests it to be
+	 * disabled while other subsystems are still depending on it.  The
+	 * css must not actively control resources and be in the vanilla
+	 * state if it's made visible again later.  Controllers which may
+	 * be depended upon should provide ->css_reset() for this purpose.
+	 */
 	for_each_subsys(ss, ssid) {
 		if (!(disable & (1 << ssid)))
 			continue;
 
-		cgroup_for_each_live_child(child, cgrp)
-			kill_css(cgroup_css(child, ss));
+		cgroup_for_each_live_child(child, cgrp) {
+			struct cgroup_subsys_state *css = cgroup_css(child, ss);
+
+			if (css_disable & (1 << ssid)) {
+				kill_css(css);
+			} else {
+				cgroup_clear_dir(child, 1 << ssid);
+				if (ss->css_reset)
+					ss->css_reset(css);
+			}
+		}
 	}
 
 	kernfs_activate(cgrp->kn);
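The css_enable/css_disable bookkeeping above is two mask differences
over the old and new effective masks; bits that merely change
visibility fall into neither.  A sketch with invented values:

  #include <stdio.h>

  int main(void)
  {
  	unsigned int old_ctrl = 0x3;	/* previously effective */
  	unsigned int new_ctrl = 0x6;	/* after refreshing the mask */

  	unsigned int css_enable  = ~old_ctrl & new_ctrl;  /* newly on */
  	unsigned int css_disable = old_ctrl & ~new_ctrl;  /* newly off */

  	printf("create csses for 0x%x, kill csses for 0x%x\n",
  	       css_enable, css_disable);
  	return 0;
  }
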
@@ -2755,8 +2853,9 @@ out_unlock:
 	return ret ?: nbytes;
 
 err_undo_css:
-	cgrp->child_subsys_mask &= ~enable;
-	cgrp->child_subsys_mask |= disable;
+	cgrp->subtree_control &= ~enable;
+	cgrp->subtree_control |= disable;
+	cgroup_refresh_child_subsys_mask(cgrp);
 
 	for_each_subsys(ss, ssid) {
 		if (!(enable & (1 << ssid)))
@@ -2764,8 +2863,14 @@ err_undo_css:
 
 		cgroup_for_each_live_child(child, cgrp) {
 			struct cgroup_subsys_state *css = cgroup_css(child, ss);
-			if (css)
+
+			if (!css)
+				continue;
+
+			if (css_enable & (1 << ssid))
 				kill_css(css);
+			else
+				cgroup_clear_dir(child, 1 << ssid);
 		}
 	}
 	goto out_unlock;
@@ -2878,9 +2983,9 @@ static int cgroup_rename(struct kernfs_node *kn, struct kernfs_node *new_parent,
 
 	/*
 	 * This isn't a proper migration and its usefulness is very
-	 * limited.  Disallow if sane_behavior.
+	 * limited.  Disallow on the default hierarchy.
 	 */
-	if (cgroup_sane_behavior(cgrp))
+	if (cgroup_on_dfl(cgrp))
 		return -EPERM;
 
 	/*
@@ -2964,9 +3069,9 @@ static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[],
 
 	for (cft = cfts; cft->name[0] != '\0'; cft++) {
 		/* does cft->flags tell us to skip this file on @cgrp? */
-		if ((cft->flags & CFTYPE_ONLY_ON_DFL) && !cgroup_on_dfl(cgrp))
+		if ((cft->flags & __CFTYPE_ONLY_ON_DFL) && !cgroup_on_dfl(cgrp))
 			continue;
-		if ((cft->flags & CFTYPE_INSANE) && cgroup_sane_behavior(cgrp))
+		if ((cft->flags & __CFTYPE_NOT_ON_DFL) && cgroup_on_dfl(cgrp))
 			continue;
 		if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgroup_parent(cgrp))
 			continue;
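The per-hierarchy filtering above can be exercised in isolation.  A
userspace model of the skip test (the flag values mirror the header;
the scenario is invented):

  #include <stdio.h>

  #define __CFTYPE_ONLY_ON_DFL	(1 << 16)
  #define __CFTYPE_NOT_ON_DFL	(1 << 17)

  /* a file is created only if its flags agree with the hierarchy type */
  static int skip_file(unsigned int flags, int on_dfl)
  {
  	if ((flags & __CFTYPE_ONLY_ON_DFL) && !on_dfl)
  		return 1;
  	if ((flags & __CFTYPE_NOT_ON_DFL) && on_dfl)
  		return 1;
  	return 0;
  }

  int main(void)
  {
  	printf("legacy-only file on default hierarchy: %s\n",
  	       skip_file(__CFTYPE_NOT_ON_DFL, 1) ? "skipped" : "created");
  	printf("dfl-only file on legacy hierarchy: %s\n",
  	       skip_file(__CFTYPE_ONLY_ON_DFL, 0) ? "skipped" : "created");
  	return 0;
  }
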
@@ -3024,6 +3129,9 @@ static void cgroup_exit_cftypes(struct cftype *cfts)
 		kfree(cft->kf_ops);
 		cft->kf_ops = NULL;
 		cft->ss = NULL;
+
+		/* revert flags set by cgroup core while adding @cfts */
+		cft->flags &= ~(__CFTYPE_ONLY_ON_DFL | __CFTYPE_NOT_ON_DFL);
 	}
 }
 
@@ -3109,7 +3217,7 @@ int cgroup_rm_cftypes(struct cftype *cfts)
  * function currently returns 0 as long as @cfts registration is successful
  * even if some file creation attempts on existing cgroups fail.
  */
-int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
+static int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
 {
 	int ret;
 
@@ -3135,6 +3243,40 @@ int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
 }
 
 /**
+ * cgroup_add_dfl_cftypes - add an array of cftypes for default hierarchy
+ * @ss: target cgroup subsystem
+ * @cfts: zero-length name terminated array of cftypes
+ *
+ * Similar to cgroup_add_cftypes() but the added files are only used for
+ * the default hierarchy.
+ */
+int cgroup_add_dfl_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
+{
+	struct cftype *cft;
+
+	for (cft = cfts; cft && cft->name[0] != '\0'; cft++)
+		cft->flags |= __CFTYPE_ONLY_ON_DFL;
+	return cgroup_add_cftypes(ss, cfts);
+}
+
+/**
+ * cgroup_add_legacy_cftypes - add an array of cftypes for legacy hierarchies
+ * @ss: target cgroup subsystem
+ * @cfts: zero-length name terminated array of cftypes
+ *
+ * Similar to cgroup_add_cftypes() but the added files are only used for
+ * the legacy hierarchies.
+ */
+int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
+{
+	struct cftype *cft;
+
+	for (cft = cfts; cft && cft->name[0] != '\0'; cft++)
+		cft->flags |= __CFTYPE_NOT_ON_DFL;
+	return cgroup_add_cftypes(ss, cfts);
+}
+
+/**
  * cgroup_task_count - count the number of tasks in a cgroup.
  * @cgrp: the cgroup in question
  *
@@ -3699,8 +3841,9 @@ after:
  *
  * All this extra complexity was caused by the original implementation
  * committing to an entirely unnecessary property.  In the long term, we
- * want to do away with it.  Explicitly scramble sort order if
- * sane_behavior so that no such expectation exists in the new interface.
+ * want to do away with it.  Explicitly scramble sort order if on the
+ * default hierarchy so that no such expectation exists in the new
+ * interface.
 *
 * Scrambling is done by swapping every two consecutive bits, which is
 * non-identity one-to-one mapping which disturbs sort order sufficiently.
@@ -3715,7 +3858,7 @@ static pid_t pid_fry(pid_t pid)
 
 static pid_t cgroup_pid_fry(struct cgroup *cgrp, pid_t pid)
 {
-	if (cgroup_sane_behavior(cgrp))
+	if (cgroup_on_dfl(cgrp))
 		return pid_fry(pid);
 	else
 		return pid;
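pid_fry()'s body is not part of this hunk; swapping every two
consecutive bits is the standard even/odd mask-and-shift idiom, and the
sketch below reconstructs it from the comment's description (an
assumption, not a quote from the patch).  Because the swap is an
involution, applying it twice restores the original pid.

  #include <stdio.h>

  typedef int pid_t_demo;	/* stand-in for pid_t */

  static pid_t_demo pid_fry(pid_t_demo pid)
  {
  	unsigned int even = pid & 0x55555555;	/* bits 0,2,4,... */
  	unsigned int odd  = pid & 0xAAAAAAAA;	/* bits 1,3,5,... */

  	return (even << 1) | (odd >> 1);	/* swap adjacent bits */
  }

  int main(void)
  {
  	pid_t_demo p;

  	/* consecutive pids no longer sort consecutively... */
  	for (p = 1; p <= 4; p++)
  		printf("%d -> %d\n", p, pid_fry(p));
  	/* ...but the mapping is one-to-one and self-inverse */
  	printf("round trip of 1234: %d\n", pid_fry(pid_fry(1234)));
  	return 0;
  }
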
@@ -3818,7 +3961,7 @@ static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type, | |||
3818 | css_task_iter_end(&it); | 3961 | css_task_iter_end(&it); |
3819 | length = n; | 3962 | length = n; |
3820 | /* now sort & (if procs) strip out duplicates */ | 3963 | /* now sort & (if procs) strip out duplicates */ |
3821 | if (cgroup_sane_behavior(cgrp)) | 3964 | if (cgroup_on_dfl(cgrp)) |
3822 | sort(array, length, sizeof(pid_t), fried_cmppid, NULL); | 3965 | sort(array, length, sizeof(pid_t), fried_cmppid, NULL); |
3823 | else | 3966 | else |
3824 | sort(array, length, sizeof(pid_t), cmppid, NULL); | 3967 | sort(array, length, sizeof(pid_t), cmppid, NULL); |
@@ -4040,7 +4183,8 @@ static int cgroup_clone_children_write(struct cgroup_subsys_state *css, | |||
4040 | return 0; | 4183 | return 0; |
4041 | } | 4184 | } |
4042 | 4185 | ||
4043 | static struct cftype cgroup_base_files[] = { | 4186 | /* cgroup core interface files for the default hierarchy */ |
4187 | static struct cftype cgroup_dfl_base_files[] = { | ||
4044 | { | 4188 | { |
4045 | .name = "cgroup.procs", | 4189 | .name = "cgroup.procs", |
4046 | .seq_start = cgroup_pidlist_start, | 4190 | .seq_start = cgroup_pidlist_start, |
@@ -4052,46 +4196,52 @@ static struct cftype cgroup_base_files[] = { | |||
4052 | .mode = S_IRUGO | S_IWUSR, | 4196 | .mode = S_IRUGO | S_IWUSR, |
4053 | }, | 4197 | }, |
4054 | { | 4198 | { |
4055 | .name = "cgroup.clone_children", | ||
4056 | .flags = CFTYPE_INSANE, | ||
4057 | .read_u64 = cgroup_clone_children_read, | ||
4058 | .write_u64 = cgroup_clone_children_write, | ||
4059 | }, | ||
4060 | { | ||
4061 | .name = "cgroup.sane_behavior", | ||
4062 | .flags = CFTYPE_ONLY_ON_ROOT, | ||
4063 | .seq_show = cgroup_sane_behavior_show, | ||
4064 | }, | ||
4065 | { | ||
4066 | .name = "cgroup.controllers", | 4199 | .name = "cgroup.controllers", |
4067 | .flags = CFTYPE_ONLY_ON_DFL | CFTYPE_ONLY_ON_ROOT, | 4200 | .flags = CFTYPE_ONLY_ON_ROOT, |
4068 | .seq_show = cgroup_root_controllers_show, | 4201 | .seq_show = cgroup_root_controllers_show, |
4069 | }, | 4202 | }, |
4070 | { | 4203 | { |
4071 | .name = "cgroup.controllers", | 4204 | .name = "cgroup.controllers", |
4072 | .flags = CFTYPE_ONLY_ON_DFL | CFTYPE_NOT_ON_ROOT, | 4205 | .flags = CFTYPE_NOT_ON_ROOT, |
4073 | .seq_show = cgroup_controllers_show, | 4206 | .seq_show = cgroup_controllers_show, |
4074 | }, | 4207 | }, |
4075 | { | 4208 | { |
4076 | .name = "cgroup.subtree_control", | 4209 | .name = "cgroup.subtree_control", |
4077 | .flags = CFTYPE_ONLY_ON_DFL, | ||
4078 | .seq_show = cgroup_subtree_control_show, | 4210 | .seq_show = cgroup_subtree_control_show, |
4079 | .write = cgroup_subtree_control_write, | 4211 | .write = cgroup_subtree_control_write, |
4080 | }, | 4212 | }, |
4081 | { | 4213 | { |
4082 | .name = "cgroup.populated", | 4214 | .name = "cgroup.populated", |
4083 | .flags = CFTYPE_ONLY_ON_DFL | CFTYPE_NOT_ON_ROOT, | 4215 | .flags = CFTYPE_NOT_ON_ROOT, |
4084 | .seq_show = cgroup_populated_show, | 4216 | .seq_show = cgroup_populated_show, |
4085 | }, | 4217 | }, |
4218 | { } /* terminate */ | ||
4219 | }; | ||
4086 | 4220 | ||
4087 | /* | 4221 | /* cgroup core interface files for the legacy hierarchies */ |
4088 | * Historical crazy stuff. These don't have "cgroup." prefix and | 4222 | static struct cftype cgroup_legacy_base_files[] = { |
4089 | * don't exist if sane_behavior. If you're depending on these, be | 4223 | { |
4090 | * prepared to be burned. | 4224 | .name = "cgroup.procs", |
4091 | */ | 4225 | .seq_start = cgroup_pidlist_start, |
4226 | .seq_next = cgroup_pidlist_next, | ||
4227 | .seq_stop = cgroup_pidlist_stop, | ||
4228 | .seq_show = cgroup_pidlist_show, | ||
4229 | .private = CGROUP_FILE_PROCS, | ||
4230 | .write = cgroup_procs_write, | ||
4231 | .mode = S_IRUGO | S_IWUSR, | ||
4232 | }, | ||
4233 | { | ||
4234 | .name = "cgroup.clone_children", | ||
4235 | .read_u64 = cgroup_clone_children_read, | ||
4236 | .write_u64 = cgroup_clone_children_write, | ||
4237 | }, | ||
4238 | { | ||
4239 | .name = "cgroup.sane_behavior", | ||
4240 | .flags = CFTYPE_ONLY_ON_ROOT, | ||
4241 | .seq_show = cgroup_sane_behavior_show, | ||
4242 | }, | ||
4092 | { | 4243 | { |
4093 | .name = "tasks", | 4244 | .name = "tasks", |
4094 | .flags = CFTYPE_INSANE, /* use "procs" instead */ | ||
4095 | .seq_start = cgroup_pidlist_start, | 4245 | .seq_start = cgroup_pidlist_start, |
4096 | .seq_next = cgroup_pidlist_next, | 4246 | .seq_next = cgroup_pidlist_next, |
4097 | .seq_stop = cgroup_pidlist_stop, | 4247 | .seq_stop = cgroup_pidlist_stop, |
@@ -4102,13 +4252,12 @@ static struct cftype cgroup_base_files[] = { | |||
4102 | }, | 4252 | }, |
4103 | { | 4253 | { |
4104 | .name = "notify_on_release", | 4254 | .name = "notify_on_release", |
4105 | .flags = CFTYPE_INSANE, | ||
4106 | .read_u64 = cgroup_read_notify_on_release, | 4255 | .read_u64 = cgroup_read_notify_on_release, |
4107 | .write_u64 = cgroup_write_notify_on_release, | 4256 | .write_u64 = cgroup_write_notify_on_release, |
4108 | }, | 4257 | }, |
4109 | { | 4258 | { |
4110 | .name = "release_agent", | 4259 | .name = "release_agent", |
4111 | .flags = CFTYPE_INSANE | CFTYPE_ONLY_ON_ROOT, | 4260 | .flags = CFTYPE_ONLY_ON_ROOT, |
4112 | .seq_show = cgroup_release_agent_show, | 4261 | .seq_show = cgroup_release_agent_show, |
4113 | .write = cgroup_release_agent_write, | 4262 | .write = cgroup_release_agent_write, |
4114 | .max_write_len = PATH_MAX - 1, | 4263 | .max_write_len = PATH_MAX - 1, |
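The hunk above splits the core files in two: the default hierarchy exposes "cgroup.controllers", "cgroup.subtree_control" and "cgroup.populated", while "tasks", "cgroup.clone_children", "notify_on_release" and "release_agent" remain legacy-only. A rough userspace sketch of driving the new knobs follows; it is not part of this patch, and the mount point and the availability of the memory controller are assumptions:

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    int main(void)
    {
            char buf[256];
            ssize_t n;
            int fd;

            /* list the controllers available at the root of the hierarchy */
            fd = open("/sys/fs/cgroup/cgroup.controllers", O_RDONLY);
            if (fd < 0)
                    return 1;
            n = read(fd, buf, sizeof(buf) - 1);
            close(fd);
            if (n > 0) {
                    buf[n] = '\0';
                    printf("available: %s", buf);
            }

            /* ask cgroup core to enable memory for the child cgroups */
            fd = open("/sys/fs/cgroup/cgroup.subtree_control", O_WRONLY);
            if (fd < 0)
                    return 1;
            if (write(fd, "+memory", strlen("+memory")) < 0)
                    perror("subtree_control");
            close(fd);
            return 0;
    }

Writing "-memory" to the same file disables the controller for the children again.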
@@ -4316,12 +4465,14 @@ static void offline_css(struct cgroup_subsys_state *css) | |||
4316 | * create_css - create a cgroup_subsys_state | 4465 | * create_css - create a cgroup_subsys_state |
4317 | * @cgrp: the cgroup new css will be associated with | 4466 | * @cgrp: the cgroup new css will be associated with |
4318 | * @ss: the subsys of new css | 4467 | * @ss: the subsys of new css |
4468 | * @visible: whether to create control knobs for the new css or not | ||
4319 | * | 4469 | * |
4320 | * Create a new css associated with @cgrp - @ss pair. On success, the new | 4470 | * Create a new css associated with @cgrp - @ss pair. On success, the new |
4321 | * css is online and installed in @cgrp with all interface files created. | 4471 | * css is online and installed in @cgrp with all interface files created if |
4322 | * Returns 0 on success, -errno on failure. | 4472 | * @visible. Returns 0 on success, -errno on failure. |
4323 | */ | 4473 | */ |
4324 | static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss) | 4474 | static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss, |
4475 | bool visible) | ||
4325 | { | 4476 | { |
4326 | struct cgroup *parent = cgroup_parent(cgrp); | 4477 | struct cgroup *parent = cgroup_parent(cgrp); |
4327 | struct cgroup_subsys_state *parent_css = cgroup_css(parent, ss); | 4478 | struct cgroup_subsys_state *parent_css = cgroup_css(parent, ss); |
@@ -4345,9 +4496,11 @@ static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss) | |||
4345 | goto err_free_percpu_ref; | 4496 | goto err_free_percpu_ref; |
4346 | css->id = err; | 4497 | css->id = err; |
4347 | 4498 | ||
4348 | err = cgroup_populate_dir(cgrp, 1 << ss->id); | 4499 | if (visible) { |
4349 | if (err) | 4500 | err = cgroup_populate_dir(cgrp, 1 << ss->id); |
4350 | goto err_free_id; | 4501 | if (err) |
4502 | goto err_free_id; | ||
4503 | } | ||
4351 | 4504 | ||
4352 | /* @css is ready to be brought online now, make it visible */ | 4505 | /* @css is ready to be brought online now, make it visible */ |
4353 | list_add_tail_rcu(&css->sibling, &parent_css->children); | 4506 | list_add_tail_rcu(&css->sibling, &parent_css->children); |
@@ -4387,6 +4540,7 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, | |||
4387 | struct cgroup_root *root; | 4540 | struct cgroup_root *root; |
4388 | struct cgroup_subsys *ss; | 4541 | struct cgroup_subsys *ss; |
4389 | struct kernfs_node *kn; | 4542 | struct kernfs_node *kn; |
4543 | struct cftype *base_files; | ||
4390 | int ssid, ret; | 4544 | int ssid, ret; |
4391 | 4545 | ||
4392 | parent = cgroup_kn_lock_live(parent_kn); | 4546 | parent = cgroup_kn_lock_live(parent_kn); |
@@ -4457,14 +4611,20 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, | |||
4457 | if (ret) | 4611 | if (ret) |
4458 | goto out_destroy; | 4612 | goto out_destroy; |
4459 | 4613 | ||
4460 | ret = cgroup_addrm_files(cgrp, cgroup_base_files, true); | 4614 | if (cgroup_on_dfl(cgrp)) |
4615 | base_files = cgroup_dfl_base_files; | ||
4616 | else | ||
4617 | base_files = cgroup_legacy_base_files; | ||
4618 | |||
4619 | ret = cgroup_addrm_files(cgrp, base_files, true); | ||
4461 | if (ret) | 4620 | if (ret) |
4462 | goto out_destroy; | 4621 | goto out_destroy; |
4463 | 4622 | ||
4464 | /* let's create and online css's */ | 4623 | /* let's create and online css's */ |
4465 | for_each_subsys(ss, ssid) { | 4624 | for_each_subsys(ss, ssid) { |
4466 | if (parent->child_subsys_mask & (1 << ssid)) { | 4625 | if (parent->child_subsys_mask & (1 << ssid)) { |
4467 | ret = create_css(cgrp, ss); | 4626 | ret = create_css(cgrp, ss, |
4627 | parent->subtree_control & (1 << ssid)); | ||
4468 | if (ret) | 4628 | if (ret) |
4469 | goto out_destroy; | 4629 | goto out_destroy; |
4470 | } | 4630 | } |
@@ -4472,10 +4632,12 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, | |||
4472 | 4632 | ||
4473 | /* | 4633 | /* |
4474 | * On the default hierarchy, a child doesn't automatically inherit | 4634 | * On the default hierarchy, a child doesn't automatically inherit |
4475 | * child_subsys_mask from the parent. Each is configured manually. | 4635 | * subtree_control from the parent. Each is configured manually. |
4476 | */ | 4636 | */ |
4477 | if (!cgroup_on_dfl(cgrp)) | 4637 | if (!cgroup_on_dfl(cgrp)) { |
4478 | cgrp->child_subsys_mask = parent->child_subsys_mask; | 4638 | cgrp->subtree_control = parent->subtree_control; |
4639 | cgroup_refresh_child_subsys_mask(cgrp); | ||
4640 | } | ||
4479 | 4641 | ||
4480 | kernfs_activate(kn); | 4642 | kernfs_activate(kn); |
4481 | 4643 | ||
@@ -4738,8 +4900,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early) | |||
4738 | */ | 4900 | */ |
4739 | int __init cgroup_init_early(void) | 4901 | int __init cgroup_init_early(void) |
4740 | { | 4902 | { |
4741 | static struct cgroup_sb_opts __initdata opts = | 4903 | static struct cgroup_sb_opts __initdata opts; |
4742 | { .flags = CGRP_ROOT_SANE_BEHAVIOR }; | ||
4743 | struct cgroup_subsys *ss; | 4904 | struct cgroup_subsys *ss; |
4744 | int i; | 4905 | int i; |
4745 | 4906 | ||
@@ -4777,7 +4938,8 @@ int __init cgroup_init(void) | |||
4777 | unsigned long key; | 4938 | unsigned long key; |
4778 | int ssid, err; | 4939 | int ssid, err; |
4779 | 4940 | ||
4780 | BUG_ON(cgroup_init_cftypes(NULL, cgroup_base_files)); | 4941 | BUG_ON(cgroup_init_cftypes(NULL, cgroup_dfl_base_files)); |
4942 | BUG_ON(cgroup_init_cftypes(NULL, cgroup_legacy_base_files)); | ||
4781 | 4943 | ||
4782 | mutex_lock(&cgroup_mutex); | 4944 | mutex_lock(&cgroup_mutex); |
4783 | 4945 | ||
@@ -4809,9 +4971,22 @@ int __init cgroup_init(void) | |||
4809 | * disabled flag and cftype registration needs kmalloc, | 4971 | * disabled flag and cftype registration needs kmalloc, |
4810 | * both of which aren't available during early_init. | 4972 | * both of which aren't available during early_init. |
4811 | */ | 4973 | */ |
4812 | if (!ss->disabled) { | 4974 | if (ss->disabled) |
4813 | cgrp_dfl_root.subsys_mask |= 1 << ss->id; | 4975 | continue; |
4814 | WARN_ON(cgroup_add_cftypes(ss, ss->base_cftypes)); | 4976 | |
4977 | cgrp_dfl_root.subsys_mask |= 1 << ss->id; | ||
4978 | |||
4979 | if (cgroup_legacy_files_on_dfl && !ss->dfl_cftypes) | ||
4980 | ss->dfl_cftypes = ss->legacy_cftypes; | ||
4981 | |||
4982 | if (!ss->dfl_cftypes) | ||
4983 | cgrp_dfl_root_inhibit_ss_mask |= 1 << ss->id; | ||
4984 | |||
4985 | if (ss->dfl_cftypes == ss->legacy_cftypes) { | ||
4986 | WARN_ON(cgroup_add_cftypes(ss, ss->dfl_cftypes)); | ||
4987 | } else { | ||
4988 | WARN_ON(cgroup_add_dfl_cftypes(ss, ss->dfl_cftypes)); | ||
4989 | WARN_ON(cgroup_add_legacy_cftypes(ss, ss->legacy_cftypes)); | ||
4815 | } | 4990 | } |
4816 | } | 4991 | } |
4817 | 4992 | ||
@@ -5207,6 +5382,14 @@ static int __init cgroup_disable(char *str) | |||
5207 | } | 5382 | } |
5208 | __setup("cgroup_disable=", cgroup_disable); | 5383 | __setup("cgroup_disable=", cgroup_disable); |
5209 | 5384 | ||
5385 | static int __init cgroup_set_legacy_files_on_dfl(char *str) | ||
5386 | { | ||
5387 | printk("cgroup: using legacy files on the default hierarchy\n"); | ||
5388 | cgroup_legacy_files_on_dfl = true; | ||
5389 | return 0; | ||
5390 | } | ||
5391 | __setup("cgroup__DEVEL__legacy_files_on_dfl", cgroup_set_legacy_files_on_dfl); | ||
5392 | |||
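For experimentation, the __setup() handler added above keeps the legacy interface files available on the default hierarchy. A minimal illustration (the exact bootloader syntax is an assumption) is appending the parameter to the kernel command line:

    cgroup__DEVEL__legacy_files_on_dfl

When the flag is set, cgroup_init() in the earlier hunk falls back to a controller's legacy_cftypes wherever no dfl_cftypes are provided.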
5210 | /** | 5393 | /** |
5211 | * css_tryget_online_from_dir - get corresponding css from a cgroup dentry | 5394 | * css_tryget_online_from_dir - get corresponding css from a cgroup dentry |
5212 | * @dentry: directory dentry of interest | 5395 | * @dentry: directory dentry of interest |
@@ -5401,6 +5584,6 @@ static struct cftype debug_files[] = { | |||
5401 | struct cgroup_subsys debug_cgrp_subsys = { | 5584 | struct cgroup_subsys debug_cgrp_subsys = { |
5402 | .css_alloc = debug_css_alloc, | 5585 | .css_alloc = debug_css_alloc, |
5403 | .css_free = debug_css_free, | 5586 | .css_free = debug_css_free, |
5404 | .base_cftypes = debug_files, | 5587 | .legacy_cftypes = debug_files, |
5405 | }; | 5588 | }; |
5406 | #endif /* CONFIG_CGROUP_DEBUG */ | 5589 | #endif /* CONFIG_CGROUP_DEBUG */ |
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index a79e40f9d700..92b98cc0ee76 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c | |||
@@ -480,5 +480,5 @@ struct cgroup_subsys freezer_cgrp_subsys = { | |||
480 | .css_free = freezer_css_free, | 480 | .css_free = freezer_css_free, |
481 | .attach = freezer_attach, | 481 | .attach = freezer_attach, |
482 | .fork = freezer_fork, | 482 | .fork = freezer_fork, |
483 | .base_cftypes = files, | 483 | .legacy_cftypes = files, |
484 | }; | 484 | }; |
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 116a4164720a..22874d7cf2c0 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
@@ -76,8 +76,34 @@ struct cpuset { | |||
76 | struct cgroup_subsys_state css; | 76 | struct cgroup_subsys_state css; |
77 | 77 | ||
78 | unsigned long flags; /* "unsigned long" so bitops work */ | 78 | unsigned long flags; /* "unsigned long" so bitops work */ |
79 | cpumask_var_t cpus_allowed; /* CPUs allowed to tasks in cpuset */ | 79 | |
80 | nodemask_t mems_allowed; /* Memory Nodes allowed to tasks */ | 80 | /* |
81 | * On default hierarchy: | ||
82 | * | ||
83 | * The user-configured masks can only be changed by writing to | ||
84 | * cpuset.cpus and cpuset.mems, and won't be limited by the | ||
85 | * parent masks. | ||
86 | * | ||
87 | * The effective masks are the real masks that apply to the tasks | ||
88 | * in the cpuset. They may be changed if the configured masks are | ||
89 | * changed or hotplug happens. | ||
90 | * | ||
91 | * effective_mask == configured_mask & parent's effective_mask, | ||
92 | * and if it ends up empty, it will inherit the parent's mask. | ||
93 | * | ||
94 | * | ||
95 | * On legacy hierarchy: | ||
96 | * | ||
97 | * The user-configured masks are always the same as the effective masks. | ||
98 | */ | ||
99 | |||
100 | /* user-configured CPUs and Memory Nodes allowed to tasks */ | ||
101 | cpumask_var_t cpus_allowed; | ||
102 | nodemask_t mems_allowed; | ||
103 | |||
104 | /* effective CPUs and Memory Nodes allowed to tasks */ | ||
105 | cpumask_var_t effective_cpus; | ||
106 | nodemask_t effective_mems; | ||
81 | 107 | ||
82 | /* | 108 | /* |
83 | * This is old Memory Nodes tasks took on. | 109 | * This is old Memory Nodes tasks took on. |
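The rule spelled out in the comment above can be modeled in a few lines. This is a toy illustration only, with a 64-bit word standing in for a kernel cpumask:

    #include <stdint.h>
    #include <stdio.h>

    /* effective = configured & parent's effective; if that comes out
     * empty, inherit the parent's effective mask instead. */
    static uint64_t effective_mask(uint64_t configured, uint64_t parent_eff)
    {
            uint64_t eff = configured & parent_eff;

            return eff ? eff : parent_eff;
    }

    int main(void)
    {
            uint64_t root_eff = 0x0f;                 /* CPUs 0-3 online */
            uint64_t child_eff = effective_mask(0x30, root_eff); /* wants 4-5 */

            /* 0x30 & 0x0f is empty, so the child inherits the parent's mask */
            printf("child effective: %#jx\n", (uintmax_t)child_eff);
            return 0;
    }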
@@ -307,9 +333,9 @@ static struct file_system_type cpuset_fs_type = { | |||
307 | */ | 333 | */ |
308 | static void guarantee_online_cpus(struct cpuset *cs, struct cpumask *pmask) | 334 | static void guarantee_online_cpus(struct cpuset *cs, struct cpumask *pmask) |
309 | { | 335 | { |
310 | while (!cpumask_intersects(cs->cpus_allowed, cpu_online_mask)) | 336 | while (!cpumask_intersects(cs->effective_cpus, cpu_online_mask)) |
311 | cs = parent_cs(cs); | 337 | cs = parent_cs(cs); |
312 | cpumask_and(pmask, cs->cpus_allowed, cpu_online_mask); | 338 | cpumask_and(pmask, cs->effective_cpus, cpu_online_mask); |
313 | } | 339 | } |
314 | 340 | ||
315 | /* | 341 | /* |
@@ -325,9 +351,9 @@ static void guarantee_online_cpus(struct cpuset *cs, struct cpumask *pmask) | |||
325 | */ | 351 | */ |
326 | static void guarantee_online_mems(struct cpuset *cs, nodemask_t *pmask) | 352 | static void guarantee_online_mems(struct cpuset *cs, nodemask_t *pmask) |
327 | { | 353 | { |
328 | while (!nodes_intersects(cs->mems_allowed, node_states[N_MEMORY])) | 354 | while (!nodes_intersects(cs->effective_mems, node_states[N_MEMORY])) |
329 | cs = parent_cs(cs); | 355 | cs = parent_cs(cs); |
330 | nodes_and(*pmask, cs->mems_allowed, node_states[N_MEMORY]); | 356 | nodes_and(*pmask, cs->effective_mems, node_states[N_MEMORY]); |
331 | } | 357 | } |
332 | 358 | ||
333 | /* | 359 | /* |
@@ -376,13 +402,20 @@ static struct cpuset *alloc_trial_cpuset(struct cpuset *cs) | |||
376 | if (!trial) | 402 | if (!trial) |
377 | return NULL; | 403 | return NULL; |
378 | 404 | ||
379 | if (!alloc_cpumask_var(&trial->cpus_allowed, GFP_KERNEL)) { | 405 | if (!alloc_cpumask_var(&trial->cpus_allowed, GFP_KERNEL)) |
380 | kfree(trial); | 406 | goto free_cs; |
381 | return NULL; | 407 | if (!alloc_cpumask_var(&trial->effective_cpus, GFP_KERNEL)) |
382 | } | 408 | goto free_cpus; |
383 | cpumask_copy(trial->cpus_allowed, cs->cpus_allowed); | ||
384 | 409 | ||
410 | cpumask_copy(trial->cpus_allowed, cs->cpus_allowed); | ||
411 | cpumask_copy(trial->effective_cpus, cs->effective_cpus); | ||
385 | return trial; | 412 | return trial; |
413 | |||
414 | free_cpus: | ||
415 | free_cpumask_var(trial->cpus_allowed); | ||
416 | free_cs: | ||
417 | kfree(trial); | ||
418 | return NULL; | ||
386 | } | 419 | } |
387 | 420 | ||
388 | /** | 421 | /** |
@@ -391,6 +424,7 @@ static struct cpuset *alloc_trial_cpuset(struct cpuset *cs) | |||
391 | */ | 424 | */ |
392 | static void free_trial_cpuset(struct cpuset *trial) | 425 | static void free_trial_cpuset(struct cpuset *trial) |
393 | { | 426 | { |
427 | free_cpumask_var(trial->effective_cpus); | ||
394 | free_cpumask_var(trial->cpus_allowed); | 428 | free_cpumask_var(trial->cpus_allowed); |
395 | kfree(trial); | 429 | kfree(trial); |
396 | } | 430 | } |
@@ -436,9 +470,9 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial) | |||
436 | 470 | ||
437 | par = parent_cs(cur); | 471 | par = parent_cs(cur); |
438 | 472 | ||
439 | /* We must be a subset of our parent cpuset */ | 473 | /* On legacy hierarchy, we must be a subset of our parent cpuset. */ |
440 | ret = -EACCES; | 474 | ret = -EACCES; |
441 | if (!is_cpuset_subset(trial, par)) | 475 | if (!cgroup_on_dfl(cur->css.cgroup) && !is_cpuset_subset(trial, par)) |
442 | goto out; | 476 | goto out; |
443 | 477 | ||
444 | /* | 478 | /* |
@@ -480,11 +514,11 @@ out: | |||
480 | #ifdef CONFIG_SMP | 514 | #ifdef CONFIG_SMP |
481 | /* | 515 | /* |
482 | * Helper routine for generate_sched_domains(). | 516 | * Helper routine for generate_sched_domains(). |
483 | * Do cpusets a, b have overlapping cpus_allowed masks? | 517 | * Do cpusets a, b have overlapping effective cpus_allowed masks? |
484 | */ | 518 | */ |
485 | static int cpusets_overlap(struct cpuset *a, struct cpuset *b) | 519 | static int cpusets_overlap(struct cpuset *a, struct cpuset *b) |
486 | { | 520 | { |
487 | return cpumask_intersects(a->cpus_allowed, b->cpus_allowed); | 521 | return cpumask_intersects(a->effective_cpus, b->effective_cpus); |
488 | } | 522 | } |
489 | 523 | ||
490 | static void | 524 | static void |
@@ -601,7 +635,7 @@ static int generate_sched_domains(cpumask_var_t **domains, | |||
601 | *dattr = SD_ATTR_INIT; | 635 | *dattr = SD_ATTR_INIT; |
602 | update_domain_attr_tree(dattr, &top_cpuset); | 636 | update_domain_attr_tree(dattr, &top_cpuset); |
603 | } | 637 | } |
604 | cpumask_copy(doms[0], top_cpuset.cpus_allowed); | 638 | cpumask_copy(doms[0], top_cpuset.effective_cpus); |
605 | 639 | ||
606 | goto done; | 640 | goto done; |
607 | } | 641 | } |
@@ -705,7 +739,7 @@ restart: | |||
705 | struct cpuset *b = csa[j]; | 739 | struct cpuset *b = csa[j]; |
706 | 740 | ||
707 | if (apn == b->pn) { | 741 | if (apn == b->pn) { |
708 | cpumask_or(dp, dp, b->cpus_allowed); | 742 | cpumask_or(dp, dp, b->effective_cpus); |
709 | if (dattr) | 743 | if (dattr) |
710 | update_domain_attr_tree(dattr + nslot, b); | 744 | update_domain_attr_tree(dattr + nslot, b); |
711 | 745 | ||
@@ -757,7 +791,7 @@ static void rebuild_sched_domains_locked(void) | |||
757 | * passing doms with offlined cpu to partition_sched_domains(). | 791 | * passing doms with offlined cpu to partition_sched_domains(). |
758 | * Anyways, hotplug work item will rebuild sched domains. | 792 | * Anyways, hotplug work item will rebuild sched domains. |
759 | */ | 793 | */ |
760 | if (!cpumask_equal(top_cpuset.cpus_allowed, cpu_active_mask)) | 794 | if (!cpumask_equal(top_cpuset.effective_cpus, cpu_active_mask)) |
761 | goto out; | 795 | goto out; |
762 | 796 | ||
763 | /* Generate domain masks and attrs */ | 797 | /* Generate domain masks and attrs */ |
@@ -781,45 +815,6 @@ void rebuild_sched_domains(void) | |||
781 | mutex_unlock(&cpuset_mutex); | 815 | mutex_unlock(&cpuset_mutex); |
782 | } | 816 | } |
783 | 817 | ||
784 | /* | ||
785 | * effective_cpumask_cpuset - return nearest ancestor with non-empty cpus | ||
786 | * @cs: the cpuset in interest | ||
787 | * | ||
788 | * A cpuset's effective cpumask is the cpumask of the nearest ancestor | ||
789 | * with non-empty cpus. We use effective cpumask whenever: | ||
790 | * - we update tasks' cpus_allowed. (they take on the ancestor's cpumask | ||
791 | * if the cpuset they reside in has no cpus) | ||
792 | * - we want to retrieve task_cs(tsk)'s cpus_allowed. | ||
793 | * | ||
794 | * Called with cpuset_mutex held. cpuset_cpus_allowed_fallback() is an | ||
795 | * exception. See comments there. | ||
796 | */ | ||
797 | static struct cpuset *effective_cpumask_cpuset(struct cpuset *cs) | ||
798 | { | ||
799 | while (cpumask_empty(cs->cpus_allowed)) | ||
800 | cs = parent_cs(cs); | ||
801 | return cs; | ||
802 | } | ||
803 | |||
804 | /* | ||
805 | * effective_nodemask_cpuset - return nearest ancestor with non-empty mems | ||
806 | * @cs: the cpuset in interest | ||
807 | * | ||
808 | * A cpuset's effective nodemask is the nodemask of the nearest ancestor | ||
809 | * with non-empty memss. We use effective nodemask whenever: | ||
810 | * - we update tasks' mems_allowed. (they take on the ancestor's nodemask | ||
811 | * if the cpuset they reside in has no mems) | ||
812 | * - we want to retrieve task_cs(tsk)'s mems_allowed. | ||
813 | * | ||
814 | * Called with cpuset_mutex held. | ||
815 | */ | ||
816 | static struct cpuset *effective_nodemask_cpuset(struct cpuset *cs) | ||
817 | { | ||
818 | while (nodes_empty(cs->mems_allowed)) | ||
819 | cs = parent_cs(cs); | ||
820 | return cs; | ||
821 | } | ||
822 | |||
823 | /** | 818 | /** |
824 | * update_tasks_cpumask - Update the cpumasks of tasks in the cpuset. | 819 | * update_tasks_cpumask - Update the cpumasks of tasks in the cpuset. |
825 | * @cs: the cpuset in which each task's cpus_allowed mask needs to be changed | 820 | * @cs: the cpuset in which each task's cpus_allowed mask needs to be changed |
@@ -830,53 +825,80 @@ static struct cpuset *effective_nodemask_cpuset(struct cpuset *cs) | |||
830 | */ | 825 | */ |
831 | static void update_tasks_cpumask(struct cpuset *cs) | 826 | static void update_tasks_cpumask(struct cpuset *cs) |
832 | { | 827 | { |
833 | struct cpuset *cpus_cs = effective_cpumask_cpuset(cs); | ||
834 | struct css_task_iter it; | 828 | struct css_task_iter it; |
835 | struct task_struct *task; | 829 | struct task_struct *task; |
836 | 830 | ||
837 | css_task_iter_start(&cs->css, &it); | 831 | css_task_iter_start(&cs->css, &it); |
838 | while ((task = css_task_iter_next(&it))) | 832 | while ((task = css_task_iter_next(&it))) |
839 | set_cpus_allowed_ptr(task, cpus_cs->cpus_allowed); | 833 | set_cpus_allowed_ptr(task, cs->effective_cpus); |
840 | css_task_iter_end(&it); | 834 | css_task_iter_end(&it); |
841 | } | 835 | } |
842 | 836 | ||
843 | /* | 837 | /* |
844 | * update_tasks_cpumask_hier - Update the cpumasks of tasks in the hierarchy. | 838 | * update_cpumasks_hier - Update effective cpumasks and tasks in the subtree |
845 | * @root_cs: the root cpuset of the hierarchy | 839 | * @cs: the cpuset to consider |
846 | * @update_root: update root cpuset or not? | 840 | * @new_cpus: temp variable for calculating new effective_cpus |
841 | * | ||
842 | * When the configured cpumask is changed, the effective cpumasks of this cpuset | ||
843 | * and all its descendants need to be updated. | ||
847 | * | 844 | * |
848 | * This will update cpumasks of tasks in @root_cs and all other empty cpusets | 845 | * On legacy hierarchy, effective_cpus will be the same as cpus_allowed. |
849 | * which take on cpumask of @root_cs. | ||
850 | * | 846 | * |
851 | * Called with cpuset_mutex held | 847 | * Called with cpuset_mutex held |
852 | */ | 848 | */ |
853 | static void update_tasks_cpumask_hier(struct cpuset *root_cs, bool update_root) | 849 | static void update_cpumasks_hier(struct cpuset *cs, struct cpumask *new_cpus) |
854 | { | 850 | { |
855 | struct cpuset *cp; | 851 | struct cpuset *cp; |
856 | struct cgroup_subsys_state *pos_css; | 852 | struct cgroup_subsys_state *pos_css; |
853 | bool need_rebuild_sched_domains = false; | ||
857 | 854 | ||
858 | rcu_read_lock(); | 855 | rcu_read_lock(); |
859 | cpuset_for_each_descendant_pre(cp, pos_css, root_cs) { | 856 | cpuset_for_each_descendant_pre(cp, pos_css, cs) { |
860 | if (cp == root_cs) { | 857 | struct cpuset *parent = parent_cs(cp); |
861 | if (!update_root) | 858 | |
862 | continue; | 859 | cpumask_and(new_cpus, cp->cpus_allowed, parent->effective_cpus); |
863 | } else { | 860 | |
864 | /* skip the whole subtree if @cp have some CPU */ | 861 | /* |
865 | if (!cpumask_empty(cp->cpus_allowed)) { | 862 | * If it becomes empty, inherit the effective mask of the |
866 | pos_css = css_rightmost_descendant(pos_css); | 863 | * parent, which is guaranteed to have some CPUs. |
867 | continue; | 864 | */ |
868 | } | 865 | if (cpumask_empty(new_cpus)) |
866 | cpumask_copy(new_cpus, parent->effective_cpus); | ||
867 | |||
868 | /* Skip the whole subtree if the cpumask remains the same. */ | ||
869 | if (cpumask_equal(new_cpus, cp->effective_cpus)) { | ||
870 | pos_css = css_rightmost_descendant(pos_css); | ||
871 | continue; | ||
869 | } | 872 | } |
873 | |||
870 | if (!css_tryget_online(&cp->css)) | 874 | if (!css_tryget_online(&cp->css)) |
871 | continue; | 875 | continue; |
872 | rcu_read_unlock(); | 876 | rcu_read_unlock(); |
873 | 877 | ||
878 | mutex_lock(&callback_mutex); | ||
879 | cpumask_copy(cp->effective_cpus, new_cpus); | ||
880 | mutex_unlock(&callback_mutex); | ||
881 | |||
882 | WARN_ON(!cgroup_on_dfl(cp->css.cgroup) && | ||
883 | !cpumask_equal(cp->cpus_allowed, cp->effective_cpus)); | ||
884 | |||
874 | update_tasks_cpumask(cp); | 885 | update_tasks_cpumask(cp); |
875 | 886 | ||
887 | /* | ||
888 | * If the effective cpumask of any non-empty cpuset is changed, | ||
889 | * we need to rebuild sched domains. | ||
890 | */ | ||
891 | if (!cpumask_empty(cp->cpus_allowed) && | ||
892 | is_sched_load_balance(cp)) | ||
893 | need_rebuild_sched_domains = true; | ||
894 | |||
876 | rcu_read_lock(); | 895 | rcu_read_lock(); |
877 | css_put(&cp->css); | 896 | css_put(&cp->css); |
878 | } | 897 | } |
879 | rcu_read_unlock(); | 898 | rcu_read_unlock(); |
899 | |||
900 | if (need_rebuild_sched_domains) | ||
901 | rebuild_sched_domains_locked(); | ||
880 | } | 902 | } |
881 | 903 | ||
882 | /** | 904 | /** |
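update_cpumasks_hier() above is essentially a pruned pre-order walk over the subtree. The following self-contained sketch captures the same idea with plain recursion in place of cpuset_for_each_descendant_pre(), and with the locking, css refcounting and task updates left out (illustrative only):

    #include <stdint.h>

    struct node {
            uint64_t configured;        /* cpus_allowed */
            uint64_t effective;         /* effective_cpus */
            struct node *child;         /* first child */
            struct node *sibling;       /* next sibling */
    };

    /* Recompute effective masks below @n, given the parent's effective
     * mask.  Returning early when a node's mask is unchanged prunes the
     * subtree, much as css_rightmost_descendant() does in the kernel. */
    static void propagate(struct node *n, uint64_t parent_eff)
    {
            uint64_t eff = n->configured & parent_eff;

            if (!eff)                       /* empty: inherit parent's mask */
                    eff = parent_eff;
            if (eff == n->effective)        /* unchanged: skip whole subtree */
                    return;
            n->effective = eff;
            /* the kernel also updates each task's cpus_allowed here */
            for (struct node *c = n->child; c; c = c->sibling)
                    propagate(c, eff);
    }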
@@ -889,7 +911,6 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, | |||
889 | const char *buf) | 911 | const char *buf) |
890 | { | 912 | { |
891 | int retval; | 913 | int retval; |
892 | int is_load_balanced; | ||
893 | 914 | ||
894 | /* top_cpuset.cpus_allowed tracks cpu_online_mask; it's read-only */ | 915 | /* top_cpuset.cpus_allowed tracks cpu_online_mask; it's read-only */ |
895 | if (cs == &top_cpuset) | 916 | if (cs == &top_cpuset) |
@@ -908,7 +929,8 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, | |||
908 | if (retval < 0) | 929 | if (retval < 0) |
909 | return retval; | 930 | return retval; |
910 | 931 | ||
911 | if (!cpumask_subset(trialcs->cpus_allowed, cpu_active_mask)) | 932 | if (!cpumask_subset(trialcs->cpus_allowed, |
933 | top_cpuset.cpus_allowed)) | ||
912 | return -EINVAL; | 934 | return -EINVAL; |
913 | } | 935 | } |
914 | 936 | ||
@@ -920,16 +942,12 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, | |||
920 | if (retval < 0) | 942 | if (retval < 0) |
921 | return retval; | 943 | return retval; |
922 | 944 | ||
923 | is_load_balanced = is_sched_load_balance(trialcs); | ||
924 | |||
925 | mutex_lock(&callback_mutex); | 945 | mutex_lock(&callback_mutex); |
926 | cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed); | 946 | cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed); |
927 | mutex_unlock(&callback_mutex); | 947 | mutex_unlock(&callback_mutex); |
928 | 948 | ||
929 | update_tasks_cpumask_hier(cs, true); | 949 | /* use trialcs->cpus_allowed as a temp variable */ |
930 | 950 | update_cpumasks_hier(cs, trialcs->cpus_allowed); | |
931 | if (is_load_balanced) | ||
932 | rebuild_sched_domains_locked(); | ||
933 | return 0; | 951 | return 0; |
934 | } | 952 | } |
935 | 953 | ||
@@ -951,15 +969,13 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from, | |||
951 | const nodemask_t *to) | 969 | const nodemask_t *to) |
952 | { | 970 | { |
953 | struct task_struct *tsk = current; | 971 | struct task_struct *tsk = current; |
954 | struct cpuset *mems_cs; | ||
955 | 972 | ||
956 | tsk->mems_allowed = *to; | 973 | tsk->mems_allowed = *to; |
957 | 974 | ||
958 | do_migrate_pages(mm, from, to, MPOL_MF_MOVE_ALL); | 975 | do_migrate_pages(mm, from, to, MPOL_MF_MOVE_ALL); |
959 | 976 | ||
960 | rcu_read_lock(); | 977 | rcu_read_lock(); |
961 | mems_cs = effective_nodemask_cpuset(task_cs(tsk)); | 978 | guarantee_online_mems(task_cs(tsk), &tsk->mems_allowed); |
962 | guarantee_online_mems(mems_cs, &tsk->mems_allowed); | ||
963 | rcu_read_unlock(); | 979 | rcu_read_unlock(); |
964 | } | 980 | } |
965 | 981 | ||
@@ -1028,13 +1044,12 @@ static void *cpuset_being_rebound; | |||
1028 | static void update_tasks_nodemask(struct cpuset *cs) | 1044 | static void update_tasks_nodemask(struct cpuset *cs) |
1029 | { | 1045 | { |
1030 | static nodemask_t newmems; /* protected by cpuset_mutex */ | 1046 | static nodemask_t newmems; /* protected by cpuset_mutex */ |
1031 | struct cpuset *mems_cs = effective_nodemask_cpuset(cs); | ||
1032 | struct css_task_iter it; | 1047 | struct css_task_iter it; |
1033 | struct task_struct *task; | 1048 | struct task_struct *task; |
1034 | 1049 | ||
1035 | cpuset_being_rebound = cs; /* causes mpol_dup() rebind */ | 1050 | cpuset_being_rebound = cs; /* causes mpol_dup() rebind */ |
1036 | 1051 | ||
1037 | guarantee_online_mems(mems_cs, &newmems); | 1052 | guarantee_online_mems(cs, &newmems); |
1038 | 1053 | ||
1039 | /* | 1054 | /* |
1040 | * The mpol_rebind_mm() call takes mmap_sem, which we couldn't | 1055 | * The mpol_rebind_mm() call takes mmap_sem, which we couldn't |
@@ -1077,36 +1092,52 @@ static void update_tasks_nodemask(struct cpuset *cs) | |||
1077 | } | 1092 | } |
1078 | 1093 | ||
1079 | /* | 1094 | /* |
1080 | * update_tasks_nodemask_hier - Update the nodemasks of tasks in the hierarchy. | 1095 | * update_nodemasks_hier - Update effective nodemasks and tasks in the subtree |
1081 | * @cs: the root cpuset of the hierarchy | 1096 | * @cs: the cpuset to consider |
1082 | * @update_root: update the root cpuset or not? | 1097 | * @new_mems: a temp variable for calculating new effective_mems |
1083 | * | 1098 | * |
1084 | * This will update nodemasks of tasks in @root_cs and all other empty cpusets | 1099 | * When the configured nodemask is changed, the effective nodemasks of this cpuset |
1085 | * which take on nodemask of @root_cs. | 1100 | * and all its descendants need to be updated. |
1101 | * | ||
1102 | * On legacy hierarchy, effective_mems will be the same as mems_allowed. | ||
1086 | * | 1103 | * |
1087 | * Called with cpuset_mutex held | 1104 | * Called with cpuset_mutex held |
1088 | */ | 1105 | */ |
1089 | static void update_tasks_nodemask_hier(struct cpuset *root_cs, bool update_root) | 1106 | static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems) |
1090 | { | 1107 | { |
1091 | struct cpuset *cp; | 1108 | struct cpuset *cp; |
1092 | struct cgroup_subsys_state *pos_css; | 1109 | struct cgroup_subsys_state *pos_css; |
1093 | 1110 | ||
1094 | rcu_read_lock(); | 1111 | rcu_read_lock(); |
1095 | cpuset_for_each_descendant_pre(cp, pos_css, root_cs) { | 1112 | cpuset_for_each_descendant_pre(cp, pos_css, cs) { |
1096 | if (cp == root_cs) { | 1113 | struct cpuset *parent = parent_cs(cp); |
1097 | if (!update_root) | 1114 | |
1098 | continue; | 1115 | nodes_and(*new_mems, cp->mems_allowed, parent->effective_mems); |
1099 | } else { | 1116 | |
1100 | /* skip the whole subtree if @cp have some CPU */ | 1117 | /* |
1101 | if (!nodes_empty(cp->mems_allowed)) { | 1118 | * If it becomes empty, inherit the effective mask of the |
1102 | pos_css = css_rightmost_descendant(pos_css); | 1119 | * parent, which is guaranteed to have some MEMs. |
1103 | continue; | 1120 | */ |
1104 | } | 1121 | if (nodes_empty(*new_mems)) |
1122 | *new_mems = parent->effective_mems; | ||
1123 | |||
1124 | /* Skip the whole subtree if the nodemask remains the same. */ | ||
1125 | if (nodes_equal(*new_mems, cp->effective_mems)) { | ||
1126 | pos_css = css_rightmost_descendant(pos_css); | ||
1127 | continue; | ||
1105 | } | 1128 | } |
1129 | |||
1106 | if (!css_tryget_online(&cp->css)) | 1130 | if (!css_tryget_online(&cp->css)) |
1107 | continue; | 1131 | continue; |
1108 | rcu_read_unlock(); | 1132 | rcu_read_unlock(); |
1109 | 1133 | ||
1134 | mutex_lock(&callback_mutex); | ||
1135 | cp->effective_mems = *new_mems; | ||
1136 | mutex_unlock(&callback_mutex); | ||
1137 | |||
1138 | WARN_ON(!cgroup_on_dfl(cp->css.cgroup) && | ||
1139 | !nodes_equal(cp->mems_allowed, cp->effective_mems)); | ||
1140 | |||
1110 | update_tasks_nodemask(cp); | 1141 | update_tasks_nodemask(cp); |
1111 | 1142 | ||
1112 | rcu_read_lock(); | 1143 | rcu_read_lock(); |
@@ -1156,8 +1187,8 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs, | |||
1156 | goto done; | 1187 | goto done; |
1157 | 1188 | ||
1158 | if (!nodes_subset(trialcs->mems_allowed, | 1189 | if (!nodes_subset(trialcs->mems_allowed, |
1159 | node_states[N_MEMORY])) { | 1190 | top_cpuset.mems_allowed)) { |
1160 | retval = -EINVAL; | 1191 | retval = -EINVAL; |
1161 | goto done; | 1192 | goto done; |
1162 | } | 1193 | } |
1163 | } | 1194 | } |
@@ -1174,7 +1205,8 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs, | |||
1174 | cs->mems_allowed = trialcs->mems_allowed; | 1205 | cs->mems_allowed = trialcs->mems_allowed; |
1175 | mutex_unlock(&callback_mutex); | 1206 | mutex_unlock(&callback_mutex); |
1176 | 1207 | ||
1177 | update_tasks_nodemask_hier(cs, true); | 1208 | /* use trialcs->mems_allowed as a temp variable */ |
1209 | update_nodemasks_hier(cs, &cs->mems_allowed); | ||
1178 | done: | 1210 | done: |
1179 | return retval; | 1211 | return retval; |
1180 | } | 1212 | } |
@@ -1389,12 +1421,9 @@ static int cpuset_can_attach(struct cgroup_subsys_state *css, | |||
1389 | 1421 | ||
1390 | mutex_lock(&cpuset_mutex); | 1422 | mutex_lock(&cpuset_mutex); |
1391 | 1423 | ||
1392 | /* | 1424 | /* allow moving tasks into an empty cpuset if on default hierarchy */ |
1393 | * We allow to move tasks into an empty cpuset if sane_behavior | ||
1394 | * flag is set. | ||
1395 | */ | ||
1396 | ret = -ENOSPC; | 1425 | ret = -ENOSPC; |
1397 | if (!cgroup_sane_behavior(css->cgroup) && | 1426 | if (!cgroup_on_dfl(css->cgroup) && |
1398 | (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))) | 1427 | (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))) |
1399 | goto out_unlock; | 1428 | goto out_unlock; |
1400 | 1429 | ||
@@ -1452,8 +1481,6 @@ static void cpuset_attach(struct cgroup_subsys_state *css, | |||
1452 | struct task_struct *leader = cgroup_taskset_first(tset); | 1481 | struct task_struct *leader = cgroup_taskset_first(tset); |
1453 | struct cpuset *cs = css_cs(css); | 1482 | struct cpuset *cs = css_cs(css); |
1454 | struct cpuset *oldcs = cpuset_attach_old_cs; | 1483 | struct cpuset *oldcs = cpuset_attach_old_cs; |
1455 | struct cpuset *cpus_cs = effective_cpumask_cpuset(cs); | ||
1456 | struct cpuset *mems_cs = effective_nodemask_cpuset(cs); | ||
1457 | 1484 | ||
1458 | mutex_lock(&cpuset_mutex); | 1485 | mutex_lock(&cpuset_mutex); |
1459 | 1486 | ||
@@ -1461,9 +1488,9 @@ static void cpuset_attach(struct cgroup_subsys_state *css, | |||
1461 | if (cs == &top_cpuset) | 1488 | if (cs == &top_cpuset) |
1462 | cpumask_copy(cpus_attach, cpu_possible_mask); | 1489 | cpumask_copy(cpus_attach, cpu_possible_mask); |
1463 | else | 1490 | else |
1464 | guarantee_online_cpus(cpus_cs, cpus_attach); | 1491 | guarantee_online_cpus(cs, cpus_attach); |
1465 | 1492 | ||
1466 | guarantee_online_mems(mems_cs, &cpuset_attach_nodemask_to); | 1493 | guarantee_online_mems(cs, &cpuset_attach_nodemask_to); |
1467 | 1494 | ||
1468 | cgroup_taskset_for_each(task, tset) { | 1495 | cgroup_taskset_for_each(task, tset) { |
1469 | /* | 1496 | /* |
@@ -1480,11 +1507,9 @@ static void cpuset_attach(struct cgroup_subsys_state *css, | |||
1480 | * Change mm, possibly for multiple threads in a threadgroup. This is | 1507 | * Change mm, possibly for multiple threads in a threadgroup. This is |
1481 | * expensive and may sleep. | 1508 | * expensive and may sleep. |
1482 | */ | 1509 | */ |
1483 | cpuset_attach_nodemask_to = cs->mems_allowed; | 1510 | cpuset_attach_nodemask_to = cs->effective_mems; |
1484 | mm = get_task_mm(leader); | 1511 | mm = get_task_mm(leader); |
1485 | if (mm) { | 1512 | if (mm) { |
1486 | struct cpuset *mems_oldcs = effective_nodemask_cpuset(oldcs); | ||
1487 | |||
1488 | mpol_rebind_mm(mm, &cpuset_attach_nodemask_to); | 1513 | mpol_rebind_mm(mm, &cpuset_attach_nodemask_to); |
1489 | 1514 | ||
1490 | /* | 1515 | /* |
@@ -1495,7 +1520,7 @@ static void cpuset_attach(struct cgroup_subsys_state *css, | |||
1495 | * mm from. | 1520 | * mm from. |
1496 | */ | 1521 | */ |
1497 | if (is_memory_migrate(cs)) { | 1522 | if (is_memory_migrate(cs)) { |
1498 | cpuset_migrate_mm(mm, &mems_oldcs->old_mems_allowed, | 1523 | cpuset_migrate_mm(mm, &oldcs->old_mems_allowed, |
1499 | &cpuset_attach_nodemask_to); | 1524 | &cpuset_attach_nodemask_to); |
1500 | } | 1525 | } |
1501 | mmput(mm); | 1526 | mmput(mm); |
@@ -1516,6 +1541,8 @@ typedef enum { | |||
1516 | FILE_MEMORY_MIGRATE, | 1541 | FILE_MEMORY_MIGRATE, |
1517 | FILE_CPULIST, | 1542 | FILE_CPULIST, |
1518 | FILE_MEMLIST, | 1543 | FILE_MEMLIST, |
1544 | FILE_EFFECTIVE_CPULIST, | ||
1545 | FILE_EFFECTIVE_MEMLIST, | ||
1519 | FILE_CPU_EXCLUSIVE, | 1546 | FILE_CPU_EXCLUSIVE, |
1520 | FILE_MEM_EXCLUSIVE, | 1547 | FILE_MEM_EXCLUSIVE, |
1521 | FILE_MEM_HARDWALL, | 1548 | FILE_MEM_HARDWALL, |
@@ -1694,6 +1721,12 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v) | |||
1694 | case FILE_MEMLIST: | 1721 | case FILE_MEMLIST: |
1695 | s += nodelist_scnprintf(s, count, cs->mems_allowed); | 1722 | s += nodelist_scnprintf(s, count, cs->mems_allowed); |
1696 | break; | 1723 | break; |
1724 | case FILE_EFFECTIVE_CPULIST: | ||
1725 | s += cpulist_scnprintf(s, count, cs->effective_cpus); | ||
1726 | break; | ||
1727 | case FILE_EFFECTIVE_MEMLIST: | ||
1728 | s += nodelist_scnprintf(s, count, cs->effective_mems); | ||
1729 | break; | ||
1697 | default: | 1730 | default: |
1698 | ret = -EINVAL; | 1731 | ret = -EINVAL; |
1699 | goto out_unlock; | 1732 | goto out_unlock; |
@@ -1779,6 +1812,18 @@ static struct cftype files[] = { | |||
1779 | }, | 1812 | }, |
1780 | 1813 | ||
1781 | { | 1814 | { |
1815 | .name = "effective_cpus", | ||
1816 | .seq_show = cpuset_common_seq_show, | ||
1817 | .private = FILE_EFFECTIVE_CPULIST, | ||
1818 | }, | ||
1819 | |||
1820 | { | ||
1821 | .name = "effective_mems", | ||
1822 | .seq_show = cpuset_common_seq_show, | ||
1823 | .private = FILE_EFFECTIVE_MEMLIST, | ||
1824 | }, | ||
1825 | |||
1826 | { | ||
1782 | .name = "cpu_exclusive", | 1827 | .name = "cpu_exclusive", |
1783 | .read_u64 = cpuset_read_u64, | 1828 | .read_u64 = cpuset_read_u64, |
1784 | .write_u64 = cpuset_write_u64, | 1829 | .write_u64 = cpuset_write_u64, |
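The two read-only files added above expose the effective masks to userspace. A minimal reader follows, assuming a legacy-style cpuset mount at /sys/fs/cgroup/cpuset with a child group named "mygroup" (both are assumptions, as is the "cpuset." file-name prefix on such a mount):

    #include <stdio.h>

    int main(void)
    {
            char line[512];
            FILE *f = fopen("/sys/fs/cgroup/cpuset/mygroup/cpuset.effective_cpus", "r");

            if (!f) {
                    perror("fopen");
                    return 1;
            }
            while (fgets(line, sizeof(line), f))
                    fputs(line, stdout);
            fclose(f);
            return 0;
    }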
@@ -1869,18 +1914,26 @@ cpuset_css_alloc(struct cgroup_subsys_state *parent_css) | |||
1869 | cs = kzalloc(sizeof(*cs), GFP_KERNEL); | 1914 | cs = kzalloc(sizeof(*cs), GFP_KERNEL); |
1870 | if (!cs) | 1915 | if (!cs) |
1871 | return ERR_PTR(-ENOMEM); | 1916 | return ERR_PTR(-ENOMEM); |
1872 | if (!alloc_cpumask_var(&cs->cpus_allowed, GFP_KERNEL)) { | 1917 | if (!alloc_cpumask_var(&cs->cpus_allowed, GFP_KERNEL)) |
1873 | kfree(cs); | 1918 | goto free_cs; |
1874 | return ERR_PTR(-ENOMEM); | 1919 | if (!alloc_cpumask_var(&cs->effective_cpus, GFP_KERNEL)) |
1875 | } | 1920 | goto free_cpus; |
1876 | 1921 | ||
1877 | set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags); | 1922 | set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags); |
1878 | cpumask_clear(cs->cpus_allowed); | 1923 | cpumask_clear(cs->cpus_allowed); |
1879 | nodes_clear(cs->mems_allowed); | 1924 | nodes_clear(cs->mems_allowed); |
1925 | cpumask_clear(cs->effective_cpus); | ||
1926 | nodes_clear(cs->effective_mems); | ||
1880 | fmeter_init(&cs->fmeter); | 1927 | fmeter_init(&cs->fmeter); |
1881 | cs->relax_domain_level = -1; | 1928 | cs->relax_domain_level = -1; |
1882 | 1929 | ||
1883 | return &cs->css; | 1930 | return &cs->css; |
1931 | |||
1932 | free_cpus: | ||
1933 | free_cpumask_var(cs->cpus_allowed); | ||
1934 | free_cs: | ||
1935 | kfree(cs); | ||
1936 | return ERR_PTR(-ENOMEM); | ||
1884 | } | 1937 | } |
1885 | 1938 | ||
1886 | static int cpuset_css_online(struct cgroup_subsys_state *css) | 1939 | static int cpuset_css_online(struct cgroup_subsys_state *css) |
@@ -1903,6 +1956,13 @@ static int cpuset_css_online(struct cgroup_subsys_state *css) | |||
1903 | 1956 | ||
1904 | cpuset_inc(); | 1957 | cpuset_inc(); |
1905 | 1958 | ||
1959 | mutex_lock(&callback_mutex); | ||
1960 | if (cgroup_on_dfl(cs->css.cgroup)) { | ||
1961 | cpumask_copy(cs->effective_cpus, parent->effective_cpus); | ||
1962 | cs->effective_mems = parent->effective_mems; | ||
1963 | } | ||
1964 | mutex_unlock(&callback_mutex); | ||
1965 | |||
1906 | if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags)) | 1966 | if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags)) |
1907 | goto out_unlock; | 1967 | goto out_unlock; |
1908 | 1968 | ||
@@ -1962,20 +2022,40 @@ static void cpuset_css_free(struct cgroup_subsys_state *css) | |||
1962 | { | 2022 | { |
1963 | struct cpuset *cs = css_cs(css); | 2023 | struct cpuset *cs = css_cs(css); |
1964 | 2024 | ||
2025 | free_cpumask_var(cs->effective_cpus); | ||
1965 | free_cpumask_var(cs->cpus_allowed); | 2026 | free_cpumask_var(cs->cpus_allowed); |
1966 | kfree(cs); | 2027 | kfree(cs); |
1967 | } | 2028 | } |
1968 | 2029 | ||
2030 | static void cpuset_bind(struct cgroup_subsys_state *root_css) | ||
2031 | { | ||
2032 | mutex_lock(&cpuset_mutex); | ||
2033 | mutex_lock(&callback_mutex); | ||
2034 | |||
2035 | if (cgroup_on_dfl(root_css->cgroup)) { | ||
2036 | cpumask_copy(top_cpuset.cpus_allowed, cpu_possible_mask); | ||
2037 | top_cpuset.mems_allowed = node_possible_map; | ||
2038 | } else { | ||
2039 | cpumask_copy(top_cpuset.cpus_allowed, | ||
2040 | top_cpuset.effective_cpus); | ||
2041 | top_cpuset.mems_allowed = top_cpuset.effective_mems; | ||
2042 | } | ||
2043 | |||
2044 | mutex_unlock(&callback_mutex); | ||
2045 | mutex_unlock(&cpuset_mutex); | ||
2046 | } | ||
2047 | |||
1969 | struct cgroup_subsys cpuset_cgrp_subsys = { | 2048 | struct cgroup_subsys cpuset_cgrp_subsys = { |
1970 | .css_alloc = cpuset_css_alloc, | 2049 | .css_alloc = cpuset_css_alloc, |
1971 | .css_online = cpuset_css_online, | 2050 | .css_online = cpuset_css_online, |
1972 | .css_offline = cpuset_css_offline, | 2051 | .css_offline = cpuset_css_offline, |
1973 | .css_free = cpuset_css_free, | 2052 | .css_free = cpuset_css_free, |
1974 | .can_attach = cpuset_can_attach, | 2053 | .can_attach = cpuset_can_attach, |
1975 | .cancel_attach = cpuset_cancel_attach, | 2054 | .cancel_attach = cpuset_cancel_attach, |
1976 | .attach = cpuset_attach, | 2055 | .attach = cpuset_attach, |
1977 | .base_cftypes = files, | 2056 | .bind = cpuset_bind, |
1978 | .early_init = 1, | 2057 | .legacy_cftypes = files, |
2058 | .early_init = 1, | ||
1979 | }; | 2059 | }; |
1980 | 2060 | ||
1981 | /** | 2061 | /** |
@@ -1990,9 +2070,13 @@ int __init cpuset_init(void) | |||
1990 | 2070 | ||
1991 | if (!alloc_cpumask_var(&top_cpuset.cpus_allowed, GFP_KERNEL)) | 2071 | if (!alloc_cpumask_var(&top_cpuset.cpus_allowed, GFP_KERNEL)) |
1992 | BUG(); | 2072 | BUG(); |
2073 | if (!alloc_cpumask_var(&top_cpuset.effective_cpus, GFP_KERNEL)) | ||
2074 | BUG(); | ||
1993 | 2075 | ||
1994 | cpumask_setall(top_cpuset.cpus_allowed); | 2076 | cpumask_setall(top_cpuset.cpus_allowed); |
1995 | nodes_setall(top_cpuset.mems_allowed); | 2077 | nodes_setall(top_cpuset.mems_allowed); |
2078 | cpumask_setall(top_cpuset.effective_cpus); | ||
2079 | nodes_setall(top_cpuset.effective_mems); | ||
1996 | 2080 | ||
1997 | fmeter_init(&top_cpuset.fmeter); | 2081 | fmeter_init(&top_cpuset.fmeter); |
1998 | set_bit(CS_SCHED_LOAD_BALANCE, &top_cpuset.flags); | 2082 | set_bit(CS_SCHED_LOAD_BALANCE, &top_cpuset.flags); |
@@ -2035,6 +2119,66 @@ static void remove_tasks_in_empty_cpuset(struct cpuset *cs) | |||
2035 | } | 2119 | } |
2036 | } | 2120 | } |
2037 | 2121 | ||
2122 | static void | ||
2123 | hotplug_update_tasks_legacy(struct cpuset *cs, | ||
2124 | struct cpumask *new_cpus, nodemask_t *new_mems, | ||
2125 | bool cpus_updated, bool mems_updated) | ||
2126 | { | ||
2127 | bool is_empty; | ||
2128 | |||
2129 | mutex_lock(&callback_mutex); | ||
2130 | cpumask_copy(cs->cpus_allowed, new_cpus); | ||
2131 | cpumask_copy(cs->effective_cpus, new_cpus); | ||
2132 | cs->mems_allowed = *new_mems; | ||
2133 | cs->effective_mems = *new_mems; | ||
2134 | mutex_unlock(&callback_mutex); | ||
2135 | |||
2136 | /* | ||
2137 | * Don't call update_tasks_cpumask() if the cpuset becomes empty, | ||
2138 | * as the tasks will be migrated to an ancestor. | ||
2139 | */ | ||
2140 | if (cpus_updated && !cpumask_empty(cs->cpus_allowed)) | ||
2141 | update_tasks_cpumask(cs); | ||
2142 | if (mems_updated && !nodes_empty(cs->mems_allowed)) | ||
2143 | update_tasks_nodemask(cs); | ||
2144 | |||
2145 | is_empty = cpumask_empty(cs->cpus_allowed) || | ||
2146 | nodes_empty(cs->mems_allowed); | ||
2147 | |||
2148 | mutex_unlock(&cpuset_mutex); | ||
2149 | |||
2150 | /* | ||
2151 | * Move tasks to the nearest ancestor with execution resources. | ||
2152 | * This is a full cgroup operation which will also call back into | ||
2153 | * cpuset. Should be done outside any lock. | ||
2154 | */ | ||
2155 | if (is_empty) | ||
2156 | remove_tasks_in_empty_cpuset(cs); | ||
2157 | |||
2158 | mutex_lock(&cpuset_mutex); | ||
2159 | } | ||
2160 | |||
2161 | static void | ||
2162 | hotplug_update_tasks(struct cpuset *cs, | ||
2163 | struct cpumask *new_cpus, nodemask_t *new_mems, | ||
2164 | bool cpus_updated, bool mems_updated) | ||
2165 | { | ||
2166 | if (cpumask_empty(new_cpus)) | ||
2167 | cpumask_copy(new_cpus, parent_cs(cs)->effective_cpus); | ||
2168 | if (nodes_empty(*new_mems)) | ||
2169 | *new_mems = parent_cs(cs)->effective_mems; | ||
2170 | |||
2171 | mutex_lock(&callback_mutex); | ||
2172 | cpumask_copy(cs->effective_cpus, new_cpus); | ||
2173 | cs->effective_mems = *new_mems; | ||
2174 | mutex_unlock(&callback_mutex); | ||
2175 | |||
2176 | if (cpus_updated) | ||
2177 | update_tasks_cpumask(cs); | ||
2178 | if (mems_updated) | ||
2179 | update_tasks_nodemask(cs); | ||
2180 | } | ||
2181 | |||
2038 | /** | 2182 | /** |
2039 | * cpuset_hotplug_update_tasks - update tasks in a cpuset for hotunplug | 2183 | * cpuset_hotplug_update_tasks - update tasks in a cpuset for hotunplug |
2040 | * @cs: cpuset in interest | 2184 | * @cs: cpuset in interest |
@@ -2045,11 +2189,10 @@ static void remove_tasks_in_empty_cpuset(struct cpuset *cs) | |||
2045 | */ | 2189 | */ |
2046 | static void cpuset_hotplug_update_tasks(struct cpuset *cs) | 2190 | static void cpuset_hotplug_update_tasks(struct cpuset *cs) |
2047 | { | 2191 | { |
2048 | static cpumask_t off_cpus; | 2192 | static cpumask_t new_cpus; |
2049 | static nodemask_t off_mems; | 2193 | static nodemask_t new_mems; |
2050 | bool is_empty; | 2194 | bool cpus_updated; |
2051 | bool sane = cgroup_sane_behavior(cs->css.cgroup); | 2195 | bool mems_updated; |
2052 | |||
2053 | retry: | 2196 | retry: |
2054 | wait_event(cpuset_attach_wq, cs->attach_in_progress == 0); | 2197 | wait_event(cpuset_attach_wq, cs->attach_in_progress == 0); |
2055 | 2198 | ||
@@ -2064,51 +2207,20 @@ retry: | |||
2064 | goto retry; | 2207 | goto retry; |
2065 | } | 2208 | } |
2066 | 2209 | ||
2067 | cpumask_andnot(&off_cpus, cs->cpus_allowed, top_cpuset.cpus_allowed); | 2210 | cpumask_and(&new_cpus, cs->cpus_allowed, parent_cs(cs)->effective_cpus); |
2068 | nodes_andnot(off_mems, cs->mems_allowed, top_cpuset.mems_allowed); | 2211 | nodes_and(new_mems, cs->mems_allowed, parent_cs(cs)->effective_mems); |
2069 | |||
2070 | mutex_lock(&callback_mutex); | ||
2071 | cpumask_andnot(cs->cpus_allowed, cs->cpus_allowed, &off_cpus); | ||
2072 | mutex_unlock(&callback_mutex); | ||
2073 | |||
2074 | /* | ||
2075 | * If sane_behavior flag is set, we need to update tasks' cpumask | ||
2076 | * for empty cpuset to take on ancestor's cpumask. Otherwise, don't | ||
2077 | * call update_tasks_cpumask() if the cpuset becomes empty, as | ||
2078 | * the tasks in it will be migrated to an ancestor. | ||
2079 | */ | ||
2080 | if ((sane && cpumask_empty(cs->cpus_allowed)) || | ||
2081 | (!cpumask_empty(&off_cpus) && !cpumask_empty(cs->cpus_allowed))) | ||
2082 | update_tasks_cpumask(cs); | ||
2083 | 2212 | ||
2084 | mutex_lock(&callback_mutex); | 2213 | cpus_updated = !cpumask_equal(&new_cpus, cs->effective_cpus); |
2085 | nodes_andnot(cs->mems_allowed, cs->mems_allowed, off_mems); | 2214 | mems_updated = !nodes_equal(new_mems, cs->effective_mems); |
2086 | mutex_unlock(&callback_mutex); | ||
2087 | |||
2088 | /* | ||
2089 | * If sane_behavior flag is set, we need to update tasks' nodemask | ||
2090 | * for empty cpuset to take on ancestor's nodemask. Otherwise, don't | ||
2091 | * call update_tasks_nodemask() if the cpuset becomes empty, as | ||
2092 | * the tasks in it will be migratd to an ancestor. | ||
2093 | */ | ||
2094 | if ((sane && nodes_empty(cs->mems_allowed)) || | ||
2095 | (!nodes_empty(off_mems) && !nodes_empty(cs->mems_allowed))) | ||
2096 | update_tasks_nodemask(cs); | ||
2097 | 2215 | ||
2098 | is_empty = cpumask_empty(cs->cpus_allowed) || | 2216 | if (cgroup_on_dfl(cs->css.cgroup)) |
2099 | nodes_empty(cs->mems_allowed); | 2217 | hotplug_update_tasks(cs, &new_cpus, &new_mems, |
2218 | cpus_updated, mems_updated); | ||
2219 | else | ||
2220 | hotplug_update_tasks_legacy(cs, &new_cpus, &new_mems, | ||
2221 | cpus_updated, mems_updated); | ||
2100 | 2222 | ||
2101 | mutex_unlock(&cpuset_mutex); | 2223 | mutex_unlock(&cpuset_mutex); |
2102 | |||
2103 | /* | ||
2104 | * If sane_behavior flag is set, we'll keep tasks in empty cpusets. | ||
2105 | * | ||
2106 | * Otherwise move tasks to the nearest ancestor with execution | ||
2107 | * resources. This is full cgroup operation which will | ||
2108 | * also call back into cpuset. Should be done outside any lock. | ||
2109 | */ | ||
2110 | if (!sane && is_empty) | ||
2111 | remove_tasks_in_empty_cpuset(cs); | ||
2112 | } | 2224 | } |
2113 | 2225 | ||
2114 | /** | 2226 | /** |
@@ -2132,6 +2244,7 @@ static void cpuset_hotplug_workfn(struct work_struct *work) | |||
2132 | static cpumask_t new_cpus; | 2244 | static cpumask_t new_cpus; |
2133 | static nodemask_t new_mems; | 2245 | static nodemask_t new_mems; |
2134 | bool cpus_updated, mems_updated; | 2246 | bool cpus_updated, mems_updated; |
2247 | bool on_dfl = cgroup_on_dfl(top_cpuset.css.cgroup); | ||
2135 | 2248 | ||
2136 | mutex_lock(&cpuset_mutex); | 2249 | mutex_lock(&cpuset_mutex); |
2137 | 2250 | ||
@@ -2139,13 +2252,15 @@ static void cpuset_hotplug_workfn(struct work_struct *work) | |||
2139 | cpumask_copy(&new_cpus, cpu_active_mask); | 2252 | cpumask_copy(&new_cpus, cpu_active_mask); |
2140 | new_mems = node_states[N_MEMORY]; | 2253 | new_mems = node_states[N_MEMORY]; |
2141 | 2254 | ||
2142 | cpus_updated = !cpumask_equal(top_cpuset.cpus_allowed, &new_cpus); | 2255 | cpus_updated = !cpumask_equal(top_cpuset.effective_cpus, &new_cpus); |
2143 | mems_updated = !nodes_equal(top_cpuset.mems_allowed, new_mems); | 2256 | mems_updated = !nodes_equal(top_cpuset.effective_mems, new_mems); |
2144 | 2257 | ||
2145 | /* synchronize cpus_allowed to cpu_active_mask */ | 2258 | /* synchronize cpus_allowed to cpu_active_mask */ |
2146 | if (cpus_updated) { | 2259 | if (cpus_updated) { |
2147 | mutex_lock(&callback_mutex); | 2260 | mutex_lock(&callback_mutex); |
2148 | cpumask_copy(top_cpuset.cpus_allowed, &new_cpus); | 2261 | if (!on_dfl) |
2262 | cpumask_copy(top_cpuset.cpus_allowed, &new_cpus); | ||
2263 | cpumask_copy(top_cpuset.effective_cpus, &new_cpus); | ||
2149 | mutex_unlock(&callback_mutex); | 2264 | mutex_unlock(&callback_mutex); |
2150 | /* we don't mess with cpumasks of tasks in top_cpuset */ | 2265 | /* we don't mess with cpumasks of tasks in top_cpuset */ |
2151 | } | 2266 | } |
@@ -2153,7 +2268,9 @@ static void cpuset_hotplug_workfn(struct work_struct *work) | |||
2153 | /* synchronize mems_allowed to N_MEMORY */ | 2268 | /* synchronize mems_allowed to N_MEMORY */ |
2154 | if (mems_updated) { | 2269 | if (mems_updated) { |
2155 | mutex_lock(&callback_mutex); | 2270 | mutex_lock(&callback_mutex); |
2156 | top_cpuset.mems_allowed = new_mems; | 2271 | if (!on_dfl) |
2272 | top_cpuset.mems_allowed = new_mems; | ||
2273 | top_cpuset.effective_mems = new_mems; | ||
2157 | mutex_unlock(&callback_mutex); | 2274 | mutex_unlock(&callback_mutex); |
2158 | update_tasks_nodemask(&top_cpuset); | 2275 | update_tasks_nodemask(&top_cpuset); |
2159 | } | 2276 | } |
@@ -2228,6 +2345,9 @@ void __init cpuset_init_smp(void) | |||
2228 | top_cpuset.mems_allowed = node_states[N_MEMORY]; | 2345 | top_cpuset.mems_allowed = node_states[N_MEMORY]; |
2229 | top_cpuset.old_mems_allowed = top_cpuset.mems_allowed; | 2346 | top_cpuset.old_mems_allowed = top_cpuset.mems_allowed; |
2230 | 2347 | ||
2348 | cpumask_copy(top_cpuset.effective_cpus, cpu_active_mask); | ||
2349 | top_cpuset.effective_mems = node_states[N_MEMORY]; | ||
2350 | |||
2231 | register_hotmemory_notifier(&cpuset_track_online_nodes_nb); | 2351 | register_hotmemory_notifier(&cpuset_track_online_nodes_nb); |
2232 | } | 2352 | } |
2233 | 2353 | ||
@@ -2244,23 +2364,17 @@ void __init cpuset_init_smp(void) | |||
2244 | 2364 | ||
2245 | void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask) | 2365 | void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask) |
2246 | { | 2366 | { |
2247 | struct cpuset *cpus_cs; | ||
2248 | |||
2249 | mutex_lock(&callback_mutex); | 2367 | mutex_lock(&callback_mutex); |
2250 | rcu_read_lock(); | 2368 | rcu_read_lock(); |
2251 | cpus_cs = effective_cpumask_cpuset(task_cs(tsk)); | 2369 | guarantee_online_cpus(task_cs(tsk), pmask); |
2252 | guarantee_online_cpus(cpus_cs, pmask); | ||
2253 | rcu_read_unlock(); | 2370 | rcu_read_unlock(); |
2254 | mutex_unlock(&callback_mutex); | 2371 | mutex_unlock(&callback_mutex); |
2255 | } | 2372 | } |
2256 | 2373 | ||
2257 | void cpuset_cpus_allowed_fallback(struct task_struct *tsk) | 2374 | void cpuset_cpus_allowed_fallback(struct task_struct *tsk) |
2258 | { | 2375 | { |
2259 | struct cpuset *cpus_cs; | ||
2260 | |||
2261 | rcu_read_lock(); | 2376 | rcu_read_lock(); |
2262 | cpus_cs = effective_cpumask_cpuset(task_cs(tsk)); | 2377 | do_set_cpus_allowed(tsk, task_cs(tsk)->effective_cpus); |
2263 | do_set_cpus_allowed(tsk, cpus_cs->cpus_allowed); | ||
2264 | rcu_read_unlock(); | 2378 | rcu_read_unlock(); |
2265 | 2379 | ||
2266 | /* | 2380 | /* |
@@ -2299,13 +2413,11 @@ void cpuset_init_current_mems_allowed(void) | |||
2299 | 2413 | ||
2300 | nodemask_t cpuset_mems_allowed(struct task_struct *tsk) | 2414 | nodemask_t cpuset_mems_allowed(struct task_struct *tsk) |
2301 | { | 2415 | { |
2302 | struct cpuset *mems_cs; | ||
2303 | nodemask_t mask; | 2416 | nodemask_t mask; |
2304 | 2417 | ||
2305 | mutex_lock(&callback_mutex); | 2418 | mutex_lock(&callback_mutex); |
2306 | rcu_read_lock(); | 2419 | rcu_read_lock(); |
2307 | mems_cs = effective_nodemask_cpuset(task_cs(tsk)); | 2420 | guarantee_online_mems(task_cs(tsk), &mask); |
2308 | guarantee_online_mems(mems_cs, &mask); | ||
2309 | rcu_read_unlock(); | 2421 | rcu_read_unlock(); |
2310 | mutex_unlock(&callback_mutex); | 2422 | mutex_unlock(&callback_mutex); |
2311 | 2423 | ||
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index bc1638b33449..126f7e3f04e7 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
@@ -8083,7 +8083,7 @@ struct cgroup_subsys cpu_cgrp_subsys = { | |||
8083 | .can_attach = cpu_cgroup_can_attach, | 8083 | .can_attach = cpu_cgroup_can_attach, |
8084 | .attach = cpu_cgroup_attach, | 8084 | .attach = cpu_cgroup_attach, |
8085 | .exit = cpu_cgroup_exit, | 8085 | .exit = cpu_cgroup_exit, |
8086 | .base_cftypes = cpu_files, | 8086 | .legacy_cftypes = cpu_files, |
8087 | .early_init = 1, | 8087 | .early_init = 1, |
8088 | }; | 8088 | }; |
8089 | 8089 | ||
diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c index 9cf350c94ec4..dd7cbb55bbf2 100644 --- a/kernel/sched/cpuacct.c +++ b/kernel/sched/cpuacct.c | |||
@@ -278,6 +278,6 @@ void cpuacct_account_field(struct task_struct *p, int index, u64 val) | |||
278 | struct cgroup_subsys cpuacct_cgrp_subsys = { | 278 | struct cgroup_subsys cpuacct_cgrp_subsys = { |
279 | .css_alloc = cpuacct_css_alloc, | 279 | .css_alloc = cpuacct_css_alloc, |
280 | .css_free = cpuacct_css_free, | 280 | .css_free = cpuacct_css_free, |
281 | .base_cftypes = files, | 281 | .legacy_cftypes = files, |
282 | .early_init = 1, | 282 | .early_init = 1, |
283 | }; | 283 | }; |
diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c index 493f758445e7..9aae6f47433f 100644 --- a/mm/hugetlb_cgroup.c +++ b/mm/hugetlb_cgroup.c | |||
@@ -358,9 +358,8 @@ static void __init __hugetlb_cgroup_file_init(int idx) | |||
358 | cft = &h->cgroup_files[4]; | 358 | cft = &h->cgroup_files[4]; |
359 | memset(cft, 0, sizeof(*cft)); | 359 | memset(cft, 0, sizeof(*cft)); |
360 | 360 | ||
361 | WARN_ON(cgroup_add_cftypes(&hugetlb_cgrp_subsys, h->cgroup_files)); | 361 | WARN_ON(cgroup_add_legacy_cftypes(&hugetlb_cgrp_subsys, |
362 | 362 | h->cgroup_files)); | |
363 | return; | ||
364 | } | 363 | } |
365 | 364 | ||
366 | void __init hugetlb_cgroup_file_init(void) | 365 | void __init hugetlb_cgroup_file_init(void) |
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 1f14a430c656..f009a14918d2 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -6007,7 +6007,6 @@ static struct cftype mem_cgroup_files[] = { | |||
6007 | }, | 6007 | }, |
6008 | { | 6008 | { |
6009 | .name = "use_hierarchy", | 6009 | .name = "use_hierarchy", |
6010 | .flags = CFTYPE_INSANE, | ||
6011 | .write_u64 = mem_cgroup_hierarchy_write, | 6010 | .write_u64 = mem_cgroup_hierarchy_write, |
6012 | .read_u64 = mem_cgroup_hierarchy_read, | 6011 | .read_u64 = mem_cgroup_hierarchy_read, |
6013 | }, | 6012 | }, |
@@ -6411,6 +6410,29 @@ static void mem_cgroup_css_free(struct cgroup_subsys_state *css)
 	__mem_cgroup_free(memcg);
 }
 
+/**
+ * mem_cgroup_css_reset - reset the states of a mem_cgroup
+ * @css: the target css
+ *
+ * Reset the states of the mem_cgroup associated with @css.  This is
+ * invoked when the userland requests disabling on the default hierarchy
+ * but the memcg is pinned through dependency.  The memcg should stop
+ * applying policies and should revert to the vanilla state as it may be
+ * made visible again.
+ *
+ * The current implementation only resets the essential configurations.
+ * This needs to be expanded to cover all the visible parts.
+ */
+static void mem_cgroup_css_reset(struct cgroup_subsys_state *css)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+
+	mem_cgroup_resize_limit(memcg, ULLONG_MAX);
+	mem_cgroup_resize_memsw_limit(memcg, ULLONG_MAX);
+	memcg_update_kmem_limit(memcg, ULLONG_MAX);
+	res_counter_set_soft_limit(&memcg->res, ULLONG_MAX);
+}
+
 #ifdef CONFIG_MMU
 /* Handlers for move charge at task migration. */
 #define PRECHARGE_COUNT_AT_ONCE	256
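mem_cgroup_css_reset() above is memcg's implementation of the new ->css_reset() callback: when userland disables the controller on the default hierarchy but the css stays pinned through dependency, every user-visible knob reverts to its neutral default so the hidden css stops exerting control. A minimal sketch of the same pattern for a hypothetical single-knob controller (foo_cgroup, css_foo() and FOO_LIMIT_NONE are invented for illustration):

	static void foo_css_reset(struct cgroup_subsys_state *css)
	{
		struct foo_cgroup *foo = css_foo(css);

		/* forget the configured limit; the css is hidden now but
		 * may be made visible again with a clean configuration */
		foo->limit = FOO_LIMIT_NONE;
	}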
@@ -7005,16 +7027,17 @@ static void mem_cgroup_move_task(struct cgroup_subsys_state *css,
 
 /*
  * Cgroup retains root cgroups across [un]mount cycles making it necessary
- * to verify sane_behavior flag on each mount attempt.
+ * to verify whether we're attached to the default hierarchy on each mount
+ * attempt.
  */
 static void mem_cgroup_bind(struct cgroup_subsys_state *root_css)
 {
 	/*
-	 * use_hierarchy is forced with sane_behavior.  cgroup core
+	 * use_hierarchy is forced on the default hierarchy.  cgroup core
 	 * guarantees that @root doesn't have any children, so turning it
 	 * on for the root memcg is enough.
 	 */
-	if (cgroup_sane_behavior(root_css->cgroup))
+	if (cgroup_on_dfl(root_css->cgroup))
 		mem_cgroup_from_css(root_css)->use_hierarchy = true;
 }
 
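cgroup_on_dfl() supersedes cgroup_sane_behavior() as the test for sitting on the default hierarchy, and the same predicate fits anywhere a controller must diverge per hierarchy type. A hedged sketch of a read handler built on it (foo_mode_read(), css_foo() and the use_hierarchy-style field are invented; only cgroup_on_dfl() itself is from this patch):

	static u64 foo_mode_read(struct cgroup_subsys_state *css,
				 struct cftype *cft)
	{
		/* hierarchical behaviour is forced on the default
		 * hierarchy, so report it unconditionally there */
		if (cgroup_on_dfl(css->cgroup))
			return 1;
		return css_foo(css)->use_hierarchy;
	}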
@@ -7023,11 +7046,12 @@ struct cgroup_subsys memory_cgrp_subsys = {
 	.css_online = mem_cgroup_css_online,
 	.css_offline = mem_cgroup_css_offline,
 	.css_free = mem_cgroup_css_free,
+	.css_reset = mem_cgroup_css_reset,
 	.can_attach = mem_cgroup_can_attach,
 	.cancel_attach = mem_cgroup_cancel_attach,
 	.attach = mem_cgroup_move_task,
 	.bind = mem_cgroup_bind,
-	.base_cftypes = mem_cgroup_files,
+	.legacy_cftypes = mem_cgroup_files,
 	.early_init = 0,
 };
 
@@ -7044,7 +7068,8 @@ __setup("swapaccount=", enable_swap_account);
 
 static void __init memsw_file_init(void)
 {
-	WARN_ON(cgroup_add_cftypes(&memory_cgrp_subsys, memsw_cgroup_files));
+	WARN_ON(cgroup_add_legacy_cftypes(&memory_cgrp_subsys,
+					  memsw_cgroup_files));
 }
 
 static void __init enable_swap_cgroup(void)
diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c
index 30d903b19c62..1f2a126f4ffa 100644
--- a/net/core/netclassid_cgroup.c
+++ b/net/core/netclassid_cgroup.c
@@ -107,5 +107,5 @@ struct cgroup_subsys net_cls_cgrp_subsys = {
 	.css_online	= cgrp_css_online,
 	.css_free	= cgrp_css_free,
 	.attach		= cgrp_attach,
-	.base_cftypes	= ss_files,
+	.legacy_cftypes	= ss_files,
 };
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 2f385b9bccc0..cbd0a199bf52 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -249,7 +249,7 @@ struct cgroup_subsys net_prio_cgrp_subsys = {
 	.css_online	= cgrp_css_online,
 	.css_free	= cgrp_css_free,
 	.attach		= net_prio_attach,
-	.base_cftypes	= ss_files,
+	.legacy_cftypes	= ss_files,
 };
 
 static int netprio_device_event(struct notifier_block *unused,
diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c
index f7a2ec3ac584..3af522622fad 100644
--- a/net/ipv4/tcp_memcontrol.c
+++ b/net/ipv4/tcp_memcontrol.c
@@ -222,7 +222,7 @@ static struct cftype tcp_files[] = {
 
 static int __init tcp_memcontrol_init(void)
 {
-	WARN_ON(cgroup_add_cftypes(&memory_cgrp_subsys, tcp_files));
+	WARN_ON(cgroup_add_legacy_cftypes(&memory_cgrp_subsys, tcp_files));
 	return 0;
 }
 __initcall(tcp_memcontrol_init);
diff --git a/security/device_cgroup.c b/security/device_cgroup.c
index d9d69e6930ed..188c1d26393b 100644
--- a/security/device_cgroup.c
+++ b/security/device_cgroup.c
@@ -796,7 +796,7 @@ struct cgroup_subsys devices_cgrp_subsys = {
 	.css_free	= devcgroup_css_free,
 	.css_online	= devcgroup_online,
 	.css_offline	= devcgroup_offline,
-	.base_cftypes	= dev_cgroup_files,
+	.legacy_cftypes	= dev_cgroup_files,
 };
 
 /**