diff options
-rw-r--r-- | Documentation/cgroup-v2.txt | 185 | ||||
-rw-r--r-- | include/linux/cgroup-defs.h | 12 | ||||
-rw-r--r-- | kernel/cgroup/cgroup-internal.h | 2 | ||||
-rw-r--r-- | kernel/cgroup/cgroup-v1.c | 5 | ||||
-rw-r--r-- | kernel/cgroup/cgroup.c | 355 | ||||
-rw-r--r-- | kernel/cgroup/debug.c | 1 | ||||
-rw-r--r-- | kernel/cgroup/pids.c | 1 | ||||
-rw-r--r-- | kernel/events/core.c | 1 |
8 files changed, 522 insertions, 40 deletions
diff --git a/Documentation/cgroup-v2.txt b/Documentation/cgroup-v2.txt index f01f831a3b11..cb9ea281ab72 100644 --- a/Documentation/cgroup-v2.txt +++ b/Documentation/cgroup-v2.txt | |||
@@ -18,7 +18,9 @@ v1 is available under Documentation/cgroup-v1/. | |||
18 | 1-2. What is cgroup? | 18 | 1-2. What is cgroup? |
19 | 2. Basic Operations | 19 | 2. Basic Operations |
20 | 2-1. Mounting | 20 | 2-1. Mounting |
21 | 2-2. Organizing Processes | 21 | 2-2. Organizing Processes and Threads |
22 | 2-2-1. Processes | ||
23 | 2-2-2. Threads | ||
22 | 2-3. [Un]populated Notification | 24 | 2-3. [Un]populated Notification |
23 | 2-4. Controlling Controllers | 25 | 2-4. Controlling Controllers |
24 | 2-4-1. Enabling and Disabling | 26 | 2-4-1. Enabling and Disabling |
@@ -167,8 +169,11 @@ cgroup v2 currently supports the following mount options. | |||
167 | Delegation section for details. | 169 | Delegation section for details. |
168 | 170 | ||
169 | 171 | ||
170 | Organizing Processes | 172 | Organizing Processes and Threads |
171 | -------------------- | 173 | -------------------------------- |
174 | |||
175 | Processes | ||
176 | ~~~~~~~~~ | ||
172 | 177 | ||
173 | Initially, only the root cgroup exists to which all processes belong. | 178 | Initially, only the root cgroup exists to which all processes belong. |
174 | A child cgroup can be created by creating a sub-directory:: | 179 | A child cgroup can be created by creating a sub-directory:: |
@@ -219,6 +224,104 @@ is removed subsequently, " (deleted)" is appended to the path:: | |||
219 | 0::/test-cgroup/test-cgroup-nested (deleted) | 224 | 0::/test-cgroup/test-cgroup-nested (deleted) |
220 | 225 | ||
221 | 226 | ||
227 | Threads | ||
228 | ~~~~~~~ | ||
229 | |||
230 | cgroup v2 supports thread granularity for a subset of controllers to | ||
231 | support use cases requiring hierarchical resource distribution across | ||
232 | the threads of a group of processes. By default, all threads of a | ||
233 | process belong to the same cgroup, which also serves as the resource | ||
234 | domain to host resource consumptions which are not specific to a | ||
235 | process or thread. The thread mode allows threads to be spread across | ||
236 | a subtree while still maintaining the common resource domain for them. | ||
237 | |||
238 | Controllers which support thread mode are called threaded controllers. | ||
239 | The ones which don't are called domain controllers. | ||
240 | |||
241 | Marking a cgroup threaded makes it join the resource domain of its | ||
242 | parent as a threaded cgroup. The parent may be another threaded | ||
243 | cgroup whose resource domain is further up in the hierarchy. The root | ||
244 | of a threaded subtree, that is, the nearest ancestor which is not | ||
245 | threaded, is called threaded domain or thread root interchangeably and | ||
246 | serves as the resource domain for the entire subtree. | ||
247 | |||
248 | Inside a threaded subtree, threads of a process can be put in | ||
249 | different cgroups and are not subject to the no internal process | ||
250 | constraint - threaded controllers can be enabled on non-leaf cgroups | ||
251 | whether they have threads in them or not. | ||
252 | |||
253 | As the threaded domain cgroup hosts all the domain resource | ||
254 | consumptions of the subtree, it is considered to have internal | ||
255 | resource consumptions whether there are processes in it or not and | ||
256 | can't have populated child cgroups which aren't threaded. Because the | ||
257 | root cgroup is not subject to the no internal process constraint, it can | ||
258 | serve both as a threaded domain and a parent to domain cgroups. | ||
259 | |||
260 | The current operation mode or type of the cgroup is shown in the | ||
261 | "cgroup.type" file which indicates whether the cgroup is a normal | ||
262 | domain, a domain which is serving as the domain of a threaded subtree, | ||
263 | or a threaded cgroup. | ||
264 | |||
265 | On creation, a cgroup is always a domain cgroup and can be made | ||
266 | threaded by writing "threaded" to the "cgroup.type" file. The | ||
267 | operation is single direction:: | ||
268 | |||
269 | # echo threaded > cgroup.type | ||
270 | |||
271 | Once threaded, the cgroup can't be made a domain again. To enable the | ||
272 | thread mode, the following conditions must be met. | ||
273 | |||
274 | - As the cgroup will join the parent's resource domain, the parent | ||
275 | must either be a valid (threaded) domain or a threaded cgroup. | ||
276 | |||
277 | - The cgroup must be empty. No enabled controllers, child cgroups or | ||
278 | processes. | ||
279 | |||
280 | Topology-wise, a cgroup can be in an invalid state. Please consider | ||
281 | the following topology:: | ||
282 | |||
283 | A (threaded domain) - B (threaded) - C (domain, just created) | ||
284 | |||
285 | C is created as a domain but isn't connected to a parent which can | ||
286 | host child domains. C can't be used until it is turned into a | ||
287 | threaded cgroup. "cgroup.type" file will report "domain invalid" in | ||
288 | these cases. Operations which fail due to invalid topology use | ||
289 | EOPNOTSUPP as the errno. | ||
290 | |||
291 | A domain cgroup is turned into a threaded domain when one of its child | ||
292 | cgroups becomes threaded or threaded controllers are enabled in the | ||
293 | "cgroup.subtree_control" file while there are processes in the cgroup. | ||
294 | A threaded domain reverts to a normal domain when the conditions | ||
295 | clear. | ||
296 | |||
297 | When read, "cgroup.threads" contains the list of the thread IDs of all | ||
298 | threads in the cgroup. Except that the operations are per-thread | ||
299 | instead of per-process, "cgroup.threads" has the same format and | ||
300 | behaves the same way as "cgroup.procs". While "cgroup.threads" can be | ||
301 | written to in any cgroup, as it can only move threads inside the same | ||
302 | threaded domain, its operations are confined inside each threaded | ||
303 | subtree. | ||
304 | |||
305 | The threaded domain cgroup serves as the resource domain for the whole | ||
306 | subtree, and, while the threads can be scattered across the subtree, | ||
307 | all the processes are considered to be in the threaded domain cgroup. | ||
308 | "cgroup.procs" in a threaded domain cgroup contains the PIDs of all | ||
309 | processes in the subtree and is not readable in the subtree proper. | ||
310 | However, "cgroup.procs" can be written to from anywhere in the subtree | ||
311 | to migrate all threads of the matching process to the cgroup. | ||
312 | |||
313 | Only threaded controllers can be enabled in a threaded subtree. When | ||
314 | a threaded controller is enabled inside a threaded subtree, it only | ||
315 | accounts for and controls resource consumptions associated with the | ||
316 | threads in the cgroup and its descendants. All consumptions which | ||
317 | aren't tied to a specific thread belong to the threaded domain cgroup. | ||
318 | |||
319 | Because a threaded subtree is exempt from the no internal process | ||
320 | constraint, a threaded controller must be able to handle competition | ||
321 | between threads in a non-leaf cgroup and its child cgroups. Each | ||
322 | threaded controller defines how such competitions are handled. | ||
323 | |||
324 | |||
222 | [Un]populated Notification | 325 | [Un]populated Notification |
223 | -------------------------- | 326 | -------------------------- |
224 | 327 | ||
@@ -302,15 +405,15 @@ disabled if one or more children have it enabled. | |||
302 | No Internal Process Constraint | 405 | No Internal Process Constraint |
303 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | 406 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
304 | 407 | ||
305 | Non-root cgroups can only distribute resources to their children when | 408 | Non-root cgroups can distribute domain resources to their children |
306 | they don't have any processes of their own. In other words, only | 409 | only when they don't have any processes of their own. In other words, |
307 | cgroups which don't contain any processes can have controllers enabled | 410 | only domain cgroups which don't contain any processes can have domain |
308 | in their "cgroup.subtree_control" files. | 411 | controllers enabled in their "cgroup.subtree_control" files. |
309 | 412 | ||
310 | This guarantees that, when a controller is looking at the part of the | 413 | This guarantees that, when a domain controller is looking at the part |
311 | hierarchy which has it enabled, processes are always only on the | 414 | of the hierarchy which has it enabled, processes are always only on |
312 | leaves. This rules out situations where child cgroups compete against | 415 | the leaves. This rules out situations where child cgroups compete |
313 | internal processes of the parent. | 416 | against internal processes of the parent. |
314 | 417 | ||
315 | The root cgroup is exempt from this restriction. Root contains | 418 | The root cgroup is exempt from this restriction. Root contains |
316 | processes and anonymous resource consumption which can't be associated | 419 | processes and anonymous resource consumption which can't be associated |
@@ -334,10 +437,10 @@ Model of Delegation | |||
334 | ~~~~~~~~~~~~~~~~~~~ | 437 | ~~~~~~~~~~~~~~~~~~~ |
335 | 438 | ||
336 | A cgroup can be delegated in two ways. First, to a less privileged | 439 | A cgroup can be delegated in two ways. First, to a less privileged |
337 | user by granting write access of the directory and its "cgroup.procs" | 440 | user by granting write access of the directory and its "cgroup.procs", |
338 | and "cgroup.subtree_control" files to the user. Second, if the | 441 | "cgroup.threads" and "cgroup.subtree_control" files to the user. |
339 | "nsdelegate" mount option is set, automatically to a cgroup namespace | 442 | Second, if the "nsdelegate" mount option is set, automatically to a |
340 | on namespace creation. | 443 | cgroup namespace on namespace creation. |
341 | 444 | ||
342 | Because the resource control interface files in a given directory | 445 | Because the resource control interface files in a given directory |
343 | control the distribution of the parent's resources, the delegatee | 446 | control the distribution of the parent's resources, the delegatee |
@@ -644,6 +747,29 @@ Core Interface Files | |||
644 | 747 | ||
645 | All cgroup core files are prefixed with "cgroup." | 748 | All cgroup core files are prefixed with "cgroup." |
646 | 749 | ||
750 | cgroup.type | ||
751 | |||
752 | A read-write single value file which exists on non-root | ||
753 | cgroups. | ||
754 | |||
755 | When read, it indicates the current type of the cgroup, which | ||
756 | can be one of the following values. | ||
757 | |||
758 | - "domain" : A normal valid domain cgroup. | ||
759 | |||
760 | - "domain threaded" : A threaded domain cgroup which is | ||
761 | serving as the root of a threaded subtree. | ||
762 | |||
763 | - "domain invalid" : A cgroup which is in an invalid state. | ||
764 | It can't be populated or have controllers enabled. It may | ||
765 | be allowed to become a threaded cgroup. | ||
766 | |||
767 | - "threaded" : A threaded cgroup which is a member of a | ||
768 | threaded subtree. | ||
769 | |||
770 | A cgroup can be turned into a threaded cgroup by writing | ||
771 | "threaded" to this file. | ||
772 | |||
647 | cgroup.procs | 773 | cgroup.procs |
648 | A read-write new-line separated values file which exists on | 774 | A read-write new-line separated values file which exists on |
649 | all cgroups. | 775 | all cgroups. |
@@ -666,6 +792,35 @@ All cgroup core files are prefixed with "cgroup." | |||
666 | When delegating a sub-hierarchy, write access to this file | 792 | When delegating a sub-hierarchy, write access to this file |
667 | should be granted along with the containing directory. | 793 | should be granted along with the containing directory. |
668 | 794 | ||
795 | In a threaded cgroup, reading this file fails with EOPNOTSUPP | ||
796 | as all the processes belong to the thread root. Writing is | ||
797 | supported and moves every thread of the process to the cgroup. | ||
798 | |||
799 | cgroup.threads | ||
800 | A read-write new-line separated values file which exists on | ||
801 | all cgroups. | ||
802 | |||
803 | When read, it lists the TIDs of all threads which belong to | ||
804 | the cgroup one-per-line. The TIDs are not ordered and the | ||
805 | same TID may show up more than once if the thread got moved to | ||
806 | another cgroup and then back or the TID got recycled while | ||
807 | reading. | ||
808 | |||
809 | A TID can be written to migrate the thread associated with the | ||
810 | TID to the cgroup. The writer should match all of the | ||
811 | following conditions. | ||
812 | |||
813 | - It must have write access to the "cgroup.threads" file. | ||
814 | |||
815 | - The cgroup that the thread is currently in must be in the | ||
816 | same resource domain as the destination cgroup. | ||
817 | |||
818 | - It must have write access to the "cgroup.procs" file of the | ||
819 | common ancestor of the source and destination cgroups. | ||
820 | |||
821 | When delegating a sub-hierarchy, write access to this file | ||
822 | should be granted along with the containing directory. | ||
823 | |||
669 | cgroup.controllers | 824 | cgroup.controllers |
670 | A read-only space separated values file which exists on all | 825 | A read-only space separated values file which exists on all |
671 | cgroups. | 826 | cgroups. |
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 651c4363c85e..9d741959f218 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h | |||
@@ -522,6 +522,18 @@ struct cgroup_subsys { | |||
522 | bool implicit_on_dfl:1; | 522 | bool implicit_on_dfl:1; |
523 | 523 | ||
524 | /* | 524 | /* |
525 | * If %true, the controller supports threaded mode on the default | ||
526 | * hierarchy. In a threaded subtree, both process granularity and | ||
527 | * no-internal-process constraint are ignored and a threaded | ||
528 | * controller should be able to handle that. | ||
529 | * | ||
530 | * Note that as an implicit controller is automatically enabled on | ||
531 | * all cgroups on the default hierarchy, it should also be | ||
532 | * threaded. implicit && !threaded is not supported. | ||
533 | */ | ||
534 | bool threaded:1; | ||
535 | |||
536 | /* | ||
525 | * If %false, this subsystem is properly hierarchical - | 537 | * If %false, this subsystem is properly hierarchical - |
526 | * configuration, resource accounting and restriction on a parent | 538 | * configuration, resource accounting and restriction on a parent |
527 | * cgroup cover those of its children. If %true, hierarchy support | 539 | * cgroup cover those of its children. If %true, hierarchy support |
diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h index 0e81c6109e91..f10eb19ddf04 100644 --- a/kernel/cgroup/cgroup-internal.h +++ b/kernel/cgroup/cgroup-internal.h | |||
@@ -170,7 +170,7 @@ struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags, | |||
170 | struct cgroup_root *root, unsigned long magic, | 170 | struct cgroup_root *root, unsigned long magic, |
171 | struct cgroup_namespace *ns); | 171 | struct cgroup_namespace *ns); |
172 | 172 | ||
173 | bool cgroup_may_migrate_to(struct cgroup *dst_cgrp); | 173 | int cgroup_migrate_vet_dst(struct cgroup *dst_cgrp); |
174 | void cgroup_migrate_finish(struct cgroup_mgctx *mgctx); | 174 | void cgroup_migrate_finish(struct cgroup_mgctx *mgctx); |
175 | void cgroup_migrate_add_src(struct css_set *src_cset, struct cgroup *dst_cgrp, | 175 | void cgroup_migrate_add_src(struct css_set *src_cset, struct cgroup *dst_cgrp, |
176 | struct cgroup_mgctx *mgctx); | 176 | struct cgroup_mgctx *mgctx); |
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 167aaab04bf9..f0e8601b13cb 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c | |||
@@ -99,8 +99,9 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from) | |||
99 | if (cgroup_on_dfl(to)) | 99 | if (cgroup_on_dfl(to)) |
100 | return -EINVAL; | 100 | return -EINVAL; |
101 | 101 | ||
102 | if (!cgroup_may_migrate_to(to)) | 102 | ret = cgroup_migrate_vet_dst(to); |
103 | return -EBUSY; | 103 | if (ret) |
104 | return ret; | ||
104 | 105 | ||
105 | mutex_lock(&cgroup_mutex); | 106 | mutex_lock(&cgroup_mutex); |
106 | 107 | ||
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index a1d59af274a9..c396e701c206 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c | |||
@@ -162,6 +162,9 @@ static u16 cgrp_dfl_inhibit_ss_mask; | |||
162 | /* some controllers are implicitly enabled on the default hierarchy */ | 162 | /* some controllers are implicitly enabled on the default hierarchy */ |
163 | static u16 cgrp_dfl_implicit_ss_mask; | 163 | static u16 cgrp_dfl_implicit_ss_mask; |
164 | 164 | ||
165 | /* some controllers can be threaded on the default hierarchy */ | ||
166 | static u16 cgrp_dfl_threaded_ss_mask; | ||
167 | |||
165 | /* The list of hierarchy roots */ | 168 | /* The list of hierarchy roots */ |
166 | LIST_HEAD(cgroup_roots); | 169 | LIST_HEAD(cgroup_roots); |
167 | static int cgroup_root_count; | 170 | static int cgroup_root_count; |
@@ -335,14 +338,93 @@ static bool cgroup_is_threaded(struct cgroup *cgrp) | |||
335 | return cgrp->dom_cgrp != cgrp; | 338 | return cgrp->dom_cgrp != cgrp; |
336 | } | 339 | } |
337 | 340 | ||
341 | /* can @cgrp host both domain and threaded children? */ | ||
342 | static bool cgroup_is_mixable(struct cgroup *cgrp) | ||
343 | { | ||
344 | /* | ||
345 | * Root isn't under domain level resource control exempting it from | ||
346 | * the no-internal-process constraint, so it can serve as a thread | ||
347 | * root and a parent of resource domains at the same time. | ||
348 | */ | ||
349 | return !cgroup_parent(cgrp); | ||
350 | } | ||
351 | |||
352 | /* can @cgrp become a thread root? should always be true for a thread root */ | ||
353 | static bool cgroup_can_be_thread_root(struct cgroup *cgrp) | ||
354 | { | ||
355 | /* mixables don't care */ | ||
356 | if (cgroup_is_mixable(cgrp)) | ||
357 | return true; | ||
358 | |||
359 | /* domain roots can't be nested under threaded */ | ||
360 | if (cgroup_is_threaded(cgrp)) | ||
361 | return false; | ||
362 | |||
363 | /* can only have either domain or threaded children */ | ||
364 | if (cgrp->nr_populated_domain_children) | ||
365 | return false; | ||
366 | |||
367 | /* and no domain controllers can be enabled */ | ||
368 | if (cgrp->subtree_control & ~cgrp_dfl_threaded_ss_mask) | ||
369 | return false; | ||
370 | |||
371 | return true; | ||
372 | } | ||
373 | |||
374 | /* is @cgrp root of a threaded subtree? */ | ||
375 | static bool cgroup_is_thread_root(struct cgroup *cgrp) | ||
376 | { | ||
377 | /* thread root should be a domain */ | ||
378 | if (cgroup_is_threaded(cgrp)) | ||
379 | return false; | ||
380 | |||
381 | /* a domain w/ threaded children is a thread root */ | ||
382 | if (cgrp->nr_threaded_children) | ||
383 | return true; | ||
384 | |||
385 | /* | ||
386 | * A domain which has tasks and explicit threaded controllers | ||
387 | * enabled is a thread root. | ||
388 | */ | ||
389 | if (cgroup_has_tasks(cgrp) && | ||
390 | (cgrp->subtree_control & cgrp_dfl_threaded_ss_mask)) | ||
391 | return true; | ||
392 | |||
393 | return false; | ||
394 | } | ||
395 | |||
396 | /* a domain which isn't connected to the root w/o breakage can't be used */ | ||
397 | static bool cgroup_is_valid_domain(struct cgroup *cgrp) | ||
398 | { | ||
399 | /* the cgroup itself can be a thread root */ | ||
400 | if (cgroup_is_threaded(cgrp)) | ||
401 | return false; | ||
402 | |||
403 | /* but the ancestors can't be unless mixable */ | ||
404 | while ((cgrp = cgroup_parent(cgrp))) { | ||
405 | if (!cgroup_is_mixable(cgrp) && cgroup_is_thread_root(cgrp)) | ||
406 | return false; | ||
407 | if (cgroup_is_threaded(cgrp)) | ||
408 | return false; | ||
409 | } | ||
410 | |||
411 | return true; | ||
412 | } | ||
413 | |||
338 | /* subsystems visibly enabled on a cgroup */ | 414 | /* subsystems visibly enabled on a cgroup */ |
339 | static u16 cgroup_control(struct cgroup *cgrp) | 415 | static u16 cgroup_control(struct cgroup *cgrp) |
340 | { | 416 | { |
341 | struct cgroup *parent = cgroup_parent(cgrp); | 417 | struct cgroup *parent = cgroup_parent(cgrp); |
342 | u16 root_ss_mask = cgrp->root->subsys_mask; | 418 | u16 root_ss_mask = cgrp->root->subsys_mask; |
343 | 419 | ||
344 | if (parent) | 420 | if (parent) { |
345 | return parent->subtree_control; | 421 | u16 ss_mask = parent->subtree_control; |
422 | |||
423 | /* threaded cgroups can only have threaded controllers */ | ||
424 | if (cgroup_is_threaded(cgrp)) | ||
425 | ss_mask &= cgrp_dfl_threaded_ss_mask; | ||
426 | return ss_mask; | ||
427 | } | ||
346 | 428 | ||
347 | if (cgroup_on_dfl(cgrp)) | 429 | if (cgroup_on_dfl(cgrp)) |
348 | root_ss_mask &= ~(cgrp_dfl_inhibit_ss_mask | | 430 | root_ss_mask &= ~(cgrp_dfl_inhibit_ss_mask | |
@@ -355,8 +437,14 @@ static u16 cgroup_ss_mask(struct cgroup *cgrp) | |||
355 | { | 437 | { |
356 | struct cgroup *parent = cgroup_parent(cgrp); | 438 | struct cgroup *parent = cgroup_parent(cgrp); |
357 | 439 | ||
358 | if (parent) | 440 | if (parent) { |
359 | return parent->subtree_ss_mask; | 441 | u16 ss_mask = parent->subtree_ss_mask; |
442 | |||
443 | /* threaded cgroups can only have threaded controllers */ | ||
444 | if (cgroup_is_threaded(cgrp)) | ||
445 | ss_mask &= cgrp_dfl_threaded_ss_mask; | ||
446 | return ss_mask; | ||
447 | } | ||
360 | 448 | ||
361 | return cgrp->root->subsys_mask; | 449 | return cgrp->root->subsys_mask; |
362 | } | 450 | } |
@@ -2237,17 +2325,40 @@ out_release_tset: | |||
2237 | } | 2325 | } |
2238 | 2326 | ||
2239 | /** | 2327 | /** |
2240 | * cgroup_may_migrate_to - verify whether a cgroup can be migration destination | 2328 | * cgroup_migrate_vet_dst - verify whether a cgroup can be migration destination |
2241 | * @dst_cgrp: destination cgroup to test | 2329 | * @dst_cgrp: destination cgroup to test |
2242 | * | 2330 | * |
2243 | * On the default hierarchy, except for the root, subtree_control must be | 2331 | * On the default hierarchy, except for the mixable, (possible) thread root |
2244 | * zero for migration destination cgroups with tasks so that child cgroups | 2332 | * and threaded cgroups, subtree_control must be zero for migration |
2245 | * don't compete against tasks. | 2333 | * destination cgroups with tasks so that child cgroups don't compete |
2334 | * against tasks. | ||
2246 | */ | 2335 | */ |
2247 | bool cgroup_may_migrate_to(struct cgroup *dst_cgrp) | 2336 | int cgroup_migrate_vet_dst(struct cgroup *dst_cgrp) |
2248 | { | 2337 | { |
2249 | return !cgroup_on_dfl(dst_cgrp) || !cgroup_parent(dst_cgrp) || | 2338 | /* v1 doesn't have any restriction */ |
2250 | !dst_cgrp->subtree_control; | 2339 | if (!cgroup_on_dfl(dst_cgrp)) |
2340 | return 0; | ||
2341 | |||
2342 | /* verify @dst_cgrp can host resources */ | ||
2343 | if (!cgroup_is_valid_domain(dst_cgrp->dom_cgrp)) | ||
2344 | return -EOPNOTSUPP; | ||
2345 | |||
2346 | /* mixables don't care */ | ||
2347 | if (cgroup_is_mixable(dst_cgrp)) | ||
2348 | return 0; | ||
2349 | |||
2350 | /* | ||
2351 | * If @dst_cgrp is already or can become a thread root or is | ||
2352 | * threaded, it doesn't matter. | ||
2353 | */ | ||
2354 | if (cgroup_can_be_thread_root(dst_cgrp) || cgroup_is_threaded(dst_cgrp)) | ||
2355 | return 0; | ||
2356 | |||
2357 | /* apply no-internal-process constraint */ | ||
2358 | if (dst_cgrp->subtree_control) | ||
2359 | return -EBUSY; | ||
2360 | |||
2361 | return 0; | ||
2251 | } | 2362 | } |
2252 | 2363 | ||
2253 | /** | 2364 | /** |
@@ -2452,8 +2563,9 @@ int cgroup_attach_task(struct cgroup *dst_cgrp, struct task_struct *leader, | |||
2452 | struct task_struct *task; | 2563 | struct task_struct *task; |
2453 | int ret; | 2564 | int ret; |
2454 | 2565 | ||
2455 | if (!cgroup_may_migrate_to(dst_cgrp)) | 2566 | ret = cgroup_migrate_vet_dst(dst_cgrp); |
2456 | return -EBUSY; | 2567 | if (ret) |
2568 | return ret; | ||
2457 | 2569 | ||
2458 | /* look up all src csets */ | 2570 | /* look up all src csets */ |
2459 | spin_lock_irq(&css_set_lock); | 2571 | spin_lock_irq(&css_set_lock); |
@@ -2881,6 +2993,46 @@ static void cgroup_finalize_control(struct cgroup *cgrp, int ret) | |||
2881 | cgroup_apply_control_disable(cgrp); | 2993 | cgroup_apply_control_disable(cgrp); |
2882 | } | 2994 | } |
2883 | 2995 | ||
2996 | static int cgroup_vet_subtree_control_enable(struct cgroup *cgrp, u16 enable) | ||
2997 | { | ||
2998 | u16 domain_enable = enable & ~cgrp_dfl_threaded_ss_mask; | ||
2999 | |||
3000 | /* if nothing is getting enabled, nothing to worry about */ | ||
3001 | if (!enable) | ||
3002 | return 0; | ||
3003 | |||
3004 | /* can @cgrp host any resources? */ | ||
3005 | if (!cgroup_is_valid_domain(cgrp->dom_cgrp)) | ||
3006 | return -EOPNOTSUPP; | ||
3007 | |||
3008 | /* mixables don't care */ | ||
3009 | if (cgroup_is_mixable(cgrp)) | ||
3010 | return 0; | ||
3011 | |||
3012 | if (domain_enable) { | ||
3013 | /* can't enable domain controllers inside a thread subtree */ | ||
3014 | if (cgroup_is_thread_root(cgrp) || cgroup_is_threaded(cgrp)) | ||
3015 | return -EOPNOTSUPP; | ||
3016 | } else { | ||
3017 | /* | ||
3018 | * Threaded controllers can handle internal competitions | ||
3019 | * and are always allowed inside a (prospective) thread | ||
3020 | * subtree. | ||
3021 | */ | ||
3022 | if (cgroup_can_be_thread_root(cgrp) || cgroup_is_threaded(cgrp)) | ||
3023 | return 0; | ||
3024 | } | ||
3025 | |||
3026 | /* | ||
3027 | * Controllers can't be enabled for a cgroup with tasks to avoid | ||
3028 | * child cgroups competing against tasks. | ||
3029 | */ | ||
3030 | if (cgroup_has_tasks(cgrp)) | ||
3031 | return -EBUSY; | ||
3032 | |||
3033 | return 0; | ||
3034 | } | ||
3035 | |||
2884 | /* change the enabled child controllers for a cgroup in the default hierarchy */ | 3036 | /* change the enabled child controllers for a cgroup in the default hierarchy */ |
2885 | static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, | 3037 | static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, |
2886 | char *buf, size_t nbytes, | 3038 | char *buf, size_t nbytes, |
@@ -2956,14 +3108,9 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, | |||
2956 | goto out_unlock; | 3108 | goto out_unlock; |
2957 | } | 3109 | } |
2958 | 3110 | ||
2959 | /* | 3111 | ret = cgroup_vet_subtree_control_enable(cgrp, enable); |
2960 | * Except for the root, subtree_control must be zero for a cgroup | 3112 | if (ret) |
2961 | * with tasks so that child cgroups don't compete against tasks. | ||
2962 | */ | ||
2963 | if (enable && cgroup_parent(cgrp) && cgroup_has_tasks(cgrp)) { | ||
2964 | ret = -EBUSY; | ||
2965 | goto out_unlock; | 3113 | goto out_unlock; |
2966 | } | ||
2967 | 3114 | ||
2968 | /* save and update control masks and prepare csses */ | 3115 | /* save and update control masks and prepare csses */ |
2969 | cgroup_save_control(cgrp); | 3116 | cgroup_save_control(cgrp); |
@@ -2982,6 +3129,84 @@ out_unlock: | |||
2982 | return ret ?: nbytes; | 3129 | return ret ?: nbytes; |
2983 | } | 3130 | } |
2984 | 3131 | ||
3132 | static int cgroup_enable_threaded(struct cgroup *cgrp) | ||
3133 | { | ||
3134 | struct cgroup *parent = cgroup_parent(cgrp); | ||
3135 | struct cgroup *dom_cgrp = parent->dom_cgrp; | ||
3136 | int ret; | ||
3137 | |||
3138 | lockdep_assert_held(&cgroup_mutex); | ||
3139 | |||
3140 | /* noop if already threaded */ | ||
3141 | if (cgroup_is_threaded(cgrp)) | ||
3142 | return 0; | ||
3143 | |||
3144 | /* we're joining the parent's domain, ensure its validity */ | ||
3145 | if (!cgroup_is_valid_domain(dom_cgrp) || | ||
3146 | !cgroup_can_be_thread_root(dom_cgrp)) | ||
3147 | return -EOPNOTSUPP; | ||
3148 | |||
3149 | /* | ||
3150 | * Allow enabling thread mode only on empty cgroups to avoid | ||
3151 | * implicit migrations and recursive operations. | ||
3152 | */ | ||
3153 | if (cgroup_has_tasks(cgrp) || css_has_online_children(&cgrp->self)) | ||
3154 | return -EBUSY; | ||
3155 | |||
3156 | /* | ||
3157 | * The following shouldn't cause actual migrations and should | ||
3158 | * always succeed. | ||
3159 | */ | ||
3160 | cgroup_save_control(cgrp); | ||
3161 | |||
3162 | cgrp->dom_cgrp = dom_cgrp; | ||
3163 | ret = cgroup_apply_control(cgrp); | ||
3164 | if (!ret) | ||
3165 | parent->nr_threaded_children++; | ||
3166 | else | ||
3167 | cgrp->dom_cgrp = cgrp; | ||
3168 | |||
3169 | cgroup_finalize_control(cgrp, ret); | ||
3170 | return ret; | ||
3171 | } | ||
3172 | |||
3173 | static int cgroup_type_show(struct seq_file *seq, void *v) | ||
3174 | { | ||
3175 | struct cgroup *cgrp = seq_css(seq)->cgroup; | ||
3176 | |||
3177 | if (cgroup_is_threaded(cgrp)) | ||
3178 | seq_puts(seq, "threaded\n"); | ||
3179 | else if (!cgroup_is_valid_domain(cgrp)) | ||
3180 | seq_puts(seq, "domain invalid\n"); | ||
3181 | else if (cgroup_is_thread_root(cgrp)) | ||
3182 | seq_puts(seq, "domain threaded\n"); | ||
3183 | else | ||
3184 | seq_puts(seq, "domain\n"); | ||
3185 | |||
3186 | return 0; | ||
3187 | } | ||
3188 | |||
3189 | static ssize_t cgroup_type_write(struct kernfs_open_file *of, char *buf, | ||
3190 | size_t nbytes, loff_t off) | ||
3191 | { | ||
3192 | struct cgroup *cgrp; | ||
3193 | int ret; | ||
3194 | |||
3195 | /* only switching to threaded mode is supported */ | ||
3196 | if (strcmp(strstrip(buf), "threaded")) | ||
3197 | return -EINVAL; | ||
3198 | |||
3199 | cgrp = cgroup_kn_lock_live(of->kn, false); | ||
3200 | if (!cgrp) | ||
3201 | return -ENOENT; | ||
3202 | |||
3203 | /* threaded can only be enabled */ | ||
3204 | ret = cgroup_enable_threaded(cgrp); | ||
3205 | |||
3206 | cgroup_kn_unlock(of->kn); | ||
3207 | return ret ?: nbytes; | ||
3208 | } | ||
3209 | |||
2985 | static int cgroup_events_show(struct seq_file *seq, void *v) | 3210 | static int cgroup_events_show(struct seq_file *seq, void *v) |
2986 | { | 3211 | { |
2987 | seq_printf(seq, "populated %d\n", | 3212 | seq_printf(seq, "populated %d\n", |
@@ -3867,12 +4092,12 @@ static void *cgroup_procs_next(struct seq_file *s, void *v, loff_t *pos) | |||
3867 | return css_task_iter_next(it); | 4092 | return css_task_iter_next(it); |
3868 | } | 4093 | } |
3869 | 4094 | ||
3870 | static void *cgroup_procs_start(struct seq_file *s, loff_t *pos) | 4095 | static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos, |
4096 | unsigned int iter_flags) | ||
3871 | { | 4097 | { |
3872 | struct kernfs_open_file *of = s->private; | 4098 | struct kernfs_open_file *of = s->private; |
3873 | struct cgroup *cgrp = seq_css(s)->cgroup; | 4099 | struct cgroup *cgrp = seq_css(s)->cgroup; |
3874 | struct css_task_iter *it = of->priv; | 4100 | struct css_task_iter *it = of->priv; |
3875 | unsigned iter_flags = CSS_TASK_ITER_PROCS | CSS_TASK_ITER_THREADED; | ||
3876 | 4101 | ||
3877 | /* | 4102 | /* |
3878 | * When a seq_file is seeked, it's always traversed sequentially | 4103 | * When a seq_file is seeked, it's always traversed sequentially |
@@ -3895,6 +4120,23 @@ static void *cgroup_procs_start(struct seq_file *s, loff_t *pos) | |||
3895 | return cgroup_procs_next(s, NULL, NULL); | 4120 | return cgroup_procs_next(s, NULL, NULL); |
3896 | } | 4121 | } |
3897 | 4122 | ||
4123 | static void *cgroup_procs_start(struct seq_file *s, loff_t *pos) | ||
4124 | { | ||
4125 | struct cgroup *cgrp = seq_css(s)->cgroup; | ||
4126 | |||
4127 | /* | ||
4128 | * All processes of a threaded subtree belong to the domain cgroup | ||
4129 | * of the subtree. Only threads can be distributed across the | ||
4130 | * subtree. Reject reads on cgroup.procs in the subtree proper. | ||
4131 | * They're always empty anyway. | ||
4132 | */ | ||
4133 | if (cgroup_is_threaded(cgrp)) | ||
4134 | return ERR_PTR(-EOPNOTSUPP); | ||
4135 | |||
4136 | return __cgroup_procs_start(s, pos, CSS_TASK_ITER_PROCS | | ||
4137 | CSS_TASK_ITER_THREADED); | ||
4138 | } | ||
4139 | |||
3898 | static int cgroup_procs_show(struct seq_file *s, void *v) | 4140 | static int cgroup_procs_show(struct seq_file *s, void *v) |
3899 | { | 4141 | { |
3900 | seq_printf(s, "%d\n", task_pid_vnr(v)); | 4142 | seq_printf(s, "%d\n", task_pid_vnr(v)); |
@@ -3974,9 +4216,64 @@ out_unlock: | |||
3974 | return ret ?: nbytes; | 4216 | return ret ?: nbytes; |
3975 | } | 4217 | } |
3976 | 4218 | ||
4219 | static void *cgroup_threads_start(struct seq_file *s, loff_t *pos) | ||
4220 | { | ||
4221 | return __cgroup_procs_start(s, pos, 0); | ||
4222 | } | ||
4223 | |||
4224 | static ssize_t cgroup_threads_write(struct kernfs_open_file *of, | ||
4225 | char *buf, size_t nbytes, loff_t off) | ||
4226 | { | ||
4227 | struct cgroup *src_cgrp, *dst_cgrp; | ||
4228 | struct task_struct *task; | ||
4229 | ssize_t ret; | ||
4230 | |||
4231 | buf = strstrip(buf); | ||
4232 | |||
4233 | dst_cgrp = cgroup_kn_lock_live(of->kn, false); | ||
4234 | if (!dst_cgrp) | ||
4235 | return -ENODEV; | ||
4236 | |||
4237 | task = cgroup_procs_write_start(buf, false); | ||
4238 | ret = PTR_ERR_OR_ZERO(task); | ||
4239 | if (ret) | ||
4240 | goto out_unlock; | ||
4241 | |||
4242 | /* find the source cgroup */ | ||
4243 | spin_lock_irq(&css_set_lock); | ||
4244 | src_cgrp = task_cgroup_from_root(task, &cgrp_dfl_root); | ||
4245 | spin_unlock_irq(&css_set_lock); | ||
4246 | |||
4247 | /* thread migrations follow the cgroup.procs delegation rule */ | ||
4248 | ret = cgroup_procs_write_permission(src_cgrp, dst_cgrp, | ||
4249 | of->file->f_path.dentry->d_sb); | ||
4250 | if (ret) | ||
4251 | goto out_finish; | ||
4252 | |||
4253 | /* and must be contained in the same domain */ | ||
4254 | ret = -EOPNOTSUPP; | ||
4255 | if (src_cgrp->dom_cgrp != dst_cgrp->dom_cgrp) | ||
4256 | goto out_finish; | ||
4257 | |||
4258 | ret = cgroup_attach_task(dst_cgrp, task, false); | ||
4259 | |||
4260 | out_finish: | ||
4261 | cgroup_procs_write_finish(task); | ||
4262 | out_unlock: | ||
4263 | cgroup_kn_unlock(of->kn); | ||
4264 | |||
4265 | return ret ?: nbytes; | ||
4266 | } | ||
4267 | |||
3977 | /* cgroup core interface files for the default hierarchy */ | 4268 | /* cgroup core interface files for the default hierarchy */ |
3978 | static struct cftype cgroup_base_files[] = { | 4269 | static struct cftype cgroup_base_files[] = { |
3979 | { | 4270 | { |
4271 | .name = "cgroup.type", | ||
4272 | .flags = CFTYPE_NOT_ON_ROOT, | ||
4273 | .seq_show = cgroup_type_show, | ||
4274 | .write = cgroup_type_write, | ||
4275 | }, | ||
4276 | { | ||
3980 | .name = "cgroup.procs", | 4277 | .name = "cgroup.procs", |
3981 | .flags = CFTYPE_NS_DELEGATABLE, | 4278 | .flags = CFTYPE_NS_DELEGATABLE, |
3982 | .file_offset = offsetof(struct cgroup, procs_file), | 4279 | .file_offset = offsetof(struct cgroup, procs_file), |
@@ -3987,6 +4284,14 @@ static struct cftype cgroup_base_files[] = { | |||
3987 | .write = cgroup_procs_write, | 4284 | .write = cgroup_procs_write, |
3988 | }, | 4285 | }, |
3989 | { | 4286 | { |
4287 | .name = "cgroup.threads", | ||
4288 | .release = cgroup_procs_release, | ||
4289 | .seq_start = cgroup_threads_start, | ||
4290 | .seq_next = cgroup_procs_next, | ||
4291 | .seq_show = cgroup_procs_show, | ||
4292 | .write = cgroup_threads_write, | ||
4293 | }, | ||
4294 | { | ||
3990 | .name = "cgroup.controllers", | 4295 | .name = "cgroup.controllers", |
3991 | .seq_show = cgroup_controllers_show, | 4296 | .seq_show = cgroup_controllers_show, |
3992 | }, | 4297 | }, |
@@ -4753,11 +5058,17 @@ int __init cgroup_init(void) | |||
4753 | 5058 | ||
4754 | cgrp_dfl_root.subsys_mask |= 1 << ss->id; | 5059 | cgrp_dfl_root.subsys_mask |= 1 << ss->id; |
4755 | 5060 | ||
5061 | /* implicit controllers must be threaded too */ | ||
5062 | WARN_ON(ss->implicit_on_dfl && !ss->threaded); | ||
5063 | |||
4756 | if (ss->implicit_on_dfl) | 5064 | if (ss->implicit_on_dfl) |
4757 | cgrp_dfl_implicit_ss_mask |= 1 << ss->id; | 5065 | cgrp_dfl_implicit_ss_mask |= 1 << ss->id; |
4758 | else if (!ss->dfl_cftypes) | 5066 | else if (!ss->dfl_cftypes) |
4759 | cgrp_dfl_inhibit_ss_mask |= 1 << ss->id; | 5067 | cgrp_dfl_inhibit_ss_mask |= 1 << ss->id; |
4760 | 5068 | ||
5069 | if (ss->threaded) | ||
5070 | cgrp_dfl_threaded_ss_mask |= 1 << ss->id; | ||
5071 | |||
4761 | if (ss->dfl_cftypes == ss->legacy_cftypes) { | 5072 | if (ss->dfl_cftypes == ss->legacy_cftypes) { |
4762 | WARN_ON(cgroup_add_cftypes(ss, ss->dfl_cftypes)); | 5073 | WARN_ON(cgroup_add_cftypes(ss, ss->dfl_cftypes)); |
4763 | } else { | 5074 | } else { |
diff --git a/kernel/cgroup/debug.c b/kernel/cgroup/debug.c index dac46af22782..787a242fa69d 100644 --- a/kernel/cgroup/debug.c +++ b/kernel/cgroup/debug.c | |||
@@ -352,6 +352,7 @@ static int __init enable_cgroup_debug(char *str) | |||
352 | { | 352 | { |
353 | debug_cgrp_subsys.dfl_cftypes = debug_files; | 353 | debug_cgrp_subsys.dfl_cftypes = debug_files; |
354 | debug_cgrp_subsys.implicit_on_dfl = true; | 354 | debug_cgrp_subsys.implicit_on_dfl = true; |
355 | debug_cgrp_subsys.threaded = true; | ||
355 | return 1; | 356 | return 1; |
356 | } | 357 | } |
357 | __setup("cgroup_debug", enable_cgroup_debug); | 358 | __setup("cgroup_debug", enable_cgroup_debug); |
diff --git a/kernel/cgroup/pids.c b/kernel/cgroup/pids.c index 2237201d66d5..9829c67ebc0a 100644 --- a/kernel/cgroup/pids.c +++ b/kernel/cgroup/pids.c | |||
@@ -345,4 +345,5 @@ struct cgroup_subsys pids_cgrp_subsys = { | |||
345 | .free = pids_free, | 345 | .free = pids_free, |
346 | .legacy_cftypes = pids_files, | 346 | .legacy_cftypes = pids_files, |
347 | .dfl_cftypes = pids_files, | 347 | .dfl_cftypes = pids_files, |
348 | .threaded = true, | ||
348 | }; | 349 | }; |
diff --git a/kernel/events/core.c b/kernel/events/core.c index 1538df9b2b65..ec78247da310 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c | |||
@@ -11210,5 +11210,6 @@ struct cgroup_subsys perf_event_cgrp_subsys = { | |||
11210 | * controller is not mounted on a legacy hierarchy. | 11210 | * controller is not mounted on a legacy hierarchy. |
11211 | */ | 11211 | */ |
11212 | .implicit_on_dfl = true, | 11212 | .implicit_on_dfl = true, |
11213 | .threaded = true, | ||
11213 | }; | 11214 | }; |
11214 | #endif /* CONFIG_CGROUP_PERF */ | 11215 | #endif /* CONFIG_CGROUP_PERF */ |