diff options
| -rw-r--r-- | Documentation/cgroup-v2.txt | 221 | ||||
| -rw-r--r-- | include/linux/cgroup-defs.h | 68 | ||||
| -rw-r--r-- | include/linux/cgroup.h | 39 | ||||
| -rw-r--r-- | kernel/cgroup/cgroup-internal.h | 12 | ||||
| -rw-r--r-- | kernel/cgroup/cgroup-v1.c | 75 | ||||
| -rw-r--r-- | kernel/cgroup/cgroup.c | 947 | ||||
| -rw-r--r-- | kernel/cgroup/cpuset.c | 39 | ||||
| -rw-r--r-- | kernel/cgroup/debug.c | 53 | ||||
| -rw-r--r-- | kernel/cgroup/freezer.c | 6 | ||||
| -rw-r--r-- | kernel/cgroup/pids.c | 1 | ||||
| -rw-r--r-- | kernel/events/core.c | 1 | ||||
| -rw-r--r-- | mm/memcontrol.c | 2 | ||||
| -rw-r--r-- | net/core/netclassid_cgroup.c | 2 |
13 files changed, 1194 insertions, 272 deletions
diff --git a/Documentation/cgroup-v2.txt b/Documentation/cgroup-v2.txt index bde177103567..dc44785dc0fa 100644 --- a/Documentation/cgroup-v2.txt +++ b/Documentation/cgroup-v2.txt | |||
| @@ -18,7 +18,9 @@ v1 is available under Documentation/cgroup-v1/. | |||
| 18 | 1-2. What is cgroup? | 18 | 1-2. What is cgroup? |
| 19 | 2. Basic Operations | 19 | 2. Basic Operations |
| 20 | 2-1. Mounting | 20 | 2-1. Mounting |
| 21 | 2-2. Organizing Processes | 21 | 2-2. Organizing Processes and Threads |
| 22 | 2-2-1. Processes | ||
| 23 | 2-2-2. Threads | ||
| 22 | 2-3. [Un]populated Notification | 24 | 2-3. [Un]populated Notification |
| 23 | 2-4. Controlling Controllers | 25 | 2-4. Controlling Controllers |
| 24 | 2-4-1. Enabling and Disabling | 26 | 2-4-1. Enabling and Disabling |
| @@ -167,8 +169,11 @@ cgroup v2 currently supports the following mount options. | |||
| 167 | Delegation section for details. | 169 | Delegation section for details. |
| 168 | 170 | ||
| 169 | 171 | ||
| 170 | Organizing Processes | 172 | Organizing Processes and Threads |
| 171 | -------------------- | 173 | -------------------------------- |
| 174 | |||
| 175 | Processes | ||
| 176 | ~~~~~~~~~ | ||
| 172 | 177 | ||
| 173 | Initially, only the root cgroup exists to which all processes belong. | 178 | Initially, only the root cgroup exists to which all processes belong. |
| 174 | A child cgroup can be created by creating a sub-directory:: | 179 | A child cgroup can be created by creating a sub-directory:: |
| @@ -219,6 +224,105 @@ is removed subsequently, " (deleted)" is appended to the path:: | |||
| 219 | 0::/test-cgroup/test-cgroup-nested (deleted) | 224 | 0::/test-cgroup/test-cgroup-nested (deleted) |
| 220 | 225 | ||
| 221 | 226 | ||
| 227 | Threads | ||
| 228 | ~~~~~~~ | ||
| 229 | |||
| 230 | cgroup v2 supports thread granularity for a subset of controllers to | ||
| 231 | support use cases requiring hierarchical resource distribution across | ||
| 232 | the threads of a group of processes. By default, all threads of a | ||
| 233 | process belong to the same cgroup, which also serves as the resource | ||
| 234 | domain to host resource consumptions which are not specific to a | ||
| 235 | process or thread. The thread mode allows threads to be spread across | ||
| 236 | a subtree while still maintaining the common resource domain for them. | ||
| 237 | |||
| 238 | Controllers which support thread mode are called threaded controllers. | ||
| 239 | The ones which don't are called domain controllers. | ||
| 240 | |||
| 241 | Marking a cgroup threaded makes it join the resource domain of its | ||
| 242 | parent as a threaded cgroup. The parent may be another threaded | ||
| 243 | cgroup whose resource domain is further up in the hierarchy. The root | ||
| 244 | of a threaded subtree, that is, the nearest ancestor which is not | ||
| 245 | threaded, is called threaded domain or thread root interchangeably and | ||
| 246 | serves as the resource domain for the entire subtree. | ||
| 247 | |||
| 248 | Inside a threaded subtree, threads of a process can be put in | ||
| 249 | different cgroups and are not subject to the no internal process | ||
| 250 | constraint - threaded controllers can be enabled on non-leaf cgroups | ||
| 251 | whether they have threads in them or not. | ||
| 252 | |||
| 253 | As the threaded domain cgroup hosts all the domain resource | ||
| 254 | consumptions of the subtree, it is considered to have internal | ||
| 255 | resource consumptions whether there are processes in it or not and | ||
| 256 | can't have populated child cgroups which aren't threaded. Because the | ||
| 257 | root cgroup is not subject to the no internal process constraint, it can | ||
| 258 | serve both as a threaded domain and a parent to domain cgroups. | ||
| 259 | |||
| 260 | The current operation mode or type of the cgroup is shown in the | ||
| 261 | "cgroup.type" file which indicates whether the cgroup is a normal | ||
| 262 | domain, a domain which is serving as the domain of a threaded subtree, | ||
| 263 | or a threaded cgroup. | ||
| 264 | |||
| 265 | On creation, a cgroup is always a domain cgroup and can be made | ||
| 266 | threaded by writing "threaded" to the "cgroup.type" file. The | ||
| 267 | operation is single direction:: | ||
| 268 | |||
| 269 | # echo threaded > cgroup.type | ||
| 270 | |||
| 271 | Once threaded, the cgroup can't be made a domain again. To enable the | ||
| 272 | thread mode, the following conditions must be met. | ||
| 273 | |||
| 274 | - As the cgroup will join the parent's resource domain, the parent | ||
| 275 | must either be a valid (threaded) domain or a threaded cgroup. | ||
| 276 | |||
| 277 | - When the parent is an unthreaded domain, it must not have any domain | ||
| 278 | controllers enabled or populated domain children. The root is | ||
| 279 | exempt from this requirement. | ||
| 280 | |||
| 281 | Topology-wise, a cgroup can be in an invalid state. Please consider | ||
| 282 | the following topology:: | ||
| 283 | |||
| 284 | A (threaded domain) - B (threaded) - C (domain, just created) | ||
| 285 | |||
| 286 | C is created as a domain but isn't connected to a parent which can | ||
| 287 | host child domains. C can't be used until it is turned into a | ||
| 288 | threaded cgroup. "cgroup.type" file will report "domain invalid" in | ||
| 289 | these cases. Operations which fail due to invalid topology use | ||
| 290 | EOPNOTSUPP as the errno. | ||
| 291 | |||
| 292 | A domain cgroup is turned into a threaded domain when one of its child | ||
| 293 | cgroups becomes threaded or threaded controllers are enabled in the | ||
| 294 | "cgroup.subtree_control" file while there are processes in the cgroup. | ||
| 295 | A threaded domain reverts to a normal domain when the conditions | ||
| 296 | clear. | ||
| 297 | |||
| 298 | When read, "cgroup.threads" contains the list of the thread IDs of all | ||
| 299 | threads in the cgroup. Except that the operations are per-thread | ||
| 300 | instead of per-process, "cgroup.threads" has the same format and | ||
| 301 | behaves the same way as "cgroup.procs". While "cgroup.threads" can be | ||
| 302 | written to in any cgroup, as it can only move threads inside the same | ||
| 303 | threaded domain, its operations are confined inside each threaded | ||
| 304 | subtree. | ||
| 305 | |||
| 306 | The threaded domain cgroup serves as the resource domain for the whole | ||
| 307 | subtree, and, while the threads can be scattered across the subtree, | ||
| 308 | all the processes are considered to be in the threaded domain cgroup. | ||
| 309 | "cgroup.procs" in a threaded domain cgroup contains the PIDs of all | ||
| 310 | processes in the subtree and is not readable in the subtree proper. | ||
| 311 | However, "cgroup.procs" can be written to from anywhere in the subtree | ||
| 312 | to migrate all threads of the matching process to the cgroup. | ||
| 313 | |||
| 314 | Only threaded controllers can be enabled in a threaded subtree. When | ||
| 315 | a threaded controller is enabled inside a threaded subtree, it only | ||
| 316 | accounts for and controls resource consumptions associated with the | ||
| 317 | threads in the cgroup and its descendants. All consumptions which | ||
| 318 | aren't tied to a specific thread belong to the threaded domain cgroup. | ||
| 319 | |||
| 320 | Because a threaded subtree is exempt from no internal process | ||
| 321 | constraint, a threaded controller must be able to handle competition | ||
| 322 | between threads in a non-leaf cgroup and its child cgroups. Each | ||
| 323 | threaded controller defines how such competitions are handled. | ||
| 324 | |||
| 325 | |||
| 222 | [Un]populated Notification | 326 | [Un]populated Notification |
| 223 | -------------------------- | 327 | -------------------------- |
| 224 | 328 | ||
| @@ -302,15 +406,15 @@ disabled if one or more children have it enabled. | |||
| 302 | No Internal Process Constraint | 406 | No Internal Process Constraint |
| 303 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | 407 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
| 304 | 408 | ||
| 305 | Non-root cgroups can only distribute resources to their children when | 409 | Non-root cgroups can distribute domain resources to their children |
| 306 | they don't have any processes of their own. In other words, only | 410 | only when they don't have any processes of their own. In other words, |
| 307 | cgroups which don't contain any processes can have controllers enabled | 411 | only domain cgroups which don't contain any processes can have domain |
| 308 | in their "cgroup.subtree_control" files. | 412 | controllers enabled in their "cgroup.subtree_control" files. |
| 309 | 413 | ||
| 310 | This guarantees that, when a controller is looking at the part of the | 414 | This guarantees that, when a domain controller is looking at the part |
| 311 | hierarchy which has it enabled, processes are always only on the | 415 | of the hierarchy which has it enabled, processes are always only on |
| 312 | leaves. This rules out situations where child cgroups compete against | 416 | the leaves. This rules out situations where child cgroups compete |
| 313 | internal processes of the parent. | 417 | against internal processes of the parent. |
| 314 | 418 | ||
| 315 | The root cgroup is exempt from this restriction. Root contains | 419 | The root cgroup is exempt from this restriction. Root contains |
| 316 | processes and anonymous resource consumption which can't be associated | 420 | processes and anonymous resource consumption which can't be associated |
| @@ -334,10 +438,10 @@ Model of Delegation | |||
| 334 | ~~~~~~~~~~~~~~~~~~~ | 438 | ~~~~~~~~~~~~~~~~~~~ |
| 335 | 439 | ||
| 336 | A cgroup can be delegated in two ways. First, to a less privileged | 440 | A cgroup can be delegated in two ways. First, to a less privileged |
| 337 | user by granting write access of the directory and its "cgroup.procs" | 441 | user by granting write access of the directory and its "cgroup.procs", |
| 338 | and "cgroup.subtree_control" files to the user. Second, if the | 442 | "cgroup.threads" and "cgroup.subtree_control" files to the user. |
| 339 | "nsdelegate" mount option is set, automatically to a cgroup namespace | 443 | Second, if the "nsdelegate" mount option is set, automatically to a |
| 340 | on namespace creation. | 444 | cgroup namespace on namespace creation. |
| 341 | 445 | ||
| 342 | Because the resource control interface files in a given directory | 446 | Because the resource control interface files in a given directory |
| 343 | control the distribution of the parent's resources, the delegatee | 447 | control the distribution of the parent's resources, the delegatee |
| @@ -644,6 +748,29 @@ Core Interface Files | |||
| 644 | 748 | ||
| 645 | All cgroup core files are prefixed with "cgroup." | 749 | All cgroup core files are prefixed with "cgroup." |
| 646 | 750 | ||
| 751 | cgroup.type | ||
| 752 | |||
| 753 | A read-write single value file which exists on non-root | ||
| 754 | cgroups. | ||
| 755 | |||
| 756 | When read, it indicates the current type of the cgroup, which | ||
| 757 | can be one of the following values. | ||
| 758 | |||
| 759 | - "domain" : A normal valid domain cgroup. | ||
| 760 | |||
| 761 | - "domain threaded" : A threaded domain cgroup which is | ||
| 762 | serving as the root of a threaded subtree. | ||
| 763 | |||
| 764 | - "domain invalid" : A cgroup which is in an invalid state. | ||
| 765 | It can't be populated or have controllers enabled. It may | ||
| 766 | be allowed to become a threaded cgroup. | ||
| 767 | |||
| 768 | - "threaded" : A threaded cgroup which is a member of a | ||
| 769 | threaded subtree. | ||
| 770 | |||
| 771 | A cgroup can be turned into a threaded cgroup by writing | ||
| 772 | "threaded" to this file. | ||
| 773 | |||
| 647 | cgroup.procs | 774 | cgroup.procs |
| 648 | A read-write new-line separated values file which exists on | 775 | A read-write new-line separated values file which exists on |
| 649 | all cgroups. | 776 | all cgroups. |
| @@ -658,9 +785,6 @@ All cgroup core files are prefixed with "cgroup." | |||
| 658 | the PID to the cgroup. The writer should match all of the | 785 | the PID to the cgroup. The writer should match all of the |
| 659 | following conditions. | 786 | following conditions. |
| 660 | 787 | ||
| 661 | - Its euid is either root or must match either uid or suid of | ||
| 662 | the target process. | ||
| 663 | |||
| 664 | - It must have write access to the "cgroup.procs" file. | 788 | - It must have write access to the "cgroup.procs" file. |
| 665 | 789 | ||
| 666 | - It must have write access to the "cgroup.procs" file of the | 790 | - It must have write access to the "cgroup.procs" file of the |
| @@ -669,6 +793,35 @@ All cgroup core files are prefixed with "cgroup." | |||
| 669 | When delegating a sub-hierarchy, write access to this file | 793 | When delegating a sub-hierarchy, write access to this file |
| 670 | should be granted along with the containing directory. | 794 | should be granted along with the containing directory. |
| 671 | 795 | ||
| 796 | In a threaded cgroup, reading this file fails with EOPNOTSUPP | ||
| 797 | as all the processes belong to the thread root. Writing is | ||
| 798 | supported and moves every thread of the process to the cgroup. | ||
| 799 | |||
| 800 | cgroup.threads | ||
| 801 | A read-write new-line separated values file which exists on | ||
| 802 | all cgroups. | ||
| 803 | |||
| 804 | When read, it lists the TIDs of all threads which belong to | ||
| 805 | the cgroup one-per-line. The TIDs are not ordered and the | ||
| 806 | same TID may show up more than once if the thread got moved to | ||
| 807 | another cgroup and then back or the TID got recycled while | ||
| 808 | reading. | ||
| 809 | |||
| 810 | A TID can be written to migrate the thread associated with the | ||
| 811 | TID to the cgroup. The writer should match all of the | ||
| 812 | following conditions. | ||
| 813 | |||
| 814 | - It must have write access to the "cgroup.threads" file. | ||
| 815 | |||
| 816 | - The cgroup that the thread is currently in must be in the | ||
| 817 | same resource domain as the destination cgroup. | ||
| 818 | |||
| 819 | - It must have write access to the "cgroup.procs" file of the | ||
| 820 | common ancestor of the source and destination cgroups. | ||
| 821 | |||
| 822 | When delegating a sub-hierarchy, write access to this file | ||
| 823 | should be granted along with the containing directory. | ||
| 824 | |||
| 672 | cgroup.controllers | 825 | cgroup.controllers |
| 673 | A read-only space separated values file which exists on all | 826 | A read-only space separated values file which exists on all |
| 674 | cgroups. | 827 | cgroups. |
| @@ -701,6 +854,38 @@ All cgroup core files are prefixed with "cgroup." | |||
| 701 | 1 if the cgroup or its descendants contains any live | 854 | 1 if the cgroup or its descendants contains any live |
| 702 | processes; otherwise, 0. | 855 | processes; otherwise, 0. |
| 703 | 856 | ||
| 857 | cgroup.max.descendants | ||
| 858 | A read-write single value file. The default is "max". | ||
| 859 | |||
| 860 | Maximum allowed number of descendant cgroups. | ||
| 861 | If the actual number of descendants is equal or larger, | ||
| 862 | an attempt to create a new cgroup in the hierarchy will fail. | ||
| 863 | |||
| 864 | cgroup.max.depth | ||
| 865 | A read-write single value file. The default is "max". | ||
| 866 | |||
| 867 | Maximum allowed descent depth below the current cgroup. | ||
| 868 | If the actual descent depth is equal or larger, | ||
| 869 | an attempt to create a new child cgroup will fail. | ||
| 870 | |||
| 871 | cgroup.stat | ||
| 872 | A read-only flat-keyed file with the following entries: | ||
| 873 | |||
| 874 | nr_descendants | ||
| 875 | Total number of visible descendant cgroups. | ||
| 876 | |||
| 877 | nr_dying_descendants | ||
| 878 | Total number of dying descendant cgroups. A cgroup becomes | ||
| 879 | dying after being deleted by a user. The cgroup will remain | ||
| 880 | in dying state for some undefined time (which can depend | ||
| 881 | on system load) before being completely destroyed. | ||
| 882 | |||
| 883 | A process can't enter a dying cgroup under any circumstances; | ||
| 884 | a dying cgroup can't revive. | ||
| 885 | |||
| 886 | A dying cgroup can consume system resources not exceeding | ||
| 887 | limits, which were active at the moment of cgroup deletion. | ||
| 888 | |||
| 704 | 889 | ||
| 705 | Controllers | 890 | Controllers |
| 706 | =========== | 891 | =========== |
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 09f4c7df1478..ade4a78a54c2 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h | |||
| @@ -74,6 +74,11 @@ enum { | |||
| 74 | * aren't writeable from inside the namespace. | 74 | * aren't writeable from inside the namespace. |
| 75 | */ | 75 | */ |
| 76 | CGRP_ROOT_NS_DELEGATE = (1 << 3), | 76 | CGRP_ROOT_NS_DELEGATE = (1 << 3), |
| 77 | |||
| 78 | /* | ||
| 79 | * Enable cpuset controller in v1 cgroup to use v2 behavior. | ||
| 80 | */ | ||
| 81 | CGRP_ROOT_CPUSET_V2_MODE = (1 << 4), | ||
| 77 | }; | 82 | }; |
| 78 | 83 | ||
| 79 | /* cftype->flags */ | 84 | /* cftype->flags */ |
| @@ -172,6 +177,14 @@ struct css_set { | |||
| 172 | /* reference count */ | 177 | /* reference count */ |
| 173 | refcount_t refcount; | 178 | refcount_t refcount; |
| 174 | 179 | ||
| 180 | /* | ||
| 181 | * For a domain cgroup, the following points to self. If threaded, | ||
| 182 | * to the matching cset of the nearest domain ancestor. The | ||
| 183 | * dom_cset provides access to the domain cgroup and its csses to | ||
| 184 | * which domain level resource consumptions should be charged. | ||
| 185 | */ | ||
| 186 | struct css_set *dom_cset; | ||
| 187 | |||
| 175 | /* the default cgroup associated with this css_set */ | 188 | /* the default cgroup associated with this css_set */ |
| 176 | struct cgroup *dfl_cgrp; | 189 | struct cgroup *dfl_cgrp; |
| 177 | 190 | ||
| @@ -200,6 +213,10 @@ struct css_set { | |||
| 200 | */ | 213 | */ |
| 201 | struct list_head e_cset_node[CGROUP_SUBSYS_COUNT]; | 214 | struct list_head e_cset_node[CGROUP_SUBSYS_COUNT]; |
| 202 | 215 | ||
| 216 | /* all threaded csets whose ->dom_cset points to this cset */ | ||
| 217 | struct list_head threaded_csets; | ||
| 218 | struct list_head threaded_csets_node; | ||
| 219 | |||
| 203 | /* | 220 | /* |
| 204 | * List running through all cgroup groups in the same hash | 221 | * List running through all cgroup groups in the same hash |
| 205 | * slot. Protected by css_set_lock | 222 | * slot. Protected by css_set_lock |
| @@ -261,13 +278,35 @@ struct cgroup { | |||
| 261 | */ | 278 | */ |
| 262 | int level; | 279 | int level; |
| 263 | 280 | ||
| 281 | /* Maximum allowed descent tree depth */ | ||
| 282 | int max_depth; | ||
| 283 | |||
| 284 | /* | ||
| 285 | * Keep track of total numbers of visible and dying descent cgroups. | ||
| 286 | * Dying cgroups are cgroups which were deleted by a user, | ||
| 287 | * but are still existing because someone else is holding a reference. | ||
| 288 | * max_descendants is a maximum allowed number of descent cgroups. | ||
| 289 | */ | ||
| 290 | int nr_descendants; | ||
| 291 | int nr_dying_descendants; | ||
| 292 | int max_descendants; | ||
| 293 | |||
| 264 | /* | 294 | /* |
| 265 | * Each non-empty css_set associated with this cgroup contributes | 295 | * Each non-empty css_set associated with this cgroup contributes |
| 266 | * one to populated_cnt. All children with non-zero popuplated_cnt | 296 | * one to nr_populated_csets. The counter is zero iff this cgroup |
| 267 | * of their own contribute one. The count is zero iff there's no | 297 | * doesn't have any tasks. |
| 268 | * task in this cgroup or its subtree. | 298 | * |
| 299 | * All children which have non-zero nr_populated_csets and/or | ||
| 300 | * nr_populated_children of their own contribute one to either | ||
| 301 | * nr_populated_domain_children or nr_populated_threaded_children | ||
| 302 | * depending on their type. Each counter is zero iff all cgroups | ||
| 303 | * of the type in the subtree proper don't have any tasks. | ||
| 269 | */ | 304 | */ |
| 270 | int populated_cnt; | 305 | int nr_populated_csets; |
| 306 | int nr_populated_domain_children; | ||
| 307 | int nr_populated_threaded_children; | ||
| 308 | |||
| 309 | int nr_threaded_children; /* # of live threaded child cgroups */ | ||
| 271 | 310 | ||
| 272 | struct kernfs_node *kn; /* cgroup kernfs entry */ | 311 | struct kernfs_node *kn; /* cgroup kernfs entry */ |
| 273 | struct cgroup_file procs_file; /* handle for "cgroup.procs" */ | 312 | struct cgroup_file procs_file; /* handle for "cgroup.procs" */ |
| @@ -306,6 +345,15 @@ struct cgroup { | |||
| 306 | struct list_head e_csets[CGROUP_SUBSYS_COUNT]; | 345 | struct list_head e_csets[CGROUP_SUBSYS_COUNT]; |
| 307 | 346 | ||
| 308 | /* | 347 | /* |
| 348 | * If !threaded, self. If threaded, it points to the nearest | ||
| 349 | * domain ancestor. Inside a threaded subtree, cgroups are exempt | ||
| 350 | * from process granularity and no-internal-task constraint. | ||
| 351 | * Domain level resource consumptions which aren't tied to a | ||
| 352 | * specific task are charged to the dom_cgrp. | ||
| 353 | */ | ||
| 354 | struct cgroup *dom_cgrp; | ||
| 355 | |||
| 356 | /* | ||
| 309 | * list of pidlists, up to two for each namespace (one for procs, one | 357 | * list of pidlists, up to two for each namespace (one for procs, one |
| 310 | * for tasks); created on demand. | 358 | * for tasks); created on demand. |
| 311 | */ | 359 | */ |
| @@ -492,6 +540,18 @@ struct cgroup_subsys { | |||
| 492 | bool implicit_on_dfl:1; | 540 | bool implicit_on_dfl:1; |
| 493 | 541 | ||
| 494 | /* | 542 | /* |
| 543 | * If %true, the controller, supports threaded mode on the default | ||
| 544 | * hierarchy. In a threaded subtree, both process granularity and | ||
| 545 | * no-internal-process constraint are ignored and a threaded | ||
| 546 | * controllers should be able to handle that. | ||
| 547 | * | ||
| 548 | * Note that as an implicit controller is automatically enabled on | ||
| 549 | * all cgroups on the default hierarchy, it should also be | ||
| 550 | * threaded. implicit && !threaded is not supported. | ||
| 551 | */ | ||
| 552 | bool threaded:1; | ||
| 553 | |||
| 554 | /* | ||
| 495 | * If %false, this subsystem is properly hierarchical - | 555 | * If %false, this subsystem is properly hierarchical - |
| 496 | * configuration, resource accounting and restriction on a parent | 556 | * configuration, resource accounting and restriction on a parent |
| 497 | * cgroup cover those of its children. If %true, hierarchy support | 557 | * cgroup cover those of its children. If %true, hierarchy support |
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 710a005c6b7a..085056e562b1 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h | |||
| @@ -36,18 +36,28 @@ | |||
| 36 | #define CGROUP_WEIGHT_DFL 100 | 36 | #define CGROUP_WEIGHT_DFL 100 |
| 37 | #define CGROUP_WEIGHT_MAX 10000 | 37 | #define CGROUP_WEIGHT_MAX 10000 |
| 38 | 38 | ||
| 39 | /* walk only threadgroup leaders */ | ||
| 40 | #define CSS_TASK_ITER_PROCS (1U << 0) | ||
| 41 | /* walk all threaded css_sets in the domain */ | ||
| 42 | #define CSS_TASK_ITER_THREADED (1U << 1) | ||
| 43 | |||
| 39 | /* a css_task_iter should be treated as an opaque object */ | 44 | /* a css_task_iter should be treated as an opaque object */ |
| 40 | struct css_task_iter { | 45 | struct css_task_iter { |
| 41 | struct cgroup_subsys *ss; | 46 | struct cgroup_subsys *ss; |
| 47 | unsigned int flags; | ||
| 42 | 48 | ||
| 43 | struct list_head *cset_pos; | 49 | struct list_head *cset_pos; |
| 44 | struct list_head *cset_head; | 50 | struct list_head *cset_head; |
| 45 | 51 | ||
| 52 | struct list_head *tcset_pos; | ||
| 53 | struct list_head *tcset_head; | ||
| 54 | |||
| 46 | struct list_head *task_pos; | 55 | struct list_head *task_pos; |
| 47 | struct list_head *tasks_head; | 56 | struct list_head *tasks_head; |
| 48 | struct list_head *mg_tasks_head; | 57 | struct list_head *mg_tasks_head; |
| 49 | 58 | ||
| 50 | struct css_set *cur_cset; | 59 | struct css_set *cur_cset; |
| 60 | struct css_set *cur_dcset; | ||
| 51 | struct task_struct *cur_task; | 61 | struct task_struct *cur_task; |
| 52 | struct list_head iters_node; /* css_set->task_iters */ | 62 | struct list_head iters_node; /* css_set->task_iters */ |
| 53 | }; | 63 | }; |
| @@ -129,7 +139,7 @@ struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset, | |||
| 129 | struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset, | 139 | struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset, |
| 130 | struct cgroup_subsys_state **dst_cssp); | 140 | struct cgroup_subsys_state **dst_cssp); |
| 131 | 141 | ||
| 132 | void css_task_iter_start(struct cgroup_subsys_state *css, | 142 | void css_task_iter_start(struct cgroup_subsys_state *css, unsigned int flags, |
| 133 | struct css_task_iter *it); | 143 | struct css_task_iter *it); |
| 134 | struct task_struct *css_task_iter_next(struct css_task_iter *it); | 144 | struct task_struct *css_task_iter_next(struct css_task_iter *it); |
| 135 | void css_task_iter_end(struct css_task_iter *it); | 145 | void css_task_iter_end(struct css_task_iter *it); |
| @@ -388,6 +398,16 @@ static inline void css_put_many(struct cgroup_subsys_state *css, unsigned int n) | |||
| 388 | percpu_ref_put_many(&css->refcnt, n); | 398 | percpu_ref_put_many(&css->refcnt, n); |
| 389 | } | 399 | } |
| 390 | 400 | ||
| 401 | static inline void cgroup_get(struct cgroup *cgrp) | ||
| 402 | { | ||
| 403 | css_get(&cgrp->self); | ||
| 404 | } | ||
| 405 | |||
| 406 | static inline bool cgroup_tryget(struct cgroup *cgrp) | ||
| 407 | { | ||
| 408 | return css_tryget(&cgrp->self); | ||
| 409 | } | ||
| 410 | |||
| 391 | static inline void cgroup_put(struct cgroup *cgrp) | 411 | static inline void cgroup_put(struct cgroup *cgrp) |
| 392 | { | 412 | { |
| 393 | css_put(&cgrp->self); | 413 | css_put(&cgrp->self); |
| @@ -500,6 +520,20 @@ static inline struct cgroup *task_cgroup(struct task_struct *task, | |||
| 500 | return task_css(task, subsys_id)->cgroup; | 520 | return task_css(task, subsys_id)->cgroup; |
| 501 | } | 521 | } |
| 502 | 522 | ||
| 523 | static inline struct cgroup *task_dfl_cgroup(struct task_struct *task) | ||
| 524 | { | ||
| 525 | return task_css_set(task)->dfl_cgrp; | ||
| 526 | } | ||
| 527 | |||
| 528 | static inline struct cgroup *cgroup_parent(struct cgroup *cgrp) | ||
| 529 | { | ||
| 530 | struct cgroup_subsys_state *parent_css = cgrp->self.parent; | ||
| 531 | |||
| 532 | if (parent_css) | ||
| 533 | return container_of(parent_css, struct cgroup, self); | ||
| 534 | return NULL; | ||
| 535 | } | ||
| 536 | |||
| 503 | /** | 537 | /** |
| 504 | * cgroup_is_descendant - test ancestry | 538 | * cgroup_is_descendant - test ancestry |
| 505 | * @cgrp: the cgroup to be tested | 539 | * @cgrp: the cgroup to be tested |
| @@ -537,7 +571,8 @@ static inline bool task_under_cgroup_hierarchy(struct task_struct *task, | |||
| 537 | /* no synchronization, the result can only be used as a hint */ | 571 | /* no synchronization, the result can only be used as a hint */ |
| 538 | static inline bool cgroup_is_populated(struct cgroup *cgrp) | 572 | static inline bool cgroup_is_populated(struct cgroup *cgrp) |
| 539 | { | 573 | { |
| 540 | return cgrp->populated_cnt; | 574 | return cgrp->nr_populated_csets + cgrp->nr_populated_domain_children + |
| 575 | cgrp->nr_populated_threaded_children; | ||
| 541 | } | 576 | } |
| 542 | 577 | ||
| 543 | /* returns ino associated with a cgroup */ | 578 | /* returns ino associated with a cgroup */ |
diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h index 8b4c3c2f2509..5151ff256c29 100644 --- a/kernel/cgroup/cgroup-internal.h +++ b/kernel/cgroup/cgroup-internal.h | |||
| @@ -156,6 +156,8 @@ static inline void get_css_set(struct css_set *cset) | |||
| 156 | 156 | ||
| 157 | bool cgroup_ssid_enabled(int ssid); | 157 | bool cgroup_ssid_enabled(int ssid); |
| 158 | bool cgroup_on_dfl(const struct cgroup *cgrp); | 158 | bool cgroup_on_dfl(const struct cgroup *cgrp); |
| 159 | bool cgroup_is_thread_root(struct cgroup *cgrp); | ||
| 160 | bool cgroup_is_threaded(struct cgroup *cgrp); | ||
| 159 | 161 | ||
| 160 | struct cgroup_root *cgroup_root_from_kf(struct kernfs_root *kf_root); | 162 | struct cgroup_root *cgroup_root_from_kf(struct kernfs_root *kf_root); |
| 161 | struct cgroup *task_cgroup_from_root(struct task_struct *task, | 163 | struct cgroup *task_cgroup_from_root(struct task_struct *task, |
| @@ -173,7 +175,7 @@ struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags, | |||
| 173 | struct cgroup_root *root, unsigned long magic, | 175 | struct cgroup_root *root, unsigned long magic, |
| 174 | struct cgroup_namespace *ns); | 176 | struct cgroup_namespace *ns); |
| 175 | 177 | ||
| 176 | bool cgroup_may_migrate_to(struct cgroup *dst_cgrp); | 178 | int cgroup_migrate_vet_dst(struct cgroup *dst_cgrp); |
| 177 | void cgroup_migrate_finish(struct cgroup_mgctx *mgctx); | 179 | void cgroup_migrate_finish(struct cgroup_mgctx *mgctx); |
| 178 | void cgroup_migrate_add_src(struct css_set *src_cset, struct cgroup *dst_cgrp, | 180 | void cgroup_migrate_add_src(struct css_set *src_cset, struct cgroup *dst_cgrp, |
| 179 | struct cgroup_mgctx *mgctx); | 181 | struct cgroup_mgctx *mgctx); |
| @@ -183,10 +185,10 @@ int cgroup_migrate(struct task_struct *leader, bool threadgroup, | |||
| 183 | 185 | ||
| 184 | int cgroup_attach_task(struct cgroup *dst_cgrp, struct task_struct *leader, | 186 | int cgroup_attach_task(struct cgroup *dst_cgrp, struct task_struct *leader, |
| 185 | bool threadgroup); | 187 | bool threadgroup); |
| 186 | ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf, | 188 | struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup) |
| 187 | size_t nbytes, loff_t off, bool threadgroup); | 189 | __acquires(&cgroup_threadgroup_rwsem); |
| 188 | ssize_t cgroup_procs_write(struct kernfs_open_file *of, char *buf, size_t nbytes, | 190 | void cgroup_procs_write_finish(struct task_struct *task) |
| 189 | loff_t off); | 191 | __releases(&cgroup_threadgroup_rwsem); |
| 190 | 192 | ||
| 191 | void cgroup_lock_and_drain_offline(struct cgroup *cgrp); | 193 | void cgroup_lock_and_drain_offline(struct cgroup *cgrp); |
| 192 | 194 | ||
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 7bf4b1533f34..024085daab1a 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c | |||
| @@ -99,8 +99,9 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from) | |||
| 99 | if (cgroup_on_dfl(to)) | 99 | if (cgroup_on_dfl(to)) |
| 100 | return -EINVAL; | 100 | return -EINVAL; |
| 101 | 101 | ||
| 102 | if (!cgroup_may_migrate_to(to)) | 102 | ret = cgroup_migrate_vet_dst(to); |
| 103 | return -EBUSY; | 103 | if (ret) |
| 104 | return ret; | ||
| 104 | 105 | ||
| 105 | mutex_lock(&cgroup_mutex); | 106 | mutex_lock(&cgroup_mutex); |
| 106 | 107 | ||
| @@ -121,7 +122,7 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from) | |||
| 121 | * ->can_attach() fails. | 122 | * ->can_attach() fails. |
| 122 | */ | 123 | */ |
| 123 | do { | 124 | do { |
| 124 | css_task_iter_start(&from->self, &it); | 125 | css_task_iter_start(&from->self, 0, &it); |
| 125 | task = css_task_iter_next(&it); | 126 | task = css_task_iter_next(&it); |
| 126 | if (task) | 127 | if (task) |
| 127 | get_task_struct(task); | 128 | get_task_struct(task); |
| @@ -373,7 +374,7 @@ static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type, | |||
| 373 | if (!array) | 374 | if (!array) |
| 374 | return -ENOMEM; | 375 | return -ENOMEM; |
| 375 | /* now, populate the array */ | 376 | /* now, populate the array */ |
| 376 | css_task_iter_start(&cgrp->self, &it); | 377 | css_task_iter_start(&cgrp->self, 0, &it); |
| 377 | while ((tsk = css_task_iter_next(&it))) { | 378 | while ((tsk = css_task_iter_next(&it))) { |
| 378 | if (unlikely(n == length)) | 379 | if (unlikely(n == length)) |
| 379 | break; | 380 | break; |
| @@ -510,10 +511,58 @@ static int cgroup_pidlist_show(struct seq_file *s, void *v) | |||
| 510 | return 0; | 511 | return 0; |
| 511 | } | 512 | } |
| 512 | 513 | ||
| 513 | static ssize_t cgroup_tasks_write(struct kernfs_open_file *of, | 514 | static ssize_t __cgroup1_procs_write(struct kernfs_open_file *of, |
| 514 | char *buf, size_t nbytes, loff_t off) | 515 | char *buf, size_t nbytes, loff_t off, |
| 516 | bool threadgroup) | ||
| 515 | { | 517 | { |
| 516 | return __cgroup_procs_write(of, buf, nbytes, off, false); | 518 | struct cgroup *cgrp; |
| 519 | struct task_struct *task; | ||
| 520 | const struct cred *cred, *tcred; | ||
| 521 | ssize_t ret; | ||
| 522 | |||
| 523 | cgrp = cgroup_kn_lock_live(of->kn, false); | ||
| 524 | if (!cgrp) | ||
| 525 | return -ENODEV; | ||
| 526 | |||
| 527 | task = cgroup_procs_write_start(buf, threadgroup); | ||
| 528 | ret = PTR_ERR_OR_ZERO(task); | ||
| 529 | if (ret) | ||
| 530 | goto out_unlock; | ||
| 531 | |||
| 532 | /* | ||
| 533 | * Even if we're attaching all tasks in the thread group, we only | ||
| 534 | * need to check permissions on one of them. | ||
| 535 | */ | ||
| 536 | cred = current_cred(); | ||
| 537 | tcred = get_task_cred(task); | ||
| 538 | if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) && | ||
| 539 | !uid_eq(cred->euid, tcred->uid) && | ||
| 540 | !uid_eq(cred->euid, tcred->suid)) | ||
| 541 | ret = -EACCES; | ||
| 542 | put_cred(tcred); | ||
| 543 | if (ret) | ||
| 544 | goto out_finish; | ||
| 545 | |||
| 546 | ret = cgroup_attach_task(cgrp, task, threadgroup); | ||
| 547 | |||
| 548 | out_finish: | ||
| 549 | cgroup_procs_write_finish(task); | ||
| 550 | out_unlock: | ||
| 551 | cgroup_kn_unlock(of->kn); | ||
| 552 | |||
| 553 | return ret ?: nbytes; | ||
| 554 | } | ||
| 555 | |||
| 556 | static ssize_t cgroup1_procs_write(struct kernfs_open_file *of, | ||
| 557 | char *buf, size_t nbytes, loff_t off) | ||
| 558 | { | ||
| 559 | return __cgroup1_procs_write(of, buf, nbytes, off, true); | ||
| 560 | } | ||
| 561 | |||
| 562 | static ssize_t cgroup1_tasks_write(struct kernfs_open_file *of, | ||
| 563 | char *buf, size_t nbytes, loff_t off) | ||
| 564 | { | ||
| 565 | return __cgroup1_procs_write(of, buf, nbytes, off, false); | ||
| 517 | } | 566 | } |
| 518 | 567 | ||
| 519 | static ssize_t cgroup_release_agent_write(struct kernfs_open_file *of, | 568 | static ssize_t cgroup_release_agent_write(struct kernfs_open_file *of, |
| @@ -592,7 +641,7 @@ struct cftype cgroup1_base_files[] = { | |||
| 592 | .seq_stop = cgroup_pidlist_stop, | 641 | .seq_stop = cgroup_pidlist_stop, |
| 593 | .seq_show = cgroup_pidlist_show, | 642 | .seq_show = cgroup_pidlist_show, |
| 594 | .private = CGROUP_FILE_PROCS, | 643 | .private = CGROUP_FILE_PROCS, |
| 595 | .write = cgroup_procs_write, | 644 | .write = cgroup1_procs_write, |
| 596 | }, | 645 | }, |
| 597 | { | 646 | { |
| 598 | .name = "cgroup.clone_children", | 647 | .name = "cgroup.clone_children", |
| @@ -611,7 +660,7 @@ struct cftype cgroup1_base_files[] = { | |||
| 611 | .seq_stop = cgroup_pidlist_stop, | 660 | .seq_stop = cgroup_pidlist_stop, |
| 612 | .seq_show = cgroup_pidlist_show, | 661 | .seq_show = cgroup_pidlist_show, |
| 613 | .private = CGROUP_FILE_TASKS, | 662 | .private = CGROUP_FILE_TASKS, |
| 614 | .write = cgroup_tasks_write, | 663 | .write = cgroup1_tasks_write, |
| 615 | }, | 664 | }, |
| 616 | { | 665 | { |
| 617 | .name = "notify_on_release", | 666 | .name = "notify_on_release", |
| @@ -701,7 +750,7 @@ int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry) | |||
| 701 | } | 750 | } |
| 702 | rcu_read_unlock(); | 751 | rcu_read_unlock(); |
| 703 | 752 | ||
| 704 | css_task_iter_start(&cgrp->self, &it); | 753 | css_task_iter_start(&cgrp->self, 0, &it); |
| 705 | while ((tsk = css_task_iter_next(&it))) { | 754 | while ((tsk = css_task_iter_next(&it))) { |
| 706 | switch (tsk->state) { | 755 | switch (tsk->state) { |
| 707 | case TASK_RUNNING: | 756 | case TASK_RUNNING: |
| @@ -846,6 +895,8 @@ static int cgroup1_show_options(struct seq_file *seq, struct kernfs_root *kf_roo | |||
| 846 | seq_puts(seq, ",noprefix"); | 895 | seq_puts(seq, ",noprefix"); |
| 847 | if (root->flags & CGRP_ROOT_XATTR) | 896 | if (root->flags & CGRP_ROOT_XATTR) |
| 848 | seq_puts(seq, ",xattr"); | 897 | seq_puts(seq, ",xattr"); |
| 898 | if (root->flags & CGRP_ROOT_CPUSET_V2_MODE) | ||
| 899 | seq_puts(seq, ",cpuset_v2_mode"); | ||
| 849 | 900 | ||
| 850 | spin_lock(&release_agent_path_lock); | 901 | spin_lock(&release_agent_path_lock); |
| 851 | if (strlen(root->release_agent_path)) | 902 | if (strlen(root->release_agent_path)) |
| @@ -900,6 +951,10 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) | |||
| 900 | opts->cpuset_clone_children = true; | 951 | opts->cpuset_clone_children = true; |
| 901 | continue; | 952 | continue; |
| 902 | } | 953 | } |
| 954 | if (!strcmp(token, "cpuset_v2_mode")) { | ||
| 955 | opts->flags |= CGRP_ROOT_CPUSET_V2_MODE; | ||
| 956 | continue; | ||
| 957 | } | ||
| 903 | if (!strcmp(token, "xattr")) { | 958 | if (!strcmp(token, "xattr")) { |
| 904 | opts->flags |= CGRP_ROOT_XATTR; | 959 | opts->flags |= CGRP_ROOT_XATTR; |
| 905 | continue; | 960 | continue; |
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index f64fc967a9ef..4f2196a00953 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c | |||
| @@ -162,6 +162,9 @@ static u16 cgrp_dfl_inhibit_ss_mask; | |||
| 162 | /* some controllers are implicitly enabled on the default hierarchy */ | 162 | /* some controllers are implicitly enabled on the default hierarchy */ |
| 163 | static u16 cgrp_dfl_implicit_ss_mask; | 163 | static u16 cgrp_dfl_implicit_ss_mask; |
| 164 | 164 | ||
| 165 | /* some controllers can be threaded on the default hierarchy */ | ||
| 166 | static u16 cgrp_dfl_threaded_ss_mask; | ||
| 167 | |||
| 165 | /* The list of hierarchy roots */ | 168 | /* The list of hierarchy roots */ |
| 166 | LIST_HEAD(cgroup_roots); | 169 | LIST_HEAD(cgroup_roots); |
| 167 | static int cgroup_root_count; | 170 | static int cgroup_root_count; |
| @@ -316,13 +319,87 @@ static void cgroup_idr_remove(struct idr *idr, int id) | |||
| 316 | spin_unlock_bh(&cgroup_idr_lock); | 319 | spin_unlock_bh(&cgroup_idr_lock); |
| 317 | } | 320 | } |
| 318 | 321 | ||
| 319 | static struct cgroup *cgroup_parent(struct cgroup *cgrp) | 322 | static bool cgroup_has_tasks(struct cgroup *cgrp) |
| 320 | { | 323 | { |
| 321 | struct cgroup_subsys_state *parent_css = cgrp->self.parent; | 324 | return cgrp->nr_populated_csets; |
| 325 | } | ||
| 322 | 326 | ||
| 323 | if (parent_css) | 327 | bool cgroup_is_threaded(struct cgroup *cgrp) |
| 324 | return container_of(parent_css, struct cgroup, self); | 328 | { |
| 325 | return NULL; | 329 | return cgrp->dom_cgrp != cgrp; |
| 330 | } | ||
| 331 | |||
| 332 | /* can @cgrp host both domain and threaded children? */ | ||
| 333 | static bool cgroup_is_mixable(struct cgroup *cgrp) | ||
| 334 | { | ||
| 335 | /* | ||
| 336 | * Root isn't under domain level resource control exempting it from | ||
| 337 | * the no-internal-process constraint, so it can serve as a thread | ||
| 338 | * root and a parent of resource domains at the same time. | ||
| 339 | */ | ||
| 340 | return !cgroup_parent(cgrp); | ||
| 341 | } | ||
| 342 | |||
| 343 | /* can @cgrp become a thread root? should always be true for a thread root */ | ||
| 344 | static bool cgroup_can_be_thread_root(struct cgroup *cgrp) | ||
| 345 | { | ||
| 346 | /* mixables don't care */ | ||
| 347 | if (cgroup_is_mixable(cgrp)) | ||
| 348 | return true; | ||
| 349 | |||
| 350 | /* domain roots can't be nested under threaded */ | ||
| 351 | if (cgroup_is_threaded(cgrp)) | ||
| 352 | return false; | ||
| 353 | |||
| 354 | /* can only have either domain or threaded children */ | ||
| 355 | if (cgrp->nr_populated_domain_children) | ||
| 356 | return false; | ||
| 357 | |||
| 358 | /* and no domain controllers can be enabled */ | ||
| 359 | if (cgrp->subtree_control & ~cgrp_dfl_threaded_ss_mask) | ||
| 360 | return false; | ||
| 361 | |||
| 362 | return true; | ||
| 363 | } | ||
| 364 | |||
| 365 | /* is @cgrp root of a threaded subtree? */ | ||
| 366 | bool cgroup_is_thread_root(struct cgroup *cgrp) | ||
| 367 | { | ||
| 368 | /* thread root should be a domain */ | ||
| 369 | if (cgroup_is_threaded(cgrp)) | ||
| 370 | return false; | ||
| 371 | |||
| 372 | /* a domain w/ threaded children is a thread root */ | ||
| 373 | if (cgrp->nr_threaded_children) | ||
| 374 | return true; | ||
| 375 | |||
| 376 | /* | ||
| 377 | * A domain which has tasks and explicit threaded controllers | ||
| 378 | * enabled is a thread root. | ||
| 379 | */ | ||
| 380 | if (cgroup_has_tasks(cgrp) && | ||
| 381 | (cgrp->subtree_control & cgrp_dfl_threaded_ss_mask)) | ||
| 382 | return true; | ||
| 383 | |||
| 384 | return false; | ||
| 385 | } | ||
| 386 | |||
| 387 | /* a domain which isn't connected to the root w/o breakage can't be used */ | ||
| 388 | static bool cgroup_is_valid_domain(struct cgroup *cgrp) | ||
| 389 | { | ||
| 390 | /* the cgroup itself can be a thread root */ | ||
| 391 | if (cgroup_is_threaded(cgrp)) | ||
| 392 | return false; | ||
| 393 | |||
| 394 | /* but the ancestors can't be unless mixable */ | ||
| 395 | while ((cgrp = cgroup_parent(cgrp))) { | ||
| 396 | if (!cgroup_is_mixable(cgrp) && cgroup_is_thread_root(cgrp)) | ||
| 397 | return false; | ||
| 398 | if (cgroup_is_threaded(cgrp)) | ||
| 399 | return false; | ||
| 400 | } | ||
| 401 | |||
| 402 | return true; | ||
| 326 | } | 403 | } |
| 327 | 404 | ||
| 328 | /* subsystems visibly enabled on a cgroup */ | 405 | /* subsystems visibly enabled on a cgroup */ |
| @@ -331,8 +408,14 @@ static u16 cgroup_control(struct cgroup *cgrp) | |||
| 331 | struct cgroup *parent = cgroup_parent(cgrp); | 408 | struct cgroup *parent = cgroup_parent(cgrp); |
| 332 | u16 root_ss_mask = cgrp->root->subsys_mask; | 409 | u16 root_ss_mask = cgrp->root->subsys_mask; |
| 333 | 410 | ||
| 334 | if (parent) | 411 | if (parent) { |
| 335 | return parent->subtree_control; | 412 | u16 ss_mask = parent->subtree_control; |
| 413 | |||
| 414 | /* threaded cgroups can only have threaded controllers */ | ||
| 415 | if (cgroup_is_threaded(cgrp)) | ||
| 416 | ss_mask &= cgrp_dfl_threaded_ss_mask; | ||
| 417 | return ss_mask; | ||
| 418 | } | ||
| 336 | 419 | ||
| 337 | if (cgroup_on_dfl(cgrp)) | 420 | if (cgroup_on_dfl(cgrp)) |
| 338 | root_ss_mask &= ~(cgrp_dfl_inhibit_ss_mask | | 421 | root_ss_mask &= ~(cgrp_dfl_inhibit_ss_mask | |
| @@ -345,8 +428,14 @@ static u16 cgroup_ss_mask(struct cgroup *cgrp) | |||
| 345 | { | 428 | { |
| 346 | struct cgroup *parent = cgroup_parent(cgrp); | 429 | struct cgroup *parent = cgroup_parent(cgrp); |
| 347 | 430 | ||
| 348 | if (parent) | 431 | if (parent) { |
| 349 | return parent->subtree_ss_mask; | 432 | u16 ss_mask = parent->subtree_ss_mask; |
| 433 | |||
| 434 | /* threaded cgroups can only have threaded controllers */ | ||
| 435 | if (cgroup_is_threaded(cgrp)) | ||
| 436 | ss_mask &= cgrp_dfl_threaded_ss_mask; | ||
| 437 | return ss_mask; | ||
| 438 | } | ||
| 350 | 439 | ||
| 351 | return cgrp->root->subsys_mask; | 440 | return cgrp->root->subsys_mask; |
| 352 | } | 441 | } |
| @@ -436,22 +525,12 @@ out_unlock: | |||
| 436 | return css; | 525 | return css; |
| 437 | } | 526 | } |
| 438 | 527 | ||
| 439 | static void __maybe_unused cgroup_get(struct cgroup *cgrp) | ||
| 440 | { | ||
| 441 | css_get(&cgrp->self); | ||
| 442 | } | ||
| 443 | |||
| 444 | static void cgroup_get_live(struct cgroup *cgrp) | 528 | static void cgroup_get_live(struct cgroup *cgrp) |
| 445 | { | 529 | { |
| 446 | WARN_ON_ONCE(cgroup_is_dead(cgrp)); | 530 | WARN_ON_ONCE(cgroup_is_dead(cgrp)); |
| 447 | css_get(&cgrp->self); | 531 | css_get(&cgrp->self); |
| 448 | } | 532 | } |
| 449 | 533 | ||
| 450 | static bool cgroup_tryget(struct cgroup *cgrp) | ||
| 451 | { | ||
| 452 | return css_tryget(&cgrp->self); | ||
| 453 | } | ||
| 454 | |||
| 455 | struct cgroup_subsys_state *of_css(struct kernfs_open_file *of) | 534 | struct cgroup_subsys_state *of_css(struct kernfs_open_file *of) |
| 456 | { | 535 | { |
| 457 | struct cgroup *cgrp = of->kn->parent->priv; | 536 | struct cgroup *cgrp = of->kn->parent->priv; |
| @@ -560,9 +639,11 @@ EXPORT_SYMBOL_GPL(of_css); | |||
| 560 | */ | 639 | */ |
| 561 | struct css_set init_css_set = { | 640 | struct css_set init_css_set = { |
| 562 | .refcount = REFCOUNT_INIT(1), | 641 | .refcount = REFCOUNT_INIT(1), |
| 642 | .dom_cset = &init_css_set, | ||
| 563 | .tasks = LIST_HEAD_INIT(init_css_set.tasks), | 643 | .tasks = LIST_HEAD_INIT(init_css_set.tasks), |
| 564 | .mg_tasks = LIST_HEAD_INIT(init_css_set.mg_tasks), | 644 | .mg_tasks = LIST_HEAD_INIT(init_css_set.mg_tasks), |
| 565 | .task_iters = LIST_HEAD_INIT(init_css_set.task_iters), | 645 | .task_iters = LIST_HEAD_INIT(init_css_set.task_iters), |
| 646 | .threaded_csets = LIST_HEAD_INIT(init_css_set.threaded_csets), | ||
| 566 | .cgrp_links = LIST_HEAD_INIT(init_css_set.cgrp_links), | 647 | .cgrp_links = LIST_HEAD_INIT(init_css_set.cgrp_links), |
| 567 | .mg_preload_node = LIST_HEAD_INIT(init_css_set.mg_preload_node), | 648 | .mg_preload_node = LIST_HEAD_INIT(init_css_set.mg_preload_node), |
| 568 | .mg_node = LIST_HEAD_INIT(init_css_set.mg_node), | 649 | .mg_node = LIST_HEAD_INIT(init_css_set.mg_node), |
| @@ -570,6 +651,11 @@ struct css_set init_css_set = { | |||
| 570 | 651 | ||
| 571 | static int css_set_count = 1; /* 1 for init_css_set */ | 652 | static int css_set_count = 1; /* 1 for init_css_set */ |
| 572 | 653 | ||
| 654 | static bool css_set_threaded(struct css_set *cset) | ||
| 655 | { | ||
| 656 | return cset->dom_cset != cset; | ||
| 657 | } | ||
| 658 | |||
| 573 | /** | 659 | /** |
| 574 | * css_set_populated - does a css_set contain any tasks? | 660 | * css_set_populated - does a css_set contain any tasks? |
| 575 | * @cset: target css_set | 661 | * @cset: target css_set |
| @@ -587,39 +673,48 @@ static bool css_set_populated(struct css_set *cset) | |||
| 587 | } | 673 | } |
| 588 | 674 | ||
| 589 | /** | 675 | /** |
| 590 | * cgroup_update_populated - updated populated count of a cgroup | 676 | * cgroup_update_populated - update the populated count of a cgroup |
| 591 | * @cgrp: the target cgroup | 677 | * @cgrp: the target cgroup |
| 592 | * @populated: inc or dec populated count | 678 | * @populated: inc or dec populated count |
| 593 | * | 679 | * |
| 594 | * One of the css_sets associated with @cgrp is either getting its first | 680 | * One of the css_sets associated with @cgrp is either getting its first |
| 595 | * task or losing the last. Update @cgrp->populated_cnt accordingly. The | 681 | * task or losing the last. Update @cgrp->nr_populated_* accordingly. The |
| 596 | * count is propagated towards root so that a given cgroup's populated_cnt | 682 | * count is propagated towards root so that a given cgroup's |
| 597 | * is zero iff the cgroup and all its descendants don't contain any tasks. | 683 | * nr_populated_children is zero iff none of its descendants contain any |
| 684 | * tasks. | ||
| 598 | * | 685 | * |
| 599 | * @cgrp's interface file "cgroup.populated" is zero if | 686 | * @cgrp's interface file "cgroup.populated" is zero if both |
| 600 | * @cgrp->populated_cnt is zero and 1 otherwise. When @cgrp->populated_cnt | 687 | * @cgrp->nr_populated_csets and @cgrp->nr_populated_children are zero and |
| 601 | * changes from or to zero, userland is notified that the content of the | 688 | * 1 otherwise. When the sum changes from or to zero, userland is notified |
| 602 | * interface file has changed. This can be used to detect when @cgrp and | 689 | * that the content of the interface file has changed. This can be used to |
| 603 | * its descendants become populated or empty. | 690 | * detect when @cgrp and its descendants become populated or empty. |
| 604 | */ | 691 | */ |
| 605 | static void cgroup_update_populated(struct cgroup *cgrp, bool populated) | 692 | static void cgroup_update_populated(struct cgroup *cgrp, bool populated) |
| 606 | { | 693 | { |
| 694 | struct cgroup *child = NULL; | ||
| 695 | int adj = populated ? 1 : -1; | ||
| 696 | |||
| 607 | lockdep_assert_held(&css_set_lock); | 697 | lockdep_assert_held(&css_set_lock); |
| 608 | 698 | ||
| 609 | do { | 699 | do { |
| 610 | bool trigger; | 700 | bool was_populated = cgroup_is_populated(cgrp); |
| 611 | 701 | ||
| 612 | if (populated) | 702 | if (!child) { |
| 613 | trigger = !cgrp->populated_cnt++; | 703 | cgrp->nr_populated_csets += adj; |
| 614 | else | 704 | } else { |
| 615 | trigger = !--cgrp->populated_cnt; | 705 | if (cgroup_is_threaded(child)) |
| 706 | cgrp->nr_populated_threaded_children += adj; | ||
| 707 | else | ||
| 708 | cgrp->nr_populated_domain_children += adj; | ||
| 709 | } | ||
| 616 | 710 | ||
| 617 | if (!trigger) | 711 | if (was_populated == cgroup_is_populated(cgrp)) |
| 618 | break; | 712 | break; |
| 619 | 713 | ||
| 620 | cgroup1_check_for_release(cgrp); | 714 | cgroup1_check_for_release(cgrp); |
| 621 | cgroup_file_notify(&cgrp->events_file); | 715 | cgroup_file_notify(&cgrp->events_file); |
| 622 | 716 | ||
| 717 | child = cgrp; | ||
| 623 | cgrp = cgroup_parent(cgrp); | 718 | cgrp = cgroup_parent(cgrp); |
| 624 | } while (cgrp); | 719 | } while (cgrp); |
| 625 | } | 720 | } |
| @@ -630,7 +725,7 @@ static void cgroup_update_populated(struct cgroup *cgrp, bool populated) | |||
| 630 | * @populated: whether @cset is populated or depopulated | 725 | * @populated: whether @cset is populated or depopulated |
| 631 | * | 726 | * |
| 632 | * @cset is either getting the first task or losing the last. Update the | 727 | * @cset is either getting the first task or losing the last. Update the |
| 633 | * ->populated_cnt of all associated cgroups accordingly. | 728 | * populated counters of all associated cgroups accordingly. |
| 634 | */ | 729 | */ |
| 635 | static void css_set_update_populated(struct css_set *cset, bool populated) | 730 | static void css_set_update_populated(struct css_set *cset, bool populated) |
| 636 | { | 731 | { |
| @@ -653,7 +748,7 @@ static void css_set_update_populated(struct css_set *cset, bool populated) | |||
| 653 | * css_set, @from_cset can be NULL. If @task is being disassociated | 748 | * css_set, @from_cset can be NULL. If @task is being disassociated |
| 654 | * instead of moved, @to_cset can be NULL. | 749 | * instead of moved, @to_cset can be NULL. |
| 655 | * | 750 | * |
| 656 | * This function automatically handles populated_cnt updates and | 751 | * This function automatically handles populated counter updates and |
| 657 | * css_task_iter adjustments but the caller is responsible for managing | 752 | * css_task_iter adjustments but the caller is responsible for managing |
| 658 | * @from_cset and @to_cset's reference counts. | 753 | * @from_cset and @to_cset's reference counts. |
| 659 | */ | 754 | */ |
| @@ -737,6 +832,8 @@ void put_css_set_locked(struct css_set *cset) | |||
| 737 | if (!refcount_dec_and_test(&cset->refcount)) | 832 | if (!refcount_dec_and_test(&cset->refcount)) |
| 738 | return; | 833 | return; |
| 739 | 834 | ||
| 835 | WARN_ON_ONCE(!list_empty(&cset->threaded_csets)); | ||
| 836 | |||
| 740 | /* This css_set is dead. unlink it and release cgroup and css refs */ | 837 | /* This css_set is dead. unlink it and release cgroup and css refs */ |
| 741 | for_each_subsys(ss, ssid) { | 838 | for_each_subsys(ss, ssid) { |
| 742 | list_del(&cset->e_cset_node[ssid]); | 839 | list_del(&cset->e_cset_node[ssid]); |
| @@ -753,6 +850,11 @@ void put_css_set_locked(struct css_set *cset) | |||
| 753 | kfree(link); | 850 | kfree(link); |
| 754 | } | 851 | } |
| 755 | 852 | ||
| 853 | if (css_set_threaded(cset)) { | ||
| 854 | list_del(&cset->threaded_csets_node); | ||
| 855 | put_css_set_locked(cset->dom_cset); | ||
| 856 | } | ||
| 857 | |||
| 756 | kfree_rcu(cset, rcu_head); | 858 | kfree_rcu(cset, rcu_head); |
| 757 | } | 859 | } |
| 758 | 860 | ||
| @@ -771,6 +873,7 @@ static bool compare_css_sets(struct css_set *cset, | |||
| 771 | struct cgroup *new_cgrp, | 873 | struct cgroup *new_cgrp, |
| 772 | struct cgroup_subsys_state *template[]) | 874 | struct cgroup_subsys_state *template[]) |
| 773 | { | 875 | { |
| 876 | struct cgroup *new_dfl_cgrp; | ||
| 774 | struct list_head *l1, *l2; | 877 | struct list_head *l1, *l2; |
| 775 | 878 | ||
| 776 | /* | 879 | /* |
| @@ -781,6 +884,16 @@ static bool compare_css_sets(struct css_set *cset, | |||
| 781 | if (memcmp(template, cset->subsys, sizeof(cset->subsys))) | 884 | if (memcmp(template, cset->subsys, sizeof(cset->subsys))) |
| 782 | return false; | 885 | return false; |
| 783 | 886 | ||
| 887 | |||
| 888 | /* @cset's domain should match the default cgroup's */ | ||
| 889 | if (cgroup_on_dfl(new_cgrp)) | ||
| 890 | new_dfl_cgrp = new_cgrp; | ||
| 891 | else | ||
| 892 | new_dfl_cgrp = old_cset->dfl_cgrp; | ||
| 893 | |||
| 894 | if (new_dfl_cgrp->dom_cgrp != cset->dom_cset->dfl_cgrp) | ||
| 895 | return false; | ||
| 896 | |||
| 784 | /* | 897 | /* |
| 785 | * Compare cgroup pointers in order to distinguish between | 898 | * Compare cgroup pointers in order to distinguish between |
| 786 | * different cgroups in hierarchies. As different cgroups may | 899 | * different cgroups in hierarchies. As different cgroups may |
| @@ -988,9 +1101,11 @@ static struct css_set *find_css_set(struct css_set *old_cset, | |||
| 988 | } | 1101 | } |
| 989 | 1102 | ||
| 990 | refcount_set(&cset->refcount, 1); | 1103 | refcount_set(&cset->refcount, 1); |
| 1104 | cset->dom_cset = cset; | ||
| 991 | INIT_LIST_HEAD(&cset->tasks); | 1105 | INIT_LIST_HEAD(&cset->tasks); |
| 992 | INIT_LIST_HEAD(&cset->mg_tasks); | 1106 | INIT_LIST_HEAD(&cset->mg_tasks); |
| 993 | INIT_LIST_HEAD(&cset->task_iters); | 1107 | INIT_LIST_HEAD(&cset->task_iters); |
| 1108 | INIT_LIST_HEAD(&cset->threaded_csets); | ||
| 994 | INIT_HLIST_NODE(&cset->hlist); | 1109 | INIT_HLIST_NODE(&cset->hlist); |
| 995 | INIT_LIST_HEAD(&cset->cgrp_links); | 1110 | INIT_LIST_HEAD(&cset->cgrp_links); |
| 996 | INIT_LIST_HEAD(&cset->mg_preload_node); | 1111 | INIT_LIST_HEAD(&cset->mg_preload_node); |
| @@ -1028,6 +1143,28 @@ static struct css_set *find_css_set(struct css_set *old_cset, | |||
| 1028 | 1143 | ||
| 1029 | spin_unlock_irq(&css_set_lock); | 1144 | spin_unlock_irq(&css_set_lock); |
| 1030 | 1145 | ||
| 1146 | /* | ||
| 1147 | * If @cset should be threaded, look up the matching dom_cset and | ||
| 1148 | * link them up. We first fully initialize @cset then look for the | ||
| 1149 | * dom_cset. It's simpler this way and safe as @cset is guaranteed | ||
| 1150 | * to stay empty until we return. | ||
| 1151 | */ | ||
| 1152 | if (cgroup_is_threaded(cset->dfl_cgrp)) { | ||
| 1153 | struct css_set *dcset; | ||
| 1154 | |||
| 1155 | dcset = find_css_set(cset, cset->dfl_cgrp->dom_cgrp); | ||
| 1156 | if (!dcset) { | ||
| 1157 | put_css_set(cset); | ||
| 1158 | return NULL; | ||
| 1159 | } | ||
| 1160 | |||
| 1161 | spin_lock_irq(&css_set_lock); | ||
| 1162 | cset->dom_cset = dcset; | ||
| 1163 | list_add_tail(&cset->threaded_csets_node, | ||
| 1164 | &dcset->threaded_csets); | ||
| 1165 | spin_unlock_irq(&css_set_lock); | ||
| 1166 | } | ||
| 1167 | |||
| 1031 | return cset; | 1168 | return cset; |
| 1032 | } | 1169 | } |
| 1033 | 1170 | ||
| @@ -1155,6 +1292,8 @@ static struct cgroup *cset_cgroup_from_root(struct css_set *cset, | |||
| 1155 | 1292 | ||
| 1156 | if (cset == &init_css_set) { | 1293 | if (cset == &init_css_set) { |
| 1157 | res = &root->cgrp; | 1294 | res = &root->cgrp; |
| 1295 | } else if (root == &cgrp_dfl_root) { | ||
| 1296 | res = cset->dfl_cgrp; | ||
| 1158 | } else { | 1297 | } else { |
| 1159 | struct cgrp_cset_link *link; | 1298 | struct cgrp_cset_link *link; |
| 1160 | 1299 | ||
| @@ -1670,6 +1809,9 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp) | |||
| 1670 | mutex_init(&cgrp->pidlist_mutex); | 1809 | mutex_init(&cgrp->pidlist_mutex); |
| 1671 | cgrp->self.cgroup = cgrp; | 1810 | cgrp->self.cgroup = cgrp; |
| 1672 | cgrp->self.flags |= CSS_ONLINE; | 1811 | cgrp->self.flags |= CSS_ONLINE; |
| 1812 | cgrp->dom_cgrp = cgrp; | ||
| 1813 | cgrp->max_descendants = INT_MAX; | ||
| 1814 | cgrp->max_depth = INT_MAX; | ||
| 1673 | 1815 | ||
| 1674 | for_each_subsys(ss, ssid) | 1816 | for_each_subsys(ss, ssid) |
| 1675 | INIT_LIST_HEAD(&cgrp->e_csets[ssid]); | 1817 | INIT_LIST_HEAD(&cgrp->e_csets[ssid]); |
| @@ -2172,17 +2314,40 @@ out_release_tset: | |||
| 2172 | } | 2314 | } |
| 2173 | 2315 | ||
| 2174 | /** | 2316 | /** |
| 2175 | * cgroup_may_migrate_to - verify whether a cgroup can be migration destination | 2317 | * cgroup_migrate_vet_dst - verify whether a cgroup can be migration destination |
| 2176 | * @dst_cgrp: destination cgroup to test | 2318 | * @dst_cgrp: destination cgroup to test |
| 2177 | * | 2319 | * |
| 2178 | * On the default hierarchy, except for the root, subtree_control must be | 2320 | * On the default hierarchy, except for the mixable, (possible) thread root |
| 2179 | * zero for migration destination cgroups with tasks so that child cgroups | 2321 | * and threaded cgroups, subtree_control must be zero for migration |
| 2180 | * don't compete against tasks. | 2322 | * destination cgroups with tasks so that child cgroups don't compete |
| 2323 | * against tasks. | ||
| 2181 | */ | 2324 | */ |
| 2182 | bool cgroup_may_migrate_to(struct cgroup *dst_cgrp) | 2325 | int cgroup_migrate_vet_dst(struct cgroup *dst_cgrp) |
| 2183 | { | 2326 | { |
| 2184 | return !cgroup_on_dfl(dst_cgrp) || !cgroup_parent(dst_cgrp) || | 2327 | /* v1 doesn't have any restriction */ |
| 2185 | !dst_cgrp->subtree_control; | 2328 | if (!cgroup_on_dfl(dst_cgrp)) |
| 2329 | return 0; | ||
| 2330 | |||
| 2331 | /* verify @dst_cgrp can host resources */ | ||
| 2332 | if (!cgroup_is_valid_domain(dst_cgrp->dom_cgrp)) | ||
| 2333 | return -EOPNOTSUPP; | ||
| 2334 | |||
| 2335 | /* mixables don't care */ | ||
| 2336 | if (cgroup_is_mixable(dst_cgrp)) | ||
| 2337 | return 0; | ||
| 2338 | |||
| 2339 | /* | ||
| 2340 | * If @dst_cgrp is already or can become a thread root or is | ||
| 2341 | * threaded, it doesn't matter. | ||
| 2342 | */ | ||
| 2343 | if (cgroup_can_be_thread_root(dst_cgrp) || cgroup_is_threaded(dst_cgrp)) | ||
| 2344 | return 0; | ||
| 2345 | |||
| 2346 | /* apply no-internal-process constraint */ | ||
| 2347 | if (dst_cgrp->subtree_control) | ||
| 2348 | return -EBUSY; | ||
| 2349 | |||
| 2350 | return 0; | ||
| 2186 | } | 2351 | } |
| 2187 | 2352 | ||
| 2188 | /** | 2353 | /** |
| @@ -2387,8 +2552,9 @@ int cgroup_attach_task(struct cgroup *dst_cgrp, struct task_struct *leader, | |||
| 2387 | struct task_struct *task; | 2552 | struct task_struct *task; |
| 2388 | int ret; | 2553 | int ret; |
| 2389 | 2554 | ||
| 2390 | if (!cgroup_may_migrate_to(dst_cgrp)) | 2555 | ret = cgroup_migrate_vet_dst(dst_cgrp); |
| 2391 | return -EBUSY; | 2556 | if (ret) |
| 2557 | return ret; | ||
| 2392 | 2558 | ||
| 2393 | /* look up all src csets */ | 2559 | /* look up all src csets */ |
| 2394 | spin_lock_irq(&css_set_lock); | 2560 | spin_lock_irq(&css_set_lock); |
| @@ -2415,96 +2581,23 @@ int cgroup_attach_task(struct cgroup *dst_cgrp, struct task_struct *leader, | |||
| 2415 | return ret; | 2581 | return ret; |
| 2416 | } | 2582 | } |
| 2417 | 2583 | ||
| 2418 | static int cgroup_procs_write_permission(struct task_struct *task, | 2584 | struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup) |
| 2419 | struct cgroup *dst_cgrp, | 2585 | __acquires(&cgroup_threadgroup_rwsem) |
| 2420 | struct kernfs_open_file *of) | ||
| 2421 | { | ||
| 2422 | struct super_block *sb = of->file->f_path.dentry->d_sb; | ||
| 2423 | struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; | ||
| 2424 | struct cgroup *root_cgrp = ns->root_cset->dfl_cgrp; | ||
| 2425 | struct cgroup *src_cgrp, *com_cgrp; | ||
| 2426 | struct inode *inode; | ||
| 2427 | int ret; | ||
| 2428 | |||
| 2429 | if (!cgroup_on_dfl(dst_cgrp)) { | ||
| 2430 | const struct cred *cred = current_cred(); | ||
| 2431 | const struct cred *tcred = get_task_cred(task); | ||
| 2432 | |||
| 2433 | /* | ||
| 2434 | * even if we're attaching all tasks in the thread group, | ||
| 2435 | * we only need to check permissions on one of them. | ||
| 2436 | */ | ||
| 2437 | if (uid_eq(cred->euid, GLOBAL_ROOT_UID) || | ||
| 2438 | uid_eq(cred->euid, tcred->uid) || | ||
| 2439 | uid_eq(cred->euid, tcred->suid)) | ||
| 2440 | ret = 0; | ||
| 2441 | else | ||
| 2442 | ret = -EACCES; | ||
| 2443 | |||
| 2444 | put_cred(tcred); | ||
| 2445 | return ret; | ||
| 2446 | } | ||
| 2447 | |||
| 2448 | /* find the source cgroup */ | ||
| 2449 | spin_lock_irq(&css_set_lock); | ||
| 2450 | src_cgrp = task_cgroup_from_root(task, &cgrp_dfl_root); | ||
| 2451 | spin_unlock_irq(&css_set_lock); | ||
| 2452 | |||
| 2453 | /* and the common ancestor */ | ||
| 2454 | com_cgrp = src_cgrp; | ||
| 2455 | while (!cgroup_is_descendant(dst_cgrp, com_cgrp)) | ||
| 2456 | com_cgrp = cgroup_parent(com_cgrp); | ||
| 2457 | |||
| 2458 | /* %current should be authorized to migrate to the common ancestor */ | ||
| 2459 | inode = kernfs_get_inode(sb, com_cgrp->procs_file.kn); | ||
| 2460 | if (!inode) | ||
| 2461 | return -ENOMEM; | ||
| 2462 | |||
| 2463 | ret = inode_permission(inode, MAY_WRITE); | ||
| 2464 | iput(inode); | ||
| 2465 | if (ret) | ||
| 2466 | return ret; | ||
| 2467 | |||
| 2468 | /* | ||
| 2469 | * If namespaces are delegation boundaries, %current must be able | ||
| 2470 | * to see both source and destination cgroups from its namespace. | ||
| 2471 | */ | ||
| 2472 | if ((cgrp_dfl_root.flags & CGRP_ROOT_NS_DELEGATE) && | ||
| 2473 | (!cgroup_is_descendant(src_cgrp, root_cgrp) || | ||
| 2474 | !cgroup_is_descendant(dst_cgrp, root_cgrp))) | ||
| 2475 | return -ENOENT; | ||
| 2476 | |||
| 2477 | return 0; | ||
| 2478 | } | ||
| 2479 | |||
| 2480 | /* | ||
| 2481 | * Find the task_struct of the task to attach by vpid and pass it along to the | ||
| 2482 | * function to attach either it or all tasks in its threadgroup. Will lock | ||
| 2483 | * cgroup_mutex and threadgroup. | ||
| 2484 | */ | ||
| 2485 | ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf, | ||
| 2486 | size_t nbytes, loff_t off, bool threadgroup) | ||
| 2487 | { | 2586 | { |
| 2488 | struct task_struct *tsk; | 2587 | struct task_struct *tsk; |
| 2489 | struct cgroup_subsys *ss; | ||
| 2490 | struct cgroup *cgrp; | ||
| 2491 | pid_t pid; | 2588 | pid_t pid; |
| 2492 | int ssid, ret; | ||
| 2493 | 2589 | ||
| 2494 | if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0) | 2590 | if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0) |
| 2495 | return -EINVAL; | 2591 | return ERR_PTR(-EINVAL); |
| 2496 | |||
| 2497 | cgrp = cgroup_kn_lock_live(of->kn, false); | ||
| 2498 | if (!cgrp) | ||
| 2499 | return -ENODEV; | ||
| 2500 | 2592 | ||
| 2501 | percpu_down_write(&cgroup_threadgroup_rwsem); | 2593 | percpu_down_write(&cgroup_threadgroup_rwsem); |
| 2594 | |||
| 2502 | rcu_read_lock(); | 2595 | rcu_read_lock(); |
| 2503 | if (pid) { | 2596 | if (pid) { |
| 2504 | tsk = find_task_by_vpid(pid); | 2597 | tsk = find_task_by_vpid(pid); |
| 2505 | if (!tsk) { | 2598 | if (!tsk) { |
| 2506 | ret = -ESRCH; | 2599 | tsk = ERR_PTR(-ESRCH); |
| 2507 | goto out_unlock_rcu; | 2600 | goto out_unlock_threadgroup; |
| 2508 | } | 2601 | } |
| 2509 | } else { | 2602 | } else { |
| 2510 | tsk = current; | 2603 | tsk = current; |
| @@ -2520,35 +2613,33 @@ ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf, | |||
| 2520 | * cgroup with no rt_runtime allocated. Just say no. | 2613 | * cgroup with no rt_runtime allocated. Just say no. |
| 2521 | */ | 2614 | */ |
| 2522 | if (tsk->no_cgroup_migration || (tsk->flags & PF_NO_SETAFFINITY)) { | 2615 | if (tsk->no_cgroup_migration || (tsk->flags & PF_NO_SETAFFINITY)) { |
| 2523 | ret = -EINVAL; | 2616 | tsk = ERR_PTR(-EINVAL); |
| 2524 | goto out_unlock_rcu; | 2617 | goto out_unlock_threadgroup; |
| 2525 | } | 2618 | } |
| 2526 | 2619 | ||
| 2527 | get_task_struct(tsk); | 2620 | get_task_struct(tsk); |
| 2621 | goto out_unlock_rcu; | ||
| 2622 | |||
| 2623 | out_unlock_threadgroup: | ||
| 2624 | percpu_up_write(&cgroup_threadgroup_rwsem); | ||
| 2625 | out_unlock_rcu: | ||
| 2528 | rcu_read_unlock(); | 2626 | rcu_read_unlock(); |
| 2627 | return tsk; | ||
| 2628 | } | ||
| 2529 | 2629 | ||
| 2530 | ret = cgroup_procs_write_permission(tsk, cgrp, of); | 2630 | void cgroup_procs_write_finish(struct task_struct *task) |
| 2531 | if (!ret) | 2631 | __releases(&cgroup_threadgroup_rwsem) |
| 2532 | ret = cgroup_attach_task(cgrp, tsk, threadgroup); | 2632 | { |
| 2633 | struct cgroup_subsys *ss; | ||
| 2634 | int ssid; | ||
| 2533 | 2635 | ||
| 2534 | put_task_struct(tsk); | 2636 | /* release reference from cgroup_procs_write_start() */ |
| 2535 | goto out_unlock_threadgroup; | 2637 | put_task_struct(task); |
| 2536 | 2638 | ||
| 2537 | out_unlock_rcu: | ||
| 2538 | rcu_read_unlock(); | ||
| 2539 | out_unlock_threadgroup: | ||
| 2540 | percpu_up_write(&cgroup_threadgroup_rwsem); | 2639 | percpu_up_write(&cgroup_threadgroup_rwsem); |
| 2541 | for_each_subsys(ss, ssid) | 2640 | for_each_subsys(ss, ssid) |
| 2542 | if (ss->post_attach) | 2641 | if (ss->post_attach) |
| 2543 | ss->post_attach(); | 2642 | ss->post_attach(); |
| 2544 | cgroup_kn_unlock(of->kn); | ||
| 2545 | return ret ?: nbytes; | ||
| 2546 | } | ||
| 2547 | |||
| 2548 | ssize_t cgroup_procs_write(struct kernfs_open_file *of, char *buf, size_t nbytes, | ||
| 2549 | loff_t off) | ||
| 2550 | { | ||
| 2551 | return __cgroup_procs_write(of, buf, nbytes, off, true); | ||
| 2552 | } | 2643 | } |
| 2553 | 2644 | ||
| 2554 | static void cgroup_print_ss_mask(struct seq_file *seq, u16 ss_mask) | 2645 | static void cgroup_print_ss_mask(struct seq_file *seq, u16 ss_mask) |
| @@ -2891,6 +2982,46 @@ static void cgroup_finalize_control(struct cgroup *cgrp, int ret) | |||
| 2891 | cgroup_apply_control_disable(cgrp); | 2982 | cgroup_apply_control_disable(cgrp); |
| 2892 | } | 2983 | } |
| 2893 | 2984 | ||
| 2985 | static int cgroup_vet_subtree_control_enable(struct cgroup *cgrp, u16 enable) | ||
| 2986 | { | ||
| 2987 | u16 domain_enable = enable & ~cgrp_dfl_threaded_ss_mask; | ||
| 2988 | |||
| 2989 | /* if nothing is getting enabled, nothing to worry about */ | ||
| 2990 | if (!enable) | ||
| 2991 | return 0; | ||
| 2992 | |||
| 2993 | /* can @cgrp host any resources? */ | ||
| 2994 | if (!cgroup_is_valid_domain(cgrp->dom_cgrp)) | ||
| 2995 | return -EOPNOTSUPP; | ||
| 2996 | |||
| 2997 | /* mixables don't care */ | ||
| 2998 | if (cgroup_is_mixable(cgrp)) | ||
| 2999 | return 0; | ||
| 3000 | |||
| 3001 | if (domain_enable) { | ||
| 3002 | /* can't enable domain controllers inside a thread subtree */ | ||
| 3003 | if (cgroup_is_thread_root(cgrp) || cgroup_is_threaded(cgrp)) | ||
| 3004 | return -EOPNOTSUPP; | ||
| 3005 | } else { | ||
| 3006 | /* | ||
| 3007 | * Threaded controllers can handle internal competitions | ||
| 3008 | * and are always allowed inside a (prospective) thread | ||
| 3009 | * subtree. | ||
| 3010 | */ | ||
| 3011 | if (cgroup_can_be_thread_root(cgrp) || cgroup_is_threaded(cgrp)) | ||
| 3012 | return 0; | ||
| 3013 | } | ||
| 3014 | |||
| 3015 | /* | ||
| 3016 | * Controllers can't be enabled for a cgroup with tasks to avoid | ||
| 3017 | * child cgroups competing against tasks. | ||
| 3018 | */ | ||
| 3019 | if (cgroup_has_tasks(cgrp)) | ||
| 3020 | return -EBUSY; | ||
| 3021 | |||
| 3022 | return 0; | ||
| 3023 | } | ||
| 3024 | |||
| 2894 | /* change the enabled child controllers for a cgroup in the default hierarchy */ | 3025 | /* change the enabled child controllers for a cgroup in the default hierarchy */ |
| 2895 | static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, | 3026 | static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, |
| 2896 | char *buf, size_t nbytes, | 3027 | char *buf, size_t nbytes, |
| @@ -2966,33 +3097,9 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, | |||
| 2966 | goto out_unlock; | 3097 | goto out_unlock; |
| 2967 | } | 3098 | } |
| 2968 | 3099 | ||
| 2969 | /* | 3100 | ret = cgroup_vet_subtree_control_enable(cgrp, enable); |
| 2970 | * Except for the root, subtree_control must be zero for a cgroup | 3101 | if (ret) |
| 2971 | * with tasks so that child cgroups don't compete against tasks. | 3102 | goto out_unlock; |
| 2972 | */ | ||
| 2973 | if (enable && cgroup_parent(cgrp)) { | ||
| 2974 | struct cgrp_cset_link *link; | ||
| 2975 | |||
| 2976 | /* | ||
| 2977 | * Because namespaces pin csets too, @cgrp->cset_links | ||
| 2978 | * might not be empty even when @cgrp is empty. Walk and | ||
| 2979 | * verify each cset. | ||
| 2980 | */ | ||
| 2981 | spin_lock_irq(&css_set_lock); | ||
| 2982 | |||
| 2983 | ret = 0; | ||
| 2984 | list_for_each_entry(link, &cgrp->cset_links, cset_link) { | ||
| 2985 | if (css_set_populated(link->cset)) { | ||
| 2986 | ret = -EBUSY; | ||
| 2987 | break; | ||
| 2988 | } | ||
| 2989 | } | ||
| 2990 | |||
| 2991 | spin_unlock_irq(&css_set_lock); | ||
| 2992 | |||
| 2993 | if (ret) | ||
| 2994 | goto out_unlock; | ||
| 2995 | } | ||
| 2996 | 3103 | ||
| 2997 | /* save and update control masks and prepare csses */ | 3104 | /* save and update control masks and prepare csses */ |
| 2998 | cgroup_save_control(cgrp); | 3105 | cgroup_save_control(cgrp); |
| @@ -3011,6 +3118,172 @@ out_unlock: | |||
| 3011 | return ret ?: nbytes; | 3118 | return ret ?: nbytes; |
| 3012 | } | 3119 | } |
| 3013 | 3120 | ||
| 3121 | /** | ||
| 3122 | * cgroup_enable_threaded - make @cgrp threaded | ||
| 3123 | * @cgrp: the target cgroup | ||
| 3124 | * | ||
| 3125 | * Called when "threaded" is written to the cgroup.type interface file and | ||
| 3126 | * tries to make @cgrp threaded and join the parent's resource domain. | ||
| 3127 | * This function is never called on the root cgroup as cgroup.type doesn't | ||
| 3128 | * exist on it. | ||
| 3129 | */ | ||
| 3130 | static int cgroup_enable_threaded(struct cgroup *cgrp) | ||
| 3131 | { | ||
| 3132 | struct cgroup *parent = cgroup_parent(cgrp); | ||
| 3133 | struct cgroup *dom_cgrp = parent->dom_cgrp; | ||
| 3134 | int ret; | ||
| 3135 | |||
| 3136 | lockdep_assert_held(&cgroup_mutex); | ||
| 3137 | |||
| 3138 | /* noop if already threaded */ | ||
| 3139 | if (cgroup_is_threaded(cgrp)) | ||
| 3140 | return 0; | ||
| 3141 | |||
| 3142 | /* we're joining the parent's domain, ensure its validity */ | ||
| 3143 | if (!cgroup_is_valid_domain(dom_cgrp) || | ||
| 3144 | !cgroup_can_be_thread_root(dom_cgrp)) | ||
| 3145 | return -EOPNOTSUPP; | ||
| 3146 | |||
| 3147 | /* | ||
| 3148 | * The following shouldn't cause actual migrations and should | ||
| 3149 | * always succeed. | ||
| 3150 | */ | ||
| 3151 | cgroup_save_control(cgrp); | ||
| 3152 | |||
| 3153 | cgrp->dom_cgrp = dom_cgrp; | ||
| 3154 | ret = cgroup_apply_control(cgrp); | ||
| 3155 | if (!ret) | ||
| 3156 | parent->nr_threaded_children++; | ||
| 3157 | else | ||
| 3158 | cgrp->dom_cgrp = cgrp; | ||
| 3159 | |||
| 3160 | cgroup_finalize_control(cgrp, ret); | ||
| 3161 | return ret; | ||
| 3162 | } | ||
| 3163 | |||
| 3164 | static int cgroup_type_show(struct seq_file *seq, void *v) | ||
| 3165 | { | ||
| 3166 | struct cgroup *cgrp = seq_css(seq)->cgroup; | ||
| 3167 | |||
| 3168 | if (cgroup_is_threaded(cgrp)) | ||
| 3169 | seq_puts(seq, "threaded\n"); | ||
| 3170 | else if (!cgroup_is_valid_domain(cgrp)) | ||
| 3171 | seq_puts(seq, "domain invalid\n"); | ||
| 3172 | else if (cgroup_is_thread_root(cgrp)) | ||
| 3173 | seq_puts(seq, "domain threaded\n"); | ||
| 3174 | else | ||
| 3175 | seq_puts(seq, "domain\n"); | ||
| 3176 | |||
| 3177 | return 0; | ||
| 3178 | } | ||
| 3179 | |||
| 3180 | static ssize_t cgroup_type_write(struct kernfs_open_file *of, char *buf, | ||
| 3181 | size_t nbytes, loff_t off) | ||
| 3182 | { | ||
| 3183 | struct cgroup *cgrp; | ||
| 3184 | int ret; | ||
| 3185 | |||
| 3186 | /* only switching to threaded mode is supported */ | ||
| 3187 | if (strcmp(strstrip(buf), "threaded")) | ||
| 3188 | return -EINVAL; | ||
| 3189 | |||
| 3190 | cgrp = cgroup_kn_lock_live(of->kn, false); | ||
| 3191 | if (!cgrp) | ||
| 3192 | return -ENOENT; | ||
| 3193 | |||
| 3194 | /* threaded can only be enabled */ | ||
| 3195 | ret = cgroup_enable_threaded(cgrp); | ||
| 3196 | |||
| 3197 | cgroup_kn_unlock(of->kn); | ||
| 3198 | return ret ?: nbytes; | ||
| 3199 | } | ||
| 3200 | |||
| 3201 | static int cgroup_max_descendants_show(struct seq_file *seq, void *v) | ||
| 3202 | { | ||
| 3203 | struct cgroup *cgrp = seq_css(seq)->cgroup; | ||
| 3204 | int descendants = READ_ONCE(cgrp->max_descendants); | ||
| 3205 | |||
| 3206 | if (descendants == INT_MAX) | ||
| 3207 | seq_puts(seq, "max\n"); | ||
| 3208 | else | ||
| 3209 | seq_printf(seq, "%d\n", descendants); | ||
| 3210 | |||
| 3211 | return 0; | ||
| 3212 | } | ||
| 3213 | |||
| 3214 | static ssize_t cgroup_max_descendants_write(struct kernfs_open_file *of, | ||
| 3215 | char *buf, size_t nbytes, loff_t off) | ||
| 3216 | { | ||
| 3217 | struct cgroup *cgrp; | ||
| 3218 | int descendants; | ||
| 3219 | ssize_t ret; | ||
| 3220 | |||
| 3221 | buf = strstrip(buf); | ||
| 3222 | if (!strcmp(buf, "max")) { | ||
| 3223 | descendants = INT_MAX; | ||
| 3224 | } else { | ||
| 3225 | ret = kstrtoint(buf, 0, &descendants); | ||
| 3226 | if (ret) | ||
| 3227 | return ret; | ||
| 3228 | } | ||
| 3229 | |||
| 3230 | if (descendants < 0) | ||
| 3231 | return -ERANGE; | ||
| 3232 | |||
| 3233 | cgrp = cgroup_kn_lock_live(of->kn, false); | ||
| 3234 | if (!cgrp) | ||
| 3235 | return -ENOENT; | ||
| 3236 | |||
| 3237 | cgrp->max_descendants = descendants; | ||
| 3238 | |||
| 3239 | cgroup_kn_unlock(of->kn); | ||
| 3240 | |||
| 3241 | return nbytes; | ||
| 3242 | } | ||
| 3243 | |||
| 3244 | static int cgroup_max_depth_show(struct seq_file *seq, void *v) | ||
| 3245 | { | ||
| 3246 | struct cgroup *cgrp = seq_css(seq)->cgroup; | ||
| 3247 | int depth = READ_ONCE(cgrp->max_depth); | ||
| 3248 | |||
| 3249 | if (depth == INT_MAX) | ||
| 3250 | seq_puts(seq, "max\n"); | ||
| 3251 | else | ||
| 3252 | seq_printf(seq, "%d\n", depth); | ||
| 3253 | |||
| 3254 | return 0; | ||
| 3255 | } | ||
| 3256 | |||
| 3257 | static ssize_t cgroup_max_depth_write(struct kernfs_open_file *of, | ||
| 3258 | char *buf, size_t nbytes, loff_t off) | ||
| 3259 | { | ||
| 3260 | struct cgroup *cgrp; | ||
| 3261 | ssize_t ret; | ||
| 3262 | int depth; | ||
| 3263 | |||
| 3264 | buf = strstrip(buf); | ||
| 3265 | if (!strcmp(buf, "max")) { | ||
| 3266 | depth = INT_MAX; | ||
| 3267 | } else { | ||
| 3268 | ret = kstrtoint(buf, 0, &depth); | ||
| 3269 | if (ret) | ||
| 3270 | return ret; | ||
| 3271 | } | ||
| 3272 | |||
| 3273 | if (depth < 0) | ||
| 3274 | return -ERANGE; | ||
| 3275 | |||
| 3276 | cgrp = cgroup_kn_lock_live(of->kn, false); | ||
| 3277 | if (!cgrp) | ||
| 3278 | return -ENOENT; | ||
| 3279 | |||
| 3280 | cgrp->max_depth = depth; | ||
| 3281 | |||
| 3282 | cgroup_kn_unlock(of->kn); | ||
| 3283 | |||
| 3284 | return nbytes; | ||
| 3285 | } | ||
| 3286 | |||
| 3014 | static int cgroup_events_show(struct seq_file *seq, void *v) | 3287 | static int cgroup_events_show(struct seq_file *seq, void *v) |
| 3015 | { | 3288 | { |
| 3016 | seq_printf(seq, "populated %d\n", | 3289 | seq_printf(seq, "populated %d\n", |
| @@ -3018,6 +3291,18 @@ static int cgroup_events_show(struct seq_file *seq, void *v) | |||
| 3018 | return 0; | 3291 | return 0; |
| 3019 | } | 3292 | } |
| 3020 | 3293 | ||
| 3294 | static int cgroup_stat_show(struct seq_file *seq, void *v) | ||
| 3295 | { | ||
| 3296 | struct cgroup *cgroup = seq_css(seq)->cgroup; | ||
| 3297 | |||
| 3298 | seq_printf(seq, "nr_descendants %d\n", | ||
| 3299 | cgroup->nr_descendants); | ||
| 3300 | seq_printf(seq, "nr_dying_descendants %d\n", | ||
| 3301 | cgroup->nr_dying_descendants); | ||
| 3302 | |||
| 3303 | return 0; | ||
| 3304 | } | ||
| 3305 | |||
| 3021 | static int cgroup_file_open(struct kernfs_open_file *of) | 3306 | static int cgroup_file_open(struct kernfs_open_file *of) |
| 3022 | { | 3307 | { |
| 3023 | struct cftype *cft = of->kn->priv; | 3308 | struct cftype *cft = of->kn->priv; |
| @@ -3234,7 +3519,6 @@ restart: | |||
| 3234 | 3519 | ||
| 3235 | static int cgroup_apply_cftypes(struct cftype *cfts, bool is_add) | 3520 | static int cgroup_apply_cftypes(struct cftype *cfts, bool is_add) |
| 3236 | { | 3521 | { |
| 3237 | LIST_HEAD(pending); | ||
| 3238 | struct cgroup_subsys *ss = cfts[0].ss; | 3522 | struct cgroup_subsys *ss = cfts[0].ss; |
| 3239 | struct cgroup *root = &ss->root->cgrp; | 3523 | struct cgroup *root = &ss->root->cgrp; |
| 3240 | struct cgroup_subsys_state *css; | 3524 | struct cgroup_subsys_state *css; |
| @@ -3659,6 +3943,58 @@ bool css_has_online_children(struct cgroup_subsys_state *css) | |||
| 3659 | return ret; | 3943 | return ret; |
| 3660 | } | 3944 | } |
| 3661 | 3945 | ||
| 3946 | static struct css_set *css_task_iter_next_css_set(struct css_task_iter *it) | ||
| 3947 | { | ||
| 3948 | struct list_head *l; | ||
| 3949 | struct cgrp_cset_link *link; | ||
| 3950 | struct css_set *cset; | ||
| 3951 | |||
| 3952 | lockdep_assert_held(&css_set_lock); | ||
| 3953 | |||
| 3954 | /* find the next threaded cset */ | ||
| 3955 | if (it->tcset_pos) { | ||
| 3956 | l = it->tcset_pos->next; | ||
| 3957 | |||
| 3958 | if (l != it->tcset_head) { | ||
| 3959 | it->tcset_pos = l; | ||
| 3960 | return container_of(l, struct css_set, | ||
| 3961 | threaded_csets_node); | ||
| 3962 | } | ||
| 3963 | |||
| 3964 | it->tcset_pos = NULL; | ||
| 3965 | } | ||
| 3966 | |||
| 3967 | /* find the next cset */ | ||
| 3968 | l = it->cset_pos; | ||
| 3969 | l = l->next; | ||
| 3970 | if (l == it->cset_head) { | ||
| 3971 | it->cset_pos = NULL; | ||
| 3972 | return NULL; | ||
| 3973 | } | ||
| 3974 | |||
| 3975 | if (it->ss) { | ||
| 3976 | cset = container_of(l, struct css_set, e_cset_node[it->ss->id]); | ||
| 3977 | } else { | ||
| 3978 | link = list_entry(l, struct cgrp_cset_link, cset_link); | ||
| 3979 | cset = link->cset; | ||
| 3980 | } | ||
| 3981 | |||
| 3982 | it->cset_pos = l; | ||
| 3983 | |||
| 3984 | /* initialize threaded css_set walking */ | ||
| 3985 | if (it->flags & CSS_TASK_ITER_THREADED) { | ||
| 3986 | if (it->cur_dcset) | ||
| 3987 | put_css_set_locked(it->cur_dcset); | ||
| 3988 | it->cur_dcset = cset; | ||
| 3989 | get_css_set(cset); | ||
| 3990 | |||
| 3991 | it->tcset_head = &cset->threaded_csets; | ||
| 3992 | it->tcset_pos = &cset->threaded_csets; | ||
| 3993 | } | ||
| 3994 | |||
| 3995 | return cset; | ||
| 3996 | } | ||
| 3997 | |||
| 3662 | /** | 3998 | /** |
| 3663 | * css_task_iter_advance_css_set - advance a task itererator to the next css_set | 3999 | * css_task_iter_advance_css_set - advance a task itererator to the next css_set |
| 3664 | * @it: the iterator to advance | 4000 | * @it: the iterator to advance |
| @@ -3667,32 +4003,19 @@ bool css_has_online_children(struct cgroup_subsys_state *css) | |||
| 3667 | */ | 4003 | */ |
| 3668 | static void css_task_iter_advance_css_set(struct css_task_iter *it) | 4004 | static void css_task_iter_advance_css_set(struct css_task_iter *it) |
| 3669 | { | 4005 | { |
| 3670 | struct list_head *l = it->cset_pos; | ||
| 3671 | struct cgrp_cset_link *link; | ||
| 3672 | struct css_set *cset; | 4006 | struct css_set *cset; |
| 3673 | 4007 | ||
| 3674 | lockdep_assert_held(&css_set_lock); | 4008 | lockdep_assert_held(&css_set_lock); |
| 3675 | 4009 | ||
| 3676 | /* Advance to the next non-empty css_set */ | 4010 | /* Advance to the next non-empty css_set */ |
| 3677 | do { | 4011 | do { |
| 3678 | l = l->next; | 4012 | cset = css_task_iter_next_css_set(it); |
| 3679 | if (l == it->cset_head) { | 4013 | if (!cset) { |
| 3680 | it->cset_pos = NULL; | ||
| 3681 | it->task_pos = NULL; | 4014 | it->task_pos = NULL; |
| 3682 | return; | 4015 | return; |
| 3683 | } | 4016 | } |
| 3684 | |||
| 3685 | if (it->ss) { | ||
| 3686 | cset = container_of(l, struct css_set, | ||
| 3687 | e_cset_node[it->ss->id]); | ||
| 3688 | } else { | ||
| 3689 | link = list_entry(l, struct cgrp_cset_link, cset_link); | ||
| 3690 | cset = link->cset; | ||
| 3691 | } | ||
| 3692 | } while (!css_set_populated(cset)); | 4017 | } while (!css_set_populated(cset)); |
| 3693 | 4018 | ||
| 3694 | it->cset_pos = l; | ||
| 3695 | |||
| 3696 | if (!list_empty(&cset->tasks)) | 4019 | if (!list_empty(&cset->tasks)) |
| 3697 | it->task_pos = cset->tasks.next; | 4020 | it->task_pos = cset->tasks.next; |
| 3698 | else | 4021 | else |
| @@ -3732,6 +4055,7 @@ static void css_task_iter_advance(struct css_task_iter *it) | |||
| 3732 | lockdep_assert_held(&css_set_lock); | 4055 | lockdep_assert_held(&css_set_lock); |
| 3733 | WARN_ON_ONCE(!l); | 4056 | WARN_ON_ONCE(!l); |
| 3734 | 4057 | ||
| 4058 | repeat: | ||
| 3735 | /* | 4059 | /* |
| 3736 | * Advance iterator to find next entry. cset->tasks is consumed | 4060 | * Advance iterator to find next entry. cset->tasks is consumed |
| 3737 | * first and then ->mg_tasks. After ->mg_tasks, we move onto the | 4061 | * first and then ->mg_tasks. After ->mg_tasks, we move onto the |
| @@ -3746,11 +4070,18 @@ static void css_task_iter_advance(struct css_task_iter *it) | |||
| 3746 | css_task_iter_advance_css_set(it); | 4070 | css_task_iter_advance_css_set(it); |
| 3747 | else | 4071 | else |
| 3748 | it->task_pos = l; | 4072 | it->task_pos = l; |
| 4073 | |||
| 4074 | /* if PROCS, skip over tasks which aren't group leaders */ | ||
| 4075 | if ((it->flags & CSS_TASK_ITER_PROCS) && it->task_pos && | ||
| 4076 | !thread_group_leader(list_entry(it->task_pos, struct task_struct, | ||
| 4077 | cg_list))) | ||
| 4078 | goto repeat; | ||
| 3749 | } | 4079 | } |
| 3750 | 4080 | ||
| 3751 | /** | 4081 | /** |
| 3752 | * css_task_iter_start - initiate task iteration | 4082 | * css_task_iter_start - initiate task iteration |
| 3753 | * @css: the css to walk tasks of | 4083 | * @css: the css to walk tasks of |
| 4084 | * @flags: CSS_TASK_ITER_* flags | ||
| 3754 | * @it: the task iterator to use | 4085 | * @it: the task iterator to use |
| 3755 | * | 4086 | * |
| 3756 | * Initiate iteration through the tasks of @css. The caller can call | 4087 | * Initiate iteration through the tasks of @css. The caller can call |
| @@ -3758,7 +4089,7 @@ static void css_task_iter_advance(struct css_task_iter *it) | |||
| 3758 | * returns NULL. On completion of iteration, css_task_iter_end() must be | 4089 | * returns NULL. On completion of iteration, css_task_iter_end() must be |
| 3759 | * called. | 4090 | * called. |
| 3760 | */ | 4091 | */ |
| 3761 | void css_task_iter_start(struct cgroup_subsys_state *css, | 4092 | void css_task_iter_start(struct cgroup_subsys_state *css, unsigned int flags, |
| 3762 | struct css_task_iter *it) | 4093 | struct css_task_iter *it) |
| 3763 | { | 4094 | { |
| 3764 | /* no one should try to iterate before mounting cgroups */ | 4095 | /* no one should try to iterate before mounting cgroups */ |
| @@ -3769,6 +4100,7 @@ void css_task_iter_start(struct cgroup_subsys_state *css, | |||
| 3769 | spin_lock_irq(&css_set_lock); | 4100 | spin_lock_irq(&css_set_lock); |
| 3770 | 4101 | ||
| 3771 | it->ss = css->ss; | 4102 | it->ss = css->ss; |
| 4103 | it->flags = flags; | ||
| 3772 | 4104 | ||
| 3773 | if (it->ss) | 4105 | if (it->ss) |
| 3774 | it->cset_pos = &css->cgroup->e_csets[css->ss->id]; | 4106 | it->cset_pos = &css->cgroup->e_csets[css->ss->id]; |
| @@ -3826,6 +4158,9 @@ void css_task_iter_end(struct css_task_iter *it) | |||
| 3826 | spin_unlock_irq(&css_set_lock); | 4158 | spin_unlock_irq(&css_set_lock); |
| 3827 | } | 4159 | } |
| 3828 | 4160 | ||
| 4161 | if (it->cur_dcset) | ||
| 4162 | put_css_set(it->cur_dcset); | ||
| 4163 | |||
| 3829 | if (it->cur_task) | 4164 | if (it->cur_task) |
| 3830 | put_task_struct(it->cur_task); | 4165 | put_task_struct(it->cur_task); |
| 3831 | } | 4166 | } |
| @@ -3842,16 +4177,12 @@ static void *cgroup_procs_next(struct seq_file *s, void *v, loff_t *pos) | |||
| 3842 | { | 4177 | { |
| 3843 | struct kernfs_open_file *of = s->private; | 4178 | struct kernfs_open_file *of = s->private; |
| 3844 | struct css_task_iter *it = of->priv; | 4179 | struct css_task_iter *it = of->priv; |
| 3845 | struct task_struct *task; | ||
| 3846 | 4180 | ||
| 3847 | do { | 4181 | return css_task_iter_next(it); |
| 3848 | task = css_task_iter_next(it); | ||
| 3849 | } while (task && !thread_group_leader(task)); | ||
| 3850 | |||
| 3851 | return task; | ||
| 3852 | } | 4182 | } |
| 3853 | 4183 | ||
| 3854 | static void *cgroup_procs_start(struct seq_file *s, loff_t *pos) | 4184 | static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos, |
| 4185 | unsigned int iter_flags) | ||
| 3855 | { | 4186 | { |
| 3856 | struct kernfs_open_file *of = s->private; | 4187 | struct kernfs_open_file *of = s->private; |
| 3857 | struct cgroup *cgrp = seq_css(s)->cgroup; | 4188 | struct cgroup *cgrp = seq_css(s)->cgroup; |
| @@ -3869,24 +4200,169 @@ static void *cgroup_procs_start(struct seq_file *s, loff_t *pos) | |||
| 3869 | if (!it) | 4200 | if (!it) |
| 3870 | return ERR_PTR(-ENOMEM); | 4201 | return ERR_PTR(-ENOMEM); |
| 3871 | of->priv = it; | 4202 | of->priv = it; |
| 3872 | css_task_iter_start(&cgrp->self, it); | 4203 | css_task_iter_start(&cgrp->self, iter_flags, it); |
| 3873 | } else if (!(*pos)++) { | 4204 | } else if (!(*pos)++) { |
| 3874 | css_task_iter_end(it); | 4205 | css_task_iter_end(it); |
| 3875 | css_task_iter_start(&cgrp->self, it); | 4206 | css_task_iter_start(&cgrp->self, iter_flags, it); |
| 3876 | } | 4207 | } |
| 3877 | 4208 | ||
| 3878 | return cgroup_procs_next(s, NULL, NULL); | 4209 | return cgroup_procs_next(s, NULL, NULL); |
| 3879 | } | 4210 | } |
| 3880 | 4211 | ||
| 4212 | static void *cgroup_procs_start(struct seq_file *s, loff_t *pos) | ||
| 4213 | { | ||
| 4214 | struct cgroup *cgrp = seq_css(s)->cgroup; | ||
| 4215 | |||
| 4216 | /* | ||
| 4217 | * All processes of a threaded subtree belong to the domain cgroup | ||
| 4218 | * of the subtree. Only threads can be distributed across the | ||
| 4219 | * subtree. Reject reads on cgroup.procs in the subtree proper. | ||
| 4220 | * They're always empty anyway. | ||
| 4221 | */ | ||
| 4222 | if (cgroup_is_threaded(cgrp)) | ||
| 4223 | return ERR_PTR(-EOPNOTSUPP); | ||
| 4224 | |||
| 4225 | return __cgroup_procs_start(s, pos, CSS_TASK_ITER_PROCS | | ||
| 4226 | CSS_TASK_ITER_THREADED); | ||
| 4227 | } | ||
| 4228 | |||
| 3881 | static int cgroup_procs_show(struct seq_file *s, void *v) | 4229 | static int cgroup_procs_show(struct seq_file *s, void *v) |
| 3882 | { | 4230 | { |
| 3883 | seq_printf(s, "%d\n", task_tgid_vnr(v)); | 4231 | seq_printf(s, "%d\n", task_pid_vnr(v)); |
| 3884 | return 0; | 4232 | return 0; |
| 3885 | } | 4233 | } |
| 3886 | 4234 | ||
| 4235 | static int cgroup_procs_write_permission(struct cgroup *src_cgrp, | ||
| 4236 | struct cgroup *dst_cgrp, | ||
| 4237 | struct super_block *sb) | ||
| 4238 | { | ||
| 4239 | struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; | ||
| 4240 | struct cgroup *com_cgrp = src_cgrp; | ||
| 4241 | struct inode *inode; | ||
| 4242 | int ret; | ||
| 4243 | |||
| 4244 | lockdep_assert_held(&cgroup_mutex); | ||
| 4245 | |||
| 4246 | /* find the common ancestor */ | ||
| 4247 | while (!cgroup_is_descendant(dst_cgrp, com_cgrp)) | ||
| 4248 | com_cgrp = cgroup_parent(com_cgrp); | ||
| 4249 | |||
| 4250 | /* %current should be authorized to migrate to the common ancestor */ | ||
| 4251 | inode = kernfs_get_inode(sb, com_cgrp->procs_file.kn); | ||
| 4252 | if (!inode) | ||
| 4253 | return -ENOMEM; | ||
| 4254 | |||
| 4255 | ret = inode_permission(inode, MAY_WRITE); | ||
| 4256 | iput(inode); | ||
| 4257 | if (ret) | ||
| 4258 | return ret; | ||
| 4259 | |||
| 4260 | /* | ||
| 4261 | * If namespaces are delegation boundaries, %current must be able | ||
| 4262 | * to see both source and destination cgroups from its namespace. | ||
| 4263 | */ | ||
| 4264 | if ((cgrp_dfl_root.flags & CGRP_ROOT_NS_DELEGATE) && | ||
| 4265 | (!cgroup_is_descendant(src_cgrp, ns->root_cset->dfl_cgrp) || | ||
| 4266 | !cgroup_is_descendant(dst_cgrp, ns->root_cset->dfl_cgrp))) | ||
| 4267 | return -ENOENT; | ||
| 4268 | |||
| 4269 | return 0; | ||
| 4270 | } | ||
| 4271 | |||
| 4272 | static ssize_t cgroup_procs_write(struct kernfs_open_file *of, | ||
| 4273 | char *buf, size_t nbytes, loff_t off) | ||
| 4274 | { | ||
| 4275 | struct cgroup *src_cgrp, *dst_cgrp; | ||
| 4276 | struct task_struct *task; | ||
| 4277 | ssize_t ret; | ||
| 4278 | |||
| 4279 | dst_cgrp = cgroup_kn_lock_live(of->kn, false); | ||
| 4280 | if (!dst_cgrp) | ||
| 4281 | return -ENODEV; | ||
| 4282 | |||
| 4283 | task = cgroup_procs_write_start(buf, true); | ||
| 4284 | ret = PTR_ERR_OR_ZERO(task); | ||
| 4285 | if (ret) | ||
| 4286 | goto out_unlock; | ||
| 4287 | |||
| 4288 | /* find the source cgroup */ | ||
| 4289 | spin_lock_irq(&css_set_lock); | ||
| 4290 | src_cgrp = task_cgroup_from_root(task, &cgrp_dfl_root); | ||
| 4291 | spin_unlock_irq(&css_set_lock); | ||
| 4292 | |||
| 4293 | ret = cgroup_procs_write_permission(src_cgrp, dst_cgrp, | ||
| 4294 | of->file->f_path.dentry->d_sb); | ||
| 4295 | if (ret) | ||
| 4296 | goto out_finish; | ||
| 4297 | |||
| 4298 | ret = cgroup_attach_task(dst_cgrp, task, true); | ||
| 4299 | |||
| 4300 | out_finish: | ||
| 4301 | cgroup_procs_write_finish(task); | ||
| 4302 | out_unlock: | ||
| 4303 | cgroup_kn_unlock(of->kn); | ||
| 4304 | |||
| 4305 | return ret ?: nbytes; | ||
| 4306 | } | ||
| 4307 | |||
| 4308 | static void *cgroup_threads_start(struct seq_file *s, loff_t *pos) | ||
| 4309 | { | ||
| 4310 | return __cgroup_procs_start(s, pos, 0); | ||
| 4311 | } | ||
| 4312 | |||
| 4313 | static ssize_t cgroup_threads_write(struct kernfs_open_file *of, | ||
| 4314 | char *buf, size_t nbytes, loff_t off) | ||
| 4315 | { | ||
| 4316 | struct cgroup *src_cgrp, *dst_cgrp; | ||
| 4317 | struct task_struct *task; | ||
| 4318 | ssize_t ret; | ||
| 4319 | |||
| 4320 | buf = strstrip(buf); | ||
| 4321 | |||
| 4322 | dst_cgrp = cgroup_kn_lock_live(of->kn, false); | ||
| 4323 | if (!dst_cgrp) | ||
| 4324 | return -ENODEV; | ||
| 4325 | |||
| 4326 | task = cgroup_procs_write_start(buf, false); | ||
| 4327 | ret = PTR_ERR_OR_ZERO(task); | ||
| 4328 | if (ret) | ||
| 4329 | goto out_unlock; | ||
| 4330 | |||
| 4331 | /* find the source cgroup */ | ||
| 4332 | spin_lock_irq(&css_set_lock); | ||
| 4333 | src_cgrp = task_cgroup_from_root(task, &cgrp_dfl_root); | ||
| 4334 | spin_unlock_irq(&css_set_lock); | ||
| 4335 | |||
| 4336 | /* thread migrations follow the cgroup.procs delegation rule */ | ||
| 4337 | ret = cgroup_procs_write_permission(src_cgrp, dst_cgrp, | ||
| 4338 | of->file->f_path.dentry->d_sb); | ||
| 4339 | if (ret) | ||
| 4340 | goto out_finish; | ||
| 4341 | |||
| 4342 | /* and must be contained in the same domain */ | ||
| 4343 | ret = -EOPNOTSUPP; | ||
| 4344 | if (src_cgrp->dom_cgrp != dst_cgrp->dom_cgrp) | ||
| 4345 | goto out_finish; | ||
| 4346 | |||
| 4347 | ret = cgroup_attach_task(dst_cgrp, task, false); | ||
| 4348 | |||
| 4349 | out_finish: | ||
| 4350 | cgroup_procs_write_finish(task); | ||
| 4351 | out_unlock: | ||
| 4352 | cgroup_kn_unlock(of->kn); | ||
| 4353 | |||
| 4354 | return ret ?: nbytes; | ||
| 4355 | } | ||
| 4356 | |||
| 3887 | /* cgroup core interface files for the default hierarchy */ | 4357 | /* cgroup core interface files for the default hierarchy */ |
| 3888 | static struct cftype cgroup_base_files[] = { | 4358 | static struct cftype cgroup_base_files[] = { |
| 3889 | { | 4359 | { |
| 4360 | .name = "cgroup.type", | ||
| 4361 | .flags = CFTYPE_NOT_ON_ROOT, | ||
| 4362 | .seq_show = cgroup_type_show, | ||
| 4363 | .write = cgroup_type_write, | ||
| 4364 | }, | ||
| 4365 | { | ||
| 3890 | .name = "cgroup.procs", | 4366 | .name = "cgroup.procs", |
| 3891 | .flags = CFTYPE_NS_DELEGATABLE, | 4367 | .flags = CFTYPE_NS_DELEGATABLE, |
| 3892 | .file_offset = offsetof(struct cgroup, procs_file), | 4368 | .file_offset = offsetof(struct cgroup, procs_file), |
| @@ -3897,6 +4373,14 @@ static struct cftype cgroup_base_files[] = { | |||
| 3897 | .write = cgroup_procs_write, | 4373 | .write = cgroup_procs_write, |
| 3898 | }, | 4374 | }, |
| 3899 | { | 4375 | { |
| 4376 | .name = "cgroup.threads", | ||
| 4377 | .release = cgroup_procs_release, | ||
| 4378 | .seq_start = cgroup_threads_start, | ||
| 4379 | .seq_next = cgroup_procs_next, | ||
| 4380 | .seq_show = cgroup_procs_show, | ||
| 4381 | .write = cgroup_threads_write, | ||
| 4382 | }, | ||
| 4383 | { | ||
| 3900 | .name = "cgroup.controllers", | 4384 | .name = "cgroup.controllers", |
| 3901 | .seq_show = cgroup_controllers_show, | 4385 | .seq_show = cgroup_controllers_show, |
| 3902 | }, | 4386 | }, |
| @@ -3912,6 +4396,20 @@ static struct cftype cgroup_base_files[] = { | |||
| 3912 | .file_offset = offsetof(struct cgroup, events_file), | 4396 | .file_offset = offsetof(struct cgroup, events_file), |
| 3913 | .seq_show = cgroup_events_show, | 4397 | .seq_show = cgroup_events_show, |
| 3914 | }, | 4398 | }, |
| 4399 | { | ||
| 4400 | .name = "cgroup.max.descendants", | ||
| 4401 | .seq_show = cgroup_max_descendants_show, | ||
| 4402 | .write = cgroup_max_descendants_write, | ||
| 4403 | }, | ||
| 4404 | { | ||
| 4405 | .name = "cgroup.max.depth", | ||
| 4406 | .seq_show = cgroup_max_depth_show, | ||
| 4407 | .write = cgroup_max_depth_write, | ||
| 4408 | }, | ||
| 4409 | { | ||
| 4410 | .name = "cgroup.stat", | ||
| 4411 | .seq_show = cgroup_stat_show, | ||
| 4412 | }, | ||
| 3915 | { } /* terminate */ | 4413 | { } /* terminate */ |
| 3916 | }; | 4414 | }; |
| 3917 | 4415 | ||
| @@ -4011,9 +4509,15 @@ static void css_release_work_fn(struct work_struct *work) | |||
| 4011 | if (ss->css_released) | 4509 | if (ss->css_released) |
| 4012 | ss->css_released(css); | 4510 | ss->css_released(css); |
| 4013 | } else { | 4511 | } else { |
| 4512 | struct cgroup *tcgrp; | ||
| 4513 | |||
| 4014 | /* cgroup release path */ | 4514 | /* cgroup release path */ |
| 4015 | trace_cgroup_release(cgrp); | 4515 | trace_cgroup_release(cgrp); |
| 4016 | 4516 | ||
| 4517 | for (tcgrp = cgroup_parent(cgrp); tcgrp; | ||
| 4518 | tcgrp = cgroup_parent(tcgrp)) | ||
| 4519 | tcgrp->nr_dying_descendants--; | ||
| 4520 | |||
| 4017 | cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id); | 4521 | cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id); |
| 4018 | cgrp->id = -1; | 4522 | cgrp->id = -1; |
| 4019 | 4523 | ||
| @@ -4209,9 +4713,13 @@ static struct cgroup *cgroup_create(struct cgroup *parent) | |||
| 4209 | cgrp->root = root; | 4713 | cgrp->root = root; |
| 4210 | cgrp->level = level; | 4714 | cgrp->level = level; |
| 4211 | 4715 | ||
| 4212 | for (tcgrp = cgrp; tcgrp; tcgrp = cgroup_parent(tcgrp)) | 4716 | for (tcgrp = cgrp; tcgrp; tcgrp = cgroup_parent(tcgrp)) { |
| 4213 | cgrp->ancestor_ids[tcgrp->level] = tcgrp->id; | 4717 | cgrp->ancestor_ids[tcgrp->level] = tcgrp->id; |
| 4214 | 4718 | ||
| 4719 | if (tcgrp != cgrp) | ||
| 4720 | tcgrp->nr_descendants++; | ||
| 4721 | } | ||
| 4722 | |||
| 4215 | if (notify_on_release(parent)) | 4723 | if (notify_on_release(parent)) |
| 4216 | set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); | 4724 | set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); |
| 4217 | 4725 | ||
| @@ -4252,6 +4760,29 @@ out_free_cgrp: | |||
| 4252 | return ERR_PTR(ret); | 4760 | return ERR_PTR(ret); |
| 4253 | } | 4761 | } |
| 4254 | 4762 | ||
| 4763 | static bool cgroup_check_hierarchy_limits(struct cgroup *parent) | ||
| 4764 | { | ||
| 4765 | struct cgroup *cgroup; | ||
| 4766 | int ret = false; | ||
| 4767 | int level = 1; | ||
| 4768 | |||
| 4769 | lockdep_assert_held(&cgroup_mutex); | ||
| 4770 | |||
| 4771 | for (cgroup = parent; cgroup; cgroup = cgroup_parent(cgroup)) { | ||
| 4772 | if (cgroup->nr_descendants >= cgroup->max_descendants) | ||
| 4773 | goto fail; | ||
| 4774 | |||
| 4775 | if (level > cgroup->max_depth) | ||
| 4776 | goto fail; | ||
| 4777 | |||
| 4778 | level++; | ||
| 4779 | } | ||
| 4780 | |||
| 4781 | ret = true; | ||
| 4782 | fail: | ||
| 4783 | return ret; | ||
| 4784 | } | ||
| 4785 | |||
| 4255 | int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, umode_t mode) | 4786 | int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, umode_t mode) |
| 4256 | { | 4787 | { |
| 4257 | struct cgroup *parent, *cgrp; | 4788 | struct cgroup *parent, *cgrp; |
| @@ -4266,6 +4797,11 @@ int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, umode_t mode) | |||
| 4266 | if (!parent) | 4797 | if (!parent) |
| 4267 | return -ENODEV; | 4798 | return -ENODEV; |
| 4268 | 4799 | ||
| 4800 | if (!cgroup_check_hierarchy_limits(parent)) { | ||
| 4801 | ret = -EAGAIN; | ||
| 4802 | goto out_unlock; | ||
| 4803 | } | ||
| 4804 | |||
| 4269 | cgrp = cgroup_create(parent); | 4805 | cgrp = cgroup_create(parent); |
| 4270 | if (IS_ERR(cgrp)) { | 4806 | if (IS_ERR(cgrp)) { |
| 4271 | ret = PTR_ERR(cgrp); | 4807 | ret = PTR_ERR(cgrp); |
| @@ -4417,6 +4953,7 @@ static void kill_css(struct cgroup_subsys_state *css) | |||
| 4417 | static int cgroup_destroy_locked(struct cgroup *cgrp) | 4953 | static int cgroup_destroy_locked(struct cgroup *cgrp) |
| 4418 | __releases(&cgroup_mutex) __acquires(&cgroup_mutex) | 4954 | __releases(&cgroup_mutex) __acquires(&cgroup_mutex) |
| 4419 | { | 4955 | { |
| 4956 | struct cgroup *tcgrp, *parent = cgroup_parent(cgrp); | ||
| 4420 | struct cgroup_subsys_state *css; | 4957 | struct cgroup_subsys_state *css; |
| 4421 | struct cgrp_cset_link *link; | 4958 | struct cgrp_cset_link *link; |
| 4422 | int ssid; | 4959 | int ssid; |
| @@ -4461,7 +4998,15 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) | |||
| 4461 | */ | 4998 | */ |
| 4462 | kernfs_remove(cgrp->kn); | 4999 | kernfs_remove(cgrp->kn); |
| 4463 | 5000 | ||
| 4464 | cgroup1_check_for_release(cgroup_parent(cgrp)); | 5001 | if (parent && cgroup_is_threaded(cgrp)) |
| 5002 | parent->nr_threaded_children--; | ||
| 5003 | |||
| 5004 | for (tcgrp = cgroup_parent(cgrp); tcgrp; tcgrp = cgroup_parent(tcgrp)) { | ||
| 5005 | tcgrp->nr_descendants--; | ||
| 5006 | tcgrp->nr_dying_descendants++; | ||
| 5007 | } | ||
| 5008 | |||
| 5009 | cgroup1_check_for_release(parent); | ||
| 4465 | 5010 | ||
| 4466 | /* put the base reference */ | 5011 | /* put the base reference */ |
| 4467 | percpu_ref_kill(&cgrp->self.refcnt); | 5012 | percpu_ref_kill(&cgrp->self.refcnt); |
| @@ -4656,11 +5201,17 @@ int __init cgroup_init(void) | |||
| 4656 | 5201 | ||
| 4657 | cgrp_dfl_root.subsys_mask |= 1 << ss->id; | 5202 | cgrp_dfl_root.subsys_mask |= 1 << ss->id; |
| 4658 | 5203 | ||
| 5204 | /* implicit controllers must be threaded too */ | ||
| 5205 | WARN_ON(ss->implicit_on_dfl && !ss->threaded); | ||
| 5206 | |||
| 4659 | if (ss->implicit_on_dfl) | 5207 | if (ss->implicit_on_dfl) |
| 4660 | cgrp_dfl_implicit_ss_mask |= 1 << ss->id; | 5208 | cgrp_dfl_implicit_ss_mask |= 1 << ss->id; |
| 4661 | else if (!ss->dfl_cftypes) | 5209 | else if (!ss->dfl_cftypes) |
| 4662 | cgrp_dfl_inhibit_ss_mask |= 1 << ss->id; | 5210 | cgrp_dfl_inhibit_ss_mask |= 1 << ss->id; |
| 4663 | 5211 | ||
| 5212 | if (ss->threaded) | ||
| 5213 | cgrp_dfl_threaded_ss_mask |= 1 << ss->id; | ||
| 5214 | |||
| 4664 | if (ss->dfl_cftypes == ss->legacy_cftypes) { | 5215 | if (ss->dfl_cftypes == ss->legacy_cftypes) { |
| 4665 | WARN_ON(cgroup_add_cftypes(ss, ss->dfl_cftypes)); | 5216 | WARN_ON(cgroup_add_cftypes(ss, ss->dfl_cftypes)); |
| 4666 | } else { | 5217 | } else { |
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index e7485786db9b..67230ecf2ce1 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c | |||
| @@ -301,6 +301,16 @@ static DECLARE_WORK(cpuset_hotplug_work, cpuset_hotplug_workfn); | |||
| 301 | static DECLARE_WAIT_QUEUE_HEAD(cpuset_attach_wq); | 301 | static DECLARE_WAIT_QUEUE_HEAD(cpuset_attach_wq); |
| 302 | 302 | ||
| 303 | /* | 303 | /* |
| 304 | * Cgroup v2 behavior is used when on default hierarchy or the | ||
| 305 | * cgroup_v2_mode flag is set. | ||
| 306 | */ | ||
| 307 | static inline bool is_in_v2_mode(void) | ||
| 308 | { | ||
| 309 | return cgroup_subsys_on_dfl(cpuset_cgrp_subsys) || | ||
| 310 | (cpuset_cgrp_subsys.root->flags & CGRP_ROOT_CPUSET_V2_MODE); | ||
| 311 | } | ||
| 312 | |||
| 313 | /* | ||
| 304 | * This is ugly, but preserves the userspace API for existing cpuset | 314 | * This is ugly, but preserves the userspace API for existing cpuset |
| 305 | * users. If someone tries to mount the "cpuset" filesystem, we | 315 | * users. If someone tries to mount the "cpuset" filesystem, we |
| 306 | * silently switch it to mount "cgroup" instead | 316 | * silently switch it to mount "cgroup" instead |
| @@ -490,8 +500,7 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial) | |||
| 490 | 500 | ||
| 491 | /* On legacy hiearchy, we must be a subset of our parent cpuset. */ | 501 | /* On legacy hiearchy, we must be a subset of our parent cpuset. */ |
| 492 | ret = -EACCES; | 502 | ret = -EACCES; |
| 493 | if (!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) && | 503 | if (!is_in_v2_mode() && !is_cpuset_subset(trial, par)) |
| 494 | !is_cpuset_subset(trial, par)) | ||
| 495 | goto out; | 504 | goto out; |
| 496 | 505 | ||
| 497 | /* | 506 | /* |
| @@ -870,7 +879,7 @@ static void update_tasks_cpumask(struct cpuset *cs) | |||
| 870 | struct css_task_iter it; | 879 | struct css_task_iter it; |
| 871 | struct task_struct *task; | 880 | struct task_struct *task; |
| 872 | 881 | ||
| 873 | css_task_iter_start(&cs->css, &it); | 882 | css_task_iter_start(&cs->css, 0, &it); |
| 874 | while ((task = css_task_iter_next(&it))) | 883 | while ((task = css_task_iter_next(&it))) |
| 875 | set_cpus_allowed_ptr(task, cs->effective_cpus); | 884 | set_cpus_allowed_ptr(task, cs->effective_cpus); |
| 876 | css_task_iter_end(&it); | 885 | css_task_iter_end(&it); |
| @@ -904,8 +913,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct cpumask *new_cpus) | |||
| 904 | * If it becomes empty, inherit the effective mask of the | 913 | * If it becomes empty, inherit the effective mask of the |
| 905 | * parent, which is guaranteed to have some CPUs. | 914 | * parent, which is guaranteed to have some CPUs. |
| 906 | */ | 915 | */ |
| 907 | if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys) && | 916 | if (is_in_v2_mode() && cpumask_empty(new_cpus)) |
| 908 | cpumask_empty(new_cpus)) | ||
| 909 | cpumask_copy(new_cpus, parent->effective_cpus); | 917 | cpumask_copy(new_cpus, parent->effective_cpus); |
| 910 | 918 | ||
| 911 | /* Skip the whole subtree if the cpumask remains the same. */ | 919 | /* Skip the whole subtree if the cpumask remains the same. */ |
| @@ -922,7 +930,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct cpumask *new_cpus) | |||
| 922 | cpumask_copy(cp->effective_cpus, new_cpus); | 930 | cpumask_copy(cp->effective_cpus, new_cpus); |
| 923 | spin_unlock_irq(&callback_lock); | 931 | spin_unlock_irq(&callback_lock); |
| 924 | 932 | ||
| 925 | WARN_ON(!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) && | 933 | WARN_ON(!is_in_v2_mode() && |
| 926 | !cpumask_equal(cp->cpus_allowed, cp->effective_cpus)); | 934 | !cpumask_equal(cp->cpus_allowed, cp->effective_cpus)); |
| 927 | 935 | ||
| 928 | update_tasks_cpumask(cp); | 936 | update_tasks_cpumask(cp); |
| @@ -1100,7 +1108,7 @@ static void update_tasks_nodemask(struct cpuset *cs) | |||
| 1100 | * It's ok if we rebind the same mm twice; mpol_rebind_mm() | 1108 | * It's ok if we rebind the same mm twice; mpol_rebind_mm() |
| 1101 | * is idempotent. Also migrate pages in each mm to new nodes. | 1109 | * is idempotent. Also migrate pages in each mm to new nodes. |
| 1102 | */ | 1110 | */ |
| 1103 | css_task_iter_start(&cs->css, &it); | 1111 | css_task_iter_start(&cs->css, 0, &it); |
| 1104 | while ((task = css_task_iter_next(&it))) { | 1112 | while ((task = css_task_iter_next(&it))) { |
| 1105 | struct mm_struct *mm; | 1113 | struct mm_struct *mm; |
| 1106 | bool migrate; | 1114 | bool migrate; |
| @@ -1158,8 +1166,7 @@ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems) | |||
| 1158 | * If it becomes empty, inherit the effective mask of the | 1166 | * If it becomes empty, inherit the effective mask of the |
| 1159 | * parent, which is guaranteed to have some MEMs. | 1167 | * parent, which is guaranteed to have some MEMs. |
| 1160 | */ | 1168 | */ |
| 1161 | if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys) && | 1169 | if (is_in_v2_mode() && nodes_empty(*new_mems)) |
| 1162 | nodes_empty(*new_mems)) | ||
| 1163 | *new_mems = parent->effective_mems; | 1170 | *new_mems = parent->effective_mems; |
| 1164 | 1171 | ||
| 1165 | /* Skip the whole subtree if the nodemask remains the same. */ | 1172 | /* Skip the whole subtree if the nodemask remains the same. */ |
| @@ -1176,7 +1183,7 @@ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems) | |||
| 1176 | cp->effective_mems = *new_mems; | 1183 | cp->effective_mems = *new_mems; |
| 1177 | spin_unlock_irq(&callback_lock); | 1184 | spin_unlock_irq(&callback_lock); |
| 1178 | 1185 | ||
| 1179 | WARN_ON(!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) && | 1186 | WARN_ON(!is_in_v2_mode() && |
| 1180 | !nodes_equal(cp->mems_allowed, cp->effective_mems)); | 1187 | !nodes_equal(cp->mems_allowed, cp->effective_mems)); |
| 1181 | 1188 | ||
| 1182 | update_tasks_nodemask(cp); | 1189 | update_tasks_nodemask(cp); |
| @@ -1293,7 +1300,7 @@ static void update_tasks_flags(struct cpuset *cs) | |||
| 1293 | struct css_task_iter it; | 1300 | struct css_task_iter it; |
| 1294 | struct task_struct *task; | 1301 | struct task_struct *task; |
| 1295 | 1302 | ||
| 1296 | css_task_iter_start(&cs->css, &it); | 1303 | css_task_iter_start(&cs->css, 0, &it); |
| 1297 | while ((task = css_task_iter_next(&it))) | 1304 | while ((task = css_task_iter_next(&it))) |
| 1298 | cpuset_update_task_spread_flag(cs, task); | 1305 | cpuset_update_task_spread_flag(cs, task); |
| 1299 | css_task_iter_end(&it); | 1306 | css_task_iter_end(&it); |
| @@ -1468,7 +1475,7 @@ static int cpuset_can_attach(struct cgroup_taskset *tset) | |||
| 1468 | 1475 | ||
| 1469 | /* allow moving tasks into an empty cpuset if on default hierarchy */ | 1476 | /* allow moving tasks into an empty cpuset if on default hierarchy */ |
| 1470 | ret = -ENOSPC; | 1477 | ret = -ENOSPC; |
| 1471 | if (!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) && | 1478 | if (!is_in_v2_mode() && |
| 1472 | (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))) | 1479 | (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))) |
| 1473 | goto out_unlock; | 1480 | goto out_unlock; |
| 1474 | 1481 | ||
| @@ -1987,7 +1994,7 @@ static int cpuset_css_online(struct cgroup_subsys_state *css) | |||
| 1987 | cpuset_inc(); | 1994 | cpuset_inc(); |
| 1988 | 1995 | ||
| 1989 | spin_lock_irq(&callback_lock); | 1996 | spin_lock_irq(&callback_lock); |
| 1990 | if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys)) { | 1997 | if (is_in_v2_mode()) { |
| 1991 | cpumask_copy(cs->effective_cpus, parent->effective_cpus); | 1998 | cpumask_copy(cs->effective_cpus, parent->effective_cpus); |
| 1992 | cs->effective_mems = parent->effective_mems; | 1999 | cs->effective_mems = parent->effective_mems; |
| 1993 | } | 2000 | } |
| @@ -2064,7 +2071,7 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css) | |||
| 2064 | mutex_lock(&cpuset_mutex); | 2071 | mutex_lock(&cpuset_mutex); |
| 2065 | spin_lock_irq(&callback_lock); | 2072 | spin_lock_irq(&callback_lock); |
| 2066 | 2073 | ||
| 2067 | if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys)) { | 2074 | if (is_in_v2_mode()) { |
| 2068 | cpumask_copy(top_cpuset.cpus_allowed, cpu_possible_mask); | 2075 | cpumask_copy(top_cpuset.cpus_allowed, cpu_possible_mask); |
| 2069 | top_cpuset.mems_allowed = node_possible_map; | 2076 | top_cpuset.mems_allowed = node_possible_map; |
| 2070 | } else { | 2077 | } else { |
| @@ -2258,7 +2265,7 @@ retry: | |||
| 2258 | cpus_updated = !cpumask_equal(&new_cpus, cs->effective_cpus); | 2265 | cpus_updated = !cpumask_equal(&new_cpus, cs->effective_cpus); |
| 2259 | mems_updated = !nodes_equal(new_mems, cs->effective_mems); | 2266 | mems_updated = !nodes_equal(new_mems, cs->effective_mems); |
| 2260 | 2267 | ||
| 2261 | if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys)) | 2268 | if (is_in_v2_mode()) |
| 2262 | hotplug_update_tasks(cs, &new_cpus, &new_mems, | 2269 | hotplug_update_tasks(cs, &new_cpus, &new_mems, |
| 2263 | cpus_updated, mems_updated); | 2270 | cpus_updated, mems_updated); |
| 2264 | else | 2271 | else |
| @@ -2289,7 +2296,7 @@ static void cpuset_hotplug_workfn(struct work_struct *work) | |||
| 2289 | static cpumask_t new_cpus; | 2296 | static cpumask_t new_cpus; |
| 2290 | static nodemask_t new_mems; | 2297 | static nodemask_t new_mems; |
| 2291 | bool cpus_updated, mems_updated; | 2298 | bool cpus_updated, mems_updated; |
| 2292 | bool on_dfl = cgroup_subsys_on_dfl(cpuset_cgrp_subsys); | 2299 | bool on_dfl = is_in_v2_mode(); |
| 2293 | 2300 | ||
| 2294 | mutex_lock(&cpuset_mutex); | 2301 | mutex_lock(&cpuset_mutex); |
| 2295 | 2302 | ||
diff --git a/kernel/cgroup/debug.c b/kernel/cgroup/debug.c index dac46af22782..f661b4cc5efd 100644 --- a/kernel/cgroup/debug.c +++ b/kernel/cgroup/debug.c | |||
| @@ -114,27 +114,49 @@ static int cgroup_css_links_read(struct seq_file *seq, void *v) | |||
| 114 | { | 114 | { |
| 115 | struct cgroup_subsys_state *css = seq_css(seq); | 115 | struct cgroup_subsys_state *css = seq_css(seq); |
| 116 | struct cgrp_cset_link *link; | 116 | struct cgrp_cset_link *link; |
| 117 | int dead_cnt = 0, extra_refs = 0; | 117 | int dead_cnt = 0, extra_refs = 0, threaded_csets = 0; |
| 118 | 118 | ||
| 119 | spin_lock_irq(&css_set_lock); | 119 | spin_lock_irq(&css_set_lock); |
| 120 | |||
| 120 | list_for_each_entry(link, &css->cgroup->cset_links, cset_link) { | 121 | list_for_each_entry(link, &css->cgroup->cset_links, cset_link) { |
| 121 | struct css_set *cset = link->cset; | 122 | struct css_set *cset = link->cset; |
| 122 | struct task_struct *task; | 123 | struct task_struct *task; |
| 123 | int count = 0; | 124 | int count = 0; |
| 124 | int refcnt = refcount_read(&cset->refcount); | 125 | int refcnt = refcount_read(&cset->refcount); |
| 125 | 126 | ||
| 126 | seq_printf(seq, " %d", refcnt); | 127 | /* |
| 127 | if (refcnt - cset->nr_tasks > 0) { | 128 | * Print out the proc_cset and threaded_cset relationship |
| 128 | int extra = refcnt - cset->nr_tasks; | 129 | * and highlight difference between refcount and task_count. |
| 129 | 130 | */ | |
| 130 | seq_printf(seq, " +%d", extra); | 131 | seq_printf(seq, "css_set %pK", cset); |
| 131 | /* | 132 | if (rcu_dereference_protected(cset->dom_cset, 1) != cset) { |
| 132 | * Take out the one additional reference in | 133 | threaded_csets++; |
| 133 | * init_css_set. | 134 | seq_printf(seq, "=>%pK", cset->dom_cset); |
| 134 | */ | 135 | } |
| 135 | if (cset == &init_css_set) | 136 | if (!list_empty(&cset->threaded_csets)) { |
| 136 | extra--; | 137 | struct css_set *tcset; |
| 137 | extra_refs += extra; | 138 | int idx = 0; |
| 139 | |||
| 140 | list_for_each_entry(tcset, &cset->threaded_csets, | ||
| 141 | threaded_csets_node) { | ||
| 142 | seq_puts(seq, idx ? "," : "<="); | ||
| 143 | seq_printf(seq, "%pK", tcset); | ||
| 144 | idx++; | ||
| 145 | } | ||
| 146 | } else { | ||
| 147 | seq_printf(seq, " %d", refcnt); | ||
| 148 | if (refcnt - cset->nr_tasks > 0) { | ||
| 149 | int extra = refcnt - cset->nr_tasks; | ||
| 150 | |||
| 151 | seq_printf(seq, " +%d", extra); | ||
| 152 | /* | ||
| 153 | * Take out the one additional reference in | ||
| 154 | * init_css_set. | ||
| 155 | */ | ||
| 156 | if (cset == &init_css_set) | ||
| 157 | extra--; | ||
| 158 | extra_refs += extra; | ||
| 159 | } | ||
| 138 | } | 160 | } |
| 139 | seq_puts(seq, "\n"); | 161 | seq_puts(seq, "\n"); |
| 140 | 162 | ||
| @@ -163,10 +185,12 @@ static int cgroup_css_links_read(struct seq_file *seq, void *v) | |||
| 163 | } | 185 | } |
| 164 | spin_unlock_irq(&css_set_lock); | 186 | spin_unlock_irq(&css_set_lock); |
| 165 | 187 | ||
| 166 | if (!dead_cnt && !extra_refs) | 188 | if (!dead_cnt && !extra_refs && !threaded_csets) |
| 167 | return 0; | 189 | return 0; |
| 168 | 190 | ||
| 169 | seq_puts(seq, "\n"); | 191 | seq_puts(seq, "\n"); |
| 192 | if (threaded_csets) | ||
| 193 | seq_printf(seq, "threaded css_sets = %d\n", threaded_csets); | ||
| 170 | if (extra_refs) | 194 | if (extra_refs) |
| 171 | seq_printf(seq, "extra references = %d\n", extra_refs); | 195 | seq_printf(seq, "extra references = %d\n", extra_refs); |
| 172 | if (dead_cnt) | 196 | if (dead_cnt) |
| @@ -352,6 +376,7 @@ static int __init enable_cgroup_debug(char *str) | |||
| 352 | { | 376 | { |
| 353 | debug_cgrp_subsys.dfl_cftypes = debug_files; | 377 | debug_cgrp_subsys.dfl_cftypes = debug_files; |
| 354 | debug_cgrp_subsys.implicit_on_dfl = true; | 378 | debug_cgrp_subsys.implicit_on_dfl = true; |
| 379 | debug_cgrp_subsys.threaded = true; | ||
| 355 | return 1; | 380 | return 1; |
| 356 | } | 381 | } |
| 357 | __setup("cgroup_debug", enable_cgroup_debug); | 382 | __setup("cgroup_debug", enable_cgroup_debug); |
diff --git a/kernel/cgroup/freezer.c b/kernel/cgroup/freezer.c index 1b72d56edce5..08236798d173 100644 --- a/kernel/cgroup/freezer.c +++ b/kernel/cgroup/freezer.c | |||
| @@ -268,7 +268,7 @@ static void update_if_frozen(struct cgroup_subsys_state *css) | |||
| 268 | rcu_read_unlock(); | 268 | rcu_read_unlock(); |
| 269 | 269 | ||
| 270 | /* are all tasks frozen? */ | 270 | /* are all tasks frozen? */ |
| 271 | css_task_iter_start(css, &it); | 271 | css_task_iter_start(css, 0, &it); |
| 272 | 272 | ||
| 273 | while ((task = css_task_iter_next(&it))) { | 273 | while ((task = css_task_iter_next(&it))) { |
| 274 | if (freezing(task)) { | 274 | if (freezing(task)) { |
| @@ -320,7 +320,7 @@ static void freeze_cgroup(struct freezer *freezer) | |||
| 320 | struct css_task_iter it; | 320 | struct css_task_iter it; |
| 321 | struct task_struct *task; | 321 | struct task_struct *task; |
| 322 | 322 | ||
| 323 | css_task_iter_start(&freezer->css, &it); | 323 | css_task_iter_start(&freezer->css, 0, &it); |
| 324 | while ((task = css_task_iter_next(&it))) | 324 | while ((task = css_task_iter_next(&it))) |
| 325 | freeze_task(task); | 325 | freeze_task(task); |
| 326 | css_task_iter_end(&it); | 326 | css_task_iter_end(&it); |
| @@ -331,7 +331,7 @@ static void unfreeze_cgroup(struct freezer *freezer) | |||
| 331 | struct css_task_iter it; | 331 | struct css_task_iter it; |
| 332 | struct task_struct *task; | 332 | struct task_struct *task; |
| 333 | 333 | ||
| 334 | css_task_iter_start(&freezer->css, &it); | 334 | css_task_iter_start(&freezer->css, 0, &it); |
| 335 | while ((task = css_task_iter_next(&it))) | 335 | while ((task = css_task_iter_next(&it))) |
| 336 | __thaw_task(task); | 336 | __thaw_task(task); |
| 337 | css_task_iter_end(&it); | 337 | css_task_iter_end(&it); |
diff --git a/kernel/cgroup/pids.c b/kernel/cgroup/pids.c index 2237201d66d5..9829c67ebc0a 100644 --- a/kernel/cgroup/pids.c +++ b/kernel/cgroup/pids.c | |||
| @@ -345,4 +345,5 @@ struct cgroup_subsys pids_cgrp_subsys = { | |||
| 345 | .free = pids_free, | 345 | .free = pids_free, |
| 346 | .legacy_cftypes = pids_files, | 346 | .legacy_cftypes = pids_files, |
| 347 | .dfl_cftypes = pids_files, | 347 | .dfl_cftypes = pids_files, |
| 348 | .threaded = true, | ||
| 348 | }; | 349 | }; |
diff --git a/kernel/events/core.c b/kernel/events/core.c index fb415e3d824b..3e691b75b2db 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c | |||
| @@ -11293,5 +11293,6 @@ struct cgroup_subsys perf_event_cgrp_subsys = { | |||
| 11293 | * controller is not mounted on a legacy hierarchy. | 11293 | * controller is not mounted on a legacy hierarchy. |
| 11294 | */ | 11294 | */ |
| 11295 | .implicit_on_dfl = true, | 11295 | .implicit_on_dfl = true, |
| 11296 | .threaded = true, | ||
| 11296 | }; | 11297 | }; |
| 11297 | #endif /* CONFIG_CGROUP_PERF */ | 11298 | #endif /* CONFIG_CGROUP_PERF */ |
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index ad15850ee157..6532b219b222 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
| @@ -919,7 +919,7 @@ int mem_cgroup_scan_tasks(struct mem_cgroup *memcg, | |||
| 919 | struct css_task_iter it; | 919 | struct css_task_iter it; |
| 920 | struct task_struct *task; | 920 | struct task_struct *task; |
| 921 | 921 | ||
| 922 | css_task_iter_start(&iter->css, &it); | 922 | css_task_iter_start(&iter->css, 0, &it); |
| 923 | while (!ret && (task = css_task_iter_next(&it))) | 923 | while (!ret && (task = css_task_iter_next(&it))) |
| 924 | ret = fn(task, arg); | 924 | ret = fn(task, arg); |
| 925 | css_task_iter_end(&it); | 925 | css_task_iter_end(&it); |
diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c index 029a61ac6cdd..5e4f04004a49 100644 --- a/net/core/netclassid_cgroup.c +++ b/net/core/netclassid_cgroup.c | |||
| @@ -100,7 +100,7 @@ static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft, | |||
| 100 | 100 | ||
| 101 | cs->classid = (u32)value; | 101 | cs->classid = (u32)value; |
| 102 | 102 | ||
| 103 | css_task_iter_start(css, &it); | 103 | css_task_iter_start(css, 0, &it); |
| 104 | while ((p = css_task_iter_next(&it))) { | 104 | while ((p = css_task_iter_next(&it))) { |
| 105 | task_lock(p); | 105 | task_lock(p); |
| 106 | iterate_fd(p->files, 0, update_classid_sock, | 106 | iterate_fd(p->files, 0, update_classid_sock, |
