diff options
-rw-r--r-- | Documentation/cgroup-v2.txt | 60 | ||||
-rw-r--r-- | include/linux/cgroup-defs.h | 12 | ||||
-rw-r--r-- | init/Kconfig | 7 | ||||
-rw-r--r-- | kernel/cgroup/Makefile | 1 | ||||
-rw-r--r-- | kernel/cgroup/cgroup-internal.h | 2 | ||||
-rw-r--r-- | kernel/cgroup/cgroup-v1.c | 155 | ||||
-rw-r--r-- | kernel/cgroup/cgroup.c | 155 | ||||
-rw-r--r-- | kernel/cgroup/debug.c | 357 |
8 files changed, 548 insertions, 201 deletions
diff --git a/Documentation/cgroup-v2.txt b/Documentation/cgroup-v2.txt index dc5e2dcdbef4..558c3a739baf 100644 --- a/Documentation/cgroup-v2.txt +++ b/Documentation/cgroup-v2.txt | |||
@@ -149,6 +149,16 @@ during boot, before manual intervention is possible. To make testing | |||
149 | and experimenting easier, the kernel parameter cgroup_no_v1= allows | 149 | and experimenting easier, the kernel parameter cgroup_no_v1= allows |
150 | disabling controllers in v1 and making them always available in v2. | 150 | disabling controllers in v1 and making them always available in v2. |
151 | 151 | ||
152 | cgroup v2 currently supports the following mount options. | ||
153 | |||
154 | nsdelegate | ||
155 | |||
156 | Consider cgroup namespaces as delegation boundaries. This | ||
157 | option is system wide and can only be set on mount or modified | ||
158 | through remount from the init namespace. The mount option is | ||
159 | ignored on non-init namespace mounts. Please refer to the | ||
160 | Delegation section for details. | ||
161 | |||
152 | 162 | ||
153 | 2-2. Organizing Processes | 163 | 2-2. Organizing Processes |
154 | 164 | ||
@@ -308,18 +318,27 @@ file. | |||
308 | 318 | ||
309 | 2-5-1. Model of Delegation | 319 | 2-5-1. Model of Delegation |
310 | 320 | ||
311 | A cgroup can be delegated to a less privileged user by granting write | 321 | A cgroup can be delegated in two ways. First, to a less privileged |
312 | access of the directory and its "cgroup.procs" file to the user. Note | 322 | user by granting write access of the directory and its "cgroup.procs" |
313 | that resource control interface files in a given directory control the | 323 | and "cgroup.subtree_control" files to the user. Second, if the |
314 | distribution of the parent's resources and thus must not be delegated | 324 | "nsdelegate" mount option is set, automatically to a cgroup namespace |
315 | along with the directory. | 325 | on namespace creation. |
316 | 326 | ||
317 | Once delegated, the user can build sub-hierarchy under the directory, | 327 | Because the resource control interface files in a given directory |
318 | organize processes as it sees fit and further distribute the resources | 328 | control the distribution of the parent's resources, the delegatee |
319 | it received from the parent. The limits and other settings of all | 329 | shouldn't be allowed to write to them. For the first method, this is |
320 | resource controllers are hierarchical and regardless of what happens | 330 | achieved by not granting access to these files. For the second, the |
321 | in the delegated sub-hierarchy, nothing can escape the resource | 331 | kernel rejects writes to all files other than "cgroup.procs" and |
322 | restrictions imposed by the parent. | 332 | "cgroup.subtree_control" on a namespace root from inside the |
333 | namespace. | ||
334 | |||
335 | The end results are equivalent for both delegation types. Once | ||
336 | delegated, the user can build sub-hierarchy under the directory, | ||
337 | organize processes inside it as it sees fit and further distribute the | ||
338 | resources it received from the parent. The limits and other settings | ||
339 | of all resource controllers are hierarchical and regardless of what | ||
340 | happens in the delegated sub-hierarchy, nothing can escape the | ||
341 | resource restrictions imposed by the parent. | ||
323 | 342 | ||
324 | Currently, cgroup doesn't impose any restrictions on the number of | 343 | Currently, cgroup doesn't impose any restrictions on the number of |
325 | cgroups in or nesting depth of a delegated sub-hierarchy; however, | 344 | cgroups in or nesting depth of a delegated sub-hierarchy; however, |
@@ -329,10 +348,12 @@ this may be limited explicitly in the future. | |||
329 | 2-5-2. Delegation Containment | 348 | 2-5-2. Delegation Containment |
330 | 349 | ||
331 | A delegated sub-hierarchy is contained in the sense that processes | 350 | A delegated sub-hierarchy is contained in the sense that processes |
332 | can't be moved into or out of the sub-hierarchy by the delegatee. For | 351 | can't be moved into or out of the sub-hierarchy by the delegatee. |
333 | a process with a non-root euid to migrate a target process into a | 352 | |
334 | cgroup by writing its PID to the "cgroup.procs" file, the following | 353 | For delegations to a less privileged user, this is achieved by |
335 | conditions must be met. | 354 | requiring the following conditions for a process with a non-root euid |
355 | to migrate a target process into a cgroup by writing its PID to the | ||
356 | "cgroup.procs" file. | ||
336 | 357 | ||
337 | - The writer must have write access to the "cgroup.procs" file. | 358 | - The writer must have write access to the "cgroup.procs" file. |
338 | 359 | ||
@@ -359,6 +380,11 @@ destination cgroup C00 is above the points of delegation and U0 would | |||
359 | not have write access to its "cgroup.procs" files and thus the write | 380 | not have write access to its "cgroup.procs" files and thus the write |
360 | will be denied with -EACCES. | 381 | will be denied with -EACCES. |
361 | 382 | ||
383 | For delegations to namespaces, containment is achieved by requiring | ||
384 | that both the source and destination cgroups are reachable from the | ||
385 | namespace of the process which is attempting the migration. If either | ||
386 | is not reachable, the migration is rejected with -ENOENT. | ||
387 | |||
362 | 388 | ||
363 | 2-6. Guidelines | 389 | 2-6. Guidelines |
364 | 390 | ||
@@ -1413,7 +1439,7 @@ D. Deprecated v1 Core Features | |||
1413 | 1439 | ||
1414 | - Multiple hierarchies including named ones are not supported. | 1440 | - Multiple hierarchies including named ones are not supported. |
1415 | 1441 | ||
1416 | - All mount options and remounting are not supported. | 1442 | - None of the v1 mount options are supported. |
1417 | 1443 | ||
1418 | - The "tasks" file is removed and "cgroup.procs" is not sorted. | 1444 | - The "tasks" file is removed and "cgroup.procs" is not sorted. |
1419 | 1445 | ||
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index ec47101cb1bf..09f4c7df1478 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h | |||
@@ -67,12 +67,21 @@ enum { | |||
67 | enum { | 67 | enum { |
68 | CGRP_ROOT_NOPREFIX = (1 << 1), /* mounted subsystems have no named prefix */ | 68 | CGRP_ROOT_NOPREFIX = (1 << 1), /* mounted subsystems have no named prefix */ |
69 | CGRP_ROOT_XATTR = (1 << 2), /* supports extended attributes */ | 69 | CGRP_ROOT_XATTR = (1 << 2), /* supports extended attributes */ |
70 | |||
71 | /* | ||
72 | * Consider namespaces as delegation boundaries. If this flag is | ||
73 | * set, controller specific interface files in a namespace root | ||
74 | * aren't writeable from inside the namespace. | ||
75 | */ | ||
76 | CGRP_ROOT_NS_DELEGATE = (1 << 3), | ||
70 | }; | 77 | }; |
71 | 78 | ||
72 | /* cftype->flags */ | 79 | /* cftype->flags */ |
73 | enum { | 80 | enum { |
74 | CFTYPE_ONLY_ON_ROOT = (1 << 0), /* only create on root cgrp */ | 81 | CFTYPE_ONLY_ON_ROOT = (1 << 0), /* only create on root cgrp */ |
75 | CFTYPE_NOT_ON_ROOT = (1 << 1), /* don't create on root cgrp */ | 82 | CFTYPE_NOT_ON_ROOT = (1 << 1), /* don't create on root cgrp */ |
83 | CFTYPE_NS_DELEGATABLE = (1 << 2), /* writeable beyond delegation boundaries */ | ||
84 | |||
76 | CFTYPE_NO_PREFIX = (1 << 3), /* (DON'T USE FOR NEW FILES) no subsys prefix */ | 85 | CFTYPE_NO_PREFIX = (1 << 3), /* (DON'T USE FOR NEW FILES) no subsys prefix */ |
77 | CFTYPE_WORLD_WRITABLE = (1 << 4), /* (DON'T USE FOR NEW FILES) S_IWUGO */ | 86 | CFTYPE_WORLD_WRITABLE = (1 << 4), /* (DON'T USE FOR NEW FILES) S_IWUGO */ |
78 | 87 | ||
@@ -166,6 +175,9 @@ struct css_set { | |||
166 | /* the default cgroup associated with this css_set */ | 175 | /* the default cgroup associated with this css_set */ |
167 | struct cgroup *dfl_cgrp; | 176 | struct cgroup *dfl_cgrp; |
168 | 177 | ||
178 | /* internal task count, protected by css_set_lock */ | ||
179 | int nr_tasks; | ||
180 | |||
169 | /* | 181 | /* |
170 | * Lists running through all tasks using this cgroup group. | 182 | * Lists running through all tasks using this cgroup group. |
171 | * mg_tasks lists tasks which belong to this cset but are in the | 183 | * mg_tasks lists tasks which belong to this cset but are in the |
diff --git a/init/Kconfig b/init/Kconfig index ee0f03b69d11..b0fcbb2c6f56 100644 --- a/init/Kconfig +++ b/init/Kconfig | |||
@@ -859,11 +859,14 @@ config CGROUP_BPF | |||
859 | inet sockets. | 859 | inet sockets. |
860 | 860 | ||
861 | config CGROUP_DEBUG | 861 | config CGROUP_DEBUG |
862 | bool "Example controller" | 862 | bool "Debug controller" |
863 | default n | 863 | default n |
864 | depends on DEBUG_KERNEL | ||
864 | help | 865 | help |
865 | This option enables a simple controller that exports | 866 | This option enables a simple controller that exports |
866 | debugging information about the cgroups framework. | 867 | debugging information about the cgroups framework. This |
868 | controller is for control cgroup debugging only. Its | ||
869 | interfaces are not stable. | ||
867 | 870 | ||
868 | Say N. | 871 | Say N. |
869 | 872 | ||
diff --git a/kernel/cgroup/Makefile b/kernel/cgroup/Makefile index 387348a40c64..ce693ccb8c58 100644 --- a/kernel/cgroup/Makefile +++ b/kernel/cgroup/Makefile | |||
@@ -4,3 +4,4 @@ obj-$(CONFIG_CGROUP_FREEZER) += freezer.o | |||
4 | obj-$(CONFIG_CGROUP_PIDS) += pids.o | 4 | obj-$(CONFIG_CGROUP_PIDS) += pids.o |
5 | obj-$(CONFIG_CGROUP_RDMA) += rdma.o | 5 | obj-$(CONFIG_CGROUP_RDMA) += rdma.o |
6 | obj-$(CONFIG_CPUSETS) += cpuset.o | 6 | obj-$(CONFIG_CPUSETS) += cpuset.o |
7 | obj-$(CONFIG_CGROUP_DEBUG) += debug.o | ||
diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h index 00f4d6bf048f..793565c05742 100644 --- a/kernel/cgroup/cgroup-internal.h +++ b/kernel/cgroup/cgroup-internal.h | |||
@@ -192,6 +192,8 @@ int cgroup_rmdir(struct kernfs_node *kn); | |||
192 | int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node, | 192 | int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node, |
193 | struct kernfs_root *kf_root); | 193 | struct kernfs_root *kf_root); |
194 | 194 | ||
195 | int cgroup_task_count(const struct cgroup *cgrp); | ||
196 | |||
195 | /* | 197 | /* |
196 | * namespace.c | 198 | * namespace.c |
197 | */ | 199 | */ |
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 85d75152402d..7bf4b1533f34 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c | |||
@@ -334,19 +334,15 @@ static struct cgroup_pidlist *cgroup_pidlist_find_create(struct cgroup *cgrp, | |||
334 | /** | 334 | /** |
335 | * cgroup_task_count - count the number of tasks in a cgroup. | 335 | * cgroup_task_count - count the number of tasks in a cgroup. |
336 | * @cgrp: the cgroup in question | 336 | * @cgrp: the cgroup in question |
337 | * | ||
338 | * Return the number of tasks in the cgroup. The returned number can be | ||
339 | * higher than the actual number of tasks due to css_set references from | ||
340 | * namespace roots and temporary usages. | ||
341 | */ | 337 | */ |
342 | static int cgroup_task_count(const struct cgroup *cgrp) | 338 | int cgroup_task_count(const struct cgroup *cgrp) |
343 | { | 339 | { |
344 | int count = 0; | 340 | int count = 0; |
345 | struct cgrp_cset_link *link; | 341 | struct cgrp_cset_link *link; |
346 | 342 | ||
347 | spin_lock_irq(&css_set_lock); | 343 | spin_lock_irq(&css_set_lock); |
348 | list_for_each_entry(link, &cgrp->cset_links, cset_link) | 344 | list_for_each_entry(link, &cgrp->cset_links, cset_link) |
349 | count += refcount_read(&link->cset->refcount); | 345 | count += link->cset->nr_tasks; |
350 | spin_unlock_irq(&css_set_lock); | 346 | spin_unlock_irq(&css_set_lock); |
351 | return count; | 347 | return count; |
352 | } | 348 | } |
@@ -1263,150 +1259,3 @@ static int __init cgroup_no_v1(char *str) | |||
1263 | return 1; | 1259 | return 1; |
1264 | } | 1260 | } |
1265 | __setup("cgroup_no_v1=", cgroup_no_v1); | 1261 | __setup("cgroup_no_v1=", cgroup_no_v1); |
1266 | |||
1267 | |||
1268 | #ifdef CONFIG_CGROUP_DEBUG | ||
1269 | static struct cgroup_subsys_state * | ||
1270 | debug_css_alloc(struct cgroup_subsys_state *parent_css) | ||
1271 | { | ||
1272 | struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL); | ||
1273 | |||
1274 | if (!css) | ||
1275 | return ERR_PTR(-ENOMEM); | ||
1276 | |||
1277 | return css; | ||
1278 | } | ||
1279 | |||
1280 | static void debug_css_free(struct cgroup_subsys_state *css) | ||
1281 | { | ||
1282 | kfree(css); | ||
1283 | } | ||
1284 | |||
1285 | static u64 debug_taskcount_read(struct cgroup_subsys_state *css, | ||
1286 | struct cftype *cft) | ||
1287 | { | ||
1288 | return cgroup_task_count(css->cgroup); | ||
1289 | } | ||
1290 | |||
1291 | static u64 current_css_set_read(struct cgroup_subsys_state *css, | ||
1292 | struct cftype *cft) | ||
1293 | { | ||
1294 | return (u64)(unsigned long)current->cgroups; | ||
1295 | } | ||
1296 | |||
1297 | static u64 current_css_set_refcount_read(struct cgroup_subsys_state *css, | ||
1298 | struct cftype *cft) | ||
1299 | { | ||
1300 | u64 count; | ||
1301 | |||
1302 | rcu_read_lock(); | ||
1303 | count = refcount_read(&task_css_set(current)->refcount); | ||
1304 | rcu_read_unlock(); | ||
1305 | return count; | ||
1306 | } | ||
1307 | |||
1308 | static int current_css_set_cg_links_read(struct seq_file *seq, void *v) | ||
1309 | { | ||
1310 | struct cgrp_cset_link *link; | ||
1311 | struct css_set *cset; | ||
1312 | char *name_buf; | ||
1313 | |||
1314 | name_buf = kmalloc(NAME_MAX + 1, GFP_KERNEL); | ||
1315 | if (!name_buf) | ||
1316 | return -ENOMEM; | ||
1317 | |||
1318 | spin_lock_irq(&css_set_lock); | ||
1319 | rcu_read_lock(); | ||
1320 | cset = rcu_dereference(current->cgroups); | ||
1321 | list_for_each_entry(link, &cset->cgrp_links, cgrp_link) { | ||
1322 | struct cgroup *c = link->cgrp; | ||
1323 | |||
1324 | cgroup_name(c, name_buf, NAME_MAX + 1); | ||
1325 | seq_printf(seq, "Root %d group %s\n", | ||
1326 | c->root->hierarchy_id, name_buf); | ||
1327 | } | ||
1328 | rcu_read_unlock(); | ||
1329 | spin_unlock_irq(&css_set_lock); | ||
1330 | kfree(name_buf); | ||
1331 | return 0; | ||
1332 | } | ||
1333 | |||
1334 | #define MAX_TASKS_SHOWN_PER_CSS 25 | ||
1335 | static int cgroup_css_links_read(struct seq_file *seq, void *v) | ||
1336 | { | ||
1337 | struct cgroup_subsys_state *css = seq_css(seq); | ||
1338 | struct cgrp_cset_link *link; | ||
1339 | |||
1340 | spin_lock_irq(&css_set_lock); | ||
1341 | list_for_each_entry(link, &css->cgroup->cset_links, cset_link) { | ||
1342 | struct css_set *cset = link->cset; | ||
1343 | struct task_struct *task; | ||
1344 | int count = 0; | ||
1345 | |||
1346 | seq_printf(seq, "css_set %pK\n", cset); | ||
1347 | |||
1348 | list_for_each_entry(task, &cset->tasks, cg_list) { | ||
1349 | if (count++ > MAX_TASKS_SHOWN_PER_CSS) | ||
1350 | goto overflow; | ||
1351 | seq_printf(seq, " task %d\n", task_pid_vnr(task)); | ||
1352 | } | ||
1353 | |||
1354 | list_for_each_entry(task, &cset->mg_tasks, cg_list) { | ||
1355 | if (count++ > MAX_TASKS_SHOWN_PER_CSS) | ||
1356 | goto overflow; | ||
1357 | seq_printf(seq, " task %d\n", task_pid_vnr(task)); | ||
1358 | } | ||
1359 | continue; | ||
1360 | overflow: | ||
1361 | seq_puts(seq, " ...\n"); | ||
1362 | } | ||
1363 | spin_unlock_irq(&css_set_lock); | ||
1364 | return 0; | ||
1365 | } | ||
1366 | |||
1367 | static u64 releasable_read(struct cgroup_subsys_state *css, struct cftype *cft) | ||
1368 | { | ||
1369 | return (!cgroup_is_populated(css->cgroup) && | ||
1370 | !css_has_online_children(&css->cgroup->self)); | ||
1371 | } | ||
1372 | |||
1373 | static struct cftype debug_files[] = { | ||
1374 | { | ||
1375 | .name = "taskcount", | ||
1376 | .read_u64 = debug_taskcount_read, | ||
1377 | }, | ||
1378 | |||
1379 | { | ||
1380 | .name = "current_css_set", | ||
1381 | .read_u64 = current_css_set_read, | ||
1382 | }, | ||
1383 | |||
1384 | { | ||
1385 | .name = "current_css_set_refcount", | ||
1386 | .read_u64 = current_css_set_refcount_read, | ||
1387 | }, | ||
1388 | |||
1389 | { | ||
1390 | .name = "current_css_set_cg_links", | ||
1391 | .seq_show = current_css_set_cg_links_read, | ||
1392 | }, | ||
1393 | |||
1394 | { | ||
1395 | .name = "cgroup_css_links", | ||
1396 | .seq_show = cgroup_css_links_read, | ||
1397 | }, | ||
1398 | |||
1399 | { | ||
1400 | .name = "releasable", | ||
1401 | .read_u64 = releasable_read, | ||
1402 | }, | ||
1403 | |||
1404 | { } /* terminate */ | ||
1405 | }; | ||
1406 | |||
1407 | struct cgroup_subsys debug_cgrp_subsys = { | ||
1408 | .css_alloc = debug_css_alloc, | ||
1409 | .css_free = debug_css_free, | ||
1410 | .legacy_cftypes = debug_files, | ||
1411 | }; | ||
1412 | #endif /* CONFIG_CGROUP_DEBUG */ | ||
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 8d4e85eae42c..620794a20a33 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c | |||
@@ -573,6 +573,11 @@ static int css_set_count = 1; /* 1 for init_css_set */ | |||
573 | /** | 573 | /** |
574 | * css_set_populated - does a css_set contain any tasks? | 574 | * css_set_populated - does a css_set contain any tasks? |
575 | * @cset: target css_set | 575 | * @cset: target css_set |
576 | * | ||
577 | * css_set_populated() should be the same as !!cset->nr_tasks at steady | ||
578 | * state. However, css_set_populated() can be called while a task is being | ||
579 | * added to or removed from the linked list before the nr_tasks is | ||
580 | * properly updated. Hence, we can't just look at ->nr_tasks here. | ||
576 | */ | 581 | */ |
577 | static bool css_set_populated(struct css_set *cset) | 582 | static bool css_set_populated(struct css_set *cset) |
578 | { | 583 | { |
@@ -1542,10 +1547,56 @@ int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node, | |||
1542 | return len; | 1547 | return len; |
1543 | } | 1548 | } |
1544 | 1549 | ||
1550 | static int parse_cgroup_root_flags(char *data, unsigned int *root_flags) | ||
1551 | { | ||
1552 | char *token; | ||
1553 | |||
1554 | *root_flags = 0; | ||
1555 | |||
1556 | if (!data) | ||
1557 | return 0; | ||
1558 | |||
1559 | while ((token = strsep(&data, ",")) != NULL) { | ||
1560 | if (!strcmp(token, "nsdelegate")) { | ||
1561 | *root_flags |= CGRP_ROOT_NS_DELEGATE; | ||
1562 | continue; | ||
1563 | } | ||
1564 | |||
1565 | pr_err("cgroup2: unknown option \"%s\"\n", token); | ||
1566 | return -EINVAL; | ||
1567 | } | ||
1568 | |||
1569 | return 0; | ||
1570 | } | ||
1571 | |||
1572 | static void apply_cgroup_root_flags(unsigned int root_flags) | ||
1573 | { | ||
1574 | if (current->nsproxy->cgroup_ns == &init_cgroup_ns) { | ||
1575 | if (root_flags & CGRP_ROOT_NS_DELEGATE) | ||
1576 | cgrp_dfl_root.flags |= CGRP_ROOT_NS_DELEGATE; | ||
1577 | else | ||
1578 | cgrp_dfl_root.flags &= ~CGRP_ROOT_NS_DELEGATE; | ||
1579 | } | ||
1580 | } | ||
1581 | |||
1582 | static int cgroup_show_options(struct seq_file *seq, struct kernfs_root *kf_root) | ||
1583 | { | ||
1584 | if (cgrp_dfl_root.flags & CGRP_ROOT_NS_DELEGATE) | ||
1585 | seq_puts(seq, ",nsdelegate"); | ||
1586 | return 0; | ||
1587 | } | ||
1588 | |||
1545 | static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data) | 1589 | static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data) |
1546 | { | 1590 | { |
1547 | pr_err("remount is not allowed\n"); | 1591 | unsigned int root_flags; |
1548 | return -EINVAL; | 1592 | int ret; |
1593 | |||
1594 | ret = parse_cgroup_root_flags(data, &root_flags); | ||
1595 | if (ret) | ||
1596 | return ret; | ||
1597 | |||
1598 | apply_cgroup_root_flags(root_flags); | ||
1599 | return 0; | ||
1549 | } | 1600 | } |
1550 | 1601 | ||
1551 | /* | 1602 | /* |
@@ -1598,6 +1649,7 @@ static void cgroup_enable_task_cg_lists(void) | |||
1598 | css_set_update_populated(cset, true); | 1649 | css_set_update_populated(cset, true); |
1599 | list_add_tail(&p->cg_list, &cset->tasks); | 1650 | list_add_tail(&p->cg_list, &cset->tasks); |
1600 | get_css_set(cset); | 1651 | get_css_set(cset); |
1652 | cset->nr_tasks++; | ||
1601 | } | 1653 | } |
1602 | spin_unlock(&p->sighand->siglock); | 1654 | spin_unlock(&p->sighand->siglock); |
1603 | } while_each_thread(g, p); | 1655 | } while_each_thread(g, p); |
@@ -1784,6 +1836,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
1784 | { | 1836 | { |
1785 | struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; | 1837 | struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; |
1786 | struct dentry *dentry; | 1838 | struct dentry *dentry; |
1839 | int ret; | ||
1787 | 1840 | ||
1788 | get_cgroup_ns(ns); | 1841 | get_cgroup_ns(ns); |
1789 | 1842 | ||
@@ -1801,16 +1854,21 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
1801 | cgroup_enable_task_cg_lists(); | 1854 | cgroup_enable_task_cg_lists(); |
1802 | 1855 | ||
1803 | if (fs_type == &cgroup2_fs_type) { | 1856 | if (fs_type == &cgroup2_fs_type) { |
1804 | if (data) { | 1857 | unsigned int root_flags; |
1805 | pr_err("cgroup2: unknown option \"%s\"\n", (char *)data); | 1858 | |
1859 | ret = parse_cgroup_root_flags(data, &root_flags); | ||
1860 | if (ret) { | ||
1806 | put_cgroup_ns(ns); | 1861 | put_cgroup_ns(ns); |
1807 | return ERR_PTR(-EINVAL); | 1862 | return ERR_PTR(ret); |
1808 | } | 1863 | } |
1864 | |||
1809 | cgrp_dfl_visible = true; | 1865 | cgrp_dfl_visible = true; |
1810 | cgroup_get_live(&cgrp_dfl_root.cgrp); | 1866 | cgroup_get_live(&cgrp_dfl_root.cgrp); |
1811 | 1867 | ||
1812 | dentry = cgroup_do_mount(&cgroup2_fs_type, flags, &cgrp_dfl_root, | 1868 | dentry = cgroup_do_mount(&cgroup2_fs_type, flags, &cgrp_dfl_root, |
1813 | CGROUP2_SUPER_MAGIC, ns); | 1869 | CGROUP2_SUPER_MAGIC, ns); |
1870 | if (!IS_ERR(dentry)) | ||
1871 | apply_cgroup_root_flags(root_flags); | ||
1814 | } else { | 1872 | } else { |
1815 | dentry = cgroup1_mount(&cgroup_fs_type, flags, data, | 1873 | dentry = cgroup1_mount(&cgroup_fs_type, flags, data, |
1816 | CGROUP_SUPER_MAGIC, ns); | 1874 | CGROUP_SUPER_MAGIC, ns); |
@@ -2064,8 +2122,10 @@ static int cgroup_migrate_execute(struct cgroup_mgctx *mgctx) | |||
2064 | struct css_set *to_cset = cset->mg_dst_cset; | 2122 | struct css_set *to_cset = cset->mg_dst_cset; |
2065 | 2123 | ||
2066 | get_css_set(to_cset); | 2124 | get_css_set(to_cset); |
2125 | to_cset->nr_tasks++; | ||
2067 | css_set_move_task(task, from_cset, to_cset, true); | 2126 | css_set_move_task(task, from_cset, to_cset, true); |
2068 | put_css_set_locked(from_cset); | 2127 | put_css_set_locked(from_cset); |
2128 | from_cset->nr_tasks--; | ||
2069 | } | 2129 | } |
2070 | } | 2130 | } |
2071 | spin_unlock_irq(&css_set_lock); | 2131 | spin_unlock_irq(&css_set_lock); |
@@ -2355,27 +2415,14 @@ static int cgroup_procs_write_permission(struct task_struct *task, | |||
2355 | struct cgroup *dst_cgrp, | 2415 | struct cgroup *dst_cgrp, |
2356 | struct kernfs_open_file *of) | 2416 | struct kernfs_open_file *of) |
2357 | { | 2417 | { |
2358 | int ret = 0; | 2418 | struct super_block *sb = of->file->f_path.dentry->d_sb; |
2359 | 2419 | struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; | |
2360 | if (cgroup_on_dfl(dst_cgrp)) { | 2420 | struct cgroup *root_cgrp = ns->root_cset->dfl_cgrp; |
2361 | struct super_block *sb = of->file->f_path.dentry->d_sb; | 2421 | struct cgroup *src_cgrp, *com_cgrp; |
2362 | struct cgroup *cgrp; | 2422 | struct inode *inode; |
2363 | struct inode *inode; | 2423 | int ret; |
2364 | |||
2365 | spin_lock_irq(&css_set_lock); | ||
2366 | cgrp = task_cgroup_from_root(task, &cgrp_dfl_root); | ||
2367 | spin_unlock_irq(&css_set_lock); | ||
2368 | |||
2369 | while (!cgroup_is_descendant(dst_cgrp, cgrp)) | ||
2370 | cgrp = cgroup_parent(cgrp); | ||
2371 | 2424 | ||
2372 | ret = -ENOMEM; | 2425 | if (!cgroup_on_dfl(dst_cgrp)) { |
2373 | inode = kernfs_get_inode(sb, cgrp->procs_file.kn); | ||
2374 | if (inode) { | ||
2375 | ret = inode_permission(inode, MAY_WRITE); | ||
2376 | iput(inode); | ||
2377 | } | ||
2378 | } else { | ||
2379 | const struct cred *cred = current_cred(); | 2426 | const struct cred *cred = current_cred(); |
2380 | const struct cred *tcred = get_task_cred(task); | 2427 | const struct cred *tcred = get_task_cred(task); |
2381 | 2428 | ||
@@ -2383,14 +2430,47 @@ static int cgroup_procs_write_permission(struct task_struct *task, | |||
2383 | * even if we're attaching all tasks in the thread group, | 2430 | * even if we're attaching all tasks in the thread group, |
2384 | * we only need to check permissions on one of them. | 2431 | * we only need to check permissions on one of them. |
2385 | */ | 2432 | */ |
2386 | if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) && | 2433 | if (uid_eq(cred->euid, GLOBAL_ROOT_UID) || |
2387 | !uid_eq(cred->euid, tcred->uid) && | 2434 | uid_eq(cred->euid, tcred->uid) || |
2388 | !uid_eq(cred->euid, tcred->suid)) | 2435 | uid_eq(cred->euid, tcred->suid)) |
2436 | ret = 0; | ||
2437 | else | ||
2389 | ret = -EACCES; | 2438 | ret = -EACCES; |
2439 | |||
2390 | put_cred(tcred); | 2440 | put_cred(tcred); |
2441 | return ret; | ||
2391 | } | 2442 | } |
2392 | 2443 | ||
2393 | return ret; | 2444 | /* find the source cgroup */ |
2445 | spin_lock_irq(&css_set_lock); | ||
2446 | src_cgrp = task_cgroup_from_root(task, &cgrp_dfl_root); | ||
2447 | spin_unlock_irq(&css_set_lock); | ||
2448 | |||
2449 | /* and the common ancestor */ | ||
2450 | com_cgrp = src_cgrp; | ||
2451 | while (!cgroup_is_descendant(dst_cgrp, com_cgrp)) | ||
2452 | com_cgrp = cgroup_parent(com_cgrp); | ||
2453 | |||
2454 | /* %current should be authorized to migrate to the common ancestor */ | ||
2455 | inode = kernfs_get_inode(sb, com_cgrp->procs_file.kn); | ||
2456 | if (!inode) | ||
2457 | return -ENOMEM; | ||
2458 | |||
2459 | ret = inode_permission(inode, MAY_WRITE); | ||
2460 | iput(inode); | ||
2461 | if (ret) | ||
2462 | return ret; | ||
2463 | |||
2464 | /* | ||
2465 | * If namespaces are delegation boundaries, %current must be able | ||
2466 | * to see both source and destination cgroups from its namespace. | ||
2467 | */ | ||
2468 | if ((cgrp_dfl_root.flags & CGRP_ROOT_NS_DELEGATE) && | ||
2469 | (!cgroup_is_descendant(src_cgrp, root_cgrp) || | ||
2470 | !cgroup_is_descendant(dst_cgrp, root_cgrp))) | ||
2471 | return -ENOENT; | ||
2472 | |||
2473 | return 0; | ||
2394 | } | 2474 | } |
2395 | 2475 | ||
2396 | /* | 2476 | /* |
@@ -2954,11 +3034,23 @@ static void cgroup_file_release(struct kernfs_open_file *of) | |||
2954 | static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf, | 3034 | static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf, |
2955 | size_t nbytes, loff_t off) | 3035 | size_t nbytes, loff_t off) |
2956 | { | 3036 | { |
3037 | struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; | ||
2957 | struct cgroup *cgrp = of->kn->parent->priv; | 3038 | struct cgroup *cgrp = of->kn->parent->priv; |
2958 | struct cftype *cft = of->kn->priv; | 3039 | struct cftype *cft = of->kn->priv; |
2959 | struct cgroup_subsys_state *css; | 3040 | struct cgroup_subsys_state *css; |
2960 | int ret; | 3041 | int ret; |
2961 | 3042 | ||
3043 | /* | ||
3044 | * If namespaces are delegation boundaries, disallow writes to | ||
3045 | * files in a non-init namespace root from inside the namespace | |
3046 | * except for the files explicitly marked delegatable - | ||
3047 | * cgroup.procs and cgroup.subtree_control. | ||
3048 | */ | ||
3049 | if ((cgrp->root->flags & CGRP_ROOT_NS_DELEGATE) && | ||
3050 | !(cft->flags & CFTYPE_NS_DELEGATABLE) && | ||
3051 | ns != &init_cgroup_ns && ns->root_cset->dfl_cgrp == cgrp) | ||
3052 | return -EPERM; | ||
3053 | |||
2962 | if (cft->write) | 3054 | if (cft->write) |
2963 | return cft->write(of, buf, nbytes, off); | 3055 | return cft->write(of, buf, nbytes, off); |
2964 | 3056 | ||
@@ -3792,6 +3884,7 @@ static int cgroup_procs_show(struct seq_file *s, void *v) | |||
3792 | static struct cftype cgroup_base_files[] = { | 3884 | static struct cftype cgroup_base_files[] = { |
3793 | { | 3885 | { |
3794 | .name = "cgroup.procs", | 3886 | .name = "cgroup.procs", |
3887 | .flags = CFTYPE_NS_DELEGATABLE, | ||
3795 | .file_offset = offsetof(struct cgroup, procs_file), | 3888 | .file_offset = offsetof(struct cgroup, procs_file), |
3796 | .release = cgroup_procs_release, | 3889 | .release = cgroup_procs_release, |
3797 | .seq_start = cgroup_procs_start, | 3890 | .seq_start = cgroup_procs_start, |
@@ -3805,6 +3898,7 @@ static struct cftype cgroup_base_files[] = { | |||
3805 | }, | 3898 | }, |
3806 | { | 3899 | { |
3807 | .name = "cgroup.subtree_control", | 3900 | .name = "cgroup.subtree_control", |
3901 | .flags = CFTYPE_NS_DELEGATABLE, | ||
3808 | .seq_show = cgroup_subtree_control_show, | 3902 | .seq_show = cgroup_subtree_control_show, |
3809 | .write = cgroup_subtree_control_write, | 3903 | .write = cgroup_subtree_control_write, |
3810 | }, | 3904 | }, |
@@ -4393,6 +4487,7 @@ int cgroup_rmdir(struct kernfs_node *kn) | |||
4393 | } | 4487 | } |
4394 | 4488 | ||
4395 | static struct kernfs_syscall_ops cgroup_kf_syscall_ops = { | 4489 | static struct kernfs_syscall_ops cgroup_kf_syscall_ops = { |
4490 | .show_options = cgroup_show_options, | ||
4396 | .remount_fs = cgroup_remount, | 4491 | .remount_fs = cgroup_remount, |
4397 | .mkdir = cgroup_mkdir, | 4492 | .mkdir = cgroup_mkdir, |
4398 | .rmdir = cgroup_rmdir, | 4493 | .rmdir = cgroup_rmdir, |
@@ -4789,6 +4884,7 @@ void cgroup_post_fork(struct task_struct *child) | |||
4789 | cset = task_css_set(current); | 4884 | cset = task_css_set(current); |
4790 | if (list_empty(&child->cg_list)) { | 4885 | if (list_empty(&child->cg_list)) { |
4791 | get_css_set(cset); | 4886 | get_css_set(cset); |
4887 | cset->nr_tasks++; | ||
4792 | css_set_move_task(child, NULL, cset, false); | 4888 | css_set_move_task(child, NULL, cset, false); |
4793 | } | 4889 | } |
4794 | spin_unlock_irq(&css_set_lock); | 4890 | spin_unlock_irq(&css_set_lock); |
@@ -4838,6 +4934,7 @@ void cgroup_exit(struct task_struct *tsk) | |||
4838 | if (!list_empty(&tsk->cg_list)) { | 4934 | if (!list_empty(&tsk->cg_list)) { |
4839 | spin_lock_irq(&css_set_lock); | 4935 | spin_lock_irq(&css_set_lock); |
4840 | css_set_move_task(tsk, cset, NULL, false); | 4936 | css_set_move_task(tsk, cset, NULL, false); |
4937 | cset->nr_tasks--; | ||
4841 | spin_unlock_irq(&css_set_lock); | 4938 | spin_unlock_irq(&css_set_lock); |
4842 | } else { | 4939 | } else { |
4843 | get_css_set(cset); | 4940 | get_css_set(cset); |
diff --git a/kernel/cgroup/debug.c b/kernel/cgroup/debug.c new file mode 100644 index 000000000000..dac46af22782 --- /dev/null +++ b/kernel/cgroup/debug.c | |||
@@ -0,0 +1,357 @@ | |||
1 | /* | ||
2 | * Debug controller | ||
3 | * | ||
4 | * WARNING: This controller is for cgroup core debugging only. | ||
5 | * Its interfaces are unstable and subject to changes at any time. | ||
6 | */ | ||
7 | #include <linux/ctype.h> | ||
8 | #include <linux/mm.h> | ||
9 | #include <linux/slab.h> | ||
10 | |||
11 | #include "cgroup-internal.h" | ||
12 | |||
13 | static struct cgroup_subsys_state * | ||
14 | debug_css_alloc(struct cgroup_subsys_state *parent_css) | ||
15 | { | ||
16 | struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL); | ||
17 | |||
18 | if (!css) | ||
19 | return ERR_PTR(-ENOMEM); | ||
20 | |||
21 | return css; | ||
22 | } | ||
23 | |||
/*
 * Free a debug controller css.  Installed as the ->css_free counterpart of
 * debug_css_alloc() in debug_cgrp_subsys.
 */
static void debug_css_free(struct cgroup_subsys_state *css)
{
	kfree(css);
}
28 | |||
29 | /* | ||
30 | * debug_taskcount_read - return the number of tasks in a cgroup. | ||
31 | * @cgrp: the cgroup in question | ||
32 | */ | ||
33 | static u64 debug_taskcount_read(struct cgroup_subsys_state *css, | ||
34 | struct cftype *cft) | ||
35 | { | ||
36 | return cgroup_task_count(css->cgroup); | ||
37 | } | ||
38 | |||
39 | static int current_css_set_read(struct seq_file *seq, void *v) | ||
40 | { | ||
41 | struct kernfs_open_file *of = seq->private; | ||
42 | struct css_set *cset; | ||
43 | struct cgroup_subsys *ss; | ||
44 | struct cgroup_subsys_state *css; | ||
45 | int i, refcnt; | ||
46 | |||
47 | if (!cgroup_kn_lock_live(of->kn, false)) | ||
48 | return -ENODEV; | ||
49 | |||
50 | spin_lock_irq(&css_set_lock); | ||
51 | rcu_read_lock(); | ||
52 | cset = rcu_dereference(current->cgroups); | ||
53 | refcnt = refcount_read(&cset->refcount); | ||
54 | seq_printf(seq, "css_set %pK %d", cset, refcnt); | ||
55 | if (refcnt > cset->nr_tasks) | ||
56 | seq_printf(seq, " +%d", refcnt - cset->nr_tasks); | ||
57 | seq_puts(seq, "\n"); | ||
58 | |||
59 | /* | ||
60 | * Print the css'es stored in the current css_set. | ||
61 | */ | ||
62 | for_each_subsys(ss, i) { | ||
63 | css = cset->subsys[ss->id]; | ||
64 | if (!css) | ||
65 | continue; | ||
66 | seq_printf(seq, "%2d: %-4s\t- %lx[%d]\n", ss->id, ss->name, | ||
67 | (unsigned long)css, css->id); | ||
68 | } | ||
69 | rcu_read_unlock(); | ||
70 | spin_unlock_irq(&css_set_lock); | ||
71 | cgroup_kn_unlock(of->kn); | ||
72 | return 0; | ||
73 | } | ||
74 | |||
75 | static u64 current_css_set_refcount_read(struct cgroup_subsys_state *css, | ||
76 | struct cftype *cft) | ||
77 | { | ||
78 | u64 count; | ||
79 | |||
80 | rcu_read_lock(); | ||
81 | count = refcount_read(&task_css_set(current)->refcount); | ||
82 | rcu_read_unlock(); | ||
83 | return count; | ||
84 | } | ||
85 | |||
86 | static int current_css_set_cg_links_read(struct seq_file *seq, void *v) | ||
87 | { | ||
88 | struct cgrp_cset_link *link; | ||
89 | struct css_set *cset; | ||
90 | char *name_buf; | ||
91 | |||
92 | name_buf = kmalloc(NAME_MAX + 1, GFP_KERNEL); | ||
93 | if (!name_buf) | ||
94 | return -ENOMEM; | ||
95 | |||
96 | spin_lock_irq(&css_set_lock); | ||
97 | rcu_read_lock(); | ||
98 | cset = rcu_dereference(current->cgroups); | ||
99 | list_for_each_entry(link, &cset->cgrp_links, cgrp_link) { | ||
100 | struct cgroup *c = link->cgrp; | ||
101 | |||
102 | cgroup_name(c, name_buf, NAME_MAX + 1); | ||
103 | seq_printf(seq, "Root %d group %s\n", | ||
104 | c->root->hierarchy_id, name_buf); | ||
105 | } | ||
106 | rcu_read_unlock(); | ||
107 | spin_unlock_irq(&css_set_lock); | ||
108 | kfree(name_buf); | ||
109 | return 0; | ||
110 | } | ||
111 | |||
112 | #define MAX_TASKS_SHOWN_PER_CSS 25 | ||
113 | static int cgroup_css_links_read(struct seq_file *seq, void *v) | ||
114 | { | ||
115 | struct cgroup_subsys_state *css = seq_css(seq); | ||
116 | struct cgrp_cset_link *link; | ||
117 | int dead_cnt = 0, extra_refs = 0; | ||
118 | |||
119 | spin_lock_irq(&css_set_lock); | ||
120 | list_for_each_entry(link, &css->cgroup->cset_links, cset_link) { | ||
121 | struct css_set *cset = link->cset; | ||
122 | struct task_struct *task; | ||
123 | int count = 0; | ||
124 | int refcnt = refcount_read(&cset->refcount); | ||
125 | |||
126 | seq_printf(seq, " %d", refcnt); | ||
127 | if (refcnt - cset->nr_tasks > 0) { | ||
128 | int extra = refcnt - cset->nr_tasks; | ||
129 | |||
130 | seq_printf(seq, " +%d", extra); | ||
131 | /* | ||
132 | * Take out the one additional reference in | ||
133 | * init_css_set. | ||
134 | */ | ||
135 | if (cset == &init_css_set) | ||
136 | extra--; | ||
137 | extra_refs += extra; | ||
138 | } | ||
139 | seq_puts(seq, "\n"); | ||
140 | |||
141 | list_for_each_entry(task, &cset->tasks, cg_list) { | ||
142 | if (count++ <= MAX_TASKS_SHOWN_PER_CSS) | ||
143 | seq_printf(seq, " task %d\n", | ||
144 | task_pid_vnr(task)); | ||
145 | } | ||
146 | |||
147 | list_for_each_entry(task, &cset->mg_tasks, cg_list) { | ||
148 | if (count++ <= MAX_TASKS_SHOWN_PER_CSS) | ||
149 | seq_printf(seq, " task %d\n", | ||
150 | task_pid_vnr(task)); | ||
151 | } | ||
152 | /* show # of overflowed tasks */ | ||
153 | if (count > MAX_TASKS_SHOWN_PER_CSS) | ||
154 | seq_printf(seq, " ... (%d)\n", | ||
155 | count - MAX_TASKS_SHOWN_PER_CSS); | ||
156 | |||
157 | if (cset->dead) { | ||
158 | seq_puts(seq, " [dead]\n"); | ||
159 | dead_cnt++; | ||
160 | } | ||
161 | |||
162 | WARN_ON(count != cset->nr_tasks); | ||
163 | } | ||
164 | spin_unlock_irq(&css_set_lock); | ||
165 | |||
166 | if (!dead_cnt && !extra_refs) | ||
167 | return 0; | ||
168 | |||
169 | seq_puts(seq, "\n"); | ||
170 | if (extra_refs) | ||
171 | seq_printf(seq, "extra references = %d\n", extra_refs); | ||
172 | if (dead_cnt) | ||
173 | seq_printf(seq, "dead css_sets = %d\n", dead_cnt); | ||
174 | |||
175 | return 0; | ||
176 | } | ||
177 | |||
178 | static int cgroup_subsys_states_read(struct seq_file *seq, void *v) | ||
179 | { | ||
180 | struct kernfs_open_file *of = seq->private; | ||
181 | struct cgroup *cgrp; | ||
182 | struct cgroup_subsys *ss; | ||
183 | struct cgroup_subsys_state *css; | ||
184 | char pbuf[16]; | ||
185 | int i; | ||
186 | |||
187 | cgrp = cgroup_kn_lock_live(of->kn, false); | ||
188 | if (!cgrp) | ||
189 | return -ENODEV; | ||
190 | |||
191 | for_each_subsys(ss, i) { | ||
192 | css = rcu_dereference_check(cgrp->subsys[ss->id], true); | ||
193 | if (!css) | ||
194 | continue; | ||
195 | |||
196 | pbuf[0] = '\0'; | ||
197 | |||
198 | /* Show the parent CSS if applicable*/ | ||
199 | if (css->parent) | ||
200 | snprintf(pbuf, sizeof(pbuf) - 1, " P=%d", | ||
201 | css->parent->id); | ||
202 | seq_printf(seq, "%2d: %-4s\t- %lx[%d] %d%s\n", ss->id, ss->name, | ||
203 | (unsigned long)css, css->id, | ||
204 | atomic_read(&css->online_cnt), pbuf); | ||
205 | } | ||
206 | |||
207 | cgroup_kn_unlock(of->kn); | ||
208 | return 0; | ||
209 | } | ||
210 | |||
211 | static void cgroup_masks_read_one(struct seq_file *seq, const char *name, | ||
212 | u16 mask) | ||
213 | { | ||
214 | struct cgroup_subsys *ss; | ||
215 | int ssid; | ||
216 | bool first = true; | ||
217 | |||
218 | seq_printf(seq, "%-17s: ", name); | ||
219 | for_each_subsys(ss, ssid) { | ||
220 | if (!(mask & (1 << ssid))) | ||
221 | continue; | ||
222 | if (!first) | ||
223 | seq_puts(seq, ", "); | ||
224 | seq_puts(seq, ss->name); | ||
225 | first = false; | ||
226 | } | ||
227 | seq_putc(seq, '\n'); | ||
228 | } | ||
229 | |||
230 | static int cgroup_masks_read(struct seq_file *seq, void *v) | ||
231 | { | ||
232 | struct kernfs_open_file *of = seq->private; | ||
233 | struct cgroup *cgrp; | ||
234 | |||
235 | cgrp = cgroup_kn_lock_live(of->kn, false); | ||
236 | if (!cgrp) | ||
237 | return -ENODEV; | ||
238 | |||
239 | cgroup_masks_read_one(seq, "subtree_control", cgrp->subtree_control); | ||
240 | cgroup_masks_read_one(seq, "subtree_ss_mask", cgrp->subtree_ss_mask); | ||
241 | |||
242 | cgroup_kn_unlock(of->kn); | ||
243 | return 0; | ||
244 | } | ||
245 | |||
246 | static u64 releasable_read(struct cgroup_subsys_state *css, struct cftype *cft) | ||
247 | { | ||
248 | return (!cgroup_is_populated(css->cgroup) && | ||
249 | !css_has_online_children(&css->cgroup->self)); | ||
250 | } | ||
251 | |||
252 | static struct cftype debug_legacy_files[] = { | ||
253 | { | ||
254 | .name = "taskcount", | ||
255 | .read_u64 = debug_taskcount_read, | ||
256 | }, | ||
257 | |||
258 | { | ||
259 | .name = "current_css_set", | ||
260 | .seq_show = current_css_set_read, | ||
261 | .flags = CFTYPE_ONLY_ON_ROOT, | ||
262 | }, | ||
263 | |||
264 | { | ||
265 | .name = "current_css_set_refcount", | ||
266 | .read_u64 = current_css_set_refcount_read, | ||
267 | .flags = CFTYPE_ONLY_ON_ROOT, | ||
268 | }, | ||
269 | |||
270 | { | ||
271 | .name = "current_css_set_cg_links", | ||
272 | .seq_show = current_css_set_cg_links_read, | ||
273 | .flags = CFTYPE_ONLY_ON_ROOT, | ||
274 | }, | ||
275 | |||
276 | { | ||
277 | .name = "cgroup_css_links", | ||
278 | .seq_show = cgroup_css_links_read, | ||
279 | }, | ||
280 | |||
281 | { | ||
282 | .name = "cgroup_subsys_states", | ||
283 | .seq_show = cgroup_subsys_states_read, | ||
284 | }, | ||
285 | |||
286 | { | ||
287 | .name = "cgroup_masks", | ||
288 | .seq_show = cgroup_masks_read, | ||
289 | }, | ||
290 | |||
291 | { | ||
292 | .name = "releasable", | ||
293 | .read_u64 = releasable_read, | ||
294 | }, | ||
295 | |||
296 | { } /* terminate */ | ||
297 | }; | ||
298 | |||
299 | static struct cftype debug_files[] = { | ||
300 | { | ||
301 | .name = "taskcount", | ||
302 | .read_u64 = debug_taskcount_read, | ||
303 | }, | ||
304 | |||
305 | { | ||
306 | .name = "current_css_set", | ||
307 | .seq_show = current_css_set_read, | ||
308 | .flags = CFTYPE_ONLY_ON_ROOT, | ||
309 | }, | ||
310 | |||
311 | { | ||
312 | .name = "current_css_set_refcount", | ||
313 | .read_u64 = current_css_set_refcount_read, | ||
314 | .flags = CFTYPE_ONLY_ON_ROOT, | ||
315 | }, | ||
316 | |||
317 | { | ||
318 | .name = "current_css_set_cg_links", | ||
319 | .seq_show = current_css_set_cg_links_read, | ||
320 | .flags = CFTYPE_ONLY_ON_ROOT, | ||
321 | }, | ||
322 | |||
323 | { | ||
324 | .name = "css_links", | ||
325 | .seq_show = cgroup_css_links_read, | ||
326 | }, | ||
327 | |||
328 | { | ||
329 | .name = "csses", | ||
330 | .seq_show = cgroup_subsys_states_read, | ||
331 | }, | ||
332 | |||
333 | { | ||
334 | .name = "masks", | ||
335 | .seq_show = cgroup_masks_read, | ||
336 | }, | ||
337 | |||
338 | { } /* terminate */ | ||
339 | }; | ||
340 | |||
341 | struct cgroup_subsys debug_cgrp_subsys = { | ||
342 | .css_alloc = debug_css_alloc, | ||
343 | .css_free = debug_css_free, | ||
344 | .legacy_cftypes = debug_legacy_files, | ||
345 | }; | ||
346 | |||
347 | /* | ||
348 | * On v2, debug is an implicit controller enabled by "cgroup_debug" boot | ||
349 | * parameter. | ||
350 | */ | ||
351 | static int __init enable_cgroup_debug(char *str) | ||
352 | { | ||
353 | debug_cgrp_subsys.dfl_cftypes = debug_files; | ||
354 | debug_cgrp_subsys.implicit_on_dfl = true; | ||
355 | return 1; | ||
356 | } | ||
357 | __setup("cgroup_debug", enable_cgroup_debug); | ||