about | summary | refs | log | tree | commit | diff | stats
path: root/kernel/cgroup.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/cgroup.c')
-rw-r--r-- kernel/cgroup.c 140
1 files changed, 109 insertions, 31 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index d1c51b7f5221..85bc9beb046d 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -64,6 +64,9 @@
64#include <linux/file.h> 64#include <linux/file.h>
65#include <net/sock.h> 65#include <net/sock.h>
66 66
67#define CREATE_TRACE_POINTS
68#include <trace/events/cgroup.h>
69
67/* 70/*
68 * pidlists linger the following amount before being destroyed. The goal 71 * pidlists linger the following amount before being destroyed. The goal
69 * is avoiding frequent destruction in the middle of consecutive read calls 72 * is avoiding frequent destruction in the middle of consecutive read calls
@@ -1176,6 +1179,8 @@ static void cgroup_destroy_root(struct cgroup_root *root)
1176 struct cgroup *cgrp = &root->cgrp; 1179 struct cgroup *cgrp = &root->cgrp;
1177 struct cgrp_cset_link *link, *tmp_link; 1180 struct cgrp_cset_link *link, *tmp_link;
1178 1181
1182 trace_cgroup_destroy_root(root);
1183
1179 cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp); 1184 cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp);
1180 1185
1181 BUG_ON(atomic_read(&root->nr_cgrps)); 1186 BUG_ON(atomic_read(&root->nr_cgrps));
@@ -1874,6 +1879,9 @@ static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data)
1874 strcpy(root->release_agent_path, opts.release_agent); 1879 strcpy(root->release_agent_path, opts.release_agent);
1875 spin_unlock(&release_agent_path_lock); 1880 spin_unlock(&release_agent_path_lock);
1876 } 1881 }
1882
1883 trace_cgroup_remount(root);
1884
1877 out_unlock: 1885 out_unlock:
1878 kfree(opts.release_agent); 1886 kfree(opts.release_agent);
1879 kfree(opts.name); 1887 kfree(opts.name);
@@ -2031,6 +2039,8 @@ static int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask)
2031 if (ret) 2039 if (ret)
2032 goto destroy_root; 2040 goto destroy_root;
2033 2041
2042 trace_cgroup_setup_root(root);
2043
2034 /* 2044 /*
2035 * There must be no failure case after here, since rebinding takes 2045 * There must be no failure case after here, since rebinding takes
2036 * care of subsystems' refcounts, which are explicitly dropped in 2046 * care of subsystems' refcounts, which are explicitly dropped in
@@ -2315,22 +2325,18 @@ static struct file_system_type cgroup2_fs_type = {
2315 .fs_flags = FS_USERNS_MOUNT, 2325 .fs_flags = FS_USERNS_MOUNT,
2316}; 2326};
2317 2327
2318static char *cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen, 2328static int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen,
2319 struct cgroup_namespace *ns) 2329 struct cgroup_namespace *ns)
2320{ 2330{
2321 struct cgroup *root = cset_cgroup_from_root(ns->root_cset, cgrp->root); 2331 struct cgroup *root = cset_cgroup_from_root(ns->root_cset, cgrp->root);
2322 int ret;
2323 2332
2324 ret = kernfs_path_from_node(cgrp->kn, root->kn, buf, buflen); 2333 return kernfs_path_from_node(cgrp->kn, root->kn, buf, buflen);
2325 if (ret < 0 || ret >= buflen)
2326 return NULL;
2327 return buf;
2328} 2334}
2329 2335
2330char *cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen, 2336int cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
2331 struct cgroup_namespace *ns) 2337 struct cgroup_namespace *ns)
2332{ 2338{
2333 char *ret; 2339 int ret;
2334 2340
2335 mutex_lock(&cgroup_mutex); 2341 mutex_lock(&cgroup_mutex);
2336 spin_lock_irq(&css_set_lock); 2342 spin_lock_irq(&css_set_lock);
@@ -2357,12 +2363,12 @@ EXPORT_SYMBOL_GPL(cgroup_path_ns);
2357 * 2363 *
2358 * Return value is the same as kernfs_path(). 2364 * Return value is the same as kernfs_path().
2359 */ 2365 */
2360char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen) 2366int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
2361{ 2367{
2362 struct cgroup_root *root; 2368 struct cgroup_root *root;
2363 struct cgroup *cgrp; 2369 struct cgroup *cgrp;
2364 int hierarchy_id = 1; 2370 int hierarchy_id = 1;
2365 char *path = NULL; 2371 int ret;
2366 2372
2367 mutex_lock(&cgroup_mutex); 2373 mutex_lock(&cgroup_mutex);
2368 spin_lock_irq(&css_set_lock); 2374 spin_lock_irq(&css_set_lock);
@@ -2371,16 +2377,15 @@ char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
2371 2377
2372 if (root) { 2378 if (root) {
2373 cgrp = task_cgroup_from_root(task, root); 2379 cgrp = task_cgroup_from_root(task, root);
2374 path = cgroup_path_ns_locked(cgrp, buf, buflen, &init_cgroup_ns); 2380 ret = cgroup_path_ns_locked(cgrp, buf, buflen, &init_cgroup_ns);
2375 } else { 2381 } else {
2376 /* if no hierarchy exists, everyone is in "/" */ 2382 /* if no hierarchy exists, everyone is in "/" */
2377 if (strlcpy(buf, "/", buflen) < buflen) 2383 ret = strlcpy(buf, "/", buflen);
2378 path = buf;
2379 } 2384 }
2380 2385
2381 spin_unlock_irq(&css_set_lock); 2386 spin_unlock_irq(&css_set_lock);
2382 mutex_unlock(&cgroup_mutex); 2387 mutex_unlock(&cgroup_mutex);
2383 return path; 2388 return ret;
2384} 2389}
2385EXPORT_SYMBOL_GPL(task_cgroup_path); 2390EXPORT_SYMBOL_GPL(task_cgroup_path);
2386 2391
@@ -2830,6 +2835,10 @@ static int cgroup_attach_task(struct cgroup *dst_cgrp,
2830 ret = cgroup_migrate(leader, threadgroup, dst_cgrp->root); 2835 ret = cgroup_migrate(leader, threadgroup, dst_cgrp->root);
2831 2836
2832 cgroup_migrate_finish(&preloaded_csets); 2837 cgroup_migrate_finish(&preloaded_csets);
2838
2839 if (!ret)
2840 trace_cgroup_attach_task(dst_cgrp, leader, threadgroup);
2841
2833 return ret; 2842 return ret;
2834} 2843}
2835 2844
@@ -3446,9 +3455,28 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
3446 * Except for the root, subtree_control must be zero for a cgroup 3455 * Except for the root, subtree_control must be zero for a cgroup
3447 * with tasks so that child cgroups don't compete against tasks. 3456 * with tasks so that child cgroups don't compete against tasks.
3448 */ 3457 */
3449 if (enable && cgroup_parent(cgrp) && !list_empty(&cgrp->cset_links)) { 3458 if (enable && cgroup_parent(cgrp)) {
3450 ret = -EBUSY; 3459 struct cgrp_cset_link *link;
3451 goto out_unlock; 3460
3461 /*
3462 * Because namespaces pin csets too, @cgrp->cset_links
3463 * might not be empty even when @cgrp is empty. Walk and
3464 * verify each cset.
3465 */
3466 spin_lock_irq(&css_set_lock);
3467
3468 ret = 0;
3469 list_for_each_entry(link, &cgrp->cset_links, cset_link) {
3470 if (css_set_populated(link->cset)) {
3471 ret = -EBUSY;
3472 break;
3473 }
3474 }
3475
3476 spin_unlock_irq(&css_set_lock);
3477
3478 if (ret)
3479 goto out_unlock;
3452 } 3480 }
3453 3481
3454 /* save and update control masks and prepare csses */ 3482 /* save and update control masks and prepare csses */
@@ -3592,6 +3620,8 @@ static int cgroup_rename(struct kernfs_node *kn, struct kernfs_node *new_parent,
3592 mutex_lock(&cgroup_mutex); 3620 mutex_lock(&cgroup_mutex);
3593 3621
3594 ret = kernfs_rename(kn, new_parent, new_name_str); 3622 ret = kernfs_rename(kn, new_parent, new_name_str);
3623 if (!ret)
3624 trace_cgroup_rename(cgrp);
3595 3625
3596 mutex_unlock(&cgroup_mutex); 3626 mutex_unlock(&cgroup_mutex);
3597 3627
@@ -3899,7 +3929,9 @@ void cgroup_file_notify(struct cgroup_file *cfile)
3899 * cgroup_task_count - count the number of tasks in a cgroup. 3929 * cgroup_task_count - count the number of tasks in a cgroup.
3900 * @cgrp: the cgroup in question 3930 * @cgrp: the cgroup in question
3901 * 3931 *
3902 * Return the number of tasks in the cgroup. 3932 * Return the number of tasks in the cgroup. The returned number can be
3933 * higher than the actual number of tasks due to css_set references from
3934 * namespace roots and temporary usages.
3903 */ 3935 */
3904static int cgroup_task_count(const struct cgroup *cgrp) 3936static int cgroup_task_count(const struct cgroup *cgrp)
3905{ 3937{
@@ -4360,6 +4392,8 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
4360 4392
4361 if (task) { 4393 if (task) {
4362 ret = cgroup_migrate(task, false, to->root); 4394 ret = cgroup_migrate(task, false, to->root);
4395 if (!ret)
4396 trace_cgroup_transfer_tasks(to, task, false);
4363 put_task_struct(task); 4397 put_task_struct(task);
4364 } 4398 }
4365 } while (task && !ret); 4399 } while (task && !ret);
@@ -5025,6 +5059,8 @@ static void css_release_work_fn(struct work_struct *work)
5025 ss->css_released(css); 5059 ss->css_released(css);
5026 } else { 5060 } else {
5027 /* cgroup release path */ 5061 /* cgroup release path */
5062 trace_cgroup_release(cgrp);
5063
5028 cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id); 5064 cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
5029 cgrp->id = -1; 5065 cgrp->id = -1;
5030 5066
@@ -5311,6 +5347,8 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
5311 if (ret) 5347 if (ret)
5312 goto out_destroy; 5348 goto out_destroy;
5313 5349
5350 trace_cgroup_mkdir(cgrp);
5351
5314 /* let's create and online css's */ 5352 /* let's create and online css's */
5315 kernfs_activate(kn); 5353 kernfs_activate(kn);
5316 5354
@@ -5486,6 +5524,9 @@ static int cgroup_rmdir(struct kernfs_node *kn)
5486 5524
5487 ret = cgroup_destroy_locked(cgrp); 5525 ret = cgroup_destroy_locked(cgrp);
5488 5526
5527 if (!ret)
5528 trace_cgroup_rmdir(cgrp);
5529
5489 cgroup_kn_unlock(kn); 5530 cgroup_kn_unlock(kn);
5490 return ret; 5531 return ret;
5491} 5532}
@@ -5606,6 +5647,12 @@ int __init cgroup_init(void)
5606 BUG_ON(cgroup_init_cftypes(NULL, cgroup_dfl_base_files)); 5647 BUG_ON(cgroup_init_cftypes(NULL, cgroup_dfl_base_files));
5607 BUG_ON(cgroup_init_cftypes(NULL, cgroup_legacy_base_files)); 5648 BUG_ON(cgroup_init_cftypes(NULL, cgroup_legacy_base_files));
5608 5649
5650 /*
5651 * The latency of the synchronize_sched() is too high for cgroups,
5652 * avoid it at the cost of forcing all readers into the slow path.
5653 */
5654 rcu_sync_enter_start(&cgroup_threadgroup_rwsem.rss);
5655
5609 get_user_ns(init_cgroup_ns.user_ns); 5656 get_user_ns(init_cgroup_ns.user_ns);
5610 5657
5611 mutex_lock(&cgroup_mutex); 5658 mutex_lock(&cgroup_mutex);
@@ -5716,7 +5763,7 @@ core_initcall(cgroup_wq_init);
5716int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns, 5763int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
5717 struct pid *pid, struct task_struct *tsk) 5764 struct pid *pid, struct task_struct *tsk)
5718{ 5765{
5719 char *buf, *path; 5766 char *buf;
5720 int retval; 5767 int retval;
5721 struct cgroup_root *root; 5768 struct cgroup_root *root;
5722 5769
@@ -5759,18 +5806,18 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
5759 * " (deleted)" is appended to the cgroup path. 5806 * " (deleted)" is appended to the cgroup path.
5760 */ 5807 */
5761 if (cgroup_on_dfl(cgrp) || !(tsk->flags & PF_EXITING)) { 5808 if (cgroup_on_dfl(cgrp) || !(tsk->flags & PF_EXITING)) {
5762 path = cgroup_path_ns_locked(cgrp, buf, PATH_MAX, 5809 retval = cgroup_path_ns_locked(cgrp, buf, PATH_MAX,
5763 current->nsproxy->cgroup_ns); 5810 current->nsproxy->cgroup_ns);
5764 if (!path) { 5811 if (retval >= PATH_MAX)
5765 retval = -ENAMETOOLONG; 5812 retval = -ENAMETOOLONG;
5813 if (retval < 0)
5766 goto out_unlock; 5814 goto out_unlock;
5767 } 5815
5816 seq_puts(m, buf);
5768 } else { 5817 } else {
5769 path = "/"; 5818 seq_puts(m, "/");
5770 } 5819 }
5771 5820
5772 seq_puts(m, path);
5773
5774 if (cgroup_on_dfl(cgrp) && cgroup_is_dead(cgrp)) 5821 if (cgroup_on_dfl(cgrp) && cgroup_is_dead(cgrp))
5775 seq_puts(m, " (deleted)\n"); 5822 seq_puts(m, " (deleted)\n");
5776 else 5823 else
@@ -6035,8 +6082,9 @@ static void cgroup_release_agent(struct work_struct *work)
6035{ 6082{
6036 struct cgroup *cgrp = 6083 struct cgroup *cgrp =
6037 container_of(work, struct cgroup, release_agent_work); 6084 container_of(work, struct cgroup, release_agent_work);
6038 char *pathbuf = NULL, *agentbuf = NULL, *path; 6085 char *pathbuf = NULL, *agentbuf = NULL;
6039 char *argv[3], *envp[3]; 6086 char *argv[3], *envp[3];
6087 int ret;
6040 6088
6041 mutex_lock(&cgroup_mutex); 6089 mutex_lock(&cgroup_mutex);
6042 6090
@@ -6046,13 +6094,13 @@ static void cgroup_release_agent(struct work_struct *work)
6046 goto out; 6094 goto out;
6047 6095
6048 spin_lock_irq(&css_set_lock); 6096 spin_lock_irq(&css_set_lock);
6049 path = cgroup_path_ns_locked(cgrp, pathbuf, PATH_MAX, &init_cgroup_ns); 6097 ret = cgroup_path_ns_locked(cgrp, pathbuf, PATH_MAX, &init_cgroup_ns);
6050 spin_unlock_irq(&css_set_lock); 6098 spin_unlock_irq(&css_set_lock);
6051 if (!path) 6099 if (ret < 0 || ret >= PATH_MAX)
6052 goto out; 6100 goto out;
6053 6101
6054 argv[0] = agentbuf; 6102 argv[0] = agentbuf;
6055 argv[1] = path; 6103 argv[1] = pathbuf;
6056 argv[2] = NULL; 6104 argv[2] = NULL;
6057 6105
6058 /* minimal command environment */ 6106 /* minimal command environment */
@@ -6270,6 +6318,12 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
6270 if (cgroup_sk_alloc_disabled) 6318 if (cgroup_sk_alloc_disabled)
6271 return; 6319 return;
6272 6320
6321 /* Socket clone path */
6322 if (skcd->val) {
6323 cgroup_get(sock_cgroup_ptr(skcd));
6324 return;
6325 }
6326
6273 rcu_read_lock(); 6327 rcu_read_lock();
6274 6328
6275 while (true) { 6329 while (true) {
@@ -6295,6 +6349,16 @@ void cgroup_sk_free(struct sock_cgroup_data *skcd)
6295 6349
6296/* cgroup namespaces */ 6350/* cgroup namespaces */
6297 6351
6352static struct ucounts *inc_cgroup_namespaces(struct user_namespace *ns)
6353{
6354 return inc_ucount(ns, current_euid(), UCOUNT_CGROUP_NAMESPACES);
6355}
6356
6357static void dec_cgroup_namespaces(struct ucounts *ucounts)
6358{
6359 dec_ucount(ucounts, UCOUNT_CGROUP_NAMESPACES);
6360}
6361
6298static struct cgroup_namespace *alloc_cgroup_ns(void) 6362static struct cgroup_namespace *alloc_cgroup_ns(void)
6299{ 6363{
6300 struct cgroup_namespace *new_ns; 6364 struct cgroup_namespace *new_ns;
@@ -6316,6 +6380,7 @@ static struct cgroup_namespace *alloc_cgroup_ns(void)
6316void free_cgroup_ns(struct cgroup_namespace *ns) 6380void free_cgroup_ns(struct cgroup_namespace *ns)
6317{ 6381{
6318 put_css_set(ns->root_cset); 6382 put_css_set(ns->root_cset);
6383 dec_cgroup_namespaces(ns->ucounts);
6319 put_user_ns(ns->user_ns); 6384 put_user_ns(ns->user_ns);
6320 ns_free_inum(&ns->ns); 6385 ns_free_inum(&ns->ns);
6321 kfree(ns); 6386 kfree(ns);
@@ -6327,6 +6392,7 @@ struct cgroup_namespace *copy_cgroup_ns(unsigned long flags,
6327 struct cgroup_namespace *old_ns) 6392 struct cgroup_namespace *old_ns)
6328{ 6393{
6329 struct cgroup_namespace *new_ns; 6394 struct cgroup_namespace *new_ns;
6395 struct ucounts *ucounts;
6330 struct css_set *cset; 6396 struct css_set *cset;
6331 6397
6332 BUG_ON(!old_ns); 6398 BUG_ON(!old_ns);
@@ -6340,6 +6406,10 @@ struct cgroup_namespace *copy_cgroup_ns(unsigned long flags,
6340 if (!ns_capable(user_ns, CAP_SYS_ADMIN)) 6406 if (!ns_capable(user_ns, CAP_SYS_ADMIN))
6341 return ERR_PTR(-EPERM); 6407 return ERR_PTR(-EPERM);
6342 6408
6409 ucounts = inc_cgroup_namespaces(user_ns);
6410 if (!ucounts)
6411 return ERR_PTR(-ENOSPC);
6412
6343 /* It is not safe to take cgroup_mutex here */ 6413 /* It is not safe to take cgroup_mutex here */
6344 spin_lock_irq(&css_set_lock); 6414 spin_lock_irq(&css_set_lock);
6345 cset = task_css_set(current); 6415 cset = task_css_set(current);
@@ -6349,10 +6419,12 @@ struct cgroup_namespace *copy_cgroup_ns(unsigned long flags,
6349 new_ns = alloc_cgroup_ns(); 6419 new_ns = alloc_cgroup_ns();
6350 if (IS_ERR(new_ns)) { 6420 if (IS_ERR(new_ns)) {
6351 put_css_set(cset); 6421 put_css_set(cset);
6422 dec_cgroup_namespaces(ucounts);
6352 return new_ns; 6423 return new_ns;
6353 } 6424 }
6354 6425
6355 new_ns->user_ns = get_user_ns(user_ns); 6426 new_ns->user_ns = get_user_ns(user_ns);
6427 new_ns->ucounts = ucounts;
6356 new_ns->root_cset = cset; 6428 new_ns->root_cset = cset;
6357 6429
6358 return new_ns; 6430 return new_ns;
@@ -6403,12 +6475,18 @@ static void cgroupns_put(struct ns_common *ns)
6403 put_cgroup_ns(to_cg_ns(ns)); 6475 put_cgroup_ns(to_cg_ns(ns));
6404} 6476}
6405 6477
6478static struct user_namespace *cgroupns_owner(struct ns_common *ns)
6479{
6480 return to_cg_ns(ns)->user_ns;
6481}
6482
6406const struct proc_ns_operations cgroupns_operations = { 6483const struct proc_ns_operations cgroupns_operations = {
6407 .name = "cgroup", 6484 .name = "cgroup",
6408 .type = CLONE_NEWCGROUP, 6485 .type = CLONE_NEWCGROUP,
6409 .get = cgroupns_get, 6486 .get = cgroupns_get,
6410 .put = cgroupns_put, 6487 .put = cgroupns_put,
6411 .install = cgroupns_install, 6488 .install = cgroupns_install,
6489 .owner = cgroupns_owner,
6412}; 6490};
6413 6491
6414static __init int cgroup_namespaces_init(void) 6492static __init int cgroup_namespaces_init(void)