1 files changed, 109 insertions, 31 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index d1c51b7f5221..85bc9beb046d 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -64,6 +64,9 @@
 #include <linux/file.h>
 #include <net/sock.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/cgroup.h>
 /*
 * pidlists linger the following amount before being destroyed.  The goal
 * is avoiding frequent destruction in the middle of consecutive read calls
@@ -1176,6 +1179,8 @@ static void cgroup_destroy_root(struct cgroup_root *root)
        struct cgroup *cgrp = &root->cgrp;
        struct cgrp_cset_link *link, *tmp_link;
+        trace_cgroup_destroy_root(root);
        cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp);
        BUG_ON(atomic_read(&root->nr_cgrps));
@@ -1874,6 +1879,9 @@ static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data)
                strcpy(root->release_agent_path, opts.release_agent);
                spin_unlock(&release_agent_path_lock);
        }
+        trace_cgroup_remount(root);
 out_unlock:
        kfree(opts.release_agent);
        kfree(opts.name);
@@ -2031,6 +2039,8 @@ static int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask)
        if (ret)
                goto destroy_root;
+        trace_cgroup_setup_root(root);
        /*
         * There must be no failure case after here, since rebinding takes
         * care of subsystems' refcounts, which are explicitly dropped in
@@ -2315,22 +2325,18 @@ static struct file_system_type cgroup2_fs_type = {
        .fs_flags = FS_USERNS_MOUNT,
 };
-static char *cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen,
+static int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen,
-                                   struct cgroup_namespace *ns)
+                                 struct cgroup_namespace *ns)
 {
        struct cgroup *root = cset_cgroup_from_root(ns->root_cset, cgrp->root);
-        int ret;
-        ret = kernfs_path_from_node(cgrp->kn, root->kn, buf, buflen);
+        return kernfs_path_from_node(cgrp->kn, root->kn, buf, buflen);
-        if (ret < 0 || ret >= buflen)
-                return NULL;
-        return buf;
 }
-char *cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
+int cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
-                     struct cgroup_namespace *ns)
+                   struct cgroup_namespace *ns)
 {
-        char *ret;
+        int ret;
        mutex_lock(&cgroup_mutex);
        spin_lock_irq(&css_set_lock);
@@ -2357,12 +2363,12 @@ EXPORT_SYMBOL_GPL(cgroup_path_ns);
 *
 * Return value is the same as kernfs_path().
 */
-char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
+int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
 {
        struct cgroup_root *root;
        struct cgroup *cgrp;
        int hierarchy_id = 1;
-        char *path = NULL;
+        int ret;
        mutex_lock(&cgroup_mutex);
        spin_lock_irq(&css_set_lock);
@@ -2371,16 +2377,15 @@ char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
        if (root) {
                cgrp = task_cgroup_from_root(task, root);
-                path = cgroup_path_ns_locked(cgrp, buf, buflen, &init_cgroup_ns);
+                ret = cgroup_path_ns_locked(cgrp, buf, buflen, &init_cgroup_ns);
        } else {
                /* if no hierarchy exists, everyone is in "/" */
-                if (strlcpy(buf, "/", buflen) < buflen)
+                ret = strlcpy(buf, "/", buflen);
-                        path = buf;
        }
        spin_unlock_irq(&css_set_lock);
        mutex_unlock(&cgroup_mutex);
-        return path;
+        return ret;
 }
 EXPORT_SYMBOL_GPL(task_cgroup_path);
@@ -2830,6 +2835,10 @@ static int cgroup_attach_task(struct cgroup *dst_cgrp,
                ret = cgroup_migrate(leader, threadgroup, dst_cgrp->root);
        cgroup_migrate_finish(&preloaded_csets);
+        if (!ret)
+                trace_cgroup_attach_task(dst_cgrp, leader, threadgroup);
        return ret;
 }
@@ -3446,9 +3455,28 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
         * Except for the root, subtree_control must be zero for a cgroup
         * with tasks so that child cgroups don't compete against tasks.
         */
-        if (enable && cgroup_parent(cgrp) && !list_empty(&cgrp->cset_links)) {
+        if (enable && cgroup_parent(cgrp)) {
-                ret = -EBUSY;
+                struct cgrp_cset_link *link;
-                goto out_unlock;
+                /*
+                 * Because namespaces pin csets too, @cgrp->cset_links
+                 * might not be empty even when @cgrp is empty.  Walk and
+                 * verify each cset.
+                 */
+                spin_lock_irq(&css_set_lock);
+                ret = 0;
+                list_for_each_entry(link, &cgrp->cset_links, cset_link) {
+                        if (css_set_populated(link->cset)) {
+                                ret = -EBUSY;
+                                break;
+                        }
+                }
+                spin_unlock_irq(&css_set_lock);
+                if (ret)
+                        goto out_unlock;
        }
        /* save and update control masks and prepare csses */
@@ -3592,6 +3620,8 @@ static int cgroup_rename(struct kernfs_node *kn, struct kernfs_node *new_parent,
        mutex_lock(&cgroup_mutex);
        ret = kernfs_rename(kn, new_parent, new_name_str);
+        if (!ret)
+                trace_cgroup_rename(cgrp);
        mutex_unlock(&cgroup_mutex);
@@ -3899,7 +3929,9 @@ void cgroup_file_notify(struct cgroup_file *cfile)
 * cgroup_task_count - count the number of tasks in a cgroup.
 * @cgrp: the cgroup in question
 *
- * Return the number of tasks in the cgroup.
+ * Return the number of tasks in the cgroup.  The returned number can be
+ * higher than the actual number of tasks due to css_set references from
+ * namespace roots and temporary usages.
 */
 static int cgroup_task_count(const struct cgroup *cgrp)
 {
@@ -4360,6 +4392,8 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
                if (task) {
                        ret = cgroup_migrate(task, false, to->root);
+                        if (!ret)
+                                trace_cgroup_transfer_tasks(to, task, false);
                        put_task_struct(task);
                }
        } while (task && !ret);
@@ -5025,6 +5059,8 @@ static void css_release_work_fn(struct work_struct *work)
                        ss->css_released(css);
        } else {
                /* cgroup release path */
+                trace_cgroup_release(cgrp);
                cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
                cgrp->id = -1;
@@ -5311,6 +5347,8 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
        if (ret)
                goto out_destroy;
+        trace_cgroup_mkdir(cgrp);
        /* let's create and online css's */
        kernfs_activate(kn);
@@ -5486,6 +5524,9 @@ static int cgroup_rmdir(struct kernfs_node *kn)
        ret = cgroup_destroy_locked(cgrp);
+        if (!ret)
+                trace_cgroup_rmdir(cgrp);
        cgroup_kn_unlock(kn);
        return ret;
 }
@@ -5606,6 +5647,12 @@ int __init cgroup_init(void)
        BUG_ON(cgroup_init_cftypes(NULL, cgroup_dfl_base_files));
        BUG_ON(cgroup_init_cftypes(NULL, cgroup_legacy_base_files));
+        /*
+         * The latency of the synchronize_sched() is too high for cgroups,
+         * avoid it at the cost of forcing all readers into the slow path.
+         */
+        rcu_sync_enter_start(&cgroup_threadgroup_rwsem.rss);
        get_user_ns(init_cgroup_ns.user_ns);
        mutex_lock(&cgroup_mutex);
@@ -5716,7 +5763,7 @@ core_initcall(cgroup_wq_init);
 int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
                     struct pid *pid, struct task_struct *tsk)
 {
-        char *buf, *path;
+        char *buf;
        int retval;
        struct cgroup_root *root;
@@ -5759,18 +5806,18 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
                 * " (deleted)" is appended to the cgroup path.
                 */
                if (cgroup_on_dfl(cgrp) || !(tsk->flags & PF_EXITING)) {
-                        path = cgroup_path_ns_locked(cgrp, buf, PATH_MAX,
+                        retval = cgroup_path_ns_locked(cgrp, buf, PATH_MAX,
                                                current->nsproxy->cgroup_ns);
-                        if (!path) {
+                        if (retval >= PATH_MAX)
                                retval = -ENAMETOOLONG;
+                        if (retval < 0)
                                goto out_unlock;
-                        }
+                        seq_puts(m, buf);
                } else {
-                        path = "/";
+                        seq_puts(m, "/");
                }
-                seq_puts(m, path);
                if (cgroup_on_dfl(cgrp) && cgroup_is_dead(cgrp))
                        seq_puts(m, " (deleted)\n");
                else
@@ -6035,8 +6082,9 @@ static void cgroup_release_agent(struct work_struct *work)
 {
        struct cgroup *cgrp =
                container_of(work, struct cgroup, release_agent_work);
-        char *pathbuf = NULL, *agentbuf = NULL, *path;
+        char *pathbuf = NULL, *agentbuf = NULL;
        char *argv[3], *envp[3];
+        int ret;
        mutex_lock(&cgroup_mutex);
@@ -6046,13 +6094,13 @@ static void cgroup_release_agent(struct work_struct *work)
                goto out;
        spin_lock_irq(&css_set_lock);
-        path = cgroup_path_ns_locked(cgrp, pathbuf, PATH_MAX, &init_cgroup_ns);
+        ret = cgroup_path_ns_locked(cgrp, pathbuf, PATH_MAX, &init_cgroup_ns);
        spin_unlock_irq(&css_set_lock);
-        if (!path)
+        if (ret < 0 || ret >= PATH_MAX)
                goto out;
        argv[0] = agentbuf;
-        argv[1] = path;
+        argv[1] = pathbuf;
        argv[2] = NULL;
        /* minimal command environment */
@@ -6270,6 +6318,12 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
        if (cgroup_sk_alloc_disabled)
                return;
+        /* Socket clone path */
+        if (skcd->val) {
+                cgroup_get(sock_cgroup_ptr(skcd));
+                return;
+        }
        rcu_read_lock();
        while (true) {
@@ -6295,6 +6349,16 @@ void cgroup_sk_free(struct sock_cgroup_data *skcd)
 /* cgroup namespaces */
+static struct ucounts *inc_cgroup_namespaces(struct user_namespace *ns)
+{
+        return inc_ucount(ns, current_euid(), UCOUNT_CGROUP_NAMESPACES);
+}
+static void dec_cgroup_namespaces(struct ucounts *ucounts)
+{
+        dec_ucount(ucounts, UCOUNT_CGROUP_NAMESPACES);
+}
 static struct cgroup_namespace *alloc_cgroup_ns(void)
 {
        struct cgroup_namespace *new_ns;
@@ -6316,6 +6380,7 @@ static struct cgroup_namespace *alloc_cgroup_ns(void)
 void free_cgroup_ns(struct cgroup_namespace *ns)
 {
        put_css_set(ns->root_cset);
+        dec_cgroup_namespaces(ns->ucounts);
        put_user_ns(ns->user_ns);
        ns_free_inum(&ns->ns);
        kfree(ns);
@@ -6327,6 +6392,7 @@ struct cgroup_namespace *copy_cgroup_ns(unsigned long flags,
                                        struct cgroup_namespace *old_ns)
 {
        struct cgroup_namespace *new_ns;
+        struct ucounts *ucounts;
        struct css_set *cset;
        BUG_ON(!old_ns);
@@ -6340,6 +6406,10 @@ struct cgroup_namespace *copy_cgroup_ns(unsigned long flags,
        if (!ns_capable(user_ns, CAP_SYS_ADMIN))
                return ERR_PTR(-EPERM);
+        ucounts = inc_cgroup_namespaces(user_ns);
+        if (!ucounts)
+                return ERR_PTR(-ENOSPC);
        /* It is not safe to take cgroup_mutex here */
        spin_lock_irq(&css_set_lock);
        cset = task_css_set(current);
@@ -6349,10 +6419,12 @@ struct cgroup_namespace *copy_cgroup_ns(unsigned long flags,
        new_ns = alloc_cgroup_ns();
        if (IS_ERR(new_ns)) {
                put_css_set(cset);
+                dec_cgroup_namespaces(ucounts);
                return new_ns;
        }
        new_ns->user_ns = get_user_ns(user_ns);
+        new_ns->ucounts = ucounts;
        new_ns->root_cset = cset;
        return new_ns;
@@ -6403,12 +6475,18 @@ static void cgroupns_put(struct ns_common *ns)
        put_cgroup_ns(to_cg_ns(ns));
 }
+static struct user_namespace *cgroupns_owner(struct ns_common *ns)
+{
+        return to_cg_ns(ns)->user_ns;
+}
 const struct proc_ns_operations cgroupns_operations = {
        .name           = "cgroup",
        .type           = CLONE_NEWCGROUP,
        .get            = cgroupns_get,
        .put            = cgroupns_put,
        .install        = cgroupns_install,
+        .owner          = cgroupns_owner,
 };
 static __init int cgroup_namespaces_init(void)

diff --git a/kernel/cgroup.c b/kernel/cgroup.c index d1c51b7f5221..85bc9beb046d 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c
@@ -64,6 +64,9 @@
64	#include <linux/file.h>	64	#include <linux/file.h>
65	#include <net/sock.h>	65	#include <net/sock.h>
66		66
		67	#define CREATE_TRACE_POINTS
		68	#include <trace/events/cgroup.h>
		69
67	/*	70	/*
68	* pidlists linger the following amount before being destroyed. The goal	71	* pidlists linger the following amount before being destroyed. The goal
69	* is avoiding frequent destruction in the middle of consecutive read calls	72	* is avoiding frequent destruction in the middle of consecutive read calls
@@ -1176,6 +1179,8 @@ static void cgroup_destroy_root(struct cgroup_root *root)
1176	struct cgroup *cgrp = &root->cgrp;	1179	struct cgroup *cgrp = &root->cgrp;
1177	struct cgrp_cset_link *link, *tmp_link;	1180	struct cgrp_cset_link *link, *tmp_link;
1178		1181
		1182	trace_cgroup_destroy_root(root);
		1183
1179	cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp);	1184	cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp);
1180		1185
1181	BUG_ON(atomic_read(&root->nr_cgrps));	1186	BUG_ON(atomic_read(&root->nr_cgrps));
@@ -1874,6 +1879,9 @@ static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data)
1874	strcpy(root->release_agent_path, opts.release_agent);	1879	strcpy(root->release_agent_path, opts.release_agent);
1875	spin_unlock(&release_agent_path_lock);	1880	spin_unlock(&release_agent_path_lock);
1876	}	1881	}
		1882
		1883	trace_cgroup_remount(root);
		1884
1877	out_unlock:	1885	out_unlock:
1878	kfree(opts.release_agent);	1886	kfree(opts.release_agent);
1879	kfree(opts.name);	1887	kfree(opts.name);
@@ -2031,6 +2039,8 @@ static int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask)
2031	if (ret)	2039	if (ret)
2032	goto destroy_root;	2040	goto destroy_root;
2033		2041
		2042	trace_cgroup_setup_root(root);
		2043
2034	/*	2044	/*
2035	* There must be no failure case after here, since rebinding takes	2045	* There must be no failure case after here, since rebinding takes
2036	* care of subsystems' refcounts, which are explicitly dropped in	2046	* care of subsystems' refcounts, which are explicitly dropped in
@@ -2315,22 +2325,18 @@ static struct file_system_type cgroup2_fs_type = {
2315	.fs_flags = FS_USERNS_MOUNT,	2325	.fs_flags = FS_USERNS_MOUNT,
2316	};	2326	};
2317		2327
2318	static char *cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen,	2328	static int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen,
2319	struct cgroup_namespace *ns)	2329	struct cgroup_namespace *ns)
2320	{	2330	{
2321	struct cgroup *root = cset_cgroup_from_root(ns->root_cset, cgrp->root);	2331	struct cgroup *root = cset_cgroup_from_root(ns->root_cset, cgrp->root);
2322	int ret;
2323		2332
2324	ret = kernfs_path_from_node(cgrp->kn, root->kn, buf, buflen);	2333	return kernfs_path_from_node(cgrp->kn, root->kn, buf, buflen);
2325	if (ret < 0 || ret >= buflen)
2326	return NULL;
2327	return buf;
2328	}	2334	}
2329		2335
2330	char *cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,	2336	int cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
2331	struct cgroup_namespace *ns)	2337	struct cgroup_namespace *ns)
2332	{	2338	{
2333	char *ret;	2339	int ret;
2334		2340
2335	mutex_lock(&cgroup_mutex);	2341	mutex_lock(&cgroup_mutex);
2336	spin_lock_irq(&css_set_lock);	2342	spin_lock_irq(&css_set_lock);
@@ -2357,12 +2363,12 @@ EXPORT_SYMBOL_GPL(cgroup_path_ns);
2357	*	2363	*
2358	* Return value is the same as kernfs_path().	2364	* Return value is the same as kernfs_path().
2359	*/	2365	*/
2360	char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)	2366	int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
2361	{	2367	{
2362	struct cgroup_root *root;	2368	struct cgroup_root *root;
2363	struct cgroup *cgrp;	2369	struct cgroup *cgrp;
2364	int hierarchy_id = 1;	2370	int hierarchy_id = 1;
2365	char *path = NULL;	2371	int ret;
2366		2372
2367	mutex_lock(&cgroup_mutex);	2373	mutex_lock(&cgroup_mutex);
2368	spin_lock_irq(&css_set_lock);	2374	spin_lock_irq(&css_set_lock);
@@ -2371,16 +2377,15 @@ char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
2371		2377
2372	if (root) {	2378	if (root) {
2373	cgrp = task_cgroup_from_root(task, root);	2379	cgrp = task_cgroup_from_root(task, root);
2374	path = cgroup_path_ns_locked(cgrp, buf, buflen, &init_cgroup_ns);	2380	ret = cgroup_path_ns_locked(cgrp, buf, buflen, &init_cgroup_ns);
2375	} else {	2381	} else {
2376	/* if no hierarchy exists, everyone is in "/" */	2382	/* if no hierarchy exists, everyone is in "/" */
2377	if (strlcpy(buf, "/", buflen) < buflen)	2383	ret = strlcpy(buf, "/", buflen);
2378	path = buf;
2379	}	2384	}
2380		2385
2381	spin_unlock_irq(&css_set_lock);	2386	spin_unlock_irq(&css_set_lock);
2382	mutex_unlock(&cgroup_mutex);	2387	mutex_unlock(&cgroup_mutex);
2383	return path;	2388	return ret;
2384	}	2389	}
2385	EXPORT_SYMBOL_GPL(task_cgroup_path);	2390	EXPORT_SYMBOL_GPL(task_cgroup_path);
2386		2391
@@ -2830,6 +2835,10 @@ static int cgroup_attach_task(struct cgroup *dst_cgrp,
2830	ret = cgroup_migrate(leader, threadgroup, dst_cgrp->root);	2835	ret = cgroup_migrate(leader, threadgroup, dst_cgrp->root);
2831		2836
2832	cgroup_migrate_finish(&preloaded_csets);	2837	cgroup_migrate_finish(&preloaded_csets);
		2838
		2839	if (!ret)
		2840	trace_cgroup_attach_task(dst_cgrp, leader, threadgroup);
		2841
2833	return ret;	2842	return ret;
2834	}	2843	}
2835		2844
@@ -3446,9 +3455,28 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
3446	* Except for the root, subtree_control must be zero for a cgroup	3455	* Except for the root, subtree_control must be zero for a cgroup
3447	* with tasks so that child cgroups don't compete against tasks.	3456	* with tasks so that child cgroups don't compete against tasks.
3448	*/	3457	*/
3449	if (enable && cgroup_parent(cgrp) && !list_empty(&cgrp->cset_links)) {	3458	if (enable && cgroup_parent(cgrp)) {
3450	ret = -EBUSY;	3459	struct cgrp_cset_link *link;
3451	goto out_unlock;	3460
		3461	/*
		3462	* Because namespaces pin csets too, @cgrp->cset_links
		3463	* might not be empty even when @cgrp is empty. Walk and
		3464	* verify each cset.
		3465	*/
		3466	spin_lock_irq(&css_set_lock);
		3467
		3468	ret = 0;
		3469	list_for_each_entry(link, &cgrp->cset_links, cset_link) {
		3470	if (css_set_populated(link->cset)) {
		3471	ret = -EBUSY;
		3472	break;
		3473	}
		3474	}
		3475
		3476	spin_unlock_irq(&css_set_lock);
		3477
		3478	if (ret)
		3479	goto out_unlock;
3452	}	3480	}
3453		3481
3454	/* save and update control masks and prepare csses */	3482	/* save and update control masks and prepare csses */
@@ -3592,6 +3620,8 @@ static int cgroup_rename(struct kernfs_node *kn, struct kernfs_node *new_parent,
3592	mutex_lock(&cgroup_mutex);	3620	mutex_lock(&cgroup_mutex);
3593		3621
3594	ret = kernfs_rename(kn, new_parent, new_name_str);	3622	ret = kernfs_rename(kn, new_parent, new_name_str);
		3623	if (!ret)
		3624	trace_cgroup_rename(cgrp);
3595		3625
3596	mutex_unlock(&cgroup_mutex);	3626	mutex_unlock(&cgroup_mutex);
3597		3627
@@ -3899,7 +3929,9 @@ void cgroup_file_notify(struct cgroup_file *cfile)
3899	* cgroup_task_count - count the number of tasks in a cgroup.	3929	* cgroup_task_count - count the number of tasks in a cgroup.
3900	* @cgrp: the cgroup in question	3930	* @cgrp: the cgroup in question
3901	*	3931	*
3902	* Return the number of tasks in the cgroup.	3932	* Return the number of tasks in the cgroup. The returned number can be
		3933	* higher than the actual number of tasks due to css_set references from
		3934	* namespace roots and temporary usages.
3903	*/	3935	*/
3904	static int cgroup_task_count(const struct cgroup *cgrp)	3936	static int cgroup_task_count(const struct cgroup *cgrp)
3905	{	3937	{
@@ -4360,6 +4392,8 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
4360		4392
4361	if (task) {	4393	if (task) {
4362	ret = cgroup_migrate(task, false, to->root);	4394	ret = cgroup_migrate(task, false, to->root);
		4395	if (!ret)
		4396	trace_cgroup_transfer_tasks(to, task, false);
4363	put_task_struct(task);	4397	put_task_struct(task);
4364	}	4398	}
4365	} while (task && !ret);	4399	} while (task && !ret);
@@ -5025,6 +5059,8 @@ static void css_release_work_fn(struct work_struct *work)
5025	ss->css_released(css);	5059	ss->css_released(css);
5026	} else {	5060	} else {
5027	/* cgroup release path */	5061	/* cgroup release path */
		5062	trace_cgroup_release(cgrp);
		5063
5028	cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id);	5064	cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
5029	cgrp->id = -1;	5065	cgrp->id = -1;
5030		5066
@@ -5311,6 +5347,8 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
5311	if (ret)	5347	if (ret)
5312	goto out_destroy;	5348	goto out_destroy;
5313		5349
		5350	trace_cgroup_mkdir(cgrp);
		5351
5314	/* let's create and online css's */	5352	/* let's create and online css's */
5315	kernfs_activate(kn);	5353	kernfs_activate(kn);
5316		5354
@@ -5486,6 +5524,9 @@ static int cgroup_rmdir(struct kernfs_node *kn)
5486		5524
5487	ret = cgroup_destroy_locked(cgrp);	5525	ret = cgroup_destroy_locked(cgrp);
5488		5526
		5527	if (!ret)
		5528	trace_cgroup_rmdir(cgrp);
		5529
5489	cgroup_kn_unlock(kn);	5530	cgroup_kn_unlock(kn);
5490	return ret;	5531	return ret;
5491	}	5532	}
@@ -5606,6 +5647,12 @@ int __init cgroup_init(void)
5606	BUG_ON(cgroup_init_cftypes(NULL, cgroup_dfl_base_files));	5647	BUG_ON(cgroup_init_cftypes(NULL, cgroup_dfl_base_files));
5607	BUG_ON(cgroup_init_cftypes(NULL, cgroup_legacy_base_files));	5648	BUG_ON(cgroup_init_cftypes(NULL, cgroup_legacy_base_files));
5608		5649
		5650	/*
		5651	* The latency of the synchronize_sched() is too high for cgroups,
		5652	* avoid it at the cost of forcing all readers into the slow path.
		5653	*/
		5654	rcu_sync_enter_start(&cgroup_threadgroup_rwsem.rss);
		5655
5609	get_user_ns(init_cgroup_ns.user_ns);	5656	get_user_ns(init_cgroup_ns.user_ns);
5610		5657
5611	mutex_lock(&cgroup_mutex);	5658	mutex_lock(&cgroup_mutex);
@@ -5716,7 +5763,7 @@ core_initcall(cgroup_wq_init);
5716	int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,	5763	int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
5717	struct pid *pid, struct task_struct *tsk)	5764	struct pid *pid, struct task_struct *tsk)
5718	{	5765	{
5719	char *buf, *path;	5766	char *buf;
5720	int retval;	5767	int retval;
5721	struct cgroup_root *root;	5768	struct cgroup_root *root;
5722		5769
@@ -5759,18 +5806,18 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
5759	* " (deleted)" is appended to the cgroup path.	5806	* " (deleted)" is appended to the cgroup path.
5760	*/	5807	*/
5761	if (cgroup_on_dfl(cgrp) || !(tsk->flags & PF_EXITING)) {	5808	if (cgroup_on_dfl(cgrp) || !(tsk->flags & PF_EXITING)) {
5762	path = cgroup_path_ns_locked(cgrp, buf, PATH_MAX,	5809	retval = cgroup_path_ns_locked(cgrp, buf, PATH_MAX,
5763	current->nsproxy->cgroup_ns);	5810	current->nsproxy->cgroup_ns);
5764	if (!path) {	5811	if (retval >= PATH_MAX)
5765	retval = -ENAMETOOLONG;	5812	retval = -ENAMETOOLONG;
		5813	if (retval < 0)
5766	goto out_unlock;	5814	goto out_unlock;
5767	}	5815
		5816	seq_puts(m, buf);
5768	} else {	5817	} else {
5769	path = "/";	5818	seq_puts(m, "/");
5770	}	5819	}
5771		5820
5772	seq_puts(m, path);
5773
5774	if (cgroup_on_dfl(cgrp) && cgroup_is_dead(cgrp))	5821	if (cgroup_on_dfl(cgrp) && cgroup_is_dead(cgrp))
5775	seq_puts(m, " (deleted)\n");	5822	seq_puts(m, " (deleted)\n");
5776	else	5823	else
@@ -6035,8 +6082,9 @@ static void cgroup_release_agent(struct work_struct *work)
6035	{	6082	{
6036	struct cgroup *cgrp =	6083	struct cgroup *cgrp =
6037	container_of(work, struct cgroup, release_agent_work);	6084	container_of(work, struct cgroup, release_agent_work);
6038	char *pathbuf = NULL, *agentbuf = NULL, *path;	6085	char *pathbuf = NULL, *agentbuf = NULL;
6039	char *argv[3], *envp[3];	6086	char *argv[3], *envp[3];
		6087	int ret;
6040		6088
6041	mutex_lock(&cgroup_mutex);	6089	mutex_lock(&cgroup_mutex);
6042		6090
@@ -6046,13 +6094,13 @@ static void cgroup_release_agent(struct work_struct *work)
6046	goto out;	6094	goto out;
6047		6095
6048	spin_lock_irq(&css_set_lock);	6096	spin_lock_irq(&css_set_lock);
6049	path = cgroup_path_ns_locked(cgrp, pathbuf, PATH_MAX, &init_cgroup_ns);	6097	ret = cgroup_path_ns_locked(cgrp, pathbuf, PATH_MAX, &init_cgroup_ns);
6050	spin_unlock_irq(&css_set_lock);	6098	spin_unlock_irq(&css_set_lock);
6051	if (!path)	6099	if (ret < 0 || ret >= PATH_MAX)
6052	goto out;	6100	goto out;
6053		6101
6054	argv[0] = agentbuf;	6102	argv[0] = agentbuf;
6055	argv[1] = path;	6103	argv[1] = pathbuf;
6056	argv[2] = NULL;	6104	argv[2] = NULL;
6057		6105
6058	/* minimal command environment */	6106	/* minimal command environment */
@@ -6270,6 +6318,12 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
6270	if (cgroup_sk_alloc_disabled)	6318	if (cgroup_sk_alloc_disabled)
6271	return;	6319	return;
6272		6320
		6321	/* Socket clone path */
		6322	if (skcd->val) {
		6323	cgroup_get(sock_cgroup_ptr(skcd));
		6324	return;
		6325	}
		6326
6273	rcu_read_lock();	6327	rcu_read_lock();
6274		6328
6275	while (true) {	6329	while (true) {
@@ -6295,6 +6349,16 @@ void cgroup_sk_free(struct sock_cgroup_data *skcd)
6295		6349
6296	/* cgroup namespaces */	6350	/* cgroup namespaces */
6297		6351
		6352	static struct ucounts *inc_cgroup_namespaces(struct user_namespace *ns)
		6353	{
		6354	return inc_ucount(ns, current_euid(), UCOUNT_CGROUP_NAMESPACES);
		6355	}
		6356
		6357	static void dec_cgroup_namespaces(struct ucounts *ucounts)
		6358	{
		6359	dec_ucount(ucounts, UCOUNT_CGROUP_NAMESPACES);
		6360	}
		6361
6298	static struct cgroup_namespace *alloc_cgroup_ns(void)	6362	static struct cgroup_namespace *alloc_cgroup_ns(void)
6299	{	6363	{
6300	struct cgroup_namespace *new_ns;	6364	struct cgroup_namespace *new_ns;
@@ -6316,6 +6380,7 @@ static struct cgroup_namespace *alloc_cgroup_ns(void)
6316	void free_cgroup_ns(struct cgroup_namespace *ns)	6380	void free_cgroup_ns(struct cgroup_namespace *ns)
6317	{	6381	{
6318	put_css_set(ns->root_cset);	6382	put_css_set(ns->root_cset);
		6383	dec_cgroup_namespaces(ns->ucounts);
6319	put_user_ns(ns->user_ns);	6384	put_user_ns(ns->user_ns);
6320	ns_free_inum(&ns->ns);	6385	ns_free_inum(&ns->ns);
6321	kfree(ns);	6386	kfree(ns);
@@ -6327,6 +6392,7 @@ struct cgroup_namespace *copy_cgroup_ns(unsigned long flags,
6327	struct cgroup_namespace *old_ns)	6392	struct cgroup_namespace *old_ns)
6328	{	6393	{
6329	struct cgroup_namespace *new_ns;	6394	struct cgroup_namespace *new_ns;
		6395	struct ucounts *ucounts;
6330	struct css_set *cset;	6396	struct css_set *cset;
6331		6397
6332	BUG_ON(!old_ns);	6398	BUG_ON(!old_ns);
@@ -6340,6 +6406,10 @@ struct cgroup_namespace *copy_cgroup_ns(unsigned long flags,
6340	if (!ns_capable(user_ns, CAP_SYS_ADMIN))	6406	if (!ns_capable(user_ns, CAP_SYS_ADMIN))
6341	return ERR_PTR(-EPERM);	6407	return ERR_PTR(-EPERM);
6342		6408
		6409	ucounts = inc_cgroup_namespaces(user_ns);
		6410	if (!ucounts)
		6411	return ERR_PTR(-ENOSPC);
		6412
6343	/* It is not safe to take cgroup_mutex here */	6413	/* It is not safe to take cgroup_mutex here */
6344	spin_lock_irq(&css_set_lock);	6414	spin_lock_irq(&css_set_lock);
6345	cset = task_css_set(current);	6415	cset = task_css_set(current);
@@ -6349,10 +6419,12 @@ struct cgroup_namespace *copy_cgroup_ns(unsigned long flags,
6349	new_ns = alloc_cgroup_ns();	6419	new_ns = alloc_cgroup_ns();
6350	if (IS_ERR(new_ns)) {	6420	if (IS_ERR(new_ns)) {
6351	put_css_set(cset);	6421	put_css_set(cset);
		6422	dec_cgroup_namespaces(ucounts);
6352	return new_ns;	6423	return new_ns;
6353	}	6424	}
6354		6425
6355	new_ns->user_ns = get_user_ns(user_ns);	6426	new_ns->user_ns = get_user_ns(user_ns);
		6427	new_ns->ucounts = ucounts;
6356	new_ns->root_cset = cset;	6428	new_ns->root_cset = cset;
6357		6429
6358	return new_ns;	6430	return new_ns;
@@ -6403,12 +6475,18 @@ static void cgroupns_put(struct ns_common *ns)
6403	put_cgroup_ns(to_cg_ns(ns));	6475	put_cgroup_ns(to_cg_ns(ns));
6404	}	6476	}
6405		6477
		6478	static struct user_namespace *cgroupns_owner(struct ns_common *ns)
		6479	{
		6480	return to_cg_ns(ns)->user_ns;
		6481	}
		6482
6406	const struct proc_ns_operations cgroupns_operations = {	6483	const struct proc_ns_operations cgroupns_operations = {
6407	.name = "cgroup",	6484	.name = "cgroup",
6408	.type = CLONE_NEWCGROUP,	6485	.type = CLONE_NEWCGROUP,
6409	.get = cgroupns_get,	6486	.get = cgroupns_get,
6410	.put = cgroupns_put,	6487	.put = cgroupns_put,
6411	.install = cgroupns_install,	6488	.install = cgroupns_install,
		6489	.owner = cgroupns_owner,
6412	};	6490	};
6413		6491
6414	static __init int cgroup_namespaces_init(void)	6492	static __init int cgroup_namespaces_init(void)