diff options
author | Serge Hallyn <serge.hallyn@ubuntu.com> | 2016-01-29 03:54:09 -0500 |
---|---|---|
committer | Tejun Heo <tj@kernel.org> | 2016-02-16 13:04:59 -0500 |
commit | ed82571b1a14ab2bfbede2bb2c209700495749fc (patch) | |
tree | b0c92f0509559a4bfc3c68940fbf8c9814455e14 /kernel/cgroup.c | |
parent | fb3c8315650f89a1993fb3ae3e74e9c7e4a1c9c0 (diff) |
cgroup: mount cgroupns-root when inside non-init cgroupns
This patch enables cgroup mounting inside userns when a process
has appropriate privileges. The cgroup filesystem mounted is
rooted at the cgroupns-root. Thus, in a container setup, only
the hierarchy under the cgroupns-root is exposed inside the container.
This allows container management tools to run inside the containers
without depending on any global state.
Signed-off-by: Serge Hallyn <serge.hallyn@canonical.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Tejun Heo <tj@kernel.org>
Diffstat (limited to 'kernel/cgroup.c')
-rw-r--r-- | kernel/cgroup.c | 48 |
1 files changed, 47 insertions, 1 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index b086a461be23..24989022ff62 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
@@ -1994,6 +1994,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
1994 | { | 1994 | { |
1995 | bool is_v2 = fs_type == &cgroup2_fs_type; | 1995 | bool is_v2 = fs_type == &cgroup2_fs_type; |
1996 | struct super_block *pinned_sb = NULL; | 1996 | struct super_block *pinned_sb = NULL; |
1997 | struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; | ||
1997 | struct cgroup_subsys *ss; | 1998 | struct cgroup_subsys *ss; |
1998 | struct cgroup_root *root; | 1999 | struct cgroup_root *root; |
1999 | struct cgroup_sb_opts opts; | 2000 | struct cgroup_sb_opts opts; |
@@ -2002,6 +2003,14 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
2002 | int i; | 2003 | int i; |
2003 | bool new_sb; | 2004 | bool new_sb; |
2004 | 2005 | ||
2006 | get_cgroup_ns(ns); | ||
2007 | |||
2008 | /* Check if the caller has permission to mount. */ | ||
2009 | if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) { | ||
2010 | put_cgroup_ns(ns); | ||
2011 | return ERR_PTR(-EPERM); | ||
2012 | } | ||
2013 | |||
2005 | /* | 2014 | /* |
2006 | * The first time anyone tries to mount a cgroup, enable the list | 2015 | * The first time anyone tries to mount a cgroup, enable the list |
2007 | * linking each css_set to its tasks and fix up all existing tasks. | 2016 | * linking each css_set to its tasks and fix up all existing tasks. |
@@ -2012,6 +2021,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
2012 | if (is_v2) { | 2021 | if (is_v2) { |
2013 | if (data) { | 2022 | if (data) { |
2014 | pr_err("cgroup2: unknown option \"%s\"\n", (char *)data); | 2023 | pr_err("cgroup2: unknown option \"%s\"\n", (char *)data); |
2024 | put_cgroup_ns(ns); | ||
2015 | return ERR_PTR(-EINVAL); | 2025 | return ERR_PTR(-EINVAL); |
2016 | } | 2026 | } |
2017 | cgrp_dfl_root_visible = true; | 2027 | cgrp_dfl_root_visible = true; |
@@ -2117,6 +2127,16 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
2117 | goto out_unlock; | 2127 | goto out_unlock; |
2118 | } | 2128 | } |
2119 | 2129 | ||
2130 | /* | ||
2131 | * We know this subsystem has not yet been bound. Users in a non-init | ||
2132 | * user namespace may only mount hierarchies with no bound subsystems, | ||
2133 | * i.e. 'none,name=user1' | ||
2134 | */ | ||
2135 | if (!opts.none && !capable(CAP_SYS_ADMIN)) { | ||
2136 | ret = -EPERM; | ||
2137 | goto out_unlock; | ||
2138 | } | ||
2139 | |||
2120 | root = kzalloc(sizeof(*root), GFP_KERNEL); | 2140 | root = kzalloc(sizeof(*root), GFP_KERNEL); |
2121 | if (!root) { | 2141 | if (!root) { |
2122 | ret = -ENOMEM; | 2142 | ret = -ENOMEM; |
@@ -2135,12 +2155,37 @@ out_free: | |||
2135 | kfree(opts.release_agent); | 2155 | kfree(opts.release_agent); |
2136 | kfree(opts.name); | 2156 | kfree(opts.name); |
2137 | 2157 | ||
2138 | if (ret) | 2158 | if (ret) { |
2159 | put_cgroup_ns(ns); | ||
2139 | return ERR_PTR(ret); | 2160 | return ERR_PTR(ret); |
2161 | } | ||
2140 | out_mount: | 2162 | out_mount: |
2141 | dentry = kernfs_mount(fs_type, flags, root->kf_root, | 2163 | dentry = kernfs_mount(fs_type, flags, root->kf_root, |
2142 | is_v2 ? CGROUP2_SUPER_MAGIC : CGROUP_SUPER_MAGIC, | 2164 | is_v2 ? CGROUP2_SUPER_MAGIC : CGROUP_SUPER_MAGIC, |
2143 | &new_sb); | 2165 | &new_sb); |
2166 | |||
2167 | /* | ||
2168 | * In non-init cgroup namespace, instead of root cgroup's | ||
2169 | * dentry, we return the dentry corresponding to the | ||
2170 | * cgroupns->root_cgrp. | ||
2171 | */ | ||
2172 | if (!IS_ERR(dentry) && ns != &init_cgroup_ns) { | ||
2173 | struct dentry *nsdentry; | ||
2174 | struct cgroup *cgrp; | ||
2175 | |||
2176 | mutex_lock(&cgroup_mutex); | ||
2177 | spin_lock_bh(&css_set_lock); | ||
2178 | |||
2179 | cgrp = cset_cgroup_from_root(ns->root_cset, root); | ||
2180 | |||
2181 | spin_unlock_bh(&css_set_lock); | ||
2182 | mutex_unlock(&cgroup_mutex); | ||
2183 | |||
2184 | nsdentry = kernfs_node_dentry(cgrp->kn, dentry->d_sb); | ||
2185 | dput(dentry); | ||
2186 | dentry = nsdentry; | ||
2187 | } | ||
2188 | |||
2144 | if (IS_ERR(dentry) || !new_sb) | 2189 | if (IS_ERR(dentry) || !new_sb) |
2145 | cgroup_put(&root->cgrp); | 2190 | cgroup_put(&root->cgrp); |
2146 | 2191 | ||
@@ -2153,6 +2198,7 @@ out_mount: | |||
2153 | deactivate_super(pinned_sb); | 2198 | deactivate_super(pinned_sb); |
2154 | } | 2199 | } |
2155 | 2200 | ||
2201 | put_cgroup_ns(ns); | ||
2156 | return dentry; | 2202 | return dentry; |
2157 | } | 2203 | } |
2158 | 2204 | ||