aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2019-03-04 16:24:27 -0500
committer Linus Torvalds <torvalds@linux-foundation.org> 2019-03-04 16:24:27 -0500
commit 4f9020ffde71ddb92bc2f65ce0b00232bc88c590 (patch)
tree e67b1f3499f255477cadf5f418319fffed117874 /kernel
parent 736706bee3298208343a76096370e4f6a5c55915 (diff)
parent d3d6a18d7d351cbcc9b33dbedf710e65f8ce1595 (diff)
Merge branch 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull vfs fixes from Al Viro:
 "Assorted fixes that sat in -next for a while, all over the place"

* 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
  aio: Fix locking in aio_poll()
  exec: Fix mem leak in kernel_read_file
  copy_mount_string: Limit string length to PATH_MAX
  cgroup: saner refcounting for cgroup_root
  fix cgroup_do_mount() handling of failure exits
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/cgroup/cgroup-internal.h 2
-rw-r--r-- kernel/cgroup/cgroup-v1.c 58
-rw-r--r-- kernel/cgroup/cgroup.c 25
3 files changed, 27 insertions, 58 deletions
diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h
index c950864016e2..c9a35f09e4b9 100644
--- a/kernel/cgroup/cgroup-internal.h
+++ b/kernel/cgroup/cgroup-internal.h
@@ -198,7 +198,7 @@ int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen,
198 198
199void cgroup_free_root(struct cgroup_root *root); 199void cgroup_free_root(struct cgroup_root *root);
200void init_cgroup_root(struct cgroup_root *root, struct cgroup_sb_opts *opts); 200void init_cgroup_root(struct cgroup_root *root, struct cgroup_sb_opts *opts);
201int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask, int ref_flags); 201int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask);
202int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask); 202int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask);
203struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags, 203struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags,
204 struct cgroup_root *root, unsigned long magic, 204 struct cgroup_root *root, unsigned long magic,
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
index 583b969b0c0e..f94a7229974e 100644
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -1116,13 +1116,11 @@ struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags,
1116 void *data, unsigned long magic, 1116 void *data, unsigned long magic,
1117 struct cgroup_namespace *ns) 1117 struct cgroup_namespace *ns)
1118{ 1118{
1119 struct super_block *pinned_sb = NULL;
1120 struct cgroup_sb_opts opts; 1119 struct cgroup_sb_opts opts;
1121 struct cgroup_root *root; 1120 struct cgroup_root *root;
1122 struct cgroup_subsys *ss; 1121 struct cgroup_subsys *ss;
1123 struct dentry *dentry; 1122 struct dentry *dentry;
1124 int i, ret; 1123 int i, ret;
1125 bool new_root = false;
1126 1124
1127 cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp); 1125 cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp);
1128 1126
@@ -1184,29 +1182,6 @@ struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags,
1184 if (root->flags ^ opts.flags) 1182 if (root->flags ^ opts.flags)
1185 pr_warn("new mount options do not match the existing superblock, will be ignored\n"); 1183 pr_warn("new mount options do not match the existing superblock, will be ignored\n");
1186 1184
1187 /*
1188 * We want to reuse @root whose lifetime is governed by its
1189 * ->cgrp. Let's check whether @root is alive and keep it
1190 * that way. As cgroup_kill_sb() can happen anytime, we
1191 * want to block it by pinning the sb so that @root doesn't
1192 * get killed before mount is complete.
1193 *
1194 * With the sb pinned, tryget_live can reliably indicate
1195 * whether @root can be reused. If it's being killed,
1196 * drain it. We can use wait_queue for the wait but this
1197 * path is super cold. Let's just sleep a bit and retry.
1198 */
1199 pinned_sb = kernfs_pin_sb(root->kf_root, NULL);
1200 if (IS_ERR(pinned_sb) ||
1201 !percpu_ref_tryget_live(&root->cgrp.self.refcnt)) {
1202 mutex_unlock(&cgroup_mutex);
1203 if (!IS_ERR_OR_NULL(pinned_sb))
1204 deactivate_super(pinned_sb);
1205 msleep(10);
1206 ret = restart_syscall();
1207 goto out_free;
1208 }
1209
1210 ret = 0; 1185 ret = 0;
1211 goto out_unlock; 1186 goto out_unlock;
1212 } 1187 }
@@ -1232,15 +1207,20 @@ struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags,
1232 ret = -ENOMEM; 1207 ret = -ENOMEM;
1233 goto out_unlock; 1208 goto out_unlock;
1234 } 1209 }
1235 new_root = true;
1236 1210
1237 init_cgroup_root(root, &opts); 1211 init_cgroup_root(root, &opts);
1238 1212
1239 ret = cgroup_setup_root(root, opts.subsys_mask, PERCPU_REF_INIT_DEAD); 1213 ret = cgroup_setup_root(root, opts.subsys_mask);
1240 if (ret) 1214 if (ret)
1241 cgroup_free_root(root); 1215 cgroup_free_root(root);
1242 1216
1243out_unlock: 1217out_unlock:
1218 if (!ret && !percpu_ref_tryget_live(&root->cgrp.self.refcnt)) {
1219 mutex_unlock(&cgroup_mutex);
1220 msleep(10);
1221 ret = restart_syscall();
1222 goto out_free;
1223 }
1244 mutex_unlock(&cgroup_mutex); 1224 mutex_unlock(&cgroup_mutex);
1245out_free: 1225out_free:
1246 kfree(opts.release_agent); 1226 kfree(opts.release_agent);
@@ -1252,25 +1232,13 @@ out_free:
1252 dentry = cgroup_do_mount(&cgroup_fs_type, flags, root, 1232 dentry = cgroup_do_mount(&cgroup_fs_type, flags, root,
1253 CGROUP_SUPER_MAGIC, ns); 1233 CGROUP_SUPER_MAGIC, ns);
1254 1234
1255 /* 1235 if (!IS_ERR(dentry) && percpu_ref_is_dying(&root->cgrp.self.refcnt)) {
1256 * There's a race window after we release cgroup_mutex and before 1236 struct super_block *sb = dentry->d_sb;
1257 * allocating a superblock. Make sure a concurrent process won't 1237 dput(dentry);
1258 * be able to re-use the root during this window by delaying the 1238 deactivate_locked_super(sb);
1259 * initialization of root refcnt. 1239 msleep(10);
1260 */ 1240 dentry = ERR_PTR(restart_syscall());
1261 if (new_root) {
1262 mutex_lock(&cgroup_mutex);
1263 percpu_ref_reinit(&root->cgrp.self.refcnt);
1264 mutex_unlock(&cgroup_mutex);
1265 } 1241 }
1266
1267 /*
1268 * If @pinned_sb, we're reusing an existing root and holding an
1269 * extra ref on its sb. Mount is complete. Put the extra ref.
1270 */
1271 if (pinned_sb)
1272 deactivate_super(pinned_sb);
1273
1274 return dentry; 1242 return dentry;
1275} 1243}
1276 1244
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index f31bd61c9466..7fd9f22e406d 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -1927,7 +1927,7 @@ void init_cgroup_root(struct cgroup_root *root, struct cgroup_sb_opts *opts)
1927 set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags); 1927 set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags);
1928} 1928}
1929 1929
1930int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask, int ref_flags) 1930int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask)
1931{ 1931{
1932 LIST_HEAD(tmp_links); 1932 LIST_HEAD(tmp_links);
1933 struct cgroup *root_cgrp = &root->cgrp; 1933 struct cgroup *root_cgrp = &root->cgrp;
@@ -1944,7 +1944,7 @@ int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask, int ref_flags)
1944 root_cgrp->ancestor_ids[0] = ret; 1944 root_cgrp->ancestor_ids[0] = ret;
1945 1945
1946 ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release, 1946 ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release,
1947 ref_flags, GFP_KERNEL); 1947 0, GFP_KERNEL);
1948 if (ret) 1948 if (ret)
1949 goto out; 1949 goto out;
1950 1950
@@ -2033,7 +2033,7 @@ struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags,
2033 struct cgroup_namespace *ns) 2033 struct cgroup_namespace *ns)
2034{ 2034{
2035 struct dentry *dentry; 2035 struct dentry *dentry;
2036 bool new_sb; 2036 bool new_sb = false;
2037 2037
2038 dentry = kernfs_mount(fs_type, flags, root->kf_root, magic, &new_sb); 2038 dentry = kernfs_mount(fs_type, flags, root->kf_root, magic, &new_sb);
2039 2039
@@ -2043,6 +2043,7 @@ struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags,
2043 */ 2043 */
2044 if (!IS_ERR(dentry) && ns != &init_cgroup_ns) { 2044 if (!IS_ERR(dentry) && ns != &init_cgroup_ns) {
2045 struct dentry *nsdentry; 2045 struct dentry *nsdentry;
2046 struct super_block *sb = dentry->d_sb;
2046 struct cgroup *cgrp; 2047 struct cgroup *cgrp;
2047 2048
2048 mutex_lock(&cgroup_mutex); 2049 mutex_lock(&cgroup_mutex);
@@ -2053,12 +2054,14 @@ struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags,
2053 spin_unlock_irq(&css_set_lock); 2054 spin_unlock_irq(&css_set_lock);
2054 mutex_unlock(&cgroup_mutex); 2055 mutex_unlock(&cgroup_mutex);
2055 2056
2056 nsdentry = kernfs_node_dentry(cgrp->kn, dentry->d_sb); 2057 nsdentry = kernfs_node_dentry(cgrp->kn, sb);
2057 dput(dentry); 2058 dput(dentry);
2059 if (IS_ERR(nsdentry))
2060 deactivate_locked_super(sb);
2058 dentry = nsdentry; 2061 dentry = nsdentry;
2059 } 2062 }
2060 2063
2061 if (IS_ERR(dentry) || !new_sb) 2064 if (!new_sb)
2062 cgroup_put(&root->cgrp); 2065 cgroup_put(&root->cgrp);
2063 2066
2064 return dentry; 2067 return dentry;
@@ -2118,18 +2121,16 @@ static void cgroup_kill_sb(struct super_block *sb)
2118 struct cgroup_root *root = cgroup_root_from_kf(kf_root); 2121 struct cgroup_root *root = cgroup_root_from_kf(kf_root);
2119 2122
2120 /* 2123 /*
2121 * If @root doesn't have any mounts or children, start killing it. 2124 * If @root doesn't have any children, start killing it.
2122 * This prevents new mounts by disabling percpu_ref_tryget_live(). 2125 * This prevents new mounts by disabling percpu_ref_tryget_live().
2123 * cgroup_mount() may wait for @root's release. 2126 * cgroup_mount() may wait for @root's release.
2124 * 2127 *
2125 * And don't kill the default root. 2128 * And don't kill the default root.
2126 */ 2129 */
2127 if (!list_empty(&root->cgrp.self.children) || 2130 if (list_empty(&root->cgrp.self.children) && root != &cgrp_dfl_root &&
2128 root == &cgrp_dfl_root) 2131 !percpu_ref_is_dying(&root->cgrp.self.refcnt))
2129 cgroup_put(&root->cgrp);
2130 else
2131 percpu_ref_kill(&root->cgrp.self.refcnt); 2132 percpu_ref_kill(&root->cgrp.self.refcnt);
2132 2133 cgroup_put(&root->cgrp);
2133 kernfs_kill_sb(sb); 2134 kernfs_kill_sb(sb);
2134} 2135}
2135 2136
@@ -5399,7 +5400,7 @@ int __init cgroup_init(void)
5399 hash_add(css_set_table, &init_css_set.hlist, 5400 hash_add(css_set_table, &init_css_set.hlist,
5400 css_set_hash(init_css_set.subsys)); 5401 css_set_hash(init_css_set.subsys));
5401 5402
5402 BUG_ON(cgroup_setup_root(&cgrp_dfl_root, 0, 0)); 5403 BUG_ON(cgroup_setup_root(&cgrp_dfl_root, 0));
5403 5404
5404 mutex_unlock(&cgroup_mutex); 5405 mutex_unlock(&cgroup_mutex);
5405 5406