aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLi Zefan <lizefan@huawei.com>2013-03-01 02:01:56 -0500
committerTejun Heo <tj@kernel.org>2013-03-04 12:50:08 -0500
commit65dff759d2948cf18e2029fc5c0c595b8b7da3a5 (patch)
tree2904e81e44bb939413f406bbb2056e88df008f72
parent6dbe51c251a327e012439c4772097a13df43c5b8 (diff)
cgroup: fix cgroup_path() vs rename() race
rename() will change dentry->d_name. The result of this race can be worse than seeing partially rewritten name, but we might access a stale pointer because rename() will re-allocate memory to hold a longer name. As accessing dentry->name must be protected by dentry->d_lock or parent inode's i_mutex, while on the other hand cgroup-path() can be called with some irq-safe spinlocks held, we can't generate cgroup path using dentry->d_name. Alternatively we make a copy of dentry->d_name and save it in cgrp->name when a cgroup is created, and update cgrp->name at rename(). v5: use flexible array instead of zero-size array. v4: - allocate root_cgroup_name and all root_cgroup->name points to it. - add cgroup_name() wrapper. v3: use kfree_rcu() instead of synchronize_rcu() in user-visible path. v2: make cgrp->name RCU safe. Signed-off-by: Li Zefan <lizefan@huawei.com> Signed-off-by: Tejun Heo <tj@kernel.org>
-rw-r--r--block/blk-cgroup.h2
-rw-r--r--include/linux/cgroup.h24
-rw-r--r--kernel/cgroup.c106
3 files changed, 100 insertions, 32 deletions
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index f2b292925ccd..4e595ee8c915 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -247,9 +247,7 @@ static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen)
247{ 247{
248 int ret; 248 int ret;
249 249
250 rcu_read_lock();
251 ret = cgroup_path(blkg->blkcg->css.cgroup, buf, buflen); 250 ret = cgroup_path(blkg->blkcg->css.cgroup, buf, buflen);
252 rcu_read_unlock();
253 if (ret) 251 if (ret)
254 strncpy(buf, "<unavailable>", buflen); 252 strncpy(buf, "<unavailable>", buflen);
255 return ret; 253 return ret;
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 900af5964f55..75c6ec1ba1ba 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -150,6 +150,11 @@ enum {
150 CGRP_CPUSET_CLONE_CHILDREN, 150 CGRP_CPUSET_CLONE_CHILDREN,
151}; 151};
152 152
153struct cgroup_name {
154 struct rcu_head rcu_head;
155 char name[];
156};
157
153struct cgroup { 158struct cgroup {
154 unsigned long flags; /* "unsigned long" so bitops work */ 159 unsigned long flags; /* "unsigned long" so bitops work */
155 160
@@ -172,6 +177,19 @@ struct cgroup {
172 struct cgroup *parent; /* my parent */ 177 struct cgroup *parent; /* my parent */
173 struct dentry *dentry; /* cgroup fs entry, RCU protected */ 178 struct dentry *dentry; /* cgroup fs entry, RCU protected */
174 179
180 /*
181 * This is a copy of dentry->d_name, and it's needed because
182 * we can't use dentry->d_name in cgroup_path().
183 *
184 * You must acquire rcu_read_lock() to access cgrp->name, and
185 * the only place that can change it is rename(), which is
186 * protected by parent dir's i_mutex.
187 *
188 * Normally you should use cgroup_name() wrapper rather than
189 * access it directly.
190 */
191 struct cgroup_name __rcu *name;
192
175 /* Private pointers for each registered subsystem */ 193 /* Private pointers for each registered subsystem */
176 struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; 194 struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];
177 195
@@ -404,6 +422,12 @@ struct cgroup_scanner {
404 void *data; 422 void *data;
405}; 423};
406 424
425/* Caller should hold rcu_read_lock() */
426static inline const char *cgroup_name(const struct cgroup *cgrp)
427{
428 return rcu_dereference(cgrp->name)->name;
429}
430
407int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); 431int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
408int cgroup_rm_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); 432int cgroup_rm_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
409 433
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index a32f9432666c..50682168abc2 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -238,6 +238,8 @@ static DEFINE_SPINLOCK(hierarchy_id_lock);
238/* dummytop is a shorthand for the dummy hierarchy's top cgroup */ 238/* dummytop is a shorthand for the dummy hierarchy's top cgroup */
239#define dummytop (&rootnode.top_cgroup) 239#define dummytop (&rootnode.top_cgroup)
240 240
241static struct cgroup_name root_cgroup_name = { .name = "/" };
242
241/* This flag indicates whether tasks in the fork and exit paths should 243/* This flag indicates whether tasks in the fork and exit paths should
242 * check for fork/exit handlers to call. This avoids us having to do 244 * check for fork/exit handlers to call. This avoids us having to do
243 * extra work in the fork/exit path if none of the subsystems need to 245 * extra work in the fork/exit path if none of the subsystems need to
@@ -859,6 +861,17 @@ static struct inode *cgroup_new_inode(umode_t mode, struct super_block *sb)
859 return inode; 861 return inode;
860} 862}
861 863
864static struct cgroup_name *cgroup_alloc_name(struct dentry *dentry)
865{
866 struct cgroup_name *name;
867
868 name = kmalloc(sizeof(*name) + dentry->d_name.len + 1, GFP_KERNEL);
869 if (!name)
870 return NULL;
871 strcpy(name->name, dentry->d_name.name);
872 return name;
873}
874
862static void cgroup_free_fn(struct work_struct *work) 875static void cgroup_free_fn(struct work_struct *work)
863{ 876{
864 struct cgroup *cgrp = container_of(work, struct cgroup, free_work); 877 struct cgroup *cgrp = container_of(work, struct cgroup, free_work);
@@ -889,6 +902,7 @@ static void cgroup_free_fn(struct work_struct *work)
889 simple_xattrs_free(&cgrp->xattrs); 902 simple_xattrs_free(&cgrp->xattrs);
890 903
891 ida_simple_remove(&cgrp->root->cgroup_ida, cgrp->id); 904 ida_simple_remove(&cgrp->root->cgroup_ida, cgrp->id);
905 kfree(rcu_dereference_raw(cgrp->name));
892 kfree(cgrp); 906 kfree(cgrp);
893} 907}
894 908
@@ -1421,6 +1435,7 @@ static void init_cgroup_root(struct cgroupfs_root *root)
1421 INIT_LIST_HEAD(&root->allcg_list); 1435 INIT_LIST_HEAD(&root->allcg_list);
1422 root->number_of_cgroups = 1; 1436 root->number_of_cgroups = 1;
1423 cgrp->root = root; 1437 cgrp->root = root;
1438 cgrp->name = &root_cgroup_name;
1424 cgrp->top_cgroup = cgrp; 1439 cgrp->top_cgroup = cgrp;
1425 init_cgroup_housekeeping(cgrp); 1440 init_cgroup_housekeeping(cgrp);
1426 list_add_tail(&cgrp->allcg_node, &root->allcg_list); 1441 list_add_tail(&cgrp->allcg_node, &root->allcg_list);
@@ -1769,49 +1784,45 @@ static struct kobject *cgroup_kobj;
1769 * @buf: the buffer to write the path into 1784 * @buf: the buffer to write the path into
1770 * @buflen: the length of the buffer 1785 * @buflen: the length of the buffer
1771 * 1786 *
1772 * Called with cgroup_mutex held or else with an RCU-protected cgroup 1787 * Writes path of cgroup into buf. Returns 0 on success, -errno on error.
1773 * reference. Writes path of cgroup into buf. Returns 0 on success, 1788 *
1774 * -errno on error. 1789 * We can't generate cgroup path using dentry->d_name, as accessing
1790 * dentry->name must be protected by irq-unsafe dentry->d_lock or parent
1791 * inode's i_mutex, while on the other hand cgroup_path() can be called
1792 * with some irq-safe spinlocks held.
1775 */ 1793 */
1776int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen) 1794int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
1777{ 1795{
1778 struct dentry *dentry = cgrp->dentry; 1796 int ret = -ENAMETOOLONG;
1779 char *start; 1797 char *start;
1780 1798
1781 rcu_lockdep_assert(rcu_read_lock_held() || cgroup_lock_is_held(),
1782 "cgroup_path() called without proper locking");
1783
1784 if (cgrp == dummytop) {
1785 /*
1786 * Inactive subsystems have no dentry for their root
1787 * cgroup
1788 */
1789 strcpy(buf, "/");
1790 return 0;
1791 }
1792
1793 start = buf + buflen - 1; 1799 start = buf + buflen - 1;
1794
1795 *start = '\0'; 1800 *start = '\0';
1796 for (;;) {
1797 int len = dentry->d_name.len;
1798 1801
1802 rcu_read_lock();
1803 while (cgrp) {
1804 const char *name = cgroup_name(cgrp);
1805 int len;
1806
1807 len = strlen(name);
1799 if ((start -= len) < buf) 1808 if ((start -= len) < buf)
1800 return -ENAMETOOLONG; 1809 goto out;
1801 memcpy(start, dentry->d_name.name, len); 1810 memcpy(start, name, len);
1802 cgrp = cgrp->parent;
1803 if (!cgrp)
1804 break;
1805 1811
1806 dentry = cgrp->dentry;
1807 if (!cgrp->parent) 1812 if (!cgrp->parent)
1808 continue; 1813 break;
1814
1809 if (--start < buf) 1815 if (--start < buf)
1810 return -ENAMETOOLONG; 1816 goto out;
1811 *start = '/'; 1817 *start = '/';
1818
1819 cgrp = cgrp->parent;
1812 } 1820 }
1821 ret = 0;
1813 memmove(buf, start, buf + buflen - start); 1822 memmove(buf, start, buf + buflen - start);
1814 return 0; 1823out:
1824 rcu_read_unlock();
1825 return ret;
1815} 1826}
1816EXPORT_SYMBOL_GPL(cgroup_path); 1827EXPORT_SYMBOL_GPL(cgroup_path);
1817 1828
@@ -2537,13 +2548,40 @@ static int cgroup_file_release(struct inode *inode, struct file *file)
2537static int cgroup_rename(struct inode *old_dir, struct dentry *old_dentry, 2548static int cgroup_rename(struct inode *old_dir, struct dentry *old_dentry,
2538 struct inode *new_dir, struct dentry *new_dentry) 2549 struct inode *new_dir, struct dentry *new_dentry)
2539{ 2550{
2551 int ret;
2552 struct cgroup_name *name, *old_name;
2553 struct cgroup *cgrp;
2554
2555 /*
2556 * It's convinient to use parent dir's i_mutex to protected
2557 * cgrp->name.
2558 */
2559 lockdep_assert_held(&old_dir->i_mutex);
2560
2540 if (!S_ISDIR(old_dentry->d_inode->i_mode)) 2561 if (!S_ISDIR(old_dentry->d_inode->i_mode))
2541 return -ENOTDIR; 2562 return -ENOTDIR;
2542 if (new_dentry->d_inode) 2563 if (new_dentry->d_inode)
2543 return -EEXIST; 2564 return -EEXIST;
2544 if (old_dir != new_dir) 2565 if (old_dir != new_dir)
2545 return -EIO; 2566 return -EIO;
2546 return simple_rename(old_dir, old_dentry, new_dir, new_dentry); 2567
2568 cgrp = __d_cgrp(old_dentry);
2569
2570 name = cgroup_alloc_name(new_dentry);
2571 if (!name)
2572 return -ENOMEM;
2573
2574 ret = simple_rename(old_dir, old_dentry, new_dir, new_dentry);
2575 if (ret) {
2576 kfree(name);
2577 return ret;
2578 }
2579
2580 old_name = cgrp->name;
2581 rcu_assign_pointer(cgrp->name, name);
2582
2583 kfree_rcu(old_name, rcu_head);
2584 return 0;
2547} 2585}
2548 2586
2549static struct simple_xattrs *__d_xattrs(struct dentry *dentry) 2587static struct simple_xattrs *__d_xattrs(struct dentry *dentry)
@@ -4158,6 +4196,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
4158 umode_t mode) 4196 umode_t mode)
4159{ 4197{
4160 struct cgroup *cgrp; 4198 struct cgroup *cgrp;
4199 struct cgroup_name *name;
4161 struct cgroupfs_root *root = parent->root; 4200 struct cgroupfs_root *root = parent->root;
4162 int err = 0; 4201 int err = 0;
4163 struct cgroup_subsys *ss; 4202 struct cgroup_subsys *ss;
@@ -4168,9 +4207,14 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
4168 if (!cgrp) 4207 if (!cgrp)
4169 return -ENOMEM; 4208 return -ENOMEM;
4170 4209
4210 name = cgroup_alloc_name(dentry);
4211 if (!name)
4212 goto err_free_cgrp;
4213 rcu_assign_pointer(cgrp->name, name);
4214
4171 cgrp->id = ida_simple_get(&root->cgroup_ida, 1, 0, GFP_KERNEL); 4215 cgrp->id = ida_simple_get(&root->cgroup_ida, 1, 0, GFP_KERNEL);
4172 if (cgrp->id < 0) 4216 if (cgrp->id < 0)
4173 goto err_free_cgrp; 4217 goto err_free_name;
4174 4218
4175 /* 4219 /*
4176 * Only live parents can have children. Note that the liveliness 4220 * Only live parents can have children. Note that the liveliness
@@ -4276,6 +4320,8 @@ err_free_all:
4276 deactivate_super(sb); 4320 deactivate_super(sb);
4277err_free_id: 4321err_free_id:
4278 ida_simple_remove(&root->cgroup_ida, cgrp->id); 4322 ida_simple_remove(&root->cgroup_ida, cgrp->id);
4323err_free_name:
4324 kfree(rcu_dereference_raw(cgrp->name));
4279err_free_cgrp: 4325err_free_cgrp:
4280 kfree(cgrp); 4326 kfree(cgrp);
4281 return err; 4327 return err;