diff options
| author | Tejun Heo <tj@kernel.org> | 2011-12-12 21:12:21 -0500 |
|---|---|---|
| committer | Tejun Heo <tj@kernel.org> | 2011-12-12 21:12:21 -0500 |
| commit | e25e2cbb4c6679bed5f52fb0f2cc381688297901 (patch) | |
| tree | ba11d495cba21d3d233d4e25f94676ef9d0055ae /kernel | |
| parent | 467de1fc67d1bd2954eaac7019c564f28fa2b6a5 (diff) | |
cgroup: add cgroup_root_mutex
cgroup wants to make the threadgroup stable while modifying cgroup
hierarchies, which will introduce a locking dependency on
cred_guard_mutex from cgroup_mutex. This unfortunately completes a
circular dependency.
A. cgroup_mutex -> cred_guard_mutex -> s_type->i_mutex_key -> namespace_sem
B. namespace_sem -> cgroup_mutex
B is from cgroup_show_options(), and this patch breaks it by
introducing another mutex, cgroup_root_mutex, which nests inside
cgroup_mutex and protects cgroupfs_root.
Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Li Zefan <lizf@cn.fujitsu.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/cgroup.c | 64 |
1 files changed, 42 insertions, 22 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index d9d5648f3cdc..6545fd61b10d 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
| @@ -63,7 +63,24 @@ | |||
| 63 | 63 | ||
| 64 | #include <linux/atomic.h> | 64 | #include <linux/atomic.h> |
| 65 | 65 | ||
| 66 | /* | ||
| 67 | * cgroup_mutex is the master lock. Any modification to cgroup or its | ||
| 68 | * hierarchy must be performed while holding it. | ||
| 69 | * | ||
| 70 | * cgroup_root_mutex nests inside cgroup_mutex and should be held to modify | ||
| 71 | * cgroupfs_root of any cgroup hierarchy - subsys list, flags, | ||
| 72 | * release_agent_path and so on. Modifying requires both cgroup_mutex and | ||
| 73 | * cgroup_root_mutex. Readers can acquire either of the two. This is to | ||
| 74 | * break the following locking order cycle. | ||
| 75 | * | ||
| 76 | * A. cgroup_mutex -> cred_guard_mutex -> s_type->i_mutex_key -> namespace_sem | ||
| 77 | * B. namespace_sem -> cgroup_mutex | ||
| 78 | * | ||
| 79 | * B happens only through cgroup_show_options() and using cgroup_root_mutex | ||
| 80 | * breaks it. | ||
| 81 | */ | ||
| 66 | static DEFINE_MUTEX(cgroup_mutex); | 82 | static DEFINE_MUTEX(cgroup_mutex); |
| 83 | static DEFINE_MUTEX(cgroup_root_mutex); | ||
| 67 | 84 | ||
| 68 | /* | 85 | /* |
| 69 | * Generate an array of cgroup subsystem pointers. At boot time, this is | 86 | * Generate an array of cgroup subsystem pointers. At boot time, this is |
| @@ -953,6 +970,7 @@ static int rebind_subsystems(struct cgroupfs_root *root, | |||
| 953 | int i; | 970 | int i; |
| 954 | 971 | ||
| 955 | BUG_ON(!mutex_is_locked(&cgroup_mutex)); | 972 | BUG_ON(!mutex_is_locked(&cgroup_mutex)); |
| 973 | BUG_ON(!mutex_is_locked(&cgroup_root_mutex)); | ||
| 956 | 974 | ||
| 957 | removed_bits = root->actual_subsys_bits & ~final_bits; | 975 | removed_bits = root->actual_subsys_bits & ~final_bits; |
| 958 | added_bits = final_bits & ~root->actual_subsys_bits; | 976 | added_bits = final_bits & ~root->actual_subsys_bits; |
| @@ -1043,7 +1061,7 @@ static int cgroup_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
| 1043 | struct cgroupfs_root *root = vfs->mnt_sb->s_fs_info; | 1061 | struct cgroupfs_root *root = vfs->mnt_sb->s_fs_info; |
| 1044 | struct cgroup_subsys *ss; | 1062 | struct cgroup_subsys *ss; |
| 1045 | 1063 | ||
| 1046 | mutex_lock(&cgroup_mutex); | 1064 | mutex_lock(&cgroup_root_mutex); |
| 1047 | for_each_subsys(root, ss) | 1065 | for_each_subsys(root, ss) |
| 1048 | seq_printf(seq, ",%s", ss->name); | 1066 | seq_printf(seq, ",%s", ss->name); |
| 1049 | if (test_bit(ROOT_NOPREFIX, &root->flags)) | 1067 | if (test_bit(ROOT_NOPREFIX, &root->flags)) |
| @@ -1054,7 +1072,7 @@ static int cgroup_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
| 1054 | seq_puts(seq, ",clone_children"); | 1072 | seq_puts(seq, ",clone_children"); |
| 1055 | if (strlen(root->name)) | 1073 | if (strlen(root->name)) |
| 1056 | seq_printf(seq, ",name=%s", root->name); | 1074 | seq_printf(seq, ",name=%s", root->name); |
| 1057 | mutex_unlock(&cgroup_mutex); | 1075 | mutex_unlock(&cgroup_root_mutex); |
| 1058 | return 0; | 1076 | return 0; |
| 1059 | } | 1077 | } |
| 1060 | 1078 | ||
| @@ -1269,6 +1287,7 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data) | |||
| 1269 | 1287 | ||
| 1270 | mutex_lock(&cgrp->dentry->d_inode->i_mutex); | 1288 | mutex_lock(&cgrp->dentry->d_inode->i_mutex); |
| 1271 | mutex_lock(&cgroup_mutex); | 1289 | mutex_lock(&cgroup_mutex); |
| 1290 | mutex_lock(&cgroup_root_mutex); | ||
| 1272 | 1291 | ||
| 1273 | /* See what subsystems are wanted */ | 1292 | /* See what subsystems are wanted */ |
| 1274 | ret = parse_cgroupfs_options(data, &opts); | 1293 | ret = parse_cgroupfs_options(data, &opts); |
| @@ -1297,6 +1316,7 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data) | |||
| 1297 | out_unlock: | 1316 | out_unlock: |
| 1298 | kfree(opts.release_agent); | 1317 | kfree(opts.release_agent); |
| 1299 | kfree(opts.name); | 1318 | kfree(opts.name); |
| 1319 | mutex_unlock(&cgroup_root_mutex); | ||
| 1300 | mutex_unlock(&cgroup_mutex); | 1320 | mutex_unlock(&cgroup_mutex); |
| 1301 | mutex_unlock(&cgrp->dentry->d_inode->i_mutex); | 1321 | mutex_unlock(&cgrp->dentry->d_inode->i_mutex); |
| 1302 | return ret; | 1322 | return ret; |
| @@ -1481,6 +1501,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
| 1481 | int ret = 0; | 1501 | int ret = 0; |
| 1482 | struct super_block *sb; | 1502 | struct super_block *sb; |
| 1483 | struct cgroupfs_root *new_root; | 1503 | struct cgroupfs_root *new_root; |
| 1504 | struct inode *inode; | ||
| 1484 | 1505 | ||
| 1485 | /* First find the desired set of subsystems */ | 1506 | /* First find the desired set of subsystems */ |
| 1486 | mutex_lock(&cgroup_mutex); | 1507 | mutex_lock(&cgroup_mutex); |
| @@ -1514,7 +1535,6 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
| 1514 | /* We used the new root structure, so this is a new hierarchy */ | 1535 | /* We used the new root structure, so this is a new hierarchy */ |
| 1515 | struct list_head tmp_cg_links; | 1536 | struct list_head tmp_cg_links; |
| 1516 | struct cgroup *root_cgrp = &root->top_cgroup; | 1537 | struct cgroup *root_cgrp = &root->top_cgroup; |
| 1517 | struct inode *inode; | ||
| 1518 | struct cgroupfs_root *existing_root; | 1538 | struct cgroupfs_root *existing_root; |
| 1519 | const struct cred *cred; | 1539 | const struct cred *cred; |
| 1520 | int i; | 1540 | int i; |
| @@ -1528,18 +1548,14 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
| 1528 | 1548 | ||
| 1529 | mutex_lock(&inode->i_mutex); | 1549 | mutex_lock(&inode->i_mutex); |
| 1530 | mutex_lock(&cgroup_mutex); | 1550 | mutex_lock(&cgroup_mutex); |
| 1551 | mutex_lock(&cgroup_root_mutex); | ||
| 1531 | 1552 | ||
| 1532 | if (strlen(root->name)) { | 1553 | /* Check for name clashes with existing mounts */ |
| 1533 | /* Check for name clashes with existing mounts */ | 1554 | ret = -EBUSY; |
| 1534 | for_each_active_root(existing_root) { | 1555 | if (strlen(root->name)) |
| 1535 | if (!strcmp(existing_root->name, root->name)) { | 1556 | for_each_active_root(existing_root) |
| 1536 | ret = -EBUSY; | 1557 | if (!strcmp(existing_root->name, root->name)) |
| 1537 | mutex_unlock(&cgroup_mutex); | 1558 | goto unlock_drop; |
| 1538 | mutex_unlock(&inode->i_mutex); | ||
| 1539 | goto drop_new_super; | ||
| 1540 | } | ||
| 1541 | } | ||
| 1542 | } | ||
| 1543 | 1559 | ||
| 1544 | /* | 1560 | /* |
| 1545 | * We're accessing css_set_count without locking | 1561 | * We're accessing css_set_count without locking |
| @@ -1549,18 +1565,13 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
| 1549 | * have some link structures left over | 1565 | * have some link structures left over |
| 1550 | */ | 1566 | */ |
| 1551 | ret = allocate_cg_links(css_set_count, &tmp_cg_links); | 1567 | ret = allocate_cg_links(css_set_count, &tmp_cg_links); |
| 1552 | if (ret) { | 1568 | if (ret) |
| 1553 | mutex_unlock(&cgroup_mutex); | 1569 | goto unlock_drop; |
| 1554 | mutex_unlock(&inode->i_mutex); | ||
| 1555 | goto drop_new_super; | ||
| 1556 | } | ||
| 1557 | 1570 | ||
| 1558 | ret = rebind_subsystems(root, root->subsys_bits); | 1571 | ret = rebind_subsystems(root, root->subsys_bits); |
| 1559 | if (ret == -EBUSY) { | 1572 | if (ret == -EBUSY) { |
| 1560 | mutex_unlock(&cgroup_mutex); | ||
| 1561 | mutex_unlock(&inode->i_mutex); | ||
| 1562 | free_cg_links(&tmp_cg_links); | 1573 | free_cg_links(&tmp_cg_links); |
| 1563 | goto drop_new_super; | 1574 | goto unlock_drop; |
| 1564 | } | 1575 | } |
| 1565 | /* | 1576 | /* |
| 1566 | * There must be no failure case after here, since rebinding | 1577 | * There must be no failure case after here, since rebinding |
| @@ -1599,6 +1610,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
| 1599 | cred = override_creds(&init_cred); | 1610 | cred = override_creds(&init_cred); |
| 1600 | cgroup_populate_dir(root_cgrp); | 1611 | cgroup_populate_dir(root_cgrp); |
| 1601 | revert_creds(cred); | 1612 | revert_creds(cred); |
| 1613 | mutex_unlock(&cgroup_root_mutex); | ||
| 1602 | mutex_unlock(&cgroup_mutex); | 1614 | mutex_unlock(&cgroup_mutex); |
| 1603 | mutex_unlock(&inode->i_mutex); | 1615 | mutex_unlock(&inode->i_mutex); |
| 1604 | } else { | 1616 | } else { |
| @@ -1615,6 +1627,10 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
| 1615 | kfree(opts.name); | 1627 | kfree(opts.name); |
| 1616 | return dget(sb->s_root); | 1628 | return dget(sb->s_root); |
| 1617 | 1629 | ||
| 1630 | unlock_drop: | ||
| 1631 | mutex_unlock(&cgroup_root_mutex); | ||
| 1632 | mutex_unlock(&cgroup_mutex); | ||
| 1633 | mutex_unlock(&inode->i_mutex); | ||
| 1618 | drop_new_super: | 1634 | drop_new_super: |
| 1619 | deactivate_locked_super(sb); | 1635 | deactivate_locked_super(sb); |
| 1620 | drop_modules: | 1636 | drop_modules: |
| @@ -1639,6 +1655,7 @@ static void cgroup_kill_sb(struct super_block *sb) { | |||
| 1639 | BUG_ON(!list_empty(&cgrp->sibling)); | 1655 | BUG_ON(!list_empty(&cgrp->sibling)); |
| 1640 | 1656 | ||
| 1641 | mutex_lock(&cgroup_mutex); | 1657 | mutex_lock(&cgroup_mutex); |
| 1658 | mutex_lock(&cgroup_root_mutex); | ||
| 1642 | 1659 | ||
| 1643 | /* Rebind all subsystems back to the default hierarchy */ | 1660 | /* Rebind all subsystems back to the default hierarchy */ |
| 1644 | ret = rebind_subsystems(root, 0); | 1661 | ret = rebind_subsystems(root, 0); |
| @@ -1664,6 +1681,7 @@ static void cgroup_kill_sb(struct super_block *sb) { | |||
| 1664 | root_count--; | 1681 | root_count--; |
| 1665 | } | 1682 | } |
| 1666 | 1683 | ||
| 1684 | mutex_unlock(&cgroup_root_mutex); | ||
| 1667 | mutex_unlock(&cgroup_mutex); | 1685 | mutex_unlock(&cgroup_mutex); |
| 1668 | 1686 | ||
| 1669 | kill_litter_super(sb); | 1687 | kill_litter_super(sb); |
| @@ -2311,7 +2329,9 @@ static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft, | |||
| 2311 | return -EINVAL; | 2329 | return -EINVAL; |
| 2312 | if (!cgroup_lock_live_group(cgrp)) | 2330 | if (!cgroup_lock_live_group(cgrp)) |
| 2313 | return -ENODEV; | 2331 | return -ENODEV; |
| 2332 | mutex_lock(&cgroup_root_mutex); | ||
| 2314 | strcpy(cgrp->root->release_agent_path, buffer); | 2333 | strcpy(cgrp->root->release_agent_path, buffer); |
| 2334 | mutex_unlock(&cgroup_root_mutex); | ||
| 2315 | cgroup_unlock(); | 2335 | cgroup_unlock(); |
| 2316 | return 0; | 2336 | return 0; |
| 2317 | } | 2337 | } |
