diff options
author | Tejun Heo <tj@kernel.org> | 2011-12-12 21:12:21 -0500 |
---|---|---|
committer | Tejun Heo <tj@kernel.org> | 2011-12-12 21:12:21 -0500 |
commit | e25e2cbb4c6679bed5f52fb0f2cc381688297901 (patch) | |
tree | ba11d495cba21d3d233d4e25f94676ef9d0055ae /kernel | |
parent | 467de1fc67d1bd2954eaac7019c564f28fa2b6a5 (diff) |
cgroup: add cgroup_root_mutex
cgroup wants to make the threadgroup stable while modifying cgroup
hierarchies, which will introduce a locking dependency on
cred_guard_mutex from cgroup_mutex. This unfortunately completes a
circular dependency.
A. cgroup_mutex -> cred_guard_mutex -> s_type->i_mutex_key -> namespace_sem
B. namespace_sem -> cgroup_mutex
B comes only from cgroup_show_options(), and this patch breaks the cycle
by introducing another mutex, cgroup_root_mutex, which nests inside
cgroup_mutex and protects cgroupfs_root.
Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Li Zefan <lizf@cn.fujitsu.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/cgroup.c | 64 |
1 file changed, 42 insertions, 22 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index d9d5648f3cdc..6545fd61b10d 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
@@ -63,7 +63,24 @@ | |||
63 | 63 | ||
64 | #include <linux/atomic.h> | 64 | #include <linux/atomic.h> |
65 | 65 | ||
66 | /* | ||
67 | * cgroup_mutex is the master lock. Any modification to cgroup or its | ||
68 | * hierarchy must be performed while holding it. | ||
69 | * | ||
70 | * cgroup_root_mutex nests inside cgroup_mutex and should be held to modify | ||
71 | * cgroupfs_root of any cgroup hierarchy - subsys list, flags, | ||
72 | * release_agent_path and so on. Modifying requires both cgroup_mutex and | ||
73 | * cgroup_root_mutex. Readers can acquire either of the two. This is to | ||
74 | * break the following locking order cycle. | ||
75 | * | ||
76 | * A. cgroup_mutex -> cred_guard_mutex -> s_type->i_mutex_key -> namespace_sem | ||
77 | * B. namespace_sem -> cgroup_mutex | ||
78 | * | ||
79 | * B happens only through cgroup_show_options() and using cgroup_root_mutex | ||
80 | * breaks it. | ||
81 | */ | ||
66 | static DEFINE_MUTEX(cgroup_mutex); | 82 | static DEFINE_MUTEX(cgroup_mutex); |
83 | static DEFINE_MUTEX(cgroup_root_mutex); | ||
67 | 84 | ||
68 | /* | 85 | /* |
69 | * Generate an array of cgroup subsystem pointers. At boot time, this is | 86 | * Generate an array of cgroup subsystem pointers. At boot time, this is |
@@ -953,6 +970,7 @@ static int rebind_subsystems(struct cgroupfs_root *root, | |||
953 | int i; | 970 | int i; |
954 | 971 | ||
955 | BUG_ON(!mutex_is_locked(&cgroup_mutex)); | 972 | BUG_ON(!mutex_is_locked(&cgroup_mutex)); |
973 | BUG_ON(!mutex_is_locked(&cgroup_root_mutex)); | ||
956 | 974 | ||
957 | removed_bits = root->actual_subsys_bits & ~final_bits; | 975 | removed_bits = root->actual_subsys_bits & ~final_bits; |
958 | added_bits = final_bits & ~root->actual_subsys_bits; | 976 | added_bits = final_bits & ~root->actual_subsys_bits; |
@@ -1043,7 +1061,7 @@ static int cgroup_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
1043 | struct cgroupfs_root *root = vfs->mnt_sb->s_fs_info; | 1061 | struct cgroupfs_root *root = vfs->mnt_sb->s_fs_info; |
1044 | struct cgroup_subsys *ss; | 1062 | struct cgroup_subsys *ss; |
1045 | 1063 | ||
1046 | mutex_lock(&cgroup_mutex); | 1064 | mutex_lock(&cgroup_root_mutex); |
1047 | for_each_subsys(root, ss) | 1065 | for_each_subsys(root, ss) |
1048 | seq_printf(seq, ",%s", ss->name); | 1066 | seq_printf(seq, ",%s", ss->name); |
1049 | if (test_bit(ROOT_NOPREFIX, &root->flags)) | 1067 | if (test_bit(ROOT_NOPREFIX, &root->flags)) |
@@ -1054,7 +1072,7 @@ static int cgroup_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
1054 | seq_puts(seq, ",clone_children"); | 1072 | seq_puts(seq, ",clone_children"); |
1055 | if (strlen(root->name)) | 1073 | if (strlen(root->name)) |
1056 | seq_printf(seq, ",name=%s", root->name); | 1074 | seq_printf(seq, ",name=%s", root->name); |
1057 | mutex_unlock(&cgroup_mutex); | 1075 | mutex_unlock(&cgroup_root_mutex); |
1058 | return 0; | 1076 | return 0; |
1059 | } | 1077 | } |
1060 | 1078 | ||
@@ -1269,6 +1287,7 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data) | |||
1269 | 1287 | ||
1270 | mutex_lock(&cgrp->dentry->d_inode->i_mutex); | 1288 | mutex_lock(&cgrp->dentry->d_inode->i_mutex); |
1271 | mutex_lock(&cgroup_mutex); | 1289 | mutex_lock(&cgroup_mutex); |
1290 | mutex_lock(&cgroup_root_mutex); | ||
1272 | 1291 | ||
1273 | /* See what subsystems are wanted */ | 1292 | /* See what subsystems are wanted */ |
1274 | ret = parse_cgroupfs_options(data, &opts); | 1293 | ret = parse_cgroupfs_options(data, &opts); |
@@ -1297,6 +1316,7 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data) | |||
1297 | out_unlock: | 1316 | out_unlock: |
1298 | kfree(opts.release_agent); | 1317 | kfree(opts.release_agent); |
1299 | kfree(opts.name); | 1318 | kfree(opts.name); |
1319 | mutex_unlock(&cgroup_root_mutex); | ||
1300 | mutex_unlock(&cgroup_mutex); | 1320 | mutex_unlock(&cgroup_mutex); |
1301 | mutex_unlock(&cgrp->dentry->d_inode->i_mutex); | 1321 | mutex_unlock(&cgrp->dentry->d_inode->i_mutex); |
1302 | return ret; | 1322 | return ret; |
@@ -1481,6 +1501,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
1481 | int ret = 0; | 1501 | int ret = 0; |
1482 | struct super_block *sb; | 1502 | struct super_block *sb; |
1483 | struct cgroupfs_root *new_root; | 1503 | struct cgroupfs_root *new_root; |
1504 | struct inode *inode; | ||
1484 | 1505 | ||
1485 | /* First find the desired set of subsystems */ | 1506 | /* First find the desired set of subsystems */ |
1486 | mutex_lock(&cgroup_mutex); | 1507 | mutex_lock(&cgroup_mutex); |
@@ -1514,7 +1535,6 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
1514 | /* We used the new root structure, so this is a new hierarchy */ | 1535 | /* We used the new root structure, so this is a new hierarchy */ |
1515 | struct list_head tmp_cg_links; | 1536 | struct list_head tmp_cg_links; |
1516 | struct cgroup *root_cgrp = &root->top_cgroup; | 1537 | struct cgroup *root_cgrp = &root->top_cgroup; |
1517 | struct inode *inode; | ||
1518 | struct cgroupfs_root *existing_root; | 1538 | struct cgroupfs_root *existing_root; |
1519 | const struct cred *cred; | 1539 | const struct cred *cred; |
1520 | int i; | 1540 | int i; |
@@ -1528,18 +1548,14 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
1528 | 1548 | ||
1529 | mutex_lock(&inode->i_mutex); | 1549 | mutex_lock(&inode->i_mutex); |
1530 | mutex_lock(&cgroup_mutex); | 1550 | mutex_lock(&cgroup_mutex); |
1551 | mutex_lock(&cgroup_root_mutex); | ||
1531 | 1552 | ||
1532 | if (strlen(root->name)) { | 1553 | /* Check for name clashes with existing mounts */ |
1533 | /* Check for name clashes with existing mounts */ | 1554 | ret = -EBUSY; |
1534 | for_each_active_root(existing_root) { | 1555 | if (strlen(root->name)) |
1535 | if (!strcmp(existing_root->name, root->name)) { | 1556 | for_each_active_root(existing_root) |
1536 | ret = -EBUSY; | 1557 | if (!strcmp(existing_root->name, root->name)) |
1537 | mutex_unlock(&cgroup_mutex); | 1558 | goto unlock_drop; |
1538 | mutex_unlock(&inode->i_mutex); | ||
1539 | goto drop_new_super; | ||
1540 | } | ||
1541 | } | ||
1542 | } | ||
1543 | 1559 | ||
1544 | /* | 1560 | /* |
1545 | * We're accessing css_set_count without locking | 1561 | * We're accessing css_set_count without locking |
@@ -1549,18 +1565,13 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
1549 | * have some link structures left over | 1565 | * have some link structures left over |
1550 | */ | 1566 | */ |
1551 | ret = allocate_cg_links(css_set_count, &tmp_cg_links); | 1567 | ret = allocate_cg_links(css_set_count, &tmp_cg_links); |
1552 | if (ret) { | 1568 | if (ret) |
1553 | mutex_unlock(&cgroup_mutex); | 1569 | goto unlock_drop; |
1554 | mutex_unlock(&inode->i_mutex); | ||
1555 | goto drop_new_super; | ||
1556 | } | ||
1557 | 1570 | ||
1558 | ret = rebind_subsystems(root, root->subsys_bits); | 1571 | ret = rebind_subsystems(root, root->subsys_bits); |
1559 | if (ret == -EBUSY) { | 1572 | if (ret == -EBUSY) { |
1560 | mutex_unlock(&cgroup_mutex); | ||
1561 | mutex_unlock(&inode->i_mutex); | ||
1562 | free_cg_links(&tmp_cg_links); | 1573 | free_cg_links(&tmp_cg_links); |
1563 | goto drop_new_super; | 1574 | goto unlock_drop; |
1564 | } | 1575 | } |
1565 | /* | 1576 | /* |
1566 | * There must be no failure case after here, since rebinding | 1577 | * There must be no failure case after here, since rebinding |
@@ -1599,6 +1610,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
1599 | cred = override_creds(&init_cred); | 1610 | cred = override_creds(&init_cred); |
1600 | cgroup_populate_dir(root_cgrp); | 1611 | cgroup_populate_dir(root_cgrp); |
1601 | revert_creds(cred); | 1612 | revert_creds(cred); |
1613 | mutex_unlock(&cgroup_root_mutex); | ||
1602 | mutex_unlock(&cgroup_mutex); | 1614 | mutex_unlock(&cgroup_mutex); |
1603 | mutex_unlock(&inode->i_mutex); | 1615 | mutex_unlock(&inode->i_mutex); |
1604 | } else { | 1616 | } else { |
@@ -1615,6 +1627,10 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
1615 | kfree(opts.name); | 1627 | kfree(opts.name); |
1616 | return dget(sb->s_root); | 1628 | return dget(sb->s_root); |
1617 | 1629 | ||
1630 | unlock_drop: | ||
1631 | mutex_unlock(&cgroup_root_mutex); | ||
1632 | mutex_unlock(&cgroup_mutex); | ||
1633 | mutex_unlock(&inode->i_mutex); | ||
1618 | drop_new_super: | 1634 | drop_new_super: |
1619 | deactivate_locked_super(sb); | 1635 | deactivate_locked_super(sb); |
1620 | drop_modules: | 1636 | drop_modules: |
@@ -1639,6 +1655,7 @@ static void cgroup_kill_sb(struct super_block *sb) { | |||
1639 | BUG_ON(!list_empty(&cgrp->sibling)); | 1655 | BUG_ON(!list_empty(&cgrp->sibling)); |
1640 | 1656 | ||
1641 | mutex_lock(&cgroup_mutex); | 1657 | mutex_lock(&cgroup_mutex); |
1658 | mutex_lock(&cgroup_root_mutex); | ||
1642 | 1659 | ||
1643 | /* Rebind all subsystems back to the default hierarchy */ | 1660 | /* Rebind all subsystems back to the default hierarchy */ |
1644 | ret = rebind_subsystems(root, 0); | 1661 | ret = rebind_subsystems(root, 0); |
@@ -1664,6 +1681,7 @@ static void cgroup_kill_sb(struct super_block *sb) { | |||
1664 | root_count--; | 1681 | root_count--; |
1665 | } | 1682 | } |
1666 | 1683 | ||
1684 | mutex_unlock(&cgroup_root_mutex); | ||
1667 | mutex_unlock(&cgroup_mutex); | 1685 | mutex_unlock(&cgroup_mutex); |
1668 | 1686 | ||
1669 | kill_litter_super(sb); | 1687 | kill_litter_super(sb); |
@@ -2311,7 +2329,9 @@ static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft, | |||
2311 | return -EINVAL; | 2329 | return -EINVAL; |
2312 | if (!cgroup_lock_live_group(cgrp)) | 2330 | if (!cgroup_lock_live_group(cgrp)) |
2313 | return -ENODEV; | 2331 | return -ENODEV; |
2332 | mutex_lock(&cgroup_root_mutex); | ||
2314 | strcpy(cgrp->root->release_agent_path, buffer); | 2333 | strcpy(cgrp->root->release_agent_path, buffer); |
2334 | mutex_unlock(&cgroup_root_mutex); | ||
2315 | cgroup_unlock(); | 2335 | cgroup_unlock(); |
2316 | return 0; | 2336 | return 0; |
2317 | } | 2337 | } |