author		Linus Torvalds <torvalds@linux-foundation.org>	2014-07-10 14:38:23 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2014-07-10 14:38:23 -0400
commit	40f6123737d45b94ae0e4c89252a695ba6794e59 (patch)
tree	274f29b1f10c079230c0464f2531a765b2fe9505 /kernel
parent	a805cbf4c4d9e220e7512e92013e7996b834e78c (diff)
parent	76bb5ab8f6e3e7bebdcefec4146ff305e7d0b465 (diff)
Merge branch 'for-3.16-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup fixes from Tejun Heo:
 "Mostly fixes for the fallouts from the recent cgroup core changes.
  The decoupled nature of cgroup dynamic hierarchy management
  (hierarchies are created dynamically on mount but may or may not be
  reused once unmounted depending on remaining usages) led to more
  ugliness being added to kernfs.

  Hopefully, this is the last of it"

* 'for-3.16-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  cpuset: break kernfs active protection in cpuset_write_resmask()
  cgroup: fix a race between cgroup_mount() and cgroup_kill_sb()
  kernfs: introduce kernfs_pin_sb()
  cgroup: fix mount failure in a corner case
  cpuset,mempolicy: fix sleeping function called from invalid context
  cgroup: fix broken css_has_online_children()
Diffstat (limited to 'kernel')
-rw-r--r--	kernel/cgroup.c	58
-rw-r--r--	kernel/cpuset.c	20
2 files changed, 69 insertions(+), 9 deletions(-)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 7868fc3c0bc5..70776aec2562 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1648,10 +1648,13 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
 			 int flags, const char *unused_dev_name,
 			 void *data)
 {
+	struct super_block *pinned_sb = NULL;
+	struct cgroup_subsys *ss;
 	struct cgroup_root *root;
 	struct cgroup_sb_opts opts;
 	struct dentry *dentry;
 	int ret;
+	int i;
 	bool new_sb;
 
 	/*
@@ -1677,6 +1680,27 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
 		goto out_unlock;
 	}
 
+	/*
+	 * Destruction of cgroup root is asynchronous, so subsystems may
+	 * still be dying after the previous unmount.  Let's drain the
+	 * dying subsystems.  We just need to ensure that the ones
+	 * unmounted previously finish dying and don't care about new ones
+	 * starting.  Testing ref liveliness is good enough.
+	 */
+	for_each_subsys(ss, i) {
+		if (!(opts.subsys_mask & (1 << i)) ||
+		    ss->root == &cgrp_dfl_root)
+			continue;
+
+		if (!percpu_ref_tryget_live(&ss->root->cgrp.self.refcnt)) {
+			mutex_unlock(&cgroup_mutex);
+			msleep(10);
+			ret = restart_syscall();
+			goto out_free;
+		}
+		cgroup_put(&ss->root->cgrp);
+	}
+
 	for_each_root(root) {
 		bool name_match = false;
 
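The drain loop above relies on a percpu_ref property: once percpu_ref_kill() has run, percpu_ref_tryget_live() fails even while earlier references are still draining, so a failed tryget reliably means "dying, just wait and retry". A minimal sketch of that lifecycle, not taken from this patch (the ref name and release callback are illustrative, and percpu_ref_init() is shown with the modern four-argument signature rather than the two-argument form the 3.16-era code used):

#include <linux/percpu-refcount.h>

static struct percpu_ref root_ref;

static void root_release(struct percpu_ref *ref)
{
	/* runs after percpu_ref_kill() once the last user drops out */
}

static int drain_demo(void)
{
	int ret = percpu_ref_init(&root_ref, root_release, 0, GFP_KERNEL);

	if (ret)
		return ret;

	/* live ref: tryget_live succeeds and must be balanced by a put */
	WARN_ON(!percpu_ref_tryget_live(&root_ref));
	percpu_ref_put(&root_ref);

	percpu_ref_kill(&root_ref);	/* switch the ref to dying mode */

	/* dying: tryget_live now fails, even before root_release() runs */
	WARN_ON(percpu_ref_tryget_live(&root_ref));
	return 0;
}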
@@ -1717,15 +1741,23 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
 		}
 
 		/*
-		 * A root's lifetime is governed by its root cgroup.
-		 * tryget_live failure indicate that the root is being
-		 * destroyed.  Wait for destruction to complete so that the
-		 * subsystems are free.  We can use wait_queue for the wait
-		 * but this path is super cold.  Let's just sleep for a bit
-		 * and retry.
+		 * We want to reuse @root whose lifetime is governed by its
+		 * ->cgrp.  Let's check whether @root is alive and keep it
+		 * that way.  As cgroup_kill_sb() can happen anytime, we
+		 * want to block it by pinning the sb so that @root doesn't
+		 * get killed before mount is complete.
+		 *
+		 * With the sb pinned, tryget_live can reliably indicate
+		 * whether @root can be reused.  If it's being killed,
+		 * drain it.  We can use wait_queue for the wait but this
+		 * path is super cold.  Let's just sleep a bit and retry.
 		 */
-		if (!percpu_ref_tryget_live(&root->cgrp.self.refcnt)) {
+		pinned_sb = kernfs_pin_sb(root->kf_root, NULL);
+		if (IS_ERR(pinned_sb) ||
+		    !percpu_ref_tryget_live(&root->cgrp.self.refcnt)) {
 			mutex_unlock(&cgroup_mutex);
+			if (!IS_ERR_OR_NULL(pinned_sb))
+				deactivate_super(pinned_sb);
 			msleep(10);
 			ret = restart_syscall();
 			goto out_free;
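kernfs_pin_sb() is the helper this series introduces: it finds the superblock backing a kernfs root and takes an s_active reference so kernfs_kill_sb() cannot complete underneath the caller. A sketch of the calling convention as the hunk above uses it, with an assumed kf_root argument; per the error handling above, the return value is the pinned sb, NULL when no matching sb exists, or an ERR_PTR when the sb is already going away:

#include <linux/fs.h>
#include <linux/kernfs.h>

static void pin_demo(struct kernfs_root *kf_root)
{
	struct super_block *sb;

	sb = kernfs_pin_sb(kf_root, NULL);	/* NULL: no namespace tag */
	if (IS_ERR_OR_NULL(sb))
		return;		/* nothing to pin, or the sb is already dying */

	/* sb->s_active is held: kernfs_kill_sb() is blocked until we put it */

	deactivate_super(sb);			/* drop the pin */
}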
@@ -1770,6 +1802,16 @@ out_free:
 					 CGROUP_SUPER_MAGIC, &new_sb);
 	if (IS_ERR(dentry) || !new_sb)
 		cgroup_put(&root->cgrp);
+
+	/*
+	 * If @pinned_sb, we're reusing an existing root and holding an
+	 * extra ref on its sb.  Mount is complete.  Put the extra ref.
+	 */
+	if (pinned_sb) {
+		WARN_ON(new_sb);
+		deactivate_super(pinned_sb);
+	}
+
 	return dentry;
 }
 
@@ -3328,7 +3370,7 @@ bool css_has_online_children(struct cgroup_subsys_state *css)
 
 	rcu_read_lock();
 	css_for_each_child(child, css) {
-		if (css->flags & CSS_ONLINE) {
+		if (child->flags & CSS_ONLINE) {
 			ret = true;
 			break;
 		}
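The one-character css_has_online_children() fix is easy to misread: the old code tested the parent's flags inside the child loop, so the function returned true for any online css that had at least one child, whether or not any child was actually online. Restated standalone for clarity (the function name here is illustrative, not the kernel's):

/* Illustrative restatement of the fixed predicate. */
static bool has_online_child(struct cgroup_subsys_state *css)
{
	struct cgroup_subsys_state *child;
	bool ret = false;

	rcu_read_lock();
	css_for_each_child(child, css) {
		/* test the child being visited, not the parent @css */
		if (child->flags & CSS_ONLINE) {
			ret = true;
			break;
		}
	}
	rcu_read_unlock();
	return ret;
}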
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index f6b33c696224..116a4164720a 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1181,7 +1181,13 @@ done:
 
 int current_cpuset_is_being_rebound(void)
 {
-	return task_cs(current) == cpuset_being_rebound;
+	int ret;
+
+	rcu_read_lock();
+	ret = task_cs(current) == cpuset_being_rebound;
+	rcu_read_unlock();
+
+	return ret;
 }
 
 static int update_relax_domain_level(struct cpuset *cs, s64 val)
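task_cs() resolves a task's cpuset through an RCU-protected css pointer, so the old one-liner dereferenced RCU-managed state without rcu_read_lock(). Because the function only needs a pointer-equality snapshot and never touches the cpuset after the read section ends, taking the lock locally around the comparison is sufficient. A reduced before/after sketch of the pattern (not a drop-in; task_cs() and cpuset_being_rebound are internal to kernel/cpuset.c):

/* Before (buggy): RCU-protected deref with no read-side section. */
int current_cpuset_is_being_rebound(void)
{
	return task_cs(current) == cpuset_being_rebound;
}

/* After: snapshot the comparison inside rcu_read_lock(); returning
 * the plain int is safe because nothing is dereferenced afterwards. */
int current_cpuset_is_being_rebound(void)
{
	int ret;

	rcu_read_lock();
	ret = task_cs(current) == cpuset_being_rebound;
	rcu_read_unlock();

	return ret;
}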
@@ -1617,7 +1623,17 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
 	 * resources, wait for the previously scheduled operations before
 	 * proceeding, so that we don't end up keep removing tasks added
 	 * after execution capability is restored.
+	 *
+	 * cpuset_hotplug_work calls back into cgroup core via
+	 * cgroup_transfer_tasks() and waiting for it from a cgroupfs
+	 * operation like this one can lead to a deadlock through kernfs
+	 * active_ref protection.  Let's break the protection.  Losing the
+	 * protection is okay as we check whether @cs is online after
+	 * grabbing cpuset_mutex anyway.  This only happens on the legacy
+	 * hierarchies.
 	 */
+	css_get(&cs->css);
+	kernfs_break_active_protection(of->kn);
 	flush_work(&cpuset_hotplug_work);
 
 	mutex_lock(&cpuset_mutex);
@@ -1645,6 +1661,8 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
 	free_trial_cpuset(trialcs);
 out_unlock:
 	mutex_unlock(&cpuset_mutex);
+	kernfs_unbreak_active_protection(of->kn);
+	css_put(&cs->css);
 	return retval ?: nbytes;
 }
 
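Taken together, the two cpuset_write_resmask() hunks form a reusable shape for any kernfs handler that must flush work which itself takes kernfs active references: hold your own css reference, break the active protection, wait, then restore the protection and drop the reference, re-validating that the object is still online under the subsystem mutex in between. A schematic sketch under those assumptions (the css, kernfs node, and work item here are stand-ins for the real cpuset ones):

#include <linux/cgroup.h>
#include <linux/kernfs.h>
#include <linux/workqueue.h>

/*
 * Schematic only: @css, @kn and @work stand in for the cpuset css,
 * the kernfs node of the file being written, and cpuset_hotplug_work.
 */
static void flush_outside_active_protection(struct cgroup_subsys_state *css,
					    struct kernfs_node *kn,
					    struct work_struct *work)
{
	css_get(css);				/* keep the object alive ourselves */
	kernfs_break_active_protection(kn);	/* let our active ref drain */

	flush_work(work);	/* may re-enter kernfs without deadlocking */

	/* the caller re-checks "still online?" under its own mutex here */

	kernfs_unbreak_active_protection(kn);
	css_put(css);
}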