aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/cpuset.c
diff options
context:
space:
mode:
authorPaul Jackson <pj@sgi.com>2006-07-23 14:36:08 -0400
committerLinus Torvalds <torvalds@g5.osdl.org>2006-07-23 16:03:05 -0400
commitabb5a5cc6bba1516403146c5b79036fe843beb70 (patch)
treef7a1a4cc9381a1fc38be60cfe9428d9c7bd0aabe /kernel/cpuset.c
parentaa95387774039096c11803c04011f1aa42d85758 (diff)
[PATCH] Cpuset: fix ABBA deadlock with cpu hotplug lock
Fix ABBA deadlock between lock_cpu_hotplug() and the cpuset callback_mutex lock. It only happens on cpu_exclusive cpusets, due to the dynamic sched domain code trying to take the cpu hotplug lock inside the cpuset callback_mutex lock. This bug has apparently been here for several months, but didn't get hit until the right customer load on a large system. This fix appears right from inspection, but it will take a few more days running it on that customers workload to be confident we nailed it. We don't have any other reproducible test case. The cpu_hotplug_lock() tends to cover large runs of code. The other places that hold both that lock and the cpuset callback mutex lock always nest the cpuset lock inside the hotplug lock. This place tries to do the reverse, risking an ABBA deadlock. This is in the cpuset_rmdir() code, where we: * take the callback_mutex lock * mark the cpuset CS_REMOVED * call update_cpu_domains for cpu_exclusive cpusets * in that call, take the cpu_hotplug lock if the cpuset is marked for removal. Thanks to Jack Steiner for identifying this deadlock. The fix is to tear down the dynamic sched domain before we grab the cpuset callback_mutex lock. This way, the two locks are serialized, with the hotplug lock taken and released before trying for the cpuset lock. I suspect that this bug was introduced when I changed the cpuset locking from one lock to two. The dynamic sched domain dependency on cpu_exclusive cpusets and its hotplug hooks were added to this code earlier, when cpusets had only a single lock. It may well have been fine then. Signed-off-by: Paul Jackson <pj@sgi.com> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'kernel/cpuset.c')
-rw-r--r--kernel/cpuset.c24
1 files changed, 21 insertions, 3 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index c232dc077438..1a649f2bb9bb 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -762,6 +762,8 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
762 * 762 *
763 * Call with manage_mutex held. May nest a call to the 763 * Call with manage_mutex held. May nest a call to the
764 * lock_cpu_hotplug()/unlock_cpu_hotplug() pair. 764 * lock_cpu_hotplug()/unlock_cpu_hotplug() pair.
765 * Must not be called holding callback_mutex, because we must
766 * not call lock_cpu_hotplug() while holding callback_mutex.
765 */ 767 */
766 768
767static void update_cpu_domains(struct cpuset *cur) 769static void update_cpu_domains(struct cpuset *cur)
@@ -781,7 +783,7 @@ static void update_cpu_domains(struct cpuset *cur)
781 if (is_cpu_exclusive(c)) 783 if (is_cpu_exclusive(c))
782 cpus_andnot(pspan, pspan, c->cpus_allowed); 784 cpus_andnot(pspan, pspan, c->cpus_allowed);
783 } 785 }
784 if (is_removed(cur) || !is_cpu_exclusive(cur)) { 786 if (!is_cpu_exclusive(cur)) {
785 cpus_or(pspan, pspan, cur->cpus_allowed); 787 cpus_or(pspan, pspan, cur->cpus_allowed);
786 if (cpus_equal(pspan, cur->cpus_allowed)) 788 if (cpus_equal(pspan, cur->cpus_allowed))
787 return; 789 return;
@@ -1917,6 +1919,17 @@ static int cpuset_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1917 return cpuset_create(c_parent, dentry->d_name.name, mode | S_IFDIR); 1919 return cpuset_create(c_parent, dentry->d_name.name, mode | S_IFDIR);
1918} 1920}
1919 1921
1922/*
1923 * Locking note on the strange update_flag() call below:
1924 *
1925 * If the cpuset being removed is marked cpu_exclusive, then simulate
1926 * turning cpu_exclusive off, which will call update_cpu_domains().
1927 * The lock_cpu_hotplug() call in update_cpu_domains() must not be
1928 * made while holding callback_mutex. Elsewhere the kernel nests
1929 * callback_mutex inside lock_cpu_hotplug() calls. So the reverse
1930 * nesting would risk an ABBA deadlock.
1931 */
1932
1920static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry) 1933static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
1921{ 1934{
1922 struct cpuset *cs = dentry->d_fsdata; 1935 struct cpuset *cs = dentry->d_fsdata;
@@ -1936,11 +1949,16 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
1936 mutex_unlock(&manage_mutex); 1949 mutex_unlock(&manage_mutex);
1937 return -EBUSY; 1950 return -EBUSY;
1938 } 1951 }
1952 if (is_cpu_exclusive(cs)) {
1953 int retval = update_flag(CS_CPU_EXCLUSIVE, cs, "0");
1954 if (retval < 0) {
1955 mutex_unlock(&manage_mutex);
1956 return retval;
1957 }
1958 }
1939 parent = cs->parent; 1959 parent = cs->parent;
1940 mutex_lock(&callback_mutex); 1960 mutex_lock(&callback_mutex);
1941 set_bit(CS_REMOVED, &cs->flags); 1961 set_bit(CS_REMOVED, &cs->flags);
1942 if (is_cpu_exclusive(cs))
1943 update_cpu_domains(cs);
1944 list_del(&cs->sibling); /* delete my sibling from parent->children */ 1962 list_del(&cs->sibling); /* delete my sibling from parent->children */
1945 spin_lock(&cs->dentry->d_lock); 1963 spin_lock(&cs->dentry->d_lock);
1946 d = dget(cs->dentry); 1964 d = dget(cs->dentry);