aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPaul E. McKenney <paul.mckenney@linaro.org>2011-07-14 15:24:11 -0400
committerPaul E. McKenney <paulmck@linux.vnet.ibm.com>2011-07-20 00:38:52 -0400
commit7765be2fec0f476fcd61812d5f9406b04c765020 (patch)
treee89ccb3fbd5655090b05f608f8e1d39924f23a9a
parent131906b0062ddde7f85bbe67754983c754648bd8 (diff)
rcu: Fix RCU_BOOST race handling current->rcu_read_unlock_special
The RCU_BOOST commits for TREE_PREEMPT_RCU introduced an other-task write to a new RCU_READ_UNLOCK_BOOSTED bit in the task_struct structure's ->rcu_read_unlock_special field, but, as noted by Steven Rostedt, without correctly synchronizing all accesses to ->rcu_read_unlock_special. This could result in bits in ->rcu_read_unlock_special being spuriously set and cleared due to conflicting accesses, which in turn could result in deadlocks between the rcu_node structure's ->lock and the scheduler's rq and pi locks. These deadlocks would result from RCU incorrectly believing that the just-ended RCU read-side critical section had been preempted and/or boosted. If that RCU read-side critical section was executed with either rq or pi locks held, RCU's ensuing (incorrect) calls to the scheduler would cause the scheduler to attempt to once again acquire the rq and pi locks, resulting in deadlock. More complex deadlock cycles are also possible, involving multiple rq and pi locks as well as locks from multiple rcu_node structures. This commit fixes synchronization by creating ->rcu_boosted field in task_struct that is accessed and modified only when holding the ->lock in the rcu_node structure on which the task is queued (on that rcu_node structure's ->blkd_tasks list). This results in tasks accessing only their own current->rcu_read_unlock_special fields, making unsynchronized access once again legal, and keeping the rcu_read_unlock() fastpath free of atomic instructions and memory barriers. The reason that the rcu_read_unlock() fastpath does not need to access the new current->rcu_boosted field is that this new field cannot be non-zero unless the RCU_READ_UNLOCK_BLOCKED bit is set in the current->rcu_read_unlock_special field. Therefore, rcu_read_unlock() need only test current->rcu_read_unlock_special: if that is zero, then current->rcu_boosted must also be zero. 
This bug does not affect TINY_PREEMPT_RCU because this implementation of RCU accesses current->rcu_read_unlock_special with irqs disabled, thus preventing races on the !SMP systems that TINY_PREEMPT_RCU runs on. Maybe-reported-by: Dave Jones <davej@redhat.com> Maybe-reported-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com> Reported-by: Steven Rostedt <rostedt@goodmis.org> Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org> Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Reviewed-by: Steven Rostedt <rostedt@goodmis.org>
-rw-r--r--include/linux/sched.h3
-rw-r--r--kernel/rcutree_plugin.h8
2 files changed, 9 insertions, 2 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 496770a96487..76676a407e4a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1254,6 +1254,9 @@ struct task_struct {
 #ifdef CONFIG_PREEMPT_RCU
 	int rcu_read_lock_nesting;
 	char rcu_read_unlock_special;
+#if defined(CONFIG_RCU_BOOST) && defined(CONFIG_TREE_PREEMPT_RCU)
+	int rcu_boosted;
+#endif /* #if defined(CONFIG_RCU_BOOST) && defined(CONFIG_TREE_PREEMPT_RCU) */
 	struct list_head rcu_node_entry;
 #endif /* #ifdef CONFIG_PREEMPT_RCU */
 #ifdef CONFIG_TREE_PREEMPT_RCU
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 6abef3cfcbc1..3a0ae0355222 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -342,6 +342,11 @@ static noinline void rcu_read_unlock_special(struct task_struct *t)
 #ifdef CONFIG_RCU_BOOST
 		if (&t->rcu_node_entry == rnp->boost_tasks)
 			rnp->boost_tasks = np;
+		/* Snapshot and clear ->rcu_boosted with rcu_node lock held. */
+		if (t->rcu_boosted) {
+			special |= RCU_READ_UNLOCK_BOOSTED;
+			t->rcu_boosted = 0;
+		}
 #endif /* #ifdef CONFIG_RCU_BOOST */
 		t->rcu_blocked_node = NULL;
 
@@ -358,7 +363,6 @@ static noinline void rcu_read_unlock_special(struct task_struct *t)
 #ifdef CONFIG_RCU_BOOST
 		/* Unboost if we were boosted. */
 		if (special & RCU_READ_UNLOCK_BOOSTED) {
-			t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BOOSTED;
 			rt_mutex_unlock(t->rcu_boost_mutex);
 			t->rcu_boost_mutex = NULL;
 		}
@@ -1176,7 +1180,7 @@ static int rcu_boost(struct rcu_node *rnp)
 	t = container_of(tb, struct task_struct, rcu_node_entry);
 	rt_mutex_init_proxy_locked(&mtx, t);
 	t->rcu_boost_mutex = &mtx;
-	t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BOOSTED;
+	t->rcu_boosted = 1;
 	raw_spin_unlock_irqrestore(&rnp->lock, flags);
 	rt_mutex_lock(&mtx);  /* Side effect: boosts task t's priority. */
 	rt_mutex_unlock(&mtx);  /* Keep lockdep happy. */