diff options
author | Paul E. McKenney <paulmck@linux.vnet.ibm.com> | 2011-09-21 17:41:37 -0400 |
---|---|---|
committer | Paul E. McKenney <paulmck@linux.vnet.ibm.com> | 2011-12-11 13:31:21 -0500 |
commit | 389abd48efe1ceacb141b2fd151263b1bc432dbc (patch) | |
tree | f61a46a05c11b3d6ecd10b6beb73824bd5e88388 /kernel/rcutree_plugin.h | |
parent | af446b702c58b700cc5fa99f6edc78b99e55b995 (diff) |
rcu: Avoid RCU-preempt expedited grace-period botch
Because rcu_read_unlock_special() samples rcu_preempted_readers_exp(rnp)
after dropping rnp->lock, the following sequence of events is possible:
1. Task A exits its RCU read-side critical section, and removes
itself from the ->blkd_tasks list, releases rnp->lock, and is
then preempted. Task B remains on the ->blkd_tasks list, and
blocks the current expedited grace period.
2. Task B exits from its RCU read-side critical section and removes
itself from the ->blkd_tasks list. Because it is the last task
blocking the current expedited grace period, it ends that
expedited grace period.
3. Task A resumes, and samples rcu_preempted_readers_exp(rnp) which
of course indicates that nothing is blocking the nonexistent
expedited grace period. Task A is again preempted.
4. Some other CPU starts an expedited grace period. There are several
tasks blocking this expedited grace period queued on the
same rcu_node structure that Task A was using in step 1 above.
5. Task A examines its state and incorrectly concludes that it was
the last task blocking the expedited grace period on the current
rcu_node structure. It therefore reports completion up the
rcu_node tree.
6. The expedited grace period can then incorrectly complete before
the tasks blocked on this same rcu_node structure exit their
RCU read-side critical sections. Arbitrarily bad things happen.
This commit therefore takes a snapshot of rcu_preempted_readers_exp(rnp)
prior to dropping the lock, so that only the last task thinks that it is
the last task, thus avoiding the failure scenario laid out above.
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
Diffstat (limited to 'kernel/rcutree_plugin.h')
-rw-r--r-- | kernel/rcutree_plugin.h | 7 |
1 files changed, 5 insertions, 2 deletions
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 4b9b9f8a4184..798605317161 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h | |||
@@ -312,6 +312,7 @@ static noinline void rcu_read_unlock_special(struct task_struct *t) | |||
312 | { | 312 | { |
313 | int empty; | 313 | int empty; |
314 | int empty_exp; | 314 | int empty_exp; |
315 | int empty_exp_now; | ||
315 | unsigned long flags; | 316 | unsigned long flags; |
316 | struct list_head *np; | 317 | struct list_head *np; |
317 | #ifdef CONFIG_RCU_BOOST | 318 | #ifdef CONFIG_RCU_BOOST |
@@ -382,8 +383,10 @@ static noinline void rcu_read_unlock_special(struct task_struct *t) | |||
382 | /* | 383 | /* |
383 | * If this was the last task on the current list, and if | 384 | * If this was the last task on the current list, and if |
384 | * we aren't waiting on any CPUs, report the quiescent state. | 385 | * we aren't waiting on any CPUs, report the quiescent state. |
385 | * Note that rcu_report_unblock_qs_rnp() releases rnp->lock. | 386 | * Note that rcu_report_unblock_qs_rnp() releases rnp->lock, |
387 | * so we must take a snapshot of the expedited state. | ||
386 | */ | 388 | */ |
389 | empty_exp_now = !rcu_preempted_readers_exp(rnp); | ||
387 | if (!empty && !rcu_preempt_blocked_readers_cgp(rnp)) { | 390 | if (!empty && !rcu_preempt_blocked_readers_cgp(rnp)) { |
388 | trace_rcu_quiescent_state_report("preempt_rcu", | 391 | trace_rcu_quiescent_state_report("preempt_rcu", |
389 | rnp->gpnum, | 392 | rnp->gpnum, |
@@ -406,7 +409,7 @@ static noinline void rcu_read_unlock_special(struct task_struct *t) | |||
406 | * If this was the last task on the expedited lists, | 409 | * If this was the last task on the expedited lists, |
407 | * then we need to report up the rcu_node hierarchy. | 410 | * then we need to report up the rcu_node hierarchy. |
408 | */ | 411 | */ |
409 | if (!empty_exp && !rcu_preempted_readers_exp(rnp)) | 412 | if (!empty_exp && empty_exp_now) |
410 | rcu_report_exp_rnp(&rcu_preempt_state, rnp); | 413 | rcu_report_exp_rnp(&rcu_preempt_state, rnp); |
411 | } else { | 414 | } else { |
412 | local_irq_restore(flags); | 415 | local_irq_restore(flags); |