aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/rcutiny_plugin.h
diff options
context:
space:
mode:
authorPaul E. McKenney <paul.mckenney@linaro.org>2012-01-11 17:40:20 -0500
committerPaul E. McKenney <paulmck@linux.vnet.ibm.com>2012-02-21 12:03:39 -0500
commit26861faf896a4cfdc4243281e5c305755f4bad52 (patch)
treec93082f55b437b3b949b4bf0207d9be1a6fcd1fd /kernel/rcutiny_plugin.h
parentf38bd1020f797694b6b5e06f5f06c87688fc84c0 (diff)
rcu: Protect __rcu_read_unlock() against scheduler-using irq handlers
This commit ports commit #10f39bb1b2 (rcu: protect __rcu_read_unlock() against scheduler-using irq handlers) from TREE_PREEMPT_RCU to TINY_PREEMPT_RCU. The following is a corresponding port of that commit message. The addition of RCU read-side critical sections within runqueue and priority-inheritance critical sections introduced some deadlocks, for example, involving interrupts from __rcu_read_unlock() where the interrupt handlers call wake_up(). This situation can cause the instance of __rcu_read_unlock() invoked from interrupt to do some of the processing that would otherwise have been carried out by the task-level instance of __rcu_read_unlock(). When the interrupt-level instance of __rcu_read_unlock() is called with a scheduler lock held from interrupt-entry/exit situations where in_irq() returns false, deadlock can result. Of course, in a UP kernel, there are not really any deadlocks, but the upper-level critical section can still be be fatally confused by the lower-level critical section changing things out from under it. This commit resolves these deadlocks by using negative values of the per-task ->rcu_read_lock_nesting counter to indicate that an instance of __rcu_read_unlock() is in flight, which in turn prevents instances from interrupt handlers from doing any special processing. Note that nested rcu_read_lock()/rcu_read_unlock() pairs are still permitted, but they will never see ->rcu_read_lock_nesting go to zero, and will therefore never invoke rcu_read_unlock_special(), thus preventing them from seeing the RCU_READ_UNLOCK_BLOCKED bit should it be set in ->rcu_read_unlock_special. This patch also adds a check for ->rcu_read_unlock_special being negative in rcu_check_callbacks(), thus preventing the RCU_READ_UNLOCK_NEED_QS bit from being set should a scheduling-clock interrupt occur while __rcu_read_unlock() is exiting from an outermost RCU read-side critical section. Of course, __rcu_read_unlock() can be preempted during the time that ->rcu_read_lock_nesting is negative. This could result in the setting of the RCU_READ_UNLOCK_BLOCKED bit after __rcu_read_unlock() checks it, and would also result it this task being queued on the corresponding rcu_node structure's blkd_tasks list. Therefore, some later RCU read-side critical section would enter rcu_read_unlock_special() to clean up -- which could result in deadlock (OK, OK, fatal confusion) if that RCU read-side critical section happened to be in the scheduler where the runqueue or priority-inheritance locks were held. To prevent the possibility of fatal confusion that might result from preemption during the time that ->rcu_read_lock_nesting is negative, this commit also makes rcu_preempt_note_context_switch() check for negative ->rcu_read_lock_nesting, thus refraining from queuing the task (and from setting RCU_READ_UNLOCK_BLOCKED) if we are already exiting from the outermost RCU read-side critical section (in other words, we really are no longer actually in that RCU read-side critical section). In addition, rcu_preempt_note_context_switch() invokes rcu_read_unlock_special() to carry out the cleanup in this case, which clears out the ->rcu_read_unlock_special bits and dequeues the task (if necessary), in turn avoiding needless delay of the current RCU grace period and needless RCU priority boosting. It is still illegal to call rcu_read_unlock() while holding a scheduler lock if the prior RCU read-side critical section has ever had both preemption and irqs enabled. However, the common use case is legal, namely where then entire RCU read-side critical section executes with irqs disabled, for example, when the scheduler lock is held across the entire lifetime of the RCU read-side critical section. Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org> Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Diffstat (limited to 'kernel/rcutiny_plugin.h')
-rw-r--r--kernel/rcutiny_plugin.h43
1 files changed, 35 insertions, 8 deletions
diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h
index 4b905404a5bd..432ed2bc05ad 100644
--- a/kernel/rcutiny_plugin.h
+++ b/kernel/rcutiny_plugin.h
@@ -132,6 +132,7 @@ static struct rcu_preempt_ctrlblk rcu_preempt_ctrlblk = {
132 RCU_TRACE(.rcb.name = "rcu_preempt") 132 RCU_TRACE(.rcb.name = "rcu_preempt")
133}; 133};
134 134
135static void rcu_read_unlock_special(struct task_struct *t);
135static int rcu_preempted_readers_exp(void); 136static int rcu_preempted_readers_exp(void);
136static void rcu_report_exp_done(void); 137static void rcu_report_exp_done(void);
137 138
@@ -146,6 +147,16 @@ static int rcu_cpu_blocking_cur_gp(void)
146/* 147/*
147 * Check for a running RCU reader. Because there is only one CPU, 148 * Check for a running RCU reader. Because there is only one CPU,
148 * there can be but one running RCU reader at a time. ;-) 149 * there can be but one running RCU reader at a time. ;-)
150 *
151 * Returns zero if there are no running readers. Returns a positive
152 * number if there is at least one reader within its RCU read-side
153 * critical section. Returns a negative number if an outermost reader
154 * is in the midst of exiting from its RCU read-side critical section
155 *
156 * Returns zero if there are no running readers. Returns a positive
157 * number if there is at least one reader within its RCU read-side
158 * critical section. Returns a negative number if an outermost reader
159 * is in the midst of exiting from its RCU read-side critical section.
149 */ 160 */
150static int rcu_preempt_running_reader(void) 161static int rcu_preempt_running_reader(void)
151{ 162{
@@ -475,7 +486,7 @@ void rcu_preempt_note_context_switch(void)
475 unsigned long flags; 486 unsigned long flags;
476 487
477 local_irq_save(flags); /* must exclude scheduler_tick(). */ 488 local_irq_save(flags); /* must exclude scheduler_tick(). */
478 if (rcu_preempt_running_reader() && 489 if (rcu_preempt_running_reader() > 0 &&
479 (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) { 490 (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) {
480 491
481 /* Possibly blocking in an RCU read-side critical section. */ 492 /* Possibly blocking in an RCU read-side critical section. */
@@ -494,6 +505,13 @@ void rcu_preempt_note_context_switch(void)
494 list_add(&t->rcu_node_entry, &rcu_preempt_ctrlblk.blkd_tasks); 505 list_add(&t->rcu_node_entry, &rcu_preempt_ctrlblk.blkd_tasks);
495 if (rcu_cpu_blocking_cur_gp()) 506 if (rcu_cpu_blocking_cur_gp())
496 rcu_preempt_ctrlblk.gp_tasks = &t->rcu_node_entry; 507 rcu_preempt_ctrlblk.gp_tasks = &t->rcu_node_entry;
508 } else if (rcu_preempt_running_reader() < 0 &&
509 t->rcu_read_unlock_special) {
510 /*
511 * Complete exit from RCU read-side critical section on
512 * behalf of preempted instance of __rcu_read_unlock().
513 */
514 rcu_read_unlock_special(t);
497 } 515 }
498 516
499 /* 517 /*
@@ -618,13 +636,22 @@ void __rcu_read_unlock(void)
618 struct task_struct *t = current; 636 struct task_struct *t = current;
619 637
620 barrier(); /* needed if we ever invoke rcu_read_unlock in rcutiny.c */ 638 barrier(); /* needed if we ever invoke rcu_read_unlock in rcutiny.c */
621 --t->rcu_read_lock_nesting; 639 if (t->rcu_read_lock_nesting != 1)
622 barrier(); /* decrement before load of ->rcu_read_unlock_special */ 640 --t->rcu_read_lock_nesting;
623 if (t->rcu_read_lock_nesting == 0 && 641 else {
624 unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) 642 t->rcu_read_lock_nesting = INT_MIN;
625 rcu_read_unlock_special(t); 643 barrier(); /* assign before ->rcu_read_unlock_special load */
644 if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
645 rcu_read_unlock_special(t);
646 barrier(); /* ->rcu_read_unlock_special load before assign */
647 t->rcu_read_lock_nesting = 0;
648 }
626#ifdef CONFIG_PROVE_LOCKING 649#ifdef CONFIG_PROVE_LOCKING
627 WARN_ON_ONCE(t->rcu_read_lock_nesting < 0); 650 {
651 int rrln = ACCESS_ONCE(t->rcu_read_lock_nesting);
652
653 WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2);
654 }
628#endif /* #ifdef CONFIG_PROVE_LOCKING */ 655#endif /* #ifdef CONFIG_PROVE_LOCKING */
629} 656}
630EXPORT_SYMBOL_GPL(__rcu_read_unlock); 657EXPORT_SYMBOL_GPL(__rcu_read_unlock);
@@ -649,7 +676,7 @@ static void rcu_preempt_check_callbacks(void)
649 invoke_rcu_callbacks(); 676 invoke_rcu_callbacks();
650 if (rcu_preempt_gp_in_progress() && 677 if (rcu_preempt_gp_in_progress() &&
651 rcu_cpu_blocking_cur_gp() && 678 rcu_cpu_blocking_cur_gp() &&
652 rcu_preempt_running_reader()) 679 rcu_preempt_running_reader() > 0)
653 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS; 680 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS;
654} 681}
655 682