author      Paul E. McKenney <paulmck@linux.vnet.ibm.com>   2010-04-01 20:37:01 -0400
committer   Paul E. McKenney <paulmck@linux.vnet.ibm.com>   2010-05-10 14:08:33 -0400
commit      25502a6c13745f4650cc59322bd198194f55e796 (patch)
tree        d76cc659d3ea797c5da4630e219ac363d17c44a6 /kernel/rcutree_plugin.h
parent      99652b54de1ee094236f7171485214071af4ef31 (diff)
rcu: refactor RCU's context-switch handling
The addition of preemptible RCU to treercu resulted in a bit of
confusion and inefficiency surrounding the handling of context switches
for RCU-sched and for RCU-preempt. For RCU-sched, a context switch
is a quiescent state, pure and simple, just like it always has been.
For RCU-preempt, a context switch is in no way a quiescent state, but
special handling is required when a task blocks in an RCU read-side
critical section.

However, the callout from the scheduler and the outer loop in ksoftirqd
still call something named rcu_sched_qs(), whose name is no longer
accurate. Furthermore, when rcu_check_callbacks() notes an RCU-sched
quiescent state, it ends up unnecessarily (though harmlessly, aside
from the performance hit) enqueuing the current task if it happens to
be running in an RCU-preempt read-side critical section. This not only
increases the maximum latency of scheduler_tick(), it also needlessly
increases the overhead of the next outermost rcu_read_unlock() invocation.

This patch addresses this situation by separating the notion of RCU's
context-switch handling from that of RCU-sched's quiescent states.
The context-switch handling is covered by rcu_note_context_switch() in
general and by rcu_preempt_note_context_switch() for preemptible RCU.
This permits rcu_sched_qs() to handle quiescent states and only quiescent
states. It also reduces the maximum latency of scheduler_tick(), though
probably by much less than a microsecond. Finally, it means that tasks
within preemptible-RCU read-side critical sections avoid incurring the
overhead of queuing unless there really is a context switch.

Suggested-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Acked-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <peterz@infradead.org>
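
As described above, the context-switch callout is split into a generic entry
point plus a flavor-specific hook. A minimal sketch of the resulting shape,
assuming the kernel/rcutree.c side of the change (which lies outside this
rcutree_plugin.h-only view, so placement and exact body are inferred from the
commit message rather than shown here):

/*
 * Sketch only: note a context switch.  For RCU-sched this is a
 * quiescent state outright; for RCU-preempt it merely triggers the
 * special handling for tasks that block in a read-side critical
 * section.
 */
void rcu_note_context_switch(int cpu)
{
        rcu_sched_qs(cpu);                      /* context switch is an RCU-sched quiescent state */
        rcu_preempt_note_context_switch(cpu);   /* queue the task if it is in a read-side critical section */
}

With this split, the scheduler and ksoftirqd can call the accurately named
rcu_note_context_switch(), while rcu_sched_qs() is left to report quiescent
states and nothing else.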
Diffstat (limited to 'kernel/rcutree_plugin.h')
-rw-r--r--   kernel/rcutree_plugin.h   11
1 file changed, 7 insertions, 4 deletions
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 687c4e90722e..f9bc83a047da 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -75,13 +75,19 @@ EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
  * that this just means that the task currently running on the CPU is
  * not in a quiescent state.  There might be any number of tasks blocked
  * while in an RCU read-side critical section.
+ *
+ * Unlike the other rcu_*_qs() functions, callers to this function
+ * must disable irqs in order to protect the assignment to
+ * ->rcu_read_unlock_special.
  */
 static void rcu_preempt_qs(int cpu)
 {
         struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu);
+
         rdp->passed_quiesc_completed = rdp->gpnum - 1;
         barrier();
         rdp->passed_quiesc = 1;
+        current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
 }
 
 /*
@@ -144,9 +150,8 @@ static void rcu_preempt_note_context_switch(int cpu)
          * grace period, then the fact that the task has been enqueued
          * means that we continue to block the current grace period.
          */
-        rcu_preempt_qs(cpu);
         local_irq_save(flags);
-        t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
+        rcu_preempt_qs(cpu);
         local_irq_restore(flags);
 }
 
@@ -236,7 +241,6 @@ static void rcu_read_unlock_special(struct task_struct *t)
          */
         special = t->rcu_read_unlock_special;
         if (special & RCU_READ_UNLOCK_NEED_QS) {
-                t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
                 rcu_preempt_qs(smp_processor_id());
         }
 
@@ -473,7 +477,6 @@ static void rcu_preempt_check_callbacks(int cpu)
         struct task_struct *t = current;
 
         if (t->rcu_read_lock_nesting == 0) {
-                t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
                 rcu_preempt_qs(cpu);
                 return;
         }
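
The new comment block in the first hunk makes the calling convention explicit:
because rcu_preempt_qs() now clears RCU_READ_UNLOCK_NEED_QS itself, its callers
must run with irqs disabled. The second hunk shows the resulting pattern in
rcu_preempt_note_context_switch(); sketched here in isolation, with the flags
and cpu variables as used in that hunk:

        unsigned long flags;

        local_irq_save(flags);          /* protect the ->rcu_read_unlock_special update inside rcu_preempt_qs() */
        rcu_preempt_qs(cpu);            /* record the quiescent state and clear RCU_READ_UNLOCK_NEED_QS */
        local_irq_restore(flags);

Folding the flag-clearing into rcu_preempt_qs() is what lets the three call
sites in the later hunks drop their separate, now-redundant assignments to
t->rcu_read_unlock_special.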