diff options
author | Paul E. McKenney <paulmck@linux.vnet.ibm.com> | 2010-04-01 20:37:01 -0400 |
---|---|---|
committer | Paul E. McKenney <paulmck@linux.vnet.ibm.com> | 2010-05-10 14:08:33 -0400 |
commit | 25502a6c13745f4650cc59322bd198194f55e796 (patch) | |
tree | d76cc659d3ea797c5da4630e219ac363d17c44a6 | |
parent | 99652b54de1ee094236f7171485214071af4ef31 (diff) |
rcu: refactor RCU's context-switch handling
The addition of preemptible RCU to treercu resulted in a bit of
confusion and inefficiency surrounding the handling of context switches
for RCU-sched and for RCU-preempt. For RCU-sched, a context switch
is a quiescent state, pure and simple, just like it always has been.
For RCU-preempt, a context switch is in no way a quiescent state, but
special handling is required when a task blocks in an RCU read-side
critical section.
However, the callout from the scheduler and the outer loop in ksoftirqd
still call something named rcu_sched_qs(), whose name is no longer
accurate. Furthermore, when rcu_check_callbacks() notes an RCU-sched
quiescent state, it ends up unnecessarily (though harmlessly, aside
from the performance hit) enqueuing the current task if it happens to
be running in an RCU-preempt read-side critical section. This not only
increases the maximum latency of scheduler_tick(), it also needlessly
increases the overhead of the next outermost rcu_read_unlock() invocation.
This patch addresses this situation by separating the notion of RCU's
context-switch handling from that of RCU-sched's quiescent states.
The context-switch handling is covered by rcu_note_context_switch() in
general and by rcu_preempt_note_context_switch() for preemptible RCU.
This permits rcu_sched_qs() to handle quiescent states and only quiescent
states. It also reduces the maximum latency of scheduler_tick(), though
probably by much less than a microsecond. Finally, it means that tasks
within preemptible-RCU read-side critical sections avoid incurring the
overhead of queuing unless there really is a context switch.
Suggested-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Acked-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <peterz@infradead.org>
-rw-r--r-- | include/linux/rcutiny.h | 4 | ||||
-rw-r--r-- | include/linux/rcutree.h | 1 | ||||
-rw-r--r-- | kernel/rcutree.c | 17 | ||||
-rw-r--r-- | kernel/rcutree_plugin.h | 11 | ||||
-rw-r--r-- | kernel/sched.c | 2 | ||||
-rw-r--r-- | kernel/softirq.c | 2 |
6 files changed, 26 insertions, 11 deletions
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index bbeb55b7709b..ff22b97fb979 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h | |||
@@ -29,6 +29,10 @@ | |||
29 | 29 | ||
30 | void rcu_sched_qs(int cpu); | 30 | void rcu_sched_qs(int cpu); |
31 | void rcu_bh_qs(int cpu); | 31 | void rcu_bh_qs(int cpu); |
32 | static inline void rcu_note_context_switch(int cpu) | ||
33 | { | ||
34 | rcu_sched_qs(cpu); | ||
35 | } | ||
32 | 36 | ||
33 | #define __rcu_read_lock() preempt_disable() | 37 | #define __rcu_read_lock() preempt_disable() |
34 | #define __rcu_read_unlock() preempt_enable() | 38 | #define __rcu_read_unlock() preempt_enable() |
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 7484fe66a3aa..b9f74606f320 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h | |||
@@ -34,6 +34,7 @@ struct notifier_block; | |||
34 | 34 | ||
35 | extern void rcu_sched_qs(int cpu); | 35 | extern void rcu_sched_qs(int cpu); |
36 | extern void rcu_bh_qs(int cpu); | 36 | extern void rcu_bh_qs(int cpu); |
37 | extern void rcu_note_context_switch(int cpu); | ||
37 | extern int rcu_needs_cpu(int cpu); | 38 | extern int rcu_needs_cpu(int cpu); |
38 | extern int rcu_expedited_torture_stats(char *page); | 39 | extern int rcu_expedited_torture_stats(char *page); |
39 | 40 | ||
diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 86bb9499aae6..e33631354b69 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c | |||
@@ -97,25 +97,32 @@ static int rcu_gp_in_progress(struct rcu_state *rsp) | |||
97 | */ | 97 | */ |
98 | void rcu_sched_qs(int cpu) | 98 | void rcu_sched_qs(int cpu) |
99 | { | 99 | { |
100 | struct rcu_data *rdp; | 100 | struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu); |
101 | 101 | ||
102 | rdp = &per_cpu(rcu_sched_data, cpu); | ||
103 | rdp->passed_quiesc_completed = rdp->gpnum - 1; | 102 | rdp->passed_quiesc_completed = rdp->gpnum - 1; |
104 | barrier(); | 103 | barrier(); |
105 | rdp->passed_quiesc = 1; | 104 | rdp->passed_quiesc = 1; |
106 | rcu_preempt_note_context_switch(cpu); | ||
107 | } | 105 | } |
108 | 106 | ||
109 | void rcu_bh_qs(int cpu) | 107 | void rcu_bh_qs(int cpu) |
110 | { | 108 | { |
111 | struct rcu_data *rdp; | 109 | struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu); |
112 | 110 | ||
113 | rdp = &per_cpu(rcu_bh_data, cpu); | ||
114 | rdp->passed_quiesc_completed = rdp->gpnum - 1; | 111 | rdp->passed_quiesc_completed = rdp->gpnum - 1; |
115 | barrier(); | 112 | barrier(); |
116 | rdp->passed_quiesc = 1; | 113 | rdp->passed_quiesc = 1; |
117 | } | 114 | } |
118 | 115 | ||
116 | /* | ||
117 | * Note a context switch. This is a quiescent state for RCU-sched, | ||
118 | * and requires special handling for preemptible RCU. | ||
119 | */ | ||
120 | void rcu_note_context_switch(int cpu) | ||
121 | { | ||
122 | rcu_sched_qs(cpu); | ||
123 | rcu_preempt_note_context_switch(cpu); | ||
124 | } | ||
125 | |||
119 | #ifdef CONFIG_NO_HZ | 126 | #ifdef CONFIG_NO_HZ |
120 | DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { | 127 | DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { |
121 | .dynticks_nesting = 1, | 128 | .dynticks_nesting = 1, |
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 687c4e90722e..f9bc83a047da 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h | |||
@@ -75,13 +75,19 @@ EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); | |||
75 | * that this just means that the task currently running on the CPU is | 75 | * that this just means that the task currently running on the CPU is |
76 | * not in a quiescent state. There might be any number of tasks blocked | 76 | * not in a quiescent state. There might be any number of tasks blocked |
77 | * while in an RCU read-side critical section. | 77 | * while in an RCU read-side critical section. |
78 | * | ||
79 | * Unlike the other rcu_*_qs() functions, callers to this function | ||
80 | * must disable irqs in order to protect the assignment to | ||
81 | * ->rcu_read_unlock_special. | ||
78 | */ | 82 | */ |
79 | static void rcu_preempt_qs(int cpu) | 83 | static void rcu_preempt_qs(int cpu) |
80 | { | 84 | { |
81 | struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu); | 85 | struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu); |
86 | |||
82 | rdp->passed_quiesc_completed = rdp->gpnum - 1; | 87 | rdp->passed_quiesc_completed = rdp->gpnum - 1; |
83 | barrier(); | 88 | barrier(); |
84 | rdp->passed_quiesc = 1; | 89 | rdp->passed_quiesc = 1; |
90 | current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; | ||
85 | } | 91 | } |
86 | 92 | ||
87 | /* | 93 | /* |
@@ -144,9 +150,8 @@ static void rcu_preempt_note_context_switch(int cpu) | |||
144 | * grace period, then the fact that the task has been enqueued | 150 | * grace period, then the fact that the task has been enqueued |
145 | * means that we continue to block the current grace period. | 151 | * means that we continue to block the current grace period. |
146 | */ | 152 | */ |
147 | rcu_preempt_qs(cpu); | ||
148 | local_irq_save(flags); | 153 | local_irq_save(flags); |
149 | t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; | 154 | rcu_preempt_qs(cpu); |
150 | local_irq_restore(flags); | 155 | local_irq_restore(flags); |
151 | } | 156 | } |
152 | 157 | ||
@@ -236,7 +241,6 @@ static void rcu_read_unlock_special(struct task_struct *t) | |||
236 | */ | 241 | */ |
237 | special = t->rcu_read_unlock_special; | 242 | special = t->rcu_read_unlock_special; |
238 | if (special & RCU_READ_UNLOCK_NEED_QS) { | 243 | if (special & RCU_READ_UNLOCK_NEED_QS) { |
239 | t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; | ||
240 | rcu_preempt_qs(smp_processor_id()); | 244 | rcu_preempt_qs(smp_processor_id()); |
241 | } | 245 | } |
242 | 246 | ||
@@ -473,7 +477,6 @@ static void rcu_preempt_check_callbacks(int cpu) | |||
473 | struct task_struct *t = current; | 477 | struct task_struct *t = current; |
474 | 478 | ||
475 | if (t->rcu_read_lock_nesting == 0) { | 479 | if (t->rcu_read_lock_nesting == 0) { |
476 | t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; | ||
477 | rcu_preempt_qs(cpu); | 480 | rcu_preempt_qs(cpu); |
478 | return; | 481 | return; |
479 | } | 482 | } |
diff --git a/kernel/sched.c b/kernel/sched.c index 3c2a54f70ffe..d8a213ccdc3b 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -3706,7 +3706,7 @@ need_resched: | |||
3706 | preempt_disable(); | 3706 | preempt_disable(); |
3707 | cpu = smp_processor_id(); | 3707 | cpu = smp_processor_id(); |
3708 | rq = cpu_rq(cpu); | 3708 | rq = cpu_rq(cpu); |
3709 | rcu_sched_qs(cpu); | 3709 | rcu_note_context_switch(cpu); |
3710 | prev = rq->curr; | 3710 | prev = rq->curr; |
3711 | switch_count = &prev->nivcsw; | 3711 | switch_count = &prev->nivcsw; |
3712 | 3712 | ||
diff --git a/kernel/softirq.c b/kernel/softirq.c index 7c1a67ef0274..0db913a5c60f 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
@@ -716,7 +716,7 @@ static int run_ksoftirqd(void * __bind_cpu) | |||
716 | preempt_enable_no_resched(); | 716 | preempt_enable_no_resched(); |
717 | cond_resched(); | 717 | cond_resched(); |
718 | preempt_disable(); | 718 | preempt_disable(); |
719 | rcu_sched_qs((long)__bind_cpu); | 719 | rcu_note_context_switch((long)__bind_cpu); |
720 | } | 720 | } |
721 | preempt_enable(); | 721 | preempt_enable(); |
722 | set_current_state(TASK_INTERRUPTIBLE); | 722 | set_current_state(TASK_INTERRUPTIBLE); |