author:    Paul E. McKenney <paulmck@linux.vnet.ibm.com>  2010-04-01 20:37:01 -0400
committer: Paul E. McKenney <paulmck@linux.vnet.ibm.com>  2010-05-10 14:08:33 -0400
commit:    25502a6c13745f4650cc59322bd198194f55e796 (patch)
tree:      d76cc659d3ea797c5da4630e219ac363d17c44a6 /kernel
parent:    99652b54de1ee094236f7171485214071af4ef31 (diff)
rcu: refactor RCU's context-switch handling
The addition of preemptible RCU to treercu resulted in a bit of confusion and inefficiency surrounding the handling of context switches for RCU-sched and for RCU-preempt.  For RCU-sched, a context switch is a quiescent state, pure and simple, just as it always has been.  For RCU-preempt, a context switch is in no way a quiescent state, but special handling is required when a task blocks in an RCU read-side critical section.

However, the callout from the scheduler and the outer loop in ksoftirqd still call something named rcu_sched_qs(), whose name is no longer accurate.  Furthermore, when rcu_check_callbacks() notes an RCU-sched quiescent state, it ends up unnecessarily (though harmlessly, aside from the performance hit) enqueuing the current task if it happens to be running in an RCU-preempt read-side critical section.  This not only increases the maximum latency of scheduler_tick(), but also needlessly increases the overhead of the next outermost rcu_read_unlock() invocation.

This patch addresses this situation by separating the notion of RCU's context-switch handling from that of RCU-sched's quiescent states.  The context-switch handling is covered by rcu_note_context_switch() in general and by rcu_preempt_note_context_switch() for preemptible RCU.  This permits rcu_sched_qs() to handle quiescent states and only quiescent states.  It also reduces the maximum latency of scheduler_tick(), though probably by much less than a microsecond.  Finally, it means that tasks within preemptible-RCU read-side critical sections avoid incurring the overhead of queuing unless there really is a context switch.

Suggested-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Acked-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <peterz@infradead.org>
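In outline, the split works as follows.  The new rcu_note_context_switch() (shown here as it appears in the kernel/rcutree.c diff below) simply chains the two operations, giving the scheduler a single entry point while rcu_sched_qs() reverts to reporting quiescent states and nothing else:

    /* New entry point, as added to kernel/rcutree.c by this patch. */
    void rcu_note_context_switch(int cpu)
    {
            rcu_sched_qs(cpu);                    /* RCU-sched quiescent state. */
            rcu_preempt_note_context_switch(cpu); /* RCU-preempt blocked-task handling. */
    }

Paths that merely observe an RCU-sched quiescent state, such as the scheduling-clock interrupt, keep calling rcu_sched_qs() and no longer drag in the RCU-preempt machinery.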
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/rcutree.c         17
-rw-r--r--  kernel/rcutree_plugin.h  11
-rw-r--r--  kernel/sched.c            2
-rw-r--r--  kernel/softirq.c          2
4 files changed, 21 insertions, 11 deletions
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 86bb9499aae6..e33631354b69 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -97,25 +97,32 @@ static int rcu_gp_in_progress(struct rcu_state *rsp)
  */
 void rcu_sched_qs(int cpu)
 {
-        struct rcu_data *rdp;
+        struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu);
 
-        rdp = &per_cpu(rcu_sched_data, cpu);
         rdp->passed_quiesc_completed = rdp->gpnum - 1;
         barrier();
         rdp->passed_quiesc = 1;
-        rcu_preempt_note_context_switch(cpu);
 }
 
 void rcu_bh_qs(int cpu)
 {
-        struct rcu_data *rdp;
+        struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu);
 
-        rdp = &per_cpu(rcu_bh_data, cpu);
         rdp->passed_quiesc_completed = rdp->gpnum - 1;
         barrier();
         rdp->passed_quiesc = 1;
 }
 
+/*
+ * Note a context switch.  This is a quiescent state for RCU-sched,
+ * and requires special handling for preemptible RCU.
+ */
+void rcu_note_context_switch(int cpu)
+{
+        rcu_sched_qs(cpu);
+        rcu_preempt_note_context_switch(cpu);
+}
+
 #ifdef CONFIG_NO_HZ
 DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
         .dynticks_nesting = 1,
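The benefit on the interrupt path can be seen in rcu_check_callbacks(), which reports quiescent states from the scheduling-clock interrupt.  A condensed sketch follows; the qualifying test is abbreviated here, and the real version in kernel/rcutree.c also treats certain interrupts taken from idle as quiescent states:

    void rcu_check_callbacks(int cpu, int user)
    {
            if (user) {
                    /*
                     * An interrupt taken from user mode is an RCU-sched and
                     * RCU-bh quiescent state.  With this patch, neither call
                     * below touches the RCU-preempt blocked-task lists, so a
                     * task interrupted inside an RCU-preempt read-side
                     * critical section is no longer needlessly enqueued.
                     */
                    rcu_sched_qs(cpu);
                    rcu_bh_qs(cpu);
            } else if (!in_softirq()) {
                    rcu_bh_qs(cpu);
            }
            rcu_preempt_check_callbacks(cpu);
            raise_softirq(RCU_SOFTIRQ);
    }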
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 687c4e90722e..f9bc83a047da 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -75,13 +75,19 @@ EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
  * that this just means that the task currently running on the CPU is
  * not in a quiescent state.  There might be any number of tasks blocked
  * while in an RCU read-side critical section.
+ *
+ * Unlike the other rcu_*_qs() functions, callers to this function
+ * must disable irqs in order to protect the assignment to
+ * ->rcu_read_unlock_special.
  */
 static void rcu_preempt_qs(int cpu)
 {
         struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu);
+
         rdp->passed_quiesc_completed = rdp->gpnum - 1;
         barrier();
         rdp->passed_quiesc = 1;
+        current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
 }
 
 /*
@@ -144,9 +150,8 @@ static void rcu_preempt_note_context_switch(int cpu)
          * grace period, then the fact that the task has been enqueued
          * means that we continue to block the current grace period.
          */
-        rcu_preempt_qs(cpu);
         local_irq_save(flags);
-        t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
+        rcu_preempt_qs(cpu);
         local_irq_restore(flags);
 }
 
@@ -236,7 +241,6 @@ static void rcu_read_unlock_special(struct task_struct *t)
          */
         special = t->rcu_read_unlock_special;
         if (special & RCU_READ_UNLOCK_NEED_QS) {
-                t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
                 rcu_preempt_qs(smp_processor_id());
         }
 
@@ -473,7 +477,6 @@ static void rcu_preempt_check_callbacks(int cpu)
         struct task_struct *t = current;
 
         if (t->rcu_read_lock_nesting == 0) {
-                t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
                 rcu_preempt_qs(cpu);
                 return;
         }
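Folding the flag-clearing into rcu_preempt_qs() also standardizes its calling convention, per the comment added above: interrupts must be disabled across the call so the update to ->rcu_read_unlock_special cannot race with an interrupt handler.  The context-switch path now reads as shown below, and the other two callers already satisfy the rule (rcu_read_unlock_special() disables irqs itself before this point, and rcu_preempt_check_callbacks() is invoked from the scheduling-clock interrupt):

    local_irq_save(flags);
    rcu_preempt_qs(cpu);    /* Also clears RCU_READ_UNLOCK_NEED_QS, under irq cover. */
    local_irq_restore(flags);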
diff --git a/kernel/sched.c b/kernel/sched.c
index 3c2a54f70ffe..d8a213ccdc3b 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3706,7 +3706,7 @@ need_resched:
         preempt_disable();
         cpu = smp_processor_id();
         rq = cpu_rq(cpu);
-        rcu_sched_qs(cpu);
+        rcu_note_context_switch(cpu);
         prev = rq->curr;
         switch_count = &prev->nivcsw;
 
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 7c1a67ef0274..0db913a5c60f 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -716,7 +716,7 @@ static int run_ksoftirqd(void * __bind_cpu)
                         preempt_enable_no_resched();
                         cond_resched();
                         preempt_disable();
-                        rcu_sched_qs((long)__bind_cpu);
+                        rcu_note_context_switch((long)__bind_cpu);
                 }
                 preempt_enable();
                 set_current_state(TASK_INTERRUPTIBLE);
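As a usage note, any other voluntary-reschedule loop would follow the same pattern as the two call sites updated above.  The following is a hypothetical caller for illustration only, not part of the patch:

    /*
     * Preemption is disabled so that the CPU number stays valid for the
     * duration of the call, matching the scheduler and ksoftirqd call
     * sites converted by this patch.
     */
    preempt_disable();
    rcu_note_context_switch(smp_processor_id());
    preempt_enable();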