author     Paul E. McKenney <paulmck@linux.vnet.ibm.com>    2009-09-13 12:15:10 -0400
committer  Ingo Molnar <mingo@elte.hu>                      2009-09-17 18:06:33 -0400
commit     c3422bea5f09b0e85704f51f2b01271630b8940b (patch)
tree       fc4a18241b0f7f5d71211f6787428197769cb9f5 /kernel
parent     b0e165c035b13e1074fa0b555318bd9cb7102558 (diff)
rcu: Simplify rcu_read_unlock_special() quiescent-state accounting
The earlier approach required two scheduling-clock ticks to note a
preemptable-RCU quiescent state in the situation in which the
scheduling-clock interrupt is unlucky enough to always interrupt an
RCU read-side critical section.
With this change, the quiescent state is instead noted by the
outermost rcu_read_unlock() immediately following the first
scheduling-clock tick, or, alternatively, by the first subsequent
context switch. Therefore, this change also speeds up grace
periods.
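
For illustration only, the sketch below is a minimal user-space model of the
new accounting (the helpers note_qs(), tick_check(), and read_unlock_outermost()
are hypothetical stand-ins, not kernel code): the scheduling-clock tick merely
sets RCU_READ_UNLOCK_NEED_QS when the core needs a quiescent state, and the
outermost rcu_read_unlock() (or a later context switch) then records the
quiescent state itself, ordering the ->passed_quiesc_completed store before
the ->passed_quiesc store with a compiler barrier.

/*
 * Minimal user-space model of the reworked quiescent-state accounting.
 * The structures, helpers, and single-task setup are illustrative
 * assumptions, not the kernel implementation.
 */
#include <stdio.h>

#define RCU_READ_UNLOCK_NEED_QS 0x1

struct task_model {
        int rcu_read_lock_nesting;      /* depth of rcu_read_lock() nesting */
        int rcu_read_unlock_special;    /* work for the outermost unlock */
};

struct rcu_data_model {
        long completed;                 /* last completed grace-period number */
        long passed_quiesc_completed;   /* grace period the QS applies to */
        int passed_quiesc;              /* quiescent state has been noted */
        int qs_pending;                 /* core is waiting on this CPU/task */
};

static struct task_model t;
static struct rcu_data_model rdp = { .completed = 4, .qs_pending = 1 };

/* Analogous to the new rcu_preempt_qs(): record which grace period the
 * quiescent state belongs to before advertising it. */
static void note_qs(void)
{
        rdp.passed_quiesc_completed = rdp.completed;
        __asm__ __volatile__("" ::: "memory");  /* stand-in for barrier() */
        rdp.passed_quiesc = 1;
}

/* Scheduling-clock tick: outside a reader, note the QS directly; inside
 * a reader, just ask the task to report it at the outermost unlock. */
static void tick_check(void)
{
        if (t.rcu_read_lock_nesting == 0) {
                t.rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
                note_qs();
        } else if (rdp.qs_pending) {
                t.rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS;
        }
}

/* Outermost rcu_read_unlock(): report the quiescent state immediately
 * rather than waiting for a second tick. */
static void read_unlock_outermost(void)
{
        t.rcu_read_lock_nesting = 0;
        if (t.rcu_read_unlock_special & RCU_READ_UNLOCK_NEED_QS) {
                t.rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
                note_qs();
        }
}

int main(void)
{
        t.rcu_read_lock_nesting = 1;    /* tick lands inside a reader */
        tick_check();                   /* only sets NEED_QS */
        read_unlock_outermost();        /* QS noted here, one tick later */
        printf("passed_quiesc=%d for grace period %ld\n",
               rdp.passed_quiesc, rdp.passed_quiesc_completed);
        return 0;
}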
Suggested-by: Josh Triplett <josh@joshtriplett.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: laijs@cn.fujitsu.com
Cc: dipankar@in.ibm.com
Cc: akpm@linux-foundation.org
Cc: mathieu.desnoyers@polymtl.ca
Cc: dvhltc@us.ibm.com
Cc: niv@us.ibm.com
Cc: peterz@infradead.org
Cc: rostedt@goodmis.org
Cc: Valdis.Kletnieks@vt.edu
LKML-Reference: <12528585111945-git-send-email->
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')

 -rw-r--r--  kernel/rcutree.c         | 15
 -rw-r--r--  kernel/rcutree_plugin.h  | 54

 2 files changed, 32 insertions, 37 deletions
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index e9a4ae94647f..6c99553e9f15 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -107,27 +107,23 @@ static void __cpuinit rcu_init_percpu_data(int cpu, struct rcu_state *rsp,
  */
 void rcu_sched_qs(int cpu)
 {
-        unsigned long flags;
         struct rcu_data *rdp;
 
-        local_irq_save(flags);
         rdp = &per_cpu(rcu_sched_data, cpu);
-        rdp->passed_quiesc = 1;
         rdp->passed_quiesc_completed = rdp->completed;
-        rcu_preempt_qs(cpu);
-        local_irq_restore(flags);
+        barrier();
+        rdp->passed_quiesc = 1;
+        rcu_preempt_note_context_switch(cpu);
 }
 
 void rcu_bh_qs(int cpu)
 {
-        unsigned long flags;
         struct rcu_data *rdp;
 
-        local_irq_save(flags);
         rdp = &per_cpu(rcu_bh_data, cpu);
-        rdp->passed_quiesc = 1;
         rdp->passed_quiesc_completed = rdp->completed;
-        local_irq_restore(flags);
+        barrier();
+        rdp->passed_quiesc = 1;
 }
 
 #ifdef CONFIG_NO_HZ
@@ -615,6 +611,7 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
 
         /* Advance to a new grace period and initialize state. */
         rsp->gpnum++;
+        WARN_ON_ONCE(rsp->signaled == RCU_GP_INIT);
         rsp->signaled = RCU_GP_INIT; /* Hold off force_quiescent_state. */
         rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS;
         record_gp_stall_check_time(rsp);
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index b8e4b0384f00..c9616e48379b 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -64,34 +64,42 @@ EXPORT_SYMBOL_GPL(rcu_batches_completed);
  * not in a quiescent state.  There might be any number of tasks blocked
  * while in an RCU read-side critical section.
  */
-static void rcu_preempt_qs_record(int cpu)
+static void rcu_preempt_qs(int cpu)
 {
         struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu);
-        rdp->passed_quiesc = 1;
         rdp->passed_quiesc_completed = rdp->completed;
+        barrier();
+        rdp->passed_quiesc = 1;
 }
 
 /*
- * We have entered the scheduler or are between softirqs in ksoftirqd.
- * If we are in an RCU read-side critical section, we need to reflect
- * that in the state of the rcu_node structure corresponding to this CPU.
- * Caller must disable hardirqs.
+ * We have entered the scheduler, and the current task might soon be
+ * context-switched away from.  If this task is in an RCU read-side
+ * critical section, we will no longer be able to rely on the CPU to
+ * record that fact, so we enqueue the task on the appropriate entry
+ * of the blocked_tasks[] array.  The task will dequeue itself when
+ * it exits the outermost enclosing RCU read-side critical section.
+ * Therefore, the current grace period cannot be permitted to complete
+ * until the blocked_tasks[] entry indexed by the low-order bit of
+ * rnp->gpnum empties.
+ *
+ * Caller must disable preemption.
  */
-static void rcu_preempt_qs(int cpu)
+static void rcu_preempt_note_context_switch(int cpu)
 {
         struct task_struct *t = current;
+        unsigned long flags;
         int phase;
         struct rcu_data *rdp;
         struct rcu_node *rnp;
 
         if (t->rcu_read_lock_nesting &&
             (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) {
-                WARN_ON_ONCE(cpu != smp_processor_id());
 
                 /* Possibly blocking in an RCU read-side critical section. */
                 rdp = rcu_preempt_state.rda[cpu];
                 rnp = rdp->mynode;
-                spin_lock(&rnp->lock);
+                spin_lock_irqsave(&rnp->lock, flags);
                 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED;
                 t->rcu_blocked_node = rnp;
 
@@ -112,7 +120,7 @@ static void rcu_preempt_qs(int cpu)
                 phase = !(rnp->qsmask & rdp->grpmask) ^ (rnp->gpnum & 0x1);
                 list_add(&t->rcu_node_entry, &rnp->blocked_tasks[phase]);
                 smp_mb();  /* Ensure later ctxt swtch seen after above. */
-                spin_unlock(&rnp->lock);
+                spin_unlock_irqrestore(&rnp->lock, flags);
         }
 
         /*
@@ -124,9 +132,8 @@ static void rcu_preempt_qs(int cpu)
          * grace period, then the fact that the task has been enqueued
          * means that we continue to block the current grace period.
          */
-        rcu_preempt_qs_record(cpu);
-        t->rcu_read_unlock_special &= ~(RCU_READ_UNLOCK_NEED_QS |
-                                        RCU_READ_UNLOCK_GOT_QS);
+        rcu_preempt_qs(cpu);
+        t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
 }
 
 /*
@@ -162,7 +169,7 @@ static void rcu_read_unlock_special(struct task_struct *t)
         special = t->rcu_read_unlock_special;
         if (special & RCU_READ_UNLOCK_NEED_QS) {
                 t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
-                t->rcu_read_unlock_special |= RCU_READ_UNLOCK_GOT_QS;
+                rcu_preempt_qs(smp_processor_id());
         }
 
         /* Hardware IRQ handlers cannot block. */
@@ -199,9 +206,7 @@ static void rcu_read_unlock_special(struct task_struct *t)
                  */
                 if (!empty && rnp->qsmask == 0 &&
                     list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1])) {
-                        t->rcu_read_unlock_special &=
-                                ~(RCU_READ_UNLOCK_NEED_QS |
-                                  RCU_READ_UNLOCK_GOT_QS);
+                        t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
                         if (rnp->parent == NULL) {
                                 /* Only one rcu_node in the tree. */
                                 cpu_quiet_msk_finish(&rcu_preempt_state, flags);
@@ -352,19 +357,12 @@ static void rcu_preempt_check_callbacks(int cpu)
         struct task_struct *t = current;
 
         if (t->rcu_read_lock_nesting == 0) {
-                t->rcu_read_unlock_special &=
-                        ~(RCU_READ_UNLOCK_NEED_QS | RCU_READ_UNLOCK_GOT_QS);
-                rcu_preempt_qs_record(cpu);
+                t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
+                rcu_preempt_qs(cpu);
                 return;
         }
         if (per_cpu(rcu_preempt_data, cpu).qs_pending) {
-                if (t->rcu_read_unlock_special & RCU_READ_UNLOCK_GOT_QS) {
-                        rcu_preempt_qs_record(cpu);
-                        t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_GOT_QS;
-                } else if (!(t->rcu_read_unlock_special &
-                             RCU_READ_UNLOCK_NEED_QS)) {
-                        t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS;
-                }
+                t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS;
         }
 }
 
@@ -451,7 +449,7 @@ EXPORT_SYMBOL_GPL(rcu_batches_completed);
  * Because preemptable RCU does not exist, we never have to check for
  * CPUs being in quiescent states.
  */
-static void rcu_preempt_qs(int cpu)
+static void rcu_preempt_note_context_switch(int cpu)
 {
 }
 