author:    Paul E. McKenney <paulmck@linux.vnet.ibm.com>  2009-09-13 12:15:10 -0400
committer: Ingo Molnar <mingo@elte.hu>  2009-09-17 18:06:33 -0400
commit:    c3422bea5f09b0e85704f51f2b01271630b8940b
tree:      fc4a18241b0f7f5d71211f6787428197769cb9f5
parent:    b0e165c035b13e1074fa0b555318bd9cb7102558
rcu: Simplify rcu_read_unlock_special() quiescent-state accounting
The earlier approach required two scheduling-clock ticks to note a
preemptable-RCU quiescent state in the situation in which the
scheduling-clock interrupt is unlucky enough to always interrupt an
RCU read-side critical section. With this change, the quiescent state
is instead noted by the outermost rcu_read_unlock() immediately
following the first scheduling-clock tick, or, alternatively, by the
first subsequent context switch. Therefore, this change also speeds up
grace periods.

Suggested-by: Josh Triplett <josh@joshtriplett.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: laijs@cn.fujitsu.com
Cc: dipankar@in.ibm.com
Cc: akpm@linux-foundation.org
Cc: mathieu.desnoyers@polymtl.ca
Cc: dvhltc@us.ibm.com
Cc: niv@us.ibm.com
Cc: peterz@infradead.org
Cc: rostedt@goodmis.org
Cc: Valdis.Kletnieks@vt.edu
LKML-Reference: <12528585111945-git-send-email->
Signed-off-by: Ingo Molnar <mingo@elte.hu>
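In short, the scheduling-clock tick now merely sets the
RCU_READ_UNLOCK_NEED_QS flag in t->rcu_read_unlock_special, and the
outermost rcu_read_unlock() notices the flag and reports the quiescent
state itself via rcu_preempt_qs(). A minimal reader-side sketch of the
resulting timing (gp and do_something_with() are hypothetical; the flag
and helper names are taken from the patch below):

	rcu_read_lock();
	p = rcu_dereference(gp);	/* A tick landing here now only sets */
	if (p)				/* RCU_READ_UNLOCK_NEED_QS in        */
		do_something_with(p);	/* t->rcu_read_unlock_special.       */
	rcu_read_unlock();		/* The outermost unlock sees the flag */
					/* and reports the QS immediately.    */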
Diffstat (limited to 'kernel')
 kernel/rcutree.c        | 15
 kernel/rcutree_plugin.h | 54
 2 files changed, 32 insertions(+), 37 deletions(-)
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index e9a4ae94647f..6c99553e9f15 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -107,27 +107,23 @@ static void __cpuinit rcu_init_percpu_data(int cpu, struct rcu_state *rsp,
  */
 void rcu_sched_qs(int cpu)
 {
-	unsigned long flags;
 	struct rcu_data *rdp;
 
-	local_irq_save(flags);
 	rdp = &per_cpu(rcu_sched_data, cpu);
-	rdp->passed_quiesc = 1;
 	rdp->passed_quiesc_completed = rdp->completed;
-	rcu_preempt_qs(cpu);
-	local_irq_restore(flags);
+	barrier();
+	rdp->passed_quiesc = 1;
+	rcu_preempt_note_context_switch(cpu);
 }
 
 void rcu_bh_qs(int cpu)
 {
-	unsigned long flags;
 	struct rcu_data *rdp;
 
-	local_irq_save(flags);
 	rdp = &per_cpu(rcu_bh_data, cpu);
-	rdp->passed_quiesc = 1;
 	rdp->passed_quiesc_completed = rdp->completed;
-	local_irq_restore(flags);
+	barrier();
+	rdp->passed_quiesc = 1;
 }
 
 #ifdef CONFIG_NO_HZ
@@ -615,6 +611,7 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
 
 	/* Advance to a new grace period and initialize state. */
 	rsp->gpnum++;
+	WARN_ON_ONCE(rsp->signaled == RCU_GP_INIT);
 	rsp->signaled = RCU_GP_INIT; /* Hold off force_quiescent_state. */
 	rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS;
 	record_gp_stall_check_time(rsp);
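Why the barrier() ordering above matters: passed_quiesc and
passed_quiesc_completed can also be examined from interrupt context on
the same CPU, so the completed value must be recorded before the flag
is set. Any consumer that observes passed_quiesc == 1 then also
observes a matching passed_quiesc_completed, roughly as in this hedged
sketch (note_qs_for_gp() is a hypothetical stand-in for the code that
consumes these fields):

	if (rdp->passed_quiesc) {
		/* The producer-side barrier() ensures this value was
		 * stored before the flag above was set. */
		note_qs_for_gp(rdp->passed_quiesc_completed);
	}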
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index b8e4b0384f00..c9616e48379b 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -64,34 +64,42 @@ EXPORT_SYMBOL_GPL(rcu_batches_completed);
  * not in a quiescent state.  There might be any number of tasks blocked
  * while in an RCU read-side critical section.
  */
-static void rcu_preempt_qs_record(int cpu)
+static void rcu_preempt_qs(int cpu)
 {
 	struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu);
-	rdp->passed_quiesc = 1;
 	rdp->passed_quiesc_completed = rdp->completed;
+	barrier();
+	rdp->passed_quiesc = 1;
 }
 
 /*
- * We have entered the scheduler or are between softirqs in ksoftirqd.
- * If we are in an RCU read-side critical section, we need to reflect
- * that in the state of the rcu_node structure corresponding to this CPU.
- * Caller must disable hardirqs.
+ * We have entered the scheduler, and the current task might soon be
+ * context-switched away from.  If this task is in an RCU read-side
+ * critical section, we will no longer be able to rely on the CPU to
+ * record that fact, so we enqueue the task on the appropriate entry
+ * of the blocked_tasks[] array.  The task will dequeue itself when
+ * it exits the outermost enclosing RCU read-side critical section.
+ * Therefore, the current grace period cannot be permitted to complete
+ * until the blocked_tasks[] entry indexed by the low-order bit of
+ * rnp->gpnum empties.
+ *
+ * Caller must disable preemption.
  */
-static void rcu_preempt_qs(int cpu)
+static void rcu_preempt_note_context_switch(int cpu)
 {
 	struct task_struct *t = current;
+	unsigned long flags;
 	int phase;
 	struct rcu_data *rdp;
 	struct rcu_node *rnp;
 
 	if (t->rcu_read_lock_nesting &&
 	    (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) {
-		WARN_ON_ONCE(cpu != smp_processor_id());
 
 		/* Possibly blocking in an RCU read-side critical section. */
 		rdp = rcu_preempt_state.rda[cpu];
 		rnp = rdp->mynode;
-		spin_lock(&rnp->lock);
+		spin_lock_irqsave(&rnp->lock, flags);
 		t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED;
 		t->rcu_blocked_node = rnp;
 
@@ -112,7 +120,7 @@ static void rcu_preempt_qs(int cpu)
 		phase = !(rnp->qsmask & rdp->grpmask) ^ (rnp->gpnum & 0x1);
 		list_add(&t->rcu_node_entry, &rnp->blocked_tasks[phase]);
 		smp_mb();  /* Ensure later ctxt swtch seen after above. */
-		spin_unlock(&rnp->lock);
+		spin_unlock_irqrestore(&rnp->lock, flags);
 	}
 
 	/*
@@ -124,9 +132,8 @@ static void rcu_preempt_qs(int cpu)
 	 * grace period, then the fact that the task has been enqueued
 	 * means that we continue to block the current grace period.
 	 */
-	rcu_preempt_qs_record(cpu);
-	t->rcu_read_unlock_special &= ~(RCU_READ_UNLOCK_NEED_QS |
-					RCU_READ_UNLOCK_GOT_QS);
+	rcu_preempt_qs(cpu);
+	t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
 }
 
 /*
@@ -162,7 +169,7 @@ static void rcu_read_unlock_special(struct task_struct *t)
 	special = t->rcu_read_unlock_special;
 	if (special & RCU_READ_UNLOCK_NEED_QS) {
 		t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
-		t->rcu_read_unlock_special |= RCU_READ_UNLOCK_GOT_QS;
+		rcu_preempt_qs(smp_processor_id());
 	}
 
 	/* Hardware IRQ handlers cannot block. */
@@ -199,9 +206,7 @@ static void rcu_read_unlock_special(struct task_struct *t)
 		 */
 		if (!empty && rnp->qsmask == 0 &&
 		    list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1])) {
-			t->rcu_read_unlock_special &=
-				~(RCU_READ_UNLOCK_NEED_QS |
-				  RCU_READ_UNLOCK_GOT_QS);
+			t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
 			if (rnp->parent == NULL) {
 				/* Only one rcu_node in the tree. */
 				cpu_quiet_msk_finish(&rcu_preempt_state, flags);
@@ -352,19 +357,12 @@ static void rcu_preempt_check_callbacks(int cpu)
 	struct task_struct *t = current;
 
 	if (t->rcu_read_lock_nesting == 0) {
-		t->rcu_read_unlock_special &=
-			~(RCU_READ_UNLOCK_NEED_QS | RCU_READ_UNLOCK_GOT_QS);
-		rcu_preempt_qs_record(cpu);
+		t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
+		rcu_preempt_qs(cpu);
 		return;
 	}
 	if (per_cpu(rcu_preempt_data, cpu).qs_pending) {
-		if (t->rcu_read_unlock_special & RCU_READ_UNLOCK_GOT_QS) {
-			rcu_preempt_qs_record(cpu);
-			t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_GOT_QS;
-		} else if (!(t->rcu_read_unlock_special &
-			     RCU_READ_UNLOCK_NEED_QS)) {
-			t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS;
-		}
+		t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS;
 	}
 }
 
@@ -451,7 +449,7 @@ EXPORT_SYMBOL_GPL(rcu_batches_completed);
  * Because preemptable RCU does not exist, we never have to check for
  * CPUs being in quiescent states.
  */
-static void rcu_preempt_qs(int cpu)
+static void rcu_preempt_note_context_switch(int cpu)
 {
 }
 
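Finally, a worked example of the phase computation retained in
rcu_preempt_note_context_switch() above, as a self-contained C sketch
(the values are made up for illustration; only the expression itself
comes from the patch):

	#include <stdio.h>

	int main(void)
	{
		unsigned long gpnum = 4; /* current grace period (even) */
		int cpu_owes_qs = 1;     /* (rnp->qsmask & rdp->grpmask) != 0 */
		int phase = !cpu_owes_qs ^ (int)(gpnum & 0x1);

		/* The CPU has not yet passed a quiescent state for GP 4,
		 * so the preempted task lands on blocked_tasks[0], the
		 * entry indexed by the low-order bit of gpnum, and GP 4
		 * cannot complete until that list empties. */
		printf("phase = %d\n", phase); /* prints 0 */
		return 0;
	}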