author     Paul E. McKenney <paulmck@linux.vnet.ibm.com>  2009-08-22 16:56:52 -0400
committer  Ingo Molnar <mingo@elte.hu>  2009-08-23 04:32:40 -0400
commit     f41d911f8c49a5d65c86504c19e8204bb605c4fd (patch)
tree       59bcd3048652ef290b3e19d2904409afd5c90eb3 /kernel/rcutree_plugin.h
parent     a157229cabd6dd8cfa82525fc9bf730c94cc9ac2 (diff)
rcu: Merge preemptable-RCU functionality into hierarchical RCU
Create a kernel/rcutree_plugin.h file that contains definitions for preemptable RCU (or, under the #else branch of the #ifdef, empty definitions for the classic non-preemptable semantics). These definitions fit into plugins defined in kernel/rcutree.c for this purpose.

This variant of preemptable RCU uses a new algorithm whose read-side expense is roughly that of classic hierarchical RCU under CONFIG_PREEMPT. This new algorithm's update-side expense is similar to that of classic hierarchical RCU, and, in absence of read-side preemption or blocking, is exactly that of classic hierarchical RCU. Perhaps more important, this new algorithm has a much simpler implementation, saving well over 1,000 lines of code compared to mainline's implementation of preemptable RCU, which will hopefully be retired in favor of this new algorithm.

The simplifications are obtained by maintaining per-task nesting state for running tasks, and using a simple lock-protected algorithm to handle accounting when tasks block within RCU read-side critical sections, making use of lessons learned while creating numerous user-level RCU implementations over the past 18 months.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: laijs@cn.fujitsu.com
Cc: dipankar@in.ibm.com
Cc: akpm@linux-foundation.org
Cc: mathieu.desnoyers@polymtl.ca
Cc: josht@linux.vnet.ibm.com
Cc: dvhltc@us.ibm.com
Cc: niv@us.ibm.com
Cc: peterz@infradead.org
Cc: rostedt@goodmis.org
LKML-Reference: <12509746134003-git-send-email->
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/rcutree_plugin.h')
-rw-r--r--  kernel/rcutree_plugin.h  447
1 files changed, 447 insertions, 0 deletions
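Before the diff itself, here is a minimal user-level sketch of the per-task read-side bookkeeping the commit message describes. It is illustrative only and not part of this patch: readers merely bump a per-task nesting counter, and only the outermost unlock has to look at the "special" state set when a reader was preempted. The toy_* names, the toy_task structure, and the flag value are all invented for illustration; the real kernel state lives in task_struct and is manipulated by __rcu_read_lock()/__rcu_read_unlock() in the patch below.

```c
/* Illustrative sketch only -- NOT kernel code from this patch. */
#include <stdio.h>

struct toy_task {
	int rcu_read_lock_nesting;	/* depth of reader nesting */
	int rcu_read_unlock_special;	/* nonzero if reader blocked/preempted */
};

static struct toy_task task;		/* stands in for "current" */

static void toy_rcu_read_lock(void)
{
	task.rcu_read_lock_nesting++;	/* cheap: no locks, no atomics */
}

static void toy_rcu_read_unlock(void)
{
	/* Only the outermost unlock of a reader that blocked needs cleanup. */
	if (--task.rcu_read_lock_nesting == 0 && task.rcu_read_unlock_special) {
		printf("outermost unlock: clearing special state 0x%x\n",
		       task.rcu_read_unlock_special);
		task.rcu_read_unlock_special = 0;
	}
}

int main(void)
{
	toy_rcu_read_lock();
	toy_rcu_read_lock();			/* nested reader */
	task.rcu_read_unlock_special = 0x1;	/* pretend we were preempted */
	toy_rcu_read_unlock();			/* inner unlock: nothing special */
	toy_rcu_read_unlock();			/* outer unlock: cleanup runs here */
	return 0;
}
```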
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
new file mode 100644
index 000000000000..cd2ab67400c6
--- /dev/null
+++ b/kernel/rcutree_plugin.h
@@ -0,0 +1,447 @@
/*
 * Read-Copy Update mechanism for mutual exclusion (tree-based version)
 * Internal non-public definitions that provide either classic
 * or preemptable semantics.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright Red Hat, 2009
 * Copyright IBM Corporation, 2009
 *
 * Author: Ingo Molnar <mingo@elte.hu>
 *	   Paul E. McKenney <paulmck@linux.vnet.ibm.com>
 */


#ifdef CONFIG_TREE_PREEMPT_RCU

struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt_state);
DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data);

/*
 * Tell them what RCU they are running.
 */
static inline void rcu_bootup_announce(void)
{
	printk(KERN_INFO
	       "Experimental preemptable hierarchical RCU implementation.\n");
}

/*
 * Return the number of RCU-preempt batches processed thus far
 * for debug and statistics.
 */
long rcu_batches_completed_preempt(void)
{
	return rcu_preempt_state.completed;
}
EXPORT_SYMBOL_GPL(rcu_batches_completed_preempt);

/*
 * Return the number of RCU batches processed thus far for debug & stats.
 */
long rcu_batches_completed(void)
{
	return rcu_batches_completed_preempt();
}
EXPORT_SYMBOL_GPL(rcu_batches_completed);

/*
 * Record a preemptable-RCU quiescent state for the specified CPU. Note
 * that this just means that the task currently running on the CPU is
 * not in a quiescent state. There might be any number of tasks blocked
 * while in an RCU read-side critical section.
 */
static void rcu_preempt_qs_record(int cpu)
{
	struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu);
	rdp->passed_quiesc = 1;
	rdp->passed_quiesc_completed = rdp->completed;
}

/*
 * We have entered the scheduler or are between softirqs in ksoftirqd.
 * If we are in an RCU read-side critical section, we need to reflect
 * that in the state of the rcu_node structure corresponding to this CPU.
 * Caller must disable hardirqs.
 */
static void rcu_preempt_qs(int cpu)
{
	struct task_struct *t = current;
	int phase;
	struct rcu_data *rdp;
	struct rcu_node *rnp;

	if (t->rcu_read_lock_nesting &&
	    (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) {

		/* Possibly blocking in an RCU read-side critical section. */
		rdp = rcu_preempt_state.rda[cpu];
		rnp = rdp->mynode;
		spin_lock(&rnp->lock);
		t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED;
		t->rcu_blocked_cpu = cpu;

		/*
		 * If this CPU has already checked in, then this task
		 * will hold up the next grace period rather than the
		 * current grace period. Queue the task accordingly.
		 * If the task is queued for the current grace period
		 * (i.e., this CPU has not yet passed through a quiescent
		 * state for the current grace period), then as long
		 * as that task remains queued, the current grace period
		 * cannot end.
		 */
		phase = !(rnp->qsmask & rdp->grpmask) ^ (rnp->gpnum & 0x1);
		list_add(&t->rcu_node_entry, &rnp->blocked_tasks[phase]);
		smp_mb(); /* Ensure later ctxt swtch seen after above. */
		spin_unlock(&rnp->lock);
	}

	/*
	 * Either we were not in an RCU read-side critical section to
	 * begin with, or we have now recorded that critical section
	 * globally. Either way, we can now note a quiescent state
	 * for this CPU. Again, if we were in an RCU read-side critical
	 * section, and if that critical section was blocking the current
	 * grace period, then the fact that the task has been enqueued
	 * means that we continue to block the current grace period.
	 */
	rcu_preempt_qs_record(cpu);
	t->rcu_read_unlock_special &= ~(RCU_READ_UNLOCK_NEED_QS |
					RCU_READ_UNLOCK_GOT_QS);
}

/*
 * Tree-preemptable RCU implementation for rcu_read_lock().
 * Just increment ->rcu_read_lock_nesting, shared state will be updated
 * if we block.
 */
void __rcu_read_lock(void)
{
	ACCESS_ONCE(current->rcu_read_lock_nesting)++;
	barrier(); /* needed if we ever invoke rcu_read_lock in rcutree.c */
}
EXPORT_SYMBOL_GPL(__rcu_read_lock);

static void rcu_read_unlock_special(struct task_struct *t)
{
	int empty;
	unsigned long flags;
	unsigned long mask;
	struct rcu_node *rnp;
	int special;

	/* NMI handlers cannot block and cannot safely manipulate state. */
	if (in_nmi())
		return;

	local_irq_save(flags);

	/*
	 * If RCU core is waiting for this CPU to exit critical section,
	 * let it know that we have done so.
	 */
	special = t->rcu_read_unlock_special;
	if (special & RCU_READ_UNLOCK_NEED_QS) {
		t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
		t->rcu_read_unlock_special |= RCU_READ_UNLOCK_GOT_QS;
	}

	/* Hardware IRQ handlers cannot block. */
	if (in_irq()) {
		local_irq_restore(flags);
		return;
	}

	/* Clean up if blocked during RCU read-side critical section. */
	if (special & RCU_READ_UNLOCK_BLOCKED) {
		t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED;

		/* Remove this task from the list it blocked on. */
		rnp = rcu_preempt_state.rda[t->rcu_blocked_cpu]->mynode;
		spin_lock(&rnp->lock);
		empty = list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1]);
		list_del_init(&t->rcu_node_entry);
		t->rcu_blocked_cpu = -1;

		/*
		 * If this was the last task on the current list, and if
		 * we aren't waiting on any CPUs, report the quiescent state.
		 * Note that both cpu_quiet_msk_finish() and cpu_quiet_msk()
		 * drop rnp->lock and restore irq.
		 */
		if (!empty && rnp->qsmask == 0 &&
		    list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1])) {
			t->rcu_read_unlock_special &=
				~(RCU_READ_UNLOCK_NEED_QS |
				  RCU_READ_UNLOCK_GOT_QS);
			if (rnp->parent == NULL) {
				/* Only one rcu_node in the tree. */
				cpu_quiet_msk_finish(&rcu_preempt_state, flags);
				return;
			}
			/* Report up the rest of the hierarchy. */
			mask = rnp->grpmask;
			spin_unlock_irqrestore(&rnp->lock, flags);
			rnp = rnp->parent;
			spin_lock_irqsave(&rnp->lock, flags);
			cpu_quiet_msk(mask, &rcu_preempt_state, rnp, flags);
			return;
		}
		spin_unlock(&rnp->lock);
	}
	local_irq_restore(flags);
}

/*
 * Tree-preemptable RCU implementation for rcu_read_unlock().
 * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost
 * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
 * invoke rcu_read_unlock_special() to clean up after a context switch
 * in an RCU read-side critical section and other special cases.
 */
void __rcu_read_unlock(void)
{
	struct task_struct *t = current;

	barrier(); /* needed if we ever invoke rcu_read_unlock in rcutree.c */
	if (--ACCESS_ONCE(t->rcu_read_lock_nesting) == 0 &&
	    unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
		rcu_read_unlock_special(t);
}
EXPORT_SYMBOL_GPL(__rcu_read_unlock);

#ifdef CONFIG_RCU_CPU_STALL_DETECTOR

/*
 * Scan the current list of tasks blocked within RCU read-side critical
 * sections, printing out the tid of each.
 */
static void rcu_print_task_stall(struct rcu_node *rnp)
{
	unsigned long flags;
	struct list_head *lp;
	int phase = rnp->gpnum & 0x1;
	struct task_struct *t;

	if (!list_empty(&rnp->blocked_tasks[phase])) {
		spin_lock_irqsave(&rnp->lock, flags);
		phase = rnp->gpnum & 0x1; /* re-read under lock. */
		lp = &rnp->blocked_tasks[phase];
		list_for_each_entry(t, lp, rcu_node_entry)
			printk(" P%d", t->pid);
		spin_unlock_irqrestore(&rnp->lock, flags);
	}
}

#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */

/*
 * Check for preempted RCU readers for the specified rcu_node structure.
 * If the caller needs a reliable answer, it must hold the rcu_node's
 * ->lock.
 */
static int rcu_preempted_readers(struct rcu_node *rnp)
{
	return !list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1]);
}

/*
 * Check for a quiescent state from the current CPU. When a task blocks,
 * the task is recorded in the corresponding CPU's rcu_node structure,
 * which is checked elsewhere.
 *
 * Caller must disable hard irqs.
 */
static void rcu_preempt_check_callbacks(int cpu)
{
	struct task_struct *t = current;

	if (t->rcu_read_lock_nesting == 0) {
		t->rcu_read_unlock_special &=
			~(RCU_READ_UNLOCK_NEED_QS | RCU_READ_UNLOCK_GOT_QS);
		rcu_preempt_qs_record(cpu);
		return;
	}
	if (per_cpu(rcu_preempt_data, cpu).qs_pending) {
		if (t->rcu_read_unlock_special & RCU_READ_UNLOCK_GOT_QS) {
			rcu_preempt_qs_record(cpu);
			t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_GOT_QS;
		} else if (!(t->rcu_read_unlock_special &
			     RCU_READ_UNLOCK_NEED_QS)) {
			t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS;
		}
	}
}

/*
 * Process callbacks for preemptable RCU.
 */
static void rcu_preempt_process_callbacks(void)
{
	__rcu_process_callbacks(&rcu_preempt_state,
				&__get_cpu_var(rcu_preempt_data));
}

/*
 * Queue a preemptable-RCU callback for invocation after a grace period.
 */
void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
{
	__call_rcu(head, func, &rcu_preempt_state);
}
EXPORT_SYMBOL_GPL(call_rcu);

/*
 * Check to see if there is any immediate preemptable-RCU-related work
 * to be done.
 */
static int rcu_preempt_pending(int cpu)
{
	return __rcu_pending(&rcu_preempt_state,
			     &per_cpu(rcu_preempt_data, cpu));
}

/*
 * Does preemptable RCU need the CPU to stay out of dynticks mode?
 */
static int rcu_preempt_needs_cpu(int cpu)
{
	return !!per_cpu(rcu_preempt_data, cpu).nxtlist;
}

/*
 * Initialize preemptable RCU's per-CPU data.
 */
static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
{
	rcu_init_percpu_data(cpu, &rcu_preempt_state, 1);
}

/*
 * Check for a task exiting while in a preemptable-RCU read-side
 * critical section, clean up if so. No need to issue warnings,
 * as debug_check_no_locks_held() already does this if lockdep
 * is enabled.
 */
void exit_rcu(void)
{
	struct task_struct *t = current;

	if (t->rcu_read_lock_nesting == 0)
		return;
	t->rcu_read_lock_nesting = 1;
	rcu_read_unlock();
}

#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */

/*
 * Tell them what RCU they are running.
 */
static inline void rcu_bootup_announce(void)
{
	printk(KERN_INFO "Hierarchical RCU implementation.\n");
}

/*
 * Return the number of RCU batches processed thus far for debug & stats.
 */
long rcu_batches_completed(void)
{
	return rcu_batches_completed_sched();
}
EXPORT_SYMBOL_GPL(rcu_batches_completed);

/*
 * Because preemptable RCU does not exist, we never have to check for
 * CPUs being in quiescent states.
 */
static void rcu_preempt_qs(int cpu)
{
}

#ifdef CONFIG_RCU_CPU_STALL_DETECTOR

/*
 * Because preemptable RCU does not exist, we never have to check for
 * tasks blocked within RCU read-side critical sections.
 */
static void rcu_print_task_stall(struct rcu_node *rnp)
{
}

#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */

/*
 * Because preemptable RCU does not exist, there are never any preempted
 * RCU readers.
 */
static int rcu_preempted_readers(struct rcu_node *rnp)
{
	return 0;
}

/*
 * Because preemptable RCU does not exist, it never has any callbacks
 * to check.
 */
void rcu_preempt_check_callbacks(int cpu)
{
}

/*
 * Because preemptable RCU does not exist, it never has any callbacks
 * to process.
 */
void rcu_preempt_process_callbacks(void)
{
}

/*
 * In classic RCU, call_rcu() is just call_rcu_sched().
 */
void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
{
	call_rcu_sched(head, func);
}
EXPORT_SYMBOL_GPL(call_rcu);

/*
 * Because preemptable RCU does not exist, it never has any work to do.
 */
static int rcu_preempt_pending(int cpu)
{
	return 0;
}

/*
 * Because preemptable RCU does not exist, it never needs any CPU.
 */
static int rcu_preempt_needs_cpu(int cpu)
{
	return 0;
}

/*
 * Because preemptable RCU does not exist, there is no per-CPU
 * data to initialize.
 */
static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
{
}

#endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */
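For reference, the list-selection step in rcu_preempt_qs() above can be exercised in isolation. The following stand-alone sketch (my illustration, not part of the patch; pick_phase() and all mask/gpnum values are invented) shows which blocked_tasks[] list a newly preempted reader would be queued on: the list indexed by gpnum & 1 if this CPU has not yet reported a quiescent state for the current grace period, and the other list if it already has.

```c
/*
 * Stand-alone illustration (not kernel code) of the expression used in
 * rcu_preempt_qs():
 *
 *	phase = !(rnp->qsmask & rdp->grpmask) ^ (rnp->gpnum & 0x1);
 *
 * blocked_tasks[gpnum & 1] holds tasks blocking the CURRENT grace
 * period; the other list holds tasks that can only block the NEXT one.
 */
#include <stdio.h>

static int pick_phase(unsigned long qsmask, unsigned long grpmask,
		      unsigned long gpnum)
{
	return !(qsmask & grpmask) ^ (int)(gpnum & 0x1);
}

int main(void)
{
	unsigned long gpnum = 5;	/* current-GP list index is 5 & 1 == 1 */
	unsigned long grpmask = 0x4;	/* this CPU's bit within its rcu_node */

	/* CPU has NOT yet checked in (its bit still set in qsmask): */
	printf("not checked in  -> blocked_tasks[%d] (current GP)\n",
	       pick_phase(0x6, grpmask, gpnum));	/* prints 1 */

	/* CPU HAS already checked in (its bit cleared from qsmask): */
	printf("already checked in -> blocked_tasks[%d] (next GP)\n",
	       pick_phase(0x2, grpmask, gpnum));	/* prints 0 */

	return 0;
}
```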