author    Paul E. McKenney <paulmck@linux.vnet.ibm.com>    2008-01-25 15:08:24 -0500
committer Ingo Molnar <mingo@elte.hu>                      2008-01-25 15:08:24 -0500
commit    01c1c660f4b8086cad7a62345fd04290f3d82c8f
tree      bd09ab1fc3f9e267b1ea78f1a41b121175e1585a /kernel/rcuclassic.c
parent    c2d727aa2ff17a1c8e5ed1e5e231bb8579b27e82
Preempt-RCU: reorganize RCU code into rcuclassic.c and rcupdate.c
This patch re-organizes the RCU code to enable multiple implementations of RCU. Users of RCU continue to include rcupdate.h and the RCU interfaces remain the same. This is in preparation for subsequently merging the preemptible RCU implementation.

Signed-off-by: Gautham R Shenoy <ego@in.ibm.com>
Signed-off-by: Dipankar Sarma <dipankar@in.ibm.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/rcuclassic.c')
-rw-r--r--  kernel/rcuclassic.c  |  576
1 file changed, 576 insertions(+), 0 deletions(-)
diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
new file mode 100644
index 000000000000..18369e3386e2
--- /dev/null
+++ b/kernel/rcuclassic.c
@@ -0,0 +1,576 @@
/*
 * Read-Copy Update mechanism for mutual exclusion
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright IBM Corporation, 2001
 *
 * Authors: Dipankar Sarma <dipankar@in.ibm.com>
 *          Manfred Spraul <manfred@colorfullife.com>
 *
 * Based on the original work by Paul McKenney <paulmck@us.ibm.com>
 * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
 * Papers:
 * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf
 * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001)
 *
 * For detailed explanation of Read-Copy Update mechanism see -
 *          Documentation/RCU
 *
 */
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/smp.h>
#include <linux/rcupdate.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <asm/atomic.h>
#include <linux/bitops.h>
#include <linux/module.h>
#include <linux/completion.h>
#include <linux/moduleparam.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
/* #include <linux/rcupdate.h> @@@ */
#include <linux/cpu.h>
#include <linux/mutex.h>

#ifdef CONFIG_DEBUG_LOCK_ALLOC
static struct lock_class_key rcu_lock_key;
struct lockdep_map rcu_lock_map =
        STATIC_LOCKDEP_MAP_INIT("rcu_read_lock", &rcu_lock_key);
EXPORT_SYMBOL_GPL(rcu_lock_map);
#endif


/* Definition for rcupdate control block. */
static struct rcu_ctrlblk rcu_ctrlblk = {
        .cur = -300,
        .completed = -300,
        .lock = __SPIN_LOCK_UNLOCKED(&rcu_ctrlblk.lock),
        .cpumask = CPU_MASK_NONE,
};
static struct rcu_ctrlblk rcu_bh_ctrlblk = {
        .cur = -300,
        .completed = -300,
        .lock = __SPIN_LOCK_UNLOCKED(&rcu_bh_ctrlblk.lock),
        .cpumask = CPU_MASK_NONE,
};

DEFINE_PER_CPU(struct rcu_data, rcu_data) = { 0L };
DEFINE_PER_CPU(struct rcu_data, rcu_bh_data) = { 0L };

static int blimit = 10;         /* maximum callbacks per rcu_do_batch pass */
static int qhimark = 10000;     /* queue length above which batching is bypassed */
static int qlowmark = 100;      /* queue length below which batching is restored */

#ifdef CONFIG_SMP
static void force_quiescent_state(struct rcu_data *rdp,
                        struct rcu_ctrlblk *rcp)
{
        int cpu;
        cpumask_t cpumask;
        set_need_resched();
        if (unlikely(!rcp->signaled)) {
                rcp->signaled = 1;
                /*
                 * Don't send IPI to itself. With irqs disabled,
                 * rdp->cpu is the current cpu.
                 */
                cpumask = rcp->cpumask;
                cpu_clear(rdp->cpu, cpumask);
                for_each_cpu_mask(cpu, cpumask)
                        smp_send_reschedule(cpu);
        }
}
#else
static inline void force_quiescent_state(struct rcu_data *rdp,
                        struct rcu_ctrlblk *rcp)
{
        set_need_resched();
}
#endif

/**
 * call_rcu - Queue an RCU callback for invocation after a grace period.
 * @head: structure to be used for queueing the RCU updates.
 * @func: actual update function to be invoked after the grace period
 *
 * The update function will be invoked some time after a full grace
 * period elapses, in other words after all currently executing RCU
 * read-side critical sections have completed.  RCU read-side critical
 * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
 * and may be nested.
 */
void call_rcu(struct rcu_head *head,
                                void (*func)(struct rcu_head *rcu))
{
        unsigned long flags;
        struct rcu_data *rdp;

        head->func = func;
        head->next = NULL;
        local_irq_save(flags);
        rdp = &__get_cpu_var(rcu_data);
        *rdp->nxttail = head;
        rdp->nxttail = &head->next;
        if (unlikely(++rdp->qlen > qhimark)) {
                rdp->blimit = INT_MAX;
                force_quiescent_state(rdp, &rcu_ctrlblk);
        }
        local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(call_rcu);

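/*
 * Example usage (an illustrative sketch, not part of this patch; the
 * "struct foo" type and the foo_reclaim()/foo_remove() helpers are
 * hypothetical).  A typical updater unlinks an element and uses
 * call_rcu() to defer its kfree() until all pre-existing readers have
 * finished:
 *
 *      struct foo {
 *              struct list_head list;
 *              int data;
 *              struct rcu_head rcu;
 *      };
 *
 *      static void foo_reclaim(struct rcu_head *head)
 *      {
 *              kfree(container_of(head, struct foo, rcu));
 *      }
 *
 *      static void foo_remove(struct foo *fp)
 *      {
 *              list_del_rcu(&fp->list);
 *              call_rcu(&fp->rcu, foo_reclaim);
 *      }
 */
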
/**
 * call_rcu_bh - Queue an RCU callback for invocation after a quicker grace period.
 * @head: structure to be used for queueing the RCU updates.
 * @func: actual update function to be invoked after the grace period
 *
 * The update function will be invoked some time after a full grace
 * period elapses, in other words after all currently executing RCU
 * read-side critical sections have completed.  call_rcu_bh() assumes
 * that the read-side critical sections end on completion of a softirq
 * handler.  This means that read-side critical sections in process
 * context must not be interrupted by softirqs.  This interface is to be
 * used when most of the read-side critical sections are in softirq context.
 * RCU read-side critical sections are delimited by rcu_read_lock() and
 * rcu_read_unlock(), if in interrupt context, or by rcu_read_lock_bh()
 * and rcu_read_unlock_bh(), if in process context.  These may be nested.
 */
void call_rcu_bh(struct rcu_head *head,
                                void (*func)(struct rcu_head *rcu))
{
        unsigned long flags;
        struct rcu_data *rdp;

        head->func = func;
        head->next = NULL;
        local_irq_save(flags);
        rdp = &__get_cpu_var(rcu_bh_data);
        *rdp->nxttail = head;
        rdp->nxttail = &head->next;

        if (unlikely(++rdp->qlen > qhimark)) {
                rdp->blimit = INT_MAX;
                force_quiescent_state(rdp, &rcu_bh_ctrlblk);
        }

        local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(call_rcu_bh);

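/*
 * Illustrative read-side sketch for call_rcu_bh() (an assumption added
 * for documentation purposes, not part of this patch; "gp" and
 * do_something_with() are hypothetical).  Readers in process context
 * pair the _bh primitives with rcu_dereference():
 *
 *      struct foo *p;
 *
 *      rcu_read_lock_bh();
 *      p = rcu_dereference(gp);
 *      if (p)
 *              do_something_with(p);
 *      rcu_read_unlock_bh();
 */
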
/*
 * Return the number of RCU batches processed thus far.  Useful
 * for debug and statistics.
 */
long rcu_batches_completed(void)
{
        return rcu_ctrlblk.completed;
}
EXPORT_SYMBOL_GPL(rcu_batches_completed);

/*
 * Return the number of RCU bottom-half batches processed thus far.
 * Useful for debug and statistics.
 */
long rcu_batches_completed_bh(void)
{
        return rcu_bh_ctrlblk.completed;
}
EXPORT_SYMBOL_GPL(rcu_batches_completed_bh);

/* Raises the softirq for processing rcu_callbacks. */
static inline void raise_rcu_softirq(void)
{
        raise_softirq(RCU_SOFTIRQ);
        /*
         * The smp_mb() here is required to ensure that this cpu's
         * __rcu_process_callbacks() reads the most recently updated
         * value of rcu->cur.
         */
        smp_mb();
}

/*
 * Invoke the completed RCU callbacks. They are expected to be in
 * a per-cpu list.
 */
static void rcu_do_batch(struct rcu_data *rdp)
{
        struct rcu_head *next, *list;
        int count = 0;

        list = rdp->donelist;
        while (list) {
                next = list->next;
                prefetch(next);
                list->func(list);
                list = next;
                if (++count >= rdp->blimit)
                        break;
        }
        rdp->donelist = list;

        local_irq_disable();
        rdp->qlen -= count;
        local_irq_enable();
        if (rdp->blimit == INT_MAX && rdp->qlen <= qlowmark)
                rdp->blimit = blimit;

        if (!rdp->donelist)
                rdp->donetail = &rdp->donelist;
        else
                raise_rcu_softirq();
}

/*
 * Grace period handling:
 * The grace period handling consists of two steps:
 * - A new grace period is started.
 *   This is done by rcu_start_batch. The start is not broadcast to
 *   all cpus; they must pick this up by comparing rcp->cur with
 *   rdp->quiescbatch. All cpus are recorded in the
 *   rcu_ctrlblk.cpumask bitmap.
 * - All cpus must go through a quiescent state.
 *   Since the start of the grace period is not broadcast, at least two
 *   calls to rcu_check_quiescent_state are required:
 *   The first call just notices that a new grace period is running. The
 *   following calls check if there was a quiescent state since the
 *   beginning of the grace period. If so, it updates rcu_ctrlblk.cpumask.
 *   If the bitmap is empty, then the grace period is completed.
 *   rcu_check_quiescent_state calls rcu_start_batch() to start the next
 *   grace period (if necessary).
 */
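/*
 * Worked example with made-up numbers: suppose rcp->cur == rcp->completed
 * == -298, so no grace period is running.  rcu_start_batch() advances
 * rcp->cur to -297 and seeds rcp->cpumask with the online cpus.  Each cpu
 * then sees rcp->cur != rdp->quiescbatch in rcu_check_quiescent_state(),
 * notes that it owes a quiescent state, and on a later call (after it has
 * passed through one) clears its bit via cpu_quiet().  When the last bit
 * clears, rcp->completed becomes -297 and callbacks queued for batch -297
 * become eligible for rcu_do_batch().
 */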
/*
 * Register a new batch of callbacks, and start it up if there is currently no
 * active batch and the batch to be registered has not already occurred.
 * Caller must hold rcu_ctrlblk.lock.
 */
static void rcu_start_batch(struct rcu_ctrlblk *rcp)
{
        if (rcp->next_pending &&
                        rcp->completed == rcp->cur) {
                rcp->next_pending = 0;
                /*
                 * next_pending == 0 must be visible in
                 * __rcu_process_callbacks() before it can see new value of cur.
                 */
                smp_wmb();
                rcp->cur++;

                /*
                 * Accessing nohz_cpu_mask before incrementing rcp->cur needs a
                 * barrier.  Otherwise it can cause tickless idle CPUs to be
                 * included in rcp->cpumask, which will extend grace periods
                 * unnecessarily.
                 */
                smp_mb();
                cpus_andnot(rcp->cpumask, cpu_online_map, nohz_cpu_mask);

                rcp->signaled = 0;
        }
}

/*
 * The cpu went through a quiescent state since the beginning of the grace
 * period. Clear it from the cpu mask and complete the grace period if it
 * was the last cpu. Start another grace period if someone has further
 * entries pending.
 */
static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp)
{
        cpu_clear(cpu, rcp->cpumask);
        if (cpus_empty(rcp->cpumask)) {
                /* batch completed ! */
                rcp->completed = rcp->cur;
                rcu_start_batch(rcp);
        }
}

/*
 * Check if the cpu has gone through a quiescent state (say context
 * switch). If so, and if it hasn't already done so in this RCU
 * quiescent cycle, then indicate that it has done so.
 */
static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp,
                                        struct rcu_data *rdp)
{
        if (rdp->quiescbatch != rcp->cur) {
                /* start new grace period: */
                rdp->qs_pending = 1;
                rdp->passed_quiesc = 0;
                rdp->quiescbatch = rcp->cur;
                return;
        }

        /* Grace period already completed for this cpu?
         * qs_pending is checked instead of the actual bitmap to avoid
         * cacheline thrashing.
         */
        if (!rdp->qs_pending)
                return;

        /*
         * Was there a quiescent state since the beginning of the grace
         * period? If no, then exit and wait for the next call.
         */
        if (!rdp->passed_quiesc)
                return;
        rdp->qs_pending = 0;

        spin_lock(&rcp->lock);
        /*
         * rdp->quiescbatch/rcp->cur and the cpu bitmap can come out of sync
         * during cpu startup. Ignore the quiescent state.
         */
        if (likely(rdp->quiescbatch == rcp->cur))
                cpu_quiet(rdp->cpu, rcp);

        spin_unlock(&rcp->lock);
}

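/*
 * For reference, a sketch of the producer side of passed_quiesc (an
 * assumption based on the rest of this patch series, not part of this
 * file): the scheduler reports a context switch by doing something
 * like the following from rcu_qsctr_inc(cpu):
 *
 *      struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
 *      rdp->passed_quiesc = 1;
 */
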
#ifdef CONFIG_HOTPLUG_CPU

/* warning! helper for rcu_offline_cpu. do not use elsewhere without reviewing
 * the locking requirements; the list it's pulling from has to belong to a cpu
 * which is dead and hence not processing interrupts.
 */
static void rcu_move_batch(struct rcu_data *this_rdp, struct rcu_head *list,
                                struct rcu_head **tail)
{
        local_irq_disable();
        *this_rdp->nxttail = list;
        if (list)
                this_rdp->nxttail = tail;
        local_irq_enable();
}

static void __rcu_offline_cpu(struct rcu_data *this_rdp,
                                struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
{
        /* if the cpu going offline owns the grace period
         * we can block indefinitely waiting for it, so flush
         * it here
         */
        spin_lock_bh(&rcp->lock);
        if (rcp->cur != rcp->completed)
                cpu_quiet(rdp->cpu, rcp);
        spin_unlock_bh(&rcp->lock);
        rcu_move_batch(this_rdp, rdp->curlist, rdp->curtail);
        rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail);
        rcu_move_batch(this_rdp, rdp->donelist, rdp->donetail);
}

static void rcu_offline_cpu(int cpu)
{
        struct rcu_data *this_rdp = &get_cpu_var(rcu_data);
        struct rcu_data *this_bh_rdp = &get_cpu_var(rcu_bh_data);

        __rcu_offline_cpu(this_rdp, &rcu_ctrlblk,
                          &per_cpu(rcu_data, cpu));
        __rcu_offline_cpu(this_bh_rdp, &rcu_bh_ctrlblk,
                          &per_cpu(rcu_bh_data, cpu));
        put_cpu_var(rcu_data);
        put_cpu_var(rcu_bh_data);
}

#else

static void rcu_offline_cpu(int cpu)
{
}

#endif

/*
 * This does the RCU processing work from softirq context.
 */
static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp,
                                        struct rcu_data *rdp)
{
        if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch)) {
                *rdp->donetail = rdp->curlist;
                rdp->donetail = rdp->curtail;
                rdp->curlist = NULL;
                rdp->curtail = &rdp->curlist;
        }

        if (rdp->nxtlist && !rdp->curlist) {
                local_irq_disable();
                rdp->curlist = rdp->nxtlist;
                rdp->curtail = rdp->nxttail;
                rdp->nxtlist = NULL;
                rdp->nxttail = &rdp->nxtlist;
                local_irq_enable();

                /*
                 * start the next batch of callbacks
                 */

                /* determine batch number */
                rdp->batch = rcp->cur + 1;
                /* see the comment and corresponding wmb() in
                 * rcu_start_batch()
                 */
                smp_rmb();

                if (!rcp->next_pending) {
                        /* and start it/schedule start if it's a new batch */
                        spin_lock(&rcp->lock);
                        rcp->next_pending = 1;
                        rcu_start_batch(rcp);
                        spin_unlock(&rcp->lock);
                }
        }

        rcu_check_quiescent_state(rcp, rdp);
        if (rdp->donelist)
                rcu_do_batch(rdp);
}

static void rcu_process_callbacks(struct softirq_action *unused)
{
        __rcu_process_callbacks(&rcu_ctrlblk, &__get_cpu_var(rcu_data));
        __rcu_process_callbacks(&rcu_bh_ctrlblk, &__get_cpu_var(rcu_bh_data));
}

static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
{
        /* This cpu has pending rcu entries and the grace period
         * for them has completed.
         */
        if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch))
                return 1;

        /* This cpu has no pending entries, but there are new entries */
        if (!rdp->curlist && rdp->nxtlist)
                return 1;

        /* This cpu has finished callbacks to invoke */
        if (rdp->donelist)
                return 1;

        /* The rcu core waits for a quiescent state from the cpu */
        if (rdp->quiescbatch != rcp->cur || rdp->qs_pending)
                return 1;

        /* nothing to do */
        return 0;
}

/*
 * Check to see if there is any immediate RCU-related work to be done
 * by the current CPU, returning 1 if so.  This function is part of the
 * RCU implementation; it is -not- an exported member of the RCU API.
 */
int rcu_pending(int cpu)
{
        return __rcu_pending(&rcu_ctrlblk, &per_cpu(rcu_data, cpu)) ||
                __rcu_pending(&rcu_bh_ctrlblk, &per_cpu(rcu_bh_data, cpu));
}

/*
 * Check to see if any future RCU-related work will need to be done
 * by the current CPU, even if none need be done immediately, returning
 * 1 if so.  This function is part of the RCU implementation; it is -not-
 * an exported member of the RCU API.
 */
int rcu_needs_cpu(int cpu)
{
        struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
        struct rcu_data *rdp_bh = &per_cpu(rcu_bh_data, cpu);

        return (!!rdp->curlist || !!rdp_bh->curlist || rcu_pending(cpu));
}

/*
 * Called from the timer interrupt on each cpu.  A tick taken in user
 * mode, or in the idle loop with no softirq in progress and at most the
 * timer irq itself on the hardirq stack, is a quiescent state for both
 * rcu and rcu_bh.  Otherwise, any tick taken outside of softirq context
 * still counts as a quiescent state for rcu_bh.
 */
void rcu_check_callbacks(int cpu, int user)
{
        if (user ||
            (idle_cpu(cpu) && !in_softirq() &&
                                hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
                rcu_qsctr_inc(cpu);
                rcu_bh_qsctr_inc(cpu);
        } else if (!in_softirq())
                rcu_bh_qsctr_inc(cpu);
        raise_rcu_softirq();
}

static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp,
                                                struct rcu_data *rdp)
{
        memset(rdp, 0, sizeof(*rdp));
        rdp->curtail = &rdp->curlist;
        rdp->nxttail = &rdp->nxtlist;
        rdp->donetail = &rdp->donelist;
        rdp->quiescbatch = rcp->completed;
        rdp->qs_pending = 0;
        rdp->cpu = cpu;
        rdp->blimit = blimit;
}

static void __cpuinit rcu_online_cpu(int cpu)
{
        struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
        struct rcu_data *bh_rdp = &per_cpu(rcu_bh_data, cpu);

        rcu_init_percpu_data(cpu, &rcu_ctrlblk, rdp);
        rcu_init_percpu_data(cpu, &rcu_bh_ctrlblk, bh_rdp);
        open_softirq(RCU_SOFTIRQ, rcu_process_callbacks, NULL);
}

static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
                                unsigned long action, void *hcpu)
{
        long cpu = (long)hcpu;

        switch (action) {
        case CPU_UP_PREPARE:
        case CPU_UP_PREPARE_FROZEN:
                rcu_online_cpu(cpu);
                break;
        case CPU_DEAD:
        case CPU_DEAD_FROZEN:
                rcu_offline_cpu(cpu);
                break;
        default:
                break;
        }
        return NOTIFY_OK;
}

static struct notifier_block __cpuinitdata rcu_nb = {
        .notifier_call = rcu_cpu_notify,
};

/*
 * Initializes the RCU mechanism.  Assumed to be called early, i.e.
 * before the local timer (SMP) or jiffy timer (uniprocessor) is set up.
 * Note that rcu_qsctr and friends are implicitly
 * initialized due to the choice of ``0'' for RCU_CTR_INVALID.
 */
void __init __rcu_init(void)
{
        rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE,
                        (void *)(long)smp_processor_id());
        /* Register notifier for non-boot CPUs */
        register_cpu_notifier(&rcu_nb);
}

module_param(blimit, int, 0);
module_param(qhimark, int, 0);
module_param(qlowmark, int, 0);
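
/*
 * Tuning sketch (an assumption, not part of this patch): because this
 * code is built in, these module parameters should be settable on the
 * kernel command line prefixed by the object name, e.g.:
 *
 *      rcuclassic.blimit=20 rcuclassic.qhimark=20000 rcuclassic.qlowmark=200
 *
 * which raises the per-pass callback budget and the queue-length marks
 * at which callback batching is bypassed (qhimark) and restored
 * (qlowmark).
 */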