aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorPaul E. McKenney <paulmck@linux.vnet.ibm.com>2008-01-25 15:08:24 -0500
committerIngo Molnar <mingo@elte.hu>2008-01-25 15:08:24 -0500
commit01c1c660f4b8086cad7a62345fd04290f3d82c8f (patch)
treebd09ab1fc3f9e267b1ea78f1a41b121175e1585a /kernel
parentc2d727aa2ff17a1c8e5ed1e5e231bb8579b27e82 (diff)
Preempt-RCU: reorganize RCU code into rcuclassic.c and rcupdate.c
This patch re-organizes the RCU code to enable multiple implementations of RCU. Users of RCU continue to include rcupdate.h and the RCU interfaces remain the same. This is in preparation for subsequently merging the preemptible RCU implementation. Signed-off-by: Gautham R Shenoy <ego@in.ibm.com> Signed-off-by: Dipankar Sarma <dipankar@in.ibm.com> Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Reviewed-by: Steven Rostedt <srostedt@redhat.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile2
-rw-r--r--kernel/rcuclassic.c576
-rw-r--r--kernel/rcupdate.c575
3 files changed, 602 insertions, 551 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index dfa96956dae0..def5dd6097a0 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -6,7 +6,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
6 exit.o itimer.o time.o softirq.o resource.o \ 6 exit.o itimer.o time.o softirq.o resource.o \
7 sysctl.o capability.o ptrace.o timer.o user.o user_namespace.o \ 7 sysctl.o capability.o ptrace.o timer.o user.o user_namespace.o \
8 signal.o sys.o kmod.o workqueue.o pid.o \ 8 signal.o sys.o kmod.o workqueue.o pid.o \
9 rcupdate.o extable.o params.o posix-timers.o \ 9 rcupdate.o rcuclassic.o extable.o params.o posix-timers.o \
10 kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ 10 kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
11 hrtimer.o rwsem.o latency.o nsproxy.o srcu.o \ 11 hrtimer.o rwsem.o latency.o nsproxy.o srcu.o \
12 utsname.o notifier.o 12 utsname.o notifier.o
diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
new file mode 100644
index 000000000000..18369e3386e2
--- /dev/null
+++ b/kernel/rcuclassic.c
@@ -0,0 +1,576 @@
1/*
2 * Read-Copy Update mechanism for mutual exclusion
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * Copyright IBM Corporation, 2001
19 *
20 * Authors: Dipankar Sarma <dipankar@in.ibm.com>
21 * Manfred Spraul <manfred@colorfullife.com>
22 *
23 * Based on the original work by Paul McKenney <paulmck@us.ibm.com>
24 * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
25 * Papers:
26 * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf
27 * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001)
28 *
29 * For detailed explanation of Read-Copy Update mechanism see -
30 * Documentation/RCU
31 *
32 */
33#include <linux/types.h>
34#include <linux/kernel.h>
35#include <linux/init.h>
36#include <linux/spinlock.h>
37#include <linux/smp.h>
38#include <linux/rcupdate.h>
39#include <linux/interrupt.h>
40#include <linux/sched.h>
41#include <asm/atomic.h>
42#include <linux/bitops.h>
43#include <linux/module.h>
44#include <linux/completion.h>
45#include <linux/moduleparam.h>
46#include <linux/percpu.h>
47#include <linux/notifier.h>
48/* #include <linux/rcupdate.h> @@@ */
49#include <linux/cpu.h>
50#include <linux/mutex.h>
51
#ifdef CONFIG_DEBUG_LOCK_ALLOC
/* Lockdep class/map representing rcu_read_lock() for dependency checking. */
static struct lock_class_key rcu_lock_key;
struct lockdep_map rcu_lock_map =
	STATIC_LOCKDEP_MAP_INIT("rcu_read_lock", &rcu_lock_key);
EXPORT_SYMBOL_GPL(rcu_lock_map);
#endif


/* Definition for rcupdate control block. */
static struct rcu_ctrlblk rcu_ctrlblk = {
	.cur = -300,	/* arbitrary negative start value; presumably chosen to
			 * flush out batch-number wraparound bugs early --
			 * TODO confirm */
	.completed = -300,
	.lock = __SPIN_LOCK_UNLOCKED(&rcu_ctrlblk.lock),
	.cpumask = CPU_MASK_NONE,
};
/* Control block for the "bh" flavor (read side ends with the softirq). */
static struct rcu_ctrlblk rcu_bh_ctrlblk = {
	.cur = -300,
	.completed = -300,
	.lock = __SPIN_LOCK_UNLOCKED(&rcu_bh_ctrlblk.lock),
	.cpumask = CPU_MASK_NONE,
};

/* Per-cpu callback queues and grace-period bookkeeping, one per flavor. */
DEFINE_PER_CPU(struct rcu_data, rcu_data) = { 0L };
DEFINE_PER_CPU(struct rcu_data, rcu_bh_data) = { 0L };

static int blimit = 10;		/* max callbacks invoked per rcu_do_batch() pass */
static int qhimark = 10000;	/* queue length that triggers forced quiescence */
static int qlowmark = 100;	/* queue length at which blimit is restored */
80
#ifdef CONFIG_SMP
/*
 * Try to push the other cpus through a quiescent state: ask for a
 * reschedule locally and, once per grace period (guarded by
 * rcp->signaled), send a reschedule IPI to every cpu still recorded
 * in rcp->cpumask. Called with irqs disabled (see call_rcu()).
 */
static void force_quiescent_state(struct rcu_data *rdp,
			struct rcu_ctrlblk *rcp)
{
	int cpu;
	cpumask_t cpumask;
	set_need_resched();
	if (unlikely(!rcp->signaled)) {
		rcp->signaled = 1;
		/*
		 * Don't send IPI to itself. With irqs disabled,
		 * rdp->cpu is the current cpu.
		 */
		cpumask = rcp->cpumask;
		cpu_clear(rdp->cpu, cpumask);
		for_each_cpu_mask(cpu, cpumask)
			smp_send_reschedule(cpu);
	}
}
#else
/* UP: rescheduling this cpu is all that is needed to reach quiescence. */
static inline void force_quiescent_state(struct rcu_data *rdp,
			struct rcu_ctrlblk *rcp)
{
	set_need_resched();
}
#endif
107
/**
 * call_rcu - Queue an RCU callback for invocation after a grace period.
 * @head: structure to be used for queueing the RCU updates.
 * @func: actual update function to be invoked after the grace period
 *
 * The update function will be invoked some time after a full grace
 * period elapses, in other words after all currently executing RCU
 * read-side critical sections have completed. RCU read-side critical
 * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
 * and may be nested.
 */
void call_rcu(struct rcu_head *head,
				void (*func)(struct rcu_head *rcu))
{
	unsigned long flags;
	struct rcu_data *rdp;

	head->func = func;
	head->next = NULL;
	/*
	 * Irqs off: this cpu's rcu_data (in particular nxttail/qlen)
	 * must not change underneath us while we append to the list.
	 */
	local_irq_save(flags);
	rdp = &__get_cpu_var(rcu_data);
	*rdp->nxttail = head;
	rdp->nxttail = &head->next;
	if (unlikely(++rdp->qlen > qhimark)) {
		/*
		 * Callback backlog is excessive: remove the per-pass
		 * invocation limit and prod other cpus toward a
		 * quiescent state so the queue can drain.
		 */
		rdp->blimit = INT_MAX;
		force_quiescent_state(rdp, &rcu_ctrlblk);
	}
	local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(call_rcu);
138
/**
 * call_rcu_bh - Queue an RCU for invocation after a quicker grace period.
 * @head: structure to be used for queueing the RCU updates.
 * @func: actual update function to be invoked after the grace period
 *
 * The update function will be invoked some time after a full grace
 * period elapses, in other words after all currently executing RCU
 * read-side critical sections have completed. call_rcu_bh() assumes
 * that the read-side critical sections end on completion of a softirq
 * handler. This means that read-side critical sections in process
 * context must not be interrupted by softirqs. This interface is to be
 * used when most of the read-side critical sections are in softirq context.
 * RCU read-side critical sections are delimited by rcu_read_lock() and
 * rcu_read_unlock() if in interrupt context, or rcu_read_lock_bh()
 * and rcu_read_unlock_bh() if in process context. These may be nested.
 */
void call_rcu_bh(struct rcu_head *head,
				void (*func)(struct rcu_head *rcu))
{
	unsigned long flags;
	struct rcu_data *rdp;

	head->func = func;
	head->next = NULL;
	/* Irqs off: protects this cpu's rcu_bh_data, as in call_rcu(). */
	local_irq_save(flags);
	rdp = &__get_cpu_var(rcu_bh_data);
	*rdp->nxttail = head;
	rdp->nxttail = &head->next;

	if (unlikely(++rdp->qlen > qhimark)) {
		/* Excessive backlog: lift the batch limit and force QS. */
		rdp->blimit = INT_MAX;
		force_quiescent_state(rdp, &rcu_bh_ctrlblk);
	}

	local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(call_rcu_bh);
176
/*
 * Return the number of RCU batches processed thus far. Useful
 * for debug and statistics.
 */
long rcu_batches_completed(void)
{
	return rcu_ctrlblk.completed;
}
EXPORT_SYMBOL_GPL(rcu_batches_completed);
186
/*
 * Return the number of RCU-bh batches processed thus far. Useful
 * for debug and statistics.
 */
long rcu_batches_completed_bh(void)
{
	return rcu_bh_ctrlblk.completed;
}
EXPORT_SYMBOL_GPL(rcu_batches_completed_bh);
196
/* Raises the softirq for processing rcu_callbacks. */
static inline void raise_rcu_softirq(void)
{
	raise_softirq(RCU_SOFTIRQ);
	/*
	 * The smp_mb() here is required to ensure that this cpu's
	 * __rcu_process_callbacks() reads the most recently updated
	 * value of rcu->cur.
	 */
	smp_mb();
}
208
/*
 * Invoke the completed RCU callbacks. They are expected to be in
 * a per-cpu list.
 */
static void rcu_do_batch(struct rcu_data *rdp)
{
	struct rcu_head *next, *list;
	int count = 0;

	list = rdp->donelist;
	while (list) {
		next = list->next;
		prefetch(next);
		list->func(list);
		list = next;
		/* Honor the batch limit to bound softirq latency. */
		if (++count >= rdp->blimit)
			break;
	}
	rdp->donelist = list;

	/* qlen is also modified with irqs off in call_rcu()/call_rcu_bh(). */
	local_irq_disable();
	rdp->qlen -= count;
	local_irq_enable();
	/* Backlog drained below the low-water mark: restore normal limit. */
	if (rdp->blimit == INT_MAX && rdp->qlen <= qlowmark)
		rdp->blimit = blimit;

	if (!rdp->donelist)
		rdp->donetail = &rdp->donelist;
	else
		/* Callbacks remain: re-raise the softirq to continue later. */
		raise_rcu_softirq();
}
240
/*
 * Grace period handling:
 * The grace period handling consists of two steps:
 * - A new grace period is started.
 *   This is done by rcu_start_batch. The start is not broadcast to
 *   all cpus, they must pick this up by comparing rcp->cur with
 *   rdp->quiescbatch. All cpus are recorded in the
 *   rcu_ctrlblk.cpumask bitmap.
 * - All cpus must go through a quiescent state.
 *   Since the start of the grace period is not broadcast, at least two
 *   calls to rcu_check_quiescent_state are required:
 *   The first call just notices that a new grace period is running. The
 *   following calls check if there was a quiescent state since the beginning
 *   of the grace period. If so, it updates rcu_ctrlblk.cpumask. If
 *   the bitmap is empty, then the grace period is completed.
 *   rcu_check_quiescent_state calls rcu_start_batch() to start the next grace
 *   period (if necessary).
 */
/*
 * Register a new batch of callbacks, and start it up if there is currently no
 * active batch and the batch to be registered has not already occurred.
 * Caller must hold rcu_ctrlblk.lock.
 */
static void rcu_start_batch(struct rcu_ctrlblk *rcp)
{
	if (rcp->next_pending &&
			rcp->completed == rcp->cur) {
		rcp->next_pending = 0;
		/*
		 * next_pending == 0 must be visible in
		 * __rcu_process_callbacks() before it can see new value of cur.
		 */
		smp_wmb();
		rcp->cur++;

		/*
		 * Accessing nohz_cpu_mask before incrementing rcp->cur needs a
		 * barrier.  Otherwise it can cause tickless idle CPUs to be
		 * included in rcp->cpumask, which will extend grace periods
		 * unnecessarily.
		 */
		smp_mb();
		cpus_andnot(rcp->cpumask, cpu_online_map, nohz_cpu_mask);

		rcp->signaled = 0;
	}
}
288
289/*
290 * cpu went through a quiescent state since the beginning of the grace period.
291 * Clear it from the cpu mask and complete the grace period if it was the last
292 * cpu. Start another grace period if someone has further entries pending
293 */
294static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp)
295{
296 cpu_clear(cpu, rcp->cpumask);
297 if (cpus_empty(rcp->cpumask)) {
298 /* batch completed ! */
299 rcp->completed = rcp->cur;
300 rcu_start_batch(rcp);
301 }
302}
303
/*
 * Check if the cpu has gone through a quiescent state (say context
 * switch). If so and if it hasn't already done so in this RCU
 * quiescent cycle, then indicate that it has done so.
 */
static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp,
					struct rcu_data *rdp)
{
	if (rdp->quiescbatch != rcp->cur) {
		/* start new grace period: */
		rdp->qs_pending = 1;
		rdp->passed_quiesc = 0;
		rdp->quiescbatch = rcp->cur;
		return;
	}

	/* Grace period already completed for this cpu?
	 * qs_pending is checked instead of the actual bitmap to avoid
	 * cacheline thrashing.
	 */
	if (!rdp->qs_pending)
		return;

	/*
	 * Was there a quiescent state since the beginning of the grace
	 * period? If no, then exit and wait for the next call.
	 */
	if (!rdp->passed_quiesc)
		return;
	rdp->qs_pending = 0;

	spin_lock(&rcp->lock);
	/*
	 * rdp->quiescbatch/rcp->cur and the cpu bitmap can come out of sync
	 * during cpu startup. Ignore the quiescent state.
	 */
	if (likely(rdp->quiescbatch == rcp->cur))
		cpu_quiet(rdp->cpu, rcp);

	spin_unlock(&rcp->lock);
}
345
346
#ifdef CONFIG_HOTPLUG_CPU

/* warning! helper for rcu_offline_cpu. do not use elsewhere without reviewing
 * locking requirements, the list it's pulling from has to belong to a cpu
 * which is dead and hence not processing interrupts.
 */
static void rcu_move_batch(struct rcu_data *this_rdp, struct rcu_head *list,
				struct rcu_head **tail)
{
	/* Irqs off: this cpu's nxttail is also updated from call_rcu(). */
	local_irq_disable();
	*this_rdp->nxttail = list;
	if (list)
		this_rdp->nxttail = tail;
	local_irq_enable();
}
362
static void __rcu_offline_cpu(struct rcu_data *this_rdp,
	struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
{
	/* if the cpu going offline owns the grace period
	 * we can block indefinitely waiting for it, so flush
	 * it here
	 */
	spin_lock_bh(&rcp->lock);
	if (rcp->cur != rcp->completed)
		cpu_quiet(rdp->cpu, rcp);
	spin_unlock_bh(&rcp->lock);
	/* Adopt all of the dead cpu's remaining callbacks onto this cpu. */
	rcu_move_batch(this_rdp, rdp->curlist, rdp->curtail);
	rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail);
	rcu_move_batch(this_rdp, rdp->donelist, rdp->donetail);
}
378
/* Move a dead cpu's callbacks (both flavors) onto the current cpu. */
static void rcu_offline_cpu(int cpu)
{
	/* get_cpu_var disables preemption; matched by put_cpu_var below. */
	struct rcu_data *this_rdp = &get_cpu_var(rcu_data);
	struct rcu_data *this_bh_rdp = &get_cpu_var(rcu_bh_data);

	__rcu_offline_cpu(this_rdp, &rcu_ctrlblk,
			&per_cpu(rcu_data, cpu));
	__rcu_offline_cpu(this_bh_rdp, &rcu_bh_ctrlblk,
			&per_cpu(rcu_bh_data, cpu));
	put_cpu_var(rcu_data);
	put_cpu_var(rcu_bh_data);
}

#else

/* Without CPU hotplug there is never a dead cpu to clean up after. */
static void rcu_offline_cpu(int cpu)
{
}

#endif
399
/*
 * This does the RCU processing work from softirq context.
 */
static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp,
					struct rcu_data *rdp)
{
	/* Current batch's grace period has ended: move it to the done list. */
	if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch)) {
		*rdp->donetail = rdp->curlist;
		rdp->donetail = rdp->curtail;
		rdp->curlist = NULL;
		rdp->curtail = &rdp->curlist;
	}

	if (rdp->nxtlist && !rdp->curlist) {
		/* Irqs off: nxtlist/nxttail are also updated by call_rcu(). */
		local_irq_disable();
		rdp->curlist = rdp->nxtlist;
		rdp->curtail = rdp->nxttail;
		rdp->nxtlist = NULL;
		rdp->nxttail = &rdp->nxtlist;
		local_irq_enable();

		/*
		 * start the next batch of callbacks
		 */

		/* determine batch number */
		rdp->batch = rcp->cur + 1;
		/* see the comment and corresponding wmb() in
		 * the rcu_start_batch()
		 */
		smp_rmb();

		if (!rcp->next_pending) {
			/* and start it/schedule start if it's a new batch */
			spin_lock(&rcp->lock);
			rcp->next_pending = 1;
			rcu_start_batch(rcp);
			spin_unlock(&rcp->lock);
		}
	}

	rcu_check_quiescent_state(rcp, rdp);
	if (rdp->donelist)
		rcu_do_batch(rdp);
}
445
/* RCU_SOFTIRQ handler: run the state machine for both flavors on this cpu. */
static void rcu_process_callbacks(struct softirq_action *unused)
{
	__rcu_process_callbacks(&rcu_ctrlblk, &__get_cpu_var(rcu_data));
	__rcu_process_callbacks(&rcu_bh_ctrlblk, &__get_cpu_var(rcu_bh_data));
}
451
452static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
453{
454 /* This cpu has pending rcu entries and the grace period
455 * for them has completed.
456 */
457 if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch))
458 return 1;
459
460 /* This cpu has no pending entries, but there are new entries */
461 if (!rdp->curlist && rdp->nxtlist)
462 return 1;
463
464 /* This cpu has finished callbacks to invoke */
465 if (rdp->donelist)
466 return 1;
467
468 /* The rcu core waits for a quiescent state from the cpu */
469 if (rdp->quiescbatch != rcp->cur || rdp->qs_pending)
470 return 1;
471
472 /* nothing to do */
473 return 0;
474}
475
476/*
477 * Check to see if there is any immediate RCU-related work to be done
478 * by the current CPU, returning 1 if so. This function is part of the
479 * RCU implementation; it is -not- an exported member of the RCU API.
480 */
481int rcu_pending(int cpu)
482{
483 return __rcu_pending(&rcu_ctrlblk, &per_cpu(rcu_data, cpu)) ||
484 __rcu_pending(&rcu_bh_ctrlblk, &per_cpu(rcu_bh_data, cpu));
485}
486
487/*
488 * Check to see if any future RCU-related work will need to be done
489 * by the current CPU, even if none need be done immediately, returning
490 * 1 if so. This function is part of the RCU implementation; it is -not-
491 * an exported member of the RCU API.
492 */
493int rcu_needs_cpu(int cpu)
494{
495 struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
496 struct rcu_data *rdp_bh = &per_cpu(rcu_bh_data, cpu);
497
498 return (!!rdp->curlist || !!rdp_bh->curlist || rcu_pending(cpu));
499}
500
/*
 * Record any quiescent states this cpu has passed through and kick the
 * RCU softirq so pending work gets processed.
 * @user: nonzero when the caller interrupted user-mode execution.
 */
void rcu_check_callbacks(int cpu, int user)
{
	if (user ||
	    (idle_cpu(cpu) && !in_softirq() &&
				hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
		/*
		 * User mode, or idle with no softirq and (presumably) only
		 * one hardirq level active -- NOTE(review): confirm the
		 * hardirq_count() bound excludes nested interrupts.
		 * Quiescent for both flavors.
		 */
		rcu_qsctr_inc(cpu);
		rcu_bh_qsctr_inc(cpu);
	} else if (!in_softirq())
		/* Outside softirq: quiescent for the bh flavor only. */
		rcu_bh_qsctr_inc(cpu);
	raise_rcu_softirq();
}
512
513static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp,
514 struct rcu_data *rdp)
515{
516 memset(rdp, 0, sizeof(*rdp));
517 rdp->curtail = &rdp->curlist;
518 rdp->nxttail = &rdp->nxtlist;
519 rdp->donetail = &rdp->donelist;
520 rdp->quiescbatch = rcp->completed;
521 rdp->qs_pending = 0;
522 rdp->cpu = cpu;
523 rdp->blimit = blimit;
524}
525
/* Initialize a cpu's per-cpu state (both flavors) and hook up the softirq. */
static void __cpuinit rcu_online_cpu(int cpu)
{
	struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
	struct rcu_data *bh_rdp = &per_cpu(rcu_bh_data, cpu);

	rcu_init_percpu_data(cpu, &rcu_ctrlblk, rdp);
	rcu_init_percpu_data(cpu, &rcu_bh_ctrlblk, bh_rdp);
	open_softirq(RCU_SOFTIRQ, rcu_process_callbacks, NULL);
}
535
536static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
537 unsigned long action, void *hcpu)
538{
539 long cpu = (long)hcpu;
540
541 switch (action) {
542 case CPU_UP_PREPARE:
543 case CPU_UP_PREPARE_FROZEN:
544 rcu_online_cpu(cpu);
545 break;
546 case CPU_DEAD:
547 case CPU_DEAD_FROZEN:
548 rcu_offline_cpu(cpu);
549 break;
550 default:
551 break;
552 }
553 return NOTIFY_OK;
554}
555
/* Hotplug notifier: rcu_cpu_notify handles UP_PREPARE and DEAD events. */
static struct notifier_block __cpuinitdata rcu_nb = {
	.notifier_call	= rcu_cpu_notify,
};

/*
 * Initializes rcu mechanism. Assumed to be called early.
 * That is before local timer(SMP) or jiffie timer (uniproc) is setup.
 * Note that rcu_qsctr and friends are implicitly
 * initialized due to the choice of ``0'' for RCU_CTR_INVALID.
 */
void __init __rcu_init(void)
{
	/* Set up the boot cpu by hand; the notifier only covers later cpus. */
	rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE,
			(void *)(long)smp_processor_id());
	/* Register notifier for non-boot CPUs */
	register_cpu_notifier(&rcu_nb);
}
573
574module_param(blimit, int, 0);
575module_param(qhimark, int, 0);
576module_param(qlowmark, int, 0);
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 4dfa0b792efa..0ccd0095ebdc 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -15,7 +15,7 @@
15 * along with this program; if not, write to the Free Software 15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 * 17 *
18 * Copyright (C) IBM Corporation, 2001 18 * Copyright IBM Corporation, 2001
19 * 19 *
20 * Authors: Dipankar Sarma <dipankar@in.ibm.com> 20 * Authors: Dipankar Sarma <dipankar@in.ibm.com>
21 * Manfred Spraul <manfred@colorfullife.com> 21 * Manfred Spraul <manfred@colorfullife.com>
@@ -35,163 +35,57 @@
35#include <linux/init.h> 35#include <linux/init.h>
36#include <linux/spinlock.h> 36#include <linux/spinlock.h>
37#include <linux/smp.h> 37#include <linux/smp.h>
38#include <linux/rcupdate.h>
39#include <linux/interrupt.h> 38#include <linux/interrupt.h>
40#include <linux/sched.h> 39#include <linux/sched.h>
41#include <asm/atomic.h> 40#include <asm/atomic.h>
42#include <linux/bitops.h> 41#include <linux/bitops.h>
43#include <linux/module.h>
44#include <linux/completion.h> 42#include <linux/completion.h>
45#include <linux/moduleparam.h>
46#include <linux/percpu.h> 43#include <linux/percpu.h>
47#include <linux/notifier.h> 44#include <linux/notifier.h>
48#include <linux/cpu.h> 45#include <linux/cpu.h>
49#include <linux/mutex.h> 46#include <linux/mutex.h>
47#include <linux/module.h>
50 48
51#ifdef CONFIG_DEBUG_LOCK_ALLOC 49struct rcu_synchronize {
52static struct lock_class_key rcu_lock_key; 50 struct rcu_head head;
53struct lockdep_map rcu_lock_map = 51 struct completion completion;
54 STATIC_LOCKDEP_MAP_INIT("rcu_read_lock", &rcu_lock_key);
55
56EXPORT_SYMBOL_GPL(rcu_lock_map);
57#endif
58
59/* Definition for rcupdate control block. */
60static struct rcu_ctrlblk rcu_ctrlblk = {
61 .cur = -300,
62 .completed = -300,
63 .lock = __SPIN_LOCK_UNLOCKED(&rcu_ctrlblk.lock),
64 .cpumask = CPU_MASK_NONE,
65};
66static struct rcu_ctrlblk rcu_bh_ctrlblk = {
67 .cur = -300,
68 .completed = -300,
69 .lock = __SPIN_LOCK_UNLOCKED(&rcu_bh_ctrlblk.lock),
70 .cpumask = CPU_MASK_NONE,
71}; 52};
72 53
73DEFINE_PER_CPU(struct rcu_data, rcu_data) = { 0L }; 54static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL};
74DEFINE_PER_CPU(struct rcu_data, rcu_bh_data) = { 0L };
75
76static int blimit = 10;
77static int qhimark = 10000;
78static int qlowmark = 100;
79
80static atomic_t rcu_barrier_cpu_count; 55static atomic_t rcu_barrier_cpu_count;
81static DEFINE_MUTEX(rcu_barrier_mutex); 56static DEFINE_MUTEX(rcu_barrier_mutex);
82static struct completion rcu_barrier_completion; 57static struct completion rcu_barrier_completion;
83 58
84#ifdef CONFIG_SMP 59/* Because of FASTCALL declaration of complete, we use this wrapper */
85static void force_quiescent_state(struct rcu_data *rdp, 60static void wakeme_after_rcu(struct rcu_head *head)
86 struct rcu_ctrlblk *rcp)
87{
88 int cpu;
89 cpumask_t cpumask;
90 set_need_resched();
91 if (unlikely(!rcp->signaled)) {
92 rcp->signaled = 1;
93 /*
94 * Don't send IPI to itself. With irqs disabled,
95 * rdp->cpu is the current cpu.
96 */
97 cpumask = rcp->cpumask;
98 cpu_clear(rdp->cpu, cpumask);
99 for_each_cpu_mask(cpu, cpumask)
100 smp_send_reschedule(cpu);
101 }
102}
103#else
104static inline void force_quiescent_state(struct rcu_data *rdp,
105 struct rcu_ctrlblk *rcp)
106{ 61{
107 set_need_resched(); 62 struct rcu_synchronize *rcu;
63
64 rcu = container_of(head, struct rcu_synchronize, head);
65 complete(&rcu->completion);
108} 66}
109#endif
110 67
111/** 68/**
112 * call_rcu - Queue an RCU callback for invocation after a grace period. 69 * synchronize_rcu - wait until a grace period has elapsed.
113 * @head: structure to be used for queueing the RCU updates.
114 * @func: actual update function to be invoked after the grace period
115 * 70 *
116 * The update function will be invoked some time after a full grace 71 * Control will return to the caller some time after a full grace
117 * period elapses, in other words after all currently executing RCU 72 * period has elapsed, in other words after all currently executing RCU
118 * read-side critical sections have completed. RCU read-side critical 73 * read-side critical sections have completed. RCU read-side critical
119 * sections are delimited by rcu_read_lock() and rcu_read_unlock(), 74 * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
120 * and may be nested. 75 * and may be nested.
121 */ 76 */
122void fastcall call_rcu(struct rcu_head *head, 77void synchronize_rcu(void)
123 void (*func)(struct rcu_head *rcu))
124{
125 unsigned long flags;
126 struct rcu_data *rdp;
127
128 head->func = func;
129 head->next = NULL;
130 local_irq_save(flags);
131 rdp = &__get_cpu_var(rcu_data);
132 *rdp->nxttail = head;
133 rdp->nxttail = &head->next;
134 if (unlikely(++rdp->qlen > qhimark)) {
135 rdp->blimit = INT_MAX;
136 force_quiescent_state(rdp, &rcu_ctrlblk);
137 }
138 local_irq_restore(flags);
139}
140
141/**
142 * call_rcu_bh - Queue an RCU for invocation after a quicker grace period.
143 * @head: structure to be used for queueing the RCU updates.
144 * @func: actual update function to be invoked after the grace period
145 *
146 * The update function will be invoked some time after a full grace
147 * period elapses, in other words after all currently executing RCU
148 * read-side critical sections have completed. call_rcu_bh() assumes
149 * that the read-side critical sections end on completion of a softirq
150 * handler. This means that read-side critical sections in process
151 * context must not be interrupted by softirqs. This interface is to be
152 * used when most of the read-side critical sections are in softirq context.
153 * RCU read-side critical sections are delimited by rcu_read_lock() and
154 * rcu_read_unlock(), * if in interrupt context or rcu_read_lock_bh()
155 * and rcu_read_unlock_bh(), if in process context. These may be nested.
156 */
157void fastcall call_rcu_bh(struct rcu_head *head,
158 void (*func)(struct rcu_head *rcu))
159{ 78{
160 unsigned long flags; 79 struct rcu_synchronize rcu;
161 struct rcu_data *rdp;
162
163 head->func = func;
164 head->next = NULL;
165 local_irq_save(flags);
166 rdp = &__get_cpu_var(rcu_bh_data);
167 *rdp->nxttail = head;
168 rdp->nxttail = &head->next;
169
170 if (unlikely(++rdp->qlen > qhimark)) {
171 rdp->blimit = INT_MAX;
172 force_quiescent_state(rdp, &rcu_bh_ctrlblk);
173 }
174
175 local_irq_restore(flags);
176}
177 80
178/* 81 init_completion(&rcu.completion);
179 * Return the number of RCU batches processed thus far. Useful 82 /* Will wake me after RCU finished */
180 * for debug and statistics. 83 call_rcu(&rcu.head, wakeme_after_rcu);
181 */
182long rcu_batches_completed(void)
183{
184 return rcu_ctrlblk.completed;
185}
186 84
187/* 85 /* Wait for it */
188 * Return the number of RCU batches processed thus far. Useful 86 wait_for_completion(&rcu.completion);
189 * for debug and statistics.
190 */
191long rcu_batches_completed_bh(void)
192{
193 return rcu_bh_ctrlblk.completed;
194} 87}
88EXPORT_SYMBOL_GPL(synchronize_rcu);
195 89
196static void rcu_barrier_callback(struct rcu_head *notused) 90static void rcu_barrier_callback(struct rcu_head *notused)
197{ 91{
@@ -205,10 +99,8 @@ static void rcu_barrier_callback(struct rcu_head *notused)
205static void rcu_barrier_func(void *notused) 99static void rcu_barrier_func(void *notused)
206{ 100{
207 int cpu = smp_processor_id(); 101 int cpu = smp_processor_id();
208 struct rcu_data *rdp = &per_cpu(rcu_data, cpu); 102 struct rcu_head *head = &per_cpu(rcu_barrier_head, cpu);
209 struct rcu_head *head;
210 103
211 head = &rdp->barrier;
212 atomic_inc(&rcu_barrier_cpu_count); 104 atomic_inc(&rcu_barrier_cpu_count);
213 call_rcu(head, rcu_barrier_callback); 105 call_rcu(head, rcu_barrier_callback);
214} 106}
@@ -229,425 +121,8 @@ void rcu_barrier(void)
229} 121}
230EXPORT_SYMBOL_GPL(rcu_barrier); 122EXPORT_SYMBOL_GPL(rcu_barrier);
231 123
232/* Raises the softirq for processing rcu_callbacks. */
233static inline void raise_rcu_softirq(void)
234{
235 raise_softirq(RCU_SOFTIRQ);
236 /*
237 * The smp_mb() here is required to ensure that this cpu's
238 * __rcu_process_callbacks() reads the most recently updated
239 * value of rcu->cur.
240 */
241 smp_mb();
242}
243
244/*
245 * Invoke the completed RCU callbacks. They are expected to be in
246 * a per-cpu list.
247 */
248static void rcu_do_batch(struct rcu_data *rdp)
249{
250 struct rcu_head *next, *list;
251 int count = 0;
252
253 list = rdp->donelist;
254 while (list) {
255 next = list->next;
256 prefetch(next);
257 list->func(list);
258 list = next;
259 if (++count >= rdp->blimit)
260 break;
261 }
262 rdp->donelist = list;
263
264 local_irq_disable();
265 rdp->qlen -= count;
266 local_irq_enable();
267 if (rdp->blimit == INT_MAX && rdp->qlen <= qlowmark)
268 rdp->blimit = blimit;
269
270 if (!rdp->donelist)
271 rdp->donetail = &rdp->donelist;
272 else
273 raise_rcu_softirq();
274}
275
276/*
277 * Grace period handling:
278 * The grace period handling consists out of two steps:
279 * - A new grace period is started.
280 * This is done by rcu_start_batch. The start is not broadcasted to
281 * all cpus, they must pick this up by comparing rcp->cur with
282 * rdp->quiescbatch. All cpus are recorded in the
283 * rcu_ctrlblk.cpumask bitmap.
284 * - All cpus must go through a quiescent state.
285 * Since the start of the grace period is not broadcasted, at least two
286 * calls to rcu_check_quiescent_state are required:
287 * The first call just notices that a new grace period is running. The
288 * following calls check if there was a quiescent state since the beginning
289 * of the grace period. If so, it updates rcu_ctrlblk.cpumask. If
290 * the bitmap is empty, then the grace period is completed.
291 * rcu_check_quiescent_state calls rcu_start_batch(0) to start the next grace
292 * period (if necessary).
293 */
294/*
295 * Register a new batch of callbacks, and start it up if there is currently no
296 * active batch and the batch to be registered has not already occurred.
297 * Caller must hold rcu_ctrlblk.lock.
298 */
299static void rcu_start_batch(struct rcu_ctrlblk *rcp)
300{
301 if (rcp->next_pending &&
302 rcp->completed == rcp->cur) {
303 rcp->next_pending = 0;
304 /*
305 * next_pending == 0 must be visible in
306 * __rcu_process_callbacks() before it can see new value of cur.
307 */
308 smp_wmb();
309 rcp->cur++;
310
311 /*
312 * Accessing nohz_cpu_mask before incrementing rcp->cur needs a
313 * Barrier Otherwise it can cause tickless idle CPUs to be
314 * included in rcp->cpumask, which will extend graceperiods
315 * unnecessarily.
316 */
317 smp_mb();
318 cpus_andnot(rcp->cpumask, cpu_online_map, nohz_cpu_mask);
319
320 rcp->signaled = 0;
321 }
322}
323
324/*
325 * cpu went through a quiescent state since the beginning of the grace period.
326 * Clear it from the cpu mask and complete the grace period if it was the last
327 * cpu. Start another grace period if someone has further entries pending
328 */
329static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp)
330{
331 cpu_clear(cpu, rcp->cpumask);
332 if (cpus_empty(rcp->cpumask)) {
333 /* batch completed ! */
334 rcp->completed = rcp->cur;
335 rcu_start_batch(rcp);
336 }
337}
338
339/*
340 * Check if the cpu has gone through a quiescent state (say context
341 * switch). If so and if it already hasn't done so in this RCU
342 * quiescent cycle, then indicate that it has done so.
343 */
344static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp,
345 struct rcu_data *rdp)
346{
347 if (rdp->quiescbatch != rcp->cur) {
348 /* start new grace period: */
349 rdp->qs_pending = 1;
350 rdp->passed_quiesc = 0;
351 rdp->quiescbatch = rcp->cur;
352 return;
353 }
354
355 /* Grace period already completed for this cpu?
356 * qs_pending is checked instead of the actual bitmap to avoid
357 * cacheline trashing.
358 */
359 if (!rdp->qs_pending)
360 return;
361
362 /*
363 * Was there a quiescent state since the beginning of the grace
364 * period? If no, then exit and wait for the next call.
365 */
366 if (!rdp->passed_quiesc)
367 return;
368 rdp->qs_pending = 0;
369
370 spin_lock(&rcp->lock);
371 /*
372 * rdp->quiescbatch/rcp->cur and the cpu bitmap can come out of sync
373 * during cpu startup. Ignore the quiescent state.
374 */
375 if (likely(rdp->quiescbatch == rcp->cur))
376 cpu_quiet(rdp->cpu, rcp);
377
378 spin_unlock(&rcp->lock);
379}
380
381
382#ifdef CONFIG_HOTPLUG_CPU
383
384/* warning! helper for rcu_offline_cpu. do not use elsewhere without reviewing
385 * locking requirements, the list it's pulling from has to belong to a cpu
386 * which is dead and hence not processing interrupts.
387 */
388static void rcu_move_batch(struct rcu_data *this_rdp, struct rcu_head *list,
389 struct rcu_head **tail)
390{
391 local_irq_disable();
392 *this_rdp->nxttail = list;
393 if (list)
394 this_rdp->nxttail = tail;
395 local_irq_enable();
396}
397
398static void __rcu_offline_cpu(struct rcu_data *this_rdp,
399 struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
400{
401 /* if the cpu going offline owns the grace period
402 * we can block indefinitely waiting for it, so flush
403 * it here
404 */
405 spin_lock_bh(&rcp->lock);
406 if (rcp->cur != rcp->completed)
407 cpu_quiet(rdp->cpu, rcp);
408 spin_unlock_bh(&rcp->lock);
409 rcu_move_batch(this_rdp, rdp->curlist, rdp->curtail);
410 rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail);
411 rcu_move_batch(this_rdp, rdp->donelist, rdp->donetail);
412}
413
414static void rcu_offline_cpu(int cpu)
415{
416 struct rcu_data *this_rdp = &get_cpu_var(rcu_data);
417 struct rcu_data *this_bh_rdp = &get_cpu_var(rcu_bh_data);
418
419 __rcu_offline_cpu(this_rdp, &rcu_ctrlblk,
420 &per_cpu(rcu_data, cpu));
421 __rcu_offline_cpu(this_bh_rdp, &rcu_bh_ctrlblk,
422 &per_cpu(rcu_bh_data, cpu));
423 put_cpu_var(rcu_data);
424 put_cpu_var(rcu_bh_data);
425}
426
427#else
428
429static void rcu_offline_cpu(int cpu)
430{
431}
432
433#endif
434
435/*
436 * This does the RCU processing work from softirq context.
437 */
438static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp,
439 struct rcu_data *rdp)
440{
441 if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch)) {
442 *rdp->donetail = rdp->curlist;
443 rdp->donetail = rdp->curtail;
444 rdp->curlist = NULL;
445 rdp->curtail = &rdp->curlist;
446 }
447
448 if (rdp->nxtlist && !rdp->curlist) {
449 local_irq_disable();
450 rdp->curlist = rdp->nxtlist;
451 rdp->curtail = rdp->nxttail;
452 rdp->nxtlist = NULL;
453 rdp->nxttail = &rdp->nxtlist;
454 local_irq_enable();
455
456 /*
457 * start the next batch of callbacks
458 */
459
460 /* determine batch number */
461 rdp->batch = rcp->cur + 1;
462 /* see the comment and corresponding wmb() in
463 * the rcu_start_batch()
464 */
465 smp_rmb();
466
467 if (!rcp->next_pending) {
468 /* and start it/schedule start if it's a new batch */
469 spin_lock(&rcp->lock);
470 rcp->next_pending = 1;
471 rcu_start_batch(rcp);
472 spin_unlock(&rcp->lock);
473 }
474 }
475
476 rcu_check_quiescent_state(rcp, rdp);
477 if (rdp->donelist)
478 rcu_do_batch(rdp);
479}
480
481static void rcu_process_callbacks(struct softirq_action *unused)
482{
483 __rcu_process_callbacks(&rcu_ctrlblk, &__get_cpu_var(rcu_data));
484 __rcu_process_callbacks(&rcu_bh_ctrlblk, &__get_cpu_var(rcu_bh_data));
485}
486
487static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
488{
489 /* This cpu has pending rcu entries and the grace period
490 * for them has completed.
491 */
492 if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch))
493 return 1;
494
495 /* This cpu has no pending entries, but there are new entries */
496 if (!rdp->curlist && rdp->nxtlist)
497 return 1;
498
499 /* This cpu has finished callbacks to invoke */
500 if (rdp->donelist)
501 return 1;
502
503 /* The rcu core waits for a quiescent state from the cpu */
504 if (rdp->quiescbatch != rcp->cur || rdp->qs_pending)
505 return 1;
506
507 /* nothing to do */
508 return 0;
509}
510
511/*
512 * Check to see if there is any immediate RCU-related work to be done
513 * by the current CPU, returning 1 if so. This function is part of the
514 * RCU implementation; it is -not- an exported member of the RCU API.
515 */
516int rcu_pending(int cpu)
517{
518 return __rcu_pending(&rcu_ctrlblk, &per_cpu(rcu_data, cpu)) ||
519 __rcu_pending(&rcu_bh_ctrlblk, &per_cpu(rcu_bh_data, cpu));
520}
521
522/*
523 * Check to see if any future RCU-related work will need to be done
524 * by the current CPU, even if none need be done immediately, returning
525 * 1 if so. This function is part of the RCU implementation; it is -not-
526 * an exported member of the RCU API.
527 */
528int rcu_needs_cpu(int cpu)
529{
530 struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
531 struct rcu_data *rdp_bh = &per_cpu(rcu_bh_data, cpu);
532
533 return (!!rdp->curlist || !!rdp_bh->curlist || rcu_pending(cpu));
534}
535
536void rcu_check_callbacks(int cpu, int user)
537{
538 if (user ||
539 (idle_cpu(cpu) && !in_softirq() &&
540 hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
541 rcu_qsctr_inc(cpu);
542 rcu_bh_qsctr_inc(cpu);
543 } else if (!in_softirq())
544 rcu_bh_qsctr_inc(cpu);
545 raise_rcu_softirq();
546}
547
548static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp,
549 struct rcu_data *rdp)
550{
551 memset(rdp, 0, sizeof(*rdp));
552 rdp->curtail = &rdp->curlist;
553 rdp->nxttail = &rdp->nxtlist;
554 rdp->donetail = &rdp->donelist;
555 rdp->quiescbatch = rcp->completed;
556 rdp->qs_pending = 0;
557 rdp->cpu = cpu;
558 rdp->blimit = blimit;
559}
560
561static void __cpuinit rcu_online_cpu(int cpu)
562{
563 struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
564 struct rcu_data *bh_rdp = &per_cpu(rcu_bh_data, cpu);
565
566 rcu_init_percpu_data(cpu, &rcu_ctrlblk, rdp);
567 rcu_init_percpu_data(cpu, &rcu_bh_ctrlblk, bh_rdp);
568 open_softirq(RCU_SOFTIRQ, rcu_process_callbacks, NULL);
569}
570
571static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
572 unsigned long action, void *hcpu)
573{
574 long cpu = (long)hcpu;
575 switch (action) {
576 case CPU_UP_PREPARE:
577 case CPU_UP_PREPARE_FROZEN:
578 rcu_online_cpu(cpu);
579 break;
580 case CPU_DEAD:
581 case CPU_DEAD_FROZEN:
582 rcu_offline_cpu(cpu);
583 break;
584 default:
585 break;
586 }
587 return NOTIFY_OK;
588}
589
590static struct notifier_block __cpuinitdata rcu_nb = {
591 .notifier_call = rcu_cpu_notify,
592};
593
594/*
595 * Initializes rcu mechanism. Assumed to be called early.
596 * That is before local timer(SMP) or jiffie timer (uniproc) is setup.
597 * Note that rcu_qsctr and friends are implicitly
598 * initialized due to the choice of ``0'' for RCU_CTR_INVALID.
599 */
600void __init rcu_init(void) 124void __init rcu_init(void)
601{ 125{
602 rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE, 126 __rcu_init();
603 (void *)(long)smp_processor_id());
604 /* Register notifier for non-boot CPUs */
605 register_cpu_notifier(&rcu_nb);
606}
607
608struct rcu_synchronize {
609 struct rcu_head head;
610 struct completion completion;
611};
612
613/* Because of FASTCALL declaration of complete, we use this wrapper */
614static void wakeme_after_rcu(struct rcu_head *head)
615{
616 struct rcu_synchronize *rcu;
617
618 rcu = container_of(head, struct rcu_synchronize, head);
619 complete(&rcu->completion);
620}
621
622/**
623 * synchronize_rcu - wait until a grace period has elapsed.
624 *
625 * Control will return to the caller some time after a full grace
626 * period has elapsed, in other words after all currently executing RCU
627 * read-side critical sections have completed. RCU read-side critical
628 * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
629 * and may be nested.
630 *
631 * If your read-side code is not protected by rcu_read_lock(), do -not-
632 * use synchronize_rcu().
633 */
634void synchronize_rcu(void)
635{
636 struct rcu_synchronize rcu;
637
638 init_completion(&rcu.completion);
639 /* Will wake me after RCU finished */
640 call_rcu(&rcu.head, wakeme_after_rcu);
641
642 /* Wait for it */
643 wait_for_completion(&rcu.completion);
644} 127}
645 128
646module_param(blimit, int, 0);
647module_param(qhimark, int, 0);
648module_param(qlowmark, int, 0);
649EXPORT_SYMBOL_GPL(rcu_batches_completed);
650EXPORT_SYMBOL_GPL(rcu_batches_completed_bh);
651EXPORT_SYMBOL_GPL(call_rcu);
652EXPORT_SYMBOL_GPL(call_rcu_bh);
653EXPORT_SYMBOL_GPL(synchronize_rcu);