-rw-r--r--  include/linux/rcuclassic.h  161
-rw-r--r--  include/linux/rcupdate.h    168
-rw-r--r--  kernel/Makefile               2
-rw-r--r--  kernel/rcuclassic.c         576
-rw-r--r--  kernel/rcupdate.c           575
5 files changed, 812 insertions, 670 deletions
diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h
new file mode 100644
index 000000000000..2b8b045a51d5
--- /dev/null
+++ b/include/linux/rcuclassic.h
@@ -0,0 +1,161 @@
1/*
2 * Read-Copy Update mechanism for mutual exclusion (classic version)
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * Copyright IBM Corporation, 2001
19 *
20 * Author: Dipankar Sarma <dipankar@in.ibm.com>
21 *
22 * Based on the original work by Paul McKenney <paulmck@us.ibm.com>
23 * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
24 * Papers:
25 * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf
26 * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001)
27 *
28 * For detailed explanation of Read-Copy Update mechanism see -
29 * Documentation/RCU
30 *
31 */
32
33#ifndef __LINUX_RCUCLASSIC_H
34#define __LINUX_RCUCLASSIC_H
35
36#ifdef __KERNEL__
37
38#include <linux/cache.h>
39#include <linux/spinlock.h>
40#include <linux/threads.h>
41#include <linux/percpu.h>
42#include <linux/cpumask.h>
43#include <linux/seqlock.h>
44
45
46/* Global control variables for rcupdate callback mechanism. */
47struct rcu_ctrlblk {
48 long cur; /* Current batch number. */
49 long completed; /* Number of the last completed batch */
50 int next_pending; /* Is the next batch already waiting? */
51
52 int signaled;
53
54 spinlock_t lock ____cacheline_internodealigned_in_smp;
55 cpumask_t cpumask; /* CPUs that need to switch in order */
56 /* for current batch to proceed. */
57} ____cacheline_internodealigned_in_smp;
58
59/* Is batch a before batch b ? */
60static inline int rcu_batch_before(long a, long b)
61{
62 return (a - b) < 0;
63}
64
65/* Is batch a after batch b ? */
66static inline int rcu_batch_after(long a, long b)
67{
68 return (a - b) > 0;
69}
70
71/*
72 * Per-CPU data for Read-Copy Update.
73 * nxtlist - new callbacks are added here
74 * curlist - current batch for which quiescent cycle started if any
75 */
76struct rcu_data {
77 /* 1) quiescent state handling : */
78 long quiescbatch; /* Batch # for grace period */
79 int passed_quiesc; /* User-mode/idle loop etc. */
80 int qs_pending; /* core waits for quiesc state */
81
82 /* 2) batch handling */
83 long batch; /* Batch # for current RCU batch */
84 struct rcu_head *nxtlist;
85 struct rcu_head **nxttail;
86 long qlen; /* # of queued callbacks */
87 struct rcu_head *curlist;
88 struct rcu_head **curtail;
89 struct rcu_head *donelist;
90 struct rcu_head **donetail;
91 long blimit; /* Upper limit on a processed batch */
92 int cpu;
93 struct rcu_head barrier;
94};
95
96DECLARE_PER_CPU(struct rcu_data, rcu_data);
97DECLARE_PER_CPU(struct rcu_data, rcu_bh_data);
98
99/*
100 * Increment the quiescent state counter.
101 * The counter is somewhat degenerate: we do not need to know
102 * how many quiescent states have passed, just whether there was at least
103 * one since the start of the grace period. Thus just a flag.
104 */
105static inline void rcu_qsctr_inc(int cpu)
106{
107 struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
108 rdp->passed_quiesc = 1;
109}
110static inline void rcu_bh_qsctr_inc(int cpu)
111{
112 struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu);
113 rdp->passed_quiesc = 1;
114}
115
116extern int rcu_pending(int cpu);
117extern int rcu_needs_cpu(int cpu);
118
119#ifdef CONFIG_DEBUG_LOCK_ALLOC
120extern struct lockdep_map rcu_lock_map;
121# define rcu_read_acquire() \
122 lock_acquire(&rcu_lock_map, 0, 0, 2, 1, _THIS_IP_)
123# define rcu_read_release() lock_release(&rcu_lock_map, 1, _THIS_IP_)
124#else
125# define rcu_read_acquire() do { } while (0)
126# define rcu_read_release() do { } while (0)
127#endif
128
129#define __rcu_read_lock() \
130 do { \
131 preempt_disable(); \
132 __acquire(RCU); \
133 rcu_read_acquire(); \
134 } while (0)
135#define __rcu_read_unlock() \
136 do { \
137 rcu_read_release(); \
138 __release(RCU); \
139 preempt_enable(); \
140 } while (0)
141#define __rcu_read_lock_bh() \
142 do { \
143 local_bh_disable(); \
144 __acquire(RCU_BH); \
145 rcu_read_acquire(); \
146 } while (0)
147#define __rcu_read_unlock_bh() \
148 do { \
149 rcu_read_release(); \
150 __release(RCU_BH); \
151 local_bh_enable(); \
152 } while (0)
153
154#define __synchronize_sched() synchronize_rcu()
155
156extern void __rcu_init(void);
157extern void rcu_check_callbacks(int cpu, int user);
158extern void rcu_restart_cpu(int cpu);
159
160#endif /* __KERNEL__ */
161#endif /* __LINUX_RCUCLASSIC_H */
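The reader-side primitives above are not used directly; readers go through rcu_read_lock()/rcu_read_unlock() from rcupdate.h, usually together with rcu_dereference() for fetching the protected pointer. A minimal reader sketch, assuming a hypothetical struct foo reached through a global RCU-protected pointer (the names are illustrative, not part of this patch):

#include <linux/rcupdate.h>	/* rcu_read_lock(), rcu_dereference(), struct rcu_head */

struct foo {
	int a;
	struct rcu_head rcu;		/* used later by call_rcu() when freeing */
};

static struct foo *gbl_foo;		/* written by updaters, read under RCU */

static int foo_get_a(void)
{
	struct foo *p;
	int a = -1;

	rcu_read_lock();		/* classic RCU: disables preemption */
	p = rcu_dereference(gbl_foo);	/* ordered fetch of the protected pointer */
	if (p)
		a = p->a;
	rcu_read_unlock();		/* end of the read-side critical section */
	return a;
}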
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index cc24a01df940..12aa13e13150 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -15,7 +15,7 @@
15 * along with this program; if not, write to the Free Software 15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 * 17 *
18 * Copyright (C) IBM Corporation, 2001 18 * Copyright IBM Corporation, 2001
19 * 19 *
20 * Author: Dipankar Sarma <dipankar@in.ibm.com> 20 * Author: Dipankar Sarma <dipankar@in.ibm.com>
21 * 21 *
@@ -53,96 +53,14 @@ struct rcu_head {
53 void (*func)(struct rcu_head *head); 53 void (*func)(struct rcu_head *head);
54}; 54};
55 55
56#include <linux/rcuclassic.h>
57
56#define RCU_HEAD_INIT { .next = NULL, .func = NULL } 58#define RCU_HEAD_INIT { .next = NULL, .func = NULL }
57#define RCU_HEAD(head) struct rcu_head head = RCU_HEAD_INIT 59#define RCU_HEAD(head) struct rcu_head head = RCU_HEAD_INIT
58#define INIT_RCU_HEAD(ptr) do { \ 60#define INIT_RCU_HEAD(ptr) do { \
59 (ptr)->next = NULL; (ptr)->func = NULL; \ 61 (ptr)->next = NULL; (ptr)->func = NULL; \
60} while (0) 62} while (0)
61 63
62
63
64/* Global control variables for rcupdate callback mechanism. */
65struct rcu_ctrlblk {
66 long cur; /* Current batch number. */
67 long completed; /* Number of the last completed batch */
68 int next_pending; /* Is the next batch already waiting? */
69
70 int signaled;
71
72 spinlock_t lock ____cacheline_internodealigned_in_smp;
73 cpumask_t cpumask; /* CPUs that need to switch in order */
74 /* for current batch to proceed. */
75} ____cacheline_internodealigned_in_smp;
76
77/* Is batch a before batch b ? */
78static inline int rcu_batch_before(long a, long b)
79{
80 return (a - b) < 0;
81}
82
83/* Is batch a after batch b ? */
84static inline int rcu_batch_after(long a, long b)
85{
86 return (a - b) > 0;
87}
88
89/*
90 * Per-CPU data for Read-Copy UPdate.
91 * nxtlist - new callbacks are added here
92 * curlist - current batch for which quiescent cycle started if any
93 */
94struct rcu_data {
95 /* 1) quiescent state handling : */
96 long quiescbatch; /* Batch # for grace period */
97 int passed_quiesc; /* User-mode/idle loop etc. */
98 int qs_pending; /* core waits for quiesc state */
99
100 /* 2) batch handling */
101 long batch; /* Batch # for current RCU batch */
102 struct rcu_head *nxtlist;
103 struct rcu_head **nxttail;
104 long qlen; /* # of queued callbacks */
105 struct rcu_head *curlist;
106 struct rcu_head **curtail;
107 struct rcu_head *donelist;
108 struct rcu_head **donetail;
109 long blimit; /* Upper limit on a processed batch */
110 int cpu;
111 struct rcu_head barrier;
112};
113
114DECLARE_PER_CPU(struct rcu_data, rcu_data);
115DECLARE_PER_CPU(struct rcu_data, rcu_bh_data);
116
117/*
118 * Increment the quiescent state counter.
119 * The counter is a bit degenerated: We do not need to know
120 * how many quiescent states passed, just if there was at least
121 * one since the start of the grace period. Thus just a flag.
122 */
123static inline void rcu_qsctr_inc(int cpu)
124{
125 struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
126 rdp->passed_quiesc = 1;
127}
128static inline void rcu_bh_qsctr_inc(int cpu)
129{
130 struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu);
131 rdp->passed_quiesc = 1;
132}
133
134extern int rcu_pending(int cpu);
135extern int rcu_needs_cpu(int cpu);
136
137#ifdef CONFIG_DEBUG_LOCK_ALLOC
138extern struct lockdep_map rcu_lock_map;
139# define rcu_read_acquire() lock_acquire(&rcu_lock_map, 0, 0, 2, 1, _THIS_IP_)
140# define rcu_read_release() lock_release(&rcu_lock_map, 1, _THIS_IP_)
141#else
142# define rcu_read_acquire() do { } while (0)
143# define rcu_read_release() do { } while (0)
144#endif
145
146/** 64/**
147 * rcu_read_lock - mark the beginning of an RCU read-side critical section. 65 * rcu_read_lock - mark the beginning of an RCU read-side critical section.
148 * 66 *
@@ -172,24 +90,13 @@ extern struct lockdep_map rcu_lock_map;
172 * 90 *
173 * It is illegal to block while in an RCU read-side critical section. 91 * It is illegal to block while in an RCU read-side critical section.
174 */ 92 */
175#define rcu_read_lock() \ 93#define rcu_read_lock() __rcu_read_lock()
176 do { \
177 preempt_disable(); \
178 __acquire(RCU); \
179 rcu_read_acquire(); \
180 } while(0)
181 94
182/** 95/**
183 * rcu_read_unlock - marks the end of an RCU read-side critical section. 96 * rcu_read_unlock - marks the end of an RCU read-side critical section.
184 * 97 *
185 * See rcu_read_lock() for more information. 98 * See rcu_read_lock() for more information.
186 */ 99 */
187#define rcu_read_unlock() \
188 do { \
189 rcu_read_release(); \
190 __release(RCU); \
191 preempt_enable(); \
192 } while(0)
193 100
194/* 101/*
195 * So where is rcu_write_lock()? It does not exist, as there is no 102 * So where is rcu_write_lock()? It does not exist, as there is no
@@ -200,6 +107,7 @@ extern struct lockdep_map rcu_lock_map;
200 * used as well. RCU does not care how the writers keep out of each 107 * used as well. RCU does not care how the writers keep out of each
201 * others' way, as long as they do so. 108 * others' way, as long as they do so.
202 */ 109 */
110#define rcu_read_unlock() __rcu_read_unlock()
203 111
204/** 112/**
205 * rcu_read_lock_bh - mark the beginning of a softirq-only RCU critical section 113 * rcu_read_lock_bh - mark the beginning of a softirq-only RCU critical section
@@ -212,24 +120,14 @@ extern struct lockdep_map rcu_lock_map;
212 * can use just rcu_read_lock(). 120 * can use just rcu_read_lock().
213 * 121 *
214 */ 122 */
215#define rcu_read_lock_bh() \ 123#define rcu_read_lock_bh() __rcu_read_lock_bh()
216 do { \
217 local_bh_disable(); \
218 __acquire(RCU_BH); \
219 rcu_read_acquire(); \
220 } while(0)
221 124
222/* 125/*
223 * rcu_read_unlock_bh - marks the end of a softirq-only RCU critical section 126 * rcu_read_unlock_bh - marks the end of a softirq-only RCU critical section
224 * 127 *
225 * See rcu_read_lock_bh() for more information. 128 * See rcu_read_lock_bh() for more information.
226 */ 129 */
227#define rcu_read_unlock_bh() \ 130#define rcu_read_unlock_bh() __rcu_read_unlock_bh()
228 do { \
229 rcu_read_release(); \
230 __release(RCU_BH); \
231 local_bh_enable(); \
232 } while(0)
233 131
234/* 132/*
235 * Prevent the compiler from merging or refetching accesses. The compiler 133 * Prevent the compiler from merging or refetching accesses. The compiler
@@ -293,21 +191,53 @@ extern struct lockdep_map rcu_lock_map;
293 * In "classic RCU", these two guarantees happen to be one and 191 * In "classic RCU", these two guarantees happen to be one and
294 * the same, but can differ in realtime RCU implementations. 192 * the same, but can differ in realtime RCU implementations.
295 */ 193 */
296#define synchronize_sched() synchronize_rcu() 194#define synchronize_sched() __synchronize_sched()
195
196/**
197 * call_rcu - Queue an RCU callback for invocation after a grace period.
198 * @head: structure to be used for queueing the RCU updates.
199 * @func: actual update function to be invoked after the grace period
200 *
201 * The update function will be invoked some time after a full grace
202 * period elapses, in other words after all currently executing RCU
203 * read-side critical sections have completed. RCU read-side critical
204 * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
205 * and may be nested.
206 */
207extern void call_rcu(struct rcu_head *head,
208 void (*func)(struct rcu_head *head));
209
210/**
211 * call_rcu_bh - Queue an RCU for invocation after a quicker grace period.
212 * @head: structure to be used for queueing the RCU updates.
213 * @func: actual update function to be invoked after the grace period
214 *
215 * The update function will be invoked some time after a full grace
216 * period elapses, in other words after all currently executing RCU
217 * read-side critical sections have completed. call_rcu_bh() assumes
218 * that the read-side critical sections end on completion of a softirq
219 * handler. This means that read-side critical sections in process
220 * context must not be interrupted by softirqs. This interface is to be
221 * used when most of the read-side critical sections are in softirq context.
222 * RCU read-side critical sections are delimited by :
223 * - rcu_read_lock() and rcu_read_unlock(), if in interrupt context.
224 * OR
225 * - rcu_read_lock_bh() and rcu_read_unlock_bh(), if in process context.
226 * These may be nested.
227 */
228extern void call_rcu_bh(struct rcu_head *head,
229 void (*func)(struct rcu_head *head));
230
231/* Exported common interfaces */
232extern void synchronize_rcu(void);
233extern void rcu_barrier(void);
297 234
235/* Internal to kernel */
298extern void rcu_init(void); 236extern void rcu_init(void);
299extern void rcu_check_callbacks(int cpu, int user); 237extern void rcu_check_callbacks(int cpu, int user);
300extern void rcu_restart_cpu(int cpu); 238
301extern long rcu_batches_completed(void); 239extern long rcu_batches_completed(void);
302extern long rcu_batches_completed_bh(void); 240extern long rcu_batches_completed_bh(void);
303 241
304/* Exported interfaces */
305extern void FASTCALL(call_rcu(struct rcu_head *head,
306 void (*func)(struct rcu_head *head)));
307extern void FASTCALL(call_rcu_bh(struct rcu_head *head,
308 void (*func)(struct rcu_head *head)));
309extern void synchronize_rcu(void);
310extern void rcu_barrier(void);
311
312#endif /* __KERNEL__ */ 242#endif /* __KERNEL__ */
313#endif /* __LINUX_RCUPDATE_H */ 243#endif /* __LINUX_RCUPDATE_H */
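On the update side, call_rcu() is the usual way to defer freeing an object that readers may still be traversing. A hedged sketch continuing the hypothetical struct foo example above (the writer-side lock and helper names are assumptions, not part of this patch):

#include <linux/slab.h>		/* kfree() */
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(foo_lock);	/* writers still exclude each other */

static void foo_reclaim(struct rcu_head *rp)
{
	struct foo *old = container_of(rp, struct foo, rcu);

	kfree(old);			/* safe: a full grace period has elapsed */
}

static void foo_set_a(struct foo *new, int a)
{
	struct foo *old;

	new->a = a;
	spin_lock(&foo_lock);
	old = gbl_foo;
	rcu_assign_pointer(gbl_foo, new);	/* publish with the required barrier */
	spin_unlock(&foo_lock);
	if (old)
		call_rcu(&old->rcu, foo_reclaim); /* reclaim once all readers are done */
}

An updater that can afford to block may instead call synchronize_rcu() after unlinking the old object and then free it directly.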
diff --git a/kernel/Makefile b/kernel/Makefile
index dfa96956dae0..def5dd6097a0 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -6,7 +6,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
6 exit.o itimer.o time.o softirq.o resource.o \ 6 exit.o itimer.o time.o softirq.o resource.o \
7 sysctl.o capability.o ptrace.o timer.o user.o user_namespace.o \ 7 sysctl.o capability.o ptrace.o timer.o user.o user_namespace.o \
8 signal.o sys.o kmod.o workqueue.o pid.o \ 8 signal.o sys.o kmod.o workqueue.o pid.o \
9 rcupdate.o extable.o params.o posix-timers.o \ 9 rcupdate.o rcuclassic.o extable.o params.o posix-timers.o \
10 kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ 10 kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
11 hrtimer.o rwsem.o latency.o nsproxy.o srcu.o \ 11 hrtimer.o rwsem.o latency.o nsproxy.o srcu.o \
12 utsname.o notifier.o 12 utsname.o notifier.o
diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
new file mode 100644
index 000000000000..18369e3386e2
--- /dev/null
+++ b/kernel/rcuclassic.c
@@ -0,0 +1,576 @@
1/*
2 * Read-Copy Update mechanism for mutual exclusion
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * Copyright IBM Corporation, 2001
19 *
20 * Authors: Dipankar Sarma <dipankar@in.ibm.com>
21 * Manfred Spraul <manfred@colorfullife.com>
22 *
23 * Based on the original work by Paul McKenney <paulmck@us.ibm.com>
24 * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
25 * Papers:
26 * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf
27 * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001)
28 *
29 * For detailed explanation of Read-Copy Update mechanism see -
30 * Documentation/RCU
31 *
32 */
33#include <linux/types.h>
34#include <linux/kernel.h>
35#include <linux/init.h>
36#include <linux/spinlock.h>
37#include <linux/smp.h>
38#include <linux/rcupdate.h>
39#include <linux/interrupt.h>
40#include <linux/sched.h>
41#include <asm/atomic.h>
42#include <linux/bitops.h>
43#include <linux/module.h>
44#include <linux/completion.h>
45#include <linux/moduleparam.h>
46#include <linux/percpu.h>
47#include <linux/notifier.h>
48/* #include <linux/rcupdate.h> @@@ */
49#include <linux/cpu.h>
50#include <linux/mutex.h>
51
52#ifdef CONFIG_DEBUG_LOCK_ALLOC
53static struct lock_class_key rcu_lock_key;
54struct lockdep_map rcu_lock_map =
55 STATIC_LOCKDEP_MAP_INIT("rcu_read_lock", &rcu_lock_key);
56EXPORT_SYMBOL_GPL(rcu_lock_map);
57#endif
58
59
60/* Definition for rcupdate control block. */
61static struct rcu_ctrlblk rcu_ctrlblk = {
62 .cur = -300,
63 .completed = -300,
64 .lock = __SPIN_LOCK_UNLOCKED(&rcu_ctrlblk.lock),
65 .cpumask = CPU_MASK_NONE,
66};
67static struct rcu_ctrlblk rcu_bh_ctrlblk = {
68 .cur = -300,
69 .completed = -300,
70 .lock = __SPIN_LOCK_UNLOCKED(&rcu_bh_ctrlblk.lock),
71 .cpumask = CPU_MASK_NONE,
72};
73
74DEFINE_PER_CPU(struct rcu_data, rcu_data) = { 0L };
75DEFINE_PER_CPU(struct rcu_data, rcu_bh_data) = { 0L };
76
77static int blimit = 10;
78static int qhimark = 10000;
79static int qlowmark = 100;
80
81#ifdef CONFIG_SMP
82static void force_quiescent_state(struct rcu_data *rdp,
83 struct rcu_ctrlblk *rcp)
84{
85 int cpu;
86 cpumask_t cpumask;
87 set_need_resched();
88 if (unlikely(!rcp->signaled)) {
89 rcp->signaled = 1;
90 /*
91 * Don't send IPI to itself. With irqs disabled,
92 * rdp->cpu is the current cpu.
93 */
94 cpumask = rcp->cpumask;
95 cpu_clear(rdp->cpu, cpumask);
96 for_each_cpu_mask(cpu, cpumask)
97 smp_send_reschedule(cpu);
98 }
99}
100#else
101static inline void force_quiescent_state(struct rcu_data *rdp,
102 struct rcu_ctrlblk *rcp)
103{
104 set_need_resched();
105}
106#endif
107
108/**
109 * call_rcu - Queue an RCU callback for invocation after a grace period.
110 * @head: structure to be used for queueing the RCU updates.
111 * @func: actual update function to be invoked after the grace period
112 *
113 * The update function will be invoked some time after a full grace
114 * period elapses, in other words after all currently executing RCU
115 * read-side critical sections have completed. RCU read-side critical
116 * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
117 * and may be nested.
118 */
119void call_rcu(struct rcu_head *head,
120 void (*func)(struct rcu_head *rcu))
121{
122 unsigned long flags;
123 struct rcu_data *rdp;
124
125 head->func = func;
126 head->next = NULL;
127 local_irq_save(flags);
128 rdp = &__get_cpu_var(rcu_data);
129 *rdp->nxttail = head;
130 rdp->nxttail = &head->next;
131 if (unlikely(++rdp->qlen > qhimark)) {
132 rdp->blimit = INT_MAX;
133 force_quiescent_state(rdp, &rcu_ctrlblk);
134 }
135 local_irq_restore(flags);
136}
137EXPORT_SYMBOL_GPL(call_rcu);
138
139/**
140 * call_rcu_bh - Queue an RCU for invocation after a quicker grace period.
141 * @head: structure to be used for queueing the RCU updates.
142 * @func: actual update function to be invoked after the grace period
143 *
144 * The update function will be invoked some time after a full grace
145 * period elapses, in other words after all currently executing RCU
146 * read-side critical sections have completed. call_rcu_bh() assumes
147 * that the read-side critical sections end on completion of a softirq
148 * handler. This means that read-side critical sections in process
149 * context must not be interrupted by softirqs. This interface is to be
150 * used when most of the read-side critical sections are in softirq context.
151 * RCU read-side critical sections are delimited by rcu_read_lock() and
152 * rcu_read_unlock(), if in interrupt context, or rcu_read_lock_bh()
153 * and rcu_read_unlock_bh(), if in process context. These may be nested.
154 */
155void call_rcu_bh(struct rcu_head *head,
156 void (*func)(struct rcu_head *rcu))
157{
158 unsigned long flags;
159 struct rcu_data *rdp;
160
161 head->func = func;
162 head->next = NULL;
163 local_irq_save(flags);
164 rdp = &__get_cpu_var(rcu_bh_data);
165 *rdp->nxttail = head;
166 rdp->nxttail = &head->next;
167
168 if (unlikely(++rdp->qlen > qhimark)) {
169 rdp->blimit = INT_MAX;
170 force_quiescent_state(rdp, &rcu_bh_ctrlblk);
171 }
172
173 local_irq_restore(flags);
174}
175EXPORT_SYMBOL_GPL(call_rcu_bh);
176
177/*
178 * Return the number of RCU batches processed thus far. Useful
179 * for debug and statistics.
180 */
181long rcu_batches_completed(void)
182{
183 return rcu_ctrlblk.completed;
184}
185EXPORT_SYMBOL_GPL(rcu_batches_completed);
186
187/*
188 * Return the number of RCU batches processed thus far. Useful
189 * for debug and statistics.
190 */
191long rcu_batches_completed_bh(void)
192{
193 return rcu_bh_ctrlblk.completed;
194}
195EXPORT_SYMBOL_GPL(rcu_batches_completed_bh);
196
197/* Raises the softirq for processing rcu_callbacks. */
198static inline void raise_rcu_softirq(void)
199{
200 raise_softirq(RCU_SOFTIRQ);
201 /*
202 * The smp_mb() here is required to ensure that this cpu's
203 * __rcu_process_callbacks() reads the most recently updated
204 * value of rcu->cur.
205 */
206 smp_mb();
207}
208
209/*
210 * Invoke the completed RCU callbacks. They are expected to be in
211 * a per-cpu list.
212 */
213static void rcu_do_batch(struct rcu_data *rdp)
214{
215 struct rcu_head *next, *list;
216 int count = 0;
217
218 list = rdp->donelist;
219 while (list) {
220 next = list->next;
221 prefetch(next);
222 list->func(list);
223 list = next;
224 if (++count >= rdp->blimit)
225 break;
226 }
227 rdp->donelist = list;
228
229 local_irq_disable();
230 rdp->qlen -= count;
231 local_irq_enable();
232 if (rdp->blimit == INT_MAX && rdp->qlen <= qlowmark)
233 rdp->blimit = blimit;
234
235 if (!rdp->donelist)
236 rdp->donetail = &rdp->donelist;
237 else
238 raise_rcu_softirq();
239}
240
241/*
242 * Grace period handling:
243 * The grace period handling consists of two steps:
244 * - A new grace period is started.
245 * This is done by rcu_start_batch. The start is not broadcast to
246 * all cpus; they must pick this up by comparing rcp->cur with
247 * rdp->quiescbatch. All cpus are recorded in the
248 * rcu_ctrlblk.cpumask bitmap.
249 * - All cpus must go through a quiescent state.
250 * Since the start of the grace period is not broadcast, at least two
251 * calls to rcu_check_quiescent_state are required:
252 * The first call just notices that a new grace period is running. The
253 * following calls check if there was a quiescent state since the beginning
254 * of the grace period. If so, it updates rcu_ctrlblk.cpumask. If
255 * the bitmap is empty, then the grace period is completed.
256 * rcu_check_quiescent_state calls rcu_start_batch(0) to start the next grace
257 * period (if necessary).
258 */
259/*
260 * Register a new batch of callbacks, and start it up if there is currently no
261 * active batch and the batch to be registered has not already occurred.
262 * Caller must hold rcu_ctrlblk.lock.
263 */
264static void rcu_start_batch(struct rcu_ctrlblk *rcp)
265{
266 if (rcp->next_pending &&
267 rcp->completed == rcp->cur) {
268 rcp->next_pending = 0;
269 /*
270 * next_pending == 0 must be visible in
271 * __rcu_process_callbacks() before it can see the new value of cur.
272 */
273 smp_wmb();
274 rcp->cur++;
275
276 /*
277 * Accessing nohz_cpu_mask before incrementing rcp->cur needs a
278 * barrier. Otherwise it can cause tickless idle CPUs to be
279 * included in rcp->cpumask, which will extend grace periods
280 * unnecessarily.
281 */
282 smp_mb();
283 cpus_andnot(rcp->cpumask, cpu_online_map, nohz_cpu_mask);
284
285 rcp->signaled = 0;
286 }
287}
288
289/*
290 * cpu went through a quiescent state since the beginning of the grace period.
291 * Clear it from the cpu mask and complete the grace period if it was the last
292 * cpu. Start another grace period if someone has further entries pending
293 */
294static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp)
295{
296 cpu_clear(cpu, rcp->cpumask);
297 if (cpus_empty(rcp->cpumask)) {
298 /* batch completed ! */
299 rcp->completed = rcp->cur;
300 rcu_start_batch(rcp);
301 }
302}
303
304/*
305 * Check if the cpu has gone through a quiescent state (say context
306 * switch). If so, and if it hasn't already done so in this RCU
307 * quiescent cycle, then indicate that it has done so.
308 */
309static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp,
310 struct rcu_data *rdp)
311{
312 if (rdp->quiescbatch != rcp->cur) {
313 /* start new grace period: */
314 rdp->qs_pending = 1;
315 rdp->passed_quiesc = 0;
316 rdp->quiescbatch = rcp->cur;
317 return;
318 }
319
320 /* Grace period already completed for this cpu?
321 * qs_pending is checked instead of the actual bitmap to avoid
322 * cacheline thrashing.
323 */
324 if (!rdp->qs_pending)
325 return;
326
327 /*
328 * Was there a quiescent state since the beginning of the grace
329 * period? If no, then exit and wait for the next call.
330 */
331 if (!rdp->passed_quiesc)
332 return;
333 rdp->qs_pending = 0;
334
335 spin_lock(&rcp->lock);
336 /*
337 * rdp->quiescbatch/rcp->cur and the cpu bitmap can come out of sync
338 * during cpu startup. Ignore the quiescent state.
339 */
340 if (likely(rdp->quiescbatch == rcp->cur))
341 cpu_quiet(rdp->cpu, rcp);
342
343 spin_unlock(&rcp->lock);
344}
345
346
347#ifdef CONFIG_HOTPLUG_CPU
348
349/* Warning! Helper for rcu_offline_cpu(); do not use elsewhere without reviewing
350 * the locking requirements. The list it pulls from has to belong to a cpu
351 * which is dead and hence not processing interrupts.
352 */
353static void rcu_move_batch(struct rcu_data *this_rdp, struct rcu_head *list,
354 struct rcu_head **tail)
355{
356 local_irq_disable();
357 *this_rdp->nxttail = list;
358 if (list)
359 this_rdp->nxttail = tail;
360 local_irq_enable();
361}
362
363static void __rcu_offline_cpu(struct rcu_data *this_rdp,
364 struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
365{
366 /* if the cpu going offline owns the grace period
367 * we can block indefinitely waiting for it, so flush
368 * it here
369 */
370 spin_lock_bh(&rcp->lock);
371 if (rcp->cur != rcp->completed)
372 cpu_quiet(rdp->cpu, rcp);
373 spin_unlock_bh(&rcp->lock);
374 rcu_move_batch(this_rdp, rdp->curlist, rdp->curtail);
375 rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail);
376 rcu_move_batch(this_rdp, rdp->donelist, rdp->donetail);
377}
378
379static void rcu_offline_cpu(int cpu)
380{
381 struct rcu_data *this_rdp = &get_cpu_var(rcu_data);
382 struct rcu_data *this_bh_rdp = &get_cpu_var(rcu_bh_data);
383
384 __rcu_offline_cpu(this_rdp, &rcu_ctrlblk,
385 &per_cpu(rcu_data, cpu));
386 __rcu_offline_cpu(this_bh_rdp, &rcu_bh_ctrlblk,
387 &per_cpu(rcu_bh_data, cpu));
388 put_cpu_var(rcu_data);
389 put_cpu_var(rcu_bh_data);
390}
391
392#else
393
394static void rcu_offline_cpu(int cpu)
395{
396}
397
398#endif
399
400/*
401 * This does the RCU processing work from softirq context.
402 */
403static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp,
404 struct rcu_data *rdp)
405{
406 if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch)) {
407 *rdp->donetail = rdp->curlist;
408 rdp->donetail = rdp->curtail;
409 rdp->curlist = NULL;
410 rdp->curtail = &rdp->curlist;
411 }
412
413 if (rdp->nxtlist && !rdp->curlist) {
414 local_irq_disable();
415 rdp->curlist = rdp->nxtlist;
416 rdp->curtail = rdp->nxttail;
417 rdp->nxtlist = NULL;
418 rdp->nxttail = &rdp->nxtlist;
419 local_irq_enable();
420
421 /*
422 * start the next batch of callbacks
423 */
424
425 /* determine batch number */
426 rdp->batch = rcp->cur + 1;
427 /* see the comment and corresponding wmb() in
428 * the rcu_start_batch()
429 */
430 smp_rmb();
431
432 if (!rcp->next_pending) {
433 /* and start it/schedule start if it's a new batch */
434 spin_lock(&rcp->lock);
435 rcp->next_pending = 1;
436 rcu_start_batch(rcp);
437 spin_unlock(&rcp->lock);
438 }
439 }
440
441 rcu_check_quiescent_state(rcp, rdp);
442 if (rdp->donelist)
443 rcu_do_batch(rdp);
444}
445
446static void rcu_process_callbacks(struct softirq_action *unused)
447{
448 __rcu_process_callbacks(&rcu_ctrlblk, &__get_cpu_var(rcu_data));
449 __rcu_process_callbacks(&rcu_bh_ctrlblk, &__get_cpu_var(rcu_bh_data));
450}
451
452static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
453{
454 /* This cpu has pending rcu entries and the grace period
455 * for them has completed.
456 */
457 if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch))
458 return 1;
459
460 /* This cpu has no pending entries, but there are new entries */
461 if (!rdp->curlist && rdp->nxtlist)
462 return 1;
463
464 /* This cpu has finished callbacks to invoke */
465 if (rdp->donelist)
466 return 1;
467
468 /* The rcu core waits for a quiescent state from the cpu */
469 if (rdp->quiescbatch != rcp->cur || rdp->qs_pending)
470 return 1;
471
472 /* nothing to do */
473 return 0;
474}
475
476/*
477 * Check to see if there is any immediate RCU-related work to be done
478 * by the current CPU, returning 1 if so. This function is part of the
479 * RCU implementation; it is -not- an exported member of the RCU API.
480 */
481int rcu_pending(int cpu)
482{
483 return __rcu_pending(&rcu_ctrlblk, &per_cpu(rcu_data, cpu)) ||
484 __rcu_pending(&rcu_bh_ctrlblk, &per_cpu(rcu_bh_data, cpu));
485}
486
487/*
488 * Check to see if any future RCU-related work will need to be done
489 * by the current CPU, even if none need be done immediately, returning
490 * 1 if so. This function is part of the RCU implementation; it is -not-
491 * an exported member of the RCU API.
492 */
493int rcu_needs_cpu(int cpu)
494{
495 struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
496 struct rcu_data *rdp_bh = &per_cpu(rcu_bh_data, cpu);
497
498 return (!!rdp->curlist || !!rdp_bh->curlist || rcu_pending(cpu));
499}
500
501void rcu_check_callbacks(int cpu, int user)
502{
503 if (user ||
504 (idle_cpu(cpu) && !in_softirq() &&
505 hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
506 rcu_qsctr_inc(cpu);
507 rcu_bh_qsctr_inc(cpu);
508 } else if (!in_softirq())
509 rcu_bh_qsctr_inc(cpu);
510 raise_rcu_softirq();
511}
512
513static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp,
514 struct rcu_data *rdp)
515{
516 memset(rdp, 0, sizeof(*rdp));
517 rdp->curtail = &rdp->curlist;
518 rdp->nxttail = &rdp->nxtlist;
519 rdp->donetail = &rdp->donelist;
520 rdp->quiescbatch = rcp->completed;
521 rdp->qs_pending = 0;
522 rdp->cpu = cpu;
523 rdp->blimit = blimit;
524}
525
526static void __cpuinit rcu_online_cpu(int cpu)
527{
528 struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
529 struct rcu_data *bh_rdp = &per_cpu(rcu_bh_data, cpu);
530
531 rcu_init_percpu_data(cpu, &rcu_ctrlblk, rdp);
532 rcu_init_percpu_data(cpu, &rcu_bh_ctrlblk, bh_rdp);
533 open_softirq(RCU_SOFTIRQ, rcu_process_callbacks, NULL);
534}
535
536static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
537 unsigned long action, void *hcpu)
538{
539 long cpu = (long)hcpu;
540
541 switch (action) {
542 case CPU_UP_PREPARE:
543 case CPU_UP_PREPARE_FROZEN:
544 rcu_online_cpu(cpu);
545 break;
546 case CPU_DEAD:
547 case CPU_DEAD_FROZEN:
548 rcu_offline_cpu(cpu);
549 break;
550 default:
551 break;
552 }
553 return NOTIFY_OK;
554}
555
556static struct notifier_block __cpuinitdata rcu_nb = {
557 .notifier_call = rcu_cpu_notify,
558};
559
560/*
561 * Initializes the rcu mechanism. Assumed to be called early,
562 * that is, before the local timer (SMP) or the jiffy timer (uniprocessor) is set up.
563 * Note that rcu_qsctr and friends are implicitly
564 * initialized due to the choice of ``0'' for RCU_CTR_INVALID.
565 */
566void __init __rcu_init(void)
567{
568 rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE,
569 (void *)(long)smp_processor_id());
570 /* Register notifier for non-boot CPUs */
571 register_cpu_notifier(&rcu_nb);
572}
573
574module_param(blimit, int, 0);
575module_param(qhimark, int, 0);
576module_param(qlowmark, int, 0);
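One recurring detail in the implementation above: each per-cpu callback list (nxtlist, curlist, donelist) is a singly linked list paired with a pointer to its terminating ->next field, so call_rcu() can enqueue and rcu_move_batch() can splice whole lists in O(1) without walking them. A stand-alone sketch of the idiom (illustrative only; struct rcu_head is the one from rcupdate.h):

#include <linux/rcupdate.h>	/* struct rcu_head */

static struct rcu_head *list;			/* empty queue: list == NULL ... */
static struct rcu_head **tail = &list;		/* ... and tail == &list */

static void enqueue(struct rcu_head *head)
{
	head->next = NULL;
	*tail = head;			/* link behind the current last element */
	tail = &head->next;		/* the new element's ->next is the new end */
}

static void splice_in(struct rcu_head *src_list, struct rcu_head **src_tail)
{
	*tail = src_list;		/* append the whole source list at once */
	if (src_list)
		tail = src_tail;	/* adopt its tail only if it was non-empty */
}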
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 4dfa0b792efa..0ccd0095ebdc 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -15,7 +15,7 @@
15 * along with this program; if not, write to the Free Software 15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 * 17 *
18 * Copyright (C) IBM Corporation, 2001 18 * Copyright IBM Corporation, 2001
19 * 19 *
20 * Authors: Dipankar Sarma <dipankar@in.ibm.com> 20 * Authors: Dipankar Sarma <dipankar@in.ibm.com>
21 * Manfred Spraul <manfred@colorfullife.com> 21 * Manfred Spraul <manfred@colorfullife.com>
@@ -35,163 +35,57 @@
35#include <linux/init.h> 35#include <linux/init.h>
36#include <linux/spinlock.h> 36#include <linux/spinlock.h>
37#include <linux/smp.h> 37#include <linux/smp.h>
38#include <linux/rcupdate.h>
39#include <linux/interrupt.h> 38#include <linux/interrupt.h>
40#include <linux/sched.h> 39#include <linux/sched.h>
41#include <asm/atomic.h> 40#include <asm/atomic.h>
42#include <linux/bitops.h> 41#include <linux/bitops.h>
43#include <linux/module.h>
44#include <linux/completion.h> 42#include <linux/completion.h>
45#include <linux/moduleparam.h>
46#include <linux/percpu.h> 43#include <linux/percpu.h>
47#include <linux/notifier.h> 44#include <linux/notifier.h>
48#include <linux/cpu.h> 45#include <linux/cpu.h>
49#include <linux/mutex.h> 46#include <linux/mutex.h>
47#include <linux/module.h>
50 48
51#ifdef CONFIG_DEBUG_LOCK_ALLOC 49struct rcu_synchronize {
52static struct lock_class_key rcu_lock_key; 50 struct rcu_head head;
53struct lockdep_map rcu_lock_map = 51 struct completion completion;
54 STATIC_LOCKDEP_MAP_INIT("rcu_read_lock", &rcu_lock_key);
55
56EXPORT_SYMBOL_GPL(rcu_lock_map);
57#endif
58
59/* Definition for rcupdate control block. */
60static struct rcu_ctrlblk rcu_ctrlblk = {
61 .cur = -300,
62 .completed = -300,
63 .lock = __SPIN_LOCK_UNLOCKED(&rcu_ctrlblk.lock),
64 .cpumask = CPU_MASK_NONE,
65};
66static struct rcu_ctrlblk rcu_bh_ctrlblk = {
67 .cur = -300,
68 .completed = -300,
69 .lock = __SPIN_LOCK_UNLOCKED(&rcu_bh_ctrlblk.lock),
70 .cpumask = CPU_MASK_NONE,
71}; 52};
72 53
73DEFINE_PER_CPU(struct rcu_data, rcu_data) = { 0L }; 54static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL};
74DEFINE_PER_CPU(struct rcu_data, rcu_bh_data) = { 0L };
75
76static int blimit = 10;
77static int qhimark = 10000;
78static int qlowmark = 100;
79
80static atomic_t rcu_barrier_cpu_count; 55static atomic_t rcu_barrier_cpu_count;
81static DEFINE_MUTEX(rcu_barrier_mutex); 56static DEFINE_MUTEX(rcu_barrier_mutex);
82static struct completion rcu_barrier_completion; 57static struct completion rcu_barrier_completion;
83 58
84#ifdef CONFIG_SMP 59/* Because of FASTCALL declaration of complete, we use this wrapper */
85static void force_quiescent_state(struct rcu_data *rdp, 60static void wakeme_after_rcu(struct rcu_head *head)
86 struct rcu_ctrlblk *rcp)
87{
88 int cpu;
89 cpumask_t cpumask;
90 set_need_resched();
91 if (unlikely(!rcp->signaled)) {
92 rcp->signaled = 1;
93 /*
94 * Don't send IPI to itself. With irqs disabled,
95 * rdp->cpu is the current cpu.
96 */
97 cpumask = rcp->cpumask;
98 cpu_clear(rdp->cpu, cpumask);
99 for_each_cpu_mask(cpu, cpumask)
100 smp_send_reschedule(cpu);
101 }
102}
103#else
104static inline void force_quiescent_state(struct rcu_data *rdp,
105 struct rcu_ctrlblk *rcp)
106{ 61{
107 set_need_resched(); 62 struct rcu_synchronize *rcu;
63
64 rcu = container_of(head, struct rcu_synchronize, head);
65 complete(&rcu->completion);
108} 66}
109#endif
110 67
111/** 68/**
112 * call_rcu - Queue an RCU callback for invocation after a grace period. 69 * synchronize_rcu - wait until a grace period has elapsed.
113 * @head: structure to be used for queueing the RCU updates.
114 * @func: actual update function to be invoked after the grace period
115 * 70 *
116 * The update function will be invoked some time after a full grace 71 * Control will return to the caller some time after a full grace
117 * period elapses, in other words after all currently executing RCU 72 * period has elapsed, in other words after all currently executing RCU
118 * read-side critical sections have completed. RCU read-side critical 73 * read-side critical sections have completed. RCU read-side critical
119 * sections are delimited by rcu_read_lock() and rcu_read_unlock(), 74 * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
120 * and may be nested. 75 * and may be nested.
121 */ 76 */
122void fastcall call_rcu(struct rcu_head *head, 77void synchronize_rcu(void)
123 void (*func)(struct rcu_head *rcu))
124{
125 unsigned long flags;
126 struct rcu_data *rdp;
127
128 head->func = func;
129 head->next = NULL;
130 local_irq_save(flags);
131 rdp = &__get_cpu_var(rcu_data);
132 *rdp->nxttail = head;
133 rdp->nxttail = &head->next;
134 if (unlikely(++rdp->qlen > qhimark)) {
135 rdp->blimit = INT_MAX;
136 force_quiescent_state(rdp, &rcu_ctrlblk);
137 }
138 local_irq_restore(flags);
139}
140
141/**
142 * call_rcu_bh - Queue an RCU for invocation after a quicker grace period.
143 * @head: structure to be used for queueing the RCU updates.
144 * @func: actual update function to be invoked after the grace period
145 *
146 * The update function will be invoked some time after a full grace
147 * period elapses, in other words after all currently executing RCU
148 * read-side critical sections have completed. call_rcu_bh() assumes
149 * that the read-side critical sections end on completion of a softirq
150 * handler. This means that read-side critical sections in process
151 * context must not be interrupted by softirqs. This interface is to be
152 * used when most of the read-side critical sections are in softirq context.
153 * RCU read-side critical sections are delimited by rcu_read_lock() and
154 * rcu_read_unlock(), * if in interrupt context or rcu_read_lock_bh()
155 * and rcu_read_unlock_bh(), if in process context. These may be nested.
156 */
157void fastcall call_rcu_bh(struct rcu_head *head,
158 void (*func)(struct rcu_head *rcu))
159{ 78{
160 unsigned long flags; 79 struct rcu_synchronize rcu;
161 struct rcu_data *rdp;
162
163 head->func = func;
164 head->next = NULL;
165 local_irq_save(flags);
166 rdp = &__get_cpu_var(rcu_bh_data);
167 *rdp->nxttail = head;
168 rdp->nxttail = &head->next;
169
170 if (unlikely(++rdp->qlen > qhimark)) {
171 rdp->blimit = INT_MAX;
172 force_quiescent_state(rdp, &rcu_bh_ctrlblk);
173 }
174
175 local_irq_restore(flags);
176}
177 80
178/* 81 init_completion(&rcu.completion);
179 * Return the number of RCU batches processed thus far. Useful 82 /* Will wake me after RCU finished */
180 * for debug and statistics. 83 call_rcu(&rcu.head, wakeme_after_rcu);
181 */
182long rcu_batches_completed(void)
183{
184 return rcu_ctrlblk.completed;
185}
186 84
187/* 85 /* Wait for it */
188 * Return the number of RCU batches processed thus far. Useful 86 wait_for_completion(&rcu.completion);
189 * for debug and statistics.
190 */
191long rcu_batches_completed_bh(void)
192{
193 return rcu_bh_ctrlblk.completed;
194} 87}
88EXPORT_SYMBOL_GPL(synchronize_rcu);
195 89
196static void rcu_barrier_callback(struct rcu_head *notused) 90static void rcu_barrier_callback(struct rcu_head *notused)
197{ 91{
@@ -205,10 +99,8 @@ static void rcu_barrier_callback(struct rcu_head *notused)
205static void rcu_barrier_func(void *notused) 99static void rcu_barrier_func(void *notused)
206{ 100{
207 int cpu = smp_processor_id(); 101 int cpu = smp_processor_id();
208 struct rcu_data *rdp = &per_cpu(rcu_data, cpu); 102 struct rcu_head *head = &per_cpu(rcu_barrier_head, cpu);
209 struct rcu_head *head;
210 103
211 head = &rdp->barrier;
212 atomic_inc(&rcu_barrier_cpu_count); 104 atomic_inc(&rcu_barrier_cpu_count);
213 call_rcu(head, rcu_barrier_callback); 105 call_rcu(head, rcu_barrier_callback);
214} 106}
@@ -229,425 +121,8 @@ void rcu_barrier(void)
229} 121}
230EXPORT_SYMBOL_GPL(rcu_barrier); 122EXPORT_SYMBOL_GPL(rcu_barrier);
231 123
232/* Raises the softirq for processing rcu_callbacks. */
233static inline void raise_rcu_softirq(void)
234{
235 raise_softirq(RCU_SOFTIRQ);
236 /*
237 * The smp_mb() here is required to ensure that this cpu's
238 * __rcu_process_callbacks() reads the most recently updated
239 * value of rcu->cur.
240 */
241 smp_mb();
242}
243
244/*
245 * Invoke the completed RCU callbacks. They are expected to be in
246 * a per-cpu list.
247 */
248static void rcu_do_batch(struct rcu_data *rdp)
249{
250 struct rcu_head *next, *list;
251 int count = 0;
252
253 list = rdp->donelist;
254 while (list) {
255 next = list->next;
256 prefetch(next);
257 list->func(list);
258 list = next;
259 if (++count >= rdp->blimit)
260 break;
261 }
262 rdp->donelist = list;
263
264 local_irq_disable();
265 rdp->qlen -= count;
266 local_irq_enable();
267 if (rdp->blimit == INT_MAX && rdp->qlen <= qlowmark)
268 rdp->blimit = blimit;
269
270 if (!rdp->donelist)
271 rdp->donetail = &rdp->donelist;
272 else
273 raise_rcu_softirq();
274}
275
276/*
277 * Grace period handling:
278 * The grace period handling consists out of two steps:
279 * - A new grace period is started.
280 * This is done by rcu_start_batch. The start is not broadcasted to
281 * all cpus, they must pick this up by comparing rcp->cur with
282 * rdp->quiescbatch. All cpus are recorded in the
283 * rcu_ctrlblk.cpumask bitmap.
284 * - All cpus must go through a quiescent state.
285 * Since the start of the grace period is not broadcasted, at least two
286 * calls to rcu_check_quiescent_state are required:
287 * The first call just notices that a new grace period is running. The
288 * following calls check if there was a quiescent state since the beginning
289 * of the grace period. If so, it updates rcu_ctrlblk.cpumask. If
290 * the bitmap is empty, then the grace period is completed.
291 * rcu_check_quiescent_state calls rcu_start_batch(0) to start the next grace
292 * period (if necessary).
293 */
294/*
295 * Register a new batch of callbacks, and start it up if there is currently no
296 * active batch and the batch to be registered has not already occurred.
297 * Caller must hold rcu_ctrlblk.lock.
298 */
299static void rcu_start_batch(struct rcu_ctrlblk *rcp)
300{
301 if (rcp->next_pending &&
302 rcp->completed == rcp->cur) {
303 rcp->next_pending = 0;
304 /*
305 * next_pending == 0 must be visible in
306 * __rcu_process_callbacks() before it can see new value of cur.
307 */
308 smp_wmb();
309 rcp->cur++;
310
311 /*
312 * Accessing nohz_cpu_mask before incrementing rcp->cur needs a
313 * Barrier Otherwise it can cause tickless idle CPUs to be
314 * included in rcp->cpumask, which will extend graceperiods
315 * unnecessarily.
316 */
317 smp_mb();
318 cpus_andnot(rcp->cpumask, cpu_online_map, nohz_cpu_mask);
319
320 rcp->signaled = 0;
321 }
322}
323
324/*
325 * cpu went through a quiescent state since the beginning of the grace period.
326 * Clear it from the cpu mask and complete the grace period if it was the last
327 * cpu. Start another grace period if someone has further entries pending
328 */
329static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp)
330{
331 cpu_clear(cpu, rcp->cpumask);
332 if (cpus_empty(rcp->cpumask)) {
333 /* batch completed ! */
334 rcp->completed = rcp->cur;
335 rcu_start_batch(rcp);
336 }
337}
338
339/*
340 * Check if the cpu has gone through a quiescent state (say context
341 * switch). If so and if it already hasn't done so in this RCU
342 * quiescent cycle, then indicate that it has done so.
343 */
344static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp,
345 struct rcu_data *rdp)
346{
347 if (rdp->quiescbatch != rcp->cur) {
348 /* start new grace period: */
349 rdp->qs_pending = 1;
350 rdp->passed_quiesc = 0;
351 rdp->quiescbatch = rcp->cur;
352 return;
353 }
354
355 /* Grace period already completed for this cpu?
356 * qs_pending is checked instead of the actual bitmap to avoid
357 * cacheline trashing.
358 */
359 if (!rdp->qs_pending)
360 return;
361
362 /*
363 * Was there a quiescent state since the beginning of the grace
364 * period? If no, then exit and wait for the next call.
365 */
366 if (!rdp->passed_quiesc)
367 return;
368 rdp->qs_pending = 0;
369
370 spin_lock(&rcp->lock);
371 /*
372 * rdp->quiescbatch/rcp->cur and the cpu bitmap can come out of sync
373 * during cpu startup. Ignore the quiescent state.
374 */
375 if (likely(rdp->quiescbatch == rcp->cur))
376 cpu_quiet(rdp->cpu, rcp);
377
378 spin_unlock(&rcp->lock);
379}
380
381
382#ifdef CONFIG_HOTPLUG_CPU
383
384/* warning! helper for rcu_offline_cpu. do not use elsewhere without reviewing
385 * locking requirements, the list it's pulling from has to belong to a cpu
386 * which is dead and hence not processing interrupts.
387 */
388static void rcu_move_batch(struct rcu_data *this_rdp, struct rcu_head *list,
389 struct rcu_head **tail)
390{
391 local_irq_disable();
392 *this_rdp->nxttail = list;
393 if (list)
394 this_rdp->nxttail = tail;
395 local_irq_enable();
396}
397
398static void __rcu_offline_cpu(struct rcu_data *this_rdp,
399 struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
400{
401 /* if the cpu going offline owns the grace period
402 * we can block indefinitely waiting for it, so flush
403 * it here
404 */
405 spin_lock_bh(&rcp->lock);
406 if (rcp->cur != rcp->completed)
407 cpu_quiet(rdp->cpu, rcp);
408 spin_unlock_bh(&rcp->lock);
409 rcu_move_batch(this_rdp, rdp->curlist, rdp->curtail);
410 rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail);
411 rcu_move_batch(this_rdp, rdp->donelist, rdp->donetail);
412}
413
414static void rcu_offline_cpu(int cpu)
415{
416 struct rcu_data *this_rdp = &get_cpu_var(rcu_data);
417 struct rcu_data *this_bh_rdp = &get_cpu_var(rcu_bh_data);
418
419 __rcu_offline_cpu(this_rdp, &rcu_ctrlblk,
420 &per_cpu(rcu_data, cpu));
421 __rcu_offline_cpu(this_bh_rdp, &rcu_bh_ctrlblk,
422 &per_cpu(rcu_bh_data, cpu));
423 put_cpu_var(rcu_data);
424 put_cpu_var(rcu_bh_data);
425}
426
427#else
428
429static void rcu_offline_cpu(int cpu)
430{
431}
432
433#endif
434
435/*
436 * This does the RCU processing work from softirq context.
437 */
438static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp,
439 struct rcu_data *rdp)
440{
441 if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch)) {
442 *rdp->donetail = rdp->curlist;
443 rdp->donetail = rdp->curtail;
444 rdp->curlist = NULL;
445 rdp->curtail = &rdp->curlist;
446 }
447
448 if (rdp->nxtlist && !rdp->curlist) {
449 local_irq_disable();
450 rdp->curlist = rdp->nxtlist;
451 rdp->curtail = rdp->nxttail;
452 rdp->nxtlist = NULL;
453 rdp->nxttail = &rdp->nxtlist;
454 local_irq_enable();
455
456 /*
457 * start the next batch of callbacks
458 */
459
460 /* determine batch number */
461 rdp->batch = rcp->cur + 1;
462 /* see the comment and corresponding wmb() in
463 * the rcu_start_batch()
464 */
465 smp_rmb();
466
467 if (!rcp->next_pending) {
468 /* and start it/schedule start if it's a new batch */
469 spin_lock(&rcp->lock);
470 rcp->next_pending = 1;
471 rcu_start_batch(rcp);
472 spin_unlock(&rcp->lock);
473 }
474 }
475
476 rcu_check_quiescent_state(rcp, rdp);
477 if (rdp->donelist)
478 rcu_do_batch(rdp);
479}
480
481static void rcu_process_callbacks(struct softirq_action *unused)
482{
483 __rcu_process_callbacks(&rcu_ctrlblk, &__get_cpu_var(rcu_data));
484 __rcu_process_callbacks(&rcu_bh_ctrlblk, &__get_cpu_var(rcu_bh_data));
485}
486
487static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
488{
489 /* This cpu has pending rcu entries and the grace period
490 * for them has completed.
491 */
492 if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch))
493 return 1;
494
495 /* This cpu has no pending entries, but there are new entries */
496 if (!rdp->curlist && rdp->nxtlist)
497 return 1;
498
499 /* This cpu has finished callbacks to invoke */
500 if (rdp->donelist)
501 return 1;
502
503 /* The rcu core waits for a quiescent state from the cpu */
504 if (rdp->quiescbatch != rcp->cur || rdp->qs_pending)
505 return 1;
506
507 /* nothing to do */
508 return 0;
509}
510
511/*
512 * Check to see if there is any immediate RCU-related work to be done
513 * by the current CPU, returning 1 if so. This function is part of the
514 * RCU implementation; it is -not- an exported member of the RCU API.
515 */
516int rcu_pending(int cpu)
517{
518 return __rcu_pending(&rcu_ctrlblk, &per_cpu(rcu_data, cpu)) ||
519 __rcu_pending(&rcu_bh_ctrlblk, &per_cpu(rcu_bh_data, cpu));
520}
521
522/*
523 * Check to see if any future RCU-related work will need to be done
524 * by the current CPU, even if none need be done immediately, returning
525 * 1 if so. This function is part of the RCU implementation; it is -not-
526 * an exported member of the RCU API.
527 */
528int rcu_needs_cpu(int cpu)
529{
530 struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
531 struct rcu_data *rdp_bh = &per_cpu(rcu_bh_data, cpu);
532
533 return (!!rdp->curlist || !!rdp_bh->curlist || rcu_pending(cpu));
534}
535
536void rcu_check_callbacks(int cpu, int user)
537{
538 if (user ||
539 (idle_cpu(cpu) && !in_softirq() &&
540 hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
541 rcu_qsctr_inc(cpu);
542 rcu_bh_qsctr_inc(cpu);
543 } else if (!in_softirq())
544 rcu_bh_qsctr_inc(cpu);
545 raise_rcu_softirq();
546}
547
548static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp,
549 struct rcu_data *rdp)
550{
551 memset(rdp, 0, sizeof(*rdp));
552 rdp->curtail = &rdp->curlist;
553 rdp->nxttail = &rdp->nxtlist;
554 rdp->donetail = &rdp->donelist;
555 rdp->quiescbatch = rcp->completed;
556 rdp->qs_pending = 0;
557 rdp->cpu = cpu;
558 rdp->blimit = blimit;
559}
560
561static void __cpuinit rcu_online_cpu(int cpu)
562{
563 struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
564 struct rcu_data *bh_rdp = &per_cpu(rcu_bh_data, cpu);
565
566 rcu_init_percpu_data(cpu, &rcu_ctrlblk, rdp);
567 rcu_init_percpu_data(cpu, &rcu_bh_ctrlblk, bh_rdp);
568 open_softirq(RCU_SOFTIRQ, rcu_process_callbacks, NULL);
569}
570
571static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
572 unsigned long action, void *hcpu)
573{
574 long cpu = (long)hcpu;
575 switch (action) {
576 case CPU_UP_PREPARE:
577 case CPU_UP_PREPARE_FROZEN:
578 rcu_online_cpu(cpu);
579 break;
580 case CPU_DEAD:
581 case CPU_DEAD_FROZEN:
582 rcu_offline_cpu(cpu);
583 break;
584 default:
585 break;
586 }
587 return NOTIFY_OK;
588}
589
590static struct notifier_block __cpuinitdata rcu_nb = {
591 .notifier_call = rcu_cpu_notify,
592};
593
594/*
595 * Initializes rcu mechanism. Assumed to be called early.
596 * That is before local timer(SMP) or jiffie timer (uniproc) is setup.
597 * Note that rcu_qsctr and friends are implicitly
598 * initialized due to the choice of ``0'' for RCU_CTR_INVALID.
599 */
600void __init rcu_init(void) 124void __init rcu_init(void)
601{ 125{
602 rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE, 126 __rcu_init();
603 (void *)(long)smp_processor_id());
604 /* Register notifier for non-boot CPUs */
605 register_cpu_notifier(&rcu_nb);
606}
607
608struct rcu_synchronize {
609 struct rcu_head head;
610 struct completion completion;
611};
612
613/* Because of FASTCALL declaration of complete, we use this wrapper */
614static void wakeme_after_rcu(struct rcu_head *head)
615{
616 struct rcu_synchronize *rcu;
617
618 rcu = container_of(head, struct rcu_synchronize, head);
619 complete(&rcu->completion);
620}
621
622/**
623 * synchronize_rcu - wait until a grace period has elapsed.
624 *
625 * Control will return to the caller some time after a full grace
626 * period has elapsed, in other words after all currently executing RCU
627 * read-side critical sections have completed. RCU read-side critical
628 * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
629 * and may be nested.
630 *
631 * If your read-side code is not protected by rcu_read_lock(), do -not-
632 * use synchronize_rcu().
633 */
634void synchronize_rcu(void)
635{
636 struct rcu_synchronize rcu;
637
638 init_completion(&rcu.completion);
639 /* Will wake me after RCU finished */
640 call_rcu(&rcu.head, wakeme_after_rcu);
641
642 /* Wait for it */
643 wait_for_completion(&rcu.completion);
644} 127}
645 128
646module_param(blimit, int, 0);
647module_param(qhimark, int, 0);
648module_param(qlowmark, int, 0);
649EXPORT_SYMBOL_GPL(rcu_batches_completed);
650EXPORT_SYMBOL_GPL(rcu_batches_completed_bh);
651EXPORT_SYMBOL_GPL(call_rcu);
652EXPORT_SYMBOL_GPL(call_rcu_bh);
653EXPORT_SYMBOL_GPL(synchronize_rcu);
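The split above also makes the difference between synchronize_rcu() and rcu_barrier() easier to see: the former waits for one grace period, while the latter waits until every callback already queued with call_rcu() has actually run. That matters on module unload paths that free objects from RCU callbacks into their own cache. A hedged sketch (hypothetical cache and exit-handler names, not part of this patch):

#include <linux/module.h>
#include <linux/slab.h>

static struct kmem_cache *foo_cache;	/* hypothetical cache whose objects are freed from RCU callbacks */

static void __exit foo_exit(void)
{
	/* Stop creating new objects and unlink the existing ones first (not shown). */
	rcu_barrier();			/* wait for every pending callback to finish */
	kmem_cache_destroy(foo_cache);	/* only now is it safe to tear down the cache */
}
module_exit(foo_exit);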