diff options
author | Paul E. McKenney <paulmck@linux.vnet.ibm.com> | 2008-01-25 15:08:24 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-01-25 15:08:24 -0500 |
commit | e260be673a15b6125068270e0216a3bfbfc12f87 (patch) | |
tree | f50760606d395bf6faa9e865f814761a3c88d32c /kernel | |
parent | e0ecfa7917cafe72f4a75f87e8bb5d8d51dc534f (diff) |
Preempt-RCU: implementation
This patch implements a new version of RCU which allows its read-side
critical sections to be preempted. It uses a set of counter pairs
to keep track of the read-side critical sections and flips them
when all tasks exit read-side critical section. The details
of this implementation can be found in this paper -
http://www.rdrop.com/users/paulmck/RCU/OLSrtRCU.2006.08.11a.pdf
and the article-
http://lwn.net/Articles/253651/
This patch was developed as a part of the -rt kernel development and
meant to provide better latencies when read-side critical sections of
RCU don't disable preemption. As a consequence of keeping track of RCU
readers, the readers have a slight overhead (optimizations in the paper).
This implementation co-exists with the "classic" RCU implementations
and can be switched to at compiler.
Also includes RCU tracing summarized in debugfs.
[ akpm@linux-foundation.org: build fixes on non-preempt architectures ]
Signed-off-by: Gautham R Shenoy <ego@in.ibm.com>
Signed-off-by: Dipankar Sarma <dipankar@in.ibm.com>
Signed-off-by: Paul E. McKenney <paulmck@us.ibm.com>
Reviewed-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/Kconfig.preempt | 10 | ||||
-rw-r--r-- | kernel/Makefile | 7 | ||||
-rw-r--r-- | kernel/fork.c | 4 | ||||
-rw-r--r-- | kernel/rcuclassic.c | 1 | ||||
-rw-r--r-- | kernel/rcupreempt.c | 816 | ||||
-rw-r--r-- | kernel/rcupreempt_trace.c | 330 |
6 files changed, 1166 insertions, 2 deletions
diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt index c64ce9c14207..61fa116efcde 100644 --- a/kernel/Kconfig.preempt +++ b/kernel/Kconfig.preempt | |||
@@ -63,3 +63,13 @@ config PREEMPT_BKL | |||
63 | Say Y here if you are building a kernel for a desktop system. | 63 | Say Y here if you are building a kernel for a desktop system. |
64 | Say N if you are unsure. | 64 | Say N if you are unsure. |
65 | 65 | ||
66 | config RCU_TRACE | ||
67 | bool "Enable tracing for RCU - currently stats in debugfs" | ||
68 | select DEBUG_FS | ||
69 | default y | ||
70 | help | ||
71 | This option provides tracing in RCU which presents stats | ||
72 | in debugfs for debugging RCU implementation. | ||
73 | |||
74 | Say Y here if you want to enable RCU tracing | ||
75 | Say N if you are unsure. | ||
diff --git a/kernel/Makefile b/kernel/Makefile index def5dd6097a0..68755cd9a7e4 100644 --- a/kernel/Makefile +++ b/kernel/Makefile | |||
@@ -6,7 +6,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \ | |||
6 | exit.o itimer.o time.o softirq.o resource.o \ | 6 | exit.o itimer.o time.o softirq.o resource.o \ |
7 | sysctl.o capability.o ptrace.o timer.o user.o user_namespace.o \ | 7 | sysctl.o capability.o ptrace.o timer.o user.o user_namespace.o \ |
8 | signal.o sys.o kmod.o workqueue.o pid.o \ | 8 | signal.o sys.o kmod.o workqueue.o pid.o \ |
9 | rcupdate.o rcuclassic.o extable.o params.o posix-timers.o \ | 9 | rcupdate.o extable.o params.o posix-timers.o \ |
10 | kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ | 10 | kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ |
11 | hrtimer.o rwsem.o latency.o nsproxy.o srcu.o \ | 11 | hrtimer.o rwsem.o latency.o nsproxy.o srcu.o \ |
12 | utsname.o notifier.o | 12 | utsname.o notifier.o |
@@ -52,6 +52,11 @@ obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o | |||
52 | obj-$(CONFIG_GENERIC_HARDIRQS) += irq/ | 52 | obj-$(CONFIG_GENERIC_HARDIRQS) += irq/ |
53 | obj-$(CONFIG_SECCOMP) += seccomp.o | 53 | obj-$(CONFIG_SECCOMP) += seccomp.o |
54 | obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o | 54 | obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o |
55 | obj-$(CONFIG_CLASSIC_RCU) += rcuclassic.o | ||
56 | obj-$(CONFIG_PREEMPT_RCU) += rcupreempt.o | ||
57 | ifeq ($(CONFIG_PREEMPT_RCU),y) | ||
58 | obj-$(CONFIG_RCU_TRACE) += rcupreempt_trace.o | ||
59 | endif | ||
55 | obj-$(CONFIG_RELAY) += relay.o | 60 | obj-$(CONFIG_RELAY) += relay.o |
56 | obj-$(CONFIG_SYSCTL) += utsname_sysctl.o | 61 | obj-$(CONFIG_SYSCTL) += utsname_sysctl.o |
57 | obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o | 62 | obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o |
diff --git a/kernel/fork.c b/kernel/fork.c index 930c51865ab4..9f8ef32cbc7a 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -1045,6 +1045,10 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1045 | copy_flags(clone_flags, p); | 1045 | copy_flags(clone_flags, p); |
1046 | INIT_LIST_HEAD(&p->children); | 1046 | INIT_LIST_HEAD(&p->children); |
1047 | INIT_LIST_HEAD(&p->sibling); | 1047 | INIT_LIST_HEAD(&p->sibling); |
1048 | #ifdef CONFIG_PREEMPT_RCU | ||
1049 | p->rcu_read_lock_nesting = 0; | ||
1050 | p->rcu_flipctr_idx = 0; | ||
1051 | #endif /* #ifdef CONFIG_PREEMPT_RCU */ | ||
1048 | p->vfork_done = NULL; | 1052 | p->vfork_done = NULL; |
1049 | spin_lock_init(&p->alloc_lock); | 1053 | spin_lock_init(&p->alloc_lock); |
1050 | 1054 | ||
diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c index ce0cf16cab67..f4ffbd0f306f 100644 --- a/kernel/rcuclassic.c +++ b/kernel/rcuclassic.c | |||
@@ -45,7 +45,6 @@ | |||
45 | #include <linux/moduleparam.h> | 45 | #include <linux/moduleparam.h> |
46 | #include <linux/percpu.h> | 46 | #include <linux/percpu.h> |
47 | #include <linux/notifier.h> | 47 | #include <linux/notifier.h> |
48 | /* #include <linux/rcupdate.h> @@@ */ | ||
49 | #include <linux/cpu.h> | 48 | #include <linux/cpu.h> |
50 | #include <linux/mutex.h> | 49 | #include <linux/mutex.h> |
51 | 50 | ||
diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c new file mode 100644 index 000000000000..a5aabb1677f8 --- /dev/null +++ b/kernel/rcupreempt.c | |||
@@ -0,0 +1,816 @@ | |||
1 | /* | ||
2 | * Read-Copy Update mechanism for mutual exclusion, realtime implementation | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
17 | * | ||
18 | * Copyright IBM Corporation, 2006 | ||
19 | * | ||
20 | * Authors: Paul E. McKenney <paulmck@us.ibm.com> | ||
21 | * With thanks to Esben Nielsen, Bill Huey, and Ingo Molnar | ||
22 | * for pushing me away from locks and towards counters, and | ||
23 | * to Suparna Bhattacharya for pushing me completely away | ||
24 | * from atomic instructions on the read side. | ||
25 | * | ||
26 | * Papers: http://www.rdrop.com/users/paulmck/RCU | ||
27 | * | ||
28 | * Design Document: http://lwn.net/Articles/253651/ | ||
29 | * | ||
30 | * For detailed explanation of Read-Copy Update mechanism see - | ||
31 | * Documentation/RCU/ *.txt | ||
32 | * | ||
33 | */ | ||
34 | #include <linux/types.h> | ||
35 | #include <linux/kernel.h> | ||
36 | #include <linux/init.h> | ||
37 | #include <linux/spinlock.h> | ||
38 | #include <linux/smp.h> | ||
39 | #include <linux/rcupdate.h> | ||
40 | #include <linux/interrupt.h> | ||
41 | #include <linux/sched.h> | ||
42 | #include <asm/atomic.h> | ||
43 | #include <linux/bitops.h> | ||
44 | #include <linux/module.h> | ||
45 | #include <linux/completion.h> | ||
46 | #include <linux/moduleparam.h> | ||
47 | #include <linux/percpu.h> | ||
48 | #include <linux/notifier.h> | ||
49 | #include <linux/rcupdate.h> | ||
50 | #include <linux/cpu.h> | ||
51 | #include <linux/random.h> | ||
52 | #include <linux/delay.h> | ||
53 | #include <linux/byteorder/swabb.h> | ||
54 | #include <linux/cpumask.h> | ||
55 | #include <linux/rcupreempt_trace.h> | ||
56 | |||
57 | /* | ||
58 | * Macro that prevents the compiler from reordering accesses, but does | ||
59 | * absolutely -nothing- to prevent CPUs from reordering. This is used | ||
60 | * only to mediate communication between mainline code and hardware | ||
61 | * interrupt and NMI handlers. | ||
62 | */ | ||
63 | #define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x)) | ||
64 | |||
65 | /* | ||
66 | * PREEMPT_RCU data structures. | ||
67 | */ | ||
68 | |||
69 | /* | ||
70 | * GP_STAGES specifies the number of times the state machine has | ||
71 | * to go through the all the rcu_try_flip_states (see below) | ||
72 | * in a single Grace Period. | ||
73 | * | ||
74 | * GP in GP_STAGES stands for Grace Period ;) | ||
75 | */ | ||
76 | #define GP_STAGES 2 | ||
77 | struct rcu_data { | ||
78 | spinlock_t lock; /* Protect rcu_data fields. */ | ||
79 | long completed; /* Number of last completed batch. */ | ||
80 | int waitlistcount; | ||
81 | struct tasklet_struct rcu_tasklet; | ||
82 | struct rcu_head *nextlist; | ||
83 | struct rcu_head **nexttail; | ||
84 | struct rcu_head *waitlist[GP_STAGES]; | ||
85 | struct rcu_head **waittail[GP_STAGES]; | ||
86 | struct rcu_head *donelist; | ||
87 | struct rcu_head **donetail; | ||
88 | long rcu_flipctr[2]; | ||
89 | #ifdef CONFIG_RCU_TRACE | ||
90 | struct rcupreempt_trace trace; | ||
91 | #endif /* #ifdef CONFIG_RCU_TRACE */ | ||
92 | }; | ||
93 | |||
94 | /* | ||
95 | * States for rcu_try_flip() and friends. | ||
96 | */ | ||
97 | |||
98 | enum rcu_try_flip_states { | ||
99 | |||
100 | /* | ||
101 | * Stay here if nothing is happening. Flip the counter if somthing | ||
102 | * starts happening. Denoted by "I" | ||
103 | */ | ||
104 | rcu_try_flip_idle_state, | ||
105 | |||
106 | /* | ||
107 | * Wait here for all CPUs to notice that the counter has flipped. This | ||
108 | * prevents the old set of counters from ever being incremented once | ||
109 | * we leave this state, which in turn is necessary because we cannot | ||
110 | * test any individual counter for zero -- we can only check the sum. | ||
111 | * Denoted by "A". | ||
112 | */ | ||
113 | rcu_try_flip_waitack_state, | ||
114 | |||
115 | /* | ||
116 | * Wait here for the sum of the old per-CPU counters to reach zero. | ||
117 | * Denoted by "Z". | ||
118 | */ | ||
119 | rcu_try_flip_waitzero_state, | ||
120 | |||
121 | /* | ||
122 | * Wait here for each of the other CPUs to execute a memory barrier. | ||
123 | * This is necessary to ensure that these other CPUs really have | ||
124 | * completed executing their RCU read-side critical sections, despite | ||
125 | * their CPUs wildly reordering memory. Denoted by "M". | ||
126 | */ | ||
127 | rcu_try_flip_waitmb_state, | ||
128 | }; | ||
129 | |||
130 | struct rcu_ctrlblk { | ||
131 | spinlock_t fliplock; /* Protect state-machine transitions. */ | ||
132 | long completed; /* Number of last completed batch. */ | ||
133 | enum rcu_try_flip_states rcu_try_flip_state; /* The current state of | ||
134 | the rcu state machine */ | ||
135 | }; | ||
136 | |||
137 | static DEFINE_PER_CPU(struct rcu_data, rcu_data); | ||
138 | static struct rcu_ctrlblk rcu_ctrlblk = { | ||
139 | .fliplock = __SPIN_LOCK_UNLOCKED(rcu_ctrlblk.fliplock), | ||
140 | .completed = 0, | ||
141 | .rcu_try_flip_state = rcu_try_flip_idle_state, | ||
142 | }; | ||
143 | |||
144 | |||
145 | #ifdef CONFIG_RCU_TRACE | ||
146 | static char *rcu_try_flip_state_names[] = | ||
147 | { "idle", "waitack", "waitzero", "waitmb" }; | ||
148 | #endif /* #ifdef CONFIG_RCU_TRACE */ | ||
149 | |||
150 | /* | ||
151 | * Enum and per-CPU flag to determine when each CPU has seen | ||
152 | * the most recent counter flip. | ||
153 | */ | ||
154 | |||
155 | enum rcu_flip_flag_values { | ||
156 | rcu_flip_seen, /* Steady/initial state, last flip seen. */ | ||
157 | /* Only GP detector can update. */ | ||
158 | rcu_flipped /* Flip just completed, need confirmation. */ | ||
159 | /* Only corresponding CPU can update. */ | ||
160 | }; | ||
161 | static DEFINE_PER_CPU_SHARED_ALIGNED(enum rcu_flip_flag_values, rcu_flip_flag) | ||
162 | = rcu_flip_seen; | ||
163 | |||
164 | /* | ||
165 | * Enum and per-CPU flag to determine when each CPU has executed the | ||
166 | * needed memory barrier to fence in memory references from its last RCU | ||
167 | * read-side critical section in the just-completed grace period. | ||
168 | */ | ||
169 | |||
170 | enum rcu_mb_flag_values { | ||
171 | rcu_mb_done, /* Steady/initial state, no mb()s required. */ | ||
172 | /* Only GP detector can update. */ | ||
173 | rcu_mb_needed /* Flip just completed, need an mb(). */ | ||
174 | /* Only corresponding CPU can update. */ | ||
175 | }; | ||
176 | static DEFINE_PER_CPU_SHARED_ALIGNED(enum rcu_mb_flag_values, rcu_mb_flag) | ||
177 | = rcu_mb_done; | ||
178 | |||
179 | /* | ||
180 | * RCU_DATA_ME: find the current CPU's rcu_data structure. | ||
181 | * RCU_DATA_CPU: find the specified CPU's rcu_data structure. | ||
182 | */ | ||
183 | #define RCU_DATA_ME() (&__get_cpu_var(rcu_data)) | ||
184 | #define RCU_DATA_CPU(cpu) (&per_cpu(rcu_data, cpu)) | ||
185 | |||
186 | /* | ||
187 | * Helper macro for tracing when the appropriate rcu_data is not | ||
188 | * cached in a local variable, but where the CPU number is so cached. | ||
189 | */ | ||
190 | #define RCU_TRACE_CPU(f, cpu) RCU_TRACE(f, &(RCU_DATA_CPU(cpu)->trace)); | ||
191 | |||
192 | /* | ||
193 | * Helper macro for tracing when the appropriate rcu_data is not | ||
194 | * cached in a local variable. | ||
195 | */ | ||
196 | #define RCU_TRACE_ME(f) RCU_TRACE(f, &(RCU_DATA_ME()->trace)); | ||
197 | |||
198 | /* | ||
199 | * Helper macro for tracing when the appropriate rcu_data is pointed | ||
200 | * to by a local variable. | ||
201 | */ | ||
202 | #define RCU_TRACE_RDP(f, rdp) RCU_TRACE(f, &((rdp)->trace)); | ||
203 | |||
204 | /* | ||
205 | * Return the number of RCU batches processed thus far. Useful | ||
206 | * for debug and statistics. | ||
207 | */ | ||
208 | long rcu_batches_completed(void) | ||
209 | { | ||
210 | return rcu_ctrlblk.completed; | ||
211 | } | ||
212 | EXPORT_SYMBOL_GPL(rcu_batches_completed); | ||
213 | |||
214 | EXPORT_SYMBOL_GPL(rcu_batches_completed_bh); | ||
215 | |||
216 | void __rcu_read_lock(void) | ||
217 | { | ||
218 | int idx; | ||
219 | struct task_struct *t = current; | ||
220 | int nesting; | ||
221 | |||
222 | nesting = ACCESS_ONCE(t->rcu_read_lock_nesting); | ||
223 | if (nesting != 0) { | ||
224 | |||
225 | /* An earlier rcu_read_lock() covers us, just count it. */ | ||
226 | |||
227 | t->rcu_read_lock_nesting = nesting + 1; | ||
228 | |||
229 | } else { | ||
230 | unsigned long flags; | ||
231 | |||
232 | /* | ||
233 | * We disable interrupts for the following reasons: | ||
234 | * - If we get scheduling clock interrupt here, and we | ||
235 | * end up acking the counter flip, it's like a promise | ||
236 | * that we will never increment the old counter again. | ||
237 | * Thus we will break that promise if that | ||
238 | * scheduling clock interrupt happens between the time | ||
239 | * we pick the .completed field and the time that we | ||
240 | * increment our counter. | ||
241 | * | ||
242 | * - We don't want to be preempted out here. | ||
243 | * | ||
244 | * NMIs can still occur, of course, and might themselves | ||
245 | * contain rcu_read_lock(). | ||
246 | */ | ||
247 | |||
248 | local_irq_save(flags); | ||
249 | |||
250 | /* | ||
251 | * Outermost nesting of rcu_read_lock(), so increment | ||
252 | * the current counter for the current CPU. Use volatile | ||
253 | * casts to prevent the compiler from reordering. | ||
254 | */ | ||
255 | |||
256 | idx = ACCESS_ONCE(rcu_ctrlblk.completed) & 0x1; | ||
257 | ACCESS_ONCE(RCU_DATA_ME()->rcu_flipctr[idx])++; | ||
258 | |||
259 | /* | ||
260 | * Now that the per-CPU counter has been incremented, we | ||
261 | * are protected from races with rcu_read_lock() invoked | ||
262 | * from NMI handlers on this CPU. We can therefore safely | ||
263 | * increment the nesting counter, relieving further NMIs | ||
264 | * of the need to increment the per-CPU counter. | ||
265 | */ | ||
266 | |||
267 | ACCESS_ONCE(t->rcu_read_lock_nesting) = nesting + 1; | ||
268 | |||
269 | /* | ||
270 | * Now that we have preventing any NMIs from storing | ||
271 | * to the ->rcu_flipctr_idx, we can safely use it to | ||
272 | * remember which counter to decrement in the matching | ||
273 | * rcu_read_unlock(). | ||
274 | */ | ||
275 | |||
276 | ACCESS_ONCE(t->rcu_flipctr_idx) = idx; | ||
277 | local_irq_restore(flags); | ||
278 | } | ||
279 | } | ||
280 | EXPORT_SYMBOL_GPL(__rcu_read_lock); | ||
281 | |||
282 | void __rcu_read_unlock(void) | ||
283 | { | ||
284 | int idx; | ||
285 | struct task_struct *t = current; | ||
286 | int nesting; | ||
287 | |||
288 | nesting = ACCESS_ONCE(t->rcu_read_lock_nesting); | ||
289 | if (nesting > 1) { | ||
290 | |||
291 | /* | ||
292 | * We are still protected by the enclosing rcu_read_lock(), | ||
293 | * so simply decrement the counter. | ||
294 | */ | ||
295 | |||
296 | t->rcu_read_lock_nesting = nesting - 1; | ||
297 | |||
298 | } else { | ||
299 | unsigned long flags; | ||
300 | |||
301 | /* | ||
302 | * Disable local interrupts to prevent the grace-period | ||
303 | * detection state machine from seeing us half-done. | ||
304 | * NMIs can still occur, of course, and might themselves | ||
305 | * contain rcu_read_lock() and rcu_read_unlock(). | ||
306 | */ | ||
307 | |||
308 | local_irq_save(flags); | ||
309 | |||
310 | /* | ||
311 | * Outermost nesting of rcu_read_unlock(), so we must | ||
312 | * decrement the current counter for the current CPU. | ||
313 | * This must be done carefully, because NMIs can | ||
314 | * occur at any point in this code, and any rcu_read_lock() | ||
315 | * and rcu_read_unlock() pairs in the NMI handlers | ||
316 | * must interact non-destructively with this code. | ||
317 | * Lots of volatile casts, and -very- careful ordering. | ||
318 | * | ||
319 | * Changes to this code, including this one, must be | ||
320 | * inspected, validated, and tested extremely carefully!!! | ||
321 | */ | ||
322 | |||
323 | /* | ||
324 | * First, pick up the index. | ||
325 | */ | ||
326 | |||
327 | idx = ACCESS_ONCE(t->rcu_flipctr_idx); | ||
328 | |||
329 | /* | ||
330 | * Now that we have fetched the counter index, it is | ||
331 | * safe to decrement the per-task RCU nesting counter. | ||
332 | * After this, any interrupts or NMIs will increment and | ||
333 | * decrement the per-CPU counters. | ||
334 | */ | ||
335 | ACCESS_ONCE(t->rcu_read_lock_nesting) = nesting - 1; | ||
336 | |||
337 | /* | ||
338 | * It is now safe to decrement this task's nesting count. | ||
339 | * NMIs that occur after this statement will route their | ||
340 | * rcu_read_lock() calls through this "else" clause, and | ||
341 | * will thus start incrementing the per-CPU counter on | ||
342 | * their own. They will also clobber ->rcu_flipctr_idx, | ||
343 | * but that is OK, since we have already fetched it. | ||
344 | */ | ||
345 | |||
346 | ACCESS_ONCE(RCU_DATA_ME()->rcu_flipctr[idx])--; | ||
347 | local_irq_restore(flags); | ||
348 | } | ||
349 | } | ||
350 | EXPORT_SYMBOL_GPL(__rcu_read_unlock); | ||
351 | |||
352 | /* | ||
353 | * If a global counter flip has occurred since the last time that we | ||
354 | * advanced callbacks, advance them. Hardware interrupts must be | ||
355 | * disabled when calling this function. | ||
356 | */ | ||
357 | static void __rcu_advance_callbacks(struct rcu_data *rdp) | ||
358 | { | ||
359 | int cpu; | ||
360 | int i; | ||
361 | int wlc = 0; | ||
362 | |||
363 | if (rdp->completed != rcu_ctrlblk.completed) { | ||
364 | if (rdp->waitlist[GP_STAGES - 1] != NULL) { | ||
365 | *rdp->donetail = rdp->waitlist[GP_STAGES - 1]; | ||
366 | rdp->donetail = rdp->waittail[GP_STAGES - 1]; | ||
367 | RCU_TRACE_RDP(rcupreempt_trace_move2done, rdp); | ||
368 | } | ||
369 | for (i = GP_STAGES - 2; i >= 0; i--) { | ||
370 | if (rdp->waitlist[i] != NULL) { | ||
371 | rdp->waitlist[i + 1] = rdp->waitlist[i]; | ||
372 | rdp->waittail[i + 1] = rdp->waittail[i]; | ||
373 | wlc++; | ||
374 | } else { | ||
375 | rdp->waitlist[i + 1] = NULL; | ||
376 | rdp->waittail[i + 1] = | ||
377 | &rdp->waitlist[i + 1]; | ||
378 | } | ||
379 | } | ||
380 | if (rdp->nextlist != NULL) { | ||
381 | rdp->waitlist[0] = rdp->nextlist; | ||
382 | rdp->waittail[0] = rdp->nexttail; | ||
383 | wlc++; | ||
384 | rdp->nextlist = NULL; | ||
385 | rdp->nexttail = &rdp->nextlist; | ||
386 | RCU_TRACE_RDP(rcupreempt_trace_move2wait, rdp); | ||
387 | } else { | ||
388 | rdp->waitlist[0] = NULL; | ||
389 | rdp->waittail[0] = &rdp->waitlist[0]; | ||
390 | } | ||
391 | rdp->waitlistcount = wlc; | ||
392 | rdp->completed = rcu_ctrlblk.completed; | ||
393 | } | ||
394 | |||
395 | /* | ||
396 | * Check to see if this CPU needs to report that it has seen | ||
397 | * the most recent counter flip, thereby declaring that all | ||
398 | * subsequent rcu_read_lock() invocations will respect this flip. | ||
399 | */ | ||
400 | |||
401 | cpu = raw_smp_processor_id(); | ||
402 | if (per_cpu(rcu_flip_flag, cpu) == rcu_flipped) { | ||
403 | smp_mb(); /* Subsequent counter accesses must see new value */ | ||
404 | per_cpu(rcu_flip_flag, cpu) = rcu_flip_seen; | ||
405 | smp_mb(); /* Subsequent RCU read-side critical sections */ | ||
406 | /* seen -after- acknowledgement. */ | ||
407 | } | ||
408 | } | ||
409 | |||
410 | /* | ||
411 | * Get here when RCU is idle. Decide whether we need to | ||
412 | * move out of idle state, and return non-zero if so. | ||
413 | * "Straightforward" approach for the moment, might later | ||
414 | * use callback-list lengths, grace-period duration, or | ||
415 | * some such to determine when to exit idle state. | ||
416 | * Might also need a pre-idle test that does not acquire | ||
417 | * the lock, but let's get the simple case working first... | ||
418 | */ | ||
419 | |||
420 | static int | ||
421 | rcu_try_flip_idle(void) | ||
422 | { | ||
423 | int cpu; | ||
424 | |||
425 | RCU_TRACE_ME(rcupreempt_trace_try_flip_i1); | ||
426 | if (!rcu_pending(smp_processor_id())) { | ||
427 | RCU_TRACE_ME(rcupreempt_trace_try_flip_ie1); | ||
428 | return 0; | ||
429 | } | ||
430 | |||
431 | /* | ||
432 | * Do the flip. | ||
433 | */ | ||
434 | |||
435 | RCU_TRACE_ME(rcupreempt_trace_try_flip_g1); | ||
436 | rcu_ctrlblk.completed++; /* stands in for rcu_try_flip_g2 */ | ||
437 | |||
438 | /* | ||
439 | * Need a memory barrier so that other CPUs see the new | ||
440 | * counter value before they see the subsequent change of all | ||
441 | * the rcu_flip_flag instances to rcu_flipped. | ||
442 | */ | ||
443 | |||
444 | smp_mb(); /* see above block comment. */ | ||
445 | |||
446 | /* Now ask each CPU for acknowledgement of the flip. */ | ||
447 | |||
448 | for_each_possible_cpu(cpu) | ||
449 | per_cpu(rcu_flip_flag, cpu) = rcu_flipped; | ||
450 | |||
451 | return 1; | ||
452 | } | ||
453 | |||
454 | /* | ||
455 | * Wait for CPUs to acknowledge the flip. | ||
456 | */ | ||
457 | |||
458 | static int | ||
459 | rcu_try_flip_waitack(void) | ||
460 | { | ||
461 | int cpu; | ||
462 | |||
463 | RCU_TRACE_ME(rcupreempt_trace_try_flip_a1); | ||
464 | for_each_possible_cpu(cpu) | ||
465 | if (per_cpu(rcu_flip_flag, cpu) != rcu_flip_seen) { | ||
466 | RCU_TRACE_ME(rcupreempt_trace_try_flip_ae1); | ||
467 | return 0; | ||
468 | } | ||
469 | |||
470 | /* | ||
471 | * Make sure our checks above don't bleed into subsequent | ||
472 | * waiting for the sum of the counters to reach zero. | ||
473 | */ | ||
474 | |||
475 | smp_mb(); /* see above block comment. */ | ||
476 | RCU_TRACE_ME(rcupreempt_trace_try_flip_a2); | ||
477 | return 1; | ||
478 | } | ||
479 | |||
480 | /* | ||
481 | * Wait for collective ``last'' counter to reach zero, | ||
482 | * then tell all CPUs to do an end-of-grace-period memory barrier. | ||
483 | */ | ||
484 | |||
485 | static int | ||
486 | rcu_try_flip_waitzero(void) | ||
487 | { | ||
488 | int cpu; | ||
489 | int lastidx = !(rcu_ctrlblk.completed & 0x1); | ||
490 | int sum = 0; | ||
491 | |||
492 | /* Check to see if the sum of the "last" counters is zero. */ | ||
493 | |||
494 | RCU_TRACE_ME(rcupreempt_trace_try_flip_z1); | ||
495 | for_each_possible_cpu(cpu) | ||
496 | sum += RCU_DATA_CPU(cpu)->rcu_flipctr[lastidx]; | ||
497 | if (sum != 0) { | ||
498 | RCU_TRACE_ME(rcupreempt_trace_try_flip_ze1); | ||
499 | return 0; | ||
500 | } | ||
501 | |||
502 | /* | ||
503 | * This ensures that the other CPUs see the call for | ||
504 | * memory barriers -after- the sum to zero has been | ||
505 | * detected here | ||
506 | */ | ||
507 | smp_mb(); /* ^^^^^^^^^^^^ */ | ||
508 | |||
509 | /* Call for a memory barrier from each CPU. */ | ||
510 | for_each_possible_cpu(cpu) | ||
511 | per_cpu(rcu_mb_flag, cpu) = rcu_mb_needed; | ||
512 | |||
513 | RCU_TRACE_ME(rcupreempt_trace_try_flip_z2); | ||
514 | return 1; | ||
515 | } | ||
516 | |||
517 | /* | ||
518 | * Wait for all CPUs to do their end-of-grace-period memory barrier. | ||
519 | * Return 0 once all CPUs have done so. | ||
520 | */ | ||
521 | |||
522 | static int | ||
523 | rcu_try_flip_waitmb(void) | ||
524 | { | ||
525 | int cpu; | ||
526 | |||
527 | RCU_TRACE_ME(rcupreempt_trace_try_flip_m1); | ||
528 | for_each_possible_cpu(cpu) | ||
529 | if (per_cpu(rcu_mb_flag, cpu) != rcu_mb_done) { | ||
530 | RCU_TRACE_ME(rcupreempt_trace_try_flip_me1); | ||
531 | return 0; | ||
532 | } | ||
533 | |||
534 | smp_mb(); /* Ensure that the above checks precede any following flip. */ | ||
535 | RCU_TRACE_ME(rcupreempt_trace_try_flip_m2); | ||
536 | return 1; | ||
537 | } | ||
538 | |||
539 | /* | ||
540 | * Attempt a single flip of the counters. Remember, a single flip does | ||
541 | * -not- constitute a grace period. Instead, the interval between | ||
542 | * at least GP_STAGES consecutive flips is a grace period. | ||
543 | * | ||
544 | * If anyone is nuts enough to run this CONFIG_PREEMPT_RCU implementation | ||
545 | * on a large SMP, they might want to use a hierarchical organization of | ||
546 | * the per-CPU-counter pairs. | ||
547 | */ | ||
548 | static void rcu_try_flip(void) | ||
549 | { | ||
550 | unsigned long flags; | ||
551 | |||
552 | RCU_TRACE_ME(rcupreempt_trace_try_flip_1); | ||
553 | if (unlikely(!spin_trylock_irqsave(&rcu_ctrlblk.fliplock, flags))) { | ||
554 | RCU_TRACE_ME(rcupreempt_trace_try_flip_e1); | ||
555 | return; | ||
556 | } | ||
557 | |||
558 | /* | ||
559 | * Take the next transition(s) through the RCU grace-period | ||
560 | * flip-counter state machine. | ||
561 | */ | ||
562 | |||
563 | switch (rcu_ctrlblk.rcu_try_flip_state) { | ||
564 | case rcu_try_flip_idle_state: | ||
565 | if (rcu_try_flip_idle()) | ||
566 | rcu_ctrlblk.rcu_try_flip_state = | ||
567 | rcu_try_flip_waitack_state; | ||
568 | break; | ||
569 | case rcu_try_flip_waitack_state: | ||
570 | if (rcu_try_flip_waitack()) | ||
571 | rcu_ctrlblk.rcu_try_flip_state = | ||
572 | rcu_try_flip_waitzero_state; | ||
573 | break; | ||
574 | case rcu_try_flip_waitzero_state: | ||
575 | if (rcu_try_flip_waitzero()) | ||
576 | rcu_ctrlblk.rcu_try_flip_state = | ||
577 | rcu_try_flip_waitmb_state; | ||
578 | break; | ||
579 | case rcu_try_flip_waitmb_state: | ||
580 | if (rcu_try_flip_waitmb()) | ||
581 | rcu_ctrlblk.rcu_try_flip_state = | ||
582 | rcu_try_flip_idle_state; | ||
583 | } | ||
584 | spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags); | ||
585 | } | ||
586 | |||
587 | /* | ||
588 | * Check to see if this CPU needs to do a memory barrier in order to | ||
589 | * ensure that any prior RCU read-side critical sections have committed | ||
590 | * their counter manipulations and critical-section memory references | ||
591 | * before declaring the grace period to be completed. | ||
592 | */ | ||
593 | static void rcu_check_mb(int cpu) | ||
594 | { | ||
595 | if (per_cpu(rcu_mb_flag, cpu) == rcu_mb_needed) { | ||
596 | smp_mb(); /* Ensure RCU read-side accesses are visible. */ | ||
597 | per_cpu(rcu_mb_flag, cpu) = rcu_mb_done; | ||
598 | } | ||
599 | } | ||
600 | |||
601 | void rcu_check_callbacks(int cpu, int user) | ||
602 | { | ||
603 | unsigned long flags; | ||
604 | struct rcu_data *rdp = RCU_DATA_CPU(cpu); | ||
605 | |||
606 | rcu_check_mb(cpu); | ||
607 | if (rcu_ctrlblk.completed == rdp->completed) | ||
608 | rcu_try_flip(); | ||
609 | spin_lock_irqsave(&rdp->lock, flags); | ||
610 | RCU_TRACE_RDP(rcupreempt_trace_check_callbacks, rdp); | ||
611 | __rcu_advance_callbacks(rdp); | ||
612 | if (rdp->donelist == NULL) { | ||
613 | spin_unlock_irqrestore(&rdp->lock, flags); | ||
614 | } else { | ||
615 | spin_unlock_irqrestore(&rdp->lock, flags); | ||
616 | raise_softirq(RCU_SOFTIRQ); | ||
617 | } | ||
618 | } | ||
619 | |||
620 | /* | ||
621 | * Needed by dynticks, to make sure all RCU processing has finished | ||
622 | * when we go idle: | ||
623 | */ | ||
624 | void rcu_advance_callbacks(int cpu, int user) | ||
625 | { | ||
626 | unsigned long flags; | ||
627 | struct rcu_data *rdp = RCU_DATA_CPU(cpu); | ||
628 | |||
629 | if (rcu_ctrlblk.completed == rdp->completed) { | ||
630 | rcu_try_flip(); | ||
631 | if (rcu_ctrlblk.completed == rdp->completed) | ||
632 | return; | ||
633 | } | ||
634 | spin_lock_irqsave(&rdp->lock, flags); | ||
635 | RCU_TRACE_RDP(rcupreempt_trace_check_callbacks, rdp); | ||
636 | __rcu_advance_callbacks(rdp); | ||
637 | spin_unlock_irqrestore(&rdp->lock, flags); | ||
638 | } | ||
639 | |||
640 | static void rcu_process_callbacks(struct softirq_action *unused) | ||
641 | { | ||
642 | unsigned long flags; | ||
643 | struct rcu_head *next, *list; | ||
644 | struct rcu_data *rdp = RCU_DATA_ME(); | ||
645 | |||
646 | spin_lock_irqsave(&rdp->lock, flags); | ||
647 | list = rdp->donelist; | ||
648 | if (list == NULL) { | ||
649 | spin_unlock_irqrestore(&rdp->lock, flags); | ||
650 | return; | ||
651 | } | ||
652 | rdp->donelist = NULL; | ||
653 | rdp->donetail = &rdp->donelist; | ||
654 | RCU_TRACE_RDP(rcupreempt_trace_done_remove, rdp); | ||
655 | spin_unlock_irqrestore(&rdp->lock, flags); | ||
656 | while (list) { | ||
657 | next = list->next; | ||
658 | list->func(list); | ||
659 | list = next; | ||
660 | RCU_TRACE_ME(rcupreempt_trace_invoke); | ||
661 | } | ||
662 | } | ||
663 | |||
664 | void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) | ||
665 | { | ||
666 | unsigned long flags; | ||
667 | struct rcu_data *rdp; | ||
668 | |||
669 | head->func = func; | ||
670 | head->next = NULL; | ||
671 | local_irq_save(flags); | ||
672 | rdp = RCU_DATA_ME(); | ||
673 | spin_lock(&rdp->lock); | ||
674 | __rcu_advance_callbacks(rdp); | ||
675 | *rdp->nexttail = head; | ||
676 | rdp->nexttail = &head->next; | ||
677 | RCU_TRACE_RDP(rcupreempt_trace_next_add, rdp); | ||
678 | spin_unlock(&rdp->lock); | ||
679 | local_irq_restore(flags); | ||
680 | } | ||
681 | EXPORT_SYMBOL_GPL(call_rcu); | ||
682 | |||
683 | /* | ||
684 | * Wait until all currently running preempt_disable() code segments | ||
685 | * (including hardware-irq-disable segments) complete. Note that | ||
686 | * in -rt this does -not- necessarily result in all currently executing | ||
687 | * interrupt -handlers- having completed. | ||
688 | */ | ||
689 | void __synchronize_sched(void) | ||
690 | { | ||
691 | cpumask_t oldmask; | ||
692 | int cpu; | ||
693 | |||
694 | if (sched_getaffinity(0, &oldmask) < 0) | ||
695 | oldmask = cpu_possible_map; | ||
696 | for_each_online_cpu(cpu) { | ||
697 | sched_setaffinity(0, cpumask_of_cpu(cpu)); | ||
698 | schedule(); | ||
699 | } | ||
700 | sched_setaffinity(0, oldmask); | ||
701 | } | ||
702 | EXPORT_SYMBOL_GPL(__synchronize_sched); | ||
703 | |||
704 | /* | ||
705 | * Check to see if any future RCU-related work will need to be done | ||
706 | * by the current CPU, even if none need be done immediately, returning | ||
707 | * 1 if so. Assumes that notifiers would take care of handling any | ||
708 | * outstanding requests from the RCU core. | ||
709 | * | ||
710 | * This function is part of the RCU implementation; it is -not- | ||
711 | * an exported member of the RCU API. | ||
712 | */ | ||
713 | int rcu_needs_cpu(int cpu) | ||
714 | { | ||
715 | struct rcu_data *rdp = RCU_DATA_CPU(cpu); | ||
716 | |||
717 | return (rdp->donelist != NULL || | ||
718 | !!rdp->waitlistcount || | ||
719 | rdp->nextlist != NULL); | ||
720 | } | ||
721 | |||
722 | int rcu_pending(int cpu) | ||
723 | { | ||
724 | struct rcu_data *rdp = RCU_DATA_CPU(cpu); | ||
725 | |||
726 | /* The CPU has at least one callback queued somewhere. */ | ||
727 | |||
728 | if (rdp->donelist != NULL || | ||
729 | !!rdp->waitlistcount || | ||
730 | rdp->nextlist != NULL) | ||
731 | return 1; | ||
732 | |||
733 | /* The RCU core needs an acknowledgement from this CPU. */ | ||
734 | |||
735 | if ((per_cpu(rcu_flip_flag, cpu) == rcu_flipped) || | ||
736 | (per_cpu(rcu_mb_flag, cpu) == rcu_mb_needed)) | ||
737 | return 1; | ||
738 | |||
739 | /* This CPU has fallen behind the global grace-period number. */ | ||
740 | |||
741 | if (rdp->completed != rcu_ctrlblk.completed) | ||
742 | return 1; | ||
743 | |||
744 | /* Nothing needed from this CPU. */ | ||
745 | |||
746 | return 0; | ||
747 | } | ||
748 | |||
749 | void __init __rcu_init(void) | ||
750 | { | ||
751 | int cpu; | ||
752 | int i; | ||
753 | struct rcu_data *rdp; | ||
754 | |||
755 | printk(KERN_NOTICE "Preemptible RCU implementation.\n"); | ||
756 | for_each_possible_cpu(cpu) { | ||
757 | rdp = RCU_DATA_CPU(cpu); | ||
758 | spin_lock_init(&rdp->lock); | ||
759 | rdp->completed = 0; | ||
760 | rdp->waitlistcount = 0; | ||
761 | rdp->nextlist = NULL; | ||
762 | rdp->nexttail = &rdp->nextlist; | ||
763 | for (i = 0; i < GP_STAGES; i++) { | ||
764 | rdp->waitlist[i] = NULL; | ||
765 | rdp->waittail[i] = &rdp->waitlist[i]; | ||
766 | } | ||
767 | rdp->donelist = NULL; | ||
768 | rdp->donetail = &rdp->donelist; | ||
769 | rdp->rcu_flipctr[0] = 0; | ||
770 | rdp->rcu_flipctr[1] = 0; | ||
771 | } | ||
772 | open_softirq(RCU_SOFTIRQ, rcu_process_callbacks, NULL); | ||
773 | } | ||
774 | |||
775 | /* | ||
776 | * Deprecated, use synchronize_rcu() or synchronize_sched() instead. | ||
777 | */ | ||
778 | void synchronize_kernel(void) | ||
779 | { | ||
780 | synchronize_rcu(); | ||
781 | } | ||
782 | |||
783 | #ifdef CONFIG_RCU_TRACE | ||
784 | long *rcupreempt_flipctr(int cpu) | ||
785 | { | ||
786 | return &RCU_DATA_CPU(cpu)->rcu_flipctr[0]; | ||
787 | } | ||
788 | EXPORT_SYMBOL_GPL(rcupreempt_flipctr); | ||
789 | |||
790 | int rcupreempt_flip_flag(int cpu) | ||
791 | { | ||
792 | return per_cpu(rcu_flip_flag, cpu); | ||
793 | } | ||
794 | EXPORT_SYMBOL_GPL(rcupreempt_flip_flag); | ||
795 | |||
796 | int rcupreempt_mb_flag(int cpu) | ||
797 | { | ||
798 | return per_cpu(rcu_mb_flag, cpu); | ||
799 | } | ||
800 | EXPORT_SYMBOL_GPL(rcupreempt_mb_flag); | ||
801 | |||
802 | char *rcupreempt_try_flip_state_name(void) | ||
803 | { | ||
804 | return rcu_try_flip_state_names[rcu_ctrlblk.rcu_try_flip_state]; | ||
805 | } | ||
806 | EXPORT_SYMBOL_GPL(rcupreempt_try_flip_state_name); | ||
807 | |||
808 | struct rcupreempt_trace *rcupreempt_trace_cpu(int cpu) | ||
809 | { | ||
810 | struct rcu_data *rdp = RCU_DATA_CPU(cpu); | ||
811 | |||
812 | return &rdp->trace; | ||
813 | } | ||
814 | EXPORT_SYMBOL_GPL(rcupreempt_trace_cpu); | ||
815 | |||
816 | #endif /* #ifdef RCU_TRACE */ | ||
diff --git a/kernel/rcupreempt_trace.c b/kernel/rcupreempt_trace.c new file mode 100644 index 000000000000..49ac4947af24 --- /dev/null +++ b/kernel/rcupreempt_trace.c | |||
@@ -0,0 +1,330 @@ | |||
1 | /* | ||
2 | * Read-Copy Update tracing for realtime implementation | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
17 | * | ||
18 | * Copyright IBM Corporation, 2006 | ||
19 | * | ||
20 | * Papers: http://www.rdrop.com/users/paulmck/RCU | ||
21 | * | ||
22 | * For detailed explanation of Read-Copy Update mechanism see - | ||
23 | * Documentation/RCU/ *.txt | ||
24 | * | ||
25 | */ | ||
26 | #include <linux/types.h> | ||
27 | #include <linux/kernel.h> | ||
28 | #include <linux/init.h> | ||
29 | #include <linux/spinlock.h> | ||
30 | #include <linux/smp.h> | ||
31 | #include <linux/rcupdate.h> | ||
32 | #include <linux/interrupt.h> | ||
33 | #include <linux/sched.h> | ||
34 | #include <asm/atomic.h> | ||
35 | #include <linux/bitops.h> | ||
36 | #include <linux/module.h> | ||
37 | #include <linux/completion.h> | ||
38 | #include <linux/moduleparam.h> | ||
39 | #include <linux/percpu.h> | ||
40 | #include <linux/notifier.h> | ||
41 | #include <linux/rcupdate.h> | ||
42 | #include <linux/cpu.h> | ||
43 | #include <linux/mutex.h> | ||
44 | #include <linux/rcupreempt_trace.h> | ||
45 | #include <linux/debugfs.h> | ||
46 | |||
47 | static struct mutex rcupreempt_trace_mutex; | ||
48 | static char *rcupreempt_trace_buf; | ||
49 | #define RCUPREEMPT_TRACE_BUF_SIZE 4096 | ||
50 | |||
51 | void rcupreempt_trace_move2done(struct rcupreempt_trace *trace) | ||
52 | { | ||
53 | trace->done_length += trace->wait_length; | ||
54 | trace->done_add += trace->wait_length; | ||
55 | trace->wait_length = 0; | ||
56 | } | ||
57 | void rcupreempt_trace_move2wait(struct rcupreempt_trace *trace) | ||
58 | { | ||
59 | trace->wait_length += trace->next_length; | ||
60 | trace->wait_add += trace->next_length; | ||
61 | trace->next_length = 0; | ||
62 | } | ||
63 | void rcupreempt_trace_try_flip_1(struct rcupreempt_trace *trace) | ||
64 | { | ||
65 | atomic_inc(&trace->rcu_try_flip_1); | ||
66 | } | ||
67 | void rcupreempt_trace_try_flip_e1(struct rcupreempt_trace *trace) | ||
68 | { | ||
69 | atomic_inc(&trace->rcu_try_flip_e1); | ||
70 | } | ||
71 | void rcupreempt_trace_try_flip_i1(struct rcupreempt_trace *trace) | ||
72 | { | ||
73 | trace->rcu_try_flip_i1++; | ||
74 | } | ||
75 | void rcupreempt_trace_try_flip_ie1(struct rcupreempt_trace *trace) | ||
76 | { | ||
77 | trace->rcu_try_flip_ie1++; | ||
78 | } | ||
79 | void rcupreempt_trace_try_flip_g1(struct rcupreempt_trace *trace) | ||
80 | { | ||
81 | trace->rcu_try_flip_g1++; | ||
82 | } | ||
83 | void rcupreempt_trace_try_flip_a1(struct rcupreempt_trace *trace) | ||
84 | { | ||
85 | trace->rcu_try_flip_a1++; | ||
86 | } | ||
87 | void rcupreempt_trace_try_flip_ae1(struct rcupreempt_trace *trace) | ||
88 | { | ||
89 | trace->rcu_try_flip_ae1++; | ||
90 | } | ||
91 | void rcupreempt_trace_try_flip_a2(struct rcupreempt_trace *trace) | ||
92 | { | ||
93 | trace->rcu_try_flip_a2++; | ||
94 | } | ||
95 | void rcupreempt_trace_try_flip_z1(struct rcupreempt_trace *trace) | ||
96 | { | ||
97 | trace->rcu_try_flip_z1++; | ||
98 | } | ||
99 | void rcupreempt_trace_try_flip_ze1(struct rcupreempt_trace *trace) | ||
100 | { | ||
101 | trace->rcu_try_flip_ze1++; | ||
102 | } | ||
103 | void rcupreempt_trace_try_flip_z2(struct rcupreempt_trace *trace) | ||
104 | { | ||
105 | trace->rcu_try_flip_z2++; | ||
106 | } | ||
107 | void rcupreempt_trace_try_flip_m1(struct rcupreempt_trace *trace) | ||
108 | { | ||
109 | trace->rcu_try_flip_m1++; | ||
110 | } | ||
111 | void rcupreempt_trace_try_flip_me1(struct rcupreempt_trace *trace) | ||
112 | { | ||
113 | trace->rcu_try_flip_me1++; | ||
114 | } | ||
115 | void rcupreempt_trace_try_flip_m2(struct rcupreempt_trace *trace) | ||
116 | { | ||
117 | trace->rcu_try_flip_m2++; | ||
118 | } | ||
119 | void rcupreempt_trace_check_callbacks(struct rcupreempt_trace *trace) | ||
120 | { | ||
121 | trace->rcu_check_callbacks++; | ||
122 | } | ||
123 | void rcupreempt_trace_done_remove(struct rcupreempt_trace *trace) | ||
124 | { | ||
125 | trace->done_remove += trace->done_length; | ||
126 | trace->done_length = 0; | ||
127 | } | ||
128 | void rcupreempt_trace_invoke(struct rcupreempt_trace *trace) | ||
129 | { | ||
130 | atomic_inc(&trace->done_invoked); | ||
131 | } | ||
132 | void rcupreempt_trace_next_add(struct rcupreempt_trace *trace) | ||
133 | { | ||
134 | trace->next_add++; | ||
135 | trace->next_length++; | ||
136 | } | ||
137 | |||
138 | static void rcupreempt_trace_sum(struct rcupreempt_trace *sp) | ||
139 | { | ||
140 | struct rcupreempt_trace *cp; | ||
141 | int cpu; | ||
142 | |||
143 | memset(sp, 0, sizeof(*sp)); | ||
144 | for_each_possible_cpu(cpu) { | ||
145 | cp = rcupreempt_trace_cpu(cpu); | ||
146 | sp->next_length += cp->next_length; | ||
147 | sp->next_add += cp->next_add; | ||
148 | sp->wait_length += cp->wait_length; | ||
149 | sp->wait_add += cp->wait_add; | ||
150 | sp->done_length += cp->done_length; | ||
151 | sp->done_add += cp->done_add; | ||
152 | sp->done_remove += cp->done_remove; | ||
153 | atomic_set(&sp->done_invoked, atomic_read(&cp->done_invoked)); | ||
154 | sp->rcu_check_callbacks += cp->rcu_check_callbacks; | ||
155 | atomic_set(&sp->rcu_try_flip_1, | ||
156 | atomic_read(&cp->rcu_try_flip_1)); | ||
157 | atomic_set(&sp->rcu_try_flip_e1, | ||
158 | atomic_read(&cp->rcu_try_flip_e1)); | ||
159 | sp->rcu_try_flip_i1 += cp->rcu_try_flip_i1; | ||
160 | sp->rcu_try_flip_ie1 += cp->rcu_try_flip_ie1; | ||
161 | sp->rcu_try_flip_g1 += cp->rcu_try_flip_g1; | ||
162 | sp->rcu_try_flip_a1 += cp->rcu_try_flip_a1; | ||
163 | sp->rcu_try_flip_ae1 += cp->rcu_try_flip_ae1; | ||
164 | sp->rcu_try_flip_a2 += cp->rcu_try_flip_a2; | ||
165 | sp->rcu_try_flip_z1 += cp->rcu_try_flip_z1; | ||
166 | sp->rcu_try_flip_ze1 += cp->rcu_try_flip_ze1; | ||
167 | sp->rcu_try_flip_z2 += cp->rcu_try_flip_z2; | ||
168 | sp->rcu_try_flip_m1 += cp->rcu_try_flip_m1; | ||
169 | sp->rcu_try_flip_me1 += cp->rcu_try_flip_me1; | ||
170 | sp->rcu_try_flip_m2 += cp->rcu_try_flip_m2; | ||
171 | } | ||
172 | } | ||
173 | |||
174 | static ssize_t rcustats_read(struct file *filp, char __user *buffer, | ||
175 | size_t count, loff_t *ppos) | ||
176 | { | ||
177 | struct rcupreempt_trace trace; | ||
178 | ssize_t bcount; | ||
179 | int cnt = 0; | ||
180 | |||
181 | rcupreempt_trace_sum(&trace); | ||
182 | mutex_lock(&rcupreempt_trace_mutex); | ||
183 | snprintf(&rcupreempt_trace_buf[cnt], RCUPREEMPT_TRACE_BUF_SIZE - cnt, | ||
184 | "ggp=%ld rcc=%ld\n", | ||
185 | rcu_batches_completed(), | ||
186 | trace.rcu_check_callbacks); | ||
187 | snprintf(&rcupreempt_trace_buf[cnt], RCUPREEMPT_TRACE_BUF_SIZE - cnt, | ||
188 | "na=%ld nl=%ld wa=%ld wl=%ld da=%ld dl=%ld dr=%ld di=%d\n" | ||
189 | "1=%d e1=%d i1=%ld ie1=%ld g1=%ld a1=%ld ae1=%ld a2=%ld\n" | ||
190 | "z1=%ld ze1=%ld z2=%ld m1=%ld me1=%ld m2=%ld\n", | ||
191 | |||
192 | trace.next_add, trace.next_length, | ||
193 | trace.wait_add, trace.wait_length, | ||
194 | trace.done_add, trace.done_length, | ||
195 | trace.done_remove, atomic_read(&trace.done_invoked), | ||
196 | atomic_read(&trace.rcu_try_flip_1), | ||
197 | atomic_read(&trace.rcu_try_flip_e1), | ||
198 | trace.rcu_try_flip_i1, trace.rcu_try_flip_ie1, | ||
199 | trace.rcu_try_flip_g1, | ||
200 | trace.rcu_try_flip_a1, trace.rcu_try_flip_ae1, | ||
201 | trace.rcu_try_flip_a2, | ||
202 | trace.rcu_try_flip_z1, trace.rcu_try_flip_ze1, | ||
203 | trace.rcu_try_flip_z2, | ||
204 | trace.rcu_try_flip_m1, trace.rcu_try_flip_me1, | ||
205 | trace.rcu_try_flip_m2); | ||
206 | bcount = simple_read_from_buffer(buffer, count, ppos, | ||
207 | rcupreempt_trace_buf, strlen(rcupreempt_trace_buf)); | ||
208 | mutex_unlock(&rcupreempt_trace_mutex); | ||
209 | return bcount; | ||
210 | } | ||
211 | |||
212 | static ssize_t rcugp_read(struct file *filp, char __user *buffer, | ||
213 | size_t count, loff_t *ppos) | ||
214 | { | ||
215 | long oldgp = rcu_batches_completed(); | ||
216 | ssize_t bcount; | ||
217 | |||
218 | mutex_lock(&rcupreempt_trace_mutex); | ||
219 | synchronize_rcu(); | ||
220 | snprintf(rcupreempt_trace_buf, RCUPREEMPT_TRACE_BUF_SIZE, | ||
221 | "oldggp=%ld newggp=%ld\n", oldgp, rcu_batches_completed()); | ||
222 | bcount = simple_read_from_buffer(buffer, count, ppos, | ||
223 | rcupreempt_trace_buf, strlen(rcupreempt_trace_buf)); | ||
224 | mutex_unlock(&rcupreempt_trace_mutex); | ||
225 | return bcount; | ||
226 | } | ||
227 | |||
228 | static ssize_t rcuctrs_read(struct file *filp, char __user *buffer, | ||
229 | size_t count, loff_t *ppos) | ||
230 | { | ||
231 | int cnt = 0; | ||
232 | int cpu; | ||
233 | int f = rcu_batches_completed() & 0x1; | ||
234 | ssize_t bcount; | ||
235 | |||
236 | mutex_lock(&rcupreempt_trace_mutex); | ||
237 | |||
238 | cnt += snprintf(&rcupreempt_trace_buf[cnt], RCUPREEMPT_TRACE_BUF_SIZE, | ||
239 | "CPU last cur F M\n"); | ||
240 | for_each_online_cpu(cpu) { | ||
241 | long *flipctr = rcupreempt_flipctr(cpu); | ||
242 | cnt += snprintf(&rcupreempt_trace_buf[cnt], | ||
243 | RCUPREEMPT_TRACE_BUF_SIZE - cnt, | ||
244 | "%3d %4ld %3ld %d %d\n", | ||
245 | cpu, | ||
246 | flipctr[!f], | ||
247 | flipctr[f], | ||
248 | rcupreempt_flip_flag(cpu), | ||
249 | rcupreempt_mb_flag(cpu)); | ||
250 | } | ||
251 | cnt += snprintf(&rcupreempt_trace_buf[cnt], | ||
252 | RCUPREEMPT_TRACE_BUF_SIZE - cnt, | ||
253 | "ggp = %ld, state = %s\n", | ||
254 | rcu_batches_completed(), | ||
255 | rcupreempt_try_flip_state_name()); | ||
256 | cnt += snprintf(&rcupreempt_trace_buf[cnt], | ||
257 | RCUPREEMPT_TRACE_BUF_SIZE - cnt, | ||
258 | "\n"); | ||
259 | bcount = simple_read_from_buffer(buffer, count, ppos, | ||
260 | rcupreempt_trace_buf, strlen(rcupreempt_trace_buf)); | ||
261 | mutex_unlock(&rcupreempt_trace_mutex); | ||
262 | return bcount; | ||
263 | } | ||
264 | |||
265 | static struct file_operations rcustats_fops = { | ||
266 | .owner = THIS_MODULE, | ||
267 | .read = rcustats_read, | ||
268 | }; | ||
269 | |||
270 | static struct file_operations rcugp_fops = { | ||
271 | .owner = THIS_MODULE, | ||
272 | .read = rcugp_read, | ||
273 | }; | ||
274 | |||
275 | static struct file_operations rcuctrs_fops = { | ||
276 | .owner = THIS_MODULE, | ||
277 | .read = rcuctrs_read, | ||
278 | }; | ||
279 | |||
280 | static struct dentry *rcudir, *statdir, *ctrsdir, *gpdir; | ||
281 | static int rcupreempt_debugfs_init(void) | ||
282 | { | ||
283 | rcudir = debugfs_create_dir("rcu", NULL); | ||
284 | if (!rcudir) | ||
285 | goto out; | ||
286 | statdir = debugfs_create_file("rcustats", 0444, rcudir, | ||
287 | NULL, &rcustats_fops); | ||
288 | if (!statdir) | ||
289 | goto free_out; | ||
290 | |||
291 | gpdir = debugfs_create_file("rcugp", 0444, rcudir, NULL, &rcugp_fops); | ||
292 | if (!gpdir) | ||
293 | goto free_out; | ||
294 | |||
295 | ctrsdir = debugfs_create_file("rcuctrs", 0444, rcudir, | ||
296 | NULL, &rcuctrs_fops); | ||
297 | if (!ctrsdir) | ||
298 | goto free_out; | ||
299 | return 0; | ||
300 | free_out: | ||
301 | if (statdir) | ||
302 | debugfs_remove(statdir); | ||
303 | if (gpdir) | ||
304 | debugfs_remove(gpdir); | ||
305 | debugfs_remove(rcudir); | ||
306 | out: | ||
307 | return 1; | ||
308 | } | ||
309 | |||
310 | static int __init rcupreempt_trace_init(void) | ||
311 | { | ||
312 | mutex_init(&rcupreempt_trace_mutex); | ||
313 | rcupreempt_trace_buf = kmalloc(RCUPREEMPT_TRACE_BUF_SIZE, GFP_KERNEL); | ||
314 | if (!rcupreempt_trace_buf) | ||
315 | return 1; | ||
316 | return rcupreempt_debugfs_init(); | ||
317 | } | ||
318 | |||
319 | static void __exit rcupreempt_trace_cleanup(void) | ||
320 | { | ||
321 | debugfs_remove(statdir); | ||
322 | debugfs_remove(gpdir); | ||
323 | debugfs_remove(ctrsdir); | ||
324 | debugfs_remove(rcudir); | ||
325 | kfree(rcupreempt_trace_buf); | ||
326 | } | ||
327 | |||
328 | |||
329 | module_init(rcupreempt_trace_init); | ||
330 | module_exit(rcupreempt_trace_cleanup); | ||