author    Paul E. McKenney <paulmck@linux.vnet.ibm.com>  2008-01-25 15:08:24 -0500
committer Ingo Molnar <mingo@elte.hu>                    2008-01-25 15:08:24 -0500
commit    e260be673a15b6125068270e0216a3bfbfc12f87
tree      f50760606d395bf6faa9e865f814761a3c88d32c
parent    e0ecfa7917cafe72f4a75f87e8bb5d8d51dc534f

Preempt-RCU: implementation
This patch implements a new version of RCU which allows its read-side
critical sections to be preempted. It uses a set of counter pairs to keep
track of the read-side critical sections and flips them when all tasks
exit their read-side critical sections. The details of this implementation
can be found in this paper -

	http://www.rdrop.com/users/paulmck/RCU/OLSrtRCU.2006.08.11a.pdf

and the article -

	http://lwn.net/Articles/253651/

This patch was developed as a part of the -rt kernel development and is
meant to provide better latencies when read-side critical sections of RCU
don't disable preemption. As a consequence of keeping track of RCU
readers, the readers have a slight overhead (optimizations in the paper).
This implementation co-exists with the "classic" RCU implementation and
can be switched to at compile time.

Also includes RCU tracing summarized in debugfs.

[ akpm@linux-foundation.org: build fixes on non-preempt architectures ]

Signed-off-by: Gautham R Shenoy <ego@in.ibm.com>
Signed-off-by: Dipankar Sarma <dipankar@in.ibm.com>
Signed-off-by: Paul E. McKenney <paulmck@us.ibm.com>
Reviewed-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
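[Editorial note: a minimal usage sketch, not part of this patch. The RCU API
calls below (rcu_read_lock(), rcu_dereference(), rcu_assign_pointer(),
call_rcu()) are the existing interfaces, which this patch leaves unchanged;
"struct foo", "gp" and the three functions are hypothetical. The only
behavioral difference under CONFIG_PREEMPT_RCU is that the reader may now be
preempted inside its read-side critical section.]

#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct foo {				/* hypothetical RCU-protected object */
	int a;
	struct rcu_head rcu;
};

static struct foo *gp;			/* hypothetical RCU-protected pointer */

static int foo_reader(void)
{
	struct foo *p;
	int a = -1;

	rcu_read_lock();		/* begin read-side critical section */
	p = rcu_dereference(gp);
	if (p)
		a = p->a;		/* may be preempted here under PREEMPT_RCU */
	rcu_read_unlock();		/* end read-side critical section */
	return a;
}

static void foo_reclaim(struct rcu_head *head)
{
	kfree(container_of(head, struct foo, rcu));
}

static void foo_replace(struct foo *newp)
{
	struct foo *old = gp;

	rcu_assign_pointer(gp, newp);		/* publish the new version */
	if (old)
		call_rcu(&old->rcu, foo_reclaim);	/* free after a grace period */
}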
 fs/Kconfig                       |   1
 include/linux/rcuclassic.h       |   3
 include/linux/rcupdate.h         |  11
 include/linux/rcupreempt.h       |  86
 include/linux/rcupreempt_trace.h |  99
 include/linux/sched.h            |   5
 init/Kconfig                     |  28
 kernel/Kconfig.preempt           |  10
 kernel/Makefile                  |   7
 kernel/fork.c                    |   4
 kernel/rcuclassic.c              |   1
 kernel/rcupreempt.c              | 816
 kernel/rcupreempt_trace.c        | 330
 13 files changed, 1394 insertions(+), 7 deletions(-)
diff --git a/fs/Kconfig b/fs/Kconfig
index 781b47d2f9f2..b4799efaf9e8 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -2130,4 +2130,3 @@ source "fs/nls/Kconfig"
 source "fs/dlm/Kconfig"
 
 endmenu
-
diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h
index 2b8b045a51d5..4d6624260b4c 100644
--- a/include/linux/rcuclassic.h
+++ b/include/linux/rcuclassic.h
@@ -157,5 +157,8 @@ extern void __rcu_init(void);
 extern void rcu_check_callbacks(int cpu, int user);
 extern void rcu_restart_cpu(int cpu);
 
+extern long rcu_batches_completed(void);
+extern long rcu_batches_completed_bh(void);
+
 #endif /* __KERNEL__ */
 #endif /* __LINUX_RCUCLASSIC_H */
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 12aa13e13150..d32c14de270e 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -53,7 +53,11 @@ struct rcu_head {
 	void (*func)(struct rcu_head *head);
 };
 
+#ifdef CONFIG_CLASSIC_RCU
 #include <linux/rcuclassic.h>
+#else /* #ifdef CONFIG_CLASSIC_RCU */
+#include <linux/rcupreempt.h>
+#endif /* #else #ifdef CONFIG_CLASSIC_RCU */
 
 #define RCU_HEAD_INIT	{ .next = NULL, .func = NULL }
 #define RCU_HEAD(head) struct rcu_head head = RCU_HEAD_INIT
@@ -231,13 +235,12 @@ extern void call_rcu_bh(struct rcu_head *head,
 /* Exported common interfaces */
 extern void synchronize_rcu(void);
 extern void rcu_barrier(void);
+extern long rcu_batches_completed(void);
+extern long rcu_batches_completed_bh(void);
 
 /* Internal to kernel */
 extern void rcu_init(void);
-extern void rcu_check_callbacks(int cpu, int user);
-
-extern long rcu_batches_completed(void);
-extern long rcu_batches_completed_bh(void);
+extern int rcu_needs_cpu(int cpu);
 
 #endif /* __KERNEL__ */
 #endif /* __LINUX_RCUPDATE_H */
diff --git a/include/linux/rcupreempt.h b/include/linux/rcupreempt.h
new file mode 100644
index 000000000000..ece8eb3e4151
--- /dev/null
+++ b/include/linux/rcupreempt.h
@@ -0,0 +1,86 @@
1/*
2 * Read-Copy Update mechanism for mutual exclusion (RT implementation)
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * Copyright (C) IBM Corporation, 2006
19 *
20 * Author: Paul McKenney <paulmck@us.ibm.com>
21 *
22 * Based on the original work by Paul McKenney <paul.mckenney@us.ibm.com>
23 * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
24 * Papers:
25 * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf
26 * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001)
27 *
28 * For detailed explanation of Read-Copy Update mechanism see -
29 * Documentation/RCU
30 *
31 */
32
33#ifndef __LINUX_RCUPREEMPT_H
34#define __LINUX_RCUPREEMPT_H
35
36#ifdef __KERNEL__
37
38#include <linux/cache.h>
39#include <linux/spinlock.h>
40#include <linux/threads.h>
41#include <linux/percpu.h>
42#include <linux/cpumask.h>
43#include <linux/seqlock.h>
44
45#define rcu_qsctr_inc(cpu)
46#define rcu_bh_qsctr_inc(cpu)
47#define call_rcu_bh(head, rcu) call_rcu(head, rcu)
48
49extern void __rcu_read_lock(void);
50extern void __rcu_read_unlock(void);
51extern int rcu_pending(int cpu);
52extern int rcu_needs_cpu(int cpu);
53
54#define __rcu_read_lock_bh() { rcu_read_lock(); local_bh_disable(); }
55#define __rcu_read_unlock_bh() { local_bh_enable(); rcu_read_unlock(); }
56
57extern void __synchronize_sched(void);
58
59extern void __rcu_init(void);
60extern void rcu_check_callbacks(int cpu, int user);
61extern void rcu_restart_cpu(int cpu);
62extern long rcu_batches_completed(void);
63
64/*
65 * Return the number of RCU batches processed thus far. Useful for debug
66 * and statistics. The _bh variant is identical to straight RCU.
67 */
68static inline long rcu_batches_completed_bh(void)
69{
70 return rcu_batches_completed();
71}
72
73#ifdef CONFIG_RCU_TRACE
74struct rcupreempt_trace;
75extern long *rcupreempt_flipctr(int cpu);
76extern long rcupreempt_data_completed(void);
77extern int rcupreempt_flip_flag(int cpu);
78extern int rcupreempt_mb_flag(int cpu);
79extern char *rcupreempt_try_flip_state_name(void);
80extern struct rcupreempt_trace *rcupreempt_trace_cpu(int cpu);
81#endif
82
83struct softirq_action;
84
85#endif /* __KERNEL__ */
86#endif /* __LINUX_RCUPREEMPT_H */
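[Editorial note: a rough single-threaded userspace toy of the counter-pair
scheme this interface fronts (the real mechanism is in kernel/rcupreempt.c
further down). All names below are invented for the toy and it elides the
per-CPU and per-task bookkeeping: readers increment rcu_flipctr[completed & 0x1]
on entry and decrement the same slot on exit, and the grace-period detector
flips "completed", then waits for the now-old slot to drain to zero.]

#include <stdio.h>

static long completed;			/* stands in for rcu_ctrlblk.completed */
static long flipctr[2];			/* stands in for the per-CPU counter pair */
static int saved_idx;			/* stands in for t->rcu_flipctr_idx */

static void toy_read_lock(void)
{
	saved_idx = completed & 0x1;	/* pick the current slot */
	flipctr[saved_idx]++;
}

static void toy_read_unlock(void)
{
	flipctr[saved_idx]--;		/* may run after a flip: drains the old slot */
}

static int toy_old_readers_done(void)
{
	return flipctr[!(completed & 0x1)] == 0;	/* like rcu_try_flip_waitzero() */
}

int main(void)
{
	toy_read_lock();		/* reader starts against slot 0 */
	completed++;			/* detector flips: new readers use slot 1 */
	printf("old readers done? %d\n", toy_old_readers_done());	/* prints 0 */
	toy_read_unlock();		/* reader finishes, old slot reaches zero */
	printf("old readers done? %d\n", toy_old_readers_done());	/* prints 1 */
	return 0;
}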
diff --git a/include/linux/rcupreempt_trace.h b/include/linux/rcupreempt_trace.h
new file mode 100644
index 000000000000..21cd6b2a5c42
--- /dev/null
+++ b/include/linux/rcupreempt_trace.h
@@ -0,0 +1,99 @@
1/*
2 * Read-Copy Update mechanism for mutual exclusion (RT implementation)
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * Copyright (C) IBM Corporation, 2006
19 *
20 * Author: Paul McKenney <paulmck@us.ibm.com>
21 *
22 * Based on the original work by Paul McKenney <paul.mckenney@us.ibm.com>
23 * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
24 * Papers:
25 * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf
26 * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001)
27 *
28 * For detailed explanation of the Preemptible Read-Copy Update mechanism see -
29 * http://lwn.net/Articles/253651/
30 */
31
32#ifndef __LINUX_RCUPREEMPT_TRACE_H
33#define __LINUX_RCUPREEMPT_TRACE_H
34
35#ifdef __KERNEL__
36#include <linux/types.h>
37#include <linux/kernel.h>
38
39#include <asm/atomic.h>
40
41/*
42 * PREEMPT_RCU data structures.
43 */
44
45struct rcupreempt_trace {
46 long next_length;
47 long next_add;
48 long wait_length;
49 long wait_add;
50 long done_length;
51 long done_add;
52 long done_remove;
53 atomic_t done_invoked;
54 long rcu_check_callbacks;
55 atomic_t rcu_try_flip_1;
56 atomic_t rcu_try_flip_e1;
57 long rcu_try_flip_i1;
58 long rcu_try_flip_ie1;
59 long rcu_try_flip_g1;
60 long rcu_try_flip_a1;
61 long rcu_try_flip_ae1;
62 long rcu_try_flip_a2;
63 long rcu_try_flip_z1;
64 long rcu_try_flip_ze1;
65 long rcu_try_flip_z2;
66 long rcu_try_flip_m1;
67 long rcu_try_flip_me1;
68 long rcu_try_flip_m2;
69};
70
71#ifdef CONFIG_RCU_TRACE
72#define RCU_TRACE(fn, arg) fn(arg);
73#else
74#define RCU_TRACE(fn, arg)
75#endif
76
77extern void rcupreempt_trace_move2done(struct rcupreempt_trace *trace);
78extern void rcupreempt_trace_move2wait(struct rcupreempt_trace *trace);
79extern void rcupreempt_trace_try_flip_1(struct rcupreempt_trace *trace);
80extern void rcupreempt_trace_try_flip_e1(struct rcupreempt_trace *trace);
81extern void rcupreempt_trace_try_flip_i1(struct rcupreempt_trace *trace);
82extern void rcupreempt_trace_try_flip_ie1(struct rcupreempt_trace *trace);
83extern void rcupreempt_trace_try_flip_g1(struct rcupreempt_trace *trace);
84extern void rcupreempt_trace_try_flip_a1(struct rcupreempt_trace *trace);
85extern void rcupreempt_trace_try_flip_ae1(struct rcupreempt_trace *trace);
86extern void rcupreempt_trace_try_flip_a2(struct rcupreempt_trace *trace);
87extern void rcupreempt_trace_try_flip_z1(struct rcupreempt_trace *trace);
88extern void rcupreempt_trace_try_flip_ze1(struct rcupreempt_trace *trace);
89extern void rcupreempt_trace_try_flip_z2(struct rcupreempt_trace *trace);
90extern void rcupreempt_trace_try_flip_m1(struct rcupreempt_trace *trace);
91extern void rcupreempt_trace_try_flip_me1(struct rcupreempt_trace *trace);
92extern void rcupreempt_trace_try_flip_m2(struct rcupreempt_trace *trace);
93extern void rcupreempt_trace_check_callbacks(struct rcupreempt_trace *trace);
94extern void rcupreempt_trace_done_remove(struct rcupreempt_trace *trace);
95extern void rcupreempt_trace_invoke(struct rcupreempt_trace *trace);
96extern void rcupreempt_trace_next_add(struct rcupreempt_trace *trace);
97
98#endif /* __KERNEL__ */
99#endif /* __LINUX_RCUPREEMPT_TRACE_H */
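[Editorial note: a small hypothetical call site for the RCU_TRACE() wrapper
above; the function and trace instance are invented for illustration. With
CONFIG_RCU_TRACE=y the statement expands to a direct call, and without it the
statement becomes empty, so instrumented hot paths cost nothing when tracing
is configured out.]

#include <linux/rcupreempt_trace.h>

static struct rcupreempt_trace example_trace;	/* hypothetical per-subsystem instance */

static void example_enqueue_path(void)
{
	/*
	 * Expands to rcupreempt_trace_next_add(&example_trace); when
	 * CONFIG_RCU_TRACE=y, and to an empty statement otherwise.
	 */
	RCU_TRACE(rcupreempt_trace_next_add, &example_trace);
}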
diff --git a/include/linux/sched.h b/include/linux/sched.h
index f2044e707004..72e1b8ecfbe1 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -974,6 +974,11 @@ struct task_struct {
 	int nr_cpus_allowed;
 	unsigned int time_slice;
 
+#ifdef CONFIG_PREEMPT_RCU
+	int rcu_read_lock_nesting;
+	int rcu_flipctr_idx;
+#endif /* #ifdef CONFIG_PREEMPT_RCU */
+
 #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
 	struct sched_info sched_info;
 #endif
diff --git a/init/Kconfig b/init/Kconfig
index f5becd2a12f6..0eda68f0ad54 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -763,3 +763,31 @@ source "block/Kconfig"
 
 config PREEMPT_NOTIFIERS
 	bool
+
+choice
+	prompt "RCU implementation type:"
+	default CLASSIC_RCU
+
+config CLASSIC_RCU
+	bool "Classic RCU"
+	help
+	  This option selects the classic RCU implementation that is
+	  designed for best read-side performance on non-realtime
+	  systems.
+
+	  Say Y if you are unsure.
+
+config PREEMPT_RCU
+	bool "Preemptible RCU"
+	depends on PREEMPT
+	help
+	  This option reduces the latency of the kernel by making certain
+	  RCU sections preemptible.  Normally RCU code is non-preemptible;
+	  if this option is selected, read-only RCU sections become
+	  preemptible.  This helps latency, but may expose bugs due to
+	  now-naive assumptions about each RCU read-side critical section
+	  remaining on a given CPU through its execution.
+
+	  Say N if you are unsure.
+
+endchoice
diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
index c64ce9c14207..61fa116efcde 100644
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -63,3 +63,13 @@ config PREEMPT_BKL
 	  Say Y here if you are building a kernel for a desktop system.
 	  Say N if you are unsure.
 
+config RCU_TRACE
+	bool "Enable tracing for RCU - currently stats in debugfs"
+	select DEBUG_FS
+	default y
+	help
+	  This option provides tracing in RCU which presents stats
+	  in debugfs for debugging the RCU implementation.
+
+	  Say Y here if you want to enable RCU tracing.
+	  Say N if you are unsure.
diff --git a/kernel/Makefile b/kernel/Makefile
index def5dd6097a0..68755cd9a7e4 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -6,7 +6,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
 	    exit.o itimer.o time.o softirq.o resource.o \
 	    sysctl.o capability.o ptrace.o timer.o user.o user_namespace.o \
 	    signal.o sys.o kmod.o workqueue.o pid.o \
-	    rcupdate.o rcuclassic.o extable.o params.o posix-timers.o \
+	    rcupdate.o extable.o params.o posix-timers.o \
 	    kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
 	    hrtimer.o rwsem.o latency.o nsproxy.o srcu.o \
 	    utsname.o notifier.o
@@ -52,6 +52,11 @@ obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o
 obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
 obj-$(CONFIG_SECCOMP) += seccomp.o
 obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
+obj-$(CONFIG_CLASSIC_RCU) += rcuclassic.o
+obj-$(CONFIG_PREEMPT_RCU) += rcupreempt.o
+ifeq ($(CONFIG_PREEMPT_RCU),y)
+obj-$(CONFIG_RCU_TRACE) += rcupreempt_trace.o
+endif
 obj-$(CONFIG_RELAY) += relay.o
 obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
 obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
diff --git a/kernel/fork.c b/kernel/fork.c
index 930c51865ab4..9f8ef32cbc7a 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1045,6 +1045,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	copy_flags(clone_flags, p);
 	INIT_LIST_HEAD(&p->children);
 	INIT_LIST_HEAD(&p->sibling);
+#ifdef CONFIG_PREEMPT_RCU
+	p->rcu_read_lock_nesting = 0;
+	p->rcu_flipctr_idx = 0;
+#endif /* #ifdef CONFIG_PREEMPT_RCU */
 	p->vfork_done = NULL;
 	spin_lock_init(&p->alloc_lock);
 
diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
index ce0cf16cab67..f4ffbd0f306f 100644
--- a/kernel/rcuclassic.c
+++ b/kernel/rcuclassic.c
@@ -45,7 +45,6 @@
 #include <linux/moduleparam.h>
 #include <linux/percpu.h>
 #include <linux/notifier.h>
-/* #include <linux/rcupdate.h> @@@ */
 #include <linux/cpu.h>
 #include <linux/mutex.h>
 
diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c
new file mode 100644
index 000000000000..a5aabb1677f8
--- /dev/null
+++ b/kernel/rcupreempt.c
@@ -0,0 +1,816 @@
1/*
2 * Read-Copy Update mechanism for mutual exclusion, realtime implementation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * Copyright IBM Corporation, 2006
19 *
20 * Authors: Paul E. McKenney <paulmck@us.ibm.com>
21 * With thanks to Esben Nielsen, Bill Huey, and Ingo Molnar
22 * for pushing me away from locks and towards counters, and
23 * to Suparna Bhattacharya for pushing me completely away
24 * from atomic instructions on the read side.
25 *
26 * Papers: http://www.rdrop.com/users/paulmck/RCU
27 *
28 * Design Document: http://lwn.net/Articles/253651/
29 *
30 * For detailed explanation of Read-Copy Update mechanism see -
31 * Documentation/RCU/ *.txt
32 *
33 */
34#include <linux/types.h>
35#include <linux/kernel.h>
36#include <linux/init.h>
37#include <linux/spinlock.h>
38#include <linux/smp.h>
39#include <linux/rcupdate.h>
40#include <linux/interrupt.h>
41#include <linux/sched.h>
42#include <asm/atomic.h>
43#include <linux/bitops.h>
44#include <linux/module.h>
45#include <linux/completion.h>
46#include <linux/moduleparam.h>
47#include <linux/percpu.h>
48#include <linux/notifier.h>
49#include <linux/rcupdate.h>
50#include <linux/cpu.h>
51#include <linux/random.h>
52#include <linux/delay.h>
53#include <linux/byteorder/swabb.h>
54#include <linux/cpumask.h>
55#include <linux/rcupreempt_trace.h>
56
57/*
58 * Macro that prevents the compiler from reordering accesses, but does
59 * absolutely -nothing- to prevent CPUs from reordering. This is used
60 * only to mediate communication between mainline code and hardware
61 * interrupt and NMI handlers.
62 */
63#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
64
65/*
66 * PREEMPT_RCU data structures.
67 */
68
69/*
70 * GP_STAGES specifies the number of times the state machine has
71 * to go through all the rcu_try_flip_states (see below)
72 * in a single Grace Period.
73 *
74 * GP in GP_STAGES stands for Grace Period ;)
75 */
76#define GP_STAGES 2
77struct rcu_data {
78 spinlock_t lock; /* Protect rcu_data fields. */
79 long completed; /* Number of last completed batch. */
80 int waitlistcount;
81 struct tasklet_struct rcu_tasklet;
82 struct rcu_head *nextlist;
83 struct rcu_head **nexttail;
84 struct rcu_head *waitlist[GP_STAGES];
85 struct rcu_head **waittail[GP_STAGES];
86 struct rcu_head *donelist;
87 struct rcu_head **donetail;
88 long rcu_flipctr[2];
89#ifdef CONFIG_RCU_TRACE
90 struct rcupreempt_trace trace;
91#endif /* #ifdef CONFIG_RCU_TRACE */
92};
93
94/*
95 * States for rcu_try_flip() and friends.
96 */
97
98enum rcu_try_flip_states {
99
100 /*
101 * Stay here if nothing is happening. Flip the counter if something
102 * starts happening. Denoted by "I"
103 */
104 rcu_try_flip_idle_state,
105
106 /*
107 * Wait here for all CPUs to notice that the counter has flipped. This
108 * prevents the old set of counters from ever being incremented once
109 * we leave this state, which in turn is necessary because we cannot
110 * test any individual counter for zero -- we can only check the sum.
111 * Denoted by "A".
112 */
113 rcu_try_flip_waitack_state,
114
115 /*
116 * Wait here for the sum of the old per-CPU counters to reach zero.
117 * Denoted by "Z".
118 */
119 rcu_try_flip_waitzero_state,
120
121 /*
122 * Wait here for each of the other CPUs to execute a memory barrier.
123 * This is necessary to ensure that these other CPUs really have
124 * completed executing their RCU read-side critical sections, despite
125 * their CPUs wildly reordering memory. Denoted by "M".
126 */
127 rcu_try_flip_waitmb_state,
128};
129
130struct rcu_ctrlblk {
131 spinlock_t fliplock; /* Protect state-machine transitions. */
132 long completed; /* Number of last completed batch. */
133 enum rcu_try_flip_states rcu_try_flip_state; /* The current state of
134 the rcu state machine */
135};
136
137static DEFINE_PER_CPU(struct rcu_data, rcu_data);
138static struct rcu_ctrlblk rcu_ctrlblk = {
139 .fliplock = __SPIN_LOCK_UNLOCKED(rcu_ctrlblk.fliplock),
140 .completed = 0,
141 .rcu_try_flip_state = rcu_try_flip_idle_state,
142};
143
144
145#ifdef CONFIG_RCU_TRACE
146static char *rcu_try_flip_state_names[] =
147 { "idle", "waitack", "waitzero", "waitmb" };
148#endif /* #ifdef CONFIG_RCU_TRACE */
149
150/*
151 * Enum and per-CPU flag to determine when each CPU has seen
152 * the most recent counter flip.
153 */
154
155enum rcu_flip_flag_values {
156 rcu_flip_seen, /* Steady/initial state, last flip seen. */
157 /* Only GP detector can update. */
158 rcu_flipped /* Flip just completed, need confirmation. */
159 /* Only corresponding CPU can update. */
160};
161static DEFINE_PER_CPU_SHARED_ALIGNED(enum rcu_flip_flag_values, rcu_flip_flag)
162 = rcu_flip_seen;
163
164/*
165 * Enum and per-CPU flag to determine when each CPU has executed the
166 * needed memory barrier to fence in memory references from its last RCU
167 * read-side critical section in the just-completed grace period.
168 */
169
170enum rcu_mb_flag_values {
171 rcu_mb_done, /* Steady/initial state, no mb()s required. */
172 /* Only GP detector can update. */
173 rcu_mb_needed /* Flip just completed, need an mb(). */
174 /* Only corresponding CPU can update. */
175};
176static DEFINE_PER_CPU_SHARED_ALIGNED(enum rcu_mb_flag_values, rcu_mb_flag)
177 = rcu_mb_done;
178
179/*
180 * RCU_DATA_ME: find the current CPU's rcu_data structure.
181 * RCU_DATA_CPU: find the specified CPU's rcu_data structure.
182 */
183#define RCU_DATA_ME() (&__get_cpu_var(rcu_data))
184#define RCU_DATA_CPU(cpu) (&per_cpu(rcu_data, cpu))
185
186/*
187 * Helper macro for tracing when the appropriate rcu_data is not
188 * cached in a local variable, but where the CPU number is so cached.
189 */
190#define RCU_TRACE_CPU(f, cpu) RCU_TRACE(f, &(RCU_DATA_CPU(cpu)->trace));
191
192/*
193 * Helper macro for tracing when the appropriate rcu_data is not
194 * cached in a local variable.
195 */
196#define RCU_TRACE_ME(f) RCU_TRACE(f, &(RCU_DATA_ME()->trace));
197
198/*
199 * Helper macro for tracing when the appropriate rcu_data is pointed
200 * to by a local variable.
201 */
202#define RCU_TRACE_RDP(f, rdp) RCU_TRACE(f, &((rdp)->trace));
203
204/*
205 * Return the number of RCU batches processed thus far. Useful
206 * for debug and statistics.
207 */
208long rcu_batches_completed(void)
209{
210 return rcu_ctrlblk.completed;
211}
212EXPORT_SYMBOL_GPL(rcu_batches_completed);
213
214EXPORT_SYMBOL_GPL(rcu_batches_completed_bh);
215
216void __rcu_read_lock(void)
217{
218 int idx;
219 struct task_struct *t = current;
220 int nesting;
221
222 nesting = ACCESS_ONCE(t->rcu_read_lock_nesting);
223 if (nesting != 0) {
224
225 /* An earlier rcu_read_lock() covers us, just count it. */
226
227 t->rcu_read_lock_nesting = nesting + 1;
228
229 } else {
230 unsigned long flags;
231
232 /*
233 * We disable interrupts for the following reasons:
234 * - If we get scheduling clock interrupt here, and we
235 * end up acking the counter flip, it's like a promise
236 * that we will never increment the old counter again.
237 * Thus we will break that promise if that
238 * scheduling clock interrupt happens between the time
239 * we pick the .completed field and the time that we
240 * increment our counter.
241 *
242 * - We don't want to be preempted out here.
243 *
244 * NMIs can still occur, of course, and might themselves
245 * contain rcu_read_lock().
246 */
247
248 local_irq_save(flags);
249
250 /*
251 * Outermost nesting of rcu_read_lock(), so increment
252 * the current counter for the current CPU. Use volatile
253 * casts to prevent the compiler from reordering.
254 */
255
256 idx = ACCESS_ONCE(rcu_ctrlblk.completed) & 0x1;
257 ACCESS_ONCE(RCU_DATA_ME()->rcu_flipctr[idx])++;
258
259 /*
260 * Now that the per-CPU counter has been incremented, we
261 * are protected from races with rcu_read_lock() invoked
262 * from NMI handlers on this CPU. We can therefore safely
263 * increment the nesting counter, relieving further NMIs
264 * of the need to increment the per-CPU counter.
265 */
266
267 ACCESS_ONCE(t->rcu_read_lock_nesting) = nesting + 1;
268
269 /*
270 * Now that we have prevented any NMIs from storing
271 * to the ->rcu_flipctr_idx, we can safely use it to
272 * remember which counter to decrement in the matching
273 * rcu_read_unlock().
274 */
275
276 ACCESS_ONCE(t->rcu_flipctr_idx) = idx;
277 local_irq_restore(flags);
278 }
279}
280EXPORT_SYMBOL_GPL(__rcu_read_lock);
281
282void __rcu_read_unlock(void)
283{
284 int idx;
285 struct task_struct *t = current;
286 int nesting;
287
288 nesting = ACCESS_ONCE(t->rcu_read_lock_nesting);
289 if (nesting > 1) {
290
291 /*
292 * We are still protected by the enclosing rcu_read_lock(),
293 * so simply decrement the counter.
294 */
295
296 t->rcu_read_lock_nesting = nesting - 1;
297
298 } else {
299 unsigned long flags;
300
301 /*
302 * Disable local interrupts to prevent the grace-period
303 * detection state machine from seeing us half-done.
304 * NMIs can still occur, of course, and might themselves
305 * contain rcu_read_lock() and rcu_read_unlock().
306 */
307
308 local_irq_save(flags);
309
310 /*
311 * Outermost nesting of rcu_read_unlock(), so we must
312 * decrement the current counter for the current CPU.
313 * This must be done carefully, because NMIs can
314 * occur at any point in this code, and any rcu_read_lock()
315 * and rcu_read_unlock() pairs in the NMI handlers
316 * must interact non-destructively with this code.
317 * Lots of volatile casts, and -very- careful ordering.
318 *
319 * Changes to this code, including this one, must be
320 * inspected, validated, and tested extremely carefully!!!
321 */
322
323 /*
324 * First, pick up the index.
325 */
326
327 idx = ACCESS_ONCE(t->rcu_flipctr_idx);
328
329 /*
330 * Now that we have fetched the counter index, it is
331 * safe to decrement the per-task RCU nesting counter.
332 * After this, any interrupts or NMIs will increment and
333 * decrement the per-CPU counters.
334 */
335 ACCESS_ONCE(t->rcu_read_lock_nesting) = nesting - 1;
336
337 /*
338 * It is now safe to decrement this task's nesting count.
339 * NMIs that occur after this statement will route their
340 * rcu_read_lock() calls through this "else" clause, and
341 * will thus start incrementing the per-CPU counter on
342 * their own. They will also clobber ->rcu_flipctr_idx,
343 * but that is OK, since we have already fetched it.
344 */
345
346 ACCESS_ONCE(RCU_DATA_ME()->rcu_flipctr[idx])--;
347 local_irq_restore(flags);
348 }
349}
350EXPORT_SYMBOL_GPL(__rcu_read_unlock);
351
352/*
353 * If a global counter flip has occurred since the last time that we
354 * advanced callbacks, advance them. Hardware interrupts must be
355 * disabled when calling this function.
356 */
357static void __rcu_advance_callbacks(struct rcu_data *rdp)
358{
359 int cpu;
360 int i;
361 int wlc = 0;
362
363 if (rdp->completed != rcu_ctrlblk.completed) {
364 if (rdp->waitlist[GP_STAGES - 1] != NULL) {
365 *rdp->donetail = rdp->waitlist[GP_STAGES - 1];
366 rdp->donetail = rdp->waittail[GP_STAGES - 1];
367 RCU_TRACE_RDP(rcupreempt_trace_move2done, rdp);
368 }
369 for (i = GP_STAGES - 2; i >= 0; i--) {
370 if (rdp->waitlist[i] != NULL) {
371 rdp->waitlist[i + 1] = rdp->waitlist[i];
372 rdp->waittail[i + 1] = rdp->waittail[i];
373 wlc++;
374 } else {
375 rdp->waitlist[i + 1] = NULL;
376 rdp->waittail[i + 1] =
377 &rdp->waitlist[i + 1];
378 }
379 }
380 if (rdp->nextlist != NULL) {
381 rdp->waitlist[0] = rdp->nextlist;
382 rdp->waittail[0] = rdp->nexttail;
383 wlc++;
384 rdp->nextlist = NULL;
385 rdp->nexttail = &rdp->nextlist;
386 RCU_TRACE_RDP(rcupreempt_trace_move2wait, rdp);
387 } else {
388 rdp->waitlist[0] = NULL;
389 rdp->waittail[0] = &rdp->waitlist[0];
390 }
391 rdp->waitlistcount = wlc;
392 rdp->completed = rcu_ctrlblk.completed;
393 }
394
395 /*
396 * Check to see if this CPU needs to report that it has seen
397 * the most recent counter flip, thereby declaring that all
398 * subsequent rcu_read_lock() invocations will respect this flip.
399 */
400
401 cpu = raw_smp_processor_id();
402 if (per_cpu(rcu_flip_flag, cpu) == rcu_flipped) {
403 smp_mb(); /* Subsequent counter accesses must see new value */
404 per_cpu(rcu_flip_flag, cpu) = rcu_flip_seen;
405 smp_mb(); /* Subsequent RCU read-side critical sections */
406 /* seen -after- acknowledgement. */
407 }
408}
409
410/*
411 * Get here when RCU is idle. Decide whether we need to
412 * move out of idle state, and return non-zero if so.
413 * "Straightforward" approach for the moment, might later
414 * use callback-list lengths, grace-period duration, or
415 * some such to determine when to exit idle state.
416 * Might also need a pre-idle test that does not acquire
417 * the lock, but let's get the simple case working first...
418 */
419
420static int
421rcu_try_flip_idle(void)
422{
423 int cpu;
424
425 RCU_TRACE_ME(rcupreempt_trace_try_flip_i1);
426 if (!rcu_pending(smp_processor_id())) {
427 RCU_TRACE_ME(rcupreempt_trace_try_flip_ie1);
428 return 0;
429 }
430
431 /*
432 * Do the flip.
433 */
434
435 RCU_TRACE_ME(rcupreempt_trace_try_flip_g1);
436 rcu_ctrlblk.completed++; /* stands in for rcu_try_flip_g2 */
437
438 /*
439 * Need a memory barrier so that other CPUs see the new
440 * counter value before they see the subsequent change of all
441 * the rcu_flip_flag instances to rcu_flipped.
442 */
443
444 smp_mb(); /* see above block comment. */
445
446 /* Now ask each CPU for acknowledgement of the flip. */
447
448 for_each_possible_cpu(cpu)
449 per_cpu(rcu_flip_flag, cpu) = rcu_flipped;
450
451 return 1;
452}
453
454/*
455 * Wait for CPUs to acknowledge the flip.
456 */
457
458static int
459rcu_try_flip_waitack(void)
460{
461 int cpu;
462
463 RCU_TRACE_ME(rcupreempt_trace_try_flip_a1);
464 for_each_possible_cpu(cpu)
465 if (per_cpu(rcu_flip_flag, cpu) != rcu_flip_seen) {
466 RCU_TRACE_ME(rcupreempt_trace_try_flip_ae1);
467 return 0;
468 }
469
470 /*
471 * Make sure our checks above don't bleed into subsequent
472 * waiting for the sum of the counters to reach zero.
473 */
474
475 smp_mb(); /* see above block comment. */
476 RCU_TRACE_ME(rcupreempt_trace_try_flip_a2);
477 return 1;
478}
479
480/*
481 * Wait for collective ``last'' counter to reach zero,
482 * then tell all CPUs to do an end-of-grace-period memory barrier.
483 */
484
485static int
486rcu_try_flip_waitzero(void)
487{
488 int cpu;
489 int lastidx = !(rcu_ctrlblk.completed & 0x1);
490 int sum = 0;
491
492 /* Check to see if the sum of the "last" counters is zero. */
493
494 RCU_TRACE_ME(rcupreempt_trace_try_flip_z1);
495 for_each_possible_cpu(cpu)
496 sum += RCU_DATA_CPU(cpu)->rcu_flipctr[lastidx];
497 if (sum != 0) {
498 RCU_TRACE_ME(rcupreempt_trace_try_flip_ze1);
499 return 0;
500 }
501
502 /*
503 * This ensures that the other CPUs see the call for
504 * memory barriers -after- the sum to zero has been
505 * detected here
506 */
507 smp_mb(); /* ^^^^^^^^^^^^ */
508
509 /* Call for a memory barrier from each CPU. */
510 for_each_possible_cpu(cpu)
511 per_cpu(rcu_mb_flag, cpu) = rcu_mb_needed;
512
513 RCU_TRACE_ME(rcupreempt_trace_try_flip_z2);
514 return 1;
515}
516
517/*
518 * Wait for all CPUs to do their end-of-grace-period memory barrier.
519 * Return 1 once all CPUs have done so.
520 */
521
522static int
523rcu_try_flip_waitmb(void)
524{
525 int cpu;
526
527 RCU_TRACE_ME(rcupreempt_trace_try_flip_m1);
528 for_each_possible_cpu(cpu)
529 if (per_cpu(rcu_mb_flag, cpu) != rcu_mb_done) {
530 RCU_TRACE_ME(rcupreempt_trace_try_flip_me1);
531 return 0;
532 }
533
534 smp_mb(); /* Ensure that the above checks precede any following flip. */
535 RCU_TRACE_ME(rcupreempt_trace_try_flip_m2);
536 return 1;
537}
538
539/*
540 * Attempt a single flip of the counters. Remember, a single flip does
541 * -not- constitute a grace period. Instead, the interval between
542 * at least GP_STAGES consecutive flips is a grace period.
543 *
544 * If anyone is nuts enough to run this CONFIG_PREEMPT_RCU implementation
545 * on a large SMP, they might want to use a hierarchical organization of
546 * the per-CPU-counter pairs.
547 */
548static void rcu_try_flip(void)
549{
550 unsigned long flags;
551
552 RCU_TRACE_ME(rcupreempt_trace_try_flip_1);
553 if (unlikely(!spin_trylock_irqsave(&rcu_ctrlblk.fliplock, flags))) {
554 RCU_TRACE_ME(rcupreempt_trace_try_flip_e1);
555 return;
556 }
557
558 /*
559 * Take the next transition(s) through the RCU grace-period
560 * flip-counter state machine.
561 */
562
563 switch (rcu_ctrlblk.rcu_try_flip_state) {
564 case rcu_try_flip_idle_state:
565 if (rcu_try_flip_idle())
566 rcu_ctrlblk.rcu_try_flip_state =
567 rcu_try_flip_waitack_state;
568 break;
569 case rcu_try_flip_waitack_state:
570 if (rcu_try_flip_waitack())
571 rcu_ctrlblk.rcu_try_flip_state =
572 rcu_try_flip_waitzero_state;
573 break;
574 case rcu_try_flip_waitzero_state:
575 if (rcu_try_flip_waitzero())
576 rcu_ctrlblk.rcu_try_flip_state =
577 rcu_try_flip_waitmb_state;
578 break;
579 case rcu_try_flip_waitmb_state:
580 if (rcu_try_flip_waitmb())
581 rcu_ctrlblk.rcu_try_flip_state =
582 rcu_try_flip_idle_state;
583 }
584 spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags);
585}
586
587/*
588 * Check to see if this CPU needs to do a memory barrier in order to
589 * ensure that any prior RCU read-side critical sections have committed
590 * their counter manipulations and critical-section memory references
591 * before declaring the grace period to be completed.
592 */
593static void rcu_check_mb(int cpu)
594{
595 if (per_cpu(rcu_mb_flag, cpu) == rcu_mb_needed) {
596 smp_mb(); /* Ensure RCU read-side accesses are visible. */
597 per_cpu(rcu_mb_flag, cpu) = rcu_mb_done;
598 }
599}
600
601void rcu_check_callbacks(int cpu, int user)
602{
603 unsigned long flags;
604 struct rcu_data *rdp = RCU_DATA_CPU(cpu);
605
606 rcu_check_mb(cpu);
607 if (rcu_ctrlblk.completed == rdp->completed)
608 rcu_try_flip();
609 spin_lock_irqsave(&rdp->lock, flags);
610 RCU_TRACE_RDP(rcupreempt_trace_check_callbacks, rdp);
611 __rcu_advance_callbacks(rdp);
612 if (rdp->donelist == NULL) {
613 spin_unlock_irqrestore(&rdp->lock, flags);
614 } else {
615 spin_unlock_irqrestore(&rdp->lock, flags);
616 raise_softirq(RCU_SOFTIRQ);
617 }
618}
619
620/*
621 * Needed by dynticks, to make sure all RCU processing has finished
622 * when we go idle:
623 */
624void rcu_advance_callbacks(int cpu, int user)
625{
626 unsigned long flags;
627 struct rcu_data *rdp = RCU_DATA_CPU(cpu);
628
629 if (rcu_ctrlblk.completed == rdp->completed) {
630 rcu_try_flip();
631 if (rcu_ctrlblk.completed == rdp->completed)
632 return;
633 }
634 spin_lock_irqsave(&rdp->lock, flags);
635 RCU_TRACE_RDP(rcupreempt_trace_check_callbacks, rdp);
636 __rcu_advance_callbacks(rdp);
637 spin_unlock_irqrestore(&rdp->lock, flags);
638}
639
640static void rcu_process_callbacks(struct softirq_action *unused)
641{
642 unsigned long flags;
643 struct rcu_head *next, *list;
644 struct rcu_data *rdp = RCU_DATA_ME();
645
646 spin_lock_irqsave(&rdp->lock, flags);
647 list = rdp->donelist;
648 if (list == NULL) {
649 spin_unlock_irqrestore(&rdp->lock, flags);
650 return;
651 }
652 rdp->donelist = NULL;
653 rdp->donetail = &rdp->donelist;
654 RCU_TRACE_RDP(rcupreempt_trace_done_remove, rdp);
655 spin_unlock_irqrestore(&rdp->lock, flags);
656 while (list) {
657 next = list->next;
658 list->func(list);
659 list = next;
660 RCU_TRACE_ME(rcupreempt_trace_invoke);
661 }
662}
663
664void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
665{
666 unsigned long flags;
667 struct rcu_data *rdp;
668
669 head->func = func;
670 head->next = NULL;
671 local_irq_save(flags);
672 rdp = RCU_DATA_ME();
673 spin_lock(&rdp->lock);
674 __rcu_advance_callbacks(rdp);
675 *rdp->nexttail = head;
676 rdp->nexttail = &head->next;
677 RCU_TRACE_RDP(rcupreempt_trace_next_add, rdp);
678 spin_unlock(&rdp->lock);
679 local_irq_restore(flags);
680}
681EXPORT_SYMBOL_GPL(call_rcu);
682
683/*
684 * Wait until all currently running preempt_disable() code segments
685 * (including hardware-irq-disable segments) complete. Note that
686 * in -rt this does -not- necessarily result in all currently executing
687 * interrupt -handlers- having completed.
688 */
689void __synchronize_sched(void)
690{
691 cpumask_t oldmask;
692 int cpu;
693
694 if (sched_getaffinity(0, &oldmask) < 0)
695 oldmask = cpu_possible_map;
696 for_each_online_cpu(cpu) {
697 sched_setaffinity(0, cpumask_of_cpu(cpu));
698 schedule();
699 }
700 sched_setaffinity(0, oldmask);
701}
702EXPORT_SYMBOL_GPL(__synchronize_sched);
703
704/*
705 * Check to see if any future RCU-related work will need to be done
706 * by the current CPU, even if none need be done immediately, returning
707 * 1 if so. Assumes that notifiers would take care of handling any
708 * outstanding requests from the RCU core.
709 *
710 * This function is part of the RCU implementation; it is -not-
711 * an exported member of the RCU API.
712 */
713int rcu_needs_cpu(int cpu)
714{
715 struct rcu_data *rdp = RCU_DATA_CPU(cpu);
716
717 return (rdp->donelist != NULL ||
718 !!rdp->waitlistcount ||
719 rdp->nextlist != NULL);
720}
721
722int rcu_pending(int cpu)
723{
724 struct rcu_data *rdp = RCU_DATA_CPU(cpu);
725
726 /* The CPU has at least one callback queued somewhere. */
727
728 if (rdp->donelist != NULL ||
729 !!rdp->waitlistcount ||
730 rdp->nextlist != NULL)
731 return 1;
732
733 /* The RCU core needs an acknowledgement from this CPU. */
734
735 if ((per_cpu(rcu_flip_flag, cpu) == rcu_flipped) ||
736 (per_cpu(rcu_mb_flag, cpu) == rcu_mb_needed))
737 return 1;
738
739 /* This CPU has fallen behind the global grace-period number. */
740
741 if (rdp->completed != rcu_ctrlblk.completed)
742 return 1;
743
744 /* Nothing needed from this CPU. */
745
746 return 0;
747}
748
749void __init __rcu_init(void)
750{
751 int cpu;
752 int i;
753 struct rcu_data *rdp;
754
755 printk(KERN_NOTICE "Preemptible RCU implementation.\n");
756 for_each_possible_cpu(cpu) {
757 rdp = RCU_DATA_CPU(cpu);
758 spin_lock_init(&rdp->lock);
759 rdp->completed = 0;
760 rdp->waitlistcount = 0;
761 rdp->nextlist = NULL;
762 rdp->nexttail = &rdp->nextlist;
763 for (i = 0; i < GP_STAGES; i++) {
764 rdp->waitlist[i] = NULL;
765 rdp->waittail[i] = &rdp->waitlist[i];
766 }
767 rdp->donelist = NULL;
768 rdp->donetail = &rdp->donelist;
769 rdp->rcu_flipctr[0] = 0;
770 rdp->rcu_flipctr[1] = 0;
771 }
772 open_softirq(RCU_SOFTIRQ, rcu_process_callbacks, NULL);
773}
774
775/*
776 * Deprecated, use synchronize_rcu() or synchronize_sched() instead.
777 */
778void synchronize_kernel(void)
779{
780 synchronize_rcu();
781}
782
783#ifdef CONFIG_RCU_TRACE
784long *rcupreempt_flipctr(int cpu)
785{
786 return &RCU_DATA_CPU(cpu)->rcu_flipctr[0];
787}
788EXPORT_SYMBOL_GPL(rcupreempt_flipctr);
789
790int rcupreempt_flip_flag(int cpu)
791{
792 return per_cpu(rcu_flip_flag, cpu);
793}
794EXPORT_SYMBOL_GPL(rcupreempt_flip_flag);
795
796int rcupreempt_mb_flag(int cpu)
797{
798 return per_cpu(rcu_mb_flag, cpu);
799}
800EXPORT_SYMBOL_GPL(rcupreempt_mb_flag);
801
802char *rcupreempt_try_flip_state_name(void)
803{
804 return rcu_try_flip_state_names[rcu_ctrlblk.rcu_try_flip_state];
805}
806EXPORT_SYMBOL_GPL(rcupreempt_try_flip_state_name);
807
808struct rcupreempt_trace *rcupreempt_trace_cpu(int cpu)
809{
810 struct rcu_data *rdp = RCU_DATA_CPU(cpu);
811
812 return &rdp->trace;
813}
814EXPORT_SYMBOL_GPL(rcupreempt_trace_cpu);
815
816#endif /* #ifdef CONFIG_RCU_TRACE */
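[Editorial note: a standalone userspace toy of the four-state grace-period
machine driven by rcu_try_flip() above, written for this note only. The
predicates that really poll per-CPU flags and counter sums are stubbed out;
the point is that each invocation advances at most one state, and only after
GP_STAGES such full passes has a grace period elapsed.]

#include <stdio.h>

enum toy_state { TOY_IDLE, TOY_WAITACK, TOY_WAITZERO, TOY_WAITMB };
static enum toy_state state = TOY_IDLE;

/* Stubbed predicates; the real ones check rcu_flip_flag, counter sums, rcu_mb_flag. */
static int toy_idle_done(void)	{ return 1; }
static int toy_ack_done(void)	{ return 1; }
static int toy_zero_done(void)	{ return 1; }
static int toy_mb_done(void)	{ return 1; }

static void toy_try_flip(void)
{
	switch (state) {
	case TOY_IDLE:
		if (toy_idle_done())
			state = TOY_WAITACK;
		break;
	case TOY_WAITACK:
		if (toy_ack_done())
			state = TOY_WAITZERO;
		break;
	case TOY_WAITZERO:
		if (toy_zero_done())
			state = TOY_WAITMB;
		break;
	case TOY_WAITMB:
		if (toy_mb_done())
			state = TOY_IDLE;
		break;
	}
}

int main(void)
{
	int i;

	for (i = 0; i < 4; i++) {
		toy_try_flip();		/* one state transition per call, at most */
		printf("step %d -> state %d\n", i, (int)state);
	}
	return 0;
}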
diff --git a/kernel/rcupreempt_trace.c b/kernel/rcupreempt_trace.c
new file mode 100644
index 000000000000..49ac4947af24
--- /dev/null
+++ b/kernel/rcupreempt_trace.c
@@ -0,0 +1,330 @@
1/*
2 * Read-Copy Update tracing for realtime implementation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * Copyright IBM Corporation, 2006
19 *
20 * Papers: http://www.rdrop.com/users/paulmck/RCU
21 *
22 * For detailed explanation of Read-Copy Update mechanism see -
23 * Documentation/RCU/ *.txt
24 *
25 */
26#include <linux/types.h>
27#include <linux/kernel.h>
28#include <linux/init.h>
29#include <linux/spinlock.h>
30#include <linux/smp.h>
31#include <linux/rcupdate.h>
32#include <linux/interrupt.h>
33#include <linux/sched.h>
34#include <asm/atomic.h>
35#include <linux/bitops.h>
36#include <linux/module.h>
37#include <linux/completion.h>
38#include <linux/moduleparam.h>
39#include <linux/percpu.h>
40#include <linux/notifier.h>
41#include <linux/rcupdate.h>
42#include <linux/cpu.h>
43#include <linux/mutex.h>
44#include <linux/rcupreempt_trace.h>
45#include <linux/debugfs.h>
46
47static struct mutex rcupreempt_trace_mutex;
48static char *rcupreempt_trace_buf;
49#define RCUPREEMPT_TRACE_BUF_SIZE 4096
50
51void rcupreempt_trace_move2done(struct rcupreempt_trace *trace)
52{
53 trace->done_length += trace->wait_length;
54 trace->done_add += trace->wait_length;
55 trace->wait_length = 0;
56}
57void rcupreempt_trace_move2wait(struct rcupreempt_trace *trace)
58{
59 trace->wait_length += trace->next_length;
60 trace->wait_add += trace->next_length;
61 trace->next_length = 0;
62}
63void rcupreempt_trace_try_flip_1(struct rcupreempt_trace *trace)
64{
65 atomic_inc(&trace->rcu_try_flip_1);
66}
67void rcupreempt_trace_try_flip_e1(struct rcupreempt_trace *trace)
68{
69 atomic_inc(&trace->rcu_try_flip_e1);
70}
71void rcupreempt_trace_try_flip_i1(struct rcupreempt_trace *trace)
72{
73 trace->rcu_try_flip_i1++;
74}
75void rcupreempt_trace_try_flip_ie1(struct rcupreempt_trace *trace)
76{
77 trace->rcu_try_flip_ie1++;
78}
79void rcupreempt_trace_try_flip_g1(struct rcupreempt_trace *trace)
80{
81 trace->rcu_try_flip_g1++;
82}
83void rcupreempt_trace_try_flip_a1(struct rcupreempt_trace *trace)
84{
85 trace->rcu_try_flip_a1++;
86}
87void rcupreempt_trace_try_flip_ae1(struct rcupreempt_trace *trace)
88{
89 trace->rcu_try_flip_ae1++;
90}
91void rcupreempt_trace_try_flip_a2(struct rcupreempt_trace *trace)
92{
93 trace->rcu_try_flip_a2++;
94}
95void rcupreempt_trace_try_flip_z1(struct rcupreempt_trace *trace)
96{
97 trace->rcu_try_flip_z1++;
98}
99void rcupreempt_trace_try_flip_ze1(struct rcupreempt_trace *trace)
100{
101 trace->rcu_try_flip_ze1++;
102}
103void rcupreempt_trace_try_flip_z2(struct rcupreempt_trace *trace)
104{
105 trace->rcu_try_flip_z2++;
106}
107void rcupreempt_trace_try_flip_m1(struct rcupreempt_trace *trace)
108{
109 trace->rcu_try_flip_m1++;
110}
111void rcupreempt_trace_try_flip_me1(struct rcupreempt_trace *trace)
112{
113 trace->rcu_try_flip_me1++;
114}
115void rcupreempt_trace_try_flip_m2(struct rcupreempt_trace *trace)
116{
117 trace->rcu_try_flip_m2++;
118}
119void rcupreempt_trace_check_callbacks(struct rcupreempt_trace *trace)
120{
121 trace->rcu_check_callbacks++;
122}
123void rcupreempt_trace_done_remove(struct rcupreempt_trace *trace)
124{
125 trace->done_remove += trace->done_length;
126 trace->done_length = 0;
127}
128void rcupreempt_trace_invoke(struct rcupreempt_trace *trace)
129{
130 atomic_inc(&trace->done_invoked);
131}
132void rcupreempt_trace_next_add(struct rcupreempt_trace *trace)
133{
134 trace->next_add++;
135 trace->next_length++;
136}
137
138static void rcupreempt_trace_sum(struct rcupreempt_trace *sp)
139{
140 struct rcupreempt_trace *cp;
141 int cpu;
142
143 memset(sp, 0, sizeof(*sp));
144 for_each_possible_cpu(cpu) {
145 cp = rcupreempt_trace_cpu(cpu);
146 sp->next_length += cp->next_length;
147 sp->next_add += cp->next_add;
148 sp->wait_length += cp->wait_length;
149 sp->wait_add += cp->wait_add;
150 sp->done_length += cp->done_length;
151 sp->done_add += cp->done_add;
152 sp->done_remove += cp->done_remove;
153 atomic_set(&sp->done_invoked, atomic_read(&cp->done_invoked));
154 sp->rcu_check_callbacks += cp->rcu_check_callbacks;
155 atomic_set(&sp->rcu_try_flip_1,
156 atomic_read(&cp->rcu_try_flip_1));
157 atomic_set(&sp->rcu_try_flip_e1,
158 atomic_read(&cp->rcu_try_flip_e1));
159 sp->rcu_try_flip_i1 += cp->rcu_try_flip_i1;
160 sp->rcu_try_flip_ie1 += cp->rcu_try_flip_ie1;
161 sp->rcu_try_flip_g1 += cp->rcu_try_flip_g1;
162 sp->rcu_try_flip_a1 += cp->rcu_try_flip_a1;
163 sp->rcu_try_flip_ae1 += cp->rcu_try_flip_ae1;
164 sp->rcu_try_flip_a2 += cp->rcu_try_flip_a2;
165 sp->rcu_try_flip_z1 += cp->rcu_try_flip_z1;
166 sp->rcu_try_flip_ze1 += cp->rcu_try_flip_ze1;
167 sp->rcu_try_flip_z2 += cp->rcu_try_flip_z2;
168 sp->rcu_try_flip_m1 += cp->rcu_try_flip_m1;
169 sp->rcu_try_flip_me1 += cp->rcu_try_flip_me1;
170 sp->rcu_try_flip_m2 += cp->rcu_try_flip_m2;
171 }
172}
173
174static ssize_t rcustats_read(struct file *filp, char __user *buffer,
175 size_t count, loff_t *ppos)
176{
177 struct rcupreempt_trace trace;
178 ssize_t bcount;
179 int cnt = 0;
180
181 rcupreempt_trace_sum(&trace);
182 mutex_lock(&rcupreempt_trace_mutex);
183 snprintf(&rcupreempt_trace_buf[cnt], RCUPREEMPT_TRACE_BUF_SIZE - cnt,
184 "ggp=%ld rcc=%ld\n",
185 rcu_batches_completed(),
186 trace.rcu_check_callbacks);
187 snprintf(&rcupreempt_trace_buf[cnt], RCUPREEMPT_TRACE_BUF_SIZE - cnt,
188 "na=%ld nl=%ld wa=%ld wl=%ld da=%ld dl=%ld dr=%ld di=%d\n"
189 "1=%d e1=%d i1=%ld ie1=%ld g1=%ld a1=%ld ae1=%ld a2=%ld\n"
190 "z1=%ld ze1=%ld z2=%ld m1=%ld me1=%ld m2=%ld\n",
191
192 trace.next_add, trace.next_length,
193 trace.wait_add, trace.wait_length,
194 trace.done_add, trace.done_length,
195 trace.done_remove, atomic_read(&trace.done_invoked),
196 atomic_read(&trace.rcu_try_flip_1),
197 atomic_read(&trace.rcu_try_flip_e1),
198 trace.rcu_try_flip_i1, trace.rcu_try_flip_ie1,
199 trace.rcu_try_flip_g1,
200 trace.rcu_try_flip_a1, trace.rcu_try_flip_ae1,
201 trace.rcu_try_flip_a2,
202 trace.rcu_try_flip_z1, trace.rcu_try_flip_ze1,
203 trace.rcu_try_flip_z2,
204 trace.rcu_try_flip_m1, trace.rcu_try_flip_me1,
205 trace.rcu_try_flip_m2);
206 bcount = simple_read_from_buffer(buffer, count, ppos,
207 rcupreempt_trace_buf, strlen(rcupreempt_trace_buf));
208 mutex_unlock(&rcupreempt_trace_mutex);
209 return bcount;
210}
211
212static ssize_t rcugp_read(struct file *filp, char __user *buffer,
213 size_t count, loff_t *ppos)
214{
215 long oldgp = rcu_batches_completed();
216 ssize_t bcount;
217
218 mutex_lock(&rcupreempt_trace_mutex);
219 synchronize_rcu();
220 snprintf(rcupreempt_trace_buf, RCUPREEMPT_TRACE_BUF_SIZE,
221 "oldggp=%ld newggp=%ld\n", oldgp, rcu_batches_completed());
222 bcount = simple_read_from_buffer(buffer, count, ppos,
223 rcupreempt_trace_buf, strlen(rcupreempt_trace_buf));
224 mutex_unlock(&rcupreempt_trace_mutex);
225 return bcount;
226}
227
228static ssize_t rcuctrs_read(struct file *filp, char __user *buffer,
229 size_t count, loff_t *ppos)
230{
231 int cnt = 0;
232 int cpu;
233 int f = rcu_batches_completed() & 0x1;
234 ssize_t bcount;
235
236 mutex_lock(&rcupreempt_trace_mutex);
237
238 cnt += snprintf(&rcupreempt_trace_buf[cnt], RCUPREEMPT_TRACE_BUF_SIZE,
239 "CPU last cur F M\n");
240 for_each_online_cpu(cpu) {
241 long *flipctr = rcupreempt_flipctr(cpu);
242 cnt += snprintf(&rcupreempt_trace_buf[cnt],
243 RCUPREEMPT_TRACE_BUF_SIZE - cnt,
244 "%3d %4ld %3ld %d %d\n",
245 cpu,
246 flipctr[!f],
247 flipctr[f],
248 rcupreempt_flip_flag(cpu),
249 rcupreempt_mb_flag(cpu));
250 }
251 cnt += snprintf(&rcupreempt_trace_buf[cnt],
252 RCUPREEMPT_TRACE_BUF_SIZE - cnt,
253 "ggp = %ld, state = %s\n",
254 rcu_batches_completed(),
255 rcupreempt_try_flip_state_name());
256 cnt += snprintf(&rcupreempt_trace_buf[cnt],
257 RCUPREEMPT_TRACE_BUF_SIZE - cnt,
258 "\n");
259 bcount = simple_read_from_buffer(buffer, count, ppos,
260 rcupreempt_trace_buf, strlen(rcupreempt_trace_buf));
261 mutex_unlock(&rcupreempt_trace_mutex);
262 return bcount;
263}
264
265static struct file_operations rcustats_fops = {
266 .owner = THIS_MODULE,
267 .read = rcustats_read,
268};
269
270static struct file_operations rcugp_fops = {
271 .owner = THIS_MODULE,
272 .read = rcugp_read,
273};
274
275static struct file_operations rcuctrs_fops = {
276 .owner = THIS_MODULE,
277 .read = rcuctrs_read,
278};
279
280static struct dentry *rcudir, *statdir, *ctrsdir, *gpdir;
281static int rcupreempt_debugfs_init(void)
282{
283 rcudir = debugfs_create_dir("rcu", NULL);
284 if (!rcudir)
285 goto out;
286 statdir = debugfs_create_file("rcustats", 0444, rcudir,
287 NULL, &rcustats_fops);
288 if (!statdir)
289 goto free_out;
290
291 gpdir = debugfs_create_file("rcugp", 0444, rcudir, NULL, &rcugp_fops);
292 if (!gpdir)
293 goto free_out;
294
295 ctrsdir = debugfs_create_file("rcuctrs", 0444, rcudir,
296 NULL, &rcuctrs_fops);
297 if (!ctrsdir)
298 goto free_out;
299 return 0;
300free_out:
301 if (statdir)
302 debugfs_remove(statdir);
303 if (gpdir)
304 debugfs_remove(gpdir);
305 debugfs_remove(rcudir);
306out:
307 return 1;
308}
309
310static int __init rcupreempt_trace_init(void)
311{
312 mutex_init(&rcupreempt_trace_mutex);
313 rcupreempt_trace_buf = kmalloc(RCUPREEMPT_TRACE_BUF_SIZE, GFP_KERNEL);
314 if (!rcupreempt_trace_buf)
315 return 1;
316 return rcupreempt_debugfs_init();
317}
318
319static void __exit rcupreempt_trace_cleanup(void)
320{
321 debugfs_remove(statdir);
322 debugfs_remove(gpdir);
323 debugfs_remove(ctrsdir);
324 debugfs_remove(rcudir);
325 kfree(rcupreempt_trace_buf);
326}
327
328
329module_init(rcupreempt_trace_init);
330module_exit(rcupreempt_trace_cleanup);
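[Editorial note: a userspace sketch for reading the stats files created
above. It assumes debugfs is mounted at /sys/kernel/debug (the mount point
is not fixed by this patch) and a kernel built with CONFIG_PREEMPT_RCU=y and
CONFIG_RCU_TRACE=y.]

#include <stdio.h>

static void dump(const char *path)
{
	char buf[4096];
	size_t n;
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);
		return;
	}
	printf("== %s ==\n", path);
	while ((n = fread(buf, 1, sizeof(buf) - 1, f)) > 0) {
		buf[n] = '\0';
		fputs(buf, stdout);
	}
	fclose(f);
}

int main(void)
{
	dump("/sys/kernel/debug/rcu/rcustats");	/* summed counters and flip stats */
	dump("/sys/kernel/debug/rcu/rcugp");	/* grace-period numbers around a synchronize_rcu() */
	dump("/sys/kernel/debug/rcu/rcuctrs");	/* per-CPU flip counters and flags */
	return 0;
}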