diff options
-rw-r--r-- | Documentation/RCU/rcu.txt | 10 | ||||
-rw-r--r-- | Documentation/RCU/whatisRCU.txt | 8 | ||||
-rw-r--r-- | include/linux/init_task.h | 6 | ||||
-rw-r--r-- | include/linux/rcupdate.h | 4 | ||||
-rw-r--r-- | include/linux/rcupreempt.h | 140 | ||||
-rw-r--r-- | include/linux/rcupreempt_trace.h | 97 | ||||
-rw-r--r-- | include/linux/sched.h | 13 | ||||
-rw-r--r-- | init/Kconfig | 20 | ||||
-rw-r--r-- | kernel/Makefile | 2 | ||||
-rw-r--r-- | kernel/rcupreempt.c | 1518 | ||||
-rw-r--r-- | kernel/rcupreempt_trace.c | 335 | ||||
-rw-r--r-- | lib/Kconfig.debug | 2 |
12 files changed, 13 insertions, 2142 deletions
diff --git a/Documentation/RCU/rcu.txt b/Documentation/RCU/rcu.txt index 7aa2002ade77..2a23523ce471 100644 --- a/Documentation/RCU/rcu.txt +++ b/Documentation/RCU/rcu.txt | |||
@@ -36,7 +36,7 @@ o How can the updater tell when a grace period has completed | |||
36 | executed in user mode, or executed in the idle loop, we can | 36 | executed in user mode, or executed in the idle loop, we can |
37 | safely free up that item. | 37 | safely free up that item. |
38 | 38 | ||
39 | Preemptible variants of RCU (CONFIG_PREEMPT_RCU) get the | 39 | Preemptible variants of RCU (CONFIG_TREE_PREEMPT_RCU) get the |
40 | same effect, but require that the readers manipulate CPU-local | 40 | same effect, but require that the readers manipulate CPU-local |
41 | counters. These counters allow limited types of blocking | 41 | counters. These counters allow limited types of blocking |
42 | within RCU read-side critical sections. SRCU also uses | 42 | within RCU read-side critical sections. SRCU also uses |
@@ -79,10 +79,10 @@ o I hear that RCU is patented? What is with that? | |||
79 | o I hear that RCU needs work in order to support realtime kernels? | 79 | o I hear that RCU needs work in order to support realtime kernels? |
80 | 80 | ||
81 | This work is largely completed. Realtime-friendly RCU can be | 81 | This work is largely completed. Realtime-friendly RCU can be |
82 | enabled via the CONFIG_PREEMPT_RCU kernel configuration parameter. | 82 | enabled via the CONFIG_TREE_PREEMPT_RCU kernel configuration |
83 | However, work is in progress for enabling priority boosting of | 83 | parameter. However, work is in progress for enabling priority |
84 | preempted RCU read-side critical sections. This is needed if you | 84 | boosting of preempted RCU read-side critical sections. This is |
85 | have CPU-bound realtime threads. | 85 | needed if you have CPU-bound realtime threads. |
86 | 86 | ||
87 | o Where can I find more information on RCU? | 87 | o Where can I find more information on RCU? |
88 | 88 | ||
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt index 97ded2432c59..e41a7fecf0d3 100644 --- a/Documentation/RCU/whatisRCU.txt +++ b/Documentation/RCU/whatisRCU.txt | |||
@@ -136,10 +136,10 @@ rcu_read_lock() | |||
136 | Used by a reader to inform the reclaimer that the reader is | 136 | Used by a reader to inform the reclaimer that the reader is |
137 | entering an RCU read-side critical section. It is illegal | 137 | entering an RCU read-side critical section. It is illegal |
138 | to block while in an RCU read-side critical section, though | 138 | to block while in an RCU read-side critical section, though |
139 | kernels built with CONFIG_PREEMPT_RCU can preempt RCU read-side | 139 | kernels built with CONFIG_TREE_PREEMPT_RCU can preempt RCU |
140 | critical sections. Any RCU-protected data structure accessed | 140 | read-side critical sections. Any RCU-protected data structure |
141 | during an RCU read-side critical section is guaranteed to remain | 141 | accessed during an RCU read-side critical section is guaranteed to |
142 | unreclaimed for the full duration of that critical section. | 142 | remain unreclaimed for the full duration of that critical section. |
143 | Reference counts may be used in conjunction with RCU to maintain | 143 | Reference counts may be used in conjunction with RCU to maintain |
144 | longer-term references to data structures. | 144 | longer-term references to data structures. |
145 | 145 | ||
diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 971a968831bf..79d4baee31b6 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h | |||
@@ -94,11 +94,7 @@ extern struct group_info init_groups; | |||
94 | # define CAP_INIT_BSET CAP_INIT_EFF_SET | 94 | # define CAP_INIT_BSET CAP_INIT_EFF_SET |
95 | #endif | 95 | #endif |
96 | 96 | ||
97 | #ifdef CONFIG_PREEMPT_RCU | 97 | #ifdef CONFIG_TREE_PREEMPT_RCU |
98 | #define INIT_TASK_RCU_PREEMPT(tsk) \ | ||
99 | .rcu_read_lock_nesting = 0, \ | ||
100 | .rcu_flipctr_idx = 0, | ||
101 | #elif defined(CONFIG_TREE_PREEMPT_RCU) | ||
102 | #define INIT_TASK_RCU_PREEMPT(tsk) \ | 98 | #define INIT_TASK_RCU_PREEMPT(tsk) \ |
103 | .rcu_read_lock_nesting = 0, \ | 99 | .rcu_read_lock_nesting = 0, \ |
104 | .rcu_read_unlock_special = 0, \ | 100 | .rcu_read_unlock_special = 0, \ |
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 26892f5e7bd8..ec90fc34fea9 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h | |||
@@ -68,11 +68,9 @@ extern int rcu_scheduler_active; | |||
68 | 68 | ||
69 | #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) | 69 | #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) |
70 | #include <linux/rcutree.h> | 70 | #include <linux/rcutree.h> |
71 | #elif defined(CONFIG_PREEMPT_RCU) | ||
72 | #include <linux/rcupreempt.h> | ||
73 | #else | 71 | #else |
74 | #error "Unknown RCU implementation specified to kernel configuration" | 72 | #error "Unknown RCU implementation specified to kernel configuration" |
75 | #endif /* #else #if defined(CONFIG_CLASSIC_RCU) */ | 73 | #endif |
76 | 74 | ||
77 | #define RCU_HEAD_INIT { .next = NULL, .func = NULL } | 75 | #define RCU_HEAD_INIT { .next = NULL, .func = NULL } |
78 | #define RCU_HEAD(head) struct rcu_head head = RCU_HEAD_INIT | 76 | #define RCU_HEAD(head) struct rcu_head head = RCU_HEAD_INIT |
diff --git a/include/linux/rcupreempt.h b/include/linux/rcupreempt.h deleted file mode 100644 index a42ab88e9210..000000000000 --- a/include/linux/rcupreempt.h +++ /dev/null | |||
@@ -1,140 +0,0 @@ | |||
1 | /* | ||
2 | * Read-Copy Update mechanism for mutual exclusion (RT implementation) | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
17 | * | ||
18 | * Copyright (C) IBM Corporation, 2006 | ||
19 | * | ||
20 | * Author: Paul McKenney <paulmck@us.ibm.com> | ||
21 | * | ||
22 | * Based on the original work by Paul McKenney <paul.mckenney@us.ibm.com> | ||
23 | * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen. | ||
24 | * Papers: | ||
25 | * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf | ||
26 | * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001) | ||
27 | * | ||
28 | * For detailed explanation of Read-Copy Update mechanism see - | ||
29 | * Documentation/RCU | ||
30 | * | ||
31 | */ | ||
32 | |||
33 | #ifndef __LINUX_RCUPREEMPT_H | ||
34 | #define __LINUX_RCUPREEMPT_H | ||
35 | |||
36 | #include <linux/cache.h> | ||
37 | #include <linux/spinlock.h> | ||
38 | #include <linux/threads.h> | ||
39 | #include <linux/smp.h> | ||
40 | #include <linux/cpumask.h> | ||
41 | #include <linux/seqlock.h> | ||
42 | |||
43 | extern void rcu_sched_qs(int cpu); | ||
44 | static inline void rcu_bh_qs(int cpu) { } | ||
45 | |||
46 | /* | ||
47 | * Someone might want to pass call_rcu_bh as a function pointer. | ||
48 | * So this needs to just be a rename and not a macro function. | ||
49 | * (no parentheses) | ||
50 | */ | ||
51 | #define call_rcu_bh call_rcu | ||
52 | |||
53 | /** | ||
54 | * call_rcu_sched - Queue RCU callback for invocation after sched grace period. | ||
55 | * @head: structure to be used for queueing the RCU updates. | ||
56 | * @func: actual update function to be invoked after the grace period | ||
57 | * | ||
58 | * The update function will be invoked some time after a full | ||
59 | * synchronize_sched()-style grace period elapses, in other words after | ||
60 | * all currently executing preempt-disabled sections of code (including | ||
61 | * hardirq handlers, NMI handlers, and local_irq_save() blocks) have | ||
62 | * completed. | ||
63 | */ | ||
64 | extern void call_rcu_sched(struct rcu_head *head, | ||
65 | void (*func)(struct rcu_head *head)); | ||
66 | |||
67 | extern void __rcu_read_lock(void); | ||
68 | extern void __rcu_read_unlock(void); | ||
69 | extern int rcu_needs_cpu(int cpu); | ||
70 | |||
71 | #define __rcu_read_lock_bh() { rcu_read_lock(); local_bh_disable(); } | ||
72 | #define __rcu_read_unlock_bh() { local_bh_enable(); rcu_read_unlock(); } | ||
73 | |||
74 | extern void __synchronize_sched(void); | ||
75 | |||
76 | static inline void synchronize_rcu_expedited(void) | ||
77 | { | ||
78 | synchronize_rcu(); /* Placeholder for new rcupreempt implementation. */ | ||
79 | } | ||
80 | |||
81 | static inline void synchronize_rcu_bh_expedited(void) | ||
82 | { | ||
83 | synchronize_rcu_bh(); /* Placeholder for new rcupreempt impl. */ | ||
84 | } | ||
85 | |||
86 | extern void __rcu_init(void); | ||
87 | extern void rcu_init_sched(void); | ||
88 | extern void rcu_check_callbacks(int cpu, int user); | ||
89 | extern void rcu_restart_cpu(int cpu); | ||
90 | extern long rcu_batches_completed(void); | ||
91 | |||
92 | /* | ||
93 | * Return the number of RCU batches processed thus far. Useful for debug | ||
94 | * and statistics. The _bh variant is identical to straight RCU | ||
95 | */ | ||
96 | static inline long rcu_batches_completed_bh(void) | ||
97 | { | ||
98 | return rcu_batches_completed(); | ||
99 | } | ||
100 | |||
101 | static inline void exit_rcu(void) | ||
102 | { | ||
103 | } | ||
104 | |||
105 | #ifdef CONFIG_RCU_TRACE | ||
106 | struct rcupreempt_trace; | ||
107 | extern long *rcupreempt_flipctr(int cpu); | ||
108 | extern long rcupreempt_data_completed(void); | ||
109 | extern int rcupreempt_flip_flag(int cpu); | ||
110 | extern int rcupreempt_mb_flag(int cpu); | ||
111 | extern char *rcupreempt_try_flip_state_name(void); | ||
112 | extern struct rcupreempt_trace *rcupreempt_trace_cpu(int cpu); | ||
113 | #endif | ||
114 | |||
115 | struct softirq_action; | ||
116 | |||
117 | #ifdef CONFIG_NO_HZ | ||
118 | extern void rcu_enter_nohz(void); | ||
119 | extern void rcu_exit_nohz(void); | ||
120 | #else | ||
121 | # define rcu_enter_nohz() do { } while (0) | ||
122 | # define rcu_exit_nohz() do { } while (0) | ||
123 | #endif | ||
124 | |||
125 | /* | ||
126 | * A context switch is a grace period for rcupreempt synchronize_rcu() | ||
127 | * only during early boot, before the scheduler has been initialized. | ||
128 | * So, how the heck do we get a context switch? Well, if the caller | ||
129 | * invokes synchronize_rcu(), they are willing to accept a context | ||
130 | * switch, so we simply pretend that one happened. | ||
131 | * | ||
132 | * After boot, there might be a blocked or preempted task in an RCU | ||
133 | * read-side critical section, so we cannot then take the fastpath. | ||
134 | */ | ||
135 | static inline int rcu_blocking_is_gp(void) | ||
136 | { | ||
137 | return num_online_cpus() == 1 && !rcu_scheduler_active; | ||
138 | } | ||
139 | |||
140 | #endif /* __LINUX_RCUPREEMPT_H */ | ||
diff --git a/include/linux/rcupreempt_trace.h b/include/linux/rcupreempt_trace.h deleted file mode 100644 index b99ae073192a..000000000000 --- a/include/linux/rcupreempt_trace.h +++ /dev/null | |||
@@ -1,97 +0,0 @@ | |||
1 | /* | ||
2 | * Read-Copy Update mechanism for mutual exclusion (RT implementation) | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
17 | * | ||
18 | * Copyright (C) IBM Corporation, 2006 | ||
19 | * | ||
20 | * Author: Paul McKenney <paulmck@us.ibm.com> | ||
21 | * | ||
22 | * Based on the original work by Paul McKenney <paul.mckenney@us.ibm.com> | ||
23 | * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen. | ||
24 | * Papers: | ||
25 | * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf | ||
26 | * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001) | ||
27 | * | ||
28 | * For detailed explanation of the Preemptible Read-Copy Update mechanism see - | ||
29 | * http://lwn.net/Articles/253651/ | ||
30 | */ | ||
31 | |||
32 | #ifndef __LINUX_RCUPREEMPT_TRACE_H | ||
33 | #define __LINUX_RCUPREEMPT_TRACE_H | ||
34 | |||
35 | #include <linux/types.h> | ||
36 | #include <linux/kernel.h> | ||
37 | |||
38 | #include <asm/atomic.h> | ||
39 | |||
40 | /* | ||
41 | * PREEMPT_RCU data structures. | ||
42 | */ | ||
43 | |||
44 | struct rcupreempt_trace { | ||
45 | long next_length; | ||
46 | long next_add; | ||
47 | long wait_length; | ||
48 | long wait_add; | ||
49 | long done_length; | ||
50 | long done_add; | ||
51 | long done_remove; | ||
52 | atomic_t done_invoked; | ||
53 | long rcu_check_callbacks; | ||
54 | atomic_t rcu_try_flip_1; | ||
55 | atomic_t rcu_try_flip_e1; | ||
56 | long rcu_try_flip_i1; | ||
57 | long rcu_try_flip_ie1; | ||
58 | long rcu_try_flip_g1; | ||
59 | long rcu_try_flip_a1; | ||
60 | long rcu_try_flip_ae1; | ||
61 | long rcu_try_flip_a2; | ||
62 | long rcu_try_flip_z1; | ||
63 | long rcu_try_flip_ze1; | ||
64 | long rcu_try_flip_z2; | ||
65 | long rcu_try_flip_m1; | ||
66 | long rcu_try_flip_me1; | ||
67 | long rcu_try_flip_m2; | ||
68 | }; | ||
69 | |||
70 | #ifdef CONFIG_RCU_TRACE | ||
71 | #define RCU_TRACE(fn, arg) fn(arg); | ||
72 | #else | ||
73 | #define RCU_TRACE(fn, arg) | ||
74 | #endif | ||
75 | |||
76 | extern void rcupreempt_trace_move2done(struct rcupreempt_trace *trace); | ||
77 | extern void rcupreempt_trace_move2wait(struct rcupreempt_trace *trace); | ||
78 | extern void rcupreempt_trace_try_flip_1(struct rcupreempt_trace *trace); | ||
79 | extern void rcupreempt_trace_try_flip_e1(struct rcupreempt_trace *trace); | ||
80 | extern void rcupreempt_trace_try_flip_i1(struct rcupreempt_trace *trace); | ||
81 | extern void rcupreempt_trace_try_flip_ie1(struct rcupreempt_trace *trace); | ||
82 | extern void rcupreempt_trace_try_flip_g1(struct rcupreempt_trace *trace); | ||
83 | extern void rcupreempt_trace_try_flip_a1(struct rcupreempt_trace *trace); | ||
84 | extern void rcupreempt_trace_try_flip_ae1(struct rcupreempt_trace *trace); | ||
85 | extern void rcupreempt_trace_try_flip_a2(struct rcupreempt_trace *trace); | ||
86 | extern void rcupreempt_trace_try_flip_z1(struct rcupreempt_trace *trace); | ||
87 | extern void rcupreempt_trace_try_flip_ze1(struct rcupreempt_trace *trace); | ||
88 | extern void rcupreempt_trace_try_flip_z2(struct rcupreempt_trace *trace); | ||
89 | extern void rcupreempt_trace_try_flip_m1(struct rcupreempt_trace *trace); | ||
90 | extern void rcupreempt_trace_try_flip_me1(struct rcupreempt_trace *trace); | ||
91 | extern void rcupreempt_trace_try_flip_m2(struct rcupreempt_trace *trace); | ||
92 | extern void rcupreempt_trace_check_callbacks(struct rcupreempt_trace *trace); | ||
93 | extern void rcupreempt_trace_done_remove(struct rcupreempt_trace *trace); | ||
94 | extern void rcupreempt_trace_invoke(struct rcupreempt_trace *trace); | ||
95 | extern void rcupreempt_trace_next_add(struct rcupreempt_trace *trace); | ||
96 | |||
97 | #endif /* __LINUX_RCUPREEMPT_TRACE_H */ | ||
diff --git a/include/linux/sched.h b/include/linux/sched.h index d7f98f637a2a..bfca26d63b13 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -1205,11 +1205,6 @@ struct task_struct { | |||
1205 | unsigned int policy; | 1205 | unsigned int policy; |
1206 | cpumask_t cpus_allowed; | 1206 | cpumask_t cpus_allowed; |
1207 | 1207 | ||
1208 | #ifdef CONFIG_PREEMPT_RCU | ||
1209 | int rcu_read_lock_nesting; | ||
1210 | int rcu_flipctr_idx; | ||
1211 | #endif /* #ifdef CONFIG_PREEMPT_RCU */ | ||
1212 | |||
1213 | #ifdef CONFIG_TREE_PREEMPT_RCU | 1208 | #ifdef CONFIG_TREE_PREEMPT_RCU |
1214 | int rcu_read_lock_nesting; | 1209 | int rcu_read_lock_nesting; |
1215 | char rcu_read_unlock_special; | 1210 | char rcu_read_unlock_special; |
@@ -1744,14 +1739,6 @@ static inline void rcu_copy_process(struct task_struct *p) | |||
1744 | INIT_LIST_HEAD(&p->rcu_node_entry); | 1739 | INIT_LIST_HEAD(&p->rcu_node_entry); |
1745 | } | 1740 | } |
1746 | 1741 | ||
1747 | #elif defined(CONFIG_PREEMPT_RCU) | ||
1748 | |||
1749 | static inline void rcu_copy_process(struct task_struct *p) | ||
1750 | { | ||
1751 | p->rcu_read_lock_nesting = 0; | ||
1752 | p->rcu_flipctr_idx = 0; | ||
1753 | } | ||
1754 | |||
1755 | #else | 1742 | #else |
1756 | 1743 | ||
1757 | static inline void rcu_copy_process(struct task_struct *p) | 1744 | static inline void rcu_copy_process(struct task_struct *p) |
diff --git a/init/Kconfig b/init/Kconfig index f88da2d1c1fb..8e8b76d8a272 100644 --- a/init/Kconfig +++ b/init/Kconfig | |||
@@ -324,17 +324,6 @@ config TREE_RCU | |||
324 | thousands of CPUs. It also scales down nicely to | 324 | thousands of CPUs. It also scales down nicely to |
325 | smaller systems. | 325 | smaller systems. |
326 | 326 | ||
327 | config PREEMPT_RCU | ||
328 | bool "Preemptible RCU" | ||
329 | depends on PREEMPT | ||
330 | help | ||
331 | This option reduces the latency of the kernel by making certain | ||
332 | RCU sections preemptible. Normally RCU code is non-preemptible, if | ||
333 | this option is selected then read-only RCU sections become | ||
334 | preemptible. This helps latency, but may expose bugs due to | ||
335 | now-naive assumptions about each RCU read-side critical section | ||
336 | remaining on a given CPU through its execution. | ||
337 | |||
338 | config TREE_PREEMPT_RCU | 327 | config TREE_PREEMPT_RCU |
339 | bool "Preemptable tree-based hierarchical RCU" | 328 | bool "Preemptable tree-based hierarchical RCU" |
340 | depends on PREEMPT | 329 | depends on PREEMPT |
@@ -348,7 +337,7 @@ endchoice | |||
348 | 337 | ||
349 | config RCU_TRACE | 338 | config RCU_TRACE |
350 | bool "Enable tracing for RCU" | 339 | bool "Enable tracing for RCU" |
351 | depends on TREE_RCU || PREEMPT_RCU || TREE_PREEMPT_RCU | 340 | depends on TREE_RCU || TREE_PREEMPT_RCU |
352 | help | 341 | help |
353 | This option provides tracing in RCU which presents stats | 342 | This option provides tracing in RCU which presents stats |
354 | in debugfs for debugging RCU implementation. | 343 | in debugfs for debugging RCU implementation. |
@@ -395,13 +384,6 @@ config TREE_RCU_TRACE | |||
395 | TREE_PREEMPT_RCU implementations, permitting Makefile to | 384 | TREE_PREEMPT_RCU implementations, permitting Makefile to |
396 | trivially select kernel/rcutree_trace.c. | 385 | trivially select kernel/rcutree_trace.c. |
397 | 386 | ||
398 | config PREEMPT_RCU_TRACE | ||
399 | def_bool RCU_TRACE && PREEMPT_RCU | ||
400 | select DEBUG_FS | ||
401 | help | ||
402 | This option provides tracing for the PREEMPT_RCU implementation, | ||
403 | permitting Makefile to trivially select kernel/rcupreempt_trace.c. | ||
404 | |||
405 | endmenu # "RCU Subsystem" | 387 | endmenu # "RCU Subsystem" |
406 | 388 | ||
407 | config IKCONFIG | 389 | config IKCONFIG |
diff --git a/kernel/Makefile b/kernel/Makefile index 1a38b4789dda..b833bd5cc127 100644 --- a/kernel/Makefile +++ b/kernel/Makefile | |||
@@ -82,9 +82,7 @@ obj-$(CONFIG_SECCOMP) += seccomp.o | |||
82 | obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o | 82 | obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o |
83 | obj-$(CONFIG_TREE_RCU) += rcutree.o | 83 | obj-$(CONFIG_TREE_RCU) += rcutree.o |
84 | obj-$(CONFIG_TREE_PREEMPT_RCU) += rcutree.o | 84 | obj-$(CONFIG_TREE_PREEMPT_RCU) += rcutree.o |
85 | obj-$(CONFIG_PREEMPT_RCU) += rcupreempt.o | ||
86 | obj-$(CONFIG_TREE_RCU_TRACE) += rcutree_trace.o | 85 | obj-$(CONFIG_TREE_RCU_TRACE) += rcutree_trace.o |
87 | obj-$(CONFIG_PREEMPT_RCU_TRACE) += rcupreempt_trace.o | ||
88 | obj-$(CONFIG_RELAY) += relay.o | 86 | obj-$(CONFIG_RELAY) += relay.o |
89 | obj-$(CONFIG_SYSCTL) += utsname_sysctl.o | 87 | obj-$(CONFIG_SYSCTL) += utsname_sysctl.o |
90 | obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o | 88 | obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o |
diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c deleted file mode 100644 index 0053ce56e326..000000000000 --- a/kernel/rcupreempt.c +++ /dev/null | |||
@@ -1,1518 +0,0 @@ | |||
1 | /* | ||
2 | * Read-Copy Update mechanism for mutual exclusion, realtime implementation | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
17 | * | ||
18 | * Copyright IBM Corporation, 2006 | ||
19 | * | ||
20 | * Authors: Paul E. McKenney <paulmck@us.ibm.com> | ||
21 | * With thanks to Esben Nielsen, Bill Huey, and Ingo Molnar | ||
22 | * for pushing me away from locks and towards counters, and | ||
23 | * to Suparna Bhattacharya for pushing me completely away | ||
24 | * from atomic instructions on the read side. | ||
25 | * | ||
26 | * - Added handling of Dynamic Ticks | ||
27 | * Copyright 2007 - Paul E. Mckenney <paulmck@us.ibm.com> | ||
28 | * - Steven Rostedt <srostedt@redhat.com> | ||
29 | * | ||
30 | * Papers: http://www.rdrop.com/users/paulmck/RCU | ||
31 | * | ||
32 | * Design Document: http://lwn.net/Articles/253651/ | ||
33 | * | ||
34 | * For detailed explanation of Read-Copy Update mechanism see - | ||
35 | * Documentation/RCU/ *.txt | ||
36 | * | ||
37 | */ | ||
38 | #include <linux/types.h> | ||
39 | #include <linux/kernel.h> | ||
40 | #include <linux/init.h> | ||
41 | #include <linux/spinlock.h> | ||
42 | #include <linux/smp.h> | ||
43 | #include <linux/rcupdate.h> | ||
44 | #include <linux/interrupt.h> | ||
45 | #include <linux/sched.h> | ||
46 | #include <asm/atomic.h> | ||
47 | #include <linux/bitops.h> | ||
48 | #include <linux/module.h> | ||
49 | #include <linux/kthread.h> | ||
50 | #include <linux/completion.h> | ||
51 | #include <linux/moduleparam.h> | ||
52 | #include <linux/percpu.h> | ||
53 | #include <linux/notifier.h> | ||
54 | #include <linux/cpu.h> | ||
55 | #include <linux/random.h> | ||
56 | #include <linux/delay.h> | ||
57 | #include <linux/cpumask.h> | ||
58 | #include <linux/rcupreempt_trace.h> | ||
59 | #include <asm/byteorder.h> | ||
60 | |||
61 | /* | ||
62 | * PREEMPT_RCU data structures. | ||
63 | */ | ||
64 | |||
65 | /* | ||
66 | * GP_STAGES specifies the number of times the state machine has | ||
67 | * to go through all the rcu_try_flip_states (see below) | ||
68 | * in a single Grace Period. | ||
69 | * | ||
70 | * GP in GP_STAGES stands for Grace Period ;) | ||
71 | */ | ||
72 | #define GP_STAGES 2 | ||
73 | struct rcu_data { | ||
74 | spinlock_t lock; /* Protect rcu_data fields. */ | ||
75 | long completed; /* Number of last completed batch. */ | ||
76 | int waitlistcount; | ||
77 | struct rcu_head *nextlist; | ||
78 | struct rcu_head **nexttail; | ||
79 | struct rcu_head *waitlist[GP_STAGES]; | ||
80 | struct rcu_head **waittail[GP_STAGES]; | ||
81 | struct rcu_head *donelist; /* from waitlist & waitschedlist */ | ||
82 | struct rcu_head **donetail; | ||
83 | long rcu_flipctr[2]; | ||
84 | struct rcu_head *nextschedlist; | ||
85 | struct rcu_head **nextschedtail; | ||
86 | struct rcu_head *waitschedlist; | ||
87 | struct rcu_head **waitschedtail; | ||
88 | int rcu_sched_sleeping; | ||
89 | #ifdef CONFIG_RCU_TRACE | ||
90 | struct rcupreempt_trace trace; | ||
91 | #endif /* #ifdef CONFIG_RCU_TRACE */ | ||
92 | }; | ||
93 | |||
94 | /* | ||
95 | * States for rcu_try_flip() and friends. | ||
96 | */ | ||
97 | |||
98 | enum rcu_try_flip_states { | ||
99 | |||
100 | /* | ||
101 | * Stay here if nothing is happening. Flip the counter if something | ||
102 | * starts happening. Denoted by "I" | ||
103 | */ | ||
104 | rcu_try_flip_idle_state, | ||
105 | |||
106 | /* | ||
107 | * Wait here for all CPUs to notice that the counter has flipped. This | ||
108 | * prevents the old set of counters from ever being incremented once | ||
109 | * we leave this state, which in turn is necessary because we cannot | ||
110 | * test any individual counter for zero -- we can only check the sum. | ||
111 | * Denoted by "A". | ||
112 | */ | ||
113 | rcu_try_flip_waitack_state, | ||
114 | |||
115 | /* | ||
116 | * Wait here for the sum of the old per-CPU counters to reach zero. | ||
117 | * Denoted by "Z". | ||
118 | */ | ||
119 | rcu_try_flip_waitzero_state, | ||
120 | |||
121 | /* | ||
122 | * Wait here for each of the other CPUs to execute a memory barrier. | ||
123 | * This is necessary to ensure that these other CPUs really have | ||
124 | * completed executing their RCU read-side critical sections, despite | ||
125 | * their CPUs wildly reordering memory. Denoted by "M". | ||
126 | */ | ||
127 | rcu_try_flip_waitmb_state, | ||
128 | }; | ||
129 | |||
130 | /* | ||
131 | * States for rcu_ctrlblk.sched_sleep. | ||
132 | */ | ||
133 | |||
134 | enum rcu_sched_sleep_states { | ||
135 | rcu_sched_not_sleeping, /* Not sleeping, callbacks need GP. */ | ||
136 | rcu_sched_sleep_prep, /* Thinking of sleeping, rechecking. */ | ||
137 | rcu_sched_sleeping, /* Sleeping, awaken if GP needed. */ | ||
138 | }; | ||
139 | |||
140 | struct rcu_ctrlblk { | ||
141 | spinlock_t fliplock; /* Protect state-machine transitions. */ | ||
142 | long completed; /* Number of last completed batch. */ | ||
143 | enum rcu_try_flip_states rcu_try_flip_state; /* The current state of | ||
144 | the rcu state machine */ | ||
145 | spinlock_t schedlock; /* Protect rcu_sched sleep state. */ | ||
146 | enum rcu_sched_sleep_states sched_sleep; /* rcu_sched state. */ | ||
147 | wait_queue_head_t sched_wq; /* Place for rcu_sched to sleep. */ | ||
148 | }; | ||
149 | |||
150 | struct rcu_dyntick_sched { | ||
151 | int dynticks; | ||
152 | int dynticks_snap; | ||
153 | int sched_qs; | ||
154 | int sched_qs_snap; | ||
155 | int sched_dynticks_snap; | ||
156 | }; | ||
157 | |||
158 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_dyntick_sched, rcu_dyntick_sched) = { | ||
159 | .dynticks = 1, | ||
160 | }; | ||
161 | |||
162 | static int rcu_pending(int cpu); | ||
163 | |||
164 | void rcu_sched_qs(int cpu) | ||
165 | { | ||
166 | struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); | ||
167 | |||
168 | rdssp->sched_qs++; | ||
169 | } | ||
170 | |||
171 | #ifdef CONFIG_NO_HZ | ||
172 | |||
173 | void rcu_enter_nohz(void) | ||
174 | { | ||
175 | static DEFINE_RATELIMIT_STATE(rs, 10 * HZ, 1); | ||
176 | |||
177 | smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */ | ||
178 | __get_cpu_var(rcu_dyntick_sched).dynticks++; | ||
179 | WARN_ON_RATELIMIT(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1, &rs); | ||
180 | } | ||
181 | |||
182 | void rcu_exit_nohz(void) | ||
183 | { | ||
184 | static DEFINE_RATELIMIT_STATE(rs, 10 * HZ, 1); | ||
185 | |||
186 | __get_cpu_var(rcu_dyntick_sched).dynticks++; | ||
187 | smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */ | ||
188 | WARN_ON_RATELIMIT(!(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1), | ||
189 | &rs); | ||
190 | } | ||
191 | |||
192 | #endif /* CONFIG_NO_HZ */ | ||
193 | |||
194 | |||
195 | static DEFINE_PER_CPU(struct rcu_data, rcu_data); | ||
196 | |||
197 | static struct rcu_ctrlblk rcu_ctrlblk = { | ||
198 | .fliplock = __SPIN_LOCK_UNLOCKED(rcu_ctrlblk.fliplock), | ||
199 | .completed = 0, | ||
200 | .rcu_try_flip_state = rcu_try_flip_idle_state, | ||
201 | .schedlock = __SPIN_LOCK_UNLOCKED(rcu_ctrlblk.schedlock), | ||
202 | .sched_sleep = rcu_sched_not_sleeping, | ||
203 | .sched_wq = __WAIT_QUEUE_HEAD_INITIALIZER(rcu_ctrlblk.sched_wq), | ||
204 | }; | ||
205 | |||
206 | static struct task_struct *rcu_sched_grace_period_task; | ||
207 | |||
208 | #ifdef CONFIG_RCU_TRACE | ||
209 | static char *rcu_try_flip_state_names[] = | ||
210 | { "idle", "waitack", "waitzero", "waitmb" }; | ||
211 | #endif /* #ifdef CONFIG_RCU_TRACE */ | ||
212 | |||
213 | static DECLARE_BITMAP(rcu_cpu_online_map, NR_CPUS) __read_mostly | ||
214 | = CPU_BITS_NONE; | ||
215 | |||
216 | /* | ||
217 | * Enum and per-CPU flag to determine when each CPU has seen | ||
218 | * the most recent counter flip. | ||
219 | */ | ||
220 | |||
221 | enum rcu_flip_flag_values { | ||
222 | rcu_flip_seen, /* Steady/initial state, last flip seen. */ | ||
223 | /* Only GP detector can update. */ | ||
224 | rcu_flipped /* Flip just completed, need confirmation. */ | ||
225 | /* Only corresponding CPU can update. */ | ||
226 | }; | ||
227 | static DEFINE_PER_CPU_SHARED_ALIGNED(enum rcu_flip_flag_values, rcu_flip_flag) | ||
228 | = rcu_flip_seen; | ||
229 | |||
230 | /* | ||
231 | * Enum and per-CPU flag to determine when each CPU has executed the | ||
232 | * needed memory barrier to fence in memory references from its last RCU | ||
233 | * read-side critical section in the just-completed grace period. | ||
234 | */ | ||
235 | |||
236 | enum rcu_mb_flag_values { | ||
237 | rcu_mb_done, /* Steady/initial state, no mb()s required. */ | ||
238 | /* Only GP detector can update. */ | ||
239 | rcu_mb_needed /* Flip just completed, need an mb(). */ | ||
240 | /* Only corresponding CPU can update. */ | ||
241 | }; | ||
242 | static DEFINE_PER_CPU_SHARED_ALIGNED(enum rcu_mb_flag_values, rcu_mb_flag) | ||
243 | = rcu_mb_done; | ||
244 | |||
245 | /* | ||
246 | * RCU_DATA_ME: find the current CPU's rcu_data structure. | ||
247 | * RCU_DATA_CPU: find the specified CPU's rcu_data structure. | ||
248 | */ | ||
249 | #define RCU_DATA_ME() (&__get_cpu_var(rcu_data)) | ||
250 | #define RCU_DATA_CPU(cpu) (&per_cpu(rcu_data, cpu)) | ||
251 | |||
252 | /* | ||
253 | * Helper macro for tracing when the appropriate rcu_data is not | ||
254 | * cached in a local variable, but where the CPU number is so cached. | ||
255 | */ | ||
256 | #define RCU_TRACE_CPU(f, cpu) RCU_TRACE(f, &(RCU_DATA_CPU(cpu)->trace)); | ||
257 | |||
258 | /* | ||
259 | * Helper macro for tracing against the current CPU's rcu_data when it | ||
260 | * is not cached in a local variable. The expansion already ends in ';'. | ||
261 | */ | ||
262 | #define RCU_TRACE_ME(f) RCU_TRACE(f, &(RCU_DATA_ME()->trace)); | ||
263 | |||
264 | /* | ||
265 | * Helper macro for tracing when the appropriate rcu_data is pointed | ||
266 | * to by a local variable (rdp). The expansion already ends in ';'. | ||
267 | */ | ||
268 | #define RCU_TRACE_RDP(f, rdp) RCU_TRACE(f, &((rdp)->trace)); | ||
269 | |||
270 | #define RCU_SCHED_BATCH_TIME (HZ / 50) /* NOTE(review): presumably a jiffies count, i.e. 1/50 second -- confirm at the use site. */ | ||
271 | |||
272 | /* | ||
273 | * Return the number of RCU batches processed thus far. Useful | ||
274 | * for debug and statistics. | ||
275 | */ | ||
276 | long rcu_batches_completed(void) | ||
277 | { | ||
278 | return rcu_ctrlblk.completed; /* Plain read of the flip counter; no lock is taken here. */ | ||
279 | } | ||
280 | EXPORT_SYMBOL_GPL(rcu_batches_completed); | ||
281 | |||
282 | void __rcu_read_lock(void) | ||
283 | { | ||
284 | int idx; /* Selects one of the two rcu_flipctr[] counters (0 or 1). */ | ||
285 | struct task_struct *t = current; | ||
286 | int nesting; | ||
287 | |||
288 | nesting = ACCESS_ONCE(t->rcu_read_lock_nesting); | ||
289 | if (nesting != 0) { | ||
290 | |||
291 | /* An earlier rcu_read_lock() covers us, just count it. */ | ||
292 | |||
293 | t->rcu_read_lock_nesting = nesting + 1; | ||
294 | |||
295 | } else { | ||
296 | unsigned long flags; | ||
297 | |||
298 | /* | ||
299 | * We disable interrupts for the following reasons: | ||
300 | * - If we get a scheduling clock interrupt here, and we | ||
301 | * end up acking the counter flip, it's like a promise | ||
302 | * that we will never increment the old counter again. | ||
303 | * Thus we will break that promise if that | ||
304 | * scheduling clock interrupt happens between the time | ||
305 | * we pick the .completed field and the time that we | ||
306 | * increment our counter. | ||
307 | * | ||
308 | * - We don't want to be preempted out here. | ||
309 | * | ||
310 | * NMIs can still occur, of course, and might themselves | ||
311 | * contain rcu_read_lock(). | ||
312 | */ | ||
313 | |||
314 | local_irq_save(flags); | ||
315 | |||
316 | /* | ||
317 | * Outermost nesting of rcu_read_lock(), so increment | ||
318 | * the current counter for the current CPU. Use volatile | ||
319 | * casts to prevent the compiler from reordering. | ||
320 | */ | ||
321 | |||
322 | idx = ACCESS_ONCE(rcu_ctrlblk.completed) & 0x1; | ||
323 | ACCESS_ONCE(RCU_DATA_ME()->rcu_flipctr[idx])++; | ||
324 | |||
325 | /* | ||
326 | * Now that the per-CPU counter has been incremented, we | ||
327 | * are protected from races with rcu_read_lock() invoked | ||
328 | * from NMI handlers on this CPU. We can therefore safely | ||
329 | * increment the nesting counter, relieving further NMIs | ||
330 | * of the need to increment the per-CPU counter. | ||
331 | */ | ||
332 | |||
333 | ACCESS_ONCE(t->rcu_read_lock_nesting) = nesting + 1; | ||
334 | |||
335 | /* | ||
336 | * Now that we have prevented any NMIs from storing | ||
337 | * to ->rcu_flipctr_idx, we can safely use it to | ||
338 | * remember which counter to decrement in the matching | ||
339 | * rcu_read_unlock(). | ||
340 | */ | ||
341 | |||
342 | ACCESS_ONCE(t->rcu_flipctr_idx) = idx; | ||
343 | local_irq_restore(flags); | ||
344 | } | ||
345 | } | ||
346 | EXPORT_SYMBOL_GPL(__rcu_read_lock); | ||
347 | |||
348 | void __rcu_read_unlock(void) | ||
349 | { | ||
350 | int idx; /* Counter index stored in ->rcu_flipctr_idx by __rcu_read_lock(). */ | ||
351 | struct task_struct *t = current; | ||
352 | int nesting; | ||
353 | |||
354 | nesting = ACCESS_ONCE(t->rcu_read_lock_nesting); | ||
355 | if (nesting > 1) { | ||
356 | |||
357 | /* | ||
358 | * We are still protected by the enclosing rcu_read_lock(), | ||
359 | * so simply decrement the counter. | ||
360 | */ | ||
361 | |||
362 | t->rcu_read_lock_nesting = nesting - 1; | ||
363 | |||
364 | } else { | ||
365 | unsigned long flags; | ||
366 | |||
367 | /* | ||
368 | * Disable local interrupts to prevent the grace-period | ||
369 | * detection state machine from seeing us half-done. | ||
370 | * NMIs can still occur, of course, and might themselves | ||
371 | * contain rcu_read_lock() and rcu_read_unlock(). | ||
372 | */ | ||
373 | |||
374 | local_irq_save(flags); | ||
375 | |||
376 | /* | ||
377 | * Outermost nesting of rcu_read_unlock(), so we must | ||
378 | * decrement the current counter for the current CPU. | ||
379 | * This must be done carefully, because NMIs can | ||
380 | * occur at any point in this code, and any rcu_read_lock() | ||
381 | * and rcu_read_unlock() pairs in the NMI handlers | ||
382 | * must interact non-destructively with this code. | ||
383 | * Lots of volatile casts, and -very- careful ordering. | ||
384 | * | ||
385 | * Changes to this code, including this one, must be | ||
386 | * inspected, validated, and tested extremely carefully!!! | ||
387 | */ | ||
388 | |||
389 | /* | ||
390 | * First, pick up the index. | ||
391 | */ | ||
392 | |||
393 | idx = ACCESS_ONCE(t->rcu_flipctr_idx); | ||
394 | |||
395 | /* | ||
396 | * Now that we have fetched the counter index, it is | ||
397 | * safe to decrement the per-task RCU nesting counter. | ||
398 | * After this, any interrupts or NMIs will increment and | ||
399 | * decrement the per-CPU counters. | ||
400 | */ | ||
401 | ACCESS_ONCE(t->rcu_read_lock_nesting) = nesting - 1; | ||
402 | |||
403 | /* | ||
404 | * The task's nesting count has now been decremented. | ||
405 | * NMIs that occur after that store will route their | ||
406 | * rcu_read_lock() calls through its "else" clause, and | ||
407 | * will thus start incrementing the per-CPU counter on | ||
408 | * their own. They will also clobber ->rcu_flipctr_idx, | ||
409 | * but that is OK, since we have already fetched it. | ||
410 | */ | ||
411 | |||
412 | ACCESS_ONCE(RCU_DATA_ME()->rcu_flipctr[idx])--; | ||
413 | local_irq_restore(flags); | ||
414 | } | ||
415 | } | ||
416 | EXPORT_SYMBOL_GPL(__rcu_read_unlock); | ||
417 | |||
418 | /* | ||
419 | * If a global counter flip has occurred since the last time that we | ||
420 | * advanced callbacks, advance them. Hardware interrupts must be | ||
421 | * disabled when calling this function. | ||
422 | */ | ||
423 | static void __rcu_advance_callbacks(struct rcu_data *rdp) | ||
424 | { | ||
425 | int cpu; | ||
426 | int i; | ||
427 | int wlc = 0; /* Number of non-empty wait lists after the shift below. */ | ||
428 | |||
429 | if (rdp->completed != rcu_ctrlblk.completed) { /* A flip occurred since rdp was last advanced. */ | ||
430 | if (rdp->waitlist[GP_STAGES - 1] != NULL) { /* Oldest wait stage: append it to the done list. */ | ||
431 | *rdp->donetail = rdp->waitlist[GP_STAGES - 1]; | ||
432 | rdp->donetail = rdp->waittail[GP_STAGES - 1]; | ||
433 | RCU_TRACE_RDP(rcupreempt_trace_move2done, rdp); | ||
434 | } | ||
435 | for (i = GP_STAGES - 2; i >= 0; i--) { /* Move every remaining stage up by one slot. */ | ||
436 | if (rdp->waitlist[i] != NULL) { | ||
437 | rdp->waitlist[i + 1] = rdp->waitlist[i]; | ||
438 | rdp->waittail[i + 1] = rdp->waittail[i]; | ||
439 | wlc++; | ||
440 | } else { | ||
441 | rdp->waitlist[i + 1] = NULL; | ||
442 | rdp->waittail[i + 1] = | ||
443 | &rdp->waitlist[i + 1]; | ||
444 | } | ||
445 | } | ||
446 | if (rdp->nextlist != NULL) { /* New callbacks enter wait stage 0. */ | ||
447 | rdp->waitlist[0] = rdp->nextlist; | ||
448 | rdp->waittail[0] = rdp->nexttail; | ||
449 | wlc++; | ||
450 | rdp->nextlist = NULL; | ||
451 | rdp->nexttail = &rdp->nextlist; | ||
452 | RCU_TRACE_RDP(rcupreempt_trace_move2wait, rdp); | ||
453 | } else { | ||
454 | rdp->waitlist[0] = NULL; | ||
455 | rdp->waittail[0] = &rdp->waitlist[0]; | ||
456 | } | ||
457 | rdp->waitlistcount = wlc; | ||
458 | rdp->completed = rcu_ctrlblk.completed; | ||
459 | } | ||
460 | |||
461 | /* | ||
462 | * Check to see if this CPU needs to report that it has seen | ||
463 | * the most recent counter flip, thereby declaring that all | ||
464 | * subsequent rcu_read_lock() invocations will respect this flip. | ||
465 | */ | ||
466 | |||
467 | cpu = raw_smp_processor_id(); /* The executing CPU's flag is the one acknowledged. */ | ||
468 | if (per_cpu(rcu_flip_flag, cpu) == rcu_flipped) { | ||
469 | smp_mb(); /* Subsequent counter accesses must see new value */ | ||
470 | per_cpu(rcu_flip_flag, cpu) = rcu_flip_seen; | ||
471 | smp_mb(); /* Subsequent RCU read-side critical sections */ | ||
472 | /* seen -after- acknowledgement. */ | ||
473 | } | ||
474 | } | ||
475 | |||
476 | #ifdef CONFIG_NO_HZ | ||
477 | static DEFINE_PER_CPU(int, rcu_update_flag); /* Nesting count maintained by rcu_irq_enter()/rcu_irq_exit(); nonzero once a handler has been entered with dynticks even. */ | ||
478 | |||
479 | /** | ||
480 | * rcu_irq_enter - Called from Hard irq handlers and NMI/SMI. | ||
481 | * | ||
482 | * If the CPU was idle with dynamic ticks active, this updates the | ||
483 | * rcu_dyntick_sched.dynticks to let the RCU handling know that the | ||
484 | * CPU is active. | ||
485 | */ | ||
486 | void rcu_irq_enter(void) | ||
487 | { | ||
488 | int cpu = smp_processor_id(); | ||
489 | struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); | ||
490 | |||
491 | if (per_cpu(rcu_update_flag, cpu)) /* Already tracking: just count the nesting. */ | ||
492 | per_cpu(rcu_update_flag, cpu)++; | ||
493 | |||
494 | /* | ||
495 | * Only update if we are coming from a stopped ticks mode | ||
496 | * (rcu_dyntick_sched.dynticks is even). | ||
497 | */ | ||
498 | if (!in_interrupt() && | ||
499 | (rdssp->dynticks & 0x1) == 0) { | ||
500 | /* | ||
501 | * The following might seem like we could have a race | ||
502 | * with NMI/SMIs. But this really isn't a problem. | ||
503 | * Here we do a read/modify/write, and the race happens | ||
504 | * when an NMI/SMI comes in after the read and before | ||
505 | * the write. But NMI/SMIs will increment this counter | ||
506 | * twice before returning, so the zero bit will not | ||
507 | * be corrupted by the NMI/SMI which is the most important | ||
508 | * part. | ||
509 | * | ||
510 | * The only thing is that we would bring back the counter | ||
511 | * to a position that it was in during the NMI/SMI. | ||
512 | * But the zero bit would be set, so the rest of the | ||
513 | * counter would again be ignored. | ||
514 | * | ||
515 | * On return from the IRQ, the counter may have the zero | ||
516 | * bit be 0 and the counter the same as the return from | ||
517 | * the NMI/SMI. If the state machine was so unlucky to | ||
518 | * see that, it still doesn't matter, since all | ||
519 | * RCU read-side critical sections on this CPU would | ||
520 | * have already completed. | ||
521 | */ | ||
522 | rdssp->dynticks++; | ||
523 | /* | ||
524 | * The following memory barrier ensures that any | ||
525 | * rcu_read_lock() primitives in the irq handler | ||
526 | * are seen by other CPUs to follow the above | ||
527 | * increment to rcu_dyntick_sched.dynticks. This is | ||
528 | * required in order for other CPUs to correctly | ||
529 | * determine when it is safe to advance the RCU | ||
530 | * grace-period state machine. | ||
531 | */ | ||
532 | smp_mb(); /* see above block comment. */ | ||
533 | /* | ||
534 | * Since we can't determine the dynamic tick mode from | ||
535 | * the rcu_dyntick_sched.dynticks after this routine, | ||
536 | * we use a second flag to acknowledge that we came | ||
537 | * from an idle state with ticks stopped. | ||
538 | */ | ||
539 | per_cpu(rcu_update_flag, cpu)++; | ||
540 | /* | ||
541 | * If we take an NMI/SMI now, they will also increment | ||
542 | * the rcu_update_flag, and will not update the | ||
543 | * rcu_dyntick_sched.dynticks on exit. That is for | ||
544 | * this IRQ to do. | ||
545 | */ | ||
546 | } | ||
547 | } | ||
548 | |||
549 | /** | ||
550 | * rcu_irq_exit - Called from exiting Hard irq context. | ||
551 | * | ||
552 | * If the CPU was idle with dynamic ticks active, update the | ||
553 | * rcu_dyntick_sched.dynticks to let the RCU handling be | ||
554 | * aware that the CPU is going back to idle with no ticks. | ||
555 | */ | ||
556 | void rcu_irq_exit(void) | ||
557 | { | ||
558 | int cpu = smp_processor_id(); | ||
559 | struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); | ||
560 | |||
561 | /* | ||
562 | * rcu_update_flag is set if we interrupted the CPU | ||
563 | * when it was idle with ticks stopped. | ||
564 | * Once this occurs, we keep track of interrupt nesting | ||
565 | * because a NMI/SMI could also come in, and we still | ||
566 | * only want the IRQ that started the increment of the | ||
567 | * rcu_dyntick_sched.dynticks to be the one that modifies | ||
568 | * it on exit. | ||
569 | */ | ||
570 | if (per_cpu(rcu_update_flag, cpu)) { | ||
571 | if (--per_cpu(rcu_update_flag, cpu)) /* Still nested: the outermost handler does the update. */ | ||
572 | return; | ||
573 | |||
574 | /* This must match the interrupt nesting */ | ||
575 | WARN_ON(in_interrupt()); | ||
576 | |||
577 | /* | ||
578 | * If an NMI/SMI happens now we are still | ||
579 | * protected by the rcu_dyntick_sched.dynticks being odd. | ||
580 | */ | ||
581 | |||
582 | /* | ||
583 | * The following memory barrier ensures that any | ||
584 | * rcu_read_unlock() primitives in the irq handler | ||
585 | * are seen by other CPUs to precede the following | ||
586 | * increment to rcu_dyntick_sched.dynticks. This | ||
587 | * is required in order for other CPUs to determine | ||
588 | * when it is safe to advance the RCU grace-period | ||
589 | * state machine. | ||
590 | */ | ||
591 | smp_mb(); /* see above block comment. */ | ||
592 | rdssp->dynticks++; | ||
593 | WARN_ON(rdssp->dynticks & 0x1); /* Must be even again after the increment. */ | ||
594 | } | ||
595 | } | ||
596 | |||
597 | void rcu_nmi_enter(void) | ||
598 | { | ||
599 | rcu_irq_enter(); /* NMI entry is handled by the same code as irq entry. */ | ||
600 | } | ||
601 | |||
602 | void rcu_nmi_exit(void) | ||
603 | { | ||
604 | rcu_irq_exit(); /* NMI exit is handled by the same code as irq exit. */ | ||
605 | } | ||
606 | |||
607 | static void dyntick_save_progress_counter(int cpu) | ||
608 | { | ||
609 | struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); | ||
610 | |||
611 | rdssp->dynticks_snap = rdssp->dynticks; /* Snapshot later compared by rcu_try_flip_waitack_needed() and rcu_try_flip_waitmb_needed(). */ | ||
612 | } | ||
613 | |||
614 | static inline int | ||
615 | rcu_try_flip_waitack_needed(int cpu) | ||
616 | { | ||
617 | long curr; /* Current ->dynticks value of the given CPU. */ | ||
618 | long snap; /* ->dynticks_snap saved by dyntick_save_progress_counter(). */ | ||
619 | struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); | ||
620 | |||
621 | curr = rdssp->dynticks; | ||
622 | snap = rdssp->dynticks_snap; | ||
623 | smp_mb(); /* force ordering with cpu entering/leaving dynticks. */ | ||
624 | |||
625 | /* | ||
626 | * If the CPU remained in dynticks mode for the entire time | ||
627 | * and didn't take any interrupts, NMIs, SMIs, or whatever, | ||
628 | * then it cannot be in the middle of an rcu_read_lock(), so | ||
629 | * the next rcu_read_lock() it executes must use the new value | ||
630 | * of the counter. So we can safely pretend that this CPU | ||
631 | * already acknowledged the counter. | ||
632 | */ | ||
633 | |||
634 | if ((curr == snap) && ((curr & 0x1) == 0)) | ||
635 | return 0; | ||
636 | |||
637 | /* | ||
638 | * If the CPU passed through or entered a dynticks idle phase with | ||
639 | * no active irq handlers, then, as above, we can safely pretend | ||
640 | * that this CPU already acknowledged the counter. | ||
641 | */ | ||
642 | |||
643 | if ((curr - snap) > 2 || (curr & 0x1) == 0) /* Any even curr returns 0 here, so this also covers the test above. */ | ||
644 | return 0; | ||
645 | |||
646 | /* We need this CPU to explicitly acknowledge the counter flip. */ | ||
647 | |||
648 | return 1; | ||
649 | } | ||
650 | |||
651 | static inline int | ||
652 | rcu_try_flip_waitmb_needed(int cpu) | ||
653 | { | ||
654 | long curr; /* Current ->dynticks value of the given CPU. */ | ||
655 | long snap; /* ->dynticks_snap saved by dyntick_save_progress_counter(). */ | ||
656 | struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); | ||
657 | |||
658 | curr = rdssp->dynticks; | ||
659 | snap = rdssp->dynticks_snap; | ||
660 | smp_mb(); /* force ordering with cpu entering/leaving dynticks. */ | ||
661 | |||
662 | /* | ||
663 | * If the CPU remained in dynticks mode for the entire time | ||
664 | * and didn't take any interrupts, NMIs, SMIs, or whatever, | ||
665 | * then it cannot have executed an RCU read-side critical section | ||
666 | * during that time, so there is no need for it to execute a | ||
667 | * memory barrier. | ||
668 | */ | ||
669 | |||
670 | if ((curr == snap) && ((curr & 0x1) == 0)) | ||
671 | return 0; | ||
672 | |||
673 | /* | ||
674 | * If the CPU either entered or exited an outermost interrupt, | ||
675 | * SMI, NMI, or whatever handler, then we know that it executed | ||
676 | * a memory barrier when doing so. So we don't need another one. | ||
677 | */ | ||
678 | if (curr != snap) | ||
679 | return 0; | ||
680 | |||
681 | /* We need the CPU to execute a memory barrier (curr == snap and curr is odd). */ | ||
682 | |||
683 | return 1; | ||
684 | } | ||
685 | |||
686 | static void dyntick_save_progress_counter_sched(int cpu) | ||
687 | { | ||
688 | struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); | ||
689 | |||
690 | rdssp->sched_dynticks_snap = rdssp->dynticks; /* Snapshot later compared by rcu_qsctr_inc_needed_dyntick(). */ | ||
691 | } | ||
692 | |||
693 | static int rcu_qsctr_inc_needed_dyntick(int cpu) | ||
694 | { | ||
695 | long curr; /* Current ->dynticks value of the given CPU. */ | ||
696 | long snap; /* ->sched_dynticks_snap saved by dyntick_save_progress_counter_sched(). */ | ||
697 | struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); | ||
698 | |||
699 | curr = rdssp->dynticks; | ||
700 | snap = rdssp->sched_dynticks_snap; | ||
701 | smp_mb(); /* force ordering with cpu entering/leaving dynticks. */ | ||
702 | |||
703 | /* | ||
704 | * If the CPU remained in dynticks mode for the entire time | ||
705 | * and didn't take any interrupts, NMIs, SMIs, or whatever, | ||
706 | * then it cannot be in the middle of an rcu_read_lock(), so | ||
707 | * the next rcu_read_lock() it executes must use the new value | ||
708 | * of the counter. Therefore, this CPU has been in a quiescent | ||
709 | * state the entire time, and we don't need to wait for it. | ||
710 | */ | ||
711 | |||
712 | if ((curr == snap) && ((curr & 0x1) == 0)) | ||
713 | return 0; | ||
714 | |||
715 | /* | ||
716 | * If the CPU passed through or entered a dynticks idle phase with | ||
717 | * no active irq handlers, then, as above, this CPU has already | ||
718 | * passed through a quiescent state. | ||
719 | */ | ||
720 | |||
721 | if ((curr - snap) > 2 || (snap & 0x1) == 0) /* Note: the low bit of snap (not curr) is tested; an even snap means ->dynticks was even when the snapshot was taken. */ | ||
722 | return 0; | ||
723 | |||
724 | /* We need this CPU to go through a quiescent state. */ | ||
725 | |||
726 | return 1; | ||
727 | } | ||
728 | |||
729 | #else /* !CONFIG_NO_HZ */ | ||
730 | |||
731 | # define dyntick_save_progress_counter(cpu) do { } while (0) /* No dynticks state to snapshot. */ | ||
732 | # define rcu_try_flip_waitack_needed(cpu) (1) /* Always wait for the CPU's explicit acknowledgement. */ | ||
733 | # define rcu_try_flip_waitmb_needed(cpu) (1) /* Always wait for the CPU's memory barrier. */ | ||
734 | |||
735 | # define dyntick_save_progress_counter_sched(cpu) do { } while (0) /* No dynticks state to snapshot. */ | ||
736 | # define rcu_qsctr_inc_needed_dyntick(cpu) (1) /* Dynticks can never excuse a CPU here. */ | ||
737 | |||
738 | #endif /* CONFIG_NO_HZ */ | ||
739 | |||
740 | static void save_qsctr_sched(int cpu) | ||
741 | { | ||
742 | struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); | ||
743 | |||
744 | rdssp->sched_qs_snap = rdssp->sched_qs; /* Snapshot later compared by rcu_qsctr_inc_needed(). */ | ||
745 | } | ||
746 | |||
747 | static inline int rcu_qsctr_inc_needed(int cpu) | ||
748 | { | ||
749 | struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); | ||
750 | |||
751 | /* | ||
752 | * If ->sched_qs has changed since save_qsctr_sched() took its | ||
753 | * snapshot, there is no more need to wait on this CPU: return 0. | ||
754 | */ | ||
755 | |||
756 | if (rdssp->sched_qs != rdssp->sched_qs_snap) { | ||
757 | smp_mb(); /* force ordering with cpu entering schedule(). */ | ||
758 | return 0; | ||
759 | } | ||
760 | |||
761 | /* We need this CPU to go through a quiescent state. */ | ||
762 | |||
763 | return 1; | ||
764 | } | ||
765 | |||
766 | /* | ||
767 | * Get here when RCU is idle. Decide whether we need to | ||
768 | * move out of idle state, and return non-zero if so. | ||
769 | * "Straightforward" approach for the moment, might later | ||
770 | * use callback-list lengths, grace-period duration, or | ||
771 | * some such to determine when to exit idle state. | ||
772 | * Might also need a pre-idle test that does not acquire | ||
773 | * the lock, but let's get the simple case working first... | ||
774 | */ | ||
775 | |||
776 | static int | ||
777 | rcu_try_flip_idle(void) | ||
778 | { | ||
779 | int cpu; | ||
780 | |||
781 | RCU_TRACE_ME(rcupreempt_trace_try_flip_i1); | ||
782 | if (!rcu_pending(smp_processor_id())) { /* Nothing pending on this CPU: stay idle. */ | ||
783 | RCU_TRACE_ME(rcupreempt_trace_try_flip_ie1); | ||
784 | return 0; | ||
785 | } | ||
786 | |||
787 | /* | ||
788 | * Do the flip. | ||
789 | */ | ||
790 | |||
791 | RCU_TRACE_ME(rcupreempt_trace_try_flip_g1); | ||
792 | rcu_ctrlblk.completed++; /* stands in for rcu_try_flip_g2 */ | ||
793 | |||
794 | /* | ||
795 | * Need a memory barrier so that other CPUs see the new | ||
796 | * counter value before they see the subsequent change of all | ||
797 | * the rcu_flip_flag instances to rcu_flipped. | ||
798 | */ | ||
799 | |||
800 | smp_mb(); /* see above block comment. */ | ||
801 | |||
802 | /* Now ask each CPU in rcu_cpu_online_map for acknowledgement of the flip. */ | ||
803 | |||
804 | for_each_cpu(cpu, to_cpumask(rcu_cpu_online_map)) { | ||
805 | per_cpu(rcu_flip_flag, cpu) = rcu_flipped; | ||
806 | dyntick_save_progress_counter(cpu); | ||
807 | } | ||
808 | |||
809 | return 1; | ||
810 | } | ||
811 | |||
812 | /* | ||
813 | * Wait for CPUs to acknowledge the flip. | ||
814 | */ | ||
815 | |||
816 | static int | ||
817 | rcu_try_flip_waitack(void) | ||
818 | { | ||
819 | int cpu; | ||
820 | |||
821 | RCU_TRACE_ME(rcupreempt_trace_try_flip_a1); | ||
822 | for_each_cpu(cpu, to_cpumask(rcu_cpu_online_map)) | ||
823 | if (rcu_try_flip_waitack_needed(cpu) && | ||
824 | per_cpu(rcu_flip_flag, cpu) != rcu_flip_seen) { /* This CPU must ack but has not yet: return 0. */ | ||
825 | RCU_TRACE_ME(rcupreempt_trace_try_flip_ae1); | ||
826 | return 0; | ||
827 | } | ||
828 | |||
829 | /* | ||
830 | * Make sure our checks above don't bleed into subsequent | ||
831 | * waiting for the sum of the counters to reach zero. | ||
832 | */ | ||
833 | |||
834 | smp_mb(); /* see above block comment. */ | ||
835 | RCU_TRACE_ME(rcupreempt_trace_try_flip_a2); | ||
836 | return 1; | ||
837 | } | ||
838 | |||
839 | /* | ||
840 | * Wait for collective ``last'' counter to reach zero, | ||
841 | * then tell all CPUs to do an end-of-grace-period memory barrier. | ||
842 | */ | ||
843 | |||
844 | static int | ||
845 | rcu_try_flip_waitzero(void) | ||
846 | { | ||
847 | int cpu; | ||
848 | int lastidx = !(rcu_ctrlblk.completed & 0x1); /* The index not selected by the current .completed value. */ | ||
849 | int sum = 0; | ||
850 | |||
851 | /* Check to see if the sum of the "last" counters is zero. */ | ||
852 | |||
853 | RCU_TRACE_ME(rcupreempt_trace_try_flip_z1); | ||
854 | for_each_possible_cpu(cpu) /* All possible CPUs, not only those in rcu_cpu_online_map. */ | ||
855 | sum += RCU_DATA_CPU(cpu)->rcu_flipctr[lastidx]; | ||
856 | if (sum != 0) { | ||
857 | RCU_TRACE_ME(rcupreempt_trace_try_flip_ze1); | ||
858 | return 0; | ||
859 | } | ||
860 | |||
861 | /* | ||
862 | * This ensures that the other CPUs see the call for | ||
863 | * memory barriers -after- the sum to zero has been | ||
864 | * detected here. | ||
865 | */ | ||
866 | smp_mb(); /* ^^^^^^^^^^^^ */ | ||
867 | |||
868 | /* Call for a memory barrier from each CPU. */ | ||
869 | for_each_cpu(cpu, to_cpumask(rcu_cpu_online_map)) { | ||
870 | per_cpu(rcu_mb_flag, cpu) = rcu_mb_needed; | ||
871 | dyntick_save_progress_counter(cpu); | ||
872 | } | ||
873 | |||
874 | RCU_TRACE_ME(rcupreempt_trace_try_flip_z2); | ||
875 | return 1; | ||
876 | } | ||
877 | |||
878 | /* | ||
879 | * Wait for all CPUs to do their end-of-grace-period memory barrier. | ||
880 | * Return 1 once all CPUs have done so, 0 while any CPU still owes one. | ||
881 | */ | ||
882 | |||
883 | static int | ||
884 | rcu_try_flip_waitmb(void) | ||
885 | { | ||
886 | int cpu; | ||
887 | |||
888 | RCU_TRACE_ME(rcupreempt_trace_try_flip_m1); | ||
889 | for_each_cpu(cpu, to_cpumask(rcu_cpu_online_map)) | ||
890 | if (rcu_try_flip_waitmb_needed(cpu) && | ||
891 | per_cpu(rcu_mb_flag, cpu) != rcu_mb_done) { /* This CPU still owes a barrier: return 0. */ | ||
892 | RCU_TRACE_ME(rcupreempt_trace_try_flip_me1); | ||
893 | return 0; | ||
894 | } | ||
895 | |||
896 | smp_mb(); /* Ensure that the above checks precede any following flip. */ | ||
897 | RCU_TRACE_ME(rcupreempt_trace_try_flip_m2); | ||
898 | return 1; /* Every CPU checked above is done. */ | ||
899 | } | ||
900 | |||
901 | /* | ||
902 | * Attempt a single flip of the counters. Remember, a single flip does | ||
903 | * -not- constitute a grace period. Instead, the interval between | ||
904 | * at least GP_STAGES consecutive flips is a grace period. | ||
905 | * | ||
906 | * If anyone is nuts enough to run this CONFIG_PREEMPT_RCU implementation | ||
907 | * on a large SMP, they might want to use a hierarchical organization of | ||
908 | * the per-CPU-counter pairs. | ||
909 | */ | ||
910 | static void rcu_try_flip(void) | ||
911 | { | ||
912 | unsigned long flags; | ||
913 | |||
914 | RCU_TRACE_ME(rcupreempt_trace_try_flip_1); | ||
915 | if (unlikely(!spin_trylock_irqsave(&rcu_ctrlblk.fliplock, flags))) { /* Lock not acquired: give up without waiting. */ | ||
916 | RCU_TRACE_ME(rcupreempt_trace_try_flip_e1); | ||
917 | return; | ||
918 | } | ||
919 | |||
920 | /* | ||
921 | * Take at most one transition through the RCU grace-period | ||
922 | * flip-counter state machine; each case ends in a break. | ||
923 | */ | ||
924 | |||
925 | switch (rcu_ctrlblk.rcu_try_flip_state) { | ||
926 | case rcu_try_flip_idle_state: | ||
927 | if (rcu_try_flip_idle()) | ||
928 | rcu_ctrlblk.rcu_try_flip_state = | ||
929 | rcu_try_flip_waitack_state; | ||
930 | break; | ||
931 | case rcu_try_flip_waitack_state: | ||
932 | if (rcu_try_flip_waitack()) | ||
933 | rcu_ctrlblk.rcu_try_flip_state = | ||
934 | rcu_try_flip_waitzero_state; | ||
935 | break; | ||
936 | case rcu_try_flip_waitzero_state: | ||
937 | if (rcu_try_flip_waitzero()) | ||
938 | rcu_ctrlblk.rcu_try_flip_state = | ||
939 | rcu_try_flip_waitmb_state; | ||
940 | break; | ||
941 | case rcu_try_flip_waitmb_state: | ||
942 | if (rcu_try_flip_waitmb()) | ||
943 | rcu_ctrlblk.rcu_try_flip_state = | ||
944 | rcu_try_flip_idle_state; /* Last case: no break, and the switch has no default. */ | ||
945 | } | ||
946 | spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags); | ||
947 | } | ||
948 | |||
949 | /* | ||
950 | * Check to see if this CPU needs to do a memory barrier in order to | ||
951 | * ensure that any prior RCU read-side critical sections have committed | ||
952 | * their counter manipulations and critical-section memory references | ||
953 | * before declaring the grace period to be completed. | ||
954 | */ | ||
955 | static void rcu_check_mb(int cpu) | ||
956 | { | ||
957 | if (per_cpu(rcu_mb_flag, cpu) == rcu_mb_needed) { | ||
958 | smp_mb(); /* Ensure RCU read-side accesses are visible. */ | ||
959 | per_cpu(rcu_mb_flag, cpu) = rcu_mb_done; /* Checked by rcu_try_flip_waitmb(). */ | ||
960 | } | ||
961 | } | ||
962 | |||
963 | void rcu_check_callbacks(int cpu, int user) | ||
964 | { | ||
965 | unsigned long flags; | ||
966 | struct rcu_data *rdp; | ||
967 | |||
968 | if (!rcu_pending(cpu)) | ||
969 | return; /* if nothing for RCU to do. */ | ||
970 | |||
971 | /* | ||
972 | * If this CPU took its interrupt from user mode or from the | ||
973 | * idle loop, and this is not a nested interrupt, then | ||
974 | * this CPU has to have exited all prior preempt-disable | ||
975 | * sections of code. So invoke rcu_sched_qs() to note this. | ||
976 | * | ||
977 | * The memory barrier is needed to handle the case where | ||
978 | * writes from a preempt-disable section of code get reordered | ||
979 | * into schedule() by this CPU's write buffer. So the memory | ||
980 | * barrier makes sure that the rcu_sched_qs() is seen by other | ||
981 | * CPUs to happen after any such write. | ||
982 | */ | ||
983 | |||
984 | rdp = RCU_DATA_CPU(cpu); | ||
985 | if (user || | ||
986 | (idle_cpu(cpu) && !in_softirq() && | ||
987 | hardirq_count() <= (1 << HARDIRQ_SHIFT))) { | ||
988 | smp_mb(); /* Guard against aggressive schedule(). */ | ||
989 | rcu_sched_qs(cpu); | ||
990 | } | ||
991 | |||
992 | rcu_check_mb(cpu); | ||
993 | if (rcu_ctrlblk.completed == rdp->completed) /* rdp is caught up: try to move the state machine. */ | ||
994 | rcu_try_flip(); | ||
995 | spin_lock_irqsave(&rdp->lock, flags); | ||
996 | RCU_TRACE_RDP(rcupreempt_trace_check_callbacks, rdp); | ||
997 | __rcu_advance_callbacks(rdp); | ||
998 | if (rdp->donelist == NULL) { | ||
999 | spin_unlock_irqrestore(&rdp->lock, flags); | ||
1000 | } else { | ||
1001 | spin_unlock_irqrestore(&rdp->lock, flags); | ||
1002 | raise_softirq(RCU_SOFTIRQ); /* Raised only when donelist is non-empty. */ | ||
1003 | } | ||
1004 | } | ||
1005 | |||
1006 | /* | ||
1007 | * Needed by dynticks, to make sure all RCU processing has finished | ||
1008 | * when we go idle: | ||
1009 | */ | ||
1010 | void rcu_advance_callbacks(int cpu, int user) | ||
1011 | { | ||
1012 | unsigned long flags; | ||
1013 | struct rcu_data *rdp = RCU_DATA_CPU(cpu); /* The 'user' argument is not referenced in this body. */ | ||
1014 | |||
1015 | if (rcu_ctrlblk.completed == rdp->completed) { | ||
1016 | rcu_try_flip(); | ||
1017 | if (rcu_ctrlblk.completed == rdp->completed) /* Still no new flip: nothing to advance. */ | ||
1018 | return; | ||
1019 | } | ||
1020 | spin_lock_irqsave(&rdp->lock, flags); | ||
1021 | RCU_TRACE_RDP(rcupreempt_trace_check_callbacks, rdp); | ||
1022 | __rcu_advance_callbacks(rdp); | ||
1023 | spin_unlock_irqrestore(&rdp->lock, flags); | ||
1024 | } | ||
1025 | |||
1026 | #ifdef CONFIG_HOTPLUG_CPU | ||
1027 | #define rcu_offline_cpu_enqueue(srclist, srctail, dstlist, dsttail) do { \ | ||
1028 | *dsttail = srclist; /* executed even when srclist is NULL */ \ | ||
1029 | if (srclist != NULL) { \ | ||
1030 | dsttail = srctail; \ | ||
1031 | srclist = NULL; /* leave the source list empty */ \ | ||
1032 | srctail = &srclist;\ | ||
1033 | } \ | ||
1034 | } while (0) | ||
1035 | |||
1036 | void rcu_offline_cpu(int cpu) | ||
1037 | { | ||
1038 | int i; | ||
1039 | struct rcu_head *list = NULL; /* Collects the done, wait and next lists. */ | ||
1040 | unsigned long flags; | ||
1041 | struct rcu_data *rdp = RCU_DATA_CPU(cpu); | ||
1042 | struct rcu_head *schedlist = NULL; /* Collects waitschedlist and nextschedlist. */ | ||
1043 | struct rcu_head **schedtail = &schedlist; | ||
1044 | struct rcu_head **tail = &list; | ||
1045 | |||
1046 | /* | ||
1047 | * Remove all callbacks from the newly dead CPU, retaining order. | ||
1048 | * Otherwise rcu_barrier() will fail. | ||
1049 | */ | ||
1050 | |||
1051 | spin_lock_irqsave(&rdp->lock, flags); | ||
1052 | rcu_offline_cpu_enqueue(rdp->donelist, rdp->donetail, list, tail); | ||
1053 | for (i = GP_STAGES - 1; i >= 0; i--) | ||
1054 | rcu_offline_cpu_enqueue(rdp->waitlist[i], rdp->waittail[i], | ||
1055 | list, tail); | ||
1056 | rcu_offline_cpu_enqueue(rdp->nextlist, rdp->nexttail, list, tail); | ||
1057 | rcu_offline_cpu_enqueue(rdp->waitschedlist, rdp->waitschedtail, | ||
1058 | schedlist, schedtail); | ||
1059 | rcu_offline_cpu_enqueue(rdp->nextschedlist, rdp->nextschedtail, | ||
1060 | schedlist, schedtail); | ||
1061 | rdp->rcu_sched_sleeping = 0; | ||
1062 | spin_unlock_irqrestore(&rdp->lock, flags); | ||
1063 | rdp->waitlistcount = 0; /* NOTE(review): written after rdp->lock is dropped -- confirm that is intended. */ | ||
1064 | |||
1065 | /* Disengage the newly dead CPU from the grace-period computation. */ | ||
1066 | |||
1067 | spin_lock_irqsave(&rcu_ctrlblk.fliplock, flags); | ||
1068 | rcu_check_mb(cpu); | ||
1069 | if (per_cpu(rcu_flip_flag, cpu) == rcu_flipped) { | ||
1070 | smp_mb(); /* Subsequent counter accesses must see new value */ | ||
1071 | per_cpu(rcu_flip_flag, cpu) = rcu_flip_seen; | ||
1072 | smp_mb(); /* Subsequent RCU read-side critical sections */ | ||
1073 | /* seen -after- acknowledgement. */ | ||
1074 | } | ||
1075 | |||
1076 | cpumask_clear_cpu(cpu, to_cpumask(rcu_cpu_online_map)); | ||
1077 | |||
1078 | spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags); | ||
1079 | |||
1080 | /* | ||
1081 | * Place the removed callbacks on the current CPU's queue. | ||
1082 | * Make them all start a new grace period: simple approach, | ||
1083 | * in theory could starve a given set of callbacks, but | ||
1084 | * you would need to be doing some serious CPU hotplugging | ||
1085 | * to make this happen. If this becomes a problem, adding | ||
1086 | * a synchronize_rcu() to the hotplug path would be a simple | ||
1087 | * fix. | ||
1088 | */ | ||
1089 | |||
1090 | local_irq_save(flags); /* disable preempt till we know what lock. */ | ||
1091 | rdp = RCU_DATA_ME(); /* From here on rdp is the executing CPU's rcu_data. */ | ||
1092 | spin_lock(&rdp->lock); | ||
1093 | *rdp->nexttail = list; | ||
1094 | if (list) | ||
1095 | rdp->nexttail = tail; | ||
1096 | *rdp->nextschedtail = schedlist; | ||
1097 | if (schedlist) | ||
1098 | rdp->nextschedtail = schedtail; | ||
1099 | spin_unlock_irqrestore(&rdp->lock, flags); | ||
1100 | } | ||
1101 | |||
1102 | #else /* #ifdef CONFIG_HOTPLUG_CPU */ | ||
1103 | |||
1104 | void rcu_offline_cpu(int cpu) | ||
1105 | { /* Empty stub used when CONFIG_HOTPLUG_CPU is not defined. */ | ||
1106 | } | ||
1107 | |||
1108 | #endif /* #else #ifdef CONFIG_HOTPLUG_CPU */ | ||
1109 | |||
1110 | void __cpuinit rcu_online_cpu(int cpu) | ||
1111 | { | ||
1112 | unsigned long flags; | ||
1113 | struct rcu_data *rdp; | ||
1114 | |||
1115 | spin_lock_irqsave(&rcu_ctrlblk.fliplock, flags); | ||
1116 | cpumask_set_cpu(cpu, to_cpumask(rcu_cpu_online_map)); /* From now on the rcu_try_flip_*() scans include this CPU. */ | ||
1117 | spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags); | ||
1118 | |||
1119 | /* | ||
1120 | * The rcu_sched grace-period processing might have bypassed | ||
1121 | * this CPU, given that it was not in the rcu_cpu_online_map | ||
1122 | * when the grace-period scan started. This means that the | ||
1123 | * grace-period task might sleep. So make sure that if this | ||
1124 | * should happen, the first callback posted to this CPU will | ||
1125 | * wake up the grace-period task if need be. | ||
1126 | */ | ||
1127 | |||
1128 | rdp = RCU_DATA_CPU(cpu); | ||
1129 | spin_lock_irqsave(&rdp->lock, flags); | ||
1130 | rdp->rcu_sched_sleeping = 1; | ||
1131 | spin_unlock_irqrestore(&rdp->lock, flags); | ||
1132 | } | ||
1133 | |||
1134 | static void rcu_process_callbacks(struct softirq_action *unused) | ||
1135 | { /* Detach this CPU's donelist under rdp->lock, then invoke every callback on it with the lock released. */ | ||
1136 | unsigned long flags; | ||
1137 | struct rcu_head *next, *list; | ||
1138 | struct rcu_data *rdp; | ||
1139 | |||
1140 | local_irq_save(flags); | ||
1141 | rdp = RCU_DATA_ME(); | ||
1142 | spin_lock(&rdp->lock); | ||
1143 | list = rdp->donelist; | ||
1144 | if (list == NULL) { /* nothing ready to invoke */ | ||
1145 | spin_unlock_irqrestore(&rdp->lock, flags); | ||
1146 | return; | ||
1147 | } | ||
1148 | rdp->donelist = NULL; /* reset donelist/donetail to the empty-list state */ | ||
1149 | rdp->donetail = &rdp->donelist; | ||
1150 | RCU_TRACE_RDP(rcupreempt_trace_done_remove, rdp); | ||
1151 | spin_unlock_irqrestore(&rdp->lock, flags); | ||
1152 | while (list) { | ||
1153 | next = list->next; /* read ->next before the call; presumably the callback may free its rcu_head -- confirm */ | ||
1154 | list->func(list); | ||
1155 | list = next; | ||
1156 | RCU_TRACE_ME(rcupreempt_trace_invoke); | ||
1157 | } | ||
1158 | } | ||
1159 | |||
1160 | void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) | ||
1161 | { /* Record @func in @head and append @head to the current CPU's next list. */ | ||
1162 | unsigned long flags; | ||
1163 | struct rcu_data *rdp; | ||
1164 | |||
1165 | head->func = func; | ||
1166 | head->next = NULL; | ||
1167 | local_irq_save(flags); | ||
1168 | rdp = RCU_DATA_ME(); | ||
1169 | spin_lock(&rdp->lock); | ||
1170 | __rcu_advance_callbacks(rdp); /* defined outside this chunk; called before the new entry is linked in */ | ||
1171 | *rdp->nexttail = head; /* link at the tail, then move the tail pointer to the new entry */ | ||
1172 | rdp->nexttail = &head->next; | ||
1173 | RCU_TRACE_RDP(rcupreempt_trace_next_add, rdp); | ||
1174 | spin_unlock_irqrestore(&rdp->lock, flags); | ||
1175 | } | ||
1176 | EXPORT_SYMBOL_GPL(call_rcu); | ||
1177 | |||
1178 | void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) | ||
1179 | { /* Append @head to this CPU's nextsched list and, if this CPU was flagged as sleeping, wake the waiter on sched_wq. */ | ||
1180 | unsigned long flags; | ||
1181 | struct rcu_data *rdp; | ||
1182 | int wake_gp = 0; | ||
1183 | |||
1184 | head->func = func; | ||
1185 | head->next = NULL; | ||
1186 | local_irq_save(flags); | ||
1187 | rdp = RCU_DATA_ME(); | ||
1188 | spin_lock(&rdp->lock); | ||
1189 | *rdp->nextschedtail = head; | ||
1190 | rdp->nextschedtail = &head->next; | ||
1191 | if (rdp->rcu_sched_sleeping) { | ||
1192 | |||
1193 | /* Grace-period processing might be sleeping... */ | ||
1194 | |||
1195 | rdp->rcu_sched_sleeping = 0; /* cleared so only the first poster on this CPU attempts the wake-up */ | ||
1196 | wake_gp = 1; | ||
1197 | } | ||
1198 | spin_unlock_irqrestore(&rdp->lock, flags); | ||
1199 | if (wake_gp) { | ||
1200 | |||
1201 | /* Wake up grace-period processing, unless someone beat us. */ | ||
1202 | |||
1203 | spin_lock_irqsave(&rcu_ctrlblk.schedlock, flags); | ||
1204 | if (rcu_ctrlblk.sched_sleep != rcu_sched_sleeping) | ||
1205 | wake_gp = 0; /* not actually asleep: skip the wake_up */ | ||
1206 | rcu_ctrlblk.sched_sleep = rcu_sched_not_sleeping; /* set unconditionally, whatever the prior state */ | ||
1207 | spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags); | ||
1208 | if (wake_gp) | ||
1209 | wake_up_interruptible(&rcu_ctrlblk.sched_wq); | ||
1210 | } | ||
1211 | } | ||
1212 | EXPORT_SYMBOL_GPL(call_rcu_sched); | ||
1213 | |||
1214 | /* | ||
1215 | * Wait until all currently running preempt_disable() code segments | ||
1216 | * (including hardware-irq-disable segments) complete. Note that | ||
1217 | * in -rt this does -not- necessarily result in all currently executing | ||
1218 | * interrupt -handlers- having completed. | ||
1219 | * | ||
1220 | * Implemented by posting an on-stack callback with call_rcu_sched() | ||
1221 | * and blocking on its completion; returns at once when only one CPU | ||
1222 | * is online. | ||
1223 | */ | ||
1220 | void __synchronize_sched(void) | ||
1221 | { | ||
1222 | struct rcu_synchronize rcu; | ||
1223 | |||
1224 | if (num_online_cpus() == 1) | ||
1225 | return; /* blocking is gp if only one CPU! */ | ||
1226 | |||
1227 | init_completion(&rcu.completion); | ||
1228 | /* Will wake me after RCU finished. */ | ||
1229 | call_rcu_sched(&rcu.head, wakeme_after_rcu); | ||
1230 | /* Wait for it. */ | ||
1231 | wait_for_completion(&rcu.completion); | ||
1232 | } | ||
1233 | EXPORT_SYMBOL_GPL(__synchronize_sched); | ||
1234 | |||
1235 | /* | ||
1236 | * kthread function that manages call_rcu_sched grace periods. | ||
1237 | * Loops until kthread_should_stop() and then returns 0; @arg is unused. | ||
1237 | */ | ||
1238 | static int rcu_sched_grace_period(void *arg) | ||
1239 | { | ||
1240 | int couldsleep; /* might sleep after current pass. */ | ||
1241 | int couldsleepnext = 0; /* might sleep after next pass. */ | ||
1242 | int cpu; | ||
1243 | unsigned long flags; | ||
1244 | struct rcu_data *rdp; | ||
1245 | int ret; | ||
1246 | |||
1247 | /* | ||
1248 | * Each pass through the following loop handles one | ||
1249 | * rcu_sched grace period cycle. | ||
1250 | */ | ||
1251 | do { | ||
1252 | /* Save each CPU's current state. */ | ||
1253 | |||
1254 | for_each_online_cpu(cpu) { | ||
1255 | dyntick_save_progress_counter_sched(cpu); | ||
1256 | save_qsctr_sched(cpu); | ||
1257 | } | ||
1258 | |||
1259 | /* | ||
1260 | * Sleep for about an RCU grace-period's worth to | ||
1261 | * allow better batching and to consume less CPU. | ||
1262 | */ | ||
1263 | schedule_timeout_interruptible(RCU_SCHED_BATCH_TIME); | ||
1264 | |||
1265 | /* | ||
1266 | * If there was nothing to do last time, prepare to | ||
1267 | * sleep at the end of the current grace period cycle. | ||
1268 | */ | ||
1269 | couldsleep = couldsleepnext; | ||
1270 | couldsleepnext = 1; | ||
1271 | if (couldsleep) { | ||
1272 | spin_lock_irqsave(&rcu_ctrlblk.schedlock, flags); | ||
1273 | rcu_ctrlblk.sched_sleep = rcu_sched_sleep_prep; | ||
1274 | spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags); | ||
1275 | } | ||
1276 | |||
1277 | /* | ||
1278 | * Wait on each CPU in turn to have either visited | ||
1279 | * a quiescent state or been in dynticks-idle mode. | ||
1280 | */ | ||
1281 | for_each_online_cpu(cpu) { | ||
1282 | while (rcu_qsctr_inc_needed(cpu) && | ||
1283 | rcu_qsctr_inc_needed_dyntick(cpu)) { | ||
1284 | /* resched_cpu(cpu); @@@ */ | ||
1285 | schedule_timeout_interruptible(1); | ||
1286 | } | ||
1287 | } | ||
1288 | |||
1289 | /* Advance callbacks for each CPU. */ | ||
1290 | |||
1291 | for_each_online_cpu(cpu) { | ||
1292 | |||
1293 | rdp = RCU_DATA_CPU(cpu); | ||
1294 | spin_lock_irqsave(&rdp->lock, flags); | ||
1295 | |||
1296 | /* | ||
1297 | * We are running on this CPU irq-disabled, so no | ||
1298 | * CPU can go offline until we re-enable irqs. | ||
1299 | * The current CPU might have already gone | ||
1300 | * offline (between the for_each_online_cpu and | ||
1301 | * the spin_lock_irqsave), but in that case all its | ||
1302 | * callback lists will be empty, so no harm done. | ||
1303 | * | ||
1304 | * Advance the callbacks! We share normal RCU's | ||
1305 | * donelist, since callbacks are invoked the | ||
1306 | * same way in either case. | ||
1307 | */ | ||
1308 | if (rdp->waitschedlist != NULL) { | ||
1309 | *rdp->donetail = rdp->waitschedlist; | ||
1310 | rdp->donetail = rdp->waitschedtail; | ||
1311 | |||
1312 | /* | ||
1313 | * Next rcu_check_callbacks() will | ||
1314 | * do the required raise_softirq(). | ||
1315 | */ | ||
1316 | } | ||
1317 | if (rdp->nextschedlist != NULL) { | ||
1318 | rdp->waitschedlist = rdp->nextschedlist; | ||
1319 | rdp->waitschedtail = rdp->nextschedtail; | ||
1320 | couldsleep = 0; /* new callbacks were found: do not sleep after this pass or the next */ | ||
1321 | couldsleepnext = 0; | ||
1322 | } else { | ||
1323 | rdp->waitschedlist = NULL; | ||
1324 | rdp->waitschedtail = &rdp->waitschedlist; | ||
1325 | } | ||
1326 | rdp->nextschedlist = NULL; | ||
1327 | rdp->nextschedtail = &rdp->nextschedlist; | ||
1328 | |||
1329 | /* Mark sleep intention. */ | ||
1330 | |||
1331 | rdp->rcu_sched_sleeping = couldsleep; | ||
1332 | |||
1333 | spin_unlock_irqrestore(&rdp->lock, flags); | ||
1334 | } | ||
1335 | |||
1336 | /* If we saw callbacks on the last scan, go deal with them. */ | ||
1337 | |||
1338 | if (!couldsleep) | ||
1339 | continue; | ||
1340 | |||
1341 | /* Attempt to block... */ | ||
1342 | |||
1343 | spin_lock_irqsave(&rcu_ctrlblk.schedlock, flags); | ||
1344 | if (rcu_ctrlblk.sched_sleep != rcu_sched_sleep_prep) { | ||
1345 | |||
1346 | /* | ||
1347 | * Someone posted a callback after we scanned. | ||
1348 | * Go take care of it. | ||
1349 | */ | ||
1350 | spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags); | ||
1351 | couldsleepnext = 0; | ||
1352 | continue; | ||
1353 | } | ||
1354 | |||
1355 | /* Block until the next person posts a callback. */ | ||
1356 | |||
1357 | rcu_ctrlblk.sched_sleep = rcu_sched_sleeping; | ||
1358 | spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags); | ||
1359 | ret = 0; /* unused */ | ||
1360 | __wait_event_interruptible(rcu_ctrlblk.sched_wq, | ||
1361 | rcu_ctrlblk.sched_sleep != rcu_sched_sleeping, | ||
1362 | ret); | ||
1363 | |||
1364 | couldsleepnext = 0; | ||
1365 | |||
1366 | } while (!kthread_should_stop()); | ||
1367 | |||
1368 | return (0); | ||
1369 | } | ||
1370 | |||
1371 | /* | ||
1372 | * Report whether @cpu still has RCU work ahead of it, even if nothing | ||
1373 | * has to happen right now: returns 1 when any of its callback lists is | ||
1374 | * non-empty or waitlistcount is non-zero, otherwise 0. Assumes that | ||
1375 | * notifiers would take care of outstanding requests from the RCU core. | ||
1376 | * | ||
1377 | * This function is part of the RCU implementation; it is -not- | ||
1378 | * an exported member of the RCU API. | ||
1379 | */ | ||
1380 | int rcu_needs_cpu(int cpu) | ||
1381 | { | ||
1382 | struct rcu_data *rdp = RCU_DATA_CPU(cpu); | ||
1383 | |||
1384 | return (NULL != rdp->donelist || | ||
1385 | rdp->waitlistcount != 0 || | ||
1386 | NULL != rdp->nextlist || | ||
1387 | NULL != rdp->nextschedlist || | ||
1388 | NULL != rdp->waitschedlist); | ||
1389 | } | ||
1390 | |||
1391 | static int rcu_pending(int cpu) | ||
1392 | { /* Returns 1 if @cpu has queued callbacks, owes an acknowledgement, or lags rcu_ctrlblk.completed; else 0. */ | ||
1393 | struct rcu_data *rdp = RCU_DATA_CPU(cpu); | ||
1394 | |||
1395 | /* Case 1: a callback is queued on one of this CPU's lists. */ | ||
1396 | |||
1397 | if (NULL != rdp->donelist || | ||
1398 | rdp->waitlistcount != 0 || | ||
1399 | NULL != rdp->nextlist || | ||
1400 | NULL != rdp->nextschedlist || | ||
1401 | NULL != rdp->waitschedlist) | ||
1402 | return 1; | ||
1403 | |||
1404 | /* Case 2: the RCU core is waiting on a flip or memory-barrier acknowledgement. */ | ||
1405 | |||
1406 | if ((rcu_flipped == per_cpu(rcu_flip_flag, cpu)) || | ||
1407 | (rcu_mb_needed == per_cpu(rcu_mb_flag, cpu))) | ||
1408 | return 1; | ||
1409 | |||
1410 | /* Case 3: this CPU's completed count differs from the global one. */ | ||
1411 | |||
1412 | if (rcu_ctrlblk.completed != rdp->completed) | ||
1413 | return 1; | ||
1414 | |||
1415 | /* None of the above: nothing is pending. */ | ||
1416 | |||
1417 | return 0; | ||
1418 | } | ||
1419 | |||
1420 | int __cpuinit rcu_cpu_notify(struct notifier_block *self, | ||
1421 | unsigned long action, void *hcpu) | ||
1422 | { /* CPU notifier: forwards bring-up and teardown events for CPU number @hcpu to rcu_online_cpu()/rcu_offline_cpu(). */ | ||
1423 | long cpu = (long)hcpu; /* the CPU number is passed in the pointer-sized argument */ | ||
1424 | |||
1425 | switch (action) { | ||
1426 | case CPU_UP_PREPARE: | ||
1427 | case CPU_UP_PREPARE_FROZEN: | ||
1428 | rcu_online_cpu(cpu); | ||
1429 | break; | ||
1430 | case CPU_UP_CANCELED: /* a cancelled bring-up is undone the same way as a dead CPU */ | ||
1431 | case CPU_UP_CANCELED_FROZEN: | ||
1432 | case CPU_DEAD: | ||
1433 | case CPU_DEAD_FROZEN: | ||
1434 | rcu_offline_cpu(cpu); | ||
1435 | break; | ||
1436 | default: /* every other action is ignored */ | ||
1437 | break; | ||
1438 | } | ||
1439 | return NOTIFY_OK; /* returned for every action, handled or not */ | ||
1440 | } | ||
1441 | |||
1442 | void __init __rcu_init(void) | ||
1443 | { /* Boot-time setup: put every possible CPU's rcu_data into its empty state and register the RCU softirq handler. */ | ||
1444 | int cpu; | ||
1445 | int i; | ||
1446 | struct rcu_data *rdp; | ||
1447 | |||
1448 | printk(KERN_NOTICE "Preemptible RCU implementation.\n"); | ||
1449 | for_each_possible_cpu(cpu) { | ||
1450 | rdp = RCU_DATA_CPU(cpu); | ||
1451 | spin_lock_init(&rdp->lock); | ||
1452 | rdp->completed = 0; | ||
1453 | rdp->waitlistcount = 0; | ||
1454 | rdp->nextlist = NULL; /* each list starts NULL with its tail pointer aimed at the list head */ | ||
1455 | rdp->nexttail = &rdp->nextlist; | ||
1456 | for (i = 0; i < GP_STAGES; i++) { | ||
1457 | rdp->waitlist[i] = NULL; | ||
1458 | rdp->waittail[i] = &rdp->waitlist[i]; | ||
1459 | } | ||
1460 | rdp->donelist = NULL; | ||
1461 | rdp->donetail = &rdp->donelist; | ||
1462 | rdp->rcu_flipctr[0] = 0; | ||
1463 | rdp->rcu_flipctr[1] = 0; | ||
1464 | rdp->nextschedlist = NULL; | ||
1465 | rdp->nextschedtail = &rdp->nextschedlist; | ||
1466 | rdp->waitschedlist = NULL; | ||
1467 | rdp->waitschedtail = &rdp->waitschedlist; | ||
1468 | rdp->rcu_sched_sleeping = 0; | ||
1469 | } | ||
1470 | open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); | ||
1471 | } | ||
1472 | |||
1473 | /* | ||
1474 | * Late-boot-time RCU initialization that must wait until after scheduler | ||
1475 | * has been initialized: starts the rcu_sched_grace_period kthread. | ||
1476 | */ | ||
1477 | void __init rcu_init_sched(void) | ||
1478 | { | ||
1479 | rcu_sched_grace_period_task = kthread_run(rcu_sched_grace_period, | ||
1480 | NULL, | ||
1481 | "rcu_sched_grace_period"); | ||
1482 | WARN_ON(IS_ERR(rcu_sched_grace_period_task)); /* a failed start only warns; nothing is returned to the caller */ | ||
1483 | } | ||
1484 | |||
1485 | #ifdef CONFIG_RCU_TRACE | ||
1486 | long *rcupreempt_flipctr(int cpu) | ||
1487 | { /* Trace accessor: pointer to the first element of @cpu's rcu_flipctr counters. */ | ||
1488 | return RCU_DATA_CPU(cpu)->rcu_flipctr; | ||
1489 | } | ||
1490 | EXPORT_SYMBOL_GPL(rcupreempt_flipctr); | ||
1491 | |||
1492 | int rcupreempt_flip_flag(int cpu) | ||
1493 | { /* Trace accessor: current value of @cpu's per-CPU rcu_flip_flag. */ | ||
1494 | return per_cpu(rcu_flip_flag, cpu); | ||
1495 | } | ||
1496 | EXPORT_SYMBOL_GPL(rcupreempt_flip_flag); | ||
1497 | |||
1498 | int rcupreempt_mb_flag(int cpu) | ||
1499 | { /* Trace accessor: current value of @cpu's per-CPU rcu_mb_flag. */ | ||
1500 | return per_cpu(rcu_mb_flag, cpu); | ||
1501 | } | ||
1502 | EXPORT_SYMBOL_GPL(rcupreempt_mb_flag); | ||
1503 | |||
1504 | char *rcupreempt_try_flip_state_name(void) | ||
1505 | { /* Trace accessor: name string for the current rcu_ctrlblk.rcu_try_flip_state; read without taking any lock here. */ | ||
1506 | return rcu_try_flip_state_names[rcu_ctrlblk.rcu_try_flip_state]; | ||
1507 | } | ||
1508 | EXPORT_SYMBOL_GPL(rcupreempt_try_flip_state_name); | ||
1509 | |||
1510 | struct rcupreempt_trace *rcupreempt_trace_cpu(int cpu) | ||
1511 | { /* Trace accessor: pointer to the trace counters embedded in @cpu's rcu_data. */ | ||
1512 | struct rcu_data *rdp = RCU_DATA_CPU(cpu); | ||
1513 | |||
1514 | return &rdp->trace; | ||
1515 | } | ||
1516 | EXPORT_SYMBOL_GPL(rcupreempt_trace_cpu); | ||
1517 | |||
1518 | #endif /* #ifdef CONFIG_RCU_TRACE */ | ||
diff --git a/kernel/rcupreempt_trace.c b/kernel/rcupreempt_trace.c deleted file mode 100644 index 11640346a507..000000000000 --- a/kernel/rcupreempt_trace.c +++ /dev/null | |||
@@ -1,335 +0,0 @@ | |||
1 | /* | ||
2 | * Read-Copy Update tracing for realtime implementation | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
17 | * | ||
18 | * Copyright IBM Corporation, 2006 | ||
19 | * | ||
20 | * Papers: http://www.rdrop.com/users/paulmck/RCU | ||
21 | * | ||
22 | * For detailed explanation of Read-Copy Update mechanism see - | ||
23 | * Documentation/RCU/ *.txt | ||
24 | * | ||
25 | */ | ||
26 | #include <linux/types.h> | ||
27 | #include <linux/kernel.h> | ||
28 | #include <linux/init.h> | ||
29 | #include <linux/spinlock.h> | ||
30 | #include <linux/smp.h> | ||
31 | #include <linux/rcupdate.h> | ||
32 | #include <linux/interrupt.h> | ||
33 | #include <linux/sched.h> | ||
34 | #include <asm/atomic.h> | ||
35 | #include <linux/bitops.h> | ||
36 | #include <linux/module.h> | ||
37 | #include <linux/completion.h> | ||
38 | #include <linux/moduleparam.h> | ||
39 | #include <linux/percpu.h> | ||
40 | #include <linux/notifier.h> | ||
41 | #include <linux/cpu.h> | ||
42 | #include <linux/mutex.h> | ||
43 | #include <linux/rcupreempt_trace.h> | ||
44 | #include <linux/debugfs.h> | ||
45 | |||
46 | static struct mutex rcupreempt_trace_mutex; /* held by each debugfs read handler while it formats into and copies out of the buffer */ | ||
47 | static char *rcupreempt_trace_buf; /* shared formatting buffer, kmalloc'ed in rcupreempt_trace_init() */ | ||
48 | #define RCUPREEMPT_TRACE_BUF_SIZE 4096 /* size in bytes of rcupreempt_trace_buf */ | ||
49 | |||
50 | void rcupreempt_trace_move2done(struct rcupreempt_trace *trace) | ||
51 | { /* Account the whole wait list as moved to the done list, then zero the wait length. */ | ||
52 | trace->done_add += trace->wait_length; | ||
53 | trace->done_length += trace->wait_length; | ||
54 | trace->wait_length = 0; | ||
55 | } | ||
56 | void rcupreempt_trace_move2wait(struct rcupreempt_trace *trace) | ||
57 | { /* Account the whole next list as moved to the wait list, then zero the next length. */ | ||
58 | trace->wait_add += trace->next_length; | ||
59 | trace->wait_length += trace->next_length; | ||
60 | trace->next_length = 0; | ||
61 | } | ||
62 | void rcupreempt_trace_try_flip_1(struct rcupreempt_trace *trace) | ||
63 | { /* Atomically add one to the rcu_try_flip_1 counter. */ | ||
64 | atomic_add(1, &trace->rcu_try_flip_1); | ||
65 | } | ||
66 | void rcupreempt_trace_try_flip_e1(struct rcupreempt_trace *trace) | ||
67 | { /* Atomically add one to the rcu_try_flip_e1 counter. */ | ||
68 | atomic_add(1, &trace->rcu_try_flip_e1); | ||
69 | } | ||
70 | void rcupreempt_trace_try_flip_i1(struct rcupreempt_trace *trace) | ||
71 | { /* Add one to the rcu_try_flip_i1 counter (plain, non-atomic field). */ | ||
72 | trace->rcu_try_flip_i1 += 1; | ||
73 | } | ||
74 | void rcupreempt_trace_try_flip_ie1(struct rcupreempt_trace *trace) | ||
75 | { /* Add one to the rcu_try_flip_ie1 counter. */ | ||
76 | trace->rcu_try_flip_ie1 += 1; | ||
77 | } | ||
78 | void rcupreempt_trace_try_flip_g1(struct rcupreempt_trace *trace) | ||
79 | { /* Add one to the rcu_try_flip_g1 counter. */ | ||
80 | trace->rcu_try_flip_g1 += 1; | ||
81 | } | ||
82 | void rcupreempt_trace_try_flip_a1(struct rcupreempt_trace *trace) | ||
83 | { /* Add one to the rcu_try_flip_a1 counter. */ | ||
84 | trace->rcu_try_flip_a1 += 1; | ||
85 | } | ||
86 | void rcupreempt_trace_try_flip_ae1(struct rcupreempt_trace *trace) | ||
87 | { /* Add one to the rcu_try_flip_ae1 counter. */ | ||
88 | trace->rcu_try_flip_ae1 += 1; | ||
89 | } | ||
90 | void rcupreempt_trace_try_flip_a2(struct rcupreempt_trace *trace) | ||
91 | { /* Add one to the rcu_try_flip_a2 counter. */ | ||
92 | trace->rcu_try_flip_a2 += 1; | ||
93 | } | ||
94 | void rcupreempt_trace_try_flip_z1(struct rcupreempt_trace *trace) | ||
95 | { /* Add one to the rcu_try_flip_z1 counter. */ | ||
96 | trace->rcu_try_flip_z1 += 1; | ||
97 | } | ||
98 | void rcupreempt_trace_try_flip_ze1(struct rcupreempt_trace *trace) | ||
99 | { /* Add one to the rcu_try_flip_ze1 counter. */ | ||
100 | trace->rcu_try_flip_ze1 += 1; | ||
101 | } | ||
102 | void rcupreempt_trace_try_flip_z2(struct rcupreempt_trace *trace) | ||
103 | { /* Add one to the rcu_try_flip_z2 counter. */ | ||
104 | trace->rcu_try_flip_z2 += 1; | ||
105 | } | ||
106 | void rcupreempt_trace_try_flip_m1(struct rcupreempt_trace *trace) | ||
107 | { /* Add one to the rcu_try_flip_m1 counter. */ | ||
108 | trace->rcu_try_flip_m1 += 1; | ||
109 | } | ||
110 | void rcupreempt_trace_try_flip_me1(struct rcupreempt_trace *trace) | ||
111 | { /* Add one to the rcu_try_flip_me1 counter. */ | ||
112 | trace->rcu_try_flip_me1 += 1; | ||
113 | } | ||
114 | void rcupreempt_trace_try_flip_m2(struct rcupreempt_trace *trace) | ||
115 | { /* Add one to the rcu_try_flip_m2 counter. */ | ||
116 | trace->rcu_try_flip_m2 += 1; | ||
117 | } | ||
118 | void rcupreempt_trace_check_callbacks(struct rcupreempt_trace *trace) | ||
119 | { /* Add one to the rcu_check_callbacks counter. */ | ||
120 | trace->rcu_check_callbacks += 1; | ||
121 | } | ||
122 | void rcupreempt_trace_done_remove(struct rcupreempt_trace *trace) | ||
123 | { /* Account the whole done list as removed, then zero the done length. */ | ||
124 | trace->done_remove = trace->done_remove + trace->done_length; | ||
125 | trace->done_length = 0; | ||
126 | } | ||
127 | void rcupreempt_trace_invoke(struct rcupreempt_trace *trace) | ||
128 | { /* Atomically add one to the done_invoked counter. */ | ||
129 | atomic_add(1, &trace->done_invoked); | ||
130 | } | ||
131 | void rcupreempt_trace_next_add(struct rcupreempt_trace *trace) | ||
132 | { /* Record one more entry on the next list: bump both its length and its running add count. */ | ||
133 | trace->next_length += 1; | ||
134 | trace->next_add += 1; | ||
135 | } | ||
136 | |||
137 | static void rcupreempt_trace_sum(struct rcupreempt_trace *sp) | ||
138 | { /* Zero *sp, then add into it the trace counters of every possible CPU. No lock is taken here while reading them. */ | ||
139 | struct rcupreempt_trace *cp; | ||
140 | int cpu; | ||
141 | |||
142 | memset(sp, 0, sizeof(*sp)); | ||
143 | for_each_possible_cpu(cpu) { | ||
144 | cp = rcupreempt_trace_cpu(cpu); | ||
145 | sp->next_length += cp->next_length; | ||
146 | sp->next_add += cp->next_add; | ||
147 | sp->wait_length += cp->wait_length; | ||
148 | sp->wait_add += cp->wait_add; | ||
149 | sp->done_length += cp->done_length; | ||
150 | sp->done_add += cp->done_add; | ||
151 | sp->done_remove += cp->done_remove; | ||
152 | atomic_add(atomic_read(&cp->done_invoked), &sp->done_invoked); /* atomic_t fields are summed via atomic_read/atomic_add */ | ||
153 | sp->rcu_check_callbacks += cp->rcu_check_callbacks; | ||
154 | atomic_add(atomic_read(&cp->rcu_try_flip_1), | ||
155 | &sp->rcu_try_flip_1); | ||
156 | atomic_add(atomic_read(&cp->rcu_try_flip_e1), | ||
157 | &sp->rcu_try_flip_e1); | ||
158 | sp->rcu_try_flip_i1 += cp->rcu_try_flip_i1; | ||
159 | sp->rcu_try_flip_ie1 += cp->rcu_try_flip_ie1; | ||
160 | sp->rcu_try_flip_g1 += cp->rcu_try_flip_g1; | ||
161 | sp->rcu_try_flip_a1 += cp->rcu_try_flip_a1; | ||
162 | sp->rcu_try_flip_ae1 += cp->rcu_try_flip_ae1; | ||
163 | sp->rcu_try_flip_a2 += cp->rcu_try_flip_a2; | ||
164 | sp->rcu_try_flip_z1 += cp->rcu_try_flip_z1; | ||
165 | sp->rcu_try_flip_ze1 += cp->rcu_try_flip_ze1; | ||
166 | sp->rcu_try_flip_z2 += cp->rcu_try_flip_z2; | ||
167 | sp->rcu_try_flip_m1 += cp->rcu_try_flip_m1; | ||
168 | sp->rcu_try_flip_me1 += cp->rcu_try_flip_me1; | ||
169 | sp->rcu_try_flip_m2 += cp->rcu_try_flip_m2; | ||
170 | } | ||
171 | } | ||
172 | |||
173 | static ssize_t rcustats_read(struct file *filp, char __user *buffer, | ||
174 | size_t count, loff_t *ppos) | ||
175 | { /* debugfs read handler: format the counters summed over all CPUs into the shared buffer and copy them to @buffer. */ | ||
176 | struct rcupreempt_trace trace; | ||
177 | ssize_t bcount; /* value returned by simple_read_from_buffer() */ | ||
178 | int cnt = 0; /* bytes already formatted into rcupreempt_trace_buf */ | ||
179 | |||
180 | rcupreempt_trace_sum(&trace); | ||
181 | mutex_lock(&rcupreempt_trace_mutex); | ||
182 | cnt += snprintf(&rcupreempt_trace_buf[cnt], RCUPREEMPT_TRACE_BUF_SIZE - cnt, /* advance cnt so the next snprintf appends instead of overwriting this line */ | ||
183 | "ggp=%ld rcc=%ld\n", | ||
184 | rcu_batches_completed(), | ||
185 | trace.rcu_check_callbacks); | ||
186 | snprintf(&rcupreempt_trace_buf[cnt], RCUPREEMPT_TRACE_BUF_SIZE - cnt, | ||
187 | "na=%ld nl=%ld wa=%ld wl=%ld da=%ld dl=%ld dr=%ld di=%d\n" | ||
188 | "1=%d e1=%d i1=%ld ie1=%ld g1=%ld a1=%ld ae1=%ld a2=%ld\n" | ||
189 | "z1=%ld ze1=%ld z2=%ld m1=%ld me1=%ld m2=%ld\n", | ||
190 | |||
191 | trace.next_add, trace.next_length, | ||
192 | trace.wait_add, trace.wait_length, | ||
193 | trace.done_add, trace.done_length, | ||
194 | trace.done_remove, atomic_read(&trace.done_invoked), | ||
195 | atomic_read(&trace.rcu_try_flip_1), | ||
196 | atomic_read(&trace.rcu_try_flip_e1), | ||
197 | trace.rcu_try_flip_i1, trace.rcu_try_flip_ie1, | ||
198 | trace.rcu_try_flip_g1, | ||
199 | trace.rcu_try_flip_a1, trace.rcu_try_flip_ae1, | ||
200 | trace.rcu_try_flip_a2, | ||
201 | trace.rcu_try_flip_z1, trace.rcu_try_flip_ze1, | ||
202 | trace.rcu_try_flip_z2, | ||
203 | trace.rcu_try_flip_m1, trace.rcu_try_flip_me1, | ||
204 | trace.rcu_try_flip_m2); | ||
205 | bcount = simple_read_from_buffer(buffer, count, ppos, | ||
206 | rcupreempt_trace_buf, strlen(rcupreempt_trace_buf)); | ||
207 | mutex_unlock(&rcupreempt_trace_mutex); | ||
208 | return bcount; | ||
209 | } | ||
210 | |||
211 | static ssize_t rcugp_read(struct file *filp, char __user *buffer, | ||
212 | size_t count, loff_t *ppos) | ||
213 | { /* debugfs read handler: report rcu_batches_completed() before and after one synchronize_rcu(). */ | ||
214 | long oldgp = rcu_batches_completed(); /* sampled before the mutex is taken */ | ||
215 | ssize_t bcount; | ||
216 | |||
217 | mutex_lock(&rcupreempt_trace_mutex); | ||
218 | synchronize_rcu(); /* called with the trace mutex held */ | ||
219 | snprintf(rcupreempt_trace_buf, RCUPREEMPT_TRACE_BUF_SIZE, | ||
220 | "oldggp=%ld newggp=%ld\n", oldgp, rcu_batches_completed()); | ||
221 | bcount = simple_read_from_buffer(buffer, count, ppos, | ||
222 | rcupreempt_trace_buf, strlen(rcupreempt_trace_buf)); | ||
223 | mutex_unlock(&rcupreempt_trace_mutex); | ||
224 | return bcount; | ||
225 | } | ||
226 | |||
227 | static ssize_t rcuctrs_read(struct file *filp, char __user *buffer, | ||
228 | size_t count, loff_t *ppos) | ||
229 | { /* debugfs read handler: one row of flip counters and flags per possible CPU, then the global state. */ | ||
230 | int cnt = 0; /* bytes actually stored so far; scnprintf keeps it below the buffer size */ | ||
231 | int cpu; | ||
232 | int f = rcu_batches_completed() & 0x1; /* low bit selects the counter printed in the "cur" column */ | ||
233 | ssize_t bcount; | ||
234 | |||
235 | mutex_lock(&rcupreempt_trace_mutex); | ||
236 | |||
237 | cnt += scnprintf(&rcupreempt_trace_buf[cnt], RCUPREEMPT_TRACE_BUF_SIZE - cnt, | ||
238 | "CPU last cur F M\n"); | ||
239 | for_each_possible_cpu(cpu) { /* output is truncated, not overrun, if the rows exceed the buffer */ | ||
240 | long *flipctr = rcupreempt_flipctr(cpu); | ||
241 | cnt += scnprintf(&rcupreempt_trace_buf[cnt], | ||
242 | RCUPREEMPT_TRACE_BUF_SIZE - cnt, | ||
243 | "%3d%c %4ld %3ld %d %d\n", | ||
244 | cpu, | ||
245 | cpu_is_offline(cpu) ? '!' : ' ', | ||
246 | flipctr[!f], | ||
247 | flipctr[f], | ||
248 | rcupreempt_flip_flag(cpu), | ||
249 | rcupreempt_mb_flag(cpu)); | ||
250 | } | ||
251 | cnt += scnprintf(&rcupreempt_trace_buf[cnt], | ||
252 | RCUPREEMPT_TRACE_BUF_SIZE - cnt, | ||
253 | "ggp = %ld, state = %s\n", | ||
254 | rcu_batches_completed(), | ||
255 | rcupreempt_try_flip_state_name()); | ||
256 | cnt += scnprintf(&rcupreempt_trace_buf[cnt], | ||
257 | RCUPREEMPT_TRACE_BUF_SIZE - cnt, | ||
258 | "\n"); | ||
259 | bcount = simple_read_from_buffer(buffer, count, ppos, | ||
260 | rcupreempt_trace_buf, strlen(rcupreempt_trace_buf)); | ||
261 | mutex_unlock(&rcupreempt_trace_mutex); | ||
262 | return bcount; | ||
263 | } | ||
264 | |||
265 | static struct file_operations rcustats_fops = { /* read-only file operations used for the "rcustats" debugfs file */ | ||
266 | .owner = THIS_MODULE, | ||
267 | .read = rcustats_read, | ||
268 | }; | ||
269 | |||
270 | static struct file_operations rcugp_fops = { /* read-only file operations used for the "rcugp" debugfs file */ | ||
271 | .owner = THIS_MODULE, | ||
272 | .read = rcugp_read, | ||
273 | }; | ||
274 | |||
275 | static struct file_operations rcuctrs_fops = { /* read-only file operations used for the "rcuctrs" debugfs file */ | ||
276 | .owner = THIS_MODULE, | ||
277 | .read = rcuctrs_read, | ||
278 | }; | ||
279 | |||
280 | static struct dentry *rcudir, *statdir, *ctrsdir, *gpdir; /* debugfs entries, removed again in rcupreempt_trace_cleanup() */ | ||
281 | static int rcupreempt_debugfs_init(void) | ||
282 | { /* Create the "rcu" debugfs directory with its three files. Returns 0 on success, 1 on failure (not a negative errno). */ | ||
283 | rcudir = debugfs_create_dir("rcu", NULL); | ||
284 | if (!rcudir) | ||
285 | goto out; | ||
286 | statdir = debugfs_create_file("rcustats", 0444, rcudir, | ||
287 | NULL, &rcustats_fops); | ||
288 | if (!statdir) | ||
289 | goto free_out; | ||
290 | |||
291 | gpdir = debugfs_create_file("rcugp", 0444, rcudir, NULL, &rcugp_fops); | ||
292 | if (!gpdir) | ||
293 | goto free_out; | ||
294 | |||
295 | ctrsdir = debugfs_create_file("rcuctrs", 0444, rcudir, | ||
296 | NULL, &rcuctrs_fops); | ||
297 | if (!ctrsdir) | ||
298 | goto free_out; | ||
299 | return 0; | ||
300 | free_out: /* ctrsdir is created last, so it never needs removing on this path */ | ||
301 | if (statdir) | ||
302 | debugfs_remove(statdir); | ||
303 | if (gpdir) | ||
304 | debugfs_remove(gpdir); | ||
305 | debugfs_remove(rcudir); | ||
306 | out: | ||
307 | return 1; | ||
308 | } | ||
309 | |||
310 | static int __init rcupreempt_trace_init(void) | ||
311 | { /* Init hook: set up the mutex, allocate the shared buffer, create the debugfs files. Returns 0 on success. */ | ||
312 | int ret; | ||
313 | |||
314 | mutex_init(&rcupreempt_trace_mutex); | ||
315 | rcupreempt_trace_buf = kmalloc(RCUPREEMPT_TRACE_BUF_SIZE, GFP_KERNEL); | ||
316 | if (!rcupreempt_trace_buf) | ||
317 | return -ENOMEM; /* negative errno for allocation failure, per the initcall return convention */ | ||
318 | ret = rcupreempt_debugfs_init(); /* status of the debugfs helper is passed through unchanged */ | ||
319 | if (ret) | ||
320 | kfree(rcupreempt_trace_buf); /* no files were left behind, so the buffer is no longer needed */ | ||
321 | return ret; | ||
322 | } | ||
323 | |||
324 | static void __exit rcupreempt_trace_cleanup(void) | ||
325 | { /* Exit hook: remove the three debugfs files, then their directory, then free the shared buffer. */ | ||
326 | debugfs_remove(statdir); | ||
327 | debugfs_remove(gpdir); | ||
328 | debugfs_remove(ctrsdir); | ||
329 | debugfs_remove(rcudir); | ||
330 | kfree(rcupreempt_trace_buf); | ||
331 | } | ||
332 | |||
333 | |||
334 | module_init(rcupreempt_trace_init); /* register the init hook defined above */ | ||
335 | module_exit(rcupreempt_trace_cleanup); /* register the matching exit hook */ | ||
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index f87fb0c8f924..82fbc49728df 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug | |||
@@ -725,7 +725,7 @@ config RCU_TORTURE_TEST_RUNNABLE | |||
725 | 725 | ||
726 | config RCU_CPU_STALL_DETECTOR | 726 | config RCU_CPU_STALL_DETECTOR |
727 | bool "Check for stalled CPUs delaying RCU grace periods" | 727 | bool "Check for stalled CPUs delaying RCU grace periods" |
728 | depends on CLASSIC_RCU || TREE_RCU || TREE_PREEMPT_RCU | 728 | depends on TREE_RCU || TREE_PREEMPT_RCU |
729 | default n | 729 | default n |
730 | help | 730 | help |
731 | This option causes RCU to printk information on which | 731 | This option causes RCU to printk information on which |