author     Linus Torvalds <torvalds@linux-foundation.org>    2012-01-06 11:02:40 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>    2012-01-06 11:02:40 -0500
commit     423d091dfe58d3109d84c408810a7cfa82f6f184 (patch)
tree       43c4385d1dc7219582f924d42db1f3e203a577bd
parent     1483b3823542c9721eddf09a077af1e02ac96b50 (diff)
parent     919b83452b2e7c1dbced0456015508b4b9585db3 (diff)
Merge branch 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
* 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (64 commits)
cpu: Export cpu_up()
rcu: Apply ACCESS_ONCE() to rcu_boost() return value
Revert "rcu: Permit rt_mutex_unlock() with irqs disabled"
docs: Additional LWN links to RCU API
rcu: Augment rcu_batch_end tracing for idle and callback state
rcu: Add rcutorture tests for srcu_read_lock_raw()
rcu: Make rcutorture test for hotpluggability before offlining CPUs
driver-core/cpu: Expose hotpluggability to the rest of the kernel
rcu: Remove redundant rcu_cpu_stall_suppress declaration
rcu: Adaptive dyntick-idle preparation
rcu: Keep invoking callbacks if CPU otherwise idle
rcu: Irq nesting is always 0 on rcu_enter_idle_common
rcu: Don't check irq nesting from rcu idle entry/exit
rcu: Permit dyntick-idle with callbacks pending
rcu: Document same-context read-side constraints
rcu: Identify dyntick-idle CPUs on first force_quiescent_state() pass
rcu: Remove dynticks false positives and RCU failures
rcu: Reduce latency of rcu_prepare_for_idle()
rcu: Eliminate RCU_FAST_NO_HZ grace-period hang
rcu: Avoid needlessly IPIing CPUs at GP end
...
58 files changed, 1512 insertions, 407 deletions
diff --git a/Documentation/RCU/checklist.txt b/Documentation/RCU/checklist.txt index 0c134f8afc6f..bff2d8be1e18 100644 --- a/Documentation/RCU/checklist.txt +++ b/Documentation/RCU/checklist.txt | |||
@@ -328,6 +328,12 @@ over a rather long period of time, but improvements are always welcome! | |||
328 | RCU rather than SRCU, because RCU is almost always faster and | 328 | RCU rather than SRCU, because RCU is almost always faster and |
329 | easier to use than is SRCU. | 329 | easier to use than is SRCU. |
330 | 330 | ||
331 | If you need to enter your read-side critical section in a | ||
332 | hardirq or exception handler, and then exit that same read-side | ||
333 | critical section in the task that was interrupted, then you need | ||
334 | to srcu_read_lock_raw() and srcu_read_unlock_raw(), which avoid | ||
335 | the lockdep checking that would otherwise make this practice illegal. ||
336 | |||
331 | Also unlike other forms of RCU, explicit initialization | 337 | Also unlike other forms of RCU, explicit initialization |
332 | and cleanup is required via init_srcu_struct() and | 338 | and cleanup is required via init_srcu_struct() and |
333 | cleanup_srcu_struct(). These are passed a "struct srcu_struct" | 339 | cleanup_srcu_struct(). These are passed a "struct srcu_struct" |
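As an illustration of the pattern the new checklist text describes (begin the SRCU read-side critical section in a hardirq handler, finish it in the interrupted task), here is a minimal sketch. The srcu_struct, handler, and completion-function names are invented for the example; only srcu_read_lock_raw()/srcu_read_unlock_raw() come from this merge.

        #include <linux/interrupt.h>
        #include <linux/srcu.h>

        static struct srcu_struct demo_srcu;    /* init_srcu_struct() at module init */
        static int demo_srcu_idx;               /* cookie handed from hardirq to task */

        /* Hardirq handler: enter the read-side critical section. */
        static irqreturn_t demo_irq_handler(int irq, void *dev_id)
        {
                demo_srcu_idx = srcu_read_lock_raw(&demo_srcu);
                /* ... stash SRCU-protected state for the interrupted task ... */
                return IRQ_HANDLED;
        }

        /* Later, in the task that was interrupted: exit that same critical section. */
        static void demo_task_completion(void)
        {
                /* ... finish using the SRCU-protected state ... */
                srcu_read_unlock_raw(&demo_srcu, demo_srcu_idx);
        }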
diff --git a/Documentation/RCU/rcu.txt b/Documentation/RCU/rcu.txt index 31852705b586..bf778332a28f 100644 --- a/Documentation/RCU/rcu.txt +++ b/Documentation/RCU/rcu.txt | |||
@@ -38,11 +38,11 @@ o How can the updater tell when a grace period has completed | |||
38 | 38 | ||
39 | Preemptible variants of RCU (CONFIG_TREE_PREEMPT_RCU) get the | 39 | Preemptible variants of RCU (CONFIG_TREE_PREEMPT_RCU) get the |
40 | same effect, but require that the readers manipulate CPU-local | 40 | same effect, but require that the readers manipulate CPU-local |
41 | counters. These counters allow limited types of blocking | 41 | counters. These counters allow limited types of blocking within |
42 | within RCU read-side critical sections. SRCU also uses | 42 | RCU read-side critical sections. SRCU also uses CPU-local |
43 | CPU-local counters, and permits general blocking within | 43 | counters, and permits general blocking within RCU read-side |
44 | RCU read-side critical sections. These two variants of | 44 | critical sections. These variants of RCU detect grace periods |
45 | RCU detect grace periods by sampling these counters. | 45 | by sampling these counters. |
46 | 46 | ||
47 | o If I am running on a uniprocessor kernel, which can only do one | 47 | o If I am running on a uniprocessor kernel, which can only do one |
48 | thing at a time, why should I wait for a grace period? | 48 | thing at a time, why should I wait for a grace period? |
diff --git a/Documentation/RCU/stallwarn.txt b/Documentation/RCU/stallwarn.txt index 4e959208f736..083d88cbc089 100644 --- a/Documentation/RCU/stallwarn.txt +++ b/Documentation/RCU/stallwarn.txt | |||
@@ -101,6 +101,11 @@ o A CPU-bound real-time task in a CONFIG_PREEMPT_RT kernel that | |||
101 | CONFIG_TREE_PREEMPT_RCU case, you might see stall-warning | 101 | CONFIG_TREE_PREEMPT_RCU case, you might see stall-warning |
102 | messages. | 102 | messages. |
103 | 103 | ||
104 | o A hardware or software issue shuts off the scheduler-clock | ||
105 | interrupt on a CPU that is not in dyntick-idle mode. This | ||
106 | problem really has happened, and seems to be most likely to | ||
107 | result in RCU CPU stall warnings for CONFIG_NO_HZ=n kernels. | ||
108 | |||
104 | o A bug in the RCU implementation. | 109 | o A bug in the RCU implementation. |
105 | 110 | ||
106 | o A hardware failure. This is quite unlikely, but has occurred | 111 | o A hardware failure. This is quite unlikely, but has occurred |
@@ -109,12 +114,11 @@ o A hardware failure. This is quite unlikely, but has occurred | |||
109 | This resulted in a series of RCU CPU stall warnings, eventually | 114 | This resulted in a series of RCU CPU stall warnings, eventually |
110 | leading the realization that the CPU had failed. | 115 | leading the realization that the CPU had failed. |
111 | 116 | ||
112 | The RCU, RCU-sched, and RCU-bh implementations have CPU stall | 117 | The RCU, RCU-sched, and RCU-bh implementations have CPU stall warnings. |
113 | warning. SRCU does not have its own CPU stall warnings, but its | 118 | SRCU does not have its own CPU stall warnings, but its calls to |
114 | calls to synchronize_sched() will result in RCU-sched detecting | 119 | synchronize_sched() will result in RCU-sched detecting RCU-sched-related |
115 | RCU-sched-related CPU stalls. Please note that RCU only detects | 120 | CPU stalls. Please note that RCU only detects CPU stalls when there is |
116 | CPU stalls when there is a grace period in progress. No grace period, | 121 | a grace period in progress. No grace period, no CPU stall warnings. |
117 | no CPU stall warnings. | ||
118 | 122 | ||
119 | To diagnose the cause of the stall, inspect the stack traces. | 123 | To diagnose the cause of the stall, inspect the stack traces. |
120 | The offending function will usually be near the top of the stack. | 124 | The offending function will usually be near the top of the stack. |
diff --git a/Documentation/RCU/torture.txt b/Documentation/RCU/torture.txt index 783d6c134d3f..d67068d0d2b9 100644 --- a/Documentation/RCU/torture.txt +++ b/Documentation/RCU/torture.txt | |||
@@ -61,11 +61,24 @@ nreaders This is the number of RCU reading threads supported. | |||
61 | To properly exercise RCU implementations with preemptible | 61 | To properly exercise RCU implementations with preemptible |
62 | read-side critical sections. | 62 | read-side critical sections. |
63 | 63 | ||
64 | onoff_interval | ||
65 | The number of seconds between each attempt to execute a | ||
66 | randomly selected CPU-hotplug operation. Defaults to | ||
67 | zero, which disables CPU hotplugging. In HOTPLUG_CPU=n | ||
68 | kernels, rcutorture will silently refuse to do any | ||
69 | CPU-hotplug operations regardless of what value is | ||
70 | specified for onoff_interval. | ||
71 | |||
64 | shuffle_interval | 72 | shuffle_interval |
65 | The number of seconds to keep the test threads affinitied | 73 | The number of seconds to keep the test threads affinitied |
66 | to a particular subset of the CPUs, defaults to 3 seconds. | 74 | to a particular subset of the CPUs, defaults to 3 seconds. |
67 | Used in conjunction with test_no_idle_hz. | 75 | Used in conjunction with test_no_idle_hz. |
68 | 76 | ||
77 | shutdown_secs The number of seconds to run the test before terminating | ||
78 | the test and powering off the system. The default is | ||
79 | zero, which disables test termination and system shutdown. | ||
80 | This capability is useful for automated testing. | ||
81 | |||
69 | stat_interval The number of seconds between output of torture | 82 | stat_interval The number of seconds between output of torture |
70 | statistics (via printk()). Regardless of the interval, | 83 | statistics (via printk()). Regardless of the interval, |
71 | statistics are printed when the module is unloaded. | 84 | statistics are printed when the module is unloaded. |
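The onoff_interval and shutdown_secs entries above document new rcutorture module parameters; a rough sketch of how such parameters are typically declared follows (the exact declarations and permission bits in kernel/rcutorture.c may differ).

        #include <linux/module.h>

        /* 0 disables CPU-hotplug testing / automatic shutdown, per the text above. */
        static int onoff_interval;
        static int shutdown_secs;

        module_param(onoff_interval, int, 0444);
        MODULE_PARM_DESC(onoff_interval, "Seconds between CPU hotplug operations, 0=disable");
        module_param(shutdown_secs, int, 0444);
        MODULE_PARM_DESC(shutdown_secs, "Seconds to run before shutdown, 0=disable");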
diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt index aaf65f6c6cd7..49587abfc2f7 100644 --- a/Documentation/RCU/trace.txt +++ b/Documentation/RCU/trace.txt | |||
@@ -105,14 +105,10 @@ o "dt" is the current value of the dyntick counter that is incremented | |||
105 | or one greater than the interrupt-nesting depth otherwise. | 105 | or one greater than the interrupt-nesting depth otherwise. |
106 | The number after the second "/" is the NMI nesting depth. | 106 | The number after the second "/" is the NMI nesting depth. |
107 | 107 | ||
108 | This field is displayed only for CONFIG_NO_HZ kernels. | ||
109 | |||
110 | o "df" is the number of times that some other CPU has forced a | 108 | o "df" is the number of times that some other CPU has forced a |
111 | quiescent state on behalf of this CPU due to this CPU being in | 109 | quiescent state on behalf of this CPU due to this CPU being in |
112 | dynticks-idle state. | 110 | dynticks-idle state. |
113 | 111 | ||
114 | This field is displayed only for CONFIG_NO_HZ kernels. | ||
115 | |||
116 | o "of" is the number of times that some other CPU has forced a | 112 | o "of" is the number of times that some other CPU has forced a |
117 | quiescent state on behalf of this CPU due to this CPU being | 113 | quiescent state on behalf of this CPU due to this CPU being |
118 | offline. In a perfect world, this might never happen, but it | 114 | offline. In a perfect world, this might never happen, but it |
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt index 6ef692667e2f..6bbe8dcdc3da 100644 --- a/Documentation/RCU/whatisRCU.txt +++ b/Documentation/RCU/whatisRCU.txt | |||
@@ -4,6 +4,7 @@ to start learning about RCU: | |||
4 | 1. What is RCU, Fundamentally? http://lwn.net/Articles/262464/ | 4 | 1. What is RCU, Fundamentally? http://lwn.net/Articles/262464/ |
5 | 2. What is RCU? Part 2: Usage http://lwn.net/Articles/263130/ | 5 | 2. What is RCU? Part 2: Usage http://lwn.net/Articles/263130/ |
6 | 3. RCU part 3: the RCU API http://lwn.net/Articles/264090/ | 6 | 3. RCU part 3: the RCU API http://lwn.net/Articles/264090/ |
7 | 4. The RCU API, 2010 Edition http://lwn.net/Articles/418853/ | ||
7 | 8 | ||
8 | 9 | ||
9 | What is RCU? | 10 | What is RCU? |
@@ -834,6 +835,8 @@ SRCU: Critical sections Grace period Barrier | |||
834 | 835 | ||
835 | srcu_read_lock synchronize_srcu N/A | 836 | srcu_read_lock synchronize_srcu N/A |
836 | srcu_read_unlock synchronize_srcu_expedited | 837 | srcu_read_unlock synchronize_srcu_expedited |
838 | srcu_read_lock_raw | ||
839 | srcu_read_unlock_raw | ||
837 | srcu_dereference | 840 | srcu_dereference |
838 | 841 | ||
839 | SRCU: Initialization/cleanup | 842 | SRCU: Initialization/cleanup |
@@ -855,27 +858,33 @@ list can be helpful: | |||
855 | 858 | ||
856 | a. Will readers need to block? If so, you need SRCU. | 859 | a. Will readers need to block? If so, you need SRCU. |
857 | 860 | ||
858 | b. What about the -rt patchset? If readers would need to block | 861 | b. Is it necessary to start a read-side critical section in a |
862 | hardirq handler or exception handler, and then to complete | ||
863 | this read-side critical section in the task that was | ||
864 | interrupted? If so, you need SRCU's srcu_read_lock_raw() and | ||
865 | srcu_read_unlock_raw() primitives. | ||
866 | |||
867 | c. What about the -rt patchset? If readers would need to block | ||
859 | in a non-rt kernel, you need SRCU. If readers would block | 868 | in a non-rt kernel, you need SRCU. If readers would block |
860 | in a -rt kernel, but not in a non-rt kernel, SRCU is not | 869 | in a -rt kernel, but not in a non-rt kernel, SRCU is not |
861 | necessary. | 870 | necessary. |
862 | 871 | ||
863 | c. Do you need to treat NMI handlers, hardirq handlers, | 872 | d. Do you need to treat NMI handlers, hardirq handlers, |
864 | and code segments with preemption disabled (whether | 873 | and code segments with preemption disabled (whether |
865 | via preempt_disable(), local_irq_save(), local_bh_disable(), | 874 | via preempt_disable(), local_irq_save(), local_bh_disable(), |
866 | or some other mechanism) as if they were explicit RCU readers? | 875 | or some other mechanism) as if they were explicit RCU readers? |
867 | If so, you need RCU-sched. | 876 | If so, you need RCU-sched. |
868 | 877 | ||
869 | d. Do you need RCU grace periods to complete even in the face | 878 | e. Do you need RCU grace periods to complete even in the face |
870 | of softirq monopolization of one or more of the CPUs? For | 879 | of softirq monopolization of one or more of the CPUs? For |
871 | example, is your code subject to network-based denial-of-service | 880 | example, is your code subject to network-based denial-of-service |
872 | attacks? If so, you need RCU-bh. | 881 | attacks? If so, you need RCU-bh. |
873 | 882 | ||
874 | e. Is your workload too update-intensive for normal use of | 883 | f. Is your workload too update-intensive for normal use of |
875 | RCU, but inappropriate for other synchronization mechanisms? | 884 | RCU, but inappropriate for other synchronization mechanisms? |
876 | If so, consider SLAB_DESTROY_BY_RCU. But please be careful! | 885 | If so, consider SLAB_DESTROY_BY_RCU. But please be careful! |
877 | 886 | ||
878 | f. Otherwise, use RCU. | 887 | g. Otherwise, use RCU. |
879 | 888 | ||
880 | Of course, this all assumes that you have determined that RCU is in fact | 889 | Of course, this all assumes that you have determined that RCU is in fact |
881 | the right tool for your job. | 890 | the right tool for your job. |
diff --git a/Documentation/atomic_ops.txt b/Documentation/atomic_ops.txt index 3bd585b44927..27f2b21a9d5c 100644 --- a/Documentation/atomic_ops.txt +++ b/Documentation/atomic_ops.txt | |||
@@ -84,6 +84,93 @@ compiler optimizes the section accessing atomic_t variables. | |||
84 | 84 | ||
85 | *** YOU HAVE BEEN WARNED! *** | 85 | *** YOU HAVE BEEN WARNED! *** |
86 | 86 | ||
87 | Properly aligned pointers, longs, ints, and chars (and unsigned | ||
88 | equivalents) may be atomically loaded from and stored to in the same | ||
89 | sense as described for atomic_read() and atomic_set(). The ACCESS_ONCE() | ||
90 | macro should be used to prevent the compiler from using optimizations | ||
91 | that might otherwise optimize accesses out of existence on the one hand, | ||
92 | or that might create unsolicited accesses on the other. | ||
93 | |||
94 | For example consider the following code: | ||
95 | |||
96 | while (a > 0) | ||
97 | do_something(); | ||
98 | |||
99 | If the compiler can prove that do_something() does not store to the | ||
100 | variable a, then the compiler is within its rights transforming this to | ||
101 | the following: | ||
102 | |||
103 | tmp = a; | ||
104 | if (a > 0) | ||
105 | for (;;) | ||
106 | do_something(); | ||
107 | |||
108 | If you don't want the compiler to do this (and you probably don't), then | ||
109 | you should use something like the following: | ||
110 | |||
111 | while (ACCESS_ONCE(a) > 0) ||
112 | do_something(); | ||
113 | |||
114 | Alternatively, you could place a barrier() call in the loop. | ||
115 | |||
116 | For another example, consider the following code: | ||
117 | |||
118 | tmp_a = a; | ||
119 | do_something_with(tmp_a); | ||
120 | do_something_else_with(tmp_a); | ||
121 | |||
122 | If the compiler can prove that do_something_with() does not store to the | ||
123 | variable a, then the compiler is within its rights to manufacture an | ||
124 | additional load as follows: | ||
125 | |||
126 | tmp_a = a; | ||
127 | do_something_with(tmp_a); | ||
128 | tmp_a = a; | ||
129 | do_something_else_with(tmp_a); | ||
130 | |||
131 | This could fatally confuse your code if it expected the same value | ||
132 | to be passed to do_something_with() and do_something_else_with(). | ||
133 | |||
134 | The compiler would be likely to manufacture this additional load if | ||
135 | do_something_with() was an inline function that made very heavy use | ||
136 | of registers: reloading from variable a could save a flush to the | ||
137 | stack and later reload. To prevent the compiler from attacking your | ||
138 | code in this manner, write the following: | ||
139 | |||
140 | tmp_a = ACCESS_ONCE(a); | ||
141 | do_something_with(tmp_a); | ||
142 | do_something_else_with(tmp_a); | ||
143 | |||
144 | For a final example, consider the following code, assuming that the | ||
145 | variable a is set at boot time before the second CPU is brought online | ||
146 | and never changed later, so that memory barriers are not needed: | ||
147 | |||
148 | if (a) | ||
149 | b = 9; | ||
150 | else | ||
151 | b = 42; | ||
152 | |||
153 | The compiler is within its rights to manufacture an additional store | ||
154 | by transforming the above code into the following: | ||
155 | |||
156 | b = 42; | ||
157 | if (a) | ||
158 | b = 9; | ||
159 | |||
160 | This could come as a fatal surprise to other code running concurrently | ||
161 | that expected b to never have the value 42 if a was zero. To prevent | ||
162 | the compiler from doing this, write something like: | ||
163 | |||
164 | if (a) | ||
165 | ACCESS_ONCE(b) = 9; | ||
166 | else | ||
167 | ACCESS_ONCE(b) = 42; | ||
168 | |||
169 | Don't even -think- about doing this without proper use of memory barriers, | ||
170 | locks, or atomic operations if variable a can change at runtime! | ||
171 | |||
172 | *** WARNING: ACCESS_ONCE() DOES NOT IMPLY A BARRIER! *** | ||
173 | |||
87 | Now, we move onto the atomic operation interfaces typically implemented with | 174 | Now, we move onto the atomic operation interfaces typically implemented with |
88 | the help of assembly code. | 175 | the help of assembly code. |
89 | 176 | ||
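For reference, the ACCESS_ONCE() macro used throughout the examples above boils down to a volatile cast; a minimal sketch of the idea (the kernel's definition lives in include/linux/compiler.h) is:

        /*
         * Essentially what the kernel's ACCESS_ONCE() does: the volatile cast
         * forces the compiler to emit exactly one load or store for this access,
         * preventing the refetched loads and invented stores shown above.
         */
        #define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))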
diff --git a/Documentation/lockdep-design.txt b/Documentation/lockdep-design.txt index abf768c681e2..5dbc99c04f6e 100644 --- a/Documentation/lockdep-design.txt +++ b/Documentation/lockdep-design.txt | |||
@@ -221,3 +221,66 @@ when the chain is validated for the first time, is then put into a hash | |||
221 | table, which hash-table can be checked in a lockfree manner. If the | 221 | table, which hash-table can be checked in a lockfree manner. If the |
222 | locking chain occurs again later on, the hash table tells us that we | 222 | locking chain occurs again later on, the hash table tells us that we |
223 | dont have to validate the chain again. | 223 | dont have to validate the chain again. |
224 | |||
225 | Troubleshooting: | ||
226 | ---------------- | ||
227 | |||
228 | The validator tracks a maximum of MAX_LOCKDEP_KEYS number of lock classes. | ||
229 | Exceeding this number will trigger the following lockdep warning: | ||
230 | |||
231 | (DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS)) | ||
232 | |||
233 | By default, MAX_LOCKDEP_KEYS is currently set to 8191, and typical | ||
234 | desktop systems have less than 1,000 lock classes, so this warning | ||
235 | normally results from lock-class leakage or failure to properly | ||
236 | initialize locks. These two problems are illustrated below: | ||
237 | |||
238 | 1. Repeated module loading and unloading while running the validator | ||
239 | will result in lock-class leakage. The issue here is that each | ||
240 | load of the module will create a new set of lock classes for | ||
241 | that module's locks, but module unloading does not remove old | ||
242 | classes (see below discussion of reuse of lock classes for why). | ||
243 | Therefore, if that module is loaded and unloaded repeatedly, | ||
244 | the number of lock classes will eventually reach the maximum. | ||
245 | |||
246 | 2. Using structures such as arrays that have large numbers of | ||
247 | locks that are not explicitly initialized. For example, | ||
248 | a hash table with 8192 buckets where each bucket has its own | ||
249 | spinlock_t will consume 8192 lock classes -unless- each spinlock | ||
250 | is explicitly initialized at runtime, for example, using the | ||
251 | run-time spin_lock_init() as opposed to compile-time initializers | ||
252 | such as __SPIN_LOCK_UNLOCKED(). Failure to properly initialize | ||
253 | the per-bucket spinlocks would guarantee lock-class overflow. | ||
254 | In contrast, a loop that called spin_lock_init() on each lock | ||
255 | would place all 8192 locks into a single lock class. | ||
256 | |||
257 | The moral of this story is that you should always explicitly | ||
258 | initialize your locks. | ||
259 | |||
260 | One might argue that the validator should be modified to allow | ||
261 | lock classes to be reused. However, if you are tempted to make this | ||
262 | argument, first review the code and think through the changes that would | ||
263 | be required, keeping in mind that the lock classes to be removed are | ||
264 | likely to be linked into the lock-dependency graph. This turns out to | ||
265 | be harder to do than to say. | ||
266 | |||
267 | Of course, if you do run out of lock classes, the next thing to do is | ||
268 | to find the offending lock classes. First, the following command gives | ||
269 | you the number of lock classes currently in use along with the maximum: | ||
270 | |||
271 | grep "lock-classes" /proc/lockdep_stats | ||
272 | |||
273 | This command produces the following output on a modest system: | ||
274 | |||
275 | lock-classes: 748 [max: 8191] | ||
276 | |||
277 | If the number allocated (748 above) increases continually over time, | ||
278 | then there is likely a leak. The following command can be used to | ||
279 | identify the leaking lock classes: | ||
280 | |||
281 | grep "BD" /proc/lockdep | ||
282 | |||
283 | Run the command and save the output, then compare against the output from | ||
284 | a later run of this command to identify the leakers. This same output | ||
285 | can also help you find situations where runtime lock initialization has | ||
286 | been omitted. | ||
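The hash-table scenario in item 2 of the troubleshooting text can be made concrete; below is a minimal sketch (structure and function names are invented, the 8192 bucket count just mirrors the text) of why run-time initialization keeps all bucket locks in one lock class.

        #include <linux/list.h>
        #include <linux/spinlock.h>

        #define DEMO_HASH_BUCKETS 8192          /* illustrative size matching the text */

        static struct demo_bucket {
                spinlock_t lock;
                struct hlist_head chain;
        } demo_hash[DEMO_HASH_BUCKETS];

        static void demo_hash_init(void)
        {
                int i;

                /*
                 * A single spin_lock_init() call site means a single lock class
                 * for all 8192 bucket locks, instead of 8192 separate classes
                 * that would overflow MAX_LOCKDEP_KEYS.
                 */
                for (i = 0; i < DEMO_HASH_BUCKETS; i++) {
                        spin_lock_init(&demo_hash[i].lock);
                        INIT_HLIST_HEAD(&demo_hash[i].chain);
                }
        }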
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 3d0c6fb74ae4..e8e8fe505df1 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c | |||
@@ -183,7 +183,8 @@ void cpu_idle(void) | |||
183 | 183 | ||
184 | /* endless idle loop with no priority at all */ | 184 | /* endless idle loop with no priority at all */ |
185 | while (1) { | 185 | while (1) { |
186 | tick_nohz_stop_sched_tick(1); | 186 | tick_nohz_idle_enter(); |
187 | rcu_idle_enter(); | ||
187 | leds_event(led_idle_start); | 188 | leds_event(led_idle_start); |
188 | while (!need_resched()) { | 189 | while (!need_resched()) { |
189 | #ifdef CONFIG_HOTPLUG_CPU | 190 | #ifdef CONFIG_HOTPLUG_CPU |
@@ -213,7 +214,8 @@ void cpu_idle(void) | |||
213 | } | 214 | } |
214 | } | 215 | } |
215 | leds_event(led_idle_end); | 216 | leds_event(led_idle_end); |
216 | tick_nohz_restart_sched_tick(); | 217 | rcu_idle_exit(); |
218 | tick_nohz_idle_exit(); | ||
217 | preempt_enable_no_resched(); | 219 | preempt_enable_no_resched(); |
218 | schedule(); | 220 | schedule(); |
219 | preempt_disable(); | 221 | preempt_disable(); |
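The ARM change above is the first of many identical conversions below: tick_nohz_stop_sched_tick(1) becomes tick_nohz_idle_enter() followed by rcu_idle_enter(), and tick_nohz_restart_sched_tick() becomes rcu_idle_exit() followed by tick_nohz_idle_exit(). Stripped of arch-specific details (arch_idle_body() is a placeholder, not a real function), the converted loop looks roughly like:

        #include <linux/rcupdate.h>
        #include <linux/sched.h>
        #include <linux/tick.h>

        void cpu_idle(void)
        {
                /* endless idle loop with no priority at all */
                while (1) {
                        tick_nohz_idle_enter();         /* stop the scheduler tick */
                        rcu_idle_enter();               /* tell RCU to ignore this CPU */

                        while (!need_resched())
                                arch_idle_body();       /* placeholder for the arch's idle code */

                        rcu_idle_exit();                /* CPU is visible to RCU again */
                        tick_nohz_idle_exit();          /* restart the scheduler tick */
                        preempt_enable_no_resched();
                        schedule();
                        preempt_disable();
                }
        }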
diff --git a/arch/avr32/kernel/process.c b/arch/avr32/kernel/process.c index ef5a2a08fcca..ea3395750324 100644 --- a/arch/avr32/kernel/process.c +++ b/arch/avr32/kernel/process.c | |||
@@ -34,10 +34,12 @@ void cpu_idle(void) | |||
34 | { | 34 | { |
35 | /* endless idle loop with no priority at all */ | 35 | /* endless idle loop with no priority at all */ |
36 | while (1) { | 36 | while (1) { |
37 | tick_nohz_stop_sched_tick(1); | 37 | tick_nohz_idle_enter(); |
38 | rcu_idle_enter(); | ||
38 | while (!need_resched()) | 39 | while (!need_resched()) |
39 | cpu_idle_sleep(); | 40 | cpu_idle_sleep(); |
40 | tick_nohz_restart_sched_tick(); | 41 | rcu_idle_exit(); |
42 | tick_nohz_idle_exit(); | ||
41 | preempt_enable_no_resched(); | 43 | preempt_enable_no_resched(); |
42 | schedule(); | 44 | schedule(); |
43 | preempt_disable(); | 45 | preempt_disable(); |
diff --git a/arch/blackfin/kernel/process.c b/arch/blackfin/kernel/process.c index 6a80a9e9fc4a..8dd0416673cb 100644 --- a/arch/blackfin/kernel/process.c +++ b/arch/blackfin/kernel/process.c | |||
@@ -88,10 +88,12 @@ void cpu_idle(void) | |||
88 | #endif | 88 | #endif |
89 | if (!idle) | 89 | if (!idle) |
90 | idle = default_idle; | 90 | idle = default_idle; |
91 | tick_nohz_stop_sched_tick(1); | 91 | tick_nohz_idle_enter(); |
92 | rcu_idle_enter(); | ||
92 | while (!need_resched()) | 93 | while (!need_resched()) |
93 | idle(); | 94 | idle(); |
94 | tick_nohz_restart_sched_tick(); | 95 | rcu_idle_exit(); |
96 | tick_nohz_idle_exit(); | ||
95 | preempt_enable_no_resched(); | 97 | preempt_enable_no_resched(); |
96 | schedule(); | 98 | schedule(); |
97 | preempt_disable(); | 99 | preempt_disable(); |
diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c index 95cc295976a7..7dcb5bfffb75 100644 --- a/arch/microblaze/kernel/process.c +++ b/arch/microblaze/kernel/process.c | |||
@@ -103,10 +103,12 @@ void cpu_idle(void) | |||
103 | if (!idle) | 103 | if (!idle) |
104 | idle = default_idle; | 104 | idle = default_idle; |
105 | 105 | ||
106 | tick_nohz_stop_sched_tick(1); | 106 | tick_nohz_idle_enter(); |
107 | rcu_idle_enter(); | ||
107 | while (!need_resched()) | 108 | while (!need_resched()) |
108 | idle(); | 109 | idle(); |
109 | tick_nohz_restart_sched_tick(); | 110 | rcu_idle_exit(); |
111 | tick_nohz_idle_exit(); | ||
110 | 112 | ||
111 | preempt_enable_no_resched(); | 113 | preempt_enable_no_resched(); |
112 | schedule(); | 114 | schedule(); |
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index c47f96e453c0..7955409051c4 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c | |||
@@ -56,7 +56,8 @@ void __noreturn cpu_idle(void) | |||
56 | 56 | ||
57 | /* endless idle loop with no priority at all */ | 57 | /* endless idle loop with no priority at all */ |
58 | while (1) { | 58 | while (1) { |
59 | tick_nohz_stop_sched_tick(1); | 59 | tick_nohz_idle_enter(); |
60 | rcu_idle_enter(); | ||
60 | while (!need_resched() && cpu_online(cpu)) { | 61 | while (!need_resched() && cpu_online(cpu)) { |
61 | #ifdef CONFIG_MIPS_MT_SMTC | 62 | #ifdef CONFIG_MIPS_MT_SMTC |
62 | extern void smtc_idle_loop_hook(void); | 63 | extern void smtc_idle_loop_hook(void); |
@@ -77,7 +78,8 @@ void __noreturn cpu_idle(void) | |||
77 | system_state == SYSTEM_BOOTING)) | 78 | system_state == SYSTEM_BOOTING)) |
78 | play_dead(); | 79 | play_dead(); |
79 | #endif | 80 | #endif |
80 | tick_nohz_restart_sched_tick(); | 81 | rcu_idle_exit(); |
82 | tick_nohz_idle_exit(); | ||
81 | preempt_enable_no_resched(); | 83 | preempt_enable_no_resched(); |
82 | schedule(); | 84 | schedule(); |
83 | preempt_disable(); | 85 | preempt_disable(); |
diff --git a/arch/openrisc/kernel/idle.c b/arch/openrisc/kernel/idle.c index d5bc5f813e89..e5fc78877830 100644 --- a/arch/openrisc/kernel/idle.c +++ b/arch/openrisc/kernel/idle.c | |||
@@ -51,7 +51,8 @@ void cpu_idle(void) | |||
51 | 51 | ||
52 | /* endless idle loop with no priority at all */ | 52 | /* endless idle loop with no priority at all */ |
53 | while (1) { | 53 | while (1) { |
54 | tick_nohz_stop_sched_tick(1); | 54 | tick_nohz_idle_enter(); |
55 | rcu_idle_enter(); | ||
55 | 56 | ||
56 | while (!need_resched()) { | 57 | while (!need_resched()) { |
57 | check_pgt_cache(); | 58 | check_pgt_cache(); |
@@ -69,7 +70,8 @@ void cpu_idle(void) | |||
69 | set_thread_flag(TIF_POLLING_NRFLAG); | 70 | set_thread_flag(TIF_POLLING_NRFLAG); |
70 | } | 71 | } |
71 | 72 | ||
72 | tick_nohz_restart_sched_tick(); | 73 | rcu_idle_exit(); |
74 | tick_nohz_idle_exit(); | ||
73 | preempt_enable_no_resched(); | 75 | preempt_enable_no_resched(); |
74 | schedule(); | 76 | schedule(); |
75 | preempt_disable(); | 77 | preempt_disable(); |
diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index 39a2baa6ad58..9c3cd490b1bd 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c | |||
@@ -46,6 +46,12 @@ static int __init powersave_off(char *arg) | |||
46 | } | 46 | } |
47 | __setup("powersave=off", powersave_off); | 47 | __setup("powersave=off", powersave_off); |
48 | 48 | ||
49 | #if defined(CONFIG_PPC_PSERIES) && defined(CONFIG_TRACEPOINTS) | ||
50 | static const bool idle_uses_rcu = 1; | ||
51 | #else | ||
52 | static const bool idle_uses_rcu; | ||
53 | #endif | ||
54 | |||
49 | /* | 55 | /* |
50 | * The body of the idle task. | 56 | * The body of the idle task. |
51 | */ | 57 | */ |
@@ -56,7 +62,10 @@ void cpu_idle(void) | |||
56 | 62 | ||
57 | set_thread_flag(TIF_POLLING_NRFLAG); | 63 | set_thread_flag(TIF_POLLING_NRFLAG); |
58 | while (1) { | 64 | while (1) { |
59 | tick_nohz_stop_sched_tick(1); | 65 | tick_nohz_idle_enter(); |
66 | if (!idle_uses_rcu) | ||
67 | rcu_idle_enter(); | ||
68 | |||
60 | while (!need_resched() && !cpu_should_die()) { | 69 | while (!need_resched() && !cpu_should_die()) { |
61 | ppc64_runlatch_off(); | 70 | ppc64_runlatch_off(); |
62 | 71 | ||
@@ -93,7 +102,9 @@ void cpu_idle(void) | |||
93 | 102 | ||
94 | HMT_medium(); | 103 | HMT_medium(); |
95 | ppc64_runlatch_on(); | 104 | ppc64_runlatch_on(); |
96 | tick_nohz_restart_sched_tick(); | 105 | if (!idle_uses_rcu) |
106 | rcu_idle_exit(); | ||
107 | tick_nohz_idle_exit(); | ||
97 | preempt_enable_no_resched(); | 108 | preempt_enable_no_resched(); |
98 | if (cpu_should_die()) | 109 | if (cpu_should_die()) |
99 | cpu_die(); | 110 | cpu_die(); |
diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c index ea0acbd8966d..8fc62586a973 100644 --- a/arch/powerpc/platforms/iseries/setup.c +++ b/arch/powerpc/platforms/iseries/setup.c | |||
@@ -563,7 +563,8 @@ static void yield_shared_processor(void) | |||
563 | static void iseries_shared_idle(void) | 563 | static void iseries_shared_idle(void) |
564 | { | 564 | { |
565 | while (1) { | 565 | while (1) { |
566 | tick_nohz_stop_sched_tick(1); | 566 | tick_nohz_idle_enter(); |
567 | rcu_idle_enter(); | ||
567 | while (!need_resched() && !hvlpevent_is_pending()) { | 568 | while (!need_resched() && !hvlpevent_is_pending()) { |
568 | local_irq_disable(); | 569 | local_irq_disable(); |
569 | ppc64_runlatch_off(); | 570 | ppc64_runlatch_off(); |
@@ -577,7 +578,8 @@ static void iseries_shared_idle(void) | |||
577 | } | 578 | } |
578 | 579 | ||
579 | ppc64_runlatch_on(); | 580 | ppc64_runlatch_on(); |
580 | tick_nohz_restart_sched_tick(); | 581 | rcu_idle_exit(); |
582 | tick_nohz_idle_exit(); | ||
581 | 583 | ||
582 | if (hvlpevent_is_pending()) | 584 | if (hvlpevent_is_pending()) |
583 | process_iSeries_events(); | 585 | process_iSeries_events(); |
@@ -593,7 +595,8 @@ static void iseries_dedicated_idle(void) | |||
593 | set_thread_flag(TIF_POLLING_NRFLAG); | 595 | set_thread_flag(TIF_POLLING_NRFLAG); |
594 | 596 | ||
595 | while (1) { | 597 | while (1) { |
596 | tick_nohz_stop_sched_tick(1); | 598 | tick_nohz_idle_enter(); |
599 | rcu_idle_enter(); | ||
597 | if (!need_resched()) { | 600 | if (!need_resched()) { |
598 | while (!need_resched()) { | 601 | while (!need_resched()) { |
599 | ppc64_runlatch_off(); | 602 | ppc64_runlatch_off(); |
@@ -610,7 +613,8 @@ static void iseries_dedicated_idle(void) | |||
610 | } | 613 | } |
611 | 614 | ||
612 | ppc64_runlatch_on(); | 615 | ppc64_runlatch_on(); |
613 | tick_nohz_restart_sched_tick(); | 616 | rcu_idle_exit(); |
617 | tick_nohz_idle_exit(); | ||
614 | preempt_enable_no_resched(); | 618 | preempt_enable_no_resched(); |
615 | schedule(); | 619 | schedule(); |
616 | preempt_disable(); | 620 | preempt_disable(); |
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 27a49508b410..52d429be6c76 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c | |||
@@ -555,6 +555,8 @@ void __trace_hcall_entry(unsigned long opcode, unsigned long *args) | |||
555 | 555 | ||
556 | (*depth)++; | 556 | (*depth)++; |
557 | trace_hcall_entry(opcode, args); | 557 | trace_hcall_entry(opcode, args); |
558 | if (opcode == H_CEDE) | ||
559 | rcu_idle_enter(); | ||
558 | (*depth)--; | 560 | (*depth)--; |
559 | 561 | ||
560 | out: | 562 | out: |
@@ -575,6 +577,8 @@ void __trace_hcall_exit(long opcode, unsigned long retval, | |||
575 | goto out; | 577 | goto out; |
576 | 578 | ||
577 | (*depth)++; | 579 | (*depth)++; |
580 | if (opcode == H_CEDE) | ||
581 | rcu_idle_exit(); | ||
578 | trace_hcall_exit(opcode, retval, retbuf); | 582 | trace_hcall_exit(opcode, retval, retbuf); |
579 | (*depth)--; | 583 | (*depth)--; |
580 | 584 | ||
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 9451b210a1b4..3201ae447990 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c | |||
@@ -91,10 +91,12 @@ static void default_idle(void) | |||
91 | void cpu_idle(void) | 91 | void cpu_idle(void) |
92 | { | 92 | { |
93 | for (;;) { | 93 | for (;;) { |
94 | tick_nohz_stop_sched_tick(1); | 94 | tick_nohz_idle_enter(); |
95 | rcu_idle_enter(); | ||
95 | while (!need_resched()) | 96 | while (!need_resched()) |
96 | default_idle(); | 97 | default_idle(); |
97 | tick_nohz_restart_sched_tick(); | 98 | rcu_idle_exit(); |
99 | tick_nohz_idle_exit(); | ||
98 | preempt_enable_no_resched(); | 100 | preempt_enable_no_resched(); |
99 | schedule(); | 101 | schedule(); |
100 | preempt_disable(); | 102 | preempt_disable(); |
diff --git a/arch/sh/kernel/idle.c b/arch/sh/kernel/idle.c index db4ecd731a00..406508d4ce74 100644 --- a/arch/sh/kernel/idle.c +++ b/arch/sh/kernel/idle.c | |||
@@ -89,7 +89,8 @@ void cpu_idle(void) | |||
89 | 89 | ||
90 | /* endless idle loop with no priority at all */ | 90 | /* endless idle loop with no priority at all */ |
91 | while (1) { | 91 | while (1) { |
92 | tick_nohz_stop_sched_tick(1); | 92 | tick_nohz_idle_enter(); |
93 | rcu_idle_enter(); | ||
93 | 94 | ||
94 | while (!need_resched()) { | 95 | while (!need_resched()) { |
95 | check_pgt_cache(); | 96 | check_pgt_cache(); |
@@ -111,7 +112,8 @@ void cpu_idle(void) | |||
111 | start_critical_timings(); | 112 | start_critical_timings(); |
112 | } | 113 | } |
113 | 114 | ||
114 | tick_nohz_restart_sched_tick(); | 115 | rcu_idle_exit(); |
116 | tick_nohz_idle_exit(); | ||
115 | preempt_enable_no_resched(); | 117 | preempt_enable_no_resched(); |
116 | schedule(); | 118 | schedule(); |
117 | preempt_disable(); | 119 | preempt_disable(); |
diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index 3739a06a76cb..39d8b05201a2 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c | |||
@@ -95,12 +95,14 @@ void cpu_idle(void) | |||
95 | set_thread_flag(TIF_POLLING_NRFLAG); | 95 | set_thread_flag(TIF_POLLING_NRFLAG); |
96 | 96 | ||
97 | while(1) { | 97 | while(1) { |
98 | tick_nohz_stop_sched_tick(1); | 98 | tick_nohz_idle_enter(); |
99 | rcu_idle_enter(); | ||
99 | 100 | ||
100 | while (!need_resched() && !cpu_is_offline(cpu)) | 101 | while (!need_resched() && !cpu_is_offline(cpu)) |
101 | sparc64_yield(cpu); | 102 | sparc64_yield(cpu); |
102 | 103 | ||
103 | tick_nohz_restart_sched_tick(); | 104 | rcu_idle_exit(); |
105 | tick_nohz_idle_exit(); | ||
104 | 106 | ||
105 | preempt_enable_no_resched(); | 107 | preempt_enable_no_resched(); |
106 | 108 | ||
diff --git a/arch/sparc/kernel/setup_32.c b/arch/sparc/kernel/setup_32.c index fe1e3fc31bc5..ffb883ddd0f0 100644 --- a/arch/sparc/kernel/setup_32.c +++ b/arch/sparc/kernel/setup_32.c | |||
@@ -84,7 +84,7 @@ static void prom_sync_me(void) | |||
84 | 84 | ||
85 | prom_printf("PROM SYNC COMMAND...\n"); | 85 | prom_printf("PROM SYNC COMMAND...\n"); |
86 | show_free_areas(0); | 86 | show_free_areas(0); |
87 | if(current->pid != 0) { | 87 | if (!is_idle_task(current)) { |
88 | local_irq_enable(); | 88 | local_irq_enable(); |
89 | sys_sync(); | 89 | sys_sync(); |
90 | local_irq_disable(); | 90 | local_irq_disable(); |
diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index 9c45d8bbdf57..4c1ac6e5347a 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c | |||
@@ -85,7 +85,8 @@ void cpu_idle(void) | |||
85 | 85 | ||
86 | /* endless idle loop with no priority at all */ | 86 | /* endless idle loop with no priority at all */ |
87 | while (1) { | 87 | while (1) { |
88 | tick_nohz_stop_sched_tick(1); | 88 | tick_nohz_idle_enter(); |
89 | rcu_idle_enter(); | ||
89 | while (!need_resched()) { | 90 | while (!need_resched()) { |
90 | if (cpu_is_offline(cpu)) | 91 | if (cpu_is_offline(cpu)) |
91 | BUG(); /* no HOTPLUG_CPU */ | 92 | BUG(); /* no HOTPLUG_CPU */ |
@@ -105,7 +106,8 @@ void cpu_idle(void) | |||
105 | local_irq_enable(); | 106 | local_irq_enable(); |
106 | current_thread_info()->status |= TS_POLLING; | 107 | current_thread_info()->status |= TS_POLLING; |
107 | } | 108 | } |
108 | tick_nohz_restart_sched_tick(); | 109 | rcu_idle_exit(); |
110 | tick_nohz_idle_exit(); | ||
109 | preempt_enable_no_resched(); | 111 | preempt_enable_no_resched(); |
110 | schedule(); | 112 | schedule(); |
111 | preempt_disable(); | 113 | preempt_disable(); |
diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c index 25b7b90fd620..c1eaaa1fcc20 100644 --- a/arch/tile/mm/fault.c +++ b/arch/tile/mm/fault.c | |||
@@ -54,7 +54,7 @@ static noinline void force_sig_info_fault(const char *type, int si_signo, | |||
54 | if (unlikely(tsk->pid < 2)) { | 54 | if (unlikely(tsk->pid < 2)) { |
55 | panic("Signal %d (code %d) at %#lx sent to %s!", | 55 | panic("Signal %d (code %d) at %#lx sent to %s!", |
56 | si_signo, si_code & 0xffff, address, | 56 | si_signo, si_code & 0xffff, address, |
57 | tsk->pid ? "init" : "the idle task"); | 57 | is_idle_task(tsk) ? "the idle task" : "init"); |
58 | } | 58 | } |
59 | 59 | ||
60 | info.si_signo = si_signo; | 60 | info.si_signo = si_signo; |
@@ -515,7 +515,7 @@ no_context: | |||
515 | 515 | ||
516 | if (unlikely(tsk->pid < 2)) { | 516 | if (unlikely(tsk->pid < 2)) { |
517 | panic("Kernel page fault running %s!", | 517 | panic("Kernel page fault running %s!", |
518 | tsk->pid ? "init" : "the idle task"); | 518 | is_idle_task(tsk) ? "the idle task" : "init"); |
519 | } | 519 | } |
520 | 520 | ||
521 | /* | 521 | /* |
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index c5338351aecd..69f24905abdc 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c | |||
@@ -246,10 +246,12 @@ void default_idle(void) | |||
246 | if (need_resched()) | 246 | if (need_resched()) |
247 | schedule(); | 247 | schedule(); |
248 | 248 | ||
249 | tick_nohz_stop_sched_tick(1); | 249 | tick_nohz_idle_enter(); |
250 | rcu_idle_enter(); | ||
250 | nsecs = disable_timer(); | 251 | nsecs = disable_timer(); |
251 | idle_sleep(nsecs); | 252 | idle_sleep(nsecs); |
252 | tick_nohz_restart_sched_tick(); | 253 | rcu_idle_exit(); |
254 | tick_nohz_idle_exit(); | ||
253 | } | 255 | } |
254 | } | 256 | } |
255 | 257 | ||
diff --git a/arch/unicore32/kernel/process.c b/arch/unicore32/kernel/process.c index ba401df971ed..52edc2b62873 100644 --- a/arch/unicore32/kernel/process.c +++ b/arch/unicore32/kernel/process.c | |||
@@ -55,7 +55,8 @@ void cpu_idle(void) | |||
55 | { | 55 | { |
56 | /* endless idle loop with no priority at all */ | 56 | /* endless idle loop with no priority at all */ |
57 | while (1) { | 57 | while (1) { |
58 | tick_nohz_stop_sched_tick(1); | 58 | tick_nohz_idle_enter(); |
59 | rcu_idle_enter(); | ||
59 | while (!need_resched()) { | 60 | while (!need_resched()) { |
60 | local_irq_disable(); | 61 | local_irq_disable(); |
61 | stop_critical_timings(); | 62 | stop_critical_timings(); |
@@ -63,7 +64,8 @@ void cpu_idle(void) | |||
63 | local_irq_enable(); | 64 | local_irq_enable(); |
64 | start_critical_timings(); | 65 | start_critical_timings(); |
65 | } | 66 | } |
66 | tick_nohz_restart_sched_tick(); | 67 | rcu_idle_exit(); |
68 | tick_nohz_idle_exit(); | ||
67 | preempt_enable_no_resched(); | 69 | preempt_enable_no_resched(); |
68 | schedule(); | 70 | schedule(); |
69 | preempt_disable(); | 71 | preempt_disable(); |
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index f98d84caf94c..2cd2d93643dc 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c | |||
@@ -876,8 +876,8 @@ void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs) | |||
876 | * Besides, if we don't timer interrupts ignore the global | 876 | * Besides, if we don't timer interrupts ignore the global |
877 | * interrupt lock, which is the WrongThing (tm) to do. | 877 | * interrupt lock, which is the WrongThing (tm) to do. |
878 | */ | 878 | */ |
879 | exit_idle(); | ||
880 | irq_enter(); | 879 | irq_enter(); |
880 | exit_idle(); | ||
881 | local_apic_timer_interrupt(); | 881 | local_apic_timer_interrupt(); |
882 | irq_exit(); | 882 | irq_exit(); |
883 | 883 | ||
@@ -1809,8 +1809,8 @@ void smp_spurious_interrupt(struct pt_regs *regs) | |||
1809 | { | 1809 | { |
1810 | u32 v; | 1810 | u32 v; |
1811 | 1811 | ||
1812 | exit_idle(); | ||
1813 | irq_enter(); | 1812 | irq_enter(); |
1813 | exit_idle(); | ||
1814 | /* | 1814 | /* |
1815 | * Check if this really is a spurious interrupt and ACK it | 1815 | * Check if this really is a spurious interrupt and ACK it |
1816 | * if it is a vectored one. Just in case... | 1816 | * if it is a vectored one. Just in case... |
@@ -1846,8 +1846,8 @@ void smp_error_interrupt(struct pt_regs *regs) | |||
1846 | "Illegal register address", /* APIC Error Bit 7 */ | 1846 | "Illegal register address", /* APIC Error Bit 7 */ |
1847 | }; | 1847 | }; |
1848 | 1848 | ||
1849 | exit_idle(); | ||
1850 | irq_enter(); | 1849 | irq_enter(); |
1850 | exit_idle(); | ||
1851 | /* First tickle the hardware, only then report what went on. -- REW */ | 1851 | /* First tickle the hardware, only then report what went on. -- REW */ |
1852 | v0 = apic_read(APIC_ESR); | 1852 | v0 = apic_read(APIC_ESR); |
1853 | apic_write(APIC_ESR, 0); | 1853 | apic_write(APIC_ESR, 0); |
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 6d939d7847e2..898055585516 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c | |||
@@ -2421,8 +2421,8 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) | |||
2421 | unsigned vector, me; | 2421 | unsigned vector, me; |
2422 | 2422 | ||
2423 | ack_APIC_irq(); | 2423 | ack_APIC_irq(); |
2424 | exit_idle(); | ||
2425 | irq_enter(); | 2424 | irq_enter(); |
2425 | exit_idle(); | ||
2426 | 2426 | ||
2427 | me = smp_processor_id(); | 2427 | me = smp_processor_id(); |
2428 | for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { | 2428 | for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { |
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 787e06c84ea6..ce215616d5b9 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c | |||
@@ -397,8 +397,8 @@ static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt; | |||
397 | 397 | ||
398 | asmlinkage void smp_thermal_interrupt(struct pt_regs *regs) | 398 | asmlinkage void smp_thermal_interrupt(struct pt_regs *regs) |
399 | { | 399 | { |
400 | exit_idle(); | ||
401 | irq_enter(); | 400 | irq_enter(); |
401 | exit_idle(); | ||
402 | inc_irq_stat(irq_thermal_count); | 402 | inc_irq_stat(irq_thermal_count); |
403 | smp_thermal_vector(); | 403 | smp_thermal_vector(); |
404 | irq_exit(); | 404 | irq_exit(); |
diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mcheck/threshold.c index d746df2909c9..aa578cadb940 100644 --- a/arch/x86/kernel/cpu/mcheck/threshold.c +++ b/arch/x86/kernel/cpu/mcheck/threshold.c | |||
@@ -19,8 +19,8 @@ void (*mce_threshold_vector)(void) = default_threshold_interrupt; | |||
19 | 19 | ||
20 | asmlinkage void smp_threshold_interrupt(void) | 20 | asmlinkage void smp_threshold_interrupt(void) |
21 | { | 21 | { |
22 | exit_idle(); | ||
23 | irq_enter(); | 22 | irq_enter(); |
23 | exit_idle(); | ||
24 | inc_irq_stat(irq_threshold_count); | 24 | inc_irq_stat(irq_threshold_count); |
25 | mce_threshold_vector(); | 25 | mce_threshold_vector(); |
26 | irq_exit(); | 26 | irq_exit(); |
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 429e0c92924e..5d31e5bdbf85 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c | |||
@@ -181,8 +181,8 @@ unsigned int __irq_entry do_IRQ(struct pt_regs *regs) | |||
181 | unsigned vector = ~regs->orig_ax; | 181 | unsigned vector = ~regs->orig_ax; |
182 | unsigned irq; | 182 | unsigned irq; |
183 | 183 | ||
184 | exit_idle(); | ||
185 | irq_enter(); | 184 | irq_enter(); |
185 | exit_idle(); | ||
186 | 186 | ||
187 | irq = __this_cpu_read(vector_irq[vector]); | 187 | irq = __this_cpu_read(vector_irq[vector]); |
188 | 188 | ||
@@ -209,10 +209,10 @@ void smp_x86_platform_ipi(struct pt_regs *regs) | |||
209 | 209 | ||
210 | ack_APIC_irq(); | 210 | ack_APIC_irq(); |
211 | 211 | ||
212 | exit_idle(); | ||
213 | |||
214 | irq_enter(); | 212 | irq_enter(); |
215 | 213 | ||
214 | exit_idle(); | ||
215 | |||
216 | inc_irq_stat(x86_platform_ipis); | 216 | inc_irq_stat(x86_platform_ipis); |
217 | 217 | ||
218 | if (x86_platform_ipi_callback) | 218 | if (x86_platform_ipi_callback) |
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 795b79f984c2..485204f58cda 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
@@ -99,7 +99,8 @@ void cpu_idle(void) | |||
99 | 99 | ||
100 | /* endless idle loop with no priority at all */ | 100 | /* endless idle loop with no priority at all */ |
101 | while (1) { | 101 | while (1) { |
102 | tick_nohz_stop_sched_tick(1); | 102 | tick_nohz_idle_enter(); |
103 | rcu_idle_enter(); | ||
103 | while (!need_resched()) { | 104 | while (!need_resched()) { |
104 | 105 | ||
105 | check_pgt_cache(); | 106 | check_pgt_cache(); |
@@ -116,7 +117,8 @@ void cpu_idle(void) | |||
116 | pm_idle(); | 117 | pm_idle(); |
117 | start_critical_timings(); | 118 | start_critical_timings(); |
118 | } | 119 | } |
119 | tick_nohz_restart_sched_tick(); | 120 | rcu_idle_exit(); |
121 | tick_nohz_idle_exit(); | ||
120 | preempt_enable_no_resched(); | 122 | preempt_enable_no_resched(); |
121 | schedule(); | 123 | schedule(); |
122 | preempt_disable(); | 124 | preempt_disable(); |
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 3bd7e6eebf31..64e926c89a6f 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -122,7 +122,7 @@ void cpu_idle(void) | |||
122 | 122 | ||
123 | /* endless idle loop with no priority at all */ | 123 | /* endless idle loop with no priority at all */ |
124 | while (1) { | 124 | while (1) { |
125 | tick_nohz_stop_sched_tick(1); | 125 | tick_nohz_idle_enter(); |
126 | while (!need_resched()) { | 126 | while (!need_resched()) { |
127 | 127 | ||
128 | rmb(); | 128 | rmb(); |
@@ -139,8 +139,14 @@ void cpu_idle(void) | |||
139 | enter_idle(); | 139 | enter_idle(); |
140 | /* Don't trace irqs off for idle */ | 140 | /* Don't trace irqs off for idle */ |
141 | stop_critical_timings(); | 141 | stop_critical_timings(); |
142 | |||
143 | /* enter_idle() needs rcu for notifiers */ | ||
144 | rcu_idle_enter(); | ||
145 | |||
142 | if (cpuidle_idle_call()) | 146 | if (cpuidle_idle_call()) |
143 | pm_idle(); | 147 | pm_idle(); |
148 | |||
149 | rcu_idle_exit(); | ||
144 | start_critical_timings(); | 150 | start_critical_timings(); |
145 | 151 | ||
146 | /* In many cases the interrupt that ended idle | 152 | /* In many cases the interrupt that ended idle |
@@ -149,7 +155,7 @@ void cpu_idle(void) | |||
149 | __exit_idle(); | 155 | __exit_idle(); |
150 | } | 156 | } |
151 | 157 | ||
152 | tick_nohz_restart_sched_tick(); | 158 | tick_nohz_idle_exit(); |
153 | preempt_enable_no_resched(); | 159 | preempt_enable_no_resched(); |
154 | schedule(); | 160 | schedule(); |
155 | preempt_disable(); | 161 | preempt_disable(); |
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 251acea3d359..3991502b21e5 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c | |||
@@ -247,6 +247,13 @@ struct sys_device *get_cpu_sysdev(unsigned cpu) | |||
247 | } | 247 | } |
248 | EXPORT_SYMBOL_GPL(get_cpu_sysdev); | 248 | EXPORT_SYMBOL_GPL(get_cpu_sysdev); |
249 | 249 | ||
250 | bool cpu_is_hotpluggable(unsigned cpu) | ||
251 | { | ||
252 | struct sys_device *dev = get_cpu_sysdev(cpu); | ||
253 | return dev && container_of(dev, struct cpu, sysdev)->hotpluggable; | ||
254 | } | ||
255 | EXPORT_SYMBOL_GPL(cpu_is_hotpluggable); | ||
256 | |||
250 | int __init cpu_dev_init(void) | 257 | int __init cpu_dev_init(void) |
251 | { | 258 | { |
252 | int err; | 259 | int err; |
diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 6cb60fd2ea84..305c263021e7 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h | |||
@@ -27,6 +27,7 @@ struct cpu { | |||
27 | 27 | ||
28 | extern int register_cpu(struct cpu *cpu, int num); | 28 | extern int register_cpu(struct cpu *cpu, int num); |
29 | extern struct sys_device *get_cpu_sysdev(unsigned cpu); | 29 | extern struct sys_device *get_cpu_sysdev(unsigned cpu); |
30 | extern bool cpu_is_hotpluggable(unsigned cpu); | ||
30 | 31 | ||
31 | extern int cpu_add_sysdev_attr(struct sysdev_attribute *attr); | 32 | extern int cpu_add_sysdev_attr(struct sysdev_attribute *attr); |
32 | extern void cpu_remove_sysdev_attr(struct sysdev_attribute *attr); | 33 | extern void cpu_remove_sysdev_attr(struct sysdev_attribute *attr); |
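The new cpu_is_hotpluggable() helper (implemented in drivers/base/cpu.c above and declared here) lets callers such as rcutorture check a CPU before trying to offline it. A hedged usage sketch, with an invented caller name:

        #include <linux/cpu.h>
        #include <linux/errno.h>

        /* Only try to offline CPUs that the platform actually marks hotpluggable. */
        static int demo_try_offline_cpu(unsigned int cpu)
        {
                if (!cpu_is_hotpluggable(cpu))
                        return -EINVAL;
                return cpu_down(cpu);           /* requires CONFIG_HOTPLUG_CPU=y */
        }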
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index f743883f769e..bb7f30971858 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h | |||
@@ -139,20 +139,7 @@ static inline void account_system_vtime(struct task_struct *tsk) | |||
139 | extern void account_system_vtime(struct task_struct *tsk); | 139 | extern void account_system_vtime(struct task_struct *tsk); |
140 | #endif | 140 | #endif |
141 | 141 | ||
142 | #if defined(CONFIG_NO_HZ) | ||
143 | #if defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU) | 142 | #if defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU) |
144 | extern void rcu_enter_nohz(void); | ||
145 | extern void rcu_exit_nohz(void); | ||
146 | |||
147 | static inline void rcu_irq_enter(void) | ||
148 | { | ||
149 | rcu_exit_nohz(); | ||
150 | } | ||
151 | |||
152 | static inline void rcu_irq_exit(void) | ||
153 | { | ||
154 | rcu_enter_nohz(); | ||
155 | } | ||
156 | 143 | ||
157 | static inline void rcu_nmi_enter(void) | 144 | static inline void rcu_nmi_enter(void) |
158 | { | 145 | { |
@@ -163,17 +150,9 @@ static inline void rcu_nmi_exit(void) | |||
163 | } | 150 | } |
164 | 151 | ||
165 | #else | 152 | #else |
166 | extern void rcu_irq_enter(void); | ||
167 | extern void rcu_irq_exit(void); | ||
168 | extern void rcu_nmi_enter(void); | 153 | extern void rcu_nmi_enter(void); |
169 | extern void rcu_nmi_exit(void); | 154 | extern void rcu_nmi_exit(void); |
170 | #endif | 155 | #endif |
171 | #else | ||
172 | # define rcu_irq_enter() do { } while (0) | ||
173 | # define rcu_irq_exit() do { } while (0) | ||
174 | # define rcu_nmi_enter() do { } while (0) | ||
175 | # define rcu_nmi_exit() do { } while (0) | ||
176 | #endif /* #if defined(CONFIG_NO_HZ) */ | ||
177 | 156 | ||
178 | /* | 157 | /* |
179 | * It is safe to do non-atomic ops on ->hardirq_context, | 158 | * It is safe to do non-atomic ops on ->hardirq_context, |
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 2cf4226ade7e..81c04f4348ec 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h | |||
@@ -51,6 +51,8 @@ extern int rcutorture_runnable; /* for sysctl */ | |||
51 | #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) | 51 | #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) |
52 | extern void rcutorture_record_test_transition(void); | 52 | extern void rcutorture_record_test_transition(void); |
53 | extern void rcutorture_record_progress(unsigned long vernum); | 53 | extern void rcutorture_record_progress(unsigned long vernum); |
54 | extern void do_trace_rcu_torture_read(char *rcutorturename, | ||
55 | struct rcu_head *rhp); | ||
54 | #else | 56 | #else |
55 | static inline void rcutorture_record_test_transition(void) | 57 | static inline void rcutorture_record_test_transition(void) |
56 | { | 58 | { |
@@ -58,6 +60,12 @@ static inline void rcutorture_record_test_transition(void) | |||
58 | static inline void rcutorture_record_progress(unsigned long vernum) | 60 | static inline void rcutorture_record_progress(unsigned long vernum) |
59 | { | 61 | { |
60 | } | 62 | } |
63 | #ifdef CONFIG_RCU_TRACE | ||
64 | extern void do_trace_rcu_torture_read(char *rcutorturename, | ||
65 | struct rcu_head *rhp); | ||
66 | #else | ||
67 | #define do_trace_rcu_torture_read(rcutorturename, rhp) do { } while (0) | ||
68 | #endif | ||
61 | #endif | 69 | #endif |
62 | 70 | ||
63 | #define UINT_CMP_GE(a, b) (UINT_MAX / 2 >= (a) - (b)) | 71 | #define UINT_CMP_GE(a, b) (UINT_MAX / 2 >= (a) - (b)) |
@@ -177,23 +185,10 @@ extern void rcu_sched_qs(int cpu); | |||
177 | extern void rcu_bh_qs(int cpu); | 185 | extern void rcu_bh_qs(int cpu); |
178 | extern void rcu_check_callbacks(int cpu, int user); | 186 | extern void rcu_check_callbacks(int cpu, int user); |
179 | struct notifier_block; | 187 | struct notifier_block; |
180 | 188 | extern void rcu_idle_enter(void); | |
181 | #ifdef CONFIG_NO_HZ | 189 | extern void rcu_idle_exit(void); |
182 | 190 | extern void rcu_irq_enter(void); | |
183 | extern void rcu_enter_nohz(void); | 191 | extern void rcu_irq_exit(void); |
184 | extern void rcu_exit_nohz(void); | ||
185 | |||
186 | #else /* #ifdef CONFIG_NO_HZ */ | ||
187 | |||
188 | static inline void rcu_enter_nohz(void) | ||
189 | { | ||
190 | } | ||
191 | |||
192 | static inline void rcu_exit_nohz(void) | ||
193 | { | ||
194 | } | ||
195 | |||
196 | #endif /* #else #ifdef CONFIG_NO_HZ */ | ||
197 | 192 | ||
198 | /* | 193 | /* |
199 | * Infrastructure to implement the synchronize_() primitives in | 194 | * Infrastructure to implement the synchronize_() primitives in |
@@ -233,22 +228,30 @@ static inline void destroy_rcu_head_on_stack(struct rcu_head *head) | |||
233 | 228 | ||
234 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 229 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
235 | 230 | ||
236 | extern struct lockdep_map rcu_lock_map; | 231 | #ifdef CONFIG_PROVE_RCU |
237 | # define rcu_read_acquire() \ | 232 | extern int rcu_is_cpu_idle(void); |
238 | lock_acquire(&rcu_lock_map, 0, 0, 2, 1, NULL, _THIS_IP_) | 233 | #else /* !CONFIG_PROVE_RCU */ |
239 | # define rcu_read_release() lock_release(&rcu_lock_map, 1, _THIS_IP_) | 234 | static inline int rcu_is_cpu_idle(void) |
235 | { | ||
236 | return 0; | ||
237 | } | ||
238 | #endif /* else !CONFIG_PROVE_RCU */ | ||
240 | 239 | ||
241 | extern struct lockdep_map rcu_bh_lock_map; | 240 | static inline void rcu_lock_acquire(struct lockdep_map *map) |
242 | # define rcu_read_acquire_bh() \ | 241 | { |
243 | lock_acquire(&rcu_bh_lock_map, 0, 0, 2, 1, NULL, _THIS_IP_) | 242 | WARN_ON_ONCE(rcu_is_cpu_idle()); |
244 | # define rcu_read_release_bh() lock_release(&rcu_bh_lock_map, 1, _THIS_IP_) | 243 | lock_acquire(map, 0, 0, 2, 1, NULL, _THIS_IP_); |
244 | } | ||
245 | 245 | ||
246 | extern struct lockdep_map rcu_sched_lock_map; | 246 | static inline void rcu_lock_release(struct lockdep_map *map) |
247 | # define rcu_read_acquire_sched() \ | 247 | { |
248 | lock_acquire(&rcu_sched_lock_map, 0, 0, 2, 1, NULL, _THIS_IP_) | 248 | WARN_ON_ONCE(rcu_is_cpu_idle()); |
249 | # define rcu_read_release_sched() \ | 249 | lock_release(map, 1, _THIS_IP_); |
250 | lock_release(&rcu_sched_lock_map, 1, _THIS_IP_) | 250 | } |
251 | 251 | ||
252 | extern struct lockdep_map rcu_lock_map; | ||
253 | extern struct lockdep_map rcu_bh_lock_map; | ||
254 | extern struct lockdep_map rcu_sched_lock_map; | ||
252 | extern int debug_lockdep_rcu_enabled(void); | 255 | extern int debug_lockdep_rcu_enabled(void); |
253 | 256 | ||
254 | /** | 257 | /** |
@@ -262,11 +265,18 @@ extern int debug_lockdep_rcu_enabled(void); | |||
262 | * | 265 | * |
263 | * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot | 266 | * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot |
264 | * and while lockdep is disabled. | 267 | * and while lockdep is disabled. |
268 | * | ||
269 | * Note that rcu_read_lock() and the matching rcu_read_unlock() must | ||
270 | * occur in the same context: for example, it is illegal to invoke | ||
271 | * rcu_read_unlock() in process context if the matching rcu_read_lock() | ||
272 | * was invoked from within an irq handler. | ||
265 | */ | 273 | */ |
266 | static inline int rcu_read_lock_held(void) | 274 | static inline int rcu_read_lock_held(void) |
267 | { | 275 | { |
268 | if (!debug_lockdep_rcu_enabled()) | 276 | if (!debug_lockdep_rcu_enabled()) |
269 | return 1; | 277 | return 1; |
278 | if (rcu_is_cpu_idle()) | ||
279 | return 0; | ||
270 | return lock_is_held(&rcu_lock_map); | 280 | return lock_is_held(&rcu_lock_map); |
271 | } | 281 | } |
272 | 282 | ||
@@ -290,6 +300,19 @@ extern int rcu_read_lock_bh_held(void); | |||
290 | * | 300 | * |
291 | * Check debug_lockdep_rcu_enabled() to prevent false positives during boot | 301 | * Check debug_lockdep_rcu_enabled() to prevent false positives during boot |
292 | * and while lockdep is disabled. | 302 | * and while lockdep is disabled. |
303 | * | ||
304 | * Note that if the CPU is in the idle loop from an RCU point of | ||
305 | * view (i.e., in the section between rcu_idle_enter() and | ||
306 | * rcu_idle_exit()), then rcu_read_lock_held() returns false even if | ||
307 | * the CPU did an rcu_read_lock(). The reason is that RCU ignores | ||
308 | * CPUs that are in such a section, considering them to be in an | ||
309 | * extended quiescent state, so such a CPU is effectively never in an | ||
310 | * RCU read-side critical section regardless of what RCU primitives | ||
311 | * it invokes. This restriction is required: we need to keep an | ||
312 | * RCU-free window in idle where the CPU may enter low-power mode, | ||
313 | * so that we can report an extended quiescent state to other CPUs | ||
314 | * that have started a grace period. Otherwise we would delay every | ||
315 | * grace period for as long as we run in the idle task. | ||
293 | */ | 316 | */ |
294 | #ifdef CONFIG_PREEMPT_COUNT | 317 | #ifdef CONFIG_PREEMPT_COUNT |
295 | static inline int rcu_read_lock_sched_held(void) | 318 | static inline int rcu_read_lock_sched_held(void) |
@@ -298,6 +321,8 @@ static inline int rcu_read_lock_sched_held(void) | |||
298 | 321 | ||
299 | if (!debug_lockdep_rcu_enabled()) | 322 | if (!debug_lockdep_rcu_enabled()) |
300 | return 1; | 323 | return 1; |
324 | if (rcu_is_cpu_idle()) | ||
325 | return 0; | ||
301 | if (debug_locks) | 326 | if (debug_locks) |
302 | lockdep_opinion = lock_is_held(&rcu_sched_lock_map); | 327 | lockdep_opinion = lock_is_held(&rcu_sched_lock_map); |
303 | return lockdep_opinion || preempt_count() != 0 || irqs_disabled(); | 328 | return lockdep_opinion || preempt_count() != 0 || irqs_disabled(); |
@@ -311,12 +336,8 @@ static inline int rcu_read_lock_sched_held(void) | |||
311 | 336 | ||
312 | #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ | 337 | #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ |
313 | 338 | ||
314 | # define rcu_read_acquire() do { } while (0) | 339 | # define rcu_lock_acquire(a) do { } while (0) |
315 | # define rcu_read_release() do { } while (0) | 340 | # define rcu_lock_release(a) do { } while (0) |
316 | # define rcu_read_acquire_bh() do { } while (0) | ||
317 | # define rcu_read_release_bh() do { } while (0) | ||
318 | # define rcu_read_acquire_sched() do { } while (0) | ||
319 | # define rcu_read_release_sched() do { } while (0) | ||
320 | 341 | ||
321 | static inline int rcu_read_lock_held(void) | 342 | static inline int rcu_read_lock_held(void) |
322 | { | 343 | { |
@@ -637,7 +658,7 @@ static inline void rcu_read_lock(void) | |||
637 | { | 658 | { |
638 | __rcu_read_lock(); | 659 | __rcu_read_lock(); |
639 | __acquire(RCU); | 660 | __acquire(RCU); |
640 | rcu_read_acquire(); | 661 | rcu_lock_acquire(&rcu_lock_map); |
641 | } | 662 | } |
642 | 663 | ||
643 | /* | 664 | /* |
@@ -657,7 +678,7 @@ static inline void rcu_read_lock(void) | |||
657 | */ | 678 | */ |
658 | static inline void rcu_read_unlock(void) | 679 | static inline void rcu_read_unlock(void) |
659 | { | 680 | { |
660 | rcu_read_release(); | 681 | rcu_lock_release(&rcu_lock_map); |
661 | __release(RCU); | 682 | __release(RCU); |
662 | __rcu_read_unlock(); | 683 | __rcu_read_unlock(); |
663 | } | 684 | } |
@@ -673,12 +694,17 @@ static inline void rcu_read_unlock(void) | |||
673 | * critical sections in interrupt context can use just rcu_read_lock(), | 694 | * critical sections in interrupt context can use just rcu_read_lock(), |
674 | * though this should at least be commented to avoid confusing people | 695 | * though this should at least be commented to avoid confusing people |
675 | * reading the code. | 696 | * reading the code. |
697 | * | ||
698 | * Note that rcu_read_lock_bh() and the matching rcu_read_unlock_bh() | ||
699 | * must occur in the same context: for example, it is illegal to invoke | ||
700 | * rcu_read_unlock_bh() from one task if the matching rcu_read_lock_bh() | ||
701 | * was invoked from some other task. | ||
676 | */ | 702 | */ |
677 | static inline void rcu_read_lock_bh(void) | 703 | static inline void rcu_read_lock_bh(void) |
678 | { | 704 | { |
679 | local_bh_disable(); | 705 | local_bh_disable(); |
680 | __acquire(RCU_BH); | 706 | __acquire(RCU_BH); |
681 | rcu_read_acquire_bh(); | 707 | rcu_lock_acquire(&rcu_bh_lock_map); |
682 | } | 708 | } |
683 | 709 | ||
684 | /* | 710 | /* |
@@ -688,7 +714,7 @@ static inline void rcu_read_lock_bh(void) | |||
688 | */ | 714 | */ |
689 | static inline void rcu_read_unlock_bh(void) | 715 | static inline void rcu_read_unlock_bh(void) |
690 | { | 716 | { |
691 | rcu_read_release_bh(); | 717 | rcu_lock_release(&rcu_bh_lock_map); |
692 | __release(RCU_BH); | 718 | __release(RCU_BH); |
693 | local_bh_enable(); | 719 | local_bh_enable(); |
694 | } | 720 | } |
@@ -700,12 +726,17 @@ static inline void rcu_read_unlock_bh(void) | |||
700 | * are being done using call_rcu_sched() or synchronize_rcu_sched(). | 726 | * are being done using call_rcu_sched() or synchronize_rcu_sched(). |
701 | * Read-side critical sections can also be introduced by anything that | 727 | * Read-side critical sections can also be introduced by anything that |
702 | * disables preemption, including local_irq_disable() and friends. | 728 | * disables preemption, including local_irq_disable() and friends. |
729 | * | ||
730 | * Note that rcu_read_lock_sched() and the matching rcu_read_unlock_sched() | ||
731 | * must occur in the same context: for example, it is illegal to invoke | ||
732 | * rcu_read_unlock_sched() from process context if the matching | ||
733 | * rcu_read_lock_sched() was invoked from an NMI handler. | ||
703 | */ | 734 | */ |
704 | static inline void rcu_read_lock_sched(void) | 735 | static inline void rcu_read_lock_sched(void) |
705 | { | 736 | { |
706 | preempt_disable(); | 737 | preempt_disable(); |
707 | __acquire(RCU_SCHED); | 738 | __acquire(RCU_SCHED); |
708 | rcu_read_acquire_sched(); | 739 | rcu_lock_acquire(&rcu_sched_lock_map); |
709 | } | 740 | } |
710 | 741 | ||
711 | /* Used by lockdep and tracing: cannot be traced, cannot call lockdep. */ | 742 | /* Used by lockdep and tracing: cannot be traced, cannot call lockdep. */ |
@@ -722,7 +753,7 @@ static inline notrace void rcu_read_lock_sched_notrace(void) | |||
722 | */ | 753 | */ |
723 | static inline void rcu_read_unlock_sched(void) | 754 | static inline void rcu_read_unlock_sched(void) |
724 | { | 755 | { |
725 | rcu_read_release_sched(); | 756 | rcu_lock_release(&rcu_sched_lock_map); |
726 | __release(RCU_SCHED); | 757 | __release(RCU_SCHED); |
727 | preempt_enable(); | 758 | preempt_enable(); |
728 | } | 759 | } |
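
For reference, a minimal sketch (not part of this patch) of how the lockdep-checked helpers above are typically consumed; gp, my_lock, struct foo, and do_something() are hypothetical names. With this series the check also fails when the CPU is RCU-idle, because the *_held() helpers now consult rcu_is_cpu_idle():

        /* Hedged fragment: assumes "struct foo __rcu *gp", readable under
         * RCU for readers and under my_lock for updaters. */
        struct foo *f;

        rcu_read_lock();
        f = rcu_dereference_check(gp, rcu_read_lock_held() ||
                                      lockdep_is_held(&my_lock));
        if (f)
                do_something(f);        /* hypothetical consumer */
        rcu_read_unlock();
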
diff --git a/include/linux/sched.h b/include/linux/sched.h index 1c4f3e9b9bc5..4a7e4d333a27 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -2070,6 +2070,14 @@ extern int sched_setscheduler(struct task_struct *, int, | |||
2070 | extern int sched_setscheduler_nocheck(struct task_struct *, int, | 2070 | extern int sched_setscheduler_nocheck(struct task_struct *, int, |
2071 | const struct sched_param *); | 2071 | const struct sched_param *); |
2072 | extern struct task_struct *idle_task(int cpu); | 2072 | extern struct task_struct *idle_task(int cpu); |
2073 | /** | ||
2074 | * is_idle_task - is the specified task an idle task? | ||
2075 | * @p: the task in question. | ||
2076 | */ | ||
2077 | static inline bool is_idle_task(struct task_struct *p) | ||
2078 | { | ||
2079 | return p->pid == 0; | ||
2080 | } | ||
2073 | extern struct task_struct *curr_task(int cpu); | 2081 | extern struct task_struct *curr_task(int cpu); |
2074 | extern void set_curr_task(int cpu, struct task_struct *p); | 2082 | extern void set_curr_task(int cpu, struct task_struct *p); |
2075 | 2083 | ||
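
The new is_idle_task() helper gives a named test for the open-coded "pid == 0" idiom; the kdb and perf hunks below are converted to it. A trivial, hedged usage fragment (the surrounding function is hypothetical):

        if (is_idle_task(current))
                return;         /* don't charge this work to the idle task */
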
diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 58971e891f48..e1b005918bbb 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h | |||
@@ -28,6 +28,7 @@ | |||
28 | #define _LINUX_SRCU_H | 28 | #define _LINUX_SRCU_H |
29 | 29 | ||
30 | #include <linux/mutex.h> | 30 | #include <linux/mutex.h> |
31 | #include <linux/rcupdate.h> | ||
31 | 32 | ||
32 | struct srcu_struct_array { | 33 | struct srcu_struct_array { |
33 | int c[2]; | 34 | int c[2]; |
@@ -60,18 +61,10 @@ int __init_srcu_struct(struct srcu_struct *sp, const char *name, | |||
60 | __init_srcu_struct((sp), #sp, &__srcu_key); \ | 61 | __init_srcu_struct((sp), #sp, &__srcu_key); \ |
61 | }) | 62 | }) |
62 | 63 | ||
63 | # define srcu_read_acquire(sp) \ | ||
64 | lock_acquire(&(sp)->dep_map, 0, 0, 2, 1, NULL, _THIS_IP_) | ||
65 | # define srcu_read_release(sp) \ | ||
66 | lock_release(&(sp)->dep_map, 1, _THIS_IP_) | ||
67 | |||
68 | #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ | 64 | #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ |
69 | 65 | ||
70 | int init_srcu_struct(struct srcu_struct *sp); | 66 | int init_srcu_struct(struct srcu_struct *sp); |
71 | 67 | ||
72 | # define srcu_read_acquire(sp) do { } while (0) | ||
73 | # define srcu_read_release(sp) do { } while (0) | ||
74 | |||
75 | #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ | 68 | #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ |
76 | 69 | ||
77 | void cleanup_srcu_struct(struct srcu_struct *sp); | 70 | void cleanup_srcu_struct(struct srcu_struct *sp); |
@@ -90,12 +83,32 @@ long srcu_batches_completed(struct srcu_struct *sp); | |||
90 | * read-side critical section. In absence of CONFIG_DEBUG_LOCK_ALLOC, | 83 | * read-side critical section. In absence of CONFIG_DEBUG_LOCK_ALLOC, |
91 | * this assumes we are in an SRCU read-side critical section unless it can | 84 | * this assumes we are in an SRCU read-side critical section unless it can |
92 | * prove otherwise. | 85 | * prove otherwise. |
86 | * | ||
87 | * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot | ||
88 | * and while lockdep is disabled. | ||
89 | * | ||
90 | * Note that if the CPU is in the idle loop from an RCU point of view | ||
91 | * (i.e., in the section between rcu_idle_enter() and | ||
92 | * rcu_idle_exit()), then srcu_read_lock_held() returns false even if | ||
93 | * the CPU did an srcu_read_lock(). The reason is that RCU ignores | ||
94 | * CPUs that are in such a section, considering them to be in an | ||
95 | * extended quiescent state, so such a CPU is effectively never in an | ||
96 | * RCU read-side critical section regardless of what RCU primitives it | ||
97 | * invokes. This restriction is required: we need to keep an | ||
98 | * RCU-free window in idle where the CPU may enter low-power mode, | ||
99 | * so that we can report an extended quiescent state to other CPUs | ||
100 | * that have started a grace period. Otherwise we would delay every | ||
101 | * grace period for as long as we run in the idle task. | ||
93 | */ | 102 | */ |
94 | static inline int srcu_read_lock_held(struct srcu_struct *sp) | 103 | static inline int srcu_read_lock_held(struct srcu_struct *sp) |
95 | { | 104 | { |
96 | if (debug_locks) | 105 | if (rcu_is_cpu_idle()) |
97 | return lock_is_held(&sp->dep_map); | 106 | return 0; |
98 | return 1; | 107 | |
108 | if (!debug_lockdep_rcu_enabled()) | ||
109 | return 1; | ||
110 | |||
111 | return lock_is_held(&sp->dep_map); | ||
99 | } | 112 | } |
100 | 113 | ||
101 | #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ | 114 | #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ |
@@ -145,12 +158,17 @@ static inline int srcu_read_lock_held(struct srcu_struct *sp) | |||
145 | * one way to indirectly wait on an SRCU grace period is to acquire | 158 | * one way to indirectly wait on an SRCU grace period is to acquire |
146 | * a mutex that is held elsewhere while calling synchronize_srcu() or | 159 | * a mutex that is held elsewhere while calling synchronize_srcu() or |
147 | * synchronize_srcu_expedited(). | 160 | * synchronize_srcu_expedited(). |
161 | * | ||
162 | * Note that srcu_read_lock() and the matching srcu_read_unlock() must | ||
163 | * occur in the same context: for example, it is illegal to invoke | ||
164 | * srcu_read_unlock() in an irq handler if the matching srcu_read_lock() | ||
165 | * was invoked in process context. | ||
148 | */ | 166 | */ |
149 | static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp) | 167 | static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp) |
150 | { | 168 | { |
151 | int retval = __srcu_read_lock(sp); | 169 | int retval = __srcu_read_lock(sp); |
152 | 170 | ||
153 | srcu_read_acquire(sp); | 171 | rcu_lock_acquire(&(sp)->dep_map); |
154 | return retval; | 172 | return retval; |
155 | } | 173 | } |
156 | 174 | ||
@@ -164,8 +182,51 @@ static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp) | |||
164 | static inline void srcu_read_unlock(struct srcu_struct *sp, int idx) | 182 | static inline void srcu_read_unlock(struct srcu_struct *sp, int idx) |
165 | __releases(sp) | 183 | __releases(sp) |
166 | { | 184 | { |
167 | srcu_read_release(sp); | 185 | rcu_lock_release(&(sp)->dep_map); |
186 | __srcu_read_unlock(sp, idx); | ||
187 | } | ||
188 | |||
189 | /** | ||
190 | * srcu_read_lock_raw - register a new reader for an SRCU-protected structure. | ||
191 | * @sp: srcu_struct in which to register the new reader. | ||
192 | * | ||
193 | * Enter an SRCU read-side critical section. Similar to srcu_read_lock(), | ||
194 | * but avoids the RCU-lockdep checking. This means that it is legal to | ||
195 | * use srcu_read_lock_raw() in one context, for example, in an exception | ||
196 | * handler, and then have the matching srcu_read_unlock_raw() in another | ||
197 | * context, for example in the task that took the exception. | ||
198 | * | ||
199 | * However, the entire SRCU read-side critical section must reside within a | ||
200 | * single task. For example, beware of using srcu_read_lock_raw() in | ||
201 | * a device interrupt handler and srcu_read_unlock_raw() in the | ||
202 | * interrupted task: this will not work if interrupts are threaded. | ||
203 | */ | ||
204 | static inline int srcu_read_lock_raw(struct srcu_struct *sp) | ||
205 | { | ||
206 | unsigned long flags; | ||
207 | int ret; | ||
208 | |||
209 | local_irq_save(flags); | ||
210 | ret = __srcu_read_lock(sp); | ||
211 | local_irq_restore(flags); | ||
212 | return ret; | ||
213 | } | ||
214 | |||
215 | /** | ||
216 | * srcu_read_unlock_raw - unregister reader from an SRCU-protected structure. | ||
217 | * @sp: srcu_struct in which to unregister the old reader. | ||
218 | * @idx: return value from corresponding srcu_read_lock_raw(). | ||
219 | * | ||
220 | * Exit an SRCU read-side critical section without lockdep-RCU checking. | ||
221 | * See srcu_read_lock_raw() for more details. | ||
222 | */ | ||
223 | static inline void srcu_read_unlock_raw(struct srcu_struct *sp, int idx) | ||
224 | { | ||
225 | unsigned long flags; | ||
226 | |||
227 | local_irq_save(flags); | ||
168 | __srcu_read_unlock(sp, idx); | 228 | __srcu_read_unlock(sp, idx); |
229 | local_irq_restore(flags); | ||
169 | } | 230 | } |
170 | 231 | ||
171 | #endif | 232 | #endif |
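
A hedged sketch of the split-context pattern that srcu_read_lock_raw()/srcu_read_unlock_raw() permit, per the comment above. The srcu_struct, the hook names, and the per-task index storage are illustrative only; the real constraint is that both halves run in the same task:

        static struct srcu_struct my_srcu;  /* init_srcu_struct(&my_srcu) at init time */

        /* Entered from an exception handler. */
        void my_exception_entry(struct task_struct *t)
        {
                t->my_srcu_idx = srcu_read_lock_raw(&my_srcu);  /* hypothetical field */
        }

        /* Runs later, in the task that took the exception. */
        void my_exception_exit(struct task_struct *t)
        {
                srcu_read_unlock_raw(&my_srcu, t->my_srcu_idx);
        }
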
diff --git a/include/linux/tick.h b/include/linux/tick.h index b232ccc0ee29..ab8be90b5cc9 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h | |||
@@ -7,6 +7,7 @@ | |||
7 | #define _LINUX_TICK_H | 7 | #define _LINUX_TICK_H |
8 | 8 | ||
9 | #include <linux/clockchips.h> | 9 | #include <linux/clockchips.h> |
10 | #include <linux/irqflags.h> | ||
10 | 11 | ||
11 | #ifdef CONFIG_GENERIC_CLOCKEVENTS | 12 | #ifdef CONFIG_GENERIC_CLOCKEVENTS |
12 | 13 | ||
@@ -121,14 +122,16 @@ static inline int tick_oneshot_mode_active(void) { return 0; } | |||
121 | #endif /* !CONFIG_GENERIC_CLOCKEVENTS */ | 122 | #endif /* !CONFIG_GENERIC_CLOCKEVENTS */ |
122 | 123 | ||
123 | # ifdef CONFIG_NO_HZ | 124 | # ifdef CONFIG_NO_HZ |
124 | extern void tick_nohz_stop_sched_tick(int inidle); | 125 | extern void tick_nohz_idle_enter(void); |
125 | extern void tick_nohz_restart_sched_tick(void); | 126 | extern void tick_nohz_idle_exit(void); |
127 | extern void tick_nohz_irq_exit(void); | ||
126 | extern ktime_t tick_nohz_get_sleep_length(void); | 128 | extern ktime_t tick_nohz_get_sleep_length(void); |
127 | extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time); | 129 | extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time); |
128 | extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time); | 130 | extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time); |
129 | # else | 131 | # else |
130 | static inline void tick_nohz_stop_sched_tick(int inidle) { } | 132 | static inline void tick_nohz_idle_enter(void) { } |
131 | static inline void tick_nohz_restart_sched_tick(void) { } | 133 | static inline void tick_nohz_idle_exit(void) { } |
134 | |||
132 | static inline ktime_t tick_nohz_get_sleep_length(void) | 135 | static inline ktime_t tick_nohz_get_sleep_length(void) |
133 | { | 136 | { |
134 | ktime_t len = { .tv64 = NSEC_PER_SEC/HZ }; | 137 | ktime_t len = { .tv64 = NSEC_PER_SEC/HZ }; |
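
With tick_nohz_stop_sched_tick()/tick_nohz_restart_sched_tick() replaced by tick_nohz_idle_enter()/tick_nohz_idle_exit(), RCU's idle hooks are now invoked separately by the architecture idle loops. A rough, hedged sketch of the resulting shape; exact placement and scheduling details differ per architecture and are not part of this hunk:

        while (1) {
                tick_nohz_idle_enter();     /* stop the tick if possible */
                rcu_idle_enter();           /* tell RCU this CPU is entering idle */
                while (!need_resched())
                        cpu_relax();        /* stand-in for the arch's low-power wait */
                rcu_idle_exit();
                tick_nohz_idle_exit();      /* restart the tick */
                schedule();                 /* scheduling details elided */
        }
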
diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index 669fbd62ec25..d2d88bed891b 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h | |||
@@ -241,24 +241,73 @@ TRACE_EVENT(rcu_fqs, | |||
241 | 241 | ||
242 | /* | 242 | /* |
243 | * Tracepoint for dyntick-idle entry/exit events. These take a string | 243 | * Tracepoint for dyntick-idle entry/exit events. These take a string |
244 | * as argument: "Start" for entering dyntick-idle mode and "End" for | 244 | * as argument: "Start" for entering dyntick-idle mode, "End" for |
245 | * leaving it. | 245 | * leaving it, "--=" for events moving towards idle, and "++=" for events |
246 | * moving away from idle. "Error on entry: not idle task" and "Error on | ||
247 | * exit: not idle task" indicate that a non-idle task is erroneously | ||
248 | * toying with the idle loop. | ||
249 | * | ||
250 | * These events also take a pair of numbers, which indicate the nesting | ||
251 | * depth before and after the event of interest. Note that task-related | ||
252 | * events use the upper bits of each number, while interrupt-related | ||
253 | * events use the lower bits. | ||
246 | */ | 254 | */ |
247 | TRACE_EVENT(rcu_dyntick, | 255 | TRACE_EVENT(rcu_dyntick, |
248 | 256 | ||
249 | TP_PROTO(char *polarity), | 257 | TP_PROTO(char *polarity, long long oldnesting, long long newnesting), |
250 | 258 | ||
251 | TP_ARGS(polarity), | 259 | TP_ARGS(polarity, oldnesting, newnesting), |
252 | 260 | ||
253 | TP_STRUCT__entry( | 261 | TP_STRUCT__entry( |
254 | __field(char *, polarity) | 262 | __field(char *, polarity) |
263 | __field(long long, oldnesting) | ||
264 | __field(long long, newnesting) | ||
255 | ), | 265 | ), |
256 | 266 | ||
257 | TP_fast_assign( | 267 | TP_fast_assign( |
258 | __entry->polarity = polarity; | 268 | __entry->polarity = polarity; |
269 | __entry->oldnesting = oldnesting; | ||
270 | __entry->newnesting = newnesting; | ||
271 | ), | ||
272 | |||
273 | TP_printk("%s %llx %llx", __entry->polarity, | ||
274 | __entry->oldnesting, __entry->newnesting) | ||
275 | ); | ||
276 | |||
277 | /* | ||
278 | * Tracepoint for RCU preparation for idle, the goal being to get RCU | ||
279 | * processing done so that the current CPU can shut off its scheduling | ||
280 | * clock and enter dyntick-idle mode. One way to accomplish this is | ||
281 | * to drain all RCU callbacks from this CPU, and the other is to have | ||
282 | * done everything RCU requires for the current grace period. In this | ||
283 | * latter case, the CPU will be awakened at the end of the current grace | ||
284 | * period in order to process the remainder of its callbacks. | ||
285 | * | ||
286 | * These tracepoints take a string as argument: | ||
287 | * | ||
288 | * "No callbacks": Nothing to do, no callbacks on this CPU. | ||
289 | * "In holdoff": Nothing to do, holding off after unsuccessful attempt. | ||
290 | * "Begin holdoff": Attempt failed, don't retry until next jiffy. | ||
291 | * "Dyntick with callbacks": Entering dyntick-idle despite callbacks. | ||
292 | * "More callbacks": Still more callbacks, try again to clear them out. | ||
293 | * "Callbacks drained": All callbacks processed, off to dyntick idle! | ||
294 | * "Timer": Timer fired to cause CPU to continue processing callbacks. | ||
295 | */ | ||
296 | TRACE_EVENT(rcu_prep_idle, | ||
297 | |||
298 | TP_PROTO(char *reason), | ||
299 | |||
300 | TP_ARGS(reason), | ||
301 | |||
302 | TP_STRUCT__entry( | ||
303 | __field(char *, reason) | ||
304 | ), | ||
305 | |||
306 | TP_fast_assign( | ||
307 | __entry->reason = reason; | ||
259 | ), | 308 | ), |
260 | 309 | ||
261 | TP_printk("%s", __entry->polarity) | 310 | TP_printk("%s", __entry->reason) |
262 | ); | 311 | ); |
263 | 312 | ||
264 | /* | 313 | /* |
@@ -412,27 +461,71 @@ TRACE_EVENT(rcu_invoke_kfree_callback, | |||
412 | 461 | ||
413 | /* | 462 | /* |
414 | * Tracepoint for exiting rcu_do_batch after RCU callbacks have been | 463 | * Tracepoint for exiting rcu_do_batch after RCU callbacks have been |
415 | * invoked. The first argument is the name of the RCU flavor and | 464 | * invoked. The first argument is the name of the RCU flavor, |
416 | * the second argument is number of callbacks actually invoked. | 465 | * the second argument is the number of callbacks actually invoked, |
466 | * the third argument (cb) is whether or not any of the callbacks that | ||
467 | * were ready to invoke at the beginning of this batch are still | ||
468 | * queued, the fourth argument (nr) is the return value of need_resched(), | ||
469 | * the fifth argument (iit) is 1 if the current task is the idle task, | ||
470 | * and the sixth argument (risk) is the return value from | ||
471 | * rcu_is_callbacks_kthread(). | ||
417 | */ | 472 | */ |
418 | TRACE_EVENT(rcu_batch_end, | 473 | TRACE_EVENT(rcu_batch_end, |
419 | 474 | ||
420 | TP_PROTO(char *rcuname, int callbacks_invoked), | 475 | TP_PROTO(char *rcuname, int callbacks_invoked, |
476 | bool cb, bool nr, bool iit, bool risk), | ||
421 | 477 | ||
422 | TP_ARGS(rcuname, callbacks_invoked), | 478 | TP_ARGS(rcuname, callbacks_invoked, cb, nr, iit, risk), |
423 | 479 | ||
424 | TP_STRUCT__entry( | 480 | TP_STRUCT__entry( |
425 | __field(char *, rcuname) | 481 | __field(char *, rcuname) |
426 | __field(int, callbacks_invoked) | 482 | __field(int, callbacks_invoked) |
483 | __field(bool, cb) | ||
484 | __field(bool, nr) | ||
485 | __field(bool, iit) | ||
486 | __field(bool, risk) | ||
427 | ), | 487 | ), |
428 | 488 | ||
429 | TP_fast_assign( | 489 | TP_fast_assign( |
430 | __entry->rcuname = rcuname; | 490 | __entry->rcuname = rcuname; |
431 | __entry->callbacks_invoked = callbacks_invoked; | 491 | __entry->callbacks_invoked = callbacks_invoked; |
492 | __entry->cb = cb; | ||
493 | __entry->nr = nr; | ||
494 | __entry->iit = iit; | ||
495 | __entry->risk = risk; | ||
496 | ), | ||
497 | |||
498 | TP_printk("%s CBs-invoked=%d idle=%c%c%c%c", | ||
499 | __entry->rcuname, __entry->callbacks_invoked, | ||
500 | __entry->cb ? 'C' : '.', | ||
501 | __entry->nr ? 'S' : '.', | ||
502 | __entry->iit ? 'I' : '.', | ||
503 | __entry->risk ? 'R' : '.') | ||
504 | ); | ||
505 | |||
506 | /* | ||
507 | * Tracepoint for rcutorture readers. The first argument is the name | ||
508 | * of the RCU flavor from rcutorture's viewpoint and the second argument | ||
509 | * is the callback address. | ||
510 | */ | ||
511 | TRACE_EVENT(rcu_torture_read, | ||
512 | |||
513 | TP_PROTO(char *rcutorturename, struct rcu_head *rhp), | ||
514 | |||
515 | TP_ARGS(rcutorturename, rhp), | ||
516 | |||
517 | TP_STRUCT__entry( | ||
518 | __field(char *, rcutorturename) | ||
519 | __field(struct rcu_head *, rhp) | ||
520 | ), | ||
521 | |||
522 | TP_fast_assign( | ||
523 | __entry->rcutorturename = rcutorturename; | ||
524 | __entry->rhp = rhp; | ||
432 | ), | 525 | ), |
433 | 526 | ||
434 | TP_printk("%s CBs-invoked=%d", | 527 | TP_printk("%s torture read %p", |
435 | __entry->rcuname, __entry->callbacks_invoked) | 528 | __entry->rcutorturename, __entry->rhp) |
436 | ); | 529 | ); |
437 | 530 | ||
438 | #else /* #ifdef CONFIG_RCU_TRACE */ | 531 | #else /* #ifdef CONFIG_RCU_TRACE */ |
@@ -443,13 +536,16 @@ TRACE_EVENT(rcu_batch_end, | |||
443 | #define trace_rcu_unlock_preempted_task(rcuname, gpnum, pid) do { } while (0) | 536 | #define trace_rcu_unlock_preempted_task(rcuname, gpnum, pid) do { } while (0) |
444 | #define trace_rcu_quiescent_state_report(rcuname, gpnum, mask, qsmask, level, grplo, grphi, gp_tasks) do { } while (0) | 537 | #define trace_rcu_quiescent_state_report(rcuname, gpnum, mask, qsmask, level, grplo, grphi, gp_tasks) do { } while (0) |
445 | #define trace_rcu_fqs(rcuname, gpnum, cpu, qsevent) do { } while (0) | 538 | #define trace_rcu_fqs(rcuname, gpnum, cpu, qsevent) do { } while (0) |
446 | #define trace_rcu_dyntick(polarity) do { } while (0) | 539 | #define trace_rcu_dyntick(polarity, oldnesting, newnesting) do { } while (0) |
540 | #define trace_rcu_prep_idle(reason) do { } while (0) | ||
447 | #define trace_rcu_callback(rcuname, rhp, qlen) do { } while (0) | 541 | #define trace_rcu_callback(rcuname, rhp, qlen) do { } while (0) |
448 | #define trace_rcu_kfree_callback(rcuname, rhp, offset, qlen) do { } while (0) | 542 | #define trace_rcu_kfree_callback(rcuname, rhp, offset, qlen) do { } while (0) |
449 | #define trace_rcu_batch_start(rcuname, qlen, blimit) do { } while (0) | 543 | #define trace_rcu_batch_start(rcuname, qlen, blimit) do { } while (0) |
450 | #define trace_rcu_invoke_callback(rcuname, rhp) do { } while (0) | 544 | #define trace_rcu_invoke_callback(rcuname, rhp) do { } while (0) |
451 | #define trace_rcu_invoke_kfree_callback(rcuname, rhp, offset) do { } while (0) | 545 | #define trace_rcu_invoke_kfree_callback(rcuname, rhp, offset) do { } while (0) |
452 | #define trace_rcu_batch_end(rcuname, callbacks_invoked) do { } while (0) | 546 | #define trace_rcu_batch_end(rcuname, callbacks_invoked, cb, nr, iit, risk) \ |
547 | do { } while (0) | ||
548 | #define trace_rcu_torture_read(rcutorturename, rhp) do { } while (0) | ||
453 | 549 | ||
454 | #endif /* #else #ifdef CONFIG_RCU_TRACE */ | 550 | #endif /* #else #ifdef CONFIG_RCU_TRACE */ |
455 | 551 | ||
diff --git a/init/Kconfig b/init/Kconfig index 43298f9810fb..82b6a4c675b2 100644 --- a/init/Kconfig +++ b/init/Kconfig | |||
@@ -469,14 +469,14 @@ config RCU_FANOUT_EXACT | |||
469 | 469 | ||
470 | config RCU_FAST_NO_HZ | 470 | config RCU_FAST_NO_HZ |
471 | bool "Accelerate last non-dyntick-idle CPU's grace periods" | 471 | bool "Accelerate last non-dyntick-idle CPU's grace periods" |
472 | depends on TREE_RCU && NO_HZ && SMP | 472 | depends on NO_HZ && SMP |
473 | default n | 473 | default n |
474 | help | 474 | help |
475 | This option causes RCU to attempt to accelerate grace periods | 475 | This option causes RCU to attempt to accelerate grace periods |
476 | in order to allow the final CPU to enter dynticks-idle state | 476 | in order to allow CPUs to enter dynticks-idle state more |
477 | more quickly. On the other hand, this option increases the | 477 | quickly. On the other hand, this option increases the overhead |
478 | overhead of the dynticks-idle checking, particularly on systems | 478 | of the dynticks-idle checking, particularly on systems with |
479 | with large numbers of CPUs. | 479 | large numbers of CPUs. |
480 | 480 | ||
481 | Say Y if energy efficiency is critically important, particularly | 481 | Say Y if energy efficiency is critically important, particularly |
482 | if you have relatively few CPUs. | 482 | if you have relatively few CPUs. |
diff --git a/kernel/cpu.c b/kernel/cpu.c index 563f13609470..9d448ddb2247 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c | |||
@@ -380,6 +380,7 @@ out: | |||
380 | cpu_maps_update_done(); | 380 | cpu_maps_update_done(); |
381 | return err; | 381 | return err; |
382 | } | 382 | } |
383 | EXPORT_SYMBOL_GPL(cpu_up); | ||
383 | 384 | ||
384 | #ifdef CONFIG_PM_SLEEP_SMP | 385 | #ifdef CONFIG_PM_SLEEP_SMP |
385 | static cpumask_var_t frozen_cpus; | 386 | static cpumask_var_t frozen_cpus; |
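
Exporting cpu_up() lets GPL modules re-online CPUs that they (or the user) have taken down, which the rcutorture CPU-hotplug testing further down depends on. A hedged fragment, with error handling elided:

        unsigned int cpu = 1;           /* arbitrary example CPU */
        int ret = 0;

        if (!cpu_online(cpu))
                ret = cpu_up(cpu);      /* now callable from modules (EXPORT_SYMBOL_GPL) */
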
diff --git a/kernel/debug/kdb/kdb_support.c b/kernel/debug/kdb/kdb_support.c index 5532dd37aa86..7d6fb40d2188 100644 --- a/kernel/debug/kdb/kdb_support.c +++ b/kernel/debug/kdb/kdb_support.c | |||
@@ -636,7 +636,7 @@ char kdb_task_state_char (const struct task_struct *p) | |||
636 | (p->exit_state & EXIT_ZOMBIE) ? 'Z' : | 636 | (p->exit_state & EXIT_ZOMBIE) ? 'Z' : |
637 | (p->exit_state & EXIT_DEAD) ? 'E' : | 637 | (p->exit_state & EXIT_DEAD) ? 'E' : |
638 | (p->state & TASK_INTERRUPTIBLE) ? 'S' : '?'; | 638 | (p->state & TASK_INTERRUPTIBLE) ? 'S' : '?'; |
639 | if (p->pid == 0) { | 639 | if (is_idle_task(p)) { |
640 | /* Idle task. Is it really idle, apart from the kdb | 640 | /* Idle task. Is it really idle, apart from the kdb |
641 | * interrupt? */ | 641 | * interrupt? */ |
642 | if (!kdb_task_has_cpu(p) || kgdb_info[cpu].irq_depth == 1) { | 642 | if (!kdb_task_has_cpu(p) || kgdb_info[cpu].irq_depth == 1) { |
diff --git a/kernel/events/core.c b/kernel/events/core.c index 58690af323e4..fc0e7ff11dda 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c | |||
@@ -5366,7 +5366,7 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer) | |||
5366 | regs = get_irq_regs(); | 5366 | regs = get_irq_regs(); |
5367 | 5367 | ||
5368 | if (regs && !perf_exclude_event(event, regs)) { | 5368 | if (regs && !perf_exclude_event(event, regs)) { |
5369 | if (!(event->attr.exclude_idle && current->pid == 0)) | 5369 | if (!(event->attr.exclude_idle && is_idle_task(current))) |
5370 | if (perf_event_overflow(event, &data, regs)) | 5370 | if (perf_event_overflow(event, &data, regs)) |
5371 | ret = HRTIMER_NORESTART; | 5371 | ret = HRTIMER_NORESTART; |
5372 | } | 5372 | } |
diff --git a/kernel/lockdep.c b/kernel/lockdep.c index e69d633d6aa6..8fb755132322 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c | |||
@@ -4181,6 +4181,28 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s) | |||
4181 | printk("%s:%d %s!\n", file, line, s); | 4181 | printk("%s:%d %s!\n", file, line, s); |
4182 | printk("\nother info that might help us debug this:\n\n"); | 4182 | printk("\nother info that might help us debug this:\n\n"); |
4183 | printk("\nrcu_scheduler_active = %d, debug_locks = %d\n", rcu_scheduler_active, debug_locks); | 4183 | printk("\nrcu_scheduler_active = %d, debug_locks = %d\n", rcu_scheduler_active, debug_locks); |
4184 | |||
4185 | /* | ||
4186 | * If a CPU is in the RCU-free window in idle (i.e., in the section | ||
4187 | * between rcu_idle_enter() and rcu_idle_exit()), then RCU | ||
4188 | * considers that CPU to be in an "extended quiescent state", | ||
4189 | * which means that RCU will be completely ignoring that CPU. | ||
4190 | * Therefore, rcu_read_lock() and friends have absolutely no | ||
4191 | * effect on a CPU running in that state. In other words, even if | ||
4192 | * such an RCU-idle CPU has called rcu_read_lock(), RCU might well | ||
4193 | * delete data structures out from under it. RCU really has no | ||
4194 | * choice here: we need to keep an RCU-free window in idle where | ||
4195 | * the CPU may enter low-power mode, so that we can report an | ||
4196 | * extended quiescent state to other CPUs that have started a | ||
4197 | * grace period. Otherwise we would delay every grace period for | ||
4198 | * as long as we run in the idle task. | ||
4199 | * | ||
4200 | * So complain bitterly if someone does call rcu_read_lock(), | ||
4201 | * rcu_read_lock_bh() and so on from extended quiescent states. | ||
4202 | */ | ||
4203 | if (rcu_is_cpu_idle()) | ||
4204 | printk("RCU used illegally from extended quiescent state!\n"); | ||
4205 | |||
4184 | lockdep_print_held_locks(curr); | 4206 | lockdep_print_held_locks(curr); |
4185 | printk("\nstack backtrace:\n"); | 4207 | printk("\nstack backtrace:\n"); |
4186 | dump_stack(); | 4208 | dump_stack(); |
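
A hedged illustration of the misuse that the new diagnostic line calls out; gp is a hypothetical RCU-protected pointer and none of this code is in the patch:

        struct foo *p;                  /* hypothetical type */

        rcu_idle_enter();               /* CPU enters an extended quiescent state */
        rcu_read_lock();                /* has no effect: RCU is ignoring this CPU */
        p = rcu_dereference(gp);        /* rcu_read_lock_held() returns 0, so the
                                         * resulting splat now also prints "RCU used
                                         * illegally from extended quiescent state!" */
        rcu_read_unlock();
        rcu_idle_exit();
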
diff --git a/kernel/rcu.h b/kernel/rcu.h index f600868d550d..aa88baab5f78 100644 --- a/kernel/rcu.h +++ b/kernel/rcu.h | |||
@@ -30,6 +30,13 @@ | |||
30 | #endif /* #else #ifdef CONFIG_RCU_TRACE */ | 30 | #endif /* #else #ifdef CONFIG_RCU_TRACE */ |
31 | 31 | ||
32 | /* | 32 | /* |
33 | * Process-level increment to ->dynticks_nesting field. This allows for | ||
34 | * architectures that use half-interrupts and half-exceptions from | ||
35 | * process context. | ||
36 | */ | ||
37 | #define DYNTICK_TASK_NESTING (LLONG_MAX / 2 - 1) | ||
38 | |||
39 | /* | ||
33 | * debug_rcu_head_queue()/debug_rcu_head_unqueue() are used internally | 40 | * debug_rcu_head_queue()/debug_rcu_head_unqueue() are used internally |
34 | * by call_rcu() and rcu callback execution, and are therefore not part of the | 41 | * by call_rcu() and rcu callback execution, and are therefore not part of the |
35 | * RCU API. Leaving in rcupdate.h because they are used by all RCU flavors. | 42 | * RCU API. Leaving in rcupdate.h because they are used by all RCU flavors. |
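
As a hedged illustration of the bookkeeping that DYNTICK_TASK_NESTING anchors (values follow the rcutiny.c changes below; the tree-RCU side uses the same scheme):

        /*
         * Non-idle task context:      dynticks_nesting == DYNTICK_TASK_NESTING
         * rcu_idle_enter():           dynticks_nesting  = 0    (extended quiescent state)
         * rcu_irq_enter() from idle:  dynticks_nesting  = 1    (briefly non-idle)
         * rcu_irq_exit():             dynticks_nesting  = 0    (idle again)
         * rcu_idle_exit():            dynticks_nesting  = DYNTICK_TASK_NESTING
         */
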
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index c5b98e565aee..2bc4e135ff23 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c | |||
@@ -93,6 +93,8 @@ int rcu_read_lock_bh_held(void) | |||
93 | { | 93 | { |
94 | if (!debug_lockdep_rcu_enabled()) | 94 | if (!debug_lockdep_rcu_enabled()) |
95 | return 1; | 95 | return 1; |
96 | if (rcu_is_cpu_idle()) | ||
97 | return 0; | ||
96 | return in_softirq() || irqs_disabled(); | 98 | return in_softirq() || irqs_disabled(); |
97 | } | 99 | } |
98 | EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held); | 100 | EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held); |
@@ -316,3 +318,13 @@ struct debug_obj_descr rcuhead_debug_descr = { | |||
316 | }; | 318 | }; |
317 | EXPORT_SYMBOL_GPL(rcuhead_debug_descr); | 319 | EXPORT_SYMBOL_GPL(rcuhead_debug_descr); |
318 | #endif /* #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */ | 320 | #endif /* #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */ |
321 | |||
322 | #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) || defined(CONFIG_RCU_TRACE) | ||
323 | void do_trace_rcu_torture_read(char *rcutorturename, struct rcu_head *rhp) | ||
324 | { | ||
325 | trace_rcu_torture_read(rcutorturename, rhp); | ||
326 | } | ||
327 | EXPORT_SYMBOL_GPL(do_trace_rcu_torture_read); | ||
328 | #else | ||
329 | #define do_trace_rcu_torture_read(rcutorturename, rhp) do { } while (0) | ||
330 | #endif | ||
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index 636af6d9c6e5..977296dca0a4 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c | |||
@@ -53,31 +53,137 @@ static void __call_rcu(struct rcu_head *head, | |||
53 | 53 | ||
54 | #include "rcutiny_plugin.h" | 54 | #include "rcutiny_plugin.h" |
55 | 55 | ||
56 | #ifdef CONFIG_NO_HZ | 56 | static long long rcu_dynticks_nesting = DYNTICK_TASK_NESTING; |
57 | 57 | ||
58 | static long rcu_dynticks_nesting = 1; | 58 | /* Common code for rcu_idle_enter() and rcu_irq_exit(), see kernel/rcutree.c. */ |
59 | static void rcu_idle_enter_common(long long oldval) | ||
60 | { | ||
61 | if (rcu_dynticks_nesting) { | ||
62 | RCU_TRACE(trace_rcu_dyntick("--=", | ||
63 | oldval, rcu_dynticks_nesting)); | ||
64 | return; | ||
65 | } | ||
66 | RCU_TRACE(trace_rcu_dyntick("Start", oldval, rcu_dynticks_nesting)); | ||
67 | if (!is_idle_task(current)) { | ||
68 | struct task_struct *idle = idle_task(smp_processor_id()); | ||
69 | |||
70 | RCU_TRACE(trace_rcu_dyntick("Error on entry: not idle task", | ||
71 | oldval, rcu_dynticks_nesting)); | ||
72 | ftrace_dump(DUMP_ALL); | ||
73 | WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", | ||
74 | current->pid, current->comm, | ||
75 | idle->pid, idle->comm); /* must be idle task! */ | ||
76 | } | ||
77 | rcu_sched_qs(0); /* implies rcu_bh_qsctr_inc(0) */ | ||
78 | } | ||
59 | 79 | ||
60 | /* | 80 | /* |
61 | * Enter dynticks-idle mode, which is an extended quiescent state | 81 | * Enter idle, which is an extended quiescent state if we have fully |
62 | * if we have fully entered that mode (i.e., if the new value of | 82 | * entered that mode (i.e., if the new value of dynticks_nesting is zero). |
63 | * dynticks_nesting is zero). | ||
64 | */ | 83 | */ |
65 | void rcu_enter_nohz(void) | 84 | void rcu_idle_enter(void) |
66 | { | 85 | { |
67 | if (--rcu_dynticks_nesting == 0) | 86 | unsigned long flags; |
68 | rcu_sched_qs(0); /* implies rcu_bh_qsctr_inc(0) */ | 87 | long long oldval; |
88 | |||
89 | local_irq_save(flags); | ||
90 | oldval = rcu_dynticks_nesting; | ||
91 | rcu_dynticks_nesting = 0; | ||
92 | rcu_idle_enter_common(oldval); | ||
93 | local_irq_restore(flags); | ||
69 | } | 94 | } |
70 | 95 | ||
71 | /* | 96 | /* |
72 | * Exit dynticks-idle mode, so that we are no longer in an extended | 97 | * Exit an interrupt handler towards idle. |
73 | * quiescent state. | ||
74 | */ | 98 | */ |
75 | void rcu_exit_nohz(void) | 99 | void rcu_irq_exit(void) |
100 | { | ||
101 | unsigned long flags; | ||
102 | long long oldval; | ||
103 | |||
104 | local_irq_save(flags); | ||
105 | oldval = rcu_dynticks_nesting; | ||
106 | rcu_dynticks_nesting--; | ||
107 | WARN_ON_ONCE(rcu_dynticks_nesting < 0); | ||
108 | rcu_idle_enter_common(oldval); | ||
109 | local_irq_restore(flags); | ||
110 | } | ||
111 | |||
112 | /* Common code for rcu_idle_exit() and rcu_irq_enter(), see kernel/rcutree.c. */ | ||
113 | static void rcu_idle_exit_common(long long oldval) | ||
76 | { | 114 | { |
115 | if (oldval) { | ||
116 | RCU_TRACE(trace_rcu_dyntick("++=", | ||
117 | oldval, rcu_dynticks_nesting)); | ||
118 | return; | ||
119 | } | ||
120 | RCU_TRACE(trace_rcu_dyntick("End", oldval, rcu_dynticks_nesting)); | ||
121 | if (!is_idle_task(current)) { | ||
122 | struct task_struct *idle = idle_task(smp_processor_id()); | ||
123 | |||
124 | RCU_TRACE(trace_rcu_dyntick("Error on exit: not idle task", | ||
125 | oldval, rcu_dynticks_nesting)); | ||
126 | ftrace_dump(DUMP_ALL); | ||
127 | WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", | ||
128 | current->pid, current->comm, | ||
129 | idle->pid, idle->comm); /* must be idle task! */ | ||
130 | } | ||
131 | } | ||
132 | |||
133 | /* | ||
134 | * Exit idle, so that we are no longer in an extended quiescent state. | ||
135 | */ | ||
136 | void rcu_idle_exit(void) | ||
137 | { | ||
138 | unsigned long flags; | ||
139 | long long oldval; | ||
140 | |||
141 | local_irq_save(flags); | ||
142 | oldval = rcu_dynticks_nesting; | ||
143 | WARN_ON_ONCE(oldval != 0); | ||
144 | rcu_dynticks_nesting = DYNTICK_TASK_NESTING; | ||
145 | rcu_idle_exit_common(oldval); | ||
146 | local_irq_restore(flags); | ||
147 | } | ||
148 | |||
149 | /* | ||
150 | * Enter an interrupt handler, moving away from idle. | ||
151 | */ | ||
152 | void rcu_irq_enter(void) | ||
153 | { | ||
154 | unsigned long flags; | ||
155 | long long oldval; | ||
156 | |||
157 | local_irq_save(flags); | ||
158 | oldval = rcu_dynticks_nesting; | ||
77 | rcu_dynticks_nesting++; | 159 | rcu_dynticks_nesting++; |
160 | WARN_ON_ONCE(rcu_dynticks_nesting == 0); | ||
161 | rcu_idle_exit_common(oldval); | ||
162 | local_irq_restore(flags); | ||
163 | } | ||
164 | |||
165 | #ifdef CONFIG_PROVE_RCU | ||
166 | |||
167 | /* | ||
168 | * Test whether RCU thinks that the current CPU is idle. | ||
169 | */ | ||
170 | int rcu_is_cpu_idle(void) | ||
171 | { | ||
172 | return !rcu_dynticks_nesting; | ||
78 | } | 173 | } |
174 | EXPORT_SYMBOL(rcu_is_cpu_idle); | ||
175 | |||
176 | #endif /* #ifdef CONFIG_PROVE_RCU */ | ||
79 | 177 | ||
80 | #endif /* #ifdef CONFIG_NO_HZ */ | 178 | /* |
179 | * Test whether the current CPU was interrupted from idle. Nested | ||
180 | * interrupts don't count, we must be running at the first interrupt | ||
181 | * level. | ||
182 | */ | ||
183 | int rcu_is_cpu_rrupt_from_idle(void) | ||
184 | { | ||
185 | return rcu_dynticks_nesting <= 0; | ||
186 | } | ||
81 | 187 | ||
82 | /* | 188 | /* |
83 | * Helper function for rcu_sched_qs() and rcu_bh_qs(). | 189 | * Helper function for rcu_sched_qs() and rcu_bh_qs(). |
@@ -126,14 +232,13 @@ void rcu_bh_qs(int cpu) | |||
126 | 232 | ||
127 | /* | 233 | /* |
128 | * Check to see if the scheduling-clock interrupt came from an extended | 234 | * Check to see if the scheduling-clock interrupt came from an extended |
129 | * quiescent state, and, if so, tell RCU about it. | 235 | * quiescent state, and, if so, tell RCU about it. This function must |
236 | * be called from hardirq context. It is normally called from the | ||
237 | * scheduling-clock interrupt. | ||
130 | */ | 238 | */ |
131 | void rcu_check_callbacks(int cpu, int user) | 239 | void rcu_check_callbacks(int cpu, int user) |
132 | { | 240 | { |
133 | if (user || | 241 | if (user || rcu_is_cpu_rrupt_from_idle()) |
134 | (idle_cpu(cpu) && | ||
135 | !in_softirq() && | ||
136 | hardirq_count() <= (1 << HARDIRQ_SHIFT))) | ||
137 | rcu_sched_qs(cpu); | 242 | rcu_sched_qs(cpu); |
138 | else if (!in_softirq()) | 243 | else if (!in_softirq()) |
139 | rcu_bh_qs(cpu); | 244 | rcu_bh_qs(cpu); |
@@ -154,7 +259,11 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) | |||
154 | /* If no RCU callbacks ready to invoke, just return. */ | 259 | /* If no RCU callbacks ready to invoke, just return. */ |
155 | if (&rcp->rcucblist == rcp->donetail) { | 260 | if (&rcp->rcucblist == rcp->donetail) { |
156 | RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, -1)); | 261 | RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, -1)); |
157 | RCU_TRACE(trace_rcu_batch_end(rcp->name, 0)); | 262 | RCU_TRACE(trace_rcu_batch_end(rcp->name, 0, |
263 | ACCESS_ONCE(rcp->rcucblist), | ||
264 | need_resched(), | ||
265 | is_idle_task(current), | ||
266 | rcu_is_callbacks_kthread())); | ||
158 | return; | 267 | return; |
159 | } | 268 | } |
160 | 269 | ||
@@ -183,7 +292,9 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) | |||
183 | RCU_TRACE(cb_count++); | 292 | RCU_TRACE(cb_count++); |
184 | } | 293 | } |
185 | RCU_TRACE(rcu_trace_sub_qlen(rcp, cb_count)); | 294 | RCU_TRACE(rcu_trace_sub_qlen(rcp, cb_count)); |
186 | RCU_TRACE(trace_rcu_batch_end(rcp->name, cb_count)); | 295 | RCU_TRACE(trace_rcu_batch_end(rcp->name, cb_count, 0, need_resched(), |
296 | is_idle_task(current), | ||
297 | rcu_is_callbacks_kthread())); | ||
187 | } | 298 | } |
188 | 299 | ||
189 | static void rcu_process_callbacks(struct softirq_action *unused) | 300 | static void rcu_process_callbacks(struct softirq_action *unused) |
diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h index 2b0484a5dc28..9cb1ae4aabdd 100644 --- a/kernel/rcutiny_plugin.h +++ b/kernel/rcutiny_plugin.h | |||
@@ -312,8 +312,8 @@ static int rcu_boost(void) | |||
312 | rt_mutex_lock(&mtx); | 312 | rt_mutex_lock(&mtx); |
313 | rt_mutex_unlock(&mtx); /* Keep lockdep happy. */ | 313 | rt_mutex_unlock(&mtx); /* Keep lockdep happy. */ |
314 | 314 | ||
315 | return rcu_preempt_ctrlblk.boost_tasks != NULL || | 315 | return ACCESS_ONCE(rcu_preempt_ctrlblk.boost_tasks) != NULL || |
316 | rcu_preempt_ctrlblk.exp_tasks != NULL; | 316 | ACCESS_ONCE(rcu_preempt_ctrlblk.exp_tasks) != NULL; |
317 | } | 317 | } |
318 | 318 | ||
319 | /* | 319 | /* |
@@ -885,6 +885,19 @@ static void invoke_rcu_callbacks(void) | |||
885 | wake_up(&rcu_kthread_wq); | 885 | wake_up(&rcu_kthread_wq); |
886 | } | 886 | } |
887 | 887 | ||
888 | #ifdef CONFIG_RCU_TRACE | ||
889 | |||
890 | /* | ||
891 | * Is the current CPU running the RCU-callbacks kthread? | ||
892 | * Caller must have preemption disabled. | ||
893 | */ | ||
894 | static bool rcu_is_callbacks_kthread(void) | ||
895 | { | ||
896 | return rcu_kthread_task == current; | ||
897 | } | ||
898 | |||
899 | #endif /* #ifdef CONFIG_RCU_TRACE */ | ||
900 | |||
888 | /* | 901 | /* |
889 | * This kthread invokes RCU callbacks whose grace periods have | 902 | * This kthread invokes RCU callbacks whose grace periods have |
890 | * elapsed. It is awakened as needed, and takes the place of the | 903 | * elapsed. It is awakened as needed, and takes the place of the |
@@ -938,6 +951,18 @@ void invoke_rcu_callbacks(void) | |||
938 | raise_softirq(RCU_SOFTIRQ); | 951 | raise_softirq(RCU_SOFTIRQ); |
939 | } | 952 | } |
940 | 953 | ||
954 | #ifdef CONFIG_RCU_TRACE | ||
955 | |||
956 | /* | ||
957 | * There is no callback kthread, so this thread is never it. | ||
958 | */ | ||
959 | static bool rcu_is_callbacks_kthread(void) | ||
960 | { | ||
961 | return false; | ||
962 | } | ||
963 | |||
964 | #endif /* #ifdef CONFIG_RCU_TRACE */ | ||
965 | |||
941 | void rcu_init(void) | 966 | void rcu_init(void) |
942 | { | 967 | { |
943 | open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); | 968 | open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); |
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 764825c2685c..88f17b8a3b1d 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c | |||
@@ -61,9 +61,11 @@ static int test_no_idle_hz; /* Test RCU's support for tickless idle CPUs. */ | |||
61 | static int shuffle_interval = 3; /* Interval between shuffles (in sec)*/ | 61 | static int shuffle_interval = 3; /* Interval between shuffles (in sec)*/ |
62 | static int stutter = 5; /* Start/stop testing interval (in sec) */ | 62 | static int stutter = 5; /* Start/stop testing interval (in sec) */ |
63 | static int irqreader = 1; /* RCU readers from irq (timers). */ | 63 | static int irqreader = 1; /* RCU readers from irq (timers). */ |
64 | static int fqs_duration = 0; /* Duration of bursts (us), 0 to disable. */ | 64 | static int fqs_duration; /* Duration of bursts (us), 0 to disable. */ |
65 | static int fqs_holdoff = 0; /* Hold time within burst (us). */ | 65 | static int fqs_holdoff; /* Hold time within burst (us). */ |
66 | static int fqs_stutter = 3; /* Wait time between bursts (s). */ | 66 | static int fqs_stutter = 3; /* Wait time between bursts (s). */ |
67 | static int onoff_interval; /* Wait time between CPU hotplugs, 0=disable. */ | ||
68 | static int shutdown_secs; /* Shutdown time (s). <=0 for no shutdown. */ | ||
67 | static int test_boost = 1; /* Test RCU prio boost: 0=no, 1=maybe, 2=yes. */ | 69 | static int test_boost = 1; /* Test RCU prio boost: 0=no, 1=maybe, 2=yes. */ |
68 | static int test_boost_interval = 7; /* Interval between boost tests, seconds. */ | 70 | static int test_boost_interval = 7; /* Interval between boost tests, seconds. */ |
69 | static int test_boost_duration = 4; /* Duration of each boost test, seconds. */ | 71 | static int test_boost_duration = 4; /* Duration of each boost test, seconds. */ |
@@ -91,6 +93,10 @@ module_param(fqs_holdoff, int, 0444); | |||
91 | MODULE_PARM_DESC(fqs_holdoff, "Holdoff time within fqs bursts (us)"); | 93 | MODULE_PARM_DESC(fqs_holdoff, "Holdoff time within fqs bursts (us)"); |
92 | module_param(fqs_stutter, int, 0444); | 94 | module_param(fqs_stutter, int, 0444); |
93 | MODULE_PARM_DESC(fqs_stutter, "Wait time between fqs bursts (s)"); | 95 | MODULE_PARM_DESC(fqs_stutter, "Wait time between fqs bursts (s)"); |
96 | module_param(onoff_interval, int, 0444); | ||
97 | MODULE_PARM_DESC(onoff_interval, "Time between CPU hotplugs (s), 0=disable"); | ||
98 | module_param(shutdown_secs, int, 0444); | ||
99 | MODULE_PARM_DESC(shutdown_secs, "Shutdown time (s), zero to disable."); | ||
94 | module_param(test_boost, int, 0444); | 100 | module_param(test_boost, int, 0444); |
95 | MODULE_PARM_DESC(test_boost, "Test RCU prio boost: 0=no, 1=maybe, 2=yes."); | 101 | MODULE_PARM_DESC(test_boost, "Test RCU prio boost: 0=no, 1=maybe, 2=yes."); |
96 | module_param(test_boost_interval, int, 0444); | 102 | module_param(test_boost_interval, int, 0444); |
@@ -119,6 +125,10 @@ static struct task_struct *shuffler_task; | |||
119 | static struct task_struct *stutter_task; | 125 | static struct task_struct *stutter_task; |
120 | static struct task_struct *fqs_task; | 126 | static struct task_struct *fqs_task; |
121 | static struct task_struct *boost_tasks[NR_CPUS]; | 127 | static struct task_struct *boost_tasks[NR_CPUS]; |
128 | static struct task_struct *shutdown_task; | ||
129 | #ifdef CONFIG_HOTPLUG_CPU | ||
130 | static struct task_struct *onoff_task; | ||
131 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | ||
122 | 132 | ||
123 | #define RCU_TORTURE_PIPE_LEN 10 | 133 | #define RCU_TORTURE_PIPE_LEN 10 |
124 | 134 | ||
@@ -149,6 +159,10 @@ static long n_rcu_torture_boost_rterror; | |||
149 | static long n_rcu_torture_boost_failure; | 159 | static long n_rcu_torture_boost_failure; |
150 | static long n_rcu_torture_boosts; | 160 | static long n_rcu_torture_boosts; |
151 | static long n_rcu_torture_timers; | 161 | static long n_rcu_torture_timers; |
162 | static long n_offline_attempts; | ||
163 | static long n_offline_successes; | ||
164 | static long n_online_attempts; | ||
165 | static long n_online_successes; | ||
152 | static struct list_head rcu_torture_removed; | 166 | static struct list_head rcu_torture_removed; |
153 | static cpumask_var_t shuffle_tmp_mask; | 167 | static cpumask_var_t shuffle_tmp_mask; |
154 | 168 | ||
@@ -160,6 +174,8 @@ static int stutter_pause_test; | |||
160 | #define RCUTORTURE_RUNNABLE_INIT 0 | 174 | #define RCUTORTURE_RUNNABLE_INIT 0 |
161 | #endif | 175 | #endif |
162 | int rcutorture_runnable = RCUTORTURE_RUNNABLE_INIT; | 176 | int rcutorture_runnable = RCUTORTURE_RUNNABLE_INIT; |
177 | module_param(rcutorture_runnable, int, 0444); | ||
178 | MODULE_PARM_DESC(rcutorture_runnable, "Start rcutorture at boot"); | ||
163 | 179 | ||
164 | #if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU) | 180 | #if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU) |
165 | #define rcu_can_boost() 1 | 181 | #define rcu_can_boost() 1 |
@@ -167,6 +183,7 @@ int rcutorture_runnable = RCUTORTURE_RUNNABLE_INIT; | |||
167 | #define rcu_can_boost() 0 | 183 | #define rcu_can_boost() 0 |
168 | #endif /* #else #if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU) */ | 184 | #endif /* #else #if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU) */ |
169 | 185 | ||
186 | static unsigned long shutdown_time; /* jiffies to system shutdown. */ | ||
170 | static unsigned long boost_starttime; /* jiffies of next boost test start. */ | 187 | static unsigned long boost_starttime; /* jiffies of next boost test start. */ |
171 | DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */ | 188 | DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */ |
172 | /* and boost task create/destroy. */ | 189 | /* and boost task create/destroy. */ |
@@ -182,6 +199,9 @@ static int fullstop = FULLSTOP_RMMOD; | |||
182 | */ | 199 | */ |
183 | static DEFINE_MUTEX(fullstop_mutex); | 200 | static DEFINE_MUTEX(fullstop_mutex); |
184 | 201 | ||
202 | /* Forward reference. */ | ||
203 | static void rcu_torture_cleanup(void); | ||
204 | |||
185 | /* | 205 | /* |
186 | * Detect and respond to a system shutdown. | 206 | * Detect and respond to a system shutdown. |
187 | */ | 207 | */ |
@@ -612,6 +632,30 @@ static struct rcu_torture_ops srcu_ops = { | |||
612 | .name = "srcu" | 632 | .name = "srcu" |
613 | }; | 633 | }; |
614 | 634 | ||
635 | static int srcu_torture_read_lock_raw(void) __acquires(&srcu_ctl) | ||
636 | { | ||
637 | return srcu_read_lock_raw(&srcu_ctl); | ||
638 | } | ||
639 | |||
640 | static void srcu_torture_read_unlock_raw(int idx) __releases(&srcu_ctl) | ||
641 | { | ||
642 | srcu_read_unlock_raw(&srcu_ctl, idx); | ||
643 | } | ||
644 | |||
645 | static struct rcu_torture_ops srcu_raw_ops = { | ||
646 | .init = srcu_torture_init, | ||
647 | .cleanup = srcu_torture_cleanup, | ||
648 | .readlock = srcu_torture_read_lock_raw, | ||
649 | .read_delay = srcu_read_delay, | ||
650 | .readunlock = srcu_torture_read_unlock_raw, | ||
651 | .completed = srcu_torture_completed, | ||
652 | .deferred_free = rcu_sync_torture_deferred_free, | ||
653 | .sync = srcu_torture_synchronize, | ||
654 | .cb_barrier = NULL, | ||
655 | .stats = srcu_torture_stats, | ||
656 | .name = "srcu_raw" | ||
657 | }; | ||
658 | |||
615 | static void srcu_torture_synchronize_expedited(void) | 659 | static void srcu_torture_synchronize_expedited(void) |
616 | { | 660 | { |
617 | synchronize_srcu_expedited(&srcu_ctl); | 661 | synchronize_srcu_expedited(&srcu_ctl); |
@@ -913,6 +957,18 @@ rcu_torture_fakewriter(void *arg) | |||
913 | return 0; | 957 | return 0; |
914 | } | 958 | } |
915 | 959 | ||
960 | void rcutorture_trace_dump(void) | ||
961 | { | ||
962 | static atomic_t beenhere = ATOMIC_INIT(0); | ||
963 | |||
964 | if (atomic_read(&beenhere)) | ||
965 | return; | ||
966 | if (atomic_xchg(&beenhere, 1) != 0) | ||
967 | return; | ||
968 | do_trace_rcu_torture_read(cur_ops->name, (struct rcu_head *)~0UL); | ||
969 | ftrace_dump(DUMP_ALL); | ||
970 | } | ||
971 | |||
916 | /* | 972 | /* |
917 | * RCU torture reader from timer handler. Dereferences rcu_torture_current, | 973 | * RCU torture reader from timer handler. Dereferences rcu_torture_current, |
918 | * incrementing the corresponding element of the pipeline array. The | 974 | * incrementing the corresponding element of the pipeline array. The |
@@ -934,6 +990,7 @@ static void rcu_torture_timer(unsigned long unused) | |||
934 | rcu_read_lock_bh_held() || | 990 | rcu_read_lock_bh_held() || |
935 | rcu_read_lock_sched_held() || | 991 | rcu_read_lock_sched_held() || |
936 | srcu_read_lock_held(&srcu_ctl)); | 992 | srcu_read_lock_held(&srcu_ctl)); |
993 | do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu); | ||
937 | if (p == NULL) { | 994 | if (p == NULL) { |
938 | /* Leave because rcu_torture_writer is not yet underway */ | 995 | /* Leave because rcu_torture_writer is not yet underway */ |
939 | cur_ops->readunlock(idx); | 996 | cur_ops->readunlock(idx); |
@@ -951,6 +1008,8 @@ static void rcu_torture_timer(unsigned long unused) | |||
951 | /* Should not happen, but... */ | 1008 | /* Should not happen, but... */ |
952 | pipe_count = RCU_TORTURE_PIPE_LEN; | 1009 | pipe_count = RCU_TORTURE_PIPE_LEN; |
953 | } | 1010 | } |
1011 | if (pipe_count > 1) | ||
1012 | rcutorture_trace_dump(); | ||
954 | __this_cpu_inc(rcu_torture_count[pipe_count]); | 1013 | __this_cpu_inc(rcu_torture_count[pipe_count]); |
955 | completed = cur_ops->completed() - completed; | 1014 | completed = cur_ops->completed() - completed; |
956 | if (completed > RCU_TORTURE_PIPE_LEN) { | 1015 | if (completed > RCU_TORTURE_PIPE_LEN) { |
@@ -994,6 +1053,7 @@ rcu_torture_reader(void *arg) | |||
994 | rcu_read_lock_bh_held() || | 1053 | rcu_read_lock_bh_held() || |
995 | rcu_read_lock_sched_held() || | 1054 | rcu_read_lock_sched_held() || |
996 | srcu_read_lock_held(&srcu_ctl)); | 1055 | srcu_read_lock_held(&srcu_ctl)); |
1056 | do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu); | ||
997 | if (p == NULL) { | 1057 | if (p == NULL) { |
998 | /* Wait for rcu_torture_writer to get underway */ | 1058 | /* Wait for rcu_torture_writer to get underway */ |
999 | cur_ops->readunlock(idx); | 1059 | cur_ops->readunlock(idx); |
@@ -1009,6 +1069,8 @@ rcu_torture_reader(void *arg) | |||
1009 | /* Should not happen, but... */ | 1069 | /* Should not happen, but... */ |
1010 | pipe_count = RCU_TORTURE_PIPE_LEN; | 1070 | pipe_count = RCU_TORTURE_PIPE_LEN; |
1011 | } | 1071 | } |
1072 | if (pipe_count > 1) | ||
1073 | rcutorture_trace_dump(); | ||
1012 | __this_cpu_inc(rcu_torture_count[pipe_count]); | 1074 | __this_cpu_inc(rcu_torture_count[pipe_count]); |
1013 | completed = cur_ops->completed() - completed; | 1075 | completed = cur_ops->completed() - completed; |
1014 | if (completed > RCU_TORTURE_PIPE_LEN) { | 1076 | if (completed > RCU_TORTURE_PIPE_LEN) { |
@@ -1056,7 +1118,8 @@ rcu_torture_printk(char *page) | |||
1056 | cnt += sprintf(&page[cnt], | 1118 | cnt += sprintf(&page[cnt], |
1057 | "rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d " | 1119 | "rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d " |
1058 | "rtmbe: %d rtbke: %ld rtbre: %ld " | 1120 | "rtmbe: %d rtbke: %ld rtbre: %ld " |
1059 | "rtbf: %ld rtb: %ld nt: %ld", | 1121 | "rtbf: %ld rtb: %ld nt: %ld " |
1122 | "onoff: %ld/%ld:%ld/%ld", | ||
1060 | rcu_torture_current, | 1123 | rcu_torture_current, |
1061 | rcu_torture_current_version, | 1124 | rcu_torture_current_version, |
1062 | list_empty(&rcu_torture_freelist), | 1125 | list_empty(&rcu_torture_freelist), |
@@ -1068,7 +1131,11 @@ rcu_torture_printk(char *page) | |||
1068 | n_rcu_torture_boost_rterror, | 1131 | n_rcu_torture_boost_rterror, |
1069 | n_rcu_torture_boost_failure, | 1132 | n_rcu_torture_boost_failure, |
1070 | n_rcu_torture_boosts, | 1133 | n_rcu_torture_boosts, |
1071 | n_rcu_torture_timers); | 1134 | n_rcu_torture_timers, |
1135 | n_online_successes, | ||
1136 | n_online_attempts, | ||
1137 | n_offline_successes, | ||
1138 | n_offline_attempts); | ||
1072 | if (atomic_read(&n_rcu_torture_mberror) != 0 || | 1139 | if (atomic_read(&n_rcu_torture_mberror) != 0 || |
1073 | n_rcu_torture_boost_ktrerror != 0 || | 1140 | n_rcu_torture_boost_ktrerror != 0 || |
1074 | n_rcu_torture_boost_rterror != 0 || | 1141 | n_rcu_torture_boost_rterror != 0 || |
@@ -1232,12 +1299,14 @@ rcu_torture_print_module_parms(struct rcu_torture_ops *cur_ops, char *tag) | |||
1232 | "shuffle_interval=%d stutter=%d irqreader=%d " | 1299 | "shuffle_interval=%d stutter=%d irqreader=%d " |
1233 | "fqs_duration=%d fqs_holdoff=%d fqs_stutter=%d " | 1300 | "fqs_duration=%d fqs_holdoff=%d fqs_stutter=%d " |
1234 | "test_boost=%d/%d test_boost_interval=%d " | 1301 | "test_boost=%d/%d test_boost_interval=%d " |
1235 | "test_boost_duration=%d\n", | 1302 | "test_boost_duration=%d shutdown_secs=%d " |
1303 | "onoff_interval=%d\n", | ||
1236 | torture_type, tag, nrealreaders, nfakewriters, | 1304 | torture_type, tag, nrealreaders, nfakewriters, |
1237 | stat_interval, verbose, test_no_idle_hz, shuffle_interval, | 1305 | stat_interval, verbose, test_no_idle_hz, shuffle_interval, |
1238 | stutter, irqreader, fqs_duration, fqs_holdoff, fqs_stutter, | 1306 | stutter, irqreader, fqs_duration, fqs_holdoff, fqs_stutter, |
1239 | test_boost, cur_ops->can_boost, | 1307 | test_boost, cur_ops->can_boost, |
1240 | test_boost_interval, test_boost_duration); | 1308 | test_boost_interval, test_boost_duration, shutdown_secs, |
1309 | onoff_interval); | ||
1241 | } | 1310 | } |
1242 | 1311 | ||
1243 | static struct notifier_block rcutorture_shutdown_nb = { | 1312 | static struct notifier_block rcutorture_shutdown_nb = { |
@@ -1287,6 +1356,131 @@ static int rcutorture_booster_init(int cpu) | |||
1287 | return 0; | 1356 | return 0; |
1288 | } | 1357 | } |
1289 | 1358 | ||
1359 | /* | ||
1360 | * Cause the rcutorture test to shutdown the system after the test has | ||
1361 | * run for the time specified by the shutdown_secs module parameter. | ||
1362 | */ | ||
1363 | static int | ||
1364 | rcu_torture_shutdown(void *arg) | ||
1365 | { | ||
1366 | long delta; | ||
1367 | unsigned long jiffies_snap; | ||
1368 | |||
1369 | VERBOSE_PRINTK_STRING("rcu_torture_shutdown task started"); | ||
1370 | jiffies_snap = ACCESS_ONCE(jiffies); | ||
1371 | while (ULONG_CMP_LT(jiffies_snap, shutdown_time) && | ||
1372 | !kthread_should_stop()) { | ||
1373 | delta = shutdown_time - jiffies_snap; | ||
1374 | if (verbose) | ||
1375 | printk(KERN_ALERT "%s" TORTURE_FLAG | ||
1376 | "rcu_torture_shutdown task: %lu " | ||
1377 | "jiffies remaining\n", | ||
1378 | torture_type, delta); | ||
1379 | schedule_timeout_interruptible(delta); | ||
1380 | jiffies_snap = ACCESS_ONCE(jiffies); | ||
1381 | } | ||
1382 | if (kthread_should_stop()) { | ||
1383 | VERBOSE_PRINTK_STRING("rcu_torture_shutdown task stopping"); | ||
1384 | return 0; | ||
1385 | } | ||
1386 | |||
1387 | /* OK, shut down the system. */ | ||
1388 | |||
1389 | VERBOSE_PRINTK_STRING("rcu_torture_shutdown task shutting down system"); | ||
1390 | shutdown_task = NULL; /* Avoid self-kill deadlock. */ | ||
1391 | rcu_torture_cleanup(); /* Get the success/failure message. */ | ||
1392 | kernel_power_off(); /* Shut down the system. */ | ||
1393 | return 0; | ||
1394 | } | ||
1395 | |||
1396 | #ifdef CONFIG_HOTPLUG_CPU | ||
1397 | |||
1398 | /* | ||
1399 | * Execute random CPU-hotplug operations at the interval specified | ||
1400 | * by the onoff_interval. | ||
1401 | */ | ||
1402 | static int | ||
1403 | rcu_torture_onoff(void *arg) | ||
1404 | { | ||
1405 | int cpu; | ||
1406 | int maxcpu = -1; | ||
1407 | DEFINE_RCU_RANDOM(rand); | ||
1408 | |||
1409 | VERBOSE_PRINTK_STRING("rcu_torture_onoff task started"); | ||
1410 | for_each_online_cpu(cpu) | ||
1411 | maxcpu = cpu; | ||
1412 | WARN_ON(maxcpu < 0); | ||
1413 | while (!kthread_should_stop()) { | ||
1414 | cpu = (rcu_random(&rand) >> 4) % (maxcpu + 1); | ||
1415 | if (cpu_online(cpu) && cpu_is_hotpluggable(cpu)) { | ||
1416 | if (verbose) | ||
1417 | printk(KERN_ALERT "%s" TORTURE_FLAG | ||
1418 | "rcu_torture_onoff task: offlining %d\n", | ||
1419 | torture_type, cpu); | ||
1420 | n_offline_attempts++; | ||
1421 | if (cpu_down(cpu) == 0) { | ||
1422 | if (verbose) | ||
1423 | printk(KERN_ALERT "%s" TORTURE_FLAG | ||
1424 | "rcu_torture_onoff task: " | ||
1425 | "offlined %d\n", | ||
1426 | torture_type, cpu); | ||
1427 | n_offline_successes++; | ||
1428 | } | ||
1429 | } else if (cpu_is_hotpluggable(cpu)) { | ||
1430 | if (verbose) | ||
1431 | printk(KERN_ALERT "%s" TORTURE_FLAG | ||
1432 | "rcu_torture_onoff task: onlining %d\n", | ||
1433 | torture_type, cpu); | ||
1434 | n_online_attempts++; | ||
1435 | if (cpu_up(cpu) == 0) { | ||
1436 | if (verbose) | ||
1437 | printk(KERN_ALERT "%s" TORTURE_FLAG | ||
1438 | "rcu_torture_onoff task: " | ||
1439 | "onlined %d\n", | ||
1440 | torture_type, cpu); | ||
1441 | n_online_successes++; | ||
1442 | } | ||
1443 | } | ||
1444 | schedule_timeout_interruptible(onoff_interval * HZ); | ||
1445 | } | ||
1446 | VERBOSE_PRINTK_STRING("rcu_torture_onoff task stopping"); | ||
1447 | return 0; | ||
1448 | } | ||
1449 | |||
1450 | static int | ||
1451 | rcu_torture_onoff_init(void) | ||
1452 | { | ||
1453 | if (onoff_interval <= 0) | ||
1454 | return 0; | ||
1455 | onoff_task = kthread_run(rcu_torture_onoff, NULL, "rcu_torture_onoff"); | ||
1456 | if (IS_ERR(onoff_task)) { | ||
1457 | onoff_task = NULL; | ||
1458 | return PTR_ERR(onoff_task); | ||
1459 | } | ||
1460 | return 0; | ||
1461 | } | ||
1462 | |||
1463 | static void rcu_torture_onoff_cleanup(void) | ||
1464 | { | ||
1465 | if (onoff_task == NULL) | ||
1466 | return; | ||
1467 | VERBOSE_PRINTK_STRING("Stopping rcu_torture_onoff task"); | ||
1468 | kthread_stop(onoff_task); | ||
1469 | } | ||
1470 | |||
1471 | #else /* #ifdef CONFIG_HOTPLUG_CPU */ | ||
1472 | |||
1473 | static void | ||
1474 | rcu_torture_onoff_init(void) | ||
1475 | { | ||
1476 | } | ||
1477 | |||
1478 | static void rcu_torture_onoff_cleanup(void) | ||
1479 | { | ||
1480 | } | ||
1481 | |||
1482 | #endif /* #else #ifdef CONFIG_HOTPLUG_CPU */ | ||
1483 | |||
1290 | static int rcutorture_cpu_notify(struct notifier_block *self, | 1484 | static int rcutorture_cpu_notify(struct notifier_block *self, |
1291 | unsigned long action, void *hcpu) | 1485 | unsigned long action, void *hcpu) |
1292 | { | 1486 | { |
@@ -1391,6 +1585,11 @@ rcu_torture_cleanup(void) | |||
1391 | for_each_possible_cpu(i) | 1585 | for_each_possible_cpu(i) |
1392 | rcutorture_booster_cleanup(i); | 1586 | rcutorture_booster_cleanup(i); |
1393 | } | 1587 | } |
1588 | if (shutdown_task != NULL) { | ||
1589 | VERBOSE_PRINTK_STRING("Stopping rcu_torture_shutdown task"); | ||
1590 | kthread_stop(shutdown_task); | ||
1591 | } | ||
1592 | rcu_torture_onoff_cleanup(); | ||
1394 | 1593 | ||
1395 | /* Wait for all RCU callbacks to fire. */ | 1594 | /* Wait for all RCU callbacks to fire. */ |
1396 | 1595 | ||
@@ -1416,7 +1615,7 @@ rcu_torture_init(void) | |||
1416 | static struct rcu_torture_ops *torture_ops[] = | 1615 | static struct rcu_torture_ops *torture_ops[] = |
1417 | { &rcu_ops, &rcu_sync_ops, &rcu_expedited_ops, | 1616 | { &rcu_ops, &rcu_sync_ops, &rcu_expedited_ops, |
1418 | &rcu_bh_ops, &rcu_bh_sync_ops, &rcu_bh_expedited_ops, | 1617 | &rcu_bh_ops, &rcu_bh_sync_ops, &rcu_bh_expedited_ops, |
1419 | &srcu_ops, &srcu_expedited_ops, | 1618 | &srcu_ops, &srcu_raw_ops, &srcu_expedited_ops, |
1420 | &sched_ops, &sched_sync_ops, &sched_expedited_ops, }; | 1619 | &sched_ops, &sched_sync_ops, &sched_expedited_ops, }; |
1421 | 1620 | ||
1422 | mutex_lock(&fullstop_mutex); | 1621 | mutex_lock(&fullstop_mutex); |
@@ -1607,6 +1806,18 @@ rcu_torture_init(void) | |||
1607 | } | 1806 | } |
1608 | } | 1807 | } |
1609 | } | 1808 | } |
1809 | if (shutdown_secs > 0) { | ||
1810 | shutdown_time = jiffies + shutdown_secs * HZ; | ||
1811 | shutdown_task = kthread_run(rcu_torture_shutdown, NULL, | ||
1812 | "rcu_torture_shutdown"); | ||
1813 | if (IS_ERR(shutdown_task)) { | ||
1814 | firsterr = PTR_ERR(shutdown_task); | ||
1815 | VERBOSE_PRINTK_ERRSTRING("Failed to create shutdown"); | ||
1816 | shutdown_task = NULL; | ||
1817 | goto unwind; | ||
1818 | } | ||
1819 | } | ||
1820 | rcu_torture_onoff_init(); | ||
1610 | register_reboot_notifier(&rcutorture_shutdown_nb); | 1821 | register_reboot_notifier(&rcutorture_shutdown_nb); |
1611 | rcutorture_record_test_transition(); | 1822 | rcutorture_record_test_transition(); |
1612 | mutex_unlock(&fullstop_mutex); | 1823 | mutex_unlock(&fullstop_mutex); |
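The new rcu_torture_shutdown() thread compares jiffies against shutdown_time with ULONG_CMP_LT() so the deadline test stays correct even if the jiffies counter wraps. A hedged user-space sketch of that comparison idiom (ulong_before() is an illustrative name; the real macro lives in the RCU headers):

#include <limits.h>
#include <stdio.h>

static int ulong_before(unsigned long a, unsigned long b)
{
        return ULONG_MAX / 2 < a - b;   /* same shape as the kernel's ULONG_CMP_LT() */
}

int main(void)
{
        unsigned long now = ULONG_MAX - 5;      /* just before the counter wraps */
        unsigned long deadline = now + 100;     /* wraps past zero */

        printf("still before deadline: %d\n", ulong_before(now, deadline));          /* 1 */
        printf("past deadline:         %d\n", ulong_before(deadline + 1, deadline)); /* 0 */
        return 0;
}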
diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 6b76d812740c..6c4a6722abfd 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c | |||
@@ -69,7 +69,7 @@ static struct lock_class_key rcu_node_class[NUM_RCU_LVLS]; | |||
69 | NUM_RCU_LVL_3, \ | 69 | NUM_RCU_LVL_3, \ |
70 | NUM_RCU_LVL_4, /* == MAX_RCU_LVLS */ \ | 70 | NUM_RCU_LVL_4, /* == MAX_RCU_LVLS */ \ |
71 | }, \ | 71 | }, \ |
72 | .signaled = RCU_GP_IDLE, \ | 72 | .fqs_state = RCU_GP_IDLE, \ |
73 | .gpnum = -300, \ | 73 | .gpnum = -300, \ |
74 | .completed = -300, \ | 74 | .completed = -300, \ |
75 | .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.onofflock), \ | 75 | .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.onofflock), \ |
@@ -195,12 +195,10 @@ void rcu_note_context_switch(int cpu) | |||
195 | } | 195 | } |
196 | EXPORT_SYMBOL_GPL(rcu_note_context_switch); | 196 | EXPORT_SYMBOL_GPL(rcu_note_context_switch); |
197 | 197 | ||
198 | #ifdef CONFIG_NO_HZ | ||
199 | DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { | 198 | DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { |
200 | .dynticks_nesting = 1, | 199 | .dynticks_nesting = DYNTICK_TASK_NESTING, |
201 | .dynticks = ATOMIC_INIT(1), | 200 | .dynticks = ATOMIC_INIT(1), |
202 | }; | 201 | }; |
203 | #endif /* #ifdef CONFIG_NO_HZ */ | ||
204 | 202 | ||
205 | static int blimit = 10; /* Maximum callbacks per rcu_do_batch. */ | 203 | static int blimit = 10; /* Maximum callbacks per rcu_do_batch. */ |
206 | static int qhimark = 10000; /* If this many pending, ignore blimit. */ | 204 | static int qhimark = 10000; /* If this many pending, ignore blimit. */ |
@@ -328,11 +326,11 @@ static int rcu_implicit_offline_qs(struct rcu_data *rdp) | |||
328 | return 1; | 326 | return 1; |
329 | } | 327 | } |
330 | 328 | ||
331 | /* If preemptible RCU, no point in sending reschedule IPI. */ | 329 | /* |
332 | if (rdp->preemptible) | 330 | * The CPU is online, so send it a reschedule IPI. This forces |
333 | return 0; | 331 | * it through the scheduler, and (inefficiently) also handles cases |
334 | 332 | * where idle loops fail to inform RCU about the CPU being idle. | |
335 | /* The CPU is online, so send it a reschedule IPI. */ | 333 | */ |
336 | if (rdp->cpu != smp_processor_id()) | 334 | if (rdp->cpu != smp_processor_id()) |
337 | smp_send_reschedule(rdp->cpu); | 335 | smp_send_reschedule(rdp->cpu); |
338 | else | 336 | else |
@@ -343,59 +341,181 @@ static int rcu_implicit_offline_qs(struct rcu_data *rdp) | |||
343 | 341 | ||
344 | #endif /* #ifdef CONFIG_SMP */ | 342 | #endif /* #ifdef CONFIG_SMP */ |
345 | 343 | ||
346 | #ifdef CONFIG_NO_HZ | 344 | /* |
345 | * rcu_idle_enter_common - inform RCU that current CPU is moving towards idle | ||
346 | * | ||
347 | * If the new value of the ->dynticks_nesting counter now is zero, | ||
348 | * we really have entered idle, and must do the appropriate accounting. | ||
349 | * The caller must have disabled interrupts. | ||
350 | */ | ||
351 | static void rcu_idle_enter_common(struct rcu_dynticks *rdtp, long long oldval) | ||
352 | { | ||
353 | trace_rcu_dyntick("Start", oldval, 0); | ||
354 | if (!is_idle_task(current)) { | ||
355 | struct task_struct *idle = idle_task(smp_processor_id()); | ||
356 | |||
357 | trace_rcu_dyntick("Error on entry: not idle task", oldval, 0); | ||
358 | ftrace_dump(DUMP_ALL); | ||
359 | WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", | ||
360 | current->pid, current->comm, | ||
361 | idle->pid, idle->comm); /* must be idle task! */ | ||
362 | } | ||
363 | rcu_prepare_for_idle(smp_processor_id()); | ||
364 | /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */ | ||
365 | smp_mb__before_atomic_inc(); /* See above. */ | ||
366 | atomic_inc(&rdtp->dynticks); | ||
367 | smp_mb__after_atomic_inc(); /* Force ordering with next sojourn. */ | ||
368 | WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); | ||
369 | } | ||
347 | 370 | ||
348 | /** | 371 | /** |
349 | * rcu_enter_nohz - inform RCU that current CPU is entering nohz | 372 | * rcu_idle_enter - inform RCU that current CPU is entering idle |
350 | * | 373 | * |
351 | * Enter nohz mode, in other words, -leave- the mode in which RCU | 374 | * Enter idle mode, in other words, -leave- the mode in which RCU |
352 | * read-side critical sections can occur. (Though RCU read-side | 375 | * read-side critical sections can occur. (Though RCU read-side |
353 | * critical sections can occur in irq handlers in nohz mode, a possibility | 376 | * critical sections can occur in irq handlers in idle, a possibility |
354 | * handled by rcu_irq_enter() and rcu_irq_exit()). | 377 | * handled by irq_enter() and irq_exit().) |
378 | * | ||
379 | * We crowbar the ->dynticks_nesting field to zero to allow for | ||
380 | * the possibility of usermode upcalls having messed up our count | ||
381 | * of interrupt nesting level during the prior busy period. | ||
355 | */ | 382 | */ |
356 | void rcu_enter_nohz(void) | 383 | void rcu_idle_enter(void) |
357 | { | 384 | { |
358 | unsigned long flags; | 385 | unsigned long flags; |
386 | long long oldval; | ||
359 | struct rcu_dynticks *rdtp; | 387 | struct rcu_dynticks *rdtp; |
360 | 388 | ||
361 | local_irq_save(flags); | 389 | local_irq_save(flags); |
362 | rdtp = &__get_cpu_var(rcu_dynticks); | 390 | rdtp = &__get_cpu_var(rcu_dynticks); |
363 | if (--rdtp->dynticks_nesting) { | 391 | oldval = rdtp->dynticks_nesting; |
364 | local_irq_restore(flags); | 392 | rdtp->dynticks_nesting = 0; |
365 | return; | 393 | rcu_idle_enter_common(rdtp, oldval); |
366 | } | ||
367 | trace_rcu_dyntick("Start"); | ||
368 | /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */ | ||
369 | smp_mb__before_atomic_inc(); /* See above. */ | ||
370 | atomic_inc(&rdtp->dynticks); | ||
371 | smp_mb__after_atomic_inc(); /* Force ordering with next sojourn. */ | ||
372 | WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); | ||
373 | local_irq_restore(flags); | 394 | local_irq_restore(flags); |
374 | } | 395 | } |
375 | 396 | ||
376 | /* | 397 | /** |
377 | * rcu_exit_nohz - inform RCU that current CPU is leaving nohz | 398 | * rcu_irq_exit - inform RCU that current CPU is exiting irq towards idle |
399 | * | ||
400 | * Exit from an interrupt handler, which might possibly result in entering | ||
401 | * idle mode, in other words, leaving the mode in which read-side critical | ||
402 | * sections can occur. | ||
378 | * | 403 | * |
379 | * Exit nohz mode, in other words, -enter- the mode in which RCU | 404 | * This code assumes that the idle loop never does anything that might |
380 | * read-side critical sections normally occur. | 405 | * result in unbalanced calls to irq_enter() and irq_exit(). If your |
406 | * architecture violates this assumption, RCU will give you what you | ||
407 | * deserve, good and hard. But very infrequently and irreproducibly. | ||
408 | * | ||
409 | * Use things like work queues to work around this limitation. | ||
410 | * | ||
411 | * You have been warned. | ||
381 | */ | 412 | */ |
382 | void rcu_exit_nohz(void) | 413 | void rcu_irq_exit(void) |
383 | { | 414 | { |
384 | unsigned long flags; | 415 | unsigned long flags; |
416 | long long oldval; | ||
385 | struct rcu_dynticks *rdtp; | 417 | struct rcu_dynticks *rdtp; |
386 | 418 | ||
387 | local_irq_save(flags); | 419 | local_irq_save(flags); |
388 | rdtp = &__get_cpu_var(rcu_dynticks); | 420 | rdtp = &__get_cpu_var(rcu_dynticks); |
389 | if (rdtp->dynticks_nesting++) { | 421 | oldval = rdtp->dynticks_nesting; |
390 | local_irq_restore(flags); | 422 | rdtp->dynticks_nesting--; |
391 | return; | 423 | WARN_ON_ONCE(rdtp->dynticks_nesting < 0); |
392 | } | 424 | if (rdtp->dynticks_nesting) |
425 | trace_rcu_dyntick("--=", oldval, rdtp->dynticks_nesting); | ||
426 | else | ||
427 | rcu_idle_enter_common(rdtp, oldval); | ||
428 | local_irq_restore(flags); | ||
429 | } | ||
430 | |||
431 | /* | ||
432 | * rcu_idle_exit_common - inform RCU that current CPU is moving away from idle | ||
433 | * | ||
434 | * If the new value of the ->dynticks_nesting counter was previously zero, | ||
435 | * we really have exited idle, and must do the appropriate accounting. | ||
436 | * The caller must have disabled interrupts. | ||
437 | */ | ||
438 | static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval) | ||
439 | { | ||
393 | smp_mb__before_atomic_inc(); /* Force ordering w/previous sojourn. */ | 440 | smp_mb__before_atomic_inc(); /* Force ordering w/previous sojourn. */ |
394 | atomic_inc(&rdtp->dynticks); | 441 | atomic_inc(&rdtp->dynticks); |
395 | /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ | 442 | /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ |
396 | smp_mb__after_atomic_inc(); /* See above. */ | 443 | smp_mb__after_atomic_inc(); /* See above. */ |
397 | WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); | 444 | WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); |
398 | trace_rcu_dyntick("End"); | 445 | rcu_cleanup_after_idle(smp_processor_id()); |
446 | trace_rcu_dyntick("End", oldval, rdtp->dynticks_nesting); | ||
447 | if (!is_idle_task(current)) { | ||
448 | struct task_struct *idle = idle_task(smp_processor_id()); | ||
449 | |||
450 | trace_rcu_dyntick("Error on exit: not idle task", | ||
451 | oldval, rdtp->dynticks_nesting); | ||
452 | ftrace_dump(DUMP_ALL); | ||
453 | WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", | ||
454 | current->pid, current->comm, | ||
455 | idle->pid, idle->comm); /* must be idle task! */ | ||
456 | } | ||
457 | } | ||
458 | |||
459 | /** | ||
460 | * rcu_idle_exit - inform RCU that current CPU is leaving idle | ||
461 | * | ||
462 | * Exit idle mode, in other words, -enter- the mode in which RCU | ||
463 | * read-side critical sections can occur. | ||
464 | * | ||
465 | * We crowbar the ->dynticks_nesting field to DYNTICK_TASK_NESTING to | ||
466 | * allow for the possibility of usermode upcalls messing up our count | ||
467 | * of interrupt nesting level during the busy period that is just | ||
468 | * now starting. | ||
469 | */ | ||
470 | void rcu_idle_exit(void) | ||
471 | { | ||
472 | unsigned long flags; | ||
473 | struct rcu_dynticks *rdtp; | ||
474 | long long oldval; | ||
475 | |||
476 | local_irq_save(flags); | ||
477 | rdtp = &__get_cpu_var(rcu_dynticks); | ||
478 | oldval = rdtp->dynticks_nesting; | ||
479 | WARN_ON_ONCE(oldval != 0); | ||
480 | rdtp->dynticks_nesting = DYNTICK_TASK_NESTING; | ||
481 | rcu_idle_exit_common(rdtp, oldval); | ||
482 | local_irq_restore(flags); | ||
483 | } | ||
484 | |||
485 | /** | ||
486 | * rcu_irq_enter - inform RCU that current CPU is entering irq away from idle | ||
487 | * | ||
488 | * Enter an interrupt handler, which might possibly result in exiting | ||
489 | * idle mode, in other words, entering the mode in which read-side critical | ||
490 | * sections can occur. | ||
491 | * | ||
492 | * Note that the Linux kernel is fully capable of entering an interrupt | ||
493 | * handler that it never exits, for example when doing upcalls to | ||
494 | * user mode! This code assumes that the idle loop never does upcalls to | ||
495 | * user mode. If your architecture does do upcalls from the idle loop (or | ||
496 | * does anything else that results in unbalanced calls to the irq_enter() | ||
497 | * and irq_exit() functions), RCU will give you what you deserve, good | ||
498 | * and hard. But very infrequently and irreproducibly. | ||
499 | * | ||
500 | * Use things like work queues to work around this limitation. | ||
501 | * | ||
502 | * You have been warned. | ||
503 | */ | ||
504 | void rcu_irq_enter(void) | ||
505 | { | ||
506 | unsigned long flags; | ||
507 | struct rcu_dynticks *rdtp; | ||
508 | long long oldval; | ||
509 | |||
510 | local_irq_save(flags); | ||
511 | rdtp = &__get_cpu_var(rcu_dynticks); | ||
512 | oldval = rdtp->dynticks_nesting; | ||
513 | rdtp->dynticks_nesting++; | ||
514 | WARN_ON_ONCE(rdtp->dynticks_nesting == 0); | ||
515 | if (oldval) | ||
516 | trace_rcu_dyntick("++=", oldval, rdtp->dynticks_nesting); | ||
517 | else | ||
518 | rcu_idle_exit_common(rdtp, oldval); | ||
399 | local_irq_restore(flags); | 519 | local_irq_restore(flags); |
400 | } | 520 | } |
401 | 521 | ||
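The rewritten idle entry/exit paths above keep the per-CPU ->dynticks counter even while the CPU is idle and odd otherwise, with full memory barriers around each increment so that remote CPUs can sample it safely. A hedged user-space sketch of that even/odd protocol using C11 atomics (the kernel version additionally tracks nesting and emits tracepoints; names here are illustrative):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_long dynticks = 1;        /* odd: the CPU starts out non-idle */

static void enter_idle(void)
{
        atomic_thread_fence(memory_order_seq_cst);  /* order prior read-side work */
        atomic_fetch_add(&dynticks, 1);             /* counter becomes even */
        atomic_thread_fence(memory_order_seq_cst);
}

static void exit_idle(void)
{
        atomic_thread_fence(memory_order_seq_cst);
        atomic_fetch_add(&dynticks, 1);             /* counter becomes odd */
        atomic_thread_fence(memory_order_seq_cst);  /* order later read-side work */
}

static bool cpu_looks_idle(void)    /* the test rcu_is_cpu_idle() performs */
{
        return (atomic_load(&dynticks) & 0x1) == 0;
}

int main(void)
{
        printf("idle? %d\n", cpu_looks_idle());     /* 0 */
        enter_idle();
        printf("idle? %d\n", cpu_looks_idle());     /* 1 */
        exit_idle();
        printf("idle? %d\n", cpu_looks_idle());     /* 0 */
        return 0;
}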
@@ -442,27 +562,37 @@ void rcu_nmi_exit(void) | |||
442 | WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); | 562 | WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); |
443 | } | 563 | } |
444 | 564 | ||
565 | #ifdef CONFIG_PROVE_RCU | ||
566 | |||
445 | /** | 567 | /** |
446 | * rcu_irq_enter - inform RCU of entry to hard irq context | 568 | * rcu_is_cpu_idle - see if RCU thinks that the current CPU is idle |
447 | * | 569 | * |
448 | * If the CPU was idle with dynamic ticks active, this updates the | 570 | * If the current CPU is in its idle loop and is neither in an interrupt |
449 | * rdtp->dynticks to let the RCU handling know that the CPU is active. | 571 | * nor an NMI handler, return true. |
450 | */ | 572 | */ |
451 | void rcu_irq_enter(void) | 573 | int rcu_is_cpu_idle(void) |
452 | { | 574 | { |
453 | rcu_exit_nohz(); | 575 | int ret; |
576 | |||
577 | preempt_disable(); | ||
578 | ret = (atomic_read(&__get_cpu_var(rcu_dynticks).dynticks) & 0x1) == 0; | ||
579 | preempt_enable(); | ||
580 | return ret; | ||
454 | } | 581 | } |
582 | EXPORT_SYMBOL(rcu_is_cpu_idle); | ||
583 | |||
584 | #endif /* #ifdef CONFIG_PROVE_RCU */ | ||
455 | 585 | ||
456 | /** | 586 | /** |
457 | * rcu_irq_exit - inform RCU of exit from hard irq context | 587 | * rcu_is_cpu_rrupt_from_idle - see if idle or immediately interrupted from idle |
458 | * | 588 | * |
459 | * If the CPU was idle with dynamic ticks active, update the rdp->dynticks | 589 | * If the current CPU is idle or running at a first-level (not nested) |
460 | * to put let the RCU handling be aware that the CPU is going back to idle | 590 | * interrupt from idle, return true. The caller must have at least |
461 | * with no ticks. | 591 | * disabled preemption. |
462 | */ | 592 | */ |
463 | void rcu_irq_exit(void) | 593 | int rcu_is_cpu_rrupt_from_idle(void) |
464 | { | 594 | { |
465 | rcu_enter_nohz(); | 595 | return __get_cpu_var(rcu_dynticks).dynticks_nesting <= 1; |
466 | } | 596 | } |
467 | 597 | ||
468 | #ifdef CONFIG_SMP | 598 | #ifdef CONFIG_SMP |
@@ -475,7 +605,7 @@ void rcu_irq_exit(void) | |||
475 | static int dyntick_save_progress_counter(struct rcu_data *rdp) | 605 | static int dyntick_save_progress_counter(struct rcu_data *rdp) |
476 | { | 606 | { |
477 | rdp->dynticks_snap = atomic_add_return(0, &rdp->dynticks->dynticks); | 607 | rdp->dynticks_snap = atomic_add_return(0, &rdp->dynticks->dynticks); |
478 | return 0; | 608 | return (rdp->dynticks_snap & 0x1) == 0; |
479 | } | 609 | } |
480 | 610 | ||
481 | /* | 611 | /* |
@@ -512,26 +642,6 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) | |||
512 | 642 | ||
513 | #endif /* #ifdef CONFIG_SMP */ | 643 | #endif /* #ifdef CONFIG_SMP */ |
514 | 644 | ||
515 | #else /* #ifdef CONFIG_NO_HZ */ | ||
516 | |||
517 | #ifdef CONFIG_SMP | ||
518 | |||
519 | static int dyntick_save_progress_counter(struct rcu_data *rdp) | ||
520 | { | ||
521 | return 0; | ||
522 | } | ||
523 | |||
524 | static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) | ||
525 | { | ||
526 | return rcu_implicit_offline_qs(rdp); | ||
527 | } | ||
528 | |||
529 | #endif /* #ifdef CONFIG_SMP */ | ||
530 | |||
531 | #endif /* #else #ifdef CONFIG_NO_HZ */ | ||
532 | |||
533 | int rcu_cpu_stall_suppress __read_mostly; | ||
534 | |||
535 | static void record_gp_stall_check_time(struct rcu_state *rsp) | 645 | static void record_gp_stall_check_time(struct rcu_state *rsp) |
536 | { | 646 | { |
537 | rsp->gp_start = jiffies; | 647 | rsp->gp_start = jiffies; |
@@ -866,8 +976,8 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) | |||
866 | /* Advance to a new grace period and initialize state. */ | 976 | /* Advance to a new grace period and initialize state. */ |
867 | rsp->gpnum++; | 977 | rsp->gpnum++; |
868 | trace_rcu_grace_period(rsp->name, rsp->gpnum, "start"); | 978 | trace_rcu_grace_period(rsp->name, rsp->gpnum, "start"); |
869 | WARN_ON_ONCE(rsp->signaled == RCU_GP_INIT); | 979 | WARN_ON_ONCE(rsp->fqs_state == RCU_GP_INIT); |
870 | rsp->signaled = RCU_GP_INIT; /* Hold off force_quiescent_state. */ | 980 | rsp->fqs_state = RCU_GP_INIT; /* Hold off force_quiescent_state. */ |
871 | rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; | 981 | rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; |
872 | record_gp_stall_check_time(rsp); | 982 | record_gp_stall_check_time(rsp); |
873 | 983 | ||
@@ -877,7 +987,7 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) | |||
877 | rnp->qsmask = rnp->qsmaskinit; | 987 | rnp->qsmask = rnp->qsmaskinit; |
878 | rnp->gpnum = rsp->gpnum; | 988 | rnp->gpnum = rsp->gpnum; |
879 | rnp->completed = rsp->completed; | 989 | rnp->completed = rsp->completed; |
880 | rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state OK. */ | 990 | rsp->fqs_state = RCU_SIGNAL_INIT; /* force_quiescent_state OK */ |
881 | rcu_start_gp_per_cpu(rsp, rnp, rdp); | 991 | rcu_start_gp_per_cpu(rsp, rnp, rdp); |
882 | rcu_preempt_boost_start_gp(rnp); | 992 | rcu_preempt_boost_start_gp(rnp); |
883 | trace_rcu_grace_period_init(rsp->name, rnp->gpnum, | 993 | trace_rcu_grace_period_init(rsp->name, rnp->gpnum, |
@@ -927,7 +1037,7 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) | |||
927 | 1037 | ||
928 | rnp = rcu_get_root(rsp); | 1038 | rnp = rcu_get_root(rsp); |
929 | raw_spin_lock(&rnp->lock); /* irqs already disabled. */ | 1039 | raw_spin_lock(&rnp->lock); /* irqs already disabled. */ |
930 | rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state now OK. */ | 1040 | rsp->fqs_state = RCU_SIGNAL_INIT; /* force_quiescent_state now OK. */ |
931 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | 1041 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ |
932 | raw_spin_unlock_irqrestore(&rsp->onofflock, flags); | 1042 | raw_spin_unlock_irqrestore(&rsp->onofflock, flags); |
933 | } | 1043 | } |
@@ -991,7 +1101,7 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags) | |||
991 | 1101 | ||
992 | rsp->completed = rsp->gpnum; /* Declare the grace period complete. */ | 1102 | rsp->completed = rsp->gpnum; /* Declare the grace period complete. */ |
993 | trace_rcu_grace_period(rsp->name, rsp->completed, "end"); | 1103 | trace_rcu_grace_period(rsp->name, rsp->completed, "end"); |
994 | rsp->signaled = RCU_GP_IDLE; | 1104 | rsp->fqs_state = RCU_GP_IDLE; |
995 | rcu_start_gp(rsp, flags); /* releases root node's rnp->lock. */ | 1105 | rcu_start_gp(rsp, flags); /* releases root node's rnp->lock. */ |
996 | } | 1106 | } |
997 | 1107 | ||
@@ -1221,7 +1331,7 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) | |||
1221 | else | 1331 | else |
1222 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1332 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
1223 | if (need_report & RCU_OFL_TASKS_EXP_GP) | 1333 | if (need_report & RCU_OFL_TASKS_EXP_GP) |
1224 | rcu_report_exp_rnp(rsp, rnp); | 1334 | rcu_report_exp_rnp(rsp, rnp, true); |
1225 | rcu_node_kthread_setaffinity(rnp, -1); | 1335 | rcu_node_kthread_setaffinity(rnp, -1); |
1226 | } | 1336 | } |
1227 | 1337 | ||
@@ -1263,7 +1373,9 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) | |||
1263 | /* If no callbacks are ready, just return.*/ | 1373 | /* If no callbacks are ready, just return.*/ |
1264 | if (!cpu_has_callbacks_ready_to_invoke(rdp)) { | 1374 | if (!cpu_has_callbacks_ready_to_invoke(rdp)) { |
1265 | trace_rcu_batch_start(rsp->name, 0, 0); | 1375 | trace_rcu_batch_start(rsp->name, 0, 0); |
1266 | trace_rcu_batch_end(rsp->name, 0); | 1376 | trace_rcu_batch_end(rsp->name, 0, !!ACCESS_ONCE(rdp->nxtlist), |
1377 | need_resched(), is_idle_task(current), | ||
1378 | rcu_is_callbacks_kthread()); | ||
1267 | return; | 1379 | return; |
1268 | } | 1380 | } |
1269 | 1381 | ||
@@ -1291,12 +1403,17 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) | |||
1291 | debug_rcu_head_unqueue(list); | 1403 | debug_rcu_head_unqueue(list); |
1292 | __rcu_reclaim(rsp->name, list); | 1404 | __rcu_reclaim(rsp->name, list); |
1293 | list = next; | 1405 | list = next; |
1294 | if (++count >= bl) | 1406 | /* Stop only if limit reached and CPU has something to do. */ |
1407 | if (++count >= bl && | ||
1408 | (need_resched() || | ||
1409 | (!is_idle_task(current) && !rcu_is_callbacks_kthread()))) | ||
1295 | break; | 1410 | break; |
1296 | } | 1411 | } |
1297 | 1412 | ||
1298 | local_irq_save(flags); | 1413 | local_irq_save(flags); |
1299 | trace_rcu_batch_end(rsp->name, count); | 1414 | trace_rcu_batch_end(rsp->name, count, !!list, need_resched(), |
1415 | is_idle_task(current), | ||
1416 | rcu_is_callbacks_kthread()); | ||
1300 | 1417 | ||
1301 | /* Update count, and requeue any remaining callbacks. */ | 1418 | /* Update count, and requeue any remaining callbacks. */ |
1302 | rdp->qlen -= count; | 1419 | rdp->qlen -= count; |
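The rcu_do_batch() change above relaxes the callback batch limit: the loop now stops early only when the limit has been reached and the CPU actually has competing work (need_resched(), or it is neither the idle task nor the callbacks kthread). A simplified, hedged sketch of that policy, with the kernel predicates collapsed into two illustrative flags:

#include <stdbool.h>
#include <stddef.h>

struct cb {
        struct cb *next;
        void (*func)(struct cb *);
};

/* Illustrative stand-ins for need_resched() and the idle/kthread checks. */
static bool need_resched_hint;
static bool cpu_otherwise_idle;

static int run_batch(struct cb *list, int blimit)
{
        int count = 0;

        while (list) {
                struct cb *next = list->next;

                list->func(list);
                list = next;
                /* Stop only if the limit is reached AND the CPU has
                   something better to do, mirroring the new test above. */
                if (++count >= blimit &&
                    (need_resched_hint || !cpu_otherwise_idle))
                        break;
        }
        return count;
}

static void nop_cb(struct cb *c) { (void)c; }

int main(void)
{
        struct cb b = { NULL, nop_cb };
        struct cb a = { &b, nop_cb };

        cpu_otherwise_idle = true;              /* idle CPU: keep going past blimit */
        return run_batch(&a, 1) == 2 ? 0 : 1;   /* both callbacks are invoked */
}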
@@ -1334,16 +1451,14 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) | |||
1334 | * (user mode or idle loop for rcu, non-softirq execution for rcu_bh). | 1451 | * (user mode or idle loop for rcu, non-softirq execution for rcu_bh). |
1335 | * Also schedule RCU core processing. | 1452 | * Also schedule RCU core processing. |
1336 | * | 1453 | * |
1337 | * This function must be called with hardirqs disabled. It is normally | 1454 | * This function must be called from hardirq context. It is normally |
1338 | * invoked from the scheduling-clock interrupt. If rcu_pending returns | 1455 | * invoked from the scheduling-clock interrupt. If rcu_pending returns |
1339 | * false, there is no point in invoking rcu_check_callbacks(). | 1456 | * false, there is no point in invoking rcu_check_callbacks(). |
1340 | */ | 1457 | */ |
1341 | void rcu_check_callbacks(int cpu, int user) | 1458 | void rcu_check_callbacks(int cpu, int user) |
1342 | { | 1459 | { |
1343 | trace_rcu_utilization("Start scheduler-tick"); | 1460 | trace_rcu_utilization("Start scheduler-tick"); |
1344 | if (user || | 1461 | if (user || rcu_is_cpu_rrupt_from_idle()) { |
1345 | (idle_cpu(cpu) && rcu_scheduler_active && | ||
1346 | !in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) { | ||
1347 | 1462 | ||
1348 | /* | 1463 | /* |
1349 | * Get here if this CPU took its interrupt from user | 1464 | * Get here if this CPU took its interrupt from user |
@@ -1457,7 +1572,7 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) | |||
1457 | goto unlock_fqs_ret; /* no GP in progress, time updated. */ | 1572 | goto unlock_fqs_ret; /* no GP in progress, time updated. */ |
1458 | } | 1573 | } |
1459 | rsp->fqs_active = 1; | 1574 | rsp->fqs_active = 1; |
1460 | switch (rsp->signaled) { | 1575 | switch (rsp->fqs_state) { |
1461 | case RCU_GP_IDLE: | 1576 | case RCU_GP_IDLE: |
1462 | case RCU_GP_INIT: | 1577 | case RCU_GP_INIT: |
1463 | 1578 | ||
@@ -1473,7 +1588,7 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) | |||
1473 | force_qs_rnp(rsp, dyntick_save_progress_counter); | 1588 | force_qs_rnp(rsp, dyntick_save_progress_counter); |
1474 | raw_spin_lock(&rnp->lock); /* irqs already disabled */ | 1589 | raw_spin_lock(&rnp->lock); /* irqs already disabled */ |
1475 | if (rcu_gp_in_progress(rsp)) | 1590 | if (rcu_gp_in_progress(rsp)) |
1476 | rsp->signaled = RCU_FORCE_QS; | 1591 | rsp->fqs_state = RCU_FORCE_QS; |
1477 | break; | 1592 | break; |
1478 | 1593 | ||
1479 | case RCU_FORCE_QS: | 1594 | case RCU_FORCE_QS: |
@@ -1812,7 +1927,7 @@ static int rcu_pending(int cpu) | |||
1812 | * by the current CPU, even if none need be done immediately, returning | 1927 | * by the current CPU, even if none need be done immediately, returning |
1813 | * 1 if so. | 1928 | * 1 if so. |
1814 | */ | 1929 | */ |
1815 | static int rcu_needs_cpu_quick_check(int cpu) | 1930 | static int rcu_cpu_has_callbacks(int cpu) |
1816 | { | 1931 | { |
1817 | /* RCU callbacks either ready or pending? */ | 1932 | /* RCU callbacks either ready or pending? */ |
1818 | return per_cpu(rcu_sched_data, cpu).nxtlist || | 1933 | return per_cpu(rcu_sched_data, cpu).nxtlist || |
@@ -1913,9 +2028,9 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) | |||
1913 | for (i = 0; i < RCU_NEXT_SIZE; i++) | 2028 | for (i = 0; i < RCU_NEXT_SIZE; i++) |
1914 | rdp->nxttail[i] = &rdp->nxtlist; | 2029 | rdp->nxttail[i] = &rdp->nxtlist; |
1915 | rdp->qlen = 0; | 2030 | rdp->qlen = 0; |
1916 | #ifdef CONFIG_NO_HZ | ||
1917 | rdp->dynticks = &per_cpu(rcu_dynticks, cpu); | 2031 | rdp->dynticks = &per_cpu(rcu_dynticks, cpu); |
1918 | #endif /* #ifdef CONFIG_NO_HZ */ | 2032 | WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_NESTING); |
2033 | WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1); | ||
1919 | rdp->cpu = cpu; | 2034 | rdp->cpu = cpu; |
1920 | rdp->rsp = rsp; | 2035 | rdp->rsp = rsp; |
1921 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 2036 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
@@ -1942,6 +2057,10 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) | |||
1942 | rdp->qlen_last_fqs_check = 0; | 2057 | rdp->qlen_last_fqs_check = 0; |
1943 | rdp->n_force_qs_snap = rsp->n_force_qs; | 2058 | rdp->n_force_qs_snap = rsp->n_force_qs; |
1944 | rdp->blimit = blimit; | 2059 | rdp->blimit = blimit; |
2060 | rdp->dynticks->dynticks_nesting = DYNTICK_TASK_NESTING; | ||
2061 | atomic_set(&rdp->dynticks->dynticks, | ||
2062 | (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1); | ||
2063 | rcu_prepare_for_idle_init(cpu); | ||
1945 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | 2064 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ |
1946 | 2065 | ||
1947 | /* | 2066 | /* |
@@ -2023,6 +2142,7 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, | |||
2023 | rcu_send_cbs_to_online(&rcu_bh_state); | 2142 | rcu_send_cbs_to_online(&rcu_bh_state); |
2024 | rcu_send_cbs_to_online(&rcu_sched_state); | 2143 | rcu_send_cbs_to_online(&rcu_sched_state); |
2025 | rcu_preempt_send_cbs_to_online(); | 2144 | rcu_preempt_send_cbs_to_online(); |
2145 | rcu_cleanup_after_idle(cpu); | ||
2026 | break; | 2146 | break; |
2027 | case CPU_DEAD: | 2147 | case CPU_DEAD: |
2028 | case CPU_DEAD_FROZEN: | 2148 | case CPU_DEAD_FROZEN: |
diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 849ce9ec51fe..fddff92d6676 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h | |||
@@ -84,9 +84,10 @@ | |||
84 | * Dynticks per-CPU state. | 84 | * Dynticks per-CPU state. |
85 | */ | 85 | */ |
86 | struct rcu_dynticks { | 86 | struct rcu_dynticks { |
87 | int dynticks_nesting; /* Track irq/process nesting level. */ | 87 | long long dynticks_nesting; /* Track irq/process nesting level. */ |
88 | int dynticks_nmi_nesting; /* Track NMI nesting level. */ | 88 | /* Process level is worth LLONG_MAX/2. */ |
89 | atomic_t dynticks; /* Even value for dynticks-idle, else odd. */ | 89 | int dynticks_nmi_nesting; /* Track NMI nesting level. */ |
90 | atomic_t dynticks; /* Even value for idle, else odd. */ | ||
90 | }; | 91 | }; |
91 | 92 | ||
92 | /* RCU's kthread states for tracing. */ | 93 | /* RCU's kthread states for tracing. */ |
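The struct rcu_dynticks change above widens ->dynticks_nesting to long long because process-level activity is now counted with a large bias (roughly LLONG_MAX/2 per the comment), so unbalanced irq_enter()/irq_exit() pairs during usermode upcalls cannot accidentally drive the count to zero; RCU treats the CPU as idle only when the count is exactly zero. A hedged sketch of that bookkeeping (TASK_NESTING_BIAS is an illustrative stand-in for DYNTICK_TASK_NESTING):

#include <stdbool.h>
#include <stdio.h>

#define TASK_NESTING_BIAS (1LL << 40)   /* stand-in for DYNTICK_TASK_NESTING */

static long long nesting = TASK_NESTING_BIAS;   /* booted in task context */

static void idle_enter(void) { nesting = 0; }                   /* crowbarred to zero */
static void idle_exit(void)  { nesting = TASK_NESTING_BIAS; }   /* back to task level */
static void irq_enter_(void) { nesting++; }
static void irq_exit_(void)  { nesting--; }
static bool rcu_thinks_idle(void) { return nesting == 0; }

int main(void)
{
        idle_enter();                                   /* idle loop entered */
        printf("idle loop: %d\n", rcu_thinks_idle());   /* 1 */
        irq_enter_();                                   /* interrupt taken from idle */
        printf("in irq:    %d\n", rcu_thinks_idle());   /* 0 */
        irq_exit_();
        printf("back idle: %d\n", rcu_thinks_idle());   /* 1 */
        idle_exit();
        return 0;
}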
@@ -274,16 +275,12 @@ struct rcu_data { | |||
274 | /* did other CPU force QS recently? */ | 275 | /* did other CPU force QS recently? */ |
275 | long blimit; /* Upper limit on a processed batch */ | 276 | long blimit; /* Upper limit on a processed batch */ |
276 | 277 | ||
277 | #ifdef CONFIG_NO_HZ | ||
278 | /* 3) dynticks interface. */ | 278 | /* 3) dynticks interface. */ |
279 | struct rcu_dynticks *dynticks; /* Shared per-CPU dynticks state. */ | 279 | struct rcu_dynticks *dynticks; /* Shared per-CPU dynticks state. */ |
280 | int dynticks_snap; /* Per-GP tracking for dynticks. */ | 280 | int dynticks_snap; /* Per-GP tracking for dynticks. */ |
281 | #endif /* #ifdef CONFIG_NO_HZ */ | ||
282 | 281 | ||
283 | /* 4) reasons this CPU needed to be kicked by force_quiescent_state */ | 282 | /* 4) reasons this CPU needed to be kicked by force_quiescent_state */ |
284 | #ifdef CONFIG_NO_HZ | ||
285 | unsigned long dynticks_fqs; /* Kicked due to dynticks idle. */ | 283 | unsigned long dynticks_fqs; /* Kicked due to dynticks idle. */ |
286 | #endif /* #ifdef CONFIG_NO_HZ */ | ||
287 | unsigned long offline_fqs; /* Kicked due to being offline. */ | 284 | unsigned long offline_fqs; /* Kicked due to being offline. */ |
288 | unsigned long resched_ipi; /* Sent a resched IPI. */ | 285 | unsigned long resched_ipi; /* Sent a resched IPI. */ |
289 | 286 | ||
@@ -302,16 +299,12 @@ struct rcu_data { | |||
302 | struct rcu_state *rsp; | 299 | struct rcu_state *rsp; |
303 | }; | 300 | }; |
304 | 301 | ||
305 | /* Values for signaled field in struct rcu_state. */ | 302 | /* Values for fqs_state field in struct rcu_state. */ |
306 | #define RCU_GP_IDLE 0 /* No grace period in progress. */ | 303 | #define RCU_GP_IDLE 0 /* No grace period in progress. */ |
307 | #define RCU_GP_INIT 1 /* Grace period being initialized. */ | 304 | #define RCU_GP_INIT 1 /* Grace period being initialized. */ |
308 | #define RCU_SAVE_DYNTICK 2 /* Need to scan dyntick state. */ | 305 | #define RCU_SAVE_DYNTICK 2 /* Need to scan dyntick state. */ |
309 | #define RCU_FORCE_QS 3 /* Need to force quiescent state. */ | 306 | #define RCU_FORCE_QS 3 /* Need to force quiescent state. */ |
310 | #ifdef CONFIG_NO_HZ | ||
311 | #define RCU_SIGNAL_INIT RCU_SAVE_DYNTICK | 307 | #define RCU_SIGNAL_INIT RCU_SAVE_DYNTICK |
312 | #else /* #ifdef CONFIG_NO_HZ */ | ||
313 | #define RCU_SIGNAL_INIT RCU_FORCE_QS | ||
314 | #endif /* #else #ifdef CONFIG_NO_HZ */ | ||
315 | 308 | ||
316 | #define RCU_JIFFIES_TILL_FORCE_QS 3 /* for rsp->jiffies_force_qs */ | 309 | #define RCU_JIFFIES_TILL_FORCE_QS 3 /* for rsp->jiffies_force_qs */ |
317 | 310 | ||
@@ -361,7 +354,7 @@ struct rcu_state { | |||
361 | 354 | ||
362 | /* The following fields are guarded by the root rcu_node's lock. */ | 355 | /* The following fields are guarded by the root rcu_node's lock. */ |
363 | 356 | ||
364 | u8 signaled ____cacheline_internodealigned_in_smp; | 357 | u8 fqs_state ____cacheline_internodealigned_in_smp; |
365 | /* Force QS state. */ | 358 | /* Force QS state. */ |
366 | u8 fqs_active; /* force_quiescent_state() */ | 359 | u8 fqs_active; /* force_quiescent_state() */ |
367 | /* is running. */ | 360 | /* is running. */ |
@@ -451,7 +444,8 @@ static void rcu_preempt_check_callbacks(int cpu); | |||
451 | static void rcu_preempt_process_callbacks(void); | 444 | static void rcu_preempt_process_callbacks(void); |
452 | void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); | 445 | void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); |
453 | #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) | 446 | #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) |
454 | static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp); | 447 | static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, |
448 | bool wake); | ||
455 | #endif /* #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) */ | 449 | #endif /* #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) */ |
456 | static int rcu_preempt_pending(int cpu); | 450 | static int rcu_preempt_pending(int cpu); |
457 | static int rcu_preempt_needs_cpu(int cpu); | 451 | static int rcu_preempt_needs_cpu(int cpu); |
@@ -461,6 +455,7 @@ static void __init __rcu_init_preempt(void); | |||
461 | static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); | 455 | static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); |
462 | static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); | 456 | static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); |
463 | static void invoke_rcu_callbacks_kthread(void); | 457 | static void invoke_rcu_callbacks_kthread(void); |
458 | static bool rcu_is_callbacks_kthread(void); | ||
464 | #ifdef CONFIG_RCU_BOOST | 459 | #ifdef CONFIG_RCU_BOOST |
465 | static void rcu_preempt_do_callbacks(void); | 460 | static void rcu_preempt_do_callbacks(void); |
466 | static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, | 461 | static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, |
@@ -473,5 +468,8 @@ static void rcu_yield(void (*f)(unsigned long), unsigned long arg); | |||
473 | #endif /* #ifdef CONFIG_RCU_BOOST */ | 468 | #endif /* #ifdef CONFIG_RCU_BOOST */ |
474 | static void rcu_cpu_kthread_setrt(int cpu, int to_rt); | 469 | static void rcu_cpu_kthread_setrt(int cpu, int to_rt); |
475 | static void __cpuinit rcu_prepare_kthreads(int cpu); | 470 | static void __cpuinit rcu_prepare_kthreads(int cpu); |
471 | static void rcu_prepare_for_idle_init(int cpu); | ||
472 | static void rcu_cleanup_after_idle(int cpu); | ||
473 | static void rcu_prepare_for_idle(int cpu); | ||
476 | 474 | ||
477 | #endif /* #ifndef RCU_TREE_NONCORE */ | 475 | #endif /* #ifndef RCU_TREE_NONCORE */ |
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 4b9b9f8a4184..8bb35d73e1f9 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h | |||
@@ -312,6 +312,7 @@ static noinline void rcu_read_unlock_special(struct task_struct *t) | |||
312 | { | 312 | { |
313 | int empty; | 313 | int empty; |
314 | int empty_exp; | 314 | int empty_exp; |
315 | int empty_exp_now; | ||
315 | unsigned long flags; | 316 | unsigned long flags; |
316 | struct list_head *np; | 317 | struct list_head *np; |
317 | #ifdef CONFIG_RCU_BOOST | 318 | #ifdef CONFIG_RCU_BOOST |
@@ -382,8 +383,10 @@ static noinline void rcu_read_unlock_special(struct task_struct *t) | |||
382 | /* | 383 | /* |
383 | * If this was the last task on the current list, and if | 384 | * If this was the last task on the current list, and if |
384 | * we aren't waiting on any CPUs, report the quiescent state. | 385 | * we aren't waiting on any CPUs, report the quiescent state. |
385 | * Note that rcu_report_unblock_qs_rnp() releases rnp->lock. | 386 | * Note that rcu_report_unblock_qs_rnp() releases rnp->lock, |
387 | * so we must take a snapshot of the expedited state. | ||
386 | */ | 388 | */ |
389 | empty_exp_now = !rcu_preempted_readers_exp(rnp); | ||
387 | if (!empty && !rcu_preempt_blocked_readers_cgp(rnp)) { | 390 | if (!empty && !rcu_preempt_blocked_readers_cgp(rnp)) { |
388 | trace_rcu_quiescent_state_report("preempt_rcu", | 391 | trace_rcu_quiescent_state_report("preempt_rcu", |
389 | rnp->gpnum, | 392 | rnp->gpnum, |
@@ -406,8 +409,8 @@ static noinline void rcu_read_unlock_special(struct task_struct *t) | |||
406 | * If this was the last task on the expedited lists, | 409 | * If this was the last task on the expedited lists, |
407 | * then we need to report up the rcu_node hierarchy. | 410 | * then we need to report up the rcu_node hierarchy. |
408 | */ | 411 | */ |
409 | if (!empty_exp && !rcu_preempted_readers_exp(rnp)) | 412 | if (!empty_exp && empty_exp_now) |
410 | rcu_report_exp_rnp(&rcu_preempt_state, rnp); | 413 | rcu_report_exp_rnp(&rcu_preempt_state, rnp, true); |
411 | } else { | 414 | } else { |
412 | local_irq_restore(flags); | 415 | local_irq_restore(flags); |
413 | } | 416 | } |
@@ -729,9 +732,13 @@ static int sync_rcu_preempt_exp_done(struct rcu_node *rnp) | |||
729 | * recursively up the tree. (Calm down, calm down, we do the recursion | 732 | * recursively up the tree. (Calm down, calm down, we do the recursion |
730 | * iteratively!) | 733 | * iteratively!) |
731 | * | 734 | * |
735 | * Most callers will set the "wake" flag, but the task initiating the | ||
736 | * expedited grace period need not wake itself. | ||
737 | * | ||
732 | * Caller must hold sync_rcu_preempt_exp_mutex. | 738 | * Caller must hold sync_rcu_preempt_exp_mutex. |
733 | */ | 739 | */ |
734 | static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp) | 740 | static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, |
741 | bool wake) | ||
735 | { | 742 | { |
736 | unsigned long flags; | 743 | unsigned long flags; |
737 | unsigned long mask; | 744 | unsigned long mask; |
@@ -744,7 +751,8 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp) | |||
744 | } | 751 | } |
745 | if (rnp->parent == NULL) { | 752 | if (rnp->parent == NULL) { |
746 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 753 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
747 | wake_up(&sync_rcu_preempt_exp_wq); | 754 | if (wake) |
755 | wake_up(&sync_rcu_preempt_exp_wq); | ||
748 | break; | 756 | break; |
749 | } | 757 | } |
750 | mask = rnp->grpmask; | 758 | mask = rnp->grpmask; |
@@ -777,7 +785,7 @@ sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp) | |||
777 | must_wait = 1; | 785 | must_wait = 1; |
778 | } | 786 | } |
779 | if (!must_wait) | 787 | if (!must_wait) |
780 | rcu_report_exp_rnp(rsp, rnp); | 788 | rcu_report_exp_rnp(rsp, rnp, false); /* Don't wake self. */ |
781 | } | 789 | } |
782 | 790 | ||
783 | /* | 791 | /* |
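The new "wake" argument to rcu_report_exp_rnp() above lets the task that initiates the expedited grace period skip the pointless self-wakeup when it finds there is nothing to wait for. A minimal pthread-based sketch of the pattern, with illustrative names (the kernel uses wait queues rather than condition variables):

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t done_cv = PTHREAD_COND_INITIALIZER;
static bool done;

/* Mark the expedited pass complete; wake any sleepers only when asked to. */
static void report_done(bool wake)
{
        pthread_mutex_lock(&lock);
        done = true;
        pthread_mutex_unlock(&lock);
        if (wake)
                pthread_cond_broadcast(&done_cv);
}

int main(void)
{
        report_done(false);     /* the initiator has not slept yet, so no self-wake */
        return done ? 0 : 1;
}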
@@ -1069,9 +1077,9 @@ EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); | |||
1069 | * report on tasks preempted in RCU read-side critical sections during | 1077 | * report on tasks preempted in RCU read-side critical sections during |
1070 | * expedited RCU grace periods. | 1078 | * expedited RCU grace periods. |
1071 | */ | 1079 | */ |
1072 | static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp) | 1080 | static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, |
1081 | bool wake) | ||
1073 | { | 1082 | { |
1074 | return; | ||
1075 | } | 1083 | } |
1076 | 1084 | ||
1077 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | 1085 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ |
@@ -1157,8 +1165,6 @@ static void rcu_initiate_boost_trace(struct rcu_node *rnp) | |||
1157 | 1165 | ||
1158 | #endif /* #else #ifdef CONFIG_RCU_TRACE */ | 1166 | #endif /* #else #ifdef CONFIG_RCU_TRACE */ |
1159 | 1167 | ||
1160 | static struct lock_class_key rcu_boost_class; | ||
1161 | |||
1162 | /* | 1168 | /* |
1163 | * Carry out RCU priority boosting on the task indicated by ->exp_tasks | 1169 | * Carry out RCU priority boosting on the task indicated by ->exp_tasks |
1164 | * or ->boost_tasks, advancing the pointer to the next task in the | 1170 | * or ->boost_tasks, advancing the pointer to the next task in the |
@@ -1221,15 +1227,13 @@ static int rcu_boost(struct rcu_node *rnp) | |||
1221 | */ | 1227 | */ |
1222 | t = container_of(tb, struct task_struct, rcu_node_entry); | 1228 | t = container_of(tb, struct task_struct, rcu_node_entry); |
1223 | rt_mutex_init_proxy_locked(&mtx, t); | 1229 | rt_mutex_init_proxy_locked(&mtx, t); |
1224 | /* Avoid lockdep false positives. This rt_mutex is its own thing. */ | ||
1225 | lockdep_set_class_and_name(&mtx.wait_lock, &rcu_boost_class, | ||
1226 | "rcu_boost_mutex"); | ||
1227 | t->rcu_boost_mutex = &mtx; | 1230 | t->rcu_boost_mutex = &mtx; |
1228 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1231 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
1229 | rt_mutex_lock(&mtx); /* Side effect: boosts task t's priority. */ | 1232 | rt_mutex_lock(&mtx); /* Side effect: boosts task t's priority. */ |
1230 | rt_mutex_unlock(&mtx); /* Keep lockdep happy. */ | 1233 | rt_mutex_unlock(&mtx); /* Keep lockdep happy. */ |
1231 | 1234 | ||
1232 | return rnp->exp_tasks != NULL || rnp->boost_tasks != NULL; | 1235 | return ACCESS_ONCE(rnp->exp_tasks) != NULL || |
1236 | ACCESS_ONCE(rnp->boost_tasks) != NULL; | ||
1233 | } | 1237 | } |
1234 | 1238 | ||
1235 | /* | 1239 | /* |
@@ -1329,6 +1333,15 @@ static void invoke_rcu_callbacks_kthread(void) | |||
1329 | } | 1333 | } |
1330 | 1334 | ||
1331 | /* | 1335 | /* |
1336 | * Is the current CPU running the RCU-callbacks kthread? | ||
1337 | * Caller must have preemption disabled. | ||
1338 | */ | ||
1339 | static bool rcu_is_callbacks_kthread(void) | ||
1340 | { | ||
1341 | return __get_cpu_var(rcu_cpu_kthread_task) == current; | ||
1342 | } | ||
1343 | |||
1344 | /* | ||
1332 | * Set the affinity of the boost kthread. The CPU-hotplug locks are | 1345 | * Set the affinity of the boost kthread. The CPU-hotplug locks are |
1333 | * held, so no one should be messing with the existence of the boost | 1346 | * held, so no one should be messing with the existence of the boost |
1334 | * kthread. | 1347 | * kthread. |
@@ -1772,6 +1785,11 @@ static void invoke_rcu_callbacks_kthread(void) | |||
1772 | WARN_ON_ONCE(1); | 1785 | WARN_ON_ONCE(1); |
1773 | } | 1786 | } |
1774 | 1787 | ||
1788 | static bool rcu_is_callbacks_kthread(void) | ||
1789 | { | ||
1790 | return false; | ||
1791 | } | ||
1792 | |||
1775 | static void rcu_preempt_boost_start_gp(struct rcu_node *rnp) | 1793 | static void rcu_preempt_boost_start_gp(struct rcu_node *rnp) |
1776 | { | 1794 | { |
1777 | } | 1795 | } |
@@ -1907,7 +1925,7 @@ void synchronize_sched_expedited(void) | |||
1907 | * grace period works for us. | 1925 | * grace period works for us. |
1908 | */ | 1926 | */ |
1909 | get_online_cpus(); | 1927 | get_online_cpus(); |
1910 | snap = atomic_read(&sync_sched_expedited_started) - 1; | 1928 | snap = atomic_read(&sync_sched_expedited_started); |
1911 | smp_mb(); /* ensure read is before try_stop_cpus(). */ | 1929 | smp_mb(); /* ensure read is before try_stop_cpus(). */ |
1912 | } | 1930 | } |
1913 | 1931 | ||
@@ -1939,88 +1957,243 @@ EXPORT_SYMBOL_GPL(synchronize_sched_expedited); | |||
1939 | * 1 if so. This function is part of the RCU implementation; it is -not- | 1957 | * 1 if so. This function is part of the RCU implementation; it is -not- |
1940 | * an exported member of the RCU API. | 1958 | * an exported member of the RCU API. |
1941 | * | 1959 | * |
1942 | * Because we have preemptible RCU, just check whether this CPU needs | 1960 | * Because we do not have RCU_FAST_NO_HZ, just check whether this CPU needs |
1943 | * any flavor of RCU. Do not chew up lots of CPU cycles with preemption | 1961 | * any flavor of RCU. |
1944 | * disabled in a most-likely vain attempt to cause RCU not to need this CPU. | ||
1945 | */ | 1962 | */ |
1946 | int rcu_needs_cpu(int cpu) | 1963 | int rcu_needs_cpu(int cpu) |
1947 | { | 1964 | { |
1948 | return rcu_needs_cpu_quick_check(cpu); | 1965 | return rcu_cpu_has_callbacks(cpu); |
1966 | } | ||
1967 | |||
1968 | /* | ||
1969 | * Because we do not have RCU_FAST_NO_HZ, don't bother initializing for it. | ||
1970 | */ | ||
1971 | static void rcu_prepare_for_idle_init(int cpu) | ||
1972 | { | ||
1973 | } | ||
1974 | |||
1975 | /* | ||
1976 | * Because we do not have RCU_FAST_NO_HZ, don't bother cleaning up | ||
1977 | * after it. | ||
1978 | */ | ||
1979 | static void rcu_cleanup_after_idle(int cpu) | ||
1980 | { | ||
1981 | } | ||
1982 | |||
1983 | /* | ||
1984 | * Do the idle-entry grace-period work, which, because CONFIG_RCU_FAST_NO_HZ=n, | ||
1985 | * is nothing. | ||
1986 | */ | ||
1987 | static void rcu_prepare_for_idle(int cpu) | ||
1988 | { | ||
1949 | } | 1989 | } |
1950 | 1990 | ||
1951 | #else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */ | 1991 | #else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */ |
1952 | 1992 | ||
1953 | #define RCU_NEEDS_CPU_FLUSHES 5 | 1993 | /* |
1994 | * This code is invoked when a CPU goes idle, at which point we want | ||
1995 | * to have the CPU do everything required for RCU so that it can enter | ||
1996 | * the energy-efficient dyntick-idle mode. This is handled by a | ||
1997 | * state machine implemented by rcu_prepare_for_idle() below. | ||
1998 | * | ||
1999 | * The following three preprocessor symbols control this state machine: | ||
2000 | * | ||
2001 | * RCU_IDLE_FLUSHES gives the maximum number of times that we will attempt | ||
2002 | * to satisfy RCU. Beyond this point, it is better to incur a periodic | ||
2003 | * scheduling-clock interrupt than to loop through the state machine | ||
2004 | * at full power. | ||
2005 | * RCU_IDLE_OPT_FLUSHES gives the number of RCU_IDLE_FLUSHES that are | ||
2006 | * optional if RCU does not need anything immediately from this | ||
2007 | * CPU, even if this CPU still has RCU callbacks queued. The first few | ||
2008 | * times through the state machine are mandatory: we need to give | ||
2009 | * the state machine a chance to communicate a quiescent state | ||
2010 | * to the RCU core. | ||
2011 | * RCU_IDLE_GP_DELAY gives the number of jiffies that a CPU is permitted | ||
2012 | * to sleep in dyntick-idle mode with RCU callbacks pending. This | ||
2013 | * is sized to be roughly one RCU grace period. Those energy-efficiency | ||
2014 | * benchmarkers who might otherwise be tempted to set this to a large | ||
2015 | * number, be warned: Setting RCU_IDLE_GP_DELAY too high can hang your | ||
2016 | * system. And if you are -that- concerned about energy efficiency, | ||
2017 | * just power the system down and be done with it! | ||
2018 | * | ||
2019 | * The values below work well in practice. If future workloads require | ||
2020 | * adjustment, they can be converted into kernel config parameters, though | ||
2021 | * making the state machine smarter might be a better option. | ||
2022 | */ | ||
2023 | #define RCU_IDLE_FLUSHES 5 /* Number of dyntick-idle tries. */ | ||
2024 | #define RCU_IDLE_OPT_FLUSHES 3 /* Optional dyntick-idle tries. */ | ||
2025 | #define RCU_IDLE_GP_DELAY 6 /* Roughly one grace period. */ | ||
2026 | |||
1954 | static DEFINE_PER_CPU(int, rcu_dyntick_drain); | 2027 | static DEFINE_PER_CPU(int, rcu_dyntick_drain); |
1955 | static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff); | 2028 | static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff); |
2029 | static DEFINE_PER_CPU(struct hrtimer, rcu_idle_gp_timer); | ||
2030 | static ktime_t rcu_idle_gp_wait; | ||
1956 | 2031 | ||
1957 | /* | 2032 | /* |
1958 | * Check to see if any future RCU-related work will need to be done | 2033 | * Allow the CPU to enter dyntick-idle mode if either: (1) There are no |
1959 | * by the current CPU, even if none need be done immediately, returning | 2034 | * callbacks on this CPU, (2) this CPU has not yet attempted to enter |
1960 | * 1 if so. This function is part of the RCU implementation; it is -not- | 2035 | * dyntick-idle mode, or (3) this CPU is in the process of attempting to |
1961 | * an exported member of the RCU API. | 2036 | * enter dyntick-idle mode. Otherwise, if we have recently tried and failed |
2037 | * to enter dyntick-idle mode, we refuse to try to enter it. After all, | ||
2038 | * it is better to incur scheduling-clock interrupts than to spin | ||
2039 | * continuously for the same time duration! | ||
2040 | */ | ||
2041 | int rcu_needs_cpu(int cpu) | ||
2042 | { | ||
2043 | /* If no callbacks, RCU doesn't need the CPU. */ | ||
2044 | if (!rcu_cpu_has_callbacks(cpu)) | ||
2045 | return 0; | ||
2046 | /* Otherwise, RCU needs the CPU only if it recently tried and failed. */ | ||
2047 | return per_cpu(rcu_dyntick_holdoff, cpu) == jiffies; | ||
2048 | } | ||
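The RCU_FAST_NO_HZ version of rcu_needs_cpu() above refuses to re-attempt dyntick-idle entry during the jiffy in which a previous attempt failed, trading one scheduling-clock tick for not spinning through the state machine. A hedged user-space sketch of that holdoff idiom (jiffies is simulated; names are illustrative):

#include <stdbool.h>
#include <stdio.h>

static unsigned long jiffies_sim;                   /* simulated scheduling-clock ticks */
static unsigned long dyntick_holdoff = -1UL;        /* jiffy of the last failed attempt */

static bool may_retry_dyntick_idle(void)
{
        return dyntick_holdoff != jiffies_sim;      /* retry only on a later tick */
}

static void dyntick_attempt_failed(void)
{
        dyntick_holdoff = jiffies_sim;              /* hold off for the rest of this tick */
}

int main(void)
{
        dyntick_attempt_failed();
        printf("retry this tick? %d\n", may_retry_dyntick_idle());     /* 0 */
        jiffies_sim++;                                                  /* next tick */
        printf("retry next tick? %d\n", may_retry_dyntick_idle());     /* 1 */
        return 0;
}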
2049 | |||
2050 | /* | ||
2051 | * Timer handler used to force CPU to start pushing its remaining RCU | ||
2052 | * callbacks in the case where it entered dyntick-idle mode with callbacks | ||
2053 | * pending. The handler doesn't really need to do anything because the | ||
2054 | * real work is done upon re-entry to idle, or by the next scheduling-clock | ||
2055 | * interrupt should idle not be re-entered. | ||
2056 | */ | ||
2057 | static enum hrtimer_restart rcu_idle_gp_timer_func(struct hrtimer *hrtp) | ||
2058 | { | ||
2059 | trace_rcu_prep_idle("Timer"); | ||
2060 | return HRTIMER_NORESTART; | ||
2061 | } | ||
2062 | |||
2063 | /* | ||
2064 | * Initialize the timer used to pull CPUs out of dyntick-idle mode. | ||
2065 | */ | ||
2066 | static void rcu_prepare_for_idle_init(int cpu) | ||
2067 | { | ||
2068 | static int firsttime = 1; | ||
2069 | struct hrtimer *hrtp = &per_cpu(rcu_idle_gp_timer, cpu); | ||
2070 | |||
2071 | hrtimer_init(hrtp, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | ||
2072 | hrtp->function = rcu_idle_gp_timer_func; | ||
2073 | if (firsttime) { | ||
2074 | unsigned int upj = jiffies_to_usecs(RCU_IDLE_GP_DELAY); | ||
2075 | |||
2076 | rcu_idle_gp_wait = ns_to_ktime(upj * (u64)1000); | ||
2077 | firsttime = 0; | ||
2078 | } | ||
2079 | } | ||
2080 | |||
2081 | /* | ||
2082 | * Clean up for exit from idle. Because we are exiting from idle, there | ||
2083 | * is no longer any point to rcu_idle_gp_timer, so cancel it. This will | ||
2084 | * do nothing if this timer is not active, so just cancel it unconditionally. | ||
2085 | */ | ||
2086 | static void rcu_cleanup_after_idle(int cpu) | ||
2087 | { | ||
2088 | hrtimer_cancel(&per_cpu(rcu_idle_gp_timer, cpu)); | ||
2089 | } | ||
2090 | |||
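The timer handling is spread over several functions above; condensed, it follows the usual hrtimer lifecycle: initialize once per CPU, arm when the CPU goes dyntick-idle with callbacks still queued, cancel on idle exit. A hedged sketch, with illustrative wrapper names (the patch open-codes these steps at the three call sites):

#include <linux/hrtimer.h>
#include <linux/percpu.h>

/* Illustrative wrappers; the patch open-codes these steps in place. */
static void idle_gp_timer_setup(int cpu)	/* cf. rcu_prepare_for_idle_init() */
{
	struct hrtimer *hrtp = &per_cpu(rcu_idle_gp_timer, cpu);

	hrtimer_init(hrtp, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	hrtp->function = rcu_idle_gp_timer_func;	/* handler only needs to wake the CPU */
}

static void idle_gp_timer_arm(int cpu)		/* cf. the "Dyntick with callbacks" path */
{
	hrtimer_start(&per_cpu(rcu_idle_gp_timer, cpu),
		      rcu_idle_gp_wait, HRTIMER_MODE_REL);
}

static void idle_gp_timer_disarm(int cpu)	/* cf. rcu_cleanup_after_idle() */
{
	hrtimer_cancel(&per_cpu(rcu_idle_gp_timer, cpu));	/* no-op if not armed */
}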
2091 | /* | ||
2092 | * Check to see if any RCU-related work can be done by the current CPU, | ||
2093 | * and if so, schedule a softirq to get it done. This function is part | ||
2094 | * of the RCU implementation; it is -not- an exported member of the RCU API. | ||
1962 | * | 2095 | * |
1963 | * Because we are not supporting preemptible RCU, attempt to accelerate | 2096 | * The idea is for the current CPU to clear out all work required by the |
1964 | * any current grace periods so that RCU no longer needs this CPU, but | 2097 | * RCU core for the current grace period, so that this CPU can be permitted |
1965 | * only if all other CPUs are already in dynticks-idle mode. This will | 2098 | * to enter dyntick-idle mode. In some cases, it will need to be awakened |
1966 | * allow the CPU cores to be powered down immediately, as opposed to after | 2099 | * at the end of the grace period by whatever CPU ends the grace period. |
1967 | * waiting many milliseconds for grace periods to elapse. | 2100 | * This allows CPUs to go dyntick-idle more quickly, and to reduce the |
2101 | * number of wakeups by a modest integer factor. | ||
1968 | * | 2102 | * |
1969 | * Because it is not legal to invoke rcu_process_callbacks() with irqs | 2103 | * Because it is not legal to invoke rcu_process_callbacks() with irqs |
1970 | * disabled, we do one pass of force_quiescent_state(), then do a | 2104 | * disabled, we do one pass of force_quiescent_state(), then do a |
1971 | * invoke_rcu_core() to cause rcu_process_callbacks() to be invoked | 2105 | * invoke_rcu_core() to cause rcu_process_callbacks() to be invoked |
1972 | * later. The per-cpu rcu_dyntick_drain variable controls the sequencing. | 2106 | * later. The per-cpu rcu_dyntick_drain variable controls the sequencing. |
2107 | * | ||
2108 | * The caller must have disabled interrupts. | ||
1973 | */ | 2109 | */ |
1974 | int rcu_needs_cpu(int cpu) | 2110 | static void rcu_prepare_for_idle(int cpu) |
1975 | { | 2111 | { |
1976 | int c = 0; | 2112 | unsigned long flags; |
1977 | int snap; | 2113 | |
1978 | int thatcpu; | 2114 | local_irq_save(flags); |
1979 | 2115 | ||
1980 | /* Check for being in the holdoff period. */ | 2116 | /* |
1981 | if (per_cpu(rcu_dyntick_holdoff, cpu) == jiffies) | 2117 | * If there are no callbacks on this CPU, enter dyntick-idle mode. |
1982 | return rcu_needs_cpu_quick_check(cpu); | 2118 | * Also reset state to avoid prejudicing later attempts. |
1983 | 2119 | */ | |
1984 | /* Don't bother unless we are the last non-dyntick-idle CPU. */ | 2120 | if (!rcu_cpu_has_callbacks(cpu)) { |
1985 | for_each_online_cpu(thatcpu) { | 2121 | per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1; |
1986 | if (thatcpu == cpu) | 2122 | per_cpu(rcu_dyntick_drain, cpu) = 0; |
1987 | continue; | 2123 | local_irq_restore(flags); |
1988 | snap = atomic_add_return(0, &per_cpu(rcu_dynticks, | 2124 | trace_rcu_prep_idle("No callbacks"); |
1989 | thatcpu).dynticks); | 2125 | return; |
1990 | smp_mb(); /* Order sampling of snap with end of grace period. */ | 2126 | } |
1991 | if ((snap & 0x1) != 0) { | 2127 | |
1992 | per_cpu(rcu_dyntick_drain, cpu) = 0; | 2128 | /* |
1993 | per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1; | 2129 | * If in holdoff mode, just return. We will presumably have |
1994 | return rcu_needs_cpu_quick_check(cpu); | 2130 | * refrained from disabling the scheduling-clock tick. |
1995 | } | 2131 | */ |
2132 | if (per_cpu(rcu_dyntick_holdoff, cpu) == jiffies) { | ||
2133 | local_irq_restore(flags); | ||
2134 | trace_rcu_prep_idle("In holdoff"); | ||
2135 | return; | ||
1996 | } | 2136 | } |
1997 | 2137 | ||
1998 | /* Check and update the rcu_dyntick_drain sequencing. */ | 2138 | /* Check and update the rcu_dyntick_drain sequencing. */ |
1999 | if (per_cpu(rcu_dyntick_drain, cpu) <= 0) { | 2139 | if (per_cpu(rcu_dyntick_drain, cpu) <= 0) { |
2000 | /* First time through, initialize the counter. */ | 2140 | /* First time through, initialize the counter. */ |
2001 | per_cpu(rcu_dyntick_drain, cpu) = RCU_NEEDS_CPU_FLUSHES; | 2141 | per_cpu(rcu_dyntick_drain, cpu) = RCU_IDLE_FLUSHES; |
2142 | } else if (per_cpu(rcu_dyntick_drain, cpu) <= RCU_IDLE_OPT_FLUSHES && | ||
2143 | !rcu_pending(cpu)) { | ||
2144 | /* Can we go dyntick-idle despite still having callbacks? */ | ||
2145 | trace_rcu_prep_idle("Dyntick with callbacks"); | ||
2146 | per_cpu(rcu_dyntick_drain, cpu) = 0; | ||
2147 | per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1; | ||
2148 | hrtimer_start(&per_cpu(rcu_idle_gp_timer, cpu), | ||
2149 | rcu_idle_gp_wait, HRTIMER_MODE_REL); | ||
2150 | return; /* Nothing more to do immediately. */ | ||
2002 | } else if (--per_cpu(rcu_dyntick_drain, cpu) <= 0) { | 2151 | } else if (--per_cpu(rcu_dyntick_drain, cpu) <= 0) { |
2003 | /* We have hit the limit, so time to give up. */ | 2152 | /* We have hit the limit, so time to give up. */ |
2004 | per_cpu(rcu_dyntick_holdoff, cpu) = jiffies; | 2153 | per_cpu(rcu_dyntick_holdoff, cpu) = jiffies; |
2005 | return rcu_needs_cpu_quick_check(cpu); | 2154 | local_irq_restore(flags); |
2155 | trace_rcu_prep_idle("Begin holdoff"); | ||
2156 | invoke_rcu_core(); /* Force the CPU out of dyntick-idle. */ | ||
2157 | return; | ||
2006 | } | 2158 | } |
2007 | 2159 | ||
2008 | /* Do one step pushing remaining RCU callbacks through. */ | 2160 | /* |
2161 | * Do one step of pushing the remaining RCU callbacks through | ||
2162 | * the RCU core state machine. | ||
2163 | */ | ||
2164 | #ifdef CONFIG_TREE_PREEMPT_RCU | ||
2165 | if (per_cpu(rcu_preempt_data, cpu).nxtlist) { | ||
2166 | local_irq_restore(flags); | ||
2167 | rcu_preempt_qs(cpu); | ||
2168 | force_quiescent_state(&rcu_preempt_state, 0); | ||
2169 | local_irq_save(flags); | ||
2170 | } | ||
2171 | #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ | ||
2009 | if (per_cpu(rcu_sched_data, cpu).nxtlist) { | 2172 | if (per_cpu(rcu_sched_data, cpu).nxtlist) { |
2173 | local_irq_restore(flags); | ||
2010 | rcu_sched_qs(cpu); | 2174 | rcu_sched_qs(cpu); |
2011 | force_quiescent_state(&rcu_sched_state, 0); | 2175 | force_quiescent_state(&rcu_sched_state, 0); |
2012 | c = c || per_cpu(rcu_sched_data, cpu).nxtlist; | 2176 | local_irq_save(flags); |
2013 | } | 2177 | } |
2014 | if (per_cpu(rcu_bh_data, cpu).nxtlist) { | 2178 | if (per_cpu(rcu_bh_data, cpu).nxtlist) { |
2179 | local_irq_restore(flags); | ||
2015 | rcu_bh_qs(cpu); | 2180 | rcu_bh_qs(cpu); |
2016 | force_quiescent_state(&rcu_bh_state, 0); | 2181 | force_quiescent_state(&rcu_bh_state, 0); |
2017 | c = c || per_cpu(rcu_bh_data, cpu).nxtlist; | 2182 | local_irq_save(flags); |
2018 | } | 2183 | } |
2019 | 2184 | ||
2020 | /* If RCU callbacks are still pending, RCU still needs this CPU. */ | 2185 | /* |
2021 | if (c) | 2186 | * If RCU callbacks are still pending, RCU still needs this CPU. |
2187 | * So try forcing the callbacks through the grace period. | ||
2188 | */ | ||
2189 | if (rcu_cpu_has_callbacks(cpu)) { | ||
2190 | local_irq_restore(flags); | ||
2191 | trace_rcu_prep_idle("More callbacks"); | ||
2022 | invoke_rcu_core(); | 2192 | invoke_rcu_core(); |
2023 | return c; | 2193 | } else { |
2194 | local_irq_restore(flags); | ||
2195 | trace_rcu_prep_idle("Callbacks drained"); | ||
2196 | } | ||
2024 | } | 2197 | } |
2025 | 2198 | ||
2026 | #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */ | 2199 | #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */ |
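For readers skimming the hunk above, here is a condensed sketch of the rcu_prepare_for_idle() decision flow. Apart from the RCU functions named in the patch (rcu_cpu_has_callbacks(), rcu_pending(), invoke_rcu_core(), RCU_IDLE_FLUSHES), every lower-case helper below is a hypothetical stand-in for the open-coded per_cpu() state handling; this is a reading aid, not a drop-in replacement.

/* Hypothetical stand-ins for the open-coded per-CPU state handling above. */
static void reset_drain_and_holdoff(int cpu);
static int  in_holdoff(int cpu);
static int  drain_not_started(int cpu);
static void start_drain(int cpu, int passes);
static int  drain_in_optional_range(int cpu);
static void arm_idle_gp_timer(int cpu);
static int  decrement_drain_hits_zero(int cpu);
static void enter_holdoff(int cpu);
static void push_callbacks_once(int cpu);	/* per-flavor qs + force_quiescent_state() */

static void prepare_for_idle_sketch(int cpu)
{
	if (!rcu_cpu_has_callbacks(cpu)) {	/* nothing queued: idle freely */
		reset_drain_and_holdoff(cpu);
		return;
	}
	if (in_holdoff(cpu))			/* recently gave up: leave the tick on */
		return;
	if (drain_not_started(cpu)) {
		start_drain(cpu, RCU_IDLE_FLUSHES);	/* arm the per-CPU countdown */
	} else if (drain_in_optional_range(cpu) && !rcu_pending(cpu)) {
		/* Callbacks are queued, but RCU needs nothing right now:
		 * sleep anyway and let rcu_idle_gp_timer wake us later. */
		arm_idle_gp_timer(cpu);
		return;
	} else if (decrement_drain_hits_zero(cpu)) {
		enter_holdoff(cpu);		/* give up until jiffies advances */
		invoke_rcu_core();
		return;
	}
	push_callbacks_once(cpu);		/* one step of the state machine */
	if (rcu_cpu_has_callbacks(cpu))
		invoke_rcu_core();		/* still pending: RCU still needs this CPU */
}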
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index 9feffa4c0695..654cfe67f0d1 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c | |||
@@ -67,13 +67,11 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp) | |||
67 | rdp->completed, rdp->gpnum, | 67 | rdp->completed, rdp->gpnum, |
68 | rdp->passed_quiesce, rdp->passed_quiesce_gpnum, | 68 | rdp->passed_quiesce, rdp->passed_quiesce_gpnum, |
69 | rdp->qs_pending); | 69 | rdp->qs_pending); |
70 | #ifdef CONFIG_NO_HZ | 70 | seq_printf(m, " dt=%d/%llx/%d df=%lu", |
71 | seq_printf(m, " dt=%d/%d/%d df=%lu", | ||
72 | atomic_read(&rdp->dynticks->dynticks), | 71 | atomic_read(&rdp->dynticks->dynticks), |
73 | rdp->dynticks->dynticks_nesting, | 72 | rdp->dynticks->dynticks_nesting, |
74 | rdp->dynticks->dynticks_nmi_nesting, | 73 | rdp->dynticks->dynticks_nmi_nesting, |
75 | rdp->dynticks_fqs); | 74 | rdp->dynticks_fqs); |
76 | #endif /* #ifdef CONFIG_NO_HZ */ | ||
77 | seq_printf(m, " of=%lu ri=%lu", rdp->offline_fqs, rdp->resched_ipi); | 75 | seq_printf(m, " of=%lu ri=%lu", rdp->offline_fqs, rdp->resched_ipi); |
78 | seq_printf(m, " ql=%ld qs=%c%c%c%c", | 76 | seq_printf(m, " ql=%ld qs=%c%c%c%c", |
79 | rdp->qlen, | 77 | rdp->qlen, |
@@ -141,13 +139,11 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp) | |||
141 | rdp->completed, rdp->gpnum, | 139 | rdp->completed, rdp->gpnum, |
142 | rdp->passed_quiesce, rdp->passed_quiesce_gpnum, | 140 | rdp->passed_quiesce, rdp->passed_quiesce_gpnum, |
143 | rdp->qs_pending); | 141 | rdp->qs_pending); |
144 | #ifdef CONFIG_NO_HZ | 142 | seq_printf(m, ",%d,%llx,%d,%lu", |
145 | seq_printf(m, ",%d,%d,%d,%lu", | ||
146 | atomic_read(&rdp->dynticks->dynticks), | 143 | atomic_read(&rdp->dynticks->dynticks), |
147 | rdp->dynticks->dynticks_nesting, | 144 | rdp->dynticks->dynticks_nesting, |
148 | rdp->dynticks->dynticks_nmi_nesting, | 145 | rdp->dynticks->dynticks_nmi_nesting, |
149 | rdp->dynticks_fqs); | 146 | rdp->dynticks_fqs); |
150 | #endif /* #ifdef CONFIG_NO_HZ */ | ||
151 | seq_printf(m, ",%lu,%lu", rdp->offline_fqs, rdp->resched_ipi); | 147 | seq_printf(m, ",%lu,%lu", rdp->offline_fqs, rdp->resched_ipi); |
152 | seq_printf(m, ",%ld,\"%c%c%c%c\"", rdp->qlen, | 148 | seq_printf(m, ",%ld,\"%c%c%c%c\"", rdp->qlen, |
153 | ".N"[rdp->nxttail[RCU_NEXT_READY_TAIL] != | 149 | ".N"[rdp->nxttail[RCU_NEXT_READY_TAIL] != |
@@ -171,9 +167,7 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp) | |||
171 | static int show_rcudata_csv(struct seq_file *m, void *unused) | 167 | static int show_rcudata_csv(struct seq_file *m, void *unused) |
172 | { | 168 | { |
173 | seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pgp\",\"pq\","); | 169 | seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pgp\",\"pq\","); |
174 | #ifdef CONFIG_NO_HZ | ||
175 | seq_puts(m, "\"dt\",\"dt nesting\",\"dt NMI nesting\",\"df\","); | 170 | seq_puts(m, "\"dt\",\"dt nesting\",\"dt NMI nesting\",\"df\","); |
176 | #endif /* #ifdef CONFIG_NO_HZ */ | ||
177 | seq_puts(m, "\"of\",\"ri\",\"ql\",\"qs\""); | 171 | seq_puts(m, "\"of\",\"ri\",\"ql\",\"qs\""); |
178 | #ifdef CONFIG_RCU_BOOST | 172 | #ifdef CONFIG_RCU_BOOST |
179 | seq_puts(m, "\"kt\",\"ktl\""); | 173 | seq_puts(m, "\"kt\",\"ktl\""); |
@@ -278,7 +272,7 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp) | |||
278 | gpnum = rsp->gpnum; | 272 | gpnum = rsp->gpnum; |
279 | seq_printf(m, "c=%lu g=%lu s=%d jfq=%ld j=%x " | 273 | seq_printf(m, "c=%lu g=%lu s=%d jfq=%ld j=%x " |
280 | "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu\n", | 274 | "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu\n", |
281 | rsp->completed, gpnum, rsp->signaled, | 275 | rsp->completed, gpnum, rsp->fqs_state, |
282 | (long)(rsp->jiffies_force_qs - jiffies), | 276 | (long)(rsp->jiffies_force_qs - jiffies), |
283 | (int)(jiffies & 0xffff), | 277 | (int)(jiffies & 0xffff), |
284 | rsp->n_force_qs, rsp->n_force_qs_ngp, | 278 | rsp->n_force_qs, rsp->n_force_qs_ngp, |
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c index f9d8482dd487..a242e691c993 100644 --- a/kernel/rtmutex.c +++ b/kernel/rtmutex.c | |||
@@ -579,7 +579,6 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state, | |||
579 | struct rt_mutex_waiter *waiter) | 579 | struct rt_mutex_waiter *waiter) |
580 | { | 580 | { |
581 | int ret = 0; | 581 | int ret = 0; |
582 | int was_disabled; | ||
583 | 582 | ||
584 | for (;;) { | 583 | for (;;) { |
585 | /* Try to acquire the lock: */ | 584 | /* Try to acquire the lock: */ |
@@ -602,17 +601,10 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state, | |||
602 | 601 | ||
603 | raw_spin_unlock(&lock->wait_lock); | 602 | raw_spin_unlock(&lock->wait_lock); |
604 | 603 | ||
605 | was_disabled = irqs_disabled(); | ||
606 | if (was_disabled) | ||
607 | local_irq_enable(); | ||
608 | |||
609 | debug_rt_mutex_print_deadlock(waiter); | 604 | debug_rt_mutex_print_deadlock(waiter); |
610 | 605 | ||
611 | schedule_rt_mutex(lock); | 606 | schedule_rt_mutex(lock); |
612 | 607 | ||
613 | if (was_disabled) | ||
614 | local_irq_disable(); | ||
615 | |||
616 | raw_spin_lock(&lock->wait_lock); | 608 | raw_spin_lock(&lock->wait_lock); |
617 | set_current_state(state); | 609 | set_current_state(state); |
618 | } | 610 | } |
diff --git a/kernel/softirq.c b/kernel/softirq.c index 2c71d91efff0..4eb3a0fa351e 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
@@ -347,12 +347,12 @@ void irq_exit(void) | |||
347 | if (!in_interrupt() && local_softirq_pending()) | 347 | if (!in_interrupt() && local_softirq_pending()) |
348 | invoke_softirq(); | 348 | invoke_softirq(); |
349 | 349 | ||
350 | rcu_irq_exit(); | ||
351 | #ifdef CONFIG_NO_HZ | 350 | #ifdef CONFIG_NO_HZ |
352 | /* Make sure that timer wheel updates are propagated */ | 351 | /* Make sure that timer wheel updates are propagated */ |
353 | if (idle_cpu(smp_processor_id()) && !in_interrupt() && !need_resched()) | 352 | if (idle_cpu(smp_processor_id()) && !in_interrupt() && !need_resched()) |
354 | tick_nohz_stop_sched_tick(0); | 353 | tick_nohz_irq_exit(); |
355 | #endif | 354 | #endif |
355 | rcu_irq_exit(); | ||
356 | preempt_enable_no_resched(); | 356 | preempt_enable_no_resched(); |
357 | } | 357 | } |
358 | 358 | ||
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 40420644d0ba..0ec8b832ab6b 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c | |||
@@ -275,42 +275,17 @@ u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time) | |||
275 | } | 275 | } |
276 | EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us); | 276 | EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us); |
277 | 277 | ||
278 | /** | 278 | static void tick_nohz_stop_sched_tick(struct tick_sched *ts) |
279 | * tick_nohz_stop_sched_tick - stop the idle tick from the idle task | ||
280 | * | ||
281 | * When the next event is more than a tick into the future, stop the idle tick | ||
282 | * Called either from the idle loop or from irq_exit() when an idle period was | ||
283 | * just interrupted by an interrupt which did not cause a reschedule. | ||
284 | */ | ||
285 | void tick_nohz_stop_sched_tick(int inidle) | ||
286 | { | 279 | { |
287 | unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags; | 280 | unsigned long seq, last_jiffies, next_jiffies, delta_jiffies; |
288 | struct tick_sched *ts; | ||
289 | ktime_t last_update, expires, now; | 281 | ktime_t last_update, expires, now; |
290 | struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; | 282 | struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; |
291 | u64 time_delta; | 283 | u64 time_delta; |
292 | int cpu; | 284 | int cpu; |
293 | 285 | ||
294 | local_irq_save(flags); | ||
295 | |||
296 | cpu = smp_processor_id(); | 286 | cpu = smp_processor_id(); |
297 | ts = &per_cpu(tick_cpu_sched, cpu); | 287 | ts = &per_cpu(tick_cpu_sched, cpu); |
298 | 288 | ||
299 | /* | ||
300 | * Call to tick_nohz_start_idle stops the last_update_time from being | ||
301 | * updated. Thus, it must not be called in the event we are called from | ||
302 | * irq_exit() with the prior state different than idle. | ||
303 | */ | ||
304 | if (!inidle && !ts->inidle) | ||
305 | goto end; | ||
306 | |||
307 | /* | ||
308 | * Set ts->inidle unconditionally. Even if the system did not | ||
309 | * switch to NOHZ mode the cpu frequency governors rely on the | ||
310 | * update of the idle time accounting in tick_nohz_start_idle(). | ||
311 | */ | ||
312 | ts->inidle = 1; | ||
313 | |||
314 | now = tick_nohz_start_idle(cpu, ts); | 289 | now = tick_nohz_start_idle(cpu, ts); |
315 | 290 | ||
316 | /* | 291 | /* |
@@ -326,10 +301,10 @@ void tick_nohz_stop_sched_tick(int inidle) | |||
326 | } | 301 | } |
327 | 302 | ||
328 | if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) | 303 | if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) |
329 | goto end; | 304 | return; |
330 | 305 | ||
331 | if (need_resched()) | 306 | if (need_resched()) |
332 | goto end; | 307 | return; |
333 | 308 | ||
334 | if (unlikely(local_softirq_pending() && cpu_online(cpu))) { | 309 | if (unlikely(local_softirq_pending() && cpu_online(cpu))) { |
335 | static int ratelimit; | 310 | static int ratelimit; |
@@ -339,7 +314,7 @@ void tick_nohz_stop_sched_tick(int inidle) | |||
339 | (unsigned int) local_softirq_pending()); | 314 | (unsigned int) local_softirq_pending()); |
340 | ratelimit++; | 315 | ratelimit++; |
341 | } | 316 | } |
342 | goto end; | 317 | return; |
343 | } | 318 | } |
344 | 319 | ||
345 | ts->idle_calls++; | 320 | ts->idle_calls++; |
@@ -434,7 +409,6 @@ void tick_nohz_stop_sched_tick(int inidle) | |||
434 | ts->idle_tick = hrtimer_get_expires(&ts->sched_timer); | 409 | ts->idle_tick = hrtimer_get_expires(&ts->sched_timer); |
435 | ts->tick_stopped = 1; | 410 | ts->tick_stopped = 1; |
436 | ts->idle_jiffies = last_jiffies; | 411 | ts->idle_jiffies = last_jiffies; |
437 | rcu_enter_nohz(); | ||
438 | } | 412 | } |
439 | 413 | ||
440 | ts->idle_sleeps++; | 414 | ts->idle_sleeps++; |
@@ -472,8 +446,56 @@ out: | |||
472 | ts->next_jiffies = next_jiffies; | 446 | ts->next_jiffies = next_jiffies; |
473 | ts->last_jiffies = last_jiffies; | 447 | ts->last_jiffies = last_jiffies; |
474 | ts->sleep_length = ktime_sub(dev->next_event, now); | 448 | ts->sleep_length = ktime_sub(dev->next_event, now); |
475 | end: | 449 | } |
476 | local_irq_restore(flags); | 450 | |
451 | /** | ||
452 | * tick_nohz_idle_enter - stop the idle tick from the idle task | ||
453 | * | ||
454 | * When the next event is more than a tick into the future, stop the idle tick | ||
455 | * Called when we start the idle loop. | ||
456 | * | ||
457 | * The arch is responsible for calling: | ||
458 | * | ||
459 | * - rcu_idle_enter() after its last use of RCU before the CPU is put | ||
460 | * to sleep. | ||
461 | * - rcu_idle_exit() before the first use of RCU after the CPU is woken up. | ||
462 | */ | ||
463 | void tick_nohz_idle_enter(void) | ||
464 | { | ||
465 | struct tick_sched *ts; | ||
466 | |||
467 | WARN_ON_ONCE(irqs_disabled()); | ||
468 | |||
469 | local_irq_disable(); | ||
470 | |||
471 | ts = &__get_cpu_var(tick_cpu_sched); | ||
472 | /* | ||
473 | * Set ts->inidle unconditionally. Even if the system did not | ||
474 | * switch to NOHZ mode, the cpu frequency governors rely on the | ||
475 | * update of the idle time accounting in tick_nohz_start_idle(). | ||
476 | */ | ||
477 | ts->inidle = 1; | ||
478 | tick_nohz_stop_sched_tick(ts); | ||
479 | |||
480 | local_irq_enable(); | ||
481 | } | ||
482 | |||
483 | /** | ||
484 | * tick_nohz_irq_exit - update next tick event from interrupt exit | ||
485 | * | ||
486 | * When an interrupt fires while we are idle and it doesn't cause | ||
487 | * a reschedule, it may still add, modify or delete a timer, enqueue | ||
488 | * an RCU callback, etc... | ||
489 | * So we need to re-calculate and reprogram the next tick event. | ||
490 | */ | ||
491 | void tick_nohz_irq_exit(void) | ||
492 | { | ||
493 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); | ||
494 | |||
495 | if (!ts->inidle) | ||
496 | return; | ||
497 | |||
498 | tick_nohz_stop_sched_tick(ts); | ||
477 | } | 499 | } |
478 | 500 | ||
479 | /** | 501 | /** |
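The tick_nohz_idle_enter() comment above describes a calling convention rather than code, so here is a hedged sketch of how an architecture idle loop is expected to combine these calls. arch_cpu_sleep() is a placeholder for the arch-specific wait-for-interrupt primitive (wfi/hlt/mwait), and real idle loops differ in preemption and bookkeeping details.

/* Placeholder for the arch-specific low-power wait. */
static void arch_cpu_sleep(void);

static void cpu_idle_loop_sketch(void)
{
	while (1) {
		tick_nohz_idle_enter();		/* may stop the scheduling-clock tick */
		while (!need_resched()) {
			rcu_idle_enter();	/* no RCU use past this point... */
			arch_cpu_sleep();
			rcu_idle_exit();	/* ...RCU is usable again from here */
		}
		tick_nohz_idle_exit();		/* restart the tick, update idle accounting */
		schedule();
	}
}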
@@ -515,11 +537,13 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) | |||
515 | } | 537 | } |
516 | 538 | ||
517 | /** | 539 | /** |
518 | * tick_nohz_restart_sched_tick - restart the idle tick from the idle task | 540 | * tick_nohz_idle_exit - restart the idle tick from the idle task |
519 | * | 541 | * |
520 | * Restart the idle tick when the CPU is woken up from idle | 542 | * Restart the idle tick when the CPU is woken up from idle |
543 | * This also exits the RCU extended quiescent state. The CPU | ||
544 | * can use RCU again after this function is called. | ||
521 | */ | 545 | */ |
522 | void tick_nohz_restart_sched_tick(void) | 546 | void tick_nohz_idle_exit(void) |
523 | { | 547 | { |
524 | int cpu = smp_processor_id(); | 548 | int cpu = smp_processor_id(); |
525 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | 549 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); |
@@ -529,6 +553,7 @@ void tick_nohz_restart_sched_tick(void) | |||
529 | ktime_t now; | 553 | ktime_t now; |
530 | 554 | ||
531 | local_irq_disable(); | 555 | local_irq_disable(); |
556 | |||
532 | if (ts->idle_active || (ts->inidle && ts->tick_stopped)) | 557 | if (ts->idle_active || (ts->inidle && ts->tick_stopped)) |
533 | now = ktime_get(); | 558 | now = ktime_get(); |
534 | 559 | ||
@@ -543,8 +568,6 @@ void tick_nohz_restart_sched_tick(void) | |||
543 | 568 | ||
544 | ts->inidle = 0; | 569 | ts->inidle = 0; |
545 | 570 | ||
546 | rcu_exit_nohz(); | ||
547 | |||
548 | /* Update jiffies first */ | 571 | /* Update jiffies first */ |
549 | select_nohz_load_balancer(0); | 572 | select_nohz_load_balancer(0); |
550 | tick_do_update_jiffies64(now); | 573 | tick_do_update_jiffies64(now); |
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index f2bd275bb60f..a043d224adf6 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
@@ -4775,6 +4775,7 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) | |||
4775 | { | 4775 | { |
4776 | __ftrace_dump(true, oops_dump_mode); | 4776 | __ftrace_dump(true, oops_dump_mode); |
4777 | } | 4777 | } |
4778 | EXPORT_SYMBOL_GPL(ftrace_dump); | ||
4778 | 4779 | ||
4779 | __init static int tracer_alloc_buffers(void) | 4780 | __init static int tracer_alloc_buffers(void) |
4780 | { | 4781 | { |