author    Ingo Molnar <mingo@kernel.org>  2012-07-06 10:13:58 -0400
committer Ingo Molnar <mingo@kernel.org>  2012-07-06 10:13:58 -0400
commit    5c09d127a112a78f95572921af88224f4091eb44 (patch)
tree      3b3c984f035a2d82df6dbdcc6fdff5a6c7e69b00
parent    c4aed353b1b079eb4843e6a708fc68b4b28f72aa (diff)
parent    5cf05ad758c30d17ff23c2be346b5de982bc2121 (diff)
Merge branch 'rcu/next' of git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu into core/rcu
Pull the RCU tree from Paul E. McKenney:

 "The major features of this series are:

  1. Preventing latency spikes of more than 200 microseconds for kernels
     built with NR_CPUS=4096, which is reportedly becoming the default
     for some distros.  This is a first step, as it does not help with
     systems that actually -have- 4096 CPUs (work on this case is in
     progress, but is not yet ready for mainline).  This category also
     includes improving concurrency of rcu_barrier(), placed here due to
     conflicts.  Posted to LKML at: https://lkml.org/lkml/2012/6/22/381.
     Note that patches 18-22 of that series have been deferred to 3.7,
     as they have not yet proven themselves to be mainline-ready (and
     yes, these are the ones intended to get rid of RCU's latency spikes
     for systems that actually have 4096 CPUs).

  2. Updates to documentation and rcutorture fixes, the latter category
     including improvements to rcu_barrier() testing.  Posted to LKML at:
     http://lkml.indiana.edu/hypermail/linux/kernel/1206.1/04094.html.

  3. Miscellaneous fixes posted to LKML at:
     https://lkml.org/lkml/2012/6/22/500, with the exception of the last
     commit, which was posted here:
     http://www.gossamer-threads.com/lists/linux/kernel/1561830

  4. RCU_FAST_NO_HZ fixes and improvements.  Posted to LKML at:
     http://lkml.indiana.edu/hypermail/linux/kernel/1206.1/00006.html and
     http://www.gossamer-threads.com/lists/linux/kernel/1561833.
     The first four patches of the first series went into 3.5 to fix a
     regression.

  5. Code-style fixes.  These were posted to LKML at:
     http://lkml.indiana.edu/hypermail/linux/kernel/1205.2/01180.html and
     http://lkml.indiana.edu/hypermail/linux/kernel/1205.2/01181.html."

Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--  Documentation/RCU/checklist.txt      |  39
-rw-r--r--  Documentation/RCU/rcubarrier.txt     |  15
-rw-r--r--  Documentation/RCU/torture.txt        |   9
-rw-r--r--  Documentation/RCU/whatisRCU.txt      |   6
-rw-r--r--  Documentation/kernel-parameters.txt  |   5
-rw-r--r--  arch/um/drivers/mconsole_kern.c      |   1
-rw-r--r--  include/linux/init_task.h            |   4
-rw-r--r--  include/linux/key.h                  |   4
-rw-r--r--  include/linux/rcupdate.h             |  54
-rw-r--r--  include/linux/rcutiny.h              |   6
-rw-r--r--  include/linux/sched.h                |  10
-rw-r--r--  include/trace/events/rcu.h           |  45
-rw-r--r--  kernel/rcupdate.c                    |  44
-rw-r--r--  kernel/rcutiny.c                     |   4
-rw-r--r--  kernel/rcutiny_plugin.h              |  56
-rw-r--r--  kernel/rcutorture.c                  |  72
-rw-r--r--  kernel/rcutree.c                     | 479
-rw-r--r--  kernel/rcutree.h                     |  47
-rw-r--r--  kernel/rcutree_plugin.h              | 237
-rw-r--r--  kernel/rcutree_trace.c               | 148
-rw-r--r--  kernel/sched/core.c                  |   1
-rw-r--r--  kernel/time/tick-sched.c             |   2
-rw-r--r--  lib/list_debug.c                     |   6

23 files changed, 688 insertions(+), 606 deletions(-)
diff --git a/Documentation/RCU/checklist.txt b/Documentation/RCU/checklist.txt
index 5c8d74968090..fc103d7a0474 100644
--- a/Documentation/RCU/checklist.txt
+++ b/Documentation/RCU/checklist.txt
@@ -162,9 +162,9 @@ over a rather long period of time, but improvements are always welcome!
162 when publicizing a pointer to a structure that can 162 when publicizing a pointer to a structure that can
163 be traversed by an RCU read-side critical section. 163 be traversed by an RCU read-side critical section.
164 164
1655. If call_rcu(), or a related primitive such as call_rcu_bh() or 1655. If call_rcu(), or a related primitive such as call_rcu_bh(),
166 call_rcu_sched(), is used, the callback function must be 166 call_rcu_sched(), or call_srcu() is used, the callback function
167 written to be called from softirq context. In particular, 167 must be written to be called from softirq context. In particular,
168 it cannot block. 168 it cannot block.
169 169
1706. Since synchronize_rcu() can block, it cannot be called from 1706. Since synchronize_rcu() can block, it cannot be called from
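
A minimal sketch of the usage pattern this checklist item describes
(illustrative only; "struct foo" and its helpers are made-up names): the
function handed to call_rcu() runs from softirq context after a grace
period, so it may only reclaim memory and must never block.

    #include <linux/kernel.h>
    #include <linux/slab.h>
    #include <linux/rcupdate.h>

    struct foo {
            int data;
            struct rcu_head rcu;
    };

    /* Invoked from softirq context after a grace period: must not block. */
    static void foo_reclaim(struct rcu_head *head)
    {
            struct foo *fp = container_of(head, struct foo, rcu);

            kfree(fp);
    }

    static void foo_release(struct foo *fp)
    {
            /* Defer the kfree() until all pre-existing readers have finished. */
            call_rcu(&fp->rcu, foo_reclaim);
    }
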
@@ -202,11 +202,12 @@ over a rather long period of time, but improvements are always welcome!
202 updater uses call_rcu_sched() or synchronize_sched(), then 202 updater uses call_rcu_sched() or synchronize_sched(), then
203 the corresponding readers must disable preemption, possibly 203 the corresponding readers must disable preemption, possibly
204 by calling rcu_read_lock_sched() and rcu_read_unlock_sched(). 204 by calling rcu_read_lock_sched() and rcu_read_unlock_sched().
205 If the updater uses synchronize_srcu(), the the corresponding 205 If the updater uses synchronize_srcu() or call_srcu(),
206 readers must use srcu_read_lock() and srcu_read_unlock(), 206 the the corresponding readers must use srcu_read_lock() and
207 and with the same srcu_struct. The rules for the expedited 207 srcu_read_unlock(), and with the same srcu_struct. The rules for
208 primitives are the same as for their non-expedited counterparts. 208 the expedited primitives are the same as for their non-expedited
209 Mixing things up will result in confusion and broken kernels. 209 counterparts. Mixing things up will result in confusion and
210 broken kernels.
210 211
211 One exception to this rule: rcu_read_lock() and rcu_read_unlock() 212 One exception to this rule: rcu_read_lock() and rcu_read_unlock()
212 may be substituted for rcu_read_lock_bh() and rcu_read_unlock_bh() 213 may be substituted for rcu_read_lock_bh() and rcu_read_unlock_bh()
@@ -333,14 +334,14 @@ over a rather long period of time, but improvements are always welcome!
333 victim CPU from ever going offline.) 334 victim CPU from ever going offline.)
334 335
33514. SRCU (srcu_read_lock(), srcu_read_unlock(), srcu_dereference(), 33614. SRCU (srcu_read_lock(), srcu_read_unlock(), srcu_dereference(),
336 synchronize_srcu(), and synchronize_srcu_expedited()) may only 337 synchronize_srcu(), synchronize_srcu_expedited(), and call_srcu())
337 be invoked from process context. Unlike other forms of RCU, it 338 may only be invoked from process context. Unlike other forms of
338 -is- permissible to block in an SRCU read-side critical section 339 RCU, it -is- permissible to block in an SRCU read-side critical
339 (demarked by srcu_read_lock() and srcu_read_unlock()), hence the 340 section (demarked by srcu_read_lock() and srcu_read_unlock()),
340 "SRCU": "sleepable RCU". Please note that if you don't need 341 hence the "SRCU": "sleepable RCU". Please note that if you
341 to sleep in read-side critical sections, you should be using 342 don't need to sleep in read-side critical sections, you should be
342 RCU rather than SRCU, because RCU is almost always faster and 343 using RCU rather than SRCU, because RCU is almost always faster
343 easier to use than is SRCU. 344 and easier to use than is SRCU.
344 345
345 If you need to enter your read-side critical section in a 346 If you need to enter your read-side critical section in a
346 hardirq or exception handler, and then exit that same read-side 347 hardirq or exception handler, and then exit that same read-side
@@ -353,8 +354,8 @@ over a rather long period of time, but improvements are always welcome!
353 cleanup_srcu_struct(). These are passed a "struct srcu_struct" 354 cleanup_srcu_struct(). These are passed a "struct srcu_struct"
354 that defines the scope of a given SRCU domain. Once initialized, 355 that defines the scope of a given SRCU domain. Once initialized,
355 the srcu_struct is passed to srcu_read_lock(), srcu_read_unlock() 356 the srcu_struct is passed to srcu_read_lock(), srcu_read_unlock()
356 synchronize_srcu(), and synchronize_srcu_expedited(). A given 357 synchronize_srcu(), synchronize_srcu_expedited(), and call_srcu().
357 synchronize_srcu() waits only for SRCU read-side critical 358 A given synchronize_srcu() waits only for SRCU read-side critical
358 sections governed by srcu_read_lock() and srcu_read_unlock() 359 sections governed by srcu_read_lock() and srcu_read_unlock()
359 calls that have been passed the same srcu_struct. This property 360 calls that have been passed the same srcu_struct. This property
360 is what makes sleeping read-side critical sections tolerable -- 361 is what makes sleeping read-side critical sections tolerable --
@@ -374,7 +375,7 @@ over a rather long period of time, but improvements are always welcome!
374 requiring SRCU's read-side deadlock immunity or low read-side 375 requiring SRCU's read-side deadlock immunity or low read-side
375 realtime latency. 376 realtime latency.
376 377
377 Note that, rcu_assign_pointer() relates to SRCU just as they do 378 Note that, rcu_assign_pointer() relates to SRCU just as it does
378 to other forms of RCU. 379 to other forms of RCU.
379 380
38015. The whole point of call_rcu(), synchronize_rcu(), and friends 38115. The whole point of call_rcu(), synchronize_rcu(), and friends
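
The SRCU rules spelled out above can be summarized with a short sketch
(illustrative only; "foo_srcu", "foo_ptr" and friends are made-up names,
and init_srcu_struct(&foo_srcu) is assumed to have run, for example from
module init, before first use):

    #include <linux/srcu.h>
    #include <linux/slab.h>

    struct foo { int data; };

    static struct srcu_struct foo_srcu;     /* defines the SRCU domain */
    static struct foo __rcu *foo_ptr;

    static int foo_read(void)
    {
            int idx, val = -1;
            struct foo *fp;

            idx = srcu_read_lock(&foo_srcu);
            fp = srcu_dereference(foo_ptr, &foo_srcu);
            if (fp)
                    val = fp->data;
            /* Unlike rcu_read_lock() sections, sleeping is legal here. */
            srcu_read_unlock(&foo_srcu, idx);
            return val;
    }

    static void foo_update(struct foo *new_fp, struct foo *old_fp)
    {
            rcu_assign_pointer(foo_ptr, new_fp);
            synchronize_srcu(&foo_srcu);    /* or call_srcu() for async reclaim */
            kfree(old_fp);
    }
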
diff --git a/Documentation/RCU/rcubarrier.txt b/Documentation/RCU/rcubarrier.txt
index e439a0edee22..38428c125135 100644
--- a/Documentation/RCU/rcubarrier.txt
+++ b/Documentation/RCU/rcubarrier.txt
@@ -79,8 +79,6 @@ complete. Pseudo-code using rcu_barrier() is as follows:
79 2. Execute rcu_barrier(). 79 2. Execute rcu_barrier().
80 3. Allow the module to be unloaded. 80 3. Allow the module to be unloaded.
81 81
82Quick Quiz #1: Why is there no srcu_barrier()?
83
84The rcutorture module makes use of rcu_barrier in its exit function 82The rcutorture module makes use of rcu_barrier in its exit function
85as follows: 83as follows:
86 84
@@ -162,7 +160,7 @@ for any pre-existing callbacks to complete.
162Then lines 55-62 print status and do operation-specific cleanup, and 160Then lines 55-62 print status and do operation-specific cleanup, and
163then return, permitting the module-unload operation to be completed. 161then return, permitting the module-unload operation to be completed.
164 162
165Quick Quiz #2: Is there any other situation where rcu_barrier() might 163Quick Quiz #1: Is there any other situation where rcu_barrier() might
166 be required? 164 be required?
167 165
168Your module might have additional complications. For example, if your 166Your module might have additional complications. For example, if your
@@ -242,7 +240,7 @@ reaches zero, as follows:
242 4 complete(&rcu_barrier_completion); 240 4 complete(&rcu_barrier_completion);
243 5 } 241 5 }
244 242
245Quick Quiz #3: What happens if CPU 0's rcu_barrier_func() executes 243Quick Quiz #2: What happens if CPU 0's rcu_barrier_func() executes
246 immediately (thus incrementing rcu_barrier_cpu_count to the 244 immediately (thus incrementing rcu_barrier_cpu_count to the
247 value one), but the other CPU's rcu_barrier_func() invocations 245 value one), but the other CPU's rcu_barrier_func() invocations
248 are delayed for a full grace period? Couldn't this result in 246 are delayed for a full grace period? Couldn't this result in
@@ -259,12 +257,7 @@ so that your module may be safely unloaded.
259 257
260Answers to Quick Quizzes 258Answers to Quick Quizzes
261 259
262Quick Quiz #1: Why is there no srcu_barrier()? 260Quick Quiz #1: Is there any other situation where rcu_barrier() might
263
264Answer: Since there is no call_srcu(), there can be no outstanding SRCU
265 callbacks. Therefore, there is no need to wait for them.
266
267Quick Quiz #2: Is there any other situation where rcu_barrier() might
268 be required? 261 be required?
269 262
270Answer: Interestingly enough, rcu_barrier() was not originally 263Answer: Interestingly enough, rcu_barrier() was not originally
@@ -278,7 +271,7 @@ Answer: Interestingly enough, rcu_barrier() was not originally
278 implementing rcutorture, and found that rcu_barrier() solves 271 implementing rcutorture, and found that rcu_barrier() solves
279 this problem as well. 272 this problem as well.
280 273
281Quick Quiz #3: What happens if CPU 0's rcu_barrier_func() executes 274Quick Quiz #2: What happens if CPU 0's rcu_barrier_func() executes
282 immediately (thus incrementing rcu_barrier_cpu_count to the 275 immediately (thus incrementing rcu_barrier_cpu_count to the
283 value one), but the other CPU's rcu_barrier_func() invocations 276 value one), but the other CPU's rcu_barrier_func() invocations
284 are delayed for a full grace period? Couldn't this result in 277 are delayed for a full grace period? Couldn't this result in
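
The module-unload procedure that rcubarrier.txt walks through looks
roughly like the sketch below (illustrative only;
foo_stop_new_callbacks() is a hypothetical helper that prevents this
module from posting any further call_rcu() callbacks):

    #include <linux/module.h>
    #include <linux/rcupdate.h>

    static void __exit foo_exit(void)
    {
            /* 1. Stop posting new RCU callbacks (hypothetical helper). */
            foo_stop_new_callbacks();

            /*
             * 2. Wait for every already-posted callback to be invoked, so
             *    that none of this module's callback functions can run
             *    after the module text is gone.
             */
            rcu_barrier();

            /* 3. It is now safe to allow the module to be unloaded. */
    }
    module_exit(foo_exit);
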
diff --git a/Documentation/RCU/torture.txt b/Documentation/RCU/torture.txt
index 4ddf3913fd8c..7dce8a17eac2 100644
--- a/Documentation/RCU/torture.txt
+++ b/Documentation/RCU/torture.txt
@@ -174,11 +174,20 @@ torture_type The type of RCU to test, with string values as follows:
174 and synchronize_rcu_bh_expedited(). 174 and synchronize_rcu_bh_expedited().
175 175
176 "srcu": srcu_read_lock(), srcu_read_unlock() and 176 "srcu": srcu_read_lock(), srcu_read_unlock() and
177 call_srcu().
178
179 "srcu_sync": srcu_read_lock(), srcu_read_unlock() and
177 synchronize_srcu(). 180 synchronize_srcu().
178 181
179 "srcu_expedited": srcu_read_lock(), srcu_read_unlock() and 182 "srcu_expedited": srcu_read_lock(), srcu_read_unlock() and
180 synchronize_srcu_expedited(). 183 synchronize_srcu_expedited().
181 184
185 "srcu_raw": srcu_read_lock_raw(), srcu_read_unlock_raw(),
186 and call_srcu().
187
188 "srcu_raw_sync": srcu_read_lock_raw(), srcu_read_unlock_raw(),
189 and synchronize_srcu().
190
182 "sched": preempt_disable(), preempt_enable(), and 191 "sched": preempt_disable(), preempt_enable(), and
183 call_rcu_sched(). 192 call_rcu_sched().
184 193
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
index 6bbe8dcdc3da..69ee188515e7 100644
--- a/Documentation/RCU/whatisRCU.txt
+++ b/Documentation/RCU/whatisRCU.txt
@@ -833,9 +833,9 @@ sched: Critical sections Grace period Barrier
833 833
834SRCU: Critical sections Grace period Barrier 834SRCU: Critical sections Grace period Barrier
835 835
836 srcu_read_lock synchronize_srcu N/A 836 srcu_read_lock synchronize_srcu srcu_barrier
837 srcu_read_unlock synchronize_srcu_expedited 837 srcu_read_unlock call_srcu
838 srcu_read_lock_raw 838 srcu_read_lock_raw synchronize_srcu_expedited
839 srcu_read_unlock_raw 839 srcu_read_unlock_raw
840 srcu_dereference 840 srcu_dereference
841 841
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index a92c5ebf373e..12783fa833c3 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2367,6 +2367,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
2367 Set maximum number of finished RCU callbacks to process 2367 Set maximum number of finished RCU callbacks to process
2368 in one batch. 2368 in one batch.
2369 2369
2370 rcutree.fanout_leaf= [KNL,BOOT]
2371 Increase the number of CPUs assigned to each
2372 leaf rcu_node structure. Useful for very large
2373 systems.
2374
2370 rcutree.qhimark= [KNL,BOOT] 2375 rcutree.qhimark= [KNL,BOOT]
2371 Set threshold of queued 2376 Set threshold of queued
2372 RCU callbacks over which batch limiting is disabled. 2377 RCU callbacks over which batch limiting is disabled.
diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c
index 88e466b159dc..43b39d61b538 100644
--- a/arch/um/drivers/mconsole_kern.c
+++ b/arch/um/drivers/mconsole_kern.c
@@ -705,7 +705,6 @@ static void stack_proc(void *arg)
705 struct task_struct *from = current, *to = arg; 705 struct task_struct *from = current, *to = arg;
706 706
707 to->thread.saved_task = from; 707 to->thread.saved_task = from;
708 rcu_switch_from(from);
709 switch_to(from, to, from); 708 switch_to(from, to, from);
710} 709}
711 710
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 9e65eff6af3b..8a7476186990 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -168,8 +168,8 @@ extern struct cred init_cred;
168 .children = LIST_HEAD_INIT(tsk.children), \ 168 .children = LIST_HEAD_INIT(tsk.children), \
169 .sibling = LIST_HEAD_INIT(tsk.sibling), \ 169 .sibling = LIST_HEAD_INIT(tsk.sibling), \
170 .group_leader = &tsk, \ 170 .group_leader = &tsk, \
171 RCU_INIT_POINTER(.real_cred, &init_cred), \ 171 RCU_POINTER_INITIALIZER(real_cred, &init_cred), \
172 RCU_INIT_POINTER(.cred, &init_cred), \ 172 RCU_POINTER_INITIALIZER(cred, &init_cred), \
173 .comm = INIT_TASK_COMM, \ 173 .comm = INIT_TASK_COMM, \
174 .thread = INIT_THREAD, \ 174 .thread = INIT_THREAD, \
175 .fs = &init_fs, \ 175 .fs = &init_fs, \
diff --git a/include/linux/key.h b/include/linux/key.h
index 4cd22ed627ef..cef3b315ba7c 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -303,7 +303,9 @@ static inline bool key_is_instantiated(const struct key *key)
303 rwsem_is_locked(&((struct key *)(KEY))->sem))) 303 rwsem_is_locked(&((struct key *)(KEY))->sem)))
304 304
305#define rcu_assign_keypointer(KEY, PAYLOAD) \ 305#define rcu_assign_keypointer(KEY, PAYLOAD) \
306 (rcu_assign_pointer((KEY)->payload.rcudata, PAYLOAD)) 306do { \
307 rcu_assign_pointer((KEY)->payload.rcudata, (PAYLOAD)); \
308} while (0)
307 309
308#ifdef CONFIG_SYSCTL 310#ifdef CONFIG_SYSCTL
309extern ctl_table key_sysctls[]; 311extern ctl_table key_sysctls[];
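
The rcu_assign_keypointer() change above is a macro-hygiene fix: wrapping
the body in do { ... } while (0) makes the macro expand to exactly one
statement and keeps it from being (mis)used as an expression.  A generic
illustration with hypothetical names:

    #include <linux/types.h>

    struct thing { int flag; };

    #define SET_FLAG_AND_COUNT(t, counter)          \
    do {                                            \
            (t)->flag = 1;                          \
            (counter)++;                            \
    } while (0)

    static void example(struct thing *t, int *counter, bool cond)
    {
            if (cond)
                    SET_FLAG_AND_COUNT(t, *counter); /* safe in unbraced if/else */
            else
                    t->flag = 0;
    }
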
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 26d1a47591f1..115ead2b5155 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -147,6 +147,7 @@ extern void synchronize_sched(void);
147 147
148extern void __rcu_read_lock(void); 148extern void __rcu_read_lock(void);
149extern void __rcu_read_unlock(void); 149extern void __rcu_read_unlock(void);
150extern void rcu_read_unlock_special(struct task_struct *t);
150void synchronize_rcu(void); 151void synchronize_rcu(void);
151 152
152/* 153/*
@@ -184,7 +185,6 @@ static inline int rcu_preempt_depth(void)
184/* Internal to kernel */ 185/* Internal to kernel */
185extern void rcu_sched_qs(int cpu); 186extern void rcu_sched_qs(int cpu);
186extern void rcu_bh_qs(int cpu); 187extern void rcu_bh_qs(int cpu);
187extern void rcu_preempt_note_context_switch(void);
188extern void rcu_check_callbacks(int cpu, int user); 188extern void rcu_check_callbacks(int cpu, int user);
189struct notifier_block; 189struct notifier_block;
190extern void rcu_idle_enter(void); 190extern void rcu_idle_enter(void);
@@ -256,6 +256,10 @@ static inline void destroy_rcu_head_on_stack(struct rcu_head *head)
256} 256}
257#endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ 257#endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
258 258
259#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SMP)
260extern int rcu_is_cpu_idle(void);
261#endif /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SMP) */
262
259#if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) 263#if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU)
260bool rcu_lockdep_current_cpu_online(void); 264bool rcu_lockdep_current_cpu_online(void);
261#else /* #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */ 265#else /* #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */
@@ -267,15 +271,6 @@ static inline bool rcu_lockdep_current_cpu_online(void)
267 271
268#ifdef CONFIG_DEBUG_LOCK_ALLOC 272#ifdef CONFIG_DEBUG_LOCK_ALLOC
269 273
270#ifdef CONFIG_PROVE_RCU
271extern int rcu_is_cpu_idle(void);
272#else /* !CONFIG_PROVE_RCU */
273static inline int rcu_is_cpu_idle(void)
274{
275 return 0;
276}
277#endif /* else !CONFIG_PROVE_RCU */
278
279static inline void rcu_lock_acquire(struct lockdep_map *map) 274static inline void rcu_lock_acquire(struct lockdep_map *map)
280{ 275{
281 lock_acquire(map, 0, 0, 2, 1, NULL, _THIS_IP_); 276 lock_acquire(map, 0, 0, 2, 1, NULL, _THIS_IP_);
@@ -432,8 +427,7 @@ extern int rcu_my_thread_group_empty(void);
432static inline void rcu_preempt_sleep_check(void) 427static inline void rcu_preempt_sleep_check(void)
433{ 428{
434 rcu_lockdep_assert(!lock_is_held(&rcu_lock_map), 429 rcu_lockdep_assert(!lock_is_held(&rcu_lock_map),
435 "Illegal context switch in RCU read-side " 430 "Illegal context switch in RCU read-side critical section");
436 "critical section");
437} 431}
438#else /* #ifdef CONFIG_PROVE_RCU */ 432#else /* #ifdef CONFIG_PROVE_RCU */
439static inline void rcu_preempt_sleep_check(void) 433static inline void rcu_preempt_sleep_check(void)
@@ -514,10 +508,10 @@ static inline void rcu_preempt_sleep_check(void)
514 (_________p1); \ 508 (_________p1); \
515 }) 509 })
516#define __rcu_assign_pointer(p, v, space) \ 510#define __rcu_assign_pointer(p, v, space) \
517 ({ \ 511 do { \
518 smp_wmb(); \ 512 smp_wmb(); \
519 (p) = (typeof(*v) __force space *)(v); \ 513 (p) = (typeof(*v) __force space *)(v); \
520 }) 514 } while (0)
521 515
522 516
523/** 517/**
@@ -852,7 +846,7 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
852 * 846 *
853 * Assigns the specified value to the specified RCU-protected 847 * Assigns the specified value to the specified RCU-protected
854 * pointer, ensuring that any concurrent RCU readers will see 848 * pointer, ensuring that any concurrent RCU readers will see
855 * any prior initialization. Returns the value assigned. 849 * any prior initialization.
856 * 850 *
857 * Inserts memory barriers on architectures that require them 851 * Inserts memory barriers on architectures that require them
858 * (which is most of them), and also prevents the compiler from 852 * (which is most of them), and also prevents the compiler from
@@ -904,25 +898,17 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
904 * the reader-accessible portions of the linked structure. 898 * the reader-accessible portions of the linked structure.
905 */ 899 */
906#define RCU_INIT_POINTER(p, v) \ 900#define RCU_INIT_POINTER(p, v) \
907 p = (typeof(*v) __force __rcu *)(v) 901 do { \
908 902 p = (typeof(*v) __force __rcu *)(v); \
909static __always_inline bool __is_kfree_rcu_offset(unsigned long offset) 903 } while (0)
910{
911 return offset < 4096;
912}
913
914static __always_inline
915void __kfree_rcu(struct rcu_head *head, unsigned long offset)
916{
917 typedef void (*rcu_callback)(struct rcu_head *);
918
919 BUILD_BUG_ON(!__builtin_constant_p(offset));
920
921 /* See the kfree_rcu() header comment. */
922 BUILD_BUG_ON(!__is_kfree_rcu_offset(offset));
923 904
924 kfree_call_rcu(head, (rcu_callback)offset); 905/**
925} 906 * RCU_POINTER_INITIALIZER() - statically initialize an RCU protected pointer
907 *
908 * GCC-style initialization for an RCU-protected pointer in a structure field.
909 */
910#define RCU_POINTER_INITIALIZER(p, v) \
911 .p = (typeof(*v) __force __rcu *)(v)
926 912
927/* 913/*
928 * Does the specified offset indicate that the corresponding rcu_head 914 * Does the specified offset indicate that the corresponding rcu_head
@@ -936,7 +922,7 @@ void __kfree_rcu(struct rcu_head *head, unsigned long offset)
936#define __kfree_rcu(head, offset) \ 922#define __kfree_rcu(head, offset) \
937 do { \ 923 do { \
938 BUILD_BUG_ON(!__is_kfree_rcu_offset(offset)); \ 924 BUILD_BUG_ON(!__is_kfree_rcu_offset(offset)); \
939 call_rcu(head, (void (*)(struct rcu_head *))(unsigned long)(offset)); \ 925 kfree_call_rcu(head, (void (*)(struct rcu_head *))(unsigned long)(offset)); \
940 } while (0) 926 } while (0)
941 927
942/** 928/**
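
The new RCU_POINTER_INITIALIZER() macro provides GCC-style designated
initialization of an RCU-protected pointer field, which is what the
init_task.h hunk above now uses for ->real_cred and ->cred.  A minimal
sketch with made-up types:

    #include <linux/rcupdate.h>

    struct bar { int x; };

    struct foo {
            struct bar __rcu *barp;
            int other;
    };

    static struct bar default_bar = { .x = 42 };

    static struct foo foo_instance = {
            RCU_POINTER_INITIALIZER(barp, &default_bar),
            .other = 0,
    };
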
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 854dc4c5c271..4e56a9c69a35 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -87,6 +87,10 @@ static inline void kfree_call_rcu(struct rcu_head *head,
87 87
88#ifdef CONFIG_TINY_RCU 88#ifdef CONFIG_TINY_RCU
89 89
90static inline void rcu_preempt_note_context_switch(void)
91{
92}
93
90static inline int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) 94static inline int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies)
91{ 95{
92 *delta_jiffies = ULONG_MAX; 96 *delta_jiffies = ULONG_MAX;
@@ -95,6 +99,7 @@ static inline int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies)
95 99
96#else /* #ifdef CONFIG_TINY_RCU */ 100#else /* #ifdef CONFIG_TINY_RCU */
97 101
102void rcu_preempt_note_context_switch(void);
98int rcu_preempt_needs_cpu(void); 103int rcu_preempt_needs_cpu(void);
99 104
100static inline int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) 105static inline int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies)
@@ -108,6 +113,7 @@ static inline int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies)
108static inline void rcu_note_context_switch(int cpu) 113static inline void rcu_note_context_switch(int cpu)
109{ 114{
110 rcu_sched_qs(cpu); 115 rcu_sched_qs(cpu);
116 rcu_preempt_note_context_switch();
111} 117}
112 118
113/* 119/*
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4059c0f33f07..06a4c5f4f55c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1871,22 +1871,12 @@ static inline void rcu_copy_process(struct task_struct *p)
1871 INIT_LIST_HEAD(&p->rcu_node_entry); 1871 INIT_LIST_HEAD(&p->rcu_node_entry);
1872} 1872}
1873 1873
1874static inline void rcu_switch_from(struct task_struct *prev)
1875{
1876 if (prev->rcu_read_lock_nesting != 0)
1877 rcu_preempt_note_context_switch();
1878}
1879
1880#else 1874#else
1881 1875
1882static inline void rcu_copy_process(struct task_struct *p) 1876static inline void rcu_copy_process(struct task_struct *p)
1883{ 1877{
1884} 1878}
1885 1879
1886static inline void rcu_switch_from(struct task_struct *prev)
1887{
1888}
1889
1890#endif 1880#endif
1891 1881
1892#ifdef CONFIG_SMP 1882#ifdef CONFIG_SMP
diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index d274734b2aa4..5bde94d8585b 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -541,6 +541,50 @@ TRACE_EVENT(rcu_torture_read,
541 __entry->rcutorturename, __entry->rhp) 541 __entry->rcutorturename, __entry->rhp)
542); 542);
543 543
544/*
545 * Tracepoint for _rcu_barrier() execution. The string "s" describes
546 * the _rcu_barrier phase:
547 * "Begin": rcu_barrier_callback() started.
548 * "Check": rcu_barrier_callback() checking for piggybacking.
549 * "EarlyExit": rcu_barrier_callback() piggybacked, thus early exit.
550 * "Inc1": rcu_barrier_callback() piggyback check counter incremented.
551 * "Offline": rcu_barrier_callback() found offline CPU
552 * "OnlineQ": rcu_barrier_callback() found online CPU with callbacks.
553 * "OnlineNQ": rcu_barrier_callback() found online CPU, no callbacks.
554 * "IRQ": An rcu_barrier_callback() callback posted on remote CPU.
555 * "CB": An rcu_barrier_callback() invoked a callback, not the last.
556 * "LastCB": An rcu_barrier_callback() invoked the last callback.
557 * "Inc2": rcu_barrier_callback() piggyback check counter incremented.
558 * The "cpu" argument is the CPU or -1 if meaningless, the "cnt" argument
559 * is the count of remaining callbacks, and "done" is the piggybacking count.
560 */
561TRACE_EVENT(rcu_barrier,
562
563 TP_PROTO(char *rcuname, char *s, int cpu, int cnt, unsigned long done),
564
565 TP_ARGS(rcuname, s, cpu, cnt, done),
566
567 TP_STRUCT__entry(
568 __field(char *, rcuname)
569 __field(char *, s)
570 __field(int, cpu)
571 __field(int, cnt)
572 __field(unsigned long, done)
573 ),
574
575 TP_fast_assign(
576 __entry->rcuname = rcuname;
577 __entry->s = s;
578 __entry->cpu = cpu;
579 __entry->cnt = cnt;
580 __entry->done = done;
581 ),
582
583 TP_printk("%s %s cpu %d remaining %d # %lu",
584 __entry->rcuname, __entry->s, __entry->cpu, __entry->cnt,
585 __entry->done)
586);
587
544#else /* #ifdef CONFIG_RCU_TRACE */ 588#else /* #ifdef CONFIG_RCU_TRACE */
545 589
546#define trace_rcu_grace_period(rcuname, gpnum, gpevent) do { } while (0) 590#define trace_rcu_grace_period(rcuname, gpnum, gpevent) do { } while (0)
@@ -564,6 +608,7 @@ TRACE_EVENT(rcu_torture_read,
564#define trace_rcu_batch_end(rcuname, callbacks_invoked, cb, nr, iit, risk) \ 608#define trace_rcu_batch_end(rcuname, callbacks_invoked, cb, nr, iit, risk) \
565 do { } while (0) 609 do { } while (0)
566#define trace_rcu_torture_read(rcutorturename, rhp) do { } while (0) 610#define trace_rcu_torture_read(rcutorturename, rhp) do { } while (0)
611#define trace_rcu_barrier(name, s, cpu, cnt, done) do { } while (0)
567 612
568#endif /* #else #ifdef CONFIG_RCU_TRACE */ 613#endif /* #else #ifdef CONFIG_RCU_TRACE */
569 614
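
The TRACE_EVENT() above generates a trace_rcu_barrier() call site with the
prototype given in TP_PROTO(); the real callers added by this series live
in kernel/rcutree.c.  A hedged sketch of what such a call looks like
(names and argument values are illustrative, not the actual rcutree.c
code):

    #include <trace/events/rcu.h>

    static void example_fire_rcu_barrier_trace(int cpu, int remaining,
                                               unsigned long done)
    {
            /*
             * "Begin" is one of the phase strings documented above;
             * cpu == -1 means "no specific CPU".
             */
            trace_rcu_barrier("rcu_sched", "Begin", cpu, remaining, done);
    }
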
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 95cba41ce1e9..4e6a61b15e86 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -54,6 +54,50 @@
54#ifdef CONFIG_PREEMPT_RCU 54#ifdef CONFIG_PREEMPT_RCU
55 55
56/* 56/*
57 * Preemptible RCU implementation for rcu_read_lock().
58 * Just increment ->rcu_read_lock_nesting, shared state will be updated
59 * if we block.
60 */
61void __rcu_read_lock(void)
62{
63 current->rcu_read_lock_nesting++;
64 barrier(); /* critical section after entry code. */
65}
66EXPORT_SYMBOL_GPL(__rcu_read_lock);
67
68/*
69 * Preemptible RCU implementation for rcu_read_unlock().
70 * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost
71 * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
72 * invoke rcu_read_unlock_special() to clean up after a context switch
73 * in an RCU read-side critical section and other special cases.
74 */
75void __rcu_read_unlock(void)
76{
77 struct task_struct *t = current;
78
79 if (t->rcu_read_lock_nesting != 1) {
80 --t->rcu_read_lock_nesting;
81 } else {
82 barrier(); /* critical section before exit code. */
83 t->rcu_read_lock_nesting = INT_MIN;
84 barrier(); /* assign before ->rcu_read_unlock_special load */
85 if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
86 rcu_read_unlock_special(t);
87 barrier(); /* ->rcu_read_unlock_special load before assign */
88 t->rcu_read_lock_nesting = 0;
89 }
90#ifdef CONFIG_PROVE_LOCKING
91 {
92 int rrln = ACCESS_ONCE(t->rcu_read_lock_nesting);
93
94 WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2);
95 }
96#endif /* #ifdef CONFIG_PROVE_LOCKING */
97}
98EXPORT_SYMBOL_GPL(__rcu_read_unlock);
99
100/*
57 * Check for a task exiting while in a preemptible-RCU read-side 101 * Check for a task exiting while in a preemptible-RCU read-side
58 * critical section, clean up if so. No need to issue warnings, 102 * critical section, clean up if so. No need to issue warnings,
59 * as debug_check_no_locks_held() already does this if lockdep 103 * as debug_check_no_locks_held() already does this if lockdep
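
__rcu_read_lock() and __rcu_read_unlock() above are the preemptible-RCU
implementations behind rcu_read_lock()/rcu_read_unlock(); this series
consolidates them into common code in kernel/rcupdate.c.  The read-side
pattern they back (illustrative sketch, hypothetical names):

    #include <linux/rcupdate.h>

    struct foo { int data; };
    static struct foo __rcu *gp;

    static int foo_get_data(void)
    {
            int val = -1;
            struct foo *fp;

            rcu_read_lock();        /* just bumps ->rcu_read_lock_nesting */
            fp = rcu_dereference(gp);
            if (fp)
                    val = fp->data;
            rcu_read_unlock();      /* may invoke rcu_read_unlock_special() */
            return val;
    }
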
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
index 37a5444204d2..547b1fe5b052 100644
--- a/kernel/rcutiny.c
+++ b/kernel/rcutiny.c
@@ -172,7 +172,7 @@ void rcu_irq_enter(void)
172 local_irq_restore(flags); 172 local_irq_restore(flags);
173} 173}
174 174
175#ifdef CONFIG_PROVE_RCU 175#ifdef CONFIG_DEBUG_LOCK_ALLOC
176 176
177/* 177/*
178 * Test whether RCU thinks that the current CPU is idle. 178 * Test whether RCU thinks that the current CPU is idle.
@@ -183,7 +183,7 @@ int rcu_is_cpu_idle(void)
183} 183}
184EXPORT_SYMBOL(rcu_is_cpu_idle); 184EXPORT_SYMBOL(rcu_is_cpu_idle);
185 185
186#endif /* #ifdef CONFIG_PROVE_RCU */ 186#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
187 187
188/* 188/*
189 * Test whether the current CPU was interrupted from idle. Nested 189 * Test whether the current CPU was interrupted from idle. Nested
diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h
index fc31a2d65100..918fd1e8509c 100644
--- a/kernel/rcutiny_plugin.h
+++ b/kernel/rcutiny_plugin.h
@@ -132,7 +132,6 @@ static struct rcu_preempt_ctrlblk rcu_preempt_ctrlblk = {
132 RCU_TRACE(.rcb.name = "rcu_preempt") 132 RCU_TRACE(.rcb.name = "rcu_preempt")
133}; 133};
134 134
135static void rcu_read_unlock_special(struct task_struct *t);
136static int rcu_preempted_readers_exp(void); 135static int rcu_preempted_readers_exp(void);
137static void rcu_report_exp_done(void); 136static void rcu_report_exp_done(void);
138 137
@@ -351,8 +350,9 @@ static int rcu_initiate_boost(void)
351 rcu_preempt_ctrlblk.boost_tasks = 350 rcu_preempt_ctrlblk.boost_tasks =
352 rcu_preempt_ctrlblk.gp_tasks; 351 rcu_preempt_ctrlblk.gp_tasks;
353 invoke_rcu_callbacks(); 352 invoke_rcu_callbacks();
354 } else 353 } else {
355 RCU_TRACE(rcu_initiate_boost_trace()); 354 RCU_TRACE(rcu_initiate_boost_trace());
355 }
356 return 1; 356 return 1;
357} 357}
358 358
@@ -527,23 +527,11 @@ void rcu_preempt_note_context_switch(void)
527} 527}
528 528
529/* 529/*
530 * Tiny-preemptible RCU implementation for rcu_read_lock().
531 * Just increment ->rcu_read_lock_nesting, shared state will be updated
532 * if we block.
533 */
534void __rcu_read_lock(void)
535{
536 current->rcu_read_lock_nesting++;
537 barrier(); /* needed if we ever invoke rcu_read_lock in rcutiny.c */
538}
539EXPORT_SYMBOL_GPL(__rcu_read_lock);
540
541/*
542 * Handle special cases during rcu_read_unlock(), such as needing to 530 * Handle special cases during rcu_read_unlock(), such as needing to
543 * notify RCU core processing or task having blocked during the RCU 531 * notify RCU core processing or task having blocked during the RCU
544 * read-side critical section. 532 * read-side critical section.
545 */ 533 */
546static noinline void rcu_read_unlock_special(struct task_struct *t) 534void rcu_read_unlock_special(struct task_struct *t)
547{ 535{
548 int empty; 536 int empty;
549 int empty_exp; 537 int empty_exp;
@@ -627,38 +615,6 @@ static noinline void rcu_read_unlock_special(struct task_struct *t)
627} 615}
628 616
629/* 617/*
630 * Tiny-preemptible RCU implementation for rcu_read_unlock().
631 * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost
632 * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
633 * invoke rcu_read_unlock_special() to clean up after a context switch
634 * in an RCU read-side critical section and other special cases.
635 */
636void __rcu_read_unlock(void)
637{
638 struct task_struct *t = current;
639
640 barrier(); /* needed if we ever invoke rcu_read_unlock in rcutiny.c */
641 if (t->rcu_read_lock_nesting != 1)
642 --t->rcu_read_lock_nesting;
643 else {
644 t->rcu_read_lock_nesting = INT_MIN;
645 barrier(); /* assign before ->rcu_read_unlock_special load */
646 if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
647 rcu_read_unlock_special(t);
648 barrier(); /* ->rcu_read_unlock_special load before assign */
649 t->rcu_read_lock_nesting = 0;
650 }
651#ifdef CONFIG_PROVE_LOCKING
652 {
653 int rrln = ACCESS_ONCE(t->rcu_read_lock_nesting);
654
655 WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2);
656 }
657#endif /* #ifdef CONFIG_PROVE_LOCKING */
658}
659EXPORT_SYMBOL_GPL(__rcu_read_unlock);
660
661/*
662 * Check for a quiescent state from the current CPU. When a task blocks, 618 * Check for a quiescent state from the current CPU. When a task blocks,
663 * the task is recorded in the rcu_preempt_ctrlblk structure, which is 619 * the task is recorded in the rcu_preempt_ctrlblk structure, which is
664 * checked elsewhere. This is called from the scheduling-clock interrupt. 620 * checked elsewhere. This is called from the scheduling-clock interrupt.
@@ -823,9 +779,9 @@ void synchronize_rcu_expedited(void)
823 rpcp->exp_tasks = NULL; 779 rpcp->exp_tasks = NULL;
824 780
825 /* Wait for tail of ->blkd_tasks list to drain. */ 781 /* Wait for tail of ->blkd_tasks list to drain. */
826 if (!rcu_preempted_readers_exp()) 782 if (!rcu_preempted_readers_exp()) {
827 local_irq_restore(flags); 783 local_irq_restore(flags);
828 else { 784 } else {
829 rcu_initiate_boost(); 785 rcu_initiate_boost();
830 local_irq_restore(flags); 786 local_irq_restore(flags);
831 wait_event(sync_rcu_preempt_exp_wq, 787 wait_event(sync_rcu_preempt_exp_wq,
@@ -846,8 +802,6 @@ EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
846 */ 802 */
847int rcu_preempt_needs_cpu(void) 803int rcu_preempt_needs_cpu(void)
848{ 804{
849 if (!rcu_preempt_running_reader())
850 rcu_preempt_cpu_qs();
851 return rcu_preempt_ctrlblk.rcb.rcucblist != NULL; 805 return rcu_preempt_ctrlblk.rcb.rcucblist != NULL;
852} 806}
853 807
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index e66b34ab7555..25b15033c61f 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -49,8 +49,7 @@
49#include <asm/byteorder.h> 49#include <asm/byteorder.h>
50 50
51MODULE_LICENSE("GPL"); 51MODULE_LICENSE("GPL");
52MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and " 52MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and Josh Triplett <josh@freedesktop.org>");
53 "Josh Triplett <josh@freedesktop.org>");
54 53
55static int nreaders = -1; /* # reader threads, defaults to 2*ncpus */ 54static int nreaders = -1; /* # reader threads, defaults to 2*ncpus */
56static int nfakewriters = 4; /* # fake writer threads */ 55static int nfakewriters = 4; /* # fake writer threads */
@@ -206,6 +205,7 @@ static unsigned long boost_starttime; /* jiffies of next boost test start. */
206DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */ 205DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */
207 /* and boost task create/destroy. */ 206 /* and boost task create/destroy. */
208static atomic_t barrier_cbs_count; /* Barrier callbacks registered. */ 207static atomic_t barrier_cbs_count; /* Barrier callbacks registered. */
208static bool barrier_phase; /* Test phase. */
209static atomic_t barrier_cbs_invoked; /* Barrier callbacks invoked. */ 209static atomic_t barrier_cbs_invoked; /* Barrier callbacks invoked. */
210static wait_queue_head_t *barrier_cbs_wq; /* Coordinate barrier testing. */ 210static wait_queue_head_t *barrier_cbs_wq; /* Coordinate barrier testing. */
211static DECLARE_WAIT_QUEUE_HEAD(barrier_wq); 211static DECLARE_WAIT_QUEUE_HEAD(barrier_wq);
@@ -407,8 +407,9 @@ rcu_torture_cb(struct rcu_head *p)
407 if (++rp->rtort_pipe_count >= RCU_TORTURE_PIPE_LEN) { 407 if (++rp->rtort_pipe_count >= RCU_TORTURE_PIPE_LEN) {
408 rp->rtort_mbtest = 0; 408 rp->rtort_mbtest = 0;
409 rcu_torture_free(rp); 409 rcu_torture_free(rp);
410 } else 410 } else {
411 cur_ops->deferred_free(rp); 411 cur_ops->deferred_free(rp);
412 }
412} 413}
413 414
414static int rcu_no_completed(void) 415static int rcu_no_completed(void)
@@ -635,6 +636,17 @@ static void srcu_torture_synchronize(void)
635 synchronize_srcu(&srcu_ctl); 636 synchronize_srcu(&srcu_ctl);
636} 637}
637 638
639static void srcu_torture_call(struct rcu_head *head,
640 void (*func)(struct rcu_head *head))
641{
642 call_srcu(&srcu_ctl, head, func);
643}
644
645static void srcu_torture_barrier(void)
646{
647 srcu_barrier(&srcu_ctl);
648}
649
638static int srcu_torture_stats(char *page) 650static int srcu_torture_stats(char *page)
639{ 651{
640 int cnt = 0; 652 int cnt = 0;
@@ -661,8 +673,8 @@ static struct rcu_torture_ops srcu_ops = {
661 .completed = srcu_torture_completed, 673 .completed = srcu_torture_completed,
662 .deferred_free = srcu_torture_deferred_free, 674 .deferred_free = srcu_torture_deferred_free,
663 .sync = srcu_torture_synchronize, 675 .sync = srcu_torture_synchronize,
664 .call = NULL, 676 .call = srcu_torture_call,
665 .cb_barrier = NULL, 677 .cb_barrier = srcu_torture_barrier,
666 .stats = srcu_torture_stats, 678 .stats = srcu_torture_stats,
667 .name = "srcu" 679 .name = "srcu"
668}; 680};
@@ -1013,7 +1025,11 @@ rcu_torture_fakewriter(void *arg)
1013 do { 1025 do {
1014 schedule_timeout_uninterruptible(1 + rcu_random(&rand)%10); 1026 schedule_timeout_uninterruptible(1 + rcu_random(&rand)%10);
1015 udelay(rcu_random(&rand) & 0x3ff); 1027 udelay(rcu_random(&rand) & 0x3ff);
1016 cur_ops->sync(); 1028 if (cur_ops->cb_barrier != NULL &&
1029 rcu_random(&rand) % (nfakewriters * 8) == 0)
1030 cur_ops->cb_barrier();
1031 else
1032 cur_ops->sync();
1017 rcu_stutter_wait("rcu_torture_fakewriter"); 1033 rcu_stutter_wait("rcu_torture_fakewriter");
1018 } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); 1034 } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP);
1019 1035
@@ -1183,27 +1199,27 @@ rcu_torture_printk(char *page)
1183 } 1199 }
1184 cnt += sprintf(&page[cnt], "%s%s ", torture_type, TORTURE_FLAG); 1200 cnt += sprintf(&page[cnt], "%s%s ", torture_type, TORTURE_FLAG);
1185 cnt += sprintf(&page[cnt], 1201 cnt += sprintf(&page[cnt],
1186 "rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d " 1202 "rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d ",
1187 "rtmbe: %d rtbke: %ld rtbre: %ld "
1188 "rtbf: %ld rtb: %ld nt: %ld "
1189 "onoff: %ld/%ld:%ld/%ld "
1190 "barrier: %ld/%ld:%ld",
1191 rcu_torture_current, 1203 rcu_torture_current,
1192 rcu_torture_current_version, 1204 rcu_torture_current_version,
1193 list_empty(&rcu_torture_freelist), 1205 list_empty(&rcu_torture_freelist),
1194 atomic_read(&n_rcu_torture_alloc), 1206 atomic_read(&n_rcu_torture_alloc),
1195 atomic_read(&n_rcu_torture_alloc_fail), 1207 atomic_read(&n_rcu_torture_alloc_fail),
1196 atomic_read(&n_rcu_torture_free), 1208 atomic_read(&n_rcu_torture_free));
1209 cnt += sprintf(&page[cnt], "rtmbe: %d rtbke: %ld rtbre: %ld ",
1197 atomic_read(&n_rcu_torture_mberror), 1210 atomic_read(&n_rcu_torture_mberror),
1198 n_rcu_torture_boost_ktrerror, 1211 n_rcu_torture_boost_ktrerror,
1199 n_rcu_torture_boost_rterror, 1212 n_rcu_torture_boost_rterror);
1213 cnt += sprintf(&page[cnt], "rtbf: %ld rtb: %ld nt: %ld ",
1200 n_rcu_torture_boost_failure, 1214 n_rcu_torture_boost_failure,
1201 n_rcu_torture_boosts, 1215 n_rcu_torture_boosts,
1202 n_rcu_torture_timers, 1216 n_rcu_torture_timers);
1217 cnt += sprintf(&page[cnt], "onoff: %ld/%ld:%ld/%ld ",
1203 n_online_successes, 1218 n_online_successes,
1204 n_online_attempts, 1219 n_online_attempts,
1205 n_offline_successes, 1220 n_offline_successes,
1206 n_offline_attempts, 1221 n_offline_attempts);
1222 cnt += sprintf(&page[cnt], "barrier: %ld/%ld:%ld",
1207 n_barrier_successes, 1223 n_barrier_successes,
1208 n_barrier_attempts, 1224 n_barrier_attempts,
1209 n_rcu_torture_barrier_error); 1225 n_rcu_torture_barrier_error);
@@ -1445,8 +1461,7 @@ rcu_torture_shutdown(void *arg)
1445 delta = shutdown_time - jiffies_snap; 1461 delta = shutdown_time - jiffies_snap;
1446 if (verbose) 1462 if (verbose)
1447 printk(KERN_ALERT "%s" TORTURE_FLAG 1463 printk(KERN_ALERT "%s" TORTURE_FLAG
1448 "rcu_torture_shutdown task: %lu " 1464 "rcu_torture_shutdown task: %lu jiffies remaining\n",
1449 "jiffies remaining\n",
1450 torture_type, delta); 1465 torture_type, delta);
1451 schedule_timeout_interruptible(delta); 1466 schedule_timeout_interruptible(delta);
1452 jiffies_snap = ACCESS_ONCE(jiffies); 1467 jiffies_snap = ACCESS_ONCE(jiffies);
@@ -1498,8 +1513,7 @@ rcu_torture_onoff(void *arg)
1498 if (cpu_down(cpu) == 0) { 1513 if (cpu_down(cpu) == 0) {
1499 if (verbose) 1514 if (verbose)
1500 printk(KERN_ALERT "%s" TORTURE_FLAG 1515 printk(KERN_ALERT "%s" TORTURE_FLAG
1501 "rcu_torture_onoff task: " 1516 "rcu_torture_onoff task: offlined %d\n",
1502 "offlined %d\n",
1503 torture_type, cpu); 1517 torture_type, cpu);
1504 n_offline_successes++; 1518 n_offline_successes++;
1505 } 1519 }
@@ -1512,8 +1526,7 @@ rcu_torture_onoff(void *arg)
1512 if (cpu_up(cpu) == 0) { 1526 if (cpu_up(cpu) == 0) {
1513 if (verbose) 1527 if (verbose)
1514 printk(KERN_ALERT "%s" TORTURE_FLAG 1528 printk(KERN_ALERT "%s" TORTURE_FLAG
1515 "rcu_torture_onoff task: " 1529 "rcu_torture_onoff task: onlined %d\n",
1516 "onlined %d\n",
1517 torture_type, cpu); 1530 torture_type, cpu);
1518 n_online_successes++; 1531 n_online_successes++;
1519 } 1532 }
@@ -1631,6 +1644,7 @@ void rcu_torture_barrier_cbf(struct rcu_head *rcu)
1631static int rcu_torture_barrier_cbs(void *arg) 1644static int rcu_torture_barrier_cbs(void *arg)
1632{ 1645{
1633 long myid = (long)arg; 1646 long myid = (long)arg;
1647 bool lastphase = 0;
1634 struct rcu_head rcu; 1648 struct rcu_head rcu;
1635 1649
1636 init_rcu_head_on_stack(&rcu); 1650 init_rcu_head_on_stack(&rcu);
@@ -1638,9 +1652,11 @@ static int rcu_torture_barrier_cbs(void *arg)
1638 set_user_nice(current, 19); 1652 set_user_nice(current, 19);
1639 do { 1653 do {
1640 wait_event(barrier_cbs_wq[myid], 1654 wait_event(barrier_cbs_wq[myid],
1641 atomic_read(&barrier_cbs_count) == n_barrier_cbs || 1655 barrier_phase != lastphase ||
1642 kthread_should_stop() || 1656 kthread_should_stop() ||
1643 fullstop != FULLSTOP_DONTSTOP); 1657 fullstop != FULLSTOP_DONTSTOP);
1658 lastphase = barrier_phase;
1659 smp_mb(); /* ensure barrier_phase load before ->call(). */
1644 if (kthread_should_stop() || fullstop != FULLSTOP_DONTSTOP) 1660 if (kthread_should_stop() || fullstop != FULLSTOP_DONTSTOP)
1645 break; 1661 break;
1646 cur_ops->call(&rcu, rcu_torture_barrier_cbf); 1662 cur_ops->call(&rcu, rcu_torture_barrier_cbf);
@@ -1665,7 +1681,8 @@ static int rcu_torture_barrier(void *arg)
1665 do { 1681 do {
1666 atomic_set(&barrier_cbs_invoked, 0); 1682 atomic_set(&barrier_cbs_invoked, 0);
1667 atomic_set(&barrier_cbs_count, n_barrier_cbs); 1683 atomic_set(&barrier_cbs_count, n_barrier_cbs);
1668 /* wake_up() path contains the required barriers. */ 1684 smp_mb(); /* Ensure barrier_phase after prior assignments. */
1685 barrier_phase = !barrier_phase;
1669 for (i = 0; i < n_barrier_cbs; i++) 1686 for (i = 0; i < n_barrier_cbs; i++)
1670 wake_up(&barrier_cbs_wq[i]); 1687 wake_up(&barrier_cbs_wq[i]);
1671 wait_event(barrier_wq, 1688 wait_event(barrier_wq,
@@ -1684,7 +1701,7 @@ static int rcu_torture_barrier(void *arg)
1684 schedule_timeout_interruptible(HZ / 10); 1701 schedule_timeout_interruptible(HZ / 10);
1685 } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); 1702 } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP);
1686 VERBOSE_PRINTK_STRING("rcu_torture_barrier task stopping"); 1703 VERBOSE_PRINTK_STRING("rcu_torture_barrier task stopping");
1687 rcutorture_shutdown_absorb("rcu_torture_barrier_cbs"); 1704 rcutorture_shutdown_absorb("rcu_torture_barrier");
1688 while (!kthread_should_stop()) 1705 while (!kthread_should_stop())
1689 schedule_timeout_interruptible(1); 1706 schedule_timeout_interruptible(1);
1690 return 0; 1707 return 0;
@@ -1908,8 +1925,8 @@ rcu_torture_init(void)
1908 static struct rcu_torture_ops *torture_ops[] = 1925 static struct rcu_torture_ops *torture_ops[] =
1909 { &rcu_ops, &rcu_sync_ops, &rcu_expedited_ops, 1926 { &rcu_ops, &rcu_sync_ops, &rcu_expedited_ops,
1910 &rcu_bh_ops, &rcu_bh_sync_ops, &rcu_bh_expedited_ops, 1927 &rcu_bh_ops, &rcu_bh_sync_ops, &rcu_bh_expedited_ops,
1911 &srcu_ops, &srcu_sync_ops, &srcu_raw_ops, 1928 &srcu_ops, &srcu_sync_ops, &srcu_expedited_ops,
1912 &srcu_raw_sync_ops, &srcu_expedited_ops, 1929 &srcu_raw_ops, &srcu_raw_sync_ops,
1913 &sched_ops, &sched_sync_ops, &sched_expedited_ops, }; 1930 &sched_ops, &sched_sync_ops, &sched_expedited_ops, };
1914 1931
1915 mutex_lock(&fullstop_mutex); 1932 mutex_lock(&fullstop_mutex);
@@ -1931,8 +1948,7 @@ rcu_torture_init(void)
1931 return -EINVAL; 1948 return -EINVAL;
1932 } 1949 }
1933 if (cur_ops->fqs == NULL && fqs_duration != 0) { 1950 if (cur_ops->fqs == NULL && fqs_duration != 0) {
1934 printk(KERN_ALERT "rcu-torture: ->fqs NULL and non-zero " 1951 printk(KERN_ALERT "rcu-torture: ->fqs NULL and non-zero fqs_duration, fqs disabled.\n");
1935 "fqs_duration, fqs disabled.\n");
1936 fqs_duration = 0; 1952 fqs_duration = 0;
1937 } 1953 }
1938 if (cur_ops->init) 1954 if (cur_ops->init)
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 38ecdda3f55f..f280e542e3e9 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -60,36 +60,44 @@
60 60
61/* Data structures. */ 61/* Data structures. */
62 62
63static struct lock_class_key rcu_node_class[NUM_RCU_LVLS]; 63static struct lock_class_key rcu_node_class[RCU_NUM_LVLS];
64 64
65#define RCU_STATE_INITIALIZER(structname) { \ 65#define RCU_STATE_INITIALIZER(sname, cr) { \
66 .level = { &structname##_state.node[0] }, \ 66 .level = { &sname##_state.node[0] }, \
67 .levelcnt = { \ 67 .call = cr, \
68 NUM_RCU_LVL_0, /* root of hierarchy. */ \
69 NUM_RCU_LVL_1, \
70 NUM_RCU_LVL_2, \
71 NUM_RCU_LVL_3, \
72 NUM_RCU_LVL_4, /* == MAX_RCU_LVLS */ \
73 }, \
74 .fqs_state = RCU_GP_IDLE, \ 68 .fqs_state = RCU_GP_IDLE, \
75 .gpnum = -300, \ 69 .gpnum = -300, \
76 .completed = -300, \ 70 .completed = -300, \
77 .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.onofflock), \ 71 .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.onofflock), \
78 .orphan_nxttail = &structname##_state.orphan_nxtlist, \ 72 .orphan_nxttail = &sname##_state.orphan_nxtlist, \
79 .orphan_donetail = &structname##_state.orphan_donelist, \ 73 .orphan_donetail = &sname##_state.orphan_donelist, \
80 .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.fqslock), \ 74 .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \
81 .n_force_qs = 0, \ 75 .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.fqslock), \
82 .n_force_qs_ngp = 0, \ 76 .name = #sname, \
83 .name = #structname, \
84} 77}
85 78
86struct rcu_state rcu_sched_state = RCU_STATE_INITIALIZER(rcu_sched); 79struct rcu_state rcu_sched_state =
80 RCU_STATE_INITIALIZER(rcu_sched, call_rcu_sched);
87DEFINE_PER_CPU(struct rcu_data, rcu_sched_data); 81DEFINE_PER_CPU(struct rcu_data, rcu_sched_data);
88 82
89struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh); 83struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh, call_rcu_bh);
90DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); 84DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
91 85
92static struct rcu_state *rcu_state; 86static struct rcu_state *rcu_state;
87LIST_HEAD(rcu_struct_flavors);
88
89/* Increase (but not decrease) the CONFIG_RCU_FANOUT_LEAF at boot time. */
90static int rcu_fanout_leaf = CONFIG_RCU_FANOUT_LEAF;
91module_param(rcu_fanout_leaf, int, 0);
92int rcu_num_lvls __read_mostly = RCU_NUM_LVLS;
93static int num_rcu_lvl[] = { /* Number of rcu_nodes at specified level. */
94 NUM_RCU_LVL_0,
95 NUM_RCU_LVL_1,
96 NUM_RCU_LVL_2,
97 NUM_RCU_LVL_3,
98 NUM_RCU_LVL_4,
99};
100int rcu_num_nodes __read_mostly = NUM_RCU_NODES; /* Total # rcu_nodes in use. */
93 101
94/* 102/*
95 * The rcu_scheduler_active variable transitions from zero to one just 103 * The rcu_scheduler_active variable transitions from zero to one just
@@ -147,13 +155,6 @@ static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp);
147unsigned long rcutorture_testseq; 155unsigned long rcutorture_testseq;
148unsigned long rcutorture_vernum; 156unsigned long rcutorture_vernum;
149 157
150/* State information for rcu_barrier() and friends. */
151
152static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL};
153static atomic_t rcu_barrier_cpu_count;
154static DEFINE_MUTEX(rcu_barrier_mutex);
155static struct completion rcu_barrier_completion;
156
157/* 158/*
158 * Return true if an RCU grace period is in progress. The ACCESS_ONCE()s 159 * Return true if an RCU grace period is in progress. The ACCESS_ONCE()s
159 * permit this function to be invoked without holding the root rcu_node 160 * permit this function to be invoked without holding the root rcu_node
@@ -201,6 +202,7 @@ void rcu_note_context_switch(int cpu)
201{ 202{
202 trace_rcu_utilization("Start context switch"); 203 trace_rcu_utilization("Start context switch");
203 rcu_sched_qs(cpu); 204 rcu_sched_qs(cpu);
205 rcu_preempt_note_context_switch(cpu);
204 trace_rcu_utilization("End context switch"); 206 trace_rcu_utilization("End context switch");
205} 207}
206EXPORT_SYMBOL_GPL(rcu_note_context_switch); 208EXPORT_SYMBOL_GPL(rcu_note_context_switch);
@@ -357,7 +359,7 @@ static void rcu_idle_enter_common(struct rcu_dynticks *rdtp, long long oldval)
357 struct task_struct *idle = idle_task(smp_processor_id()); 359 struct task_struct *idle = idle_task(smp_processor_id());
358 360
359 trace_rcu_dyntick("Error on entry: not idle task", oldval, 0); 361 trace_rcu_dyntick("Error on entry: not idle task", oldval, 0);
360 ftrace_dump(DUMP_ALL); 362 ftrace_dump(DUMP_ORIG);
361 WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", 363 WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
362 current->pid, current->comm, 364 current->pid, current->comm,
363 idle->pid, idle->comm); /* must be idle task! */ 365 idle->pid, idle->comm); /* must be idle task! */
@@ -467,7 +469,7 @@ static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval)
467 469
468 trace_rcu_dyntick("Error on exit: not idle task", 470 trace_rcu_dyntick("Error on exit: not idle task",
469 oldval, rdtp->dynticks_nesting); 471 oldval, rdtp->dynticks_nesting);
470 ftrace_dump(DUMP_ALL); 472 ftrace_dump(DUMP_ORIG);
471 WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", 473 WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
472 current->pid, current->comm, 474 current->pid, current->comm,
473 idle->pid, idle->comm); /* must be idle task! */ 475 idle->pid, idle->comm); /* must be idle task! */
@@ -584,8 +586,6 @@ void rcu_nmi_exit(void)
584 WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); 586 WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
585} 587}
586 588
587#ifdef CONFIG_PROVE_RCU
588
589/** 589/**
590 * rcu_is_cpu_idle - see if RCU thinks that the current CPU is idle 590 * rcu_is_cpu_idle - see if RCU thinks that the current CPU is idle
591 * 591 *
@@ -603,7 +603,7 @@ int rcu_is_cpu_idle(void)
603} 603}
604EXPORT_SYMBOL(rcu_is_cpu_idle); 604EXPORT_SYMBOL(rcu_is_cpu_idle);
605 605
606#ifdef CONFIG_HOTPLUG_CPU 606#if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU)
607 607
608/* 608/*
609 * Is the current CPU online? Disable preemption to avoid false positives 609 * Is the current CPU online? Disable preemption to avoid false positives
@@ -644,9 +644,7 @@ bool rcu_lockdep_current_cpu_online(void)
644} 644}
645EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online); 645EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online);
646 646
647#endif /* #ifdef CONFIG_HOTPLUG_CPU */ 647#endif /* #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU) */
648
649#endif /* #ifdef CONFIG_PROVE_RCU */
650 648
651/** 649/**
652 * rcu_is_cpu_rrupt_from_idle - see if idle or immediately interrupted from idle 650 * rcu_is_cpu_rrupt_from_idle - see if idle or immediately interrupted from idle
@@ -732,7 +730,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
732 int cpu; 730 int cpu;
733 long delta; 731 long delta;
734 unsigned long flags; 732 unsigned long flags;
735 int ndetected; 733 int ndetected = 0;
736 struct rcu_node *rnp = rcu_get_root(rsp); 734 struct rcu_node *rnp = rcu_get_root(rsp);
737 735
738 /* Only let one CPU complain about others per time interval. */ 736 /* Only let one CPU complain about others per time interval. */
@@ -773,7 +771,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
773 */ 771 */
774 rnp = rcu_get_root(rsp); 772 rnp = rcu_get_root(rsp);
775 raw_spin_lock_irqsave(&rnp->lock, flags); 773 raw_spin_lock_irqsave(&rnp->lock, flags);
776 ndetected = rcu_print_task_stall(rnp); 774 ndetected += rcu_print_task_stall(rnp);
777 raw_spin_unlock_irqrestore(&rnp->lock, flags); 775 raw_spin_unlock_irqrestore(&rnp->lock, flags);
778 776
779 print_cpu_stall_info_end(); 777 print_cpu_stall_info_end();
@@ -859,9 +857,10 @@ static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr)
859 */ 857 */
860void rcu_cpu_stall_reset(void) 858void rcu_cpu_stall_reset(void)
861{ 859{
862 rcu_sched_state.jiffies_stall = jiffies + ULONG_MAX / 2; 860 struct rcu_state *rsp;
863 rcu_bh_state.jiffies_stall = jiffies + ULONG_MAX / 2; 861
864 rcu_preempt_stall_reset(); 862 for_each_rcu_flavor(rsp)
863 rsp->jiffies_stall = jiffies + ULONG_MAX / 2;
865} 864}
866 865
867static struct notifier_block rcu_panic_block = { 866static struct notifier_block rcu_panic_block = {
@@ -893,8 +892,9 @@ static void __note_new_gpnum(struct rcu_state *rsp, struct rcu_node *rnp, struct
893 if (rnp->qsmask & rdp->grpmask) { 892 if (rnp->qsmask & rdp->grpmask) {
894 rdp->qs_pending = 1; 893 rdp->qs_pending = 1;
895 rdp->passed_quiesce = 0; 894 rdp->passed_quiesce = 0;
896 } else 895 } else {
897 rdp->qs_pending = 0; 896 rdp->qs_pending = 0;
897 }
898 zero_cpu_stall_ticks(rdp); 898 zero_cpu_stall_ticks(rdp);
899 } 899 }
900} 900}
@@ -936,6 +936,18 @@ check_for_new_grace_period(struct rcu_state *rsp, struct rcu_data *rdp)
936} 936}
937 937
938/* 938/*
939 * Initialize the specified rcu_data structure's callback list to empty.
940 */
941static void init_callback_list(struct rcu_data *rdp)
942{
943 int i;
944
945 rdp->nxtlist = NULL;
946 for (i = 0; i < RCU_NEXT_SIZE; i++)
947 rdp->nxttail[i] = &rdp->nxtlist;
948}
949
950/*
939 * Advance this CPU's callbacks, but only if the current grace period 951 * Advance this CPU's callbacks, but only if the current grace period
940 * has ended. This may be called only from the CPU to whom the rdp 952 * has ended. This may be called only from the CPU to whom the rdp
941 * belongs. In addition, the corresponding leaf rcu_node structure's 953 * belongs. In addition, the corresponding leaf rcu_node structure's
@@ -1327,8 +1339,6 @@ static void
1327rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp, 1339rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
1328 struct rcu_node *rnp, struct rcu_data *rdp) 1340 struct rcu_node *rnp, struct rcu_data *rdp)
1329{ 1341{
1330 int i;
1331
1332 /* 1342 /*
1333 * Orphan the callbacks. First adjust the counts. This is safe 1343 * Orphan the callbacks. First adjust the counts. This is safe
1334 * because ->onofflock excludes _rcu_barrier()'s adoption of 1344 * because ->onofflock excludes _rcu_barrier()'s adoption of
@@ -1339,7 +1349,7 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
1339 rsp->qlen += rdp->qlen; 1349 rsp->qlen += rdp->qlen;
1340 rdp->n_cbs_orphaned += rdp->qlen; 1350 rdp->n_cbs_orphaned += rdp->qlen;
1341 rdp->qlen_lazy = 0; 1351 rdp->qlen_lazy = 0;
1342 rdp->qlen = 0; 1352 ACCESS_ONCE(rdp->qlen) = 0;
1343 } 1353 }
1344 1354
1345 /* 1355 /*
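
The switch from a plain store to ACCESS_ONCE(rdp->qlen) = 0 matters because ->qlen is now read locklessly elsewhere, for example by the offline-CPU polling loop in _rcu_barrier() further down. Roughly, ACCESS_ONCE() is a volatile cast that makes the compiler perform exactly one load or store for that access instead of caching, refetching, or tearing it. A minimal userspace rendition of the idiom, for illustration only (the variable and helper names are invented):

#include <stdio.h>

/* Userspace rendition of the ACCESS_ONCE() idiom; illustrative only. */
#define ACCESS_ONCE(x) (*(volatile __typeof__(x) *)&(x))

static unsigned long qlen;              /* stands in for rdp->qlen */

static void enqueue_one(void)
{
        ACCESS_ONCE(qlen) = ACCESS_ONCE(qlen) + 1;  /* one load, one store */
}

static unsigned long sample_qlen(void)
{
        return ACCESS_ONCE(qlen);       /* lockless reader gets a real load */
}

int main(void)
{
        enqueue_one();
        printf("qlen = %lu\n", sample_qlen());
        return 0;
}

Note that this provides no ordering against other memory accesses; the surrounding code still relies on ->onofflock and explicit smp_mb() calls for that.
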
@@ -1368,9 +1378,7 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
1368 } 1378 }
1369 1379
1370 /* Finally, initialize the rcu_data structure's list to empty. */ 1380 /* Finally, initialize the rcu_data structure's list to empty. */
1371 rdp->nxtlist = NULL; 1381 init_callback_list(rdp);
1372 for (i = 0; i < RCU_NEXT_SIZE; i++)
1373 rdp->nxttail[i] = &rdp->nxtlist;
1374} 1382}
1375 1383
1376/* 1384/*
@@ -1504,6 +1512,9 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
1504 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1512 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1505 if (need_report & RCU_OFL_TASKS_EXP_GP) 1513 if (need_report & RCU_OFL_TASKS_EXP_GP)
1506 rcu_report_exp_rnp(rsp, rnp, true); 1514 rcu_report_exp_rnp(rsp, rnp, true);
1515 WARN_ONCE(rdp->qlen != 0 || rdp->nxtlist != NULL,
1516 "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, nxtlist=%p\n",
1517 cpu, rdp->qlen, rdp->nxtlist);
1507} 1518}
1508 1519
1509#else /* #ifdef CONFIG_HOTPLUG_CPU */ 1520#else /* #ifdef CONFIG_HOTPLUG_CPU */
@@ -1591,7 +1602,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
1591 } 1602 }
1592 smp_mb(); /* List handling before counting for rcu_barrier(). */ 1603 smp_mb(); /* List handling before counting for rcu_barrier(). */
1593 rdp->qlen_lazy -= count_lazy; 1604 rdp->qlen_lazy -= count_lazy;
1594 rdp->qlen -= count; 1605 ACCESS_ONCE(rdp->qlen) -= count;
1595 rdp->n_cbs_invoked += count; 1606 rdp->n_cbs_invoked += count;
1596 1607
1597 /* Reinstate batch limit if we have worked down the excess. */ 1608 /* Reinstate batch limit if we have worked down the excess. */
@@ -1604,6 +1615,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
1604 rdp->n_force_qs_snap = rsp->n_force_qs; 1615 rdp->n_force_qs_snap = rsp->n_force_qs;
1605 } else if (rdp->qlen < rdp->qlen_last_fqs_check - qhimark) 1616 } else if (rdp->qlen < rdp->qlen_last_fqs_check - qhimark)
1606 rdp->qlen_last_fqs_check = rdp->qlen; 1617 rdp->qlen_last_fqs_check = rdp->qlen;
1618 WARN_ON_ONCE((rdp->nxtlist == NULL) != (rdp->qlen == 0));
1607 1619
1608 local_irq_restore(flags); 1620 local_irq_restore(flags);
1609 1621
@@ -1744,8 +1756,6 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed)
1744 break; /* grace period idle or initializing, ignore. */ 1756 break; /* grace period idle or initializing, ignore. */
1745 1757
1746 case RCU_SAVE_DYNTICK: 1758 case RCU_SAVE_DYNTICK:
1747 if (RCU_SIGNAL_INIT != RCU_SAVE_DYNTICK)
1748 break; /* So gcc recognizes the dead code. */
1749 1759
1750 raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ 1760 raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
1751 1761
@@ -1787,9 +1797,10 @@ unlock_fqs_ret:
1787 * whom the rdp belongs. 1797 * whom the rdp belongs.
1788 */ 1798 */
1789static void 1799static void
1790__rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) 1800__rcu_process_callbacks(struct rcu_state *rsp)
1791{ 1801{
1792 unsigned long flags; 1802 unsigned long flags;
1803 struct rcu_data *rdp = __this_cpu_ptr(rsp->rda);
1793 1804
1794 WARN_ON_ONCE(rdp->beenonline == 0); 1805 WARN_ON_ONCE(rdp->beenonline == 0);
1795 1806
@@ -1825,11 +1836,11 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
1825 */ 1836 */
1826static void rcu_process_callbacks(struct softirq_action *unused) 1837static void rcu_process_callbacks(struct softirq_action *unused)
1827{ 1838{
1839 struct rcu_state *rsp;
1840
1828 trace_rcu_utilization("Start RCU core"); 1841 trace_rcu_utilization("Start RCU core");
1829 __rcu_process_callbacks(&rcu_sched_state, 1842 for_each_rcu_flavor(rsp)
1830 &__get_cpu_var(rcu_sched_data)); 1843 __rcu_process_callbacks(rsp);
1831 __rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data));
1832 rcu_preempt_process_callbacks();
1833 trace_rcu_utilization("End RCU core"); 1844 trace_rcu_utilization("End RCU core");
1834} 1845}
1835 1846
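
rcu_process_callbacks() no longer hard-codes the rcu_sched, rcu_bh and rcu_preempt flavors; it walks whatever rcu_state structures registered themselves on a global list (the for_each_rcu_flavor() macro appears in the rcutree.h part of this patch). The following is a rough userspace analogue of that registration-plus-iteration pattern; struct rcu_state_demo and the helpers are invented and use a hand-rolled singly linked list rather than the kernel's list_head.

#include <stdio.h>

/* Illustrative userspace analogue of the RCU flavor list; not kernel code. */
struct rcu_state_demo {
        const char *name;
        struct rcu_state_demo *next;        /* stands in for ->flavors */
};

static struct rcu_state_demo *flavor_list;  /* stands in for rcu_struct_flavors */

#define for_each_rcu_flavor(rsp) \
        for ((rsp) = flavor_list; (rsp); (rsp) = (rsp)->next)

static void register_flavor(struct rcu_state_demo *rsp)
{
        rsp->next = flavor_list;            /* what list_add() does in rcu_init_one() */
        flavor_list = rsp;
}

static void process_callbacks(struct rcu_state_demo *rsp)
{
        printf("processing %s\n", rsp->name);
}

int main(void)
{
        struct rcu_state_demo sched = { "rcu_sched" }, bh = { "rcu_bh" };
        struct rcu_state_demo *rsp;

        register_flavor(&sched);
        register_flavor(&bh);
        for_each_rcu_flavor(rsp)            /* replaces the hard-coded calls */
                process_callbacks(rsp);
        return 0;
}

The payoff shows up throughout this patch: adding or removing a flavor no longer means touching every loop that used to name the flavors explicitly.
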
@@ -1856,6 +1867,56 @@ static void invoke_rcu_core(void)
1856 raise_softirq(RCU_SOFTIRQ); 1867 raise_softirq(RCU_SOFTIRQ);
1857} 1868}
1858 1869
1870/*
1871 * Handle any core-RCU processing required by a call_rcu() invocation.
1872 */
1873static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
1874 struct rcu_head *head, unsigned long flags)
1875{
1876 /*
1877 * If called from an extended quiescent state, invoke the RCU
1878 * core in order to force a re-evaluation of RCU's idleness.
1879 */
1880 if (rcu_is_cpu_idle() && cpu_online(smp_processor_id()))
1881 invoke_rcu_core();
1882
1883 /* If interrupts were disabled or CPU offline, don't invoke RCU core. */
1884 if (irqs_disabled_flags(flags) || cpu_is_offline(smp_processor_id()))
1885 return;
1886
1887 /*
1888 * Force the grace period if too many callbacks or too long waiting.
1889 * Enforce hysteresis, and don't invoke force_quiescent_state()
1890 * if some other CPU has recently done so. Also, don't bother
1891 * invoking force_quiescent_state() if the newly enqueued callback
1892 * is the only one waiting for a grace period to complete.
1893 */
1894 if (unlikely(rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) {
1895
1896 /* Are we ignoring a completed grace period? */
1897 rcu_process_gp_end(rsp, rdp);
1898 check_for_new_grace_period(rsp, rdp);
1899
1900 /* Start a new grace period if one not already started. */
1901 if (!rcu_gp_in_progress(rsp)) {
1902 unsigned long nestflag;
1903 struct rcu_node *rnp_root = rcu_get_root(rsp);
1904
1905 raw_spin_lock_irqsave(&rnp_root->lock, nestflag);
1906 rcu_start_gp(rsp, nestflag); /* releases rnp_root->lock */
1907 } else {
1908 /* Give the grace period a kick. */
1909 rdp->blimit = LONG_MAX;
1910 if (rsp->n_force_qs == rdp->n_force_qs_snap &&
1911 *rdp->nxttail[RCU_DONE_TAIL] != head)
1912 force_quiescent_state(rsp, 0);
1913 rdp->n_force_qs_snap = rsp->n_force_qs;
1914 rdp->qlen_last_fqs_check = rdp->qlen;
1915 }
1916 } else if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies))
1917 force_quiescent_state(rsp, 1);
1918}
1919
1859static void 1920static void
1860__call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), 1921__call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
1861 struct rcu_state *rsp, bool lazy) 1922 struct rcu_state *rsp, bool lazy)
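
The hysteresis described in the comment inside __call_rcu_core() hinges on two per-CPU snapshots: ->qlen_last_fqs_check (the queue length the last time a push was considered) and ->n_force_qs_snap (the global force-quiescent-state count at that time). A push is attempted only once the queue has grown by more than qhimark since the last check, and is skipped if some other CPU forced quiescent states in the meantime. Below is a stripped-down model of just that decision in userspace C; QHIMARK, struct cpu_demo and the globals are illustrative stand-ins, and the real function also checks for grace periods in progress, adjusts ->blimit, and so on.

#include <stdio.h>

/* Illustrative model of the __call_rcu_core() hysteresis check. */
#define QHIMARK 10000UL

struct cpu_demo {
        unsigned long qlen;                 /* callbacks queued on this CPU */
        unsigned long qlen_last_fqs_check;  /* qlen when we last pushed */
        unsigned long n_force_qs_snap;      /* global force-qs count then */
};

static unsigned long n_force_qs;            /* global: how often fqs ran */

static void enqueue(struct cpu_demo *c)
{
        c->qlen++;
        if (c->qlen <= c->qlen_last_fqs_check + QHIMARK)
                return;                     /* not enough growth: do nothing */
        if (n_force_qs == c->n_force_qs_snap) {
                n_force_qs++;               /* nobody else pushed: push now */
                printf("forcing quiescent states at qlen=%lu\n", c->qlen);
        }
        c->n_force_qs_snap = n_force_qs;    /* re-arm the hysteresis */
        c->qlen_last_fqs_check = c->qlen;
}

int main(void)
{
        struct cpu_demo c = { 0, 0, 0 };
        unsigned long i;

        for (i = 0; i < 25000; i++)
                enqueue(&c);                /* pushes once past 10000, once past ~20001 */
        return 0;
}
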
@@ -1880,7 +1941,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
1880 rdp = this_cpu_ptr(rsp->rda); 1941 rdp = this_cpu_ptr(rsp->rda);
1881 1942
1882 /* Add the callback to our list. */ 1943 /* Add the callback to our list. */
1883 rdp->qlen++; 1944 ACCESS_ONCE(rdp->qlen)++;
1884 if (lazy) 1945 if (lazy)
1885 rdp->qlen_lazy++; 1946 rdp->qlen_lazy++;
1886 else 1947 else
@@ -1895,43 +1956,8 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
1895 else 1956 else
1896 trace_rcu_callback(rsp->name, head, rdp->qlen_lazy, rdp->qlen); 1957 trace_rcu_callback(rsp->name, head, rdp->qlen_lazy, rdp->qlen);
1897 1958
1898 /* If interrupts were disabled, don't dive into RCU core. */ 1959 /* Go handle any RCU core processing required. */
1899 if (irqs_disabled_flags(flags)) { 1960 __call_rcu_core(rsp, rdp, head, flags);
1900 local_irq_restore(flags);
1901 return;
1902 }
1903
1904 /*
1905 * Force the grace period if too many callbacks or too long waiting.
1906 * Enforce hysteresis, and don't invoke force_quiescent_state()
1907 * if some other CPU has recently done so. Also, don't bother
1908 * invoking force_quiescent_state() if the newly enqueued callback
1909 * is the only one waiting for a grace period to complete.
1910 */
1911 if (unlikely(rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) {
1912
1913 /* Are we ignoring a completed grace period? */
1914 rcu_process_gp_end(rsp, rdp);
1915 check_for_new_grace_period(rsp, rdp);
1916
1917 /* Start a new grace period if one not already started. */
1918 if (!rcu_gp_in_progress(rsp)) {
1919 unsigned long nestflag;
1920 struct rcu_node *rnp_root = rcu_get_root(rsp);
1921
1922 raw_spin_lock_irqsave(&rnp_root->lock, nestflag);
1923 rcu_start_gp(rsp, nestflag); /* releases rnp_root->lock */
1924 } else {
1925 /* Give the grace period a kick. */
1926 rdp->blimit = LONG_MAX;
1927 if (rsp->n_force_qs == rdp->n_force_qs_snap &&
1928 *rdp->nxttail[RCU_DONE_TAIL] != head)
1929 force_quiescent_state(rsp, 0);
1930 rdp->n_force_qs_snap = rsp->n_force_qs;
1931 rdp->qlen_last_fqs_check = rdp->qlen;
1932 }
1933 } else if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies))
1934 force_quiescent_state(rsp, 1);
1935 local_irq_restore(flags); 1961 local_irq_restore(flags);
1936} 1962}
1937 1963
@@ -1961,28 +1987,16 @@ EXPORT_SYMBOL_GPL(call_rcu_bh);
1961 * occasionally incorrectly indicate that there are multiple CPUs online 1987 * occasionally incorrectly indicate that there are multiple CPUs online
1962 * when there was in fact only one the whole time, as this just adds 1988 * when there was in fact only one the whole time, as this just adds
1963 * some overhead: RCU still operates correctly. 1989 * some overhead: RCU still operates correctly.
1964 *
1965 * Of course, sampling num_online_cpus() with preemption enabled can
1966 * give erroneous results if there are concurrent CPU-hotplug operations.
1967 * For example, given a demonic sequence of preemptions in num_online_cpus()
1968 * and CPU-hotplug operations, there could be two or more CPUs online at
1969 * all times, but num_online_cpus() might well return one (or even zero).
1970 *
1971 * However, all such demonic sequences require at least one CPU-offline
1972 * operation. Furthermore, rcu_blocking_is_gp() giving the wrong answer
1973 * is only a problem if there is an RCU read-side critical section executing
1974 * throughout. But RCU-sched and RCU-bh read-side critical sections
1975 * disable either preemption or bh, which prevents a CPU from going offline.
1976 * Therefore, the only way that rcu_blocking_is_gp() can incorrectly return
1977 * that there is only one CPU when in fact there was more than one throughout
1978 * is when there were no RCU readers in the system. If there are no
1979 * RCU readers, the grace period by definition can be of zero length,
1980 * regardless of the number of online CPUs.
1981 */ 1990 */
1982static inline int rcu_blocking_is_gp(void) 1991static inline int rcu_blocking_is_gp(void)
1983{ 1992{
1993 int ret;
1994
1984 might_sleep(); /* Check for RCU read-side critical section. */ 1995 might_sleep(); /* Check for RCU read-side critical section. */
1985 return num_online_cpus() <= 1; 1996 preempt_disable();
1997 ret = num_online_cpus() <= 1;
1998 preempt_enable();
1999 return ret;
1986} 2000}
1987 2001
1988/** 2002/**
@@ -2117,9 +2131,9 @@ void synchronize_sched_expedited(void)
2117 put_online_cpus(); 2131 put_online_cpus();
2118 2132
2119 /* No joy, try again later. Or just synchronize_sched(). */ 2133 /* No joy, try again later. Or just synchronize_sched(). */
2120 if (trycount++ < 10) 2134 if (trycount++ < 10) {
2121 udelay(trycount * num_online_cpus()); 2135 udelay(trycount * num_online_cpus());
2122 else { 2136 } else {
2123 synchronize_sched(); 2137 synchronize_sched();
2124 return; 2138 return;
2125 } 2139 }
@@ -2240,9 +2254,12 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
2240 */ 2254 */
2241static int rcu_pending(int cpu) 2255static int rcu_pending(int cpu)
2242{ 2256{
2243 return __rcu_pending(&rcu_sched_state, &per_cpu(rcu_sched_data, cpu)) || 2257 struct rcu_state *rsp;
2244 __rcu_pending(&rcu_bh_state, &per_cpu(rcu_bh_data, cpu)) || 2258
2245 rcu_preempt_pending(cpu); 2259 for_each_rcu_flavor(rsp)
2260 if (__rcu_pending(rsp, per_cpu_ptr(rsp->rda, cpu)))
2261 return 1;
2262 return 0;
2246} 2263}
2247 2264
2248/* 2265/*
@@ -2252,20 +2269,41 @@ static int rcu_pending(int cpu)
2252 */ 2269 */
2253static int rcu_cpu_has_callbacks(int cpu) 2270static int rcu_cpu_has_callbacks(int cpu)
2254{ 2271{
2272 struct rcu_state *rsp;
2273
2255 /* RCU callbacks either ready or pending? */ 2274 /* RCU callbacks either ready or pending? */
2256 return per_cpu(rcu_sched_data, cpu).nxtlist || 2275 for_each_rcu_flavor(rsp)
2257 per_cpu(rcu_bh_data, cpu).nxtlist || 2276 if (per_cpu_ptr(rsp->rda, cpu)->nxtlist)
2258 rcu_preempt_cpu_has_callbacks(cpu); 2277 return 1;
2278 return 0;
2279}
2280
2281/*
2282 * Helper function for _rcu_barrier() tracing. If tracing is disabled,
2283 * the compiler is expected to optimize this away.
2284 */
2285static void _rcu_barrier_trace(struct rcu_state *rsp, char *s,
2286 int cpu, unsigned long done)
2287{
2288 trace_rcu_barrier(rsp->name, s, cpu,
2289 atomic_read(&rsp->barrier_cpu_count), done);
2259} 2290}
2260 2291
2261/* 2292/*
2262 * RCU callback function for _rcu_barrier(). If we are last, wake 2293 * RCU callback function for _rcu_barrier(). If we are last, wake
2263 * up the task executing _rcu_barrier(). 2294 * up the task executing _rcu_barrier().
2264 */ 2295 */
2265static void rcu_barrier_callback(struct rcu_head *notused) 2296static void rcu_barrier_callback(struct rcu_head *rhp)
2266{ 2297{
2267 if (atomic_dec_and_test(&rcu_barrier_cpu_count)) 2298 struct rcu_data *rdp = container_of(rhp, struct rcu_data, barrier_head);
2268 complete(&rcu_barrier_completion); 2299 struct rcu_state *rsp = rdp->rsp;
2300
2301 if (atomic_dec_and_test(&rsp->barrier_cpu_count)) {
2302 _rcu_barrier_trace(rsp, "LastCB", -1, rsp->n_barrier_done);
2303 complete(&rsp->barrier_completion);
2304 } else {
2305 _rcu_barrier_trace(rsp, "CB", -1, rsp->n_barrier_done);
2306 }
2269} 2307}
2270 2308
2271/* 2309/*
@@ -2273,35 +2311,63 @@ static void rcu_barrier_callback(struct rcu_head *notused)
2273 */ 2311 */
2274static void rcu_barrier_func(void *type) 2312static void rcu_barrier_func(void *type)
2275{ 2313{
2276 int cpu = smp_processor_id(); 2314 struct rcu_state *rsp = type;
2277 struct rcu_head *head = &per_cpu(rcu_barrier_head, cpu); 2315 struct rcu_data *rdp = __this_cpu_ptr(rsp->rda);
2278 void (*call_rcu_func)(struct rcu_head *head,
2279 void (*func)(struct rcu_head *head));
2280 2316
2281 atomic_inc(&rcu_barrier_cpu_count); 2317 _rcu_barrier_trace(rsp, "IRQ", -1, rsp->n_barrier_done);
2282 call_rcu_func = type; 2318 atomic_inc(&rsp->barrier_cpu_count);
2283 call_rcu_func(head, rcu_barrier_callback); 2319 rsp->call(&rdp->barrier_head, rcu_barrier_callback);
2284} 2320}
2285 2321
2286/* 2322/*
2287 * Orchestrate the specified type of RCU barrier, waiting for all 2323 * Orchestrate the specified type of RCU barrier, waiting for all
2288 * RCU callbacks of the specified type to complete. 2324 * RCU callbacks of the specified type to complete.
2289 */ 2325 */
2290static void _rcu_barrier(struct rcu_state *rsp, 2326static void _rcu_barrier(struct rcu_state *rsp)
2291 void (*call_rcu_func)(struct rcu_head *head,
2292 void (*func)(struct rcu_head *head)))
2293{ 2327{
2294 int cpu; 2328 int cpu;
2295 unsigned long flags; 2329 unsigned long flags;
2296 struct rcu_data *rdp; 2330 struct rcu_data *rdp;
2297 struct rcu_head rh; 2331 struct rcu_data rd;
2332 unsigned long snap = ACCESS_ONCE(rsp->n_barrier_done);
2333 unsigned long snap_done;
2298 2334
2299 init_rcu_head_on_stack(&rh); 2335 init_rcu_head_on_stack(&rd.barrier_head);
2336 _rcu_barrier_trace(rsp, "Begin", -1, snap);
2300 2337
2301 /* Take mutex to serialize concurrent rcu_barrier() requests. */ 2338 /* Take mutex to serialize concurrent rcu_barrier() requests. */
2302 mutex_lock(&rcu_barrier_mutex); 2339 mutex_lock(&rsp->barrier_mutex);
2340
2341 /*
2342 * Ensure that all prior references, including to ->n_barrier_done,
2343 * are ordered before the _rcu_barrier() machinery.
2344 */
2345 smp_mb(); /* See above block comment. */
2346
2347 /*
2348 * Recheck ->n_barrier_done to see if others did our work for us.
2349 * This means checking ->n_barrier_done for an even-to-odd-to-even
2350 * transition. The "if" expression below therefore rounds the old
2351 * value up to the next even number and adds two before comparing.
2352 */
2353 snap_done = ACCESS_ONCE(rsp->n_barrier_done);
2354 _rcu_barrier_trace(rsp, "Check", -1, snap_done);
2355 if (ULONG_CMP_GE(snap_done, ((snap + 1) & ~0x1) + 2)) {
2356 _rcu_barrier_trace(rsp, "EarlyExit", -1, snap_done);
2357 smp_mb(); /* caller's subsequent code after above check. */
2358 mutex_unlock(&rsp->barrier_mutex);
2359 return;
2360 }
2303 2361
2304 smp_mb(); /* Prevent any prior operations from leaking in. */ 2362 /*
2363 * Increment ->n_barrier_done to avoid duplicate work. Use
2364 * ACCESS_ONCE() to prevent the compiler from speculating
2365 * the increment to precede the early-exit check.
2366 */
2367 ACCESS_ONCE(rsp->n_barrier_done)++;
2368 WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 1);
2369 _rcu_barrier_trace(rsp, "Inc1", -1, rsp->n_barrier_done);
2370 smp_mb(); /* Order ->n_barrier_done increment with below mechanism. */
2305 2371
2306 /* 2372 /*
2307 * Initialize the count to one rather than to zero in order to 2373 * Initialize the count to one rather than to zero in order to
@@ -2320,8 +2386,8 @@ static void _rcu_barrier(struct rcu_state *rsp,
2320 * 6. Both rcu_barrier_callback() callbacks are invoked, awakening 2386 * 6. Both rcu_barrier_callback() callbacks are invoked, awakening
2321 * us -- but before CPU 1's orphaned callbacks are invoked!!! 2387 * us -- but before CPU 1's orphaned callbacks are invoked!!!
2322 */ 2388 */
2323 init_completion(&rcu_barrier_completion); 2389 init_completion(&rsp->barrier_completion);
2324 atomic_set(&rcu_barrier_cpu_count, 1); 2390 atomic_set(&rsp->barrier_cpu_count, 1);
2325 raw_spin_lock_irqsave(&rsp->onofflock, flags); 2391 raw_spin_lock_irqsave(&rsp->onofflock, flags);
2326 rsp->rcu_barrier_in_progress = current; 2392 rsp->rcu_barrier_in_progress = current;
2327 raw_spin_unlock_irqrestore(&rsp->onofflock, flags); 2393 raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
@@ -2337,14 +2403,19 @@ static void _rcu_barrier(struct rcu_state *rsp,
2337 preempt_disable(); 2403 preempt_disable();
2338 rdp = per_cpu_ptr(rsp->rda, cpu); 2404 rdp = per_cpu_ptr(rsp->rda, cpu);
2339 if (cpu_is_offline(cpu)) { 2405 if (cpu_is_offline(cpu)) {
2406 _rcu_barrier_trace(rsp, "Offline", cpu,
2407 rsp->n_barrier_done);
2340 preempt_enable(); 2408 preempt_enable();
2341 while (cpu_is_offline(cpu) && ACCESS_ONCE(rdp->qlen)) 2409 while (cpu_is_offline(cpu) && ACCESS_ONCE(rdp->qlen))
2342 schedule_timeout_interruptible(1); 2410 schedule_timeout_interruptible(1);
2343 } else if (ACCESS_ONCE(rdp->qlen)) { 2411 } else if (ACCESS_ONCE(rdp->qlen)) {
2344 smp_call_function_single(cpu, rcu_barrier_func, 2412 _rcu_barrier_trace(rsp, "OnlineQ", cpu,
2345 (void *)call_rcu_func, 1); 2413 rsp->n_barrier_done);
2414 smp_call_function_single(cpu, rcu_barrier_func, rsp, 1);
2346 preempt_enable(); 2415 preempt_enable();
2347 } else { 2416 } else {
2417 _rcu_barrier_trace(rsp, "OnlineNQ", cpu,
2418 rsp->n_barrier_done);
2348 preempt_enable(); 2419 preempt_enable();
2349 } 2420 }
2350 } 2421 }
@@ -2361,24 +2432,32 @@ static void _rcu_barrier(struct rcu_state *rsp,
2361 rcu_adopt_orphan_cbs(rsp); 2432 rcu_adopt_orphan_cbs(rsp);
2362 rsp->rcu_barrier_in_progress = NULL; 2433 rsp->rcu_barrier_in_progress = NULL;
2363 raw_spin_unlock_irqrestore(&rsp->onofflock, flags); 2434 raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
2364 atomic_inc(&rcu_barrier_cpu_count); 2435 atomic_inc(&rsp->barrier_cpu_count);
2365 smp_mb__after_atomic_inc(); /* Ensure atomic_inc() before callback. */ 2436 smp_mb__after_atomic_inc(); /* Ensure atomic_inc() before callback. */
2366 call_rcu_func(&rh, rcu_barrier_callback); 2437 rd.rsp = rsp;
2438 rsp->call(&rd.barrier_head, rcu_barrier_callback);
2367 2439
2368 /* 2440 /*
2369 * Now that we have an rcu_barrier_callback() callback on each 2441 * Now that we have an rcu_barrier_callback() callback on each
2370 * CPU, and thus each counted, remove the initial count. 2442 * CPU, and thus each counted, remove the initial count.
2371 */ 2443 */
2372 if (atomic_dec_and_test(&rcu_barrier_cpu_count)) 2444 if (atomic_dec_and_test(&rsp->barrier_cpu_count))
2373 complete(&rcu_barrier_completion); 2445 complete(&rsp->barrier_completion);
2446
2447 /* Increment ->n_barrier_done to prevent duplicate work. */
2448 smp_mb(); /* Keep increment after above mechanism. */
2449 ACCESS_ONCE(rsp->n_barrier_done)++;
2450 WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 0);
2451 _rcu_barrier_trace(rsp, "Inc2", -1, rsp->n_barrier_done);
2452 smp_mb(); /* Keep increment before caller's subsequent code. */
2374 2453
2375 /* Wait for all rcu_barrier_callback() callbacks to be invoked. */ 2454 /* Wait for all rcu_barrier_callback() callbacks to be invoked. */
2376 wait_for_completion(&rcu_barrier_completion); 2455 wait_for_completion(&rsp->barrier_completion);
2377 2456
2378 /* Other rcu_barrier() invocations can now safely proceed. */ 2457 /* Other rcu_barrier() invocations can now safely proceed. */
2379 mutex_unlock(&rcu_barrier_mutex); 2458 mutex_unlock(&rsp->barrier_mutex);
2380 2459
2381 destroy_rcu_head_on_stack(&rh); 2460 destroy_rcu_head_on_stack(&rd.barrier_head);
2382} 2461}
2383 2462
2384/** 2463/**
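
Starting ->barrier_cpu_count at one and dropping that extra reference only after every per-CPU callback has been posted is what keeps the completion from firing while callbacks are still being registered, as the numbered scenario in the comment above spells out. The same pattern in miniature, single-threaded and using C11 atomics plus a plain flag in place of the kernel's completion; all names here are invented for the sketch:

#include <stdio.h>
#include <stdatomic.h>

/* Miniature of the "start the count at one" pattern from _rcu_barrier(). */
static atomic_int barrier_cpu_count;
static int barrier_completed;

static void barrier_callback(void)        /* one of these runs per CPU */
{
        if (atomic_fetch_sub(&barrier_cpu_count, 1) == 1)
                barrier_completed = 1;    /* last reference gone: complete */
}

int main(void)
{
        int cpu, ncpus = 4;

        atomic_store(&barrier_cpu_count, 1);  /* extra ref held by the orchestrator */
        for (cpu = 0; cpu < ncpus; cpu++)
                atomic_fetch_add(&barrier_cpu_count, 1);  /* post per-CPU callback */

        for (cpu = 0; cpu < ncpus; cpu++)
                barrier_callback();       /* callbacks may run at any point above */

        /* Only now drop the initial reference; completion cannot fire early. */
        if (atomic_fetch_sub(&barrier_cpu_count, 1) == 1)
                barrier_completed = 1;
        printf("completed = %d\n", barrier_completed);
        return 0;
}
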
@@ -2386,7 +2465,7 @@ static void _rcu_barrier(struct rcu_state *rsp,
2386 */ 2465 */
2387void rcu_barrier_bh(void) 2466void rcu_barrier_bh(void)
2388{ 2467{
2389 _rcu_barrier(&rcu_bh_state, call_rcu_bh); 2468 _rcu_barrier(&rcu_bh_state);
2390} 2469}
2391EXPORT_SYMBOL_GPL(rcu_barrier_bh); 2470EXPORT_SYMBOL_GPL(rcu_barrier_bh);
2392 2471
@@ -2395,7 +2474,7 @@ EXPORT_SYMBOL_GPL(rcu_barrier_bh);
2395 */ 2474 */
2396void rcu_barrier_sched(void) 2475void rcu_barrier_sched(void)
2397{ 2476{
2398 _rcu_barrier(&rcu_sched_state, call_rcu_sched); 2477 _rcu_barrier(&rcu_sched_state);
2399} 2478}
2400EXPORT_SYMBOL_GPL(rcu_barrier_sched); 2479EXPORT_SYMBOL_GPL(rcu_barrier_sched);
2401 2480
@@ -2406,18 +2485,15 @@ static void __init
2406rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) 2485rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
2407{ 2486{
2408 unsigned long flags; 2487 unsigned long flags;
2409 int i;
2410 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); 2488 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
2411 struct rcu_node *rnp = rcu_get_root(rsp); 2489 struct rcu_node *rnp = rcu_get_root(rsp);
2412 2490
2413 /* Set up local state, ensuring consistent view of global state. */ 2491 /* Set up local state, ensuring consistent view of global state. */
2414 raw_spin_lock_irqsave(&rnp->lock, flags); 2492 raw_spin_lock_irqsave(&rnp->lock, flags);
2415 rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo); 2493 rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo);
2416 rdp->nxtlist = NULL; 2494 init_callback_list(rdp);
2417 for (i = 0; i < RCU_NEXT_SIZE; i++)
2418 rdp->nxttail[i] = &rdp->nxtlist;
2419 rdp->qlen_lazy = 0; 2495 rdp->qlen_lazy = 0;
2420 rdp->qlen = 0; 2496 ACCESS_ONCE(rdp->qlen) = 0;
2421 rdp->dynticks = &per_cpu(rcu_dynticks, cpu); 2497 rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
2422 WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE); 2498 WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE);
2423 WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1); 2499 WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1);
@@ -2491,9 +2567,11 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
2491 2567
2492static void __cpuinit rcu_prepare_cpu(int cpu) 2568static void __cpuinit rcu_prepare_cpu(int cpu)
2493{ 2569{
2494 rcu_init_percpu_data(cpu, &rcu_sched_state, 0); 2570 struct rcu_state *rsp;
2495 rcu_init_percpu_data(cpu, &rcu_bh_state, 0); 2571
2496 rcu_preempt_init_percpu_data(cpu); 2572 for_each_rcu_flavor(rsp)
2573 rcu_init_percpu_data(cpu, rsp,
2574 strcmp(rsp->name, "rcu_preempt") == 0);
2497} 2575}
2498 2576
2499/* 2577/*
@@ -2505,6 +2583,7 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
2505 long cpu = (long)hcpu; 2583 long cpu = (long)hcpu;
2506 struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); 2584 struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
2507 struct rcu_node *rnp = rdp->mynode; 2585 struct rcu_node *rnp = rdp->mynode;
2586 struct rcu_state *rsp;
2508 2587
2509 trace_rcu_utilization("Start CPU hotplug"); 2588 trace_rcu_utilization("Start CPU hotplug");
2510 switch (action) { 2589 switch (action) {
@@ -2529,18 +2608,16 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
2529 * touch any data without introducing corruption. We send the 2608 * touch any data without introducing corruption. We send the
2530 * dying CPU's callbacks to an arbitrarily chosen online CPU. 2609 * dying CPU's callbacks to an arbitrarily chosen online CPU.
2531 */ 2610 */
2532 rcu_cleanup_dying_cpu(&rcu_bh_state); 2611 for_each_rcu_flavor(rsp)
2533 rcu_cleanup_dying_cpu(&rcu_sched_state); 2612 rcu_cleanup_dying_cpu(rsp);
2534 rcu_preempt_cleanup_dying_cpu();
2535 rcu_cleanup_after_idle(cpu); 2613 rcu_cleanup_after_idle(cpu);
2536 break; 2614 break;
2537 case CPU_DEAD: 2615 case CPU_DEAD:
2538 case CPU_DEAD_FROZEN: 2616 case CPU_DEAD_FROZEN:
2539 case CPU_UP_CANCELED: 2617 case CPU_UP_CANCELED:
2540 case CPU_UP_CANCELED_FROZEN: 2618 case CPU_UP_CANCELED_FROZEN:
2541 rcu_cleanup_dead_cpu(cpu, &rcu_bh_state); 2619 for_each_rcu_flavor(rsp)
2542 rcu_cleanup_dead_cpu(cpu, &rcu_sched_state); 2620 rcu_cleanup_dead_cpu(cpu, rsp);
2543 rcu_preempt_cleanup_dead_cpu(cpu);
2544 break; 2621 break;
2545 default: 2622 default:
2546 break; 2623 break;
@@ -2573,9 +2650,9 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp)
2573{ 2650{
2574 int i; 2651 int i;
2575 2652
2576 for (i = NUM_RCU_LVLS - 1; i > 0; i--) 2653 for (i = rcu_num_lvls - 1; i > 0; i--)
2577 rsp->levelspread[i] = CONFIG_RCU_FANOUT; 2654 rsp->levelspread[i] = CONFIG_RCU_FANOUT;
2578 rsp->levelspread[0] = CONFIG_RCU_FANOUT_LEAF; 2655 rsp->levelspread[0] = rcu_fanout_leaf;
2579} 2656}
2580#else /* #ifdef CONFIG_RCU_FANOUT_EXACT */ 2657#else /* #ifdef CONFIG_RCU_FANOUT_EXACT */
2581static void __init rcu_init_levelspread(struct rcu_state *rsp) 2658static void __init rcu_init_levelspread(struct rcu_state *rsp)
@@ -2585,7 +2662,7 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp)
2585 int i; 2662 int i;
2586 2663
2587 cprv = NR_CPUS; 2664 cprv = NR_CPUS;
2588 for (i = NUM_RCU_LVLS - 1; i >= 0; i--) { 2665 for (i = rcu_num_lvls - 1; i >= 0; i--) {
2589 ccur = rsp->levelcnt[i]; 2666 ccur = rsp->levelcnt[i];
2590 rsp->levelspread[i] = (cprv + ccur - 1) / ccur; 2667 rsp->levelspread[i] = (cprv + ccur - 1) / ccur;
2591 cprv = ccur; 2668 cprv = ccur;
@@ -2612,13 +2689,15 @@ static void __init rcu_init_one(struct rcu_state *rsp,
2612 2689
2613 /* Initialize the level-tracking arrays. */ 2690 /* Initialize the level-tracking arrays. */
2614 2691
2615 for (i = 1; i < NUM_RCU_LVLS; i++) 2692 for (i = 0; i < rcu_num_lvls; i++)
2693 rsp->levelcnt[i] = num_rcu_lvl[i];
2694 for (i = 1; i < rcu_num_lvls; i++)
2616 rsp->level[i] = rsp->level[i - 1] + rsp->levelcnt[i - 1]; 2695 rsp->level[i] = rsp->level[i - 1] + rsp->levelcnt[i - 1];
2617 rcu_init_levelspread(rsp); 2696 rcu_init_levelspread(rsp);
2618 2697
2619 /* Initialize the elements themselves, starting from the leaves. */ 2698 /* Initialize the elements themselves, starting from the leaves. */
2620 2699
2621 for (i = NUM_RCU_LVLS - 1; i >= 0; i--) { 2700 for (i = rcu_num_lvls - 1; i >= 0; i--) {
2622 cpustride *= rsp->levelspread[i]; 2701 cpustride *= rsp->levelspread[i];
2623 rnp = rsp->level[i]; 2702 rnp = rsp->level[i];
2624 for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) { 2703 for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) {
@@ -2648,13 +2727,74 @@ static void __init rcu_init_one(struct rcu_state *rsp,
2648 } 2727 }
2649 2728
2650 rsp->rda = rda; 2729 rsp->rda = rda;
2651 rnp = rsp->level[NUM_RCU_LVLS - 1]; 2730 rnp = rsp->level[rcu_num_lvls - 1];
2652 for_each_possible_cpu(i) { 2731 for_each_possible_cpu(i) {
2653 while (i > rnp->grphi) 2732 while (i > rnp->grphi)
2654 rnp++; 2733 rnp++;
2655 per_cpu_ptr(rsp->rda, i)->mynode = rnp; 2734 per_cpu_ptr(rsp->rda, i)->mynode = rnp;
2656 rcu_boot_init_percpu_data(i, rsp); 2735 rcu_boot_init_percpu_data(i, rsp);
2657 } 2736 }
2737 list_add(&rsp->flavors, &rcu_struct_flavors);
2738}
2739
2740/*
2741 * Compute the rcu_node tree geometry from kernel parameters. This cannot
2742 * replace the definitions in rcutree.h because those are needed to size
2743 * the ->node array in the rcu_state structure.
2744 */
2745static void __init rcu_init_geometry(void)
2746{
2747 int i;
2748 int j;
2749 int n = nr_cpu_ids;
2750 int rcu_capacity[MAX_RCU_LVLS + 1];
2751
2752 /* If the compile-time values are accurate, just leave. */
2753 if (rcu_fanout_leaf == CONFIG_RCU_FANOUT_LEAF)
2754 return;
2755
2756 /*
2757 * Compute number of nodes that can be handled by an rcu_node tree
2758 * with the given number of levels. Setting rcu_capacity[0] makes
2759 * some of the arithmetic easier.
2760 */
2761 rcu_capacity[0] = 1;
2762 rcu_capacity[1] = rcu_fanout_leaf;
2763 for (i = 2; i <= MAX_RCU_LVLS; i++)
2764 rcu_capacity[i] = rcu_capacity[i - 1] * CONFIG_RCU_FANOUT;
2765
2766 /*
2767 * The boot-time rcu_fanout_leaf parameter is only permitted
2768 * to increase the leaf-level fanout, not decrease it. Of course,
2769 * the leaf-level fanout cannot exceed the number of bits in
2770 * the rcu_node masks. Finally, the tree must be able to accommodate
2771 * the configured number of CPUs. Complain and fall back to the
2772 * compile-time values if these limits are exceeded.
2773 */
2774 if (rcu_fanout_leaf < CONFIG_RCU_FANOUT_LEAF ||
2775 rcu_fanout_leaf > sizeof(unsigned long) * 8 ||
2776 n > rcu_capacity[MAX_RCU_LVLS]) {
2777 WARN_ON(1);
2778 return;
2779 }
2780
2781 /* Calculate the number of rcu_nodes at each level of the tree. */
2782 for (i = 1; i <= MAX_RCU_LVLS; i++)
2783 if (n <= rcu_capacity[i]) {
2784 for (j = 0; j <= i; j++)
2785 num_rcu_lvl[j] =
2786 DIV_ROUND_UP(n, rcu_capacity[i - j]);
2787 rcu_num_lvls = i;
2788 for (j = i + 1; j <= MAX_RCU_LVLS; j++)
2789 num_rcu_lvl[j] = 0;
2790 break;
2791 }
2792
2793 /* Calculate the total number of rcu_node structures. */
2794 rcu_num_nodes = 0;
2795 for (i = 0; i <= MAX_RCU_LVLS; i++)
2796 rcu_num_nodes += num_rcu_lvl[i];
2797 rcu_num_nodes -= n;
2658} 2798}
2659 2799
2660void __init rcu_init(void) 2800void __init rcu_init(void)
@@ -2662,6 +2802,7 @@ void __init rcu_init(void)
2662 int cpu; 2802 int cpu;
2663 2803
2664 rcu_bootup_announce(); 2804 rcu_bootup_announce();
2805 rcu_init_geometry();
2665 rcu_init_one(&rcu_sched_state, &rcu_sched_data); 2806 rcu_init_one(&rcu_sched_state, &rcu_sched_data);
2666 rcu_init_one(&rcu_bh_state, &rcu_bh_data); 2807 rcu_init_one(&rcu_bh_state, &rcu_bh_data);
2667 __rcu_init_preempt(); 2808 __rcu_init_preempt();
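
The new rcu_init_geometry() arithmetic is easy to exercise on its own: given the leaf fanout, the interior fanout and the CPU count, pick the smallest number of levels whose capacity covers the CPUs, then size each level with DIV_ROUND_UP(). The standalone sketch below reruns that computation in userspace; MAX_LVLS, the fanout arguments and the printout are illustrative choices, not the kernel's configuration.

#include <stdio.h>

#define DIV_ROUND_UP(n, d)  (((n) + (d) - 1) / (d))
#define MAX_LVLS 4              /* stands in for MAX_RCU_LVLS */

/* Rerun the rcu_init_geometry() arithmetic for n CPUs; illustrative only. */
static void geometry(int n, int fanout_leaf, int fanout)
{
        int cap[MAX_LVLS + 1], lvl[MAX_LVLS + 1] = { 0 };
        int i, j, nlvls = 0, nodes = 0;

        cap[0] = 1;
        cap[1] = fanout_leaf;
        for (i = 2; i <= MAX_LVLS; i++)
                cap[i] = cap[i - 1] * fanout;
        if (n > cap[MAX_LVLS]) {
                printf("n=%d: too many CPUs, keep the compile-time layout\n", n);
                return;
        }

        for (i = 1; i <= MAX_LVLS; i++)
                if (n <= cap[i]) {
                        for (j = 0; j <= i; j++)
                                lvl[j] = DIV_ROUND_UP(n, cap[i - j]);
                        nlvls = i;
                        break;
                }

        for (i = 0; i <= MAX_LVLS; i++)
                nodes += lvl[i];
        nodes -= n;             /* the last "level" counts the CPUs themselves */

        printf("n=%d: %d levels, %d rcu_node structures\n", n, nlvls, nodes);
        for (i = 0; i <= nlvls; i++)
                printf("  level %d: %d\n", i, lvl[i]);
}

int main(void)
{
        geometry(16, 16, 16);   /* everything fits under a single leaf node */
        geometry(4096, 16, 64); /* a deeper tree, as on large NR_CPUS builds */
        return 0;
}
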
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index ea056495783e..4d29169f2124 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -42,28 +42,28 @@
42#define RCU_FANOUT_4 (RCU_FANOUT_3 * CONFIG_RCU_FANOUT) 42#define RCU_FANOUT_4 (RCU_FANOUT_3 * CONFIG_RCU_FANOUT)
43 43
44#if NR_CPUS <= RCU_FANOUT_1 44#if NR_CPUS <= RCU_FANOUT_1
45# define NUM_RCU_LVLS 1 45# define RCU_NUM_LVLS 1
46# define NUM_RCU_LVL_0 1 46# define NUM_RCU_LVL_0 1
47# define NUM_RCU_LVL_1 (NR_CPUS) 47# define NUM_RCU_LVL_1 (NR_CPUS)
48# define NUM_RCU_LVL_2 0 48# define NUM_RCU_LVL_2 0
49# define NUM_RCU_LVL_3 0 49# define NUM_RCU_LVL_3 0
50# define NUM_RCU_LVL_4 0 50# define NUM_RCU_LVL_4 0
51#elif NR_CPUS <= RCU_FANOUT_2 51#elif NR_CPUS <= RCU_FANOUT_2
52# define NUM_RCU_LVLS 2 52# define RCU_NUM_LVLS 2
53# define NUM_RCU_LVL_0 1 53# define NUM_RCU_LVL_0 1
54# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1) 54# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
55# define NUM_RCU_LVL_2 (NR_CPUS) 55# define NUM_RCU_LVL_2 (NR_CPUS)
56# define NUM_RCU_LVL_3 0 56# define NUM_RCU_LVL_3 0
57# define NUM_RCU_LVL_4 0 57# define NUM_RCU_LVL_4 0
58#elif NR_CPUS <= RCU_FANOUT_3 58#elif NR_CPUS <= RCU_FANOUT_3
59# define NUM_RCU_LVLS 3 59# define RCU_NUM_LVLS 3
60# define NUM_RCU_LVL_0 1 60# define NUM_RCU_LVL_0 1
61# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2) 61# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
62# define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1) 62# define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
63# define NUM_RCU_LVL_3 (NR_CPUS) 63# define NUM_RCU_LVL_3 (NR_CPUS)
64# define NUM_RCU_LVL_4 0 64# define NUM_RCU_LVL_4 0
65#elif NR_CPUS <= RCU_FANOUT_4 65#elif NR_CPUS <= RCU_FANOUT_4
66# define NUM_RCU_LVLS 4 66# define RCU_NUM_LVLS 4
67# define NUM_RCU_LVL_0 1 67# define NUM_RCU_LVL_0 1
68# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_3) 68# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_3)
69# define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2) 69# define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
@@ -76,6 +76,9 @@
76#define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3 + NUM_RCU_LVL_4) 76#define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3 + NUM_RCU_LVL_4)
77#define NUM_RCU_NODES (RCU_SUM - NR_CPUS) 77#define NUM_RCU_NODES (RCU_SUM - NR_CPUS)
78 78
79extern int rcu_num_lvls;
80extern int rcu_num_nodes;
81
79/* 82/*
80 * Dynticks per-CPU state. 83 * Dynticks per-CPU state.
81 */ 84 */
@@ -97,6 +100,7 @@ struct rcu_dynticks {
97 /* # times non-lazy CBs posted to CPU. */ 100 /* # times non-lazy CBs posted to CPU. */
98 unsigned long nonlazy_posted_snap; 101 unsigned long nonlazy_posted_snap;
99 /* idle-period nonlazy_posted snapshot. */ 102 /* idle-period nonlazy_posted snapshot. */
103 int tick_nohz_enabled_snap; /* Previously seen value from sysfs. */
100#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */ 104#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
101}; 105};
102 106
@@ -206,7 +210,7 @@ struct rcu_node {
206 */ 210 */
207#define rcu_for_each_node_breadth_first(rsp, rnp) \ 211#define rcu_for_each_node_breadth_first(rsp, rnp) \
208 for ((rnp) = &(rsp)->node[0]; \ 212 for ((rnp) = &(rsp)->node[0]; \
209 (rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++) 213 (rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++)
210 214
211/* 215/*
212 * Do a breadth-first scan of the non-leaf rcu_node structures for the 216 * Do a breadth-first scan of the non-leaf rcu_node structures for the
@@ -215,7 +219,7 @@ struct rcu_node {
215 */ 219 */
216#define rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) \ 220#define rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) \
217 for ((rnp) = &(rsp)->node[0]; \ 221 for ((rnp) = &(rsp)->node[0]; \
218 (rnp) < (rsp)->level[NUM_RCU_LVLS - 1]; (rnp)++) 222 (rnp) < (rsp)->level[rcu_num_lvls - 1]; (rnp)++)
219 223
220/* 224/*
221 * Scan the leaves of the rcu_node hierarchy for the specified rcu_state 225 * Scan the leaves of the rcu_node hierarchy for the specified rcu_state
@@ -224,8 +228,8 @@ struct rcu_node {
224 * It is still a leaf node, even if it is also the root node. 228 * It is still a leaf node, even if it is also the root node.
225 */ 229 */
226#define rcu_for_each_leaf_node(rsp, rnp) \ 230#define rcu_for_each_leaf_node(rsp, rnp) \
227 for ((rnp) = (rsp)->level[NUM_RCU_LVLS - 1]; \ 231 for ((rnp) = (rsp)->level[rcu_num_lvls - 1]; \
228 (rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++) 232 (rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++)
229 233
230/* Index values for nxttail array in struct rcu_data. */ 234/* Index values for nxttail array in struct rcu_data. */
231#define RCU_DONE_TAIL 0 /* Also RCU_WAIT head. */ 235#define RCU_DONE_TAIL 0 /* Also RCU_WAIT head. */
@@ -311,6 +315,9 @@ struct rcu_data {
311 unsigned long n_rp_need_fqs; 315 unsigned long n_rp_need_fqs;
312 unsigned long n_rp_need_nothing; 316 unsigned long n_rp_need_nothing;
313 317
318 /* 6) _rcu_barrier() callback. */
319 struct rcu_head barrier_head;
320
314 int cpu; 321 int cpu;
315 struct rcu_state *rsp; 322 struct rcu_state *rsp;
316}; 323};
@@ -357,10 +364,12 @@ do { \
357 */ 364 */
358struct rcu_state { 365struct rcu_state {
359 struct rcu_node node[NUM_RCU_NODES]; /* Hierarchy. */ 366 struct rcu_node node[NUM_RCU_NODES]; /* Hierarchy. */
360 struct rcu_node *level[NUM_RCU_LVLS]; /* Hierarchy levels. */ 367 struct rcu_node *level[RCU_NUM_LVLS]; /* Hierarchy levels. */
361 u32 levelcnt[MAX_RCU_LVLS + 1]; /* # nodes in each level. */ 368 u32 levelcnt[MAX_RCU_LVLS + 1]; /* # nodes in each level. */
362 u8 levelspread[NUM_RCU_LVLS]; /* kids/node in each level. */ 369 u8 levelspread[RCU_NUM_LVLS]; /* kids/node in each level. */
363 struct rcu_data __percpu *rda; /* pointer to per-CPU rcu_data. */ 370 struct rcu_data __percpu *rda; /* pointer to per-CPU rcu_data. */
371 void (*call)(struct rcu_head *head, /* call_rcu() flavor. */
372 void (*func)(struct rcu_head *head));
364 373
365 /* The following fields are guarded by the root rcu_node's lock. */ 374 /* The following fields are guarded by the root rcu_node's lock. */
366 375
@@ -392,6 +401,11 @@ struct rcu_state {
392 struct task_struct *rcu_barrier_in_progress; 401 struct task_struct *rcu_barrier_in_progress;
393 /* Task doing rcu_barrier(), */ 402 /* Task doing rcu_barrier(), */
394 /* or NULL if no barrier. */ 403 /* or NULL if no barrier. */
404 struct mutex barrier_mutex; /* Guards barrier fields. */
405 atomic_t barrier_cpu_count; /* # CPUs waiting on. */
406 struct completion barrier_completion; /* Wake at barrier end. */
407 unsigned long n_barrier_done; /* ++ at start and end of */
408 /* _rcu_barrier(). */
395 raw_spinlock_t fqslock; /* Only one task forcing */ 409 raw_spinlock_t fqslock; /* Only one task forcing */
396 /* quiescent states. */ 410 /* quiescent states. */
397 unsigned long jiffies_force_qs; /* Time at which to invoke */ 411 unsigned long jiffies_force_qs; /* Time at which to invoke */
@@ -409,8 +423,13 @@ struct rcu_state {
409 unsigned long gp_max; /* Maximum GP duration in */ 423 unsigned long gp_max; /* Maximum GP duration in */
410 /* jiffies. */ 424 /* jiffies. */
411 char *name; /* Name of structure. */ 425 char *name; /* Name of structure. */
426 struct list_head flavors; /* List of RCU flavors. */
412}; 427};
413 428
429extern struct list_head rcu_struct_flavors;
430#define for_each_rcu_flavor(rsp) \
431 list_for_each_entry((rsp), &rcu_struct_flavors, flavors)
432
414/* Return values for rcu_preempt_offline_tasks(). */ 433/* Return values for rcu_preempt_offline_tasks(). */
415 434
416#define RCU_OFL_TASKS_NORM_GP 0x1 /* Tasks blocking normal */ 435#define RCU_OFL_TASKS_NORM_GP 0x1 /* Tasks blocking normal */
@@ -444,6 +463,7 @@ DECLARE_PER_CPU(char, rcu_cpu_has_work);
444/* Forward declarations for rcutree_plugin.h */ 463/* Forward declarations for rcutree_plugin.h */
445static void rcu_bootup_announce(void); 464static void rcu_bootup_announce(void);
446long rcu_batches_completed(void); 465long rcu_batches_completed(void);
466static void rcu_preempt_note_context_switch(int cpu);
447static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp); 467static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp);
448#ifdef CONFIG_HOTPLUG_CPU 468#ifdef CONFIG_HOTPLUG_CPU
449static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, 469static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp,
@@ -452,25 +472,18 @@ static void rcu_stop_cpu_kthread(int cpu);
452#endif /* #ifdef CONFIG_HOTPLUG_CPU */ 472#endif /* #ifdef CONFIG_HOTPLUG_CPU */
453static void rcu_print_detail_task_stall(struct rcu_state *rsp); 473static void rcu_print_detail_task_stall(struct rcu_state *rsp);
454static int rcu_print_task_stall(struct rcu_node *rnp); 474static int rcu_print_task_stall(struct rcu_node *rnp);
455static void rcu_preempt_stall_reset(void);
456static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp); 475static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp);
457#ifdef CONFIG_HOTPLUG_CPU 476#ifdef CONFIG_HOTPLUG_CPU
458static int rcu_preempt_offline_tasks(struct rcu_state *rsp, 477static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
459 struct rcu_node *rnp, 478 struct rcu_node *rnp,
460 struct rcu_data *rdp); 479 struct rcu_data *rdp);
461#endif /* #ifdef CONFIG_HOTPLUG_CPU */ 480#endif /* #ifdef CONFIG_HOTPLUG_CPU */
462static void rcu_preempt_cleanup_dead_cpu(int cpu);
463static void rcu_preempt_check_callbacks(int cpu); 481static void rcu_preempt_check_callbacks(int cpu);
464static void rcu_preempt_process_callbacks(void);
465void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); 482void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu));
466#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) 483#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU)
467static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, 484static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
468 bool wake); 485 bool wake);
469#endif /* #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) */ 486#endif /* #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) */
470static int rcu_preempt_pending(int cpu);
471static int rcu_preempt_cpu_has_callbacks(int cpu);
472static void __cpuinit rcu_preempt_init_percpu_data(int cpu);
473static void rcu_preempt_cleanup_dying_cpu(void);
474static void __init __rcu_init_preempt(void); 487static void __init __rcu_init_preempt(void);
475static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); 488static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
476static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); 489static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 5271a020887e..7f3244c0df01 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -68,17 +68,21 @@ static void __init rcu_bootup_announce_oddness(void)
68 printk(KERN_INFO "\tAdditional per-CPU info printed with stalls.\n"); 68 printk(KERN_INFO "\tAdditional per-CPU info printed with stalls.\n");
69#endif 69#endif
70#if NUM_RCU_LVL_4 != 0 70#if NUM_RCU_LVL_4 != 0
71 printk(KERN_INFO "\tExperimental four-level hierarchy is enabled.\n"); 71 printk(KERN_INFO "\tFour-level hierarchy is enabled.\n");
72#endif 72#endif
73 if (rcu_fanout_leaf != CONFIG_RCU_FANOUT_LEAF)
74 printk(KERN_INFO "\tExperimental boot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf);
75 if (nr_cpu_ids != NR_CPUS)
76 printk(KERN_INFO "\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids);
73} 77}
74 78
75#ifdef CONFIG_TREE_PREEMPT_RCU 79#ifdef CONFIG_TREE_PREEMPT_RCU
76 80
77struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt); 81struct rcu_state rcu_preempt_state =
82 RCU_STATE_INITIALIZER(rcu_preempt, call_rcu);
78DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data); 83DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data);
79static struct rcu_state *rcu_state = &rcu_preempt_state; 84static struct rcu_state *rcu_state = &rcu_preempt_state;
80 85
81static void rcu_read_unlock_special(struct task_struct *t);
82static int rcu_preempted_readers_exp(struct rcu_node *rnp); 86static int rcu_preempted_readers_exp(struct rcu_node *rnp);
83 87
84/* 88/*
@@ -153,7 +157,7 @@ static void rcu_preempt_qs(int cpu)
153 * 157 *
154 * Caller must disable preemption. 158 * Caller must disable preemption.
155 */ 159 */
156void rcu_preempt_note_context_switch(void) 160static void rcu_preempt_note_context_switch(int cpu)
157{ 161{
158 struct task_struct *t = current; 162 struct task_struct *t = current;
159 unsigned long flags; 163 unsigned long flags;
@@ -164,7 +168,7 @@ void rcu_preempt_note_context_switch(void)
164 (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) { 168 (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) {
165 169
166 /* Possibly blocking in an RCU read-side critical section. */ 170 /* Possibly blocking in an RCU read-side critical section. */
167 rdp = __this_cpu_ptr(rcu_preempt_state.rda); 171 rdp = per_cpu_ptr(rcu_preempt_state.rda, cpu);
168 rnp = rdp->mynode; 172 rnp = rdp->mynode;
169 raw_spin_lock_irqsave(&rnp->lock, flags); 173 raw_spin_lock_irqsave(&rnp->lock, flags);
170 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED; 174 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED;
@@ -228,23 +232,11 @@ void rcu_preempt_note_context_switch(void)
228 * means that we continue to block the current grace period. 232 * means that we continue to block the current grace period.
229 */ 233 */
230 local_irq_save(flags); 234 local_irq_save(flags);
231 rcu_preempt_qs(smp_processor_id()); 235 rcu_preempt_qs(cpu);
232 local_irq_restore(flags); 236 local_irq_restore(flags);
233} 237}
234 238
235/* 239/*
236 * Tree-preemptible RCU implementation for rcu_read_lock().
237 * Just increment ->rcu_read_lock_nesting, shared state will be updated
238 * if we block.
239 */
240void __rcu_read_lock(void)
241{
242 current->rcu_read_lock_nesting++;
243 barrier(); /* needed if we ever invoke rcu_read_lock in rcutree.c */
244}
245EXPORT_SYMBOL_GPL(__rcu_read_lock);
246
247/*
248 * Check for preempted RCU readers blocking the current grace period 240 * Check for preempted RCU readers blocking the current grace period
249 * for the specified rcu_node structure. If the caller needs a reliable 241 * for the specified rcu_node structure. If the caller needs a reliable
250 * answer, it must hold the rcu_node's ->lock. 242 * answer, it must hold the rcu_node's ->lock.
@@ -310,7 +302,7 @@ static struct list_head *rcu_next_node_entry(struct task_struct *t,
310 * notify RCU core processing or task having blocked during the RCU 302 * notify RCU core processing or task having blocked during the RCU
311 * read-side critical section. 303 * read-side critical section.
312 */ 304 */
313static noinline void rcu_read_unlock_special(struct task_struct *t) 305void rcu_read_unlock_special(struct task_struct *t)
314{ 306{
315 int empty; 307 int empty;
316 int empty_exp; 308 int empty_exp;
@@ -398,8 +390,9 @@ static noinline void rcu_read_unlock_special(struct task_struct *t)
398 rnp->grphi, 390 rnp->grphi,
399 !!rnp->gp_tasks); 391 !!rnp->gp_tasks);
400 rcu_report_unblock_qs_rnp(rnp, flags); 392 rcu_report_unblock_qs_rnp(rnp, flags);
401 } else 393 } else {
402 raw_spin_unlock_irqrestore(&rnp->lock, flags); 394 raw_spin_unlock_irqrestore(&rnp->lock, flags);
395 }
403 396
404#ifdef CONFIG_RCU_BOOST 397#ifdef CONFIG_RCU_BOOST
405 /* Unboost if we were boosted. */ 398 /* Unboost if we were boosted. */
@@ -418,38 +411,6 @@ static noinline void rcu_read_unlock_special(struct task_struct *t)
418 } 411 }
419} 412}
420 413
421/*
422 * Tree-preemptible RCU implementation for rcu_read_unlock().
423 * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost
424 * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
425 * invoke rcu_read_unlock_special() to clean up after a context switch
426 * in an RCU read-side critical section and other special cases.
427 */
428void __rcu_read_unlock(void)
429{
430 struct task_struct *t = current;
431
432 if (t->rcu_read_lock_nesting != 1)
433 --t->rcu_read_lock_nesting;
434 else {
435 barrier(); /* critical section before exit code. */
436 t->rcu_read_lock_nesting = INT_MIN;
437 barrier(); /* assign before ->rcu_read_unlock_special load */
438 if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
439 rcu_read_unlock_special(t);
440 barrier(); /* ->rcu_read_unlock_special load before assign */
441 t->rcu_read_lock_nesting = 0;
442 }
443#ifdef CONFIG_PROVE_LOCKING
444 {
445 int rrln = ACCESS_ONCE(t->rcu_read_lock_nesting);
446
447 WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2);
448 }
449#endif /* #ifdef CONFIG_PROVE_LOCKING */
450}
451EXPORT_SYMBOL_GPL(__rcu_read_unlock);
452
453#ifdef CONFIG_RCU_CPU_STALL_VERBOSE 414#ifdef CONFIG_RCU_CPU_STALL_VERBOSE
454 415
455/* 416/*
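
__rcu_read_lock() and __rcu_read_unlock() disappear from this file (presumably consolidated into common RCU code rather than staying Tree-RCU-specific). The trick the removed __rcu_read_unlock() relied on is worth a gloss: the outermost unlock parks ->rcu_read_lock_nesting at INT_MIN before doing the special-case work, so any read-side critical section entered from that work, or from an interrupt arriving during it, sees a large negative nesting count and never recurses into rcu_read_unlock_special(). A userspace gloss of that counter dance, with all names invented:

#include <stdio.h>
#include <limits.h>

/* Userspace gloss of the nesting trick in the removed __rcu_read_unlock(). */
static int nesting;             /* stands in for t->rcu_read_lock_nesting */
static int special_pending = 1; /* pretend ->rcu_read_unlock_special is set */

static void demo_read_unlock(void);

static void demo_read_lock(void)
{
        nesting++;
}

static void unlock_special(void)
{
        /* Work done here may itself take and release read locks. */
        demo_read_lock();
        printf("special work sees nesting=%d, so it cannot recurse\n", nesting);
        demo_read_unlock();
}

static void demo_read_unlock(void)
{
        if (nesting != 1) {
                --nesting;
        } else {
                nesting = INT_MIN;      /* park at a huge negative value */
                if (special_pending)
                        unlock_special();
                nesting = 0;            /* only now truly outside the reader */
        }
}

int main(void)
{
        demo_read_lock();
        demo_read_unlock();
        printf("final nesting=%d\n", nesting);
        return 0;
}
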
@@ -540,16 +501,6 @@ static int rcu_print_task_stall(struct rcu_node *rnp)
540} 501}
541 502
542/* 503/*
543 * Suppress preemptible RCU's CPU stall warnings by pushing the
544 * time of the next stall-warning message comfortably far into the
545 * future.
546 */
547static void rcu_preempt_stall_reset(void)
548{
549 rcu_preempt_state.jiffies_stall = jiffies + ULONG_MAX / 2;
550}
551
552/*
553 * Check that the list of blocked tasks for the newly completed grace 504 * Check that the list of blocked tasks for the newly completed grace
554 * period is in fact empty. It is a serious bug to complete a grace 505 * period is in fact empty. It is a serious bug to complete a grace
555 * period that still has RCU readers blocked! This function must be 506 * period that still has RCU readers blocked! This function must be
@@ -650,14 +601,6 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
650#endif /* #ifdef CONFIG_HOTPLUG_CPU */ 601#endif /* #ifdef CONFIG_HOTPLUG_CPU */
651 602
652/* 603/*
653 * Do CPU-offline processing for preemptible RCU.
654 */
655static void rcu_preempt_cleanup_dead_cpu(int cpu)
656{
657 rcu_cleanup_dead_cpu(cpu, &rcu_preempt_state);
658}
659
660/*
661 * Check for a quiescent state from the current CPU. When a task blocks, 604 * Check for a quiescent state from the current CPU. When a task blocks,
662 * the task is recorded in the corresponding CPU's rcu_node structure, 605 * the task is recorded in the corresponding CPU's rcu_node structure,
663 * which is checked elsewhere. 606 * which is checked elsewhere.
@@ -677,15 +620,6 @@ static void rcu_preempt_check_callbacks(int cpu)
677 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS; 620 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS;
678} 621}
679 622
680/*
681 * Process callbacks for preemptible RCU.
682 */
683static void rcu_preempt_process_callbacks(void)
684{
685 __rcu_process_callbacks(&rcu_preempt_state,
686 &__get_cpu_var(rcu_preempt_data));
687}
688
689#ifdef CONFIG_RCU_BOOST 623#ifdef CONFIG_RCU_BOOST
690 624
691static void rcu_preempt_do_callbacks(void) 625static void rcu_preempt_do_callbacks(void)
@@ -824,9 +758,9 @@ sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
824 int must_wait = 0; 758 int must_wait = 0;
825 759
826 raw_spin_lock_irqsave(&rnp->lock, flags); 760 raw_spin_lock_irqsave(&rnp->lock, flags);
827 if (list_empty(&rnp->blkd_tasks)) 761 if (list_empty(&rnp->blkd_tasks)) {
828 raw_spin_unlock_irqrestore(&rnp->lock, flags); 762 raw_spin_unlock_irqrestore(&rnp->lock, flags);
829 else { 763 } else {
830 rnp->exp_tasks = rnp->blkd_tasks.next; 764 rnp->exp_tasks = rnp->blkd_tasks.next;
831 rcu_initiate_boost(rnp, flags); /* releases rnp->lock */ 765 rcu_initiate_boost(rnp, flags); /* releases rnp->lock */
832 must_wait = 1; 766 must_wait = 1;
@@ -870,9 +804,9 @@ void synchronize_rcu_expedited(void)
870 * expedited grace period for us, just leave. 804 * expedited grace period for us, just leave.
871 */ 805 */
872 while (!mutex_trylock(&sync_rcu_preempt_exp_mutex)) { 806 while (!mutex_trylock(&sync_rcu_preempt_exp_mutex)) {
873 if (trycount++ < 10) 807 if (trycount++ < 10) {
874 udelay(trycount * num_online_cpus()); 808 udelay(trycount * num_online_cpus());
875 else { 809 } else {
876 synchronize_rcu(); 810 synchronize_rcu();
877 return; 811 return;
878 } 812 }
@@ -917,51 +851,16 @@ mb_ret:
917} 851}
918EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); 852EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
919 853
920/*
921 * Check to see if there is any immediate preemptible-RCU-related work
922 * to be done.
923 */
924static int rcu_preempt_pending(int cpu)
925{
926 return __rcu_pending(&rcu_preempt_state,
927 &per_cpu(rcu_preempt_data, cpu));
928}
929
930/*
931 * Does preemptible RCU have callbacks on this CPU?
932 */
933static int rcu_preempt_cpu_has_callbacks(int cpu)
934{
935 return !!per_cpu(rcu_preempt_data, cpu).nxtlist;
936}
937
938/** 854/**
939 * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete. 855 * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.
940 */ 856 */
941void rcu_barrier(void) 857void rcu_barrier(void)
942{ 858{
943 _rcu_barrier(&rcu_preempt_state, call_rcu); 859 _rcu_barrier(&rcu_preempt_state);
944} 860}
945EXPORT_SYMBOL_GPL(rcu_barrier); 861EXPORT_SYMBOL_GPL(rcu_barrier);
946 862
947/* 863/*
948 * Initialize preemptible RCU's per-CPU data.
949 */
950static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
951{
952 rcu_init_percpu_data(cpu, &rcu_preempt_state, 1);
953}
954
955/*
956 * Move preemptible RCU's callbacks from dying CPU to other online CPU
957 * and record a quiescent state.
958 */
959static void rcu_preempt_cleanup_dying_cpu(void)
960{
961 rcu_cleanup_dying_cpu(&rcu_preempt_state);
962}
963
964/*
965 * Initialize preemptible RCU's state structures. 864 * Initialize preemptible RCU's state structures.
966 */ 865 */
967static void __init __rcu_init_preempt(void) 866static void __init __rcu_init_preempt(void)
@@ -1002,6 +901,14 @@ void rcu_force_quiescent_state(void)
1002EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); 901EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
1003 902
1004/* 903/*
904 * Because preemptible RCU does not exist, we never have to check for
905 * CPUs being in quiescent states.
906 */
907static void rcu_preempt_note_context_switch(int cpu)
908{
909}
910
911/*
1005 * Because preemptible RCU does not exist, there are never any preempted 912 * Because preemptible RCU does not exist, there are never any preempted
1006 * RCU readers. 913 * RCU readers.
1007 */ 914 */
@@ -1038,14 +945,6 @@ static int rcu_print_task_stall(struct rcu_node *rnp)
1038} 945}
1039 946
1040/* 947/*
1041 * Because preemptible RCU does not exist, there is no need to suppress
1042 * its CPU stall warnings.
1043 */
1044static void rcu_preempt_stall_reset(void)
1045{
1046}
1047
1048/*
1049 * Because there is no preemptible RCU, there can be no readers blocked, 948 * Because there is no preemptible RCU, there can be no readers blocked,
1050 * so there is no need to check for blocked tasks. So check only for 949 * so there is no need to check for blocked tasks. So check only for
1051 * bogus qsmask values. 950 * bogus qsmask values.
@@ -1073,14 +972,6 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
1073#endif /* #ifdef CONFIG_HOTPLUG_CPU */ 972#endif /* #ifdef CONFIG_HOTPLUG_CPU */
1074 973
1075/* 974/*
1076 * Because preemptible RCU does not exist, it never needs CPU-offline
1077 * processing.
1078 */
1079static void rcu_preempt_cleanup_dead_cpu(int cpu)
1080{
1081}
1082
1083/*
1084 * Because preemptible RCU does not exist, it never has any callbacks 975 * Because preemptible RCU does not exist, it never has any callbacks
1085 * to check. 976 * to check.
1086 */ 977 */
@@ -1089,14 +980,6 @@ static void rcu_preempt_check_callbacks(int cpu)
1089} 980}
1090 981
1091/* 982/*
1092 * Because preemptible RCU does not exist, it never has any callbacks
1093 * to process.
1094 */
1095static void rcu_preempt_process_callbacks(void)
1096{
1097}
1098
1099/*
1100 * Queue an RCU callback for lazy invocation after a grace period. 983 * Queue an RCU callback for lazy invocation after a grace period.
1101 * This will likely be later named something like "call_rcu_lazy()", 984 * This will likely be later named something like "call_rcu_lazy()",
1102 * but this change will require some way of tagging the lazy RCU 985 * but this change will require some way of tagging the lazy RCU
@@ -1137,22 +1020,6 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
1137#endif /* #ifdef CONFIG_HOTPLUG_CPU */ 1020#endif /* #ifdef CONFIG_HOTPLUG_CPU */
1138 1021
1139/* 1022/*
1140 * Because preemptible RCU does not exist, it never has any work to do.
1141 */
1142static int rcu_preempt_pending(int cpu)
1143{
1144 return 0;
1145}
1146
1147/*
1148 * Because preemptible RCU does not exist, it never has callbacks
1149 */
1150static int rcu_preempt_cpu_has_callbacks(int cpu)
1151{
1152 return 0;
1153}
1154
1155/*
1156 * Because preemptible RCU does not exist, rcu_barrier() is just 1023 * Because preemptible RCU does not exist, rcu_barrier() is just
1157 * another name for rcu_barrier_sched(). 1024 * another name for rcu_barrier_sched().
1158 */ 1025 */
@@ -1163,21 +1030,6 @@ void rcu_barrier(void)
1163EXPORT_SYMBOL_GPL(rcu_barrier); 1030EXPORT_SYMBOL_GPL(rcu_barrier);
1164 1031
1165/* 1032/*
1166 * Because preemptible RCU does not exist, there is no per-CPU
1167 * data to initialize.
1168 */
1169static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
1170{
1171}
1172
1173/*
1174 * Because there is no preemptible RCU, there is no cleanup to do.
1175 */
1176static void rcu_preempt_cleanup_dying_cpu(void)
1177{
1178}
1179
1180/*
1181 * Because preemptible RCU does not exist, it need not be initialized. 1033 * Because preemptible RCU does not exist, it need not be initialized.
1182 */ 1034 */
1183static void __init __rcu_init_preempt(void) 1035static void __init __rcu_init_preempt(void)
@@ -1960,9 +1812,11 @@ static void rcu_idle_count_callbacks_posted(void)
1960 */ 1812 */
1961#define RCU_IDLE_FLUSHES 5 /* Number of dyntick-idle tries. */ 1813#define RCU_IDLE_FLUSHES 5 /* Number of dyntick-idle tries. */
1962#define RCU_IDLE_OPT_FLUSHES 3 /* Optional dyntick-idle tries. */ 1814#define RCU_IDLE_OPT_FLUSHES 3 /* Optional dyntick-idle tries. */
1963#define RCU_IDLE_GP_DELAY 6 /* Roughly one grace period. */ 1815#define RCU_IDLE_GP_DELAY 4 /* Roughly one grace period. */
1964#define RCU_IDLE_LAZY_GP_DELAY (6 * HZ) /* Roughly six seconds. */ 1816#define RCU_IDLE_LAZY_GP_DELAY (6 * HZ) /* Roughly six seconds. */
1965 1817
1818extern int tick_nohz_enabled;
1819
1966/* 1820/*
1967 * Does the specified flavor of RCU have non-lazy callbacks pending on 1821 * Does the specified flavor of RCU have non-lazy callbacks pending on
1968 * the specified CPU? Both RCU flavor and CPU are specified by the 1822 * the specified CPU? Both RCU flavor and CPU are specified by the
@@ -2039,10 +1893,13 @@ int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies)
2039 return 1; 1893 return 1;
2040 } 1894 }
2041 /* Set up for the possibility that RCU will post a timer. */ 1895 /* Set up for the possibility that RCU will post a timer. */
2042 if (rcu_cpu_has_nonlazy_callbacks(cpu)) 1896 if (rcu_cpu_has_nonlazy_callbacks(cpu)) {
2043 *delta_jiffies = RCU_IDLE_GP_DELAY; 1897 *delta_jiffies = round_up(RCU_IDLE_GP_DELAY + jiffies,
2044 else 1898 RCU_IDLE_GP_DELAY) - jiffies;
2045 *delta_jiffies = RCU_IDLE_LAZY_GP_DELAY; 1899 } else {
1900 *delta_jiffies = jiffies + RCU_IDLE_LAZY_GP_DELAY;
1901 *delta_jiffies = round_jiffies(*delta_jiffies) - jiffies;
1902 }
2046 return 0; 1903 return 0;
2047} 1904}
2048 1905
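
Instead of returning a fixed offset, the rewritten branch rounds the wakeup target: round_up() aligns the non-lazy case to a multiple of RCU_IDLE_GP_DELAY (now 4 jiffies, a power of two, which is what the kernel's round_up() macro is built for), and round_jiffies() pushes the lazy case to a whole-second boundary. The same rounding is applied when idle_gp_timer is armed in a later hunk, so CPUs going idle at slightly different times tend to wake together rather than each at its own moment. A standalone sketch of the arithmetic (ROUND_UP mimics the kernel macro; the jiffies values are made up):

#include <stdio.h>

/* Mimics the kernel's round_up() for a power-of-two step. */
#define ROUND_UP(x, y)          ((((x) - 1) | ((y) - 1)) + 1)
#define RCU_IDLE_GP_DELAY       4       /* roughly one grace period, in jiffies */

int main(void)
{
        unsigned long jiffies;

        for (jiffies = 1000; jiffies < 1004; jiffies++) {
                unsigned long delta = ROUND_UP(jiffies + RCU_IDLE_GP_DELAY,
                                               RCU_IDLE_GP_DELAY) - jiffies;
                /*
                 * Wakeups land on multiples of RCU_IDLE_GP_DELAY, so CPUs
                 * idling within the same window share a wakeup time.
                 */
                printf("jiffies=%lu  delta=%lu  wakeup at %lu\n",
                       jiffies, delta, jiffies + delta);
        }
        return 0;
}
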
@@ -2101,6 +1958,7 @@ static void rcu_cleanup_after_idle(int cpu)
2101 1958
2102 del_timer(&rdtp->idle_gp_timer); 1959 del_timer(&rdtp->idle_gp_timer);
2103 trace_rcu_prep_idle("Cleanup after idle"); 1960 trace_rcu_prep_idle("Cleanup after idle");
1961 rdtp->tick_nohz_enabled_snap = ACCESS_ONCE(tick_nohz_enabled);
2104} 1962}
2105 1963
2106/* 1964/*
@@ -2126,6 +1984,18 @@ static void rcu_prepare_for_idle(int cpu)
2126{ 1984{
2127 struct timer_list *tp; 1985 struct timer_list *tp;
2128 struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); 1986 struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
1987 int tne;
1988
1989 /* Handle nohz enablement switches conservatively. */
1990 tne = ACCESS_ONCE(tick_nohz_enabled);
1991 if (tne != rdtp->tick_nohz_enabled_snap) {
1992 if (rcu_cpu_has_callbacks(cpu))
1993 invoke_rcu_core(); /* force nohz to see update. */
1994 rdtp->tick_nohz_enabled_snap = tne;
1995 return;
1996 }
1997 if (!tne)
1998 return;
2129 1999
2130 /* 2000 /*
2131 * If this is an idle re-entry, for example, due to use of 2001 * If this is an idle re-entry, for example, due to use of
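
The new lines read the global tick_nohz_enabled with ACCESS_ONCE(), compare it against a per-CPU snapshot, and back off for one pass when the value has changed, doing the dyntick-idle preparation only once the setting is stable and enabled. A userspace sketch of that snapshot-and-compare idiom; the struct and function names below are stand-ins, not the kernel's:

#include <stdio.h>

/* Simplified stand-in for the kernel's ACCESS_ONCE(): force a single load. */
#define ACCESS_ONCE(x) (*(volatile __typeof__(x) *)&(x))

static int tick_nohz_enabled = 1;       /* global knob, may change at runtime */

struct dynticks_state {
        int tick_nohz_enabled_snap;     /* last value this CPU acted on */
};

static void prepare_for_idle(struct dynticks_state *s)
{
        int tne = ACCESS_ONCE(tick_nohz_enabled);

        if (tne != s->tick_nohz_enabled_snap) {
                /* Setting changed: record it and defer any real work. */
                printf("nohz switched to %d, deferring\n", tne);
                s->tick_nohz_enabled_snap = tne;
                return;
        }
        if (!tne)
                return;                 /* nohz off: nothing to prepare */
        printf("nohz stable and on: do idle preparation\n");
}

int main(void)
{
        struct dynticks_state s = { .tick_nohz_enabled_snap = 0 };

        prepare_for_idle(&s);           /* sees a change: 0 -> 1 */
        prepare_for_idle(&s);           /* stable: does the real work */
        tick_nohz_enabled = 0;
        prepare_for_idle(&s);           /* change noted, work skipped */
        return 0;
}
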
@@ -2179,10 +2049,11 @@ static void rcu_prepare_for_idle(int cpu)
2179 if (rcu_cpu_has_nonlazy_callbacks(cpu)) { 2049 if (rcu_cpu_has_nonlazy_callbacks(cpu)) {
2180 trace_rcu_prep_idle("Dyntick with callbacks"); 2050 trace_rcu_prep_idle("Dyntick with callbacks");
2181 rdtp->idle_gp_timer_expires = 2051 rdtp->idle_gp_timer_expires =
2182 jiffies + RCU_IDLE_GP_DELAY; 2052 round_up(jiffies + RCU_IDLE_GP_DELAY,
2053 RCU_IDLE_GP_DELAY);
2183 } else { 2054 } else {
2184 rdtp->idle_gp_timer_expires = 2055 rdtp->idle_gp_timer_expires =
2185 jiffies + RCU_IDLE_LAZY_GP_DELAY; 2056 round_jiffies(jiffies + RCU_IDLE_LAZY_GP_DELAY);
2186 trace_rcu_prep_idle("Dyntick with lazy callbacks"); 2057 trace_rcu_prep_idle("Dyntick with lazy callbacks");
2187 } 2058 }
2188 tp = &rdtp->idle_gp_timer; 2059 tp = &rdtp->idle_gp_timer;
@@ -2223,8 +2094,9 @@ static void rcu_prepare_for_idle(int cpu)
2223 if (rcu_cpu_has_callbacks(cpu)) { 2094 if (rcu_cpu_has_callbacks(cpu)) {
2224 trace_rcu_prep_idle("More callbacks"); 2095 trace_rcu_prep_idle("More callbacks");
2225 invoke_rcu_core(); 2096 invoke_rcu_core();
2226 } else 2097 } else {
2227 trace_rcu_prep_idle("Callbacks drained"); 2098 trace_rcu_prep_idle("Callbacks drained");
2099 }
2228} 2100}
2229 2101
2230/* 2102/*
@@ -2261,6 +2133,7 @@ static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
2261 2133
2262static void print_cpu_stall_fast_no_hz(char *cp, int cpu) 2134static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
2263{ 2135{
2136 *cp = '\0';
2264} 2137}
2265 2138
2266#endif /* #else #ifdef CONFIG_RCU_FAST_NO_HZ */ 2139#endif /* #else #ifdef CONFIG_RCU_FAST_NO_HZ */
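
The !CONFIG_RCU_FAST_NO_HZ stub of print_cpu_stall_fast_no_hz() now terminates the caller's buffer instead of leaving it untouched; presumably the stall-warning code prints that buffer unconditionally, so an empty body could let stale or uninitialized bytes leak into the output. A tiny illustration (the buffer size and the calling code are hypothetical):

#include <stdio.h>

/*
 * Stub analogous to the !CONFIG_RCU_FAST_NO_HZ case: nothing to report,
 * but still leave a valid (empty) string for the caller to print.
 */
static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
{
        *cp = '\0';
        (void)cpu;
}

int main(void)
{
        char buf[64];   /* uninitialized: garbage unless the callee terminates it */

        print_cpu_stall_fast_no_hz(buf, 0);
        printf("CPU 0: %s\n", buf);     /* safe: prints an empty suffix */
        return 0;
}
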
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c
index d4bc16ddd1d4..abffb486e94e 100644
--- a/kernel/rcutree_trace.c
+++ b/kernel/rcutree_trace.c
@@ -46,6 +46,31 @@
46#define RCU_TREE_NONCORE 46#define RCU_TREE_NONCORE
47#include "rcutree.h" 47#include "rcutree.h"
48 48
49static int show_rcubarrier(struct seq_file *m, void *unused)
50{
51 struct rcu_state *rsp;
52
53 for_each_rcu_flavor(rsp)
54 seq_printf(m, "%s: %c bcc: %d nbd: %lu\n",
55 rsp->name, rsp->rcu_barrier_in_progress ? 'B' : '.',
56 atomic_read(&rsp->barrier_cpu_count),
57 rsp->n_barrier_done);
58 return 0;
59}
60
61static int rcubarrier_open(struct inode *inode, struct file *file)
62{
63 return single_open(file, show_rcubarrier, NULL);
64}
65
66static const struct file_operations rcubarrier_fops = {
67 .owner = THIS_MODULE,
68 .open = rcubarrier_open,
69 .read = seq_read,
70 .llseek = seq_lseek,
71 .release = single_release,
72};
73
49#ifdef CONFIG_RCU_BOOST 74#ifdef CONFIG_RCU_BOOST
50 75
51static char convert_kthread_status(unsigned int kthread_status) 76static char convert_kthread_status(unsigned int kthread_status)
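
The new rcubarrier file follows the standard debugfs-plus-seq_file recipe: a show function does the printing, single_open() binds it at open time, and the file_operations reuse seq_read/seq_lseek/single_release. For reference, the same recipe as a self-contained module sketch; the directory name, file name, and the counter it exports are invented for the example:

#include <linux/module.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>

static struct dentry *demo_dir;
static unsigned long demo_counter;

static int demo_show(struct seq_file *m, void *unused)
{
        seq_printf(m, "counter: %lu\n", demo_counter++);
        return 0;
}

static int demo_open(struct inode *inode, struct file *file)
{
        return single_open(file, demo_show, NULL);
}

static const struct file_operations demo_fops = {
        .owner   = THIS_MODULE,
        .open    = demo_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = single_release,
};

static int __init demo_init(void)
{
        demo_dir = debugfs_create_dir("seqfile-demo", NULL);
        if (!demo_dir)
                return -ENOMEM;
        if (!debugfs_create_file("counter", 0444, demo_dir, NULL, &demo_fops)) {
                debugfs_remove_recursive(demo_dir);
                return -ENOMEM;
        }
        return 0;
}

static void __exit demo_exit(void)
{
        debugfs_remove_recursive(demo_dir);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");

Once loaded, the value would appear under /sys/kernel/debug/seqfile-demo/counter, much as the file added above is registered against the directory created in rcutree_trace_init().
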
@@ -95,24 +120,16 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
95 rdp->n_cbs_invoked, rdp->n_cbs_orphaned, rdp->n_cbs_adopted); 120 rdp->n_cbs_invoked, rdp->n_cbs_orphaned, rdp->n_cbs_adopted);
96} 121}
97 122
98#define PRINT_RCU_DATA(name, func, m) \
99 do { \
100 int _p_r_d_i; \
101 \
102 for_each_possible_cpu(_p_r_d_i) \
103 func(m, &per_cpu(name, _p_r_d_i)); \
104 } while (0)
105
106static int show_rcudata(struct seq_file *m, void *unused) 123static int show_rcudata(struct seq_file *m, void *unused)
107{ 124{
108#ifdef CONFIG_TREE_PREEMPT_RCU 125 int cpu;
109 seq_puts(m, "rcu_preempt:\n"); 126 struct rcu_state *rsp;
110 PRINT_RCU_DATA(rcu_preempt_data, print_one_rcu_data, m); 127
111#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ 128 for_each_rcu_flavor(rsp) {
112 seq_puts(m, "rcu_sched:\n"); 129 seq_printf(m, "%s:\n", rsp->name);
113 PRINT_RCU_DATA(rcu_sched_data, print_one_rcu_data, m); 130 for_each_possible_cpu(cpu)
114 seq_puts(m, "rcu_bh:\n"); 131 print_one_rcu_data(m, per_cpu_ptr(rsp->rda, cpu));
115 PRINT_RCU_DATA(rcu_bh_data, print_one_rcu_data, m); 132 }
116 return 0; 133 return 0;
117} 134}
118 135
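
Here the per-flavor #ifdef blocks and the PRINT_RCU_DATA() macro give way to one loop over whatever RCU flavors are registered, with per-CPU data reached through rsp->rda; the same conversion repeats in the other show functions below. The definition of for_each_rcu_flavor() is not part of this hunk, so the sketch below is only a userspace analogue of the idea, with every name invented:

#include <stdio.h>

struct flavor {
        const char *name;
        struct flavor *next;
};

static struct flavor rcu_bh    = { "rcu_bh",    NULL };
static struct flavor rcu_sched = { "rcu_sched", &rcu_bh };
static struct flavor *flavor_list = &rcu_sched;

/* Illustrative stand-in for for_each_rcu_flavor(): walk a registration list. */
#define for_each_flavor(f) \
        for ((f) = flavor_list; (f) != NULL; (f) = (f)->next)

static void print_one(const struct flavor *f, int cpu)
{
        printf("  %s cpu%d: ...\n", f->name, cpu);
}

int main(void)
{
        struct flavor *f;
        int cpu;

        /* One generic loop replaces one #ifdef block per flavor. */
        for_each_flavor(f) {
                printf("%s:\n", f->name);
                for (cpu = 0; cpu < 2; cpu++)
                        print_one(f, cpu);
        }
        return 0;
}

The payoff is that adding or removing a flavor (for example, preemptible RCU under CONFIG_TREE_PREEMPT_RCU) touches only the registration point rather than every debugfs show function.
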
@@ -166,6 +183,9 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp)
166 183
167static int show_rcudata_csv(struct seq_file *m, void *unused) 184static int show_rcudata_csv(struct seq_file *m, void *unused)
168{ 185{
186 int cpu;
187 struct rcu_state *rsp;
188
169 seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pgp\",\"pq\","); 189 seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pgp\",\"pq\",");
170 seq_puts(m, "\"dt\",\"dt nesting\",\"dt NMI nesting\",\"df\","); 190 seq_puts(m, "\"dt\",\"dt nesting\",\"dt NMI nesting\",\"df\",");
171 seq_puts(m, "\"of\",\"qll\",\"ql\",\"qs\""); 191 seq_puts(m, "\"of\",\"qll\",\"ql\",\"qs\"");
@@ -173,14 +193,11 @@ static int show_rcudata_csv(struct seq_file *m, void *unused)
173 seq_puts(m, "\"kt\",\"ktl\""); 193 seq_puts(m, "\"kt\",\"ktl\"");
174#endif /* #ifdef CONFIG_RCU_BOOST */ 194#endif /* #ifdef CONFIG_RCU_BOOST */
175 seq_puts(m, ",\"b\",\"ci\",\"co\",\"ca\"\n"); 195 seq_puts(m, ",\"b\",\"ci\",\"co\",\"ca\"\n");
176#ifdef CONFIG_TREE_PREEMPT_RCU 196 for_each_rcu_flavor(rsp) {
177 seq_puts(m, "\"rcu_preempt:\"\n"); 197 seq_printf(m, "\"%s:\"\n", rsp->name);
178 PRINT_RCU_DATA(rcu_preempt_data, print_one_rcu_data_csv, m); 198 for_each_possible_cpu(cpu)
179#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ 199 print_one_rcu_data_csv(m, per_cpu_ptr(rsp->rda, cpu));
180 seq_puts(m, "\"rcu_sched:\"\n"); 200 }
181 PRINT_RCU_DATA(rcu_sched_data, print_one_rcu_data_csv, m);
182 seq_puts(m, "\"rcu_bh:\"\n");
183 PRINT_RCU_DATA(rcu_bh_data, print_one_rcu_data_csv, m);
184 return 0; 201 return 0;
185} 202}
186 203
@@ -201,8 +218,7 @@ static const struct file_operations rcudata_csv_fops = {
201 218
202static void print_one_rcu_node_boost(struct seq_file *m, struct rcu_node *rnp) 219static void print_one_rcu_node_boost(struct seq_file *m, struct rcu_node *rnp)
203{ 220{
204 seq_printf(m, "%d:%d tasks=%c%c%c%c kt=%c ntb=%lu neb=%lu nnb=%lu " 221 seq_printf(m, "%d:%d tasks=%c%c%c%c kt=%c ntb=%lu neb=%lu nnb=%lu ",
205 "j=%04x bt=%04x\n",
206 rnp->grplo, rnp->grphi, 222 rnp->grplo, rnp->grphi,
207 "T."[list_empty(&rnp->blkd_tasks)], 223 "T."[list_empty(&rnp->blkd_tasks)],
208 "N."[!rnp->gp_tasks], 224 "N."[!rnp->gp_tasks],
@@ -210,11 +226,11 @@ static void print_one_rcu_node_boost(struct seq_file *m, struct rcu_node *rnp)
210 "B."[!rnp->boost_tasks], 226 "B."[!rnp->boost_tasks],
211 convert_kthread_status(rnp->boost_kthread_status), 227 convert_kthread_status(rnp->boost_kthread_status),
212 rnp->n_tasks_boosted, rnp->n_exp_boosts, 228 rnp->n_tasks_boosted, rnp->n_exp_boosts,
213 rnp->n_normal_boosts, 229 rnp->n_normal_boosts);
230 seq_printf(m, "j=%04x bt=%04x\n",
214 (int)(jiffies & 0xffff), 231 (int)(jiffies & 0xffff),
215 (int)(rnp->boost_time & 0xffff)); 232 (int)(rnp->boost_time & 0xffff));
216 seq_printf(m, "%s: nt=%lu egt=%lu bt=%lu nb=%lu ny=%lu nos=%lu\n", 233 seq_printf(m, " balk: nt=%lu egt=%lu bt=%lu nb=%lu ny=%lu nos=%lu\n",
217 " balk",
218 rnp->n_balk_blkd_tasks, 234 rnp->n_balk_blkd_tasks,
219 rnp->n_balk_exp_gp_tasks, 235 rnp->n_balk_exp_gp_tasks,
220 rnp->n_balk_boost_tasks, 236 rnp->n_balk_boost_tasks,
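
This hunk and the neighboring ones (and the lib/list_debug.c change at the end of the diff) are the same style cleanup: a user-visible format string should stay on one source line so the message can be grepped for, so an over-long seq_printf() or WARN() is split into several calls rather than splitting the quoted string. A plain printf() sketch of the rule; the message text is illustrative:

#include <stdio.h>

static void report(int grplo, int grphi, unsigned long jiffies_now)
{
        /*
         * Discouraged: a quoted string split across source lines cannot be
         * found with a single grep of the message text:
         *
         *      printf("%d:%d boost state "
         *             "j=%04x\n", ...);
         *
         * Preferred: keep each format string whole and split the output
         * into several calls instead.
         */
        printf("%d:%d boost state ", grplo, grphi);
        printf("j=%04x\n", (int)(jiffies_now & 0xffff));
}

int main(void)
{
        report(0, 15, 0x1234beef);
        return 0;
}
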
@@ -270,15 +286,15 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)
270 struct rcu_node *rnp; 286 struct rcu_node *rnp;
271 287
272 gpnum = rsp->gpnum; 288 gpnum = rsp->gpnum;
273 seq_printf(m, "c=%lu g=%lu s=%d jfq=%ld j=%x " 289 seq_printf(m, "%s: c=%lu g=%lu s=%d jfq=%ld j=%x ",
274 "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu oqlen=%ld/%ld\n", 290 rsp->name, rsp->completed, gpnum, rsp->fqs_state,
275 rsp->completed, gpnum, rsp->fqs_state,
276 (long)(rsp->jiffies_force_qs - jiffies), 291 (long)(rsp->jiffies_force_qs - jiffies),
277 (int)(jiffies & 0xffff), 292 (int)(jiffies & 0xffff));
293 seq_printf(m, "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu oqlen=%ld/%ld\n",
278 rsp->n_force_qs, rsp->n_force_qs_ngp, 294 rsp->n_force_qs, rsp->n_force_qs_ngp,
279 rsp->n_force_qs - rsp->n_force_qs_ngp, 295 rsp->n_force_qs - rsp->n_force_qs_ngp,
280 rsp->n_force_qs_lh, rsp->qlen_lazy, rsp->qlen); 296 rsp->n_force_qs_lh, rsp->qlen_lazy, rsp->qlen);
281 for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < NUM_RCU_NODES; rnp++) { 297 for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < rcu_num_nodes; rnp++) {
282 if (rnp->level != level) { 298 if (rnp->level != level) {
283 seq_puts(m, "\n"); 299 seq_puts(m, "\n");
284 level = rnp->level; 300 level = rnp->level;
@@ -295,14 +311,10 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)
295 311
296static int show_rcuhier(struct seq_file *m, void *unused) 312static int show_rcuhier(struct seq_file *m, void *unused)
297{ 313{
298#ifdef CONFIG_TREE_PREEMPT_RCU 314 struct rcu_state *rsp;
299 seq_puts(m, "rcu_preempt:\n"); 315
300 print_one_rcu_state(m, &rcu_preempt_state); 316 for_each_rcu_flavor(rsp)
301#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ 317 print_one_rcu_state(m, rsp);
302 seq_puts(m, "rcu_sched:\n");
303 print_one_rcu_state(m, &rcu_sched_state);
304 seq_puts(m, "rcu_bh:\n");
305 print_one_rcu_state(m, &rcu_bh_state);
306 return 0; 318 return 0;
307} 319}
308 320
@@ -343,11 +355,10 @@ static void show_one_rcugp(struct seq_file *m, struct rcu_state *rsp)
343 355
344static int show_rcugp(struct seq_file *m, void *unused) 356static int show_rcugp(struct seq_file *m, void *unused)
345{ 357{
346#ifdef CONFIG_TREE_PREEMPT_RCU 358 struct rcu_state *rsp;
347 show_one_rcugp(m, &rcu_preempt_state); 359
348#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ 360 for_each_rcu_flavor(rsp)
349 show_one_rcugp(m, &rcu_sched_state); 361 show_one_rcugp(m, rsp);
350 show_one_rcugp(m, &rcu_bh_state);
351 return 0; 362 return 0;
352} 363}
353 364
@@ -366,44 +377,36 @@ static const struct file_operations rcugp_fops = {
366 377
367static void print_one_rcu_pending(struct seq_file *m, struct rcu_data *rdp) 378static void print_one_rcu_pending(struct seq_file *m, struct rcu_data *rdp)
368{ 379{
369 seq_printf(m, "%3d%cnp=%ld " 380 seq_printf(m, "%3d%cnp=%ld ",
370 "qsp=%ld rpq=%ld cbr=%ld cng=%ld "
371 "gpc=%ld gps=%ld nf=%ld nn=%ld\n",
372 rdp->cpu, 381 rdp->cpu,
373 cpu_is_offline(rdp->cpu) ? '!' : ' ', 382 cpu_is_offline(rdp->cpu) ? '!' : ' ',
374 rdp->n_rcu_pending, 383 rdp->n_rcu_pending);
384 seq_printf(m, "qsp=%ld rpq=%ld cbr=%ld cng=%ld ",
375 rdp->n_rp_qs_pending, 385 rdp->n_rp_qs_pending,
376 rdp->n_rp_report_qs, 386 rdp->n_rp_report_qs,
377 rdp->n_rp_cb_ready, 387 rdp->n_rp_cb_ready,
378 rdp->n_rp_cpu_needs_gp, 388 rdp->n_rp_cpu_needs_gp);
389 seq_printf(m, "gpc=%ld gps=%ld nf=%ld nn=%ld\n",
379 rdp->n_rp_gp_completed, 390 rdp->n_rp_gp_completed,
380 rdp->n_rp_gp_started, 391 rdp->n_rp_gp_started,
381 rdp->n_rp_need_fqs, 392 rdp->n_rp_need_fqs,
382 rdp->n_rp_need_nothing); 393 rdp->n_rp_need_nothing);
383} 394}
384 395
385static void print_rcu_pendings(struct seq_file *m, struct rcu_state *rsp) 396static int show_rcu_pending(struct seq_file *m, void *unused)
386{ 397{
387 int cpu; 398 int cpu;
388 struct rcu_data *rdp; 399 struct rcu_data *rdp;
389 400 struct rcu_state *rsp;
390 for_each_possible_cpu(cpu) { 401
391 rdp = per_cpu_ptr(rsp->rda, cpu); 402 for_each_rcu_flavor(rsp) {
392 if (rdp->beenonline) 403 seq_printf(m, "%s:\n", rsp->name);
393 print_one_rcu_pending(m, rdp); 404 for_each_possible_cpu(cpu) {
405 rdp = per_cpu_ptr(rsp->rda, cpu);
406 if (rdp->beenonline)
407 print_one_rcu_pending(m, rdp);
408 }
394 } 409 }
395}
396
397static int show_rcu_pending(struct seq_file *m, void *unused)
398{
399#ifdef CONFIG_TREE_PREEMPT_RCU
400 seq_puts(m, "rcu_preempt:\n");
401 print_rcu_pendings(m, &rcu_preempt_state);
402#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
403 seq_puts(m, "rcu_sched:\n");
404 print_rcu_pendings(m, &rcu_sched_state);
405 seq_puts(m, "rcu_bh:\n");
406 print_rcu_pendings(m, &rcu_bh_state);
407 return 0; 410 return 0;
408} 411}
409 412
@@ -453,6 +456,11 @@ static int __init rcutree_trace_init(void)
453 if (!rcudir) 456 if (!rcudir)
454 goto free_out; 457 goto free_out;
455 458
459 retval = debugfs_create_file("rcubarrier", 0444, rcudir,
460 NULL, &rcubarrier_fops);
461 if (!retval)
462 goto free_out;
463
456 retval = debugfs_create_file("rcudata", 0444, rcudir, 464 retval = debugfs_create_file("rcudata", 0444, rcudir,
457 NULL, &rcudata_fops); 465 NULL, &rcudata_fops);
458 if (!retval) 466 if (!retval)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index d5594a4268d4..eaead2df6aa8 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2081,7 +2081,6 @@ context_switch(struct rq *rq, struct task_struct *prev,
2081#endif 2081#endif
2082 2082
2083 /* Here we just switch the register state and the stack. */ 2083 /* Here we just switch the register state and the stack. */
2084 rcu_switch_from(prev);
2085 switch_to(prev, next, prev); 2084 switch_to(prev, next, prev);
2086 2085
2087 barrier(); 2086 barrier();
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 869997833928..66ff07f6184c 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -105,7 +105,7 @@ static ktime_t tick_init_jiffy_update(void)
105/* 105/*
106 * NO HZ enabled ? 106 * NO HZ enabled ?
107 */ 107 */
108static int tick_nohz_enabled __read_mostly = 1; 108int tick_nohz_enabled __read_mostly = 1;
109 109
110/* 110/*
111 * Enable / Disable tickless mode 111 * Enable / Disable tickless mode
diff --git a/lib/list_debug.c b/lib/list_debug.c
index 23a5e031cd8b..c24c2f7e296f 100644
--- a/lib/list_debug.c
+++ b/lib/list_debug.c
@@ -87,12 +87,10 @@ void __list_add_rcu(struct list_head *new,
87 struct list_head *prev, struct list_head *next) 87 struct list_head *prev, struct list_head *next)
88{ 88{
89 WARN(next->prev != prev, 89 WARN(next->prev != prev,
90 "list_add_rcu corruption. next->prev should be " 90 "list_add_rcu corruption. next->prev should be prev (%p), but was %p. (next=%p).\n",
91 "prev (%p), but was %p. (next=%p).\n",
92 prev, next->prev, next); 91 prev, next->prev, next);
93 WARN(prev->next != next, 92 WARN(prev->next != next,
94 "list_add_rcu corruption. prev->next should be " 93 "list_add_rcu corruption. prev->next should be next (%p), but was %p. (prev=%p).\n",
95 "next (%p), but was %p. (prev=%p).\n",
96 next, prev->next, prev); 94 next, prev->next, prev);
97 new->next = next; 95 new->next = next;
98 new->prev = prev; 96 new->prev = prev;