-rw-r--r--  Documentation/RCU/checklist.txt   |  39
-rw-r--r--  Documentation/RCU/rcubarrier.txt  |  15
-rw-r--r--  Documentation/RCU/torture.txt     |   9
-rw-r--r--  Documentation/RCU/whatisRCU.txt   |   6
-rw-r--r--  include/linux/init_task.h         |   4
-rw-r--r--  include/linux/key.h               |   4
-rw-r--r--  include/linux/rcupdate.h          |  50
-rw-r--r--  kernel/rcupdate.c                 |  44
-rw-r--r--  kernel/rcutiny.c                  |   4
-rw-r--r--  kernel/rcutiny_plugin.h           |  49
-rw-r--r--  kernel/rcutorture.c               |  36
-rw-r--r--  kernel/rcutree.c                  | 164
-rw-r--r--  kernel/rcutree.h                  |   1
-rw-r--r--  kernel/rcutree_plugin.h           |  81
-rw-r--r--  kernel/time/tick-sched.c          |   2
15 files changed, 251 insertions(+), 257 deletions(-)
diff --git a/Documentation/RCU/checklist.txt b/Documentation/RCU/checklist.txt
index 5c8d74968090..fc103d7a0474 100644
--- a/Documentation/RCU/checklist.txt
+++ b/Documentation/RCU/checklist.txt
@@ -162,9 +162,9 @@ over a rather long period of time, but improvements are always welcome!
162 when publicizing a pointer to a structure that can 162 when publicizing a pointer to a structure that can
163 be traversed by an RCU read-side critical section. 163 be traversed by an RCU read-side critical section.
164 164
1655. If call_rcu(), or a related primitive such as call_rcu_bh() or 1655. If call_rcu(), or a related primitive such as call_rcu_bh(),
166 call_rcu_sched(), is used, the callback function must be 166 call_rcu_sched(), or call_srcu() is used, the callback function
167 written to be called from softirq context. In particular, 167 must be written to be called from softirq context. In particular,
168 it cannot block. 168 it cannot block.
169 169
1706. Since synchronize_rcu() can block, it cannot be called from 1706. Since synchronize_rcu() can block, it cannot be called from
@@ -202,11 +202,12 @@ over a rather long period of time, but improvements are always welcome!
202 updater uses call_rcu_sched() or synchronize_sched(), then 202 updater uses call_rcu_sched() or synchronize_sched(), then
203 the corresponding readers must disable preemption, possibly 203 the corresponding readers must disable preemption, possibly
204 by calling rcu_read_lock_sched() and rcu_read_unlock_sched(). 204 by calling rcu_read_lock_sched() and rcu_read_unlock_sched().
205 If the updater uses synchronize_srcu(), the the corresponding 205 If the updater uses synchronize_srcu() or call_srcu(),
206 readers must use srcu_read_lock() and srcu_read_unlock(), 206 the the corresponding readers must use srcu_read_lock() and
207 and with the same srcu_struct. The rules for the expedited 207 srcu_read_unlock(), and with the same srcu_struct. The rules for
208 primitives are the same as for their non-expedited counterparts. 208 the expedited primitives are the same as for their non-expedited
209 Mixing things up will result in confusion and broken kernels. 209 counterparts. Mixing things up will result in confusion and
210 broken kernels.
210 211
211 One exception to this rule: rcu_read_lock() and rcu_read_unlock() 212 One exception to this rule: rcu_read_lock() and rcu_read_unlock()
212 may be substituted for rcu_read_lock_bh() and rcu_read_unlock_bh() 213 may be substituted for rcu_read_lock_bh() and rcu_read_unlock_bh()
@@ -333,14 +334,14 @@ over a rather long period of time, but improvements are always welcome!
333 victim CPU from ever going offline.) 334 victim CPU from ever going offline.)
334 335
33514. SRCU (srcu_read_lock(), srcu_read_unlock(), srcu_dereference(), 33614. SRCU (srcu_read_lock(), srcu_read_unlock(), srcu_dereference(),
336 synchronize_srcu(), and synchronize_srcu_expedited()) may only 337 synchronize_srcu(), synchronize_srcu_expedited(), and call_srcu())
337 be invoked from process context. Unlike other forms of RCU, it 338 may only be invoked from process context. Unlike other forms of
338 -is- permissible to block in an SRCU read-side critical section 339 RCU, it -is- permissible to block in an SRCU read-side critical
339 (demarked by srcu_read_lock() and srcu_read_unlock()), hence the 340 section (demarked by srcu_read_lock() and srcu_read_unlock()),
340 "SRCU": "sleepable RCU". Please note that if you don't need 341 hence the "SRCU": "sleepable RCU". Please note that if you
341 to sleep in read-side critical sections, you should be using 342 don't need to sleep in read-side critical sections, you should be
342 RCU rather than SRCU, because RCU is almost always faster and 343 using RCU rather than SRCU, because RCU is almost always faster
343 easier to use than is SRCU. 344 and easier to use than is SRCU.
344 345
345 If you need to enter your read-side critical section in a 346 If you need to enter your read-side critical section in a
346 hardirq or exception handler, and then exit that same read-side 347 hardirq or exception handler, and then exit that same read-side
@@ -353,8 +354,8 @@ over a rather long period of time, but improvements are always welcome!
353 cleanup_srcu_struct(). These are passed a "struct srcu_struct" 354 cleanup_srcu_struct(). These are passed a "struct srcu_struct"
354 that defines the scope of a given SRCU domain. Once initialized, 355 that defines the scope of a given SRCU domain. Once initialized,
355 the srcu_struct is passed to srcu_read_lock(), srcu_read_unlock() 356 the srcu_struct is passed to srcu_read_lock(), srcu_read_unlock()
356 synchronize_srcu(), and synchronize_srcu_expedited(). A given 357 synchronize_srcu(), synchronize_srcu_expedited(), and call_srcu().
357 synchronize_srcu() waits only for SRCU read-side critical 358 A given synchronize_srcu() waits only for SRCU read-side critical
358 sections governed by srcu_read_lock() and srcu_read_unlock() 359 sections governed by srcu_read_lock() and srcu_read_unlock()
359 calls that have been passed the same srcu_struct. This property 360 calls that have been passed the same srcu_struct. This property
360 is what makes sleeping read-side critical sections tolerable -- 361 is what makes sleeping read-side critical sections tolerable --
@@ -374,7 +375,7 @@ over a rather long period of time, but improvements are always welcome!
374 requiring SRCU's read-side deadlock immunity or low read-side 375 requiring SRCU's read-side deadlock immunity or low read-side
375 realtime latency. 376 realtime latency.
376 377
377 Note that, rcu_assign_pointer() relates to SRCU just as they do 378 Note that, rcu_assign_pointer() relates to SRCU just as it does
378 to other forms of RCU. 379 to other forms of RCU.
379 380
38015. The whole point of call_rcu(), synchronize_rcu(), and friends 38115. The whole point of call_rcu(), synchronize_rcu(), and friends
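A minimal sketch (hypothetical names, not taken from this patch) of the usage pattern the revised checklist items describe: readers bracket accesses with srcu_read_lock()/srcu_read_unlock() and srcu_dereference(), the updater publishes with rcu_assign_pointer() and frees through call_srcu() on the same srcu_struct, and the callback runs from softirq context so it must not block.

#include <linux/rcupdate.h>
#include <linux/srcu.h>
#include <linux/slab.h>

struct my_data {                        /* hypothetical structure */
	int value;
	struct rcu_head rh;
};

static struct srcu_struct my_srcu;      /* init_srcu_struct(&my_srcu) at init time */
static struct my_data __rcu *my_ptr;    /* assumed already published below */

static void my_free_cb(struct rcu_head *rh)
{
	/* Invoked from softirq context, so it must not block (item 5). */
	kfree(container_of(rh, struct my_data, rh));
}

static int my_reader(void)
{
	int idx, v;

	idx = srcu_read_lock(&my_srcu);              /* sleeping is permitted here */
	v = srcu_dereference(my_ptr, &my_srcu)->value;
	srcu_read_unlock(&my_srcu, idx);
	return v;
}

static void my_update(struct my_data *newp)
{
	struct my_data *oldp;

	oldp = rcu_dereference_protected(my_ptr, 1); /* caller holds the update-side lock */
	rcu_assign_pointer(my_ptr, newp);
	call_srcu(&my_srcu, &oldp->rh, my_free_cb);  /* same srcu_struct as the readers */
}
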
diff --git a/Documentation/RCU/rcubarrier.txt b/Documentation/RCU/rcubarrier.txt
index e439a0edee22..38428c125135 100644
--- a/Documentation/RCU/rcubarrier.txt
+++ b/Documentation/RCU/rcubarrier.txt
@@ -79,8 +79,6 @@ complete. Pseudo-code using rcu_barrier() is as follows:
79 2. Execute rcu_barrier(). 79 2. Execute rcu_barrier().
80 3. Allow the module to be unloaded. 80 3. Allow the module to be unloaded.
81 81
82Quick Quiz #1: Why is there no srcu_barrier()?
83
84The rcutorture module makes use of rcu_barrier in its exit function 82The rcutorture module makes use of rcu_barrier in its exit function
85as follows: 83as follows:
86 84
@@ -162,7 +160,7 @@ for any pre-existing callbacks to complete.
162Then lines 55-62 print status and do operation-specific cleanup, and 160Then lines 55-62 print status and do operation-specific cleanup, and
163then return, permitting the module-unload operation to be completed. 161then return, permitting the module-unload operation to be completed.
164 162
165Quick Quiz #2: Is there any other situation where rcu_barrier() might 163Quick Quiz #1: Is there any other situation where rcu_barrier() might
166 be required? 164 be required?
167 165
168Your module might have additional complications. For example, if your 166Your module might have additional complications. For example, if your
@@ -242,7 +240,7 @@ reaches zero, as follows:
242 4 complete(&rcu_barrier_completion); 240 4 complete(&rcu_barrier_completion);
243 5 } 241 5 }
244 242
245Quick Quiz #3: What happens if CPU 0's rcu_barrier_func() executes 243Quick Quiz #2: What happens if CPU 0's rcu_barrier_func() executes
246 immediately (thus incrementing rcu_barrier_cpu_count to the 244 immediately (thus incrementing rcu_barrier_cpu_count to the
247 value one), but the other CPU's rcu_barrier_func() invocations 245 value one), but the other CPU's rcu_barrier_func() invocations
248 are delayed for a full grace period? Couldn't this result in 246 are delayed for a full grace period? Couldn't this result in
@@ -259,12 +257,7 @@ so that your module may be safely unloaded.
259 257
260Answers to Quick Quizzes 258Answers to Quick Quizzes
261 259
262Quick Quiz #1: Why is there no srcu_barrier()? 260Quick Quiz #1: Is there any other situation where rcu_barrier() might
263
264Answer: Since there is no call_srcu(), there can be no outstanding SRCU
265 callbacks. Therefore, there is no need to wait for them.
266
267Quick Quiz #2: Is there any other situation where rcu_barrier() might
268 be required? 261 be required?
269 262
270Answer: Interestingly enough, rcu_barrier() was not originally 263Answer: Interestingly enough, rcu_barrier() was not originally
@@ -278,7 +271,7 @@ Answer: Interestingly enough, rcu_barrier() was not originally
278 implementing rcutorture, and found that rcu_barrier() solves 271 implementing rcutorture, and found that rcu_barrier() solves
279 this problem as well. 272 this problem as well.
280 273
281Quick Quiz #3: What happens if CPU 0's rcu_barrier_func() executes 274Quick Quiz #2: What happens if CPU 0's rcu_barrier_func() executes
282 immediately (thus incrementing rcu_barrier_cpu_count to the 275 immediately (thus incrementing rcu_barrier_cpu_count to the
283 value one), but the other CPU's rcu_barrier_func() invocations 276 value one), but the other CPU's rcu_barrier_func() invocations
284 are delayed for a full grace period? Couldn't this result in 277 are delayed for a full grace period? Couldn't this result in
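With call_srcu() in the tree, the three-step unload discipline described above for call_rcu() applies to SRCU as well, using srcu_barrier(). A hedged sketch with hypothetical names:

#include <linux/module.h>
#include <linux/srcu.h>

static struct srcu_struct my_srcu;      /* hypothetical per-module SRCU domain */
static bool my_exiting;                 /* checked before posting new callbacks */

static void __exit my_exit(void)
{
	my_exiting = true;              /* 1. Stop posting new SRCU callbacks.  */
	srcu_barrier(&my_srcu);         /* 2. Wait for outstanding callbacks.   */
	cleanup_srcu_struct(&my_srcu);  /* 3. Now the module may be unloaded.   */
}
module_exit(my_exit);
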
diff --git a/Documentation/RCU/torture.txt b/Documentation/RCU/torture.txt
index 4ddf3913fd8c..7dce8a17eac2 100644
--- a/Documentation/RCU/torture.txt
+++ b/Documentation/RCU/torture.txt
@@ -174,11 +174,20 @@ torture_type The type of RCU to test, with string values as follows:
174 and synchronize_rcu_bh_expedited(). 174 and synchronize_rcu_bh_expedited().
175 175
176 "srcu": srcu_read_lock(), srcu_read_unlock() and 176 "srcu": srcu_read_lock(), srcu_read_unlock() and
177 call_srcu().
178
179 "srcu_sync": srcu_read_lock(), srcu_read_unlock() and
177 synchronize_srcu(). 180 synchronize_srcu().
178 181
179 "srcu_expedited": srcu_read_lock(), srcu_read_unlock() and 182 "srcu_expedited": srcu_read_lock(), srcu_read_unlock() and
180 synchronize_srcu_expedited(). 183 synchronize_srcu_expedited().
181 184
185 "srcu_raw": srcu_read_lock_raw(), srcu_read_unlock_raw(),
186 and call_srcu().
187
188 "srcu_raw_sync": srcu_read_lock_raw(), srcu_read_unlock_raw(),
189 and synchronize_srcu().
190
182 "sched": preempt_disable(), preempt_enable(), and 191 "sched": preempt_disable(), preempt_enable(), and
183 call_rcu_sched(). 192 call_rcu_sched().
184 193
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
index 6bbe8dcdc3da..69ee188515e7 100644
--- a/Documentation/RCU/whatisRCU.txt
+++ b/Documentation/RCU/whatisRCU.txt
@@ -833,9 +833,9 @@ sched: Critical sections Grace period Barrier
833 833
834SRCU: Critical sections Grace period Barrier 834SRCU: Critical sections Grace period Barrier
835 835
836 srcu_read_lock synchronize_srcu N/A 836 srcu_read_lock synchronize_srcu srcu_barrier
837 srcu_read_unlock synchronize_srcu_expedited 837 srcu_read_unlock call_srcu
838 srcu_read_lock_raw 838 srcu_read_lock_raw synchronize_srcu_expedited
839 srcu_read_unlock_raw 839 srcu_read_unlock_raw
840 srcu_dereference 840 srcu_dereference
841 841
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 9e65eff6af3b..8a7476186990 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -168,8 +168,8 @@ extern struct cred init_cred;
168 .children = LIST_HEAD_INIT(tsk.children), \ 168 .children = LIST_HEAD_INIT(tsk.children), \
169 .sibling = LIST_HEAD_INIT(tsk.sibling), \ 169 .sibling = LIST_HEAD_INIT(tsk.sibling), \
170 .group_leader = &tsk, \ 170 .group_leader = &tsk, \
171 RCU_INIT_POINTER(.real_cred, &init_cred), \ 171 RCU_POINTER_INITIALIZER(real_cred, &init_cred), \
172 RCU_INIT_POINTER(.cred, &init_cred), \ 172 RCU_POINTER_INITIALIZER(cred, &init_cred), \
173 .comm = INIT_TASK_COMM, \ 173 .comm = INIT_TASK_COMM, \
174 .thread = INIT_THREAD, \ 174 .thread = INIT_THREAD, \
175 .fs = &init_fs, \ 175 .fs = &init_fs, \
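The init_task change switches from RCU_INIT_POINTER(), which is now a do { } while (0) statement, to the new RCU_POINTER_INITIALIZER(), a designated-initializer form usable inside a static structure initializer. A hedged sketch with hypothetical names:

#include <linux/rcupdate.h>

struct my_config {                      /* hypothetical */
	int limit;
};

static struct my_config my_default_config = { .limit = 10 };

struct my_context {
	struct my_config __rcu *cfg;
	int id;
};

/* Static initialization: designated-initializer form. */
static struct my_context my_ctx = {
	RCU_POINTER_INITIALIZER(cfg, &my_default_config),
	.id = 0,
};

/* Run-time initialization (no reader can see the pointer yet): statement form. */
static void my_ctx_reset(struct my_context *ctx)
{
	RCU_INIT_POINTER(ctx->cfg, &my_default_config);
}
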
diff --git a/include/linux/key.h b/include/linux/key.h
index 4cd22ed627ef..cef3b315ba7c 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -303,7 +303,9 @@ static inline bool key_is_instantiated(const struct key *key)
303 rwsem_is_locked(&((struct key *)(KEY))->sem))) 303 rwsem_is_locked(&((struct key *)(KEY))->sem)))
304 304
305#define rcu_assign_keypointer(KEY, PAYLOAD) \ 305#define rcu_assign_keypointer(KEY, PAYLOAD) \
306 (rcu_assign_pointer((KEY)->payload.rcudata, PAYLOAD)) 306do { \
307 rcu_assign_pointer((KEY)->payload.rcudata, (PAYLOAD)); \
308} while (0)
307 309
308#ifdef CONFIG_SYSCTL 310#ifdef CONFIG_SYSCTL
309extern ctl_table key_sysctls[]; 311extern ctl_table key_sysctls[];
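The rcu_assign_keypointer() rework follows __rcu_assign_pointer() becoming a do { } while (0) statement rather than an expression; wrapping the key macro the same way keeps it expanding to exactly one statement, so it still composes with unbraced if/else, as in this hypothetical caller:

#include <linux/key.h>

/* Hypothetical helper: the do { } while (0) body makes the macro a single
 * statement, so both branches below remain well-formed without braces. */
static void my_set_payload(struct key *key, void *newp, void *defp)
{
	if (newp)
		rcu_assign_keypointer(key, newp);
	else
		rcu_assign_keypointer(key, defp);
}
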
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 9cac722b169c..c2c0d86dd3ac 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -147,6 +147,7 @@ extern void synchronize_sched(void);
147 147
148extern void __rcu_read_lock(void); 148extern void __rcu_read_lock(void);
149extern void __rcu_read_unlock(void); 149extern void __rcu_read_unlock(void);
150extern void rcu_read_unlock_special(struct task_struct *t);
150void synchronize_rcu(void); 151void synchronize_rcu(void);
151 152
152/* 153/*
@@ -255,6 +256,10 @@ static inline void destroy_rcu_head_on_stack(struct rcu_head *head)
255} 256}
256#endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ 257#endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
257 258
259#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SMP)
260extern int rcu_is_cpu_idle(void);
261#endif /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SMP) */
262
258#if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) 263#if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU)
259bool rcu_lockdep_current_cpu_online(void); 264bool rcu_lockdep_current_cpu_online(void);
260#else /* #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */ 265#else /* #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */
@@ -266,15 +271,6 @@ static inline bool rcu_lockdep_current_cpu_online(void)
266 271
267#ifdef CONFIG_DEBUG_LOCK_ALLOC 272#ifdef CONFIG_DEBUG_LOCK_ALLOC
268 273
269#ifdef CONFIG_PROVE_RCU
270extern int rcu_is_cpu_idle(void);
271#else /* !CONFIG_PROVE_RCU */
272static inline int rcu_is_cpu_idle(void)
273{
274 return 0;
275}
276#endif /* else !CONFIG_PROVE_RCU */
277
278static inline void rcu_lock_acquire(struct lockdep_map *map) 274static inline void rcu_lock_acquire(struct lockdep_map *map)
279{ 275{
280 lock_acquire(map, 0, 0, 2, 1, NULL, _THIS_IP_); 276 lock_acquire(map, 0, 0, 2, 1, NULL, _THIS_IP_);
@@ -513,10 +509,10 @@ static inline void rcu_preempt_sleep_check(void)
513 (_________p1); \ 509 (_________p1); \
514 }) 510 })
515#define __rcu_assign_pointer(p, v, space) \ 511#define __rcu_assign_pointer(p, v, space) \
516 ({ \ 512 do { \
517 smp_wmb(); \ 513 smp_wmb(); \
518 (p) = (typeof(*v) __force space *)(v); \ 514 (p) = (typeof(*v) __force space *)(v); \
519 }) 515 } while (0)
520 516
521 517
522/** 518/**
@@ -851,7 +847,7 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
851 * 847 *
852 * Assigns the specified value to the specified RCU-protected 848 * Assigns the specified value to the specified RCU-protected
853 * pointer, ensuring that any concurrent RCU readers will see 849 * pointer, ensuring that any concurrent RCU readers will see
854 * any prior initialization. Returns the value assigned. 850 * any prior initialization.
855 * 851 *
856 * Inserts memory barriers on architectures that require them 852 * Inserts memory barriers on architectures that require them
857 * (which is most of them), and also prevents the compiler from 853 * (which is most of them), and also prevents the compiler from
@@ -903,25 +899,17 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
903 * the reader-accessible portions of the linked structure. 899 * the reader-accessible portions of the linked structure.
904 */ 900 */
905#define RCU_INIT_POINTER(p, v) \ 901#define RCU_INIT_POINTER(p, v) \
906 p = (typeof(*v) __force __rcu *)(v) 902 do { \
907 903 p = (typeof(*v) __force __rcu *)(v); \
908static __always_inline bool __is_kfree_rcu_offset(unsigned long offset) 904 } while (0)
909{
910 return offset < 4096;
911}
912
913static __always_inline
914void __kfree_rcu(struct rcu_head *head, unsigned long offset)
915{
916 typedef void (*rcu_callback)(struct rcu_head *);
917
918 BUILD_BUG_ON(!__builtin_constant_p(offset));
919
920 /* See the kfree_rcu() header comment. */
921 BUILD_BUG_ON(!__is_kfree_rcu_offset(offset));
922 905
923 kfree_call_rcu(head, (rcu_callback)offset); 906/**
924} 907 * RCU_POINTER_INITIALIZER() - statically initialize an RCU protected pointer
908 *
909 * GCC-style initialization for an RCU-protected pointer in a structure field.
910 */
911#define RCU_POINTER_INITIALIZER(p, v) \
912 .p = (typeof(*v) __force __rcu *)(v)
925 913
926/* 914/*
927 * Does the specified offset indicate that the corresponding rcu_head 915 * Does the specified offset indicate that the corresponding rcu_head
@@ -935,7 +923,7 @@ void __kfree_rcu(struct rcu_head *head, unsigned long offset)
935#define __kfree_rcu(head, offset) \ 923#define __kfree_rcu(head, offset) \
936 do { \ 924 do { \
937 BUILD_BUG_ON(!__is_kfree_rcu_offset(offset)); \ 925 BUILD_BUG_ON(!__is_kfree_rcu_offset(offset)); \
938 call_rcu(head, (void (*)(struct rcu_head *))(unsigned long)(offset)); \ 926 kfree_call_rcu(head, (void (*)(struct rcu_head *))(unsigned long)(offset)); \
939 } while (0) 927 } while (0)
940 928
941/** 929/**
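For callers nothing changes here: kfree_rcu() still encodes the rcu_head offset in place of a callback pointer, but __kfree_rcu() now queues it through kfree_call_rcu() rather than call_rcu(). A hedged sketch with a hypothetical structure:

#include <linux/rcupdate.h>
#include <linux/slab.h>

struct my_node {                        /* hypothetical */
	int key;
	struct rcu_head rh;             /* offsetof(struct my_node, rh) < 4096, as required */
};

static void my_node_release(struct my_node *p)
{
	/*
	 * Effectively __kfree_rcu(&p->rh, offsetof(struct my_node, rh)):
	 * the "callback pointer" handed to kfree_call_rcu() is really the
	 * offset back to the start of the object to be kfree()d.
	 */
	kfree_rcu(p, rh);
}
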
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 95cba41ce1e9..4e6a61b15e86 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -54,6 +54,50 @@
54#ifdef CONFIG_PREEMPT_RCU 54#ifdef CONFIG_PREEMPT_RCU
55 55
56/* 56/*
57 * Preemptible RCU implementation for rcu_read_lock().
58 * Just increment ->rcu_read_lock_nesting, shared state will be updated
59 * if we block.
60 */
61void __rcu_read_lock(void)
62{
63 current->rcu_read_lock_nesting++;
64 barrier(); /* critical section after entry code. */
65}
66EXPORT_SYMBOL_GPL(__rcu_read_lock);
67
68/*
69 * Preemptible RCU implementation for rcu_read_unlock().
70 * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost
71 * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
72 * invoke rcu_read_unlock_special() to clean up after a context switch
73 * in an RCU read-side critical section and other special cases.
74 */
75void __rcu_read_unlock(void)
76{
77 struct task_struct *t = current;
78
79 if (t->rcu_read_lock_nesting != 1) {
80 --t->rcu_read_lock_nesting;
81 } else {
82 barrier(); /* critical section before exit code. */
83 t->rcu_read_lock_nesting = INT_MIN;
84 barrier(); /* assign before ->rcu_read_unlock_special load */
85 if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
86 rcu_read_unlock_special(t);
87 barrier(); /* ->rcu_read_unlock_special load before assign */
88 t->rcu_read_lock_nesting = 0;
89 }
90#ifdef CONFIG_PROVE_LOCKING
91 {
92 int rrln = ACCESS_ONCE(t->rcu_read_lock_nesting);
93
94 WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2);
95 }
96#endif /* #ifdef CONFIG_PROVE_LOCKING */
97}
98EXPORT_SYMBOL_GPL(__rcu_read_unlock);
99
100/*
57 * Check for a task exiting while in a preemptible-RCU read-side 101 * Check for a task exiting while in a preemptible-RCU read-side
58 * critical section, clean up if so. No need to issue warnings, 102 * critical section, clean up if so. No need to issue warnings,
59 * as debug_check_no_locks_held() already does this if lockdep 103 * as debug_check_no_locks_held() already does this if lockdep
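The preemptible-RCU lock/unlock fastpaths now live in this common file, shared by Tiny and Tree RCU. A hedged illustration of the nesting behaviour they implement, with a hypothetical reader:

#include <linux/rcupdate.h>

/* Hypothetical reader: read-side critical sections nest; only the outermost
 * rcu_read_unlock() (nesting count dropping from 1) may need to invoke
 * rcu_read_unlock_special(), per __rcu_read_unlock() above. */
static void my_nested_reader(void)
{
	rcu_read_lock();                /* ->rcu_read_lock_nesting: 0 -> 1 */
	rcu_read_lock();                /* 1 -> 2 (inner section)          */
	/* ... access RCU-protected data ... */
	rcu_read_unlock();              /* 2 -> 1: just decrements          */
	rcu_read_unlock();              /* outermost: may run _special()    */
}
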
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
index 37a5444204d2..547b1fe5b052 100644
--- a/kernel/rcutiny.c
+++ b/kernel/rcutiny.c
@@ -172,7 +172,7 @@ void rcu_irq_enter(void)
172 local_irq_restore(flags); 172 local_irq_restore(flags);
173} 173}
174 174
175#ifdef CONFIG_PROVE_RCU 175#ifdef CONFIG_DEBUG_LOCK_ALLOC
176 176
177/* 177/*
178 * Test whether RCU thinks that the current CPU is idle. 178 * Test whether RCU thinks that the current CPU is idle.
@@ -183,7 +183,7 @@ int rcu_is_cpu_idle(void)
183} 183}
184EXPORT_SYMBOL(rcu_is_cpu_idle); 184EXPORT_SYMBOL(rcu_is_cpu_idle);
185 185
186#endif /* #ifdef CONFIG_PROVE_RCU */ 186#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
187 187
188/* 188/*
189 * Test whether the current CPU was interrupted from idle. Nested 189 * Test whether the current CPU was interrupted from idle. Nested
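rcu_is_cpu_idle() is now built whenever CONFIG_DEBUG_LOCK_ALLOC is set, matching the relaxed declaration in rcupdate.h above; lockdep-style checks such as rcu_read_lock_held() use it to rule out readers on idle CPUs. A hedged sketch of that kind of check, with a hypothetical helper:

#include <linux/rcupdate.h>

/*
 * Hypothetical debug helper, patterned on rcu_read_lock_held(): a CPU that
 * RCU considers idle cannot legally be inside an RCU read-side critical
 * section, so "idle" means "definitely no reader here".
 */
static int my_rcu_reader_possible(void)
{
	if (rcu_is_cpu_idle())
		return 0;
	return 1;
}
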
diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h
index fc31a2d65100..116725b5edfb 100644
--- a/kernel/rcutiny_plugin.h
+++ b/kernel/rcutiny_plugin.h
@@ -132,7 +132,6 @@ static struct rcu_preempt_ctrlblk rcu_preempt_ctrlblk = {
132 RCU_TRACE(.rcb.name = "rcu_preempt") 132 RCU_TRACE(.rcb.name = "rcu_preempt")
133}; 133};
134 134
135static void rcu_read_unlock_special(struct task_struct *t);
136static int rcu_preempted_readers_exp(void); 135static int rcu_preempted_readers_exp(void);
137static void rcu_report_exp_done(void); 136static void rcu_report_exp_done(void);
138 137
@@ -527,23 +526,11 @@ void rcu_preempt_note_context_switch(void)
527} 526}
528 527
529/* 528/*
530 * Tiny-preemptible RCU implementation for rcu_read_lock().
531 * Just increment ->rcu_read_lock_nesting, shared state will be updated
532 * if we block.
533 */
534void __rcu_read_lock(void)
535{
536 current->rcu_read_lock_nesting++;
537 barrier(); /* needed if we ever invoke rcu_read_lock in rcutiny.c */
538}
539EXPORT_SYMBOL_GPL(__rcu_read_lock);
540
541/*
542 * Handle special cases during rcu_read_unlock(), such as needing to 529 * Handle special cases during rcu_read_unlock(), such as needing to
543 * notify RCU core processing or task having blocked during the RCU 530 * notify RCU core processing or task having blocked during the RCU
544 * read-side critical section. 531 * read-side critical section.
545 */ 532 */
546static noinline void rcu_read_unlock_special(struct task_struct *t) 533void rcu_read_unlock_special(struct task_struct *t)
547{ 534{
548 int empty; 535 int empty;
549 int empty_exp; 536 int empty_exp;
@@ -627,38 +614,6 @@ static noinline void rcu_read_unlock_special(struct task_struct *t)
627} 614}
628 615
629/* 616/*
630 * Tiny-preemptible RCU implementation for rcu_read_unlock().
631 * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost
632 * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
633 * invoke rcu_read_unlock_special() to clean up after a context switch
634 * in an RCU read-side critical section and other special cases.
635 */
636void __rcu_read_unlock(void)
637{
638 struct task_struct *t = current;
639
640 barrier(); /* needed if we ever invoke rcu_read_unlock in rcutiny.c */
641 if (t->rcu_read_lock_nesting != 1)
642 --t->rcu_read_lock_nesting;
643 else {
644 t->rcu_read_lock_nesting = INT_MIN;
645 barrier(); /* assign before ->rcu_read_unlock_special load */
646 if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
647 rcu_read_unlock_special(t);
648 barrier(); /* ->rcu_read_unlock_special load before assign */
649 t->rcu_read_lock_nesting = 0;
650 }
651#ifdef CONFIG_PROVE_LOCKING
652 {
653 int rrln = ACCESS_ONCE(t->rcu_read_lock_nesting);
654
655 WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2);
656 }
657#endif /* #ifdef CONFIG_PROVE_LOCKING */
658}
659EXPORT_SYMBOL_GPL(__rcu_read_unlock);
660
661/*
662 * Check for a quiescent state from the current CPU. When a task blocks, 617 * Check for a quiescent state from the current CPU. When a task blocks,
663 * the task is recorded in the rcu_preempt_ctrlblk structure, which is 618 * the task is recorded in the rcu_preempt_ctrlblk structure, which is
664 * checked elsewhere. This is called from the scheduling-clock interrupt. 619 * checked elsewhere. This is called from the scheduling-clock interrupt.
@@ -846,8 +801,6 @@ EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
846 */ 801 */
847int rcu_preempt_needs_cpu(void) 802int rcu_preempt_needs_cpu(void)
848{ 803{
849 if (!rcu_preempt_running_reader())
850 rcu_preempt_cpu_qs();
851 return rcu_preempt_ctrlblk.rcb.rcucblist != NULL; 804 return rcu_preempt_ctrlblk.rcb.rcucblist != NULL;
852} 805}
853 806
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index e66b34ab7555..c279ee920947 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -206,6 +206,7 @@ static unsigned long boost_starttime; /* jiffies of next boost test start. */
206DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */ 206DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */
207 /* and boost task create/destroy. */ 207 /* and boost task create/destroy. */
208static atomic_t barrier_cbs_count; /* Barrier callbacks registered. */ 208static atomic_t barrier_cbs_count; /* Barrier callbacks registered. */
209static bool barrier_phase; /* Test phase. */
209static atomic_t barrier_cbs_invoked; /* Barrier callbacks invoked. */ 210static atomic_t barrier_cbs_invoked; /* Barrier callbacks invoked. */
210static wait_queue_head_t *barrier_cbs_wq; /* Coordinate barrier testing. */ 211static wait_queue_head_t *barrier_cbs_wq; /* Coordinate barrier testing. */
211static DECLARE_WAIT_QUEUE_HEAD(barrier_wq); 212static DECLARE_WAIT_QUEUE_HEAD(barrier_wq);
@@ -635,6 +636,17 @@ static void srcu_torture_synchronize(void)
635 synchronize_srcu(&srcu_ctl); 636 synchronize_srcu(&srcu_ctl);
636} 637}
637 638
639static void srcu_torture_call(struct rcu_head *head,
640 void (*func)(struct rcu_head *head))
641{
642 call_srcu(&srcu_ctl, head, func);
643}
644
645static void srcu_torture_barrier(void)
646{
647 srcu_barrier(&srcu_ctl);
648}
649
638static int srcu_torture_stats(char *page) 650static int srcu_torture_stats(char *page)
639{ 651{
640 int cnt = 0; 652 int cnt = 0;
@@ -661,8 +673,8 @@ static struct rcu_torture_ops srcu_ops = {
661 .completed = srcu_torture_completed, 673 .completed = srcu_torture_completed,
662 .deferred_free = srcu_torture_deferred_free, 674 .deferred_free = srcu_torture_deferred_free,
663 .sync = srcu_torture_synchronize, 675 .sync = srcu_torture_synchronize,
664 .call = NULL, 676 .call = srcu_torture_call,
665 .cb_barrier = NULL, 677 .cb_barrier = srcu_torture_barrier,
666 .stats = srcu_torture_stats, 678 .stats = srcu_torture_stats,
667 .name = "srcu" 679 .name = "srcu"
668}; 680};
@@ -1013,7 +1025,11 @@ rcu_torture_fakewriter(void *arg)
1013 do { 1025 do {
1014 schedule_timeout_uninterruptible(1 + rcu_random(&rand)%10); 1026 schedule_timeout_uninterruptible(1 + rcu_random(&rand)%10);
1015 udelay(rcu_random(&rand) & 0x3ff); 1027 udelay(rcu_random(&rand) & 0x3ff);
1016 cur_ops->sync(); 1028 if (cur_ops->cb_barrier != NULL &&
1029 rcu_random(&rand) % (nfakewriters * 8) == 0)
1030 cur_ops->cb_barrier();
1031 else
1032 cur_ops->sync();
1017 rcu_stutter_wait("rcu_torture_fakewriter"); 1033 rcu_stutter_wait("rcu_torture_fakewriter");
1018 } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); 1034 } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP);
1019 1035
@@ -1631,6 +1647,7 @@ void rcu_torture_barrier_cbf(struct rcu_head *rcu)
1631static int rcu_torture_barrier_cbs(void *arg) 1647static int rcu_torture_barrier_cbs(void *arg)
1632{ 1648{
1633 long myid = (long)arg; 1649 long myid = (long)arg;
1650 bool lastphase = 0;
1634 struct rcu_head rcu; 1651 struct rcu_head rcu;
1635 1652
1636 init_rcu_head_on_stack(&rcu); 1653 init_rcu_head_on_stack(&rcu);
@@ -1638,9 +1655,11 @@ static int rcu_torture_barrier_cbs(void *arg)
1638 set_user_nice(current, 19); 1655 set_user_nice(current, 19);
1639 do { 1656 do {
1640 wait_event(barrier_cbs_wq[myid], 1657 wait_event(barrier_cbs_wq[myid],
1641 atomic_read(&barrier_cbs_count) == n_barrier_cbs || 1658 barrier_phase != lastphase ||
1642 kthread_should_stop() || 1659 kthread_should_stop() ||
1643 fullstop != FULLSTOP_DONTSTOP); 1660 fullstop != FULLSTOP_DONTSTOP);
1661 lastphase = barrier_phase;
1662 smp_mb(); /* ensure barrier_phase load before ->call(). */
1644 if (kthread_should_stop() || fullstop != FULLSTOP_DONTSTOP) 1663 if (kthread_should_stop() || fullstop != FULLSTOP_DONTSTOP)
1645 break; 1664 break;
1646 cur_ops->call(&rcu, rcu_torture_barrier_cbf); 1665 cur_ops->call(&rcu, rcu_torture_barrier_cbf);
@@ -1665,7 +1684,8 @@ static int rcu_torture_barrier(void *arg)
1665 do { 1684 do {
1666 atomic_set(&barrier_cbs_invoked, 0); 1685 atomic_set(&barrier_cbs_invoked, 0);
1667 atomic_set(&barrier_cbs_count, n_barrier_cbs); 1686 atomic_set(&barrier_cbs_count, n_barrier_cbs);
1668 /* wake_up() path contains the required barriers. */ 1687 smp_mb(); /* Ensure barrier_phase after prior assignments. */
1688 barrier_phase = !barrier_phase;
1669 for (i = 0; i < n_barrier_cbs; i++) 1689 for (i = 0; i < n_barrier_cbs; i++)
1670 wake_up(&barrier_cbs_wq[i]); 1690 wake_up(&barrier_cbs_wq[i]);
1671 wait_event(barrier_wq, 1691 wait_event(barrier_wq,
@@ -1684,7 +1704,7 @@ static int rcu_torture_barrier(void *arg)
1684 schedule_timeout_interruptible(HZ / 10); 1704 schedule_timeout_interruptible(HZ / 10);
1685 } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); 1705 } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP);
1686 VERBOSE_PRINTK_STRING("rcu_torture_barrier task stopping"); 1706 VERBOSE_PRINTK_STRING("rcu_torture_barrier task stopping");
1687 rcutorture_shutdown_absorb("rcu_torture_barrier_cbs"); 1707 rcutorture_shutdown_absorb("rcu_torture_barrier");
1688 while (!kthread_should_stop()) 1708 while (!kthread_should_stop())
1689 schedule_timeout_interruptible(1); 1709 schedule_timeout_interruptible(1);
1690 return 0; 1710 return 0;
@@ -1908,8 +1928,8 @@ rcu_torture_init(void)
1908 static struct rcu_torture_ops *torture_ops[] = 1928 static struct rcu_torture_ops *torture_ops[] =
1909 { &rcu_ops, &rcu_sync_ops, &rcu_expedited_ops, 1929 { &rcu_ops, &rcu_sync_ops, &rcu_expedited_ops,
1910 &rcu_bh_ops, &rcu_bh_sync_ops, &rcu_bh_expedited_ops, 1930 &rcu_bh_ops, &rcu_bh_sync_ops, &rcu_bh_expedited_ops,
1911 &srcu_ops, &srcu_sync_ops, &srcu_raw_ops, 1931 &srcu_ops, &srcu_sync_ops, &srcu_expedited_ops,
1912 &srcu_raw_sync_ops, &srcu_expedited_ops, 1932 &srcu_raw_ops, &srcu_raw_sync_ops,
1913 &sched_ops, &sched_sync_ops, &sched_expedited_ops, }; 1933 &sched_ops, &sched_sync_ops, &sched_expedited_ops, };
1914 1934
1915 mutex_lock(&fullstop_mutex); 1935 mutex_lock(&fullstop_mutex);
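The new barrier_phase handshake replaces the count-based wakeup: the coordinator flips a phase flag (after an smp_mb() so earlier state is visible) and each callback-posting kthread waits for the phase to change, records it, and orders that load before its work. A condensed, hypothetical sketch of the pattern:

#include <linux/wait.h>
#include <linux/kthread.h>

/* Hypothetical condensation of the barrier_phase handshake above. */
static bool phase;                      /* flipped by the coordinator */
static DECLARE_WAIT_QUEUE_HEAD(phase_wq);

static void coordinator_kick(void)
{
	smp_mb();                       /* prior state changes before the flip */
	phase = !phase;
	wake_up(&phase_wq);
}

static int waiter_thread(void *arg)
{
	bool lastphase = false;

	while (!kthread_should_stop()) {
		wait_event(phase_wq, phase != lastphase ||
				     kthread_should_stop());
		lastphase = phase;
		smp_mb();               /* see the flip before acting on it */
		/* ... do one unit of per-phase work ... */
	}
	return 0;
}
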
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 967b4bed2cf3..117218a43724 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -359,7 +359,7 @@ static void rcu_idle_enter_common(struct rcu_dynticks *rdtp, long long oldval)
359 struct task_struct *idle = idle_task(smp_processor_id()); 359 struct task_struct *idle = idle_task(smp_processor_id());
360 360
361 trace_rcu_dyntick("Error on entry: not idle task", oldval, 0); 361 trace_rcu_dyntick("Error on entry: not idle task", oldval, 0);
362 ftrace_dump(DUMP_ALL); 362 ftrace_dump(DUMP_ORIG);
363 WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", 363 WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
364 current->pid, current->comm, 364 current->pid, current->comm,
365 idle->pid, idle->comm); /* must be idle task! */ 365 idle->pid, idle->comm); /* must be idle task! */
@@ -469,7 +469,7 @@ static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval)
469 469
470 trace_rcu_dyntick("Error on exit: not idle task", 470 trace_rcu_dyntick("Error on exit: not idle task",
471 oldval, rdtp->dynticks_nesting); 471 oldval, rdtp->dynticks_nesting);
472 ftrace_dump(DUMP_ALL); 472 ftrace_dump(DUMP_ORIG);
473 WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", 473 WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
474 current->pid, current->comm, 474 current->pid, current->comm,
475 idle->pid, idle->comm); /* must be idle task! */ 475 idle->pid, idle->comm); /* must be idle task! */
@@ -586,8 +586,6 @@ void rcu_nmi_exit(void)
586 WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); 586 WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
587} 587}
588 588
589#ifdef CONFIG_PROVE_RCU
590
591/** 589/**
592 * rcu_is_cpu_idle - see if RCU thinks that the current CPU is idle 590 * rcu_is_cpu_idle - see if RCU thinks that the current CPU is idle
593 * 591 *
@@ -605,7 +603,7 @@ int rcu_is_cpu_idle(void)
605} 603}
606EXPORT_SYMBOL(rcu_is_cpu_idle); 604EXPORT_SYMBOL(rcu_is_cpu_idle);
607 605
608#ifdef CONFIG_HOTPLUG_CPU 606#if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU)
609 607
610/* 608/*
611 * Is the current CPU online? Disable preemption to avoid false positives 609 * Is the current CPU online? Disable preemption to avoid false positives
@@ -646,9 +644,7 @@ bool rcu_lockdep_current_cpu_online(void)
646} 644}
647EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online); 645EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online);
648 646
649#endif /* #ifdef CONFIG_HOTPLUG_CPU */ 647#endif /* #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU) */
650
651#endif /* #ifdef CONFIG_PROVE_RCU */
652 648
653/** 649/**
654 * rcu_is_cpu_rrupt_from_idle - see if idle or immediately interrupted from idle 650 * rcu_is_cpu_rrupt_from_idle - see if idle or immediately interrupted from idle
@@ -734,7 +730,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
734 int cpu; 730 int cpu;
735 long delta; 731 long delta;
736 unsigned long flags; 732 unsigned long flags;
737 int ndetected; 733 int ndetected = 0;
738 struct rcu_node *rnp = rcu_get_root(rsp); 734 struct rcu_node *rnp = rcu_get_root(rsp);
739 735
740 /* Only let one CPU complain about others per time interval. */ 736 /* Only let one CPU complain about others per time interval. */
@@ -775,7 +771,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
775 */ 771 */
776 rnp = rcu_get_root(rsp); 772 rnp = rcu_get_root(rsp);
777 raw_spin_lock_irqsave(&rnp->lock, flags); 773 raw_spin_lock_irqsave(&rnp->lock, flags);
778 ndetected = rcu_print_task_stall(rnp); 774 ndetected += rcu_print_task_stall(rnp);
779 raw_spin_unlock_irqrestore(&rnp->lock, flags); 775 raw_spin_unlock_irqrestore(&rnp->lock, flags);
780 776
781 print_cpu_stall_info_end(); 777 print_cpu_stall_info_end();
@@ -939,6 +935,18 @@ check_for_new_grace_period(struct rcu_state *rsp, struct rcu_data *rdp)
939} 935}
940 936
941/* 937/*
938 * Initialize the specified rcu_data structure's callback list to empty.
939 */
940static void init_callback_list(struct rcu_data *rdp)
941{
942 int i;
943
944 rdp->nxtlist = NULL;
945 for (i = 0; i < RCU_NEXT_SIZE; i++)
946 rdp->nxttail[i] = &rdp->nxtlist;
947}
948
949/*
942 * Advance this CPU's callbacks, but only if the current grace period 950 * Advance this CPU's callbacks, but only if the current grace period
943 * has ended. This may be called only from the CPU to whom the rdp 951 * has ended. This may be called only from the CPU to whom the rdp
944 * belongs. In addition, the corresponding leaf rcu_node structure's 952 * belongs. In addition, the corresponding leaf rcu_node structure's
@@ -1330,8 +1338,6 @@ static void
1330rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp, 1338rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
1331 struct rcu_node *rnp, struct rcu_data *rdp) 1339 struct rcu_node *rnp, struct rcu_data *rdp)
1332{ 1340{
1333 int i;
1334
1335 /* 1341 /*
1336 * Orphan the callbacks. First adjust the counts. This is safe 1342 * Orphan the callbacks. First adjust the counts. This is safe
1337 * because ->onofflock excludes _rcu_barrier()'s adoption of 1343 * because ->onofflock excludes _rcu_barrier()'s adoption of
@@ -1342,7 +1348,7 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
1342 rsp->qlen += rdp->qlen; 1348 rsp->qlen += rdp->qlen;
1343 rdp->n_cbs_orphaned += rdp->qlen; 1349 rdp->n_cbs_orphaned += rdp->qlen;
1344 rdp->qlen_lazy = 0; 1350 rdp->qlen_lazy = 0;
1345 rdp->qlen = 0; 1351 ACCESS_ONCE(rdp->qlen) = 0;
1346 } 1352 }
1347 1353
1348 /* 1354 /*
@@ -1371,9 +1377,7 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
1371 } 1377 }
1372 1378
1373 /* Finally, initialize the rcu_data structure's list to empty. */ 1379 /* Finally, initialize the rcu_data structure's list to empty. */
1374 rdp->nxtlist = NULL; 1380 init_callback_list(rdp);
1375 for (i = 0; i < RCU_NEXT_SIZE; i++)
1376 rdp->nxttail[i] = &rdp->nxtlist;
1377} 1381}
1378 1382
1379/* 1383/*
@@ -1507,6 +1511,9 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
1507 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1511 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1508 if (need_report & RCU_OFL_TASKS_EXP_GP) 1512 if (need_report & RCU_OFL_TASKS_EXP_GP)
1509 rcu_report_exp_rnp(rsp, rnp, true); 1513 rcu_report_exp_rnp(rsp, rnp, true);
1514 WARN_ONCE(rdp->qlen != 0 || rdp->nxtlist != NULL,
1515 "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, nxtlist=%p\n",
1516 cpu, rdp->qlen, rdp->nxtlist);
1510} 1517}
1511 1518
1512#else /* #ifdef CONFIG_HOTPLUG_CPU */ 1519#else /* #ifdef CONFIG_HOTPLUG_CPU */
@@ -1594,7 +1601,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
1594 } 1601 }
1595 smp_mb(); /* List handling before counting for rcu_barrier(). */ 1602 smp_mb(); /* List handling before counting for rcu_barrier(). */
1596 rdp->qlen_lazy -= count_lazy; 1603 rdp->qlen_lazy -= count_lazy;
1597 rdp->qlen -= count; 1604 ACCESS_ONCE(rdp->qlen) -= count;
1598 rdp->n_cbs_invoked += count; 1605 rdp->n_cbs_invoked += count;
1599 1606
1600 /* Reinstate batch limit if we have worked down the excess. */ 1607 /* Reinstate batch limit if we have worked down the excess. */
@@ -1607,6 +1614,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
1607 rdp->n_force_qs_snap = rsp->n_force_qs; 1614 rdp->n_force_qs_snap = rsp->n_force_qs;
1608 } else if (rdp->qlen < rdp->qlen_last_fqs_check - qhimark) 1615 } else if (rdp->qlen < rdp->qlen_last_fqs_check - qhimark)
1609 rdp->qlen_last_fqs_check = rdp->qlen; 1616 rdp->qlen_last_fqs_check = rdp->qlen;
1617 WARN_ON_ONCE((rdp->nxtlist == NULL) != (rdp->qlen == 0));
1610 1618
1611 local_irq_restore(flags); 1619 local_irq_restore(flags);
1612 1620
@@ -1858,6 +1866,56 @@ static void invoke_rcu_core(void)
1858 raise_softirq(RCU_SOFTIRQ); 1866 raise_softirq(RCU_SOFTIRQ);
1859} 1867}
1860 1868
1869/*
1870 * Handle any core-RCU processing required by a call_rcu() invocation.
1871 */
1872static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
1873 struct rcu_head *head, unsigned long flags)
1874{
1875 /*
1876 * If called from an extended quiescent state, invoke the RCU
1877 * core in order to force a re-evaluation of RCU's idleness.
1878 */
1879 if (rcu_is_cpu_idle() && cpu_online(smp_processor_id()))
1880 invoke_rcu_core();
1881
1882 /* If interrupts were disabled or CPU offline, don't invoke RCU core. */
1883 if (irqs_disabled_flags(flags) || cpu_is_offline(smp_processor_id()))
1884 return;
1885
1886 /*
1887 * Force the grace period if too many callbacks or too long waiting.
1888 * Enforce hysteresis, and don't invoke force_quiescent_state()
1889 * if some other CPU has recently done so. Also, don't bother
1890 * invoking force_quiescent_state() if the newly enqueued callback
1891 * is the only one waiting for a grace period to complete.
1892 */
1893 if (unlikely(rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) {
1894
1895 /* Are we ignoring a completed grace period? */
1896 rcu_process_gp_end(rsp, rdp);
1897 check_for_new_grace_period(rsp, rdp);
1898
1899 /* Start a new grace period if one not already started. */
1900 if (!rcu_gp_in_progress(rsp)) {
1901 unsigned long nestflag;
1902 struct rcu_node *rnp_root = rcu_get_root(rsp);
1903
1904 raw_spin_lock_irqsave(&rnp_root->lock, nestflag);
1905 rcu_start_gp(rsp, nestflag); /* rlses rnp_root->lock */
1906 } else {
1907 /* Give the grace period a kick. */
1908 rdp->blimit = LONG_MAX;
1909 if (rsp->n_force_qs == rdp->n_force_qs_snap &&
1910 *rdp->nxttail[RCU_DONE_TAIL] != head)
1911 force_quiescent_state(rsp, 0);
1912 rdp->n_force_qs_snap = rsp->n_force_qs;
1913 rdp->qlen_last_fqs_check = rdp->qlen;
1914 }
1915 } else if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies))
1916 force_quiescent_state(rsp, 1);
1917}
1918
1861static void 1919static void
1862__call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), 1920__call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
1863 struct rcu_state *rsp, bool lazy) 1921 struct rcu_state *rsp, bool lazy)
@@ -1882,7 +1940,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
1882 rdp = this_cpu_ptr(rsp->rda); 1940 rdp = this_cpu_ptr(rsp->rda);
1883 1941
1884 /* Add the callback to our list. */ 1942 /* Add the callback to our list. */
1885 rdp->qlen++; 1943 ACCESS_ONCE(rdp->qlen)++;
1886 if (lazy) 1944 if (lazy)
1887 rdp->qlen_lazy++; 1945 rdp->qlen_lazy++;
1888 else 1946 else
@@ -1897,43 +1955,8 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
1897 else 1955 else
1898 trace_rcu_callback(rsp->name, head, rdp->qlen_lazy, rdp->qlen); 1956 trace_rcu_callback(rsp->name, head, rdp->qlen_lazy, rdp->qlen);
1899 1957
1900 /* If interrupts were disabled, don't dive into RCU core. */ 1958 /* Go handle any RCU core processing required. */
1901 if (irqs_disabled_flags(flags)) { 1959 __call_rcu_core(rsp, rdp, head, flags);
1902 local_irq_restore(flags);
1903 return;
1904 }
1905
1906 /*
1907 * Force the grace period if too many callbacks or too long waiting.
1908 * Enforce hysteresis, and don't invoke force_quiescent_state()
1909 * if some other CPU has recently done so. Also, don't bother
1910 * invoking force_quiescent_state() if the newly enqueued callback
1911 * is the only one waiting for a grace period to complete.
1912 */
1913 if (unlikely(rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) {
1914
1915 /* Are we ignoring a completed grace period? */
1916 rcu_process_gp_end(rsp, rdp);
1917 check_for_new_grace_period(rsp, rdp);
1918
1919 /* Start a new grace period if one not already started. */
1920 if (!rcu_gp_in_progress(rsp)) {
1921 unsigned long nestflag;
1922 struct rcu_node *rnp_root = rcu_get_root(rsp);
1923
1924 raw_spin_lock_irqsave(&rnp_root->lock, nestflag);
1925 rcu_start_gp(rsp, nestflag); /* rlses rnp_root->lock */
1926 } else {
1927 /* Give the grace period a kick. */
1928 rdp->blimit = LONG_MAX;
1929 if (rsp->n_force_qs == rdp->n_force_qs_snap &&
1930 *rdp->nxttail[RCU_DONE_TAIL] != head)
1931 force_quiescent_state(rsp, 0);
1932 rdp->n_force_qs_snap = rsp->n_force_qs;
1933 rdp->qlen_last_fqs_check = rdp->qlen;
1934 }
1935 } else if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies))
1936 force_quiescent_state(rsp, 1);
1937 local_irq_restore(flags); 1960 local_irq_restore(flags);
1938} 1961}
1939 1962
@@ -1963,28 +1986,16 @@ EXPORT_SYMBOL_GPL(call_rcu_bh);
1963 * occasionally incorrectly indicate that there are multiple CPUs online 1986 * occasionally incorrectly indicate that there are multiple CPUs online
1964 * when there was in fact only one the whole time, as this just adds 1987 * when there was in fact only one the whole time, as this just adds
1965 * some overhead: RCU still operates correctly. 1988 * some overhead: RCU still operates correctly.
1966 *
1967 * Of course, sampling num_online_cpus() with preemption enabled can
1968 * give erroneous results if there are concurrent CPU-hotplug operations.
1969 * For example, given a demonic sequence of preemptions in num_online_cpus()
1970 * and CPU-hotplug operations, there could be two or more CPUs online at
1971 * all times, but num_online_cpus() might well return one (or even zero).
1972 *
1973 * However, all such demonic sequences require at least one CPU-offline
1974 * operation. Furthermore, rcu_blocking_is_gp() giving the wrong answer
1975 * is only a problem if there is an RCU read-side critical section executing
1976 * throughout. But RCU-sched and RCU-bh read-side critical sections
1977 * disable either preemption or bh, which prevents a CPU from going offline.
1978 * Therefore, the only way that rcu_blocking_is_gp() can incorrectly return
1979 * that there is only one CPU when in fact there was more than one throughout
1980 * is when there were no RCU readers in the system. If there are no
1981 * RCU readers, the grace period by definition can be of zero length,
1982 * regardless of the number of online CPUs.
1983 */ 1989 */
1984static inline int rcu_blocking_is_gp(void) 1990static inline int rcu_blocking_is_gp(void)
1985{ 1991{
1992 int ret;
1993
1986 might_sleep(); /* Check for RCU read-side critical section. */ 1994 might_sleep(); /* Check for RCU read-side critical section. */
1987 return num_online_cpus() <= 1; 1995 preempt_disable();
1996 ret = num_online_cpus() <= 1;
1997 preempt_enable();
1998 return ret;
1988} 1999}
1989 2000
1990/** 2001/**
@@ -2473,18 +2484,15 @@ static void __init
2473rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) 2484rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
2474{ 2485{
2475 unsigned long flags; 2486 unsigned long flags;
2476 int i;
2477 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); 2487 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
2478 struct rcu_node *rnp = rcu_get_root(rsp); 2488 struct rcu_node *rnp = rcu_get_root(rsp);
2479 2489
2480 /* Set up local state, ensuring consistent view of global state. */ 2490 /* Set up local state, ensuring consistent view of global state. */
2481 raw_spin_lock_irqsave(&rnp->lock, flags); 2491 raw_spin_lock_irqsave(&rnp->lock, flags);
2482 rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo); 2492 rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo);
2483 rdp->nxtlist = NULL; 2493 init_callback_list(rdp);
2484 for (i = 0; i < RCU_NEXT_SIZE; i++)
2485 rdp->nxttail[i] = &rdp->nxtlist;
2486 rdp->qlen_lazy = 0; 2494 rdp->qlen_lazy = 0;
2487 rdp->qlen = 0; 2495 ACCESS_ONCE(rdp->qlen) = 0;
2488 rdp->dynticks = &per_cpu(rcu_dynticks, cpu); 2496 rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
2489 WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE); 2497 WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE);
2490 WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1); 2498 WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1);
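The ->qlen accesses above gain ACCESS_ONCE(), the usual idiom when a value updated under a lock can also be sampled locklessly by other CPUs. A hedged sketch of the idiom with a hypothetical counter:

#include <linux/compiler.h>

/* Hypothetical counter: updated under the owner's lock, sampled locklessly. */
static unsigned long my_qlen;

static void my_enqueue_locked(void)
{
	ACCESS_ONCE(my_qlen)++;         /* volatile access: no caching or refetching */
}

static unsigned long my_sample(void)
{
	return ACCESS_ONCE(my_qlen);    /* tolerate a concurrent update, read once */
}
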
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index b92c4550a6e6..4d29169f2124 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -100,6 +100,7 @@ struct rcu_dynticks {
100 /* # times non-lazy CBs posted to CPU. */ 100 /* # times non-lazy CBs posted to CPU. */
101 unsigned long nonlazy_posted_snap; 101 unsigned long nonlazy_posted_snap;
102 /* idle-period nonlazy_posted snapshot. */ 102 /* idle-period nonlazy_posted snapshot. */
103 int tick_nohz_enabled_snap; /* Previously seen value from sysfs. */
103#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */ 104#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
104}; 105};
105 106
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index d18b4d383afe..a9194d5606c4 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -83,7 +83,6 @@ struct rcu_state rcu_preempt_state =
83DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data); 83DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data);
84static struct rcu_state *rcu_state = &rcu_preempt_state; 84static struct rcu_state *rcu_state = &rcu_preempt_state;
85 85
86static void rcu_read_unlock_special(struct task_struct *t);
87static int rcu_preempted_readers_exp(struct rcu_node *rnp); 86static int rcu_preempted_readers_exp(struct rcu_node *rnp);
88 87
89/* 88/*
@@ -238,18 +237,6 @@ static void rcu_preempt_note_context_switch(int cpu)
238} 237}
239 238
240/* 239/*
241 * Tree-preemptible RCU implementation for rcu_read_lock().
242 * Just increment ->rcu_read_lock_nesting, shared state will be updated
243 * if we block.
244 */
245void __rcu_read_lock(void)
246{
247 current->rcu_read_lock_nesting++;
248 barrier(); /* needed if we ever invoke rcu_read_lock in rcutree.c */
249}
250EXPORT_SYMBOL_GPL(__rcu_read_lock);
251
252/*
253 * Check for preempted RCU readers blocking the current grace period 240 * Check for preempted RCU readers blocking the current grace period
254 * for the specified rcu_node structure. If the caller needs a reliable 241 * for the specified rcu_node structure. If the caller needs a reliable
255 * answer, it must hold the rcu_node's ->lock. 242 * answer, it must hold the rcu_node's ->lock.
@@ -315,7 +302,7 @@ static struct list_head *rcu_next_node_entry(struct task_struct *t,
315 * notify RCU core processing or task having blocked during the RCU 302 * notify RCU core processing or task having blocked during the RCU
316 * read-side critical section. 303 * read-side critical section.
317 */ 304 */
318static noinline void rcu_read_unlock_special(struct task_struct *t) 305void rcu_read_unlock_special(struct task_struct *t)
319{ 306{
320 int empty; 307 int empty;
321 int empty_exp; 308 int empty_exp;
@@ -423,38 +410,6 @@ static noinline void rcu_read_unlock_special(struct task_struct *t)
423 } 410 }
424} 411}
425 412
426/*
427 * Tree-preemptible RCU implementation for rcu_read_unlock().
428 * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost
429 * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
430 * invoke rcu_read_unlock_special() to clean up after a context switch
431 * in an RCU read-side critical section and other special cases.
432 */
433void __rcu_read_unlock(void)
434{
435 struct task_struct *t = current;
436
437 if (t->rcu_read_lock_nesting != 1)
438 --t->rcu_read_lock_nesting;
439 else {
440 barrier(); /* critical section before exit code. */
441 t->rcu_read_lock_nesting = INT_MIN;
442 barrier(); /* assign before ->rcu_read_unlock_special load */
443 if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
444 rcu_read_unlock_special(t);
445 barrier(); /* ->rcu_read_unlock_special load before assign */
446 t->rcu_read_lock_nesting = 0;
447 }
448#ifdef CONFIG_PROVE_LOCKING
449 {
450 int rrln = ACCESS_ONCE(t->rcu_read_lock_nesting);
451
452 WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2);
453 }
454#endif /* #ifdef CONFIG_PROVE_LOCKING */
455}
456EXPORT_SYMBOL_GPL(__rcu_read_unlock);
457
458#ifdef CONFIG_RCU_CPU_STALL_VERBOSE 413#ifdef CONFIG_RCU_CPU_STALL_VERBOSE
459 414
460/* 415/*
@@ -1856,9 +1811,11 @@ static void rcu_idle_count_callbacks_posted(void)
1856 */ 1811 */
1857#define RCU_IDLE_FLUSHES 5 /* Number of dyntick-idle tries. */ 1812#define RCU_IDLE_FLUSHES 5 /* Number of dyntick-idle tries. */
1858#define RCU_IDLE_OPT_FLUSHES 3 /* Optional dyntick-idle tries. */ 1813#define RCU_IDLE_OPT_FLUSHES 3 /* Optional dyntick-idle tries. */
1859#define RCU_IDLE_GP_DELAY 6 /* Roughly one grace period. */ 1814#define RCU_IDLE_GP_DELAY 4 /* Roughly one grace period. */
1860#define RCU_IDLE_LAZY_GP_DELAY (6 * HZ) /* Roughly six seconds. */ 1815#define RCU_IDLE_LAZY_GP_DELAY (6 * HZ) /* Roughly six seconds. */
1861 1816
1817extern int tick_nohz_enabled;
1818
1862/* 1819/*
1863 * Does the specified flavor of RCU have non-lazy callbacks pending on 1820 * Does the specified flavor of RCU have non-lazy callbacks pending on
1864 * the specified CPU? Both RCU flavor and CPU are specified by the 1821 * the specified CPU? Both RCU flavor and CPU are specified by the
@@ -1935,10 +1892,13 @@ int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies)
1935 return 1; 1892 return 1;
1936 } 1893 }
1937 /* Set up for the possibility that RCU will post a timer. */ 1894 /* Set up for the possibility that RCU will post a timer. */
1938 if (rcu_cpu_has_nonlazy_callbacks(cpu)) 1895 if (rcu_cpu_has_nonlazy_callbacks(cpu)) {
1939 *delta_jiffies = RCU_IDLE_GP_DELAY; 1896 *delta_jiffies = round_up(RCU_IDLE_GP_DELAY + jiffies,
1940 else 1897 RCU_IDLE_GP_DELAY) - jiffies;
1941 *delta_jiffies = RCU_IDLE_LAZY_GP_DELAY; 1898 } else {
1899 *delta_jiffies = jiffies + RCU_IDLE_LAZY_GP_DELAY;
1900 *delta_jiffies = round_jiffies(*delta_jiffies) - jiffies;
1901 }
1942 return 0; 1902 return 0;
1943} 1903}
1944 1904
@@ -1997,6 +1957,7 @@ static void rcu_cleanup_after_idle(int cpu)
1997 1957
1998 del_timer(&rdtp->idle_gp_timer); 1958 del_timer(&rdtp->idle_gp_timer);
1999 trace_rcu_prep_idle("Cleanup after idle"); 1959 trace_rcu_prep_idle("Cleanup after idle");
1960 rdtp->tick_nohz_enabled_snap = ACCESS_ONCE(tick_nohz_enabled);
2000} 1961}
2001 1962
2002/* 1963/*
@@ -2022,6 +1983,18 @@ static void rcu_prepare_for_idle(int cpu)
2022{ 1983{
2023 struct timer_list *tp; 1984 struct timer_list *tp;
2024 struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); 1985 struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
1986 int tne;
1987
1988 /* Handle nohz enablement switches conservatively. */
1989 tne = ACCESS_ONCE(tick_nohz_enabled);
1990 if (tne != rdtp->tick_nohz_enabled_snap) {
1991 if (rcu_cpu_has_callbacks(cpu))
1992 invoke_rcu_core(); /* force nohz to see update. */
1993 rdtp->tick_nohz_enabled_snap = tne;
1994 return;
1995 }
1996 if (!tne)
1997 return;
2025 1998
2026 /* 1999 /*
2027 * If this is an idle re-entry, for example, due to use of 2000 * If this is an idle re-entry, for example, due to use of
@@ -2075,10 +2048,11 @@ static void rcu_prepare_for_idle(int cpu)
2075 if (rcu_cpu_has_nonlazy_callbacks(cpu)) { 2048 if (rcu_cpu_has_nonlazy_callbacks(cpu)) {
2076 trace_rcu_prep_idle("Dyntick with callbacks"); 2049 trace_rcu_prep_idle("Dyntick with callbacks");
2077 rdtp->idle_gp_timer_expires = 2050 rdtp->idle_gp_timer_expires =
2078 jiffies + RCU_IDLE_GP_DELAY; 2051 round_up(jiffies + RCU_IDLE_GP_DELAY,
2052 RCU_IDLE_GP_DELAY);
2079 } else { 2053 } else {
2080 rdtp->idle_gp_timer_expires = 2054 rdtp->idle_gp_timer_expires =
2081 jiffies + RCU_IDLE_LAZY_GP_DELAY; 2055 round_jiffies(jiffies + RCU_IDLE_LAZY_GP_DELAY);
2082 trace_rcu_prep_idle("Dyntick with lazy callbacks"); 2056 trace_rcu_prep_idle("Dyntick with lazy callbacks");
2083 } 2057 }
2084 tp = &rdtp->idle_gp_timer; 2058 tp = &rdtp->idle_gp_timer;
@@ -2157,6 +2131,7 @@ static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
2157 2131
2158static void print_cpu_stall_fast_no_hz(char *cp, int cpu) 2132static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
2159{ 2133{
2134 *cp = '\0';
2160} 2135}
2161 2136
2162#endif /* #else #ifdef CONFIG_RCU_FAST_NO_HZ */ 2137#endif /* #else #ifdef CONFIG_RCU_FAST_NO_HZ */
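The idle-timer changes above round the expiry rather than using a raw offset, so the dyntick-idle timers of many CPUs tend to fire together instead of one by one (round_up() needs a power-of-two multiple, which is presumably why RCU_IDLE_GP_DELAY drops from 6 to 4). A hedged illustration of the arithmetic, with hypothetical helpers:

#include <linux/kernel.h>
#include <linux/jiffies.h>
#include <linux/timer.h>

static unsigned long my_nonlazy_expiry(unsigned long delay)
{
	return round_up(jiffies + delay, delay);  /* delay must be a power of two */
}

static unsigned long my_lazy_expiry(unsigned long delay)
{
	return round_jiffies(jiffies + delay);    /* align to a second boundary */
}
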
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 869997833928..66ff07f6184c 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -105,7 +105,7 @@ static ktime_t tick_init_jiffy_update(void)
105/* 105/*
106 * NO HZ enabled ? 106 * NO HZ enabled ?
107 */ 107 */
108static int tick_nohz_enabled __read_mostly = 1; 108int tick_nohz_enabled __read_mostly = 1;
109 109
110/* 110/*
111 * Enable / Disable tickless mode 111 * Enable / Disable tickless mode