Diffstat (limited to 'kernel')
-rw-r--r--   kernel/cpu.c                    16
-rw-r--r--   kernel/exit.c                    3
-rw-r--r--   kernel/locking/locktorture.c   529
-rw-r--r--   kernel/rcu/rcutorture.c        278
-rw-r--r--   kernel/rcu/tiny.c               20
-rw-r--r--   kernel/rcu/tree.c              115
-rw-r--r--   kernel/rcu/tree.h               18
-rw-r--r--   kernel/rcu/tree_plugin.h       404
-rw-r--r--   kernel/rcu/update.c            345
-rw-r--r--   kernel/softirq.c                 2
-rw-r--r--   kernel/sysctl.c                  9
-rw-r--r--   kernel/torture.c                32
-rw-r--r--   kernel/workqueue.c               5
13 files changed, 1397 insertions, 379 deletions
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 81e2a388a0f6..356450f09c1f 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -79,6 +79,8 @@ static struct {
 
 /* Lockdep annotations for get/put_online_cpus() and cpu_hotplug_begin/end() */
 #define cpuhp_lock_acquire_read() lock_map_acquire_read(&cpu_hotplug.dep_map)
+#define cpuhp_lock_acquire_tryread() \
+			lock_map_acquire_tryread(&cpu_hotplug.dep_map)
 #define cpuhp_lock_acquire() lock_map_acquire(&cpu_hotplug.dep_map)
 #define cpuhp_lock_release() lock_map_release(&cpu_hotplug.dep_map)
 
@@ -91,10 +93,22 @@ void get_online_cpus(void)
 	mutex_lock(&cpu_hotplug.lock);
 	cpu_hotplug.refcount++;
 	mutex_unlock(&cpu_hotplug.lock);
-
 }
 EXPORT_SYMBOL_GPL(get_online_cpus);
 
+bool try_get_online_cpus(void)
+{
+	if (cpu_hotplug.active_writer == current)
+		return true;
+	if (!mutex_trylock(&cpu_hotplug.lock))
+		return false;
+	cpuhp_lock_acquire_tryread();
+	cpu_hotplug.refcount++;
+	mutex_unlock(&cpu_hotplug.lock);
+	return true;
+}
+EXPORT_SYMBOL_GPL(try_get_online_cpus);
+
 void put_online_cpus(void)
 {
 	if (cpu_hotplug.active_writer == current)
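The new try_get_online_cpus() above gives callers a way to pin CPU hotplug without sleeping on the hotplug lock. A minimal sketch of a hypothetical caller (not part of this patch; example_slow_path() and example_touch_cpu() are made-up placeholders) might look like:

/*
 * Hypothetical caller sketch: opportunistically block CPU hotplug and
 * fall back to a hotplug-tolerant slow path when the lock is contended.
 */
static void example_update_all_cpus(void)
{
	int cpu;

	if (!try_get_online_cpus()) {
		example_slow_path();		/* hypothetical fallback */
		return;
	}
	for_each_online_cpu(cpu)
		example_touch_cpu(cpu);		/* hypothetical per-CPU work */
	put_online_cpus();
}

In the larger series, RCU's expedited grace periods use this trylock form so that they never block on, and therefore cannot deadlock with, an in-progress CPU-hotplug operation.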
diff --git a/kernel/exit.c b/kernel/exit.c
index 32c58f7433a3..d13f2eec4bb8 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -667,6 +667,7 @@ void do_exit(long code)
 {
 	struct task_struct *tsk = current;
 	int group_dead;
+	TASKS_RCU(int tasks_rcu_i);
 
 	profile_task_exit(tsk);
 
@@ -775,6 +776,7 @@ void do_exit(long code)
 	 */
 	flush_ptrace_hw_breakpoint(tsk);
 
+	TASKS_RCU(tasks_rcu_i = __srcu_read_lock(&tasks_rcu_exit_srcu));
 	exit_notify(tsk, group_dead);
 	proc_exit_connector(tsk);
 #ifdef CONFIG_NUMA
@@ -814,6 +816,7 @@ void do_exit(long code)
 	if (tsk->nr_dirtied)
 		__this_cpu_add(dirty_throttle_leaks, tsk->nr_dirtied);
 	exit_rcu();
+	TASKS_RCU(__srcu_read_unlock(&tasks_rcu_exit_srcu, tasks_rcu_i));
 
 	/*
 	 * The setting of TASK_RUNNING by try_to_wake_up() may be delayed
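The TASKS_RCU() wrapper used above keeps do_exit() free of #ifdefs: its argument is compiled in only when CONFIG_TASKS_RCU is set. A sketch of the kind of definition this relies on (the real one lives in include/linux/rcupdate.h in this series) is:

/* Sketch of the conditional wrapper assumed by the do_exit() hunks above. */
#ifdef CONFIG_TASKS_RCU
#define TASKS_RCU(x) x
extern struct srcu_struct tasks_rcu_exit_srcu;
#else /* #ifdef CONFIG_TASKS_RCU */
#define TASKS_RCU(x) do { } while (0)
#endif /* #else #ifdef CONFIG_TASKS_RCU */

The __srcu_read_lock()/__srcu_read_unlock() pair brackets the late-exit window around exit_notify(), so that the RCU-tasks machinery can wait for tasks that are almost done exiting.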
diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c
index 0955b885d0dc..ec8cce259779 100644
--- a/kernel/locking/locktorture.c
+++ b/kernel/locking/locktorture.c
@@ -20,30 +20,20 @@
20 * Author: Paul E. McKenney <paulmck@us.ibm.com> 20 * Author: Paul E. McKenney <paulmck@us.ibm.com>
21 * Based on kernel/rcu/torture.c. 21 * Based on kernel/rcu/torture.c.
22 */ 22 */
23#include <linux/types.h>
24#include <linux/kernel.h> 23#include <linux/kernel.h>
25#include <linux/init.h>
26#include <linux/module.h> 24#include <linux/module.h>
27#include <linux/kthread.h> 25#include <linux/kthread.h>
28#include <linux/err.h>
29#include <linux/spinlock.h> 26#include <linux/spinlock.h>
27#include <linux/rwlock.h>
28#include <linux/mutex.h>
29#include <linux/rwsem.h>
30#include <linux/smp.h> 30#include <linux/smp.h>
31#include <linux/interrupt.h> 31#include <linux/interrupt.h>
32#include <linux/sched.h> 32#include <linux/sched.h>
33#include <linux/atomic.h> 33#include <linux/atomic.h>
34#include <linux/bitops.h>
35#include <linux/completion.h>
36#include <linux/moduleparam.h> 34#include <linux/moduleparam.h>
37#include <linux/percpu.h>
38#include <linux/notifier.h>
39#include <linux/reboot.h>
40#include <linux/freezer.h>
41#include <linux/cpu.h>
42#include <linux/delay.h> 35#include <linux/delay.h>
43#include <linux/stat.h>
44#include <linux/slab.h> 36#include <linux/slab.h>
45#include <linux/trace_clock.h>
46#include <asm/byteorder.h>
47#include <linux/torture.h> 37#include <linux/torture.h>
48 38
49MODULE_LICENSE("GPL"); 39MODULE_LICENSE("GPL");
@@ -51,6 +41,8 @@ MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com>");
51 41
52torture_param(int, nwriters_stress, -1, 42torture_param(int, nwriters_stress, -1,
53 "Number of write-locking stress-test threads"); 43 "Number of write-locking stress-test threads");
44torture_param(int, nreaders_stress, -1,
45 "Number of read-locking stress-test threads");
54torture_param(int, onoff_holdoff, 0, "Time after boot before CPU hotplugs (s)"); 46torture_param(int, onoff_holdoff, 0, "Time after boot before CPU hotplugs (s)");
55torture_param(int, onoff_interval, 0, 47torture_param(int, onoff_interval, 0,
56 "Time between CPU hotplugs (s), 0=disable"); 48 "Time between CPU hotplugs (s), 0=disable");
@@ -66,30 +58,28 @@ torture_param(bool, verbose, true,
66static char *torture_type = "spin_lock"; 58static char *torture_type = "spin_lock";
67module_param(torture_type, charp, 0444); 59module_param(torture_type, charp, 0444);
68MODULE_PARM_DESC(torture_type, 60MODULE_PARM_DESC(torture_type,
69 "Type of lock to torture (spin_lock, spin_lock_irq, ...)"); 61 "Type of lock to torture (spin_lock, spin_lock_irq, mutex_lock, ...)");
70
71static atomic_t n_lock_torture_errors;
72 62
73static struct task_struct *stats_task; 63static struct task_struct *stats_task;
74static struct task_struct **writer_tasks; 64static struct task_struct **writer_tasks;
65static struct task_struct **reader_tasks;
75 66
76static int nrealwriters_stress;
77static bool lock_is_write_held; 67static bool lock_is_write_held;
68static bool lock_is_read_held;
78 69
79struct lock_writer_stress_stats { 70struct lock_stress_stats {
80 long n_write_lock_fail; 71 long n_lock_fail;
81 long n_write_lock_acquired; 72 long n_lock_acquired;
82}; 73};
83static struct lock_writer_stress_stats *lwsa;
84 74
85#if defined(MODULE) 75#if defined(MODULE)
86#define LOCKTORTURE_RUNNABLE_INIT 1 76#define LOCKTORTURE_RUNNABLE_INIT 1
87#else 77#else
88#define LOCKTORTURE_RUNNABLE_INIT 0 78#define LOCKTORTURE_RUNNABLE_INIT 0
89#endif 79#endif
90int locktorture_runnable = LOCKTORTURE_RUNNABLE_INIT; 80int torture_runnable = LOCKTORTURE_RUNNABLE_INIT;
91module_param(locktorture_runnable, int, 0444); 81module_param(torture_runnable, int, 0444);
92MODULE_PARM_DESC(locktorture_runnable, "Start locktorture at module init"); 82MODULE_PARM_DESC(torture_runnable, "Start locktorture at module init");
93 83
94/* Forward reference. */ 84/* Forward reference. */
95static void lock_torture_cleanup(void); 85static void lock_torture_cleanup(void);
@@ -102,12 +92,25 @@ struct lock_torture_ops {
102 int (*writelock)(void); 92 int (*writelock)(void);
103 void (*write_delay)(struct torture_random_state *trsp); 93 void (*write_delay)(struct torture_random_state *trsp);
104 void (*writeunlock)(void); 94 void (*writeunlock)(void);
95 int (*readlock)(void);
96 void (*read_delay)(struct torture_random_state *trsp);
97 void (*readunlock)(void);
105 unsigned long flags; 98 unsigned long flags;
106 const char *name; 99 const char *name;
107}; 100};
108 101
109static struct lock_torture_ops *cur_ops; 102struct lock_torture_cxt {
110 103 int nrealwriters_stress;
104 int nrealreaders_stress;
105 bool debug_lock;
106 atomic_t n_lock_torture_errors;
107 struct lock_torture_ops *cur_ops;
108 struct lock_stress_stats *lwsa; /* writer statistics */
109 struct lock_stress_stats *lrsa; /* reader statistics */
110};
111static struct lock_torture_cxt cxt = { 0, 0, false,
112 ATOMIC_INIT(0),
113 NULL, NULL};
111/* 114/*
112 * Definitions for lock torture testing. 115 * Definitions for lock torture testing.
113 */ 116 */
@@ -123,10 +126,10 @@ static void torture_lock_busted_write_delay(struct torture_random_state *trsp)
123 126
124 /* We want a long delay occasionally to force massive contention. */ 127 /* We want a long delay occasionally to force massive contention. */
125 if (!(torture_random(trsp) % 128 if (!(torture_random(trsp) %
126 (nrealwriters_stress * 2000 * longdelay_us))) 129 (cxt.nrealwriters_stress * 2000 * longdelay_us)))
127 mdelay(longdelay_us); 130 mdelay(longdelay_us);
128#ifdef CONFIG_PREEMPT 131#ifdef CONFIG_PREEMPT
129 if (!(torture_random(trsp) % (nrealwriters_stress * 20000))) 132 if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000)))
130 preempt_schedule(); /* Allow test to be preempted. */ 133 preempt_schedule(); /* Allow test to be preempted. */
131#endif 134#endif
132} 135}
@@ -140,6 +143,9 @@ static struct lock_torture_ops lock_busted_ops = {
140 .writelock = torture_lock_busted_write_lock, 143 .writelock = torture_lock_busted_write_lock,
141 .write_delay = torture_lock_busted_write_delay, 144 .write_delay = torture_lock_busted_write_delay,
142 .writeunlock = torture_lock_busted_write_unlock, 145 .writeunlock = torture_lock_busted_write_unlock,
146 .readlock = NULL,
147 .read_delay = NULL,
148 .readunlock = NULL,
143 .name = "lock_busted" 149 .name = "lock_busted"
144}; 150};
145 151
@@ -160,13 +166,13 @@ static void torture_spin_lock_write_delay(struct torture_random_state *trsp)
160 * we want a long delay occasionally to force massive contention. 166 * we want a long delay occasionally to force massive contention.
161 */ 167 */
162 if (!(torture_random(trsp) % 168 if (!(torture_random(trsp) %
163 (nrealwriters_stress * 2000 * longdelay_us))) 169 (cxt.nrealwriters_stress * 2000 * longdelay_us)))
164 mdelay(longdelay_us); 170 mdelay(longdelay_us);
165 if (!(torture_random(trsp) % 171 if (!(torture_random(trsp) %
166 (nrealwriters_stress * 2 * shortdelay_us))) 172 (cxt.nrealwriters_stress * 2 * shortdelay_us)))
167 udelay(shortdelay_us); 173 udelay(shortdelay_us);
168#ifdef CONFIG_PREEMPT 174#ifdef CONFIG_PREEMPT
169 if (!(torture_random(trsp) % (nrealwriters_stress * 20000))) 175 if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000)))
170 preempt_schedule(); /* Allow test to be preempted. */ 176 preempt_schedule(); /* Allow test to be preempted. */
171#endif 177#endif
172} 178}
@@ -180,39 +186,253 @@ static struct lock_torture_ops spin_lock_ops = {
180 .writelock = torture_spin_lock_write_lock, 186 .writelock = torture_spin_lock_write_lock,
181 .write_delay = torture_spin_lock_write_delay, 187 .write_delay = torture_spin_lock_write_delay,
182 .writeunlock = torture_spin_lock_write_unlock, 188 .writeunlock = torture_spin_lock_write_unlock,
189 .readlock = NULL,
190 .read_delay = NULL,
191 .readunlock = NULL,
183 .name = "spin_lock" 192 .name = "spin_lock"
184}; 193};
185 194
186static int torture_spin_lock_write_lock_irq(void) 195static int torture_spin_lock_write_lock_irq(void)
187__acquires(torture_spinlock_irq) 196__acquires(torture_spinlock)
188{ 197{
189 unsigned long flags; 198 unsigned long flags;
190 199
191 spin_lock_irqsave(&torture_spinlock, flags); 200 spin_lock_irqsave(&torture_spinlock, flags);
192 cur_ops->flags = flags; 201 cxt.cur_ops->flags = flags;
193 return 0; 202 return 0;
194} 203}
195 204
196static void torture_lock_spin_write_unlock_irq(void) 205static void torture_lock_spin_write_unlock_irq(void)
197__releases(torture_spinlock) 206__releases(torture_spinlock)
198{ 207{
199 spin_unlock_irqrestore(&torture_spinlock, cur_ops->flags); 208 spin_unlock_irqrestore(&torture_spinlock, cxt.cur_ops->flags);
200} 209}
201 210
202static struct lock_torture_ops spin_lock_irq_ops = { 211static struct lock_torture_ops spin_lock_irq_ops = {
203 .writelock = torture_spin_lock_write_lock_irq, 212 .writelock = torture_spin_lock_write_lock_irq,
204 .write_delay = torture_spin_lock_write_delay, 213 .write_delay = torture_spin_lock_write_delay,
205 .writeunlock = torture_lock_spin_write_unlock_irq, 214 .writeunlock = torture_lock_spin_write_unlock_irq,
215 .readlock = NULL,
216 .read_delay = NULL,
217 .readunlock = NULL,
206 .name = "spin_lock_irq" 218 .name = "spin_lock_irq"
207}; 219};
208 220
221static DEFINE_RWLOCK(torture_rwlock);
222
223static int torture_rwlock_write_lock(void) __acquires(torture_rwlock)
224{
225 write_lock(&torture_rwlock);
226 return 0;
227}
228
229static void torture_rwlock_write_delay(struct torture_random_state *trsp)
230{
231 const unsigned long shortdelay_us = 2;
232 const unsigned long longdelay_ms = 100;
233
234 /* We want a short delay mostly to emulate likely code, and
235 * we want a long delay occasionally to force massive contention.
236 */
237 if (!(torture_random(trsp) %
238 (cxt.nrealwriters_stress * 2000 * longdelay_ms)))
239 mdelay(longdelay_ms);
240 else
241 udelay(shortdelay_us);
242}
243
244static void torture_rwlock_write_unlock(void) __releases(torture_rwlock)
245{
246 write_unlock(&torture_rwlock);
247}
248
249static int torture_rwlock_read_lock(void) __acquires(torture_rwlock)
250{
251 read_lock(&torture_rwlock);
252 return 0;
253}
254
255static void torture_rwlock_read_delay(struct torture_random_state *trsp)
256{
257 const unsigned long shortdelay_us = 10;
258 const unsigned long longdelay_ms = 100;
259
260 /* We want a short delay mostly to emulate likely code, and
261 * we want a long delay occasionally to force massive contention.
262 */
263 if (!(torture_random(trsp) %
264 (cxt.nrealreaders_stress * 2000 * longdelay_ms)))
265 mdelay(longdelay_ms);
266 else
267 udelay(shortdelay_us);
268}
269
270static void torture_rwlock_read_unlock(void) __releases(torture_rwlock)
271{
272 read_unlock(&torture_rwlock);
273}
274
275static struct lock_torture_ops rw_lock_ops = {
276 .writelock = torture_rwlock_write_lock,
277 .write_delay = torture_rwlock_write_delay,
278 .writeunlock = torture_rwlock_write_unlock,
279 .readlock = torture_rwlock_read_lock,
280 .read_delay = torture_rwlock_read_delay,
281 .readunlock = torture_rwlock_read_unlock,
282 .name = "rw_lock"
283};
284
285static int torture_rwlock_write_lock_irq(void) __acquires(torture_rwlock)
286{
287 unsigned long flags;
288
289 write_lock_irqsave(&torture_rwlock, flags);
290 cxt.cur_ops->flags = flags;
291 return 0;
292}
293
294static void torture_rwlock_write_unlock_irq(void)
295__releases(torture_rwlock)
296{
297 write_unlock_irqrestore(&torture_rwlock, cxt.cur_ops->flags);
298}
299
300static int torture_rwlock_read_lock_irq(void) __acquires(torture_rwlock)
301{
302 unsigned long flags;
303
304 read_lock_irqsave(&torture_rwlock, flags);
305 cxt.cur_ops->flags = flags;
306 return 0;
307}
308
309static void torture_rwlock_read_unlock_irq(void)
310__releases(torture_rwlock)
311{
312 read_unlock_irqrestore(&torture_rwlock, cxt.cur_ops->flags);
313}
314
315static struct lock_torture_ops rw_lock_irq_ops = {
316 .writelock = torture_rwlock_write_lock_irq,
317 .write_delay = torture_rwlock_write_delay,
318 .writeunlock = torture_rwlock_write_unlock_irq,
319 .readlock = torture_rwlock_read_lock_irq,
320 .read_delay = torture_rwlock_read_delay,
321 .readunlock = torture_rwlock_read_unlock_irq,
322 .name = "rw_lock_irq"
323};
324
325static DEFINE_MUTEX(torture_mutex);
326
327static int torture_mutex_lock(void) __acquires(torture_mutex)
328{
329 mutex_lock(&torture_mutex);
330 return 0;
331}
332
333static void torture_mutex_delay(struct torture_random_state *trsp)
334{
335 const unsigned long longdelay_ms = 100;
336
337 /* We want a long delay occasionally to force massive contention. */
338 if (!(torture_random(trsp) %
339 (cxt.nrealwriters_stress * 2000 * longdelay_ms)))
340 mdelay(longdelay_ms * 5);
341 else
342 mdelay(longdelay_ms / 5);
343#ifdef CONFIG_PREEMPT
344 if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000)))
345 preempt_schedule(); /* Allow test to be preempted. */
346#endif
347}
348
349static void torture_mutex_unlock(void) __releases(torture_mutex)
350{
351 mutex_unlock(&torture_mutex);
352}
353
354static struct lock_torture_ops mutex_lock_ops = {
355 .writelock = torture_mutex_lock,
356 .write_delay = torture_mutex_delay,
357 .writeunlock = torture_mutex_unlock,
358 .readlock = NULL,
359 .read_delay = NULL,
360 .readunlock = NULL,
361 .name = "mutex_lock"
362};
363
364static DECLARE_RWSEM(torture_rwsem);
365static int torture_rwsem_down_write(void) __acquires(torture_rwsem)
366{
367 down_write(&torture_rwsem);
368 return 0;
369}
370
371static void torture_rwsem_write_delay(struct torture_random_state *trsp)
372{
373 const unsigned long longdelay_ms = 100;
374
375 /* We want a long delay occasionally to force massive contention. */
376 if (!(torture_random(trsp) %
377 (cxt.nrealwriters_stress * 2000 * longdelay_ms)))
378 mdelay(longdelay_ms * 10);
379 else
380 mdelay(longdelay_ms / 10);
381#ifdef CONFIG_PREEMPT
382 if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000)))
383 preempt_schedule(); /* Allow test to be preempted. */
384#endif
385}
386
387static void torture_rwsem_up_write(void) __releases(torture_rwsem)
388{
389 up_write(&torture_rwsem);
390}
391
392static int torture_rwsem_down_read(void) __acquires(torture_rwsem)
393{
394 down_read(&torture_rwsem);
395 return 0;
396}
397
398static void torture_rwsem_read_delay(struct torture_random_state *trsp)
399{
400 const unsigned long longdelay_ms = 100;
401
402 /* We want a long delay occasionally to force massive contention. */
403 if (!(torture_random(trsp) %
404 (cxt.nrealwriters_stress * 2000 * longdelay_ms)))
405 mdelay(longdelay_ms * 2);
406 else
407 mdelay(longdelay_ms / 2);
408#ifdef CONFIG_PREEMPT
409 if (!(torture_random(trsp) % (cxt.nrealreaders_stress * 20000)))
410 preempt_schedule(); /* Allow test to be preempted. */
411#endif
412}
413
414static void torture_rwsem_up_read(void) __releases(torture_rwsem)
415{
416 up_read(&torture_rwsem);
417}
418
419static struct lock_torture_ops rwsem_lock_ops = {
420 .writelock = torture_rwsem_down_write,
421 .write_delay = torture_rwsem_write_delay,
422 .writeunlock = torture_rwsem_up_write,
423 .readlock = torture_rwsem_down_read,
424 .read_delay = torture_rwsem_read_delay,
425 .readunlock = torture_rwsem_up_read,
426 .name = "rwsem_lock"
427};
428
209/* 429/*
210 * Lock torture writer kthread. Repeatedly acquires and releases 430 * Lock torture writer kthread. Repeatedly acquires and releases
211 * the lock, checking for duplicate acquisitions. 431 * the lock, checking for duplicate acquisitions.
212 */ 432 */
213static int lock_torture_writer(void *arg) 433static int lock_torture_writer(void *arg)
214{ 434{
215 struct lock_writer_stress_stats *lwsp = arg; 435 struct lock_stress_stats *lwsp = arg;
216 static DEFINE_TORTURE_RANDOM(rand); 436 static DEFINE_TORTURE_RANDOM(rand);
217 437
218 VERBOSE_TOROUT_STRING("lock_torture_writer task started"); 438 VERBOSE_TOROUT_STRING("lock_torture_writer task started");
@@ -221,14 +441,19 @@ static int lock_torture_writer(void *arg)
221 do { 441 do {
222 if ((torture_random(&rand) & 0xfffff) == 0) 442 if ((torture_random(&rand) & 0xfffff) == 0)
223 schedule_timeout_uninterruptible(1); 443 schedule_timeout_uninterruptible(1);
224 cur_ops->writelock(); 444
445 cxt.cur_ops->writelock();
225 if (WARN_ON_ONCE(lock_is_write_held)) 446 if (WARN_ON_ONCE(lock_is_write_held))
226 lwsp->n_write_lock_fail++; 447 lwsp->n_lock_fail++;
227 lock_is_write_held = 1; 448 lock_is_write_held = 1;
228 lwsp->n_write_lock_acquired++; 449 if (WARN_ON_ONCE(lock_is_read_held))
229 cur_ops->write_delay(&rand); 450 lwsp->n_lock_fail++; /* rare, but... */
451
452 lwsp->n_lock_acquired++;
453 cxt.cur_ops->write_delay(&rand);
230 lock_is_write_held = 0; 454 lock_is_write_held = 0;
231 cur_ops->writeunlock(); 455 cxt.cur_ops->writeunlock();
456
232 stutter_wait("lock_torture_writer"); 457 stutter_wait("lock_torture_writer");
233 } while (!torture_must_stop()); 458 } while (!torture_must_stop());
234 torture_kthread_stopping("lock_torture_writer"); 459 torture_kthread_stopping("lock_torture_writer");
@@ -236,32 +461,66 @@ static int lock_torture_writer(void *arg)
236} 461}
237 462
238/* 463/*
464 * Lock torture reader kthread. Repeatedly acquires and releases
465 * the reader lock.
466 */
467static int lock_torture_reader(void *arg)
468{
469 struct lock_stress_stats *lrsp = arg;
470 static DEFINE_TORTURE_RANDOM(rand);
471
472 VERBOSE_TOROUT_STRING("lock_torture_reader task started");
473 set_user_nice(current, MAX_NICE);
474
475 do {
476 if ((torture_random(&rand) & 0xfffff) == 0)
477 schedule_timeout_uninterruptible(1);
478
479 cxt.cur_ops->readlock();
480 lock_is_read_held = 1;
481 if (WARN_ON_ONCE(lock_is_write_held))
482 lrsp->n_lock_fail++; /* rare, but... */
483
484 lrsp->n_lock_acquired++;
485 cxt.cur_ops->read_delay(&rand);
486 lock_is_read_held = 0;
487 cxt.cur_ops->readunlock();
488
489 stutter_wait("lock_torture_reader");
490 } while (!torture_must_stop());
491 torture_kthread_stopping("lock_torture_reader");
492 return 0;
493}
494
495/*
239 * Create an lock-torture-statistics message in the specified buffer. 496 * Create an lock-torture-statistics message in the specified buffer.
240 */ 497 */
241static void lock_torture_printk(char *page) 498static void __torture_print_stats(char *page,
499 struct lock_stress_stats *statp, bool write)
242{ 500{
243 bool fail = 0; 501 bool fail = 0;
244 int i; 502 int i, n_stress;
245 long max = 0; 503 long max = 0;
246 long min = lwsa[0].n_write_lock_acquired; 504 long min = statp[0].n_lock_acquired;
247 long long sum = 0; 505 long long sum = 0;
248 506
249 for (i = 0; i < nrealwriters_stress; i++) { 507 n_stress = write ? cxt.nrealwriters_stress : cxt.nrealreaders_stress;
250 if (lwsa[i].n_write_lock_fail) 508 for (i = 0; i < n_stress; i++) {
509 if (statp[i].n_lock_fail)
251 fail = true; 510 fail = true;
252 sum += lwsa[i].n_write_lock_acquired; 511 sum += statp[i].n_lock_acquired;
253 if (max < lwsa[i].n_write_lock_fail) 512 if (max < statp[i].n_lock_fail)
254 max = lwsa[i].n_write_lock_fail; 513 max = statp[i].n_lock_fail;
255 if (min > lwsa[i].n_write_lock_fail) 514 if (min > statp[i].n_lock_fail)
256 min = lwsa[i].n_write_lock_fail; 515 min = statp[i].n_lock_fail;
257 } 516 }
258 page += sprintf(page, "%s%s ", torture_type, TORTURE_FLAG);
259 page += sprintf(page, 517 page += sprintf(page,
260 "Writes: Total: %lld Max/Min: %ld/%ld %s Fail: %d %s\n", 518 "%s: Total: %lld Max/Min: %ld/%ld %s Fail: %d %s\n",
519 write ? "Writes" : "Reads ",
261 sum, max, min, max / 2 > min ? "???" : "", 520 sum, max, min, max / 2 > min ? "???" : "",
262 fail, fail ? "!!!" : ""); 521 fail, fail ? "!!!" : "");
263 if (fail) 522 if (fail)
264 atomic_inc(&n_lock_torture_errors); 523 atomic_inc(&cxt.n_lock_torture_errors);
265} 524}
266 525
267/* 526/*
@@ -274,18 +533,35 @@ static void lock_torture_printk(char *page)
274 */ 533 */
275static void lock_torture_stats_print(void) 534static void lock_torture_stats_print(void)
276{ 535{
277 int size = nrealwriters_stress * 200 + 8192; 536 int size = cxt.nrealwriters_stress * 200 + 8192;
278 char *buf; 537 char *buf;
279 538
539 if (cxt.cur_ops->readlock)
540 size += cxt.nrealreaders_stress * 200 + 8192;
541
280 buf = kmalloc(size, GFP_KERNEL); 542 buf = kmalloc(size, GFP_KERNEL);
281 if (!buf) { 543 if (!buf) {
282 pr_err("lock_torture_stats_print: Out of memory, need: %d", 544 pr_err("lock_torture_stats_print: Out of memory, need: %d",
283 size); 545 size);
284 return; 546 return;
285 } 547 }
286 lock_torture_printk(buf); 548
549 __torture_print_stats(buf, cxt.lwsa, true);
287 pr_alert("%s", buf); 550 pr_alert("%s", buf);
288 kfree(buf); 551 kfree(buf);
552
553 if (cxt.cur_ops->readlock) {
554 buf = kmalloc(size, GFP_KERNEL);
555 if (!buf) {
556 pr_err("lock_torture_stats_print: Out of memory, need: %d",
557 size);
558 return;
559 }
560
561 __torture_print_stats(buf, cxt.lrsa, false);
562 pr_alert("%s", buf);
563 kfree(buf);
564 }
289} 565}
290 566
291/* 567/*
@@ -312,9 +588,10 @@ lock_torture_print_module_parms(struct lock_torture_ops *cur_ops,
312 const char *tag) 588 const char *tag)
313{ 589{
314 pr_alert("%s" TORTURE_FLAG 590 pr_alert("%s" TORTURE_FLAG
315 "--- %s: nwriters_stress=%d stat_interval=%d verbose=%d shuffle_interval=%d stutter=%d shutdown_secs=%d onoff_interval=%d onoff_holdoff=%d\n", 591 "--- %s%s: nwriters_stress=%d nreaders_stress=%d stat_interval=%d verbose=%d shuffle_interval=%d stutter=%d shutdown_secs=%d onoff_interval=%d onoff_holdoff=%d\n",
316 torture_type, tag, nrealwriters_stress, stat_interval, verbose, 592 torture_type, tag, cxt.debug_lock ? " [debug]": "",
317 shuffle_interval, stutter, shutdown_secs, 593 cxt.nrealwriters_stress, cxt.nrealreaders_stress, stat_interval,
594 verbose, shuffle_interval, stutter, shutdown_secs,
318 onoff_interval, onoff_holdoff); 595 onoff_interval, onoff_holdoff);
319} 596}
320 597
@@ -322,46 +599,59 @@ static void lock_torture_cleanup(void)
322{ 599{
323 int i; 600 int i;
324 601
325 if (torture_cleanup()) 602 if (torture_cleanup_begin())
326 return; 603 return;
327 604
328 if (writer_tasks) { 605 if (writer_tasks) {
329 for (i = 0; i < nrealwriters_stress; i++) 606 for (i = 0; i < cxt.nrealwriters_stress; i++)
330 torture_stop_kthread(lock_torture_writer, 607 torture_stop_kthread(lock_torture_writer,
331 writer_tasks[i]); 608 writer_tasks[i]);
332 kfree(writer_tasks); 609 kfree(writer_tasks);
333 writer_tasks = NULL; 610 writer_tasks = NULL;
334 } 611 }
335 612
613 if (reader_tasks) {
614 for (i = 0; i < cxt.nrealreaders_stress; i++)
615 torture_stop_kthread(lock_torture_reader,
616 reader_tasks[i]);
617 kfree(reader_tasks);
618 reader_tasks = NULL;
619 }
620
336 torture_stop_kthread(lock_torture_stats, stats_task); 621 torture_stop_kthread(lock_torture_stats, stats_task);
337 lock_torture_stats_print(); /* -After- the stats thread is stopped! */ 622 lock_torture_stats_print(); /* -After- the stats thread is stopped! */
338 623
339 if (atomic_read(&n_lock_torture_errors)) 624 if (atomic_read(&cxt.n_lock_torture_errors))
340 lock_torture_print_module_parms(cur_ops, 625 lock_torture_print_module_parms(cxt.cur_ops,
341 "End of test: FAILURE"); 626 "End of test: FAILURE");
342 else if (torture_onoff_failures()) 627 else if (torture_onoff_failures())
343 lock_torture_print_module_parms(cur_ops, 628 lock_torture_print_module_parms(cxt.cur_ops,
344 "End of test: LOCK_HOTPLUG"); 629 "End of test: LOCK_HOTPLUG");
345 else 630 else
346 lock_torture_print_module_parms(cur_ops, 631 lock_torture_print_module_parms(cxt.cur_ops,
347 "End of test: SUCCESS"); 632 "End of test: SUCCESS");
633 torture_cleanup_end();
348} 634}
349 635
350static int __init lock_torture_init(void) 636static int __init lock_torture_init(void)
351{ 637{
352 int i; 638 int i, j;
353 int firsterr = 0; 639 int firsterr = 0;
354 static struct lock_torture_ops *torture_ops[] = { 640 static struct lock_torture_ops *torture_ops[] = {
355 &lock_busted_ops, &spin_lock_ops, &spin_lock_irq_ops, 641 &lock_busted_ops,
642 &spin_lock_ops, &spin_lock_irq_ops,
643 &rw_lock_ops, &rw_lock_irq_ops,
644 &mutex_lock_ops,
645 &rwsem_lock_ops,
356 }; 646 };
357 647
358 if (!torture_init_begin(torture_type, verbose, &locktorture_runnable)) 648 if (!torture_init_begin(torture_type, verbose, &torture_runnable))
359 return -EBUSY; 649 return -EBUSY;
360 650
361 /* Process args and tell the world that the torturer is on the job. */ 651 /* Process args and tell the world that the torturer is on the job. */
362 for (i = 0; i < ARRAY_SIZE(torture_ops); i++) { 652 for (i = 0; i < ARRAY_SIZE(torture_ops); i++) {
363 cur_ops = torture_ops[i]; 653 cxt.cur_ops = torture_ops[i];
364 if (strcmp(torture_type, cur_ops->name) == 0) 654 if (strcmp(torture_type, cxt.cur_ops->name) == 0)
365 break; 655 break;
366 } 656 }
367 if (i == ARRAY_SIZE(torture_ops)) { 657 if (i == ARRAY_SIZE(torture_ops)) {
@@ -374,31 +664,69 @@ static int __init lock_torture_init(void)
374 torture_init_end(); 664 torture_init_end();
375 return -EINVAL; 665 return -EINVAL;
376 } 666 }
377 if (cur_ops->init) 667 if (cxt.cur_ops->init)
378 cur_ops->init(); /* no "goto unwind" prior to this point!!! */ 668 cxt.cur_ops->init(); /* no "goto unwind" prior to this point!!! */
379 669
380 if (nwriters_stress >= 0) 670 if (nwriters_stress >= 0)
381 nrealwriters_stress = nwriters_stress; 671 cxt.nrealwriters_stress = nwriters_stress;
382 else 672 else
383 nrealwriters_stress = 2 * num_online_cpus(); 673 cxt.nrealwriters_stress = 2 * num_online_cpus();
384 lock_torture_print_module_parms(cur_ops, "Start of test"); 674
675#ifdef CONFIG_DEBUG_MUTEXES
676 if (strncmp(torture_type, "mutex", 5) == 0)
677 cxt.debug_lock = true;
678#endif
679#ifdef CONFIG_DEBUG_SPINLOCK
680 if ((strncmp(torture_type, "spin", 4) == 0) ||
681 (strncmp(torture_type, "rw_lock", 7) == 0))
682 cxt.debug_lock = true;
683#endif
385 684
386 /* Initialize the statistics so that each run gets its own numbers. */ 685 /* Initialize the statistics so that each run gets its own numbers. */
387 686
388 lock_is_write_held = 0; 687 lock_is_write_held = 0;
389 lwsa = kmalloc(sizeof(*lwsa) * nrealwriters_stress, GFP_KERNEL); 688 cxt.lwsa = kmalloc(sizeof(*cxt.lwsa) * cxt.nrealwriters_stress, GFP_KERNEL);
390 if (lwsa == NULL) { 689 if (cxt.lwsa == NULL) {
391 VERBOSE_TOROUT_STRING("lwsa: Out of memory"); 690 VERBOSE_TOROUT_STRING("cxt.lwsa: Out of memory");
392 firsterr = -ENOMEM; 691 firsterr = -ENOMEM;
393 goto unwind; 692 goto unwind;
394 } 693 }
395 for (i = 0; i < nrealwriters_stress; i++) { 694 for (i = 0; i < cxt.nrealwriters_stress; i++) {
396 lwsa[i].n_write_lock_fail = 0; 695 cxt.lwsa[i].n_lock_fail = 0;
397 lwsa[i].n_write_lock_acquired = 0; 696 cxt.lwsa[i].n_lock_acquired = 0;
398 } 697 }
399 698
400 /* Start up the kthreads. */ 699 if (cxt.cur_ops->readlock) {
700 if (nreaders_stress >= 0)
701 cxt.nrealreaders_stress = nreaders_stress;
702 else {
703 /*
704 * By default distribute evenly the number of
705 * readers and writers. We still run the same number
706 * of threads as the writer-only locks default.
707 */
708 if (nwriters_stress < 0) /* user doesn't care */
709 cxt.nrealwriters_stress = num_online_cpus();
710 cxt.nrealreaders_stress = cxt.nrealwriters_stress;
711 }
712
713 lock_is_read_held = 0;
714 cxt.lrsa = kmalloc(sizeof(*cxt.lrsa) * cxt.nrealreaders_stress, GFP_KERNEL);
715 if (cxt.lrsa == NULL) {
716 VERBOSE_TOROUT_STRING("cxt.lrsa: Out of memory");
717 firsterr = -ENOMEM;
718 kfree(cxt.lwsa);
719 goto unwind;
720 }
721
722 for (i = 0; i < cxt.nrealreaders_stress; i++) {
723 cxt.lrsa[i].n_lock_fail = 0;
724 cxt.lrsa[i].n_lock_acquired = 0;
725 }
726 }
727 lock_torture_print_module_parms(cxt.cur_ops, "Start of test");
401 728
729 /* Prepare torture context. */
402 if (onoff_interval > 0) { 730 if (onoff_interval > 0) {
403 firsterr = torture_onoff_init(onoff_holdoff * HZ, 731 firsterr = torture_onoff_init(onoff_holdoff * HZ,
404 onoff_interval * HZ); 732 onoff_interval * HZ);
@@ -422,18 +750,51 @@ static int __init lock_torture_init(void)
422 goto unwind; 750 goto unwind;
423 } 751 }
424 752
425 writer_tasks = kzalloc(nrealwriters_stress * sizeof(writer_tasks[0]), 753 writer_tasks = kzalloc(cxt.nrealwriters_stress * sizeof(writer_tasks[0]),
426 GFP_KERNEL); 754 GFP_KERNEL);
427 if (writer_tasks == NULL) { 755 if (writer_tasks == NULL) {
428 VERBOSE_TOROUT_ERRSTRING("writer_tasks: Out of memory"); 756 VERBOSE_TOROUT_ERRSTRING("writer_tasks: Out of memory");
429 firsterr = -ENOMEM; 757 firsterr = -ENOMEM;
430 goto unwind; 758 goto unwind;
431 } 759 }
432 for (i = 0; i < nrealwriters_stress; i++) { 760
433 firsterr = torture_create_kthread(lock_torture_writer, &lwsa[i], 761 if (cxt.cur_ops->readlock) {
762 reader_tasks = kzalloc(cxt.nrealreaders_stress * sizeof(reader_tasks[0]),
763 GFP_KERNEL);
764 if (reader_tasks == NULL) {
765 VERBOSE_TOROUT_ERRSTRING("reader_tasks: Out of memory");
766 firsterr = -ENOMEM;
767 goto unwind;
768 }
769 }
770
771 /*
772 * Create the kthreads and start torturing (oh, those poor little locks).
773 *
774 * TODO: Note that we interleave writers with readers, giving writers a
775 * slight advantage, by creating its kthread first. This can be modified
776 * for very specific needs, or even let the user choose the policy, if
777 * ever wanted.
778 */
779 for (i = 0, j = 0; i < cxt.nrealwriters_stress ||
780 j < cxt.nrealreaders_stress; i++, j++) {
781 if (i >= cxt.nrealwriters_stress)
782 goto create_reader;
783
784 /* Create writer. */
785 firsterr = torture_create_kthread(lock_torture_writer, &cxt.lwsa[i],
434 writer_tasks[i]); 786 writer_tasks[i]);
435 if (firsterr) 787 if (firsterr)
436 goto unwind; 788 goto unwind;
789
790 create_reader:
791 if (cxt.cur_ops->readlock == NULL || (j >= cxt.nrealreaders_stress))
792 continue;
793 /* Create reader. */
794 firsterr = torture_create_kthread(lock_torture_reader, &cxt.lrsa[j],
795 reader_tasks[j]);
796 if (firsterr)
797 goto unwind;
437 } 798 }
438 if (stat_interval > 0) { 799 if (stat_interval > 0) {
439 firsterr = torture_create_kthread(lock_torture_stats, NULL, 800 firsterr = torture_create_kthread(lock_torture_stats, NULL,
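With the ops table above now split into write-side and read-side hooks, adding another lock flavor is mostly a matter of filling in a new lock_torture_ops entry and listing it in torture_ops[] in lock_torture_init(). A sketch (not part of this patch; "example_lock" and the example_* names are made up) for a writer-only primitive:

static DEFINE_SPINLOCK(example_lock);	/* hypothetical lock under test */

static int example_write_lock(void) __acquires(example_lock)
{
	spin_lock(&example_lock);
	return 0;
}

static void example_write_unlock(void) __releases(example_lock)
{
	spin_unlock(&example_lock);
}

static struct lock_torture_ops example_lock_ops = {
	.writelock	= example_write_lock,
	.write_delay	= torture_spin_lock_write_delay,	/* reuse existing delay */
	.writeunlock	= example_write_unlock,
	.readlock	= NULL,		/* NULL read hooks: no reader kthreads */
	.read_delay	= NULL,
	.readunlock	= NULL,
	.name		= "example_lock"
};

The test would then be selected at module-load time with torture_type=example_lock, and nreaders_stress would be ignored because the read-side hooks are NULL.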
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 948a7693748e..240fa9094f83 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -49,11 +49,19 @@
49#include <linux/trace_clock.h> 49#include <linux/trace_clock.h>
50#include <asm/byteorder.h> 50#include <asm/byteorder.h>
51#include <linux/torture.h> 51#include <linux/torture.h>
52#include <linux/vmalloc.h>
52 53
53MODULE_LICENSE("GPL"); 54MODULE_LICENSE("GPL");
54MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and Josh Triplett <josh@joshtriplett.org>"); 55MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and Josh Triplett <josh@joshtriplett.org>");
55 56
56 57
58torture_param(int, cbflood_inter_holdoff, HZ,
59 "Holdoff between floods (jiffies)");
60torture_param(int, cbflood_intra_holdoff, 1,
61 "Holdoff between bursts (jiffies)");
62torture_param(int, cbflood_n_burst, 3, "# bursts in flood, zero to disable");
63torture_param(int, cbflood_n_per_burst, 20000,
64 "# callbacks per burst in flood");
57torture_param(int, fqs_duration, 0, 65torture_param(int, fqs_duration, 0,
58 "Duration of fqs bursts (us), 0 to disable"); 66 "Duration of fqs bursts (us), 0 to disable");
59torture_param(int, fqs_holdoff, 0, "Holdoff time within fqs bursts (us)"); 67torture_param(int, fqs_holdoff, 0, "Holdoff time within fqs bursts (us)");
@@ -96,10 +104,12 @@ module_param(torture_type, charp, 0444);
96MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, ...)"); 104MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, ...)");
97 105
98static int nrealreaders; 106static int nrealreaders;
107static int ncbflooders;
99static struct task_struct *writer_task; 108static struct task_struct *writer_task;
100static struct task_struct **fakewriter_tasks; 109static struct task_struct **fakewriter_tasks;
101static struct task_struct **reader_tasks; 110static struct task_struct **reader_tasks;
102static struct task_struct *stats_task; 111static struct task_struct *stats_task;
112static struct task_struct **cbflood_task;
103static struct task_struct *fqs_task; 113static struct task_struct *fqs_task;
104static struct task_struct *boost_tasks[NR_CPUS]; 114static struct task_struct *boost_tasks[NR_CPUS];
105static struct task_struct *stall_task; 115static struct task_struct *stall_task;
@@ -138,6 +148,7 @@ static long n_rcu_torture_boosts;
138static long n_rcu_torture_timers; 148static long n_rcu_torture_timers;
139static long n_barrier_attempts; 149static long n_barrier_attempts;
140static long n_barrier_successes; 150static long n_barrier_successes;
151static atomic_long_t n_cbfloods;
141static struct list_head rcu_torture_removed; 152static struct list_head rcu_torture_removed;
142 153
143static int rcu_torture_writer_state; 154static int rcu_torture_writer_state;
@@ -157,9 +168,9 @@ static int rcu_torture_writer_state;
157#else 168#else
158#define RCUTORTURE_RUNNABLE_INIT 0 169#define RCUTORTURE_RUNNABLE_INIT 0
159#endif 170#endif
160int rcutorture_runnable = RCUTORTURE_RUNNABLE_INIT; 171static int torture_runnable = RCUTORTURE_RUNNABLE_INIT;
161module_param(rcutorture_runnable, int, 0444); 172module_param(torture_runnable, int, 0444);
162MODULE_PARM_DESC(rcutorture_runnable, "Start rcutorture at boot"); 173MODULE_PARM_DESC(torture_runnable, "Start rcutorture at boot");
163 174
164#if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU) 175#if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU)
165#define rcu_can_boost() 1 176#define rcu_can_boost() 1
@@ -182,7 +193,7 @@ static u64 notrace rcu_trace_clock_local(void)
182#endif /* #else #ifdef CONFIG_RCU_TRACE */ 193#endif /* #else #ifdef CONFIG_RCU_TRACE */
183 194
184static unsigned long boost_starttime; /* jiffies of next boost test start. */ 195static unsigned long boost_starttime; /* jiffies of next boost test start. */
185DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */ 196static DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */
186 /* and boost task create/destroy. */ 197 /* and boost task create/destroy. */
187static atomic_t barrier_cbs_count; /* Barrier callbacks registered. */ 198static atomic_t barrier_cbs_count; /* Barrier callbacks registered. */
188static bool barrier_phase; /* Test phase. */ 199static bool barrier_phase; /* Test phase. */
@@ -242,7 +253,7 @@ struct rcu_torture_ops {
242 void (*call)(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); 253 void (*call)(struct rcu_head *head, void (*func)(struct rcu_head *rcu));
243 void (*cb_barrier)(void); 254 void (*cb_barrier)(void);
244 void (*fqs)(void); 255 void (*fqs)(void);
245 void (*stats)(char *page); 256 void (*stats)(void);
246 int irq_capable; 257 int irq_capable;
247 int can_boost; 258 int can_boost;
248 const char *name; 259 const char *name;
@@ -525,21 +536,21 @@ static void srcu_torture_barrier(void)
525 srcu_barrier(&srcu_ctl); 536 srcu_barrier(&srcu_ctl);
526} 537}
527 538
528static void srcu_torture_stats(char *page) 539static void srcu_torture_stats(void)
529{ 540{
530 int cpu; 541 int cpu;
531 int idx = srcu_ctl.completed & 0x1; 542 int idx = srcu_ctl.completed & 0x1;
532 543
533 page += sprintf(page, "%s%s per-CPU(idx=%d):", 544 pr_alert("%s%s per-CPU(idx=%d):",
534 torture_type, TORTURE_FLAG, idx); 545 torture_type, TORTURE_FLAG, idx);
535 for_each_possible_cpu(cpu) { 546 for_each_possible_cpu(cpu) {
536 long c0, c1; 547 long c0, c1;
537 548
538 c0 = (long)per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[!idx]; 549 c0 = (long)per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[!idx];
539 c1 = (long)per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[idx]; 550 c1 = (long)per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[idx];
540 page += sprintf(page, " %d(%ld,%ld)", cpu, c0, c1); 551 pr_cont(" %d(%ld,%ld)", cpu, c0, c1);
541 } 552 }
542 sprintf(page, "\n"); 553 pr_cont("\n");
543} 554}
544 555
545static void srcu_torture_synchronize_expedited(void) 556static void srcu_torture_synchronize_expedited(void)
@@ -601,6 +612,52 @@ static struct rcu_torture_ops sched_ops = {
601 .name = "sched" 612 .name = "sched"
602}; 613};
603 614
615#ifdef CONFIG_TASKS_RCU
616
617/*
618 * Definitions for RCU-tasks torture testing.
619 */
620
621static int tasks_torture_read_lock(void)
622{
623 return 0;
624}
625
626static void tasks_torture_read_unlock(int idx)
627{
628}
629
630static void rcu_tasks_torture_deferred_free(struct rcu_torture *p)
631{
632 call_rcu_tasks(&p->rtort_rcu, rcu_torture_cb);
633}
634
635static struct rcu_torture_ops tasks_ops = {
636 .ttype = RCU_TASKS_FLAVOR,
637 .init = rcu_sync_torture_init,
638 .readlock = tasks_torture_read_lock,
639 .read_delay = rcu_read_delay, /* just reuse rcu's version. */
640 .readunlock = tasks_torture_read_unlock,
641 .completed = rcu_no_completed,
642 .deferred_free = rcu_tasks_torture_deferred_free,
643 .sync = synchronize_rcu_tasks,
644 .exp_sync = synchronize_rcu_tasks,
645 .call = call_rcu_tasks,
646 .cb_barrier = rcu_barrier_tasks,
647 .fqs = NULL,
648 .stats = NULL,
649 .irq_capable = 1,
650 .name = "tasks"
651};
652
653#define RCUTORTURE_TASKS_OPS &tasks_ops,
654
655#else /* #ifdef CONFIG_TASKS_RCU */
656
657#define RCUTORTURE_TASKS_OPS
658
659#endif /* #else #ifdef CONFIG_TASKS_RCU */
660
604/* 661/*
605 * RCU torture priority-boost testing. Runs one real-time thread per 662 * RCU torture priority-boost testing. Runs one real-time thread per
606 * CPU for moderate bursts, repeatedly registering RCU callbacks and 663 * CPU for moderate bursts, repeatedly registering RCU callbacks and
@@ -667,7 +724,7 @@ static int rcu_torture_boost(void *arg)
667 } 724 }
668 call_rcu_time = jiffies; 725 call_rcu_time = jiffies;
669 } 726 }
670 cond_resched(); 727 cond_resched_rcu_qs();
671 stutter_wait("rcu_torture_boost"); 728 stutter_wait("rcu_torture_boost");
672 if (torture_must_stop()) 729 if (torture_must_stop())
673 goto checkwait; 730 goto checkwait;
@@ -707,6 +764,58 @@ checkwait: stutter_wait("rcu_torture_boost");
707 return 0; 764 return 0;
708} 765}
709 766
767static void rcu_torture_cbflood_cb(struct rcu_head *rhp)
768{
769}
770
771/*
772 * RCU torture callback-flood kthread. Repeatedly induces bursts of calls
773 * to call_rcu() or analogous, increasing the probability of occurrence
774 * of callback-overflow corner cases.
775 */
776static int
777rcu_torture_cbflood(void *arg)
778{
779 int err = 1;
780 int i;
781 int j;
782 struct rcu_head *rhp;
783
784 if (cbflood_n_per_burst > 0 &&
785 cbflood_inter_holdoff > 0 &&
786 cbflood_intra_holdoff > 0 &&
787 cur_ops->call &&
788 cur_ops->cb_barrier) {
789 rhp = vmalloc(sizeof(*rhp) *
790 cbflood_n_burst * cbflood_n_per_burst);
791 err = !rhp;
792 }
793 if (err) {
794 VERBOSE_TOROUT_STRING("rcu_torture_cbflood disabled: Bad args or OOM");
795 while (!torture_must_stop())
796 schedule_timeout_interruptible(HZ);
797 return 0;
798 }
799 VERBOSE_TOROUT_STRING("rcu_torture_cbflood task started");
800 do {
801 schedule_timeout_interruptible(cbflood_inter_holdoff);
802 atomic_long_inc(&n_cbfloods);
803 WARN_ON(signal_pending(current));
804 for (i = 0; i < cbflood_n_burst; i++) {
805 for (j = 0; j < cbflood_n_per_burst; j++) {
806 cur_ops->call(&rhp[i * cbflood_n_per_burst + j],
807 rcu_torture_cbflood_cb);
808 }
809 schedule_timeout_interruptible(cbflood_intra_holdoff);
810 WARN_ON(signal_pending(current));
811 }
812 cur_ops->cb_barrier();
813 stutter_wait("rcu_torture_cbflood");
814 } while (!torture_must_stop());
815 torture_kthread_stopping("rcu_torture_cbflood");
816 return 0;
817}
818
710/* 819/*
711 * RCU torture force-quiescent-state kthread. Repeatedly induces 820 * RCU torture force-quiescent-state kthread. Repeatedly induces
712 * bursts of calls to force_quiescent_state(), increasing the probability 821 * bursts of calls to force_quiescent_state(), increasing the probability
@@ -1019,7 +1128,7 @@ rcu_torture_reader(void *arg)
1019 __this_cpu_inc(rcu_torture_batch[completed]); 1128 __this_cpu_inc(rcu_torture_batch[completed]);
1020 preempt_enable(); 1129 preempt_enable();
1021 cur_ops->readunlock(idx); 1130 cur_ops->readunlock(idx);
1022 cond_resched(); 1131 cond_resched_rcu_qs();
1023 stutter_wait("rcu_torture_reader"); 1132 stutter_wait("rcu_torture_reader");
1024 } while (!torture_must_stop()); 1133 } while (!torture_must_stop());
1025 if (irqreader && cur_ops->irq_capable) { 1134 if (irqreader && cur_ops->irq_capable) {
@@ -1031,10 +1140,15 @@ rcu_torture_reader(void *arg)
1031} 1140}
1032 1141
1033/* 1142/*
1034 * Create an RCU-torture statistics message in the specified buffer. 1143 * Print torture statistics. Caller must ensure that there is only
1144 * one call to this function at a given time!!! This is normally
1145 * accomplished by relying on the module system to only have one copy
1146 * of the module loaded, and then by giving the rcu_torture_stats
1147 * kthread full control (or the init/cleanup functions when rcu_torture_stats
1148 * thread is not running).
1035 */ 1149 */
1036static void 1150static void
1037rcu_torture_printk(char *page) 1151rcu_torture_stats_print(void)
1038{ 1152{
1039 int cpu; 1153 int cpu;
1040 int i; 1154 int i;
@@ -1052,55 +1166,61 @@ rcu_torture_printk(char *page)
1052 if (pipesummary[i] != 0) 1166 if (pipesummary[i] != 0)
1053 break; 1167 break;
1054 } 1168 }
1055 page += sprintf(page, "%s%s ", torture_type, TORTURE_FLAG); 1169
1056 page += sprintf(page, 1170 pr_alert("%s%s ", torture_type, TORTURE_FLAG);
1057 "rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d ", 1171 pr_cont("rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d ",
1058 rcu_torture_current, 1172 rcu_torture_current,
1059 rcu_torture_current_version, 1173 rcu_torture_current_version,
1060 list_empty(&rcu_torture_freelist), 1174 list_empty(&rcu_torture_freelist),
1061 atomic_read(&n_rcu_torture_alloc), 1175 atomic_read(&n_rcu_torture_alloc),
1062 atomic_read(&n_rcu_torture_alloc_fail), 1176 atomic_read(&n_rcu_torture_alloc_fail),
1063 atomic_read(&n_rcu_torture_free)); 1177 atomic_read(&n_rcu_torture_free));
1064 page += sprintf(page, "rtmbe: %d rtbke: %ld rtbre: %ld ", 1178 pr_cont("rtmbe: %d rtbke: %ld rtbre: %ld ",
1065 atomic_read(&n_rcu_torture_mberror), 1179 atomic_read(&n_rcu_torture_mberror),
1066 n_rcu_torture_boost_ktrerror, 1180 n_rcu_torture_boost_ktrerror,
1067 n_rcu_torture_boost_rterror); 1181 n_rcu_torture_boost_rterror);
1068 page += sprintf(page, "rtbf: %ld rtb: %ld nt: %ld ", 1182 pr_cont("rtbf: %ld rtb: %ld nt: %ld ",
1069 n_rcu_torture_boost_failure, 1183 n_rcu_torture_boost_failure,
1070 n_rcu_torture_boosts, 1184 n_rcu_torture_boosts,
1071 n_rcu_torture_timers); 1185 n_rcu_torture_timers);
1072 page = torture_onoff_stats(page); 1186 torture_onoff_stats();
1073 page += sprintf(page, "barrier: %ld/%ld:%ld", 1187 pr_cont("barrier: %ld/%ld:%ld ",
1074 n_barrier_successes, 1188 n_barrier_successes,
1075 n_barrier_attempts, 1189 n_barrier_attempts,
1076 n_rcu_torture_barrier_error); 1190 n_rcu_torture_barrier_error);
1077 page += sprintf(page, "\n%s%s ", torture_type, TORTURE_FLAG); 1191 pr_cont("cbflood: %ld\n", atomic_long_read(&n_cbfloods));
1192
1193 pr_alert("%s%s ", torture_type, TORTURE_FLAG);
1078 if (atomic_read(&n_rcu_torture_mberror) != 0 || 1194 if (atomic_read(&n_rcu_torture_mberror) != 0 ||
1079 n_rcu_torture_barrier_error != 0 || 1195 n_rcu_torture_barrier_error != 0 ||
1080 n_rcu_torture_boost_ktrerror != 0 || 1196 n_rcu_torture_boost_ktrerror != 0 ||
1081 n_rcu_torture_boost_rterror != 0 || 1197 n_rcu_torture_boost_rterror != 0 ||
1082 n_rcu_torture_boost_failure != 0 || 1198 n_rcu_torture_boost_failure != 0 ||
1083 i > 1) { 1199 i > 1) {
1084 page += sprintf(page, "!!! "); 1200 pr_cont("%s", "!!! ");
1085 atomic_inc(&n_rcu_torture_error); 1201 atomic_inc(&n_rcu_torture_error);
1086 WARN_ON_ONCE(1); 1202 WARN_ON_ONCE(1);
1087 } 1203 }
1088 page += sprintf(page, "Reader Pipe: "); 1204 pr_cont("Reader Pipe: ");
1089 for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) 1205 for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++)
1090 page += sprintf(page, " %ld", pipesummary[i]); 1206 pr_cont(" %ld", pipesummary[i]);
1091 page += sprintf(page, "\n%s%s ", torture_type, TORTURE_FLAG); 1207 pr_cont("\n");
1092 page += sprintf(page, "Reader Batch: "); 1208
1209 pr_alert("%s%s ", torture_type, TORTURE_FLAG);
1210 pr_cont("Reader Batch: ");
1093 for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) 1211 for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++)
1094 page += sprintf(page, " %ld", batchsummary[i]); 1212 pr_cont(" %ld", batchsummary[i]);
1095 page += sprintf(page, "\n%s%s ", torture_type, TORTURE_FLAG); 1213 pr_cont("\n");
1096 page += sprintf(page, "Free-Block Circulation: "); 1214
1215 pr_alert("%s%s ", torture_type, TORTURE_FLAG);
1216 pr_cont("Free-Block Circulation: ");
1097 for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) { 1217 for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) {
1098 page += sprintf(page, " %d", 1218 pr_cont(" %d", atomic_read(&rcu_torture_wcount[i]));
1099 atomic_read(&rcu_torture_wcount[i]));
1100 } 1219 }
1101 page += sprintf(page, "\n"); 1220 pr_cont("\n");
1221
1102 if (cur_ops->stats) 1222 if (cur_ops->stats)
1103 cur_ops->stats(page); 1223 cur_ops->stats();
1104 if (rtcv_snap == rcu_torture_current_version && 1224 if (rtcv_snap == rcu_torture_current_version &&
1105 rcu_torture_current != NULL) { 1225 rcu_torture_current != NULL) {
1106 int __maybe_unused flags; 1226 int __maybe_unused flags;
@@ -1109,10 +1229,9 @@ rcu_torture_printk(char *page)
1109 1229
1110 rcutorture_get_gp_data(cur_ops->ttype, 1230 rcutorture_get_gp_data(cur_ops->ttype,
1111 &flags, &gpnum, &completed); 1231 &flags, &gpnum, &completed);
1112 page += sprintf(page, 1232 pr_alert("??? Writer stall state %d g%lu c%lu f%#x\n",
1113 "??? Writer stall state %d g%lu c%lu f%#x\n", 1233 rcu_torture_writer_state,
1114 rcu_torture_writer_state, 1234 gpnum, completed, flags);
1115 gpnum, completed, flags);
1116 show_rcu_gp_kthreads(); 1235 show_rcu_gp_kthreads();
1117 rcutorture_trace_dump(); 1236 rcutorture_trace_dump();
1118 } 1237 }
@@ -1120,30 +1239,6 @@ rcu_torture_printk(char *page)
1120} 1239}
1121 1240
1122/* 1241/*
1123 * Print torture statistics. Caller must ensure that there is only
1124 * one call to this function at a given time!!! This is normally
1125 * accomplished by relying on the module system to only have one copy
1126 * of the module loaded, and then by giving the rcu_torture_stats
1127 * kthread full control (or the init/cleanup functions when rcu_torture_stats
1128 * thread is not running).
1129 */
1130static void
1131rcu_torture_stats_print(void)
1132{
1133 int size = nr_cpu_ids * 200 + 8192;
1134 char *buf;
1135
1136 buf = kmalloc(size, GFP_KERNEL);
1137 if (!buf) {
1138 pr_err("rcu-torture: Out of memory, need: %d", size);
1139 return;
1140 }
1141 rcu_torture_printk(buf);
1142 pr_alert("%s", buf);
1143 kfree(buf);
1144}
1145
1146/*
1147 * Periodically prints torture statistics, if periodic statistics printing 1242 * Periodically prints torture statistics, if periodic statistics printing
1148 * was specified via the stat_interval module parameter. 1243 * was specified via the stat_interval module parameter.
1149 */ 1244 */
@@ -1295,7 +1390,8 @@ static int rcu_torture_barrier_cbs(void *arg)
1295 if (atomic_dec_and_test(&barrier_cbs_count)) 1390 if (atomic_dec_and_test(&barrier_cbs_count))
1296 wake_up(&barrier_wq); 1391 wake_up(&barrier_wq);
1297 } while (!torture_must_stop()); 1392 } while (!torture_must_stop());
1298 cur_ops->cb_barrier(); 1393 if (cur_ops->cb_barrier != NULL)
1394 cur_ops->cb_barrier();
1299 destroy_rcu_head_on_stack(&rcu); 1395 destroy_rcu_head_on_stack(&rcu);
1300 torture_kthread_stopping("rcu_torture_barrier_cbs"); 1396 torture_kthread_stopping("rcu_torture_barrier_cbs");
1301 return 0; 1397 return 0;
@@ -1418,7 +1514,7 @@ rcu_torture_cleanup(void)
1418 int i; 1514 int i;
1419 1515
1420 rcutorture_record_test_transition(); 1516 rcutorture_record_test_transition();
1421 if (torture_cleanup()) { 1517 if (torture_cleanup_begin()) {
1422 if (cur_ops->cb_barrier != NULL) 1518 if (cur_ops->cb_barrier != NULL)
1423 cur_ops->cb_barrier(); 1519 cur_ops->cb_barrier();
1424 return; 1520 return;
@@ -1447,6 +1543,8 @@ rcu_torture_cleanup(void)
1447 1543
1448 torture_stop_kthread(rcu_torture_stats, stats_task); 1544 torture_stop_kthread(rcu_torture_stats, stats_task);
1449 torture_stop_kthread(rcu_torture_fqs, fqs_task); 1545 torture_stop_kthread(rcu_torture_fqs, fqs_task);
1546 for (i = 0; i < ncbflooders; i++)
1547 torture_stop_kthread(rcu_torture_cbflood, cbflood_task[i]);
1450 if ((test_boost == 1 && cur_ops->can_boost) || 1548 if ((test_boost == 1 && cur_ops->can_boost) ||
1451 test_boost == 2) { 1549 test_boost == 2) {
1452 unregister_cpu_notifier(&rcutorture_cpu_nb); 1550 unregister_cpu_notifier(&rcutorture_cpu_nb);
@@ -1468,6 +1566,7 @@ rcu_torture_cleanup(void)
1468 "End of test: RCU_HOTPLUG"); 1566 "End of test: RCU_HOTPLUG");
1469 else 1567 else
1470 rcu_torture_print_module_parms(cur_ops, "End of test: SUCCESS"); 1568 rcu_torture_print_module_parms(cur_ops, "End of test: SUCCESS");
1569 torture_cleanup_end();
1471} 1570}
1472 1571
1473#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD 1572#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
@@ -1534,9 +1633,10 @@ rcu_torture_init(void)
1534 int firsterr = 0; 1633 int firsterr = 0;
1535 static struct rcu_torture_ops *torture_ops[] = { 1634 static struct rcu_torture_ops *torture_ops[] = {
1536 &rcu_ops, &rcu_bh_ops, &rcu_busted_ops, &srcu_ops, &sched_ops, 1635 &rcu_ops, &rcu_bh_ops, &rcu_busted_ops, &srcu_ops, &sched_ops,
1636 RCUTORTURE_TASKS_OPS
1537 }; 1637 };
1538 1638
1539 if (!torture_init_begin(torture_type, verbose, &rcutorture_runnable)) 1639 if (!torture_init_begin(torture_type, verbose, &torture_runnable))
1540 return -EBUSY; 1640 return -EBUSY;
1541 1641
1542 /* Process args and tell the world that the torturer is on the job. */ 1642 /* Process args and tell the world that the torturer is on the job. */
@@ -1693,6 +1793,24 @@ rcu_torture_init(void)
1693 goto unwind; 1793 goto unwind;
1694 if (object_debug) 1794 if (object_debug)
1695 rcu_test_debug_objects(); 1795 rcu_test_debug_objects();
1796 if (cbflood_n_burst > 0) {
1797 /* Create the cbflood threads */
1798 ncbflooders = (num_online_cpus() + 3) / 4;
1799 cbflood_task = kcalloc(ncbflooders, sizeof(*cbflood_task),
1800 GFP_KERNEL);
1801 if (!cbflood_task) {
1802 VERBOSE_TOROUT_ERRSTRING("out of memory");
1803 firsterr = -ENOMEM;
1804 goto unwind;
1805 }
1806 for (i = 0; i < ncbflooders; i++) {
1807 firsterr = torture_create_kthread(rcu_torture_cbflood,
1808 NULL,
1809 cbflood_task[i]);
1810 if (firsterr)
1811 goto unwind;
1812 }
1813 }
1696 rcutorture_record_test_transition(); 1814 rcutorture_record_test_transition();
1697 torture_init_end(); 1815 torture_init_end();
1698 return 0; 1816 return 0;
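Two smaller points about the rcutorture changes above. First, the long-running reader and boost loops now call cond_resched_rcu_qs() instead of cond_resched(), so that they also report quiescent states to the new RCU-tasks machinery; the helper (defined in include/linux/rcupdate.h in this series) is roughly:

/* Rough sketch of the helper used in place of cond_resched() above. */
#define cond_resched_rcu_qs() \
do { \
	rcu_note_voluntary_context_switch(current); \
	cond_resched(); \
} while (0)

Second, for a sense of the callback flood's footprint: each flooder preallocates cbflood_n_burst * cbflood_n_per_burst rcu_head structures, which with the defaults is 3 * 20000 = 60000 heads, roughly 940 KB per flooder on a typical 64-bit build (16-byte rcu_head), and ncbflooders = (num_online_cpus() + 3) / 4, i.e. one flooder per four CPUs, rounded up.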
diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c
index d9efcc13008c..c0623fc47125 100644
--- a/kernel/rcu/tiny.c
+++ b/kernel/rcu/tiny.c
@@ -51,7 +51,7 @@ static long long rcu_dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
51 51
52#include "tiny_plugin.h" 52#include "tiny_plugin.h"
53 53
54/* Common code for rcu_idle_enter() and rcu_irq_exit(), see kernel/rcutree.c. */ 54/* Common code for rcu_idle_enter() and rcu_irq_exit(), see kernel/rcu/tree.c. */
55static void rcu_idle_enter_common(long long newval) 55static void rcu_idle_enter_common(long long newval)
56{ 56{
57 if (newval) { 57 if (newval) {
@@ -62,7 +62,7 @@ static void rcu_idle_enter_common(long long newval)
62 } 62 }
63 RCU_TRACE(trace_rcu_dyntick(TPS("Start"), 63 RCU_TRACE(trace_rcu_dyntick(TPS("Start"),
64 rcu_dynticks_nesting, newval)); 64 rcu_dynticks_nesting, newval));
65 if (!is_idle_task(current)) { 65 if (IS_ENABLED(CONFIG_RCU_TRACE) && !is_idle_task(current)) {
66 struct task_struct *idle __maybe_unused = idle_task(smp_processor_id()); 66 struct task_struct *idle __maybe_unused = idle_task(smp_processor_id());
67 67
68 RCU_TRACE(trace_rcu_dyntick(TPS("Entry error: not idle task"), 68 RCU_TRACE(trace_rcu_dyntick(TPS("Entry error: not idle task"),
@@ -72,7 +72,7 @@ static void rcu_idle_enter_common(long long newval)
72 current->pid, current->comm, 72 current->pid, current->comm,
73 idle->pid, idle->comm); /* must be idle task! */ 73 idle->pid, idle->comm); /* must be idle task! */
74 } 74 }
75 rcu_sched_qs(0); /* implies rcu_bh_qsctr_inc(0) */ 75 rcu_sched_qs(); /* implies rcu_bh_inc() */
76 barrier(); 76 barrier();
77 rcu_dynticks_nesting = newval; 77 rcu_dynticks_nesting = newval;
78} 78}
@@ -114,7 +114,7 @@ void rcu_irq_exit(void)
114} 114}
115EXPORT_SYMBOL_GPL(rcu_irq_exit); 115EXPORT_SYMBOL_GPL(rcu_irq_exit);
116 116
117/* Common code for rcu_idle_exit() and rcu_irq_enter(), see kernel/rcutree.c. */ 117/* Common code for rcu_idle_exit() and rcu_irq_enter(), see kernel/rcu/tree.c. */
118static void rcu_idle_exit_common(long long oldval) 118static void rcu_idle_exit_common(long long oldval)
119{ 119{
120 if (oldval) { 120 if (oldval) {
@@ -123,7 +123,7 @@ static void rcu_idle_exit_common(long long oldval)
123 return; 123 return;
124 } 124 }
125 RCU_TRACE(trace_rcu_dyntick(TPS("End"), oldval, rcu_dynticks_nesting)); 125 RCU_TRACE(trace_rcu_dyntick(TPS("End"), oldval, rcu_dynticks_nesting));
126 if (!is_idle_task(current)) { 126 if (IS_ENABLED(CONFIG_RCU_TRACE) && !is_idle_task(current)) {
127 struct task_struct *idle __maybe_unused = idle_task(smp_processor_id()); 127 struct task_struct *idle __maybe_unused = idle_task(smp_processor_id());
128 128
129 RCU_TRACE(trace_rcu_dyntick(TPS("Exit error: not idle task"), 129 RCU_TRACE(trace_rcu_dyntick(TPS("Exit error: not idle task"),
@@ -217,7 +217,7 @@ static int rcu_qsctr_help(struct rcu_ctrlblk *rcp)
217 * are at it, given that any rcu quiescent state is also an rcu_bh 217 * are at it, given that any rcu quiescent state is also an rcu_bh
218 * quiescent state. Use "+" instead of "||" to defeat short circuiting. 218 * quiescent state. Use "+" instead of "||" to defeat short circuiting.
219 */ 219 */
220void rcu_sched_qs(int cpu) 220void rcu_sched_qs(void)
221{ 221{
222 unsigned long flags; 222 unsigned long flags;
223 223
@@ -231,7 +231,7 @@ void rcu_sched_qs(int cpu)
231/* 231/*
232 * Record an rcu_bh quiescent state. 232 * Record an rcu_bh quiescent state.
233 */ 233 */
234void rcu_bh_qs(int cpu) 234void rcu_bh_qs(void)
235{ 235{
236 unsigned long flags; 236 unsigned long flags;
237 237
@@ -251,9 +251,11 @@ void rcu_check_callbacks(int cpu, int user)
251{ 251{
252 RCU_TRACE(check_cpu_stalls()); 252 RCU_TRACE(check_cpu_stalls());
253 if (user || rcu_is_cpu_rrupt_from_idle()) 253 if (user || rcu_is_cpu_rrupt_from_idle())
254 rcu_sched_qs(cpu); 254 rcu_sched_qs();
255 else if (!in_softirq()) 255 else if (!in_softirq())
256 rcu_bh_qs(cpu); 256 rcu_bh_qs();
257 if (user)
258 rcu_note_voluntary_context_switch(current);
257} 259}
258 260
259/* 261/*
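
The new rcu_note_voluntary_context_switch(current) call lets the scheduling-clock interrupt report a quiescent state to the new RCU-tasks flavor whenever the tick interrupted user-mode execution, which is as good as a voluntary context switch for that flavor. As introduced earlier in this series it is roughly the following (sketch; the authoritative definition lives in include/linux/rcupdate.h):

    /* Sketch: clear this task's RCU-tasks holdout flag if the RCU-tasks
     * grace-period kthread is currently waiting on this task. */
    #define rcu_note_voluntary_context_switch(t) \
            do { \
                    if (ACCESS_ONCE((t)->rcu_tasks_holdout)) \
                            ACCESS_ONCE((t)->rcu_tasks_holdout) = false; \
            } while (0)
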
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 1b70cb6fbe3c..133e47223095 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -79,9 +79,18 @@ static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS];
79 * the tracing userspace tools to be able to decipher the string 79 * the tracing userspace tools to be able to decipher the string
80 * address to the matching string. 80 * address to the matching string.
81 */ 81 */
82#define RCU_STATE_INITIALIZER(sname, sabbr, cr) \ 82#ifdef CONFIG_TRACING
83# define DEFINE_RCU_TPS(sname) \
83static char sname##_varname[] = #sname; \ 84static char sname##_varname[] = #sname; \
84static const char *tp_##sname##_varname __used __tracepoint_string = sname##_varname; \ 85static const char *tp_##sname##_varname __used __tracepoint_string = sname##_varname;
86# define RCU_STATE_NAME(sname) sname##_varname
87#else
88# define DEFINE_RCU_TPS(sname)
89# define RCU_STATE_NAME(sname) __stringify(sname)
90#endif
91
92#define RCU_STATE_INITIALIZER(sname, sabbr, cr) \
93DEFINE_RCU_TPS(sname) \
85struct rcu_state sname##_state = { \ 94struct rcu_state sname##_state = { \
86 .level = { &sname##_state.node[0] }, \ 95 .level = { &sname##_state.node[0] }, \
87 .call = cr, \ 96 .call = cr, \
@@ -93,7 +102,7 @@ struct rcu_state sname##_state = { \
93 .orphan_donetail = &sname##_state.orphan_donelist, \ 102 .orphan_donetail = &sname##_state.orphan_donelist, \
94 .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \ 103 .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \
95 .onoff_mutex = __MUTEX_INITIALIZER(sname##_state.onoff_mutex), \ 104 .onoff_mutex = __MUTEX_INITIALIZER(sname##_state.onoff_mutex), \
96 .name = sname##_varname, \ 105 .name = RCU_STATE_NAME(sname), \
97 .abbr = sabbr, \ 106 .abbr = sabbr, \
98}; \ 107}; \
99DEFINE_PER_CPU(struct rcu_data, sname##_data) 108DEFINE_PER_CPU(struct rcu_data, sname##_data)
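
The DEFINE_RCU_TPS()/RCU_STATE_NAME() split above exists because __tracepoint_string strings are only emitted (and only useful) when CONFIG_TRACING is enabled; without tracing, the state name can simply collapse to a stringified constant. Expanded by hand for the rcu_sched flavor, the two configurations reduce to roughly:

    /* CONFIG_TRACING=y: emit a named string so trace tools can resolve the address. */
    static char rcu_sched_varname[] = "rcu_sched";
    static const char *tp_rcu_sched_varname __used __tracepoint_string = rcu_sched_varname;
    /* ... .name = RCU_STATE_NAME(rcu_sched) becomes .name = rcu_sched_varname ... */

    /* CONFIG_TRACING=n: no tracepoint string at all. */
    /* ... .name = RCU_STATE_NAME(rcu_sched) becomes .name = __stringify(rcu_sched), i.e. "rcu_sched" ... */
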
@@ -188,22 +197,24 @@ static int rcu_gp_in_progress(struct rcu_state *rsp)
188 * one since the start of the grace period, this just sets a flag. 197 * one since the start of the grace period, this just sets a flag.
189 * The caller must have disabled preemption. 198 * The caller must have disabled preemption.
190 */ 199 */
191void rcu_sched_qs(int cpu) 200void rcu_sched_qs(void)
192{ 201{
193 struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu); 202 if (!__this_cpu_read(rcu_sched_data.passed_quiesce)) {
194 203 trace_rcu_grace_period(TPS("rcu_sched"),
195 if (rdp->passed_quiesce == 0) 204 __this_cpu_read(rcu_sched_data.gpnum),
196 trace_rcu_grace_period(TPS("rcu_sched"), rdp->gpnum, TPS("cpuqs")); 205 TPS("cpuqs"));
197 rdp->passed_quiesce = 1; 206 __this_cpu_write(rcu_sched_data.passed_quiesce, 1);
207 }
198} 208}
199 209
200void rcu_bh_qs(int cpu) 210void rcu_bh_qs(void)
201{ 211{
202 struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu); 212 if (!__this_cpu_read(rcu_bh_data.passed_quiesce)) {
203 213 trace_rcu_grace_period(TPS("rcu_bh"),
204 if (rdp->passed_quiesce == 0) 214 __this_cpu_read(rcu_bh_data.gpnum),
205 trace_rcu_grace_period(TPS("rcu_bh"), rdp->gpnum, TPS("cpuqs")); 215 TPS("cpuqs"));
206 rdp->passed_quiesce = 1; 216 __this_cpu_write(rcu_bh_data.passed_quiesce, 1);
217 }
207} 218}
208 219
209static DEFINE_PER_CPU(int, rcu_sched_qs_mask); 220static DEFINE_PER_CPU(int, rcu_sched_qs_mask);
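
Dropping the cpu argument from rcu_sched_qs() and rcu_bh_qs() works because these functions are only ever called on the CPU whose quiescent state is being recorded, so plain per_cpu(..., cpu) accesses can become __this_cpu_read()/__this_cpu_write() operations. A minimal before/after sketch of the idiom (generic example, not the tree.c code):

    DEFINE_PER_CPU(int, example_qs_flag);

    void old_style(int cpu)                 /* caller had to pass its own CPU number */
    {
            per_cpu(example_qs_flag, cpu) = 1;
    }

    void new_style(void)                    /* caller runs with preemption disabled */
    {
            if (!__this_cpu_read(example_qs_flag))
                    __this_cpu_write(example_qs_flag, 1);
    }
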
@@ -278,7 +289,7 @@ static void rcu_momentary_dyntick_idle(void)
278void rcu_note_context_switch(int cpu) 289void rcu_note_context_switch(int cpu)
279{ 290{
280 trace_rcu_utilization(TPS("Start context switch")); 291 trace_rcu_utilization(TPS("Start context switch"));
281 rcu_sched_qs(cpu); 292 rcu_sched_qs();
282 rcu_preempt_note_context_switch(cpu); 293 rcu_preempt_note_context_switch(cpu);
283 if (unlikely(raw_cpu_read(rcu_sched_qs_mask))) 294 if (unlikely(raw_cpu_read(rcu_sched_qs_mask)))
284 rcu_momentary_dyntick_idle(); 295 rcu_momentary_dyntick_idle();
@@ -526,6 +537,7 @@ static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval,
526 atomic_inc(&rdtp->dynticks); 537 atomic_inc(&rdtp->dynticks);
527 smp_mb__after_atomic(); /* Force ordering with next sojourn. */ 538 smp_mb__after_atomic(); /* Force ordering with next sojourn. */
528 WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); 539 WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
540 rcu_dynticks_task_enter();
529 541
530 /* 542 /*
531 * It is illegal to enter an extended quiescent state while 543 * It is illegal to enter an extended quiescent state while
@@ -642,6 +654,7 @@ void rcu_irq_exit(void)
642static void rcu_eqs_exit_common(struct rcu_dynticks *rdtp, long long oldval, 654static void rcu_eqs_exit_common(struct rcu_dynticks *rdtp, long long oldval,
643 int user) 655 int user)
644{ 656{
657 rcu_dynticks_task_exit();
645 smp_mb__before_atomic(); /* Force ordering w/previous sojourn. */ 658 smp_mb__before_atomic(); /* Force ordering w/previous sojourn. */
646 atomic_inc(&rdtp->dynticks); 659 atomic_inc(&rdtp->dynticks);
647 /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ 660 /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
@@ -819,7 +832,7 @@ bool notrace __rcu_is_watching(void)
819 */ 832 */
820bool notrace rcu_is_watching(void) 833bool notrace rcu_is_watching(void)
821{ 834{
822 int ret; 835 bool ret;
823 836
824 preempt_disable(); 837 preempt_disable();
825 ret = __rcu_is_watching(); 838 ret = __rcu_is_watching();
@@ -1647,7 +1660,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
1647 rnp->level, rnp->grplo, 1660 rnp->level, rnp->grplo,
1648 rnp->grphi, rnp->qsmask); 1661 rnp->grphi, rnp->qsmask);
1649 raw_spin_unlock_irq(&rnp->lock); 1662 raw_spin_unlock_irq(&rnp->lock);
1650 cond_resched(); 1663 cond_resched_rcu_qs();
1651 } 1664 }
1652 1665
1653 mutex_unlock(&rsp->onoff_mutex); 1666 mutex_unlock(&rsp->onoff_mutex);
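
The cond_resched() calls in the grace-period kthread's long loops become cond_resched_rcu_qs() so that each pass through the loop also counts as a quiescent state for RCU-tasks, not just a reschedule point. As defined by this series it is approximately the following (sketch; see include/linux/rcupdate.h for the real definition):

    /* Report a voluntary-context-switch-equivalent to RCU-tasks, then maybe yield. */
    #define cond_resched_rcu_qs() \
            do { \
                    rcu_note_voluntary_context_switch(current); \
                    cond_resched(); \
            } while (0)
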
@@ -1668,7 +1681,7 @@ static int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in)
1668 if (fqs_state == RCU_SAVE_DYNTICK) { 1681 if (fqs_state == RCU_SAVE_DYNTICK) {
1669 /* Collect dyntick-idle snapshots. */ 1682 /* Collect dyntick-idle snapshots. */
1670 if (is_sysidle_rcu_state(rsp)) { 1683 if (is_sysidle_rcu_state(rsp)) {
1671 isidle = 1; 1684 isidle = true;
1672 maxj = jiffies - ULONG_MAX / 4; 1685 maxj = jiffies - ULONG_MAX / 4;
1673 } 1686 }
1674 force_qs_rnp(rsp, dyntick_save_progress_counter, 1687 force_qs_rnp(rsp, dyntick_save_progress_counter,
@@ -1677,14 +1690,15 @@ static int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in)
1677 fqs_state = RCU_FORCE_QS; 1690 fqs_state = RCU_FORCE_QS;
1678 } else { 1691 } else {
1679 /* Handle dyntick-idle and offline CPUs. */ 1692 /* Handle dyntick-idle and offline CPUs. */
1680 isidle = 0; 1693 isidle = false;
1681 force_qs_rnp(rsp, rcu_implicit_dynticks_qs, &isidle, &maxj); 1694 force_qs_rnp(rsp, rcu_implicit_dynticks_qs, &isidle, &maxj);
1682 } 1695 }
1683 /* Clear flag to prevent immediate re-entry. */ 1696 /* Clear flag to prevent immediate re-entry. */
1684 if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) { 1697 if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
1685 raw_spin_lock_irq(&rnp->lock); 1698 raw_spin_lock_irq(&rnp->lock);
1686 smp_mb__after_unlock_lock(); 1699 smp_mb__after_unlock_lock();
1687 ACCESS_ONCE(rsp->gp_flags) &= ~RCU_GP_FLAG_FQS; 1700 ACCESS_ONCE(rsp->gp_flags) =
1701 ACCESS_ONCE(rsp->gp_flags) & ~RCU_GP_FLAG_FQS;
1688 raw_spin_unlock_irq(&rnp->lock); 1702 raw_spin_unlock_irq(&rnp->lock);
1689 } 1703 }
1690 return fqs_state; 1704 return fqs_state;
@@ -1736,7 +1750,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
1736 /* smp_mb() provided by prior unlock-lock pair. */ 1750 /* smp_mb() provided by prior unlock-lock pair. */
1737 nocb += rcu_future_gp_cleanup(rsp, rnp); 1751 nocb += rcu_future_gp_cleanup(rsp, rnp);
1738 raw_spin_unlock_irq(&rnp->lock); 1752 raw_spin_unlock_irq(&rnp->lock);
1739 cond_resched(); 1753 cond_resched_rcu_qs();
1740 } 1754 }
1741 rnp = rcu_get_root(rsp); 1755 rnp = rcu_get_root(rsp);
1742 raw_spin_lock_irq(&rnp->lock); 1756 raw_spin_lock_irq(&rnp->lock);
@@ -1785,8 +1799,8 @@ static int __noreturn rcu_gp_kthread(void *arg)
1785 /* Locking provides needed memory barrier. */ 1799 /* Locking provides needed memory barrier. */
1786 if (rcu_gp_init(rsp)) 1800 if (rcu_gp_init(rsp))
1787 break; 1801 break;
1788 cond_resched(); 1802 cond_resched_rcu_qs();
1789 flush_signals(current); 1803 WARN_ON(signal_pending(current));
1790 trace_rcu_grace_period(rsp->name, 1804 trace_rcu_grace_period(rsp->name,
1791 ACCESS_ONCE(rsp->gpnum), 1805 ACCESS_ONCE(rsp->gpnum),
1792 TPS("reqwaitsig")); 1806 TPS("reqwaitsig"));
@@ -1828,11 +1842,11 @@ static int __noreturn rcu_gp_kthread(void *arg)
1828 trace_rcu_grace_period(rsp->name, 1842 trace_rcu_grace_period(rsp->name,
1829 ACCESS_ONCE(rsp->gpnum), 1843 ACCESS_ONCE(rsp->gpnum),
1830 TPS("fqsend")); 1844 TPS("fqsend"));
1831 cond_resched(); 1845 cond_resched_rcu_qs();
1832 } else { 1846 } else {
1833 /* Deal with stray signal. */ 1847 /* Deal with stray signal. */
1834 cond_resched(); 1848 cond_resched_rcu_qs();
1835 flush_signals(current); 1849 WARN_ON(signal_pending(current));
1836 trace_rcu_grace_period(rsp->name, 1850 trace_rcu_grace_period(rsp->name,
1837 ACCESS_ONCE(rsp->gpnum), 1851 ACCESS_ONCE(rsp->gpnum),
1838 TPS("fqswaitsig")); 1852 TPS("fqswaitsig"));
@@ -1928,7 +1942,7 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
1928{ 1942{
1929 WARN_ON_ONCE(!rcu_gp_in_progress(rsp)); 1943 WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
1930 raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags); 1944 raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags);
1931 wake_up(&rsp->gp_wq); /* Memory barrier implied by wake_up() path. */ 1945 rcu_gp_kthread_wake(rsp);
1932} 1946}
1933 1947
1934/* 1948/*
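
Replacing the open-coded wake_up(&rsp->gp_wq) with rcu_gp_kthread_wake(rsp) centralizes the checks for whether a wakeup is actually useful. The hunks here show only the call sites, so treat the body below as an assumed sketch of what such a helper does rather than the exact tree.c code:

    static void rcu_gp_kthread_wake(struct rcu_state *rsp)
    {
            /* No point waking if we are the GP kthread, it has not been
             * spawned yet, or no grace-period work has been flagged. */
            if (current == rsp->gp_kthread ||
                !ACCESS_ONCE(rsp->gp_flags) ||
                !rsp->gp_kthread)
                    return;
            wake_up(&rsp->gp_wq);
    }
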
@@ -2210,8 +2224,6 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
2210 /* Adjust any no-longer-needed kthreads. */ 2224 /* Adjust any no-longer-needed kthreads. */
2211 rcu_boost_kthread_setaffinity(rnp, -1); 2225 rcu_boost_kthread_setaffinity(rnp, -1);
2212 2226
2213 /* Remove the dead CPU from the bitmasks in the rcu_node hierarchy. */
2214
2215 /* Exclude any attempts to start a new grace period. */ 2227 /* Exclude any attempts to start a new grace period. */
2216 mutex_lock(&rsp->onoff_mutex); 2228 mutex_lock(&rsp->onoff_mutex);
2217 raw_spin_lock_irqsave(&rsp->orphan_lock, flags); 2229 raw_spin_lock_irqsave(&rsp->orphan_lock, flags);
@@ -2393,8 +2405,8 @@ void rcu_check_callbacks(int cpu, int user)
2393 * at least not while the corresponding CPU is online. 2405 * at least not while the corresponding CPU is online.
2394 */ 2406 */
2395 2407
2396 rcu_sched_qs(cpu); 2408 rcu_sched_qs();
2397 rcu_bh_qs(cpu); 2409 rcu_bh_qs();
2398 2410
2399 } else if (!in_softirq()) { 2411 } else if (!in_softirq()) {
2400 2412
@@ -2405,11 +2417,13 @@ void rcu_check_callbacks(int cpu, int user)
2405 * critical section, so note it. 2417 * critical section, so note it.
2406 */ 2418 */
2407 2419
2408 rcu_bh_qs(cpu); 2420 rcu_bh_qs();
2409 } 2421 }
2410 rcu_preempt_check_callbacks(cpu); 2422 rcu_preempt_check_callbacks(cpu);
2411 if (rcu_pending(cpu)) 2423 if (rcu_pending(cpu))
2412 invoke_rcu_core(); 2424 invoke_rcu_core();
2425 if (user)
2426 rcu_note_voluntary_context_switch(current);
2413 trace_rcu_utilization(TPS("End scheduler-tick")); 2427 trace_rcu_utilization(TPS("End scheduler-tick"));
2414} 2428}
2415 2429
@@ -2432,7 +2446,7 @@ static void force_qs_rnp(struct rcu_state *rsp,
2432 struct rcu_node *rnp; 2446 struct rcu_node *rnp;
2433 2447
2434 rcu_for_each_leaf_node(rsp, rnp) { 2448 rcu_for_each_leaf_node(rsp, rnp) {
2435 cond_resched(); 2449 cond_resched_rcu_qs();
2436 mask = 0; 2450 mask = 0;
2437 raw_spin_lock_irqsave(&rnp->lock, flags); 2451 raw_spin_lock_irqsave(&rnp->lock, flags);
2438 smp_mb__after_unlock_lock(); 2452 smp_mb__after_unlock_lock();
@@ -2449,7 +2463,7 @@ static void force_qs_rnp(struct rcu_state *rsp,
2449 for (; cpu <= rnp->grphi; cpu++, bit <<= 1) { 2463 for (; cpu <= rnp->grphi; cpu++, bit <<= 1) {
2450 if ((rnp->qsmask & bit) != 0) { 2464 if ((rnp->qsmask & bit) != 0) {
2451 if ((rnp->qsmaskinit & bit) != 0) 2465 if ((rnp->qsmaskinit & bit) != 0)
2452 *isidle = 0; 2466 *isidle = false;
2453 if (f(per_cpu_ptr(rsp->rda, cpu), isidle, maxj)) 2467 if (f(per_cpu_ptr(rsp->rda, cpu), isidle, maxj))
2454 mask |= bit; 2468 mask |= bit;
2455 } 2469 }
@@ -2505,9 +2519,10 @@ static void force_quiescent_state(struct rcu_state *rsp)
2505 raw_spin_unlock_irqrestore(&rnp_old->lock, flags); 2519 raw_spin_unlock_irqrestore(&rnp_old->lock, flags);
2506 return; /* Someone beat us to it. */ 2520 return; /* Someone beat us to it. */
2507 } 2521 }
2508 ACCESS_ONCE(rsp->gp_flags) |= RCU_GP_FLAG_FQS; 2522 ACCESS_ONCE(rsp->gp_flags) =
2523 ACCESS_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS;
2509 raw_spin_unlock_irqrestore(&rnp_old->lock, flags); 2524 raw_spin_unlock_irqrestore(&rnp_old->lock, flags);
2510 wake_up(&rsp->gp_wq); /* Memory barrier implied by wake_up() path. */ 2525 rcu_gp_kthread_wake(rsp);
2511} 2526}
2512 2527
2513/* 2528/*
@@ -2925,11 +2940,6 @@ static int synchronize_sched_expedited_cpu_stop(void *data)
2925 * restructure your code to batch your updates, and then use a single 2940 * restructure your code to batch your updates, and then use a single
2926 * synchronize_sched() instead. 2941 * synchronize_sched() instead.
2927 * 2942 *
2928 * Note that it is illegal to call this function while holding any lock
2929 * that is acquired by a CPU-hotplug notifier. And yes, it is also illegal
2930 * to call this function from a CPU-hotplug notifier. Failing to observe
2931 * these restriction will result in deadlock.
2932 *
2933 * This implementation can be thought of as an application of ticket 2943 * This implementation can be thought of as an application of ticket
2934 * locking to RCU, with sync_sched_expedited_started and 2944 * locking to RCU, with sync_sched_expedited_started and
2935 * sync_sched_expedited_done taking on the roles of the halves 2945 * sync_sched_expedited_done taking on the roles of the halves
@@ -2979,7 +2989,12 @@ void synchronize_sched_expedited(void)
2979 */ 2989 */
2980 snap = atomic_long_inc_return(&rsp->expedited_start); 2990 snap = atomic_long_inc_return(&rsp->expedited_start);
2981 firstsnap = snap; 2991 firstsnap = snap;
2982 get_online_cpus(); 2992 if (!try_get_online_cpus()) {
2993 /* CPU hotplug operation in flight, fall back to normal GP. */
2994 wait_rcu_gp(call_rcu_sched);
2995 atomic_long_inc(&rsp->expedited_normal);
2996 return;
2997 }
2983 WARN_ON_ONCE(cpu_is_offline(raw_smp_processor_id())); 2998 WARN_ON_ONCE(cpu_is_offline(raw_smp_processor_id()));
2984 2999
2985 /* 3000 /*
@@ -3026,7 +3041,12 @@ void synchronize_sched_expedited(void)
3026 * and they started after our first try, so their grace 3041 * and they started after our first try, so their grace
3027 * period works for us. 3042 * period works for us.
3028 */ 3043 */
3029 get_online_cpus(); 3044 if (!try_get_online_cpus()) {
3045 /* CPU hotplug operation in flight, use normal GP. */
3046 wait_rcu_gp(call_rcu_sched);
3047 atomic_long_inc(&rsp->expedited_normal);
3048 return;
3049 }
3030 snap = atomic_long_read(&rsp->expedited_start); 3050 snap = atomic_long_read(&rsp->expedited_start);
3031 smp_mb(); /* ensure read is before try_stop_cpus(). */ 3051 smp_mb(); /* ensure read is before try_stop_cpus(). */
3032 } 3052 }
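
Both expedited grace-period paths now share the same shape: opportunistically pin CPU hotplug with the new try_get_online_cpus() from kernel/cpu.c, and if a hotplug operation is in flight, fall back to a normal grace period instead of blocking (this is what lets the deadlock warning removed above go away). The skeleton of the pattern, with a hypothetical wrapper name:

    void expedited_gp_or_fallback(void)
    {
            if (!try_get_online_cpus()) {
                    /* Hotplug in flight: a normal grace period is always safe. */
                    wait_rcu_gp(call_rcu_sched);
                    return;
            }
            /* ... expedited machinery runs with CPUs held online ... */
            put_online_cpus();
    }
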
@@ -3442,6 +3462,7 @@ static int rcu_cpu_notify(struct notifier_block *self,
3442 case CPU_UP_PREPARE_FROZEN: 3462 case CPU_UP_PREPARE_FROZEN:
3443 rcu_prepare_cpu(cpu); 3463 rcu_prepare_cpu(cpu);
3444 rcu_prepare_kthreads(cpu); 3464 rcu_prepare_kthreads(cpu);
3465 rcu_spawn_all_nocb_kthreads(cpu);
3445 break; 3466 break;
3446 case CPU_ONLINE: 3467 case CPU_ONLINE:
3447 case CPU_DOWN_FAILED: 3468 case CPU_DOWN_FAILED:
@@ -3489,7 +3510,7 @@ static int rcu_pm_notify(struct notifier_block *self,
3489} 3510}
3490 3511
3491/* 3512/*
3492 * Spawn the kthread that handles this RCU flavor's grace periods. 3513 * Spawn the kthreads that handle each RCU flavor's grace periods.
3493 */ 3514 */
3494static int __init rcu_spawn_gp_kthread(void) 3515static int __init rcu_spawn_gp_kthread(void)
3495{ 3516{
@@ -3498,6 +3519,7 @@ static int __init rcu_spawn_gp_kthread(void)
3498 struct rcu_state *rsp; 3519 struct rcu_state *rsp;
3499 struct task_struct *t; 3520 struct task_struct *t;
3500 3521
3522 rcu_scheduler_fully_active = 1;
3501 for_each_rcu_flavor(rsp) { 3523 for_each_rcu_flavor(rsp) {
3502 t = kthread_run(rcu_gp_kthread, rsp, "%s", rsp->name); 3524 t = kthread_run(rcu_gp_kthread, rsp, "%s", rsp->name);
3503 BUG_ON(IS_ERR(t)); 3525 BUG_ON(IS_ERR(t));
@@ -3505,8 +3527,9 @@ static int __init rcu_spawn_gp_kthread(void)
3505 raw_spin_lock_irqsave(&rnp->lock, flags); 3527 raw_spin_lock_irqsave(&rnp->lock, flags);
3506 rsp->gp_kthread = t; 3528 rsp->gp_kthread = t;
3507 raw_spin_unlock_irqrestore(&rnp->lock, flags); 3529 raw_spin_unlock_irqrestore(&rnp->lock, flags);
3508 rcu_spawn_nocb_kthreads(rsp);
3509 } 3530 }
3531 rcu_spawn_nocb_kthreads();
3532 rcu_spawn_boost_kthreads();
3510 return 0; 3533 return 0;
3511} 3534}
3512early_initcall(rcu_spawn_gp_kthread); 3535early_initcall(rcu_spawn_gp_kthread);
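
With this reorganization a single early_initcall() now drives all of RCU's deferred kthread creation: rcu_spawn_gp_kthread() sets rcu_scheduler_fully_active, spawns the per-flavor grace-period kthreads, and only then calls the argument-less rcu_spawn_nocb_kthreads() and the new rcu_spawn_boost_kthreads(). Condensed from the hunks above:

    static int __init rcu_spawn_gp_kthread(void)
    {
            struct rcu_state *rsp;

            rcu_scheduler_fully_active = 1;
            for_each_rcu_flavor(rsp) {
                    /* kthread_run(rcu_gp_kthread, rsp, "%s", rsp->name) and
                     * record the result in rsp->gp_kthread under rnp->lock. */
            }
            rcu_spawn_nocb_kthreads();      /* rcuo kthreads for already-online no-CBs CPUs */
            rcu_spawn_boost_kthreads();     /* boost kthreads, no-op unless CONFIG_RCU_BOOST */
            return 0;
    }
    early_initcall(rcu_spawn_gp_kthread);
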
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 6a86eb7bac45..d03764652d91 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -350,7 +350,7 @@ struct rcu_data {
350 int nocb_p_count_lazy; /* (approximate). */ 350 int nocb_p_count_lazy; /* (approximate). */
351 wait_queue_head_t nocb_wq; /* For nocb kthreads to sleep on. */ 351 wait_queue_head_t nocb_wq; /* For nocb kthreads to sleep on. */
352 struct task_struct *nocb_kthread; 352 struct task_struct *nocb_kthread;
353 bool nocb_defer_wakeup; /* Defer wakeup of nocb_kthread. */ 353 int nocb_defer_wakeup; /* Defer wakeup of nocb_kthread. */
354 354
355 /* The following fields are used by the leader, hence own cacheline. */ 355 /* The following fields are used by the leader, hence own cacheline. */
356 struct rcu_head *nocb_gp_head ____cacheline_internodealigned_in_smp; 356 struct rcu_head *nocb_gp_head ____cacheline_internodealigned_in_smp;
@@ -383,6 +383,11 @@ struct rcu_data {
383#define RCU_FORCE_QS 3 /* Need to force quiescent state. */ 383#define RCU_FORCE_QS 3 /* Need to force quiescent state. */
384#define RCU_SIGNAL_INIT RCU_SAVE_DYNTICK 384#define RCU_SIGNAL_INIT RCU_SAVE_DYNTICK
385 385
386/* Values for nocb_defer_wakeup field in struct rcu_data. */
387#define RCU_NOGP_WAKE_NOT 0
388#define RCU_NOGP_WAKE 1
389#define RCU_NOGP_WAKE_FORCE 2
390
386#define RCU_JIFFIES_TILL_FORCE_QS (1 + (HZ > 250) + (HZ > 500)) 391#define RCU_JIFFIES_TILL_FORCE_QS (1 + (HZ > 250) + (HZ > 500))
387 /* For jiffies_till_first_fqs and */ 392 /* For jiffies_till_first_fqs and */
388 /* and jiffies_till_next_fqs. */ 393 /* and jiffies_till_next_fqs. */
@@ -572,6 +577,7 @@ static void rcu_preempt_do_callbacks(void);
572static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp, 577static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
573 struct rcu_node *rnp); 578 struct rcu_node *rnp);
574#endif /* #ifdef CONFIG_RCU_BOOST */ 579#endif /* #ifdef CONFIG_RCU_BOOST */
580static void __init rcu_spawn_boost_kthreads(void);
575static void rcu_prepare_kthreads(int cpu); 581static void rcu_prepare_kthreads(int cpu);
576static void rcu_cleanup_after_idle(int cpu); 582static void rcu_cleanup_after_idle(int cpu);
577static void rcu_prepare_for_idle(int cpu); 583static void rcu_prepare_for_idle(int cpu);
@@ -589,10 +595,14 @@ static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
589static bool rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp, 595static bool rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
590 struct rcu_data *rdp, 596 struct rcu_data *rdp,
591 unsigned long flags); 597 unsigned long flags);
592static bool rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp); 598static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp);
593static void do_nocb_deferred_wakeup(struct rcu_data *rdp); 599static void do_nocb_deferred_wakeup(struct rcu_data *rdp);
594static void rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp); 600static void rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp);
595static void rcu_spawn_nocb_kthreads(struct rcu_state *rsp); 601static void rcu_spawn_all_nocb_kthreads(int cpu);
602static void __init rcu_spawn_nocb_kthreads(void);
603#ifdef CONFIG_RCU_NOCB_CPU
604static void __init rcu_organize_nocb_kthreads(struct rcu_state *rsp);
605#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
596static void __maybe_unused rcu_kick_nohz_cpu(int cpu); 606static void __maybe_unused rcu_kick_nohz_cpu(int cpu);
597static bool init_nocb_callback_list(struct rcu_data *rdp); 607static bool init_nocb_callback_list(struct rcu_data *rdp);
598static void rcu_sysidle_enter(struct rcu_dynticks *rdtp, int irq); 608static void rcu_sysidle_enter(struct rcu_dynticks *rdtp, int irq);
@@ -605,6 +615,8 @@ static void rcu_sysidle_report_gp(struct rcu_state *rsp, int isidle,
605static void rcu_bind_gp_kthread(void); 615static void rcu_bind_gp_kthread(void);
606static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp); 616static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp);
607static bool rcu_nohz_full_cpu(struct rcu_state *rsp); 617static bool rcu_nohz_full_cpu(struct rcu_state *rsp);
618static void rcu_dynticks_task_enter(void);
619static void rcu_dynticks_task_exit(void);
608 620
609#endif /* #ifndef RCU_TREE_NONCORE */ 621#endif /* #ifndef RCU_TREE_NONCORE */
610 622
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index a7997e272564..387dd4599344 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -85,33 +85,6 @@ static void __init rcu_bootup_announce_oddness(void)
85 pr_info("\tBoot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf); 85 pr_info("\tBoot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf);
86 if (nr_cpu_ids != NR_CPUS) 86 if (nr_cpu_ids != NR_CPUS)
87 pr_info("\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids); 87 pr_info("\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids);
88#ifdef CONFIG_RCU_NOCB_CPU
89#ifndef CONFIG_RCU_NOCB_CPU_NONE
90 if (!have_rcu_nocb_mask) {
91 zalloc_cpumask_var(&rcu_nocb_mask, GFP_KERNEL);
92 have_rcu_nocb_mask = true;
93 }
94#ifdef CONFIG_RCU_NOCB_CPU_ZERO
95 pr_info("\tOffload RCU callbacks from CPU 0\n");
96 cpumask_set_cpu(0, rcu_nocb_mask);
97#endif /* #ifdef CONFIG_RCU_NOCB_CPU_ZERO */
98#ifdef CONFIG_RCU_NOCB_CPU_ALL
99 pr_info("\tOffload RCU callbacks from all CPUs\n");
100 cpumask_copy(rcu_nocb_mask, cpu_possible_mask);
101#endif /* #ifdef CONFIG_RCU_NOCB_CPU_ALL */
102#endif /* #ifndef CONFIG_RCU_NOCB_CPU_NONE */
103 if (have_rcu_nocb_mask) {
104 if (!cpumask_subset(rcu_nocb_mask, cpu_possible_mask)) {
105 pr_info("\tNote: kernel parameter 'rcu_nocbs=' contains nonexistent CPUs.\n");
106 cpumask_and(rcu_nocb_mask, cpu_possible_mask,
107 rcu_nocb_mask);
108 }
109 cpulist_scnprintf(nocb_buf, sizeof(nocb_buf), rcu_nocb_mask);
110 pr_info("\tOffload RCU callbacks from CPUs: %s.\n", nocb_buf);
111 if (rcu_nocb_poll)
112 pr_info("\tPoll for callbacks from no-CBs CPUs.\n");
113 }
114#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
115} 88}
116 89
117#ifdef CONFIG_TREE_PREEMPT_RCU 90#ifdef CONFIG_TREE_PREEMPT_RCU
@@ -134,7 +107,7 @@ static void __init rcu_bootup_announce(void)
134 * Return the number of RCU-preempt batches processed thus far 107 * Return the number of RCU-preempt batches processed thus far
135 * for debug and statistics. 108 * for debug and statistics.
136 */ 109 */
137long rcu_batches_completed_preempt(void) 110static long rcu_batches_completed_preempt(void)
138{ 111{
139 return rcu_preempt_state.completed; 112 return rcu_preempt_state.completed;
140} 113}
@@ -155,18 +128,19 @@ EXPORT_SYMBOL_GPL(rcu_batches_completed);
155 * not in a quiescent state. There might be any number of tasks blocked 128 * not in a quiescent state. There might be any number of tasks blocked
156 * while in an RCU read-side critical section. 129 * while in an RCU read-side critical section.
157 * 130 *
158 * Unlike the other rcu_*_qs() functions, callers to this function 131 * As with the other rcu_*_qs() functions, callers to this function
159 * must disable irqs in order to protect the assignment to 132 * must disable preemption.
160 * ->rcu_read_unlock_special. 133 */
161 */ 134static void rcu_preempt_qs(void)
162static void rcu_preempt_qs(int cpu) 135{
163{ 136 if (!__this_cpu_read(rcu_preempt_data.passed_quiesce)) {
164 struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu); 137 trace_rcu_grace_period(TPS("rcu_preempt"),
165 138 __this_cpu_read(rcu_preempt_data.gpnum),
166 if (rdp->passed_quiesce == 0) 139 TPS("cpuqs"));
167 trace_rcu_grace_period(TPS("rcu_preempt"), rdp->gpnum, TPS("cpuqs")); 140 __this_cpu_write(rcu_preempt_data.passed_quiesce, 1);
168 rdp->passed_quiesce = 1; 141 barrier(); /* Coordinate with rcu_preempt_check_callbacks(). */
169 current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; 142 current->rcu_read_unlock_special.b.need_qs = false;
143 }
170} 144}
171 145
172/* 146/*
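
The ->rcu_read_unlock_special accesses above rely on the union rcu_special introduced by the companion sched.h change: the slow paths set or clear single bytes (.b.blocked, .b.need_qs) without atomic read-modify-write bit twiddling, while the fast paths test the whole word (.s) in one go. Roughly (sketch; see include/linux/sched.h in this series for the authoritative layout):

    union rcu_special {
            struct {
                    bool blocked;   /* Task blocked while in an RCU read-side section. */
                    bool need_qs;   /* RCU core needs a quiescent state from this task. */
            } b;                    /* Individual bits. */
            short s;                /* Whole set of bits, tested/cleared at once. */
    };
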
@@ -190,14 +164,14 @@ static void rcu_preempt_note_context_switch(int cpu)
190 struct rcu_node *rnp; 164 struct rcu_node *rnp;
191 165
192 if (t->rcu_read_lock_nesting > 0 && 166 if (t->rcu_read_lock_nesting > 0 &&
193 (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) { 167 !t->rcu_read_unlock_special.b.blocked) {
194 168
195 /* Possibly blocking in an RCU read-side critical section. */ 169 /* Possibly blocking in an RCU read-side critical section. */
196 rdp = per_cpu_ptr(rcu_preempt_state.rda, cpu); 170 rdp = per_cpu_ptr(rcu_preempt_state.rda, cpu);
197 rnp = rdp->mynode; 171 rnp = rdp->mynode;
198 raw_spin_lock_irqsave(&rnp->lock, flags); 172 raw_spin_lock_irqsave(&rnp->lock, flags);
199 smp_mb__after_unlock_lock(); 173 smp_mb__after_unlock_lock();
200 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED; 174 t->rcu_read_unlock_special.b.blocked = true;
201 t->rcu_blocked_node = rnp; 175 t->rcu_blocked_node = rnp;
202 176
203 /* 177 /*
@@ -239,7 +213,7 @@ static void rcu_preempt_note_context_switch(int cpu)
239 : rnp->gpnum + 1); 213 : rnp->gpnum + 1);
240 raw_spin_unlock_irqrestore(&rnp->lock, flags); 214 raw_spin_unlock_irqrestore(&rnp->lock, flags);
241 } else if (t->rcu_read_lock_nesting < 0 && 215 } else if (t->rcu_read_lock_nesting < 0 &&
242 t->rcu_read_unlock_special) { 216 t->rcu_read_unlock_special.s) {
243 217
244 /* 218 /*
245 * Complete exit from RCU read-side critical section on 219 * Complete exit from RCU read-side critical section on
@@ -257,9 +231,7 @@ static void rcu_preempt_note_context_switch(int cpu)
257 * grace period, then the fact that the task has been enqueued 231 * grace period, then the fact that the task has been enqueued
258 * means that we continue to block the current grace period. 232 * means that we continue to block the current grace period.
259 */ 233 */
260 local_irq_save(flags); 234 rcu_preempt_qs();
261 rcu_preempt_qs(cpu);
262 local_irq_restore(flags);
263} 235}
264 236
265/* 237/*
@@ -340,7 +312,7 @@ void rcu_read_unlock_special(struct task_struct *t)
340 bool drop_boost_mutex = false; 312 bool drop_boost_mutex = false;
341#endif /* #ifdef CONFIG_RCU_BOOST */ 313#endif /* #ifdef CONFIG_RCU_BOOST */
342 struct rcu_node *rnp; 314 struct rcu_node *rnp;
343 int special; 315 union rcu_special special;
344 316
345 /* NMI handlers cannot block and cannot safely manipulate state. */ 317 /* NMI handlers cannot block and cannot safely manipulate state. */
346 if (in_nmi()) 318 if (in_nmi())
@@ -350,12 +322,13 @@ void rcu_read_unlock_special(struct task_struct *t)
350 322
351 /* 323 /*
352 * If RCU core is waiting for this CPU to exit critical section, 324 * If RCU core is waiting for this CPU to exit critical section,
353 * let it know that we have done so. 325 * let it know that we have done so. Because irqs are disabled,
326 * t->rcu_read_unlock_special cannot change.
354 */ 327 */
355 special = t->rcu_read_unlock_special; 328 special = t->rcu_read_unlock_special;
356 if (special & RCU_READ_UNLOCK_NEED_QS) { 329 if (special.b.need_qs) {
357 rcu_preempt_qs(smp_processor_id()); 330 rcu_preempt_qs();
358 if (!t->rcu_read_unlock_special) { 331 if (!t->rcu_read_unlock_special.s) {
359 local_irq_restore(flags); 332 local_irq_restore(flags);
360 return; 333 return;
361 } 334 }
@@ -368,8 +341,8 @@ void rcu_read_unlock_special(struct task_struct *t)
368 } 341 }
369 342
370 /* Clean up if blocked during RCU read-side critical section. */ 343 /* Clean up if blocked during RCU read-side critical section. */
371 if (special & RCU_READ_UNLOCK_BLOCKED) { 344 if (special.b.blocked) {
372 t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED; 345 t->rcu_read_unlock_special.b.blocked = false;
373 346
374 /* 347 /*
375 * Remove this task from the list it blocked on. The 348 * Remove this task from the list it blocked on. The
@@ -653,12 +626,13 @@ static void rcu_preempt_check_callbacks(int cpu)
653 struct task_struct *t = current; 626 struct task_struct *t = current;
654 627
655 if (t->rcu_read_lock_nesting == 0) { 628 if (t->rcu_read_lock_nesting == 0) {
656 rcu_preempt_qs(cpu); 629 rcu_preempt_qs();
657 return; 630 return;
658 } 631 }
659 if (t->rcu_read_lock_nesting > 0 && 632 if (t->rcu_read_lock_nesting > 0 &&
660 per_cpu(rcu_preempt_data, cpu).qs_pending) 633 per_cpu(rcu_preempt_data, cpu).qs_pending &&
661 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS; 634 !per_cpu(rcu_preempt_data, cpu).passed_quiesce)
635 t->rcu_read_unlock_special.b.need_qs = true;
662} 636}
663 637
664#ifdef CONFIG_RCU_BOOST 638#ifdef CONFIG_RCU_BOOST
@@ -819,11 +793,6 @@ sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
819 * In fact, if you are using synchronize_rcu_expedited() in a loop, 793 * In fact, if you are using synchronize_rcu_expedited() in a loop,
820 * please restructure your code to batch your updates, and then use a 794 * please restructure your code to batch your updates, and then use a
821 * single synchronize_rcu() instead. 795 * single synchronize_rcu() instead.
822 *
823 * Note that it is illegal to call this function while holding any lock
824 * that is acquired by a CPU-hotplug notifier. And yes, it is also illegal
825 * to call this function from a CPU-hotplug notifier. Failing to observe
826 * these restriction will result in deadlock.
827 */ 796 */
828void synchronize_rcu_expedited(void) 797void synchronize_rcu_expedited(void)
829{ 798{
@@ -845,7 +814,11 @@ void synchronize_rcu_expedited(void)
845 * being boosted. This simplifies the process of moving tasks 814 * being boosted. This simplifies the process of moving tasks
846 * from leaf to root rcu_node structures. 815 * from leaf to root rcu_node structures.
847 */ 816 */
848 get_online_cpus(); 817 if (!try_get_online_cpus()) {
818 /* CPU-hotplug operation in flight, fall back to normal GP. */
819 wait_rcu_gp(call_rcu);
820 return;
821 }
849 822
850 /* 823 /*
851 * Acquire lock, falling back to synchronize_rcu() if too many 824 * Acquire lock, falling back to synchronize_rcu() if too many
@@ -897,7 +870,8 @@ void synchronize_rcu_expedited(void)
897 870
898 /* Clean up and exit. */ 871 /* Clean up and exit. */
899 smp_mb(); /* ensure expedited GP seen before counter increment. */ 872 smp_mb(); /* ensure expedited GP seen before counter increment. */
900 ACCESS_ONCE(sync_rcu_preempt_exp_count)++; 873 ACCESS_ONCE(sync_rcu_preempt_exp_count) =
874 sync_rcu_preempt_exp_count + 1;
901unlock_mb_ret: 875unlock_mb_ret:
902 mutex_unlock(&sync_rcu_preempt_exp_mutex); 876 mutex_unlock(&sync_rcu_preempt_exp_mutex);
903mb_ret: 877mb_ret:
@@ -941,7 +915,7 @@ void exit_rcu(void)
941 return; 915 return;
942 t->rcu_read_lock_nesting = 1; 916 t->rcu_read_lock_nesting = 1;
943 barrier(); 917 barrier();
944 t->rcu_read_unlock_special = RCU_READ_UNLOCK_BLOCKED; 918 t->rcu_read_unlock_special.b.blocked = true;
945 __rcu_read_unlock(); 919 __rcu_read_unlock();
946} 920}
947 921
@@ -1462,14 +1436,13 @@ static struct smp_hotplug_thread rcu_cpu_thread_spec = {
1462}; 1436};
1463 1437
1464/* 1438/*
1465 * Spawn all kthreads -- called as soon as the scheduler is running. 1439 * Spawn boost kthreads -- called as soon as the scheduler is running.
1466 */ 1440 */
1467static int __init rcu_spawn_kthreads(void) 1441static void __init rcu_spawn_boost_kthreads(void)
1468{ 1442{
1469 struct rcu_node *rnp; 1443 struct rcu_node *rnp;
1470 int cpu; 1444 int cpu;
1471 1445
1472 rcu_scheduler_fully_active = 1;
1473 for_each_possible_cpu(cpu) 1446 for_each_possible_cpu(cpu)
1474 per_cpu(rcu_cpu_has_work, cpu) = 0; 1447 per_cpu(rcu_cpu_has_work, cpu) = 0;
1475 BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec)); 1448 BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec));
@@ -1479,9 +1452,7 @@ static int __init rcu_spawn_kthreads(void)
1479 rcu_for_each_leaf_node(rcu_state_p, rnp) 1452 rcu_for_each_leaf_node(rcu_state_p, rnp)
1480 (void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp); 1453 (void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp);
1481 } 1454 }
1482 return 0;
1483} 1455}
1484early_initcall(rcu_spawn_kthreads);
1485 1456
1486static void rcu_prepare_kthreads(int cpu) 1457static void rcu_prepare_kthreads(int cpu)
1487{ 1458{
@@ -1519,12 +1490,9 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
1519{ 1490{
1520} 1491}
1521 1492
1522static int __init rcu_scheduler_really_started(void) 1493static void __init rcu_spawn_boost_kthreads(void)
1523{ 1494{
1524 rcu_scheduler_fully_active = 1;
1525 return 0;
1526} 1495}
1527early_initcall(rcu_scheduler_really_started);
1528 1496
1529static void rcu_prepare_kthreads(int cpu) 1497static void rcu_prepare_kthreads(int cpu)
1530{ 1498{
@@ -1625,7 +1593,7 @@ static bool __maybe_unused rcu_try_advance_all_cbs(void)
1625 1593
1626 /* Exit early if we advanced recently. */ 1594 /* Exit early if we advanced recently. */
1627 if (jiffies == rdtp->last_advance_all) 1595 if (jiffies == rdtp->last_advance_all)
1628 return 0; 1596 return false;
1629 rdtp->last_advance_all = jiffies; 1597 rdtp->last_advance_all = jiffies;
1630 1598
1631 for_each_rcu_flavor(rsp) { 1599 for_each_rcu_flavor(rsp) {
@@ -1848,7 +1816,7 @@ static int rcu_oom_notify(struct notifier_block *self,
1848 get_online_cpus(); 1816 get_online_cpus();
1849 for_each_online_cpu(cpu) { 1817 for_each_online_cpu(cpu) {
1850 smp_call_function_single(cpu, rcu_oom_notify_cpu, NULL, 1); 1818 smp_call_function_single(cpu, rcu_oom_notify_cpu, NULL, 1);
1851 cond_resched(); 1819 cond_resched_rcu_qs();
1852 } 1820 }
1853 put_online_cpus(); 1821 put_online_cpus();
1854 1822
@@ -2075,7 +2043,7 @@ static void wake_nocb_leader(struct rcu_data *rdp, bool force)
2075 if (!ACCESS_ONCE(rdp_leader->nocb_kthread)) 2043 if (!ACCESS_ONCE(rdp_leader->nocb_kthread))
2076 return; 2044 return;
2077 if (ACCESS_ONCE(rdp_leader->nocb_leader_sleep) || force) { 2045 if (ACCESS_ONCE(rdp_leader->nocb_leader_sleep) || force) {
2078 /* Prior xchg orders against prior callback enqueue. */ 2046 /* Prior smp_mb__after_atomic() orders against prior enqueue. */
2079 ACCESS_ONCE(rdp_leader->nocb_leader_sleep) = false; 2047 ACCESS_ONCE(rdp_leader->nocb_leader_sleep) = false;
2080 wake_up(&rdp_leader->nocb_wq); 2048 wake_up(&rdp_leader->nocb_wq);
2081 } 2049 }
@@ -2104,6 +2072,7 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
2104 ACCESS_ONCE(*old_rhpp) = rhp; 2072 ACCESS_ONCE(*old_rhpp) = rhp;
2105 atomic_long_add(rhcount, &rdp->nocb_q_count); 2073 atomic_long_add(rhcount, &rdp->nocb_q_count);
2106 atomic_long_add(rhcount_lazy, &rdp->nocb_q_count_lazy); 2074 atomic_long_add(rhcount_lazy, &rdp->nocb_q_count_lazy);
2075 smp_mb__after_atomic(); /* Store *old_rhpp before _wake test. */
2107 2076
2108 /* If we are not being polled and there is a kthread, awaken it ... */ 2077 /* If we are not being polled and there is a kthread, awaken it ... */
2109 t = ACCESS_ONCE(rdp->nocb_kthread); 2078 t = ACCESS_ONCE(rdp->nocb_kthread);
@@ -2120,16 +2089,23 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
2120 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, 2089 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
2121 TPS("WakeEmpty")); 2090 TPS("WakeEmpty"));
2122 } else { 2091 } else {
2123 rdp->nocb_defer_wakeup = true; 2092 rdp->nocb_defer_wakeup = RCU_NOGP_WAKE;
2124 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, 2093 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
2125 TPS("WakeEmptyIsDeferred")); 2094 TPS("WakeEmptyIsDeferred"));
2126 } 2095 }
2127 rdp->qlen_last_fqs_check = 0; 2096 rdp->qlen_last_fqs_check = 0;
2128 } else if (len > rdp->qlen_last_fqs_check + qhimark) { 2097 } else if (len > rdp->qlen_last_fqs_check + qhimark) {
2129 /* ... or if many callbacks queued. */ 2098 /* ... or if many callbacks queued. */
2130 wake_nocb_leader(rdp, true); 2099 if (!irqs_disabled_flags(flags)) {
2100 wake_nocb_leader(rdp, true);
2101 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
2102 TPS("WakeOvf"));
2103 } else {
2104 rdp->nocb_defer_wakeup = RCU_NOGP_WAKE_FORCE;
2105 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
2106 TPS("WakeOvfIsDeferred"));
2107 }
2131 rdp->qlen_last_fqs_check = LONG_MAX / 2; 2108 rdp->qlen_last_fqs_check = LONG_MAX / 2;
2132 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeOvf"));
2133 } else { 2109 } else {
2134 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeNot")); 2110 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeNot"));
2135 } 2111 }
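
The reworked enqueue path above defers the leader wakeup whenever it is entered with interrupts disabled, recording how urgent the wakeup is in ->nocb_defer_wakeup: RCU_NOGP_WAKE for the queue-was-empty case and RCU_NOGP_WAKE_FORCE for the queue-is-getting-long case. The deferred path later replays it, forcing the wakeup only in the latter case, as in this condensed copy of do_nocb_deferred_wakeup() from later in the patch:

    static void do_nocb_deferred_wakeup(struct rcu_data *rdp)
    {
            int ndw;

            if (!rcu_nocb_need_deferred_wakeup(rdp))
                    return;
            ndw = ACCESS_ONCE(rdp->nocb_defer_wakeup);
            ACCESS_ONCE(rdp->nocb_defer_wakeup) = RCU_NOGP_WAKE_NOT;
            wake_nocb_leader(rdp, ndw == RCU_NOGP_WAKE_FORCE);
    }
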
@@ -2150,7 +2126,7 @@ static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
2150{ 2126{
2151 2127
2152 if (!rcu_is_nocb_cpu(rdp->cpu)) 2128 if (!rcu_is_nocb_cpu(rdp->cpu))
2153 return 0; 2129 return false;
2154 __call_rcu_nocb_enqueue(rdp, rhp, &rhp->next, 1, lazy, flags); 2130 __call_rcu_nocb_enqueue(rdp, rhp, &rhp->next, 1, lazy, flags);
2155 if (__is_kfree_rcu_offset((unsigned long)rhp->func)) 2131 if (__is_kfree_rcu_offset((unsigned long)rhp->func))
2156 trace_rcu_kfree_callback(rdp->rsp->name, rhp, 2132 trace_rcu_kfree_callback(rdp->rsp->name, rhp,
@@ -2161,7 +2137,18 @@ static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
2161 trace_rcu_callback(rdp->rsp->name, rhp, 2137 trace_rcu_callback(rdp->rsp->name, rhp,
2162 -atomic_long_read(&rdp->nocb_q_count_lazy), 2138 -atomic_long_read(&rdp->nocb_q_count_lazy),
2163 -atomic_long_read(&rdp->nocb_q_count)); 2139 -atomic_long_read(&rdp->nocb_q_count));
2164 return 1; 2140
2141 /*
2142 * If called from an extended quiescent state with interrupts
2143 * disabled, invoke the RCU core in order to allow the idle-entry
2144 * deferred-wakeup check to function.
2145 */
2146 if (irqs_disabled_flags(flags) &&
2147 !rcu_is_watching() &&
2148 cpu_online(smp_processor_id()))
2149 invoke_rcu_core();
2150
2151 return true;
2165} 2152}
2166 2153
2167/* 2154/*
@@ -2177,7 +2164,7 @@ static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
2177 2164
2178 /* If this is not a no-CBs CPU, tell the caller to do it the old way. */ 2165 /* If this is not a no-CBs CPU, tell the caller to do it the old way. */
2179 if (!rcu_is_nocb_cpu(smp_processor_id())) 2166 if (!rcu_is_nocb_cpu(smp_processor_id()))
2180 return 0; 2167 return false;
2181 rsp->qlen = 0; 2168 rsp->qlen = 0;
2182 rsp->qlen_lazy = 0; 2169 rsp->qlen_lazy = 0;
2183 2170
@@ -2196,7 +2183,7 @@ static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
2196 rsp->orphan_nxtlist = NULL; 2183 rsp->orphan_nxtlist = NULL;
2197 rsp->orphan_nxttail = &rsp->orphan_nxtlist; 2184 rsp->orphan_nxttail = &rsp->orphan_nxtlist;
2198 } 2185 }
2199 return 1; 2186 return true;
2200} 2187}
2201 2188
2202/* 2189/*
@@ -2229,7 +2216,7 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp)
2229 (d = ULONG_CMP_GE(ACCESS_ONCE(rnp->completed), c))); 2216 (d = ULONG_CMP_GE(ACCESS_ONCE(rnp->completed), c)));
2230 if (likely(d)) 2217 if (likely(d))
2231 break; 2218 break;
2232 flush_signals(current); 2219 WARN_ON(signal_pending(current));
2233 trace_rcu_future_gp(rnp, rdp, c, TPS("ResumeWait")); 2220 trace_rcu_future_gp(rnp, rdp, c, TPS("ResumeWait"));
2234 } 2221 }
2235 trace_rcu_future_gp(rnp, rdp, c, TPS("EndWait")); 2222 trace_rcu_future_gp(rnp, rdp, c, TPS("EndWait"));
@@ -2288,7 +2275,7 @@ wait_again:
2288 if (!rcu_nocb_poll) 2275 if (!rcu_nocb_poll)
2289 trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu, 2276 trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu,
2290 "WokeEmpty"); 2277 "WokeEmpty");
2291 flush_signals(current); 2278 WARN_ON(signal_pending(current));
2292 schedule_timeout_interruptible(1); 2279 schedule_timeout_interruptible(1);
2293 2280
2294 /* Rescan in case we were a victim of memory ordering. */ 2281 /* Rescan in case we were a victim of memory ordering. */
@@ -2327,6 +2314,7 @@ wait_again:
2327 atomic_long_add(rdp->nocb_gp_count, &rdp->nocb_follower_count); 2314 atomic_long_add(rdp->nocb_gp_count, &rdp->nocb_follower_count);
2328 atomic_long_add(rdp->nocb_gp_count_lazy, 2315 atomic_long_add(rdp->nocb_gp_count_lazy,
2329 &rdp->nocb_follower_count_lazy); 2316 &rdp->nocb_follower_count_lazy);
2317 smp_mb__after_atomic(); /* Store *tail before wakeup. */
2330 if (rdp != my_rdp && tail == &rdp->nocb_follower_head) { 2318 if (rdp != my_rdp && tail == &rdp->nocb_follower_head) {
2331 /* 2319 /*
2332 * List was empty, wake up the follower. 2320 * List was empty, wake up the follower.
@@ -2367,7 +2355,7 @@ static void nocb_follower_wait(struct rcu_data *rdp)
2367 if (!rcu_nocb_poll) 2355 if (!rcu_nocb_poll)
2368 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, 2356 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
2369 "WokeEmpty"); 2357 "WokeEmpty");
2370 flush_signals(current); 2358 WARN_ON(signal_pending(current));
2371 schedule_timeout_interruptible(1); 2359 schedule_timeout_interruptible(1);
2372 } 2360 }
2373} 2361}
@@ -2428,15 +2416,16 @@ static int rcu_nocb_kthread(void *arg)
2428 list = next; 2416 list = next;
2429 } 2417 }
2430 trace_rcu_batch_end(rdp->rsp->name, c, !!list, 0, 0, 1); 2418 trace_rcu_batch_end(rdp->rsp->name, c, !!list, 0, 0, 1);
2431 ACCESS_ONCE(rdp->nocb_p_count) -= c; 2419 ACCESS_ONCE(rdp->nocb_p_count) = rdp->nocb_p_count - c;
2432 ACCESS_ONCE(rdp->nocb_p_count_lazy) -= cl; 2420 ACCESS_ONCE(rdp->nocb_p_count_lazy) =
2421 rdp->nocb_p_count_lazy - cl;
2433 rdp->n_nocbs_invoked += c; 2422 rdp->n_nocbs_invoked += c;
2434 } 2423 }
2435 return 0; 2424 return 0;
2436} 2425}
2437 2426
2438/* Is a deferred wakeup of rcu_nocb_kthread() required? */ 2427/* Is a deferred wakeup of rcu_nocb_kthread() required? */
2439static bool rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp) 2428static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp)
2440{ 2429{
2441 return ACCESS_ONCE(rdp->nocb_defer_wakeup); 2430 return ACCESS_ONCE(rdp->nocb_defer_wakeup);
2442} 2431}
@@ -2444,11 +2433,79 @@ static bool rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp)
2444/* Do a deferred wakeup of rcu_nocb_kthread(). */ 2433/* Do a deferred wakeup of rcu_nocb_kthread(). */
2445static void do_nocb_deferred_wakeup(struct rcu_data *rdp) 2434static void do_nocb_deferred_wakeup(struct rcu_data *rdp)
2446{ 2435{
2436 int ndw;
2437
2447 if (!rcu_nocb_need_deferred_wakeup(rdp)) 2438 if (!rcu_nocb_need_deferred_wakeup(rdp))
2448 return; 2439 return;
2449 ACCESS_ONCE(rdp->nocb_defer_wakeup) = false; 2440 ndw = ACCESS_ONCE(rdp->nocb_defer_wakeup);
2450 wake_nocb_leader(rdp, false); 2441 ACCESS_ONCE(rdp->nocb_defer_wakeup) = RCU_NOGP_WAKE_NOT;
2451 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("DeferredWakeEmpty")); 2442 wake_nocb_leader(rdp, ndw == RCU_NOGP_WAKE_FORCE);
2443 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("DeferredWake"));
2444}
2445
2446void __init rcu_init_nohz(void)
2447{
2448 int cpu;
2449 bool need_rcu_nocb_mask = true;
2450 struct rcu_state *rsp;
2451
2452#ifdef CONFIG_RCU_NOCB_CPU_NONE
2453 need_rcu_nocb_mask = false;
2454#endif /* #ifdef CONFIG_RCU_NOCB_CPU_NONE */
2455
2456#if defined(CONFIG_NO_HZ_FULL)
2457 if (tick_nohz_full_running && cpumask_weight(tick_nohz_full_mask))
2458 need_rcu_nocb_mask = true;
2459#endif /* #if defined(CONFIG_NO_HZ_FULL) */
2460
2461 if (!have_rcu_nocb_mask && need_rcu_nocb_mask) {
2462 if (!zalloc_cpumask_var(&rcu_nocb_mask, GFP_KERNEL)) {
2463 pr_info("rcu_nocb_mask allocation failed, callback offloading disabled.\n");
2464 return;
2465 }
2466 have_rcu_nocb_mask = true;
2467 }
2468 if (!have_rcu_nocb_mask)
2469 return;
2470
2471#ifdef CONFIG_RCU_NOCB_CPU_ZERO
2472 pr_info("\tOffload RCU callbacks from CPU 0\n");
2473 cpumask_set_cpu(0, rcu_nocb_mask);
2474#endif /* #ifdef CONFIG_RCU_NOCB_CPU_ZERO */
2475#ifdef CONFIG_RCU_NOCB_CPU_ALL
2476 pr_info("\tOffload RCU callbacks from all CPUs\n");
2477 cpumask_copy(rcu_nocb_mask, cpu_possible_mask);
2478#endif /* #ifdef CONFIG_RCU_NOCB_CPU_ALL */
2479#if defined(CONFIG_NO_HZ_FULL)
2480 if (tick_nohz_full_running)
2481 cpumask_or(rcu_nocb_mask, rcu_nocb_mask, tick_nohz_full_mask);
2482#endif /* #if defined(CONFIG_NO_HZ_FULL) */
2483
2484 if (!cpumask_subset(rcu_nocb_mask, cpu_possible_mask)) {
2485 pr_info("\tNote: kernel parameter 'rcu_nocbs=' contains nonexistent CPUs.\n");
2486 cpumask_and(rcu_nocb_mask, cpu_possible_mask,
2487 rcu_nocb_mask);
2488 }
2489 cpulist_scnprintf(nocb_buf, sizeof(nocb_buf), rcu_nocb_mask);
2490 pr_info("\tOffload RCU callbacks from CPUs: %s.\n", nocb_buf);
2491 if (rcu_nocb_poll)
2492 pr_info("\tPoll for callbacks from no-CBs CPUs.\n");
2493
2494 for_each_rcu_flavor(rsp) {
2495 for_each_cpu(cpu, rcu_nocb_mask) {
2496 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
2497
2498 /*
2499 * If there are early callbacks, they will need
2500 * to be moved to the nocb lists.
2501 */
2502 WARN_ON_ONCE(rdp->nxttail[RCU_NEXT_TAIL] !=
2503 &rdp->nxtlist &&
2504 rdp->nxttail[RCU_NEXT_TAIL] != NULL);
2505 init_nocb_callback_list(rdp);
2506 }
2507 rcu_organize_nocb_kthreads(rsp);
2508 }
2452} 2509}
2453 2510
2454/* Initialize per-rcu_data variables for no-CBs CPUs. */ 2511/* Initialize per-rcu_data variables for no-CBs CPUs. */
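
rcu_init_nohz() marks each offloaded CPU by calling init_nocb_callback_list(), whose visible effect (see the hunk near the end of this file) is to set ->nxttail[RCU_NEXT_TAIL] to NULL; that NULL tail pointer is both what the WARN_ON_ONCE() above checks for early callbacks and what the callback-posting path keys off. Illustratively (simplified, not the literal __call_rcu() code):

    /* Simplified: a NULL RCU_NEXT_TAIL redirects new callbacks to the rcuo kthread. */
    if (rdp->nxttail[RCU_NEXT_TAIL] == NULL) {
            if (__call_rcu_nocb(rdp, head, lazy, flags))
                    return;         /* handed off to the no-CBs machinery */
    }
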
@@ -2459,15 +2516,85 @@ static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
2459 rdp->nocb_follower_tail = &rdp->nocb_follower_head; 2516 rdp->nocb_follower_tail = &rdp->nocb_follower_head;
2460} 2517}
2461 2518
2519/*
2520 * If the specified CPU is a no-CBs CPU that does not already have its
2521 * rcuo kthread for the specified RCU flavor, spawn it. If the CPUs are
2522 * brought online out of order, this can require re-organizing the
2523 * leader-follower relationships.
2524 */
2525static void rcu_spawn_one_nocb_kthread(struct rcu_state *rsp, int cpu)
2526{
2527 struct rcu_data *rdp;
2528 struct rcu_data *rdp_last;
2529 struct rcu_data *rdp_old_leader;
2530 struct rcu_data *rdp_spawn = per_cpu_ptr(rsp->rda, cpu);
2531 struct task_struct *t;
2532
2533 /*
2534 * If this isn't a no-CBs CPU or if it already has an rcuo kthread,
2535 * then nothing to do.
2536 */
2537 if (!rcu_is_nocb_cpu(cpu) || rdp_spawn->nocb_kthread)
2538 return;
2539
2540 /* If we didn't spawn the leader first, reorganize! */
2541 rdp_old_leader = rdp_spawn->nocb_leader;
2542 if (rdp_old_leader != rdp_spawn && !rdp_old_leader->nocb_kthread) {
2543 rdp_last = NULL;
2544 rdp = rdp_old_leader;
2545 do {
2546 rdp->nocb_leader = rdp_spawn;
2547 if (rdp_last && rdp != rdp_spawn)
2548 rdp_last->nocb_next_follower = rdp;
2549 rdp_last = rdp;
2550 rdp = rdp->nocb_next_follower;
2551 rdp_last->nocb_next_follower = NULL;
2552 } while (rdp);
2553 rdp_spawn->nocb_next_follower = rdp_old_leader;
2554 }
2555
2556 /* Spawn the kthread for this CPU and RCU flavor. */
2557 t = kthread_run(rcu_nocb_kthread, rdp_spawn,
2558 "rcuo%c/%d", rsp->abbr, cpu);
2559 BUG_ON(IS_ERR(t));
2560 ACCESS_ONCE(rdp_spawn->nocb_kthread) = t;
2561}
2562
2563/*
2564 * If the specified CPU is a no-CBs CPU that does not already have its
2565 * rcuo kthreads, spawn them.
2566 */
2567static void rcu_spawn_all_nocb_kthreads(int cpu)
2568{
2569 struct rcu_state *rsp;
2570
2571 if (rcu_scheduler_fully_active)
2572 for_each_rcu_flavor(rsp)
2573 rcu_spawn_one_nocb_kthread(rsp, cpu);
2574}
2575
2576/*
2577 * Once the scheduler is running, spawn rcuo kthreads for all online
2578 * no-CBs CPUs. This assumes that the early_initcall()s happen before
2579 * non-boot CPUs come online -- if this changes, we will need to add
2580 * some mutual exclusion.
2581 */
2582static void __init rcu_spawn_nocb_kthreads(void)
2583{
2584 int cpu;
2585
2586 for_each_online_cpu(cpu)
2587 rcu_spawn_all_nocb_kthreads(cpu);
2588}
2589
2462/* How many follower CPU IDs per leader? Default of -1 for sqrt(nr_cpu_ids). */ 2590/* How many follower CPU IDs per leader? Default of -1 for sqrt(nr_cpu_ids). */
2463static int rcu_nocb_leader_stride = -1; 2591static int rcu_nocb_leader_stride = -1;
2464module_param(rcu_nocb_leader_stride, int, 0444); 2592module_param(rcu_nocb_leader_stride, int, 0444);
2465 2593
2466/* 2594/*
2467 * Create a kthread for each RCU flavor for each no-CBs CPU. 2595 * Initialize leader-follower relationships for all no-CBs CPUs.
2468 * Also initialize leader-follower relationships.
2469 */ 2596 */
2470static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp) 2597static void __init rcu_organize_nocb_kthreads(struct rcu_state *rsp)
2471{ 2598{
2472 int cpu; 2599 int cpu;
2473 int ls = rcu_nocb_leader_stride; 2600 int ls = rcu_nocb_leader_stride;
@@ -2475,14 +2602,9 @@ static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp)
2475 struct rcu_data *rdp; 2602 struct rcu_data *rdp;
2476 struct rcu_data *rdp_leader = NULL; /* Suppress misguided gcc warn. */ 2603 struct rcu_data *rdp_leader = NULL; /* Suppress misguided gcc warn. */
2477 struct rcu_data *rdp_prev = NULL; 2604 struct rcu_data *rdp_prev = NULL;
2478 struct task_struct *t;
2479 2605
2480 if (rcu_nocb_mask == NULL) 2606 if (!have_rcu_nocb_mask)
2481 return; 2607 return;
2482#if defined(CONFIG_NO_HZ_FULL) && !defined(CONFIG_NO_HZ_FULL_ALL)
2483 if (tick_nohz_full_running)
2484 cpumask_or(rcu_nocb_mask, rcu_nocb_mask, tick_nohz_full_mask);
2485#endif /* #if defined(CONFIG_NO_HZ_FULL) && !defined(CONFIG_NO_HZ_FULL_ALL) */
2486 if (ls == -1) { 2608 if (ls == -1) {
2487 ls = int_sqrt(nr_cpu_ids); 2609 ls = int_sqrt(nr_cpu_ids);
2488 rcu_nocb_leader_stride = ls; 2610 rcu_nocb_leader_stride = ls;
@@ -2505,21 +2627,15 @@ static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp)
2505 rdp_prev->nocb_next_follower = rdp; 2627 rdp_prev->nocb_next_follower = rdp;
2506 } 2628 }
2507 rdp_prev = rdp; 2629 rdp_prev = rdp;
2508
2509 /* Spawn the kthread for this CPU. */
2510 t = kthread_run(rcu_nocb_kthread, rdp,
2511 "rcuo%c/%d", rsp->abbr, cpu);
2512 BUG_ON(IS_ERR(t));
2513 ACCESS_ONCE(rdp->nocb_kthread) = t;
2514 } 2630 }
2515} 2631}
2516 2632
2517/* Prevent __call_rcu() from enqueuing callbacks on no-CBs CPUs */ 2633/* Prevent __call_rcu() from enqueuing callbacks on no-CBs CPUs */
2518static bool init_nocb_callback_list(struct rcu_data *rdp) 2634static bool init_nocb_callback_list(struct rcu_data *rdp)
2519{ 2635{
2520 if (rcu_nocb_mask == NULL || 2636 if (!rcu_is_nocb_cpu(rdp->cpu))
2521 !cpumask_test_cpu(rdp->cpu, rcu_nocb_mask))
2522 return false; 2637 return false;
2638
2523 rdp->nxttail[RCU_NEXT_TAIL] = NULL; 2639 rdp->nxttail[RCU_NEXT_TAIL] = NULL;
2524 return true; 2640 return true;
2525} 2641}
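
A worked example of the grouping rcu_organize_nocb_kthreads() produces: with 16 offloaded CPUs and rcu_nocb_leader_stride left at its default of -1, ls = int_sqrt(16) = 4, so CPUs 0, 4, 8 and 12 become rcuo leaders and each leads itself plus the next three no-CBs CPUs, giving roughly sqrt(N) leader-level wakeups. One way to reproduce that grouping arithmetic in a standalone program (illustrative only; the kernel walks rcu_nocb_mask over per-CPU rcu_data structures instead):

    #include <stdio.h>

    int main(void)
    {
            int nr_cpus = 16, ls = 4, next_leader = 0, leader = 0;

            for (int cpu = 0; cpu < nr_cpus; cpu++) {
                    if (cpu >= next_leader) {
                            next_leader = ((cpu + ls) / ls) * ls;   /* start of next group */
                            leader = cpu;
                            printf("CPU %2d: leader\n", cpu);       /* 0, 4, 8, 12 */
                    } else {
                            printf("CPU %2d: follower of %d\n", cpu, leader);
                    }
            }
            return 0;
    }
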
@@ -2541,21 +2657,21 @@ static void rcu_init_one_nocb(struct rcu_node *rnp)
2541static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp, 2657static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
2542 bool lazy, unsigned long flags) 2658 bool lazy, unsigned long flags)
2543{ 2659{
2544 return 0; 2660 return false;
2545} 2661}
2546 2662
2547static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp, 2663static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
2548 struct rcu_data *rdp, 2664 struct rcu_data *rdp,
2549 unsigned long flags) 2665 unsigned long flags)
2550{ 2666{
2551 return 0; 2667 return false;
2552} 2668}
2553 2669
2554static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp) 2670static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
2555{ 2671{
2556} 2672}
2557 2673
2558static bool rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp) 2674static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp)
2559{ 2675{
2560 return false; 2676 return false;
2561} 2677}
@@ -2564,7 +2680,11 @@ static void do_nocb_deferred_wakeup(struct rcu_data *rdp)
2564{ 2680{
2565} 2681}
2566 2682
2567static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp) 2683static void rcu_spawn_all_nocb_kthreads(int cpu)
2684{
2685}
2686
2687static void __init rcu_spawn_nocb_kthreads(void)
2568{ 2688{
2569} 2689}
2570 2690
@@ -2595,16 +2715,6 @@ static void __maybe_unused rcu_kick_nohz_cpu(int cpu)
2595 2715
2596#ifdef CONFIG_NO_HZ_FULL_SYSIDLE 2716#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
2597 2717
2598/*
2599 * Define RCU flavor that holds sysidle state. This needs to be the
2600 * most active flavor of RCU.
2601 */
2602#ifdef CONFIG_PREEMPT_RCU
2603static struct rcu_state *rcu_sysidle_state = &rcu_preempt_state;
2604#else /* #ifdef CONFIG_PREEMPT_RCU */
2605static struct rcu_state *rcu_sysidle_state = &rcu_sched_state;
2606#endif /* #else #ifdef CONFIG_PREEMPT_RCU */
2607
2608static int full_sysidle_state; /* Current system-idle state. */ 2718static int full_sysidle_state; /* Current system-idle state. */
2609#define RCU_SYSIDLE_NOT 0 /* Some CPU is not idle. */ 2719#define RCU_SYSIDLE_NOT 0 /* Some CPU is not idle. */
2610#define RCU_SYSIDLE_SHORT 1 /* All CPUs idle for brief period. */ 2720#define RCU_SYSIDLE_SHORT 1 /* All CPUs idle for brief period. */
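
The deleted rcu_sysidle_state pointer duplicated a choice that rcu_state_p already encodes (the most active flavor: preemptible RCU when CONFIG_PREEMPT_RCU, otherwise RCU-sched), so the sysidle code can compare against rcu_state_p directly. For reference, the removed selection amounted to:

    #ifdef CONFIG_PREEMPT_RCU
    static struct rcu_state *rcu_sysidle_state = &rcu_preempt_state;
    #else /* #ifdef CONFIG_PREEMPT_RCU */
    static struct rcu_state *rcu_sysidle_state = &rcu_sched_state;
    #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
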
@@ -2622,6 +2732,10 @@ static void rcu_sysidle_enter(struct rcu_dynticks *rdtp, int irq)
2622{ 2732{
2623 unsigned long j; 2733 unsigned long j;
2624 2734
2735 /* If there are no nohz_full= CPUs, no need to track this. */
2736 if (!tick_nohz_full_enabled())
2737 return;
2738
2625 /* Adjust nesting, check for fully idle. */ 2739 /* Adjust nesting, check for fully idle. */
2626 if (irq) { 2740 if (irq) {
2627 rdtp->dynticks_idle_nesting--; 2741 rdtp->dynticks_idle_nesting--;
@@ -2687,6 +2801,10 @@ void rcu_sysidle_force_exit(void)
2687 */ 2801 */
2688static void rcu_sysidle_exit(struct rcu_dynticks *rdtp, int irq) 2802static void rcu_sysidle_exit(struct rcu_dynticks *rdtp, int irq)
2689{ 2803{
2804 /* If there are no nohz_full= CPUs, no need to track this. */
2805 if (!tick_nohz_full_enabled())
2806 return;
2807
2690 /* Adjust nesting, check for already non-idle. */ 2808 /* Adjust nesting, check for already non-idle. */
2691 if (irq) { 2809 if (irq) {
2692 rdtp->dynticks_idle_nesting++; 2810 rdtp->dynticks_idle_nesting++;
@@ -2741,12 +2859,16 @@ static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle,
2741 unsigned long j; 2859 unsigned long j;
2742 struct rcu_dynticks *rdtp = rdp->dynticks; 2860 struct rcu_dynticks *rdtp = rdp->dynticks;
2743 2861
2862 /* If there are no nohz_full= CPUs, don't check system-wide idleness. */
2863 if (!tick_nohz_full_enabled())
2864 return;
2865
2744 /* 2866 /*
2745 * If some other CPU has already reported non-idle, if this is 2867 * If some other CPU has already reported non-idle, if this is
2746 * not the flavor of RCU that tracks sysidle state, or if this 2868 * not the flavor of RCU that tracks sysidle state, or if this
2747 * is an offline or the timekeeping CPU, nothing to do. 2869 * is an offline or the timekeeping CPU, nothing to do.
2748 */ 2870 */
2749 if (!*isidle || rdp->rsp != rcu_sysidle_state || 2871 if (!*isidle || rdp->rsp != rcu_state_p ||
2750 cpu_is_offline(rdp->cpu) || rdp->cpu == tick_do_timer_cpu) 2872 cpu_is_offline(rdp->cpu) || rdp->cpu == tick_do_timer_cpu)
2751 return; 2873 return;
2752 if (rcu_gp_in_progress(rdp->rsp)) 2874 if (rcu_gp_in_progress(rdp->rsp))
@@ -2772,7 +2894,7 @@ static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle,
2772 */ 2894 */
2773static bool is_sysidle_rcu_state(struct rcu_state *rsp) 2895static bool is_sysidle_rcu_state(struct rcu_state *rsp)
2774{ 2896{
2775 return rsp == rcu_sysidle_state; 2897 return rsp == rcu_state_p;
2776} 2898}
2777 2899
2778/* 2900/*
@@ -2850,7 +2972,7 @@ static void rcu_sysidle_cancel(void)
2850static void rcu_sysidle_report(struct rcu_state *rsp, int isidle, 2972static void rcu_sysidle_report(struct rcu_state *rsp, int isidle,
2851 unsigned long maxj, bool gpkt) 2973 unsigned long maxj, bool gpkt)
2852{ 2974{
2853 if (rsp != rcu_sysidle_state) 2975 if (rsp != rcu_state_p)
2854 return; /* Wrong flavor, ignore. */ 2976 return; /* Wrong flavor, ignore. */
2855 if (gpkt && nr_cpu_ids <= CONFIG_NO_HZ_FULL_SYSIDLE_SMALL) 2977 if (gpkt && nr_cpu_ids <= CONFIG_NO_HZ_FULL_SYSIDLE_SMALL)
2856 return; /* Running state machine from timekeeping CPU. */ 2978 return; /* Running state machine from timekeeping CPU. */
@@ -2867,6 +2989,10 @@ static void rcu_sysidle_report(struct rcu_state *rsp, int isidle,
2867static void rcu_sysidle_report_gp(struct rcu_state *rsp, int isidle, 2989static void rcu_sysidle_report_gp(struct rcu_state *rsp, int isidle,
2868 unsigned long maxj) 2990 unsigned long maxj)
2869{ 2991{
2992 /* If there are no nohz_full= CPUs, no need to track this. */
2993 if (!tick_nohz_full_enabled())
2994 return;
2995
2870 rcu_sysidle_report(rsp, isidle, maxj, true); 2996 rcu_sysidle_report(rsp, isidle, maxj, true);
2871} 2997}
2872 2998
@@ -2893,7 +3019,8 @@ static void rcu_sysidle_cb(struct rcu_head *rhp)
2893 3019
2894/* 3020/*
2895 * Check to see if the system is fully idle, other than the timekeeping CPU. 3021 * Check to see if the system is fully idle, other than the timekeeping CPU.
2896 * The caller must have disabled interrupts. 3022 * The caller must have disabled interrupts. This is not intended to be
3023 * called unless tick_nohz_full_enabled().
2897 */ 3024 */
2898bool rcu_sys_is_idle(void) 3025bool rcu_sys_is_idle(void)
2899{ 3026{
@@ -2919,13 +3046,12 @@ bool rcu_sys_is_idle(void)
2919 3046
2920 /* Scan all the CPUs looking for nonidle CPUs. */ 3047 /* Scan all the CPUs looking for nonidle CPUs. */
2921 for_each_possible_cpu(cpu) { 3048 for_each_possible_cpu(cpu) {
2922 rdp = per_cpu_ptr(rcu_sysidle_state->rda, cpu); 3049 rdp = per_cpu_ptr(rcu_state_p->rda, cpu);
2923 rcu_sysidle_check_cpu(rdp, &isidle, &maxj); 3050 rcu_sysidle_check_cpu(rdp, &isidle, &maxj);
2924 if (!isidle) 3051 if (!isidle)
2925 break; 3052 break;
2926 } 3053 }
2927 rcu_sysidle_report(rcu_sysidle_state, 3054 rcu_sysidle_report(rcu_state_p, isidle, maxj, false);
2928 isidle, maxj, false);
2929 oldrss = rss; 3055 oldrss = rss;
2930 rss = ACCESS_ONCE(full_sysidle_state); 3056 rss = ACCESS_ONCE(full_sysidle_state);
2931 } 3057 }
@@ -2952,7 +3078,7 @@ bool rcu_sys_is_idle(void)
2952 * provided by the memory allocator. 3078 * provided by the memory allocator.
2953 */ 3079 */
2954 if (nr_cpu_ids > CONFIG_NO_HZ_FULL_SYSIDLE_SMALL && 3080 if (nr_cpu_ids > CONFIG_NO_HZ_FULL_SYSIDLE_SMALL &&
2955 !rcu_gp_in_progress(rcu_sysidle_state) && 3081 !rcu_gp_in_progress(rcu_state_p) &&
2956 !rsh.inuse && xchg(&rsh.inuse, 1) == 0) 3082 !rsh.inuse && xchg(&rsh.inuse, 1) == 0)
2957 call_rcu(&rsh.rh, rcu_sysidle_cb); 3083 call_rcu(&rsh.rh, rcu_sysidle_cb);
2958 return false; 3084 return false;
@@ -3036,3 +3162,19 @@ static void rcu_bind_gp_kthread(void)
3036 housekeeping_affine(current); 3162 housekeeping_affine(current);
3037#endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */ 3163#endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
3038} 3164}
3165
3166/* Record the current task on dyntick-idle entry. */
3167static void rcu_dynticks_task_enter(void)
3168{
3169#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
3170 ACCESS_ONCE(current->rcu_tasks_idle_cpu) = smp_processor_id();
3171#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */
3172}
3173
3174/* Record no current task on dyntick-idle exit. */
3175static void rcu_dynticks_task_exit(void)
3176{
3177#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
3178 ACCESS_ONCE(current->rcu_tasks_idle_cpu) = -1;
3179#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */
3180}
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 4056d7992a6c..3ef8ba58694e 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -47,6 +47,8 @@
47#include <linux/hardirq.h> 47#include <linux/hardirq.h>
48#include <linux/delay.h> 48#include <linux/delay.h>
49#include <linux/module.h> 49#include <linux/module.h>
50#include <linux/kthread.h>
51#include <linux/tick.h>
50 52
51#define CREATE_TRACE_POINTS 53#define CREATE_TRACE_POINTS
52 54
@@ -91,7 +93,7 @@ void __rcu_read_unlock(void)
91 barrier(); /* critical section before exit code. */ 93 barrier(); /* critical section before exit code. */
92 t->rcu_read_lock_nesting = INT_MIN; 94 t->rcu_read_lock_nesting = INT_MIN;
93 barrier(); /* assign before ->rcu_read_unlock_special load */ 95 barrier(); /* assign before ->rcu_read_unlock_special load */
94 if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) 96 if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special.s)))
95 rcu_read_unlock_special(t); 97 rcu_read_unlock_special(t);
96 barrier(); /* ->rcu_read_unlock_special load before assign */ 98 barrier(); /* ->rcu_read_unlock_special load before assign */
97 t->rcu_read_lock_nesting = 0; 99 t->rcu_read_lock_nesting = 0;
@@ -137,6 +139,38 @@ int notrace debug_lockdep_rcu_enabled(void)
137EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled); 139EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled);
138 140
139/** 141/**
142 * rcu_read_lock_held() - might we be in RCU read-side critical section?
143 *
144 * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an RCU
 145 * read-side critical section. In the absence of CONFIG_DEBUG_LOCK_ALLOC,
146 * this assumes we are in an RCU read-side critical section unless it can
147 * prove otherwise. This is useful for debug checks in functions that
148 * require that they be called within an RCU read-side critical section.
149 *
150 * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot
151 * and while lockdep is disabled.
152 *
153 * Note that rcu_read_lock() and the matching rcu_read_unlock() must
 154 * occur in the same context; for example, it is illegal to invoke
155 * rcu_read_unlock() in process context if the matching rcu_read_lock()
156 * was invoked from within an irq handler.
157 *
158 * Note that rcu_read_lock() is disallowed if the CPU is either idle or
159 * offline from an RCU perspective, so check for those as well.
160 */
161int rcu_read_lock_held(void)
162{
163 if (!debug_lockdep_rcu_enabled())
164 return 1;
165 if (!rcu_is_watching())
166 return 0;
167 if (!rcu_lockdep_current_cpu_online())
168 return 0;
169 return lock_is_held(&rcu_lock_map);
170}
171EXPORT_SYMBOL_GPL(rcu_read_lock_held);
172
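
As an illustration of the debug-check use described above (a sketch, not part of this patch): a lookup helper can assert rcu_read_lock_held() before walking an RCU-protected list, so that lockdep flags callers that forgot rcu_read_lock(). The struct foo, foo_head list, and foo_lookup() names below are hypothetical.

#include <linux/rcupdate.h>
#include <linux/rculist.h>

struct foo {
	int key;
	struct list_head list;
};

static LIST_HEAD(foo_head);	/* entries added elsewhere with list_add_rcu() */

/* Caller must be inside an RCU read-side critical section. */
static struct foo *foo_lookup(int key)
{
	struct foo *p;

	rcu_lockdep_assert(rcu_read_lock_held(),
			   "foo_lookup() needs rcu_read_lock() protection");
	list_for_each_entry_rcu(p, &foo_head, list)
		if (p->key == key)
			return p;
	return NULL;
}

With CONFIG_PROVE_RCU=y, calling foo_lookup() outside an rcu_read_lock()/rcu_read_unlock() pair produces a lockdep splat; without it, the check compiles away.
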
173/**
140 * rcu_read_lock_bh_held() - might we be in RCU-bh read-side critical section? 174 * rcu_read_lock_bh_held() - might we be in RCU-bh read-side critical section?
141 * 175 *
142 * Check for bottom half being disabled, which covers both the 176 * Check for bottom half being disabled, which covers both the
@@ -347,3 +381,312 @@ static int __init check_cpu_stall_init(void)
347early_initcall(check_cpu_stall_init); 381early_initcall(check_cpu_stall_init);
348 382
349#endif /* #ifdef CONFIG_RCU_STALL_COMMON */ 383#endif /* #ifdef CONFIG_RCU_STALL_COMMON */
384
385#ifdef CONFIG_TASKS_RCU
386
387/*
388 * Simple variant of RCU whose quiescent states are voluntary context switch,
389 * user-space execution, and idle. As such, grace periods can take one good
390 * long time. There are no read-side primitives similar to rcu_read_lock()
391 * and rcu_read_unlock() because this implementation is intended to get
392 * the system into a safe state for some of the manipulations involved in
393 * tracing and the like. Finally, this implementation does not support
394 * high call_rcu_tasks() rates from multiple CPUs. If this is required,
395 * per-CPU callback lists will be needed.
396 */
397
398/* Global list of callbacks and associated lock. */
399static struct rcu_head *rcu_tasks_cbs_head;
400static struct rcu_head **rcu_tasks_cbs_tail = &rcu_tasks_cbs_head;
401static DECLARE_WAIT_QUEUE_HEAD(rcu_tasks_cbs_wq);
402static DEFINE_RAW_SPINLOCK(rcu_tasks_cbs_lock);
403
404/* Track exiting tasks in order to allow them to be waited for. */
405DEFINE_SRCU(tasks_rcu_exit_srcu);
406
407/* Control stall timeouts. Disable with <= 0, otherwise jiffies till stall. */
408static int rcu_task_stall_timeout __read_mostly = HZ * 60 * 10;
409module_param(rcu_task_stall_timeout, int, 0644);
410
411static void rcu_spawn_tasks_kthread(void);
412
413/*
414 * Post an RCU-tasks callback. First call must be from process context
 415 * after the scheduler is fully operational.
416 */
417void call_rcu_tasks(struct rcu_head *rhp, void (*func)(struct rcu_head *rhp))
418{
419 unsigned long flags;
420 bool needwake;
421
422 rhp->next = NULL;
423 rhp->func = func;
424 raw_spin_lock_irqsave(&rcu_tasks_cbs_lock, flags);
425 needwake = !rcu_tasks_cbs_head;
426 *rcu_tasks_cbs_tail = rhp;
427 rcu_tasks_cbs_tail = &rhp->next;
428 raw_spin_unlock_irqrestore(&rcu_tasks_cbs_lock, flags);
429 if (needwake) {
430 rcu_spawn_tasks_kthread();
431 wake_up(&rcu_tasks_cbs_wq);
432 }
433}
434EXPORT_SYMBOL_GPL(call_rcu_tasks);
435
436/**
437 * synchronize_rcu_tasks - wait until an rcu-tasks grace period has elapsed.
438 *
439 * Control will return to the caller some time after a full rcu-tasks
440 * grace period has elapsed, in other words after all currently
 442 * executing rcu-tasks read-side critical sections have completed. These
442 * read-side critical sections are delimited by calls to schedule(),
443 * cond_resched_rcu_qs(), idle execution, userspace execution, calls
444 * to synchronize_rcu_tasks(), and (in theory, anyway) cond_resched().
445 *
446 * This is a very specialized primitive, intended only for a few uses in
447 * tracing and other situations requiring manipulation of function
448 * preambles and profiling hooks. The synchronize_rcu_tasks() function
449 * is not (yet) intended for heavy use from multiple CPUs.
450 *
451 * Note that this guarantee implies further memory-ordering guarantees.
452 * On systems with more than one CPU, when synchronize_rcu_tasks() returns,
453 * each CPU is guaranteed to have executed a full memory barrier since the
454 * end of its last RCU-tasks read-side critical section whose beginning
455 * preceded the call to synchronize_rcu_tasks(). In addition, each CPU
456 * having an RCU-tasks read-side critical section that extends beyond
457 * the return from synchronize_rcu_tasks() is guaranteed to have executed
458 * a full memory barrier after the beginning of synchronize_rcu_tasks()
459 * and before the beginning of that RCU-tasks read-side critical section.
460 * Note that these guarantees include CPUs that are offline, idle, or
461 * executing in user mode, as well as CPUs that are executing in the kernel.
462 *
463 * Furthermore, if CPU A invoked synchronize_rcu_tasks(), which returned
464 * to its caller on CPU B, then both CPU A and CPU B are guaranteed
465 * to have executed a full memory barrier during the execution of
466 * synchronize_rcu_tasks() -- even if CPU A and CPU B are the same CPU
467 * (but again only if the system has more than one CPU).
468 */
469void synchronize_rcu_tasks(void)
470{
471 /* Complain if the scheduler has not started. */
 472	rcu_lockdep_assert(rcu_scheduler_active,
473 "synchronize_rcu_tasks called too soon");
474
475 /* Wait for the grace period. */
476 wait_rcu_gp(call_rcu_tasks);
477}
478EXPORT_SYMBOL_GPL(synchronize_rcu_tasks);
479
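
The synchronous pattern can be sketched along the same lines (all my_* helpers are hypothetical placeholders; only synchronize_rcu_tasks() is the API added here): code that may sleep can unhook the old text, wait out one full RCU-tasks grace period, and then release it directly.

#include <linux/rcupdate.h>

static void my_unhook_old_stub(void)
{
	/* Hypothetical: remove every way for a new task to enter the old stub. */
}

static void my_free_old_stub(void)
{
	/* Hypothetical: release the old stub's memory. */
}

static void my_replace_stub(void)
{
	my_unhook_old_stub();
	synchronize_rcu_tasks();	/* wait for tasks already inside the stub */
	my_free_old_stub();		/* now safe: no task can still be in it */
}
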
480/**
481 * rcu_barrier_tasks - Wait for in-flight call_rcu_tasks() callbacks.
482 *
483 * Although the current implementation is guaranteed to wait, it is not
484 * obligated to, for example, if there are no pending callbacks.
485 */
486void rcu_barrier_tasks(void)
487{
488 /* There is only one callback queue, so this is easy. ;-) */
489 synchronize_rcu_tasks();
490}
491EXPORT_SYMBOL_GPL(rcu_barrier_tasks);
492
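
One plausible use of rcu_barrier_tasks(), sketched with a hypothetical module: a module that posted callbacks with call_rcu_tasks() must wait for them to be invoked before its callback code and data can go away.

#include <linux/module.h>
#include <linux/rcupdate.h>

static int __init my_module_init(void)
{
	return 0;	/* callbacks would be posted later via call_rcu_tasks() */
}
module_init(my_module_init);

static void __exit my_module_exit(void)
{
	/* Assumes no further call_rcu_tasks() invocations after this point. */
	rcu_barrier_tasks();	/* wait for already-posted callbacks to finish */
}
module_exit(my_module_exit);

MODULE_LICENSE("GPL");
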
493/* See if tasks are still holding out, complain if so. */
494static void check_holdout_task(struct task_struct *t,
495 bool needreport, bool *firstreport)
496{
497 int cpu;
498
499 if (!ACCESS_ONCE(t->rcu_tasks_holdout) ||
500 t->rcu_tasks_nvcsw != ACCESS_ONCE(t->nvcsw) ||
501 !ACCESS_ONCE(t->on_rq) ||
502 (IS_ENABLED(CONFIG_NO_HZ_FULL) &&
503 !is_idle_task(t) && t->rcu_tasks_idle_cpu >= 0)) {
504 ACCESS_ONCE(t->rcu_tasks_holdout) = false;
505 list_del_init(&t->rcu_tasks_holdout_list);
506 put_task_struct(t);
507 return;
508 }
509 if (!needreport)
510 return;
511 if (*firstreport) {
512 pr_err("INFO: rcu_tasks detected stalls on tasks:\n");
513 *firstreport = false;
514 }
515 cpu = task_cpu(t);
516 pr_alert("%p: %c%c nvcsw: %lu/%lu holdout: %d idle_cpu: %d/%d\n",
517 t, ".I"[is_idle_task(t)],
518 "N."[cpu < 0 || !tick_nohz_full_cpu(cpu)],
519 t->rcu_tasks_nvcsw, t->nvcsw, t->rcu_tasks_holdout,
520 t->rcu_tasks_idle_cpu, cpu);
521 sched_show_task(t);
522}
523
524/* RCU-tasks kthread that detects grace periods and invokes callbacks. */
525static int __noreturn rcu_tasks_kthread(void *arg)
526{
527 unsigned long flags;
528 struct task_struct *g, *t;
529 unsigned long lastreport;
530 struct rcu_head *list;
531 struct rcu_head *next;
532 LIST_HEAD(rcu_tasks_holdouts);
533
534 /* FIXME: Add housekeeping affinity. */
535
536 /*
537 * Each pass through the following loop makes one check for
538 * newly arrived callbacks, and, if there are some, waits for
539 * one RCU-tasks grace period and then invokes the callbacks.
540 * This loop is terminated by the system going down. ;-)
541 */
542 for (;;) {
543
544 /* Pick up any new callbacks. */
545 raw_spin_lock_irqsave(&rcu_tasks_cbs_lock, flags);
546 list = rcu_tasks_cbs_head;
547 rcu_tasks_cbs_head = NULL;
548 rcu_tasks_cbs_tail = &rcu_tasks_cbs_head;
549 raw_spin_unlock_irqrestore(&rcu_tasks_cbs_lock, flags);
550
551 /* If there were none, wait a bit and start over. */
552 if (!list) {
553 wait_event_interruptible(rcu_tasks_cbs_wq,
554 rcu_tasks_cbs_head);
555 if (!rcu_tasks_cbs_head) {
556 WARN_ON(signal_pending(current));
557 schedule_timeout_interruptible(HZ/10);
558 }
559 continue;
560 }
561
562 /*
563 * Wait for all pre-existing t->on_rq and t->nvcsw
564 * transitions to complete. Invoking synchronize_sched()
565 * suffices because all these transitions occur with
566 * interrupts disabled. Without this synchronize_sched(),
567 * a read-side critical section that started before the
568 * grace period might be incorrectly seen as having started
569 * after the grace period.
570 *
571 * This synchronize_sched() also dispenses with the
572 * need for a memory barrier on the first store to
573 * ->rcu_tasks_holdout, as it forces the store to happen
574 * after the beginning of the grace period.
575 */
576 synchronize_sched();
577
578 /*
579 * There were callbacks, so we need to wait for an
580 * RCU-tasks grace period. Start off by scanning
581 * the task list for tasks that are not already
582 * voluntarily blocked. Mark these tasks and make
583 * a list of them in rcu_tasks_holdouts.
584 */
585 rcu_read_lock();
586 for_each_process_thread(g, t) {
587 if (t != current && ACCESS_ONCE(t->on_rq) &&
588 !is_idle_task(t)) {
589 get_task_struct(t);
590 t->rcu_tasks_nvcsw = ACCESS_ONCE(t->nvcsw);
591 ACCESS_ONCE(t->rcu_tasks_holdout) = true;
592 list_add(&t->rcu_tasks_holdout_list,
593 &rcu_tasks_holdouts);
594 }
595 }
596 rcu_read_unlock();
597
598 /*
599 * Wait for tasks that are in the process of exiting.
600 * This does only part of the job, ensuring that all
601 * tasks that were previously exiting reach the point
602 * where they have disabled preemption, allowing the
603 * later synchronize_sched() to finish the job.
604 */
605 synchronize_srcu(&tasks_rcu_exit_srcu);
606
607 /*
608 * Each pass through the following loop scans the list
609 * of holdout tasks, removing any that are no longer
610 * holdouts. When the list is empty, we are done.
611 */
612 lastreport = jiffies;
613 while (!list_empty(&rcu_tasks_holdouts)) {
614 bool firstreport;
615 bool needreport;
616 int rtst;
617 struct task_struct *t1;
618
619 schedule_timeout_interruptible(HZ);
620 rtst = ACCESS_ONCE(rcu_task_stall_timeout);
621 needreport = rtst > 0 &&
622 time_after(jiffies, lastreport + rtst);
623 if (needreport)
624 lastreport = jiffies;
625 firstreport = true;
626 WARN_ON(signal_pending(current));
627 list_for_each_entry_safe(t, t1, &rcu_tasks_holdouts,
628 rcu_tasks_holdout_list) {
629 check_holdout_task(t, needreport, &firstreport);
630 cond_resched();
631 }
632 }
633
634 /*
635 * Because ->on_rq and ->nvcsw are not guaranteed
 636 * to have full memory barriers prior to them in the
637 * schedule() path, memory reordering on other CPUs could
638 * cause their RCU-tasks read-side critical sections to
639 * extend past the end of the grace period. However,
640 * because these ->nvcsw updates are carried out with
641 * interrupts disabled, we can use synchronize_sched()
642 * to force the needed ordering on all such CPUs.
643 *
644 * This synchronize_sched() also confines all
645 * ->rcu_tasks_holdout accesses to be within the grace
646 * period, avoiding the need for memory barriers for
647 * ->rcu_tasks_holdout accesses.
648 *
649 * In addition, this synchronize_sched() waits for exiting
650 * tasks to complete their final preempt_disable() region
651 * of execution, cleaning up after the synchronize_srcu()
652 * above.
653 */
654 synchronize_sched();
655
656 /* Invoke the callbacks. */
657 while (list) {
658 next = list->next;
659 local_bh_disable();
660 list->func(list);
661 local_bh_enable();
662 list = next;
663 cond_resched();
664 }
665 schedule_timeout_uninterruptible(HZ/10);
666 }
667}
668
669/* Spawn rcu_tasks_kthread() at first call to call_rcu_tasks(). */
670static void rcu_spawn_tasks_kthread(void)
671{
672 static DEFINE_MUTEX(rcu_tasks_kthread_mutex);
673 static struct task_struct *rcu_tasks_kthread_ptr;
674 struct task_struct *t;
675
676 if (ACCESS_ONCE(rcu_tasks_kthread_ptr)) {
677 smp_mb(); /* Ensure caller sees full kthread. */
678 return;
679 }
680 mutex_lock(&rcu_tasks_kthread_mutex);
681 if (rcu_tasks_kthread_ptr) {
682 mutex_unlock(&rcu_tasks_kthread_mutex);
683 return;
684 }
685 t = kthread_run(rcu_tasks_kthread, NULL, "rcu_tasks_kthread");
686 BUG_ON(IS_ERR(t));
687 smp_mb(); /* Ensure others see full kthread. */
688 ACCESS_ONCE(rcu_tasks_kthread_ptr) = t;
689 mutex_unlock(&rcu_tasks_kthread_mutex);
690}
691
692#endif /* #ifdef CONFIG_TASKS_RCU */
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 5918d227730f..348ec763b104 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -278,7 +278,7 @@ restart:
278 pending >>= softirq_bit; 278 pending >>= softirq_bit;
279 } 279 }
280 280
281 rcu_bh_qs(smp_processor_id()); 281 rcu_bh_qs();
282 local_irq_disable(); 282 local_irq_disable();
283 283
284 pending = local_softirq_pending(); 284 pending = local_softirq_pending();
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 91180987e40e..4aada6d9fe74 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1055,15 +1055,6 @@ static struct ctl_table kern_table[] = {
1055 .child = key_sysctls, 1055 .child = key_sysctls,
1056 }, 1056 },
1057#endif 1057#endif
1058#ifdef CONFIG_RCU_TORTURE_TEST
1059 {
1060 .procname = "rcutorture_runnable",
1061 .data = &rcutorture_runnable,
1062 .maxlen = sizeof(int),
1063 .mode = 0644,
1064 .proc_handler = proc_dointvec,
1065 },
1066#endif
1067#ifdef CONFIG_PERF_EVENTS 1058#ifdef CONFIG_PERF_EVENTS
1068 /* 1059 /*
1069 * User-space scripts rely on the existence of this file 1060 * User-space scripts rely on the existence of this file
diff --git a/kernel/torture.c b/kernel/torture.c
index d600af21f022..dd70993c266c 100644
--- a/kernel/torture.c
+++ b/kernel/torture.c
@@ -211,18 +211,16 @@ EXPORT_SYMBOL_GPL(torture_onoff_cleanup);
211/* 211/*
212 * Print online/offline testing statistics. 212 * Print online/offline testing statistics.
213 */ 213 */
214char *torture_onoff_stats(char *page) 214void torture_onoff_stats(void)
215{ 215{
216#ifdef CONFIG_HOTPLUG_CPU 216#ifdef CONFIG_HOTPLUG_CPU
217 page += sprintf(page, 217 pr_cont("onoff: %ld/%ld:%ld/%ld %d,%d:%d,%d %lu:%lu (HZ=%d) ",
218 "onoff: %ld/%ld:%ld/%ld %d,%d:%d,%d %lu:%lu (HZ=%d) ", 218 n_online_successes, n_online_attempts,
219 n_online_successes, n_online_attempts, 219 n_offline_successes, n_offline_attempts,
220 n_offline_successes, n_offline_attempts, 220 min_online, max_online,
221 min_online, max_online, 221 min_offline, max_offline,
222 min_offline, max_offline, 222 sum_online, sum_offline, HZ);
223 sum_online, sum_offline, HZ);
224#endif /* #ifdef CONFIG_HOTPLUG_CPU */ 223#endif /* #ifdef CONFIG_HOTPLUG_CPU */
225 return page;
226} 224}
227EXPORT_SYMBOL_GPL(torture_onoff_stats); 225EXPORT_SYMBOL_GPL(torture_onoff_stats);
228 226
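
With torture_onoff_stats() now printing directly instead of filling a caller-supplied page buffer, a torture module's statistics routine would emit its own pr_*() output and let torture_onoff_stats() append to the same line via pr_cont(). A sketch with a hypothetical module and counters:

#include <linux/kernel.h>
#include <linux/torture.h>

static unsigned long n_my_reads;	/* hypothetical statistics counters */
static unsigned long n_my_errors;

static void mytorture_print_stats(void)
{
	pr_alert("mytorture: reads: %lu errors: %lu ",
		 n_my_reads, n_my_errors);
	torture_onoff_stats();	/* appends "onoff: ..." via pr_cont() */
	pr_cont("\n");
}
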
@@ -635,8 +633,13 @@ EXPORT_SYMBOL_GPL(torture_init_end);
635 * 633 *
636 * This must be called before the caller starts shutting down its own 634 * This must be called before the caller starts shutting down its own
637 * kthreads. 635 * kthreads.
636 *
637 * Both torture_cleanup_begin() and torture_cleanup_end() must be paired,
 638 * in order to correctly perform the cleanup. They are separate because
 639 * other threads might still need to reference torture_type, which is
 640 * therefore set to NULL only after all other cleanup calls have completed.
638 */ 641 */
639bool torture_cleanup(void) 642bool torture_cleanup_begin(void)
640{ 643{
641 mutex_lock(&fullstop_mutex); 644 mutex_lock(&fullstop_mutex);
642 if (ACCESS_ONCE(fullstop) == FULLSTOP_SHUTDOWN) { 645 if (ACCESS_ONCE(fullstop) == FULLSTOP_SHUTDOWN) {
@@ -651,12 +654,17 @@ bool torture_cleanup(void)
651 torture_shuffle_cleanup(); 654 torture_shuffle_cleanup();
652 torture_stutter_cleanup(); 655 torture_stutter_cleanup();
653 torture_onoff_cleanup(); 656 torture_onoff_cleanup();
657 return false;
658}
659EXPORT_SYMBOL_GPL(torture_cleanup_begin);
660
661void torture_cleanup_end(void)
662{
654 mutex_lock(&fullstop_mutex); 663 mutex_lock(&fullstop_mutex);
655 torture_type = NULL; 664 torture_type = NULL;
656 mutex_unlock(&fullstop_mutex); 665 mutex_unlock(&fullstop_mutex);
657 return false;
658} 666}
659EXPORT_SYMBOL_GPL(torture_cleanup); 667EXPORT_SYMBOL_GPL(torture_cleanup_end);
660 668
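
The split then gives a torture module a window in which torture_type remains valid while the module stops its own kthreads. A sketch of the expected calling pattern, with mytorture_reader() and reader_task hypothetical:

#include <linux/sched.h>
#include <linux/torture.h>

static struct task_struct *reader_task;
static int mytorture_reader(void *arg);	/* hypothetical reader kthread */

static void mytorture_cleanup(void)
{
	if (torture_cleanup_begin())
		return;		/* an external shutdown is already under way */

	/* This module's kthreads may still reference torture_type here. */
	torture_stop_kthread(mytorture_reader, reader_task);

	torture_cleanup_end();	/* only now may torture_type become NULL */
}
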
661/* 669/*
662 * Is it time for the current torture test to stop? 670 * Is it time for the current torture test to stop?
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 5dbe22aa3efd..09b685daee3d 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -2043,9 +2043,10 @@ __acquires(&pool->lock)
2043 * kernels, where a requeueing work item waiting for something to 2043 * kernels, where a requeueing work item waiting for something to
2044 * happen could deadlock with stop_machine as such work item could 2044 * happen could deadlock with stop_machine as such work item could
2045 * indefinitely requeue itself while all other CPUs are trapped in 2045 * indefinitely requeue itself while all other CPUs are trapped in
2046 * stop_machine. 2046 * stop_machine. At the same time, report a quiescent RCU state so
2047 * the same condition doesn't freeze RCU.
2047 */ 2048 */
2048 cond_resched(); 2049 cond_resched_rcu_qs();
2049 2050
2050 spin_lock_irq(&pool->lock); 2051 spin_lock_irq(&pool->lock);
2051 2052
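
The same idiom applies to any kernel loop that can run for a long time without blocking: cond_resched_rcu_qs() both offers to reschedule and reports a quiescent state so that RCU grace periods are not held up. A minimal sketch, with the kthread and its per-iteration work hypothetical:

#include <linux/kthread.h>
#include <linux/rcupdate.h>

static void my_do_one_unit_of_work(void)
{
	/* Hypothetical placeholder for real per-iteration processing. */
}

static int my_worker_fn(void *arg)
{
	while (!kthread_should_stop()) {
		my_do_one_unit_of_work();
		/* Yield if needed and report an RCU quiescent state. */
		cond_resched_rcu_qs();
	}
	return 0;
}
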