aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/atomic_ops.txt2
-rw-r--r--Documentation/kernel-parameters.txt2
-rw-r--r--Documentation/memory-barriers.txt1
-rw-r--r--include/linux/rcupdate.h15
-rw-r--r--include/trace/events/rcu.h31
-rw-r--r--init/Kconfig12
-rw-r--r--kernel/context_tracking.c75
-rw-r--r--kernel/rcu.h7
-rw-r--r--kernel/rcupdate.c60
-rw-r--r--kernel/rcutiny.c8
-rw-r--r--kernel/rcutiny_plugin.h56
-rw-r--r--kernel/rcutorture.c57
-rw-r--r--kernel/rcutree.c260
-rw-r--r--kernel/rcutree.h11
-rw-r--r--kernel/rcutree_plugin.h13
-rw-r--r--kernel/trace/trace_clock.c1
-rw-r--r--lib/Kconfig.debug117
17 files changed, 539 insertions, 189 deletions
diff --git a/Documentation/atomic_ops.txt b/Documentation/atomic_ops.txt
index 27f2b21a9d5c..d9ca5be9b471 100644
--- a/Documentation/atomic_ops.txt
+++ b/Documentation/atomic_ops.txt
@@ -253,6 +253,8 @@ This performs an atomic exchange operation on the atomic variable v, setting
253the given new value. It returns the old value that the atomic variable v had 253the given new value. It returns the old value that the atomic variable v had
254just before the operation. 254just before the operation.
255 255
256atomic_xchg requires explicit memory barriers around the operation.
257
256 int atomic_cmpxchg(atomic_t *v, int old, int new); 258 int atomic_cmpxchg(atomic_t *v, int old, int new);
257 259
258This performs an atomic compare exchange operation on the atomic value v, 260This performs an atomic compare exchange operation on the atomic value v,
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 363e348bff9b..6c723811c0a0 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2438,7 +2438,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
2438 real-time workloads. It can also improve energy 2438 real-time workloads. It can also improve energy
2439 efficiency for asymmetric multiprocessors. 2439 efficiency for asymmetric multiprocessors.
2440 2440
2441 rcu_nocbs_poll [KNL,BOOT] 2441 rcu_nocb_poll [KNL,BOOT]
2442 Rather than requiring that offloaded CPUs 2442 Rather than requiring that offloaded CPUs
2443 (specified by rcu_nocbs= above) explicitly 2443 (specified by rcu_nocbs= above) explicitly
2444 awaken the corresponding "rcuoN" kthreads, 2444 awaken the corresponding "rcuoN" kthreads,
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index 3c4e1b3b80a1..fa5d8a9ae205 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -1685,6 +1685,7 @@ explicit lock operations, described later). These include:
1685 1685
1686 xchg(); 1686 xchg();
1687 cmpxchg(); 1687 cmpxchg();
1688 atomic_xchg();
1688 atomic_cmpxchg(); 1689 atomic_cmpxchg();
1689 atomic_inc_return(); 1690 atomic_inc_return();
1690 atomic_dec_return(); 1691 atomic_dec_return();
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 275aa3f1062d..b758ce17b309 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -53,7 +53,10 @@ extern int rcutorture_runnable; /* for sysctl */
53extern void rcutorture_record_test_transition(void); 53extern void rcutorture_record_test_transition(void);
54extern void rcutorture_record_progress(unsigned long vernum); 54extern void rcutorture_record_progress(unsigned long vernum);
55extern void do_trace_rcu_torture_read(char *rcutorturename, 55extern void do_trace_rcu_torture_read(char *rcutorturename,
56 struct rcu_head *rhp); 56 struct rcu_head *rhp,
57 unsigned long secs,
58 unsigned long c_old,
59 unsigned long c);
57#else 60#else
58static inline void rcutorture_record_test_transition(void) 61static inline void rcutorture_record_test_transition(void)
59{ 62{
@@ -63,9 +66,13 @@ static inline void rcutorture_record_progress(unsigned long vernum)
63} 66}
64#ifdef CONFIG_RCU_TRACE 67#ifdef CONFIG_RCU_TRACE
65extern void do_trace_rcu_torture_read(char *rcutorturename, 68extern void do_trace_rcu_torture_read(char *rcutorturename,
66 struct rcu_head *rhp); 69 struct rcu_head *rhp,
70 unsigned long secs,
71 unsigned long c_old,
72 unsigned long c);
67#else 73#else
68#define do_trace_rcu_torture_read(rcutorturename, rhp) do { } while (0) 74#define do_trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \
75 do { } while (0)
69#endif 76#endif
70#endif 77#endif
71 78
@@ -749,7 +756,7 @@ static inline void rcu_preempt_sleep_check(void)
749 * preemptible RCU implementations (TREE_PREEMPT_RCU and TINY_PREEMPT_RCU) 756 * preemptible RCU implementations (TREE_PREEMPT_RCU and TINY_PREEMPT_RCU)
750 * in CONFIG_PREEMPT kernel builds, RCU read-side critical sections may 757 * in CONFIG_PREEMPT kernel builds, RCU read-side critical sections may
751 * be preempted, but explicit blocking is illegal. Finally, in preemptible 758 * be preempted, but explicit blocking is illegal. Finally, in preemptible
752 * RCU implementations in real-time (CONFIG_PREEMPT_RT) kernel builds, 759 * RCU implementations in real-time (with -rt patchset) kernel builds,
753 * RCU read-side critical sections may be preempted and they may also 760 * RCU read-side critical sections may be preempted and they may also
754 * block, but only when acquiring spinlocks that are subject to priority 761 * block, but only when acquiring spinlocks that are subject to priority
755 * inheritance. 762 * inheritance.
diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index d4f559b1ec34..1918e832da4f 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -44,8 +44,10 @@ TRACE_EVENT(rcu_utilization,
44 * of a new grace period or the end of an old grace period ("cpustart" 44 * of a new grace period or the end of an old grace period ("cpustart"
45 * and "cpuend", respectively), a CPU passing through a quiescent 45 * and "cpuend", respectively), a CPU passing through a quiescent
46 * state ("cpuqs"), a CPU coming online or going offline ("cpuonl" 46 * state ("cpuqs"), a CPU coming online or going offline ("cpuonl"
47 * and "cpuofl", respectively), and a CPU being kicked for being too 47 * and "cpuofl", respectively), a CPU being kicked for being too
48 * long in dyntick-idle mode ("kick"). 48 * long in dyntick-idle mode ("kick"), a CPU accelerating its new
49 * callbacks to RCU_NEXT_READY_TAIL ("AccReadyCB"), and a CPU
50 * accelerating its new callbacks to RCU_WAIT_TAIL ("AccWaitCB").
49 */ 51 */
50TRACE_EVENT(rcu_grace_period, 52TRACE_EVENT(rcu_grace_period,
51 53
@@ -393,7 +395,7 @@ TRACE_EVENT(rcu_kfree_callback,
393 */ 395 */
394TRACE_EVENT(rcu_batch_start, 396TRACE_EVENT(rcu_batch_start,
395 397
396 TP_PROTO(char *rcuname, long qlen_lazy, long qlen, int blimit), 398 TP_PROTO(char *rcuname, long qlen_lazy, long qlen, long blimit),
397 399
398 TP_ARGS(rcuname, qlen_lazy, qlen, blimit), 400 TP_ARGS(rcuname, qlen_lazy, qlen, blimit),
399 401
@@ -401,7 +403,7 @@ TRACE_EVENT(rcu_batch_start,
401 __field(char *, rcuname) 403 __field(char *, rcuname)
402 __field(long, qlen_lazy) 404 __field(long, qlen_lazy)
403 __field(long, qlen) 405 __field(long, qlen)
404 __field(int, blimit) 406 __field(long, blimit)
405 ), 407 ),
406 408
407 TP_fast_assign( 409 TP_fast_assign(
@@ -411,7 +413,7 @@ TRACE_EVENT(rcu_batch_start,
411 __entry->blimit = blimit; 413 __entry->blimit = blimit;
412 ), 414 ),
413 415
414 TP_printk("%s CBs=%ld/%ld bl=%d", 416 TP_printk("%s CBs=%ld/%ld bl=%ld",
415 __entry->rcuname, __entry->qlen_lazy, __entry->qlen, 417 __entry->rcuname, __entry->qlen_lazy, __entry->qlen,
416 __entry->blimit) 418 __entry->blimit)
417); 419);
@@ -523,22 +525,30 @@ TRACE_EVENT(rcu_batch_end,
523 */ 525 */
524TRACE_EVENT(rcu_torture_read, 526TRACE_EVENT(rcu_torture_read,
525 527
526 TP_PROTO(char *rcutorturename, struct rcu_head *rhp), 528 TP_PROTO(char *rcutorturename, struct rcu_head *rhp,
529 unsigned long secs, unsigned long c_old, unsigned long c),
527 530
528 TP_ARGS(rcutorturename, rhp), 531 TP_ARGS(rcutorturename, rhp, secs, c_old, c),
529 532
530 TP_STRUCT__entry( 533 TP_STRUCT__entry(
531 __field(char *, rcutorturename) 534 __field(char *, rcutorturename)
532 __field(struct rcu_head *, rhp) 535 __field(struct rcu_head *, rhp)
536 __field(unsigned long, secs)
537 __field(unsigned long, c_old)
538 __field(unsigned long, c)
533 ), 539 ),
534 540
535 TP_fast_assign( 541 TP_fast_assign(
536 __entry->rcutorturename = rcutorturename; 542 __entry->rcutorturename = rcutorturename;
537 __entry->rhp = rhp; 543 __entry->rhp = rhp;
544 __entry->secs = secs;
545 __entry->c_old = c_old;
546 __entry->c = c;
538 ), 547 ),
539 548
540 TP_printk("%s torture read %p", 549 TP_printk("%s torture read %p %luus c: %lu %lu",
541 __entry->rcutorturename, __entry->rhp) 550 __entry->rcutorturename, __entry->rhp,
551 __entry->secs, __entry->c_old, __entry->c)
542); 552);
543 553
544/* 554/*
@@ -608,7 +618,8 @@ TRACE_EVENT(rcu_barrier,
608#define trace_rcu_invoke_kfree_callback(rcuname, rhp, offset) do { } while (0) 618#define trace_rcu_invoke_kfree_callback(rcuname, rhp, offset) do { } while (0)
609#define trace_rcu_batch_end(rcuname, callbacks_invoked, cb, nr, iit, risk) \ 619#define trace_rcu_batch_end(rcuname, callbacks_invoked, cb, nr, iit, risk) \
610 do { } while (0) 620 do { } while (0)
611#define trace_rcu_torture_read(rcutorturename, rhp) do { } while (0) 621#define trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \
622 do { } while (0)
612#define trace_rcu_barrier(name, s, cpu, cnt, done) do { } while (0) 623#define trace_rcu_barrier(name, s, cpu, cnt, done) do { } while (0)
613 624
614#endif /* #else #ifdef CONFIG_RCU_TRACE */ 625#endif /* #else #ifdef CONFIG_RCU_TRACE */
diff --git a/init/Kconfig b/init/Kconfig
index be8b7f55312d..b023334df142 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -453,7 +453,7 @@ config TREE_RCU
453 453
454config TREE_PREEMPT_RCU 454config TREE_PREEMPT_RCU
455 bool "Preemptible tree-based hierarchical RCU" 455 bool "Preemptible tree-based hierarchical RCU"
456 depends on PREEMPT && SMP 456 depends on PREEMPT
457 help 457 help
458 This option selects the RCU implementation that is 458 This option selects the RCU implementation that is
459 designed for very large SMP systems with hundreds or 459 designed for very large SMP systems with hundreds or
@@ -461,6 +461,8 @@ config TREE_PREEMPT_RCU
461 is also required. It also scales down nicely to 461 is also required. It also scales down nicely to
462 smaller systems. 462 smaller systems.
463 463
464 Select this option if you are unsure.
465
464config TINY_RCU 466config TINY_RCU
465 bool "UP-only small-memory-footprint RCU" 467 bool "UP-only small-memory-footprint RCU"
466 depends on !PREEMPT && !SMP 468 depends on !PREEMPT && !SMP
@@ -486,6 +488,14 @@ config PREEMPT_RCU
486 This option enables preemptible-RCU code that is common between 488 This option enables preemptible-RCU code that is common between
487 the TREE_PREEMPT_RCU and TINY_PREEMPT_RCU implementations. 489 the TREE_PREEMPT_RCU and TINY_PREEMPT_RCU implementations.
488 490
491config RCU_STALL_COMMON
492 def_bool ( TREE_RCU || TREE_PREEMPT_RCU || RCU_TRACE )
493 help
494 This option enables RCU CPU stall code that is common between
495 the TINY and TREE variants of RCU. The purpose is to allow
496 the tiny variants to disable RCU CPU stall warnings, while
497 making these warnings mandatory for the tree variants.
498
489config CONTEXT_TRACKING 499config CONTEXT_TRACKING
490 bool 500 bool
491 501
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
index e0e07fd55508..d566aba7e801 100644
--- a/kernel/context_tracking.c
+++ b/kernel/context_tracking.c
@@ -1,3 +1,19 @@
1/*
2 * Context tracking: Probe on high level context boundaries such as kernel
3 * and userspace. This includes syscalls and exceptions entry/exit.
4 *
5 * This is used by RCU to remove its dependency on the timer tick while a CPU
6 * runs in userspace.
7 *
8 * Started by Frederic Weisbecker:
9 *
10 * Copyright (C) 2012 Red Hat, Inc., Frederic Weisbecker <fweisbec@redhat.com>
11 *
12 * Many thanks to Gilad Ben-Yossef, Paul McKenney, Ingo Molnar, Andrew Morton,
13 * Steven Rostedt, Peter Zijlstra for suggestions and improvements.
14 *
15 */
16
1#include <linux/context_tracking.h> 17#include <linux/context_tracking.h>
2#include <linux/rcupdate.h> 18#include <linux/rcupdate.h>
3#include <linux/sched.h> 19#include <linux/sched.h>
@@ -6,8 +22,8 @@
6 22
7struct context_tracking { 23struct context_tracking {
8 /* 24 /*
9 * When active is false, hooks are not set to 25 * When active is false, probes are unset in order
10 * minimize overhead: TIF flags are cleared 26 * to minimize overhead: TIF flags are cleared
11 * and calls to user_enter/exit are ignored. This 27 * and calls to user_enter/exit are ignored. This
12 * may be further optimized using static keys. 28 * may be further optimized using static keys.
13 */ 29 */
@@ -24,6 +40,15 @@ static DEFINE_PER_CPU(struct context_tracking, context_tracking) = {
24#endif 40#endif
25}; 41};
26 42
43/**
44 * user_enter - Inform the context tracking that the CPU is going to
45 * enter userspace mode.
46 *
47 * This function must be called right before we switch from the kernel
48 * to userspace, when it's guaranteed the remaining kernel instructions
49 * to execute won't use any RCU read side critical section because this
50 * function sets RCU in extended quiescent state.
51 */
27void user_enter(void) 52void user_enter(void)
28{ 53{
29 unsigned long flags; 54 unsigned long flags;
@@ -39,40 +64,70 @@ void user_enter(void)
39 if (in_interrupt()) 64 if (in_interrupt())
40 return; 65 return;
41 66
67 /* Kernel threads aren't supposed to go to userspace */
42 WARN_ON_ONCE(!current->mm); 68 WARN_ON_ONCE(!current->mm);
43 69
44 local_irq_save(flags); 70 local_irq_save(flags);
45 if (__this_cpu_read(context_tracking.active) && 71 if (__this_cpu_read(context_tracking.active) &&
46 __this_cpu_read(context_tracking.state) != IN_USER) { 72 __this_cpu_read(context_tracking.state) != IN_USER) {
47 __this_cpu_write(context_tracking.state, IN_USER); 73 __this_cpu_write(context_tracking.state, IN_USER);
74 /*
75 * At this stage, only low level arch entry code remains and
76 * then we'll run in userspace. We can assume there won't be
77 * any RCU read-side critical section until the next call to
78 * user_exit() or rcu_irq_enter(). Let's remove RCU's dependency
79 * on the tick.
80 */
48 rcu_user_enter(); 81 rcu_user_enter();
49 } 82 }
50 local_irq_restore(flags); 83 local_irq_restore(flags);
51} 84}
52 85
86
87/**
88 * user_exit - Inform the context tracking that the CPU is
89 * exiting userspace mode and entering the kernel.
90 *
91 * This function must be called after we entered the kernel from userspace
92 * before any use of RCU read side critical section. This potentially include
93 * any high level kernel code like syscalls, exceptions, signal handling, etc...
94 *
95 * This call supports re-entrancy. This way it can be called from any exception
96 * handler without needing to know if we came from userspace or not.
97 */
53void user_exit(void) 98void user_exit(void)
54{ 99{
55 unsigned long flags; 100 unsigned long flags;
56 101
57 /*
58 * Some contexts may involve an exception occuring in an irq,
59 * leading to that nesting:
60 * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit()
61 * This would mess up the dyntick_nesting count though. And rcu_irq_*()
62 * helpers are enough to protect RCU uses inside the exception. So
63 * just return immediately if we detect we are in an IRQ.
64 */
65 if (in_interrupt()) 102 if (in_interrupt())
66 return; 103 return;
67 104
68 local_irq_save(flags); 105 local_irq_save(flags);
69 if (__this_cpu_read(context_tracking.state) == IN_USER) { 106 if (__this_cpu_read(context_tracking.state) == IN_USER) {
70 __this_cpu_write(context_tracking.state, IN_KERNEL); 107 __this_cpu_write(context_tracking.state, IN_KERNEL);
108 /*
109 * We are going to run code that may use RCU. Inform
110 * RCU core about that (ie: we may need the tick again).
111 */
71 rcu_user_exit(); 112 rcu_user_exit();
72 } 113 }
73 local_irq_restore(flags); 114 local_irq_restore(flags);
74} 115}
75 116
117
118/**
119 * context_tracking_task_switch - context switch the syscall callbacks
120 * @prev: the task that is being switched out
121 * @next: the task that is being switched in
122 *
123 * The context tracking uses the syscall slow path to implement its user-kernel
124 * boundaries probes on syscalls. This way it doesn't impact the syscall fast
125 * path on CPUs that don't do context tracking.
126 *
127 * But we need to clear the flag on the previous task because it may later
128 * migrate to some CPU that doesn't do the context tracking. As such the TIF
129 * flag may not be desired there.
130 */
76void context_tracking_task_switch(struct task_struct *prev, 131void context_tracking_task_switch(struct task_struct *prev,
77 struct task_struct *next) 132 struct task_struct *next)
78{ 133{
diff --git a/kernel/rcu.h b/kernel/rcu.h
index 20dfba576c2b..7f8e7590e3e5 100644
--- a/kernel/rcu.h
+++ b/kernel/rcu.h
@@ -111,4 +111,11 @@ static inline bool __rcu_reclaim(char *rn, struct rcu_head *head)
111 111
112extern int rcu_expedited; 112extern int rcu_expedited;
113 113
114#ifdef CONFIG_RCU_STALL_COMMON
115
116extern int rcu_cpu_stall_suppress;
117int rcu_jiffies_till_stall_check(void);
118
119#endif /* #ifdef CONFIG_RCU_STALL_COMMON */
120
114#endif /* __LINUX_RCU_H */ 121#endif /* __LINUX_RCU_H */
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index a2cf76177b44..48ab70384a4c 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -404,11 +404,65 @@ EXPORT_SYMBOL_GPL(rcuhead_debug_descr);
404#endif /* #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */ 404#endif /* #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */
405 405
406#if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) || defined(CONFIG_RCU_TRACE) 406#if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) || defined(CONFIG_RCU_TRACE)
407void do_trace_rcu_torture_read(char *rcutorturename, struct rcu_head *rhp) 407void do_trace_rcu_torture_read(char *rcutorturename, struct rcu_head *rhp,
408 unsigned long secs,
409 unsigned long c_old, unsigned long c)
408{ 410{
409 trace_rcu_torture_read(rcutorturename, rhp); 411 trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c);
410} 412}
411EXPORT_SYMBOL_GPL(do_trace_rcu_torture_read); 413EXPORT_SYMBOL_GPL(do_trace_rcu_torture_read);
412#else 414#else
413#define do_trace_rcu_torture_read(rcutorturename, rhp) do { } while (0) 415#define do_trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \
416 do { } while (0)
414#endif 417#endif
418
419#ifdef CONFIG_RCU_STALL_COMMON
420
421#ifdef CONFIG_PROVE_RCU
422#define RCU_STALL_DELAY_DELTA (5 * HZ)
423#else
424#define RCU_STALL_DELAY_DELTA 0
425#endif
426
427int rcu_cpu_stall_suppress __read_mostly; /* 1 = suppress stall warnings. */
428int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT;
429
430module_param(rcu_cpu_stall_suppress, int, 0644);
431module_param(rcu_cpu_stall_timeout, int, 0644);
432
433int rcu_jiffies_till_stall_check(void)
434{
435 int till_stall_check = ACCESS_ONCE(rcu_cpu_stall_timeout);
436
437 /*
438 * Limit check must be consistent with the Kconfig limits
439 * for CONFIG_RCU_CPU_STALL_TIMEOUT.
440 */
441 if (till_stall_check < 3) {
442 ACCESS_ONCE(rcu_cpu_stall_timeout) = 3;
443 till_stall_check = 3;
444 } else if (till_stall_check > 300) {
445 ACCESS_ONCE(rcu_cpu_stall_timeout) = 300;
446 till_stall_check = 300;
447 }
448 return till_stall_check * HZ + RCU_STALL_DELAY_DELTA;
449}
450
451static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr)
452{
453 rcu_cpu_stall_suppress = 1;
454 return NOTIFY_DONE;
455}
456
457static struct notifier_block rcu_panic_block = {
458 .notifier_call = rcu_panic,
459};
460
461static int __init check_cpu_stall_init(void)
462{
463 atomic_notifier_chain_register(&panic_notifier_list, &rcu_panic_block);
464 return 0;
465}
466early_initcall(check_cpu_stall_init);
467
468#endif /* #ifdef CONFIG_RCU_STALL_COMMON */
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
index e7dce58f9c2a..a0714a51b6d7 100644
--- a/kernel/rcutiny.c
+++ b/kernel/rcutiny.c
@@ -51,10 +51,10 @@ static void __call_rcu(struct rcu_head *head,
51 void (*func)(struct rcu_head *rcu), 51 void (*func)(struct rcu_head *rcu),
52 struct rcu_ctrlblk *rcp); 52 struct rcu_ctrlblk *rcp);
53 53
54#include "rcutiny_plugin.h"
55
56static long long rcu_dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; 54static long long rcu_dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
57 55
56#include "rcutiny_plugin.h"
57
58/* Common code for rcu_idle_enter() and rcu_irq_exit(), see kernel/rcutree.c. */ 58/* Common code for rcu_idle_enter() and rcu_irq_exit(), see kernel/rcutree.c. */
59static void rcu_idle_enter_common(long long newval) 59static void rcu_idle_enter_common(long long newval)
60{ 60{
@@ -193,7 +193,7 @@ EXPORT_SYMBOL(rcu_is_cpu_idle);
193 * interrupts don't count, we must be running at the first interrupt 193 * interrupts don't count, we must be running at the first interrupt
194 * level. 194 * level.
195 */ 195 */
196int rcu_is_cpu_rrupt_from_idle(void) 196static int rcu_is_cpu_rrupt_from_idle(void)
197{ 197{
198 return rcu_dynticks_nesting <= 1; 198 return rcu_dynticks_nesting <= 1;
199} 199}
@@ -205,6 +205,7 @@ int rcu_is_cpu_rrupt_from_idle(void)
205 */ 205 */
206static int rcu_qsctr_help(struct rcu_ctrlblk *rcp) 206static int rcu_qsctr_help(struct rcu_ctrlblk *rcp)
207{ 207{
208 reset_cpu_stall_ticks(rcp);
208 if (rcp->rcucblist != NULL && 209 if (rcp->rcucblist != NULL &&
209 rcp->donetail != rcp->curtail) { 210 rcp->donetail != rcp->curtail) {
210 rcp->donetail = rcp->curtail; 211 rcp->donetail = rcp->curtail;
@@ -251,6 +252,7 @@ void rcu_bh_qs(int cpu)
251 */ 252 */
252void rcu_check_callbacks(int cpu, int user) 253void rcu_check_callbacks(int cpu, int user)
253{ 254{
255 check_cpu_stalls();
254 if (user || rcu_is_cpu_rrupt_from_idle()) 256 if (user || rcu_is_cpu_rrupt_from_idle())
255 rcu_sched_qs(cpu); 257 rcu_sched_qs(cpu);
256 else if (!in_softirq()) 258 else if (!in_softirq())
diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h
index f85016a2309b..8a233002faeb 100644
--- a/kernel/rcutiny_plugin.h
+++ b/kernel/rcutiny_plugin.h
@@ -33,6 +33,9 @@ struct rcu_ctrlblk {
33 struct rcu_head **donetail; /* ->next pointer of last "done" CB. */ 33 struct rcu_head **donetail; /* ->next pointer of last "done" CB. */
34 struct rcu_head **curtail; /* ->next pointer of last CB. */ 34 struct rcu_head **curtail; /* ->next pointer of last CB. */
35 RCU_TRACE(long qlen); /* Number of pending CBs. */ 35 RCU_TRACE(long qlen); /* Number of pending CBs. */
36 RCU_TRACE(unsigned long gp_start); /* Start time for stalls. */
37 RCU_TRACE(unsigned long ticks_this_gp); /* Statistic for stalls. */
38 RCU_TRACE(unsigned long jiffies_stall); /* Jiffies at next stall. */
36 RCU_TRACE(char *name); /* Name of RCU type. */ 39 RCU_TRACE(char *name); /* Name of RCU type. */
37}; 40};
38 41
@@ -54,6 +57,51 @@ int rcu_scheduler_active __read_mostly;
54EXPORT_SYMBOL_GPL(rcu_scheduler_active); 57EXPORT_SYMBOL_GPL(rcu_scheduler_active);
55#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ 58#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
56 59
60#ifdef CONFIG_RCU_TRACE
61
62static void check_cpu_stall(struct rcu_ctrlblk *rcp)
63{
64 unsigned long j;
65 unsigned long js;
66
67 if (rcu_cpu_stall_suppress)
68 return;
69 rcp->ticks_this_gp++;
70 j = jiffies;
71 js = rcp->jiffies_stall;
72 if (*rcp->curtail && ULONG_CMP_GE(j, js)) {
73 pr_err("INFO: %s stall on CPU (%lu ticks this GP) idle=%llx (t=%lu jiffies q=%ld)\n",
74 rcp->name, rcp->ticks_this_gp, rcu_dynticks_nesting,
75 jiffies - rcp->gp_start, rcp->qlen);
76 dump_stack();
77 }
78 if (*rcp->curtail && ULONG_CMP_GE(j, js))
79 rcp->jiffies_stall = jiffies +
80 3 * rcu_jiffies_till_stall_check() + 3;
81 else if (ULONG_CMP_GE(j, js))
82 rcp->jiffies_stall = jiffies + rcu_jiffies_till_stall_check();
83}
84
85static void check_cpu_stall_preempt(void);
86
87#endif /* #ifdef CONFIG_RCU_TRACE */
88
89static void reset_cpu_stall_ticks(struct rcu_ctrlblk *rcp)
90{
91#ifdef CONFIG_RCU_TRACE
92 rcp->ticks_this_gp = 0;
93 rcp->gp_start = jiffies;
94 rcp->jiffies_stall = jiffies + rcu_jiffies_till_stall_check();
95#endif /* #ifdef CONFIG_RCU_TRACE */
96}
97
98static void check_cpu_stalls(void)
99{
100 RCU_TRACE(check_cpu_stall(&rcu_bh_ctrlblk));
101 RCU_TRACE(check_cpu_stall(&rcu_sched_ctrlblk));
102 RCU_TRACE(check_cpu_stall_preempt());
103}
104
57#ifdef CONFIG_TINY_PREEMPT_RCU 105#ifdef CONFIG_TINY_PREEMPT_RCU
58 106
59#include <linux/delay.h> 107#include <linux/delay.h>
@@ -448,6 +496,7 @@ static void rcu_preempt_start_gp(void)
448 /* Official start of GP. */ 496 /* Official start of GP. */
449 rcu_preempt_ctrlblk.gpnum++; 497 rcu_preempt_ctrlblk.gpnum++;
450 RCU_TRACE(rcu_preempt_ctrlblk.n_grace_periods++); 498 RCU_TRACE(rcu_preempt_ctrlblk.n_grace_periods++);
499 reset_cpu_stall_ticks(&rcu_preempt_ctrlblk.rcb);
451 500
452 /* Any blocked RCU readers block new GP. */ 501 /* Any blocked RCU readers block new GP. */
453 if (rcu_preempt_blocked_readers_any()) 502 if (rcu_preempt_blocked_readers_any())
@@ -1054,4 +1103,11 @@ MODULE_AUTHOR("Paul E. McKenney");
1054MODULE_DESCRIPTION("Read-Copy Update tracing for tiny implementation"); 1103MODULE_DESCRIPTION("Read-Copy Update tracing for tiny implementation");
1055MODULE_LICENSE("GPL"); 1104MODULE_LICENSE("GPL");
1056 1105
1106static void check_cpu_stall_preempt(void)
1107{
1108#ifdef CONFIG_TINY_PREEMPT_RCU
1109 check_cpu_stall(&rcu_preempt_ctrlblk.rcb);
1110#endif /* #ifdef CONFIG_TINY_PREEMPT_RCU */
1111}
1112
1057#endif /* #ifdef CONFIG_RCU_TRACE */ 1113#endif /* #ifdef CONFIG_RCU_TRACE */
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 31dea01c85fd..cd4c35d097a4 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -46,6 +46,7 @@
46#include <linux/stat.h> 46#include <linux/stat.h>
47#include <linux/srcu.h> 47#include <linux/srcu.h>
48#include <linux/slab.h> 48#include <linux/slab.h>
49#include <linux/trace_clock.h>
49#include <asm/byteorder.h> 50#include <asm/byteorder.h>
50 51
51MODULE_LICENSE("GPL"); 52MODULE_LICENSE("GPL");
@@ -845,7 +846,7 @@ static int rcu_torture_boost(void *arg)
845 /* Wait for the next test interval. */ 846 /* Wait for the next test interval. */
846 oldstarttime = boost_starttime; 847 oldstarttime = boost_starttime;
847 while (ULONG_CMP_LT(jiffies, oldstarttime)) { 848 while (ULONG_CMP_LT(jiffies, oldstarttime)) {
848 schedule_timeout_uninterruptible(1); 849 schedule_timeout_interruptible(oldstarttime - jiffies);
849 rcu_stutter_wait("rcu_torture_boost"); 850 rcu_stutter_wait("rcu_torture_boost");
850 if (kthread_should_stop() || 851 if (kthread_should_stop() ||
851 fullstop != FULLSTOP_DONTSTOP) 852 fullstop != FULLSTOP_DONTSTOP)
@@ -1028,7 +1029,6 @@ void rcutorture_trace_dump(void)
1028 return; 1029 return;
1029 if (atomic_xchg(&beenhere, 1) != 0) 1030 if (atomic_xchg(&beenhere, 1) != 0)
1030 return; 1031 return;
1031 do_trace_rcu_torture_read(cur_ops->name, (struct rcu_head *)~0UL);
1032 ftrace_dump(DUMP_ALL); 1032 ftrace_dump(DUMP_ALL);
1033} 1033}
1034 1034
@@ -1042,13 +1042,16 @@ static void rcu_torture_timer(unsigned long unused)
1042{ 1042{
1043 int idx; 1043 int idx;
1044 int completed; 1044 int completed;
1045 int completed_end;
1045 static DEFINE_RCU_RANDOM(rand); 1046 static DEFINE_RCU_RANDOM(rand);
1046 static DEFINE_SPINLOCK(rand_lock); 1047 static DEFINE_SPINLOCK(rand_lock);
1047 struct rcu_torture *p; 1048 struct rcu_torture *p;
1048 int pipe_count; 1049 int pipe_count;
1050 unsigned long long ts;
1049 1051
1050 idx = cur_ops->readlock(); 1052 idx = cur_ops->readlock();
1051 completed = cur_ops->completed(); 1053 completed = cur_ops->completed();
1054 ts = trace_clock_local();
1052 p = rcu_dereference_check(rcu_torture_current, 1055 p = rcu_dereference_check(rcu_torture_current,
1053 rcu_read_lock_bh_held() || 1056 rcu_read_lock_bh_held() ||
1054 rcu_read_lock_sched_held() || 1057 rcu_read_lock_sched_held() ||
@@ -1058,7 +1061,6 @@ static void rcu_torture_timer(unsigned long unused)
1058 cur_ops->readunlock(idx); 1061 cur_ops->readunlock(idx);
1059 return; 1062 return;
1060 } 1063 }
1061 do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu);
1062 if (p->rtort_mbtest == 0) 1064 if (p->rtort_mbtest == 0)
1063 atomic_inc(&n_rcu_torture_mberror); 1065 atomic_inc(&n_rcu_torture_mberror);
1064 spin_lock(&rand_lock); 1066 spin_lock(&rand_lock);
@@ -1071,10 +1073,16 @@ static void rcu_torture_timer(unsigned long unused)
1071 /* Should not happen, but... */ 1073 /* Should not happen, but... */
1072 pipe_count = RCU_TORTURE_PIPE_LEN; 1074 pipe_count = RCU_TORTURE_PIPE_LEN;
1073 } 1075 }
1074 if (pipe_count > 1) 1076 completed_end = cur_ops->completed();
1077 if (pipe_count > 1) {
1078 unsigned long __maybe_unused ts_rem = do_div(ts, NSEC_PER_USEC);
1079
1080 do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu, ts,
1081 completed, completed_end);
1075 rcutorture_trace_dump(); 1082 rcutorture_trace_dump();
1083 }
1076 __this_cpu_inc(rcu_torture_count[pipe_count]); 1084 __this_cpu_inc(rcu_torture_count[pipe_count]);
1077 completed = cur_ops->completed() - completed; 1085 completed = completed_end - completed;
1078 if (completed > RCU_TORTURE_PIPE_LEN) { 1086 if (completed > RCU_TORTURE_PIPE_LEN) {
1079 /* Should not happen, but... */ 1087 /* Should not happen, but... */
1080 completed = RCU_TORTURE_PIPE_LEN; 1088 completed = RCU_TORTURE_PIPE_LEN;
@@ -1094,11 +1102,13 @@ static int
1094rcu_torture_reader(void *arg) 1102rcu_torture_reader(void *arg)
1095{ 1103{
1096 int completed; 1104 int completed;
1105 int completed_end;
1097 int idx; 1106 int idx;
1098 DEFINE_RCU_RANDOM(rand); 1107 DEFINE_RCU_RANDOM(rand);
1099 struct rcu_torture *p; 1108 struct rcu_torture *p;
1100 int pipe_count; 1109 int pipe_count;
1101 struct timer_list t; 1110 struct timer_list t;
1111 unsigned long long ts;
1102 1112
1103 VERBOSE_PRINTK_STRING("rcu_torture_reader task started"); 1113 VERBOSE_PRINTK_STRING("rcu_torture_reader task started");
1104 set_user_nice(current, 19); 1114 set_user_nice(current, 19);
@@ -1112,6 +1122,7 @@ rcu_torture_reader(void *arg)
1112 } 1122 }
1113 idx = cur_ops->readlock(); 1123 idx = cur_ops->readlock();
1114 completed = cur_ops->completed(); 1124 completed = cur_ops->completed();
1125 ts = trace_clock_local();
1115 p = rcu_dereference_check(rcu_torture_current, 1126 p = rcu_dereference_check(rcu_torture_current,
1116 rcu_read_lock_bh_held() || 1127 rcu_read_lock_bh_held() ||
1117 rcu_read_lock_sched_held() || 1128 rcu_read_lock_sched_held() ||
@@ -1122,7 +1133,6 @@ rcu_torture_reader(void *arg)
1122 schedule_timeout_interruptible(HZ); 1133 schedule_timeout_interruptible(HZ);
1123 continue; 1134 continue;
1124 } 1135 }
1125 do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu);
1126 if (p->rtort_mbtest == 0) 1136 if (p->rtort_mbtest == 0)
1127 atomic_inc(&n_rcu_torture_mberror); 1137 atomic_inc(&n_rcu_torture_mberror);
1128 cur_ops->read_delay(&rand); 1138 cur_ops->read_delay(&rand);
@@ -1132,10 +1142,17 @@ rcu_torture_reader(void *arg)
1132 /* Should not happen, but... */ 1142 /* Should not happen, but... */
1133 pipe_count = RCU_TORTURE_PIPE_LEN; 1143 pipe_count = RCU_TORTURE_PIPE_LEN;
1134 } 1144 }
1135 if (pipe_count > 1) 1145 completed_end = cur_ops->completed();
1146 if (pipe_count > 1) {
1147 unsigned long __maybe_unused ts_rem =
1148 do_div(ts, NSEC_PER_USEC);
1149
1150 do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu,
1151 ts, completed, completed_end);
1136 rcutorture_trace_dump(); 1152 rcutorture_trace_dump();
1153 }
1137 __this_cpu_inc(rcu_torture_count[pipe_count]); 1154 __this_cpu_inc(rcu_torture_count[pipe_count]);
1138 completed = cur_ops->completed() - completed; 1155 completed = completed_end - completed;
1139 if (completed > RCU_TORTURE_PIPE_LEN) { 1156 if (completed > RCU_TORTURE_PIPE_LEN) {
1140 /* Should not happen, but... */ 1157 /* Should not happen, but... */
1141 completed = RCU_TORTURE_PIPE_LEN; 1158 completed = RCU_TORTURE_PIPE_LEN;
@@ -1301,19 +1318,35 @@ static void rcu_torture_shuffle_tasks(void)
1301 set_cpus_allowed_ptr(reader_tasks[i], 1318 set_cpus_allowed_ptr(reader_tasks[i],
1302 shuffle_tmp_mask); 1319 shuffle_tmp_mask);
1303 } 1320 }
1304
1305 if (fakewriter_tasks) { 1321 if (fakewriter_tasks) {
1306 for (i = 0; i < nfakewriters; i++) 1322 for (i = 0; i < nfakewriters; i++)
1307 if (fakewriter_tasks[i]) 1323 if (fakewriter_tasks[i])
1308 set_cpus_allowed_ptr(fakewriter_tasks[i], 1324 set_cpus_allowed_ptr(fakewriter_tasks[i],
1309 shuffle_tmp_mask); 1325 shuffle_tmp_mask);
1310 } 1326 }
1311
1312 if (writer_task) 1327 if (writer_task)
1313 set_cpus_allowed_ptr(writer_task, shuffle_tmp_mask); 1328 set_cpus_allowed_ptr(writer_task, shuffle_tmp_mask);
1314
1315 if (stats_task) 1329 if (stats_task)
1316 set_cpus_allowed_ptr(stats_task, shuffle_tmp_mask); 1330 set_cpus_allowed_ptr(stats_task, shuffle_tmp_mask);
1331 if (stutter_task)
1332 set_cpus_allowed_ptr(stutter_task, shuffle_tmp_mask);
1333 if (fqs_task)
1334 set_cpus_allowed_ptr(fqs_task, shuffle_tmp_mask);
1335 if (shutdown_task)
1336 set_cpus_allowed_ptr(shutdown_task, shuffle_tmp_mask);
1337#ifdef CONFIG_HOTPLUG_CPU
1338 if (onoff_task)
1339 set_cpus_allowed_ptr(onoff_task, shuffle_tmp_mask);
1340#endif /* #ifdef CONFIG_HOTPLUG_CPU */
1341 if (stall_task)
1342 set_cpus_allowed_ptr(stall_task, shuffle_tmp_mask);
1343 if (barrier_cbs_tasks)
1344 for (i = 0; i < n_barrier_cbs; i++)
1345 if (barrier_cbs_tasks[i])
1346 set_cpus_allowed_ptr(barrier_cbs_tasks[i],
1347 shuffle_tmp_mask);
1348 if (barrier_task)
1349 set_cpus_allowed_ptr(barrier_task, shuffle_tmp_mask);
1317 1350
1318 if (rcu_idle_cpu == -1) 1351 if (rcu_idle_cpu == -1)
1319 rcu_idle_cpu = num_online_cpus() - 1; 1352 rcu_idle_cpu = num_online_cpus() - 1;
@@ -1749,7 +1782,7 @@ static int rcu_torture_barrier_init(void)
1749 barrier_cbs_wq = 1782 barrier_cbs_wq =
1750 kzalloc(n_barrier_cbs * sizeof(barrier_cbs_wq[0]), 1783 kzalloc(n_barrier_cbs * sizeof(barrier_cbs_wq[0]),
1751 GFP_KERNEL); 1784 GFP_KERNEL);
1752 if (barrier_cbs_tasks == NULL || barrier_cbs_wq == 0) 1785 if (barrier_cbs_tasks == NULL || !barrier_cbs_wq)
1753 return -ENOMEM; 1786 return -ENOMEM;
1754 for (i = 0; i < n_barrier_cbs; i++) { 1787 for (i = 0; i < n_barrier_cbs; i++) {
1755 init_waitqueue_head(&barrier_cbs_wq[i]); 1788 init_waitqueue_head(&barrier_cbs_wq[i]);
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index e441b77b614e..5b8ad827fd86 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -105,7 +105,7 @@ int rcu_num_nodes __read_mostly = NUM_RCU_NODES; /* Total # rcu_nodes in use. */
105 * The rcu_scheduler_active variable transitions from zero to one just 105 * The rcu_scheduler_active variable transitions from zero to one just
106 * before the first task is spawned. So when this variable is zero, RCU 106 * before the first task is spawned. So when this variable is zero, RCU
107 * can assume that there is but one task, allowing RCU to (for example) 107 * can assume that there is but one task, allowing RCU to (for example)
108 * optimized synchronize_sched() to a simple barrier(). When this variable 108 * optimize synchronize_sched() to a simple barrier(). When this variable
109 * is one, RCU must actually do all the hard work required to detect real 109 * is one, RCU must actually do all the hard work required to detect real
110 * grace periods. This variable is also used to suppress boot-time false 110 * grace periods. This variable is also used to suppress boot-time false
111 * positives from lockdep-RCU error checking. 111 * positives from lockdep-RCU error checking.
@@ -217,12 +217,6 @@ module_param(blimit, long, 0444);
217module_param(qhimark, long, 0444); 217module_param(qhimark, long, 0444);
218module_param(qlowmark, long, 0444); 218module_param(qlowmark, long, 0444);
219 219
220int rcu_cpu_stall_suppress __read_mostly; /* 1 = suppress stall warnings. */
221int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT;
222
223module_param(rcu_cpu_stall_suppress, int, 0644);
224module_param(rcu_cpu_stall_timeout, int, 0644);
225
226static ulong jiffies_till_first_fqs = RCU_JIFFIES_TILL_FORCE_QS; 220static ulong jiffies_till_first_fqs = RCU_JIFFIES_TILL_FORCE_QS;
227static ulong jiffies_till_next_fqs = RCU_JIFFIES_TILL_FORCE_QS; 221static ulong jiffies_till_next_fqs = RCU_JIFFIES_TILL_FORCE_QS;
228 222
@@ -305,17 +299,27 @@ cpu_has_callbacks_ready_to_invoke(struct rcu_data *rdp)
305} 299}
306 300
307/* 301/*
308 * Does the current CPU require a yet-as-unscheduled grace period? 302 * Does the current CPU require a not-yet-started grace period?
303 * The caller must have disabled interrupts to prevent races with
304 * normal callback registry.
309 */ 305 */
310static int 306static int
311cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp) 307cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
312{ 308{
313 struct rcu_head **ntp; 309 int i;
314 310
315 ntp = rdp->nxttail[RCU_DONE_TAIL + 311 if (rcu_gp_in_progress(rsp))
316 (ACCESS_ONCE(rsp->completed) != rdp->completed)]; 312 return 0; /* No, a grace period is already in progress. */
317 return rdp->nxttail[RCU_DONE_TAIL] && ntp && *ntp && 313 if (!rdp->nxttail[RCU_NEXT_TAIL])
318 !rcu_gp_in_progress(rsp); 314 return 0; /* No, this is a no-CBs (or offline) CPU. */
315 if (*rdp->nxttail[RCU_NEXT_READY_TAIL])
316 return 1; /* Yes, this CPU has newly registered callbacks. */
317 for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++)
318 if (rdp->nxttail[i - 1] != rdp->nxttail[i] &&
319 ULONG_CMP_LT(ACCESS_ONCE(rsp->completed),
320 rdp->nxtcompleted[i]))
321 return 1; /* Yes, CBs for future grace period. */
322 return 0; /* No grace period needed. */
319} 323}
320 324
321/* 325/*
@@ -336,7 +340,7 @@ static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
336static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval, 340static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval,
337 bool user) 341 bool user)
338{ 342{
339 trace_rcu_dyntick("Start", oldval, 0); 343 trace_rcu_dyntick("Start", oldval, rdtp->dynticks_nesting);
340 if (!user && !is_idle_task(current)) { 344 if (!user && !is_idle_task(current)) {
341 struct task_struct *idle = idle_task(smp_processor_id()); 345 struct task_struct *idle = idle_task(smp_processor_id());
342 346
@@ -727,7 +731,7 @@ EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online);
727 * interrupt from idle, return true. The caller must have at least 731 * interrupt from idle, return true. The caller must have at least
728 * disabled preemption. 732 * disabled preemption.
729 */ 733 */
730int rcu_is_cpu_rrupt_from_idle(void) 734static int rcu_is_cpu_rrupt_from_idle(void)
731{ 735{
732 return __get_cpu_var(rcu_dynticks).dynticks_nesting <= 1; 736 return __get_cpu_var(rcu_dynticks).dynticks_nesting <= 1;
733} 737}
@@ -793,28 +797,10 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
793 return 0; 797 return 0;
794} 798}
795 799
796static int jiffies_till_stall_check(void)
797{
798 int till_stall_check = ACCESS_ONCE(rcu_cpu_stall_timeout);
799
800 /*
801 * Limit check must be consistent with the Kconfig limits
802 * for CONFIG_RCU_CPU_STALL_TIMEOUT.
803 */
804 if (till_stall_check < 3) {
805 ACCESS_ONCE(rcu_cpu_stall_timeout) = 3;
806 till_stall_check = 3;
807 } else if (till_stall_check > 300) {
808 ACCESS_ONCE(rcu_cpu_stall_timeout) = 300;
809 till_stall_check = 300;
810 }
811 return till_stall_check * HZ + RCU_STALL_DELAY_DELTA;
812}
813
814static void record_gp_stall_check_time(struct rcu_state *rsp) 800static void record_gp_stall_check_time(struct rcu_state *rsp)
815{ 801{
816 rsp->gp_start = jiffies; 802 rsp->gp_start = jiffies;
817 rsp->jiffies_stall = jiffies + jiffies_till_stall_check(); 803 rsp->jiffies_stall = jiffies + rcu_jiffies_till_stall_check();
818} 804}
819 805
820/* 806/*
@@ -857,7 +843,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
857 raw_spin_unlock_irqrestore(&rnp->lock, flags); 843 raw_spin_unlock_irqrestore(&rnp->lock, flags);
858 return; 844 return;
859 } 845 }
860 rsp->jiffies_stall = jiffies + 3 * jiffies_till_stall_check() + 3; 846 rsp->jiffies_stall = jiffies + 3 * rcu_jiffies_till_stall_check() + 3;
861 raw_spin_unlock_irqrestore(&rnp->lock, flags); 847 raw_spin_unlock_irqrestore(&rnp->lock, flags);
862 848
863 /* 849 /*
@@ -935,7 +921,7 @@ static void print_cpu_stall(struct rcu_state *rsp)
935 raw_spin_lock_irqsave(&rnp->lock, flags); 921 raw_spin_lock_irqsave(&rnp->lock, flags);
936 if (ULONG_CMP_GE(jiffies, rsp->jiffies_stall)) 922 if (ULONG_CMP_GE(jiffies, rsp->jiffies_stall))
937 rsp->jiffies_stall = jiffies + 923 rsp->jiffies_stall = jiffies +
938 3 * jiffies_till_stall_check() + 3; 924 3 * rcu_jiffies_till_stall_check() + 3;
939 raw_spin_unlock_irqrestore(&rnp->lock, flags); 925 raw_spin_unlock_irqrestore(&rnp->lock, flags);
940 926
941 set_need_resched(); /* kick ourselves to get things going. */ 927 set_need_resched(); /* kick ourselves to get things going. */
@@ -966,12 +952,6 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
966 } 952 }
967} 953}
968 954
969static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr)
970{
971 rcu_cpu_stall_suppress = 1;
972 return NOTIFY_DONE;
973}
974
975/** 955/**
976 * rcu_cpu_stall_reset - prevent further stall warnings in current grace period 956 * rcu_cpu_stall_reset - prevent further stall warnings in current grace period
977 * 957 *
@@ -989,15 +969,6 @@ void rcu_cpu_stall_reset(void)
989 rsp->jiffies_stall = jiffies + ULONG_MAX / 2; 969 rsp->jiffies_stall = jiffies + ULONG_MAX / 2;
990} 970}
991 971
992static struct notifier_block rcu_panic_block = {
993 .notifier_call = rcu_panic,
994};
995
996static void __init check_cpu_stall_init(void)
997{
998 atomic_notifier_chain_register(&panic_notifier_list, &rcu_panic_block);
999}
1000
1001/* 972/*
1002 * Update CPU-local rcu_data state to record the newly noticed grace period. 973 * Update CPU-local rcu_data state to record the newly noticed grace period.
1003 * This is used both when we started the grace period and when we notice 974 * This is used both when we started the grace period and when we notice
@@ -1071,6 +1042,145 @@ static void init_callback_list(struct rcu_data *rdp)
1071} 1042}
1072 1043
1073/* 1044/*
1045 * Determine the value that ->completed will have at the end of the
1046 * next subsequent grace period. This is used to tag callbacks so that
1047 * a CPU can invoke callbacks in a timely fashion even if that CPU has
1048 * been dyntick-idle for an extended period with callbacks under the
1049 * influence of RCU_FAST_NO_HZ.
1050 *
1051 * The caller must hold rnp->lock with interrupts disabled.
1052 */
1053static unsigned long rcu_cbs_completed(struct rcu_state *rsp,
1054 struct rcu_node *rnp)
1055{
1056 /*
1057 * If RCU is idle, we just wait for the next grace period.
1058 * But we can only be sure that RCU is idle if we are looking
1059 * at the root rcu_node structure -- otherwise, a new grace
1060 * period might have started, but just not yet gotten around
1061 * to initializing the current non-root rcu_node structure.
1062 */
1063 if (rcu_get_root(rsp) == rnp && rnp->gpnum == rnp->completed)
1064 return rnp->completed + 1;
1065
1066 /*
1067 * Otherwise, wait for a possible partial grace period and
1068 * then the subsequent full grace period.
1069 */
1070 return rnp->completed + 2;
1071}
1072
1073/*
1074 * If there is room, assign a ->completed number to any callbacks on
1075 * this CPU that have not already been assigned. Also accelerate any
1076 * callbacks that were previously assigned a ->completed number that has
1077 * since proven to be too conservative, which can happen if callbacks get
1078 * assigned a ->completed number while RCU is idle, but with reference to
1079 * a non-root rcu_node structure. This function is idempotent, so it does
1080 * not hurt to call it repeatedly.
1081 *
1082 * The caller must hold rnp->lock with interrupts disabled.
1083 */
1084static void rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
1085 struct rcu_data *rdp)
1086{
1087 unsigned long c;
1088 int i;
1089
1090 /* If the CPU has no callbacks, nothing to do. */
1091 if (!rdp->nxttail[RCU_NEXT_TAIL] || !*rdp->nxttail[RCU_DONE_TAIL])
1092 return;
1093
1094 /*
1095 * Starting from the sublist containing the callbacks most
1096 * recently assigned a ->completed number and working down, find the
1097 * first sublist that is not assignable to an upcoming grace period.
1098 * Such a sublist has something in it (first two tests) and has
1099 * a ->completed number assigned that will complete sooner than
1100 * the ->completed number for newly arrived callbacks (last test).
1101 *
1102 * The key point is that any later sublist can be assigned the
1103 * same ->completed number as the newly arrived callbacks, which
1104 * means that the callbacks in any of these later sublist can be
1105 * grouped into a single sublist, whether or not they have already
1106 * been assigned a ->completed number.
1107 */
1108 c = rcu_cbs_completed(rsp, rnp);
1109 for (i = RCU_NEXT_TAIL - 1; i > RCU_DONE_TAIL; i--)
1110 if (rdp->nxttail[i] != rdp->nxttail[i - 1] &&
1111 !ULONG_CMP_GE(rdp->nxtcompleted[i], c))
1112 break;
1113
1114 /*
1115 * If there are no sublist for unassigned callbacks, leave.
1116 * At the same time, advance "i" one sublist, so that "i" will
1117 * index into the sublist where all the remaining callbacks should
1118 * be grouped into.
1119 */
1120 if (++i >= RCU_NEXT_TAIL)
1121 return;
1122
1123 /*
1124 * Assign all subsequent callbacks' ->completed number to the next
1125 * full grace period and group them all in the sublist initially
1126 * indexed by "i".
1127 */
1128 for (; i <= RCU_NEXT_TAIL; i++) {
1129 rdp->nxttail[i] = rdp->nxttail[RCU_NEXT_TAIL];
1130 rdp->nxtcompleted[i] = c;
1131 }
1132
1133 /* Trace depending on how much we were able to accelerate. */
1134 if (!*rdp->nxttail[RCU_WAIT_TAIL])
1135 trace_rcu_grace_period(rsp->name, rdp->gpnum, "AccWaitCB");
1136 else
1137 trace_rcu_grace_period(rsp->name, rdp->gpnum, "AccReadyCB");
1138}
1139
1140/*
1141 * Move any callbacks whose grace period has completed to the
1142 * RCU_DONE_TAIL sublist, then compact the remaining sublists and
1143 * assign ->completed numbers to any callbacks in the RCU_NEXT_TAIL
1144 * sublist. This function is idempotent, so it does not hurt to
1145 * invoke it repeatedly. As long as it is not invoked -too- often...
1146 *
1147 * The caller must hold rnp->lock with interrupts disabled.
1148 */
1149static void rcu_advance_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
1150 struct rcu_data *rdp)
1151{
1152 int i, j;
1153
1154 /* If the CPU has no callbacks, nothing to do. */
1155 if (!rdp->nxttail[RCU_NEXT_TAIL] || !*rdp->nxttail[RCU_DONE_TAIL])
1156 return;
1157
1158 /*
1159 * Find all callbacks whose ->completed numbers indicate that they
1160 * are ready to invoke, and put them into the RCU_DONE_TAIL sublist.
1161 */
1162 for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++) {
1163 if (ULONG_CMP_LT(rnp->completed, rdp->nxtcompleted[i]))
1164 break;
1165 rdp->nxttail[RCU_DONE_TAIL] = rdp->nxttail[i];
1166 }
1167 /* Clean up any sublist tail pointers that were misordered above. */
1168 for (j = RCU_WAIT_TAIL; j < i; j++)
1169 rdp->nxttail[j] = rdp->nxttail[RCU_DONE_TAIL];
1170
1171 /* Copy down callbacks to fill in empty sublists. */
1172 for (j = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++, j++) {
1173 if (rdp->nxttail[j] == rdp->nxttail[RCU_NEXT_TAIL])
1174 break;
1175 rdp->nxttail[j] = rdp->nxttail[i];
1176 rdp->nxtcompleted[j] = rdp->nxtcompleted[i];
1177 }
1178
1179 /* Classify any remaining callbacks. */
1180 rcu_accelerate_cbs(rsp, rnp, rdp);
1181}
1182
1183/*
1074 * Advance this CPU's callbacks, but only if the current grace period 1184 * Advance this CPU's callbacks, but only if the current grace period
1075 * has ended. This may be called only from the CPU to whom the rdp 1185 * has ended. This may be called only from the CPU to whom the rdp
1076 * belongs. In addition, the corresponding leaf rcu_node structure's 1186 * belongs. In addition, the corresponding leaf rcu_node structure's
@@ -1080,12 +1190,15 @@ static void
1080__rcu_process_gp_end(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) 1190__rcu_process_gp_end(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp)
1081{ 1191{
1082 /* Did another grace period end? */ 1192 /* Did another grace period end? */
1083 if (rdp->completed != rnp->completed) { 1193 if (rdp->completed == rnp->completed) {
1084 1194
1085 /* Advance callbacks. No harm if list empty. */ 1195 /* No, so just accelerate recent callbacks. */
1086 rdp->nxttail[RCU_DONE_TAIL] = rdp->nxttail[RCU_WAIT_TAIL]; 1196 rcu_accelerate_cbs(rsp, rnp, rdp);
1087 rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_READY_TAIL]; 1197
1088 rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; 1198 } else {
1199
1200 /* Advance callbacks. */
1201 rcu_advance_cbs(rsp, rnp, rdp);
1089 1202
1090 /* Remember that we saw this grace-period completion. */ 1203 /* Remember that we saw this grace-period completion. */
1091 rdp->completed = rnp->completed; 1204 rdp->completed = rnp->completed;
@@ -1392,17 +1505,10 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
1392 /* 1505 /*
1393 * Because there is no grace period in progress right now, 1506 * Because there is no grace period in progress right now,
1394 * any callbacks we have up to this point will be satisfied 1507 * any callbacks we have up to this point will be satisfied
1395 * by the next grace period. So promote all callbacks to be 1508 * by the next grace period. So this is a good place to
1396 * handled after the end of the next grace period. If the 1509 * assign a grace period number to recently posted callbacks.
1397 * CPU is not yet aware of the end of the previous grace period,
1398 * we need to allow for the callback advancement that will
1399 * occur when it does become aware. Deadlock prevents us from
1400 * making it aware at this point: We cannot acquire a leaf
1401 * rcu_node ->lock while holding the root rcu_node ->lock.
1402 */ 1510 */
1403 rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; 1511 rcu_accelerate_cbs(rsp, rnp, rdp);
1404 if (rdp->completed == rsp->completed)
1405 rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
1406 1512
1407 rsp->gp_flags = RCU_GP_FLAG_INIT; 1513 rsp->gp_flags = RCU_GP_FLAG_INIT;
1408 raw_spin_unlock(&rnp->lock); /* Interrupts remain disabled. */ 1514 raw_spin_unlock(&rnp->lock); /* Interrupts remain disabled. */
@@ -1527,7 +1633,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp)
1527 * This GP can't end until cpu checks in, so all of our 1633 * This GP can't end until cpu checks in, so all of our
1528 * callbacks can be processed during the next GP. 1634 * callbacks can be processed during the next GP.
1529 */ 1635 */
1530 rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; 1636 rcu_accelerate_cbs(rsp, rnp, rdp);
1531 1637
1532 rcu_report_qs_rnp(mask, rsp, rnp, flags); /* rlses rnp->lock */ 1638 rcu_report_qs_rnp(mask, rsp, rnp, flags); /* rlses rnp->lock */
1533 } 1639 }
@@ -1779,7 +1885,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
1779 long bl, count, count_lazy; 1885 long bl, count, count_lazy;
1780 int i; 1886 int i;
1781 1887
1782 /* If no callbacks are ready, just return.*/ 1888 /* If no callbacks are ready, just return. */
1783 if (!cpu_has_callbacks_ready_to_invoke(rdp)) { 1889 if (!cpu_has_callbacks_ready_to_invoke(rdp)) {
1784 trace_rcu_batch_start(rsp->name, rdp->qlen_lazy, rdp->qlen, 0); 1890 trace_rcu_batch_start(rsp->name, rdp->qlen_lazy, rdp->qlen, 0);
1785 trace_rcu_batch_end(rsp->name, 0, !!ACCESS_ONCE(rdp->nxtlist), 1891 trace_rcu_batch_end(rsp->name, 0, !!ACCESS_ONCE(rdp->nxtlist),
@@ -2008,19 +2114,19 @@ __rcu_process_callbacks(struct rcu_state *rsp)
2008 2114
2009 WARN_ON_ONCE(rdp->beenonline == 0); 2115 WARN_ON_ONCE(rdp->beenonline == 0);
2010 2116
2011 /* 2117 /* Handle the end of a grace period that some other CPU ended. */
2012 * Advance callbacks in response to end of earlier grace
2013 * period that some other CPU ended.
2014 */
2015 rcu_process_gp_end(rsp, rdp); 2118 rcu_process_gp_end(rsp, rdp);
2016 2119
2017 /* Update RCU state based on any recent quiescent states. */ 2120 /* Update RCU state based on any recent quiescent states. */
2018 rcu_check_quiescent_state(rsp, rdp); 2121 rcu_check_quiescent_state(rsp, rdp);
2019 2122
2020 /* Does this CPU require a not-yet-started grace period? */ 2123 /* Does this CPU require a not-yet-started grace period? */
2124 local_irq_save(flags);
2021 if (cpu_needs_another_gp(rsp, rdp)) { 2125 if (cpu_needs_another_gp(rsp, rdp)) {
2022 raw_spin_lock_irqsave(&rcu_get_root(rsp)->lock, flags); 2126 raw_spin_lock(&rcu_get_root(rsp)->lock); /* irqs disabled. */
2023 rcu_start_gp(rsp, flags); /* releases above lock */ 2127 rcu_start_gp(rsp, flags); /* releases above lock */
2128 } else {
2129 local_irq_restore(flags);
2024 } 2130 }
2025 2131
2026 /* If there are callbacks ready, invoke them. */ 2132 /* If there are callbacks ready, invoke them. */
@@ -2719,9 +2825,6 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
2719 rdp->dynticks = &per_cpu(rcu_dynticks, cpu); 2825 rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
2720 WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE); 2826 WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE);
2721 WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1); 2827 WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1);
2722#ifdef CONFIG_RCU_USER_QS
2723 WARN_ON_ONCE(rdp->dynticks->in_user);
2724#endif
2725 rdp->cpu = cpu; 2828 rdp->cpu = cpu;
2726 rdp->rsp = rsp; 2829 rdp->rsp = rsp;
2727 rcu_boot_init_nocb_percpu_data(rdp); 2830 rcu_boot_init_nocb_percpu_data(rdp);
@@ -2938,6 +3041,10 @@ static void __init rcu_init_one(struct rcu_state *rsp,
2938 3041
2939 BUILD_BUG_ON(MAX_RCU_LVLS > ARRAY_SIZE(buf)); /* Fix buf[] init! */ 3042 BUILD_BUG_ON(MAX_RCU_LVLS > ARRAY_SIZE(buf)); /* Fix buf[] init! */
2940 3043
3044 /* Silence gcc 4.8 warning about array index out of range. */
3045 if (rcu_num_lvls > RCU_NUM_LVLS)
3046 panic("rcu_init_one: rcu_num_lvls overflow");
3047
2941 /* Initialize the level-tracking arrays. */ 3048 /* Initialize the level-tracking arrays. */
2942 3049
2943 for (i = 0; i < rcu_num_lvls; i++) 3050 for (i = 0; i < rcu_num_lvls; i++)
@@ -3074,7 +3181,6 @@ void __init rcu_init(void)
3074 cpu_notifier(rcu_cpu_notify, 0); 3181 cpu_notifier(rcu_cpu_notify, 0);
3075 for_each_online_cpu(cpu) 3182 for_each_online_cpu(cpu)
3076 rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)cpu); 3183 rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)cpu);
3077 check_cpu_stall_init();
3078} 3184}
3079 3185
3080#include "rcutree_plugin.h" 3186#include "rcutree_plugin.h"
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 4b69291b093d..c896b5045d9d 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -102,10 +102,6 @@ struct rcu_dynticks {
102 /* idle-period nonlazy_posted snapshot. */ 102 /* idle-period nonlazy_posted snapshot. */
103 int tick_nohz_enabled_snap; /* Previously seen value from sysfs. */ 103 int tick_nohz_enabled_snap; /* Previously seen value from sysfs. */
104#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */ 104#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
105#ifdef CONFIG_RCU_USER_QS
106 bool ignore_user_qs; /* Treat userspace as extended QS or not */
107 bool in_user; /* Is the CPU in userland from RCU POV? */
108#endif
109}; 105};
110 106
111/* RCU's kthread states for tracing. */ 107/* RCU's kthread states for tracing. */
@@ -282,6 +278,8 @@ struct rcu_data {
282 */ 278 */
283 struct rcu_head *nxtlist; 279 struct rcu_head *nxtlist;
284 struct rcu_head **nxttail[RCU_NEXT_SIZE]; 280 struct rcu_head **nxttail[RCU_NEXT_SIZE];
281 unsigned long nxtcompleted[RCU_NEXT_SIZE];
282 /* grace periods for sublists. */
285 long qlen_lazy; /* # of lazy queued callbacks */ 283 long qlen_lazy; /* # of lazy queued callbacks */
286 long qlen; /* # of queued callbacks, incl lazy */ 284 long qlen; /* # of queued callbacks, incl lazy */
287 long qlen_last_fqs_check; 285 long qlen_last_fqs_check;
@@ -343,11 +341,6 @@ struct rcu_data {
343 341
344#define RCU_JIFFIES_TILL_FORCE_QS 3 /* for rsp->jiffies_force_qs */ 342#define RCU_JIFFIES_TILL_FORCE_QS 3 /* for rsp->jiffies_force_qs */
345 343
346#ifdef CONFIG_PROVE_RCU
347#define RCU_STALL_DELAY_DELTA (5 * HZ)
348#else
349#define RCU_STALL_DELAY_DELTA 0
350#endif
351#define RCU_STALL_RAT_DELAY 2 /* Allow other CPUs time */ 344#define RCU_STALL_RAT_DELAY 2 /* Allow other CPUs time */
352 /* to take at least one */ 345 /* to take at least one */
353 /* scheduling clock irq */ 346 /* scheduling clock irq */
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index f6e5ec2932b4..c1cc7e17ff9d 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -40,8 +40,7 @@
40#ifdef CONFIG_RCU_NOCB_CPU 40#ifdef CONFIG_RCU_NOCB_CPU
41static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */ 41static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */
42static bool have_rcu_nocb_mask; /* Was rcu_nocb_mask allocated? */ 42static bool have_rcu_nocb_mask; /* Was rcu_nocb_mask allocated? */
43static bool rcu_nocb_poll; /* Offload kthread are to poll. */ 43static bool __read_mostly rcu_nocb_poll; /* Offload kthread are to poll. */
44module_param(rcu_nocb_poll, bool, 0444);
45static char __initdata nocb_buf[NR_CPUS * 5]; 44static char __initdata nocb_buf[NR_CPUS * 5];
46#endif /* #ifdef CONFIG_RCU_NOCB_CPU */ 45#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
47 46
@@ -2159,6 +2158,13 @@ static int __init rcu_nocb_setup(char *str)
2159} 2158}
2160__setup("rcu_nocbs=", rcu_nocb_setup); 2159__setup("rcu_nocbs=", rcu_nocb_setup);
2161 2160
2161static int __init parse_rcu_nocb_poll(char *arg)
2162{
2163 rcu_nocb_poll = 1;
2164 return 0;
2165}
2166early_param("rcu_nocb_poll", parse_rcu_nocb_poll);
2167
2162/* Is the specified CPU a no-CPUs CPU? */ 2168/* Is the specified CPU a no-CPUs CPU? */
2163static bool is_nocb_cpu(int cpu) 2169static bool is_nocb_cpu(int cpu)
2164{ 2170{
@@ -2366,10 +2372,11 @@ static int rcu_nocb_kthread(void *arg)
2366 for (;;) { 2372 for (;;) {
2367 /* If not polling, wait for next batch of callbacks. */ 2373 /* If not polling, wait for next batch of callbacks. */
2368 if (!rcu_nocb_poll) 2374 if (!rcu_nocb_poll)
2369 wait_event(rdp->nocb_wq, rdp->nocb_head); 2375 wait_event_interruptible(rdp->nocb_wq, rdp->nocb_head);
2370 list = ACCESS_ONCE(rdp->nocb_head); 2376 list = ACCESS_ONCE(rdp->nocb_head);
2371 if (!list) { 2377 if (!list) {
2372 schedule_timeout_interruptible(1); 2378 schedule_timeout_interruptible(1);
2379 flush_signals(current);
2373 continue; 2380 continue;
2374 } 2381 }
2375 2382
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index 394783531cbb..1bbb1b200cec 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -44,6 +44,7 @@ u64 notrace trace_clock_local(void)
44 44
45 return clock; 45 return clock;
46} 46}
47EXPORT_SYMBOL_GPL(trace_clock_local);
47 48
48/* 49/*
49 * trace_clock(): 'between' trace clock. Not completely serialized, 50 * trace_clock(): 'between' trace clock. Not completely serialized,
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 67604e599384..a1714c897e3f 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -605,61 +605,6 @@ config PROVE_LOCKING
605 605
606 For more details, see Documentation/lockdep-design.txt. 606 For more details, see Documentation/lockdep-design.txt.
607 607
608config PROVE_RCU
609 bool "RCU debugging: prove RCU correctness"
610 depends on PROVE_LOCKING
611 default n
612 help
613 This feature enables lockdep extensions that check for correct
614 use of RCU APIs. This is currently under development. Say Y
615 if you want to debug RCU usage or help work on the PROVE_RCU
616 feature.
617
618 Say N if you are unsure.
619
620config PROVE_RCU_REPEATEDLY
621 bool "RCU debugging: don't disable PROVE_RCU on first splat"
622 depends on PROVE_RCU
623 default n
624 help
625 By itself, PROVE_RCU will disable checking upon issuing the
626 first warning (or "splat"). This feature prevents such
627 disabling, allowing multiple RCU-lockdep warnings to be printed
628 on a single reboot.
629
630 Say Y to allow multiple RCU-lockdep warnings per boot.
631
632 Say N if you are unsure.
633
634config PROVE_RCU_DELAY
635 bool "RCU debugging: preemptible RCU race provocation"
636 depends on DEBUG_KERNEL && PREEMPT_RCU
637 default n
638 help
639 There is a class of races that involve an unlikely preemption
640 of __rcu_read_unlock() just after ->rcu_read_lock_nesting has
641 been set to INT_MIN. This feature inserts a delay at that
642 point to increase the probability of these races.
643
644 Say Y to increase probability of preemption of __rcu_read_unlock().
645
646 Say N if you are unsure.
647
648config SPARSE_RCU_POINTER
649 bool "RCU debugging: sparse-based checks for pointer usage"
650 default n
651 help
652 This feature enables the __rcu sparse annotation for
653 RCU-protected pointers. This annotation will cause sparse
654 to flag any non-RCU used of annotated pointers. This can be
655 helpful when debugging RCU usage. Please note that this feature
656 is not intended to enforce code cleanliness; it is instead merely
657 a debugging aid.
658
659 Say Y to make sparse flag questionable use of RCU-protected pointers
660
661 Say N if you are unsure.
662
663config LOCKDEP 608config LOCKDEP
664 bool 609 bool
665 depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT 610 depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
@@ -937,6 +882,63 @@ config BOOT_PRINTK_DELAY
937 BOOT_PRINTK_DELAY also may cause LOCKUP_DETECTOR to detect 882 BOOT_PRINTK_DELAY also may cause LOCKUP_DETECTOR to detect
938 what it believes to be lockup conditions. 883 what it believes to be lockup conditions.
939 884
885menu "RCU Debugging"
886
887config PROVE_RCU
888 bool "RCU debugging: prove RCU correctness"
889 depends on PROVE_LOCKING
890 default n
891 help
892 This feature enables lockdep extensions that check for correct
893 use of RCU APIs. This is currently under development. Say Y
894 if you want to debug RCU usage or help work on the PROVE_RCU
895 feature.
896
897 Say N if you are unsure.
898
899config PROVE_RCU_REPEATEDLY
900 bool "RCU debugging: don't disable PROVE_RCU on first splat"
901 depends on PROVE_RCU
902 default n
903 help
904 By itself, PROVE_RCU will disable checking upon issuing the
905 first warning (or "splat"). This feature prevents such
906 disabling, allowing multiple RCU-lockdep warnings to be printed
907 on a single reboot.
908
909 Say Y to allow multiple RCU-lockdep warnings per boot.
910
911 Say N if you are unsure.
912
913config PROVE_RCU_DELAY
914 bool "RCU debugging: preemptible RCU race provocation"
915 depends on DEBUG_KERNEL && PREEMPT_RCU
916 default n
917 help
918 There is a class of races that involve an unlikely preemption
919 of __rcu_read_unlock() just after ->rcu_read_lock_nesting has
920 been set to INT_MIN. This feature inserts a delay at that
921 point to increase the probability of these races.
922
923 Say Y to increase probability of preemption of __rcu_read_unlock().
924
925 Say N if you are unsure.
926
927config SPARSE_RCU_POINTER
928 bool "RCU debugging: sparse-based checks for pointer usage"
929 default n
930 help
931 This feature enables the __rcu sparse annotation for
932 RCU-protected pointers. This annotation will cause sparse
933 to flag any non-RCU used of annotated pointers. This can be
934 helpful when debugging RCU usage. Please note that this feature
935 is not intended to enforce code cleanliness; it is instead merely
936 a debugging aid.
937
938 Say Y to make sparse flag questionable use of RCU-protected pointers
939
940 Say N if you are unsure.
941
940config RCU_TORTURE_TEST 942config RCU_TORTURE_TEST
941 tristate "torture tests for RCU" 943 tristate "torture tests for RCU"
942 depends on DEBUG_KERNEL 944 depends on DEBUG_KERNEL
@@ -970,7 +972,7 @@ config RCU_TORTURE_TEST_RUNNABLE
970 972
971config RCU_CPU_STALL_TIMEOUT 973config RCU_CPU_STALL_TIMEOUT
972 int "RCU CPU stall timeout in seconds" 974 int "RCU CPU stall timeout in seconds"
973 depends on TREE_RCU || TREE_PREEMPT_RCU 975 depends on RCU_STALL_COMMON
974 range 3 300 976 range 3 300
975 default 21 977 default 21
976 help 978 help
@@ -1008,6 +1010,7 @@ config RCU_CPU_STALL_INFO
1008config RCU_TRACE 1010config RCU_TRACE
1009 bool "Enable tracing for RCU" 1011 bool "Enable tracing for RCU"
1010 depends on DEBUG_KERNEL 1012 depends on DEBUG_KERNEL
1013 select TRACE_CLOCK
1011 help 1014 help
1012 This option provides tracing in RCU which presents stats 1015 This option provides tracing in RCU which presents stats
1013 in debugfs for debugging RCU implementation. 1016 in debugfs for debugging RCU implementation.
@@ -1015,6 +1018,8 @@ config RCU_TRACE
1015 Say Y here if you want to enable RCU tracing 1018 Say Y here if you want to enable RCU tracing
1016 Say N if you are unsure. 1019 Say N if you are unsure.
1017 1020
1021endmenu # "RCU Debugging"
1022
1018config KPROBES_SANITY_TEST 1023config KPROBES_SANITY_TEST
1019 bool "Kprobes sanity tests" 1024 bool "Kprobes sanity tests"
1020 depends on DEBUG_KERNEL 1025 depends on DEBUG_KERNEL