Diffstat (limited to 'kernel')
-rw-r--r--  kernel/context_tracking.c    |  75
-rw-r--r--  kernel/rcu.h                 |   7
-rw-r--r--  kernel/rcupdate.c            |  60
-rw-r--r--  kernel/rcutiny.c             |   8
-rw-r--r--  kernel/rcutiny_plugin.h      |  56
-rw-r--r--  kernel/rcutorture.c          |  66
-rw-r--r--  kernel/rcutree.c             | 260
-rw-r--r--  kernel/rcutree.h             |  11
-rw-r--r--  kernel/srcu.c                |  37
-rw-r--r--  kernel/trace/trace_clock.c   |   1
10 files changed, 446 insertions, 135 deletions
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
index e0e07fd55508..d566aba7e801 100644
--- a/kernel/context_tracking.c
+++ b/kernel/context_tracking.c
@@ -1,3 +1,19 @@
1/*
2 * Context tracking: Probe on high level context boundaries such as kernel
3 * and userspace. This includes syscalls and exceptions entry/exit.
4 *
5 * This is used by RCU to remove its dependency on the timer tick while a CPU
6 * runs in userspace.
7 *
8 * Started by Frederic Weisbecker:
9 *
10 * Copyright (C) 2012 Red Hat, Inc., Frederic Weisbecker <fweisbec@redhat.com>
11 *
12 * Many thanks to Gilad Ben-Yossef, Paul McKenney, Ingo Molnar, Andrew Morton,
13 * Steven Rostedt, Peter Zijlstra for suggestions and improvements.
14 *
15 */
16
1#include <linux/context_tracking.h> 17#include <linux/context_tracking.h>
2#include <linux/rcupdate.h> 18#include <linux/rcupdate.h>
3#include <linux/sched.h> 19#include <linux/sched.h>
@@ -6,8 +22,8 @@
6 22
7struct context_tracking { 23struct context_tracking {
8 /* 24 /*
9 * When active is false, hooks are not set to 25 * When active is false, probes are unset in order
10 * minimize overhead: TIF flags are cleared 26 * to minimize overhead: TIF flags are cleared
11 * and calls to user_enter/exit are ignored. This 27 * and calls to user_enter/exit are ignored. This
12 * may be further optimized using static keys. 28 * may be further optimized using static keys.
13 */ 29 */
@@ -24,6 +40,15 @@ static DEFINE_PER_CPU(struct context_tracking, context_tracking) = {
24#endif 40#endif
25}; 41};
26 42
43/**
44 * user_enter - Inform the context tracking that the CPU is going to
45 * enter userspace mode.
46 *
47 * This function must be called right before we switch from the kernel
48 * to userspace, when it's guaranteed the remaining kernel instructions
49 * to execute won't use any RCU read side critical section because this
50 * function sets RCU in extended quiescent state.
51 */
27void user_enter(void) 52void user_enter(void)
28{ 53{
29 unsigned long flags; 54 unsigned long flags;
@@ -39,40 +64,70 @@ void user_enter(void)
39 if (in_interrupt()) 64 if (in_interrupt())
40 return; 65 return;
41 66
67 /* Kernel threads aren't supposed to go to userspace */
42 WARN_ON_ONCE(!current->mm); 68 WARN_ON_ONCE(!current->mm);
43 69
44 local_irq_save(flags); 70 local_irq_save(flags);
45 if (__this_cpu_read(context_tracking.active) && 71 if (__this_cpu_read(context_tracking.active) &&
46 __this_cpu_read(context_tracking.state) != IN_USER) { 72 __this_cpu_read(context_tracking.state) != IN_USER) {
47 __this_cpu_write(context_tracking.state, IN_USER); 73 __this_cpu_write(context_tracking.state, IN_USER);
74 /*
75 * At this stage, only low level arch entry code remains and
76 * then we'll run in userspace. We can assume there won't be
77 * any RCU read-side critical section until the next call to
78 * user_exit() or rcu_irq_enter(). Let's remove RCU's dependency
79 * on the tick.
80 */
48 rcu_user_enter(); 81 rcu_user_enter();
49 } 82 }
50 local_irq_restore(flags); 83 local_irq_restore(flags);
51} 84}
52 85
86
87/**
88 * user_exit - Inform the context tracking that the CPU is
89 * exiting userspace mode and entering the kernel.
90 *
91 * This function must be called after we entered the kernel from userspace
 92 * before any use of RCU read side critical section. This potentially includes
93 * any high level kernel code like syscalls, exceptions, signal handling, etc...
94 *
95 * This call supports re-entrancy. This way it can be called from any exception
96 * handler without needing to know if we came from userspace or not.
97 */
53void user_exit(void) 98void user_exit(void)
54{ 99{
55 unsigned long flags; 100 unsigned long flags;
56 101
57 /*
58 * Some contexts may involve an exception occuring in an irq,
59 * leading to that nesting:
60 * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit()
61 * This would mess up the dyntick_nesting count though. And rcu_irq_*()
62 * helpers are enough to protect RCU uses inside the exception. So
63 * just return immediately if we detect we are in an IRQ.
64 */
65 if (in_interrupt()) 102 if (in_interrupt())
66 return; 103 return;
67 104
68 local_irq_save(flags); 105 local_irq_save(flags);
69 if (__this_cpu_read(context_tracking.state) == IN_USER) { 106 if (__this_cpu_read(context_tracking.state) == IN_USER) {
70 __this_cpu_write(context_tracking.state, IN_KERNEL); 107 __this_cpu_write(context_tracking.state, IN_KERNEL);
108 /*
109 * We are going to run code that may use RCU. Inform
110 * RCU core about that (ie: we may need the tick again).
111 */
71 rcu_user_exit(); 112 rcu_user_exit();
72 } 113 }
73 local_irq_restore(flags); 114 local_irq_restore(flags);
74} 115}
75 116
117
118/**
119 * context_tracking_task_switch - context switch the syscall callbacks
120 * @prev: the task that is being switched out
121 * @next: the task that is being switched in
122 *
123 * The context tracking uses the syscall slow path to implement its user-kernel
124 * boundaries probes on syscalls. This way it doesn't impact the syscall fast
125 * path on CPUs that don't do context tracking.
126 *
127 * But we need to clear the flag on the previous task because it may later
128 * migrate to some CPU that doesn't do the context tracking. As such the TIF
129 * flag may not be desired there.
130 */
76void context_tracking_task_switch(struct task_struct *prev, 131void context_tracking_task_switch(struct task_struct *prev,
77 struct task_struct *next) 132 struct task_struct *next)
78{ 133{
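
The kerneldoc added above pins down the contract: user_enter() runs just before
the return to userspace and puts RCU into an extended quiescent state, while
user_exit() runs on every kernel entry and is reentrant, so exception handlers
may call it without knowing whether they came from userspace. A minimal sketch
of how an architecture's syscall slow path would bracket its work with these
probes; the hook names are illustrative and not part of this patch:

#include <linux/context_tracking.h>

/* Hypothetical arch-side glue; only user_exit()/user_enter() are real. */
void example_syscall_entry_work(void)
{
        user_exit();    /* userspace -> kernel: RCU may need the tick again */
        /* ... tracing, seccomp, audit, signal handling ... */
}

void example_syscall_exit_work(void)
{
        /* ... remaining slow-path work ... */
        user_enter();   /* last kernel code before userspace: RCU enters EQS */
}
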
diff --git a/kernel/rcu.h b/kernel/rcu.h
index 20dfba576c2b..7f8e7590e3e5 100644
--- a/kernel/rcu.h
+++ b/kernel/rcu.h
@@ -111,4 +111,11 @@ static inline bool __rcu_reclaim(char *rn, struct rcu_head *head)
111 111
112extern int rcu_expedited; 112extern int rcu_expedited;
113 113
114#ifdef CONFIG_RCU_STALL_COMMON
115
116extern int rcu_cpu_stall_suppress;
117int rcu_jiffies_till_stall_check(void);
118
119#endif /* #ifdef CONFIG_RCU_STALL_COMMON */
120
114#endif /* __LINUX_RCU_H */ 121#endif /* __LINUX_RCU_H */
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index a2cf76177b44..48ab70384a4c 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -404,11 +404,65 @@ EXPORT_SYMBOL_GPL(rcuhead_debug_descr);
404#endif /* #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */ 404#endif /* #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */
405 405
406#if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) || defined(CONFIG_RCU_TRACE) 406#if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) || defined(CONFIG_RCU_TRACE)
407void do_trace_rcu_torture_read(char *rcutorturename, struct rcu_head *rhp) 407void do_trace_rcu_torture_read(char *rcutorturename, struct rcu_head *rhp,
408 unsigned long secs,
409 unsigned long c_old, unsigned long c)
408{ 410{
409 trace_rcu_torture_read(rcutorturename, rhp); 411 trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c);
410} 412}
411EXPORT_SYMBOL_GPL(do_trace_rcu_torture_read); 413EXPORT_SYMBOL_GPL(do_trace_rcu_torture_read);
412#else 414#else
413#define do_trace_rcu_torture_read(rcutorturename, rhp) do { } while (0) 415#define do_trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \
416 do { } while (0)
414#endif 417#endif
418
419#ifdef CONFIG_RCU_STALL_COMMON
420
421#ifdef CONFIG_PROVE_RCU
422#define RCU_STALL_DELAY_DELTA (5 * HZ)
423#else
424#define RCU_STALL_DELAY_DELTA 0
425#endif
426
427int rcu_cpu_stall_suppress __read_mostly; /* 1 = suppress stall warnings. */
428int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT;
429
430module_param(rcu_cpu_stall_suppress, int, 0644);
431module_param(rcu_cpu_stall_timeout, int, 0644);
432
433int rcu_jiffies_till_stall_check(void)
434{
435 int till_stall_check = ACCESS_ONCE(rcu_cpu_stall_timeout);
436
437 /*
438 * Limit check must be consistent with the Kconfig limits
439 * for CONFIG_RCU_CPU_STALL_TIMEOUT.
440 */
441 if (till_stall_check < 3) {
442 ACCESS_ONCE(rcu_cpu_stall_timeout) = 3;
443 till_stall_check = 3;
444 } else if (till_stall_check > 300) {
445 ACCESS_ONCE(rcu_cpu_stall_timeout) = 300;
446 till_stall_check = 300;
447 }
448 return till_stall_check * HZ + RCU_STALL_DELAY_DELTA;
449}
450
451static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr)
452{
453 rcu_cpu_stall_suppress = 1;
454 return NOTIFY_DONE;
455}
456
457static struct notifier_block rcu_panic_block = {
458 .notifier_call = rcu_panic,
459};
460
461static int __init check_cpu_stall_init(void)
462{
463 atomic_notifier_chain_register(&panic_notifier_list, &rcu_panic_block);
464 return 0;
465}
466early_initcall(check_cpu_stall_init);
467
468#endif /* #ifdef CONFIG_RCU_STALL_COMMON */
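
With the stall-warning plumbing consolidated under CONFIG_RCU_STALL_COMMON in
rcupdate.c, rcu_jiffies_till_stall_check() becomes the single place where the
runtime-writable rcu_cpu_stall_timeout is clamped to the 3..300 second range
allowed for CONFIG_RCU_CPU_STALL_TIMEOUT and converted to jiffies, with 5*HZ of
extra slack under CONFIG_PROVE_RCU (the module parameters should accordingly
now appear under the rcupdate. prefix rather than rcutree.). A standalone
restatement of the clamp arithmetic, with HZ and the PROVE_RCU delta assumed
for illustration:

#define EX_HZ                   1000    /* assumed HZ value */
#define EX_STALL_DELAY_DELTA    0       /* (5 * EX_HZ) if CONFIG_PROVE_RCU=y */

static int example_till_stall_check(int timeout_s)
{
        if (timeout_s < 3)              /* same limits as the Kconfig option */
                timeout_s = 3;
        else if (timeout_s > 300)
                timeout_s = 300;
        return timeout_s * EX_HZ + EX_STALL_DELAY_DELTA;
}

/*
 * example_till_stall_check(21)  ==  21000 jiffies
 * example_till_stall_check(500) == 300000 jiffies (clamped to 300s)
 */
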
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
index e7dce58f9c2a..a0714a51b6d7 100644
--- a/kernel/rcutiny.c
+++ b/kernel/rcutiny.c
@@ -51,10 +51,10 @@ static void __call_rcu(struct rcu_head *head,
51 void (*func)(struct rcu_head *rcu), 51 void (*func)(struct rcu_head *rcu),
52 struct rcu_ctrlblk *rcp); 52 struct rcu_ctrlblk *rcp);
53 53
54#include "rcutiny_plugin.h"
55
56static long long rcu_dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; 54static long long rcu_dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
57 55
56#include "rcutiny_plugin.h"
57
58/* Common code for rcu_idle_enter() and rcu_irq_exit(), see kernel/rcutree.c. */ 58/* Common code for rcu_idle_enter() and rcu_irq_exit(), see kernel/rcutree.c. */
59static void rcu_idle_enter_common(long long newval) 59static void rcu_idle_enter_common(long long newval)
60{ 60{
@@ -193,7 +193,7 @@ EXPORT_SYMBOL(rcu_is_cpu_idle);
193 * interrupts don't count, we must be running at the first interrupt 193 * interrupts don't count, we must be running at the first interrupt
194 * level. 194 * level.
195 */ 195 */
196int rcu_is_cpu_rrupt_from_idle(void) 196static int rcu_is_cpu_rrupt_from_idle(void)
197{ 197{
198 return rcu_dynticks_nesting <= 1; 198 return rcu_dynticks_nesting <= 1;
199} 199}
@@ -205,6 +205,7 @@ int rcu_is_cpu_rrupt_from_idle(void)
205 */ 205 */
206static int rcu_qsctr_help(struct rcu_ctrlblk *rcp) 206static int rcu_qsctr_help(struct rcu_ctrlblk *rcp)
207{ 207{
208 reset_cpu_stall_ticks(rcp);
208 if (rcp->rcucblist != NULL && 209 if (rcp->rcucblist != NULL &&
209 rcp->donetail != rcp->curtail) { 210 rcp->donetail != rcp->curtail) {
210 rcp->donetail = rcp->curtail; 211 rcp->donetail = rcp->curtail;
@@ -251,6 +252,7 @@ void rcu_bh_qs(int cpu)
251 */ 252 */
252void rcu_check_callbacks(int cpu, int user) 253void rcu_check_callbacks(int cpu, int user)
253{ 254{
255 check_cpu_stalls();
254 if (user || rcu_is_cpu_rrupt_from_idle()) 256 if (user || rcu_is_cpu_rrupt_from_idle())
255 rcu_sched_qs(cpu); 257 rcu_sched_qs(cpu);
256 else if (!in_softirq()) 258 else if (!in_softirq())
diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h
index f85016a2309b..8a233002faeb 100644
--- a/kernel/rcutiny_plugin.h
+++ b/kernel/rcutiny_plugin.h
@@ -33,6 +33,9 @@ struct rcu_ctrlblk {
33 struct rcu_head **donetail; /* ->next pointer of last "done" CB. */ 33 struct rcu_head **donetail; /* ->next pointer of last "done" CB. */
34 struct rcu_head **curtail; /* ->next pointer of last CB. */ 34 struct rcu_head **curtail; /* ->next pointer of last CB. */
35 RCU_TRACE(long qlen); /* Number of pending CBs. */ 35 RCU_TRACE(long qlen); /* Number of pending CBs. */
36 RCU_TRACE(unsigned long gp_start); /* Start time for stalls. */
37 RCU_TRACE(unsigned long ticks_this_gp); /* Statistic for stalls. */
38 RCU_TRACE(unsigned long jiffies_stall); /* Jiffies at next stall. */
36 RCU_TRACE(char *name); /* Name of RCU type. */ 39 RCU_TRACE(char *name); /* Name of RCU type. */
37}; 40};
38 41
@@ -54,6 +57,51 @@ int rcu_scheduler_active __read_mostly;
54EXPORT_SYMBOL_GPL(rcu_scheduler_active); 57EXPORT_SYMBOL_GPL(rcu_scheduler_active);
55#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ 58#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
56 59
60#ifdef CONFIG_RCU_TRACE
61
62static void check_cpu_stall(struct rcu_ctrlblk *rcp)
63{
64 unsigned long j;
65 unsigned long js;
66
67 if (rcu_cpu_stall_suppress)
68 return;
69 rcp->ticks_this_gp++;
70 j = jiffies;
71 js = rcp->jiffies_stall;
72 if (*rcp->curtail && ULONG_CMP_GE(j, js)) {
73 pr_err("INFO: %s stall on CPU (%lu ticks this GP) idle=%llx (t=%lu jiffies q=%ld)\n",
74 rcp->name, rcp->ticks_this_gp, rcu_dynticks_nesting,
75 jiffies - rcp->gp_start, rcp->qlen);
76 dump_stack();
77 }
78 if (*rcp->curtail && ULONG_CMP_GE(j, js))
79 rcp->jiffies_stall = jiffies +
80 3 * rcu_jiffies_till_stall_check() + 3;
81 else if (ULONG_CMP_GE(j, js))
82 rcp->jiffies_stall = jiffies + rcu_jiffies_till_stall_check();
83}
84
85static void check_cpu_stall_preempt(void);
86
87#endif /* #ifdef CONFIG_RCU_TRACE */
88
89static void reset_cpu_stall_ticks(struct rcu_ctrlblk *rcp)
90{
91#ifdef CONFIG_RCU_TRACE
92 rcp->ticks_this_gp = 0;
93 rcp->gp_start = jiffies;
94 rcp->jiffies_stall = jiffies + rcu_jiffies_till_stall_check();
95#endif /* #ifdef CONFIG_RCU_TRACE */
96}
97
98static void check_cpu_stalls(void)
99{
100 RCU_TRACE(check_cpu_stall(&rcu_bh_ctrlblk));
101 RCU_TRACE(check_cpu_stall(&rcu_sched_ctrlblk));
102 RCU_TRACE(check_cpu_stall_preempt());
103}
104
57#ifdef CONFIG_TINY_PREEMPT_RCU 105#ifdef CONFIG_TINY_PREEMPT_RCU
58 106
59#include <linux/delay.h> 107#include <linux/delay.h>
@@ -448,6 +496,7 @@ static void rcu_preempt_start_gp(void)
448 /* Official start of GP. */ 496 /* Official start of GP. */
449 rcu_preempt_ctrlblk.gpnum++; 497 rcu_preempt_ctrlblk.gpnum++;
450 RCU_TRACE(rcu_preempt_ctrlblk.n_grace_periods++); 498 RCU_TRACE(rcu_preempt_ctrlblk.n_grace_periods++);
499 reset_cpu_stall_ticks(&rcu_preempt_ctrlblk.rcb);
451 500
452 /* Any blocked RCU readers block new GP. */ 501 /* Any blocked RCU readers block new GP. */
453 if (rcu_preempt_blocked_readers_any()) 502 if (rcu_preempt_blocked_readers_any())
@@ -1054,4 +1103,11 @@ MODULE_AUTHOR("Paul E. McKenney");
1054MODULE_DESCRIPTION("Read-Copy Update tracing for tiny implementation"); 1103MODULE_DESCRIPTION("Read-Copy Update tracing for tiny implementation");
1055MODULE_LICENSE("GPL"); 1104MODULE_LICENSE("GPL");
1056 1105
1106static void check_cpu_stall_preempt(void)
1107{
1108#ifdef CONFIG_TINY_PREEMPT_RCU
1109 check_cpu_stall(&rcu_preempt_ctrlblk.rcb);
1110#endif /* #ifdef CONFIG_TINY_PREEMPT_RCU */
1111}
1112
1057#endif /* #ifdef CONFIG_RCU_TRACE */ 1113#endif /* #ifdef CONFIG_RCU_TRACE */
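
The new tiny-RCU check_cpu_stall() compares jiffies against the precomputed
jiffies_stall deadline with ULONG_CMP_GE() rather than a plain ">=", so the
test stays correct across jiffies wraparound. A self-contained illustration of
the unsigned-subtraction trick; the macro bodies below follow the rcupdate.h
definitions in spirit:

#include <limits.h>

/* Wrap-tolerant time comparisons, as used for the jiffies_stall deadline. */
#define ULONG_CMP_GE(a, b)      (ULONG_MAX / 2 >= (a) - (b))
#define ULONG_CMP_LT(a, b)      (ULONG_MAX / 2 < (a) - (b))

static int deadline_passed(unsigned long now, unsigned long deadline)
{
        /*
         * With deadline == ULONG_MAX - 5 and now == 10 (jiffies wrapped),
         * "now >= deadline" is false, but now - deadline == 16, well under
         * ULONG_MAX/2, so ULONG_CMP_GE() correctly reports the deadline
         * as having passed.
         */
        return ULONG_CMP_GE(now, deadline);
}
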
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 31dea01c85fd..e1f3a8c96724 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -46,6 +46,7 @@
46#include <linux/stat.h> 46#include <linux/stat.h>
47#include <linux/srcu.h> 47#include <linux/srcu.h>
48#include <linux/slab.h> 48#include <linux/slab.h>
49#include <linux/trace_clock.h>
49#include <asm/byteorder.h> 50#include <asm/byteorder.h>
50 51
51MODULE_LICENSE("GPL"); 52MODULE_LICENSE("GPL");
@@ -207,6 +208,20 @@ MODULE_PARM_DESC(rcutorture_runnable, "Start rcutorture at boot");
207#define rcu_can_boost() 0 208#define rcu_can_boost() 0
208#endif /* #else #if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU) */ 209#endif /* #else #if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU) */
209 210
211#ifdef CONFIG_RCU_TRACE
212static u64 notrace rcu_trace_clock_local(void)
213{
214 u64 ts = trace_clock_local();
215 unsigned long __maybe_unused ts_rem = do_div(ts, NSEC_PER_USEC);
216 return ts;
217}
218#else /* #ifdef CONFIG_RCU_TRACE */
219static u64 notrace rcu_trace_clock_local(void)
220{
221 return 0ULL;
222}
223#endif /* #else #ifdef CONFIG_RCU_TRACE */
224
210static unsigned long shutdown_time; /* jiffies to system shutdown. */ 225static unsigned long shutdown_time; /* jiffies to system shutdown. */
211static unsigned long boost_starttime; /* jiffies of next boost test start. */ 226static unsigned long boost_starttime; /* jiffies of next boost test start. */
212DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */ 227DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */
@@ -845,7 +860,7 @@ static int rcu_torture_boost(void *arg)
845 /* Wait for the next test interval. */ 860 /* Wait for the next test interval. */
846 oldstarttime = boost_starttime; 861 oldstarttime = boost_starttime;
847 while (ULONG_CMP_LT(jiffies, oldstarttime)) { 862 while (ULONG_CMP_LT(jiffies, oldstarttime)) {
848 schedule_timeout_uninterruptible(1); 863 schedule_timeout_interruptible(oldstarttime - jiffies);
849 rcu_stutter_wait("rcu_torture_boost"); 864 rcu_stutter_wait("rcu_torture_boost");
850 if (kthread_should_stop() || 865 if (kthread_should_stop() ||
851 fullstop != FULLSTOP_DONTSTOP) 866 fullstop != FULLSTOP_DONTSTOP)
@@ -1028,7 +1043,6 @@ void rcutorture_trace_dump(void)
1028 return; 1043 return;
1029 if (atomic_xchg(&beenhere, 1) != 0) 1044 if (atomic_xchg(&beenhere, 1) != 0)
1030 return; 1045 return;
1031 do_trace_rcu_torture_read(cur_ops->name, (struct rcu_head *)~0UL);
1032 ftrace_dump(DUMP_ALL); 1046 ftrace_dump(DUMP_ALL);
1033} 1047}
1034 1048
@@ -1042,13 +1056,16 @@ static void rcu_torture_timer(unsigned long unused)
1042{ 1056{
1043 int idx; 1057 int idx;
1044 int completed; 1058 int completed;
1059 int completed_end;
1045 static DEFINE_RCU_RANDOM(rand); 1060 static DEFINE_RCU_RANDOM(rand);
1046 static DEFINE_SPINLOCK(rand_lock); 1061 static DEFINE_SPINLOCK(rand_lock);
1047 struct rcu_torture *p; 1062 struct rcu_torture *p;
1048 int pipe_count; 1063 int pipe_count;
1064 unsigned long long ts;
1049 1065
1050 idx = cur_ops->readlock(); 1066 idx = cur_ops->readlock();
1051 completed = cur_ops->completed(); 1067 completed = cur_ops->completed();
1068 ts = rcu_trace_clock_local();
1052 p = rcu_dereference_check(rcu_torture_current, 1069 p = rcu_dereference_check(rcu_torture_current,
1053 rcu_read_lock_bh_held() || 1070 rcu_read_lock_bh_held() ||
1054 rcu_read_lock_sched_held() || 1071 rcu_read_lock_sched_held() ||
@@ -1058,7 +1075,6 @@ static void rcu_torture_timer(unsigned long unused)
1058 cur_ops->readunlock(idx); 1075 cur_ops->readunlock(idx);
1059 return; 1076 return;
1060 } 1077 }
1061 do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu);
1062 if (p->rtort_mbtest == 0) 1078 if (p->rtort_mbtest == 0)
1063 atomic_inc(&n_rcu_torture_mberror); 1079 atomic_inc(&n_rcu_torture_mberror);
1064 spin_lock(&rand_lock); 1080 spin_lock(&rand_lock);
@@ -1071,10 +1087,14 @@ static void rcu_torture_timer(unsigned long unused)
1071 /* Should not happen, but... */ 1087 /* Should not happen, but... */
1072 pipe_count = RCU_TORTURE_PIPE_LEN; 1088 pipe_count = RCU_TORTURE_PIPE_LEN;
1073 } 1089 }
1074 if (pipe_count > 1) 1090 completed_end = cur_ops->completed();
1091 if (pipe_count > 1) {
1092 do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu, ts,
1093 completed, completed_end);
1075 rcutorture_trace_dump(); 1094 rcutorture_trace_dump();
1095 }
1076 __this_cpu_inc(rcu_torture_count[pipe_count]); 1096 __this_cpu_inc(rcu_torture_count[pipe_count]);
1077 completed = cur_ops->completed() - completed; 1097 completed = completed_end - completed;
1078 if (completed > RCU_TORTURE_PIPE_LEN) { 1098 if (completed > RCU_TORTURE_PIPE_LEN) {
1079 /* Should not happen, but... */ 1099 /* Should not happen, but... */
1080 completed = RCU_TORTURE_PIPE_LEN; 1100 completed = RCU_TORTURE_PIPE_LEN;
@@ -1094,11 +1114,13 @@ static int
1094rcu_torture_reader(void *arg) 1114rcu_torture_reader(void *arg)
1095{ 1115{
1096 int completed; 1116 int completed;
1117 int completed_end;
1097 int idx; 1118 int idx;
1098 DEFINE_RCU_RANDOM(rand); 1119 DEFINE_RCU_RANDOM(rand);
1099 struct rcu_torture *p; 1120 struct rcu_torture *p;
1100 int pipe_count; 1121 int pipe_count;
1101 struct timer_list t; 1122 struct timer_list t;
1123 unsigned long long ts;
1102 1124
1103 VERBOSE_PRINTK_STRING("rcu_torture_reader task started"); 1125 VERBOSE_PRINTK_STRING("rcu_torture_reader task started");
1104 set_user_nice(current, 19); 1126 set_user_nice(current, 19);
@@ -1112,6 +1134,7 @@ rcu_torture_reader(void *arg)
1112 } 1134 }
1113 idx = cur_ops->readlock(); 1135 idx = cur_ops->readlock();
1114 completed = cur_ops->completed(); 1136 completed = cur_ops->completed();
1137 ts = rcu_trace_clock_local();
1115 p = rcu_dereference_check(rcu_torture_current, 1138 p = rcu_dereference_check(rcu_torture_current,
1116 rcu_read_lock_bh_held() || 1139 rcu_read_lock_bh_held() ||
1117 rcu_read_lock_sched_held() || 1140 rcu_read_lock_sched_held() ||
@@ -1122,7 +1145,6 @@ rcu_torture_reader(void *arg)
1122 schedule_timeout_interruptible(HZ); 1145 schedule_timeout_interruptible(HZ);
1123 continue; 1146 continue;
1124 } 1147 }
1125 do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu);
1126 if (p->rtort_mbtest == 0) 1148 if (p->rtort_mbtest == 0)
1127 atomic_inc(&n_rcu_torture_mberror); 1149 atomic_inc(&n_rcu_torture_mberror);
1128 cur_ops->read_delay(&rand); 1150 cur_ops->read_delay(&rand);
@@ -1132,10 +1154,14 @@ rcu_torture_reader(void *arg)
1132 /* Should not happen, but... */ 1154 /* Should not happen, but... */
1133 pipe_count = RCU_TORTURE_PIPE_LEN; 1155 pipe_count = RCU_TORTURE_PIPE_LEN;
1134 } 1156 }
1135 if (pipe_count > 1) 1157 completed_end = cur_ops->completed();
1158 if (pipe_count > 1) {
1159 do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu,
1160 ts, completed, completed_end);
1136 rcutorture_trace_dump(); 1161 rcutorture_trace_dump();
1162 }
1137 __this_cpu_inc(rcu_torture_count[pipe_count]); 1163 __this_cpu_inc(rcu_torture_count[pipe_count]);
1138 completed = cur_ops->completed() - completed; 1164 completed = completed_end - completed;
1139 if (completed > RCU_TORTURE_PIPE_LEN) { 1165 if (completed > RCU_TORTURE_PIPE_LEN) {
1140 /* Should not happen, but... */ 1166 /* Should not happen, but... */
1141 completed = RCU_TORTURE_PIPE_LEN; 1167 completed = RCU_TORTURE_PIPE_LEN;
@@ -1301,19 +1327,35 @@ static void rcu_torture_shuffle_tasks(void)
1301 set_cpus_allowed_ptr(reader_tasks[i], 1327 set_cpus_allowed_ptr(reader_tasks[i],
1302 shuffle_tmp_mask); 1328 shuffle_tmp_mask);
1303 } 1329 }
1304
1305 if (fakewriter_tasks) { 1330 if (fakewriter_tasks) {
1306 for (i = 0; i < nfakewriters; i++) 1331 for (i = 0; i < nfakewriters; i++)
1307 if (fakewriter_tasks[i]) 1332 if (fakewriter_tasks[i])
1308 set_cpus_allowed_ptr(fakewriter_tasks[i], 1333 set_cpus_allowed_ptr(fakewriter_tasks[i],
1309 shuffle_tmp_mask); 1334 shuffle_tmp_mask);
1310 } 1335 }
1311
1312 if (writer_task) 1336 if (writer_task)
1313 set_cpus_allowed_ptr(writer_task, shuffle_tmp_mask); 1337 set_cpus_allowed_ptr(writer_task, shuffle_tmp_mask);
1314
1315 if (stats_task) 1338 if (stats_task)
1316 set_cpus_allowed_ptr(stats_task, shuffle_tmp_mask); 1339 set_cpus_allowed_ptr(stats_task, shuffle_tmp_mask);
1340 if (stutter_task)
1341 set_cpus_allowed_ptr(stutter_task, shuffle_tmp_mask);
1342 if (fqs_task)
1343 set_cpus_allowed_ptr(fqs_task, shuffle_tmp_mask);
1344 if (shutdown_task)
1345 set_cpus_allowed_ptr(shutdown_task, shuffle_tmp_mask);
1346#ifdef CONFIG_HOTPLUG_CPU
1347 if (onoff_task)
1348 set_cpus_allowed_ptr(onoff_task, shuffle_tmp_mask);
1349#endif /* #ifdef CONFIG_HOTPLUG_CPU */
1350 if (stall_task)
1351 set_cpus_allowed_ptr(stall_task, shuffle_tmp_mask);
1352 if (barrier_cbs_tasks)
1353 for (i = 0; i < n_barrier_cbs; i++)
1354 if (barrier_cbs_tasks[i])
1355 set_cpus_allowed_ptr(barrier_cbs_tasks[i],
1356 shuffle_tmp_mask);
1357 if (barrier_task)
1358 set_cpus_allowed_ptr(barrier_task, shuffle_tmp_mask);
1317 1359
1318 if (rcu_idle_cpu == -1) 1360 if (rcu_idle_cpu == -1)
1319 rcu_idle_cpu = num_online_cpus() - 1; 1361 rcu_idle_cpu = num_online_cpus() - 1;
@@ -1749,7 +1791,7 @@ static int rcu_torture_barrier_init(void)
1749 barrier_cbs_wq = 1791 barrier_cbs_wq =
1750 kzalloc(n_barrier_cbs * sizeof(barrier_cbs_wq[0]), 1792 kzalloc(n_barrier_cbs * sizeof(barrier_cbs_wq[0]),
1751 GFP_KERNEL); 1793 GFP_KERNEL);
1752 if (barrier_cbs_tasks == NULL || barrier_cbs_wq == 0) 1794 if (barrier_cbs_tasks == NULL || !barrier_cbs_wq)
1753 return -ENOMEM; 1795 return -ENOMEM;
1754 for (i = 0; i < n_barrier_cbs; i++) { 1796 for (i = 0; i < n_barrier_cbs; i++) {
1755 init_waitqueue_head(&barrier_cbs_wq[i]); 1797 init_waitqueue_head(&barrier_cbs_wq[i]);
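
Two mechanics in the rcutorture changes above are worth spelling out: readers
now snapshot cur_ops->completed() both before and after the read-side critical
section, so the trace event emitted for a suspicious read (pipe_count > 1)
records how far the grace-period counter moved while the reader ran, and the
timestamp comes from trace_clock_local() scaled to microseconds with do_div().
A small sketch of the do_div() idiom; the helper name is illustrative:

#include <linux/types.h>
#include <linux/time.h>         /* NSEC_PER_USEC */
#include <asm/div64.h>          /* do_div() */

/* do_div(n, base) divides the 64-bit n in place and returns the remainder. */
static u64 example_ns_to_us(u64 ns)
{
        u64 us = ns;
        u32 rem = do_div(us, NSEC_PER_USEC);    /* us = ns/1000, rem = ns%1000 */

        (void)rem;      /* remainder discarded, as in rcu_trace_clock_local() */
        return us;
}
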
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index e441b77b614e..5b8ad827fd86 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -105,7 +105,7 @@ int rcu_num_nodes __read_mostly = NUM_RCU_NODES; /* Total # rcu_nodes in use. */
105 * The rcu_scheduler_active variable transitions from zero to one just 105 * The rcu_scheduler_active variable transitions from zero to one just
106 * before the first task is spawned. So when this variable is zero, RCU 106 * before the first task is spawned. So when this variable is zero, RCU
107 * can assume that there is but one task, allowing RCU to (for example) 107 * can assume that there is but one task, allowing RCU to (for example)
108 * optimized synchronize_sched() to a simple barrier(). When this variable 108 * optimize synchronize_sched() to a simple barrier(). When this variable
109 * is one, RCU must actually do all the hard work required to detect real 109 * is one, RCU must actually do all the hard work required to detect real
110 * grace periods. This variable is also used to suppress boot-time false 110 * grace periods. This variable is also used to suppress boot-time false
111 * positives from lockdep-RCU error checking. 111 * positives from lockdep-RCU error checking.
@@ -217,12 +217,6 @@ module_param(blimit, long, 0444);
217module_param(qhimark, long, 0444); 217module_param(qhimark, long, 0444);
218module_param(qlowmark, long, 0444); 218module_param(qlowmark, long, 0444);
219 219
220int rcu_cpu_stall_suppress __read_mostly; /* 1 = suppress stall warnings. */
221int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT;
222
223module_param(rcu_cpu_stall_suppress, int, 0644);
224module_param(rcu_cpu_stall_timeout, int, 0644);
225
226static ulong jiffies_till_first_fqs = RCU_JIFFIES_TILL_FORCE_QS; 220static ulong jiffies_till_first_fqs = RCU_JIFFIES_TILL_FORCE_QS;
227static ulong jiffies_till_next_fqs = RCU_JIFFIES_TILL_FORCE_QS; 221static ulong jiffies_till_next_fqs = RCU_JIFFIES_TILL_FORCE_QS;
228 222
@@ -305,17 +299,27 @@ cpu_has_callbacks_ready_to_invoke(struct rcu_data *rdp)
305} 299}
306 300
307/* 301/*
308 * Does the current CPU require a yet-as-unscheduled grace period? 302 * Does the current CPU require a not-yet-started grace period?
303 * The caller must have disabled interrupts to prevent races with
304 * normal callback registry.
309 */ 305 */
310static int 306static int
311cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp) 307cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
312{ 308{
313 struct rcu_head **ntp; 309 int i;
314 310
315 ntp = rdp->nxttail[RCU_DONE_TAIL + 311 if (rcu_gp_in_progress(rsp))
316 (ACCESS_ONCE(rsp->completed) != rdp->completed)]; 312 return 0; /* No, a grace period is already in progress. */
317 return rdp->nxttail[RCU_DONE_TAIL] && ntp && *ntp && 313 if (!rdp->nxttail[RCU_NEXT_TAIL])
318 !rcu_gp_in_progress(rsp); 314 return 0; /* No, this is a no-CBs (or offline) CPU. */
315 if (*rdp->nxttail[RCU_NEXT_READY_TAIL])
316 return 1; /* Yes, this CPU has newly registered callbacks. */
317 for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++)
318 if (rdp->nxttail[i - 1] != rdp->nxttail[i] &&
319 ULONG_CMP_LT(ACCESS_ONCE(rsp->completed),
320 rdp->nxtcompleted[i]))
321 return 1; /* Yes, CBs for future grace period. */
322 return 0; /* No grace period needed. */
319} 323}
320 324
321/* 325/*
@@ -336,7 +340,7 @@ static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
336static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval, 340static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval,
337 bool user) 341 bool user)
338{ 342{
339 trace_rcu_dyntick("Start", oldval, 0); 343 trace_rcu_dyntick("Start", oldval, rdtp->dynticks_nesting);
340 if (!user && !is_idle_task(current)) { 344 if (!user && !is_idle_task(current)) {
341 struct task_struct *idle = idle_task(smp_processor_id()); 345 struct task_struct *idle = idle_task(smp_processor_id());
342 346
@@ -727,7 +731,7 @@ EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online);
727 * interrupt from idle, return true. The caller must have at least 731 * interrupt from idle, return true. The caller must have at least
728 * disabled preemption. 732 * disabled preemption.
729 */ 733 */
730int rcu_is_cpu_rrupt_from_idle(void) 734static int rcu_is_cpu_rrupt_from_idle(void)
731{ 735{
732 return __get_cpu_var(rcu_dynticks).dynticks_nesting <= 1; 736 return __get_cpu_var(rcu_dynticks).dynticks_nesting <= 1;
733} 737}
@@ -793,28 +797,10 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
793 return 0; 797 return 0;
794} 798}
795 799
796static int jiffies_till_stall_check(void)
797{
798 int till_stall_check = ACCESS_ONCE(rcu_cpu_stall_timeout);
799
800 /*
801 * Limit check must be consistent with the Kconfig limits
802 * for CONFIG_RCU_CPU_STALL_TIMEOUT.
803 */
804 if (till_stall_check < 3) {
805 ACCESS_ONCE(rcu_cpu_stall_timeout) = 3;
806 till_stall_check = 3;
807 } else if (till_stall_check > 300) {
808 ACCESS_ONCE(rcu_cpu_stall_timeout) = 300;
809 till_stall_check = 300;
810 }
811 return till_stall_check * HZ + RCU_STALL_DELAY_DELTA;
812}
813
814static void record_gp_stall_check_time(struct rcu_state *rsp) 800static void record_gp_stall_check_time(struct rcu_state *rsp)
815{ 801{
816 rsp->gp_start = jiffies; 802 rsp->gp_start = jiffies;
817 rsp->jiffies_stall = jiffies + jiffies_till_stall_check(); 803 rsp->jiffies_stall = jiffies + rcu_jiffies_till_stall_check();
818} 804}
819 805
820/* 806/*
@@ -857,7 +843,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
857 raw_spin_unlock_irqrestore(&rnp->lock, flags); 843 raw_spin_unlock_irqrestore(&rnp->lock, flags);
858 return; 844 return;
859 } 845 }
860 rsp->jiffies_stall = jiffies + 3 * jiffies_till_stall_check() + 3; 846 rsp->jiffies_stall = jiffies + 3 * rcu_jiffies_till_stall_check() + 3;
861 raw_spin_unlock_irqrestore(&rnp->lock, flags); 847 raw_spin_unlock_irqrestore(&rnp->lock, flags);
862 848
863 /* 849 /*
@@ -935,7 +921,7 @@ static void print_cpu_stall(struct rcu_state *rsp)
935 raw_spin_lock_irqsave(&rnp->lock, flags); 921 raw_spin_lock_irqsave(&rnp->lock, flags);
936 if (ULONG_CMP_GE(jiffies, rsp->jiffies_stall)) 922 if (ULONG_CMP_GE(jiffies, rsp->jiffies_stall))
937 rsp->jiffies_stall = jiffies + 923 rsp->jiffies_stall = jiffies +
938 3 * jiffies_till_stall_check() + 3; 924 3 * rcu_jiffies_till_stall_check() + 3;
939 raw_spin_unlock_irqrestore(&rnp->lock, flags); 925 raw_spin_unlock_irqrestore(&rnp->lock, flags);
940 926
941 set_need_resched(); /* kick ourselves to get things going. */ 927 set_need_resched(); /* kick ourselves to get things going. */
@@ -966,12 +952,6 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
966 } 952 }
967} 953}
968 954
969static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr)
970{
971 rcu_cpu_stall_suppress = 1;
972 return NOTIFY_DONE;
973}
974
975/** 955/**
976 * rcu_cpu_stall_reset - prevent further stall warnings in current grace period 956 * rcu_cpu_stall_reset - prevent further stall warnings in current grace period
977 * 957 *
@@ -989,15 +969,6 @@ void rcu_cpu_stall_reset(void)
989 rsp->jiffies_stall = jiffies + ULONG_MAX / 2; 969 rsp->jiffies_stall = jiffies + ULONG_MAX / 2;
990} 970}
991 971
992static struct notifier_block rcu_panic_block = {
993 .notifier_call = rcu_panic,
994};
995
996static void __init check_cpu_stall_init(void)
997{
998 atomic_notifier_chain_register(&panic_notifier_list, &rcu_panic_block);
999}
1000
1001/* 972/*
1002 * Update CPU-local rcu_data state to record the newly noticed grace period. 973 * Update CPU-local rcu_data state to record the newly noticed grace period.
1003 * This is used both when we started the grace period and when we notice 974 * This is used both when we started the grace period and when we notice
@@ -1071,6 +1042,145 @@ static void init_callback_list(struct rcu_data *rdp)
1071} 1042}
1072 1043
1073/* 1044/*
1045 * Determine the value that ->completed will have at the end of the
1046 * next subsequent grace period. This is used to tag callbacks so that
1047 * a CPU can invoke callbacks in a timely fashion even if that CPU has
1048 * been dyntick-idle for an extended period with callbacks under the
1049 * influence of RCU_FAST_NO_HZ.
1050 *
1051 * The caller must hold rnp->lock with interrupts disabled.
1052 */
1053static unsigned long rcu_cbs_completed(struct rcu_state *rsp,
1054 struct rcu_node *rnp)
1055{
1056 /*
1057 * If RCU is idle, we just wait for the next grace period.
1058 * But we can only be sure that RCU is idle if we are looking
1059 * at the root rcu_node structure -- otherwise, a new grace
1060 * period might have started, but just not yet gotten around
1061 * to initializing the current non-root rcu_node structure.
1062 */
1063 if (rcu_get_root(rsp) == rnp && rnp->gpnum == rnp->completed)
1064 return rnp->completed + 1;
1065
1066 /*
1067 * Otherwise, wait for a possible partial grace period and
1068 * then the subsequent full grace period.
1069 */
1070 return rnp->completed + 2;
1071}
1072
1073/*
1074 * If there is room, assign a ->completed number to any callbacks on
1075 * this CPU that have not already been assigned. Also accelerate any
1076 * callbacks that were previously assigned a ->completed number that has
1077 * since proven to be too conservative, which can happen if callbacks get
1078 * assigned a ->completed number while RCU is idle, but with reference to
1079 * a non-root rcu_node structure. This function is idempotent, so it does
1080 * not hurt to call it repeatedly.
1081 *
1082 * The caller must hold rnp->lock with interrupts disabled.
1083 */
1084static void rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
1085 struct rcu_data *rdp)
1086{
1087 unsigned long c;
1088 int i;
1089
1090 /* If the CPU has no callbacks, nothing to do. */
1091 if (!rdp->nxttail[RCU_NEXT_TAIL] || !*rdp->nxttail[RCU_DONE_TAIL])
1092 return;
1093
1094 /*
1095 * Starting from the sublist containing the callbacks most
1096 * recently assigned a ->completed number and working down, find the
1097 * first sublist that is not assignable to an upcoming grace period.
1098 * Such a sublist has something in it (first two tests) and has
1099 * a ->completed number assigned that will complete sooner than
1100 * the ->completed number for newly arrived callbacks (last test).
1101 *
1102 * The key point is that any later sublist can be assigned the
1103 * same ->completed number as the newly arrived callbacks, which
1104 * means that the callbacks in any of these later sublist can be
1105 * grouped into a single sublist, whether or not they have already
1106 * been assigned a ->completed number.
1107 */
1108 c = rcu_cbs_completed(rsp, rnp);
1109 for (i = RCU_NEXT_TAIL - 1; i > RCU_DONE_TAIL; i--)
1110 if (rdp->nxttail[i] != rdp->nxttail[i - 1] &&
1111 !ULONG_CMP_GE(rdp->nxtcompleted[i], c))
1112 break;
1113
1114 /*
1115 * If there are no sublist for unassigned callbacks, leave.
1116 * At the same time, advance "i" one sublist, so that "i" will
1117 * index into the sublist where all the remaining callbacks should
1118 * be grouped into.
1119 */
1120 if (++i >= RCU_NEXT_TAIL)
1121 return;
1122
1123 /*
1124 * Assign all subsequent callbacks' ->completed number to the next
1125 * full grace period and group them all in the sublist initially
1126 * indexed by "i".
1127 */
1128 for (; i <= RCU_NEXT_TAIL; i++) {
1129 rdp->nxttail[i] = rdp->nxttail[RCU_NEXT_TAIL];
1130 rdp->nxtcompleted[i] = c;
1131 }
1132
1133 /* Trace depending on how much we were able to accelerate. */
1134 if (!*rdp->nxttail[RCU_WAIT_TAIL])
1135 trace_rcu_grace_period(rsp->name, rdp->gpnum, "AccWaitCB");
1136 else
1137 trace_rcu_grace_period(rsp->name, rdp->gpnum, "AccReadyCB");
1138}
1139
1140/*
1141 * Move any callbacks whose grace period has completed to the
1142 * RCU_DONE_TAIL sublist, then compact the remaining sublists and
1143 * assign ->completed numbers to any callbacks in the RCU_NEXT_TAIL
1144 * sublist. This function is idempotent, so it does not hurt to
1145 * invoke it repeatedly. As long as it is not invoked -too- often...
1146 *
1147 * The caller must hold rnp->lock with interrupts disabled.
1148 */
1149static void rcu_advance_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
1150 struct rcu_data *rdp)
1151{
1152 int i, j;
1153
1154 /* If the CPU has no callbacks, nothing to do. */
1155 if (!rdp->nxttail[RCU_NEXT_TAIL] || !*rdp->nxttail[RCU_DONE_TAIL])
1156 return;
1157
1158 /*
1159 * Find all callbacks whose ->completed numbers indicate that they
1160 * are ready to invoke, and put them into the RCU_DONE_TAIL sublist.
1161 */
1162 for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++) {
1163 if (ULONG_CMP_LT(rnp->completed, rdp->nxtcompleted[i]))
1164 break;
1165 rdp->nxttail[RCU_DONE_TAIL] = rdp->nxttail[i];
1166 }
1167 /* Clean up any sublist tail pointers that were misordered above. */
1168 for (j = RCU_WAIT_TAIL; j < i; j++)
1169 rdp->nxttail[j] = rdp->nxttail[RCU_DONE_TAIL];
1170
1171 /* Copy down callbacks to fill in empty sublists. */
1172 for (j = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++, j++) {
1173 if (rdp->nxttail[j] == rdp->nxttail[RCU_NEXT_TAIL])
1174 break;
1175 rdp->nxttail[j] = rdp->nxttail[i];
1176 rdp->nxtcompleted[j] = rdp->nxtcompleted[i];
1177 }
1178
1179 /* Classify any remaining callbacks. */
1180 rcu_accelerate_cbs(rsp, rnp, rdp);
1181}
1182
1183/*
1074 * Advance this CPU's callbacks, but only if the current grace period 1184 * Advance this CPU's callbacks, but only if the current grace period
1075 * has ended. This may be called only from the CPU to whom the rdp 1185 * has ended. This may be called only from the CPU to whom the rdp
1076 * belongs. In addition, the corresponding leaf rcu_node structure's 1186 * belongs. In addition, the corresponding leaf rcu_node structure's
@@ -1080,12 +1190,15 @@ static void
1080__rcu_process_gp_end(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) 1190__rcu_process_gp_end(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp)
1081{ 1191{
1082 /* Did another grace period end? */ 1192 /* Did another grace period end? */
1083 if (rdp->completed != rnp->completed) { 1193 if (rdp->completed == rnp->completed) {
1084 1194
1085 /* Advance callbacks. No harm if list empty. */ 1195 /* No, so just accelerate recent callbacks. */
1086 rdp->nxttail[RCU_DONE_TAIL] = rdp->nxttail[RCU_WAIT_TAIL]; 1196 rcu_accelerate_cbs(rsp, rnp, rdp);
1087 rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_READY_TAIL]; 1197
1088 rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; 1198 } else {
1199
1200 /* Advance callbacks. */
1201 rcu_advance_cbs(rsp, rnp, rdp);
1089 1202
1090 /* Remember that we saw this grace-period completion. */ 1203 /* Remember that we saw this grace-period completion. */
1091 rdp->completed = rnp->completed; 1204 rdp->completed = rnp->completed;
@@ -1392,17 +1505,10 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
1392 /* 1505 /*
1393 * Because there is no grace period in progress right now, 1506 * Because there is no grace period in progress right now,
1394 * any callbacks we have up to this point will be satisfied 1507 * any callbacks we have up to this point will be satisfied
1395 * by the next grace period. So promote all callbacks to be 1508 * by the next grace period. So this is a good place to
1396 * handled after the end of the next grace period. If the 1509 * assign a grace period number to recently posted callbacks.
1397 * CPU is not yet aware of the end of the previous grace period,
1398 * we need to allow for the callback advancement that will
1399 * occur when it does become aware. Deadlock prevents us from
1400 * making it aware at this point: We cannot acquire a leaf
1401 * rcu_node ->lock while holding the root rcu_node ->lock.
1402 */ 1510 */
1403 rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; 1511 rcu_accelerate_cbs(rsp, rnp, rdp);
1404 if (rdp->completed == rsp->completed)
1405 rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
1406 1512
1407 rsp->gp_flags = RCU_GP_FLAG_INIT; 1513 rsp->gp_flags = RCU_GP_FLAG_INIT;
1408 raw_spin_unlock(&rnp->lock); /* Interrupts remain disabled. */ 1514 raw_spin_unlock(&rnp->lock); /* Interrupts remain disabled. */
@@ -1527,7 +1633,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp)
1527 * This GP can't end until cpu checks in, so all of our 1633 * This GP can't end until cpu checks in, so all of our
1528 * callbacks can be processed during the next GP. 1634 * callbacks can be processed during the next GP.
1529 */ 1635 */
1530 rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; 1636 rcu_accelerate_cbs(rsp, rnp, rdp);
1531 1637
1532 rcu_report_qs_rnp(mask, rsp, rnp, flags); /* rlses rnp->lock */ 1638 rcu_report_qs_rnp(mask, rsp, rnp, flags); /* rlses rnp->lock */
1533 } 1639 }
@@ -1779,7 +1885,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
1779 long bl, count, count_lazy; 1885 long bl, count, count_lazy;
1780 int i; 1886 int i;
1781 1887
1782 /* If no callbacks are ready, just return.*/ 1888 /* If no callbacks are ready, just return. */
1783 if (!cpu_has_callbacks_ready_to_invoke(rdp)) { 1889 if (!cpu_has_callbacks_ready_to_invoke(rdp)) {
1784 trace_rcu_batch_start(rsp->name, rdp->qlen_lazy, rdp->qlen, 0); 1890 trace_rcu_batch_start(rsp->name, rdp->qlen_lazy, rdp->qlen, 0);
1785 trace_rcu_batch_end(rsp->name, 0, !!ACCESS_ONCE(rdp->nxtlist), 1891 trace_rcu_batch_end(rsp->name, 0, !!ACCESS_ONCE(rdp->nxtlist),
@@ -2008,19 +2114,19 @@ __rcu_process_callbacks(struct rcu_state *rsp)
2008 2114
2009 WARN_ON_ONCE(rdp->beenonline == 0); 2115 WARN_ON_ONCE(rdp->beenonline == 0);
2010 2116
2011 /* 2117 /* Handle the end of a grace period that some other CPU ended. */
2012 * Advance callbacks in response to end of earlier grace
2013 * period that some other CPU ended.
2014 */
2015 rcu_process_gp_end(rsp, rdp); 2118 rcu_process_gp_end(rsp, rdp);
2016 2119
2017 /* Update RCU state based on any recent quiescent states. */ 2120 /* Update RCU state based on any recent quiescent states. */
2018 rcu_check_quiescent_state(rsp, rdp); 2121 rcu_check_quiescent_state(rsp, rdp);
2019 2122
2020 /* Does this CPU require a not-yet-started grace period? */ 2123 /* Does this CPU require a not-yet-started grace period? */
2124 local_irq_save(flags);
2021 if (cpu_needs_another_gp(rsp, rdp)) { 2125 if (cpu_needs_another_gp(rsp, rdp)) {
2022 raw_spin_lock_irqsave(&rcu_get_root(rsp)->lock, flags); 2126 raw_spin_lock(&rcu_get_root(rsp)->lock); /* irqs disabled. */
2023 rcu_start_gp(rsp, flags); /* releases above lock */ 2127 rcu_start_gp(rsp, flags); /* releases above lock */
2128 } else {
2129 local_irq_restore(flags);
2024 } 2130 }
2025 2131
2026 /* If there are callbacks ready, invoke them. */ 2132 /* If there are callbacks ready, invoke them. */
@@ -2719,9 +2825,6 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
2719 rdp->dynticks = &per_cpu(rcu_dynticks, cpu); 2825 rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
2720 WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE); 2826 WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE);
2721 WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1); 2827 WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1);
2722#ifdef CONFIG_RCU_USER_QS
2723 WARN_ON_ONCE(rdp->dynticks->in_user);
2724#endif
2725 rdp->cpu = cpu; 2828 rdp->cpu = cpu;
2726 rdp->rsp = rsp; 2829 rdp->rsp = rsp;
2727 rcu_boot_init_nocb_percpu_data(rdp); 2830 rcu_boot_init_nocb_percpu_data(rdp);
@@ -2938,6 +3041,10 @@ static void __init rcu_init_one(struct rcu_state *rsp,
2938 3041
2939 BUILD_BUG_ON(MAX_RCU_LVLS > ARRAY_SIZE(buf)); /* Fix buf[] init! */ 3042 BUILD_BUG_ON(MAX_RCU_LVLS > ARRAY_SIZE(buf)); /* Fix buf[] init! */
2940 3043
3044 /* Silence gcc 4.8 warning about array index out of range. */
3045 if (rcu_num_lvls > RCU_NUM_LVLS)
3046 panic("rcu_init_one: rcu_num_lvls overflow");
3047
2941 /* Initialize the level-tracking arrays. */ 3048 /* Initialize the level-tracking arrays. */
2942 3049
2943 for (i = 0; i < rcu_num_lvls; i++) 3050 for (i = 0; i < rcu_num_lvls; i++)
@@ -3074,7 +3181,6 @@ void __init rcu_init(void)
3074 cpu_notifier(rcu_cpu_notify, 0); 3181 cpu_notifier(rcu_cpu_notify, 0);
3075 for_each_online_cpu(cpu) 3182 for_each_online_cpu(cpu)
3076 rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)cpu); 3183 rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)cpu);
3077 check_cpu_stall_init();
3078} 3184}
3079 3185
3080#include "rcutree_plugin.h" 3186#include "rcutree_plugin.h"
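
The heart of the rcutree.c changes is the ->completed tagging of callback
sublists: rcu_cbs_completed() predicts which grace-period number will satisfy
callbacks queued right now (completed + 1 only when the root rcu_node shows no
grace period in progress, completed + 2 otherwise to allow for a partial grace
period already underway), and rcu_accelerate_cbs()/rcu_advance_cbs() keep the
nxttail[]/nxtcompleted[] sublists consistent with that prediction. A reduced
model of the prediction step, using hypothetical types in place of
struct rcu_state/struct rcu_node:

struct ex_node {
        unsigned long gpnum;            /* most recently started grace period */
        unsigned long completed;        /* most recently completed grace period */
        int is_root;                    /* is this the root rcu_node? */
};

static unsigned long ex_cbs_completed(const struct ex_node *rnp)
{
        /*
         * Only the root node can prove RCU is idle; if it is, the very
         * next grace period suffices for callbacks queued now.
         */
        if (rnp->is_root && rnp->gpnum == rnp->completed)
                return rnp->completed + 1;

        /*
         * Otherwise a grace period may already be running that these
         * callbacks cannot rely on: wait it out plus one full one.
         */
        return rnp->completed + 2;
}
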
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 4b69291b093d..c896b5045d9d 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -102,10 +102,6 @@ struct rcu_dynticks {
102 /* idle-period nonlazy_posted snapshot. */ 102 /* idle-period nonlazy_posted snapshot. */
103 int tick_nohz_enabled_snap; /* Previously seen value from sysfs. */ 103 int tick_nohz_enabled_snap; /* Previously seen value from sysfs. */
104#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */ 104#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
105#ifdef CONFIG_RCU_USER_QS
106 bool ignore_user_qs; /* Treat userspace as extended QS or not */
107 bool in_user; /* Is the CPU in userland from RCU POV? */
108#endif
109}; 105};
110 106
111/* RCU's kthread states for tracing. */ 107/* RCU's kthread states for tracing. */
@@ -282,6 +278,8 @@ struct rcu_data {
282 */ 278 */
283 struct rcu_head *nxtlist; 279 struct rcu_head *nxtlist;
284 struct rcu_head **nxttail[RCU_NEXT_SIZE]; 280 struct rcu_head **nxttail[RCU_NEXT_SIZE];
281 unsigned long nxtcompleted[RCU_NEXT_SIZE];
282 /* grace periods for sublists. */
285 long qlen_lazy; /* # of lazy queued callbacks */ 283 long qlen_lazy; /* # of lazy queued callbacks */
286 long qlen; /* # of queued callbacks, incl lazy */ 284 long qlen; /* # of queued callbacks, incl lazy */
287 long qlen_last_fqs_check; 285 long qlen_last_fqs_check;
@@ -343,11 +341,6 @@ struct rcu_data {
343 341
344#define RCU_JIFFIES_TILL_FORCE_QS 3 /* for rsp->jiffies_force_qs */ 342#define RCU_JIFFIES_TILL_FORCE_QS 3 /* for rsp->jiffies_force_qs */
345 343
346#ifdef CONFIG_PROVE_RCU
347#define RCU_STALL_DELAY_DELTA (5 * HZ)
348#else
349#define RCU_STALL_DELAY_DELTA 0
350#endif
351#define RCU_STALL_RAT_DELAY 2 /* Allow other CPUs time */ 344#define RCU_STALL_RAT_DELAY 2 /* Allow other CPUs time */
352 /* to take at least one */ 345 /* to take at least one */
353 /* scheduling clock irq */ 346 /* scheduling clock irq */
diff --git a/kernel/srcu.c b/kernel/srcu.c
index 2b859828cdc3..01d5ccb8bfe3 100644
--- a/kernel/srcu.c
+++ b/kernel/srcu.c
@@ -282,12 +282,8 @@ static int srcu_readers_active(struct srcu_struct *sp)
282 */ 282 */
283void cleanup_srcu_struct(struct srcu_struct *sp) 283void cleanup_srcu_struct(struct srcu_struct *sp)
284{ 284{
285 int sum; 285 if (WARN_ON(srcu_readers_active(sp)))
286 286 return; /* Leakage unless caller handles error. */
287 sum = srcu_readers_active(sp);
288 WARN_ON(sum); /* Leakage unless caller handles error. */
289 if (sum != 0)
290 return;
291 free_percpu(sp->per_cpu_ref); 287 free_percpu(sp->per_cpu_ref);
292 sp->per_cpu_ref = NULL; 288 sp->per_cpu_ref = NULL;
293} 289}
@@ -302,9 +298,8 @@ int __srcu_read_lock(struct srcu_struct *sp)
302{ 298{
303 int idx; 299 int idx;
304 300
301 idx = ACCESS_ONCE(sp->completed) & 0x1;
305 preempt_disable(); 302 preempt_disable();
306 idx = rcu_dereference_index_check(sp->completed,
307 rcu_read_lock_sched_held()) & 0x1;
308 ACCESS_ONCE(this_cpu_ptr(sp->per_cpu_ref)->c[idx]) += 1; 303 ACCESS_ONCE(this_cpu_ptr(sp->per_cpu_ref)->c[idx]) += 1;
309 smp_mb(); /* B */ /* Avoid leaking the critical section. */ 304 smp_mb(); /* B */ /* Avoid leaking the critical section. */
310 ACCESS_ONCE(this_cpu_ptr(sp->per_cpu_ref)->seq[idx]) += 1; 305 ACCESS_ONCE(this_cpu_ptr(sp->per_cpu_ref)->seq[idx]) += 1;
@@ -321,10 +316,8 @@ EXPORT_SYMBOL_GPL(__srcu_read_lock);
321 */ 316 */
322void __srcu_read_unlock(struct srcu_struct *sp, int idx) 317void __srcu_read_unlock(struct srcu_struct *sp, int idx)
323{ 318{
324 preempt_disable();
325 smp_mb(); /* C */ /* Avoid leaking the critical section. */ 319 smp_mb(); /* C */ /* Avoid leaking the critical section. */
326 ACCESS_ONCE(this_cpu_ptr(sp->per_cpu_ref)->c[idx]) -= 1; 320 this_cpu_dec(sp->per_cpu_ref->c[idx]);
327 preempt_enable();
328} 321}
329EXPORT_SYMBOL_GPL(__srcu_read_unlock); 322EXPORT_SYMBOL_GPL(__srcu_read_unlock);
330 323
@@ -423,6 +416,7 @@ static void __synchronize_srcu(struct srcu_struct *sp, int trycount)
423 !lock_is_held(&rcu_sched_lock_map), 416 !lock_is_held(&rcu_sched_lock_map),
424 "Illegal synchronize_srcu() in same-type SRCU (or RCU) read-side critical section"); 417 "Illegal synchronize_srcu() in same-type SRCU (or RCU) read-side critical section");
425 418
419 might_sleep();
426 init_completion(&rcu.completion); 420 init_completion(&rcu.completion);
427 421
428 head->next = NULL; 422 head->next = NULL;
@@ -455,10 +449,12 @@ static void __synchronize_srcu(struct srcu_struct *sp, int trycount)
455 * synchronize_srcu - wait for prior SRCU read-side critical-section completion 449 * synchronize_srcu - wait for prior SRCU read-side critical-section completion
456 * @sp: srcu_struct with which to synchronize. 450 * @sp: srcu_struct with which to synchronize.
457 * 451 *
458 * Flip the completed counter, and wait for the old count to drain to zero. 452 * Wait for the count to drain to zero of both indexes. To avoid the
459 * As with classic RCU, the updater must use some separate means of 453 * possible starvation of synchronize_srcu(), it waits for the count of
460 * synchronizing concurrent updates. Can block; must be called from 454 * the index=((->completed & 1) ^ 1) to drain to zero at first,
461 * process context. 455 * and then flip the completed and wait for the count of the other index.
456 *
457 * Can block; must be called from process context.
462 * 458 *
463 * Note that it is illegal to call synchronize_srcu() from the corresponding 459 * Note that it is illegal to call synchronize_srcu() from the corresponding
464 * SRCU read-side critical section; doing so will result in deadlock. 460 * SRCU read-side critical section; doing so will result in deadlock.
@@ -480,12 +476,11 @@ EXPORT_SYMBOL_GPL(synchronize_srcu);
480 * Wait for an SRCU grace period to elapse, but be more aggressive about 476 * Wait for an SRCU grace period to elapse, but be more aggressive about
481 * spinning rather than blocking when waiting. 477 * spinning rather than blocking when waiting.
482 * 478 *
483 * Note that it is illegal to call this function while holding any lock 479 * Note that it is also illegal to call synchronize_srcu_expedited()
484 * that is acquired by a CPU-hotplug notifier. It is also illegal to call 480 * from the corresponding SRCU read-side critical section;
485 * synchronize_srcu_expedited() from the corresponding SRCU read-side 481 * doing so will result in deadlock. However, it is perfectly legal
486 * critical section; doing so will result in deadlock. However, it is 482 * to call synchronize_srcu_expedited() on one srcu_struct from some
487 * perfectly legal to call synchronize_srcu_expedited() on one srcu_struct 483 * other srcu_struct's read-side critical section, as long as
488 * from some other srcu_struct's read-side critical section, as long as
489 * the resulting graph of srcu_structs is acyclic. 484 * the resulting graph of srcu_structs is acyclic.
490 */ 485 */
491void synchronize_srcu_expedited(struct srcu_struct *sp) 486void synchronize_srcu_expedited(struct srcu_struct *sp)
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index 394783531cbb..1bbb1b200cec 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -44,6 +44,7 @@ u64 notrace trace_clock_local(void)
44 44
45 return clock; 45 return clock;
46} 46}
47EXPORT_SYMBOL_GPL(trace_clock_local);
47 48
48/* 49/*
49 * trace_clock(): 'between' trace clock. Not completely serialized, 50 * trace_clock(): 'between' trace clock. Not completely serialized,