aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/rcu
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/rcu')
-rw-r--r--kernel/rcu/Makefile3
-rw-r--r--kernel/rcu/rcu.h6
-rw-r--r--kernel/rcu/rcutorture.c66
-rw-r--r--kernel/rcu/srcu.c2
-rw-r--r--kernel/rcu/tiny.c113
-rw-r--r--kernel/rcu/tiny_plugin.h9
-rw-r--r--kernel/rcu/tree.c355
-rw-r--r--kernel/rcu/tree.h62
-rw-r--r--kernel/rcu/tree_plugin.h276
-rw-r--r--kernel/rcu/tree_trace.c8
10 files changed, 414 insertions, 486 deletions
diff --git a/kernel/rcu/Makefile b/kernel/rcu/Makefile
index e6fae503d1bc..50a808424b06 100644
--- a/kernel/rcu/Makefile
+++ b/kernel/rcu/Makefile
@@ -1,4 +1,5 @@
1obj-y += update.o srcu.o 1obj-y += update.o
2obj-$(CONFIG_SRCU) += srcu.o
2obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o 3obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
3obj-$(CONFIG_TREE_RCU) += tree.o 4obj-$(CONFIG_TREE_RCU) += tree.o
4obj-$(CONFIG_PREEMPT_RCU) += tree.o 5obj-$(CONFIG_PREEMPT_RCU) += tree.o
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 07bb02eda844..80adef7d4c3d 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -137,4 +137,10 @@ int rcu_jiffies_till_stall_check(void);
137 137
138void rcu_early_boot_tests(void); 138void rcu_early_boot_tests(void);
139 139
140/*
141 * This function really isn't for public consumption, but RCU is special in
142 * that context switches can allow the state machine to make progress.
143 */
144extern void resched_cpu(int cpu);
145
140#endif /* __LINUX_RCU_H */ 146#endif /* __LINUX_RCU_H */
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 4d559baf06e0..30d42aa55d83 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -244,7 +244,8 @@ struct rcu_torture_ops {
244 int (*readlock)(void); 244 int (*readlock)(void);
245 void (*read_delay)(struct torture_random_state *rrsp); 245 void (*read_delay)(struct torture_random_state *rrsp);
246 void (*readunlock)(int idx); 246 void (*readunlock)(int idx);
247 int (*completed)(void); 247 unsigned long (*started)(void);
248 unsigned long (*completed)(void);
248 void (*deferred_free)(struct rcu_torture *p); 249 void (*deferred_free)(struct rcu_torture *p);
249 void (*sync)(void); 250 void (*sync)(void);
250 void (*exp_sync)(void); 251 void (*exp_sync)(void);
@@ -296,11 +297,6 @@ static void rcu_torture_read_unlock(int idx) __releases(RCU)
296 rcu_read_unlock(); 297 rcu_read_unlock();
297} 298}
298 299
299static int rcu_torture_completed(void)
300{
301 return rcu_batches_completed();
302}
303
304/* 300/*
305 * Update callback in the pipe. This should be invoked after a grace period. 301 * Update callback in the pipe. This should be invoked after a grace period.
306 */ 302 */
@@ -356,7 +352,7 @@ rcu_torture_cb(struct rcu_head *p)
356 cur_ops->deferred_free(rp); 352 cur_ops->deferred_free(rp);
357} 353}
358 354
359static int rcu_no_completed(void) 355static unsigned long rcu_no_completed(void)
360{ 356{
361 return 0; 357 return 0;
362} 358}
@@ -377,7 +373,8 @@ static struct rcu_torture_ops rcu_ops = {
377 .readlock = rcu_torture_read_lock, 373 .readlock = rcu_torture_read_lock,
378 .read_delay = rcu_read_delay, 374 .read_delay = rcu_read_delay,
379 .readunlock = rcu_torture_read_unlock, 375 .readunlock = rcu_torture_read_unlock,
380 .completed = rcu_torture_completed, 376 .started = rcu_batches_started,
377 .completed = rcu_batches_completed,
381 .deferred_free = rcu_torture_deferred_free, 378 .deferred_free = rcu_torture_deferred_free,
382 .sync = synchronize_rcu, 379 .sync = synchronize_rcu,
383 .exp_sync = synchronize_rcu_expedited, 380 .exp_sync = synchronize_rcu_expedited,
@@ -407,11 +404,6 @@ static void rcu_bh_torture_read_unlock(int idx) __releases(RCU_BH)
407 rcu_read_unlock_bh(); 404 rcu_read_unlock_bh();
408} 405}
409 406
410static int rcu_bh_torture_completed(void)
411{
412 return rcu_batches_completed_bh();
413}
414
415static void rcu_bh_torture_deferred_free(struct rcu_torture *p) 407static void rcu_bh_torture_deferred_free(struct rcu_torture *p)
416{ 408{
417 call_rcu_bh(&p->rtort_rcu, rcu_torture_cb); 409 call_rcu_bh(&p->rtort_rcu, rcu_torture_cb);
@@ -423,7 +415,8 @@ static struct rcu_torture_ops rcu_bh_ops = {
423 .readlock = rcu_bh_torture_read_lock, 415 .readlock = rcu_bh_torture_read_lock,
424 .read_delay = rcu_read_delay, /* just reuse rcu's version. */ 416 .read_delay = rcu_read_delay, /* just reuse rcu's version. */
425 .readunlock = rcu_bh_torture_read_unlock, 417 .readunlock = rcu_bh_torture_read_unlock,
426 .completed = rcu_bh_torture_completed, 418 .started = rcu_batches_started_bh,
419 .completed = rcu_batches_completed_bh,
427 .deferred_free = rcu_bh_torture_deferred_free, 420 .deferred_free = rcu_bh_torture_deferred_free,
428 .sync = synchronize_rcu_bh, 421 .sync = synchronize_rcu_bh,
429 .exp_sync = synchronize_rcu_bh_expedited, 422 .exp_sync = synchronize_rcu_bh_expedited,
@@ -466,6 +459,7 @@ static struct rcu_torture_ops rcu_busted_ops = {
466 .readlock = rcu_torture_read_lock, 459 .readlock = rcu_torture_read_lock,
467 .read_delay = rcu_read_delay, /* just reuse rcu's version. */ 460 .read_delay = rcu_read_delay, /* just reuse rcu's version. */
468 .readunlock = rcu_torture_read_unlock, 461 .readunlock = rcu_torture_read_unlock,
462 .started = rcu_no_completed,
469 .completed = rcu_no_completed, 463 .completed = rcu_no_completed,
470 .deferred_free = rcu_busted_torture_deferred_free, 464 .deferred_free = rcu_busted_torture_deferred_free,
471 .sync = synchronize_rcu_busted, 465 .sync = synchronize_rcu_busted,
@@ -510,7 +504,7 @@ static void srcu_torture_read_unlock(int idx) __releases(&srcu_ctl)
510 srcu_read_unlock(&srcu_ctl, idx); 504 srcu_read_unlock(&srcu_ctl, idx);
511} 505}
512 506
513static int srcu_torture_completed(void) 507static unsigned long srcu_torture_completed(void)
514{ 508{
515 return srcu_batches_completed(&srcu_ctl); 509 return srcu_batches_completed(&srcu_ctl);
516} 510}
@@ -564,6 +558,7 @@ static struct rcu_torture_ops srcu_ops = {
564 .readlock = srcu_torture_read_lock, 558 .readlock = srcu_torture_read_lock,
565 .read_delay = srcu_read_delay, 559 .read_delay = srcu_read_delay,
566 .readunlock = srcu_torture_read_unlock, 560 .readunlock = srcu_torture_read_unlock,
561 .started = NULL,
567 .completed = srcu_torture_completed, 562 .completed = srcu_torture_completed,
568 .deferred_free = srcu_torture_deferred_free, 563 .deferred_free = srcu_torture_deferred_free,
569 .sync = srcu_torture_synchronize, 564 .sync = srcu_torture_synchronize,
@@ -600,7 +595,8 @@ static struct rcu_torture_ops sched_ops = {
600 .readlock = sched_torture_read_lock, 595 .readlock = sched_torture_read_lock,
601 .read_delay = rcu_read_delay, /* just reuse rcu's version. */ 596 .read_delay = rcu_read_delay, /* just reuse rcu's version. */
602 .readunlock = sched_torture_read_unlock, 597 .readunlock = sched_torture_read_unlock,
603 .completed = rcu_no_completed, 598 .started = rcu_batches_started_sched,
599 .completed = rcu_batches_completed_sched,
604 .deferred_free = rcu_sched_torture_deferred_free, 600 .deferred_free = rcu_sched_torture_deferred_free,
605 .sync = synchronize_sched, 601 .sync = synchronize_sched,
606 .exp_sync = synchronize_sched_expedited, 602 .exp_sync = synchronize_sched_expedited,
@@ -638,6 +634,7 @@ static struct rcu_torture_ops tasks_ops = {
638 .readlock = tasks_torture_read_lock, 634 .readlock = tasks_torture_read_lock,
639 .read_delay = rcu_read_delay, /* just reuse rcu's version. */ 635 .read_delay = rcu_read_delay, /* just reuse rcu's version. */
640 .readunlock = tasks_torture_read_unlock, 636 .readunlock = tasks_torture_read_unlock,
637 .started = rcu_no_completed,
641 .completed = rcu_no_completed, 638 .completed = rcu_no_completed,
642 .deferred_free = rcu_tasks_torture_deferred_free, 639 .deferred_free = rcu_tasks_torture_deferred_free,
643 .sync = synchronize_rcu_tasks, 640 .sync = synchronize_rcu_tasks,
@@ -1015,8 +1012,8 @@ static void rcutorture_trace_dump(void)
1015static void rcu_torture_timer(unsigned long unused) 1012static void rcu_torture_timer(unsigned long unused)
1016{ 1013{
1017 int idx; 1014 int idx;
1018 int completed; 1015 unsigned long started;
1019 int completed_end; 1016 unsigned long completed;
1020 static DEFINE_TORTURE_RANDOM(rand); 1017 static DEFINE_TORTURE_RANDOM(rand);
1021 static DEFINE_SPINLOCK(rand_lock); 1018 static DEFINE_SPINLOCK(rand_lock);
1022 struct rcu_torture *p; 1019 struct rcu_torture *p;
@@ -1024,7 +1021,10 @@ static void rcu_torture_timer(unsigned long unused)
1024 unsigned long long ts; 1021 unsigned long long ts;
1025 1022
1026 idx = cur_ops->readlock(); 1023 idx = cur_ops->readlock();
1027 completed = cur_ops->completed(); 1024 if (cur_ops->started)
1025 started = cur_ops->started();
1026 else
1027 started = cur_ops->completed();
1028 ts = rcu_trace_clock_local(); 1028 ts = rcu_trace_clock_local();
1029 p = rcu_dereference_check(rcu_torture_current, 1029 p = rcu_dereference_check(rcu_torture_current,
1030 rcu_read_lock_bh_held() || 1030 rcu_read_lock_bh_held() ||
@@ -1047,14 +1047,16 @@ static void rcu_torture_timer(unsigned long unused)
1047 /* Should not happen, but... */ 1047 /* Should not happen, but... */
1048 pipe_count = RCU_TORTURE_PIPE_LEN; 1048 pipe_count = RCU_TORTURE_PIPE_LEN;
1049 } 1049 }
1050 completed_end = cur_ops->completed(); 1050 completed = cur_ops->completed();
1051 if (pipe_count > 1) { 1051 if (pipe_count > 1) {
1052 do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu, ts, 1052 do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu, ts,
1053 completed, completed_end); 1053 started, completed);
1054 rcutorture_trace_dump(); 1054 rcutorture_trace_dump();
1055 } 1055 }
1056 __this_cpu_inc(rcu_torture_count[pipe_count]); 1056 __this_cpu_inc(rcu_torture_count[pipe_count]);
1057 completed = completed_end - completed; 1057 completed = completed - started;
1058 if (cur_ops->started)
1059 completed++;
1058 if (completed > RCU_TORTURE_PIPE_LEN) { 1060 if (completed > RCU_TORTURE_PIPE_LEN) {
1059 /* Should not happen, but... */ 1061 /* Should not happen, but... */
1060 completed = RCU_TORTURE_PIPE_LEN; 1062 completed = RCU_TORTURE_PIPE_LEN;
@@ -1073,8 +1075,8 @@ static void rcu_torture_timer(unsigned long unused)
1073static int 1075static int
1074rcu_torture_reader(void *arg) 1076rcu_torture_reader(void *arg)
1075{ 1077{
1076 int completed; 1078 unsigned long started;
1077 int completed_end; 1079 unsigned long completed;
1078 int idx; 1080 int idx;
1079 DEFINE_TORTURE_RANDOM(rand); 1081 DEFINE_TORTURE_RANDOM(rand);
1080 struct rcu_torture *p; 1082 struct rcu_torture *p;
@@ -1093,7 +1095,10 @@ rcu_torture_reader(void *arg)
1093 mod_timer(&t, jiffies + 1); 1095 mod_timer(&t, jiffies + 1);
1094 } 1096 }
1095 idx = cur_ops->readlock(); 1097 idx = cur_ops->readlock();
1096 completed = cur_ops->completed(); 1098 if (cur_ops->started)
1099 started = cur_ops->started();
1100 else
1101 started = cur_ops->completed();
1097 ts = rcu_trace_clock_local(); 1102 ts = rcu_trace_clock_local();
1098 p = rcu_dereference_check(rcu_torture_current, 1103 p = rcu_dereference_check(rcu_torture_current,
1099 rcu_read_lock_bh_held() || 1104 rcu_read_lock_bh_held() ||
@@ -1114,14 +1119,16 @@ rcu_torture_reader(void *arg)
1114 /* Should not happen, but... */ 1119 /* Should not happen, but... */
1115 pipe_count = RCU_TORTURE_PIPE_LEN; 1120 pipe_count = RCU_TORTURE_PIPE_LEN;
1116 } 1121 }
1117 completed_end = cur_ops->completed(); 1122 completed = cur_ops->completed();
1118 if (pipe_count > 1) { 1123 if (pipe_count > 1) {
1119 do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu, 1124 do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu,
1120 ts, completed, completed_end); 1125 ts, started, completed);
1121 rcutorture_trace_dump(); 1126 rcutorture_trace_dump();
1122 } 1127 }
1123 __this_cpu_inc(rcu_torture_count[pipe_count]); 1128 __this_cpu_inc(rcu_torture_count[pipe_count]);
1124 completed = completed_end - completed; 1129 completed = completed - started;
1130 if (cur_ops->started)
1131 completed++;
1125 if (completed > RCU_TORTURE_PIPE_LEN) { 1132 if (completed > RCU_TORTURE_PIPE_LEN) {
1126 /* Should not happen, but... */ 1133 /* Should not happen, but... */
1127 completed = RCU_TORTURE_PIPE_LEN; 1134 completed = RCU_TORTURE_PIPE_LEN;
@@ -1420,6 +1427,9 @@ static int rcu_torture_barrier(void *arg)
1420 cur_ops->cb_barrier(); /* Implies smp_mb() for wait_event(). */ 1427 cur_ops->cb_barrier(); /* Implies smp_mb() for wait_event(). */
1421 if (atomic_read(&barrier_cbs_invoked) != n_barrier_cbs) { 1428 if (atomic_read(&barrier_cbs_invoked) != n_barrier_cbs) {
1422 n_rcu_torture_barrier_error++; 1429 n_rcu_torture_barrier_error++;
1430 pr_err("barrier_cbs_invoked = %d, n_barrier_cbs = %d\n",
1431 atomic_read(&barrier_cbs_invoked),
1432 n_barrier_cbs);
1423 WARN_ON_ONCE(1); 1433 WARN_ON_ONCE(1);
1424 } 1434 }
1425 n_barrier_successes++; 1435 n_barrier_successes++;
diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c
index e037f3eb2f7b..445bf8ffe3fb 100644
--- a/kernel/rcu/srcu.c
+++ b/kernel/rcu/srcu.c
@@ -546,7 +546,7 @@ EXPORT_SYMBOL_GPL(srcu_barrier);
546 * Report the number of batches, correlated with, but not necessarily 546 * Report the number of batches, correlated with, but not necessarily
547 * precisely the same as, the number of grace periods that have elapsed. 547 * precisely the same as, the number of grace periods that have elapsed.
548 */ 548 */
549long srcu_batches_completed(struct srcu_struct *sp) 549unsigned long srcu_batches_completed(struct srcu_struct *sp)
550{ 550{
551 return sp->completed; 551 return sp->completed;
552} 552}
diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c
index 0db5649f8817..cc9ceca7bde1 100644
--- a/kernel/rcu/tiny.c
+++ b/kernel/rcu/tiny.c
@@ -47,54 +47,14 @@ static void __call_rcu(struct rcu_head *head,
47 void (*func)(struct rcu_head *rcu), 47 void (*func)(struct rcu_head *rcu),
48 struct rcu_ctrlblk *rcp); 48 struct rcu_ctrlblk *rcp);
49 49
50static long long rcu_dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
51
52#include "tiny_plugin.h" 50#include "tiny_plugin.h"
53 51
54/* Common code for rcu_idle_enter() and rcu_irq_exit(), see kernel/rcu/tree.c. */
55static void rcu_idle_enter_common(long long newval)
56{
57 if (newval) {
58 RCU_TRACE(trace_rcu_dyntick(TPS("--="),
59 rcu_dynticks_nesting, newval));
60 rcu_dynticks_nesting = newval;
61 return;
62 }
63 RCU_TRACE(trace_rcu_dyntick(TPS("Start"),
64 rcu_dynticks_nesting, newval));
65 if (IS_ENABLED(CONFIG_RCU_TRACE) && !is_idle_task(current)) {
66 struct task_struct *idle __maybe_unused = idle_task(smp_processor_id());
67
68 RCU_TRACE(trace_rcu_dyntick(TPS("Entry error: not idle task"),
69 rcu_dynticks_nesting, newval));
70 ftrace_dump(DUMP_ALL);
71 WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
72 current->pid, current->comm,
73 idle->pid, idle->comm); /* must be idle task! */
74 }
75 rcu_sched_qs(); /* implies rcu_bh_inc() */
76 barrier();
77 rcu_dynticks_nesting = newval;
78}
79
80/* 52/*
81 * Enter idle, which is an extended quiescent state if we have fully 53 * Enter idle, which is an extended quiescent state if we have fully
82 * entered that mode (i.e., if the new value of dynticks_nesting is zero). 54 * entered that mode.
83 */ 55 */
84void rcu_idle_enter(void) 56void rcu_idle_enter(void)
85{ 57{
86 unsigned long flags;
87 long long newval;
88
89 local_irq_save(flags);
90 WARN_ON_ONCE((rcu_dynticks_nesting & DYNTICK_TASK_NEST_MASK) == 0);
91 if ((rcu_dynticks_nesting & DYNTICK_TASK_NEST_MASK) ==
92 DYNTICK_TASK_NEST_VALUE)
93 newval = 0;
94 else
95 newval = rcu_dynticks_nesting - DYNTICK_TASK_NEST_VALUE;
96 rcu_idle_enter_common(newval);
97 local_irq_restore(flags);
98} 58}
99EXPORT_SYMBOL_GPL(rcu_idle_enter); 59EXPORT_SYMBOL_GPL(rcu_idle_enter);
100 60
@@ -103,55 +63,14 @@ EXPORT_SYMBOL_GPL(rcu_idle_enter);
103 */ 63 */
104void rcu_irq_exit(void) 64void rcu_irq_exit(void)
105{ 65{
106 unsigned long flags;
107 long long newval;
108
109 local_irq_save(flags);
110 newval = rcu_dynticks_nesting - 1;
111 WARN_ON_ONCE(newval < 0);
112 rcu_idle_enter_common(newval);
113 local_irq_restore(flags);
114} 66}
115EXPORT_SYMBOL_GPL(rcu_irq_exit); 67EXPORT_SYMBOL_GPL(rcu_irq_exit);
116 68
117/* Common code for rcu_idle_exit() and rcu_irq_enter(), see kernel/rcu/tree.c. */
118static void rcu_idle_exit_common(long long oldval)
119{
120 if (oldval) {
121 RCU_TRACE(trace_rcu_dyntick(TPS("++="),
122 oldval, rcu_dynticks_nesting));
123 return;
124 }
125 RCU_TRACE(trace_rcu_dyntick(TPS("End"), oldval, rcu_dynticks_nesting));
126 if (IS_ENABLED(CONFIG_RCU_TRACE) && !is_idle_task(current)) {
127 struct task_struct *idle __maybe_unused = idle_task(smp_processor_id());
128
129 RCU_TRACE(trace_rcu_dyntick(TPS("Exit error: not idle task"),
130 oldval, rcu_dynticks_nesting));
131 ftrace_dump(DUMP_ALL);
132 WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
133 current->pid, current->comm,
134 idle->pid, idle->comm); /* must be idle task! */
135 }
136}
137
138/* 69/*
139 * Exit idle, so that we are no longer in an extended quiescent state. 70 * Exit idle, so that we are no longer in an extended quiescent state.
140 */ 71 */
141void rcu_idle_exit(void) 72void rcu_idle_exit(void)
142{ 73{
143 unsigned long flags;
144 long long oldval;
145
146 local_irq_save(flags);
147 oldval = rcu_dynticks_nesting;
148 WARN_ON_ONCE(rcu_dynticks_nesting < 0);
149 if (rcu_dynticks_nesting & DYNTICK_TASK_NEST_MASK)
150 rcu_dynticks_nesting += DYNTICK_TASK_NEST_VALUE;
151 else
152 rcu_dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
153 rcu_idle_exit_common(oldval);
154 local_irq_restore(flags);
155} 74}
156EXPORT_SYMBOL_GPL(rcu_idle_exit); 75EXPORT_SYMBOL_GPL(rcu_idle_exit);
157 76
@@ -160,15 +79,6 @@ EXPORT_SYMBOL_GPL(rcu_idle_exit);
160 */ 79 */
161void rcu_irq_enter(void) 80void rcu_irq_enter(void)
162{ 81{
163 unsigned long flags;
164 long long oldval;
165
166 local_irq_save(flags);
167 oldval = rcu_dynticks_nesting;
168 rcu_dynticks_nesting++;
169 WARN_ON_ONCE(rcu_dynticks_nesting == 0);
170 rcu_idle_exit_common(oldval);
171 local_irq_restore(flags);
172} 82}
173EXPORT_SYMBOL_GPL(rcu_irq_enter); 83EXPORT_SYMBOL_GPL(rcu_irq_enter);
174 84
@@ -179,23 +89,13 @@ EXPORT_SYMBOL_GPL(rcu_irq_enter);
179 */ 89 */
180bool notrace __rcu_is_watching(void) 90bool notrace __rcu_is_watching(void)
181{ 91{
182 return rcu_dynticks_nesting; 92 return true;
183} 93}
184EXPORT_SYMBOL(__rcu_is_watching); 94EXPORT_SYMBOL(__rcu_is_watching);
185 95
186#endif /* defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) */ 96#endif /* defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) */
187 97
188/* 98/*
189 * Test whether the current CPU was interrupted from idle. Nested
190 * interrupts don't count, we must be running at the first interrupt
191 * level.
192 */
193static int rcu_is_cpu_rrupt_from_idle(void)
194{
195 return rcu_dynticks_nesting <= 1;
196}
197
198/*
199 * Helper function for rcu_sched_qs() and rcu_bh_qs(). 99 * Helper function for rcu_sched_qs() and rcu_bh_qs().
200 * Also irqs are disabled to avoid confusion due to interrupt handlers 100 * Also irqs are disabled to avoid confusion due to interrupt handlers
201 * invoking call_rcu(). 101 * invoking call_rcu().
@@ -250,7 +150,7 @@ void rcu_bh_qs(void)
250void rcu_check_callbacks(int user) 150void rcu_check_callbacks(int user)
251{ 151{
252 RCU_TRACE(check_cpu_stalls()); 152 RCU_TRACE(check_cpu_stalls());
253 if (user || rcu_is_cpu_rrupt_from_idle()) 153 if (user)
254 rcu_sched_qs(); 154 rcu_sched_qs();
255 else if (!in_softirq()) 155 else if (!in_softirq())
256 rcu_bh_qs(); 156 rcu_bh_qs();
@@ -357,6 +257,11 @@ static void __call_rcu(struct rcu_head *head,
357 rcp->curtail = &head->next; 257 rcp->curtail = &head->next;
358 RCU_TRACE(rcp->qlen++); 258 RCU_TRACE(rcp->qlen++);
359 local_irq_restore(flags); 259 local_irq_restore(flags);
260
261 if (unlikely(is_idle_task(current))) {
262 /* force scheduling for rcu_sched_qs() */
263 resched_cpu(0);
264 }
360} 265}
361 266
362/* 267/*
@@ -383,6 +288,8 @@ EXPORT_SYMBOL_GPL(call_rcu_bh);
383void __init rcu_init(void) 288void __init rcu_init(void)
384{ 289{
385 open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); 290 open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
291 RCU_TRACE(reset_cpu_stall_ticks(&rcu_sched_ctrlblk));
292 RCU_TRACE(reset_cpu_stall_ticks(&rcu_bh_ctrlblk));
386 293
387 rcu_early_boot_tests(); 294 rcu_early_boot_tests();
388} 295}
diff --git a/kernel/rcu/tiny_plugin.h b/kernel/rcu/tiny_plugin.h
index 858c56569127..f94e209a10d6 100644
--- a/kernel/rcu/tiny_plugin.h
+++ b/kernel/rcu/tiny_plugin.h
@@ -145,17 +145,16 @@ static void check_cpu_stall(struct rcu_ctrlblk *rcp)
145 rcp->ticks_this_gp++; 145 rcp->ticks_this_gp++;
146 j = jiffies; 146 j = jiffies;
147 js = ACCESS_ONCE(rcp->jiffies_stall); 147 js = ACCESS_ONCE(rcp->jiffies_stall);
148 if (*rcp->curtail && ULONG_CMP_GE(j, js)) { 148 if (rcp->rcucblist && ULONG_CMP_GE(j, js)) {
149 pr_err("INFO: %s stall on CPU (%lu ticks this GP) idle=%llx (t=%lu jiffies q=%ld)\n", 149 pr_err("INFO: %s stall on CPU (%lu ticks this GP) idle=%llx (t=%lu jiffies q=%ld)\n",
150 rcp->name, rcp->ticks_this_gp, rcu_dynticks_nesting, 150 rcp->name, rcp->ticks_this_gp, DYNTICK_TASK_EXIT_IDLE,
151 jiffies - rcp->gp_start, rcp->qlen); 151 jiffies - rcp->gp_start, rcp->qlen);
152 dump_stack(); 152 dump_stack();
153 }
154 if (*rcp->curtail && ULONG_CMP_GE(j, js))
155 ACCESS_ONCE(rcp->jiffies_stall) = jiffies + 153 ACCESS_ONCE(rcp->jiffies_stall) = jiffies +
156 3 * rcu_jiffies_till_stall_check() + 3; 154 3 * rcu_jiffies_till_stall_check() + 3;
157 else if (ULONG_CMP_GE(j, js)) 155 } else if (ULONG_CMP_GE(j, js)) {
158 ACCESS_ONCE(rcp->jiffies_stall) = jiffies + rcu_jiffies_till_stall_check(); 156 ACCESS_ONCE(rcp->jiffies_stall) = jiffies + rcu_jiffies_till_stall_check();
157 }
159} 158}
160 159
161static void reset_cpu_stall_ticks(struct rcu_ctrlblk *rcp) 160static void reset_cpu_stall_ticks(struct rcu_ctrlblk *rcp)
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 7680fc275036..48d640ca1a05 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -156,6 +156,10 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
156static void invoke_rcu_core(void); 156static void invoke_rcu_core(void);
157static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp); 157static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp);
158 158
159/* rcuc/rcub kthread realtime priority */
160static int kthread_prio = CONFIG_RCU_KTHREAD_PRIO;
161module_param(kthread_prio, int, 0644);
162
159/* 163/*
160 * Track the rcutorture test sequence number and the update version 164 * Track the rcutorture test sequence number and the update version
161 * number within a given test. The rcutorture_testseq is incremented 165 * number within a given test. The rcutorture_testseq is incremented
@@ -215,6 +219,9 @@ static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
215#endif /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */ 219#endif /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
216}; 220};
217 221
222DEFINE_PER_CPU_SHARED_ALIGNED(unsigned long, rcu_qs_ctr);
223EXPORT_PER_CPU_SYMBOL_GPL(rcu_qs_ctr);
224
218/* 225/*
219 * Let the RCU core know that this CPU has gone through the scheduler, 226 * Let the RCU core know that this CPU has gone through the scheduler,
220 * which is a quiescent state. This is called when the need for a 227 * which is a quiescent state. This is called when the need for a
@@ -284,6 +291,22 @@ void rcu_note_context_switch(void)
284} 291}
285EXPORT_SYMBOL_GPL(rcu_note_context_switch); 292EXPORT_SYMBOL_GPL(rcu_note_context_switch);
286 293
294/*
295 * Register a quiesecent state for all RCU flavors. If there is an
296 * emergency, invoke rcu_momentary_dyntick_idle() to do a heavy-weight
297 * dyntick-idle quiescent state visible to other CPUs (but only for those
298 * RCU flavors in desparate need of a quiescent state, which will normally
299 * be none of them). Either way, do a lightweight quiescent state for
300 * all RCU flavors.
301 */
302void rcu_all_qs(void)
303{
304 if (unlikely(raw_cpu_read(rcu_sched_qs_mask)))
305 rcu_momentary_dyntick_idle();
306 this_cpu_inc(rcu_qs_ctr);
307}
308EXPORT_SYMBOL_GPL(rcu_all_qs);
309
287static long blimit = 10; /* Maximum callbacks per rcu_do_batch. */ 310static long blimit = 10; /* Maximum callbacks per rcu_do_batch. */
288static long qhimark = 10000; /* If this many pending, ignore blimit. */ 311static long qhimark = 10000; /* If this many pending, ignore blimit. */
289static long qlowmark = 100; /* Once only this many pending, use blimit. */ 312static long qlowmark = 100; /* Once only this many pending, use blimit. */
@@ -315,18 +338,54 @@ static void force_quiescent_state(struct rcu_state *rsp);
315static int rcu_pending(void); 338static int rcu_pending(void);
316 339
317/* 340/*
318 * Return the number of RCU-sched batches processed thus far for debug & stats. 341 * Return the number of RCU batches started thus far for debug & stats.
342 */
343unsigned long rcu_batches_started(void)
344{
345 return rcu_state_p->gpnum;
346}
347EXPORT_SYMBOL_GPL(rcu_batches_started);
348
349/*
350 * Return the number of RCU-sched batches started thus far for debug & stats.
351 */
352unsigned long rcu_batches_started_sched(void)
353{
354 return rcu_sched_state.gpnum;
355}
356EXPORT_SYMBOL_GPL(rcu_batches_started_sched);
357
358/*
359 * Return the number of RCU BH batches started thus far for debug & stats.
319 */ 360 */
320long rcu_batches_completed_sched(void) 361unsigned long rcu_batches_started_bh(void)
362{
363 return rcu_bh_state.gpnum;
364}
365EXPORT_SYMBOL_GPL(rcu_batches_started_bh);
366
367/*
368 * Return the number of RCU batches completed thus far for debug & stats.
369 */
370unsigned long rcu_batches_completed(void)
371{
372 return rcu_state_p->completed;
373}
374EXPORT_SYMBOL_GPL(rcu_batches_completed);
375
376/*
377 * Return the number of RCU-sched batches completed thus far for debug & stats.
378 */
379unsigned long rcu_batches_completed_sched(void)
321{ 380{
322 return rcu_sched_state.completed; 381 return rcu_sched_state.completed;
323} 382}
324EXPORT_SYMBOL_GPL(rcu_batches_completed_sched); 383EXPORT_SYMBOL_GPL(rcu_batches_completed_sched);
325 384
326/* 385/*
327 * Return the number of RCU BH batches processed thus far for debug & stats. 386 * Return the number of RCU BH batches completed thus far for debug & stats.
328 */ 387 */
329long rcu_batches_completed_bh(void) 388unsigned long rcu_batches_completed_bh(void)
330{ 389{
331 return rcu_bh_state.completed; 390 return rcu_bh_state.completed;
332} 391}
@@ -759,39 +818,71 @@ void rcu_irq_enter(void)
759/** 818/**
760 * rcu_nmi_enter - inform RCU of entry to NMI context 819 * rcu_nmi_enter - inform RCU of entry to NMI context
761 * 820 *
762 * If the CPU was idle with dynamic ticks active, and there is no 821 * If the CPU was idle from RCU's viewpoint, update rdtp->dynticks and
763 * irq handler running, this updates rdtp->dynticks_nmi to let the 822 * rdtp->dynticks_nmi_nesting to let the RCU grace-period handling know
764 * RCU grace-period handling know that the CPU is active. 823 * that the CPU is active. This implementation permits nested NMIs, as
824 * long as the nesting level does not overflow an int. (You will probably
825 * run out of stack space first.)
765 */ 826 */
766void rcu_nmi_enter(void) 827void rcu_nmi_enter(void)
767{ 828{
768 struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); 829 struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
830 int incby = 2;
769 831
770 if (rdtp->dynticks_nmi_nesting == 0 && 832 /* Complain about underflow. */
771 (atomic_read(&rdtp->dynticks) & 0x1)) 833 WARN_ON_ONCE(rdtp->dynticks_nmi_nesting < 0);
772 return; 834
773 rdtp->dynticks_nmi_nesting++; 835 /*
774 smp_mb__before_atomic(); /* Force delay from prior write. */ 836 * If idle from RCU viewpoint, atomically increment ->dynticks
775 atomic_inc(&rdtp->dynticks); 837 * to mark non-idle and increment ->dynticks_nmi_nesting by one.
776 /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ 838 * Otherwise, increment ->dynticks_nmi_nesting by two. This means
777 smp_mb__after_atomic(); /* See above. */ 839 * if ->dynticks_nmi_nesting is equal to one, we are guaranteed
778 WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); 840 * to be in the outermost NMI handler that interrupted an RCU-idle
841 * period (observation due to Andy Lutomirski).
842 */
843 if (!(atomic_read(&rdtp->dynticks) & 0x1)) {
844 smp_mb__before_atomic(); /* Force delay from prior write. */
845 atomic_inc(&rdtp->dynticks);
846 /* atomic_inc() before later RCU read-side crit sects */
847 smp_mb__after_atomic(); /* See above. */
848 WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
849 incby = 1;
850 }
851 rdtp->dynticks_nmi_nesting += incby;
852 barrier();
779} 853}
780 854
781/** 855/**
782 * rcu_nmi_exit - inform RCU of exit from NMI context 856 * rcu_nmi_exit - inform RCU of exit from NMI context
783 * 857 *
784 * If the CPU was idle with dynamic ticks active, and there is no 858 * If we are returning from the outermost NMI handler that interrupted an
785 * irq handler running, this updates rdtp->dynticks_nmi to let the 859 * RCU-idle period, update rdtp->dynticks and rdtp->dynticks_nmi_nesting
786 * RCU grace-period handling know that the CPU is no longer active. 860 * to let the RCU grace-period handling know that the CPU is back to
861 * being RCU-idle.
787 */ 862 */
788void rcu_nmi_exit(void) 863void rcu_nmi_exit(void)
789{ 864{
790 struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); 865 struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
791 866
792 if (rdtp->dynticks_nmi_nesting == 0 || 867 /*
793 --rdtp->dynticks_nmi_nesting != 0) 868 * Check for ->dynticks_nmi_nesting underflow and bad ->dynticks.
869 * (We are exiting an NMI handler, so RCU better be paying attention
870 * to us!)
871 */
872 WARN_ON_ONCE(rdtp->dynticks_nmi_nesting <= 0);
873 WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
874
875 /*
876 * If the nesting level is not 1, the CPU wasn't RCU-idle, so
877 * leave it in non-RCU-idle state.
878 */
879 if (rdtp->dynticks_nmi_nesting != 1) {
880 rdtp->dynticks_nmi_nesting -= 2;
794 return; 881 return;
882 }
883
884 /* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. */
885 rdtp->dynticks_nmi_nesting = 0;
795 /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */ 886 /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
796 smp_mb__before_atomic(); /* See above. */ 887 smp_mb__before_atomic(); /* See above. */
797 atomic_inc(&rdtp->dynticks); 888 atomic_inc(&rdtp->dynticks);
@@ -898,17 +989,14 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp,
898 trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("dti")); 989 trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("dti"));
899 return 1; 990 return 1;
900 } else { 991 } else {
992 if (ULONG_CMP_LT(ACCESS_ONCE(rdp->gpnum) + ULONG_MAX / 4,
993 rdp->mynode->gpnum))
994 ACCESS_ONCE(rdp->gpwrap) = true;
901 return 0; 995 return 0;
902 } 996 }
903} 997}
904 998
905/* 999/*
906 * This function really isn't for public consumption, but RCU is special in
907 * that context switches can allow the state machine to make progress.
908 */
909extern void resched_cpu(int cpu);
910
911/*
912 * Return true if the specified CPU has passed through a quiescent 1000 * Return true if the specified CPU has passed through a quiescent
913 * state by virtue of being in or having passed through an dynticks 1001 * state by virtue of being in or having passed through an dynticks
914 * idle state since the last call to dyntick_save_progress_counter() 1002 * idle state since the last call to dyntick_save_progress_counter()
@@ -1011,6 +1099,22 @@ static void record_gp_stall_check_time(struct rcu_state *rsp)
1011 j1 = rcu_jiffies_till_stall_check(); 1099 j1 = rcu_jiffies_till_stall_check();
1012 ACCESS_ONCE(rsp->jiffies_stall) = j + j1; 1100 ACCESS_ONCE(rsp->jiffies_stall) = j + j1;
1013 rsp->jiffies_resched = j + j1 / 2; 1101 rsp->jiffies_resched = j + j1 / 2;
1102 rsp->n_force_qs_gpstart = ACCESS_ONCE(rsp->n_force_qs);
1103}
1104
1105/*
1106 * Complain about starvation of grace-period kthread.
1107 */
1108static void rcu_check_gp_kthread_starvation(struct rcu_state *rsp)
1109{
1110 unsigned long gpa;
1111 unsigned long j;
1112
1113 j = jiffies;
1114 gpa = ACCESS_ONCE(rsp->gp_activity);
1115 if (j - gpa > 2 * HZ)
1116 pr_err("%s kthread starved for %ld jiffies!\n",
1117 rsp->name, j - gpa);
1014} 1118}
1015 1119
1016/* 1120/*
@@ -1033,11 +1137,13 @@ static void rcu_dump_cpu_stacks(struct rcu_state *rsp)
1033 } 1137 }
1034} 1138}
1035 1139
1036static void print_other_cpu_stall(struct rcu_state *rsp) 1140static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum)
1037{ 1141{
1038 int cpu; 1142 int cpu;
1039 long delta; 1143 long delta;
1040 unsigned long flags; 1144 unsigned long flags;
1145 unsigned long gpa;
1146 unsigned long j;
1041 int ndetected = 0; 1147 int ndetected = 0;
1042 struct rcu_node *rnp = rcu_get_root(rsp); 1148 struct rcu_node *rnp = rcu_get_root(rsp);
1043 long totqlen = 0; 1149 long totqlen = 0;
@@ -1075,30 +1181,34 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
1075 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1181 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1076 } 1182 }
1077 1183
1078 /*
1079 * Now rat on any tasks that got kicked up to the root rcu_node
1080 * due to CPU offlining.
1081 */
1082 rnp = rcu_get_root(rsp);
1083 raw_spin_lock_irqsave(&rnp->lock, flags);
1084 ndetected += rcu_print_task_stall(rnp);
1085 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1086
1087 print_cpu_stall_info_end(); 1184 print_cpu_stall_info_end();
1088 for_each_possible_cpu(cpu) 1185 for_each_possible_cpu(cpu)
1089 totqlen += per_cpu_ptr(rsp->rda, cpu)->qlen; 1186 totqlen += per_cpu_ptr(rsp->rda, cpu)->qlen;
1090 pr_cont("(detected by %d, t=%ld jiffies, g=%ld, c=%ld, q=%lu)\n", 1187 pr_cont("(detected by %d, t=%ld jiffies, g=%ld, c=%ld, q=%lu)\n",
1091 smp_processor_id(), (long)(jiffies - rsp->gp_start), 1188 smp_processor_id(), (long)(jiffies - rsp->gp_start),
1092 (long)rsp->gpnum, (long)rsp->completed, totqlen); 1189 (long)rsp->gpnum, (long)rsp->completed, totqlen);
1093 if (ndetected == 0) 1190 if (ndetected) {
1094 pr_err("INFO: Stall ended before state dump start\n");
1095 else
1096 rcu_dump_cpu_stacks(rsp); 1191 rcu_dump_cpu_stacks(rsp);
1192 } else {
1193 if (ACCESS_ONCE(rsp->gpnum) != gpnum ||
1194 ACCESS_ONCE(rsp->completed) == gpnum) {
1195 pr_err("INFO: Stall ended before state dump start\n");
1196 } else {
1197 j = jiffies;
1198 gpa = ACCESS_ONCE(rsp->gp_activity);
1199 pr_err("All QSes seen, last %s kthread activity %ld (%ld-%ld), jiffies_till_next_fqs=%ld\n",
1200 rsp->name, j - gpa, j, gpa,
1201 jiffies_till_next_fqs);
1202 /* In this case, the current CPU might be at fault. */
1203 sched_show_task(current);
1204 }
1205 }
1097 1206
1098 /* Complain about tasks blocking the grace period. */ 1207 /* Complain about tasks blocking the grace period. */
1099
1100 rcu_print_detail_task_stall(rsp); 1208 rcu_print_detail_task_stall(rsp);
1101 1209
1210 rcu_check_gp_kthread_starvation(rsp);
1211
1102 force_quiescent_state(rsp); /* Kick them all. */ 1212 force_quiescent_state(rsp); /* Kick them all. */
1103} 1213}
1104 1214
@@ -1123,6 +1233,9 @@ static void print_cpu_stall(struct rcu_state *rsp)
1123 pr_cont(" (t=%lu jiffies g=%ld c=%ld q=%lu)\n", 1233 pr_cont(" (t=%lu jiffies g=%ld c=%ld q=%lu)\n",
1124 jiffies - rsp->gp_start, 1234 jiffies - rsp->gp_start,
1125 (long)rsp->gpnum, (long)rsp->completed, totqlen); 1235 (long)rsp->gpnum, (long)rsp->completed, totqlen);
1236
1237 rcu_check_gp_kthread_starvation(rsp);
1238
1126 rcu_dump_cpu_stacks(rsp); 1239 rcu_dump_cpu_stacks(rsp);
1127 1240
1128 raw_spin_lock_irqsave(&rnp->lock, flags); 1241 raw_spin_lock_irqsave(&rnp->lock, flags);
@@ -1193,7 +1306,7 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
1193 ULONG_CMP_GE(j, js + RCU_STALL_RAT_DELAY)) { 1306 ULONG_CMP_GE(j, js + RCU_STALL_RAT_DELAY)) {
1194 1307
1195 /* They had a few time units to dump stack, so complain. */ 1308 /* They had a few time units to dump stack, so complain. */
1196 print_other_cpu_stall(rsp); 1309 print_other_cpu_stall(rsp, gpnum);
1197 } 1310 }
1198} 1311}
1199 1312
@@ -1530,7 +1643,8 @@ static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp,
1530 bool ret; 1643 bool ret;
1531 1644
1532 /* Handle the ends of any preceding grace periods first. */ 1645 /* Handle the ends of any preceding grace periods first. */
1533 if (rdp->completed == rnp->completed) { 1646 if (rdp->completed == rnp->completed &&
1647 !unlikely(ACCESS_ONCE(rdp->gpwrap))) {
1534 1648
1535 /* No grace period end, so just accelerate recent callbacks. */ 1649 /* No grace period end, so just accelerate recent callbacks. */
1536 ret = rcu_accelerate_cbs(rsp, rnp, rdp); 1650 ret = rcu_accelerate_cbs(rsp, rnp, rdp);
@@ -1545,7 +1659,7 @@ static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp,
1545 trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuend")); 1659 trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuend"));
1546 } 1660 }
1547 1661
1548 if (rdp->gpnum != rnp->gpnum) { 1662 if (rdp->gpnum != rnp->gpnum || unlikely(ACCESS_ONCE(rdp->gpwrap))) {
1549 /* 1663 /*
1550 * If the current grace period is waiting for this CPU, 1664 * If the current grace period is waiting for this CPU,
1551 * set up to detect a quiescent state, otherwise don't 1665 * set up to detect a quiescent state, otherwise don't
@@ -1554,8 +1668,10 @@ static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp,
1554 rdp->gpnum = rnp->gpnum; 1668 rdp->gpnum = rnp->gpnum;
1555 trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpustart")); 1669 trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpustart"));
1556 rdp->passed_quiesce = 0; 1670 rdp->passed_quiesce = 0;
1671 rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr);
1557 rdp->qs_pending = !!(rnp->qsmask & rdp->grpmask); 1672 rdp->qs_pending = !!(rnp->qsmask & rdp->grpmask);
1558 zero_cpu_stall_ticks(rdp); 1673 zero_cpu_stall_ticks(rdp);
1674 ACCESS_ONCE(rdp->gpwrap) = false;
1559 } 1675 }
1560 return ret; 1676 return ret;
1561} 1677}
@@ -1569,7 +1685,8 @@ static void note_gp_changes(struct rcu_state *rsp, struct rcu_data *rdp)
1569 local_irq_save(flags); 1685 local_irq_save(flags);
1570 rnp = rdp->mynode; 1686 rnp = rdp->mynode;
1571 if ((rdp->gpnum == ACCESS_ONCE(rnp->gpnum) && 1687 if ((rdp->gpnum == ACCESS_ONCE(rnp->gpnum) &&
1572 rdp->completed == ACCESS_ONCE(rnp->completed)) || /* w/out lock. */ 1688 rdp->completed == ACCESS_ONCE(rnp->completed) &&
1689 !unlikely(ACCESS_ONCE(rdp->gpwrap))) || /* w/out lock. */
1573 !raw_spin_trylock(&rnp->lock)) { /* irqs already off, so later. */ 1690 !raw_spin_trylock(&rnp->lock)) { /* irqs already off, so later. */
1574 local_irq_restore(flags); 1691 local_irq_restore(flags);
1575 return; 1692 return;
@@ -1589,6 +1706,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
1589 struct rcu_data *rdp; 1706 struct rcu_data *rdp;
1590 struct rcu_node *rnp = rcu_get_root(rsp); 1707 struct rcu_node *rnp = rcu_get_root(rsp);
1591 1708
1709 ACCESS_ONCE(rsp->gp_activity) = jiffies;
1592 rcu_bind_gp_kthread(); 1710 rcu_bind_gp_kthread();
1593 raw_spin_lock_irq(&rnp->lock); 1711 raw_spin_lock_irq(&rnp->lock);
1594 smp_mb__after_unlock_lock(); 1712 smp_mb__after_unlock_lock();
@@ -1649,6 +1767,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
1649 rnp->grphi, rnp->qsmask); 1767 rnp->grphi, rnp->qsmask);
1650 raw_spin_unlock_irq(&rnp->lock); 1768 raw_spin_unlock_irq(&rnp->lock);
1651 cond_resched_rcu_qs(); 1769 cond_resched_rcu_qs();
1770 ACCESS_ONCE(rsp->gp_activity) = jiffies;
1652 } 1771 }
1653 1772
1654 mutex_unlock(&rsp->onoff_mutex); 1773 mutex_unlock(&rsp->onoff_mutex);
@@ -1665,6 +1784,7 @@ static int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in)
1665 unsigned long maxj; 1784 unsigned long maxj;
1666 struct rcu_node *rnp = rcu_get_root(rsp); 1785 struct rcu_node *rnp = rcu_get_root(rsp);
1667 1786
1787 ACCESS_ONCE(rsp->gp_activity) = jiffies;
1668 rsp->n_force_qs++; 1788 rsp->n_force_qs++;
1669 if (fqs_state == RCU_SAVE_DYNTICK) { 1789 if (fqs_state == RCU_SAVE_DYNTICK) {
1670 /* Collect dyntick-idle snapshots. */ 1790 /* Collect dyntick-idle snapshots. */
@@ -1703,6 +1823,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
1703 struct rcu_data *rdp; 1823 struct rcu_data *rdp;
1704 struct rcu_node *rnp = rcu_get_root(rsp); 1824 struct rcu_node *rnp = rcu_get_root(rsp);
1705 1825
1826 ACCESS_ONCE(rsp->gp_activity) = jiffies;
1706 raw_spin_lock_irq(&rnp->lock); 1827 raw_spin_lock_irq(&rnp->lock);
1707 smp_mb__after_unlock_lock(); 1828 smp_mb__after_unlock_lock();
1708 gp_duration = jiffies - rsp->gp_start; 1829 gp_duration = jiffies - rsp->gp_start;
@@ -1739,6 +1860,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
1739 nocb += rcu_future_gp_cleanup(rsp, rnp); 1860 nocb += rcu_future_gp_cleanup(rsp, rnp);
1740 raw_spin_unlock_irq(&rnp->lock); 1861 raw_spin_unlock_irq(&rnp->lock);
1741 cond_resched_rcu_qs(); 1862 cond_resched_rcu_qs();
1863 ACCESS_ONCE(rsp->gp_activity) = jiffies;
1742 } 1864 }
1743 rnp = rcu_get_root(rsp); 1865 rnp = rcu_get_root(rsp);
1744 raw_spin_lock_irq(&rnp->lock); 1866 raw_spin_lock_irq(&rnp->lock);
@@ -1788,6 +1910,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
1788 if (rcu_gp_init(rsp)) 1910 if (rcu_gp_init(rsp))
1789 break; 1911 break;
1790 cond_resched_rcu_qs(); 1912 cond_resched_rcu_qs();
1913 ACCESS_ONCE(rsp->gp_activity) = jiffies;
1791 WARN_ON(signal_pending(current)); 1914 WARN_ON(signal_pending(current));
1792 trace_rcu_grace_period(rsp->name, 1915 trace_rcu_grace_period(rsp->name,
1793 ACCESS_ONCE(rsp->gpnum), 1916 ACCESS_ONCE(rsp->gpnum),
@@ -1831,9 +1954,11 @@ static int __noreturn rcu_gp_kthread(void *arg)
1831 ACCESS_ONCE(rsp->gpnum), 1954 ACCESS_ONCE(rsp->gpnum),
1832 TPS("fqsend")); 1955 TPS("fqsend"));
1833 cond_resched_rcu_qs(); 1956 cond_resched_rcu_qs();
1957 ACCESS_ONCE(rsp->gp_activity) = jiffies;
1834 } else { 1958 } else {
1835 /* Deal with stray signal. */ 1959 /* Deal with stray signal. */
1836 cond_resched_rcu_qs(); 1960 cond_resched_rcu_qs();
1961 ACCESS_ONCE(rsp->gp_activity) = jiffies;
1837 WARN_ON(signal_pending(current)); 1962 WARN_ON(signal_pending(current));
1838 trace_rcu_grace_period(rsp->name, 1963 trace_rcu_grace_period(rsp->name,
1839 ACCESS_ONCE(rsp->gpnum), 1964 ACCESS_ONCE(rsp->gpnum),
@@ -2010,8 +2135,10 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp)
2010 rnp = rdp->mynode; 2135 rnp = rdp->mynode;
2011 raw_spin_lock_irqsave(&rnp->lock, flags); 2136 raw_spin_lock_irqsave(&rnp->lock, flags);
2012 smp_mb__after_unlock_lock(); 2137 smp_mb__after_unlock_lock();
2013 if (rdp->passed_quiesce == 0 || rdp->gpnum != rnp->gpnum || 2138 if ((rdp->passed_quiesce == 0 &&
2014 rnp->completed == rnp->gpnum) { 2139 rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_qs_ctr)) ||
2140 rdp->gpnum != rnp->gpnum || rnp->completed == rnp->gpnum ||
2141 rdp->gpwrap) {
2015 2142
2016 /* 2143 /*
2017 * The grace period in which this quiescent state was 2144 * The grace period in which this quiescent state was
@@ -2020,6 +2147,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp)
2020 * within the current grace period. 2147 * within the current grace period.
2021 */ 2148 */
2022 rdp->passed_quiesce = 0; /* need qs for new gp. */ 2149 rdp->passed_quiesce = 0; /* need qs for new gp. */
2150 rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr);
2023 raw_spin_unlock_irqrestore(&rnp->lock, flags); 2151 raw_spin_unlock_irqrestore(&rnp->lock, flags);
2024 return; 2152 return;
2025 } 2153 }
@@ -2064,7 +2192,8 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp)
2064 * Was there a quiescent state since the beginning of the grace 2192 * Was there a quiescent state since the beginning of the grace
2065 * period? If no, then exit and wait for the next call. 2193 * period? If no, then exit and wait for the next call.
2066 */ 2194 */
2067 if (!rdp->passed_quiesce) 2195 if (!rdp->passed_quiesce &&
2196 rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_qs_ctr))
2068 return; 2197 return;
2069 2198
2070 /* 2199 /*
@@ -2195,6 +2324,46 @@ static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
2195} 2324}
2196 2325
2197/* 2326/*
2327 * All CPUs for the specified rcu_node structure have gone offline,
2328 * and all tasks that were preempted within an RCU read-side critical
2329 * section while running on one of those CPUs have since exited their RCU
2330 * read-side critical section. Some other CPU is reporting this fact with
2331 * the specified rcu_node structure's ->lock held and interrupts disabled.
2332 * This function therefore goes up the tree of rcu_node structures,
2333 * clearing the corresponding bits in the ->qsmaskinit fields. Note that
2334 * the leaf rcu_node structure's ->qsmaskinit field has already been
2335 * updated
2336 *
2337 * This function does check that the specified rcu_node structure has
2338 * all CPUs offline and no blocked tasks, so it is OK to invoke it
2339 * prematurely. That said, invoking it after the fact will cost you
2340 * a needless lock acquisition. So once it has done its work, don't
2341 * invoke it again.
2342 */
2343static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf)
2344{
2345 long mask;
2346 struct rcu_node *rnp = rnp_leaf;
2347
2348 if (rnp->qsmaskinit || rcu_preempt_has_tasks(rnp))
2349 return;
2350 for (;;) {
2351 mask = rnp->grpmask;
2352 rnp = rnp->parent;
2353 if (!rnp)
2354 break;
2355 raw_spin_lock(&rnp->lock); /* irqs already disabled. */
2356 smp_mb__after_unlock_lock(); /* GP memory ordering. */
2357 rnp->qsmaskinit &= ~mask;
2358 if (rnp->qsmaskinit) {
2359 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
2360 return;
2361 }
2362 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
2363 }
2364}
2365
2366/*
2198 * The CPU has been completely removed, and some other CPU is reporting 2367 * The CPU has been completely removed, and some other CPU is reporting
2199 * this fact from process context. Do the remainder of the cleanup, 2368 * this fact from process context. Do the remainder of the cleanup,
2200 * including orphaning the outgoing CPU's RCU callbacks, and also 2369 * including orphaning the outgoing CPU's RCU callbacks, and also
@@ -2204,8 +2373,6 @@ static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
2204static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) 2373static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
2205{ 2374{
2206 unsigned long flags; 2375 unsigned long flags;
2207 unsigned long mask;
2208 int need_report = 0;
2209 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); 2376 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
2210 struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */ 2377 struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */
2211 2378
@@ -2219,40 +2386,15 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
2219 /* Orphan the dead CPU's callbacks, and adopt them if appropriate. */ 2386 /* Orphan the dead CPU's callbacks, and adopt them if appropriate. */
2220 rcu_send_cbs_to_orphanage(cpu, rsp, rnp, rdp); 2387 rcu_send_cbs_to_orphanage(cpu, rsp, rnp, rdp);
2221 rcu_adopt_orphan_cbs(rsp, flags); 2388 rcu_adopt_orphan_cbs(rsp, flags);
2389 raw_spin_unlock_irqrestore(&rsp->orphan_lock, flags);
2222 2390
2223 /* Remove the outgoing CPU from the masks in the rcu_node hierarchy. */ 2391 /* Remove outgoing CPU from mask in the leaf rcu_node structure. */
2224 mask = rdp->grpmask; /* rnp->grplo is constant. */ 2392 raw_spin_lock_irqsave(&rnp->lock, flags);
2225 do { 2393 smp_mb__after_unlock_lock(); /* Enforce GP memory-order guarantee. */
2226 raw_spin_lock(&rnp->lock); /* irqs already disabled. */ 2394 rnp->qsmaskinit &= ~rdp->grpmask;
2227 smp_mb__after_unlock_lock(); 2395 if (rnp->qsmaskinit == 0 && !rcu_preempt_has_tasks(rnp))
2228 rnp->qsmaskinit &= ~mask; 2396 rcu_cleanup_dead_rnp(rnp);
2229 if (rnp->qsmaskinit != 0) { 2397 rcu_report_qs_rnp(rdp->grpmask, rsp, rnp, flags); /* Rlses rnp->lock. */
2230 if (rnp != rdp->mynode)
2231 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
2232 break;
2233 }
2234 if (rnp == rdp->mynode)
2235 need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp);
2236 else
2237 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
2238 mask = rnp->grpmask;
2239 rnp = rnp->parent;
2240 } while (rnp != NULL);
2241
2242 /*
2243 * We still hold the leaf rcu_node structure lock here, and
2244 * irqs are still disabled. The reason for this subterfuge is
2245 * because invoking rcu_report_unblock_qs_rnp() with ->orphan_lock
2246 * held leads to deadlock.
2247 */
2248 raw_spin_unlock(&rsp->orphan_lock); /* irqs remain disabled. */
2249 rnp = rdp->mynode;
2250 if (need_report & RCU_OFL_TASKS_NORM_GP)
2251 rcu_report_unblock_qs_rnp(rnp, flags);
2252 else
2253 raw_spin_unlock_irqrestore(&rnp->lock, flags);
2254 if (need_report & RCU_OFL_TASKS_EXP_GP)
2255 rcu_report_exp_rnp(rsp, rnp, true);
2256 WARN_ONCE(rdp->qlen != 0 || rdp->nxtlist != NULL, 2398 WARN_ONCE(rdp->qlen != 0 || rdp->nxtlist != NULL,
2257 "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, nxtlist=%p\n", 2399 "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, nxtlist=%p\n",
2258 cpu, rdp->qlen, rdp->nxtlist); 2400 cpu, rdp->qlen, rdp->nxtlist);
@@ -2268,6 +2410,10 @@ static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
2268{ 2410{
2269} 2411}
2270 2412
2413static void __maybe_unused rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf)
2414{
2415}
2416
2271static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) 2417static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
2272{ 2418{
2273} 2419}
@@ -2464,12 +2610,6 @@ static void force_qs_rnp(struct rcu_state *rsp,
2464 } 2610 }
2465 raw_spin_unlock_irqrestore(&rnp->lock, flags); 2611 raw_spin_unlock_irqrestore(&rnp->lock, flags);
2466 } 2612 }
2467 rnp = rcu_get_root(rsp);
2468 if (rnp->qsmask == 0) {
2469 raw_spin_lock_irqsave(&rnp->lock, flags);
2470 smp_mb__after_unlock_lock();
2471 rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */
2472 }
2473} 2613}
2474 2614
2475/* 2615/*
@@ -2569,7 +2709,7 @@ static void rcu_process_callbacks(struct softirq_action *unused)
2569 * Schedule RCU callback invocation. If the specified type of RCU 2709 * Schedule RCU callback invocation. If the specified type of RCU
2570 * does not support RCU priority boosting, just do a direct call, 2710 * does not support RCU priority boosting, just do a direct call,
2571 * otherwise wake up the per-CPU kernel kthread. Note that because we 2711 * otherwise wake up the per-CPU kernel kthread. Note that because we
2572 * are running on the current CPU with interrupts disabled, the 2712 * are running on the current CPU with softirqs disabled, the
2573 * rcu_cpu_kthread_task cannot disappear out from under us. 2713 * rcu_cpu_kthread_task cannot disappear out from under us.
2574 */ 2714 */
2575static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) 2715static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
@@ -3109,9 +3249,12 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
3109 3249
3110 /* Is the RCU core waiting for a quiescent state from this CPU? */ 3250 /* Is the RCU core waiting for a quiescent state from this CPU? */
3111 if (rcu_scheduler_fully_active && 3251 if (rcu_scheduler_fully_active &&
3112 rdp->qs_pending && !rdp->passed_quiesce) { 3252 rdp->qs_pending && !rdp->passed_quiesce &&
3253 rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_qs_ctr)) {
3113 rdp->n_rp_qs_pending++; 3254 rdp->n_rp_qs_pending++;
3114 } else if (rdp->qs_pending && rdp->passed_quiesce) { 3255 } else if (rdp->qs_pending &&
3256 (rdp->passed_quiesce ||
3257 rdp->rcu_qs_ctr_snap != __this_cpu_read(rcu_qs_ctr))) {
3115 rdp->n_rp_report_qs++; 3258 rdp->n_rp_report_qs++;
3116 return 1; 3259 return 1;
3117 } 3260 }
@@ -3135,7 +3278,8 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
3135 } 3278 }
3136 3279
3137 /* Has a new RCU grace period started? */ 3280 /* Has a new RCU grace period started? */
3138 if (ACCESS_ONCE(rnp->gpnum) != rdp->gpnum) { /* outside lock */ 3281 if (ACCESS_ONCE(rnp->gpnum) != rdp->gpnum ||
3282 unlikely(ACCESS_ONCE(rdp->gpwrap))) { /* outside lock */
3139 rdp->n_rp_gp_started++; 3283 rdp->n_rp_gp_started++;
3140 return 1; 3284 return 1;
3141 } 3285 }
@@ -3318,6 +3462,7 @@ static void _rcu_barrier(struct rcu_state *rsp)
3318 } else { 3462 } else {
3319 _rcu_barrier_trace(rsp, "OnlineNoCB", cpu, 3463 _rcu_barrier_trace(rsp, "OnlineNoCB", cpu,
3320 rsp->n_barrier_done); 3464 rsp->n_barrier_done);
3465 smp_mb__before_atomic();
3321 atomic_inc(&rsp->barrier_cpu_count); 3466 atomic_inc(&rsp->barrier_cpu_count);
3322 __call_rcu(&rdp->barrier_head, 3467 __call_rcu(&rdp->barrier_head,
3323 rcu_barrier_callback, rsp, cpu, 0); 3468 rcu_barrier_callback, rsp, cpu, 0);
@@ -3385,9 +3530,6 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
3385 /* Set up local state, ensuring consistent view of global state. */ 3530 /* Set up local state, ensuring consistent view of global state. */
3386 raw_spin_lock_irqsave(&rnp->lock, flags); 3531 raw_spin_lock_irqsave(&rnp->lock, flags);
3387 rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo); 3532 rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo);
3388 init_callback_list(rdp);
3389 rdp->qlen_lazy = 0;
3390 ACCESS_ONCE(rdp->qlen) = 0;
3391 rdp->dynticks = &per_cpu(rcu_dynticks, cpu); 3533 rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
3392 WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE); 3534 WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE);
3393 WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1); 3535 WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1);
@@ -3444,6 +3586,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
3444 rdp->gpnum = rnp->completed; 3586 rdp->gpnum = rnp->completed;
3445 rdp->completed = rnp->completed; 3587 rdp->completed = rnp->completed;
3446 rdp->passed_quiesce = 0; 3588 rdp->passed_quiesce = 0;
3589 rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr);
3447 rdp->qs_pending = 0; 3590 rdp->qs_pending = 0;
3448 trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuonl")); 3591 trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuonl"));
3449 } 3592 }
@@ -3535,17 +3678,35 @@ static int rcu_pm_notify(struct notifier_block *self,
3535static int __init rcu_spawn_gp_kthread(void) 3678static int __init rcu_spawn_gp_kthread(void)
3536{ 3679{
3537 unsigned long flags; 3680 unsigned long flags;
3681 int kthread_prio_in = kthread_prio;
3538 struct rcu_node *rnp; 3682 struct rcu_node *rnp;
3539 struct rcu_state *rsp; 3683 struct rcu_state *rsp;
3684 struct sched_param sp;
3540 struct task_struct *t; 3685 struct task_struct *t;
3541 3686
3687 /* Force priority into range. */
3688 if (IS_ENABLED(CONFIG_RCU_BOOST) && kthread_prio < 1)
3689 kthread_prio = 1;
3690 else if (kthread_prio < 0)
3691 kthread_prio = 0;
3692 else if (kthread_prio > 99)
3693 kthread_prio = 99;
3694 if (kthread_prio != kthread_prio_in)
3695 pr_alert("rcu_spawn_gp_kthread(): Limited prio to %d from %d\n",
3696 kthread_prio, kthread_prio_in);
3697
3542 rcu_scheduler_fully_active = 1; 3698 rcu_scheduler_fully_active = 1;
3543 for_each_rcu_flavor(rsp) { 3699 for_each_rcu_flavor(rsp) {
3544 t = kthread_run(rcu_gp_kthread, rsp, "%s", rsp->name); 3700 t = kthread_create(rcu_gp_kthread, rsp, "%s", rsp->name);
3545 BUG_ON(IS_ERR(t)); 3701 BUG_ON(IS_ERR(t));
3546 rnp = rcu_get_root(rsp); 3702 rnp = rcu_get_root(rsp);
3547 raw_spin_lock_irqsave(&rnp->lock, flags); 3703 raw_spin_lock_irqsave(&rnp->lock, flags);
3548 rsp->gp_kthread = t; 3704 rsp->gp_kthread = t;
3705 if (kthread_prio) {
3706 sp.sched_priority = kthread_prio;
3707 sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
3708 }
3709 wake_up_process(t);
3549 raw_spin_unlock_irqrestore(&rnp->lock, flags); 3710 raw_spin_unlock_irqrestore(&rnp->lock, flags);
3550 } 3711 }
3551 rcu_spawn_nocb_kthreads(); 3712 rcu_spawn_nocb_kthreads();
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 8e7b1843896e..119de399eb2f 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -27,7 +27,6 @@
27#include <linux/threads.h> 27#include <linux/threads.h>
28#include <linux/cpumask.h> 28#include <linux/cpumask.h>
29#include <linux/seqlock.h> 29#include <linux/seqlock.h>
30#include <linux/irq_work.h>
31 30
32/* 31/*
33 * Define shape of hierarchy based on NR_CPUS, CONFIG_RCU_FANOUT, and 32 * Define shape of hierarchy based on NR_CPUS, CONFIG_RCU_FANOUT, and
@@ -172,11 +171,6 @@ struct rcu_node {
172 /* queued on this rcu_node structure that */ 171 /* queued on this rcu_node structure that */
173 /* are blocking the current grace period, */ 172 /* are blocking the current grace period, */
174 /* there can be no such task. */ 173 /* there can be no such task. */
175 struct completion boost_completion;
176 /* Used to ensure that the rt_mutex used */
177 /* to carry out the boosting is fully */
178 /* released with no future boostee accesses */
179 /* before that rt_mutex is re-initialized. */
180 struct rt_mutex boost_mtx; 174 struct rt_mutex boost_mtx;
181 /* Used only for the priority-boosting */ 175 /* Used only for the priority-boosting */
182 /* side effect, not as a lock. */ 176 /* side effect, not as a lock. */
@@ -257,9 +251,12 @@ struct rcu_data {
257 /* in order to detect GP end. */ 251 /* in order to detect GP end. */
258 unsigned long gpnum; /* Highest gp number that this CPU */ 252 unsigned long gpnum; /* Highest gp number that this CPU */
259 /* is aware of having started. */ 253 /* is aware of having started. */
254 unsigned long rcu_qs_ctr_snap;/* Snapshot of rcu_qs_ctr to check */
255 /* for rcu_all_qs() invocations. */
260 bool passed_quiesce; /* User-mode/idle loop etc. */ 256 bool passed_quiesce; /* User-mode/idle loop etc. */
261 bool qs_pending; /* Core waits for quiesc state. */ 257 bool qs_pending; /* Core waits for quiesc state. */
262 bool beenonline; /* CPU online at least once. */ 258 bool beenonline; /* CPU online at least once. */
259 bool gpwrap; /* Possible gpnum/completed wrap. */
263 struct rcu_node *mynode; /* This CPU's leaf of hierarchy */ 260 struct rcu_node *mynode; /* This CPU's leaf of hierarchy */
264 unsigned long grpmask; /* Mask to apply to leaf qsmask. */ 261 unsigned long grpmask; /* Mask to apply to leaf qsmask. */
265#ifdef CONFIG_RCU_CPU_STALL_INFO 262#ifdef CONFIG_RCU_CPU_STALL_INFO
@@ -340,14 +337,10 @@ struct rcu_data {
340#ifdef CONFIG_RCU_NOCB_CPU 337#ifdef CONFIG_RCU_NOCB_CPU
341 struct rcu_head *nocb_head; /* CBs waiting for kthread. */ 338 struct rcu_head *nocb_head; /* CBs waiting for kthread. */
342 struct rcu_head **nocb_tail; 339 struct rcu_head **nocb_tail;
343 atomic_long_t nocb_q_count; /* # CBs waiting for kthread */ 340 atomic_long_t nocb_q_count; /* # CBs waiting for nocb */
344 atomic_long_t nocb_q_count_lazy; /* (approximate). */ 341 atomic_long_t nocb_q_count_lazy; /* invocation (all stages). */
345 struct rcu_head *nocb_follower_head; /* CBs ready to invoke. */ 342 struct rcu_head *nocb_follower_head; /* CBs ready to invoke. */
346 struct rcu_head **nocb_follower_tail; 343 struct rcu_head **nocb_follower_tail;
347 atomic_long_t nocb_follower_count; /* # CBs ready to invoke. */
348 atomic_long_t nocb_follower_count_lazy; /* (approximate). */
349 int nocb_p_count; /* # CBs being invoked by kthread */
350 int nocb_p_count_lazy; /* (approximate). */
351 wait_queue_head_t nocb_wq; /* For nocb kthreads to sleep on. */ 344 wait_queue_head_t nocb_wq; /* For nocb kthreads to sleep on. */
352 struct task_struct *nocb_kthread; 345 struct task_struct *nocb_kthread;
353 int nocb_defer_wakeup; /* Defer wakeup of nocb_kthread. */ 346 int nocb_defer_wakeup; /* Defer wakeup of nocb_kthread. */
@@ -356,8 +349,6 @@ struct rcu_data {
356 struct rcu_head *nocb_gp_head ____cacheline_internodealigned_in_smp; 349 struct rcu_head *nocb_gp_head ____cacheline_internodealigned_in_smp;
357 /* CBs waiting for GP. */ 350 /* CBs waiting for GP. */
358 struct rcu_head **nocb_gp_tail; 351 struct rcu_head **nocb_gp_tail;
359 long nocb_gp_count;
360 long nocb_gp_count_lazy;
361 bool nocb_leader_sleep; /* Is the nocb leader thread asleep? */ 352 bool nocb_leader_sleep; /* Is the nocb leader thread asleep? */
362 struct rcu_data *nocb_next_follower; 353 struct rcu_data *nocb_next_follower;
363 /* Next follower in wakeup chain. */ 354 /* Next follower in wakeup chain. */
@@ -488,10 +479,14 @@ struct rcu_state {
488 /* due to no GP active. */ 479 /* due to no GP active. */
489 unsigned long gp_start; /* Time at which GP started, */ 480 unsigned long gp_start; /* Time at which GP started, */
490 /* but in jiffies. */ 481 /* but in jiffies. */
482 unsigned long gp_activity; /* Time of last GP kthread */
483 /* activity in jiffies. */
491 unsigned long jiffies_stall; /* Time at which to check */ 484 unsigned long jiffies_stall; /* Time at which to check */
492 /* for CPU stalls. */ 485 /* for CPU stalls. */
493 unsigned long jiffies_resched; /* Time at which to resched */ 486 unsigned long jiffies_resched; /* Time at which to resched */
494 /* a reluctant CPU. */ 487 /* a reluctant CPU. */
488 unsigned long n_force_qs_gpstart; /* Snapshot of n_force_qs at */
489 /* GP start. */
495 unsigned long gp_max; /* Maximum GP duration in */ 490 unsigned long gp_max; /* Maximum GP duration in */
496 /* jiffies. */ 491 /* jiffies. */
497 const char *name; /* Name of structure. */ 492 const char *name; /* Name of structure. */
@@ -514,13 +509,6 @@ extern struct list_head rcu_struct_flavors;
514#define for_each_rcu_flavor(rsp) \ 509#define for_each_rcu_flavor(rsp) \
515 list_for_each_entry((rsp), &rcu_struct_flavors, flavors) 510 list_for_each_entry((rsp), &rcu_struct_flavors, flavors)
516 511
517/* Return values for rcu_preempt_offline_tasks(). */
518
519#define RCU_OFL_TASKS_NORM_GP 0x1 /* Tasks blocking normal */
520 /* GP were moved to root. */
521#define RCU_OFL_TASKS_EXP_GP 0x2 /* Tasks blocking expedited */
522 /* GP were moved to root. */
523
524/* 512/*
525 * RCU implementation internal declarations: 513 * RCU implementation internal declarations:
526 */ 514 */
@@ -546,27 +534,16 @@ DECLARE_PER_CPU(char, rcu_cpu_has_work);
546 534
547/* Forward declarations for rcutree_plugin.h */ 535/* Forward declarations for rcutree_plugin.h */
548static void rcu_bootup_announce(void); 536static void rcu_bootup_announce(void);
549long rcu_batches_completed(void);
550static void rcu_preempt_note_context_switch(void); 537static void rcu_preempt_note_context_switch(void);
551static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp); 538static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp);
552#ifdef CONFIG_HOTPLUG_CPU 539#ifdef CONFIG_HOTPLUG_CPU
553static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, 540static bool rcu_preempt_has_tasks(struct rcu_node *rnp);
554 unsigned long flags);
555#endif /* #ifdef CONFIG_HOTPLUG_CPU */ 541#endif /* #ifdef CONFIG_HOTPLUG_CPU */
556static void rcu_print_detail_task_stall(struct rcu_state *rsp); 542static void rcu_print_detail_task_stall(struct rcu_state *rsp);
557static int rcu_print_task_stall(struct rcu_node *rnp); 543static int rcu_print_task_stall(struct rcu_node *rnp);
558static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp); 544static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp);
559#ifdef CONFIG_HOTPLUG_CPU
560static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
561 struct rcu_node *rnp,
562 struct rcu_data *rdp);
563#endif /* #ifdef CONFIG_HOTPLUG_CPU */
564static void rcu_preempt_check_callbacks(void); 545static void rcu_preempt_check_callbacks(void);
565void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); 546void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu));
566#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_PREEMPT_RCU)
567static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
568 bool wake);
569#endif /* #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_PREEMPT_RCU) */
570static void __init __rcu_init_preempt(void); 547static void __init __rcu_init_preempt(void);
571static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); 548static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
572static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); 549static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
@@ -622,24 +599,15 @@ static void rcu_dynticks_task_exit(void);
622#endif /* #ifndef RCU_TREE_NONCORE */ 599#endif /* #ifndef RCU_TREE_NONCORE */
623 600
624#ifdef CONFIG_RCU_TRACE 601#ifdef CONFIG_RCU_TRACE
625#ifdef CONFIG_RCU_NOCB_CPU 602/* Read out queue lengths for tracing. */
626/* Sum up queue lengths for tracing. */
627static inline void rcu_nocb_q_lengths(struct rcu_data *rdp, long *ql, long *qll) 603static inline void rcu_nocb_q_lengths(struct rcu_data *rdp, long *ql, long *qll)
628{ 604{
629 *ql = atomic_long_read(&rdp->nocb_q_count) + 605#ifdef CONFIG_RCU_NOCB_CPU
630 rdp->nocb_p_count + 606 *ql = atomic_long_read(&rdp->nocb_q_count);
631 atomic_long_read(&rdp->nocb_follower_count) + 607 *qll = atomic_long_read(&rdp->nocb_q_count_lazy);
632 rdp->nocb_p_count + rdp->nocb_gp_count;
633 *qll = atomic_long_read(&rdp->nocb_q_count_lazy) +
634 rdp->nocb_p_count_lazy +
635 atomic_long_read(&rdp->nocb_follower_count_lazy) +
636 rdp->nocb_p_count_lazy + rdp->nocb_gp_count_lazy;
637}
638#else /* #ifdef CONFIG_RCU_NOCB_CPU */ 608#else /* #ifdef CONFIG_RCU_NOCB_CPU */
639static inline void rcu_nocb_q_lengths(struct rcu_data *rdp, long *ql, long *qll)
640{
641 *ql = 0; 609 *ql = 0;
642 *qll = 0; 610 *qll = 0;
643}
644#endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */ 611#endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */
612}
645#endif /* #ifdef CONFIG_RCU_TRACE */ 613#endif /* #ifdef CONFIG_RCU_TRACE */
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 3ec85cb5d544..0d7bbe3095ad 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -34,10 +34,6 @@
34 34
35#include "../locking/rtmutex_common.h" 35#include "../locking/rtmutex_common.h"
36 36
37/* rcuc/rcub kthread realtime priority */
38static int kthread_prio = CONFIG_RCU_KTHREAD_PRIO;
39module_param(kthread_prio, int, 0644);
40
41/* 37/*
42 * Control variables for per-CPU and per-rcu_node kthreads. These 38 * Control variables for per-CPU and per-rcu_node kthreads. These
43 * handle all flavors of RCU. 39 * handle all flavors of RCU.
@@ -53,7 +49,6 @@ DEFINE_PER_CPU(char, rcu_cpu_has_work);
53static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */ 49static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */
54static bool have_rcu_nocb_mask; /* Was rcu_nocb_mask allocated? */ 50static bool have_rcu_nocb_mask; /* Was rcu_nocb_mask allocated? */
55static bool __read_mostly rcu_nocb_poll; /* Offload kthreads are to poll. */ 51static bool __read_mostly rcu_nocb_poll; /* Offload kthreads are to poll. */
56static char __initdata nocb_buf[NR_CPUS * 5];
57#endif /* #ifdef CONFIG_RCU_NOCB_CPU */ 52#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
58 53
59/* 54/*
@@ -103,6 +98,8 @@ RCU_STATE_INITIALIZER(rcu_preempt, 'p', call_rcu);
103static struct rcu_state *rcu_state_p = &rcu_preempt_state; 98static struct rcu_state *rcu_state_p = &rcu_preempt_state;
104 99
105static int rcu_preempted_readers_exp(struct rcu_node *rnp); 100static int rcu_preempted_readers_exp(struct rcu_node *rnp);
101static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
102 bool wake);
106 103
107/* 104/*
108 * Tell them what RCU they are running. 105 * Tell them what RCU they are running.
@@ -114,25 +111,6 @@ static void __init rcu_bootup_announce(void)
114} 111}
115 112
116/* 113/*
117 * Return the number of RCU-preempt batches processed thus far
118 * for debug and statistics.
119 */
120static long rcu_batches_completed_preempt(void)
121{
122 return rcu_preempt_state.completed;
123}
124EXPORT_SYMBOL_GPL(rcu_batches_completed_preempt);
125
126/*
127 * Return the number of RCU batches processed thus far for debug & stats.
128 */
129long rcu_batches_completed(void)
130{
131 return rcu_batches_completed_preempt();
132}
133EXPORT_SYMBOL_GPL(rcu_batches_completed);
134
135/*
136 * Record a preemptible-RCU quiescent state for the specified CPU. Note 114 * Record a preemptible-RCU quiescent state for the specified CPU. Note
137 * that this just means that the task currently running on the CPU is 115 * that this just means that the task currently running on the CPU is
138 * not in a quiescent state. There might be any number of tasks blocked 116 * not in a quiescent state. There might be any number of tasks blocked
@@ -307,15 +285,25 @@ static struct list_head *rcu_next_node_entry(struct task_struct *t,
307} 285}
308 286
309/* 287/*
288 * Return true if the specified rcu_node structure has tasks that were
289 * preempted within an RCU read-side critical section.
290 */
291static bool rcu_preempt_has_tasks(struct rcu_node *rnp)
292{
293 return !list_empty(&rnp->blkd_tasks);
294}
295
296/*
310 * Handle special cases during rcu_read_unlock(), such as needing to 297 * Handle special cases during rcu_read_unlock(), such as needing to
311 * notify RCU core processing or task having blocked during the RCU 298 * notify RCU core processing or task having blocked during the RCU
312 * read-side critical section. 299 * read-side critical section.
313 */ 300 */
314void rcu_read_unlock_special(struct task_struct *t) 301void rcu_read_unlock_special(struct task_struct *t)
315{ 302{
316 int empty; 303 bool empty;
317 int empty_exp; 304 bool empty_exp;
318 int empty_exp_now; 305 bool empty_norm;
306 bool empty_exp_now;
319 unsigned long flags; 307 unsigned long flags;
320 struct list_head *np; 308 struct list_head *np;
321#ifdef CONFIG_RCU_BOOST 309#ifdef CONFIG_RCU_BOOST
@@ -367,7 +355,8 @@ void rcu_read_unlock_special(struct task_struct *t)
367 break; 355 break;
368 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 356 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
369 } 357 }
370 empty = !rcu_preempt_blocked_readers_cgp(rnp); 358 empty = !rcu_preempt_has_tasks(rnp);
359 empty_norm = !rcu_preempt_blocked_readers_cgp(rnp);
371 empty_exp = !rcu_preempted_readers_exp(rnp); 360 empty_exp = !rcu_preempted_readers_exp(rnp);
372 smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */ 361 smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */
373 np = rcu_next_node_entry(t, rnp); 362 np = rcu_next_node_entry(t, rnp);
@@ -387,13 +376,21 @@ void rcu_read_unlock_special(struct task_struct *t)
387#endif /* #ifdef CONFIG_RCU_BOOST */ 376#endif /* #ifdef CONFIG_RCU_BOOST */
388 377
389 /* 378 /*
379 * If this was the last task on the list, go see if we
380 * need to propagate ->qsmaskinit bit clearing up the
381 * rcu_node tree.
382 */
383 if (!empty && !rcu_preempt_has_tasks(rnp))
384 rcu_cleanup_dead_rnp(rnp);
385
386 /*
390 * If this was the last task on the current list, and if 387 * If this was the last task on the current list, and if
391 * we aren't waiting on any CPUs, report the quiescent state. 388 * we aren't waiting on any CPUs, report the quiescent state.
392 * Note that rcu_report_unblock_qs_rnp() releases rnp->lock, 389 * Note that rcu_report_unblock_qs_rnp() releases rnp->lock,
393 * so we must take a snapshot of the expedited state. 390 * so we must take a snapshot of the expedited state.
394 */ 391 */
395 empty_exp_now = !rcu_preempted_readers_exp(rnp); 392 empty_exp_now = !rcu_preempted_readers_exp(rnp);
396 if (!empty && !rcu_preempt_blocked_readers_cgp(rnp)) { 393 if (!empty_norm && !rcu_preempt_blocked_readers_cgp(rnp)) {
397 trace_rcu_quiescent_state_report(TPS("preempt_rcu"), 394 trace_rcu_quiescent_state_report(TPS("preempt_rcu"),
398 rnp->gpnum, 395 rnp->gpnum,
399 0, rnp->qsmask, 396 0, rnp->qsmask,
@@ -408,10 +405,8 @@ void rcu_read_unlock_special(struct task_struct *t)
408 405
409#ifdef CONFIG_RCU_BOOST 406#ifdef CONFIG_RCU_BOOST
410 /* Unboost if we were boosted. */ 407 /* Unboost if we were boosted. */
411 if (drop_boost_mutex) { 408 if (drop_boost_mutex)
412 rt_mutex_unlock(&rnp->boost_mtx); 409 rt_mutex_unlock(&rnp->boost_mtx);
413 complete(&rnp->boost_completion);
414 }
415#endif /* #ifdef CONFIG_RCU_BOOST */ 410#endif /* #ifdef CONFIG_RCU_BOOST */
416 411
417 /* 412 /*
@@ -519,99 +514,13 @@ static int rcu_print_task_stall(struct rcu_node *rnp)
519static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) 514static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
520{ 515{
521 WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp)); 516 WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp));
522 if (!list_empty(&rnp->blkd_tasks)) 517 if (rcu_preempt_has_tasks(rnp))
523 rnp->gp_tasks = rnp->blkd_tasks.next; 518 rnp->gp_tasks = rnp->blkd_tasks.next;
524 WARN_ON_ONCE(rnp->qsmask); 519 WARN_ON_ONCE(rnp->qsmask);
525} 520}
526 521
527#ifdef CONFIG_HOTPLUG_CPU 522#ifdef CONFIG_HOTPLUG_CPU
528 523
529/*
530 * Handle tasklist migration for case in which all CPUs covered by the
531 * specified rcu_node have gone offline. Move them up to the root
532 * rcu_node. The reason for not just moving them to the immediate
533 * parent is to remove the need for rcu_read_unlock_special() to
534 * make more than two attempts to acquire the target rcu_node's lock.
535 * Returns true if there were tasks blocking the current RCU grace
536 * period.
537 *
538 * Returns 1 if there was previously a task blocking the current grace
539 * period on the specified rcu_node structure.
540 *
541 * The caller must hold rnp->lock with irqs disabled.
542 */
543static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
544 struct rcu_node *rnp,
545 struct rcu_data *rdp)
546{
547 struct list_head *lp;
548 struct list_head *lp_root;
549 int retval = 0;
550 struct rcu_node *rnp_root = rcu_get_root(rsp);
551 struct task_struct *t;
552
553 if (rnp == rnp_root) {
554 WARN_ONCE(1, "Last CPU thought to be offlined?");
555 return 0; /* Shouldn't happen: at least one CPU online. */
556 }
557
558 /* If we are on an internal node, complain bitterly. */
559 WARN_ON_ONCE(rnp != rdp->mynode);
560
561 /*
562 * Move tasks up to root rcu_node. Don't try to get fancy for
563 * this corner-case operation -- just put this node's tasks
564 * at the head of the root node's list, and update the root node's
565 * ->gp_tasks and ->exp_tasks pointers to those of this node's,
566 * if non-NULL. This might result in waiting for more tasks than
567 * absolutely necessary, but this is a good performance/complexity
568 * tradeoff.
569 */
570 if (rcu_preempt_blocked_readers_cgp(rnp) && rnp->qsmask == 0)
571 retval |= RCU_OFL_TASKS_NORM_GP;
572 if (rcu_preempted_readers_exp(rnp))
573 retval |= RCU_OFL_TASKS_EXP_GP;
574 lp = &rnp->blkd_tasks;
575 lp_root = &rnp_root->blkd_tasks;
576 while (!list_empty(lp)) {
577 t = list_entry(lp->next, typeof(*t), rcu_node_entry);
578 raw_spin_lock(&rnp_root->lock); /* irqs already disabled */
579 smp_mb__after_unlock_lock();
580 list_del(&t->rcu_node_entry);
581 t->rcu_blocked_node = rnp_root;
582 list_add(&t->rcu_node_entry, lp_root);
583 if (&t->rcu_node_entry == rnp->gp_tasks)
584 rnp_root->gp_tasks = rnp->gp_tasks;
585 if (&t->rcu_node_entry == rnp->exp_tasks)
586 rnp_root->exp_tasks = rnp->exp_tasks;
587#ifdef CONFIG_RCU_BOOST
588 if (&t->rcu_node_entry == rnp->boost_tasks)
589 rnp_root->boost_tasks = rnp->boost_tasks;
590#endif /* #ifdef CONFIG_RCU_BOOST */
591 raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */
592 }
593
594 rnp->gp_tasks = NULL;
595 rnp->exp_tasks = NULL;
596#ifdef CONFIG_RCU_BOOST
597 rnp->boost_tasks = NULL;
598 /*
599 * In case root is being boosted and leaf was not. Make sure
600 * that we boost the tasks blocking the current grace period
601 * in this case.
602 */
603 raw_spin_lock(&rnp_root->lock); /* irqs already disabled */
604 smp_mb__after_unlock_lock();
605 if (rnp_root->boost_tasks != NULL &&
606 rnp_root->boost_tasks != rnp_root->gp_tasks &&
607 rnp_root->boost_tasks != rnp_root->exp_tasks)
608 rnp_root->boost_tasks = rnp_root->gp_tasks;
609 raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */
610#endif /* #ifdef CONFIG_RCU_BOOST */
611
612 return retval;
613}
614
615#endif /* #ifdef CONFIG_HOTPLUG_CPU */ 524#endif /* #ifdef CONFIG_HOTPLUG_CPU */
616 525
617/* 526/*
@@ -771,7 +680,7 @@ sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
771 680
772 raw_spin_lock_irqsave(&rnp->lock, flags); 681 raw_spin_lock_irqsave(&rnp->lock, flags);
773 smp_mb__after_unlock_lock(); 682 smp_mb__after_unlock_lock();
774 if (list_empty(&rnp->blkd_tasks)) { 683 if (!rcu_preempt_has_tasks(rnp)) {
775 raw_spin_unlock_irqrestore(&rnp->lock, flags); 684 raw_spin_unlock_irqrestore(&rnp->lock, flags);
776 } else { 685 } else {
777 rnp->exp_tasks = rnp->blkd_tasks.next; 686 rnp->exp_tasks = rnp->blkd_tasks.next;
@@ -933,15 +842,6 @@ static void __init rcu_bootup_announce(void)
933} 842}
934 843
935/* 844/*
936 * Return the number of RCU batches processed thus far for debug & stats.
937 */
938long rcu_batches_completed(void)
939{
940 return rcu_batches_completed_sched();
941}
942EXPORT_SYMBOL_GPL(rcu_batches_completed);
943
944/*
945 * Because preemptible RCU does not exist, we never have to check for 845 * Because preemptible RCU does not exist, we never have to check for
946 * CPUs being in quiescent states. 846 * CPUs being in quiescent states.
947 */ 847 */
@@ -960,11 +860,12 @@ static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
960 860
961#ifdef CONFIG_HOTPLUG_CPU 861#ifdef CONFIG_HOTPLUG_CPU
962 862
963/* Because preemptible RCU does not exist, no quieting of tasks. */ 863/*
964static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags) 864 * Because there is no preemptible RCU, there can be no readers blocked.
965 __releases(rnp->lock) 865 */
866static bool rcu_preempt_has_tasks(struct rcu_node *rnp)
966{ 867{
967 raw_spin_unlock_irqrestore(&rnp->lock, flags); 868 return false;
968} 869}
969 870
970#endif /* #ifdef CONFIG_HOTPLUG_CPU */ 871#endif /* #ifdef CONFIG_HOTPLUG_CPU */
@@ -996,23 +897,6 @@ static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
996 WARN_ON_ONCE(rnp->qsmask); 897 WARN_ON_ONCE(rnp->qsmask);
997} 898}
998 899
999#ifdef CONFIG_HOTPLUG_CPU
1000
1001/*
1002 * Because preemptible RCU does not exist, it never needs to migrate
1003 * tasks that were blocked within RCU read-side critical sections, and
1004 * such non-existent tasks cannot possibly have been blocking the current
1005 * grace period.
1006 */
1007static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
1008 struct rcu_node *rnp,
1009 struct rcu_data *rdp)
1010{
1011 return 0;
1012}
1013
1014#endif /* #ifdef CONFIG_HOTPLUG_CPU */
1015
1016/* 900/*
1017 * Because preemptible RCU does not exist, it never has any callbacks 901 * Because preemptible RCU does not exist, it never has any callbacks
1018 * to check. 902 * to check.
@@ -1031,20 +915,6 @@ void synchronize_rcu_expedited(void)
1031} 915}
1032EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); 916EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
1033 917
1034#ifdef CONFIG_HOTPLUG_CPU
1035
1036/*
1037 * Because preemptible RCU does not exist, there is never any need to
1038 * report on tasks preempted in RCU read-side critical sections during
1039 * expedited RCU grace periods.
1040 */
1041static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
1042 bool wake)
1043{
1044}
1045
1046#endif /* #ifdef CONFIG_HOTPLUG_CPU */
1047
1048/* 918/*
1049 * Because preemptible RCU does not exist, rcu_barrier() is just 919 * Because preemptible RCU does not exist, rcu_barrier() is just
1050 * another name for rcu_barrier_sched(). 920 * another name for rcu_barrier_sched().
@@ -1080,7 +950,7 @@ void exit_rcu(void)
1080 950
1081static void rcu_initiate_boost_trace(struct rcu_node *rnp) 951static void rcu_initiate_boost_trace(struct rcu_node *rnp)
1082{ 952{
1083 if (list_empty(&rnp->blkd_tasks)) 953 if (!rcu_preempt_has_tasks(rnp))
1084 rnp->n_balk_blkd_tasks++; 954 rnp->n_balk_blkd_tasks++;
1085 else if (rnp->exp_tasks == NULL && rnp->gp_tasks == NULL) 955 else if (rnp->exp_tasks == NULL && rnp->gp_tasks == NULL)
1086 rnp->n_balk_exp_gp_tasks++; 956 rnp->n_balk_exp_gp_tasks++;
@@ -1127,7 +997,8 @@ static int rcu_boost(struct rcu_node *rnp)
1127 struct task_struct *t; 997 struct task_struct *t;
1128 struct list_head *tb; 998 struct list_head *tb;
1129 999
1130 if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL) 1000 if (ACCESS_ONCE(rnp->exp_tasks) == NULL &&
1001 ACCESS_ONCE(rnp->boost_tasks) == NULL)
1131 return 0; /* Nothing left to boost. */ 1002 return 0; /* Nothing left to boost. */
1132 1003
1133 raw_spin_lock_irqsave(&rnp->lock, flags); 1004 raw_spin_lock_irqsave(&rnp->lock, flags);
@@ -1175,15 +1046,11 @@ static int rcu_boost(struct rcu_node *rnp)
1175 */ 1046 */
1176 t = container_of(tb, struct task_struct, rcu_node_entry); 1047 t = container_of(tb, struct task_struct, rcu_node_entry);
1177 rt_mutex_init_proxy_locked(&rnp->boost_mtx, t); 1048 rt_mutex_init_proxy_locked(&rnp->boost_mtx, t);
1178 init_completion(&rnp->boost_completion);
1179 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1049 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1180 /* Lock only for side effect: boosts task t's priority. */ 1050 /* Lock only for side effect: boosts task t's priority. */
1181 rt_mutex_lock(&rnp->boost_mtx); 1051 rt_mutex_lock(&rnp->boost_mtx);
1182 rt_mutex_unlock(&rnp->boost_mtx); /* Then keep lockdep happy. */ 1052 rt_mutex_unlock(&rnp->boost_mtx); /* Then keep lockdep happy. */
1183 1053
1184 /* Wait for boostee to be done w/boost_mtx before reinitializing. */
1185 wait_for_completion(&rnp->boost_completion);
1186
1187 return ACCESS_ONCE(rnp->exp_tasks) != NULL || 1054 return ACCESS_ONCE(rnp->exp_tasks) != NULL ||
1188 ACCESS_ONCE(rnp->boost_tasks) != NULL; 1055 ACCESS_ONCE(rnp->boost_tasks) != NULL;
1189} 1056}
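With the boost_completion handshake gone, the sequence above reduces to the bare proxy-lock pattern; the patch relies on rt_mutex_unlock() no longer touching the mutex after release, so no completion is needed before the rt_mutex can be re-proxied. An annotated restatement of the same calls, comments added for illustration:

	rt_mutex_init_proxy_locked(&rnp->boost_mtx, t); /* t is made the owner */
	raw_spin_unlock_irqrestore(&rnp->lock, flags);
	rt_mutex_lock(&rnp->boost_mtx);	  /* sleeps; PI boosts t until t unlocks */
	rt_mutex_unlock(&rnp->boost_mtx); /* immediate unlock keeps lockdep happy */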
@@ -1416,12 +1283,8 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
1416 for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) 1283 for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1)
1417 if ((mask & 0x1) && cpu != outgoingcpu) 1284 if ((mask & 0x1) && cpu != outgoingcpu)
1418 cpumask_set_cpu(cpu, cm); 1285 cpumask_set_cpu(cpu, cm);
1419 if (cpumask_weight(cm) == 0) { 1286 if (cpumask_weight(cm) == 0)
1420 cpumask_setall(cm); 1287 cpumask_setall(cm);
1421 for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++)
1422 cpumask_clear_cpu(cpu, cm);
1423 WARN_ON_ONCE(cpumask_weight(cm) == 0);
1424 }
1425 set_cpus_allowed_ptr(t, cm); 1288 set_cpus_allowed_ptr(t, cm);
1426 free_cpumask_var(cm); 1289 free_cpumask_var(cm);
1427} 1290}
@@ -1446,12 +1309,8 @@ static void __init rcu_spawn_boost_kthreads(void)
1446 for_each_possible_cpu(cpu) 1309 for_each_possible_cpu(cpu)
1447 per_cpu(rcu_cpu_has_work, cpu) = 0; 1310 per_cpu(rcu_cpu_has_work, cpu) = 0;
1448 BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec)); 1311 BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec));
1449 rnp = rcu_get_root(rcu_state_p); 1312 rcu_for_each_leaf_node(rcu_state_p, rnp)
1450 (void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp); 1313 (void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp);
1451 if (NUM_RCU_NODES > 1) {
1452 rcu_for_each_leaf_node(rcu_state_p, rnp)
1453 (void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp);
1454 }
1455} 1314}
1456 1315
1457static void rcu_prepare_kthreads(int cpu) 1316static void rcu_prepare_kthreads(int cpu)
@@ -1605,7 +1464,8 @@ static bool __maybe_unused rcu_try_advance_all_cbs(void)
1605 * completed since we last checked and there are 1464 * completed since we last checked and there are
1606 * callbacks not yet ready to invoke. 1465 * callbacks not yet ready to invoke.
1607 */ 1466 */
1608 if (rdp->completed != rnp->completed && 1467 if ((rdp->completed != rnp->completed ||
1468 unlikely(ACCESS_ONCE(rdp->gpwrap))) &&
1609 rdp->nxttail[RCU_DONE_TAIL] != rdp->nxttail[RCU_NEXT_TAIL]) 1469 rdp->nxttail[RCU_DONE_TAIL] != rdp->nxttail[RCU_NEXT_TAIL])
1610 note_gp_changes(rsp, rdp); 1470 note_gp_changes(rsp, rdp);
1611 1471
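The new gpwrap test above handles a CPU that idles through so many grace periods that the unsigned ->completed values wrap back to equality. Ordering comparisons elsewhere in RCU are made wrap-safe with signed-difference macros; these two definitions are from kernel/rcu/rcu.h:

	#define ULONG_CMP_GE(a, b)	(ULONG_MAX / 2 >= (a) - (b))
	#define ULONG_CMP_LT(a, b)	(ULONG_MAX / 2 < (a) - (b))

	/*
	 * ULONG_CMP_LT() correctly orders any two counters less than
	 * ULONG_MAX/2 apart even across wrap; ->gpwrap covers the
	 * remaining case of a CPU sleeping longer than that.
	 */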
@@ -1898,11 +1758,12 @@ static void print_cpu_stall_info(struct rcu_state *rsp, int cpu)
1898 ticks_value = rsp->gpnum - rdp->gpnum; 1758 ticks_value = rsp->gpnum - rdp->gpnum;
1899 } 1759 }
1900 print_cpu_stall_fast_no_hz(fast_no_hz, cpu); 1760 print_cpu_stall_fast_no_hz(fast_no_hz, cpu);
1901 pr_err("\t%d: (%lu %s) idle=%03x/%llx/%d softirq=%u/%u %s\n", 1761 pr_err("\t%d: (%lu %s) idle=%03x/%llx/%d softirq=%u/%u fqs=%ld %s\n",
1902 cpu, ticks_value, ticks_title, 1762 cpu, ticks_value, ticks_title,
1903 atomic_read(&rdtp->dynticks) & 0xfff, 1763 atomic_read(&rdtp->dynticks) & 0xfff,
1904 rdtp->dynticks_nesting, rdtp->dynticks_nmi_nesting, 1764 rdtp->dynticks_nesting, rdtp->dynticks_nmi_nesting,
1905 rdp->softirq_snap, kstat_softirqs_cpu(RCU_SOFTIRQ, cpu), 1765 rdp->softirq_snap, kstat_softirqs_cpu(RCU_SOFTIRQ, cpu),
1766 ACCESS_ONCE(rsp->n_force_qs) - rsp->n_force_qs_gpstart,
1906 fast_no_hz); 1767 fast_no_hz);
1907} 1768}
1908 1769
@@ -2056,9 +1917,26 @@ static void wake_nocb_leader(struct rcu_data *rdp, bool force)
2056static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu) 1917static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu)
2057{ 1918{
2058 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); 1919 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
1920 unsigned long ret;
1921#ifdef CONFIG_PROVE_RCU
2059 struct rcu_head *rhp; 1922 struct rcu_head *rhp;
1923#endif /* #ifdef CONFIG_PROVE_RCU */
2060 1924
2061 /* No-CBs CPUs might have callbacks on any of three lists. */ 1925 /*
1926 * Check count of all no-CBs callbacks awaiting invocation.
1927 * There needs to be a barrier before this function is called,
1928 * but associated with a prior determination that no more
1929 * callbacks would be posted. In the worst case, the first
 1930 * barrier in _rcu_barrier() suffices (but the caller cannot
 1931 * necessarily rely on this; it is not a substitute for the caller
 1932 * getting the concurrency design right!). There must also be
 1933 * a barrier between the following load and the posting of a callback
1934 * (if a callback is in fact needed). This is associated with an
1935 * atomic_inc() in the caller.
1936 */
1937 ret = atomic_long_read(&rdp->nocb_q_count);
1938
1939#ifdef CONFIG_PROVE_RCU
2062 rhp = ACCESS_ONCE(rdp->nocb_head); 1940 rhp = ACCESS_ONCE(rdp->nocb_head);
2063 if (!rhp) 1941 if (!rhp)
2064 rhp = ACCESS_ONCE(rdp->nocb_gp_head); 1942 rhp = ACCESS_ONCE(rdp->nocb_gp_head);
@@ -2072,8 +1950,9 @@ static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu)
2072 cpu, rhp->func); 1950 cpu, rhp->func);
2073 WARN_ON_ONCE(1); 1951 WARN_ON_ONCE(1);
2074 } 1952 }
1953#endif /* #ifdef CONFIG_PROVE_RCU */
2075 1954
2076 return !!rhp; 1955 return !!ret;
2077} 1956}
2078 1957
2079/* 1958/*
@@ -2095,9 +1974,10 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
2095 struct task_struct *t; 1974 struct task_struct *t;
2096 1975
2097 /* Enqueue the callback on the nocb list and update counts. */ 1976 /* Enqueue the callback on the nocb list and update counts. */
1977 atomic_long_add(rhcount, &rdp->nocb_q_count);
1978 /* rcu_barrier() relies on ->nocb_q_count add before xchg. */
2098 old_rhpp = xchg(&rdp->nocb_tail, rhtp); 1979 old_rhpp = xchg(&rdp->nocb_tail, rhtp);
2099 ACCESS_ONCE(*old_rhpp) = rhp; 1980 ACCESS_ONCE(*old_rhpp) = rhp;
2100 atomic_long_add(rhcount, &rdp->nocb_q_count);
2101 atomic_long_add(rhcount_lazy, &rdp->nocb_q_count_lazy); 1981 atomic_long_add(rhcount_lazy, &rdp->nocb_q_count_lazy);
2102 smp_mb__after_atomic(); /* Store *old_rhpp before _wake test. */ 1982 smp_mb__after_atomic(); /* Store *old_rhpp before _wake test. */
2103 1983
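The reordering above puts the count update ahead of the xchg() that publishes the callback, which is what the barrier comment in rcu_nocb_cpu_needs_barrier() depends on: a zero count, read after a barrier, proves that no callback was posted. A sketch of the two sides of that pairing (names illustrative, not the kernel's queue):

	static void post_cb(struct rcu_data *rdp, struct rcu_head *rhp,
			    struct rcu_head **rhtp)
	{
		struct rcu_head **old_tail;

		atomic_long_add(1, &rdp->nocb_q_count); /* count first...   */
		old_tail = xchg(&rdp->nocb_tail, rhtp); /* ...then publish; */
		ACCESS_ONCE(*old_tail) = rhp;	/* xchg() is a full barrier */
	}

	static bool cbs_outstanding(struct rcu_data *rdp)
	{
		smp_mb();	/* caller-side barrier, pairs with xchg() */
		return atomic_long_read(&rdp->nocb_q_count) != 0;
	}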
@@ -2288,9 +2168,6 @@ wait_again:
2288 /* Move callbacks to wait-for-GP list, which is empty. */ 2168 /* Move callbacks to wait-for-GP list, which is empty. */
2289 ACCESS_ONCE(rdp->nocb_head) = NULL; 2169 ACCESS_ONCE(rdp->nocb_head) = NULL;
2290 rdp->nocb_gp_tail = xchg(&rdp->nocb_tail, &rdp->nocb_head); 2170 rdp->nocb_gp_tail = xchg(&rdp->nocb_tail, &rdp->nocb_head);
2291 rdp->nocb_gp_count = atomic_long_xchg(&rdp->nocb_q_count, 0);
2292 rdp->nocb_gp_count_lazy =
2293 atomic_long_xchg(&rdp->nocb_q_count_lazy, 0);
2294 gotcbs = true; 2171 gotcbs = true;
2295 } 2172 }
2296 2173
@@ -2338,9 +2215,6 @@ wait_again:
2338 /* Append callbacks to follower's "done" list. */ 2215 /* Append callbacks to follower's "done" list. */
2339 tail = xchg(&rdp->nocb_follower_tail, rdp->nocb_gp_tail); 2216 tail = xchg(&rdp->nocb_follower_tail, rdp->nocb_gp_tail);
2340 *tail = rdp->nocb_gp_head; 2217 *tail = rdp->nocb_gp_head;
2341 atomic_long_add(rdp->nocb_gp_count, &rdp->nocb_follower_count);
2342 atomic_long_add(rdp->nocb_gp_count_lazy,
2343 &rdp->nocb_follower_count_lazy);
2344 smp_mb__after_atomic(); /* Store *tail before wakeup. */ 2218 smp_mb__after_atomic(); /* Store *tail before wakeup. */
2345 if (rdp != my_rdp && tail == &rdp->nocb_follower_head) { 2219 if (rdp != my_rdp && tail == &rdp->nocb_follower_head) {
2346 /* 2220 /*
@@ -2415,13 +2289,11 @@ static int rcu_nocb_kthread(void *arg)
2415 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, "WokeNonEmpty"); 2289 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, "WokeNonEmpty");
2416 ACCESS_ONCE(rdp->nocb_follower_head) = NULL; 2290 ACCESS_ONCE(rdp->nocb_follower_head) = NULL;
2417 tail = xchg(&rdp->nocb_follower_tail, &rdp->nocb_follower_head); 2291 tail = xchg(&rdp->nocb_follower_tail, &rdp->nocb_follower_head);
2418 c = atomic_long_xchg(&rdp->nocb_follower_count, 0);
2419 cl = atomic_long_xchg(&rdp->nocb_follower_count_lazy, 0);
2420 rdp->nocb_p_count += c;
2421 rdp->nocb_p_count_lazy += cl;
2422 2292
2423 /* Each pass through the following loop invokes a callback. */ 2293 /* Each pass through the following loop invokes a callback. */
2424 trace_rcu_batch_start(rdp->rsp->name, cl, c, -1); 2294 trace_rcu_batch_start(rdp->rsp->name,
2295 atomic_long_read(&rdp->nocb_q_count_lazy),
2296 atomic_long_read(&rdp->nocb_q_count), -1);
2425 c = cl = 0; 2297 c = cl = 0;
2426 while (list) { 2298 while (list) {
2427 next = list->next; 2299 next = list->next;
@@ -2443,9 +2315,9 @@ static int rcu_nocb_kthread(void *arg)
2443 list = next; 2315 list = next;
2444 } 2316 }
2445 trace_rcu_batch_end(rdp->rsp->name, c, !!list, 0, 0, 1); 2317 trace_rcu_batch_end(rdp->rsp->name, c, !!list, 0, 0, 1);
2446 ACCESS_ONCE(rdp->nocb_p_count) = rdp->nocb_p_count - c; 2318 smp_mb__before_atomic(); /* _add after CB invocation. */
2447 ACCESS_ONCE(rdp->nocb_p_count_lazy) = 2319 atomic_long_add(-c, &rdp->nocb_q_count);
2448 rdp->nocb_p_count_lazy - cl; 2320 atomic_long_add(-cl, &rdp->nocb_q_count_lazy);
2449 rdp->n_nocbs_invoked += c; 2321 rdp->n_nocbs_invoked += c;
2450 } 2322 }
2451 return 0; 2323 return 0;
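Symmetrically, the hunk above defers the counter decrement until every callback in the batch has run, with smp_mb__before_atomic() ordering the invocations before the subtraction, so rcu_barrier() can never read a zero count while a callback is still executing. In sketch form, where invoke_all() is a hypothetical stand-in for the invocation loop:

	static void finish_batch(struct rcu_data *rdp, struct rcu_head *list,
				 long c)
	{
		invoke_all(list);		/* run each callback in batch */
		smp_mb__before_atomic();	/* invocations before drop    */
		atomic_long_add(-c, &rdp->nocb_q_count);
	}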
@@ -2513,8 +2385,8 @@ void __init rcu_init_nohz(void)
2513 cpumask_and(rcu_nocb_mask, cpu_possible_mask, 2385 cpumask_and(rcu_nocb_mask, cpu_possible_mask,
2514 rcu_nocb_mask); 2386 rcu_nocb_mask);
2515 } 2387 }
2516 cpulist_scnprintf(nocb_buf, sizeof(nocb_buf), rcu_nocb_mask); 2388 pr_info("\tOffload RCU callbacks from CPUs: %*pbl.\n",
2517 pr_info("\tOffload RCU callbacks from CPUs: %s.\n", nocb_buf); 2389 cpumask_pr_args(rcu_nocb_mask));
2518 if (rcu_nocb_poll) 2390 if (rcu_nocb_poll)
2519 pr_info("\tPoll for callbacks from no-CBs CPUs.\n"); 2391 pr_info("\tPoll for callbacks from no-CBs CPUs.\n");
2520 2392
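The %*pbl conversion above is what lets the NR_CPUS-sized nocb_buf and the cpulist_scnprintf() step disappear: printk can now format a cpumask directly from its bitmap. A usage sketch:

	/* CPU-list form ("0-3,5") and raw-bitmap form, no buffer needed. */
	pr_info("callback-offloaded CPUs: %*pbl\n", cpumask_pr_args(rcu_nocb_mask));
	pr_info("callback-offloaded CPUs: %*pb\n", cpumask_pr_args(rcu_nocb_mask));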
diff --git a/kernel/rcu/tree_trace.c b/kernel/rcu/tree_trace.c
index 5cdc62e1beeb..fbb6240509ea 100644
--- a/kernel/rcu/tree_trace.c
+++ b/kernel/rcu/tree_trace.c
@@ -46,6 +46,8 @@
46#define RCU_TREE_NONCORE 46#define RCU_TREE_NONCORE
47#include "tree.h" 47#include "tree.h"
48 48
49DECLARE_PER_CPU_SHARED_ALIGNED(unsigned long, rcu_qs_ctr);
50
49static int r_open(struct inode *inode, struct file *file, 51static int r_open(struct inode *inode, struct file *file,
50 const struct seq_operations *op) 52 const struct seq_operations *op)
51{ 53{
@@ -115,11 +117,13 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
115 117
116 if (!rdp->beenonline) 118 if (!rdp->beenonline)
117 return; 119 return;
118 seq_printf(m, "%3d%cc=%ld g=%ld pq=%d qp=%d", 120 seq_printf(m, "%3d%cc=%ld g=%ld pq=%d/%d qp=%d",
119 rdp->cpu, 121 rdp->cpu,
120 cpu_is_offline(rdp->cpu) ? '!' : ' ', 122 cpu_is_offline(rdp->cpu) ? '!' : ' ',
121 ulong2long(rdp->completed), ulong2long(rdp->gpnum), 123 ulong2long(rdp->completed), ulong2long(rdp->gpnum),
122 rdp->passed_quiesce, rdp->qs_pending); 124 rdp->passed_quiesce,
125 rdp->rcu_qs_ctr_snap == per_cpu(rcu_qs_ctr, rdp->cpu),
126 rdp->qs_pending);
123 seq_printf(m, " dt=%d/%llx/%d df=%lu", 127 seq_printf(m, " dt=%d/%llx/%d df=%lu",
124 atomic_read(&rdp->dynticks->dynticks), 128 atomic_read(&rdp->dynticks->dynticks),
125 rdp->dynticks->dynticks_nesting, 129 rdp->dynticks->dynticks_nesting,