Diffstat (limited to 'kernel/rcutree.c')
 kernel/rcutree.c | 255
 1 file changed, 153 insertions(+), 102 deletions(-)
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 068de3a93606..32618b3fe4e6 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -53,18 +53,38 @@
 #include <linux/delay.h>
 #include <linux/stop_machine.h>
 #include <linux/random.h>
+#include <linux/ftrace_event.h>
+#include <linux/suspend.h>
 
 #include "rcutree.h"
 #include <trace/events/rcu.h>
 
 #include "rcu.h"
 
+/*
+ * Strings used in tracepoints need to be exported via the
+ * tracing system such that tools like perf and trace-cmd can
+ * translate the string address pointers to actual text.
+ */
+#define TPS(x) tracepoint_string(x)
+
 /* Data structures. */
 
 static struct lock_class_key rcu_node_class[RCU_NUM_LVLS];
 static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS];
 
-#define RCU_STATE_INITIALIZER(sname, sabbr, cr) { \
+/*
+ * In order to export the rcu_state name to the tracing tools, it
+ * needs to be added in the __tracepoint_string section.
+ * This requires defining a separate variable tp_<sname>_varname
+ * that points to the string being used, and this will allow
+ * the tracing userspace tools to be able to decipher the string
+ * address to the matching string.
+ */
+#define RCU_STATE_INITIALIZER(sname, sabbr, cr) \
+static char sname##_varname[] = #sname; \
+static const char *tp_##sname##_varname __used __tracepoint_string = sname##_varname; \
+struct rcu_state sname##_state = { \
 	.level = { &sname##_state.node[0] }, \
 	.call = cr, \
 	.fqs_state = RCU_GP_IDLE, \
@@ -75,16 +95,13 @@ static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS];
 	.orphan_donetail = &sname##_state.orphan_donelist, \
 	.barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \
 	.onoff_mutex = __MUTEX_INITIALIZER(sname##_state.onoff_mutex), \
-	.name = #sname, \
+	.name = sname##_varname, \
 	.abbr = sabbr, \
-}
-
-struct rcu_state rcu_sched_state =
-	RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched);
-DEFINE_PER_CPU(struct rcu_data, rcu_sched_data);
+}; \
+DEFINE_PER_CPU(struct rcu_data, sname##_data)
 
-struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh, 'b', call_rcu_bh);
-DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
+RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched);
+RCU_STATE_INITIALIZER(rcu_bh, 'b', call_rcu_bh);
 
 static struct rcu_state *rcu_state;
 LIST_HEAD(rcu_struct_flavors);
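For illustration only, not part of the patch: the string-export lines of the new RCU_STATE_INITIALIZER() would expand for the rcu_sched flavor roughly as follows, which is what lets tools such as perf and trace-cmd resolve the exported string address back to the text "rcu_sched".

	static char rcu_sched_varname[] = "rcu_sched";
	static const char *tp_rcu_sched_varname __used __tracepoint_string = rcu_sched_varname;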
@@ -178,7 +195,7 @@ void rcu_sched_qs(int cpu)
 	struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu);
 
 	if (rdp->passed_quiesce == 0)
-		trace_rcu_grace_period("rcu_sched", rdp->gpnum, "cpuqs");
+		trace_rcu_grace_period(TPS("rcu_sched"), rdp->gpnum, TPS("cpuqs"));
 	rdp->passed_quiesce = 1;
 }
 
@@ -187,7 +204,7 @@ void rcu_bh_qs(int cpu)
 	struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu);
 
 	if (rdp->passed_quiesce == 0)
-		trace_rcu_grace_period("rcu_bh", rdp->gpnum, "cpuqs");
+		trace_rcu_grace_period(TPS("rcu_bh"), rdp->gpnum, TPS("cpuqs"));
 	rdp->passed_quiesce = 1;
 }
 
@@ -198,16 +215,20 @@ void rcu_bh_qs(int cpu)
  */
 void rcu_note_context_switch(int cpu)
 {
-	trace_rcu_utilization("Start context switch");
+	trace_rcu_utilization(TPS("Start context switch"));
 	rcu_sched_qs(cpu);
 	rcu_preempt_note_context_switch(cpu);
-	trace_rcu_utilization("End context switch");
+	trace_rcu_utilization(TPS("End context switch"));
 }
 EXPORT_SYMBOL_GPL(rcu_note_context_switch);
 
 DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
 	.dynticks_nesting = DYNTICK_TASK_EXIT_IDLE,
 	.dynticks = ATOMIC_INIT(1),
+#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
+	.dynticks_idle_nesting = DYNTICK_TASK_NEST_VALUE,
+	.dynticks_idle = ATOMIC_INIT(1),
+#endif /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
 };
 
 static long blimit = 10; /* Maximum callbacks per rcu_do_batch. */
@@ -226,7 +247,10 @@ module_param(jiffies_till_next_fqs, ulong, 0644);
 
 static void rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
 				  struct rcu_data *rdp);
-static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *));
+static void force_qs_rnp(struct rcu_state *rsp,
+			 int (*f)(struct rcu_data *rsp, bool *isidle,
+				  unsigned long *maxj),
+			 bool *isidle, unsigned long *maxj);
 static void force_quiescent_state(struct rcu_state *rsp);
 static int rcu_pending(int cpu);
 
@@ -345,11 +369,11 @@ static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
 static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval,
 				 bool user)
 {
-	trace_rcu_dyntick("Start", oldval, rdtp->dynticks_nesting);
+	trace_rcu_dyntick(TPS("Start"), oldval, rdtp->dynticks_nesting);
 	if (!user && !is_idle_task(current)) {
 		struct task_struct *idle = idle_task(smp_processor_id());
 
-		trace_rcu_dyntick("Error on entry: not idle task", oldval, 0);
+		trace_rcu_dyntick(TPS("Error on entry: not idle task"), oldval, 0);
 		ftrace_dump(DUMP_ORIG);
 		WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
 			  current->pid, current->comm,
@@ -411,6 +435,7 @@ void rcu_idle_enter(void)
 
 	local_irq_save(flags);
 	rcu_eqs_enter(false);
+	rcu_sysidle_enter(&__get_cpu_var(rcu_dynticks), 0);
 	local_irq_restore(flags);
 }
 EXPORT_SYMBOL_GPL(rcu_idle_enter);
@@ -428,27 +453,6 @@ void rcu_user_enter(void)
 {
 	rcu_eqs_enter(1);
 }
-
-/**
- * rcu_user_enter_after_irq - inform RCU that we are going to resume userspace
- * after the current irq returns.
- *
- * This is similar to rcu_user_enter() but in the context of a non-nesting
- * irq. After this call, RCU enters into idle mode when the interrupt
- * returns.
- */
-void rcu_user_enter_after_irq(void)
-{
-	unsigned long flags;
-	struct rcu_dynticks *rdtp;
-
-	local_irq_save(flags);
-	rdtp = &__get_cpu_var(rcu_dynticks);
-	/* Ensure this irq is interrupting a non-idle RCU state. */
-	WARN_ON_ONCE(!(rdtp->dynticks_nesting & DYNTICK_TASK_MASK));
-	rdtp->dynticks_nesting = 1;
-	local_irq_restore(flags);
-}
 #endif /* CONFIG_RCU_USER_QS */
 
 /**
@@ -479,9 +483,10 @@ void rcu_irq_exit(void)
 	rdtp->dynticks_nesting--;
 	WARN_ON_ONCE(rdtp->dynticks_nesting < 0);
 	if (rdtp->dynticks_nesting)
-		trace_rcu_dyntick("--=", oldval, rdtp->dynticks_nesting);
+		trace_rcu_dyntick(TPS("--="), oldval, rdtp->dynticks_nesting);
 	else
 		rcu_eqs_enter_common(rdtp, oldval, true);
+	rcu_sysidle_enter(rdtp, 1);
 	local_irq_restore(flags);
 }
 
@@ -501,11 +506,11 @@ static void rcu_eqs_exit_common(struct rcu_dynticks *rdtp, long long oldval,
 	smp_mb__after_atomic_inc(); /* See above. */
 	WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
 	rcu_cleanup_after_idle(smp_processor_id());
-	trace_rcu_dyntick("End", oldval, rdtp->dynticks_nesting);
+	trace_rcu_dyntick(TPS("End"), oldval, rdtp->dynticks_nesting);
 	if (!user && !is_idle_task(current)) {
 		struct task_struct *idle = idle_task(smp_processor_id());
 
-		trace_rcu_dyntick("Error on exit: not idle task",
+		trace_rcu_dyntick(TPS("Error on exit: not idle task"),
 				  oldval, rdtp->dynticks_nesting);
 		ftrace_dump(DUMP_ORIG);
 		WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
@@ -550,6 +555,7 @@ void rcu_idle_exit(void)
 
 	local_irq_save(flags);
 	rcu_eqs_exit(false);
+	rcu_sysidle_exit(&__get_cpu_var(rcu_dynticks), 0);
 	local_irq_restore(flags);
 }
 EXPORT_SYMBOL_GPL(rcu_idle_exit);
@@ -565,28 +571,6 @@ void rcu_user_exit(void)
 {
 	rcu_eqs_exit(1);
 }
-
-/**
- * rcu_user_exit_after_irq - inform RCU that we won't resume to userspace
- * idle mode after the current non-nesting irq returns.
- *
- * This is similar to rcu_user_exit() but in the context of an irq.
- * This is called when the irq has interrupted a userspace RCU idle mode
- * context. When the current non-nesting interrupt returns after this call,
- * the CPU won't restore the RCU idle mode.
- */
-void rcu_user_exit_after_irq(void)
-{
-	unsigned long flags;
-	struct rcu_dynticks *rdtp;
-
-	local_irq_save(flags);
-	rdtp = &__get_cpu_var(rcu_dynticks);
-	/* Ensure we are interrupting an RCU idle mode. */
-	WARN_ON_ONCE(rdtp->dynticks_nesting & DYNTICK_TASK_NEST_MASK);
-	rdtp->dynticks_nesting += DYNTICK_TASK_EXIT_IDLE;
-	local_irq_restore(flags);
-}
 #endif /* CONFIG_RCU_USER_QS */
 
 /**
@@ -620,9 +604,10 @@ void rcu_irq_enter(void)
 	rdtp->dynticks_nesting++;
 	WARN_ON_ONCE(rdtp->dynticks_nesting == 0);
 	if (oldval)
-		trace_rcu_dyntick("++=", oldval, rdtp->dynticks_nesting);
+		trace_rcu_dyntick(TPS("++="), oldval, rdtp->dynticks_nesting);
 	else
 		rcu_eqs_exit_common(rdtp, oldval, true);
+	rcu_sysidle_exit(rdtp, 1);
 	local_irq_restore(flags);
 }
 
@@ -746,9 +731,11 @@ static int rcu_is_cpu_rrupt_from_idle(void)
  * credit them with an implicit quiescent state. Return 1 if this CPU
  * is in dynticks idle mode, which is an extended quiescent state.
  */
-static int dyntick_save_progress_counter(struct rcu_data *rdp)
+static int dyntick_save_progress_counter(struct rcu_data *rdp,
+					 bool *isidle, unsigned long *maxj)
 {
 	rdp->dynticks_snap = atomic_add_return(0, &rdp->dynticks->dynticks);
+	rcu_sysidle_check_cpu(rdp, isidle, maxj);
 	return (rdp->dynticks_snap & 0x1) == 0;
 }
 
@@ -758,7 +745,8 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp)
  * idle state since the last call to dyntick_save_progress_counter()
  * for this same CPU, or by virtue of having been offline.
  */
-static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
+static int rcu_implicit_dynticks_qs(struct rcu_data *rdp,
+				    bool *isidle, unsigned long *maxj)
 {
 	unsigned int curr;
 	unsigned int snap;
@@ -775,7 +763,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
 	 * of the current RCU grace period.
 	 */
 	if ((curr & 0x1) == 0 || UINT_CMP_GE(curr, snap + 2)) {
-		trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "dti");
+		trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("dti"));
 		rdp->dynticks_fqs++;
 		return 1;
 	}
@@ -795,7 +783,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
 		return 0; /* Grace period is not old enough. */
 	barrier();
 	if (cpu_is_offline(rdp->cpu)) {
-		trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "ofl");
+		trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("ofl"));
 		rdp->offline_fqs++;
 		return 1;
 	}
@@ -1032,7 +1020,7 @@ static unsigned long rcu_cbs_completed(struct rcu_state *rsp,
  * rcu_nocb_wait_gp().
  */
 static void trace_rcu_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
-				unsigned long c, char *s)
+				unsigned long c, const char *s)
 {
 	trace_rcu_future_grace_period(rdp->rsp->name, rnp->gpnum,
 				      rnp->completed, c, rnp->level,
@@ -1058,9 +1046,9 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp)
 	 * grace period is already marked as needed, return to the caller.
 	 */
 	c = rcu_cbs_completed(rdp->rsp, rnp);
-	trace_rcu_future_gp(rnp, rdp, c, "Startleaf");
+	trace_rcu_future_gp(rnp, rdp, c, TPS("Startleaf"));
 	if (rnp->need_future_gp[c & 0x1]) {
-		trace_rcu_future_gp(rnp, rdp, c, "Prestartleaf");
+		trace_rcu_future_gp(rnp, rdp, c, TPS("Prestartleaf"));
 		return c;
 	}
 
@@ -1074,7 +1062,7 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp)
 	if (rnp->gpnum != rnp->completed ||
 	    ACCESS_ONCE(rnp->gpnum) != ACCESS_ONCE(rnp->completed)) {
 		rnp->need_future_gp[c & 0x1]++;
-		trace_rcu_future_gp(rnp, rdp, c, "Startedleaf");
+		trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleaf"));
 		return c;
 	}
 
@@ -1102,7 +1090,7 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp)
 	 * recorded, trace and leave.
 	 */
 	if (rnp_root->need_future_gp[c & 0x1]) {
-		trace_rcu_future_gp(rnp, rdp, c, "Prestartedroot");
+		trace_rcu_future_gp(rnp, rdp, c, TPS("Prestartedroot"));
 		goto unlock_out;
 	}
 
@@ -1111,9 +1099,9 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp)
 
 	/* If a grace period is not already in progress, start one. */
 	if (rnp_root->gpnum != rnp_root->completed) {
-		trace_rcu_future_gp(rnp, rdp, c, "Startedleafroot");
+		trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleafroot"));
 	} else {
-		trace_rcu_future_gp(rnp, rdp, c, "Startedroot");
+		trace_rcu_future_gp(rnp, rdp, c, TPS("Startedroot"));
 		rcu_start_gp_advanced(rdp->rsp, rnp_root, rdp);
 	}
 unlock_out:
@@ -1137,7 +1125,8 @@ static int rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
 	rcu_nocb_gp_cleanup(rsp, rnp);
 	rnp->need_future_gp[c & 0x1] = 0;
 	needmore = rnp->need_future_gp[(c + 1) & 0x1];
-	trace_rcu_future_gp(rnp, rdp, c, needmore ? "CleanupMore" : "Cleanup");
+	trace_rcu_future_gp(rnp, rdp, c,
+			    needmore ? TPS("CleanupMore") : TPS("Cleanup"));
 	return needmore;
 }
 
@@ -1205,9 +1194,9 @@ static void rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
 
 	/* Trace depending on how much we were able to accelerate. */
 	if (!*rdp->nxttail[RCU_WAIT_TAIL])
-		trace_rcu_grace_period(rsp->name, rdp->gpnum, "AccWaitCB");
+		trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("AccWaitCB"));
 	else
-		trace_rcu_grace_period(rsp->name, rdp->gpnum, "AccReadyCB");
+		trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("AccReadyCB"));
 }
 
 /*
@@ -1273,7 +1262,7 @@ static void __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp, struc
 
 		/* Remember that we saw this grace-period completion. */
 		rdp->completed = rnp->completed;
-		trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpuend");
+		trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuend"));
 	}
 
 	if (rdp->gpnum != rnp->gpnum) {
@@ -1283,7 +1272,7 @@ static void __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp, struc
 		 * go looking for one.
 		 */
 		rdp->gpnum = rnp->gpnum;
-		trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpustart");
+		trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpustart"));
 		rdp->passed_quiesce = 0;
 		rdp->qs_pending = !!(rnp->qsmask & rdp->grpmask);
 		zero_cpu_stall_ticks(rdp);
@@ -1315,6 +1304,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
 	struct rcu_data *rdp;
 	struct rcu_node *rnp = rcu_get_root(rsp);
 
+	rcu_bind_gp_kthread();
 	raw_spin_lock_irq(&rnp->lock);
 	rsp->gp_flags = 0; /* Clear all flags: New grace period. */
 
@@ -1326,7 +1316,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
 
 	/* Advance to a new grace period and initialize state. */
 	rsp->gpnum++;
-	trace_rcu_grace_period(rsp->name, rsp->gpnum, "start");
+	trace_rcu_grace_period(rsp->name, rsp->gpnum, TPS("start"));
 	record_gp_stall_check_time(rsp);
 	raw_spin_unlock_irq(&rnp->lock);
 
@@ -1379,16 +1369,25 @@ static int rcu_gp_init(struct rcu_state *rsp)
 int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in)
 {
 	int fqs_state = fqs_state_in;
+	bool isidle = false;
+	unsigned long maxj;
 	struct rcu_node *rnp = rcu_get_root(rsp);
 
 	rsp->n_force_qs++;
 	if (fqs_state == RCU_SAVE_DYNTICK) {
 		/* Collect dyntick-idle snapshots. */
-		force_qs_rnp(rsp, dyntick_save_progress_counter);
+		if (is_sysidle_rcu_state(rsp)) {
+			isidle = 1;
+			maxj = jiffies - ULONG_MAX / 4;
+		}
+		force_qs_rnp(rsp, dyntick_save_progress_counter,
+			     &isidle, &maxj);
+		rcu_sysidle_report_gp(rsp, isidle, maxj);
 		fqs_state = RCU_FORCE_QS;
 	} else {
 		/* Handle dyntick-idle and offline CPUs. */
-		force_qs_rnp(rsp, rcu_implicit_dynticks_qs);
+		isidle = 0;
+		force_qs_rnp(rsp, rcu_implicit_dynticks_qs, &isidle, &maxj);
 	}
 	/* Clear flag to prevent immediate re-entry. */
 	if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
@@ -1448,7 +1447,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
 	rcu_nocb_gp_set(rnp, nocb);
 
 	rsp->completed = rsp->gpnum; /* Declare grace period done. */
-	trace_rcu_grace_period(rsp->name, rsp->completed, "end");
+	trace_rcu_grace_period(rsp->name, rsp->completed, TPS("end"));
 	rsp->fqs_state = RCU_GP_IDLE;
 	rdp = this_cpu_ptr(rsp->rda);
 	rcu_advance_cbs(rsp, rnp, rdp); /* Reduce false positives below. */
@@ -1558,10 +1557,12 @@ rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
 
 	/*
 	 * We can't do wakeups while holding the rnp->lock, as that
-	 * could cause possible deadlocks with the rq->lock. Deter
-	 * the wakeup to interrupt context.
+	 * could cause possible deadlocks with the rq->lock. Defer
+	 * the wakeup to interrupt context. And don't bother waking
+	 * up the running kthread.
 	 */
-	irq_work_queue(&rsp->wakeup_work);
+	if (current != rsp->gp_kthread)
+		irq_work_queue(&rsp->wakeup_work);
 }
 
 /*
@@ -1857,7 +1858,7 @@ static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
 	RCU_TRACE(mask = rdp->grpmask);
 	trace_rcu_grace_period(rsp->name,
 			       rnp->gpnum + 1 - !!(rnp->qsmask & mask),
-			       "cpuofl");
+			       TPS("cpuofl"));
 }
 
 /*
@@ -2044,7 +2045,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
  */
 void rcu_check_callbacks(int cpu, int user)
 {
-	trace_rcu_utilization("Start scheduler-tick");
+	trace_rcu_utilization(TPS("Start scheduler-tick"));
 	increment_cpu_stall_ticks();
 	if (user || rcu_is_cpu_rrupt_from_idle()) {
 
@@ -2077,7 +2078,7 @@ void rcu_check_callbacks(int cpu, int user)
 	rcu_preempt_check_callbacks(cpu);
 	if (rcu_pending(cpu))
 		invoke_rcu_core();
-	trace_rcu_utilization("End scheduler-tick");
+	trace_rcu_utilization(TPS("End scheduler-tick"));
 }
 
 /*
@@ -2087,7 +2088,10 @@ void rcu_check_callbacks(int cpu, int user)
  *
  * The caller must have suppressed start of new grace periods.
  */
-static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *))
+static void force_qs_rnp(struct rcu_state *rsp,
+			 int (*f)(struct rcu_data *rsp, bool *isidle,
+				  unsigned long *maxj),
+			 bool *isidle, unsigned long *maxj)
 {
 	unsigned long bit;
 	int cpu;
@@ -2110,9 +2114,12 @@ static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *))
 		cpu = rnp->grplo;
 		bit = 1;
 		for (; cpu <= rnp->grphi; cpu++, bit <<= 1) {
-			if ((rnp->qsmask & bit) != 0 &&
-			    f(per_cpu_ptr(rsp->rda, cpu)))
-				mask |= bit;
+			if ((rnp->qsmask & bit) != 0) {
+				if ((rnp->qsmaskinit & bit) != 0)
+					*isidle = 0;
+				if (f(per_cpu_ptr(rsp->rda, cpu), isidle, maxj))
+					mask |= bit;
+			}
 		}
 		if (mask != 0) {
 
@@ -2208,10 +2215,10 @@ static void rcu_process_callbacks(struct softirq_action *unused)
 
 	if (cpu_is_offline(smp_processor_id()))
 		return;
-	trace_rcu_utilization("Start RCU core");
+	trace_rcu_utilization(TPS("Start RCU core"));
 	for_each_rcu_flavor(rsp)
 		__rcu_process_callbacks(rsp);
-	trace_rcu_utilization("End RCU core");
+	trace_rcu_utilization(TPS("End RCU core"));
 }
 
 /*
@@ -2287,6 +2294,13 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
 }
 
 /*
+ * RCU callback function to leak a callback.
+ */
+static void rcu_leak_callback(struct rcu_head *rhp)
+{
+}
+
+/*
  * Helper function for call_rcu() and friends. The cpu argument will
  * normally be -1, indicating "currently running CPU". It may specify
  * a CPU only if that CPU is a no-CBs CPU. Currently, only _rcu_barrier()
@@ -2300,7 +2314,12 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
 	struct rcu_data *rdp;
 
 	WARN_ON_ONCE((unsigned long)head & 0x3); /* Misaligned rcu_head! */
-	debug_rcu_head_queue(head);
+	if (debug_rcu_head_queue(head)) {
+		/* Probable double call_rcu(), so leak the callback. */
+		ACCESS_ONCE(head->func) = rcu_leak_callback;
+		WARN_ONCE(1, "__call_rcu(): Leaked duplicate callback\n");
+		return;
+	}
 	head->func = func;
 	head->next = NULL;
 
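For illustration only, not part of the patch: the kind of caller bug the new debug_rcu_head_queue() check targets. With RCU-head debug objects enabled, queuing the same rcu_head twice before its callback has run now produces a one-time warning and deliberately leaks the callback instead of corrupting the callback list. The struct and function names below are made up for the example.

	#include <linux/rcupdate.h>
	#include <linux/slab.h>

	struct foo {
		struct rcu_head rh;
		/* payload */
	};

	static void foo_reclaim(struct rcu_head *rhp)
	{
		kfree(container_of(rhp, struct foo, rh));
	}

	static void buggy_free(struct foo *fp)
	{
		call_rcu(&fp->rh, foo_reclaim);
		call_rcu(&fp->rh, foo_reclaim); /* duplicate: now warned about and leaked */
	}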
@@ -2720,7 +2739,7 @@ static int rcu_cpu_has_callbacks(int cpu, bool *all_lazy)
  * Helper function for _rcu_barrier() tracing. If tracing is disabled,
  * the compiler is expected to optimize this away.
  */
-static void _rcu_barrier_trace(struct rcu_state *rsp, char *s,
+static void _rcu_barrier_trace(struct rcu_state *rsp, const char *s,
 			       int cpu, unsigned long done)
 {
 	trace_rcu_barrier(rsp->name, s, cpu,
@@ -2785,9 +2804,20 @@ static void _rcu_barrier(struct rcu_state *rsp)
 	 * transition. The "if" expression below therefore rounds the old
 	 * value up to the next even number and adds two before comparing.
 	 */
-	snap_done = ACCESS_ONCE(rsp->n_barrier_done);
+	snap_done = rsp->n_barrier_done;
 	_rcu_barrier_trace(rsp, "Check", -1, snap_done);
-	if (ULONG_CMP_GE(snap_done, ((snap + 1) & ~0x1) + 2)) {
+
+	/*
+	 * If the value in snap is odd, we needed to wait for the current
+	 * rcu_barrier() to complete, then wait for the next one, in other
+	 * words, we need the value of snap_done to be three larger than
+	 * the value of snap. On the other hand, if the value in snap is
+	 * even, we only had to wait for the next rcu_barrier() to complete,
+	 * in other words, we need the value of snap_done to be only two
+	 * greater than the value of snap. The "(snap + 3) & ~0x1" computes
+	 * this for us (thank you, Linus!).
+	 */
+	if (ULONG_CMP_GE(snap_done, (snap + 3) & ~0x1)) {
 		_rcu_barrier_trace(rsp, "EarlyExit", -1, snap_done);
 		smp_mb(); /* caller's subsequent code after above check. */
 		mutex_unlock(&rsp->barrier_mutex);
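A quick worked check of the new comparison, for illustration (values made up): if snap was sampled even, say snap = 4, then (snap + 3) & ~0x1 = 7 & ~0x1 = 6 = snap + 2, so one further completed rcu_barrier() suffices for the early exit. If snap was sampled odd, say snap = 5, then (snap + 3) & ~0x1 = 8 = snap + 3, so the in-flight rcu_barrier() must finish and one more must complete before the early exit is taken.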
@@ -2930,6 +2960,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
 	rdp->blimit = blimit;
 	init_callback_list(rdp); /* Re-enable callbacks on this CPU. */
 	rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
+	rcu_sysidle_init_percpu_data(rdp->dynticks);
 	atomic_set(&rdp->dynticks->dynticks,
 		   (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1);
 	raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
@@ -2952,7 +2983,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
 			rdp->completed = rnp->completed;
 			rdp->passed_quiesce = 0;
 			rdp->qs_pending = 0;
-			trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpuonl");
+			trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuonl"));
 		}
 		raw_spin_unlock(&rnp->lock); /* irqs already disabled. */
 		rnp = rnp->parent;
@@ -2982,7 +3013,7 @@ static int rcu_cpu_notify(struct notifier_block *self,
 	struct rcu_node *rnp = rdp->mynode;
 	struct rcu_state *rsp;
 
-	trace_rcu_utilization("Start CPU hotplug");
+	trace_rcu_utilization(TPS("Start CPU hotplug"));
 	switch (action) {
 	case CPU_UP_PREPARE:
 	case CPU_UP_PREPARE_FROZEN:
@@ -3011,7 +3042,26 @@ static int rcu_cpu_notify(struct notifier_block *self,
 	default:
 		break;
 	}
-	trace_rcu_utilization("End CPU hotplug");
+	trace_rcu_utilization(TPS("End CPU hotplug"));
+	return NOTIFY_OK;
+}
+
+static int rcu_pm_notify(struct notifier_block *self,
+			 unsigned long action, void *hcpu)
+{
+	switch (action) {
+	case PM_HIBERNATION_PREPARE:
+	case PM_SUSPEND_PREPARE:
+		if (nr_cpu_ids <= 256) /* Expediting bad for large systems. */
+			rcu_expedited = 1;
+		break;
+	case PM_POST_HIBERNATION:
+	case PM_POST_SUSPEND:
+		rcu_expedited = 0;
+		break;
+	default:
+		break;
+	}
 	return NOTIFY_OK;
 }
 
@@ -3256,6 +3306,7 @@ void __init rcu_init(void)
 	 * or the scheduler are operational.
 	 */
 	cpu_notifier(rcu_cpu_notify, 0);
+	pm_notifier(rcu_pm_notify, 0);
 	for_each_online_cpu(cpu)
 		rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)cpu);
 }