author     Linus Torvalds <torvalds@linux-foundation.org>   2012-01-06 11:02:40 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>   2012-01-06 11:02:40 -0500
commit     423d091dfe58d3109d84c408810a7cfa82f6f184 (patch)
tree       43c4385d1dc7219582f924d42db1f3e203a577bd /kernel
parent     1483b3823542c9721eddf09a077af1e02ac96b50 (diff)
parent     919b83452b2e7c1dbced0456015508b4b9585db3 (diff)
Merge branch 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
* 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (64 commits)
cpu: Export cpu_up()
rcu: Apply ACCESS_ONCE() to rcu_boost() return value
Revert "rcu: Permit rt_mutex_unlock() with irqs disabled"
docs: Additional LWN links to RCU API
rcu: Augment rcu_batch_end tracing for idle and callback state
rcu: Add rcutorture tests for srcu_read_lock_raw()
rcu: Make rcutorture test for hotpluggability before offlining CPUs
driver-core/cpu: Expose hotpluggability to the rest of the kernel
rcu: Remove redundant rcu_cpu_stall_suppress declaration
rcu: Adaptive dyntick-idle preparation
rcu: Keep invoking callbacks if CPU otherwise idle
rcu: Irq nesting is always 0 on rcu_enter_idle_common
rcu: Don't check irq nesting from rcu idle entry/exit
rcu: Permit dyntick-idle with callbacks pending
rcu: Document same-context read-side constraints
rcu: Identify dyntick-idle CPUs on first force_quiescent_state() pass
rcu: Remove dynticks false positives and RCU failures
rcu: Reduce latency of rcu_prepare_for_idle()
rcu: Eliminate RCU_FAST_NO_HZ grace-period hang
rcu: Avoid needlessly IPIing CPUs at GP end
...
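
The headline change in this series is that the CONFIG_NO_HZ-only hooks rcu_enter_nohz()/rcu_exit_nohz() become unconditional rcu_idle_enter()/rcu_idle_exit(), with rcu_irq_enter()/rcu_irq_exit() tracking interrupts that arrive while the CPU is idle. The architecture idle loops that call these hooks live outside kernel/ and are not part of this diff, so the sketch below is only an illustration of the intended calling pattern, with placeholder names for the arch-specific pieces:

    /* Hypothetical idle-loop caller; not taken from this merge. */
    static void idle_loop_sketch(void)
    {
            while (1) {
                    rcu_idle_enter();         /* CPU enters an extended quiescent state */
                    while (!need_resched())
                            low_power_wait(); /* placeholder for the arch idle instruction */
                    rcu_idle_exit();          /* CPU becomes visible to RCU again */
                    schedule();               /* run whatever made need_resched() true */
            }
    }

Interrupts taken while idle are bracketed by irq_enter()/irq_exit(), which in turn call rcu_irq_enter()/rcu_irq_exit(), so RCU read-side critical sections remain legal inside handlers even though the interrupted task is the idle loop.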
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/cpu.c                    |    1
-rw-r--r--  kernel/debug/kdb/kdb_support.c  |    2
-rw-r--r--  kernel/events/core.c            |    2
-rw-r--r--  kernel/lockdep.c                |   22
-rw-r--r--  kernel/rcu.h                    |    7
-rw-r--r--  kernel/rcupdate.c               |   12
-rw-r--r--  kernel/rcutiny.c                |  149
-rw-r--r--  kernel/rcutiny_plugin.h         |   29
-rw-r--r--  kernel/rcutorture.c             |  225
-rw-r--r--  kernel/rcutree.c                |  290
-rw-r--r--  kernel/rcutree.h                |   26
-rw-r--r--  kernel/rcutree_plugin.h         |  289
-rw-r--r--  kernel/rcutree_trace.c          |   12
-rw-r--r--  kernel/rtmutex.c                |    8
-rw-r--r--  kernel/softirq.c                |    4
-rw-r--r--  kernel/time/tick-sched.c        |   97
-rw-r--r--  kernel/trace/trace.c            |    1
17 files changed, 933 insertions, 243 deletions
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 563f13609470..9d448ddb2247 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -380,6 +380,7 @@ out: | |||
380 | cpu_maps_update_done(); | 380 | cpu_maps_update_done(); |
381 | return err; | 381 | return err; |
382 | } | 382 | } |
383 | EXPORT_SYMBOL_GPL(cpu_up); | ||
383 | 384 | ||
384 | #ifdef CONFIG_PM_SLEEP_SMP | 385 | #ifdef CONFIG_PM_SLEEP_SMP |
385 | static cpumask_var_t frozen_cpus; | 386 | static cpumask_var_t frozen_cpus; |
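
Exporting cpu_up() in the hunk above (the "cpu: Export cpu_up()" commit in the list) exists so that modular code such as rcutorture can bring CPUs back online; the matching consumer is the onoff kthread added in the kernel/rcutorture.c hunks further down. Roughly, from module context:

    /* Illustrative module-side use, mirroring rcu_torture_onoff() below. */
    if (cpu_is_hotpluggable(cpu) && !cpu_online(cpu)) {
            n_online_attempts++;
            if (cpu_up(cpu) == 0)   /* callable from a module thanks to the export */
                    n_online_successes++;
    }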
diff --git a/kernel/debug/kdb/kdb_support.c b/kernel/debug/kdb/kdb_support.c
index 5532dd37aa86..7d6fb40d2188 100644
--- a/kernel/debug/kdb/kdb_support.c
+++ b/kernel/debug/kdb/kdb_support.c
@@ -636,7 +636,7 @@ char kdb_task_state_char (const struct task_struct *p) | |||
636 | (p->exit_state & EXIT_ZOMBIE) ? 'Z' : | 636 | (p->exit_state & EXIT_ZOMBIE) ? 'Z' : |
637 | (p->exit_state & EXIT_DEAD) ? 'E' : | 637 | (p->exit_state & EXIT_DEAD) ? 'E' : |
638 | (p->state & TASK_INTERRUPTIBLE) ? 'S' : '?'; | 638 | (p->state & TASK_INTERRUPTIBLE) ? 'S' : '?'; |
639 | if (p->pid == 0) { | 639 | if (is_idle_task(p)) { |
640 | /* Idle task. Is it really idle, apart from the kdb | 640 | /* Idle task. Is it really idle, apart from the kdb |
641 | * interrupt? */ | 641 | * interrupt? */ |
642 | if (!kdb_task_has_cpu(p) || kgdb_info[cpu].irq_depth == 1) { | 642 | if (!kdb_task_has_cpu(p) || kgdb_info[cpu].irq_depth == 1) { |
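
is_idle_task() is introduced elsewhere in this series (in include/linux/sched.h, which is outside this kernel/-limited diff), replacing open-coded "p->pid == 0" tests like the one above; presumably it is little more than that comparison wrapped in a helper, roughly:

    /* Assumed shape of the helper; the actual definition is outside this diff. */
    static inline bool is_idle_task(const struct task_struct *p)
    {
            return p->pid == 0;     /* the per-CPU idle threads all have PID 0 */
    }

This matters below, where checks based on idle_cpu() and hardirq nesting are replaced by RCU's own dyntick-idle bookkeeping, which defines "idle" as "running the idle task".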
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 58690af323e4..fc0e7ff11dda 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5366,7 +5366,7 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer) | |||
5366 | regs = get_irq_regs(); | 5366 | regs = get_irq_regs(); |
5367 | 5367 | ||
5368 | if (regs && !perf_exclude_event(event, regs)) { | 5368 | if (regs && !perf_exclude_event(event, regs)) { |
5369 | if (!(event->attr.exclude_idle && current->pid == 0)) | 5369 | if (!(event->attr.exclude_idle && is_idle_task(current))) |
5370 | if (perf_event_overflow(event, &data, regs)) | 5370 | if (perf_event_overflow(event, &data, regs)) |
5371 | ret = HRTIMER_NORESTART; | 5371 | ret = HRTIMER_NORESTART; |
5372 | } | 5372 | } |
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index e69d633d6aa6..8fb755132322 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -4181,6 +4181,28 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s) | |||
4181 | printk("%s:%d %s!\n", file, line, s); | 4181 | printk("%s:%d %s!\n", file, line, s); |
4182 | printk("\nother info that might help us debug this:\n\n"); | 4182 | printk("\nother info that might help us debug this:\n\n"); |
4183 | printk("\nrcu_scheduler_active = %d, debug_locks = %d\n", rcu_scheduler_active, debug_locks); | 4183 | printk("\nrcu_scheduler_active = %d, debug_locks = %d\n", rcu_scheduler_active, debug_locks); |
4184 | |||
4185 | /* | ||
4186 | * If a CPU is in the RCU-free window in idle (ie: in the section | ||
4187 | * between rcu_idle_enter() and rcu_idle_exit(), then RCU | ||
4188 | * considers that CPU to be in an "extended quiescent state", | ||
4189 | * which means that RCU will be completely ignoring that CPU. | ||
4190 | * Therefore, rcu_read_lock() and friends have absolutely no | ||
4191 | * effect on a CPU running in that state. In other words, even if | ||
4192 | * such an RCU-idle CPU has called rcu_read_lock(), RCU might well | ||
4193 | * delete data structures out from under it. RCU really has no | ||
4194 | * choice here: we need to keep an RCU-free window in idle where | ||
4195 | * the CPU may possibly enter into low power mode. This way we can | ||
4196 | * notice an extended quiescent state to other CPUs that started a grace | ||
4197 | * period. Otherwise we would delay any grace period as long as we run | ||
4198 | * in the idle task. | ||
4199 | * | ||
4200 | * So complain bitterly if someone does call rcu_read_lock(), | ||
4201 | * rcu_read_lock_bh() and so on from extended quiescent states. | ||
4202 | */ | ||
4203 | if (rcu_is_cpu_idle()) | ||
4204 | printk("RCU used illegally from extended quiescent state!\n"); | ||
4205 | |||
4184 | lockdep_print_held_locks(curr); | 4206 | lockdep_print_held_locks(curr); |
4185 | printk("\nstack backtrace:\n"); | 4207 | printk("\nstack backtrace:\n"); |
4186 | dump_stack(); | 4208 | dump_stack(); |
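
The new lockdep_rcu_suspicious() text above fires for code that uses RCU read-side primitives while the CPU sits in the rcu_idle_enter()/rcu_idle_exit() window, where RCU is deliberately not watching. A hypothetical offender (the function and pointer names are invented for illustration) would look like:

    /* Runs from the idle loop, i.e. after rcu_idle_enter(). */
    static void buggy_idle_hook(void)
    {
            struct foo *p;

            rcu_read_lock();                 /* has no effect: CPU is RCU-idle   */
            p = rcu_dereference(global_foo); /* grace periods won't wait for us, */
            use_foo(p);                      /* so p may already have been freed */
            rcu_read_unlock();
    }

With CONFIG_PROVE_RCU, the rcu_is_cpu_idle() check added above turns this from silent use-after-free exposure into an explicit "RCU used illegally from extended quiescent state!" line in the lockdep splat.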
diff --git a/kernel/rcu.h b/kernel/rcu.h
index f600868d550d..aa88baab5f78 100644
--- a/kernel/rcu.h
+++ b/kernel/rcu.h
@@ -30,6 +30,13 @@ | |||
30 | #endif /* #else #ifdef CONFIG_RCU_TRACE */ | 30 | #endif /* #else #ifdef CONFIG_RCU_TRACE */ |
31 | 31 | ||
32 | /* | 32 | /* |
33 | * Process-level increment to ->dynticks_nesting field. This allows for | ||
34 | * architectures that use half-interrupts and half-exceptions from | ||
35 | * process context. | ||
36 | */ | ||
37 | #define DYNTICK_TASK_NESTING (LLONG_MAX / 2 - 1) | ||
38 | |||
39 | /* | ||
33 | * debug_rcu_head_queue()/debug_rcu_head_unqueue() are used internally | 40 | * debug_rcu_head_queue()/debug_rcu_head_unqueue() are used internally |
34 | * by call_rcu() and rcu callback execution, and are therefore not part of the | 41 | * by call_rcu() and rcu callback execution, and are therefore not part of the |
35 | * RCU API. Leaving in rcupdate.h because they are used by all RCU flavors. | 42 | * RCU API. Leaving in rcupdate.h because they are used by all RCU flavors. |
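
DYNTICK_TASK_NESTING is deliberately huge (about half of LLONG_MAX): process-level execution sits at this large baseline, interrupts and exceptions nest on top of it by small increments, and only an explicit rcu_idle_enter() forces the counter to exactly zero. That leaves enormous headroom in both directions for the "half-interrupts" the comment mentions, i.e. entries or exits that are never matched, without the counter ever reaching zero by accident. A standalone arithmetic illustration (not kernel code):

    #include <limits.h>
    #include <stdio.h>

    #define DYNTICK_TASK_NESTING (LLONG_MAX / 2 - 1)

    int main(void)
    {
            long long nesting = DYNTICK_TASK_NESTING;   /* process-level baseline */

            nesting += 1000000;     /* a million unmatched irq-style entries */
            nesting -= 3000000;     /* and even more unmatched exits */

            printf("nesting = %lld (nonzero, so RCU still watches this CPU)\n", nesting);
            printf("headroom: %lld up, %lld down\n", LLONG_MAX - nesting, nesting);
            return 0;
    }

Only the deliberate crowbar to zero in rcu_idle_enter() (see the kernel/rcutiny.c and kernel/rcutree.c hunks below) can put the counter into the idle state.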
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index c5b98e565aee..2bc4e135ff23 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -93,6 +93,8 @@ int rcu_read_lock_bh_held(void) | |||
93 | { | 93 | { |
94 | if (!debug_lockdep_rcu_enabled()) | 94 | if (!debug_lockdep_rcu_enabled()) |
95 | return 1; | 95 | return 1; |
96 | if (rcu_is_cpu_idle()) | ||
97 | return 0; | ||
96 | return in_softirq() || irqs_disabled(); | 98 | return in_softirq() || irqs_disabled(); |
97 | } | 99 | } |
98 | EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held); | 100 | EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held); |
@@ -316,3 +318,13 @@ struct debug_obj_descr rcuhead_debug_descr = { | |||
316 | }; | 318 | }; |
317 | EXPORT_SYMBOL_GPL(rcuhead_debug_descr); | 319 | EXPORT_SYMBOL_GPL(rcuhead_debug_descr); |
318 | #endif /* #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */ | 320 | #endif /* #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */ |
321 | |||
322 | #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) || defined(CONFIG_RCU_TRACE) | ||
323 | void do_trace_rcu_torture_read(char *rcutorturename, struct rcu_head *rhp) | ||
324 | { | ||
325 | trace_rcu_torture_read(rcutorturename, rhp); | ||
326 | } | ||
327 | EXPORT_SYMBOL_GPL(do_trace_rcu_torture_read); | ||
328 | #else | ||
329 | #define do_trace_rcu_torture_read(rcutorturename, rhp) do { } while (0) | ||
330 | #endif | ||
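
The rcu_read_lock_bh_held() change means that, under CONFIG_PROVE_RCU, having BH disabled is no longer enough if the CPU is in the RCU-idle window: the helper now reports "not held" there, so lockdep-checked accessors built on it, roughly rcu_dereference_bh() and friends, will complain. An illustrative caller (gp is an invented RCU-protected pointer):

    local_bh_disable();
    p = rcu_dereference_bh(gp);   /* lockdep check includes rcu_read_lock_bh_held() */
    /* ... read-side use of p ... */
    local_bh_enable();

Run from normal context this stays quiet as before; run from an RCU-idle CPU it now warns, matching the message added in kernel/lockdep.c above.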
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
index 636af6d9c6e5..977296dca0a4 100644
--- a/kernel/rcutiny.c
+++ b/kernel/rcutiny.c
@@ -53,31 +53,137 @@ static void __call_rcu(struct rcu_head *head, | |||
53 | 53 | ||
54 | #include "rcutiny_plugin.h" | 54 | #include "rcutiny_plugin.h" |
55 | 55 | ||
56 | #ifdef CONFIG_NO_HZ | 56 | static long long rcu_dynticks_nesting = DYNTICK_TASK_NESTING; |
57 | 57 | ||
58 | static long rcu_dynticks_nesting = 1; | 58 | /* Common code for rcu_idle_enter() and rcu_irq_exit(), see kernel/rcutree.c. */ |
59 | static void rcu_idle_enter_common(long long oldval) | ||
60 | { | ||
61 | if (rcu_dynticks_nesting) { | ||
62 | RCU_TRACE(trace_rcu_dyntick("--=", | ||
63 | oldval, rcu_dynticks_nesting)); | ||
64 | return; | ||
65 | } | ||
66 | RCU_TRACE(trace_rcu_dyntick("Start", oldval, rcu_dynticks_nesting)); | ||
67 | if (!is_idle_task(current)) { | ||
68 | struct task_struct *idle = idle_task(smp_processor_id()); | ||
69 | |||
70 | RCU_TRACE(trace_rcu_dyntick("Error on entry: not idle task", | ||
71 | oldval, rcu_dynticks_nesting)); | ||
72 | ftrace_dump(DUMP_ALL); | ||
73 | WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", | ||
74 | current->pid, current->comm, | ||
75 | idle->pid, idle->comm); /* must be idle task! */ | ||
76 | } | ||
77 | rcu_sched_qs(0); /* implies rcu_bh_qsctr_inc(0) */ | ||
78 | } | ||
59 | 79 | ||
60 | /* | 80 | /* |
61 | * Enter dynticks-idle mode, which is an extended quiescent state | 81 | * Enter idle, which is an extended quiescent state if we have fully |
62 | * if we have fully entered that mode (i.e., if the new value of | 82 | * entered that mode (i.e., if the new value of dynticks_nesting is zero). |
63 | * dynticks_nesting is zero). | ||
64 | */ | 83 | */ |
65 | void rcu_enter_nohz(void) | 84 | void rcu_idle_enter(void) |
66 | { | 85 | { |
67 | if (--rcu_dynticks_nesting == 0) | 86 | unsigned long flags; |
68 | rcu_sched_qs(0); /* implies rcu_bh_qsctr_inc(0) */ | 87 | long long oldval; |
88 | |||
89 | local_irq_save(flags); | ||
90 | oldval = rcu_dynticks_nesting; | ||
91 | rcu_dynticks_nesting = 0; | ||
92 | rcu_idle_enter_common(oldval); | ||
93 | local_irq_restore(flags); | ||
69 | } | 94 | } |
70 | 95 | ||
71 | /* | 96 | /* |
72 | * Exit dynticks-idle mode, so that we are no longer in an extended | 97 | * Exit an interrupt handler towards idle. |
73 | * quiescent state. | ||
74 | */ | 98 | */ |
75 | void rcu_exit_nohz(void) | 99 | void rcu_irq_exit(void) |
100 | { | ||
101 | unsigned long flags; | ||
102 | long long oldval; | ||
103 | |||
104 | local_irq_save(flags); | ||
105 | oldval = rcu_dynticks_nesting; | ||
106 | rcu_dynticks_nesting--; | ||
107 | WARN_ON_ONCE(rcu_dynticks_nesting < 0); | ||
108 | rcu_idle_enter_common(oldval); | ||
109 | local_irq_restore(flags); | ||
110 | } | ||
111 | |||
112 | /* Common code for rcu_idle_exit() and rcu_irq_enter(), see kernel/rcutree.c. */ | ||
113 | static void rcu_idle_exit_common(long long oldval) | ||
76 | { | 114 | { |
115 | if (oldval) { | ||
116 | RCU_TRACE(trace_rcu_dyntick("++=", | ||
117 | oldval, rcu_dynticks_nesting)); | ||
118 | return; | ||
119 | } | ||
120 | RCU_TRACE(trace_rcu_dyntick("End", oldval, rcu_dynticks_nesting)); | ||
121 | if (!is_idle_task(current)) { | ||
122 | struct task_struct *idle = idle_task(smp_processor_id()); | ||
123 | |||
124 | RCU_TRACE(trace_rcu_dyntick("Error on exit: not idle task", | ||
125 | oldval, rcu_dynticks_nesting)); | ||
126 | ftrace_dump(DUMP_ALL); | ||
127 | WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", | ||
128 | current->pid, current->comm, | ||
129 | idle->pid, idle->comm); /* must be idle task! */ | ||
130 | } | ||
131 | } | ||
132 | |||
133 | /* | ||
134 | * Exit idle, so that we are no longer in an extended quiescent state. | ||
135 | */ | ||
136 | void rcu_idle_exit(void) | ||
137 | { | ||
138 | unsigned long flags; | ||
139 | long long oldval; | ||
140 | |||
141 | local_irq_save(flags); | ||
142 | oldval = rcu_dynticks_nesting; | ||
143 | WARN_ON_ONCE(oldval != 0); | ||
144 | rcu_dynticks_nesting = DYNTICK_TASK_NESTING; | ||
145 | rcu_idle_exit_common(oldval); | ||
146 | local_irq_restore(flags); | ||
147 | } | ||
148 | |||
149 | /* | ||
150 | * Enter an interrupt handler, moving away from idle. | ||
151 | */ | ||
152 | void rcu_irq_enter(void) | ||
153 | { | ||
154 | unsigned long flags; | ||
155 | long long oldval; | ||
156 | |||
157 | local_irq_save(flags); | ||
158 | oldval = rcu_dynticks_nesting; | ||
77 | rcu_dynticks_nesting++; | 159 | rcu_dynticks_nesting++; |
160 | WARN_ON_ONCE(rcu_dynticks_nesting == 0); | ||
161 | rcu_idle_exit_common(oldval); | ||
162 | local_irq_restore(flags); | ||
163 | } | ||
164 | |||
165 | #ifdef CONFIG_PROVE_RCU | ||
166 | |||
167 | /* | ||
168 | * Test whether RCU thinks that the current CPU is idle. | ||
169 | */ | ||
170 | int rcu_is_cpu_idle(void) | ||
171 | { | ||
172 | return !rcu_dynticks_nesting; | ||
78 | } | 173 | } |
174 | EXPORT_SYMBOL(rcu_is_cpu_idle); | ||
175 | |||
176 | #endif /* #ifdef CONFIG_PROVE_RCU */ | ||
79 | 177 | ||
80 | #endif /* #ifdef CONFIG_NO_HZ */ | 178 | /* |
179 | * Test whether the current CPU was interrupted from idle. Nested | ||
180 | * interrupts don't count, we must be running at the first interrupt | ||
181 | * level. | ||
182 | */ | ||
183 | int rcu_is_cpu_rrupt_from_idle(void) | ||
184 | { | ||
185 | return rcu_dynticks_nesting <= 0; | ||
186 | } | ||
81 | 187 | ||
82 | /* | 188 | /* |
83 | * Helper function for rcu_sched_qs() and rcu_bh_qs(). | 189 | * Helper function for rcu_sched_qs() and rcu_bh_qs(). |
@@ -126,14 +232,13 @@ void rcu_bh_qs(int cpu) | |||
126 | 232 | ||
127 | /* | 233 | /* |
128 | * Check to see if the scheduling-clock interrupt came from an extended | 234 | * Check to see if the scheduling-clock interrupt came from an extended |
129 | * quiescent state, and, if so, tell RCU about it. | 235 | * quiescent state, and, if so, tell RCU about it. This function must |
236 | * be called from hardirq context. It is normally called from the | ||
237 | * scheduling-clock interrupt. | ||
130 | */ | 238 | */ |
131 | void rcu_check_callbacks(int cpu, int user) | 239 | void rcu_check_callbacks(int cpu, int user) |
132 | { | 240 | { |
133 | if (user || | 241 | if (user || rcu_is_cpu_rrupt_from_idle()) |
134 | (idle_cpu(cpu) && | ||
135 | !in_softirq() && | ||
136 | hardirq_count() <= (1 << HARDIRQ_SHIFT))) | ||
137 | rcu_sched_qs(cpu); | 242 | rcu_sched_qs(cpu); |
138 | else if (!in_softirq()) | 243 | else if (!in_softirq()) |
139 | rcu_bh_qs(cpu); | 244 | rcu_bh_qs(cpu); |
@@ -154,7 +259,11 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) | |||
154 | /* If no RCU callbacks ready to invoke, just return. */ | 259 | /* If no RCU callbacks ready to invoke, just return. */ |
155 | if (&rcp->rcucblist == rcp->donetail) { | 260 | if (&rcp->rcucblist == rcp->donetail) { |
156 | RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, -1)); | 261 | RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, -1)); |
157 | RCU_TRACE(trace_rcu_batch_end(rcp->name, 0)); | 262 | RCU_TRACE(trace_rcu_batch_end(rcp->name, 0, |
263 | ACCESS_ONCE(rcp->rcucblist), | ||
264 | need_resched(), | ||
265 | is_idle_task(current), | ||
266 | rcu_is_callbacks_kthread())); | ||
158 | return; | 267 | return; |
159 | } | 268 | } |
160 | 269 | ||
@@ -183,7 +292,9 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) | |||
183 | RCU_TRACE(cb_count++); | 292 | RCU_TRACE(cb_count++); |
184 | } | 293 | } |
185 | RCU_TRACE(rcu_trace_sub_qlen(rcp, cb_count)); | 294 | RCU_TRACE(rcu_trace_sub_qlen(rcp, cb_count)); |
186 | RCU_TRACE(trace_rcu_batch_end(rcp->name, cb_count)); | 295 | RCU_TRACE(trace_rcu_batch_end(rcp->name, cb_count, 0, need_resched(), |
296 | is_idle_task(current), | ||
297 | rcu_is_callbacks_kthread())); | ||
187 | } | 298 | } |
188 | 299 | ||
189 | static void rcu_process_callbacks(struct softirq_action *unused) | 300 | static void rcu_process_callbacks(struct softirq_action *unused) |
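
Taken together, the TINY_RCU hooks above implement a small state machine around the single rcu_dynticks_nesting counter: rcu_idle_enter() crowbars it to zero, rcu_idle_exit() restores DYNTICK_TASK_NESTING, and rcu_irq_enter()/rcu_irq_exit() increment and decrement around interrupts, with zero meaning "RCU-idle". A standalone, single-threaded model of those transitions (the model_* names are invented; the real code is above):

    #include <assert.h>
    #include <limits.h>
    #include <stdio.h>

    #define DYNTICK_TASK_NESTING (LLONG_MAX / 2 - 1)

    static long long nesting = DYNTICK_TASK_NESTING;    /* boot: not idle */

    static int  model_rcu_is_cpu_idle(void) { return nesting == 0; }
    static void model_idle_enter(void) { nesting = 0; }
    static void model_idle_exit(void)  { nesting = DYNTICK_TASK_NESTING; }
    static void model_irq_enter(void)  { nesting++; }
    static void model_irq_exit(void)   { nesting--; assert(nesting >= 0); }

    int main(void)
    {
            assert(!model_rcu_is_cpu_idle());  /* running a task */
            model_idle_enter();
            assert(model_rcu_is_cpu_idle());   /* extended quiescent state */
            model_irq_enter();                 /* e.g. a timer tick from idle */
            assert(!model_rcu_is_cpu_idle());  /* handlers may use RCU */
            model_irq_exit();
            assert(model_rcu_is_cpu_idle());   /* back to idle */
            model_idle_exit();
            assert(!model_rcu_is_cpu_idle());
            printf("tiny-RCU nesting model OK\n");
            return 0;
    }

The real code additionally traces each transition and warns if anything other than the idle task reaches the idle state, as shown in rcu_idle_enter_common()/rcu_idle_exit_common() above.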
diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h
index 2b0484a5dc28..9cb1ae4aabdd 100644
--- a/kernel/rcutiny_plugin.h
+++ b/kernel/rcutiny_plugin.h
@@ -312,8 +312,8 @@ static int rcu_boost(void) | |||
312 | rt_mutex_lock(&mtx); | 312 | rt_mutex_lock(&mtx); |
313 | rt_mutex_unlock(&mtx); /* Keep lockdep happy. */ | 313 | rt_mutex_unlock(&mtx); /* Keep lockdep happy. */ |
314 | 314 | ||
315 | return rcu_preempt_ctrlblk.boost_tasks != NULL || | 315 | return ACCESS_ONCE(rcu_preempt_ctrlblk.boost_tasks) != NULL || |
316 | rcu_preempt_ctrlblk.exp_tasks != NULL; | 316 | ACCESS_ONCE(rcu_preempt_ctrlblk.exp_tasks) != NULL; |
317 | } | 317 | } |
318 | 318 | ||
319 | /* | 319 | /* |
@@ -885,6 +885,19 @@ static void invoke_rcu_callbacks(void) | |||
885 | wake_up(&rcu_kthread_wq); | 885 | wake_up(&rcu_kthread_wq); |
886 | } | 886 | } |
887 | 887 | ||
888 | #ifdef CONFIG_RCU_TRACE | ||
889 | |||
890 | /* | ||
891 | * Is the current CPU running the RCU-callbacks kthread? | ||
892 | * Caller must have preemption disabled. | ||
893 | */ | ||
894 | static bool rcu_is_callbacks_kthread(void) | ||
895 | { | ||
896 | return rcu_kthread_task == current; | ||
897 | } | ||
898 | |||
899 | #endif /* #ifdef CONFIG_RCU_TRACE */ | ||
900 | |||
888 | /* | 901 | /* |
889 | * This kthread invokes RCU callbacks whose grace periods have | 902 | * This kthread invokes RCU callbacks whose grace periods have |
890 | * elapsed. It is awakened as needed, and takes the place of the | 903 | * elapsed. It is awakened as needed, and takes the place of the |
@@ -938,6 +951,18 @@ void invoke_rcu_callbacks(void) | |||
938 | raise_softirq(RCU_SOFTIRQ); | 951 | raise_softirq(RCU_SOFTIRQ); |
939 | } | 952 | } |
940 | 953 | ||
954 | #ifdef CONFIG_RCU_TRACE | ||
955 | |||
956 | /* | ||
957 | * There is no callback kthread, so this thread is never it. | ||
958 | */ | ||
959 | static bool rcu_is_callbacks_kthread(void) | ||
960 | { | ||
961 | return false; | ||
962 | } | ||
963 | |||
964 | #endif /* #ifdef CONFIG_RCU_TRACE */ | ||
965 | |||
941 | void rcu_init(void) | 966 | void rcu_init(void) |
942 | { | 967 | { |
943 | open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); | 968 | open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); |
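
The rcu_boost() hunk above wraps both list-pointer reads in ACCESS_ONCE(), which in kernels of this vintage is approximately a volatile cast:

    /* Approximate linux/compiler.h definition of the era. */
    #define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))

That stops the compiler from reusing values of ->boost_tasks and ->exp_tasks loaded before the rt_mutex_lock()/rt_mutex_unlock() handoff, or from re-fetching them at some other point, so the return value reflects the lists as observed right here; this is the intent of the "rcu: Apply ACCESS_ONCE() to rcu_boost() return value" commit in the summary. The two rcu_is_callbacks_kthread() variants added further up simply tell the new rcu_batch_end tracing whether callbacks ran in the per-CPU kthread (boosted configuration) or in softirq context.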
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 764825c2685c..88f17b8a3b1d 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -61,9 +61,11 @@ static int test_no_idle_hz; /* Test RCU's support for tickless idle CPUs. */ | |||
61 | static int shuffle_interval = 3; /* Interval between shuffles (in sec)*/ | 61 | static int shuffle_interval = 3; /* Interval between shuffles (in sec)*/ |
62 | static int stutter = 5; /* Start/stop testing interval (in sec) */ | 62 | static int stutter = 5; /* Start/stop testing interval (in sec) */ |
63 | static int irqreader = 1; /* RCU readers from irq (timers). */ | 63 | static int irqreader = 1; /* RCU readers from irq (timers). */ |
64 | static int fqs_duration = 0; /* Duration of bursts (us), 0 to disable. */ | 64 | static int fqs_duration; /* Duration of bursts (us), 0 to disable. */ |
65 | static int fqs_holdoff = 0; /* Hold time within burst (us). */ | 65 | static int fqs_holdoff; /* Hold time within burst (us). */ |
66 | static int fqs_stutter = 3; /* Wait time between bursts (s). */ | 66 | static int fqs_stutter = 3; /* Wait time between bursts (s). */ |
67 | static int onoff_interval; /* Wait time between CPU hotplugs, 0=disable. */ | ||
68 | static int shutdown_secs; /* Shutdown time (s). <=0 for no shutdown. */ | ||
67 | static int test_boost = 1; /* Test RCU prio boost: 0=no, 1=maybe, 2=yes. */ | 69 | static int test_boost = 1; /* Test RCU prio boost: 0=no, 1=maybe, 2=yes. */ |
68 | static int test_boost_interval = 7; /* Interval between boost tests, seconds. */ | 70 | static int test_boost_interval = 7; /* Interval between boost tests, seconds. */ |
69 | static int test_boost_duration = 4; /* Duration of each boost test, seconds. */ | 71 | static int test_boost_duration = 4; /* Duration of each boost test, seconds. */ |
@@ -91,6 +93,10 @@ module_param(fqs_holdoff, int, 0444); | |||
91 | MODULE_PARM_DESC(fqs_holdoff, "Holdoff time within fqs bursts (us)"); | 93 | MODULE_PARM_DESC(fqs_holdoff, "Holdoff time within fqs bursts (us)"); |
92 | module_param(fqs_stutter, int, 0444); | 94 | module_param(fqs_stutter, int, 0444); |
93 | MODULE_PARM_DESC(fqs_stutter, "Wait time between fqs bursts (s)"); | 95 | MODULE_PARM_DESC(fqs_stutter, "Wait time between fqs bursts (s)"); |
96 | module_param(onoff_interval, int, 0444); | ||
97 | MODULE_PARM_DESC(onoff_interval, "Time between CPU hotplugs (s), 0=disable"); | ||
98 | module_param(shutdown_secs, int, 0444); | ||
99 | MODULE_PARM_DESC(shutdown_secs, "Shutdown time (s), zero to disable."); | ||
94 | module_param(test_boost, int, 0444); | 100 | module_param(test_boost, int, 0444); |
95 | MODULE_PARM_DESC(test_boost, "Test RCU prio boost: 0=no, 1=maybe, 2=yes."); | 101 | MODULE_PARM_DESC(test_boost, "Test RCU prio boost: 0=no, 1=maybe, 2=yes."); |
96 | module_param(test_boost_interval, int, 0444); | 102 | module_param(test_boost_interval, int, 0444); |
@@ -119,6 +125,10 @@ static struct task_struct *shuffler_task; | |||
119 | static struct task_struct *stutter_task; | 125 | static struct task_struct *stutter_task; |
120 | static struct task_struct *fqs_task; | 126 | static struct task_struct *fqs_task; |
121 | static struct task_struct *boost_tasks[NR_CPUS]; | 127 | static struct task_struct *boost_tasks[NR_CPUS]; |
128 | static struct task_struct *shutdown_task; | ||
129 | #ifdef CONFIG_HOTPLUG_CPU | ||
130 | static struct task_struct *onoff_task; | ||
131 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | ||
122 | 132 | ||
123 | #define RCU_TORTURE_PIPE_LEN 10 | 133 | #define RCU_TORTURE_PIPE_LEN 10 |
124 | 134 | ||
@@ -149,6 +159,10 @@ static long n_rcu_torture_boost_rterror; | |||
149 | static long n_rcu_torture_boost_failure; | 159 | static long n_rcu_torture_boost_failure; |
150 | static long n_rcu_torture_boosts; | 160 | static long n_rcu_torture_boosts; |
151 | static long n_rcu_torture_timers; | 161 | static long n_rcu_torture_timers; |
162 | static long n_offline_attempts; | ||
163 | static long n_offline_successes; | ||
164 | static long n_online_attempts; | ||
165 | static long n_online_successes; | ||
152 | static struct list_head rcu_torture_removed; | 166 | static struct list_head rcu_torture_removed; |
153 | static cpumask_var_t shuffle_tmp_mask; | 167 | static cpumask_var_t shuffle_tmp_mask; |
154 | 168 | ||
@@ -160,6 +174,8 @@ static int stutter_pause_test; | |||
160 | #define RCUTORTURE_RUNNABLE_INIT 0 | 174 | #define RCUTORTURE_RUNNABLE_INIT 0 |
161 | #endif | 175 | #endif |
162 | int rcutorture_runnable = RCUTORTURE_RUNNABLE_INIT; | 176 | int rcutorture_runnable = RCUTORTURE_RUNNABLE_INIT; |
177 | module_param(rcutorture_runnable, int, 0444); | ||
178 | MODULE_PARM_DESC(rcutorture_runnable, "Start rcutorture at boot"); | ||
163 | 179 | ||
164 | #if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU) | 180 | #if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU) |
165 | #define rcu_can_boost() 1 | 181 | #define rcu_can_boost() 1 |
@@ -167,6 +183,7 @@ int rcutorture_runnable = RCUTORTURE_RUNNABLE_INIT; | |||
167 | #define rcu_can_boost() 0 | 183 | #define rcu_can_boost() 0 |
168 | #endif /* #else #if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU) */ | 184 | #endif /* #else #if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU) */ |
169 | 185 | ||
186 | static unsigned long shutdown_time; /* jiffies to system shutdown. */ | ||
170 | static unsigned long boost_starttime; /* jiffies of next boost test start. */ | 187 | static unsigned long boost_starttime; /* jiffies of next boost test start. */ |
171 | DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */ | 188 | DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */ |
172 | /* and boost task create/destroy. */ | 189 | /* and boost task create/destroy. */ |
@@ -182,6 +199,9 @@ static int fullstop = FULLSTOP_RMMOD; | |||
182 | */ | 199 | */ |
183 | static DEFINE_MUTEX(fullstop_mutex); | 200 | static DEFINE_MUTEX(fullstop_mutex); |
184 | 201 | ||
202 | /* Forward reference. */ | ||
203 | static void rcu_torture_cleanup(void); | ||
204 | |||
185 | /* | 205 | /* |
186 | * Detect and respond to a system shutdown. | 206 | * Detect and respond to a system shutdown. |
187 | */ | 207 | */ |
@@ -612,6 +632,30 @@ static struct rcu_torture_ops srcu_ops = { | |||
612 | .name = "srcu" | 632 | .name = "srcu" |
613 | }; | 633 | }; |
614 | 634 | ||
635 | static int srcu_torture_read_lock_raw(void) __acquires(&srcu_ctl) | ||
636 | { | ||
637 | return srcu_read_lock_raw(&srcu_ctl); | ||
638 | } | ||
639 | |||
640 | static void srcu_torture_read_unlock_raw(int idx) __releases(&srcu_ctl) | ||
641 | { | ||
642 | srcu_read_unlock_raw(&srcu_ctl, idx); | ||
643 | } | ||
644 | |||
645 | static struct rcu_torture_ops srcu_raw_ops = { | ||
646 | .init = srcu_torture_init, | ||
647 | .cleanup = srcu_torture_cleanup, | ||
648 | .readlock = srcu_torture_read_lock_raw, | ||
649 | .read_delay = srcu_read_delay, | ||
650 | .readunlock = srcu_torture_read_unlock_raw, | ||
651 | .completed = srcu_torture_completed, | ||
652 | .deferred_free = rcu_sync_torture_deferred_free, | ||
653 | .sync = srcu_torture_synchronize, | ||
654 | .cb_barrier = NULL, | ||
655 | .stats = srcu_torture_stats, | ||
656 | .name = "srcu_raw" | ||
657 | }; | ||
658 | |||
615 | static void srcu_torture_synchronize_expedited(void) | 659 | static void srcu_torture_synchronize_expedited(void) |
616 | { | 660 | { |
617 | synchronize_srcu_expedited(&srcu_ctl); | 661 | synchronize_srcu_expedited(&srcu_ctl); |
@@ -913,6 +957,18 @@ rcu_torture_fakewriter(void *arg) | |||
913 | return 0; | 957 | return 0; |
914 | } | 958 | } |
915 | 959 | ||
960 | void rcutorture_trace_dump(void) | ||
961 | { | ||
962 | static atomic_t beenhere = ATOMIC_INIT(0); | ||
963 | |||
964 | if (atomic_read(&beenhere)) | ||
965 | return; | ||
966 | if (atomic_xchg(&beenhere, 1) != 0) | ||
967 | return; | ||
968 | do_trace_rcu_torture_read(cur_ops->name, (struct rcu_head *)~0UL); | ||
969 | ftrace_dump(DUMP_ALL); | ||
970 | } | ||
971 | |||
916 | /* | 972 | /* |
917 | * RCU torture reader from timer handler. Dereferences rcu_torture_current, | 973 | * RCU torture reader from timer handler. Dereferences rcu_torture_current, |
918 | * incrementing the corresponding element of the pipeline array. The | 974 | * incrementing the corresponding element of the pipeline array. The |
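
rcutorture_trace_dump() above is a one-shot guard: the plain read skips the common case cheaply, and the atomic_xchg() ensures that even if several readers detect a too-long pipeline at the same moment, exactly one of them calls ftrace_dump(). The same idiom as a standalone C11 program (illustrative only, not kernel code):

    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_int beenhere;                 /* zero-initialized, like ATOMIC_INIT(0) */

    static void one_shot_dump(void)
    {
            if (atomic_load(&beenhere))
                    return;                     /* fast path once someone has dumped */
            if (atomic_exchange(&beenhere, 1) != 0)
                    return;                     /* lost the race: another caller dumps */
            printf("dumping trace buffer (at most once)\n");
    }

    int main(void)
    {
            one_shot_dump();    /* dumps */
            one_shot_dump();    /* silently returns */
            return 0;
    }

Dumping at most once matters because ftrace_dump() is expensive and the first failure is the interesting one.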
@@ -934,6 +990,7 @@ static void rcu_torture_timer(unsigned long unused) | |||
934 | rcu_read_lock_bh_held() || | 990 | rcu_read_lock_bh_held() || |
935 | rcu_read_lock_sched_held() || | 991 | rcu_read_lock_sched_held() || |
936 | srcu_read_lock_held(&srcu_ctl)); | 992 | srcu_read_lock_held(&srcu_ctl)); |
993 | do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu); | ||
937 | if (p == NULL) { | 994 | if (p == NULL) { |
938 | /* Leave because rcu_torture_writer is not yet underway */ | 995 | /* Leave because rcu_torture_writer is not yet underway */ |
939 | cur_ops->readunlock(idx); | 996 | cur_ops->readunlock(idx); |
@@ -951,6 +1008,8 @@ static void rcu_torture_timer(unsigned long unused) | |||
951 | /* Should not happen, but... */ | 1008 | /* Should not happen, but... */ |
952 | pipe_count = RCU_TORTURE_PIPE_LEN; | 1009 | pipe_count = RCU_TORTURE_PIPE_LEN; |
953 | } | 1010 | } |
1011 | if (pipe_count > 1) | ||
1012 | rcutorture_trace_dump(); | ||
954 | __this_cpu_inc(rcu_torture_count[pipe_count]); | 1013 | __this_cpu_inc(rcu_torture_count[pipe_count]); |
955 | completed = cur_ops->completed() - completed; | 1014 | completed = cur_ops->completed() - completed; |
956 | if (completed > RCU_TORTURE_PIPE_LEN) { | 1015 | if (completed > RCU_TORTURE_PIPE_LEN) { |
@@ -994,6 +1053,7 @@ rcu_torture_reader(void *arg) | |||
994 | rcu_read_lock_bh_held() || | 1053 | rcu_read_lock_bh_held() || |
995 | rcu_read_lock_sched_held() || | 1054 | rcu_read_lock_sched_held() || |
996 | srcu_read_lock_held(&srcu_ctl)); | 1055 | srcu_read_lock_held(&srcu_ctl)); |
1056 | do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu); | ||
997 | if (p == NULL) { | 1057 | if (p == NULL) { |
998 | /* Wait for rcu_torture_writer to get underway */ | 1058 | /* Wait for rcu_torture_writer to get underway */ |
999 | cur_ops->readunlock(idx); | 1059 | cur_ops->readunlock(idx); |
@@ -1009,6 +1069,8 @@ rcu_torture_reader(void *arg) | |||
1009 | /* Should not happen, but... */ | 1069 | /* Should not happen, but... */ |
1010 | pipe_count = RCU_TORTURE_PIPE_LEN; | 1070 | pipe_count = RCU_TORTURE_PIPE_LEN; |
1011 | } | 1071 | } |
1072 | if (pipe_count > 1) | ||
1073 | rcutorture_trace_dump(); | ||
1012 | __this_cpu_inc(rcu_torture_count[pipe_count]); | 1074 | __this_cpu_inc(rcu_torture_count[pipe_count]); |
1013 | completed = cur_ops->completed() - completed; | 1075 | completed = cur_ops->completed() - completed; |
1014 | if (completed > RCU_TORTURE_PIPE_LEN) { | 1076 | if (completed > RCU_TORTURE_PIPE_LEN) { |
@@ -1056,7 +1118,8 @@ rcu_torture_printk(char *page) | |||
1056 | cnt += sprintf(&page[cnt], | 1118 | cnt += sprintf(&page[cnt], |
1057 | "rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d " | 1119 | "rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d " |
1058 | "rtmbe: %d rtbke: %ld rtbre: %ld " | 1120 | "rtmbe: %d rtbke: %ld rtbre: %ld " |
1059 | "rtbf: %ld rtb: %ld nt: %ld", | 1121 | "rtbf: %ld rtb: %ld nt: %ld " |
1122 | "onoff: %ld/%ld:%ld/%ld", | ||
1060 | rcu_torture_current, | 1123 | rcu_torture_current, |
1061 | rcu_torture_current_version, | 1124 | rcu_torture_current_version, |
1062 | list_empty(&rcu_torture_freelist), | 1125 | list_empty(&rcu_torture_freelist), |
@@ -1068,7 +1131,11 @@ rcu_torture_printk(char *page) | |||
1068 | n_rcu_torture_boost_rterror, | 1131 | n_rcu_torture_boost_rterror, |
1069 | n_rcu_torture_boost_failure, | 1132 | n_rcu_torture_boost_failure, |
1070 | n_rcu_torture_boosts, | 1133 | n_rcu_torture_boosts, |
1071 | n_rcu_torture_timers); | 1134 | n_rcu_torture_timers, |
1135 | n_online_successes, | ||
1136 | n_online_attempts, | ||
1137 | n_offline_successes, | ||
1138 | n_offline_attempts); | ||
1072 | if (atomic_read(&n_rcu_torture_mberror) != 0 || | 1139 | if (atomic_read(&n_rcu_torture_mberror) != 0 || |
1073 | n_rcu_torture_boost_ktrerror != 0 || | 1140 | n_rcu_torture_boost_ktrerror != 0 || |
1074 | n_rcu_torture_boost_rterror != 0 || | 1141 | n_rcu_torture_boost_rterror != 0 || |
@@ -1232,12 +1299,14 @@ rcu_torture_print_module_parms(struct rcu_torture_ops *cur_ops, char *tag) | |||
1232 | "shuffle_interval=%d stutter=%d irqreader=%d " | 1299 | "shuffle_interval=%d stutter=%d irqreader=%d " |
1233 | "fqs_duration=%d fqs_holdoff=%d fqs_stutter=%d " | 1300 | "fqs_duration=%d fqs_holdoff=%d fqs_stutter=%d " |
1234 | "test_boost=%d/%d test_boost_interval=%d " | 1301 | "test_boost=%d/%d test_boost_interval=%d " |
1235 | "test_boost_duration=%d\n", | 1302 | "test_boost_duration=%d shutdown_secs=%d " |
1303 | "onoff_interval=%d\n", | ||
1236 | torture_type, tag, nrealreaders, nfakewriters, | 1304 | torture_type, tag, nrealreaders, nfakewriters, |
1237 | stat_interval, verbose, test_no_idle_hz, shuffle_interval, | 1305 | stat_interval, verbose, test_no_idle_hz, shuffle_interval, |
1238 | stutter, irqreader, fqs_duration, fqs_holdoff, fqs_stutter, | 1306 | stutter, irqreader, fqs_duration, fqs_holdoff, fqs_stutter, |
1239 | test_boost, cur_ops->can_boost, | 1307 | test_boost, cur_ops->can_boost, |
1240 | test_boost_interval, test_boost_duration); | 1308 | test_boost_interval, test_boost_duration, shutdown_secs, |
1309 | onoff_interval); | ||
1241 | } | 1310 | } |
1242 | 1311 | ||
1243 | static struct notifier_block rcutorture_shutdown_nb = { | 1312 | static struct notifier_block rcutorture_shutdown_nb = { |
@@ -1287,6 +1356,131 @@ static int rcutorture_booster_init(int cpu) | |||
1287 | return 0; | 1356 | return 0; |
1288 | } | 1357 | } |
1289 | 1358 | ||
1359 | /* | ||
1360 | * Cause the rcutorture test to shutdown the system after the test has | ||
1361 | * run for the time specified by the shutdown_secs module parameter. | ||
1362 | */ | ||
1363 | static int | ||
1364 | rcu_torture_shutdown(void *arg) | ||
1365 | { | ||
1366 | long delta; | ||
1367 | unsigned long jiffies_snap; | ||
1368 | |||
1369 | VERBOSE_PRINTK_STRING("rcu_torture_shutdown task started"); | ||
1370 | jiffies_snap = ACCESS_ONCE(jiffies); | ||
1371 | while (ULONG_CMP_LT(jiffies_snap, shutdown_time) && | ||
1372 | !kthread_should_stop()) { | ||
1373 | delta = shutdown_time - jiffies_snap; | ||
1374 | if (verbose) | ||
1375 | printk(KERN_ALERT "%s" TORTURE_FLAG | ||
1376 | "rcu_torture_shutdown task: %lu " | ||
1377 | "jiffies remaining\n", | ||
1378 | torture_type, delta); | ||
1379 | schedule_timeout_interruptible(delta); | ||
1380 | jiffies_snap = ACCESS_ONCE(jiffies); | ||
1381 | } | ||
1382 | if (kthread_should_stop()) { | ||
1383 | VERBOSE_PRINTK_STRING("rcu_torture_shutdown task stopping"); | ||
1384 | return 0; | ||
1385 | } | ||
1386 | |||
1387 | /* OK, shut down the system. */ | ||
1388 | |||
1389 | VERBOSE_PRINTK_STRING("rcu_torture_shutdown task shutting down system"); | ||
1390 | shutdown_task = NULL; /* Avoid self-kill deadlock. */ | ||
1391 | rcu_torture_cleanup(); /* Get the success/failure message. */ | ||
1392 | kernel_power_off(); /* Shut down the system. */ | ||
1393 | return 0; | ||
1394 | } | ||
1395 | |||
1396 | #ifdef CONFIG_HOTPLUG_CPU | ||
1397 | |||
1398 | /* | ||
1399 | * Execute random CPU-hotplug operations at the interval specified | ||
1400 | * by the onoff_interval. | ||
1401 | */ | ||
1402 | static int | ||
1403 | rcu_torture_onoff(void *arg) | ||
1404 | { | ||
1405 | int cpu; | ||
1406 | int maxcpu = -1; | ||
1407 | DEFINE_RCU_RANDOM(rand); | ||
1408 | |||
1409 | VERBOSE_PRINTK_STRING("rcu_torture_onoff task started"); | ||
1410 | for_each_online_cpu(cpu) | ||
1411 | maxcpu = cpu; | ||
1412 | WARN_ON(maxcpu < 0); | ||
1413 | while (!kthread_should_stop()) { | ||
1414 | cpu = (rcu_random(&rand) >> 4) % (maxcpu + 1); | ||
1415 | if (cpu_online(cpu) && cpu_is_hotpluggable(cpu)) { | ||
1416 | if (verbose) | ||
1417 | printk(KERN_ALERT "%s" TORTURE_FLAG | ||
1418 | "rcu_torture_onoff task: offlining %d\n", | ||
1419 | torture_type, cpu); | ||
1420 | n_offline_attempts++; | ||
1421 | if (cpu_down(cpu) == 0) { | ||
1422 | if (verbose) | ||
1423 | printk(KERN_ALERT "%s" TORTURE_FLAG | ||
1424 | "rcu_torture_onoff task: " | ||
1425 | "offlined %d\n", | ||
1426 | torture_type, cpu); | ||
1427 | n_offline_successes++; | ||
1428 | } | ||
1429 | } else if (cpu_is_hotpluggable(cpu)) { | ||
1430 | if (verbose) | ||
1431 | printk(KERN_ALERT "%s" TORTURE_FLAG | ||
1432 | "rcu_torture_onoff task: onlining %d\n", | ||
1433 | torture_type, cpu); | ||
1434 | n_online_attempts++; | ||
1435 | if (cpu_up(cpu) == 0) { | ||
1436 | if (verbose) | ||
1437 | printk(KERN_ALERT "%s" TORTURE_FLAG | ||
1438 | "rcu_torture_onoff task: " | ||
1439 | "onlined %d\n", | ||
1440 | torture_type, cpu); | ||
1441 | n_online_successes++; | ||
1442 | } | ||
1443 | } | ||
1444 | schedule_timeout_interruptible(onoff_interval * HZ); | ||
1445 | } | ||
1446 | VERBOSE_PRINTK_STRING("rcu_torture_onoff task stopping"); | ||
1447 | return 0; | ||
1448 | } | ||
1449 | |||
1450 | static int | ||
1451 | rcu_torture_onoff_init(void) | ||
1452 | { | ||
1453 | if (onoff_interval <= 0) | ||
1454 | return 0; | ||
1455 | onoff_task = kthread_run(rcu_torture_onoff, NULL, "rcu_torture_onoff"); | ||
1456 | if (IS_ERR(onoff_task)) { | ||
1457 | onoff_task = NULL; | ||
1458 | return PTR_ERR(onoff_task); | ||
1459 | } | ||
1460 | return 0; | ||
1461 | } | ||
1462 | |||
1463 | static void rcu_torture_onoff_cleanup(void) | ||
1464 | { | ||
1465 | if (onoff_task == NULL) | ||
1466 | return; | ||
1467 | VERBOSE_PRINTK_STRING("Stopping rcu_torture_onoff task"); | ||
1468 | kthread_stop(onoff_task); | ||
1469 | } | ||
1470 | |||
1471 | #else /* #ifdef CONFIG_HOTPLUG_CPU */ | ||
1472 | |||
1473 | static void | ||
1474 | rcu_torture_onoff_init(void) | ||
1475 | { | ||
1476 | } | ||
1477 | |||
1478 | static void rcu_torture_onoff_cleanup(void) | ||
1479 | { | ||
1480 | } | ||
1481 | |||
1482 | #endif /* #else #ifdef CONFIG_HOTPLUG_CPU */ | ||
1483 | |||
1290 | static int rcutorture_cpu_notify(struct notifier_block *self, | 1484 | static int rcutorture_cpu_notify(struct notifier_block *self, |
1291 | unsigned long action, void *hcpu) | 1485 | unsigned long action, void *hcpu) |
1292 | { | 1486 | { |
@@ -1391,6 +1585,11 @@ rcu_torture_cleanup(void) | |||
1391 | for_each_possible_cpu(i) | 1585 | for_each_possible_cpu(i) |
1392 | rcutorture_booster_cleanup(i); | 1586 | rcutorture_booster_cleanup(i); |
1393 | } | 1587 | } |
1588 | if (shutdown_task != NULL) { | ||
1589 | VERBOSE_PRINTK_STRING("Stopping rcu_torture_shutdown task"); | ||
1590 | kthread_stop(shutdown_task); | ||
1591 | } | ||
1592 | rcu_torture_onoff_cleanup(); | ||
1394 | 1593 | ||
1395 | /* Wait for all RCU callbacks to fire. */ | 1594 | /* Wait for all RCU callbacks to fire. */ |
1396 | 1595 | ||
@@ -1416,7 +1615,7 @@ rcu_torture_init(void) | |||
1416 | static struct rcu_torture_ops *torture_ops[] = | 1615 | static struct rcu_torture_ops *torture_ops[] = |
1417 | { &rcu_ops, &rcu_sync_ops, &rcu_expedited_ops, | 1616 | { &rcu_ops, &rcu_sync_ops, &rcu_expedited_ops, |
1418 | &rcu_bh_ops, &rcu_bh_sync_ops, &rcu_bh_expedited_ops, | 1617 | &rcu_bh_ops, &rcu_bh_sync_ops, &rcu_bh_expedited_ops, |
1419 | &srcu_ops, &srcu_expedited_ops, | 1618 | &srcu_ops, &srcu_raw_ops, &srcu_expedited_ops, |
1420 | &sched_ops, &sched_sync_ops, &sched_expedited_ops, }; | 1619 | &sched_ops, &sched_sync_ops, &sched_expedited_ops, }; |
1421 | 1620 | ||
1422 | mutex_lock(&fullstop_mutex); | 1621 | mutex_lock(&fullstop_mutex); |
@@ -1607,6 +1806,18 @@ rcu_torture_init(void) | |||
1607 | } | 1806 | } |
1608 | } | 1807 | } |
1609 | } | 1808 | } |
1809 | if (shutdown_secs > 0) { | ||
1810 | shutdown_time = jiffies + shutdown_secs * HZ; | ||
1811 | shutdown_task = kthread_run(rcu_torture_shutdown, NULL, | ||
1812 | "rcu_torture_shutdown"); | ||
1813 | if (IS_ERR(shutdown_task)) { | ||
1814 | firsterr = PTR_ERR(shutdown_task); | ||
1815 | VERBOSE_PRINTK_ERRSTRING("Failed to create shutdown"); | ||
1816 | shutdown_task = NULL; | ||
1817 | goto unwind; | ||
1818 | } | ||
1819 | } | ||
1820 | rcu_torture_onoff_init(); | ||
1610 | register_reboot_notifier(&rcutorture_shutdown_nb); | 1821 | register_reboot_notifier(&rcutorture_shutdown_nb); |
1611 | rcutorture_record_test_transition(); | 1822 | rcutorture_record_test_transition(); |
1612 | mutex_unlock(&fullstop_mutex); | 1823 | mutex_unlock(&fullstop_mutex); |
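
With the new onoff_interval and shutdown_secs parameters wired up above, a plausible way to exercise these paths would be something like the following invocation (illustrative; the specific values are arbitrary):

    modprobe rcutorture onoff_interval=3 shutdown_secs=1800

With those settings the onoff kthread attempts to offline or online a randomly chosen hotpluggable CPU every 3 seconds, and rcu_torture_shutdown() powers the machine off after 30 minutes, running rcu_torture_cleanup() first so the usual success/failure summary still reaches the console.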
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 6b76d812740c..6c4a6722abfd 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -69,7 +69,7 @@ static struct lock_class_key rcu_node_class[NUM_RCU_LVLS]; | |||
69 | NUM_RCU_LVL_3, \ | 69 | NUM_RCU_LVL_3, \ |
70 | NUM_RCU_LVL_4, /* == MAX_RCU_LVLS */ \ | 70 | NUM_RCU_LVL_4, /* == MAX_RCU_LVLS */ \ |
71 | }, \ | 71 | }, \ |
72 | .signaled = RCU_GP_IDLE, \ | 72 | .fqs_state = RCU_GP_IDLE, \ |
73 | .gpnum = -300, \ | 73 | .gpnum = -300, \ |
74 | .completed = -300, \ | 74 | .completed = -300, \ |
75 | .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.onofflock), \ | 75 | .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.onofflock), \ |
@@ -195,12 +195,10 @@ void rcu_note_context_switch(int cpu) | |||
195 | } | 195 | } |
196 | EXPORT_SYMBOL_GPL(rcu_note_context_switch); | 196 | EXPORT_SYMBOL_GPL(rcu_note_context_switch); |
197 | 197 | ||
198 | #ifdef CONFIG_NO_HZ | ||
199 | DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { | 198 | DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { |
200 | .dynticks_nesting = 1, | 199 | .dynticks_nesting = DYNTICK_TASK_NESTING, |
201 | .dynticks = ATOMIC_INIT(1), | 200 | .dynticks = ATOMIC_INIT(1), |
202 | }; | 201 | }; |
203 | #endif /* #ifdef CONFIG_NO_HZ */ | ||
204 | 202 | ||
205 | static int blimit = 10; /* Maximum callbacks per rcu_do_batch. */ | 203 | static int blimit = 10; /* Maximum callbacks per rcu_do_batch. */ |
206 | static int qhimark = 10000; /* If this many pending, ignore blimit. */ | 204 | static int qhimark = 10000; /* If this many pending, ignore blimit. */ |
@@ -328,11 +326,11 @@ static int rcu_implicit_offline_qs(struct rcu_data *rdp) | |||
328 | return 1; | 326 | return 1; |
329 | } | 327 | } |
330 | 328 | ||
331 | /* If preemptible RCU, no point in sending reschedule IPI. */ | 329 | /* |
332 | if (rdp->preemptible) | 330 | * The CPU is online, so send it a reschedule IPI. This forces |
333 | return 0; | 331 | * it through the scheduler, and (inefficiently) also handles cases |
334 | 332 | * where idle loops fail to inform RCU about the CPU being idle. | |
335 | /* The CPU is online, so send it a reschedule IPI. */ | 333 | */ |
336 | if (rdp->cpu != smp_processor_id()) | 334 | if (rdp->cpu != smp_processor_id()) |
337 | smp_send_reschedule(rdp->cpu); | 335 | smp_send_reschedule(rdp->cpu); |
338 | else | 336 | else |
@@ -343,59 +341,181 @@ static int rcu_implicit_offline_qs(struct rcu_data *rdp) | |||
343 | 341 | ||
344 | #endif /* #ifdef CONFIG_SMP */ | 342 | #endif /* #ifdef CONFIG_SMP */ |
345 | 343 | ||
346 | #ifdef CONFIG_NO_HZ | 344 | /* |
345 | * rcu_idle_enter_common - inform RCU that current CPU is moving towards idle | ||
346 | * | ||
347 | * If the new value of the ->dynticks_nesting counter now is zero, | ||
348 | * we really have entered idle, and must do the appropriate accounting. | ||
349 | * The caller must have disabled interrupts. | ||
350 | */ | ||
351 | static void rcu_idle_enter_common(struct rcu_dynticks *rdtp, long long oldval) | ||
352 | { | ||
353 | trace_rcu_dyntick("Start", oldval, 0); | ||
354 | if (!is_idle_task(current)) { | ||
355 | struct task_struct *idle = idle_task(smp_processor_id()); | ||
356 | |||
357 | trace_rcu_dyntick("Error on entry: not idle task", oldval, 0); | ||
358 | ftrace_dump(DUMP_ALL); | ||
359 | WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", | ||
360 | current->pid, current->comm, | ||
361 | idle->pid, idle->comm); /* must be idle task! */ | ||
362 | } | ||
363 | rcu_prepare_for_idle(smp_processor_id()); | ||
364 | /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */ | ||
365 | smp_mb__before_atomic_inc(); /* See above. */ | ||
366 | atomic_inc(&rdtp->dynticks); | ||
367 | smp_mb__after_atomic_inc(); /* Force ordering with next sojourn. */ | ||
368 | WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); | ||
369 | } | ||
347 | 370 | ||
348 | /** | 371 | /** |
349 | * rcu_enter_nohz - inform RCU that current CPU is entering nohz | 372 | * rcu_idle_enter - inform RCU that current CPU is entering idle |
350 | * | 373 | * |
351 | * Enter nohz mode, in other words, -leave- the mode in which RCU | 374 | * Enter idle mode, in other words, -leave- the mode in which RCU |
352 | * read-side critical sections can occur. (Though RCU read-side | 375 | * read-side critical sections can occur. (Though RCU read-side |
353 | * critical sections can occur in irq handlers in nohz mode, a possibility | 376 | * critical sections can occur in irq handlers in idle, a possibility |
354 | * handled by rcu_irq_enter() and rcu_irq_exit()). | 377 | * handled by irq_enter() and irq_exit().) |
378 | * | ||
379 | * We crowbar the ->dynticks_nesting field to zero to allow for | ||
380 | * the possibility of usermode upcalls having messed up our count | ||
381 | * of interrupt nesting level during the prior busy period. | ||
355 | */ | 382 | */ |
356 | void rcu_enter_nohz(void) | 383 | void rcu_idle_enter(void) |
357 | { | 384 | { |
358 | unsigned long flags; | 385 | unsigned long flags; |
386 | long long oldval; | ||
359 | struct rcu_dynticks *rdtp; | 387 | struct rcu_dynticks *rdtp; |
360 | 388 | ||
361 | local_irq_save(flags); | 389 | local_irq_save(flags); |
362 | rdtp = &__get_cpu_var(rcu_dynticks); | 390 | rdtp = &__get_cpu_var(rcu_dynticks); |
363 | if (--rdtp->dynticks_nesting) { | 391 | oldval = rdtp->dynticks_nesting; |
364 | local_irq_restore(flags); | 392 | rdtp->dynticks_nesting = 0; |
365 | return; | 393 | rcu_idle_enter_common(rdtp, oldval); |
366 | } | ||
367 | trace_rcu_dyntick("Start"); | ||
368 | /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */ | ||
369 | smp_mb__before_atomic_inc(); /* See above. */ | ||
370 | atomic_inc(&rdtp->dynticks); | ||
371 | smp_mb__after_atomic_inc(); /* Force ordering with next sojourn. */ | ||
372 | WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); | ||
373 | local_irq_restore(flags); | 394 | local_irq_restore(flags); |
374 | } | 395 | } |
375 | 396 | ||
376 | /* | 397 | /** |
377 | * rcu_exit_nohz - inform RCU that current CPU is leaving nohz | 398 | * rcu_irq_exit - inform RCU that current CPU is exiting irq towards idle |
399 | * | ||
400 | * Exit from an interrupt handler, which might possibly result in entering | ||
401 | * idle mode, in other words, leaving the mode in which read-side critical | ||
402 | * sections can occur. | ||
378 | * | 403 | * |
379 | * Exit nohz mode, in other words, -enter- the mode in which RCU | 404 | * This code assumes that the idle loop never does anything that might |
380 | * read-side critical sections normally occur. | 405 | * result in unbalanced calls to irq_enter() and irq_exit(). If your |
406 | * architecture violates this assumption, RCU will give you what you | ||
407 | * deserve, good and hard. But very infrequently and irreproducibly. | ||
408 | * | ||
409 | * Use things like work queues to work around this limitation. | ||
410 | * | ||
411 | * You have been warned. | ||
381 | */ | 412 | */ |
382 | void rcu_exit_nohz(void) | 413 | void rcu_irq_exit(void) |
383 | { | 414 | { |
384 | unsigned long flags; | 415 | unsigned long flags; |
416 | long long oldval; | ||
385 | struct rcu_dynticks *rdtp; | 417 | struct rcu_dynticks *rdtp; |
386 | 418 | ||
387 | local_irq_save(flags); | 419 | local_irq_save(flags); |
388 | rdtp = &__get_cpu_var(rcu_dynticks); | 420 | rdtp = &__get_cpu_var(rcu_dynticks); |
389 | if (rdtp->dynticks_nesting++) { | 421 | oldval = rdtp->dynticks_nesting; |
390 | local_irq_restore(flags); | 422 | rdtp->dynticks_nesting--; |
391 | return; | 423 | WARN_ON_ONCE(rdtp->dynticks_nesting < 0); |
392 | } | 424 | if (rdtp->dynticks_nesting) |
425 | trace_rcu_dyntick("--=", oldval, rdtp->dynticks_nesting); | ||
426 | else | ||
427 | rcu_idle_enter_common(rdtp, oldval); | ||
428 | local_irq_restore(flags); | ||
429 | } | ||
430 | |||
431 | /* | ||
432 | * rcu_idle_exit_common - inform RCU that current CPU is moving away from idle | ||
433 | * | ||
434 | * If the new value of the ->dynticks_nesting counter was previously zero, | ||
435 | * we really have exited idle, and must do the appropriate accounting. | ||
436 | * The caller must have disabled interrupts. | ||
437 | */ | ||
438 | static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval) | ||
439 | { | ||
393 | smp_mb__before_atomic_inc(); /* Force ordering w/previous sojourn. */ | 440 | smp_mb__before_atomic_inc(); /* Force ordering w/previous sojourn. */ |
394 | atomic_inc(&rdtp->dynticks); | 441 | atomic_inc(&rdtp->dynticks); |
395 | /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ | 442 | /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ |
396 | smp_mb__after_atomic_inc(); /* See above. */ | 443 | smp_mb__after_atomic_inc(); /* See above. */ |
397 | WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); | 444 | WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); |
398 | trace_rcu_dyntick("End"); | 445 | rcu_cleanup_after_idle(smp_processor_id()); |
446 | trace_rcu_dyntick("End", oldval, rdtp->dynticks_nesting); | ||
447 | if (!is_idle_task(current)) { | ||
448 | struct task_struct *idle = idle_task(smp_processor_id()); | ||
449 | |||
450 | trace_rcu_dyntick("Error on exit: not idle task", | ||
451 | oldval, rdtp->dynticks_nesting); | ||
452 | ftrace_dump(DUMP_ALL); | ||
453 | WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", | ||
454 | current->pid, current->comm, | ||
455 | idle->pid, idle->comm); /* must be idle task! */ | ||
456 | } | ||
457 | } | ||
458 | |||
459 | /** | ||
460 | * rcu_idle_exit - inform RCU that current CPU is leaving idle | ||
461 | * | ||
462 | * Exit idle mode, in other words, -enter- the mode in which RCU | ||
463 | * read-side critical sections can occur. | ||
464 | * | ||
465 | * We crowbar the ->dynticks_nesting field to DYNTICK_TASK_NESTING to | ||
466 | * allow for the possibility of usermode upcalls messing up our count | ||
467 | * of interrupt nesting level during the busy period that is just | ||
468 | * now starting. | ||
469 | */ | ||
470 | void rcu_idle_exit(void) | ||
471 | { | ||
472 | unsigned long flags; | ||
473 | struct rcu_dynticks *rdtp; | ||
474 | long long oldval; | ||
475 | |||
476 | local_irq_save(flags); | ||
477 | rdtp = &__get_cpu_var(rcu_dynticks); | ||
478 | oldval = rdtp->dynticks_nesting; | ||
479 | WARN_ON_ONCE(oldval != 0); | ||
480 | rdtp->dynticks_nesting = DYNTICK_TASK_NESTING; | ||
481 | rcu_idle_exit_common(rdtp, oldval); | ||
482 | local_irq_restore(flags); | ||
483 | } | ||
484 | |||
485 | /** | ||
486 | * rcu_irq_enter - inform RCU that current CPU is entering irq away from idle | ||
487 | * | ||
488 | * Enter an interrupt handler, which might possibly result in exiting | ||
489 | * idle mode, in other words, entering the mode in which read-side critical | ||
490 | * sections can occur. | ||
491 | * | ||
492 | * Note that the Linux kernel is fully capable of entering an interrupt | ||
493 | * handler that it never exits, for example when doing upcalls to | ||
494 | * user mode! This code assumes that the idle loop never does upcalls to | ||
495 | * user mode. If your architecture does do upcalls from the idle loop (or | ||
496 | * does anything else that results in unbalanced calls to the irq_enter() | ||
497 | * and irq_exit() functions), RCU will give you what you deserve, good | ||
498 | * and hard. But very infrequently and irreproducibly. | ||
499 | * | ||
500 | * Use things like work queues to work around this limitation. | ||
501 | * | ||
502 | * You have been warned. | ||
503 | */ | ||
504 | void rcu_irq_enter(void) | ||
505 | { | ||
506 | unsigned long flags; | ||
507 | struct rcu_dynticks *rdtp; | ||
508 | long long oldval; | ||
509 | |||
510 | local_irq_save(flags); | ||
511 | rdtp = &__get_cpu_var(rcu_dynticks); | ||
512 | oldval = rdtp->dynticks_nesting; | ||
513 | rdtp->dynticks_nesting++; | ||
514 | WARN_ON_ONCE(rdtp->dynticks_nesting == 0); | ||
515 | if (oldval) | ||
516 | trace_rcu_dyntick("++=", oldval, rdtp->dynticks_nesting); | ||
517 | else | ||
518 | rcu_idle_exit_common(rdtp, oldval); | ||
399 | local_irq_restore(flags); | 519 | local_irq_restore(flags); |
400 | } | 520 | } |
401 | 521 | ||
@@ -442,27 +562,37 @@ void rcu_nmi_exit(void) | |||
442 | WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); | 562 | WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); |
443 | } | 563 | } |
444 | 564 | ||
565 | #ifdef CONFIG_PROVE_RCU | ||
566 | |||
445 | /** | 567 | /** |
446 | * rcu_irq_enter - inform RCU of entry to hard irq context | 568 | * rcu_is_cpu_idle - see if RCU thinks that the current CPU is idle |
447 | * | 569 | * |
448 | * If the CPU was idle with dynamic ticks active, this updates the | 570 | * If the current CPU is in its idle loop and is neither in an interrupt |
449 | * rdtp->dynticks to let the RCU handling know that the CPU is active. | 571 | * or NMI handler, return true. |
450 | */ | 572 | */ |
451 | void rcu_irq_enter(void) | 573 | int rcu_is_cpu_idle(void) |
452 | { | 574 | { |
453 | rcu_exit_nohz(); | 575 | int ret; |
576 | |||
577 | preempt_disable(); | ||
578 | ret = (atomic_read(&__get_cpu_var(rcu_dynticks).dynticks) & 0x1) == 0; | ||
579 | preempt_enable(); | ||
580 | return ret; | ||
454 | } | 581 | } |
582 | EXPORT_SYMBOL(rcu_is_cpu_idle); | ||
583 | |||
584 | #endif /* #ifdef CONFIG_PROVE_RCU */ | ||
455 | 585 | ||
456 | /** | 586 | /** |
457 | * rcu_irq_exit - inform RCU of exit from hard irq context | 587 | * rcu_is_cpu_rrupt_from_idle - see if idle or immediately interrupted from idle |
458 | * | 588 | * |
459 | * If the CPU was idle with dynamic ticks active, update the rdp->dynticks | 589 | * If the current CPU is idle or running at a first-level (not nested) |
460 | * to put let the RCU handling be aware that the CPU is going back to idle | 590 | * interrupt from idle, return true. The caller must have at least |
461 | * with no ticks. | 591 | * disabled preemption. |
462 | */ | 592 | */ |
463 | void rcu_irq_exit(void) | 593 | int rcu_is_cpu_rrupt_from_idle(void) |
464 | { | 594 | { |
465 | rcu_enter_nohz(); | 595 | return __get_cpu_var(rcu_dynticks).dynticks_nesting <= 1; |
466 | } | 596 | } |
467 | 597 | ||
468 | #ifdef CONFIG_SMP | 598 | #ifdef CONFIG_SMP |
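
For tree RCU the per-CPU ->dynticks counter encodes the idle state in its low bit: it boots odd (ATOMIC_INIT(1), not idle), and every idle entry and every idle exit increments it under memory barriers, so the value is even exactly while the CPU is in an extended quiescent state. rcu_is_cpu_idle() above just tests that bit, and dyntick_save_progress_counter() in the next hunk samples it so force_quiescent_state() can credit a sleeping CPU with a quiescent state without waking it. A single-threaded model of the parity rule (names invented; the real counter is an atomic_t with barriers):

    #include <assert.h>
    #include <stdio.h>

    static long dynticks = 1;               /* boot value: odd => CPU is not idle */

    static int cpu_is_rcu_idle(void)
    {
            return (dynticks & 0x1) == 0;   /* even => extended quiescent state */
    }

    int main(void)
    {
            assert(!cpu_is_rcu_idle());     /* odd at boot */
            dynticks++;                     /* rcu_idle_enter(): now even */
            assert(cpu_is_rcu_idle());      /* RCU may safely ignore this CPU */
            dynticks++;                     /* rcu_idle_exit(): odd again */
            assert(!cpu_is_rcu_idle());     /* grace periods must wait on it */
            printf("dynticks parity model OK\n");
            return 0;
    }

NMIs and interrupts from idle bump the same counter, so a snapshot that has changed since the last grace-period pass is also evidence that the CPU went through a quiescent state.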
@@ -475,7 +605,7 @@ void rcu_irq_exit(void) | |||
475 | static int dyntick_save_progress_counter(struct rcu_data *rdp) | 605 | static int dyntick_save_progress_counter(struct rcu_data *rdp) |
476 | { | 606 | { |
477 | rdp->dynticks_snap = atomic_add_return(0, &rdp->dynticks->dynticks); | 607 | rdp->dynticks_snap = atomic_add_return(0, &rdp->dynticks->dynticks); |
478 | return 0; | 608 | return (rdp->dynticks_snap & 0x1) == 0; |
479 | } | 609 | } |
480 | 610 | ||
481 | /* | 611 | /* |
@@ -512,26 +642,6 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) | |||
512 | 642 | ||
513 | #endif /* #ifdef CONFIG_SMP */ | 643 | #endif /* #ifdef CONFIG_SMP */ |
514 | 644 | ||
515 | #else /* #ifdef CONFIG_NO_HZ */ | ||
516 | |||
517 | #ifdef CONFIG_SMP | ||
518 | |||
519 | static int dyntick_save_progress_counter(struct rcu_data *rdp) | ||
520 | { | ||
521 | return 0; | ||
522 | } | ||
523 | |||
524 | static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) | ||
525 | { | ||
526 | return rcu_implicit_offline_qs(rdp); | ||
527 | } | ||
528 | |||
529 | #endif /* #ifdef CONFIG_SMP */ | ||
530 | |||
531 | #endif /* #else #ifdef CONFIG_NO_HZ */ | ||
532 | |||
533 | int rcu_cpu_stall_suppress __read_mostly; | ||
534 | |||
535 | static void record_gp_stall_check_time(struct rcu_state *rsp) | 645 | static void record_gp_stall_check_time(struct rcu_state *rsp) |
536 | { | 646 | { |
537 | rsp->gp_start = jiffies; | 647 | rsp->gp_start = jiffies; |
@@ -866,8 +976,8 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) | |||
866 | /* Advance to a new grace period and initialize state. */ | 976 | /* Advance to a new grace period and initialize state. */ |
867 | rsp->gpnum++; | 977 | rsp->gpnum++; |
868 | trace_rcu_grace_period(rsp->name, rsp->gpnum, "start"); | 978 | trace_rcu_grace_period(rsp->name, rsp->gpnum, "start"); |
869 | WARN_ON_ONCE(rsp->signaled == RCU_GP_INIT); | 979 | WARN_ON_ONCE(rsp->fqs_state == RCU_GP_INIT); |
870 | rsp->signaled = RCU_GP_INIT; /* Hold off force_quiescent_state. */ | 980 | rsp->fqs_state = RCU_GP_INIT; /* Hold off force_quiescent_state. */ |
871 | rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; | 981 | rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; |
872 | record_gp_stall_check_time(rsp); | 982 | record_gp_stall_check_time(rsp); |
873 | 983 | ||
@@ -877,7 +987,7 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) | |||
877 | rnp->qsmask = rnp->qsmaskinit; | 987 | rnp->qsmask = rnp->qsmaskinit; |
878 | rnp->gpnum = rsp->gpnum; | 988 | rnp->gpnum = rsp->gpnum; |
879 | rnp->completed = rsp->completed; | 989 | rnp->completed = rsp->completed; |
880 | rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state OK. */ | 990 | rsp->fqs_state = RCU_SIGNAL_INIT; /* force_quiescent_state OK */ |
881 | rcu_start_gp_per_cpu(rsp, rnp, rdp); | 991 | rcu_start_gp_per_cpu(rsp, rnp, rdp); |
882 | rcu_preempt_boost_start_gp(rnp); | 992 | rcu_preempt_boost_start_gp(rnp); |
883 | trace_rcu_grace_period_init(rsp->name, rnp->gpnum, | 993 | trace_rcu_grace_period_init(rsp->name, rnp->gpnum, |
@@ -927,7 +1037,7 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) | |||
927 | 1037 | ||
928 | rnp = rcu_get_root(rsp); | 1038 | rnp = rcu_get_root(rsp); |
929 | raw_spin_lock(&rnp->lock); /* irqs already disabled. */ | 1039 | raw_spin_lock(&rnp->lock); /* irqs already disabled. */ |
930 | rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state now OK. */ | 1040 | rsp->fqs_state = RCU_SIGNAL_INIT; /* force_quiescent_state now OK. */ |
931 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | 1041 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ |
932 | raw_spin_unlock_irqrestore(&rsp->onofflock, flags); | 1042 | raw_spin_unlock_irqrestore(&rsp->onofflock, flags); |
933 | } | 1043 | } |
@@ -991,7 +1101,7 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags) | |||
991 | 1101 | ||
992 | rsp->completed = rsp->gpnum; /* Declare the grace period complete. */ | 1102 | rsp->completed = rsp->gpnum; /* Declare the grace period complete. */ |
993 | trace_rcu_grace_period(rsp->name, rsp->completed, "end"); | 1103 | trace_rcu_grace_period(rsp->name, rsp->completed, "end"); |
994 | rsp->signaled = RCU_GP_IDLE; | 1104 | rsp->fqs_state = RCU_GP_IDLE; |
995 | rcu_start_gp(rsp, flags); /* releases root node's rnp->lock. */ | 1105 | rcu_start_gp(rsp, flags); /* releases root node's rnp->lock. */ |
996 | } | 1106 | } |
997 | 1107 | ||
@@ -1221,7 +1331,7 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) | |||
1221 | else | 1331 | else |
1222 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1332 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
1223 | if (need_report & RCU_OFL_TASKS_EXP_GP) | 1333 | if (need_report & RCU_OFL_TASKS_EXP_GP) |
1224 | rcu_report_exp_rnp(rsp, rnp); | 1334 | rcu_report_exp_rnp(rsp, rnp, true); |
1225 | rcu_node_kthread_setaffinity(rnp, -1); | 1335 | rcu_node_kthread_setaffinity(rnp, -1); |
1226 | } | 1336 | } |
1227 | 1337 | ||
@@ -1263,7 +1373,9 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) | |||
1263 | /* If no callbacks are ready, just return.*/ | 1373 | /* If no callbacks are ready, just return.*/ |
1264 | if (!cpu_has_callbacks_ready_to_invoke(rdp)) { | 1374 | if (!cpu_has_callbacks_ready_to_invoke(rdp)) { |
1265 | trace_rcu_batch_start(rsp->name, 0, 0); | 1375 | trace_rcu_batch_start(rsp->name, 0, 0); |
1266 | trace_rcu_batch_end(rsp->name, 0); | 1376 | trace_rcu_batch_end(rsp->name, 0, !!ACCESS_ONCE(rdp->nxtlist), |
1377 | need_resched(), is_idle_task(current), | ||
1378 | rcu_is_callbacks_kthread()); | ||
1267 | return; | 1379 | return; |
1268 | } | 1380 | } |
1269 | 1381 | ||
@@ -1291,12 +1403,17 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) | |||
1291 | debug_rcu_head_unqueue(list); | 1403 | debug_rcu_head_unqueue(list); |
1292 | __rcu_reclaim(rsp->name, list); | 1404 | __rcu_reclaim(rsp->name, list); |
1293 | list = next; | 1405 | list = next; |
1294 | if (++count >= bl) | 1406 | /* Stop only if limit reached and CPU has something to do. */ |
1407 | if (++count >= bl && | ||
1408 | (need_resched() || | ||
1409 | (!is_idle_task(current) && !rcu_is_callbacks_kthread()))) | ||
1295 | break; | 1410 | break; |
1296 | } | 1411 | } |
1297 | 1412 | ||
1298 | local_irq_save(flags); | 1413 | local_irq_save(flags); |
1299 | trace_rcu_batch_end(rsp->name, count); | 1414 | trace_rcu_batch_end(rsp->name, count, !!list, need_resched(), |
1415 | is_idle_task(current), | ||
1416 | rcu_is_callbacks_kthread()); | ||
1300 | 1417 | ||
1301 | /* Update count, and requeue any remaining callbacks. */ | 1418 | /* Update count, and requeue any remaining callbacks. */ |
1302 | rdp->qlen -= count; | 1419 | rdp->qlen -= count; |
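A compact restatement of the new exit condition in the callback loop above, as a hedged helper with illustrative names: processing stops at the batch limit only when the CPU has competing work, so an otherwise-idle CPU or the dedicated callbacks kthread keeps draining callbacks.

/* Illustrative model of the loop-exit test added to rcu_do_batch() above. */
static int model_stop_batch(int count, long blimit, int resched_needed,
			    int cpu_is_idle, int is_cb_kthread)
{
	return count >= blimit &&
	       (resched_needed || (!cpu_is_idle && !is_cb_kthread));
}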
@@ -1334,16 +1451,14 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) | |||
1334 | * (user mode or idle loop for rcu, non-softirq execution for rcu_bh). | 1451 | * (user mode or idle loop for rcu, non-softirq execution for rcu_bh). |
1335 | * Also schedule RCU core processing. | 1452 | * Also schedule RCU core processing. |
1336 | * | 1453 | * |
1337 | * This function must be called with hardirqs disabled. It is normally | 1454 | * This function must be called from hardirq context. It is normally |
1338 | * invoked from the scheduling-clock interrupt. If rcu_pending returns | 1455 | * invoked from the scheduling-clock interrupt. If rcu_pending returns |
1339 | * false, there is no point in invoking rcu_check_callbacks(). | 1456 | * false, there is no point in invoking rcu_check_callbacks(). |
1340 | */ | 1457 | */ |
1341 | void rcu_check_callbacks(int cpu, int user) | 1458 | void rcu_check_callbacks(int cpu, int user) |
1342 | { | 1459 | { |
1343 | trace_rcu_utilization("Start scheduler-tick"); | 1460 | trace_rcu_utilization("Start scheduler-tick"); |
1344 | if (user || | 1461 | if (user || rcu_is_cpu_rrupt_from_idle()) { |
1345 | (idle_cpu(cpu) && rcu_scheduler_active && | ||
1346 | !in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) { | ||
1347 | 1462 | ||
1348 | /* | 1463 | /* |
1349 | * Get here if this CPU took its interrupt from user | 1464 | * Get here if this CPU took its interrupt from user |
@@ -1457,7 +1572,7 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) | |||
1457 | goto unlock_fqs_ret; /* no GP in progress, time updated. */ | 1572 | goto unlock_fqs_ret; /* no GP in progress, time updated. */ |
1458 | } | 1573 | } |
1459 | rsp->fqs_active = 1; | 1574 | rsp->fqs_active = 1; |
1460 | switch (rsp->signaled) { | 1575 | switch (rsp->fqs_state) { |
1461 | case RCU_GP_IDLE: | 1576 | case RCU_GP_IDLE: |
1462 | case RCU_GP_INIT: | 1577 | case RCU_GP_INIT: |
1463 | 1578 | ||
@@ -1473,7 +1588,7 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) | |||
1473 | force_qs_rnp(rsp, dyntick_save_progress_counter); | 1588 | force_qs_rnp(rsp, dyntick_save_progress_counter); |
1474 | raw_spin_lock(&rnp->lock); /* irqs already disabled */ | 1589 | raw_spin_lock(&rnp->lock); /* irqs already disabled */ |
1475 | if (rcu_gp_in_progress(rsp)) | 1590 | if (rcu_gp_in_progress(rsp)) |
1476 | rsp->signaled = RCU_FORCE_QS; | 1591 | rsp->fqs_state = RCU_FORCE_QS; |
1477 | break; | 1592 | break; |
1478 | 1593 | ||
1479 | case RCU_FORCE_QS: | 1594 | case RCU_FORCE_QS: |
@@ -1812,7 +1927,7 @@ static int rcu_pending(int cpu) | |||
1812 | * by the current CPU, even if none need be done immediately, returning | 1927 | * by the current CPU, even if none need be done immediately, returning |
1813 | * 1 if so. | 1928 | * 1 if so. |
1814 | */ | 1929 | */ |
1815 | static int rcu_needs_cpu_quick_check(int cpu) | 1930 | static int rcu_cpu_has_callbacks(int cpu) |
1816 | { | 1931 | { |
1817 | /* RCU callbacks either ready or pending? */ | 1932 | /* RCU callbacks either ready or pending? */ |
1818 | return per_cpu(rcu_sched_data, cpu).nxtlist || | 1933 | return per_cpu(rcu_sched_data, cpu).nxtlist || |
@@ -1913,9 +2028,9 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) | |||
1913 | for (i = 0; i < RCU_NEXT_SIZE; i++) | 2028 | for (i = 0; i < RCU_NEXT_SIZE; i++) |
1914 | rdp->nxttail[i] = &rdp->nxtlist; | 2029 | rdp->nxttail[i] = &rdp->nxtlist; |
1915 | rdp->qlen = 0; | 2030 | rdp->qlen = 0; |
1916 | #ifdef CONFIG_NO_HZ | ||
1917 | rdp->dynticks = &per_cpu(rcu_dynticks, cpu); | 2031 | rdp->dynticks = &per_cpu(rcu_dynticks, cpu); |
1918 | #endif /* #ifdef CONFIG_NO_HZ */ | 2032 | WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_NESTING); |
2033 | WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1); | ||
1919 | rdp->cpu = cpu; | 2034 | rdp->cpu = cpu; |
1920 | rdp->rsp = rsp; | 2035 | rdp->rsp = rsp; |
1921 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 2036 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
@@ -1942,6 +2057,10 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) | |||
1942 | rdp->qlen_last_fqs_check = 0; | 2057 | rdp->qlen_last_fqs_check = 0; |
1943 | rdp->n_force_qs_snap = rsp->n_force_qs; | 2058 | rdp->n_force_qs_snap = rsp->n_force_qs; |
1944 | rdp->blimit = blimit; | 2059 | rdp->blimit = blimit; |
2060 | rdp->dynticks->dynticks_nesting = DYNTICK_TASK_NESTING; | ||
2061 | atomic_set(&rdp->dynticks->dynticks, | ||
2062 | (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1); | ||
2063 | rcu_prepare_for_idle_init(cpu); | ||
1945 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | 2064 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ |
1946 | 2065 | ||
1947 | /* | 2066 | /* |
@@ -2023,6 +2142,7 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, | |||
2023 | rcu_send_cbs_to_online(&rcu_bh_state); | 2142 | rcu_send_cbs_to_online(&rcu_bh_state); |
2024 | rcu_send_cbs_to_online(&rcu_sched_state); | 2143 | rcu_send_cbs_to_online(&rcu_sched_state); |
2025 | rcu_preempt_send_cbs_to_online(); | 2144 | rcu_preempt_send_cbs_to_online(); |
2145 | rcu_cleanup_after_idle(cpu); | ||
2026 | break; | 2146 | break; |
2027 | case CPU_DEAD: | 2147 | case CPU_DEAD: |
2028 | case CPU_DEAD_FROZEN: | 2148 | case CPU_DEAD_FROZEN: |
diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 849ce9ec51fe..fddff92d6676 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h | |||
@@ -84,9 +84,10 @@ | |||
84 | * Dynticks per-CPU state. | 84 | * Dynticks per-CPU state. |
85 | */ | 85 | */ |
86 | struct rcu_dynticks { | 86 | struct rcu_dynticks { |
87 | int dynticks_nesting; /* Track irq/process nesting level. */ | 87 | long long dynticks_nesting; /* Track irq/process nesting level. */ |
88 | int dynticks_nmi_nesting; /* Track NMI nesting level. */ | 88 | /* Process level is worth LLONG_MAX/2. */ |
89 | atomic_t dynticks; /* Even value for dynticks-idle, else odd. */ | 89 | int dynticks_nmi_nesting; /* Track NMI nesting level. */ |
90 | atomic_t dynticks; /* Even value for idle, else odd. */ | ||
90 | }; | 91 | }; |
91 | 92 | ||
92 | /* RCU's kthread states for tracing. */ | 93 | /* RCU's kthread states for tracing. */ |
@@ -274,16 +275,12 @@ struct rcu_data { | |||
274 | /* did other CPU force QS recently? */ | 275 | /* did other CPU force QS recently? */ |
275 | long blimit; /* Upper limit on a processed batch */ | 276 | long blimit; /* Upper limit on a processed batch */ |
276 | 277 | ||
277 | #ifdef CONFIG_NO_HZ | ||
278 | /* 3) dynticks interface. */ | 278 | /* 3) dynticks interface. */ |
279 | struct rcu_dynticks *dynticks; /* Shared per-CPU dynticks state. */ | 279 | struct rcu_dynticks *dynticks; /* Shared per-CPU dynticks state. */ |
280 | int dynticks_snap; /* Per-GP tracking for dynticks. */ | 280 | int dynticks_snap; /* Per-GP tracking for dynticks. */ |
281 | #endif /* #ifdef CONFIG_NO_HZ */ | ||
282 | 281 | ||
283 | /* 4) reasons this CPU needed to be kicked by force_quiescent_state */ | 282 | /* 4) reasons this CPU needed to be kicked by force_quiescent_state */ |
284 | #ifdef CONFIG_NO_HZ | ||
285 | unsigned long dynticks_fqs; /* Kicked due to dynticks idle. */ | 283 | unsigned long dynticks_fqs; /* Kicked due to dynticks idle. */ |
286 | #endif /* #ifdef CONFIG_NO_HZ */ | ||
287 | unsigned long offline_fqs; /* Kicked due to being offline. */ | 284 | unsigned long offline_fqs; /* Kicked due to being offline. */ |
288 | unsigned long resched_ipi; /* Sent a resched IPI. */ | 285 | unsigned long resched_ipi; /* Sent a resched IPI. */ |
289 | 286 | ||
@@ -302,16 +299,12 @@ struct rcu_data { | |||
302 | struct rcu_state *rsp; | 299 | struct rcu_state *rsp; |
303 | }; | 300 | }; |
304 | 301 | ||
305 | /* Values for signaled field in struct rcu_state. */ | 302 | /* Values for fqs_state field in struct rcu_state. */ |
306 | #define RCU_GP_IDLE 0 /* No grace period in progress. */ | 303 | #define RCU_GP_IDLE 0 /* No grace period in progress. */ |
307 | #define RCU_GP_INIT 1 /* Grace period being initialized. */ | 304 | #define RCU_GP_INIT 1 /* Grace period being initialized. */ |
308 | #define RCU_SAVE_DYNTICK 2 /* Need to scan dyntick state. */ | 305 | #define RCU_SAVE_DYNTICK 2 /* Need to scan dyntick state. */ |
309 | #define RCU_FORCE_QS 3 /* Need to force quiescent state. */ | 306 | #define RCU_FORCE_QS 3 /* Need to force quiescent state. */ |
310 | #ifdef CONFIG_NO_HZ | ||
311 | #define RCU_SIGNAL_INIT RCU_SAVE_DYNTICK | 307 | #define RCU_SIGNAL_INIT RCU_SAVE_DYNTICK |
312 | #else /* #ifdef CONFIG_NO_HZ */ | ||
313 | #define RCU_SIGNAL_INIT RCU_FORCE_QS | ||
314 | #endif /* #else #ifdef CONFIG_NO_HZ */ | ||
315 | 308 | ||
316 | #define RCU_JIFFIES_TILL_FORCE_QS 3 /* for rsp->jiffies_force_qs */ | 309 | #define RCU_JIFFIES_TILL_FORCE_QS 3 /* for rsp->jiffies_force_qs */ |
317 | 310 | ||
@@ -361,7 +354,7 @@ struct rcu_state { | |||
361 | 354 | ||
362 | /* The following fields are guarded by the root rcu_node's lock. */ | 355 | /* The following fields are guarded by the root rcu_node's lock. */ |
363 | 356 | ||
364 | u8 signaled ____cacheline_internodealigned_in_smp; | 357 | u8 fqs_state ____cacheline_internodealigned_in_smp; |
365 | /* Force QS state. */ | 358 | /* Force QS state. */ |
366 | u8 fqs_active; /* force_quiescent_state() */ | 359 | u8 fqs_active; /* force_quiescent_state() */ |
367 | /* is running. */ | 360 | /* is running. */ |
@@ -451,7 +444,8 @@ static void rcu_preempt_check_callbacks(int cpu); | |||
451 | static void rcu_preempt_process_callbacks(void); | 444 | static void rcu_preempt_process_callbacks(void); |
452 | void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); | 445 | void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); |
453 | #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) | 446 | #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) |
454 | static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp); | 447 | static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, |
448 | bool wake); | ||
455 | #endif /* #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) */ | 449 | #endif /* #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) */ |
456 | static int rcu_preempt_pending(int cpu); | 450 | static int rcu_preempt_pending(int cpu); |
457 | static int rcu_preempt_needs_cpu(int cpu); | 451 | static int rcu_preempt_needs_cpu(int cpu); |
@@ -461,6 +455,7 @@ static void __init __rcu_init_preempt(void); | |||
461 | static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); | 455 | static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); |
462 | static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); | 456 | static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); |
463 | static void invoke_rcu_callbacks_kthread(void); | 457 | static void invoke_rcu_callbacks_kthread(void); |
458 | static bool rcu_is_callbacks_kthread(void); | ||
464 | #ifdef CONFIG_RCU_BOOST | 459 | #ifdef CONFIG_RCU_BOOST |
465 | static void rcu_preempt_do_callbacks(void); | 460 | static void rcu_preempt_do_callbacks(void); |
466 | static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, | 461 | static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, |
@@ -473,5 +468,8 @@ static void rcu_yield(void (*f)(unsigned long), unsigned long arg); | |||
473 | #endif /* #ifdef CONFIG_RCU_BOOST */ | 468 | #endif /* #ifdef CONFIG_RCU_BOOST */ |
474 | static void rcu_cpu_kthread_setrt(int cpu, int to_rt); | 469 | static void rcu_cpu_kthread_setrt(int cpu, int to_rt); |
475 | static void __cpuinit rcu_prepare_kthreads(int cpu); | 470 | static void __cpuinit rcu_prepare_kthreads(int cpu); |
471 | static void rcu_prepare_for_idle_init(int cpu); | ||
472 | static void rcu_cleanup_after_idle(int cpu); | ||
473 | static void rcu_prepare_for_idle(int cpu); | ||
476 | 474 | ||
477 | #endif /* #ifndef RCU_TREE_NONCORE */ | 475 | #endif /* #ifndef RCU_TREE_NONCORE */ |
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 4b9b9f8a4184..8bb35d73e1f9 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h | |||
@@ -312,6 +312,7 @@ static noinline void rcu_read_unlock_special(struct task_struct *t) | |||
312 | { | 312 | { |
313 | int empty; | 313 | int empty; |
314 | int empty_exp; | 314 | int empty_exp; |
315 | int empty_exp_now; | ||
315 | unsigned long flags; | 316 | unsigned long flags; |
316 | struct list_head *np; | 317 | struct list_head *np; |
317 | #ifdef CONFIG_RCU_BOOST | 318 | #ifdef CONFIG_RCU_BOOST |
@@ -382,8 +383,10 @@ static noinline void rcu_read_unlock_special(struct task_struct *t) | |||
382 | /* | 383 | /* |
383 | * If this was the last task on the current list, and if | 384 | * If this was the last task on the current list, and if |
384 | * we aren't waiting on any CPUs, report the quiescent state. | 385 | * we aren't waiting on any CPUs, report the quiescent state. |
385 | * Note that rcu_report_unblock_qs_rnp() releases rnp->lock. | 386 | * Note that rcu_report_unblock_qs_rnp() releases rnp->lock, |
387 | * so we must take a snapshot of the expedited state. | ||
386 | */ | 388 | */ |
389 | empty_exp_now = !rcu_preempted_readers_exp(rnp); | ||
387 | if (!empty && !rcu_preempt_blocked_readers_cgp(rnp)) { | 390 | if (!empty && !rcu_preempt_blocked_readers_cgp(rnp)) { |
388 | trace_rcu_quiescent_state_report("preempt_rcu", | 391 | trace_rcu_quiescent_state_report("preempt_rcu", |
389 | rnp->gpnum, | 392 | rnp->gpnum, |
@@ -406,8 +409,8 @@ static noinline void rcu_read_unlock_special(struct task_struct *t) | |||
406 | * If this was the last task on the expedited lists, | 409 | * If this was the last task on the expedited lists, |
407 | * then we need to report up the rcu_node hierarchy. | 410 | * then we need to report up the rcu_node hierarchy. |
408 | */ | 411 | */ |
409 | if (!empty_exp && !rcu_preempted_readers_exp(rnp)) | 412 | if (!empty_exp && empty_exp_now) |
410 | rcu_report_exp_rnp(&rcu_preempt_state, rnp); | 413 | rcu_report_exp_rnp(&rcu_preempt_state, rnp, true); |
411 | } else { | 414 | } else { |
412 | local_irq_restore(flags); | 415 | local_irq_restore(flags); |
413 | } | 416 | } |
@@ -729,9 +732,13 @@ static int sync_rcu_preempt_exp_done(struct rcu_node *rnp) | |||
729 | * recursively up the tree. (Calm down, calm down, we do the recursion | 732 | * recursively up the tree. (Calm down, calm down, we do the recursion |
730 | * iteratively!) | 733 | * iteratively!) |
731 | * | 734 | * |
735 | * Most callers will set the "wake" flag, but the task initiating the | ||
736 | * expedited grace period need not wake itself. | ||
737 | * | ||
732 | * Caller must hold sync_rcu_preempt_exp_mutex. | 738 | * Caller must hold sync_rcu_preempt_exp_mutex. |
733 | */ | 739 | */ |
734 | static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp) | 740 | static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, |
741 | bool wake) | ||
735 | { | 742 | { |
736 | unsigned long flags; | 743 | unsigned long flags; |
737 | unsigned long mask; | 744 | unsigned long mask; |
@@ -744,7 +751,8 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp) | |||
744 | } | 751 | } |
745 | if (rnp->parent == NULL) { | 752 | if (rnp->parent == NULL) { |
746 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 753 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
747 | wake_up(&sync_rcu_preempt_exp_wq); | 754 | if (wake) |
755 | wake_up(&sync_rcu_preempt_exp_wq); | ||
748 | break; | 756 | break; |
749 | } | 757 | } |
750 | mask = rnp->grpmask; | 758 | mask = rnp->grpmask; |
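As the call sites elsewhere in this merge show, the new wake argument separates report paths driven by other tasks, which must wake the waiter on sync_rcu_preempt_exp_wq, from the initialization path run by the task that is itself starting the expedited grace period:

rcu_report_exp_rnp(rsp, rnp, true);	/* e.g. the last blocked reader leaves */
rcu_report_exp_rnp(rsp, rnp, false);	/* sync_rcu_preempt_exp_init(): don't wake self */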
@@ -777,7 +785,7 @@ sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp) | |||
777 | must_wait = 1; | 785 | must_wait = 1; |
778 | } | 786 | } |
779 | if (!must_wait) | 787 | if (!must_wait) |
780 | rcu_report_exp_rnp(rsp, rnp); | 788 | rcu_report_exp_rnp(rsp, rnp, false); /* Don't wake self. */ |
781 | } | 789 | } |
782 | 790 | ||
783 | /* | 791 | /* |
@@ -1069,9 +1077,9 @@ EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); | |||
1069 | * report on tasks preempted in RCU read-side critical sections during | 1077 | * report on tasks preempted in RCU read-side critical sections during |
1070 | * expedited RCU grace periods. | 1078 | * expedited RCU grace periods. |
1071 | */ | 1079 | */ |
1072 | static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp) | 1080 | static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, |
1081 | bool wake) | ||
1073 | { | 1082 | { |
1074 | return; | ||
1075 | } | 1083 | } |
1076 | 1084 | ||
1077 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | 1085 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ |
@@ -1157,8 +1165,6 @@ static void rcu_initiate_boost_trace(struct rcu_node *rnp) | |||
1157 | 1165 | ||
1158 | #endif /* #else #ifdef CONFIG_RCU_TRACE */ | 1166 | #endif /* #else #ifdef CONFIG_RCU_TRACE */ |
1159 | 1167 | ||
1160 | static struct lock_class_key rcu_boost_class; | ||
1161 | |||
1162 | /* | 1168 | /* |
1163 | * Carry out RCU priority boosting on the task indicated by ->exp_tasks | 1169 | * Carry out RCU priority boosting on the task indicated by ->exp_tasks |
1164 | * or ->boost_tasks, advancing the pointer to the next task in the | 1170 | * or ->boost_tasks, advancing the pointer to the next task in the |
@@ -1221,15 +1227,13 @@ static int rcu_boost(struct rcu_node *rnp) | |||
1221 | */ | 1227 | */ |
1222 | t = container_of(tb, struct task_struct, rcu_node_entry); | 1228 | t = container_of(tb, struct task_struct, rcu_node_entry); |
1223 | rt_mutex_init_proxy_locked(&mtx, t); | 1229 | rt_mutex_init_proxy_locked(&mtx, t); |
1224 | /* Avoid lockdep false positives. This rt_mutex is its own thing. */ | ||
1225 | lockdep_set_class_and_name(&mtx.wait_lock, &rcu_boost_class, | ||
1226 | "rcu_boost_mutex"); | ||
1227 | t->rcu_boost_mutex = &mtx; | 1230 | t->rcu_boost_mutex = &mtx; |
1228 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1231 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
1229 | rt_mutex_lock(&mtx); /* Side effect: boosts task t's priority. */ | 1232 | rt_mutex_lock(&mtx); /* Side effect: boosts task t's priority. */ |
1230 | rt_mutex_unlock(&mtx); /* Keep lockdep happy. */ | 1233 | rt_mutex_unlock(&mtx); /* Keep lockdep happy. */ |
1231 | 1234 | ||
1232 | return rnp->exp_tasks != NULL || rnp->boost_tasks != NULL; | 1235 | return ACCESS_ONCE(rnp->exp_tasks) != NULL || |
1236 | ACCESS_ONCE(rnp->boost_tasks) != NULL; | ||
1233 | } | 1237 | } |
1234 | 1238 | ||
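The ACCESS_ONCE() wrappers added above force fresh loads of ->exp_tasks and ->boost_tasks after rt_mutex_unlock(), rather than letting the compiler reuse values read while the boost was still in effect. A userspace approximation of the same idiom (illustrative; the kernel provides its own ACCESS_ONCE macro and the field names are stand-ins here):

#include <stddef.h>

#define MODEL_ACCESS_ONCE(x) (*(volatile __typeof__(x) *)&(x))

struct list_head;				/* opaque for this sketch */
static struct list_head *exp_tasks, *boost_tasks;

static int model_boost_again(void)
{
	/* Forced volatile loads: the compiler may not reuse stale values. */
	return MODEL_ACCESS_ONCE(exp_tasks) != NULL ||
	       MODEL_ACCESS_ONCE(boost_tasks) != NULL;
}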
1235 | /* | 1239 | /* |
@@ -1329,6 +1333,15 @@ static void invoke_rcu_callbacks_kthread(void) | |||
1329 | } | 1333 | } |
1330 | 1334 | ||
1331 | /* | 1335 | /* |
1336 | * Is the current CPU running the RCU-callbacks kthread? | ||
1337 | * Caller must have preemption disabled. | ||
1338 | */ | ||
1339 | static bool rcu_is_callbacks_kthread(void) | ||
1340 | { | ||
1341 | return __get_cpu_var(rcu_cpu_kthread_task) == current; | ||
1342 | } | ||
1343 | |||
1344 | /* | ||
1332 | * Set the affinity of the boost kthread. The CPU-hotplug locks are | 1345 | * Set the affinity of the boost kthread. The CPU-hotplug locks are |
1333 | * held, so no one should be messing with the existence of the boost | 1346 | * held, so no one should be messing with the existence of the boost |
1334 | * kthread. | 1347 | * kthread. |
@@ -1772,6 +1785,11 @@ static void invoke_rcu_callbacks_kthread(void) | |||
1772 | WARN_ON_ONCE(1); | 1785 | WARN_ON_ONCE(1); |
1773 | } | 1786 | } |
1774 | 1787 | ||
1788 | static bool rcu_is_callbacks_kthread(void) | ||
1789 | { | ||
1790 | return false; | ||
1791 | } | ||
1792 | |||
1775 | static void rcu_preempt_boost_start_gp(struct rcu_node *rnp) | 1793 | static void rcu_preempt_boost_start_gp(struct rcu_node *rnp) |
1776 | { | 1794 | { |
1777 | } | 1795 | } |
@@ -1907,7 +1925,7 @@ void synchronize_sched_expedited(void) | |||
1907 | * grace period works for us. | 1925 | * grace period works for us. |
1908 | */ | 1926 | */ |
1909 | get_online_cpus(); | 1927 | get_online_cpus(); |
1910 | snap = atomic_read(&sync_sched_expedited_started) - 1; | 1928 | snap = atomic_read(&sync_sched_expedited_started); |
1911 | smp_mb(); /* ensure read is before try_stop_cpus(). */ | 1929 | smp_mb(); /* ensure read is before try_stop_cpus(). */ |
1912 | } | 1930 | } |
1913 | 1931 | ||
@@ -1939,88 +1957,243 @@ EXPORT_SYMBOL_GPL(synchronize_sched_expedited); | |||
1939 | * 1 if so. This function is part of the RCU implementation; it is -not- | 1957 | * 1 if so. This function is part of the RCU implementation; it is -not- |
1940 | * an exported member of the RCU API. | 1958 | * an exported member of the RCU API. |
1941 | * | 1959 | * |
1942 | * Because we have preemptible RCU, just check whether this CPU needs | 1960 | * Because we do not have RCU_FAST_NO_HZ, just check whether this CPU needs
1943 | * any flavor of RCU. Do not chew up lots of CPU cycles with preemption | 1961 | * any flavor of RCU. |
1944 | * disabled in a most-likely vain attempt to cause RCU not to need this CPU. | ||
1945 | */ | 1962 | */ |
1946 | int rcu_needs_cpu(int cpu) | 1963 | int rcu_needs_cpu(int cpu) |
1947 | { | 1964 | { |
1948 | return rcu_needs_cpu_quick_check(cpu); | 1965 | return rcu_cpu_has_callbacks(cpu); |
1966 | } | ||
1967 | |||
1968 | /* | ||
1969 | * Because we do not have RCU_FAST_NO_HZ, don't bother initializing for it. | ||
1970 | */ | ||
1971 | static void rcu_prepare_for_idle_init(int cpu) | ||
1972 | { | ||
1973 | } | ||
1974 | |||
1975 | /* | ||
1976 | * Because we do not have RCU_FAST_NO_HZ, don't bother cleaning up | ||
1977 | * after it. | ||
1978 | */ | ||
1979 | static void rcu_cleanup_after_idle(int cpu) | ||
1980 | { | ||
1981 | } | ||
1982 | |||
1983 | /* | ||
1984 | * Do the idle-entry grace-period work, which, because CONFIG_RCU_FAST_NO_HZ=n, | ||
1985 | * is nothing. | ||
1986 | */ | ||
1987 | static void rcu_prepare_for_idle(int cpu) | ||
1988 | { | ||
1949 | } | 1989 | } |
1950 | 1990 | ||
1951 | #else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */ | 1991 | #else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */ |
1952 | 1992 | ||
1953 | #define RCU_NEEDS_CPU_FLUSHES 5 | 1993 | /* |
1994 | * This code is invoked when a CPU goes idle, at which point we want | ||
1995 | * to have the CPU do everything required for RCU so that it can enter | ||
1996 | * the energy-efficient dyntick-idle mode. This is handled by a | ||
1997 | * state machine implemented by rcu_prepare_for_idle() below. | ||
1998 | * | ||
2000 | * The following three preprocessor symbols control this state machine: | ||
2000 | * | ||
2001 | * RCU_IDLE_FLUSHES gives the maximum number of times that we will attempt | ||
2002 | * to satisfy RCU. Beyond this point, it is better to incur a periodic | ||
2003 | * scheduling-clock interrupt than to loop through the state machine | ||
2004 | * at full power. | ||
2005 | * RCU_IDLE_OPT_FLUSHES gives the number of RCU_IDLE_FLUSHES that are | ||
2006 | * optional if RCU does not need anything immediately from this | ||
2007 | * CPU, even if this CPU still has RCU callbacks queued. The first | ||
2008 | * few passes through the state machine are mandatory: we need to give | ||
2009 | * the state machine a chance to communicate a quiescent state | ||
2010 | * to the RCU core. | ||
2011 | * RCU_IDLE_GP_DELAY gives the number of jiffies that a CPU is permitted | ||
2012 | * to sleep in dyntick-idle mode with RCU callbacks pending. This | ||
2013 | * is sized to be roughly one RCU grace period. Those energy-efficiency | ||
2014 | * benchmarkers who might otherwise be tempted to set this to a large | ||
2015 | * number, be warned: Setting RCU_IDLE_GP_DELAY too high can hang your | ||
2016 | * system. And if you are -that- concerned about energy efficiency, | ||
2017 | * just power the system down and be done with it! | ||
2018 | * | ||
2019 | * The values below work well in practice. If future workloads require | ||
2020 | * adjustment, they can be converted into kernel config parameters, though | ||
2021 | * making the state machine smarter might be a better option. | ||
2022 | */ | ||
2023 | #define RCU_IDLE_FLUSHES 5 /* Number of dyntick-idle tries. */ | ||
2024 | #define RCU_IDLE_OPT_FLUSHES 3 /* Optional dyntick-idle tries. */ | ||
2025 | #define RCU_IDLE_GP_DELAY 6 /* Roughly one grace period. */ | ||
2026 | |||
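A hedged model of the decision sequence that rcu_prepare_for_idle() below walks through on each idle entry, with the per-CPU state passed in explicitly and the return strings matching the trace points in the patch (the names and structure here are illustrative, not kernel code):

static const char *model_prepare_for_idle(int has_callbacks, int in_holdoff,
					  int *drain, int rcu_pending_now)
{
	if (!has_callbacks)
		return "No callbacks";		/* enter dyntick-idle, reset state */
	if (in_holdoff)
		return "In holdoff";		/* recently gave up; keep the tick */
	if (*drain <= 0)
		*drain = 5;			/* RCU_IDLE_FLUSHES: first attempt */
	else if (*drain <= 3 && !rcu_pending_now)	/* RCU_IDLE_OPT_FLUSHES */
		return "Dyntick with callbacks";	/* sleep; hrtimer set ~1 GP out */
	else if (--*drain <= 0)
		return "Begin holdoff";		/* too many attempts this jiffy */
	/* Otherwise push callbacks through the RCU core and re-evaluate. */
	return "More callbacks";
}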
1954 | static DEFINE_PER_CPU(int, rcu_dyntick_drain); | 2027 | static DEFINE_PER_CPU(int, rcu_dyntick_drain); |
1955 | static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff); | 2028 | static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff); |
2029 | static DEFINE_PER_CPU(struct hrtimer, rcu_idle_gp_timer); | ||
2030 | static ktime_t rcu_idle_gp_wait; | ||
1956 | 2031 | ||
1957 | /* | 2032 | /* |
1958 | * Check to see if any future RCU-related work will need to be done | 2033 | * Allow the CPU to enter dyntick-idle mode if either: (1) There are no |
1959 | * by the current CPU, even if none need be done immediately, returning | 2034 | * callbacks on this CPU, (2) this CPU has not yet attempted to enter |
1960 | * 1 if so. This function is part of the RCU implementation; it is -not- | 2035 | * dyntick-idle mode, or (3) this CPU is in the process of attempting to |
1961 | * an exported member of the RCU API. | 2036 | * enter dyntick-idle mode. Otherwise, if we have recently tried and failed |
2037 | * to enter dyntick-idle mode, we refuse to try to enter it. After all, | ||
2038 | * it is better to incur scheduling-clock interrupts than to spin | ||
2039 | * continuously for the same time duration! | ||
2040 | */ | ||
2041 | int rcu_needs_cpu(int cpu) | ||
2042 | { | ||
2043 | /* If no callbacks, RCU doesn't need the CPU. */ | ||
2044 | if (!rcu_cpu_has_callbacks(cpu)) | ||
2045 | return 0; | ||
2046 | /* Otherwise, RCU needs the CPU only if it recently tried and failed. */ | ||
2047 | return per_cpu(rcu_dyntick_holdoff, cpu) == jiffies; | ||
2048 | } | ||
2049 | |||
2050 | /* | ||
2051 | * Timer handler used to force CPU to start pushing its remaining RCU | ||
2052 | * callbacks in the case where it entered dyntick-idle mode with callbacks | ||
2053 | * pending. The handler doesn't really need to do anything because the | ||
2054 | * real work is done upon re-entry to idle, or by the next scheduling-clock | ||
2055 | * interrupt should idle not be re-entered. | ||
2056 | */ | ||
2057 | static enum hrtimer_restart rcu_idle_gp_timer_func(struct hrtimer *hrtp) | ||
2058 | { | ||
2059 | trace_rcu_prep_idle("Timer"); | ||
2060 | return HRTIMER_NORESTART; | ||
2061 | } | ||
2062 | |||
2063 | /* | ||
2064 | * Initialize the timer used to pull CPUs out of dyntick-idle mode. | ||
2065 | */ | ||
2066 | static void rcu_prepare_for_idle_init(int cpu) | ||
2067 | { | ||
2068 | static int firsttime = 1; | ||
2069 | struct hrtimer *hrtp = &per_cpu(rcu_idle_gp_timer, cpu); | ||
2070 | |||
2071 | hrtimer_init(hrtp, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | ||
2072 | hrtp->function = rcu_idle_gp_timer_func; | ||
2073 | if (firsttime) { | ||
2074 | unsigned int upj = jiffies_to_usecs(RCU_IDLE_GP_DELAY); | ||
2075 | |||
2076 | rcu_idle_gp_wait = ns_to_ktime(upj * (u64)1000); | ||
2077 | firsttime = 0; | ||
2078 | } | ||
2079 | } | ||
2080 | |||
2081 | /* | ||
2082 | * Clean up for exit from idle. Because we are exiting from idle, there | ||
2083 | * is no longer any point to rcu_idle_gp_timer, so cancel it. This will | ||
2084 | * do nothing if this timer is not active, so just cancel it unconditionally. | ||
2085 | */ | ||
2086 | static void rcu_cleanup_after_idle(int cpu) | ||
2087 | { | ||
2088 | hrtimer_cancel(&per_cpu(rcu_idle_gp_timer, cpu)); | ||
2089 | } | ||
2090 | |||
2091 | /* | ||
2092 | * Check to see if any RCU-related work can be done by the current CPU, | ||
2093 | * and if so, schedule a softirq to get it done. This function is part | ||
2094 | * of the RCU implementation; it is -not- an exported member of the RCU API. | ||
1962 | * | 2095 | * |
1963 | * Because we are not supporting preemptible RCU, attempt to accelerate | 2096 | * The idea is for the current CPU to clear out all work required by the |
1964 | * any current grace periods so that RCU no longer needs this CPU, but | 2097 | * RCU core for the current grace period, so that this CPU can be permitted |
1965 | * only if all other CPUs are already in dynticks-idle mode. This will | 2098 | * to enter dyntick-idle mode. In some cases, it will need to be awakened |
1966 | * allow the CPU cores to be powered down immediately, as opposed to after | 2099 | * at the end of the grace period by whatever CPU ends the grace period. |
1967 | * waiting many milliseconds for grace periods to elapse. | 2100 | * This allows CPUs to go dyntick-idle more quickly, and to reduce the |
2101 | * number of wakeups by a modest integer factor. | ||
1968 | * | 2102 | * |
1969 | * Because it is not legal to invoke rcu_process_callbacks() with irqs | 2103 | * Because it is not legal to invoke rcu_process_callbacks() with irqs |
1970 | * disabled, we do one pass of force_quiescent_state(), then do a | 2104 | * disabled, we do one pass of force_quiescent_state(), then do a |
1971 | * invoke_rcu_core() to cause rcu_process_callbacks() to be invoked | 2105 | * invoke_rcu_core() to cause rcu_process_callbacks() to be invoked |
1972 | * later. The per-cpu rcu_dyntick_drain variable controls the sequencing. | 2106 | * later. The per-cpu rcu_dyntick_drain variable controls the sequencing. |
2107 | * | ||
2108 | * The caller must have disabled interrupts. | ||
1973 | */ | 2109 | */ |
1974 | int rcu_needs_cpu(int cpu) | 2110 | static void rcu_prepare_for_idle(int cpu) |
1975 | { | 2111 | { |
1976 | int c = 0; | 2112 | unsigned long flags; |
1977 | int snap; | 2113 | |
1978 | int thatcpu; | 2114 | local_irq_save(flags); |
1979 | 2115 | ||
1980 | /* Check for being in the holdoff period. */ | 2116 | /* |
1981 | if (per_cpu(rcu_dyntick_holdoff, cpu) == jiffies) | 2117 | * If there are no callbacks on this CPU, enter dyntick-idle mode. |
1982 | return rcu_needs_cpu_quick_check(cpu); | 2118 | * Also reset state to avoid prejudicing later attempts. |
1983 | 2119 | */ | |
1984 | /* Don't bother unless we are the last non-dyntick-idle CPU. */ | 2120 | if (!rcu_cpu_has_callbacks(cpu)) { |
1985 | for_each_online_cpu(thatcpu) { | 2121 | per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1; |
1986 | if (thatcpu == cpu) | 2122 | per_cpu(rcu_dyntick_drain, cpu) = 0; |
1987 | continue; | 2123 | local_irq_restore(flags); |
1988 | snap = atomic_add_return(0, &per_cpu(rcu_dynticks, | 2124 | trace_rcu_prep_idle("No callbacks"); |
1989 | thatcpu).dynticks); | 2125 | return; |
1990 | smp_mb(); /* Order sampling of snap with end of grace period. */ | 2126 | } |
1991 | if ((snap & 0x1) != 0) { | 2127 | |
1992 | per_cpu(rcu_dyntick_drain, cpu) = 0; | 2128 | /* |
1993 | per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1; | 2129 | * If in holdoff mode, just return. We will presumably have |
1994 | return rcu_needs_cpu_quick_check(cpu); | 2130 | * refrained from disabling the scheduling-clock tick. |
1995 | } | 2131 | */ |
2132 | if (per_cpu(rcu_dyntick_holdoff, cpu) == jiffies) { | ||
2133 | local_irq_restore(flags); | ||
2134 | trace_rcu_prep_idle("In holdoff"); | ||
2135 | return; | ||
1996 | } | 2136 | } |
1997 | 2137 | ||
1998 | /* Check and update the rcu_dyntick_drain sequencing. */ | 2138 | /* Check and update the rcu_dyntick_drain sequencing. */ |
1999 | if (per_cpu(rcu_dyntick_drain, cpu) <= 0) { | 2139 | if (per_cpu(rcu_dyntick_drain, cpu) <= 0) { |
2000 | /* First time through, initialize the counter. */ | 2140 | /* First time through, initialize the counter. */ |
2001 | per_cpu(rcu_dyntick_drain, cpu) = RCU_NEEDS_CPU_FLUSHES; | 2141 | per_cpu(rcu_dyntick_drain, cpu) = RCU_IDLE_FLUSHES; |
2142 | } else if (per_cpu(rcu_dyntick_drain, cpu) <= RCU_IDLE_OPT_FLUSHES && | ||
2143 | !rcu_pending(cpu)) { | ||
2144 | /* Can we go dyntick-idle despite still having callbacks? */ | ||
2145 | trace_rcu_prep_idle("Dyntick with callbacks"); | ||
2146 | per_cpu(rcu_dyntick_drain, cpu) = 0; | ||
2147 | per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1; | ||
2148 | hrtimer_start(&per_cpu(rcu_idle_gp_timer, cpu), | ||
2149 | rcu_idle_gp_wait, HRTIMER_MODE_REL); | ||
2150 | return; /* Nothing more to do immediately. */ | ||
2002 | } else if (--per_cpu(rcu_dyntick_drain, cpu) <= 0) { | 2151 | } else if (--per_cpu(rcu_dyntick_drain, cpu) <= 0) { |
2003 | /* We have hit the limit, so time to give up. */ | 2152 | /* We have hit the limit, so time to give up. */ |
2004 | per_cpu(rcu_dyntick_holdoff, cpu) = jiffies; | 2153 | per_cpu(rcu_dyntick_holdoff, cpu) = jiffies; |
2005 | return rcu_needs_cpu_quick_check(cpu); | 2154 | local_irq_restore(flags); |
2155 | trace_rcu_prep_idle("Begin holdoff"); | ||
2156 | invoke_rcu_core(); /* Force the CPU out of dyntick-idle. */ | ||
2157 | return; | ||
2006 | } | 2158 | } |
2007 | 2159 | ||
2008 | /* Do one step pushing remaining RCU callbacks through. */ | 2160 | /* |
2161 | * Do one step of pushing the remaining RCU callbacks through | ||
2162 | * the RCU core state machine. | ||
2163 | */ | ||
2164 | #ifdef CONFIG_TREE_PREEMPT_RCU | ||
2165 | if (per_cpu(rcu_preempt_data, cpu).nxtlist) { | ||
2166 | local_irq_restore(flags); | ||
2167 | rcu_preempt_qs(cpu); | ||
2168 | force_quiescent_state(&rcu_preempt_state, 0); | ||
2169 | local_irq_save(flags); | ||
2170 | } | ||
2171 | #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ | ||
2009 | if (per_cpu(rcu_sched_data, cpu).nxtlist) { | 2172 | if (per_cpu(rcu_sched_data, cpu).nxtlist) { |
2173 | local_irq_restore(flags); | ||
2010 | rcu_sched_qs(cpu); | 2174 | rcu_sched_qs(cpu); |
2011 | force_quiescent_state(&rcu_sched_state, 0); | 2175 | force_quiescent_state(&rcu_sched_state, 0); |
2012 | c = c || per_cpu(rcu_sched_data, cpu).nxtlist; | 2176 | local_irq_save(flags); |
2013 | } | 2177 | } |
2014 | if (per_cpu(rcu_bh_data, cpu).nxtlist) { | 2178 | if (per_cpu(rcu_bh_data, cpu).nxtlist) { |
2179 | local_irq_restore(flags); | ||
2015 | rcu_bh_qs(cpu); | 2180 | rcu_bh_qs(cpu); |
2016 | force_quiescent_state(&rcu_bh_state, 0); | 2181 | force_quiescent_state(&rcu_bh_state, 0); |
2017 | c = c || per_cpu(rcu_bh_data, cpu).nxtlist; | 2182 | local_irq_save(flags); |
2018 | } | 2183 | } |
2019 | 2184 | ||
2020 | /* If RCU callbacks are still pending, RCU still needs this CPU. */ | 2185 | /* |
2021 | if (c) | 2186 | * If RCU callbacks are still pending, RCU still needs this CPU. |
2187 | * So try forcing the callbacks through the grace period. | ||
2188 | */ | ||
2189 | if (rcu_cpu_has_callbacks(cpu)) { | ||
2190 | local_irq_restore(flags); | ||
2191 | trace_rcu_prep_idle("More callbacks"); | ||
2022 | invoke_rcu_core(); | 2192 | invoke_rcu_core(); |
2023 | return c; | 2193 | } else { |
2194 | local_irq_restore(flags); | ||
2195 | trace_rcu_prep_idle("Callbacks drained"); | ||
2196 | } | ||
2024 | } | 2197 | } |
2025 | 2198 | ||
2026 | #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */ | 2199 | #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */ |
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index 9feffa4c0695..654cfe67f0d1 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c | |||
@@ -67,13 +67,11 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp) | |||
67 | rdp->completed, rdp->gpnum, | 67 | rdp->completed, rdp->gpnum, |
68 | rdp->passed_quiesce, rdp->passed_quiesce_gpnum, | 68 | rdp->passed_quiesce, rdp->passed_quiesce_gpnum, |
69 | rdp->qs_pending); | 69 | rdp->qs_pending); |
70 | #ifdef CONFIG_NO_HZ | 70 | seq_printf(m, " dt=%d/%llx/%d df=%lu", |
71 | seq_printf(m, " dt=%d/%d/%d df=%lu", | ||
72 | atomic_read(&rdp->dynticks->dynticks), | 71 | atomic_read(&rdp->dynticks->dynticks), |
73 | rdp->dynticks->dynticks_nesting, | 72 | rdp->dynticks->dynticks_nesting, |
74 | rdp->dynticks->dynticks_nmi_nesting, | 73 | rdp->dynticks->dynticks_nmi_nesting, |
75 | rdp->dynticks_fqs); | 74 | rdp->dynticks_fqs); |
76 | #endif /* #ifdef CONFIG_NO_HZ */ | ||
77 | seq_printf(m, " of=%lu ri=%lu", rdp->offline_fqs, rdp->resched_ipi); | 75 | seq_printf(m, " of=%lu ri=%lu", rdp->offline_fqs, rdp->resched_ipi); |
78 | seq_printf(m, " ql=%ld qs=%c%c%c%c", | 76 | seq_printf(m, " ql=%ld qs=%c%c%c%c", |
79 | rdp->qlen, | 77 | rdp->qlen, |
@@ -141,13 +139,11 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp) | |||
141 | rdp->completed, rdp->gpnum, | 139 | rdp->completed, rdp->gpnum, |
142 | rdp->passed_quiesce, rdp->passed_quiesce_gpnum, | 140 | rdp->passed_quiesce, rdp->passed_quiesce_gpnum, |
143 | rdp->qs_pending); | 141 | rdp->qs_pending); |
144 | #ifdef CONFIG_NO_HZ | 142 | seq_printf(m, ",%d,%llx,%d,%lu", |
145 | seq_printf(m, ",%d,%d,%d,%lu", | ||
146 | atomic_read(&rdp->dynticks->dynticks), | 143 | atomic_read(&rdp->dynticks->dynticks), |
147 | rdp->dynticks->dynticks_nesting, | 144 | rdp->dynticks->dynticks_nesting, |
148 | rdp->dynticks->dynticks_nmi_nesting, | 145 | rdp->dynticks->dynticks_nmi_nesting, |
149 | rdp->dynticks_fqs); | 146 | rdp->dynticks_fqs); |
150 | #endif /* #ifdef CONFIG_NO_HZ */ | ||
151 | seq_printf(m, ",%lu,%lu", rdp->offline_fqs, rdp->resched_ipi); | 147 | seq_printf(m, ",%lu,%lu", rdp->offline_fqs, rdp->resched_ipi); |
152 | seq_printf(m, ",%ld,\"%c%c%c%c\"", rdp->qlen, | 148 | seq_printf(m, ",%ld,\"%c%c%c%c\"", rdp->qlen, |
153 | ".N"[rdp->nxttail[RCU_NEXT_READY_TAIL] != | 149 | ".N"[rdp->nxttail[RCU_NEXT_READY_TAIL] != |
@@ -171,9 +167,7 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp) | |||
171 | static int show_rcudata_csv(struct seq_file *m, void *unused) | 167 | static int show_rcudata_csv(struct seq_file *m, void *unused) |
172 | { | 168 | { |
173 | seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pgp\",\"pq\","); | 169 | seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pgp\",\"pq\","); |
174 | #ifdef CONFIG_NO_HZ | ||
175 | seq_puts(m, "\"dt\",\"dt nesting\",\"dt NMI nesting\",\"df\","); | 170 | seq_puts(m, "\"dt\",\"dt nesting\",\"dt NMI nesting\",\"df\","); |
176 | #endif /* #ifdef CONFIG_NO_HZ */ | ||
177 | seq_puts(m, "\"of\",\"ri\",\"ql\",\"qs\""); | 171 | seq_puts(m, "\"of\",\"ri\",\"ql\",\"qs\""); |
178 | #ifdef CONFIG_RCU_BOOST | 172 | #ifdef CONFIG_RCU_BOOST |
179 | seq_puts(m, "\"kt\",\"ktl\""); | 173 | seq_puts(m, "\"kt\",\"ktl\""); |
@@ -278,7 +272,7 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp) | |||
278 | gpnum = rsp->gpnum; | 272 | gpnum = rsp->gpnum; |
279 | seq_printf(m, "c=%lu g=%lu s=%d jfq=%ld j=%x " | 273 | seq_printf(m, "c=%lu g=%lu s=%d jfq=%ld j=%x " |
280 | "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu\n", | 274 | "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu\n", |
281 | rsp->completed, gpnum, rsp->signaled, | 275 | rsp->completed, gpnum, rsp->fqs_state, |
282 | (long)(rsp->jiffies_force_qs - jiffies), | 276 | (long)(rsp->jiffies_force_qs - jiffies), |
283 | (int)(jiffies & 0xffff), | 277 | (int)(jiffies & 0xffff), |
284 | rsp->n_force_qs, rsp->n_force_qs_ngp, | 278 | rsp->n_force_qs, rsp->n_force_qs_ngp, |
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c index f9d8482dd487..a242e691c993 100644 --- a/kernel/rtmutex.c +++ b/kernel/rtmutex.c | |||
@@ -579,7 +579,6 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state, | |||
579 | struct rt_mutex_waiter *waiter) | 579 | struct rt_mutex_waiter *waiter) |
580 | { | 580 | { |
581 | int ret = 0; | 581 | int ret = 0; |
582 | int was_disabled; | ||
583 | 582 | ||
584 | for (;;) { | 583 | for (;;) { |
585 | /* Try to acquire the lock: */ | 584 | /* Try to acquire the lock: */ |
@@ -602,17 +601,10 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state, | |||
602 | 601 | ||
603 | raw_spin_unlock(&lock->wait_lock); | 602 | raw_spin_unlock(&lock->wait_lock); |
604 | 603 | ||
605 | was_disabled = irqs_disabled(); | ||
606 | if (was_disabled) | ||
607 | local_irq_enable(); | ||
608 | |||
609 | debug_rt_mutex_print_deadlock(waiter); | 604 | debug_rt_mutex_print_deadlock(waiter); |
610 | 605 | ||
611 | schedule_rt_mutex(lock); | 606 | schedule_rt_mutex(lock); |
612 | 607 | ||
613 | if (was_disabled) | ||
614 | local_irq_disable(); | ||
615 | |||
616 | raw_spin_lock(&lock->wait_lock); | 608 | raw_spin_lock(&lock->wait_lock); |
617 | set_current_state(state); | 609 | set_current_state(state); |
618 | } | 610 | } |
diff --git a/kernel/softirq.c b/kernel/softirq.c index 2c71d91efff0..4eb3a0fa351e 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
@@ -347,12 +347,12 @@ void irq_exit(void) | |||
347 | if (!in_interrupt() && local_softirq_pending()) | 347 | if (!in_interrupt() && local_softirq_pending()) |
348 | invoke_softirq(); | 348 | invoke_softirq(); |
349 | 349 | ||
350 | rcu_irq_exit(); | ||
351 | #ifdef CONFIG_NO_HZ | 350 | #ifdef CONFIG_NO_HZ |
352 | /* Make sure that timer wheel updates are propagated */ | 351 | /* Make sure that timer wheel updates are propagated */ |
353 | if (idle_cpu(smp_processor_id()) && !in_interrupt() && !need_resched()) | 352 | if (idle_cpu(smp_processor_id()) && !in_interrupt() && !need_resched()) |
354 | tick_nohz_stop_sched_tick(0); | 353 | tick_nohz_irq_exit(); |
355 | #endif | 354 | #endif |
355 | rcu_irq_exit(); | ||
356 | preempt_enable_no_resched(); | 356 | preempt_enable_no_resched(); |
357 | } | 357 | } |
358 | 358 | ||
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 40420644d0ba..0ec8b832ab6b 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c | |||
@@ -275,42 +275,17 @@ u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time) | |||
275 | } | 275 | } |
276 | EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us); | 276 | EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us); |
277 | 277 | ||
278 | /** | 278 | static void tick_nohz_stop_sched_tick(struct tick_sched *ts) |
279 | * tick_nohz_stop_sched_tick - stop the idle tick from the idle task | ||
280 | * | ||
281 | * When the next event is more than a tick into the future, stop the idle tick | ||
282 | * Called either from the idle loop or from irq_exit() when an idle period was | ||
283 | * just interrupted by an interrupt which did not cause a reschedule. | ||
284 | */ | ||
285 | void tick_nohz_stop_sched_tick(int inidle) | ||
286 | { | 279 | { |
287 | unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags; | 280 | unsigned long seq, last_jiffies, next_jiffies, delta_jiffies; |
288 | struct tick_sched *ts; | ||
289 | ktime_t last_update, expires, now; | 281 | ktime_t last_update, expires, now; |
290 | struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; | 282 | struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; |
291 | u64 time_delta; | 283 | u64 time_delta; |
292 | int cpu; | 284 | int cpu; |
293 | 285 | ||
294 | local_irq_save(flags); | ||
295 | |||
296 | cpu = smp_processor_id(); | 286 | cpu = smp_processor_id(); |
297 | ts = &per_cpu(tick_cpu_sched, cpu); | 287 | ts = &per_cpu(tick_cpu_sched, cpu); |
298 | 288 | ||
299 | /* | ||
300 | * Call to tick_nohz_start_idle stops the last_update_time from being | ||
301 | * updated. Thus, it must not be called in the event we are called from | ||
302 | * irq_exit() with the prior state different than idle. | ||
303 | */ | ||
304 | if (!inidle && !ts->inidle) | ||
305 | goto end; | ||
306 | |||
307 | /* | ||
308 | * Set ts->inidle unconditionally. Even if the system did not | ||
309 | * switch to NOHZ mode the cpu frequency governers rely on the | ||
310 | * update of the idle time accounting in tick_nohz_start_idle(). | ||
311 | */ | ||
312 | ts->inidle = 1; | ||
313 | |||
314 | now = tick_nohz_start_idle(cpu, ts); | 289 | now = tick_nohz_start_idle(cpu, ts); |
315 | 290 | ||
316 | /* | 291 | /* |
@@ -326,10 +301,10 @@ void tick_nohz_stop_sched_tick(int inidle) | |||
326 | } | 301 | } |
327 | 302 | ||
328 | if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) | 303 | if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) |
329 | goto end; | 304 | return; |
330 | 305 | ||
331 | if (need_resched()) | 306 | if (need_resched()) |
332 | goto end; | 307 | return; |
333 | 308 | ||
334 | if (unlikely(local_softirq_pending() && cpu_online(cpu))) { | 309 | if (unlikely(local_softirq_pending() && cpu_online(cpu))) { |
335 | static int ratelimit; | 310 | static int ratelimit; |
@@ -339,7 +314,7 @@ void tick_nohz_stop_sched_tick(int inidle) | |||
339 | (unsigned int) local_softirq_pending()); | 314 | (unsigned int) local_softirq_pending()); |
340 | ratelimit++; | 315 | ratelimit++; |
341 | } | 316 | } |
342 | goto end; | 317 | return; |
343 | } | 318 | } |
344 | 319 | ||
345 | ts->idle_calls++; | 320 | ts->idle_calls++; |
@@ -434,7 +409,6 @@ void tick_nohz_stop_sched_tick(int inidle) | |||
434 | ts->idle_tick = hrtimer_get_expires(&ts->sched_timer); | 409 | ts->idle_tick = hrtimer_get_expires(&ts->sched_timer); |
435 | ts->tick_stopped = 1; | 410 | ts->tick_stopped = 1; |
436 | ts->idle_jiffies = last_jiffies; | 411 | ts->idle_jiffies = last_jiffies; |
437 | rcu_enter_nohz(); | ||
438 | } | 412 | } |
439 | 413 | ||
440 | ts->idle_sleeps++; | 414 | ts->idle_sleeps++; |
@@ -472,8 +446,56 @@ out: | |||
472 | ts->next_jiffies = next_jiffies; | 446 | ts->next_jiffies = next_jiffies; |
473 | ts->last_jiffies = last_jiffies; | 447 | ts->last_jiffies = last_jiffies; |
474 | ts->sleep_length = ktime_sub(dev->next_event, now); | 448 | ts->sleep_length = ktime_sub(dev->next_event, now); |
475 | end: | 449 | } |
476 | local_irq_restore(flags); | 450 | |
451 | /** | ||
452 | * tick_nohz_idle_enter - stop the idle tick from the idle task | ||
453 | * | ||
454 | * When the next event is more than a tick into the future, stop the idle tick | ||
455 | * Called when we start the idle loop. | ||
456 | * | ||
457 | * The arch is responsible for calling: | ||
458 | * | ||
459 | * - rcu_idle_enter() after its last use of RCU before the CPU is put | ||
460 | * to sleep. | ||
461 | * - rcu_idle_exit() before the first use of RCU after the CPU is woken up. | ||
462 | */ | ||
463 | void tick_nohz_idle_enter(void) | ||
464 | { | ||
465 | struct tick_sched *ts; | ||
466 | |||
467 | WARN_ON_ONCE(irqs_disabled()); | ||
468 | |||
469 | local_irq_disable(); | ||
470 | |||
471 | ts = &__get_cpu_var(tick_cpu_sched); | ||
472 | /* | ||
473 | * set ts->inidle unconditionally. even if the system did not | ||
474 | * switch to nohz mode the cpu frequency governers rely on the | ||
475 | * update of the idle time accounting in tick_nohz_start_idle(). | ||
476 | */ | ||
477 | ts->inidle = 1; | ||
478 | tick_nohz_stop_sched_tick(ts); | ||
479 | |||
480 | local_irq_enable(); | ||
481 | } | ||
482 | |||
483 | /** | ||
484 | * tick_nohz_irq_exit - update next tick event from interrupt exit | ||
485 | * | ||
486 | * When an interrupt fires while we are idle and it doesn't cause | ||
487 | * a reschedule, it may still add, modify or delete a timer, enqueue | ||
488 | * an RCU callback, etc... | ||
489 | * So we need to re-calculate and reprogram the next tick event. | ||
490 | */ | ||
491 | void tick_nohz_irq_exit(void) | ||
492 | { | ||
493 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); | ||
494 | |||
495 | if (!ts->inidle) | ||
496 | return; | ||
497 | |||
498 | tick_nohz_stop_sched_tick(ts); | ||
477 | } | 499 | } |
478 | 500 | ||
479 | /** | 501 | /** |
@@ -515,11 +537,13 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) | |||
515 | } | 537 | } |
516 | 538 | ||
517 | /** | 539 | /** |
518 | * tick_nohz_restart_sched_tick - restart the idle tick from the idle task | 540 | * tick_nohz_idle_exit - restart the idle tick from the idle task |
519 | * | 541 | * |
520 | * Restart the idle tick when the CPU is woken up from idle | 542 | * Restart the idle tick when the CPU is woken up from idle |
543 | * This also exits the RCU extended quiescent state. The CPU | ||
544 | * can use RCU again after this function is called. | ||
521 | */ | 545 | */ |
522 | void tick_nohz_restart_sched_tick(void) | 546 | void tick_nohz_idle_exit(void) |
523 | { | 547 | { |
524 | int cpu = smp_processor_id(); | 548 | int cpu = smp_processor_id(); |
525 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | 549 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); |
@@ -529,6 +553,7 @@ void tick_nohz_restart_sched_tick(void) | |||
529 | ktime_t now; | 553 | ktime_t now; |
530 | 554 | ||
531 | local_irq_disable(); | 555 | local_irq_disable(); |
556 | |||
532 | if (ts->idle_active || (ts->inidle && ts->tick_stopped)) | 557 | if (ts->idle_active || (ts->inidle && ts->tick_stopped)) |
533 | now = ktime_get(); | 558 | now = ktime_get(); |
534 | 559 | ||
@@ -543,8 +568,6 @@ void tick_nohz_restart_sched_tick(void) | |||
543 | 568 | ||
544 | ts->inidle = 0; | 569 | ts->inidle = 0; |
545 | 570 | ||
546 | rcu_exit_nohz(); | ||
547 | |||
548 | /* Update jiffies first */ | 571 | /* Update jiffies first */ |
549 | select_nohz_load_balancer(0); | 572 | select_nohz_load_balancer(0); |
550 | tick_do_update_jiffies64(now); | 573 | tick_do_update_jiffies64(now); |
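With the RCU calls removed from the tick code above, an architecture's idle loop is expected to pair the two APIs itself, as the tick_nohz_idle_enter() kerneldoc states. A hedged sketch of that arrangement, where arch_cpu_sleep() is a hypothetical stand-in for the low-level sleep primitive and exact placement varies by architecture:

static void example_idle_loop(void)
{
	while (1) {
		tick_nohz_idle_enter();		/* stop the tick; no RCU side effects */
		while (!need_resched()) {
			rcu_idle_enter();	/* last use of RCU before sleeping */
			arch_cpu_sleep();	/* hypothetical wfi/hlt wrapper */
			rcu_idle_exit();	/* first use of RCU after wakeup */
		}
		tick_nohz_idle_exit();		/* restart the tick for this CPU */
		/* ...then drop into the scheduler as the arch normally does... */
	}
}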
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index f2bd275bb60f..a043d224adf6 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
@@ -4775,6 +4775,7 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) | |||
4775 | { | 4775 | { |
4776 | __ftrace_dump(true, oops_dump_mode); | 4776 | __ftrace_dump(true, oops_dump_mode); |
4777 | } | 4777 | } |
4778 | EXPORT_SYMBOL_GPL(ftrace_dump); | ||
4778 | 4779 | ||
4779 | __init static int tracer_alloc_buffers(void) | 4780 | __init static int tracer_alloc_buffers(void) |
4780 | { | 4781 | { |