diff options
Diffstat (limited to 'kernel/time/tick-sched.c')
-rw-r--r-- | kernel/time/tick-sched.c | 296 |
1 files changed, 280 insertions, 16 deletions
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 225f8bf19095..bc67d4245e1d 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c | |||
@@ -21,11 +21,15 @@ | |||
21 | #include <linux/sched.h> | 21 | #include <linux/sched.h> |
22 | #include <linux/module.h> | 22 | #include <linux/module.h> |
23 | #include <linux/irq_work.h> | 23 | #include <linux/irq_work.h> |
24 | #include <linux/posix-timers.h> | ||
25 | #include <linux/perf_event.h> | ||
24 | 26 | ||
25 | #include <asm/irq_regs.h> | 27 | #include <asm/irq_regs.h> |
26 | 28 | ||
27 | #include "tick-internal.h" | 29 | #include "tick-internal.h" |
28 | 30 | ||
31 | #include <trace/events/timer.h> | ||
32 | |||
29 | /* | 33 | /* |
30 | * Per cpu nohz control structure | 34 | * Per cpu nohz control structure |
31 | */ | 35 | */ |
@@ -104,7 +108,7 @@ static void tick_sched_do_timer(ktime_t now) | |||
104 | { | 108 | { |
105 | int cpu = smp_processor_id(); | 109 | int cpu = smp_processor_id(); |
106 | 110 | ||
107 | #ifdef CONFIG_NO_HZ | 111 | #ifdef CONFIG_NO_HZ_COMMON |
108 | /* | 112 | /* |
109 | * Check if the do_timer duty was dropped. We don't care about | 113 | * Check if the do_timer duty was dropped. We don't care about |
110 | * concurrency: This happens only when the cpu in charge went | 114 | * concurrency: This happens only when the cpu in charge went |
@@ -112,7 +116,8 @@ static void tick_sched_do_timer(ktime_t now) | |||
112 | * this duty, then the jiffies update is still serialized by | 116 | * this duty, then the jiffies update is still serialized by |
113 | * jiffies_lock. | 117 | * jiffies_lock. |
114 | */ | 118 | */ |
115 | if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) | 119 | if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE) |
120 | && !tick_nohz_full_cpu(cpu)) | ||
116 | tick_do_timer_cpu = cpu; | 121 | tick_do_timer_cpu = cpu; |
117 | #endif | 122 | #endif |
118 | 123 | ||
@@ -123,7 +128,7 @@ static void tick_sched_do_timer(ktime_t now) | |||
123 | 128 | ||
124 | static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs) | 129 | static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs) |
125 | { | 130 | { |
126 | #ifdef CONFIG_NO_HZ | 131 | #ifdef CONFIG_NO_HZ_COMMON |
127 | /* | 132 | /* |
128 | * When we are idle and the tick is stopped, we have to touch | 133 | * When we are idle and the tick is stopped, we have to touch |
129 | * the watchdog as we might not schedule for a really long | 134 | * the watchdog as we might not schedule for a really long |
@@ -142,10 +147,226 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs) | |||
142 | profile_tick(CPU_PROFILING); | 147 | profile_tick(CPU_PROFILING); |
143 | } | 148 | } |
144 | 149 | ||
150 | #ifdef CONFIG_NO_HZ_FULL | ||
151 | static cpumask_var_t nohz_full_mask; | ||
152 | bool have_nohz_full_mask; | ||
153 | |||
154 | static bool can_stop_full_tick(void) | ||
155 | { | ||
156 | WARN_ON_ONCE(!irqs_disabled()); | ||
157 | |||
158 | if (!sched_can_stop_tick()) { | ||
159 | trace_tick_stop(0, "more than 1 task in runqueue\n"); | ||
160 | return false; | ||
161 | } | ||
162 | |||
163 | if (!posix_cpu_timers_can_stop_tick(current)) { | ||
164 | trace_tick_stop(0, "posix timers running\n"); | ||
165 | return false; | ||
166 | } | ||
167 | |||
168 | if (!perf_event_can_stop_tick()) { | ||
169 | trace_tick_stop(0, "perf events running\n"); | ||
170 | return false; | ||
171 | } | ||
172 | |||
173 | /* sched_clock_tick() needs us? */ | ||
174 | #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK | ||
175 | /* | ||
176 | * TODO: kick full dynticks CPUs when | ||
177 | * sched_clock_stable is set. | ||
178 | */ | ||
179 | if (!sched_clock_stable) { | ||
180 | trace_tick_stop(0, "unstable sched clock\n"); | ||
181 | return false; | ||
182 | } | ||
183 | #endif | ||
184 | |||
185 | return true; | ||
186 | } | ||
187 | |||
188 | static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now); | ||
189 | |||
190 | /* | ||
191 | * Re-evaluate the need for the tick on the current CPU | ||
192 | * and restart it if necessary. | ||
193 | */ | ||
194 | void tick_nohz_full_check(void) | ||
195 | { | ||
196 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); | ||
197 | |||
198 | if (tick_nohz_full_cpu(smp_processor_id())) { | ||
199 | if (ts->tick_stopped && !is_idle_task(current)) { | ||
200 | if (!can_stop_full_tick()) | ||
201 | tick_nohz_restart_sched_tick(ts, ktime_get()); | ||
202 | } | ||
203 | } | ||
204 | } | ||
205 | |||
206 | static void nohz_full_kick_work_func(struct irq_work *work) | ||
207 | { | ||
208 | tick_nohz_full_check(); | ||
209 | } | ||
210 | |||
211 | static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = { | ||
212 | .func = nohz_full_kick_work_func, | ||
213 | }; | ||
214 | |||
215 | /* | ||
216 | * Kick the current CPU if it's full dynticks in order to force it to | ||
217 | * re-evaluate its dependency on the tick and restart it if necessary. | ||
218 | */ | ||
219 | void tick_nohz_full_kick(void) | ||
220 | { | ||
221 | if (tick_nohz_full_cpu(smp_processor_id())) | ||
222 | irq_work_queue(&__get_cpu_var(nohz_full_kick_work)); | ||
223 | } | ||
224 | |||
/*
 * Cross-CPU call handler: re-check the tick dependency of the CPU that
 * receives the call. The info cookie is unused.
 */
static void nohz_full_kick_ipi(void *info)
{
	tick_nohz_full_check();
}
229 | |||
230 | /* | ||
231 | * Kick all full dynticks CPUs in order to force these to re-evaluate | ||
232 | * their dependency on the tick and restart it if necessary. | ||
233 | */ | ||
234 | void tick_nohz_full_kick_all(void) | ||
235 | { | ||
236 | if (!have_nohz_full_mask) | ||
237 | return; | ||
238 | |||
239 | preempt_disable(); | ||
240 | smp_call_function_many(nohz_full_mask, | ||
241 | nohz_full_kick_ipi, NULL, false); | ||
242 | preempt_enable(); | ||
243 | } | ||
244 | |||
/*
 * Called on task switch to re-evaluate the need for the tick.
 * The incoming task might need it due to per task/process properties:
 * perf events, posix cpu timers, ...
 *
 * Runs with local interrupts disabled for the duration of the check.
 * If this is a full dynticks CPU whose tick is stopped but can no longer
 * stay stopped, the CPU kicks itself. @tsk is currently not inspected.
 */
void tick_nohz_task_switch(struct task_struct *tsk)
{
	unsigned long flags;

	local_irq_save(flags);

	if (tick_nohz_full_cpu(smp_processor_id()) &&
	    tick_nohz_tick_stopped() &&
	    !can_stop_full_tick())
		tick_nohz_full_kick();

	local_irq_restore(flags);
}
265 | |||
266 | int tick_nohz_full_cpu(int cpu) | ||
267 | { | ||
268 | if (!have_nohz_full_mask) | ||
269 | return 0; | ||
270 | |||
271 | return cpumask_test_cpu(cpu, nohz_full_mask); | ||
272 | } | ||
273 | |||
274 | /* Parse the boot-time nohz CPU list from the kernel parameters. */ | ||
275 | static int __init tick_nohz_full_setup(char *str) | ||
276 | { | ||
277 | int cpu; | ||
278 | |||
279 | alloc_bootmem_cpumask_var(&nohz_full_mask); | ||
280 | if (cpulist_parse(str, nohz_full_mask) < 0) { | ||
281 | pr_warning("NOHZ: Incorrect nohz_full cpumask\n"); | ||
282 | return 1; | ||
283 | } | ||
284 | |||
285 | cpu = smp_processor_id(); | ||
286 | if (cpumask_test_cpu(cpu, nohz_full_mask)) { | ||
287 | pr_warning("NO_HZ: Clearing %d from nohz_full range for timekeeping\n", cpu); | ||
288 | cpumask_clear_cpu(cpu, nohz_full_mask); | ||
289 | } | ||
290 | have_nohz_full_mask = true; | ||
291 | |||
292 | return 1; | ||
293 | } | ||
294 | __setup("nohz_full=", tick_nohz_full_setup); | ||
295 | |||
296 | static int __cpuinit tick_nohz_cpu_down_callback(struct notifier_block *nfb, | ||
297 | unsigned long action, | ||
298 | void *hcpu) | ||
299 | { | ||
300 | unsigned int cpu = (unsigned long)hcpu; | ||
301 | |||
302 | switch (action & ~CPU_TASKS_FROZEN) { | ||
303 | case CPU_DOWN_PREPARE: | ||
304 | /* | ||
305 | * If we handle the timekeeping duty for full dynticks CPUs, | ||
306 | * we can't safely shutdown that CPU. | ||
307 | */ | ||
308 | if (have_nohz_full_mask && tick_do_timer_cpu == cpu) | ||
309 | return -EINVAL; | ||
310 | break; | ||
311 | } | ||
312 | return NOTIFY_OK; | ||
313 | } | ||
314 | |||
315 | /* | ||
316 | * Worst case string length in chunks of CPU range seems 2 steps | ||
317 | * separations: 0,2,4,6,... | ||
318 | * This is NR_CPUS + sizeof('\0') | ||
319 | */ | ||
320 | static char __initdata nohz_full_buf[NR_CPUS + 1]; | ||
321 | |||
/*
 * Default full dynticks setup used when no mask was set up beforehand.
 *
 * With CONFIG_NO_HZ_FULL_ALL, allocate nohz_full_mask, put every CPU but
 * the current one in it and flag the mask as available.
 *
 * Returns 0 on success, -1 if the mask can't be allocated or if
 * CONFIG_NO_HZ_FULL_ALL is not enabled.
 */
static int tick_nohz_init_all(void)
{
#ifdef CONFIG_NO_HZ_FULL_ALL
	if (!alloc_cpumask_var(&nohz_full_mask, GFP_KERNEL)) {
		pr_err("NO_HZ: Can't allocate full dynticks cpumask\n");
		return -1;
	}

	/* Everyone goes full dynticks except the CPU running this code. */
	cpumask_setall(nohz_full_mask);
	cpumask_clear_cpu(smp_processor_id(), nohz_full_mask);
	have_nohz_full_mask = true;

	return 0;
#else
	return -1;
#endif
}
338 | |||
339 | void __init tick_nohz_init(void) | ||
340 | { | ||
341 | int cpu; | ||
342 | |||
343 | if (!have_nohz_full_mask) { | ||
344 | if (tick_nohz_init_all() < 0) | ||
345 | return; | ||
346 | } | ||
347 | |||
348 | cpu_notifier(tick_nohz_cpu_down_callback, 0); | ||
349 | |||
350 | /* Make sure full dynticks CPU are also RCU nocbs */ | ||
351 | for_each_cpu(cpu, nohz_full_mask) { | ||
352 | if (!rcu_is_nocb_cpu(cpu)) { | ||
353 | pr_warning("NO_HZ: CPU %d is not RCU nocb: " | ||
354 | "cleared from nohz_full range", cpu); | ||
355 | cpumask_clear_cpu(cpu, nohz_full_mask); | ||
356 | } | ||
357 | } | ||
358 | |||
359 | cpulist_scnprintf(nohz_full_buf, sizeof(nohz_full_buf), nohz_full_mask); | ||
360 | pr_info("NO_HZ: Full dynticks CPUs: %s.\n", nohz_full_buf); | ||
361 | } | ||
362 | #else | ||
363 | #define have_nohz_full_mask (0) | ||
364 | #endif | ||
365 | |||
145 | /* | 366 | /* |
146 | * NOHZ - aka dynamic tick functionality | 367 | * NOHZ - aka dynamic tick functionality |
147 | */ | 368 | */ |
148 | #ifdef CONFIG_NO_HZ | 369 | #ifdef CONFIG_NO_HZ_COMMON |
149 | /* | 370 | /* |
150 | * NO HZ enabled ? | 371 | * NO HZ enabled ? |
151 | */ | 372 | */ |
@@ -345,11 +566,12 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, | |||
345 | delta_jiffies = rcu_delta_jiffies; | 566 | delta_jiffies = rcu_delta_jiffies; |
346 | } | 567 | } |
347 | } | 568 | } |
569 | |||
348 | /* | 570 | /* |
349 | * Do not stop the tick, if we are only one off | 571 | * Do not stop the tick, if we are only one off (or less) |
350 | * or if the cpu is required for rcu | 572 | * or if the cpu is required for RCU: |
351 | */ | 573 | */ |
352 | if (!ts->tick_stopped && delta_jiffies == 1) | 574 | if (!ts->tick_stopped && delta_jiffies <= 1) |
353 | goto out; | 575 | goto out; |
354 | 576 | ||
355 | /* Schedule the tick, if we are at least one jiffie off */ | 577 | /* Schedule the tick, if we are at least one jiffie off */ |
@@ -378,6 +600,13 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, | |||
378 | time_delta = KTIME_MAX; | 600 | time_delta = KTIME_MAX; |
379 | } | 601 | } |
380 | 602 | ||
603 | #ifdef CONFIG_NO_HZ_FULL | ||
604 | if (!ts->inidle) { | ||
605 | time_delta = min(time_delta, | ||
606 | scheduler_tick_max_deferment()); | ||
607 | } | ||
608 | #endif | ||
609 | |||
381 | /* | 610 | /* |
382 | * calculate the expiry time for the next timer wheel | 611 | * calculate the expiry time for the next timer wheel |
383 | * timer. delta_jiffies >= NEXT_TIMER_MAX_DELTA signals | 612 | * timer. delta_jiffies >= NEXT_TIMER_MAX_DELTA signals |
@@ -421,6 +650,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, | |||
421 | 650 | ||
422 | ts->last_tick = hrtimer_get_expires(&ts->sched_timer); | 651 | ts->last_tick = hrtimer_get_expires(&ts->sched_timer); |
423 | ts->tick_stopped = 1; | 652 | ts->tick_stopped = 1; |
653 | trace_tick_stop(1, " "); | ||
424 | } | 654 | } |
425 | 655 | ||
426 | /* | 656 | /* |
@@ -457,6 +687,24 @@ out: | |||
457 | return ret; | 687 | return ret; |
458 | } | 688 | } |
459 | 689 | ||
/*
 * Try to stop the tick on a full dynticks CPU running a non-idle task.
 *
 * The tick is only stopped when all of the following hold, checked in
 * this order: the current CPU is a full dynticks CPU, the current task
 * is not the idle task, the tick is already stopped or nohz mode is
 * active for @ts, and can_stop_full_tick() agrees.
 *
 * Compiles to an empty function without CONFIG_NO_HZ_FULL.
 */
static void tick_nohz_full_stop_tick(struct tick_sched *ts)
{
#ifdef CONFIG_NO_HZ_FULL
	int cpu = smp_processor_id();
	bool may_stop;

	may_stop = tick_nohz_full_cpu(cpu) &&
		   !is_idle_task(current) &&
		   (ts->tick_stopped || ts->nohz_mode != NOHZ_MODE_INACTIVE) &&
		   can_stop_full_tick();

	if (may_stop)
		tick_nohz_stop_sched_tick(ts, ktime_get(), cpu);
#endif
}
707 | |||
460 | static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) | 708 | static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) |
461 | { | 709 | { |
462 | /* | 710 | /* |
@@ -489,6 +737,21 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) | |||
489 | return false; | 737 | return false; |
490 | } | 738 | } |
491 | 739 | ||
740 | if (have_nohz_full_mask) { | ||
741 | /* | ||
742 | * Keep the tick alive to guarantee timekeeping progression | ||
743 | * if there are full dynticks CPUs around | ||
744 | */ | ||
745 | if (tick_do_timer_cpu == cpu) | ||
746 | return false; | ||
747 | /* | ||
748 | * Boot safety: make sure the timekeeping duty has been | ||
749 | * assigned before entering dyntick-idle mode, | ||
750 | */ | ||
751 | if (tick_do_timer_cpu == TICK_DO_TIMER_NONE) | ||
752 | return false; | ||
753 | } | ||
754 | |||
492 | return true; | 755 | return true; |
493 | } | 756 | } |
494 | 757 | ||
@@ -568,12 +831,13 @@ void tick_nohz_irq_exit(void) | |||
568 | { | 831 | { |
569 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); | 832 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); |
570 | 833 | ||
571 | if (!ts->inidle) | 834 | if (ts->inidle) { |
572 | return; | 835 | /* Cancel the timer because CPU already waken up from the C-states*/ |
573 | 836 | menu_hrtimer_cancel(); | |
574 | /* Cancel the timer because CPU already waken up from the C-states*/ | 837 | __tick_nohz_idle_enter(ts); |
575 | menu_hrtimer_cancel(); | 838 | } else { |
576 | __tick_nohz_idle_enter(ts); | 839 | tick_nohz_full_stop_tick(ts); |
840 | } | ||
577 | } | 841 | } |
578 | 842 | ||
579 | /** | 843 | /** |
@@ -802,7 +1066,7 @@ static inline void tick_check_nohz(int cpu) | |||
802 | static inline void tick_nohz_switch_to_nohz(void) { } | 1066 | static inline void tick_nohz_switch_to_nohz(void) { } |
803 | static inline void tick_check_nohz(int cpu) { } | 1067 | static inline void tick_check_nohz(int cpu) { } |
804 | 1068 | ||
805 | #endif /* NO_HZ */ | 1069 | #endif /* CONFIG_NO_HZ_COMMON */ |
806 | 1070 | ||
807 | /* | 1071 | /* |
808 | * Called from irq_enter to notify about the possible interruption of idle() | 1072 | * Called from irq_enter to notify about the possible interruption of idle() |
@@ -887,14 +1151,14 @@ void tick_setup_sched_timer(void) | |||
887 | now = ktime_get(); | 1151 | now = ktime_get(); |
888 | } | 1152 | } |
889 | 1153 | ||
890 | #ifdef CONFIG_NO_HZ | 1154 | #ifdef CONFIG_NO_HZ_COMMON |
891 | if (tick_nohz_enabled) | 1155 | if (tick_nohz_enabled) |
892 | ts->nohz_mode = NOHZ_MODE_HIGHRES; | 1156 | ts->nohz_mode = NOHZ_MODE_HIGHRES; |
893 | #endif | 1157 | #endif |
894 | } | 1158 | } |
895 | #endif /* HIGH_RES_TIMERS */ | 1159 | #endif /* HIGH_RES_TIMERS */ |
896 | 1160 | ||
897 | #if defined CONFIG_NO_HZ || defined CONFIG_HIGH_RES_TIMERS | 1161 | #if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS |
898 | void tick_cancel_sched_timer(int cpu) | 1162 | void tick_cancel_sched_timer(int cpu) |
899 | { | 1163 | { |
900 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | 1164 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); |