Diffstat (limited to 'kernel/time/tick-sched.c')
 kernel/time/tick-sched.c | 296 ++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 280 insertions(+), 16 deletions(-)
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 225f8bf19095..bc67d4245e1d 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -21,11 +21,15 @@
 #include <linux/sched.h>
 #include <linux/module.h>
 #include <linux/irq_work.h>
+#include <linux/posix-timers.h>
+#include <linux/perf_event.h>
 
 #include <asm/irq_regs.h>
 
 #include "tick-internal.h"
 
+#include <trace/events/timer.h>
+
 /*
  * Per cpu nohz control structure
  */
@@ -104,7 +108,7 @@ static void tick_sched_do_timer(ktime_t now)
 {
 	int cpu = smp_processor_id();
 
-#ifdef CONFIG_NO_HZ
+#ifdef CONFIG_NO_HZ_COMMON
 	/*
 	 * Check if the do_timer duty was dropped. We don't care about
 	 * concurrency: This happens only when the cpu in charge went
@@ -112,7 +116,8 @@ static void tick_sched_do_timer(ktime_t now)
 	 * this duty, then the jiffies update is still serialized by
 	 * jiffies_lock.
 	 */
-	if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE))
+	if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)
+	    && !tick_nohz_full_cpu(cpu))
 		tick_do_timer_cpu = cpu;
 #endif
 
@@ -123,7 +128,7 @@ static void tick_sched_do_timer(ktime_t now)
 
 static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
 {
-#ifdef CONFIG_NO_HZ
+#ifdef CONFIG_NO_HZ_COMMON
 	/*
 	 * When we are idle and the tick is stopped, we have to touch
 	 * the watchdog as we might not schedule for a really long
@@ -142,10 +147,226 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
 	profile_tick(CPU_PROFILING);
 }
 
+#ifdef CONFIG_NO_HZ_FULL
+static cpumask_var_t nohz_full_mask;
+bool have_nohz_full_mask;
+
+static bool can_stop_full_tick(void)
+{
+	WARN_ON_ONCE(!irqs_disabled());
+
+	if (!sched_can_stop_tick()) {
+		trace_tick_stop(0, "more than 1 task in runqueue\n");
+		return false;
+	}
+
+	if (!posix_cpu_timers_can_stop_tick(current)) {
+		trace_tick_stop(0, "posix timers running\n");
+		return false;
+	}
+
+	if (!perf_event_can_stop_tick()) {
+		trace_tick_stop(0, "perf events running\n");
+		return false;
+	}
+
+	/* sched_clock_tick() needs us? */
+#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
+	/*
+	 * TODO: kick full dynticks CPUs when
+	 * sched_clock_stable is set.
+	 */
+	if (!sched_clock_stable) {
+		trace_tick_stop(0, "unstable sched clock\n");
+		return false;
+	}
+#endif
+
+	return true;
+}
+
+static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now);
+
+/*
+ * Re-evaluate the need for the tick on the current CPU
+ * and restart it if necessary.
+ */
+void tick_nohz_full_check(void)
+{
+	struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
+
+	if (tick_nohz_full_cpu(smp_processor_id())) {
+		if (ts->tick_stopped && !is_idle_task(current)) {
+			if (!can_stop_full_tick())
+				tick_nohz_restart_sched_tick(ts, ktime_get());
+		}
+	}
+}
+
+static void nohz_full_kick_work_func(struct irq_work *work)
+{
+	tick_nohz_full_check();
+}
+
+static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
+	.func = nohz_full_kick_work_func,
+};
+
+/*
+ * Kick the current CPU if it's full dynticks in order to force it to
+ * re-evaluate its dependency on the tick and restart it if necessary.
+ */
+void tick_nohz_full_kick(void)
+{
+	if (tick_nohz_full_cpu(smp_processor_id()))
+		irq_work_queue(&__get_cpu_var(nohz_full_kick_work));
+}
+
+static void nohz_full_kick_ipi(void *info)
+{
+	tick_nohz_full_check();
+}
+
+/*
+ * Kick all full dynticks CPUs in order to force these to re-evaluate
+ * their dependency on the tick and restart it if necessary.
+ */
+void tick_nohz_full_kick_all(void)
+{
+	if (!have_nohz_full_mask)
+		return;
+
+	preempt_disable();
+	smp_call_function_many(nohz_full_mask,
+			       nohz_full_kick_ipi, NULL, false);
+	preempt_enable();
+}
+
+/*
+ * Re-evaluate the need for the tick as we switch the current task.
+ * It might need the tick due to per task/process properties:
+ * perf events, posix cpu timers, ...
+ */
+void tick_nohz_task_switch(struct task_struct *tsk)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+
+	if (!tick_nohz_full_cpu(smp_processor_id()))
+		goto out;
+
+	if (tick_nohz_tick_stopped() && !can_stop_full_tick())
+		tick_nohz_full_kick();
+
+out:
+	local_irq_restore(flags);
+}
+
+int tick_nohz_full_cpu(int cpu)
+{
+	if (!have_nohz_full_mask)
+		return 0;
+
+	return cpumask_test_cpu(cpu, nohz_full_mask);
+}
+
+/* Parse the boot-time nohz CPU list from the kernel parameters. */
+static int __init tick_nohz_full_setup(char *str)
+{
+	int cpu;
+
+	alloc_bootmem_cpumask_var(&nohz_full_mask);
+	if (cpulist_parse(str, nohz_full_mask) < 0) {
+		pr_warning("NOHZ: Incorrect nohz_full cpumask\n");
+		return 1;
+	}
+
+	cpu = smp_processor_id();
+	if (cpumask_test_cpu(cpu, nohz_full_mask)) {
+		pr_warning("NO_HZ: Clearing %d from nohz_full range for timekeeping\n", cpu);
+		cpumask_clear_cpu(cpu, nohz_full_mask);
+	}
+	have_nohz_full_mask = true;
+
+	return 1;
+}
+__setup("nohz_full=", tick_nohz_full_setup);
+
+static int __cpuinit tick_nohz_cpu_down_callback(struct notifier_block *nfb,
+						 unsigned long action,
+						 void *hcpu)
+{
+	unsigned int cpu = (unsigned long)hcpu;
+
+	switch (action & ~CPU_TASKS_FROZEN) {
+	case CPU_DOWN_PREPARE:
+		/*
+		 * If we handle the timekeeping duty for full dynticks CPUs,
+		 * we can't safely shutdown that CPU.
+		 */
+		if (have_nohz_full_mask && tick_do_timer_cpu == cpu)
+			return -EINVAL;
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+/*
+ * Worst case string length in chunks of CPU range seems 2 steps
+ * separations: 0,2,4,6,...
+ * This is NR_CPUS + sizeof('\0')
+ */
+static char __initdata nohz_full_buf[NR_CPUS + 1];
+
+static int tick_nohz_init_all(void)
+{
+	int err = -1;
+
+#ifdef CONFIG_NO_HZ_FULL_ALL
+	if (!alloc_cpumask_var(&nohz_full_mask, GFP_KERNEL)) {
+		pr_err("NO_HZ: Can't allocate full dynticks cpumask\n");
+		return err;
+	}
+	err = 0;
+	cpumask_setall(nohz_full_mask);
+	cpumask_clear_cpu(smp_processor_id(), nohz_full_mask);
+	have_nohz_full_mask = true;
+#endif
+	return err;
+}
+
+void __init tick_nohz_init(void)
+{
+	int cpu;
+
+	if (!have_nohz_full_mask) {
+		if (tick_nohz_init_all() < 0)
+			return;
+	}
+
+	cpu_notifier(tick_nohz_cpu_down_callback, 0);
+
+	/* Make sure full dynticks CPU are also RCU nocbs */
+	for_each_cpu(cpu, nohz_full_mask) {
+		if (!rcu_is_nocb_cpu(cpu)) {
+			pr_warning("NO_HZ: CPU %d is not RCU nocb: "
+				   "cleared from nohz_full range", cpu);
+			cpumask_clear_cpu(cpu, nohz_full_mask);
+		}
+	}
+
+	cpulist_scnprintf(nohz_full_buf, sizeof(nohz_full_buf), nohz_full_mask);
+	pr_info("NO_HZ: Full dynticks CPUs: %s.\n", nohz_full_buf);
+}
+#else
+#define have_nohz_full_mask (0)
+#endif
+
 /*
  * NOHZ - aka dynamic tick functionality
  */
-#ifdef CONFIG_NO_HZ
+#ifdef CONFIG_NO_HZ_COMMON
 /*
  * NO HZ enabled ?
  */
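The block above is the core of the new interface: can_stop_full_tick() is a veto chain (scheduler, posix CPU timers, perf, sched clock), and the irq_work/IPI kicks force a CPU to walk that chain again once a dependency reappears. A hedged usage sketch follows; the subsystem function is hypothetical, while tick_nohz_full_kick() and tick_nohz_full_kick_all() are the real API added by this patch.

/* Hypothetical subsystem code, for illustration only. */
static void my_subsystem_arm_per_task_timer(void)
{
	/*
	 * After arming per-task state that needs the periodic tick,
	 * ask the local CPU to re-evaluate: if its tick is stopped,
	 * the queued irq_work runs tick_nohz_full_check() and
	 * restarts it from a safe (hard irq) context.
	 */
	tick_nohz_full_kick();
}

static void my_subsystem_global_state_change(void)
{
	/* Same, but for every full dynticks CPU (IPI broadcast). */
	tick_nohz_full_kick_all();
}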
@@ -345,11 +566,12 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
 			delta_jiffies = rcu_delta_jiffies;
 		}
 	}
+
 	/*
-	 * Do not stop the tick, if we are only one off
-	 * or if the cpu is required for rcu
+	 * Do not stop the tick, if we are only one off (or less)
+	 * or if the cpu is required for RCU:
 	 */
-	if (!ts->tick_stopped && delta_jiffies == 1)
+	if (!ts->tick_stopped && delta_jiffies <= 1)
 		goto out;
 
 	/* Schedule the tick, if we are at least one jiffie off */
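A semantics fix rides along in this hunk: delta_jiffies can be 0 when the next timer is already due within the current jiffy, and the old "== 1" test missed that case. A hedged restatement with a hypothetical helper, not from the patch:

static bool stopping_tick_is_worthwhile(unsigned long delta_jiffies,
					bool tick_stopped)
{
	/* An already stopped tick is handled further below. */
	if (tick_stopped)
		return true;
	/*
	 * Next event lands within one jiffy (delta 0 or 1):
	 * cancelling and reprogramming the tick would cost more
	 * than simply letting it fire.
	 */
	return delta_jiffies > 1;
}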
@@ -378,6 +600,13 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
 		time_delta = KTIME_MAX;
 	}
 
+#ifdef CONFIG_NO_HZ_FULL
+	if (!ts->inidle) {
+		time_delta = min(time_delta,
+				 scheduler_tick_max_deferment());
+	}
+#endif
+
 	/*
 	 * calculate the expiry time for the next timer wheel
 	 * timer. delta_jiffies >= NEXT_TIMER_MAX_DELTA signals
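On a full dynticks CPU that is busy rather than idle (!ts->inidle), the sleep length gets clamped by scheduler_tick_max_deferment(), which is introduced by a separate patch in this series. A sketch of the effect, assuming that function returns a bound in nanoseconds:

/* Illustration only; assumes scheduler_tick_max_deferment() returns ns. */
static u64 bound_next_event(u64 time_delta, bool in_idle)
{
	/*
	 * KTIME_MAX means "no timer pending, sleep forever". A running
	 * task still needs occasional scheduler maintenance, so a busy
	 * full dynticks CPU may only defer up to the scheduler's limit.
	 */
	if (!in_idle)
		time_delta = min(time_delta, scheduler_tick_max_deferment());
	return time_delta;
}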
@@ -421,6 +650,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
 
 		ts->last_tick = hrtimer_get_expires(&ts->sched_timer);
 		ts->tick_stopped = 1;
+		trace_tick_stop(1, " ");
 	}
 
 	/*
@@ -457,6 +687,24 @@ out:
 	return ret;
 }
 
+static void tick_nohz_full_stop_tick(struct tick_sched *ts)
+{
+#ifdef CONFIG_NO_HZ_FULL
+	int cpu = smp_processor_id();
+
+	if (!tick_nohz_full_cpu(cpu) || is_idle_task(current))
+		return;
+
+	if (!ts->tick_stopped && ts->nohz_mode == NOHZ_MODE_INACTIVE)
+		return;
+
+	if (!can_stop_full_tick())
+		return;
+
+	tick_nohz_stop_sched_tick(ts, ktime_get(), cpu);
+#endif
+}
+
 static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
 {
 	/*
@@ -489,6 +737,21 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
 		return false;
 	}
 
+	if (have_nohz_full_mask) {
+		/*
+		 * Keep the tick alive to guarantee timekeeping progression
+		 * if there are full dynticks CPUs around
+		 */
+		if (tick_do_timer_cpu == cpu)
+			return false;
+		/*
+		 * Boot safety: make sure the timekeeping duty has been
+		 * assigned before entering dyntick-idle mode,
+		 */
+		if (tick_do_timer_cpu == TICK_DO_TIMER_NONE)
+			return false;
+	}
+
 	return true;
 }
 
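These two checks pin down the timekeeping CPU: once any nohz_full CPUs exist, some CPU must keep updating jiffies and GTOD on their behalf, so the CPU holding (or about to inherit) the do_timer duty may not enter dyntick-idle. A hedged condensation of the new constraint, reusing only names from this file; the helper itself is not a kernel function:

static bool idle_tick_may_stop(int cpu)
{
	if (!have_nohz_full_mask)
		return true;
	/*
	 * The timekeeper must keep ticking for the busy nohz_full
	 * CPUs, and nobody may sleep before the duty is assigned
	 * at boot.
	 */
	return tick_do_timer_cpu != cpu &&
	       tick_do_timer_cpu != TICK_DO_TIMER_NONE;
}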
@@ -568,12 +831,13 @@ void tick_nohz_irq_exit(void)
 {
 	struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
 
-	if (!ts->inidle)
-		return;
-
-	/* Cancel the timer because CPU already waken up from the C-states*/
-	menu_hrtimer_cancel();
-	__tick_nohz_idle_enter(ts);
+	if (ts->inidle) {
+		/* Cancel the timer because CPU already waken up from the C-states*/
+		menu_hrtimer_cancel();
+		__tick_nohz_idle_enter(ts);
+	} else {
+		tick_nohz_full_stop_tick(ts);
+	}
 }
 
 /**
@@ -802,7 +1066,7 @@ static inline void tick_check_nohz(int cpu)
 static inline void tick_nohz_switch_to_nohz(void) { }
 static inline void tick_check_nohz(int cpu) { }
 
-#endif /* NO_HZ */
+#endif /* CONFIG_NO_HZ_COMMON */
 
 /*
  * Called from irq_enter to notify about the possible interruption of idle()
@@ -887,14 +1151,14 @@ void tick_setup_sched_timer(void)
 		now = ktime_get();
 	}
 
-#ifdef CONFIG_NO_HZ
+#ifdef CONFIG_NO_HZ_COMMON
 	if (tick_nohz_enabled)
 		ts->nohz_mode = NOHZ_MODE_HIGHRES;
 #endif
 }
 #endif /* HIGH_RES_TIMERS */
 
-#if defined CONFIG_NO_HZ || defined CONFIG_HIGH_RES_TIMERS
+#if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS
 void tick_cancel_sched_timer(int cpu)
 {
 	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);