aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/time/tick-sched.c
diff options
context:
space:
mode:
author Thomas Gleixner <tglx@linutronix.de> 2007-02-16 04:28:03 -0500
committer Linus Torvalds <torvalds@woody.linux-foundation.org> 2007-02-16 11:13:59 -0500
commit 79bf2bb335b85db25d27421c798595a2fa2a0e82 (patch)
tree 550ec2654ae1dd65b871de7fe9c890108c6e86d8 /kernel/time/tick-sched.c
parent f8381cba04ba8173fd5a2b8e5cd8b3290ee13a98 (diff)
[PATCH] tick-management: dyntick / highres functionality
With Ingo Molnar <mingo@elte.hu>

Add functions to provide dynamic ticks and high resolution timers. The code which keeps track of jiffies and handles the long idle periods is shared between tick based and high resolution timer based dynticks. The dyntick functionality can be disabled on the kernel command line. Also provide the infrastructure to support high resolution timers.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: john stultz <johnstul@us.ibm.com>
Cc: Roman Zippel <zippel@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel/time/tick-sched.c')
-rw-r--r--kernel/time/tick-sched.c558
1 files changed, 558 insertions, 0 deletions
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
new file mode 100644
index 000000000000..99d35e2af182
--- /dev/null
+++ b/kernel/time/tick-sched.c
@@ -0,0 +1,558 @@
1/*
2 * linux/kernel/time/tick-sched.c
3 *
4 * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
5 * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
6 * Copyright(C) 2006-2007 Timesys Corp., Thomas Gleixner
7 *
8 * No idle tick implementation for low and high resolution timers
9 *
10 * Started by: Thomas Gleixner and Ingo Molnar
11 *
12 * For licencing details see kernel-base/COPYING
13 */
14#include <linux/cpu.h>
15#include <linux/err.h>
16#include <linux/hrtimer.h>
17#include <linux/interrupt.h>
18#include <linux/kernel_stat.h>
19#include <linux/percpu.h>
20#include <linux/profile.h>
21#include <linux/sched.h>
22#include <linux/tick.h>
23
24#include "tick-internal.h"
25
26/*
27 * Per-CPU nohz control structure (one struct tick_sched instance per CPU)
28 */
29static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched);
30
31/*
32 * Time of the last jiffies update (zero until started). Protected by xtime_lock.
33 */
34static ktime_t last_jiffies_update;
35
36/*
37 * Must be called with interrupts disabled !
38 */
39static void tick_do_update_jiffies64(ktime_t now)
40{
41 unsigned long ticks = 0;
42 ktime_t delta;
43
44 /* Reevalute with xtime_lock held */
45 write_seqlock(&xtime_lock);
46
47 delta = ktime_sub(now, last_jiffies_update);
48 if (delta.tv64 >= tick_period.tv64) {
49
50 delta = ktime_sub(delta, tick_period);
51 last_jiffies_update = ktime_add(last_jiffies_update,
52 tick_period);
53
54 /* Slow path for long timeouts */
55 if (unlikely(delta.tv64 >= tick_period.tv64)) {
56 s64 incr = ktime_to_ns(tick_period);
57
58 ticks = ktime_divns(delta, incr);
59
60 last_jiffies_update = ktime_add_ns(last_jiffies_update,
61 incr * ticks);
62 }
63 do_timer(++ticks);
64 }
65 write_sequnlock(&xtime_lock);
66}
67
68/*
69 * Initialize and return retrieve the jiffies update.
70 */
71static ktime_t tick_init_jiffy_update(void)
72{
73 ktime_t period;
74
75 write_seqlock(&xtime_lock);
76 /* Did we start the jiffies update yet ? */
77 if (last_jiffies_update.tv64 == 0)
78 last_jiffies_update = tick_next_period;
79 period = last_jiffies_update;
80 write_sequnlock(&xtime_lock);
81 return period;
82}
83
84/*
85 * NOHZ - aka dynamic tick functionality
86 */
87#ifdef CONFIG_NO_HZ
88/*
89 * NO HZ enabled ?
90 */
91static int tick_nohz_enabled __read_mostly = 1;
92
93/*
94 * Enable / Disable tickless mode
95 */
96static int __init setup_tick_nohz(char *str)
97{
98 if (!strcmp(str, "off"))
99 tick_nohz_enabled = 0;
100 else if (!strcmp(str, "on"))
101 tick_nohz_enabled = 1;
102 else
103 return 0;
104 return 1;
105}
106
107__setup("nohz=", setup_tick_nohz);
108
109/**
110 * tick_nohz_update_jiffies - update jiffies when idle was interrupted
111 *
112 * Called from interrupt entry when the CPU was idle
113 *
114 * In case the sched_tick was stopped on this CPU, we have to check if jiffies
115 * must be updated. Otherwise an interrupt handler could use a stale jiffy
116 * value. We do this unconditionally on any cpu, as we don't know whether the
117 * cpu, which has the update task assigned is in a long sleep.
118 */
119void tick_nohz_update_jiffies(void)
120{
121 int cpu = smp_processor_id();
122 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
123 unsigned long flags;
124 ktime_t now;
125
126 if (!ts->tick_stopped)
127 return;
128
129 cpu_clear(cpu, nohz_cpu_mask);
130 now = ktime_get();
131
132 local_irq_save(flags);
133 tick_do_update_jiffies64(now);
134 local_irq_restore(flags);
135}
136
137/**
138 * tick_nohz_stop_sched_tick - stop the idle tick from the idle task
139 *
140 * When the next event is more than a tick into the future, stop the idle tick
141 * Called either from the idle loop or from irq_exit() when an idle period was
142 * just interrupted by an interrupt which did not cause a reschedule.
143 */
144void tick_nohz_stop_sched_tick(void)
145{
146 unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags;
147 struct tick_sched *ts;
148 ktime_t last_update, expires, now, delta;
149 int cpu;
150
151 local_irq_save(flags);
152
153 cpu = smp_processor_id();
154 ts = &per_cpu(tick_cpu_sched, cpu);
155
156 if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
157 goto end;
158
159 if (need_resched())
160 goto end;
161
162 cpu = smp_processor_id();
163 BUG_ON(local_softirq_pending());
164
165 now = ktime_get();
166 /*
167 * When called from irq_exit we need to account the idle sleep time
168 * correctly.
169 */
170 if (ts->tick_stopped) {
171 delta = ktime_sub(now, ts->idle_entrytime);
172 ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
173 }
174
175 ts->idle_entrytime = now;
176 ts->idle_calls++;
177
178 /* Read jiffies and the time when jiffies were updated last */
179 do {
180 seq = read_seqbegin(&xtime_lock);
181 last_update = last_jiffies_update;
182 last_jiffies = jiffies;
183 } while (read_seqretry(&xtime_lock, seq));
184
185 /* Get the next timer wheel timer */
186 next_jiffies = get_next_timer_interrupt(last_jiffies);
187 delta_jiffies = next_jiffies - last_jiffies;
188
189 /*
190 * Do not stop the tick, if we are only one off
191 * or if the cpu is required for rcu
192 */
193 if (!ts->tick_stopped && (delta_jiffies == 1 || rcu_needs_cpu(cpu)))
194 goto out;
195
196 /* Schedule the tick, if we are at least one jiffie off */
197 if ((long)delta_jiffies >= 1) {
198
199 if (rcu_needs_cpu(cpu))
200 delta_jiffies = 1;
201 else
202 cpu_set(cpu, nohz_cpu_mask);
203 /*
204 * nohz_stop_sched_tick can be called several times before
205 * the nohz_restart_sched_tick is called. This happens when
206 * interrupts arrive which do not cause a reschedule. In the
207 * first call we save the current tick time, so we can restart
208 * the scheduler tick in nohz_restart_sched_tick.
209 */
210 if (!ts->tick_stopped) {
211 ts->idle_tick = ts->sched_timer.expires;
212 ts->tick_stopped = 1;
213 ts->idle_jiffies = last_jiffies;
214 }
215 /*
216 * calculate the expiry time for the next timer wheel
217 * timer
218 */
219 expires = ktime_add_ns(last_update, tick_period.tv64 *
220 delta_jiffies);
221 ts->idle_expires = expires;
222 ts->idle_sleeps++;
223
224 if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
225 hrtimer_start(&ts->sched_timer, expires,
226 HRTIMER_MODE_ABS);
227 /* Check, if the timer was already in the past */
228 if (hrtimer_active(&ts->sched_timer))
229 goto out;
230 } else if(!tick_program_event(expires, 0))
231 goto out;
232 /*
233 * We are past the event already. So we crossed a
234 * jiffie boundary. Update jiffies and raise the
235 * softirq.
236 */
237 tick_do_update_jiffies64(ktime_get());
238 cpu_clear(cpu, nohz_cpu_mask);
239 }
240 raise_softirq_irqoff(TIMER_SOFTIRQ);
241out:
242 ts->next_jiffies = next_jiffies;
243 ts->last_jiffies = last_jiffies;
244end:
245 local_irq_restore(flags);
246}
247
248/**
249 * nohz_restart_sched_tick - restart the idle tick from the idle task
250 *
251 * Restart the idle tick when the CPU is woken up from idle
252 */
253void tick_nohz_restart_sched_tick(void)
254{
255 int cpu = smp_processor_id();
256 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
257 unsigned long ticks;
258 ktime_t now, delta;
259
260 if (!ts->tick_stopped)
261 return;
262
263 /* Update jiffies first */
264 now = ktime_get();
265
266 local_irq_disable();
267 tick_do_update_jiffies64(now);
268 cpu_clear(cpu, nohz_cpu_mask);
269
270 /* Account the idle time */
271 delta = ktime_sub(now, ts->idle_entrytime);
272 ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
273
274 /*
275 * We stopped the tick in idle. Update process times would miss the
276 * time we slept as update_process_times does only a 1 tick
277 * accounting. Enforce that this is accounted to idle !
278 */
279 ticks = jiffies - ts->idle_jiffies;
280 /*
281 * We might be one off. Do not randomly account a huge number of ticks!
282 */
283 if (ticks && ticks < LONG_MAX) {
284 add_preempt_count(HARDIRQ_OFFSET);
285 account_system_time(current, HARDIRQ_OFFSET,
286 jiffies_to_cputime(ticks));
287 sub_preempt_count(HARDIRQ_OFFSET);
288 }
289
290 /*
291 * Cancel the scheduled timer and restore the tick
292 */
293 ts->tick_stopped = 0;
294 hrtimer_cancel(&ts->sched_timer);
295 ts->sched_timer.expires = ts->idle_tick;
296
297 while (1) {
298 /* Forward the time to expire in the future */
299 hrtimer_forward(&ts->sched_timer, now, tick_period);
300
301 if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
302 hrtimer_start(&ts->sched_timer,
303 ts->sched_timer.expires,
304 HRTIMER_MODE_ABS);
305 /* Check, if the timer was already in the past */
306 if (hrtimer_active(&ts->sched_timer))
307 break;
308 } else {
309 if (!tick_program_event(ts->sched_timer.expires, 0))
310 break;
311 }
312 /* Update jiffies and reread time */
313 tick_do_update_jiffies64(now);
314 now = ktime_get();
315 }
316 local_irq_enable();
317}
318
319static int tick_nohz_reprogram(struct tick_sched *ts, ktime_t now)
320{
321 hrtimer_forward(&ts->sched_timer, now, tick_period);
322 return tick_program_event(ts->sched_timer.expires, 0);
323}
324
325/*
326 * The nohz low res interrupt handler
327 */
328static void tick_nohz_handler(struct clock_event_device *dev)
329{
330 struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
331 struct pt_regs *regs = get_irq_regs();
332 ktime_t now = ktime_get();
333
334 dev->next_event.tv64 = KTIME_MAX;
335
336 /* Check, if the jiffies need an update */
337 tick_do_update_jiffies64(now);
338
339 /*
340 * When we are idle and the tick is stopped, we have to touch
341 * the watchdog as we might not schedule for a really long
342 * time. This happens on complete idle SMP systems while
343 * waiting on the login prompt. We also increment the "start
344 * of idle" jiffy stamp so the idle accounting adjustment we
345 * do when we go busy again does not account too much ticks.
346 */
347 if (ts->tick_stopped) {
348 touch_softlockup_watchdog();
349 ts->idle_jiffies++;
350 }
351
352 update_process_times(user_mode(regs));
353 profile_tick(CPU_PROFILING);
354
355 /* Do not restart, when we are in the idle loop */
356 if (ts->tick_stopped)
357 return;
358
359 while (tick_nohz_reprogram(ts, now)) {
360 now = ktime_get();
361 tick_do_update_jiffies64(now);
362 }
363}
364
365/**
366 * tick_nohz_switch_to_nohz - switch to nohz mode
367 */
368static void tick_nohz_switch_to_nohz(void)
369{
370 struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
371 ktime_t next;
372
373 if (!tick_nohz_enabled)
374 return;
375
376 local_irq_disable();
377 if (tick_switch_to_oneshot(tick_nohz_handler)) {
378 local_irq_enable();
379 return;
380 }
381
382 ts->nohz_mode = NOHZ_MODE_LOWRES;
383
384 /*
385 * Recycle the hrtimer in ts, so we can share the
386 * hrtimer_forward with the highres code.
387 */
388 hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
389 /* Get the next period */
390 next = tick_init_jiffy_update();
391
392 for (;;) {
393 ts->sched_timer.expires = next;
394 if (!tick_program_event(next, 0))
395 break;
396 next = ktime_add(next, tick_period);
397 }
398 local_irq_enable();
399
400 printk(KERN_INFO "Switched to NOHz mode on CPU #%d\n",
401 smp_processor_id());
402}
403
404#else
405
/* Without CONFIG_NO_HZ there is no nohz mode to switch to. */
static inline void tick_nohz_switch_to_nohz(void)
{
}
407
408#endif /* NO_HZ */
409
410/*
411 * High resolution timer specific code
412 */
413#ifdef CONFIG_HIGH_RES_TIMERS
414/*
415 * We rearm the timer until we get disabled by the idle code
416 * Called with interrupts disabled and timer->base->cpu_base->lock held.
417 */
418static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
419{
420 struct tick_sched *ts =
421 container_of(timer, struct tick_sched, sched_timer);
422 struct hrtimer_cpu_base *base = timer->base->cpu_base;
423 struct pt_regs *regs = get_irq_regs();
424 ktime_t now = ktime_get();
425
426 /* Check, if the jiffies need an update */
427 tick_do_update_jiffies64(now);
428
429 /*
430 * Do not call, when we are not in irq context and have
431 * no valid regs pointer
432 */
433 if (regs) {
434 /*
435 * When we are idle and the tick is stopped, we have to touch
436 * the watchdog as we might not schedule for a really long
437 * time. This happens on complete idle SMP systems while
438 * waiting on the login prompt. We also increment the "start of
439 * idle" jiffy stamp so the idle accounting adjustment we do
440 * when we go busy again does not account too much ticks.
441 */
442 if (ts->tick_stopped) {
443 touch_softlockup_watchdog();
444 ts->idle_jiffies++;
445 }
446 /*
447 * update_process_times() might take tasklist_lock, hence
448 * drop the base lock. sched-tick hrtimers are per-CPU and
449 * never accessible by userspace APIs, so this is safe to do.
450 */
451 spin_unlock(&base->lock);
452 update_process_times(user_mode(regs));
453 profile_tick(CPU_PROFILING);
454 spin_lock(&base->lock);
455 }
456
457 /* Do not restart, when we are in the idle loop */
458 if (ts->tick_stopped)
459 return HRTIMER_NORESTART;
460
461 hrtimer_forward(timer, now, tick_period);
462
463 return HRTIMER_RESTART;
464}
465
466/**
467 * tick_setup_sched_timer - setup the tick emulation timer
468 */
469void tick_setup_sched_timer(void)
470{
471 struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
472 ktime_t now = ktime_get();
473
474 /*
475 * Emulate tick processing via per-CPU hrtimers:
476 */
477 hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
478 ts->sched_timer.function = tick_sched_timer;
479 ts->sched_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
480
481 /* Get the next period */
482 ts->sched_timer.expires = tick_init_jiffy_update();
483
484 for (;;) {
485 hrtimer_forward(&ts->sched_timer, now, tick_period);
486 hrtimer_start(&ts->sched_timer, ts->sched_timer.expires,
487 HRTIMER_MODE_ABS);
488 /* Check, if the timer was already in the past */
489 if (hrtimer_active(&ts->sched_timer))
490 break;
491 now = ktime_get();
492 }
493
494#ifdef CONFIG_NO_HZ
495 if (tick_nohz_enabled)
496 ts->nohz_mode = NOHZ_MODE_HIGHRES;
497#endif
498}
499
500void tick_cancel_sched_timer(int cpu)
501{
502 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
503
504 if (ts->sched_timer.base)
505 hrtimer_cancel(&ts->sched_timer);
506 ts->tick_stopped = 0;
507 ts->nohz_mode = NOHZ_MODE_INACTIVE;
508}
509#endif /* HIGH_RES_TIMERS */
510
511/**
512 * Async notification about clocksource changes
513 */
514void tick_clock_notify(void)
515{
516 int cpu;
517
518 for_each_possible_cpu(cpu)
519 set_bit(0, &per_cpu(tick_cpu_sched, cpu).check_clocks);
520}
521
522/*
523 * Async notification about clock event changes
524 */
525void tick_oneshot_notify(void)
526{
527 struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
528
529 set_bit(0, &ts->check_clocks);
530}
531
532/**
533 * Check, if a change happened, which makes oneshot possible.
534 *
535 * Called cyclic from the hrtimer softirq (driven by the timer
536 * softirq) allow_nohz signals, that we can switch into low-res nohz
537 * mode, because high resolution timers are disabled (either compile
538 * or runtime).
539 */
540int tick_check_oneshot_change(int allow_nohz)
541{
542 struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
543
544 if (!test_and_clear_bit(0, &ts->check_clocks))
545 return 0;
546
547 if (ts->nohz_mode != NOHZ_MODE_INACTIVE)
548 return 0;
549
550 if (!timekeeping_is_continuous() || !tick_is_oneshot_available())
551 return 0;
552
553 if (!allow_nohz)
554 return 1;
555
556 tick_nohz_switch_to_nohz();
557 return 0;
558}