author		Thomas Gleixner <tglx@linutronix.de>	2007-02-16 04:28:03 -0500
committer	Linus Torvalds <torvalds@woody.linux-foundation.org>	2007-02-16 11:13:59 -0500
commit		79bf2bb335b85db25d27421c798595a2fa2a0e82 (patch)
tree		550ec2654ae1dd65b871de7fe9c890108c6e86d8 /kernel
parent		f8381cba04ba8173fd5a2b8e5cd8b3290ee13a98 (diff)
[PATCH] tick-management: dyntick / highres functionality
With Ingo Molnar <mingo@elte.hu>

Add functions to provide dynamic ticks and high resolution timers. The code
which keeps track of jiffies and handles long idle periods is shared between
the tick based and the high resolution timer based dyntick implementations.
The dyntick functionality can be disabled on the kernel command line. Also
provide the infrastructure to support high resolution timers.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: john stultz <johnstul@us.ibm.com>
Cc: Roman Zippel <zippel@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
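The intended consumer of the shared idle-path code is the architecture idle
loop, which brackets its low-power wait with the stop/restart calls added
below in kernel/time/tick-sched.c. A minimal sketch of such a caller follows;
the cpu_idle() loop and idle_wait() are illustrative assumptions, not part of
this patch:

	/* Sketch: arch idle loop using the tick_nohz_* calls added below */
	void cpu_idle(void)
	{
		for (;;) {
			/* Stop the tick if the next event is far away */
			tick_nohz_stop_sched_tick();
			while (!need_resched())
				idle_wait();	/* arch-specific halt, assumed */
			/* Account the idle time and rearm the periodic tick */
			tick_nohz_restart_sched_tick();
			schedule();
		}
	}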
Diffstat (limited to 'kernel')
-rw-r--r--	kernel/hrtimer.c		 20
-rw-r--r--	kernel/softirq.c		 15
-rw-r--r--	kernel/time/Kconfig		 15
-rw-r--r--	kernel/time/Makefile		  2
-rw-r--r--	kernel/time/clocksource.c	  8
-rw-r--r--	kernel/time/tick-broadcast.c	191
-rw-r--r--	kernel/time/tick-common.c	 26
-rw-r--r--	kernel/time/tick-internal.h	 49
-rw-r--r--	kernel/time/tick-oneshot.c	 84
-rw-r--r--	kernel/time/tick-sched.c	558
-rw-r--r--	kernel/timer.c			  5
11 files changed, 959 insertions, 14 deletions
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index a2310d1bebe1..e04ef38ea3be 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -2,8 +2,8 @@
  * linux/kernel/hrtimer.c
  *
  * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
- * Copyright(C) 2005-2006, Red Hat, Inc., Ingo Molnar
- * Copyright(C) 2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
+ * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
+ * Copyright(C) 2006-2007 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
  *
  * High-resolution kernel timers
  *
@@ -38,6 +38,7 @@
 #include <linux/notifier.h>
 #include <linux/syscalls.h>
 #include <linux/interrupt.h>
+#include <linux/tick.h>
 
 #include <asm/uaccess.h>
 
@@ -288,7 +289,7 @@ ktime_t ktime_add_ns(const ktime_t kt, u64 nsec)
 /*
  * Divide a ktime value by a nanosecond value
  */
-static unsigned long ktime_divns(const ktime_t kt, s64 div)
+unsigned long ktime_divns(const ktime_t kt, s64 div)
 {
 	u64 dclc, inc, dns;
 	int sft = 0;
@@ -305,9 +306,6 @@ static unsigned long ktime_divns(const ktime_t kt, s64 div)
 
 	return (unsigned long) dclc;
 }
-
-#else /* BITS_PER_LONG < 64 */
-# define ktime_divns(kt, div)	(unsigned long)((kt).tv64 / (div))
 #endif /* BITS_PER_LONG >= 64 */
 
 /*
@@ -682,6 +680,16 @@ void hrtimer_run_queues(void)
 	struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
 	int i;
 
+	/*
+	 * This _is_ ugly: We have to check in the softirq context,
+	 * whether we can switch to highres and / or nohz mode. The
+	 * clocksource switch happens in the timer interrupt with
+	 * xtime_lock held. Notification from there only sets the
+	 * check bit in the tick_oneshot code, otherwise we might
+	 * deadlock vs. xtime_lock.
+	 */
+	tick_check_oneshot_change(1);
+
 	hrtimer_get_softirq_time(cpu_base);
 
 	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++)
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 14e1a14f94d2..8b75008e2bd8 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -17,6 +17,7 @@
 #include <linux/kthread.h>
 #include <linux/rcupdate.h>
 #include <linux/smp.h>
+#include <linux/tick.h>
 
 #include <asm/irq.h>
 /*
@@ -278,9 +279,11 @@ EXPORT_SYMBOL(do_softirq);
  */
 void irq_enter(void)
 {
-	account_system_vtime(current);
-	add_preempt_count(HARDIRQ_OFFSET);
-	trace_hardirq_enter();
+	__irq_enter();
+#ifdef CONFIG_NO_HZ
+	if (idle_cpu(smp_processor_id()))
+		tick_nohz_update_jiffies();
+#endif
 }
 
 #ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
@@ -299,6 +302,12 @@ void irq_exit(void)
 	sub_preempt_count(IRQ_EXIT_OFFSET);
 	if (!in_interrupt() && local_softirq_pending())
 		invoke_softirq();
+
+#ifdef CONFIG_NO_HZ
+	/* Make sure that timer wheel updates are propagated */
+	if (!in_interrupt() && idle_cpu(smp_processor_id()) && !need_resched())
+		tick_nohz_stop_sched_tick();
+#endif
 	preempt_enable_no_resched();
 }
 
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
new file mode 100644
index 000000000000..9ec54eb3667f
--- /dev/null
+++ b/kernel/time/Kconfig
@@ -0,0 +1,15 @@
+#
+# Timer subsystem related configuration options
+#
+config TICK_ONESHOT
+	bool
+	default n
+
+config NO_HZ
+	bool "Tickless System (Dynamic Ticks)"
+	depends on GENERIC_TIME && GENERIC_CLOCKEVENTS
+	select TICK_ONESHOT
+	help
+	  This option enables a tickless system: timer interrupts will
+	  only trigger on an as-needed basis both when the system is
+	  busy and when the system is idle.
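With CONFIG_NO_HZ enabled, dynamic ticks can still be disabled at boot time
via the "nohz=" parameter, which the tick-sched.c code further down registers
with __setup(). For example:

	nohz=off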
diff --git a/kernel/time/Makefile b/kernel/time/Makefile
index a941743c3ff8..f246bc836b9a 100644
--- a/kernel/time/Makefile
+++ b/kernel/time/Makefile
@@ -3,3 +3,5 @@ obj-y += ntp.o clocksource.o jiffies.o
 obj-$(CONFIG_GENERIC_CLOCKEVENTS)		+= clockevents.o
 obj-$(CONFIG_GENERIC_CLOCKEVENTS)		+= tick-common.o
 obj-$(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST)	+= tick-broadcast.o
+obj-$(CONFIG_TICK_ONESHOT)			+= tick-oneshot.o
+obj-$(CONFIG_TICK_ONESHOT)			+= tick-sched.o
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 3cb8ac978270..193a0793af95 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -29,6 +29,7 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */
+#include <linux/tick.h>
 
 /* XXX - Would like a better way for initializing curr_clocksource */
 extern struct clocksource clocksource_jiffies;
@@ -109,6 +110,13 @@ static void clocksource_watchdog(unsigned long data)
 		if ((cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) &&
 		    (watchdog->flags & CLOCK_SOURCE_IS_CONTINUOUS)) {
 			cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
+			/*
+			 * We just marked the clocksource as
+			 * highres-capable, notify the rest of the
+			 * system as well so that we transition
+			 * into high-res mode:
+			 */
+			tick_clock_notify();
 		}
 		cs->flags |= CLOCK_SOURCE_WATCHDOG;
 		cs->wd_last = csnow;
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 0ee4968ff791..8314ecb32d33 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -29,7 +29,7 @@
 
 struct tick_device tick_broadcast_device;
 static cpumask_t tick_broadcast_mask;
-DEFINE_SPINLOCK(tick_broadcast_lock);
+static DEFINE_SPINLOCK(tick_broadcast_lock);
 
 /*
  * Start the device in periodic mode
@@ -215,6 +215,8 @@ static void tick_do_broadcast_on_off(void *why)
 	else {
 		if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
 			tick_broadcast_start_periodic(bc);
+		else
+			tick_broadcast_setup_oneshot(bc);
 	}
 out:
 	spin_unlock_irqrestore(&tick_broadcast_lock, flags);
@@ -268,3 +270,190 @@ void tick_shutdown_broadcast(unsigned int *cpup)
 
 	spin_unlock_irqrestore(&tick_broadcast_lock, flags);
 }
+
+#ifdef CONFIG_TICK_ONESHOT
+
+static cpumask_t tick_broadcast_oneshot_mask;
+
+static int tick_broadcast_set_event(ktime_t expires, int force)
+{
+	struct clock_event_device *bc = tick_broadcast_device.evtdev;
+	ktime_t now = ktime_get();
+	int res;
+
+	for (;;) {
+		res = clockevents_program_event(bc, expires, now);
+		if (!res || !force)
+			return res;
+		now = ktime_get();
+		expires = ktime_add(now, ktime_set(0, bc->min_delta_ns));
+	}
+}
+
+/*
+ * Reprogram the broadcast device:
+ *
+ * Called with tick_broadcast_lock held and interrupts disabled.
+ */
+static int tick_broadcast_reprogram(void)
+{
+	ktime_t expires = { .tv64 = KTIME_MAX };
+	struct tick_device *td;
+	int cpu;
+
+	/*
+	 * Find the event which expires next:
+	 */
+	for (cpu = first_cpu(tick_broadcast_oneshot_mask); cpu != NR_CPUS;
+	     cpu = next_cpu(cpu, tick_broadcast_oneshot_mask)) {
+		td = &per_cpu(tick_cpu_device, cpu);
+		if (td->evtdev->next_event.tv64 < expires.tv64)
+			expires = td->evtdev->next_event;
+	}
+
+	if (expires.tv64 == KTIME_MAX)
+		return 0;
+
+	return tick_broadcast_set_event(expires, 0);
+}
+
+/*
+ * Handle oneshot mode broadcasting
+ */
+static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
+{
+	struct tick_device *td;
+	cpumask_t mask;
+	ktime_t now;
+	int cpu;
+
+	spin_lock(&tick_broadcast_lock);
+again:
+	dev->next_event.tv64 = KTIME_MAX;
+	mask = CPU_MASK_NONE;
+	now = ktime_get();
+	/* Find all expired events */
+	for (cpu = first_cpu(tick_broadcast_oneshot_mask); cpu != NR_CPUS;
+	     cpu = next_cpu(cpu, tick_broadcast_oneshot_mask)) {
+		td = &per_cpu(tick_cpu_device, cpu);
+		if (td->evtdev->next_event.tv64 <= now.tv64)
+			cpu_set(cpu, mask);
+	}
+
+	/*
+	 * Wakeup the cpus which have an expired event. The broadcast
+	 * device is reprogrammed in the return from idle code.
+	 */
+	if (!tick_do_broadcast(mask)) {
+		/*
+		 * The global event did not expire any CPU local
+		 * events. This happens in dyntick mode, as the
+		 * maximum PIT delta is quite small.
+		 */
+		if (tick_broadcast_reprogram())
+			goto again;
+	}
+	spin_unlock(&tick_broadcast_lock);
+}
+
+/*
+ * Powerstate information: The system enters/leaves a state, where
+ * affected devices might stop
+ */
+void tick_broadcast_oneshot_control(unsigned long reason)
+{
+	struct clock_event_device *bc, *dev;
+	struct tick_device *td;
+	unsigned long flags;
+	int cpu;
+
+	spin_lock_irqsave(&tick_broadcast_lock, flags);
+
+	/*
+	 * Periodic mode does not care about the enter/exit of power
+	 * states
+	 */
+	if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
+		goto out;
+
+	bc = tick_broadcast_device.evtdev;
+	cpu = smp_processor_id();
+	td = &per_cpu(tick_cpu_device, cpu);
+	dev = td->evtdev;
+
+	if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
+		goto out;
+
+	if (reason == CLOCK_EVT_NOTIFY_BROADCAST_ENTER) {
+		if (!cpu_isset(cpu, tick_broadcast_oneshot_mask)) {
+			cpu_set(cpu, tick_broadcast_oneshot_mask);
+			clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN);
+			if (dev->next_event.tv64 < bc->next_event.tv64)
+				tick_broadcast_set_event(dev->next_event, 1);
+		}
+	} else {
+		if (cpu_isset(cpu, tick_broadcast_oneshot_mask)) {
+			cpu_clear(cpu, tick_broadcast_oneshot_mask);
+			clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
+			if (dev->next_event.tv64 != KTIME_MAX)
+				tick_program_event(dev->next_event, 1);
+		}
+	}
+
+out:
+	spin_unlock_irqrestore(&tick_broadcast_lock, flags);
+}
+
+/**
+ * tick_broadcast_setup_oneshot - setup the broadcast device for oneshot mode
+ */
+void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
+{
+	if (bc->mode != CLOCK_EVT_MODE_ONESHOT) {
+		bc->event_handler = tick_handle_oneshot_broadcast;
+		clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
+		bc->next_event.tv64 = KTIME_MAX;
+	}
+}
+
+/*
+ * Select oneshot operating mode for the broadcast device
+ */
+void tick_broadcast_switch_to_oneshot(void)
+{
+	struct clock_event_device *bc;
+	unsigned long flags;
+
+	spin_lock_irqsave(&tick_broadcast_lock, flags);
+
+	tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT;
+	bc = tick_broadcast_device.evtdev;
+	if (bc)
+		tick_broadcast_setup_oneshot(bc);
+	spin_unlock_irqrestore(&tick_broadcast_lock, flags);
+}
+
+
+/*
+ * Remove a dead CPU from broadcasting
+ */
+void tick_shutdown_broadcast_oneshot(unsigned int *cpup)
+{
+	struct clock_event_device *bc;
+	unsigned long flags;
+	unsigned int cpu = *cpup;
+
+	spin_lock_irqsave(&tick_broadcast_lock, flags);
+
+	bc = tick_broadcast_device.evtdev;
+	cpu_clear(cpu, tick_broadcast_oneshot_mask);
+
+	if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT) {
+		if (bc && cpus_empty(tick_broadcast_oneshot_mask))
+			clockevents_set_mode(bc, CLOCK_EVT_MODE_SHUTDOWN);
+	}
+
+	spin_unlock_irqrestore(&tick_broadcast_lock, flags);
+}
+
+#endif
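The BROADCAST_ENTER/EXIT transitions handled above are meant to be triggered
by a deep-idle driver whose per-CPU event device stops in low-power states
(CLOCK_EVT_FEAT_C3STOP). A rough sketch of such a caller, assuming the
clockevents_notify() interface from the clockevents core; enter_deep_cstate()
is a hypothetical helper:

	#include <linux/clockchips.h>

	static void deep_idle(int cpu)
	{
		/* The local timer may stop: hand tick duties to broadcast */
		clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);

		enter_deep_cstate();	/* hypothetical low-power wait */

		/* Resume the local device and reprogram its next event */
		clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);
	}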
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 48167a6ae55c..c35d449be031 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -34,6 +34,16 @@ ktime_t tick_period;
 static int tick_do_timer_cpu = -1;
 DEFINE_SPINLOCK(tick_device_lock);
 
+/**
+ * tick_is_oneshot_available - check for a oneshot capable event device
+ */
+int tick_is_oneshot_available(void)
+{
+	struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
+
+	return dev && (dev->features & CLOCK_EVT_FEAT_ONESHOT);
+}
+
 /*
  * Periodic tick
  */
@@ -162,6 +172,8 @@ static void tick_setup_device(struct tick_device *td,
 
 	if (td->mode == TICKDEV_MODE_PERIODIC)
 		tick_setup_periodic(newdev, 0);
+	else
+		tick_setup_oneshot(newdev, handler, next_event);
 }
 
 /*
@@ -209,6 +221,12 @@ static int tick_check_new_device(struct clock_event_device *newdev)
 	 */
 	if (curdev) {
 		/*
+		 * Prefer one shot capable devices !
+		 */
+		if ((curdev->features & CLOCK_EVT_FEAT_ONESHOT) &&
+		    !(newdev->features & CLOCK_EVT_FEAT_ONESHOT))
+			goto out_bc;
+		/*
 		 * Check the rating
 		 */
 		if (curdev->rating >= newdev->rating)
@@ -226,6 +244,8 @@ static int tick_check_new_device(struct clock_event_device *newdev)
 	}
 	clockevents_exchange_device(curdev, newdev);
 	tick_setup_device(td, newdev, cpu, cpumask);
+	if (newdev->features & CLOCK_EVT_FEAT_ONESHOT)
+		tick_oneshot_notify();
 
 	spin_unlock_irqrestore(&tick_device_lock, flags);
 	return NOTIFY_STOP;
@@ -285,7 +305,13 @@ static int tick_notify(struct notifier_block *nb, unsigned long reason,
 		tick_broadcast_on_off(reason, dev);
 		break;
 
+	case CLOCK_EVT_NOTIFY_BROADCAST_ENTER:
+	case CLOCK_EVT_NOTIFY_BROADCAST_EXIT:
+		tick_broadcast_oneshot_control(reason);
+		break;
+
 	case CLOCK_EVT_NOTIFY_CPU_DEAD:
+		tick_shutdown_broadcast_oneshot(dev);
 		tick_shutdown_broadcast(dev);
 		tick_shutdown(dev);
 		break;
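The oneshot preference added to tick_check_new_device() only helps if drivers
advertise the capability. A minimal sketch of a clock event device
registration with the feature bit set; the device name, rating and driver
hooks are made-up values:

	static struct clock_event_device my_evt = {
		.name		= "my-timer",
		.features	= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
		.rating		= 300,
		.set_next_event	= my_set_next_event,	/* assumed driver hooks */
		.set_mode	= my_set_mode,
	};

	clockevents_register_device(&my_evt);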
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index 9272f446b21c..54861a0f29ff 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -10,12 +10,57 @@ extern void tick_setup_periodic(struct clock_event_device *dev, int broadcast);
 extern void tick_handle_periodic(struct clock_event_device *dev);
 
 /*
+ * NO_HZ / high resolution timer shared code
+ */
+#ifdef CONFIG_TICK_ONESHOT
+extern void tick_setup_oneshot(struct clock_event_device *newdev,
+			       void (*handler)(struct clock_event_device *),
+			       ktime_t nextevt);
+extern int tick_program_event(ktime_t expires, int force);
+extern void tick_oneshot_notify(void);
+extern int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *));
+
+# ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
+extern void tick_broadcast_setup_oneshot(struct clock_event_device *bc);
+extern void tick_broadcast_oneshot_control(unsigned long reason);
+extern void tick_broadcast_switch_to_oneshot(void);
+extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup);
+# else /* BROADCAST */
+static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
+{
+	BUG();
+}
+static inline void tick_broadcast_oneshot_control(unsigned long reason) { }
+static inline void tick_broadcast_switch_to_oneshot(void) { }
+static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { }
+# endif /* !BROADCAST */
+
+#else /* !ONESHOT */
+static inline
+void tick_setup_oneshot(struct clock_event_device *newdev,
+			void (*handler)(struct clock_event_device *),
+			ktime_t nextevt)
+{
+	BUG();
+}
+static inline int tick_program_event(ktime_t expires, int force)
+{
+	return 0;
+}
+static inline void tick_oneshot_notify(void) { }
+static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
+{
+	BUG();
+}
+static inline void tick_broadcast_oneshot_control(unsigned long reason) { }
+static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { }
+#endif /* !TICK_ONESHOT */
+
+/*
  * Broadcasting support
  */
 #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
 extern int tick_do_broadcast(cpumask_t mask);
-extern struct tick_device tick_broadcast_device;
-extern spinlock_t tick_broadcast_lock;
 
 extern int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu);
 extern int tick_check_broadcast_device(struct clock_event_device *dev);
diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c
new file mode 100644
index 000000000000..2e8b7ff863cc
--- /dev/null
+++ b/kernel/time/tick-oneshot.c
@@ -0,0 +1,84 @@
+/*
+ * linux/kernel/time/tick-oneshot.c
+ *
+ * This file contains functions which manage high resolution tick
+ * related events.
+ *
+ * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
+ * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
+ * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
+ *
+ * This code is licenced under the GPL version 2. For details see
+ * kernel-base/COPYING.
+ */
+#include <linux/cpu.h>
+#include <linux/err.h>
+#include <linux/hrtimer.h>
+#include <linux/irq.h>
+#include <linux/percpu.h>
+#include <linux/profile.h>
+#include <linux/sched.h>
+#include <linux/tick.h>
+
+#include "tick-internal.h"
+
+/**
+ * tick_program_event - program the per-CPU tick device for the next event
+ */
+int tick_program_event(ktime_t expires, int force)
+{
+	struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
+	ktime_t now = ktime_get();
+
+	while (1) {
+		int ret = clockevents_program_event(dev, expires, now);
+
+		if (!ret || !force)
+			return ret;
+		now = ktime_get();
+		expires = ktime_add(now, ktime_set(0, dev->min_delta_ns));
+	}
+}
+
+/**
+ * tick_setup_oneshot - setup the event device for oneshot mode (hres or nohz)
+ */
+void tick_setup_oneshot(struct clock_event_device *newdev,
+			void (*handler)(struct clock_event_device *),
+			ktime_t next_event)
+{
+	newdev->event_handler = handler;
+	clockevents_set_mode(newdev, CLOCK_EVT_MODE_ONESHOT);
+	clockevents_program_event(newdev, next_event, ktime_get());
+}
+
+/**
+ * tick_switch_to_oneshot - switch to oneshot mode
+ */
+int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *))
+{
+	struct tick_device *td = &__get_cpu_var(tick_cpu_device);
+	struct clock_event_device *dev = td->evtdev;
+
+	if (!dev || !(dev->features & CLOCK_EVT_FEAT_ONESHOT) ||
+	    !tick_device_is_functional(dev))
+		return -EINVAL;
+
+	td->mode = TICKDEV_MODE_ONESHOT;
+	dev->event_handler = handler;
+	clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
+	tick_broadcast_switch_to_oneshot();
+	return 0;
+}
+
+#ifdef CONFIG_HIGH_RES_TIMERS
+/**
+ * tick_init_highres - switch to high resolution mode
+ *
+ * Called with interrupts disabled.
+ */
+int tick_init_highres(void)
+{
+	return tick_switch_to_oneshot(hrtimer_interrupt);
+}
+#endif
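tick_init_highres() is the entry point the follow-up high resolution timers
patch uses once the clocksource and the clock event device both allow the
switch. A rough sketch of that caller; everything except tick_init_highres()
and tick_setup_sched_timer() is outside this patch and assumed:

	/* Sketch: hrtimer side of the highres switch (not in this patch) */
	static int hrtimer_switch_to_hres(void)
	{
		/* Retarget the tick device to hrtimer_interrupt, oneshot mode */
		if (tick_init_highres())
			return 0;	/* no oneshot capable device available */
		/* Emulate the periodic tick with a per-CPU hrtimer */
		tick_setup_sched_timer();
		return 1;
	}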
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
new file mode 100644
index 000000000000..99d35e2af182
--- /dev/null
+++ b/kernel/time/tick-sched.c
@@ -0,0 +1,558 @@
+/*
+ * linux/kernel/time/tick-sched.c
+ *
+ * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
+ * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
+ * Copyright(C) 2006-2007 Timesys Corp., Thomas Gleixner
+ *
+ * No idle tick implementation for low and high resolution timers
+ *
+ * Started by: Thomas Gleixner and Ingo Molnar
+ *
+ * For licencing details see kernel-base/COPYING
+ */
+#include <linux/cpu.h>
+#include <linux/err.h>
+#include <linux/hrtimer.h>
+#include <linux/interrupt.h>
+#include <linux/kernel_stat.h>
+#include <linux/percpu.h>
+#include <linux/profile.h>
+#include <linux/sched.h>
+#include <linux/tick.h>
+
+#include "tick-internal.h"
+
+/*
+ * Per cpu nohz control structure
+ */
+static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched);
+
+/*
+ * The time, when the last jiffy update happened. Protected by xtime_lock.
+ */
+static ktime_t last_jiffies_update;
+
+/*
+ * Must be called with interrupts disabled !
+ */
+static void tick_do_update_jiffies64(ktime_t now)
+{
+	unsigned long ticks = 0;
+	ktime_t delta;
+
+	/* Reevaluate with xtime_lock held */
+	write_seqlock(&xtime_lock);
+
+	delta = ktime_sub(now, last_jiffies_update);
+	if (delta.tv64 >= tick_period.tv64) {
+
+		delta = ktime_sub(delta, tick_period);
+		last_jiffies_update = ktime_add(last_jiffies_update,
+						tick_period);
+
+		/* Slow path for long timeouts */
+		if (unlikely(delta.tv64 >= tick_period.tv64)) {
+			s64 incr = ktime_to_ns(tick_period);
+
+			ticks = ktime_divns(delta, incr);
+
+			last_jiffies_update = ktime_add_ns(last_jiffies_update,
+							   incr * ticks);
+		}
+		do_timer(++ticks);
+	}
+	write_sequnlock(&xtime_lock);
+}
+
+/*
+ * Initialize and return the time of the last jiffies update.
+ */
+static ktime_t tick_init_jiffy_update(void)
+{
+	ktime_t period;
+
+	write_seqlock(&xtime_lock);
+	/* Did we start the jiffies update yet ? */
+	if (last_jiffies_update.tv64 == 0)
+		last_jiffies_update = tick_next_period;
+	period = last_jiffies_update;
+	write_sequnlock(&xtime_lock);
+	return period;
+}
+
+/*
+ * NOHZ - aka dynamic tick functionality
+ */
+#ifdef CONFIG_NO_HZ
+/*
+ * NO HZ enabled ?
+ */
+static int tick_nohz_enabled __read_mostly = 1;
+
+/*
+ * Enable / Disable tickless mode
+ */
+static int __init setup_tick_nohz(char *str)
+{
+	if (!strcmp(str, "off"))
+		tick_nohz_enabled = 0;
+	else if (!strcmp(str, "on"))
+		tick_nohz_enabled = 1;
+	else
+		return 0;
+	return 1;
+}
+
+__setup("nohz=", setup_tick_nohz);
+
+/**
+ * tick_nohz_update_jiffies - update jiffies when idle was interrupted
+ *
+ * Called from interrupt entry when the CPU was idle
+ *
+ * In case the sched_tick was stopped on this CPU, we have to check if jiffies
+ * must be updated. Otherwise an interrupt handler could use a stale jiffy
+ * value. We do this unconditionally on any cpu, as we don't know whether the
+ * cpu, which has the update task assigned, is in a long sleep.
+ */
+void tick_nohz_update_jiffies(void)
+{
+	int cpu = smp_processor_id();
+	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
+	unsigned long flags;
+	ktime_t now;
+
+	if (!ts->tick_stopped)
+		return;
+
+	cpu_clear(cpu, nohz_cpu_mask);
+	now = ktime_get();
+
+	local_irq_save(flags);
+	tick_do_update_jiffies64(now);
+	local_irq_restore(flags);
+}
+
+/**
+ * tick_nohz_stop_sched_tick - stop the idle tick from the idle task
+ *
+ * When the next event is more than a tick into the future, stop the idle tick.
+ * Called either from the idle loop or from irq_exit() when an idle period was
+ * just interrupted by an interrupt which did not cause a reschedule.
+ */
+void tick_nohz_stop_sched_tick(void)
+{
+	unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags;
+	struct tick_sched *ts;
+	ktime_t last_update, expires, now, delta;
+	int cpu;
+
+	local_irq_save(flags);
+
+	cpu = smp_processor_id();
+	ts = &per_cpu(tick_cpu_sched, cpu);
+
+	if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
+		goto end;
+
+	if (need_resched())
+		goto end;
+
+	cpu = smp_processor_id();
+	BUG_ON(local_softirq_pending());
+
+	now = ktime_get();
+	/*
+	 * When called from irq_exit we need to account the idle sleep time
+	 * correctly.
+	 */
+	if (ts->tick_stopped) {
+		delta = ktime_sub(now, ts->idle_entrytime);
+		ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
+	}
+
+	ts->idle_entrytime = now;
+	ts->idle_calls++;
+
+	/* Read jiffies and the time when jiffies were updated last */
+	do {
+		seq = read_seqbegin(&xtime_lock);
+		last_update = last_jiffies_update;
+		last_jiffies = jiffies;
+	} while (read_seqretry(&xtime_lock, seq));
+
+	/* Get the next timer wheel timer */
+	next_jiffies = get_next_timer_interrupt(last_jiffies);
+	delta_jiffies = next_jiffies - last_jiffies;
+
+	/*
+	 * Do not stop the tick, if we are only one off
+	 * or if the cpu is required for rcu
+	 */
+	if (!ts->tick_stopped && (delta_jiffies == 1 || rcu_needs_cpu(cpu)))
+		goto out;
+
+	/* Schedule the tick, if we are at least one jiffie off */
+	if ((long)delta_jiffies >= 1) {
+
+		if (rcu_needs_cpu(cpu))
+			delta_jiffies = 1;
+		else
+			cpu_set(cpu, nohz_cpu_mask);
+		/*
+		 * nohz_stop_sched_tick can be called several times before
+		 * the nohz_restart_sched_tick is called. This happens when
+		 * interrupts arrive which do not cause a reschedule. In the
+		 * first call we save the current tick time, so we can restart
+		 * the scheduler tick in nohz_restart_sched_tick.
+		 */
+		if (!ts->tick_stopped) {
+			ts->idle_tick = ts->sched_timer.expires;
+			ts->tick_stopped = 1;
+			ts->idle_jiffies = last_jiffies;
+		}
+		/*
+		 * calculate the expiry time for the next timer wheel
+		 * timer
+		 */
+		expires = ktime_add_ns(last_update, tick_period.tv64 *
+				       delta_jiffies);
+		ts->idle_expires = expires;
+		ts->idle_sleeps++;
+
+		if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
+			hrtimer_start(&ts->sched_timer, expires,
+				      HRTIMER_MODE_ABS);
+			/* Check, if the timer was already in the past */
+			if (hrtimer_active(&ts->sched_timer))
+				goto out;
+		} else if (!tick_program_event(expires, 0))
+			goto out;
+		/*
+		 * We are past the event already. So we crossed a
+		 * jiffie boundary. Update jiffies and raise the
+		 * softirq.
+		 */
+		tick_do_update_jiffies64(ktime_get());
+		cpu_clear(cpu, nohz_cpu_mask);
+	}
+	raise_softirq_irqoff(TIMER_SOFTIRQ);
+out:
+	ts->next_jiffies = next_jiffies;
+	ts->last_jiffies = last_jiffies;
+end:
+	local_irq_restore(flags);
+}
+
+/**
+ * tick_nohz_restart_sched_tick - restart the idle tick from the idle task
+ *
+ * Restart the idle tick when the CPU is woken up from idle
+ */
+void tick_nohz_restart_sched_tick(void)
+{
+	int cpu = smp_processor_id();
+	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
+	unsigned long ticks;
+	ktime_t now, delta;
+
+	if (!ts->tick_stopped)
+		return;
+
+	/* Update jiffies first */
+	now = ktime_get();
+
+	local_irq_disable();
+	tick_do_update_jiffies64(now);
+	cpu_clear(cpu, nohz_cpu_mask);
+
+	/* Account the idle time */
+	delta = ktime_sub(now, ts->idle_entrytime);
+	ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
+
+	/*
+	 * We stopped the tick in idle. update_process_times() would miss
+	 * the time we slept, as it does only a 1 tick accounting.
+	 * Enforce that this is accounted to idle !
+	 */
+	ticks = jiffies - ts->idle_jiffies;
+	/*
+	 * We might be one off. Do not randomly account a huge number of ticks!
+	 */
+	if (ticks && ticks < LONG_MAX) {
+		add_preempt_count(HARDIRQ_OFFSET);
+		account_system_time(current, HARDIRQ_OFFSET,
+				    jiffies_to_cputime(ticks));
+		sub_preempt_count(HARDIRQ_OFFSET);
+	}
+
+	/*
+	 * Cancel the scheduled timer and restore the tick
+	 */
+	ts->tick_stopped = 0;
+	hrtimer_cancel(&ts->sched_timer);
+	ts->sched_timer.expires = ts->idle_tick;
+
+	while (1) {
+		/* Forward the time to expire in the future */
+		hrtimer_forward(&ts->sched_timer, now, tick_period);
+
+		if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
+			hrtimer_start(&ts->sched_timer,
+				      ts->sched_timer.expires,
+				      HRTIMER_MODE_ABS);
+			/* Check, if the timer was already in the past */
+			if (hrtimer_active(&ts->sched_timer))
+				break;
+		} else {
+			if (!tick_program_event(ts->sched_timer.expires, 0))
+				break;
+		}
+		/* Update jiffies and reread time */
+		tick_do_update_jiffies64(now);
+		now = ktime_get();
+	}
+	local_irq_enable();
+}
+
+static int tick_nohz_reprogram(struct tick_sched *ts, ktime_t now)
+{
+	hrtimer_forward(&ts->sched_timer, now, tick_period);
+	return tick_program_event(ts->sched_timer.expires, 0);
+}
+
+/*
+ * The nohz low res interrupt handler
+ */
+static void tick_nohz_handler(struct clock_event_device *dev)
+{
+	struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
+	struct pt_regs *regs = get_irq_regs();
+	ktime_t now = ktime_get();
+
+	dev->next_event.tv64 = KTIME_MAX;
+
+	/* Check, if the jiffies need an update */
+	tick_do_update_jiffies64(now);
+
+	/*
+	 * When we are idle and the tick is stopped, we have to touch
+	 * the watchdog as we might not schedule for a really long
+	 * time. This happens on complete idle SMP systems while
+	 * waiting on the login prompt. We also increment the "start
+	 * of idle" jiffy stamp so the idle accounting adjustment we
+	 * do when we go busy again does not account too many ticks.
+	 */
+	if (ts->tick_stopped) {
+		touch_softlockup_watchdog();
+		ts->idle_jiffies++;
+	}
+
+	update_process_times(user_mode(regs));
+	profile_tick(CPU_PROFILING);
+
+	/* Do not restart, when we are in the idle loop */
+	if (ts->tick_stopped)
+		return;
+
+	while (tick_nohz_reprogram(ts, now)) {
+		now = ktime_get();
+		tick_do_update_jiffies64(now);
+	}
+}
+
+/**
+ * tick_nohz_switch_to_nohz - switch to nohz mode
+ */
+static void tick_nohz_switch_to_nohz(void)
+{
+	struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
+	ktime_t next;
+
+	if (!tick_nohz_enabled)
+		return;
+
+	local_irq_disable();
+	if (tick_switch_to_oneshot(tick_nohz_handler)) {
+		local_irq_enable();
+		return;
+	}
+
+	ts->nohz_mode = NOHZ_MODE_LOWRES;
+
+	/*
+	 * Recycle the hrtimer in ts, so we can share the
+	 * hrtimer_forward with the highres code.
+	 */
+	hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	/* Get the next period */
+	next = tick_init_jiffy_update();
+
+	for (;;) {
+		ts->sched_timer.expires = next;
+		if (!tick_program_event(next, 0))
+			break;
+		next = ktime_add(next, tick_period);
+	}
+	local_irq_enable();
+
+	printk(KERN_INFO "Switched to NOHz mode on CPU #%d\n",
+	       smp_processor_id());
+}
+
+#else
+
+static inline void tick_nohz_switch_to_nohz(void) { }
+
+#endif /* NO_HZ */
+
+/*
+ * High resolution timer specific code
+ */
+#ifdef CONFIG_HIGH_RES_TIMERS
+/*
+ * We rearm the timer until we get disabled by the idle code.
+ * Called with interrupts disabled and timer->base->cpu_base->lock held.
+ */
+static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
+{
+	struct tick_sched *ts =
+		container_of(timer, struct tick_sched, sched_timer);
+	struct hrtimer_cpu_base *base = timer->base->cpu_base;
+	struct pt_regs *regs = get_irq_regs();
+	ktime_t now = ktime_get();
+
+	/* Check, if the jiffies need an update */
+	tick_do_update_jiffies64(now);
+
+	/*
+	 * Do not call, when we are not in irq context and have
+	 * no valid regs pointer
+	 */
+	if (regs) {
+		/*
+		 * When we are idle and the tick is stopped, we have to touch
+		 * the watchdog as we might not schedule for a really long
+		 * time. This happens on complete idle SMP systems while
+		 * waiting on the login prompt. We also increment the "start of
+		 * idle" jiffy stamp so the idle accounting adjustment we do
+		 * when we go busy again does not account too many ticks.
+		 */
+		if (ts->tick_stopped) {
+			touch_softlockup_watchdog();
+			ts->idle_jiffies++;
+		}
+		/*
+		 * update_process_times() might take tasklist_lock, hence
+		 * drop the base lock. sched-tick hrtimers are per-CPU and
+		 * never accessible by userspace APIs, so this is safe to do.
+		 */
+		spin_unlock(&base->lock);
+		update_process_times(user_mode(regs));
+		profile_tick(CPU_PROFILING);
+		spin_lock(&base->lock);
+	}
+
+	/* Do not restart, when we are in the idle loop */
+	if (ts->tick_stopped)
+		return HRTIMER_NORESTART;
+
+	hrtimer_forward(timer, now, tick_period);
+
+	return HRTIMER_RESTART;
+}
+
+/**
+ * tick_setup_sched_timer - setup the tick emulation timer
+ */
+void tick_setup_sched_timer(void)
+{
+	struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
+	ktime_t now = ktime_get();
+
+	/*
+	 * Emulate tick processing via per-CPU hrtimers:
+	 */
+	hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	ts->sched_timer.function = tick_sched_timer;
+	ts->sched_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
+
+	/* Get the next period */
+	ts->sched_timer.expires = tick_init_jiffy_update();
+
+	for (;;) {
+		hrtimer_forward(&ts->sched_timer, now, tick_period);
+		hrtimer_start(&ts->sched_timer, ts->sched_timer.expires,
+			      HRTIMER_MODE_ABS);
+		/* Check, if the timer was already in the past */
+		if (hrtimer_active(&ts->sched_timer))
+			break;
+		now = ktime_get();
+	}
+
+#ifdef CONFIG_NO_HZ
+	if (tick_nohz_enabled)
+		ts->nohz_mode = NOHZ_MODE_HIGHRES;
+#endif
+}
+
+void tick_cancel_sched_timer(int cpu)
+{
+	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
+
+	if (ts->sched_timer.base)
+		hrtimer_cancel(&ts->sched_timer);
+	ts->tick_stopped = 0;
+	ts->nohz_mode = NOHZ_MODE_INACTIVE;
+}
+#endif /* HIGH_RES_TIMERS */
+
+/*
+ * Async notification about clocksource changes
+ */
+void tick_clock_notify(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		set_bit(0, &per_cpu(tick_cpu_sched, cpu).check_clocks);
+}
+
+/*
+ * Async notification about clock event changes
+ */
+void tick_oneshot_notify(void)
+{
+	struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
+
+	set_bit(0, &ts->check_clocks);
+}
+
+/*
+ * Check, if a change happened, which makes oneshot possible.
+ *
+ * Called cyclically from the hrtimer softirq (driven by the timer
+ * softirq). allow_nohz signals that we can switch into low-res nohz
+ * mode, because high resolution timers are disabled (either at compile
+ * time or at runtime).
+ */
+int tick_check_oneshot_change(int allow_nohz)
+{
+	struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
+
+	if (!test_and_clear_bit(0, &ts->check_clocks))
+		return 0;
+
+	if (ts->nohz_mode != NOHZ_MODE_INACTIVE)
+		return 0;
+
+	if (!timekeeping_is_continuous() || !tick_is_oneshot_available())
+		return 0;
+
+	if (!allow_nohz)
+		return 1;
+
+	tick_nohz_switch_to_nohz();
+	return 0;
+}
diff --git a/kernel/timer.c b/kernel/timer.c
index 7d522bdf8265..f058e6cfd50c 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -34,7 +34,7 @@
 #include <linux/cpu.h>
 #include <linux/syscalls.h>
 #include <linux/delay.h>
-#include <linux/clockchips.h>
+#include <linux/tick.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -874,6 +874,8 @@ static void change_clocksource(void)
 	clock->xtime_nsec = 0;
 	clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH);
 
+	tick_clock_notify();
+
 	printk(KERN_INFO "Time: %s clocksource has been installed.\n",
 	       clock->name);
 }
@@ -937,7 +939,6 @@ void __init timekeeping_init(void)
 	write_sequnlock_irqrestore(&xtime_lock, flags);
 }
 
-
 /* flag for if timekeeping is suspended */
 static int timekeeping_suspended;
 /* time in seconds when suspend began */