Diffstat (limited to 'kernel')
-rw-r--r--	kernel/hrtimer.c		|  20
-rw-r--r--	kernel/softirq.c		|  15
-rw-r--r--	kernel/time/Kconfig		|  15
-rw-r--r--	kernel/time/Makefile		|   2
-rw-r--r--	kernel/time/clocksource.c	|   8
-rw-r--r--	kernel/time/tick-broadcast.c	| 191
-rw-r--r--	kernel/time/tick-common.c	|  26
-rw-r--r--	kernel/time/tick-internal.h	|  49
-rw-r--r--	kernel/time/tick-oneshot.c	|  84
-rw-r--r--	kernel/time/tick-sched.c	| 558
-rw-r--r--	kernel/timer.c			|   5
11 files changed, 959 insertions(+), 14 deletions(-)
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index a2310d1bebe1..e04ef38ea3be 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -2,8 +2,8 @@
  * linux/kernel/hrtimer.c
  *
  * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
- * Copyright(C) 2005-2006, Red Hat, Inc., Ingo Molnar
- * Copyright(C) 2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
+ * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
+ * Copyright(C) 2006-2007 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
  *
  * High-resolution kernel timers
  *
@@ -38,6 +38,7 @@
 #include <linux/notifier.h>
 #include <linux/syscalls.h>
 #include <linux/interrupt.h>
+#include <linux/tick.h>
 
 #include <asm/uaccess.h>
 
@@ -288,7 +289,7 @@ ktime_t ktime_add_ns(const ktime_t kt, u64 nsec)
 /*
  * Divide a ktime value by a nanosecond value
  */
-static unsigned long ktime_divns(const ktime_t kt, s64 div)
+unsigned long ktime_divns(const ktime_t kt, s64 div)
 {
 	u64 dclc, inc, dns;
 	int sft = 0;
@@ -305,9 +306,6 @@ static unsigned long ktime_divns(const ktime_t kt, s64 div)
 
 	return (unsigned long) dclc;
 }
-
-#else /* BITS_PER_LONG < 64 */
-# define ktime_divns(kt, div)		(unsigned long)((kt).tv64 / (div))
 #endif /* BITS_PER_LONG >= 64 */
 
 /*
@@ -682,6 +680,16 @@ void hrtimer_run_queues(void)
 	struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
 	int i;
 
+	/*
+	 * This _is_ ugly: We have to check in the softirq context,
+	 * whether we can switch to highres and / or nohz mode. The
+	 * clocksource switch happens in the timer interrupt with
+	 * xtime_lock held. Notification from there only sets the
+	 * check bit in the tick_oneshot code, otherwise we might
+	 * deadlock vs. xtime_lock.
+	 */
+	tick_check_oneshot_change(1);
+
 	hrtimer_get_softirq_time(cpu_base);
 
 	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++)
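
A side note on the ktime_divns() change above: the function lives in the
BITS_PER_LONG < 64 branch and loses its static so that tick-sched.c can
divide a long idle delta by the tick period without a full 64/64 division.
The elided middle of the function pre-shifts the divisor until it fits in
32 bits. A rough user-space sketch of that approach (sketch_ktime_divns is
a hypothetical stand-in; the plain division models the kernel's do_div()):

#include <stdint.h>
#include <stdio.h>

static unsigned long sketch_ktime_divns(int64_t kt_ns, int64_t div)
{
	uint64_t dclc = (uint64_t)kt_ns;
	int sft = 0;

	/* Make sure the divisor is less than 2^32 */
	while (div >> 32) {
		sft++;
		div >>= 1;
	}
	dclc >>= sft;			/* scale the dividend the same way */
	dclc /= (uint64_t)div;		/* 64/32 division, do_div() in the kernel */

	return (unsigned long)dclc;
}

int main(void)
{
	/* 1 s divided by a 4 ms tick period -> 250 */
	printf("%lu\n", sketch_ktime_divns(1000000000LL, 4000000LL));
	return 0;
}
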
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 14e1a14f94d2..8b75008e2bd8 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -17,6 +17,7 @@
 #include <linux/kthread.h>
 #include <linux/rcupdate.h>
 #include <linux/smp.h>
+#include <linux/tick.h>
 
 #include <asm/irq.h>
 /*
@@ -278,9 +279,11 @@ EXPORT_SYMBOL(do_softirq);
  */
 void irq_enter(void)
 {
-	account_system_vtime(current);
-	add_preempt_count(HARDIRQ_OFFSET);
-	trace_hardirq_enter();
+	__irq_enter();
+#ifdef CONFIG_NO_HZ
+	if (idle_cpu(smp_processor_id()))
+		tick_nohz_update_jiffies();
+#endif
 }
 
 #ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
@@ -299,6 +302,12 @@ void irq_exit(void)
 	sub_preempt_count(IRQ_EXIT_OFFSET);
 	if (!in_interrupt() && local_softirq_pending())
 		invoke_softirq();
+
+#ifdef CONFIG_NO_HZ
+	/* Make sure that timer wheel updates are propagated */
+	if (!in_interrupt() && idle_cpu(smp_processor_id()) && !need_resched())
+		tick_nohz_stop_sched_tick();
+#endif
 	preempt_enable_no_resched();
 }
 
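
The irq_enter()/irq_exit() hooks above are only half of the picture: the
primary caller of tick_nohz_stop_sched_tick() is the architecture idle
loop, which is not part of this diff. A hedged, compilable sketch of the
expected calling pattern (the loop shape is an assumption, and all the
stubbed functions stand in for kernel symbols of the same names):

#include <stdbool.h>
#include <stdio.h>

/* Stubs so the sketch compiles stand-alone; the real symbols are kernel ones */
static int halts;
static bool need_resched(void) { return halts++ > 2; }
static void arch_safe_halt(void) { }	/* placeholder halt primitive */
static void tick_nohz_stop_sched_tick(void) { puts("tick off"); }
static void tick_nohz_restart_sched_tick(void) { puts("tick on"); }

static void sketch_cpu_idle_once(void)
{
	tick_nohz_stop_sched_tick();	/* switch the tick off while idle */
	while (!need_resched())
		arch_safe_halt();	/* wait for the next event */
	tick_nohz_restart_sched_tick();	/* account idle time, rearm the tick */
}

int main(void)
{
	sketch_cpu_idle_once();		/* prints "tick off" then "tick on" */
	return 0;
}
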
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
new file mode 100644
index 000000000000..9ec54eb3667f
--- /dev/null
+++ b/kernel/time/Kconfig
@@ -0,0 +1,15 @@
+#
+# Timer subsystem related configuration options
+#
+config TICK_ONESHOT
+	bool
+	default n
+
+config NO_HZ
+	bool "Tickless System (Dynamic Ticks)"
+	depends on GENERIC_TIME && GENERIC_CLOCKEVENTS
+	select TICK_ONESHOT
+	help
+	  This option enables a tickless system: timer interrupts will
+	  only trigger on an as-needed basis both when the system is
+	  busy and when the system is idle.
diff --git a/kernel/time/Makefile b/kernel/time/Makefile
index a941743c3ff8..f246bc836b9a 100644
--- a/kernel/time/Makefile
+++ b/kernel/time/Makefile
@@ -3,3 +3,5 @@ obj-y += ntp.o clocksource.o jiffies.o
 obj-$(CONFIG_GENERIC_CLOCKEVENTS)		+= clockevents.o
 obj-$(CONFIG_GENERIC_CLOCKEVENTS)		+= tick-common.o
 obj-$(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST)	+= tick-broadcast.o
+obj-$(CONFIG_TICK_ONESHOT)			+= tick-oneshot.o
+obj-$(CONFIG_TICK_ONESHOT)			+= tick-sched.o
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 3cb8ac978270..193a0793af95 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -29,6 +29,7 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */
+#include <linux/tick.h>
 
 /* XXX - Would like a better way for initializing curr_clocksource */
 extern struct clocksource clocksource_jiffies;
@@ -109,6 +110,13 @@ static void clocksource_watchdog(unsigned long data)
 		if ((cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) &&
 		    (watchdog->flags & CLOCK_SOURCE_IS_CONTINUOUS)) {
 			cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
+			/*
+			 * We just marked the clocksource as
+			 * highres-capable, notify the rest of the
+			 * system as well so that we transition
+			 * into high-res mode:
+			 */
+			tick_clock_notify();
 		}
 		cs->flags |= CLOCK_SOURCE_WATCHDOG;
 		cs->wd_last = csnow;
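
Note that tick_clock_notify() does no mode switching here; it only sets a
per-cpu check bit, which tick_check_oneshot_change() later polls and clears
from softirq context (see the comment added to hrtimer_run_queues() above),
so nothing has to lock against xtime_lock from the timer interrupt. A
minimal user-space model of that set-then-poll handshake (names are
illustrative; C11 atomics stand in for set_bit()/test_and_clear_bit()):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool check_pending;

static void model_tick_clock_notify(void)	/* timer-interrupt side */
{
	atomic_store(&check_pending, true);	/* set_bit(0, ...) */
}

static bool model_check_oneshot_change(void)	/* softirq side */
{
	/* test_and_clear_bit(0, ...) */
	return atomic_exchange(&check_pending, false);
}

int main(void)
{
	model_tick_clock_notify();
	printf("%d %d\n", model_check_oneshot_change(),
	       model_check_oneshot_change());	/* prints "1 0" */
	return 0;
}
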
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 0ee4968ff791..8314ecb32d33 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -29,7 +29,7 @@
 
 struct tick_device tick_broadcast_device;
 static cpumask_t tick_broadcast_mask;
-DEFINE_SPINLOCK(tick_broadcast_lock);
+static DEFINE_SPINLOCK(tick_broadcast_lock);
 
 /*
  * Start the device in periodic mode
@@ -215,6 +215,8 @@ static void tick_do_broadcast_on_off(void *why)
 	else {
 		if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
 			tick_broadcast_start_periodic(bc);
+		else
+			tick_broadcast_setup_oneshot(bc);
 	}
 out:
 	spin_unlock_irqrestore(&tick_broadcast_lock, flags);
@@ -268,3 +270,190 @@ void tick_shutdown_broadcast(unsigned int *cpup)
 
 	spin_unlock_irqrestore(&tick_broadcast_lock, flags);
 }
+
+#ifdef CONFIG_TICK_ONESHOT
+
+static cpumask_t tick_broadcast_oneshot_mask;
+
+static int tick_broadcast_set_event(ktime_t expires, int force)
+{
+	struct clock_event_device *bc = tick_broadcast_device.evtdev;
+	ktime_t now = ktime_get();
+	int res;
+
+	for (;;) {
+		res = clockevents_program_event(bc, expires, now);
+		if (!res || !force)
+			return res;
+		now = ktime_get();
+		expires = ktime_add(now, ktime_set(0, bc->min_delta_ns));
+	}
+}
+
+/*
+ * Reprogram the broadcast device:
+ *
+ * Called with tick_broadcast_lock held and interrupts disabled.
+ */
+static int tick_broadcast_reprogram(void)
+{
+	ktime_t expires = { .tv64 = KTIME_MAX };
+	struct tick_device *td;
+	int cpu;
+
+	/*
+	 * Find the event which expires next:
+	 */
+	for (cpu = first_cpu(tick_broadcast_oneshot_mask); cpu != NR_CPUS;
+	     cpu = next_cpu(cpu, tick_broadcast_oneshot_mask)) {
+		td = &per_cpu(tick_cpu_device, cpu);
+		if (td->evtdev->next_event.tv64 < expires.tv64)
+			expires = td->evtdev->next_event;
+	}
+
+	if (expires.tv64 == KTIME_MAX)
+		return 0;
+
+	return tick_broadcast_set_event(expires, 0);
+}
+
+/*
+ * Handle oneshot mode broadcasting
+ */
+static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
+{
+	struct tick_device *td;
+	cpumask_t mask;
+	ktime_t now;
+	int cpu;
+
+	spin_lock(&tick_broadcast_lock);
+again:
+	dev->next_event.tv64 = KTIME_MAX;
+	mask = CPU_MASK_NONE;
+	now = ktime_get();
+	/* Find all expired events */
+	for (cpu = first_cpu(tick_broadcast_oneshot_mask); cpu != NR_CPUS;
+	     cpu = next_cpu(cpu, tick_broadcast_oneshot_mask)) {
+		td = &per_cpu(tick_cpu_device, cpu);
+		if (td->evtdev->next_event.tv64 <= now.tv64)
+			cpu_set(cpu, mask);
+	}
+
+	/*
+	 * Wakeup the cpus which have an expired event. The broadcast
+	 * device is reprogrammed in the return from idle code.
+	 */
+	if (!tick_do_broadcast(mask)) {
+		/*
+		 * The global event did not expire any CPU local
+		 * events. This happens in dyntick mode, as the
+		 * maximum PIT delta is quite small.
+		 */
+		if (tick_broadcast_reprogram())
+			goto again;
+	}
+	spin_unlock(&tick_broadcast_lock);
+}
+
+/*
+ * Powerstate information: The system enters/leaves a state, where
+ * affected devices might stop
+ */
+void tick_broadcast_oneshot_control(unsigned long reason)
+{
+	struct clock_event_device *bc, *dev;
+	struct tick_device *td;
+	unsigned long flags;
+	int cpu;
+
+	spin_lock_irqsave(&tick_broadcast_lock, flags);
+
+	/*
+	 * Periodic mode does not care about the enter/exit of power
+	 * states
+	 */
+	if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
+		goto out;
+
+	bc = tick_broadcast_device.evtdev;
+	cpu = smp_processor_id();
+	td = &per_cpu(tick_cpu_device, cpu);
+	dev = td->evtdev;
+
+	if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
+		goto out;
+
+	if (reason == CLOCK_EVT_NOTIFY_BROADCAST_ENTER) {
+		if (!cpu_isset(cpu, tick_broadcast_oneshot_mask)) {
+			cpu_set(cpu, tick_broadcast_oneshot_mask);
+			clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN);
+			if (dev->next_event.tv64 < bc->next_event.tv64)
+				tick_broadcast_set_event(dev->next_event, 1);
+		}
+	} else {
+		if (cpu_isset(cpu, tick_broadcast_oneshot_mask)) {
+			cpu_clear(cpu, tick_broadcast_oneshot_mask);
+			clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
+			if (dev->next_event.tv64 != KTIME_MAX)
+				tick_program_event(dev->next_event, 1);
+		}
+	}
+
+out:
+	spin_unlock_irqrestore(&tick_broadcast_lock, flags);
+}
+
+/**
+ * tick_broadcast_setup_oneshot - setup the broadcast device for oneshot
+ */
+void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
+{
+	if (bc->mode != CLOCK_EVT_MODE_ONESHOT) {
+		bc->event_handler = tick_handle_oneshot_broadcast;
+		clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
+		bc->next_event.tv64 = KTIME_MAX;
+	}
+}
+
+/*
+ * Select oneshot operating mode for the broadcast device
+ */
+void tick_broadcast_switch_to_oneshot(void)
+{
+	struct clock_event_device *bc;
+	unsigned long flags;
+
+	spin_lock_irqsave(&tick_broadcast_lock, flags);
+
+	tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT;
+	bc = tick_broadcast_device.evtdev;
+	if (bc)
+		tick_broadcast_setup_oneshot(bc);
+	spin_unlock_irqrestore(&tick_broadcast_lock, flags);
+}
+
+
+/*
+ * Remove a dead CPU from broadcasting
+ */
+void tick_shutdown_broadcast_oneshot(unsigned int *cpup)
+{
+	struct clock_event_device *bc;
+	unsigned long flags;
+	unsigned int cpu = *cpup;
+
+	spin_lock_irqsave(&tick_broadcast_lock, flags);
+
+	bc = tick_broadcast_device.evtdev;
+	cpu_clear(cpu, tick_broadcast_oneshot_mask);
+
+	if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT) {
+		if (bc && cpus_empty(tick_broadcast_oneshot_mask))
+			clockevents_set_mode(bc, CLOCK_EVT_MODE_SHUTDOWN);
+	}
+
+	spin_unlock_irqrestore(&tick_broadcast_lock, flags);
+}
+
+#endif
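
The force parameter of tick_broadcast_set_event() above (and of
tick_program_event() in tick-oneshot.c below) implements a retry policy: if
programming fails because the requested expiry already lies in the past,
the expiry is pushed min_delta_ns beyond "now" and programming is retried.
A small user-space model of that loop (program_event() is a stand-in that
rejects past expiries, as clockevents_program_event() does):

#include <stdint.h>
#include <stdio.h>

static const int64_t min_delta_ns = 1000;

static int program_event(int64_t expires, int64_t now)
{
	return expires <= now ? -1 : 0;		/* -ETIME in the kernel */
}

static int set_event(int64_t now, int64_t expires, int force)
{
	int res;

	for (;;) {
		res = program_event(expires, now);
		if (!res || !force)
			return res;
		expires = now + min_delta_ns;	/* push past "now", retry */
	}
}

int main(void)
{
	printf("unforced: %d, forced: %d\n",
	       set_event(1000000, 500, 0),	/* fails with -1 */
	       set_event(1000000, 500, 1));	/* retried, succeeds: 0 */
	return 0;
}
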
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 48167a6ae55c..c35d449be031 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -34,6 +34,16 @@ ktime_t tick_period;
 static int tick_do_timer_cpu = -1;
 DEFINE_SPINLOCK(tick_device_lock);
 
+/**
+ * tick_is_oneshot_available - check for a oneshot capable event device
+ */
+int tick_is_oneshot_available(void)
+{
+	struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
+
+	return dev && (dev->features & CLOCK_EVT_FEAT_ONESHOT);
+}
+
 /*
  * Periodic tick
  */
@@ -162,6 +172,8 @@ static void tick_setup_device(struct tick_device *td,
 
 	if (td->mode == TICKDEV_MODE_PERIODIC)
 		tick_setup_periodic(newdev, 0);
+	else
+		tick_setup_oneshot(newdev, handler, next_event);
 }
 
 /*
@@ -209,6 +221,12 @@ static int tick_check_new_device(struct clock_event_device *newdev)
 	 */
 	if (curdev) {
 		/*
+		 * Prefer one shot capable devices !
+		 */
+		if ((curdev->features & CLOCK_EVT_FEAT_ONESHOT) &&
+		    !(newdev->features & CLOCK_EVT_FEAT_ONESHOT))
+			goto out_bc;
+		/*
 		 * Check the rating
 		 */
 		if (curdev->rating >= newdev->rating)
@@ -226,6 +244,8 @@ static int tick_check_new_device(struct clock_event_device *newdev)
 	}
 	clockevents_exchange_device(curdev, newdev);
 	tick_setup_device(td, newdev, cpu, cpumask);
+	if (newdev->features & CLOCK_EVT_FEAT_ONESHOT)
+		tick_oneshot_notify();
 
 	spin_unlock_irqrestore(&tick_device_lock, flags);
 	return NOTIFY_STOP;
@@ -285,7 +305,13 @@ static int tick_notify(struct notifier_block *nb, unsigned long reason,
 		tick_broadcast_on_off(reason, dev);
 		break;
 
+	case CLOCK_EVT_NOTIFY_BROADCAST_ENTER:
+	case CLOCK_EVT_NOTIFY_BROADCAST_EXIT:
+		tick_broadcast_oneshot_control(reason);
+		break;
+
 	case CLOCK_EVT_NOTIFY_CPU_DEAD:
+		tick_shutdown_broadcast_oneshot(dev);
 		tick_shutdown_broadcast(dev);
 		tick_shutdown(dev);
 		break;
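
tick_check_new_device() now applies two criteria when a tick device is
already installed: a oneshot-capable device is never replaced by a purely
periodic one, and otherwise the higher rating wins. A self-contained sketch
of just that predicate (the struct and feature bit mirror
struct clock_event_device but are redeclared here for illustration):

#include <stdbool.h>
#include <stdio.h>

#define FEAT_ONESHOT	0x02	/* mirrors CLOCK_EVT_FEAT_ONESHOT */

struct evtdev_sketch {
	unsigned int features;
	int rating;
};

/* Would tick_check_new_device() pick "newdev" over "curdev"? */
static bool prefer_new(const struct evtdev_sketch *curdev,
		       const struct evtdev_sketch *newdev)
{
	if ((curdev->features & FEAT_ONESHOT) &&
	    !(newdev->features & FEAT_ONESHOT))
		return false;			/* never lose oneshot capability */
	return newdev->rating > curdev->rating;	/* then decide by rating */
}

int main(void)
{
	struct evtdev_sketch lapic = { FEAT_ONESHOT, 100 };
	struct evtdev_sketch pit = { 0, 110 };

	/* a higher rating does not beat oneshot capability */
	printf("%d\n", prefer_new(&lapic, &pit));	/* prints 0 */
	return 0;
}
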
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index 9272f446b21c..54861a0f29ff 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -10,12 +10,57 @@ extern void tick_setup_periodic(struct clock_event_device *dev, int broadcast);
 extern void tick_handle_periodic(struct clock_event_device *dev);
 
 /*
+ * NO_HZ / high resolution timer shared code
+ */
+#ifdef CONFIG_TICK_ONESHOT
+extern void tick_setup_oneshot(struct clock_event_device *newdev,
+			       void (*handler)(struct clock_event_device *),
+			       ktime_t nextevt);
+extern int tick_program_event(ktime_t expires, int force);
+extern void tick_oneshot_notify(void);
+extern int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *));
+
+# ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
+extern void tick_broadcast_setup_oneshot(struct clock_event_device *bc);
+extern void tick_broadcast_oneshot_control(unsigned long reason);
+extern void tick_broadcast_switch_to_oneshot(void);
+extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup);
+# else /* BROADCAST */
+static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
+{
+	BUG();
+}
+static inline void tick_broadcast_oneshot_control(unsigned long reason) { }
+static inline void tick_broadcast_switch_to_oneshot(void) { }
+static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { }
+# endif /* !BROADCAST */
+
+#else /* !ONESHOT */
+static inline
+void tick_setup_oneshot(struct clock_event_device *newdev,
+			void (*handler)(struct clock_event_device *),
+			ktime_t nextevt)
+{
+	BUG();
+}
+static inline int tick_program_event(ktime_t expires, int force)
+{
+	return 0;
+}
+static inline void tick_oneshot_notify(void) { }
+static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
+{
+	BUG();
+}
+static inline void tick_broadcast_oneshot_control(unsigned long reason) { }
+static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { }
+#endif /* !TICK_ONESHOT */
+
+/*
  * Broadcasting support
  */
 #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
 extern int tick_do_broadcast(cpumask_t mask);
-extern struct tick_device tick_broadcast_device;
-extern spinlock_t tick_broadcast_lock;
 
 extern int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu);
 extern int tick_check_broadcast_device(struct clock_event_device *dev);
diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c
new file mode 100644
index 000000000000..2e8b7ff863cc
--- /dev/null
+++ b/kernel/time/tick-oneshot.c
@@ -0,0 +1,84 @@
+/*
+ * linux/kernel/time/tick-oneshot.c
+ *
+ * This file contains functions which manage high resolution tick
+ * related events.
+ *
+ * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
+ * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
+ * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
+ *
+ * This code is licenced under the GPL version 2. For details see
+ * kernel-base/COPYING.
+ */
+#include <linux/cpu.h>
+#include <linux/err.h>
+#include <linux/hrtimer.h>
+#include <linux/irq.h>
+#include <linux/percpu.h>
+#include <linux/profile.h>
+#include <linux/sched.h>
+#include <linux/tick.h>
+
+#include "tick-internal.h"
+
+/**
+ * tick_program_event
+ */
+int tick_program_event(ktime_t expires, int force)
+{
+	struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
+	ktime_t now = ktime_get();
+
+	while (1) {
+		int ret = clockevents_program_event(dev, expires, now);
+
+		if (!ret || !force)
+			return ret;
+		now = ktime_get();
+		expires = ktime_add(now, ktime_set(0, dev->min_delta_ns));
+	}
+}
+
+/**
+ * tick_setup_oneshot - setup the event device for oneshot mode (hres or nohz)
+ */
+void tick_setup_oneshot(struct clock_event_device *newdev,
+			void (*handler)(struct clock_event_device *),
+			ktime_t next_event)
+{
+	newdev->event_handler = handler;
+	clockevents_set_mode(newdev, CLOCK_EVT_MODE_ONESHOT);
+	clockevents_program_event(newdev, next_event, ktime_get());
+}
+
+/**
+ * tick_switch_to_oneshot - switch to oneshot mode
+ */
+int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *))
+{
+	struct tick_device *td = &__get_cpu_var(tick_cpu_device);
+	struct clock_event_device *dev = td->evtdev;
+
+	if (!dev || !(dev->features & CLOCK_EVT_FEAT_ONESHOT) ||
+	    !tick_device_is_functional(dev))
+		return -EINVAL;
+
+	td->mode = TICKDEV_MODE_ONESHOT;
+	dev->event_handler = handler;
+	clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
+	tick_broadcast_switch_to_oneshot();
+	return 0;
+}
+
+#ifdef CONFIG_HIGH_RES_TIMERS
+/**
+ * tick_init_highres - switch to high resolution mode
+ *
+ * Called with interrupts disabled.
+ */
+int tick_init_highres(void)
+{
+	return tick_switch_to_oneshot(hrtimer_interrupt);
+}
+#endif
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
new file mode 100644
index 000000000000..99d35e2af182
--- /dev/null
+++ b/kernel/time/tick-sched.c
@@ -0,0 +1,558 @@
+/*
+ * linux/kernel/time/tick-sched.c
+ *
+ * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
+ * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
+ * Copyright(C) 2006-2007 Timesys Corp., Thomas Gleixner
+ *
+ * No idle tick implementation for low and high resolution timers
+ *
+ * Started by: Thomas Gleixner and Ingo Molnar
+ *
+ * For licencing details see kernel-base/COPYING
+ */
+#include <linux/cpu.h>
+#include <linux/err.h>
+#include <linux/hrtimer.h>
+#include <linux/interrupt.h>
+#include <linux/kernel_stat.h>
+#include <linux/percpu.h>
+#include <linux/profile.h>
+#include <linux/sched.h>
+#include <linux/tick.h>
+
+#include "tick-internal.h"
+
+/*
+ * Per cpu nohz control structure
+ */
+static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched);
+
+/*
+ * The time, when the last jiffy update happened. Protected by xtime_lock.
+ */
+static ktime_t last_jiffies_update;
+
+/*
+ * Must be called with interrupts disabled !
+ */
+static void tick_do_update_jiffies64(ktime_t now)
+{
+	unsigned long ticks = 0;
+	ktime_t delta;
+
+	/* Reevaluate with xtime_lock held */
+	write_seqlock(&xtime_lock);
+
+	delta = ktime_sub(now, last_jiffies_update);
+	if (delta.tv64 >= tick_period.tv64) {
+
+		delta = ktime_sub(delta, tick_period);
+		last_jiffies_update = ktime_add(last_jiffies_update,
+						tick_period);
+
+		/* Slow path for long timeouts */
+		if (unlikely(delta.tv64 >= tick_period.tv64)) {
+			s64 incr = ktime_to_ns(tick_period);
+
+			ticks = ktime_divns(delta, incr);
+
+			last_jiffies_update = ktime_add_ns(last_jiffies_update,
+							   incr * ticks);
+		}
+		do_timer(++ticks);
+	}
+	write_sequnlock(&xtime_lock);
+}
+
+/*
+ * Initialize and return the time of the last jiffies update.
+ */
+static ktime_t tick_init_jiffy_update(void)
+{
+	ktime_t period;
+
+	write_seqlock(&xtime_lock);
+	/* Did we start the jiffies update yet ? */
+	if (last_jiffies_update.tv64 == 0)
+		last_jiffies_update = tick_next_period;
+	period = last_jiffies_update;
+	write_sequnlock(&xtime_lock);
+	return period;
+}
+
+/*
+ * NOHZ - aka dynamic tick functionality
+ */
+#ifdef CONFIG_NO_HZ
+/*
+ * NO HZ enabled ?
+ */
+static int tick_nohz_enabled __read_mostly = 1;
+
+/*
+ * Enable / Disable tickless mode
+ */
+static int __init setup_tick_nohz(char *str)
+{
+	if (!strcmp(str, "off"))
+		tick_nohz_enabled = 0;
+	else if (!strcmp(str, "on"))
+		tick_nohz_enabled = 1;
+	else
+		return 0;
+	return 1;
+}
+
+__setup("nohz=", setup_tick_nohz);
+
+/**
+ * tick_nohz_update_jiffies - update jiffies when idle was interrupted
+ *
+ * Called from interrupt entry when the CPU was idle
+ *
+ * In case the sched_tick was stopped on this CPU, we have to check if jiffies
+ * must be updated. Otherwise an interrupt handler could use a stale jiffy
+ * value. We do this unconditionally on any cpu, as we don't know whether the
+ * cpu, which has the update task assigned is in a long sleep.
+ */
+void tick_nohz_update_jiffies(void)
+{
+	int cpu = smp_processor_id();
+	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
+	unsigned long flags;
+	ktime_t now;
+
+	if (!ts->tick_stopped)
+		return;
+
+	cpu_clear(cpu, nohz_cpu_mask);
+	now = ktime_get();
+
+	local_irq_save(flags);
+	tick_do_update_jiffies64(now);
+	local_irq_restore(flags);
+}
+
+/**
+ * tick_nohz_stop_sched_tick - stop the idle tick from the idle task
+ *
+ * When the next event is more than a tick into the future, stop the idle tick
+ * Called either from the idle loop or from irq_exit() when an idle period was
+ * just interrupted by an interrupt which did not cause a reschedule.
+ */
+void tick_nohz_stop_sched_tick(void)
+{
+	unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags;
+	struct tick_sched *ts;
+	ktime_t last_update, expires, now, delta;
+	int cpu;
+
+	local_irq_save(flags);
+
+	cpu = smp_processor_id();
+	ts = &per_cpu(tick_cpu_sched, cpu);
+
+	if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
+		goto end;
+
+	if (need_resched())
+		goto end;
+
+	cpu = smp_processor_id();
+	BUG_ON(local_softirq_pending());
+
+	now = ktime_get();
+	/*
+	 * When called from irq_exit we need to account the idle sleep time
+	 * correctly.
+	 */
+	if (ts->tick_stopped) {
+		delta = ktime_sub(now, ts->idle_entrytime);
+		ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
+	}
+
+	ts->idle_entrytime = now;
+	ts->idle_calls++;
+
+	/* Read jiffies and the time when jiffies were updated last */
+	do {
+		seq = read_seqbegin(&xtime_lock);
+		last_update = last_jiffies_update;
+		last_jiffies = jiffies;
+	} while (read_seqretry(&xtime_lock, seq));
+
+	/* Get the next timer wheel timer */
+	next_jiffies = get_next_timer_interrupt(last_jiffies);
+	delta_jiffies = next_jiffies - last_jiffies;
+
+	/*
+	 * Do not stop the tick, if we are only one off
+	 * or if the cpu is required for rcu
+	 */
+	if (!ts->tick_stopped && (delta_jiffies == 1 || rcu_needs_cpu(cpu)))
+		goto out;
+
+	/* Schedule the tick, if we are at least one jiffie off */
+	if ((long)delta_jiffies >= 1) {
+
+		if (rcu_needs_cpu(cpu))
+			delta_jiffies = 1;
+		else
+			cpu_set(cpu, nohz_cpu_mask);
+		/*
+		 * nohz_stop_sched_tick can be called several times before
+		 * the nohz_restart_sched_tick is called. This happens when
+		 * interrupts arrive which do not cause a reschedule. In the
+		 * first call we save the current tick time, so we can restart
+		 * the scheduler tick in nohz_restart_sched_tick.
+		 */
+		if (!ts->tick_stopped) {
+			ts->idle_tick = ts->sched_timer.expires;
+			ts->tick_stopped = 1;
+			ts->idle_jiffies = last_jiffies;
+		}
+		/*
+		 * calculate the expiry time for the next timer wheel
+		 * timer
+		 */
+		expires = ktime_add_ns(last_update, tick_period.tv64 *
+				       delta_jiffies);
+		ts->idle_expires = expires;
+		ts->idle_sleeps++;
+
+		if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
+			hrtimer_start(&ts->sched_timer, expires,
+				      HRTIMER_MODE_ABS);
+			/* Check, if the timer was already in the past */
+			if (hrtimer_active(&ts->sched_timer))
+				goto out;
+		} else if (!tick_program_event(expires, 0))
+			goto out;
+		/*
+		 * We are past the event already. So we crossed a
+		 * jiffie boundary. Update jiffies and raise the
+		 * softirq.
+		 */
+		tick_do_update_jiffies64(ktime_get());
+		cpu_clear(cpu, nohz_cpu_mask);
+	}
+	raise_softirq_irqoff(TIMER_SOFTIRQ);
+out:
+	ts->next_jiffies = next_jiffies;
+	ts->last_jiffies = last_jiffies;
+end:
+	local_irq_restore(flags);
+}
+
+/**
+ * tick_nohz_restart_sched_tick - restart the idle tick from the idle task
+ *
+ * Restart the idle tick when the CPU is woken up from idle
+ */
+void tick_nohz_restart_sched_tick(void)
+{
+	int cpu = smp_processor_id();
+	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
+	unsigned long ticks;
+	ktime_t now, delta;
+
+	if (!ts->tick_stopped)
+		return;
+
+	/* Update jiffies first */
+	now = ktime_get();
+
+	local_irq_disable();
+	tick_do_update_jiffies64(now);
+	cpu_clear(cpu, nohz_cpu_mask);
+
+	/* Account the idle time */
+	delta = ktime_sub(now, ts->idle_entrytime);
+	ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
+
+	/*
+	 * We stopped the tick in idle. Update process times would miss the
+	 * time we slept as update_process_times does only a 1 tick
+	 * accounting. Enforce that this is accounted to idle !
+	 */
+	ticks = jiffies - ts->idle_jiffies;
+	/*
+	 * We might be one off. Do not randomly account a huge number of ticks!
+	 */
+	if (ticks && ticks < LONG_MAX) {
+		add_preempt_count(HARDIRQ_OFFSET);
+		account_system_time(current, HARDIRQ_OFFSET,
+				    jiffies_to_cputime(ticks));
+		sub_preempt_count(HARDIRQ_OFFSET);
+	}
+
+	/*
+	 * Cancel the scheduled timer and restore the tick
+	 */
+	ts->tick_stopped = 0;
+	hrtimer_cancel(&ts->sched_timer);
+	ts->sched_timer.expires = ts->idle_tick;
+
+	while (1) {
+		/* Forward the time to expire in the future */
+		hrtimer_forward(&ts->sched_timer, now, tick_period);
+
+		if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
+			hrtimer_start(&ts->sched_timer,
+				      ts->sched_timer.expires,
+				      HRTIMER_MODE_ABS);
+			/* Check, if the timer was already in the past */
+			if (hrtimer_active(&ts->sched_timer))
+				break;
+		} else {
+			if (!tick_program_event(ts->sched_timer.expires, 0))
+				break;
+		}
+		/* Update jiffies and reread time */
+		tick_do_update_jiffies64(now);
+		now = ktime_get();
+	}
+	local_irq_enable();
+}
+
+static int tick_nohz_reprogram(struct tick_sched *ts, ktime_t now)
+{
+	hrtimer_forward(&ts->sched_timer, now, tick_period);
+	return tick_program_event(ts->sched_timer.expires, 0);
+}
+
+/*
+ * The nohz low res interrupt handler
+ */
+static void tick_nohz_handler(struct clock_event_device *dev)
+{
+	struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
+	struct pt_regs *regs = get_irq_regs();
+	ktime_t now = ktime_get();
+
+	dev->next_event.tv64 = KTIME_MAX;
+
+	/* Check, if the jiffies need an update */
+	tick_do_update_jiffies64(now);
+
+	/*
+	 * When we are idle and the tick is stopped, we have to touch
+	 * the watchdog as we might not schedule for a really long
+	 * time. This happens on complete idle SMP systems while
+	 * waiting on the login prompt. We also increment the "start
+	 * of idle" jiffy stamp so the idle accounting adjustment we
+	 * do when we go busy again does not account too much ticks.
+	 */
+	if (ts->tick_stopped) {
+		touch_softlockup_watchdog();
+		ts->idle_jiffies++;
+	}
+
+	update_process_times(user_mode(regs));
+	profile_tick(CPU_PROFILING);
+
+	/* Do not restart, when we are in the idle loop */
+	if (ts->tick_stopped)
+		return;
+
+	while (tick_nohz_reprogram(ts, now)) {
+		now = ktime_get();
+		tick_do_update_jiffies64(now);
+	}
+}
+
+/**
+ * tick_nohz_switch_to_nohz - switch to nohz mode
+ */
+static void tick_nohz_switch_to_nohz(void)
+{
+	struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
+	ktime_t next;
+
+	if (!tick_nohz_enabled)
+		return;
+
+	local_irq_disable();
+	if (tick_switch_to_oneshot(tick_nohz_handler)) {
+		local_irq_enable();
+		return;
+	}
+
+	ts->nohz_mode = NOHZ_MODE_LOWRES;
+
+	/*
+	 * Recycle the hrtimer in ts, so we can share the
+	 * hrtimer_forward with the highres code.
+	 */
+	hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	/* Get the next period */
+	next = tick_init_jiffy_update();
+
+	for (;;) {
+		ts->sched_timer.expires = next;
+		if (!tick_program_event(next, 0))
+			break;
+		next = ktime_add(next, tick_period);
+	}
+	local_irq_enable();
+
+	printk(KERN_INFO "Switched to NOHZ mode on CPU #%d\n",
+	       smp_processor_id());
+}
+
+#else
+
+static inline void tick_nohz_switch_to_nohz(void) { }
+
+#endif /* NO_HZ */
+
+/*
+ * High resolution timer specific code
+ */
+#ifdef CONFIG_HIGH_RES_TIMERS
+/*
+ * We rearm the timer until we get disabled by the idle code
+ * Called with interrupts disabled and timer->base->cpu_base->lock held.
+ */
+static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
+{
+	struct tick_sched *ts =
+		container_of(timer, struct tick_sched, sched_timer);
+	struct hrtimer_cpu_base *base = timer->base->cpu_base;
+	struct pt_regs *regs = get_irq_regs();
+	ktime_t now = ktime_get();
+
+	/* Check, if the jiffies need an update */
+	tick_do_update_jiffies64(now);
+
+	/*
+	 * Do not call, when we are not in irq context and have
+	 * no valid regs pointer
+	 */
+	if (regs) {
+		/*
+		 * When we are idle and the tick is stopped, we have to touch
+		 * the watchdog as we might not schedule for a really long
+		 * time. This happens on complete idle SMP systems while
+		 * waiting on the login prompt. We also increment the "start of
+		 * idle" jiffy stamp so the idle accounting adjustment we do
+		 * when we go busy again does not account too much ticks.
+		 */
+		if (ts->tick_stopped) {
+			touch_softlockup_watchdog();
+			ts->idle_jiffies++;
+		}
+		/*
+		 * update_process_times() might take tasklist_lock, hence
+		 * drop the base lock. sched-tick hrtimers are per-CPU and
+		 * never accessible by userspace APIs, so this is safe to do.
+		 */
+		spin_unlock(&base->lock);
+		update_process_times(user_mode(regs));
+		profile_tick(CPU_PROFILING);
+		spin_lock(&base->lock);
+	}
+
+	/* Do not restart, when we are in the idle loop */
+	if (ts->tick_stopped)
+		return HRTIMER_NORESTART;
+
+	hrtimer_forward(timer, now, tick_period);
+
+	return HRTIMER_RESTART;
+}
+
+/**
+ * tick_setup_sched_timer - setup the tick emulation timer
+ */
+void tick_setup_sched_timer(void)
+{
+	struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
+	ktime_t now = ktime_get();
+
+	/*
+	 * Emulate tick processing via per-CPU hrtimers:
+	 */
+	hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	ts->sched_timer.function = tick_sched_timer;
+	ts->sched_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
+
+	/* Get the next period */
+	ts->sched_timer.expires = tick_init_jiffy_update();
+
+	for (;;) {
+		hrtimer_forward(&ts->sched_timer, now, tick_period);
+		hrtimer_start(&ts->sched_timer, ts->sched_timer.expires,
+			      HRTIMER_MODE_ABS);
+		/* Check, if the timer was already in the past */
+		if (hrtimer_active(&ts->sched_timer))
+			break;
+		now = ktime_get();
+	}
+
+#ifdef CONFIG_NO_HZ
+	if (tick_nohz_enabled)
+		ts->nohz_mode = NOHZ_MODE_HIGHRES;
+#endif
+}
+
+void tick_cancel_sched_timer(int cpu)
+{
+	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
+
+	if (ts->sched_timer.base)
+		hrtimer_cancel(&ts->sched_timer);
+	ts->tick_stopped = 0;
+	ts->nohz_mode = NOHZ_MODE_INACTIVE;
+}
+#endif /* HIGH_RES_TIMERS */
+
+/**
+ * Async notification about clocksource changes
+ */
+void tick_clock_notify(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		set_bit(0, &per_cpu(tick_cpu_sched, cpu).check_clocks);
+}
+
+/*
+ * Async notification about clock event changes
+ */
+void tick_oneshot_notify(void)
+{
+	struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
+
+	set_bit(0, &ts->check_clocks);
+}
+
+/**
+ * Check, if a change happened, which makes oneshot possible.
+ *
+ * Called cyclically from the hrtimer softirq (driven by the timer
+ * softirq). allow_nohz signals that we can switch into low-res nohz
+ * mode, because high resolution timers are disabled (either at
+ * compile time or at runtime).
+ */
+int tick_check_oneshot_change(int allow_nohz)
+{
+	struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
+
+	if (!test_and_clear_bit(0, &ts->check_clocks))
+		return 0;
+
+	if (ts->nohz_mode != NOHZ_MODE_INACTIVE)
+		return 0;
+
+	if (!timekeeping_is_continuous() || !tick_is_oneshot_available())
+		return 0;
+
+	if (!allow_nohz)
+		return 1;
+
+	tick_nohz_switch_to_nohz();
+	return 0;
+}
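
The "slow path for long timeouts" in tick_do_update_jiffies64() above is
worth a worked example: with HZ=250 the tick period is 4 ms, so a CPU
waking up one second late advances jiffies by 250 in a single call instead
of taking 250 interrupts. A user-space model of the arithmetic (constants
assumed for the example):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const int64_t tick_period = 4000000;	/* 4 ms, i.e. HZ=250 */
	int64_t delta = 1000000000;		/* woke up 1 s late */
	uint64_t jiffies = 0, ticks = 0;

	if (delta >= tick_period) {
		delta -= tick_period;		/* the period that just ended */
		if (delta >= tick_period)	/* slow path */
			ticks = delta / tick_period;	/* ktime_divns() */
		jiffies += ++ticks;		/* do_timer(++ticks) */
	}
	printf("jiffies advanced by %llu\n",
	       (unsigned long long)jiffies);	/* prints 250 */
	return 0;
}
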
diff --git a/kernel/timer.c b/kernel/timer.c
index 7d522bdf8265..f058e6cfd50c 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -34,7 +34,7 @@
 #include <linux/cpu.h>
 #include <linux/syscalls.h>
 #include <linux/delay.h>
-#include <linux/clockchips.h>
+#include <linux/tick.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -874,6 +874,8 @@ static void change_clocksource(void)
 	clock->xtime_nsec = 0;
 	clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH);
 
+	tick_clock_notify();
+
 	printk(KERN_INFO "Time: %s clocksource has been installed.\n",
 	       clock->name);
 }
@@ -937,7 +939,6 @@ void __init timekeeping_init(void)
 	write_sequnlock_irqrestore(&xtime_lock, flags);
 }
 
-
 /* flag for if timekeeping is suspended */
 static int timekeeping_suspended;
 /* time in seconds when suspend began */