author	Preeti U Murthy <preeti@linux.vnet.ibm.com>	2014-02-07 03:06:32 -0500
committer	Thomas Gleixner <tglx@linutronix.de>	2014-02-07 09:34:29 -0500
commit	5d1638acb9f62fa7eb0c07cb85318bbe1f13b227 (patch)
tree	e6f6bf80186f74b021b1a236bd583b8227e5b7e1
parent	ba8f20c2eb4158a443e9d6a909aee5010efa0c69 (diff)
tick: Introduce hrtimer based broadcast
On some architectures the local timers stop in certain deep CPU idle states, and an external clock device is used to wake up these CPUs. The kernel support for such wakeups is provided by the tick broadcast framework, which uses the external clock device as the wakeup source.

However, not all architectures provide such an external clock device. This patch adds support in the broadcast framework to handle the wakeup of CPUs in deep idle states on such systems by queuing a hrtimer on one of the CPUs, which then takes care of waking the CPUs in deep idle.

To that end, a pseudo clock device is introduced which the archs can register as the tick_broadcast_device in the absence of a real external clock device. Once it is registered, the broadcast framework works as-is for these architectures, provided the archs handle the BROADCAST_ENTER notification failing for one of the CPUs. That CPU becomes the standby CPU which handles the wakeup of the CPUs in deep idle, and it *must not enter deep idle states* itself.

The CPU with the earliest wakeup is chosen as the standby CPU, so the standby role moves around dynamically and so does the hrtimer, which is queued to expire at the next earliest wakeup time. This is consistent with the case where an external clock device is present: the smp affinity of that clock device is set to the CPU with the earliest wakeup.

Hotplug of the standby CPU is handled as well, by moving the hrtimer onto the CPU that handles the CPU_DEAD notification.

Originally-from: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Preeti U Murthy <preeti@linux.vnet.ibm.com>
Cc: deepthi@linux.vnet.ibm.com
Cc: paulmck@linux.vnet.ibm.com
Cc: fweisbec@gmail.com
Cc: paulus@samba.org
Cc: srivatsa.bhat@linux.vnet.ibm.com
Cc: svaidy@linux.vnet.ibm.com
Cc: peterz@infradead.org
Cc: benh@kernel.crashing.org
Cc: rafael.j.wysocki@intel.com
Cc: linuxppc-dev@lists.ozlabs.org
Link: http://lkml.kernel.org/r/20140207080632.17187.80532.stgit@preeti.in.ibm.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
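
For illustration only (not part of this patch): the arch-side usage is expected to look roughly like the sketch below, which registers the pseudo device during boot and, when BROADCAST_ENTER fails, falls back to a shallow idle state because this CPU now owns the broadcast hrtimer. The helpers arch_cpu_shallow_idle()/arch_cpu_enter_deep_idle() are hypothetical, and the sketch assumes the arch can observe the ENTER notification failing with -EBUSY through the return value of clockevents_notify(), which may require a companion change.

	#include <linux/clockchips.h>
	#include <linux/init.h>

	/* Hypothetical arch-specific idle helpers, assumed to exist elsewhere */
	extern void arch_cpu_shallow_idle(int cpu);
	extern void arch_cpu_enter_deep_idle(int cpu);

	/* Hypothetical arch init: register the hrtimer based pseudo broadcast device */
	static int __init arch_broadcast_init(void)
	{
		tick_setup_hrtimer_broadcast();
		return 0;
	}
	early_initcall(arch_broadcast_init);

	/* Hypothetical deep idle entry for one CPU */
	static void arch_cpu_deep_idle(int cpu)
	{
		/*
		 * If ENTER fails, this CPU is the standby CPU owning the
		 * broadcast hrtimer and must not let its local timer stop.
		 */
		if (clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu)) {
			arch_cpu_shallow_idle(cpu);	/* hypothetical fallback */
			return;
		}
		arch_cpu_enter_deep_idle(cpu);		/* hypothetical deep state */
		clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);
	}

With this in place the standby CPU keeps ticking in a shallow state and wakes the sleeping CPUs through the broadcast path, just as a real external clock device would.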
-rw-r--r--	include/linux/clockchips.h		9
-rw-r--r--	kernel/time/Makefile			2
-rw-r--r--	kernel/time/tick-broadcast-hrtimer.c	106
-rw-r--r--	kernel/time/tick-broadcast.c		54
4 files changed, 167 insertions(+), 4 deletions(-)
diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index e0c5a6c418de..dbe9e1457168 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -62,6 +62,11 @@ enum clock_event_mode {
 #define CLOCK_EVT_FEAT_DYNIRQ		0x000020
 #define CLOCK_EVT_FEAT_PERCPU		0x000040
 
+/*
+ * Clockevent device is based on a hrtimer for broadcast
+ */
+#define CLOCK_EVT_FEAT_HRTIMER		0x000080
+
 /**
  * struct clock_event_device - clock event device descriptor
  * @event_handler:	Assigned by the framework to be called by the low
@@ -83,6 +88,7 @@ enum clock_event_mode {
  * @name:		ptr to clock event name
  * @rating:		variable to rate clock event devices
  * @irq:		IRQ number (only for non CPU local devices)
+ * @bound_on:		Bound on CPU
  * @cpumask:		cpumask to indicate for which CPUs this device works
  * @list:		list head for the management code
  * @owner:		module reference
@@ -113,6 +119,7 @@ struct clock_event_device {
 	const char		*name;
 	int			rating;
 	int			irq;
+	int			bound_on;
 	const struct cpumask	*cpumask;
 	struct list_head	list;
 	struct module		*owner;
@@ -180,9 +187,11 @@ extern int tick_receive_broadcast(void);
 #endif
 
 #if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT)
+extern void tick_setup_hrtimer_broadcast(void);
 extern int tick_check_broadcast_expired(void);
 #else
 static inline int tick_check_broadcast_expired(void) { return 0; }
+static void tick_setup_hrtimer_broadcast(void) {};
 #endif
 
 #ifdef CONFIG_GENERIC_CLOCKEVENTS
diff --git a/kernel/time/Makefile b/kernel/time/Makefile
index 9250130646f5..06151ef4a744 100644
--- a/kernel/time/Makefile
+++ b/kernel/time/Makefile
@@ -3,7 +3,7 @@ obj-y += timeconv.o posix-clock.o alarmtimer.o
 
 obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD)		+= clockevents.o
 obj-$(CONFIG_GENERIC_CLOCKEVENTS)		+= tick-common.o
-obj-$(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST)	+= tick-broadcast.o
+obj-$(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST)	+= tick-broadcast.o tick-broadcast-hrtimer.o
 obj-$(CONFIG_GENERIC_SCHED_CLOCK)		+= sched_clock.o
 obj-$(CONFIG_TICK_ONESHOT)			+= tick-oneshot.o
 obj-$(CONFIG_TICK_ONESHOT)			+= tick-sched.o
diff --git a/kernel/time/tick-broadcast-hrtimer.c b/kernel/time/tick-broadcast-hrtimer.c
new file mode 100644
index 000000000000..92425279312b
--- /dev/null
+++ b/kernel/time/tick-broadcast-hrtimer.c
@@ -0,0 +1,106 @@
+/*
+ * linux/kernel/time/tick-broadcast-hrtimer.c
+ * This file emulates a local clock event device
+ * via a pseudo clock device.
+ */
+#include <linux/cpu.h>
+#include <linux/err.h>
+#include <linux/hrtimer.h>
+#include <linux/interrupt.h>
+#include <linux/percpu.h>
+#include <linux/profile.h>
+#include <linux/clockchips.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/module.h>
+
+#include "tick-internal.h"
+
+static struct hrtimer bctimer;
+
+static void bc_set_mode(enum clock_event_mode mode,
+			struct clock_event_device *bc)
+{
+	switch (mode) {
+	case CLOCK_EVT_MODE_SHUTDOWN:
+		/*
+		 * Note, we cannot cancel the timer here as we might
+		 * run into the following live lock scenario:
+		 *
+		 * cpu 0		cpu1
+		 * lock(broadcast_lock);
+		 *			hrtimer_interrupt()
+		 *			bc_handler()
+		 *			   tick_handle_oneshot_broadcast();
+		 *			    lock(broadcast_lock);
+		 * hrtimer_cancel()
+		 *  wait_for_callback()
+		 */
+		hrtimer_try_to_cancel(&bctimer);
+		break;
+	default:
+		break;
+	}
+}
+
+/*
+ * This is called from the guts of the broadcast code when the cpu
+ * which is about to enter idle has the earliest broadcast timer event.
+ */
+static int bc_set_next(ktime_t expires, struct clock_event_device *bc)
+{
+	/*
+	 * We try to cancel the timer first. If the callback is on
+	 * flight on some other cpu then we let it handle it. If we
+	 * were able to cancel the timer nothing can rearm it as we
+	 * own broadcast_lock.
+	 *
+	 * However we can also be called from the event handler of
+	 * ce_broadcast_hrtimer itself when it expires. We cannot
+	 * restart the timer because we are in the callback, but we
+	 * can set the expiry time and let the callback return
+	 * HRTIMER_RESTART.
+	 */
+	if (hrtimer_try_to_cancel(&bctimer) >= 0) {
+		hrtimer_start(&bctimer, expires, HRTIMER_MODE_ABS_PINNED);
+		/* Bind the "device" to the cpu */
+		bc->bound_on = smp_processor_id();
+	} else if (bc->bound_on == smp_processor_id()) {
+		hrtimer_set_expires(&bctimer, expires);
+	}
+	return 0;
+}
+
+static struct clock_event_device ce_broadcast_hrtimer = {
+	.set_mode		= bc_set_mode,
+	.set_next_ktime		= bc_set_next,
+	.features		= CLOCK_EVT_FEAT_ONESHOT |
+				  CLOCK_EVT_FEAT_KTIME |
+				  CLOCK_EVT_FEAT_HRTIMER,
+	.rating			= 0,
+	.bound_on		= -1,
+	.min_delta_ns		= 1,
+	.max_delta_ns		= KTIME_MAX,
+	.min_delta_ticks	= 1,
+	.max_delta_ticks	= KTIME_MAX,
+	.mult			= 1,
+	.shift			= 0,
+	.cpumask		= cpu_all_mask,
+};
+
+static enum hrtimer_restart bc_handler(struct hrtimer *t)
+{
+	ce_broadcast_hrtimer.event_handler(&ce_broadcast_hrtimer);
+
+	if (ce_broadcast_hrtimer.next_event.tv64 == KTIME_MAX)
+		return HRTIMER_NORESTART;
+
+	return HRTIMER_RESTART;
+}
+
+void tick_setup_hrtimer_broadcast(void)
+{
+	hrtimer_init(&bctimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	bctimer.function = bc_handler;
+	clockevents_register_device(&ce_broadcast_hrtimer);
+}
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 84c8fd91d744..63c7b2d9ed8e 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -643,6 +643,42 @@ again:
 	raw_spin_unlock(&tick_broadcast_lock);
 }
 
+static int broadcast_needs_cpu(struct clock_event_device *bc, int cpu)
+{
+	if (!(bc->features & CLOCK_EVT_FEAT_HRTIMER))
+		return 0;
+	if (bc->next_event.tv64 == KTIME_MAX)
+		return 0;
+	return bc->bound_on == cpu ? -EBUSY : 0;
+}
+
+static void broadcast_shutdown_local(struct clock_event_device *bc,
+				     struct clock_event_device *dev)
+{
+	/*
+	 * For hrtimer based broadcasting we cannot shutdown the cpu
+	 * local device if our own event is the first one to expire or
+	 * if we own the broadcast timer.
+	 */
+	if (bc->features & CLOCK_EVT_FEAT_HRTIMER) {
+		if (broadcast_needs_cpu(bc, smp_processor_id()))
+			return;
+		if (dev->next_event.tv64 < bc->next_event.tv64)
+			return;
+	}
+	clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN);
+}
+
+static void broadcast_move_bc(int deadcpu)
+{
+	struct clock_event_device *bc = tick_broadcast_device.evtdev;
+
+	if (!bc || !broadcast_needs_cpu(bc, deadcpu))
+		return;
+	/* This moves the broadcast assignment to this cpu */
+	clockevents_program_event(bc, bc->next_event, 1);
+}
+
 /*
  * Powerstate information: The system enters/leaves a state, where
  * affected devices might stop
@@ -661,7 +697,7 @@ int tick_broadcast_oneshot_control(unsigned long reason)
 	 * states
 	 */
 	if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
-		return;
+		return 0;
 
 	/*
 	 * We are called with preemtion disabled from the depth of the
@@ -672,7 +708,7 @@ int tick_broadcast_oneshot_control(unsigned long reason)
 	dev = td->evtdev;
 
 	if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
-		return;
+		return 0;
 
 	bc = tick_broadcast_device.evtdev;
 
@@ -680,7 +716,7 @@ int tick_broadcast_oneshot_control(unsigned long reason)
 	if (reason == CLOCK_EVT_NOTIFY_BROADCAST_ENTER) {
 		if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) {
 			WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask));
-			clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN);
+			broadcast_shutdown_local(bc, dev);
 			/*
 			 * We only reprogram the broadcast timer if we
 			 * did not mark ourself in the force mask and
@@ -693,6 +729,16 @@ int tick_broadcast_oneshot_control(unsigned long reason)
 			    dev->next_event.tv64 < bc->next_event.tv64)
 				tick_broadcast_set_event(bc, cpu, dev->next_event, 1);
 		}
+		/*
+		 * If the current CPU owns the hrtimer broadcast
+		 * mechanism, it cannot go deep idle and we remove the
+		 * CPU from the broadcast mask. We don't have to go
+		 * through the EXIT path as the local timer is not
+		 * shutdown.
+		 */
+		ret = broadcast_needs_cpu(bc, cpu);
+		if (ret)
+			cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
 	} else {
 		if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) {
 			clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
@@ -866,6 +912,8 @@ void tick_shutdown_broadcast_oneshot(unsigned int *cpup)
 	cpumask_clear_cpu(cpu, tick_broadcast_pending_mask);
 	cpumask_clear_cpu(cpu, tick_broadcast_force_mask);
 
+	broadcast_move_bc(cpu);
+
 	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
 }
 