Diffstat (limited to 'kernel/time')
-rw-r--r--  kernel/time/Kconfig          |  25
-rw-r--r--  kernel/time/Makefile         |   9
-rw-r--r--  kernel/time/clockevents.c    | 345
-rw-r--r--  kernel/time/clocksource.c    | 246
-rw-r--r--  kernel/time/jiffies.c        |   1
-rw-r--r--  kernel/time/ntp.c            |  30
-rw-r--r--  kernel/time/tick-broadcast.c | 480
-rw-r--r--  kernel/time/tick-common.c    | 346
-rw-r--r--  kernel/time/tick-internal.h  | 110
-rw-r--r--  kernel/time/tick-oneshot.c   |  84
-rw-r--r--  kernel/time/tick-sched.c     | 563
-rw-r--r--  kernel/time/timer_list.c     | 287
-rw-r--r--  kernel/time/timer_stats.c    | 411
13 files changed, 2862 insertions(+), 75 deletions(-)
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
new file mode 100644
index 000000000000..f66351126544
--- /dev/null
+++ b/kernel/time/Kconfig
@@ -0,0 +1,25 @@
1 | # | ||
2 | # Timer subsystem related configuration options | ||
3 | # | ||
4 | config TICK_ONESHOT | ||
5 | bool | ||
6 | default n | ||
7 | |||
8 | config NO_HZ | ||
9 | bool "Tickless System (Dynamic Ticks)" | ||
10 | depends on GENERIC_TIME && GENERIC_CLOCKEVENTS | ||
11 | select TICK_ONESHOT | ||
12 | help | ||
13 | This option enables a tickless system: timer interrupts will | ||
14 | only trigger on an as-needed basis both when the system is | ||
15 | busy and when the system is idle. | ||
16 | |||
17 | config HIGH_RES_TIMERS | ||
18 | bool "High Resolution Timer Support" | ||
19 | depends on GENERIC_TIME && GENERIC_CLOCKEVENTS | ||
20 | select TICK_ONESHOT | ||
21 | help | ||
22 | This option enables high resolution timer support. If your | ||
23 | hardware is not capable then this option only increases | ||
24 | the size of the kernel image. | ||
25 | |||
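Both options depend on the architecture providing GENERIC_TIME and GENERIC_CLOCKEVENTS, and both pull in TICK_ONESHOT via select, so TICK_ONESHOT never needs to be set by hand. A resulting configuration fragment might look like this (an illustrative sketch; the GENERIC_* symbols come from the architecture Kconfig, not from this file):

# .config fragment for a dynticks + hrtimers build (illustrative)
CONFIG_GENERIC_TIME=y
CONFIG_GENERIC_CLOCKEVENTS=y
# selected automatically by NO_HZ / HIGH_RES_TIMERS:
CONFIG_TICK_ONESHOT=y
CONFIG_NO_HZ=y
CONFIG_HIGH_RES_TIMERS=y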
diff --git a/kernel/time/Makefile b/kernel/time/Makefile
index 61a3907d16fb..93bccba1f265 100644
--- a/kernel/time/Makefile
+++ b/kernel/time/Makefile
@@ -1 +1,8 @@
1 | obj-y += ntp.o clocksource.o jiffies.o | 1 | obj-y += ntp.o clocksource.o jiffies.o timer_list.o |
2 | |||
3 | obj-$(CONFIG_GENERIC_CLOCKEVENTS) += clockevents.o | ||
4 | obj-$(CONFIG_GENERIC_CLOCKEVENTS) += tick-common.o | ||
5 | obj-$(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) += tick-broadcast.o | ||
6 | obj-$(CONFIG_TICK_ONESHOT) += tick-oneshot.o | ||
7 | obj-$(CONFIG_TICK_ONESHOT) += tick-sched.o | ||
8 | obj-$(CONFIG_TIMER_STATS) += timer_stats.o | ||
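The kbuild pattern above compiles each file only when its config symbol is set: a bool symbol expands to "y" or to nothing, so each rule lands either in obj-y or in the ignored obj- list. As a sketch (not part of the patch):

# With CONFIG_TICK_ONESHOT=y:
#   obj-$(CONFIG_TICK_ONESHOT) += tick-oneshot.o   ->   obj-y += tick-oneshot.o
# With the symbol unset:
#   obj-$(CONFIG_TICK_ONESHOT) += tick-oneshot.o   ->   obj-  += tick-oneshot.o (dropped)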
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
new file mode 100644
index 000000000000..67932ea78c17
--- /dev/null
+++ b/kernel/time/clockevents.c
@@ -0,0 +1,345 @@
1 | /* | ||
2 | * linux/kernel/time/clockevents.c | ||
3 | * | ||
4 | * This file contains functions which manage clock event devices. | ||
5 | * | ||
6 | * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de> | ||
7 | * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar | ||
8 | * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner | ||
9 | * | ||
10 | * This code is licenced under the GPL version 2. For details see | ||
11 | * kernel-base/COPYING. | ||
12 | */ | ||
13 | |||
14 | #include <linux/clockchips.h> | ||
15 | #include <linux/hrtimer.h> | ||
16 | #include <linux/init.h> | ||
17 | #include <linux/module.h> | ||
18 | #include <linux/notifier.h> | ||
19 | #include <linux/smp.h> | ||
20 | #include <linux/sysdev.h> | ||
21 | |||
22 | /* The registered clock event devices */ | ||
23 | static LIST_HEAD(clockevent_devices); | ||
24 | static LIST_HEAD(clockevents_released); | ||
25 | |||
26 | /* Notification for clock events */ | ||
27 | static RAW_NOTIFIER_HEAD(clockevents_chain); | ||
28 | |||
29 | /* Protection for the above */ | ||
30 | static DEFINE_SPINLOCK(clockevents_lock); | ||
31 | |||
32 | /** | ||
33 | * clockevents_delta2ns - Convert a latch value (device ticks) to nanoseconds | ||
34 | * @latch: value to convert | ||
35 | * @evt: pointer to clock event device descriptor | ||
36 | * | ||
37 | * Math helper, returns latch value converted to nanoseconds (bound checked) | ||
38 | */ | ||
39 | unsigned long clockevent_delta2ns(unsigned long latch, | ||
40 | struct clock_event_device *evt) | ||
41 | { | ||
42 | u64 clc = ((u64) latch << evt->shift); | ||
43 | |||
44 | do_div(clc, evt->mult); | ||
45 | if (clc < 1000) | ||
46 | clc = 1000; | ||
47 | if (clc > LONG_MAX) | ||
48 | clc = LONG_MAX; | ||
49 | |||
50 | return (unsigned long) clc; | ||
51 | } | ||
52 | |||
53 | /** | ||
54 | * clockevents_set_mode - set the operating mode of a clock event device | ||
55 | * @dev: device to modify | ||
56 | * @mode: new mode | ||
57 | * | ||
58 | * Must be called with interrupts disabled ! | ||
59 | */ | ||
60 | void clockevents_set_mode(struct clock_event_device *dev, | ||
61 | enum clock_event_mode mode) | ||
62 | { | ||
63 | if (dev->mode != mode) { | ||
64 | dev->set_mode(mode, dev); | ||
65 | dev->mode = mode; | ||
66 | } | ||
67 | } | ||
68 | |||
69 | /** | ||
70 | * clockevents_program_event - Reprogram the clock event device. | ||
71 | * @expires: absolute expiry time (monotonic clock) | ||
72 | * | ||
73 | * Returns 0 on success, -ETIME when the event is in the past. | ||
74 | */ | ||
75 | int clockevents_program_event(struct clock_event_device *dev, ktime_t expires, | ||
76 | ktime_t now) | ||
77 | { | ||
78 | unsigned long long clc; | ||
79 | int64_t delta; | ||
80 | |||
81 | delta = ktime_to_ns(ktime_sub(expires, now)); | ||
82 | |||
83 | if (delta <= 0) | ||
84 | return -ETIME; | ||
85 | |||
86 | dev->next_event = expires; | ||
87 | |||
88 | if (dev->mode == CLOCK_EVT_MODE_SHUTDOWN) | ||
89 | return 0; | ||
90 | |||
91 | if (delta > dev->max_delta_ns) | ||
92 | delta = dev->max_delta_ns; | ||
93 | if (delta < dev->min_delta_ns) | ||
94 | delta = dev->min_delta_ns; | ||
95 | |||
96 | clc = delta * dev->mult; | ||
97 | clc >>= dev->shift; | ||
98 | |||
99 | return dev->set_next_event((unsigned long) clc, dev); | ||
100 | } | ||
101 | |||
102 | /** | ||
103 | * clockevents_register_notifier - register a clock events change listener | ||
104 | */ | ||
105 | int clockevents_register_notifier(struct notifier_block *nb) | ||
106 | { | ||
107 | int ret; | ||
108 | |||
109 | spin_lock(&clockevents_lock); | ||
110 | ret = raw_notifier_chain_register(&clockevents_chain, nb); | ||
111 | spin_unlock(&clockevents_lock); | ||
112 | |||
113 | return ret; | ||
114 | } | ||
115 | |||
116 | /** | ||
117 | * clockevents_unregister_notifier - unregister a clock events change listener | ||
118 | */ | ||
119 | void clockevents_unregister_notifier(struct notifier_block *nb) | ||
120 | { | ||
121 | spin_lock(&clockevents_lock); | ||
122 | raw_notifier_chain_unregister(&clockevents_chain, nb); | ||
123 | spin_unlock(&clockevents_lock); | ||
124 | } | ||
125 | |||
126 | /* | ||
127 | * Notify about a clock event change. Called with clockevents_lock | ||
128 | * held. | ||
129 | */ | ||
130 | static void clockevents_do_notify(unsigned long reason, void *dev) | ||
131 | { | ||
132 | raw_notifier_call_chain(&clockevents_chain, reason, dev); | ||
133 | } | ||
134 | |||
135 | /* | ||
136 | * Called after a notify add to make devices available which were | ||
137 | * released from the notifier call. | ||
138 | */ | ||
139 | static void clockevents_notify_released(void) | ||
140 | { | ||
141 | struct clock_event_device *dev; | ||
142 | |||
143 | while (!list_empty(&clockevents_released)) { | ||
144 | dev = list_entry(clockevents_released.next, | ||
145 | struct clock_event_device, list); | ||
146 | list_del(&dev->list); | ||
147 | list_add(&dev->list, &clockevent_devices); | ||
148 | clockevents_do_notify(CLOCK_EVT_NOTIFY_ADD, dev); | ||
149 | } | ||
150 | } | ||
151 | |||
152 | /** | ||
153 | * clockevents_register_device - register a clock event device | ||
154 | * @dev: device to register | ||
155 | */ | ||
156 | void clockevents_register_device(struct clock_event_device *dev) | ||
157 | { | ||
158 | BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED); | ||
159 | |||
160 | spin_lock(&clockevents_lock); | ||
161 | |||
162 | list_add(&dev->list, &clockevent_devices); | ||
163 | clockevents_do_notify(CLOCK_EVT_NOTIFY_ADD, dev); | ||
164 | clockevents_notify_released(); | ||
165 | |||
166 | spin_unlock(&clockevents_lock); | ||
167 | } | ||
168 | |||
169 | /* | ||
170 | * Noop handler when we shut down an event device | ||
171 | */ | ||
172 | static void clockevents_handle_noop(struct clock_event_device *dev) | ||
173 | { | ||
174 | } | ||
175 | |||
176 | /** | ||
177 | * clockevents_exchange_device - release and request clock devices | ||
178 | * @old: device to release (can be NULL) | ||
179 | * @new: device to request (can be NULL) | ||
180 | * | ||
181 | * Called from the notifier chain. clockevents_lock is held already | ||
182 | */ | ||
183 | void clockevents_exchange_device(struct clock_event_device *old, | ||
184 | struct clock_event_device *new) | ||
185 | { | ||
186 | unsigned long flags; | ||
187 | |||
188 | local_irq_save(flags); | ||
189 | /* | ||
190 | * Caller releases a clock event device. We queue it into the | ||
191 | * released list and do a notify add later. | ||
192 | */ | ||
193 | if (old) { | ||
194 | old->event_handler = clockevents_handle_noop; | ||
195 | clockevents_set_mode(old, CLOCK_EVT_MODE_UNUSED); | ||
196 | list_del(&old->list); | ||
197 | list_add(&old->list, &clockevents_released); | ||
198 | } | ||
199 | |||
200 | if (new) { | ||
201 | BUG_ON(new->mode != CLOCK_EVT_MODE_UNUSED); | ||
202 | clockevents_set_mode(new, CLOCK_EVT_MODE_SHUTDOWN); | ||
203 | } | ||
204 | local_irq_restore(flags); | ||
205 | } | ||
206 | |||
207 | /** | ||
208 | * clockevents_request_device | ||
209 | */ | ||
210 | struct clock_event_device *clockevents_request_device(unsigned int features, | ||
211 | cpumask_t cpumask) | ||
212 | { | ||
213 | struct clock_event_device *cur, *dev = NULL; | ||
214 | struct list_head *tmp; | ||
215 | |||
216 | spin_lock(&clockevents_lock); | ||
217 | |||
218 | list_for_each(tmp, &clockevent_devices) { | ||
219 | cur = list_entry(tmp, struct clock_event_device, list); | ||
220 | |||
221 | if ((cur->features & features) == features && | ||
222 | cpus_equal(cpumask, cur->cpumask)) { | ||
223 | if (!dev || dev->rating < cur->rating) | ||
224 | dev = cur; | ||
225 | } | ||
226 | } | ||
227 | |||
228 | clockevents_exchange_device(NULL, dev); | ||
229 | |||
230 | spin_unlock(&clockevents_lock); | ||
231 | |||
232 | return dev; | ||
233 | } | ||
234 | |||
235 | /** | ||
236 | * clockevents_release_device | ||
237 | */ | ||
238 | void clockevents_release_device(struct clock_event_device *dev) | ||
239 | { | ||
240 | spin_lock(&clockevents_lock); | ||
241 | |||
242 | clockevents_exchange_device(dev, NULL); | ||
243 | clockevents_notify_released(); | ||
244 | |||
245 | spin_unlock(&clockevents_lock); | ||
246 | } | ||
247 | |||
248 | /** | ||
249 | * clockevents_notify - notification about relevant events | ||
250 | */ | ||
251 | void clockevents_notify(unsigned long reason, void *arg) | ||
252 | { | ||
253 | spin_lock(&clockevents_lock); | ||
254 | clockevents_do_notify(reason, arg); | ||
255 | |||
256 | switch (reason) { | ||
257 | case CLOCK_EVT_NOTIFY_CPU_DEAD: | ||
258 | /* | ||
259 | * Unregister the clock event devices which were | ||
260 | * released from the users in the notify chain. | ||
261 | */ | ||
262 | while (!list_empty(&clockevents_released)) { | ||
263 | struct clock_event_device *dev; | ||
264 | |||
265 | dev = list_entry(clockevents_released.next, | ||
266 | struct clock_event_device, list); | ||
267 | list_del(&dev->list); | ||
268 | } | ||
269 | break; | ||
270 | default: | ||
271 | break; | ||
272 | } | ||
273 | spin_unlock(&clockevents_lock); | ||
274 | } | ||
275 | EXPORT_SYMBOL_GPL(clockevents_notify); | ||
276 | |||
277 | #ifdef CONFIG_SYSFS | ||
278 | |||
279 | /** | ||
280 | * clockevents_show_registered - sysfs interface for listing clockevents | ||
281 | * @dev: unused | ||
282 | * @buf: char buffer to be filled with clock events list | ||
283 | * | ||
284 | * Provides sysfs interface for listing registered clock event devices | ||
285 | */ | ||
286 | static ssize_t clockevents_show_registered(struct sys_device *dev, char *buf) | ||
287 | { | ||
288 | struct list_head *tmp; | ||
289 | char *p = buf; | ||
290 | int cpu; | ||
291 | |||
292 | spin_lock(&clockevents_lock); | ||
293 | |||
294 | list_for_each(tmp, &clockevent_devices) { | ||
295 | struct clock_event_device *ce; | ||
296 | |||
297 | ce = list_entry(tmp, struct clock_event_device, list); | ||
298 | p += sprintf(p, "%-20s F:%04x M:%d", ce->name, | ||
299 | ce->features, ce->mode); | ||
300 | p += sprintf(p, " C:"); | ||
301 | if (!cpus_equal(ce->cpumask, cpu_possible_map)) { | ||
302 | for_each_cpu_mask(cpu, ce->cpumask) | ||
303 | p += sprintf(p, " %d", cpu); | ||
304 | } else { | ||
305 | /* | ||
306 | * FIXME: Add the cpu which is handling this sucker | ||
307 | */ | ||
308 | } | ||
309 | p += sprintf(p, "\n"); | ||
310 | } | ||
311 | |||
312 | spin_unlock(&clockevents_lock); | ||
313 | |||
314 | return p - buf; | ||
315 | } | ||
316 | |||
317 | /* | ||
318 | * Sysfs setup bits: | ||
319 | */ | ||
320 | static SYSDEV_ATTR(registered, 0600, | ||
321 | clockevents_show_registered, NULL); | ||
322 | |||
323 | static struct sysdev_class clockevents_sysclass = { | ||
324 | set_kset_name("clockevents"), | ||
325 | }; | ||
326 | |||
327 | static struct sys_device clockevents_sys_device = { | ||
328 | .id = 0, | ||
329 | .cls = &clockevents_sysclass, | ||
330 | }; | ||
331 | |||
332 | static int __init clockevents_sysfs_init(void) | ||
333 | { | ||
334 | int error = sysdev_class_register(&clockevents_sysclass); | ||
335 | |||
336 | if (!error) | ||
337 | error = sysdev_register(&clockevents_sys_device); | ||
338 | if (!error) | ||
339 | error = sysdev_create_file( | ||
340 | &clockevents_sys_device, | ||
341 | &attr_registered); | ||
342 | return error; | ||
343 | } | ||
344 | device_initcall(clockevents_sysfs_init); | ||
345 | #endif | ||
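clockevent_delta2ns() and clockevents_program_event() above are the two directions of the same mult/shift fixed-point scaling: ns = (cycles << shift) / mult when converting a device latch to nanoseconds, and cycles = ns * mult >> shift when programming an event. A standalone userspace sketch of the arithmetic; the mult/shift values are made up for illustration and do not come from the patch:

#include <stdint.h>
#include <stdio.h>

#define EVT_MULT	2795939U	/* hypothetical: ~41.7 MHz event device */
#define EVT_SHIFT	26

/* latch -> ns, as in clockevent_delta2ns() */
static uint64_t delta2ns(uint64_t latch)
{
	return (latch << EVT_SHIFT) / EVT_MULT;
}

/* ns -> device cycles, as in clockevents_program_event() */
static uint64_t ns2delta(uint64_t ns)
{
	return (ns * EVT_MULT) >> EVT_SHIFT;
}

int main(void)
{
	printf("32bit timer covers %llu ns\n",
	       (unsigned long long)delta2ns(0xffffffffULL));
	printf("1 ms = %llu device cycles\n",
	       (unsigned long long)ns2delta(1000000ULL));
	return 0;
}

The kernel additionally clamps both conversions (1000..LONG_MAX in clockevent_delta2ns(), min_delta_ns..max_delta_ns before programming); the sketch leaves the clamping out.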
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index d9ef176c4e09..193a0793af95 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -29,6 +29,7 @@
29 | #include <linux/init.h> | 29 | #include <linux/init.h> |
30 | #include <linux/module.h> | 30 | #include <linux/module.h> |
31 | #include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */ | 31 | #include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */ |
32 | #include <linux/tick.h> | ||
32 | 33 | ||
33 | /* XXX - Would like a better way for initializing curr_clocksource */ | 34 | /* XXX - Would like a better way for initializing curr_clocksource */ |
34 | extern struct clocksource clocksource_jiffies; | 35 | extern struct clocksource clocksource_jiffies; |
@@ -48,6 +49,7 @@ extern struct clocksource clocksource_jiffies;
48 | */ | 49 | */ |
49 | static struct clocksource *curr_clocksource = &clocksource_jiffies; | 50 | static struct clocksource *curr_clocksource = &clocksource_jiffies; |
50 | static struct clocksource *next_clocksource; | 51 | static struct clocksource *next_clocksource; |
52 | static struct clocksource *clocksource_override; | ||
51 | static LIST_HEAD(clocksource_list); | 53 | static LIST_HEAD(clocksource_list); |
52 | static DEFINE_SPINLOCK(clocksource_lock); | 54 | static DEFINE_SPINLOCK(clocksource_lock); |
53 | static char override_name[32]; | 55 | static char override_name[32]; |
@@ -62,9 +64,123 @@ static int __init clocksource_done_booting(void)
62 | finished_booting = 1; | 64 | finished_booting = 1; |
63 | return 0; | 65 | return 0; |
64 | } | 66 | } |
65 | |||
66 | late_initcall(clocksource_done_booting); | 67 | late_initcall(clocksource_done_booting); |
67 | 68 | ||
69 | #ifdef CONFIG_CLOCKSOURCE_WATCHDOG | ||
70 | static LIST_HEAD(watchdog_list); | ||
71 | static struct clocksource *watchdog; | ||
72 | static struct timer_list watchdog_timer; | ||
73 | static DEFINE_SPINLOCK(watchdog_lock); | ||
74 | static cycle_t watchdog_last; | ||
75 | /* | ||
76 | * Interval: 0.5 sec Threshold: 0.0625s | ||
77 | */ | ||
78 | #define WATCHDOG_INTERVAL (HZ >> 1) | ||
79 | #define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 4) | ||
80 | |||
81 | static void clocksource_ratewd(struct clocksource *cs, int64_t delta) | ||
82 | { | ||
83 | if (delta > -WATCHDOG_THRESHOLD && delta < WATCHDOG_THRESHOLD) | ||
84 | return; | ||
85 | |||
86 | printk(KERN_WARNING "Clocksource %s unstable (delta = %Ld ns)\n", | ||
87 | cs->name, delta); | ||
88 | cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG); | ||
89 | clocksource_change_rating(cs, 0); | ||
90 | cs->flags &= ~CLOCK_SOURCE_WATCHDOG; | ||
91 | list_del(&cs->wd_list); | ||
92 | } | ||
93 | |||
94 | static void clocksource_watchdog(unsigned long data) | ||
95 | { | ||
96 | struct clocksource *cs, *tmp; | ||
97 | cycle_t csnow, wdnow; | ||
98 | int64_t wd_nsec, cs_nsec; | ||
99 | |||
100 | spin_lock(&watchdog_lock); | ||
101 | |||
102 | wdnow = watchdog->read(); | ||
103 | wd_nsec = cyc2ns(watchdog, (wdnow - watchdog_last) & watchdog->mask); | ||
104 | watchdog_last = wdnow; | ||
105 | |||
106 | list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) { | ||
107 | csnow = cs->read(); | ||
108 | /* Initialized ? */ | ||
109 | if (!(cs->flags & CLOCK_SOURCE_WATCHDOG)) { | ||
110 | if ((cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) && | ||
111 | (watchdog->flags & CLOCK_SOURCE_IS_CONTINUOUS)) { | ||
112 | cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES; | ||
113 | /* | ||
114 | * We just marked the clocksource as | ||
115 | * highres-capable, notify the rest of the | ||
116 | * system as well so that we transition | ||
117 | * into high-res mode: | ||
118 | */ | ||
119 | tick_clock_notify(); | ||
120 | } | ||
121 | cs->flags |= CLOCK_SOURCE_WATCHDOG; | ||
122 | cs->wd_last = csnow; | ||
123 | } else { | ||
124 | cs_nsec = cyc2ns(cs, (csnow - cs->wd_last) & cs->mask); | ||
125 | cs->wd_last = csnow; | ||
126 | /* Check the delta. Might remove from the list ! */ | ||
127 | clocksource_ratewd(cs, cs_nsec - wd_nsec); | ||
128 | } | ||
129 | } | ||
130 | |||
131 | if (!list_empty(&watchdog_list)) { | ||
132 | __mod_timer(&watchdog_timer, | ||
133 | watchdog_timer.expires + WATCHDOG_INTERVAL); | ||
134 | } | ||
135 | spin_unlock(&watchdog_lock); | ||
136 | } | ||
137 | static void clocksource_check_watchdog(struct clocksource *cs) | ||
138 | { | ||
139 | struct clocksource *cse; | ||
140 | unsigned long flags; | ||
141 | |||
142 | spin_lock_irqsave(&watchdog_lock, flags); | ||
143 | if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) { | ||
144 | int started = !list_empty(&watchdog_list); | ||
145 | |||
146 | list_add(&cs->wd_list, &watchdog_list); | ||
147 | if (!started && watchdog) { | ||
148 | watchdog_last = watchdog->read(); | ||
149 | watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL; | ||
150 | add_timer(&watchdog_timer); | ||
151 | } | ||
152 | } else if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) { | ||
153 | cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES; | ||
154 | |||
155 | if (!watchdog || cs->rating > watchdog->rating) { | ||
156 | if (watchdog) | ||
157 | del_timer(&watchdog_timer); | ||
158 | watchdog = cs; | ||
159 | init_timer(&watchdog_timer); | ||
160 | watchdog_timer.function = clocksource_watchdog; | ||
161 | |||
162 | /* Reset watchdog cycles */ | ||
163 | list_for_each_entry(cse, &watchdog_list, wd_list) | ||
164 | cse->flags &= ~CLOCK_SOURCE_WATCHDOG; | ||
165 | /* Start if list is not empty */ | ||
166 | if (!list_empty(&watchdog_list)) { | ||
167 | watchdog_last = watchdog->read(); | ||
168 | watchdog_timer.expires = | ||
169 | jiffies + WATCHDOG_INTERVAL; | ||
170 | add_timer(&watchdog_timer); | ||
171 | } | ||
172 | } | ||
173 | } | ||
174 | spin_unlock_irqrestore(&watchdog_lock, flags); | ||
175 | } | ||
176 | #else | ||
177 | static void clocksource_check_watchdog(struct clocksource *cs) | ||
178 | { | ||
179 | if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) | ||
180 | cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES; | ||
181 | } | ||
182 | #endif | ||
183 | |||
68 | /** | 184 | /** |
69 | * clocksource_get_next - Returns the selected clocksource | 185 | * clocksource_get_next - Returns the selected clocksource |
70 | * | 186 | * |
@@ -84,60 +200,54 @@ struct clocksource *clocksource_get_next(void)
84 | } | 200 | } |
85 | 201 | ||
86 | /** | 202 | /** |
87 | * select_clocksource - Finds the best registered clocksource. | 203 | * select_clocksource - Selects the best registered clocksource. |
88 | * | 204 | * |
89 | * Private function. Must hold clocksource_lock when called. | 205 | * Private function. Must hold clocksource_lock when called. |
90 | * | 206 | * |
91 | * Looks through the list of registered clocksources, returning | 207 | * Select the clocksource with the best rating, or the clocksource, |
92 | * the one with the highest rating value. If there is a clocksource | 208 | * which is selected by userspace override. |
93 | * name that matches the override string, it returns that clocksource. | ||
94 | */ | 209 | */ |
95 | static struct clocksource *select_clocksource(void) | 210 | static struct clocksource *select_clocksource(void) |
96 | { | 211 | { |
97 | struct clocksource *best = NULL; | 212 | struct clocksource *next; |
98 | struct list_head *tmp; | ||
99 | 213 | ||
100 | list_for_each(tmp, &clocksource_list) { | 214 | if (list_empty(&clocksource_list)) |
101 | struct clocksource *src; | 215 | return NULL; |
102 | 216 | ||
103 | src = list_entry(tmp, struct clocksource, list); | 217 | if (clocksource_override) |
104 | if (!best) | 218 | next = clocksource_override; |
105 | best = src; | 219 | else |
106 | 220 | next = list_entry(clocksource_list.next, struct clocksource, | |
107 | /* check for override: */ | 221 | list); |
108 | if (strlen(src->name) == strlen(override_name) && | 222 | |
109 | !strcmp(src->name, override_name)) { | 223 | if (next == curr_clocksource) |
110 | best = src; | 224 | return NULL; |
111 | break; | ||
112 | } | ||
113 | /* pick the highest rating: */ | ||
114 | if (src->rating > best->rating) | ||
115 | best = src; | ||
116 | } | ||
117 | 225 | ||
118 | return best; | 226 | return next; |
119 | } | 227 | } |
120 | 228 | ||
121 | /** | 229 | /* |
122 | * is_registered_source - Checks if clocksource is registered | 230 | * Enqueue the clocksource sorted by rating |
123 | * @c: pointer to a clocksource | ||
124 | * | ||
125 | * Private helper function. Must hold clocksource_lock when called. | ||
126 | * | ||
127 | * Returns one if the clocksource is already registered, zero otherwise. | ||
128 | */ | 231 | */ |
129 | static int is_registered_source(struct clocksource *c) | 232 | static int clocksource_enqueue(struct clocksource *c) |
130 | { | 233 | { |
131 | int len = strlen(c->name); | 234 | struct list_head *tmp, *entry = &clocksource_list; |
132 | struct list_head *tmp; | ||
133 | 235 | ||
134 | list_for_each(tmp, &clocksource_list) { | 236 | list_for_each(tmp, &clocksource_list) { |
135 | struct clocksource *src; | 237 | struct clocksource *cs; |
136 | 238 | ||
137 | src = list_entry(tmp, struct clocksource, list); | 239 | cs = list_entry(tmp, struct clocksource, list); |
138 | if (strlen(src->name) == len && !strcmp(src->name, c->name)) | 240 | if (cs == c) |
139 | return 1; | 241 | return -EBUSY; |
242 | /* Keep track of the place, where to insert */ | ||
243 | if (cs->rating >= c->rating) | ||
244 | entry = tmp; | ||
140 | } | 245 | } |
246 | list_add(&c->list, entry); | ||
247 | |||
248 | if (strlen(c->name) == strlen(override_name) && | ||
249 | !strcmp(c->name, override_name)) | ||
250 | clocksource_override = c; | ||
141 | 251 | ||
142 | return 0; | 252 | return 0; |
143 | } | 253 | } |
@@ -150,42 +260,35 @@ static int is_registered_source(struct clocksource *c)
150 | */ | 260 | */ |
151 | int clocksource_register(struct clocksource *c) | 261 | int clocksource_register(struct clocksource *c) |
152 | { | 262 | { |
153 | int ret = 0; | ||
154 | unsigned long flags; | 263 | unsigned long flags; |
264 | int ret; | ||
155 | 265 | ||
156 | spin_lock_irqsave(&clocksource_lock, flags); | 266 | spin_lock_irqsave(&clocksource_lock, flags); |
157 | /* check if clocksource is already registered */ | 267 | ret = clocksource_enqueue(c); |
158 | if (is_registered_source(c)) { | 268 | if (!ret) |
159 | printk("register_clocksource: Cannot register %s. " | ||
160 | "Already registered!", c->name); | ||
161 | ret = -EBUSY; | ||
162 | } else { | ||
163 | /* register it */ | ||
164 | list_add(&c->list, &clocksource_list); | ||
165 | /* scan the registered clocksources, and pick the best one */ | ||
166 | next_clocksource = select_clocksource(); | 269 | next_clocksource = select_clocksource(); |
167 | } | ||
168 | spin_unlock_irqrestore(&clocksource_lock, flags); | 270 | spin_unlock_irqrestore(&clocksource_lock, flags); |
271 | if (!ret) | ||
272 | clocksource_check_watchdog(c); | ||
169 | return ret; | 273 | return ret; |
170 | } | 274 | } |
171 | EXPORT_SYMBOL(clocksource_register); | 275 | EXPORT_SYMBOL(clocksource_register); |
172 | 276 | ||
173 | /** | 277 | /** |
174 | * clocksource_reselect - Rescan list for next clocksource | 278 | * clocksource_change_rating - Change the rating of a registered clocksource |
175 | * | 279 | * |
176 | * A quick helper function to be used if a clocksource changes its | ||
177 | * rating. Forces the clocksource list to be re-scanned for the best | ||
178 | * clocksource. | ||
179 | */ | 280 | */ |
180 | void clocksource_reselect(void) | 281 | void clocksource_change_rating(struct clocksource *cs, int rating) |
181 | { | 282 | { |
182 | unsigned long flags; | 283 | unsigned long flags; |
183 | 284 | ||
184 | spin_lock_irqsave(&clocksource_lock, flags); | 285 | spin_lock_irqsave(&clocksource_lock, flags); |
286 | list_del(&cs->list); | ||
287 | cs->rating = rating; | ||
288 | clocksource_enqueue(cs); | ||
185 | next_clocksource = select_clocksource(); | 289 | next_clocksource = select_clocksource(); |
186 | spin_unlock_irqrestore(&clocksource_lock, flags); | 290 | spin_unlock_irqrestore(&clocksource_lock, flags); |
187 | } | 291 | } |
188 | EXPORT_SYMBOL(clocksource_reselect); | ||
189 | 292 | ||
190 | #ifdef CONFIG_SYSFS | 293 | #ifdef CONFIG_SYSFS |
191 | /** | 294 | /** |
@@ -221,7 +324,11 @@ sysfs_show_current_clocksources(struct sys_device *dev, char *buf)
221 | static ssize_t sysfs_override_clocksource(struct sys_device *dev, | 324 | static ssize_t sysfs_override_clocksource(struct sys_device *dev, |
222 | const char *buf, size_t count) | 325 | const char *buf, size_t count) |
223 | { | 326 | { |
327 | struct clocksource *ovr = NULL; | ||
328 | struct list_head *tmp; | ||
224 | size_t ret = count; | 329 | size_t ret = count; |
330 | int len; | ||
331 | |||
225 | /* strings from sysfs write are not 0 terminated! */ | 332 | /* strings from sysfs write are not 0 terminated! */ |
226 | if (count >= sizeof(override_name)) | 333 | if (count >= sizeof(override_name)) |
227 | return -EINVAL; | 334 | return -EINVAL; |
@@ -229,17 +336,32 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev,
229 | /* strip of \n: */ | 336 | /* strip of \n: */ |
230 | if (buf[count-1] == '\n') | 337 | if (buf[count-1] == '\n') |
231 | count--; | 338 | count--; |
232 | if (count < 1) | ||
233 | return -EINVAL; | ||
234 | 339 | ||
235 | spin_lock_irq(&clocksource_lock); | 340 | spin_lock_irq(&clocksource_lock); |
236 | 341 | ||
237 | /* copy the name given: */ | 342 | if (count > 0) |
238 | memcpy(override_name, buf, count); | 343 | memcpy(override_name, buf, count); |
239 | override_name[count] = 0; | 344 | override_name[count] = 0; |
240 | 345 | ||
241 | /* try to select it: */ | 346 | len = strlen(override_name); |
242 | next_clocksource = select_clocksource(); | 347 | if (len) { |
348 | ovr = clocksource_override; | ||
349 | /* try to select it: */ | ||
350 | list_for_each(tmp, &clocksource_list) { | ||
351 | struct clocksource *cs; | ||
352 | |||
353 | cs = list_entry(tmp, struct clocksource, list); | ||
354 | if (strlen(cs->name) == len && | ||
355 | !strcmp(cs->name, override_name)) | ||
356 | ovr = cs; | ||
357 | } | ||
358 | } | ||
359 | |||
360 | /* Reselect, when the override name has changed */ | ||
361 | if (ovr != clocksource_override) { | ||
362 | clocksource_override = ovr; | ||
363 | next_clocksource = select_clocksource(); | ||
364 | } | ||
243 | 365 | ||
244 | spin_unlock_irq(&clocksource_lock); | 366 | spin_unlock_irq(&clocksource_lock); |
245 | 367 | ||
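The watchdog above reads both the watched clocksource and the watchdog clocksource once per WATCHDOG_INTERVAL, converts both cycle deltas to nanoseconds, and discards the watched source when the two intervals diverge by more than the threshold. A userspace sketch of that check; all readings and scale factors are invented for illustration:

#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC	1000000000LL
#define WD_THRESHOLD	(NSEC_PER_SEC >> 4)	/* 0.0625 s, as in the patch */

/* standard clocksource scaling: ns = cycles * mult >> shift */
static int64_t cyc2ns(uint64_t cycles, uint64_t mult, unsigned int shift)
{
	return (int64_t)((cycles * mult) >> shift);
}

int main(void)
{
	/* hypothetical cycle counts over one ~0.5 s watchdog interval */
	int64_t wd_nsec = cyc2ns(500000000ULL, 1ULL << 30, 30); /* 0.50 s */
	int64_t cs_nsec = cyc2ns(430000000ULL, 1ULL << 30, 30); /* 0.43 s */
	int64_t delta = cs_nsec - wd_nsec;

	if (delta <= -WD_THRESHOLD || delta >= WD_THRESHOLD)
		printf("clocksource unstable (delta = %lld ns)\n",
		       (long long)delta);
	else
		printf("clocksource within threshold\n");
	return 0;
}

Here delta is -0.07 s, past the 0.0625 s threshold, so this source would be rated down to 0 and removed from the watchdog list, which is exactly what clocksource_ratewd() does.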
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
index a99b2a6e6a07..3be8da8fed7e 100644
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -62,7 +62,6 @@ struct clocksource clocksource_jiffies = {
62 | .mask = 0xffffffff, /*32bits*/ | 62 | .mask = 0xffffffff, /*32bits*/ |
63 | .mult = NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */ | 63 | .mult = NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */ |
64 | .shift = JIFFIES_SHIFT, | 64 | .shift = JIFFIES_SHIFT, |
65 | .is_continuous = 0, /* tick based, not free running */ | ||
66 | }; | 65 | }; |
67 | 66 | ||
68 | static int __init init_jiffies_clocksource(void) | 67 | static int __init init_jiffies_clocksource(void) |
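With is_continuous removed, the jiffies clocksource is distinguished from free-running hardware purely by the absence of the new CLOCK_SOURCE_IS_CONTINUOUS flag, so the watchdog path above never marks it valid for highres. Its mult/shift pair is chosen so that one "cycle" (one jiffy) converts exactly back to NSEC_PER_JIFFY. A sketch, with values assumed for an HZ=250 build:

#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_JIFFY	4000000ULL	/* assumed: HZ=250 -> 4 ms per tick */
#define JIFFIES_SHIFT	8		/* assumed shift value */

int main(void)
{
	uint64_t mult = NSEC_PER_JIFFY << JIFFIES_SHIFT;
	/* ns = cycles * mult >> shift; one cycle == one jiffy */
	printf("one jiffy = %llu ns\n",
	       (unsigned long long)((1 * mult) >> JIFFIES_SHIFT));
	return 0;
}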
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 3afeaa3a73f9..eb12509e00bd 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -24,7 +24,7 @@ static u64 tick_length, tick_length_base;
24 | 24 | ||
25 | #define MAX_TICKADJ 500 /* microsecs */ | 25 | #define MAX_TICKADJ 500 /* microsecs */ |
26 | #define MAX_TICKADJ_SCALED (((u64)(MAX_TICKADJ * NSEC_PER_USEC) << \ | 26 | #define MAX_TICKADJ_SCALED (((u64)(MAX_TICKADJ * NSEC_PER_USEC) << \ |
27 | TICK_LENGTH_SHIFT) / HZ) | 27 | TICK_LENGTH_SHIFT) / NTP_INTERVAL_FREQ) |
28 | 28 | ||
29 | /* | 29 | /* |
30 | * phase-lock loop variables | 30 | * phase-lock loop variables |
@@ -46,13 +46,17 @@ long time_adjust;
46 | 46 | ||
47 | static void ntp_update_frequency(void) | 47 | static void ntp_update_frequency(void) |
48 | { | 48 | { |
49 | tick_length_base = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ) << TICK_LENGTH_SHIFT; | 49 | u64 second_length = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ) |
50 | tick_length_base += (s64)CLOCK_TICK_ADJUST << TICK_LENGTH_SHIFT; | 50 | << TICK_LENGTH_SHIFT; |
51 | tick_length_base += (s64)time_freq << (TICK_LENGTH_SHIFT - SHIFT_NSEC); | 51 | second_length += (s64)CLOCK_TICK_ADJUST << TICK_LENGTH_SHIFT; |
52 | second_length += (s64)time_freq << (TICK_LENGTH_SHIFT - SHIFT_NSEC); | ||
52 | 53 | ||
53 | do_div(tick_length_base, HZ); | 54 | tick_length_base = second_length; |
54 | 55 | ||
55 | tick_nsec = tick_length_base >> TICK_LENGTH_SHIFT; | 56 | do_div(second_length, HZ); |
57 | tick_nsec = second_length >> TICK_LENGTH_SHIFT; | ||
58 | |||
59 | do_div(tick_length_base, NTP_INTERVAL_FREQ); | ||
56 | } | 60 | } |
57 | 61 | ||
58 | /** | 62 | /** |
@@ -162,7 +166,7 @@ void second_overflow(void)
162 | tick_length -= MAX_TICKADJ_SCALED; | 166 | tick_length -= MAX_TICKADJ_SCALED; |
163 | } else { | 167 | } else { |
164 | tick_length += (s64)(time_adjust * NSEC_PER_USEC / | 168 | tick_length += (s64)(time_adjust * NSEC_PER_USEC / |
165 | HZ) << TICK_LENGTH_SHIFT; | 169 | NTP_INTERVAL_FREQ) << TICK_LENGTH_SHIFT; |
166 | time_adjust = 0; | 170 | time_adjust = 0; |
167 | } | 171 | } |
168 | } | 172 | } |
@@ -239,7 +243,8 @@ int do_adjtimex(struct timex *txc)
239 | result = -EINVAL; | 243 | result = -EINVAL; |
240 | goto leave; | 244 | goto leave; |
241 | } | 245 | } |
242 | time_freq = ((s64)txc->freq * NSEC_PER_USEC) >> (SHIFT_USEC - SHIFT_NSEC); | 246 | time_freq = ((s64)txc->freq * NSEC_PER_USEC) |
247 | >> (SHIFT_USEC - SHIFT_NSEC); | ||
243 | } | 248 | } |
244 | 249 | ||
245 | if (txc->modes & ADJ_MAXERROR) { | 250 | if (txc->modes & ADJ_MAXERROR) { |
@@ -309,7 +314,8 @@ int do_adjtimex(struct timex *txc)
309 | freq_adj += time_freq; | 314 | freq_adj += time_freq; |
310 | freq_adj = min(freq_adj, (s64)MAXFREQ_NSEC); | 315 | freq_adj = min(freq_adj, (s64)MAXFREQ_NSEC); |
311 | time_freq = max(freq_adj, (s64)-MAXFREQ_NSEC); | 316 | time_freq = max(freq_adj, (s64)-MAXFREQ_NSEC); |
312 | time_offset = (time_offset / HZ) << SHIFT_UPDATE; | 317 | time_offset = (time_offset / NTP_INTERVAL_FREQ) |
318 | << SHIFT_UPDATE; | ||
313 | } /* STA_PLL */ | 319 | } /* STA_PLL */ |
314 | } /* txc->modes & ADJ_OFFSET */ | 320 | } /* txc->modes & ADJ_OFFSET */ |
315 | if (txc->modes & ADJ_TICK) | 321 | if (txc->modes & ADJ_TICK) |
@@ -324,8 +330,10 @@ leave: if ((time_status & (STA_UNSYNC|STA_CLOCKERR)) != 0)
324 | if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT) | 330 | if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT) |
325 | txc->offset = save_adjust; | 331 | txc->offset = save_adjust; |
326 | else | 332 | else |
327 | txc->offset = shift_right(time_offset, SHIFT_UPDATE) * HZ / 1000; | 333 | txc->offset = shift_right(time_offset, SHIFT_UPDATE) |
328 | txc->freq = (time_freq / NSEC_PER_USEC) << (SHIFT_USEC - SHIFT_NSEC); | 334 | * NTP_INTERVAL_FREQ / 1000; |
335 | txc->freq = (time_freq / NSEC_PER_USEC) | ||
336 | << (SHIFT_USEC - SHIFT_NSEC); | ||
329 | txc->maxerror = time_maxerror; | 337 | txc->maxerror = time_maxerror; |
330 | txc->esterror = time_esterror; | 338 | txc->esterror = time_esterror; |
331 | txc->status = time_status; | 339 | txc->status = time_status; |
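The ntp.c changes replace HZ with NTP_INTERVAL_FREQ in the interval arithmetic, and ntp_update_frequency() now scales a full second by 2^TICK_LENGTH_SHIFT before dividing, so the remainder of the division survives in the low bits instead of being rounded away on every tick. A sketch of the core computation, leaving out the CLOCK_TICK_ADJUST and time_freq correction terms; the constant values are assumptions for illustration:

#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_USEC		1000ULL
#define USER_HZ			100ULL
#define TICK_USEC		10000ULL	/* 10 ms USER_HZ tick, assumed */
#define TICK_LENGTH_SHIFT	32		/* assumed scaling shift */
#define NTP_INTERVAL_FREQ	250ULL		/* assumed HZ=250 build */

int main(void)
{
	/* one second in nanoseconds, scaled by 2^TICK_LENGTH_SHIFT */
	uint64_t second_length = (TICK_USEC * NSEC_PER_USEC * USER_HZ)
						<< TICK_LENGTH_SHIFT;
	/* divide the scaled second: fractional ns stay in the low bits */
	uint64_t tick_length_base = second_length / NTP_INTERVAL_FREQ;

	printf("interval = %llu ns, fractional part = %llu/2^32 ns\n",
	       (unsigned long long)(tick_length_base >> TICK_LENGTH_SHIFT),
	       (unsigned long long)(tick_length_base & 0xffffffffULL));
	return 0;
}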
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
new file mode 100644
index 000000000000..12b3efeb9f6f
--- /dev/null
+++ b/kernel/time/tick-broadcast.c
@@ -0,0 +1,480 @@
1 | /* | ||
2 | * linux/kernel/time/tick-broadcast.c | ||
3 | * | ||
4 | * This file contains functions which emulate a local clock-event | ||
5 | * device via a broadcast event source. | ||
6 | * | ||
7 | * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de> | ||
8 | * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar | ||
9 | * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner | ||
10 | * | ||
11 | * This code is licenced under the GPL version 2. For details see | ||
12 | * kernel-base/COPYING. | ||
13 | */ | ||
14 | #include <linux/cpu.h> | ||
15 | #include <linux/err.h> | ||
16 | #include <linux/hrtimer.h> | ||
17 | #include <linux/irq.h> | ||
18 | #include <linux/percpu.h> | ||
19 | #include <linux/profile.h> | ||
20 | #include <linux/sched.h> | ||
21 | #include <linux/tick.h> | ||
22 | |||
23 | #include "tick-internal.h" | ||
24 | |||
25 | /* | ||
26 | * Broadcast support for broken x86 hardware, where the local apic | ||
27 | * timer stops in C3 state. | ||
28 | */ | ||
29 | |||
30 | struct tick_device tick_broadcast_device; | ||
31 | static cpumask_t tick_broadcast_mask; | ||
32 | static DEFINE_SPINLOCK(tick_broadcast_lock); | ||
33 | |||
34 | /* | ||
35 | * Debugging: see timer_list.c | ||
36 | */ | ||
37 | struct tick_device *tick_get_broadcast_device(void) | ||
38 | { | ||
39 | return &tick_broadcast_device; | ||
40 | } | ||
41 | |||
42 | cpumask_t *tick_get_broadcast_mask(void) | ||
43 | { | ||
44 | return &tick_broadcast_mask; | ||
45 | } | ||
46 | |||
47 | /* | ||
48 | * Start the device in periodic mode | ||
49 | */ | ||
50 | static void tick_broadcast_start_periodic(struct clock_event_device *bc) | ||
51 | { | ||
52 | if (bc && bc->mode == CLOCK_EVT_MODE_SHUTDOWN) | ||
53 | tick_setup_periodic(bc, 1); | ||
54 | } | ||
55 | |||
56 | /* | ||
57 | * Check, if the device can be utilized as broadcast device: | ||
58 | */ | ||
59 | int tick_check_broadcast_device(struct clock_event_device *dev) | ||
60 | { | ||
61 | if (tick_broadcast_device.evtdev || | ||
62 | (dev->features & CLOCK_EVT_FEAT_C3STOP)) | ||
63 | return 0; | ||
64 | |||
65 | clockevents_exchange_device(NULL, dev); | ||
66 | tick_broadcast_device.evtdev = dev; | ||
67 | if (!cpus_empty(tick_broadcast_mask)) | ||
68 | tick_broadcast_start_periodic(dev); | ||
69 | return 1; | ||
70 | } | ||
71 | |||
72 | /* | ||
73 | * Check, if the device is the broadcast device | ||
74 | */ | ||
75 | int tick_is_broadcast_device(struct clock_event_device *dev) | ||
76 | { | ||
77 | return (dev && tick_broadcast_device.evtdev == dev); | ||
78 | } | ||
79 | |||
80 | /* | ||
81 | * Check, if the device is dysfunctional and a placeholder, which | ||
82 | * needs to be handled by the broadcast device. | ||
83 | */ | ||
84 | int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) | ||
85 | { | ||
86 | unsigned long flags; | ||
87 | int ret = 0; | ||
88 | |||
89 | spin_lock_irqsave(&tick_broadcast_lock, flags); | ||
90 | |||
91 | /* | ||
92 | * Devices might be registered with both periodic and oneshot | ||
93 | * mode disabled. This signals, that the device needs to be | ||
94 | * operated from the broadcast device and is a placeholder for | ||
95 | * the cpu local device. | ||
96 | */ | ||
97 | if (!tick_device_is_functional(dev)) { | ||
98 | dev->event_handler = tick_handle_periodic; | ||
99 | cpu_set(cpu, tick_broadcast_mask); | ||
100 | tick_broadcast_start_periodic(tick_broadcast_device.evtdev); | ||
101 | ret = 1; | ||
102 | } | ||
103 | |||
104 | spin_unlock_irqrestore(&tick_broadcast_lock, flags); | ||
105 | return ret; | ||
106 | } | ||
107 | |||
108 | /* | ||
109 | * Broadcast the event to the cpus, which are set in the mask | ||
110 | */ | ||
111 | int tick_do_broadcast(cpumask_t mask) | ||
112 | { | ||
113 | int ret = 0, cpu = smp_processor_id(); | ||
114 | struct tick_device *td; | ||
115 | |||
116 | /* | ||
117 | * Check, if the current cpu is in the mask | ||
118 | */ | ||
119 | if (cpu_isset(cpu, mask)) { | ||
120 | cpu_clear(cpu, mask); | ||
121 | td = &per_cpu(tick_cpu_device, cpu); | ||
122 | td->evtdev->event_handler(td->evtdev); | ||
123 | ret = 1; | ||
124 | } | ||
125 | |||
126 | if (!cpus_empty(mask)) { | ||
127 | /* | ||
128 | * It might be necessary to actually check whether the devices | ||
129 | * have different broadcast functions. For now, just use the | ||
130 | * one of the first device. This works as long as we have this | ||
131 | * misfeature only on x86 (lapic) | ||
132 | */ | ||
133 | cpu = first_cpu(mask); | ||
134 | td = &per_cpu(tick_cpu_device, cpu); | ||
135 | td->evtdev->broadcast(mask); | ||
136 | ret = 1; | ||
137 | } | ||
138 | return ret; | ||
139 | } | ||
140 | |||
141 | /* | ||
142 | * Periodic broadcast: | ||
143 | * - invoke the broadcast handlers | ||
144 | */ | ||
145 | static void tick_do_periodic_broadcast(void) | ||
146 | { | ||
147 | cpumask_t mask; | ||
148 | |||
149 | spin_lock(&tick_broadcast_lock); | ||
150 | |||
151 | cpus_and(mask, cpu_online_map, tick_broadcast_mask); | ||
152 | tick_do_broadcast(mask); | ||
153 | |||
154 | spin_unlock(&tick_broadcast_lock); | ||
155 | } | ||
156 | |||
157 | /* | ||
158 | * Event handler for periodic broadcast ticks | ||
159 | */ | ||
160 | static void tick_handle_periodic_broadcast(struct clock_event_device *dev) | ||
161 | { | ||
162 | dev->next_event.tv64 = KTIME_MAX; | ||
163 | |||
164 | tick_do_periodic_broadcast(); | ||
165 | |||
166 | /* | ||
167 | * The device is in periodic mode. No reprogramming necessary: | ||
168 | */ | ||
169 | if (dev->mode == CLOCK_EVT_MODE_PERIODIC) | ||
170 | return; | ||
171 | |||
172 | /* | ||
173 | * Setup the next period for devices, which do not have | ||
174 | * periodic mode: | ||
175 | */ | ||
176 | for (;;) { | ||
177 | ktime_t next = ktime_add(dev->next_event, tick_period); | ||
178 | |||
179 | if (!clockevents_program_event(dev, next, ktime_get())) | ||
180 | return; | ||
181 | tick_do_periodic_broadcast(); | ||
182 | } | ||
183 | } | ||
184 | |||
185 | /* | ||
186 | * Powerstate information: The system enters/leaves a state, where | ||
187 | * affected devices might stop | ||
188 | */ | ||
189 | static void tick_do_broadcast_on_off(void *why) | ||
190 | { | ||
191 | struct clock_event_device *bc, *dev; | ||
192 | struct tick_device *td; | ||
193 | unsigned long flags, *reason = why; | ||
194 | int cpu; | ||
195 | |||
196 | spin_lock_irqsave(&tick_broadcast_lock, flags); | ||
197 | |||
198 | cpu = smp_processor_id(); | ||
199 | td = &per_cpu(tick_cpu_device, cpu); | ||
200 | dev = td->evtdev; | ||
201 | bc = tick_broadcast_device.evtdev; | ||
202 | |||
203 | /* | ||
204 | * Is the device in broadcast mode forever or is it not | ||
205 | * affected by the powerstate ? | ||
206 | */ | ||
207 | if (!dev || !tick_device_is_functional(dev) || | ||
208 | !(dev->features & CLOCK_EVT_FEAT_C3STOP)) | ||
209 | goto out; | ||
210 | |||
211 | if (*reason == CLOCK_EVT_NOTIFY_BROADCAST_ON) { | ||
212 | if (!cpu_isset(cpu, tick_broadcast_mask)) { | ||
213 | cpu_set(cpu, tick_broadcast_mask); | ||
214 | if (td->mode == TICKDEV_MODE_PERIODIC) | ||
215 | clockevents_set_mode(dev, | ||
216 | CLOCK_EVT_MODE_SHUTDOWN); | ||
217 | } | ||
218 | } else { | ||
219 | if (cpu_isset(cpu, tick_broadcast_mask)) { | ||
220 | cpu_clear(cpu, tick_broadcast_mask); | ||
221 | if (td->mode == TICKDEV_MODE_PERIODIC) | ||
222 | tick_setup_periodic(dev, 0); | ||
223 | } | ||
224 | } | ||
225 | |||
226 | if (cpus_empty(tick_broadcast_mask)) | ||
227 | clockevents_set_mode(bc, CLOCK_EVT_MODE_SHUTDOWN); | ||
228 | else { | ||
229 | if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) | ||
230 | tick_broadcast_start_periodic(bc); | ||
231 | else | ||
232 | tick_broadcast_setup_oneshot(bc); | ||
233 | } | ||
234 | out: | ||
235 | spin_unlock_irqrestore(&tick_broadcast_lock, flags); | ||
236 | } | ||
237 | |||
238 | /* | ||
239 | * Powerstate information: The system enters/leaves a state, where | ||
240 | * affected devices might stop. | ||
241 | */ | ||
242 | void tick_broadcast_on_off(unsigned long reason, int *oncpu) | ||
243 | { | ||
244 | int cpu = get_cpu(); | ||
245 | |||
246 | if (cpu == *oncpu) | ||
247 | tick_do_broadcast_on_off(&reason); | ||
248 | else | ||
249 | smp_call_function_single(*oncpu, tick_do_broadcast_on_off, | ||
250 | &reason, 1, 1); | ||
251 | put_cpu(); | ||
252 | } | ||
253 | |||
254 | /* | ||
255 | * Set the periodic handler depending on broadcast on/off | ||
256 | */ | ||
257 | void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast) | ||
258 | { | ||
259 | if (!broadcast) | ||
260 | dev->event_handler = tick_handle_periodic; | ||
261 | else | ||
262 | dev->event_handler = tick_handle_periodic_broadcast; | ||
263 | } | ||
264 | |||
265 | /* | ||
266 | * Remove a CPU from broadcasting | ||
267 | */ | ||
268 | void tick_shutdown_broadcast(unsigned int *cpup) | ||
269 | { | ||
270 | struct clock_event_device *bc; | ||
271 | unsigned long flags; | ||
272 | unsigned int cpu = *cpup; | ||
273 | |||
274 | spin_lock_irqsave(&tick_broadcast_lock, flags); | ||
275 | |||
276 | bc = tick_broadcast_device.evtdev; | ||
277 | cpu_clear(cpu, tick_broadcast_mask); | ||
278 | |||
279 | if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) { | ||
280 | if (bc && cpus_empty(tick_broadcast_mask)) | ||
281 | clockevents_set_mode(bc, CLOCK_EVT_MODE_SHUTDOWN); | ||
282 | } | ||
283 | |||
284 | spin_unlock_irqrestore(&tick_broadcast_lock, flags); | ||
285 | } | ||
286 | |||
287 | #ifdef CONFIG_TICK_ONESHOT | ||
288 | |||
289 | static cpumask_t tick_broadcast_oneshot_mask; | ||
290 | |||
291 | /* | ||
292 | * Debugging: see timer_list.c | ||
293 | */ | ||
294 | cpumask_t *tick_get_broadcast_oneshot_mask(void) | ||
295 | { | ||
296 | return &tick_broadcast_oneshot_mask; | ||
297 | } | ||
298 | |||
299 | static int tick_broadcast_set_event(ktime_t expires, int force) | ||
300 | { | ||
301 | struct clock_event_device *bc = tick_broadcast_device.evtdev; | ||
302 | ktime_t now = ktime_get(); | ||
303 | int res; | ||
304 | |||
305 | for(;;) { | ||
306 | res = clockevents_program_event(bc, expires, now); | ||
307 | if (!res || !force) | ||
308 | return res; | ||
309 | now = ktime_get(); | ||
310 | expires = ktime_add(now, ktime_set(0, bc->min_delta_ns)); | ||
311 | } | ||
312 | } | ||
313 | |||
314 | /* | ||
315 | * Reprogram the broadcast device: | ||
316 | * | ||
317 | * Called with tick_broadcast_lock held and interrupts disabled. | ||
318 | */ | ||
319 | static int tick_broadcast_reprogram(void) | ||
320 | { | ||
321 | ktime_t expires = { .tv64 = KTIME_MAX }; | ||
322 | struct tick_device *td; | ||
323 | int cpu; | ||
324 | |||
325 | /* | ||
326 | * Find the event which expires next: | ||
327 | */ | ||
328 | for (cpu = first_cpu(tick_broadcast_oneshot_mask); cpu != NR_CPUS; | ||
329 | cpu = next_cpu(cpu, tick_broadcast_oneshot_mask)) { | ||
330 | td = &per_cpu(tick_cpu_device, cpu); | ||
331 | if (td->evtdev->next_event.tv64 < expires.tv64) | ||
332 | expires = td->evtdev->next_event; | ||
333 | } | ||
334 | |||
335 | if (expires.tv64 == KTIME_MAX) | ||
336 | return 0; | ||
337 | |||
338 | return tick_broadcast_set_event(expires, 0); | ||
339 | } | ||
340 | |||
341 | /* | ||
342 | * Handle oneshot mode broadcasting | ||
343 | */ | ||
344 | static void tick_handle_oneshot_broadcast(struct clock_event_device *dev) | ||
345 | { | ||
346 | struct tick_device *td; | ||
347 | cpumask_t mask; | ||
348 | ktime_t now; | ||
349 | int cpu; | ||
350 | |||
351 | spin_lock(&tick_broadcast_lock); | ||
352 | again: | ||
353 | dev->next_event.tv64 = KTIME_MAX; | ||
354 | mask = CPU_MASK_NONE; | ||
355 | now = ktime_get(); | ||
356 | /* Find all expired events */ | ||
357 | for (cpu = first_cpu(tick_broadcast_oneshot_mask); cpu != NR_CPUS; | ||
358 | cpu = next_cpu(cpu, tick_broadcast_oneshot_mask)) { | ||
359 | td = &per_cpu(tick_cpu_device, cpu); | ||
360 | if (td->evtdev->next_event.tv64 <= now.tv64) | ||
361 | cpu_set(cpu, mask); | ||
362 | } | ||
363 | |||
364 | /* | ||
365 | * Wakeup the cpus which have an expired event. The broadcast | ||
366 | * device is reprogrammed in the return from idle code. | ||
367 | */ | ||
368 | if (!tick_do_broadcast(mask)) { | ||
369 | /* | ||
370 | * The global event did not expire any CPU local | ||
371 | * events. This happens in dyntick mode, as the | ||
372 | * maximum PIT delta is quite small. | ||
373 | */ | ||
374 | if (tick_broadcast_reprogram()) | ||
375 | goto again; | ||
376 | } | ||
377 | spin_unlock(&tick_broadcast_lock); | ||
378 | } | ||
379 | |||
380 | /* | ||
381 | * Powerstate information: The system enters/leaves a state, where | ||
382 | * affected devices might stop | ||
383 | */ | ||
384 | void tick_broadcast_oneshot_control(unsigned long reason) | ||
385 | { | ||
386 | struct clock_event_device *bc, *dev; | ||
387 | struct tick_device *td; | ||
388 | unsigned long flags; | ||
389 | int cpu; | ||
390 | |||
391 | spin_lock_irqsave(&tick_broadcast_lock, flags); | ||
392 | |||
393 | /* | ||
394 | * Periodic mode does not care about the enter/exit of power | ||
395 | * states | ||
396 | */ | ||
397 | if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) | ||
398 | goto out; | ||
399 | |||
400 | bc = tick_broadcast_device.evtdev; | ||
401 | cpu = smp_processor_id(); | ||
402 | td = &per_cpu(tick_cpu_device, cpu); | ||
403 | dev = td->evtdev; | ||
404 | |||
405 | if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) | ||
406 | goto out; | ||
407 | |||
408 | if (reason == CLOCK_EVT_NOTIFY_BROADCAST_ENTER) { | ||
409 | if (!cpu_isset(cpu, tick_broadcast_oneshot_mask)) { | ||
410 | cpu_set(cpu, tick_broadcast_oneshot_mask); | ||
411 | clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN); | ||
412 | if (dev->next_event.tv64 < bc->next_event.tv64) | ||
413 | tick_broadcast_set_event(dev->next_event, 1); | ||
414 | } | ||
415 | } else { | ||
416 | if (cpu_isset(cpu, tick_broadcast_oneshot_mask)) { | ||
417 | cpu_clear(cpu, tick_broadcast_oneshot_mask); | ||
418 | clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT); | ||
419 | if (dev->next_event.tv64 != KTIME_MAX) | ||
420 | tick_program_event(dev->next_event, 1); | ||
421 | } | ||
422 | } | ||
423 | |||
424 | out: | ||
425 | spin_unlock_irqrestore(&tick_broadcast_lock, flags); | ||
426 | } | ||
427 | |||
428 | /** | ||
429 | * tick_broadcast_setup_highres - setup the broadcast device for highres | ||
430 | */ | ||
431 | void tick_broadcast_setup_oneshot(struct clock_event_device *bc) | ||
432 | { | ||
433 | if (bc->mode != CLOCK_EVT_MODE_ONESHOT) { | ||
434 | bc->event_handler = tick_handle_oneshot_broadcast; | ||
435 | clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT); | ||
436 | bc->next_event.tv64 = KTIME_MAX; | ||
437 | } | ||
438 | } | ||
439 | |||
440 | /* | ||
441 | * Select oneshot operating mode for the broadcast device | ||
442 | */ | ||
443 | void tick_broadcast_switch_to_oneshot(void) | ||
444 | { | ||
445 | struct clock_event_device *bc; | ||
446 | unsigned long flags; | ||
447 | |||
448 | spin_lock_irqsave(&tick_broadcast_lock, flags); | ||
449 | |||
450 | tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT; | ||
451 | bc = tick_broadcast_device.evtdev; | ||
452 | if (bc) | ||
453 | tick_broadcast_setup_oneshot(bc); | ||
454 | spin_unlock_irqrestore(&tick_broadcast_lock, flags); | ||
455 | } | ||
456 | |||
457 | |||
458 | /* | ||
459 | * Remove a dead CPU from broadcasting | ||
460 | */ | ||
461 | void tick_shutdown_broadcast_oneshot(unsigned int *cpup) | ||
462 | { | ||
463 | struct clock_event_device *bc; | ||
464 | unsigned long flags; | ||
465 | unsigned int cpu = *cpup; | ||
466 | |||
467 | spin_lock_irqsave(&tick_broadcast_lock, flags); | ||
468 | |||
469 | bc = tick_broadcast_device.evtdev; | ||
470 | cpu_clear(cpu, tick_broadcast_oneshot_mask); | ||
471 | |||
472 | if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT) { | ||
473 | if (bc && cpus_empty(tick_broadcast_oneshot_mask)) | ||
474 | clockevents_set_mode(bc, CLOCK_EVT_MODE_SHUTDOWN); | ||
475 | } | ||
476 | |||
477 | spin_unlock_irqrestore(&tick_broadcast_lock, flags); | ||
478 | } | ||
479 | |||
480 | #endif | ||
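tick_handle_oneshot_broadcast() and tick_broadcast_reprogram() above scan the same oneshot mask with two goals: collect every cpu whose next_event has already expired into a broadcast mask, and find the earliest remaining expiry to program the broadcast device for. A condensed userspace sketch of that selection over a plain array standing in for the per-cpu tick devices; all names and values are illustrative:

#include <stdint.h>
#include <stdio.h>

#define NR_CPUS		4
#define KTIME_MAX	INT64_MAX

/* stand-in for per_cpu(tick_cpu_device, cpu).evtdev->next_event */
static int64_t next_event[NR_CPUS] = { 500, 200, KTIME_MAX, 350 };

int main(void)
{
	int64_t now = 220, earliest = KTIME_MAX;
	unsigned int expired = 0;
	int cpu;

	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		if (next_event[cpu] <= now)
			expired |= 1u << cpu;	    /* wake via broadcast */
		else if (next_event[cpu] < earliest)
			earliest = next_event[cpu]; /* next program target */
	}

	printf("broadcast mask 0x%x, reprogram for t=%lld\n",
	       expired, (long long)earliest);
	return 0;
}

In the kernel the two halves run separately (the handler broadcasts first and only reprograms when nothing expired, retrying until the programmed event lies in the future), but the selection logic is the one shown.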
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
new file mode 100644
index 000000000000..4500e347f1bb
--- /dev/null
+++ b/kernel/time/tick-common.c
@@ -0,0 +1,346 @@
1 | /* | ||
2 | * linux/kernel/time/tick-common.c | ||
3 | * | ||
4 | * This file contains the base functions to manage periodic tick | ||
5 | * related events. | ||
6 | * | ||
7 | * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de> | ||
8 | * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar | ||
9 | * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner | ||
10 | * | ||
11 | * This code is licenced under the GPL version 2. For details see | ||
12 | * kernel-base/COPYING. | ||
13 | */ | ||
14 | #include <linux/cpu.h> | ||
15 | #include <linux/err.h> | ||
16 | #include <linux/hrtimer.h> | ||
17 | #include <linux/irq.h> | ||
18 | #include <linux/percpu.h> | ||
19 | #include <linux/profile.h> | ||
20 | #include <linux/sched.h> | ||
21 | #include <linux/tick.h> | ||
22 | |||
23 | #include "tick-internal.h" | ||
24 | |||
25 | /* | ||
26 | * Tick devices | ||
27 | */ | ||
28 | DEFINE_PER_CPU(struct tick_device, tick_cpu_device); | ||
29 | /* | ||
30 | * Tick next event: keeps track of the tick time | ||
31 | */ | ||
32 | ktime_t tick_next_period; | ||
33 | ktime_t tick_period; | ||
34 | static int tick_do_timer_cpu = -1; | ||
35 | DEFINE_SPINLOCK(tick_device_lock); | ||
36 | |||
37 | /* | ||
38 | * Debugging: see timer_list.c | ||
39 | */ | ||
40 | struct tick_device *tick_get_device(int cpu) | ||
41 | { | ||
42 | return &per_cpu(tick_cpu_device, cpu); | ||
43 | } | ||
44 | |||
45 | /** | ||
46 | * tick_is_oneshot_available - check for a oneshot capable event device | ||
47 | */ | ||
48 | int tick_is_oneshot_available(void) | ||
49 | { | ||
50 | struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; | ||
51 | |||
52 | return dev && (dev->features & CLOCK_EVT_FEAT_ONESHOT); | ||
53 | } | ||
54 | |||
55 | /* | ||
56 | * Periodic tick | ||
57 | */ | ||
58 | static void tick_periodic(int cpu) | ||
59 | { | ||
60 | if (tick_do_timer_cpu == cpu) { | ||
61 | write_seqlock(&xtime_lock); | ||
62 | |||
63 | /* Keep track of the next tick event */ | ||
64 | tick_next_period = ktime_add(tick_next_period, tick_period); | ||
65 | |||
66 | do_timer(1); | ||
67 | write_sequnlock(&xtime_lock); | ||
68 | } | ||
69 | |||
70 | update_process_times(user_mode(get_irq_regs())); | ||
71 | profile_tick(CPU_PROFILING); | ||
72 | } | ||
73 | |||
74 | /* | ||
75 | * Event handler for periodic ticks | ||
76 | */ | ||
77 | void tick_handle_periodic(struct clock_event_device *dev) | ||
78 | { | ||
79 | int cpu = smp_processor_id(); | ||
80 | |||
81 | tick_periodic(cpu); | ||
82 | |||
83 | if (dev->mode != CLOCK_EVT_MODE_ONESHOT) | ||
84 | return; | ||
85 | /* | ||
86 | * Setup the next period for devices, which do not have | ||
87 | * periodic mode: | ||
88 | */ | ||
89 | for (;;) { | ||
90 | ktime_t next = ktime_add(dev->next_event, tick_period); | ||
91 | |||
92 | if (!clockevents_program_event(dev, next, ktime_get())) | ||
93 | return; | ||
94 | tick_periodic(cpu); | ||
95 | } | ||
96 | } | ||
97 | |||
98 | /* | ||
99 | * Setup the device for a periodic tick | ||
100 | */ | ||
101 | void tick_setup_periodic(struct clock_event_device *dev, int broadcast) | ||
102 | { | ||
103 | tick_set_periodic_handler(dev, broadcast); | ||
104 | |||
105 | /* Broadcast setup ? */ | ||
106 | if (!tick_device_is_functional(dev)) | ||
107 | return; | ||
108 | |||
109 | if (dev->features & CLOCK_EVT_FEAT_PERIODIC) { | ||
110 | clockevents_set_mode(dev, CLOCK_EVT_MODE_PERIODIC); | ||
111 | } else { | ||
112 | unsigned long seq; | ||
113 | ktime_t next; | ||
114 | |||
115 | do { | ||
116 | seq = read_seqbegin(&xtime_lock); | ||
117 | next = tick_next_period; | ||
118 | } while (read_seqretry(&xtime_lock, seq)); | ||
119 | |||
120 | clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT); | ||
121 | |||
122 | for (;;) { | ||
123 | if (!clockevents_program_event(dev, next, ktime_get())) | ||
124 | return; | ||
125 | next = ktime_add(next, tick_period); | ||
126 | } | ||
127 | } | ||
128 | } | ||
129 | |||
130 | /* | ||
131 | * Setup the tick device | ||
132 | */ | ||
133 | static void tick_setup_device(struct tick_device *td, | ||
134 | struct clock_event_device *newdev, int cpu, | ||
135 | cpumask_t cpumask) | ||
136 | { | ||
137 | ktime_t next_event; | ||
138 | void (*handler)(struct clock_event_device *) = NULL; | ||
139 | |||
140 | /* | ||
141 | * First device setup ? | ||
142 | */ | ||
143 | if (!td->evtdev) { | ||
144 | /* | ||
145 | * If no cpu took the do_timer update, assign it to | ||
146 | * this cpu: | ||
147 | */ | ||
148 | if (tick_do_timer_cpu == -1) { | ||
149 | tick_do_timer_cpu = cpu; | ||
150 | tick_next_period = ktime_get(); | ||
151 | tick_period = ktime_set(0, NSEC_PER_SEC / HZ); | ||
152 | } | ||
153 | |||
154 | /* | ||
155 | * Startup in periodic mode first. | ||
156 | */ | ||
157 | td->mode = TICKDEV_MODE_PERIODIC; | ||
158 | } else { | ||
159 | handler = td->evtdev->event_handler; | ||
160 | next_event = td->evtdev->next_event; | ||
161 | } | ||
162 | |||
163 | td->evtdev = newdev; | ||
164 | |||
165 | /* | ||
166 | * When the device is not per cpu, pin the interrupt to the | ||
167 | * current cpu: | ||
168 | */ | ||
169 | if (!cpus_equal(newdev->cpumask, cpumask)) | ||
170 | irq_set_affinity(newdev->irq, cpumask); | ||
171 | |||
172 | /* | ||
173 | * When global broadcasting is active, check if the current | ||
174 | * device is registered as a placeholder for broadcast mode. | ||
175 | * This allows us to handle this x86 misfeature in a generic | ||
176 | * way. | ||
177 | */ | ||
178 | if (tick_device_uses_broadcast(newdev, cpu)) | ||
179 | return; | ||
180 | |||
181 | if (td->mode == TICKDEV_MODE_PERIODIC) | ||
182 | tick_setup_periodic(newdev, 0); | ||
183 | else | ||
184 | tick_setup_oneshot(newdev, handler, next_event); | ||
185 | } | ||
186 | |||
187 | /* | ||
188 | * Check, if the new registered device should be used. | ||
189 | */ | ||
190 | static int tick_check_new_device(struct clock_event_device *newdev) | ||
191 | { | ||
192 | struct clock_event_device *curdev; | ||
193 | struct tick_device *td; | ||
194 | int cpu, ret = NOTIFY_OK; | ||
195 | unsigned long flags; | ||
196 | cpumask_t cpumask; | ||
197 | |||
198 | spin_lock_irqsave(&tick_device_lock, flags); | ||
199 | |||
200 | cpu = smp_processor_id(); | ||
201 | if (!cpu_isset(cpu, newdev->cpumask)) | ||
202 | goto out; | ||
203 | |||
204 | td = &per_cpu(tick_cpu_device, cpu); | ||
205 | curdev = td->evtdev; | ||
206 | cpumask = cpumask_of_cpu(cpu); | ||
207 | |||
208 | /* cpu local device ? */ | ||
209 | if (!cpus_equal(newdev->cpumask, cpumask)) { | ||
210 | |||
211 | /* | ||
212 | * If the cpu affinity of the device interrupt can not | ||
213 | * be set, ignore it. | ||
214 | */ | ||
215 | if (!irq_can_set_affinity(newdev->irq)) | ||
216 | goto out_bc; | ||
217 | |||
218 | /* | ||
219 | * If we have a cpu local device already, do not replace it | ||
220 | * by a non cpu local device | ||
221 | */ | ||
222 | if (curdev && cpus_equal(curdev->cpumask, cpumask)) | ||
223 | goto out_bc; | ||
224 | } | ||
225 | |||
226 | /* | ||
227 | * If we have an active device, then check the rating and the oneshot | ||
228 | * feature. | ||
229 | */ | ||
230 | if (curdev) { | ||
231 | /* | ||
232 | * Prefer oneshot capable devices! | ||
233 | */ | ||
234 | if ((curdev->features & CLOCK_EVT_FEAT_ONESHOT) && | ||
235 | !(newdev->features & CLOCK_EVT_FEAT_ONESHOT)) | ||
236 | goto out_bc; | ||
237 | /* | ||
238 | * Check the rating | ||
239 | */ | ||
240 | if (curdev->rating >= newdev->rating) | ||
241 | goto out_bc; | ||
242 | } | ||
243 | |||
244 | /* | ||
245 | * Replace the possibly existing device by the new | ||
246 | * device. If the current device is the broadcast device, do | ||
247 | * not give it back to the clockevents layer! | ||
248 | */ | ||
249 | if (tick_is_broadcast_device(curdev)) { | ||
250 | clockevents_set_mode(curdev, CLOCK_EVT_MODE_SHUTDOWN); | ||
251 | curdev = NULL; | ||
252 | } | ||
253 | clockevents_exchange_device(curdev, newdev); | ||
254 | tick_setup_device(td, newdev, cpu, cpumask); | ||
255 | if (newdev->features & CLOCK_EVT_FEAT_ONESHOT) | ||
256 | tick_oneshot_notify(); | ||
257 | |||
258 | spin_unlock_irqrestore(&tick_device_lock, flags); | ||
259 | return NOTIFY_STOP; | ||
260 | |||
261 | out_bc: | ||
262 | /* | ||
263 | * Can the new device be used as a broadcast device ? | ||
264 | */ | ||
265 | if (tick_check_broadcast_device(newdev)) | ||
266 | ret = NOTIFY_STOP; | ||
267 | out: | ||
268 | spin_unlock_irqrestore(&tick_device_lock, flags); | ||
269 | |||
270 | return ret; | ||
271 | } | ||
272 | |||
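The checks above reduce to a simple preference order: keep a cpu-local device over a global one, never trade a oneshot-capable device for a periodic-only one, and otherwise let the rating decide (note the >= comparison, so on a tie the current device is kept). The rating part, condensed into a standalone predicate for illustration (a sketch, not part of the patch):

	/* Sketch: nonzero when newdev should replace curdev. */
	static int tick_prefer_new(struct clock_event_device *curdev,
				   struct clock_event_device *newdev)
	{
		if (!curdev)
			return 1;
		/* Never trade a oneshot capable device for a periodic-only one: */
		if ((curdev->features & CLOCK_EVT_FEAT_ONESHOT) &&
		    !(newdev->features & CLOCK_EVT_FEAT_ONESHOT))
			return 0;
		/* Otherwise the strictly higher rating wins: */
		return newdev->rating > curdev->rating;
	}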
273 | /* | ||
274 | * Shutdown an event device on a given cpu: | ||
275 | * | ||
276 | * This is called on a live CPU, when another CPU is dead. So we cannot | ||
277 | * access the hardware device itself. | ||
278 | * We just set the mode and remove it from the lists. | ||
279 | */ | ||
280 | static void tick_shutdown(unsigned int *cpup) | ||
281 | { | ||
282 | struct tick_device *td = &per_cpu(tick_cpu_device, *cpup); | ||
283 | struct clock_event_device *dev = td->evtdev; | ||
284 | unsigned long flags; | ||
285 | |||
286 | spin_lock_irqsave(&tick_device_lock, flags); | ||
287 | td->mode = TICKDEV_MODE_PERIODIC; | ||
288 | if (dev) { | ||
289 | /* | ||
290 | * Prevent the clock events layer from calling the | ||
291 | * set_mode function! | ||
292 | */ | ||
293 | dev->mode = CLOCK_EVT_MODE_UNUSED; | ||
294 | clockevents_exchange_device(dev, NULL); | ||
295 | td->evtdev = NULL; | ||
296 | } | ||
297 | spin_unlock_irqrestore(&tick_device_lock, flags); | ||
298 | } | ||
299 | |||
300 | /* | ||
301 | * Notification about clock event devices | ||
302 | */ | ||
303 | static int tick_notify(struct notifier_block *nb, unsigned long reason, | ||
304 | void *dev) | ||
305 | { | ||
306 | switch (reason) { | ||
307 | |||
308 | case CLOCK_EVT_NOTIFY_ADD: | ||
309 | return tick_check_new_device(dev); | ||
310 | |||
311 | case CLOCK_EVT_NOTIFY_BROADCAST_ON: | ||
312 | case CLOCK_EVT_NOTIFY_BROADCAST_OFF: | ||
313 | tick_broadcast_on_off(reason, dev); | ||
314 | break; | ||
315 | |||
316 | case CLOCK_EVT_NOTIFY_BROADCAST_ENTER: | ||
317 | case CLOCK_EVT_NOTIFY_BROADCAST_EXIT: | ||
318 | tick_broadcast_oneshot_control(reason); | ||
319 | break; | ||
320 | |||
321 | case CLOCK_EVT_NOTIFY_CPU_DEAD: | ||
322 | tick_shutdown_broadcast_oneshot(dev); | ||
323 | tick_shutdown_broadcast(dev); | ||
324 | tick_shutdown(dev); | ||
325 | break; | ||
326 | |||
327 | default: | ||
328 | break; | ||
329 | } | ||
330 | |||
331 | return NOTIFY_OK; | ||
332 | } | ||
333 | |||
334 | static struct notifier_block tick_notifier = { | ||
335 | .notifier_call = tick_notify, | ||
336 | }; | ||
337 | |||
338 | /** | ||
339 | * tick_init - initialize the tick control | ||
340 | * | ||
341 | * Register the notifier with the clockevents framework | ||
342 | */ | ||
343 | void __init tick_init(void) | ||
344 | { | ||
345 | clockevents_register_notifier(&tick_notifier); | ||
346 | } | ||
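This notifier is how the tick layer learns about new hardware: an architecture timer driver fills in a struct clock_event_device and calls clockevents_register_device() (provided by clockevents.c in this series), which raises CLOCK_EVT_NOTIFY_ADD and thereby ends up in tick_check_new_device(). A hypothetical driver-side sketch, with all my_* names illustrative and the mult/shift/delta calibration elided:

	static int my_set_next_event(unsigned long evt,
				     struct clock_event_device *d)
	{
		/* program the hardware comparator 'evt' clock cycles ahead */
		return 0;
	}

	static void my_set_mode(enum clock_event_mode mode,
				struct clock_event_device *d)
	{
		/* switch the hardware between periodic/oneshot/shutdown */
	}

	static struct clock_event_device my_clockevent = {
		.name		= "my-timer",
		.features	= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
		.rating		= 300,
		.irq		= -1,
		.set_next_event	= my_set_next_event,
		.set_mode	= my_set_mode,
		/* .mult, .shift, .min/max_delta_ns derived from the clock rate */
	};

	static void __init my_timer_init(void)
	{
		my_clockevent.cpumask = cpumask_of_cpu(smp_processor_id());
		clockevents_register_device(&my_clockevent);
	}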
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h new file mode 100644 index 000000000000..54861a0f29ff --- /dev/null +++ b/kernel/time/tick-internal.h | |||
@@ -0,0 +1,110 @@ | |||
1 | /* | ||
2 | * tick internal variables and functions used by low/high res code | ||
3 | */ | ||
4 | DECLARE_PER_CPU(struct tick_device, tick_cpu_device); | ||
5 | extern spinlock_t tick_device_lock; | ||
6 | extern ktime_t tick_next_period; | ||
7 | extern ktime_t tick_period; | ||
8 | |||
9 | extern void tick_setup_periodic(struct clock_event_device *dev, int broadcast); | ||
10 | extern void tick_handle_periodic(struct clock_event_device *dev); | ||
11 | |||
12 | /* | ||
13 | * NO_HZ / high resolution timer shared code | ||
14 | */ | ||
15 | #ifdef CONFIG_TICK_ONESHOT | ||
16 | extern void tick_setup_oneshot(struct clock_event_device *newdev, | ||
17 | void (*handler)(struct clock_event_device *), | ||
18 | ktime_t nextevt); | ||
19 | extern int tick_program_event(ktime_t expires, int force); | ||
20 | extern void tick_oneshot_notify(void); | ||
21 | extern int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *)); | ||
22 | |||
23 | # ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST | ||
24 | extern void tick_broadcast_setup_oneshot(struct clock_event_device *bc); | ||
25 | extern void tick_broadcast_oneshot_control(unsigned long reason); | ||
26 | extern void tick_broadcast_switch_to_oneshot(void); | ||
27 | extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup); | ||
28 | # else /* BROADCAST */ | ||
29 | static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) | ||
30 | { | ||
31 | BUG(); | ||
32 | } | ||
33 | static inline void tick_broadcast_oneshot_control(unsigned long reason) { } | ||
34 | static inline void tick_broadcast_switch_to_oneshot(void) { } | ||
35 | static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { } | ||
36 | # endif /* !BROADCAST */ | ||
37 | |||
38 | #else /* !ONESHOT */ | ||
39 | static inline | ||
40 | void tick_setup_oneshot(struct clock_event_device *newdev, | ||
41 | void (*handler)(struct clock_event_device *), | ||
42 | ktime_t nextevt) | ||
43 | { | ||
44 | BUG(); | ||
45 | } | ||
46 | static inline int tick_program_event(ktime_t expires, int force) | ||
47 | { | ||
48 | return 0; | ||
49 | } | ||
50 | static inline void tick_oneshot_notify(void) { } | ||
51 | static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) | ||
52 | { | ||
53 | BUG(); | ||
54 | } | ||
55 | static inline void tick_broadcast_oneshot_control(unsigned long reason) { } | ||
56 | static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { } | ||
57 | #endif /* !TICK_ONESHOT */ | ||
58 | |||
59 | /* | ||
60 | * Broadcasting support | ||
61 | */ | ||
62 | #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST | ||
63 | extern int tick_do_broadcast(cpumask_t mask); | ||
64 | |||
65 | extern int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu); | ||
66 | extern int tick_check_broadcast_device(struct clock_event_device *dev); | ||
67 | extern int tick_is_broadcast_device(struct clock_event_device *dev); | ||
68 | extern void tick_broadcast_on_off(unsigned long reason, int *oncpu); | ||
69 | extern void tick_shutdown_broadcast(unsigned int *cpup); | ||
70 | |||
71 | extern void | ||
72 | tick_set_periodic_handler(struct clock_event_device *dev, int broadcast); | ||
73 | |||
74 | #else /* !BROADCAST */ | ||
75 | |||
76 | static inline int tick_check_broadcast_device(struct clock_event_device *dev) | ||
77 | { | ||
78 | return 0; | ||
79 | } | ||
80 | |||
81 | static inline int tick_is_broadcast_device(struct clock_event_device *dev) | ||
82 | { | ||
83 | return 0; | ||
84 | } | ||
85 | static inline int tick_device_uses_broadcast(struct clock_event_device *dev, | ||
86 | int cpu) | ||
87 | { | ||
88 | return 0; | ||
89 | } | ||
90 | static inline void tick_do_periodic_broadcast(struct clock_event_device *d) { } | ||
91 | static inline void tick_broadcast_on_off(unsigned long reason, int *oncpu) { } | ||
92 | static inline void tick_shutdown_broadcast(unsigned int *cpup) { } | ||
93 | |||
94 | /* | ||
95 | * Set the periodic handler in non-broadcast mode | ||
96 | */ | ||
97 | static inline void tick_set_periodic_handler(struct clock_event_device *dev, | ||
98 | int broadcast) | ||
99 | { | ||
100 | dev->event_handler = tick_handle_periodic; | ||
101 | } | ||
102 | #endif /* !BROADCAST */ | ||
103 | |||
104 | /* | ||
105 | * Check, if the device is functional or a dummy for broadcast | ||
106 | */ | ||
107 | static inline int tick_device_is_functional(struct clock_event_device *dev) | ||
108 | { | ||
109 | return !(dev->features & CLOCK_EVT_FEAT_DUMMY); | ||
110 | } | ||
diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c new file mode 100644 index 000000000000..2e8b7ff863cc --- /dev/null +++ b/kernel/time/tick-oneshot.c | |||
@@ -0,0 +1,84 @@ | |||
1 | /* | ||
2 | * linux/kernel/time/tick-oneshot.c | ||
3 | * | ||
4 | * This file contains functions which manage high resolution tick | ||
5 | * related events. | ||
6 | * | ||
7 | * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de> | ||
8 | * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar | ||
9 | * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner | ||
10 | * | ||
11 | * This code is licenced under the GPL version 2. For details see | ||
12 | * kernel-base/COPYING. | ||
13 | */ | ||
14 | #include <linux/cpu.h> | ||
15 | #include <linux/err.h> | ||
16 | #include <linux/hrtimer.h> | ||
17 | #include <linux/irq.h> | ||
18 | #include <linux/percpu.h> | ||
19 | #include <linux/profile.h> | ||
20 | #include <linux/sched.h> | ||
21 | #include <linux/tick.h> | ||
22 | |||
23 | #include "tick-internal.h" | ||
24 | |||
25 | /** | ||
26 | * tick_program_event - program the cpu local tick device for the next event | ||
27 | */ | ||
28 | int tick_program_event(ktime_t expires, int force) | ||
29 | { | ||
30 | struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; | ||
31 | ktime_t now = ktime_get(); | ||
32 | |||
33 | while (1) { | ||
34 | int ret = clockevents_program_event(dev, expires, now); | ||
35 | |||
36 | if (!ret || !force) | ||
37 | return ret; | ||
38 | now = ktime_get(); | ||
39 | expires = ktime_add(now, ktime_set(0, dev->min_delta_ns)); | ||
40 | } | ||
41 | } | ||
42 | |||
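The force parameter matters on the reprogramming paths: without it a programming attempt whose expiry already lies in the past simply reports failure, while with force set the loop re-reads the clock and retries with the earliest expiry the hardware is specified to handle, now plus min_delta_ns. With a min_delta_ns of 1000, for instance, each retry requests an event 1 us after the freshly read now, so the loop exits as soon as one attempt beats the advancing clock.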
43 | /** | ||
44 | * tick_setup_oneshot - setup the event device for oneshot mode (hres or nohz) | ||
45 | */ | ||
46 | void tick_setup_oneshot(struct clock_event_device *newdev, | ||
47 | void (*handler)(struct clock_event_device *), | ||
48 | ktime_t next_event) | ||
49 | { | ||
50 | newdev->event_handler = handler; | ||
51 | clockevents_set_mode(newdev, CLOCK_EVT_MODE_ONESHOT); | ||
52 | clockevents_program_event(newdev, next_event, ktime_get()); | ||
53 | } | ||
54 | |||
55 | /** | ||
56 | * tick_switch_to_oneshot - switch to oneshot mode | ||
57 | */ | ||
58 | int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *)) | ||
59 | { | ||
60 | struct tick_device *td = &__get_cpu_var(tick_cpu_device); | ||
61 | struct clock_event_device *dev = td->evtdev; | ||
62 | |||
63 | if (!dev || !(dev->features & CLOCK_EVT_FEAT_ONESHOT) || | ||
64 | !tick_device_is_functional(dev)) | ||
65 | return -EINVAL; | ||
66 | |||
67 | td->mode = TICKDEV_MODE_ONESHOT; | ||
68 | dev->event_handler = handler; | ||
69 | clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT); | ||
70 | tick_broadcast_switch_to_oneshot(); | ||
71 | return 0; | ||
72 | } | ||
73 | |||
74 | #ifdef CONFIG_HIGH_RES_TIMERS | ||
75 | /** | ||
76 | * tick_init_highres - switch to high resolution mode | ||
77 | * | ||
78 | * Called with interrupts disabled. | ||
79 | */ | ||
80 | int tick_init_highres(void) | ||
81 | { | ||
82 | return tick_switch_to_oneshot(hrtimer_interrupt); | ||
83 | } | ||
84 | #endif | ||
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c new file mode 100644 index 000000000000..95e41f7f850b --- /dev/null +++ b/kernel/time/tick-sched.c | |||
@@ -0,0 +1,563 @@ | |||
1 | /* | ||
2 | * linux/kernel/time/tick-sched.c | ||
3 | * | ||
4 | * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de> | ||
5 | * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar | ||
6 | * Copyright(C) 2006-2007 Timesys Corp., Thomas Gleixner | ||
7 | * | ||
8 | * No idle tick implementation for low and high resolution timers | ||
9 | * | ||
10 | * Started by: Thomas Gleixner and Ingo Molnar | ||
11 | * | ||
12 | * For licencing details see kernel-base/COPYING | ||
13 | */ | ||
14 | #include <linux/cpu.h> | ||
15 | #include <linux/err.h> | ||
16 | #include <linux/hrtimer.h> | ||
17 | #include <linux/interrupt.h> | ||
18 | #include <linux/kernel_stat.h> | ||
19 | #include <linux/percpu.h> | ||
20 | #include <linux/profile.h> | ||
21 | #include <linux/sched.h> | ||
22 | #include <linux/tick.h> | ||
23 | |||
24 | #include "tick-internal.h" | ||
25 | |||
26 | /* | ||
27 | * Per cpu nohz control structure | ||
28 | */ | ||
29 | static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched); | ||
30 | |||
31 | /* | ||
32 | * The time, when the last jiffy update happened. Protected by xtime_lock. | ||
33 | */ | ||
34 | static ktime_t last_jiffies_update; | ||
35 | |||
36 | struct tick_sched *tick_get_tick_sched(int cpu) | ||
37 | { | ||
38 | return &per_cpu(tick_cpu_sched, cpu); | ||
39 | } | ||
40 | |||
41 | /* | ||
42 | * Must be called with interrupts disabled ! | ||
43 | */ | ||
44 | static void tick_do_update_jiffies64(ktime_t now) | ||
45 | { | ||
46 | unsigned long ticks = 0; | ||
47 | ktime_t delta; | ||
48 | |||
49 | /* Re-evaluate with xtime_lock held */ | ||
50 | write_seqlock(&xtime_lock); | ||
51 | |||
52 | delta = ktime_sub(now, last_jiffies_update); | ||
53 | if (delta.tv64 >= tick_period.tv64) { | ||
54 | |||
55 | delta = ktime_sub(delta, tick_period); | ||
56 | last_jiffies_update = ktime_add(last_jiffies_update, | ||
57 | tick_period); | ||
58 | |||
59 | /* Slow path for long timeouts */ | ||
60 | if (unlikely(delta.tv64 >= tick_period.tv64)) { | ||
61 | s64 incr = ktime_to_ns(tick_period); | ||
62 | |||
63 | ticks = ktime_divns(delta, incr); | ||
64 | |||
65 | last_jiffies_update = ktime_add_ns(last_jiffies_update, | ||
66 | incr * ticks); | ||
67 | } | ||
68 | do_timer(++ticks); | ||
69 | } | ||
70 | write_sequnlock(&xtime_lock); | ||
71 | } | ||
72 | |||
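A worked example of the catch-up logic: with HZ=250, tick_period is 4 ms. If this code runs 22 ms after the last update, the first subtraction consumes one period (delta becomes 18 ms), the slow path computes ticks = 18 ms / 4 ms = 4 and advances last_jiffies_update by another 16 ms, and do_timer(++ticks) then adds all 5 jiffies in one go; the remaining 2 ms stay accumulated for the next update.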
73 | /* | ||
74 | * Initialize and return the time of the last jiffies update. | ||
75 | */ | ||
76 | static ktime_t tick_init_jiffy_update(void) | ||
77 | { | ||
78 | ktime_t period; | ||
79 | |||
80 | write_seqlock(&xtime_lock); | ||
81 | /* Did we start the jiffies update yet ? */ | ||
82 | if (last_jiffies_update.tv64 == 0) | ||
83 | last_jiffies_update = tick_next_period; | ||
84 | period = last_jiffies_update; | ||
85 | write_sequnlock(&xtime_lock); | ||
86 | return period; | ||
87 | } | ||
88 | |||
89 | /* | ||
90 | * NOHZ - aka dynamic tick functionality | ||
91 | */ | ||
92 | #ifdef CONFIG_NO_HZ | ||
93 | /* | ||
94 | * NO HZ enabled ? | ||
95 | */ | ||
96 | static int tick_nohz_enabled __read_mostly = 1; | ||
97 | |||
98 | /* | ||
99 | * Enable / Disable tickless mode | ||
100 | */ | ||
101 | static int __init setup_tick_nohz(char *str) | ||
102 | { | ||
103 | if (!strcmp(str, "off")) | ||
104 | tick_nohz_enabled = 0; | ||
105 | else if (!strcmp(str, "on")) | ||
106 | tick_nohz_enabled = 1; | ||
107 | else | ||
108 | return 0; | ||
109 | return 1; | ||
110 | } | ||
111 | |||
112 | __setup("nohz=", setup_tick_nohz); | ||
113 | |||
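In practice: booting with nohz=off keeps the periodic tick, nohz=on restates the default, and any other value is rejected by the handler (it returns 0, leaving the option unconsumed).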
114 | /** | ||
115 | * tick_nohz_update_jiffies - update jiffies when idle was interrupted | ||
116 | * | ||
117 | * Called from interrupt entry when the CPU was idle | ||
118 | * | ||
119 | * In case the sched_tick was stopped on this CPU, we have to check if jiffies | ||
120 | * must be updated. Otherwise an interrupt handler could use a stale jiffy | ||
121 | * value. We do this unconditionally on any cpu, as we don't know whether the | ||
122 | * cpu which has the update duty assigned is in a long sleep. | ||
123 | */ | ||
124 | void tick_nohz_update_jiffies(void) | ||
125 | { | ||
126 | int cpu = smp_processor_id(); | ||
127 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | ||
128 | unsigned long flags; | ||
129 | ktime_t now; | ||
130 | |||
131 | if (!ts->tick_stopped) | ||
132 | return; | ||
133 | |||
134 | cpu_clear(cpu, nohz_cpu_mask); | ||
135 | now = ktime_get(); | ||
136 | |||
137 | local_irq_save(flags); | ||
138 | tick_do_update_jiffies64(now); | ||
139 | local_irq_restore(flags); | ||
140 | } | ||
141 | |||
142 | /** | ||
143 | * tick_nohz_stop_sched_tick - stop the idle tick from the idle task | ||
144 | * | ||
145 | * When the next event is more than a tick into the future, stop the idle tick. | ||
146 | * Called either from the idle loop or from irq_exit() when an idle period was | ||
147 | * just interrupted by an interrupt which did not cause a reschedule. | ||
148 | */ | ||
149 | void tick_nohz_stop_sched_tick(void) | ||
150 | { | ||
151 | unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags; | ||
152 | struct tick_sched *ts; | ||
153 | ktime_t last_update, expires, now, delta; | ||
154 | int cpu; | ||
155 | |||
156 | local_irq_save(flags); | ||
157 | |||
158 | cpu = smp_processor_id(); | ||
159 | ts = &per_cpu(tick_cpu_sched, cpu); | ||
160 | |||
161 | if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) | ||
162 | goto end; | ||
163 | |||
164 | if (need_resched()) | ||
165 | goto end; | ||
166 | |||
167 | BUG_ON(local_softirq_pending()); | ||
169 | |||
170 | now = ktime_get(); | ||
171 | /* | ||
172 | * When called from irq_exit we need to account the idle sleep time | ||
173 | * correctly. | ||
174 | */ | ||
175 | if (ts->tick_stopped) { | ||
176 | delta = ktime_sub(now, ts->idle_entrytime); | ||
177 | ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); | ||
178 | } | ||
179 | |||
180 | ts->idle_entrytime = now; | ||
181 | ts->idle_calls++; | ||
182 | |||
183 | /* Read jiffies and the time when jiffies were updated last */ | ||
184 | do { | ||
185 | seq = read_seqbegin(&xtime_lock); | ||
186 | last_update = last_jiffies_update; | ||
187 | last_jiffies = jiffies; | ||
188 | } while (read_seqretry(&xtime_lock, seq)); | ||
189 | |||
190 | /* Get the next timer wheel timer */ | ||
191 | next_jiffies = get_next_timer_interrupt(last_jiffies); | ||
192 | delta_jiffies = next_jiffies - last_jiffies; | ||
193 | |||
194 | /* | ||
195 | * Do not stop the tick, if we are only one jiffy off | ||
196 | * or if the cpu is required for RCU | ||
197 | */ | ||
198 | if (!ts->tick_stopped && (delta_jiffies == 1 || rcu_needs_cpu(cpu))) | ||
199 | goto out; | ||
200 | |||
201 | /* Schedule the tick, if we are at least one jiffy off */ | ||
202 | if ((long)delta_jiffies >= 1) { | ||
203 | |||
204 | if (rcu_needs_cpu(cpu)) | ||
205 | delta_jiffies = 1; | ||
206 | else | ||
207 | cpu_set(cpu, nohz_cpu_mask); | ||
208 | /* | ||
209 | * nohz_stop_sched_tick can be called several times before | ||
210 | * the nohz_restart_sched_tick is called. This happens when | ||
211 | * interrupts arrive which do not cause a reschedule. In the | ||
212 | * first call we save the current tick time, so we can restart | ||
213 | * the scheduler tick in nohz_restart_sched_tick. | ||
214 | */ | ||
215 | if (!ts->tick_stopped) { | ||
216 | ts->idle_tick = ts->sched_timer.expires; | ||
217 | ts->tick_stopped = 1; | ||
218 | ts->idle_jiffies = last_jiffies; | ||
219 | } | ||
220 | /* | ||
221 | * calculate the expiry time for the next timer wheel | ||
222 | * timer | ||
223 | */ | ||
224 | expires = ktime_add_ns(last_update, tick_period.tv64 * | ||
225 | delta_jiffies); | ||
226 | ts->idle_expires = expires; | ||
227 | ts->idle_sleeps++; | ||
228 | |||
229 | if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { | ||
230 | hrtimer_start(&ts->sched_timer, expires, | ||
231 | HRTIMER_MODE_ABS); | ||
232 | /* Check, if the timer was already in the past */ | ||
233 | if (hrtimer_active(&ts->sched_timer)) | ||
234 | goto out; | ||
235 | } else if (!tick_program_event(expires, 0)) | ||
236 | goto out; | ||
237 | /* | ||
238 | * We are past the event already. So we crossed a | ||
239 | * jiffy boundary. Update jiffies and raise the | ||
240 | * softirq. | ||
241 | */ | ||
242 | tick_do_update_jiffies64(ktime_get()); | ||
243 | cpu_clear(cpu, nohz_cpu_mask); | ||
244 | } | ||
245 | raise_softirq_irqoff(TIMER_SOFTIRQ); | ||
246 | out: | ||
247 | ts->next_jiffies = next_jiffies; | ||
248 | ts->last_jiffies = last_jiffies; | ||
249 | end: | ||
250 | local_irq_restore(flags); | ||
251 | } | ||
252 | |||
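The sleep length falls out of plain jiffies arithmetic: get_next_timer_interrupt() reports the next pending timer wheel expiry, and the event is programmed at last_update plus delta_jiffies times tick_period. With HZ=250 and the next timer 25 jiffies away, a single event is programmed 100 ms into the future instead of taking 24 intermediate ticks; rcu_needs_cpu() clamps delta_jiffies back to 1, because RCU still needs this cpu to pass a quiescent state each tick.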
253 | /** | ||
254 | * tick_nohz_restart_sched_tick - restart the idle tick from the idle task | ||
255 | * | ||
256 | * Restart the idle tick when the CPU is woken up from idle | ||
257 | */ | ||
258 | void tick_nohz_restart_sched_tick(void) | ||
259 | { | ||
260 | int cpu = smp_processor_id(); | ||
261 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | ||
262 | unsigned long ticks; | ||
263 | ktime_t now, delta; | ||
264 | |||
265 | if (!ts->tick_stopped) | ||
266 | return; | ||
267 | |||
268 | /* Update jiffies first */ | ||
269 | now = ktime_get(); | ||
270 | |||
271 | local_irq_disable(); | ||
272 | tick_do_update_jiffies64(now); | ||
273 | cpu_clear(cpu, nohz_cpu_mask); | ||
274 | |||
275 | /* Account the idle time */ | ||
276 | delta = ktime_sub(now, ts->idle_entrytime); | ||
277 | ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); | ||
278 | |||
279 | /* | ||
280 | * We stopped the tick in idle. update_process_times() would miss the | ||
281 | * time we slept, as it only accounts a single tick. Enforce that | ||
282 | * this time is accounted to idle! | ||
283 | */ | ||
284 | ticks = jiffies - ts->idle_jiffies; | ||
285 | /* | ||
286 | * We might be one off. Do not randomly account a huge number of ticks! | ||
287 | */ | ||
288 | if (ticks && ticks < LONG_MAX) { | ||
289 | add_preempt_count(HARDIRQ_OFFSET); | ||
290 | account_system_time(current, HARDIRQ_OFFSET, | ||
291 | jiffies_to_cputime(ticks)); | ||
292 | sub_preempt_count(HARDIRQ_OFFSET); | ||
293 | } | ||
294 | |||
295 | /* | ||
296 | * Cancel the scheduled timer and restore the tick | ||
297 | */ | ||
298 | ts->tick_stopped = 0; | ||
299 | hrtimer_cancel(&ts->sched_timer); | ||
300 | ts->sched_timer.expires = ts->idle_tick; | ||
301 | |||
302 | while (1) { | ||
303 | /* Forward the time to expire in the future */ | ||
304 | hrtimer_forward(&ts->sched_timer, now, tick_period); | ||
305 | |||
306 | if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { | ||
307 | hrtimer_start(&ts->sched_timer, | ||
308 | ts->sched_timer.expires, | ||
309 | HRTIMER_MODE_ABS); | ||
310 | /* Check, if the timer was already in the past */ | ||
311 | if (hrtimer_active(&ts->sched_timer)) | ||
312 | break; | ||
313 | } else { | ||
314 | if (!tick_program_event(ts->sched_timer.expires, 0)) | ||
315 | break; | ||
316 | } | ||
317 | /* Update jiffies and reread time */ | ||
318 | tick_do_update_jiffies64(now); | ||
319 | now = ktime_get(); | ||
320 | } | ||
321 | local_irq_enable(); | ||
322 | } | ||
323 | |||
324 | static int tick_nohz_reprogram(struct tick_sched *ts, ktime_t now) | ||
325 | { | ||
326 | hrtimer_forward(&ts->sched_timer, now, tick_period); | ||
327 | return tick_program_event(ts->sched_timer.expires, 0); | ||
328 | } | ||
329 | |||
330 | /* | ||
331 | * The nohz low res interrupt handler | ||
332 | */ | ||
333 | static void tick_nohz_handler(struct clock_event_device *dev) | ||
334 | { | ||
335 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); | ||
336 | struct pt_regs *regs = get_irq_regs(); | ||
337 | ktime_t now = ktime_get(); | ||
338 | |||
339 | dev->next_event.tv64 = KTIME_MAX; | ||
340 | |||
341 | /* Check, if the jiffies need an update */ | ||
342 | tick_do_update_jiffies64(now); | ||
343 | |||
344 | /* | ||
345 | * When we are idle and the tick is stopped, we have to touch | ||
346 | * the watchdog as we might not schedule for a really long | ||
347 | * time. This happens on complete idle SMP systems while | ||
348 | * waiting on the login prompt. We also increment the "start | ||
349 | * of idle" jiffy stamp so the idle accounting adjustment we | ||
350 | * do when we go busy again does not account too many ticks. | ||
351 | */ | ||
352 | if (ts->tick_stopped) { | ||
353 | touch_softlockup_watchdog(); | ||
354 | ts->idle_jiffies++; | ||
355 | } | ||
356 | |||
357 | update_process_times(user_mode(regs)); | ||
358 | profile_tick(CPU_PROFILING); | ||
359 | |||
360 | /* Do not restart, when we are in the idle loop */ | ||
361 | if (ts->tick_stopped) | ||
362 | return; | ||
363 | |||
364 | while (tick_nohz_reprogram(ts, now)) { | ||
365 | now = ktime_get(); | ||
366 | tick_do_update_jiffies64(now); | ||
367 | } | ||
368 | } | ||
369 | |||
370 | /** | ||
371 | * tick_nohz_switch_to_nohz - switch to nohz mode | ||
372 | */ | ||
373 | static void tick_nohz_switch_to_nohz(void) | ||
374 | { | ||
375 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); | ||
376 | ktime_t next; | ||
377 | |||
378 | if (!tick_nohz_enabled) | ||
379 | return; | ||
380 | |||
381 | local_irq_disable(); | ||
382 | if (tick_switch_to_oneshot(tick_nohz_handler)) { | ||
383 | local_irq_enable(); | ||
384 | return; | ||
385 | } | ||
386 | |||
387 | ts->nohz_mode = NOHZ_MODE_LOWRES; | ||
388 | |||
389 | /* | ||
390 | * Recycle the hrtimer in ts, so we can share the | ||
391 | * hrtimer_forward with the highres code. | ||
392 | */ | ||
393 | hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | ||
394 | /* Get the next period */ | ||
395 | next = tick_init_jiffy_update(); | ||
396 | |||
397 | for (;;) { | ||
398 | ts->sched_timer.expires = next; | ||
399 | if (!tick_program_event(next, 0)) | ||
400 | break; | ||
401 | next = ktime_add(next, tick_period); | ||
402 | } | ||
403 | local_irq_enable(); | ||
404 | |||
405 | printk(KERN_INFO "Switched to NOHZ mode on CPU #%d\n", | ||
406 | smp_processor_id()); | ||
407 | } | ||
408 | |||
409 | #else | ||
410 | |||
411 | static inline void tick_nohz_switch_to_nohz(void) { } | ||
412 | |||
413 | #endif /* NO_HZ */ | ||
414 | |||
415 | /* | ||
416 | * High resolution timer specific code | ||
417 | */ | ||
418 | #ifdef CONFIG_HIGH_RES_TIMERS | ||
419 | /* | ||
420 | * We rearm the timer until we get disabled by the idle code | ||
421 | * Called with interrupts disabled and timer->base->cpu_base->lock held. | ||
422 | */ | ||
423 | static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer) | ||
424 | { | ||
425 | struct tick_sched *ts = | ||
426 | container_of(timer, struct tick_sched, sched_timer); | ||
427 | struct hrtimer_cpu_base *base = timer->base->cpu_base; | ||
428 | struct pt_regs *regs = get_irq_regs(); | ||
429 | ktime_t now = ktime_get(); | ||
430 | |||
431 | /* Check, if the jiffies need an update */ | ||
432 | tick_do_update_jiffies64(now); | ||
433 | |||
434 | /* | ||
435 | * Do not call update_process_times() when we are not in | ||
436 | * irq context and have no valid regs pointer | ||
437 | */ | ||
438 | if (regs) { | ||
439 | /* | ||
440 | * When we are idle and the tick is stopped, we have to touch | ||
441 | * the watchdog as we might not schedule for a really long | ||
442 | * time. This happens on complete idle SMP systems while | ||
443 | * waiting on the login prompt. We also increment the "start of | ||
444 | * idle" jiffy stamp so the idle accounting adjustment we do | ||
445 | * when we go busy again does not account too many ticks. | ||
446 | */ | ||
447 | if (ts->tick_stopped) { | ||
448 | touch_softlockup_watchdog(); | ||
449 | ts->idle_jiffies++; | ||
450 | } | ||
451 | /* | ||
452 | * update_process_times() might take tasklist_lock, hence | ||
453 | * drop the base lock. sched-tick hrtimers are per-CPU and | ||
454 | * never accessible by userspace APIs, so this is safe to do. | ||
455 | */ | ||
456 | spin_unlock(&base->lock); | ||
457 | update_process_times(user_mode(regs)); | ||
458 | profile_tick(CPU_PROFILING); | ||
459 | spin_lock(&base->lock); | ||
460 | } | ||
461 | |||
462 | /* Do not restart, when we are in the idle loop */ | ||
463 | if (ts->tick_stopped) | ||
464 | return HRTIMER_NORESTART; | ||
465 | |||
466 | hrtimer_forward(timer, now, tick_period); | ||
467 | |||
468 | return HRTIMER_RESTART; | ||
469 | } | ||
470 | |||
471 | /** | ||
472 | * tick_setup_sched_timer - setup the tick emulation timer | ||
473 | */ | ||
474 | void tick_setup_sched_timer(void) | ||
475 | { | ||
476 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); | ||
477 | ktime_t now = ktime_get(); | ||
478 | |||
479 | /* | ||
480 | * Emulate tick processing via per-CPU hrtimers: | ||
481 | */ | ||
482 | hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | ||
483 | ts->sched_timer.function = tick_sched_timer; | ||
484 | ts->sched_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ; | ||
485 | |||
486 | /* Get the next period */ | ||
487 | ts->sched_timer.expires = tick_init_jiffy_update(); | ||
488 | |||
489 | for (;;) { | ||
490 | hrtimer_forward(&ts->sched_timer, now, tick_period); | ||
491 | hrtimer_start(&ts->sched_timer, ts->sched_timer.expires, | ||
492 | HRTIMER_MODE_ABS); | ||
493 | /* Check, if the timer was already in the past */ | ||
494 | if (hrtimer_active(&ts->sched_timer)) | ||
495 | break; | ||
496 | now = ktime_get(); | ||
497 | } | ||
498 | |||
499 | #ifdef CONFIG_NO_HZ | ||
500 | if (tick_nohz_enabled) | ||
501 | ts->nohz_mode = NOHZ_MODE_HIGHRES; | ||
502 | #endif | ||
503 | } | ||
504 | |||
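This setup loop and tick_nohz_restart_sched_tick() share the same arm-and-verify idiom: push the expiry past now in tick_period steps, try to arm the timer absolutely, and accept the result only if the timer ended up active; otherwise the expiry raced into the past again, so the time is re-read and the attempt repeated. Condensed (a sketch assuming a struct hrtimer *t and a ktime_t now as in the code above):

	for (;;) {
		hrtimer_forward(t, now, tick_period);	/* push expiry beyond 'now' */
		hrtimer_start(t, t->expires, HRTIMER_MODE_ABS);
		if (hrtimer_active(t))			/* armed: done */
			break;
		now = ktime_get();			/* raced: re-read and retry */
	}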
505 | void tick_cancel_sched_timer(int cpu) | ||
506 | { | ||
507 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | ||
508 | |||
509 | if (ts->sched_timer.base) | ||
510 | hrtimer_cancel(&ts->sched_timer); | ||
511 | ts->tick_stopped = 0; | ||
512 | ts->nohz_mode = NOHZ_MODE_INACTIVE; | ||
513 | } | ||
514 | #endif /* HIGH_RES_TIMERS */ | ||
515 | |||
516 | /** | ||
517 | * Async notification about clocksource changes | ||
518 | */ | ||
519 | void tick_clock_notify(void) | ||
520 | { | ||
521 | int cpu; | ||
522 | |||
523 | for_each_possible_cpu(cpu) | ||
524 | set_bit(0, &per_cpu(tick_cpu_sched, cpu).check_clocks); | ||
525 | } | ||
526 | |||
527 | /* | ||
528 | * Async notification about clock event changes | ||
529 | */ | ||
530 | void tick_oneshot_notify(void) | ||
531 | { | ||
532 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); | ||
533 | |||
534 | set_bit(0, &ts->check_clocks); | ||
535 | } | ||
536 | |||
537 | /** | ||
538 | * Check whether a change happened which makes oneshot mode possible. | ||
539 | * | ||
540 | * Called cyclically from the hrtimer softirq (driven by the timer | ||
541 | * softirq). allow_nohz signals that we can switch into low-res nohz | ||
542 | * mode, because high resolution timers are disabled (either at | ||
543 | * compile time or at runtime). | ||
544 | */ | ||
545 | int tick_check_oneshot_change(int allow_nohz) | ||
546 | { | ||
547 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); | ||
548 | |||
549 | if (!test_and_clear_bit(0, &ts->check_clocks)) | ||
550 | return 0; | ||
551 | |||
552 | if (ts->nohz_mode != NOHZ_MODE_INACTIVE) | ||
553 | return 0; | ||
554 | |||
555 | if (!timekeeping_is_continuous() || !tick_is_oneshot_available()) | ||
556 | return 0; | ||
557 | |||
558 | if (!allow_nohz) | ||
559 | return 1; | ||
560 | |||
561 | tick_nohz_switch_to_nohz(); | ||
562 | return 0; | ||
563 | } | ||
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c new file mode 100644 index 000000000000..f82c635c3d5c --- /dev/null +++ b/kernel/time/timer_list.c | |||
@@ -0,0 +1,287 @@ | |||
1 | /* | ||
2 | * kernel/time/timer_list.c | ||
3 | * | ||
4 | * List pending timers | ||
5 | * | ||
6 | * Copyright(C) 2006, Red Hat, Inc., Ingo Molnar | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License version 2 as | ||
10 | * published by the Free Software Foundation. | ||
11 | */ | ||
12 | |||
13 | #include <linux/proc_fs.h> | ||
14 | #include <linux/module.h> | ||
15 | #include <linux/spinlock.h> | ||
16 | #include <linux/sched.h> | ||
17 | #include <linux/seq_file.h> | ||
18 | #include <linux/kallsyms.h> | ||
19 | #include <linux/tick.h> | ||
20 | |||
21 | #include <asm/uaccess.h> | ||
22 | |||
23 | typedef void (*print_fn_t)(struct seq_file *m, unsigned int *classes); | ||
24 | |||
25 | DECLARE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases); | ||
26 | |||
27 | /* | ||
28 | * This allows printing both to /proc/timer_list and | ||
29 | * to the console (on SysRq-Q): | ||
30 | */ | ||
31 | #define SEQ_printf(m, x...) \ | ||
32 | do { \ | ||
33 | if (m) \ | ||
34 | seq_printf(m, x); \ | ||
35 | else \ | ||
36 | printk(x); \ | ||
37 | } while (0) | ||
38 | |||
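The do { } while (0) wrapper is the usual way to make a multi-statement macro behave as one statement, so it composes with unbraced conditionals. Without it, a use like the following (verbose is an illustrative flag) would break at the else:

	if (verbose)
		SEQ_printf(m, "cpu: %d\n", cpu);
	else
		SEQ_printf(m, "-\n");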
39 | static void print_name_offset(struct seq_file *m, void *sym) | ||
40 | { | ||
41 | unsigned long addr = (unsigned long)sym; | ||
42 | char namebuf[KSYM_NAME_LEN+1]; | ||
43 | unsigned long size, offset; | ||
44 | const char *sym_name; | ||
45 | char *modname; | ||
46 | |||
47 | sym_name = kallsyms_lookup(addr, &size, &offset, &modname, namebuf); | ||
48 | if (sym_name) | ||
49 | SEQ_printf(m, "%s", sym_name); | ||
50 | else | ||
51 | SEQ_printf(m, "<%p>", sym); | ||
52 | } | ||
53 | |||
54 | static void | ||
55 | print_timer(struct seq_file *m, struct hrtimer *timer, int idx, u64 now) | ||
56 | { | ||
57 | #ifdef CONFIG_TIMER_STATS | ||
58 | char tmp[TASK_COMM_LEN + 1]; | ||
59 | #endif | ||
60 | SEQ_printf(m, " #%d: ", idx); | ||
61 | print_name_offset(m, timer); | ||
62 | SEQ_printf(m, ", "); | ||
63 | print_name_offset(m, timer->function); | ||
64 | SEQ_printf(m, ", S:%02lx", timer->state); | ||
65 | #ifdef CONFIG_TIMER_STATS | ||
66 | SEQ_printf(m, ", "); | ||
67 | print_name_offset(m, timer->start_site); | ||
68 | memcpy(tmp, timer->start_comm, TASK_COMM_LEN); | ||
69 | tmp[TASK_COMM_LEN] = 0; | ||
70 | SEQ_printf(m, ", %s/%d", tmp, timer->start_pid); | ||
71 | #endif | ||
72 | SEQ_printf(m, "\n"); | ||
73 | SEQ_printf(m, " # expires at %Ld nsecs [in %Ld nsecs]\n", | ||
74 | (unsigned long long)ktime_to_ns(timer->expires), | ||
75 | (unsigned long long)(ktime_to_ns(timer->expires) - now)); | ||
76 | } | ||
77 | |||
78 | static void | ||
79 | print_active_timers(struct seq_file *m, struct hrtimer_clock_base *base, | ||
80 | u64 now) | ||
81 | { | ||
82 | struct hrtimer *timer, tmp; | ||
83 | unsigned long next = 0, i; | ||
84 | struct rb_node *curr; | ||
85 | unsigned long flags; | ||
86 | |||
87 | next_one: | ||
88 | i = 0; | ||
89 | spin_lock_irqsave(&base->cpu_base->lock, flags); | ||
90 | |||
91 | curr = base->first; | ||
92 | /* | ||
93 | * Crude but we have to do this O(N*N) thing, because | ||
94 | * we have to unlock the base when printing: | ||
95 | */ | ||
96 | while (curr && i < next) { | ||
97 | curr = rb_next(curr); | ||
98 | i++; | ||
99 | } | ||
100 | |||
101 | if (curr) { | ||
102 | |||
103 | timer = rb_entry(curr, struct hrtimer, node); | ||
104 | tmp = *timer; | ||
105 | spin_unlock_irqrestore(&base->cpu_base->lock, flags); | ||
106 | |||
107 | print_timer(m, &tmp, i, now); | ||
108 | next++; | ||
109 | goto next_one; | ||
110 | } | ||
111 | spin_unlock_irqrestore(&base->cpu_base->lock, flags); | ||
112 | } | ||
113 | |||
114 | static void | ||
115 | print_base(struct seq_file *m, struct hrtimer_clock_base *base, u64 now) | ||
116 | { | ||
117 | SEQ_printf(m, " .index: %d\n", | ||
118 | base->index); | ||
119 | SEQ_printf(m, " .resolution: %Ld nsecs\n", | ||
120 | (unsigned long long)ktime_to_ns(base->resolution)); | ||
121 | SEQ_printf(m, " .get_time: "); | ||
122 | print_name_offset(m, base->get_time); | ||
123 | SEQ_printf(m, "\n"); | ||
124 | #ifdef CONFIG_HIGH_RES_TIMERS | ||
125 | SEQ_printf(m, " .offset: %Ld nsecs\n", | ||
126 | ktime_to_ns(base->offset)); | ||
127 | #endif | ||
128 | SEQ_printf(m, "active timers:\n"); | ||
129 | print_active_timers(m, base, now); | ||
130 | } | ||
131 | |||
132 | static void print_cpu(struct seq_file *m, int cpu, u64 now) | ||
133 | { | ||
134 | struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu); | ||
135 | int i; | ||
136 | |||
137 | SEQ_printf(m, "\ncpu: %d\n", cpu); | ||
138 | for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { | ||
139 | SEQ_printf(m, " clock %d:\n", i); | ||
140 | print_base(m, cpu_base->clock_base + i, now); | ||
141 | } | ||
142 | #define P(x) \ | ||
143 | SEQ_printf(m, " .%-15s: %Ld\n", #x, (u64)(cpu_base->x)) | ||
144 | #define P_ns(x) \ | ||
145 | SEQ_printf(m, " .%-15s: %Ld nsecs\n", #x, \ | ||
146 | (u64)(ktime_to_ns(cpu_base->x))) | ||
147 | |||
148 | #ifdef CONFIG_HIGH_RES_TIMERS | ||
149 | P_ns(expires_next); | ||
150 | P(hres_active); | ||
151 | P(nr_events); | ||
152 | #endif | ||
153 | #undef P | ||
154 | #undef P_ns | ||
155 | |||
156 | #ifdef CONFIG_TICK_ONESHOT | ||
157 | # define P(x) \ | ||
158 | SEQ_printf(m, " .%-15s: %Ld\n", #x, (u64)(ts->x)) | ||
159 | # define P_ns(x) \ | ||
160 | SEQ_printf(m, " .%-15s: %Ld nsecs\n", #x, \ | ||
161 | (u64)(ktime_to_ns(ts->x))) | ||
162 | { | ||
163 | struct tick_sched *ts = tick_get_tick_sched(cpu); | ||
164 | P(nohz_mode); | ||
165 | P_ns(idle_tick); | ||
166 | P(tick_stopped); | ||
167 | P(idle_jiffies); | ||
168 | P(idle_calls); | ||
169 | P(idle_sleeps); | ||
170 | P_ns(idle_entrytime); | ||
171 | P_ns(idle_sleeptime); | ||
172 | P(last_jiffies); | ||
173 | P(next_jiffies); | ||
174 | P_ns(idle_expires); | ||
175 | SEQ_printf(m, "jiffies: %Ld\n", (u64)jiffies); | ||
176 | } | ||
177 | #endif | ||
178 | |||
179 | #undef P | ||
180 | #undef P_ns | ||
181 | } | ||
182 | |||
183 | #ifdef CONFIG_GENERIC_CLOCKEVENTS | ||
184 | static void | ||
185 | print_tickdevice(struct seq_file *m, struct tick_device *td) | ||
186 | { | ||
187 | struct clock_event_device *dev = td->evtdev; | ||
188 | |||
189 | SEQ_printf(m, "\nTick Device: mode: %d\n", td->mode); | ||
190 | |||
191 | SEQ_printf(m, "Clock Event Device: "); | ||
192 | if (!dev) { | ||
193 | SEQ_printf(m, "<NULL>\n"); | ||
194 | return; | ||
195 | } | ||
196 | SEQ_printf(m, "%s\n", dev->name); | ||
197 | SEQ_printf(m, " max_delta_ns: %ld\n", dev->max_delta_ns); | ||
198 | SEQ_printf(m, " min_delta_ns: %ld\n", dev->min_delta_ns); | ||
199 | SEQ_printf(m, " mult: %ld\n", dev->mult); | ||
200 | SEQ_printf(m, " shift: %d\n", dev->shift); | ||
201 | SEQ_printf(m, " mode: %d\n", dev->mode); | ||
202 | SEQ_printf(m, " next_event: %Ld nsecs\n", | ||
203 | (unsigned long long) ktime_to_ns(dev->next_event)); | ||
204 | |||
205 | SEQ_printf(m, " set_next_event: "); | ||
206 | print_name_offset(m, dev->set_next_event); | ||
207 | SEQ_printf(m, "\n"); | ||
208 | |||
209 | SEQ_printf(m, " set_mode: "); | ||
210 | print_name_offset(m, dev->set_mode); | ||
211 | SEQ_printf(m, "\n"); | ||
212 | |||
213 | SEQ_printf(m, " event_handler: "); | ||
214 | print_name_offset(m, dev->event_handler); | ||
215 | SEQ_printf(m, "\n"); | ||
216 | } | ||
217 | |||
218 | static void timer_list_show_tickdevices(struct seq_file *m) | ||
219 | { | ||
220 | int cpu; | ||
221 | |||
222 | #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST | ||
223 | print_tickdevice(m, tick_get_broadcast_device()); | ||
224 | SEQ_printf(m, "tick_broadcast_mask: %08lx\n", | ||
225 | tick_get_broadcast_mask()->bits[0]); | ||
226 | #ifdef CONFIG_TICK_ONESHOT | ||
227 | SEQ_printf(m, "tick_broadcast_oneshot_mask: %08lx\n", | ||
228 | tick_get_broadcast_oneshot_mask()->bits[0]); | ||
229 | #endif | ||
230 | SEQ_printf(m, "\n"); | ||
231 | #endif | ||
232 | for_each_online_cpu(cpu) | ||
233 | print_tickdevice(m, tick_get_device(cpu)); | ||
234 | SEQ_printf(m, "\n"); | ||
235 | } | ||
236 | #else | ||
237 | static void timer_list_show_tickdevices(struct seq_file *m) { } | ||
238 | #endif | ||
239 | |||
240 | static int timer_list_show(struct seq_file *m, void *v) | ||
241 | { | ||
242 | u64 now = ktime_to_ns(ktime_get()); | ||
243 | int cpu; | ||
244 | |||
245 | SEQ_printf(m, "Timer List Version: v0.3\n"); | ||
246 | SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES); | ||
247 | SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now); | ||
248 | |||
249 | for_each_online_cpu(cpu) | ||
250 | print_cpu(m, cpu, now); | ||
251 | |||
252 | SEQ_printf(m, "\n"); | ||
253 | timer_list_show_tickdevices(m); | ||
254 | |||
255 | return 0; | ||
256 | } | ||
257 | |||
258 | void sysrq_timer_list_show(void) | ||
259 | { | ||
260 | timer_list_show(NULL, NULL); | ||
261 | } | ||
262 | |||
263 | static int timer_list_open(struct inode *inode, struct file *filp) | ||
264 | { | ||
265 | return single_open(filp, timer_list_show, NULL); | ||
266 | } | ||
267 | |||
268 | static struct file_operations timer_list_fops = { | ||
269 | .open = timer_list_open, | ||
270 | .read = seq_read, | ||
271 | .llseek = seq_lseek, | ||
272 | .release = seq_release, | ||
273 | }; | ||
274 | |||
275 | static int __init init_timer_list_procfs(void) | ||
276 | { | ||
277 | struct proc_dir_entry *pe; | ||
278 | |||
279 | pe = create_proc_entry("timer_list", 0644, NULL); | ||
280 | if (!pe) | ||
281 | return -ENOMEM; | ||
282 | |||
283 | pe->proc_fops = &timer_list_fops; | ||
284 | |||
285 | return 0; | ||
286 | } | ||
287 | __initcall(init_timer_list_procfs); | ||
diff --git a/kernel/time/timer_stats.c b/kernel/time/timer_stats.c new file mode 100644 index 000000000000..1bc4882e28e0 --- /dev/null +++ b/kernel/time/timer_stats.c | |||
@@ -0,0 +1,411 @@ | |||
1 | /* | ||
2 | * kernel/time/timer_stats.c | ||
3 | * | ||
4 | * Collect timer usage statistics. | ||
5 | * | ||
6 | * Copyright(C) 2006, Red Hat, Inc., Ingo Molnar | ||
7 | * Copyright(C) 2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com> | ||
8 | * | ||
9 | * timer_stats is based on timer_top, a similar functionality which was part of | ||
10 | * Con Kolivas' dyntick patch set. It was developed by Daniel Petrini at the | ||
11 | * Instituto Nokia de Tecnologia - INdT - Manaus. timer_top's design was based | ||
12 | * on dynamic allocation of the statistics entries and linear search based | ||
13 | * lookup combined with a global lock, rather than the static array, hash | ||
14 | * and per-CPU locking which is used by timer_stats. It was written for the | ||
15 | * pre hrtimer kernel code and therefore did not take hrtimers into account. | ||
16 | * Nevertheless it provided the base for the timer_stats implementation and | ||
17 | * was a helpful source of inspiration. Kudos to Daniel and the Nokia folks | ||
18 | * for this effort. | ||
19 | * | ||
20 | * timer_top.c is | ||
21 | * Copyright (C) 2005 Instituto Nokia de Tecnologia - INdT - Manaus | ||
22 | * Written by Daniel Petrini <d.pensator@gmail.com> | ||
23 | * timer_top.c was released under the GNU General Public License version 2 | ||
24 | * | ||
25 | * We export the addresses and counting of timer functions being called, | ||
26 | * the pid and cmdline from the owner process if applicable. | ||
27 | * | ||
28 | * Start/stop data collection: | ||
29 | * # echo 1 >/proc/timer_stats (echo 0 to stop) | ||
30 | * | ||
31 | * Display the information collected so far: | ||
32 | * # cat /proc/timer_stats | ||
33 | * | ||
34 | * This program is free software; you can redistribute it and/or modify | ||
35 | * it under the terms of the GNU General Public License version 2 as | ||
36 | * published by the Free Software Foundation. | ||
37 | */ | ||
38 | |||
39 | #include <linux/proc_fs.h> | ||
40 | #include <linux/module.h> | ||
41 | #include <linux/spinlock.h> | ||
42 | #include <linux/sched.h> | ||
43 | #include <linux/seq_file.h> | ||
44 | #include <linux/kallsyms.h> | ||
45 | |||
46 | #include <asm/uaccess.h> | ||
47 | |||
48 | /* | ||
49 | * This is our basic unit of interest: a timer expiry event identified | ||
50 | * by the timer, its start/expire functions and the PID of the task that | ||
51 | * started the timer. We count the number of times an event happens: | ||
52 | */ | ||
53 | struct entry { | ||
54 | /* | ||
55 | * Hash list: | ||
56 | */ | ||
57 | struct entry *next; | ||
58 | |||
59 | /* | ||
60 | * Hash keys: | ||
61 | */ | ||
62 | void *timer; | ||
63 | void *start_func; | ||
64 | void *expire_func; | ||
65 | pid_t pid; | ||
66 | |||
67 | /* | ||
68 | * Number of timeout events: | ||
69 | */ | ||
70 | unsigned long count; | ||
71 | |||
72 | /* | ||
73 | * We save the command-line string to preserve | ||
74 | * this information past task exit: | ||
75 | */ | ||
76 | char comm[TASK_COMM_LEN + 1]; | ||
77 | |||
78 | } ____cacheline_aligned_in_smp; | ||
79 | |||
80 | /* | ||
81 | * Spinlock protecting the tables - not taken during lookup: | ||
82 | */ | ||
83 | static DEFINE_SPINLOCK(table_lock); | ||
84 | |||
85 | /* | ||
86 | * Per-CPU lookup locks for fast hash lookup: | ||
87 | */ | ||
88 | static DEFINE_PER_CPU(spinlock_t, lookup_lock); | ||
89 | |||
90 | /* | ||
91 | * Mutex to serialize state changes with show-stats activities: | ||
92 | */ | ||
93 | static DEFINE_MUTEX(show_mutex); | ||
94 | |||
95 | /* | ||
96 | * Collection status, active/inactive: | ||
97 | */ | ||
98 | static int __read_mostly active; | ||
99 | |||
100 | /* | ||
101 | * Beginning/end timestamps of measurement: | ||
102 | */ | ||
103 | static ktime_t time_start, time_stop; | ||
104 | |||
105 | /* | ||
106 | * tstat entry structs only get allocated while collection is | ||
107 | * active and never freed during that time - this simplifies | ||
108 | * things quite a bit. | ||
109 | * | ||
110 | * They get freed when a new collection period is started. | ||
111 | */ | ||
112 | #define MAX_ENTRIES_BITS 10 | ||
113 | #define MAX_ENTRIES (1UL << MAX_ENTRIES_BITS) | ||
114 | |||
115 | static unsigned long nr_entries; | ||
116 | static struct entry entries[MAX_ENTRIES]; | ||
117 | |||
118 | static atomic_t overflow_count; | ||
119 | |||
120 | static void reset_entries(void) | ||
121 | { | ||
122 | nr_entries = 0; | ||
123 | memset(entries, 0, sizeof(entries)); | ||
124 | atomic_set(&overflow_count, 0); | ||
125 | } | ||
126 | |||
127 | static struct entry *alloc_entry(void) | ||
128 | { | ||
129 | if (nr_entries >= MAX_ENTRIES) | ||
130 | return NULL; | ||
131 | |||
132 | return entries + nr_entries++; | ||
133 | } | ||
134 | |||
135 | /* | ||
136 | * The entries are in a hash-table, for fast lookup: | ||
137 | */ | ||
138 | #define TSTAT_HASH_BITS (MAX_ENTRIES_BITS - 1) | ||
139 | #define TSTAT_HASH_SIZE (1UL << TSTAT_HASH_BITS) | ||
140 | #define TSTAT_HASH_MASK (TSTAT_HASH_SIZE - 1) | ||
141 | |||
142 | #define __tstat_hashfn(entry) \ | ||
143 | (((unsigned long)(entry)->timer ^ \ | ||
144 | (unsigned long)(entry)->start_func ^ \ | ||
145 | (unsigned long)(entry)->expire_func ^ \ | ||
146 | (unsigned long)(entry)->pid ) & TSTAT_HASH_MASK) | ||
147 | |||
148 | #define tstat_hashentry(entry) (tstat_hash_table + __tstat_hashfn(entry)) | ||
149 | |||
150 | static struct entry *tstat_hash_table[TSTAT_HASH_SIZE] __read_mostly; | ||
151 | |||
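__tstat_hashfn() XOR-folds the four hash keys and masks the result into the table: with MAX_ENTRIES_BITS = 10, TSTAT_HASH_BITS is 9, giving 512 buckets and a mask of 0x1ff. The same computation written as an inline function (an equivalent sketch, not part of the patch):

	static inline unsigned long tstat_hashfn_sketch(struct entry *e)
	{
		unsigned long h = (unsigned long)e->timer ^
				  (unsigned long)e->start_func ^
				  (unsigned long)e->expire_func ^
				  (unsigned long)e->pid;

		return h & TSTAT_HASH_MASK;	/* 0x1ff: 512 buckets */
	}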
152 | static int match_entries(struct entry *entry1, struct entry *entry2) | ||
153 | { | ||
154 | return entry1->timer == entry2->timer && | ||
155 | entry1->start_func == entry2->start_func && | ||
156 | entry1->expire_func == entry2->expire_func && | ||
157 | entry1->pid == entry2->pid; | ||
158 | } | ||
159 | |||
160 | /* | ||
161 | * Look up whether an entry matching this item is present | ||
162 | * in the hash already. Must be called with irqs off and the | ||
163 | * lookup lock held: | ||
164 | */ | ||
165 | static struct entry *tstat_lookup(struct entry *entry, char *comm) | ||
166 | { | ||
167 | struct entry **head, *curr, *prev; | ||
168 | |||
169 | head = tstat_hashentry(entry); | ||
170 | curr = *head; | ||
171 | |||
172 | /* | ||
173 | * The fastpath is when the entry is already hashed; | ||
174 | * we do this with the lookup lock held, but with the | ||
175 | * table lock not held: | ||
176 | */ | ||
177 | while (curr) { | ||
178 | if (match_entries(curr, entry)) | ||
179 | return curr; | ||
180 | |||
181 | curr = curr->next; | ||
182 | } | ||
183 | /* | ||
184 | * Slowpath: allocate, set up and link a new hash entry: | ||
185 | */ | ||
186 | prev = NULL; | ||
187 | curr = *head; | ||
188 | |||
189 | spin_lock(&table_lock); | ||
190 | /* | ||
191 | * Make sure we have not raced with another CPU: | ||
192 | */ | ||
193 | while (curr) { | ||
194 | if (match_entries(curr, entry)) | ||
195 | goto out_unlock; | ||
196 | |||
197 | prev = curr; | ||
198 | curr = curr->next; | ||
199 | } | ||
200 | |||
201 | curr = alloc_entry(); | ||
202 | if (curr) { | ||
203 | *curr = *entry; | ||
204 | curr->count = 0; | ||
205 | curr->next = NULL; | ||
206 | memcpy(curr->comm, comm, TASK_COMM_LEN); | ||
207 | if (prev) | ||
208 | prev->next = curr; | ||
209 | else | ||
210 | *head = curr; | ||
211 | } | ||
212 | out_unlock: | ||
213 | spin_unlock(&table_lock); | ||
214 | |||
215 | return curr; | ||
216 | } | ||
217 | |||
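tstat_lookup() is a double-checked insertion: the fast path scans the hash chain under only the caller's per-cpu lookup lock, and only on a miss is the global table_lock taken and the chain rescanned, since another CPU, holding a different lookup lock, may have inserted the same key in the meantime. Because other CPUs can traverse the chain concurrently, the new entry is fully initialized, including its next pointer, before it is linked in; publication has to happen last.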
218 | /** | ||
219 | * timer_stats_update_stats - Update the statistics for a timer. | ||
220 | * @timer: pointer to either a timer_list or a hrtimer | ||
221 | * @pid: the pid of the task which set up the timer | ||
222 | * @startf: pointer to the function which did the timer setup | ||
223 | * @timerf: pointer to the timer callback function of the timer | ||
224 | * @comm: name of the process which set up the timer | ||
225 | * | ||
226 | * If the timer is already registered, the event counter is | ||
227 | * incremented. Otherwise the timer is registered in a free slot. | ||
228 | */ | ||
229 | void timer_stats_update_stats(void *timer, pid_t pid, void *startf, | ||
230 | void *timerf, char *comm) | ||
231 | { | ||
232 | /* | ||
233 | * It doesn't matter which lock we take: | ||
234 | */ | ||
235 | spinlock_t *lock = &per_cpu(lookup_lock, raw_smp_processor_id()); | ||
236 | struct entry *entry, input; | ||
237 | unsigned long flags; | ||
238 | |||
239 | input.timer = timer; | ||
240 | input.start_func = startf; | ||
241 | input.expire_func = timerf; | ||
242 | input.pid = pid; | ||
243 | |||
244 | spin_lock_irqsave(lock, flags); | ||
245 | if (!active) | ||
246 | goto out_unlock; | ||
247 | |||
248 | entry = tstat_lookup(&input, comm); | ||
249 | if (likely(entry)) | ||
250 | entry->count++; | ||
251 | else | ||
252 | atomic_inc(&overflow_count); | ||
253 | |||
254 | out_unlock: | ||
255 | spin_unlock_irqrestore(lock, flags); | ||
256 | } | ||
257 | |||
258 | static void print_name_offset(struct seq_file *m, unsigned long addr) | ||
259 | { | ||
260 | char namebuf[KSYM_NAME_LEN+1]; | ||
261 | unsigned long size, offset; | ||
262 | const char *sym_name; | ||
263 | char *modname; | ||
264 | |||
265 | sym_name = kallsyms_lookup(addr, &size, &offset, &modname, namebuf); | ||
266 | if (sym_name) | ||
267 | seq_printf(m, "%s", sym_name); | ||
268 | else | ||
269 | seq_printf(m, "<%p>", (void *)addr); | ||
270 | } | ||
271 | |||
272 | static int tstats_show(struct seq_file *m, void *v) | ||
273 | { | ||
274 | struct timespec period; | ||
275 | struct entry *entry; | ||
276 | unsigned long ms; | ||
277 | long events = 0; | ||
278 | ktime_t time; | ||
279 | int i; | ||
280 | |||
281 | mutex_lock(&show_mutex); | ||
282 | /* | ||
283 | * If still active then calculate up to now: | ||
284 | */ | ||
285 | if (active) | ||
286 | time_stop = ktime_get(); | ||
287 | |||
288 | time = ktime_sub(time_stop, time_start); | ||
289 | |||
290 | period = ktime_to_timespec(time); | ||
291 | ms = period.tv_nsec / 1000000; | ||
292 | |||
293 | seq_puts(m, "Timer Stats Version: v0.1\n"); | ||
294 | seq_printf(m, "Sample period: %ld.%03ld s\n", period.tv_sec, ms); | ||
295 | if (atomic_read(&overflow_count)) | ||
296 | seq_printf(m, "Overflow: %d entries\n", | ||
297 | atomic_read(&overflow_count)); | ||
298 | |||
299 | for (i = 0; i < nr_entries; i++) { | ||
300 | entry = entries + i; | ||
301 | seq_printf(m, "%4lu, %5d %-16s ", | ||
302 | entry->count, entry->pid, entry->comm); | ||
303 | |||
304 | print_name_offset(m, (unsigned long)entry->start_func); | ||
305 | seq_puts(m, " ("); | ||
306 | print_name_offset(m, (unsigned long)entry->expire_func); | ||
307 | seq_puts(m, ")\n"); | ||
308 | |||
309 | events += entry->count; | ||
310 | } | ||
311 | |||
312 | ms += period.tv_sec * 1000; | ||
313 | if (!ms) | ||
314 | ms = 1; | ||
315 | |||
316 | if (events && period.tv_sec) | ||
317 | seq_printf(m, "%ld total events, %ld.%03ld events/sec\n", events, | ||
318 | events * 1000 / ms, (events * 1000000 / ms) % 1000); | ||
319 | else | ||
320 | seq_printf(m, "%ld total events\n", events); | ||
321 | |||
322 | mutex_unlock(&show_mutex); | ||
323 | |||
324 | return 0; | ||
325 | } | ||
326 | |||
327 | /* | ||
328 | * After a state change, make sure all concurrent lookup/update | ||
329 | * activities have stopped: | ||
330 | */ | ||
331 | static void sync_access(void) | ||
332 | { | ||
333 | unsigned long flags; | ||
334 | int cpu; | ||
335 | |||
336 | for_each_online_cpu(cpu) { | ||
337 | spin_lock_irqsave(&per_cpu(lookup_lock, cpu), flags); | ||
338 | /* nothing */ | ||
339 | spin_unlock_irqrestore(&per_cpu(lookup_lock, cpu), flags); | ||
340 | } | ||
341 | } | ||
342 | |||
343 | static ssize_t tstats_write(struct file *file, const char __user *buf, | ||
344 | size_t count, loff_t *offs) | ||
345 | { | ||
346 | char ctl[2]; | ||
347 | |||
348 | if (count != 2 || *offs) | ||
349 | return -EINVAL; | ||
350 | |||
351 | if (copy_from_user(ctl, buf, count)) | ||
352 | return -EFAULT; | ||
353 | |||
354 | mutex_lock(&show_mutex); | ||
355 | switch (ctl[0]) { | ||
356 | case '0': | ||
357 | if (active) { | ||
358 | active = 0; | ||
359 | time_stop = ktime_get(); | ||
360 | sync_access(); | ||
361 | } | ||
362 | break; | ||
363 | case '1': | ||
364 | if (!active) { | ||
365 | reset_entries(); | ||
366 | time_start = ktime_get(); | ||
367 | active = 1; | ||
368 | } | ||
369 | break; | ||
370 | default: | ||
371 | count = -EINVAL; | ||
372 | } | ||
373 | mutex_unlock(&show_mutex); | ||
374 | |||
375 | return count; | ||
376 | } | ||
377 | |||
378 | static int tstats_open(struct inode *inode, struct file *filp) | ||
379 | { | ||
380 | return single_open(filp, tstats_show, NULL); | ||
381 | } | ||
382 | |||
383 | static struct file_operations tstats_fops = { | ||
384 | .open = tstats_open, | ||
385 | .read = seq_read, | ||
386 | .write = tstats_write, | ||
387 | .llseek = seq_lseek, | ||
388 | .release = seq_release, | ||
389 | }; | ||
390 | |||
391 | void __init init_timer_stats(void) | ||
392 | { | ||
393 | int cpu; | ||
394 | |||
395 | for_each_possible_cpu(cpu) | ||
396 | spin_lock_init(&per_cpu(lookup_lock, cpu)); | ||
397 | } | ||
398 | |||
399 | static int __init init_tstats_procfs(void) | ||
400 | { | ||
401 | struct proc_dir_entry *pe; | ||
402 | |||
403 | pe = create_proc_entry("timer_stats", 0644, NULL); | ||
404 | if (!pe) | ||
405 | return -ENOMEM; | ||
406 | |||
407 | pe->proc_fops = &tstats_fops; | ||
408 | |||
409 | return 0; | ||
410 | } | ||
411 | __initcall(init_tstats_procfs); | ||