diff options
Diffstat (limited to 'kernel/hrtimer.c')
-rw-r--r-- | kernel/hrtimer.c | 824 |
1 files changed, 682 insertions, 142 deletions
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index f44e499e8fca..476cb0c0b4a4 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c | |||
@@ -1,8 +1,9 @@ | |||
1 | /* | 1 | /* |
2 | * linux/kernel/hrtimer.c | 2 | * linux/kernel/hrtimer.c |
3 | * | 3 | * |
4 | * Copyright(C) 2005, Thomas Gleixner <tglx@linutronix.de> | 4 | * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de> |
5 | * Copyright(C) 2005, Red Hat, Inc., Ingo Molnar | 5 | * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar |
6 | * Copyright(C) 2006-2007 Timesys Corp., Thomas Gleixner | ||
6 | * | 7 | * |
7 | * High-resolution kernel timers | 8 | * High-resolution kernel timers |
8 | * | 9 | * |
@@ -31,12 +32,17 @@ | |||
31 | */ | 32 | */ |
32 | 33 | ||
33 | #include <linux/cpu.h> | 34 | #include <linux/cpu.h> |
35 | #include <linux/irq.h> | ||
34 | #include <linux/module.h> | 36 | #include <linux/module.h> |
35 | #include <linux/percpu.h> | 37 | #include <linux/percpu.h> |
36 | #include <linux/hrtimer.h> | 38 | #include <linux/hrtimer.h> |
37 | #include <linux/notifier.h> | 39 | #include <linux/notifier.h> |
38 | #include <linux/syscalls.h> | 40 | #include <linux/syscalls.h> |
41 | #include <linux/kallsyms.h> | ||
39 | #include <linux/interrupt.h> | 42 | #include <linux/interrupt.h> |
43 | #include <linux/tick.h> | ||
44 | #include <linux/seq_file.h> | ||
45 | #include <linux/err.h> | ||
40 | 46 | ||
41 | #include <asm/uaccess.h> | 47 | #include <asm/uaccess.h> |
42 | 48 | ||
@@ -45,7 +51,7 @@ | |||
45 | * | 51 | * |
46 | * returns the time in ktime_t format | 52 | * returns the time in ktime_t format |
47 | */ | 53 | */ |
48 | static ktime_t ktime_get(void) | 54 | ktime_t ktime_get(void) |
49 | { | 55 | { |
50 | struct timespec now; | 56 | struct timespec now; |
51 | 57 | ||
@@ -59,7 +65,7 @@ static ktime_t ktime_get(void) | |||
59 | * | 65 | * |
60 | * returns the time in ktime_t format | 66 | * returns the time in ktime_t format |
61 | */ | 67 | */ |
62 | static ktime_t ktime_get_real(void) | 68 | ktime_t ktime_get_real(void) |
63 | { | 69 | { |
64 | struct timespec now; | 70 | struct timespec now; |
65 | 71 | ||
@@ -79,21 +85,22 @@ EXPORT_SYMBOL_GPL(ktime_get_real); | |||
79 | * This ensures that we capture erroneous accesses to these clock ids | 85 | * This ensures that we capture erroneous accesses to these clock ids |
80 | * rather than moving them into the range of valid clock id's. | 86 | * rather than moving them into the range of valid clock id's. |
81 | */ | 87 | */ |
82 | 88 | DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = | |
83 | #define MAX_HRTIMER_BASES 2 | ||
84 | |||
85 | static DEFINE_PER_CPU(struct hrtimer_base, hrtimer_bases[MAX_HRTIMER_BASES]) = | ||
86 | { | 89 | { |
90 | |||
91 | .clock_base = | ||
87 | { | 92 | { |
88 | .index = CLOCK_REALTIME, | 93 | { |
89 | .get_time = &ktime_get_real, | 94 | .index = CLOCK_REALTIME, |
90 | .resolution = KTIME_REALTIME_RES, | 95 | .get_time = &ktime_get_real, |
91 | }, | 96 | .resolution = KTIME_LOW_RES, |
92 | { | 97 | }, |
93 | .index = CLOCK_MONOTONIC, | 98 | { |
94 | .get_time = &ktime_get, | 99 | .index = CLOCK_MONOTONIC, |
95 | .resolution = KTIME_MONOTONIC_RES, | 100 | .get_time = &ktime_get, |
96 | }, | 101 | .resolution = KTIME_LOW_RES, |
102 | }, | ||
103 | } | ||
97 | }; | 104 | }; |
98 | 105 | ||
99 | /** | 106 | /** |
@@ -125,20 +132,35 @@ EXPORT_SYMBOL_GPL(ktime_get_ts); | |||
125 | * Get the coarse grained time at the softirq based on xtime and | 132 | * Get the coarse grained time at the softirq based on xtime and |
126 | * wall_to_monotonic. | 133 | * wall_to_monotonic. |
127 | */ | 134 | */ |
128 | static void hrtimer_get_softirq_time(struct hrtimer_base *base) | 135 | static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base) |
129 | { | 136 | { |
130 | ktime_t xtim, tomono; | 137 | ktime_t xtim, tomono; |
138 | struct timespec xts; | ||
131 | unsigned long seq; | 139 | unsigned long seq; |
132 | 140 | ||
133 | do { | 141 | do { |
134 | seq = read_seqbegin(&xtime_lock); | 142 | seq = read_seqbegin(&xtime_lock); |
135 | xtim = timespec_to_ktime(xtime); | 143 | #ifdef CONFIG_NO_HZ |
136 | tomono = timespec_to_ktime(wall_to_monotonic); | 144 | getnstimeofday(&xts); |
137 | 145 | #else | |
146 | xts = xtime; | ||
147 | #endif | ||
138 | } while (read_seqretry(&xtime_lock, seq)); | 148 | } while (read_seqretry(&xtime_lock, seq)); |
139 | 149 | ||
140 | base[CLOCK_REALTIME].softirq_time = xtim; | 150 | xtim = timespec_to_ktime(xts); |
141 | base[CLOCK_MONOTONIC].softirq_time = ktime_add(xtim, tomono); | 151 | tomono = timespec_to_ktime(wall_to_monotonic); |
152 | base->clock_base[CLOCK_REALTIME].softirq_time = xtim; | ||
153 | base->clock_base[CLOCK_MONOTONIC].softirq_time = | ||
154 | ktime_add(xtim, tomono); | ||
155 | } | ||
156 | |||
157 | /* | ||
158 | * Helper function to check, whether the timer is running the callback | ||
159 | * function | ||
160 | */ | ||
161 | static inline int hrtimer_callback_running(struct hrtimer *timer) | ||
162 | { | ||
163 | return timer->state & HRTIMER_STATE_CALLBACK; | ||
142 | } | 164 | } |
143 | 165 | ||
144 | /* | 166 | /* |
@@ -147,8 +169,6 @@ static void hrtimer_get_softirq_time(struct hrtimer_base *base) | |||
147 | */ | 169 | */ |
148 | #ifdef CONFIG_SMP | 170 | #ifdef CONFIG_SMP |
149 | 171 | ||
150 | #define set_curr_timer(b, t) do { (b)->curr_timer = (t); } while (0) | ||
151 | |||
152 | /* | 172 | /* |
153 | * We are using hashed locking: holding per_cpu(hrtimer_bases)[n].lock | 173 | * We are using hashed locking: holding per_cpu(hrtimer_bases)[n].lock |
154 | * means that all timers which are tied to this base via timer->base are | 174 | * means that all timers which are tied to this base via timer->base are |
@@ -161,19 +181,20 @@ static void hrtimer_get_softirq_time(struct hrtimer_base *base) | |||
161 | * possible to set timer->base = NULL and drop the lock: the timer remains | 181 | * possible to set timer->base = NULL and drop the lock: the timer remains |
162 | * locked. | 182 | * locked. |
163 | */ | 183 | */ |
164 | static struct hrtimer_base *lock_hrtimer_base(const struct hrtimer *timer, | 184 | static |
165 | unsigned long *flags) | 185 | struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer, |
186 | unsigned long *flags) | ||
166 | { | 187 | { |
167 | struct hrtimer_base *base; | 188 | struct hrtimer_clock_base *base; |
168 | 189 | ||
169 | for (;;) { | 190 | for (;;) { |
170 | base = timer->base; | 191 | base = timer->base; |
171 | if (likely(base != NULL)) { | 192 | if (likely(base != NULL)) { |
172 | spin_lock_irqsave(&base->lock, *flags); | 193 | spin_lock_irqsave(&base->cpu_base->lock, *flags); |
173 | if (likely(base == timer->base)) | 194 | if (likely(base == timer->base)) |
174 | return base; | 195 | return base; |
175 | /* The timer has migrated to another CPU: */ | 196 | /* The timer has migrated to another CPU: */ |
176 | spin_unlock_irqrestore(&base->lock, *flags); | 197 | spin_unlock_irqrestore(&base->cpu_base->lock, *flags); |
177 | } | 198 | } |
178 | cpu_relax(); | 199 | cpu_relax(); |
179 | } | 200 | } |
@@ -182,12 +203,14 @@ static struct hrtimer_base *lock_hrtimer_base(const struct hrtimer *timer, | |||
182 | /* | 203 | /* |
183 | * Switch the timer base to the current CPU when possible. | 204 | * Switch the timer base to the current CPU when possible. |
184 | */ | 205 | */ |
185 | static inline struct hrtimer_base * | 206 | static inline struct hrtimer_clock_base * |
186 | switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_base *base) | 207 | switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base) |
187 | { | 208 | { |
188 | struct hrtimer_base *new_base; | 209 | struct hrtimer_clock_base *new_base; |
210 | struct hrtimer_cpu_base *new_cpu_base; | ||
189 | 211 | ||
190 | new_base = &__get_cpu_var(hrtimer_bases)[base->index]; | 212 | new_cpu_base = &__get_cpu_var(hrtimer_bases); |
213 | new_base = &new_cpu_base->clock_base[base->index]; | ||
191 | 214 | ||
192 | if (base != new_base) { | 215 | if (base != new_base) { |
193 | /* | 216 | /* |
@@ -199,13 +222,13 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_base *base) | |||
199 | * completed. There is no conflict as we hold the lock until | 222 | * completed. There is no conflict as we hold the lock until |
200 | * the timer is enqueued. | 223 | * the timer is enqueued. |
201 | */ | 224 | */ |
202 | if (unlikely(base->curr_timer == timer)) | 225 | if (unlikely(hrtimer_callback_running(timer))) |
203 | return base; | 226 | return base; |
204 | 227 | ||
205 | /* See the comment in lock_timer_base() */ | 228 | /* See the comment in lock_timer_base() */ |
206 | timer->base = NULL; | 229 | timer->base = NULL; |
207 | spin_unlock(&base->lock); | 230 | spin_unlock(&base->cpu_base->lock); |
208 | spin_lock(&new_base->lock); | 231 | spin_lock(&new_base->cpu_base->lock); |
209 | timer->base = new_base; | 232 | timer->base = new_base; |
210 | } | 233 | } |
211 | return new_base; | 234 | return new_base; |
@@ -213,19 +236,17 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_base *base) | |||
213 | 236 | ||
214 | #else /* CONFIG_SMP */ | 237 | #else /* CONFIG_SMP */ |
215 | 238 | ||
216 | #define set_curr_timer(b, t) do { } while (0) | 239 | static inline struct hrtimer_clock_base * |
217 | |||
218 | static inline struct hrtimer_base * | ||
219 | lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) | 240 | lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) |
220 | { | 241 | { |
221 | struct hrtimer_base *base = timer->base; | 242 | struct hrtimer_clock_base *base = timer->base; |
222 | 243 | ||
223 | spin_lock_irqsave(&base->lock, *flags); | 244 | spin_lock_irqsave(&base->cpu_base->lock, *flags); |
224 | 245 | ||
225 | return base; | 246 | return base; |
226 | } | 247 | } |
227 | 248 | ||
228 | #define switch_hrtimer_base(t, b) (b) | 249 | # define switch_hrtimer_base(t, b) (b) |
229 | 250 | ||
230 | #endif /* !CONFIG_SMP */ | 251 | #endif /* !CONFIG_SMP */ |
231 | 252 | ||
@@ -256,15 +277,12 @@ ktime_t ktime_add_ns(const ktime_t kt, u64 nsec) | |||
256 | 277 | ||
257 | return ktime_add(kt, tmp); | 278 | return ktime_add(kt, tmp); |
258 | } | 279 | } |
259 | |||
260 | #else /* CONFIG_KTIME_SCALAR */ | ||
261 | |||
262 | # endif /* !CONFIG_KTIME_SCALAR */ | 280 | # endif /* !CONFIG_KTIME_SCALAR */ |
263 | 281 | ||
264 | /* | 282 | /* |
265 | * Divide a ktime value by a nanosecond value | 283 | * Divide a ktime value by a nanosecond value |
266 | */ | 284 | */ |
267 | static unsigned long ktime_divns(const ktime_t kt, s64 div) | 285 | unsigned long ktime_divns(const ktime_t kt, s64 div) |
268 | { | 286 | { |
269 | u64 dclc, inc, dns; | 287 | u64 dclc, inc, dns; |
270 | int sft = 0; | 288 | int sft = 0; |
@@ -281,18 +299,311 @@ static unsigned long ktime_divns(const ktime_t kt, s64 div) | |||
281 | 299 | ||
282 | return (unsigned long) dclc; | 300 | return (unsigned long) dclc; |
283 | } | 301 | } |
284 | |||
285 | #else /* BITS_PER_LONG < 64 */ | ||
286 | # define ktime_divns(kt, div) (unsigned long)((kt).tv64 / (div)) | ||
287 | #endif /* BITS_PER_LONG >= 64 */ | 302 | #endif /* BITS_PER_LONG >= 64 */ |
288 | 303 | ||
304 | /* High resolution timer related functions */ | ||
305 | #ifdef CONFIG_HIGH_RES_TIMERS | ||
306 | |||
307 | /* | ||
308 | * High resolution timer enabled ? | ||
309 | */ | ||
310 | static int hrtimer_hres_enabled __read_mostly = 1; | ||
311 | |||
312 | /* | ||
313 | * Enable / Disable high resolution mode | ||
314 | */ | ||
315 | static int __init setup_hrtimer_hres(char *str) | ||
316 | { | ||
317 | if (!strcmp(str, "off")) | ||
318 | hrtimer_hres_enabled = 0; | ||
319 | else if (!strcmp(str, "on")) | ||
320 | hrtimer_hres_enabled = 1; | ||
321 | else | ||
322 | return 0; | ||
323 | return 1; | ||
324 | } | ||
325 | |||
326 | __setup("highres=", setup_hrtimer_hres); | ||
327 | |||
328 | /* | ||
329 | * hrtimer_is_hres_enabled - query, if the highres mode is enabled | ||
330 | */ | ||
331 | static inline int hrtimer_is_hres_enabled(void) | ||
332 | { | ||
333 | return hrtimer_hres_enabled; | ||
334 | } | ||
335 | |||
336 | /* | ||
337 | * Is the high resolution mode active ? | ||
338 | */ | ||
339 | static inline int hrtimer_hres_active(void) | ||
340 | { | ||
341 | return __get_cpu_var(hrtimer_bases).hres_active; | ||
342 | } | ||
343 | |||
344 | /* | ||
345 | * Reprogram the event source with checking both queues for the | ||
346 | * next event | ||
347 | * Called with interrupts disabled and cpu_base->lock held | ||
348 | */ | ||
349 | static void hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base) | ||
350 | { | ||
351 | int i; | ||
352 | struct hrtimer_clock_base *base = cpu_base->clock_base; | ||
353 | ktime_t expires; | ||
354 | |||
355 | cpu_base->expires_next.tv64 = KTIME_MAX; | ||
356 | |||
357 | for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) { | ||
358 | struct hrtimer *timer; | ||
359 | |||
360 | if (!base->first) | ||
361 | continue; | ||
362 | timer = rb_entry(base->first, struct hrtimer, node); | ||
363 | expires = ktime_sub(timer->expires, base->offset); | ||
364 | if (expires.tv64 < cpu_base->expires_next.tv64) | ||
365 | cpu_base->expires_next = expires; | ||
366 | } | ||
367 | |||
368 | if (cpu_base->expires_next.tv64 != KTIME_MAX) | ||
369 | tick_program_event(cpu_base->expires_next, 1); | ||
370 | } | ||
371 | |||
372 | /* | ||
373 | * Shared reprogramming for clock_realtime and clock_monotonic | ||
374 | * | ||
375 | * When a timer is enqueued and expires earlier than the already enqueued | ||
376 | * timers, we have to check, whether it expires earlier than the timer for | ||
377 | * which the clock event device was armed. | ||
378 | * | ||
379 | * Called with interrupts disabled and base->cpu_base->lock held | ||
380 | */ | ||
381 | static int hrtimer_reprogram(struct hrtimer *timer, | ||
382 | struct hrtimer_clock_base *base) | ||
383 | { | ||
384 | ktime_t *expires_next = &__get_cpu_var(hrtimer_bases).expires_next; | ||
385 | ktime_t expires = ktime_sub(timer->expires, base->offset); | ||
386 | int res; | ||
387 | |||
388 | /* | ||
389 | * When the callback is running, we do not reprogram the clock event | ||
390 | * device. The timer callback is either running on a different CPU or | ||
391 | * the callback is executed in the hrtimer_interrupt context. The | ||
392 | * reprogramming is handled either by the softirq, which called the | ||
393 | * callback or at the end of the hrtimer_interrupt. | ||
394 | */ | ||
395 | if (hrtimer_callback_running(timer)) | ||
396 | return 0; | ||
397 | |||
398 | if (expires.tv64 >= expires_next->tv64) | ||
399 | return 0; | ||
400 | |||
401 | /* | ||
402 | * Clockevents returns -ETIME, when the event was in the past. | ||
403 | */ | ||
404 | res = tick_program_event(expires, 0); | ||
405 | if (!IS_ERR_VALUE(res)) | ||
406 | *expires_next = expires; | ||
407 | return res; | ||
408 | } | ||
409 | |||
410 | |||
411 | /* | ||
412 | * Retrigger next event is called after clock was set | ||
413 | * | ||
414 | * Called with interrupts disabled via on_each_cpu() | ||
415 | */ | ||
416 | static void retrigger_next_event(void *arg) | ||
417 | { | ||
418 | struct hrtimer_cpu_base *base; | ||
419 | struct timespec realtime_offset; | ||
420 | unsigned long seq; | ||
421 | |||
422 | if (!hrtimer_hres_active()) | ||
423 | return; | ||
424 | |||
425 | do { | ||
426 | seq = read_seqbegin(&xtime_lock); | ||
427 | set_normalized_timespec(&realtime_offset, | ||
428 | -wall_to_monotonic.tv_sec, | ||
429 | -wall_to_monotonic.tv_nsec); | ||
430 | } while (read_seqretry(&xtime_lock, seq)); | ||
431 | |||
432 | base = &__get_cpu_var(hrtimer_bases); | ||
433 | |||
434 | /* Adjust CLOCK_REALTIME offset */ | ||
435 | spin_lock(&base->lock); | ||
436 | base->clock_base[CLOCK_REALTIME].offset = | ||
437 | timespec_to_ktime(realtime_offset); | ||
438 | |||
439 | hrtimer_force_reprogram(base); | ||
440 | spin_unlock(&base->lock); | ||
441 | } | ||
442 | |||
443 | /* | ||
444 | * Clock realtime was set | ||
445 | * | ||
446 | * Change the offset of the realtime clock vs. the monotonic | ||
447 | * clock. | ||
448 | * | ||
449 | * We might have to reprogram the high resolution timer interrupt. On | ||
450 | * SMP we call the architecture specific code to retrigger _all_ high | ||
451 | * resolution timer interrupts. On UP we just disable interrupts and | ||
452 | * call the high resolution interrupt code. | ||
453 | */ | ||
454 | void clock_was_set(void) | ||
455 | { | ||
456 | /* Retrigger the CPU local events everywhere */ | ||
457 | on_each_cpu(retrigger_next_event, NULL, 0, 1); | ||
458 | } | ||
459 | |||
460 | /* | ||
461 | * Check, whether the timer is on the callback pending list | ||
462 | */ | ||
463 | static inline int hrtimer_cb_pending(const struct hrtimer *timer) | ||
464 | { | ||
465 | return timer->state & HRTIMER_STATE_PENDING; | ||
466 | } | ||
467 | |||
468 | /* | ||
469 | * Remove a timer from the callback pending list | ||
470 | */ | ||
471 | static inline void hrtimer_remove_cb_pending(struct hrtimer *timer) | ||
472 | { | ||
473 | list_del_init(&timer->cb_entry); | ||
474 | } | ||
475 | |||
476 | /* | ||
477 | * Initialize the high resolution related parts of cpu_base | ||
478 | */ | ||
479 | static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) | ||
480 | { | ||
481 | base->expires_next.tv64 = KTIME_MAX; | ||
482 | base->hres_active = 0; | ||
483 | INIT_LIST_HEAD(&base->cb_pending); | ||
484 | } | ||
485 | |||
486 | /* | ||
487 | * Initialize the high resolution related parts of a hrtimer | ||
488 | */ | ||
489 | static inline void hrtimer_init_timer_hres(struct hrtimer *timer) | ||
490 | { | ||
491 | INIT_LIST_HEAD(&timer->cb_entry); | ||
492 | } | ||
493 | |||
494 | /* | ||
495 | * When High resolution timers are active, try to reprogram. Note, that in case | ||
496 | * the state has HRTIMER_STATE_CALLBACK set, no reprogramming and no expiry | ||
497 | * check happens. The timer gets enqueued into the rbtree. The reprogramming | ||
498 | * and expiry check is done in the hrtimer_interrupt or in the softirq. | ||
499 | */ | ||
500 | static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, | ||
501 | struct hrtimer_clock_base *base) | ||
502 | { | ||
503 | if (base->cpu_base->hres_active && hrtimer_reprogram(timer, base)) { | ||
504 | |||
505 | /* Timer is expired, act upon the callback mode */ | ||
506 | switch(timer->cb_mode) { | ||
507 | case HRTIMER_CB_IRQSAFE_NO_RESTART: | ||
508 | /* | ||
509 | * We can call the callback from here. No restart | ||
510 | * happens, so no danger of recursion | ||
511 | */ | ||
512 | BUG_ON(timer->function(timer) != HRTIMER_NORESTART); | ||
513 | return 1; | ||
514 | case HRTIMER_CB_IRQSAFE_NO_SOFTIRQ: | ||
515 | /* | ||
516 | * This is solely for the sched tick emulation with | ||
517 | * dynamic tick support to ensure that we do not | ||
518 | * restart the tick right on the edge and end up with | ||
519 | * the tick timer in the softirq ! The calling site | ||
520 | * takes care of this. | ||
521 | */ | ||
522 | return 1; | ||
523 | case HRTIMER_CB_IRQSAFE: | ||
524 | case HRTIMER_CB_SOFTIRQ: | ||
525 | /* | ||
526 | * Move everything else into the softirq pending list ! | ||
527 | */ | ||
528 | list_add_tail(&timer->cb_entry, | ||
529 | &base->cpu_base->cb_pending); | ||
530 | timer->state = HRTIMER_STATE_PENDING; | ||
531 | raise_softirq(HRTIMER_SOFTIRQ); | ||
532 | return 1; | ||
533 | default: | ||
534 | BUG(); | ||
535 | } | ||
536 | } | ||
537 | return 0; | ||
538 | } | ||
539 | |||
540 | /* | ||
541 | * Switch to high resolution mode | ||
542 | */ | ||
543 | static void hrtimer_switch_to_hres(void) | ||
544 | { | ||
545 | struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases); | ||
546 | unsigned long flags; | ||
547 | |||
548 | if (base->hres_active) | ||
549 | return; | ||
550 | |||
551 | local_irq_save(flags); | ||
552 | |||
553 | if (tick_init_highres()) { | ||
554 | local_irq_restore(flags); | ||
555 | return; | ||
556 | } | ||
557 | base->hres_active = 1; | ||
558 | base->clock_base[CLOCK_REALTIME].resolution = KTIME_HIGH_RES; | ||
559 | base->clock_base[CLOCK_MONOTONIC].resolution = KTIME_HIGH_RES; | ||
560 | |||
561 | tick_setup_sched_timer(); | ||
562 | |||
563 | /* "Retrigger" the interrupt to get things going */ | ||
564 | retrigger_next_event(NULL); | ||
565 | local_irq_restore(flags); | ||
566 | printk(KERN_INFO "Switched to high resolution mode on CPU %d\n", | ||
567 | smp_processor_id()); | ||
568 | } | ||
569 | |||
570 | #else | ||
571 | |||
572 | static inline int hrtimer_hres_active(void) { return 0; } | ||
573 | static inline int hrtimer_is_hres_enabled(void) { return 0; } | ||
574 | static inline void hrtimer_switch_to_hres(void) { } | ||
575 | static inline void hrtimer_force_reprogram(struct hrtimer_cpu_base *base) { } | ||
576 | static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, | ||
577 | struct hrtimer_clock_base *base) | ||
578 | { | ||
579 | return 0; | ||
580 | } | ||
581 | static inline int hrtimer_cb_pending(struct hrtimer *timer) { return 0; } | ||
582 | static inline void hrtimer_remove_cb_pending(struct hrtimer *timer) { } | ||
583 | static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { } | ||
584 | static inline void hrtimer_init_timer_hres(struct hrtimer *timer) { } | ||
585 | |||
586 | #endif /* CONFIG_HIGH_RES_TIMERS */ | ||
587 | |||
588 | #ifdef CONFIG_TIMER_STATS | ||
589 | void __timer_stats_hrtimer_set_start_info(struct hrtimer *timer, void *addr) | ||
590 | { | ||
591 | if (timer->start_site) | ||
592 | return; | ||
593 | |||
594 | timer->start_site = addr; | ||
595 | memcpy(timer->start_comm, current->comm, TASK_COMM_LEN); | ||
596 | timer->start_pid = current->pid; | ||
597 | } | ||
598 | #endif | ||
599 | |||
289 | /* | 600 | /* |
290 | * Counterpart to lock_hrtimer_base above: | 601 | * Counterpart to lock_hrtimer_base above: |
291 | */ | 602 | */ |
292 | static inline | 603 | static inline |
293 | void unlock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) | 604 | void unlock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) |
294 | { | 605 | { |
295 | spin_unlock_irqrestore(&timer->base->lock, *flags); | 606 | spin_unlock_irqrestore(&timer->base->cpu_base->lock, *flags); |
296 | } | 607 | } |
297 | 608 | ||
298 | /** | 609 | /** |
@@ -342,7 +653,8 @@ hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval) | |||
342 | * The timer is inserted in expiry order. Insertion into the | 653 | * The timer is inserted in expiry order. Insertion into the |
343 | * red black tree is O(log(n)). Must hold the base lock. | 654 | * red black tree is O(log(n)). Must hold the base lock. |
344 | */ | 655 | */ |
345 | static void enqueue_hrtimer(struct hrtimer *timer, struct hrtimer_base *base) | 656 | static void enqueue_hrtimer(struct hrtimer *timer, |
657 | struct hrtimer_clock_base *base, int reprogram) | ||
346 | { | 658 | { |
347 | struct rb_node **link = &base->active.rb_node; | 659 | struct rb_node **link = &base->active.rb_node; |
348 | struct rb_node *parent = NULL; | 660 | struct rb_node *parent = NULL; |
@@ -368,39 +680,85 @@ static void enqueue_hrtimer(struct hrtimer *timer, struct hrtimer_base *base) | |||
368 | * Insert the timer to the rbtree and check whether it | 680 | * Insert the timer to the rbtree and check whether it |
369 | * replaces the first pending timer | 681 | * replaces the first pending timer |
370 | */ | 682 | */ |
371 | rb_link_node(&timer->node, parent, link); | ||
372 | rb_insert_color(&timer->node, &base->active); | ||
373 | |||
374 | if (!base->first || timer->expires.tv64 < | 683 | if (!base->first || timer->expires.tv64 < |
375 | rb_entry(base->first, struct hrtimer, node)->expires.tv64) | 684 | rb_entry(base->first, struct hrtimer, node)->expires.tv64) { |
685 | /* | ||
686 | * Reprogram the clock event device. When the timer is already | ||
687 | * expired hrtimer_enqueue_reprogram has either called the | ||
688 | * callback or added it to the pending list and raised the | ||
689 | * softirq. | ||
690 | * | ||
691 | * This is a NOP for !HIGHRES | ||
692 | */ | ||
693 | if (reprogram && hrtimer_enqueue_reprogram(timer, base)) | ||
694 | return; | ||
695 | |||
376 | base->first = &timer->node; | 696 | base->first = &timer->node; |
697 | } | ||
698 | |||
699 | rb_link_node(&timer->node, parent, link); | ||
700 | rb_insert_color(&timer->node, &base->active); | ||
701 | /* | ||
702 | * HRTIMER_STATE_ENQUEUED is or'ed to the current state to preserve the | ||
703 | * state of a possibly running callback. | ||
704 | */ | ||
705 | timer->state |= HRTIMER_STATE_ENQUEUED; | ||
377 | } | 706 | } |
378 | 707 | ||
379 | /* | 708 | /* |
380 | * __remove_hrtimer - internal function to remove a timer | 709 | * __remove_hrtimer - internal function to remove a timer |
381 | * | 710 | * |
382 | * Caller must hold the base lock. | 711 | * Caller must hold the base lock. |
712 | * | ||
713 | * High resolution timer mode reprograms the clock event device when the | ||
714 | * timer is the one which expires next. The caller can disable this by setting | ||
715 | * reprogram to zero. This is useful, when the context does a reprogramming | ||
716 | * anyway (e.g. timer interrupt) | ||
383 | */ | 717 | */ |
384 | static void __remove_hrtimer(struct hrtimer *timer, struct hrtimer_base *base) | 718 | static void __remove_hrtimer(struct hrtimer *timer, |
719 | struct hrtimer_clock_base *base, | ||
720 | unsigned long newstate, int reprogram) | ||
385 | { | 721 | { |
386 | /* | 722 | /* High res. callback list. NOP for !HIGHRES */ |
387 | * Remove the timer from the rbtree and replace the | 723 | if (hrtimer_cb_pending(timer)) |
388 | * first entry pointer if necessary. | 724 | hrtimer_remove_cb_pending(timer); |
389 | */ | 725 | else { |
390 | if (base->first == &timer->node) | 726 | /* |
391 | base->first = rb_next(&timer->node); | 727 | * Remove the timer from the rbtree and replace the |
392 | rb_erase(&timer->node, &base->active); | 728 | * first entry pointer if necessary. |
393 | rb_set_parent(&timer->node, &timer->node); | 729 | */ |
730 | if (base->first == &timer->node) { | ||
731 | base->first = rb_next(&timer->node); | ||
732 | /* Reprogram the clock event device, if enabled */ | ||
733 | if (reprogram && hrtimer_hres_active()) | ||
734 | hrtimer_force_reprogram(base->cpu_base); | ||
735 | } | ||
736 | rb_erase(&timer->node, &base->active); | ||
737 | } | ||
738 | timer->state = newstate; | ||
394 | } | 739 | } |
395 | 740 | ||
396 | /* | 741 | /* |
397 | * remove hrtimer, called with base lock held | 742 | * remove hrtimer, called with base lock held |
398 | */ | 743 | */ |
399 | static inline int | 744 | static inline int |
400 | remove_hrtimer(struct hrtimer *timer, struct hrtimer_base *base) | 745 | remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base) |
401 | { | 746 | { |
402 | if (hrtimer_active(timer)) { | 747 | if (hrtimer_is_queued(timer)) { |
403 | __remove_hrtimer(timer, base); | 748 | int reprogram; |
749 | |||
750 | /* | ||
751 | * Remove the timer and force reprogramming when high | ||
752 | * resolution mode is active and the timer is on the current | ||
753 | * CPU. If we remove a timer on another CPU, reprogramming is | ||
754 | * skipped. The interrupt event on this CPU is fired and | ||
755 | * reprogramming happens in the interrupt handler. This is a | ||
756 | * rare case and less expensive than a smp call. | ||
757 | */ | ||
758 | timer_stats_hrtimer_clear_start_info(timer); | ||
759 | reprogram = base->cpu_base == &__get_cpu_var(hrtimer_bases); | ||
760 | __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, | ||
761 | reprogram); | ||
404 | return 1; | 762 | return 1; |
405 | } | 763 | } |
406 | return 0; | 764 | return 0; |
@@ -419,7 +777,7 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_base *base) | |||
419 | int | 777 | int |
420 | hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) | 778 | hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) |
421 | { | 779 | { |
422 | struct hrtimer_base *base, *new_base; | 780 | struct hrtimer_clock_base *base, *new_base; |
423 | unsigned long flags; | 781 | unsigned long flags; |
424 | int ret; | 782 | int ret; |
425 | 783 | ||
@@ -431,7 +789,7 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) | |||
431 | /* Switch the timer base, if necessary: */ | 789 | /* Switch the timer base, if necessary: */ |
432 | new_base = switch_hrtimer_base(timer, base); | 790 | new_base = switch_hrtimer_base(timer, base); |
433 | 791 | ||
434 | if (mode == HRTIMER_REL) { | 792 | if (mode == HRTIMER_MODE_REL) { |
435 | tim = ktime_add(tim, new_base->get_time()); | 793 | tim = ktime_add(tim, new_base->get_time()); |
436 | /* | 794 | /* |
437 | * CONFIG_TIME_LOW_RES is a temporary way for architectures | 795 | * CONFIG_TIME_LOW_RES is a temporary way for architectures |
@@ -446,7 +804,9 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) | |||
446 | } | 804 | } |
447 | timer->expires = tim; | 805 | timer->expires = tim; |
448 | 806 | ||
449 | enqueue_hrtimer(timer, new_base); | 807 | timer_stats_hrtimer_set_start_info(timer); |
808 | |||
809 | enqueue_hrtimer(timer, new_base, base == new_base); | ||
450 | 810 | ||
451 | unlock_hrtimer_base(timer, &flags); | 811 | unlock_hrtimer_base(timer, &flags); |
452 | 812 | ||
@@ -466,13 +826,13 @@ EXPORT_SYMBOL_GPL(hrtimer_start); | |||
466 | */ | 826 | */ |
467 | int hrtimer_try_to_cancel(struct hrtimer *timer) | 827 | int hrtimer_try_to_cancel(struct hrtimer *timer) |
468 | { | 828 | { |
469 | struct hrtimer_base *base; | 829 | struct hrtimer_clock_base *base; |
470 | unsigned long flags; | 830 | unsigned long flags; |
471 | int ret = -1; | 831 | int ret = -1; |
472 | 832 | ||
473 | base = lock_hrtimer_base(timer, &flags); | 833 | base = lock_hrtimer_base(timer, &flags); |
474 | 834 | ||
475 | if (base->curr_timer != timer) | 835 | if (!hrtimer_callback_running(timer)) |
476 | ret = remove_hrtimer(timer, base); | 836 | ret = remove_hrtimer(timer, base); |
477 | 837 | ||
478 | unlock_hrtimer_base(timer, &flags); | 838 | unlock_hrtimer_base(timer, &flags); |
@@ -508,19 +868,19 @@ EXPORT_SYMBOL_GPL(hrtimer_cancel); | |||
508 | */ | 868 | */ |
509 | ktime_t hrtimer_get_remaining(const struct hrtimer *timer) | 869 | ktime_t hrtimer_get_remaining(const struct hrtimer *timer) |
510 | { | 870 | { |
511 | struct hrtimer_base *base; | 871 | struct hrtimer_clock_base *base; |
512 | unsigned long flags; | 872 | unsigned long flags; |
513 | ktime_t rem; | 873 | ktime_t rem; |
514 | 874 | ||
515 | base = lock_hrtimer_base(timer, &flags); | 875 | base = lock_hrtimer_base(timer, &flags); |
516 | rem = ktime_sub(timer->expires, timer->base->get_time()); | 876 | rem = ktime_sub(timer->expires, base->get_time()); |
517 | unlock_hrtimer_base(timer, &flags); | 877 | unlock_hrtimer_base(timer, &flags); |
518 | 878 | ||
519 | return rem; | 879 | return rem; |
520 | } | 880 | } |
521 | EXPORT_SYMBOL_GPL(hrtimer_get_remaining); | 881 | EXPORT_SYMBOL_GPL(hrtimer_get_remaining); |
522 | 882 | ||
523 | #ifdef CONFIG_NO_IDLE_HZ | 883 | #if defined(CONFIG_NO_IDLE_HZ) || defined(CONFIG_NO_HZ) |
524 | /** | 884 | /** |
525 | * hrtimer_get_next_event - get the time until next expiry event | 885 | * hrtimer_get_next_event - get the time until next expiry event |
526 | * | 886 | * |
@@ -529,26 +889,31 @@ EXPORT_SYMBOL_GPL(hrtimer_get_remaining); | |||
529 | */ | 889 | */ |
530 | ktime_t hrtimer_get_next_event(void) | 890 | ktime_t hrtimer_get_next_event(void) |
531 | { | 891 | { |
532 | struct hrtimer_base *base = __get_cpu_var(hrtimer_bases); | 892 | struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); |
893 | struct hrtimer_clock_base *base = cpu_base->clock_base; | ||
533 | ktime_t delta, mindelta = { .tv64 = KTIME_MAX }; | 894 | ktime_t delta, mindelta = { .tv64 = KTIME_MAX }; |
534 | unsigned long flags; | 895 | unsigned long flags; |
535 | int i; | 896 | int i; |
536 | 897 | ||
537 | for (i = 0; i < MAX_HRTIMER_BASES; i++, base++) { | 898 | spin_lock_irqsave(&cpu_base->lock, flags); |
538 | struct hrtimer *timer; | ||
539 | 899 | ||
540 | spin_lock_irqsave(&base->lock, flags); | 900 | if (!hrtimer_hres_active()) { |
541 | if (!base->first) { | 901 | for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) { |
542 | spin_unlock_irqrestore(&base->lock, flags); | 902 | struct hrtimer *timer; |
543 | continue; | 903 | |
904 | if (!base->first) | ||
905 | continue; | ||
906 | |||
907 | timer = rb_entry(base->first, struct hrtimer, node); | ||
908 | delta.tv64 = timer->expires.tv64; | ||
909 | delta = ktime_sub(delta, base->get_time()); | ||
910 | if (delta.tv64 < mindelta.tv64) | ||
911 | mindelta.tv64 = delta.tv64; | ||
544 | } | 912 | } |
545 | timer = rb_entry(base->first, struct hrtimer, node); | ||
546 | delta.tv64 = timer->expires.tv64; | ||
547 | spin_unlock_irqrestore(&base->lock, flags); | ||
548 | delta = ktime_sub(delta, base->get_time()); | ||
549 | if (delta.tv64 < mindelta.tv64) | ||
550 | mindelta.tv64 = delta.tv64; | ||
551 | } | 913 | } |
914 | |||
915 | spin_unlock_irqrestore(&cpu_base->lock, flags); | ||
916 | |||
552 | if (mindelta.tv64 < 0) | 917 | if (mindelta.tv64 < 0) |
553 | mindelta.tv64 = 0; | 918 | mindelta.tv64 = 0; |
554 | return mindelta; | 919 | return mindelta; |
@@ -564,17 +929,23 @@ ktime_t hrtimer_get_next_event(void) | |||
564 | void hrtimer_init(struct hrtimer *timer, clockid_t clock_id, | 929 | void hrtimer_init(struct hrtimer *timer, clockid_t clock_id, |
565 | enum hrtimer_mode mode) | 930 | enum hrtimer_mode mode) |
566 | { | 931 | { |
567 | struct hrtimer_base *bases; | 932 | struct hrtimer_cpu_base *cpu_base; |
568 | 933 | ||
569 | memset(timer, 0, sizeof(struct hrtimer)); | 934 | memset(timer, 0, sizeof(struct hrtimer)); |
570 | 935 | ||
571 | bases = __raw_get_cpu_var(hrtimer_bases); | 936 | cpu_base = &__raw_get_cpu_var(hrtimer_bases); |
572 | 937 | ||
573 | if (clock_id == CLOCK_REALTIME && mode != HRTIMER_ABS) | 938 | if (clock_id == CLOCK_REALTIME && mode != HRTIMER_MODE_ABS) |
574 | clock_id = CLOCK_MONOTONIC; | 939 | clock_id = CLOCK_MONOTONIC; |
575 | 940 | ||
576 | timer->base = &bases[clock_id]; | 941 | timer->base = &cpu_base->clock_base[clock_id]; |
577 | rb_set_parent(&timer->node, &timer->node); | 942 | hrtimer_init_timer_hres(timer); |
943 | |||
944 | #ifdef CONFIG_TIMER_STATS | ||
945 | timer->start_site = NULL; | ||
946 | timer->start_pid = -1; | ||
947 | memset(timer->start_comm, 0, TASK_COMM_LEN); | ||
948 | #endif | ||
578 | } | 949 | } |
579 | EXPORT_SYMBOL_GPL(hrtimer_init); | 950 | EXPORT_SYMBOL_GPL(hrtimer_init); |
580 | 951 | ||
@@ -588,21 +959,159 @@ EXPORT_SYMBOL_GPL(hrtimer_init); | |||
588 | */ | 959 | */ |
589 | int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp) | 960 | int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp) |
590 | { | 961 | { |
591 | struct hrtimer_base *bases; | 962 | struct hrtimer_cpu_base *cpu_base; |
592 | 963 | ||
593 | bases = __raw_get_cpu_var(hrtimer_bases); | 964 | cpu_base = &__raw_get_cpu_var(hrtimer_bases); |
594 | *tp = ktime_to_timespec(bases[which_clock].resolution); | 965 | *tp = ktime_to_timespec(cpu_base->clock_base[which_clock].resolution); |
595 | 966 | ||
596 | return 0; | 967 | return 0; |
597 | } | 968 | } |
598 | EXPORT_SYMBOL_GPL(hrtimer_get_res); | 969 | EXPORT_SYMBOL_GPL(hrtimer_get_res); |
599 | 970 | ||
971 | #ifdef CONFIG_HIGH_RES_TIMERS | ||
972 | |||
973 | /* | ||
974 | * High resolution timer interrupt | ||
975 | * Called with interrupts disabled | ||
976 | */ | ||
977 | void hrtimer_interrupt(struct clock_event_device *dev) | ||
978 | { | ||
979 | struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); | ||
980 | struct hrtimer_clock_base *base; | ||
981 | ktime_t expires_next, now; | ||
982 | int i, raise = 0; | ||
983 | |||
984 | BUG_ON(!cpu_base->hres_active); | ||
985 | cpu_base->nr_events++; | ||
986 | dev->next_event.tv64 = KTIME_MAX; | ||
987 | |||
988 | retry: | ||
989 | now = ktime_get(); | ||
990 | |||
991 | expires_next.tv64 = KTIME_MAX; | ||
992 | |||
993 | base = cpu_base->clock_base; | ||
994 | |||
995 | for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { | ||
996 | ktime_t basenow; | ||
997 | struct rb_node *node; | ||
998 | |||
999 | spin_lock(&cpu_base->lock); | ||
1000 | |||
1001 | basenow = ktime_add(now, base->offset); | ||
1002 | |||
1003 | while ((node = base->first)) { | ||
1004 | struct hrtimer *timer; | ||
1005 | |||
1006 | timer = rb_entry(node, struct hrtimer, node); | ||
1007 | |||
1008 | if (basenow.tv64 < timer->expires.tv64) { | ||
1009 | ktime_t expires; | ||
1010 | |||
1011 | expires = ktime_sub(timer->expires, | ||
1012 | base->offset); | ||
1013 | if (expires.tv64 < expires_next.tv64) | ||
1014 | expires_next = expires; | ||
1015 | break; | ||
1016 | } | ||
1017 | |||
1018 | /* Move softirq callbacks to the pending list */ | ||
1019 | if (timer->cb_mode == HRTIMER_CB_SOFTIRQ) { | ||
1020 | __remove_hrtimer(timer, base, | ||
1021 | HRTIMER_STATE_PENDING, 0); | ||
1022 | list_add_tail(&timer->cb_entry, | ||
1023 | &base->cpu_base->cb_pending); | ||
1024 | raise = 1; | ||
1025 | continue; | ||
1026 | } | ||
1027 | |||
1028 | __remove_hrtimer(timer, base, | ||
1029 | HRTIMER_STATE_CALLBACK, 0); | ||
1030 | timer_stats_account_hrtimer(timer); | ||
1031 | |||
1032 | /* | ||
1033 | * Note: We clear the CALLBACK bit after | ||
1034 | * enqueue_hrtimer to avoid reprogramming of | ||
1035 | * the event hardware. This happens at the end | ||
1036 | * of this function anyway. | ||
1037 | */ | ||
1038 | if (timer->function(timer) != HRTIMER_NORESTART) { | ||
1039 | BUG_ON(timer->state != HRTIMER_STATE_CALLBACK); | ||
1040 | enqueue_hrtimer(timer, base, 0); | ||
1041 | } | ||
1042 | timer->state &= ~HRTIMER_STATE_CALLBACK; | ||
1043 | } | ||
1044 | spin_unlock(&cpu_base->lock); | ||
1045 | base++; | ||
1046 | } | ||
1047 | |||
1048 | cpu_base->expires_next = expires_next; | ||
1049 | |||
1050 | /* Reprogramming necessary ? */ | ||
1051 | if (expires_next.tv64 != KTIME_MAX) { | ||
1052 | if (tick_program_event(expires_next, 0)) | ||
1053 | goto retry; | ||
1054 | } | ||
1055 | |||
1056 | /* Raise softirq ? */ | ||
1057 | if (raise) | ||
1058 | raise_softirq(HRTIMER_SOFTIRQ); | ||
1059 | } | ||
1060 | |||
1061 | static void run_hrtimer_softirq(struct softirq_action *h) | ||
1062 | { | ||
1063 | struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); | ||
1064 | |||
1065 | spin_lock_irq(&cpu_base->lock); | ||
1066 | |||
1067 | while (!list_empty(&cpu_base->cb_pending)) { | ||
1068 | enum hrtimer_restart (*fn)(struct hrtimer *); | ||
1069 | struct hrtimer *timer; | ||
1070 | int restart; | ||
1071 | |||
1072 | timer = list_entry(cpu_base->cb_pending.next, | ||
1073 | struct hrtimer, cb_entry); | ||
1074 | |||
1075 | timer_stats_account_hrtimer(timer); | ||
1076 | |||
1077 | fn = timer->function; | ||
1078 | __remove_hrtimer(timer, timer->base, HRTIMER_STATE_CALLBACK, 0); | ||
1079 | spin_unlock_irq(&cpu_base->lock); | ||
1080 | |||
1081 | restart = fn(timer); | ||
1082 | |||
1083 | spin_lock_irq(&cpu_base->lock); | ||
1084 | |||
1085 | timer->state &= ~HRTIMER_STATE_CALLBACK; | ||
1086 | if (restart == HRTIMER_RESTART) { | ||
1087 | BUG_ON(hrtimer_active(timer)); | ||
1088 | /* | ||
1089 | * Enqueue the timer, allow reprogramming of the event | ||
1090 | * device | ||
1091 | */ | ||
1092 | enqueue_hrtimer(timer, timer->base, 1); | ||
1093 | } else if (hrtimer_active(timer)) { | ||
1094 | /* | ||
1095 | * If the timer was rearmed on another CPU, reprogram | ||
1096 | * the event device. | ||
1097 | */ | ||
1098 | if (timer->base->first == &timer->node) | ||
1099 | hrtimer_reprogram(timer, timer->base); | ||
1100 | } | ||
1101 | } | ||
1102 | spin_unlock_irq(&cpu_base->lock); | ||
1103 | } | ||
1104 | |||
1105 | #endif /* CONFIG_HIGH_RES_TIMERS */ | ||
1106 | |||
600 | /* | 1107 | /* |
601 | * Expire the per base hrtimer-queue: | 1108 | * Expire the per base hrtimer-queue: |
602 | */ | 1109 | */ |
603 | static inline void run_hrtimer_queue(struct hrtimer_base *base) | 1110 | static inline void run_hrtimer_queue(struct hrtimer_cpu_base *cpu_base, |
1111 | int index) | ||
604 | { | 1112 | { |
605 | struct rb_node *node; | 1113 | struct rb_node *node; |
1114 | struct hrtimer_clock_base *base = &cpu_base->clock_base[index]; | ||
606 | 1115 | ||
607 | if (!base->first) | 1116 | if (!base->first) |
608 | return; | 1117 | return; |
@@ -610,53 +1119,72 @@ static inline void run_hrtimer_queue(struct hrtimer_base *base) | |||
610 | if (base->get_softirq_time) | 1119 | if (base->get_softirq_time) |
611 | base->softirq_time = base->get_softirq_time(); | 1120 | base->softirq_time = base->get_softirq_time(); |
612 | 1121 | ||
613 | spin_lock_irq(&base->lock); | 1122 | spin_lock_irq(&cpu_base->lock); |
614 | 1123 | ||
615 | while ((node = base->first)) { | 1124 | while ((node = base->first)) { |
616 | struct hrtimer *timer; | 1125 | struct hrtimer *timer; |
617 | int (*fn)(struct hrtimer *); | 1126 | enum hrtimer_restart (*fn)(struct hrtimer *); |
618 | int restart; | 1127 | int restart; |
619 | 1128 | ||
620 | timer = rb_entry(node, struct hrtimer, node); | 1129 | timer = rb_entry(node, struct hrtimer, node); |
621 | if (base->softirq_time.tv64 <= timer->expires.tv64) | 1130 | if (base->softirq_time.tv64 <= timer->expires.tv64) |
622 | break; | 1131 | break; |
623 | 1132 | ||
1133 | timer_stats_account_hrtimer(timer); | ||
1134 | |||
624 | fn = timer->function; | 1135 | fn = timer->function; |
625 | set_curr_timer(base, timer); | 1136 | __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0); |
626 | __remove_hrtimer(timer, base); | 1137 | spin_unlock_irq(&cpu_base->lock); |
627 | spin_unlock_irq(&base->lock); | ||
628 | 1138 | ||
629 | restart = fn(timer); | 1139 | restart = fn(timer); |
630 | 1140 | ||
631 | spin_lock_irq(&base->lock); | 1141 | spin_lock_irq(&cpu_base->lock); |
632 | 1142 | ||
1143 | timer->state &= ~HRTIMER_STATE_CALLBACK; | ||
633 | if (restart != HRTIMER_NORESTART) { | 1144 | if (restart != HRTIMER_NORESTART) { |
634 | BUG_ON(hrtimer_active(timer)); | 1145 | BUG_ON(hrtimer_active(timer)); |
635 | enqueue_hrtimer(timer, base); | 1146 | enqueue_hrtimer(timer, base, 0); |
636 | } | 1147 | } |
637 | } | 1148 | } |
638 | set_curr_timer(base, NULL); | 1149 | spin_unlock_irq(&cpu_base->lock); |
639 | spin_unlock_irq(&base->lock); | ||
640 | } | 1150 | } |
641 | 1151 | ||
642 | /* | 1152 | /* |
643 | * Called from timer softirq every jiffy, expire hrtimers: | 1153 | * Called from timer softirq every jiffy, expire hrtimers: |
1154 | * | ||
1155 | * For HRT its the fall back code to run the softirq in the timer | ||
1156 | * softirq context in case the hrtimer initialization failed or has | ||
1157 | * not been done yet. | ||
644 | */ | 1158 | */ |
645 | void hrtimer_run_queues(void) | 1159 | void hrtimer_run_queues(void) |
646 | { | 1160 | { |
647 | struct hrtimer_base *base = __get_cpu_var(hrtimer_bases); | 1161 | struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); |
648 | int i; | 1162 | int i; |
649 | 1163 | ||
650 | hrtimer_get_softirq_time(base); | 1164 | if (hrtimer_hres_active()) |
1165 | return; | ||
1166 | |||
1167 | /* | ||
1168 | * This _is_ ugly: We have to check in the softirq context, | ||
1169 | * whether we can switch to highres and / or nohz mode. The | ||
1170 | * clocksource switch happens in the timer interrupt with | ||
1171 | * xtime_lock held. Notification from there only sets the | ||
1172 | * check bit in the tick_oneshot code, otherwise we might | ||
1173 | * deadlock vs. xtime_lock. | ||
1174 | */ | ||
1175 | if (tick_check_oneshot_change(!hrtimer_is_hres_enabled())) | ||
1176 | hrtimer_switch_to_hres(); | ||
651 | 1177 | ||
652 | for (i = 0; i < MAX_HRTIMER_BASES; i++) | 1178 | hrtimer_get_softirq_time(cpu_base); |
653 | run_hrtimer_queue(&base[i]); | 1179 | |
1180 | for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) | ||
1181 | run_hrtimer_queue(cpu_base, i); | ||
654 | } | 1182 | } |
655 | 1183 | ||
656 | /* | 1184 | /* |
657 | * Sleep related functions: | 1185 | * Sleep related functions: |
658 | */ | 1186 | */ |
659 | static int hrtimer_wakeup(struct hrtimer *timer) | 1187 | static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer) |
660 | { | 1188 | { |
661 | struct hrtimer_sleeper *t = | 1189 | struct hrtimer_sleeper *t = |
662 | container_of(timer, struct hrtimer_sleeper, timer); | 1190 | container_of(timer, struct hrtimer_sleeper, timer); |
@@ -673,6 +1201,9 @@ void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task) | |||
673 | { | 1201 | { |
674 | sl->timer.function = hrtimer_wakeup; | 1202 | sl->timer.function = hrtimer_wakeup; |
675 | sl->task = task; | 1203 | sl->task = task; |
1204 | #ifdef CONFIG_HIGH_RES_TIMERS | ||
1205 | sl->timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_RESTART; | ||
1206 | #endif | ||
676 | } | 1207 | } |
677 | 1208 | ||
678 | static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode) | 1209 | static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode) |
@@ -683,10 +1214,11 @@ static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mod | |||
683 | set_current_state(TASK_INTERRUPTIBLE); | 1214 | set_current_state(TASK_INTERRUPTIBLE); |
684 | hrtimer_start(&t->timer, t->timer.expires, mode); | 1215 | hrtimer_start(&t->timer, t->timer.expires, mode); |
685 | 1216 | ||
686 | schedule(); | 1217 | if (likely(t->task)) |
1218 | schedule(); | ||
687 | 1219 | ||
688 | hrtimer_cancel(&t->timer); | 1220 | hrtimer_cancel(&t->timer); |
689 | mode = HRTIMER_ABS; | 1221 | mode = HRTIMER_MODE_ABS; |
690 | 1222 | ||
691 | } while (t->task && !signal_pending(current)); | 1223 | } while (t->task && !signal_pending(current)); |
692 | 1224 | ||
@@ -702,10 +1234,10 @@ long __sched hrtimer_nanosleep_restart(struct restart_block *restart) | |||
702 | 1234 | ||
703 | restart->fn = do_no_restart_syscall; | 1235 | restart->fn = do_no_restart_syscall; |
704 | 1236 | ||
705 | hrtimer_init(&t.timer, restart->arg0, HRTIMER_ABS); | 1237 | hrtimer_init(&t.timer, restart->arg0, HRTIMER_MODE_ABS); |
706 | t.timer.expires.tv64 = ((u64)restart->arg3 << 32) | (u64) restart->arg2; | 1238 | t.timer.expires.tv64 = ((u64)restart->arg3 << 32) | (u64) restart->arg2; |
707 | 1239 | ||
708 | if (do_nanosleep(&t, HRTIMER_ABS)) | 1240 | if (do_nanosleep(&t, HRTIMER_MODE_ABS)) |
709 | return 0; | 1241 | return 0; |
710 | 1242 | ||
711 | rmtp = (struct timespec __user *) restart->arg1; | 1243 | rmtp = (struct timespec __user *) restart->arg1; |
@@ -738,7 +1270,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, | |||
738 | return 0; | 1270 | return 0; |
739 | 1271 | ||
740 | /* Absolute timers do not update the rmtp value and restart: */ | 1272 | /* Absolute timers do not update the rmtp value and restart: */ |
741 | if (mode == HRTIMER_ABS) | 1273 | if (mode == HRTIMER_MODE_ABS) |
742 | return -ERESTARTNOHAND; | 1274 | return -ERESTARTNOHAND; |
743 | 1275 | ||
744 | if (rmtp) { | 1276 | if (rmtp) { |
@@ -771,7 +1303,7 @@ sys_nanosleep(struct timespec __user *rqtp, struct timespec __user *rmtp) | |||
771 | if (!timespec_valid(&tu)) | 1303 | if (!timespec_valid(&tu)) |
772 | return -EINVAL; | 1304 | return -EINVAL; |
773 | 1305 | ||
774 | return hrtimer_nanosleep(&tu, rmtp, HRTIMER_REL, CLOCK_MONOTONIC); | 1306 | return hrtimer_nanosleep(&tu, rmtp, HRTIMER_MODE_REL, CLOCK_MONOTONIC); |
775 | } | 1307 | } |
776 | 1308 | ||
777 | /* | 1309 | /* |
@@ -779,56 +1311,60 @@ sys_nanosleep(struct timespec __user *rqtp, struct timespec __user *rmtp) | |||
779 | */ | 1311 | */ |
780 | static void __devinit init_hrtimers_cpu(int cpu) | 1312 | static void __devinit init_hrtimers_cpu(int cpu) |
781 | { | 1313 | { |
782 | struct hrtimer_base *base = per_cpu(hrtimer_bases, cpu); | 1314 | struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu); |
783 | int i; | 1315 | int i; |
784 | 1316 | ||
785 | for (i = 0; i < MAX_HRTIMER_BASES; i++, base++) { | 1317 | spin_lock_init(&cpu_base->lock); |
786 | spin_lock_init(&base->lock); | 1318 | lockdep_set_class(&cpu_base->lock, &cpu_base->lock_key); |
787 | lockdep_set_class(&base->lock, &base->lock_key); | 1319 | |
788 | } | 1320 | for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) |
1321 | cpu_base->clock_base[i].cpu_base = cpu_base; | ||
1322 | |||
1323 | hrtimer_init_hres(cpu_base); | ||
789 | } | 1324 | } |
790 | 1325 | ||
791 | #ifdef CONFIG_HOTPLUG_CPU | 1326 | #ifdef CONFIG_HOTPLUG_CPU |
792 | 1327 | ||
793 | static void migrate_hrtimer_list(struct hrtimer_base *old_base, | 1328 | static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base, |
794 | struct hrtimer_base *new_base) | 1329 | struct hrtimer_clock_base *new_base) |
795 | { | 1330 | { |
796 | struct hrtimer *timer; | 1331 | struct hrtimer *timer; |
797 | struct rb_node *node; | 1332 | struct rb_node *node; |
798 | 1333 | ||
799 | while ((node = rb_first(&old_base->active))) { | 1334 | while ((node = rb_first(&old_base->active))) { |
800 | timer = rb_entry(node, struct hrtimer, node); | 1335 | timer = rb_entry(node, struct hrtimer, node); |
801 | __remove_hrtimer(timer, old_base); | 1336 | BUG_ON(hrtimer_callback_running(timer)); |
1337 | __remove_hrtimer(timer, old_base, HRTIMER_STATE_INACTIVE, 0); | ||
802 | timer->base = new_base; | 1338 | timer->base = new_base; |
803 | enqueue_hrtimer(timer, new_base); | 1339 | /* |
1340 | * Enqueue the timer. Allow reprogramming of the event device | ||
1341 | */ | ||
1342 | enqueue_hrtimer(timer, new_base, 1); | ||
804 | } | 1343 | } |
805 | } | 1344 | } |
806 | 1345 | ||
807 | static void migrate_hrtimers(int cpu) | 1346 | static void migrate_hrtimers(int cpu) |
808 | { | 1347 | { |
809 | struct hrtimer_base *old_base, *new_base; | 1348 | struct hrtimer_cpu_base *old_base, *new_base; |
810 | int i; | 1349 | int i; |
811 | 1350 | ||
812 | BUG_ON(cpu_online(cpu)); | 1351 | BUG_ON(cpu_online(cpu)); |
813 | old_base = per_cpu(hrtimer_bases, cpu); | 1352 | old_base = &per_cpu(hrtimer_bases, cpu); |
814 | new_base = get_cpu_var(hrtimer_bases); | 1353 | new_base = &get_cpu_var(hrtimer_bases); |
815 | |||
816 | local_irq_disable(); | ||
817 | 1354 | ||
818 | for (i = 0; i < MAX_HRTIMER_BASES; i++) { | 1355 | tick_cancel_sched_timer(cpu); |
819 | 1356 | ||
820 | spin_lock(&new_base->lock); | 1357 | local_irq_disable(); |
821 | spin_lock(&old_base->lock); | ||
822 | |||
823 | BUG_ON(old_base->curr_timer); | ||
824 | 1358 | ||
825 | migrate_hrtimer_list(old_base, new_base); | 1359 | spin_lock(&new_base->lock); |
1360 | spin_lock(&old_base->lock); | ||
826 | 1361 | ||
827 | spin_unlock(&old_base->lock); | 1362 | for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { |
828 | spin_unlock(&new_base->lock); | 1363 | migrate_hrtimer_list(&old_base->clock_base[i], |
829 | old_base++; | 1364 | &new_base->clock_base[i]); |
830 | new_base++; | ||
831 | } | 1365 | } |
1366 | spin_unlock(&old_base->lock); | ||
1367 | spin_unlock(&new_base->lock); | ||
832 | 1368 | ||
833 | local_irq_enable(); | 1369 | local_irq_enable(); |
834 | put_cpu_var(hrtimer_bases); | 1370 | put_cpu_var(hrtimer_bases); |
@@ -848,6 +1384,7 @@ static int __cpuinit hrtimer_cpu_notify(struct notifier_block *self, | |||
848 | 1384 | ||
849 | #ifdef CONFIG_HOTPLUG_CPU | 1385 | #ifdef CONFIG_HOTPLUG_CPU |
850 | case CPU_DEAD: | 1386 | case CPU_DEAD: |
1387 | clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DEAD, &cpu); | ||
851 | migrate_hrtimers(cpu); | 1388 | migrate_hrtimers(cpu); |
852 | break; | 1389 | break; |
853 | #endif | 1390 | #endif |
@@ -868,5 +1405,8 @@ void __init hrtimers_init(void) | |||
868 | hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE, | 1405 | hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE, |
869 | (void *)(long)smp_processor_id()); | 1406 | (void *)(long)smp_processor_id()); |
870 | register_cpu_notifier(&hrtimers_nb); | 1407 | register_cpu_notifier(&hrtimers_nb); |
1408 | #ifdef CONFIG_HIGH_RES_TIMERS | ||
1409 | open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq, NULL); | ||
1410 | #endif | ||
871 | } | 1411 | } |
872 | 1412 | ||