Diffstat (limited to 'kernel/hrtimer.c')
 -rw-r--r--  kernel/hrtimer.c  170
 1 file changed, 102 insertions(+), 68 deletions(-)
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 3e1c36e7998f..0086628b6e97 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -127,11 +127,11 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
 	for (;;) {
 		base = timer->base;
 		if (likely(base != NULL)) {
-			spin_lock_irqsave(&base->cpu_base->lock, *flags);
+			raw_spin_lock_irqsave(&base->cpu_base->lock, *flags);
 			if (likely(base == timer->base))
 				return base;
 			/* The timer has migrated to another CPU: */
-			spin_unlock_irqrestore(&base->cpu_base->lock, *flags);
+			raw_spin_unlock_irqrestore(&base->cpu_base->lock, *flags);
 		}
 		cpu_relax();
 	}
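Every hunk in this diff repeats the conversion shown here: a spin_lock*() call becomes its raw_spin_lock*() counterpart, with the locking logic untouched. As background not stated in the diff itself: this is the usual preparation for PREEMPT_RT, where spinlock_t may turn into a sleeping lock, while raw_spinlock_t always busy-waits with interrupts disabled, a property the hrtimer base lock needs because it is taken from hard interrupt context.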
@@ -208,13 +208,13 @@ again:
 
 	/* See the comment in lock_timer_base() */
 	timer->base = NULL;
-	spin_unlock(&base->cpu_base->lock);
-	spin_lock(&new_base->cpu_base->lock);
+	raw_spin_unlock(&base->cpu_base->lock);
+	raw_spin_lock(&new_base->cpu_base->lock);
 
 	if (cpu != this_cpu && hrtimer_check_target(timer, new_base)) {
 		cpu = this_cpu;
-		spin_unlock(&new_base->cpu_base->lock);
-		spin_lock(&base->cpu_base->lock);
+		raw_spin_unlock(&new_base->cpu_base->lock);
+		raw_spin_lock(&base->cpu_base->lock);
 		timer->base = base;
 		goto again;
 	}
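Together with the hunk above, lock_hrtimer_base() and switch_hrtimer_base() form a lock-and-revalidate protocol: a locker re-checks timer->base after acquiring the lock, because the migrator sets timer->base to NULL before trading locks. A minimal userspace sketch of the same pattern, assuming pthreads and GCC atomic builtins; every name below is hypothetical, not a kernel API:

#include <pthread.h>
#include <sched.h>
#include <stddef.h>

struct base {
        pthread_mutex_t lock;
};

struct timer {
        struct base *base;              /* may change while we are unlocked */
};

/* Locker side: take the lock, then re-check that the base is still ours. */
static struct base *lock_timer_base(struct timer *t)
{
        for (;;) {
                struct base *b = __atomic_load_n(&t->base, __ATOMIC_ACQUIRE);

                if (b != NULL) {
                        pthread_mutex_lock(&b->lock);
                        if (b == __atomic_load_n(&t->base, __ATOMIC_ACQUIRE))
                                return b;       /* still valid, lock held */
                        /* timer migrated meanwhile: drop stale lock, retry */
                        pthread_mutex_unlock(&b->lock);
                }
                sched_yield();                  /* stand-in for cpu_relax() */
        }
}

/* Migrator side: NULL the pointer so lockers spin until the move is done. */
static void switch_timer_base(struct timer *t, struct base *oldb,
                              struct base *newb)
{
        __atomic_store_n(&t->base, NULL, __ATOMIC_RELEASE);
        pthread_mutex_unlock(&oldb->lock);      /* caller held oldb->lock */
        pthread_mutex_lock(&newb->lock);
        __atomic_store_n(&t->base, newb, __ATOMIC_RELEASE);
}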
@@ -230,7 +230,7 @@ lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
 {
 	struct hrtimer_clock_base *base = timer->base;
 
-	spin_lock_irqsave(&base->cpu_base->lock, *flags);
+	raw_spin_lock_irqsave(&base->cpu_base->lock, *flags);
 
 	return base;
 }
@@ -557,7 +557,7 @@ hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal)
 static int hrtimer_reprogram(struct hrtimer *timer,
                              struct hrtimer_clock_base *base)
 {
-	ktime_t *expires_next = &__get_cpu_var(hrtimer_bases).expires_next;
+	struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
 	ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
 	int res;
 
@@ -582,7 +582,16 @@ static int hrtimer_reprogram(struct hrtimer *timer,
 	if (expires.tv64 < 0)
 		return -ETIME;
 
-	if (expires.tv64 >= expires_next->tv64)
+	if (expires.tv64 >= cpu_base->expires_next.tv64)
+		return 0;
+
+	/*
+	 * If a hang was detected in the last timer interrupt then we
+	 * do not schedule a timer which is earlier than the expiry
+	 * which we enforced in the hang detection. We want the system
+	 * to make progress.
+	 */
+	if (cpu_base->hang_detected)
 		return 0;
 
 	/*
@@ -590,7 +599,7 @@ static int hrtimer_reprogram(struct hrtimer *timer,
 	 */
 	res = tick_program_event(expires, 0);
 	if (!IS_ERR_VALUE(res))
-		*expires_next = expires;
+		cpu_base->expires_next = expires;
 	return res;
 }
 
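Stripped of kernel types, the early-exit logic that this hunk pair adds to hrtimer_reprogram() reduces to two checks. A standalone sketch, using plain int64 nanoseconds in place of ktime_t and a made-up helper name:

#include <stdint.h>

/* Mirror of the two early returns added to hrtimer_reprogram(). */
static int reprogram_allowed(int64_t expires, int64_t expires_next,
                             int hang_detected)
{
        /* an event at least as early is already programmed */
        if (expires >= expires_next)
                return 0;
        /*
         * A hang was detected in the last interrupt: keep the later
         * expiry enforced there so the system can make progress.
         */
        if (hang_detected)
                return 0;
        return 1;
}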
@@ -619,12 +628,12 @@ static void retrigger_next_event(void *arg)
 	base = &__get_cpu_var(hrtimer_bases);
 
 	/* Adjust CLOCK_REALTIME offset */
-	spin_lock(&base->lock);
+	raw_spin_lock(&base->lock);
 	base->clock_base[CLOCK_REALTIME].offset =
 		timespec_to_ktime(realtime_offset);
 
 	hrtimer_force_reprogram(base, 0);
-	spin_unlock(&base->lock);
+	raw_spin_unlock(&base->lock);
 }
 
 /*
@@ -685,9 +694,9 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
 {
 	if (base->cpu_base->hres_active && hrtimer_reprogram(timer, base)) {
 		if (wakeup) {
-			spin_unlock(&base->cpu_base->lock);
+			raw_spin_unlock(&base->cpu_base->lock);
 			raise_softirq_irqoff(HRTIMER_SOFTIRQ);
-			spin_lock(&base->cpu_base->lock);
+			raw_spin_lock(&base->cpu_base->lock);
 		} else
 			__raise_softirq_irqoff(HRTIMER_SOFTIRQ);
 
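The wakeup branch above drops the base lock around raise_softirq_irqoff(), which can end up waking ksoftirqd; a wakeup must not happen while cpu_base->lock is held. A userspace analogue of that unlock/notify/relock shape, assuming pthreads, with invented names:

#include <pthread.h>

static pthread_mutex_t base_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t work_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  work_cond = PTHREAD_COND_INITIALIZER;
static int work_pending;

/* Called with base_lock held; returns with base_lock held again. */
static void kick_worker_locked(void)
{
        pthread_mutex_unlock(&base_lock);       /* wakee will need this lock */

        pthread_mutex_lock(&work_lock);
        work_pending = 1;
        pthread_cond_signal(&work_cond);
        pthread_mutex_unlock(&work_lock);

        pthread_mutex_lock(&base_lock);         /* caller expects it back */
}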
@@ -747,17 +756,33 @@ static inline void hrtimer_init_timer_hres(struct hrtimer *timer) { }
 
 #endif /* CONFIG_HIGH_RES_TIMERS */
 
-#ifdef CONFIG_TIMER_STATS
-void __timer_stats_hrtimer_set_start_info(struct hrtimer *timer, void *addr)
+static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer)
 {
+#ifdef CONFIG_TIMER_STATS
 	if (timer->start_site)
 		return;
-
-	timer->start_site = addr;
+	timer->start_site = __builtin_return_address(0);
 	memcpy(timer->start_comm, current->comm, TASK_COMM_LEN);
 	timer->start_pid = current->pid;
+#endif
 }
+
+static inline void timer_stats_hrtimer_clear_start_info(struct hrtimer *timer)
+{
+#ifdef CONFIG_TIMER_STATS
+	timer->start_site = NULL;
+#endif
+}
+
+static inline void timer_stats_account_hrtimer(struct hrtimer *timer)
+{
+#ifdef CONFIG_TIMER_STATS
+	if (likely(!timer_stats_active))
+		return;
+	timer_stats_update_stats(timer, timer->start_pid, timer->start_site,
+	                         timer->function, timer->start_comm, 0);
 #endif
+}
 
 /*
  * Counterpart to lock_hrtimer_base above:
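These helpers move the #ifdef inside always-present static inlines, so call sites need no conditional compilation and the compiler discards the empty bodies when CONFIG_TIMER_STATS is off. The same idiom in isolation, with a placeholder option and struct; nothing below is a kernel API:

struct foo {
        void *start_site;
};

/* Always defined; the body compiles away when the option is disabled. */
static inline void foo_stats_set_start_info(struct foo *f)
{
#ifdef CONFIG_FOO_STATS
        if (f->start_site)
                return;
        /* record our caller, as the hrtimer version now does */
        f->start_site = __builtin_return_address(0);
#endif
}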
@@ -765,7 +790,7 @@ void __timer_stats_hrtimer_set_start_info(struct hrtimer *timer, void *addr)
 static inline
 void unlock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
 {
-	spin_unlock_irqrestore(&timer->base->cpu_base->lock, *flags);
+	raw_spin_unlock_irqrestore(&timer->base->cpu_base->lock, *flags);
 }
 
 /**
@@ -1098,7 +1123,7 @@ ktime_t hrtimer_get_next_event(void)
 	unsigned long flags;
 	int i;
 
-	spin_lock_irqsave(&cpu_base->lock, flags);
+	raw_spin_lock_irqsave(&cpu_base->lock, flags);
 
 	if (!hrtimer_hres_active()) {
 		for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) {
@@ -1115,7 +1140,7 @@ ktime_t hrtimer_get_next_event(void)
 		}
 	}
 
-	spin_unlock_irqrestore(&cpu_base->lock, flags);
+	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
 
 	if (mindelta.tv64 < 0)
 		mindelta.tv64 = 0;
@@ -1197,11 +1222,11 @@ static void __run_hrtimer(struct hrtimer *timer, ktime_t *now)
 	 * they get migrated to another cpu, therefore its safe to unlock
 	 * the timer base.
 	 */
-	spin_unlock(&cpu_base->lock);
+	raw_spin_unlock(&cpu_base->lock);
 	trace_hrtimer_expire_entry(timer, now);
 	restart = fn(timer);
 	trace_hrtimer_expire_exit(timer);
-	spin_lock(&cpu_base->lock);
+	raw_spin_lock(&cpu_base->lock);
 
 	/*
 	 * Note: We clear the CALLBACK bit after enqueue_hrtimer and
@@ -1217,29 +1242,6 @@ static void __run_hrtimer(struct hrtimer *timer, ktime_t *now)
 
 #ifdef CONFIG_HIGH_RES_TIMERS
 
-static int force_clock_reprogram;
-
-/*
- * After 5 iteration's attempts, we consider that hrtimer_interrupt()
- * is hanging, which could happen with something that slows the interrupt
- * such as the tracing. Then we force the clock reprogramming for each future
- * hrtimer interrupts to avoid infinite loops and use the min_delta_ns
- * threshold that we will overwrite.
- * The next tick event will be scheduled to 3 times we currently spend on
- * hrtimer_interrupt(). This gives a good compromise, the cpus will spend
- * 1/4 of their time to process the hrtimer interrupts. This is enough to
- * let it running without serious starvation.
- */
-
-static inline void
-hrtimer_interrupt_hanging(struct clock_event_device *dev,
-                          ktime_t try_time)
-{
-	force_clock_reprogram = 1;
-	dev->min_delta_ns = (unsigned long)try_time.tv64 * 3;
-	printk(KERN_WARNING "hrtimer: interrupt too slow, "
-	       "forcing clock min delta to %lu ns\n", dev->min_delta_ns);
-}
 /*
  * High resolution timer interrupt
  * Called with interrupts disabled
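For the record, the arithmetic behind the deleted heuristic: after five retries it raised dev->min_delta_ns to three times the time already spent in the handler, so if one pass costs T the next event fires no earlier than 3T later, and the CPU spends at most T out of every T + 3T = 4T, the quarter of its time that the deleted comment mentions. The rewrite below replaces the global force_clock_reprogram flag with per-cpu accounting (nr_retries, nr_hangs, max_hang_time, hang_detected).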
@@ -1248,24 +1250,18 @@ void hrtimer_interrupt(struct clock_event_device *dev)
 {
 	struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
 	struct hrtimer_clock_base *base;
-	ktime_t expires_next, now;
-	int nr_retries = 0;
-	int i;
+	ktime_t expires_next, now, entry_time, delta;
+	int i, retries = 0;
 
 	BUG_ON(!cpu_base->hres_active);
 	cpu_base->nr_events++;
 	dev->next_event.tv64 = KTIME_MAX;
 
-retry:
-	/* 5 retries is enough to notice a hang */
-	if (!(++nr_retries % 5))
-		hrtimer_interrupt_hanging(dev, ktime_sub(ktime_get(), now));
-
-	now = ktime_get();
-
+	entry_time = now = ktime_get();
+retry:
 	expires_next.tv64 = KTIME_MAX;
 
-	spin_lock(&cpu_base->lock);
+	raw_spin_lock(&cpu_base->lock);
 	/*
 	 * We set expires_next to KTIME_MAX here with cpu_base->lock
 	 * held to prevent that a timer is enqueued in our queue via
@@ -1321,13 +1317,51 @@ void hrtimer_interrupt(struct clock_event_device *dev)
 	 * against it.
 	 */
 	cpu_base->expires_next = expires_next;
-	spin_unlock(&cpu_base->lock);
+	raw_spin_unlock(&cpu_base->lock);
 
 	/* Reprogramming necessary ? */
-	if (expires_next.tv64 != KTIME_MAX) {
-		if (tick_program_event(expires_next, force_clock_reprogram))
-			goto retry;
+	if (expires_next.tv64 == KTIME_MAX ||
+	    !tick_program_event(expires_next, 0)) {
+		cpu_base->hang_detected = 0;
+		return;
 	}
+
+	/*
+	 * The next timer was already expired due to:
+	 * - tracing
+	 * - long lasting callbacks
+	 * - being scheduled away when running in a VM
+	 *
+	 * We need to prevent that we loop forever in the hrtimer
+	 * interrupt routine. We give it 3 attempts to avoid
+	 * overreacting on some spurious event.
+	 */
+	now = ktime_get();
+	cpu_base->nr_retries++;
+	if (++retries < 3)
+		goto retry;
+	/*
+	 * Give the system a chance to do something else than looping
+	 * here. We stored the entry time, so we know exactly how long
+	 * we spent here. We schedule the next event this amount of
+	 * time away.
+	 */
+	cpu_base->nr_hangs++;
+	cpu_base->hang_detected = 1;
+	delta = ktime_sub(now, entry_time);
+	if (delta.tv64 > cpu_base->max_hang_time.tv64)
+		cpu_base->max_hang_time = delta;
+	/*
+	 * Limit it to a sensible value as we enforce a longer
+	 * delay. Give the CPU at least 100ms to catch up.
+	 */
+	if (delta.tv64 > 100 * NSEC_PER_MSEC)
+		expires_next = ktime_add_ns(now, 100 * NSEC_PER_MSEC);
+	else
+		expires_next = ktime_add(now, delta);
+	tick_program_event(expires_next, 1);
+	printk_once(KERN_WARNING "hrtimer: interrupt took %llu ns\n",
+	            ktime_to_ns(delta));
 }
 
 /*
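The recovery tail computes the deferred expiry from the time measured across the retries, clamped at 100ms. The same computation in isolation, a sketch with int64 nanoseconds standing in for ktime_t and a hypothetical function name:

#include <stdint.h>

#define NSEC_PER_MSEC 1000000LL

/* Mirrors the expires_next computation in the hang-recovery tail. */
static int64_t hang_recovery_expiry(int64_t now, int64_t entry_time)
{
        int64_t delta = now - entry_time;       /* time burnt looping */

        /*
         * Defer the next event by the time just spent, but never by
         * more than 100ms, so the deferral stays bounded while the
         * CPU catches up.
         */
        if (delta > 100 * NSEC_PER_MSEC)
                return now + 100 * NSEC_PER_MSEC;
        return now + delta;
}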
@@ -1423,7 +1457,7 @@ void hrtimer_run_queues(void)
 			gettime = 0;
 		}
 
-		spin_lock(&cpu_base->lock);
+		raw_spin_lock(&cpu_base->lock);
 
 		while ((node = base->first)) {
 			struct hrtimer *timer;
@@ -1435,7 +1469,7 @@ void hrtimer_run_queues(void)
 
 			__run_hrtimer(timer, &base->softirq_time);
 		}
-		spin_unlock(&cpu_base->lock);
+		raw_spin_unlock(&cpu_base->lock);
 	}
 }
 
@@ -1591,7 +1625,7 @@ static void __cpuinit init_hrtimers_cpu(int cpu)
 	struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu);
 	int i;
 
-	spin_lock_init(&cpu_base->lock);
+	raw_spin_lock_init(&cpu_base->lock);
 
 	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++)
 		cpu_base->clock_base[i].cpu_base = cpu_base;
@@ -1649,16 +1683,16 @@ static void migrate_hrtimers(int scpu)
 	 * The caller is globally serialized and nobody else
 	 * takes two locks at once, deadlock is not possible.
 	 */
-	spin_lock(&new_base->lock);
-	spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
+	raw_spin_lock(&new_base->lock);
+	raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
 
 	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
 		migrate_hrtimer_list(&old_base->clock_base[i],
 		                     &new_base->clock_base[i]);
 	}
 
-	spin_unlock(&old_base->lock);
-	spin_unlock(&new_base->lock);
+	raw_spin_unlock(&old_base->lock);
+	raw_spin_unlock(&new_base->lock);
 
 	/* Check, if we got expired work to do */
 	__hrtimer_peek_ahead_timers();
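A closing note on the double-lock above: it is deadlock-free only because, as the comment says, the caller is globally serialized; raw_spin_lock_nested() exists to tell lockdep that the second acquisition is intentional. Where callers are not serialized, the usual idiom is a total order on the locks. A userspace sketch of that general pattern (not what this code does):

#include <pthread.h>

/* Take two mutexes in a fixed (address) order to rule out ABBA deadlock. */
static void lock_pair(pthread_mutex_t *a, pthread_mutex_t *b)
{
        if (a < b) {
                pthread_mutex_lock(a);
                pthread_mutex_lock(b);
        } else {
                pthread_mutex_lock(b);
                pthread_mutex_lock(a);
        }
}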