path: root/kernel/hrtimer.c
Diffstat (limited to 'kernel/hrtimer.c')
-rw-r--r--   kernel/hrtimer.c   172
1 file changed, 102 insertions(+), 70 deletions(-)
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 6d7020490f94..0086628b6e97 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -127,11 +127,11 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
 	for (;;) {
 		base = timer->base;
 		if (likely(base != NULL)) {
-			spin_lock_irqsave(&base->cpu_base->lock, *flags);
+			raw_spin_lock_irqsave(&base->cpu_base->lock, *flags);
 			if (likely(base == timer->base))
 				return base;
 			/* The timer has migrated to another CPU: */
-			spin_unlock_irqrestore(&base->cpu_base->lock, *flags);
+			raw_spin_unlock_irqrestore(&base->cpu_base->lock, *flags);
 		}
 		cpu_relax();
 	}
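
Note: the hunk above keeps the existing lock-and-recheck idiom; only the lock type changes. The base pointer is re-read after the lock is taken because the timer may have been migrated to another CPU's base between the unlocked load and the lock acquisition. Below is a minimal userspace sketch of the same idiom, using pthreads and made-up names (struct obj, struct home) purely for illustration, not kernel code:

/*
 * Sketch of the "lock, then re-check the owner" idiom used by
 * lock_hrtimer_base(). All names are hypothetical; real code would
 * also need proper memory ordering for the unlocked o->home load.
 */
#include <pthread.h>
#include <sched.h>
#include <stddef.h>

struct home { pthread_mutex_t lock; };
struct obj  { struct home *home; };	/* may be switched concurrently */

static struct home *lock_obj_home(struct obj *o)
{
	for (;;) {
		struct home *h = o->home;	/* snapshot without the lock */
		if (h != NULL) {
			pthread_mutex_lock(&h->lock);
			if (h == o->home)	/* still the owner? */
				return h;	/* yes: return with lock held */
			/* migrated meanwhile: drop the stale lock and retry */
			pthread_mutex_unlock(&h->lock);
		}
		sched_yield();			/* stands in for cpu_relax() */
	}
}
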
@@ -208,13 +208,13 @@ again:
 
 	/* See the comment in lock_timer_base() */
 	timer->base = NULL;
-	spin_unlock(&base->cpu_base->lock);
-	spin_lock(&new_base->cpu_base->lock);
+	raw_spin_unlock(&base->cpu_base->lock);
+	raw_spin_lock(&new_base->cpu_base->lock);
 
 	if (cpu != this_cpu && hrtimer_check_target(timer, new_base)) {
 		cpu = this_cpu;
-		spin_unlock(&new_base->cpu_base->lock);
-		spin_lock(&base->cpu_base->lock);
+		raw_spin_unlock(&new_base->cpu_base->lock);
+		raw_spin_lock(&base->cpu_base->lock);
 		timer->base = base;
 		goto again;
 	}
@@ -230,7 +230,7 @@ lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
 {
 	struct hrtimer_clock_base *base = timer->base;
 
-	spin_lock_irqsave(&base->cpu_base->lock, *flags);
+	raw_spin_lock_irqsave(&base->cpu_base->lock, *flags);
 
 	return base;
 }
@@ -557,7 +557,7 @@ hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal)
 static int hrtimer_reprogram(struct hrtimer *timer,
 			     struct hrtimer_clock_base *base)
 {
-	ktime_t *expires_next = &__get_cpu_var(hrtimer_bases).expires_next;
+	struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
 	ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
 	int res;
 
@@ -582,7 +582,16 @@ static int hrtimer_reprogram(struct hrtimer *timer,
 	if (expires.tv64 < 0)
 		return -ETIME;
 
-	if (expires.tv64 >= expires_next->tv64)
+	if (expires.tv64 >= cpu_base->expires_next.tv64)
+		return 0;
+
+	/*
+	 * If a hang was detected in the last timer interrupt then we
+	 * do not schedule a timer which is earlier than the expiry
+	 * which we enforced in the hang detection. We want the system
+	 * to make progress.
+	 */
+	if (cpu_base->hang_detected)
 		return 0;
 
 	/*
@@ -590,7 +599,7 @@ static int hrtimer_reprogram(struct hrtimer *timer,
 	 */
 	res = tick_program_event(expires, 0);
 	if (!IS_ERR_VALUE(res))
-		*expires_next = expires;
+		cpu_base->expires_next = expires;
 	return res;
 }
 
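Note: the hunks above change hrtimer_reprogram() to keep the whole per-cpu hrtimer_cpu_base around instead of only a pointer to its expires_next field, because the function now also needs the hang state. The fields this patch reads and writes (expires_next, hang_detected, and further below nr_retries, nr_hangs, max_hang_time) live in struct hrtimer_cpu_base in include/linux/hrtimer.h, which is outside this file's diff; the sketch below merely collects the fields referenced here and is an assumption, not the verbatim header:

/* Assumed shape of the fields this patch uses; not copied from the header. */
typedef struct { long long tv64; } ktime_t;	/* stand-in for the kernel type */

struct hrtimer_cpu_base_sketch {
	/* ... lock, clock_base[], hres_active, ... */
	ktime_t		expires_next;	/* next event programmed into the device */
	int		hang_detected;	/* last interrupt hit the retry limit */
	unsigned long	nr_events;	/* total hrtimer interrupts */
	unsigned long	nr_retries;	/* interrupts that needed a retry */
	unsigned long	nr_hangs;	/* interrupts declared as hangs */
	ktime_t		max_hang_time;	/* longest observed interrupt time */
};
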
@@ -619,12 +628,12 @@ static void retrigger_next_event(void *arg)
 	base = &__get_cpu_var(hrtimer_bases);
 
 	/* Adjust CLOCK_REALTIME offset */
-	spin_lock(&base->lock);
+	raw_spin_lock(&base->lock);
 	base->clock_base[CLOCK_REALTIME].offset =
 		timespec_to_ktime(realtime_offset);
 
 	hrtimer_force_reprogram(base, 0);
-	spin_unlock(&base->lock);
+	raw_spin_unlock(&base->lock);
 }
 
 /*
@@ -685,9 +694,9 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
 {
 	if (base->cpu_base->hres_active && hrtimer_reprogram(timer, base)) {
 		if (wakeup) {
-			spin_unlock(&base->cpu_base->lock);
+			raw_spin_unlock(&base->cpu_base->lock);
 			raise_softirq_irqoff(HRTIMER_SOFTIRQ);
-			spin_lock(&base->cpu_base->lock);
+			raw_spin_lock(&base->cpu_base->lock);
 		} else
 			__raise_softirq_irqoff(HRTIMER_SOFTIRQ);
 
@@ -726,8 +735,6 @@ static int hrtimer_switch_to_hres(void)
 	/* "Retrigger" the interrupt to get things going */
 	retrigger_next_event(NULL);
 	local_irq_restore(flags);
-	printk(KERN_DEBUG "Switched to high resolution mode on CPU %d\n",
-	       smp_processor_id());
 	return 1;
 }
 
@@ -749,17 +756,33 @@ static inline void hrtimer_init_timer_hres(struct hrtimer *timer) { }
 
 #endif /* CONFIG_HIGH_RES_TIMERS */
 
-#ifdef CONFIG_TIMER_STATS
-void __timer_stats_hrtimer_set_start_info(struct hrtimer *timer, void *addr)
+static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer)
 {
+#ifdef CONFIG_TIMER_STATS
 	if (timer->start_site)
 		return;
-
-	timer->start_site = addr;
+	timer->start_site = __builtin_return_address(0);
 	memcpy(timer->start_comm, current->comm, TASK_COMM_LEN);
 	timer->start_pid = current->pid;
+#endif
 }
+
+static inline void timer_stats_hrtimer_clear_start_info(struct hrtimer *timer)
+{
+#ifdef CONFIG_TIMER_STATS
+	timer->start_site = NULL;
+#endif
+}
+
+static inline void timer_stats_account_hrtimer(struct hrtimer *timer)
+{
+#ifdef CONFIG_TIMER_STATS
+	if (likely(!timer_stats_active))
+		return;
+	timer_stats_update_stats(timer, timer->start_pid, timer->start_site,
+				 timer->function, timer->start_comm, 0);
 #endif
+}
 
 /*
  * Counterpart to lock_hrtimer_base above:
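
Note: the hunk above folds the CONFIG_TIMER_STATS handling into always-present static inline wrappers: callers no longer need their own #ifdefs, and with the option disabled each wrapper compiles down to nothing. Below is a generic, self-contained illustration of that pattern with made-up names (CONFIG_FOO_STATS, struct foo), not the hrtimer API:

/* "Unconditional caller, conditional body" helper pattern. */
#include <stdio.h>

/* #define CONFIG_FOO_STATS 1 */	/* flip to compile the stats body in */

struct foo {
#ifdef CONFIG_FOO_STATS
	const void *start_site;
#endif
	int value;
};

static inline void foo_stats_set_start_info(struct foo *f)
{
#ifdef CONFIG_FOO_STATS
	if (f->start_site)
		return;
	f->start_site = __builtin_return_address(0);
#else
	(void)f;			/* becomes a no-op when stats are off */
#endif
}

int main(void)
{
	struct foo f = { .value = 42 };

	foo_stats_set_start_info(&f);	/* caller never needs an #ifdef */
	printf("%d\n", f.value);
	return 0;
}
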
@@ -767,7 +790,7 @@ void __timer_stats_hrtimer_set_start_info(struct hrtimer *timer, void *addr)
 static inline
 void unlock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
 {
-	spin_unlock_irqrestore(&timer->base->cpu_base->lock, *flags);
+	raw_spin_unlock_irqrestore(&timer->base->cpu_base->lock, *flags);
 }
 
 /**
@@ -1100,7 +1123,7 @@ ktime_t hrtimer_get_next_event(void)
 	unsigned long flags;
 	int i;
 
-	spin_lock_irqsave(&cpu_base->lock, flags);
+	raw_spin_lock_irqsave(&cpu_base->lock, flags);
 
 	if (!hrtimer_hres_active()) {
 		for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) {
@@ -1117,7 +1140,7 @@ ktime_t hrtimer_get_next_event(void)
 		}
 	}
 
-	spin_unlock_irqrestore(&cpu_base->lock, flags);
+	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
 
 	if (mindelta.tv64 < 0)
 		mindelta.tv64 = 0;
@@ -1199,11 +1222,11 @@ static void __run_hrtimer(struct hrtimer *timer, ktime_t *now)
 	 * they get migrated to another cpu, therefore its safe to unlock
 	 * the timer base.
 	 */
-	spin_unlock(&cpu_base->lock);
+	raw_spin_unlock(&cpu_base->lock);
 	trace_hrtimer_expire_entry(timer, now);
 	restart = fn(timer);
 	trace_hrtimer_expire_exit(timer);
-	spin_lock(&cpu_base->lock);
+	raw_spin_lock(&cpu_base->lock);
 
 	/*
 	 * Note: We clear the CALLBACK bit after enqueue_hrtimer and
@@ -1219,29 +1242,6 @@ static void __run_hrtimer(struct hrtimer *timer, ktime_t *now)
 
 #ifdef CONFIG_HIGH_RES_TIMERS
 
-static int force_clock_reprogram;
-
-/*
- * After 5 iteration's attempts, we consider that hrtimer_interrupt()
- * is hanging, which could happen with something that slows the interrupt
- * such as the tracing. Then we force the clock reprogramming for each future
- * hrtimer interrupts to avoid infinite loops and use the min_delta_ns
- * threshold that we will overwrite.
- * The next tick event will be scheduled to 3 times we currently spend on
- * hrtimer_interrupt(). This gives a good compromise, the cpus will spend
- * 1/4 of their time to process the hrtimer interrupts. This is enough to
- * let it running without serious starvation.
- */
-
-static inline void
-hrtimer_interrupt_hanging(struct clock_event_device *dev,
-			  ktime_t try_time)
-{
-	force_clock_reprogram = 1;
-	dev->min_delta_ns = (unsigned long)try_time.tv64 * 3;
-	printk(KERN_WARNING "hrtimer: interrupt too slow, "
-	       "forcing clock min delta to %lu ns\n", dev->min_delta_ns);
-}
 /*
  * High resolution timer interrupt
  * Called with interrupts disabled
@@ -1250,24 +1250,18 @@ void hrtimer_interrupt(struct clock_event_device *dev)
 {
 	struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
 	struct hrtimer_clock_base *base;
-	ktime_t expires_next, now;
-	int nr_retries = 0;
-	int i;
+	ktime_t expires_next, now, entry_time, delta;
+	int i, retries = 0;
 
 	BUG_ON(!cpu_base->hres_active);
 	cpu_base->nr_events++;
 	dev->next_event.tv64 = KTIME_MAX;
 
- retry:
-	/* 5 retries is enough to notice a hang */
-	if (!(++nr_retries % 5))
-		hrtimer_interrupt_hanging(dev, ktime_sub(ktime_get(), now));
-
-	now = ktime_get();
-
+	entry_time = now = ktime_get();
+retry:
 	expires_next.tv64 = KTIME_MAX;
 
-	spin_lock(&cpu_base->lock);
+	raw_spin_lock(&cpu_base->lock);
 	/*
 	 * We set expires_next to KTIME_MAX here with cpu_base->lock
 	 * held to prevent that a timer is enqueued in our queue via
@@ -1323,13 +1317,51 @@ void hrtimer_interrupt(struct clock_event_device *dev)
 	 * against it.
 	 */
 	cpu_base->expires_next = expires_next;
-	spin_unlock(&cpu_base->lock);
+	raw_spin_unlock(&cpu_base->lock);
 
 	/* Reprogramming necessary ? */
-	if (expires_next.tv64 != KTIME_MAX) {
-		if (tick_program_event(expires_next, force_clock_reprogram))
-			goto retry;
+	if (expires_next.tv64 == KTIME_MAX ||
+	    !tick_program_event(expires_next, 0)) {
+		cpu_base->hang_detected = 0;
+		return;
 	}
+
+	/*
+	 * The next timer was already expired due to:
+	 * - tracing
+	 * - long lasting callbacks
+	 * - being scheduled away when running in a VM
+	 *
+	 * We need to prevent that we loop forever in the hrtimer
+	 * interrupt routine. We give it 3 attempts to avoid
+	 * overreacting on some spurious event.
+	 */
+	now = ktime_get();
+	cpu_base->nr_retries++;
+	if (++retries < 3)
+		goto retry;
+	/*
+	 * Give the system a chance to do something else than looping
+	 * here. We stored the entry time, so we know exactly how long
+	 * we spent here. We schedule the next event this amount of
+	 * time away.
+	 */
+	cpu_base->nr_hangs++;
+	cpu_base->hang_detected = 1;
+	delta = ktime_sub(now, entry_time);
+	if (delta.tv64 > cpu_base->max_hang_time.tv64)
+		cpu_base->max_hang_time = delta;
+	/*
+	 * Limit it to a sensible value as we enforce a longer
+	 * delay. Give the CPU at least 100ms to catch up.
+	 */
+	if (delta.tv64 > 100 * NSEC_PER_MSEC)
+		expires_next = ktime_add_ns(now, 100 * NSEC_PER_MSEC);
+	else
+		expires_next = ktime_add(now, delta);
+	tick_program_event(expires_next, 1);
+	printk_once(KERN_WARNING "hrtimer: interrupt took %llu ns\n",
+		    ktime_to_ns(delta));
 }
 
 /*
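
Note: the hunk above replaces the old force_clock_reprogram heuristic with a bounded retry plus backoff: expire what is due, try to arm the next event, retry up to three times if the deadline has already passed, and after that declare a hang and push the next event out by the time the handler itself just consumed (capped at 100 ms). Below is a standalone sketch of only that decision logic, with int64_t nanoseconds and stub helpers standing in for ktime_get()/tick_program_event(); it illustrates the policy and is not the kernel code:

/* Userspace model of the hang-detection/backoff policy added above. */
#include <stdint.h>
#include <stdio.h>
#include <time.h>

#define NSEC_PER_MSEC 1000000LL

static int64_t now_ns(void)			/* stands in for ktime_get() */
{
	struct timespec ts;
	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (int64_t)ts.tv_sec * 1000000000LL + ts.tv_nsec;
}

/* stands in for tick_program_event(): fails (nonzero) when the requested
 * expiry already lies in the past and force is not set */
static int program_event(int64_t expires, int force)
{
	return !force && expires <= now_ns();
}

struct cpu_state { int hang_detected; int64_t max_hang_ns; };

static void interrupt_tail(struct cpu_state *cpu, int64_t entry_time,
			   int64_t expires_next)
{
	int retries = 0;

	for (;;) {
		/* ... expire due timers, recompute expires_next ... */
		if (!program_event(expires_next, 0)) {
			cpu->hang_detected = 0;		/* normal exit */
			return;
		}
		if (++retries < 3)
			continue;			/* the "goto retry" */

		/* hang: back off by the time already spent, 100ms at most */
		int64_t delta = now_ns() - entry_time;
		if (delta > cpu->max_hang_ns)
			cpu->max_hang_ns = delta;
		cpu->hang_detected = 1;
		if (delta > 100 * NSEC_PER_MSEC)
			delta = 100 * NSEC_PER_MSEC;
		program_event(now_ns() + delta, 1);	/* forced reprogram */
		fprintf(stderr, "interrupt took %lld ns\n", (long long)delta);
		return;
	}
}

int main(void)
{
	struct cpu_state cpu = { 0, 0 };

	/* a deadline already in the past exercises the hang path */
	interrupt_tail(&cpu, now_ns(), now_ns() - 1);
	return cpu.hang_detected ? 0 : 1;
}
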
@@ -1425,7 +1457,7 @@ void hrtimer_run_queues(void)
 			gettime = 0;
 		}
 
-		spin_lock(&cpu_base->lock);
+		raw_spin_lock(&cpu_base->lock);
 
 		while ((node = base->first)) {
 			struct hrtimer *timer;
@@ -1437,7 +1469,7 @@ void hrtimer_run_queues(void)
 
 			__run_hrtimer(timer, &base->softirq_time);
 		}
-		spin_unlock(&cpu_base->lock);
+		raw_spin_unlock(&cpu_base->lock);
 	}
 }
 
@@ -1593,7 +1625,7 @@ static void __cpuinit init_hrtimers_cpu(int cpu)
 	struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu);
 	int i;
 
-	spin_lock_init(&cpu_base->lock);
+	raw_spin_lock_init(&cpu_base->lock);
 
 	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++)
 		cpu_base->clock_base[i].cpu_base = cpu_base;
@@ -1651,16 +1683,16 @@ static void migrate_hrtimers(int scpu)
 	 * The caller is globally serialized and nobody else
 	 * takes two locks at once, deadlock is not possible.
 	 */
-	spin_lock(&new_base->lock);
-	spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
+	raw_spin_lock(&new_base->lock);
+	raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
 
 	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
 		migrate_hrtimer_list(&old_base->clock_base[i],
 				     &new_base->clock_base[i]);
 	}
 
-	spin_unlock(&old_base->lock);
-	spin_unlock(&new_base->lock);
+	raw_spin_unlock(&old_base->lock);
+	raw_spin_unlock(&new_base->lock);
 
 	/* Check, if we got expired work to do */
 	__hrtimer_peek_ahead_timers();