author      Thomas Gleixner <tglx@linutronix.de>    2016-07-04 05:50:36 -0400
committer   Ingo Molnar <mingo@kernel.org>          2016-07-07 04:35:11 -0400
commit      a683f390b93f4d1292f849fc48d28e322046120f (patch)
tree        de6367ae59e62bad9135eeff3b35512158e335e5 /kernel/time/timer.c
parent      ff00673292bd42a3688b33de47252a6a3c3f424c (diff)
timers: Forward the wheel clock whenever possible
The wheel clock is stale when a CPU goes into a long idle sleep. This has the
side effect that timers which are queued end up in the outer wheel levels.
That results in coarser granularity.

To solve this, we keep track of the idle state and forward the wheel clock
whenever possible.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Arjan van de Ven <arjan@infradead.org>
Cc: Chris Mason <clm@fb.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: George Spelvin <linux@sciencehorizons.net>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Len Brown <lenb@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: rt@linutronix.de
Link: http://lkml.kernel.org/r/20160704094342.512039360@linutronix.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
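As a rough illustration of the forwarding rule described in the changelog (a stand-alone user-space sketch, not the kernel code; fake_base, forward() and the hard-coded tick values are invented for the example): an idle base whose clock lags jiffies is advanced either to the current jiffies or to its earliest pending expiry, whichever comes first, so newly queued timers land in the inner, finer-grained wheel levels again.

#include <stdbool.h>
#include <stdio.h>

/* Wrap-safe "a is later than b", mirroring the kernel's time_after(). */
static bool time_after(unsigned long a, unsigned long b)
{
        return (long)(b - a) < 0;
}

struct fake_base {
        unsigned long clk;              /* last tick the wheel was advanced to */
        unsigned long next_expiry;      /* earliest queued expiry known to the base */
        bool is_idle;
};

static void forward(struct fake_base *base, unsigned long jiffies)
{
        /* Only forward an idle base with a real delta, as in the patch. */
        if (!base->is_idle || (long)(jiffies - base->clk) < 2)
                return;

        /* Fast forward to jiffies, but never past the first pending expiry. */
        if (time_after(base->next_expiry, jiffies))
                base->clk = jiffies;
        else
                base->clk = base->next_expiry;
}

int main(void)
{
        struct fake_base base = { .clk = 1000, .next_expiry = 5000, .is_idle = true };

        /* CPU slept for 3000 ticks; nothing queued expired in between. */
        forward(&base, 4000);
        printf("clk forwarded to %lu (jiffies)\n", base.clk);          /* 4000 */

        /* The first pending expiry is already behind jiffies: stop there. */
        base.next_expiry = 4200;
        forward(&base, 6000);
        printf("clk forwarded to %lu (next_expiry)\n", base.clk);      /* 4200 */
        return 0;
}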
Diffstat (limited to 'kernel/time/timer.c')
-rw-r--r--  kernel/time/timer.c  128
1 file changed, 107 insertions(+), 21 deletions(-)
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 658051c97a3c..9339d71ee998 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -196,9 +196,11 @@ struct timer_base {
         spinlock_t              lock;
         struct timer_list       *running_timer;
         unsigned long           clk;
+        unsigned long           next_expiry;
         unsigned int            cpu;
         bool                    migration_enabled;
         bool                    nohz_active;
+        bool                    is_idle;
         DECLARE_BITMAP(pending_map, WHEEL_SIZE);
         struct hlist_head       vectors[WHEEL_SIZE];
 } ____cacheline_aligned;
@@ -519,24 +521,37 @@ static void internal_add_timer(struct timer_base *base, struct timer_list *timer
 {
         __internal_add_timer(base, timer);
 
+        if (!IS_ENABLED(CONFIG_NO_HZ_COMMON) || !base->nohz_active)
+                return;
+
         /*
-         * Check whether the other CPU is in dynticks mode and needs
-         * to be triggered to reevaluate the timer wheel. We are
-         * protected against the other CPU fiddling with the timer by
-         * holding the timer base lock. This also makes sure that a
-         * CPU on the way to stop its tick can not evaluate the timer
-         * wheel.
-         *
-         * Spare the IPI for deferrable timers on idle targets though.
-         * The next busy ticks will take care of it. Except full dynticks
-         * require special care against races with idle_cpu(), lets deal
-         * with that later.
+         * TODO: This wants some optimizing similar to the code below, but we
+         * will do that when we switch from push to pull for deferrable timers.
          */
-        if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active) {
-                if (!(timer->flags & TIMER_DEFERRABLE) ||
-                    tick_nohz_full_cpu(base->cpu))
+        if (timer->flags & TIMER_DEFERRABLE) {
+                if (tick_nohz_full_cpu(base->cpu))
                         wake_up_nohz_cpu(base->cpu);
+                return;
         }
+
+        /*
+         * We might have to IPI the remote CPU if the base is idle and the
+         * timer is not deferrable. If the other CPU is on the way to idle
+         * then it can't set base->is_idle as we hold the base lock:
+         */
+        if (!base->is_idle)
+                return;
+
+        /* Check whether this is the new first expiring timer: */
+        if (time_after_eq(timer->expires, base->next_expiry))
+                return;
+
+        /*
+         * Set the next expiry time and kick the CPU so it can reevaluate the
+         * wheel:
+         */
+        base->next_expiry = timer->expires;
+        wake_up_nohz_cpu(base->cpu);
 }
 
 #ifdef CONFIG_TIMER_STATS
@@ -844,10 +859,11 @@ static inline struct timer_base *get_timer_base(u32 tflags)
         return get_timer_cpu_base(tflags, tflags & TIMER_CPUMASK);
 }
 
-static inline struct timer_base *get_target_base(struct timer_base *base,
-                                                 unsigned tflags)
+#ifdef CONFIG_NO_HZ_COMMON
+static inline struct timer_base *
+__get_target_base(struct timer_base *base, unsigned tflags)
 {
-#if defined(CONFIG_NO_HZ_COMMON) && defined(CONFIG_SMP)
+#ifdef CONFIG_SMP
         if ((tflags & TIMER_PINNED) || !base->migration_enabled)
                 return get_timer_this_cpu_base(tflags);
         return get_timer_cpu_base(tflags, get_nohz_timer_target());
@@ -856,6 +872,43 @@ static inline struct timer_base *get_target_base(struct timer_base *base,
 #endif
 }
 
+static inline void forward_timer_base(struct timer_base *base)
+{
+        /*
+         * We only forward the base when it's idle and we have a delta between
+         * base clock and jiffies.
+         */
+        if (!base->is_idle || (long) (jiffies - base->clk) < 2)
+                return;
+
+        /*
+         * If the next expiry value is > jiffies, then we fast forward to
+         * jiffies otherwise we forward to the next expiry value.
+         */
+        if (time_after(base->next_expiry, jiffies))
+                base->clk = jiffies;
+        else
+                base->clk = base->next_expiry;
+}
+#else
+static inline struct timer_base *
+__get_target_base(struct timer_base *base, unsigned tflags)
+{
+        return get_timer_this_cpu_base(tflags);
+}
+
+static inline void forward_timer_base(struct timer_base *base) { }
+#endif
+
+static inline struct timer_base *
+get_target_base(struct timer_base *base, unsigned tflags)
+{
+        struct timer_base *target = __get_target_base(base, tflags);
+
+        forward_timer_base(target);
+        return target;
+}
+
 /*
  * We are using hashed locking: Holding per_cpu(timer_bases[x]).lock means
  * that all timers which are tied to this base are locked, and the base itself
@@ -1417,16 +1470,49 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
 
         spin_lock(&base->lock);
         nextevt = __next_timer_interrupt(base);
-        spin_unlock(&base->lock);
+        base->next_expiry = nextevt;
+        /*
+         * We have a fresh next event. Check whether we can forward the base:
+         */
+        if (time_after(nextevt, jiffies))
+                base->clk = jiffies;
+        else if (time_after(nextevt, base->clk))
+                base->clk = nextevt;
 
-        if (time_before_eq(nextevt, basej))
+        if (time_before_eq(nextevt, basej)) {
                 expires = basem;
-        else
+                base->is_idle = false;
+        } else {
                 expires = basem + (nextevt - basej) * TICK_NSEC;
+                /*
+                 * If we expect to sleep more than a tick, mark the base idle:
+                 */
+                if ((expires - basem) > TICK_NSEC)
+                        base->is_idle = true;
+        }
+        spin_unlock(&base->lock);
 
         return cmp_next_hrtimer_event(basem, expires);
 }
 
+/**
+ * timer_clear_idle - Clear the idle state of the timer base
+ *
+ * Called with interrupts disabled
+ */
+void timer_clear_idle(void)
+{
+        struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
+
+        /*
+         * We do this unlocked. The worst outcome is a remote enqueue sending
+         * a pointless IPI, but taking the lock would just make the window for
+         * sending the IPI a few instructions smaller for the cost of taking
+         * the lock in the exit from idle path.
+         */
+        base->is_idle = false;
+}
+
 static int collect_expired_timers(struct timer_base *base,
                                   struct hlist_head *heads)
 {
@@ -1440,7 +1526,7 @@ static int collect_expired_timers(struct timer_base *base,
 
         /*
          * If the next timer is ahead of time forward to current
-         * jiffies, otherwise forward to the next expiry time.
+         * jiffies, otherwise forward to the next expiry time:
          */
         if (time_after(next, jiffies)) {
                 /* The call site will increment clock! */
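For the internal_add_timer() hunk above, the new remote-enqueue decision can likewise be sketched in stand-alone user-space C (the deferrable and nohz_full branches are left out; fake_base and needs_wakeup are invented names for illustration): an IPI is only worth sending when the target base is idle and the new timer expires before the wakeup the idle code already programmed.

#include <stdbool.h>
#include <stdio.h>

/* Wrap-safe "a is not earlier than b", mirroring the kernel's time_after_eq(). */
static bool time_after_eq(unsigned long a, unsigned long b)
{
        return (long)(a - b) >= 0;
}

struct fake_base {
        unsigned long next_expiry;      /* wakeup the idle code already programmed */
        bool is_idle;
};

/* Returns true when enqueueing a timer with this expiry must IPI the remote CPU. */
static bool needs_wakeup(struct fake_base *base, unsigned long expires)
{
        if (!base->is_idle)
                return false;   /* a busy CPU picks the timer up on its next tick */
        if (time_after_eq(expires, base->next_expiry))
                return false;   /* an earlier or equal wakeup is already programmed */
        base->next_expiry = expires;    /* new first expiring timer */
        return true;
}

int main(void)
{
        struct fake_base base = { .next_expiry = 5000, .is_idle = true };

        printf("%d\n", needs_wakeup(&base, 6000));      /* 0: later than the programmed wakeup */
        printf("%d\n", needs_wakeup(&base, 4000));      /* 1: earlier, so kick the CPU */
        printf("%d\n", needs_wakeup(&base, 4500));      /* 0: 4000 is already programmed */
        return 0;
}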