diff options
Diffstat (limited to 'kernel/time/timer.c')
| -rw-r--r-- | kernel/time/timer.c | 50 |
1 files changed, 41 insertions, 9 deletions
diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 8f5d1bf18854..f2674a056c26 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c | |||
| @@ -203,6 +203,7 @@ struct timer_base { | |||
| 203 | bool migration_enabled; | 203 | bool migration_enabled; |
| 204 | bool nohz_active; | 204 | bool nohz_active; |
| 205 | bool is_idle; | 205 | bool is_idle; |
| 206 | bool must_forward_clk; | ||
| 206 | DECLARE_BITMAP(pending_map, WHEEL_SIZE); | 207 | DECLARE_BITMAP(pending_map, WHEEL_SIZE); |
| 207 | struct hlist_head vectors[WHEEL_SIZE]; | 208 | struct hlist_head vectors[WHEEL_SIZE]; |
| 208 | } ____cacheline_aligned; | 209 | } ____cacheline_aligned; |
| @@ -856,13 +857,19 @@ get_target_base(struct timer_base *base, unsigned tflags) | |||
| 856 | 857 | ||
| 857 | static inline void forward_timer_base(struct timer_base *base) | 858 | static inline void forward_timer_base(struct timer_base *base) |
| 858 | { | 859 | { |
| 859 | unsigned long jnow = READ_ONCE(jiffies); | 860 | unsigned long jnow; |
| 860 | 861 | ||
| 861 | /* | 862 | /* |
| 862 | * We only forward the base when it's idle and we have a delta between | 863 | * We only forward the base when we are idle or have just come out of |
| 863 | * base clock and jiffies. | 864 | * idle (must_forward_clk logic), and have a delta between base clock |
| 865 | * and jiffies. In the common case, run_timers will take care of it. | ||
| 864 | */ | 866 | */ |
| 865 | if (!base->is_idle || (long) (jnow - base->clk) < 2) | 867 | if (likely(!base->must_forward_clk)) |
| 868 | return; | ||
| 869 | |||
| 870 | jnow = READ_ONCE(jiffies); | ||
| 871 | base->must_forward_clk = base->is_idle; | ||
| 872 | if ((long)(jnow - base->clk) < 2) | ||
| 866 | return; | 873 | return; |
| 867 | 874 | ||
| 868 | /* | 875 | /* |
| @@ -938,6 +945,11 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) | |||
| 938 | * same array bucket then just return: | 945 | * same array bucket then just return: |
| 939 | */ | 946 | */ |
| 940 | if (timer_pending(timer)) { | 947 | if (timer_pending(timer)) { |
| 948 | /* | ||
| 949 | * The downside of this optimization is that it can result in | ||
| 950 | * larger granularity than you would get from adding a new | ||
| 951 | * timer with this expiry. | ||
| 952 | */ | ||
| 941 | if (timer->expires == expires) | 953 | if (timer->expires == expires) |
| 942 | return 1; | 954 | return 1; |
| 943 | 955 | ||
| @@ -948,6 +960,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) | |||
| 948 | * dequeue/enqueue dance. | 960 | * dequeue/enqueue dance. |
| 949 | */ | 961 | */ |
| 950 | base = lock_timer_base(timer, &flags); | 962 | base = lock_timer_base(timer, &flags); |
| 963 | forward_timer_base(base); | ||
| 951 | 964 | ||
| 952 | clk = base->clk; | 965 | clk = base->clk; |
| 953 | idx = calc_wheel_index(expires, clk); | 966 | idx = calc_wheel_index(expires, clk); |
| @@ -964,6 +977,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) | |||
| 964 | } | 977 | } |
| 965 | } else { | 978 | } else { |
| 966 | base = lock_timer_base(timer, &flags); | 979 | base = lock_timer_base(timer, &flags); |
| 980 | forward_timer_base(base); | ||
| 967 | } | 981 | } |
| 968 | 982 | ||
| 969 | ret = detach_if_pending(timer, base, false); | 983 | ret = detach_if_pending(timer, base, false); |
| @@ -991,12 +1005,10 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) | |||
| 991 | raw_spin_lock(&base->lock); | 1005 | raw_spin_lock(&base->lock); |
| 992 | WRITE_ONCE(timer->flags, | 1006 | WRITE_ONCE(timer->flags, |
| 993 | (timer->flags & ~TIMER_BASEMASK) | base->cpu); | 1007 | (timer->flags & ~TIMER_BASEMASK) | base->cpu); |
| 1008 | forward_timer_base(base); | ||
| 994 | } | 1009 | } |
| 995 | } | 1010 | } |
| 996 | 1011 | ||
| 997 | /* Try to forward a stale timer base clock */ | ||
| 998 | forward_timer_base(base); | ||
| 999 | |||
| 1000 | timer->expires = expires; | 1012 | timer->expires = expires; |
| 1001 | /* | 1013 | /* |
| 1002 | * If 'idx' was calculated above and the base time did not advance | 1014 | * If 'idx' was calculated above and the base time did not advance |
| @@ -1112,6 +1124,7 @@ void add_timer_on(struct timer_list *timer, int cpu) | |||
| 1112 | WRITE_ONCE(timer->flags, | 1124 | WRITE_ONCE(timer->flags, |
| 1113 | (timer->flags & ~TIMER_BASEMASK) | cpu); | 1125 | (timer->flags & ~TIMER_BASEMASK) | cpu); |
| 1114 | } | 1126 | } |
| 1127 | forward_timer_base(base); | ||
| 1115 | 1128 | ||
| 1116 | debug_activate(timer, timer->expires); | 1129 | debug_activate(timer, timer->expires); |
| 1117 | internal_add_timer(base, timer); | 1130 | internal_add_timer(base, timer); |
| @@ -1497,10 +1510,16 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem) | |||
| 1497 | if (!is_max_delta) | 1510 | if (!is_max_delta) |
| 1498 | expires = basem + (u64)(nextevt - basej) * TICK_NSEC; | 1511 | expires = basem + (u64)(nextevt - basej) * TICK_NSEC; |
| 1499 | /* | 1512 | /* |
| 1500 | * If we expect to sleep more than a tick, mark the base idle: | 1513 | * If we expect to sleep more than a tick, mark the base idle. |
| 1514 | * Also the tick is stopped so any added timer must forward | ||
| 1515 | * the base clk itself to keep granularity small. This idle | ||
| 1516 | * logic is only maintained for the BASE_STD base, deferrable | ||
| 1517 | * timers may still see large granularity skew (by design). | ||
| 1501 | */ | 1518 | */ |
| 1502 | if ((expires - basem) > TICK_NSEC) | 1519 | if ((expires - basem) > TICK_NSEC) { |
| 1520 | base->must_forward_clk = true; | ||
| 1503 | base->is_idle = true; | 1521 | base->is_idle = true; |
| 1522 | } | ||
| 1504 | } | 1523 | } |
| 1505 | raw_spin_unlock(&base->lock); | 1524 | raw_spin_unlock(&base->lock); |
| 1506 | 1525 | ||
| @@ -1611,6 +1630,19 @@ static __latent_entropy void run_timer_softirq(struct softirq_action *h) | |||
| 1611 | { | 1630 | { |
| 1612 | struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); | 1631 | struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); |
| 1613 | 1632 | ||
| 1633 | /* | ||
| 1634 | * must_forward_clk must be cleared before running timers so that any | ||
| 1635 | * timer functions that call mod_timer will not try to forward the | ||
| 1636 | base. idle tracking / clock forwarding logic is only used with | ||
| 1637 | * BASE_STD timers. | ||
| 1638 | * | ||
| 1639 | * The deferrable base does not do idle tracking at all, so we do | ||
| 1640 | * not forward it. This can result in very large variations in | ||
| 1641 | * granularity for deferrable timers, but they can be deferred for | ||
| 1642 | * long periods due to idle. | ||
| 1643 | */ | ||
| 1644 | base->must_forward_clk = false; | ||
| 1645 | |||
| 1614 | __run_timers(base); | 1646 | __run_timers(base); |
| 1615 | if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active) | 1647 | if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active) |
| 1616 | __run_timers(this_cpu_ptr(&timer_bases[BASE_DEF])); | 1648 | __run_timers(this_cpu_ptr(&timer_bases[BASE_DEF])); |
