path: root/kernel/time/timer.c
author	Linus Torvalds <torvalds@linux-foundation.org>	2015-06-22 21:57:44 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2015-06-22 21:57:44 -0400
commit	43224b96af3154cedd7220f7b90094905f07ac78 (patch)
tree	44279acc4613b314ff031620fd62641db3c85b71 /kernel/time/timer.c
parent	d70b3ef54ceaf1c7c92209f5a662a670d04cbed9 (diff)
parent	1cb6c2151850584ee805fdcf088af0bb81f4b086 (diff)
Merge branch 'timers-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull timer updates from Thomas Gleixner:
 "A rather largish update for everything time and timer related:

   - Cache footprint optimizations for both hrtimers and timer wheel

   - Lower the NOHZ impact on systems which have NOHZ or timer migration
     disabled at runtime.

   - Optimize run time overhead of hrtimer interrupt by making the clock
     offset updates smarter

   - hrtimer cleanups and removal of restrictions to tackle some
     problems in sched/perf

   - Some more leap second tweaks

   - Another round of changes addressing the 2038 problem

   - First step to change the internals of clock event devices by
     introducing the necessary infrastructure

   - Allow constant folding for usecs/msecs_to_jiffies()

   - The usual pile of clockevent/clocksource driver updates

  The hrtimer changes contain updates to sched, perf and x86 as they
  depend on them, plus changes all over the tree to clean up API changes
  and redundant code, which got copied all over the place. The y2038
  changes touch s390 to remove the last non-2038-safe code related to
  the boot/persistent clock"

* 'timers-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (114 commits)
  clocksource: Increase dependencies of timer-stm32 to limit build wreckage
  timer: Minimize nohz off overhead
  timer: Reduce timer migration overhead if disabled
  timer: Stats: Simplify the flags handling
  timer: Replace timer base by a cpu index
  timer: Use hlist for the timer wheel hash buckets
  timer: Remove FIFO "guarantee"
  timers: Sanitize catchup_timer_jiffies() usage
  hrtimer: Allow hrtimer::function() to free the timer
  seqcount: Introduce raw_write_seqcount_barrier()
  seqcount: Rename write_seqcount_barrier()
  hrtimer: Fix hrtimer_is_queued() hole
  hrtimer: Remove HRTIMER_STATE_MIGRATE
  selftest: Timers: Avoid signal deadlock in leap-a-day
  timekeeping: Copy the shadow-timekeeper over the real timekeeper last
  clockevents: Check state instead of mode in suspend/resume path
  selftests: timers: Add leap-second timer edge testing to leap-a-day.c
  ntp: Do leapsecond adjustment in adjtimex read path
  time: Prevent early expiry of hrtimers[CLOCK_REALTIME] at the leap second edge
  ntp: Introduce and use SECS_PER_DAY macro instead of 86400
  ...
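Of the commits above, "timer: Replace timer base by a cpu index" accounts for most of the churn in this file: instead of hiding TIMER_DEFERRABLE/TIMER_IRQSAFE in the low bits of the timer->base pointer, the new code keeps a plain u32 timer->flags that carries the owning CPU in its low bits and the flag bits above them, so lock_timer_base() can look up the per-cpu base with per_cpu_ptr(&tvec_bases, flags & TIMER_CPUMASK). Below is a minimal userspace sketch of that encoding; the EX_* constants and the bit layout are made up for illustration and are not the kernel's definitions.

/*
 * Toy model (not kernel code) of the "timer base by cpu index" idea:
 * one u32 carries the owning CPU in its low bits and TIMER_* state
 * flags above them, so no pointer-masking tricks are needed.
 * All EX_* values below are illustrative, not the kernel's.
 */
#include <stdint.h>
#include <stdio.h>

#define EX_TIMER_CPUMASK	0x0003FFFFu	/* hypothetical: low bits = CPU index */
#define EX_TIMER_MIGRATING	0x00040000u	/* hypothetical flag bits above it */
#define EX_TIMER_DEFERRABLE	0x00080000u
#define EX_TIMER_IRQSAFE	0x00100000u
#define EX_TIMER_BASEMASK	(EX_TIMER_CPUMASK | EX_TIMER_MIGRATING)

struct ex_timer {
	uint32_t flags;		/* CPU index + flag bits, like timer->flags */
};

/* Re-home a timer on another CPU: clear the base bits, then set the new CPU. */
static void ex_timer_set_cpu(struct ex_timer *t, unsigned int cpu)
{
	t->flags = (t->flags & ~EX_TIMER_BASEMASK) | cpu;
}

int main(void)
{
	struct ex_timer t = { .flags = 3 | EX_TIMER_DEFERRABLE };

	printf("cpu=%u deferrable=%d\n",
	       (unsigned)(t.flags & EX_TIMER_CPUMASK),
	       !!(t.flags & EX_TIMER_DEFERRABLE));
	ex_timer_set_cpu(&t, 7);
	printf("cpu=%u deferrable=%d\n",
	       (unsigned)(t.flags & EX_TIMER_CPUMASK),
	       !!(t.flags & EX_TIMER_DEFERRABLE));
	return 0;
}

The (flags & ~BASEMASK) | cpu idiom in the sketch mirrors what add_timer_on() and migrate_timer_list() do in the diff below when they re-home a timer on another CPU.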
Diffstat (limited to 'kernel/time/timer.c')
-rw-r--r--	kernel/time/timer.c	362
1 file changed, 168 insertions(+), 194 deletions(-)
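The other large mechanical change visible in this diff comes from "timer: Use hlist for the timer wheel hash buckets": every bucket in tv1..tv5 becomes an hlist_head, a single pointer instead of list_head's two, roughly halving the per-cpu bucket arrays. A small userspace sketch of the trade-off follows; the ex_* names are illustrative, not the kernel's.

/*
 * Toy model (not kernel code) of why hlist halves the wheel's bucket
 * footprint: an hlist_head is one pointer, a list_head is two, and the
 * tvec arrays are nothing but buckets.
 */
#include <stddef.h>
#include <stdio.h>

struct ex_hlist_node {
	struct ex_hlist_node *next, **pprev;
};

struct ex_hlist_head {
	struct ex_hlist_node *first;		/* one pointer per bucket */
};

struct ex_list_head {
	struct ex_list_head *next, *prev;	/* two pointers per bucket */
};

static void ex_hlist_add_head(struct ex_hlist_node *n, struct ex_hlist_head *h)
{
	n->next = h->first;
	if (h->first)
		h->first->pprev = &n->next;
	h->first = n;
	n->pprev = &h->first;
}

static void ex_hlist_del(struct ex_hlist_node *n)
{
	*n->pprev = n->next;
	if (n->next)
		n->next->pprev = n->pprev;
}

int main(void)
{
	/* 256 + 4*64 buckets roughly mirrors the default TVR/TVN sizing. */
	enum { EX_BUCKETS = 256 + 4 * 64 };
	struct ex_hlist_head bucket = { NULL };
	struct ex_hlist_node a, b;

	ex_hlist_add_head(&a, &bucket);
	ex_hlist_add_head(&b, &bucket);	/* newest entry sits at the head */
	ex_hlist_del(&a);

	printf("head is b: %d\n", bucket.first == &b);
	printf("hlist buckets: %zu bytes, list buckets: %zu bytes\n",
	       EX_BUCKETS * sizeof(struct ex_hlist_head),
	       EX_BUCKETS * sizeof(struct ex_list_head));
	return 0;
}

Because an hlist has no tail pointer there is no equivalent of list_add_tail(); that is why the preceding "timer: Remove FIFO "guarantee"" commit drops the "Timers are FIFO" comment and the diff below enqueues with hlist_add_head() instead.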
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 2ece3aa5069c..520499dd85af 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -49,6 +49,8 @@
49#include <asm/timex.h> 49#include <asm/timex.h>
50#include <asm/io.h> 50#include <asm/io.h>
51 51
52#include "tick-internal.h"
53
52#define CREATE_TRACE_POINTS 54#define CREATE_TRACE_POINTS
53#include <trace/events/timer.h> 55#include <trace/events/timer.h>
54 56
@@ -68,11 +70,11 @@ EXPORT_SYMBOL(jiffies_64);
68#define MAX_TVAL ((unsigned long)((1ULL << (TVR_BITS + 4*TVN_BITS)) - 1)) 70#define MAX_TVAL ((unsigned long)((1ULL << (TVR_BITS + 4*TVN_BITS)) - 1))
69 71
70struct tvec { 72struct tvec {
71 struct list_head vec[TVN_SIZE]; 73 struct hlist_head vec[TVN_SIZE];
72}; 74};
73 75
74struct tvec_root { 76struct tvec_root {
75 struct list_head vec[TVR_SIZE]; 77 struct hlist_head vec[TVR_SIZE];
76}; 78};
77 79
78struct tvec_base { 80struct tvec_base {
@@ -83,6 +85,8 @@ struct tvec_base {
83 unsigned long active_timers; 85 unsigned long active_timers;
84 unsigned long all_timers; 86 unsigned long all_timers;
85 int cpu; 87 int cpu;
88 bool migration_enabled;
89 bool nohz_active;
86 struct tvec_root tv1; 90 struct tvec_root tv1;
87 struct tvec tv2; 91 struct tvec tv2;
88 struct tvec tv3; 92 struct tvec tv3;
@@ -90,43 +94,60 @@ struct tvec_base {
90 struct tvec tv5; 94 struct tvec tv5;
91} ____cacheline_aligned; 95} ____cacheline_aligned;
92 96
93/*
94 * __TIMER_INITIALIZER() needs to set ->base to a valid pointer (because we've
95 * made NULL special, hint: lock_timer_base()) and we cannot get a compile time
96 * pointer to per-cpu entries because we don't know where we'll map the section,
97 * even for the boot cpu.
98 *
99 * And so we use boot_tvec_bases for boot CPU and per-cpu __tvec_bases for the
100 * rest of them.
101 */
102struct tvec_base boot_tvec_bases;
103EXPORT_SYMBOL(boot_tvec_bases);
104 97
105static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases; 98static DEFINE_PER_CPU(struct tvec_base, tvec_bases);
99
100#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
101unsigned int sysctl_timer_migration = 1;
106 102
107/* Functions below help us manage 'deferrable' flag */ 103void timers_update_migration(bool update_nohz)
108static inline unsigned int tbase_get_deferrable(struct tvec_base *base)
109{ 104{
110 return ((unsigned int)(unsigned long)base & TIMER_DEFERRABLE); 105 bool on = sysctl_timer_migration && tick_nohz_active;
106 unsigned int cpu;
107
108 /* Avoid the loop, if nothing to update */
109 if (this_cpu_read(tvec_bases.migration_enabled) == on)
110 return;
111
112 for_each_possible_cpu(cpu) {
113 per_cpu(tvec_bases.migration_enabled, cpu) = on;
114 per_cpu(hrtimer_bases.migration_enabled, cpu) = on;
115 if (!update_nohz)
116 continue;
117 per_cpu(tvec_bases.nohz_active, cpu) = true;
118 per_cpu(hrtimer_bases.nohz_active, cpu) = true;
119 }
111} 120}
112 121
113static inline unsigned int tbase_get_irqsafe(struct tvec_base *base) 122int timer_migration_handler(struct ctl_table *table, int write,
123 void __user *buffer, size_t *lenp,
124 loff_t *ppos)
114{ 125{
115 return ((unsigned int)(unsigned long)base & TIMER_IRQSAFE); 126 static DEFINE_MUTEX(mutex);
127 int ret;
128
129 mutex_lock(&mutex);
130 ret = proc_dointvec(table, write, buffer, lenp, ppos);
131 if (!ret && write)
132 timers_update_migration(false);
133 mutex_unlock(&mutex);
134 return ret;
116} 135}
117 136
118static inline struct tvec_base *tbase_get_base(struct tvec_base *base) 137static inline struct tvec_base *get_target_base(struct tvec_base *base,
138 int pinned)
119{ 139{
120 return ((struct tvec_base *)((unsigned long)base & ~TIMER_FLAG_MASK)); 140 if (pinned || !base->migration_enabled)
141 return this_cpu_ptr(&tvec_bases);
142 return per_cpu_ptr(&tvec_bases, get_nohz_timer_target());
121} 143}
122 144#else
123static inline void 145static inline struct tvec_base *get_target_base(struct tvec_base *base,
124timer_set_base(struct timer_list *timer, struct tvec_base *new_base) 146 int pinned)
125{ 147{
126 unsigned long flags = (unsigned long)timer->base & TIMER_FLAG_MASK; 148 return this_cpu_ptr(&tvec_bases);
127
128 timer->base = (struct tvec_base *)((unsigned long)(new_base) | flags);
129} 149}
150#endif
130 151
131static unsigned long round_jiffies_common(unsigned long j, int cpu, 152static unsigned long round_jiffies_common(unsigned long j, int cpu,
132 bool force_up) 153 bool force_up)
@@ -349,26 +370,12 @@ void set_timer_slack(struct timer_list *timer, int slack_hz)
349} 370}
350EXPORT_SYMBOL_GPL(set_timer_slack); 371EXPORT_SYMBOL_GPL(set_timer_slack);
351 372
352/*
353 * If the list is empty, catch up ->timer_jiffies to the current time.
354 * The caller must hold the tvec_base lock. Returns true if the list
355 * was empty and therefore ->timer_jiffies was updated.
356 */
357static bool catchup_timer_jiffies(struct tvec_base *base)
358{
359 if (!base->all_timers) {
360 base->timer_jiffies = jiffies;
361 return true;
362 }
363 return false;
364}
365
366static void 373static void
367__internal_add_timer(struct tvec_base *base, struct timer_list *timer) 374__internal_add_timer(struct tvec_base *base, struct timer_list *timer)
368{ 375{
369 unsigned long expires = timer->expires; 376 unsigned long expires = timer->expires;
370 unsigned long idx = expires - base->timer_jiffies; 377 unsigned long idx = expires - base->timer_jiffies;
371 struct list_head *vec; 378 struct hlist_head *vec;
372 379
373 if (idx < TVR_SIZE) { 380 if (idx < TVR_SIZE) {
374 int i = expires & TVR_MASK; 381 int i = expires & TVR_MASK;
@@ -401,25 +408,25 @@ __internal_add_timer(struct tvec_base *base, struct timer_list *timer)
401 i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK; 408 i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
402 vec = base->tv5.vec + i; 409 vec = base->tv5.vec + i;
403 } 410 }
404 /* 411
405 * Timers are FIFO: 412 hlist_add_head(&timer->entry, vec);
406 */
407 list_add_tail(&timer->entry, vec);
408} 413}
409 414
410static void internal_add_timer(struct tvec_base *base, struct timer_list *timer) 415static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
411{ 416{
412 (void)catchup_timer_jiffies(base); 417 /* Advance base->jiffies, if the base is empty */
418 if (!base->all_timers++)
419 base->timer_jiffies = jiffies;
420
413 __internal_add_timer(base, timer); 421 __internal_add_timer(base, timer);
414 /* 422 /*
415 * Update base->active_timers and base->next_timer 423 * Update base->active_timers and base->next_timer
416 */ 424 */
417 if (!tbase_get_deferrable(timer->base)) { 425 if (!(timer->flags & TIMER_DEFERRABLE)) {
418 if (!base->active_timers++ || 426 if (!base->active_timers++ ||
419 time_before(timer->expires, base->next_timer)) 427 time_before(timer->expires, base->next_timer))
420 base->next_timer = timer->expires; 428 base->next_timer = timer->expires;
421 } 429 }
422 base->all_timers++;
423 430
424 /* 431 /*
425 * Check whether the other CPU is in dynticks mode and needs 432 * Check whether the other CPU is in dynticks mode and needs
@@ -434,8 +441,11 @@ static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
434 * require special care against races with idle_cpu(), lets deal 441 * require special care against races with idle_cpu(), lets deal
435 * with that later. 442 * with that later.
436 */ 443 */
437 if (!tbase_get_deferrable(base) || tick_nohz_full_cpu(base->cpu)) 444 if (base->nohz_active) {
438 wake_up_nohz_cpu(base->cpu); 445 if (!(timer->flags & TIMER_DEFERRABLE) ||
446 tick_nohz_full_cpu(base->cpu))
447 wake_up_nohz_cpu(base->cpu);
448 }
439} 449}
440 450
441#ifdef CONFIG_TIMER_STATS 451#ifdef CONFIG_TIMER_STATS
@@ -451,15 +461,12 @@ void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr)
451 461
452static void timer_stats_account_timer(struct timer_list *timer) 462static void timer_stats_account_timer(struct timer_list *timer)
453{ 463{
454 unsigned int flag = 0;
455
456 if (likely(!timer->start_site)) 464 if (likely(!timer->start_site))
457 return; 465 return;
458 if (unlikely(tbase_get_deferrable(timer->base)))
459 flag |= TIMER_STATS_FLAG_DEFERRABLE;
460 466
461 timer_stats_update_stats(timer, timer->start_pid, timer->start_site, 467 timer_stats_update_stats(timer, timer->start_pid, timer->start_site,
462 timer->function, timer->start_comm, flag); 468 timer->function, timer->start_comm,
469 timer->flags);
463} 470}
464 471
465#else 472#else
@@ -516,8 +523,8 @@ static int timer_fixup_activate(void *addr, enum debug_obj_state state)
516 * statically initialized. We just make sure that it 523 * statically initialized. We just make sure that it
517 * is tracked in the object tracker. 524 * is tracked in the object tracker.
518 */ 525 */
519 if (timer->entry.next == NULL && 526 if (timer->entry.pprev == NULL &&
520 timer->entry.prev == TIMER_ENTRY_STATIC) { 527 timer->entry.next == TIMER_ENTRY_STATIC) {
521 debug_object_init(timer, &timer_debug_descr); 528 debug_object_init(timer, &timer_debug_descr);
522 debug_object_activate(timer, &timer_debug_descr); 529 debug_object_activate(timer, &timer_debug_descr);
523 return 0; 530 return 0;
@@ -563,7 +570,7 @@ static int timer_fixup_assert_init(void *addr, enum debug_obj_state state)
563 570
564 switch (state) { 571 switch (state) {
565 case ODEBUG_STATE_NOTAVAILABLE: 572 case ODEBUG_STATE_NOTAVAILABLE:
566 if (timer->entry.prev == TIMER_ENTRY_STATIC) { 573 if (timer->entry.next == TIMER_ENTRY_STATIC) {
567 /* 574 /*
568 * This is not really a fixup. The timer was 575 * This is not really a fixup. The timer was
569 * statically initialized. We just make sure that it 576 * statically initialized. We just make sure that it
@@ -648,7 +655,7 @@ static inline void
648debug_activate(struct timer_list *timer, unsigned long expires) 655debug_activate(struct timer_list *timer, unsigned long expires)
649{ 656{
650 debug_timer_activate(timer); 657 debug_timer_activate(timer);
651 trace_timer_start(timer, expires); 658 trace_timer_start(timer, expires, timer->flags);
652} 659}
653 660
654static inline void debug_deactivate(struct timer_list *timer) 661static inline void debug_deactivate(struct timer_list *timer)
@@ -665,10 +672,8 @@ static inline void debug_assert_init(struct timer_list *timer)
665static void do_init_timer(struct timer_list *timer, unsigned int flags, 672static void do_init_timer(struct timer_list *timer, unsigned int flags,
666 const char *name, struct lock_class_key *key) 673 const char *name, struct lock_class_key *key)
667{ 674{
668 struct tvec_base *base = raw_cpu_read(tvec_bases); 675 timer->entry.pprev = NULL;
669 676 timer->flags = flags | raw_smp_processor_id();
670 timer->entry.next = NULL;
671 timer->base = (void *)((unsigned long)base | flags);
672 timer->slack = -1; 677 timer->slack = -1;
673#ifdef CONFIG_TIMER_STATS 678#ifdef CONFIG_TIMER_STATS
674 timer->start_site = NULL; 679 timer->start_site = NULL;
@@ -699,24 +704,23 @@ EXPORT_SYMBOL(init_timer_key);
699 704
700static inline void detach_timer(struct timer_list *timer, bool clear_pending) 705static inline void detach_timer(struct timer_list *timer, bool clear_pending)
701{ 706{
702 struct list_head *entry = &timer->entry; 707 struct hlist_node *entry = &timer->entry;
703 708
704 debug_deactivate(timer); 709 debug_deactivate(timer);
705 710
706 __list_del(entry->prev, entry->next); 711 __hlist_del(entry);
707 if (clear_pending) 712 if (clear_pending)
708 entry->next = NULL; 713 entry->pprev = NULL;
709 entry->prev = LIST_POISON2; 714 entry->next = LIST_POISON2;
710} 715}
711 716
712static inline void 717static inline void
713detach_expired_timer(struct timer_list *timer, struct tvec_base *base) 718detach_expired_timer(struct timer_list *timer, struct tvec_base *base)
714{ 719{
715 detach_timer(timer, true); 720 detach_timer(timer, true);
716 if (!tbase_get_deferrable(timer->base)) 721 if (!(timer->flags & TIMER_DEFERRABLE))
717 base->active_timers--; 722 base->active_timers--;
718 base->all_timers--; 723 base->all_timers--;
719 (void)catchup_timer_jiffies(base);
720} 724}
721 725
722static int detach_if_pending(struct timer_list *timer, struct tvec_base *base, 726static int detach_if_pending(struct timer_list *timer, struct tvec_base *base,
@@ -726,13 +730,14 @@ static int detach_if_pending(struct timer_list *timer, struct tvec_base *base,
726 return 0; 730 return 0;
727 731
728 detach_timer(timer, clear_pending); 732 detach_timer(timer, clear_pending);
729 if (!tbase_get_deferrable(timer->base)) { 733 if (!(timer->flags & TIMER_DEFERRABLE)) {
730 base->active_timers--; 734 base->active_timers--;
731 if (timer->expires == base->next_timer) 735 if (timer->expires == base->next_timer)
732 base->next_timer = base->timer_jiffies; 736 base->next_timer = base->timer_jiffies;
733 } 737 }
734 base->all_timers--; 738 /* If this was the last timer, advance base->jiffies */
735 (void)catchup_timer_jiffies(base); 739 if (!--base->all_timers)
740 base->timer_jiffies = jiffies;
736 return 1; 741 return 1;
737} 742}
738 743
@@ -744,24 +749,22 @@ static int detach_if_pending(struct timer_list *timer, struct tvec_base *base,
744 * So __run_timers/migrate_timers can safely modify all timers which could 749 * So __run_timers/migrate_timers can safely modify all timers which could
745 * be found on ->tvX lists. 750 * be found on ->tvX lists.
746 * 751 *
747 * When the timer's base is locked, and the timer removed from list, it is 752 * When the timer's base is locked and removed from the list, the
748 * possible to set timer->base = NULL and drop the lock: the timer remains 753 * TIMER_MIGRATING flag is set, FIXME
749 * locked.
750 */ 754 */
751static struct tvec_base *lock_timer_base(struct timer_list *timer, 755static struct tvec_base *lock_timer_base(struct timer_list *timer,
752 unsigned long *flags) 756 unsigned long *flags)
753 __acquires(timer->base->lock) 757 __acquires(timer->base->lock)
754{ 758{
755 struct tvec_base *base;
756
757 for (;;) { 759 for (;;) {
758 struct tvec_base *prelock_base = timer->base; 760 u32 tf = timer->flags;
759 base = tbase_get_base(prelock_base); 761 struct tvec_base *base;
760 if (likely(base != NULL)) { 762
763 if (!(tf & TIMER_MIGRATING)) {
764 base = per_cpu_ptr(&tvec_bases, tf & TIMER_CPUMASK);
761 spin_lock_irqsave(&base->lock, *flags); 765 spin_lock_irqsave(&base->lock, *flags);
762 if (likely(prelock_base == timer->base)) 766 if (timer->flags == tf)
763 return base; 767 return base;
764 /* The timer has migrated to another CPU */
765 spin_unlock_irqrestore(&base->lock, *flags); 768 spin_unlock_irqrestore(&base->lock, *flags);
766 } 769 }
767 cpu_relax(); 770 cpu_relax();
@@ -770,11 +773,11 @@ static struct tvec_base *lock_timer_base(struct timer_list *timer,
770 773
771static inline int 774static inline int
772__mod_timer(struct timer_list *timer, unsigned long expires, 775__mod_timer(struct timer_list *timer, unsigned long expires,
773 bool pending_only, int pinned) 776 bool pending_only, int pinned)
774{ 777{
775 struct tvec_base *base, *new_base; 778 struct tvec_base *base, *new_base;
776 unsigned long flags; 779 unsigned long flags;
777 int ret = 0 , cpu; 780 int ret = 0;
778 781
779 timer_stats_timer_set_start_info(timer); 782 timer_stats_timer_set_start_info(timer);
780 BUG_ON(!timer->function); 783 BUG_ON(!timer->function);
@@ -787,8 +790,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
787 790
788 debug_activate(timer, expires); 791 debug_activate(timer, expires);
789 792
790 cpu = get_nohz_timer_target(pinned); 793 new_base = get_target_base(base, pinned);
791 new_base = per_cpu(tvec_bases, cpu);
792 794
793 if (base != new_base) { 795 if (base != new_base) {
794 /* 796 /*
@@ -800,11 +802,13 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
800 */ 802 */
801 if (likely(base->running_timer != timer)) { 803 if (likely(base->running_timer != timer)) {
802 /* See the comment in lock_timer_base() */ 804 /* See the comment in lock_timer_base() */
803 timer_set_base(timer, NULL); 805 timer->flags |= TIMER_MIGRATING;
806
804 spin_unlock(&base->lock); 807 spin_unlock(&base->lock);
805 base = new_base; 808 base = new_base;
806 spin_lock(&base->lock); 809 spin_lock(&base->lock);
807 timer_set_base(timer, base); 810 timer->flags &= ~TIMER_BASEMASK;
811 timer->flags |= base->cpu;
808 } 812 }
809 } 813 }
810 814
@@ -966,13 +970,13 @@ EXPORT_SYMBOL(add_timer);
966 */ 970 */
967void add_timer_on(struct timer_list *timer, int cpu) 971void add_timer_on(struct timer_list *timer, int cpu)
968{ 972{
969 struct tvec_base *base = per_cpu(tvec_bases, cpu); 973 struct tvec_base *base = per_cpu_ptr(&tvec_bases, cpu);
970 unsigned long flags; 974 unsigned long flags;
971 975
972 timer_stats_timer_set_start_info(timer); 976 timer_stats_timer_set_start_info(timer);
973 BUG_ON(timer_pending(timer) || !timer->function); 977 BUG_ON(timer_pending(timer) || !timer->function);
974 spin_lock_irqsave(&base->lock, flags); 978 spin_lock_irqsave(&base->lock, flags);
975 timer_set_base(timer, base); 979 timer->flags = (timer->flags & ~TIMER_BASEMASK) | cpu;
976 debug_activate(timer, timer->expires); 980 debug_activate(timer, timer->expires);
977 internal_add_timer(base, timer); 981 internal_add_timer(base, timer);
978 spin_unlock_irqrestore(&base->lock, flags); 982 spin_unlock_irqrestore(&base->lock, flags);
@@ -1037,8 +1041,6 @@ int try_to_del_timer_sync(struct timer_list *timer)
1037EXPORT_SYMBOL(try_to_del_timer_sync); 1041EXPORT_SYMBOL(try_to_del_timer_sync);
1038 1042
1039#ifdef CONFIG_SMP 1043#ifdef CONFIG_SMP
1040static DEFINE_PER_CPU(struct tvec_base, __tvec_bases);
1041
1042/** 1044/**
1043 * del_timer_sync - deactivate a timer and wait for the handler to finish. 1045 * del_timer_sync - deactivate a timer and wait for the handler to finish.
1044 * @timer: the timer to be deactivated 1046 * @timer: the timer to be deactivated
@@ -1093,7 +1095,7 @@ int del_timer_sync(struct timer_list *timer)
1093 * don't use it in hardirq context, because it 1095 * don't use it in hardirq context, because it
1094 * could lead to deadlock. 1096 * could lead to deadlock.
1095 */ 1097 */
1096 WARN_ON(in_irq() && !tbase_get_irqsafe(timer->base)); 1098 WARN_ON(in_irq() && !(timer->flags & TIMER_IRQSAFE));
1097 for (;;) { 1099 for (;;) {
1098 int ret = try_to_del_timer_sync(timer); 1100 int ret = try_to_del_timer_sync(timer);
1099 if (ret >= 0) 1101 if (ret >= 0)
@@ -1107,17 +1109,17 @@ EXPORT_SYMBOL(del_timer_sync);
1107static int cascade(struct tvec_base *base, struct tvec *tv, int index) 1109static int cascade(struct tvec_base *base, struct tvec *tv, int index)
1108{ 1110{
1109 /* cascade all the timers from tv up one level */ 1111 /* cascade all the timers from tv up one level */
1110 struct timer_list *timer, *tmp; 1112 struct timer_list *timer;
1111 struct list_head tv_list; 1113 struct hlist_node *tmp;
1114 struct hlist_head tv_list;
1112 1115
1113 list_replace_init(tv->vec + index, &tv_list); 1116 hlist_move_list(tv->vec + index, &tv_list);
1114 1117
1115 /* 1118 /*
1116 * We are removing _all_ timers from the list, so we 1119 * We are removing _all_ timers from the list, so we
1117 * don't have to detach them individually. 1120 * don't have to detach them individually.
1118 */ 1121 */
1119 list_for_each_entry_safe(timer, tmp, &tv_list, entry) { 1122 hlist_for_each_entry_safe(timer, tmp, &tv_list, entry) {
1120 BUG_ON(tbase_get_base(timer->base) != base);
1121 /* No accounting, while moving them */ 1123 /* No accounting, while moving them */
1122 __internal_add_timer(base, timer); 1124 __internal_add_timer(base, timer);
1123 } 1125 }
@@ -1182,14 +1184,18 @@ static inline void __run_timers(struct tvec_base *base)
1182 struct timer_list *timer; 1184 struct timer_list *timer;
1183 1185
1184 spin_lock_irq(&base->lock); 1186 spin_lock_irq(&base->lock);
1185 if (catchup_timer_jiffies(base)) { 1187
1186 spin_unlock_irq(&base->lock);
1187 return;
1188 }
1189 while (time_after_eq(jiffies, base->timer_jiffies)) { 1188 while (time_after_eq(jiffies, base->timer_jiffies)) {
1190 struct list_head work_list; 1189 struct hlist_head work_list;
1191 struct list_head *head = &work_list; 1190 struct hlist_head *head = &work_list;
1192 int index = base->timer_jiffies & TVR_MASK; 1191 int index;
1192
1193 if (!base->all_timers) {
1194 base->timer_jiffies = jiffies;
1195 break;
1196 }
1197
1198 index = base->timer_jiffies & TVR_MASK;
1193 1199
1194 /* 1200 /*
1195 * Cascade timers: 1201 * Cascade timers:
@@ -1200,16 +1206,16 @@ static inline void __run_timers(struct tvec_base *base)
1200 !cascade(base, &base->tv4, INDEX(2))) 1206 !cascade(base, &base->tv4, INDEX(2)))
1201 cascade(base, &base->tv5, INDEX(3)); 1207 cascade(base, &base->tv5, INDEX(3));
1202 ++base->timer_jiffies; 1208 ++base->timer_jiffies;
1203 list_replace_init(base->tv1.vec + index, head); 1209 hlist_move_list(base->tv1.vec + index, head);
1204 while (!list_empty(head)) { 1210 while (!hlist_empty(head)) {
1205 void (*fn)(unsigned long); 1211 void (*fn)(unsigned long);
1206 unsigned long data; 1212 unsigned long data;
1207 bool irqsafe; 1213 bool irqsafe;
1208 1214
1209 timer = list_first_entry(head, struct timer_list,entry); 1215 timer = hlist_entry(head->first, struct timer_list, entry);
1210 fn = timer->function; 1216 fn = timer->function;
1211 data = timer->data; 1217 data = timer->data;
1212 irqsafe = tbase_get_irqsafe(timer->base); 1218 irqsafe = timer->flags & TIMER_IRQSAFE;
1213 1219
1214 timer_stats_account_timer(timer); 1220 timer_stats_account_timer(timer);
1215 1221
@@ -1248,8 +1254,8 @@ static unsigned long __next_timer_interrupt(struct tvec_base *base)
1248 /* Look for timer events in tv1. */ 1254 /* Look for timer events in tv1. */
1249 index = slot = timer_jiffies & TVR_MASK; 1255 index = slot = timer_jiffies & TVR_MASK;
1250 do { 1256 do {
1251 list_for_each_entry(nte, base->tv1.vec + slot, entry) { 1257 hlist_for_each_entry(nte, base->tv1.vec + slot, entry) {
1252 if (tbase_get_deferrable(nte->base)) 1258 if (nte->flags & TIMER_DEFERRABLE)
1253 continue; 1259 continue;
1254 1260
1255 found = 1; 1261 found = 1;
@@ -1279,8 +1285,8 @@ cascade:
1279 1285
1280 index = slot = timer_jiffies & TVN_MASK; 1286 index = slot = timer_jiffies & TVN_MASK;
1281 do { 1287 do {
1282 list_for_each_entry(nte, varp->vec + slot, entry) { 1288 hlist_for_each_entry(nte, varp->vec + slot, entry) {
1283 if (tbase_get_deferrable(nte->base)) 1289 if (nte->flags & TIMER_DEFERRABLE)
1284 continue; 1290 continue;
1285 1291
1286 found = 1; 1292 found = 1;
@@ -1311,54 +1317,48 @@ cascade:
1311 * Check, if the next hrtimer event is before the next timer wheel 1317 * Check, if the next hrtimer event is before the next timer wheel
1312 * event: 1318 * event:
1313 */ 1319 */
1314static unsigned long cmp_next_hrtimer_event(unsigned long now, 1320static u64 cmp_next_hrtimer_event(u64 basem, u64 expires)
1315 unsigned long expires)
1316{ 1321{
1317 ktime_t hr_delta = hrtimer_get_next_event(); 1322 u64 nextevt = hrtimer_get_next_event();
1318 struct timespec tsdelta;
1319 unsigned long delta;
1320
1321 if (hr_delta.tv64 == KTIME_MAX)
1322 return expires;
1323 1323
1324 /* 1324 /*
1325 * Expired timer available, let it expire in the next tick 1325 * If high resolution timers are enabled
1326 * hrtimer_get_next_event() returns KTIME_MAX.
1326 */ 1327 */
1327 if (hr_delta.tv64 <= 0) 1328 if (expires <= nextevt)
1328 return now + 1; 1329 return expires;
1329
1330 tsdelta = ktime_to_timespec(hr_delta);
1331 delta = timespec_to_jiffies(&tsdelta);
1332 1330
1333 /* 1331 /*
1334 * Limit the delta to the max value, which is checked in 1332 * If the next timer is already expired, return the tick base
1335 * tick_nohz_stop_sched_tick(): 1333 * time so the tick is fired immediately.
1336 */ 1334 */
1337 if (delta > NEXT_TIMER_MAX_DELTA) 1335 if (nextevt <= basem)
1338 delta = NEXT_TIMER_MAX_DELTA; 1336 return basem;
1339 1337
1340 /* 1338 /*
1341 * Take rounding errors in to account and make sure, that it 1339 * Round up to the next jiffie. High resolution timers are
1342 * expires in the next tick. Otherwise we go into an endless 1340 * off, so the hrtimers are expired in the tick and we need to
1343 * ping pong due to tick_nohz_stop_sched_tick() retriggering 1341 * make sure that this tick really expires the timer to avoid
1344 * the timer softirq 1342 * a ping pong of the nohz stop code.
1343 *
1344 * Use DIV_ROUND_UP_ULL to prevent gcc calling __divdi3
1345 */ 1345 */
1346 if (delta < 1) 1346 return DIV_ROUND_UP_ULL(nextevt, TICK_NSEC) * TICK_NSEC;
1347 delta = 1;
1348 now += delta;
1349 if (time_before(now, expires))
1350 return now;
1351 return expires;
1352} 1347}
1353 1348
1354/** 1349/**
1355 * get_next_timer_interrupt - return the jiffy of the next pending timer 1350 * get_next_timer_interrupt - return the time (clock mono) of the next timer
1356 * @now: current time (in jiffies) 1351 * @basej: base time jiffies
1352 * @basem: base time clock monotonic
1353 *
1354 * Returns the tick aligned clock monotonic time of the next pending
1355 * timer or KTIME_MAX if no timer is pending.
1357 */ 1356 */
1358unsigned long get_next_timer_interrupt(unsigned long now) 1357u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
1359{ 1358{
1360 struct tvec_base *base = __this_cpu_read(tvec_bases); 1359 struct tvec_base *base = this_cpu_ptr(&tvec_bases);
1361 unsigned long expires = now + NEXT_TIMER_MAX_DELTA; 1360 u64 expires = KTIME_MAX;
1361 unsigned long nextevt;
1362 1362
1363 /* 1363 /*
1364 * Pretend that there is no timer pending if the cpu is offline. 1364 * Pretend that there is no timer pending if the cpu is offline.
@@ -1371,14 +1371,15 @@ unsigned long get_next_timer_interrupt(unsigned long now)
1371 if (base->active_timers) { 1371 if (base->active_timers) {
1372 if (time_before_eq(base->next_timer, base->timer_jiffies)) 1372 if (time_before_eq(base->next_timer, base->timer_jiffies))
1373 base->next_timer = __next_timer_interrupt(base); 1373 base->next_timer = __next_timer_interrupt(base);
1374 expires = base->next_timer; 1374 nextevt = base->next_timer;
1375 if (time_before_eq(nextevt, basej))
1376 expires = basem;
1377 else
1378 expires = basem + (nextevt - basej) * TICK_NSEC;
1375 } 1379 }
1376 spin_unlock(&base->lock); 1380 spin_unlock(&base->lock);
1377 1381
1378 if (time_before_eq(expires, now)) 1382 return cmp_next_hrtimer_event(basem, expires);
1379 return now;
1380
1381 return cmp_next_hrtimer_event(now, expires);
1382} 1383}
1383#endif 1384#endif
1384 1385
@@ -1407,9 +1408,7 @@ void update_process_times(int user_tick)
1407 */ 1408 */
1408static void run_timer_softirq(struct softirq_action *h) 1409static void run_timer_softirq(struct softirq_action *h)
1409{ 1410{
1410 struct tvec_base *base = __this_cpu_read(tvec_bases); 1411 struct tvec_base *base = this_cpu_ptr(&tvec_bases);
1411
1412 hrtimer_run_pending();
1413 1412
1414 if (time_after_eq(jiffies, base->timer_jiffies)) 1413 if (time_after_eq(jiffies, base->timer_jiffies))
1415 __run_timers(base); 1414 __run_timers(base);
@@ -1545,15 +1544,16 @@ signed long __sched schedule_timeout_uninterruptible(signed long timeout)
1545EXPORT_SYMBOL(schedule_timeout_uninterruptible); 1544EXPORT_SYMBOL(schedule_timeout_uninterruptible);
1546 1545
1547#ifdef CONFIG_HOTPLUG_CPU 1546#ifdef CONFIG_HOTPLUG_CPU
1548static void migrate_timer_list(struct tvec_base *new_base, struct list_head *head) 1547static void migrate_timer_list(struct tvec_base *new_base, struct hlist_head *head)
1549{ 1548{
1550 struct timer_list *timer; 1549 struct timer_list *timer;
1550 int cpu = new_base->cpu;
1551 1551
1552 while (!list_empty(head)) { 1552 while (!hlist_empty(head)) {
1553 timer = list_first_entry(head, struct timer_list, entry); 1553 timer = hlist_entry(head->first, struct timer_list, entry);
1554 /* We ignore the accounting on the dying cpu */ 1554 /* We ignore the accounting on the dying cpu */
1555 detach_timer(timer, false); 1555 detach_timer(timer, false);
1556 timer_set_base(timer, new_base); 1556 timer->flags = (timer->flags & ~TIMER_BASEMASK) | cpu;
1557 internal_add_timer(new_base, timer); 1557 internal_add_timer(new_base, timer);
1558 } 1558 }
1559} 1559}
@@ -1565,8 +1565,8 @@ static void migrate_timers(int cpu)
1565 int i; 1565 int i;
1566 1566
1567 BUG_ON(cpu_online(cpu)); 1567 BUG_ON(cpu_online(cpu));
1568 old_base = per_cpu(tvec_bases, cpu); 1568 old_base = per_cpu_ptr(&tvec_bases, cpu);
1569 new_base = get_cpu_var(tvec_bases); 1569 new_base = this_cpu_ptr(&tvec_bases);
1570 /* 1570 /*
1571 * The caller is globally serialized and nobody else 1571 * The caller is globally serialized and nobody else
1572 * takes two locks at once, deadlock is not possible. 1572 * takes two locks at once, deadlock is not possible.
@@ -1590,7 +1590,6 @@ static void migrate_timers(int cpu)
1590 1590
1591 spin_unlock(&old_base->lock); 1591 spin_unlock(&old_base->lock);
1592 spin_unlock_irq(&new_base->lock); 1592 spin_unlock_irq(&new_base->lock);
1593 put_cpu_var(tvec_bases);
1594} 1593}
1595 1594
1596static int timer_cpu_notify(struct notifier_block *self, 1595static int timer_cpu_notify(struct notifier_block *self,
@@ -1616,52 +1615,27 @@ static inline void timer_register_cpu_notifier(void)
1616static inline void timer_register_cpu_notifier(void) { } 1615static inline void timer_register_cpu_notifier(void) { }
1617#endif /* CONFIG_HOTPLUG_CPU */ 1616#endif /* CONFIG_HOTPLUG_CPU */
1618 1617
1619static void __init init_timer_cpu(struct tvec_base *base, int cpu) 1618static void __init init_timer_cpu(int cpu)
1620{ 1619{
1621 int j; 1620 struct tvec_base *base = per_cpu_ptr(&tvec_bases, cpu);
1622
1623 BUG_ON(base != tbase_get_base(base));
1624 1621
1625 base->cpu = cpu; 1622 base->cpu = cpu;
1626 per_cpu(tvec_bases, cpu) = base;
1627 spin_lock_init(&base->lock); 1623 spin_lock_init(&base->lock);
1628 1624
1629 for (j = 0; j < TVN_SIZE; j++) {
1630 INIT_LIST_HEAD(base->tv5.vec + j);
1631 INIT_LIST_HEAD(base->tv4.vec + j);
1632 INIT_LIST_HEAD(base->tv3.vec + j);
1633 INIT_LIST_HEAD(base->tv2.vec + j);
1634 }
1635 for (j = 0; j < TVR_SIZE; j++)
1636 INIT_LIST_HEAD(base->tv1.vec + j);
1637
1638 base->timer_jiffies = jiffies; 1625 base->timer_jiffies = jiffies;
1639 base->next_timer = base->timer_jiffies; 1626 base->next_timer = base->timer_jiffies;
1640} 1627}
1641 1628
1642static void __init init_timer_cpus(void) 1629static void __init init_timer_cpus(void)
1643{ 1630{
1644 struct tvec_base *base;
1645 int local_cpu = smp_processor_id();
1646 int cpu; 1631 int cpu;
1647 1632
1648 for_each_possible_cpu(cpu) { 1633 for_each_possible_cpu(cpu)
1649 if (cpu == local_cpu) 1634 init_timer_cpu(cpu);
1650 base = &boot_tvec_bases;
1651#ifdef CONFIG_SMP
1652 else
1653 base = per_cpu_ptr(&__tvec_bases, cpu);
1654#endif
1655
1656 init_timer_cpu(base, cpu);
1657 }
1658} 1635}
1659 1636
1660void __init init_timers(void) 1637void __init init_timers(void)
1661{ 1638{
1662 /* ensure there are enough low bits for flags in timer->base pointer */
1663 BUILD_BUG_ON(__alignof__(struct tvec_base) & TIMER_FLAG_MASK);
1664
1665 init_timer_cpus(); 1639 init_timer_cpus();
1666 init_timer_stats(); 1640 init_timer_stats();
1667 timer_register_cpu_notifier(); 1641 timer_register_cpu_notifier();
@@ -1697,14 +1671,14 @@ unsigned long msleep_interruptible(unsigned int msecs)
1697 1671
1698EXPORT_SYMBOL(msleep_interruptible); 1672EXPORT_SYMBOL(msleep_interruptible);
1699 1673
1700static int __sched do_usleep_range(unsigned long min, unsigned long max) 1674static void __sched do_usleep_range(unsigned long min, unsigned long max)
1701{ 1675{
1702 ktime_t kmin; 1676 ktime_t kmin;
1703 unsigned long delta; 1677 unsigned long delta;
1704 1678
1705 kmin = ktime_set(0, min * NSEC_PER_USEC); 1679 kmin = ktime_set(0, min * NSEC_PER_USEC);
1706 delta = (max - min) * NSEC_PER_USEC; 1680 delta = (max - min) * NSEC_PER_USEC;
1707 return schedule_hrtimeout_range(&kmin, delta, HRTIMER_MODE_REL); 1681 schedule_hrtimeout_range(&kmin, delta, HRTIMER_MODE_REL);
1708} 1682}
1709 1683
1710/** 1684/**
@@ -1712,7 +1686,7 @@ static int __sched do_usleep_range(unsigned long min, unsigned long max)
1712 * @min: Minimum time in usecs to sleep 1686 * @min: Minimum time in usecs to sleep
1713 * @max: Maximum time in usecs to sleep 1687 * @max: Maximum time in usecs to sleep
1714 */ 1688 */
1715void usleep_range(unsigned long min, unsigned long max) 1689void __sched usleep_range(unsigned long min, unsigned long max)
1716{ 1690{
1717 __set_current_state(TASK_UNINTERRUPTIBLE); 1691 __set_current_state(TASK_UNINTERRUPTIBLE);
1718 do_usleep_range(min, max); 1692 do_usleep_range(min, max);