author		Linus Torvalds <torvalds@linux-foundation.org>	2015-06-22 21:57:44 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2015-06-22 21:57:44 -0400
commit		43224b96af3154cedd7220f7b90094905f07ac78
tree		44279acc4613b314ff031620fd62641db3c85b71 /kernel/time/timer.c
parent		d70b3ef54ceaf1c7c92209f5a662a670d04cbed9
parent		1cb6c2151850584ee805fdcf088af0bb81f4b086
Merge branch 'timers-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull timer updates from Thomas Gleixner:
"A rather largish update for everything time and timer related:
- Cache footprint optimizations for both hrtimers and timer wheel
- Lower the NOHZ impact on systems which have NOHZ or timer migration
disabled at runtime.
- Optimize run time overhead of hrtimer interrupt by making the clock
offset updates smarter
- hrtimer cleanups and removal of restrictions to tackle some
problems in sched/perf
- Some more leap second tweaks
- Another round of changes addressing the 2038 problem
- First step to change the internals of clock event devices by
introducing the necessary infrastructure
- Allow constant folding for usecs/msecs_to_jiffies()
- The usual pile of clockevent/clocksource driver updates
The hrtimer changes contain updates to sched, perf and x86 as they
depend on them plus changes all over the tree to cleanup API changes
and redundant code, which got copied all over the place. The y2038
changes touch s390 to remove the last non-2038-safe code related to the
boot/persistent clock"
* 'timers-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (114 commits)
clocksource: Increase dependencies of timer-stm32 to limit build wreckage
timer: Minimize nohz off overhead
timer: Reduce timer migration overhead if disabled
timer: Stats: Simplify the flags handling
timer: Replace timer base by a cpu index
timer: Use hlist for the timer wheel hash buckets
timer: Remove FIFO "guarantee"
timers: Sanitize catchup_timer_jiffies() usage
hrtimer: Allow hrtimer::function() to free the timer
seqcount: Introduce raw_write_seqcount_barrier()
seqcount: Rename write_seqcount_barrier()
hrtimer: Fix hrtimer_is_queued() hole
hrtimer: Remove HRTIMER_STATE_MIGRATE
selftest: Timers: Avoid signal deadlock in leap-a-day
timekeeping: Copy the shadow-timekeeper over the real timekeeper last
clockevents: Check state instead of mode in suspend/resume path
selftests: timers: Add leap-second timer edge testing to leap-a-day.c
ntp: Do leapsecond adjustment in adjtimex read path
time: Prevent early expiry of hrtimers[CLOCK_REALTIME] at the leap second edge
ntp: Introduce and use SECS_PER_DAY macro instead of 86400
...
Diffstat (limited to 'kernel/time/timer.c')
-rw-r--r--	kernel/time/timer.c	362
1 file changed, 168 insertions(+), 194 deletions(-)
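
The structural theme running through this diff is that a timer no longer finds its tvec_base through a flag-tagged timer->base pointer. Instead, timer->flags carries the owning CPU index in its low bits (masked by TIMER_CPUMASK), with TIMER_MIGRATING, TIMER_DEFERRABLE and TIMER_IRQSAFE as separate flag bits above it, and TIMER_BASEMASK naming the bits that are cleared and rewritten when a timer is bound to a different base (see add_timer_on() and __mod_timer() below). The wheel buckets also become hlists. The standalone sketch below only mimics that encoding; the EX_* names and bit values are assumptions made up for the example, not the kernel's definitions.

/*
 * Illustrative sketch, not kernel code: one 32-bit word holding a CPU
 * index in the low bits and per-timer flag bits above it, the scheme
 * timer->flags uses in place of the old pointer tagging.
 */
#include <stdint.h>
#include <stdio.h>

#define EX_TIMER_CPUMASK    0x0003FFFFu   /* assumed width of the CPU index */
#define EX_TIMER_MIGRATING  0x00040000u   /* assumed flag bits above it */
#define EX_TIMER_DEFERRABLE 0x00080000u
#define EX_TIMER_IRQSAFE    0x00100000u
#define EX_TIMER_BASEMASK   (EX_TIMER_MIGRATING | EX_TIMER_CPUMASK)

static unsigned int flags_to_cpu(uint32_t flags)
{
        /* lock_timer_base() does the same masking to find the per-cpu base */
        return flags & EX_TIMER_CPUMASK;
}

int main(void)
{
        /* a "timer" queued on CPU 3 and marked deferrable */
        uint32_t flags = 3u | EX_TIMER_DEFERRABLE;

        printf("cpu=%u deferrable=%d migrating=%d\n",
               flags_to_cpu(flags),
               !!(flags & EX_TIMER_DEFERRABLE),
               !!(flags & EX_TIMER_MIGRATING));

        /* re-binding to CPU 5: clear the base-identifying bits and OR in
         * the new index, as add_timer_on() does with TIMER_BASEMASK */
        flags = (flags & ~EX_TIMER_BASEMASK) | 5u;
        printf("cpu=%u\n", flags_to_cpu(flags));
        return 0;
}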
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 2ece3aa5069c..520499dd85af 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -49,6 +49,8 @@
 #include <asm/timex.h>
 #include <asm/io.h>
 
+#include "tick-internal.h"
+
 #define CREATE_TRACE_POINTS
 #include <trace/events/timer.h>
 
@@ -68,11 +70,11 @@ EXPORT_SYMBOL(jiffies_64);
 #define MAX_TVAL ((unsigned long)((1ULL << (TVR_BITS + 4*TVN_BITS)) - 1))
 
 struct tvec {
-        struct list_head vec[TVN_SIZE];
+        struct hlist_head vec[TVN_SIZE];
 };
 
 struct tvec_root {
-        struct list_head vec[TVR_SIZE];
+        struct hlist_head vec[TVR_SIZE];
 };
 
 struct tvec_base {
@@ -83,6 +85,8 @@ struct tvec_base {
         unsigned long active_timers;
         unsigned long all_timers;
         int cpu;
+        bool migration_enabled;
+        bool nohz_active;
         struct tvec_root tv1;
         struct tvec tv2;
         struct tvec tv3;
@@ -90,43 +94,60 @@ struct tvec_base {
         struct tvec tv5;
 } ____cacheline_aligned;
 
-/*
- * __TIMER_INITIALIZER() needs to set ->base to a valid pointer (because we've
- * made NULL special, hint: lock_timer_base()) and we cannot get a compile time
- * pointer to per-cpu entries because we don't know where we'll map the section,
- * even for the boot cpu.
- *
- * And so we use boot_tvec_bases for boot CPU and per-cpu __tvec_bases for the
- * rest of them.
- */
-struct tvec_base boot_tvec_bases;
-EXPORT_SYMBOL(boot_tvec_bases);
 
-static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases;
+static DEFINE_PER_CPU(struct tvec_base, tvec_bases);
+
+#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
+unsigned int sysctl_timer_migration = 1;
 
-/* Functions below help us manage 'deferrable' flag */
-static inline unsigned int tbase_get_deferrable(struct tvec_base *base)
+void timers_update_migration(bool update_nohz)
 {
-        return ((unsigned int)(unsigned long)base & TIMER_DEFERRABLE);
+        bool on = sysctl_timer_migration && tick_nohz_active;
+        unsigned int cpu;
+
+        /* Avoid the loop, if nothing to update */
+        if (this_cpu_read(tvec_bases.migration_enabled) == on)
+                return;
+
+        for_each_possible_cpu(cpu) {
+                per_cpu(tvec_bases.migration_enabled, cpu) = on;
+                per_cpu(hrtimer_bases.migration_enabled, cpu) = on;
+                if (!update_nohz)
+                        continue;
+                per_cpu(tvec_bases.nohz_active, cpu) = true;
+                per_cpu(hrtimer_bases.nohz_active, cpu) = true;
+        }
 }
 
-static inline unsigned int tbase_get_irqsafe(struct tvec_base *base)
+int timer_migration_handler(struct ctl_table *table, int write,
+                            void __user *buffer, size_t *lenp,
+                            loff_t *ppos)
 {
-        return ((unsigned int)(unsigned long)base & TIMER_IRQSAFE);
+        static DEFINE_MUTEX(mutex);
+        int ret;
+
+        mutex_lock(&mutex);
+        ret = proc_dointvec(table, write, buffer, lenp, ppos);
+        if (!ret && write)
+                timers_update_migration(false);
+        mutex_unlock(&mutex);
+        return ret;
 }
 
-static inline struct tvec_base *tbase_get_base(struct tvec_base *base)
+static inline struct tvec_base *get_target_base(struct tvec_base *base,
+                                                int pinned)
 {
-        return ((struct tvec_base *)((unsigned long)base & ~TIMER_FLAG_MASK));
+        if (pinned || !base->migration_enabled)
+                return this_cpu_ptr(&tvec_bases);
+        return per_cpu_ptr(&tvec_bases, get_nohz_timer_target());
 }
-
-static inline void
-timer_set_base(struct timer_list *timer, struct tvec_base *new_base)
+#else
+static inline struct tvec_base *get_target_base(struct tvec_base *base,
+                                                int pinned)
 {
-        unsigned long flags = (unsigned long)timer->base & TIMER_FLAG_MASK;
-
-        timer->base = (struct tvec_base *)((unsigned long)(new_base) | flags);
+        return this_cpu_ptr(&tvec_bases);
 }
+#endif
 
 static unsigned long round_jiffies_common(unsigned long j, int cpu,
                                           bool force_up)
@@ -349,26 +370,12 @@ void set_timer_slack(struct timer_list *timer, int slack_hz)
 }
 EXPORT_SYMBOL_GPL(set_timer_slack);
 
-/*
- * If the list is empty, catch up ->timer_jiffies to the current time.
- * The caller must hold the tvec_base lock. Returns true if the list
- * was empty and therefore ->timer_jiffies was updated.
- */
-static bool catchup_timer_jiffies(struct tvec_base *base)
-{
-        if (!base->all_timers) {
-                base->timer_jiffies = jiffies;
-                return true;
-        }
-        return false;
-}
-
 static void
 __internal_add_timer(struct tvec_base *base, struct timer_list *timer)
 {
         unsigned long expires = timer->expires;
         unsigned long idx = expires - base->timer_jiffies;
-        struct list_head *vec;
+        struct hlist_head *vec;
 
         if (idx < TVR_SIZE) {
                 int i = expires & TVR_MASK;
@@ -401,25 +408,25 @@ __internal_add_timer(struct tvec_base *base, struct timer_list *timer)
                 i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
                 vec = base->tv5.vec + i;
         }
-        /*
-         * Timers are FIFO:
-         */
-        list_add_tail(&timer->entry, vec);
+
+        hlist_add_head(&timer->entry, vec);
 }
 
 static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
 {
-        (void)catchup_timer_jiffies(base);
+        /* Advance base->jiffies, if the base is empty */
+        if (!base->all_timers++)
+                base->timer_jiffies = jiffies;
+
         __internal_add_timer(base, timer);
         /*
          * Update base->active_timers and base->next_timer
          */
-        if (!tbase_get_deferrable(timer->base)) {
+        if (!(timer->flags & TIMER_DEFERRABLE)) {
                 if (!base->active_timers++ ||
                     time_before(timer->expires, base->next_timer))
                         base->next_timer = timer->expires;
         }
-        base->all_timers++;
 
         /*
          * Check whether the other CPU is in dynticks mode and needs
@@ -434,8 +441,11 @@ static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
          * require special care against races with idle_cpu(), lets deal
          * with that later.
          */
-        if (!tbase_get_deferrable(timer->base) || tick_nohz_full_cpu(base->cpu))
-                wake_up_nohz_cpu(base->cpu);
+        if (base->nohz_active) {
+                if (!(timer->flags & TIMER_DEFERRABLE) ||
+                    tick_nohz_full_cpu(base->cpu))
+                        wake_up_nohz_cpu(base->cpu);
+        }
 }
 
 #ifdef CONFIG_TIMER_STATS
@@ -451,15 +461,12 @@ void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr)
 
 static void timer_stats_account_timer(struct timer_list *timer)
 {
-        unsigned int flag = 0;
-
         if (likely(!timer->start_site))
                 return;
-        if (unlikely(tbase_get_deferrable(timer->base)))
-                flag |= TIMER_STATS_FLAG_DEFERRABLE;
 
         timer_stats_update_stats(timer, timer->start_pid, timer->start_site,
-                                 timer->function, timer->start_comm, flag);
+                                 timer->function, timer->start_comm,
+                                 timer->flags);
 }
 
 #else
@@ -516,8 +523,8 @@ static int timer_fixup_activate(void *addr, enum debug_obj_state state)
                  * statically initialized. We just make sure that it
                  * is tracked in the object tracker.
                  */
-                if (timer->entry.next == NULL &&
-                    timer->entry.prev == TIMER_ENTRY_STATIC) {
+                if (timer->entry.pprev == NULL &&
+                    timer->entry.next == TIMER_ENTRY_STATIC) {
                         debug_object_init(timer, &timer_debug_descr);
                         debug_object_activate(timer, &timer_debug_descr);
                         return 0;
@@ -563,7 +570,7 @@ static int timer_fixup_assert_init(void *addr, enum debug_obj_state state)
 
         switch (state) {
         case ODEBUG_STATE_NOTAVAILABLE:
-                if (timer->entry.prev == TIMER_ENTRY_STATIC) {
+                if (timer->entry.next == TIMER_ENTRY_STATIC) {
                         /*
                          * This is not really a fixup. The timer was
                          * statically initialized. We just make sure that it
@@ -648,7 +655,7 @@ static inline void
 debug_activate(struct timer_list *timer, unsigned long expires)
 {
         debug_timer_activate(timer);
-        trace_timer_start(timer, expires);
+        trace_timer_start(timer, expires, timer->flags);
 }
 
 static inline void debug_deactivate(struct timer_list *timer)
@@ -665,10 +672,8 @@ static inline void debug_assert_init(struct timer_list *timer)
 static void do_init_timer(struct timer_list *timer, unsigned int flags,
                           const char *name, struct lock_class_key *key)
 {
-        struct tvec_base *base = raw_cpu_read(tvec_bases);
-
-        timer->entry.next = NULL;
-        timer->base = (void *)((unsigned long)base | flags);
+        timer->entry.pprev = NULL;
+        timer->flags = flags | raw_smp_processor_id();
         timer->slack = -1;
 #ifdef CONFIG_TIMER_STATS
         timer->start_site = NULL;
@@ -699,24 +704,23 @@ EXPORT_SYMBOL(init_timer_key);
 
 static inline void detach_timer(struct timer_list *timer, bool clear_pending)
 {
-        struct list_head *entry = &timer->entry;
+        struct hlist_node *entry = &timer->entry;
 
         debug_deactivate(timer);
 
-        __list_del(entry->prev, entry->next);
+        __hlist_del(entry);
         if (clear_pending)
-                entry->next = NULL;
-        entry->prev = LIST_POISON2;
+                entry->pprev = NULL;
+        entry->next = LIST_POISON2;
 }
 
 static inline void
 detach_expired_timer(struct timer_list *timer, struct tvec_base *base)
 {
         detach_timer(timer, true);
-        if (!tbase_get_deferrable(timer->base))
+        if (!(timer->flags & TIMER_DEFERRABLE))
                 base->active_timers--;
         base->all_timers--;
-        (void)catchup_timer_jiffies(base);
 }
 
 static int detach_if_pending(struct timer_list *timer, struct tvec_base *base,
@@ -726,13 +730,14 @@ static int detach_if_pending(struct timer_list *timer, struct tvec_base *base,
                 return 0;
 
         detach_timer(timer, clear_pending);
-        if (!tbase_get_deferrable(timer->base)) {
+        if (!(timer->flags & TIMER_DEFERRABLE)) {
                 base->active_timers--;
                 if (timer->expires == base->next_timer)
                         base->next_timer = base->timer_jiffies;
         }
-        base->all_timers--;
-        (void)catchup_timer_jiffies(base);
+        /* If this was the last timer, advance base->jiffies */
+        if (!--base->all_timers)
+                base->timer_jiffies = jiffies;
         return 1;
 }
 
@@ -744,24 +749,22 @@ static int detach_if_pending(struct timer_list *timer, struct tvec_base *base,
  * So __run_timers/migrate_timers can safely modify all timers which could
  * be found on ->tvX lists.
 *
- * When the timer's base is locked, and the timer removed from list, it is
- * possible to set timer->base = NULL and drop the lock: the timer remains
- * locked.
+ * When the timer's base is locked and removed from the list, the
+ * TIMER_MIGRATING flag is set, FIXME
  */
 static struct tvec_base *lock_timer_base(struct timer_list *timer,
                                          unsigned long *flags)
         __acquires(timer->base->lock)
 {
-        struct tvec_base *base;
-
         for (;;) {
-                struct tvec_base *prelock_base = timer->base;
-                base = tbase_get_base(prelock_base);
-                if (likely(base != NULL)) {
+                u32 tf = timer->flags;
+                struct tvec_base *base;
+
+                if (!(tf & TIMER_MIGRATING)) {
+                        base = per_cpu_ptr(&tvec_bases, tf & TIMER_CPUMASK);
                         spin_lock_irqsave(&base->lock, *flags);
-                        if (likely(prelock_base == timer->base))
+                        if (timer->flags == tf)
                                 return base;
-                        /* The timer has migrated to another CPU */
                         spin_unlock_irqrestore(&base->lock, *flags);
                 }
                 cpu_relax();
@@ -770,11 +773,11 @@ static struct tvec_base *lock_timer_base(struct timer_list *timer,
 
 static inline int
 __mod_timer(struct timer_list *timer, unsigned long expires,
             bool pending_only, int pinned)
 {
         struct tvec_base *base, *new_base;
         unsigned long flags;
-        int ret = 0 , cpu;
+        int ret = 0;
 
         timer_stats_timer_set_start_info(timer);
         BUG_ON(!timer->function);
@@ -787,8 +790,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
 
         debug_activate(timer, expires);
 
-        cpu = get_nohz_timer_target(pinned);
-        new_base = per_cpu(tvec_bases, cpu);
+        new_base = get_target_base(base, pinned);
 
         if (base != new_base) {
                 /*
@@ -800,11 +802,13 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
                  */
                 if (likely(base->running_timer != timer)) {
                         /* See the comment in lock_timer_base() */
-                        timer_set_base(timer, NULL);
+                        timer->flags |= TIMER_MIGRATING;
+
                         spin_unlock(&base->lock);
                         base = new_base;
                         spin_lock(&base->lock);
-                        timer_set_base(timer, base);
+                        timer->flags &= ~TIMER_BASEMASK;
+                        timer->flags |= base->cpu;
                 }
         }
 
@@ -966,13 +970,13 @@ EXPORT_SYMBOL(add_timer);
  */
 void add_timer_on(struct timer_list *timer, int cpu)
 {
-        struct tvec_base *base = per_cpu(tvec_bases, cpu);
+        struct tvec_base *base = per_cpu_ptr(&tvec_bases, cpu);
         unsigned long flags;
 
         timer_stats_timer_set_start_info(timer);
         BUG_ON(timer_pending(timer) || !timer->function);
         spin_lock_irqsave(&base->lock, flags);
-        timer_set_base(timer, base);
+        timer->flags = (timer->flags & ~TIMER_BASEMASK) | cpu;
         debug_activate(timer, timer->expires);
         internal_add_timer(base, timer);
         spin_unlock_irqrestore(&base->lock, flags);
@@ -1037,8 +1041,6 @@ int try_to_del_timer_sync(struct timer_list *timer)
 EXPORT_SYMBOL(try_to_del_timer_sync);
 
 #ifdef CONFIG_SMP
-static DEFINE_PER_CPU(struct tvec_base, __tvec_bases);
-
 /**
  * del_timer_sync - deactivate a timer and wait for the handler to finish.
  * @timer: the timer to be deactivated
@@ -1093,7 +1095,7 @@ int del_timer_sync(struct timer_list *timer)
          * don't use it in hardirq context, because it
          * could lead to deadlock.
          */
-        WARN_ON(in_irq() && !tbase_get_irqsafe(timer->base));
+        WARN_ON(in_irq() && !(timer->flags & TIMER_IRQSAFE));
         for (;;) {
                 int ret = try_to_del_timer_sync(timer);
                 if (ret >= 0)
@@ -1107,17 +1109,17 @@ EXPORT_SYMBOL(del_timer_sync);
 static int cascade(struct tvec_base *base, struct tvec *tv, int index)
 {
         /* cascade all the timers from tv up one level */
-        struct timer_list *timer, *tmp;
-        struct list_head tv_list;
+        struct timer_list *timer;
+        struct hlist_node *tmp;
+        struct hlist_head tv_list;
 
-        list_replace_init(tv->vec + index, &tv_list);
+        hlist_move_list(tv->vec + index, &tv_list);
 
         /*
          * We are removing _all_ timers from the list, so we
          * don't have to detach them individually.
          */
-        list_for_each_entry_safe(timer, tmp, &tv_list, entry) {
-                BUG_ON(tbase_get_base(timer->base) != base);
+        hlist_for_each_entry_safe(timer, tmp, &tv_list, entry) {
                 /* No accounting, while moving them */
                 __internal_add_timer(base, timer);
         }
@@ -1182,14 +1184,18 @@ static inline void __run_timers(struct tvec_base *base)
         struct timer_list *timer;
 
         spin_lock_irq(&base->lock);
-        if (catchup_timer_jiffies(base)) {
-                spin_unlock_irq(&base->lock);
-                return;
-        }
+
         while (time_after_eq(jiffies, base->timer_jiffies)) {
-                struct list_head work_list;
-                struct list_head *head = &work_list;
-                int index = base->timer_jiffies & TVR_MASK;
+                struct hlist_head work_list;
+                struct hlist_head *head = &work_list;
+                int index;
+
+                if (!base->all_timers) {
+                        base->timer_jiffies = jiffies;
+                        break;
+                }
+
+                index = base->timer_jiffies & TVR_MASK;
 
                 /*
                  * Cascade timers:
@@ -1200,16 +1206,16 @@ static inline void __run_timers(struct tvec_base *base)
                                         !cascade(base, &base->tv4, INDEX(2)))
                         cascade(base, &base->tv5, INDEX(3));
                 ++base->timer_jiffies;
-                list_replace_init(base->tv1.vec + index, head);
-                while (!list_empty(head)) {
+                hlist_move_list(base->tv1.vec + index, head);
+                while (!hlist_empty(head)) {
                         void (*fn)(unsigned long);
                         unsigned long data;
                         bool irqsafe;
 
-                        timer = list_first_entry(head, struct timer_list,entry);
+                        timer = hlist_entry(head->first, struct timer_list, entry);
                         fn = timer->function;
                         data = timer->data;
-                        irqsafe = tbase_get_irqsafe(timer->base);
+                        irqsafe = timer->flags & TIMER_IRQSAFE;
 
                         timer_stats_account_timer(timer);
 
@@ -1248,8 +1254,8 @@ static unsigned long __next_timer_interrupt(struct tvec_base *base)
         /* Look for timer events in tv1. */
         index = slot = timer_jiffies & TVR_MASK;
         do {
-                list_for_each_entry(nte, base->tv1.vec + slot, entry) {
-                        if (tbase_get_deferrable(nte->base))
+                hlist_for_each_entry(nte, base->tv1.vec + slot, entry) {
+                        if (nte->flags & TIMER_DEFERRABLE)
                                 continue;
 
                         found = 1;
@@ -1279,8 +1285,8 @@ cascade:
 
         index = slot = timer_jiffies & TVN_MASK;
         do {
-                list_for_each_entry(nte, varp->vec + slot, entry) {
-                        if (tbase_get_deferrable(nte->base))
+                hlist_for_each_entry(nte, varp->vec + slot, entry) {
+                        if (nte->flags & TIMER_DEFERRABLE)
                                 continue;
 
                         found = 1;
@@ -1311,54 +1317,48 @@
  * Check, if the next hrtimer event is before the next timer wheel
  * event:
  */
-static unsigned long cmp_next_hrtimer_event(unsigned long now,
-                                            unsigned long expires)
+static u64 cmp_next_hrtimer_event(u64 basem, u64 expires)
 {
-        ktime_t hr_delta = hrtimer_get_next_event();
-        struct timespec tsdelta;
-        unsigned long delta;
-
-        if (hr_delta.tv64 == KTIME_MAX)
-                return expires;
+        u64 nextevt = hrtimer_get_next_event();
 
         /*
-         * Expired timer available, let it expire in the next tick
+         * If high resolution timers are enabled
+         * hrtimer_get_next_event() returns KTIME_MAX.
          */
-        if (hr_delta.tv64 <= 0)
-                return now + 1;
-
-        tsdelta = ktime_to_timespec(hr_delta);
-        delta = timespec_to_jiffies(&tsdelta);
+        if (expires <= nextevt)
+                return expires;
 
         /*
-         * Limit the delta to the max value, which is checked in
-         * tick_nohz_stop_sched_tick():
+         * If the next timer is already expired, return the tick base
+         * time so the tick is fired immediately.
          */
-        if (delta > NEXT_TIMER_MAX_DELTA)
-                delta = NEXT_TIMER_MAX_DELTA;
+        if (nextevt <= basem)
+                return basem;
 
         /*
-         * Take rounding errors in to account and make sure, that it
-         * expires in the next tick. Otherwise we go into an endless
-         * ping pong due to tick_nohz_stop_sched_tick() retriggering
-         * the timer softirq
+         * Round up to the next jiffie. High resolution timers are
+         * off, so the hrtimers are expired in the tick and we need to
+         * make sure that this tick really expires the timer to avoid
+         * a ping pong of the nohz stop code.
+         *
+         * Use DIV_ROUND_UP_ULL to prevent gcc calling __divdi3
          */
-        if (delta < 1)
-                delta = 1;
-        now += delta;
-        if (time_before(now, expires))
-                return now;
-        return expires;
+        return DIV_ROUND_UP_ULL(nextevt, TICK_NSEC) * TICK_NSEC;
 }
 
 /**
- * get_next_timer_interrupt - return the jiffy of the next pending timer
- * @now: current time (in jiffies)
+ * get_next_timer_interrupt - return the time (clock mono) of the next timer
+ * @basej: base time jiffies
+ * @basem: base time clock monotonic
+ *
+ * Returns the tick aligned clock monotonic time of the next pending
+ * timer or KTIME_MAX if no timer is pending.
  */
-unsigned long get_next_timer_interrupt(unsigned long now)
+u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
 {
-        struct tvec_base *base = __this_cpu_read(tvec_bases);
-        unsigned long expires = now + NEXT_TIMER_MAX_DELTA;
+        struct tvec_base *base = this_cpu_ptr(&tvec_bases);
+        u64 expires = KTIME_MAX;
+        unsigned long nextevt;
 
         /*
          * Pretend that there is no timer pending if the cpu is offline.
@@ -1371,14 +1371,15 @@ unsigned long get_next_timer_interrupt(unsigned long now)
         if (base->active_timers) {
                 if (time_before_eq(base->next_timer, base->timer_jiffies))
                         base->next_timer = __next_timer_interrupt(base);
-                expires = base->next_timer;
+                nextevt = base->next_timer;
+                if (time_before_eq(nextevt, basej))
+                        expires = basem;
+                else
+                        expires = basem + (nextevt - basej) * TICK_NSEC;
         }
         spin_unlock(&base->lock);
 
-        if (time_before_eq(expires, now))
-                return now;
-
-        return cmp_next_hrtimer_event(now, expires);
+        return cmp_next_hrtimer_event(basem, expires);
 }
 #endif
 
@@ -1407,9 +1408,7 @@ void update_process_times(int user_tick)
  */
 static void run_timer_softirq(struct softirq_action *h)
 {
-        struct tvec_base *base = __this_cpu_read(tvec_bases);
-
-        hrtimer_run_pending();
+        struct tvec_base *base = this_cpu_ptr(&tvec_bases);
 
         if (time_after_eq(jiffies, base->timer_jiffies))
                 __run_timers(base);
@@ -1545,15 +1544,16 @@ signed long __sched schedule_timeout_uninterruptible(signed long timeout)
 EXPORT_SYMBOL(schedule_timeout_uninterruptible);
 
 #ifdef CONFIG_HOTPLUG_CPU
-static void migrate_timer_list(struct tvec_base *new_base, struct list_head *head)
+static void migrate_timer_list(struct tvec_base *new_base, struct hlist_head *head)
 {
         struct timer_list *timer;
+        int cpu = new_base->cpu;
 
-        while (!list_empty(head)) {
-                timer = list_first_entry(head, struct timer_list, entry);
+        while (!hlist_empty(head)) {
+                timer = hlist_entry(head->first, struct timer_list, entry);
                 /* We ignore the accounting on the dying cpu */
                 detach_timer(timer, false);
-                timer_set_base(timer, new_base);
+                timer->flags = (timer->flags & ~TIMER_BASEMASK) | cpu;
                 internal_add_timer(new_base, timer);
         }
 }
@@ -1565,8 +1565,8 @@ static void migrate_timers(int cpu)
         int i;
 
         BUG_ON(cpu_online(cpu));
-        old_base = per_cpu(tvec_bases, cpu);
-        new_base = get_cpu_var(tvec_bases);
+        old_base = per_cpu_ptr(&tvec_bases, cpu);
+        new_base = this_cpu_ptr(&tvec_bases);
         /*
          * The caller is globally serialized and nobody else
          * takes two locks at once, deadlock is not possible.
@@ -1590,7 +1590,6 @@ static void migrate_timers(int cpu)
 
         spin_unlock(&old_base->lock);
         spin_unlock_irq(&new_base->lock);
-        put_cpu_var(tvec_bases);
 }
 
 static int timer_cpu_notify(struct notifier_block *self,
@@ -1616,52 +1615,27 @@ static inline void timer_register_cpu_notifier(void)
 static inline void timer_register_cpu_notifier(void) { }
 #endif /* CONFIG_HOTPLUG_CPU */
 
-static void __init init_timer_cpu(struct tvec_base *base, int cpu)
+static void __init init_timer_cpu(int cpu)
 {
-        int j;
-
-        BUG_ON(base != tbase_get_base(base));
+        struct tvec_base *base = per_cpu_ptr(&tvec_bases, cpu);
 
         base->cpu = cpu;
-        per_cpu(tvec_bases, cpu) = base;
         spin_lock_init(&base->lock);
 
-        for (j = 0; j < TVN_SIZE; j++) {
-                INIT_LIST_HEAD(base->tv5.vec + j);
-                INIT_LIST_HEAD(base->tv4.vec + j);
-                INIT_LIST_HEAD(base->tv3.vec + j);
-                INIT_LIST_HEAD(base->tv2.vec + j);
-        }
-        for (j = 0; j < TVR_SIZE; j++)
-                INIT_LIST_HEAD(base->tv1.vec + j);
-
         base->timer_jiffies = jiffies;
         base->next_timer = base->timer_jiffies;
 }
 
 static void __init init_timer_cpus(void)
 {
-        struct tvec_base *base;
-        int local_cpu = smp_processor_id();
         int cpu;
 
-        for_each_possible_cpu(cpu) {
-                if (cpu == local_cpu)
-                        base = &boot_tvec_bases;
-#ifdef CONFIG_SMP
-                else
-                        base = per_cpu_ptr(&__tvec_bases, cpu);
-#endif
-
-                init_timer_cpu(base, cpu);
-        }
+        for_each_possible_cpu(cpu)
+                init_timer_cpu(cpu);
 }
 
 void __init init_timers(void)
 {
-        /* ensure there are enough low bits for flags in timer->base pointer */
-        BUILD_BUG_ON(__alignof__(struct tvec_base) & TIMER_FLAG_MASK);
-
         init_timer_cpus();
         init_timer_stats();
         timer_register_cpu_notifier();
@@ -1697,14 +1671,14 @@
 
 EXPORT_SYMBOL(msleep_interruptible);
 
-static int __sched do_usleep_range(unsigned long min, unsigned long max)
+static void __sched do_usleep_range(unsigned long min, unsigned long max)
 {
         ktime_t kmin;
         unsigned long delta;
 
         kmin = ktime_set(0, min * NSEC_PER_USEC);
         delta = (max - min) * NSEC_PER_USEC;
-        return schedule_hrtimeout_range(&kmin, delta, HRTIMER_MODE_REL);
+        schedule_hrtimeout_range(&kmin, delta, HRTIMER_MODE_REL);
 }
 
 /**
@@ -1712,7 +1686,7 @@ static int __sched do_usleep_range(unsigned long min, unsigned long max)
  * @min: Minimum time in usecs to sleep
  * @max: Maximum time in usecs to sleep
  */
-void usleep_range(unsigned long min, unsigned long max)
+void __sched usleep_range(unsigned long min, unsigned long max)
 {
         __set_current_state(TASK_UNINTERRUPTIBLE);
         do_usleep_range(min, max);
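
With these changes get_next_timer_interrupt() hands back a tick-aligned CLOCK_MONOTONIC expiry in nanoseconds rather than a jiffy count: the wheel's next event is converted as basem + (nextevt - basej) * TICK_NSEC and then compared against the next hrtimer event, which is rounded up to a tick boundary with DIV_ROUND_UP_ULL. The sketch below only works that arithmetic through with made-up numbers; EX_TICK_NSEC assumes HZ=1000 and none of this is taken verbatim from the patch.

/* Illustrative arithmetic only -- stand-in values, not kernel code. */
#include <stdint.h>
#include <stdio.h>

#define EX_TICK_NSEC 1000000ULL        /* 1 ms tick, i.e. an assumed HZ=1000 */

int main(void)
{
        uint64_t basem = 5000000000ULL;        /* CLOCK_MONOTONIC now, in ns */
        unsigned long basej = 5000;            /* jiffies now */
        unsigned long nextevt = 5003;          /* next wheel timer, in jiffies */

        /* wheel expiry in absolute nanoseconds, as get_next_timer_interrupt()
         * computes it: basem + (nextevt - basej) * TICK_NSEC */
        uint64_t expires = basem + (uint64_t)(nextevt - basej) * EX_TICK_NSEC;

        /* an hrtimer event rounded up to the next tick boundary, the
         * DIV_ROUND_UP_ULL step in cmp_next_hrtimer_event() */
        uint64_t hrtimer_evt = 5001500000ULL;
        uint64_t rounded = (hrtimer_evt + EX_TICK_NSEC - 1) / EX_TICK_NSEC * EX_TICK_NSEC;

        printf("wheel expiry %llu ns, hrtimer event rounded to %llu ns\n",
               (unsigned long long)expires, (unsigned long long)rounded);
        return 0;
}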