aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Frederic Weisbecker <fweisbec@gmail.com> 2017-04-21 10:00:54 -0400
committer: Ingo Molnar <mingo@kernel.org> 2017-05-17 02:19:47 -0400
commit: 411fe24e6b7c283c3a1911450cdba6dd3aaea56e (patch)
tree: f9cc671e62a3349db965d6b26a124ef1cd758d57
parent: ce6cf9a15d62fd7ee92f4f9bb754883bacf85a3e (diff)
nohz: Fix collision between tick and other hrtimers, again
This restores commit: 24b91e360ef5: ("nohz: Fix collision between tick and other hrtimers") ... which got reverted by commit: 558e8e27e73f: ('Revert "nohz: Fix collision between tick and other hrtimers"') ... due to a regression where CPUs spuriously stopped ticking.

The bug happened when a tick fired too early past its expected expiration: on IRQ exit the tick was scheduled again to the same deadline but skipped reprogramming because ts->next_tick still held that deadline in cache. This has been fixed now by resetting ts->next_tick from the tick itself. Extra care has also been taken to guard against obsolete values throughout CPU hotplug operations.

When the tick is stopped and an interrupt occurs afterward, we check on that interrupt exit if the next tick needs to be rescheduled. If it doesn't need any update, we don't want to do anything. In order to check if the tick needs an update, we compare it against the clockevent device deadline. Now that's a problem because the clockevent device is at a lower level than the tick itself if it is implemented on top of hrtimer. Every hrtimer shares this clockevent device. So comparing the next tick deadline against the clockevent device deadline is wrong because the device may be programmed for another hrtimer whose deadline collides with the tick. As a result we may accidentally end up not reprogramming the tick.

In a worst case scenario under full dynticks mode, the tick stops firing even though it is supposed to fire at 1Hz, leaving /proc/stat stalled:

    Task in a full dynticks CPU
    ----------------------------
    * hrtimer A is queued 2 seconds ahead
    * the tick is stopped, scheduled 1 second ahead
    * tick fires 1 second later
    * on tick exit, nohz schedules the tick 1 second ahead but sees the clockevent device is already programmed to that deadline, fooled by hrtimer A, the tick isn't rescheduled.
    * hrtimer A is cancelled before its deadline
    * tick never fires again until an interrupt happens...
In order to fix this, store the next tick deadline in the tick_sched local structure and reuse that value later to check whether we need to reprogram the clock after an interrupt.

On the other hand, ts->sleep_length still wants to know about the next clock event and not just the tick, so we want to improve the related comment to avoid confusion.

Reported-and-tested-by: Tim Wright <tim@binbash.co.uk>
Reported-and-tested-by: Pavel Machek <pavel@ucw.cz>
Reported-by: James Hartsock <hartsjc@redhat.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/1492783255-5051-2-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r-- kernel/time/tick-sched.c | 37
-rw-r--r-- kernel/time/tick-sched.h | 2
2 files changed, 33 insertions, 6 deletions
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index d212bb62bc08..764d2905e6a5 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -150,6 +150,12 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
150 touch_softlockup_watchdog_sched(); 150 touch_softlockup_watchdog_sched();
151 if (is_idle_task(current)) 151 if (is_idle_task(current))
152 ts->idle_jiffies++; 152 ts->idle_jiffies++;
153 /*
154 * In case the current tick fired too early past its expected
155 * expiration, make sure we don't bypass the next clock reprogramming
156 * to the same deadline.
157 */
158 ts->next_tick = 0;
153 } 159 }
154#endif 160#endif
155 update_process_times(user_mode(regs)); 161 update_process_times(user_mode(regs));
@@ -660,6 +666,12 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
660 hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED); 666 hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED);
661 else 667 else
662 tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1); 668 tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
669
670 /*
671 * Reset to make sure next tick stop doesn't get fooled by past
672 * cached clock deadline.
673 */
674 ts->next_tick = 0;
663} 675}
664 676
665static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, 677static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
@@ -771,12 +783,15 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
771 tick = expires; 783 tick = expires;
772 784
773 /* Skip reprogram of event if its not changed */ 785 /* Skip reprogram of event if its not changed */
774 if (ts->tick_stopped) { 786 if (ts->tick_stopped && (expires == ts->next_tick)) {
775 if (hrtimer_active(&ts->sched_timer)) 787 /* Sanity check: make sure clockevent is actually programmed */
776 WARN_ON_ONCE(hrtimer_get_expires(&ts->sched_timer) < dev->next_event); 788 if (likely(dev->next_event <= ts->next_tick))
777
778 if (expires == dev->next_event)
779 goto out; 789 goto out;
790
791 WARN_ON_ONCE(1);
792 printk_once("basemono: %llu ts->next_tick: %llu dev->next_event: %llu timer->active: %d timer->expires: %llu\n",
793 basemono, ts->next_tick, dev->next_event,
794 hrtimer_active(&ts->sched_timer), hrtimer_get_expires(&ts->sched_timer));
780 } 795 }
781 796
782 /* 797 /*
@@ -796,6 +811,8 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
796 trace_tick_stop(1, TICK_DEP_MASK_NONE); 811 trace_tick_stop(1, TICK_DEP_MASK_NONE);
797 } 812 }
798 813
814 ts->next_tick = tick;
815
799 /* 816 /*
800 * If the expiration time == KTIME_MAX, then we simply stop 817 * If the expiration time == KTIME_MAX, then we simply stop
801 * the tick timer. 818 * the tick timer.
@@ -811,7 +828,10 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
811 else 828 else
812 tick_program_event(tick, 1); 829 tick_program_event(tick, 1);
813out: 830out:
814 /* Update the estimated sleep length */ 831 /*
832 * Update the estimated sleep length until the next timer
833 * (not only the tick).
834 */
815 ts->sleep_length = ktime_sub(dev->next_event, now); 835 ts->sleep_length = ktime_sub(dev->next_event, now);
816 return tick; 836 return tick;
817} 837}
@@ -869,6 +889,11 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
869 if (unlikely(!cpu_online(cpu))) { 889 if (unlikely(!cpu_online(cpu))) {
870 if (cpu == tick_do_timer_cpu) 890 if (cpu == tick_do_timer_cpu)
871 tick_do_timer_cpu = TICK_DO_TIMER_NONE; 891 tick_do_timer_cpu = TICK_DO_TIMER_NONE;
892 /*
893 * Make sure the CPU doesn't get fooled by obsolete tick
894 * deadline if it comes back online later.
895 */
896 ts->next_tick = 0;
872 return false; 897 return false;
873 } 898 }
874 899
diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h
index bf38226e5c17..075444e3d48e 100644
--- a/kernel/time/tick-sched.h
+++ b/kernel/time/tick-sched.h
@@ -27,6 +27,7 @@ enum tick_nohz_mode {
27 * timer is modified for nohz sleeps. This is necessary 27 * timer is modified for nohz sleeps. This is necessary
28 * to resume the tick timer operation in the timeline 28 * to resume the tick timer operation in the timeline
29 * when the CPU returns from nohz sleep. 29 * when the CPU returns from nohz sleep.
30 * @next_tick: Next tick to be fired when in dynticks mode.
30 * @tick_stopped: Indicator that the idle tick has been stopped 31 * @tick_stopped: Indicator that the idle tick has been stopped
31 * @idle_jiffies: jiffies at the entry to idle for idle time accounting 32 * @idle_jiffies: jiffies at the entry to idle for idle time accounting
32 * @idle_calls: Total number of idle calls 33 * @idle_calls: Total number of idle calls
@@ -44,6 +45,7 @@ struct tick_sched {
44 unsigned long check_clocks; 45 unsigned long check_clocks;
45 enum tick_nohz_mode nohz_mode; 46 enum tick_nohz_mode nohz_mode;
46 ktime_t last_tick; 47 ktime_t last_tick;
48 ktime_t next_tick;
47 int inidle; 49 int inidle;
48 int tick_stopped; 50 int tick_stopped;
49 unsigned long idle_jiffies; 51 unsigned long idle_jiffies;