author     Rafael J. Wysocki <rafael.j.wysocki@intel.com>  2018-04-03 17:17:11 -0400
committer  Rafael J. Wysocki <rafael.j.wysocki@intel.com>  2018-04-09 05:54:07 -0400
commit     554c8aa8ecade210d58a252173bb8f2106552a44
tree       6712ac8a8c4ccf95730c1c1c4cdafe595280a578
parent     a59855cd8c613ba4bb95147f6176360d95f75e60
sched: idle: Select idle state before stopping the tick
In order to address the issue with short idle duration predictions by the idle governor after the scheduler tick has been stopped, reorder the code in cpuidle_idle_call() so that the governor idle state selection runs before tick_nohz_idle_stop_tick() and use the "nohz" hint returned by cpuidle_select() to decide whether or not to stop the tick.

This isn't straightforward, because menu_select() invokes tick_nohz_get_sleep_length() to get the time to the next timer event and the number returned by the latter comes from __tick_nohz_idle_stop_tick(). Fortunately, however, it is possible to compute that number without actually stopping the tick and with the help of the existing code.

Namely, tick_nohz_get_sleep_length() can be made to call tick_nohz_next_event(), introduced earlier, to get the time to the next non-highres timer event. If that happens, tick_nohz_next_event() need not be called by __tick_nohz_idle_stop_tick() again.

If it turns out that the scheduler tick cannot be stopped going forward or the next timer event is too close for the tick to be stopped, tick_nohz_get_sleep_length() can simply return the time to the next event currently programmed into the corresponding clock event device.

In addition to knowing the return value of tick_nohz_next_event(), however, tick_nohz_get_sleep_length() needs to know the time to the next highres timer event, but with the scheduler tick timer excluded, which can be computed with the help of hrtimer_get_next_event().

The minimum of that number and the tick_nohz_next_event() return value is the total time to the next timer event with the assumption that the tick will be stopped. It can be returned to the idle governor which can use it for predicting idle duration (under the assumption that the tick will be stopped) and deciding whether or not it makes sense to stop the tick before putting the CPU into the selected idle state.

With the above, the sleep_length field in struct tick_sched is not necessary any more, so drop it.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=199227
Reported-by: Doug Smythies <dsmythies@telus.net>
Reported-by: Thomas Ilsche <thomas.ilsche@tu-dresden.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
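As a rough illustration of the reordering described above, here is a standalone C sketch (not kernel code) of the new control flow in cpuidle_idle_call(): the governor is consulted first, and its "nohz" hint then decides whether the tick is stopped or retained. The stub helpers only stand in for cpuidle_select(), tick_nohz_idle_stop_tick() and tick_nohz_idle_retain_tick().

/*
 * Standalone sketch (not kernel code) of the reordered idle-entry flow:
 * pick an idle state first, get the "stop the tick?" hint, and only then
 * stop or retain the tick.  The helpers are stand-ins for the real APIs.
 */
#include <stdbool.h>
#include <stdio.h>

static int cpuidle_select_stub(bool *stop_tick)
{
	/* Pretend the governor predicts a short sleep: keep the tick. */
	*stop_tick = false;
	return 0;		/* index of the chosen idle state */
}

static void tick_stop_stub(void)   { puts("tick stopped"); }
static void tick_retain_stub(void) { puts("tick retained"); }

static void idle_entry_sketch(void)
{
	bool stop_tick = true;
	int next_state;

	/* 1. Ask the governor for an idle state and the nohz hint. */
	next_state = cpuidle_select_stub(&stop_tick);

	/* 2. Act on the hint instead of stopping the tick unconditionally. */
	if (stop_tick)
		tick_stop_stub();
	else
		tick_retain_stub();

	printf("entering idle state %d\n", next_state);
}

int main(void)
{
	idle_entry_sketch();
	return 0;
}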
-rw-r--r--  include/linux/tick.h     |  2
-rw-r--r--  kernel/sched/idle.c      | 11
-rw-r--r--  kernel/time/tick-sched.c | 61
-rw-r--r--  kernel/time/tick-sched.h |  2
4 files changed, 59 insertions(+), 17 deletions(-)
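The other half of the change, the reworked tick_nohz_get_sleep_length(), boils down to taking the minimum of the next timer event and the next highres timer with the tick timer excluded, and falling back to the event already programmed into the clock event device when the tick cannot be stopped. Below is a standalone, simplified C model of that decision; the parameters merely mimic tick_nohz_next_event(), hrtimer_next_event_without() and dev->next_event, and the example values are made up.

/*
 * Standalone sketch (not kernel code) of how the reworked
 * tick_nohz_get_sleep_length() arrives at its result.
 */
#include <stdint.h>
#include <stdio.h>

typedef int64_t ktime_ns;

static ktime_ns min_ktime(ktime_ns a, ktime_ns b) { return a < b ? a : b; }

static ktime_ns sleep_length_sketch(int can_stop_tick,
				    ktime_ns now,
				    ktime_ns next_timer_event,     /* tick_nohz_next_event() analogue */
				    ktime_ns next_hrtimer_wo_tick, /* hrtimer_next_event_without() analogue */
				    ktime_ns clockevent_next)      /* dev->next_event analogue */
{
	/* Tick cannot be stopped: report the already-programmed event. */
	if (!can_stop_tick || !next_timer_event)
		return clockevent_next - now;

	/* Tick will be stopped: earliest of wheel timer and highres timer. */
	return min_ktime(next_timer_event, next_hrtimer_wo_tick) - now;
}

int main(void)
{
	/* Example: next wheel timer in 5 ms, next highres timer in 2 ms. */
	printf("predicted sleep: %lld ns\n",
	       (long long)sleep_length_sketch(1, 0, 5000000, 2000000, 1000000));
	return 0;
}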
diff --git a/include/linux/tick.h b/include/linux/tick.h
index ef0717e5e526..e8e7ff16b929 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -116,6 +116,7 @@ extern bool tick_nohz_enabled;
 extern bool tick_nohz_tick_stopped(void);
 extern bool tick_nohz_tick_stopped_cpu(int cpu);
 extern void tick_nohz_idle_stop_tick(void);
+extern void tick_nohz_idle_retain_tick(void);
 extern void tick_nohz_idle_restart_tick(void);
 extern void tick_nohz_idle_enter(void);
 extern void tick_nohz_idle_exit(void);
@@ -139,6 +140,7 @@ static inline void tick_nohz_idle_stop_tick_protected(void)
 static inline int tick_nohz_tick_stopped(void) { return 0; }
 static inline int tick_nohz_tick_stopped_cpu(int cpu) { return 0; }
 static inline void tick_nohz_idle_stop_tick(void) { }
+static inline void tick_nohz_idle_retain_tick(void) { }
 static inline void tick_nohz_idle_restart_tick(void) { }
 static inline void tick_nohz_idle_enter(void) { }
 static inline void tick_nohz_idle_exit(void) { }
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index a966bd2a6fa0..1a3e9bddd17b 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -185,13 +185,18 @@ static void cpuidle_idle_call(void)
 	} else {
 		bool stop_tick = true;
 
-		tick_nohz_idle_stop_tick();
-		rcu_idle_enter();
-
 		/*
 		 * Ask the cpuidle framework to choose a convenient idle state.
 		 */
 		next_state = cpuidle_select(drv, dev, &stop_tick);
+
+		if (stop_tick)
+			tick_nohz_idle_stop_tick();
+		else
+			tick_nohz_idle_retain_tick();
+
+		rcu_idle_enter();
+
 		entered_state = call_cpuidle(drv, dev, next_state);
 		/*
 		 * Give the governor an opportunity to reflect on the outcome
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index f56d2c695712..c57c98c7e953 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -913,16 +913,19 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
 
 static void __tick_nohz_idle_stop_tick(struct tick_sched *ts)
 {
-	struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
 	ktime_t expires;
 	int cpu = smp_processor_id();
 
-	WARN_ON_ONCE(ts->timer_expires_base);
-
-	if (!can_stop_idle_tick(cpu, ts))
-		goto out;
-
-	expires = tick_nohz_next_event(ts, cpu);
+	/*
+	 * If tick_nohz_get_sleep_length() ran tick_nohz_next_event(), the
+	 * tick timer expiration time is known already.
+	 */
+	if (ts->timer_expires_base)
+		expires = ts->timer_expires;
+	else if (can_stop_idle_tick(cpu, ts))
+		expires = tick_nohz_next_event(ts, cpu);
+	else
+		return;
 
 	ts->idle_calls++;
 
@@ -941,9 +944,6 @@ static void __tick_nohz_idle_stop_tick(struct tick_sched *ts)
 	} else {
 		tick_nohz_retain_tick(ts);
 	}
-
-out:
-	ts->sleep_length = ktime_sub(dev->next_event, ts->idle_entrytime);
 }
 
 /**
@@ -956,6 +956,16 @@ void tick_nohz_idle_stop_tick(void)
 	__tick_nohz_idle_stop_tick(this_cpu_ptr(&tick_cpu_sched));
 }
 
+void tick_nohz_idle_retain_tick(void)
+{
+	tick_nohz_retain_tick(this_cpu_ptr(&tick_cpu_sched));
+	/*
+	 * Undo the effect of get_next_timer_interrupt() called from
+	 * tick_nohz_next_event().
+	 */
+	timer_clear_idle();
+}
+
 /**
  * tick_nohz_idle_enter - prepare for entering idle on the current CPU
  *
@@ -1012,15 +1022,42 @@ bool tick_nohz_idle_got_tick(void)
 }
 
 /**
- * tick_nohz_get_sleep_length - return the length of the current sleep
+ * tick_nohz_get_sleep_length - return the expected length of the current sleep
  *
  * Called from power state control code with interrupts disabled
  */
 ktime_t tick_nohz_get_sleep_length(void)
 {
+	struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
 	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
+	int cpu = smp_processor_id();
+	/*
+	 * The idle entry time is expected to be a sufficient approximation of
+	 * the current time at this point.
+	 */
+	ktime_t now = ts->idle_entrytime;
+	ktime_t next_event;
+
+	WARN_ON_ONCE(!ts->inidle);
+
+	if (!can_stop_idle_tick(cpu, ts))
+		goto out_dev;
+
+	next_event = tick_nohz_next_event(ts, cpu);
+	if (!next_event)
+		goto out_dev;
+
+	/*
+	 * If the next highres timer to expire is earlier than next_event, the
+	 * idle governor needs to know that.
+	 */
+	next_event = min_t(u64, next_event,
+			   hrtimer_next_event_without(&ts->sched_timer));
+
+	return ktime_sub(next_event, now);
 
-	return ts->sleep_length;
+out_dev:
+	return ktime_sub(dev->next_event, now);
 }
 
 /**
diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h
index 53e45a39bdbc..2b845f2c44b1 100644
--- a/kernel/time/tick-sched.h
+++ b/kernel/time/tick-sched.h
@@ -38,7 +38,6 @@ enum tick_nohz_mode {
  * @idle_exittime:	Time when the idle state was left
  * @idle_sleeptime:	Sum of the time slept in idle with sched tick stopped
  * @iowait_sleeptime:	Sum of the time slept in idle with sched tick stopped, with IO outstanding
- * @sleep_length:	Duration of the current idle sleep
  * @timer_expires:	Anticipated timer expiration time (in case sched tick is stopped)
  * @timer_expires_base:	Base time clock monotonic for @timer_expires
  * @do_timer_lst:	CPU was the last one doing do_timer before going idle
@@ -60,7 +59,6 @@ struct tick_sched {
 	ktime_t				idle_exittime;
 	ktime_t				idle_sleeptime;
 	ktime_t				iowait_sleeptime;
-	ktime_t				sleep_length;
 	unsigned long			last_jiffies;
 	u64				timer_expires;
 	u64				timer_expires_base;