author    Rafael J. Wysocki <rafael.j.wysocki@intel.com>    2018-04-05 13:12:34 -0400
committer Rafael J. Wysocki <rafael.j.wysocki@intel.com>    2018-04-09 05:54:56 -0400
commit    296bb1e51a4838a6488ec5ce676607093482ecbc
tree      15bb67b6593378aa3cc3b60e8bf754278f7c53aa
parent    554c8aa8ecade210d58a252173bb8f2106552a44
cpuidle: menu: Refine idle state selection for running tick
If the tick isn't stopped, the target residency of the state selected
by the menu governor may be greater than the actual time to the next
tick and that means lost energy.

To avoid that, make tick_nohz_get_sleep_length() return the current
time to the next event (before stopping the tick) in addition to the
estimated one via an extra pointer argument and make menu_select() use
that value to refine the state selection when necessary.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
-rw-r--r--  drivers/cpuidle/governors/menu.c  | 27
-rw-r--r--  include/linux/tick.h              |  7
-rw-r--r--  kernel/time/tick-sched.c          | 12
3 files changed, 35 insertions, 11 deletions
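
For illustration, here is a stripped-down sketch of the state refinement this
patch adds to menu_select(): if the tick is going to keep running, the governor
walks back toward shallower idle states until it finds one whose target
residency fits into the time remaining before the next timer event. The struct
and function names below are invented for the example and the driver/device
split of the real cpuidle API is collapsed into a single array; only the loop's
logic mirrors the actual hunk.

/* Pared-down idle-state descriptor; the real struct cpuidle_state has more fields. */
struct sketch_state {
	unsigned int target_residency_us;	/* minimum idle time that pays off */
	int disabled;				/* disabled by the driver or via sysfs */
};

/*
 * If the tick stays running, the CPU will be woken after at most
 * delta_next_us microseconds, so a state that needs a longer residency
 * than that wastes energy.  Fall back to a shallower enabled state.
 */
int refine_for_running_tick(const struct sketch_state *states, int idx,
			    unsigned int delta_next_us)
{
	int i;

	if (states[idx].target_residency_us <= delta_next_us)
		return idx;			/* current choice already fits */

	for (i = idx - 1; i >= 0; i--) {
		if (states[i].disabled)
			continue;

		idx = i;			/* remember the shallowest enabled state seen */
		if (states[i].target_residency_us <= delta_next_us)
			break;			/* found one that fits the time budget */
	}

	return idx;
}

As in the patch, idx is updated before the residency check, so even when no
enabled state fits within the budget the function settles on the shallowest
enabled state instead of keeping the original, too-deep choice.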
diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index f53a929bd2bd..267982e471e0 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -295,6 +295,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	unsigned int expected_interval;
 	unsigned long nr_iowaiters, cpu_load;
 	int resume_latency = dev_pm_qos_raw_read_value(device);
+	ktime_t delta_next;
 
 	if (data->needs_update) {
 		menu_update(drv, dev);
@@ -312,7 +313,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	}
 
 	/* determine the expected residency time, round up */
-	data->next_timer_us = ktime_to_us(tick_nohz_get_sleep_length());
+	data->next_timer_us = ktime_to_us(tick_nohz_get_sleep_length(&delta_next));
 
 	get_iowait_load(&nr_iowaiters, &cpu_load);
 	data->bucket = which_bucket(data->next_timer_us, nr_iowaiters);
@@ -396,9 +397,31 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	 * expected idle duration is shorter than the tick period length.
 	 */
 	if ((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) ||
-	    expected_interval < TICK_USEC)
+	    expected_interval < TICK_USEC) {
+		unsigned int delta_next_us = ktime_to_us(delta_next);
+
 		*stop_tick = false;
 
+		if (!tick_nohz_tick_stopped() && idx > 0 &&
+		    drv->states[idx].target_residency > delta_next_us) {
+			/*
+			 * The tick is not going to be stopped and the target
+			 * residency of the state to be returned is not within
+			 * the time until the next timer event including the
+			 * tick, so try to correct that.
+			 */
+			for (i = idx - 1; i >= 0; i--) {
+				if (drv->states[i].disabled ||
+				    dev->states_usage[i].disable)
+					continue;
+
+				idx = i;
+				if (drv->states[i].target_residency <= delta_next_us)
+					break;
+			}
+		}
+	}
+
 	data->last_state_idx = idx;
 
 	return data->last_state_idx;
diff --git a/include/linux/tick.h b/include/linux/tick.h
index e8e7ff16b929..55388ab45fd4 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -122,7 +122,7 @@ extern void tick_nohz_idle_enter(void);
 extern void tick_nohz_idle_exit(void);
 extern void tick_nohz_irq_exit(void);
 extern bool tick_nohz_idle_got_tick(void);
-extern ktime_t tick_nohz_get_sleep_length(void);
+extern ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next);
 extern unsigned long tick_nohz_get_idle_calls(void);
 extern unsigned long tick_nohz_get_idle_calls_cpu(int cpu);
 extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time);
@@ -146,9 +146,10 @@ static inline void tick_nohz_idle_enter(void) { }
 static inline void tick_nohz_idle_exit(void) { }
 static inline bool tick_nohz_idle_got_tick(void) { return false; }
 
-static inline ktime_t tick_nohz_get_sleep_length(void)
+static inline ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next)
 {
-	return NSEC_PER_SEC / HZ;
+	*delta_next = TICK_NSEC;
+	return *delta_next;
 }
 static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; }
 static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; }
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index c57c98c7e953..edb9d49b4996 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -1023,10 +1023,11 @@ bool tick_nohz_idle_got_tick(void)
 
 /**
  * tick_nohz_get_sleep_length - return the expected length of the current sleep
+ * @delta_next: duration until the next event if the tick cannot be stopped
  *
  * Called from power state control code with interrupts disabled
  */
-ktime_t tick_nohz_get_sleep_length(void)
+ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next)
 {
 	struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
 	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
@@ -1040,12 +1041,14 @@ ktime_t tick_nohz_get_sleep_length(void)
 
 	WARN_ON_ONCE(!ts->inidle);
 
+	*delta_next = ktime_sub(dev->next_event, now);
+
 	if (!can_stop_idle_tick(cpu, ts))
-		goto out_dev;
+		return *delta_next;
 
 	next_event = tick_nohz_next_event(ts, cpu);
 	if (!next_event)
-		goto out_dev;
+		return *delta_next;
 
 	/*
 	 * If the next highres timer to expire is earlier than next_event, the
@@ -1055,9 +1058,6 @@ ktime_t tick_nohz_get_sleep_length(void)
 			   hrtimer_next_event_without(&ts->sched_timer));
 
 	return ktime_sub(next_event, now);
-
-out_dev:
-	return ktime_sub(dev->next_event, now);
 }
 
 /**
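
Taken together, the updated interface hands a governor two durations in a
single call: the estimated sleep length as the return value and, via the new
pointer argument, the time to the next event if the tick keeps running. Below
is a minimal userspace-style sketch of that contract; the mock function and
the numbers in it are invented for illustration and merely stand in for the
real tick-sched internals.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef int64_t ktime_t;		/* nanoseconds, mirroring the kernel's ktime_t */

#define MOCK_TICK_NS	4000000LL	/* 4 ms tick period, i.e. HZ == 250 */

/*
 * Mock with the same shape as tick_nohz_get_sleep_length() after this
 * patch: the return value is the estimated sleep length, *delta_next is
 * the time to the next event with the tick still running.  The values
 * are made up: the next tick is the nearest event, the next real timer
 * is ~200 ms away.
 */
static ktime_t mock_get_sleep_length(ktime_t *delta_next)
{
	*delta_next = MOCK_TICK_NS;
	return 50 * MOCK_TICK_NS;
}

int main(void)
{
	ktime_t delta_next;
	ktime_t estimate = mock_get_sleep_length(&delta_next);
	bool tick_stopped = false;	/* pretend the tick has not been stopped yet */

	/*
	 * A governor that decides to leave the tick running must not plan
	 * for a sleep longer than delta_next, whatever the estimate says.
	 */
	ktime_t budget = (!tick_stopped && estimate > delta_next) ?
			 delta_next : estimate;

	printf("estimate %lld ns, delta_next %lld ns, usable budget %lld ns\n",
	       (long long)estimate, (long long)delta_next, (long long)budget);
	return 0;
}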