aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Ai Li <aili@codeaurora.org> 2010-08-09 20:20:13 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2010-08-09 23:45:04 -0400
commit: 71abbbf856a0e70ca478782505c800891260ba84 (patch)
tree: 00ae494afd2868056753984035e1bfc0c2040257
parent: d2997b1042ec150616c1963b5e5e919ffd0b0ebf (diff)
cpuidle: extend cpuidle and menu governor to handle dynamic states
On some SoC chips, HW resources may be in use during any particular idle period. As a consequence, the cpuidle states that the SoC is safe to enter can change from idle period to idle period. In addition, the latency and threshold of each cpuidle state can vary, depending on the operating condition when the CPU becomes idle, e.g. the current cpu frequency, the current state of the HW blocks, etc. cpuidle core and the menu governor, in the current form, are geared towards cpuidle states that are static, i.e. the availability of the states, their latencies, and their thresholds are non-changing during run time. cpuidle does not provide any hook that cpuidle drivers can use to adjust those values on the fly for the current idle period before the menu governor selects the target cpuidle state. This patch extends cpuidle core and the menu governor to handle states that are dynamic. There are three additions in the patch and the patch maintains backwards-compatibility with existing cpuidle drivers. 1) add prepare() to struct cpuidle_device. A cpuidle driver can hook into the callback and cpuidle will call prepare() before calling the governor's select function. The callback gives the cpuidle driver a chance to update the dynamic information of the cpuidle states for the current idle period, e.g. state availability, latencies, thresholds, power values, etc. 2) add CPUIDLE_FLAG_IGNORE as one of the state flags. In the prepare() function, a cpuidle driver can set/clear the flag to indicate to the menu governor whether a cpuidle state should be ignored, i.e. not available, during the current idle period. 3) add power_specified bit to struct cpuidle_device. The menu governor currently assumes that the cpuidle states are arranged in the order of increasing latency, threshold, and power savings. This is true or can be made true for static states. 
Once the state parameters are dynamic, the latencies, thresholds, and power savings for the cpuidle states can increase or decrease by different amounts from idle period to idle period. So the assumption of increasing latency, threshold, and power savings from Cn to C(n+1) can no longer be guaranteed. It can be straightforward to calculate the power consumption of each available state and to specify it in power_usage for the idle period. Using the power_usage fields, the menu governor then selects the state that has the lowest power consumption and that still satisfies all other criteria. The power_specified bit defaults to 0. For existing cpuidle drivers, cpuidle detects that power_specified is 0 and fills in a dummy set of power_usage values. Signed-off-by: Ai Li <aili@codeaurora.org> Cc: Len Brown <len.brown@intel.com> Acked-by: Arjan van de Ven <arjan@linux.intel.com> Cc: Ingo Molnar <mingo@elte.hu> Cc: Venkatesh Pallipadi <venki@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- drivers/cpuidle/cpuidle.c 31
-rw-r--r-- drivers/cpuidle/governors/menu.c 23
-rw-r--r-- include/linux/cpuidle.h 4
3 files changed, 51 insertions, 7 deletions
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index dbefe15bd58..a5071084337 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -74,6 +74,17 @@ static void cpuidle_idle_call(void)
74 */ 74 */
75 hrtimer_peek_ahead_timers(); 75 hrtimer_peek_ahead_timers();
76#endif 76#endif
77
78 /*
79 * Call the device's prepare function before calling the
80 * governor's select function. ->prepare gives the device's
81 * cpuidle driver a chance to update any dynamic information
82 * of its cpuidle states for the current idle period, e.g.
83 * state availability, latencies, residencies, etc.
84 */
85 if (dev->prepare)
86 dev->prepare(dev);
87
77 /* ask the governor for the next state */ 88 /* ask the governor for the next state */
78 next_state = cpuidle_curr_governor->select(dev); 89 next_state = cpuidle_curr_governor->select(dev);
79 if (need_resched()) { 90 if (need_resched()) {
@@ -282,6 +293,26 @@ static int __cpuidle_register_device(struct cpuidle_device *dev)
282 293
283 poll_idle_init(dev); 294 poll_idle_init(dev);
284 295
296 /*
297 * cpuidle driver should set the dev->power_specified bit
298 * before registering the device if the driver provides
299 * power_usage numbers.
300 *
301 * For those devices whose ->power_specified is not set,
302 * we fill in power_usage with decreasing values as the
303 * cpuidle code has an implicit assumption that state Cn
304 * uses less power than C(n-1).
305 *
306 * With CONFIG_ARCH_HAS_CPU_RELAX, C0 is already assigned
307 * a power value of -1. So we use -2, -3, etc, for other
308 * c-states.
309 */
310 if (!dev->power_specified) {
311 int i;
312 for (i = CPUIDLE_DRIVER_STATE_START; i < dev->state_count; i++)
313 dev->states[i].power_usage = -1 - i;
314 }
315
285 per_cpu(cpuidle_devices, dev->cpu) = dev; 316 per_cpu(cpuidle_devices, dev->cpu) = dev;
286 list_add(&dev->device_list, &cpuidle_detected_devices); 317 list_add(&dev->device_list, &cpuidle_detected_devices);
287 if ((ret = cpuidle_add_sysfs(sys_dev))) { 318 if ((ret = cpuidle_add_sysfs(sys_dev))) {
diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index 1b128702d30..c2408bbe9c2 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -234,6 +234,7 @@ static int menu_select(struct cpuidle_device *dev)
234{ 234{
235 struct menu_device *data = &__get_cpu_var(menu_devices); 235 struct menu_device *data = &__get_cpu_var(menu_devices);
236 int latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY); 236 int latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY);
237 unsigned int power_usage = -1;
237 int i; 238 int i;
238 int multiplier; 239 int multiplier;
239 240
@@ -278,19 +279,27 @@ static int menu_select(struct cpuidle_device *dev)
278 if (data->expected_us > 5) 279 if (data->expected_us > 5)
279 data->last_state_idx = CPUIDLE_DRIVER_STATE_START; 280 data->last_state_idx = CPUIDLE_DRIVER_STATE_START;
280 281
281 282 /*
282 /* find the deepest idle state that satisfies our constraints */ 283 * Find the idle state with the lowest power while satisfying
284 * our constraints.
285 */
283 for (i = CPUIDLE_DRIVER_STATE_START; i < dev->state_count; i++) { 286 for (i = CPUIDLE_DRIVER_STATE_START; i < dev->state_count; i++) {
284 struct cpuidle_state *s = &dev->states[i]; 287 struct cpuidle_state *s = &dev->states[i];
285 288
289 if (s->flags & CPUIDLE_FLAG_IGNORE)
290 continue;
286 if (s->target_residency > data->predicted_us) 291 if (s->target_residency > data->predicted_us)
287 break; 292 continue;
288 if (s->exit_latency > latency_req) 293 if (s->exit_latency > latency_req)
289 break; 294 continue;
290 if (s->exit_latency * multiplier > data->predicted_us) 295 if (s->exit_latency * multiplier > data->predicted_us)
291 break; 296 continue;
292 data->exit_us = s->exit_latency; 297
293 data->last_state_idx = i; 298 if (s->power_usage < power_usage) {
299 power_usage = s->power_usage;
300 data->last_state_idx = i;
301 data->exit_us = s->exit_latency;
302 }
294 } 303 }
295 304
296 return data->last_state_idx; 305 return data->last_state_idx;
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index 55215cce500..36ca9721a0c 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -52,6 +52,7 @@ struct cpuidle_state {
52#define CPUIDLE_FLAG_SHALLOW (0x20) /* low latency, minimal savings */ 52#define CPUIDLE_FLAG_SHALLOW (0x20) /* low latency, minimal savings */
53#define CPUIDLE_FLAG_BALANCED (0x40) /* medium latency, moderate savings */ 53#define CPUIDLE_FLAG_BALANCED (0x40) /* medium latency, moderate savings */
54#define CPUIDLE_FLAG_DEEP (0x80) /* high latency, large savings */ 54#define CPUIDLE_FLAG_DEEP (0x80) /* high latency, large savings */
55#define CPUIDLE_FLAG_IGNORE (0x100) /* ignore during this idle period */
55 56
56#define CPUIDLE_DRIVER_FLAGS_MASK (0xFFFF0000) 57#define CPUIDLE_DRIVER_FLAGS_MASK (0xFFFF0000)
57 58
@@ -84,6 +85,7 @@ struct cpuidle_state_kobj {
84struct cpuidle_device { 85struct cpuidle_device {
85 unsigned int registered:1; 86 unsigned int registered:1;
86 unsigned int enabled:1; 87 unsigned int enabled:1;
88 unsigned int power_specified:1;
87 unsigned int cpu; 89 unsigned int cpu;
88 90
89 int last_residency; 91 int last_residency;
@@ -97,6 +99,8 @@ struct cpuidle_device {
97 struct completion kobj_unregister; 99 struct completion kobj_unregister;
98 void *governor_data; 100 void *governor_data;
99 struct cpuidle_state *safe_state; 101 struct cpuidle_state *safe_state;
102
103 int (*prepare) (struct cpuidle_device *dev);
100}; 104};
101 105
102DECLARE_PER_CPU(struct cpuidle_device *, cpuidle_devices); 106DECLARE_PER_CPU(struct cpuidle_device *, cpuidle_devices);