author    Colin Cross <ccross@android.com>  2012-05-07 20:57:41 -0400
committer Len Brown <len.brown@intel.com>   2012-06-02 00:49:09 -0400
commit    4126c0197bc8c58a0bb7fcda07b01b596b6fb4c5 (patch)
tree      c60aacfbc69627b96dc39dc8dc71ea8f0893c5e5 /drivers
parent    3af272ab75c7a0c7fa5ae5507724d961f7e7718b (diff)
cpuidle: add support for states that affect multiple cpus
On some ARM SMP SoCs (OMAP4460, Tegra 2, and probably more), the cpus cannot be independently powered down, either due to sequencing restrictions (on Tegra 2, cpu 0 must be the last to power down), or due to HW bugs (on OMAP4460, a cpu powering up will corrupt the gic state unless the other cpu runs a workaround). Each cpu has a power state that it can enter without coordinating with the other cpu (usually Wait For Interrupt, or WFI), and one or more "coupled" power states that affect blocks shared between the cpus (L2 cache, interrupt controller, and sometimes the whole SoC). Entering a coupled power state must be tightly controlled on both cpus.

The easiest solution to implementing coupled cpu power states is to hotplug all but one cpu whenever possible, usually using a cpufreq governor that looks at cpu load to determine when to enable the secondary cpus. This causes problems, as hotplug is an expensive operation, so the number of hotplug transitions must be minimized, leading to very slow response to loads, often on the order of seconds.

This file implements an alternative solution, where each cpu will wait in the WFI state until all cpus are ready to enter a coupled state, at which point the coupled state function will be called on all cpus at approximately the same time.

Once all cpus are ready to enter idle, they are woken by an smp cross call. At this point, there is a chance that one of the cpus will find work to do, and choose not to enter idle. A final pass is needed to guarantee that all cpus will call the power state enter function at the same time. During this pass, each cpu will increment the ready counter, and continue once the ready counter matches the number of online coupled cpus. If any cpu exits idle, the other cpus will decrement their counter and retry.

To use coupled cpuidle states, a cpuidle driver must do all of the following (see the sketch after the tags below):

  - Set struct cpuidle_device.coupled_cpus to the mask of all coupled cpus, usually the same as cpu_possible_mask if all cpus are part of the same cluster. The coupled_cpus mask must be set in the struct cpuidle_device for each cpu.

  - Set struct cpuidle_device.safe_state to a state that is not a coupled state. This is usually WFI.

  - Set CPUIDLE_FLAG_COUPLED in struct cpuidle_state.flags for each state that affects multiple cpus.

  - Provide a struct cpuidle_state.enter function for each state that affects multiple cpus. This function is guaranteed to be called on all cpus at approximately the same time. The driver should ensure that the cpus all abort together if any cpu tries to abort once the function is called.

update1:
cpuidle: coupled: fix count of online cpus

online_count was never incremented on boot, and was also counting cpus that were not part of the coupled set. Fix both issues by introducing a new function that counts online coupled cpus, and call it from register as well as the hotplug notifier.

update2:
cpuidle: coupled: fix decrementing ready count

cpuidle_coupled_set_not_ready sometimes refuses to decrement the ready count in order to prevent a race condition. This makes it unsuitable for use when finished with idle. Add a new function cpuidle_coupled_set_done that decrements both the ready count and waiting count, and call it after idle is complete.
Cc: Amit Kucheria <amit.kucheria@linaro.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Trinabh Gupta <g.trinabh@gmail.com>
Cc: Deepthi Dharwar <deepthi@linux.vnet.ibm.com>
Reviewed-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Tested-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Reviewed-by: Kevin Hilman <khilman@ti.com>
Tested-by: Kevin Hilman <khilman@ti.com>
Signed-off-by: Colin Cross <ccross@android.com>
Acked-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Len Brown <len.brown@intel.com>
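
To make the driver requirements above concrete, here is a minimal sketch for a hypothetical two-cpu SoC, written against the interface this patch adds. The mysoc_* identifiers, the latency numbers, and the enter callbacks are invented for illustration, and error handling is omitted:

static DEFINE_PER_CPU(struct cpuidle_device, mysoc_idle_dev);

/* Hypothetical platform callbacks, defined elsewhere. */
static int mysoc_enter_wfi(struct cpuidle_device *dev,
                           struct cpuidle_driver *drv, int index);
static int mysoc_enter_retention(struct cpuidle_device *dev,
                                 struct cpuidle_driver *drv, int index);

static struct cpuidle_driver mysoc_idle_driver = {
        .name = "mysoc_idle",
        .owner = THIS_MODULE,
        .states = {
                [0] = { /* per-cpu WFI, used as the safe state */
                        .enter = mysoc_enter_wfi,
                        .exit_latency = 1,
                        .target_residency = 1,
                        .name = "WFI",
                },
                [1] = { /* powers down shared blocks, needs all cpus */
                        .enter = mysoc_enter_retention,
                        .exit_latency = 500,
                        .target_residency = 1000,
                        .flags = CPUIDLE_FLAG_COUPLED,
                        .name = "C2",
                },
        },
        .state_count = 2,
};

static int __init mysoc_cpuidle_init(void)
{
        int cpu;
        struct cpuidle_device *dev;

        cpuidle_register_driver(&mysoc_idle_driver);

        for_each_possible_cpu(cpu) {
                dev = &per_cpu(mysoc_idle_dev, cpu);
                dev->cpu = cpu;
                /* all cpus are in one coupled cluster */
                dev->coupled_cpus = *cpu_possible_mask;
                /* the safe state is the non-coupled WFI state */
                dev->safe_state_index = 0;
                cpuidle_register_device(dev);
        }
        return 0;
}
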
Diffstat (limited to 'drivers')
-rw-r--r--  drivers/cpuidle/Kconfig   |   3
-rw-r--r--  drivers/cpuidle/Makefile  |   1
-rw-r--r--  drivers/cpuidle/coupled.c | 678
-rw-r--r--  drivers/cpuidle/cpuidle.c |  15
-rw-r--r--  drivers/cpuidle/cpuidle.h |  30
5 files changed, 726 insertions(+), 1 deletion(-)
diff --git a/drivers/cpuidle/Kconfig b/drivers/cpuidle/Kconfig
index 78a666d1e5f5..a76b689e553b 100644
--- a/drivers/cpuidle/Kconfig
+++ b/drivers/cpuidle/Kconfig
@@ -18,3 +18,6 @@ config CPU_IDLE_GOV_MENU
 	bool
 	depends on CPU_IDLE && NO_HZ
 	default y
+
+config ARCH_NEEDS_CPU_IDLE_COUPLED
+	def_bool n
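
A platform that needs this support would then select the new symbol from its own Kconfig entry, along these lines (hypothetical platform entry, not part of this patch):

config ARCH_MYSOC
	bool "MySoC platform"
	select ARCH_NEEDS_CPU_IDLE_COUPLED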
diff --git a/drivers/cpuidle/Makefile b/drivers/cpuidle/Makefile
index 5634f88379df..38c8f69f30cf 100644
--- a/drivers/cpuidle/Makefile
+++ b/drivers/cpuidle/Makefile
@@ -3,3 +3,4 @@
 #
 
 obj-y += cpuidle.o driver.o governor.o sysfs.o governors/
+obj-$(CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED) += coupled.o
diff --git a/drivers/cpuidle/coupled.c b/drivers/cpuidle/coupled.c
new file mode 100644
index 000000000000..aab6bba8daec
--- /dev/null
+++ b/drivers/cpuidle/coupled.c
@@ -0,0 +1,678 @@
/*
 * coupled.c - helper functions to enter the same idle state on multiple cpus
 *
 * Copyright (c) 2011 Google, Inc.
 *
 * Author: Colin Cross <ccross@android.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 */

#include <linux/kernel.h>
#include <linux/cpu.h>
#include <linux/cpuidle.h>
#include <linux/mutex.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

#include "cpuidle.h"

/**
 * DOC: Coupled cpuidle states
 *
 * On some ARM SMP SoCs (OMAP4460, Tegra 2, and probably more), the
 * cpus cannot be independently powered down, either due to
 * sequencing restrictions (on Tegra 2, cpu 0 must be the last to
 * power down), or due to HW bugs (on OMAP4460, a cpu powering up
 * will corrupt the gic state unless the other cpu runs a
 * workaround).  Each cpu has a power state that it can enter
 * without coordinating with the other cpu (usually Wait For
 * Interrupt, or WFI), and one or more "coupled" power states that
 * affect blocks shared between the cpus (L2 cache, interrupt
 * controller, and sometimes the whole SoC).  Entering a coupled
 * power state must be tightly controlled on both cpus.
 *
 * This file implements a solution, where each cpu will wait in the
 * WFI state until all cpus are ready to enter a coupled state, at
 * which point the coupled state function will be called on all
 * cpus at approximately the same time.
 *
 * Once all cpus are ready to enter idle, they are woken by an smp
 * cross call.  At this point, there is a chance that one of the
 * cpus will find work to do, and choose not to enter idle.  A
 * final pass is needed to guarantee that all cpus will call the
 * power state enter function at the same time.  During this pass,
 * each cpu will increment the ready counter, and continue once the
 * ready counter matches the number of online coupled cpus.  If any
 * cpu exits idle, the other cpus will decrement their counter and
 * retry.
 *
 * requested_state stores the deepest coupled idle state each cpu
 * is ready for.  It is assumed that the states are indexed from
 * shallowest (highest power, lowest exit latency) to deepest
 * (lowest power, highest exit latency).  The requested_state
 * variable is not locked.  It is only written from the cpu that
 * it stores (or by the on/offlining cpu if that cpu is offline),
 * and only read after all the cpus are ready for the coupled idle
 * state and are no longer updating it.
 *
 * Two counts are packed into one atomic variable,
 * ready_waiting_counts.  The waiting count tracks the number of
 * cpus that are in the waiting loop, in the ready loop, or in the
 * coupled idle state.  The ready count tracks the number of cpus
 * that are in the ready loop or in the coupled idle state.  A
 * separate online_count tracks the number of cpus in the coupled
 * set that are currently or soon will be online.
 *
 * To use coupled cpuidle states, a cpuidle driver must:
 *
 *    Set struct cpuidle_device.coupled_cpus to the mask of all
 *    coupled cpus, usually the same as cpu_possible_mask if all cpus
 *    are part of the same cluster.  The coupled_cpus mask must be
 *    set in the struct cpuidle_device for each cpu.
 *
 *    Set struct cpuidle_device.safe_state to a state that is not a
 *    coupled state.  This is usually WFI.
 *
 *    Set CPUIDLE_FLAG_COUPLED in struct cpuidle_state.flags for each
 *    state that affects multiple cpus.
 *
 *    Provide a struct cpuidle_state.enter function for each state
 *    that affects multiple cpus.  This function is guaranteed to be
 *    called on all cpus at approximately the same time.  The driver
 *    should ensure that the cpus all abort together if any cpu tries
 *    to abort once the function is called.  The function should return
 *    with interrupts still disabled.
 */

/**
 * struct cpuidle_coupled - data for set of cpus that share a coupled idle state
 * @coupled_cpus: mask of cpus that are part of the coupled set
 * @requested_state: array of requested states for cpus in the coupled set
 * @ready_waiting_counts: combined count of cpus in ready or waiting loops
 * @online_count: count of cpus that are online
 * @refcnt: reference count of cpuidle devices that are using this struct
 * @prevent: flag to prevent coupled idle while a cpu is hotplugging
 */
struct cpuidle_coupled {
        cpumask_t coupled_cpus;
        int requested_state[NR_CPUS];
        atomic_t ready_waiting_counts;
        int online_count;
        int refcnt;
        int prevent;
};

#define WAITING_BITS 16
#define MAX_WAITING_CPUS (1 << WAITING_BITS)
#define WAITING_MASK (MAX_WAITING_CPUS - 1)
#define READY_MASK (~WAITING_MASK)
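
/*
 * The two counts share the single atomic_t ready_waiting_counts: the low
 * WAITING_BITS bits hold the waiting count and the remaining high bits
 * hold the ready count.  For example, with WAITING_BITS = 16, four cpus
 * waiting of which one is also ready is encoded as (1 << 16) | 4 =
 * 0x00010004.  Adding or subtracting MAX_WAITING_CPUS (1 << 16) therefore
 * adjusts only the ready count, leaving the waiting count untouched.
 */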
118
#define CPUIDLE_COUPLED_NOT_IDLE	(-1)

static DEFINE_MUTEX(cpuidle_coupled_lock);
static DEFINE_PER_CPU(struct call_single_data, cpuidle_coupled_poke_cb);

/*
 * The cpuidle_coupled_poked_mask mask is used to avoid calling
 * __smp_call_function_single with the per cpu call_single_data struct already
 * in use.  This prevents a deadlock where two cpus are waiting for each
 * other's call_single_data struct to be available.
 */
static cpumask_t cpuidle_coupled_poked_mask;
131
/**
 * cpuidle_state_is_coupled - check if a state is part of a coupled set
 * @dev: struct cpuidle_device for the current cpu
 * @drv: struct cpuidle_driver for the platform
 * @state: index of the target state in drv->states
 *
 * Returns true if the target state is coupled with cpus besides this one
 */
bool cpuidle_state_is_coupled(struct cpuidle_device *dev,
        struct cpuidle_driver *drv, int state)
{
        return drv->states[state].flags & CPUIDLE_FLAG_COUPLED;
}

/**
 * cpuidle_coupled_set_ready - mark a cpu as ready
 * @coupled: the struct coupled that contains the current cpu
 */
static inline void cpuidle_coupled_set_ready(struct cpuidle_coupled *coupled)
{
        atomic_add(MAX_WAITING_CPUS, &coupled->ready_waiting_counts);
}

/**
 * cpuidle_coupled_set_not_ready - mark a cpu as not ready
 * @coupled: the struct coupled that contains the current cpu
 *
 * Decrements the ready counter, unless the ready (and thus the waiting) counter
 * is equal to the number of online cpus.  Prevents a race where one cpu
 * decrements the waiting counter and then re-increments it just before another
 * cpu has decremented its ready counter, leading to the ready counter going
 * down from the number of online cpus without going through the coupled idle
 * state.
 *
 * Returns 0 if the counter was decremented successfully, -EINVAL if the ready
 * counter was equal to the number of online cpus.
 */
static inline int cpuidle_coupled_set_not_ready(struct cpuidle_coupled *coupled)
{
        int all;
        int ret;

        all = coupled->online_count | (coupled->online_count << WAITING_BITS);
        ret = atomic_add_unless(&coupled->ready_waiting_counts,
                        -MAX_WAITING_CPUS, all);

        return ret ? 0 : -EINVAL;
}
181
/**
 * cpuidle_coupled_no_cpus_ready - check if no cpus in a coupled set are ready
 * @coupled: the struct coupled that contains the current cpu
 *
 * Returns true if all of the cpus in a coupled set are out of the ready loop.
 */
static inline int cpuidle_coupled_no_cpus_ready(struct cpuidle_coupled *coupled)
{
        int r = atomic_read(&coupled->ready_waiting_counts) >> WAITING_BITS;
        return r == 0;
}

/**
 * cpuidle_coupled_cpus_ready - check if all cpus in a coupled set are ready
 * @coupled: the struct coupled that contains the current cpu
 *
 * Returns true if all cpus coupled to this target state are in the ready loop
 */
static inline bool cpuidle_coupled_cpus_ready(struct cpuidle_coupled *coupled)
{
        int r = atomic_read(&coupled->ready_waiting_counts) >> WAITING_BITS;
        return r == coupled->online_count;
}

/**
 * cpuidle_coupled_cpus_waiting - check if all cpus in a coupled set are waiting
 * @coupled: the struct coupled that contains the current cpu
 *
 * Returns true if all cpus coupled to this target state are in the wait loop
 */
static inline bool cpuidle_coupled_cpus_waiting(struct cpuidle_coupled *coupled)
{
        int w = atomic_read(&coupled->ready_waiting_counts) & WAITING_MASK;
        return w == coupled->online_count;
}

/**
 * cpuidle_coupled_no_cpus_waiting - check if no cpus in coupled set are waiting
 * @coupled: the struct coupled that contains the current cpu
 *
 * Returns true if all of the cpus in a coupled set are out of the waiting loop.
 */
static inline int cpuidle_coupled_no_cpus_waiting(struct cpuidle_coupled *coupled)
{
        int w = atomic_read(&coupled->ready_waiting_counts) & WAITING_MASK;
        return w == 0;
}
229
/**
 * cpuidle_coupled_get_state - determine the deepest idle state
 * @dev: struct cpuidle_device for this cpu
 * @coupled: the struct coupled that contains the current cpu
 *
 * Returns the deepest idle state that all coupled cpus can enter
 */
static inline int cpuidle_coupled_get_state(struct cpuidle_device *dev,
        struct cpuidle_coupled *coupled)
{
        int i;
        int state = INT_MAX;

        /*
         * Read barrier ensures that read of requested_state is ordered after
         * reads of ready_count.  Matches the write barrier in
         * cpuidle_coupled_set_waiting.
         */
        smp_rmb();

        for_each_cpu_mask(i, coupled->coupled_cpus)
                if (cpu_online(i) && coupled->requested_state[i] < state)
                        state = coupled->requested_state[i];

        return state;
}

static void cpuidle_coupled_poked(void *info)
{
        int cpu = (unsigned long)info;
        cpumask_clear_cpu(cpu, &cpuidle_coupled_poked_mask);
}
262
/**
 * cpuidle_coupled_poke - wake up a cpu that may be waiting
 * @cpu: target cpu
 *
 * Ensures that the target cpu exits its waiting idle state (if it is in it)
 * and will see updates to waiting_count before it re-enters its waiting idle
 * state.
 *
 * If cpuidle_coupled_poked_mask is already set for the target cpu, that cpu
 * either has or will soon have a pending IPI that will wake it out of idle,
 * or it is currently processing the IPI and is not in idle.
 */
static void cpuidle_coupled_poke(int cpu)
{
        struct call_single_data *csd = &per_cpu(cpuidle_coupled_poke_cb, cpu);

        if (!cpumask_test_and_set_cpu(cpu, &cpuidle_coupled_poked_mask))
                __smp_call_function_single(cpu, csd, 0);
}

/**
 * cpuidle_coupled_poke_others - wake up all other cpus that may be waiting
 * @this_cpu: the current cpu
 * @coupled: the struct coupled that contains the current cpu
 *
 * Calls cpuidle_coupled_poke on all other online cpus.
 */
static void cpuidle_coupled_poke_others(int this_cpu,
        struct cpuidle_coupled *coupled)
{
        int cpu;

        for_each_cpu_mask(cpu, coupled->coupled_cpus)
                if (cpu != this_cpu && cpu_online(cpu))
                        cpuidle_coupled_poke(cpu);
}
299
/**
 * cpuidle_coupled_set_waiting - mark this cpu as in the wait loop
 * @cpu: the current cpu
 * @coupled: the struct coupled that contains the current cpu
 * @next_state: the index in drv->states of the requested state for this cpu
 *
 * Updates the requested idle state for the specified cpuidle device,
 * poking all coupled cpus out of idle if necessary to let them see the new
 * state.
 */
static void cpuidle_coupled_set_waiting(int cpu,
        struct cpuidle_coupled *coupled, int next_state)
{
        int w;

        coupled->requested_state[cpu] = next_state;

        /*
         * If this is the last cpu to enter the waiting state, poke
         * all the other cpus out of their waiting state so they can
         * enter a deeper state.  This can race with one of the cpus
         * exiting the waiting state due to an interrupt and
         * decrementing waiting_count, see comment below.
         *
         * The atomic_inc_return provides a write barrier to order the write
         * to requested_state with the later write that increments ready_count.
         */
        w = atomic_inc_return(&coupled->ready_waiting_counts) & WAITING_MASK;
        if (w == coupled->online_count)
                cpuidle_coupled_poke_others(cpu, coupled);
}
331
/**
 * cpuidle_coupled_set_not_waiting - mark this cpu as leaving the wait loop
 * @cpu: the current cpu
 * @coupled: the struct coupled that contains the current cpu
 *
 * Removes the requested idle state for the specified cpuidle device.
 */
static void cpuidle_coupled_set_not_waiting(int cpu,
        struct cpuidle_coupled *coupled)
{
        /*
         * Decrementing waiting count can race with incrementing it in
         * cpuidle_coupled_set_waiting, but that's OK.  Worst case, some
         * cpus will increment ready_count and then spin until they
         * notice that this cpu has cleared its requested_state.
         */
        atomic_dec(&coupled->ready_waiting_counts);

        coupled->requested_state[cpu] = CPUIDLE_COUPLED_NOT_IDLE;
}
352
/**
 * cpuidle_coupled_set_done - mark this cpu as leaving the ready loop
 * @cpu: the current cpu
 * @coupled: the struct coupled that contains the current cpu
 *
 * Marks this cpu as no longer in the ready and waiting loops.  Decrements
 * the waiting count first to prevent another cpu looping back in and seeing
 * this cpu as waiting just before it exits idle.
 */
static void cpuidle_coupled_set_done(int cpu, struct cpuidle_coupled *coupled)
{
        cpuidle_coupled_set_not_waiting(cpu, coupled);
        atomic_sub(MAX_WAITING_CPUS, &coupled->ready_waiting_counts);
}
367
/**
 * cpuidle_coupled_clear_pokes - spin until the poke interrupt is processed
 * @cpu: this cpu
 *
 * Turns on interrupts and spins until any outstanding poke interrupts have
 * been processed and the poke bit has been cleared.
 *
 * Other interrupts may also be processed while interrupts are enabled, so
 * need_resched() must be tested after turning interrupts off again to make sure
 * the interrupt didn't schedule work that should take the cpu out of idle.
 *
 * Returns 0 if need_resched was false, -EINTR if need_resched was true.
 */
static int cpuidle_coupled_clear_pokes(int cpu)
{
        local_irq_enable();
        while (cpumask_test_cpu(cpu, &cpuidle_coupled_poked_mask))
                cpu_relax();
        local_irq_disable();

        return need_resched() ? -EINTR : 0;
}
390
/**
 * cpuidle_enter_state_coupled - attempt to enter a state with coupled cpus
 * @dev: struct cpuidle_device for the current cpu
 * @drv: struct cpuidle_driver for the platform
 * @next_state: index of the requested state in drv->states
 *
 * Coordinate with coupled cpus to enter the target state.  This is a two
 * stage process.  In the first stage, the cpus are operating independently,
 * and may call into cpuidle_enter_state_coupled at completely different times.
 * To save as much power as possible, the first cpus to call this function will
 * go to an intermediate state (the cpuidle_device's safe state), and wait for
 * all the other cpus to call this function.  Once all coupled cpus are idle,
 * the second stage will start.  Each coupled cpu will spin until all cpus have
 * guaranteed that they will call the enter function for the target state.
 *
 * This function must be called with interrupts disabled.  It may enable
 * interrupts while preparing for idle, and it will always return with
 * interrupts enabled.
 */
int cpuidle_enter_state_coupled(struct cpuidle_device *dev,
        struct cpuidle_driver *drv, int next_state)
{
        int entered_state = -1;
        struct cpuidle_coupled *coupled = dev->coupled;

        if (!coupled)
                return -EINVAL;

        while (coupled->prevent) {
                if (cpuidle_coupled_clear_pokes(dev->cpu)) {
                        local_irq_enable();
                        return entered_state;
                }
                entered_state = cpuidle_enter_state(dev, drv,
                        dev->safe_state_index);
        }

        /* Read barrier ensures online_count is read after prevent is cleared */
        smp_rmb();

        cpuidle_coupled_set_waiting(dev->cpu, coupled, next_state);

retry:
        /*
         * Wait for all coupled cpus to be idle, using the deepest state
         * allowed for a single cpu.
         */
        while (!cpuidle_coupled_cpus_waiting(coupled)) {
                if (cpuidle_coupled_clear_pokes(dev->cpu)) {
                        cpuidle_coupled_set_not_waiting(dev->cpu, coupled);
                        goto out;
                }

                if (coupled->prevent) {
                        cpuidle_coupled_set_not_waiting(dev->cpu, coupled);
                        goto out;
                }

                entered_state = cpuidle_enter_state(dev, drv,
                        dev->safe_state_index);
        }

        if (cpuidle_coupled_clear_pokes(dev->cpu)) {
                cpuidle_coupled_set_not_waiting(dev->cpu, coupled);
                goto out;
        }

        /*
         * All coupled cpus are probably idle.  There is a small chance that
         * one of the other cpus just became active.  Increment the ready count,
         * and spin until all coupled cpus have incremented the counter.  Once a
         * cpu has incremented the ready counter, it cannot abort idle and must
         * spin until either all cpus have incremented the ready counter, or
         * another cpu leaves idle and decrements the waiting counter.
         */

        cpuidle_coupled_set_ready(coupled);
        while (!cpuidle_coupled_cpus_ready(coupled)) {
                /* Check if any other cpus bailed out of idle. */
                if (!cpuidle_coupled_cpus_waiting(coupled))
                        if (!cpuidle_coupled_set_not_ready(coupled))
                                goto retry;

                cpu_relax();
        }

        /* all cpus have acked the coupled state */
        next_state = cpuidle_coupled_get_state(dev, coupled);

        entered_state = cpuidle_enter_state(dev, drv, next_state);

        cpuidle_coupled_set_done(dev->cpu, coupled);

out:
        /*
         * Normal cpuidle states are expected to return with irqs enabled.
         * That leads to an inefficiency where a cpu receiving an interrupt
         * that brings it out of idle will process that interrupt before
         * exiting the idle enter function and decrementing ready_count.  All
         * other cpus will need to spin waiting for the cpu that is processing
         * the interrupt.  If the driver returns with interrupts disabled,
         * all other cpus will loop back into the safe idle state instead of
         * spinning, saving power.
         *
         * Calling local_irq_enable here allows coupled states to return with
         * interrupts disabled, but won't cause problems for drivers that
         * exit with interrupts enabled.
         */
        local_irq_enable();

        /*
         * Wait until all coupled cpus have exited idle.  There is no risk that
         * a cpu exits and re-enters the ready state because this cpu has
         * already decremented its waiting_count.
         */
        while (!cpuidle_coupled_no_cpus_ready(coupled))
                cpu_relax();

        return entered_state;
}
511
static void cpuidle_coupled_update_online_cpus(struct cpuidle_coupled *coupled)
{
        cpumask_t cpus;
        cpumask_and(&cpus, cpu_online_mask, &coupled->coupled_cpus);
        coupled->online_count = cpumask_weight(&cpus);
}
518
/**
 * cpuidle_coupled_register_device - register a coupled cpuidle device
 * @dev: struct cpuidle_device for the current cpu
 *
 * Called from cpuidle_register_device to handle coupled idle init.  Finds the
 * cpuidle_coupled struct for this set of coupled cpus, or creates one if none
 * exists yet.
 */
int cpuidle_coupled_register_device(struct cpuidle_device *dev)
{
        int cpu;
        struct cpuidle_device *other_dev;
        struct call_single_data *csd;
        struct cpuidle_coupled *coupled;

        if (cpumask_empty(&dev->coupled_cpus))
                return 0;

        for_each_cpu_mask(cpu, dev->coupled_cpus) {
                other_dev = per_cpu(cpuidle_devices, cpu);
                if (other_dev && other_dev->coupled) {
                        coupled = other_dev->coupled;
                        goto have_coupled;
                }
        }

        /* No existing coupled info found, create a new one */
        coupled = kzalloc(sizeof(struct cpuidle_coupled), GFP_KERNEL);
        if (!coupled)
                return -ENOMEM;

        coupled->coupled_cpus = dev->coupled_cpus;

have_coupled:
        dev->coupled = coupled;
        if (WARN_ON(!cpumask_equal(&dev->coupled_cpus, &coupled->coupled_cpus)))
                coupled->prevent++;

        cpuidle_coupled_update_online_cpus(coupled);

        coupled->refcnt++;

        csd = &per_cpu(cpuidle_coupled_poke_cb, dev->cpu);
        csd->func = cpuidle_coupled_poked;
        csd->info = (void *)(unsigned long)dev->cpu;

        return 0;
}
567
/**
 * cpuidle_coupled_unregister_device - unregister a coupled cpuidle device
 * @dev: struct cpuidle_device for the current cpu
 *
 * Called from cpuidle_unregister_device to tear down coupled idle.  Removes the
 * cpu from the coupled idle set, and frees the cpuidle_coupled struct if
 * this was the last cpu in the set.
 */
void cpuidle_coupled_unregister_device(struct cpuidle_device *dev)
{
        struct cpuidle_coupled *coupled = dev->coupled;

        if (cpumask_empty(&dev->coupled_cpus))
                return;

        if (!--coupled->refcnt)
                kfree(coupled);
        dev->coupled = NULL;
}
587
/**
 * cpuidle_coupled_prevent_idle - prevent cpus from entering a coupled state
 * @coupled: the struct coupled that contains the cpu that is changing state
 *
 * Disables coupled cpuidle on a coupled set of cpus.  Used to ensure that
 * cpu_online_mask doesn't change while cpus are coordinating coupled idle.
 */
static void cpuidle_coupled_prevent_idle(struct cpuidle_coupled *coupled)
{
        int cpu = get_cpu();

        /* Force all cpus out of the waiting loop. */
        coupled->prevent++;
        cpuidle_coupled_poke_others(cpu, coupled);
        put_cpu();
        while (!cpuidle_coupled_no_cpus_waiting(coupled))
                cpu_relax();
}
606
/**
 * cpuidle_coupled_allow_idle - allow cpus to enter a coupled state
 * @coupled: the struct coupled that contains the cpu that is changing state
 *
 * Enables coupled cpuidle on a coupled set of cpus.  Used to ensure that
 * cpu_online_mask doesn't change while cpus are coordinating coupled idle.
 */
static void cpuidle_coupled_allow_idle(struct cpuidle_coupled *coupled)
{
        int cpu = get_cpu();

        /*
         * Write barrier ensures readers see the new online_count when they
         * see prevent == 0.
         */
        smp_wmb();
        coupled->prevent--;
        /* Force cpus out of the prevent loop. */
        cpuidle_coupled_poke_others(cpu, coupled);
        put_cpu();
}
628
/**
 * cpuidle_coupled_cpu_notify - notifier called during hotplug transitions
 * @nb: notifier block
 * @action: hotplug transition
 * @hcpu: target cpu number
 *
 * Called when a cpu is brought on or offline using hotplug.  Updates the
 * coupled cpu set appropriately.
 */
static int cpuidle_coupled_cpu_notify(struct notifier_block *nb,
        unsigned long action, void *hcpu)
{
        int cpu = (unsigned long)hcpu;
        struct cpuidle_device *dev;

        mutex_lock(&cpuidle_lock);

        dev = per_cpu(cpuidle_devices, cpu);
        if (!dev || !dev->coupled)
                goto out;

        switch (action & ~CPU_TASKS_FROZEN) {
        case CPU_UP_PREPARE:
        case CPU_DOWN_PREPARE:
                cpuidle_coupled_prevent_idle(dev->coupled);
                break;
        case CPU_ONLINE:
        case CPU_DEAD:
                cpuidle_coupled_update_online_cpus(dev->coupled);
                /* Fall through */
        case CPU_UP_CANCELED:
        case CPU_DOWN_FAILED:
                cpuidle_coupled_allow_idle(dev->coupled);
                break;
        }

out:
        mutex_unlock(&cpuidle_lock);
        return NOTIFY_OK;
}

static struct notifier_block cpuidle_coupled_cpu_notifier = {
        .notifier_call = cpuidle_coupled_cpu_notify,
};

static int __init cpuidle_coupled_init(void)
{
        return register_cpu_notifier(&cpuidle_coupled_cpu_notifier);
}
core_initcall(cpuidle_coupled_init);
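
The trickiest driver-side requirement is the behavior of the coupled enter function itself. The following is a minimal sketch only, for a Tegra2-like SoC where cpu 0 must power down last; the mysoc_* helpers are hypothetical, and abort handling is deliberately omitted (a real driver must also let all cpus back out together if any cpu sees a pending wakeup):

static atomic_t mysoc_cpus_in_retention = ATOMIC_INIT(0);

static int mysoc_enter_retention(struct cpuidle_device *dev,
                                 struct cpuidle_driver *drv, int index)
{
        atomic_inc(&mysoc_cpus_in_retention);

        if (dev->cpu == 0) {
                /*
                 * The framework guarantees all coupled cpus arrive here at
                 * approximately the same time, so this spin is bounded.  A
                 * real driver would also poll a hardware status register to
                 * confirm the secondaries have actually powered off.
                 */
                while (atomic_read(&mysoc_cpus_in_retention) <
                       num_online_cpus())
                        cpu_relax();
                mysoc_shut_down_shared_blocks();        /* hypothetical */
        }

        mysoc_cpu_power_down(dev->cpu); /* hypothetical, wakes on interrupt */

        atomic_dec(&mysoc_cpus_in_retention);
        return index;
}
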
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index 4540672a2e1c..e81cfda295a5 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -171,7 +171,11 @@ int cpuidle_idle_call(void)
 	trace_power_start_rcuidle(POWER_CSTATE, next_state, dev->cpu);
 	trace_cpu_idle_rcuidle(next_state, dev->cpu);
 
-	entered_state = cpuidle_enter_state(dev, drv, next_state);
+	if (cpuidle_state_is_coupled(dev, drv, next_state))
+		entered_state = cpuidle_enter_state_coupled(dev, drv,
+							    next_state);
+	else
+		entered_state = cpuidle_enter_state(dev, drv, next_state);
 
 	trace_power_end_rcuidle(dev->cpu);
 	trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu);
@@ -407,9 +411,16 @@ static int __cpuidle_register_device(struct cpuidle_device *dev)
 	if (ret)
 		goto err_sysfs;
 
+	ret = cpuidle_coupled_register_device(dev);
+	if (ret)
+		goto err_coupled;
+
 	dev->registered = 1;
 	return 0;
 
+err_coupled:
+	cpuidle_remove_sysfs(cpu_dev);
+	wait_for_completion(&dev->kobj_unregister);
 err_sysfs:
 	list_del(&dev->device_list);
 	per_cpu(cpuidle_devices, dev->cpu) = NULL;
@@ -464,6 +475,8 @@ void cpuidle_unregister_device(struct cpuidle_device *dev)
 	wait_for_completion(&dev->kobj_unregister);
 	per_cpu(cpuidle_devices, dev->cpu) = NULL;
 
+	cpuidle_coupled_unregister_device(dev);
+
 	cpuidle_resume_and_unlock();
 
 	module_put(cpuidle_driver->owner);
diff --git a/drivers/cpuidle/cpuidle.h b/drivers/cpuidle/cpuidle.h
index d8a3ccce8281..76e7f696ad8c 100644
--- a/drivers/cpuidle/cpuidle.h
+++ b/drivers/cpuidle/cpuidle.h
@@ -32,4 +32,34 @@ extern void cpuidle_remove_state_sysfs(struct cpuidle_device *device);
 extern int cpuidle_add_sysfs(struct device *dev);
 extern void cpuidle_remove_sysfs(struct device *dev);
 
+#ifdef CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED
+bool cpuidle_state_is_coupled(struct cpuidle_device *dev,
+		struct cpuidle_driver *drv, int state);
+int cpuidle_enter_state_coupled(struct cpuidle_device *dev,
+		struct cpuidle_driver *drv, int next_state);
+int cpuidle_coupled_register_device(struct cpuidle_device *dev);
+void cpuidle_coupled_unregister_device(struct cpuidle_device *dev);
+#else
+static inline bool cpuidle_state_is_coupled(struct cpuidle_device *dev,
+		struct cpuidle_driver *drv, int state)
+{
+	return false;
+}
+
+static inline int cpuidle_enter_state_coupled(struct cpuidle_device *dev,
+		struct cpuidle_driver *drv, int next_state)
+{
+	return -1;
+}
+
+static inline int cpuidle_coupled_register_device(struct cpuidle_device *dev)
+{
+	return 0;
+}
+
+static inline void cpuidle_coupled_unregister_device(struct cpuidle_device *dev)
+{
+}
+#endif
+
 #endif /* __DRIVER_CPUIDLE_H */