author		Linus Torvalds <torvalds@linux-foundation.org>	2012-07-26 17:28:55 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-07-26 17:28:55 -0400
commit		476525004ac7e2f990b6956efcd44d0780c2ab4c (patch)
tree		158cd2bbfb232b4f4327b6c20a4e14c6b095a438 /drivers/cpuidle
parent		bd22dc17e49973d3d4925970260e9e37f7580a9f (diff)
parent		ec033d0a02901551346b9f43f8ff9bad51378891 (diff)
Merge branch 'release' of git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux
Pull ACPI & power management update from Len Brown:
 "Re-write of the turbostat tool.
     Lower overhead was necessary for measuring very large systems
     when they are very idle.

  IVB support in intel_idle.
     It's what I run on my IVB, others should be able to also :-)

  ACPICA core update.
     We have found some bugs due to divergence between Linux and the
     upstream ACPICA base.  Most of these patches are to reduce that
     divergence, to reduce the risk of future bugs.

  Some cpuidle updates, mostly for non-Intel.
     More will be coming, as they depend on this part.

  Some thermal management changes needed by non-ACPI systems.

  Some _OST (OS Status Indication) updates for ACPI hot-plug."

* 'release' of git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux: (51 commits)
  Thermal: Documentation update
  Thermal: Add Hysteresis attributes
  Thermal: Make Thermal trip points writeable
  ACPI/AC: prevent OOPS on some boxes due to missing check power_supply_register() return value check
  tools/power: turbostat: fix large c1% issue
  tools/power: turbostat v2 - re-write for efficiency
  ACPICA: Update to version 20120711
  ACPICA: AcpiSrc: Fix some translation issues for Linux conversion
  ACPICA: Update header files copyrights to 2012
  ACPICA: Add new ACPI table load/unload external interfaces
  ACPICA: Split file: tbxface.c -> tbxfload.c
  ACPICA: Add PCC address space to space ID decode function
  ACPICA: Fix some comment fields
  ACPICA: Table manager: deploy new firmware error/warning interfaces
  ACPICA: Add new interfaces for BIOS(firmware) errors and warnings
  ACPICA: Split exception code utilities to a new file, utexcep.c
  ACPI: acpi_pad: tune round_robin_time
  ACPICA: Update to version 20120620
  ACPICA: Add support for implicit notify on multiple devices
  ACPICA: Update comments; no functional change
  ...
Diffstat (limited to 'drivers/cpuidle')
-rw-r--r--	drivers/cpuidle/Kconfig		3
-rw-r--r--	drivers/cpuidle/Makefile	1
-rw-r--r--	drivers/cpuidle/coupled.c	715
-rw-r--r--	drivers/cpuidle/cpuidle.c	85
-rw-r--r--	drivers/cpuidle/cpuidle.h	32
5 files changed, 808 insertions(+), 28 deletions(-)
diff --git a/drivers/cpuidle/Kconfig b/drivers/cpuidle/Kconfig
index 78a666d1e5f5..a76b689e553b 100644
--- a/drivers/cpuidle/Kconfig
+++ b/drivers/cpuidle/Kconfig
@@ -18,3 +18,6 @@ config CPU_IDLE_GOV_MENU
 	bool
 	depends on CPU_IDLE && NO_HZ
 	default y
+
+config ARCH_NEEDS_CPU_IDLE_COUPLED
+	def_bool n
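
Nothing in this series selects the new symbol; it is meant to be selected from platform code whose cpus cannot idle independently. A hypothetical platform would wire it up roughly like this (ARCH_EXAMPLE_SOC is a made-up symbol for illustration; platforms such as OMAP4 added similar selects in later patches):

    config ARCH_EXAMPLE_SOC
    	bool "Example SoC whose cpus must enter deep idle together"
    	# builds drivers/cpuidle/coupled.c only when cpuidle itself is enabled
    	select ARCH_NEEDS_CPU_IDLE_COUPLED if CPU_IDLE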
diff --git a/drivers/cpuidle/Makefile b/drivers/cpuidle/Makefile
index 5634f88379df..38c8f69f30cf 100644
--- a/drivers/cpuidle/Makefile
+++ b/drivers/cpuidle/Makefile
@@ -3,3 +3,4 @@
 #
 
 obj-y += cpuidle.o driver.o governor.o sysfs.o governors/
+obj-$(CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED) += coupled.o
diff --git a/drivers/cpuidle/coupled.c b/drivers/cpuidle/coupled.c
new file mode 100644
index 000000000000..2c9bf2692232
--- /dev/null
+++ b/drivers/cpuidle/coupled.c
@@ -0,0 +1,715 @@
+/*
+ * coupled.c - helper functions to enter the same idle state on multiple cpus
+ *
+ * Copyright (c) 2011 Google, Inc.
+ *
+ * Author: Colin Cross <ccross@android.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/cpu.h>
+#include <linux/cpuidle.h>
+#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include "cpuidle.h"
+
+/**
+ * DOC: Coupled cpuidle states
+ *
+ * On some ARM SMP SoCs (OMAP4460, Tegra 2, and probably more), the
+ * cpus cannot be independently powered down, either due to
+ * sequencing restrictions (on Tegra 2, cpu 0 must be the last to
+ * power down), or due to HW bugs (on OMAP4460, a cpu powering up
+ * will corrupt the gic state unless the other cpu runs a
+ * workaround).  Each cpu has a power state that it can enter without
+ * coordinating with the other cpu (usually Wait For Interrupt, or
+ * WFI), and one or more "coupled" power states that affect blocks
+ * shared between the cpus (L2 cache, interrupt controller, and
+ * sometimes the whole SoC).  Entering a coupled power state must
+ * be tightly controlled on both cpus.
+ *
+ * This file implements a solution, where each cpu will wait in the
+ * WFI state until all cpus are ready to enter a coupled state, at
+ * which point the coupled state function will be called on all
+ * cpus at approximately the same time.
+ *
+ * Once all cpus are ready to enter idle, they are woken by an smp
+ * cross call.  At this point, there is a chance that one of the
+ * cpus will find work to do, and choose not to enter idle.  A
+ * final pass is needed to guarantee that all cpus will call the
+ * power state enter function at the same time.  During this pass,
+ * each cpu will increment the ready counter, and continue once the
+ * ready counter matches the number of online coupled cpus.  If any
+ * cpu exits idle, the other cpus will decrement their counter and
+ * retry.
+ *
+ * requested_state stores the deepest coupled idle state each cpu
+ * is ready for.  It is assumed that the states are indexed from
+ * shallowest (highest power, lowest exit latency) to deepest
+ * (lowest power, highest exit latency).  The requested_state
+ * variable is not locked.  It is only written from the cpu that
+ * it stores (or by the on/offlining cpu if that cpu is offline),
+ * and only read after all the cpus are ready for the coupled idle
+ * state and are no longer updating it.
+ *
+ * Three counters are used.  online_count, a plain int updated under
+ * cpuidle_lock, tracks the cpus in the coupled set that are online or
+ * soon will be.  waiting_count tracks the number of cpus that are in
+ * the waiting loop, in the ready loop, or in the coupled idle state.
+ * ready_count tracks the cpus in the ready loop or in the coupled idle
+ * state.  The last two are packed into the one atomic ready_waiting_counts.
+ *
+ * To use coupled cpuidle states, a cpuidle driver must:
+ *
+ *    Set struct cpuidle_device.coupled_cpus to the mask of all
+ *    coupled cpus, usually the same as cpu_possible_mask if all cpus
+ *    are part of the same cluster.  The coupled_cpus mask must be
+ *    set in the struct cpuidle_device for each cpu.
+ *
+ *    Set struct cpuidle_device.safe_state_index to the index of a
+ *    state that is not a coupled state.  This is usually WFI.
+ *
+ *    Set CPUIDLE_FLAG_COUPLED in struct cpuidle_state.flags for each
+ *    state that affects multiple cpus.
+ *
+ *    Provide a struct cpuidle_state.enter function for each state
+ *    that affects multiple cpus.  This function is guaranteed to be
+ *    called on all cpus at approximately the same time.  The driver
+ *    should ensure that the cpus all abort together if any cpu tries
+ *    to abort once the function is called.  The function should return
+ *    with interrupts still disabled.
+ */
+
+/**
+ * struct cpuidle_coupled - data for set of cpus that share a coupled idle state
+ * @coupled_cpus: mask of cpus that are part of the coupled set
+ * @requested_state: array of requested states for cpus in the coupled set
+ * @ready_waiting_counts: combined count of cpus in ready or waiting loops
+ * @online_count: count of cpus that are online
+ * @refcnt: reference count of cpuidle devices that are using this struct
+ * @prevent: flag to prevent coupled idle while a cpu is hotplugging
+ */
+struct cpuidle_coupled {
+	cpumask_t coupled_cpus;
+	int requested_state[NR_CPUS];
+	atomic_t ready_waiting_counts;
+	int online_count;
+	int refcnt;
+	int prevent;
+};
+
+#define WAITING_BITS 16
+#define MAX_WAITING_CPUS (1 << WAITING_BITS)
+#define WAITING_MASK (MAX_WAITING_CPUS - 1)
+#define READY_MASK (~WAITING_MASK)
+
+#define CPUIDLE_COUPLED_NOT_IDLE	(-1)
+
+static DEFINE_MUTEX(cpuidle_coupled_lock);
+static DEFINE_PER_CPU(struct call_single_data, cpuidle_coupled_poke_cb);
+
+/*
+ * cpuidle_coupled_poked_mask is used to avoid calling
+ * __smp_call_function_single with the per cpu call_single_data struct already
+ * in use.  This prevents a deadlock where two cpus are waiting for each
+ * other's call_single_data struct to be available.
+ */
+static cpumask_t cpuidle_coupled_poked_mask;
+
+/**
+ * cpuidle_coupled_parallel_barrier - synchronize all online coupled cpus
+ * @dev: cpuidle_device of the calling cpu
+ * @a: atomic variable to hold the barrier
+ *
+ * No caller to this function will return from this function until all online
+ * cpus in the same coupled group have called this function.  Once any caller
+ * has returned from this function, the barrier is immediately available for
+ * reuse.
+ *
+ * The atomic variable a must be initialized to 0 before any cpu calls
+ * this function, and will be reset to 0 before any cpu returns from it.
+ *
+ * Must only be called from within a coupled idle state handler
+ * (state.enter when state.flags has CPUIDLE_FLAG_COUPLED set).
+ *
+ * Provides full smp barrier semantics before and after calling.
+ */
+void cpuidle_coupled_parallel_barrier(struct cpuidle_device *dev, atomic_t *a)
+{
+	int n = dev->coupled->online_count;
+
+	smp_mb__before_atomic_inc();
+	atomic_inc(a);
+
+	while (atomic_read(a) < n)
+		cpu_relax();
+
+	if (atomic_inc_return(a) == n * 2) {
+		atomic_set(a, 0);
+		return;
+	}
+
+	while (atomic_read(a) > n)
+		cpu_relax();
+}
+
+/**
+ * cpuidle_state_is_coupled - check if a state is part of a coupled set
+ * @dev: struct cpuidle_device for the current cpu
+ * @drv: struct cpuidle_driver for the platform
+ * @state: index of the target state in drv->states
+ *
+ * Returns true if the target state is coupled with cpus besides this one
+ */
+bool cpuidle_state_is_coupled(struct cpuidle_device *dev,
+	struct cpuidle_driver *drv, int state)
+{
+	return drv->states[state].flags & CPUIDLE_FLAG_COUPLED;
+}
+
+/**
+ * cpuidle_coupled_set_ready - mark a cpu as ready
+ * @coupled: the struct coupled that contains the current cpu
+ */
+static inline void cpuidle_coupled_set_ready(struct cpuidle_coupled *coupled)
+{
+	atomic_add(MAX_WAITING_CPUS, &coupled->ready_waiting_counts);
+}
+
+/**
+ * cpuidle_coupled_set_not_ready - mark a cpu as not ready
+ * @coupled: the struct coupled that contains the current cpu
+ *
+ * Decrements the ready counter, unless the ready (and thus the waiting) counter
+ * is equal to the number of online cpus.  Prevents a race where one cpu
+ * decrements the waiting counter and then re-increments it just before another
+ * cpu has decremented its ready counter, leading to the ready counter going
+ * down from the number of online cpus without going through the coupled idle
+ * state.
+ *
+ * Returns 0 if the counter was decremented successfully, -EINVAL if the ready
+ * counter was equal to the number of online cpus.
+ */
+static
+inline int cpuidle_coupled_set_not_ready(struct cpuidle_coupled *coupled)
+{
+	int all;
+	int ret;
+
+	all = coupled->online_count | (coupled->online_count << WAITING_BITS);
+	ret = atomic_add_unless(&coupled->ready_waiting_counts,
+		-MAX_WAITING_CPUS, all);
+
+	return ret ? 0 : -EINVAL;
+}
+
+/**
+ * cpuidle_coupled_no_cpus_ready - check if no cpus in a coupled set are ready
+ * @coupled: the struct coupled that contains the current cpu
+ *
+ * Returns true if all of the cpus in a coupled set are out of the ready loop.
+ */
+static inline int cpuidle_coupled_no_cpus_ready(struct cpuidle_coupled *coupled)
+{
+	int r = atomic_read(&coupled->ready_waiting_counts) >> WAITING_BITS;
+	return r == 0;
+}
+
+/**
+ * cpuidle_coupled_cpus_ready - check if all cpus in a coupled set are ready
+ * @coupled: the struct coupled that contains the current cpu
+ *
+ * Returns true if all cpus coupled to this target state are in the ready loop
+ */
+static inline bool cpuidle_coupled_cpus_ready(struct cpuidle_coupled *coupled)
+{
+	int r = atomic_read(&coupled->ready_waiting_counts) >> WAITING_BITS;
+	return r == coupled->online_count;
+}
+
+/**
+ * cpuidle_coupled_cpus_waiting - check if all cpus in a coupled set are waiting
+ * @coupled: the struct coupled that contains the current cpu
+ *
+ * Returns true if all cpus coupled to this target state are in the wait loop
+ */
+static inline bool cpuidle_coupled_cpus_waiting(struct cpuidle_coupled *coupled)
+{
+	int w = atomic_read(&coupled->ready_waiting_counts) & WAITING_MASK;
+	return w == coupled->online_count;
+}
+
+/**
+ * cpuidle_coupled_no_cpus_waiting - check if no cpus in coupled set are waiting
+ * @coupled: the struct coupled that contains the current cpu
+ *
+ * Returns true if all of the cpus in a coupled set are out of the waiting loop.
+ */
+static inline int cpuidle_coupled_no_cpus_waiting(struct cpuidle_coupled *coupled)
+{
+	int w = atomic_read(&coupled->ready_waiting_counts) & WAITING_MASK;
+	return w == 0;
+}
+
+/**
+ * cpuidle_coupled_get_state - determine the deepest idle state
+ * @dev: struct cpuidle_device for this cpu
+ * @coupled: the struct coupled that contains the current cpu
+ *
+ * Returns the deepest idle state that all coupled cpus can enter
+ */
+static inline int cpuidle_coupled_get_state(struct cpuidle_device *dev,
+		struct cpuidle_coupled *coupled)
+{
+	int i;
+	int state = INT_MAX;
+
+	/*
+	 * Read barrier ensures that read of requested_state is ordered after
+	 * reads of ready_count.  Matches the write barrier in
+	 * cpuidle_coupled_set_waiting.
+	 */
+	smp_rmb();
+
+	for_each_cpu_mask(i, coupled->coupled_cpus)
+		if (cpu_online(i) && coupled->requested_state[i] < state)
+			state = coupled->requested_state[i];
+
+	return state;
+}
+
+static void cpuidle_coupled_poked(void *info)
+{
+	int cpu = (unsigned long)info;
+	cpumask_clear_cpu(cpu, &cpuidle_coupled_poked_mask);
+}
+
+/**
+ * cpuidle_coupled_poke - wake up a cpu that may be waiting
+ * @cpu: target cpu
+ *
+ * Ensures that the target cpu exits its waiting idle state (if it is in it)
+ * and will see updates to waiting_count before it re-enters its waiting idle
+ * state.
+ *
+ * If cpuidle_coupled_poked_mask is already set for the target cpu, that cpu
+ * either has or will soon have a pending IPI that will wake it out of idle,
+ * or it is currently processing the IPI and is not in idle.
+ */
+static void cpuidle_coupled_poke(int cpu)
+{
+	struct call_single_data *csd = &per_cpu(cpuidle_coupled_poke_cb, cpu);
+
+	if (!cpumask_test_and_set_cpu(cpu, &cpuidle_coupled_poked_mask))
+		__smp_call_function_single(cpu, csd, 0);
+}
+
+/**
+ * cpuidle_coupled_poke_others - wake up all other cpus that may be waiting
+ * @this_cpu: the current cpu
+ * @coupled: the struct coupled that contains the current cpu
+ *
+ * Calls cpuidle_coupled_poke on all other online cpus.
+ */
+static void cpuidle_coupled_poke_others(int this_cpu,
+		struct cpuidle_coupled *coupled)
+{
+	int cpu;
+
+	for_each_cpu_mask(cpu, coupled->coupled_cpus)
+		if (cpu != this_cpu && cpu_online(cpu))
+			cpuidle_coupled_poke(cpu);
+}
+
+/**
+ * cpuidle_coupled_set_waiting - mark this cpu as in the wait loop
+ * @cpu: the current cpu
+ * @coupled: the struct coupled that contains the current cpu
+ * @next_state: the index in drv->states of the requested state for this cpu
+ *
+ * Updates the requested idle state for the specified cpuidle device,
+ * poking all coupled cpus out of idle if necessary to let them see the new
+ * state.
+ */
+static void cpuidle_coupled_set_waiting(int cpu,
+		struct cpuidle_coupled *coupled, int next_state)
+{
+	int w;
+
+	coupled->requested_state[cpu] = next_state;
+
+	/*
+	 * If this is the last cpu to enter the waiting state, poke
+	 * all the other cpus out of their waiting state so they can
+	 * enter a deeper state.  This can race with one of the cpus
+	 * exiting the waiting state due to an interrupt and
+	 * decrementing waiting_count, see comment below.
+	 *
+	 * The atomic_inc_return provides a write barrier to order the write
+	 * to requested_state with the later write that increments ready_count.
+	 */
+	w = atomic_inc_return(&coupled->ready_waiting_counts) & WAITING_MASK;
+	if (w == coupled->online_count)
+		cpuidle_coupled_poke_others(cpu, coupled);
+}
+
+/**
+ * cpuidle_coupled_set_not_waiting - mark this cpu as leaving the wait loop
+ * @cpu: the current cpu
+ * @coupled: the struct coupled that contains the current cpu
+ *
+ * Removes the requested idle state for the specified cpuidle device.
+ */
+static void cpuidle_coupled_set_not_waiting(int cpu,
+		struct cpuidle_coupled *coupled)
+{
+	/*
+	 * Decrementing waiting count can race with incrementing it in
+	 * cpuidle_coupled_set_waiting, but that's OK.  Worst case, some
+	 * cpus will increment ready_count and then spin until they
+	 * notice that this cpu has cleared its requested_state.
+	 */
+	atomic_dec(&coupled->ready_waiting_counts);
+
+	coupled->requested_state[cpu] = CPUIDLE_COUPLED_NOT_IDLE;
+}
+
+/**
+ * cpuidle_coupled_set_done - mark this cpu as leaving the ready loop
+ * @cpu: the current cpu
+ * @coupled: the struct coupled that contains the current cpu
+ *
+ * Marks this cpu as no longer in the ready and waiting loops.  Decrements
+ * the waiting count first to prevent another cpu looping back in and seeing
+ * this cpu as waiting just before it exits idle.
+ */
+static void cpuidle_coupled_set_done(int cpu, struct cpuidle_coupled *coupled)
+{
+	cpuidle_coupled_set_not_waiting(cpu, coupled);
+	atomic_sub(MAX_WAITING_CPUS, &coupled->ready_waiting_counts);
+}
+
+/**
+ * cpuidle_coupled_clear_pokes - spin until the poke interrupt is processed
+ * @cpu: this cpu
+ *
+ * Turns on interrupts and spins until any outstanding poke interrupts have
+ * been processed and the poke bit has been cleared.
+ *
+ * Other interrupts may also be processed while interrupts are enabled, so
+ * need_resched() must be tested after turning interrupts off again to make sure
+ * the interrupt didn't schedule work that should take the cpu out of idle.
+ *
+ * Returns 0 if need_resched was false, -EINTR if need_resched was true.
+ */
+static int cpuidle_coupled_clear_pokes(int cpu)
+{
+	local_irq_enable();
+	while (cpumask_test_cpu(cpu, &cpuidle_coupled_poked_mask))
+		cpu_relax();
+	local_irq_disable();
+
+	return need_resched() ? -EINTR : 0;
+}
+
+/**
+ * cpuidle_enter_state_coupled - attempt to enter a state with coupled cpus
+ * @dev: struct cpuidle_device for the current cpu
+ * @drv: struct cpuidle_driver for the platform
+ * @next_state: index of the requested state in drv->states
+ *
+ * Coordinate with coupled cpus to enter the target state.  This is a two
+ * stage process.  In the first stage, the cpus are operating independently,
+ * and may call into cpuidle_enter_state_coupled at completely different times.
+ * To save as much power as possible, the first cpus to call this function will
+ * go to an intermediate state (the cpuidle_device's safe state), and wait for
+ * all the other cpus to call this function.  Once all coupled cpus are idle,
+ * the second stage will start.  Each coupled cpu will spin until all cpus have
+ * guaranteed that they will call the target state.
+ *
+ * This function must be called with interrupts disabled.  It may enable
+ * interrupts while preparing for idle, and it will always return with
+ * interrupts enabled.
+ */
+int cpuidle_enter_state_coupled(struct cpuidle_device *dev,
+		struct cpuidle_driver *drv, int next_state)
+{
+	int entered_state = -1;
+	struct cpuidle_coupled *coupled = dev->coupled;
+
+	if (!coupled)
+		return -EINVAL;
+
+	while (coupled->prevent) {
+		if (cpuidle_coupled_clear_pokes(dev->cpu)) {
+			local_irq_enable();
+			return entered_state;
+		}
+		entered_state = cpuidle_enter_state(dev, drv,
+			dev->safe_state_index);
+	}
+
+	/* Read barrier ensures online_count is read after prevent is cleared */
+	smp_rmb();
+
+	cpuidle_coupled_set_waiting(dev->cpu, coupled, next_state);
+
+retry:
+	/*
+	 * Wait for all coupled cpus to be idle, using the deepest state
+	 * allowed for a single cpu.
+	 */
+	while (!cpuidle_coupled_cpus_waiting(coupled)) {
+		if (cpuidle_coupled_clear_pokes(dev->cpu)) {
+			cpuidle_coupled_set_not_waiting(dev->cpu, coupled);
+			goto out;
+		}
+
+		if (coupled->prevent) {
+			cpuidle_coupled_set_not_waiting(dev->cpu, coupled);
+			goto out;
+		}
+
+		entered_state = cpuidle_enter_state(dev, drv,
+			dev->safe_state_index);
+	}
+
+	if (cpuidle_coupled_clear_pokes(dev->cpu)) {
+		cpuidle_coupled_set_not_waiting(dev->cpu, coupled);
+		goto out;
+	}
+
+	/*
+	 * All coupled cpus are probably idle.  There is a small chance that
+	 * one of the other cpus just became active.  Increment the ready count,
+	 * and spin until all coupled cpus have incremented the counter.  Once a
+	 * cpu has incremented the ready counter, it cannot abort idle and must
+	 * spin until either all cpus have incremented the ready counter, or
+	 * another cpu leaves idle and decrements the waiting counter.
+	 */
+
+	cpuidle_coupled_set_ready(coupled);
+	while (!cpuidle_coupled_cpus_ready(coupled)) {
+		/* Check if any other cpus bailed out of idle. */
+		if (!cpuidle_coupled_cpus_waiting(coupled))
+			if (!cpuidle_coupled_set_not_ready(coupled))
+				goto retry;
+
+		cpu_relax();
+	}
+
+	/* all cpus have acked the coupled state */
+	next_state = cpuidle_coupled_get_state(dev, coupled);
+
+	entered_state = cpuidle_enter_state(dev, drv, next_state);
+
+	cpuidle_coupled_set_done(dev->cpu, coupled);
+
+out:
+	/*
+	 * Normal cpuidle states are expected to return with irqs enabled.
+	 * That leads to an inefficiency where a cpu receiving an interrupt
+	 * that brings it out of idle will process that interrupt before
+	 * exiting the idle enter function and decrementing ready_count.  All
+	 * other cpus will need to spin waiting for the cpu that is processing
+	 * the interrupt.  If the driver returns with interrupts disabled,
+	 * all other cpus will loop back into the safe idle state instead of
+	 * spinning, saving power.
+	 *
+	 * Calling local_irq_enable here allows coupled states to return with
+	 * interrupts disabled, but won't cause problems for drivers that
+	 * exit with interrupts enabled.
+	 */
+	local_irq_enable();
+
+	/*
+	 * Wait until all coupled cpus have exited idle.  There is no risk that
+	 * a cpu exits and re-enters the ready state because this cpu has
+	 * already decremented its waiting_count.
+	 */
+	while (!cpuidle_coupled_no_cpus_ready(coupled))
+		cpu_relax();
+
+	return entered_state;
+}
+
+static void cpuidle_coupled_update_online_cpus(struct cpuidle_coupled *coupled)
+{
+	cpumask_t cpus;
+	cpumask_and(&cpus, cpu_online_mask, &coupled->coupled_cpus);
+	coupled->online_count = cpumask_weight(&cpus);
+}
+
+/**
+ * cpuidle_coupled_register_device - register a coupled cpuidle device
+ * @dev: struct cpuidle_device for the current cpu
+ *
+ * Called from cpuidle_register_device to handle coupled idle init.  Finds the
+ * cpuidle_coupled struct for this set of coupled cpus, or creates one if none
+ * exists yet.
+ */
+int cpuidle_coupled_register_device(struct cpuidle_device *dev)
+{
+	int cpu;
+	struct cpuidle_device *other_dev;
+	struct call_single_data *csd;
+	struct cpuidle_coupled *coupled;
+
+	if (cpumask_empty(&dev->coupled_cpus))
+		return 0;
+
+	for_each_cpu_mask(cpu, dev->coupled_cpus) {
+		other_dev = per_cpu(cpuidle_devices, cpu);
+		if (other_dev && other_dev->coupled) {
+			coupled = other_dev->coupled;
+			goto have_coupled;
+		}
+	}
+
+	/* No existing coupled info found, create a new one */
+	coupled = kzalloc(sizeof(struct cpuidle_coupled), GFP_KERNEL);
+	if (!coupled)
+		return -ENOMEM;
+
+	coupled->coupled_cpus = dev->coupled_cpus;
+
+have_coupled:
+	dev->coupled = coupled;
+	if (WARN_ON(!cpumask_equal(&dev->coupled_cpus, &coupled->coupled_cpus)))
+		coupled->prevent++;
+
+	cpuidle_coupled_update_online_cpus(coupled);
+
+	coupled->refcnt++;
+
+	csd = &per_cpu(cpuidle_coupled_poke_cb, dev->cpu);
+	csd->func = cpuidle_coupled_poked;
+	csd->info = (void *)(unsigned long)dev->cpu;
+
+	return 0;
+}
+
+/**
+ * cpuidle_coupled_unregister_device - unregister a coupled cpuidle device
+ * @dev: struct cpuidle_device for the current cpu
+ *
+ * Called from cpuidle_unregister_device to tear down coupled idle.  Removes the
+ * cpu from the coupled idle set, and frees the cpuidle_coupled struct if
+ * this was the last cpu in the set.
+ */
+void cpuidle_coupled_unregister_device(struct cpuidle_device *dev)
+{
+	struct cpuidle_coupled *coupled = dev->coupled;
+
+	if (cpumask_empty(&dev->coupled_cpus))
+		return;
+
+	/* free the shared struct only when the last user goes away */
+	if (!--coupled->refcnt)
+		kfree(coupled);
+	dev->coupled = NULL;
+}
+
+/**
+ * cpuidle_coupled_prevent_idle - prevent cpus from entering a coupled state
+ * @coupled: the struct coupled that contains the cpu that is changing state
+ *
+ * Disables coupled cpuidle on a coupled set of cpus.  Used to ensure that
+ * cpu_online_mask doesn't change while cpus are coordinating coupled idle.
+ */
+static void cpuidle_coupled_prevent_idle(struct cpuidle_coupled *coupled)
+{
+	int cpu = get_cpu();
+
+	/* Force all cpus out of the waiting loop. */
+	coupled->prevent++;
+	cpuidle_coupled_poke_others(cpu, coupled);
+	put_cpu();
+	while (!cpuidle_coupled_no_cpus_waiting(coupled))
+		cpu_relax();
+}
+
+/**
+ * cpuidle_coupled_allow_idle - allow cpus to enter a coupled state
+ * @coupled: the struct coupled that contains the cpu that is changing state
+ *
+ * Enables coupled cpuidle on a coupled set of cpus.  Used to ensure that
+ * cpu_online_mask doesn't change while cpus are coordinating coupled idle.
+ */
+static void cpuidle_coupled_allow_idle(struct cpuidle_coupled *coupled)
+{
+	int cpu = get_cpu();
+
+	/*
+	 * Write barrier ensures readers see the new online_count when they
+	 * see prevent == 0.
+	 */
+	smp_wmb();
+	coupled->prevent--;
+	/* Force cpus out of the prevent loop. */
+	cpuidle_coupled_poke_others(cpu, coupled);
+	put_cpu();
+}
+
+/**
+ * cpuidle_coupled_cpu_notify - notifier called during hotplug transitions
+ * @nb: notifier block
+ * @action: hotplug transition
+ * @hcpu: target cpu number
+ *
+ * Called when a cpu is brought online or taken offline using hotplug.  Updates
+ * the coupled cpu set appropriately.
+ */
+static int cpuidle_coupled_cpu_notify(struct notifier_block *nb,
+		unsigned long action, void *hcpu)
+{
+	int cpu = (unsigned long)hcpu;
+	struct cpuidle_device *dev;
+
+	mutex_lock(&cpuidle_lock);
+
+	dev = per_cpu(cpuidle_devices, cpu);
+	if (!dev || !dev->coupled)
+		goto out;
+
+	switch (action & ~CPU_TASKS_FROZEN) {
+	case CPU_UP_PREPARE:
+	case CPU_DOWN_PREPARE:
+		cpuidle_coupled_prevent_idle(dev->coupled);
+		break;
+	case CPU_ONLINE:
+	case CPU_DEAD:
+		cpuidle_coupled_update_online_cpus(dev->coupled);
+		/* Fall through */
+	case CPU_UP_CANCELED:
+	case CPU_DOWN_FAILED:
+		cpuidle_coupled_allow_idle(dev->coupled);
+		break;
+	}
+
+out:
+	mutex_unlock(&cpuidle_lock);
+	return NOTIFY_OK;
+}
+
+static struct notifier_block cpuidle_coupled_cpu_notifier = {
+	.notifier_call = cpuidle_coupled_cpu_notify,
+};
+
+static int __init cpuidle_coupled_init(void)
+{
+	return register_cpu_notifier(&cpuidle_coupled_cpu_notifier);
+}
+core_initcall(cpuidle_coupled_init);
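
As a standalone illustration of the ready_waiting_counts packing used above (the waiting count lives in the low WAITING_BITS bits, the ready count in the bits above them), the following userspace sketch reproduces the arithmetic with a plain int standing in for the atomic_t; the cpu counts are invented:

    #include <stdio.h>

    #define WAITING_BITS 16
    #define MAX_WAITING_CPUS (1 << WAITING_BITS)
    #define WAITING_MASK (MAX_WAITING_CPUS - 1)

    int main(void)
    {
    	int counts = 0;
    	int cpu;

    	/* four cpus enter the waiting loop: the atomic_inc_return in
    	 * cpuidle_coupled_set_waiting adds 1 */
    	for (cpu = 0; cpu < 4; cpu++)
    		counts += 1;

    	/* two of them advance to the ready loop: cpuidle_coupled_set_ready
    	 * adds MAX_WAITING_CPUS, leaving the waiting count untouched */
    	for (cpu = 0; cpu < 2; cpu++)
    		counts += MAX_WAITING_CPUS;

    	printf("waiting = %d, ready = %d\n",
    	       counts & WAITING_MASK, counts >> WAITING_BITS);
    	/* prints: waiting = 4, ready = 2 */
    	return 0;
    }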
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index d6a533e68e0f..e28f6ea46f1a 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -92,6 +92,34 @@ int cpuidle_play_dead(void)
 }
 
 /**
+ * cpuidle_enter_state - enter the state and update stats
+ * @dev: cpuidle device for this cpu
+ * @drv: cpuidle driver for this cpu
+ * @next_state: index into drv->states of the state to enter
+ */
+int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
+		int next_state)
+{
+	int entered_state;
+
+	entered_state = cpuidle_enter_ops(dev, drv, next_state);
+
+	if (entered_state >= 0) {
+		/* Update cpuidle counters */
+		/* This can be moved to within driver enter routine
+		 * but that results in multiple copies of same code.
+		 */
+		dev->states_usage[entered_state].time +=
+				(unsigned long long)dev->last_residency;
+		dev->states_usage[entered_state].usage++;
+	} else {
+		dev->last_residency = 0;
+	}
+
+	return entered_state;
+}
+
+/**
  * cpuidle_idle_call - the main idle loop
  *
  * NOTE: no locks or semaphores should be used here
@@ -113,15 +141,6 @@ int cpuidle_idle_call(void)
 	if (!dev || !dev->enabled)
 		return -EBUSY;
 
-#if 0
-	/* shows regressions, re-enable for 2.6.29 */
-	/*
-	 * run any timers that can be run now, at this point
-	 * before calculating the idle duration etc.
-	 */
-	hrtimer_peek_ahead_timers();
-#endif
-
 	/* ask the governor for the next state */
 	next_state = cpuidle_curr_governor->select(drv, dev);
 	if (need_resched()) {
@@ -132,23 +151,15 @@ int cpuidle_idle_call(void)
 	trace_power_start_rcuidle(POWER_CSTATE, next_state, dev->cpu);
 	trace_cpu_idle_rcuidle(next_state, dev->cpu);
 
-	entered_state = cpuidle_enter_ops(dev, drv, next_state);
+	if (cpuidle_state_is_coupled(dev, drv, next_state))
+		entered_state = cpuidle_enter_state_coupled(dev, drv,
+							    next_state);
+	else
+		entered_state = cpuidle_enter_state(dev, drv, next_state);
 
 	trace_power_end_rcuidle(dev->cpu);
 	trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu);
 
-	if (entered_state >= 0) {
-		/* Update cpuidle counters */
-		/* This can be moved to within driver enter routine
-		 * but that results in multiple copies of same code.
-		 */
-		dev->states_usage[entered_state].time +=
-				(unsigned long long)dev->last_residency;
-		dev->states_usage[entered_state].usage++;
-	} else {
-		dev->last_residency = 0;
-	}
-
 	/* give the governor an opportunity to reflect on the outcome */
 	if (cpuidle_curr_governor->reflect)
 		cpuidle_curr_governor->reflect(dev, entered_state);
@@ -299,6 +310,9 @@ int cpuidle_enable_device(struct cpuidle_device *dev)
 	int ret, i;
 	struct cpuidle_driver *drv = cpuidle_get_driver();
 
+	if (!dev)
+		return -EINVAL;
+
 	if (dev->enabled)
 		return 0;
 	if (!drv || !cpuidle_curr_governor)
@@ -383,8 +397,6 @@ static int __cpuidle_register_device(struct cpuidle_device *dev)
 	struct device *cpu_dev = get_cpu_device((unsigned long)dev->cpu);
 	struct cpuidle_driver *cpuidle_driver = cpuidle_get_driver();
 
-	if (!dev)
-		return -EINVAL;
 	if (!try_module_get(cpuidle_driver->owner))
 		return -EINVAL;
 
@@ -392,13 +404,25 @@ static int __cpuidle_register_device(struct cpuidle_device *dev)
 
 	per_cpu(cpuidle_devices, dev->cpu) = dev;
 	list_add(&dev->device_list, &cpuidle_detected_devices);
-	if ((ret = cpuidle_add_sysfs(cpu_dev))) {
-		module_put(cpuidle_driver->owner);
-		return ret;
-	}
+	ret = cpuidle_add_sysfs(cpu_dev);
+	if (ret)
+		goto err_sysfs;
+
+	ret = cpuidle_coupled_register_device(dev);
+	if (ret)
+		goto err_coupled;
 
 	dev->registered = 1;
 	return 0;
+
+err_coupled:
+	cpuidle_remove_sysfs(cpu_dev);
+	wait_for_completion(&dev->kobj_unregister);
+err_sysfs:
+	list_del(&dev->device_list);
+	per_cpu(cpuidle_devices, dev->cpu) = NULL;
+	module_put(cpuidle_driver->owner);
+	return ret;
 }
 
 /**
@@ -409,6 +433,9 @@ int cpuidle_register_device(struct cpuidle_device *dev)
 {
 	int ret;
 
+	if (!dev)
+		return -EINVAL;
+
 	mutex_lock(&cpuidle_lock);
 
 	if ((ret = __cpuidle_register_device(dev))) {
@@ -448,6 +475,8 @@ void cpuidle_unregister_device(struct cpuidle_device *dev)
 	wait_for_completion(&dev->kobj_unregister);
 	per_cpu(cpuidle_devices, dev->cpu) = NULL;
 
+	cpuidle_coupled_unregister_device(dev);
+
 	cpuidle_resume_and_unlock();
 
 	module_put(cpuidle_driver->owner);
diff --git a/drivers/cpuidle/cpuidle.h b/drivers/cpuidle/cpuidle.h
index 7db186685c27..76e7f696ad8c 100644
--- a/drivers/cpuidle/cpuidle.h
+++ b/drivers/cpuidle/cpuidle.h
@@ -14,6 +14,8 @@ extern struct list_head cpuidle_detected_devices;
 extern struct mutex cpuidle_lock;
 extern spinlock_t cpuidle_driver_lock;
 extern int cpuidle_disabled(void);
+extern int cpuidle_enter_state(struct cpuidle_device *dev,
+		struct cpuidle_driver *drv, int next_state);
 
 /* idle loop */
 extern void cpuidle_install_idle_handler(void);
@@ -30,4 +32,34 @@ extern void cpuidle_remove_state_sysfs(struct cpuidle_device *device);
 extern int cpuidle_add_sysfs(struct device *dev);
 extern void cpuidle_remove_sysfs(struct device *dev);
 
+#ifdef CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED
+bool cpuidle_state_is_coupled(struct cpuidle_device *dev,
+		struct cpuidle_driver *drv, int state);
+int cpuidle_enter_state_coupled(struct cpuidle_device *dev,
+		struct cpuidle_driver *drv, int next_state);
+int cpuidle_coupled_register_device(struct cpuidle_device *dev);
+void cpuidle_coupled_unregister_device(struct cpuidle_device *dev);
+#else
+static inline bool cpuidle_state_is_coupled(struct cpuidle_device *dev,
+		struct cpuidle_driver *drv, int state)
+{
+	return false;
+}
+
+static inline int cpuidle_enter_state_coupled(struct cpuidle_device *dev,
+		struct cpuidle_driver *drv, int next_state)
+{
+	return -1;
+}
+
+static inline int cpuidle_coupled_register_device(struct cpuidle_device *dev)
+{
+	return 0;
+}
+
+static inline void cpuidle_coupled_unregister_device(struct cpuidle_device *dev)
+{
+}
+#endif
+
 #endif /* __DRIVER_CPUIDLE_H */
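
Taken together, a platform driver would consume these interfaces roughly as follows. This is a hedged sketch against the cpuidle API of this era, not code from the series: example_idle_dev, example_cpu_wfi() and example_powerdown() are invented stand-ins for real platform hooks, and the latency/residency numbers are arbitrary.

    #include <linux/atomic.h>
    #include <linux/cpu.h>
    #include <linux/cpuidle.h>
    #include <linux/cpumask.h>
    #include <linux/init.h>
    #include <linux/module.h>
    #include <linux/percpu.h>

    /* hypothetical platform hooks, assumed to exist elsewhere */
    extern void example_cpu_wfi(void);
    extern void example_powerdown(int cpu);

    static DEFINE_PER_CPU(struct cpuidle_device, example_idle_dev);
    static atomic_t abort_barrier;

    /* per-cpu safe state: no coordination with the other cpus needed */
    static int example_enter_wfi(struct cpuidle_device *dev,
    			     struct cpuidle_driver *drv, int index)
    {
    	example_cpu_wfi();
    	return index;
    }

    /* coupled state: called on all cpus at approximately the same time */
    static int example_enter_coupled(struct cpuidle_device *dev,
    				 struct cpuidle_driver *drv, int index)
    {
    	/* e.g. let the secondaries power down before cpu 0 proceeds */
    	cpuidle_coupled_parallel_barrier(dev, &abort_barrier);
    	example_powerdown(dev->cpu);
    	return index;	/* interrupts are left disabled, as required */
    }

    static struct cpuidle_driver example_idle_driver = {
    	.name	= "example_idle",
    	.owner	= THIS_MODULE,
    	.states = {
    		{
    			.enter			= example_enter_wfi,
    			.exit_latency		= 1,
    			.target_residency	= 1,
    			.flags			= CPUIDLE_FLAG_TIME_VALID,
    			.name			= "WFI",
    			.desc			= "per-cpu wait for interrupt",
    		},
    		{
    			.enter			= example_enter_coupled,
    			.exit_latency		= 5000,
    			.target_residency	= 10000,
    			.flags			= CPUIDLE_FLAG_TIME_VALID |
    						  CPUIDLE_FLAG_COUPLED,
    			.name			= "C2",
    			.desc			= "whole-cluster power down",
    		},
    	},
    	.state_count = 2,
    };

    static int __init example_idle_init(void)
    {
    	int cpu, ret;
    	struct cpuidle_device *dev;

    	ret = cpuidle_register_driver(&example_idle_driver);
    	if (ret)
    		return ret;

    	for_each_possible_cpu(cpu) {
    		dev = &per_cpu(example_idle_dev, cpu);
    		dev->cpu = cpu;
    		/* state 0 (WFI) is safe to enter without coordination */
    		dev->safe_state_index = 0;
    		/* every cpu shares the one coupled set */
    		cpumask_copy(&dev->coupled_cpus, cpu_possible_mask);
    		ret = cpuidle_register_device(dev);
    		if (ret)
    			return ret;
    	}
    	return 0;
    }
    device_initcall(example_idle_init);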