about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author    Rafael J. Wysocki <rafael.j.wysocki@intel.com>    2015-02-13 17:50:43 -0500
committer Rafael J. Wysocki <rjw@rjwysocki.net>             2015-02-15 13:40:09 -0500
commit 124cf9117c5f93cc5b324530b7e105b09c729d5d (patch)
tree   e3416dc59e678015f41bd9b70dc1a8cc6145ca80
parent 060407aed56c00960c9b5f70f5d19b2823adffd7 (diff)
PM / sleep: Make it possible to quiesce timers during suspend-to-idle
The efficiency of suspend-to-idle depends on being able to keep CPUs in the deepest available idle states for as much time as possible. Ideally, they should only be brought out of idle by system wakeup interrupts.

However, timer interrupts occurring periodically prevent that from happening and it is not practical to chase all of the "misbehaving" timers in a whack-a-mole fashion. A much more effective approach is to suspend the local ticks for all CPUs and the entire timekeeping along the lines of what is done during full suspend, which also helps to keep suspend-to-idle and full suspend reasonably similar.

The idea is to suspend the local tick on each CPU executing cpuidle_enter_freeze() and to make the last of them suspend the entire timekeeping. That should prevent timer interrupts from triggering until an IO interrupt wakes up one of the CPUs. It needs to be done with interrupts disabled on all of the CPUs, though, because otherwise the suspended clocksource might be accessed by an interrupt handler which might lead to fatal consequences.

Unfortunately, the existing ->enter callbacks provided by cpuidle drivers generally cannot be used for implementing that, because some of them re-enable interrupts temporarily and some idle entry methods cause interrupts to be re-enabled automatically on exit. Also some of these callbacks manipulate local clock event devices of the CPUs which really shouldn't be done after suspending their ticks.

To overcome that difficulty, introduce a new cpuidle state callback, ->enter_freeze, that will be guaranteed (1) to keep interrupts disabled all the time (and return with interrupts disabled) and (2) not to touch the CPU timer devices. Modify cpuidle_enter_freeze() to look for the deepest available idle state with ->enter_freeze present and to make the CPU execute that callback with suspended tick (and the last of the online CPUs to execute it with suspended timekeeping).
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
-rw-r--r--  drivers/cpuidle/cpuidle.c    49
-rw-r--r--  include/linux/cpuidle.h       9
-rw-r--r--  include/linux/tick.h          6
-rw-r--r--  kernel/time/tick-common.c    50
-rw-r--r--  kernel/time/timekeeping.c     4
-rw-r--r--  kernel/time/timekeeping.h     2
6 files changed, 112 insertions(+), 8 deletions(-)
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index 23a8d6cc8d30..4d534582514e 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -20,6 +20,7 @@
20#include <linux/hrtimer.h> 20#include <linux/hrtimer.h>
21#include <linux/module.h> 21#include <linux/module.h>
22#include <linux/suspend.h> 22#include <linux/suspend.h>
23#include <linux/tick.h>
23#include <trace/events/power.h> 24#include <trace/events/power.h>
24 25
25#include "cpuidle.h" 26#include "cpuidle.h"
@@ -69,18 +70,20 @@ int cpuidle_play_dead(void)
69 * cpuidle_find_deepest_state - Find deepest state meeting specific conditions. 70 * cpuidle_find_deepest_state - Find deepest state meeting specific conditions.
70 * @drv: cpuidle driver for the given CPU. 71 * @drv: cpuidle driver for the given CPU.
71 * @dev: cpuidle device for the given CPU. 72 * @dev: cpuidle device for the given CPU.
73 * @freeze: Whether or not the state should be suitable for suspend-to-idle.
72 */ 74 */
73static int cpuidle_find_deepest_state(struct cpuidle_driver *drv, 75static int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
74 struct cpuidle_device *dev) 76 struct cpuidle_device *dev, bool freeze)
75{ 77{
76 unsigned int latency_req = 0; 78 unsigned int latency_req = 0;
77 int i, ret = CPUIDLE_DRIVER_STATE_START - 1; 79 int i, ret = freeze ? -1 : CPUIDLE_DRIVER_STATE_START - 1;
78 80
79 for (i = CPUIDLE_DRIVER_STATE_START; i < drv->state_count; i++) { 81 for (i = CPUIDLE_DRIVER_STATE_START; i < drv->state_count; i++) {
80 struct cpuidle_state *s = &drv->states[i]; 82 struct cpuidle_state *s = &drv->states[i];
81 struct cpuidle_state_usage *su = &dev->states_usage[i]; 83 struct cpuidle_state_usage *su = &dev->states_usage[i];
82 84
83 if (s->disabled || su->disable || s->exit_latency <= latency_req) 85 if (s->disabled || su->disable || s->exit_latency <= latency_req
86 || (freeze && !s->enter_freeze))
84 continue; 87 continue;
85 88
86 latency_req = s->exit_latency; 89 latency_req = s->exit_latency;
@@ -89,10 +92,31 @@ static int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
89 return ret; 92 return ret;
90} 93}
91 94
95static void enter_freeze_proper(struct cpuidle_driver *drv,
96 struct cpuidle_device *dev, int index)
97{
98 tick_freeze();
99 /*
100 * The state used here cannot be a "coupled" one, because the "coupled"
101 * cpuidle mechanism enables interrupts and doing that with timekeeping
102 * suspended is generally unsafe.
103 */
104 drv->states[index].enter_freeze(dev, drv, index);
105 WARN_ON(!irqs_disabled());
106 /*
107 * timekeeping_resume() that will be called by tick_unfreeze() for the
108 * last CPU executing it calls functions containing RCU read-side
109 * critical sections, so tell RCU about that.
110 */
111 RCU_NONIDLE(tick_unfreeze());
112}
113
92/** 114/**
93 * cpuidle_enter_freeze - Enter an idle state suitable for suspend-to-idle. 115 * cpuidle_enter_freeze - Enter an idle state suitable for suspend-to-idle.
94 * 116 *
95 * Find the deepest state available and enter it. 117 * If there are states with the ->enter_freeze callback, find the deepest of
118 * them and enter it with frozen tick. Otherwise, find the deepest state
119 * available and enter it normally.
96 */ 120 */
97void cpuidle_enter_freeze(void) 121void cpuidle_enter_freeze(void)
98{ 122{
@@ -100,7 +124,22 @@ void cpuidle_enter_freeze(void)
100 struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); 124 struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
101 int index; 125 int index;
102 126
103 index = cpuidle_find_deepest_state(drv, dev); 127 /*
128 * Find the deepest state with ->enter_freeze present, which guarantees
129 * that interrupts won't be enabled when it exits and allows the tick to
130 * be frozen safely.
131 */
132 index = cpuidle_find_deepest_state(drv, dev, true);
133 if (index >= 0) {
134 enter_freeze_proper(drv, dev, index);
135 return;
136 }
137
138 /*
139 * It is not safe to freeze the tick, find the deepest state available
140 * at all and try to enter it normally.
141 */
142 index = cpuidle_find_deepest_state(drv, dev, false);
104 if (index >= 0) 143 if (index >= 0)
105 cpuidle_enter(drv, dev, index); 144 cpuidle_enter(drv, dev, index);
106 else 145 else
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index f63aabf4ee90..f551a9299ac9 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -50,6 +50,15 @@ struct cpuidle_state {
50 int index); 50 int index);
51 51
52 int (*enter_dead) (struct cpuidle_device *dev, int index); 52 int (*enter_dead) (struct cpuidle_device *dev, int index);
53
54 /*
55 * CPUs execute ->enter_freeze with the local tick or entire timekeeping
56 * suspended, so it must not re-enable interrupts at any point (even
57 * temporarily) or attempt to change states of clock event devices.
58 */
59 void (*enter_freeze) (struct cpuidle_device *dev,
60 struct cpuidle_driver *drv,
61 int index);
53}; 62};
54 63
55/* Idle State Flags */ 64/* Idle State Flags */
diff --git a/include/linux/tick.h b/include/linux/tick.h
index eda850ca757a..9c085dc12ae9 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -79,6 +79,9 @@ extern void __init tick_init(void);
79extern int tick_is_oneshot_available(void); 79extern int tick_is_oneshot_available(void);
80extern struct tick_device *tick_get_device(int cpu); 80extern struct tick_device *tick_get_device(int cpu);
81 81
82extern void tick_freeze(void);
83extern void tick_unfreeze(void);
84
82# ifdef CONFIG_HIGH_RES_TIMERS 85# ifdef CONFIG_HIGH_RES_TIMERS
83extern int tick_init_highres(void); 86extern int tick_init_highres(void);
84extern int tick_program_event(ktime_t expires, int force); 87extern int tick_program_event(ktime_t expires, int force);
@@ -119,6 +122,8 @@ static inline int tick_oneshot_mode_active(void) { return 0; }
119 122
120#else /* CONFIG_GENERIC_CLOCKEVENTS */ 123#else /* CONFIG_GENERIC_CLOCKEVENTS */
121static inline void tick_init(void) { } 124static inline void tick_init(void) { }
125static inline void tick_freeze(void) { }
126static inline void tick_unfreeze(void) { }
122static inline void tick_cancel_sched_timer(int cpu) { } 127static inline void tick_cancel_sched_timer(int cpu) { }
123static inline void tick_clock_notify(void) { } 128static inline void tick_clock_notify(void) { }
124static inline int tick_check_oneshot_change(int allow_nohz) { return 0; } 129static inline int tick_check_oneshot_change(int allow_nohz) { return 0; }
@@ -226,5 +231,4 @@ static inline void tick_nohz_task_switch(struct task_struct *tsk)
226 __tick_nohz_task_switch(tsk); 231 __tick_nohz_task_switch(tsk);
227} 232}
228 233
229
230#endif 234#endif
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 7efeedf53ebd..f7c515595b42 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -394,6 +394,56 @@ void tick_resume(void)
394 } 394 }
395} 395}
396 396
397static DEFINE_RAW_SPINLOCK(tick_freeze_lock);
398static unsigned int tick_freeze_depth;
399
400/**
401 * tick_freeze - Suspend the local tick and (possibly) timekeeping.
402 *
403 * Check if this is the last online CPU executing the function and if so,
404 * suspend timekeeping. Otherwise suspend the local tick.
405 *
406 * Call with interrupts disabled. Must be balanced with %tick_unfreeze().
407 * Interrupts must not be enabled before the subsequent %tick_unfreeze().
408 */
409void tick_freeze(void)
410{
411 raw_spin_lock(&tick_freeze_lock);
412
413 tick_freeze_depth++;
414 if (tick_freeze_depth == num_online_cpus()) {
415 timekeeping_suspend();
416 } else {
417 tick_suspend();
418 tick_suspend_broadcast();
419 }
420
421 raw_spin_unlock(&tick_freeze_lock);
422}
423
424/**
425 * tick_unfreeze - Resume the local tick and (possibly) timekeeping.
426 *
427 * Check if this is the first CPU executing the function and if so, resume
428 * timekeeping. Otherwise resume the local tick.
429 *
430 * Call with interrupts disabled. Must be balanced with %tick_freeze().
431 * Interrupts must not be enabled after the preceding %tick_freeze().
432 */
433void tick_unfreeze(void)
434{
435 raw_spin_lock(&tick_freeze_lock);
436
437 if (tick_freeze_depth == num_online_cpus())
438 timekeeping_resume();
439 else
440 tick_resume();
441
442 tick_freeze_depth--;
443
444 raw_spin_unlock(&tick_freeze_lock);
445}
446
397/** 447/**
398 * tick_init - initialize the tick control 448 * tick_init - initialize the tick control
399 */ 449 */
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index aef5dc722abf..91db94136c10 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -1197,7 +1197,7 @@ void timekeeping_inject_sleeptime64(struct timespec64 *delta)
1197 * xtime/wall_to_monotonic/jiffies/etc are 1197 * xtime/wall_to_monotonic/jiffies/etc are
1198 * still managed by arch specific suspend/resume code. 1198 * still managed by arch specific suspend/resume code.
1199 */ 1199 */
1200static void timekeeping_resume(void) 1200void timekeeping_resume(void)
1201{ 1201{
1202 struct timekeeper *tk = &tk_core.timekeeper; 1202 struct timekeeper *tk = &tk_core.timekeeper;
1203 struct clocksource *clock = tk->tkr.clock; 1203 struct clocksource *clock = tk->tkr.clock;
@@ -1278,7 +1278,7 @@ static void timekeeping_resume(void)
1278 hrtimers_resume(); 1278 hrtimers_resume();
1279} 1279}
1280 1280
1281static int timekeeping_suspend(void) 1281int timekeeping_suspend(void)
1282{ 1282{
1283 struct timekeeper *tk = &tk_core.timekeeper; 1283 struct timekeeper *tk = &tk_core.timekeeper;
1284 unsigned long flags; 1284 unsigned long flags;
diff --git a/kernel/time/timekeeping.h b/kernel/time/timekeeping.h
index adc1fc98bde3..1d91416055d5 100644
--- a/kernel/time/timekeeping.h
+++ b/kernel/time/timekeeping.h
@@ -16,5 +16,7 @@ extern int timekeeping_inject_offset(struct timespec *ts);
16extern s32 timekeeping_get_tai_offset(void); 16extern s32 timekeeping_get_tai_offset(void);
17extern void timekeeping_set_tai_offset(s32 tai_offset); 17extern void timekeeping_set_tai_offset(s32 tai_offset);
18extern void timekeeping_clocktai(struct timespec *ts); 18extern void timekeeping_clocktai(struct timespec *ts);
19extern int timekeeping_suspend(void);
20extern void timekeeping_resume(void);
19 21
20#endif 22#endif