author    Rafael J. Wysocki <rafael.j.wysocki@intel.com>    2014-05-20 07:22:15 -0400
committer Rafael J. Wysocki <rafael.j.wysocki@intel.com>    2014-05-20 07:22:15 -0400
commit    55cc33ceb75643d190ed215f423972e0b7ae7aeb (patch)
tree      e9573d995bfae0f81ce800433b424bb998f90eca
parent    1f0b63866fc1be700260547be8edf8e6f0af37f2 (diff)
parent    f71495f3f0c5f0801823d1235b271a4a415d3df8 (diff)
Merge branch 'pm-sleep' into acpi-pm
-rw-r--r--  Documentation/power/devices.txt    | 34
-rw-r--r--  Documentation/power/runtime_pm.txt | 17
-rw-r--r--  Documentation/power/swsusp.txt     |  5
-rw-r--r--  drivers/base/power/main.c          | 66
-rw-r--r--  drivers/cpuidle/cpuidle.c          | 55
-rw-r--r--  drivers/cpuidle/governors/menu.c   | 17
-rw-r--r--  include/linux/cpuidle.h            |  7
-rw-r--r--  include/linux/pm.h                 | 36
-rw-r--r--  include/linux/pm_runtime.h         |  6
-rw-r--r--  kernel/power/hibernate.c           | 27
-rw-r--r--  kernel/power/suspend.c             |  2
-rw-r--r--  kernel/sched/idle.c                | 20
12 files changed, 216 insertions(+), 76 deletions(-)
diff --git a/Documentation/power/devices.txt b/Documentation/power/devices.txt
index 47d46dff70f7..d172bce0fd49 100644
--- a/Documentation/power/devices.txt
+++ b/Documentation/power/devices.txt
@@ -2,6 +2,7 @@ Device Power Management
 
 Copyright (c) 2010-2011 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
 Copyright (c) 2010 Alan Stern <stern@rowland.harvard.edu>
+Copyright (c) 2014 Intel Corp., Rafael J. Wysocki <rafael.j.wysocki@intel.com>
 
 
 Most of the code in Linux is device drivers, so most of the Linux power
@@ -326,6 +327,20 @@ the phases are:
 	driver in some way for the upcoming system power transition, but it
 	should not put the device into a low-power state.
 
+	For devices supporting runtime power management, the return value of the
+	prepare callback can be used to indicate to the PM core that it may
+	safely leave the device in runtime suspend (if runtime-suspended
+	already), provided that all of the device's descendants are also left in
+	runtime suspend.  Namely, if the prepare callback returns a positive
+	number and that happens for all of the descendants of the device too,
+	and all of them (including the device itself) are runtime-suspended, the
+	PM core will skip the suspend, suspend_late and suspend_noirq suspend
+	phases as well as the resume_noirq, resume_early and resume phases of
+	the following system resume for all of these devices.  In that case,
+	the complete callback will be called directly after the prepare callback
+	and is entirely responsible for bringing the device back to the
+	functional state as appropriate.
+
  2. The suspend methods should quiesce the device to stop it from performing
 	I/O.  They also may save the device registers and put it into the
 	appropriate low-power state, depending on the bus type the device is on,
@@ -400,12 +415,23 @@ When resuming from freeze, standby or memory sleep, the phases are:
 	the resume callbacks occur; it's not necessary to wait until the
 	complete phase.
 
+	Moreover, if the preceding prepare callback returned a positive number,
+	the device may have been left in runtime suspend throughout the whole
+	system suspend and resume (the suspend, suspend_late, suspend_noirq
+	phases of system suspend and the resume_noirq, resume_early, resume
+	phases of system resume may have been skipped for it).  In that case,
+	the complete callback is entirely responsible for bringing the device
+	back to the functional state after system suspend if necessary.  [For
+	example, it may need to queue up a runtime resume request for the device
+	for this purpose.]  To check if that is the case, the complete callback
+	can consult the device's power.direct_complete flag.  Namely, if that
+	flag is set when the complete callback is being run, it has been called
+	directly after the preceding prepare and special action may be required
+	to make the device work correctly afterward.
+
 At the end of these phases, drivers should be as functional as they were before
 suspending: I/O can be performed using DMA and IRQs, and the relevant clocks are
-gated on.  Even if the device was in a low-power state before the system sleep
-because of runtime power management, afterwards it should be back in its
-full-power state.  There are multiple reasons why it's best to do this; they are
-discussed in more detail in Documentation/power/runtime_pm.txt.
+gated on.
 
 However, the details here may again be platform-specific.  For example,
 some systems support multiple "run" states, and the mode in effect at
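
The documentation added above translates naturally into driver code. As a hedged illustration of the prepare side, a driver whose runtime-suspended state is adequate for system suspend could look roughly like the following sketch; the foo_ names are illustrative and not part of this patch, and whether returning a positive value is safe is a per-device judgment.

/* Hypothetical driver fragment (not part of this patch): opt in to the
 * direct_complete path by returning a positive number from ->prepare()
 * when the device is already runtime-suspended. */
#include <linux/pm.h>
#include <linux/pm_runtime.h>

static int foo_prepare(struct device *dev)
{
	/*
	 * 1 means "this device appears to be runtime-suspended and its
	 * state is fine"; 0 means "suspend me normally".
	 */
	return pm_runtime_status_suspended(dev) ? 1 : 0;
}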
diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt
index 5f96daf8566a..e1bee8a4aaac 100644
--- a/Documentation/power/runtime_pm.txt
+++ b/Documentation/power/runtime_pm.txt
@@ -2,6 +2,7 @@ Runtime Power Management Framework for I/O Devices
 
 (C) 2009-2011 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
 (C) 2010 Alan Stern <stern@rowland.harvard.edu>
+(C) 2014 Intel Corp., Rafael J. Wysocki <rafael.j.wysocki@intel.com>
 
 1. Introduction
 
@@ -444,6 +445,10 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h:
   bool pm_runtime_status_suspended(struct device *dev);
     - return true if the device's runtime PM status is 'suspended'
 
+  bool pm_runtime_suspended_if_enabled(struct device *dev);
+    - return true if the device's runtime PM status is 'suspended' and its
+      'power.disable_depth' field is equal to 1
+
   void pm_runtime_allow(struct device *dev);
     - set the power.runtime_auto flag for the device and decrease its usage
       counter (used by the /sys/devices/.../power/control interface to
@@ -644,6 +649,18 @@ place (in particular, if the system is not waking up from hibernation), it may
 be more efficient to leave the devices that had been suspended before the system
 suspend began in the suspended state.
 
+To this end, the PM core provides a mechanism allowing some coordination between
+different levels of device hierarchy.  Namely, if a system suspend .prepare()
+callback returns a positive number for a device, that indicates to the PM core
+that the device appears to be runtime-suspended and its state is fine, so it
+may be left in runtime suspend provided that all of its descendants are also
+left in runtime suspend.  If that happens, the PM core will not execute any
+system suspend and resume callbacks for all of those devices, except for the
+complete callback, which is then entirely responsible for handling the device
+as appropriate.  This only applies to system suspend transitions that are not
+related to hibernation (see Documentation/power/devices.txt for more
+information).
+
 The PM core does its best to reduce the probability of race conditions between
 the runtime PM and system suspend/resume (and hibernation) callbacks by carrying
 out the following operations:
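
Continuing the hypothetical foo_ sketch from the devices.txt hunk above, the complete side checks power.direct_complete to learn whether the suspend/resume phases were skipped; queuing a runtime resume is one plausible way to bring the device back if it is needed. Per the main.c hunks below, the PM core re-enables runtime PM (matching the disable in __device_suspend()) before ->complete() runs.

/* Hypothetical counterpart to foo_prepare(): if the PM core took the
 * direct_complete shortcut, no suspend/resume callbacks ran, so
 * ->complete() must arrange for the device to become functional again. */
static void foo_complete(struct device *dev)
{
	if (dev->power.direct_complete) {
		/* Queue a runtime resume; runtime PM has already been
		 * re-enabled by the PM core at this point. */
		pm_request_resume(dev);
	}
}

static const struct dev_pm_ops foo_pm_ops = {
	.prepare  = foo_prepare,
	.complete = foo_complete,
	/* runtime PM and other system sleep callbacks would follow */
};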
diff --git a/Documentation/power/swsusp.txt b/Documentation/power/swsusp.txt
index 079160e22bcc..f732a8321e8a 100644
--- a/Documentation/power/swsusp.txt
+++ b/Documentation/power/swsusp.txt
@@ -220,7 +220,10 @@ Q: After resuming, system is paging heavily, leading to very bad interactivity.
 
 A: Try running
 
-cat `cat /proc/[0-9]*/maps | grep / | sed 's:.* /:/:' | sort -u` > /dev/null
+cat /proc/[0-9]*/maps | grep / | sed 's:.* /:/:' | sort -u | while read file
+do
+  test -f "$file" && cat "$file" > /dev/null
+done
 
 after resume. swapoff -a; swapon -a may also be useful.
 
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 86d5e4fb5b98..343ffad59377 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -479,7 +479,7 @@ static int device_resume_noirq(struct device *dev, pm_message_t state, bool asyn
 	TRACE_DEVICE(dev);
 	TRACE_RESUME(0);
 
-	if (dev->power.syscore)
+	if (dev->power.syscore || dev->power.direct_complete)
 		goto Out;
 
 	if (!dev->power.is_noirq_suspended)
@@ -605,7 +605,7 @@ static int device_resume_early(struct device *dev, pm_message_t state, bool asyn
 	TRACE_DEVICE(dev);
 	TRACE_RESUME(0);
 
-	if (dev->power.syscore)
+	if (dev->power.syscore || dev->power.direct_complete)
 		goto Out;
 
 	if (!dev->power.is_late_suspended)
@@ -735,6 +735,12 @@ static int device_resume(struct device *dev, pm_message_t state, bool async)
 	if (dev->power.syscore)
 		goto Complete;
 
+	if (dev->power.direct_complete) {
+		/* Match the pm_runtime_disable() in __device_suspend(). */
+		pm_runtime_enable(dev);
+		goto Complete;
+	}
+
 	dpm_wait(dev->parent, async);
 	dpm_watchdog_set(&wd, dev);
 	device_lock(dev);
@@ -1007,7 +1013,7 @@ static int __device_suspend_noirq(struct device *dev, pm_message_t state, bool a
 		goto Complete;
 	}
 
-	if (dev->power.syscore)
+	if (dev->power.syscore || dev->power.direct_complete)
 		goto Complete;
 
 	dpm_wait_for_children(dev, async);
@@ -1146,7 +1152,7 @@ static int __device_suspend_late(struct device *dev, pm_message_t state, bool as
 		goto Complete;
 	}
 
-	if (dev->power.syscore)
+	if (dev->power.syscore || dev->power.direct_complete)
 		goto Complete;
 
 	dpm_wait_for_children(dev, async);
@@ -1332,6 +1338,17 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async)
 	if (dev->power.syscore)
 		goto Complete;
 
+	if (dev->power.direct_complete) {
+		if (pm_runtime_status_suspended(dev)) {
+			pm_runtime_disable(dev);
+			if (pm_runtime_suspended_if_enabled(dev))
+				goto Complete;
+
+			pm_runtime_enable(dev);
+		}
+		dev->power.direct_complete = false;
+	}
+
 	dpm_watchdog_set(&wd, dev);
 	device_lock(dev);
 
@@ -1382,10 +1399,19 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async)
 
  End:
 	if (!error) {
+		struct device *parent = dev->parent;
+
 		dev->power.is_suspended = true;
-		if (dev->power.wakeup_path
-		    && dev->parent && !dev->parent->power.ignore_children)
-			dev->parent->power.wakeup_path = true;
+		if (parent) {
+			spin_lock_irq(&parent->power.lock);
+
+			dev->parent->power.direct_complete = false;
+			if (dev->power.wakeup_path
+			    && !dev->parent->power.ignore_children)
+				dev->parent->power.wakeup_path = true;
+
+			spin_unlock_irq(&parent->power.lock);
+		}
 	}
 
 	device_unlock(dev);
@@ -1487,7 +1513,7 @@ static int device_prepare(struct device *dev, pm_message_t state)
 {
 	int (*callback)(struct device *) = NULL;
 	char *info = NULL;
-	int error = 0;
+	int ret = 0;
 
 	if (dev->power.syscore)
 		return 0;
@@ -1523,17 +1549,27 @@ static int device_prepare(struct device *dev, pm_message_t state)
 		callback = dev->driver->pm->prepare;
 	}
 
-	if (callback) {
-		error = callback(dev);
-		suspend_report_result(callback, error);
-	}
+	if (callback)
+		ret = callback(dev);
 
 	device_unlock(dev);
 
-	if (error)
+	if (ret < 0) {
+		suspend_report_result(callback, ret);
 		pm_runtime_put(dev);
-
-	return error;
+		return ret;
+	}
+	/*
+	 * A positive return value from ->prepare() means "this device appears
+	 * to be runtime-suspended and its state is fine, so if it really is
+	 * runtime-suspended, you can leave it in that state provided that you
+	 * will do the same thing with all of its descendants".  This only
+	 * applies to suspend transitions, however.
+	 */
+	spin_lock_irq(&dev->power.lock);
+	dev->power.direct_complete = ret > 0 && state.event == PM_EVENT_SUSPEND;
+	spin_unlock_irq(&dev->power.lock);
+	return 0;
 }
 
 /**
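
A condensed restatement of what the __device_suspend() hunk above does may clarify the ordering: runtime PM is disabled before the final status check so the device cannot be runtime-resumed between the check and the decision to skip its suspend phases, and pm_runtime_suspended_if_enabled() (disable_depth == 1) confirms that this call is the only thing holding runtime PM disabled. A simplified sketch, not the kernel function itself:

/* Simplified restatement of the direct_complete check above; the real
 * logic lives inline in __device_suspend(). */
static bool foo_may_skip_suspend(struct device *dev)
{
	if (!pm_runtime_status_suspended(dev))
		return false;		/* device is active; suspend normally */

	pm_runtime_disable(dev);	/* freeze the runtime PM status */
	if (pm_runtime_suspended_if_enabled(dev))
		return true;		/* still suspended, only we disabled it */

	pm_runtime_enable(dev);		/* lost a race with a runtime resume */
	return false;
}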
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index 8236746e46bb..cb7019977c50 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -32,6 +32,7 @@ LIST_HEAD(cpuidle_detected_devices);
 static int enabled_devices;
 static int off __read_mostly;
 static int initialized __read_mostly;
+static bool use_deepest_state __read_mostly;
 
 int cpuidle_disabled(void)
 {
@@ -65,23 +66,42 @@ int cpuidle_play_dead(void)
 }
 
 /**
- * cpuidle_enabled - check if the cpuidle framework is ready
- * @dev: cpuidle device for this cpu
- * @drv: cpuidle driver for this cpu
+ * cpuidle_use_deepest_state - Enable/disable the "deepest idle" mode.
+ * @enable: Whether enable or disable the feature.
+ *
+ * If the "deepest idle" mode is enabled, cpuidle will ignore the governor and
+ * always use the state with the greatest exit latency (out of the states that
+ * are not disabled).
  *
- * Return 0 on success, otherwise:
- * -NODEV : the cpuidle framework is not available
- * -EBUSY : the cpuidle framework is not initialized
+ * This function can only be called after cpuidle_pause() to avoid races.
  */
-int cpuidle_enabled(struct cpuidle_driver *drv, struct cpuidle_device *dev)
+void cpuidle_use_deepest_state(bool enable)
 {
-	if (off || !initialized)
-		return -ENODEV;
+	use_deepest_state = enable;
+}
 
-	if (!drv || !dev || !dev->enabled)
-		return -EBUSY;
+/**
+ * cpuidle_find_deepest_state - Find the state of the greatest exit latency.
+ * @drv: cpuidle driver for a given CPU.
+ * @dev: cpuidle device for a given CPU.
+ */
+static int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
+				      struct cpuidle_device *dev)
+{
+	unsigned int latency_req = 0;
+	int i, ret = CPUIDLE_DRIVER_STATE_START - 1;
 
-	return 0;
+	for (i = CPUIDLE_DRIVER_STATE_START; i < drv->state_count; i++) {
+		struct cpuidle_state *s = &drv->states[i];
+		struct cpuidle_state_usage *su = &dev->states_usage[i];
+
+		if (s->disabled || su->disable || s->exit_latency <= latency_req)
+			continue;
+
+		latency_req = s->exit_latency;
+		ret = i;
+	}
+	return ret;
 }
 
 /**
@@ -138,6 +158,15 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
  */
 int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 {
+	if (off || !initialized)
+		return -ENODEV;
+
+	if (!drv || !dev || !dev->enabled)
+		return -EBUSY;
+
+	if (unlikely(use_deepest_state))
+		return cpuidle_find_deepest_state(drv, dev);
+
 	return cpuidle_curr_governor->select(drv, dev);
 }
 
@@ -169,7 +198,7 @@ int cpuidle_enter(struct cpuidle_driver *drv, struct cpuidle_device *dev,
  */
 void cpuidle_reflect(struct cpuidle_device *dev, int index)
 {
-	if (cpuidle_curr_governor->reflect)
+	if (cpuidle_curr_governor->reflect && !unlikely(use_deepest_state))
 		cpuidle_curr_governor->reflect(dev, index);
 }
 
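
The selection loop in cpuidle_find_deepest_state() is easy to model outside the kernel: scan the states and keep the index of the enabled one with the greatest exit latency. A standalone sketch using reduced stand-ins for the kernel types:

/* Standalone model of the deepest-state scan above; struct state is a
 * reduced stand-in for struct cpuidle_state, not the kernel definition. */
#include <stdbool.h>
#include <stdio.h>

struct state { unsigned int exit_latency; bool disabled; };

static int find_deepest(const struct state *s, int count)
{
	unsigned int latency_req = 0;
	int i, ret = -1;	/* the kernel uses CPUIDLE_DRIVER_STATE_START - 1 */

	for (i = 0; i < count; i++) {
		if (s[i].disabled || s[i].exit_latency <= latency_req)
			continue;
		latency_req = s[i].exit_latency;
		ret = i;
	}
	return ret;
}

int main(void)
{
	struct state states[] = {
		{ .exit_latency = 2 }, { .exit_latency = 10 },
		{ .exit_latency = 150, .disabled = true }, { .exit_latency = 80 },
	};

	printf("deepest usable state: %d\n", find_deepest(states, 4)); /* 3 */
	return 0;
}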
diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index 71b523293354..c4f80c15a48d 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -296,7 +296,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 		data->needs_update = 0;
 	}
 
-	data->last_state_idx = 0;
+	data->last_state_idx = CPUIDLE_DRIVER_STATE_START - 1;
 
 	/* Special case when user has set very strict latency requirement */
 	if (unlikely(latency_req == 0))
@@ -311,13 +311,6 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 	data->bucket = which_bucket(data->next_timer_us);
 
 	/*
-	 * if the correction factor is 0 (eg first time init or cpu hotplug
-	 * etc), we actually want to start out with a unity factor.
-	 */
-	if (data->correction_factor[data->bucket] == 0)
-		data->correction_factor[data->bucket] = RESOLUTION * DECAY;
-
-	/*
 	 * Force the result of multiplication to be 64 bits even if both
 	 * operands are 32 bits.
 	 * Make sure to round up for half microseconds.
@@ -466,9 +459,17 @@ static int menu_enable_device(struct cpuidle_driver *drv,
 				 struct cpuidle_device *dev)
 {
 	struct menu_device *data = &per_cpu(menu_devices, dev->cpu);
+	int i;
 
 	memset(data, 0, sizeof(struct menu_device));
 
+	/*
+	 * if the correction factor is 0 (eg first time init or cpu hotplug
+	 * etc), we actually want to start out with a unity factor.
+	 */
+	for(i = 0; i < BUCKETS; i++)
+		data->correction_factor[i] = RESOLUTION * DECAY;
+
 	return 0;
 }
 
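
With the unity factor RESOLUTION * DECAY pre-loaded at enable time, the per-wakeup select path no longer needs the zero check. A userspace sketch of why that value is the identity for the menu governor's scaling; the prediction formula here is a simplification for illustration, not a copy of menu.c:

/* Minimal userspace model (not kernel code) of a menu-style predictor.
 * RESOLUTION and DECAY match the constants in governors/menu.c. */
#include <stdint.h>
#include <stdio.h>

#define RESOLUTION 1024
#define DECAY 8
#define BUCKETS 12

int main(void)
{
	uint64_t correction[BUCKETS];
	int i;

	/* What menu_enable_device() now does once, instead of re-checking
	 * for zero on every menu_select() call. */
	for (i = 0; i < BUCKETS; i++)
		correction[i] = RESOLUTION * DECAY;

	uint64_t next_timer_us = 5000;
	/* With a unity factor the prediction equals the timer distance. */
	uint64_t predicted_us = next_timer_us * correction[0] / (RESOLUTION * DECAY);

	printf("predicted %llu us\n", (unsigned long long)predicted_us);
	return 0;
}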
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index b0238cba440b..c51a436135c4 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -120,8 +120,6 @@ struct cpuidle_driver {
 #ifdef CONFIG_CPU_IDLE
 extern void disable_cpuidle(void);
 
-extern int cpuidle_enabled(struct cpuidle_driver *drv,
-			   struct cpuidle_device *dev);
 extern int cpuidle_select(struct cpuidle_driver *drv,
 			  struct cpuidle_device *dev);
 extern int cpuidle_enter(struct cpuidle_driver *drv,
@@ -145,13 +143,11 @@ extern void cpuidle_resume(void);
 extern int cpuidle_enable_device(struct cpuidle_device *dev);
 extern void cpuidle_disable_device(struct cpuidle_device *dev);
 extern int cpuidle_play_dead(void);
+extern void cpuidle_use_deepest_state(bool enable);
 
 extern struct cpuidle_driver *cpuidle_get_cpu_driver(struct cpuidle_device *dev);
 #else
 static inline void disable_cpuidle(void) { }
-static inline int cpuidle_enabled(struct cpuidle_driver *drv,
-				  struct cpuidle_device *dev)
-{return -ENODEV; }
 static inline int cpuidle_select(struct cpuidle_driver *drv,
 				 struct cpuidle_device *dev)
 {return -ENODEV; }
@@ -180,6 +176,7 @@ static inline int cpuidle_enable_device(struct cpuidle_device *dev)
 {return -ENODEV; }
 static inline void cpuidle_disable_device(struct cpuidle_device *dev) { }
 static inline int cpuidle_play_dead(void) {return -ENODEV; }
+static inline void cpuidle_use_deepest_state(bool enable) {}
 static inline struct cpuidle_driver *cpuidle_get_cpu_driver(
 	struct cpuidle_device *dev) {return NULL; }
 #endif
diff --git a/include/linux/pm.h b/include/linux/pm.h
index d915d0345fa1..72c0fe098a27 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -93,13 +93,23 @@ typedef struct pm_message {
  *	been registered) to recover from the race condition.
  *	This method is executed for all kinds of suspend transitions and is
  *	followed by one of the suspend callbacks: @suspend(), @freeze(), or
- *	@poweroff().  The PM core executes subsystem-level @prepare() for all
- *	devices before starting to invoke suspend callbacks for any of them, so
- *	generally devices may be assumed to be functional or to respond to
- *	runtime resume requests while @prepare() is being executed.  However,
- *	device drivers may NOT assume anything about the availability of user
- *	space at that time and it is NOT valid to request firmware from within
- *	@prepare() (it's too late to do that).  It also is NOT valid to allocate
+ *	@poweroff().  If the transition is a suspend to memory or standby (that
+ *	is, not related to hibernation), the return value of @prepare() may be
+ *	used to indicate to the PM core to leave the device in runtime suspend
+ *	if applicable.  Namely, if @prepare() returns a positive number, the PM
+ *	core will understand that as a declaration that the device appears to be
+ *	runtime-suspended and it may be left in that state during the entire
+ *	transition and during the subsequent resume if all of its descendants
+ *	are left in runtime suspend too.  If that happens, @complete() will be
+ *	executed directly after @prepare() and it must ensure the proper
+ *	functioning of the device after the system resume.
+ *	The PM core executes subsystem-level @prepare() for all devices before
+ *	starting to invoke suspend callbacks for any of them, so generally
+ *	devices may be assumed to be functional or to respond to runtime resume
+ *	requests while @prepare() is being executed.  However, device drivers
+ *	may NOT assume anything about the availability of user space at that
+ *	time and it is NOT valid to request firmware from within @prepare()
+ *	(it's too late to do that).  It also is NOT valid to allocate
  *	substantial amounts of memory from @prepare() in the GFP_KERNEL mode.
  *	[To work around these limitations, drivers may register suspend and
  *	hibernation notifiers to be executed before the freezing of tasks.]
@@ -112,7 +122,16 @@ typedef struct pm_message {
  *	of the other devices that the PM core has unsuccessfully attempted to
  *	suspend earlier).
  *	The PM core executes subsystem-level @complete() after it has executed
- *	the appropriate resume callbacks for all devices.
+ *	the appropriate resume callbacks for all devices.  If the corresponding
+ *	@prepare() at the beginning of the suspend transition returned a
+ *	positive number and the device was left in runtime suspend (without
+ *	executing any suspend and resume callbacks for it), @complete() will be
+ *	the only callback executed for the device during resume.  In that case,
+ *	@complete() must be prepared to do whatever is necessary to ensure the
+ *	proper functioning of the device after the system resume.  To this end,
+ *	@complete() can check the power.direct_complete flag of the device to
+ *	learn whether (unset) or not (set) the previous suspend and resume
+ *	callbacks have been executed for it.
  *
  * @suspend: Executed before putting the system into a sleep state in which the
  *	contents of main memory are preserved.  The exact action to perform
@@ -546,6 +565,7 @@ struct dev_pm_info {
 	bool is_late_suspended:1;
 	bool ignore_children:1;
 	bool early_init:1;	/* Owned by the PM core */
+	bool direct_complete:1;	/* Owned by the PM core */
 	spinlock_t lock;
 #ifdef CONFIG_PM_SLEEP
 	struct list_head entry;
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index 2a5897a4afbc..43fd6716f662 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -101,6 +101,11 @@ static inline bool pm_runtime_status_suspended(struct device *dev)
 	return dev->power.runtime_status == RPM_SUSPENDED;
 }
 
+static inline bool pm_runtime_suspended_if_enabled(struct device *dev)
+{
+	return pm_runtime_status_suspended(dev) && dev->power.disable_depth == 1;
+}
+
 static inline bool pm_runtime_enabled(struct device *dev)
 {
 	return !dev->power.disable_depth;
@@ -150,6 +155,7 @@ static inline void device_set_run_wake(struct device *dev, bool enable) {}
 static inline bool pm_runtime_suspended(struct device *dev) { return false; }
 static inline bool pm_runtime_active(struct device *dev) { return true; }
 static inline bool pm_runtime_status_suspended(struct device *dev) { return false; }
+static inline bool pm_runtime_suspended_if_enabled(struct device *dev) { return false; }
 static inline bool pm_runtime_enabled(struct device *dev) { return false; }
 
 static inline void pm_runtime_no_callbacks(struct device *dev) {}
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index f4f2073711d3..df88d55dc436 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -35,7 +35,7 @@
 static int nocompress;
 static int noresume;
 static int resume_wait;
-static int resume_delay;
+static unsigned int resume_delay;
 static char resume_file[256] = CONFIG_PM_STD_PARTITION;
 dev_t swsusp_resume_device;
 sector_t swsusp_resume_block;
@@ -228,19 +228,23 @@ static void platform_recover(int platform_mode)
 void swsusp_show_speed(struct timeval *start, struct timeval *stop,
 			unsigned nr_pages, char *msg)
 {
-	s64 elapsed_centisecs64;
-	int centisecs;
-	int k;
-	int kps;
+	u64 elapsed_centisecs64;
+	unsigned int centisecs;
+	unsigned int k;
+	unsigned int kps;
 
 	elapsed_centisecs64 = timeval_to_ns(stop) - timeval_to_ns(start);
+	/*
+	 * If "(s64)elapsed_centisecs64 < 0", it will print long elapsed time,
+	 * it is obvious enough for what went wrong.
+	 */
 	do_div(elapsed_centisecs64, NSEC_PER_SEC / 100);
 	centisecs = elapsed_centisecs64;
 	if (centisecs == 0)
 		centisecs = 1;	/* avoid div-by-zero */
 	k = nr_pages * (PAGE_SIZE / 1024);
 	kps = (k * 100) / centisecs;
-	printk(KERN_INFO "PM: %s %d kbytes in %d.%02d seconds (%d.%02d MB/s)\n",
+	printk(KERN_INFO "PM: %s %u kbytes in %u.%02u seconds (%u.%02u MB/s)\n",
 		msg, k,
 		centisecs / 100, centisecs % 100,
 		kps / 1000, (kps % 1000) / 10);
@@ -595,7 +599,8 @@ static void power_down(void)
 	case HIBERNATION_PLATFORM:
 		hibernation_platform_enter();
 	case HIBERNATION_SHUTDOWN:
-		kernel_power_off();
+		if (pm_power_off)
+			kernel_power_off();
 		break;
 #ifdef CONFIG_SUSPEND
 	case HIBERNATION_SUSPEND:
@@ -623,7 +628,8 @@ static void power_down(void)
 	 * corruption after resume.
 	 */
 	printk(KERN_CRIT "PM: Please power down manually\n");
-	while(1);
+	while (1)
+		cpu_relax();
 }
 
 /**
@@ -1109,7 +1115,10 @@ static int __init resumewait_setup(char *str)
 
 static int __init resumedelay_setup(char *str)
 {
-	resume_delay = simple_strtoul(str, NULL, 0);
+	int rc = kstrtouint(str, 0, &resume_delay);
+
+	if (rc)
+		return rc;
 	return 1;
 }
 
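
The type changes in swsusp_show_speed() are easy to sanity-check in userspace. A small program reproducing the arithmetic with the new unsigned types (PAGE_SIZE assumed to be 4096 here for illustration):

/* Userspace check of the throughput arithmetic above: kbytes written,
 * centiseconds elapsed, then KB/s formatted as the kernel message does. */
#include <stdio.h>

int main(void)
{
	unsigned int centisecs = 250;			/* 2.50 s elapsed */
	unsigned int nr_pages = 131072;			/* 512 MB at 4 KiB pages */
	unsigned int k = nr_pages * (4096 / 1024);	/* kbytes written */
	unsigned int kps = (k * 100) / centisecs;	/* kbytes per second */

	printf("PM: wrote %u kbytes in %u.%02u seconds (%u.%02u MB/s)\n",
	       k, centisecs / 100, centisecs % 100,
	       kps / 1000, (kps % 1000) / 10);
	return 0;
}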
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 73a905f83972..b62bea0bb624 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -62,9 +62,11 @@ static void freeze_begin(void)
 
 static void freeze_enter(void)
 {
+	cpuidle_use_deepest_state(true);
 	cpuidle_resume();
 	wait_event(suspend_freeze_wait_head, suspend_freeze_wake);
 	cpuidle_pause();
+	cpuidle_use_deepest_state(false);
 }
 
 void freeze_wake(void)
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 8f4390a079c7..a8f12247ce7c 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -101,19 +101,13 @@ static int cpuidle_idle_call(void)
 	rcu_idle_enter();
 
 	/*
-	 * Check if the cpuidle framework is ready, otherwise fallback
-	 * to the default arch specific idle method
+	 * Ask the cpuidle framework to choose a convenient idle state.
+	 * Fall back to the default arch specific idle method on errors.
 	 */
-	ret = cpuidle_enabled(drv, dev);
+	next_state = cpuidle_select(drv, dev);
 
-	if (!ret) {
-		/*
-		 * Ask the governor to choose an idle state it thinks
-		 * it is convenient to go to. There is *always* a
-		 * convenient idle state
-		 */
-		next_state = cpuidle_select(drv, dev);
-
+	ret = next_state;
+	if (ret >= 0) {
 		/*
 		 * The idle task must be scheduled, it is pointless to
 		 * go to idle, just update no idle residency and get
@@ -140,7 +134,7 @@ static int cpuidle_idle_call(void)
 				      CLOCK_EVT_NOTIFY_BROADCAST_ENTER,
 				      &dev->cpu);
 
-		if (!ret) {
+		if (ret >= 0) {
 			trace_cpu_idle_rcuidle(next_state, dev->cpu);
 
 			/*
@@ -175,7 +169,7 @@ static int cpuidle_idle_call(void)
 	 * We can't use the cpuidle framework, let's use the default
 	 * idle routine
 	 */
-	if (ret)
+	if (ret < 0)
 		arch_cpu_idle();
 
 	__current_set_polling();
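
After this change, cpuidle_select() folds the old cpuidle_enabled() checks into its return value: a negative errno means "no usable idle state, fall back to arch_cpu_idle()", while any value >= 0 is an index into the driver's state table. A userspace sketch of that convention (names illustrative, not the kernel API):

/* Model of the errno-vs-index convention cpuidle_select() now follows. */
#include <stdio.h>
#include <errno.h>

static int pick_idle_state(int framework_ready, int governor_choice)
{
	if (!framework_ready)
		return -ENODEV;	/* was cpuidle_enabled()'s job before this merge */
	return governor_choice;	/* index of the chosen state */
}

int main(void)
{
	int next_state = pick_idle_state(1, 3);

	if (next_state >= 0)
		printf("enter state %d\n", next_state);
	else
		printf("fall back to arch_cpu_idle() (%d)\n", next_state);
	return 0;
}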