aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/ABI/testing/sysfs-devices-power88
-rw-r--r--Documentation/ABI/testing/sysfs-power29
-rw-r--r--Documentation/kernel-parameters.txt5
-rw-r--r--Documentation/power/00-INDEX2
-rw-r--r--Documentation/power/interface.txt2
-rw-r--r--Documentation/power/opp.txt375
-rw-r--r--Documentation/power/runtime_pm.txt227
-rw-r--r--Documentation/power/s2ram.txt7
-rw-r--r--Documentation/power/swsusp.txt3
-rw-r--r--drivers/base/power/Makefile1
-rw-r--r--drivers/base/power/generic_ops.c4
-rw-r--r--drivers/base/power/main.c21
-rw-r--r--drivers/base/power/opp.c628
-rw-r--r--drivers/base/power/power.h2
-rw-r--r--drivers/base/power/runtime.c944
-rw-r--r--drivers/base/power/sysfs.c217
-rw-r--r--drivers/base/power/trace.c36
-rw-r--r--drivers/base/power/wakeup.c613
-rw-r--r--fs/sysfs/group.c59
-rw-r--r--include/linux/opp.h105
-rw-r--r--include/linux/pm.h38
-rw-r--r--include/linux/pm_runtime.h121
-rw-r--r--include/linux/pm_wakeup.h127
-rw-r--r--include/linux/resume-trace.h2
-rw-r--r--include/linux/suspend.h6
-rw-r--r--include/linux/sysfs.h15
-rw-r--r--kernel/power/Kconfig17
-rw-r--r--kernel/power/hibernate.c25
-rw-r--r--kernel/power/main.c29
-rw-r--r--kernel/power/power.h10
-rw-r--r--kernel/power/process.c11
-rw-r--r--kernel/power/snapshot.c13
-rw-r--r--kernel/power/swap.c300
33 files changed, 3447 insertions, 635 deletions
diff --git a/Documentation/ABI/testing/sysfs-devices-power b/Documentation/ABI/testing/sysfs-devices-power
index 6123c523bfd7..7628cd1bc36a 100644
--- a/Documentation/ABI/testing/sysfs-devices-power
+++ b/Documentation/ABI/testing/sysfs-devices-power
@@ -77,3 +77,91 @@ Description:
77 devices this attribute is set to "enabled" by bus type code or 77 devices this attribute is set to "enabled" by bus type code or
78 device drivers and in that cases it should be safe to leave the 78 device drivers and in that cases it should be safe to leave the
79 default value. 79 default value.
80
81What: /sys/devices/.../power/wakeup_count
82Date: September 2010
83Contact: Rafael J. Wysocki <rjw@sisk.pl>
84Description:
85 The /sys/devices/.../wakeup_count attribute contains the number
86 of signaled wakeup events associated with the device. This
87 attribute is read-only. If the device is not enabled to wake up
88 the system from sleep states, this attribute is empty.
89
90What: /sys/devices/.../power/wakeup_active_count
91Date: September 2010
92Contact: Rafael J. Wysocki <rjw@sisk.pl>
93Description:
94 The /sys/devices/.../wakeup_active_count attribute contains the
95 number of times the processing of wakeup events associated with
96 the device was completed (at the kernel level). This attribute
97 is read-only. If the device is not enabled to wake up the
98 system from sleep states, this attribute is empty.
99
100What: /sys/devices/.../power/wakeup_hit_count
101Date: September 2010
102Contact: Rafael J. Wysocki <rjw@sisk.pl>
103Description:
104 The /sys/devices/.../wakeup_hit_count attribute contains the
105 number of times the processing of a wakeup event associated with
106 the device might prevent the system from entering a sleep state.
107 This attribute is read-only. If the device is not enabled to
108 wake up the system from sleep states, this attribute is empty.
109
110What: /sys/devices/.../power/wakeup_active
111Date: September 2010
112Contact: Rafael J. Wysocki <rjw@sisk.pl>
113Description:
114 The /sys/devices/.../wakeup_active attribute contains either 1,
115 or 0, depending on whether or not a wakeup event associated with
116 the device is being processed (1). This attribute is read-only.
117 If the device is not enabled to wake up the system from sleep
118 states, this attribute is empty.
119
120What: /sys/devices/.../power/wakeup_total_time_ms
121Date: September 2010
122Contact: Rafael J. Wysocki <rjw@sisk.pl>
123Description:
124 The /sys/devices/.../wakeup_total_time_ms attribute contains
125 the total time of processing wakeup events associated with the
126 device, in milliseconds. This attribute is read-only. If the
127 device is not enabled to wake up the system from sleep states,
128 this attribute is empty.
129
130What: /sys/devices/.../power/wakeup_max_time_ms
131Date: September 2010
132Contact: Rafael J. Wysocki <rjw@sisk.pl>
133Description:
134 The /sys/devices/.../wakeup_max_time_ms attribute contains
135 the maximum time of processing a single wakeup event associated
136 with the device, in milliseconds. This attribute is read-only.
137 If the device is not enabled to wake up the system from sleep
138 states, this attribute is empty.
139
140What: /sys/devices/.../power/wakeup_last_time_ms
141Date: September 2010
142Contact: Rafael J. Wysocki <rjw@sisk.pl>
143Description:
144 The /sys/devices/.../wakeup_last_time_ms attribute contains
145 the value of the monotonic clock corresponding to the time of
146 signaling the last wakeup event associated with the device, in
147 milliseconds. This attribute is read-only. If the device is
148 not enabled to wake up the system from sleep states, this
149 attribute is empty.
150
151What: /sys/devices/.../power/autosuspend_delay_ms
152Date: September 2010
153Contact: Alan Stern <stern@rowland.harvard.edu>
154Description:
155 The /sys/devices/.../power/autosuspend_delay_ms attribute
156 contains the autosuspend delay value (in milliseconds). Some
157 drivers do not want their device to suspend as soon as it
158 becomes idle at run time; they want the device to remain
159 inactive for a certain minimum period of time first. That
160 period is called the autosuspend delay. Negative values will
161 prevent the device from being suspended at run time (similar
162 to writing "on" to the power/control attribute). Values >=
163 1000 will cause the autosuspend timer expiration to be rounded
164 up to the nearest second.
165
166 Not all drivers support this attribute. If it isn't supported,
167 attempts to read or write it will yield I/O errors.
diff --git a/Documentation/ABI/testing/sysfs-power b/Documentation/ABI/testing/sysfs-power
index 2875f1f74a07..194ca446ac28 100644
--- a/Documentation/ABI/testing/sysfs-power
+++ b/Documentation/ABI/testing/sysfs-power
@@ -99,9 +99,38 @@ Description:
99 99
100 dmesg -s 1000000 | grep 'hash matches' 100 dmesg -s 1000000 | grep 'hash matches'
101 101
102 If you do not get any matches (or they appear to be false
103 positives), it is possible that the last PM event point
104 referred to a device created by a loadable kernel module. In
105 this case cat /sys/power/pm_trace_dev_match (see below) after
106 your system is started up and the kernel modules are loaded.
107
102 CAUTION: Using it will cause your machine's real-time (CMOS) 108 CAUTION: Using it will cause your machine's real-time (CMOS)
103 clock to be set to a random invalid time after a resume. 109 clock to be set to a random invalid time after a resume.
104 110
111What; /sys/power/pm_trace_dev_match
112Date: October 2010
113Contact: James Hogan <james@albanarts.com>
114Description:
115 The /sys/power/pm_trace_dev_match file contains the name of the
116 device associated with the last PM event point saved in the RTC
117 across reboots when pm_trace has been used. More precisely it
118 contains the list of current devices (including those
119 registered by loadable kernel modules since boot) which match
120 the device hash in the RTC at boot, with a newline after each
121 one.
122
123 The advantage of this file over the hash matches printed to the
124 kernel log (see /sys/power/pm_trace), is that it includes
125 devices created after boot by loadable kernel modules.
126
127 Due to the small hash size necessary to fit in the RTC, it is
128 possible that more than one device matches the hash, in which
129 case further investigation is required to determine which
130 device is causing the problem. Note that genuine RTC clock
131 values (such as when pm_trace has not been used), can still
132 match a device and output it's name here.
133
105What: /sys/power/pm_async 134What: /sys/power/pm_async
106Date: January 2009 135Date: January 2009
107Contact: Rafael J. Wysocki <rjw@sisk.pl> 136Contact: Rafael J. Wysocki <rjw@sisk.pl>
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 3a0009e03d14..02f21d9220ce 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2170,6 +2170,11 @@ and is between 256 and 4096 characters. It is defined in the file
2170 in <PAGE_SIZE> units (needed only for swap files). 2170 in <PAGE_SIZE> units (needed only for swap files).
2171 See Documentation/power/swsusp-and-swap-files.txt 2171 See Documentation/power/swsusp-and-swap-files.txt
2172 2172
2173 hibernate= [HIBERNATION]
2174 noresume Don't check if there's a hibernation image
2175 present during boot.
2176 nocompress Don't compress/decompress hibernation images.
2177
2173 retain_initrd [RAM] Keep initrd memory after extraction 2178 retain_initrd [RAM] Keep initrd memory after extraction
2174 2179
2175 rhash_entries= [KNL,NET] 2180 rhash_entries= [KNL,NET]
diff --git a/Documentation/power/00-INDEX b/Documentation/power/00-INDEX
index fb742c213c9e..45e9d4a91284 100644
--- a/Documentation/power/00-INDEX
+++ b/Documentation/power/00-INDEX
@@ -14,6 +14,8 @@ interface.txt
14 - Power management user interface in /sys/power 14 - Power management user interface in /sys/power
15notifiers.txt 15notifiers.txt
16 - Registering suspend notifiers in device drivers 16 - Registering suspend notifiers in device drivers
17opp.txt
18 - Operating Performance Point library
17pci.txt 19pci.txt
18 - How the PCI Subsystem Does Power Management 20 - How the PCI Subsystem Does Power Management
19pm_qos_interface.txt 21pm_qos_interface.txt
diff --git a/Documentation/power/interface.txt b/Documentation/power/interface.txt
index e67211fe0ee2..c537834af005 100644
--- a/Documentation/power/interface.txt
+++ b/Documentation/power/interface.txt
@@ -57,7 +57,7 @@ smallest image possible. In particular, if "0" is written to this file, the
57suspend image will be as small as possible. 57suspend image will be as small as possible.
58 58
59Reading from this file will display the current image size limit, which 59Reading from this file will display the current image size limit, which
60is set to 500 MB by default. 60is set to 2/5 of available RAM by default.
61 61
62/sys/power/pm_trace controls the code which saves the last PM event point in 62/sys/power/pm_trace controls the code which saves the last PM event point in
63the RTC across reboots, so that you can debug a machine that just hangs 63the RTC across reboots, so that you can debug a machine that just hangs
diff --git a/Documentation/power/opp.txt b/Documentation/power/opp.txt
new file mode 100644
index 000000000000..44d87ad3cea9
--- /dev/null
+++ b/Documentation/power/opp.txt
@@ -0,0 +1,375 @@
1*=============*
2* OPP Library *
3*=============*
4
5(C) 2009-2010 Nishanth Menon <nm@ti.com>, Texas Instruments Incorporated
6
7Contents
8--------
91. Introduction
102. Initial OPP List Registration
113. OPP Search Functions
124. OPP Availability Control Functions
135. OPP Data Retrieval Functions
146. Cpufreq Table Generation
157. Data Structures
16
171. Introduction
18===============
19Complex SoCs of today consists of a multiple sub-modules working in conjunction.
20In an operational system executing varied use cases, not all modules in the SoC
21need to function at their highest performing frequency all the time. To
22facilitate this, sub-modules in a SoC are grouped into domains, allowing some
23domains to run at lower voltage and frequency while other domains are loaded
24more. The set of discrete tuples consisting of frequency and voltage pairs that
25the device will support per domain are called Operating Performance Points or
26OPPs.
27
28OPP library provides a set of helper functions to organize and query the OPP
29information. The library is located in drivers/base/power/opp.c and the header
30is located in include/linux/opp.h. OPP library can be enabled by enabling
31CONFIG_PM_OPP from power management menuconfig menu. OPP library depends on
32CONFIG_PM as certain SoCs such as Texas Instrument's OMAP framework allows to
33optionally boot at a certain OPP without needing cpufreq.
34
35Typical usage of the OPP library is as follows:
36(users) -> registers a set of default OPPs -> (library)
37SoC framework -> modifies on required cases certain OPPs -> OPP layer
38 -> queries to search/retrieve information ->
39
40OPP layer expects each domain to be represented by a unique device pointer. SoC
41framework registers a set of initial OPPs per device with the OPP layer. This
42list is expected to be an optimally small number typically around 5 per device.
43This initial list contains a set of OPPs that the framework expects to be safely
44enabled by default in the system.
45
46Note on OPP Availability:
47------------------------
48As the system proceeds to operate, SoC framework may choose to make certain
49OPPs available or not available on each device based on various external
50factors. Example usage: Thermal management or other exceptional situations where
51SoC framework might choose to disable a higher frequency OPP to safely continue
52operations until that OPP could be re-enabled if possible.
53
54OPP library facilitates this concept in it's implementation. The following
55operational functions operate only on available opps:
56opp_find_freq_{ceil, floor}, opp_get_voltage, opp_get_freq, opp_get_opp_count
57and opp_init_cpufreq_table
58
59opp_find_freq_exact is meant to be used to find the opp pointer which can then
60be used for opp_enable/disable functions to make an opp available as required.
61
62WARNING: Users of OPP library should refresh their availability count using
63get_opp_count if opp_enable/disable functions are invoked for a device, the
64exact mechanism to trigger these or the notification mechanism to other
65dependent subsystems such as cpufreq are left to the discretion of the SoC
66specific framework which uses the OPP library. Similar care needs to be taken
67care to refresh the cpufreq table in cases of these operations.
68
69WARNING on OPP List locking mechanism:
70-------------------------------------------------
71OPP library uses RCU for exclusivity. RCU allows the query functions to operate
72in multiple contexts and this synchronization mechanism is optimal for a read
73intensive operations on data structure as the OPP library caters to.
74
75To ensure that the data retrieved are sane, the users such as SoC framework
76should ensure that the section of code operating on OPP queries are locked
77using RCU read locks. The opp_find_freq_{exact,ceil,floor},
78opp_get_{voltage, freq, opp_count} fall into this category.
79
80opp_{add,enable,disable} are updaters which use mutex and implement it's own
81RCU locking mechanisms. opp_init_cpufreq_table acts as an updater and uses
82mutex to implment RCU updater strategy. These functions should *NOT* be called
83under RCU locks and other contexts that prevent blocking functions in RCU or
84mutex operations from working.
85
862. Initial OPP List Registration
87================================
88The SoC implementation calls opp_add function iteratively to add OPPs per
89device. It is expected that the SoC framework will register the OPP entries
90optimally- typical numbers range to be less than 5. The list generated by
91registering the OPPs is maintained by OPP library throughout the device
92operation. The SoC framework can subsequently control the availability of the
93OPPs dynamically using the opp_enable / disable functions.
94
95opp_add - Add a new OPP for a specific domain represented by the device pointer.
96 The OPP is defined using the frequency and voltage. Once added, the OPP
97 is assumed to be available and control of it's availability can be done
98 with the opp_enable/disable functions. OPP library internally stores
99 and manages this information in the opp struct. This function may be
100 used by SoC framework to define a optimal list as per the demands of
101 SoC usage environment.
102
103 WARNING: Do not use this function in interrupt context.
104
105 Example:
106 soc_pm_init()
107 {
108 /* Do things */
109 r = opp_add(mpu_dev, 1000000, 900000);
110 if (!r) {
111 pr_err("%s: unable to register mpu opp(%d)\n", r);
112 goto no_cpufreq;
113 }
114 /* Do cpufreq things */
115 no_cpufreq:
116 /* Do remaining things */
117 }
118
1193. OPP Search Functions
120=======================
121High level framework such as cpufreq operates on frequencies. To map the
122frequency back to the corresponding OPP, OPP library provides handy functions
123to search the OPP list that OPP library internally manages. These search
124functions return the matching pointer representing the opp if a match is
125found, else returns error. These errors are expected to be handled by standard
126error checks such as IS_ERR() and appropriate actions taken by the caller.
127
128opp_find_freq_exact - Search for an OPP based on an *exact* frequency and
129 availability. This function is especially useful to enable an OPP which
130 is not available by default.
131 Example: In a case when SoC framework detects a situation where a
132 higher frequency could be made available, it can use this function to
133 find the OPP prior to call the opp_enable to actually make it available.
134 rcu_read_lock();
135 opp = opp_find_freq_exact(dev, 1000000000, false);
136 rcu_read_unlock();
137 /* dont operate on the pointer.. just do a sanity check.. */
138 if (IS_ERR(opp)) {
139 pr_err("frequency not disabled!\n");
140 /* trigger appropriate actions.. */
141 } else {
142 opp_enable(dev,1000000000);
143 }
144
145 NOTE: This is the only search function that operates on OPPs which are
146 not available.
147
148opp_find_freq_floor - Search for an available OPP which is *at most* the
149 provided frequency. This function is useful while searching for a lesser
150 match OR operating on OPP information in the order of decreasing
151 frequency.
152 Example: To find the highest opp for a device:
153 freq = ULONG_MAX;
154 rcu_read_lock();
155 opp_find_freq_floor(dev, &freq);
156 rcu_read_unlock();
157
158opp_find_freq_ceil - Search for an available OPP which is *at least* the
159 provided frequency. This function is useful while searching for a
160 higher match OR operating on OPP information in the order of increasing
161 frequency.
162 Example 1: To find the lowest opp for a device:
163 freq = 0;
164 rcu_read_lock();
165 opp_find_freq_ceil(dev, &freq);
166 rcu_read_unlock();
167 Example 2: A simplified implementation of a SoC cpufreq_driver->target:
168 soc_cpufreq_target(..)
169 {
170 /* Do stuff like policy checks etc. */
171 /* Find the best frequency match for the req */
172 rcu_read_lock();
173 opp = opp_find_freq_ceil(dev, &freq);
174 rcu_read_unlock();
175 if (!IS_ERR(opp))
176 soc_switch_to_freq_voltage(freq);
177 else
178 /* do something when we cant satisfy the req */
179 /* do other stuff */
180 }
181
1824. OPP Availability Control Functions
183=====================================
184A default OPP list registered with the OPP library may not cater to all possible
185situation. The OPP library provides a set of functions to modify the
186availability of a OPP within the OPP list. This allows SoC frameworks to have
187fine grained dynamic control of which sets of OPPs are operationally available.
188These functions are intended to *temporarily* remove an OPP in conditions such
189as thermal considerations (e.g. don't use OPPx until the temperature drops).
190
191WARNING: Do not use these functions in interrupt context.
192
193opp_enable - Make a OPP available for operation.
194 Example: Lets say that 1GHz OPP is to be made available only if the
195 SoC temperature is lower than a certain threshold. The SoC framework
196 implementation might choose to do something as follows:
197 if (cur_temp < temp_low_thresh) {
198 /* Enable 1GHz if it was disabled */
199 rcu_read_lock();
200 opp = opp_find_freq_exact(dev, 1000000000, false);
201 rcu_read_unlock();
202 /* just error check */
203 if (!IS_ERR(opp))
204 ret = opp_enable(dev, 1000000000);
205 else
206 goto try_something_else;
207 }
208
209opp_disable - Make an OPP to be not available for operation
210 Example: Lets say that 1GHz OPP is to be disabled if the temperature
211 exceeds a threshold value. The SoC framework implementation might
212 choose to do something as follows:
213 if (cur_temp > temp_high_thresh) {
214 /* Disable 1GHz if it was enabled */
215 rcu_read_lock();
216 opp = opp_find_freq_exact(dev, 1000000000, true);
217 rcu_read_unlock();
218 /* just error check */
219 if (!IS_ERR(opp))
220 ret = opp_disable(dev, 1000000000);
221 else
222 goto try_something_else;
223 }
224
2255. OPP Data Retrieval Functions
226===============================
227Since OPP library abstracts away the OPP information, a set of functions to pull
228information from the OPP structure is necessary. Once an OPP pointer is
229retrieved using the search functions, the following functions can be used by SoC
230framework to retrieve the information represented inside the OPP layer.
231
232opp_get_voltage - Retrieve the voltage represented by the opp pointer.
233 Example: At a cpufreq transition to a different frequency, SoC
234 framework requires to set the voltage represented by the OPP using
235 the regulator framework to the Power Management chip providing the
236 voltage.
237 soc_switch_to_freq_voltage(freq)
238 {
239 /* do things */
240 rcu_read_lock();
241 opp = opp_find_freq_ceil(dev, &freq);
242 v = opp_get_voltage(opp);
243 rcu_read_unlock();
244 if (v)
245 regulator_set_voltage(.., v);
246 /* do other things */
247 }
248
249opp_get_freq - Retrieve the freq represented by the opp pointer.
250 Example: Lets say the SoC framework uses a couple of helper functions
251 we could pass opp pointers instead of doing additional parameters to
252 handle quiet a bit of data parameters.
253 soc_cpufreq_target(..)
254 {
255 /* do things.. */
256 max_freq = ULONG_MAX;
257 rcu_read_lock();
258 max_opp = opp_find_freq_floor(dev,&max_freq);
259 requested_opp = opp_find_freq_ceil(dev,&freq);
260 if (!IS_ERR(max_opp) && !IS_ERR(requested_opp))
261 r = soc_test_validity(max_opp, requested_opp);
262 rcu_read_unlock();
263 /* do other things */
264 }
265 soc_test_validity(..)
266 {
267 if(opp_get_voltage(max_opp) < opp_get_voltage(requested_opp))
268 return -EINVAL;
269 if(opp_get_freq(max_opp) < opp_get_freq(requested_opp))
270 return -EINVAL;
271 /* do things.. */
272 }
273
274opp_get_opp_count - Retrieve the number of available opps for a device
275 Example: Lets say a co-processor in the SoC needs to know the available
276 frequencies in a table, the main processor can notify as following:
277 soc_notify_coproc_available_frequencies()
278 {
279 /* Do things */
280 rcu_read_lock();
281 num_available = opp_get_opp_count(dev);
282 speeds = kzalloc(sizeof(u32) * num_available, GFP_KERNEL);
283 /* populate the table in increasing order */
284 freq = 0;
285 while (!IS_ERR(opp = opp_find_freq_ceil(dev, &freq))) {
286 speeds[i] = freq;
287 freq++;
288 i++;
289 }
290 rcu_read_unlock();
291
292 soc_notify_coproc(AVAILABLE_FREQs, speeds, num_available);
293 /* Do other things */
294 }
295
2966. Cpufreq Table Generation
297===========================
298opp_init_cpufreq_table - cpufreq framework typically is initialized with
299 cpufreq_frequency_table_cpuinfo which is provided with the list of
300 frequencies that are available for operation. This function provides
301 a ready to use conversion routine to translate the OPP layer's internal
302 information about the available frequencies into a format readily
303 providable to cpufreq.
304
305 WARNING: Do not use this function in interrupt context.
306
307 Example:
308 soc_pm_init()
309 {
310 /* Do things */
311 r = opp_init_cpufreq_table(dev, &freq_table);
312 if (!r)
313 cpufreq_frequency_table_cpuinfo(policy, freq_table);
314 /* Do other things */
315 }
316
317 NOTE: This function is available only if CONFIG_CPU_FREQ is enabled in
318 addition to CONFIG_PM as power management feature is required to
319 dynamically scale voltage and frequency in a system.
320
3217. Data Structures
322==================
323Typically an SoC contains multiple voltage domains which are variable. Each
324domain is represented by a device pointer. The relationship to OPP can be
325represented as follows:
326SoC
327 |- device 1
328 | |- opp 1 (availability, freq, voltage)
329 | |- opp 2 ..
330 ... ...
331 | `- opp n ..
332 |- device 2
333 ...
334 `- device m
335
336OPP library maintains a internal list that the SoC framework populates and
337accessed by various functions as described above. However, the structures
338representing the actual OPPs and domains are internal to the OPP library itself
339to allow for suitable abstraction reusable across systems.
340
341struct opp - The internal data structure of OPP library which is used to
342 represent an OPP. In addition to the freq, voltage, availability
343 information, it also contains internal book keeping information required
344 for the OPP library to operate on. Pointer to this structure is
345 provided back to the users such as SoC framework to be used as a
346 identifier for OPP in the interactions with OPP layer.
347
348 WARNING: The struct opp pointer should not be parsed or modified by the
349 users. The defaults of for an instance is populated by opp_add, but the
350 availability of the OPP can be modified by opp_enable/disable functions.
351
352struct device - This is used to identify a domain to the OPP layer. The
353 nature of the device and it's implementation is left to the user of
354 OPP library such as the SoC framework.
355
356Overall, in a simplistic view, the data structure operations is represented as
357following:
358
359Initialization / modification:
360 +-----+ /- opp_enable
361opp_add --> | opp | <-------
362 | +-----+ \- opp_disable
363 \-------> domain_info(device)
364
365Search functions:
366 /-- opp_find_freq_ceil ---\ +-----+
367domain_info<---- opp_find_freq_exact -----> | opp |
368 \-- opp_find_freq_floor ---/ +-----+
369
370Retrieval functions:
371+-----+ /- opp_get_voltage
372| opp | <---
373+-----+ \- opp_get_freq
374
375domain_info <- opp_get_opp_count
diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt
index 55b859b3bc72..489e9bacd165 100644
--- a/Documentation/power/runtime_pm.txt
+++ b/Documentation/power/runtime_pm.txt
@@ -1,6 +1,7 @@
1Run-time Power Management Framework for I/O Devices 1Run-time Power Management Framework for I/O Devices
2 2
3(C) 2009 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc. 3(C) 2009 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
4(C) 2010 Alan Stern <stern@rowland.harvard.edu>
4 5
51. Introduction 61. Introduction
6 7
@@ -157,7 +158,8 @@ rules:
157 to execute it, the other callbacks will not be executed for the same device. 158 to execute it, the other callbacks will not be executed for the same device.
158 159
159 * A request to execute ->runtime_resume() will cancel any pending or 160 * A request to execute ->runtime_resume() will cancel any pending or
160 scheduled requests to execute the other callbacks for the same device. 161 scheduled requests to execute the other callbacks for the same device,
162 except for scheduled autosuspends.
161 163
1623. Run-time PM Device Fields 1643. Run-time PM Device Fields
163 165
@@ -165,7 +167,7 @@ The following device run-time PM fields are present in 'struct dev_pm_info', as
165defined in include/linux/pm.h: 167defined in include/linux/pm.h:
166 168
167 struct timer_list suspend_timer; 169 struct timer_list suspend_timer;
168 - timer used for scheduling (delayed) suspend request 170 - timer used for scheduling (delayed) suspend and autosuspend requests
169 171
170 unsigned long timer_expires; 172 unsigned long timer_expires;
171 - timer expiration time, in jiffies (if this is different from zero, the 173 - timer expiration time, in jiffies (if this is different from zero, the
@@ -230,6 +232,28 @@ defined in include/linux/pm.h:
230 interface; it may only be modified with the help of the pm_runtime_allow() 232 interface; it may only be modified with the help of the pm_runtime_allow()
231 and pm_runtime_forbid() helper functions 233 and pm_runtime_forbid() helper functions
232 234
235 unsigned int no_callbacks;
236 - indicates that the device does not use the run-time PM callbacks (see
237 Section 8); it may be modified only by the pm_runtime_no_callbacks()
238 helper function
239
240 unsigned int use_autosuspend;
241 - indicates that the device's driver supports delayed autosuspend (see
242 Section 9); it may be modified only by the
243 pm_runtime{_dont}_use_autosuspend() helper functions
244
245 unsigned int timer_autosuspends;
246 - indicates that the PM core should attempt to carry out an autosuspend
247 when the timer expires rather than a normal suspend
248
249 int autosuspend_delay;
250 - the delay time (in milliseconds) to be used for autosuspend
251
252 unsigned long last_busy;
253 - the time (in jiffies) when the pm_runtime_mark_last_busy() helper
254 function was last called for this device; used in calculating inactivity
255 periods for autosuspend
256
233All of the above fields are members of the 'power' member of 'struct device'. 257All of the above fields are members of the 'power' member of 'struct device'.
234 258
2354. Run-time PM Device Helper Functions 2594. Run-time PM Device Helper Functions
@@ -255,6 +279,12 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h:
255 error code on failure, where -EAGAIN or -EBUSY means it is safe to attempt 279 error code on failure, where -EAGAIN or -EBUSY means it is safe to attempt
256 to suspend the device again in future 280 to suspend the device again in future
257 281
282 int pm_runtime_autosuspend(struct device *dev);
283 - same as pm_runtime_suspend() except that the autosuspend delay is taken
284 into account; if pm_runtime_autosuspend_expiration() says the delay has
285 not yet expired then an autosuspend is scheduled for the appropriate time
286 and 0 is returned
287
258 int pm_runtime_resume(struct device *dev); 288 int pm_runtime_resume(struct device *dev);
259 - execute the subsystem-level resume callback for the device; returns 0 on 289 - execute the subsystem-level resume callback for the device; returns 0 on
260 success, 1 if the device's run-time PM status was already 'active' or 290 success, 1 if the device's run-time PM status was already 'active' or
@@ -267,6 +297,11 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h:
267 device (the request is represented by a work item in pm_wq); returns 0 on 297 device (the request is represented by a work item in pm_wq); returns 0 on
268 success or error code if the request has not been queued up 298 success or error code if the request has not been queued up
269 299
300 int pm_request_autosuspend(struct device *dev);
301 - schedule the execution of the subsystem-level suspend callback for the
302 device when the autosuspend delay has expired; if the delay has already
303 expired then the work item is queued up immediately
304
270 int pm_schedule_suspend(struct device *dev, unsigned int delay); 305 int pm_schedule_suspend(struct device *dev, unsigned int delay);
271 - schedule the execution of the subsystem-level suspend callback for the 306 - schedule the execution of the subsystem-level suspend callback for the
272 device in future, where 'delay' is the time to wait before queuing up a 307 device in future, where 'delay' is the time to wait before queuing up a
@@ -298,12 +333,20 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h:
298 - decrement the device's usage counter 333 - decrement the device's usage counter
299 334
300 int pm_runtime_put(struct device *dev); 335 int pm_runtime_put(struct device *dev);
301 - decrement the device's usage counter, run pm_request_idle(dev) and return 336 - decrement the device's usage counter; if the result is 0 then run
302 its result 337 pm_request_idle(dev) and return its result
338
339 int pm_runtime_put_autosuspend(struct device *dev);
340 - decrement the device's usage counter; if the result is 0 then run
341 pm_request_autosuspend(dev) and return its result
303 342
304 int pm_runtime_put_sync(struct device *dev); 343 int pm_runtime_put_sync(struct device *dev);
305 - decrement the device's usage counter, run pm_runtime_idle(dev) and return 344 - decrement the device's usage counter; if the result is 0 then run
306 its result 345 pm_runtime_idle(dev) and return its result
346
347 int pm_runtime_put_sync_autosuspend(struct device *dev);
348 - decrement the device's usage counter; if the result is 0 then run
349 pm_runtime_autosuspend(dev) and return its result
307 350
308 void pm_runtime_enable(struct device *dev); 351 void pm_runtime_enable(struct device *dev);
309 - enable the run-time PM helper functions to run the device bus type's 352 - enable the run-time PM helper functions to run the device bus type's
@@ -349,19 +392,51 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h:
349 counter (used by the /sys/devices/.../power/control interface to 392 counter (used by the /sys/devices/.../power/control interface to
350 effectively prevent the device from being power managed at run time) 393 effectively prevent the device from being power managed at run time)
351 394
395 void pm_runtime_no_callbacks(struct device *dev);
396 - set the power.no_callbacks flag for the device and remove the run-time
397 PM attributes from /sys/devices/.../power (or prevent them from being
398 added when the device is registered)
399
400 void pm_runtime_mark_last_busy(struct device *dev);
401 - set the power.last_busy field to the current time
402
403 void pm_runtime_use_autosuspend(struct device *dev);
404 - set the power.use_autosuspend flag, enabling autosuspend delays
405
406 void pm_runtime_dont_use_autosuspend(struct device *dev);
407 - clear the power.use_autosuspend flag, disabling autosuspend delays
408
409 void pm_runtime_set_autosuspend_delay(struct device *dev, int delay);
410 - set the power.autosuspend_delay value to 'delay' (expressed in
411 milliseconds); if 'delay' is negative then run-time suspends are
412 prevented
413
414 unsigned long pm_runtime_autosuspend_expiration(struct device *dev);
415 - calculate the time when the current autosuspend delay period will expire,
416 based on power.last_busy and power.autosuspend_delay; if the delay time
417 is 1000 ms or larger then the expiration time is rounded up to the
418 nearest second; returns 0 if the delay period has already expired or
419 power.use_autosuspend isn't set, otherwise returns the expiration time
420 in jiffies
421
352It is safe to execute the following helper functions from interrupt context: 422It is safe to execute the following helper functions from interrupt context:
353 423
354pm_request_idle() 424pm_request_idle()
425pm_request_autosuspend()
355pm_schedule_suspend() 426pm_schedule_suspend()
356pm_request_resume() 427pm_request_resume()
357pm_runtime_get_noresume() 428pm_runtime_get_noresume()
358pm_runtime_get() 429pm_runtime_get()
359pm_runtime_put_noidle() 430pm_runtime_put_noidle()
360pm_runtime_put() 431pm_runtime_put()
432pm_runtime_put_autosuspend()
433pm_runtime_enable()
361pm_suspend_ignore_children() 434pm_suspend_ignore_children()
362pm_runtime_set_active() 435pm_runtime_set_active()
363pm_runtime_set_suspended() 436pm_runtime_set_suspended()
364pm_runtime_enable() 437pm_runtime_suspended()
438pm_runtime_mark_last_busy()
439pm_runtime_autosuspend_expiration()
365 440
3665. Run-time PM Initialization, Device Probing and Removal 4415. Run-time PM Initialization, Device Probing and Removal
367 442
@@ -524,3 +599,141 @@ poweroff and run-time suspend callback, and similarly for system resume, thaw,
524restore, and run-time resume, can achieve this with the help of the 599restore, and run-time resume, can achieve this with the help of the
525UNIVERSAL_DEV_PM_OPS macro defined in include/linux/pm.h (possibly setting its 600UNIVERSAL_DEV_PM_OPS macro defined in include/linux/pm.h (possibly setting its
526last argument to NULL). 601last argument to NULL).
602
6038. "No-Callback" Devices
604
605Some "devices" are only logical sub-devices of their parent and cannot be
606power-managed on their own. (The prototype example is a USB interface. Entire
607USB devices can go into low-power mode or send wake-up requests, but neither is
608possible for individual interfaces.) The drivers for these devices have no
609need of run-time PM callbacks; if the callbacks did exist, ->runtime_suspend()
610and ->runtime_resume() would always return 0 without doing anything else and
611->runtime_idle() would always call pm_runtime_suspend().
612
613Subsystems can tell the PM core about these devices by calling
614pm_runtime_no_callbacks(). This should be done after the device structure is
615initialized and before it is registered (although after device registration is
616also okay). The routine will set the device's power.no_callbacks flag and
617prevent the non-debugging run-time PM sysfs attributes from being created.
618
619When power.no_callbacks is set, the PM core will not invoke the
620->runtime_idle(), ->runtime_suspend(), or ->runtime_resume() callbacks.
621Instead it will assume that suspends and resumes always succeed and that idle
622devices should be suspended.
623
624As a consequence, the PM core will never directly inform the device's subsystem
625or driver about run-time power changes. Instead, the driver for the device's
626parent must take responsibility for telling the device's driver when the
627parent's power state changes.
628
6299. Autosuspend, or automatically-delayed suspends
630
631Changing a device's power state isn't free; it requires both time and energy.
632A device should be put in a low-power state only when there's some reason to
633think it will remain in that state for a substantial time. A common heuristic
634says that a device which hasn't been used for a while is liable to remain
635unused; following this advice, drivers should not allow devices to be suspended
636at run-time until they have been inactive for some minimum period. Even when
637the heuristic ends up being non-optimal, it will still prevent devices from
638"bouncing" too rapidly between low-power and full-power states.
639
640The term "autosuspend" is an historical remnant. It doesn't mean that the
641device is automatically suspended (the subsystem or driver still has to call
642the appropriate PM routines); rather it means that run-time suspends will
643automatically be delayed until the desired period of inactivity has elapsed.
644
645Inactivity is determined based on the power.last_busy field. Drivers should
646call pm_runtime_mark_last_busy() to update this field after carrying out I/O,
647typically just before calling pm_runtime_put_autosuspend(). The desired length
648of the inactivity period is a matter of policy. Subsystems can set this length
649initially by calling pm_runtime_set_autosuspend_delay(), but after device
650registration the length should be controlled by user space, using the
651/sys/devices/.../power/autosuspend_delay_ms attribute.
652
653In order to use autosuspend, subsystems or drivers must call
654pm_runtime_use_autosuspend() (preferably before registering the device), and
655thereafter they should use the various *_autosuspend() helper functions instead
656of the non-autosuspend counterparts:
657
658 Instead of: pm_runtime_suspend use: pm_runtime_autosuspend;
659 Instead of: pm_schedule_suspend use: pm_request_autosuspend;
660 Instead of: pm_runtime_put use: pm_runtime_put_autosuspend;
661 Instead of: pm_runtime_put_sync use: pm_runtime_put_sync_autosuspend.
662
663Drivers may also continue to use the non-autosuspend helper functions; they
664will behave normally, not taking the autosuspend delay into account.
665Similarly, if the power.use_autosuspend field isn't set then the autosuspend
666helper functions will behave just like the non-autosuspend counterparts.
667
668The implementation is well suited for asynchronous use in interrupt contexts.
669However such use inevitably involves races, because the PM core can't
670synchronize ->runtime_suspend() callbacks with the arrival of I/O requests.
671This synchronization must be handled by the driver, using its private lock.
672Here is a schematic pseudo-code example:
673
674 foo_read_or_write(struct foo_priv *foo, void *data)
675 {
676 lock(&foo->private_lock);
677 add_request_to_io_queue(foo, data);
678 if (foo->num_pending_requests++ == 0)
679 pm_runtime_get(&foo->dev);
680 if (!foo->is_suspended)
681 foo_process_next_request(foo);
682 unlock(&foo->private_lock);
683 }
684
685 foo_io_completion(struct foo_priv *foo, void *req)
686 {
687 lock(&foo->private_lock);
688 if (--foo->num_pending_requests == 0) {
689 pm_runtime_mark_last_busy(&foo->dev);
690 pm_runtime_put_autosuspend(&foo->dev);
691 } else {
692 foo_process_next_request(foo);
693 }
694 unlock(&foo->private_lock);
695 /* Send req result back to the user ... */
696 }
697
698 int foo_runtime_suspend(struct device *dev)
699 {
700 struct foo_priv foo = container_of(dev, ...);
701 int ret = 0;
702
703 lock(&foo->private_lock);
704 if (foo->num_pending_requests > 0) {
705 ret = -EBUSY;
706 } else {
707 /* ... suspend the device ... */
708 foo->is_suspended = 1;
709 }
710 unlock(&foo->private_lock);
711 return ret;
712 }
713
714 int foo_runtime_resume(struct device *dev)
715 {
716 struct foo_priv foo = container_of(dev, ...);
717
718 lock(&foo->private_lock);
719 /* ... resume the device ... */
720 foo->is_suspended = 0;
721 pm_runtime_mark_last_busy(&foo->dev);
722 if (foo->num_pending_requests > 0)
723 foo_process_requests(foo);
724 unlock(&foo->private_lock);
725 return 0;
726 }
727
728The important point is that after foo_io_completion() asks for an autosuspend,
729the foo_runtime_suspend() callback may race with foo_read_or_write().
730Therefore foo_runtime_suspend() has to check whether there are any pending I/O
731requests (while holding the private lock) before allowing the suspend to
732proceed.
733
734In addition, the power.autosuspend_delay field can be changed by user space at
735any time. If a driver cares about this, it can call
736pm_runtime_autosuspend_expiration() from within the ->runtime_suspend()
737callback while holding its private lock. If the function returns a nonzero
738value then the delay has not yet expired and the callback should return
739-EAGAIN.
diff --git a/Documentation/power/s2ram.txt b/Documentation/power/s2ram.txt
index 514b94fc931e..1bdfa0443773 100644
--- a/Documentation/power/s2ram.txt
+++ b/Documentation/power/s2ram.txt
@@ -49,6 +49,13 @@ machine that doesn't boot) is:
49 device (lspci and /sys/devices/pci* is your friend), and see if you can 49 device (lspci and /sys/devices/pci* is your friend), and see if you can
50 fix it, disable it, or trace into its resume function. 50 fix it, disable it, or trace into its resume function.
51 51
52 If no device matches the hash (or any matches appear to be false positives),
53 the culprit may be a device from a loadable kernel module that is not loaded
54 until after the hash is checked. You can check the hash against the current
55 devices again after more modules are loaded using sysfs:
56
57 cat /sys/power/pm_trace_dev_match
58
52For example, the above happens to be the VGA device on my EVO, which I 59For example, the above happens to be the VGA device on my EVO, which I
53used to run with "radeonfb" (it's an ATI Radeon mobility). It turns out 60used to run with "radeonfb" (it's an ATI Radeon mobility). It turns out
54that "radeonfb" simply cannot resume that device - it tries to set the 61that "radeonfb" simply cannot resume that device - it tries to set the
diff --git a/Documentation/power/swsusp.txt b/Documentation/power/swsusp.txt
index 9d60ab717a7b..ea718891a665 100644
--- a/Documentation/power/swsusp.txt
+++ b/Documentation/power/swsusp.txt
@@ -66,7 +66,8 @@ swsusp saves the state of the machine into active swaps and then reboots or
66powerdowns. You must explicitly specify the swap partition to resume from with 66powerdowns. You must explicitly specify the swap partition to resume from with
67``resume='' kernel option. If signature is found it loads and restores saved 67``resume='' kernel option. If signature is found it loads and restores saved
68state. If the option ``noresume'' is specified as a boot parameter, it skips 68state. If the option ``noresume'' is specified as a boot parameter, it skips
69the resuming. 69the resuming. If the option ``hibernate=nocompress'' is specified as a boot
70parameter, it saves hibernation image without compression.
70 71
71In the meantime while the system is suspended you should not add/remove any 72In the meantime while the system is suspended you should not add/remove any
72of the hardware, write to the filesystems, etc. 73of the hardware, write to the filesystems, etc.
diff --git a/drivers/base/power/Makefile b/drivers/base/power/Makefile
index cbccf9a3cee4..abe46edfe5b4 100644
--- a/drivers/base/power/Makefile
+++ b/drivers/base/power/Makefile
@@ -3,6 +3,7 @@ obj-$(CONFIG_PM_SLEEP) += main.o wakeup.o
3obj-$(CONFIG_PM_RUNTIME) += runtime.o 3obj-$(CONFIG_PM_RUNTIME) += runtime.o
4obj-$(CONFIG_PM_OPS) += generic_ops.o 4obj-$(CONFIG_PM_OPS) += generic_ops.o
5obj-$(CONFIG_PM_TRACE_RTC) += trace.o 5obj-$(CONFIG_PM_TRACE_RTC) += trace.o
6obj-$(CONFIG_PM_OPP) += opp.o
6 7
7ccflags-$(CONFIG_DEBUG_DRIVER) := -DDEBUG 8ccflags-$(CONFIG_DEBUG_DRIVER) := -DDEBUG
8ccflags-$(CONFIG_PM_VERBOSE) += -DDEBUG 9ccflags-$(CONFIG_PM_VERBOSE) += -DDEBUG
diff --git a/drivers/base/power/generic_ops.c b/drivers/base/power/generic_ops.c
index 4b29d4981253..81f2c84697f4 100644
--- a/drivers/base/power/generic_ops.c
+++ b/drivers/base/power/generic_ops.c
@@ -46,7 +46,7 @@ int pm_generic_runtime_suspend(struct device *dev)
46 const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; 46 const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
47 int ret; 47 int ret;
48 48
49 ret = pm && pm->runtime_suspend ? pm->runtime_suspend(dev) : -EINVAL; 49 ret = pm && pm->runtime_suspend ? pm->runtime_suspend(dev) : 0;
50 50
51 return ret; 51 return ret;
52} 52}
@@ -65,7 +65,7 @@ int pm_generic_runtime_resume(struct device *dev)
65 const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; 65 const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
66 int ret; 66 int ret;
67 67
68 ret = pm && pm->runtime_resume ? pm->runtime_resume(dev) : -EINVAL; 68 ret = pm && pm->runtime_resume ? pm->runtime_resume(dev) : 0;
69 69
70 return ret; 70 return ret;
71} 71}
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 276d5a701dc3..31b526661ec4 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -51,6 +51,8 @@ static pm_message_t pm_transition;
51 */ 51 */
52static bool transition_started; 52static bool transition_started;
53 53
54static int async_error;
55
54/** 56/**
55 * device_pm_init - Initialize the PM-related part of a device object. 57 * device_pm_init - Initialize the PM-related part of a device object.
56 * @dev: Device object being initialized. 58 * @dev: Device object being initialized.
@@ -60,7 +62,8 @@ void device_pm_init(struct device *dev)
60 dev->power.status = DPM_ON; 62 dev->power.status = DPM_ON;
61 init_completion(&dev->power.completion); 63 init_completion(&dev->power.completion);
62 complete_all(&dev->power.completion); 64 complete_all(&dev->power.completion);
63 dev->power.wakeup_count = 0; 65 dev->power.wakeup = NULL;
66 spin_lock_init(&dev->power.lock);
64 pm_runtime_init(dev); 67 pm_runtime_init(dev);
65} 68}
66 69
@@ -120,6 +123,7 @@ void device_pm_remove(struct device *dev)
120 mutex_lock(&dpm_list_mtx); 123 mutex_lock(&dpm_list_mtx);
121 list_del_init(&dev->power.entry); 124 list_del_init(&dev->power.entry);
122 mutex_unlock(&dpm_list_mtx); 125 mutex_unlock(&dpm_list_mtx);
126 device_wakeup_disable(dev);
123 pm_runtime_remove(dev); 127 pm_runtime_remove(dev);
124} 128}
125 129
@@ -407,7 +411,7 @@ static void pm_dev_err(struct device *dev, pm_message_t state, char *info,
407static void dpm_show_time(ktime_t starttime, pm_message_t state, char *info) 411static void dpm_show_time(ktime_t starttime, pm_message_t state, char *info)
408{ 412{
409 ktime_t calltime; 413 ktime_t calltime;
410 s64 usecs64; 414 u64 usecs64;
411 int usecs; 415 int usecs;
412 416
413 calltime = ktime_get(); 417 calltime = ktime_get();
@@ -600,6 +604,7 @@ static void dpm_resume(pm_message_t state)
600 INIT_LIST_HEAD(&list); 604 INIT_LIST_HEAD(&list);
601 mutex_lock(&dpm_list_mtx); 605 mutex_lock(&dpm_list_mtx);
602 pm_transition = state; 606 pm_transition = state;
607 async_error = 0;
603 608
604 list_for_each_entry(dev, &dpm_list, power.entry) { 609 list_for_each_entry(dev, &dpm_list, power.entry) {
605 if (dev->power.status < DPM_OFF) 610 if (dev->power.status < DPM_OFF)
@@ -829,8 +834,6 @@ static int legacy_suspend(struct device *dev, pm_message_t state,
829 return error; 834 return error;
830} 835}
831 836
832static int async_error;
833
834/** 837/**
835 * device_suspend - Execute "suspend" callbacks for given device. 838 * device_suspend - Execute "suspend" callbacks for given device.
836 * @dev: Device to handle. 839 * @dev: Device to handle.
@@ -885,6 +888,9 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async)
885 device_unlock(dev); 888 device_unlock(dev);
886 complete_all(&dev->power.completion); 889 complete_all(&dev->power.completion);
887 890
891 if (error)
892 async_error = error;
893
888 return error; 894 return error;
889} 895}
890 896
@@ -894,10 +900,8 @@ static void async_suspend(void *data, async_cookie_t cookie)
894 int error; 900 int error;
895 901
896 error = __device_suspend(dev, pm_transition, true); 902 error = __device_suspend(dev, pm_transition, true);
897 if (error) { 903 if (error)
898 pm_dev_err(dev, pm_transition, " async", error); 904 pm_dev_err(dev, pm_transition, " async", error);
899 async_error = error;
900 }
901 905
902 put_device(dev); 906 put_device(dev);
903} 907}
@@ -1085,8 +1089,9 @@ EXPORT_SYMBOL_GPL(__suspend_report_result);
1085 * @dev: Device to wait for. 1089 * @dev: Device to wait for.
1086 * @subordinate: Device that needs to wait for @dev. 1090 * @subordinate: Device that needs to wait for @dev.
1087 */ 1091 */
1088void device_pm_wait_for_dev(struct device *subordinate, struct device *dev) 1092int device_pm_wait_for_dev(struct device *subordinate, struct device *dev)
1089{ 1093{
1090 dpm_wait(dev, subordinate->power.async_suspend); 1094 dpm_wait(dev, subordinate->power.async_suspend);
1095 return async_error;
1091} 1096}
1092EXPORT_SYMBOL_GPL(device_pm_wait_for_dev); 1097EXPORT_SYMBOL_GPL(device_pm_wait_for_dev);
diff --git a/drivers/base/power/opp.c b/drivers/base/power/opp.c
new file mode 100644
index 000000000000..2bb9b4cf59d7
--- /dev/null
+++ b/drivers/base/power/opp.c
@@ -0,0 +1,628 @@
1/*
2 * Generic OPP Interface
3 *
4 * Copyright (C) 2009-2010 Texas Instruments Incorporated.
5 * Nishanth Menon
6 * Romit Dasgupta
7 * Kevin Hilman
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
12 */
13
14#include <linux/kernel.h>
15#include <linux/errno.h>
16#include <linux/err.h>
17#include <linux/init.h>
18#include <linux/slab.h>
19#include <linux/cpufreq.h>
20#include <linux/list.h>
21#include <linux/rculist.h>
22#include <linux/rcupdate.h>
23#include <linux/opp.h>
24
25/*
26 * Internal data structure organization with the OPP layer library is as
27 * follows:
28 * dev_opp_list (root)
29 * |- device 1 (represents voltage domain 1)
30 * | |- opp 1 (availability, freq, voltage)
31 * | |- opp 2 ..
32 * ... ...
33 * | `- opp n ..
34 * |- device 2 (represents the next voltage domain)
35 * ...
36 * `- device m (represents mth voltage domain)
37 * device 1, 2.. are represented by dev_opp structure while each opp
38 * is represented by the opp structure.
39 */
40
41/**
42 * struct opp - Generic OPP description structure
43 * @node: opp list node. The nodes are maintained throughout the lifetime
44 * of boot. It is expected only an optimal set of OPPs are
45 * added to the library by the SoC framework.
46 * RCU usage: opp list is traversed with RCU locks. node
47 * modification is possible realtime, hence the modifications
48 * are protected by the dev_opp_list_lock for integrity.
49 * IMPORTANT: the opp nodes should be maintained in increasing
50 * order.
51 * @available: true/false - marks if this OPP as available or not
52 * @rate: Frequency in hertz
53 * @u_volt: Nominal voltage in microvolts corresponding to this OPP
54 * @dev_opp: points back to the device_opp struct this opp belongs to
55 *
56 * This structure stores the OPP information for a given device.
57 */
58struct opp {
59 struct list_head node;
60
61 bool available;
62 unsigned long rate;
63 unsigned long u_volt;
64
65 struct device_opp *dev_opp;
66};
67
68/**
69 * struct device_opp - Device opp structure
70 * @node: list node - contains the devices with OPPs that
71 * have been registered. Nodes once added are not modified in this
72 * list.
73 * RCU usage: nodes are not modified in the list of device_opp,
74 * however addition is possible and is secured by dev_opp_list_lock
75 * @dev: device pointer
76 * @opp_list: list of opps
77 *
78 * This is an internal data structure maintaining the link to opps attached to
79 * a device. This structure is not meant to be shared to users as it is
80 * meant for book keeping and private to OPP library
81 */
82struct device_opp {
83 struct list_head node;
84
85 struct device *dev;
86 struct list_head opp_list;
87};
88
89/*
90 * The root of the list of all devices. All device_opp structures branch off
91 * from here, with each device_opp containing the list of opp it supports in
92 * various states of availability.
93 */
94static LIST_HEAD(dev_opp_list);
95/* Lock to allow exclusive modification to the device and opp lists */
96static DEFINE_MUTEX(dev_opp_list_lock);
97
98/**
99 * find_device_opp() - find device_opp struct using device pointer
100 * @dev: device pointer used to lookup device OPPs
101 *
102 * Search list of device OPPs for one containing matching device. Does a RCU
103 * reader operation to grab the pointer needed.
104 *
105 * Returns pointer to 'struct device_opp' if found, otherwise -ENODEV or
106 * -EINVAL based on type of error.
107 *
108 * Locking: This function must be called under rcu_read_lock(). device_opp
109 * is a RCU protected pointer. This means that device_opp is valid as long
110 * as we are under RCU lock.
111 */
112static struct device_opp *find_device_opp(struct device *dev)
113{
114 struct device_opp *tmp_dev_opp, *dev_opp = ERR_PTR(-ENODEV);
115
116 if (unlikely(IS_ERR_OR_NULL(dev))) {
117 pr_err("%s: Invalid parameters\n", __func__);
118 return ERR_PTR(-EINVAL);
119 }
120
121 list_for_each_entry_rcu(tmp_dev_opp, &dev_opp_list, node) {
122 if (tmp_dev_opp->dev == dev) {
123 dev_opp = tmp_dev_opp;
124 break;
125 }
126 }
127
128 return dev_opp;
129}
130
131/**
132 * opp_get_voltage() - Gets the voltage corresponding to an available opp
133 * @opp: opp for which voltage has to be returned for
134 *
135 * Return voltage in micro volt corresponding to the opp, else
136 * return 0
137 *
138 * Locking: This function must be called under rcu_read_lock(). opp is a rcu
139 * protected pointer. This means that opp which could have been fetched by
140 * opp_find_freq_{exact,ceil,floor} functions is valid as long as we are
141 * under RCU lock. The pointer returned by the opp_find_freq family must be
142 * used in the same section as the usage of this function with the pointer
143 * prior to unlocking with rcu_read_unlock() to maintain the integrity of the
144 * pointer.
145 */
146unsigned long opp_get_voltage(struct opp *opp)
147{
148 struct opp *tmp_opp;
149 unsigned long v = 0;
150
151 tmp_opp = rcu_dereference(opp);
152 if (unlikely(IS_ERR_OR_NULL(tmp_opp)) || !tmp_opp->available)
153 pr_err("%s: Invalid parameters\n", __func__);
154 else
155 v = tmp_opp->u_volt;
156
157 return v;
158}
159
160/**
161 * opp_get_freq() - Gets the frequency corresponding to an available opp
162 * @opp: opp for which frequency has to be returned for
163 *
164 * Return frequency in hertz corresponding to the opp, else
165 * return 0
166 *
167 * Locking: This function must be called under rcu_read_lock(). opp is a rcu
168 * protected pointer. This means that opp which could have been fetched by
169 * opp_find_freq_{exact,ceil,floor} functions is valid as long as we are
170 * under RCU lock. The pointer returned by the opp_find_freq family must be
171 * used in the same section as the usage of this function with the pointer
172 * prior to unlocking with rcu_read_unlock() to maintain the integrity of the
173 * pointer.
174 */
175unsigned long opp_get_freq(struct opp *opp)
176{
177 struct opp *tmp_opp;
178 unsigned long f = 0;
179
180 tmp_opp = rcu_dereference(opp);
181 if (unlikely(IS_ERR_OR_NULL(tmp_opp)) || !tmp_opp->available)
182 pr_err("%s: Invalid parameters\n", __func__);
183 else
184 f = tmp_opp->rate;
185
186 return f;
187}
188
189/**
190 * opp_get_opp_count() - Get number of opps available in the opp list
191 * @dev: device for which we do this operation
192 *
193 * This function returns the number of available opps if there are any,
194 * else returns 0 if none or the corresponding error value.
195 *
196 * Locking: This function must be called under rcu_read_lock(). This function
197 * internally references two RCU protected structures: device_opp and opp which
198 * are safe as long as we are under a common RCU locked section.
199 */
200int opp_get_opp_count(struct device *dev)
201{
202 struct device_opp *dev_opp;
203 struct opp *temp_opp;
204 int count = 0;
205
206 dev_opp = find_device_opp(dev);
207 if (IS_ERR(dev_opp)) {
208 int r = PTR_ERR(dev_opp);
209 dev_err(dev, "%s: device OPP not found (%d)\n", __func__, r);
210 return r;
211 }
212
213 list_for_each_entry_rcu(temp_opp, &dev_opp->opp_list, node) {
214 if (temp_opp->available)
215 count++;
216 }
217
218 return count;
219}
220
221/**
222 * opp_find_freq_exact() - search for an exact frequency
223 * @dev: device for which we do this operation
224 * @freq: frequency to search for
225 * @is_available: true/false - match for available opp
226 *
227 * Searches for exact match in the opp list and returns pointer to the matching
228 * opp if found, else returns ERR_PTR in case of error and should be handled
229 * using IS_ERR.
230 *
231 * Note: available is a modifier for the search. if available=true, then the
232 * match is for exact matching frequency and is available in the stored OPP
233 * table. if false, the match is for exact frequency which is not available.
234 *
235 * This provides a mechanism to enable an opp which is not available currently
236 * or the opposite as well.
237 *
238 * Locking: This function must be called under rcu_read_lock(). opp is a rcu
239 * protected pointer. The reason for the same is that the opp pointer which is
240 * returned will remain valid for use with opp_get_{voltage, freq} only while
241 * under the locked area. The pointer returned must be used prior to unlocking
242 * with rcu_read_unlock() to maintain the integrity of the pointer.
243 */
244struct opp *opp_find_freq_exact(struct device *dev, unsigned long freq,
245 bool available)
246{
247 struct device_opp *dev_opp;
248 struct opp *temp_opp, *opp = ERR_PTR(-ENODEV);
249
250 dev_opp = find_device_opp(dev);
251 if (IS_ERR(dev_opp)) {
252 int r = PTR_ERR(dev_opp);
253 dev_err(dev, "%s: device OPP not found (%d)\n", __func__, r);
254 return ERR_PTR(r);
255 }
256
257 list_for_each_entry_rcu(temp_opp, &dev_opp->opp_list, node) {
258 if (temp_opp->available == available &&
259 temp_opp->rate == freq) {
260 opp = temp_opp;
261 break;
262 }
263 }
264
265 return opp;
266}
267
268/**
269 * opp_find_freq_ceil() - Search for an rounded ceil freq
270 * @dev: device for which we do this operation
271 * @freq: Start frequency
272 *
273 * Search for the matching ceil *available* OPP from a starting freq
274 * for a device.
275 *
276 * Returns matching *opp and refreshes *freq accordingly, else returns
277 * ERR_PTR in case of error and should be handled using IS_ERR.
278 *
279 * Locking: This function must be called under rcu_read_lock(). opp is a rcu
280 * protected pointer. The reason for the same is that the opp pointer which is
281 * returned will remain valid for use with opp_get_{voltage, freq} only while
282 * under the locked area. The pointer returned must be used prior to unlocking
283 * with rcu_read_unlock() to maintain the integrity of the pointer.
284 */
285struct opp *opp_find_freq_ceil(struct device *dev, unsigned long *freq)
286{
287 struct device_opp *dev_opp;
288 struct opp *temp_opp, *opp = ERR_PTR(-ENODEV);
289
290 if (!dev || !freq) {
291 dev_err(dev, "%s: Invalid argument freq=%p\n", __func__, freq);
292 return ERR_PTR(-EINVAL);
293 }
294
295 dev_opp = find_device_opp(dev);
296 if (IS_ERR(dev_opp))
297 return opp;
298
299 list_for_each_entry_rcu(temp_opp, &dev_opp->opp_list, node) {
300 if (temp_opp->available && temp_opp->rate >= *freq) {
301 opp = temp_opp;
302 *freq = opp->rate;
303 break;
304 }
305 }
306
307 return opp;
308}
309
310/**
311 * opp_find_freq_floor() - Search for a rounded floor freq
312 * @dev: device for which we do this operation
313 * @freq: Start frequency
314 *
315 * Search for the matching floor *available* OPP from a starting freq
316 * for a device.
317 *
318 * Returns matching *opp and refreshes *freq accordingly, else returns
319 * ERR_PTR in case of error and should be handled using IS_ERR.
320 *
321 * Locking: This function must be called under rcu_read_lock(). opp is a rcu
322 * protected pointer. The reason for the same is that the opp pointer which is
323 * returned will remain valid for use with opp_get_{voltage, freq} only while
324 * under the locked area. The pointer returned must be used prior to unlocking
325 * with rcu_read_unlock() to maintain the integrity of the pointer.
326 */
327struct opp *opp_find_freq_floor(struct device *dev, unsigned long *freq)
328{
329 struct device_opp *dev_opp;
330 struct opp *temp_opp, *opp = ERR_PTR(-ENODEV);
331
332 if (!dev || !freq) {
333 dev_err(dev, "%s: Invalid argument freq=%p\n", __func__, freq);
334 return ERR_PTR(-EINVAL);
335 }
336
337 dev_opp = find_device_opp(dev);
338 if (IS_ERR(dev_opp))
339 return opp;
340
341 list_for_each_entry_rcu(temp_opp, &dev_opp->opp_list, node) {
342 if (temp_opp->available) {
343 /* go to the next node, before choosing prev */
344 if (temp_opp->rate > *freq)
345 break;
346 else
347 opp = temp_opp;
348 }
349 }
350 if (!IS_ERR(opp))
351 *freq = opp->rate;
352
353 return opp;
354}
355
356/**
357 * opp_add() - Add an OPP table from a table definitions
358 * @dev: device for which we do this operation
359 * @freq: Frequency in Hz for this OPP
360 * @u_volt: Voltage in uVolts for this OPP
361 *
362 * This function adds an opp definition to the opp list and returns status.
363 * The opp is made available by default and it can be controlled using
364 * opp_enable/disable functions.
365 *
366 * Locking: The internal device_opp and opp structures are RCU protected.
367 * Hence this function internally uses RCU updater strategy with mutex locks
368 * to keep the integrity of the internal data structures. Callers should ensure
369 * that this function is *NOT* called under RCU protection or in contexts where
370 * mutex cannot be locked.
371 */
372int opp_add(struct device *dev, unsigned long freq, unsigned long u_volt)
373{
374 struct device_opp *dev_opp = NULL;
375 struct opp *opp, *new_opp;
376 struct list_head *head;
377
378 /* allocate new OPP node */
379 new_opp = kzalloc(sizeof(struct opp), GFP_KERNEL);
380 if (!new_opp) {
381 dev_warn(dev, "%s: Unable to create new OPP node\n", __func__);
382 return -ENOMEM;
383 }
384
385 /* Hold our list modification lock here */
386 mutex_lock(&dev_opp_list_lock);
387
388 /* Check for existing list for 'dev' */
389 dev_opp = find_device_opp(dev);
390 if (IS_ERR(dev_opp)) {
391 /*
392 * Allocate a new device OPP table. In the infrequent case
393 * where a new device is needed to be added, we pay this
394 * penalty.
395 */
396 dev_opp = kzalloc(sizeof(struct device_opp), GFP_KERNEL);
397 if (!dev_opp) {
398 mutex_unlock(&dev_opp_list_lock);
399 kfree(new_opp);
400 dev_warn(dev,
401 "%s: Unable to create device OPP structure\n",
402 __func__);
403 return -ENOMEM;
404 }
405
406 dev_opp->dev = dev;
407 INIT_LIST_HEAD(&dev_opp->opp_list);
408
409 /* Secure the device list modification */
410 list_add_rcu(&dev_opp->node, &dev_opp_list);
411 }
412
413 /* populate the opp table */
414 new_opp->dev_opp = dev_opp;
415 new_opp->rate = freq;
416 new_opp->u_volt = u_volt;
417 new_opp->available = true;
418
419 /* Insert new OPP in order of increasing frequency */
420 head = &dev_opp->opp_list;
421 list_for_each_entry_rcu(opp, &dev_opp->opp_list, node) {
422 if (new_opp->rate < opp->rate)
423 break;
424 else
425 head = &opp->node;
426 }
427
428 list_add_rcu(&new_opp->node, head);
429 mutex_unlock(&dev_opp_list_lock);
430
431 return 0;
432}
433
434/**
435 * opp_set_availability() - helper to set the availability of an opp
436 * @dev: device for which we do this operation
437 * @freq: OPP frequency to modify availability
438 * @availability_req: availability status requested for this opp
439 *
440 * Set the availability of an OPP with an RCU operation, opp_{enable,disable}
441 * share a common logic which is isolated here.
442 *
443 * Returns -EINVAL for bad pointers, -ENOMEM if no memory available for the
444 * copy operation, returns 0 if no modifcation was done OR modification was
445 * successful.
446 *
447 * Locking: The internal device_opp and opp structures are RCU protected.
448 * Hence this function internally uses RCU updater strategy with mutex locks to
449 * keep the integrity of the internal data structures. Callers should ensure
450 * that this function is *NOT* called under RCU protection or in contexts where
451 * mutex locking or synchronize_rcu() blocking calls cannot be used.
452 */
453static int opp_set_availability(struct device *dev, unsigned long freq,
454 bool availability_req)
455{
456 struct device_opp *tmp_dev_opp, *dev_opp = NULL;
457 struct opp *new_opp, *tmp_opp, *opp = ERR_PTR(-ENODEV);
458 int r = 0;
459
460 /* keep the node allocated */
461 new_opp = kmalloc(sizeof(struct opp), GFP_KERNEL);
462 if (!new_opp) {
463 dev_warn(dev, "%s: Unable to create OPP\n", __func__);
464 return -ENOMEM;
465 }
466
467 mutex_lock(&dev_opp_list_lock);
468
469 /* Find the device_opp */
470 list_for_each_entry(tmp_dev_opp, &dev_opp_list, node) {
471 if (dev == tmp_dev_opp->dev) {
472 dev_opp = tmp_dev_opp;
473 break;
474 }
475 }
476 if (IS_ERR(dev_opp)) {
477 r = PTR_ERR(dev_opp);
478 dev_warn(dev, "%s: Device OPP not found (%d)\n", __func__, r);
479 goto unlock;
480 }
481
482 /* Do we have the frequency? */
483 list_for_each_entry(tmp_opp, &dev_opp->opp_list, node) {
484 if (tmp_opp->rate == freq) {
485 opp = tmp_opp;
486 break;
487 }
488 }
489 if (IS_ERR(opp)) {
490 r = PTR_ERR(opp);
491 goto unlock;
492 }
493
494 /* Is update really needed? */
495 if (opp->available == availability_req)
496 goto unlock;
497 /* copy the old data over */
498 *new_opp = *opp;
499
500 /* plug in new node */
501 new_opp->available = availability_req;
502
503 list_replace_rcu(&opp->node, &new_opp->node);
504 mutex_unlock(&dev_opp_list_lock);
505 synchronize_rcu();
506
507 /* clean up old opp */
508 new_opp = opp;
509 goto out;
510
511unlock:
512 mutex_unlock(&dev_opp_list_lock);
513out:
514 kfree(new_opp);
515 return r;
516}
517
518/**
519 * opp_enable() - Enable a specific OPP
520 * @dev: device for which we do this operation
521 * @freq: OPP frequency to enable
522 *
523 * Enables a provided opp. If the operation is valid, this returns 0, else the
524 * corresponding error value. It is meant to be used for users an OPP available
525 * after being temporarily made unavailable with opp_disable.
526 *
527 * Locking: The internal device_opp and opp structures are RCU protected.
528 * Hence this function indirectly uses RCU and mutex locks to keep the
529 * integrity of the internal data structures. Callers should ensure that
530 * this function is *NOT* called under RCU protection or in contexts where
531 * mutex locking or synchronize_rcu() blocking calls cannot be used.
532 */
533int opp_enable(struct device *dev, unsigned long freq)
534{
535 return opp_set_availability(dev, freq, true);
536}
537
538/**
539 * opp_disable() - Disable a specific OPP
540 * @dev: device for which we do this operation
541 * @freq: OPP frequency to disable
542 *
543 * Disables a provided opp. If the operation is valid, this returns
544 * 0, else the corresponding error value. It is meant to be a temporary
545 * control by users to make this OPP not available until the circumstances are
546 * right to make it available again (with a call to opp_enable).
547 *
548 * Locking: The internal device_opp and opp structures are RCU protected.
549 * Hence this function indirectly uses RCU and mutex locks to keep the
550 * integrity of the internal data structures. Callers should ensure that
551 * this function is *NOT* called under RCU protection or in contexts where
552 * mutex locking or synchronize_rcu() blocking calls cannot be used.
553 */
554int opp_disable(struct device *dev, unsigned long freq)
555{
556 return opp_set_availability(dev, freq, false);
557}
558
559#ifdef CONFIG_CPU_FREQ
560/**
561 * opp_init_cpufreq_table() - create a cpufreq table for a device
562 * @dev: device for which we do this operation
563 * @table: Cpufreq table returned back to caller
564 *
565 * Generate a cpufreq table for a provided device- this assumes that the
566 * opp list is already initialized and ready for usage.
567 *
568 * This function allocates required memory for the cpufreq table. It is
569 * expected that the caller does the required maintenance such as freeing
570 * the table as required.
571 *
572 * Returns -EINVAL for bad pointers, -ENODEV if the device is not found, -ENOMEM
573 * if no memory available for the operation (table is not populated), returns 0
574 * if successful and table is populated.
575 *
576 * WARNING: It is important for the callers to ensure refreshing their copy of
577 * the table if any of the mentioned functions have been invoked in the interim.
578 *
579 * Locking: The internal device_opp and opp structures are RCU protected.
580 * To simplify the logic, we pretend we are updater and hold relevant mutex here
581 * Callers should ensure that this function is *NOT* called under RCU protection
582 * or in contexts where mutex locking cannot be used.
583 */
584int opp_init_cpufreq_table(struct device *dev,
585 struct cpufreq_frequency_table **table)
586{
587 struct device_opp *dev_opp;
588 struct opp *opp;
589 struct cpufreq_frequency_table *freq_table;
590 int i = 0;
591
592 /* Pretend as if I am an updater */
593 mutex_lock(&dev_opp_list_lock);
594
595 dev_opp = find_device_opp(dev);
596 if (IS_ERR(dev_opp)) {
597 int r = PTR_ERR(dev_opp);
598 mutex_unlock(&dev_opp_list_lock);
599 dev_err(dev, "%s: Device OPP not found (%d)\n", __func__, r);
600 return r;
601 }
602
603 freq_table = kzalloc(sizeof(struct cpufreq_frequency_table) *
604 (opp_get_opp_count(dev) + 1), GFP_KERNEL);
605 if (!freq_table) {
606 mutex_unlock(&dev_opp_list_lock);
607 dev_warn(dev, "%s: Unable to allocate frequency table\n",
608 __func__);
609 return -ENOMEM;
610 }
611
612 list_for_each_entry(opp, &dev_opp->opp_list, node) {
613 if (opp->available) {
614 freq_table[i].index = i;
615 freq_table[i].frequency = opp->rate / 1000;
616 i++;
617 }
618 }
619 mutex_unlock(&dev_opp_list_lock);
620
621 freq_table[i].index = i;
622 freq_table[i].frequency = CPUFREQ_TABLE_END;
623
624 *table = &freq_table[0];
625
626 return 0;
627}
628#endif /* CONFIG_CPU_FREQ */
diff --git a/drivers/base/power/power.h b/drivers/base/power/power.h
index c0bd03c83b9c..698dde742587 100644
--- a/drivers/base/power/power.h
+++ b/drivers/base/power/power.h
@@ -34,6 +34,7 @@ extern void device_pm_move_last(struct device *);
34 34
35static inline void device_pm_init(struct device *dev) 35static inline void device_pm_init(struct device *dev)
36{ 36{
37 spin_lock_init(&dev->power.lock);
37 pm_runtime_init(dev); 38 pm_runtime_init(dev);
38} 39}
39 40
@@ -59,6 +60,7 @@ static inline void device_pm_move_last(struct device *dev) {}
59 60
60extern int dpm_sysfs_add(struct device *); 61extern int dpm_sysfs_add(struct device *);
61extern void dpm_sysfs_remove(struct device *); 62extern void dpm_sysfs_remove(struct device *);
63extern void rpm_sysfs_remove(struct device *);
62 64
63#else /* CONFIG_PM */ 65#else /* CONFIG_PM */
64 66
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index b78c401ffa73..1dd8676d7f55 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -2,17 +2,55 @@
2 * drivers/base/power/runtime.c - Helper functions for device run-time PM 2 * drivers/base/power/runtime.c - Helper functions for device run-time PM
3 * 3 *
4 * Copyright (c) 2009 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc. 4 * Copyright (c) 2009 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
5 * Copyright (C) 2010 Alan Stern <stern@rowland.harvard.edu>
5 * 6 *
6 * This file is released under the GPLv2. 7 * This file is released under the GPLv2.
7 */ 8 */
8 9
9#include <linux/sched.h> 10#include <linux/sched.h>
10#include <linux/pm_runtime.h> 11#include <linux/pm_runtime.h>
11#include <linux/jiffies.h> 12#include "power.h"
12 13
13static int __pm_runtime_resume(struct device *dev, bool from_wq); 14static int rpm_resume(struct device *dev, int rpmflags);
14static int __pm_request_idle(struct device *dev); 15static int rpm_suspend(struct device *dev, int rpmflags);
15static int __pm_request_resume(struct device *dev); 16
17/**
18 * update_pm_runtime_accounting - Update the time accounting of power states
19 * @dev: Device to update the accounting for
20 *
21 * In order to be able to have time accounting of the various power states
22 * (as used by programs such as PowerTOP to show the effectiveness of runtime
23 * PM), we need to track the time spent in each state.
24 * update_pm_runtime_accounting must be called each time before the
25 * runtime_status field is updated, to account the time in the old state
26 * correctly.
27 */
28void update_pm_runtime_accounting(struct device *dev)
29{
30 unsigned long now = jiffies;
31 int delta;
32
33 delta = now - dev->power.accounting_timestamp;
34
35 if (delta < 0)
36 delta = 0;
37
38 dev->power.accounting_timestamp = now;
39
40 if (dev->power.disable_depth > 0)
41 return;
42
43 if (dev->power.runtime_status == RPM_SUSPENDED)
44 dev->power.suspended_jiffies += delta;
45 else
46 dev->power.active_jiffies += delta;
47}
48
49static void __update_runtime_status(struct device *dev, enum rpm_status status)
50{
51 update_pm_runtime_accounting(dev);
52 dev->power.runtime_status = status;
53}
16 54
17/** 55/**
18 * pm_runtime_deactivate_timer - Deactivate given device's suspend timer. 56 * pm_runtime_deactivate_timer - Deactivate given device's suspend timer.
@@ -40,62 +78,154 @@ static void pm_runtime_cancel_pending(struct device *dev)
40 dev->power.request = RPM_REQ_NONE; 78 dev->power.request = RPM_REQ_NONE;
41} 79}
42 80
43/** 81/*
44 * __pm_runtime_idle - Notify device bus type if the device can be suspended. 82 * pm_runtime_autosuspend_expiration - Get a device's autosuspend-delay expiration time.
45 * @dev: Device to notify the bus type about. 83 * @dev: Device to handle.
46 * 84 *
47 * This function must be called under dev->power.lock with interrupts disabled. 85 * Compute the autosuspend-delay expiration time based on the device's
86 * power.last_busy time. If the delay has already expired or is disabled
87 * (negative) or the power.use_autosuspend flag isn't set, return 0.
88 * Otherwise return the expiration time in jiffies (adjusted to be nonzero).
89 *
90 * This function may be called either with or without dev->power.lock held.
91 * Either way it can be racy, since power.last_busy may be updated at any time.
48 */ 92 */
49static int __pm_runtime_idle(struct device *dev) 93unsigned long pm_runtime_autosuspend_expiration(struct device *dev)
50 __releases(&dev->power.lock) __acquires(&dev->power.lock) 94{
95 int autosuspend_delay;
96 long elapsed;
97 unsigned long last_busy;
98 unsigned long expires = 0;
99
100 if (!dev->power.use_autosuspend)
101 goto out;
102
103 autosuspend_delay = ACCESS_ONCE(dev->power.autosuspend_delay);
104 if (autosuspend_delay < 0)
105 goto out;
106
107 last_busy = ACCESS_ONCE(dev->power.last_busy);
108 elapsed = jiffies - last_busy;
109 if (elapsed < 0)
110 goto out; /* jiffies has wrapped around. */
111
112 /*
113 * If the autosuspend_delay is >= 1 second, align the timer by rounding
114 * up to the nearest second.
115 */
116 expires = last_busy + msecs_to_jiffies(autosuspend_delay);
117 if (autosuspend_delay >= 1000)
118 expires = round_jiffies(expires);
119 expires += !expires;
120 if (elapsed >= expires - last_busy)
121 expires = 0; /* Already expired. */
122
123 out:
124 return expires;
125}
126EXPORT_SYMBOL_GPL(pm_runtime_autosuspend_expiration);
127
128/**
129 * rpm_check_suspend_allowed - Test whether a device may be suspended.
130 * @dev: Device to test.
131 */
132static int rpm_check_suspend_allowed(struct device *dev)
51{ 133{
52 int retval = 0; 134 int retval = 0;
53 135
54 if (dev->power.runtime_error) 136 if (dev->power.runtime_error)
55 retval = -EINVAL; 137 retval = -EINVAL;
56 else if (dev->power.idle_notification)
57 retval = -EINPROGRESS;
58 else if (atomic_read(&dev->power.usage_count) > 0 138 else if (atomic_read(&dev->power.usage_count) > 0
59 || dev->power.disable_depth > 0 139 || dev->power.disable_depth > 0)
60 || dev->power.runtime_status != RPM_ACTIVE)
61 retval = -EAGAIN; 140 retval = -EAGAIN;
62 else if (!pm_children_suspended(dev)) 141 else if (!pm_children_suspended(dev))
63 retval = -EBUSY; 142 retval = -EBUSY;
143
144 /* Pending resume requests take precedence over suspends. */
145 else if ((dev->power.deferred_resume
146 && dev->power.status == RPM_SUSPENDING)
147 || (dev->power.request_pending
148 && dev->power.request == RPM_REQ_RESUME))
149 retval = -EAGAIN;
150 else if (dev->power.runtime_status == RPM_SUSPENDED)
151 retval = 1;
152
153 return retval;
154}
155
156/**
157 * rpm_idle - Notify device bus type if the device can be suspended.
158 * @dev: Device to notify the bus type about.
159 * @rpmflags: Flag bits.
160 *
161 * Check if the device's run-time PM status allows it to be suspended. If
162 * another idle notification has been started earlier, return immediately. If
163 * the RPM_ASYNC flag is set then queue an idle-notification request; otherwise
164 * run the ->runtime_idle() callback directly.
165 *
166 * This function must be called under dev->power.lock with interrupts disabled.
167 */
168static int rpm_idle(struct device *dev, int rpmflags)
169{
170 int (*callback)(struct device *);
171 int retval;
172
173 retval = rpm_check_suspend_allowed(dev);
174 if (retval < 0)
175 ; /* Conditions are wrong. */
176
177 /* Idle notifications are allowed only in the RPM_ACTIVE state. */
178 else if (dev->power.runtime_status != RPM_ACTIVE)
179 retval = -EAGAIN;
180
181 /*
182 * Any pending request other than an idle notification takes
183 * precedence over us, except that the timer may be running.
184 */
185 else if (dev->power.request_pending &&
186 dev->power.request > RPM_REQ_IDLE)
187 retval = -EAGAIN;
188
189 /* Act as though RPM_NOWAIT is always set. */
190 else if (dev->power.idle_notification)
191 retval = -EINPROGRESS;
64 if (retval) 192 if (retval)
65 goto out; 193 goto out;
66 194
67 if (dev->power.request_pending) { 195 /* Pending requests need to be canceled. */
68 /* 196 dev->power.request = RPM_REQ_NONE;
69 * If an idle notification request is pending, cancel it. Any 197
70 * other pending request takes precedence over us. 198 if (dev->power.no_callbacks) {
71 */ 199 /* Assume ->runtime_idle() callback would have suspended. */
72 if (dev->power.request == RPM_REQ_IDLE) { 200 retval = rpm_suspend(dev, rpmflags);
73 dev->power.request = RPM_REQ_NONE; 201 goto out;
74 } else if (dev->power.request != RPM_REQ_NONE) { 202 }
75 retval = -EAGAIN; 203
76 goto out; 204 /* Carry out an asynchronous or a synchronous idle notification. */
205 if (rpmflags & RPM_ASYNC) {
206 dev->power.request = RPM_REQ_IDLE;
207 if (!dev->power.request_pending) {
208 dev->power.request_pending = true;
209 queue_work(pm_wq, &dev->power.work);
77 } 210 }
211 goto out;
78 } 212 }
79 213
80 dev->power.idle_notification = true; 214 dev->power.idle_notification = true;
81 215
82 if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_idle) { 216 if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_idle)
83 spin_unlock_irq(&dev->power.lock); 217 callback = dev->bus->pm->runtime_idle;
84 218 else if (dev->type && dev->type->pm && dev->type->pm->runtime_idle)
85 dev->bus->pm->runtime_idle(dev); 219 callback = dev->type->pm->runtime_idle;
86 220 else if (dev->class && dev->class->pm)
87 spin_lock_irq(&dev->power.lock); 221 callback = dev->class->pm->runtime_idle;
88 } else if (dev->type && dev->type->pm && dev->type->pm->runtime_idle) { 222 else
89 spin_unlock_irq(&dev->power.lock); 223 callback = NULL;
90
91 dev->type->pm->runtime_idle(dev);
92 224
93 spin_lock_irq(&dev->power.lock); 225 if (callback) {
94 } else if (dev->class && dev->class->pm
95 && dev->class->pm->runtime_idle) {
96 spin_unlock_irq(&dev->power.lock); 226 spin_unlock_irq(&dev->power.lock);
97 227
98 dev->class->pm->runtime_idle(dev); 228 callback(dev);
99 229
100 spin_lock_irq(&dev->power.lock); 230 spin_lock_irq(&dev->power.lock);
101 } 231 }
@@ -108,113 +238,99 @@ static int __pm_runtime_idle(struct device *dev)
108} 238}
109 239
110/** 240/**
111 * pm_runtime_idle - Notify device bus type if the device can be suspended. 241 * rpm_callback - Run a given runtime PM callback for a given device.
112 * @dev: Device to notify the bus type about. 242 * @cb: Runtime PM callback to run.
243 * @dev: Device to run the callback for.
113 */ 244 */
114int pm_runtime_idle(struct device *dev) 245static int rpm_callback(int (*cb)(struct device *), struct device *dev)
246 __releases(&dev->power.lock) __acquires(&dev->power.lock)
115{ 247{
116 int retval; 248 int retval;
117 249
118 spin_lock_irq(&dev->power.lock); 250 if (!cb)
119 retval = __pm_runtime_idle(dev); 251 return -ENOSYS;
120 spin_unlock_irq(&dev->power.lock);
121 252
122 return retval; 253 spin_unlock_irq(&dev->power.lock);
123}
124EXPORT_SYMBOL_GPL(pm_runtime_idle);
125
126
127/**
128 * update_pm_runtime_accounting - Update the time accounting of power states
129 * @dev: Device to update the accounting for
130 *
131 * In order to be able to have time accounting of the various power states
132 * (as used by programs such as PowerTOP to show the effectiveness of runtime
133 * PM), we need to track the time spent in each state.
134 * update_pm_runtime_accounting must be called each time before the
135 * runtime_status field is updated, to account the time in the old state
136 * correctly.
137 */
138void update_pm_runtime_accounting(struct device *dev)
139{
140 unsigned long now = jiffies;
141 int delta;
142
143 delta = now - dev->power.accounting_timestamp;
144
145 if (delta < 0)
146 delta = 0;
147 254
148 dev->power.accounting_timestamp = now; 255 retval = cb(dev);
149 256
150 if (dev->power.disable_depth > 0) 257 spin_lock_irq(&dev->power.lock);
151 return; 258 dev->power.runtime_error = retval;
152
153 if (dev->power.runtime_status == RPM_SUSPENDED)
154 dev->power.suspended_jiffies += delta;
155 else
156 dev->power.active_jiffies += delta;
157}
158 259
159static void __update_runtime_status(struct device *dev, enum rpm_status status) 260 return retval;
160{
161 update_pm_runtime_accounting(dev);
162 dev->power.runtime_status = status;
163} 261}
164 262
165/** 263/**
166 * __pm_runtime_suspend - Carry out run-time suspend of given device. 264 * rpm_suspend - Carry out run-time suspend of given device.
167 * @dev: Device to suspend. 265 * @dev: Device to suspend.
168 * @from_wq: If set, the function has been called via pm_wq. 266 * @rpmflags: Flag bits.
169 * 267 *
170 * Check if the device can be suspended and run the ->runtime_suspend() callback 268 * Check if the device's run-time PM status allows it to be suspended. If
171 * provided by its bus type. If another suspend has been started earlier, wait 269 * another suspend has been started earlier, either return immediately or wait
172 * for it to finish. If an idle notification or suspend request is pending or 270 * for it to finish, depending on the RPM_NOWAIT and RPM_ASYNC flags. Cancel a
173 * scheduled, cancel it. 271 * pending idle notification. If the RPM_ASYNC flag is set then queue a
272 * suspend request; otherwise run the ->runtime_suspend() callback directly.
273 * If a deferred resume was requested while the callback was running then carry
274 * it out; otherwise send an idle notification for the device (if the suspend
275 * failed) or for its parent (if the suspend succeeded).
174 * 276 *
175 * This function must be called under dev->power.lock with interrupts disabled. 277 * This function must be called under dev->power.lock with interrupts disabled.
176 */ 278 */
177int __pm_runtime_suspend(struct device *dev, bool from_wq) 279static int rpm_suspend(struct device *dev, int rpmflags)
178 __releases(&dev->power.lock) __acquires(&dev->power.lock) 280 __releases(&dev->power.lock) __acquires(&dev->power.lock)
179{ 281{
282 int (*callback)(struct device *);
180 struct device *parent = NULL; 283 struct device *parent = NULL;
181 bool notify = false; 284 int retval;
182 int retval = 0;
183 285
184 dev_dbg(dev, "__pm_runtime_suspend()%s!\n", 286 dev_dbg(dev, "%s flags 0x%x\n", __func__, rpmflags);
185 from_wq ? " from workqueue" : "");
186 287
187 repeat: 288 repeat:
188 if (dev->power.runtime_error) { 289 retval = rpm_check_suspend_allowed(dev);
189 retval = -EINVAL;
190 goto out;
191 }
192 290
193 /* Pending resume requests take precedence over us. */ 291 if (retval < 0)
194 if (dev->power.request_pending 292 ; /* Conditions are wrong. */
195 && dev->power.request == RPM_REQ_RESUME) { 293
294 /* Synchronous suspends are not allowed in the RPM_RESUMING state. */
295 else if (dev->power.runtime_status == RPM_RESUMING &&
296 !(rpmflags & RPM_ASYNC))
196 retval = -EAGAIN; 297 retval = -EAGAIN;
298 if (retval)
197 goto out; 299 goto out;
300
301 /* If the autosuspend_delay time hasn't expired yet, reschedule. */
302 if ((rpmflags & RPM_AUTO)
303 && dev->power.runtime_status != RPM_SUSPENDING) {
304 unsigned long expires = pm_runtime_autosuspend_expiration(dev);
305
306 if (expires != 0) {
307 /* Pending requests need to be canceled. */
308 dev->power.request = RPM_REQ_NONE;
309
310 /*
311 * Optimization: If the timer is already running and is
312 * set to expire at or before the autosuspend delay,
313 * avoid the overhead of resetting it. Just let it
314 * expire; pm_suspend_timer_fn() will take care of the
315 * rest.
316 */
317 if (!(dev->power.timer_expires && time_before_eq(
318 dev->power.timer_expires, expires))) {
319 dev->power.timer_expires = expires;
320 mod_timer(&dev->power.suspend_timer, expires);
321 }
322 dev->power.timer_autosuspends = 1;
323 goto out;
324 }
198 } 325 }
199 326
200 /* Other scheduled or pending requests need to be canceled. */ 327 /* Other scheduled or pending requests need to be canceled. */
201 pm_runtime_cancel_pending(dev); 328 pm_runtime_cancel_pending(dev);
202 329
203 if (dev->power.runtime_status == RPM_SUSPENDED)
204 retval = 1;
205 else if (dev->power.runtime_status == RPM_RESUMING
206 || dev->power.disable_depth > 0
207 || atomic_read(&dev->power.usage_count) > 0)
208 retval = -EAGAIN;
209 else if (!pm_children_suspended(dev))
210 retval = -EBUSY;
211 if (retval)
212 goto out;
213
214 if (dev->power.runtime_status == RPM_SUSPENDING) { 330 if (dev->power.runtime_status == RPM_SUSPENDING) {
215 DEFINE_WAIT(wait); 331 DEFINE_WAIT(wait);
216 332
217 if (from_wq) { 333 if (rpmflags & (RPM_ASYNC | RPM_NOWAIT)) {
218 retval = -EINPROGRESS; 334 retval = -EINPROGRESS;
219 goto out; 335 goto out;
220 } 336 }
@@ -236,46 +352,42 @@ int __pm_runtime_suspend(struct device *dev, bool from_wq)
236 goto repeat; 352 goto repeat;
237 } 353 }
238 354
239 __update_runtime_status(dev, RPM_SUSPENDING);
240 dev->power.deferred_resume = false; 355 dev->power.deferred_resume = false;
356 if (dev->power.no_callbacks)
357 goto no_callback; /* Assume success. */
358
359 /* Carry out an asynchronous or a synchronous suspend. */
360 if (rpmflags & RPM_ASYNC) {
361 dev->power.request = (rpmflags & RPM_AUTO) ?
362 RPM_REQ_AUTOSUSPEND : RPM_REQ_SUSPEND;
363 if (!dev->power.request_pending) {
364 dev->power.request_pending = true;
365 queue_work(pm_wq, &dev->power.work);
366 }
367 goto out;
368 }
241 369
242 if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_suspend) { 370 __update_runtime_status(dev, RPM_SUSPENDING);
243 spin_unlock_irq(&dev->power.lock);
244
245 retval = dev->bus->pm->runtime_suspend(dev);
246
247 spin_lock_irq(&dev->power.lock);
248 dev->power.runtime_error = retval;
249 } else if (dev->type && dev->type->pm
250 && dev->type->pm->runtime_suspend) {
251 spin_unlock_irq(&dev->power.lock);
252
253 retval = dev->type->pm->runtime_suspend(dev);
254
255 spin_lock_irq(&dev->power.lock);
256 dev->power.runtime_error = retval;
257 } else if (dev->class && dev->class->pm
258 && dev->class->pm->runtime_suspend) {
259 spin_unlock_irq(&dev->power.lock);
260
261 retval = dev->class->pm->runtime_suspend(dev);
262 371
263 spin_lock_irq(&dev->power.lock); 372 if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_suspend)
264 dev->power.runtime_error = retval; 373 callback = dev->bus->pm->runtime_suspend;
265 } else { 374 else if (dev->type && dev->type->pm && dev->type->pm->runtime_suspend)
266 retval = -ENOSYS; 375 callback = dev->type->pm->runtime_suspend;
267 } 376 else if (dev->class && dev->class->pm)
377 callback = dev->class->pm->runtime_suspend;
378 else
379 callback = NULL;
268 380
381 retval = rpm_callback(callback, dev);
269 if (retval) { 382 if (retval) {
270 __update_runtime_status(dev, RPM_ACTIVE); 383 __update_runtime_status(dev, RPM_ACTIVE);
271 if (retval == -EAGAIN || retval == -EBUSY) { 384 dev->power.deferred_resume = 0;
272 if (dev->power.timer_expires == 0) 385 if (retval == -EAGAIN || retval == -EBUSY)
273 notify = true;
274 dev->power.runtime_error = 0; 386 dev->power.runtime_error = 0;
275 } else { 387 else
276 pm_runtime_cancel_pending(dev); 388 pm_runtime_cancel_pending(dev);
277 }
278 } else { 389 } else {
390 no_callback:
279 __update_runtime_status(dev, RPM_SUSPENDED); 391 __update_runtime_status(dev, RPM_SUSPENDED);
280 pm_runtime_deactivate_timer(dev); 392 pm_runtime_deactivate_timer(dev);
281 393
@@ -287,14 +399,11 @@ int __pm_runtime_suspend(struct device *dev, bool from_wq)
287 wake_up_all(&dev->power.wait_queue); 399 wake_up_all(&dev->power.wait_queue);
288 400
289 if (dev->power.deferred_resume) { 401 if (dev->power.deferred_resume) {
290 __pm_runtime_resume(dev, false); 402 rpm_resume(dev, 0);
291 retval = -EAGAIN; 403 retval = -EAGAIN;
292 goto out; 404 goto out;
293 } 405 }
294 406
295 if (notify)
296 __pm_runtime_idle(dev);
297
298 if (parent && !parent->power.ignore_children) { 407 if (parent && !parent->power.ignore_children) {
299 spin_unlock_irq(&dev->power.lock); 408 spin_unlock_irq(&dev->power.lock);
300 409
@@ -304,72 +413,69 @@ int __pm_runtime_suspend(struct device *dev, bool from_wq)
304 } 413 }
305 414
306 out: 415 out:
307 dev_dbg(dev, "__pm_runtime_suspend() returns %d!\n", retval); 416 dev_dbg(dev, "%s returns %d\n", __func__, retval);
308
309 return retval;
310}
311
312/**
313 * pm_runtime_suspend - Carry out run-time suspend of given device.
314 * @dev: Device to suspend.
315 */
316int pm_runtime_suspend(struct device *dev)
317{
318 int retval;
319
320 spin_lock_irq(&dev->power.lock);
321 retval = __pm_runtime_suspend(dev, false);
322 spin_unlock_irq(&dev->power.lock);
323 417
324 return retval; 418 return retval;
325} 419}
326EXPORT_SYMBOL_GPL(pm_runtime_suspend);
327 420
328/** 421/**
329 * __pm_runtime_resume - Carry out run-time resume of given device. 422 * rpm_resume - Carry out run-time resume of given device.
330 * @dev: Device to resume. 423 * @dev: Device to resume.
331 * @from_wq: If set, the function has been called via pm_wq. 424 * @rpmflags: Flag bits.
332 * 425 *
333 * Check if the device can be woken up and run the ->runtime_resume() callback 426 * Check if the device's run-time PM status allows it to be resumed. Cancel
334 * provided by its bus type. If another resume has been started earlier, wait 427 * any scheduled or pending requests. If another resume has been started
335 * for it to finish. If there's a suspend running in parallel with this 428 * earlier, either return imediately or wait for it to finish, depending on the
336 * function, wait for it to finish and resume the device. Cancel any scheduled 429 * RPM_NOWAIT and RPM_ASYNC flags. Similarly, if there's a suspend running in
337 * or pending requests. 430 * parallel with this function, either tell the other process to resume after
431 * suspending (deferred_resume) or wait for it to finish. If the RPM_ASYNC
432 * flag is set then queue a resume request; otherwise run the
433 * ->runtime_resume() callback directly. Queue an idle notification for the
434 * device if the resume succeeded.
338 * 435 *
339 * This function must be called under dev->power.lock with interrupts disabled. 436 * This function must be called under dev->power.lock with interrupts disabled.
340 */ 437 */
341int __pm_runtime_resume(struct device *dev, bool from_wq) 438static int rpm_resume(struct device *dev, int rpmflags)
342 __releases(&dev->power.lock) __acquires(&dev->power.lock) 439 __releases(&dev->power.lock) __acquires(&dev->power.lock)
343{ 440{
441 int (*callback)(struct device *);
344 struct device *parent = NULL; 442 struct device *parent = NULL;
345 int retval = 0; 443 int retval = 0;
346 444
347 dev_dbg(dev, "__pm_runtime_resume()%s!\n", 445 dev_dbg(dev, "%s flags 0x%x\n", __func__, rpmflags);
348 from_wq ? " from workqueue" : "");
349 446
350 repeat: 447 repeat:
351 if (dev->power.runtime_error) { 448 if (dev->power.runtime_error)
352 retval = -EINVAL; 449 retval = -EINVAL;
450 else if (dev->power.disable_depth > 0)
451 retval = -EAGAIN;
452 if (retval)
353 goto out; 453 goto out;
354 }
355 454
356 pm_runtime_cancel_pending(dev); 455 /*
456 * Other scheduled or pending requests need to be canceled. Small
457 * optimization: If an autosuspend timer is running, leave it running
458 * rather than cancelling it now only to restart it again in the near
459 * future.
460 */
461 dev->power.request = RPM_REQ_NONE;
462 if (!dev->power.timer_autosuspends)
463 pm_runtime_deactivate_timer(dev);
357 464
358 if (dev->power.runtime_status == RPM_ACTIVE) 465 if (dev->power.runtime_status == RPM_ACTIVE) {
359 retval = 1; 466 retval = 1;
360 else if (dev->power.disable_depth > 0)
361 retval = -EAGAIN;
362 if (retval)
363 goto out; 467 goto out;
468 }
364 469
365 if (dev->power.runtime_status == RPM_RESUMING 470 if (dev->power.runtime_status == RPM_RESUMING
366 || dev->power.runtime_status == RPM_SUSPENDING) { 471 || dev->power.runtime_status == RPM_SUSPENDING) {
367 DEFINE_WAIT(wait); 472 DEFINE_WAIT(wait);
368 473
369 if (from_wq) { 474 if (rpmflags & (RPM_ASYNC | RPM_NOWAIT)) {
370 if (dev->power.runtime_status == RPM_SUSPENDING) 475 if (dev->power.runtime_status == RPM_SUSPENDING)
371 dev->power.deferred_resume = true; 476 dev->power.deferred_resume = true;
372 retval = -EINPROGRESS; 477 else
478 retval = -EINPROGRESS;
373 goto out; 479 goto out;
374 } 480 }
375 481
@@ -391,6 +497,34 @@ int __pm_runtime_resume(struct device *dev, bool from_wq)
391 goto repeat; 497 goto repeat;
392 } 498 }
393 499
500 /*
501 * See if we can skip waking up the parent. This is safe only if
502 * power.no_callbacks is set, because otherwise we don't know whether
503 * the resume will actually succeed.
504 */
505 if (dev->power.no_callbacks && !parent && dev->parent) {
506 spin_lock(&dev->parent->power.lock);
507 if (dev->parent->power.disable_depth > 0
508 || dev->parent->power.ignore_children
509 || dev->parent->power.runtime_status == RPM_ACTIVE) {
510 atomic_inc(&dev->parent->power.child_count);
511 spin_unlock(&dev->parent->power.lock);
512 goto no_callback; /* Assume success. */
513 }
514 spin_unlock(&dev->parent->power.lock);
515 }
516
517 /* Carry out an asynchronous or a synchronous resume. */
518 if (rpmflags & RPM_ASYNC) {
519 dev->power.request = RPM_REQ_RESUME;
520 if (!dev->power.request_pending) {
521 dev->power.request_pending = true;
522 queue_work(pm_wq, &dev->power.work);
523 }
524 retval = 0;
525 goto out;
526 }
527
394 if (!parent && dev->parent) { 528 if (!parent && dev->parent) {
395 /* 529 /*
396 * Increment the parent's resume counter and resume it if 530 * Increment the parent's resume counter and resume it if
@@ -408,7 +542,7 @@ int __pm_runtime_resume(struct device *dev, bool from_wq)
408 */ 542 */
409 if (!parent->power.disable_depth 543 if (!parent->power.disable_depth
410 && !parent->power.ignore_children) { 544 && !parent->power.ignore_children) {
411 __pm_runtime_resume(parent, false); 545 rpm_resume(parent, 0);
412 if (parent->power.runtime_status != RPM_ACTIVE) 546 if (parent->power.runtime_status != RPM_ACTIVE)
413 retval = -EBUSY; 547 retval = -EBUSY;
414 } 548 }
@@ -420,39 +554,26 @@ int __pm_runtime_resume(struct device *dev, bool from_wq)
420 goto repeat; 554 goto repeat;
421 } 555 }
422 556
423 __update_runtime_status(dev, RPM_RESUMING); 557 if (dev->power.no_callbacks)
424 558 goto no_callback; /* Assume success. */
425 if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_resume) {
426 spin_unlock_irq(&dev->power.lock);
427
428 retval = dev->bus->pm->runtime_resume(dev);
429
430 spin_lock_irq(&dev->power.lock);
431 dev->power.runtime_error = retval;
432 } else if (dev->type && dev->type->pm
433 && dev->type->pm->runtime_resume) {
434 spin_unlock_irq(&dev->power.lock);
435
436 retval = dev->type->pm->runtime_resume(dev);
437 559
438 spin_lock_irq(&dev->power.lock); 560 __update_runtime_status(dev, RPM_RESUMING);
439 dev->power.runtime_error = retval;
440 } else if (dev->class && dev->class->pm
441 && dev->class->pm->runtime_resume) {
442 spin_unlock_irq(&dev->power.lock);
443
444 retval = dev->class->pm->runtime_resume(dev);
445 561
446 spin_lock_irq(&dev->power.lock); 562 if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_resume)
447 dev->power.runtime_error = retval; 563 callback = dev->bus->pm->runtime_resume;
448 } else { 564 else if (dev->type && dev->type->pm && dev->type->pm->runtime_resume)
449 retval = -ENOSYS; 565 callback = dev->type->pm->runtime_resume;
450 } 566 else if (dev->class && dev->class->pm)
567 callback = dev->class->pm->runtime_resume;
568 else
569 callback = NULL;
451 570
571 retval = rpm_callback(callback, dev);
452 if (retval) { 572 if (retval) {
453 __update_runtime_status(dev, RPM_SUSPENDED); 573 __update_runtime_status(dev, RPM_SUSPENDED);
454 pm_runtime_cancel_pending(dev); 574 pm_runtime_cancel_pending(dev);
455 } else { 575 } else {
576 no_callback:
456 __update_runtime_status(dev, RPM_ACTIVE); 577 __update_runtime_status(dev, RPM_ACTIVE);
457 if (parent) 578 if (parent)
458 atomic_inc(&parent->power.child_count); 579 atomic_inc(&parent->power.child_count);
@@ -460,7 +581,7 @@ int __pm_runtime_resume(struct device *dev, bool from_wq)
460 wake_up_all(&dev->power.wait_queue); 581 wake_up_all(&dev->power.wait_queue);
461 582
462 if (!retval) 583 if (!retval)
463 __pm_request_idle(dev); 584 rpm_idle(dev, RPM_ASYNC);
464 585
465 out: 586 out:
466 if (parent) { 587 if (parent) {
@@ -471,28 +592,12 @@ int __pm_runtime_resume(struct device *dev, bool from_wq)
471 spin_lock_irq(&dev->power.lock); 592 spin_lock_irq(&dev->power.lock);
472 } 593 }
473 594
474 dev_dbg(dev, "__pm_runtime_resume() returns %d!\n", retval); 595 dev_dbg(dev, "%s returns %d\n", __func__, retval);
475 596
476 return retval; 597 return retval;
477} 598}
478 599
479/** 600/**
480 * pm_runtime_resume - Carry out run-time resume of given device.
481 * @dev: Device to suspend.
482 */
483int pm_runtime_resume(struct device *dev)
484{
485 int retval;
486
487 spin_lock_irq(&dev->power.lock);
488 retval = __pm_runtime_resume(dev, false);
489 spin_unlock_irq(&dev->power.lock);
490
491 return retval;
492}
493EXPORT_SYMBOL_GPL(pm_runtime_resume);
494
495/**
496 * pm_runtime_work - Universal run-time PM work function. 601 * pm_runtime_work - Universal run-time PM work function.
497 * @work: Work structure used for scheduling the execution of this function. 602 * @work: Work structure used for scheduling the execution of this function.
498 * 603 *
@@ -517,13 +622,16 @@ static void pm_runtime_work(struct work_struct *work)
517 case RPM_REQ_NONE: 622 case RPM_REQ_NONE:
518 break; 623 break;
519 case RPM_REQ_IDLE: 624 case RPM_REQ_IDLE:
520 __pm_runtime_idle(dev); 625 rpm_idle(dev, RPM_NOWAIT);
521 break; 626 break;
522 case RPM_REQ_SUSPEND: 627 case RPM_REQ_SUSPEND:
523 __pm_runtime_suspend(dev, true); 628 rpm_suspend(dev, RPM_NOWAIT);
629 break;
630 case RPM_REQ_AUTOSUSPEND:
631 rpm_suspend(dev, RPM_NOWAIT | RPM_AUTO);
524 break; 632 break;
525 case RPM_REQ_RESUME: 633 case RPM_REQ_RESUME:
526 __pm_runtime_resume(dev, true); 634 rpm_resume(dev, RPM_NOWAIT);
527 break; 635 break;
528 } 636 }
529 637
@@ -532,117 +640,10 @@ static void pm_runtime_work(struct work_struct *work)
532} 640}
533 641
534/** 642/**
535 * __pm_request_idle - Submit an idle notification request for given device.
536 * @dev: Device to handle.
537 *
538 * Check if the device's run-time PM status is correct for suspending the device
539 * and queue up a request to run __pm_runtime_idle() for it.
540 *
541 * This function must be called under dev->power.lock with interrupts disabled.
542 */
543static int __pm_request_idle(struct device *dev)
544{
545 int retval = 0;
546
547 if (dev->power.runtime_error)
548 retval = -EINVAL;
549 else if (atomic_read(&dev->power.usage_count) > 0
550 || dev->power.disable_depth > 0
551 || dev->power.runtime_status == RPM_SUSPENDED
552 || dev->power.runtime_status == RPM_SUSPENDING)
553 retval = -EAGAIN;
554 else if (!pm_children_suspended(dev))
555 retval = -EBUSY;
556 if (retval)
557 return retval;
558
559 if (dev->power.request_pending) {
560 /* Any requests other then RPM_REQ_IDLE take precedence. */
561 if (dev->power.request == RPM_REQ_NONE)
562 dev->power.request = RPM_REQ_IDLE;
563 else if (dev->power.request != RPM_REQ_IDLE)
564 retval = -EAGAIN;
565 return retval;
566 }
567
568 dev->power.request = RPM_REQ_IDLE;
569 dev->power.request_pending = true;
570 queue_work(pm_wq, &dev->power.work);
571
572 return retval;
573}
574
575/**
576 * pm_request_idle - Submit an idle notification request for given device.
577 * @dev: Device to handle.
578 */
579int pm_request_idle(struct device *dev)
580{
581 unsigned long flags;
582 int retval;
583
584 spin_lock_irqsave(&dev->power.lock, flags);
585 retval = __pm_request_idle(dev);
586 spin_unlock_irqrestore(&dev->power.lock, flags);
587
588 return retval;
589}
590EXPORT_SYMBOL_GPL(pm_request_idle);
591
592/**
593 * __pm_request_suspend - Submit a suspend request for given device.
594 * @dev: Device to suspend.
595 *
596 * This function must be called under dev->power.lock with interrupts disabled.
597 */
598static int __pm_request_suspend(struct device *dev)
599{
600 int retval = 0;
601
602 if (dev->power.runtime_error)
603 return -EINVAL;
604
605 if (dev->power.runtime_status == RPM_SUSPENDED)
606 retval = 1;
607 else if (atomic_read(&dev->power.usage_count) > 0
608 || dev->power.disable_depth > 0)
609 retval = -EAGAIN;
610 else if (dev->power.runtime_status == RPM_SUSPENDING)
611 retval = -EINPROGRESS;
612 else if (!pm_children_suspended(dev))
613 retval = -EBUSY;
614 if (retval < 0)
615 return retval;
616
617 pm_runtime_deactivate_timer(dev);
618
619 if (dev->power.request_pending) {
620 /*
621 * Pending resume requests take precedence over us, but we can
622 * overtake any other pending request.
623 */
624 if (dev->power.request == RPM_REQ_RESUME)
625 retval = -EAGAIN;
626 else if (dev->power.request != RPM_REQ_SUSPEND)
627 dev->power.request = retval ?
628 RPM_REQ_NONE : RPM_REQ_SUSPEND;
629 return retval;
630 } else if (retval) {
631 return retval;
632 }
633
634 dev->power.request = RPM_REQ_SUSPEND;
635 dev->power.request_pending = true;
636 queue_work(pm_wq, &dev->power.work);
637
638 return 0;
639}
640
641/**
642 * pm_suspend_timer_fn - Timer function for pm_schedule_suspend(). 643 * pm_suspend_timer_fn - Timer function for pm_schedule_suspend().
643 * @data: Device pointer passed by pm_schedule_suspend(). 644 * @data: Device pointer passed by pm_schedule_suspend().
644 * 645 *
645 * Check if the time is right and execute __pm_request_suspend() in that case. 646 * Check if the time is right and queue a suspend request.
646 */ 647 */
647static void pm_suspend_timer_fn(unsigned long data) 648static void pm_suspend_timer_fn(unsigned long data)
648{ 649{
@@ -656,7 +657,8 @@ static void pm_suspend_timer_fn(unsigned long data)
656 /* If 'expire' is after 'jiffies' we've been called too early. */ 657 /* If 'expire' is after 'jiffies' we've been called too early. */
657 if (expires > 0 && !time_after(expires, jiffies)) { 658 if (expires > 0 && !time_after(expires, jiffies)) {
658 dev->power.timer_expires = 0; 659 dev->power.timer_expires = 0;
659 __pm_request_suspend(dev); 660 rpm_suspend(dev, dev->power.timer_autosuspends ?
661 (RPM_ASYNC | RPM_AUTO) : RPM_ASYNC);
660 } 662 }
661 663
662 spin_unlock_irqrestore(&dev->power.lock, flags); 664 spin_unlock_irqrestore(&dev->power.lock, flags);
@@ -670,47 +672,25 @@ static void pm_suspend_timer_fn(unsigned long data)
670int pm_schedule_suspend(struct device *dev, unsigned int delay) 672int pm_schedule_suspend(struct device *dev, unsigned int delay)
671{ 673{
672 unsigned long flags; 674 unsigned long flags;
673 int retval = 0; 675 int retval;
674 676
675 spin_lock_irqsave(&dev->power.lock, flags); 677 spin_lock_irqsave(&dev->power.lock, flags);
676 678
677 if (dev->power.runtime_error) {
678 retval = -EINVAL;
679 goto out;
680 }
681
682 if (!delay) { 679 if (!delay) {
683 retval = __pm_request_suspend(dev); 680 retval = rpm_suspend(dev, RPM_ASYNC);
684 goto out; 681 goto out;
685 } 682 }
686 683
687 pm_runtime_deactivate_timer(dev); 684 retval = rpm_check_suspend_allowed(dev);
688
689 if (dev->power.request_pending) {
690 /*
691 * Pending resume requests take precedence over us, but any
692 * other pending requests have to be canceled.
693 */
694 if (dev->power.request == RPM_REQ_RESUME) {
695 retval = -EAGAIN;
696 goto out;
697 }
698 dev->power.request = RPM_REQ_NONE;
699 }
700
701 if (dev->power.runtime_status == RPM_SUSPENDED)
702 retval = 1;
703 else if (atomic_read(&dev->power.usage_count) > 0
704 || dev->power.disable_depth > 0)
705 retval = -EAGAIN;
706 else if (!pm_children_suspended(dev))
707 retval = -EBUSY;
708 if (retval) 685 if (retval)
709 goto out; 686 goto out;
710 687
688 /* Other scheduled or pending requests need to be canceled. */
689 pm_runtime_cancel_pending(dev);
690
711 dev->power.timer_expires = jiffies + msecs_to_jiffies(delay); 691 dev->power.timer_expires = jiffies + msecs_to_jiffies(delay);
712 if (!dev->power.timer_expires) 692 dev->power.timer_expires += !dev->power.timer_expires;
713 dev->power.timer_expires = 1; 693 dev->power.timer_autosuspends = 0;
714 mod_timer(&dev->power.suspend_timer, dev->power.timer_expires); 694 mod_timer(&dev->power.suspend_timer, dev->power.timer_expires);
715 695
716 out: 696 out:
@@ -721,103 +701,88 @@ int pm_schedule_suspend(struct device *dev, unsigned int delay)
721EXPORT_SYMBOL_GPL(pm_schedule_suspend); 701EXPORT_SYMBOL_GPL(pm_schedule_suspend);
722 702
723/** 703/**
724 * pm_request_resume - Submit a resume request for given device. 704 * __pm_runtime_idle - Entry point for run-time idle operations.
725 * @dev: Device to resume. 705 * @dev: Device to send idle notification for.
706 * @rpmflags: Flag bits.
726 * 707 *
727 * This function must be called under dev->power.lock with interrupts disabled. 708 * If the RPM_GET_PUT flag is set, decrement the device's usage count and
709 * return immediately if it is larger than zero. Then carry out an idle
710 * notification, either synchronous or asynchronous.
711 *
712 * This routine may be called in atomic context if the RPM_ASYNC flag is set.
728 */ 713 */
729static int __pm_request_resume(struct device *dev) 714int __pm_runtime_idle(struct device *dev, int rpmflags)
730{ 715{
731 int retval = 0; 716 unsigned long flags;
732 717 int retval;
733 if (dev->power.runtime_error)
734 return -EINVAL;
735
736 if (dev->power.runtime_status == RPM_ACTIVE)
737 retval = 1;
738 else if (dev->power.runtime_status == RPM_RESUMING)
739 retval = -EINPROGRESS;
740 else if (dev->power.disable_depth > 0)
741 retval = -EAGAIN;
742 if (retval < 0)
743 return retval;
744
745 pm_runtime_deactivate_timer(dev);
746 718
747 if (dev->power.runtime_status == RPM_SUSPENDING) { 719 if (rpmflags & RPM_GET_PUT) {
748 dev->power.deferred_resume = true; 720 if (!atomic_dec_and_test(&dev->power.usage_count))
749 return retval; 721 return 0;
750 } 722 }
751 if (dev->power.request_pending) {
752 /* If non-resume request is pending, we can overtake it. */
753 dev->power.request = retval ? RPM_REQ_NONE : RPM_REQ_RESUME;
754 return retval;
755 }
756 if (retval)
757 return retval;
758 723
759 dev->power.request = RPM_REQ_RESUME; 724 spin_lock_irqsave(&dev->power.lock, flags);
760 dev->power.request_pending = true; 725 retval = rpm_idle(dev, rpmflags);
761 queue_work(pm_wq, &dev->power.work); 726 spin_unlock_irqrestore(&dev->power.lock, flags);
762 727
763 return retval; 728 return retval;
764} 729}
730EXPORT_SYMBOL_GPL(__pm_runtime_idle);
765 731
766/** 732/**
767 * pm_request_resume - Submit a resume request for given device. 733 * __pm_runtime_suspend - Entry point for run-time put/suspend operations.
768 * @dev: Device to resume. 734 * @dev: Device to suspend.
735 * @rpmflags: Flag bits.
736 *
737 * If the RPM_GET_PUT flag is set, decrement the device's usage count and
738 * return immediately if it is larger than zero. Then carry out a suspend,
739 * either synchronous or asynchronous.
740 *
741 * This routine may be called in atomic context if the RPM_ASYNC flag is set.
769 */ 742 */
770int pm_request_resume(struct device *dev) 743int __pm_runtime_suspend(struct device *dev, int rpmflags)
771{ 744{
772 unsigned long flags; 745 unsigned long flags;
773 int retval; 746 int retval;
774 747
748 if (rpmflags & RPM_GET_PUT) {
749 if (!atomic_dec_and_test(&dev->power.usage_count))
750 return 0;
751 }
752
775 spin_lock_irqsave(&dev->power.lock, flags); 753 spin_lock_irqsave(&dev->power.lock, flags);
776 retval = __pm_request_resume(dev); 754 retval = rpm_suspend(dev, rpmflags);
777 spin_unlock_irqrestore(&dev->power.lock, flags); 755 spin_unlock_irqrestore(&dev->power.lock, flags);
778 756
779 return retval; 757 return retval;
780} 758}
781EXPORT_SYMBOL_GPL(pm_request_resume); 759EXPORT_SYMBOL_GPL(__pm_runtime_suspend);
782 760
783/** 761/**
784 * __pm_runtime_get - Reference count a device and wake it up, if necessary. 762 * __pm_runtime_resume - Entry point for run-time resume operations.
785 * @dev: Device to handle. 763 * @dev: Device to resume.
786 * @sync: If set and the device is suspended, resume it synchronously. 764 * @rpmflags: Flag bits.
765 *
766 * If the RPM_GET_PUT flag is set, increment the device's usage count. Then
767 * carry out a resume, either synchronous or asynchronous.
787 * 768 *
788 * Increment the usage count of the device and resume it or submit a resume 769 * This routine may be called in atomic context if the RPM_ASYNC flag is set.
789 * request for it, depending on the value of @sync.
790 */ 770 */
791int __pm_runtime_get(struct device *dev, bool sync) 771int __pm_runtime_resume(struct device *dev, int rpmflags)
792{ 772{
773 unsigned long flags;
793 int retval; 774 int retval;
794 775
795 atomic_inc(&dev->power.usage_count); 776 if (rpmflags & RPM_GET_PUT)
796 retval = sync ? pm_runtime_resume(dev) : pm_request_resume(dev); 777 atomic_inc(&dev->power.usage_count);
797 778
798 return retval; 779 spin_lock_irqsave(&dev->power.lock, flags);
799} 780 retval = rpm_resume(dev, rpmflags);
800EXPORT_SYMBOL_GPL(__pm_runtime_get); 781 spin_unlock_irqrestore(&dev->power.lock, flags);
801
802/**
803 * __pm_runtime_put - Decrement the device's usage counter and notify its bus.
804 * @dev: Device to handle.
805 * @sync: If the device's bus type is to be notified, do that synchronously.
806 *
807 * Decrement the usage count of the device and if it reaches zero, carry out a
808 * synchronous idle notification or submit an idle notification request for it,
809 * depending on the value of @sync.
810 */
811int __pm_runtime_put(struct device *dev, bool sync)
812{
813 int retval = 0;
814
815 if (atomic_dec_and_test(&dev->power.usage_count))
816 retval = sync ? pm_runtime_idle(dev) : pm_request_idle(dev);
817 782
818 return retval; 783 return retval;
819} 784}
820EXPORT_SYMBOL_GPL(__pm_runtime_put); 785EXPORT_SYMBOL_GPL(__pm_runtime_resume);
821 786
822/** 787/**
823 * __pm_runtime_set_status - Set run-time PM status of a device. 788 * __pm_runtime_set_status - Set run-time PM status of a device.
@@ -968,7 +933,7 @@ int pm_runtime_barrier(struct device *dev)
968 933
969 if (dev->power.request_pending 934 if (dev->power.request_pending
970 && dev->power.request == RPM_REQ_RESUME) { 935 && dev->power.request == RPM_REQ_RESUME) {
971 __pm_runtime_resume(dev, false); 936 rpm_resume(dev, 0);
972 retval = 1; 937 retval = 1;
973 } 938 }
974 939
@@ -1017,7 +982,7 @@ void __pm_runtime_disable(struct device *dev, bool check_resume)
1017 */ 982 */
1018 pm_runtime_get_noresume(dev); 983 pm_runtime_get_noresume(dev);
1019 984
1020 __pm_runtime_resume(dev, false); 985 rpm_resume(dev, 0);
1021 986
1022 pm_runtime_put_noidle(dev); 987 pm_runtime_put_noidle(dev);
1023 } 988 }
@@ -1065,7 +1030,7 @@ void pm_runtime_forbid(struct device *dev)
1065 1030
1066 dev->power.runtime_auto = false; 1031 dev->power.runtime_auto = false;
1067 atomic_inc(&dev->power.usage_count); 1032 atomic_inc(&dev->power.usage_count);
1068 __pm_runtime_resume(dev, false); 1033 rpm_resume(dev, 0);
1069 1034
1070 out: 1035 out:
1071 spin_unlock_irq(&dev->power.lock); 1036 spin_unlock_irq(&dev->power.lock);
@@ -1086,7 +1051,7 @@ void pm_runtime_allow(struct device *dev)
1086 1051
1087 dev->power.runtime_auto = true; 1052 dev->power.runtime_auto = true;
1088 if (atomic_dec_and_test(&dev->power.usage_count)) 1053 if (atomic_dec_and_test(&dev->power.usage_count))
1089 __pm_runtime_idle(dev); 1054 rpm_idle(dev, RPM_AUTO);
1090 1055
1091 out: 1056 out:
1092 spin_unlock_irq(&dev->power.lock); 1057 spin_unlock_irq(&dev->power.lock);
@@ -1094,13 +1059,110 @@ void pm_runtime_allow(struct device *dev)
1094EXPORT_SYMBOL_GPL(pm_runtime_allow); 1059EXPORT_SYMBOL_GPL(pm_runtime_allow);
1095 1060
1096/** 1061/**
1062 * pm_runtime_no_callbacks - Ignore run-time PM callbacks for a device.
1063 * @dev: Device to handle.
1064 *
1065 * Set the power.no_callbacks flag, which tells the PM core that this
1066 * device is power-managed through its parent and has no run-time PM
1067 * callbacks of its own. The run-time sysfs attributes will be removed.
1068 *
1069 */
1070void pm_runtime_no_callbacks(struct device *dev)
1071{
1072 spin_lock_irq(&dev->power.lock);
1073 dev->power.no_callbacks = 1;
1074 spin_unlock_irq(&dev->power.lock);
1075 if (device_is_registered(dev))
1076 rpm_sysfs_remove(dev);
1077}
1078EXPORT_SYMBOL_GPL(pm_runtime_no_callbacks);
1079
1080/**
1081 * update_autosuspend - Handle a change to a device's autosuspend settings.
1082 * @dev: Device to handle.
1083 * @old_delay: The former autosuspend_delay value.
1084 * @old_use: The former use_autosuspend value.
1085 *
1086 * Prevent runtime suspend if the new delay is negative and use_autosuspend is
1087 * set; otherwise allow it. Send an idle notification if suspends are allowed.
1088 *
1089 * This function must be called under dev->power.lock with interrupts disabled.
1090 */
1091static void update_autosuspend(struct device *dev, int old_delay, int old_use)
1092{
1093 int delay = dev->power.autosuspend_delay;
1094
1095 /* Should runtime suspend be prevented now? */
1096 if (dev->power.use_autosuspend && delay < 0) {
1097
1098 /* If it used to be allowed then prevent it. */
1099 if (!old_use || old_delay >= 0) {
1100 atomic_inc(&dev->power.usage_count);
1101 rpm_resume(dev, 0);
1102 }
1103 }
1104
1105 /* Runtime suspend should be allowed now. */
1106 else {
1107
1108 /* If it used to be prevented then allow it. */
1109 if (old_use && old_delay < 0)
1110 atomic_dec(&dev->power.usage_count);
1111
1112 /* Maybe we can autosuspend now. */
1113 rpm_idle(dev, RPM_AUTO);
1114 }
1115}
1116
1117/**
1118 * pm_runtime_set_autosuspend_delay - Set a device's autosuspend_delay value.
1119 * @dev: Device to handle.
1120 * @delay: Value of the new delay in milliseconds.
1121 *
1122 * Set the device's power.autosuspend_delay value. If it changes to negative
1123 * and the power.use_autosuspend flag is set, prevent run-time suspends. If it
1124 * changes the other way, allow run-time suspends.
1125 */
1126void pm_runtime_set_autosuspend_delay(struct device *dev, int delay)
1127{
1128 int old_delay, old_use;
1129
1130 spin_lock_irq(&dev->power.lock);
1131 old_delay = dev->power.autosuspend_delay;
1132 old_use = dev->power.use_autosuspend;
1133 dev->power.autosuspend_delay = delay;
1134 update_autosuspend(dev, old_delay, old_use);
1135 spin_unlock_irq(&dev->power.lock);
1136}
1137EXPORT_SYMBOL_GPL(pm_runtime_set_autosuspend_delay);
1138
1139/**
1140 * __pm_runtime_use_autosuspend - Set a device's use_autosuspend flag.
1141 * @dev: Device to handle.
1142 * @use: New value for use_autosuspend.
1143 *
1144 * Set the device's power.use_autosuspend flag, and allow or prevent run-time
1145 * suspends as needed.
1146 */
1147void __pm_runtime_use_autosuspend(struct device *dev, bool use)
1148{
1149 int old_delay, old_use;
1150
1151 spin_lock_irq(&dev->power.lock);
1152 old_delay = dev->power.autosuspend_delay;
1153 old_use = dev->power.use_autosuspend;
1154 dev->power.use_autosuspend = use;
1155 update_autosuspend(dev, old_delay, old_use);
1156 spin_unlock_irq(&dev->power.lock);
1157}
1158EXPORT_SYMBOL_GPL(__pm_runtime_use_autosuspend);
1159
1160/**
1097 * pm_runtime_init - Initialize run-time PM fields in given device object. 1161 * pm_runtime_init - Initialize run-time PM fields in given device object.
1098 * @dev: Device object to initialize. 1162 * @dev: Device object to initialize.
1099 */ 1163 */
1100void pm_runtime_init(struct device *dev) 1164void pm_runtime_init(struct device *dev)
1101{ 1165{
1102 spin_lock_init(&dev->power.lock);
1103
1104 dev->power.runtime_status = RPM_SUSPENDED; 1166 dev->power.runtime_status = RPM_SUSPENDED;
1105 dev->power.idle_notification = false; 1167 dev->power.idle_notification = false;
1106 1168
diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c
index e56b4388fe61..0b1e46bf3e56 100644
--- a/drivers/base/power/sysfs.c
+++ b/drivers/base/power/sysfs.c
@@ -75,12 +75,27 @@
75 * attribute is set to "enabled" by bus type code or device drivers and in 75 * attribute is set to "enabled" by bus type code or device drivers and in
76 * that cases it should be safe to leave the default value. 76 * that cases it should be safe to leave the default value.
77 * 77 *
78 * autosuspend_delay_ms - Report/change a device's autosuspend_delay value
79 *
80 * Some drivers don't want to carry out a runtime suspend as soon as a
81 * device becomes idle; they want it always to remain idle for some period
82 * of time before suspending it. This period is the autosuspend_delay
83 * value (expressed in milliseconds) and it can be controlled by the user.
84 * If the value is negative then the device will never be runtime
85 * suspended.
86 *
87 * NOTE: The autosuspend_delay_ms attribute and the autosuspend_delay
88 * value are used only if the driver calls pm_runtime_use_autosuspend().
89 *
78 * wakeup_count - Report the number of wakeup events related to the device 90 * wakeup_count - Report the number of wakeup events related to the device
79 */ 91 */
80 92
81static const char enabled[] = "enabled"; 93static const char enabled[] = "enabled";
82static const char disabled[] = "disabled"; 94static const char disabled[] = "disabled";
83 95
96const char power_group_name[] = "power";
97EXPORT_SYMBOL_GPL(power_group_name);
98
84#ifdef CONFIG_PM_RUNTIME 99#ifdef CONFIG_PM_RUNTIME
85static const char ctrl_auto[] = "auto"; 100static const char ctrl_auto[] = "auto";
86static const char ctrl_on[] = "on"; 101static const char ctrl_on[] = "on";
@@ -170,6 +185,33 @@ static ssize_t rtpm_status_show(struct device *dev,
170} 185}
171 186
172static DEVICE_ATTR(runtime_status, 0444, rtpm_status_show, NULL); 187static DEVICE_ATTR(runtime_status, 0444, rtpm_status_show, NULL);
188
189static ssize_t autosuspend_delay_ms_show(struct device *dev,
190 struct device_attribute *attr, char *buf)
191{
192 if (!dev->power.use_autosuspend)
193 return -EIO;
194 return sprintf(buf, "%d\n", dev->power.autosuspend_delay);
195}
196
197static ssize_t autosuspend_delay_ms_store(struct device *dev,
198 struct device_attribute *attr, const char *buf, size_t n)
199{
200 long delay;
201
202 if (!dev->power.use_autosuspend)
203 return -EIO;
204
205 if (strict_strtol(buf, 10, &delay) != 0 || delay != (int) delay)
206 return -EINVAL;
207
208 pm_runtime_set_autosuspend_delay(dev, delay);
209 return n;
210}
211
212static DEVICE_ATTR(autosuspend_delay_ms, 0644, autosuspend_delay_ms_show,
213 autosuspend_delay_ms_store);
214
173#endif 215#endif
174 216
175static ssize_t 217static ssize_t
@@ -210,11 +252,122 @@ static DEVICE_ATTR(wakeup, 0644, wake_show, wake_store);
210static ssize_t wakeup_count_show(struct device *dev, 252static ssize_t wakeup_count_show(struct device *dev,
211 struct device_attribute *attr, char *buf) 253 struct device_attribute *attr, char *buf)
212{ 254{
213 return sprintf(buf, "%lu\n", dev->power.wakeup_count); 255 unsigned long count = 0;
256 bool enabled = false;
257
258 spin_lock_irq(&dev->power.lock);
259 if (dev->power.wakeup) {
260 count = dev->power.wakeup->event_count;
261 enabled = true;
262 }
263 spin_unlock_irq(&dev->power.lock);
264 return enabled ? sprintf(buf, "%lu\n", count) : sprintf(buf, "\n");
214} 265}
215 266
216static DEVICE_ATTR(wakeup_count, 0444, wakeup_count_show, NULL); 267static DEVICE_ATTR(wakeup_count, 0444, wakeup_count_show, NULL);
217#endif 268
269static ssize_t wakeup_active_count_show(struct device *dev,
270 struct device_attribute *attr, char *buf)
271{
272 unsigned long count = 0;
273 bool enabled = false;
274
275 spin_lock_irq(&dev->power.lock);
276 if (dev->power.wakeup) {
277 count = dev->power.wakeup->active_count;
278 enabled = true;
279 }
280 spin_unlock_irq(&dev->power.lock);
281 return enabled ? sprintf(buf, "%lu\n", count) : sprintf(buf, "\n");
282}
283
284static DEVICE_ATTR(wakeup_active_count, 0444, wakeup_active_count_show, NULL);
285
286static ssize_t wakeup_hit_count_show(struct device *dev,
287 struct device_attribute *attr, char *buf)
288{
289 unsigned long count = 0;
290 bool enabled = false;
291
292 spin_lock_irq(&dev->power.lock);
293 if (dev->power.wakeup) {
294 count = dev->power.wakeup->hit_count;
295 enabled = true;
296 }
297 spin_unlock_irq(&dev->power.lock);
298 return enabled ? sprintf(buf, "%lu\n", count) : sprintf(buf, "\n");
299}
300
301static DEVICE_ATTR(wakeup_hit_count, 0444, wakeup_hit_count_show, NULL);
302
303static ssize_t wakeup_active_show(struct device *dev,
304 struct device_attribute *attr, char *buf)
305{
306 unsigned int active = 0;
307 bool enabled = false;
308
309 spin_lock_irq(&dev->power.lock);
310 if (dev->power.wakeup) {
311 active = dev->power.wakeup->active;
312 enabled = true;
313 }
314 spin_unlock_irq(&dev->power.lock);
315 return enabled ? sprintf(buf, "%u\n", active) : sprintf(buf, "\n");
316}
317
318static DEVICE_ATTR(wakeup_active, 0444, wakeup_active_show, NULL);
319
320static ssize_t wakeup_total_time_show(struct device *dev,
321 struct device_attribute *attr, char *buf)
322{
323 s64 msec = 0;
324 bool enabled = false;
325
326 spin_lock_irq(&dev->power.lock);
327 if (dev->power.wakeup) {
328 msec = ktime_to_ms(dev->power.wakeup->total_time);
329 enabled = true;
330 }
331 spin_unlock_irq(&dev->power.lock);
332 return enabled ? sprintf(buf, "%lld\n", msec) : sprintf(buf, "\n");
333}
334
335static DEVICE_ATTR(wakeup_total_time_ms, 0444, wakeup_total_time_show, NULL);
336
337static ssize_t wakeup_max_time_show(struct device *dev,
338 struct device_attribute *attr, char *buf)
339{
340 s64 msec = 0;
341 bool enabled = false;
342
343 spin_lock_irq(&dev->power.lock);
344 if (dev->power.wakeup) {
345 msec = ktime_to_ms(dev->power.wakeup->max_time);
346 enabled = true;
347 }
348 spin_unlock_irq(&dev->power.lock);
349 return enabled ? sprintf(buf, "%lld\n", msec) : sprintf(buf, "\n");
350}
351
352static DEVICE_ATTR(wakeup_max_time_ms, 0444, wakeup_max_time_show, NULL);
353
354static ssize_t wakeup_last_time_show(struct device *dev,
355 struct device_attribute *attr, char *buf)
356{
357 s64 msec = 0;
358 bool enabled = false;
359
360 spin_lock_irq(&dev->power.lock);
361 if (dev->power.wakeup) {
362 msec = ktime_to_ms(dev->power.wakeup->last_time);
363 enabled = true;
364 }
365 spin_unlock_irq(&dev->power.lock);
366 return enabled ? sprintf(buf, "%lld\n", msec) : sprintf(buf, "\n");
367}
368
369static DEVICE_ATTR(wakeup_last_time_ms, 0444, wakeup_last_time_show, NULL);
370#endif /* CONFIG_PM_SLEEP */
218 371
219#ifdef CONFIG_PM_ADVANCED_DEBUG 372#ifdef CONFIG_PM_ADVANCED_DEBUG
220#ifdef CONFIG_PM_RUNTIME 373#ifdef CONFIG_PM_RUNTIME
@@ -279,19 +432,20 @@ static DEVICE_ATTR(async, 0644, async_show, async_store);
279#endif /* CONFIG_PM_ADVANCED_DEBUG */ 432#endif /* CONFIG_PM_ADVANCED_DEBUG */
280 433
281static struct attribute * power_attrs[] = { 434static struct attribute * power_attrs[] = {
282#ifdef CONFIG_PM_RUNTIME
283 &dev_attr_control.attr,
284 &dev_attr_runtime_status.attr,
285 &dev_attr_runtime_suspended_time.attr,
286 &dev_attr_runtime_active_time.attr,
287#endif
288 &dev_attr_wakeup.attr, 435 &dev_attr_wakeup.attr,
289#ifdef CONFIG_PM_SLEEP 436#ifdef CONFIG_PM_SLEEP
290 &dev_attr_wakeup_count.attr, 437 &dev_attr_wakeup_count.attr,
438 &dev_attr_wakeup_active_count.attr,
439 &dev_attr_wakeup_hit_count.attr,
440 &dev_attr_wakeup_active.attr,
441 &dev_attr_wakeup_total_time_ms.attr,
442 &dev_attr_wakeup_max_time_ms.attr,
443 &dev_attr_wakeup_last_time_ms.attr,
291#endif 444#endif
292#ifdef CONFIG_PM_ADVANCED_DEBUG 445#ifdef CONFIG_PM_ADVANCED_DEBUG
293 &dev_attr_async.attr, 446 &dev_attr_async.attr,
294#ifdef CONFIG_PM_RUNTIME 447#ifdef CONFIG_PM_RUNTIME
448 &dev_attr_runtime_status.attr,
295 &dev_attr_runtime_usage.attr, 449 &dev_attr_runtime_usage.attr,
296 &dev_attr_runtime_active_kids.attr, 450 &dev_attr_runtime_active_kids.attr,
297 &dev_attr_runtime_enabled.attr, 451 &dev_attr_runtime_enabled.attr,
@@ -300,10 +454,53 @@ static struct attribute * power_attrs[] = {
300 NULL, 454 NULL,
301}; 455};
302static struct attribute_group pm_attr_group = { 456static struct attribute_group pm_attr_group = {
303 .name = "power", 457 .name = power_group_name,
304 .attrs = power_attrs, 458 .attrs = power_attrs,
305}; 459};
306 460
461#ifdef CONFIG_PM_RUNTIME
462
463static struct attribute *runtime_attrs[] = {
464#ifndef CONFIG_PM_ADVANCED_DEBUG
465 &dev_attr_runtime_status.attr,
466#endif
467 &dev_attr_control.attr,
468 &dev_attr_runtime_suspended_time.attr,
469 &dev_attr_runtime_active_time.attr,
470 &dev_attr_autosuspend_delay_ms.attr,
471 NULL,
472};
473static struct attribute_group pm_runtime_attr_group = {
474 .name = power_group_name,
475 .attrs = runtime_attrs,
476};
477
478int dpm_sysfs_add(struct device *dev)
479{
480 int rc;
481
482 rc = sysfs_create_group(&dev->kobj, &pm_attr_group);
483 if (rc == 0 && !dev->power.no_callbacks) {
484 rc = sysfs_merge_group(&dev->kobj, &pm_runtime_attr_group);
485 if (rc)
486 sysfs_remove_group(&dev->kobj, &pm_attr_group);
487 }
488 return rc;
489}
490
491void rpm_sysfs_remove(struct device *dev)
492{
493 sysfs_unmerge_group(&dev->kobj, &pm_runtime_attr_group);
494}
495
496void dpm_sysfs_remove(struct device *dev)
497{
498 rpm_sysfs_remove(dev);
499 sysfs_remove_group(&dev->kobj, &pm_attr_group);
500}
501
502#else /* CONFIG_PM_RUNTIME */
503
307int dpm_sysfs_add(struct device * dev) 504int dpm_sysfs_add(struct device * dev)
308{ 505{
309 return sysfs_create_group(&dev->kobj, &pm_attr_group); 506 return sysfs_create_group(&dev->kobj, &pm_attr_group);
@@ -313,3 +510,5 @@ void dpm_sysfs_remove(struct device * dev)
313{ 510{
314 sysfs_remove_group(&dev->kobj, &pm_attr_group); 511 sysfs_remove_group(&dev->kobj, &pm_attr_group);
315} 512}
513
514#endif
diff --git a/drivers/base/power/trace.c b/drivers/base/power/trace.c
index 0a1a2c4dbc6e..9f4258df4cfd 100644
--- a/drivers/base/power/trace.c
+++ b/drivers/base/power/trace.c
@@ -188,8 +188,10 @@ static int show_file_hash(unsigned int value)
188static int show_dev_hash(unsigned int value) 188static int show_dev_hash(unsigned int value)
189{ 189{
190 int match = 0; 190 int match = 0;
191 struct list_head *entry = dpm_list.prev; 191 struct list_head *entry;
192 192
193 device_pm_lock();
194 entry = dpm_list.prev;
193 while (entry != &dpm_list) { 195 while (entry != &dpm_list) {
194 struct device * dev = to_device(entry); 196 struct device * dev = to_device(entry);
195 unsigned int hash = hash_string(DEVSEED, dev_name(dev), DEVHASH); 197 unsigned int hash = hash_string(DEVSEED, dev_name(dev), DEVHASH);
@@ -199,11 +201,43 @@ static int show_dev_hash(unsigned int value)
199 } 201 }
200 entry = entry->prev; 202 entry = entry->prev;
201 } 203 }
204 device_pm_unlock();
202 return match; 205 return match;
203} 206}
204 207
205static unsigned int hash_value_early_read; 208static unsigned int hash_value_early_read;
206 209
210int show_trace_dev_match(char *buf, size_t size)
211{
212 unsigned int value = hash_value_early_read / (USERHASH * FILEHASH);
213 int ret = 0;
214 struct list_head *entry;
215
216 /*
217 * It's possible that multiple devices will match the hash and we can't
218 * tell which is the culprit, so it's best to output them all.
219 */
220 device_pm_lock();
221 entry = dpm_list.prev;
222 while (size && entry != &dpm_list) {
223 struct device *dev = to_device(entry);
224 unsigned int hash = hash_string(DEVSEED, dev_name(dev),
225 DEVHASH);
226 if (hash == value) {
227 int len = snprintf(buf, size, "%s\n",
228 dev_driver_string(dev));
229 if (len > size)
230 len = size;
231 buf += len;
232 ret += len;
233 size -= len;
234 }
235 entry = entry->prev;
236 }
237 device_pm_unlock();
238 return ret;
239}
240
207static int early_resume_init(void) 241static int early_resume_init(void)
208{ 242{
209 hash_value_early_read = read_magic_time(); 243 hash_value_early_read = read_magic_time();
diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c
index eb594facfc3f..71c5528e1c35 100644
--- a/drivers/base/power/wakeup.c
+++ b/drivers/base/power/wakeup.c
@@ -11,7 +11,12 @@
11#include <linux/sched.h> 11#include <linux/sched.h>
12#include <linux/capability.h> 12#include <linux/capability.h>
13#include <linux/suspend.h> 13#include <linux/suspend.h>
14#include <linux/pm.h> 14#include <linux/seq_file.h>
15#include <linux/debugfs.h>
16
17#include "power.h"
18
19#define TIMEOUT 100
15 20
16/* 21/*
17 * If set, the suspend/hibernate code will abort transitions to a sleep state 22 * If set, the suspend/hibernate code will abort transitions to a sleep state
@@ -20,18 +25,244 @@
20bool events_check_enabled; 25bool events_check_enabled;
21 26
22/* The counter of registered wakeup events. */ 27/* The counter of registered wakeup events. */
23static unsigned long event_count; 28static atomic_t event_count = ATOMIC_INIT(0);
24/* A preserved old value of event_count. */ 29/* A preserved old value of event_count. */
25static unsigned long saved_event_count; 30static unsigned int saved_count;
26/* The counter of wakeup events being processed. */ 31/* The counter of wakeup events being processed. */
27static unsigned long events_in_progress; 32static atomic_t events_in_progress = ATOMIC_INIT(0);
28 33
29static DEFINE_SPINLOCK(events_lock); 34static DEFINE_SPINLOCK(events_lock);
30 35
31static void pm_wakeup_timer_fn(unsigned long data); 36static void pm_wakeup_timer_fn(unsigned long data);
32 37
33static DEFINE_TIMER(events_timer, pm_wakeup_timer_fn, 0, 0); 38static LIST_HEAD(wakeup_sources);
34static unsigned long events_timer_expires; 39
40/**
41 * wakeup_source_create - Create a struct wakeup_source object.
42 * @name: Name of the new wakeup source.
43 */
44struct wakeup_source *wakeup_source_create(const char *name)
45{
46 struct wakeup_source *ws;
47
48 ws = kzalloc(sizeof(*ws), GFP_KERNEL);
49 if (!ws)
50 return NULL;
51
52 spin_lock_init(&ws->lock);
53 if (name)
54 ws->name = kstrdup(name, GFP_KERNEL);
55
56 return ws;
57}
58EXPORT_SYMBOL_GPL(wakeup_source_create);
59
60/**
61 * wakeup_source_destroy - Destroy a struct wakeup_source object.
62 * @ws: Wakeup source to destroy.
63 */
64void wakeup_source_destroy(struct wakeup_source *ws)
65{
66 if (!ws)
67 return;
68
69 spin_lock_irq(&ws->lock);
70 while (ws->active) {
71 spin_unlock_irq(&ws->lock);
72
73 schedule_timeout_interruptible(msecs_to_jiffies(TIMEOUT));
74
75 spin_lock_irq(&ws->lock);
76 }
77 spin_unlock_irq(&ws->lock);
78
79 kfree(ws->name);
80 kfree(ws);
81}
82EXPORT_SYMBOL_GPL(wakeup_source_destroy);
83
84/**
85 * wakeup_source_add - Add given object to the list of wakeup sources.
86 * @ws: Wakeup source object to add to the list.
87 */
88void wakeup_source_add(struct wakeup_source *ws)
89{
90 if (WARN_ON(!ws))
91 return;
92
93 setup_timer(&ws->timer, pm_wakeup_timer_fn, (unsigned long)ws);
94 ws->active = false;
95
96 spin_lock_irq(&events_lock);
97 list_add_rcu(&ws->entry, &wakeup_sources);
98 spin_unlock_irq(&events_lock);
99 synchronize_rcu();
100}
101EXPORT_SYMBOL_GPL(wakeup_source_add);
102
103/**
104 * wakeup_source_remove - Remove given object from the wakeup sources list.
105 * @ws: Wakeup source object to remove from the list.
106 */
107void wakeup_source_remove(struct wakeup_source *ws)
108{
109 if (WARN_ON(!ws))
110 return;
111
112 spin_lock_irq(&events_lock);
113 list_del_rcu(&ws->entry);
114 spin_unlock_irq(&events_lock);
115 synchronize_rcu();
116}
117EXPORT_SYMBOL_GPL(wakeup_source_remove);
118
119/**
120 * wakeup_source_register - Create wakeup source and add it to the list.
121 * @name: Name of the wakeup source to register.
122 */
123struct wakeup_source *wakeup_source_register(const char *name)
124{
125 struct wakeup_source *ws;
126
127 ws = wakeup_source_create(name);
128 if (ws)
129 wakeup_source_add(ws);
130
131 return ws;
132}
133EXPORT_SYMBOL_GPL(wakeup_source_register);
134
135/**
136 * wakeup_source_unregister - Remove wakeup source from the list and remove it.
137 * @ws: Wakeup source object to unregister.
138 */
139void wakeup_source_unregister(struct wakeup_source *ws)
140{
141 wakeup_source_remove(ws);
142 wakeup_source_destroy(ws);
143}
144EXPORT_SYMBOL_GPL(wakeup_source_unregister);
145
146/**
147 * device_wakeup_attach - Attach a wakeup source object to a device object.
148 * @dev: Device to handle.
149 * @ws: Wakeup source object to attach to @dev.
150 *
151 * This causes @dev to be treated as a wakeup device.
152 */
153static int device_wakeup_attach(struct device *dev, struct wakeup_source *ws)
154{
155 spin_lock_irq(&dev->power.lock);
156 if (dev->power.wakeup) {
157 spin_unlock_irq(&dev->power.lock);
158 return -EEXIST;
159 }
160 dev->power.wakeup = ws;
161 spin_unlock_irq(&dev->power.lock);
162 return 0;
163}
164
165/**
166 * device_wakeup_enable - Enable given device to be a wakeup source.
167 * @dev: Device to handle.
168 *
169 * Create a wakeup source object, register it and attach it to @dev.
170 */
171int device_wakeup_enable(struct device *dev)
172{
173 struct wakeup_source *ws;
174 int ret;
175
176 if (!dev || !dev->power.can_wakeup)
177 return -EINVAL;
178
179 ws = wakeup_source_register(dev_name(dev));
180 if (!ws)
181 return -ENOMEM;
182
183 ret = device_wakeup_attach(dev, ws);
184 if (ret)
185 wakeup_source_unregister(ws);
186
187 return ret;
188}
189EXPORT_SYMBOL_GPL(device_wakeup_enable);
190
191/**
192 * device_wakeup_detach - Detach a device's wakeup source object from it.
193 * @dev: Device to detach the wakeup source object from.
194 *
195 * After it returns, @dev will not be treated as a wakeup device any more.
196 */
197static struct wakeup_source *device_wakeup_detach(struct device *dev)
198{
199 struct wakeup_source *ws;
200
201 spin_lock_irq(&dev->power.lock);
202 ws = dev->power.wakeup;
203 dev->power.wakeup = NULL;
204 spin_unlock_irq(&dev->power.lock);
205 return ws;
206}
207
208/**
209 * device_wakeup_disable - Do not regard a device as a wakeup source any more.
210 * @dev: Device to handle.
211 *
212 * Detach the @dev's wakeup source object from it, unregister this wakeup source
213 * object and destroy it.
214 */
215int device_wakeup_disable(struct device *dev)
216{
217 struct wakeup_source *ws;
218
219 if (!dev || !dev->power.can_wakeup)
220 return -EINVAL;
221
222 ws = device_wakeup_detach(dev);
223 if (ws)
224 wakeup_source_unregister(ws);
225
226 return 0;
227}
228EXPORT_SYMBOL_GPL(device_wakeup_disable);
229
230/**
231 * device_init_wakeup - Device wakeup initialization.
232 * @dev: Device to handle.
233 * @enable: Whether or not to enable @dev as a wakeup device.
234 *
235 * By default, most devices should leave wakeup disabled. The exceptions are
236 * devices that everyone expects to be wakeup sources: keyboards, power buttons,
237 * possibly network interfaces, etc.
238 */
239int device_init_wakeup(struct device *dev, bool enable)
240{
241 int ret = 0;
242
243 if (enable) {
244 device_set_wakeup_capable(dev, true);
245 ret = device_wakeup_enable(dev);
246 } else {
247 device_set_wakeup_capable(dev, false);
248 }
249
250 return ret;
251}
252EXPORT_SYMBOL_GPL(device_init_wakeup);
253
254/**
255 * device_set_wakeup_enable - Enable or disable a device to wake up the system.
256 * @dev: Device to handle.
257 */
258int device_set_wakeup_enable(struct device *dev, bool enable)
259{
260 if (!dev || !dev->power.can_wakeup)
261 return -EINVAL;
262
263 return enable ? device_wakeup_enable(dev) : device_wakeup_disable(dev);
264}
265EXPORT_SYMBOL_GPL(device_set_wakeup_enable);
35 266
36/* 267/*
37 * The functions below use the observation that each wakeup event starts a 268 * The functions below use the observation that each wakeup event starts a
@@ -55,118 +286,259 @@ static unsigned long events_timer_expires;
55 * knowledge, however, may not be available to it, so it can simply specify time 286 * knowledge, however, may not be available to it, so it can simply specify time
56 * to wait before the system can be suspended and pass it as the second 287 * to wait before the system can be suspended and pass it as the second
57 * argument of pm_wakeup_event(). 288 * argument of pm_wakeup_event().
289 *
290 * It is valid to call pm_relax() after pm_wakeup_event(), in which case the
291 * "no suspend" period will be ended either by the pm_relax(), or by the timer
292 * function executed when the timer expires, whichever comes first.
58 */ 293 */
59 294
60/** 295/**
296 * wakup_source_activate - Mark given wakeup source as active.
297 * @ws: Wakeup source to handle.
298 *
299 * Update the @ws' statistics and, if @ws has just been activated, notify the PM
300 * core of the event by incrementing the counter of of wakeup events being
301 * processed.
302 */
303static void wakeup_source_activate(struct wakeup_source *ws)
304{
305 ws->active = true;
306 ws->active_count++;
307 ws->timer_expires = jiffies;
308 ws->last_time = ktime_get();
309
310 atomic_inc(&events_in_progress);
311}
312
313/**
314 * __pm_stay_awake - Notify the PM core of a wakeup event.
315 * @ws: Wakeup source object associated with the source of the event.
316 *
317 * It is safe to call this function from interrupt context.
318 */
319void __pm_stay_awake(struct wakeup_source *ws)
320{
321 unsigned long flags;
322
323 if (!ws)
324 return;
325
326 spin_lock_irqsave(&ws->lock, flags);
327 ws->event_count++;
328 if (!ws->active)
329 wakeup_source_activate(ws);
330 spin_unlock_irqrestore(&ws->lock, flags);
331}
332EXPORT_SYMBOL_GPL(__pm_stay_awake);
333
334/**
61 * pm_stay_awake - Notify the PM core that a wakeup event is being processed. 335 * pm_stay_awake - Notify the PM core that a wakeup event is being processed.
62 * @dev: Device the wakeup event is related to. 336 * @dev: Device the wakeup event is related to.
63 * 337 *
64 * Notify the PM core of a wakeup event (signaled by @dev) by incrementing the 338 * Notify the PM core of a wakeup event (signaled by @dev) by calling
65 * counter of wakeup events being processed. If @dev is not NULL, the counter 339 * __pm_stay_awake for the @dev's wakeup source object.
66 * of wakeup events related to @dev is incremented too.
67 * 340 *
68 * Call this function after detecting of a wakeup event if pm_relax() is going 341 * Call this function after detecting of a wakeup event if pm_relax() is going
69 * to be called directly after processing the event (and possibly passing it to 342 * to be called directly after processing the event (and possibly passing it to
70 * user space for further processing). 343 * user space for further processing).
71 *
72 * It is safe to call this function from interrupt context.
73 */ 344 */
74void pm_stay_awake(struct device *dev) 345void pm_stay_awake(struct device *dev)
75{ 346{
76 unsigned long flags; 347 unsigned long flags;
77 348
78 spin_lock_irqsave(&events_lock, flags); 349 if (!dev)
79 if (dev) 350 return;
80 dev->power.wakeup_count++;
81 351
82 events_in_progress++; 352 spin_lock_irqsave(&dev->power.lock, flags);
83 spin_unlock_irqrestore(&events_lock, flags); 353 __pm_stay_awake(dev->power.wakeup);
354 spin_unlock_irqrestore(&dev->power.lock, flags);
84} 355}
356EXPORT_SYMBOL_GPL(pm_stay_awake);
85 357
86/** 358/**
87 * pm_relax - Notify the PM core that processing of a wakeup event has ended. 359 * wakup_source_deactivate - Mark given wakeup source as inactive.
360 * @ws: Wakeup source to handle.
88 * 361 *
89 * Notify the PM core that a wakeup event has been processed by decrementing 362 * Update the @ws' statistics and notify the PM core that the wakeup source has
90 * the counter of wakeup events being processed and incrementing the counter 363 * become inactive by decrementing the counter of wakeup events being processed
91 * of registered wakeup events. 364 * and incrementing the counter of registered wakeup events.
365 */
366static void wakeup_source_deactivate(struct wakeup_source *ws)
367{
368 ktime_t duration;
369 ktime_t now;
370
371 ws->relax_count++;
372 /*
373 * __pm_relax() may be called directly or from a timer function.
374 * If it is called directly right after the timer function has been
375 * started, but before the timer function calls __pm_relax(), it is
376 * possible that __pm_stay_awake() will be called in the meantime and
377 * will set ws->active. Then, ws->active may be cleared immediately
378 * by the __pm_relax() called from the timer function, but in such a
379 * case ws->relax_count will be different from ws->active_count.
380 */
381 if (ws->relax_count != ws->active_count) {
382 ws->relax_count--;
383 return;
384 }
385
386 ws->active = false;
387
388 now = ktime_get();
389 duration = ktime_sub(now, ws->last_time);
390 ws->total_time = ktime_add(ws->total_time, duration);
391 if (ktime_to_ns(duration) > ktime_to_ns(ws->max_time))
392 ws->max_time = duration;
393
394 del_timer(&ws->timer);
395
396 /*
397 * event_count has to be incremented before events_in_progress is
398 * modified, so that the callers of pm_check_wakeup_events() and
399 * pm_save_wakeup_count() don't see the old value of event_count and
400 * events_in_progress equal to zero at the same time.
401 */
402 atomic_inc(&event_count);
403 smp_mb__before_atomic_dec();
404 atomic_dec(&events_in_progress);
405}
406
407/**
408 * __pm_relax - Notify the PM core that processing of a wakeup event has ended.
409 * @ws: Wakeup source object associated with the source of the event.
92 * 410 *
93 * Call this function for wakeup events whose processing started with calling 411 * Call this function for wakeup events whose processing started with calling
94 * pm_stay_awake(). 412 * __pm_stay_awake().
95 * 413 *
96 * It is safe to call it from interrupt context. 414 * It is safe to call it from interrupt context.
97 */ 415 */
98void pm_relax(void) 416void __pm_relax(struct wakeup_source *ws)
99{ 417{
100 unsigned long flags; 418 unsigned long flags;
101 419
102 spin_lock_irqsave(&events_lock, flags); 420 if (!ws)
103 if (events_in_progress) { 421 return;
104 events_in_progress--; 422
105 event_count++; 423 spin_lock_irqsave(&ws->lock, flags);
106 } 424 if (ws->active)
107 spin_unlock_irqrestore(&events_lock, flags); 425 wakeup_source_deactivate(ws);
426 spin_unlock_irqrestore(&ws->lock, flags);
427}
428EXPORT_SYMBOL_GPL(__pm_relax);
429
430/**
431 * pm_relax - Notify the PM core that processing of a wakeup event has ended.
432 * @dev: Device that signaled the event.
433 *
434 * Execute __pm_relax() for the @dev's wakeup source object.
435 */
436void pm_relax(struct device *dev)
437{
438 unsigned long flags;
439
440 if (!dev)
441 return;
442
443 spin_lock_irqsave(&dev->power.lock, flags);
444 __pm_relax(dev->power.wakeup);
445 spin_unlock_irqrestore(&dev->power.lock, flags);
108} 446}
447EXPORT_SYMBOL_GPL(pm_relax);
109 448
110/** 449/**
111 * pm_wakeup_timer_fn - Delayed finalization of a wakeup event. 450 * pm_wakeup_timer_fn - Delayed finalization of a wakeup event.
451 * @data: Address of the wakeup source object associated with the event source.
112 * 452 *
113 * Decrease the counter of wakeup events being processed after it was increased 453 * Call __pm_relax() for the wakeup source whose address is stored in @data.
114 * by pm_wakeup_event().
115 */ 454 */
116static void pm_wakeup_timer_fn(unsigned long data) 455static void pm_wakeup_timer_fn(unsigned long data)
117{ 456{
457 __pm_relax((struct wakeup_source *)data);
458}
459
460/**
461 * __pm_wakeup_event - Notify the PM core of a wakeup event.
462 * @ws: Wakeup source object associated with the event source.
463 * @msec: Anticipated event processing time (in milliseconds).
464 *
465 * Notify the PM core of a wakeup event whose source is @ws that will take
466 * approximately @msec milliseconds to be processed by the kernel. If @ws is
467 * not active, activate it. If @msec is nonzero, set up the @ws' timer to
468 * execute pm_wakeup_timer_fn() in future.
469 *
470 * It is safe to call this function from interrupt context.
471 */
472void __pm_wakeup_event(struct wakeup_source *ws, unsigned int msec)
473{
118 unsigned long flags; 474 unsigned long flags;
475 unsigned long expires;
119 476
120 spin_lock_irqsave(&events_lock, flags); 477 if (!ws)
121 if (events_timer_expires 478 return;
122 && time_before_eq(events_timer_expires, jiffies)) { 479
123 events_in_progress--; 480 spin_lock_irqsave(&ws->lock, flags);
124 events_timer_expires = 0; 481
482 ws->event_count++;
483 if (!ws->active)
484 wakeup_source_activate(ws);
485
486 if (!msec) {
487 wakeup_source_deactivate(ws);
488 goto unlock;
125 } 489 }
126 spin_unlock_irqrestore(&events_lock, flags); 490
491 expires = jiffies + msecs_to_jiffies(msec);
492 if (!expires)
493 expires = 1;
494
495 if (time_after(expires, ws->timer_expires)) {
496 mod_timer(&ws->timer, expires);
497 ws->timer_expires = expires;
498 }
499
500 unlock:
501 spin_unlock_irqrestore(&ws->lock, flags);
127} 502}
503EXPORT_SYMBOL_GPL(__pm_wakeup_event);
504
128 505
129/** 506/**
130 * pm_wakeup_event - Notify the PM core of a wakeup event. 507 * pm_wakeup_event - Notify the PM core of a wakeup event.
131 * @dev: Device the wakeup event is related to. 508 * @dev: Device the wakeup event is related to.
132 * @msec: Anticipated event processing time (in milliseconds). 509 * @msec: Anticipated event processing time (in milliseconds).
133 * 510 *
134 * Notify the PM core of a wakeup event (signaled by @dev) that will take 511 * Call __pm_wakeup_event() for the @dev's wakeup source object.
135 * approximately @msec milliseconds to be processed by the kernel. Increment
136 * the counter of registered wakeup events and (if @msec is nonzero) set up
137 * the wakeup events timer to execute pm_wakeup_timer_fn() in future (if the
138 * timer has not been set up already, increment the counter of wakeup events
139 * being processed). If @dev is not NULL, the counter of wakeup events related
140 * to @dev is incremented too.
141 *
142 * It is safe to call this function from interrupt context.
143 */ 512 */
144void pm_wakeup_event(struct device *dev, unsigned int msec) 513void pm_wakeup_event(struct device *dev, unsigned int msec)
145{ 514{
146 unsigned long flags; 515 unsigned long flags;
147 516
148 spin_lock_irqsave(&events_lock, flags); 517 if (!dev)
149 event_count++; 518 return;
150 if (dev)
151 dev->power.wakeup_count++;
152
153 if (msec) {
154 unsigned long expires;
155 519
156 expires = jiffies + msecs_to_jiffies(msec); 520 spin_lock_irqsave(&dev->power.lock, flags);
157 if (!expires) 521 __pm_wakeup_event(dev->power.wakeup, msec);
158 expires = 1; 522 spin_unlock_irqrestore(&dev->power.lock, flags);
523}
524EXPORT_SYMBOL_GPL(pm_wakeup_event);
159 525
160 if (!events_timer_expires 526/**
161 || time_after(expires, events_timer_expires)) { 527 * pm_wakeup_update_hit_counts - Update hit counts of all active wakeup sources.
162 if (!events_timer_expires) 528 */
163 events_in_progress++; 529static void pm_wakeup_update_hit_counts(void)
530{
531 unsigned long flags;
532 struct wakeup_source *ws;
164 533
165 mod_timer(&events_timer, expires); 534 rcu_read_lock();
166 events_timer_expires = expires; 535 list_for_each_entry_rcu(ws, &wakeup_sources, entry) {
167 } 536 spin_lock_irqsave(&ws->lock, flags);
537 if (ws->active)
538 ws->hit_count++;
539 spin_unlock_irqrestore(&ws->lock, flags);
168 } 540 }
169 spin_unlock_irqrestore(&events_lock, flags); 541 rcu_read_unlock();
170} 542}
171 543
172/** 544/**
@@ -184,10 +556,13 @@ bool pm_check_wakeup_events(void)
184 556
185 spin_lock_irqsave(&events_lock, flags); 557 spin_lock_irqsave(&events_lock, flags);
186 if (events_check_enabled) { 558 if (events_check_enabled) {
187 ret = (event_count == saved_event_count) && !events_in_progress; 559 ret = ((unsigned int)atomic_read(&event_count) == saved_count)
560 && !atomic_read(&events_in_progress);
188 events_check_enabled = ret; 561 events_check_enabled = ret;
189 } 562 }
190 spin_unlock_irqrestore(&events_lock, flags); 563 spin_unlock_irqrestore(&events_lock, flags);
564 if (!ret)
565 pm_wakeup_update_hit_counts();
191 return ret; 566 return ret;
192} 567}
193 568
@@ -202,24 +577,20 @@ bool pm_check_wakeup_events(void)
202 * drop down to zero has been interrupted by a signal (and the current number 577 * drop down to zero has been interrupted by a signal (and the current number
203 * of wakeup events being processed is still nonzero). Otherwise return true. 578 * of wakeup events being processed is still nonzero). Otherwise return true.
204 */ 579 */
205bool pm_get_wakeup_count(unsigned long *count) 580bool pm_get_wakeup_count(unsigned int *count)
206{ 581{
207 bool ret; 582 bool ret;
208 583
209 spin_lock_irq(&events_lock);
210 if (capable(CAP_SYS_ADMIN)) 584 if (capable(CAP_SYS_ADMIN))
211 events_check_enabled = false; 585 events_check_enabled = false;
212 586
213 while (events_in_progress && !signal_pending(current)) { 587 while (atomic_read(&events_in_progress) && !signal_pending(current)) {
214 spin_unlock_irq(&events_lock); 588 pm_wakeup_update_hit_counts();
215 589 schedule_timeout_interruptible(msecs_to_jiffies(TIMEOUT));
216 schedule_timeout_interruptible(msecs_to_jiffies(100));
217
218 spin_lock_irq(&events_lock);
219 } 590 }
220 *count = event_count; 591
221 ret = !events_in_progress; 592 ret = !atomic_read(&events_in_progress);
222 spin_unlock_irq(&events_lock); 593 *count = atomic_read(&event_count);
223 return ret; 594 return ret;
224} 595}
225 596
@@ -232,16 +603,102 @@ bool pm_get_wakeup_count(unsigned long *count)
232 * old number of registered wakeup events to be used by pm_check_wakeup_events() 603 * old number of registered wakeup events to be used by pm_check_wakeup_events()
233 * and return true. Otherwise return false. 604 * and return true. Otherwise return false.
234 */ 605 */
235bool pm_save_wakeup_count(unsigned long count) 606bool pm_save_wakeup_count(unsigned int count)
236{ 607{
237 bool ret = false; 608 bool ret = false;
238 609
239 spin_lock_irq(&events_lock); 610 spin_lock_irq(&events_lock);
240 if (count == event_count && !events_in_progress) { 611 if (count == (unsigned int)atomic_read(&event_count)
241 saved_event_count = count; 612 && !atomic_read(&events_in_progress)) {
613 saved_count = count;
242 events_check_enabled = true; 614 events_check_enabled = true;
243 ret = true; 615 ret = true;
244 } 616 }
245 spin_unlock_irq(&events_lock); 617 spin_unlock_irq(&events_lock);
618 if (!ret)
619 pm_wakeup_update_hit_counts();
620 return ret;
621}
622
623static struct dentry *wakeup_sources_stats_dentry;
624
625/**
626 * print_wakeup_source_stats - Print wakeup source statistics information.
627 * @m: seq_file to print the statistics into.
628 * @ws: Wakeup source object to print the statistics for.
629 */
630static int print_wakeup_source_stats(struct seq_file *m,
631 struct wakeup_source *ws)
632{
633 unsigned long flags;
634 ktime_t total_time;
635 ktime_t max_time;
636 unsigned long active_count;
637 ktime_t active_time;
638 int ret;
639
640 spin_lock_irqsave(&ws->lock, flags);
641
642 total_time = ws->total_time;
643 max_time = ws->max_time;
644 active_count = ws->active_count;
645 if (ws->active) {
646 active_time = ktime_sub(ktime_get(), ws->last_time);
647 total_time = ktime_add(total_time, active_time);
648 if (active_time.tv64 > max_time.tv64)
649 max_time = active_time;
650 } else {
651 active_time = ktime_set(0, 0);
652 }
653
654 ret = seq_printf(m, "%-12s\t%lu\t\t%lu\t\t%lu\t\t"
655 "%lld\t\t%lld\t\t%lld\t\t%lld\n",
656 ws->name, active_count, ws->event_count, ws->hit_count,
657 ktime_to_ms(active_time), ktime_to_ms(total_time),
658 ktime_to_ms(max_time), ktime_to_ms(ws->last_time));
659
660 spin_unlock_irqrestore(&ws->lock, flags);
661
246 return ret; 662 return ret;
247} 663}
664
665/**
666 * wakeup_sources_stats_show - Print wakeup sources statistics information.
667 * @m: seq_file to print the statistics into.
668 */
669static int wakeup_sources_stats_show(struct seq_file *m, void *unused)
670{
671 struct wakeup_source *ws;
672
673 seq_puts(m, "name\t\tactive_count\tevent_count\thit_count\t"
674 "active_since\ttotal_time\tmax_time\tlast_change\n");
675
676 rcu_read_lock();
677 list_for_each_entry_rcu(ws, &wakeup_sources, entry)
678 print_wakeup_source_stats(m, ws);
679 rcu_read_unlock();
680
681 return 0;
682}
683
684static int wakeup_sources_stats_open(struct inode *inode, struct file *file)
685{
686 return single_open(file, wakeup_sources_stats_show, NULL);
687}
688
689static const struct file_operations wakeup_sources_stats_fops = {
690 .owner = THIS_MODULE,
691 .open = wakeup_sources_stats_open,
692 .read = seq_read,
693 .llseek = seq_lseek,
694 .release = single_release,
695};
696
697static int __init wakeup_sources_debugfs_init(void)
698{
699 wakeup_sources_stats_dentry = debugfs_create_file("wakeup_sources",
700 S_IRUGO, NULL, NULL, &wakeup_sources_stats_fops);
701 return 0;
702}
703
704postcore_initcall(wakeup_sources_debugfs_init);
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index 23c1e598792a..442f34ff1af8 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -148,6 +148,65 @@ void sysfs_remove_group(struct kobject * kobj,
148 sysfs_put(sd); 148 sysfs_put(sd);
149} 149}
150 150
151/**
152 * sysfs_merge_group - merge files into a pre-existing attribute group.
153 * @kobj: The kobject containing the group.
154 * @grp: The files to create and the attribute group they belong to.
155 *
156 * This function returns an error if the group doesn't exist or any of the
157 * files already exist in that group, in which case none of the new files
158 * are created.
159 */
160int sysfs_merge_group(struct kobject *kobj,
161 const struct attribute_group *grp)
162{
163 struct sysfs_dirent *dir_sd;
164 int error = 0;
165 struct attribute *const *attr;
166 int i;
167
168 if (grp)
169 dir_sd = sysfs_get_dirent(kobj->sd, NULL, grp->name);
170 else
171 dir_sd = sysfs_get(kobj->sd);
172 if (!dir_sd)
173 return -ENOENT;
174
175 for ((i = 0, attr = grp->attrs); *attr && !error; (++i, ++attr))
176 error = sysfs_add_file(dir_sd, *attr, SYSFS_KOBJ_ATTR);
177 if (error) {
178 while (--i >= 0)
179 sysfs_hash_and_remove(dir_sd, NULL, (*--attr)->name);
180 }
181 sysfs_put(dir_sd);
182
183 return error;
184}
185EXPORT_SYMBOL_GPL(sysfs_merge_group);
186
187/**
188 * sysfs_unmerge_group - remove files from a pre-existing attribute group.
189 * @kobj: The kobject containing the group.
190 * @grp: The files to remove and the attribute group they belong to.
191 */
192void sysfs_unmerge_group(struct kobject *kobj,
193 const struct attribute_group *grp)
194{
195 struct sysfs_dirent *dir_sd;
196 struct attribute *const *attr;
197
198 if (grp)
199 dir_sd = sysfs_get_dirent(kobj->sd, NULL, grp->name);
200 else
201 dir_sd = sysfs_get(kobj->sd);
202 if (dir_sd) {
203 for (attr = grp->attrs; *attr; ++attr)
204 sysfs_hash_and_remove(dir_sd, NULL, (*attr)->name);
205 sysfs_put(dir_sd);
206 }
207}
208EXPORT_SYMBOL_GPL(sysfs_unmerge_group);
209
151 210
152EXPORT_SYMBOL_GPL(sysfs_create_group); 211EXPORT_SYMBOL_GPL(sysfs_create_group);
153EXPORT_SYMBOL_GPL(sysfs_update_group); 212EXPORT_SYMBOL_GPL(sysfs_update_group);
diff --git a/include/linux/opp.h b/include/linux/opp.h
new file mode 100644
index 000000000000..5449945d589f
--- /dev/null
+++ b/include/linux/opp.h
@@ -0,0 +1,105 @@
1/*
2 * Generic OPP Interface
3 *
4 * Copyright (C) 2009-2010 Texas Instruments Incorporated.
5 * Nishanth Menon
6 * Romit Dasgupta
7 * Kevin Hilman
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
12 */
13
14#ifndef __LINUX_OPP_H__
15#define __LINUX_OPP_H__
16
17#include <linux/err.h>
18#include <linux/cpufreq.h>
19
20struct opp;
21
22#if defined(CONFIG_PM_OPP)
23
24unsigned long opp_get_voltage(struct opp *opp);
25
26unsigned long opp_get_freq(struct opp *opp);
27
28int opp_get_opp_count(struct device *dev);
29
30struct opp *opp_find_freq_exact(struct device *dev, unsigned long freq,
31 bool available);
32
33struct opp *opp_find_freq_floor(struct device *dev, unsigned long *freq);
34
35struct opp *opp_find_freq_ceil(struct device *dev, unsigned long *freq);
36
37int opp_add(struct device *dev, unsigned long freq, unsigned long u_volt);
38
39int opp_enable(struct device *dev, unsigned long freq);
40
41int opp_disable(struct device *dev, unsigned long freq);
42
43#else
44static inline unsigned long opp_get_voltage(struct opp *opp)
45{
46 return 0;
47}
48
49static inline unsigned long opp_get_freq(struct opp *opp)
50{
51 return 0;
52}
53
54static inline int opp_get_opp_count(struct device *dev)
55{
56 return 0;
57}
58
59static inline struct opp *opp_find_freq_exact(struct device *dev,
60 unsigned long freq, bool available)
61{
62 return ERR_PTR(-EINVAL);
63}
64
65static inline struct opp *opp_find_freq_floor(struct device *dev,
66 unsigned long *freq)
67{
68 return ERR_PTR(-EINVAL);
69}
70
71static inline struct opp *opp_find_freq_ceil(struct device *dev,
72 unsigned long *freq)
73{
74 return ERR_PTR(-EINVAL);
75}
76
77static inline int opp_add(struct device *dev, unsigned long freq,
78 unsigned long u_volt)
79{
80 return -EINVAL;
81}
82
83static inline int opp_enable(struct device *dev, unsigned long freq)
84{
85 return 0;
86}
87
88static inline int opp_disable(struct device *dev, unsigned long freq)
89{
90 return 0;
91}
92#endif /* CONFIG_PM */
93
94#if defined(CONFIG_CPU_FREQ) && defined(CONFIG_PM_OPP)
95int opp_init_cpufreq_table(struct device *dev,
96 struct cpufreq_frequency_table **table);
97#else
98static inline int opp_init_cpufreq_table(struct device *dev,
99 struct cpufreq_frequency_table **table)
100{
101 return -EINVAL;
102}
103#endif /* CONFIG_CPU_FREQ */
104
105#endif /* __LINUX_OPP_H__ */
diff --git a/include/linux/pm.h b/include/linux/pm.h
index 52e8c55ff314..40f3f45702ba 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -41,6 +41,12 @@ extern void (*pm_power_off_prepare)(void);
41 41
42struct device; 42struct device;
43 43
44#ifdef CONFIG_PM
45extern const char power_group_name[]; /* = "power" */
46#else
47#define power_group_name NULL
48#endif
49
44typedef struct pm_message { 50typedef struct pm_message {
45 int event; 51 int event;
46} pm_message_t; 52} pm_message_t;
@@ -438,6 +444,9 @@ enum rpm_status {
438 * 444 *
439 * RPM_REQ_SUSPEND Run the device bus type's ->runtime_suspend() callback 445 * RPM_REQ_SUSPEND Run the device bus type's ->runtime_suspend() callback
440 * 446 *
447 * RPM_REQ_AUTOSUSPEND Same as RPM_REQ_SUSPEND, but not until the device has
448 * been inactive for as long as power.autosuspend_delay
449 *
441 * RPM_REQ_RESUME Run the device bus type's ->runtime_resume() callback 450 * RPM_REQ_RESUME Run the device bus type's ->runtime_resume() callback
442 */ 451 */
443 452
@@ -445,26 +454,28 @@ enum rpm_request {
445 RPM_REQ_NONE = 0, 454 RPM_REQ_NONE = 0,
446 RPM_REQ_IDLE, 455 RPM_REQ_IDLE,
447 RPM_REQ_SUSPEND, 456 RPM_REQ_SUSPEND,
457 RPM_REQ_AUTOSUSPEND,
448 RPM_REQ_RESUME, 458 RPM_REQ_RESUME,
449}; 459};
450 460
461struct wakeup_source;
462
451struct dev_pm_info { 463struct dev_pm_info {
452 pm_message_t power_state; 464 pm_message_t power_state;
453 unsigned int can_wakeup:1; 465 unsigned int can_wakeup:1;
454 unsigned int should_wakeup:1;
455 unsigned async_suspend:1; 466 unsigned async_suspend:1;
456 enum dpm_state status; /* Owned by the PM core */ 467 enum dpm_state status; /* Owned by the PM core */
468 spinlock_t lock;
457#ifdef CONFIG_PM_SLEEP 469#ifdef CONFIG_PM_SLEEP
458 struct list_head entry; 470 struct list_head entry;
459 struct completion completion; 471 struct completion completion;
460 unsigned long wakeup_count; 472 struct wakeup_source *wakeup;
461#endif 473#endif
462#ifdef CONFIG_PM_RUNTIME 474#ifdef CONFIG_PM_RUNTIME
463 struct timer_list suspend_timer; 475 struct timer_list suspend_timer;
464 unsigned long timer_expires; 476 unsigned long timer_expires;
465 struct work_struct work; 477 struct work_struct work;
466 wait_queue_head_t wait_queue; 478 wait_queue_head_t wait_queue;
467 spinlock_t lock;
468 atomic_t usage_count; 479 atomic_t usage_count;
469 atomic_t child_count; 480 atomic_t child_count;
470 unsigned int disable_depth:3; 481 unsigned int disable_depth:3;
@@ -474,9 +485,14 @@ struct dev_pm_info {
474 unsigned int deferred_resume:1; 485 unsigned int deferred_resume:1;
475 unsigned int run_wake:1; 486 unsigned int run_wake:1;
476 unsigned int runtime_auto:1; 487 unsigned int runtime_auto:1;
488 unsigned int no_callbacks:1;
489 unsigned int use_autosuspend:1;
490 unsigned int timer_autosuspends:1;
477 enum rpm_request request; 491 enum rpm_request request;
478 enum rpm_status runtime_status; 492 enum rpm_status runtime_status;
479 int runtime_error; 493 int runtime_error;
494 int autosuspend_delay;
495 unsigned long last_busy;
480 unsigned long active_jiffies; 496 unsigned long active_jiffies;
481 unsigned long suspended_jiffies; 497 unsigned long suspended_jiffies;
482 unsigned long accounting_timestamp; 498 unsigned long accounting_timestamp;
@@ -558,12 +574,7 @@ extern void __suspend_report_result(const char *function, void *fn, int ret);
558 __suspend_report_result(__func__, fn, ret); \ 574 __suspend_report_result(__func__, fn, ret); \
559 } while (0) 575 } while (0)
560 576
561extern void device_pm_wait_for_dev(struct device *sub, struct device *dev); 577extern int device_pm_wait_for_dev(struct device *sub, struct device *dev);
562
563/* drivers/base/power/wakeup.c */
564extern void pm_wakeup_event(struct device *dev, unsigned int msec);
565extern void pm_stay_awake(struct device *dev);
566extern void pm_relax(void);
567#else /* !CONFIG_PM_SLEEP */ 578#else /* !CONFIG_PM_SLEEP */
568 579
569#define device_pm_lock() do {} while (0) 580#define device_pm_lock() do {} while (0)
@@ -576,11 +587,10 @@ static inline int dpm_suspend_start(pm_message_t state)
576 587
577#define suspend_report_result(fn, ret) do {} while (0) 588#define suspend_report_result(fn, ret) do {} while (0)
578 589
579static inline void device_pm_wait_for_dev(struct device *a, struct device *b) {} 590static inline int device_pm_wait_for_dev(struct device *a, struct device *b)
580 591{
581static inline void pm_wakeup_event(struct device *dev, unsigned int msec) {} 592 return 0;
582static inline void pm_stay_awake(struct device *dev) {} 593}
583static inline void pm_relax(void) {}
584#endif /* !CONFIG_PM_SLEEP */ 594#endif /* !CONFIG_PM_SLEEP */
585 595
586/* How to reorder dpm_list after device_move() */ 596/* How to reorder dpm_list after device_move() */
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index 6e81888c6222..3ec2358f8692 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -12,18 +12,24 @@
12#include <linux/device.h> 12#include <linux/device.h>
13#include <linux/pm.h> 13#include <linux/pm.h>
14 14
15#include <linux/jiffies.h>
16
17/* Runtime PM flag argument bits */
18#define RPM_ASYNC 0x01 /* Request is asynchronous */
19#define RPM_NOWAIT 0x02 /* Don't wait for concurrent
20 state change */
21#define RPM_GET_PUT 0x04 /* Increment/decrement the
22 usage_count */
23#define RPM_AUTO 0x08 /* Use autosuspend_delay */
24
15#ifdef CONFIG_PM_RUNTIME 25#ifdef CONFIG_PM_RUNTIME
16 26
17extern struct workqueue_struct *pm_wq; 27extern struct workqueue_struct *pm_wq;
18 28
19extern int pm_runtime_idle(struct device *dev); 29extern int __pm_runtime_idle(struct device *dev, int rpmflags);
20extern int pm_runtime_suspend(struct device *dev); 30extern int __pm_runtime_suspend(struct device *dev, int rpmflags);
21extern int pm_runtime_resume(struct device *dev); 31extern int __pm_runtime_resume(struct device *dev, int rpmflags);
22extern int pm_request_idle(struct device *dev);
23extern int pm_schedule_suspend(struct device *dev, unsigned int delay); 32extern int pm_schedule_suspend(struct device *dev, unsigned int delay);
24extern int pm_request_resume(struct device *dev);
25extern int __pm_runtime_get(struct device *dev, bool sync);
26extern int __pm_runtime_put(struct device *dev, bool sync);
27extern int __pm_runtime_set_status(struct device *dev, unsigned int status); 33extern int __pm_runtime_set_status(struct device *dev, unsigned int status);
28extern int pm_runtime_barrier(struct device *dev); 34extern int pm_runtime_barrier(struct device *dev);
29extern void pm_runtime_enable(struct device *dev); 35extern void pm_runtime_enable(struct device *dev);
@@ -33,6 +39,10 @@ extern void pm_runtime_forbid(struct device *dev);
33extern int pm_generic_runtime_idle(struct device *dev); 39extern int pm_generic_runtime_idle(struct device *dev);
34extern int pm_generic_runtime_suspend(struct device *dev); 40extern int pm_generic_runtime_suspend(struct device *dev);
35extern int pm_generic_runtime_resume(struct device *dev); 41extern int pm_generic_runtime_resume(struct device *dev);
42extern void pm_runtime_no_callbacks(struct device *dev);
43extern void __pm_runtime_use_autosuspend(struct device *dev, bool use);
44extern void pm_runtime_set_autosuspend_delay(struct device *dev, int delay);
45extern unsigned long pm_runtime_autosuspend_expiration(struct device *dev);
36 46
37static inline bool pm_children_suspended(struct device *dev) 47static inline bool pm_children_suspended(struct device *dev)
38{ 48{
@@ -70,19 +80,29 @@ static inline bool pm_runtime_suspended(struct device *dev)
70 return dev->power.runtime_status == RPM_SUSPENDED; 80 return dev->power.runtime_status == RPM_SUSPENDED;
71} 81}
72 82
83static inline void pm_runtime_mark_last_busy(struct device *dev)
84{
85 ACCESS_ONCE(dev->power.last_busy) = jiffies;
86}
87
73#else /* !CONFIG_PM_RUNTIME */ 88#else /* !CONFIG_PM_RUNTIME */
74 89
75static inline int pm_runtime_idle(struct device *dev) { return -ENOSYS; } 90static inline int __pm_runtime_idle(struct device *dev, int rpmflags)
76static inline int pm_runtime_suspend(struct device *dev) { return -ENOSYS; } 91{
77static inline int pm_runtime_resume(struct device *dev) { return 0; } 92 return -ENOSYS;
78static inline int pm_request_idle(struct device *dev) { return -ENOSYS; } 93}
94static inline int __pm_runtime_suspend(struct device *dev, int rpmflags)
95{
96 return -ENOSYS;
97}
98static inline int __pm_runtime_resume(struct device *dev, int rpmflags)
99{
100 return 1;
101}
79static inline int pm_schedule_suspend(struct device *dev, unsigned int delay) 102static inline int pm_schedule_suspend(struct device *dev, unsigned int delay)
80{ 103{
81 return -ENOSYS; 104 return -ENOSYS;
82} 105}
83static inline int pm_request_resume(struct device *dev) { return 0; }
84static inline int __pm_runtime_get(struct device *dev, bool sync) { return 1; }
85static inline int __pm_runtime_put(struct device *dev, bool sync) { return 0; }
86static inline int __pm_runtime_set_status(struct device *dev, 106static inline int __pm_runtime_set_status(struct device *dev,
87 unsigned int status) { return 0; } 107 unsigned int status) { return 0; }
88static inline int pm_runtime_barrier(struct device *dev) { return 0; } 108static inline int pm_runtime_barrier(struct device *dev) { return 0; }
@@ -102,27 +122,82 @@ static inline bool pm_runtime_suspended(struct device *dev) { return false; }
102static inline int pm_generic_runtime_idle(struct device *dev) { return 0; } 122static inline int pm_generic_runtime_idle(struct device *dev) { return 0; }
103static inline int pm_generic_runtime_suspend(struct device *dev) { return 0; } 123static inline int pm_generic_runtime_suspend(struct device *dev) { return 0; }
104static inline int pm_generic_runtime_resume(struct device *dev) { return 0; } 124static inline int pm_generic_runtime_resume(struct device *dev) { return 0; }
125static inline void pm_runtime_no_callbacks(struct device *dev) {}
126
127static inline void pm_runtime_mark_last_busy(struct device *dev) {}
128static inline void __pm_runtime_use_autosuspend(struct device *dev,
129 bool use) {}
130static inline void pm_runtime_set_autosuspend_delay(struct device *dev,
131 int delay) {}
132static inline unsigned long pm_runtime_autosuspend_expiration(
133 struct device *dev) { return 0; }
105 134
106#endif /* !CONFIG_PM_RUNTIME */ 135#endif /* !CONFIG_PM_RUNTIME */
107 136
137static inline int pm_runtime_idle(struct device *dev)
138{
139 return __pm_runtime_idle(dev, 0);
140}
141
142static inline int pm_runtime_suspend(struct device *dev)
143{
144 return __pm_runtime_suspend(dev, 0);
145}
146
147static inline int pm_runtime_autosuspend(struct device *dev)
148{
149 return __pm_runtime_suspend(dev, RPM_AUTO);
150}
151
152static inline int pm_runtime_resume(struct device *dev)
153{
154 return __pm_runtime_resume(dev, 0);
155}
156
157static inline int pm_request_idle(struct device *dev)
158{
159 return __pm_runtime_idle(dev, RPM_ASYNC);
160}
161
162static inline int pm_request_resume(struct device *dev)
163{
164 return __pm_runtime_resume(dev, RPM_ASYNC);
165}
166
167static inline int pm_request_autosuspend(struct device *dev)
168{
169 return __pm_runtime_suspend(dev, RPM_ASYNC | RPM_AUTO);
170}
171
108static inline int pm_runtime_get(struct device *dev) 172static inline int pm_runtime_get(struct device *dev)
109{ 173{
110 return __pm_runtime_get(dev, false); 174 return __pm_runtime_resume(dev, RPM_GET_PUT | RPM_ASYNC);
111} 175}
112 176
113static inline int pm_runtime_get_sync(struct device *dev) 177static inline int pm_runtime_get_sync(struct device *dev)
114{ 178{
115 return __pm_runtime_get(dev, true); 179 return __pm_runtime_resume(dev, RPM_GET_PUT);
116} 180}
117 181
118static inline int pm_runtime_put(struct device *dev) 182static inline int pm_runtime_put(struct device *dev)
119{ 183{
120 return __pm_runtime_put(dev, false); 184 return __pm_runtime_idle(dev, RPM_GET_PUT | RPM_ASYNC);
185}
186
187static inline int pm_runtime_put_autosuspend(struct device *dev)
188{
189 return __pm_runtime_suspend(dev,
190 RPM_GET_PUT | RPM_ASYNC | RPM_AUTO);
121} 191}
122 192
123static inline int pm_runtime_put_sync(struct device *dev) 193static inline int pm_runtime_put_sync(struct device *dev)
124{ 194{
125 return __pm_runtime_put(dev, true); 195 return __pm_runtime_idle(dev, RPM_GET_PUT);
196}
197
198static inline int pm_runtime_put_sync_autosuspend(struct device *dev)
199{
200 return __pm_runtime_suspend(dev, RPM_GET_PUT | RPM_AUTO);
126} 201}
127 202
128static inline int pm_runtime_set_active(struct device *dev) 203static inline int pm_runtime_set_active(struct device *dev)
@@ -140,4 +215,14 @@ static inline void pm_runtime_disable(struct device *dev)
140 __pm_runtime_disable(dev, true); 215 __pm_runtime_disable(dev, true);
141} 216}
142 217
218static inline void pm_runtime_use_autosuspend(struct device *dev)
219{
220 __pm_runtime_use_autosuspend(dev, true);
221}
222
223static inline void pm_runtime_dont_use_autosuspend(struct device *dev)
224{
225 __pm_runtime_use_autosuspend(dev, false);
226}
227
143#endif 228#endif
diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h
index 76aca48722ae..9cff00dd6b63 100644
--- a/include/linux/pm_wakeup.h
+++ b/include/linux/pm_wakeup.h
@@ -2,6 +2,7 @@
2 * pm_wakeup.h - Power management wakeup interface 2 * pm_wakeup.h - Power management wakeup interface
3 * 3 *
4 * Copyright (C) 2008 Alan Stern 4 * Copyright (C) 2008 Alan Stern
5 * Copyright (C) 2010 Rafael J. Wysocki, Novell Inc.
5 * 6 *
6 * This program is free software; you can redistribute it and/or modify 7 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by 8 * it under the terms of the GNU General Public License as published by
@@ -27,19 +28,77 @@
27 28
28#include <linux/types.h> 29#include <linux/types.h>
29 30
30#ifdef CONFIG_PM 31/**
31 32 * struct wakeup_source - Representation of wakeup sources
32/* Changes to device_may_wakeup take effect on the next pm state change.
33 * 33 *
34 * By default, most devices should leave wakeup disabled. The exceptions 34 * @total_time: Total time this wakeup source has been active.
35 * are devices that everyone expects to be wakeup sources: keyboards, 35 * @max_time: Maximum time this wakeup source has been continuously active.
36 * power buttons, possibly network interfaces, etc. 36 * @last_time: Monotonic clock when the wakeup source's was activated last time.
37 * @event_count: Number of signaled wakeup events.
38 * @active_count: Number of times the wakeup sorce was activated.
39 * @relax_count: Number of times the wakeup sorce was deactivated.
40 * @hit_count: Number of times the wakeup sorce might abort system suspend.
41 * @active: Status of the wakeup source.
37 */ 42 */
38static inline void device_init_wakeup(struct device *dev, bool val) 43struct wakeup_source {
44 char *name;
45 struct list_head entry;
46 spinlock_t lock;
47 struct timer_list timer;
48 unsigned long timer_expires;
49 ktime_t total_time;
50 ktime_t max_time;
51 ktime_t last_time;
52 unsigned long event_count;
53 unsigned long active_count;
54 unsigned long relax_count;
55 unsigned long hit_count;
56 unsigned int active:1;
57};
58
59#ifdef CONFIG_PM_SLEEP
60
61/*
62 * Changes to device_may_wakeup take effect on the next pm state change.
63 */
64
65static inline void device_set_wakeup_capable(struct device *dev, bool capable)
66{
67 dev->power.can_wakeup = capable;
68}
69
70static inline bool device_can_wakeup(struct device *dev)
71{
72 return dev->power.can_wakeup;
73}
74
75
76
77static inline bool device_may_wakeup(struct device *dev)
39{ 78{
40 dev->power.can_wakeup = dev->power.should_wakeup = val; 79 return dev->power.can_wakeup && !!dev->power.wakeup;
41} 80}
42 81
82/* drivers/base/power/wakeup.c */
83extern struct wakeup_source *wakeup_source_create(const char *name);
84extern void wakeup_source_destroy(struct wakeup_source *ws);
85extern void wakeup_source_add(struct wakeup_source *ws);
86extern void wakeup_source_remove(struct wakeup_source *ws);
87extern struct wakeup_source *wakeup_source_register(const char *name);
88extern void wakeup_source_unregister(struct wakeup_source *ws);
89extern int device_wakeup_enable(struct device *dev);
90extern int device_wakeup_disable(struct device *dev);
91extern int device_init_wakeup(struct device *dev, bool val);
92extern int device_set_wakeup_enable(struct device *dev, bool enable);
93extern void __pm_stay_awake(struct wakeup_source *ws);
94extern void pm_stay_awake(struct device *dev);
95extern void __pm_relax(struct wakeup_source *ws);
96extern void pm_relax(struct device *dev);
97extern void __pm_wakeup_event(struct wakeup_source *ws, unsigned int msec);
98extern void pm_wakeup_event(struct device *dev, unsigned int msec);
99
100#else /* !CONFIG_PM_SLEEP */
101
43static inline void device_set_wakeup_capable(struct device *dev, bool capable) 102static inline void device_set_wakeup_capable(struct device *dev, bool capable)
44{ 103{
45 dev->power.can_wakeup = capable; 104 dev->power.can_wakeup = capable;
@@ -50,43 +109,63 @@ static inline bool device_can_wakeup(struct device *dev)
50 return dev->power.can_wakeup; 109 return dev->power.can_wakeup;
51} 110}
52 111
53static inline void device_set_wakeup_enable(struct device *dev, bool enable) 112static inline bool device_may_wakeup(struct device *dev)
54{ 113{
55 dev->power.should_wakeup = enable; 114 return false;
56} 115}
57 116
58static inline bool device_may_wakeup(struct device *dev) 117static inline struct wakeup_source *wakeup_source_create(const char *name)
59{ 118{
60 return dev->power.can_wakeup && dev->power.should_wakeup; 119 return NULL;
61} 120}
62 121
63#else /* !CONFIG_PM */ 122static inline void wakeup_source_destroy(struct wakeup_source *ws) {}
123
124static inline void wakeup_source_add(struct wakeup_source *ws) {}
64 125
65/* For some reason the following routines work even without CONFIG_PM */ 126static inline void wakeup_source_remove(struct wakeup_source *ws) {}
66static inline void device_init_wakeup(struct device *dev, bool val) 127
128static inline struct wakeup_source *wakeup_source_register(const char *name)
67{ 129{
68 dev->power.can_wakeup = val; 130 return NULL;
69} 131}
70 132
71static inline void device_set_wakeup_capable(struct device *dev, bool capable) 133static inline void wakeup_source_unregister(struct wakeup_source *ws) {}
134
135static inline int device_wakeup_enable(struct device *dev)
72{ 136{
73 dev->power.can_wakeup = capable; 137 return -EINVAL;
74} 138}
75 139
76static inline bool device_can_wakeup(struct device *dev) 140static inline int device_wakeup_disable(struct device *dev)
77{ 141{
78 return dev->power.can_wakeup; 142 return 0;
79} 143}
80 144
81static inline void device_set_wakeup_enable(struct device *dev, bool enable) 145static inline int device_init_wakeup(struct device *dev, bool val)
82{ 146{
147 dev->power.can_wakeup = val;
148 return val ? -EINVAL : 0;
83} 149}
84 150
85static inline bool device_may_wakeup(struct device *dev) 151
152static inline int device_set_wakeup_enable(struct device *dev, bool enable)
86{ 153{
87 return false; 154 return -EINVAL;
88} 155}
89 156
90#endif /* !CONFIG_PM */ 157static inline void __pm_stay_awake(struct wakeup_source *ws) {}
158
159static inline void pm_stay_awake(struct device *dev) {}
160
161static inline void __pm_relax(struct wakeup_source *ws) {}
162
163static inline void pm_relax(struct device *dev) {}
164
165static inline void __pm_wakeup_event(struct wakeup_source *ws, unsigned int msec) {}
166
167static inline void pm_wakeup_event(struct device *dev, unsigned int msec) {}
168
169#endif /* !CONFIG_PM_SLEEP */
91 170
92#endif /* _LINUX_PM_WAKEUP_H */ 171#endif /* _LINUX_PM_WAKEUP_H */
diff --git a/include/linux/resume-trace.h b/include/linux/resume-trace.h
index bc8c3881c729..f31db2368782 100644
--- a/include/linux/resume-trace.h
+++ b/include/linux/resume-trace.h
@@ -3,6 +3,7 @@
3 3
4#ifdef CONFIG_PM_TRACE 4#ifdef CONFIG_PM_TRACE
5#include <asm/resume-trace.h> 5#include <asm/resume-trace.h>
6#include <linux/types.h>
6 7
7extern int pm_trace_enabled; 8extern int pm_trace_enabled;
8 9
@@ -14,6 +15,7 @@ static inline int pm_trace_is_enabled(void)
14struct device; 15struct device;
15extern void set_trace_device(struct device *); 16extern void set_trace_device(struct device *);
16extern void generate_resume_trace(const void *tracedata, unsigned int user); 17extern void generate_resume_trace(const void *tracedata, unsigned int user);
18extern int show_trace_dev_match(char *buf, size_t size);
17 19
18#define TRACE_DEVICE(dev) do { \ 20#define TRACE_DEVICE(dev) do { \
19 if (pm_trace_enabled) \ 21 if (pm_trace_enabled) \
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 4af270ec2204..26697514c5ec 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -293,8 +293,8 @@ extern int unregister_pm_notifier(struct notifier_block *nb);
293extern bool events_check_enabled; 293extern bool events_check_enabled;
294 294
295extern bool pm_check_wakeup_events(void); 295extern bool pm_check_wakeup_events(void);
296extern bool pm_get_wakeup_count(unsigned long *count); 296extern bool pm_get_wakeup_count(unsigned int *count);
297extern bool pm_save_wakeup_count(unsigned long count); 297extern bool pm_save_wakeup_count(unsigned int count);
298#else /* !CONFIG_PM_SLEEP */ 298#else /* !CONFIG_PM_SLEEP */
299 299
300static inline int register_pm_notifier(struct notifier_block *nb) 300static inline int register_pm_notifier(struct notifier_block *nb)
@@ -308,6 +308,8 @@ static inline int unregister_pm_notifier(struct notifier_block *nb)
308} 308}
309 309
310#define pm_notifier(fn, pri) do { (void)(fn); } while (0) 310#define pm_notifier(fn, pri) do { (void)(fn); } while (0)
311
312static inline bool pm_check_wakeup_events(void) { return true; }
311#endif /* !CONFIG_PM_SLEEP */ 313#endif /* !CONFIG_PM_SLEEP */
312 314
313extern struct mutex pm_mutex; 315extern struct mutex pm_mutex;
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 96eb576d82fd..30b881555fa5 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -164,6 +164,10 @@ int sysfs_add_file_to_group(struct kobject *kobj,
164 const struct attribute *attr, const char *group); 164 const struct attribute *attr, const char *group);
165void sysfs_remove_file_from_group(struct kobject *kobj, 165void sysfs_remove_file_from_group(struct kobject *kobj,
166 const struct attribute *attr, const char *group); 166 const struct attribute *attr, const char *group);
167int sysfs_merge_group(struct kobject *kobj,
168 const struct attribute_group *grp);
169void sysfs_unmerge_group(struct kobject *kobj,
170 const struct attribute_group *grp);
167 171
168void sysfs_notify(struct kobject *kobj, const char *dir, const char *attr); 172void sysfs_notify(struct kobject *kobj, const char *dir, const char *attr);
169void sysfs_notify_dirent(struct sysfs_dirent *sd); 173void sysfs_notify_dirent(struct sysfs_dirent *sd);
@@ -302,6 +306,17 @@ static inline void sysfs_remove_file_from_group(struct kobject *kobj,
302{ 306{
303} 307}
304 308
309static inline int sysfs_merge_group(struct kobject *kobj,
310 const struct attribute_group *grp)
311{
312 return 0;
313}
314
315static inline void sysfs_unmerge_group(struct kobject *kobj,
316 const struct attribute_group *grp)
317{
318}
319
305static inline void sysfs_notify(struct kobject *kobj, const char *dir, 320static inline void sysfs_notify(struct kobject *kobj, const char *dir,
306 const char *attr) 321 const char *attr)
307{ 322{
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index ca6066a6952e..29bff6117abc 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -86,6 +86,7 @@ config PM_SLEEP_SMP
86 depends on SMP 86 depends on SMP
87 depends on ARCH_SUSPEND_POSSIBLE || ARCH_HIBERNATION_POSSIBLE 87 depends on ARCH_SUSPEND_POSSIBLE || ARCH_HIBERNATION_POSSIBLE
88 depends on PM_SLEEP 88 depends on PM_SLEEP
89 select HOTPLUG
89 select HOTPLUG_CPU 90 select HOTPLUG_CPU
90 default y 91 default y
91 92
@@ -137,6 +138,8 @@ config SUSPEND_FREEZER
137config HIBERNATION 138config HIBERNATION
138 bool "Hibernation (aka 'suspend to disk')" 139 bool "Hibernation (aka 'suspend to disk')"
139 depends on PM && SWAP && ARCH_HIBERNATION_POSSIBLE 140 depends on PM && SWAP && ARCH_HIBERNATION_POSSIBLE
141 select LZO_COMPRESS
142 select LZO_DECOMPRESS
140 select SUSPEND_NVS if HAS_IOMEM 143 select SUSPEND_NVS if HAS_IOMEM
141 ---help--- 144 ---help---
142 Enable the suspend to disk (STD) functionality, which is usually 145 Enable the suspend to disk (STD) functionality, which is usually
@@ -242,3 +245,17 @@ config PM_OPS
242 bool 245 bool
243 depends on PM_SLEEP || PM_RUNTIME 246 depends on PM_SLEEP || PM_RUNTIME
244 default y 247 default y
248
249config PM_OPP
250 bool "Operating Performance Point (OPP) Layer library"
251 depends on PM
252 ---help---
253 SOCs have a standard set of tuples consisting of frequency and
254 voltage pairs that the device will support per voltage domain. This
255 is called Operating Performance Point or OPP. The actual definitions
256 of OPP varies over silicon within the same family of devices.
257
258 OPP layer organizes the data internally using device pointers
259 representing individual voltage domains and provides SOC
260 implementations a ready to use framework to manage OPPs.
261 For more information, read <file:Documentation/power/opp.txt>
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 8dc31e02ae12..657272e91d0a 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -29,6 +29,7 @@
29#include "power.h" 29#include "power.h"
30 30
31 31
32static int nocompress = 0;
32static int noresume = 0; 33static int noresume = 0;
33static char resume_file[256] = CONFIG_PM_STD_PARTITION; 34static char resume_file[256] = CONFIG_PM_STD_PARTITION;
34dev_t swsusp_resume_device; 35dev_t swsusp_resume_device;
@@ -638,6 +639,8 @@ int hibernate(void)
638 639
639 if (hibernation_mode == HIBERNATION_PLATFORM) 640 if (hibernation_mode == HIBERNATION_PLATFORM)
640 flags |= SF_PLATFORM_MODE; 641 flags |= SF_PLATFORM_MODE;
642 if (nocompress)
643 flags |= SF_NOCOMPRESS_MODE;
641 pr_debug("PM: writing image.\n"); 644 pr_debug("PM: writing image.\n");
642 error = swsusp_write(flags); 645 error = swsusp_write(flags);
643 swsusp_free(); 646 swsusp_free();
@@ -705,7 +708,7 @@ static int software_resume(void)
705 goto Unlock; 708 goto Unlock;
706 } 709 }
707 710
708 pr_debug("PM: Checking image partition %s\n", resume_file); 711 pr_debug("PM: Checking hibernation image partition %s\n", resume_file);
709 712
710 /* Check if the device is there */ 713 /* Check if the device is there */
711 swsusp_resume_device = name_to_dev_t(resume_file); 714 swsusp_resume_device = name_to_dev_t(resume_file);
@@ -730,10 +733,10 @@ static int software_resume(void)
730 } 733 }
731 734
732 Check_image: 735 Check_image:
733 pr_debug("PM: Resume from partition %d:%d\n", 736 pr_debug("PM: Hibernation image partition %d:%d present\n",
734 MAJOR(swsusp_resume_device), MINOR(swsusp_resume_device)); 737 MAJOR(swsusp_resume_device), MINOR(swsusp_resume_device));
735 738
736 pr_debug("PM: Checking hibernation image.\n"); 739 pr_debug("PM: Looking for hibernation image.\n");
737 error = swsusp_check(); 740 error = swsusp_check();
738 if (error) 741 if (error)
739 goto Unlock; 742 goto Unlock;
@@ -765,14 +768,14 @@ static int software_resume(void)
765 goto Done; 768 goto Done;
766 } 769 }
767 770
768 pr_debug("PM: Reading hibernation image.\n"); 771 pr_debug("PM: Loading hibernation image.\n");
769 772
770 error = swsusp_read(&flags); 773 error = swsusp_read(&flags);
771 swsusp_close(FMODE_READ); 774 swsusp_close(FMODE_READ);
772 if (!error) 775 if (!error)
773 hibernation_restore(flags & SF_PLATFORM_MODE); 776 hibernation_restore(flags & SF_PLATFORM_MODE);
774 777
775 printk(KERN_ERR "PM: Restore failed, recovering.\n"); 778 printk(KERN_ERR "PM: Failed to load hibernation image, recovering.\n");
776 swsusp_free(); 779 swsusp_free();
777 thaw_processes(); 780 thaw_processes();
778 Done: 781 Done:
@@ -785,7 +788,7 @@ static int software_resume(void)
785 /* For success case, the suspend path will release the lock */ 788 /* For success case, the suspend path will release the lock */
786 Unlock: 789 Unlock:
787 mutex_unlock(&pm_mutex); 790 mutex_unlock(&pm_mutex);
788 pr_debug("PM: Resume from disk failed.\n"); 791 pr_debug("PM: Hibernation image not present or could not be loaded.\n");
789 return error; 792 return error;
790close_finish: 793close_finish:
791 swsusp_close(FMODE_READ); 794 swsusp_close(FMODE_READ);
@@ -1004,6 +1007,15 @@ static int __init resume_offset_setup(char *str)
1004 return 1; 1007 return 1;
1005} 1008}
1006 1009
1010static int __init hibernate_setup(char *str)
1011{
1012 if (!strncmp(str, "noresume", 8))
1013 noresume = 1;
1014 else if (!strncmp(str, "nocompress", 10))
1015 nocompress = 1;
1016 return 1;
1017}
1018
1007static int __init noresume_setup(char *str) 1019static int __init noresume_setup(char *str)
1008{ 1020{
1009 noresume = 1; 1021 noresume = 1;
@@ -1013,3 +1025,4 @@ static int __init noresume_setup(char *str)
1013__setup("noresume", noresume_setup); 1025__setup("noresume", noresume_setup);
1014__setup("resume_offset=", resume_offset_setup); 1026__setup("resume_offset=", resume_offset_setup);
1015__setup("resume=", resume_setup); 1027__setup("resume=", resume_setup);
1028__setup("hibernate=", hibernate_setup);
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 62b0bc6e4983..7b5db6a8561e 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -237,18 +237,18 @@ static ssize_t wakeup_count_show(struct kobject *kobj,
237 struct kobj_attribute *attr, 237 struct kobj_attribute *attr,
238 char *buf) 238 char *buf)
239{ 239{
240 unsigned long val; 240 unsigned int val;
241 241
242 return pm_get_wakeup_count(&val) ? sprintf(buf, "%lu\n", val) : -EINTR; 242 return pm_get_wakeup_count(&val) ? sprintf(buf, "%u\n", val) : -EINTR;
243} 243}
244 244
245static ssize_t wakeup_count_store(struct kobject *kobj, 245static ssize_t wakeup_count_store(struct kobject *kobj,
246 struct kobj_attribute *attr, 246 struct kobj_attribute *attr,
247 const char *buf, size_t n) 247 const char *buf, size_t n)
248{ 248{
249 unsigned long val; 249 unsigned int val;
250 250
251 if (sscanf(buf, "%lu", &val) == 1) { 251 if (sscanf(buf, "%u", &val) == 1) {
252 if (pm_save_wakeup_count(val)) 252 if (pm_save_wakeup_count(val))
253 return n; 253 return n;
254 } 254 }
@@ -281,12 +281,30 @@ pm_trace_store(struct kobject *kobj, struct kobj_attribute *attr,
281} 281}
282 282
283power_attr(pm_trace); 283power_attr(pm_trace);
284
285static ssize_t pm_trace_dev_match_show(struct kobject *kobj,
286 struct kobj_attribute *attr,
287 char *buf)
288{
289 return show_trace_dev_match(buf, PAGE_SIZE);
290}
291
292static ssize_t
293pm_trace_dev_match_store(struct kobject *kobj, struct kobj_attribute *attr,
294 const char *buf, size_t n)
295{
296 return -EINVAL;
297}
298
299power_attr(pm_trace_dev_match);
300
284#endif /* CONFIG_PM_TRACE */ 301#endif /* CONFIG_PM_TRACE */
285 302
286static struct attribute * g[] = { 303static struct attribute * g[] = {
287 &state_attr.attr, 304 &state_attr.attr,
288#ifdef CONFIG_PM_TRACE 305#ifdef CONFIG_PM_TRACE
289 &pm_trace_attr.attr, 306 &pm_trace_attr.attr,
307 &pm_trace_dev_match_attr.attr,
290#endif 308#endif
291#ifdef CONFIG_PM_SLEEP 309#ifdef CONFIG_PM_SLEEP
292 &pm_async_attr.attr, 310 &pm_async_attr.attr,
@@ -308,7 +326,7 @@ EXPORT_SYMBOL_GPL(pm_wq);
308 326
309static int __init pm_start_workqueue(void) 327static int __init pm_start_workqueue(void)
310{ 328{
311 pm_wq = create_freezeable_workqueue("pm"); 329 pm_wq = alloc_workqueue("pm", WQ_FREEZEABLE, 0);
312 330
313 return pm_wq ? 0 : -ENOMEM; 331 return pm_wq ? 0 : -ENOMEM;
314} 332}
@@ -321,6 +339,7 @@ static int __init pm_init(void)
321 int error = pm_start_workqueue(); 339 int error = pm_start_workqueue();
322 if (error) 340 if (error)
323 return error; 341 return error;
342 hibernate_image_size_init();
324 power_kobj = kobject_create_and_add("power", NULL); 343 power_kobj = kobject_create_and_add("power", NULL);
325 if (!power_kobj) 344 if (!power_kobj)
326 return -ENOMEM; 345 return -ENOMEM;
diff --git a/kernel/power/power.h b/kernel/power/power.h
index 006270fe382d..03634be55f62 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -14,6 +14,9 @@ struct swsusp_info {
14} __attribute__((aligned(PAGE_SIZE))); 14} __attribute__((aligned(PAGE_SIZE)));
15 15
16#ifdef CONFIG_HIBERNATION 16#ifdef CONFIG_HIBERNATION
17/* kernel/power/snapshot.c */
18extern void __init hibernate_image_size_init(void);
19
17#ifdef CONFIG_ARCH_HIBERNATION_HEADER 20#ifdef CONFIG_ARCH_HIBERNATION_HEADER
18/* Maximum size of architecture specific data in a hibernation header */ 21/* Maximum size of architecture specific data in a hibernation header */
19#define MAX_ARCH_HEADER_SIZE (sizeof(struct new_utsname) + 4) 22#define MAX_ARCH_HEADER_SIZE (sizeof(struct new_utsname) + 4)
@@ -49,7 +52,11 @@ static inline char *check_image_kernel(struct swsusp_info *info)
49extern int hibernation_snapshot(int platform_mode); 52extern int hibernation_snapshot(int platform_mode);
50extern int hibernation_restore(int platform_mode); 53extern int hibernation_restore(int platform_mode);
51extern int hibernation_platform_enter(void); 54extern int hibernation_platform_enter(void);
52#endif 55
56#else /* !CONFIG_HIBERNATION */
57
58static inline void hibernate_image_size_init(void) {}
59#endif /* !CONFIG_HIBERNATION */
53 60
54extern int pfn_is_nosave(unsigned long); 61extern int pfn_is_nosave(unsigned long);
55 62
@@ -134,6 +141,7 @@ extern int swsusp_swap_in_use(void);
134 * the image header. 141 * the image header.
135 */ 142 */
136#define SF_PLATFORM_MODE 1 143#define SF_PLATFORM_MODE 1
144#define SF_NOCOMPRESS_MODE 2
137 145
138/* kernel/power/hibernate.c */ 146/* kernel/power/hibernate.c */
139extern int swsusp_check(void); 147extern int swsusp_check(void);
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 028a99598f49..e50b4c1b2a0f 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -40,6 +40,7 @@ static int try_to_freeze_tasks(bool sig_only)
40 struct timeval start, end; 40 struct timeval start, end;
41 u64 elapsed_csecs64; 41 u64 elapsed_csecs64;
42 unsigned int elapsed_csecs; 42 unsigned int elapsed_csecs;
43 bool wakeup = false;
43 44
44 do_gettimeofday(&start); 45 do_gettimeofday(&start);
45 46
@@ -78,6 +79,11 @@ static int try_to_freeze_tasks(bool sig_only)
78 if (!todo || time_after(jiffies, end_time)) 79 if (!todo || time_after(jiffies, end_time))
79 break; 80 break;
80 81
82 if (!pm_check_wakeup_events()) {
83 wakeup = true;
84 break;
85 }
86
81 /* 87 /*
82 * We need to retry, but first give the freezing tasks some 88 * We need to retry, but first give the freezing tasks some
83 * time to enter the regrigerator. 89 * time to enter the regrigerator.
@@ -97,8 +103,9 @@ static int try_to_freeze_tasks(bool sig_only)
97 * but it cleans up leftover PF_FREEZE requests. 103 * but it cleans up leftover PF_FREEZE requests.
98 */ 104 */
99 printk("\n"); 105 printk("\n");
100 printk(KERN_ERR "Freezing of tasks failed after %d.%02d seconds " 106 printk(KERN_ERR "Freezing of tasks %s after %d.%02d seconds "
101 "(%d tasks refusing to freeze, wq_busy=%d):\n", 107 "(%d tasks refusing to freeze, wq_busy=%d):\n",
108 wakeup ? "aborted" : "failed",
102 elapsed_csecs / 100, elapsed_csecs % 100, 109 elapsed_csecs / 100, elapsed_csecs % 100,
103 todo - wq_busy, wq_busy); 110 todo - wq_busy, wq_busy);
104 111
@@ -107,7 +114,7 @@ static int try_to_freeze_tasks(bool sig_only)
107 read_lock(&tasklist_lock); 114 read_lock(&tasklist_lock);
108 do_each_thread(g, p) { 115 do_each_thread(g, p) {
109 task_lock(p); 116 task_lock(p);
110 if (freezing(p) && !freezer_should_skip(p)) 117 if (!wakeup && freezing(p) && !freezer_should_skip(p))
111 sched_show_task(p); 118 sched_show_task(p);
112 cancel_freezing(p); 119 cancel_freezing(p);
113 task_unlock(p); 120 task_unlock(p);
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index d3f795f01bbc..ac7eb109f196 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -46,7 +46,12 @@ static void swsusp_unset_page_forbidden(struct page *);
46 * size will not exceed N bytes, but if that is impossible, it will 46 * size will not exceed N bytes, but if that is impossible, it will
47 * try to create the smallest image possible. 47 * try to create the smallest image possible.
48 */ 48 */
49unsigned long image_size = 500 * 1024 * 1024; 49unsigned long image_size;
50
51void __init hibernate_image_size_init(void)
52{
53 image_size = ((totalram_pages * 2) / 5) * PAGE_SIZE;
54}
50 55
51/* List of PBEs needed for restoring the pages that were allocated before 56/* List of PBEs needed for restoring the pages that were allocated before
52 * the suspend and included in the suspend image, but have also been 57 * the suspend and included in the suspend image, but have also been
@@ -1318,12 +1323,14 @@ int hibernate_preallocate_memory(void)
1318 1323
1319 /* Compute the maximum number of saveable pages to leave in memory. */ 1324 /* Compute the maximum number of saveable pages to leave in memory. */
1320 max_size = (count - (size + PAGES_FOR_IO)) / 2 - 2 * SPARE_PAGES; 1325 max_size = (count - (size + PAGES_FOR_IO)) / 2 - 2 * SPARE_PAGES;
1326 /* Compute the desired number of image pages specified by image_size. */
1321 size = DIV_ROUND_UP(image_size, PAGE_SIZE); 1327 size = DIV_ROUND_UP(image_size, PAGE_SIZE);
1322 if (size > max_size) 1328 if (size > max_size)
1323 size = max_size; 1329 size = max_size;
1324 /* 1330 /*
1325 * If the maximum is not less than the current number of saveable pages 1331 * If the desired number of image pages is at least as large as the
1326 * in memory, allocate page frames for the image and we're done. 1332 * current number of saveable pages in memory, allocate page frames for
1333 * the image and we're done.
1327 */ 1334 */
1328 if (size >= saveable) { 1335 if (size >= saveable) {
1329 pages = preallocate_image_highmem(save_highmem); 1336 pages = preallocate_image_highmem(save_highmem);
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index e6a5bdf61a37..916eaa790399 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -24,10 +24,12 @@
24#include <linux/swapops.h> 24#include <linux/swapops.h>
25#include <linux/pm.h> 25#include <linux/pm.h>
26#include <linux/slab.h> 26#include <linux/slab.h>
27#include <linux/lzo.h>
28#include <linux/vmalloc.h>
27 29
28#include "power.h" 30#include "power.h"
29 31
30#define SWSUSP_SIG "S1SUSPEND" 32#define HIBERNATE_SIG "LINHIB0001"
31 33
32/* 34/*
33 * The swap map is a data structure used for keeping track of each page 35 * The swap map is a data structure used for keeping track of each page
@@ -193,7 +195,7 @@ static int mark_swapfiles(struct swap_map_handle *handle, unsigned int flags)
193 if (!memcmp("SWAP-SPACE",swsusp_header->sig, 10) || 195 if (!memcmp("SWAP-SPACE",swsusp_header->sig, 10) ||
194 !memcmp("SWAPSPACE2",swsusp_header->sig, 10)) { 196 !memcmp("SWAPSPACE2",swsusp_header->sig, 10)) {
195 memcpy(swsusp_header->orig_sig,swsusp_header->sig, 10); 197 memcpy(swsusp_header->orig_sig,swsusp_header->sig, 10);
196 memcpy(swsusp_header->sig,SWSUSP_SIG, 10); 198 memcpy(swsusp_header->sig, HIBERNATE_SIG, 10);
197 swsusp_header->image = handle->first_sector; 199 swsusp_header->image = handle->first_sector;
198 swsusp_header->flags = flags; 200 swsusp_header->flags = flags;
199 error = hib_bio_write_page(swsusp_resume_block, 201 error = hib_bio_write_page(swsusp_resume_block,
@@ -357,6 +359,18 @@ static int swap_writer_finish(struct swap_map_handle *handle,
357 return error; 359 return error;
358} 360}
359 361
362/* We need to remember how much compressed data we need to read. */
363#define LZO_HEADER sizeof(size_t)
364
365/* Number of pages/bytes we'll compress at one time. */
366#define LZO_UNC_PAGES 32
367#define LZO_UNC_SIZE (LZO_UNC_PAGES * PAGE_SIZE)
368
369/* Number of pages/bytes we need for compressed data (worst case). */
370#define LZO_CMP_PAGES DIV_ROUND_UP(lzo1x_worst_compress(LZO_UNC_SIZE) + \
371 LZO_HEADER, PAGE_SIZE)
372#define LZO_CMP_SIZE (LZO_CMP_PAGES * PAGE_SIZE)
373
360/** 374/**
361 * save_image - save the suspend image data 375 * save_image - save the suspend image data
362 */ 376 */
@@ -404,6 +418,137 @@ static int save_image(struct swap_map_handle *handle,
404 return ret; 418 return ret;
405} 419}
406 420
421
422/**
423 * save_image_lzo - Save the suspend image data compressed with LZO.
424 * @handle: Swap mam handle to use for saving the image.
425 * @snapshot: Image to read data from.
426 * @nr_to_write: Number of pages to save.
427 */
428static int save_image_lzo(struct swap_map_handle *handle,
429 struct snapshot_handle *snapshot,
430 unsigned int nr_to_write)
431{
432 unsigned int m;
433 int ret = 0;
434 int nr_pages;
435 int err2;
436 struct bio *bio;
437 struct timeval start;
438 struct timeval stop;
439 size_t off, unc_len, cmp_len;
440 unsigned char *unc, *cmp, *wrk, *page;
441
442 page = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH);
443 if (!page) {
444 printk(KERN_ERR "PM: Failed to allocate LZO page\n");
445 return -ENOMEM;
446 }
447
448 wrk = vmalloc(LZO1X_1_MEM_COMPRESS);
449 if (!wrk) {
450 printk(KERN_ERR "PM: Failed to allocate LZO workspace\n");
451 free_page((unsigned long)page);
452 return -ENOMEM;
453 }
454
455 unc = vmalloc(LZO_UNC_SIZE);
456 if (!unc) {
457 printk(KERN_ERR "PM: Failed to allocate LZO uncompressed\n");
458 vfree(wrk);
459 free_page((unsigned long)page);
460 return -ENOMEM;
461 }
462
463 cmp = vmalloc(LZO_CMP_SIZE);
464 if (!cmp) {
465 printk(KERN_ERR "PM: Failed to allocate LZO compressed\n");
466 vfree(unc);
467 vfree(wrk);
468 free_page((unsigned long)page);
469 return -ENOMEM;
470 }
471
472 printk(KERN_INFO
473 "PM: Compressing and saving image data (%u pages) ... ",
474 nr_to_write);
475 m = nr_to_write / 100;
476 if (!m)
477 m = 1;
478 nr_pages = 0;
479 bio = NULL;
480 do_gettimeofday(&start);
481 for (;;) {
482 for (off = 0; off < LZO_UNC_SIZE; off += PAGE_SIZE) {
483 ret = snapshot_read_next(snapshot);
484 if (ret < 0)
485 goto out_finish;
486
487 if (!ret)
488 break;
489
490 memcpy(unc + off, data_of(*snapshot), PAGE_SIZE);
491
492 if (!(nr_pages % m))
493 printk(KERN_CONT "\b\b\b\b%3d%%", nr_pages / m);
494 nr_pages++;
495 }
496
497 if (!off)
498 break;
499
500 unc_len = off;
501 ret = lzo1x_1_compress(unc, unc_len,
502 cmp + LZO_HEADER, &cmp_len, wrk);
503 if (ret < 0) {
504 printk(KERN_ERR "PM: LZO compression failed\n");
505 break;
506 }
507
508 if (unlikely(!cmp_len ||
509 cmp_len > lzo1x_worst_compress(unc_len))) {
510 printk(KERN_ERR "PM: Invalid LZO compressed length\n");
511 ret = -1;
512 break;
513 }
514
515 *(size_t *)cmp = cmp_len;
516
517 /*
518 * Given we are writing one page at a time to disk, we copy
519 * that much from the buffer, although the last bit will likely
520 * be smaller than full page. This is OK - we saved the length
521 * of the compressed data, so any garbage at the end will be
522 * discarded when we read it.
523 */
524 for (off = 0; off < LZO_HEADER + cmp_len; off += PAGE_SIZE) {
525 memcpy(page, cmp + off, PAGE_SIZE);
526
527 ret = swap_write_page(handle, page, &bio);
528 if (ret)
529 goto out_finish;
530 }
531 }
532
533out_finish:
534 err2 = hib_wait_on_bio_chain(&bio);
535 do_gettimeofday(&stop);
536 if (!ret)
537 ret = err2;
538 if (!ret)
539 printk(KERN_CONT "\b\b\b\bdone\n");
540 else
541 printk(KERN_CONT "\n");
542 swsusp_show_speed(&start, &stop, nr_to_write, "Wrote");
543
544 vfree(cmp);
545 vfree(unc);
546 vfree(wrk);
547 free_page((unsigned long)page);
548
549 return ret;
550}
551
407/** 552/**
408 * enough_swap - Make sure we have enough swap to save the image. 553 * enough_swap - Make sure we have enough swap to save the image.
409 * 554 *
@@ -411,12 +556,16 @@ static int save_image(struct swap_map_handle *handle,
411 * space avaiable from the resume partition. 556 * space avaiable from the resume partition.
412 */ 557 */
413 558
414static int enough_swap(unsigned int nr_pages) 559static int enough_swap(unsigned int nr_pages, unsigned int flags)
415{ 560{
416 unsigned int free_swap = count_swap_pages(root_swap, 1); 561 unsigned int free_swap = count_swap_pages(root_swap, 1);
562 unsigned int required;
417 563
418 pr_debug("PM: Free swap pages: %u\n", free_swap); 564 pr_debug("PM: Free swap pages: %u\n", free_swap);
419 return free_swap > nr_pages + PAGES_FOR_IO; 565
566 required = PAGES_FOR_IO + ((flags & SF_NOCOMPRESS_MODE) ?
567 nr_pages : (nr_pages * LZO_CMP_PAGES) / LZO_UNC_PAGES + 1);
568 return free_swap > required;
420} 569}
421 570
422/** 571/**
@@ -443,7 +592,7 @@ int swsusp_write(unsigned int flags)
443 printk(KERN_ERR "PM: Cannot get swap writer\n"); 592 printk(KERN_ERR "PM: Cannot get swap writer\n");
444 return error; 593 return error;
445 } 594 }
446 if (!enough_swap(pages)) { 595 if (!enough_swap(pages, flags)) {
447 printk(KERN_ERR "PM: Not enough free swap\n"); 596 printk(KERN_ERR "PM: Not enough free swap\n");
448 error = -ENOSPC; 597 error = -ENOSPC;
449 goto out_finish; 598 goto out_finish;
@@ -458,8 +607,11 @@ int swsusp_write(unsigned int flags)
458 } 607 }
459 header = (struct swsusp_info *)data_of(snapshot); 608 header = (struct swsusp_info *)data_of(snapshot);
460 error = swap_write_page(&handle, header, NULL); 609 error = swap_write_page(&handle, header, NULL);
461 if (!error) 610 if (!error) {
462 error = save_image(&handle, &snapshot, pages - 1); 611 error = (flags & SF_NOCOMPRESS_MODE) ?
612 save_image(&handle, &snapshot, pages - 1) :
613 save_image_lzo(&handle, &snapshot, pages - 1);
614 }
463out_finish: 615out_finish:
464 error = swap_writer_finish(&handle, flags, error); 616 error = swap_writer_finish(&handle, flags, error);
465 return error; 617 return error;
@@ -590,6 +742,127 @@ static int load_image(struct swap_map_handle *handle,
590} 742}
591 743
592/** 744/**
745 * load_image_lzo - Load compressed image data and decompress them with LZO.
746 * @handle: Swap map handle to use for loading data.
747 * @snapshot: Image to copy uncompressed data into.
748 * @nr_to_read: Number of pages to load.
749 */
750static int load_image_lzo(struct swap_map_handle *handle,
751 struct snapshot_handle *snapshot,
752 unsigned int nr_to_read)
753{
754 unsigned int m;
755 int error = 0;
756 struct timeval start;
757 struct timeval stop;
758 unsigned nr_pages;
759 size_t off, unc_len, cmp_len;
760 unsigned char *unc, *cmp, *page;
761
762 page = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH);
763 if (!page) {
764 printk(KERN_ERR "PM: Failed to allocate LZO page\n");
765 return -ENOMEM;
766 }
767
768 unc = vmalloc(LZO_UNC_SIZE);
769 if (!unc) {
770 printk(KERN_ERR "PM: Failed to allocate LZO uncompressed\n");
771 free_page((unsigned long)page);
772 return -ENOMEM;
773 }
774
775 cmp = vmalloc(LZO_CMP_SIZE);
776 if (!cmp) {
777 printk(KERN_ERR "PM: Failed to allocate LZO compressed\n");
778 vfree(unc);
779 free_page((unsigned long)page);
780 return -ENOMEM;
781 }
782
783 printk(KERN_INFO
784 "PM: Loading and decompressing image data (%u pages) ... ",
785 nr_to_read);
786 m = nr_to_read / 100;
787 if (!m)
788 m = 1;
789 nr_pages = 0;
790 do_gettimeofday(&start);
791
792 error = snapshot_write_next(snapshot);
793 if (error <= 0)
794 goto out_finish;
795
796 for (;;) {
797 error = swap_read_page(handle, page, NULL); /* sync */
798 if (error)
799 break;
800
801 cmp_len = *(size_t *)page;
802 if (unlikely(!cmp_len ||
803 cmp_len > lzo1x_worst_compress(LZO_UNC_SIZE))) {
804 printk(KERN_ERR "PM: Invalid LZO compressed length\n");
805 error = -1;
806 break;
807 }
808
809 memcpy(cmp, page, PAGE_SIZE);
810 for (off = PAGE_SIZE; off < LZO_HEADER + cmp_len; off += PAGE_SIZE) {
811 error = swap_read_page(handle, page, NULL); /* sync */
812 if (error)
813 goto out_finish;
814
815 memcpy(cmp + off, page, PAGE_SIZE);
816 }
817
818 unc_len = LZO_UNC_SIZE;
819 error = lzo1x_decompress_safe(cmp + LZO_HEADER, cmp_len,
820 unc, &unc_len);
821 if (error < 0) {
822 printk(KERN_ERR "PM: LZO decompression failed\n");
823 break;
824 }
825
826 if (unlikely(!unc_len ||
827 unc_len > LZO_UNC_SIZE ||
828 unc_len & (PAGE_SIZE - 1))) {
829 printk(KERN_ERR "PM: Invalid LZO uncompressed length\n");
830 error = -1;
831 break;
832 }
833
834 for (off = 0; off < unc_len; off += PAGE_SIZE) {
835 memcpy(data_of(*snapshot), unc + off, PAGE_SIZE);
836
837 if (!(nr_pages % m))
838 printk("\b\b\b\b%3d%%", nr_pages / m);
839 nr_pages++;
840
841 error = snapshot_write_next(snapshot);
842 if (error <= 0)
843 goto out_finish;
844 }
845 }
846
847out_finish:
848 do_gettimeofday(&stop);
849 if (!error) {
850 printk("\b\b\b\bdone\n");
851 snapshot_write_finalize(snapshot);
852 if (!snapshot_image_loaded(snapshot))
853 error = -ENODATA;
854 } else
855 printk("\n");
856 swsusp_show_speed(&start, &stop, nr_to_read, "Read");
857
858 vfree(cmp);
859 vfree(unc);
860 free_page((unsigned long)page);
861
862 return error;
863}
864
865/**
593 * swsusp_read - read the hibernation image. 866 * swsusp_read - read the hibernation image.
594 * @flags_p: flags passed by the "frozen" kernel in the image header should 867 * @flags_p: flags passed by the "frozen" kernel in the image header should
595 * be written into this memeory location 868 * be written into this memeory location
@@ -612,8 +885,11 @@ int swsusp_read(unsigned int *flags_p)
612 goto end; 885 goto end;
613 if (!error) 886 if (!error)
614 error = swap_read_page(&handle, header, NULL); 887 error = swap_read_page(&handle, header, NULL);
615 if (!error) 888 if (!error) {
616 error = load_image(&handle, &snapshot, header->pages - 1); 889 error = (*flags_p & SF_NOCOMPRESS_MODE) ?
890 load_image(&handle, &snapshot, header->pages - 1) :
891 load_image_lzo(&handle, &snapshot, header->pages - 1);
892 }
617 swap_reader_finish(&handle); 893 swap_reader_finish(&handle);
618end: 894end:
619 if (!error) 895 if (!error)
@@ -640,7 +916,7 @@ int swsusp_check(void)
640 if (error) 916 if (error)
641 goto put; 917 goto put;
642 918
643 if (!memcmp(SWSUSP_SIG, swsusp_header->sig, 10)) { 919 if (!memcmp(HIBERNATE_SIG, swsusp_header->sig, 10)) {
644 memcpy(swsusp_header->sig, swsusp_header->orig_sig, 10); 920 memcpy(swsusp_header->sig, swsusp_header->orig_sig, 10);
645 /* Reset swap signature now */ 921 /* Reset swap signature now */
646 error = hib_bio_write_page(swsusp_resume_block, 922 error = hib_bio_write_page(swsusp_resume_block,
@@ -653,13 +929,13 @@ put:
653 if (error) 929 if (error)
654 blkdev_put(hib_resume_bdev, FMODE_READ); 930 blkdev_put(hib_resume_bdev, FMODE_READ);
655 else 931 else
656 pr_debug("PM: Signature found, resuming\n"); 932 pr_debug("PM: Image signature found, resuming\n");
657 } else { 933 } else {
658 error = PTR_ERR(hib_resume_bdev); 934 error = PTR_ERR(hib_resume_bdev);
659 } 935 }
660 936
661 if (error) 937 if (error)
662 pr_debug("PM: Error %d checking image file\n", error); 938 pr_debug("PM: Image not found (code %d)\n", error);
663 939
664 return error; 940 return error;
665} 941}