aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Rafael J. Wysocki <rjw@sisk.pl> 2010-07-05 16:43:53 -0400
committer Rafael J. Wysocki <rjw@sisk.pl> 2010-07-18 19:58:48 -0400
commit c125e96f044427f38d106fab7bc5e4a5e6a18262 (patch)
tree d9bbd40cc933fe522dbdf8ca2f7edf7b6f2f7ca4
parent b14e033e17d0ea0ba12668d0d2f371cd31586994 (diff)
PM: Make it possible to avoid races between wakeup and system sleep
One of the arguments during the suspend blockers discussion was that the mainline kernel didn't contain any mechanisms making it possible to avoid races between wakeup and system suspend. Generally, there are two problems in that area. First, if a wakeup event occurs exactly when /sys/power/state is being written to, it may be delivered to user space right before the freezer kicks in, so the user space consumer of the event may not be able to process it before the system is suspended. Second, if a wakeup event occurs after user space has been frozen, it is not generally guaranteed that the ongoing transition of the system into a sleep state will be aborted. To address these issues introduce a new global sysfs attribute, /sys/power/wakeup_count, associated with a running counter of wakeup events and three helper functions, pm_stay_awake(), pm_relax(), and pm_wakeup_event(), that may be used by kernel subsystems to control the behavior of this attribute and to request the PM core to abort system transitions into a sleep state already in progress. The /sys/power/wakeup_count file may be read from or written to by user space. Reads will always succeed (unless interrupted by a signal) and return the current value of the wakeup events counter. Writes, however, will only succeed if the written number is equal to the current value of the wakeup events counter. If a write is successful, it will cause the kernel to save the current value of the wakeup events counter and to abort the subsequent system transition into a sleep state if any wakeup events are reported after the write has returned. [The assumption is that before writing to /sys/power/state user space will first read from /sys/power/wakeup_count. Next, user space consumers of wakeup events will have a chance to acknowledge or veto the upcoming system transition to a sleep state. 
Finally, if the transition is allowed to proceed, /sys/power/wakeup_count will be written to and if that succeeds, /sys/power/state will be written to as well. Still, if any wakeup events are reported to the PM core by kernel subsystems after that point, the transition will be aborted.] Additionally, put a wakeup events counter into struct dev_pm_info and make these per-device wakeup event counters available via sysfs, so that it's possible to check the activity of various wakeup event sources within the kernel. To illustrate how subsystems can use pm_wakeup_event(), make the low-level PCI runtime PM wakeup-handling code use it. Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl> Acked-by: Jesse Barnes <jbarnes@virtuousgeek.org> Acked-by: Greg Kroah-Hartman <gregkh@suse.de> Acked-by: markgross <markgross@thegnar.org> Reviewed-by: Alan Stern <stern@rowland.harvard.edu>
-rw-r--r-- Documentation/ABI/testing/sysfs-power 15
-rw-r--r-- drivers/base/power/Makefile 2
-rw-r--r-- drivers/base/power/main.c 1
-rw-r--r-- drivers/base/power/sysfs.c 15
-rw-r--r-- drivers/base/power/wakeup.c 229
-rw-r--r-- drivers/pci/pci-acpi.c 1
-rw-r--r-- drivers/pci/pci.c 20
-rw-r--r-- drivers/pci/pci.h 1
-rw-r--r-- drivers/pci/pcie/pme/pcie_pme.c 5
-rw-r--r-- include/linux/pm.h 10
-rw-r--r-- include/linux/suspend.h 7
-rw-r--r-- kernel/power/hibernate.c 20
-rw-r--r-- kernel/power/main.c 55
-rw-r--r-- kernel/power/suspend.c 4
14 files changed, 375 insertions, 10 deletions
diff --git a/Documentation/ABI/testing/sysfs-power b/Documentation/ABI/testing/sysfs-power
index d6a801f45b48..2875f1f74a07 100644
--- a/Documentation/ABI/testing/sysfs-power
+++ b/Documentation/ABI/testing/sysfs-power
@@ -114,3 +114,18 @@ Description:
114 if this file contains "1", which is the default. It may be 114 if this file contains "1", which is the default. It may be
115 disabled by writing "0" to this file, in which case all devices 115 disabled by writing "0" to this file, in which case all devices
116 will be suspended and resumed synchronously. 116 will be suspended and resumed synchronously.
117
118What: /sys/power/wakeup_count
119Date: July 2010
120Contact: Rafael J. Wysocki <rjw@sisk.pl>
121Description:
122 The /sys/power/wakeup_count file allows user space to put the
123 system into a sleep state while taking into account the
124 concurrent arrival of wakeup events. Reading from it returns
125 the current number of registered wakeup events and it blocks if
126 some wakeup events are being processed at the time the file is
127 read from. Writing to it will only succeed if the current
128 number of wakeup events is equal to the written value and, if
129 successful, will make the kernel abort a subsequent transition
130 to a sleep state if any wakeup events are reported after the
131 write has returned.
diff --git a/drivers/base/power/Makefile b/drivers/base/power/Makefile
index 89de75325cea..cbccf9a3cee4 100644
--- a/drivers/base/power/Makefile
+++ b/drivers/base/power/Makefile
@@ -1,5 +1,5 @@
1obj-$(CONFIG_PM) += sysfs.o 1obj-$(CONFIG_PM) += sysfs.o
2obj-$(CONFIG_PM_SLEEP) += main.o 2obj-$(CONFIG_PM_SLEEP) += main.o wakeup.o
3obj-$(CONFIG_PM_RUNTIME) += runtime.o 3obj-$(CONFIG_PM_RUNTIME) += runtime.o
4obj-$(CONFIG_PM_OPS) += generic_ops.o 4obj-$(CONFIG_PM_OPS) += generic_ops.o
5obj-$(CONFIG_PM_TRACE_RTC) += trace.o 5obj-$(CONFIG_PM_TRACE_RTC) += trace.o
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 941fcb87e52a..5419a49ff135 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -59,6 +59,7 @@ void device_pm_init(struct device *dev)
59{ 59{
60 dev->power.status = DPM_ON; 60 dev->power.status = DPM_ON;
61 init_completion(&dev->power.completion); 61 init_completion(&dev->power.completion);
62 dev->power.wakeup_count = 0;
62 pm_runtime_init(dev); 63 pm_runtime_init(dev);
63} 64}
64 65
diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c
index a4c33bc51257..81d344e0e95d 100644
--- a/drivers/base/power/sysfs.c
+++ b/drivers/base/power/sysfs.c
@@ -73,6 +73,8 @@
73 * device are known to the PM core. However, for some devices this 73 * device are known to the PM core. However, for some devices this
74 * attribute is set to "enabled" by bus type code or device drivers and in 74 * attribute is set to "enabled" by bus type code or device drivers and in
75 * that cases it should be safe to leave the default value. 75 * that cases it should be safe to leave the default value.
76 *
77 * wakeup_count - Report the number of wakeup events related to the device
76 */ 78 */
77 79
78static const char enabled[] = "enabled"; 80static const char enabled[] = "enabled";
@@ -144,6 +146,16 @@ wake_store(struct device * dev, struct device_attribute *attr,
144 146
145static DEVICE_ATTR(wakeup, 0644, wake_show, wake_store); 147static DEVICE_ATTR(wakeup, 0644, wake_show, wake_store);
146 148
#ifdef CONFIG_PM_SLEEP
/*
 * Show handler for the per-device "wakeup_count" attribute: print the value
 * of dev->power.wakeup_count as an unsigned decimal followed by a newline.
 */
static ssize_t wakeup_count_show(struct device *dev,
				 struct device_attribute *attr, char *buf)
{
	unsigned long nr_events = dev->power.wakeup_count;

	return sprintf(buf, "%lu\n", nr_events);
}

/* Read-only (0444) attribute; there is no store handler. */
static DEVICE_ATTR(wakeup_count, 0444, wakeup_count_show, NULL);
#endif
158
147#ifdef CONFIG_PM_ADVANCED_DEBUG 159#ifdef CONFIG_PM_ADVANCED_DEBUG
148#ifdef CONFIG_PM_RUNTIME 160#ifdef CONFIG_PM_RUNTIME
149 161
@@ -230,6 +242,9 @@ static struct attribute * power_attrs[] = {
230 &dev_attr_control.attr, 242 &dev_attr_control.attr,
231#endif 243#endif
232 &dev_attr_wakeup.attr, 244 &dev_attr_wakeup.attr,
245#ifdef CONFIG_PM_SLEEP
246 &dev_attr_wakeup_count.attr,
247#endif
233#ifdef CONFIG_PM_ADVANCED_DEBUG 248#ifdef CONFIG_PM_ADVANCED_DEBUG
234 &dev_attr_async.attr, 249 &dev_attr_async.attr,
235#ifdef CONFIG_PM_RUNTIME 250#ifdef CONFIG_PM_RUNTIME
diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c
new file mode 100644
index 000000000000..25599077c39c
--- /dev/null
+++ b/drivers/base/power/wakeup.c
@@ -0,0 +1,229 @@
1/*
2 * drivers/base/power/wakeup.c - System wakeup events framework
3 *
4 * Copyright (c) 2010 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
5 *
6 * This file is released under the GPLv2.
7 */
8
9#include <linux/device.h>
10#include <linux/slab.h>
11#include <linux/sched.h>
12#include <linux/capability.h>
13#include <linux/suspend.h>
14#include <linux/pm.h>
15
16/*
17 * If set, the suspend/hibernate code will abort transitions to a sleep state
18 * if wakeup events are registered during or immediately before the transition.
19 */
20bool events_check_enabled;
21
22/* The counter of registered wakeup events. */
23static unsigned long event_count;
24/* A preserved old value of event_count. */
25static unsigned long saved_event_count;
26/* The counter of wakeup events being processed. */
27static unsigned long events_in_progress;
28
29static DEFINE_SPINLOCK(events_lock);
30
31/*
32 * The functions below use the observation that each wakeup event starts a
33 * period in which the system should not be suspended. The moment this period
34 * will end depends on how the wakeup event is going to be processed after being
35 * detected and all of the possible cases can be divided into two distinct
36 * groups.
37 *
38 * First, a wakeup event may be detected by the same functional unit that will
39 * carry out the entire processing of it and possibly will pass it to user space
40 * for further processing. In that case the functional unit that has detected
41 * the event may later "close" the "no suspend" period associated with it
42 * directly as soon as it has been dealt with. The pair of pm_stay_awake() and
43 * pm_relax(), balanced with each other, is supposed to be used in such
44 * situations.
45 *
46 * Second, a wakeup event may be detected by one functional unit and processed
47 * by another one. In that case the unit that has detected it cannot really
48 * "close" the "no suspend" period associated with it, unless it knows in
49 * advance what's going to happen to the event during processing. This
50 * knowledge, however, may not be available to it, so it can simply specify time
51 * to wait before the system can be suspended and pass it as the second
52 * argument of pm_wakeup_event().
53 */
54
55/**
56 * pm_stay_awake - Notify the PM core that a wakeup event is being processed.
57 * @dev: Device the wakeup event is related to.
58 *
59 * Notify the PM core of a wakeup event (signaled by @dev) by incrementing the
60 * counter of wakeup events being processed. If @dev is not NULL, the counter
61 * of wakeup events related to @dev is incremented too.
62 *
63 * Call this function after detecting of a wakeup event if pm_relax() is going
64 * to be called directly after processing the event (and possibly passing it to
65 * user space for further processing).
66 *
67 * It is safe to call this function from interrupt context.
68 */
69void pm_stay_awake(struct device *dev)
70{
71 unsigned long flags;
72
73 spin_lock_irqsave(&events_lock, flags);
74 if (dev)
75 dev->power.wakeup_count++;
76
77 events_in_progress++;
78 spin_unlock_irqrestore(&events_lock, flags);
79}
80
81/**
82 * pm_relax - Notify the PM core that processing of a wakeup event has ended.
83 *
84 * Notify the PM core that a wakeup event has been processed by decrementing
85 * the counter of wakeup events being processed and incrementing the counter
86 * of registered wakeup events.
87 *
88 * Call this function for wakeup events whose processing started with calling
89 * pm_stay_awake().
90 *
91 * It is safe to call it from interrupt context.
92 */
93void pm_relax(void)
94{
95 unsigned long flags;
96
97 spin_lock_irqsave(&events_lock, flags);
98 if (events_in_progress) {
99 events_in_progress--;
100 event_count++;
101 }
102 spin_unlock_irqrestore(&events_lock, flags);
103}
104
/**
 * pm_wakeup_work_fn - Deferred closing of a wakeup event.
 * @work: Work item embedded in the delayed work object that was queued.
 *
 * Run pm_relax() for a wakeup event reported earlier, then free the delayed
 * work object that carried this callback.
 */
static void pm_wakeup_work_fn(struct work_struct *work)
{
	pm_relax();
	kfree(to_delayed_work(work));
}
118
119/**
120 * pm_wakeup_event - Notify the PM core of a wakeup event.
121 * @dev: Device the wakeup event is related to.
122 * @msec: Anticipated event processing time (in milliseconds).
123 *
124 * Notify the PM core of a wakeup event (signaled by @dev) that will take
125 * approximately @msec milliseconds to be processed by the kernel. Increment
126 * the counter of wakeup events being processed and queue up a work item
127 * that will execute pm_relax() for the event after @msec milliseconds. If @dev
128 * is not NULL, the counter of wakeup events related to @dev is incremented too.
129 *
130 * It is safe to call this function from interrupt context.
131 */
132void pm_wakeup_event(struct device *dev, unsigned int msec)
133{
134 unsigned long flags;
135 struct delayed_work *dwork;
136
137 dwork = msec ? kzalloc(sizeof(*dwork), GFP_ATOMIC) : NULL;
138
139 spin_lock_irqsave(&events_lock, flags);
140 if (dev)
141 dev->power.wakeup_count++;
142
143 if (dwork) {
144 INIT_DELAYED_WORK(dwork, pm_wakeup_work_fn);
145 schedule_delayed_work(dwork, msecs_to_jiffies(msec));
146
147 events_in_progress++;
148 } else {
149 event_count++;
150 }
151 spin_unlock_irqrestore(&events_lock, flags);
152}
153
154/**
155 * pm_check_wakeup_events - Check for new wakeup events.
156 *
157 * Compare the current number of registered wakeup events with its preserved
158 * value from the past to check if new wakeup events have been registered since
159 * the old value was stored. Check if the current number of wakeup events being
160 * processed is zero.
161 */
162bool pm_check_wakeup_events(void)
163{
164 unsigned long flags;
165 bool ret = true;
166
167 spin_lock_irqsave(&events_lock, flags);
168 if (events_check_enabled) {
169 ret = (event_count == saved_event_count) && !events_in_progress;
170 events_check_enabled = ret;
171 }
172 spin_unlock_irqrestore(&events_lock, flags);
173 return ret;
174}
175
176/**
177 * pm_get_wakeup_count - Read the number of registered wakeup events.
178 * @count: Address to store the value at.
179 *
180 * Store the number of registered wakeup events at the address in @count. Block
181 * if the current number of wakeup events being processed is nonzero.
182 *
183 * Return false if the wait for the number of wakeup events being processed to
184 * drop down to zero has been interrupted by a signal (and the current number
185 * of wakeup events being processed is still nonzero). Otherwise return true.
186 */
187bool pm_get_wakeup_count(unsigned long *count)
188{
189 bool ret;
190
191 spin_lock_irq(&events_lock);
192 if (capable(CAP_SYS_ADMIN))
193 events_check_enabled = false;
194
195 while (events_in_progress && !signal_pending(current)) {
196 spin_unlock_irq(&events_lock);
197
198 schedule_timeout_interruptible(msecs_to_jiffies(100));
199
200 spin_lock_irq(&events_lock);
201 }
202 *count = event_count;
203 ret = !events_in_progress;
204 spin_unlock_irq(&events_lock);
205 return ret;
206}
207
208/**
209 * pm_save_wakeup_count - Save the current number of registered wakeup events.
210 * @count: Value to compare with the current number of registered wakeup events.
211 *
212 * If @count is equal to the current number of registered wakeup events and the
213 * current number of wakeup events being processed is zero, store @count as the
214 * old number of registered wakeup events to be used by pm_check_wakeup_events()
215 * and return true. Otherwise return false.
216 */
217bool pm_save_wakeup_count(unsigned long count)
218{
219 bool ret = false;
220
221 spin_lock_irq(&events_lock);
222 if (count == event_count && !events_in_progress) {
223 saved_event_count = count;
224 events_check_enabled = true;
225 ret = true;
226 }
227 spin_unlock_irq(&events_lock);
228 return ret;
229}
diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index 2e7a3bf13824..1ab98bbe58dd 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -48,6 +48,7 @@ static void pci_acpi_wake_dev(acpi_handle handle, u32 event, void *context)
48 if (event == ACPI_NOTIFY_DEVICE_WAKE && pci_dev) { 48 if (event == ACPI_NOTIFY_DEVICE_WAKE && pci_dev) {
49 pci_check_pme_status(pci_dev); 49 pci_check_pme_status(pci_dev);
50 pm_runtime_resume(&pci_dev->dev); 50 pm_runtime_resume(&pci_dev->dev);
51 pci_wakeup_event(pci_dev);
51 if (pci_dev->subordinate) 52 if (pci_dev->subordinate)
52 pci_pme_wakeup_bus(pci_dev->subordinate); 53 pci_pme_wakeup_bus(pci_dev->subordinate);
53 } 54 }
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 740fb4ea9669..130ed1daf0f8 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1275,6 +1275,22 @@ bool pci_check_pme_status(struct pci_dev *dev)
1275 return ret; 1275 return ret;
1276} 1276}
1277 1277
1278/*
1279 * Time to wait before the system can be put into a sleep state after reporting
1280 * a wakeup event signaled by a PCI device.
1281 */
1282#define PCI_WAKEUP_COOLDOWN 100
1283
1284/**
1285 * pci_wakeup_event - Report a wakeup event related to a given PCI device.
1286 * @dev: Device to report the wakeup event for.
1287 */
1288void pci_wakeup_event(struct pci_dev *dev)
1289{
1290 if (device_may_wakeup(&dev->dev))
1291 pm_wakeup_event(&dev->dev, PCI_WAKEUP_COOLDOWN);
1292}
1293
1278/** 1294/**
1279 * pci_pme_wakeup - Wake up a PCI device if its PME Status bit is set. 1295 * pci_pme_wakeup - Wake up a PCI device if its PME Status bit is set.
1280 * @dev: Device to handle. 1296 * @dev: Device to handle.
@@ -1285,8 +1301,10 @@ bool pci_check_pme_status(struct pci_dev *dev)
1285 */ 1301 */
1286static int pci_pme_wakeup(struct pci_dev *dev, void *ign) 1302static int pci_pme_wakeup(struct pci_dev *dev, void *ign)
1287{ 1303{
1288 if (pci_check_pme_status(dev)) 1304 if (pci_check_pme_status(dev)) {
1289 pm_request_resume(&dev->dev); 1305 pm_request_resume(&dev->dev);
1306 pci_wakeup_event(dev);
1307 }
1290 return 0; 1308 return 0;
1291} 1309}
1292 1310
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index f8077b3c8c8c..c8b7fd056ccd 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -56,6 +56,7 @@ extern void pci_update_current_state(struct pci_dev *dev, pci_power_t state);
56extern void pci_disable_enabled_device(struct pci_dev *dev); 56extern void pci_disable_enabled_device(struct pci_dev *dev);
57extern bool pci_check_pme_status(struct pci_dev *dev); 57extern bool pci_check_pme_status(struct pci_dev *dev);
58extern int pci_finish_runtime_suspend(struct pci_dev *dev); 58extern int pci_finish_runtime_suspend(struct pci_dev *dev);
59extern void pci_wakeup_event(struct pci_dev *dev);
59extern int __pci_pme_wakeup(struct pci_dev *dev, void *ign); 60extern int __pci_pme_wakeup(struct pci_dev *dev, void *ign);
60extern void pci_pme_wakeup_bus(struct pci_bus *bus); 61extern void pci_pme_wakeup_bus(struct pci_bus *bus);
61extern void pci_pm_init(struct pci_dev *dev); 62extern void pci_pm_init(struct pci_dev *dev);
diff --git a/drivers/pci/pcie/pme/pcie_pme.c b/drivers/pci/pcie/pme/pcie_pme.c
index d672a0a63816..bbdea18693d9 100644
--- a/drivers/pci/pcie/pme/pcie_pme.c
+++ b/drivers/pci/pcie/pme/pcie_pme.c
@@ -154,6 +154,7 @@ static bool pcie_pme_walk_bus(struct pci_bus *bus)
154 /* Skip PCIe devices in case we started from a root port. */ 154 /* Skip PCIe devices in case we started from a root port. */
155 if (!pci_is_pcie(dev) && pci_check_pme_status(dev)) { 155 if (!pci_is_pcie(dev) && pci_check_pme_status(dev)) {
156 pm_request_resume(&dev->dev); 156 pm_request_resume(&dev->dev);
157 pci_wakeup_event(dev);
157 ret = true; 158 ret = true;
158 } 159 }
159 160
@@ -254,8 +255,10 @@ static void pcie_pme_handle_request(struct pci_dev *port, u16 req_id)
254 if (found) { 255 if (found) {
255 /* The device is there, but we have to check its PME status. */ 256 /* The device is there, but we have to check its PME status. */
256 found = pci_check_pme_status(dev); 257 found = pci_check_pme_status(dev);
257 if (found) 258 if (found) {
258 pm_request_resume(&dev->dev); 259 pm_request_resume(&dev->dev);
260 pci_wakeup_event(dev);
261 }
259 pci_dev_put(dev); 262 pci_dev_put(dev);
260 } else if (devfn) { 263 } else if (devfn) {
261 /* 264 /*
diff --git a/include/linux/pm.h b/include/linux/pm.h
index 8e258c727971..b417fc46f3fc 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -457,6 +457,7 @@ struct dev_pm_info {
457#ifdef CONFIG_PM_SLEEP 457#ifdef CONFIG_PM_SLEEP
458 struct list_head entry; 458 struct list_head entry;
459 struct completion completion; 459 struct completion completion;
460 unsigned long wakeup_count;
460#endif 461#endif
461#ifdef CONFIG_PM_RUNTIME 462#ifdef CONFIG_PM_RUNTIME
462 struct timer_list suspend_timer; 463 struct timer_list suspend_timer;
@@ -552,6 +553,11 @@ extern void __suspend_report_result(const char *function, void *fn, int ret);
552 } while (0) 553 } while (0)
553 554
554extern void device_pm_wait_for_dev(struct device *sub, struct device *dev); 555extern void device_pm_wait_for_dev(struct device *sub, struct device *dev);
556
557/* drivers/base/power/wakeup.c */
558extern void pm_wakeup_event(struct device *dev, unsigned int msec);
559extern void pm_stay_awake(struct device *dev);
560extern void pm_relax(void);
555#else /* !CONFIG_PM_SLEEP */ 561#else /* !CONFIG_PM_SLEEP */
556 562
557#define device_pm_lock() do {} while (0) 563#define device_pm_lock() do {} while (0)
@@ -565,6 +571,10 @@ static inline int dpm_suspend_start(pm_message_t state)
565#define suspend_report_result(fn, ret) do {} while (0) 571#define suspend_report_result(fn, ret) do {} while (0)
566 572
567static inline void device_pm_wait_for_dev(struct device *a, struct device *b) {} 573static inline void device_pm_wait_for_dev(struct device *a, struct device *b) {}
574
575static inline void pm_wakeup_event(struct device *dev, unsigned int msec) {}
576static inline void pm_stay_awake(struct device *dev) {}
577static inline void pm_relax(void) {}
568#endif /* !CONFIG_PM_SLEEP */ 578#endif /* !CONFIG_PM_SLEEP */
569 579
570/* How to reorder dpm_list after device_move() */ 580/* How to reorder dpm_list after device_move() */
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index bc7d6bb4cd8e..bf1bab7b059c 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -286,6 +286,13 @@ extern int unregister_pm_notifier(struct notifier_block *nb);
286 { .notifier_call = fn, .priority = pri }; \ 286 { .notifier_call = fn, .priority = pri }; \
287 register_pm_notifier(&fn##_nb); \ 287 register_pm_notifier(&fn##_nb); \
288} 288}
289
290/* drivers/base/power/wakeup.c */
291extern bool events_check_enabled;
292
293extern bool pm_check_wakeup_events(void);
294extern bool pm_get_wakeup_count(unsigned long *count);
295extern bool pm_save_wakeup_count(unsigned long count);
289#else /* !CONFIG_PM_SLEEP */ 296#else /* !CONFIG_PM_SLEEP */
290 297
291static inline int register_pm_notifier(struct notifier_block *nb) 298static inline int register_pm_notifier(struct notifier_block *nb)
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index aa9e916da4d5..f61202916631 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -277,7 +277,7 @@ static int create_image(int platform_mode)
277 goto Enable_irqs; 277 goto Enable_irqs;
278 } 278 }
279 279
280 if (hibernation_test(TEST_CORE)) 280 if (hibernation_test(TEST_CORE) || !pm_check_wakeup_events())
281 goto Power_up; 281 goto Power_up;
282 282
283 in_suspend = 1; 283 in_suspend = 1;
@@ -288,8 +288,10 @@ static int create_image(int platform_mode)
288 error); 288 error);
289 /* Restore control flow magically appears here */ 289 /* Restore control flow magically appears here */
290 restore_processor_state(); 290 restore_processor_state();
291 if (!in_suspend) 291 if (!in_suspend) {
292 events_check_enabled = false;
292 platform_leave(platform_mode); 293 platform_leave(platform_mode);
294 }
293 295
294 Power_up: 296 Power_up:
295 sysdev_resume(); 297 sysdev_resume();
@@ -511,14 +513,20 @@ int hibernation_platform_enter(void)
511 513
512 local_irq_disable(); 514 local_irq_disable();
513 sysdev_suspend(PMSG_HIBERNATE); 515 sysdev_suspend(PMSG_HIBERNATE);
516 if (!pm_check_wakeup_events()) {
517 error = -EAGAIN;
518 goto Power_up;
519 }
520
514 hibernation_ops->enter(); 521 hibernation_ops->enter();
515 /* We should never get here */ 522 /* We should never get here */
516 while (1); 523 while (1);
517 524
518 /* 525 Power_up:
519 * We don't need to reenable the nonboot CPUs or resume consoles, since 526 sysdev_resume();
520 * the system is going to be halted anyway. 527 local_irq_enable();
521 */ 528 enable_nonboot_cpus();
529
522 Platform_finish: 530 Platform_finish:
523 hibernation_ops->finish(); 531 hibernation_ops->finish();
524 532
diff --git a/kernel/power/main.c b/kernel/power/main.c
index b58800b21fc0..62b0bc6e4983 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -204,6 +204,60 @@ static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr,
204 204
205power_attr(state); 205power_attr(state);
206 206
207#ifdef CONFIG_PM_SLEEP
208/*
209 * The 'wakeup_count' attribute, along with the functions defined in
210 * drivers/base/power/wakeup.c, provides a means by which wakeup events can be
211 * handled in a non-racy way.
212 *
213 * If a wakeup event occurs when the system is in a sleep state, it simply is
214 * woken up. In turn, if an event that would wake the system up from a sleep
215 * state occurs when it is undergoing a transition to that sleep state, the
216 * transition should be aborted. Moreover, if such an event occurs when the
217 * system is in the working state, an attempt to start a transition to the
218 * given sleep state should fail during certain period after the detection of
219 * the event. Using the 'state' attribute alone is not sufficient to satisfy
220 * these requirements, because a wakeup event may occur exactly when 'state'
221 * is being written to and may be delivered to user space right before it is
222 * frozen, so the event will remain only partially processed until the system is
223 * woken up by another event. In particular, it won't cause the transition to
224 * a sleep state to be aborted.
225 *
226 * This difficulty may be overcome if user space uses 'wakeup_count' before
227 * writing to 'state'. It first should read from 'wakeup_count' and store
228 * the read value. Then, after carrying out its own preparations for the system
229 * transition to a sleep state, it should write the stored value to
230 * 'wakeup_count'. If that fails, at least one wakeup event has occurred since
231 * 'wakeup_count' was read and 'state' should not be written to. Otherwise, it
232 * is allowed to write to 'state', but the transition will be aborted if there
233 * are any wakeup events detected after 'wakeup_count' was written to.
234 */
235
236static ssize_t wakeup_count_show(struct kobject *kobj,
237 struct kobj_attribute *attr,
238 char *buf)
239{
240 unsigned long val;
241
242 return pm_get_wakeup_count(&val) ? sprintf(buf, "%lu\n", val) : -EINTR;
243}
244
245static ssize_t wakeup_count_store(struct kobject *kobj,
246 struct kobj_attribute *attr,
247 const char *buf, size_t n)
248{
249 unsigned long val;
250
251 if (sscanf(buf, "%lu", &val) == 1) {
252 if (pm_save_wakeup_count(val))
253 return n;
254 }
255 return -EINVAL;
256}
257
258power_attr(wakeup_count);
259#endif /* CONFIG_PM_SLEEP */
260
207#ifdef CONFIG_PM_TRACE 261#ifdef CONFIG_PM_TRACE
208int pm_trace_enabled; 262int pm_trace_enabled;
209 263
@@ -236,6 +290,7 @@ static struct attribute * g[] = {
236#endif 290#endif
237#ifdef CONFIG_PM_SLEEP 291#ifdef CONFIG_PM_SLEEP
238 &pm_async_attr.attr, 292 &pm_async_attr.attr,
293 &wakeup_count_attr.attr,
239#ifdef CONFIG_PM_DEBUG 294#ifdef CONFIG_PM_DEBUG
240 &pm_test_attr.attr, 295 &pm_test_attr.attr,
241#endif 296#endif
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index f37cb7dd4402..5f8d09f94325 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -163,8 +163,10 @@ static int suspend_enter(suspend_state_t state)
163 163
164 error = sysdev_suspend(PMSG_SUSPEND); 164 error = sysdev_suspend(PMSG_SUSPEND);
165 if (!error) { 165 if (!error) {
166 if (!suspend_test(TEST_CORE)) 166 if (!suspend_test(TEST_CORE) && pm_check_wakeup_events()) {
167 error = suspend_ops->enter(state); 167 error = suspend_ops->enter(state);
168 events_check_enabled = false;
169 }
168 sysdev_resume(); 170 sysdev_resume();
169 } 171 }
170 172