diff options
Diffstat (limited to 'kernel/power')
-rw-r--r-- | kernel/power/Kconfig | 74 | ||||
-rw-r--r-- | kernel/power/Makefile | 11 | ||||
-rw-r--r-- | kernel/power/console.c | 58 | ||||
-rw-r--r-- | kernel/power/disk.c | 431 | ||||
-rw-r--r-- | kernel/power/main.c | 269 | ||||
-rw-r--r-- | kernel/power/pm.c | 265 | ||||
-rw-r--r-- | kernel/power/power.h | 52 | ||||
-rw-r--r-- | kernel/power/poweroff.c | 45 | ||||
-rw-r--r-- | kernel/power/process.c | 121 | ||||
-rw-r--r-- | kernel/power/smp.c | 85 | ||||
-rw-r--r-- | kernel/power/swsusp.c | 1433 |
11 files changed, 2844 insertions, 0 deletions
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig new file mode 100644 index 000000000000..696387ffe49c --- /dev/null +++ b/kernel/power/Kconfig | |||
@@ -0,0 +1,74 @@ | |||
1 | config PM | ||
2 | bool "Power Management support" | ||
3 | ---help--- | ||
4 | "Power Management" means that parts of your computer are shut | ||
5 | off or put into a power conserving "sleep" mode if they are not | ||
6 | being used. There are two competing standards for doing this: APM | ||
7 | and ACPI. If you want to use either one, say Y here and then also | ||
8 | to the requisite support below. | ||
9 | |||
10 | Power Management is most important for battery powered laptop | ||
11 | computers; if you have a laptop, check out the Linux Laptop home | ||
12 | page on the WWW at <http://www.linux-on-laptops.com/> or | ||
13 | Tuxmobil - Linux on Mobile Computers at <http://www.tuxmobil.org/> | ||
14 | and the Battery Powered Linux mini-HOWTO, available from | ||
15 | <http://www.tldp.org/docs.html#howto>. | ||
16 | |||
17 | Note that, even if you say N here, Linux on the x86 architecture | ||
18 | will issue the hlt instruction if nothing is to be done, thereby | ||
19 | sending the processor to sleep and saving power. | ||
20 | |||
21 | config PM_DEBUG | ||
22 | bool "Power Management Debug Support" | ||
23 | depends on PM | ||
24 | ---help--- | ||
25 | This option enables verbose debugging support in the Power Management | ||
26 | code. This is helpful when debugging and reporting various PM bugs, | ||
27 | like suspend support. | ||
28 | |||
29 | config SOFTWARE_SUSPEND | ||
30 | bool "Software Suspend (EXPERIMENTAL)" | ||
31 | depends on EXPERIMENTAL && PM && SWAP | ||
32 | ---help--- | ||
33 | Enable the possibility of suspending the machine. | ||
34 | It doesn't need APM. | ||
35 | You may suspend your machine by 'swsusp' or 'shutdown -z <time>' | ||
36 | (patch for sysvinit needed). | ||
37 | |||
38 | It creates an image which is saved in your active swap. Upon next | ||
39 | boot, pass the 'resume=/dev/swappartition' argument to the kernel to | ||
40 | have it detect the saved image, restore memory state from it, and | ||
41 | continue to run as before. If you do not want the previous state to | ||
42 | be reloaded, then use the 'noresume' kernel argument. However, note | ||
43 | that your partitions will be fsck'd and you must re-mkswap your swap | ||
44 | partitions. It does not work with swap files. | ||
45 | |||
46 | Right now you may boot without resuming and then later resume but | ||
47 | in the meantime you cannot use those swap partitions/files which were | ||
48 | involved in suspending. Also in this case there is a risk that buffers | ||
49 | on disk won't match with saved ones. | ||
50 | |||
51 | For more information take a look at <file:Documentation/power/swsusp.txt>. | ||
52 | |||
53 | config PM_STD_PARTITION | ||
54 | string "Default resume partition" | ||
55 | depends on SOFTWARE_SUSPEND | ||
56 | default "" | ||
57 | ---help--- | ||
58 | The default resume partition is the partition that the suspend- | ||
59 | to-disk implementation will search for a suspended disk image. | ||
60 | |||
61 | The partition specified here will be different for almost every user. | ||
62 | It should be a valid swap partition (at least for now) that is turned | ||
63 | on before suspending. | ||
64 | |||
65 | The partition specified can be overridden by specifying: | ||
66 | |||
67 | resume=/dev/<other device> | ||
68 | |||
69 | which will set the resume partition to the device specified. | ||
70 | |||
71 | Note there is currently not a way to specify which device to save the | ||
72 | suspended image to. It will simply pick the first available swap | ||
73 | device. | ||
74 | |||
diff --git a/kernel/power/Makefile b/kernel/power/Makefile new file mode 100644 index 000000000000..fbdc634135a7 --- /dev/null +++ b/kernel/power/Makefile | |||
@@ -0,0 +1,11 @@ | |||
1 | |||
2 | ifeq ($(CONFIG_PM_DEBUG),y) | ||
3 | EXTRA_CFLAGS += -DDEBUG | ||
4 | endif | ||
5 | |||
6 | swsusp-smp-$(CONFIG_SMP) += smp.o | ||
7 | |||
8 | obj-y := main.o process.o console.o pm.o | ||
9 | obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o $(swsusp-smp-y) disk.o | ||
10 | |||
11 | obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o | ||
diff --git a/kernel/power/console.c b/kernel/power/console.c new file mode 100644 index 000000000000..7ff375e7c95f --- /dev/null +++ b/kernel/power/console.c | |||
@@ -0,0 +1,58 @@ | |||
1 | /* | ||
2 | * kernel/power/console.c - Functions for saving/restoring console. | ||
3 | * | ||
4 | * Originally from swsusp. | ||
5 | */ | ||
6 | |||
7 | #include <linux/vt_kern.h> | ||
8 | #include <linux/kbd_kern.h> | ||
9 | #include <linux/console.h> | ||
10 | #include "power.h" | ||
11 | |||
12 | static int new_loglevel = 10; | ||
13 | static int orig_loglevel; | ||
14 | #ifdef SUSPEND_CONSOLE | ||
15 | static int orig_fgconsole, orig_kmsg; | ||
16 | #endif | ||
17 | |||
18 | int pm_prepare_console(void) | ||
19 | { | ||
20 | orig_loglevel = console_loglevel; | ||
21 | console_loglevel = new_loglevel; | ||
22 | |||
23 | #ifdef SUSPEND_CONSOLE | ||
24 | acquire_console_sem(); | ||
25 | |||
26 | orig_fgconsole = fg_console; | ||
27 | |||
28 | if (vc_allocate(SUSPEND_CONSOLE)) { | ||
29 | /* we can't have a free VC for now. Too bad, | ||
30 | * we don't want to mess the screen for now. */ | ||
31 | release_console_sem(); | ||
32 | return 1; | ||
33 | } | ||
34 | |||
35 | set_console(SUSPEND_CONSOLE); | ||
36 | release_console_sem(); | ||
37 | |||
38 | if (vt_waitactive(SUSPEND_CONSOLE)) { | ||
39 | pr_debug("Suspend: Can't switch VCs."); | ||
40 | return 1; | ||
41 | } | ||
42 | orig_kmsg = kmsg_redirect; | ||
43 | kmsg_redirect = SUSPEND_CONSOLE; | ||
44 | #endif | ||
45 | return 0; | ||
46 | } | ||
47 | |||
48 | void pm_restore_console(void) | ||
49 | { | ||
50 | console_loglevel = orig_loglevel; | ||
51 | #ifdef SUSPEND_CONSOLE | ||
52 | acquire_console_sem(); | ||
53 | set_console(orig_fgconsole); | ||
54 | release_console_sem(); | ||
55 | kmsg_redirect = orig_kmsg; | ||
56 | #endif | ||
57 | return; | ||
58 | } | ||
diff --git a/kernel/power/disk.c b/kernel/power/disk.c new file mode 100644 index 000000000000..02b6764034dc --- /dev/null +++ b/kernel/power/disk.c | |||
@@ -0,0 +1,431 @@ | |||
1 | /* | ||
2 | * kernel/power/disk.c - Suspend-to-disk support. | ||
3 | * | ||
4 | * Copyright (c) 2003 Patrick Mochel | ||
5 | * Copyright (c) 2003 Open Source Development Lab | ||
6 | * Copyright (c) 2004 Pavel Machek <pavel@suse.cz> | ||
7 | * | ||
8 | * This file is released under the GPLv2. | ||
9 | * | ||
10 | */ | ||
11 | |||
12 | #include <linux/suspend.h> | ||
13 | #include <linux/syscalls.h> | ||
14 | #include <linux/reboot.h> | ||
15 | #include <linux/string.h> | ||
16 | #include <linux/device.h> | ||
17 | #include <linux/delay.h> | ||
18 | #include <linux/fs.h> | ||
19 | #include "power.h" | ||
20 | |||
21 | |||
22 | extern suspend_disk_method_t pm_disk_mode; | ||
23 | extern struct pm_ops * pm_ops; | ||
24 | |||
25 | extern int swsusp_suspend(void); | ||
26 | extern int swsusp_write(void); | ||
27 | extern int swsusp_check(void); | ||
28 | extern int swsusp_read(void); | ||
29 | extern void swsusp_close(void); | ||
30 | extern int swsusp_resume(void); | ||
31 | extern int swsusp_free(void); | ||
32 | |||
33 | |||
34 | static int noresume = 0; | ||
35 | char resume_file[256] = CONFIG_PM_STD_PARTITION; | ||
36 | dev_t swsusp_resume_device; | ||
37 | |||
38 | /** | ||
39 | * power_down - Shut machine down for hibernate. | ||
40 | * @mode: Suspend-to-disk mode | ||
41 | * | ||
42 | * Use the platform driver, if configured so, and return gracefully if it | ||
43 | * fails. | ||
44 | * Otherwise, try to power off and reboot. If they fail, halt the machine, | ||
45 | * there ain't no turning back. | ||
46 | */ | ||
47 | |||
48 | static void power_down(suspend_disk_method_t mode) | ||
49 | { | ||
50 | unsigned long flags; | ||
51 | int error = 0; | ||
52 | |||
53 | local_irq_save(flags); | ||
54 | switch(mode) { | ||
55 | case PM_DISK_PLATFORM: | ||
56 | device_shutdown(); | ||
57 | error = pm_ops->enter(PM_SUSPEND_DISK); | ||
58 | break; | ||
59 | case PM_DISK_SHUTDOWN: | ||
60 | printk("Powering off system\n"); | ||
61 | device_shutdown(); | ||
62 | machine_power_off(); | ||
63 | break; | ||
64 | case PM_DISK_REBOOT: | ||
65 | device_shutdown(); | ||
66 | machine_restart(NULL); | ||
67 | break; | ||
68 | } | ||
69 | machine_halt(); | ||
70 | /* Valid image is on the disk, if we continue we risk serious data corruption | ||
71 | after resume. */ | ||
72 | printk(KERN_CRIT "Please power me down manually\n"); | ||
73 | while(1); | ||
74 | } | ||
75 | |||
76 | |||
77 | static int in_suspend __nosavedata = 0; | ||
78 | |||
79 | |||
/**
 *	free_some_memory - Try to free as much memory as possible
 *
 *	Calls shrink_all_memory() repeatedly until it reports that nothing
 *	more was freed, drawing a text spinner while doing so, then prints
 *	the total number of pages freed.
 *
 *	... but do not OOM-kill anyone
 *
 *	Notice: all userland should be stopped at this point, or
 *	livelock is possible.
 */

static void free_some_memory(void)
{
	static const char spinner[] = "-\\|/";
	unsigned int step = 0;
	unsigned int freed;
	unsigned long pages = 0;

	printk("Freeing memory... ");
	while ((freed = shrink_all_memory(10000))) {
		pages += freed;
		/* Backspace over the previous spinner glyph and draw the next. */
		printk("\b%c", spinner[step]);
		step = (step + 1) % (sizeof(spinner) - 1);
	}
	/* pages is unsigned long, so it must be printed with %lu. */
	printk("\bdone (%lu pages freed)\n", pages);
}
106 | |||
107 | |||
108 | static inline void platform_finish(void) | ||
109 | { | ||
110 | if (pm_disk_mode == PM_DISK_PLATFORM) { | ||
111 | if (pm_ops && pm_ops->finish) | ||
112 | pm_ops->finish(PM_SUSPEND_DISK); | ||
113 | } | ||
114 | } | ||
115 | |||
/*
 * Common tail of the suspend-to-disk and resume paths: resume devices,
 * run the platform finish hook, re-enable the non-boot CPUs, thaw
 * processes, and finally restore the console.
 */
static void finish(void)
{
	device_resume();
	platform_finish();

	enable_nonboot_cpus();
	thaw_processes();

	pm_restore_console();
}
124 | |||
125 | |||
126 | static int prepare_processes(void) | ||
127 | { | ||
128 | int error; | ||
129 | |||
130 | pm_prepare_console(); | ||
131 | |||
132 | sys_sync(); | ||
133 | |||
134 | if (freeze_processes()) { | ||
135 | error = -EBUSY; | ||
136 | return error; | ||
137 | } | ||
138 | |||
139 | if (pm_disk_mode == PM_DISK_PLATFORM) { | ||
140 | if (pm_ops && pm_ops->prepare) { | ||
141 | if ((error = pm_ops->prepare(PM_SUSPEND_DISK))) | ||
142 | return error; | ||
143 | } | ||
144 | } | ||
145 | |||
146 | /* Free memory before shutting down devices. */ | ||
147 | free_some_memory(); | ||
148 | |||
149 | return 0; | ||
150 | } | ||
151 | |||
/*
 * Roll back prepare_processes(): re-enable the non-boot CPUs, thaw
 * processes, and restore the console.
 */
static void unprepare_processes(void)
{
	enable_nonboot_cpus();
	thaw_processes();

	pm_restore_console();
}
158 | |||
159 | static int prepare_devices(void) | ||
160 | { | ||
161 | int error; | ||
162 | |||
163 | disable_nonboot_cpus(); | ||
164 | if ((error = device_suspend(PMSG_FREEZE))) { | ||
165 | printk("Some devices failed to suspend\n"); | ||
166 | platform_finish(); | ||
167 | enable_nonboot_cpus(); | ||
168 | return error; | ||
169 | } | ||
170 | |||
171 | return 0; | ||
172 | } | ||
173 | |||
174 | /** | ||
175 | * pm_suspend_disk - The granpappy of power management. | ||
176 | * | ||
177 | * If we're going through the firmware, then get it over with quickly. | ||
178 | * | ||
179 | * If not, then call swsusp to do its thing, then figure out how | ||
180 | * to power down the system. | ||
181 | */ | ||
182 | |||
183 | int pm_suspend_disk(void) | ||
184 | { | ||
185 | int error; | ||
186 | |||
187 | error = prepare_processes(); | ||
188 | if (!error) { | ||
189 | error = prepare_devices(); | ||
190 | } | ||
191 | |||
192 | if (error) { | ||
193 | unprepare_processes(); | ||
194 | return error; | ||
195 | } | ||
196 | |||
197 | pr_debug("PM: Attempting to suspend to disk.\n"); | ||
198 | if (pm_disk_mode == PM_DISK_FIRMWARE) | ||
199 | return pm_ops->enter(PM_SUSPEND_DISK); | ||
200 | |||
201 | pr_debug("PM: snapshotting memory.\n"); | ||
202 | in_suspend = 1; | ||
203 | if ((error = swsusp_suspend())) | ||
204 | goto Done; | ||
205 | |||
206 | if (in_suspend) { | ||
207 | pr_debug("PM: writing image.\n"); | ||
208 | error = swsusp_write(); | ||
209 | if (!error) | ||
210 | power_down(pm_disk_mode); | ||
211 | } else | ||
212 | pr_debug("PM: Image restored successfully.\n"); | ||
213 | swsusp_free(); | ||
214 | Done: | ||
215 | finish(); | ||
216 | return error; | ||
217 | } | ||
218 | |||
219 | |||
220 | /** | ||
221 | * software_resume - Resume from a saved image. | ||
222 | * | ||
223 | * Called as a late_initcall (so all devices are discovered and | ||
224 | * initialized), we call swsusp to see if we have a saved image or not. | ||
225 | * If so, we quiesce devices, the restore the saved image. We will | ||
226 | * return above (in pm_suspend_disk() ) if everything goes well. | ||
227 | * Otherwise, we fail gracefully and return to the normally | ||
228 | * scheduled program. | ||
229 | * | ||
230 | */ | ||
231 | |||
232 | static int software_resume(void) | ||
233 | { | ||
234 | int error; | ||
235 | |||
236 | if (noresume) { | ||
237 | /** | ||
238 | * FIXME: If noresume is specified, we need to find the partition | ||
239 | * and reset it back to normal swap space. | ||
240 | */ | ||
241 | return 0; | ||
242 | } | ||
243 | |||
244 | pr_debug("PM: Checking swsusp image.\n"); | ||
245 | |||
246 | if ((error = swsusp_check())) | ||
247 | goto Done; | ||
248 | |||
249 | pr_debug("PM: Preparing processes for restore.\n"); | ||
250 | |||
251 | if ((error = prepare_processes())) { | ||
252 | swsusp_close(); | ||
253 | goto Cleanup; | ||
254 | } | ||
255 | |||
256 | pr_debug("PM: Reading swsusp image.\n"); | ||
257 | |||
258 | if ((error = swsusp_read())) | ||
259 | goto Cleanup; | ||
260 | |||
261 | pr_debug("PM: Preparing devices for restore.\n"); | ||
262 | |||
263 | if ((error = prepare_devices())) | ||
264 | goto Free; | ||
265 | |||
266 | mb(); | ||
267 | |||
268 | pr_debug("PM: Restoring saved image.\n"); | ||
269 | swsusp_resume(); | ||
270 | pr_debug("PM: Restore failed, recovering.n"); | ||
271 | finish(); | ||
272 | Free: | ||
273 | swsusp_free(); | ||
274 | Cleanup: | ||
275 | unprepare_processes(); | ||
276 | Done: | ||
277 | pr_debug("PM: Resume from disk failed.\n"); | ||
278 | return 0; | ||
279 | } | ||
280 | |||
281 | late_initcall(software_resume); | ||
282 | |||
283 | |||
284 | static char * pm_disk_modes[] = { | ||
285 | [PM_DISK_FIRMWARE] = "firmware", | ||
286 | [PM_DISK_PLATFORM] = "platform", | ||
287 | [PM_DISK_SHUTDOWN] = "shutdown", | ||
288 | [PM_DISK_REBOOT] = "reboot", | ||
289 | }; | ||
290 | |||
291 | /** | ||
292 | * disk - Control suspend-to-disk mode | ||
293 | * | ||
294 | * Suspend-to-disk can be handled in several ways. The greatest | ||
295 | * distinction is who writes memory to disk - the firmware or the OS. | ||
296 | * If the firmware does it, we assume that it also handles suspending | ||
297 | * the system. | ||
298 | * If the OS does it, then we have three options for putting the system | ||
299 | * to sleep - using the platform driver (e.g. ACPI or other PM registers), | ||
300 | * powering off the system or rebooting the system (for testing). | ||
301 | * | ||
302 | * The system will support either 'firmware' or 'platform', and that is | ||
303 | * known a priori (and encoded in pm_ops). But, the user may choose | ||
304 | * 'shutdown' or 'reboot' as alternatives. | ||
305 | * | ||
306 | * show() will display what the mode is currently set to. | ||
307 | * store() will accept one of | ||
308 | * | ||
309 | * 'firmware' | ||
310 | * 'platform' | ||
311 | * 'shutdown' | ||
312 | * 'reboot' | ||
313 | * | ||
314 | * It will only change to 'firmware' or 'platform' if the system | ||
315 | * supports it (as determined from pm_ops->pm_disk_mode). | ||
316 | */ | ||
317 | |||
318 | static ssize_t disk_show(struct subsystem * subsys, char * buf) | ||
319 | { | ||
320 | return sprintf(buf, "%s\n", pm_disk_modes[pm_disk_mode]); | ||
321 | } | ||
322 | |||
323 | |||
324 | static ssize_t disk_store(struct subsystem * s, const char * buf, size_t n) | ||
325 | { | ||
326 | int error = 0; | ||
327 | int i; | ||
328 | int len; | ||
329 | char *p; | ||
330 | suspend_disk_method_t mode = 0; | ||
331 | |||
332 | p = memchr(buf, '\n', n); | ||
333 | len = p ? p - buf : n; | ||
334 | |||
335 | down(&pm_sem); | ||
336 | for (i = PM_DISK_FIRMWARE; i < PM_DISK_MAX; i++) { | ||
337 | if (!strncmp(buf, pm_disk_modes[i], len)) { | ||
338 | mode = i; | ||
339 | break; | ||
340 | } | ||
341 | } | ||
342 | if (mode) { | ||
343 | if (mode == PM_DISK_SHUTDOWN || mode == PM_DISK_REBOOT) | ||
344 | pm_disk_mode = mode; | ||
345 | else { | ||
346 | if (pm_ops && pm_ops->enter && | ||
347 | (mode == pm_ops->pm_disk_mode)) | ||
348 | pm_disk_mode = mode; | ||
349 | else | ||
350 | error = -EINVAL; | ||
351 | } | ||
352 | } else | ||
353 | error = -EINVAL; | ||
354 | |||
355 | pr_debug("PM: suspend-to-disk mode set to '%s'\n", | ||
356 | pm_disk_modes[mode]); | ||
357 | up(&pm_sem); | ||
358 | return error ? error : n; | ||
359 | } | ||
360 | |||
361 | power_attr(disk); | ||
362 | |||
363 | static ssize_t resume_show(struct subsystem * subsys, char *buf) | ||
364 | { | ||
365 | return sprintf(buf,"%d:%d\n", MAJOR(swsusp_resume_device), | ||
366 | MINOR(swsusp_resume_device)); | ||
367 | } | ||
368 | |||
369 | static ssize_t resume_store(struct subsystem * subsys, const char * buf, size_t n) | ||
370 | { | ||
371 | int len; | ||
372 | char *p; | ||
373 | unsigned int maj, min; | ||
374 | int error = -EINVAL; | ||
375 | dev_t res; | ||
376 | |||
377 | p = memchr(buf, '\n', n); | ||
378 | len = p ? p - buf : n; | ||
379 | |||
380 | if (sscanf(buf, "%u:%u", &maj, &min) == 2) { | ||
381 | res = MKDEV(maj,min); | ||
382 | if (maj == MAJOR(res) && min == MINOR(res)) { | ||
383 | swsusp_resume_device = res; | ||
384 | printk("Attempting manual resume\n"); | ||
385 | noresume = 0; | ||
386 | software_resume(); | ||
387 | } | ||
388 | } | ||
389 | |||
390 | return error >= 0 ? n : error; | ||
391 | } | ||
392 | |||
393 | power_attr(resume); | ||
394 | |||
395 | static struct attribute * g[] = { | ||
396 | &disk_attr.attr, | ||
397 | &resume_attr.attr, | ||
398 | NULL, | ||
399 | }; | ||
400 | |||
401 | |||
402 | static struct attribute_group attr_group = { | ||
403 | .attrs = g, | ||
404 | }; | ||
405 | |||
406 | |||
407 | static int __init pm_disk_init(void) | ||
408 | { | ||
409 | return sysfs_create_group(&power_subsys.kset.kobj,&attr_group); | ||
410 | } | ||
411 | |||
412 | core_initcall(pm_disk_init); | ||
413 | |||
414 | |||
415 | static int __init resume_setup(char *str) | ||
416 | { | ||
417 | if (noresume) | ||
418 | return 1; | ||
419 | |||
420 | strncpy( resume_file, str, 255 ); | ||
421 | return 1; | ||
422 | } | ||
423 | |||
424 | static int __init noresume_setup(char *str) | ||
425 | { | ||
426 | noresume = 1; | ||
427 | return 1; | ||
428 | } | ||
429 | |||
430 | __setup("noresume", noresume_setup); | ||
431 | __setup("resume=", resume_setup); | ||
diff --git a/kernel/power/main.c b/kernel/power/main.c new file mode 100644 index 000000000000..7960ddf04a57 --- /dev/null +++ b/kernel/power/main.c | |||
@@ -0,0 +1,269 @@ | |||
1 | /* | ||
2 | * kernel/power/main.c - PM subsystem core functionality. | ||
3 | * | ||
4 | * Copyright (c) 2003 Patrick Mochel | ||
5 | * Copyright (c) 2003 Open Source Development Lab | ||
6 | * | ||
7 | * This file is released under the GPLv2 | ||
8 | * | ||
9 | */ | ||
10 | |||
11 | #include <linux/suspend.h> | ||
12 | #include <linux/kobject.h> | ||
13 | #include <linux/string.h> | ||
14 | #include <linux/delay.h> | ||
15 | #include <linux/errno.h> | ||
16 | #include <linux/init.h> | ||
17 | #include <linux/pm.h> | ||
18 | |||
19 | |||
20 | #include "power.h" | ||
21 | |||
22 | DECLARE_MUTEX(pm_sem); | ||
23 | |||
24 | struct pm_ops * pm_ops = NULL; | ||
25 | suspend_disk_method_t pm_disk_mode = PM_DISK_SHUTDOWN; | ||
26 | |||
27 | /** | ||
28 | * pm_set_ops - Set the global power method table. | ||
29 | * @ops: Pointer to ops structure. | ||
30 | */ | ||
31 | |||
32 | void pm_set_ops(struct pm_ops * ops) | ||
33 | { | ||
34 | down(&pm_sem); | ||
35 | pm_ops = ops; | ||
36 | up(&pm_sem); | ||
37 | } | ||
38 | |||
39 | |||
40 | /** | ||
41 | * suspend_prepare - Do prep work before entering low-power state. | ||
42 | * @state: State we're entering. | ||
43 | * | ||
44 | * This is common code that is called for each state that we're | ||
45 | * entering. Allocate a console, stop all processes, then make sure | ||
46 | * the platform can enter the requested state. | ||
47 | */ | ||
48 | |||
49 | static int suspend_prepare(suspend_state_t state) | ||
50 | { | ||
51 | int error = 0; | ||
52 | |||
53 | if (!pm_ops || !pm_ops->enter) | ||
54 | return -EPERM; | ||
55 | |||
56 | pm_prepare_console(); | ||
57 | |||
58 | if (freeze_processes()) { | ||
59 | error = -EAGAIN; | ||
60 | goto Thaw; | ||
61 | } | ||
62 | |||
63 | if (pm_ops->prepare) { | ||
64 | if ((error = pm_ops->prepare(state))) | ||
65 | goto Thaw; | ||
66 | } | ||
67 | |||
68 | if ((error = device_suspend(PMSG_SUSPEND))) { | ||
69 | printk(KERN_ERR "Some devices failed to suspend\n"); | ||
70 | goto Finish; | ||
71 | } | ||
72 | return 0; | ||
73 | Finish: | ||
74 | if (pm_ops->finish) | ||
75 | pm_ops->finish(state); | ||
76 | Thaw: | ||
77 | thaw_processes(); | ||
78 | pm_restore_console(); | ||
79 | return error; | ||
80 | } | ||
81 | |||
82 | |||
83 | static int suspend_enter(suspend_state_t state) | ||
84 | { | ||
85 | int error = 0; | ||
86 | unsigned long flags; | ||
87 | |||
88 | local_irq_save(flags); | ||
89 | |||
90 | if ((error = device_power_down(PMSG_SUSPEND))) { | ||
91 | printk(KERN_ERR "Some devices failed to power down\n"); | ||
92 | goto Done; | ||
93 | } | ||
94 | error = pm_ops->enter(state); | ||
95 | device_power_up(); | ||
96 | Done: | ||
97 | local_irq_restore(flags); | ||
98 | return error; | ||
99 | } | ||
100 | |||
101 | |||
102 | /** | ||
103 | * suspend_finish - Do final work before exiting suspend sequence. | ||
104 | * @state: State we're coming out of. | ||
105 | * | ||
106 | * Call platform code to clean up, restart processes, and free the | ||
107 | * console that we've allocated. This is not called for suspend-to-disk. | ||
108 | */ | ||
109 | |||
110 | static void suspend_finish(suspend_state_t state) | ||
111 | { | ||
112 | device_resume(); | ||
113 | if (pm_ops && pm_ops->finish) | ||
114 | pm_ops->finish(state); | ||
115 | thaw_processes(); | ||
116 | pm_restore_console(); | ||
117 | } | ||
118 | |||
119 | |||
120 | |||
121 | |||
122 | static char * pm_states[] = { | ||
123 | [PM_SUSPEND_STANDBY] = "standby", | ||
124 | [PM_SUSPEND_MEM] = "mem", | ||
125 | [PM_SUSPEND_DISK] = "disk", | ||
126 | NULL, | ||
127 | }; | ||
128 | |||
129 | |||
130 | /** | ||
131 | * enter_state - Do common work of entering low-power state. | ||
132 | * @state: pm_state structure for state we're entering. | ||
133 | * | ||
134 | * Make sure we're the only ones trying to enter a sleep state. Fail | ||
135 | * if someone has beat us to it, since we don't want anything weird to | ||
136 | * happen when we wake up. | ||
137 | * Then, do the setup for suspend, enter the state, and cleaup (after | ||
138 | * we've woken up). | ||
139 | */ | ||
140 | |||
141 | static int enter_state(suspend_state_t state) | ||
142 | { | ||
143 | int error; | ||
144 | |||
145 | if (down_trylock(&pm_sem)) | ||
146 | return -EBUSY; | ||
147 | |||
148 | if (state == PM_SUSPEND_DISK) { | ||
149 | error = pm_suspend_disk(); | ||
150 | goto Unlock; | ||
151 | } | ||
152 | |||
153 | /* Suspend is hard to get right on SMP. */ | ||
154 | if (num_online_cpus() != 1) { | ||
155 | error = -EPERM; | ||
156 | goto Unlock; | ||
157 | } | ||
158 | |||
159 | pr_debug("PM: Preparing system for suspend\n"); | ||
160 | if ((error = suspend_prepare(state))) | ||
161 | goto Unlock; | ||
162 | |||
163 | pr_debug("PM: Entering state.\n"); | ||
164 | error = suspend_enter(state); | ||
165 | |||
166 | pr_debug("PM: Finishing up.\n"); | ||
167 | suspend_finish(state); | ||
168 | Unlock: | ||
169 | up(&pm_sem); | ||
170 | return error; | ||
171 | } | ||
172 | |||
173 | /* | ||
174 | * This is main interface to the outside world. It needs to be | ||
175 | * called from process context. | ||
176 | */ | ||
177 | int software_suspend(void) | ||
178 | { | ||
179 | return enter_state(PM_SUSPEND_DISK); | ||
180 | } | ||
181 | |||
182 | |||
183 | /** | ||
184 | * pm_suspend - Externally visible function for suspending system. | ||
185 | * @state: Enumarted value of state to enter. | ||
186 | * | ||
187 | * Determine whether or not value is within range, get state | ||
188 | * structure, and enter (above). | ||
189 | */ | ||
190 | |||
191 | int pm_suspend(suspend_state_t state) | ||
192 | { | ||
193 | if (state > PM_SUSPEND_ON && state < PM_SUSPEND_MAX) | ||
194 | return enter_state(state); | ||
195 | return -EINVAL; | ||
196 | } | ||
197 | |||
198 | |||
199 | |||
200 | decl_subsys(power,NULL,NULL); | ||
201 | |||
202 | |||
203 | /** | ||
204 | * state - control system power state. | ||
205 | * | ||
206 | * show() returns what states are supported, which is hard-coded to | ||
207 | * 'standby' (Power-On Suspend), 'mem' (Suspend-to-RAM), and | ||
208 | * 'disk' (Suspend-to-Disk). | ||
209 | * | ||
210 | * store() accepts one of those strings, translates it into the | ||
211 | * proper enumerated value, and initiates a suspend transition. | ||
212 | */ | ||
213 | |||
214 | static ssize_t state_show(struct subsystem * subsys, char * buf) | ||
215 | { | ||
216 | int i; | ||
217 | char * s = buf; | ||
218 | |||
219 | for (i = 0; i < PM_SUSPEND_MAX; i++) { | ||
220 | if (pm_states[i]) | ||
221 | s += sprintf(s,"%s ",pm_states[i]); | ||
222 | } | ||
223 | s += sprintf(s,"\n"); | ||
224 | return (s - buf); | ||
225 | } | ||
226 | |||
227 | static ssize_t state_store(struct subsystem * subsys, const char * buf, size_t n) | ||
228 | { | ||
229 | suspend_state_t state = PM_SUSPEND_STANDBY; | ||
230 | char ** s; | ||
231 | char *p; | ||
232 | int error; | ||
233 | int len; | ||
234 | |||
235 | p = memchr(buf, '\n', n); | ||
236 | len = p ? p - buf : n; | ||
237 | |||
238 | for (s = &pm_states[state]; state < PM_SUSPEND_MAX; s++, state++) { | ||
239 | if (*s && !strncmp(buf, *s, len)) | ||
240 | break; | ||
241 | } | ||
242 | if (*s) | ||
243 | error = enter_state(state); | ||
244 | else | ||
245 | error = -EINVAL; | ||
246 | return error ? error : n; | ||
247 | } | ||
248 | |||
249 | power_attr(state); | ||
250 | |||
251 | static struct attribute * g[] = { | ||
252 | &state_attr.attr, | ||
253 | NULL, | ||
254 | }; | ||
255 | |||
256 | static struct attribute_group attr_group = { | ||
257 | .attrs = g, | ||
258 | }; | ||
259 | |||
260 | |||
261 | static int __init pm_init(void) | ||
262 | { | ||
263 | int error = subsystem_register(&power_subsys); | ||
264 | if (!error) | ||
265 | error = sysfs_create_group(&power_subsys.kset.kobj,&attr_group); | ||
266 | return error; | ||
267 | } | ||
268 | |||
269 | core_initcall(pm_init); | ||
diff --git a/kernel/power/pm.c b/kernel/power/pm.c new file mode 100644 index 000000000000..61deda04e39e --- /dev/null +++ b/kernel/power/pm.c | |||
@@ -0,0 +1,265 @@ | |||
1 | /* | ||
2 | * pm.c - Power management interface | ||
3 | * | ||
4 | * Copyright (C) 2000 Andrew Henroid | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
19 | */ | ||
20 | #include <linux/init.h> | ||
21 | #include <linux/module.h> | ||
22 | #include <linux/spinlock.h> | ||
23 | #include <linux/mm.h> | ||
24 | #include <linux/slab.h> | ||
25 | #include <linux/pm.h> | ||
26 | #include <linux/interrupt.h> | ||
27 | |||
28 | int pm_active; | ||
29 | |||
30 | /* | ||
31 | * Locking notes: | ||
32 | * pm_devs_lock can be a semaphore providing pm ops are not called | ||
33 | * from an interrupt handler (already a bad idea so no change here). Each | ||
34 | * change must be protected so that an unlink of an entry doesn't clash | ||
35 | * with a pm send - which is permitted to sleep in the current architecture | ||
36 | * | ||
37 | * Module unloads clashing with pm events now work out safely, the module | ||
38 | * unload path will block until the event has been sent. It may well block | ||
39 | * until a resume but that will be fine. | ||
40 | */ | ||
41 | |||
42 | static DECLARE_MUTEX(pm_devs_lock); | ||
43 | static LIST_HEAD(pm_devs); | ||
44 | |||
45 | /** | ||
46 | * pm_register - register a device with power management | ||
47 | * @type: device type | ||
48 | * @id: device ID | ||
49 | * @callback: callback function | ||
50 | * | ||
51 | * Add a device to the list of devices that wish to be notified about | ||
52 | * power management events. A &pm_dev structure is returned on success, | ||
53 | * on failure the return is %NULL. | ||
54 | * | ||
55 | * The callback function will be called in process context and | ||
56 | * it may sleep. | ||
57 | */ | ||
58 | |||
59 | struct pm_dev *pm_register(pm_dev_t type, | ||
60 | unsigned long id, | ||
61 | pm_callback callback) | ||
62 | { | ||
63 | struct pm_dev *dev = kmalloc(sizeof(struct pm_dev), GFP_KERNEL); | ||
64 | if (dev) { | ||
65 | memset(dev, 0, sizeof(*dev)); | ||
66 | dev->type = type; | ||
67 | dev->id = id; | ||
68 | dev->callback = callback; | ||
69 | |||
70 | down(&pm_devs_lock); | ||
71 | list_add(&dev->entry, &pm_devs); | ||
72 | up(&pm_devs_lock); | ||
73 | } | ||
74 | return dev; | ||
75 | } | ||
76 | |||
77 | /** | ||
78 | * pm_unregister - unregister a device with power management | ||
79 | * @dev: device to unregister | ||
80 | * | ||
81 | * Remove a device from the power management notification lists. The | ||
82 | * dev passed must be a handle previously returned by pm_register. | ||
83 | */ | ||
84 | |||
85 | void pm_unregister(struct pm_dev *dev) | ||
86 | { | ||
87 | if (dev) { | ||
88 | down(&pm_devs_lock); | ||
89 | list_del(&dev->entry); | ||
90 | up(&pm_devs_lock); | ||
91 | |||
92 | kfree(dev); | ||
93 | } | ||
94 | } | ||
95 | |||
96 | static void __pm_unregister(struct pm_dev *dev) | ||
97 | { | ||
98 | if (dev) { | ||
99 | list_del(&dev->entry); | ||
100 | kfree(dev); | ||
101 | } | ||
102 | } | ||
103 | |||
104 | /** | ||
105 | * pm_unregister_all - unregister all devices with matching callback | ||
106 | * @callback: callback function pointer | ||
107 | * | ||
108 | * Unregister every device that would call the callback passed. This | ||
109 | * is primarily meant as a helper function for loadable modules. It | ||
110 | * enables a module to give up all its managed devices without keeping | ||
111 | * its own private list. | ||
112 | */ | ||
113 | |||
114 | void pm_unregister_all(pm_callback callback) | ||
115 | { | ||
116 | struct list_head *entry; | ||
117 | |||
118 | if (!callback) | ||
119 | return; | ||
120 | |||
121 | down(&pm_devs_lock); | ||
122 | entry = pm_devs.next; | ||
123 | while (entry != &pm_devs) { | ||
124 | struct pm_dev *dev = list_entry(entry, struct pm_dev, entry); | ||
125 | entry = entry->next; | ||
126 | if (dev->callback == callback) | ||
127 | __pm_unregister(dev); | ||
128 | } | ||
129 | up(&pm_devs_lock); | ||
130 | } | ||
131 | |||
132 | /** | ||
133 | * pm_send - send request to a single device | ||
134 | * @dev: device to send to | ||
135 | * @rqst: power management request | ||
136 | * @data: data for the callback | ||
137 | * | ||
138 | * Issue a power management request to a given device. The | ||
139 | * %PM_SUSPEND and %PM_RESUME events are handled specially. The | ||
140 | * data field must hold the intended next state. No call is made | ||
141 | * if the state matches. | ||
142 | * | ||
143 | * BUGS: what stops two power management requests occurring in parallel | ||
144 | * and conflicting. | ||
145 | * | ||
146 | * WARNING: Calling pm_send directly is not generally recommended, in | ||
147 | * particular there is no locking against the pm_dev going away. The | ||
148 | * caller must maintain all needed locking or have 'inside knowledge' | ||
149 | * on the safety. Also remember that this function is not locked against | ||
150 | * pm_unregister. This means that you must handle SMP races on callback | ||
151 | * execution and unload yourself. | ||
152 | */ | ||
153 | |||
154 | static int pm_send(struct pm_dev *dev, pm_request_t rqst, void *data) | ||
155 | { | ||
156 | int status = 0; | ||
157 | unsigned long prev_state, next_state; | ||
158 | |||
159 | if (in_interrupt()) | ||
160 | BUG(); | ||
161 | |||
162 | switch (rqst) { | ||
163 | case PM_SUSPEND: | ||
164 | case PM_RESUME: | ||
165 | prev_state = dev->state; | ||
166 | next_state = (unsigned long) data; | ||
167 | if (prev_state != next_state) { | ||
168 | if (dev->callback) | ||
169 | status = (*dev->callback)(dev, rqst, data); | ||
170 | if (!status) { | ||
171 | dev->state = next_state; | ||
172 | dev->prev_state = prev_state; | ||
173 | } | ||
174 | } | ||
175 | else { | ||
176 | dev->prev_state = prev_state; | ||
177 | } | ||
178 | break; | ||
179 | default: | ||
180 | if (dev->callback) | ||
181 | status = (*dev->callback)(dev, rqst, data); | ||
182 | break; | ||
183 | } | ||
184 | return status; | ||
185 | } | ||
186 | |||
187 | /* | ||
188 | * Undo incomplete request | ||
189 | */ | ||
190 | static void pm_undo_all(struct pm_dev *last) | ||
191 | { | ||
192 | struct list_head *entry = last->entry.prev; | ||
193 | while (entry != &pm_devs) { | ||
194 | struct pm_dev *dev = list_entry(entry, struct pm_dev, entry); | ||
195 | if (dev->state != dev->prev_state) { | ||
196 | /* previous state was zero (running) resume or | ||
197 | * previous state was non-zero (suspended) suspend | ||
198 | */ | ||
199 | pm_request_t undo = (dev->prev_state | ||
200 | ? PM_SUSPEND:PM_RESUME); | ||
201 | pm_send(dev, undo, (void*) dev->prev_state); | ||
202 | } | ||
203 | entry = entry->prev; | ||
204 | } | ||
205 | } | ||
206 | |||
207 | /** | ||
208 | * pm_send_all - send request to all managed devices | ||
209 | * @rqst: power management request | ||
210 | * @data: data for the callback | ||
211 | * | ||
212 | * Issue a power management request to all devices. The | ||
213 | * %PM_SUSPEND events are handled specially. Any device is | ||
214 | * permitted to fail a suspend by returning a non zero (error) | ||
215 | * value from its callback function. If any device vetoes a | ||
216 | * suspend request then all other devices that have suspended | ||
217 | * during the processing of this request are restored to their | ||
218 | * previous state. | ||
219 | * | ||
220 | * WARNING: This function takes the pm_devs_lock. The lock is not dropped until | ||
221 | * the callbacks have completed. This prevents races against pm locking | ||
222 | * functions, races against module unload pm_unregister code. It does | ||
223 | * mean however that you must not issue pm_ functions within the callback | ||
224 | * or you will deadlock and users will hate you. | ||
225 | * | ||
226 | * Zero is returned on success. If a suspend fails then the status | ||
227 | * from the device that vetoes the suspend is returned. | ||
228 | * | ||
229 | * BUGS: what stops two power management requests occurring in parallel | ||
230 | * and conflicting. | ||
231 | */ | ||
232 | |||
233 | int pm_send_all(pm_request_t rqst, void *data) | ||
234 | { | ||
235 | struct list_head *entry; | ||
236 | |||
237 | down(&pm_devs_lock); | ||
238 | entry = pm_devs.next; | ||
239 | while (entry != &pm_devs) { | ||
240 | struct pm_dev *dev = list_entry(entry, struct pm_dev, entry); | ||
241 | if (dev->callback) { | ||
242 | int status = pm_send(dev, rqst, data); | ||
243 | if (status) { | ||
244 | /* return devices to previous state on | ||
245 | * failed suspend request | ||
246 | */ | ||
247 | if (rqst == PM_SUSPEND) | ||
248 | pm_undo_all(dev); | ||
249 | up(&pm_devs_lock); | ||
250 | return status; | ||
251 | } | ||
252 | } | ||
253 | entry = entry->next; | ||
254 | } | ||
255 | up(&pm_devs_lock); | ||
256 | return 0; | ||
257 | } | ||
258 | |||
259 | EXPORT_SYMBOL(pm_register); | ||
260 | EXPORT_SYMBOL(pm_unregister); | ||
261 | EXPORT_SYMBOL(pm_unregister_all); | ||
262 | EXPORT_SYMBOL(pm_send_all); | ||
263 | EXPORT_SYMBOL(pm_active); | ||
264 | |||
265 | |||
diff --git a/kernel/power/power.h b/kernel/power/power.h new file mode 100644 index 000000000000..cd6a3493cc0d --- /dev/null +++ b/kernel/power/power.h | |||
@@ -0,0 +1,52 @@ | |||
1 | #include <linux/suspend.h> | ||
2 | #include <linux/utsname.h> | ||
3 | |||
4 | /* With SUSPEND_CONSOLE defined, suspend looks *really* cool, but | ||
5 | we probably do not take enough locks for switching consoles, etc, | ||
6 | so bad things might happen. | ||
7 | */ | ||
8 | #if defined(CONFIG_VT) && defined(CONFIG_VT_CONSOLE) | ||
9 | #define SUSPEND_CONSOLE (MAX_NR_CONSOLES-1) | ||
10 | #endif | ||
11 | |||
12 | |||
13 | struct swsusp_info { | ||
14 | struct new_utsname uts; | ||
15 | u32 version_code; | ||
16 | unsigned long num_physpages; | ||
17 | int cpus; | ||
18 | unsigned long image_pages; | ||
19 | unsigned long pagedir_pages; | ||
20 | suspend_pagedir_t * suspend_pagedir; | ||
21 | swp_entry_t pagedir[768]; | ||
22 | } __attribute__((aligned(PAGE_SIZE))); | ||
23 | |||
24 | |||
25 | |||
26 | #ifdef CONFIG_SOFTWARE_SUSPEND | ||
27 | extern int pm_suspend_disk(void); | ||
28 | |||
29 | #else | ||
30 | static inline int pm_suspend_disk(void) | ||
31 | { | ||
32 | return -EPERM; | ||
33 | } | ||
34 | #endif | ||
35 | extern struct semaphore pm_sem; | ||
36 | #define power_attr(_name) \ | ||
37 | static struct subsys_attribute _name##_attr = { \ | ||
38 | .attr = { \ | ||
39 | .name = __stringify(_name), \ | ||
40 | .mode = 0644, \ | ||
41 | }, \ | ||
42 | .show = _name##_show, \ | ||
43 | .store = _name##_store, \ | ||
44 | } | ||
45 | |||
46 | extern struct subsystem power_subsys; | ||
47 | |||
48 | extern int freeze_processes(void); | ||
49 | extern void thaw_processes(void); | ||
50 | |||
51 | extern int pm_prepare_console(void); | ||
52 | extern void pm_restore_console(void); | ||
diff --git a/kernel/power/poweroff.c b/kernel/power/poweroff.c new file mode 100644 index 000000000000..715081b2d829 --- /dev/null +++ b/kernel/power/poweroff.c | |||
@@ -0,0 +1,45 @@ | |||
1 | /* | ||
2 | * poweroff.c - sysrq handler to gracefully power down machine. | ||
3 | * | ||
4 | * This file is released under the GPL v2 | ||
5 | */ | ||
6 | |||
7 | #include <linux/kernel.h> | ||
8 | #include <linux/sysrq.h> | ||
9 | #include <linux/init.h> | ||
10 | #include <linux/pm.h> | ||
11 | #include <linux/workqueue.h> | ||
12 | |||
13 | /* | ||
14 | * When the user hits Sys-Rq o to power down the machine this is the | ||
15 | * callback we use. | ||
16 | */ | ||
17 | |||
18 | static void do_poweroff(void *dummy) | ||
19 | { | ||
20 | if (pm_power_off) | ||
21 | pm_power_off(); | ||
22 | } | ||
23 | |||
24 | static DECLARE_WORK(poweroff_work, do_poweroff, NULL); | ||
25 | |||
26 | static void handle_poweroff(int key, struct pt_regs *pt_regs, | ||
27 | struct tty_struct *tty) | ||
28 | { | ||
29 | schedule_work(&poweroff_work); | ||
30 | } | ||
31 | |||
32 | static struct sysrq_key_op sysrq_poweroff_op = { | ||
33 | .handler = handle_poweroff, | ||
34 | .help_msg = "powerOff", | ||
35 | .action_msg = "Power Off", | ||
36 | .enable_mask = SYSRQ_ENABLE_BOOT, | ||
37 | }; | ||
38 | |||
39 | static int pm_sysrq_init(void) | ||
40 | { | ||
41 | register_sysrq_key('o', &sysrq_poweroff_op); | ||
42 | return 0; | ||
43 | } | ||
44 | |||
45 | subsys_initcall(pm_sysrq_init); | ||
diff --git a/kernel/power/process.c b/kernel/power/process.c new file mode 100644 index 000000000000..78d92dc6a1ed --- /dev/null +++ b/kernel/power/process.c | |||
@@ -0,0 +1,121 @@ | |||
1 | /* | ||
2 | * kernel/power/process.c - Functions for starting/stopping processes on | ||
3 | * suspend transitions. | ||
4 | * | ||
5 | * Originally from swsusp. | ||
6 | */ | ||
7 | |||
8 | |||
9 | #undef DEBUG | ||
10 | |||
11 | #include <linux/smp_lock.h> | ||
12 | #include <linux/interrupt.h> | ||
13 | #include <linux/suspend.h> | ||
14 | #include <linux/module.h> | ||
15 | |||
16 | /* | ||
17 | * Timeout for stopping processes | ||
18 | */ | ||
19 | #define TIMEOUT (6 * HZ) | ||
20 | |||
21 | |||
22 | static inline int freezeable(struct task_struct * p) | ||
23 | { | ||
24 | if ((p == current) || | ||
25 | (p->flags & PF_NOFREEZE) || | ||
26 | (p->exit_state == EXIT_ZOMBIE) || | ||
27 | (p->exit_state == EXIT_DEAD) || | ||
28 | (p->state == TASK_STOPPED) || | ||
29 | (p->state == TASK_TRACED)) | ||
30 | return 0; | ||
31 | return 1; | ||
32 | } | ||
33 | |||
34 | /* Refrigerator is place where frozen processes are stored :-). */ | ||
35 | void refrigerator(unsigned long flag) | ||
36 | { | ||
37 | /* Hmm, should we be allowed to suspend when there are realtime | ||
38 | processes around? */ | ||
39 | long save; | ||
40 | save = current->state; | ||
41 | current->state = TASK_UNINTERRUPTIBLE; | ||
42 | pr_debug("%s entered refrigerator\n", current->comm); | ||
43 | printk("="); | ||
44 | current->flags &= ~PF_FREEZE; | ||
45 | |||
46 | spin_lock_irq(¤t->sighand->siglock); | ||
47 | recalc_sigpending(); /* We sent fake signal, clean it up */ | ||
48 | spin_unlock_irq(¤t->sighand->siglock); | ||
49 | |||
50 | current->flags |= PF_FROZEN; | ||
51 | while (current->flags & PF_FROZEN) | ||
52 | schedule(); | ||
53 | pr_debug("%s left refrigerator\n", current->comm); | ||
54 | current->state = save; | ||
55 | } | ||
56 | |||
57 | /* 0 = success, else # of processes that we failed to stop */ | ||
58 | int freeze_processes(void) | ||
59 | { | ||
60 | int todo; | ||
61 | unsigned long start_time; | ||
62 | struct task_struct *g, *p; | ||
63 | |||
64 | printk( "Stopping tasks: " ); | ||
65 | start_time = jiffies; | ||
66 | do { | ||
67 | todo = 0; | ||
68 | read_lock(&tasklist_lock); | ||
69 | do_each_thread(g, p) { | ||
70 | unsigned long flags; | ||
71 | if (!freezeable(p)) | ||
72 | continue; | ||
73 | if ((p->flags & PF_FROZEN) || | ||
74 | (p->state == TASK_TRACED) || | ||
75 | (p->state == TASK_STOPPED)) | ||
76 | continue; | ||
77 | |||
78 | /* FIXME: smp problem here: we may not access other process' flags | ||
79 | without locking */ | ||
80 | p->flags |= PF_FREEZE; | ||
81 | spin_lock_irqsave(&p->sighand->siglock, flags); | ||
82 | signal_wake_up(p, 0); | ||
83 | spin_unlock_irqrestore(&p->sighand->siglock, flags); | ||
84 | todo++; | ||
85 | } while_each_thread(g, p); | ||
86 | read_unlock(&tasklist_lock); | ||
87 | yield(); /* Yield is okay here */ | ||
88 | if (time_after(jiffies, start_time + TIMEOUT)) { | ||
89 | printk( "\n" ); | ||
90 | printk(KERN_ERR " stopping tasks failed (%d tasks remaining)\n", todo ); | ||
91 | return todo; | ||
92 | } | ||
93 | } while(todo); | ||
94 | |||
95 | printk( "|\n" ); | ||
96 | BUG_ON(in_atomic()); | ||
97 | return 0; | ||
98 | } | ||
99 | |||
100 | void thaw_processes(void) | ||
101 | { | ||
102 | struct task_struct *g, *p; | ||
103 | |||
104 | printk( "Restarting tasks..." ); | ||
105 | read_lock(&tasklist_lock); | ||
106 | do_each_thread(g, p) { | ||
107 | if (!freezeable(p)) | ||
108 | continue; | ||
109 | if (p->flags & PF_FROZEN) { | ||
110 | p->flags &= ~PF_FROZEN; | ||
111 | wake_up_process(p); | ||
112 | } else | ||
113 | printk(KERN_INFO " Strange, %s not stopped\n", p->comm ); | ||
114 | } while_each_thread(g, p); | ||
115 | |||
116 | read_unlock(&tasklist_lock); | ||
117 | schedule(); | ||
118 | printk( " done\n" ); | ||
119 | } | ||
120 | |||
121 | EXPORT_SYMBOL(refrigerator); | ||
diff --git a/kernel/power/smp.c b/kernel/power/smp.c new file mode 100644 index 000000000000..7fa7f6e2b7fb --- /dev/null +++ b/kernel/power/smp.c | |||
@@ -0,0 +1,85 @@ | |||
1 | /* | ||
2 | * kernel/power/smp.c - Functions for stopping other CPUs. | ||
3 | * | ||
4 | * Copyright 2004 Pavel Machek <pavel@suse.cz> | ||
5 | * Copyright (C) 2002-2003 Nigel Cunningham <ncunningham@clear.net.nz> | ||
6 | * | ||
7 | * This file is released under the GPLv2. | ||
8 | */ | ||
9 | |||
10 | #undef DEBUG | ||
11 | |||
12 | #include <linux/smp_lock.h> | ||
13 | #include <linux/interrupt.h> | ||
14 | #include <linux/suspend.h> | ||
15 | #include <linux/module.h> | ||
16 | #include <asm/atomic.h> | ||
17 | #include <asm/tlbflush.h> | ||
18 | |||
19 | static atomic_t cpu_counter, freeze; | ||
20 | |||
21 | |||
22 | static void smp_pause(void * data) | ||
23 | { | ||
24 | struct saved_context ctxt; | ||
25 | __save_processor_state(&ctxt); | ||
26 | printk("Sleeping in:\n"); | ||
27 | dump_stack(); | ||
28 | atomic_inc(&cpu_counter); | ||
29 | while (atomic_read(&freeze)) { | ||
30 | /* FIXME: restore takes place at random piece inside this. | ||
31 | This should probably be written in assembly, and | ||
32 | preserve general-purpose registers, too | ||
33 | |||
34 | What about stack? We may need to move to new stack here. | ||
35 | |||
36 | This should better be ran with interrupts disabled. | ||
37 | */ | ||
38 | cpu_relax(); | ||
39 | barrier(); | ||
40 | } | ||
41 | atomic_dec(&cpu_counter); | ||
42 | __restore_processor_state(&ctxt); | ||
43 | } | ||
44 | |||
45 | static cpumask_t oldmask; | ||
46 | |||
47 | void disable_nonboot_cpus(void) | ||
48 | { | ||
49 | printk("Freezing CPUs (at %d)", smp_processor_id()); | ||
50 | oldmask = current->cpus_allowed; | ||
51 | set_cpus_allowed(current, cpumask_of_cpu(0)); | ||
52 | current->state = TASK_INTERRUPTIBLE; | ||
53 | schedule_timeout(HZ); | ||
54 | printk("..."); | ||
55 | BUG_ON(smp_processor_id() != 0); | ||
56 | |||
57 | /* FIXME: for this to work, all the CPUs must be running | ||
58 | * "idle" thread (or we deadlock). Is that guaranteed? */ | ||
59 | |||
60 | atomic_set(&cpu_counter, 0); | ||
61 | atomic_set(&freeze, 1); | ||
62 | smp_call_function(smp_pause, NULL, 0, 0); | ||
63 | while (atomic_read(&cpu_counter) < (num_online_cpus() - 1)) { | ||
64 | cpu_relax(); | ||
65 | barrier(); | ||
66 | } | ||
67 | printk("ok\n"); | ||
68 | } | ||
69 | |||
70 | void enable_nonboot_cpus(void) | ||
71 | { | ||
72 | printk("Restarting CPUs"); | ||
73 | atomic_set(&freeze, 0); | ||
74 | while (atomic_read(&cpu_counter)) { | ||
75 | cpu_relax(); | ||
76 | barrier(); | ||
77 | } | ||
78 | printk("..."); | ||
79 | set_cpus_allowed(current, oldmask); | ||
80 | schedule(); | ||
81 | printk("ok\n"); | ||
82 | |||
83 | } | ||
84 | |||
85 | |||
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c new file mode 100644 index 000000000000..ae5bebc3b18f --- /dev/null +++ b/kernel/power/swsusp.c | |||
@@ -0,0 +1,1433 @@ | |||
1 | /* | ||
2 | * linux/kernel/power/swsusp.c | ||
3 | * | ||
4 | * This file is to realize architecture-independent | ||
5 | * machine suspend feature using pretty near only high-level routines | ||
6 | * | ||
7 | * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu> | ||
8 | * Copyright (C) 1998,2001-2004 Pavel Machek <pavel@suse.cz> | ||
9 | * | ||
10 | * This file is released under the GPLv2. | ||
11 | * | ||
12 | * I'd like to thank the following people for their work: | ||
13 | * | ||
14 | * Pavel Machek <pavel@ucw.cz>: | ||
15 | * Modifications, defectiveness pointing, being with me at the very beginning, | ||
16 | * suspend to swap space, stop all tasks. Port to 2.4.18-ac and 2.5.17. | ||
17 | * | ||
18 | * Steve Doddi <dirk@loth.demon.co.uk>: | ||
19 | * Support the possibility of hardware state restoring. | ||
20 | * | ||
21 | * Raph <grey.havens@earthling.net>: | ||
22 | * Support for preserving states of network devices and virtual console | ||
23 | * (including X and svgatextmode) | ||
24 | * | ||
25 | * Kurt Garloff <garloff@suse.de>: | ||
26 | * Straightened the critical function in order to prevent compilers from | ||
27 | * playing tricks with local variables. | ||
28 | * | ||
29 | * Andreas Mohr <a.mohr@mailto.de> | ||
30 | * | ||
31 | * Alex Badea <vampire@go.ro>: | ||
32 | * Fixed runaway init | ||
33 | * | ||
34 | * More state savers are welcome. Especially for the scsi layer... | ||
35 | * | ||
36 | * For TODOs,FIXMEs also look in Documentation/power/swsusp.txt | ||
37 | */ | ||
38 | |||
39 | #include <linux/module.h> | ||
40 | #include <linux/mm.h> | ||
41 | #include <linux/suspend.h> | ||
42 | #include <linux/smp_lock.h> | ||
43 | #include <linux/file.h> | ||
44 | #include <linux/utsname.h> | ||
45 | #include <linux/version.h> | ||
46 | #include <linux/delay.h> | ||
47 | #include <linux/reboot.h> | ||
48 | #include <linux/bitops.h> | ||
49 | #include <linux/vt_kern.h> | ||
50 | #include <linux/kbd_kern.h> | ||
51 | #include <linux/keyboard.h> | ||
52 | #include <linux/spinlock.h> | ||
53 | #include <linux/genhd.h> | ||
54 | #include <linux/kernel.h> | ||
55 | #include <linux/major.h> | ||
56 | #include <linux/swap.h> | ||
57 | #include <linux/pm.h> | ||
58 | #include <linux/device.h> | ||
59 | #include <linux/buffer_head.h> | ||
60 | #include <linux/swapops.h> | ||
61 | #include <linux/bootmem.h> | ||
62 | #include <linux/syscalls.h> | ||
63 | #include <linux/console.h> | ||
64 | #include <linux/highmem.h> | ||
65 | #include <linux/bio.h> | ||
66 | |||
67 | #include <asm/uaccess.h> | ||
68 | #include <asm/mmu_context.h> | ||
69 | #include <asm/pgtable.h> | ||
70 | #include <asm/tlbflush.h> | ||
71 | #include <asm/io.h> | ||
72 | |||
73 | #include "power.h" | ||
74 | |||
75 | /* References to section boundaries */ | ||
76 | extern const void __nosave_begin, __nosave_end; | ||
77 | |||
78 | /* Variables to be preserved over suspend */ | ||
79 | static int nr_copy_pages_check; | ||
80 | |||
81 | extern char resume_file[]; | ||
82 | |||
83 | /* Local variables that should not be affected by save */ | ||
84 | unsigned int nr_copy_pages __nosavedata = 0; | ||
85 | |||
86 | /* Suspend pagedir is allocated before final copy, therefore it | ||
87 | must be freed after resume | ||
88 | |||
89 | Warning: this is evil. There are actually two pagedirs at time of | ||
90 | resume. One is "pagedir_save", which is empty frame allocated at | ||
91 | time of suspend, that must be freed. Second is "pagedir_nosave", | ||
92 | allocated at time of resume, that travels through memory not to | ||
93 | collide with anything. | ||
94 | |||
95 | Warning: this is even more evil than it seems. Pagedirs this file | ||
96 | talks about are completely different from page directories used by | ||
97 | MMU hardware. | ||
98 | */ | ||
99 | suspend_pagedir_t *pagedir_nosave __nosavedata = NULL; | ||
100 | static suspend_pagedir_t *pagedir_save; | ||
101 | |||
102 | #define SWSUSP_SIG "S1SUSPEND" | ||
103 | |||
104 | static struct swsusp_header { | ||
105 | char reserved[PAGE_SIZE - 20 - sizeof(swp_entry_t)]; | ||
106 | swp_entry_t swsusp_info; | ||
107 | char orig_sig[10]; | ||
108 | char sig[10]; | ||
109 | } __attribute__((packed, aligned(PAGE_SIZE))) swsusp_header; | ||
110 | |||
111 | static struct swsusp_info swsusp_info; | ||
112 | |||
113 | /* | ||
114 | * XXX: We try to keep some more pages free so that I/O operations succeed | ||
115 | * without paging. Might this be more? | ||
116 | */ | ||
117 | #define PAGES_FOR_IO 512 | ||
118 | |||
119 | /* | ||
120 | * Saving part... | ||
121 | */ | ||
122 | |||
123 | /* We memorize in swapfile_used what swap devices are used for suspension */ | ||
124 | #define SWAPFILE_UNUSED 0 | ||
125 | #define SWAPFILE_SUSPEND 1 /* This is the suspending device */ | ||
126 | #define SWAPFILE_IGNORED 2 /* Those are other swap devices ignored for suspension */ | ||
127 | |||
128 | static unsigned short swapfile_used[MAX_SWAPFILES]; | ||
129 | static unsigned short root_swap; | ||
130 | |||
131 | static int mark_swapfiles(swp_entry_t prev) | ||
132 | { | ||
133 | int error; | ||
134 | |||
135 | rw_swap_page_sync(READ, | ||
136 | swp_entry(root_swap, 0), | ||
137 | virt_to_page((unsigned long)&swsusp_header)); | ||
138 | if (!memcmp("SWAP-SPACE",swsusp_header.sig, 10) || | ||
139 | !memcmp("SWAPSPACE2",swsusp_header.sig, 10)) { | ||
140 | memcpy(swsusp_header.orig_sig,swsusp_header.sig, 10); | ||
141 | memcpy(swsusp_header.sig,SWSUSP_SIG, 10); | ||
142 | swsusp_header.swsusp_info = prev; | ||
143 | error = rw_swap_page_sync(WRITE, | ||
144 | swp_entry(root_swap, 0), | ||
145 | virt_to_page((unsigned long) | ||
146 | &swsusp_header)); | ||
147 | } else { | ||
148 | pr_debug("swsusp: Partition is not swap space.\n"); | ||
149 | error = -ENODEV; | ||
150 | } | ||
151 | return error; | ||
152 | } | ||
153 | |||
154 | /* | ||
155 | * Check whether the swap device is the specified resume | ||
156 | * device, irrespective of whether they are specified by | ||
157 | * identical names. | ||
158 | * | ||
159 | * (Thus, device inode aliasing is allowed. You can say /dev/hda4 | ||
160 | * instead of /dev/ide/host0/bus0/target0/lun0/part4 [if using devfs] | ||
161 | * and they'll be considered the same device. This is *necessary* for | ||
162 | * devfs, since the resume code can only recognize the form /dev/hda4, | ||
163 | * but the suspend code would see the long name.) | ||
164 | */ | ||
165 | static int is_resume_device(const struct swap_info_struct *swap_info) | ||
166 | { | ||
167 | struct file *file = swap_info->swap_file; | ||
168 | struct inode *inode = file->f_dentry->d_inode; | ||
169 | |||
170 | return S_ISBLK(inode->i_mode) && | ||
171 | swsusp_resume_device == MKDEV(imajor(inode), iminor(inode)); | ||
172 | } | ||
173 | |||
174 | static int swsusp_swap_check(void) /* This is called before saving image */ | ||
175 | { | ||
176 | int i, len; | ||
177 | |||
178 | len=strlen(resume_file); | ||
179 | root_swap = 0xFFFF; | ||
180 | |||
181 | swap_list_lock(); | ||
182 | for(i=0; i<MAX_SWAPFILES; i++) { | ||
183 | if (swap_info[i].flags == 0) { | ||
184 | swapfile_used[i]=SWAPFILE_UNUSED; | ||
185 | } else { | ||
186 | if(!len) { | ||
187 | printk(KERN_WARNING "resume= option should be used to set suspend device" ); | ||
188 | if(root_swap == 0xFFFF) { | ||
189 | swapfile_used[i] = SWAPFILE_SUSPEND; | ||
190 | root_swap = i; | ||
191 | } else | ||
192 | swapfile_used[i] = SWAPFILE_IGNORED; | ||
193 | } else { | ||
194 | /* we ignore all swap devices that are not the resume_file */ | ||
195 | if (is_resume_device(&swap_info[i])) { | ||
196 | swapfile_used[i] = SWAPFILE_SUSPEND; | ||
197 | root_swap = i; | ||
198 | } else { | ||
199 | swapfile_used[i] = SWAPFILE_IGNORED; | ||
200 | } | ||
201 | } | ||
202 | } | ||
203 | } | ||
204 | swap_list_unlock(); | ||
205 | return (root_swap != 0xffff) ? 0 : -ENODEV; | ||
206 | } | ||
207 | |||
208 | /** | ||
209 | * This is called after saving image so modification | ||
210 | * will be lost after resume... and that's what we want. | ||
211 | * we make the device unusable. A new call to | ||
212 | * lock_swapdevices can unlock the devices. | ||
213 | */ | ||
214 | static void lock_swapdevices(void) | ||
215 | { | ||
216 | int i; | ||
217 | |||
218 | swap_list_lock(); | ||
219 | for(i = 0; i< MAX_SWAPFILES; i++) | ||
220 | if(swapfile_used[i] == SWAPFILE_IGNORED) { | ||
221 | swap_info[i].flags ^= 0xFF; | ||
222 | } | ||
223 | swap_list_unlock(); | ||
224 | } | ||
225 | |||
226 | /** | ||
227 | * write_page - Write one page to a fresh swap location. | ||
228 | * @addr: Address we're writing. | ||
229 | * @loc: Place to store the entry we used. | ||
230 | * | ||
231 | * Allocate a new swap entry and 'sync' it. Note we discard -EIO | ||
232 | * errors. That is an artifact left over from swsusp. It did not | ||
233 | * check the return of rw_swap_page_sync() at all, since most pages | ||
234 | * written back to swap would return -EIO. | ||
235 | * This is a partial improvement, since we will at least return other | ||
236 | * errors, though we need to eventually fix the damn code. | ||
237 | */ | ||
238 | static int write_page(unsigned long addr, swp_entry_t * loc) | ||
239 | { | ||
240 | swp_entry_t entry; | ||
241 | int error = 0; | ||
242 | |||
243 | entry = get_swap_page(); | ||
244 | if (swp_offset(entry) && | ||
245 | swapfile_used[swp_type(entry)] == SWAPFILE_SUSPEND) { | ||
246 | error = rw_swap_page_sync(WRITE, entry, | ||
247 | virt_to_page(addr)); | ||
248 | if (error == -EIO) | ||
249 | error = 0; | ||
250 | if (!error) | ||
251 | *loc = entry; | ||
252 | } else | ||
253 | error = -ENOSPC; | ||
254 | return error; | ||
255 | } | ||
256 | |||
257 | /** | ||
258 | * data_free - Free the swap entries used by the saved image. | ||
259 | * | ||
260 | * Walk the list of used swap entries and free each one. | ||
261 | * This is only used for cleanup when suspend fails. | ||
262 | */ | ||
263 | static void data_free(void) | ||
264 | { | ||
265 | swp_entry_t entry; | ||
266 | int i; | ||
267 | |||
268 | for (i = 0; i < nr_copy_pages; i++) { | ||
269 | entry = (pagedir_nosave + i)->swap_address; | ||
270 | if (entry.val) | ||
271 | swap_free(entry); | ||
272 | else | ||
273 | break; | ||
274 | (pagedir_nosave + i)->swap_address = (swp_entry_t){0}; | ||
275 | } | ||
276 | } | ||
277 | |||
278 | /** | ||
279 | * data_write - Write saved image to swap. | ||
280 | * | ||
281 | * Walk the list of pages in the image and sync each one to swap. | ||
282 | */ | ||
283 | static int data_write(void) | ||
284 | { | ||
285 | int error = 0, i = 0; | ||
286 | unsigned int mod = nr_copy_pages / 100; | ||
287 | struct pbe *p; | ||
288 | |||
289 | if (!mod) | ||
290 | mod = 1; | ||
291 | |||
292 | printk( "Writing data to swap (%d pages)... ", nr_copy_pages ); | ||
293 | for_each_pbe(p, pagedir_nosave) { | ||
294 | if (!(i%mod)) | ||
295 | printk( "\b\b\b\b%3d%%", i / mod ); | ||
296 | if ((error = write_page(p->address, &(p->swap_address)))) | ||
297 | return error; | ||
298 | i++; | ||
299 | } | ||
300 | printk("\b\b\b\bdone\n"); | ||
301 | return error; | ||
302 | } | ||
303 | |||
304 | static void dump_info(void) | ||
305 | { | ||
306 | pr_debug(" swsusp: Version: %u\n",swsusp_info.version_code); | ||
307 | pr_debug(" swsusp: Num Pages: %ld\n",swsusp_info.num_physpages); | ||
308 | pr_debug(" swsusp: UTS Sys: %s\n",swsusp_info.uts.sysname); | ||
309 | pr_debug(" swsusp: UTS Node: %s\n",swsusp_info.uts.nodename); | ||
310 | pr_debug(" swsusp: UTS Release: %s\n",swsusp_info.uts.release); | ||
311 | pr_debug(" swsusp: UTS Version: %s\n",swsusp_info.uts.version); | ||
312 | pr_debug(" swsusp: UTS Machine: %s\n",swsusp_info.uts.machine); | ||
313 | pr_debug(" swsusp: UTS Domain: %s\n",swsusp_info.uts.domainname); | ||
314 | pr_debug(" swsusp: CPUs: %d\n",swsusp_info.cpus); | ||
315 | pr_debug(" swsusp: Image: %ld Pages\n",swsusp_info.image_pages); | ||
316 | pr_debug(" swsusp: Pagedir: %ld Pages\n",swsusp_info.pagedir_pages); | ||
317 | } | ||
318 | |||
319 | static void init_header(void) | ||
320 | { | ||
321 | memset(&swsusp_info, 0, sizeof(swsusp_info)); | ||
322 | swsusp_info.version_code = LINUX_VERSION_CODE; | ||
323 | swsusp_info.num_physpages = num_physpages; | ||
324 | memcpy(&swsusp_info.uts, &system_utsname, sizeof(system_utsname)); | ||
325 | |||
326 | swsusp_info.suspend_pagedir = pagedir_nosave; | ||
327 | swsusp_info.cpus = num_online_cpus(); | ||
328 | swsusp_info.image_pages = nr_copy_pages; | ||
329 | } | ||
330 | |||
331 | static int close_swap(void) | ||
332 | { | ||
333 | swp_entry_t entry; | ||
334 | int error; | ||
335 | |||
336 | dump_info(); | ||
337 | error = write_page((unsigned long)&swsusp_info, &entry); | ||
338 | if (!error) { | ||
339 | printk( "S" ); | ||
340 | error = mark_swapfiles(entry); | ||
341 | printk( "|\n" ); | ||
342 | } | ||
343 | return error; | ||
344 | } | ||
345 | |||
346 | /** | ||
347 | * free_pagedir_entries - Free pages used by the page directory. | ||
348 | * | ||
349 | * This is used during suspend for error recovery. | ||
350 | */ | ||
351 | |||
352 | static void free_pagedir_entries(void) | ||
353 | { | ||
354 | int i; | ||
355 | |||
356 | for (i = 0; i < swsusp_info.pagedir_pages; i++) | ||
357 | swap_free(swsusp_info.pagedir[i]); | ||
358 | } | ||
359 | |||
360 | |||
361 | /** | ||
362 | * write_pagedir - Write the array of pages holding the page directory. | ||
363 | * @last: Last swap entry we write (needed for header). | ||
364 | */ | ||
365 | |||
366 | static int write_pagedir(void) | ||
367 | { | ||
368 | int error = 0; | ||
369 | unsigned n = 0; | ||
370 | struct pbe * pbe; | ||
371 | |||
372 | printk( "Writing pagedir..."); | ||
373 | for_each_pb_page(pbe, pagedir_nosave) { | ||
374 | if ((error = write_page((unsigned long)pbe, &swsusp_info.pagedir[n++]))) | ||
375 | return error; | ||
376 | } | ||
377 | |||
378 | swsusp_info.pagedir_pages = n; | ||
379 | printk("done (%u pages)\n", n); | ||
380 | return error; | ||
381 | } | ||
382 | |||
/**
 *	write_suspend_image - Write entire image and metadata.
 *
 *	Order: header initialisation, image data, page directory, header.
 *	If a step fails, the swap entries recorded so far are released:
 *	the page directory entries when the failure happened after
 *	data_write(), and the data entries in every failure case.
 *
 *	Returns 0 on success or the error of the failing step.
 */

static int write_suspend_image(void)
{
	int error;

	init_header();

	error = data_write();
	if (!error) {
		error = write_pagedir();
		if (!error)
			error = close_swap();
		/* Either of the two steps above failed. */
		if (error)
			free_pagedir_entries();
	}

	if (error)
		data_free();
	return error;
}
409 | |||
410 | |||
#ifdef CONFIG_HIGHMEM
/* One saved highmem page: the copy, the page it came from, and the list link. */
struct highmem_page {
	char *data;
	struct page *page;
	struct highmem_page *next;
};

/* Head of the list of saved highmem pages (most recently saved first). */
static struct highmem_page *highmem_copy;

/*
 * save_highmem_zone - Copy every in-use page of a highmem zone into a
 * freshly allocated page and push the copy onto highmem_copy.
 *
 * Invalid pfns, reserved pages and pages flagged NosaveFree are skipped.
 * Returns 0 on success or -ENOMEM when an allocation fails; copies made
 * before the failure stay on the list.
 */
static int save_highmem_zone(struct zone *zone)
{
	unsigned long offset;

	mark_free_pages(zone);
	for (offset = 0; offset < zone->spanned_pages; ++offset) {
		unsigned long pfn = offset + zone->zone_start_pfn;
		struct highmem_page *entry;
		struct page *page;
		void *mapped;

		/* Progress indicator. */
		if (pfn % 1000 == 0)
			printk(".");
		if (!pfn_valid(pfn))
			continue;
		page = pfn_to_page(pfn);
		/*
		 * This condition results from rvmalloc() sans vmalloc_32()
		 * and architectural memory reservations. This should be
		 * corrected eventually when the cases giving rise to this
		 * are better understood.
		 */
		if (PageReserved(page)) {
			printk("highmem reserved page?!\n");
			continue;
		}
		BUG_ON(PageNosave(page));
		if (PageNosaveFree(page))
			continue;

		entry = kmalloc(sizeof(struct highmem_page), GFP_ATOMIC);
		if (!entry)
			return -ENOMEM;
		entry->next = highmem_copy;
		entry->page = page;
		entry->data = (void *) get_zeroed_page(GFP_ATOMIC);
		if (!entry->data) {
			kfree(entry);
			return -ENOMEM;
		}

		mapped = kmap_atomic(page, KM_USER0);
		memcpy(entry->data, mapped, PAGE_SIZE);
		kunmap_atomic(mapped, KM_USER0);

		/* Publish the entry only once its copy is complete. */
		highmem_copy = entry;
	}
	return 0;
}
#endif /* CONFIG_HIGHMEM */
466 | |||
467 | |||
/*
 * save_highmem - Save the contents of every highmem zone.
 *
 * Returns 0 on success (always when CONFIG_HIGHMEM is not set), or the
 * first error returned by save_highmem_zone().
 */
static int save_highmem(void)
{
#ifdef CONFIG_HIGHMEM
	struct zone *zone;

	pr_debug("swsusp: Saving Highmem\n");
	for_each_zone(zone) {
		int res;

		if (!is_highmem(zone))
			continue;
		res = save_highmem_zone(zone);
		if (res)
			return res;
	}
#endif
	return 0;
}
484 | |||
/*
 * restore_highmem - Copy every saved highmem page back to its original
 * page and free the copy together with its list entry.
 *
 * The highmem_copy list is empty afterwards.  Always returns 0.
 */
static int restore_highmem(void)
{
#ifdef CONFIG_HIGHMEM
	struct highmem_page *entry;

	printk("swsusp: Restoring Highmem\n");
	for (entry = highmem_copy; entry; entry = highmem_copy) {
		void *mapped;

		/* Unlink first, so the list head never points at a freed entry. */
		highmem_copy = entry->next;

		mapped = kmap_atomic(entry->page, KM_USER0);
		memcpy(mapped, entry->data, PAGE_SIZE);
		kunmap_atomic(mapped, KM_USER0);

		free_page((long) entry->data);
		kfree(entry);
	}
#endif
	return 0;
}
503 | |||
504 | |||
505 | static int pfn_is_nosave(unsigned long pfn) | ||
506 | { | ||
507 | unsigned long nosave_begin_pfn = __pa(&__nosave_begin) >> PAGE_SHIFT; | ||
508 | unsigned long nosave_end_pfn = PAGE_ALIGN(__pa(&__nosave_end)) >> PAGE_SHIFT; | ||
509 | return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn); | ||
510 | } | ||
511 | |||
512 | /** | ||
513 | * saveable - Determine whether a page should be cloned or not. | ||
514 | * @pfn: The page | ||
515 | * | ||
516 | * We save a page if it's Reserved, and not in the range of pages | ||
517 | * statically defined as 'unsaveable', or if it isn't reserved, and | ||
518 | * isn't part of a free chunk of pages. | ||
519 | */ | ||
520 | |||
521 | static int saveable(struct zone * zone, unsigned long * zone_pfn) | ||
522 | { | ||
523 | unsigned long pfn = *zone_pfn + zone->zone_start_pfn; | ||
524 | struct page * page; | ||
525 | |||
526 | if (!pfn_valid(pfn)) | ||
527 | return 0; | ||
528 | |||
529 | page = pfn_to_page(pfn); | ||
530 | BUG_ON(PageReserved(page) && PageNosave(page)); | ||
531 | if (PageNosave(page)) | ||
532 | return 0; | ||
533 | if (PageReserved(page) && pfn_is_nosave(pfn)) { | ||
534 | pr_debug("[nosave pfn 0x%lx]", pfn); | ||
535 | return 0; | ||
536 | } | ||
537 | if (PageNosaveFree(page)) | ||
538 | return 0; | ||
539 | |||
540 | return 1; | ||
541 | } | ||
542 | |||
543 | static void count_data_pages(void) | ||
544 | { | ||
545 | struct zone *zone; | ||
546 | unsigned long zone_pfn; | ||
547 | |||
548 | nr_copy_pages = 0; | ||
549 | |||
550 | for_each_zone(zone) { | ||
551 | if (is_highmem(zone)) | ||
552 | continue; | ||
553 | mark_free_pages(zone); | ||
554 | for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) | ||
555 | nr_copy_pages += saveable(zone, &zone_pfn); | ||
556 | } | ||
557 | } | ||
558 | |||
559 | |||
560 | static void copy_data_pages(void) | ||
561 | { | ||
562 | struct zone *zone; | ||
563 | unsigned long zone_pfn; | ||
564 | struct pbe * pbe = pagedir_nosave; | ||
565 | |||
566 | pr_debug("copy_data_pages(): pages to copy: %d\n", nr_copy_pages); | ||
567 | for_each_zone(zone) { | ||
568 | if (is_highmem(zone)) | ||
569 | continue; | ||
570 | mark_free_pages(zone); | ||
571 | for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) { | ||
572 | if (saveable(zone, &zone_pfn)) { | ||
573 | struct page * page; | ||
574 | page = pfn_to_page(zone_pfn + zone->zone_start_pfn); | ||
575 | BUG_ON(!pbe); | ||
576 | pbe->orig_address = (long) page_address(page); | ||
577 | /* copy_page is not usable for copying task structs. */ | ||
578 | memcpy((void *)pbe->address, (void *)pbe->orig_address, PAGE_SIZE); | ||
579 | pbe = pbe->next; | ||
580 | } | ||
581 | } | ||
582 | } | ||
583 | BUG_ON(pbe); | ||
584 | } | ||
585 | |||
586 | |||
587 | /** | ||
588 | * calc_nr - Determine the number of pages needed for a pbe list. | ||
589 | */ | ||
590 | |||
591 | static int calc_nr(int nr_copy) | ||
592 | { | ||
593 | int extra = 0; | ||
594 | int mod = !!(nr_copy % PBES_PER_PAGE); | ||
595 | int diff = (nr_copy / PBES_PER_PAGE) + mod; | ||
596 | |||
597 | do { | ||
598 | extra += diff; | ||
599 | nr_copy += diff; | ||
600 | mod = !!(nr_copy % PBES_PER_PAGE); | ||
601 | diff = (nr_copy / PBES_PER_PAGE) + mod - extra; | ||
602 | } while (diff > 0); | ||
603 | |||
604 | return nr_copy; | ||
605 | } | ||
606 | |||
607 | /** | ||
608 | * free_pagedir - free pages allocated with alloc_pagedir() | ||
609 | */ | ||
610 | |||
611 | static inline void free_pagedir(struct pbe *pblist) | ||
612 | { | ||
613 | struct pbe *pbe; | ||
614 | |||
615 | while (pblist) { | ||
616 | pbe = (pblist + PB_PAGE_SKIP)->next; | ||
617 | free_page((unsigned long)pblist); | ||
618 | pblist = pbe; | ||
619 | } | ||
620 | } | ||
621 | |||
622 | /** | ||
623 | * fill_pb_page - Create a list of PBEs on a given memory page | ||
624 | */ | ||
625 | |||
626 | static inline void fill_pb_page(struct pbe *pbpage) | ||
627 | { | ||
628 | struct pbe *p; | ||
629 | |||
630 | p = pbpage; | ||
631 | pbpage += PB_PAGE_SKIP; | ||
632 | do | ||
633 | p->next = p + 1; | ||
634 | while (++p < pbpage); | ||
635 | } | ||
636 | |||
637 | /** | ||
638 | * create_pbe_list - Create a list of PBEs on top of a given chain | ||
639 | * of memory pages allocated with alloc_pagedir() | ||
640 | */ | ||
641 | |||
642 | static void create_pbe_list(struct pbe *pblist, unsigned nr_pages) | ||
643 | { | ||
644 | struct pbe *pbpage, *p; | ||
645 | unsigned num = PBES_PER_PAGE; | ||
646 | |||
647 | for_each_pb_page (pbpage, pblist) { | ||
648 | if (num >= nr_pages) | ||
649 | break; | ||
650 | |||
651 | fill_pb_page(pbpage); | ||
652 | num += PBES_PER_PAGE; | ||
653 | } | ||
654 | if (pbpage) { | ||
655 | for (num -= PBES_PER_PAGE - 1, p = pbpage; num < nr_pages; p++, num++) | ||
656 | p->next = p + 1; | ||
657 | p->next = NULL; | ||
658 | } | ||
659 | pr_debug("create_pbe_list(): initialized %d PBEs\n", num); | ||
660 | } | ||
661 | |||
662 | /** | ||
663 | * alloc_pagedir - Allocate the page directory. | ||
664 | * | ||
665 | * First, determine exactly how many pages we need and | ||
666 | * allocate them. | ||
667 | * | ||
668 | * We arrange the pages in a chain: each page is an array of PBES_PER_PAGE | ||
669 | * struct pbe elements (pbes) and the last element in the page points | ||
670 | * to the next page. | ||
671 | * | ||
672 | * On each page we set up a list of struct_pbe elements. | ||
673 | */ | ||
674 | |||
675 | static struct pbe * alloc_pagedir(unsigned nr_pages) | ||
676 | { | ||
677 | unsigned num; | ||
678 | struct pbe *pblist, *pbe; | ||
679 | |||
680 | if (!nr_pages) | ||
681 | return NULL; | ||
682 | |||
683 | pr_debug("alloc_pagedir(): nr_pages = %d\n", nr_pages); | ||
684 | pblist = (struct pbe *)get_zeroed_page(GFP_ATOMIC | __GFP_COLD); | ||
685 | for (pbe = pblist, num = PBES_PER_PAGE; pbe && num < nr_pages; | ||
686 | pbe = pbe->next, num += PBES_PER_PAGE) { | ||
687 | pbe += PB_PAGE_SKIP; | ||
688 | pbe->next = (struct pbe *)get_zeroed_page(GFP_ATOMIC | __GFP_COLD); | ||
689 | } | ||
690 | if (!pbe) { /* get_zeroed_page() failed */ | ||
691 | free_pagedir(pblist); | ||
692 | pblist = NULL; | ||
693 | } | ||
694 | return pblist; | ||
695 | } | ||
696 | |||
697 | /** | ||
698 | * free_image_pages - Free pages allocated for snapshot | ||
699 | */ | ||
700 | |||
701 | static void free_image_pages(void) | ||
702 | { | ||
703 | struct pbe * p; | ||
704 | |||
705 | for_each_pbe(p, pagedir_save) { | ||
706 | if (p->address) { | ||
707 | ClearPageNosave(virt_to_page(p->address)); | ||
708 | free_page(p->address); | ||
709 | p->address = 0; | ||
710 | } | ||
711 | } | ||
712 | } | ||
713 | |||
714 | /** | ||
715 | * alloc_image_pages - Allocate pages for the snapshot. | ||
716 | */ | ||
717 | |||
718 | static int alloc_image_pages(void) | ||
719 | { | ||
720 | struct pbe * p; | ||
721 | |||
722 | for_each_pbe(p, pagedir_save) { | ||
723 | p->address = get_zeroed_page(GFP_ATOMIC | __GFP_COLD); | ||
724 | if (!p->address) | ||
725 | return -ENOMEM; | ||
726 | SetPageNosave(virt_to_page(p->address)); | ||
727 | } | ||
728 | return 0; | ||
729 | } | ||
730 | |||
731 | void swsusp_free(void) | ||
732 | { | ||
733 | BUG_ON(PageNosave(virt_to_page(pagedir_save))); | ||
734 | BUG_ON(PageNosaveFree(virt_to_page(pagedir_save))); | ||
735 | free_image_pages(); | ||
736 | free_pagedir(pagedir_save); | ||
737 | } | ||
738 | |||
739 | |||
740 | /** | ||
741 | * enough_free_mem - Make sure we enough free memory to snapshot. | ||
742 | * | ||
743 | * Returns TRUE or FALSE after checking the number of available | ||
744 | * free pages. | ||
745 | */ | ||
746 | |||
747 | static int enough_free_mem(void) | ||
748 | { | ||
749 | if (nr_free_pages() < (nr_copy_pages + PAGES_FOR_IO)) { | ||
750 | pr_debug("swsusp: Not enough free pages: Have %d\n", | ||
751 | nr_free_pages()); | ||
752 | return 0; | ||
753 | } | ||
754 | return 1; | ||
755 | } | ||
756 | |||
757 | |||
758 | /** | ||
759 | * enough_swap - Make sure we have enough swap to save the image. | ||
760 | * | ||
761 | * Returns TRUE or FALSE after checking the total amount of swap | ||
762 | * space avaiable. | ||
763 | * | ||
764 | * FIXME: si_swapinfo(&i) returns all swap devices information. | ||
765 | * We should only consider resume_device. | ||
766 | */ | ||
767 | |||
768 | static int enough_swap(void) | ||
769 | { | ||
770 | struct sysinfo i; | ||
771 | |||
772 | si_swapinfo(&i); | ||
773 | if (i.freeswap < (nr_copy_pages + PAGES_FOR_IO)) { | ||
774 | pr_debug("swsusp: Not enough swap. Need %ld\n",i.freeswap); | ||
775 | return 0; | ||
776 | } | ||
777 | return 1; | ||
778 | } | ||
779 | |||
780 | static int swsusp_alloc(void) | ||
781 | { | ||
782 | int error; | ||
783 | |||
784 | pr_debug("suspend: (pages needed: %d + %d free: %d)\n", | ||
785 | nr_copy_pages, PAGES_FOR_IO, nr_free_pages()); | ||
786 | |||
787 | pagedir_nosave = NULL; | ||
788 | if (!enough_free_mem()) | ||
789 | return -ENOMEM; | ||
790 | |||
791 | if (!enough_swap()) | ||
792 | return -ENOSPC; | ||
793 | |||
794 | nr_copy_pages = calc_nr(nr_copy_pages); | ||
795 | |||
796 | if (!(pagedir_save = alloc_pagedir(nr_copy_pages))) { | ||
797 | printk(KERN_ERR "suspend: Allocating pagedir failed.\n"); | ||
798 | return -ENOMEM; | ||
799 | } | ||
800 | create_pbe_list(pagedir_save, nr_copy_pages); | ||
801 | pagedir_nosave = pagedir_save; | ||
802 | if ((error = alloc_image_pages())) { | ||
803 | printk(KERN_ERR "suspend: Allocating image pages failed.\n"); | ||
804 | swsusp_free(); | ||
805 | return error; | ||
806 | } | ||
807 | |||
808 | nr_copy_pages_check = nr_copy_pages; | ||
809 | return 0; | ||
810 | } | ||
811 | |||
812 | static int suspend_prepare_image(void) | ||
813 | { | ||
814 | int error; | ||
815 | |||
816 | pr_debug("swsusp: critical section: \n"); | ||
817 | if (save_highmem()) { | ||
818 | printk(KERN_CRIT "Suspend machine: Not enough free pages for highmem\n"); | ||
819 | restore_highmem(); | ||
820 | return -ENOMEM; | ||
821 | } | ||
822 | |||
823 | drain_local_pages(); | ||
824 | count_data_pages(); | ||
825 | printk("swsusp: Need to copy %u pages\n", nr_copy_pages); | ||
826 | |||
827 | error = swsusp_alloc(); | ||
828 | if (error) | ||
829 | return error; | ||
830 | |||
831 | /* During allocating of suspend pagedir, new cold pages may appear. | ||
832 | * Kill them. | ||
833 | */ | ||
834 | drain_local_pages(); | ||
835 | copy_data_pages(); | ||
836 | |||
837 | /* | ||
838 | * End of critical section. From now on, we can write to memory, | ||
839 | * but we should not touch disk. This specially means we must _not_ | ||
840 | * touch swap space! Except we must write out our image of course. | ||
841 | */ | ||
842 | |||
843 | printk("swsusp: critical section/: done (%d pages copied)\n", nr_copy_pages ); | ||
844 | return 0; | ||
845 | } | ||
846 | |||
847 | |||
/* It is important _NOT_ to umount filesystems at this point. We want
 * them synced (in case something goes wrong) but we DO not want to mark
 * filesystem clean: it is not. (And it does not matter, if we resume
 * correctly, we'll mark system clean, anyway.)
 *
 * swsusp_write - Resume the devices and write the image to swap, with
 * lock_swapdevices() called once before and once after the write.
 * Returns the result of write_suspend_image().
 */
int swsusp_write(void)
{
	int result;

	device_resume();

	lock_swapdevices();
	result = write_suspend_image();
	/* This will unlock ignored swap devices since writing is finished */
	lock_swapdevices();

	return result;
}
864 | |||
865 | |||
866 | extern asmlinkage int swsusp_arch_suspend(void); | ||
867 | extern asmlinkage int swsusp_arch_resume(void); | ||
868 | |||
869 | |||
870 | asmlinkage int swsusp_save(void) | ||
871 | { | ||
872 | int error = 0; | ||
873 | |||
874 | if ((error = swsusp_swap_check())) { | ||
875 | printk(KERN_ERR "swsusp: FATAL: cannot find swap device, try " | ||
876 | "swapon -a!\n"); | ||
877 | return error; | ||
878 | } | ||
879 | return suspend_prepare_image(); | ||
880 | } | ||
881 | |||
882 | int swsusp_suspend(void) | ||
883 | { | ||
884 | int error; | ||
885 | if ((error = arch_prepare_suspend())) | ||
886 | return error; | ||
887 | local_irq_disable(); | ||
888 | /* At this point, device_suspend() has been called, but *not* | ||
889 | * device_power_down(). We *must* device_power_down() now. | ||
890 | * Otherwise, drivers for some devices (e.g. interrupt controllers) | ||
891 | * become desynchronized with the actual state of the hardware | ||
892 | * at resume time, and evil weirdness ensues. | ||
893 | */ | ||
894 | if ((error = device_power_down(PMSG_FREEZE))) { | ||
895 | printk(KERN_ERR "Some devices failed to power down, aborting suspend\n"); | ||
896 | local_irq_enable(); | ||
897 | swsusp_free(); | ||
898 | return error; | ||
899 | } | ||
900 | save_processor_state(); | ||
901 | if ((error = swsusp_arch_suspend())) | ||
902 | swsusp_free(); | ||
903 | /* Restore control flow magically appears here */ | ||
904 | restore_processor_state(); | ||
905 | BUG_ON (nr_copy_pages_check != nr_copy_pages); | ||
906 | restore_highmem(); | ||
907 | device_power_up(); | ||
908 | local_irq_enable(); | ||
909 | return error; | ||
910 | } | ||
911 | |||
912 | int swsusp_resume(void) | ||
913 | { | ||
914 | int error; | ||
915 | local_irq_disable(); | ||
916 | if (device_power_down(PMSG_FREEZE)) | ||
917 | printk(KERN_ERR "Some devices failed to power down, very bad\n"); | ||
918 | /* We'll ignore saved state, but this gets preempt count (etc) right */ | ||
919 | save_processor_state(); | ||
920 | error = swsusp_arch_resume(); | ||
921 | /* Code below is only ever reached in case of failure. Otherwise | ||
922 | * execution continues at place where swsusp_arch_suspend was called | ||
923 | */ | ||
924 | BUG_ON(!error); | ||
925 | restore_processor_state(); | ||
926 | restore_highmem(); | ||
927 | device_power_up(); | ||
928 | local_irq_enable(); | ||
929 | return error; | ||
930 | } | ||
931 | |||
932 | /* More restore stuff */ | ||
933 | |||
934 | /* | ||
935 | * Returns true if given address/order collides with any orig_address | ||
936 | */ | ||
937 | static int does_collide_order(unsigned long addr, int order) | ||
938 | { | ||
939 | int i; | ||
940 | |||
941 | for (i=0; i < (1<<order); i++) | ||
942 | if (!PageNosaveFree(virt_to_page(addr + i * PAGE_SIZE))) | ||
943 | return 1; | ||
944 | return 0; | ||
945 | } | ||
946 | |||
/**
 *	On resume, for storing the PBE list and the image,
 *	we can only use memory pages that do not conflict with the pages
 *	which had been used before suspend.
 *
 *	We don't know which pages are usable until we allocate them.
 *
 *	Allocated but unusable (ie eaten) memory pages are linked together
 *	to create a list, so that we can free them easily.  The first word
 *	of every eaten page holds the pointer to the previously eaten one.
 *
 *	We could have used a type other than (void *)
 *	for this purpose, but ...
 */
static void **eaten_memory = NULL;

/* Push @page onto the front of the eaten_memory list. */
static inline void eat_page(void *page)
{
	void **node = page;

	*node = eaten_memory;
	eaten_memory = node;
}
970 | |||
/*
 * get_usable_page - Allocate a zeroed page that does not collide with
 * the image.
 * @gfp_mask: Allocation flags passed to get_zeroed_page().
 *
 * Colliding pages are kept on the eaten_memory list (so the allocator
 * does not hand them out again) until free_eaten_memory() is called.
 *
 * Returns the address of a usable page, or 0 when the allocator runs
 * out of memory.
 */
static unsigned long get_usable_page(unsigned gfp_mask)
{
	unsigned long m;

	for (;;) {
		m = get_zeroed_page(gfp_mask);
		/*
		 * Check every allocation, including the first one: a failed
		 * allocation must not reach does_collide_order() or
		 * eat_page(), which would dereference address 0.
		 */
		if (!m || !does_collide_order(m, 0))
			break;
		eat_page((void *)m);
	}
	return m;
}
984 | |||
985 | static void free_eaten_memory(void) | ||
986 | { | ||
987 | unsigned long m; | ||
988 | void **c; | ||
989 | int i = 0; | ||
990 | |||
991 | c = eaten_memory; | ||
992 | while (c) { | ||
993 | m = (unsigned long)c; | ||
994 | c = *c; | ||
995 | free_page(m); | ||
996 | i++; | ||
997 | } | ||
998 | eaten_memory = NULL; | ||
999 | pr_debug("swsusp: %d unused pages freed\n", i); | ||
1000 | } | ||
1001 | |||
1002 | /** | ||
1003 | * check_pagedir - We ensure here that pages that the PBEs point to | ||
1004 | * won't collide with pages where we're going to restore from the loaded | ||
1005 | * pages later | ||
1006 | */ | ||
1007 | |||
1008 | static int check_pagedir(struct pbe *pblist) | ||
1009 | { | ||
1010 | struct pbe *p; | ||
1011 | |||
1012 | /* This is necessary, so that we can free allocated pages | ||
1013 | * in case of failure | ||
1014 | */ | ||
1015 | for_each_pbe (p, pblist) | ||
1016 | p->address = 0UL; | ||
1017 | |||
1018 | for_each_pbe (p, pblist) { | ||
1019 | p->address = get_usable_page(GFP_ATOMIC); | ||
1020 | if (!p->address) | ||
1021 | return -ENOMEM; | ||
1022 | } | ||
1023 | return 0; | ||
1024 | } | ||
1025 | |||
1026 | /** | ||
1027 | * swsusp_pagedir_relocate - It is possible, that some memory pages | ||
1028 | * occupied by the list of PBEs collide with pages where we're going to | ||
1029 | * restore from the loaded pages later. We relocate them here. | ||
1030 | */ | ||
1031 | |||
1032 | static struct pbe * swsusp_pagedir_relocate(struct pbe *pblist) | ||
1033 | { | ||
1034 | struct zone *zone; | ||
1035 | unsigned long zone_pfn; | ||
1036 | struct pbe *pbpage, *tail, *p; | ||
1037 | void *m; | ||
1038 | int rel = 0, error = 0; | ||
1039 | |||
1040 | if (!pblist) /* a sanity check */ | ||
1041 | return NULL; | ||
1042 | |||
1043 | pr_debug("swsusp: Relocating pagedir (%lu pages to check)\n", | ||
1044 | swsusp_info.pagedir_pages); | ||
1045 | |||
1046 | /* Set page flags */ | ||
1047 | |||
1048 | for_each_zone(zone) { | ||
1049 | for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) | ||
1050 | SetPageNosaveFree(pfn_to_page(zone_pfn + | ||
1051 | zone->zone_start_pfn)); | ||
1052 | } | ||
1053 | |||
1054 | /* Clear orig addresses */ | ||
1055 | |||
1056 | for_each_pbe (p, pblist) | ||
1057 | ClearPageNosaveFree(virt_to_page(p->orig_address)); | ||
1058 | |||
1059 | tail = pblist + PB_PAGE_SKIP; | ||
1060 | |||
1061 | /* Relocate colliding pages */ | ||
1062 | |||
1063 | for_each_pb_page (pbpage, pblist) { | ||
1064 | if (does_collide_order((unsigned long)pbpage, 0)) { | ||
1065 | m = (void *)get_usable_page(GFP_ATOMIC | __GFP_COLD); | ||
1066 | if (!m) { | ||
1067 | error = -ENOMEM; | ||
1068 | break; | ||
1069 | } | ||
1070 | memcpy(m, (void *)pbpage, PAGE_SIZE); | ||
1071 | if (pbpage == pblist) | ||
1072 | pblist = (struct pbe *)m; | ||
1073 | else | ||
1074 | tail->next = (struct pbe *)m; | ||
1075 | |||
1076 | eat_page((void *)pbpage); | ||
1077 | pbpage = (struct pbe *)m; | ||
1078 | |||
1079 | /* We have to link the PBEs again */ | ||
1080 | |||
1081 | for (p = pbpage; p < pbpage + PB_PAGE_SKIP; p++) | ||
1082 | if (p->next) /* needed to save the end */ | ||
1083 | p->next = p + 1; | ||
1084 | |||
1085 | rel++; | ||
1086 | } | ||
1087 | tail = pbpage + PB_PAGE_SKIP; | ||
1088 | } | ||
1089 | |||
1090 | if (error) { | ||
1091 | printk("\nswsusp: Out of memory\n\n"); | ||
1092 | free_pagedir(pblist); | ||
1093 | free_eaten_memory(); | ||
1094 | pblist = NULL; | ||
1095 | } | ||
1096 | else | ||
1097 | printk("swsusp: Relocated %d pages\n", rel); | ||
1098 | |||
1099 | return pblist; | ||
1100 | } | ||
1101 | |||
1102 | /** | ||
1103 | * Using bio to read from swap. | ||
1104 | * This code requires a bit more work than just using buffer heads | ||
1105 | * but, it is the recommended way for 2.5/2.6. | ||
1106 | * The following are to signal the beginning and end of I/O. Bios | ||
1107 | * finish asynchronously, while we want them to happen synchronously. | ||
1108 | * A simple atomic_t, and a wait loop take care of this problem. | ||
1109 | */ | ||
1110 | |||
1111 | static atomic_t io_done = ATOMIC_INIT(0); | ||
1112 | |||
1113 | static int end_io(struct bio * bio, unsigned int num, int err) | ||
1114 | { | ||
1115 | if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) | ||
1116 | panic("I/O error reading memory image"); | ||
1117 | atomic_set(&io_done, 0); | ||
1118 | return 0; | ||
1119 | } | ||
1120 | |||
1121 | static struct block_device * resume_bdev; | ||
1122 | |||
1123 | /** | ||
1124 | * submit - submit BIO request. | ||
1125 | * @rw: READ or WRITE. | ||
1126 | * @off physical offset of page. | ||
1127 | * @page: page we're reading or writing. | ||
1128 | * | ||
1129 | * Straight from the textbook - allocate and initialize the bio. | ||
1130 | * If we're writing, make sure the page is marked as dirty. | ||
1131 | * Then submit it and wait. | ||
1132 | */ | ||
1133 | |||
1134 | static int submit(int rw, pgoff_t page_off, void * page) | ||
1135 | { | ||
1136 | int error = 0; | ||
1137 | struct bio * bio; | ||
1138 | |||
1139 | bio = bio_alloc(GFP_ATOMIC, 1); | ||
1140 | if (!bio) | ||
1141 | return -ENOMEM; | ||
1142 | bio->bi_sector = page_off * (PAGE_SIZE >> 9); | ||
1143 | bio_get(bio); | ||
1144 | bio->bi_bdev = resume_bdev; | ||
1145 | bio->bi_end_io = end_io; | ||
1146 | |||
1147 | if (bio_add_page(bio, virt_to_page(page), PAGE_SIZE, 0) < PAGE_SIZE) { | ||
1148 | printk("swsusp: ERROR: adding page to bio at %ld\n",page_off); | ||
1149 | error = -EFAULT; | ||
1150 | goto Done; | ||
1151 | } | ||
1152 | |||
1153 | if (rw == WRITE) | ||
1154 | bio_set_pages_dirty(bio); | ||
1155 | |||
1156 | atomic_set(&io_done, 1); | ||
1157 | submit_bio(rw | (1 << BIO_RW_SYNC), bio); | ||
1158 | while (atomic_read(&io_done)) | ||
1159 | yield(); | ||
1160 | |||
1161 | Done: | ||
1162 | bio_put(bio); | ||
1163 | return error; | ||
1164 | } | ||
1165 | |||
1166 | static int bio_read_page(pgoff_t page_off, void * page) | ||
1167 | { | ||
1168 | return submit(READ, page_off, page); | ||
1169 | } | ||
1170 | |||
1171 | static int bio_write_page(pgoff_t page_off, void * page) | ||
1172 | { | ||
1173 | return submit(WRITE, page_off, page); | ||
1174 | } | ||
1175 | |||
1176 | /* | ||
1177 | * Sanity check if this image makes sense with this kernel/swap context | ||
1178 | * I really don't think that it's foolproof but more than nothing.. | ||
1179 | */ | ||
1180 | |||
1181 | static const char * sanity_check(void) | ||
1182 | { | ||
1183 | dump_info(); | ||
1184 | if(swsusp_info.version_code != LINUX_VERSION_CODE) | ||
1185 | return "kernel version"; | ||
1186 | if(swsusp_info.num_physpages != num_physpages) | ||
1187 | return "memory size"; | ||
1188 | if (strcmp(swsusp_info.uts.sysname,system_utsname.sysname)) | ||
1189 | return "system type"; | ||
1190 | if (strcmp(swsusp_info.uts.release,system_utsname.release)) | ||
1191 | return "kernel release"; | ||
1192 | if (strcmp(swsusp_info.uts.version,system_utsname.version)) | ||
1193 | return "version"; | ||
1194 | if (strcmp(swsusp_info.uts.machine,system_utsname.machine)) | ||
1195 | return "machine"; | ||
1196 | if(swsusp_info.cpus != num_online_cpus()) | ||
1197 | return "number of cpus"; | ||
1198 | return NULL; | ||
1199 | } | ||
1200 | |||
1201 | |||
1202 | static int check_header(void) | ||
1203 | { | ||
1204 | const char * reason = NULL; | ||
1205 | int error; | ||
1206 | |||
1207 | if ((error = bio_read_page(swp_offset(swsusp_header.swsusp_info), &swsusp_info))) | ||
1208 | return error; | ||
1209 | |||
1210 | /* Is this same machine? */ | ||
1211 | if ((reason = sanity_check())) { | ||
1212 | printk(KERN_ERR "swsusp: Resume mismatch: %s\n",reason); | ||
1213 | return -EPERM; | ||
1214 | } | ||
1215 | nr_copy_pages = swsusp_info.image_pages; | ||
1216 | return error; | ||
1217 | } | ||
1218 | |||
1219 | static int check_sig(void) | ||
1220 | { | ||
1221 | int error; | ||
1222 | |||
1223 | memset(&swsusp_header, 0, sizeof(swsusp_header)); | ||
1224 | if ((error = bio_read_page(0, &swsusp_header))) | ||
1225 | return error; | ||
1226 | if (!memcmp(SWSUSP_SIG, swsusp_header.sig, 10)) { | ||
1227 | memcpy(swsusp_header.sig, swsusp_header.orig_sig, 10); | ||
1228 | |||
1229 | /* | ||
1230 | * Reset swap signature now. | ||
1231 | */ | ||
1232 | error = bio_write_page(0, &swsusp_header); | ||
1233 | } else { | ||
1234 | printk(KERN_ERR "swsusp: Suspend partition has wrong signature?\n"); | ||
1235 | return -EINVAL; | ||
1236 | } | ||
1237 | if (!error) | ||
1238 | pr_debug("swsusp: Signature found, resuming\n"); | ||
1239 | return error; | ||
1240 | } | ||
1241 | |||
1242 | /** | ||
1243 | * data_read - Read image pages from swap. | ||
1244 | * | ||
1245 | * You do not need to check for overlaps, check_pagedir() | ||
1246 | * already did that. | ||
1247 | */ | ||
1248 | |||
1249 | static int data_read(struct pbe *pblist) | ||
1250 | { | ||
1251 | struct pbe * p; | ||
1252 | int error = 0; | ||
1253 | int i = 0; | ||
1254 | int mod = swsusp_info.image_pages / 100; | ||
1255 | |||
1256 | if (!mod) | ||
1257 | mod = 1; | ||
1258 | |||
1259 | printk("swsusp: Reading image data (%lu pages): ", | ||
1260 | swsusp_info.image_pages); | ||
1261 | |||
1262 | for_each_pbe (p, pblist) { | ||
1263 | if (!(i % mod)) | ||
1264 | printk("\b\b\b\b%3d%%", i / mod); | ||
1265 | |||
1266 | error = bio_read_page(swp_offset(p->swap_address), | ||
1267 | (void *)p->address); | ||
1268 | if (error) | ||
1269 | return error; | ||
1270 | |||
1271 | i++; | ||
1272 | } | ||
1273 | printk("\b\b\b\bdone\n"); | ||
1274 | return error; | ||
1275 | } | ||
1276 | |||
1277 | extern dev_t name_to_dev_t(const char *line); | ||
1278 | |||
1279 | /** | ||
1280 | * read_pagedir - Read page backup list pages from swap | ||
1281 | */ | ||
1282 | |||
1283 | static int read_pagedir(struct pbe *pblist) | ||
1284 | { | ||
1285 | struct pbe *pbpage, *p; | ||
1286 | unsigned i = 0; | ||
1287 | int error; | ||
1288 | |||
1289 | if (!pblist) | ||
1290 | return -EFAULT; | ||
1291 | |||
1292 | printk("swsusp: Reading pagedir (%lu pages)\n", | ||
1293 | swsusp_info.pagedir_pages); | ||
1294 | |||
1295 | for_each_pb_page (pbpage, pblist) { | ||
1296 | unsigned long offset = swp_offset(swsusp_info.pagedir[i++]); | ||
1297 | |||
1298 | error = -EFAULT; | ||
1299 | if (offset) { | ||
1300 | p = (pbpage + PB_PAGE_SKIP)->next; | ||
1301 | error = bio_read_page(offset, (void *)pbpage); | ||
1302 | (pbpage + PB_PAGE_SKIP)->next = p; | ||
1303 | } | ||
1304 | if (error) | ||
1305 | break; | ||
1306 | } | ||
1307 | |||
1308 | if (error) | ||
1309 | free_page((unsigned long)pblist); | ||
1310 | |||
1311 | BUG_ON(i != swsusp_info.pagedir_pages); | ||
1312 | |||
1313 | return error; | ||
1314 | } | ||
1315 | |||
1316 | |||
/*
 * check_suspend_image - Verify the swap signature and then the image
 * header.  Returns 0 when both checks pass, otherwise the error of the
 * first failing check.
 */
static int check_suspend_image(void)
{
	int error = check_sig();

	if (!error)
		error = check_header();
	return error;
}
1329 | |||
1330 | static int read_suspend_image(void) | ||
1331 | { | ||
1332 | int error = 0; | ||
1333 | struct pbe *p; | ||
1334 | |||
1335 | if (!(p = alloc_pagedir(nr_copy_pages))) | ||
1336 | return -ENOMEM; | ||
1337 | |||
1338 | if ((error = read_pagedir(p))) | ||
1339 | return error; | ||
1340 | |||
1341 | create_pbe_list(p, nr_copy_pages); | ||
1342 | |||
1343 | if (!(pagedir_nosave = swsusp_pagedir_relocate(p))) | ||
1344 | return -ENOMEM; | ||
1345 | |||
1346 | /* Allocate memory for the image and read the data from swap */ | ||
1347 | |||
1348 | error = check_pagedir(pagedir_nosave); | ||
1349 | free_eaten_memory(); | ||
1350 | if (!error) | ||
1351 | error = data_read(pagedir_nosave); | ||
1352 | |||
1353 | if (error) { /* We fail cleanly */ | ||
1354 | for_each_pbe (p, pagedir_nosave) | ||
1355 | if (p->address) { | ||
1356 | free_page(p->address); | ||
1357 | p->address = 0UL; | ||
1358 | } | ||
1359 | free_pagedir(pagedir_nosave); | ||
1360 | } | ||
1361 | return error; | ||
1362 | } | ||
1363 | |||
1364 | /** | ||
1365 | * swsusp_check - Check for saved image in swap | ||
1366 | */ | ||
1367 | |||
1368 | int swsusp_check(void) | ||
1369 | { | ||
1370 | int error; | ||
1371 | |||
1372 | if (!swsusp_resume_device) { | ||
1373 | if (!strlen(resume_file)) | ||
1374 | return -ENOENT; | ||
1375 | swsusp_resume_device = name_to_dev_t(resume_file); | ||
1376 | pr_debug("swsusp: Resume From Partition %s\n", resume_file); | ||
1377 | } else { | ||
1378 | pr_debug("swsusp: Resume From Partition %d:%d\n", | ||
1379 | MAJOR(swsusp_resume_device), MINOR(swsusp_resume_device)); | ||
1380 | } | ||
1381 | |||
1382 | resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_READ); | ||
1383 | if (!IS_ERR(resume_bdev)) { | ||
1384 | set_blocksize(resume_bdev, PAGE_SIZE); | ||
1385 | error = check_suspend_image(); | ||
1386 | if (error) | ||
1387 | blkdev_put(resume_bdev); | ||
1388 | } else | ||
1389 | error = PTR_ERR(resume_bdev); | ||
1390 | |||
1391 | if (!error) | ||
1392 | pr_debug("swsusp: resume file found\n"); | ||
1393 | else | ||
1394 | pr_debug("swsusp: Error %d check for resume file\n", error); | ||
1395 | return error; | ||
1396 | } | ||
1397 | |||
1398 | /** | ||
1399 | * swsusp_read - Read saved image from swap. | ||
1400 | */ | ||
1401 | |||
1402 | int swsusp_read(void) | ||
1403 | { | ||
1404 | int error; | ||
1405 | |||
1406 | if (IS_ERR(resume_bdev)) { | ||
1407 | pr_debug("swsusp: block device not initialised\n"); | ||
1408 | return PTR_ERR(resume_bdev); | ||
1409 | } | ||
1410 | |||
1411 | error = read_suspend_image(); | ||
1412 | blkdev_put(resume_bdev); | ||
1413 | |||
1414 | if (!error) | ||
1415 | pr_debug("swsusp: Reading resume file was successful\n"); | ||
1416 | else | ||
1417 | pr_debug("swsusp: Error %d resuming\n", error); | ||
1418 | return error; | ||
1419 | } | ||
1420 | |||
1421 | /** | ||
1422 | * swsusp_close - close swap device. | ||
1423 | */ | ||
1424 | |||
1425 | void swsusp_close(void) | ||
1426 | { | ||
1427 | if (IS_ERR(resume_bdev)) { | ||
1428 | pr_debug("swsusp: block device not initialised\n"); | ||
1429 | return; | ||
1430 | } | ||
1431 | |||
1432 | blkdev_put(resume_bdev); | ||
1433 | } | ||