aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/power
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@ppc970.osdl.org>2005-04-16 18:20:36 -0400
committerLinus Torvalds <torvalds@ppc970.osdl.org>2005-04-16 18:20:36 -0400
commit1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree0bba044c4ce775e45a88a51686b5d9f90697ea9d /kernel/power
Linux-2.6.12-rc2v2.6.12-rc2
Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip!
Diffstat (limited to 'kernel/power')
-rw-r--r--kernel/power/Kconfig74
-rw-r--r--kernel/power/Makefile11
-rw-r--r--kernel/power/console.c58
-rw-r--r--kernel/power/disk.c431
-rw-r--r--kernel/power/main.c269
-rw-r--r--kernel/power/pm.c265
-rw-r--r--kernel/power/power.h52
-rw-r--r--kernel/power/poweroff.c45
-rw-r--r--kernel/power/process.c121
-rw-r--r--kernel/power/smp.c85
-rw-r--r--kernel/power/swsusp.c1433
11 files changed, 2844 insertions, 0 deletions
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
new file mode 100644
index 000000000000..696387ffe49c
--- /dev/null
+++ b/kernel/power/Kconfig
@@ -0,0 +1,74 @@
1config PM
2 bool "Power Management support"
3 ---help---
4 "Power Management" means that parts of your computer are shut
5 off or put into a power conserving "sleep" mode if they are not
6 being used. There are two competing standards for doing this: APM
7 and ACPI. If you want to use either one, say Y here and then also
8 to the requisite support below.
9
10 Power Management is most important for battery powered laptop
11 computers; if you have a laptop, check out the Linux Laptop home
12 page on the WWW at <http://www.linux-on-laptops.com/> or
13 Tuxmobil - Linux on Mobile Computers at <http://www.tuxmobil.org/>
14 and the Battery Powered Linux mini-HOWTO, available from
15 <http://www.tldp.org/docs.html#howto>.
16
17 Note that, even if you say N here, Linux on the x86 architecture
18 will issue the hlt instruction if nothing is to be done, thereby
19 sending the processor to sleep and saving power.
20
21config PM_DEBUG
22 bool "Power Management Debug Support"
23 depends on PM
24 ---help---
25 This option enables verbose debugging support in the Power Management
26 code. This is helpful when debugging and reporting various PM bugs,
27 like suspend support.
28
29config SOFTWARE_SUSPEND
30 bool "Software Suspend (EXPERIMENTAL)"
31 depends on EXPERIMENTAL && PM && SWAP
32 ---help---
33 Enable the possibility of suspending the machine.
34 It doesn't need APM.
35 You may suspend your machine by 'swsusp' or 'shutdown -z <time>'
36 (patch for sysvinit needed).
37
38 It creates an image which is saved in your active swap. Upon next
39 boot, pass the 'resume=/dev/swappartition' argument to the kernel to
40 have it detect the saved image, restore memory state from it, and
41 continue to run as before. If you do not want the previous state to
42 be reloaded, then use the 'noresume' kernel argument. However, note
43 that your partitions will be fsck'd and you must re-mkswap your swap
44 partitions. It does not work with swap files.
45
46 Right now you may boot without resuming and then resume later, but
47 in the meantime you cannot use the swap partitions/files which were
48 involved in suspending. Also, in this case there is a risk that buffers
49 on disk won't match the saved ones.
50
51 For more information take a look at <file:Documentation/power/swsusp.txt>.
52
53config PM_STD_PARTITION
54 string "Default resume partition"
55 depends on SOFTWARE_SUSPEND
56 default ""
57 ---help---
58 The default resume partition is the partition in which the suspend-
59 to-disk implementation will look for a suspended disk image.
60
61 The partition specified here will be different for almost every user.
62 It should be a valid swap partition (at least for now) that is turned
63 on before suspending.
64
65 The partition specified can be overridden by specifying:
66
67 resume=/dev/<other device>
68
69 which will set the resume partition to the device specified.
70
71 Note there is currently not a way to specify which device to save the
72 suspended image to. It will simply pick the first available swap
73 device.
74
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
new file mode 100644
index 000000000000..fbdc634135a7
--- /dev/null
+++ b/kernel/power/Makefile
@@ -0,0 +1,11 @@
1
2ifeq ($(CONFIG_PM_DEBUG),y)
3EXTRA_CFLAGS += -DDEBUG
4endif
5
6swsusp-smp-$(CONFIG_SMP) += smp.o
7
8obj-y := main.o process.o console.o pm.o
9obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o $(swsusp-smp-y) disk.o
10
11obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o
diff --git a/kernel/power/console.c b/kernel/power/console.c
new file mode 100644
index 000000000000..7ff375e7c95f
--- /dev/null
+++ b/kernel/power/console.c
@@ -0,0 +1,58 @@
1/*
2 * kernel/power/console.c - Functions for saving/restoring console.
3 *
4 * Originally from swsusp.
5 */
6
7#include <linux/vt_kern.h>
8#include <linux/kbd_kern.h>
9#include <linux/console.h>
10#include "power.h"
11
12static int new_loglevel = 10;
13static int orig_loglevel;
14#ifdef SUSPEND_CONSOLE
15static int orig_fgconsole, orig_kmsg;
16#endif
17
18int pm_prepare_console(void)
19{
20 orig_loglevel = console_loglevel;
21 console_loglevel = new_loglevel;
22
23#ifdef SUSPEND_CONSOLE
24 acquire_console_sem();
25
26 orig_fgconsole = fg_console;
27
28 if (vc_allocate(SUSPEND_CONSOLE)) {
29 /* we can't have a free VC for now. Too bad,
30 * we don't want to mess the screen for now. */
31 release_console_sem();
32 return 1;
33 }
34
35 set_console(SUSPEND_CONSOLE);
36 release_console_sem();
37
38 if (vt_waitactive(SUSPEND_CONSOLE)) {
39 pr_debug("Suspend: Can't switch VCs.");
40 return 1;
41 }
42 orig_kmsg = kmsg_redirect;
43 kmsg_redirect = SUSPEND_CONSOLE;
44#endif
45 return 0;
46}
47
48void pm_restore_console(void)
49{
50 console_loglevel = orig_loglevel;
51#ifdef SUSPEND_CONSOLE
52 acquire_console_sem();
53 set_console(orig_fgconsole);
54 release_console_sem();
55 kmsg_redirect = orig_kmsg;
56#endif
57 return;
58}
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
new file mode 100644
index 000000000000..02b6764034dc
--- /dev/null
+++ b/kernel/power/disk.c
@@ -0,0 +1,431 @@
1/*
2 * kernel/power/disk.c - Suspend-to-disk support.
3 *
4 * Copyright (c) 2003 Patrick Mochel
5 * Copyright (c) 2003 Open Source Development Lab
6 * Copyright (c) 2004 Pavel Machek <pavel@suse.cz>
7 *
8 * This file is released under the GPLv2.
9 *
10 */
11
12#include <linux/suspend.h>
13#include <linux/syscalls.h>
14#include <linux/reboot.h>
15#include <linux/string.h>
16#include <linux/device.h>
17#include <linux/delay.h>
18#include <linux/fs.h>
19#include "power.h"
20
21
22extern suspend_disk_method_t pm_disk_mode;
23extern struct pm_ops * pm_ops;
24
25extern int swsusp_suspend(void);
26extern int swsusp_write(void);
27extern int swsusp_check(void);
28extern int swsusp_read(void);
29extern void swsusp_close(void);
30extern int swsusp_resume(void);
31extern int swsusp_free(void);
32
33
34static int noresume = 0;
35char resume_file[256] = CONFIG_PM_STD_PARTITION;
36dev_t swsusp_resume_device;
37
38/**
39 * power_down - Shut machine down for hibernate.
40 * @mode: Suspend-to-disk mode
41 *
42 * Use the platform driver, if configured so, and return gracefully if it
43 * fails.
44 * Otherwise, try to power off and reboot. If they fail, halt the machine,
45 * there ain't no turning back.
46 */
47
48static void power_down(suspend_disk_method_t mode)
49{
50 unsigned long flags;
51 int error = 0;
52
53 local_irq_save(flags);
54 switch(mode) {
55 case PM_DISK_PLATFORM:
56 device_shutdown();
57 error = pm_ops->enter(PM_SUSPEND_DISK);
58 break;
59 case PM_DISK_SHUTDOWN:
60 printk("Powering off system\n");
61 device_shutdown();
62 machine_power_off();
63 break;
64 case PM_DISK_REBOOT:
65 device_shutdown();
66 machine_restart(NULL);
67 break;
68 }
69 machine_halt();
70 /* Valid image is on the disk, if we continue we risk serious data corruption
71 after resume. */
72 printk(KERN_CRIT "Please power me down manually\n");
73 while(1);
74}
75
76
77static int in_suspend __nosavedata = 0;
78
79
/**
 *	free_some_memory - Try to free as much memory as possible
 *
 *	Calls shrink_all_memory() repeatedly until it reports that nothing
 *	more was freed, animating a small spinner on the console and finally
 *	printing the total number of pages freed.
 *
 *	... but do not OOM-kill anyone
 *
 *	Notice: all userland should be stopped at this point, or
 *	livelock is possible.
 */

static void free_some_memory(void)
{
	static const char spinner[] = "-\\|/";
	unsigned int i = 0;
	unsigned int tmp;
	unsigned long pages = 0;

	printk("Freeing memory... ");
	while ((tmp = shrink_all_memory(10000))) {
		pages += tmp;
		/* Backspace over the previous spinner character. */
		printk("\b%c", spinner[i]);
		i++;
		if (i > 3)
			i = 0;
	}
	/* pages is unsigned long, so it must be printed with %lu. */
	printk("\bdone (%lu pages freed)\n", pages);
}
106
107
108static inline void platform_finish(void)
109{
110 if (pm_disk_mode == PM_DISK_PLATFORM) {
111 if (pm_ops && pm_ops->finish)
112 pm_ops->finish(PM_SUSPEND_DISK);
113 }
114}
115
/*
 * Common exit path of a suspend-to-disk / resume attempt: resume devices,
 * notify the platform, bring the non-boot CPUs back, thaw processes and
 * restore the console, in that order.
 */
static void finish(void)
{
	device_resume();
	platform_finish();

	enable_nonboot_cpus();
	thaw_processes();

	pm_restore_console();
}
124
125
126static int prepare_processes(void)
127{
128 int error;
129
130 pm_prepare_console();
131
132 sys_sync();
133
134 if (freeze_processes()) {
135 error = -EBUSY;
136 return error;
137 }
138
139 if (pm_disk_mode == PM_DISK_PLATFORM) {
140 if (pm_ops && pm_ops->prepare) {
141 if ((error = pm_ops->prepare(PM_SUSPEND_DISK)))
142 return error;
143 }
144 }
145
146 /* Free memory before shutting down devices. */
147 free_some_memory();
148
149 return 0;
150}
151
/*
 * Counterpart of prepare_processes(): re-enable the non-boot CPUs, thaw
 * the frozen processes and restore the console.
 */
static void unprepare_processes(void)
{
	enable_nonboot_cpus();

	thaw_processes();

	pm_restore_console();
}
158
159static int prepare_devices(void)
160{
161 int error;
162
163 disable_nonboot_cpus();
164 if ((error = device_suspend(PMSG_FREEZE))) {
165 printk("Some devices failed to suspend\n");
166 platform_finish();
167 enable_nonboot_cpus();
168 return error;
169 }
170
171 return 0;
172}
173
174/**
175 * pm_suspend_disk - The granpappy of power management.
176 *
177 * If we're going through the firmware, then get it over with quickly.
178 *
179 * If not, then call swsusp to do its thing, then figure out how
180 * to power down the system.
181 */
182
183int pm_suspend_disk(void)
184{
185 int error;
186
187 error = prepare_processes();
188 if (!error) {
189 error = prepare_devices();
190 }
191
192 if (error) {
193 unprepare_processes();
194 return error;
195 }
196
197 pr_debug("PM: Attempting to suspend to disk.\n");
198 if (pm_disk_mode == PM_DISK_FIRMWARE)
199 return pm_ops->enter(PM_SUSPEND_DISK);
200
201 pr_debug("PM: snapshotting memory.\n");
202 in_suspend = 1;
203 if ((error = swsusp_suspend()))
204 goto Done;
205
206 if (in_suspend) {
207 pr_debug("PM: writing image.\n");
208 error = swsusp_write();
209 if (!error)
210 power_down(pm_disk_mode);
211 } else
212 pr_debug("PM: Image restored successfully.\n");
213 swsusp_free();
214 Done:
215 finish();
216 return error;
217}
218
219
220/**
221 * software_resume - Resume from a saved image.
222 *
223 * Called as a late_initcall (so all devices are discovered and
224 * initialized), we call swsusp to see if we have a saved image or not.
225 * If so, we quiesce devices, the restore the saved image. We will
226 * return above (in pm_suspend_disk() ) if everything goes well.
227 * Otherwise, we fail gracefully and return to the normally
228 * scheduled program.
229 *
230 */
231
232static int software_resume(void)
233{
234 int error;
235
236 if (noresume) {
237 /**
238 * FIXME: If noresume is specified, we need to find the partition
239 * and reset it back to normal swap space.
240 */
241 return 0;
242 }
243
244 pr_debug("PM: Checking swsusp image.\n");
245
246 if ((error = swsusp_check()))
247 goto Done;
248
249 pr_debug("PM: Preparing processes for restore.\n");
250
251 if ((error = prepare_processes())) {
252 swsusp_close();
253 goto Cleanup;
254 }
255
256 pr_debug("PM: Reading swsusp image.\n");
257
258 if ((error = swsusp_read()))
259 goto Cleanup;
260
261 pr_debug("PM: Preparing devices for restore.\n");
262
263 if ((error = prepare_devices()))
264 goto Free;
265
266 mb();
267
268 pr_debug("PM: Restoring saved image.\n");
269 swsusp_resume();
270 pr_debug("PM: Restore failed, recovering.n");
271 finish();
272 Free:
273 swsusp_free();
274 Cleanup:
275 unprepare_processes();
276 Done:
277 pr_debug("PM: Resume from disk failed.\n");
278 return 0;
279}
280
281late_initcall(software_resume);
282
283
284static char * pm_disk_modes[] = {
285 [PM_DISK_FIRMWARE] = "firmware",
286 [PM_DISK_PLATFORM] = "platform",
287 [PM_DISK_SHUTDOWN] = "shutdown",
288 [PM_DISK_REBOOT] = "reboot",
289};
290
291/**
292 * disk - Control suspend-to-disk mode
293 *
294 * Suspend-to-disk can be handled in several ways. The greatest
295 * distinction is who writes memory to disk - the firmware or the OS.
296 * If the firmware does it, we assume that it also handles suspending
297 * the system.
298 * If the OS does it, then we have three options for putting the system
299 * to sleep - using the platform driver (e.g. ACPI or other PM registers),
300 * powering off the system or rebooting the system (for testing).
301 *
302 * The system will support either 'firmware' or 'platform', and that is
303 * known a priori (and encoded in pm_ops). But, the user may choose
304 * 'shutdown' or 'reboot' as alternatives.
305 *
306 * show() will display what the mode is currently set to.
307 * store() will accept one of
308 *
309 * 'firmware'
310 * 'platform'
311 * 'shutdown'
312 * 'reboot'
313 *
314 * It will only change to 'firmware' or 'platform' if the system
315 * supports it (as determined from pm_ops->pm_disk_mode).
316 */
317
318static ssize_t disk_show(struct subsystem * subsys, char * buf)
319{
320 return sprintf(buf, "%s\n", pm_disk_modes[pm_disk_mode]);
321}
322
323
324static ssize_t disk_store(struct subsystem * s, const char * buf, size_t n)
325{
326 int error = 0;
327 int i;
328 int len;
329 char *p;
330 suspend_disk_method_t mode = 0;
331
332 p = memchr(buf, '\n', n);
333 len = p ? p - buf : n;
334
335 down(&pm_sem);
336 for (i = PM_DISK_FIRMWARE; i < PM_DISK_MAX; i++) {
337 if (!strncmp(buf, pm_disk_modes[i], len)) {
338 mode = i;
339 break;
340 }
341 }
342 if (mode) {
343 if (mode == PM_DISK_SHUTDOWN || mode == PM_DISK_REBOOT)
344 pm_disk_mode = mode;
345 else {
346 if (pm_ops && pm_ops->enter &&
347 (mode == pm_ops->pm_disk_mode))
348 pm_disk_mode = mode;
349 else
350 error = -EINVAL;
351 }
352 } else
353 error = -EINVAL;
354
355 pr_debug("PM: suspend-to-disk mode set to '%s'\n",
356 pm_disk_modes[mode]);
357 up(&pm_sem);
358 return error ? error : n;
359}
360
361power_attr(disk);
362
363static ssize_t resume_show(struct subsystem * subsys, char *buf)
364{
365 return sprintf(buf,"%d:%d\n", MAJOR(swsusp_resume_device),
366 MINOR(swsusp_resume_device));
367}
368
369static ssize_t resume_store(struct subsystem * subsys, const char * buf, size_t n)
370{
371 int len;
372 char *p;
373 unsigned int maj, min;
374 int error = -EINVAL;
375 dev_t res;
376
377 p = memchr(buf, '\n', n);
378 len = p ? p - buf : n;
379
380 if (sscanf(buf, "%u:%u", &maj, &min) == 2) {
381 res = MKDEV(maj,min);
382 if (maj == MAJOR(res) && min == MINOR(res)) {
383 swsusp_resume_device = res;
384 printk("Attempting manual resume\n");
385 noresume = 0;
386 software_resume();
387 }
388 }
389
390 return error >= 0 ? n : error;
391}
392
393power_attr(resume);
394
395static struct attribute * g[] = {
396 &disk_attr.attr,
397 &resume_attr.attr,
398 NULL,
399};
400
401
402static struct attribute_group attr_group = {
403 .attrs = g,
404};
405
406
407static int __init pm_disk_init(void)
408{
409 return sysfs_create_group(&power_subsys.kset.kobj,&attr_group);
410}
411
412core_initcall(pm_disk_init);
413
414
415static int __init resume_setup(char *str)
416{
417 if (noresume)
418 return 1;
419
420 strncpy( resume_file, str, 255 );
421 return 1;
422}
423
424static int __init noresume_setup(char *str)
425{
426 noresume = 1;
427 return 1;
428}
429
430__setup("noresume", noresume_setup);
431__setup("resume=", resume_setup);
diff --git a/kernel/power/main.c b/kernel/power/main.c
new file mode 100644
index 000000000000..7960ddf04a57
--- /dev/null
+++ b/kernel/power/main.c
@@ -0,0 +1,269 @@
1/*
2 * kernel/power/main.c - PM subsystem core functionality.
3 *
4 * Copyright (c) 2003 Patrick Mochel
5 * Copyright (c) 2003 Open Source Development Lab
6 *
7 * This file is released under the GPLv2
8 *
9 */
10
11#include <linux/suspend.h>
12#include <linux/kobject.h>
13#include <linux/string.h>
14#include <linux/delay.h>
15#include <linux/errno.h>
16#include <linux/init.h>
17#include <linux/pm.h>
18
19
20#include "power.h"
21
22DECLARE_MUTEX(pm_sem);
23
24struct pm_ops * pm_ops = NULL;
25suspend_disk_method_t pm_disk_mode = PM_DISK_SHUTDOWN;
26
27/**
28 * pm_set_ops - Set the global power method table.
29 * @ops: Pointer to ops structure.
30 */
31
32void pm_set_ops(struct pm_ops * ops)
33{
34 down(&pm_sem);
35 pm_ops = ops;
36 up(&pm_sem);
37}
38
39
40/**
41 * suspend_prepare - Do prep work before entering low-power state.
42 * @state: State we're entering.
43 *
44 * This is common code that is called for each state that we're
45 * entering. Allocate a console, stop all processes, then make sure
46 * the platform can enter the requested state.
47 */
48
49static int suspend_prepare(suspend_state_t state)
50{
51 int error = 0;
52
53 if (!pm_ops || !pm_ops->enter)
54 return -EPERM;
55
56 pm_prepare_console();
57
58 if (freeze_processes()) {
59 error = -EAGAIN;
60 goto Thaw;
61 }
62
63 if (pm_ops->prepare) {
64 if ((error = pm_ops->prepare(state)))
65 goto Thaw;
66 }
67
68 if ((error = device_suspend(PMSG_SUSPEND))) {
69 printk(KERN_ERR "Some devices failed to suspend\n");
70 goto Finish;
71 }
72 return 0;
73 Finish:
74 if (pm_ops->finish)
75 pm_ops->finish(state);
76 Thaw:
77 thaw_processes();
78 pm_restore_console();
79 return error;
80}
81
82
83static int suspend_enter(suspend_state_t state)
84{
85 int error = 0;
86 unsigned long flags;
87
88 local_irq_save(flags);
89
90 if ((error = device_power_down(PMSG_SUSPEND))) {
91 printk(KERN_ERR "Some devices failed to power down\n");
92 goto Done;
93 }
94 error = pm_ops->enter(state);
95 device_power_up();
96 Done:
97 local_irq_restore(flags);
98 return error;
99}
100
101
102/**
103 * suspend_finish - Do final work before exiting suspend sequence.
104 * @state: State we're coming out of.
105 *
106 * Call platform code to clean up, restart processes, and free the
107 * console that we've allocated. This is not called for suspend-to-disk.
108 */
109
110static void suspend_finish(suspend_state_t state)
111{
112 device_resume();
113 if (pm_ops && pm_ops->finish)
114 pm_ops->finish(state);
115 thaw_processes();
116 pm_restore_console();
117}
118
119
120
121
122static char * pm_states[] = {
123 [PM_SUSPEND_STANDBY] = "standby",
124 [PM_SUSPEND_MEM] = "mem",
125 [PM_SUSPEND_DISK] = "disk",
126 NULL,
127};
128
129
130/**
131 * enter_state - Do common work of entering low-power state.
132 * @state: pm_state structure for state we're entering.
133 *
134 * Make sure we're the only ones trying to enter a sleep state. Fail
135 * if someone has beat us to it, since we don't want anything weird to
136 * happen when we wake up.
137 * Then, do the setup for suspend, enter the state, and cleaup (after
138 * we've woken up).
139 */
140
141static int enter_state(suspend_state_t state)
142{
143 int error;
144
145 if (down_trylock(&pm_sem))
146 return -EBUSY;
147
148 if (state == PM_SUSPEND_DISK) {
149 error = pm_suspend_disk();
150 goto Unlock;
151 }
152
153 /* Suspend is hard to get right on SMP. */
154 if (num_online_cpus() != 1) {
155 error = -EPERM;
156 goto Unlock;
157 }
158
159 pr_debug("PM: Preparing system for suspend\n");
160 if ((error = suspend_prepare(state)))
161 goto Unlock;
162
163 pr_debug("PM: Entering state.\n");
164 error = suspend_enter(state);
165
166 pr_debug("PM: Finishing up.\n");
167 suspend_finish(state);
168 Unlock:
169 up(&pm_sem);
170 return error;
171}
172
173/*
174 * This is main interface to the outside world. It needs to be
175 * called from process context.
176 */
177int software_suspend(void)
178{
179 return enter_state(PM_SUSPEND_DISK);
180}
181
182
183/**
184 * pm_suspend - Externally visible function for suspending system.
185 * @state: Enumarted value of state to enter.
186 *
187 * Determine whether or not value is within range, get state
188 * structure, and enter (above).
189 */
190
191int pm_suspend(suspend_state_t state)
192{
193 if (state > PM_SUSPEND_ON && state < PM_SUSPEND_MAX)
194 return enter_state(state);
195 return -EINVAL;
196}
197
198
199
200decl_subsys(power,NULL,NULL);
201
202
203/**
204 * state - control system power state.
205 *
206 * show() returns what states are supported, which is hard-coded to
207 * 'standby' (Power-On Suspend), 'mem' (Suspend-to-RAM), and
208 * 'disk' (Suspend-to-Disk).
209 *
210 * store() accepts one of those strings, translates it into the
211 * proper enumerated value, and initiates a suspend transition.
212 */
213
214static ssize_t state_show(struct subsystem * subsys, char * buf)
215{
216 int i;
217 char * s = buf;
218
219 for (i = 0; i < PM_SUSPEND_MAX; i++) {
220 if (pm_states[i])
221 s += sprintf(s,"%s ",pm_states[i]);
222 }
223 s += sprintf(s,"\n");
224 return (s - buf);
225}
226
227static ssize_t state_store(struct subsystem * subsys, const char * buf, size_t n)
228{
229 suspend_state_t state = PM_SUSPEND_STANDBY;
230 char ** s;
231 char *p;
232 int error;
233 int len;
234
235 p = memchr(buf, '\n', n);
236 len = p ? p - buf : n;
237
238 for (s = &pm_states[state]; state < PM_SUSPEND_MAX; s++, state++) {
239 if (*s && !strncmp(buf, *s, len))
240 break;
241 }
242 if (*s)
243 error = enter_state(state);
244 else
245 error = -EINVAL;
246 return error ? error : n;
247}
248
249power_attr(state);
250
251static struct attribute * g[] = {
252 &state_attr.attr,
253 NULL,
254};
255
256static struct attribute_group attr_group = {
257 .attrs = g,
258};
259
260
261static int __init pm_init(void)
262{
263 int error = subsystem_register(&power_subsys);
264 if (!error)
265 error = sysfs_create_group(&power_subsys.kset.kobj,&attr_group);
266 return error;
267}
268
269core_initcall(pm_init);
diff --git a/kernel/power/pm.c b/kernel/power/pm.c
new file mode 100644
index 000000000000..61deda04e39e
--- /dev/null
+++ b/kernel/power/pm.c
@@ -0,0 +1,265 @@
1/*
2 * pm.c - Power management interface
3 *
4 * Copyright (C) 2000 Andrew Henroid
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 */
20#include <linux/init.h>
21#include <linux/module.h>
22#include <linux/spinlock.h>
23#include <linux/mm.h>
24#include <linux/slab.h>
25#include <linux/pm.h>
26#include <linux/interrupt.h>
27
28int pm_active;
29
30/*
31 * Locking notes:
32 * pm_devs_lock can be a semaphore providing pm ops are not called
33 * from an interrupt handler (already a bad idea so no change here). Each
34 * change must be protected so that an unlink of an entry doesn't clash
35 * with a pm send - which is permitted to sleep in the current architecture
36 *
37 * Module unloads clashing with pm events now work out safely, the module
38 * unload path will block until the event has been sent. It may well block
39 * until a resume but that will be fine.
40 */
41
42static DECLARE_MUTEX(pm_devs_lock);
43static LIST_HEAD(pm_devs);
44
45/**
46 * pm_register - register a device with power management
47 * @type: device type
48 * @id: device ID
49 * @callback: callback function
50 *
51 * Add a device to the list of devices that wish to be notified about
52 * power management events. A &pm_dev structure is returned on success,
53 * on failure the return is %NULL.
54 *
55 * The callback function will be called in process context and
56 * it may sleep.
57 */
58
59struct pm_dev *pm_register(pm_dev_t type,
60 unsigned long id,
61 pm_callback callback)
62{
63 struct pm_dev *dev = kmalloc(sizeof(struct pm_dev), GFP_KERNEL);
64 if (dev) {
65 memset(dev, 0, sizeof(*dev));
66 dev->type = type;
67 dev->id = id;
68 dev->callback = callback;
69
70 down(&pm_devs_lock);
71 list_add(&dev->entry, &pm_devs);
72 up(&pm_devs_lock);
73 }
74 return dev;
75}
76
77/**
78 * pm_unregister - unregister a device with power management
79 * @dev: device to unregister
80 *
81 * Remove a device from the power management notification lists. The
82 * dev passed must be a handle previously returned by pm_register.
83 */
84
85void pm_unregister(struct pm_dev *dev)
86{
87 if (dev) {
88 down(&pm_devs_lock);
89 list_del(&dev->entry);
90 up(&pm_devs_lock);
91
92 kfree(dev);
93 }
94}
95
96static void __pm_unregister(struct pm_dev *dev)
97{
98 if (dev) {
99 list_del(&dev->entry);
100 kfree(dev);
101 }
102}
103
104/**
105 * pm_unregister_all - unregister all devices with matching callback
106 * @callback: callback function pointer
107 *
108 * Unregister every device that would call the callback passed. This
109 * is primarily meant as a helper function for loadable modules. It
110 * enables a module to give up all its managed devices without keeping
111 * its own private list.
112 */
113
114void pm_unregister_all(pm_callback callback)
115{
116 struct list_head *entry;
117
118 if (!callback)
119 return;
120
121 down(&pm_devs_lock);
122 entry = pm_devs.next;
123 while (entry != &pm_devs) {
124 struct pm_dev *dev = list_entry(entry, struct pm_dev, entry);
125 entry = entry->next;
126 if (dev->callback == callback)
127 __pm_unregister(dev);
128 }
129 up(&pm_devs_lock);
130}
131
132/**
133 * pm_send - send request to a single device
134 * @dev: device to send to
135 * @rqst: power management request
136 * @data: data for the callback
137 *
138 * Issue a power management request to a given device. The
139 * %PM_SUSPEND and %PM_RESUME events are handled specially. The
140 * data field must hold the intended next state. No call is made
141 * if the state matches.
142 *
143 * BUGS: what stops two power management requests occurring in parallel
144 * and conflicting.
145 *
146 * WARNING: Calling pm_send directly is not generally recommended, in
147 * particular there is no locking against the pm_dev going away. The
148 * caller must maintain all needed locking or have 'inside knowledge'
149 * on the safety. Also remember that this function is not locked against
150 * pm_unregister. This means that you must handle SMP races on callback
151 * execution and unload yourself.
152 */
153
154static int pm_send(struct pm_dev *dev, pm_request_t rqst, void *data)
155{
156 int status = 0;
157 unsigned long prev_state, next_state;
158
159 if (in_interrupt())
160 BUG();
161
162 switch (rqst) {
163 case PM_SUSPEND:
164 case PM_RESUME:
165 prev_state = dev->state;
166 next_state = (unsigned long) data;
167 if (prev_state != next_state) {
168 if (dev->callback)
169 status = (*dev->callback)(dev, rqst, data);
170 if (!status) {
171 dev->state = next_state;
172 dev->prev_state = prev_state;
173 }
174 }
175 else {
176 dev->prev_state = prev_state;
177 }
178 break;
179 default:
180 if (dev->callback)
181 status = (*dev->callback)(dev, rqst, data);
182 break;
183 }
184 return status;
185}
186
187/*
188 * Undo incomplete request
189 */
190static void pm_undo_all(struct pm_dev *last)
191{
192 struct list_head *entry = last->entry.prev;
193 while (entry != &pm_devs) {
194 struct pm_dev *dev = list_entry(entry, struct pm_dev, entry);
195 if (dev->state != dev->prev_state) {
196 /* previous state was zero (running) resume or
197 * previous state was non-zero (suspended) suspend
198 */
199 pm_request_t undo = (dev->prev_state
200 ? PM_SUSPEND:PM_RESUME);
201 pm_send(dev, undo, (void*) dev->prev_state);
202 }
203 entry = entry->prev;
204 }
205}
206
207/**
208 * pm_send_all - send request to all managed devices
209 * @rqst: power management request
210 * @data: data for the callback
211 *
212 * Issue a power management request to a all devices. The
213 * %PM_SUSPEND events are handled specially. Any device is
214 * permitted to fail a suspend by returning a non zero (error)
215 * value from its callback function. If any device vetoes a
216 * suspend request then all other devices that have suspended
217 * during the processing of this request are restored to their
218 * previous state.
219 *
220 * WARNING: This function takes the pm_devs_lock. The lock is not dropped until
221 * the callbacks have completed. This prevents races against pm locking
222 * functions, races against module unload pm_unregister code. It does
223 * mean however that you must not issue pm_ functions within the callback
224 * or you will deadlock and users will hate you.
225 *
226 * Zero is returned on success. If a suspend fails then the status
227 * from the device that vetoes the suspend is returned.
228 *
229 * BUGS: what stops two power management requests occurring in parallel
230 * and conflicting.
231 */
232
233int pm_send_all(pm_request_t rqst, void *data)
234{
235 struct list_head *entry;
236
237 down(&pm_devs_lock);
238 entry = pm_devs.next;
239 while (entry != &pm_devs) {
240 struct pm_dev *dev = list_entry(entry, struct pm_dev, entry);
241 if (dev->callback) {
242 int status = pm_send(dev, rqst, data);
243 if (status) {
244 /* return devices to previous state on
245 * failed suspend request
246 */
247 if (rqst == PM_SUSPEND)
248 pm_undo_all(dev);
249 up(&pm_devs_lock);
250 return status;
251 }
252 }
253 entry = entry->next;
254 }
255 up(&pm_devs_lock);
256 return 0;
257}
258
259EXPORT_SYMBOL(pm_register);
260EXPORT_SYMBOL(pm_unregister);
261EXPORT_SYMBOL(pm_unregister_all);
262EXPORT_SYMBOL(pm_send_all);
263EXPORT_SYMBOL(pm_active);
264
265
diff --git a/kernel/power/power.h b/kernel/power/power.h
new file mode 100644
index 000000000000..cd6a3493cc0d
--- /dev/null
+++ b/kernel/power/power.h
@@ -0,0 +1,52 @@
1#include <linux/suspend.h>
2#include <linux/utsname.h>
3
4/* With SUSPEND_CONSOLE defined, suspend looks *really* cool, but
5 we probably do not take enough locks for switching consoles, etc,
6 so bad things might happen.
7*/
8#if defined(CONFIG_VT) && defined(CONFIG_VT_CONSOLE)
9#define SUSPEND_CONSOLE (MAX_NR_CONSOLES-1)
10#endif
11
12
13struct swsusp_info {
14 struct new_utsname uts;
15 u32 version_code;
16 unsigned long num_physpages;
17 int cpus;
18 unsigned long image_pages;
19 unsigned long pagedir_pages;
20 suspend_pagedir_t * suspend_pagedir;
21 swp_entry_t pagedir[768];
22} __attribute__((aligned(PAGE_SIZE)));
23
24
25
26#ifdef CONFIG_SOFTWARE_SUSPEND
27extern int pm_suspend_disk(void);
28
29#else
30static inline int pm_suspend_disk(void)
31{
32 return -EPERM;
33}
34#endif
35extern struct semaphore pm_sem;
36#define power_attr(_name) \
37static struct subsys_attribute _name##_attr = { \
38 .attr = { \
39 .name = __stringify(_name), \
40 .mode = 0644, \
41 }, \
42 .show = _name##_show, \
43 .store = _name##_store, \
44}
45
46extern struct subsystem power_subsys;
47
48extern int freeze_processes(void);
49extern void thaw_processes(void);
50
51extern int pm_prepare_console(void);
52extern void pm_restore_console(void);
diff --git a/kernel/power/poweroff.c b/kernel/power/poweroff.c
new file mode 100644
index 000000000000..715081b2d829
--- /dev/null
+++ b/kernel/power/poweroff.c
@@ -0,0 +1,45 @@
1/*
2 * poweroff.c - sysrq handler to gracefully power down machine.
3 *
4 * This file is released under the GPL v2
5 */
6
7#include <linux/kernel.h>
8#include <linux/sysrq.h>
9#include <linux/init.h>
10#include <linux/pm.h>
11#include <linux/workqueue.h>
12
13/*
14 * When the user hits Sys-Rq o to power down the machine this is the
15 * callback we use.
16 */
17
18static void do_poweroff(void *dummy)
19{
20 if (pm_power_off)
21 pm_power_off();
22}
23
/* Work item that runs do_poweroff() when scheduled. */
24static DECLARE_WORK(poweroff_work, do_poweroff, NULL);
25
/*
 * Sys-Rq handler: does not power off directly, it only queues
 * poweroff_work.  The key, register and tty arguments are not used.
 */
26static void handle_poweroff(int key, struct pt_regs *pt_regs,
27 struct tty_struct *tty)
28{
29 schedule_work(&poweroff_work);
30}
31
/* Sys-Rq operation descriptor registered for the 'o' key by pm_sysrq_init(). */
32static struct sysrq_key_op sysrq_poweroff_op = {
33 .handler = handle_poweroff,
34 .help_msg = "powerOff",
35 .action_msg = "Power Off",
36 .enable_mask = SYSRQ_ENABLE_BOOT,
37};
38
39static int pm_sysrq_init(void)
40{
41 register_sysrq_key('o', &sysrq_poweroff_op);
42 return 0;
43}
44
45subsys_initcall(pm_sysrq_init);
diff --git a/kernel/power/process.c b/kernel/power/process.c
new file mode 100644
index 000000000000..78d92dc6a1ed
--- /dev/null
+++ b/kernel/power/process.c
@@ -0,0 +1,121 @@
1/*
2 * drivers/power/process.c - Functions for starting/stopping processes on
3 * suspend transitions.
4 *
5 * Originally from swsusp.
6 */
7
8
9#undef DEBUG
10
11#include <linux/smp_lock.h>
12#include <linux/interrupt.h>
13#include <linux/suspend.h>
14#include <linux/module.h>
15
16/*
17 * Timeout for stopping processes
18 */
19#define TIMEOUT (6 * HZ)
20
21
22static inline int freezeable(struct task_struct * p)
23{
24 if ((p == current) ||
25 (p->flags & PF_NOFREEZE) ||
26 (p->exit_state == EXIT_ZOMBIE) ||
27 (p->exit_state == EXIT_DEAD) ||
28 (p->state == TASK_STOPPED) ||
29 (p->state == TASK_TRACED))
30 return 0;
31 return 1;
32}
33
34/* Refrigerator is place where frozen processes are stored :-). */
35void refrigerator(unsigned long flag)
36{
37 /* Hmm, should we be allowed to suspend when there are realtime
38 processes around? */
39 long save;
40 save = current->state;
41 current->state = TASK_UNINTERRUPTIBLE;
42 pr_debug("%s entered refrigerator\n", current->comm);
43 printk("=");
44 current->flags &= ~PF_FREEZE;
45
46 spin_lock_irq(&current->sighand->siglock);
47 recalc_sigpending(); /* We sent fake signal, clean it up */
48 spin_unlock_irq(&current->sighand->siglock);
49
50 current->flags |= PF_FROZEN;
51 while (current->flags & PF_FROZEN)
52 schedule();
53 pr_debug("%s left refrigerator\n", current->comm);
54 current->state = save;
55}
56
57/* 0 = success, else # of processes that we failed to stop */
58int freeze_processes(void)
59{
60 int todo;
61 unsigned long start_time;
62 struct task_struct *g, *p;
63
64 printk( "Stopping tasks: " );
65 start_time = jiffies;
66 do {
67 todo = 0;
68 read_lock(&tasklist_lock);
69 do_each_thread(g, p) {
70 unsigned long flags;
71 if (!freezeable(p))
72 continue;
73 if ((p->flags & PF_FROZEN) ||
74 (p->state == TASK_TRACED) ||
75 (p->state == TASK_STOPPED))
76 continue;
77
78 /* FIXME: smp problem here: we may not access other process' flags
79 without locking */
80 p->flags |= PF_FREEZE;
81 spin_lock_irqsave(&p->sighand->siglock, flags);
82 signal_wake_up(p, 0);
83 spin_unlock_irqrestore(&p->sighand->siglock, flags);
84 todo++;
85 } while_each_thread(g, p);
86 read_unlock(&tasklist_lock);
87 yield(); /* Yield is okay here */
88 if (time_after(jiffies, start_time + TIMEOUT)) {
89 printk( "\n" );
90 printk(KERN_ERR " stopping tasks failed (%d tasks remaining)\n", todo );
91 return todo;
92 }
93 } while(todo);
94
95 printk( "|\n" );
96 BUG_ON(in_atomic());
97 return 0;
98}
99
100void thaw_processes(void)
101{
102 struct task_struct *g, *p;
103
104 printk( "Restarting tasks..." );
105 read_lock(&tasklist_lock);
106 do_each_thread(g, p) {
107 if (!freezeable(p))
108 continue;
109 if (p->flags & PF_FROZEN) {
110 p->flags &= ~PF_FROZEN;
111 wake_up_process(p);
112 } else
113 printk(KERN_INFO " Strange, %s not stopped\n", p->comm );
114 } while_each_thread(g, p);
115
116 read_unlock(&tasklist_lock);
117 schedule();
118 printk( " done\n" );
119}
120
121EXPORT_SYMBOL(refrigerator);
diff --git a/kernel/power/smp.c b/kernel/power/smp.c
new file mode 100644
index 000000000000..7fa7f6e2b7fb
--- /dev/null
+++ b/kernel/power/smp.c
@@ -0,0 +1,85 @@
1/*
2 * drivers/power/smp.c - Functions for stopping other CPUs.
3 *
4 * Copyright 2004 Pavel Machek <pavel@suse.cz>
5 * Copyright (C) 2002-2003 Nigel Cunningham <ncunningham@clear.net.nz>
6 *
7 * This file is released under the GPLv2.
8 */
9
10#undef DEBUG
11
12#include <linux/smp_lock.h>
13#include <linux/interrupt.h>
14#include <linux/suspend.h>
15#include <linux/module.h>
16#include <asm/atomic.h>
17#include <asm/tlbflush.h>
18
19static atomic_t cpu_counter, freeze;
20
21
/*
 * Function run on the other CPUs via smp_call_function() from
 * disable_nonboot_cpus().  It saves the processor state, announces itself
 * by incrementing cpu_counter, and then busy-waits for as long as the
 * 'freeze' flag is non-zero.  Once released it decrements cpu_counter and
 * restores the processor state.  @data is not used.
 */
22static void smp_pause(void * data)
23{
24 struct saved_context ctxt;
25 __save_processor_state(&ctxt);
26 printk("Sleeping in:\n");
27 dump_stack();
28 atomic_inc(&cpu_counter); /* tell disable_nonboot_cpus() we are parked */
29 while (atomic_read(&freeze)) {
30 /* FIXME: restore takes place at random piece inside this.
31 This should probably be written in assembly, and
32 preserve general-purpose registers, too
33
34 What about stack? We may need to move to new stack here.
35
36 This should better be ran with interrupts disabled.
37 */
38 cpu_relax();
39 barrier();
40 }
41 atomic_dec(&cpu_counter); /* enable_nonboot_cpus() waits for this to hit 0 */
42 __restore_processor_state(&ctxt);
43}
44
45static cpumask_t oldmask;
46
47void disable_nonboot_cpus(void)
48{
49 printk("Freezing CPUs (at %d)", smp_processor_id());
50 oldmask = current->cpus_allowed;
51 set_cpus_allowed(current, cpumask_of_cpu(0));
52 current->state = TASK_INTERRUPTIBLE;
53 schedule_timeout(HZ);
54 printk("...");
55 BUG_ON(smp_processor_id() != 0);
56
57 /* FIXME: for this to work, all the CPUs must be running
58 * "idle" thread (or we deadlock). Is that guaranteed? */
59
60 atomic_set(&cpu_counter, 0);
61 atomic_set(&freeze, 1);
62 smp_call_function(smp_pause, NULL, 0, 0);
63 while (atomic_read(&cpu_counter) < (num_online_cpus() - 1)) {
64 cpu_relax();
65 barrier();
66 }
67 printk("ok\n");
68}
69
70void enable_nonboot_cpus(void)
71{
72 printk("Restarting CPUs");
73 atomic_set(&freeze, 0);
74 while (atomic_read(&cpu_counter)) {
75 cpu_relax();
76 barrier();
77 }
78 printk("...");
79 set_cpus_allowed(current, oldmask);
80 schedule();
81 printk("ok\n");
82
83}
84
85
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c
new file mode 100644
index 000000000000..ae5bebc3b18f
--- /dev/null
+++ b/kernel/power/swsusp.c
@@ -0,0 +1,1433 @@
1/*
2 * linux/kernel/power/swsusp.c
3 *
4 * This file is to realize architecture-independent
5 * machine suspend feature using pretty near only high-level routines
6 *
7 * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu>
8 * Copyright (C) 1998,2001-2004 Pavel Machek <pavel@suse.cz>
9 *
10 * This file is released under the GPLv2.
11 *
12 * I'd like to thank the following people for their work:
13 *
14 * Pavel Machek <pavel@ucw.cz>:
15 * Modifications, defectiveness pointing, being with me at the very beginning,
16 * suspend to swap space, stop all tasks. Port to 2.4.18-ac and 2.5.17.
17 *
18 * Steve Doddi <dirk@loth.demon.co.uk>:
19 * Support the possibility of hardware state restoring.
20 *
21 * Raph <grey.havens@earthling.net>:
22 * Support for preserving states of network devices and virtual console
23 * (including X and svgatextmode)
24 *
25 * Kurt Garloff <garloff@suse.de>:
26 * Straightened the critical function in order to prevent compilers from
27 * playing tricks with local variables.
28 *
29 * Andreas Mohr <a.mohr@mailto.de>
30 *
31 * Alex Badea <vampire@go.ro>:
32 * Fixed runaway init
33 *
34 * More state savers are welcome. Especially for the scsi layer...
35 *
36 * For TODOs,FIXMEs also look in Documentation/power/swsusp.txt
37 */
38
39#include <linux/module.h>
40#include <linux/mm.h>
41#include <linux/suspend.h>
42#include <linux/smp_lock.h>
43#include <linux/file.h>
44#include <linux/utsname.h>
45#include <linux/version.h>
46#include <linux/delay.h>
47#include <linux/reboot.h>
48#include <linux/bitops.h>
49#include <linux/vt_kern.h>
50#include <linux/kbd_kern.h>
51#include <linux/keyboard.h>
52#include <linux/spinlock.h>
53#include <linux/genhd.h>
54#include <linux/kernel.h>
55#include <linux/major.h>
56#include <linux/swap.h>
57#include <linux/pm.h>
58#include <linux/device.h>
59#include <linux/buffer_head.h>
60#include <linux/swapops.h>
61#include <linux/bootmem.h>
62#include <linux/syscalls.h>
63#include <linux/console.h>
64#include <linux/highmem.h>
65#include <linux/bio.h>
66
67#include <asm/uaccess.h>
68#include <asm/mmu_context.h>
69#include <asm/pgtable.h>
70#include <asm/tlbflush.h>
71#include <asm/io.h>
72
73#include "power.h"
74
75/* References to section boundaries */
76extern const void __nosave_begin, __nosave_end;
77
78/* Variables to be preserved over suspend */
79static int nr_copy_pages_check;
80
81extern char resume_file[];
82
83/* Local variables that should not be affected by save */
84unsigned int nr_copy_pages __nosavedata = 0;
85
86/* Suspend pagedir is allocated before final copy, therefore it
87 must be freed after resume
88
89 Warning: this is evil. There are actually two pagedirs at time of
90 resume. One is "pagedir_save", which is empty frame allocated at
91 time of suspend, that must be freed. Second is "pagedir_nosave",
92 allocated at time of resume, that travels through memory not to
93 collide with anything.
94
95 Warning: this is even more evil than it seems. Pagedirs this file
96 talks about are completely different from page directories used by
97 MMU hardware.
98 */
99suspend_pagedir_t *pagedir_nosave __nosavedata = NULL;
100static suspend_pagedir_t *pagedir_save;
101
102#define SWSUSP_SIG "S1SUSPEND"
103
/*
 * In-memory copy of the first page of the suspend swap device (swap offset
 * 0), as read and rewritten by mark_swapfiles().  The trailing fields sit at
 * the very end of the page: the swap entry of the swsusp_info header, the
 * signature found there before suspend, and the signature itself.
 */
104static struct swsusp_header {
105 char reserved[PAGE_SIZE - 20 - sizeof(swp_entry_t)]; /* pads the fields below to the end of the page */
106 swp_entry_t swsusp_info; /* where the swsusp_info page was written */
107 char orig_sig[10]; /* saved copy of sig taken before it is overwritten */
108 char sig[10]; /* "SWAP-SPACE"/"SWAPSPACE2", or SWSUSP_SIG once marked */
109} __attribute__((packed, aligned(PAGE_SIZE))) swsusp_header;
110
111static struct swsusp_info swsusp_info;
112
113/*
114 * XXX: We try to keep some more pages free so that I/O operations succeed
115 * without paging. Might this be more?
116 */
117#define PAGES_FOR_IO 512
118
119/*
120 * Saving part...
121 */
122
123/* We memorize in swapfile_used what swap devices are used for suspension */
124#define SWAPFILE_UNUSED 0
125#define SWAPFILE_SUSPEND 1 /* This is the suspending device */
126#define SWAPFILE_IGNORED 2 /* Those are other swap devices ignored for suspension */
127
128static unsigned short swapfile_used[MAX_SWAPFILES];
129static unsigned short root_swap;
130
131static int mark_swapfiles(swp_entry_t prev)
132{
133 int error;
134
135 rw_swap_page_sync(READ,
136 swp_entry(root_swap, 0),
137 virt_to_page((unsigned long)&swsusp_header));
138 if (!memcmp("SWAP-SPACE",swsusp_header.sig, 10) ||
139 !memcmp("SWAPSPACE2",swsusp_header.sig, 10)) {
140 memcpy(swsusp_header.orig_sig,swsusp_header.sig, 10);
141 memcpy(swsusp_header.sig,SWSUSP_SIG, 10);
142 swsusp_header.swsusp_info = prev;
143 error = rw_swap_page_sync(WRITE,
144 swp_entry(root_swap, 0),
145 virt_to_page((unsigned long)
146 &swsusp_header));
147 } else {
148 pr_debug("swsusp: Partition is not swap space.\n");
149 error = -ENODEV;
150 }
151 return error;
152}
153
154/*
155 * Check whether the swap device is the specified resume
156 * device, irrespective of whether they are specified by
157 * identical names.
158 *
159 * (Thus, device inode aliasing is allowed. You can say /dev/hda4
160 * instead of /dev/ide/host0/bus0/target0/lun0/part4 [if using devfs]
161 * and they'll be considered the same device. This is *necessary* for
162 * devfs, since the resume code can only recognize the form /dev/hda4,
163 * but the suspend code would see the long name.)
164 */
165static int is_resume_device(const struct swap_info_struct *swap_info)
166{
167 struct file *file = swap_info->swap_file;
168 struct inode *inode = file->f_dentry->d_inode;
169
170 return S_ISBLK(inode->i_mode) &&
171 swsusp_resume_device == MKDEV(imajor(inode), iminor(inode));
172}
173
174static int swsusp_swap_check(void) /* This is called before saving image */
175{
176 int i, len;
177
178 len=strlen(resume_file);
179 root_swap = 0xFFFF;
180
181 swap_list_lock();
182 for(i=0; i<MAX_SWAPFILES; i++) {
183 if (swap_info[i].flags == 0) {
184 swapfile_used[i]=SWAPFILE_UNUSED;
185 } else {
186 if(!len) {
187 printk(KERN_WARNING "resume= option should be used to set suspend device" );
188 if(root_swap == 0xFFFF) {
189 swapfile_used[i] = SWAPFILE_SUSPEND;
190 root_swap = i;
191 } else
192 swapfile_used[i] = SWAPFILE_IGNORED;
193 } else {
194 /* we ignore all swap devices that are not the resume_file */
195 if (is_resume_device(&swap_info[i])) {
196 swapfile_used[i] = SWAPFILE_SUSPEND;
197 root_swap = i;
198 } else {
199 swapfile_used[i] = SWAPFILE_IGNORED;
200 }
201 }
202 }
203 }
204 swap_list_unlock();
205 return (root_swap != 0xffff) ? 0 : -ENODEV;
206}
207
208/**
209 * This is called after saving image so modification
210 * will be lost after resume... and that's what we want.
211 * we make the device unusable. A new call to
212 * lock_swapdevices can unlock the devices.
213 */
214static void lock_swapdevices(void)
215{
216 int i;
217
218 swap_list_lock();
219 for(i = 0; i< MAX_SWAPFILES; i++)
220 if(swapfile_used[i] == SWAPFILE_IGNORED) {
221 swap_info[i].flags ^= 0xFF;
222 }
223 swap_list_unlock();
224}
225
226/**
227 * write_swap_page - Write one page to a fresh swap location.
228 * @addr: Address we're writing.
229 * @loc: Place to store the entry we used.
230 *
231 * Allocate a new swap entry and 'sync' it. Note we discard -EIO
232 * errors. That is an artifact left over from swsusp. It did not
233 * check the return of rw_swap_page_sync() at all, since most pages
234 * written back to swap would return -EIO.
235 * This is a partial improvement, since we will at least return other
236 * errors, though we need to eventually fix the damn code.
237 */
238static int write_page(unsigned long addr, swp_entry_t * loc)
239{
240 swp_entry_t entry;
241 int error = 0;
242
243 entry = get_swap_page();
244 if (swp_offset(entry) &&
245 swapfile_used[swp_type(entry)] == SWAPFILE_SUSPEND) {
246 error = rw_swap_page_sync(WRITE, entry,
247 virt_to_page(addr));
248 if (error == -EIO)
249 error = 0;
250 if (!error)
251 *loc = entry;
252 } else
253 error = -ENOSPC;
254 return error;
255}
256
257/**
258 * data_free - Free the swap entries used by the saved image.
259 *
260 * Walk the list of used swap entries and free each one.
261 * This is only used for cleanup when suspend fails.
262 */
263static void data_free(void)
264{
265 swp_entry_t entry;
266 int i;
267
268 for (i = 0; i < nr_copy_pages; i++) {
269 entry = (pagedir_nosave + i)->swap_address;
270 if (entry.val)
271 swap_free(entry);
272 else
273 break;
274 (pagedir_nosave + i)->swap_address = (swp_entry_t){0};
275 }
276}
277
278/**
279 * data_write - Write saved image to swap.
280 *
281 * Walk the list of pages in the image and sync each one to swap.
282 */
283static int data_write(void)
284{
285 int error = 0, i = 0;
286 unsigned int mod = nr_copy_pages / 100;
287 struct pbe *p;
288
289 if (!mod)
290 mod = 1;
291
292 printk( "Writing data to swap (%d pages)... ", nr_copy_pages );
293 for_each_pbe(p, pagedir_nosave) {
294 if (!(i%mod))
295 printk( "\b\b\b\b%3d%%", i / mod );
296 if ((error = write_page(p->address, &(p->swap_address))))
297 return error;
298 i++;
299 }
300 printk("\b\b\b\bdone\n");
301 return error;
302}
303
/*
 * Debug helper: print every scalar field of the swsusp_info header through
 * pr_debug().  It only reads swsusp_info and changes nothing.
 */
304static void dump_info(void)
305{
306 pr_debug(" swsusp: Version: %u\n",swsusp_info.version_code);
307 pr_debug(" swsusp: Num Pages: %ld\n",swsusp_info.num_physpages);
308 pr_debug(" swsusp: UTS Sys: %s\n",swsusp_info.uts.sysname);
309 pr_debug(" swsusp: UTS Node: %s\n",swsusp_info.uts.nodename);
310 pr_debug(" swsusp: UTS Release: %s\n",swsusp_info.uts.release);
311 pr_debug(" swsusp: UTS Version: %s\n",swsusp_info.uts.version);
312 pr_debug(" swsusp: UTS Machine: %s\n",swsusp_info.uts.machine);
313 pr_debug(" swsusp: UTS Domain: %s\n",swsusp_info.uts.domainname);
314 pr_debug(" swsusp: CPUs: %d\n",swsusp_info.cpus);
315 pr_debug(" swsusp: Image: %ld Pages\n",swsusp_info.image_pages);
316 pr_debug(" swsusp: Pagedir: %ld Pages\n",swsusp_info.pagedir_pages);
317}
318
319static void init_header(void)
320{
321 memset(&swsusp_info, 0, sizeof(swsusp_info));
322 swsusp_info.version_code = LINUX_VERSION_CODE;
323 swsusp_info.num_physpages = num_physpages;
324 memcpy(&swsusp_info.uts, &system_utsname, sizeof(system_utsname));
325
326 swsusp_info.suspend_pagedir = pagedir_nosave;
327 swsusp_info.cpus = num_online_cpus();
328 swsusp_info.image_pages = nr_copy_pages;
329}
330
331static int close_swap(void)
332{
333 swp_entry_t entry;
334 int error;
335
336 dump_info();
337 error = write_page((unsigned long)&swsusp_info, &entry);
338 if (!error) {
339 printk( "S" );
340 error = mark_swapfiles(entry);
341 printk( "|\n" );
342 }
343 return error;
344}
345
346/**
347 * free_pagedir_entries - Free pages used by the page directory.
348 *
349 * This is used during suspend for error recovery.
350 */
351
352static void free_pagedir_entries(void)
353{
354 int i;
355
356 for (i = 0; i < swsusp_info.pagedir_pages; i++)
357 swap_free(swsusp_info.pagedir[i]);
358}
359
360
361/**
362 * write_pagedir - Write the array of pages holding the page directory.
363 * @last: Last swap entry we write (needed for header).
364 */
365
366static int write_pagedir(void)
367{
368 int error = 0;
369 unsigned n = 0;
370 struct pbe * pbe;
371
372 printk( "Writing pagedir...");
373 for_each_pb_page(pbe, pagedir_nosave) {
374 if ((error = write_page((unsigned long)pbe, &swsusp_info.pagedir[n++])))
375 return error;
376 }
377
378 swsusp_info.pagedir_pages = n;
379 printk("done (%u pages)\n", n);
380 return error;
381}
382
/**
 *	write_suspend_image - Write entire image and metadata.
 *
 *	Order: header initialisation, image data, page directory, and
 *	finally the header page plus swap signature.  If the data write
 *	fails, the data swap entries are released; if a later step fails,
 *	the pagedir entries are released as well.
 */

static int write_suspend_image(void)
{
	int error;

	init_header();
	error = data_write();
	if (!error) {
		error = write_pagedir();
		if (!error)
			error = close_swap();
		if (!error)
			return 0;
		free_pagedir_entries();
	}
	data_free();
	return error;
}
409
410
411#ifdef CONFIG_HIGHMEM
412struct highmem_page {
413 char *data;
414 struct page *page;
415 struct highmem_page *next;
416};
417
418static struct highmem_page *highmem_copy;
419
420static int save_highmem_zone(struct zone *zone)
421{
422 unsigned long zone_pfn;
423 mark_free_pages(zone);
424 for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) {
425 struct page *page;
426 struct highmem_page *save;
427 void *kaddr;
428 unsigned long pfn = zone_pfn + zone->zone_start_pfn;
429
430 if (!(pfn%1000))
431 printk(".");
432 if (!pfn_valid(pfn))
433 continue;
434 page = pfn_to_page(pfn);
435 /*
436 * This condition results from rvmalloc() sans vmalloc_32()
437 * and architectural memory reservations. This should be
438 * corrected eventually when the cases giving rise to this
439 * are better understood.
440 */
441 if (PageReserved(page)) {
442 printk("highmem reserved page?!\n");
443 continue;
444 }
445 BUG_ON(PageNosave(page));
446 if (PageNosaveFree(page))
447 continue;
448 save = kmalloc(sizeof(struct highmem_page), GFP_ATOMIC);
449 if (!save)
450 return -ENOMEM;
451 save->next = highmem_copy;
452 save->page = page;
453 save->data = (void *) get_zeroed_page(GFP_ATOMIC);
454 if (!save->data) {
455 kfree(save);
456 return -ENOMEM;
457 }
458 kaddr = kmap_atomic(page, KM_USER0);
459 memcpy(save->data, kaddr, PAGE_SIZE);
460 kunmap_atomic(kaddr, KM_USER0);
461 highmem_copy = save;
462 }
463 return 0;
464}
465#endif /* CONFIG_HIGHMEM */
466
467
/*
 * save_highmem - save the contents of all highmem zones.
 *
 * Returns the first error from save_highmem_zone(), or 0.  Without
 * CONFIG_HIGHMEM this does nothing and returns 0.
 */
static int save_highmem(void)
{
#ifdef CONFIG_HIGHMEM
	struct zone *zone;

	pr_debug("swsusp: Saving Highmem\n");
	for_each_zone(zone) {
		int res;

		if (!is_highmem(zone))
			continue;
		res = save_highmem_zone(zone);
		if (res)
			return res;
	}
#endif
	return 0;
}
484
/*
 * restore_highmem - copy saved highmem pages back and free the copies.
 *
 * Drains the highmem_copy list: every saved page is written back to its
 * original page, then the copy and its list node are freed.
 * Always returns 0.
 */
static int restore_highmem(void)
{
#ifdef CONFIG_HIGHMEM
	struct highmem_page *save;

	printk("swsusp: Restoring Highmem\n");
	while ((save = highmem_copy) != NULL) {
		void *dst;

		highmem_copy = save->next;

		dst = kmap_atomic(save->page, KM_USER0);
		memcpy(dst, save->data, PAGE_SIZE);
		kunmap_atomic(dst, KM_USER0);
		free_page((long) save->data);
		kfree(save);
	}
#endif
	return 0;
}
503
504
505static int pfn_is_nosave(unsigned long pfn)
506{
507 unsigned long nosave_begin_pfn = __pa(&__nosave_begin) >> PAGE_SHIFT;
508 unsigned long nosave_end_pfn = PAGE_ALIGN(__pa(&__nosave_end)) >> PAGE_SHIFT;
509 return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn);
510}
511
512/**
513 * saveable - Determine whether a page should be cloned or not.
514 * @pfn: The page
515 *
516 * We save a page if it's Reserved, and not in the range of pages
517 * statically defined as 'unsaveable', or if it isn't reserved, and
518 * isn't part of a free chunk of pages.
519 */
520
521static int saveable(struct zone * zone, unsigned long * zone_pfn)
522{
523 unsigned long pfn = *zone_pfn + zone->zone_start_pfn;
524 struct page * page;
525
526 if (!pfn_valid(pfn))
527 return 0;
528
529 page = pfn_to_page(pfn);
530 BUG_ON(PageReserved(page) && PageNosave(page));
531 if (PageNosave(page))
532 return 0;
533 if (PageReserved(page) && pfn_is_nosave(pfn)) {
534 pr_debug("[nosave pfn 0x%lx]", pfn);
535 return 0;
536 }
537 if (PageNosaveFree(page))
538 return 0;
539
540 return 1;
541}
542
543static void count_data_pages(void)
544{
545 struct zone *zone;
546 unsigned long zone_pfn;
547
548 nr_copy_pages = 0;
549
550 for_each_zone(zone) {
551 if (is_highmem(zone))
552 continue;
553 mark_free_pages(zone);
554 for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn)
555 nr_copy_pages += saveable(zone, &zone_pfn);
556 }
557}
558
559
560static void copy_data_pages(void)
561{
562 struct zone *zone;
563 unsigned long zone_pfn;
564 struct pbe * pbe = pagedir_nosave;
565
566 pr_debug("copy_data_pages(): pages to copy: %d\n", nr_copy_pages);
567 for_each_zone(zone) {
568 if (is_highmem(zone))
569 continue;
570 mark_free_pages(zone);
571 for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) {
572 if (saveable(zone, &zone_pfn)) {
573 struct page * page;
574 page = pfn_to_page(zone_pfn + zone->zone_start_pfn);
575 BUG_ON(!pbe);
576 pbe->orig_address = (long) page_address(page);
577 /* copy_page is not usable for copying task structs. */
578 memcpy((void *)pbe->address, (void *)pbe->orig_address, PAGE_SIZE);
579 pbe = pbe->next;
580 }
581 }
582 }
583 BUG_ON(pbe);
584}
585
586
587/**
588 * calc_nr - Determine the number of pages needed for a pbe list.
589 */
590
591static int calc_nr(int nr_copy)
592{
593 int extra = 0;
594 int mod = !!(nr_copy % PBES_PER_PAGE);
595 int diff = (nr_copy / PBES_PER_PAGE) + mod;
596
597 do {
598 extra += diff;
599 nr_copy += diff;
600 mod = !!(nr_copy % PBES_PER_PAGE);
601 diff = (nr_copy / PBES_PER_PAGE) + mod - extra;
602 } while (diff > 0);
603
604 return nr_copy;
605}
606
607/**
608 * free_pagedir - free pages allocated with alloc_pagedir()
609 */
610
611static inline void free_pagedir(struct pbe *pblist)
612{
613 struct pbe *pbe;
614
615 while (pblist) {
616 pbe = (pblist + PB_PAGE_SKIP)->next;
617 free_page((unsigned long)pblist);
618 pblist = pbe;
619 }
620}
621
622/**
623 * fill_pb_page - Create a list of PBEs on a given memory page
624 */
625
626static inline void fill_pb_page(struct pbe *pbpage)
627{
628 struct pbe *p;
629
630 p = pbpage;
631 pbpage += PB_PAGE_SKIP;
632 do
633 p->next = p + 1;
634 while (++p < pbpage);
635}
636
637/**
638 * create_pbe_list - Create a list of PBEs on top of a given chain
639 * of memory pages allocated with alloc_pagedir()
640 */
641
/*
 * @pblist is the first page of the chain, @nr_pages the number of slots the
 * list must cover.  'num' tracks how many slots are covered once the current
 * page is included; it starts at PBES_PER_PAGE for the first page.
 */
642static void create_pbe_list(struct pbe *pblist, unsigned nr_pages)
643{
644 struct pbe *pbpage, *p;
645 unsigned num = PBES_PER_PAGE;
646
647 for_each_pb_page (pbpage, pblist) {
648 if (num >= nr_pages) /* this page reaches the end of the list */
649 break;
650
651 fill_pb_page(pbpage); /* page is used in full: link all of it */
652 num += PBES_PER_PAGE;
653 }
654 if (pbpage) { /* NULL here means the chain ran out before nr_pages was covered */
 /* Link only the slots of the last page that are needed, then terminate the list. */
655 for (num -= PBES_PER_PAGE - 1, p = pbpage; num < nr_pages; p++, num++)
656 p->next = p + 1;
657 p->next = NULL;
658 }
659 pr_debug("create_pbe_list(): initialized %d PBEs\n", num);
660}
661
662/**
663 * alloc_pagedir - Allocate the page directory.
664 *
665 * First, determine exactly how many pages we need and
666 * allocate them.
667 *
668 * We arrange the pages in a chain: each page is an array of PBES_PER_PAGE
669 * struct pbe elements (pbes) and the last element in the page points
670 * to the next page.
671 *
672 * On each page we set up a list of struct_pbe elements.
673 */
674
675static struct pbe * alloc_pagedir(unsigned nr_pages)
676{
677 unsigned num;
678 struct pbe *pblist, *pbe;
679
680 if (!nr_pages)
681 return NULL;
682
683 pr_debug("alloc_pagedir(): nr_pages = %d\n", nr_pages);
684 pblist = (struct pbe *)get_zeroed_page(GFP_ATOMIC | __GFP_COLD);
685 for (pbe = pblist, num = PBES_PER_PAGE; pbe && num < nr_pages;
686 pbe = pbe->next, num += PBES_PER_PAGE) {
687 pbe += PB_PAGE_SKIP;
688 pbe->next = (struct pbe *)get_zeroed_page(GFP_ATOMIC | __GFP_COLD);
689 }
690 if (!pbe) { /* get_zeroed_page() failed */
691 free_pagedir(pblist);
692 pblist = NULL;
693 }
694 return pblist;
695}
696
697/**
698 * free_image_pages - Free pages allocated for snapshot
699 */
700
701static void free_image_pages(void)
702{
703 struct pbe * p;
704
705 for_each_pbe(p, pagedir_save) {
706 if (p->address) {
707 ClearPageNosave(virt_to_page(p->address));
708 free_page(p->address);
709 p->address = 0;
710 }
711 }
712}
713
714/**
715 * alloc_image_pages - Allocate pages for the snapshot.
716 */
717
718static int alloc_image_pages(void)
719{
720 struct pbe * p;
721
722 for_each_pbe(p, pagedir_save) {
723 p->address = get_zeroed_page(GFP_ATOMIC | __GFP_COLD);
724 if (!p->address)
725 return -ENOMEM;
726 SetPageNosave(virt_to_page(p->address));
727 }
728 return 0;
729}
730
/*
 * swsusp_free - release the snapshot: first the image pages referenced by
 * pagedir_save, then the pagedir pages themselves.
 *
 * pagedir_save is not reset afterwards.
 * NOTE(review): callers presumably must not call this twice for the same
 * allocation, nor before swsusp_alloc() has set pagedir_save - confirm.
 */
731void swsusp_free(void)
732{
 /* Sanity checks: the first pagedir page must be neither Nosave nor marked free. */
733 BUG_ON(PageNosave(virt_to_page(pagedir_save)));
734 BUG_ON(PageNosaveFree(virt_to_page(pagedir_save)));
735 free_image_pages();
736 free_pagedir(pagedir_save);
737}
738
739
740/**
741 * enough_free_mem - Make sure we enough free memory to snapshot.
742 *
743 * Returns TRUE or FALSE after checking the number of available
744 * free pages.
745 */
746
747static int enough_free_mem(void)
748{
749 if (nr_free_pages() < (nr_copy_pages + PAGES_FOR_IO)) {
750 pr_debug("swsusp: Not enough free pages: Have %d\n",
751 nr_free_pages());
752 return 0;
753 }
754 return 1;
755}
756
757
758/**
759 * enough_swap - Make sure we have enough swap to save the image.
760 *
761 * Returns TRUE or FALSE after checking the total amount of swap
762 * space avaiable.
763 *
764 * FIXME: si_swapinfo(&i) returns all swap devices information.
765 * We should only consider resume_device.
766 */
767
768static int enough_swap(void)
769{
770 struct sysinfo i;
771
772 si_swapinfo(&i);
773 if (i.freeswap < (nr_copy_pages + PAGES_FOR_IO)) {
774 pr_debug("swsusp: Not enough swap. Need %ld\n",i.freeswap);
775 return 0;
776 }
777 return 1;
778}
779
780static int swsusp_alloc(void)
781{
782 int error;
783
784 pr_debug("suspend: (pages needed: %d + %d free: %d)\n",
785 nr_copy_pages, PAGES_FOR_IO, nr_free_pages());
786
787 pagedir_nosave = NULL;
788 if (!enough_free_mem())
789 return -ENOMEM;
790
791 if (!enough_swap())
792 return -ENOSPC;
793
794 nr_copy_pages = calc_nr(nr_copy_pages);
795
796 if (!(pagedir_save = alloc_pagedir(nr_copy_pages))) {
797 printk(KERN_ERR "suspend: Allocating pagedir failed.\n");
798 return -ENOMEM;
799 }
800 create_pbe_list(pagedir_save, nr_copy_pages);
801 pagedir_nosave = pagedir_save;
802 if ((error = alloc_image_pages())) {
803 printk(KERN_ERR "suspend: Allocating image pages failed.\n");
804 swsusp_free();
805 return error;
806 }
807
808 nr_copy_pages_check = nr_copy_pages;
809 return 0;
810}
811
812static int suspend_prepare_image(void)
813{
814 int error;
815
816 pr_debug("swsusp: critical section: \n");
817 if (save_highmem()) {
818 printk(KERN_CRIT "Suspend machine: Not enough free pages for highmem\n");
819 restore_highmem();
820 return -ENOMEM;
821 }
822
823 drain_local_pages();
824 count_data_pages();
825 printk("swsusp: Need to copy %u pages\n", nr_copy_pages);
826
827 error = swsusp_alloc();
828 if (error)
829 return error;
830
831 /* During allocating of suspend pagedir, new cold pages may appear.
832 * Kill them.
833 */
834 drain_local_pages();
835 copy_data_pages();
836
837 /*
838 * End of critical section. From now on, we can write to memory,
839 * but we should not touch disk. This specially means we must _not_
840 * touch swap space! Except we must write out our image of course.
841 */
842
843 printk("swsusp: critical section/: done (%d pages copied)\n", nr_copy_pages );
844 return 0;
845}
846
847
/*
 * swsusp_write - resume the devices and write the image to swap.
 *
 * It is important _NOT_ to umount filesystems at this point. We want
 * them synced (in case something goes wrong) but we DO not want to mark
 * filesystem clean: it is not. (And it does not matter, if we resume
 * correctly, we'll mark system clean, anyway.)
 *
 * The ignored swap devices are locked for the duration of the write and
 * unlocked again afterwards.  Returns the result of write_suspend_image().
 */
int swsusp_write(void)
{
	int error;

	device_resume();

	lock_swapdevices();
	error = write_suspend_image();
	/* This will unlock ignored swap devices since writing is finished */
	lock_swapdevices();

	return error;
}
864
865
866extern asmlinkage int swsusp_arch_suspend(void);
867extern asmlinkage int swsusp_arch_resume(void);
868
869
870asmlinkage int swsusp_save(void)
871{
872 int error = 0;
873
874 if ((error = swsusp_swap_check())) {
875 printk(KERN_ERR "swsusp: FATAL: cannot find swap device, try "
876 "swapon -a!\n");
877 return error;
878 }
879 return suspend_prepare_image();
880}
881
882int swsusp_suspend(void)
883{
884 int error;
885 if ((error = arch_prepare_suspend()))
886 return error;
887 local_irq_disable();
888 /* At this point, device_suspend() has been called, but *not*
889 * device_power_down(). We *must* device_power_down() now.
890 * Otherwise, drivers for some devices (e.g. interrupt controllers)
891 * become desynchronized with the actual state of the hardware
892 * at resume time, and evil weirdness ensues.
893 */
894 if ((error = device_power_down(PMSG_FREEZE))) {
895 printk(KERN_ERR "Some devices failed to power down, aborting suspend\n");
896 local_irq_enable();
897 swsusp_free();
898 return error;
899 }
900 save_processor_state();
901 if ((error = swsusp_arch_suspend()))
902 swsusp_free();
903 /* Restore control flow magically appears here */
904 restore_processor_state();
905 BUG_ON (nr_copy_pages_check != nr_copy_pages);
906 restore_highmem();
907 device_power_up();
908 local_irq_enable();
909 return error;
910}
911
/*
 * swsusp_resume - power devices down and hand over to swsusp_arch_resume().
 *
 * Returning from this function at all means the resume failed: the return
 * value is the non-zero error from swsusp_arch_resume() (a zero result is
 * treated as a BUG).  Before returning, processor state, highmem and device
 * power are restored and interrupts re-enabled.
 */
912int swsusp_resume(void)
913{
914 int error;
915 local_irq_disable();
 /* A power-down failure is only reported here; the resume attempt continues. */
916 if (device_power_down(PMSG_FREEZE))
917 printk(KERN_ERR "Some devices failed to power down, very bad\n");
918 /* We'll ignore saved state, but this gets preempt count (etc) right */
919 save_processor_state();
920 error = swsusp_arch_resume();
921 /* Code below is only ever reached in case of failure. Otherwise
922 * execution continues at place where swsusp_arch_suspend was called
923 */
924 BUG_ON(!error);
925 restore_processor_state();
926 restore_highmem();
927 device_power_up();
928 local_irq_enable();
929 return error;
930}
931
932/* More restore stuff */
933
934/*
935 * Returns true if given address/order collides with any orig_address
936 */
937static int does_collide_order(unsigned long addr, int order)
938{
939 int i;
940
941 for (i=0; i < (1<<order); i++)
942 if (!PageNosaveFree(virt_to_page(addr + i * PAGE_SIZE)))
943 return 1;
944 return 0;
945}
946
/**
 *	On resume, for storing the PBE list and the image,
 *	we can only use memory pages that do not conflict with the pages
 *	which had been used before suspend.
 *
 *	We don't know which pages are usable until we allocate them.
 *
 *	Allocated but unusable (ie eaten) memory pages are linked together
 *	to create a list, so that we can free them easily.  The first word
 *	of every eaten page holds the pointer to the page eaten before it,
 *	and eaten_memory points at the most recently eaten page.
 *
 *	We could have used a type other than (void *)
 *	for this purpose, but ...
 */
static void **eaten_memory = NULL;

/* Push @page onto the front of the eaten_memory list. */
static inline void eat_page(void *page)
{
	void **prev_head = eaten_memory;

	eaten_memory = page;
	eaten_memory[0] = prev_head;
}
970
/*
 * get_usable_page - allocate a zeroed page that does not collide.
 * @gfp_mask: allocation flags handed to get_zeroed_page().
 *
 * Keeps allocating pages until one is found for which
 * does_collide_order() reports no collision.  Every colliding page is
 * pushed onto the eaten_memory list with eat_page().
 *
 * Returns the address of the usable page, or 0 if an allocation failed.
 */
static unsigned long get_usable_page(unsigned gfp_mask)
{
	unsigned long m;

	for (;;) {
		m = get_zeroed_page(gfp_mask);
		/* A failed allocation must be caught before the collision
		 * test: address 0 must neither be handed to virt_to_page()
		 * nor written through by eat_page().
		 */
		if (!m || !does_collide_order(m, 0))
			break;
		eat_page((void *)m);
	}
	return m;
}
984
985static void free_eaten_memory(void)
986{
987 unsigned long m;
988 void **c;
989 int i = 0;
990
991 c = eaten_memory;
992 while (c) {
993 m = (unsigned long)c;
994 c = *c;
995 free_page(m);
996 i++;
997 }
998 eaten_memory = NULL;
999 pr_debug("swsusp: %d unused pages freed\n", i);
1000}
1001
1002/**
1003 * check_pagedir - We ensure here that pages that the PBEs point to
1004 * won't collide with pages where we're going to restore from the loaded
1005 * pages later
1006 */
1007
1008static int check_pagedir(struct pbe *pblist)
1009{
1010 struct pbe *p;
1011
1012 /* This is necessary, so that we can free allocated pages
1013 * in case of failure
1014 */
1015 for_each_pbe (p, pblist)
1016 p->address = 0UL;
1017
1018 for_each_pbe (p, pblist) {
1019 p->address = get_usable_page(GFP_ATOMIC);
1020 if (!p->address)
1021 return -ENOMEM;
1022 }
1023 return 0;
1024}
1025
1026/**
1027 * swsusp_pagedir_relocate - It is possible, that some memory pages
1028 * occupied by the list of PBEs collide with pages where we're going to
1029 * restore from the loaded pages later. We relocate them here.
1030 */
1031
1032static struct pbe * swsusp_pagedir_relocate(struct pbe *pblist)
1033{
1034 struct zone *zone;
1035 unsigned long zone_pfn;
1036 struct pbe *pbpage, *tail, *p;
1037 void *m;
1038 int rel = 0, error = 0;
1039
1040 if (!pblist) /* a sanity check */
1041 return NULL;
1042
1043 pr_debug("swsusp: Relocating pagedir (%lu pages to check)\n",
1044 swsusp_info.pagedir_pages);
1045
1046 /* Set page flags */
1047
1048 for_each_zone(zone) {
1049 for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn)
1050 SetPageNosaveFree(pfn_to_page(zone_pfn +
1051 zone->zone_start_pfn));
1052 }
1053
1054 /* Clear orig addresses */
1055
1056 for_each_pbe (p, pblist)
1057 ClearPageNosaveFree(virt_to_page(p->orig_address));
1058
1059 tail = pblist + PB_PAGE_SKIP;
1060
1061 /* Relocate colliding pages */
1062
1063 for_each_pb_page (pbpage, pblist) {
1064 if (does_collide_order((unsigned long)pbpage, 0)) {
1065 m = (void *)get_usable_page(GFP_ATOMIC | __GFP_COLD);
1066 if (!m) {
1067 error = -ENOMEM;
1068 break;
1069 }
1070 memcpy(m, (void *)pbpage, PAGE_SIZE);
1071 if (pbpage == pblist)
1072 pblist = (struct pbe *)m;
1073 else
1074 tail->next = (struct pbe *)m;
1075
1076 eat_page((void *)pbpage);
1077 pbpage = (struct pbe *)m;
1078
1079 /* We have to link the PBEs again */
1080
1081 for (p = pbpage; p < pbpage + PB_PAGE_SKIP; p++)
1082 if (p->next) /* needed to save the end */
1083 p->next = p + 1;
1084
1085 rel++;
1086 }
1087 tail = pbpage + PB_PAGE_SKIP;
1088 }
1089
1090 if (error) {
1091 printk("\nswsusp: Out of memory\n\n");
1092 free_pagedir(pblist);
1093 free_eaten_memory();
1094 pblist = NULL;
1095 }
1096 else
1097 printk("swsusp: Relocated %d pages\n", rel);
1098
1099 return pblist;
1100}
1101
1102/**
1103 * Using bio to read from swap.
1104 * This code requires a bit more work than just using buffer heads
1105 * but, it is the recommended way for 2.5/2.6.
1106 * The following are to signal the beginning and end of I/O. Bios
1107 * finish asynchronously, while we want them to happen synchronously.
1108 * A simple atomic_t, and a wait loop take care of this problem.
1109 */
1110
1111static atomic_t io_done = ATOMIC_INIT(0);
1112
1113static int end_io(struct bio * bio, unsigned int num, int err)
1114{
1115 if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
1116 panic("I/O error reading memory image");
1117 atomic_set(&io_done, 0);
1118 return 0;
1119}
1120
1121static struct block_device * resume_bdev;
1122
1123/**
1124 * submit - submit BIO request.
1125 * @rw: READ or WRITE.
1126 * @off physical offset of page.
1127 * @page: page we're reading or writing.
1128 *
1129 * Straight from the textbook - allocate and initialize the bio.
1130 * If we're writing, make sure the page is marked as dirty.
1131 * Then submit it and wait.
1132 */
1133
1134static int submit(int rw, pgoff_t page_off, void * page)
1135{
1136 int error = 0;
1137 struct bio * bio;
1138
1139 bio = bio_alloc(GFP_ATOMIC, 1);
1140 if (!bio)
1141 return -ENOMEM;
1142 bio->bi_sector = page_off * (PAGE_SIZE >> 9);
1143 bio_get(bio);
1144 bio->bi_bdev = resume_bdev;
1145 bio->bi_end_io = end_io;
1146
1147 if (bio_add_page(bio, virt_to_page(page), PAGE_SIZE, 0) < PAGE_SIZE) {
1148 printk("swsusp: ERROR: adding page to bio at %ld\n",page_off);
1149 error = -EFAULT;
1150 goto Done;
1151 }
1152
1153 if (rw == WRITE)
1154 bio_set_pages_dirty(bio);
1155
1156 atomic_set(&io_done, 1);
1157 submit_bio(rw | (1 << BIO_RW_SYNC), bio);
1158 while (atomic_read(&io_done))
1159 yield();
1160
1161 Done:
1162 bio_put(bio);
1163 return error;
1164}
1165
1166static int bio_read_page(pgoff_t page_off, void * page)
1167{
1168 return submit(READ, page_off, page);
1169}
1170
1171static int bio_write_page(pgoff_t page_off, void * page)
1172{
1173 return submit(WRITE, page_off, page);
1174}
1175
1176/*
1177 * Sanity check if this image makes sense with this kernel/swap context
1178 * I really don't think that it's foolproof but more than nothing..
1179 */
1180
1181static const char * sanity_check(void)
1182{
1183 dump_info();
1184 if(swsusp_info.version_code != LINUX_VERSION_CODE)
1185 return "kernel version";
1186 if(swsusp_info.num_physpages != num_physpages)
1187 return "memory size";
1188 if (strcmp(swsusp_info.uts.sysname,system_utsname.sysname))
1189 return "system type";
1190 if (strcmp(swsusp_info.uts.release,system_utsname.release))
1191 return "kernel release";
1192 if (strcmp(swsusp_info.uts.version,system_utsname.version))
1193 return "version";
1194 if (strcmp(swsusp_info.uts.machine,system_utsname.machine))
1195 return "machine";
1196 if(swsusp_info.cpus != num_online_cpus())
1197 return "number of cpus";
1198 return NULL;
1199}
1200
1201
1202static int check_header(void)
1203{
1204 const char * reason = NULL;
1205 int error;
1206
1207 if ((error = bio_read_page(swp_offset(swsusp_header.swsusp_info), &swsusp_info)))
1208 return error;
1209
1210 /* Is this same machine? */
1211 if ((reason = sanity_check())) {
1212 printk(KERN_ERR "swsusp: Resume mismatch: %s\n",reason);
1213 return -EPERM;
1214 }
1215 nr_copy_pages = swsusp_info.image_pages;
1216 return error;
1217}
1218
1219static int check_sig(void)
1220{
1221 int error;
1222
1223 memset(&swsusp_header, 0, sizeof(swsusp_header));
1224 if ((error = bio_read_page(0, &swsusp_header)))
1225 return error;
1226 if (!memcmp(SWSUSP_SIG, swsusp_header.sig, 10)) {
1227 memcpy(swsusp_header.sig, swsusp_header.orig_sig, 10);
1228
1229 /*
1230 * Reset swap signature now.
1231 */
1232 error = bio_write_page(0, &swsusp_header);
1233 } else {
1234 printk(KERN_ERR "swsusp: Suspend partition has wrong signature?\n");
1235 return -EINVAL;
1236 }
1237 if (!error)
1238 pr_debug("swsusp: Signature found, resuming\n");
1239 return error;
1240}
1241
1242/**
1243 * data_read - Read image pages from swap.
1244 *
1245 * You do not need to check for overlaps, check_pagedir()
1246 * already did that.
1247 */
1248
1249static int data_read(struct pbe *pblist)
1250{
1251 struct pbe * p;
1252 int error = 0;
1253 int i = 0;
1254 int mod = swsusp_info.image_pages / 100;
1255
1256 if (!mod)
1257 mod = 1;
1258
1259 printk("swsusp: Reading image data (%lu pages): ",
1260 swsusp_info.image_pages);
1261
1262 for_each_pbe (p, pblist) {
1263 if (!(i % mod))
1264 printk("\b\b\b\b%3d%%", i / mod);
1265
1266 error = bio_read_page(swp_offset(p->swap_address),
1267 (void *)p->address);
1268 if (error)
1269 return error;
1270
1271 i++;
1272 }
1273 printk("\b\b\b\bdone\n");
1274 return error;
1275}
1276
1277extern dev_t name_to_dev_t(const char *line);
1278
1279/**
1280 * read_pagedir - Read page backup list pages from swap
1281 */
1282
1283static int read_pagedir(struct pbe *pblist)
1284{
1285 struct pbe *pbpage, *p;
1286 unsigned i = 0;
1287 int error;
1288
1289 if (!pblist)
1290 return -EFAULT;
1291
1292 printk("swsusp: Reading pagedir (%lu pages)\n",
1293 swsusp_info.pagedir_pages);
1294
1295 for_each_pb_page (pbpage, pblist) {
1296 unsigned long offset = swp_offset(swsusp_info.pagedir[i++]);
1297
1298 error = -EFAULT;
1299 if (offset) {
1300 p = (pbpage + PB_PAGE_SKIP)->next;
1301 error = bio_read_page(offset, (void *)pbpage);
1302 (pbpage + PB_PAGE_SKIP)->next = p;
1303 }
1304 if (error)
1305 break;
1306 }
1307
1308 if (error)
1309 free_page((unsigned long)pblist);
1310
1311 BUG_ON(i != swsusp_info.pagedir_pages);
1312
1313 return error;
1314}
1315
1316
/*
 * Check the signature first and, only if that went well, the image
 * header.  Returns 0 if both checks pass, otherwise the error of the
 * check that failed.
 */
static int check_suspend_image(void)
{
	int error = check_sig();

	if (!error)
		error = check_header();

	return error;
}
1329
1330static int read_suspend_image(void)
1331{
1332 int error = 0;
1333 struct pbe *p;
1334
1335 if (!(p = alloc_pagedir(nr_copy_pages)))
1336 return -ENOMEM;
1337
1338 if ((error = read_pagedir(p)))
1339 return error;
1340
1341 create_pbe_list(p, nr_copy_pages);
1342
1343 if (!(pagedir_nosave = swsusp_pagedir_relocate(p)))
1344 return -ENOMEM;
1345
1346 /* Allocate memory for the image and read the data from swap */
1347
1348 error = check_pagedir(pagedir_nosave);
1349 free_eaten_memory();
1350 if (!error)
1351 error = data_read(pagedir_nosave);
1352
1353 if (error) { /* We fail cleanly */
1354 for_each_pbe (p, pagedir_nosave)
1355 if (p->address) {
1356 free_page(p->address);
1357 p->address = 0UL;
1358 }
1359 free_pagedir(pagedir_nosave);
1360 }
1361 return error;
1362}
1363
1364/**
1365 * swsusp_check - Check for saved image in swap
1366 */
1367
1368int swsusp_check(void)
1369{
1370 int error;
1371
1372 if (!swsusp_resume_device) {
1373 if (!strlen(resume_file))
1374 return -ENOENT;
1375 swsusp_resume_device = name_to_dev_t(resume_file);
1376 pr_debug("swsusp: Resume From Partition %s\n", resume_file);
1377 } else {
1378 pr_debug("swsusp: Resume From Partition %d:%d\n",
1379 MAJOR(swsusp_resume_device), MINOR(swsusp_resume_device));
1380 }
1381
1382 resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_READ);
1383 if (!IS_ERR(resume_bdev)) {
1384 set_blocksize(resume_bdev, PAGE_SIZE);
1385 error = check_suspend_image();
1386 if (error)
1387 blkdev_put(resume_bdev);
1388 } else
1389 error = PTR_ERR(resume_bdev);
1390
1391 if (!error)
1392 pr_debug("swsusp: resume file found\n");
1393 else
1394 pr_debug("swsusp: Error %d check for resume file\n", error);
1395 return error;
1396}
1397
1398/**
1399 * swsusp_read - Read saved image from swap.
1400 */
1401
1402int swsusp_read(void)
1403{
1404 int error;
1405
1406 if (IS_ERR(resume_bdev)) {
1407 pr_debug("swsusp: block device not initialised\n");
1408 return PTR_ERR(resume_bdev);
1409 }
1410
1411 error = read_suspend_image();
1412 blkdev_put(resume_bdev);
1413
1414 if (!error)
1415 pr_debug("swsusp: Reading resume file was successful\n");
1416 else
1417 pr_debug("swsusp: Error %d resuming\n", error);
1418 return error;
1419}
1420
1421/**
1422 * swsusp_close - close swap device.
1423 */
1424
1425void swsusp_close(void)
1426{
1427 if (IS_ERR(resume_bdev)) {
1428 pr_debug("swsusp: block device not initialised\n");
1429 return;
1430 }
1431
1432 blkdev_put(resume_bdev);
1433}