Diffstat (limited to 'kernel/power')
-rw-r--r--  kernel/power/Makefile    |   2
-rw-r--r--  kernel/power/disk.c      |  20
-rw-r--r--  kernel/power/main.c      |   2
-rw-r--r--  kernel/power/pm.c        |  21
-rw-r--r--  kernel/power/power.h     |  75
-rw-r--r--  kernel/power/process.c   |  61
-rw-r--r--  kernel/power/snapshot.c  | 335
-rw-r--r--  kernel/power/swap.c      | 544
-rw-r--r--  kernel/power/swsusp.c    | 887
-rw-r--r--  kernel/power/user.c      | 333
10 files changed, 1393 insertions, 887 deletions
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index 04be7d0d96a7..8d0af3d37a4b 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -5,7 +5,7 @@ endif
 
 obj-y := main.o process.o console.o
 obj-$(CONFIG_PM_LEGACY) += pm.o
-obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o disk.o snapshot.o
+obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o disk.o snapshot.o swap.o user.o
 
 obj-$(CONFIG_SUSPEND_SMP) += smp.o
 
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index 0b43847dc980..81d4d982f3f0 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -22,17 +22,6 @@
 #include "power.h"
 
 
-extern suspend_disk_method_t pm_disk_mode;
-
-extern int swsusp_shrink_memory(void);
-extern int swsusp_suspend(void);
-extern int swsusp_write(struct pbe *pblist, unsigned int nr_pages);
-extern int swsusp_check(void);
-extern int swsusp_read(struct pbe **pblist_ptr);
-extern void swsusp_close(void);
-extern int swsusp_resume(void);
-
-
 static int noresume = 0;
 char resume_file[256] = CONFIG_PM_STD_PARTITION;
 dev_t swsusp_resume_device;
@@ -70,10 +59,6 @@ static void power_down(suspend_disk_method_t mode)
 	while(1);
 }
 
-
-static int in_suspend __nosavedata = 0;
-
-
 static inline void platform_finish(void)
 {
 	if (pm_disk_mode == PM_DISK_PLATFORM) {
@@ -87,7 +72,6 @@ static int prepare_processes(void)
 	int error;
 
 	pm_prepare_console();
-	sys_sync();
 	disable_nonboot_cpus();
 
 	if (freeze_processes()) {
@@ -145,7 +129,7 @@ int pm_suspend_disk(void)
 	if (in_suspend) {
 		device_resume();
 		pr_debug("PM: writing image.\n");
-		error = swsusp_write(pagedir_nosave, nr_copy_pages);
+		error = swsusp_write();
 		if (!error)
 			power_down(pm_disk_mode);
 		else {
@@ -216,7 +200,7 @@ static int software_resume(void)
 
 	pr_debug("PM: Reading swsusp image.\n");
 
-	if ((error = swsusp_read(&pagedir_nosave))) {
+	if ((error = swsusp_read())) {
 		swsusp_free();
 		goto Thaw;
 	}
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 9cb235cba4a9..ee371f50ccaa 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -103,7 +103,7 @@ static int suspend_prepare(suspend_state_t state)
 }
 
 
-static int suspend_enter(suspend_state_t state)
+int suspend_enter(suspend_state_t state)
 {
 	int error = 0;
 	unsigned long flags;
diff --git a/kernel/power/pm.c b/kernel/power/pm.c
index 33c508e857dd..0f6908cce1dd 100644
--- a/kernel/power/pm.c
+++ b/kernel/power/pm.c
@@ -25,6 +25,7 @@
 #include <linux/pm.h>
 #include <linux/pm_legacy.h>
 #include <linux/interrupt.h>
+#include <linux/mutex.h>
 
 int pm_active;
 
@@ -40,7 +41,7 @@ int pm_active;
  * until a resume but that will be fine.
  */
 
-static DECLARE_MUTEX(pm_devs_lock);
+static DEFINE_MUTEX(pm_devs_lock);
 static LIST_HEAD(pm_devs);
 
 /**
@@ -67,9 +68,9 @@ struct pm_dev *pm_register(pm_dev_t type,
 		dev->id = id;
 		dev->callback = callback;
 
-		down(&pm_devs_lock);
+		mutex_lock(&pm_devs_lock);
 		list_add(&dev->entry, &pm_devs);
-		up(&pm_devs_lock);
+		mutex_unlock(&pm_devs_lock);
 	}
 	return dev;
 }
@@ -85,9 +86,9 @@ struct pm_dev *pm_register(pm_dev_t type,
 void pm_unregister(struct pm_dev *dev)
 {
 	if (dev) {
-		down(&pm_devs_lock);
+		mutex_lock(&pm_devs_lock);
 		list_del(&dev->entry);
-		up(&pm_devs_lock);
+		mutex_unlock(&pm_devs_lock);
 
 		kfree(dev);
 	}
@@ -118,7 +119,7 @@ void pm_unregister_all(pm_callback callback)
 	if (!callback)
 		return;
 
-	down(&pm_devs_lock);
+	mutex_lock(&pm_devs_lock);
 	entry = pm_devs.next;
 	while (entry != &pm_devs) {
 		struct pm_dev *dev = list_entry(entry, struct pm_dev, entry);
@@ -126,7 +127,7 @@ void pm_unregister_all(pm_callback callback)
 		if (dev->callback == callback)
 			__pm_unregister(dev);
 	}
-	up(&pm_devs_lock);
+	mutex_unlock(&pm_devs_lock);
 }
 
 /**
@@ -234,7 +235,7 @@ int pm_send_all(pm_request_t rqst, void *data)
 {
 	struct list_head *entry;
 
-	down(&pm_devs_lock);
+	mutex_lock(&pm_devs_lock);
 	entry = pm_devs.next;
 	while (entry != &pm_devs) {
 		struct pm_dev *dev = list_entry(entry, struct pm_dev, entry);
@@ -246,13 +247,13 @@ int pm_send_all(pm_request_t rqst, void *data)
 				 */
 				if (rqst == PM_SUSPEND)
 					pm_undo_all(dev);
-				up(&pm_devs_lock);
+				mutex_unlock(&pm_devs_lock);
 				return status;
 			}
 		}
 		entry = entry->next;
 	}
-	up(&pm_devs_lock);
+	mutex_unlock(&pm_devs_lock);
 	return 0;
 }
 
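
The pm.c hunks above are a mechanical conversion of a semaphore that was only ever used as a mutex (DECLARE_MUTEX plus down()/up()) to the dedicated mutex API (DEFINE_MUTEX plus mutex_lock()/mutex_unlock()). A minimal stand-alone sketch of the same pattern, using hypothetical example_lock/example_list names rather than anything from this commit:

#include <linux/mutex.h>
#include <linux/list.h>

static DEFINE_MUTEX(example_lock);	/* was: static DECLARE_MUTEX(example_lock); */
static LIST_HEAD(example_list);

static void example_add(struct list_head *entry)
{
	mutex_lock(&example_lock);	/* was: down(&example_lock); */
	list_add(entry, &example_list);
	mutex_unlock(&example_lock);	/* was: up(&example_lock); */
}
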
diff --git a/kernel/power/power.h b/kernel/power/power.h
index 388dba680841..f06f12f21767 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -8,6 +8,7 @@ struct swsusp_info {
 	int cpus;
 	unsigned long image_pages;
 	unsigned long pages;
+	unsigned long size;
 } __attribute__((aligned(PAGE_SIZE)));
 
 
@@ -37,21 +38,79 @@ extern struct subsystem power_subsys;
 /* References to section boundaries */
 extern const void __nosave_begin, __nosave_end;
 
-extern unsigned int nr_copy_pages;
 extern struct pbe *pagedir_nosave;
 
 /* Preferred image size in bytes (default 500 MB) */
 extern unsigned long image_size;
+extern int in_suspend;
+extern dev_t swsusp_resume_device;
 
 extern asmlinkage int swsusp_arch_suspend(void);
 extern asmlinkage int swsusp_arch_resume(void);
 
 extern unsigned int count_data_pages(void);
-extern void free_pagedir(struct pbe *pblist);
-extern void release_eaten_pages(void);
-extern struct pbe *alloc_pagedir(unsigned nr_pages, gfp_t gfp_mask, int safe_needed);
+
+struct snapshot_handle {
+	loff_t offset;
+	unsigned int page;
+	unsigned int page_offset;
+	unsigned int prev;
+	struct pbe *pbe;
+	void *buffer;
+	unsigned int buf_offset;
+};
+
+#define data_of(handle) ((handle).buffer + (handle).buf_offset)
+
+extern int snapshot_read_next(struct snapshot_handle *handle, size_t count);
+extern int snapshot_write_next(struct snapshot_handle *handle, size_t count);
+int snapshot_image_loaded(struct snapshot_handle *handle);
+
+#define SNAPSHOT_IOC_MAGIC '3'
+#define SNAPSHOT_FREEZE _IO(SNAPSHOT_IOC_MAGIC, 1)
+#define SNAPSHOT_UNFREEZE _IO(SNAPSHOT_IOC_MAGIC, 2)
+#define SNAPSHOT_ATOMIC_SNAPSHOT _IOW(SNAPSHOT_IOC_MAGIC, 3, void *)
+#define SNAPSHOT_ATOMIC_RESTORE _IO(SNAPSHOT_IOC_MAGIC, 4)
+#define SNAPSHOT_FREE _IO(SNAPSHOT_IOC_MAGIC, 5)
+#define SNAPSHOT_SET_IMAGE_SIZE _IOW(SNAPSHOT_IOC_MAGIC, 6, unsigned long)
+#define SNAPSHOT_AVAIL_SWAP _IOR(SNAPSHOT_IOC_MAGIC, 7, void *)
+#define SNAPSHOT_GET_SWAP_PAGE _IOR(SNAPSHOT_IOC_MAGIC, 8, void *)
+#define SNAPSHOT_FREE_SWAP_PAGES _IO(SNAPSHOT_IOC_MAGIC, 9)
+#define SNAPSHOT_SET_SWAP_FILE _IOW(SNAPSHOT_IOC_MAGIC, 10, unsigned int)
+#define SNAPSHOT_S2RAM _IO(SNAPSHOT_IOC_MAGIC, 11)
+#define SNAPSHOT_IOC_MAXNR 11
+
+/**
+ * The bitmap is used for tracing allocated swap pages
+ *
+ * The entire bitmap consists of a number of bitmap_page
+ * structures linked with the help of the .next member.
+ * Thus each page can be allocated individually, so we only
+ * need to make 0-order memory allocations to create
+ * the bitmap.
+ */
+
+#define BITMAP_PAGE_SIZE (PAGE_SIZE - sizeof(void *))
+#define BITMAP_PAGE_CHUNKS (BITMAP_PAGE_SIZE / sizeof(long))
+#define BITS_PER_CHUNK (sizeof(long) * 8)
+#define BITMAP_PAGE_BITS (BITMAP_PAGE_CHUNKS * BITS_PER_CHUNK)
+
+struct bitmap_page {
+	unsigned long chunks[BITMAP_PAGE_CHUNKS];
+	struct bitmap_page *next;
+};
+
+extern void free_bitmap(struct bitmap_page *bitmap);
+extern struct bitmap_page *alloc_bitmap(unsigned int nr_bits);
+extern unsigned long alloc_swap_page(int swap, struct bitmap_page *bitmap);
+extern void free_all_swap_pages(int swap, struct bitmap_page *bitmap);
+
+extern int swsusp_check(void);
+extern int swsusp_shrink_memory(void);
 extern void swsusp_free(void);
-extern int alloc_data_pages(struct pbe *pblist, gfp_t gfp_mask, int safe_needed);
-extern unsigned int snapshot_nr_pages(void);
-extern struct pbe *snapshot_pblist(void);
-extern void snapshot_pblist_set(struct pbe *pblist);
+extern int swsusp_suspend(void);
+extern int swsusp_resume(void);
+extern int swsusp_read(void);
+extern int swsusp_write(void);
+extern void swsusp_close(void);
+extern int suspend_enter(suspend_state_t state);
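
The SNAPSHOT_* ioctls declared above form the user-space side of the new snapshot interface implemented by the user.c file added in this commit (not shown in full here). As a rough, hedged illustration of how a suspend utility might drive it; the device node name /dev/snapshot, the open flags, and the meaning of the SNAPSHOT_ATOMIC_SNAPSHOT argument are assumptions for this sketch, not something this diff establishes.

#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/ioctl.h>

/* Same ioctl numbers as in kernel/power/power.h above */
#define SNAPSHOT_IOC_MAGIC '3'
#define SNAPSHOT_FREEZE _IO(SNAPSHOT_IOC_MAGIC, 1)
#define SNAPSHOT_UNFREEZE _IO(SNAPSHOT_IOC_MAGIC, 2)
#define SNAPSHOT_ATOMIC_SNAPSHOT _IOW(SNAPSHOT_IOC_MAGIC, 3, void *)
#define SNAPSHOT_FREE _IO(SNAPSHOT_IOC_MAGIC, 5)

int main(void)
{
	int in_suspend = 0;
	int dev = open("/dev/snapshot", O_RDONLY);	/* assumed node name */

	if (dev < 0)
		return 1;
	if (ioctl(dev, SNAPSHOT_FREEZE, 0))
		goto close_dev;
	/* Assumed: the argument reports whether we woke up inside the new image */
	if (ioctl(dev, SNAPSHOT_ATOMIC_SNAPSHOT, &in_suspend) == 0 && in_suspend) {
		/* ... read() image pages from dev and store them somewhere ... */
		ioctl(dev, SNAPSHOT_FREE, 0);
	}
	ioctl(dev, SNAPSHOT_UNFREEZE, 0);
close_dev:
	close(dev);
	return 0;
}
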
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 28de118f7a0b..8ac7c35fad77 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -12,11 +12,12 @@
 #include <linux/interrupt.h>
 #include <linux/suspend.h>
 #include <linux/module.h>
+#include <linux/syscalls.h>
 
 /*
  * Timeout for stopping processes
  */
-#define TIMEOUT (6 * HZ)
+#define TIMEOUT (20 * HZ)
 
 
 static inline int freezeable(struct task_struct * p)
@@ -54,38 +55,62 @@ void refrigerator(void)
 	current->state = save;
 }
 
+static inline void freeze_process(struct task_struct *p)
+{
+	unsigned long flags;
+
+	if (!freezing(p)) {
+		freeze(p);
+		spin_lock_irqsave(&p->sighand->siglock, flags);
+		signal_wake_up(p, 0);
+		spin_unlock_irqrestore(&p->sighand->siglock, flags);
+	}
+}
+
 /* 0 = success, else # of processes that we failed to stop */
 int freeze_processes(void)
 {
-	int todo;
+	int todo, nr_user, user_frozen;
 	unsigned long start_time;
 	struct task_struct *g, *p;
 	unsigned long flags;
 
 	printk( "Stopping tasks: " );
 	start_time = jiffies;
+	user_frozen = 0;
 	do {
-		todo = 0;
+		nr_user = todo = 0;
 		read_lock(&tasklist_lock);
 		do_each_thread(g, p) {
 			if (!freezeable(p))
 				continue;
 			if (frozen(p))
 				continue;
-
-			freeze(p);
-			spin_lock_irqsave(&p->sighand->siglock, flags);
-			signal_wake_up(p, 0);
-			spin_unlock_irqrestore(&p->sighand->siglock, flags);
-			todo++;
+			if (p->mm && !(p->flags & PF_BORROWED_MM)) {
+				/* The task is a user-space one.
+				 * Freeze it unless there's a vfork completion
+				 * pending
+				 */
+				if (!p->vfork_done)
+					freeze_process(p);
+				nr_user++;
+			} else {
+				/* Freeze only if the user space is frozen */
+				if (user_frozen)
+					freeze_process(p);
+				todo++;
+			}
 		} while_each_thread(g, p);
 		read_unlock(&tasklist_lock);
+		todo += nr_user;
+		if (!user_frozen && !nr_user) {
+			sys_sync();
+			start_time = jiffies;
+		}
+		user_frozen = !nr_user;
 		yield();	/* Yield is okay here */
-		if (todo && time_after(jiffies, start_time + TIMEOUT)) {
-			printk( "\n" );
-			printk(KERN_ERR " stopping tasks failed (%d tasks remaining)\n", todo );
+		if (todo && time_after(jiffies, start_time + TIMEOUT))
 			break;
-		}
 	} while(todo);
 
 	/* This does not unfreeze processes that are already frozen
@@ -94,8 +119,14 @@ int freeze_processes(void)
 	 * but it cleans up leftover PF_FREEZE requests.
 	 */
 	if (todo) {
+		printk( "\n" );
+		printk(KERN_ERR " stopping tasks timed out "
+			"after %d seconds (%d tasks remaining):\n",
+			TIMEOUT / HZ, todo);
 		read_lock(&tasklist_lock);
-		do_each_thread(g, p)
+		do_each_thread(g, p) {
+			if (freezeable(p) && !frozen(p))
+				printk(KERN_ERR " %s\n", p->comm);
 			if (freezing(p)) {
 				pr_debug(" clean up: %s\n", p->comm);
 				p->flags &= ~PF_FREEZE;
@@ -103,7 +134,7 @@ int freeze_processes(void)
 				recalc_sigpending_tsk(p);
 				spin_unlock_irqrestore(&p->sighand->siglock, flags);
 			}
-		while_each_thread(g, p);
+		} while_each_thread(g, p);
 		read_unlock(&tasklist_lock);
 		return todo;
 	}
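
freeze_processes() above only marks tasks with a freeze request and wakes them; each freezable task actually parks itself by calling refrigerator() from its own context once it notices freezing(current). A hedged sketch of how a kernel thread of this era would cooperate with that scheme (the thread body and its name are hypothetical; only freezing() and refrigerator() come from the code above):

#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/suspend.h>

static int example_thread(void *unused)
{
	while (!kthread_should_stop()) {
		if (freezing(current))
			refrigerator();	/* sleeps here until thawed */

		/* ... do one unit of work, then sleep ... */
	}
	return 0;
}
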
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 8d5a5986d621..c5863d02c89e 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -10,6 +10,7 @@
  */
 
 
+#include <linux/version.h>
 #include <linux/module.h>
 #include <linux/mm.h>
 #include <linux/suspend.h>
@@ -34,7 +35,9 @@
 #include "power.h"
 
 struct pbe *pagedir_nosave;
-unsigned int nr_copy_pages;
+static unsigned int nr_copy_pages;
+static unsigned int nr_meta_pages;
+static unsigned long *buffer;
 
 #ifdef CONFIG_HIGHMEM
 unsigned int count_highmem_pages(void)
@@ -80,7 +83,7 @@ static int save_highmem_zone(struct zone *zone)
 		void *kaddr;
 		unsigned long pfn = zone_pfn + zone->zone_start_pfn;
 
-		if (!(pfn%1000))
+		if (!(pfn%10000))
 			printk(".");
 		if (!pfn_valid(pfn))
 			continue;
@@ -119,13 +122,15 @@ int save_highmem(void)
 	struct zone *zone;
 	int res = 0;
 
-	pr_debug("swsusp: Saving Highmem\n");
+	pr_debug("swsusp: Saving Highmem");
+	drain_local_pages();
 	for_each_zone (zone) {
 		if (is_highmem(zone))
 			res = save_highmem_zone(zone);
 		if (res)
 			return res;
 	}
+	printk("\n");
 	return 0;
 }
 
@@ -235,7 +240,7 @@ static void copy_data_pages(struct pbe *pblist)
  * free_pagedir - free pages allocated with alloc_pagedir()
  */
 
-void free_pagedir(struct pbe *pblist)
+static void free_pagedir(struct pbe *pblist)
 {
 	struct pbe *pbe;
 
@@ -301,7 +306,7 @@ struct eaten_page {
 
 static struct eaten_page *eaten_pages = NULL;
 
-void release_eaten_pages(void)
+static void release_eaten_pages(void)
 {
 	struct eaten_page *p, *q;
 
@@ -376,7 +381,6 @@ struct pbe *alloc_pagedir(unsigned int nr_pages, gfp_t gfp_mask, int safe_needed
 	if (!nr_pages)
 		return NULL;
 
-	pr_debug("alloc_pagedir(): nr_pages = %d\n", nr_pages);
 	pblist = alloc_image_page(gfp_mask, safe_needed);
 	/* FIXME: rewrite this ugly loop */
 	for (pbe = pblist, num = PBES_PER_PAGE; pbe && num < nr_pages;
@@ -388,7 +392,7 @@ struct pbe *alloc_pagedir(unsigned int nr_pages, gfp_t gfp_mask, int safe_needed
 		free_pagedir(pblist);
 		pblist = NULL;
 	} else
 		create_pbe_list(pblist, nr_pages);
 	return pblist;
 }
 
@@ -414,6 +418,10 @@ void swsusp_free(void)
 			}
 		}
 	}
+	nr_copy_pages = 0;
+	nr_meta_pages = 0;
+	pagedir_nosave = NULL;
+	buffer = NULL;
 }
 
 
@@ -437,7 +445,7 @@ static int enough_free_mem(unsigned int nr_pages)
 		(nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE);
 }
 
-int alloc_data_pages(struct pbe *pblist, gfp_t gfp_mask, int safe_needed)
+static int alloc_data_pages(struct pbe *pblist, gfp_t gfp_mask, int safe_needed)
 {
 	struct pbe *p;
 
@@ -504,7 +512,318 @@ asmlinkage int swsusp_save(void)
504 */ 512 */
505 513
506 nr_copy_pages = nr_pages; 514 nr_copy_pages = nr_pages;
515 nr_meta_pages = (nr_pages * sizeof(long) + PAGE_SIZE - 1) >> PAGE_SHIFT;
507 516
508 printk("swsusp: critical section/: done (%d pages copied)\n", nr_pages); 517 printk("swsusp: critical section/: done (%d pages copied)\n", nr_pages);
509 return 0; 518 return 0;
510} 519}
520
521static void init_header(struct swsusp_info *info)
522{
523 memset(info, 0, sizeof(struct swsusp_info));
524 info->version_code = LINUX_VERSION_CODE;
525 info->num_physpages = num_physpages;
526 memcpy(&info->uts, &system_utsname, sizeof(system_utsname));
527 info->cpus = num_online_cpus();
528 info->image_pages = nr_copy_pages;
529 info->pages = nr_copy_pages + nr_meta_pages + 1;
530 info->size = info->pages;
531 info->size <<= PAGE_SHIFT;
532}
533
534/**
535 * pack_orig_addresses - the .orig_address fields of the PBEs from the
536 * list starting at @pbe are stored in the array @buf[] (1 page)
537 */
538
539static inline struct pbe *pack_orig_addresses(unsigned long *buf, struct pbe *pbe)
540{
541 int j;
542
543 for (j = 0; j < PAGE_SIZE / sizeof(long) && pbe; j++) {
544 buf[j] = pbe->orig_address;
545 pbe = pbe->next;
546 }
547 if (!pbe)
548 for (; j < PAGE_SIZE / sizeof(long); j++)
549 buf[j] = 0;
550 return pbe;
551}
552
553/**
554 * snapshot_read_next - used for reading the system memory snapshot.
555 *
556 * On the first call to it @handle should point to a zeroed
557 * snapshot_handle structure. The structure gets updated and a pointer
558 * to it should be passed to this function every next time.
559 *
560 * The @count parameter should contain the number of bytes the caller
561 * wants to read from the snapshot. It must not be zero.
562 *
563 * On success the function returns a positive number. Then, the caller
564 * is allowed to read up to the returned number of bytes from the memory
565 * location computed by the data_of() macro. The number returned
566 * may be smaller than @count, but this only happens if the read would
567 * cross a page boundary otherwise.
568 *
569 * The function returns 0 to indicate the end of data stream condition,
570 * and a negative number is returned on error. In such cases the
571 * structure pointed to by @handle is not updated and should not be used
572 * any more.
573 */
574
575int snapshot_read_next(struct snapshot_handle *handle, size_t count)
576{
577 if (handle->page > nr_meta_pages + nr_copy_pages)
578 return 0;
579 if (!buffer) {
580 /* This makes the buffer be freed by swsusp_free() */
581 buffer = alloc_image_page(GFP_ATOMIC, 0);
582 if (!buffer)
583 return -ENOMEM;
584 }
585 if (!handle->offset) {
586 init_header((struct swsusp_info *)buffer);
587 handle->buffer = buffer;
588 handle->pbe = pagedir_nosave;
589 }
590 if (handle->prev < handle->page) {
591 if (handle->page <= nr_meta_pages) {
592 handle->pbe = pack_orig_addresses(buffer, handle->pbe);
593 if (!handle->pbe)
594 handle->pbe = pagedir_nosave;
595 } else {
596 handle->buffer = (void *)handle->pbe->address;
597 handle->pbe = handle->pbe->next;
598 }
599 handle->prev = handle->page;
600 }
601 handle->buf_offset = handle->page_offset;
602 if (handle->page_offset + count >= PAGE_SIZE) {
603 count = PAGE_SIZE - handle->page_offset;
604 handle->page_offset = 0;
605 handle->page++;
606 } else {
607 handle->page_offset += count;
608 }
609 handle->offset += count;
610 return count;
611}
612
613/**
614 * mark_unsafe_pages - mark the pages that cannot be used for storing
615 * the image during resume, because they conflict with the pages that
616 * had been used before suspend
617 */
618
619static int mark_unsafe_pages(struct pbe *pblist)
620{
621 struct zone *zone;
622 unsigned long zone_pfn;
623 struct pbe *p;
624
625 if (!pblist) /* a sanity check */
626 return -EINVAL;
627
628 /* Clear page flags */
629 for_each_zone (zone) {
630 for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn)
631 if (pfn_valid(zone_pfn + zone->zone_start_pfn))
632 ClearPageNosaveFree(pfn_to_page(zone_pfn +
633 zone->zone_start_pfn));
634 }
635
636 /* Mark orig addresses */
637 for_each_pbe (p, pblist) {
638 if (virt_addr_valid(p->orig_address))
639 SetPageNosaveFree(virt_to_page(p->orig_address));
640 else
641 return -EFAULT;
642 }
643
644 return 0;
645}
646
647static void copy_page_backup_list(struct pbe *dst, struct pbe *src)
648{
649 /* We assume both lists contain the same number of elements */
650 while (src) {
651 dst->orig_address = src->orig_address;
652 dst = dst->next;
653 src = src->next;
654 }
655}
656
657static int check_header(struct swsusp_info *info)
658{
659 char *reason = NULL;
660
661 if (info->version_code != LINUX_VERSION_CODE)
662 reason = "kernel version";
663 if (info->num_physpages != num_physpages)
664 reason = "memory size";
665 if (strcmp(info->uts.sysname,system_utsname.sysname))
666 reason = "system type";
667 if (strcmp(info->uts.release,system_utsname.release))
668 reason = "kernel release";
669 if (strcmp(info->uts.version,system_utsname.version))
670 reason = "version";
671 if (strcmp(info->uts.machine,system_utsname.machine))
672 reason = "machine";
673 if (reason) {
674 printk(KERN_ERR "swsusp: Resume mismatch: %s\n", reason);
675 return -EPERM;
676 }
677 return 0;
678}
679
680/**
681 * load header - check the image header and copy data from it
682 */
683
684static int load_header(struct snapshot_handle *handle,
685 struct swsusp_info *info)
686{
687 int error;
688 struct pbe *pblist;
689
690 error = check_header(info);
691 if (!error) {
692 pblist = alloc_pagedir(info->image_pages, GFP_ATOMIC, 0);
693 if (!pblist)
694 return -ENOMEM;
695 pagedir_nosave = pblist;
696 handle->pbe = pblist;
697 nr_copy_pages = info->image_pages;
698 nr_meta_pages = info->pages - info->image_pages - 1;
699 }
700 return error;
701}
702
703/**
704 * unpack_orig_addresses - copy the elements of @buf[] (1 page) to
705 * the PBEs in the list starting at @pbe
706 */
707
708static inline struct pbe *unpack_orig_addresses(unsigned long *buf,
709 struct pbe *pbe)
710{
711 int j;
712
713 for (j = 0; j < PAGE_SIZE / sizeof(long) && pbe; j++) {
714 pbe->orig_address = buf[j];
715 pbe = pbe->next;
716 }
717 return pbe;
718}
719
720/**
721 * create_image - use metadata contained in the PBE list
722 * pointed to by pagedir_nosave to mark the pages that will
723 * be overwritten in the process of restoring the system
724 * memory state from the image and allocate memory for
725 * the image avoiding these pages
726 */
727
728static int create_image(struct snapshot_handle *handle)
729{
730 int error = 0;
731 struct pbe *p, *pblist;
732
733 p = pagedir_nosave;
734 error = mark_unsafe_pages(p);
735 if (!error) {
736 pblist = alloc_pagedir(nr_copy_pages, GFP_ATOMIC, 1);
737 if (pblist)
738 copy_page_backup_list(pblist, p);
739 free_pagedir(p);
740 if (!pblist)
741 error = -ENOMEM;
742 }
743 if (!error)
744 error = alloc_data_pages(pblist, GFP_ATOMIC, 1);
745 if (!error) {
746 release_eaten_pages();
747 pagedir_nosave = pblist;
748 } else {
749 pagedir_nosave = NULL;
750 handle->pbe = NULL;
751 nr_copy_pages = 0;
752 nr_meta_pages = 0;
753 }
754 return error;
755}
756
757/**
758 * snapshot_write_next - used for writing the system memory snapshot.
759 *
760 * On the first call to it @handle should point to a zeroed
761 * snapshot_handle structure. The structure gets updated and a pointer
762 * to it should be passed to this function every next time.
763 *
764 * The @count parameter should contain the number of bytes the caller
765 * wants to write to the image. It must not be zero.
766 *
767 * On success the function returns a positive number. Then, the caller
768 * is allowed to write up to the returned number of bytes to the memory
769 * location computed by the data_of() macro. The number returned
770 * may be smaller than @count, but this only happens if the write would
771 * cross a page boundary otherwise.
772 *
773 * The function returns 0 to indicate the "end of file" condition,
774 * and a negative number is returned on error. In such cases the
775 * structure pointed to by @handle is not updated and should not be used
776 * any more.
777 */
778
779int snapshot_write_next(struct snapshot_handle *handle, size_t count)
780{
781 int error = 0;
782
783 if (handle->prev && handle->page > nr_meta_pages + nr_copy_pages)
784 return 0;
785 if (!buffer) {
786 /* This makes the buffer be freed by swsusp_free() */
787 buffer = alloc_image_page(GFP_ATOMIC, 0);
788 if (!buffer)
789 return -ENOMEM;
790 }
791 if (!handle->offset)
792 handle->buffer = buffer;
793 if (handle->prev < handle->page) {
794 if (!handle->prev) {
795 error = load_header(handle, (struct swsusp_info *)buffer);
796 if (error)
797 return error;
798 } else if (handle->prev <= nr_meta_pages) {
799 handle->pbe = unpack_orig_addresses(buffer, handle->pbe);
800 if (!handle->pbe) {
801 error = create_image(handle);
802 if (error)
803 return error;
804 handle->pbe = pagedir_nosave;
805 handle->buffer = (void *)handle->pbe->address;
806 }
807 } else {
808 handle->pbe = handle->pbe->next;
809 handle->buffer = (void *)handle->pbe->address;
810 }
811 handle->prev = handle->page;
812 }
813 handle->buf_offset = handle->page_offset;
814 if (handle->page_offset + count >= PAGE_SIZE) {
815 count = PAGE_SIZE - handle->page_offset;
816 handle->page_offset = 0;
817 handle->page++;
818 } else {
819 handle->page_offset += count;
820 }
821 handle->offset += count;
822 return count;
823}
824
825int snapshot_image_loaded(struct snapshot_handle *handle)
826{
827 return !(!handle->pbe || handle->pbe->next || !nr_copy_pages ||
828 handle->page <= nr_meta_pages + nr_copy_pages);
829}
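
snapshot_read_next()/snapshot_write_next() added above expose the image as a byte stream that is produced and consumed at most one page at a time. A condensed sketch of the consumer side, modelled on the save_image() loop in the new swap.c below; write_one_page() is a placeholder callback for this sketch, not a real kernel function:

#include <linux/string.h>
#include "power.h"	/* struct snapshot_handle, data_of(), snapshot_read_next() */

static int drain_snapshot(int (*write_one_page)(void *buf))
{
	struct snapshot_handle snapshot;
	int ret, error = 0;

	memset(&snapshot, 0, sizeof(struct snapshot_handle));
	do {
		ret = snapshot_read_next(&snapshot, PAGE_SIZE);
		if (ret > 0) {
			/* data_of() points to the bytes made available by this call */
			error = write_one_page(data_of(snapshot));
			if (error)
				break;
		}
	} while (ret > 0);
	return error ? error : (ret < 0 ? ret : 0);
}
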
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
new file mode 100644
index 000000000000..9177f3f73a6c
--- /dev/null
+++ b/kernel/power/swap.c
@@ -0,0 +1,544 @@
1/*
2 * linux/kernel/power/swap.c
3 *
4 * This file provides functions for reading the suspend image from
5 * and writing it to a swap partition.
6 *
7 * Copyright (C) 1998,2001-2005 Pavel Machek <pavel@suse.cz>
8 * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
9 *
10 * This file is released under the GPLv2.
11 *
12 */
13
14#include <linux/module.h>
15#include <linux/smp_lock.h>
16#include <linux/file.h>
17#include <linux/utsname.h>
18#include <linux/version.h>
19#include <linux/delay.h>
20#include <linux/bitops.h>
21#include <linux/genhd.h>
22#include <linux/device.h>
23#include <linux/buffer_head.h>
24#include <linux/bio.h>
25#include <linux/swap.h>
26#include <linux/swapops.h>
27#include <linux/pm.h>
28
29#include "power.h"
30
31extern char resume_file[];
32
33#define SWSUSP_SIG "S1SUSPEND"
34
35static struct swsusp_header {
36 char reserved[PAGE_SIZE - 20 - sizeof(swp_entry_t)];
37 swp_entry_t image;
38 char orig_sig[10];
39 char sig[10];
40} __attribute__((packed, aligned(PAGE_SIZE))) swsusp_header;
41
42/*
43 * Saving part...
44 */
45
46static unsigned short root_swap = 0xffff;
47
48static int mark_swapfiles(swp_entry_t start)
49{
50 int error;
51
52 rw_swap_page_sync(READ,
53 swp_entry(root_swap, 0),
54 virt_to_page((unsigned long)&swsusp_header));
55 if (!memcmp("SWAP-SPACE",swsusp_header.sig, 10) ||
56 !memcmp("SWAPSPACE2",swsusp_header.sig, 10)) {
57 memcpy(swsusp_header.orig_sig,swsusp_header.sig, 10);
58 memcpy(swsusp_header.sig,SWSUSP_SIG, 10);
59 swsusp_header.image = start;
60 error = rw_swap_page_sync(WRITE,
61 swp_entry(root_swap, 0),
62 virt_to_page((unsigned long)
63 &swsusp_header));
64 } else {
65 pr_debug("swsusp: Partition is not swap space.\n");
66 error = -ENODEV;
67 }
68 return error;
69}
70
71/**
72 * swsusp_swap_check - check if the resume device is a swap device
73 * and get its index (if so)
74 */
75
76static int swsusp_swap_check(void) /* This is called before saving image */
77{
78 int res = swap_type_of(swsusp_resume_device);
79
80 if (res >= 0) {
81 root_swap = res;
82 return 0;
83 }
84 return res;
85}
86
87/**
88 * write_page - Write one page to given swap location.
89 * @buf: Address we're writing.
90 * @offset: Offset of the swap page we're writing to.
91 */
92
93static int write_page(void *buf, unsigned long offset)
94{
95 swp_entry_t entry;
96 int error = -ENOSPC;
97
98 if (offset) {
99 entry = swp_entry(root_swap, offset);
100 error = rw_swap_page_sync(WRITE, entry, virt_to_page(buf));
101 }
102 return error;
103}
104
105/*
106 * The swap map is a data structure used for keeping track of each page
107 * written to a swap partition. It consists of many swap_map_page
108 * structures that contain each an array of MAP_PAGE_SIZE swap entries.
109 * These structures are stored on the swap and linked together with the
110 * help of the .next_swap member.
111 *
112 * The swap map is created during suspend. The swap map pages are
113 * allocated and populated one at a time, so we only need one memory
114 * page to set up the entire structure.
115 *
116 * During resume we also only need to use one swap_map_page structure
117 * at a time.
118 */
119
120#define MAP_PAGE_ENTRIES (PAGE_SIZE / sizeof(long) - 1)
121
122struct swap_map_page {
123 unsigned long entries[MAP_PAGE_ENTRIES];
124 unsigned long next_swap;
125};
126
127/**
128 * The swap_map_handle structure is used for handling swap in
129 * a file-alike way
130 */
131
132struct swap_map_handle {
133 struct swap_map_page *cur;
134 unsigned long cur_swap;
135 struct bitmap_page *bitmap;
136 unsigned int k;
137};
138
139static void release_swap_writer(struct swap_map_handle *handle)
140{
141 if (handle->cur)
142 free_page((unsigned long)handle->cur);
143 handle->cur = NULL;
144 if (handle->bitmap)
145 free_bitmap(handle->bitmap);
146 handle->bitmap = NULL;
147}
148
149static int get_swap_writer(struct swap_map_handle *handle)
150{
151 handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_KERNEL);
152 if (!handle->cur)
153 return -ENOMEM;
154 handle->bitmap = alloc_bitmap(count_swap_pages(root_swap, 0));
155 if (!handle->bitmap) {
156 release_swap_writer(handle);
157 return -ENOMEM;
158 }
159 handle->cur_swap = alloc_swap_page(root_swap, handle->bitmap);
160 if (!handle->cur_swap) {
161 release_swap_writer(handle);
162 return -ENOSPC;
163 }
164 handle->k = 0;
165 return 0;
166}
167
168static int swap_write_page(struct swap_map_handle *handle, void *buf)
169{
170 int error;
171 unsigned long offset;
172
173 if (!handle->cur)
174 return -EINVAL;
175 offset = alloc_swap_page(root_swap, handle->bitmap);
176 error = write_page(buf, offset);
177 if (error)
178 return error;
179 handle->cur->entries[handle->k++] = offset;
180 if (handle->k >= MAP_PAGE_ENTRIES) {
181 offset = alloc_swap_page(root_swap, handle->bitmap);
182 if (!offset)
183 return -ENOSPC;
184 handle->cur->next_swap = offset;
185 error = write_page(handle->cur, handle->cur_swap);
186 if (error)
187 return error;
188 memset(handle->cur, 0, PAGE_SIZE);
189 handle->cur_swap = offset;
190 handle->k = 0;
191 }
192 return 0;
193}
194
195static int flush_swap_writer(struct swap_map_handle *handle)
196{
197 if (handle->cur && handle->cur_swap)
198 return write_page(handle->cur, handle->cur_swap);
199 else
200 return -EINVAL;
201}
202
203/**
204 * save_image - save the suspend image data
205 */
206
207static int save_image(struct swap_map_handle *handle,
208 struct snapshot_handle *snapshot,
209 unsigned int nr_pages)
210{
211 unsigned int m;
212 int ret;
213 int error = 0;
214
215 printk("Saving image data pages (%u pages) ... ", nr_pages);
216 m = nr_pages / 100;
217 if (!m)
218 m = 1;
219 nr_pages = 0;
220 do {
221 ret = snapshot_read_next(snapshot, PAGE_SIZE);
222 if (ret > 0) {
223 error = swap_write_page(handle, data_of(*snapshot));
224 if (error)
225 break;
226 if (!(nr_pages % m))
227 printk("\b\b\b\b%3d%%", nr_pages / m);
228 nr_pages++;
229 }
230 } while (ret > 0);
231 if (!error)
232 printk("\b\b\b\bdone\n");
233 return error;
234}
235
236/**
237 * enough_swap - Make sure we have enough swap to save the image.
238 *
239 * Returns TRUE or FALSE after checking the total amount of swap
240 * space avaiable from the resume partition.
241 */
242
243static int enough_swap(unsigned int nr_pages)
244{
245 unsigned int free_swap = count_swap_pages(root_swap, 1);
246
247 pr_debug("swsusp: free swap pages: %u\n", free_swap);
248 return free_swap > (nr_pages + PAGES_FOR_IO +
249 (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE);
250}
251
252/**
253 * swsusp_write - Write entire image and metadata.
254 *
255 * It is important _NOT_ to umount filesystems at this point. We want
256 * them synced (in case something goes wrong) but we DO not want to mark
257 * filesystem clean: it is not. (And it does not matter, if we resume
258 * correctly, we'll mark system clean, anyway.)
259 */
260
261int swsusp_write(void)
262{
263 struct swap_map_handle handle;
264 struct snapshot_handle snapshot;
265 struct swsusp_info *header;
266 unsigned long start;
267 int error;
268
269 if ((error = swsusp_swap_check())) {
270 printk(KERN_ERR "swsusp: Cannot find swap device, try swapon -a.\n");
271 return error;
272 }
273 memset(&snapshot, 0, sizeof(struct snapshot_handle));
274 error = snapshot_read_next(&snapshot, PAGE_SIZE);
275 if (error < PAGE_SIZE)
276 return error < 0 ? error : -EFAULT;
277 header = (struct swsusp_info *)data_of(snapshot);
278 if (!enough_swap(header->pages)) {
279 printk(KERN_ERR "swsusp: Not enough free swap\n");
280 return -ENOSPC;
281 }
282 error = get_swap_writer(&handle);
283 if (!error) {
284 start = handle.cur_swap;
285 error = swap_write_page(&handle, header);
286 }
287 if (!error)
288 error = save_image(&handle, &snapshot, header->pages - 1);
289 if (!error) {
290 flush_swap_writer(&handle);
291 printk("S");
292 error = mark_swapfiles(swp_entry(root_swap, start));
293 printk("|\n");
294 }
295 if (error)
296 free_all_swap_pages(root_swap, handle.bitmap);
297 release_swap_writer(&handle);
298 return error;
299}
300
301/*
302 * Using bio to read from swap.
303 * This code requires a bit more work than just using buffer heads
304 * but, it is the recommended way for 2.5/2.6.
305 * The following are to signal the beginning and end of I/O. Bios
306 * finish asynchronously, while we want them to happen synchronously.
307 * A simple atomic_t, and a wait loop take care of this problem.
308 */
309
310static atomic_t io_done = ATOMIC_INIT(0);
311
312static int end_io(struct bio *bio, unsigned int num, int err)
313{
314 if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
315 panic("I/O error reading memory image");
316 atomic_set(&io_done, 0);
317 return 0;
318}
319
320static struct block_device *resume_bdev;
321
322/**
323 * submit - submit BIO request.
324 * @rw: READ or WRITE.
325 * @off physical offset of page.
326 * @page: page we're reading or writing.
327 *
328 * Straight from the textbook - allocate and initialize the bio.
329 * If we're writing, make sure the page is marked as dirty.
330 * Then submit it and wait.
331 */
332
333static int submit(int rw, pgoff_t page_off, void *page)
334{
335 int error = 0;
336 struct bio *bio;
337
338 bio = bio_alloc(GFP_ATOMIC, 1);
339 if (!bio)
340 return -ENOMEM;
341 bio->bi_sector = page_off * (PAGE_SIZE >> 9);
342 bio->bi_bdev = resume_bdev;
343 bio->bi_end_io = end_io;
344
345 if (bio_add_page(bio, virt_to_page(page), PAGE_SIZE, 0) < PAGE_SIZE) {
346 printk("swsusp: ERROR: adding page to bio at %ld\n",page_off);
347 error = -EFAULT;
348 goto Done;
349 }
350
351 atomic_set(&io_done, 1);
352 submit_bio(rw | (1 << BIO_RW_SYNC), bio);
353 while (atomic_read(&io_done))
354 yield();
355 if (rw == READ)
356 bio_set_pages_dirty(bio);
357 Done:
358 bio_put(bio);
359 return error;
360}
361
362static int bio_read_page(pgoff_t page_off, void *page)
363{
364 return submit(READ, page_off, page);
365}
366
367static int bio_write_page(pgoff_t page_off, void *page)
368{
369 return submit(WRITE, page_off, page);
370}
371
372/**
373 * The following functions allow us to read data using a swap map
374 * in a file-alike way
375 */
376
377static void release_swap_reader(struct swap_map_handle *handle)
378{
379 if (handle->cur)
380 free_page((unsigned long)handle->cur);
381 handle->cur = NULL;
382}
383
384static int get_swap_reader(struct swap_map_handle *handle,
385 swp_entry_t start)
386{
387 int error;
388
389 if (!swp_offset(start))
390 return -EINVAL;
391 handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_ATOMIC);
392 if (!handle->cur)
393 return -ENOMEM;
394 error = bio_read_page(swp_offset(start), handle->cur);
395 if (error) {
396 release_swap_reader(handle);
397 return error;
398 }
399 handle->k = 0;
400 return 0;
401}
402
403static int swap_read_page(struct swap_map_handle *handle, void *buf)
404{
405 unsigned long offset;
406 int error;
407
408 if (!handle->cur)
409 return -EINVAL;
410 offset = handle->cur->entries[handle->k];
411 if (!offset)
412 return -EFAULT;
413 error = bio_read_page(offset, buf);
414 if (error)
415 return error;
416 if (++handle->k >= MAP_PAGE_ENTRIES) {
417 handle->k = 0;
418 offset = handle->cur->next_swap;
419 if (!offset)
420 release_swap_reader(handle);
421 else
422 error = bio_read_page(offset, handle->cur);
423 }
424 return error;
425}
426
427/**
428 * load_image - load the image using the swap map handle
429 * @handle and the snapshot handle @snapshot
430 * (assume there are @nr_pages pages to load)
431 */
432
433static int load_image(struct swap_map_handle *handle,
434 struct snapshot_handle *snapshot,
435 unsigned int nr_pages)
436{
437 unsigned int m;
438 int ret;
439 int error = 0;
440
441 printk("Loading image data pages (%u pages) ... ", nr_pages);
442 m = nr_pages / 100;
443 if (!m)
444 m = 1;
445 nr_pages = 0;
446 do {
447 ret = snapshot_write_next(snapshot, PAGE_SIZE);
448 if (ret > 0) {
449 error = swap_read_page(handle, data_of(*snapshot));
450 if (error)
451 break;
452 if (!(nr_pages % m))
453 printk("\b\b\b\b%3d%%", nr_pages / m);
454 nr_pages++;
455 }
456 } while (ret > 0);
457 if (!error)
458 printk("\b\b\b\bdone\n");
459 if (!snapshot_image_loaded(snapshot))
460 error = -ENODATA;
461 return error;
462}
463
464int swsusp_read(void)
465{
466 int error;
467 struct swap_map_handle handle;
468 struct snapshot_handle snapshot;
469 struct swsusp_info *header;
470
471 if (IS_ERR(resume_bdev)) {
472 pr_debug("swsusp: block device not initialised\n");
473 return PTR_ERR(resume_bdev);
474 }
475
476 memset(&snapshot, 0, sizeof(struct snapshot_handle));
477 error = snapshot_write_next(&snapshot, PAGE_SIZE);
478 if (error < PAGE_SIZE)
479 return error < 0 ? error : -EFAULT;
480 header = (struct swsusp_info *)data_of(snapshot);
481 error = get_swap_reader(&handle, swsusp_header.image);
482 if (!error)
483 error = swap_read_page(&handle, header);
484 if (!error)
485 error = load_image(&handle, &snapshot, header->pages - 1);
486 release_swap_reader(&handle);
487
488 blkdev_put(resume_bdev);
489
490 if (!error)
491 pr_debug("swsusp: Reading resume file was successful\n");
492 else
493 pr_debug("swsusp: Error %d resuming\n", error);
494 return error;
495}
496
497/**
498 * swsusp_check - Check for swsusp signature in the resume device
499 */
500
501int swsusp_check(void)
502{
503 int error;
504
505 resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_READ);
506 if (!IS_ERR(resume_bdev)) {
507 set_blocksize(resume_bdev, PAGE_SIZE);
508 memset(&swsusp_header, 0, sizeof(swsusp_header));
509 if ((error = bio_read_page(0, &swsusp_header)))
510 return error;
511 if (!memcmp(SWSUSP_SIG, swsusp_header.sig, 10)) {
512 memcpy(swsusp_header.sig, swsusp_header.orig_sig, 10);
513 /* Reset swap signature now */
514 error = bio_write_page(0, &swsusp_header);
515 } else {
516 return -EINVAL;
517 }
518 if (error)
519 blkdev_put(resume_bdev);
520 else
521 pr_debug("swsusp: Signature found, resuming\n");
522 } else {
523 error = PTR_ERR(resume_bdev);
524 }
525
526 if (error)
527 pr_debug("swsusp: Error %d check for resume file\n", error);
528
529 return error;
530}
531
532/**
533 * swsusp_close - close swap device.
534 */
535
536void swsusp_close(void)
537{
538 if (IS_ERR(resume_bdev)) {
539 pr_debug("swsusp: block device not initialised\n");
540 return;
541 }
542
543 blkdev_put(resume_bdev);
544}
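
The swap map written above is a simple on-disk linked list: each swap_map_page stores MAP_PAGE_ENTRIES offsets of image pages plus the offset of the next map page, and the bitmap declared in power.h tracks which swap pages were allocated so they can all be freed on error. A small user-space arithmetic sketch of the resulting capacities, assuming 4 KiB pages and 64-bit longs (assumptions of this example, not values fixed by the code):

#include <stdio.h>

int main(void)
{
	const unsigned long page_size = 4096;		/* assumed PAGE_SIZE */
	const unsigned long word = sizeof(long);	/* 8 on a 64-bit build */

	/* MAP_PAGE_ENTRIES = PAGE_SIZE / sizeof(long) - 1 */
	unsigned long map_entries = page_size / word - 1;
	/* BITMAP_PAGE_BITS = ((PAGE_SIZE - sizeof(void *)) / sizeof(long)) * BITS_PER_CHUNK */
	unsigned long bitmap_bits = (page_size - sizeof(void *)) / word * (8 * word);

	printf("one swap_map_page indexes %lu image pages (%lu KiB of data)\n",
	       map_entries, map_entries * page_size / 1024);
	printf("one bitmap_page tracks %lu swap page allocations\n", bitmap_bits);
	return 0;
}
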
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c
index 2d9d08f72f76..c4016cbbd3e0 100644
--- a/kernel/power/swsusp.c
+++ b/kernel/power/swsusp.c
@@ -31,41 +31,24 @@
  * Fixed runaway init
  *
  * Rafael J. Wysocki <rjw@sisk.pl>
- * Added the swap map data structure and reworked the handling of swap
+ * Reworked the freeing of memory and the handling of swap
  *
  * More state savers are welcome. Especially for the scsi layer...
  *
  * For TODOs,FIXMEs also look in Documentation/power/swsusp.txt
  */
 
-#include <linux/module.h>
 #include <linux/mm.h>
 #include <linux/suspend.h>
-#include <linux/smp_lock.h>
-#include <linux/file.h>
-#include <linux/utsname.h>
-#include <linux/version.h>
-#include <linux/delay.h>
-#include <linux/bitops.h>
 #include <linux/spinlock.h>
-#include <linux/genhd.h>
 #include <linux/kernel.h>
 #include <linux/major.h>
 #include <linux/swap.h>
 #include <linux/pm.h>
-#include <linux/device.h>
-#include <linux/buffer_head.h>
 #include <linux/swapops.h>
 #include <linux/bootmem.h>
 #include <linux/syscalls.h>
 #include <linux/highmem.h>
-#include <linux/bio.h>
-
-#include <asm/uaccess.h>
-#include <asm/mmu_context.h>
-#include <asm/pgtable.h>
-#include <asm/tlbflush.h>
-#include <asm/io.h>
 
 #include "power.h"
 
@@ -77,6 +60,8 @@
  */
 unsigned long image_size = 500 * 1024 * 1024;
 
+int in_suspend __nosavedata = 0;
+
 #ifdef CONFIG_HIGHMEM
 unsigned int count_highmem_pages(void);
 int save_highmem(void);
@@ -87,471 +72,97 @@ static int restore_highmem(void) { return 0; }
87static unsigned int count_highmem_pages(void) { return 0; } 72static unsigned int count_highmem_pages(void) { return 0; }
88#endif 73#endif
89 74
90extern char resume_file[];
91
92#define SWSUSP_SIG "S1SUSPEND"
93
94static struct swsusp_header {
95 char reserved[PAGE_SIZE - 20 - sizeof(swp_entry_t)];
96 swp_entry_t image;
97 char orig_sig[10];
98 char sig[10];
99} __attribute__((packed, aligned(PAGE_SIZE))) swsusp_header;
100
101static struct swsusp_info swsusp_info;
102
103/*
104 * Saving part...
105 */
106
107static unsigned short root_swap = 0xffff;
108
109static int mark_swapfiles(swp_entry_t start)
110{
111 int error;
112
113 rw_swap_page_sync(READ,
114 swp_entry(root_swap, 0),
115 virt_to_page((unsigned long)&swsusp_header));
116 if (!memcmp("SWAP-SPACE",swsusp_header.sig, 10) ||
117 !memcmp("SWAPSPACE2",swsusp_header.sig, 10)) {
118 memcpy(swsusp_header.orig_sig,swsusp_header.sig, 10);
119 memcpy(swsusp_header.sig,SWSUSP_SIG, 10);
120 swsusp_header.image = start;
121 error = rw_swap_page_sync(WRITE,
122 swp_entry(root_swap, 0),
123 virt_to_page((unsigned long)
124 &swsusp_header));
125 } else {
126 pr_debug("swsusp: Partition is not swap space.\n");
127 error = -ENODEV;
128 }
129 return error;
130}
131
132/*
133 * Check whether the swap device is the specified resume
134 * device, irrespective of whether they are specified by
135 * identical names.
136 *
137 * (Thus, device inode aliasing is allowed. You can say /dev/hda4
138 * instead of /dev/ide/host0/bus0/target0/lun0/part4 [if using devfs]
139 * and they'll be considered the same device. This is *necessary* for
140 * devfs, since the resume code can only recognize the form /dev/hda4,
141 * but the suspend code would see the long name.)
142 */
143static inline int is_resume_device(const struct swap_info_struct *swap_info)
144{
145 struct file *file = swap_info->swap_file;
146 struct inode *inode = file->f_dentry->d_inode;
147
148 return S_ISBLK(inode->i_mode) &&
149 swsusp_resume_device == MKDEV(imajor(inode), iminor(inode));
150}
151
152static int swsusp_swap_check(void) /* This is called before saving image */
153{
154 int i;
155
156 spin_lock(&swap_lock);
157 for (i = 0; i < MAX_SWAPFILES; i++) {
158 if (!(swap_info[i].flags & SWP_WRITEOK))
159 continue;
160 if (!swsusp_resume_device || is_resume_device(swap_info + i)) {
161 spin_unlock(&swap_lock);
162 root_swap = i;
163 return 0;
164 }
165 }
166 spin_unlock(&swap_lock);
167 return -ENODEV;
168}
169
170/**
171 * write_page - Write one page to a fresh swap location.
172 * @addr: Address we're writing.
173 * @loc: Place to store the entry we used.
174 *
175 * Allocate a new swap entry and 'sync' it. Note we discard -EIO
176 * errors. That is an artifact left over from swsusp. It did not
177 * check the return of rw_swap_page_sync() at all, since most pages
178 * written back to swap would return -EIO.
179 * This is a partial improvement, since we will at least return other
180 * errors, though we need to eventually fix the damn code.
181 */
182static int write_page(unsigned long addr, swp_entry_t *loc)
183{
184 swp_entry_t entry;
185 int error = -ENOSPC;
186
187 entry = get_swap_page_of_type(root_swap);
188 if (swp_offset(entry)) {
189 error = rw_swap_page_sync(WRITE, entry, virt_to_page(addr));
190 if (!error || error == -EIO)
191 *loc = entry;
192 }
193 return error;
194}
195
196/** 75/**
197 * Swap map-handling functions 76 * The following functions are used for tracing the allocated
198 * 77 * swap pages, so that they can be freed in case of an error.
199 * The swap map is a data structure used for keeping track of each page
200 * written to the swap. It consists of many swap_map_page structures
201 * that contain each an array of MAP_PAGE_SIZE swap entries.
202 * These structures are linked together with the help of either the
203 * .next (in memory) or the .next_swap (in swap) member.
204 * 78 *
205 * The swap map is created during suspend. At that time we need to keep 79 * The functions operate on a linked bitmap structure defined
206 * it in memory, because we have to free all of the allocated swap 80 * in power.h
207 * entries if an error occurs. The memory needed is preallocated
208 * so that we know in advance if there's enough of it.
209 *
210 * The first swap_map_page structure is filled with the swap entries that
211 * correspond to the first MAP_PAGE_SIZE data pages written to swap and
212 * so on. After the all of the data pages have been written, the order
213 * of the swap_map_page structures in the map is reversed so that they
214 * can be read from swap in the original order. This causes the data
215 * pages to be loaded in exactly the same order in which they have been
216 * saved.
217 *
218 * During resume we only need to use one swap_map_page structure
219 * at a time, which means that we only need to use two memory pages for
220 * reading the image - one for reading the swap_map_page structures
221 * and the second for reading the data pages from swap.
222 */ 81 */
223 82
224#define MAP_PAGE_SIZE ((PAGE_SIZE - sizeof(swp_entry_t) - sizeof(void *)) \ 83void free_bitmap(struct bitmap_page *bitmap)
225 / sizeof(swp_entry_t))
226
227struct swap_map_page {
228 swp_entry_t entries[MAP_PAGE_SIZE];
229 swp_entry_t next_swap;
230 struct swap_map_page *next;
231};
232
233static inline void free_swap_map(struct swap_map_page *swap_map)
234{ 84{
235 struct swap_map_page *swp; 85 struct bitmap_page *bp;
236 86
237 while (swap_map) { 87 while (bitmap) {
238 swp = swap_map->next; 88 bp = bitmap->next;
239 free_page((unsigned long)swap_map); 89 free_page((unsigned long)bitmap);
240 swap_map = swp; 90 bitmap = bp;
241 } 91 }
242} 92}
243 93
244static struct swap_map_page *alloc_swap_map(unsigned int nr_pages) 94struct bitmap_page *alloc_bitmap(unsigned int nr_bits)
245{ 95{
246 struct swap_map_page *swap_map, *swp; 96 struct bitmap_page *bitmap, *bp;
247 unsigned n = 0; 97 unsigned int n;
248 98
249 if (!nr_pages) 99 if (!nr_bits)
250 return NULL; 100 return NULL;
251 101
252 pr_debug("alloc_swap_map(): nr_pages = %d\n", nr_pages); 102 bitmap = (struct bitmap_page *)get_zeroed_page(GFP_KERNEL);
253 swap_map = (struct swap_map_page *)get_zeroed_page(GFP_ATOMIC); 103 bp = bitmap;
254 swp = swap_map; 104 for (n = BITMAP_PAGE_BITS; n < nr_bits; n += BITMAP_PAGE_BITS) {
255 for (n = MAP_PAGE_SIZE; n < nr_pages; n += MAP_PAGE_SIZE) { 105 bp->next = (struct bitmap_page *)get_zeroed_page(GFP_KERNEL);
256 swp->next = (struct swap_map_page *)get_zeroed_page(GFP_ATOMIC); 106 bp = bp->next;
257 swp = swp->next; 107 if (!bp) {
258 if (!swp) { 108 free_bitmap(bitmap);
259 free_swap_map(swap_map);
260 return NULL; 109 return NULL;
261 } 110 }
262 } 111 }
263 return swap_map; 112 return bitmap;
264} 113}
265 114
266/** 115static int bitmap_set(struct bitmap_page *bitmap, unsigned long bit)
267 * reverse_swap_map - reverse the order of pages in the swap map
268 * @swap_map
269 */
270
271static inline struct swap_map_page *reverse_swap_map(struct swap_map_page *swap_map)
272{
273 struct swap_map_page *prev, *next;
274
275 prev = NULL;
276 while (swap_map) {
277 next = swap_map->next;
278 swap_map->next = prev;
279 prev = swap_map;
280 swap_map = next;
281 }
282 return prev;
283}
284
285/**
286 * free_swap_map_entries - free the swap entries allocated to store
287 * the swap map @swap_map (this is only called in case of an error)
288 */
289static inline void free_swap_map_entries(struct swap_map_page *swap_map)
290{
291 while (swap_map) {
292 if (swap_map->next_swap.val)
293 swap_free(swap_map->next_swap);
294 swap_map = swap_map->next;
295 }
296}
297
298/**
299 * save_swap_map - save the swap map used for tracing the data pages
300 * stored in the swap
301 */
302
303static int save_swap_map(struct swap_map_page *swap_map, swp_entry_t *start)
304{
305 swp_entry_t entry = (swp_entry_t){0};
306 int error;
307
308 while (swap_map) {
309 swap_map->next_swap = entry;
310 if ((error = write_page((unsigned long)swap_map, &entry)))
311 return error;
312 swap_map = swap_map->next;
313 }
314 *start = entry;
315 return 0;
316}
317
318/**
319 * free_image_entries - free the swap entries allocated to store
320 * the image data pages (this is only called in case of an error)
321 */
322
323static inline void free_image_entries(struct swap_map_page *swp)
324{ 116{
325 unsigned k; 117 unsigned int n;
326 118
327 while (swp) { 119 n = BITMAP_PAGE_BITS;
328 for (k = 0; k < MAP_PAGE_SIZE; k++) 120 while (bitmap && n <= bit) {
329 if (swp->entries[k].val) 121 n += BITMAP_PAGE_BITS;
330 swap_free(swp->entries[k]); 122 bitmap = bitmap->next;
331 swp = swp->next;
332 } 123 }
333} 124 if (!bitmap)
334 125 return -EINVAL;
335/** 126 n -= BITMAP_PAGE_BITS;
336 * The swap_map_handle structure is used for handling the swap map in 127 bit -= n;
337 * a file-alike way 128 n = 0;
338 */ 129 while (bit >= BITS_PER_CHUNK) {
339 130 bit -= BITS_PER_CHUNK;
340struct swap_map_handle { 131 n++;
341 struct swap_map_page *cur;
342 unsigned int k;
343};
344
345static inline void init_swap_map_handle(struct swap_map_handle *handle,
346 struct swap_map_page *map)
347{
348 handle->cur = map;
349 handle->k = 0;
350}
351
352static inline int swap_map_write_page(struct swap_map_handle *handle,
353 unsigned long addr)
354{
355 int error;
356
357 error = write_page(addr, handle->cur->entries + handle->k);
358 if (error)
359 return error;
360 if (++handle->k >= MAP_PAGE_SIZE) {
361 handle->cur = handle->cur->next;
362 handle->k = 0;
363 } 132 }
133 bitmap->chunks[n] |= (1UL << bit);
364 return 0; 134 return 0;
365} 135}
366 136
367/** 137unsigned long alloc_swap_page(int swap, struct bitmap_page *bitmap)
368 * save_image_data - save the data pages pointed to by the PBEs
369 * from the list @pblist using the swap map handle @handle
370 * (assume there are @nr_pages data pages to save)
371 */
372
373static int save_image_data(struct pbe *pblist,
374 struct swap_map_handle *handle,
375 unsigned int nr_pages)
376{
377 unsigned int m;
378 struct pbe *p;
379 int error = 0;
380
381 printk("Saving image data pages (%u pages) ... ", nr_pages);
382 m = nr_pages / 100;
383 if (!m)
384 m = 1;
385 nr_pages = 0;
386 for_each_pbe (p, pblist) {
387 error = swap_map_write_page(handle, p->address);
388 if (error)
389 break;
390 if (!(nr_pages % m))
391 printk("\b\b\b\b%3d%%", nr_pages / m);
392 nr_pages++;
393 }
394 if (!error)
395 printk("\b\b\b\bdone\n");
396 return error;
397}
398
399static void dump_info(void)
400{
401 pr_debug(" swsusp: Version: %u\n",swsusp_info.version_code);
402 pr_debug(" swsusp: Num Pages: %ld\n",swsusp_info.num_physpages);
403 pr_debug(" swsusp: UTS Sys: %s\n",swsusp_info.uts.sysname);
404 pr_debug(" swsusp: UTS Node: %s\n",swsusp_info.uts.nodename);
405 pr_debug(" swsusp: UTS Release: %s\n",swsusp_info.uts.release);
406 pr_debug(" swsusp: UTS Version: %s\n",swsusp_info.uts.version);
407 pr_debug(" swsusp: UTS Machine: %s\n",swsusp_info.uts.machine);
408 pr_debug(" swsusp: UTS Domain: %s\n",swsusp_info.uts.domainname);
409 pr_debug(" swsusp: CPUs: %d\n",swsusp_info.cpus);
410 pr_debug(" swsusp: Image: %ld Pages\n",swsusp_info.image_pages);
411 pr_debug(" swsusp: Total: %ld Pages\n", swsusp_info.pages);
412}
413
414static void init_header(unsigned int nr_pages)
415{
416 memset(&swsusp_info, 0, sizeof(swsusp_info));
417 swsusp_info.version_code = LINUX_VERSION_CODE;
418 swsusp_info.num_physpages = num_physpages;
419 memcpy(&swsusp_info.uts, &system_utsname, sizeof(system_utsname));
420
421 swsusp_info.cpus = num_online_cpus();
422 swsusp_info.image_pages = nr_pages;
423 swsusp_info.pages = nr_pages +
424 ((nr_pages * sizeof(long) + PAGE_SIZE - 1) >> PAGE_SHIFT) + 1;
425}
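
To make the size computation concrete (a worked example, assuming 4 KiB pages): each metadata page holds PAGE_SIZE / sizeof(long) original addresses, i.e. 1024 on a 32-bit box or 512 on a 64-bit one, so 100000 image pages need ceil(100000 / 1024) = 98 metadata pages there, and swsusp_info.pages becomes 100000 + 98 + 1 = 100099 including the header page. The same arithmetic in a stand-alone sketch:

/* image_pages.c - the swsusp_info.pages computation with example numbers */
#include <stdio.h>

#define PAGE_SHIFT 12			/* assumed: 4 KiB pages */
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

int main(void)
{
	unsigned long nr_pages = 100000;	/* image data pages, example value */
	unsigned long meta, total;

	/* one long per data page, rounded up to whole metadata pages */
	meta = (nr_pages * sizeof(long) + PAGE_SIZE - 1) >> PAGE_SHIFT;
	total = nr_pages + meta + 1;		/* + 1 page for swsusp_info itself */

	printf("%lu data + %lu metadata + 1 header = %lu pages\n",
	       nr_pages, meta, total);
	return 0;
}
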
426
427/**
428 * pack_orig_addresses - the .orig_address fields of the PBEs from the
429 * list starting at @pbe are stored in the array @buf[] (1 page)
430 */
431
432static inline struct pbe *pack_orig_addresses(unsigned long *buf,
433 struct pbe *pbe)
434{
435 int j;
436
437 for (j = 0; j < PAGE_SIZE / sizeof(long) && pbe; j++) {
438 buf[j] = pbe->orig_address;
439 pbe = pbe->next;
440 }
441 if (!pbe)
442 for (; j < PAGE_SIZE / sizeof(long); j++)
443 buf[j] = 0;
444 return pbe;
445}
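
A toy version of the packing step, shrunk to four slots per "page" so the zero-padding of the last page is visible (toy_pbe and PACK_PER_PAGE are illustrative stand-ins for the kernel's struct pbe and PAGE_SIZE / sizeof(long)):

/* pack_addrs.c - toy version of pack_orig_addresses() */
#include <stdio.h>

#define PACK_PER_PAGE 4		/* stands in for PAGE_SIZE / sizeof(long) */

struct toy_pbe {
	unsigned long orig_address;
	struct toy_pbe *next;
};

/* copy orig_address values into buf[]; zero-pad if the list ends early */
static struct toy_pbe *pack(unsigned long *buf, struct toy_pbe *pbe)
{
	int j;

	for (j = 0; j < PACK_PER_PAGE && pbe; j++) {
		buf[j] = pbe->orig_address;
		pbe = pbe->next;
	}
	if (!pbe)
		for (; j < PACK_PER_PAGE; j++)
			buf[j] = 0;
	return pbe;	/* first PBE not yet packed, or NULL */
}

int main(void)
{
	struct toy_pbe p3 = { 0x3000, NULL };
	struct toy_pbe p2 = { 0x2000, &p3 };
	struct toy_pbe p1 = { 0x1000, &p2 };
	unsigned long buf[PACK_PER_PAGE];
	int j;

	pack(buf, &p1);		/* three addresses, fourth slot zero-padded */
	for (j = 0; j < PACK_PER_PAGE; j++)
		printf("buf[%d] = %#lx\n", j, buf[j]);
	return 0;
}
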
446
447/**
448 * save_image_metadata - save the .orig_address fields of the PBEs
449 * from the list @pblist using the swap map handle @handle
450 */
451
452static int save_image_metadata(struct pbe *pblist,
453 struct swap_map_handle *handle)
454{
455 unsigned long *buf;
456 unsigned int n = 0;
457 struct pbe *p;
458 int error = 0;
459
460 printk("Saving image metadata ... ");
461 buf = (unsigned long *)get_zeroed_page(GFP_ATOMIC);
462 if (!buf)
463 return -ENOMEM;
464 p = pblist;
465 while (p) {
466 p = pack_orig_addresses(buf, p);
467 error = swap_map_write_page(handle, (unsigned long)buf);
468 if (error)
469 break;
470 n++;
471 }
472 free_page((unsigned long)buf);
473 if (!error)
474 printk("done (%u pages saved)\n", n);
475 return error;
476}
477
478/**
479 * enough_swap - Make sure we have enough swap to save the image.
480 *
481 * Returns TRUE or FALSE after checking the total amount of swap
482 * space available from the resume partition.
483 */
484
485static int enough_swap(unsigned int nr_pages)
486{
487 unsigned int free_swap = swap_info[root_swap].pages -
488 swap_info[root_swap].inuse_pages;
489
490 pr_debug("swsusp: free swap pages: %u\n", free_swap);
491 return free_swap > (nr_pages + PAGES_FOR_IO +
492 (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE);
493}
494
495/**
496 * swsusp_write - Write entire image and metadata.
497 *
498 * It is important _NOT_ to umount filesystems at this point. We want
499 * them synced (in case something goes wrong) but we DO not want to mark
500 * filesystem clean: it is not. (And it does not matter, if we resume
501 * correctly, we'll mark system clean, anyway.)
502 */
503
504int swsusp_write(struct pbe *pblist, unsigned int nr_pages)
505{
506 struct swap_map_page *swap_map;
507 struct swap_map_handle handle;
508 swp_entry_t start;
509 int error;
510
511 if ((error = swsusp_swap_check())) {
512 printk(KERN_ERR "swsusp: Cannot find swap device, try swapon -a.\n");
513 return error;
514 }
515 if (!enough_swap(nr_pages)) {
516 printk(KERN_ERR "swsusp: Not enough free swap\n");
517 return -ENOSPC;
518 }
519
520 init_header(nr_pages);
521 swap_map = alloc_swap_map(swsusp_info.pages);
522 if (!swap_map)
523 return -ENOMEM;
524 init_swap_map_handle(&handle, swap_map);
525
526 error = swap_map_write_page(&handle, (unsigned long)&swsusp_info);
527 if (!error)
528 error = save_image_metadata(pblist, &handle);
529 if (!error)
530 error = save_image_data(pblist, &handle, nr_pages);
531 if (error)
532 goto Free_image_entries;
533
534 swap_map = reverse_swap_map(swap_map);
535 error = save_swap_map(swap_map, &start);
536 if (error)
537 goto Free_map_entries;
538
539 dump_info();
540 printk( "S" );
541 error = mark_swapfiles(start);
542 printk( "|\n" );
543 if (error)
544 goto Free_map_entries;
545
546Free_swap_map:
547 free_swap_map(swap_map);
548 return error;
549
550Free_map_entries:
551 free_swap_map_entries(swap_map);
552Free_image_entries:
553 free_image_entries(swap_map);
554 goto Free_swap_map;
555}
556
557/**

116{
117 unsigned int n;
118
119 n = BITMAP_PAGE_BITS;
120 while (bitmap && n <= bit) {
121 n += BITMAP_PAGE_BITS;
122 bitmap = bitmap->next;
123 }
124 if (!bitmap)
125 return -EINVAL;
126 n -= BITMAP_PAGE_BITS;
127 bit -= n;
128 n = 0;
129 while (bit >= BITS_PER_CHUNK) {
130 bit -= BITS_PER_CHUNK;
131 n++;
132 }
133 bitmap->chunks[n] |= (1UL << bit);
134 return 0;
135}
136
137unsigned long alloc_swap_page(int swap, struct bitmap_page *bitmap)
138{
139 unsigned long offset;
140
141 offset = swp_offset(get_swap_page_of_type(swap));
142 if (offset) {
143 if (bitmap_set(bitmap, offset)) {
144 swap_free(swp_entry(swap, offset));
145 offset = 0;
146 }
147 }
148 return offset;
149}
150
151void free_all_swap_pages(int swap, struct bitmap_page *bitmap)
152{
153 unsigned int bit, n;
154 unsigned long test;
155
156 bit = 0;
157 while (bitmap) {
158 for (n = 0; n < BITMAP_PAGE_CHUNKS; n++)
159 for (test = 1UL; test; test <<= 1) {
160 if (bitmap->chunks[n] & test)
161 swap_free(swp_entry(swap, bit));
162 bit++;
163 }
164 bitmap = bitmap->next;
165 }
166}
167
168/**
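
The bitmap helpers kept on the new-file side of this hunk (lines 116-166 above: the body of the bitmap_set() helper called by alloc_swap_page(), plus alloc_swap_page() and free_all_swap_pages()) locate a bit by first walking to the bitmap page that covers it, then to the chunk within that page, then to the bit within the chunk. The index arithmetic can be checked in isolation; the sketch below uses made-up sizes (32-bit chunks, 8 chunks per page, so 256 bits per toy page), since the kernel's BITMAP_PAGE_CHUNKS and BITS_PER_CHUNK are defined elsewhere:

/* toy_bitmap.c - index arithmetic of bitmap_set() with made-up constants */
#include <stdio.h>

#define BITS_PER_CHUNK     32			/* assumed chunk width */
#define BITMAP_PAGE_CHUNKS 8			/* assumed chunks per "page" */
#define BITMAP_PAGE_BITS   (BITMAP_PAGE_CHUNKS * BITS_PER_CHUNK)

struct toy_bitmap_page {
	unsigned long chunks[BITMAP_PAGE_CHUNKS];
	struct toy_bitmap_page *next;
};

static int toy_bitmap_set(struct toy_bitmap_page *bitmap, unsigned long bit)
{
	unsigned int n;

	/* 1. find the page that covers 'bit' */
	n = BITMAP_PAGE_BITS;
	while (bitmap && n <= bit) {
		n += BITMAP_PAGE_BITS;
		bitmap = bitmap->next;
	}
	if (!bitmap)
		return -1;
	/* 2. bit number relative to that page */
	n -= BITMAP_PAGE_BITS;
	bit -= n;
	/* 3. chunk index and bit index inside the chunk */
	n = 0;
	while (bit >= BITS_PER_CHUNK) {
		bit -= BITS_PER_CHUNK;
		n++;
	}
	bitmap->chunks[n] |= (1UL << bit);
	printf("chunk %u, bit %lu\n", n, bit);
	return 0;
}

int main(void)
{
	struct toy_bitmap_page b = { {0}, NULL }, a = { {0}, &b };

	/* bit 300 lands in the second page: 300 - 256 = 44 -> chunk 1, bit 12 */
	return toy_bitmap_set(&a, 300);
}
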
@@ -660,379 +271,3 @@ int swsusp_resume(void)
660 local_irq_enable(); 271 local_irq_enable();
661 return error; 272 return error;
662} 273}
663
664/**
665 * mark_unsafe_pages - mark the pages that cannot be used for storing
666 * the image during resume, because they conflict with the pages that
667 * had been used before suspend
668 */
669
670static void mark_unsafe_pages(struct pbe *pblist)
671{
672 struct zone *zone;
673 unsigned long zone_pfn;
674 struct pbe *p;
675
676 if (!pblist) /* a sanity check */
677 return;
678
679 /* Clear page flags */
680 for_each_zone (zone) {
681 for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn)
682 if (pfn_valid(zone_pfn + zone->zone_start_pfn))
683 ClearPageNosaveFree(pfn_to_page(zone_pfn +
684 zone->zone_start_pfn));
685 }
686
687 /* Mark orig addresses */
688 for_each_pbe (p, pblist)
689 SetPageNosaveFree(virt_to_page(p->orig_address));
690
691}
692
693static void copy_page_backup_list(struct pbe *dst, struct pbe *src)
694{
695 /* We assume both lists contain the same number of elements */
696 while (src) {
697 dst->orig_address = src->orig_address;
698 dst = dst->next;
699 src = src->next;
700 }
701}
702
703/*
704 * Using bio to read from swap.
705 * This code requires a bit more work than just using buffer heads
706 * but, it is the recommended way for 2.5/2.6.
707 * The following are to signal the beginning and end of I/O. Bios
708 * finish asynchronously, while we want them to happen synchronously.
709 * A simple atomic_t, and a wait loop take care of this problem.
710 */
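
The pattern described here, an atomic flag set before submission, cleared by the completion callback and polled by the submitter, can be sketched outside the kernel with C11 atomics and a POSIX thread standing in for the bio completion; this is only an illustration of the idea, not kernel code:

/* sync_wait.c - make an asynchronous completion look synchronous (illustration) */
#include <stdio.h>
#include <pthread.h>
#include <sched.h>
#include <stdatomic.h>
#include <unistd.h>

static atomic_int io_done;

/* plays the role of the bio end_io callback: runs later, in another context */
static void *completion(void *arg)
{
	usleep(10000);			/* pretend the I/O takes a while */
	atomic_store(&io_done, 0);	/* signal that the request finished */
	return NULL;
}

int main(void)
{
	pthread_t t;

	atomic_store(&io_done, 1);	/* mark the request as in flight */
	pthread_create(&t, NULL, completion, NULL);	/* "submit" it */

	/* the submitter busy-waits, yielding the CPU each time round */
	while (atomic_load(&io_done))
		sched_yield();

	printf("request completed\n");
	pthread_join(t, NULL);
	return 0;
}
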
711
712static atomic_t io_done = ATOMIC_INIT(0);
713
714static int end_io(struct bio *bio, unsigned int num, int err)
715{
716 if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
717 panic("I/O error reading memory image");
718 atomic_set(&io_done, 0);
719 return 0;
720}
721
722static struct block_device *resume_bdev;
723
724/**
725 * submit - submit BIO request.
726 * @rw: READ or WRITE.
727 * @off physical offset of page.
728 * @page: page we're reading or writing.
729 *
730 * Straight from the textbook - allocate and initialize the bio.
731 * If we're writing, make sure the page is marked as dirty.
732 * Then submit it and wait.
733 */
734
735static int submit(int rw, pgoff_t page_off, void *page)
736{
737 int error = 0;
738 struct bio *bio;
739
740 bio = bio_alloc(GFP_ATOMIC, 1);
741 if (!bio)
742 return -ENOMEM;
743 bio->bi_sector = page_off * (PAGE_SIZE >> 9);
744 bio->bi_bdev = resume_bdev;
745 bio->bi_end_io = end_io;
746
747 if (bio_add_page(bio, virt_to_page(page), PAGE_SIZE, 0) < PAGE_SIZE) {
748 printk("swsusp: ERROR: adding page to bio at %ld\n",page_off);
749 error = -EFAULT;
750 goto Done;
751 }
752
753
754 atomic_set(&io_done, 1);
755 submit_bio(rw | (1 << BIO_RW_SYNC), bio);
756 while (atomic_read(&io_done))
757 yield();
758 if (rw == READ)
759 bio_set_pages_dirty(bio);
760 Done:
761 bio_put(bio);
762 return error;
763}
764
765static int bio_read_page(pgoff_t page_off, void *page)
766{
767 return submit(READ, page_off, page);
768}
769
770static int bio_write_page(pgoff_t page_off, void *page)
771{
772 return submit(WRITE, page_off, page);
773}
774
775/**
776 * The following functions allow us to read data using a swap map
777 * in a file-alike way
778 */
779
780static inline void release_swap_map_reader(struct swap_map_handle *handle)
781{
782 if (handle->cur)
783 free_page((unsigned long)handle->cur);
784 handle->cur = NULL;
785}
786
787static inline int get_swap_map_reader(struct swap_map_handle *handle,
788 swp_entry_t start)
789{
790 int error;
791
792 if (!swp_offset(start))
793 return -EINVAL;
794 handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_ATOMIC);
795 if (!handle->cur)
796 return -ENOMEM;
797 error = bio_read_page(swp_offset(start), handle->cur);
798 if (error) {
799 release_swap_map_reader(handle);
800 return error;
801 }
802 handle->k = 0;
803 return 0;
804}
805
806static inline int swap_map_read_page(struct swap_map_handle *handle, void *buf)
807{
808 unsigned long offset;
809 int error;
810
811 if (!handle->cur)
812 return -EINVAL;
813 offset = swp_offset(handle->cur->entries[handle->k]);
814 if (!offset)
815 return -EINVAL;
816 error = bio_read_page(offset, buf);
817 if (error)
818 return error;
819 if (++handle->k >= MAP_PAGE_SIZE) {
820 handle->k = 0;
821 offset = swp_offset(handle->cur->next_swap);
822 if (!offset)
823 release_swap_map_reader(handle);
824 else
825 error = bio_read_page(offset, handle->cur);
826 }
827 return error;
828}
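
How the reader walks the on-disk structure can be simulated entirely in user space: an array stands in for the swap device, toy map pages list the "offsets" of data pages, and the handle advances through the entries and follows next_swap when a map page is exhausted, mirroring swap_map_read_page() above. All names and sizes below are illustrative:

/* toy_reader.c - simulated sequential read through chained map pages */
#include <stdio.h>

#define ENTRIES 2			/* stands in for MAP_PAGE_SIZE */

struct toy_map_page {
	unsigned entries[ENTRIES];	/* "offsets" of data pages (0 = unused) */
	unsigned next_swap;		/* "offset" of the next map page (0 = end) */
};

/* a tiny "swap device": offsets 1 and 2 hold map pages, 3..6 hold data */
static struct toy_map_page maps[3] = {
	{ {0, 0}, 0 },			/* offset 0 unused, so 0 can mean "none" */
	{ {3, 4}, 2 },			/* map page 1 -> data at 3 and 4, next map at 2 */
	{ {5, 6}, 0 },			/* map page 2 -> data at 5 and 6, end of chain */
};
static int data[7] = { 0, 0, 0, 30, 40, 50, 60 };

struct toy_handle {
	struct toy_map_page cur;	/* copy of the current map page */
	unsigned int k;			/* next entry to use */
};

/* mirrors swap_map_read_page(): read one data page, advance, follow the chain */
static int toy_read_page(struct toy_handle *h, int *buf)
{
	unsigned off = h->cur.entries[h->k];

	if (!off)
		return -1;
	*buf = data[off];
	if (++h->k >= ENTRIES) {
		h->k = 0;
		if (h->cur.next_swap)
			h->cur = maps[h->cur.next_swap];
		else
			h->cur.entries[0] = 0;	/* chain exhausted */
	}
	return 0;
}

int main(void)
{
	struct toy_handle h = { maps[1], 0 };	/* start of the chain */
	int v;

	while (toy_read_page(&h, &v) == 0)
		printf("%d\n", v);		/* prints 30 40 50 60 */
	return 0;
}
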
829
830static int check_header(void)
831{
832 char *reason = NULL;
833
834 dump_info();
835 if (swsusp_info.version_code != LINUX_VERSION_CODE)
836 reason = "kernel version";
837 if (swsusp_info.num_physpages != num_physpages)
838 reason = "memory size";
839 if (strcmp(swsusp_info.uts.sysname,system_utsname.sysname))
840 reason = "system type";
841 if (strcmp(swsusp_info.uts.release,system_utsname.release))
842 reason = "kernel release";
843 if (strcmp(swsusp_info.uts.version,system_utsname.version))
844 reason = "version";
845 if (strcmp(swsusp_info.uts.machine,system_utsname.machine))
846 reason = "machine";
847 if (reason) {
848 printk(KERN_ERR "swsusp: Resume mismatch: %s\n", reason);
849 return -EPERM;
850 }
851 return 0;
852}
853
854/**
855 * load_image_data - load the image data using the swap map handle
856 * @handle and store them using the page backup list @pblist
857 * (assume there are @nr_pages pages to load)
858 */
859
860static int load_image_data(struct pbe *pblist,
861 struct swap_map_handle *handle,
862 unsigned int nr_pages)
863{
864 int error;
865 unsigned int m;
866 struct pbe *p;
867
868 if (!pblist)
869 return -EINVAL;
870 printk("Loading image data pages (%u pages) ... ", nr_pages);
871 m = nr_pages / 100;
872 if (!m)
873 m = 1;
874 nr_pages = 0;
875 p = pblist;
876 while (p) {
877 error = swap_map_read_page(handle, (void *)p->address);
878 if (error)
879 break;
880 p = p->next;
881 if (!(nr_pages % m))
882 printk("\b\b\b\b%3d%%", nr_pages / m);
883 nr_pages++;
884 }
885 if (!error)
886 printk("\b\b\b\bdone\n");
887 return error;
888}
889
890/**
891 * unpack_orig_addresses - copy the elements of @buf[] (1 page) to
892 * the PBEs in the list starting at @pbe
893 */
894
895static inline struct pbe *unpack_orig_addresses(unsigned long *buf,
896 struct pbe *pbe)
897{
898 int j;
899
900 for (j = 0; j < PAGE_SIZE / sizeof(long) && pbe; j++) {
901 pbe->orig_address = buf[j];
902 pbe = pbe->next;
903 }
904 return pbe;
905}
906
907/**
908 * load_image_metadata - load the image metadata using the swap map
909 * handle @handle and put them into the PBEs in the list @pblist
910 */
911
912static int load_image_metadata(struct pbe *pblist, struct swap_map_handle *handle)
913{
914 struct pbe *p;
915 unsigned long *buf;
916 unsigned int n = 0;
917 int error = 0;
918
919 printk("Loading image metadata ... ");
920 buf = (unsigned long *)get_zeroed_page(GFP_ATOMIC);
921 if (!buf)
922 return -ENOMEM;
923 p = pblist;
924 while (p) {
925 error = swap_map_read_page(handle, buf);
926 if (error)
927 break;
928 p = unpack_orig_addresses(buf, p);
929 n++;
930 }
931 free_page((unsigned long)buf);
932 if (!error)
933 printk("done (%u pages loaded)\n", n);
934 return error;
935}
936
937int swsusp_read(struct pbe **pblist_ptr)
938{
939 int error;
940 struct pbe *p, *pblist;
941 struct swap_map_handle handle;
942 unsigned int nr_pages;
943
944 if (IS_ERR(resume_bdev)) {
945 pr_debug("swsusp: block device not initialised\n");
946 return PTR_ERR(resume_bdev);
947 }
948
949 error = get_swap_map_reader(&handle, swsusp_header.image);
950 if (!error)
951 error = swap_map_read_page(&handle, &swsusp_info);
952 if (!error)
953 error = check_header();
954 if (error)
955 return error;
956 nr_pages = swsusp_info.image_pages;
957 p = alloc_pagedir(nr_pages, GFP_ATOMIC, 0);
958 if (!p)
959 return -ENOMEM;
960 error = load_image_metadata(p, &handle);
961 if (!error) {
962 mark_unsafe_pages(p);
963 pblist = alloc_pagedir(nr_pages, GFP_ATOMIC, 1);
964 if (pblist)
965 copy_page_backup_list(pblist, p);
966 free_pagedir(p);
967 if (!pblist)
968 error = -ENOMEM;
969
970 /* Allocate memory for the image and read the data from swap */
971 if (!error)
972 error = alloc_data_pages(pblist, GFP_ATOMIC, 1);
973 if (!error) {
974 release_eaten_pages();
975 error = load_image_data(pblist, &handle, nr_pages);
976 }
977 if (!error)
978 *pblist_ptr = pblist;
979 }
980 release_swap_map_reader(&handle);
981
982 blkdev_put(resume_bdev);
983
984 if (!error)
985 pr_debug("swsusp: Reading resume file was successful\n");
986 else
987 pr_debug("swsusp: Error %d resuming\n", error);
988 return error;
989}
990
991/**
992 * swsusp_check - Check for swsusp signature in the resume device
993 */
994
995int swsusp_check(void)
996{
997 int error;
998
999 resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_READ);
1000 if (!IS_ERR(resume_bdev)) {
1001 set_blocksize(resume_bdev, PAGE_SIZE);
1002 memset(&swsusp_header, 0, sizeof(swsusp_header));
1003 if ((error = bio_read_page(0, &swsusp_header)))
1004 return error;
1005 if (!memcmp(SWSUSP_SIG, swsusp_header.sig, 10)) {
1006 memcpy(swsusp_header.sig, swsusp_header.orig_sig, 10);
1007 /* Reset swap signature now */
1008 error = bio_write_page(0, &swsusp_header);
1009 } else {
1010 return -EINVAL;
1011 }
1012 if (error)
1013 blkdev_put(resume_bdev);
1014 else
1015 pr_debug("swsusp: Signature found, resuming\n");
1016 } else {
1017 error = PTR_ERR(resume_bdev);
1018 }
1019
1020 if (error)
1021 pr_debug("swsusp: Error %d check for resume file\n", error);
1022
1023 return error;
1024}
1025
1026/**
1027 * swsusp_close - close swap device.
1028 */
1029
1030void swsusp_close(void)
1031{
1032 if (IS_ERR(resume_bdev)) {
1033 pr_debug("swsusp: block device not initialised\n");
1034 return;
1035 }
1036
1037 blkdev_put(resume_bdev);
1038}
diff --git a/kernel/power/user.c b/kernel/power/user.c
new file mode 100644
index 000000000000..3f1539fbe48a
--- /dev/null
+++ b/kernel/power/user.c
@@ -0,0 +1,333 @@
1/*
2 * linux/kernel/power/user.c
3 *
4 * This file provides the user space interface for software suspend/resume.
5 *
6 * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
7 *
8 * This file is released under the GPLv2.
9 *
10 */
11
12#include <linux/suspend.h>
13#include <linux/syscalls.h>
14#include <linux/string.h>
15#include <linux/device.h>
16#include <linux/miscdevice.h>
17#include <linux/mm.h>
18#include <linux/swap.h>
19#include <linux/swapops.h>
20#include <linux/pm.h>
21#include <linux/fs.h>
22
23#include <asm/uaccess.h>
24
25#include "power.h"
26
27#define SNAPSHOT_MINOR 231
28
29static struct snapshot_data {
30 struct snapshot_handle handle;
31 int swap;
32 struct bitmap_page *bitmap;
33 int mode;
34 char frozen;
35 char ready;
36} snapshot_state;
37
38static atomic_t device_available = ATOMIC_INIT(1);
39
40static int snapshot_open(struct inode *inode, struct file *filp)
41{
42 struct snapshot_data *data;
43
44 if (!atomic_add_unless(&device_available, -1, 0))
45 return -EBUSY;
46
47 if ((filp->f_flags & O_ACCMODE) == O_RDWR)
48 return -ENOSYS;
49
50 nonseekable_open(inode, filp);
51 data = &snapshot_state;
52 filp->private_data = data;
53 memset(&data->handle, 0, sizeof(struct snapshot_handle));
54 if ((filp->f_flags & O_ACCMODE) == O_RDONLY) {
55 data->swap = swsusp_resume_device ? swap_type_of(swsusp_resume_device) : -1;
56 data->mode = O_RDONLY;
57 } else {
58 data->swap = -1;
59 data->mode = O_WRONLY;
60 }
61 data->bitmap = NULL;
62 data->frozen = 0;
63 data->ready = 0;
64
65 return 0;
66}
67
68static int snapshot_release(struct inode *inode, struct file *filp)
69{
70 struct snapshot_data *data;
71
72 swsusp_free();
73 data = filp->private_data;
74 free_all_swap_pages(data->swap, data->bitmap);
75 free_bitmap(data->bitmap);
76 if (data->frozen) {
77 down(&pm_sem);
78 thaw_processes();
79 enable_nonboot_cpus();
80 up(&pm_sem);
81 }
82 atomic_inc(&device_available);
83 return 0;
84}
85
86static ssize_t snapshot_read(struct file *filp, char __user *buf,
87 size_t count, loff_t *offp)
88{
89 struct snapshot_data *data;
90 ssize_t res;
91
92 data = filp->private_data;
93 res = snapshot_read_next(&data->handle, count);
94 if (res > 0) {
95 if (copy_to_user(buf, data_of(data->handle), res))
96 res = -EFAULT;
97 else
98 *offp = data->handle.offset;
99 }
100 return res;
101}
102
103static ssize_t snapshot_write(struct file *filp, const char __user *buf,
104 size_t count, loff_t *offp)
105{
106 struct snapshot_data *data;
107 ssize_t res;
108
109 data = filp->private_data;
110 res = snapshot_write_next(&data->handle, count);
111 if (res > 0) {
112 if (copy_from_user(data_of(data->handle), buf, res))
113 res = -EFAULT;
114 else
115 *offp = data->handle.offset;
116 }
117 return res;
118}
119
120static int snapshot_ioctl(struct inode *inode, struct file *filp,
121 unsigned int cmd, unsigned long arg)
122{
123 int error = 0;
124 struct snapshot_data *data;
125 loff_t offset, avail;
126
127 if (_IOC_TYPE(cmd) != SNAPSHOT_IOC_MAGIC)
128 return -ENOTTY;
129 if (_IOC_NR(cmd) > SNAPSHOT_IOC_MAXNR)
130 return -ENOTTY;
131 if (!capable(CAP_SYS_ADMIN))
132 return -EPERM;
133
134 data = filp->private_data;
135
136 switch (cmd) {
137
138 case SNAPSHOT_FREEZE:
139 if (data->frozen)
140 break;
141 down(&pm_sem);
142 disable_nonboot_cpus();
143 if (freeze_processes()) {
144 thaw_processes();
145 enable_nonboot_cpus();
146 error = -EBUSY;
147 }
148 up(&pm_sem);
149 if (!error)
150 data->frozen = 1;
151 break;
152
153 case SNAPSHOT_UNFREEZE:
154 if (!data->frozen)
155 break;
156 down(&pm_sem);
157 thaw_processes();
158 enable_nonboot_cpus();
159 up(&pm_sem);
160 data->frozen = 0;
161 break;
162
163 case SNAPSHOT_ATOMIC_SNAPSHOT:
164 if (data->mode != O_RDONLY || !data->frozen || data->ready) {
165 error = -EPERM;
166 break;
167 }
168 down(&pm_sem);
169 /* Free memory before shutting down devices. */
170 error = swsusp_shrink_memory();
171 if (!error) {
172 error = device_suspend(PMSG_FREEZE);
173 if (!error) {
174 in_suspend = 1;
175 error = swsusp_suspend();
176 device_resume();
177 }
178 }
179 up(&pm_sem);
180 if (!error)
181 error = put_user(in_suspend, (unsigned int __user *)arg);
182 if (!error)
183 data->ready = 1;
184 break;
185
186 case SNAPSHOT_ATOMIC_RESTORE:
187 if (data->mode != O_WRONLY || !data->frozen ||
188 !snapshot_image_loaded(&data->handle)) {
189 error = -EPERM;
190 break;
191 }
192 down(&pm_sem);
193 pm_prepare_console();
194 error = device_suspend(PMSG_FREEZE);
195 if (!error) {
196 error = swsusp_resume();
197 device_resume();
198 }
199 pm_restore_console();
200 up(&pm_sem);
201 break;
202
203 case SNAPSHOT_FREE:
204 swsusp_free();
205 memset(&data->handle, 0, sizeof(struct snapshot_handle));
206 data->ready = 0;
207 break;
208
209 case SNAPSHOT_SET_IMAGE_SIZE:
210 image_size = arg;
211 break;
212
213 case SNAPSHOT_AVAIL_SWAP:
214 avail = count_swap_pages(data->swap, 1);
215 avail <<= PAGE_SHIFT;
216 error = put_user(avail, (loff_t __user *)arg);
217 break;
218
219 case SNAPSHOT_GET_SWAP_PAGE:
220 if (data->swap < 0 || data->swap >= MAX_SWAPFILES) {
221 error = -ENODEV;
222 break;
223 }
224 if (!data->bitmap) {
225 data->bitmap = alloc_bitmap(count_swap_pages(data->swap, 0));
226 if (!data->bitmap) {
227 error = -ENOMEM;
228 break;
229 }
230 }
231 offset = alloc_swap_page(data->swap, data->bitmap);
232 if (offset) {
233 offset <<= PAGE_SHIFT;
234 error = put_user(offset, (loff_t __user *)arg);
235 } else {
236 error = -ENOSPC;
237 }
238 break;
239
240 case SNAPSHOT_FREE_SWAP_PAGES:
241 if (data->swap < 0 || data->swap >= MAX_SWAPFILES) {
242 error = -ENODEV;
243 break;
244 }
245 free_all_swap_pages(data->swap, data->bitmap);
246 free_bitmap(data->bitmap);
247 data->bitmap = NULL;
248 break;
249
250 case SNAPSHOT_SET_SWAP_FILE:
251 if (!data->bitmap) {
252 /*
253 * User space encodes device types as two-byte values,
254 * so we need to recode them
255 */
256 if (old_decode_dev(arg)) {
257 data->swap = swap_type_of(old_decode_dev(arg));
258 if (data->swap < 0)
259 error = -ENODEV;
260 } else {
261 data->swap = -1;
262 error = -EINVAL;
263 }
264 } else {
265 error = -EPERM;
266 }
267 break;
268
269 case SNAPSHOT_S2RAM:
270 if (!data->frozen) {
271 error = -EPERM;
272 break;
273 }
274
275 if (down_trylock(&pm_sem)) {
276 error = -EBUSY;
277 break;
278 }
279
280 if (pm_ops->prepare) {
281 error = pm_ops->prepare(PM_SUSPEND_MEM);
282 if (error)
283 goto OutS3;
284 }
285
286 /* Put devices to sleep */
287 error = device_suspend(PMSG_SUSPEND);
288 if (error) {
289 printk(KERN_ERR "Failed to suspend some devices.\n");
290 } else {
291 /* Enter S3, system is already frozen */
292 suspend_enter(PM_SUSPEND_MEM);
293
294 /* Wake up devices */
295 device_resume();
296 }
297
298 if (pm_ops->finish)
299 pm_ops->finish(PM_SUSPEND_MEM);
300
301OutS3:
302 up(&pm_sem);
303 break;
304
305 default:
306 error = -ENOTTY;
307
308 }
309
310 return error;
311}
312
313static struct file_operations snapshot_fops = {
314 .open = snapshot_open,
315 .release = snapshot_release,
316 .read = snapshot_read,
317 .write = snapshot_write,
318 .llseek = no_llseek,
319 .ioctl = snapshot_ioctl,
320};
321
322static struct miscdevice snapshot_device = {
323 .minor = SNAPSHOT_MINOR,
324 .name = "snapshot",
325 .fops = &snapshot_fops,
326};
327
328static int __init snapshot_device_init(void)
329{
330 return misc_register(&snapshot_device);
331};
332
333device_initcall(snapshot_device_init);
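
For context, a user-space suspend tool would drive this device roughly as follows: open it read-only, freeze processes, take the atomic snapshot, and if in_suspend comes back non-zero read out the image page by page. The sketch below is minimal and assumes the SNAPSHOT_* ioctl definitions from kernel/power/power.h have been copied into a local header (named snapshot_ioctls.h here purely for illustration); error handling is mostly omitted:

/* suspend_sketch.c - minimal user-space client of /dev/snapshot (illustration) */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>

#include "snapshot_ioctls.h"	/* hypothetical local copy of the SNAPSHOT_*
				   ioctl definitions from kernel/power/power.h */

#define PAGE_SIZE 4096		/* assumed page size */

int main(void)
{
	char buf[PAGE_SIZE];
	unsigned int in_suspend = 0;
	ssize_t n;
	int fd, img;

	fd = open("/dev/snapshot", O_RDONLY);	/* O_RDONLY: we will read an image */
	if (fd < 0) {
		perror("/dev/snapshot");
		return 1;
	}
	if (ioctl(fd, SNAPSHOT_FREEZE, 0))	/* processes must be frozen first */
		goto close_dev;
	if (ioctl(fd, SNAPSHOT_ATOMIC_SNAPSHOT, &in_suspend))
		goto thaw;

	if (in_suspend) {
		/* this instance took the snapshot: read the image page by page */
		img = open("image", O_WRONLY | O_CREAT | O_TRUNC, 0600);
		while ((n = read(fd, buf, PAGE_SIZE)) > 0)
			write(img, buf, n);
		close(img);
		ioctl(fd, SNAPSHOT_FREE, 0);	/* drop the in-kernel image */
	}
	/* in_suspend == 0 means we are running again inside a restored image */

thaw:
	ioctl(fd, SNAPSHOT_UNFREEZE, 0);
close_dev:
	close(fd);
	return 0;
}
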