author    Paul Mackerras <paulus@samba.org>    2007-05-07 23:37:51 -0400
committer Paul Mackerras <paulus@samba.org>    2007-05-07 23:37:51 -0400
commit    02bbc0f09c90cefdb2837605c96a66c5ce4ba2e1 (patch)
tree      04ef573cd4de095c500c9fc3477f4278c0b36300 /kernel
parent    7487a2245b8841c77ba9db406cf99a483b9334e9 (diff)
parent    5b94f675f57e4ff16c8fda09088d7480a84dcd91 (diff)

Merge branch 'linux-2.6'
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/cpuset.c           22
-rw-r--r--  kernel/delayacct.c         6
-rw-r--r--  kernel/exit.c             17
-rw-r--r--  kernel/fork.c              5
-rw-r--r--  kernel/irq/chip.c          3
-rw-r--r--  kernel/ksysfs.c           12
-rw-r--r--  kernel/module.c           18
-rw-r--r--  kernel/params.c            2
-rw-r--r--  kernel/pid.c               4
-rw-r--r--  kernel/power/Kconfig      11
-rw-r--r--  kernel/power/disk.c       77
-rw-r--r--  kernel/power/main.c       29
-rw-r--r--  kernel/power/power.h      49
-rw-r--r--  kernel/power/process.c     6
-rw-r--r--  kernel/power/snapshot.c  309
-rw-r--r--  kernel/power/swap.c       60
-rw-r--r--  kernel/power/swsusp.c    139
-rw-r--r--  kernel/power/user.c       39
-rw-r--r--  kernel/sched.c             8
-rw-r--r--  kernel/signal.c            6
-rw-r--r--  kernel/sys.c               2
-rw-r--r--  kernel/taskstats.c         4
22 files changed, 563 insertions(+), 265 deletions(-)
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index f382b0f775e1..d240349cbf0f 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2351,6 +2351,8 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
  * z's node is in our tasks mems_allowed, yes. If it's not a
  * __GFP_HARDWALL request and this zone's nodes is in the nearest
  * mem_exclusive cpuset ancestor to this tasks cpuset, yes.
+ * If the task has been OOM killed and has access to memory reserves
+ * as specified by the TIF_MEMDIE flag, yes.
  * Otherwise, no.
  *
  * If __GFP_HARDWALL is set, cpuset_zone_allowed_softwall()
@@ -2368,7 +2370,8 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
  * calls get to this routine, we should just shut up and say 'yes'.
  *
  * GFP_USER allocations are marked with the __GFP_HARDWALL bit,
- * and do not allow allocations outside the current tasks cpuset.
+ * and do not allow allocations outside the current tasks cpuset
+ * unless the task has been OOM killed as is marked TIF_MEMDIE.
  * GFP_KERNEL allocations are not so marked, so can escape to the
  * nearest enclosing mem_exclusive ancestor cpuset.
  *
@@ -2392,6 +2395,7 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
  * affect that:
  *	in_interrupt - any node ok (current task context irrelevant)
  *	GFP_ATOMIC   - any node ok
+ *	TIF_MEMDIE   - any node ok
  *	GFP_KERNEL   - any node in enclosing mem_exclusive cpuset ok
  *	GFP_USER     - only nodes in current tasks mems allowed ok.
  *
@@ -2413,6 +2417,12 @@ int __cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask)
 	might_sleep_if(!(gfp_mask & __GFP_HARDWALL));
 	if (node_isset(node, current->mems_allowed))
 		return 1;
+	/*
+	 * Allow tasks that have access to memory reserves because they have
+	 * been OOM killed to get memory anywhere.
+	 */
+	if (unlikely(test_thread_flag(TIF_MEMDIE)))
+		return 1;
 	if (gfp_mask & __GFP_HARDWALL)	/* If hardwall request, stop here */
 		return 0;
 
@@ -2438,7 +2448,9 @@ int __cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask)
  *
  * If we're in interrupt, yes, we can always allocate.
  * If __GFP_THISNODE is set, yes, we can always allocate.  If zone
- * z's node is in our tasks mems_allowed, yes.  Otherwise, no.
+ * z's node is in our tasks mems_allowed, yes.  If the task has been
+ * OOM killed and has access to memory reserves as specified by the
+ * TIF_MEMDIE flag, yes.  Otherwise, no.
  *
  * The __GFP_THISNODE placement logic is really handled elsewhere,
  * by forcibly using a zonelist starting at a specified node, and by
@@ -2462,6 +2474,12 @@ int __cpuset_zone_allowed_hardwall(struct zone *z, gfp_t gfp_mask)
 	node = zone_to_nid(z);
 	if (node_isset(node, current->mems_allowed))
 		return 1;
+	/*
+	 * Allow tasks that have access to memory reserves because they have
+	 * been OOM killed to get memory anywhere.
+	 */
+	if (unlikely(test_thread_flag(TIF_MEMDIE)))
+		return 1;
 	return 0;
 }
 
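Review note: these cpuset hunks let an OOM-killed task (marked TIF_MEMDIE) allocate from any node, so it can free memory by exiting instead of deadlocking inside its own cpuset. For context, the flag is set on the victim by the OOM killer; a simplified sketch of that marking, modeled loosely on the 2.6.21-era mm/oom_kill.c (the function name here is invented, and this code is not part of the diff above):

    /* Sketch: how an OOM victim ends up with TIF_MEMDIE set (simplified) */
    static void oom_mark_victim(struct task_struct *p)
    {
            /*
             * Grant the victim access to memory reserves and kill it; the
             * cpuset checks above then answer "yes" for any node, so the
             * dying task can allocate whatever it needs in order to exit.
             */
            set_tsk_thread_flag(p, TIF_MEMDIE);
            force_sig(SIGKILL, p);
    }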
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index 766d5912b26a..c0148ae992c4 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -31,11 +31,7 @@ __setup("nodelayacct", delayacct_setup_disable);
 
 void delayacct_init(void)
 {
-	delayacct_cache = kmem_cache_create("delayacct_cache",
-					sizeof(struct task_delay_info),
-					0,
-					SLAB_PANIC,
-					NULL, NULL);
+	delayacct_cache = KMEM_CACHE(task_delay_info, SLAB_PANIC);
 	delayacct_tsk_init(&init_task);
 }
 
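Review note: KMEM_CACHE() derives the cache name, object size, and alignment from the struct type itself, which is why five explicit arguments collapse to two. At the time of this merge the helper in <linux/slab.h> expanded roughly as follows (reproduced from memory; the trailing ctor/dtor NULLs were dropped from kmem_cache_create() in later kernels):

    #define KMEM_CACHE(__struct, __flags)                             \
            kmem_cache_create(#__struct, sizeof(struct __struct),     \
                              __alignof__(struct __struct), (__flags),\
                              NULL, NULL)

Besides being shorter, the macro passes the struct's natural alignment where the open-coded call above passed 0.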
diff --git a/kernel/exit.c b/kernel/exit.c
index b55ed4cc9104..92369240d91d 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1033,6 +1033,8 @@ asmlinkage void sys_exit_group(int error_code)
 
 static int eligible_child(pid_t pid, int options, struct task_struct *p)
 {
+	int err;
+
 	if (pid > 0) {
 		if (p->pid != pid)
 			return 0;
@@ -1066,8 +1068,9 @@ static int eligible_child(pid_t pid, int options, struct task_struct *p)
 	if (delay_group_leader(p))
 		return 2;
 
-	if (security_task_wait(p))
-		return 0;
+	err = security_task_wait(p);
+	if (err)
+		return err;
 
 	return 1;
 }
@@ -1449,6 +1452,7 @@ static long do_wait(pid_t pid, int options, struct siginfo __user *infop,
 	DECLARE_WAITQUEUE(wait, current);
 	struct task_struct *tsk;
 	int flag, retval;
+	int allowed, denied;
 
 	add_wait_queue(&current->signal->wait_chldexit,&wait);
 repeat:
@@ -1457,6 +1461,7 @@ repeat:
 	 * match our criteria, even if we are not able to reap it yet.
 	 */
 	flag = 0;
+	allowed = denied = 0;
 	current->state = TASK_INTERRUPTIBLE;
 	read_lock(&tasklist_lock);
 	tsk = current;
@@ -1472,6 +1477,12 @@ repeat:
 			if (!ret)
 				continue;
 
+			if (unlikely(ret < 0)) {
+				denied = ret;
+				continue;
+			}
+			allowed = 1;
+
 			switch (p->state) {
 			case TASK_TRACED:
 				/*
@@ -1570,6 +1581,8 @@ check_continued:
 		goto repeat;
 	}
 	retval = -ECHILD;
+	if (unlikely(denied) && !allowed)
+		retval = denied;
 end:
 	current->state = TASK_RUNNING;
 	remove_wait_queue(&current->signal->wait_chldexit,&wait);
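Review note: eligible_child() now propagates the -errno from the security_task_wait() LSM hook instead of flattening it to "not eligible", and do_wait() returns that error only when every child was denied; otherwise a security denial would surface as a misleading -ECHILD. A hypothetical hook body illustrating the convention (the hook name is real, the policy and helper are invented):

    /* Hypothetical LSM policy: only allow waiting on children in the
     * same security domain as the caller. */
    static int example_task_wait(struct task_struct *p)
    {
            if (same_security_domain(current, p))  /* invented helper */
                    return 0;       /* wait proceeds normally */
            return -EACCES; /* reported by wait4() only if no child is allowed */
    }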
diff --git a/kernel/fork.c b/kernel/fork.c
index 6af959c034d8..b7d169def942 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -286,6 +286,8 @@ static inline int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 		if (retval)
 			goto out;
 	}
+	/* a new mm has just been created */
+	arch_dup_mmap(oldmm, mm);
 	retval = 0;
 out:
 	up_write(&mm->mmap_sem);
@@ -1423,8 +1425,7 @@ static void sighand_ctor(void *data, struct kmem_cache *cachep, unsigned long fl
 {
 	struct sighand_struct *sighand = data;
 
-	if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) ==
-					SLAB_CTOR_CONSTRUCTOR)
+	if (flags & SLAB_CTOR_CONSTRUCTOR)
 		spin_lock_init(&sighand->siglock);
 }
 
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 0133f4f9e9f0..615ce97c6cfd 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -11,6 +11,7 @@
  */
 
 #include <linux/irq.h>
+#include <linux/msi.h>
 #include <linux/module.h>
 #include <linux/interrupt.h>
 #include <linux/kernel_stat.h>
@@ -185,6 +186,8 @@ int set_irq_msi(unsigned int irq, struct msi_desc *entry)
 	desc = irq_desc + irq;
 	spin_lock_irqsave(&desc->lock, flags);
 	desc->msi_desc = entry;
+	if (entry)
+		entry->irq = irq;
 	spin_unlock_irqrestore(&desc->lock, flags);
 	return 0;
 }
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index e0ffe4ab0917..559deca5ed15 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -24,18 +24,18 @@ static struct subsys_attribute _name##_attr = \
 
 #if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET)
 /* current uevent sequence number */
-static ssize_t uevent_seqnum_show(struct subsystem *subsys, char *page)
+static ssize_t uevent_seqnum_show(struct kset *kset, char *page)
 {
 	return sprintf(page, "%llu\n", (unsigned long long)uevent_seqnum);
 }
 KERNEL_ATTR_RO(uevent_seqnum);
 
 /* uevent helper program, used during early boo */
-static ssize_t uevent_helper_show(struct subsystem *subsys, char *page)
+static ssize_t uevent_helper_show(struct kset *kset, char *page)
 {
 	return sprintf(page, "%s\n", uevent_helper);
 }
-static ssize_t uevent_helper_store(struct subsystem *subsys, const char *page, size_t count)
+static ssize_t uevent_helper_store(struct kset *kset, const char *page, size_t count)
 {
 	if (count+1 > UEVENT_HELPER_PATH_LEN)
 		return -ENOENT;
@@ -49,13 +49,13 @@ KERNEL_ATTR_RW(uevent_helper);
 #endif
 
 #ifdef CONFIG_KEXEC
-static ssize_t kexec_loaded_show(struct subsystem *subsys, char *page)
+static ssize_t kexec_loaded_show(struct kset *kset, char *page)
 {
 	return sprintf(page, "%d\n", !!kexec_image);
 }
 KERNEL_ATTR_RO(kexec_loaded);
 
-static ssize_t kexec_crash_loaded_show(struct subsystem *subsys, char *page)
+static ssize_t kexec_crash_loaded_show(struct kset *kset, char *page)
 {
 	return sprintf(page, "%d\n", !!kexec_crash_image);
 }
@@ -85,7 +85,7 @@ static int __init ksysfs_init(void)
 {
 	int error = subsystem_register(&kernel_subsys);
 	if (!error)
-		error = sysfs_create_group(&kernel_subsys.kset.kobj,
+		error = sysfs_create_group(&kernel_subsys.kobj,
 					   &kernel_attr_group);
 
 	return error;
diff --git a/kernel/module.c b/kernel/module.c
index 9da5af668a20..1eb8ca565ba0 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -45,6 +45,8 @@
 #include <asm/cacheflush.h>
 #include <linux/license.h>
 
+extern int module_sysfs_initialized;
+
 #if 0
 #define DEBUGP printk
 #else
@@ -346,10 +348,10 @@ static void *percpu_modalloc(unsigned long size, unsigned long align,
 	unsigned int i;
 	void *ptr;
 
-	if (align > SMP_CACHE_BYTES) {
-		printk(KERN_WARNING "%s: per-cpu alignment %li > %i\n",
-		       name, align, SMP_CACHE_BYTES);
-		align = SMP_CACHE_BYTES;
+	if (align > PAGE_SIZE) {
+		printk(KERN_WARNING "%s: per-cpu alignment %li > %li\n",
+		       name, align, PAGE_SIZE);
+		align = PAGE_SIZE;
 	}
 
 	ptr = __per_cpu_start;
@@ -430,7 +432,7 @@ static int percpu_modinit(void)
 	pcpu_size = kmalloc(sizeof(pcpu_size[0]) * pcpu_num_allocated,
 			    GFP_KERNEL);
 	/* Static in-kernel percpu data (used). */
-	pcpu_size[0] = -ALIGN(__per_cpu_end-__per_cpu_start, SMP_CACHE_BYTES);
+	pcpu_size[0] = -(__per_cpu_end-__per_cpu_start);
 	/* Free room. */
 	pcpu_size[1] = PERCPU_ENOUGH_ROOM + pcpu_size[0];
 	if (pcpu_size[1] < 0) {
@@ -1117,8 +1119,8 @@ int mod_sysfs_init(struct module *mod)
 {
 	int err;
 
-	if (!module_subsys.kset.subsys) {
-		printk(KERN_ERR "%s: module_subsys not initialized\n",
+	if (!module_sysfs_initialized) {
+		printk(KERN_ERR "%s: module sysfs not initialized\n",
 		       mod->name);
 		err = -EINVAL;
 		goto out;
@@ -2385,7 +2387,7 @@ void module_add_driver(struct module *mod, struct device_driver *drv)
 	struct kobject *mkobj;
 
 	/* Lookup built-in module entry in /sys/modules */
-	mkobj = kset_find_obj(&module_subsys.kset, drv->mod_name);
+	mkobj = kset_find_obj(&module_subsys, drv->mod_name);
 	if (mkobj) {
 		mk = container_of(mkobj, struct module_kobject, kobj);
 		/* remember our module structure */
diff --git a/kernel/params.c b/kernel/params.c
index 1fc4ac746cd8..312172320b4c 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -691,6 +691,7 @@ static struct kset_uevent_ops module_uevent_ops = {
 };
 
 decl_subsys(module, &module_ktype, &module_uevent_ops);
+int module_sysfs_initialized;
 
 static struct kobj_type module_ktype = {
 	.sysfs_ops =	&module_sysfs_ops,
@@ -709,6 +710,7 @@ static int __init param_sysfs_init(void)
 			__FILE__, __LINE__, ret);
 		return ret;
 	}
+	module_sysfs_initialized = 1;
 
 	param_sysfs_builtin();
 
diff --git a/kernel/pid.c b/kernel/pid.c
index 78f2aee90f54..9c80bc23d6b8 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -412,7 +412,5 @@ void __init pidmap_init(void)
 	set_bit(0, init_pid_ns.pidmap[0].page);
 	atomic_dec(&init_pid_ns.pidmap[0].nr_free);
 
-	pid_cachep = kmem_cache_create("pid", sizeof(struct pid),
-				       __alignof__(struct pid),
-				       SLAB_PANIC, NULL, NULL);
+	pid_cachep = KMEM_CACHE(pid, SLAB_PANIC);
 }
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 5001c652028c..495b7d4dd330 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -78,17 +78,22 @@ config PM_SYSFS_DEPRECATED
 	  are likely to be bus or driver specific.
 
 config SOFTWARE_SUSPEND
-	bool "Software Suspend"
+	bool "Software Suspend (Hibernation)"
 	depends on PM && SWAP && (((X86 || PPC64_SWSUSP) && (!SMP || SUSPEND_SMP)) || ((FRV || PPC32) && !SMP))
 	---help---
-	  Enable the suspend to disk (STD) functionality.
+	  Enable the suspend to disk (STD) functionality, which is usually
+	  called "hibernation" in user interfaces.  STD checkpoints the
+	  system and powers it off; and restores that checkpoint on reboot.
 
 	  You can suspend your machine with 'echo disk > /sys/power/state'.
 	  Alternatively, you can use the additional userland tools available
 	  from <http://suspend.sf.net>.
 
 	  In principle it does not require ACPI or APM, although for example
-	  ACPI will be used if available.
+	  ACPI will be used for the final steps when it is available.  One
+	  of the reasons to use software suspend is that the firmware hooks
+	  for suspend states like suspend-to-RAM (STR) often don't work very
+	  well with Linux.
 
 	  It creates an image which is saved in your active swap. Upon the next
 	  boot, pass the 'resume=/dev/swappartition' argument to the kernel to
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index 02e4fb69111a..06331374d862 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -130,15 +130,25 @@ int pm_suspend_disk(void)
 {
 	int error;
 
+	/* The snapshot device should not be opened while we're running */
+	if (!atomic_add_unless(&snapshot_device_available, -1, 0))
+		return -EBUSY;
+
+	/* Allocate memory management structures */
+	error = create_basic_memory_bitmaps();
+	if (error)
+		goto Exit;
+
 	error = prepare_processes();
 	if (error)
-		return error;
+		goto Finish;
 
 	if (pm_disk_mode == PM_DISK_TESTPROC) {
 		printk("swsusp debug: Waiting for 5 seconds.\n");
 		mdelay(5000);
 		goto Thaw;
 	}
+
 	/* Free memory before shutting down devices. */
 	error = swsusp_shrink_memory();
 	if (error)
@@ -196,6 +206,10 @@ int pm_suspend_disk(void)
 	resume_console();
  Thaw:
 	unprepare_processes();
+ Finish:
+	free_basic_memory_bitmaps();
+ Exit:
+	atomic_inc(&snapshot_device_available);
 	return error;
 }
 
@@ -239,13 +253,21 @@ static int software_resume(void)
 	}
 
 	pr_debug("PM: Checking swsusp image.\n");
-
 	error = swsusp_check();
 	if (error)
-		goto Done;
+		goto Unlock;
 
-	pr_debug("PM: Preparing processes for restore.\n");
+	/* The snapshot device should not be opened while we're running */
+	if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
+		error = -EBUSY;
+		goto Unlock;
+	}
 
+	error = create_basic_memory_bitmaps();
+	if (error)
+		goto Finish;
+
+	pr_debug("PM: Preparing processes for restore.\n");
 	error = prepare_processes();
 	if (error) {
 		swsusp_close();
@@ -280,7 +302,11 @@ static int software_resume(void)
 	printk(KERN_ERR "PM: Restore failed, recovering.\n");
 	unprepare_processes();
  Done:
+	free_basic_memory_bitmaps();
+ Finish:
+	atomic_inc(&snapshot_device_available);
 	/* For success case, the suspend path will release the lock */
+ Unlock:
 	mutex_unlock(&pm_mutex);
 	pr_debug("PM: Resume from disk failed.\n");
 	return 0;
@@ -322,13 +348,40 @@ static const char * const pm_disk_modes[] = {
  *	supports it (as determined from pm_ops->pm_disk_mode).
  */
 
-static ssize_t disk_show(struct subsystem * subsys, char * buf)
+static ssize_t disk_show(struct kset *kset, char *buf)
 {
-	return sprintf(buf, "%s\n", pm_disk_modes[pm_disk_mode]);
+	int i;
+	char *start = buf;
+
+	for (i = PM_DISK_PLATFORM; i < PM_DISK_MAX; i++) {
+		if (!pm_disk_modes[i])
+			continue;
+		switch (i) {
+		case PM_DISK_SHUTDOWN:
+		case PM_DISK_REBOOT:
+		case PM_DISK_TEST:
+		case PM_DISK_TESTPROC:
+			break;
+		default:
+			if (pm_ops && pm_ops->enter &&
+			    (i == pm_ops->pm_disk_mode))
+				break;
+			/* not a valid mode, continue with loop */
+			continue;
+		}
+		if (i == pm_disk_mode)
+			buf += sprintf(buf, "[%s]", pm_disk_modes[i]);
+		else
+			buf += sprintf(buf, "%s", pm_disk_modes[i]);
+		if (i+1 != PM_DISK_MAX)
+			buf += sprintf(buf, " ");
+	}
+	buf += sprintf(buf, "\n");
+	return buf-start;
 }
 
 
-static ssize_t disk_store(struct subsystem * s, const char * buf, size_t n)
+static ssize_t disk_store(struct kset *kset, const char *buf, size_t n)
 {
 	int error = 0;
 	int i;
@@ -373,13 +426,13 @@ static ssize_t disk_store(struct subsystem * s, const char * buf, size_t n)
 
 power_attr(disk);
 
-static ssize_t resume_show(struct subsystem * subsys, char *buf)
+static ssize_t resume_show(struct kset *kset, char *buf)
 {
 	return sprintf(buf,"%d:%d\n", MAJOR(swsusp_resume_device),
 		       MINOR(swsusp_resume_device));
 }
 
-static ssize_t resume_store(struct subsystem *subsys, const char *buf, size_t n)
+static ssize_t resume_store(struct kset *kset, const char *buf, size_t n)
 {
 	unsigned int maj, min;
 	dev_t res;
@@ -405,12 +458,12 @@ static ssize_t resume_store(struct subsystem *subsys, const char *buf, size_t n)
 
 power_attr(resume);
 
-static ssize_t image_size_show(struct subsystem * subsys, char *buf)
+static ssize_t image_size_show(struct kset *kset, char *buf)
 {
 	return sprintf(buf, "%lu\n", image_size);
 }
 
-static ssize_t image_size_store(struct subsystem * subsys, const char * buf, size_t n)
+static ssize_t image_size_store(struct kset *kset, const char *buf, size_t n)
 {
 	unsigned long size;
 
@@ -439,7 +492,7 @@ static struct attribute_group attr_group = {
 
 static int __init pm_disk_init(void)
 {
-	return sysfs_create_group(&power_subsys.kset.kobj,&attr_group);
+	return sysfs_create_group(&power_subsys.kobj, &attr_group);
 }
 
 core_initcall(pm_disk_init);
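Review note: disk_show() now lists every supported hibernation mode with the active one bracketed, mirroring what /sys/power/state already does. On a box without platform hooks, reading the file would plausibly print something like "[shutdown] reboot test testproc", and writing "echo reboot > /sys/power/disk" moves the brackets; the exact set shown depends on pm_disk_modes[] and on whether pm_ops->enter supplies a platform mode.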
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 72419a3b1beb..f6dda685e7e2 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -184,17 +184,21 @@ static void suspend_finish(suspend_state_t state)
 static const char * const pm_states[PM_SUSPEND_MAX] = {
 	[PM_SUSPEND_STANDBY]	= "standby",
 	[PM_SUSPEND_MEM]	= "mem",
-#ifdef CONFIG_SOFTWARE_SUSPEND
 	[PM_SUSPEND_DISK]	= "disk",
-#endif
 };
 
 static inline int valid_state(suspend_state_t state)
 {
 	/* Suspend-to-disk does not really need low-level support.
-	 * It can work with reboot if needed. */
+	 * It can work with shutdown/reboot if needed.  If it isn't
+	 * configured, then it cannot be supported.
+	 */
 	if (state == PM_SUSPEND_DISK)
+#ifdef CONFIG_SOFTWARE_SUSPEND
 		return 1;
+#else
+		return 0;
+#endif
 
 	/* all other states need lowlevel support and need to be
 	 * valid to the lowlevel implementation, no valid callback
@@ -244,15 +248,6 @@ static int enter_state(suspend_state_t state)
 	return error;
 }
 
-/*
- * This is main interface to the outside world. It needs to be
- * called from process context.
- */
-int software_suspend(void)
-{
-	return enter_state(PM_SUSPEND_DISK);
-}
-
 
 /**
  *	pm_suspend - Externally visible function for suspending system.
@@ -285,7 +280,7 @@ decl_subsys(power,NULL,NULL);
  *	proper enumerated value, and initiates a suspend transition.
  */
 
-static ssize_t state_show(struct subsystem * subsys, char * buf)
+static ssize_t state_show(struct kset *kset, char *buf)
 {
 	int i;
 	char * s = buf;
@@ -298,7 +293,7 @@ static ssize_t state_show(struct subsystem * subsys, char * buf)
 	return (s - buf);
 }
 
-static ssize_t state_store(struct subsystem * subsys, const char * buf, size_t n)
+static ssize_t state_store(struct kset *kset, const char *buf, size_t n)
 {
 	suspend_state_t state = PM_SUSPEND_STANDBY;
 	const char * const *s;
@@ -325,13 +320,13 @@ power_attr(state);
 #ifdef CONFIG_PM_TRACE
 int pm_trace_enabled;
 
-static ssize_t pm_trace_show(struct subsystem * subsys, char * buf)
+static ssize_t pm_trace_show(struct kset *kset, char *buf)
 {
 	return sprintf(buf, "%d\n", pm_trace_enabled);
 }
 
 static ssize_t
-pm_trace_store(struct subsystem * subsys, const char * buf, size_t n)
+pm_trace_store(struct kset *kset, const char *buf, size_t n)
 {
 	int val;
 
@@ -365,7 +360,7 @@ static int __init pm_init(void)
 {
 	int error = subsystem_register(&power_subsys);
 	if (!error)
-		error = sysfs_create_group(&power_subsys.kset.kobj,&attr_group);
+		error = sysfs_create_group(&power_subsys.kobj,&attr_group);
 	return error;
 }
 
diff --git a/kernel/power/power.h b/kernel/power/power.h
index eb461b816bf4..34b43542785a 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -14,8 +14,18 @@ struct swsusp_info {
 
 
 #ifdef CONFIG_SOFTWARE_SUSPEND
-extern int pm_suspend_disk(void);
+/*
+ * Keep some memory free so that I/O operations can succeed without paging
+ * [Might this be more than 4 MB?]
+ */
+#define PAGES_FOR_IO	((4096 * 1024) >> PAGE_SHIFT)
+/*
+ * Keep 1 MB of memory free so that device drivers can allocate some pages in
+ * their .suspend() routines without breaking the suspend to disk.
+ */
+#define SPARE_PAGES	((1024 * 1024) >> PAGE_SHIFT)
 
+extern int pm_suspend_disk(void);
 #else
 static inline int pm_suspend_disk(void)
 {
@@ -23,6 +33,8 @@ static inline int pm_suspend_disk(void)
 }
 #endif
 
+extern int pfn_is_nosave(unsigned long);
+
 extern struct mutex pm_mutex;
 
 #define power_attr(_name) \
@@ -35,10 +47,7 @@ static struct subsys_attribute _name##_attr = { \
 	.store	= _name##_store,		\
 }
 
-extern struct subsystem power_subsys;
-
-/* References to section boundaries */
-extern const void __nosave_begin, __nosave_end;
+extern struct kset power_subsys;
 
 /* Preferred image size in bytes (default 500 MB) */
 extern unsigned long image_size;
@@ -49,6 +58,8 @@ extern sector_t swsusp_resume_block;
 extern asmlinkage int swsusp_arch_suspend(void);
 extern asmlinkage int swsusp_arch_resume(void);
 
+extern int create_basic_memory_bitmaps(void);
+extern void free_basic_memory_bitmaps(void);
 extern unsigned int count_data_pages(void);
 
 /**
@@ -139,30 +150,12 @@ struct resume_swap_area {
 #define PMOPS_ENTER	2
 #define PMOPS_FINISH	3
 
-/**
- *	The bitmap is used for tracing allocated swap pages
- *
- *	The entire bitmap consists of a number of bitmap_page
- *	structures linked with the help of the .next member.
- *	Thus each page can be allocated individually, so we only
- *	need to make 0-order memory allocations to create
- *	the bitmap.
- */
-
-#define BITMAP_PAGE_SIZE	(PAGE_SIZE - sizeof(void *))
-#define BITMAP_PAGE_CHUNKS	(BITMAP_PAGE_SIZE / sizeof(long))
-#define BITS_PER_CHUNK		(sizeof(long) * 8)
-#define BITMAP_PAGE_BITS	(BITMAP_PAGE_CHUNKS * BITS_PER_CHUNK)
-
-struct bitmap_page {
-	unsigned long		chunks[BITMAP_PAGE_CHUNKS];
-	struct bitmap_page	*next;
-};
+/* If unset, the snapshot device cannot be open. */
+extern atomic_t snapshot_device_available;
 
-extern void free_bitmap(struct bitmap_page *bitmap);
-extern struct bitmap_page *alloc_bitmap(unsigned int nr_bits);
-extern sector_t alloc_swapdev_block(int swap, struct bitmap_page *bitmap);
-extern void free_all_swap_pages(int swap, struct bitmap_page *bitmap);
+extern sector_t alloc_swapdev_block(int swap);
+extern void free_all_swap_pages(int swap);
+extern int swsusp_swap_in_use(void);
 
 extern int swsusp_check(void);
 extern int swsusp_shrink_memory(void);
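Review note: with 4 KB pages (PAGE_SHIFT == 12), the new constants work out to PAGES_FOR_IO = (4096 * 1024) >> 12 = 1024 page frames (4 MB) and SPARE_PAGES = (1024 * 1024) >> 12 = 256 page frames (1 MB).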
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 6d566bf7085c..0eb5c420e8ed 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -47,8 +47,10 @@ void refrigerator(void)
 	recalc_sigpending(); /* We sent fake signal, clean it up */
 	spin_unlock_irq(&current->sighand->siglock);
 
-	while (frozen(current)) {
-		current->state = TASK_UNINTERRUPTIBLE;
+	for (;;) {
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		if (!frozen(current))
+			break;
 		schedule();
 	}
 	pr_debug("%s left refrigerator\n", current->comm);
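Review note: this is the canonical fix for a lost-wakeup race. The old loop tested frozen(current) before setting the task state, so a thaw arriving between the check and the assignment could leave the task in TASK_UNINTERRUPTIBLE with nobody left to wake it; set_current_state() also supplies the memory barrier a plain assignment lacks. The general idiom looks like this (a generic sketch of the kernel's wait pattern, not taken from this diff):

    for (;;) {
            set_current_state(TASK_UNINTERRUPTIBLE); /* publish state, with barrier */
            if (condition)                           /* re-check after publishing */
                    break;
            schedule();                              /* actually sleep */
    }
    __set_current_state(TASK_RUNNING);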
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index fc53ad068128..128da11f01c2 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -21,6 +21,7 @@
 #include <linux/kernel.h>
 #include <linux/pm.h>
 #include <linux/device.h>
+#include <linux/init.h>
 #include <linux/bootmem.h>
 #include <linux/syscalls.h>
 #include <linux/console.h>
@@ -34,6 +35,10 @@
 
 #include "power.h"
 
+static int swsusp_page_is_free(struct page *);
+static void swsusp_set_page_forbidden(struct page *);
+static void swsusp_unset_page_forbidden(struct page *);
+
 /* List of PBEs needed for restoring the pages that were allocated before
  * the suspend and included in the suspend image, but have also been
  * allocated by the "resume" kernel, so their contents cannot be written
@@ -67,15 +72,15 @@ static void *get_image_page(gfp_t gfp_mask, int safe_needed)
 
 	res = (void *)get_zeroed_page(gfp_mask);
 	if (safe_needed)
-		while (res && PageNosaveFree(virt_to_page(res))) {
+		while (res && swsusp_page_is_free(virt_to_page(res))) {
 			/* The page is unsafe, mark it for swsusp_free() */
-			SetPageNosave(virt_to_page(res));
+			swsusp_set_page_forbidden(virt_to_page(res));
 			allocated_unsafe_pages++;
 			res = (void *)get_zeroed_page(gfp_mask);
 		}
 	if (res) {
-		SetPageNosave(virt_to_page(res));
-		SetPageNosaveFree(virt_to_page(res));
+		swsusp_set_page_forbidden(virt_to_page(res));
+		swsusp_set_page_free(virt_to_page(res));
 	}
 	return res;
 }
@@ -91,8 +96,8 @@ static struct page *alloc_image_page(gfp_t gfp_mask)
 
 	page = alloc_page(gfp_mask);
 	if (page) {
-		SetPageNosave(page);
-		SetPageNosaveFree(page);
+		swsusp_set_page_forbidden(page);
+		swsusp_set_page_free(page);
 	}
 	return page;
 }
@@ -110,9 +115,9 @@ static inline void free_image_page(void *addr, int clear_nosave_free)
 
 	page = virt_to_page(addr);
 
-	ClearPageNosave(page);
+	swsusp_unset_page_forbidden(page);
 	if (clear_nosave_free)
-		ClearPageNosaveFree(page);
+		swsusp_unset_page_free(page);
 
 	__free_page(page);
 }
@@ -224,11 +229,6 @@ static void chain_free(struct chain_allocator *ca, int clear_page_nosave)
  *	of type unsigned long each).  It also contains the pfns that
  *	correspond to the start and end of the represented memory area and
  *	the number of bit chunks in the block.
- *
- *	NOTE: Memory bitmaps are used for two types of operations only:
- *	"set a bit" and "find the next bit set". Moreover, the searching
- *	is always carried out after all of the "set a bit" operations
- *	on given bitmap.
 */
 
 #define BM_END_OF_MAP	(~0UL)
@@ -443,15 +443,13 @@ static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free)
 }
 
 /**
- *	memory_bm_set_bit - set the bit in the bitmap @bm that corresponds
+ *	memory_bm_find_bit - find the bit in the bitmap @bm that corresponds
 *	to given pfn.  The cur_zone_bm member of @bm and the cur_block member
 *	of @bm->cur_zone_bm are updated.
- *
- *	If the bit cannot be set, the function returns -EINVAL .
 */
 
-static int
-memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn)
+static void memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn,
+			       void **addr, unsigned int *bit_nr)
 {
 	struct zone_bitmap *zone_bm;
 	struct bm_block *bb;
@@ -463,8 +461,8 @@ memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn)
 		/* We don't assume that the zones are sorted by pfns */
 		while (pfn < zone_bm->start_pfn || pfn >= zone_bm->end_pfn) {
 			zone_bm = zone_bm->next;
-			if (unlikely(!zone_bm))
-				return -EINVAL;
+
+			BUG_ON(!zone_bm);
 		}
 		bm->cur.zone_bm = zone_bm;
 	}
@@ -475,13 +473,40 @@ memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn)
 
 	while (pfn >= bb->end_pfn) {
 		bb = bb->next;
-		if (unlikely(!bb))
-			return -EINVAL;
+
+		BUG_ON(!bb);
 	}
 	zone_bm->cur_block = bb;
 	pfn -= bb->start_pfn;
-	set_bit(pfn % BM_BITS_PER_CHUNK, bb->data + pfn / BM_BITS_PER_CHUNK);
-	return 0;
+	*bit_nr = pfn % BM_BITS_PER_CHUNK;
+	*addr = bb->data + pfn / BM_BITS_PER_CHUNK;
+}
+
+static void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn)
+{
+	void *addr;
+	unsigned int bit;
+
+	memory_bm_find_bit(bm, pfn, &addr, &bit);
+	set_bit(bit, addr);
+}
+
+static void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn)
+{
+	void *addr;
+	unsigned int bit;
+
+	memory_bm_find_bit(bm, pfn, &addr, &bit);
+	clear_bit(bit, addr);
+}
+
+static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn)
+{
+	void *addr;
+	unsigned int bit;
+
+	memory_bm_find_bit(bm, pfn, &addr, &bit);
+	return test_bit(bit, addr);
 }
 
 /* Two auxiliary functions for memory_bm_next_pfn */
@@ -564,6 +589,199 @@ static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm)
 }
 
 /**
+ *	This structure represents a range of page frames the contents of which
+ *	should not be saved during the suspend.
+ */
+
+struct nosave_region {
+	struct list_head list;
+	unsigned long start_pfn;
+	unsigned long end_pfn;
+};
+
+static LIST_HEAD(nosave_regions);
+
+/**
+ *	register_nosave_region - register a range of page frames the contents
+ *	of which should not be saved during the suspend (to be used in the early
+ *	initialization code)
+ */
+
+void __init
+register_nosave_region(unsigned long start_pfn, unsigned long end_pfn)
+{
+	struct nosave_region *region;
+
+	if (start_pfn >= end_pfn)
+		return;
+
+	if (!list_empty(&nosave_regions)) {
+		/* Try to extend the previous region (they should be sorted) */
+		region = list_entry(nosave_regions.prev,
+				struct nosave_region, list);
+		if (region->end_pfn == start_pfn) {
+			region->end_pfn = end_pfn;
+			goto Report;
+		}
+	}
+	/* This allocation cannot fail */
+	region = alloc_bootmem_low(sizeof(struct nosave_region));
+	region->start_pfn = start_pfn;
+	region->end_pfn = end_pfn;
+	list_add_tail(&region->list, &nosave_regions);
+ Report:
+	printk("swsusp: Registered nosave memory region: %016lx - %016lx\n",
+		start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT);
+}
+
+/*
+ * Set bits in this map correspond to the page frames the contents of which
+ * should not be saved during the suspend.
+ */
+static struct memory_bitmap *forbidden_pages_map;
+
+/* Set bits in this map correspond to free page frames. */
+static struct memory_bitmap *free_pages_map;
+
+/*
+ * Each page frame allocated for creating the image is marked by setting the
+ * corresponding bits in forbidden_pages_map and free_pages_map simultaneously
+ */
+
+void swsusp_set_page_free(struct page *page)
+{
+	if (free_pages_map)
+		memory_bm_set_bit(free_pages_map, page_to_pfn(page));
+}
+
+static int swsusp_page_is_free(struct page *page)
+{
+	return free_pages_map ?
+		memory_bm_test_bit(free_pages_map, page_to_pfn(page)) : 0;
+}
+
+void swsusp_unset_page_free(struct page *page)
+{
+	if (free_pages_map)
+		memory_bm_clear_bit(free_pages_map, page_to_pfn(page));
+}
+
+static void swsusp_set_page_forbidden(struct page *page)
+{
+	if (forbidden_pages_map)
+		memory_bm_set_bit(forbidden_pages_map, page_to_pfn(page));
+}
+
+int swsusp_page_is_forbidden(struct page *page)
+{
+	return forbidden_pages_map ?
+		memory_bm_test_bit(forbidden_pages_map, page_to_pfn(page)) : 0;
+}
+
+static void swsusp_unset_page_forbidden(struct page *page)
+{
+	if (forbidden_pages_map)
+		memory_bm_clear_bit(forbidden_pages_map, page_to_pfn(page));
+}
+
+/**
+ *	mark_nosave_pages - set bits corresponding to the page frames the
+ *	contents of which should not be saved in a given bitmap.
+ */
+
+static void mark_nosave_pages(struct memory_bitmap *bm)
+{
+	struct nosave_region *region;
+
+	if (list_empty(&nosave_regions))
+		return;
+
+	list_for_each_entry(region, &nosave_regions, list) {
+		unsigned long pfn;
+
+		printk("swsusp: Marking nosave pages: %016lx - %016lx\n",
+				region->start_pfn << PAGE_SHIFT,
+				region->end_pfn << PAGE_SHIFT);
+
+		for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++)
+			memory_bm_set_bit(bm, pfn);
+	}
+}
+
+/**
+ *	create_basic_memory_bitmaps - create bitmaps needed for marking page
+ *	frames that should not be saved and free page frames.  The pointers
+ *	forbidden_pages_map and free_pages_map are only modified if everything
+ *	goes well, because we don't want the bits to be used before both bitmaps
+ *	are set up.
+ */
+
+int create_basic_memory_bitmaps(void)
+{
+	struct memory_bitmap *bm1, *bm2;
+	int error = 0;
+
+	BUG_ON(forbidden_pages_map || free_pages_map);
+
+	bm1 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL);
+	if (!bm1)
+		return -ENOMEM;
+
+	error = memory_bm_create(bm1, GFP_KERNEL, PG_ANY);
+	if (error)
+		goto Free_first_object;
+
+	bm2 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL);
+	if (!bm2)
+		goto Free_first_bitmap;
+
+	error = memory_bm_create(bm2, GFP_KERNEL, PG_ANY);
+	if (error)
+		goto Free_second_object;
+
+	forbidden_pages_map = bm1;
+	free_pages_map = bm2;
+	mark_nosave_pages(forbidden_pages_map);
+
+	printk("swsusp: Basic memory bitmaps created\n");
+
+	return 0;
+
+ Free_second_object:
+	kfree(bm2);
+ Free_first_bitmap:
+	memory_bm_free(bm1, PG_UNSAFE_CLEAR);
+ Free_first_object:
+	kfree(bm1);
+	return -ENOMEM;
+}
+
+/**
+ *	free_basic_memory_bitmaps - free memory bitmaps allocated by
+ *	create_basic_memory_bitmaps().  The auxiliary pointers are necessary
+ *	so that the bitmaps themselves are not referred to while they are being
+ *	freed.
+ */
+
+void free_basic_memory_bitmaps(void)
+{
+	struct memory_bitmap *bm1, *bm2;
+
+	BUG_ON(!(forbidden_pages_map && free_pages_map));
+
+	bm1 = forbidden_pages_map;
+	bm2 = free_pages_map;
+	forbidden_pages_map = NULL;
+	free_pages_map = NULL;
+	memory_bm_free(bm1, PG_UNSAFE_CLEAR);
+	kfree(bm1);
+	memory_bm_free(bm2, PG_UNSAFE_CLEAR);
+	kfree(bm2);
+
+	printk("swsusp: Basic memory bitmaps freed\n");
+}
+
+/**
 *	snapshot_additional_pages - estimate the number of additional pages
 *	be needed for setting up the suspend image data structures for given
 *	zone (usually the returned value is greater than the exact number)
@@ -615,7 +833,8 @@ static struct page *saveable_highmem_page(unsigned long pfn)
 
 	BUG_ON(!PageHighMem(page));
 
-	if (PageNosave(page) || PageReserved(page) || PageNosaveFree(page))
+	if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page) ||
+	    PageReserved(page))
 		return NULL;
 
 	return page;
@@ -651,17 +870,6 @@ static inline unsigned int count_highmem_pages(void) { return 0; }
 #endif /* CONFIG_HIGHMEM */
 
 /**
- *	pfn_is_nosave - check if given pfn is in the 'nosave' section
- */
-
-static inline int pfn_is_nosave(unsigned long pfn)
-{
-	unsigned long nosave_begin_pfn = __pa(&__nosave_begin) >> PAGE_SHIFT;
-	unsigned long nosave_end_pfn = PAGE_ALIGN(__pa(&__nosave_end)) >> PAGE_SHIFT;
-	return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn);
-}
-
-/**
 *	saveable - Determine whether a non-highmem page should be included in
 *	the suspend image.
 *
@@ -681,7 +889,7 @@ static struct page *saveable_page(unsigned long pfn)
 
 	BUG_ON(PageHighMem(page));
 
-	if (PageNosave(page) || PageNosaveFree(page))
+	if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page))
 		return NULL;
 
 	if (PageReserved(page) && pfn_is_nosave(pfn))
@@ -821,9 +1029,10 @@ void swsusp_free(void)
 		if (pfn_valid(pfn)) {
 			struct page *page = pfn_to_page(pfn);
 
-			if (PageNosave(page) && PageNosaveFree(page)) {
-				ClearPageNosave(page);
-				ClearPageNosaveFree(page);
+			if (swsusp_page_is_forbidden(page) &&
+			    swsusp_page_is_free(page)) {
+				swsusp_unset_page_forbidden(page);
+				swsusp_unset_page_free(page);
 				__free_page(page);
 			}
 		}
@@ -1146,7 +1355,7 @@ static int mark_unsafe_pages(struct memory_bitmap *bm)
 		max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
 		for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
 			if (pfn_valid(pfn))
-				ClearPageNosaveFree(pfn_to_page(pfn));
+				swsusp_unset_page_free(pfn_to_page(pfn));
 	}
 
 	/* Mark pages that correspond to the "original" pfns as "unsafe" */
@@ -1155,7 +1364,7 @@ static int mark_unsafe_pages(struct memory_bitmap *bm)
 		pfn = memory_bm_next_pfn(bm);
 		if (likely(pfn != BM_END_OF_MAP)) {
 			if (likely(pfn_valid(pfn)))
-				SetPageNosaveFree(pfn_to_page(pfn));
+				swsusp_set_page_free(pfn_to_page(pfn));
 			else
 				return -EFAULT;
 		}
@@ -1321,14 +1530,14 @@ prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p)
 		struct page *page;
 
 		page = alloc_page(__GFP_HIGHMEM);
-		if (!PageNosaveFree(page)) {
+		if (!swsusp_page_is_free(page)) {
 			/* The page is "safe", set its bit the bitmap */
 			memory_bm_set_bit(bm, page_to_pfn(page));
 			safe_highmem_pages++;
 		}
 		/* Mark the page as allocated */
-		SetPageNosave(page);
-		SetPageNosaveFree(page);
+		swsusp_set_page_forbidden(page);
+		swsusp_set_page_free(page);
 	}
 	memory_bm_position_reset(bm);
 	safe_highmem_bm = bm;
@@ -1360,7 +1569,7 @@ get_highmem_page_buffer(struct page *page, struct chain_allocator *ca)
 	struct highmem_pbe *pbe;
 	void *kaddr;
 
-	if (PageNosave(page) && PageNosaveFree(page)) {
+	if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page)) {
 		/* We have allocated the "original" page frame and we can
 		 * use it directly to store the loaded page.
 		 */
@@ -1522,14 +1731,14 @@ prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
 			error = -ENOMEM;
 			goto Free;
 		}
-		if (!PageNosaveFree(virt_to_page(lp))) {
+		if (!swsusp_page_is_free(virt_to_page(lp))) {
 			/* The page is "safe", add it to the list */
 			lp->next = safe_pages_list;
 			safe_pages_list = lp;
 		}
 		/* Mark the page as allocated */
-		SetPageNosave(virt_to_page(lp));
-		SetPageNosaveFree(virt_to_page(lp));
+		swsusp_set_page_forbidden(virt_to_page(lp));
+		swsusp_set_page_free(virt_to_page(lp));
 		nr_pages--;
 	}
 	/* Free the reserved safe pages so that chain_alloc() can use them */
@@ -1558,7 +1767,7 @@ static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca)
 	if (PageHighMem(page))
 		return get_highmem_page_buffer(page, ca);
 
-	if (PageNosave(page) && PageNosaveFree(page))
+	if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page))
 		/* We have allocated the "original" page frame and we can
 		 * use it directly to store the loaded page.
 		 */
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 3581f8f86acd..e83ed9945a80 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -33,12 +33,14 @@ extern char resume_file[];
 
 #define SWSUSP_SIG	"S1SUSPEND"
 
-static struct swsusp_header {
+struct swsusp_header {
 	char reserved[PAGE_SIZE - 20 - sizeof(sector_t)];
 	sector_t image;
 	char	orig_sig[10];
 	char	sig[10];
-} __attribute__((packed, aligned(PAGE_SIZE))) swsusp_header;
+} __attribute__((packed));
+
+static struct swsusp_header *swsusp_header;
 
 /*
 * General things
@@ -141,14 +143,14 @@ static int mark_swapfiles(sector_t start)
 {
 	int error;
 
-	bio_read_page(swsusp_resume_block, &swsusp_header, NULL);
-	if (!memcmp("SWAP-SPACE",swsusp_header.sig, 10) ||
-	    !memcmp("SWAPSPACE2",swsusp_header.sig, 10)) {
-		memcpy(swsusp_header.orig_sig,swsusp_header.sig, 10);
-		memcpy(swsusp_header.sig,SWSUSP_SIG, 10);
-		swsusp_header.image = start;
+	bio_read_page(swsusp_resume_block, swsusp_header, NULL);
+	if (!memcmp("SWAP-SPACE",swsusp_header->sig, 10) ||
+	    !memcmp("SWAPSPACE2",swsusp_header->sig, 10)) {
+		memcpy(swsusp_header->orig_sig,swsusp_header->sig, 10);
+		memcpy(swsusp_header->sig,SWSUSP_SIG, 10);
+		swsusp_header->image = start;
 		error = bio_write_page(swsusp_resume_block,
-					&swsusp_header, NULL);
+					swsusp_header, NULL);
 	} else {
 		printk(KERN_ERR "swsusp: Swap header not found!\n");
 		error = -ENODEV;
@@ -241,7 +243,6 @@ struct swap_map_page {
 struct swap_map_handle {
 	struct swap_map_page *cur;
 	sector_t cur_swap;
-	struct bitmap_page *bitmap;
 	unsigned int k;
 };
 
@@ -250,9 +251,6 @@ static void release_swap_writer(struct swap_map_handle *handle)
 	if (handle->cur)
 		free_page((unsigned long)handle->cur);
 	handle->cur = NULL;
-	if (handle->bitmap)
-		free_bitmap(handle->bitmap);
-	handle->bitmap = NULL;
 }
 
 static int get_swap_writer(struct swap_map_handle *handle)
@@ -260,12 +258,7 @@ static int get_swap_writer(struct swap_map_handle *handle)
 	handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_KERNEL);
 	if (!handle->cur)
 		return -ENOMEM;
-	handle->bitmap = alloc_bitmap(count_swap_pages(root_swap, 0));
-	if (!handle->bitmap) {
-		release_swap_writer(handle);
-		return -ENOMEM;
-	}
-	handle->cur_swap = alloc_swapdev_block(root_swap, handle->bitmap);
+	handle->cur_swap = alloc_swapdev_block(root_swap);
 	if (!handle->cur_swap) {
 		release_swap_writer(handle);
 		return -ENOSPC;
@@ -282,7 +275,7 @@ static int swap_write_page(struct swap_map_handle *handle, void *buf,
 
 	if (!handle->cur)
 		return -EINVAL;
-	offset = alloc_swapdev_block(root_swap, handle->bitmap);
+	offset = alloc_swapdev_block(root_swap);
 	error = write_page(buf, offset, bio_chain);
 	if (error)
 		return error;
@@ -291,7 +284,7 @@ static int swap_write_page(struct swap_map_handle *handle, void *buf,
 		error = wait_on_bio_chain(bio_chain);
 		if (error)
 			goto out;
-		offset = alloc_swapdev_block(root_swap, handle->bitmap);
+		offset = alloc_swapdev_block(root_swap);
 		if (!offset)
 			return -ENOSPC;
 		handle->cur->next_swap = offset;
@@ -428,7 +421,8 @@ int swsusp_write(void)
 		}
 	}
 	if (error)
-		free_all_swap_pages(root_swap, handle.bitmap);
+		free_all_swap_pages(root_swap);
+
 	release_swap_writer(&handle);
 out:
 	swsusp_close();
@@ -564,7 +558,7 @@ int swsusp_read(void)
 	if (error < PAGE_SIZE)
 		return error < 0 ? error : -EFAULT;
 	header = (struct swsusp_info *)data_of(snapshot);
-	error = get_swap_reader(&handle, swsusp_header.image);
+	error = get_swap_reader(&handle, swsusp_header->image);
 	if (!error)
 		error = swap_read_page(&handle, header, NULL);
 	if (!error)
@@ -591,17 +585,17 @@ int swsusp_check(void)
 	resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_READ);
 	if (!IS_ERR(resume_bdev)) {
 		set_blocksize(resume_bdev, PAGE_SIZE);
-		memset(&swsusp_header, 0, sizeof(swsusp_header));
+		memset(swsusp_header, 0, PAGE_SIZE);
 		error = bio_read_page(swsusp_resume_block,
-					&swsusp_header, NULL);
+					swsusp_header, NULL);
 		if (error)
 			return error;
 
-		if (!memcmp(SWSUSP_SIG, swsusp_header.sig, 10)) {
-			memcpy(swsusp_header.sig, swsusp_header.orig_sig, 10);
+		if (!memcmp(SWSUSP_SIG, swsusp_header->sig, 10)) {
+			memcpy(swsusp_header->sig, swsusp_header->orig_sig, 10);
 			/* Reset swap signature now */
 			error = bio_write_page(swsusp_resume_block,
-					&swsusp_header, NULL);
+					swsusp_header, NULL);
 		} else {
 			return -EINVAL;
 		}
@@ -632,3 +626,13 @@ void swsusp_close(void)
 
 	blkdev_put(resume_bdev);
 }
+
+static int swsusp_header_init(void)
+{
+	swsusp_header = (struct swsusp_header *)__get_free_page(GFP_KERNEL);
+	if (!swsusp_header)
+		panic("Could not allocate memory for swsusp_header\n");
+	return 0;
+}
+
+core_initcall(swsusp_header_init);
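
With swsusp_header now a pointer, the header lives in its own page-sized buffer allocated once at boot by the core initcall above, so bio_read_page() and bio_write_page() can transfer it directly. A rough user-space analogue of that allocate-once, page-aligned header pattern (posix_memalign() stands in for __get_free_page(); the header fields are illustrative):

    /*
     * Sketch: keep a header in a separately allocated, page-aligned buffer
     * so that whole-page I/O primitives can read and write it directly.
     */
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <unistd.h>
    
    struct header {
    	char orig_sig[10];
    	char sig[10];
    	unsigned long image;
    };
    
    static struct header *hdr;	/* one page, allocated once at startup */
    
    static int header_init(void)
    {
    	long page = sysconf(_SC_PAGESIZE);
    
    	if (posix_memalign((void **)&hdr, page, page))
    		return -1;	/* the kernel version panics instead */
    	memset(hdr, 0, page);
    	return 0;
    }
    
    int main(void)
    {
    	if (header_init())
    		return 1;
    	hdr->image = 42;	/* fill in fields, then hand the page to block I/O */
    	printf("header page at %p, image sector %lu\n", (void *)hdr, hdr->image);
    	free(hdr);
    	return 0;
    }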
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c
index 175370824f37..5da304c8f1f6 100644
--- a/kernel/power/swsusp.c
+++ b/kernel/power/swsusp.c
@@ -50,6 +50,7 @@
 #include <linux/syscalls.h>
 #include <linux/highmem.h>
 #include <linux/time.h>
+#include <linux/rbtree.h>
 
 #include "power.h"
 
@@ -74,72 +75,69 @@ static inline unsigned int count_highmem_pages(void) { return 0; }
 /**
  * The following functions are used for tracing the allocated
  * swap pages, so that they can be freed in case of an error.
- *
- * The functions operate on a linked bitmap structure defined
- * in power.h
  */
 
-void free_bitmap(struct bitmap_page *bitmap)
-{
-	struct bitmap_page *bp;
-
-	while (bitmap) {
-		bp = bitmap->next;
-		free_page((unsigned long)bitmap);
-		bitmap = bp;
-	}
-}
-
-struct bitmap_page *alloc_bitmap(unsigned int nr_bits)
-{
-	struct bitmap_page *bitmap, *bp;
-	unsigned int n;
-
-	if (!nr_bits)
-		return NULL;
-
-	bitmap = (struct bitmap_page *)get_zeroed_page(GFP_KERNEL);
-	bp = bitmap;
-	for (n = BITMAP_PAGE_BITS; n < nr_bits; n += BITMAP_PAGE_BITS) {
-		bp->next = (struct bitmap_page *)get_zeroed_page(GFP_KERNEL);
-		bp = bp->next;
-		if (!bp) {
-			free_bitmap(bitmap);
-			return NULL;
-		}
-	}
-	return bitmap;
-}
-
-static int bitmap_set(struct bitmap_page *bitmap, unsigned long bit)
-{
-	unsigned int n;
-
-	n = BITMAP_PAGE_BITS;
-	while (bitmap && n <= bit) {
-		n += BITMAP_PAGE_BITS;
-		bitmap = bitmap->next;
-	}
-	if (!bitmap)
-		return -EINVAL;
-	n -= BITMAP_PAGE_BITS;
-	bit -= n;
-	n = 0;
-	while (bit >= BITS_PER_CHUNK) {
-		bit -= BITS_PER_CHUNK;
-		n++;
-	}
-	bitmap->chunks[n] |= (1UL << bit);
-	return 0;
-}
-
-sector_t alloc_swapdev_block(int swap, struct bitmap_page *bitmap)
+struct swsusp_extent {
+	struct rb_node node;
+	unsigned long start;
+	unsigned long end;
+};
+
+static struct rb_root swsusp_extents = RB_ROOT;
+
+static int swsusp_extents_insert(unsigned long swap_offset)
+{
+	struct rb_node **new = &(swsusp_extents.rb_node);
+	struct rb_node *parent = NULL;
+	struct swsusp_extent *ext;
+
+	/* Figure out where to put the new node */
+	while (*new) {
+		ext = container_of(*new, struct swsusp_extent, node);
+		parent = *new;
+		if (swap_offset < ext->start) {
+			/* Try to merge */
+			if (swap_offset == ext->start - 1) {
+				ext->start--;
+				return 0;
+			}
+			new = &((*new)->rb_left);
+		} else if (swap_offset > ext->end) {
+			/* Try to merge */
+			if (swap_offset == ext->end + 1) {
+				ext->end++;
+				return 0;
+			}
+			new = &((*new)->rb_right);
+		} else {
+			/* It already is in the tree */
+			return -EINVAL;
+		}
+	}
+	/* Add the new node and rebalance the tree. */
+	ext = kzalloc(sizeof(struct swsusp_extent), GFP_KERNEL);
+	if (!ext)
+		return -ENOMEM;
+
+	ext->start = swap_offset;
+	ext->end = swap_offset;
+	rb_link_node(&ext->node, parent, new);
+	rb_insert_color(&ext->node, &swsusp_extents);
+	return 0;
+}
+
+/**
+ * alloc_swapdev_block - allocate a swap page and register that it has
+ * been allocated, so that it can be freed in case of an error.
+ */
+
+sector_t alloc_swapdev_block(int swap)
 {
 	unsigned long offset;
 
 	offset = swp_offset(get_swap_page_of_type(swap));
 	if (offset) {
-		if (bitmap_set(bitmap, offset))
+		if (swsusp_extents_insert(offset))
 			swap_free(swp_entry(swap, offset));
 		else
 			return swapdev_block(swap, offset);
@@ -147,23 +145,34 @@ sector_t alloc_swapdev_block(int swap, struct bitmap_page *bitmap)
 	return 0;
 }
 
-void free_all_swap_pages(int swap, struct bitmap_page *bitmap)
+/**
+ * free_all_swap_pages - free swap pages allocated for saving image data.
+ * It also frees the extents used to register which swap entries had been
+ * allocated.
+ */
+
+void free_all_swap_pages(int swap)
 {
-	unsigned int bit, n;
-	unsigned long test;
-
-	bit = 0;
-	while (bitmap) {
-		for (n = 0; n < BITMAP_PAGE_CHUNKS; n++)
-			for (test = 1UL; test; test <<= 1) {
-				if (bitmap->chunks[n] & test)
-					swap_free(swp_entry(swap, bit));
-				bit++;
-			}
-		bitmap = bitmap->next;
-	}
+	struct rb_node *node;
+
+	while ((node = swsusp_extents.rb_node)) {
+		struct swsusp_extent *ext;
+		unsigned long offset;
+
+		ext = container_of(node, struct swsusp_extent, node);
+		rb_erase(node, &swsusp_extents);
+		for (offset = ext->start; offset <= ext->end; offset++)
+			swap_free(swp_entry(swap, offset));
+
+		kfree(ext);
+	}
 }
 
+int swsusp_swap_in_use(void)
+{
+	return (swsusp_extents.rb_node != NULL);
+}
+
 /**
  * swsusp_show_speed - print the time elapsed between two events represented by
  * @start and @stop
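
The extent tree replaces the old bitmap: allocated swap offsets are kept as maximal runs [start, end], and swsusp_extents_insert() first tries to grow an adjacent run before adding a node, so long sequential allocations collapse into a handful of extents. A user-space sketch of the same merge-on-insert idea, with a sorted singly linked list standing in for the rbtree (same invariant, simpler structure):

    /*
     * Merge-on-insert extents: keep maximal runs [start, end] of
     * allocated offsets; extend a neighbouring run when possible.
     */
    #include <stdio.h>
    #include <stdlib.h>
    
    struct extent {
    	unsigned long start, end;
    	struct extent *next;
    };
    
    static struct extent *extents;	/* sorted by start */
    
    static int extent_insert(unsigned long off)
    {
    	struct extent **p = &extents, *e;
    
    	while ((e = *p)) {
    		if (off == e->start - 1) {	/* grow the run downward */
    			e->start--;
    			return 0;
    		}
    		if (off == e->end + 1) {	/* grow the run upward */
    			e->end++;
    			return 0;
    		}
    		if (off >= e->start && off <= e->end)
    			return -1;		/* already present */
    		if (off < e->start)
    			break;			/* insert before e */
    		p = &e->next;
    	}
    	e = malloc(sizeof(*e));
    	if (!e)
    		return -1;
    	e->start = e->end = off;
    	e->next = *p;
    	*p = e;
    	return 0;
    }
    
    int main(void)
    {
    	unsigned long offs[] = { 10, 11, 9, 20, 12 };
    
    	for (unsigned int i = 0; i < sizeof(offs) / sizeof(offs[0]); i++)
    		extent_insert(offs[i]);
    	for (struct extent *e = extents; e; e = e->next)
    		printf("[%lu, %lu]\n", e->start, e->end);	/* [9, 12] [20, 20] */
    	return 0;
    }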
@@ -224,7 +233,7 @@ int swsusp_shrink_memory(void)
 	long size, highmem_size;
 
 	highmem_size = count_highmem_pages();
-	size = count_data_pages() + PAGES_FOR_IO;
+	size = count_data_pages() + PAGES_FOR_IO + SPARE_PAGES;
 	tmp = size;
 	size += highmem_size;
 	for_each_zone (zone)
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 7cf6713b2325..040560d9c312 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -33,25 +33,29 @@
 static struct snapshot_data {
 	struct snapshot_handle handle;
 	int swap;
-	struct bitmap_page *bitmap;
 	int mode;
 	char frozen;
 	char ready;
 	char platform_suspend;
 } snapshot_state;
 
-static atomic_t device_available = ATOMIC_INIT(1);
+atomic_t snapshot_device_available = ATOMIC_INIT(1);
 
 static int snapshot_open(struct inode *inode, struct file *filp)
 {
 	struct snapshot_data *data;
 
-	if (!atomic_add_unless(&device_available, -1, 0))
+	if (!atomic_add_unless(&snapshot_device_available, -1, 0))
 		return -EBUSY;
 
-	if ((filp->f_flags & O_ACCMODE) == O_RDWR)
+	if ((filp->f_flags & O_ACCMODE) == O_RDWR) {
+		atomic_inc(&snapshot_device_available);
 		return -ENOSYS;
-
+	}
+	if (create_basic_memory_bitmaps()) {
+		atomic_inc(&snapshot_device_available);
+		return -ENOMEM;
+	}
 	nonseekable_open(inode, filp);
 	data = &snapshot_state;
 	filp->private_data = data;
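
The open path keeps its single-opener guard, but every early error now gives the slot back with atomic_inc(), so a failed open no longer leaves the device permanently busy. A user-space sketch of that try-acquire/release-on-error pattern using C11 atomics (atomic_compare_exchange_strong() plays the role of atomic_add_unless() for a 0/1 counter):

    /*
     * Single-opener guard: take the slot only if it is still available,
     * and give it back on every failure path.
     */
    #include <stdatomic.h>
    #include <stdio.h>
    
    static atomic_int available = 1;
    
    static int try_open(int want_rdwr)
    {
    	int one = 1;
    
    	/* atomic_add_unless(&v, -1, 0) analogue: 1 -> 0, or fail */
    	if (!atomic_compare_exchange_strong(&available, &one, 0))
    		return -1;	/* -EBUSY: someone else holds the device */
    
    	if (want_rdwr) {	/* unsupported mode: release before failing */
    		atomic_fetch_add(&available, 1);
    		return -2;	/* -ENOSYS */
    	}
    	return 0;
    }
    
    static void release(void)
    {
    	atomic_fetch_add(&available, 1);
    }
    
    int main(void)
    {
    	printf("first open: %d\n", try_open(0));	/* 0 */
    	printf("second open: %d\n", try_open(0));	/* -1, busy */
    	release();
    	printf("after release: %d\n", try_open(0));	/* 0 */
    	return 0;
    }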
@@ -64,7 +68,6 @@ static int snapshot_open(struct inode *inode, struct file *filp)
 		data->swap = -1;
 		data->mode = O_WRONLY;
 	}
-	data->bitmap = NULL;
 	data->frozen = 0;
 	data->ready = 0;
 	data->platform_suspend = 0;
@@ -77,16 +80,15 @@ static int snapshot_release(struct inode *inode, struct file *filp)
 	struct snapshot_data *data;
 
 	swsusp_free();
+	free_basic_memory_bitmaps();
 	data = filp->private_data;
-	free_all_swap_pages(data->swap, data->bitmap);
-	free_bitmap(data->bitmap);
+	free_all_swap_pages(data->swap);
 	if (data->frozen) {
 		mutex_lock(&pm_mutex);
 		thaw_processes();
-		enable_nonboot_cpus();
 		mutex_unlock(&pm_mutex);
 	}
-	atomic_inc(&device_available);
+	atomic_inc(&snapshot_device_available);
 	return 0;
 }
 
@@ -294,14 +296,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
 			error = -ENODEV;
 			break;
 		}
-		if (!data->bitmap) {
-			data->bitmap = alloc_bitmap(count_swap_pages(data->swap, 0));
-			if (!data->bitmap) {
-				error = -ENOMEM;
-				break;
-			}
-		}
-		offset = alloc_swapdev_block(data->swap, data->bitmap);
+		offset = alloc_swapdev_block(data->swap);
 		if (offset) {
 			offset <<= PAGE_SHIFT;
 			error = put_user(offset, (sector_t __user *)arg);
@@ -315,13 +310,11 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
 			error = -ENODEV;
 			break;
 		}
-		free_all_swap_pages(data->swap, data->bitmap);
-		free_bitmap(data->bitmap);
-		data->bitmap = NULL;
+		free_all_swap_pages(data->swap);
 		break;
 
 	case SNAPSHOT_SET_SWAP_FILE:
-		if (!data->bitmap) {
+		if (!swsusp_swap_in_use()) {
 			/*
 			 * User space encodes device types as two-byte values,
 			 * so we need to recode them
@@ -420,7 +413,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
 		break;
 
 	case SNAPSHOT_SET_SWAP_AREA:
-		if (data->bitmap) {
+		if (swsusp_swap_in_use()) {
 			error = -EPERM;
 		} else {
 			struct resume_swap_area swap_area;
diff --git a/kernel/sched.c b/kernel/sched.c
index 960d7c5fca39..0227f1625a75 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5244,6 +5244,11 @@ int __init migration_init(void)
 #endif
 
 #ifdef CONFIG_SMP
+
+/* Number of possible processor ids */
+int nr_cpu_ids __read_mostly = NR_CPUS;
+EXPORT_SYMBOL(nr_cpu_ids);
+
 #undef SCHED_DOMAIN_DEBUG
 #ifdef SCHED_DOMAIN_DEBUG
 static void sched_domain_debug(struct sched_domain *sd, int cpu)
@@ -6726,6 +6731,7 @@ int in_sched_functions(unsigned long addr)
 void __init sched_init(void)
 {
 	int i, j, k;
+	int highest_cpu = 0;
 
 	for_each_possible_cpu(i) {
 		struct prio_array *array;
@@ -6760,11 +6766,13 @@ void __init sched_init(void)
 			// delimiter for bitsearch
 			__set_bit(MAX_PRIO, array->bitmap);
 		}
+		highest_cpu = i;
 	}
 
 	set_load_weight(&init_task);
 
 #ifdef CONFIG_SMP
+	nr_cpu_ids = highest_cpu + 1;
 	open_softirq(SCHED_SOFTIRQ, run_rebalance_domains, NULL);
 #endif
 
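
sched_init() records the highest possible CPU id it visits and sets nr_cpu_ids one past it, giving loops a runtime bound instead of the compile-time NR_CPUS. A small sketch of deriving the count the same way from a possible-CPU mask (the mask and NR_CPUS value here are made up for illustration):

    /*
     * Derive nr_cpu_ids from a possible-CPU bitmask, the way sched_init()
     * derives it from for_each_possible_cpu().
     */
    #include <stdio.h>
    
    #define NR_CPUS 64
    
    int main(void)
    {
    	/* pretend CPUs 0, 1 and 5 are possible (bit i = cpu i) */
    	unsigned long long cpu_possible_mask =
    		(1ULL << 0) | (1ULL << 1) | (1ULL << 5);
    	int nr_cpu_ids = NR_CPUS;	/* compile-time fallback */
    	int highest_cpu = 0;
    
    	for (int cpu = 0; cpu < NR_CPUS; cpu++)	/* for_each_possible_cpu() */
    		if (cpu_possible_mask & (1ULL << cpu))
    			highest_cpu = cpu;
    
    	nr_cpu_ids = highest_cpu + 1;
    	printf("nr_cpu_ids = %d\n", nr_cpu_ids);	/* 6, not 64 */
    	return 0;
    }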
diff --git a/kernel/signal.c b/kernel/signal.c
index 3670225ecbc0..2b4087d545a3 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2636,9 +2636,5 @@ __attribute__((weak)) const char *arch_vma_name(struct vm_area_struct *vma)
 
 void __init signals_init(void)
 {
-	sigqueue_cachep =
-		kmem_cache_create("sigqueue",
-				  sizeof(struct sigqueue),
-				  __alignof__(struct sigqueue),
-				  SLAB_PANIC, NULL, NULL);
+	sigqueue_cachep = KMEM_CACHE(sigqueue, SLAB_PANIC);
 }
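
KMEM_CACHE(name, flags) derives the cache name, object size and alignment from the struct type, replacing the open-coded kmem_cache_create() calls here and in taskstats below (kmem_cache_create() still took constructor and destructor arguments at this point, which the macro passes as NULL). A user-space demonstration of the macro idea, with create_cache() as a hypothetical stand-in for kmem_cache_create():

    /*
     * Derive a cache's name, object size and alignment from the struct
     * type in one place, instead of spelling them out at each call site.
     */
    #include <stddef.h>
    #include <stdio.h>
    
    struct sigqueue { long a; char b; };
    
    static void create_cache(const char *name, size_t size, size_t align)
    {
    	printf("cache \"%s\": object size %zu, align %zu\n", name, size, align);
    }
    
    #define KMEM_CACHE_DEMO(__struct)				\
    	create_cache(#__struct, sizeof(struct __struct),	\
    		     __alignof__(struct __struct))
    
    int main(void)
    {
    	KMEM_CACHE_DEMO(sigqueue);	/* name/size/align from the type */
    	return 0;
    }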
diff --git a/kernel/sys.c b/kernel/sys.c
index 123b165080e6..fe1f3ab20477 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -881,7 +881,7 @@ asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void __user
 #ifdef CONFIG_SOFTWARE_SUSPEND
 	case LINUX_REBOOT_CMD_SW_SUSPEND:
 	{
-		int ret = software_suspend();
+		int ret = pm_suspend(PM_SUSPEND_DISK);
 		unlock_kernel();
 		return ret;
 	}
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index ad7d2392cb0e..906cae771585 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -524,9 +524,7 @@ void __init taskstats_init_early(void)
 {
 	unsigned int i;
 
-	taskstats_cache = kmem_cache_create("taskstats_cache",
-					sizeof(struct taskstats),
-					0, SLAB_PANIC, NULL, NULL);
+	taskstats_cache = KMEM_CACHE(taskstats, SLAB_PANIC);
 	for_each_possible_cpu(i) {
 		INIT_LIST_HEAD(&(per_cpu(listener_array, i).list));
 		init_rwsem(&(per_cpu(listener_array, i).sem));