Diffstat (limited to 'kernel')

-rw-r--r--  kernel/audit.c          |   14
-rw-r--r--  kernel/auditfilter.c    |    2
-rw-r--r--  kernel/auditsc.c        |    6
-rw-r--r--  kernel/cpu.c            |  138
-rw-r--r--  kernel/cpuset.c         |    6
-rw-r--r--  kernel/irq/handle.c     |    2
-rw-r--r--  kernel/power/Kconfig    |   11
-rw-r--r--  kernel/power/Makefile   |    2
-rw-r--r--  kernel/power/disk.c     |    7
-rw-r--r--  kernel/power/main.c     |   40
-rw-r--r--  kernel/power/power.h    |   59
-rw-r--r--  kernel/power/smp.c      |   62
-rw-r--r--  kernel/power/snapshot.c | 1155
-rw-r--r--  kernel/power/swap.c     |  270
-rw-r--r--  kernel/power/swsusp.c   |    5
-rw-r--r--  kernel/power/user.c     |   15
-rw-r--r--  kernel/printk.c         |    3
-rw-r--r--  kernel/profile.c        |   16
-rw-r--r--  kernel/sched.c          |   54
-rw-r--r--  kernel/sysctl.c         |   11

20 files changed, 1299 insertions, 579 deletions
diff --git a/kernel/audit.c b/kernel/audit.c
index 963fd15c9621..f9889ee77825 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -244,7 +244,7 @@ static int audit_set_rate_limit(int limit, uid_t loginuid, u32 sid)
 	char *ctx = NULL;
 	u32 len;
 	int rc;
-	if ((rc = selinux_ctxid_to_string(sid, &ctx, &len)))
+	if ((rc = selinux_sid_to_string(sid, &ctx, &len)))
 		return rc;
 	else
 		audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
@@ -267,7 +267,7 @@ static int audit_set_backlog_limit(int limit, uid_t loginuid, u32 sid)
 	char *ctx = NULL;
 	u32 len;
 	int rc;
-	if ((rc = selinux_ctxid_to_string(sid, &ctx, &len)))
+	if ((rc = selinux_sid_to_string(sid, &ctx, &len)))
 		return rc;
 	else
 		audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
@@ -293,7 +293,7 @@ static int audit_set_enabled(int state, uid_t loginuid, u32 sid)
 	char *ctx = NULL;
 	u32 len;
 	int rc;
-	if ((rc = selinux_ctxid_to_string(sid, &ctx, &len)))
+	if ((rc = selinux_sid_to_string(sid, &ctx, &len)))
 		return rc;
 	else
 		audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
@@ -321,7 +321,7 @@ static int audit_set_failure(int state, uid_t loginuid, u32 sid)
 	char *ctx = NULL;
 	u32 len;
 	int rc;
-	if ((rc = selinux_ctxid_to_string(sid, &ctx, &len)))
+	if ((rc = selinux_sid_to_string(sid, &ctx, &len)))
 		return rc;
 	else
 		audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
@@ -538,7 +538,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		if (status_get->mask & AUDIT_STATUS_PID) {
 			int old = audit_pid;
 			if (sid) {
-				if ((err = selinux_ctxid_to_string(
+				if ((err = selinux_sid_to_string(
 						sid, &ctx, &len)))
 					return err;
 				else
@@ -576,7 +576,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 				"user pid=%d uid=%u auid=%u",
 				pid, uid, loginuid);
 			if (sid) {
-				if (selinux_ctxid_to_string(
+				if (selinux_sid_to_string(
 						sid, &ctx, &len)) {
 					audit_log_format(ab,
 						" ssid=%u", sid);
@@ -614,7 +614,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 				loginuid, sid);
 		break;
 	case AUDIT_SIGNAL_INFO:
-		err = selinux_ctxid_to_string(audit_sig_sid, &ctx, &len);
+		err = selinux_sid_to_string(audit_sig_sid, &ctx, &len);
 		if (err)
 			return err;
 		sig_data = kmalloc(sizeof(*sig_data) + len, GFP_KERNEL);
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index a44879b0c72f..1a58a81fb09d 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -1398,7 +1398,7 @@ static void audit_log_rule_change(uid_t loginuid, u32 sid, char *action,
 	if (sid) {
 		char *ctx = NULL;
 		u32 len;
-		if (selinux_ctxid_to_string(sid, &ctx, &len))
+		if (selinux_sid_to_string(sid, &ctx, &len))
 			audit_log_format(ab, " ssid=%u", sid);
 		else
 			audit_log_format(ab, " subj=%s", ctx);
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 1bd8827a0102..fb83c5cb8c32 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -385,7 +385,7 @@ static int audit_filter_rules(struct task_struct *tsk,
 			   logged upon error */
 			if (f->se_rule) {
 				if (need_sid) {
-					selinux_task_ctxid(tsk, &sid);
+					selinux_get_task_sid(tsk, &sid);
 					need_sid = 0;
 				}
 				result = selinux_audit_rule_match(sid, f->type,
@@ -898,7 +898,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 			if (axi->osid != 0) {
 				char *ctx = NULL;
 				u32 len;
-				if (selinux_ctxid_to_string(
+				if (selinux_sid_to_string(
 						axi->osid, &ctx, &len)) {
 					audit_log_format(ab, " osid=%u",
 							axi->osid);
@@ -1005,7 +1005,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 		if (n->osid != 0) {
 			char *ctx = NULL;
 			u32 len;
-			if (selinux_ctxid_to_string(
+			if (selinux_sid_to_string(
 				n->osid, &ctx, &len)) {
 				audit_log_format(ab, " osid=%u", n->osid);
 				call_panic = 2;
diff --git a/kernel/cpu.c b/kernel/cpu.c
index f230f9ae01c2..32c96628463e 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -21,6 +21,11 @@ static DEFINE_MUTEX(cpu_bitmask_lock);
 
 static __cpuinitdata BLOCKING_NOTIFIER_HEAD(cpu_chain);
 
+/* If set, cpu_up and cpu_down will return -EBUSY and do nothing.
+ * Should always be manipulated under cpu_add_remove_lock
+ */
+static int cpu_hotplug_disabled;
+
 #ifdef CONFIG_HOTPLUG_CPU
 
 /* Crappy recursive lock-takers in cpufreq! Complain loudly about idiots */
@@ -108,30 +113,25 @@ static int take_cpu_down(void *unused)
 	return 0;
 }
 
-int cpu_down(unsigned int cpu)
+/* Requires cpu_add_remove_lock to be held */
+static int _cpu_down(unsigned int cpu)
 {
 	int err;
 	struct task_struct *p;
 	cpumask_t old_allowed, tmp;
 
-	mutex_lock(&cpu_add_remove_lock);
-	if (num_online_cpus() == 1) {
-		err = -EBUSY;
-		goto out;
-	}
+	if (num_online_cpus() == 1)
+		return -EBUSY;
 
-	if (!cpu_online(cpu)) {
-		err = -EINVAL;
-		goto out;
-	}
+	if (!cpu_online(cpu))
+		return -EINVAL;
 
 	err = blocking_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE,
 						(void *)(long)cpu);
 	if (err == NOTIFY_BAD) {
 		printk("%s: attempt to take down CPU %u failed\n",
 			__FUNCTION__, cpu);
-		err = -EINVAL;
-		goto out;
+		return -EINVAL;
 	}
 
 	/* Ensure that we are not runnable on dying cpu */
@@ -179,22 +179,32 @@ out_thread:
 	err = kthread_stop(p);
 out_allowed:
 	set_cpus_allowed(current, old_allowed);
-out:
+	return err;
+}
+
+int cpu_down(unsigned int cpu)
+{
+	int err = 0;
+
+	mutex_lock(&cpu_add_remove_lock);
+	if (cpu_hotplug_disabled)
+		err = -EBUSY;
+	else
+		err = _cpu_down(cpu);
+
 	mutex_unlock(&cpu_add_remove_lock);
 	return err;
 }
 #endif /*CONFIG_HOTPLUG_CPU*/
 
-int __devinit cpu_up(unsigned int cpu)
+/* Requires cpu_add_remove_lock to be held */
+static int __devinit _cpu_up(unsigned int cpu)
 {
 	int ret;
 	void *hcpu = (void *)(long)cpu;
 
-	mutex_lock(&cpu_add_remove_lock);
-	if (cpu_online(cpu) || !cpu_present(cpu)) {
-		ret = -EINVAL;
-		goto out;
-	}
+	if (cpu_online(cpu) || !cpu_present(cpu))
+		return -EINVAL;
 
 	ret = blocking_notifier_call_chain(&cpu_chain, CPU_UP_PREPARE, hcpu);
 	if (ret == NOTIFY_BAD) {
@@ -219,7 +229,95 @@ out_notify:
 	if (ret != 0)
 		blocking_notifier_call_chain(&cpu_chain,
 				CPU_UP_CANCELED, hcpu);
+
+	return ret;
+}
+
+int __devinit cpu_up(unsigned int cpu)
+{
+	int err = 0;
+
+	mutex_lock(&cpu_add_remove_lock);
+	if (cpu_hotplug_disabled)
+		err = -EBUSY;
+	else
+		err = _cpu_up(cpu);
+
+	mutex_unlock(&cpu_add_remove_lock);
+	return err;
+}
+
+#ifdef CONFIG_SUSPEND_SMP
+static cpumask_t frozen_cpus;
+
+int disable_nonboot_cpus(void)
+{
+	int cpu, first_cpu, error;
+
+	mutex_lock(&cpu_add_remove_lock);
+	first_cpu = first_cpu(cpu_present_map);
+	if (!cpu_online(first_cpu)) {
+		error = _cpu_up(first_cpu);
+		if (error) {
+			printk(KERN_ERR "Could not bring CPU%d up.\n",
+				first_cpu);
+			goto out;
+		}
+	}
+	error = set_cpus_allowed(current, cpumask_of_cpu(first_cpu));
+	if (error) {
+		printk(KERN_ERR "Could not run on CPU%d\n", first_cpu);
+		goto out;
+	}
+	/* We take down all of the non-boot CPUs in one shot to avoid races
+	 * with the userspace trying to use the CPU hotplug at the same time
+	 */
+	cpus_clear(frozen_cpus);
+	printk("Disabling non-boot CPUs ...\n");
+	for_each_online_cpu(cpu) {
+		if (cpu == first_cpu)
+			continue;
+		error = _cpu_down(cpu);
+		if (!error) {
+			cpu_set(cpu, frozen_cpus);
+			printk("CPU%d is down\n", cpu);
+		} else {
+			printk(KERN_ERR "Error taking CPU%d down: %d\n",
+				cpu, error);
+			break;
+		}
+	}
+	if (!error) {
+		BUG_ON(num_online_cpus() > 1);
+		/* Make sure the CPUs won't be enabled by someone else */
+		cpu_hotplug_disabled = 1;
+	} else {
+		printk(KERN_ERR "Non-boot CPUs are not disabled");
+	}
 out:
 	mutex_unlock(&cpu_add_remove_lock);
-	return ret;
+	return error;
+}
+
+void enable_nonboot_cpus(void)
+{
+	int cpu, error;
+
+	/* Allow everyone to use the CPU hotplug again */
+	mutex_lock(&cpu_add_remove_lock);
+	cpu_hotplug_disabled = 0;
+	mutex_unlock(&cpu_add_remove_lock);
+
+	printk("Enabling non-boot CPUs ...\n");
+	for_each_cpu_mask(cpu, frozen_cpus) {
+		error = cpu_up(cpu);
+		if (!error) {
+			printk("CPU%d is up\n", cpu);
+			continue;
+		}
+		printk(KERN_WARNING "Error taking CPU%d up: %d\n",
+			cpu, error);
+	}
+	cpus_clear(frozen_cpus);
 }
+#endif
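The refactoring above splits the locked helpers (_cpu_down()/_cpu_up()) from the public entry points so that disable_nonboot_cpus() can take CPUs down while already holding cpu_add_remove_lock; cpu_hotplug_disabled then keeps userspace-triggered cpu_up()/cpu_down() calls out of the way until enable_nonboot_cpus() runs. A minimal sketch of the intended suspend-side caller, modelled on the kernel/power changes later in this patch (freeze_system() is a hypothetical name; the real callers are suspend_prepare() and prepare_processes()):

	/* Hypothetical caller showing the expected pairing of the new API */
	static int freeze_system(void)
	{
		int error;

		error = disable_nonboot_cpus();	/* sets cpu_hotplug_disabled */
		if (error)
			return error;		/* some CPU refused to go down */

		if (freeze_processes()) {
			thaw_processes();
			enable_nonboot_cpus();	/* clears cpu_hotplug_disabled */
			return -EAGAIN;
		}
		return 0;
	}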
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 4ea6f0dc2fc5..cff41511269f 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2245,7 +2245,7 @@ int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl)
 	int i;
 
 	for (i = 0; zl->zones[i]; i++) {
-		int nid = zl->zones[i]->zone_pgdat->node_id;
+		int nid = zone_to_nid(zl->zones[i]);
 
 		if (node_isset(nid, current->mems_allowed))
 			return 1;
@@ -2316,9 +2316,9 @@ int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
 	const struct cpuset *cs;	/* current cpuset ancestors */
 	int allowed;			/* is allocation in zone z allowed? */
 
-	if (in_interrupt())
+	if (in_interrupt() || (gfp_mask & __GFP_THISNODE))
 		return 1;
-	node = z->zone_pgdat->node_id;
+	node = zone_to_nid(z);
 	might_sleep_if(!(gfp_mask & __GFP_HARDWALL));
 	if (node_isset(node, current->mems_allowed))
 		return 1;
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 48a53f68af96..4c6cdbaed661 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -154,6 +154,7 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct pt_regs *regs,
 	return retval;
 }
 
+#ifndef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ
 /**
  * __do_IRQ - original all in one highlevel IRQ handler
  * @irq: the interrupt number
@@ -253,6 +254,7 @@ out:
 
 	return 1;
 }
+#endif
 
 #ifdef CONFIG_TRACE_IRQFLAGS
 
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 1ed972070d19..825068ca3479 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -36,6 +36,17 @@ config PM_DEBUG
 	  code. This is helpful when debugging and reporting various PM bugs,
 	  like suspend support.
 
+config DISABLE_CONSOLE_SUSPEND
+	bool "Keep console(s) enabled during suspend/resume (DANGEROUS)"
+	depends on PM && PM_DEBUG
+	default n
+	---help---
+	  This option turns off the console suspend mechanism that prevents
+	  debug messages from reaching the console during the suspend/resume
+	  operations. This may be helpful when debugging device drivers'
+	  suspend/resume routines, but may itself lead to problems, for example
+	  if netconsole is used.
+
 config PM_TRACE
 	bool "Suspend/resume event tracing"
 	depends on PM && PM_DEBUG && X86_32 && EXPERIMENTAL
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index 8d0af3d37a4b..38725f526afc 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -7,6 +7,4 @@ obj-y := main.o process.o console.o
 obj-$(CONFIG_PM_LEGACY)		+= pm.o
 obj-$(CONFIG_SOFTWARE_SUSPEND)	+= swsusp.o disk.o snapshot.o swap.o user.o
 
-obj-$(CONFIG_SUSPEND_SMP)	+= smp.o
-
 obj-$(CONFIG_MAGIC_SYSRQ)	+= poweroff.o
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index a3c34fb14321..d72234942798 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -18,6 +18,7 @@
 #include <linux/fs.h>
 #include <linux/mount.h>
 #include <linux/pm.h>
+#include <linux/cpu.h>
 
 #include "power.h"
 
@@ -72,7 +73,10 @@ static int prepare_processes(void)
 	int error;
 
 	pm_prepare_console();
-	disable_nonboot_cpus();
+
+	error = disable_nonboot_cpus();
+	if (error)
+		goto enable_cpus;
 
 	if (freeze_processes()) {
 		error = -EBUSY;
@@ -84,6 +88,7 @@ static int prepare_processes(void)
 	return 0;
 thaw:
 	thaw_processes();
+enable_cpus:
 	enable_nonboot_cpus();
 	pm_restore_console();
 	return error;
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 6d295c776794..873228c71dab 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -16,6 +16,8 @@
 #include <linux/init.h>
 #include <linux/pm.h>
 #include <linux/console.h>
+#include <linux/cpu.h>
+#include <linux/resume-trace.h>
 
 #include "power.h"
 
@@ -51,7 +53,7 @@ void pm_set_ops(struct pm_ops * ops)
 
 static int suspend_prepare(suspend_state_t state)
 {
-	int error = 0;
+	int error;
 	unsigned int free_pages;
 
 	if (!pm_ops || !pm_ops->enter)
@@ -59,12 +61,9 @@ static int suspend_prepare(suspend_state_t state)
 
 	pm_prepare_console();
 
-	disable_nonboot_cpus();
-
-	if (num_online_cpus() != 1) {
-		error = -EPERM;
+	error = disable_nonboot_cpus();
+	if (error)
 		goto Enable_cpu;
-	}
 
 	if (freeze_processes()) {
 		error = -EAGAIN;
@@ -283,10 +282,39 @@ static ssize_t state_store(struct subsystem * subsys, const char * buf, size_t n
 
 power_attr(state);
 
+#ifdef CONFIG_PM_TRACE
+int pm_trace_enabled;
+
+static ssize_t pm_trace_show(struct subsystem * subsys, char * buf)
+{
+	return sprintf(buf, "%d\n", pm_trace_enabled);
+}
+
+static ssize_t
+pm_trace_store(struct subsystem * subsys, const char * buf, size_t n)
+{
+	int val;
+
+	if (sscanf(buf, "%d", &val) == 1) {
+		pm_trace_enabled = !!val;
+		return n;
+	}
+	return -EINVAL;
+}
+
+power_attr(pm_trace);
+
+static struct attribute * g[] = {
+	&state_attr.attr,
+	&pm_trace_attr.attr,
+	NULL,
+};
+#else
 static struct attribute * g[] = {
 	&state_attr.attr,
 	NULL,
 };
+#endif /* CONFIG_PM_TRACE */
 
 static struct attribute_group attr_group = {
 	.attrs = g,
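The new pm_trace attribute is wired into sysfs through the existing power_attr() helper. For reference, power_attr() (defined in kernel/power/power.h, not shown in this patch) is assumed to expand roughly as follows, which is what makes pm_trace_show()/pm_trace_store() back /sys/power/pm_trace:

	/* Assumed expansion of power_attr(pm_trace); the exact macro lives
	 * in kernel/power/power.h and is not part of this patch. */
	static struct subsys_attribute pm_trace_attr = {
		.attr	= {
			.name = "pm_trace",
			.mode = 0644,
		},
		.show	= pm_trace_show,
		.store	= pm_trace_store,
	};

With that in place, writing any non-zero value to /sys/power/pm_trace sets pm_trace_enabled to 1 (note the !!val in pm_trace_store()).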
diff --git a/kernel/power/power.h b/kernel/power/power.h
index 57a792982fb9..bfe999f7b272 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -38,8 +38,6 @@ extern struct subsystem power_subsys;
 /* References to section boundaries */
 extern const void __nosave_begin, __nosave_end;
 
-extern struct pbe *pagedir_nosave;
-
 /* Preferred image size in bytes (default 500 MB) */
 extern unsigned long image_size;
 extern int in_suspend;
@@ -50,21 +48,62 @@ extern asmlinkage int swsusp_arch_resume(void);
 
 extern unsigned int count_data_pages(void);
 
+/**
+ * Auxiliary structure used for reading the snapshot image data and
+ * metadata from and writing them to the list of page backup entries
+ * (PBEs) which is the main data structure of swsusp.
+ *
+ * Using struct snapshot_handle we can transfer the image, including its
+ * metadata, as a continuous sequence of bytes with the help of
+ * snapshot_read_next() and snapshot_write_next().
+ *
+ * The code that writes the image to a storage or transfers it to
+ * the user land is required to use snapshot_read_next() for this
+ * purpose and it should not make any assumptions regarding the internal
+ * structure of the image. Similarly, the code that reads the image from
+ * a storage or transfers it from the user land is required to use
+ * snapshot_write_next().
+ *
+ * This may allow us to change the internal structure of the image
+ * in the future with considerably less effort.
+ */
+
 struct snapshot_handle {
-	loff_t offset;
-	unsigned int page;
-	unsigned int page_offset;
-	unsigned int prev;
-	struct pbe *pbe, *last_pbe;
-	void *buffer;
-	unsigned int buf_offset;
+	loff_t offset;			/* number of the last byte ready for reading
+					 * or writing in the sequence
+					 */
+	unsigned int cur;		/* number of the block of PAGE_SIZE bytes the
+					 * next operation will refer to (ie. current)
+					 */
+	unsigned int cur_offset;	/* offset with respect to the current
+					 * block (for the next operation)
+					 */
+	unsigned int prev;		/* number of the block of PAGE_SIZE bytes that
+					 * was the current one previously
+					 */
+	void *buffer;			/* address of the block to read from
+					 * or write to
+					 */
+	unsigned int buf_offset;	/* location to read from or write to,
+					 * given as a displacement from 'buffer'
+					 */
+	int sync_read;			/* Set to one to notify the caller of
+					 * snapshot_write_next() that it may
+					 * need to call wait_on_bio_chain()
+					 */
 };
 
+/* This macro returns the address from/to which the caller of
+ * snapshot_read_next()/snapshot_write_next() is allowed to
+ * read/write data after the function returns
+ */
 #define data_of(handle)	((handle).buffer + (handle).buf_offset)
 
+extern unsigned int snapshot_additional_pages(struct zone *zone);
 extern int snapshot_read_next(struct snapshot_handle *handle, size_t count);
 extern int snapshot_write_next(struct snapshot_handle *handle, size_t count);
-int snapshot_image_loaded(struct snapshot_handle *handle);
+extern int snapshot_image_loaded(struct snapshot_handle *handle);
+extern void snapshot_free_unused_memory(struct snapshot_handle *handle);
 
 #define SNAPSHOT_IOC_MAGIC	'3'
 #define SNAPSHOT_FREEZE		_IO(SNAPSHOT_IOC_MAGIC, 1)
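The comment block above fixes the contract: consumers move the image strictly through snapshot_read_next()/snapshot_write_next() and the data_of() window, with no assumptions about the image's internal layout. A sketch of a writer loop under that contract (store_block() is a hypothetical stand-in for the real swap or user-land transport, and the return convention, positive byte count, 0 at end of image, negative on error, is assumed from the in-tree callers):

	/* Sketch only: drains the snapshot image through the handle API */
	static int write_image(struct snapshot_handle *handle)
	{
		int ret;

		memset(handle, 0, sizeof(struct snapshot_handle));
		for (;;) {
			ret = snapshot_read_next(handle, PAGE_SIZE);
			if (ret <= 0)
				return ret;	/* 0: done, < 0: error */
			ret = store_block(data_of(*handle), ret);
			if (ret)
				return ret;
		}
	}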
diff --git a/kernel/power/smp.c b/kernel/power/smp.c
deleted file mode 100644
index 5957312b2d68..000000000000
--- a/kernel/power/smp.c
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * drivers/power/smp.c - Functions for stopping other CPUs.
- *
- * Copyright 2004 Pavel Machek <pavel@suse.cz>
- * Copyright (C) 2002-2003 Nigel Cunningham <ncunningham@clear.net.nz>
- *
- * This file is released under the GPLv2.
- */
-
-#undef DEBUG
-
-#include <linux/smp_lock.h>
-#include <linux/interrupt.h>
-#include <linux/suspend.h>
-#include <linux/module.h>
-#include <linux/cpu.h>
-#include <asm/atomic.h>
-#include <asm/tlbflush.h>
-
-/* This is protected by pm_sem semaphore */
-static cpumask_t frozen_cpus;
-
-void disable_nonboot_cpus(void)
-{
-	int cpu, error;
-
-	error = 0;
-	cpus_clear(frozen_cpus);
-	printk("Freezing cpus ...\n");
-	for_each_online_cpu(cpu) {
-		if (cpu == 0)
-			continue;
-		error = cpu_down(cpu);
-		if (!error) {
-			cpu_set(cpu, frozen_cpus);
-			printk("CPU%d is down\n", cpu);
-			continue;
-		}
-		printk("Error taking cpu %d down: %d\n", cpu, error);
-	}
-	BUG_ON(raw_smp_processor_id() != 0);
-	if (error)
-		panic("cpus not sleeping");
-}
-
-void enable_nonboot_cpus(void)
-{
-	int cpu, error;
-
-	printk("Thawing cpus ...\n");
-	for_each_cpu_mask(cpu, frozen_cpus) {
-		error = cpu_up(cpu);
-		if (!error) {
-			printk("CPU%d is up\n", cpu);
-			continue;
-		}
-		printk("Error taking cpu %d up: %d\n", cpu, error);
-		panic("Not enough cpus");
-	}
-	cpus_clear(frozen_cpus);
-}
-
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 75d4886e648e..1b84313cbab5 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -34,10 +34,12 @@
 
 #include "power.h"
 
-struct pbe *pagedir_nosave;
+/* List of PBEs used for creating and restoring the suspend image */
+struct pbe *restore_pblist;
+
 static unsigned int nr_copy_pages;
 static unsigned int nr_meta_pages;
-static unsigned long *buffer;
+static void *buffer;
 
 #ifdef CONFIG_HIGHMEM
 unsigned int count_highmem_pages(void)
@@ -156,240 +158,637 @@ static inline int save_highmem(void) {return 0;}
 static inline int restore_highmem(void) {return 0;}
 #endif
 
-static int pfn_is_nosave(unsigned long pfn)
+/**
+ * @safe_needed - on resume, for storing the PBE list and the image,
+ * we can only use memory pages that do not conflict with the pages
+ * used before suspend.
+ *
+ * The unsafe pages are marked with the PG_nosave_free flag
+ * and we count them using unsafe_pages
+ */
+
+#define PG_ANY		0
+#define PG_SAFE		1
+#define PG_UNSAFE_CLEAR	1
+#define PG_UNSAFE_KEEP	0
+
+static unsigned int allocated_unsafe_pages;
+
+static void *alloc_image_page(gfp_t gfp_mask, int safe_needed)
 {
-	unsigned long nosave_begin_pfn = __pa(&__nosave_begin) >> PAGE_SHIFT;
-	unsigned long nosave_end_pfn = PAGE_ALIGN(__pa(&__nosave_end)) >> PAGE_SHIFT;
-	return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn);
+	void *res;
+
+	res = (void *)get_zeroed_page(gfp_mask);
+	if (safe_needed)
+		while (res && PageNosaveFree(virt_to_page(res))) {
+			/* The page is unsafe, mark it for swsusp_free() */
+			SetPageNosave(virt_to_page(res));
+			allocated_unsafe_pages++;
+			res = (void *)get_zeroed_page(gfp_mask);
+		}
+	if (res) {
+		SetPageNosave(virt_to_page(res));
+		SetPageNosaveFree(virt_to_page(res));
+	}
+	return res;
+}
+
+unsigned long get_safe_page(gfp_t gfp_mask)
+{
+	return (unsigned long)alloc_image_page(gfp_mask, PG_SAFE);
 }
 
 /**
- * saveable - Determine whether a page should be cloned or not.
- * @pfn: The page
- *
- * We save a page if it's Reserved, and not in the range of pages
- * statically defined as 'unsaveable', or if it isn't reserved, and
- * isn't part of a free chunk of pages.
+ * free_image_page - free page represented by @addr, allocated with
+ * alloc_image_page (page flags set by it must be cleared)
  */
 
-static int saveable(struct zone *zone, unsigned long *zone_pfn)
+static inline void free_image_page(void *addr, int clear_nosave_free)
 {
-	unsigned long pfn = *zone_pfn + zone->zone_start_pfn;
-	struct page *page;
+	ClearPageNosave(virt_to_page(addr));
+	if (clear_nosave_free)
+		ClearPageNosaveFree(virt_to_page(addr));
+	free_page((unsigned long)addr);
+}
 
-	if (!pfn_valid(pfn))
-		return 0;
+/* struct linked_page is used to build chains of pages */
 
-	page = pfn_to_page(pfn);
-	BUG_ON(PageReserved(page) && PageNosave(page));
-	if (PageNosave(page))
-		return 0;
-	if (PageReserved(page) && pfn_is_nosave(pfn))
-		return 0;
-	if (PageNosaveFree(page))
-		return 0;
+#define LINKED_PAGE_DATA_SIZE	(PAGE_SIZE - sizeof(void *))
 
-	return 1;
-}
+struct linked_page {
+	struct linked_page *next;
+	char data[LINKED_PAGE_DATA_SIZE];
+} __attribute__((packed));
 
-unsigned int count_data_pages(void)
+static inline void
+free_list_of_pages(struct linked_page *list, int clear_page_nosave)
 {
-	struct zone *zone;
-	unsigned long zone_pfn;
-	unsigned int n = 0;
+	while (list) {
+		struct linked_page *lp = list->next;
 
-	for_each_zone (zone) {
-		if (is_highmem(zone))
-			continue;
-		mark_free_pages(zone);
-		for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn)
-			n += saveable(zone, &zone_pfn);
+		free_image_page(list, clear_page_nosave);
+		list = lp;
 	}
-	return n;
 }
 
-static void copy_data_pages(struct pbe *pblist)
+/**
+ * struct chain_allocator is used for allocating small objects out of
+ * a linked list of pages called 'the chain'.
+ *
+ * The chain grows each time when there is no room for a new object in
+ * the current page. The allocated objects cannot be freed individually.
+ * It is only possible to free them all at once, by freeing the entire
+ * chain.
+ *
+ * NOTE: The chain allocator may be inefficient if the allocated objects
+ * are not much smaller than PAGE_SIZE.
+ */
+
+struct chain_allocator {
+	struct linked_page *chain;	/* the chain */
+	unsigned int used_space;	/* total size of objects allocated out
+					 * of the current page
+					 */
+	gfp_t gfp_mask;		/* mask for allocating pages */
+	int safe_needed;	/* if set, only "safe" pages are allocated */
+};
+
+static void
+chain_init(struct chain_allocator *ca, gfp_t gfp_mask, int safe_needed)
 {
-	struct zone *zone;
-	unsigned long zone_pfn;
-	struct pbe *pbe, *p;
+	ca->chain = NULL;
+	ca->used_space = LINKED_PAGE_DATA_SIZE;
+	ca->gfp_mask = gfp_mask;
+	ca->safe_needed = safe_needed;
+}
 
-	pbe = pblist;
-	for_each_zone (zone) {
-		if (is_highmem(zone))
-			continue;
-		mark_free_pages(zone);
-		/* This is necessary for swsusp_free() */
-		for_each_pb_page (p, pblist)
-			SetPageNosaveFree(virt_to_page(p));
-		for_each_pbe (p, pblist)
-			SetPageNosaveFree(virt_to_page(p->address));
-		for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) {
-			if (saveable(zone, &zone_pfn)) {
-				struct page *page;
-				long *src, *dst;
-				int n;
-
-				page = pfn_to_page(zone_pfn + zone->zone_start_pfn);
-				BUG_ON(!pbe);
-				pbe->orig_address = (unsigned long)page_address(page);
-				/* copy_page and memcpy are not usable for copying task structs. */
-				dst = (long *)pbe->address;
-				src = (long *)pbe->orig_address;
-				for (n = PAGE_SIZE / sizeof(long); n; n--)
-					*dst++ = *src++;
-				pbe = pbe->next;
-			}
-		}
+static void *chain_alloc(struct chain_allocator *ca, unsigned int size)
+{
+	void *ret;
+
+	if (LINKED_PAGE_DATA_SIZE - ca->used_space < size) {
+		struct linked_page *lp;
+
+		lp = alloc_image_page(ca->gfp_mask, ca->safe_needed);
+		if (!lp)
+			return NULL;
+
+		lp->next = ca->chain;
+		ca->chain = lp;
+		ca->used_space = 0;
 	}
-	BUG_ON(pbe);
+	ret = ca->chain->data + ca->used_space;
+	ca->used_space += size;
+	return ret;
 }
 
+static void chain_free(struct chain_allocator *ca, int clear_page_nosave)
+{
+	free_list_of_pages(ca->chain, clear_page_nosave);
+	memset(ca, 0, sizeof(struct chain_allocator));
+}
 
 /**
- * free_pagedir - free pages allocated with alloc_pagedir()
+ * Data types related to memory bitmaps.
+ *
+ * Memory bitmap is a structure consisting of many linked lists of
+ * objects. The main list's elements are of type struct zone_bitmap
+ * and each of them corresponds to one zone. For each zone bitmap
+ * object there is a list of objects of type struct bm_block that
+ * represent each block of bit chunks in which information is
+ * stored.
+ *
+ * struct memory_bitmap contains a pointer to the main list of zone
+ * bitmap objects, a struct bm_position used for browsing the bitmap,
+ * and a pointer to the list of pages used for allocating all of the
+ * zone bitmap objects and bitmap block objects.
+ *
+ * NOTE: It has to be possible to lay out the bitmap in memory
+ * using only allocations of order 0. Additionally, the bitmap is
+ * designed to work with arbitrary number of zones (this is over the
+ * top for now, but let's avoid making unnecessary assumptions ;-).
+ *
+ * struct zone_bitmap contains a pointer to a list of bitmap block
+ * objects and a pointer to the bitmap block object that has been
+ * most recently used for setting bits. Additionally, it contains the
+ * pfns that correspond to the start and end of the represented zone.
+ *
+ * struct bm_block contains a pointer to the memory page in which
+ * information is stored (in the form of a block of bit chunks
+ * of type unsigned long each). It also contains the pfns that
+ * correspond to the start and end of the represented memory area and
+ * the number of bit chunks in the block.
+ *
+ * NOTE: Memory bitmaps are used for two types of operations only:
+ * "set a bit" and "find the next bit set". Moreover, the searching
+ * is always carried out after all of the "set a bit" operations
+ * on given bitmap.
  */
 
-static void free_pagedir(struct pbe *pblist, int clear_nosave_free)
+#define BM_END_OF_MAP	(~0UL)
+
+#define BM_CHUNKS_PER_BLOCK	(PAGE_SIZE / sizeof(long))
+#define BM_BITS_PER_CHUNK	(sizeof(long) << 3)
+#define BM_BITS_PER_BLOCK	(PAGE_SIZE << 3)
+
+struct bm_block {
+	struct bm_block *next;		/* next element of the list */
+	unsigned long start_pfn;	/* pfn represented by the first bit */
+	unsigned long end_pfn;		/* pfn represented by the last bit plus 1 */
+	unsigned int size;		/* number of bit chunks */
+	unsigned long *data;		/* chunks of bits representing pages */
+};
+
+struct zone_bitmap {
+	struct zone_bitmap *next;	/* next element of the list */
+	unsigned long start_pfn;	/* minimal pfn in this zone */
+	unsigned long end_pfn;		/* maximal pfn in this zone plus 1 */
+	struct bm_block *bm_blocks;	/* list of bitmap blocks */
+	struct bm_block *cur_block;	/* recently used bitmap block */
+};
+
+/* struct bm_position is used for browsing memory bitmaps */
+
+struct bm_position {
+	struct zone_bitmap *zone_bm;
+	struct bm_block *block;
+	int chunk;
+	int bit;
+};
+
+struct memory_bitmap {
+	struct zone_bitmap *zone_bm_list;	/* list of zone bitmaps */
+	struct linked_page *p_list;	/* list of pages used to store zone
+					 * bitmap objects and bitmap block
+					 * objects
+					 */
+	struct bm_position cur;		/* most recently used bit position */
+};
+
+/* Functions that operate on memory bitmaps */
+
+static inline void memory_bm_reset_chunk(struct memory_bitmap *bm)
 {
-	struct pbe *pbe;
+	bm->cur.chunk = 0;
+	bm->cur.bit = -1;
+}
 
-	while (pblist) {
-		pbe = (pblist + PB_PAGE_SKIP)->next;
-		ClearPageNosave(virt_to_page(pblist));
-		if (clear_nosave_free)
-			ClearPageNosaveFree(virt_to_page(pblist));
-		free_page((unsigned long)pblist);
-		pblist = pbe;
-	}
-}
+static void memory_bm_position_reset(struct memory_bitmap *bm)
+{
+	struct zone_bitmap *zone_bm;
+
+	zone_bm = bm->zone_bm_list;
+	bm->cur.zone_bm = zone_bm;
+	bm->cur.block = zone_bm->bm_blocks;
+	memory_bm_reset_chunk(bm);
+}
 
+static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free);
+
 /**
- * fill_pb_page - Create a list of PBEs on a given memory page
+ * create_bm_block_list - create a list of block bitmap objects
  */
 
-static inline void fill_pb_page(struct pbe *pbpage)
+static inline struct bm_block *
+create_bm_block_list(unsigned int nr_blocks, struct chain_allocator *ca)
 {
-	struct pbe *p;
+	struct bm_block *bblist = NULL;
+
+	while (nr_blocks-- > 0) {
+		struct bm_block *bb;
 
-	p = pbpage;
-	pbpage += PB_PAGE_SKIP;
-	do
-		p->next = p + 1;
-	while (++p < pbpage);
+		bb = chain_alloc(ca, sizeof(struct bm_block));
+		if (!bb)
+			return NULL;
+
+		bb->next = bblist;
+		bblist = bb;
+	}
+	return bblist;
 }
 
 /**
- * create_pbe_list - Create a list of PBEs on top of a given chain
- * of memory pages allocated with alloc_pagedir()
+ * create_zone_bm_list - create a list of zone bitmap objects
  */
 
-static inline void create_pbe_list(struct pbe *pblist, unsigned int nr_pages)
+static inline struct zone_bitmap *
+create_zone_bm_list(unsigned int nr_zones, struct chain_allocator *ca)
 {
-	struct pbe *pbpage, *p;
-	unsigned int num = PBES_PER_PAGE;
+	struct zone_bitmap *zbmlist = NULL;
 
-	for_each_pb_page (pbpage, pblist) {
-		if (num >= nr_pages)
-			break;
+	while (nr_zones-- > 0) {
+		struct zone_bitmap *zbm;
+
+		zbm = chain_alloc(ca, sizeof(struct zone_bitmap));
+		if (!zbm)
+			return NULL;
+
+		zbm->next = zbmlist;
+		zbmlist = zbm;
+	}
+	return zbmlist;
+}
+
+/**
+ * memory_bm_create - allocate memory for a memory bitmap
+ */
+
+static int
+memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed)
+{
+	struct chain_allocator ca;
+	struct zone *zone;
+	struct zone_bitmap *zone_bm;
+	struct bm_block *bb;
+	unsigned int nr;
+
+	chain_init(&ca, gfp_mask, safe_needed);
 
-		fill_pb_page(pbpage);
-		num += PBES_PER_PAGE;
+	/* Compute the number of zones */
+	nr = 0;
+	for_each_zone (zone)
+		if (populated_zone(zone) && !is_highmem(zone))
+			nr++;
+
+	/* Allocate the list of zones bitmap objects */
+	zone_bm = create_zone_bm_list(nr, &ca);
+	bm->zone_bm_list = zone_bm;
+	if (!zone_bm) {
+		chain_free(&ca, PG_UNSAFE_CLEAR);
+		return -ENOMEM;
 	}
-	if (pbpage) {
-		for (num -= PBES_PER_PAGE - 1, p = pbpage; num < nr_pages; p++, num++)
-			p->next = p + 1;
-		p->next = NULL;
+
+	/* Initialize the zone bitmap objects */
+	for_each_zone (zone) {
+		unsigned long pfn;
+
+		if (!populated_zone(zone) || is_highmem(zone))
+			continue;
+
+		zone_bm->start_pfn = zone->zone_start_pfn;
+		zone_bm->end_pfn = zone->zone_start_pfn + zone->spanned_pages;
+		/* Allocate the list of bitmap block objects */
+		nr = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK);
+		bb = create_bm_block_list(nr, &ca);
+		zone_bm->bm_blocks = bb;
+		zone_bm->cur_block = bb;
+		if (!bb)
+			goto Free;
+
+		nr = zone->spanned_pages;
+		pfn = zone->zone_start_pfn;
+		/* Initialize the bitmap block objects */
+		while (bb) {
+			unsigned long *ptr;
+
+			ptr = alloc_image_page(gfp_mask, safe_needed);
+			bb->data = ptr;
+			if (!ptr)
+				goto Free;
+
+			bb->start_pfn = pfn;
+			if (nr >= BM_BITS_PER_BLOCK) {
+				pfn += BM_BITS_PER_BLOCK;
+				bb->size = BM_CHUNKS_PER_BLOCK;
+				nr -= BM_BITS_PER_BLOCK;
+			} else {
+				/* This is executed only once in the loop */
+				pfn += nr;
+				bb->size = DIV_ROUND_UP(nr, BM_BITS_PER_CHUNK);
+			}
+			bb->end_pfn = pfn;
+			bb = bb->next;
+		}
+		zone_bm = zone_bm->next;
 	}
+	bm->p_list = ca.chain;
+	memory_bm_position_reset(bm);
+	return 0;
+
+Free:
+	bm->p_list = ca.chain;
+	memory_bm_free(bm, PG_UNSAFE_CLEAR);
+	return -ENOMEM;
 }
 
-static unsigned int unsafe_pages;
+/**
+ * memory_bm_free - free memory occupied by the memory bitmap @bm
+ */
+
+static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free)
+{
+	struct zone_bitmap *zone_bm;
+
+	/* Free the list of bit blocks for each zone_bitmap object */
+	zone_bm = bm->zone_bm_list;
+	while (zone_bm) {
+		struct bm_block *bb;
+
+		bb = zone_bm->bm_blocks;
+		while (bb) {
+			if (bb->data)
+				free_image_page(bb->data, clear_nosave_free);
+			bb = bb->next;
+		}
+		zone_bm = zone_bm->next;
+	}
+	free_list_of_pages(bm->p_list, clear_nosave_free);
+	bm->zone_bm_list = NULL;
+}
 
 /**
- * @safe_needed - on resume, for storing the PBE list and the image,
- * we can only use memory pages that do not conflict with the pages
- * used before suspend.
+ * memory_bm_set_bit - set the bit in the bitmap @bm that corresponds
+ * to given pfn. The cur_zone_bm member of @bm and the cur_block member
+ * of @bm->cur_zone_bm are updated.
  *
- * The unsafe pages are marked with the PG_nosave_free flag
- * and we count them using unsafe_pages
+ * If the bit cannot be set, the function returns -EINVAL .
  */
 
-static inline void *alloc_image_page(gfp_t gfp_mask, int safe_needed)
+static int
+memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn)
 {
-	void *res;
-
-	res = (void *)get_zeroed_page(gfp_mask);
-	if (safe_needed)
-		while (res && PageNosaveFree(virt_to_page(res))) {
-			/* The page is unsafe, mark it for swsusp_free() */
-			SetPageNosave(virt_to_page(res));
-			unsafe_pages++;
-			res = (void *)get_zeroed_page(gfp_mask);
+	struct zone_bitmap *zone_bm;
+	struct bm_block *bb;
+
+	/* Check if the pfn is from the current zone */
+	zone_bm = bm->cur.zone_bm;
+	if (pfn < zone_bm->start_pfn || pfn >= zone_bm->end_pfn) {
+		zone_bm = bm->zone_bm_list;
+		/* We don't assume that the zones are sorted by pfns */
+		while (pfn < zone_bm->start_pfn || pfn >= zone_bm->end_pfn) {
+			zone_bm = zone_bm->next;
+			if (unlikely(!zone_bm))
+				return -EINVAL;
 		}
-	if (res) {
-		SetPageNosave(virt_to_page(res));
-		SetPageNosaveFree(virt_to_page(res));
+		bm->cur.zone_bm = zone_bm;
 	}
-	return res;
+	/* Check if the pfn corresponds to the current bitmap block */
+	bb = zone_bm->cur_block;
+	if (pfn < bb->start_pfn)
+		bb = zone_bm->bm_blocks;
+
+	while (pfn >= bb->end_pfn) {
+		bb = bb->next;
+		if (unlikely(!bb))
+			return -EINVAL;
+	}
+	zone_bm->cur_block = bb;
+	pfn -= bb->start_pfn;
+	set_bit(pfn % BM_BITS_PER_CHUNK, bb->data + pfn / BM_BITS_PER_CHUNK);
+	return 0;
 }
 
-unsigned long get_safe_page(gfp_t gfp_mask)
+/* Two auxiliary functions for memory_bm_next_pfn */
+
+/* Find the first set bit in the given chunk, if there is one */
+
+static inline int next_bit_in_chunk(int bit, unsigned long *chunk_p)
 {
-	return (unsigned long)alloc_image_page(gfp_mask, 1);
+	bit++;
+	while (bit < BM_BITS_PER_CHUNK) {
+		if (test_bit(bit, chunk_p))
+			return bit;
+
+		bit++;
+	}
+	return -1;
+}
+
+/* Find a chunk containing some bits set in given block of bits */
+
+static inline int next_chunk_in_block(int n, struct bm_block *bb)
+{
+	n++;
+	while (n < bb->size) {
+		if (bb->data[n])
+			return n;
+
+		n++;
+	}
+	return -1;
 }
 
 /**
- * alloc_pagedir - Allocate the page directory.
- *
- * First, determine exactly how many pages we need and
- * allocate them.
+ * memory_bm_next_pfn - find the pfn that corresponds to the next set bit
+ * in the bitmap @bm. If the pfn cannot be found, BM_END_OF_MAP is
+ * returned.
  *
- * We arrange the pages in a chain: each page is an array of PBES_PER_PAGE
- * struct pbe elements (pbes) and the last element in the page points
- * to the next page.
+ * It is required to run memory_bm_position_reset() before the first call to
+ * this function.
+ */
+
+static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm)
+{
+	struct zone_bitmap *zone_bm;
+	struct bm_block *bb;
+	int chunk;
+	int bit;
+
+	do {
+		bb = bm->cur.block;
+		do {
+			chunk = bm->cur.chunk;
+			bit = bm->cur.bit;
+			do {
+				bit = next_bit_in_chunk(bit, bb->data + chunk);
+				if (bit >= 0)
+					goto Return_pfn;
+
+				chunk = next_chunk_in_block(chunk, bb);
+				bit = -1;
+			} while (chunk >= 0);
+			bb = bb->next;
+			bm->cur.block = bb;
+			memory_bm_reset_chunk(bm);
+		} while (bb);
+		zone_bm = bm->cur.zone_bm->next;
+		if (zone_bm) {
+			bm->cur.zone_bm = zone_bm;
+			bm->cur.block = zone_bm->bm_blocks;
+			memory_bm_reset_chunk(bm);
+		}
+	} while (zone_bm);
+	memory_bm_position_reset(bm);
+	return BM_END_OF_MAP;
+
+Return_pfn:
+	bm->cur.chunk = chunk;
+	bm->cur.bit = bit;
+	return bb->start_pfn + chunk * BM_BITS_PER_CHUNK + bit;
+}
+
+/**
+ * snapshot_additional_pages - estimate the number of additional pages
+ * that will be needed for setting up the suspend image data structures
+ * for given zone (usually the returned value is greater than the exact number)
+ */
+
+unsigned int snapshot_additional_pages(struct zone *zone)
+{
+	unsigned int res;
+
+	res = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK);
+	res += DIV_ROUND_UP(res * sizeof(struct bm_block), PAGE_SIZE);
+	return res;
+}
674 | |||
675 | /** | ||
676 | * pfn_is_nosave - check if given pfn is in the 'nosave' section | ||
677 | */ | ||
678 | |||
679 | static inline int pfn_is_nosave(unsigned long pfn) | ||
680 | { | ||
681 | unsigned long nosave_begin_pfn = __pa(&__nosave_begin) >> PAGE_SHIFT; | ||
682 | unsigned long nosave_end_pfn = PAGE_ALIGN(__pa(&__nosave_end)) >> PAGE_SHIFT; | ||
683 | return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn); | ||
684 | } | ||
685 | |||
686 | /** | ||
687 | * saveable - Determine whether a page should be cloned or not. | ||
688 | * @pfn: The page | ||
350 | * | 689 | * |
351 | * On each page we set up a list of struct_pbe elements. | 690 | * We save a page if it isn't Nosave, and is not in the range of pages |
691 | * statically defined as 'unsaveable', and it | ||
692 | * isn't a part of a free chunk of pages. | ||
352 | */ | 693 | */ |
353 | 694 | ||
354 | static struct pbe *alloc_pagedir(unsigned int nr_pages, gfp_t gfp_mask, | 695 | static struct page *saveable_page(unsigned long pfn) |
355 | int safe_needed) | ||
356 | { | 696 | { |
357 | unsigned int num; | 697 | struct page *page; |
358 | struct pbe *pblist, *pbe; | 698 | |
699 | if (!pfn_valid(pfn)) | ||
700 | return NULL; | ||
359 | 701 | ||
360 | if (!nr_pages) | 702 | page = pfn_to_page(pfn); |
703 | |||
704 | if (PageNosave(page)) | ||
705 | return NULL; | ||
706 | if (PageReserved(page) && pfn_is_nosave(pfn)) | ||
361 | return NULL; | 707 | return NULL; |
708 | if (PageNosaveFree(page)) | ||
709 | return NULL; | ||
710 | |||
711 | return page; | ||
712 | } | ||
713 | |||
714 | unsigned int count_data_pages(void) | ||
715 | { | ||
716 | struct zone *zone; | ||
717 | unsigned long pfn, max_zone_pfn; | ||
718 | unsigned int n = 0; | ||
362 | 719 | ||
363 | pblist = alloc_image_page(gfp_mask, safe_needed); | 720 | for_each_zone (zone) { |
364 | /* FIXME: rewrite this ugly loop */ | 721 | if (is_highmem(zone)) |
365 | for (pbe = pblist, num = PBES_PER_PAGE; pbe && num < nr_pages; | 722 | continue; |
366 | pbe = pbe->next, num += PBES_PER_PAGE) { | 723 | mark_free_pages(zone); |
367 | pbe += PB_PAGE_SKIP; | 724 | max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; |
368 | pbe->next = alloc_image_page(gfp_mask, safe_needed); | 725 | for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) |
726 | n += !!saveable_page(pfn); | ||
369 | } | 727 | } |
370 | if (!pbe) { /* get_zeroed_page() failed */ | 728 | return n; |
371 | free_pagedir(pblist, 1); | 729 | } |
372 | pblist = NULL; | 730 | |
373 | } else | 731 | static inline void copy_data_page(long *dst, long *src) |
374 | create_pbe_list(pblist, nr_pages); | 732 | { |
375 | return pblist; | 733 | int n; |
734 | |||
735 | /* copy_page and memcpy are not usable for copying task structs. */ | ||
736 | for (n = PAGE_SIZE / sizeof(long); n; n--) | ||
737 | *dst++ = *src++; | ||
738 | } | ||
739 | |||
740 | static void | ||
741 | copy_data_pages(struct memory_bitmap *copy_bm, struct memory_bitmap *orig_bm) | ||
742 | { | ||
743 | struct zone *zone; | ||
744 | unsigned long pfn; | ||
745 | |||
746 | for_each_zone (zone) { | ||
747 | unsigned long max_zone_pfn; | ||
748 | |||
749 | if (is_highmem(zone)) | ||
750 | continue; | ||
751 | |||
752 | mark_free_pages(zone); | ||
753 | max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; | ||
754 | for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) | ||
755 | if (saveable_page(pfn)) | ||
756 | memory_bm_set_bit(orig_bm, pfn); | ||
757 | } | ||
758 | memory_bm_position_reset(orig_bm); | ||
759 | memory_bm_position_reset(copy_bm); | ||
760 | do { | ||
761 | pfn = memory_bm_next_pfn(orig_bm); | ||
762 | if (likely(pfn != BM_END_OF_MAP)) { | ||
763 | struct page *page; | ||
764 | void *src; | ||
765 | |||
766 | page = pfn_to_page(pfn); | ||
767 | src = page_address(page); | ||
768 | page = pfn_to_page(memory_bm_next_pfn(copy_bm)); | ||
769 | copy_data_page(page_address(page), src); | ||
770 | } | ||
771 | } while (pfn != BM_END_OF_MAP); | ||
376 | } | 772 | } |
377 | 773 | ||
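The memory_bm_* calls used throughout these hunks belong to the new memory bitmap code earlier in snapshot.c, outside this excerpt. Sketched here is the contract the call sites above rely on, with signatures as used in the hunk (a reading aid, not the real implementation):

    /* Assumed iterator contract, inferred from the call sites above: */
    struct memory_bitmap;        /* one bit per pfn, opaque here */

    void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn);
    void memory_bm_position_reset(struct memory_bitmap *bm);
    /* Returns the set pfns in ascending order, one per call, and
     * BM_END_OF_MAP once the bitmap is exhausted.
     */
    unsigned long memory_bm_next_pfn(struct memory_bitmap *bm);

copy_data_pages() depends on both bitmaps holding exactly nr_pages set bits, so walking them in lockstep pairs each saveable pfn with its preallocated copy frame.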
378 | /** | 774 | /** |
379 | * Free pages we allocated for suspend. Suspend pages are alocated | 775 | * swsusp_free - free pages allocated for the suspend. |
380 | * before atomic copy, so we need to free them after resume. | 776 | * |
777 | * Suspend pages are allocated before the atomic copy is made, so we | ||
778 | * need to release them after the resume. | ||
381 | */ | 779 | */ |
382 | 780 | ||
383 | void swsusp_free(void) | 781 | void swsusp_free(void) |
384 | { | 782 | { |
385 | struct zone *zone; | 783 | struct zone *zone; |
386 | unsigned long zone_pfn; | 784 | unsigned long pfn, max_zone_pfn; |
387 | 785 | ||
388 | for_each_zone(zone) { | 786 | for_each_zone(zone) { |
389 | for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) | 787 | max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; |
390 | if (pfn_valid(zone_pfn + zone->zone_start_pfn)) { | 788 | for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) |
391 | struct page *page; | 789 | if (pfn_valid(pfn)) { |
392 | page = pfn_to_page(zone_pfn + zone->zone_start_pfn); | 790 | struct page *page = pfn_to_page(pfn); |
791 | |||
393 | if (PageNosave(page) && PageNosaveFree(page)) { | 792 | if (PageNosave(page) && PageNosaveFree(page)) { |
394 | ClearPageNosave(page); | 793 | ClearPageNosave(page); |
395 | ClearPageNosaveFree(page); | 794 | ClearPageNosaveFree(page); |
@@ -399,7 +798,7 @@ void swsusp_free(void) | |||
399 | } | 798 | } |
400 | nr_copy_pages = 0; | 799 | nr_copy_pages = 0; |
401 | nr_meta_pages = 0; | 800 | nr_meta_pages = 0; |
402 | pagedir_nosave = NULL; | 801 | restore_pblist = NULL; |
403 | buffer = NULL; | 802 | buffer = NULL; |
404 | } | 803 | } |
405 | 804 | ||
@@ -414,46 +813,57 @@ void swsusp_free(void) | |||
414 | static int enough_free_mem(unsigned int nr_pages) | 813 | static int enough_free_mem(unsigned int nr_pages) |
415 | { | 814 | { |
416 | struct zone *zone; | 815 | struct zone *zone; |
417 | unsigned int n = 0; | 816 | unsigned int free = 0, meta = 0; |
418 | 817 | ||
419 | for_each_zone (zone) | 818 | for_each_zone (zone) |
420 | if (!is_highmem(zone)) | 819 | if (!is_highmem(zone)) { |
421 | n += zone->free_pages; | 820 | free += zone->free_pages; |
422 | pr_debug("swsusp: available memory: %u pages\n", n); | 821 | meta += snapshot_additional_pages(zone); |
423 | return n > (nr_pages + PAGES_FOR_IO + | 822 | } |
424 | (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE); | ||
425 | } | ||
426 | 823 | ||
427 | static int alloc_data_pages(struct pbe *pblist, gfp_t gfp_mask, int safe_needed) | 824 | pr_debug("swsusp: pages needed: %u + %u + %u, available pages: %u\n", |
428 | { | 825 | nr_pages, PAGES_FOR_IO, meta, free); |
429 | struct pbe *p; | ||
430 | 826 | ||
431 | for_each_pbe (p, pblist) { | 827 | return free > nr_pages + PAGES_FOR_IO + meta; |
432 | p->address = (unsigned long)alloc_image_page(gfp_mask, safe_needed); | ||
433 | if (!p->address) | ||
434 | return -ENOMEM; | ||
435 | } | ||
436 | return 0; | ||
437 | } | 828 | } |
438 | 829 | ||
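A rough worked example of the new budget (the constants are illustrative): with 4 KB pages, nr_pages = 100000 saveable pages, PAGES_FOR_IO commonly 1024 in this era, and snapshot_additional_pages() reporting, say, 60 bitmap pages across the lowmem zones, the check requires free > 101084 pages, i.e. roughly 395 MB of free lowmem, before swsusp_save() may proceed.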
439 | static struct pbe *swsusp_alloc(unsigned int nr_pages) | 830 | static int |
831 | swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm, | ||
832 | unsigned int nr_pages) | ||
440 | { | 833 | { |
441 | struct pbe *pblist; | 834 | int error; |
442 | 835 | ||
443 | if (!(pblist = alloc_pagedir(nr_pages, GFP_ATOMIC | __GFP_COLD, 0))) { | 836 | error = memory_bm_create(orig_bm, GFP_ATOMIC | __GFP_COLD, PG_ANY); |
444 | printk(KERN_ERR "suspend: Allocating pagedir failed.\n"); | 837 | if (error) |
445 | return NULL; | 838 | goto Free; |
446 | } | ||
447 | 839 | ||
448 | if (alloc_data_pages(pblist, GFP_ATOMIC | __GFP_COLD, 0)) { | 840 | error = memory_bm_create(copy_bm, GFP_ATOMIC | __GFP_COLD, PG_ANY); |
449 | printk(KERN_ERR "suspend: Allocating image pages failed.\n"); | 841 | if (error) |
450 | swsusp_free(); | 842 | goto Free; |
451 | return NULL; | 843 | |
844 | while (nr_pages-- > 0) { | ||
845 | struct page *page = alloc_page(GFP_ATOMIC | __GFP_COLD); | ||
846 | if (!page) | ||
847 | goto Free; | ||
848 | |||
849 | SetPageNosave(page); | ||
850 | SetPageNosaveFree(page); | ||
851 | memory_bm_set_bit(copy_bm, page_to_pfn(page)); | ||
452 | } | 852 | } |
853 | return 0; | ||
453 | 854 | ||
454 | return pblist; | 855 | Free: |
856 | swsusp_free(); | ||
857 | return -ENOMEM; | ||
455 | } | 858 | } |
456 | 859 | ||
860 | /* Memory bitmap used for marking saveable pages */ | ||
861 | static struct memory_bitmap orig_bm; | ||
862 | /* Memory bitmap used for marking allocated pages that will contain the copies | ||
863 | * of saveable pages | ||
864 | */ | ||
865 | static struct memory_bitmap copy_bm; | ||
866 | |||
457 | asmlinkage int swsusp_save(void) | 867 | asmlinkage int swsusp_save(void) |
458 | { | 868 | { |
459 | unsigned int nr_pages; | 869 | unsigned int nr_pages; |
@@ -464,25 +874,19 @@ asmlinkage int swsusp_save(void) | |||
464 | nr_pages = count_data_pages(); | 874 | nr_pages = count_data_pages(); |
465 | printk("swsusp: Need to copy %u pages\n", nr_pages); | 875 | printk("swsusp: Need to copy %u pages\n", nr_pages); |
466 | 876 | ||
467 | pr_debug("swsusp: pages needed: %u + %lu + %u, free: %u\n", | ||
468 | nr_pages, | ||
469 | (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE, | ||
470 | PAGES_FOR_IO, nr_free_pages()); | ||
471 | |||
472 | if (!enough_free_mem(nr_pages)) { | 877 | if (!enough_free_mem(nr_pages)) { |
473 | printk(KERN_ERR "swsusp: Not enough free memory\n"); | 878 | printk(KERN_ERR "swsusp: Not enough free memory\n"); |
474 | return -ENOMEM; | 879 | return -ENOMEM; |
475 | } | 880 | } |
476 | 881 | ||
477 | pagedir_nosave = swsusp_alloc(nr_pages); | 882 | if (swsusp_alloc(&orig_bm, ©_bm, nr_pages)) |
478 | if (!pagedir_nosave) | ||
479 | return -ENOMEM; | 883 | return -ENOMEM; |
480 | 884 | ||
481 | /* While allocating the suspend pagedir, new cold pages may appear. | 885 | /* While allocating the suspend pagedir, new cold pages may appear.
482 | * Kill them. | 886 | * Kill them. |
483 | */ | 887 | */ |
484 | drain_local_pages(); | 888 | drain_local_pages(); |
485 | copy_data_pages(pagedir_nosave); | 889 | copy_data_pages(©_bm, &orig_bm); |
486 | 890 | ||
487 | /* | 891 | /* |
488 | * End of critical section. From now on, we can write to memory, | 892 | * End of critical section. From now on, we can write to memory, |
@@ -511,22 +915,20 @@ static void init_header(struct swsusp_info *info) | |||
511 | } | 915 | } |
512 | 916 | ||
513 | /** | 917 | /** |
514 | * pack_orig_addresses - the .orig_address fields of the PBEs from the | 918 | * pack_pfns - pfns corresponding to the set bits found in the bitmap @bm |
515 | * list starting at @pbe are stored in the array @buf[] (1 page) | 919 | * are stored in the array @buf[] (1 page at a time) |
516 | */ | 920 | */ |
517 | 921 | ||
518 | static inline struct pbe *pack_orig_addresses(unsigned long *buf, struct pbe *pbe) | 922 | static inline void |
923 | pack_pfns(unsigned long *buf, struct memory_bitmap *bm) | ||
519 | { | 924 | { |
520 | int j; | 925 | int j; |
521 | 926 | ||
522 | for (j = 0; j < PAGE_SIZE / sizeof(long) && pbe; j++) { | 927 | for (j = 0; j < PAGE_SIZE / sizeof(long); j++) { |
523 | buf[j] = pbe->orig_address; | 928 | buf[j] = memory_bm_next_pfn(bm); |
524 | pbe = pbe->next; | 929 | if (unlikely(buf[j] == BM_END_OF_MAP)) |
930 | break; | ||
525 | } | 931 | } |
526 | if (!pbe) | ||
527 | for (; j < PAGE_SIZE / sizeof(long); j++) | ||
528 | buf[j] = 0; | ||
529 | return pbe; | ||
530 | } | 932 | } |
531 | 933 | ||
532 | /** | 934 | /** |
@@ -553,37 +955,39 @@ static inline struct pbe *pack_orig_addresses(unsigned long *buf, struct pbe *pb | |||
553 | 955 | ||
554 | int snapshot_read_next(struct snapshot_handle *handle, size_t count) | 956 | int snapshot_read_next(struct snapshot_handle *handle, size_t count) |
555 | { | 957 | { |
556 | if (handle->page > nr_meta_pages + nr_copy_pages) | 958 | if (handle->cur > nr_meta_pages + nr_copy_pages) |
557 | return 0; | 959 | return 0; |
960 | |||
558 | if (!buffer) { | 961 | if (!buffer) { |
559 | /* This makes the buffer be freed by swsusp_free() */ | 962 | /* This makes the buffer be freed by swsusp_free() */ |
560 | buffer = alloc_image_page(GFP_ATOMIC, 0); | 963 | buffer = alloc_image_page(GFP_ATOMIC, PG_ANY); |
561 | if (!buffer) | 964 | if (!buffer) |
562 | return -ENOMEM; | 965 | return -ENOMEM; |
563 | } | 966 | } |
564 | if (!handle->offset) { | 967 | if (!handle->offset) { |
565 | init_header((struct swsusp_info *)buffer); | 968 | init_header((struct swsusp_info *)buffer); |
566 | handle->buffer = buffer; | 969 | handle->buffer = buffer; |
567 | handle->pbe = pagedir_nosave; | 970 | memory_bm_position_reset(&orig_bm); |
971 | memory_bm_position_reset(©_bm); | ||
568 | } | 972 | } |
569 | if (handle->prev < handle->page) { | 973 | if (handle->prev < handle->cur) { |
570 | if (handle->page <= nr_meta_pages) { | 974 | if (handle->cur <= nr_meta_pages) { |
571 | handle->pbe = pack_orig_addresses(buffer, handle->pbe); | 975 | memset(buffer, 0, PAGE_SIZE); |
572 | if (!handle->pbe) | 976 | pack_pfns(buffer, &orig_bm); |
573 | handle->pbe = pagedir_nosave; | ||
574 | } else { | 977 | } else { |
575 | handle->buffer = (void *)handle->pbe->address; | 978 | unsigned long pfn = memory_bm_next_pfn(©_bm); |
576 | handle->pbe = handle->pbe->next; | 979 | |
980 | handle->buffer = page_address(pfn_to_page(pfn)); | ||
577 | } | 981 | } |
578 | handle->prev = handle->page; | 982 | handle->prev = handle->cur; |
579 | } | 983 | } |
580 | handle->buf_offset = handle->page_offset; | 984 | handle->buf_offset = handle->cur_offset; |
581 | if (handle->page_offset + count >= PAGE_SIZE) { | 985 | if (handle->cur_offset + count >= PAGE_SIZE) { |
582 | count = PAGE_SIZE - handle->page_offset; | 986 | count = PAGE_SIZE - handle->cur_offset; |
583 | handle->page_offset = 0; | 987 | handle->cur_offset = 0; |
584 | handle->page++; | 988 | handle->cur++; |
585 | } else { | 989 | } else { |
586 | handle->page_offset += count; | 990 | handle->cur_offset += count; |
587 | } | 991 | } |
588 | handle->offset += count; | 992 | handle->offset += count; |
589 | return count; | 993 | return count; |
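Read back to back, init_header(), pack_pfns() and the data branch above define the layout of the image as snapshot_read_next() hands it out. Inferred from this function alone (the layout is not spelled out elsewhere in the hunk):

    /*
     * page 0                     : struct swsusp_info header
     * pages 1 .. nr_meta_pages   : pfns of the saved pages, one unsigned
     *                              long each; each page is zeroed before
     *                              pack_pfns() fills it, so unused tail
     *                              entries read back as 0
     * next nr_copy_pages pages   : the saved page contents, in the same
     *                              order as the pfns above (copy_bm order)
     */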
@@ -595,47 +999,50 @@ int snapshot_read_next(struct snapshot_handle *handle, size_t count) | |||
595 | * had been used before suspend | 999 | * had been used before suspend |
596 | */ | 1000 | */ |
597 | 1001 | ||
598 | static int mark_unsafe_pages(struct pbe *pblist) | 1002 | static int mark_unsafe_pages(struct memory_bitmap *bm) |
599 | { | 1003 | { |
600 | struct zone *zone; | 1004 | struct zone *zone; |
601 | unsigned long zone_pfn; | 1005 | unsigned long pfn, max_zone_pfn; |
602 | struct pbe *p; | ||
603 | |||
604 | if (!pblist) /* a sanity check */ | ||
605 | return -EINVAL; | ||
606 | 1006 | ||
607 | /* Clear page flags */ | 1007 | /* Clear page flags */ |
608 | for_each_zone (zone) { | 1008 | for_each_zone (zone) { |
609 | for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) | 1009 | max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; |
610 | if (pfn_valid(zone_pfn + zone->zone_start_pfn)) | 1010 | for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) |
611 | ClearPageNosaveFree(pfn_to_page(zone_pfn + | 1011 | if (pfn_valid(pfn)) |
612 | zone->zone_start_pfn)); | 1012 | ClearPageNosaveFree(pfn_to_page(pfn)); |
613 | } | 1013 | } |
614 | 1014 | ||
615 | /* Mark orig addresses */ | 1015 | /* Mark pages that correspond to the "original" pfns as "unsafe" */ |
616 | for_each_pbe (p, pblist) { | 1016 | memory_bm_position_reset(bm); |
617 | if (virt_addr_valid(p->orig_address)) | 1017 | do { |
618 | SetPageNosaveFree(virt_to_page(p->orig_address)); | 1018 | pfn = memory_bm_next_pfn(bm); |
619 | else | 1019 | if (likely(pfn != BM_END_OF_MAP)) { |
620 | return -EFAULT; | 1020 | if (likely(pfn_valid(pfn))) |
621 | } | 1021 | SetPageNosaveFree(pfn_to_page(pfn)); |
1022 | else | ||
1023 | return -EFAULT; | ||
1024 | } | ||
1025 | } while (pfn != BM_END_OF_MAP); | ||
622 | 1026 | ||
623 | unsafe_pages = 0; | 1027 | allocated_unsafe_pages = 0; |
624 | 1028 | ||
625 | return 0; | 1029 | return 0; |
626 | } | 1030 | } |
627 | 1031 | ||
628 | static void copy_page_backup_list(struct pbe *dst, struct pbe *src) | 1032 | static void |
1033 | duplicate_memory_bitmap(struct memory_bitmap *dst, struct memory_bitmap *src) | ||
629 | { | 1034 | { |
630 | /* We assume both lists contain the same number of elements */ | 1035 | unsigned long pfn; |
631 | while (src) { | 1036 | |
632 | dst->orig_address = src->orig_address; | 1037 | memory_bm_position_reset(src); |
633 | dst = dst->next; | 1038 | pfn = memory_bm_next_pfn(src); |
634 | src = src->next; | 1039 | while (pfn != BM_END_OF_MAP) { |
1040 | memory_bm_set_bit(dst, pfn); | ||
1041 | pfn = memory_bm_next_pfn(src); | ||
635 | } | 1042 | } |
636 | } | 1043 | } |
637 | 1044 | ||
638 | static int check_header(struct swsusp_info *info) | 1045 | static inline int check_header(struct swsusp_info *info) |
639 | { | 1046 | { |
640 | char *reason = NULL; | 1047 | char *reason = NULL; |
641 | 1048 | ||
@@ -662,19 +1069,14 @@ static int check_header(struct swsusp_info *info) | |||
662 | * load header - check the image header and copy data from it | 1069 | * load header - check the image header and copy data from it |
663 | */ | 1070 | */ |
664 | 1071 | ||
665 | static int load_header(struct snapshot_handle *handle, | 1072 | static int |
666 | struct swsusp_info *info) | 1073 | load_header(struct swsusp_info *info) |
667 | { | 1074 | { |
668 | int error; | 1075 | int error; |
669 | struct pbe *pblist; | ||
670 | 1076 | ||
1077 | restore_pblist = NULL; | ||
671 | error = check_header(info); | 1078 | error = check_header(info); |
672 | if (!error) { | 1079 | if (!error) { |
673 | pblist = alloc_pagedir(info->image_pages, GFP_ATOMIC, 0); | ||
674 | if (!pblist) | ||
675 | return -ENOMEM; | ||
676 | pagedir_nosave = pblist; | ||
677 | handle->pbe = pblist; | ||
678 | nr_copy_pages = info->image_pages; | 1080 | nr_copy_pages = info->image_pages; |
679 | nr_meta_pages = info->pages - info->image_pages - 1; | 1081 | nr_meta_pages = info->pages - info->image_pages - 1; |
680 | } | 1082 | } |
@@ -682,113 +1084,137 @@ static int load_header(struct snapshot_handle *handle, | |||
682 | } | 1084 | } |
683 | 1085 | ||
684 | /** | 1086 | /** |
685 | * unpack_orig_addresses - copy the elements of @buf[] (1 page) to | 1087 | * unpack_orig_pfns - for each element of @buf[] (1 page at a time) set |
686 | * the PBEs in the list starting at @pbe | 1088 | * the corresponding bit in the memory bitmap @bm |
687 | */ | 1089 | */ |
688 | 1090 | ||
689 | static inline struct pbe *unpack_orig_addresses(unsigned long *buf, | 1091 | static inline void |
690 | struct pbe *pbe) | 1092 | unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm) |
691 | { | 1093 | { |
692 | int j; | 1094 | int j; |
693 | 1095 | ||
694 | for (j = 0; j < PAGE_SIZE / sizeof(long) && pbe; j++) { | 1096 | for (j = 0; j < PAGE_SIZE / sizeof(long); j++) { |
695 | pbe->orig_address = buf[j]; | 1097 | if (unlikely(buf[j] == BM_END_OF_MAP)) |
696 | pbe = pbe->next; | 1098 | break; |
1099 | |||
1100 | memory_bm_set_bit(bm, buf[j]); | ||
697 | } | 1101 | } |
698 | return pbe; | ||
699 | } | 1102 | } |
700 | 1103 | ||
701 | /** | 1104 | /** |
702 | * prepare_image - use metadata contained in the PBE list | 1105 | * prepare_image - use the memory bitmap @bm to mark the pages that will |
703 | * pointed to by pagedir_nosave to mark the pages that will | 1106 | * be overwritten in the process of restoring the system memory state |
704 | * be overwritten in the process of restoring the system | 1107 | * from the suspend image ("unsafe" pages) and allocate memory for the |
705 | * memory state from the image ("unsafe" pages) and allocate | 1108 | * image. |
706 | * memory for the image | ||
707 | * | 1109 | * |
708 | * The idea is to allocate the PBE list first and then | 1110 | * The idea is to allocate a new memory bitmap first and then allocate |
709 | * allocate as many pages as it's needed for the image data, | 1111 | * as many pages as needed for the image data, but not to assign these |
710 | * but not to assign these pages to the PBEs initially. | 1112 | * pages to specific tasks initially. Instead, we just mark them as |
711 | * Instead, we just mark them as allocated and create a list | 1113 | * allocated and create a list of "safe" pages that will be used later. |
712 | * of "safe" which will be used later | ||
713 | */ | 1114 | */ |
714 | 1115 | ||
715 | struct safe_page { | 1116 | #define PBES_PER_LINKED_PAGE (LINKED_PAGE_DATA_SIZE / sizeof(struct pbe)) |
716 | struct safe_page *next; | ||
717 | char padding[PAGE_SIZE - sizeof(void *)]; | ||
718 | }; | ||
719 | 1117 | ||
720 | static struct safe_page *safe_pages; | 1118 | static struct linked_page *safe_pages_list; |
721 | 1119 | ||
722 | static int prepare_image(struct snapshot_handle *handle) | 1120 | static int |
1121 | prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm) | ||
723 | { | 1122 | { |
724 | int error = 0; | 1123 | unsigned int nr_pages; |
725 | unsigned int nr_pages = nr_copy_pages; | 1124 | struct linked_page *sp_list, *lp; |
726 | struct pbe *p, *pblist = NULL; | 1125 | int error; |
727 | 1126 | ||
728 | p = pagedir_nosave; | 1127 | error = mark_unsafe_pages(bm); |
729 | error = mark_unsafe_pages(p); | 1128 | if (error) |
730 | if (!error) { | 1129 | goto Free; |
731 | pblist = alloc_pagedir(nr_pages, GFP_ATOMIC, 1); | 1130 | |
732 | if (pblist) | 1131 | error = memory_bm_create(new_bm, GFP_ATOMIC, PG_SAFE); |
733 | copy_page_backup_list(pblist, p); | 1132 | if (error) |
734 | free_pagedir(p, 0); | 1133 | goto Free; |
735 | if (!pblist) | 1134 | |
1135 | duplicate_memory_bitmap(new_bm, bm); | ||
1136 | memory_bm_free(bm, PG_UNSAFE_KEEP); | ||
1137 | /* Reserve some safe pages for potential later use. | ||
1138 | * | ||
1139 | * NOTE: This way we make sure there will be enough safe pages for the | ||
1140 | * chain_alloc() in get_buffer(). It is a bit wasteful, but | ||
1141 | * nr_copy_pages cannot be greater than 50% of the memory anyway. | ||
1142 | */ | ||
1143 | sp_list = NULL; | ||
1144 | /* nr_copy_pages cannot be less than allocated_unsafe_pages */ | ||
1145 | nr_pages = nr_copy_pages - allocated_unsafe_pages; | ||
1146 | nr_pages = DIV_ROUND_UP(nr_pages, PBES_PER_LINKED_PAGE); | ||
1147 | while (nr_pages > 0) { | ||
1148 | lp = alloc_image_page(GFP_ATOMIC, PG_SAFE); | ||
1149 | if (!lp) { | ||
736 | error = -ENOMEM; | 1150 | error = -ENOMEM; |
1151 | goto Free; | ||
1152 | } | ||
1153 | lp->next = sp_list; | ||
1154 | sp_list = lp; | ||
1155 | nr_pages--; | ||
737 | } | 1156 | } |
738 | safe_pages = NULL; | 1157 | /* Preallocate memory for the image */ |
739 | if (!error && nr_pages > unsafe_pages) { | 1158 | safe_pages_list = NULL; |
740 | nr_pages -= unsafe_pages; | 1159 | nr_pages = nr_copy_pages - allocated_unsafe_pages; |
741 | while (nr_pages--) { | 1160 | while (nr_pages > 0) { |
742 | struct safe_page *ptr; | 1161 | lp = (struct linked_page *)get_zeroed_page(GFP_ATOMIC); |
743 | 1162 | if (!lp) { | |
744 | ptr = (struct safe_page *)get_zeroed_page(GFP_ATOMIC); | 1163 | error = -ENOMEM; |
745 | if (!ptr) { | 1164 | goto Free; |
746 | error = -ENOMEM; | 1165 | } |
747 | break; | 1166 | if (!PageNosaveFree(virt_to_page(lp))) { |
748 | } | 1167 | /* The page is "safe", add it to the list */ |
749 | if (!PageNosaveFree(virt_to_page(ptr))) { | 1168 | lp->next = safe_pages_list; |
750 | /* The page is "safe", add it to the list */ | 1169 | safe_pages_list = lp; |
751 | ptr->next = safe_pages; | ||
752 | safe_pages = ptr; | ||
753 | } | ||
754 | /* Mark the page as allocated */ | ||
755 | SetPageNosave(virt_to_page(ptr)); | ||
756 | SetPageNosaveFree(virt_to_page(ptr)); | ||
757 | } | 1170 | } |
1171 | /* Mark the page as allocated */ | ||
1172 | SetPageNosave(virt_to_page(lp)); | ||
1173 | SetPageNosaveFree(virt_to_page(lp)); | ||
1174 | nr_pages--; | ||
758 | } | 1175 | } |
759 | if (!error) { | 1176 | /* Free the reserved safe pages so that chain_alloc() can use them */ |
760 | pagedir_nosave = pblist; | 1177 | while (sp_list) { |
761 | } else { | 1178 | lp = sp_list->next; |
762 | handle->pbe = NULL; | 1179 | free_image_page(sp_list, PG_UNSAFE_CLEAR); |
763 | swsusp_free(); | 1180 | sp_list = lp; |
764 | } | 1181 | } |
1182 | return 0; | ||
1183 | |||
1184 | Free: | ||
1185 | swsusp_free(); | ||
765 | return error; | 1186 | return error; |
766 | } | 1187 | } |
767 | 1188 | ||
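prepare_image() reserves "safe" pages up front precisely so that chain_alloc() in get_buffer() below cannot fail for lack of them. chain_init()/chain_alloc() and struct linked_page are defined earlier in snapshot.c, outside this excerpt; the sketch below shows the idea under that assumption, with illustrative field names:

    struct linked_page {
        struct linked_page *next;
        char data[LINKED_PAGE_DATA_SIZE];  /* PAGE_SIZE - sizeof(void *) */
    };

    struct chain_allocator {
        struct linked_page *chain;   /* pages carved into small objects */
        unsigned int used_space;     /* bytes used in the current page */
        gfp_t gfp_mask;
        int safe_needed;             /* allocate PG_SAFE pages only */
    };

    /* Sketch: hand out 'size' bytes, grabbing a fresh linked page when
     * the current one cannot fit the request.
     */
    static void *chain_alloc_sketch(struct chain_allocator *ca,
                                    unsigned int size)
    {
        void *ret;

        if (LINKED_PAGE_DATA_SIZE - ca->used_space < size) {
            struct linked_page *lp;

            lp = alloc_image_page(ca->gfp_mask,
                                  ca->safe_needed ? PG_SAFE : PG_ANY);
            if (!lp)
                return NULL;
            lp->next = ca->chain;
            ca->chain = lp;
            ca->used_space = 0;
        }
        ret = ca->chain->data + ca->used_space;
        ca->used_space += size;
        return ret;
    }

This also explains PBES_PER_LINKED_PAGE above: one reserved safe page can hold that many struct pbe objects for the restore list.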
768 | static void *get_buffer(struct snapshot_handle *handle) | 1189 | /** |
1190 | * get_buffer - compute the address that snapshot_write_next() should | ||
1191 | * set for its caller to write to. | ||
1192 | */ | ||
1193 | |||
1194 | static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca) | ||
769 | { | 1195 | { |
770 | struct pbe *pbe = handle->pbe, *last = handle->last_pbe; | 1196 | struct pbe *pbe; |
771 | struct page *page = virt_to_page(pbe->orig_address); | 1197 | struct page *page = pfn_to_page(memory_bm_next_pfn(bm)); |
772 | 1198 | ||
773 | if (PageNosave(page) && PageNosaveFree(page)) { | 1199 | if (PageNosave(page) && PageNosaveFree(page)) |
774 | /* | 1200 | /* We have allocated the "original" page frame and we can |
775 | * We have allocated the "original" page frame and we can | 1201 | * use it directly to store the loaded page. |
776 | * use it directly to store the read page | ||
777 | */ | 1202 | */ |
778 | pbe->address = 0; | 1203 | return page_address(page); |
779 | if (last && last->next) | 1204 | |
780 | last->next = NULL; | 1205 | /* The "original" page frame has not been allocated and we have to |
781 | return (void *)pbe->orig_address; | 1206 | * use a "safe" page frame to store the loaded page. |
782 | } | ||
783 | /* | ||
784 | * The "original" page frame has not been allocated and we have to | ||
785 | * use a "safe" page frame to store the read page | ||
786 | */ | 1207 | */ |
787 | pbe->address = (unsigned long)safe_pages; | 1208 | pbe = chain_alloc(ca, sizeof(struct pbe)); |
788 | safe_pages = safe_pages->next; | 1209 | if (!pbe) { |
789 | if (last) | 1210 | swsusp_free(); |
790 | last->next = pbe; | 1211 | return NULL; |
791 | handle->last_pbe = pbe; | 1212 | } |
1213 | pbe->orig_address = (unsigned long)page_address(page); | ||
1214 | pbe->address = (unsigned long)safe_pages_list; | ||
1215 | safe_pages_list = safe_pages_list->next; | ||
1216 | pbe->next = restore_pblist; | ||
1217 | restore_pblist = pbe; | ||
792 | return (void *)pbe->address; | 1218 | return (void *)pbe->address; |
793 | } | 1219 | } |
794 | 1220 | ||
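Note that get_buffer() queues a struct pbe only for pages that had to be parked in "safe" frames; pages whose original frame was free are written in place and need no fixup. The consumer of restore_pblist is the arch-specific restore path, outside this file; a hypothetical sketch of that final pass, which runs just before the saved image is switched in:

    /* Hypothetical consumer: relocate each parked page back to its
     * original frame; after the loop the saved memory image is in place.
     */
    struct pbe *pbe;

    for (pbe = restore_pblist; pbe; pbe = pbe->next)
        copy_page((void *)pbe->orig_address, (void *)pbe->address);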
@@ -816,46 +1242,60 @@ static void *get_buffer(struct snapshot_handle *handle) | |||
816 | 1242 | ||
817 | int snapshot_write_next(struct snapshot_handle *handle, size_t count) | 1243 | int snapshot_write_next(struct snapshot_handle *handle, size_t count) |
818 | { | 1244 | { |
1245 | static struct chain_allocator ca; | ||
819 | int error = 0; | 1246 | int error = 0; |
820 | 1247 | ||
821 | if (handle->prev && handle->page > nr_meta_pages + nr_copy_pages) | 1248 | /* Check if we have already loaded the entire image */ |
1249 | if (handle->prev && handle->cur > nr_meta_pages + nr_copy_pages) | ||
822 | return 0; | 1250 | return 0; |
1251 | |||
823 | if (!buffer) { | 1252 | if (!buffer) { |
824 | /* This makes the buffer be freed by swsusp_free() */ | 1253 | /* This makes the buffer be freed by swsusp_free() */ |
825 | buffer = alloc_image_page(GFP_ATOMIC, 0); | 1254 | buffer = alloc_image_page(GFP_ATOMIC, PG_ANY); |
826 | if (!buffer) | 1255 | if (!buffer) |
827 | return -ENOMEM; | 1256 | return -ENOMEM; |
828 | } | 1257 | } |
829 | if (!handle->offset) | 1258 | if (!handle->offset) |
830 | handle->buffer = buffer; | 1259 | handle->buffer = buffer; |
831 | if (handle->prev < handle->page) { | 1260 | handle->sync_read = 1; |
832 | if (!handle->prev) { | 1261 | if (handle->prev < handle->cur) { |
833 | error = load_header(handle, (struct swsusp_info *)buffer); | 1262 | if (handle->prev == 0) { |
1263 | error = load_header(buffer); | ||
834 | if (error) | 1264 | if (error) |
835 | return error; | 1265 | return error; |
1266 | |||
1267 | error = memory_bm_create(©_bm, GFP_ATOMIC, PG_ANY); | ||
1268 | if (error) | ||
1269 | return error; | ||
1270 | |||
836 | } else if (handle->prev <= nr_meta_pages) { | 1271 | } else if (handle->prev <= nr_meta_pages) { |
837 | handle->pbe = unpack_orig_addresses(buffer, handle->pbe); | 1272 | unpack_orig_pfns(buffer, ©_bm); |
838 | if (!handle->pbe) { | 1273 | if (handle->prev == nr_meta_pages) { |
839 | error = prepare_image(handle); | 1274 | error = prepare_image(&orig_bm, ©_bm); |
840 | if (error) | 1275 | if (error) |
841 | return error; | 1276 | return error; |
842 | handle->pbe = pagedir_nosave; | 1277 | |
843 | handle->last_pbe = NULL; | 1278 | chain_init(&ca, GFP_ATOMIC, PG_SAFE); |
844 | handle->buffer = get_buffer(handle); | 1279 | memory_bm_position_reset(&orig_bm); |
1280 | restore_pblist = NULL; | ||
1281 | handle->buffer = get_buffer(&orig_bm, &ca); | ||
1282 | handle->sync_read = 0; | ||
1283 | if (!handle->buffer) | ||
1284 | return -ENOMEM; | ||
845 | } | 1285 | } |
846 | } else { | 1286 | } else { |
847 | handle->pbe = handle->pbe->next; | 1287 | handle->buffer = get_buffer(&orig_bm, &ca); |
848 | handle->buffer = get_buffer(handle); | 1288 | handle->sync_read = 0; |
849 | } | 1289 | } |
850 | handle->prev = handle->page; | 1290 | handle->prev = handle->cur; |
851 | } | 1291 | } |
852 | handle->buf_offset = handle->page_offset; | 1292 | handle->buf_offset = handle->cur_offset; |
853 | if (handle->page_offset + count >= PAGE_SIZE) { | 1293 | if (handle->cur_offset + count >= PAGE_SIZE) { |
854 | count = PAGE_SIZE - handle->page_offset; | 1294 | count = PAGE_SIZE - handle->cur_offset; |
855 | handle->page_offset = 0; | 1295 | handle->cur_offset = 0; |
856 | handle->page++; | 1296 | handle->cur++; |
857 | } else { | 1297 | } else { |
858 | handle->page_offset += count; | 1298 | handle->cur_offset += count; |
859 | } | 1299 | } |
860 | handle->offset += count; | 1300 | handle->offset += count; |
861 | return count; | 1301 | return count; |
@@ -863,6 +1303,13 @@ int snapshot_write_next(struct snapshot_handle *handle, size_t count) | |||
863 | 1303 | ||
864 | int snapshot_image_loaded(struct snapshot_handle *handle) | 1304 | int snapshot_image_loaded(struct snapshot_handle *handle) |
865 | { | 1305 | { |
866 | return !(!handle->pbe || handle->pbe->next || !nr_copy_pages || | 1306 | return !(!nr_copy_pages || |
867 | handle->page <= nr_meta_pages + nr_copy_pages); | 1307 | handle->cur <= nr_meta_pages + nr_copy_pages); |
1308 | } | ||
1309 | |||
1310 | void snapshot_free_unused_memory(struct snapshot_handle *handle) | ||
1311 | { | ||
1312 | /* Free only if we have loaded the image entirely */ | ||
1313 | if (handle->prev && handle->cur > nr_meta_pages + nr_copy_pages) | ||
1314 | memory_bm_free(&orig_bm, PG_UNSAFE_CLEAR); | ||
868 | } | 1315 | } |
diff --git a/kernel/power/swap.c b/kernel/power/swap.c index f1dd146bd64d..9b2ee5344dee 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include <linux/device.h> | 22 | #include <linux/device.h> |
23 | #include <linux/buffer_head.h> | 23 | #include <linux/buffer_head.h> |
24 | #include <linux/bio.h> | 24 | #include <linux/bio.h> |
25 | #include <linux/blkdev.h> | ||
25 | #include <linux/swap.h> | 26 | #include <linux/swap.h> |
26 | #include <linux/swapops.h> | 27 | #include <linux/swapops.h> |
27 | #include <linux/pm.h> | 28 | #include <linux/pm.h> |
@@ -49,18 +50,16 @@ static int mark_swapfiles(swp_entry_t start) | |||
49 | { | 50 | { |
50 | int error; | 51 | int error; |
51 | 52 | ||
52 | rw_swap_page_sync(READ, | 53 | rw_swap_page_sync(READ, swp_entry(root_swap, 0), |
53 | swp_entry(root_swap, 0), | 54 | virt_to_page((unsigned long)&swsusp_header), NULL); |
54 | virt_to_page((unsigned long)&swsusp_header)); | ||
55 | if (!memcmp("SWAP-SPACE",swsusp_header.sig, 10) || | 55 | if (!memcmp("SWAP-SPACE",swsusp_header.sig, 10) || |
56 | !memcmp("SWAPSPACE2",swsusp_header.sig, 10)) { | 56 | !memcmp("SWAPSPACE2",swsusp_header.sig, 10)) { |
57 | memcpy(swsusp_header.orig_sig,swsusp_header.sig, 10); | 57 | memcpy(swsusp_header.orig_sig,swsusp_header.sig, 10); |
58 | memcpy(swsusp_header.sig,SWSUSP_SIG, 10); | 58 | memcpy(swsusp_header.sig,SWSUSP_SIG, 10); |
59 | swsusp_header.image = start; | 59 | swsusp_header.image = start; |
60 | error = rw_swap_page_sync(WRITE, | 60 | error = rw_swap_page_sync(WRITE, swp_entry(root_swap, 0), |
61 | swp_entry(root_swap, 0), | 61 | virt_to_page((unsigned long)&swsusp_header), |
62 | virt_to_page((unsigned long) | 62 | NULL); |
63 | &swsusp_header)); | ||
64 | } else { | 63 | } else { |
65 | pr_debug("swsusp: Partition is not swap space.\n"); | 64 | pr_debug("swsusp: Partition is not swap space.\n"); |
66 | error = -ENODEV; | 65 | error = -ENODEV; |
@@ -88,16 +87,37 @@ static int swsusp_swap_check(void) /* This is called before saving image */ | |||
88 | * write_page - Write one page to given swap location. | 87 | * write_page - Write one page to given swap location. |
89 | * @buf: Address we're writing. | 88 | * @buf: Address we're writing. |
90 | * @offset: Offset of the swap page we're writing to. | 89 | * @offset: Offset of the swap page we're writing to. |
90 | * @bio_chain: Link the next write BIO here | ||
91 | */ | 91 | */ |
92 | 92 | ||
93 | static int write_page(void *buf, unsigned long offset) | 93 | static int write_page(void *buf, unsigned long offset, struct bio **bio_chain) |
94 | { | 94 | { |
95 | swp_entry_t entry; | 95 | swp_entry_t entry; |
96 | int error = -ENOSPC; | 96 | int error = -ENOSPC; |
97 | 97 | ||
98 | if (offset) { | 98 | if (offset) { |
99 | struct page *page = virt_to_page(buf); | ||
100 | |||
101 | if (bio_chain) { | ||
102 | /* | ||
103 | * Whether or not we successfully allocated a copy page, | ||
104 | * we take a ref on the page here. It gets undone in | ||
105 | * wait_on_bio_chain(). | ||
106 | */ | ||
107 | struct page *page_copy; | ||
108 | page_copy = alloc_page(GFP_ATOMIC); | ||
109 | if (page_copy == NULL) { | ||
110 | WARN_ON_ONCE(1); | ||
111 | bio_chain = NULL; /* Go synchronous */ | ||
112 | get_page(page); | ||
113 | } else { | ||
114 | memcpy(page_address(page_copy), | ||
115 | page_address(page), PAGE_SIZE); | ||
116 | page = page_copy; | ||
117 | } | ||
118 | } | ||
99 | entry = swp_entry(root_swap, offset); | 119 | entry = swp_entry(root_swap, offset); |
100 | error = rw_swap_page_sync(WRITE, entry, virt_to_page(buf)); | 120 | error = rw_swap_page_sync(WRITE, entry, page, bio_chain); |
101 | } | 121 | } |
102 | return error; | 122 | return error; |
103 | } | 123 | } |
@@ -146,6 +166,26 @@ static void release_swap_writer(struct swap_map_handle *handle) | |||
146 | handle->bitmap = NULL; | 166 | handle->bitmap = NULL; |
147 | } | 167 | } |
148 | 168 | ||
169 | static void show_speed(struct timeval *start, struct timeval *stop, | ||
170 | unsigned nr_pages, char *msg) | ||
171 | { | ||
172 | s64 elapsed_centisecs64; | ||
173 | int centisecs; | ||
174 | int k; | ||
175 | int kps; | ||
176 | |||
177 | elapsed_centisecs64 = timeval_to_ns(stop) - timeval_to_ns(start); | ||
178 | do_div(elapsed_centisecs64, NSEC_PER_SEC / 100); | ||
179 | centisecs = elapsed_centisecs64; | ||
180 | if (centisecs == 0) | ||
181 | centisecs = 1; /* avoid div-by-zero */ | ||
182 | k = nr_pages * (PAGE_SIZE / 1024); | ||
183 | kps = (k * 100) / centisecs; | ||
184 | printk("%s %d kbytes in %d.%02d seconds (%d.%02d MB/s)\n", msg, k, | ||
185 | centisecs / 100, centisecs % 100, | ||
186 | kps / 1000, (kps % 1000) / 10); | ||
187 | } | ||
188 | |||
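A quick worked example of the arithmetic above: writing 32768 pages with 4 KB pages gives k = 131072 kbytes; if that took 2.5 seconds, centisecs = 250 and kps = 131072 * 100 / 250 = 52428, so the line printed is "Wrote 131072 kbytes in 2.50 seconds (52.42 MB/s)".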
149 | static int get_swap_writer(struct swap_map_handle *handle) | 189 | static int get_swap_writer(struct swap_map_handle *handle) |
150 | { | 190 | { |
151 | handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_KERNEL); | 191 | handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_KERNEL); |
@@ -165,37 +205,70 @@ static int get_swap_writer(struct swap_map_handle *handle) | |||
165 | return 0; | 205 | return 0; |
166 | } | 206 | } |
167 | 207 | ||
168 | static int swap_write_page(struct swap_map_handle *handle, void *buf) | 208 | static int wait_on_bio_chain(struct bio **bio_chain) |
169 | { | 209 | { |
170 | int error; | 210 | struct bio *bio; |
211 | struct bio *next_bio; | ||
212 | int ret = 0; | ||
213 | |||
214 | if (bio_chain == NULL) | ||
215 | return 0; | ||
216 | |||
217 | bio = *bio_chain; | ||
218 | if (bio == NULL) | ||
219 | return 0; | ||
220 | while (bio) { | ||
221 | struct page *page; | ||
222 | |||
223 | next_bio = bio->bi_private; | ||
224 | page = bio->bi_io_vec[0].bv_page; | ||
225 | wait_on_page_locked(page); | ||
226 | if (!PageUptodate(page) || PageError(page)) | ||
227 | ret = -EIO; | ||
228 | put_page(page); | ||
229 | bio_put(bio); | ||
230 | bio = next_bio; | ||
231 | } | ||
232 | *bio_chain = NULL; | ||
233 | return ret; | ||
234 | } | ||
235 | |||
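wait_on_bio_chain() turns the chained writes into a simple producer/reaper pattern; condensed from save_image()/load_image() below, with the loop predicate and buffer source left hypothetical:

    struct bio *bio_chain = NULL;
    int error = 0, err2;

    while (more_pages_to_write() && !error)    /* hypothetical predicate */
        error = swap_write_page(handle, next_buffer(), &bio_chain);

    err2 = wait_on_bio_chain(&bio_chain);      /* reap; recheck status */
    if (!error)
        error = err2;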
236 | static int swap_write_page(struct swap_map_handle *handle, void *buf, | ||
237 | struct bio **bio_chain) | ||
238 | { | ||
239 | int error = 0; | ||
171 | unsigned long offset; | 240 | unsigned long offset; |
172 | 241 | ||
173 | if (!handle->cur) | 242 | if (!handle->cur) |
174 | return -EINVAL; | 243 | return -EINVAL; |
175 | offset = alloc_swap_page(root_swap, handle->bitmap); | 244 | offset = alloc_swap_page(root_swap, handle->bitmap); |
176 | error = write_page(buf, offset); | 245 | error = write_page(buf, offset, bio_chain); |
177 | if (error) | 246 | if (error) |
178 | return error; | 247 | return error; |
179 | handle->cur->entries[handle->k++] = offset; | 248 | handle->cur->entries[handle->k++] = offset; |
180 | if (handle->k >= MAP_PAGE_ENTRIES) { | 249 | if (handle->k >= MAP_PAGE_ENTRIES) { |
250 | error = wait_on_bio_chain(bio_chain); | ||
251 | if (error) | ||
252 | goto out; | ||
181 | offset = alloc_swap_page(root_swap, handle->bitmap); | 253 | offset = alloc_swap_page(root_swap, handle->bitmap); |
182 | if (!offset) | 254 | if (!offset) |
183 | return -ENOSPC; | 255 | return -ENOSPC; |
184 | handle->cur->next_swap = offset; | 256 | handle->cur->next_swap = offset; |
185 | error = write_page(handle->cur, handle->cur_swap); | 257 | error = write_page(handle->cur, handle->cur_swap, NULL); |
186 | if (error) | 258 | if (error) |
187 | return error; | 259 | goto out; |
188 | memset(handle->cur, 0, PAGE_SIZE); | 260 | memset(handle->cur, 0, PAGE_SIZE); |
189 | handle->cur_swap = offset; | 261 | handle->cur_swap = offset; |
190 | handle->k = 0; | 262 | handle->k = 0; |
191 | } | 263 | } |
192 | return 0; | 264 | out: |
265 | return error; | ||
193 | } | 266 | } |
194 | 267 | ||
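swap_write_page() maintains the on-disk index through struct swap_map_page, defined in kernel/power/power.h rather than in this hunk; its era-appropriate shape is roughly:

    #define MAP_PAGE_ENTRIES   (PAGE_SIZE / sizeof(long) - 1)

    struct swap_map_page {
        unsigned long entries[MAP_PAGE_ENTRIES]; /* offsets of data pages */
        unsigned long next_swap;                 /* offset of next map page */
    };

Note the design choice visible above: data pages may go through the bio chain, but a full map page is flushed with bio_chain == NULL, i.e. synchronously, because handle->cur is reused for the next map page immediately after the memset().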
195 | static int flush_swap_writer(struct swap_map_handle *handle) | 268 | static int flush_swap_writer(struct swap_map_handle *handle) |
196 | { | 269 | { |
197 | if (handle->cur && handle->cur_swap) | 270 | if (handle->cur && handle->cur_swap) |
198 | return write_page(handle->cur, handle->cur_swap); | 271 | return write_page(handle->cur, handle->cur_swap, NULL); |
199 | else | 272 | else |
200 | return -EINVAL; | 273 | return -EINVAL; |
201 | } | 274 | } |
@@ -206,21 +279,29 @@ static int flush_swap_writer(struct swap_map_handle *handle) | |||
206 | 279 | ||
207 | static int save_image(struct swap_map_handle *handle, | 280 | static int save_image(struct swap_map_handle *handle, |
208 | struct snapshot_handle *snapshot, | 281 | struct snapshot_handle *snapshot, |
209 | unsigned int nr_pages) | 282 | unsigned int nr_to_write) |
210 | { | 283 | { |
211 | unsigned int m; | 284 | unsigned int m; |
212 | int ret; | 285 | int ret; |
213 | int error = 0; | 286 | int error = 0; |
287 | int nr_pages; | ||
288 | int err2; | ||
289 | struct bio *bio; | ||
290 | struct timeval start; | ||
291 | struct timeval stop; | ||
214 | 292 | ||
215 | printk("Saving image data pages (%u pages) ... ", nr_pages); | 293 | printk("Saving image data pages (%u pages) ... ", nr_to_write); |
216 | m = nr_pages / 100; | 294 | m = nr_to_write / 100; |
217 | if (!m) | 295 | if (!m) |
218 | m = 1; | 296 | m = 1; |
219 | nr_pages = 0; | 297 | nr_pages = 0; |
298 | bio = NULL; | ||
299 | do_gettimeofday(&start); | ||
220 | do { | 300 | do { |
221 | ret = snapshot_read_next(snapshot, PAGE_SIZE); | 301 | ret = snapshot_read_next(snapshot, PAGE_SIZE); |
222 | if (ret > 0) { | 302 | if (ret > 0) { |
223 | error = swap_write_page(handle, data_of(*snapshot)); | 303 | error = swap_write_page(handle, data_of(*snapshot), |
304 | &bio); | ||
224 | if (error) | 305 | if (error) |
225 | break; | 306 | break; |
226 | if (!(nr_pages % m)) | 307 | if (!(nr_pages % m)) |
@@ -228,8 +309,13 @@ static int save_image(struct swap_map_handle *handle, | |||
228 | nr_pages++; | 309 | nr_pages++; |
229 | } | 310 | } |
230 | } while (ret > 0); | 311 | } while (ret > 0); |
312 | err2 = wait_on_bio_chain(&bio); | ||
313 | do_gettimeofday(&stop); | ||
314 | if (!error) | ||
315 | error = err2; | ||
231 | if (!error) | 316 | if (!error) |
232 | printk("\b\b\b\bdone\n"); | 317 | printk("\b\b\b\bdone\n"); |
318 | show_speed(&start, &stop, nr_to_write, "Wrote"); | ||
233 | return error; | 319 | return error; |
234 | } | 320 | } |
235 | 321 | ||
@@ -245,8 +331,7 @@ static int enough_swap(unsigned int nr_pages) | |||
245 | unsigned int free_swap = count_swap_pages(root_swap, 1); | 331 | unsigned int free_swap = count_swap_pages(root_swap, 1); |
246 | 332 | ||
247 | pr_debug("swsusp: free swap pages: %u\n", free_swap); | 333 | pr_debug("swsusp: free swap pages: %u\n", free_swap); |
248 | return free_swap > (nr_pages + PAGES_FOR_IO + | 334 | return free_swap > nr_pages + PAGES_FOR_IO; |
249 | (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE); | ||
250 | } | 335 | } |
251 | 336 | ||
252 | /** | 337 | /** |
@@ -266,7 +351,8 @@ int swsusp_write(void) | |||
266 | int error; | 351 | int error; |
267 | 352 | ||
268 | if ((error = swsusp_swap_check())) { | 353 | if ((error = swsusp_swap_check())) { |
269 | printk(KERN_ERR "swsusp: Cannot find swap device, try swapon -a.\n"); | 354 | printk(KERN_ERR "swsusp: Cannot find swap device, try " |
355 | "swapon -a.\n"); | ||
270 | return error; | 356 | return error; |
271 | } | 357 | } |
272 | memset(&snapshot, 0, sizeof(struct snapshot_handle)); | 358 | memset(&snapshot, 0, sizeof(struct snapshot_handle)); |
@@ -281,7 +367,7 @@ int swsusp_write(void) | |||
281 | error = get_swap_writer(&handle); | 367 | error = get_swap_writer(&handle); |
282 | if (!error) { | 368 | if (!error) { |
283 | unsigned long start = handle.cur_swap; | 369 | unsigned long start = handle.cur_swap; |
284 | error = swap_write_page(&handle, header); | 370 | error = swap_write_page(&handle, header, NULL); |
285 | if (!error) | 371 | if (!error) |
286 | error = save_image(&handle, &snapshot, | 372 | error = save_image(&handle, &snapshot, |
287 | header->pages - 1); | 373 | header->pages - 1); |
@@ -298,27 +384,6 @@ int swsusp_write(void) | |||
298 | return error; | 384 | return error; |
299 | } | 385 | } |
300 | 386 | ||
301 | /* | ||
302 | * Using bio to read from swap. | ||
303 | * This code requires a bit more work than just using buffer heads | ||
304 | * but, it is the recommended way for 2.5/2.6. | ||
305 | * The following are to signal the beginning and end of I/O. Bios | ||
306 | * finish asynchronously, while we want them to happen synchronously. | ||
307 | * A simple atomic_t, and a wait loop take care of this problem. | ||
308 | */ | ||
309 | |||
310 | static atomic_t io_done = ATOMIC_INIT(0); | ||
311 | |||
312 | static int end_io(struct bio *bio, unsigned int num, int err) | ||
313 | { | ||
314 | if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) { | ||
315 | printk(KERN_ERR "I/O error reading swsusp image.\n"); | ||
316 | return -EIO; | ||
317 | } | ||
318 | atomic_set(&io_done, 0); | ||
319 | return 0; | ||
320 | } | ||
321 | |||
322 | static struct block_device *resume_bdev; | 387 | static struct block_device *resume_bdev; |
323 | 388 | ||
324 | /** | 389 | /** |
@@ -326,15 +391,15 @@ static struct block_device *resume_bdev; | |||
326 | * @rw: READ or WRITE. | 391 | * @rw: READ or WRITE. |
327 | * @page_off: physical offset of page. | 392 | * @page_off: physical offset of page.
328 | * @page: page we're reading or writing. | 393 | * @page: page we're reading or writing. |
394 | * @bio_chain: list of pending bios (for async reading) | ||
329 | * | 395 | * |
330 | * Straight from the textbook - allocate and initialize the bio. | 396 | * Straight from the textbook - allocate and initialize the bio. |
331 | * If we're writing, make sure the page is marked as dirty. | 397 | * If we're reading, make sure the page is marked as dirty. |
332 | * Then submit it and wait. | 398 | * Then submit it and, if @bio_chain == NULL, wait. |
333 | */ | 399 | */ |
334 | 400 | static int submit(int rw, pgoff_t page_off, struct page *page, | |
335 | static int submit(int rw, pgoff_t page_off, void *page) | 401 | struct bio **bio_chain) |
336 | { | 402 | { |
337 | int error = 0; | ||
338 | struct bio *bio; | 403 | struct bio *bio; |
339 | 404 | ||
340 | bio = bio_alloc(GFP_ATOMIC, 1); | 405 | bio = bio_alloc(GFP_ATOMIC, 1); |
@@ -342,33 +407,40 @@ static int submit(int rw, pgoff_t page_off, void *page) | |||
342 | return -ENOMEM; | 407 | return -ENOMEM; |
343 | bio->bi_sector = page_off * (PAGE_SIZE >> 9); | 408 | bio->bi_sector = page_off * (PAGE_SIZE >> 9); |
344 | bio->bi_bdev = resume_bdev; | 409 | bio->bi_bdev = resume_bdev; |
345 | bio->bi_end_io = end_io; | 410 | bio->bi_end_io = end_swap_bio_read; |
346 | 411 | ||
347 | if (bio_add_page(bio, virt_to_page(page), PAGE_SIZE, 0) < PAGE_SIZE) { | 412 | if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { |
348 | printk("swsusp: ERROR: adding page to bio at %ld\n",page_off); | 413 | printk("swsusp: ERROR: adding page to bio at %ld\n", page_off); |
349 | error = -EFAULT; | 414 | bio_put(bio); |
350 | goto Done; | 415 | return -EFAULT; |
351 | } | 416 | } |
352 | 417 | ||
353 | atomic_set(&io_done, 1); | 418 | lock_page(page); |
354 | submit_bio(rw | (1 << BIO_RW_SYNC), bio); | 419 | bio_get(bio); |
355 | while (atomic_read(&io_done)) | 420 | |
356 | yield(); | 421 | if (bio_chain == NULL) { |
357 | if (rw == READ) | 422 | submit_bio(rw | (1 << BIO_RW_SYNC), bio); |
358 | bio_set_pages_dirty(bio); | 423 | wait_on_page_locked(page); |
359 | Done: | 424 | if (rw == READ) |
360 | bio_put(bio); | 425 | bio_set_pages_dirty(bio); |
361 | return error; | 426 | bio_put(bio); |
427 | } else { | ||
428 | get_page(page); | ||
429 | bio->bi_private = *bio_chain; | ||
430 | *bio_chain = bio; | ||
431 | submit_bio(rw | (1 << BIO_RW_SYNC), bio); | ||
432 | } | ||
433 | return 0; | ||
362 | } | 434 | } |
363 | 435 | ||
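The switch to end_swap_bio_read (from mm/page_io.c) is what makes the page lock usable as the completion signal: wait_on_page_locked() here and the PageUptodate()/PageError() checks in wait_on_bio_chain() only work if the completion handler behaves roughly as sketched below. This is a statement of the assumed contract, not the real implementation:

    static int end_io_contract(struct bio *bio, unsigned int bytes_done,
                               int err)
    {
        struct page *page = bio->bi_io_vec[0].bv_page;

        if (bio->bi_size)
            return 1;                  /* transfer not finished yet */
        if (test_bit(BIO_UPTODATE, &bio->bi_flags))
            SetPageUptodate(page);
        else
            SetPageError(page);
        unlock_page(page);             /* wakes wait_on_page_locked() */
        bio_put(bio);                  /* drops the submission reference */
        return 0;
    }

The explicit bio_get() in submit() is then what keeps the bio alive past this handler, so wait_on_bio_chain() can still walk bi_private and bi_io_vec before its own bio_put().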
364 | static int bio_read_page(pgoff_t page_off, void *page) | 436 | static int bio_read_page(pgoff_t page_off, void *addr, struct bio **bio_chain) |
365 | { | 437 | { |
366 | return submit(READ, page_off, page); | 438 | return submit(READ, page_off, virt_to_page(addr), bio_chain); |
367 | } | 439 | } |
368 | 440 | ||
369 | static int bio_write_page(pgoff_t page_off, void *page) | 441 | static int bio_write_page(pgoff_t page_off, void *addr) |
370 | { | 442 | { |
371 | return submit(WRITE, page_off, page); | 443 | return submit(WRITE, page_off, virt_to_page(addr), NULL); |
372 | } | 444 | } |
373 | 445 | ||
374 | /** | 446 | /** |
@@ -393,7 +465,7 @@ static int get_swap_reader(struct swap_map_handle *handle, | |||
393 | handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_ATOMIC); | 465 | handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_ATOMIC); |
394 | if (!handle->cur) | 466 | if (!handle->cur) |
395 | return -ENOMEM; | 467 | return -ENOMEM; |
396 | error = bio_read_page(swp_offset(start), handle->cur); | 468 | error = bio_read_page(swp_offset(start), handle->cur, NULL); |
397 | if (error) { | 469 | if (error) { |
398 | release_swap_reader(handle); | 470 | release_swap_reader(handle); |
399 | return error; | 471 | return error; |
@@ -402,7 +474,8 @@ static int get_swap_reader(struct swap_map_handle *handle, | |||
402 | return 0; | 474 | return 0; |
403 | } | 475 | } |
404 | 476 | ||
405 | static int swap_read_page(struct swap_map_handle *handle, void *buf) | 477 | static int swap_read_page(struct swap_map_handle *handle, void *buf, |
478 | struct bio **bio_chain) | ||
406 | { | 479 | { |
407 | unsigned long offset; | 480 | unsigned long offset; |
408 | int error; | 481 | int error; |
@@ -412,16 +485,17 @@ static int swap_read_page(struct swap_map_handle *handle, void *buf) | |||
412 | offset = handle->cur->entries[handle->k]; | 485 | offset = handle->cur->entries[handle->k]; |
413 | if (!offset) | 486 | if (!offset) |
414 | return -EFAULT; | 487 | return -EFAULT; |
415 | error = bio_read_page(offset, buf); | 488 | error = bio_read_page(offset, buf, bio_chain); |
416 | if (error) | 489 | if (error) |
417 | return error; | 490 | return error; |
418 | if (++handle->k >= MAP_PAGE_ENTRIES) { | 491 | if (++handle->k >= MAP_PAGE_ENTRIES) { |
492 | error = wait_on_bio_chain(bio_chain); | ||
419 | handle->k = 0; | 493 | handle->k = 0; |
420 | offset = handle->cur->next_swap; | 494 | offset = handle->cur->next_swap; |
421 | if (!offset) | 495 | if (!offset) |
422 | release_swap_reader(handle); | 496 | release_swap_reader(handle); |
423 | else | 497 | else if (!error) |
424 | error = bio_read_page(offset, handle->cur); | 498 | error = bio_read_page(offset, handle->cur, NULL); |
425 | } | 499 | } |
426 | return error; | 500 | return error; |
427 | } | 501 | } |
@@ -434,33 +508,49 @@ static int swap_read_page(struct swap_map_handle *handle, void *buf) | |||
434 | 508 | ||
435 | static int load_image(struct swap_map_handle *handle, | 509 | static int load_image(struct swap_map_handle *handle, |
436 | struct snapshot_handle *snapshot, | 510 | struct snapshot_handle *snapshot, |
437 | unsigned int nr_pages) | 511 | unsigned int nr_to_read) |
438 | { | 512 | { |
439 | unsigned int m; | 513 | unsigned int m; |
440 | int ret; | ||
441 | int error = 0; | 514 | int error = 0; |
515 | struct timeval start; | ||
516 | struct timeval stop; | ||
517 | struct bio *bio; | ||
518 | int err2; | ||
519 | unsigned nr_pages; | ||
442 | 520 | ||
443 | printk("Loading image data pages (%u pages) ... ", nr_pages); | 521 | printk("Loading image data pages (%u pages) ... ", nr_to_read); |
444 | m = nr_pages / 100; | 522 | m = nr_to_read / 100; |
445 | if (!m) | 523 | if (!m) |
446 | m = 1; | 524 | m = 1; |
447 | nr_pages = 0; | 525 | nr_pages = 0; |
448 | do { | 526 | bio = NULL; |
449 | ret = snapshot_write_next(snapshot, PAGE_SIZE); | 527 | do_gettimeofday(&start); |
450 | if (ret > 0) { | 528 | for ( ; ; ) { |
451 | error = swap_read_page(handle, data_of(*snapshot)); | 529 | error = snapshot_write_next(snapshot, PAGE_SIZE); |
452 | if (error) | 530 | if (error <= 0) |
453 | break; | 531 | break; |
454 | if (!(nr_pages % m)) | 532 | error = swap_read_page(handle, data_of(*snapshot), &bio); |
455 | printk("\b\b\b\b%3d%%", nr_pages / m); | 533 | if (error) |
456 | nr_pages++; | 534 | break; |
457 | } | 535 | if (snapshot->sync_read) |
458 | } while (ret > 0); | 536 | error = wait_on_bio_chain(&bio); |
537 | if (error) | ||
538 | break; | ||
539 | if (!(nr_pages % m)) | ||
540 | printk("\b\b\b\b%3d%%", nr_pages / m); | ||
541 | nr_pages++; | ||
542 | } | ||
543 | err2 = wait_on_bio_chain(&bio); | ||
544 | do_gettimeofday(&stop); | ||
545 | if (!error) | ||
546 | error = err2; | ||
459 | if (!error) { | 547 | if (!error) { |
460 | printk("\b\b\b\bdone\n"); | 548 | printk("\b\b\b\bdone\n"); |
549 | snapshot_free_unused_memory(snapshot); | ||
461 | if (!snapshot_image_loaded(snapshot)) | 550 | if (!snapshot_image_loaded(snapshot)) |
462 | error = -ENODATA; | 551 | error = -ENODATA; |
463 | } | 552 | } |
553 | show_speed(&start, &stop, nr_to_read, "Read"); | ||
464 | return error; | 554 | return error; |
465 | } | 555 | } |
466 | 556 | ||
@@ -483,7 +573,7 @@ int swsusp_read(void) | |||
483 | header = (struct swsusp_info *)data_of(snapshot); | 573 | header = (struct swsusp_info *)data_of(snapshot); |
484 | error = get_swap_reader(&handle, swsusp_header.image); | 574 | error = get_swap_reader(&handle, swsusp_header.image); |
485 | if (!error) | 575 | if (!error) |
486 | error = swap_read_page(&handle, header); | 576 | error = swap_read_page(&handle, header, NULL); |
487 | if (!error) | 577 | if (!error) |
488 | error = load_image(&handle, &snapshot, header->pages - 1); | 578 | error = load_image(&handle, &snapshot, header->pages - 1); |
489 | release_swap_reader(&handle); | 579 | release_swap_reader(&handle); |
@@ -509,7 +599,7 @@ int swsusp_check(void) | |||
509 | if (!IS_ERR(resume_bdev)) { | 599 | if (!IS_ERR(resume_bdev)) { |
510 | set_blocksize(resume_bdev, PAGE_SIZE); | 600 | set_blocksize(resume_bdev, PAGE_SIZE); |
511 | memset(&swsusp_header, 0, sizeof(swsusp_header)); | 601 | memset(&swsusp_header, 0, sizeof(swsusp_header)); |
512 | if ((error = bio_read_page(0, &swsusp_header))) | 602 | if ((error = bio_read_page(0, &swsusp_header, NULL))) |
513 | return error; | 603 | return error; |
514 | if (!memcmp(SWSUSP_SIG, swsusp_header.sig, 10)) { | 604 | if (!memcmp(SWSUSP_SIG, swsusp_header.sig, 10)) { |
515 | memcpy(swsusp_header.sig, swsusp_header.orig_sig, 10); | 605 | memcpy(swsusp_header.sig, swsusp_header.orig_sig, 10); |
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index 62752899b1a1..0b66659dc516 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c | |||
@@ -193,14 +193,13 @@ int swsusp_shrink_memory(void) | |||
193 | printk("Shrinking memory... "); | 193 | printk("Shrinking memory... "); |
194 | do { | 194 | do { |
195 | size = 2 * count_highmem_pages(); | 195 | size = 2 * count_highmem_pages(); |
196 | size += size / 50 + count_data_pages(); | 196 | size += size / 50 + count_data_pages() + PAGES_FOR_IO; |
197 | size += (size + PBES_PER_PAGE - 1) / PBES_PER_PAGE + | ||
198 | PAGES_FOR_IO; | ||
199 | tmp = size; | 197 | tmp = size; |
200 | for_each_zone (zone) | 198 | for_each_zone (zone) |
201 | if (!is_highmem(zone) && populated_zone(zone)) { | 199 | if (!is_highmem(zone) && populated_zone(zone)) { |
202 | tmp -= zone->free_pages; | 200 | tmp -= zone->free_pages; |
203 | tmp += zone->lowmem_reserve[ZONE_NORMAL]; | 201 | tmp += zone->lowmem_reserve[ZONE_NORMAL]; |
202 | tmp += snapshot_additional_pages(zone); | ||
204 | } | 203 | } |
205 | if (tmp > 0) { | 204 | if (tmp > 0) { |
206 | tmp = __shrink_memory(tmp); | 205 | tmp = __shrink_memory(tmp); |
diff --git a/kernel/power/user.c b/kernel/power/user.c index 5a8d060d7909..72825c853cd7 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c | |||
@@ -19,6 +19,7 @@ | |||
19 | #include <linux/swapops.h> | 19 | #include <linux/swapops.h> |
20 | #include <linux/pm.h> | 20 | #include <linux/pm.h> |
21 | #include <linux/fs.h> | 21 | #include <linux/fs.h> |
22 | #include <linux/cpu.h> | ||
22 | 23 | ||
23 | #include <asm/uaccess.h> | 24 | #include <asm/uaccess.h> |
24 | 25 | ||
@@ -139,12 +140,15 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, | |||
139 | if (data->frozen) | 140 | if (data->frozen) |
140 | break; | 141 | break; |
141 | down(&pm_sem); | 142 | down(&pm_sem); |
142 | disable_nonboot_cpus(); | 143 | error = disable_nonboot_cpus(); |
143 | if (freeze_processes()) { | 144 | if (!error) { |
144 | thaw_processes(); | 145 | error = freeze_processes(); |
145 | enable_nonboot_cpus(); | 146 | if (error) { |
146 | error = -EBUSY; | 147 | thaw_processes(); |
148 | error = -EBUSY; | ||
149 | } | ||
147 | } | 150 | } |
151 | enable_nonboot_cpus(); | ||
148 | up(&pm_sem); | 152 | up(&pm_sem); |
149 | if (!error) | 153 | if (!error) |
150 | data->frozen = 1; | 154 | data->frozen = 1; |
@@ -189,6 +193,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, | |||
189 | error = -EPERM; | 193 | error = -EPERM; |
190 | break; | 194 | break; |
191 | } | 195 | } |
196 | snapshot_free_unused_memory(&data->handle); | ||
192 | down(&pm_sem); | 197 | down(&pm_sem); |
193 | pm_prepare_console(); | 198 | pm_prepare_console(); |
194 | error = device_suspend(PMSG_PRETHAW); | 199 | error = device_suspend(PMSG_PRETHAW); |
diff --git a/kernel/printk.c b/kernel/printk.c index 1149365e989e..771f5e861bcd 100644 --- a/kernel/printk.c +++ b/kernel/printk.c | |||
@@ -721,6 +721,7 @@ int __init add_preferred_console(char *name, int idx, char *options) | |||
721 | return 0; | 721 | return 0; |
722 | } | 722 | } |
723 | 723 | ||
724 | #ifndef CONFIG_DISABLE_CONSOLE_SUSPEND | ||
724 | /** | 725 | /** |
725 | * suspend_console - suspend the console subsystem | 726 | * suspend_console - suspend the console subsystem |
726 | * | 727 | * |
@@ -728,6 +729,7 @@ int __init add_preferred_console(char *name, int idx, char *options) | |||
728 | */ | 729 | */ |
729 | void suspend_console(void) | 730 | void suspend_console(void) |
730 | { | 731 | { |
732 | printk("Suspending console(s)\n"); | ||
731 | acquire_console_sem(); | 733 | acquire_console_sem(); |
732 | console_suspended = 1; | 734 | console_suspended = 1; |
733 | } | 735 | } |
@@ -737,6 +739,7 @@ void resume_console(void) | |||
737 | console_suspended = 0; | 739 | console_suspended = 0; |
738 | release_console_sem(); | 740 | release_console_sem(); |
739 | } | 741 | } |
742 | #endif /* CONFIG_DISABLE_CONSOLE_SUSPEND */ | ||
740 | 743 | ||
741 | /** | 744 | /** |
742 | * acquire_console_sem - lock the console system for exclusive use. | 745 | * acquire_console_sem - lock the console system for exclusive use. |
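With suspend_console()/resume_console() now compiled out under CONFIG_DISABLE_CONSOLE_SUSPEND, callers presumably keep building against stubs from the companion header change, which is not shown in this hunk; roughly:

    /* Assumed counterpart in include/linux/console.h: */
    #ifdef CONFIG_DISABLE_CONSOLE_SUSPEND
    static inline void suspend_console(void) {}
    static inline void resume_console(void) {}
    #else
    extern void suspend_console(void);
    extern void resume_console(void);
    #endif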
diff --git a/kernel/profile.c b/kernel/profile.c index d5bd75e7501c..fb660c7d35ba 100644 --- a/kernel/profile.c +++ b/kernel/profile.c | |||
@@ -309,13 +309,17 @@ static int __devinit profile_cpu_callback(struct notifier_block *info, | |||
309 | node = cpu_to_node(cpu); | 309 | node = cpu_to_node(cpu); |
310 | per_cpu(cpu_profile_flip, cpu) = 0; | 310 | per_cpu(cpu_profile_flip, cpu) = 0; |
311 | if (!per_cpu(cpu_profile_hits, cpu)[1]) { | 311 | if (!per_cpu(cpu_profile_hits, cpu)[1]) { |
312 | page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0); | 312 | page = alloc_pages_node(node, |
313 | GFP_KERNEL | __GFP_ZERO | GFP_THISNODE, | ||
314 | 0); | ||
313 | if (!page) | 315 | if (!page) |
314 | return NOTIFY_BAD; | 316 | return NOTIFY_BAD; |
315 | per_cpu(cpu_profile_hits, cpu)[1] = page_address(page); | 317 | per_cpu(cpu_profile_hits, cpu)[1] = page_address(page); |
316 | } | 318 | } |
317 | if (!per_cpu(cpu_profile_hits, cpu)[0]) { | 319 | if (!per_cpu(cpu_profile_hits, cpu)[0]) { |
318 | page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0); | 320 | page = alloc_pages_node(node, |
321 | GFP_KERNEL | __GFP_ZERO | GFP_THISNODE, | ||
322 | 0); | ||
319 | if (!page) | 323 | if (!page) |
320 | goto out_free; | 324 | goto out_free; |
321 | per_cpu(cpu_profile_hits, cpu)[0] = page_address(page); | 325 | per_cpu(cpu_profile_hits, cpu)[0] = page_address(page); |
@@ -491,12 +495,16 @@ static int __init create_hash_tables(void) | |||
491 | int node = cpu_to_node(cpu); | 495 | int node = cpu_to_node(cpu); |
492 | struct page *page; | 496 | struct page *page; |
493 | 497 | ||
494 | page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0); | 498 | page = alloc_pages_node(node, |
499 | GFP_KERNEL | __GFP_ZERO | GFP_THISNODE, | ||
500 | 0); | ||
495 | if (!page) | 501 | if (!page) |
496 | goto out_cleanup; | 502 | goto out_cleanup; |
497 | per_cpu(cpu_profile_hits, cpu)[1] | 503 | per_cpu(cpu_profile_hits, cpu)[1] |
498 | = (struct profile_hit *)page_address(page); | 504 | = (struct profile_hit *)page_address(page); |
499 | page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0); | 505 | page = alloc_pages_node(node, |
506 | GFP_KERNEL | __GFP_ZERO | GFP_THISNODE, | ||
507 | 0); | ||
500 | if (!page) | 508 | if (!page) |
501 | goto out_cleanup; | 509 | goto out_cleanup; |
502 | per_cpu(cpu_profile_hits, cpu)[0] | 510 | per_cpu(cpu_profile_hits, cpu)[0] |
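The added GFP_THISNODE makes the node placement strict: without it, alloc_pages_node() quietly falls back to other nodes' zones when the requested node is short on memory, leaving per-CPU profile buffers on remote nodes; with it, the call returns NULL instead and the existing error paths run. In sketch form, the second call is the new behaviour:

    /* May silently come from any node with free memory: */
    page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0);

    /* Fails (NULL) rather than handing back off-node memory: */
    page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO | GFP_THISNODE, 0);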
diff --git a/kernel/sched.c b/kernel/sched.c index a234fbee1238..5c848fd4e461 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -238,6 +238,7 @@ struct rq { | |||
238 | /* For active balancing */ | 238 | /* For active balancing */ |
239 | int active_balance; | 239 | int active_balance; |
240 | int push_cpu; | 240 | int push_cpu; |
241 | int cpu; /* cpu of this runqueue */ | ||
241 | 242 | ||
242 | struct task_struct *migration_thread; | 243 | struct task_struct *migration_thread; |
243 | struct list_head migration_queue; | 244 | struct list_head migration_queue; |
@@ -267,6 +268,15 @@ struct rq { | |||
267 | 268 | ||
268 | static DEFINE_PER_CPU(struct rq, runqueues); | 269 | static DEFINE_PER_CPU(struct rq, runqueues); |
269 | 270 | ||
271 | static inline int cpu_of(struct rq *rq) | ||
272 | { | ||
273 | #ifdef CONFIG_SMP | ||
274 | return rq->cpu; | ||
275 | #else | ||
276 | return 0; | ||
277 | #endif | ||
278 | } | ||
279 | |||
270 | /* | 280 | /* |
271 | * The domain tree (rq->sd) is protected by RCU's quiescent state transition. | 281 | * The domain tree (rq->sd) is protected by RCU's quiescent state transition. |
272 | * See detach_destroy_domains: synchronize_sched for details. | 282 | * See detach_destroy_domains: synchronize_sched for details. |
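Note on the new accessor: cpu_of() hides the cost of the rq->cpu back-pointer added above. On SMP it is a plain field read; on UP it folds to the constant 0, so uniprocessor builds pay nothing. The retry logic further down needs it to know which CPU to drop from the balancing mask. The same pattern, reduced to a self-contained example:

    /* Sketch: a per-instance index that exists only in the multi-instance
     * build, with an accessor that folds away otherwise. */
    #include <stdio.h>

    #define CONFIG_SMP 1            /* set to 0 for the uniprocessor variant */

    struct rq {
    #if CONFIG_SMP
            int cpu;                /* cpu of this runqueue */
    #endif
            int nr_running;
    };

    static inline int cpu_of(const struct rq *rq)
    {
    #if CONFIG_SMP
            return rq->cpu;         /* back-pointer stored at init time */
    #else
            (void)rq;
            return 0;               /* UP: everything runs on cpu 0 */
    #endif
    }

    int main(void)
    {
            struct rq rq = { 0 };
    #if CONFIG_SMP
            rq.cpu = 3;             /* what sched_init()'s "rq->cpu = i" does */
    #endif
            printf("runqueue belongs to cpu %d\n", cpu_of(&rq));
            return 0;
    }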
@@ -2211,7 +2221,8 @@ out: | |||
2211 | */ | 2221 | */ |
2212 | static struct sched_group * | 2222 | static struct sched_group * |
2213 | find_busiest_group(struct sched_domain *sd, int this_cpu, | 2223 | find_busiest_group(struct sched_domain *sd, int this_cpu, |
2214 | unsigned long *imbalance, enum idle_type idle, int *sd_idle) | 2224 | unsigned long *imbalance, enum idle_type idle, int *sd_idle, |
2225 | cpumask_t *cpus) | ||
2215 | { | 2226 | { |
2216 | struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups; | 2227 | struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups; |
2217 | unsigned long max_load, avg_load, total_load, this_load, total_pwr; | 2228 | unsigned long max_load, avg_load, total_load, this_load, total_pwr; |
@@ -2248,7 +2259,12 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, | |||
2248 | sum_weighted_load = sum_nr_running = avg_load = 0; | 2259 | sum_weighted_load = sum_nr_running = avg_load = 0; |
2249 | 2260 | ||
2250 | for_each_cpu_mask(i, group->cpumask) { | 2261 | for_each_cpu_mask(i, group->cpumask) { |
2251 | struct rq *rq = cpu_rq(i); | 2262 | struct rq *rq; |
2263 | |||
2264 | if (!cpu_isset(i, *cpus)) | ||
2265 | continue; | ||
2266 | |||
2267 | rq = cpu_rq(i); | ||
2252 | 2268 | ||
2253 | if (*sd_idle && !idle_cpu(i)) | 2269 | if (*sd_idle && !idle_cpu(i)) |
2254 | *sd_idle = 0; | 2270 | *sd_idle = 0; |
@@ -2466,13 +2482,17 @@ ret: | |||
2466 | */ | 2482 | */ |
2467 | static struct rq * | 2483 | static struct rq * |
2468 | find_busiest_queue(struct sched_group *group, enum idle_type idle, | 2484 | find_busiest_queue(struct sched_group *group, enum idle_type idle, |
2469 | unsigned long imbalance) | 2485 | unsigned long imbalance, cpumask_t *cpus) |
2470 | { | 2486 | { |
2471 | struct rq *busiest = NULL, *rq; | 2487 | struct rq *busiest = NULL, *rq; |
2472 | unsigned long max_load = 0; | 2488 | unsigned long max_load = 0; |
2473 | int i; | 2489 | int i; |
2474 | 2490 | ||
2475 | for_each_cpu_mask(i, group->cpumask) { | 2491 | for_each_cpu_mask(i, group->cpumask) { |
2492 | |||
2493 | if (!cpu_isset(i, *cpus)) | ||
2494 | continue; | ||
2495 | |||
2476 | rq = cpu_rq(i); | 2496 | rq = cpu_rq(i); |
2477 | 2497 | ||
2478 | if (rq->nr_running == 1 && rq->raw_weighted_load > imbalance) | 2498 | if (rq->nr_running == 1 && rq->raw_weighted_load > imbalance) |
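Note on the new cpus parameter: both find_busiest_group() and find_busiest_queue() now take a cpumask_t *cpus and skip any CPU cleared from it via cpu_isset(), so a caller can re-run the search while excluding runqueues it has already found unusable. Pairing for_each_cpu_mask() with an explicit cpu_isset() test intersects the group mask with the caller's mask without building a temporary cpumask. A standalone model of the retry loop that drives this mask appears after the load_balance() hunk below.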
@@ -2511,6 +2531,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, | |||
2511 | struct sched_group *group; | 2531 | struct sched_group *group; |
2512 | unsigned long imbalance; | 2532 | unsigned long imbalance; |
2513 | struct rq *busiest; | 2533 | struct rq *busiest; |
2534 | cpumask_t cpus = CPU_MASK_ALL; | ||
2514 | 2535 | ||
2515 | if (idle != NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER && | 2536 | if (idle != NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER && |
2516 | !sched_smt_power_savings) | 2537 | !sched_smt_power_savings) |
@@ -2518,13 +2539,15 @@ static int load_balance(int this_cpu, struct rq *this_rq, | |||
2518 | 2539 | ||
2519 | schedstat_inc(sd, lb_cnt[idle]); | 2540 | schedstat_inc(sd, lb_cnt[idle]); |
2520 | 2541 | ||
2521 | group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle); | 2542 | redo: |
2543 | group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle, | ||
2544 | &cpus); | ||
2522 | if (!group) { | 2545 | if (!group) { |
2523 | schedstat_inc(sd, lb_nobusyg[idle]); | 2546 | schedstat_inc(sd, lb_nobusyg[idle]); |
2524 | goto out_balanced; | 2547 | goto out_balanced; |
2525 | } | 2548 | } |
2526 | 2549 | ||
2527 | busiest = find_busiest_queue(group, idle, imbalance); | 2550 | busiest = find_busiest_queue(group, idle, imbalance, &cpus); |
2528 | if (!busiest) { | 2551 | if (!busiest) { |
2529 | schedstat_inc(sd, lb_nobusyq[idle]); | 2552 | schedstat_inc(sd, lb_nobusyq[idle]); |
2530 | goto out_balanced; | 2553 | goto out_balanced; |
@@ -2549,8 +2572,12 @@ static int load_balance(int this_cpu, struct rq *this_rq, | |||
2549 | double_rq_unlock(this_rq, busiest); | 2572 | double_rq_unlock(this_rq, busiest); |
2550 | 2573 | ||
2551 | /* All tasks on this runqueue were pinned by CPU affinity */ | 2574 | /* All tasks on this runqueue were pinned by CPU affinity */ |
2552 | if (unlikely(all_pinned)) | 2575 | if (unlikely(all_pinned)) { |
2576 | cpu_clear(cpu_of(busiest), cpus); | ||
2577 | if (!cpus_empty(cpus)) | ||
2578 | goto redo; | ||
2553 | goto out_balanced; | 2579 | goto out_balanced; |
2580 | } | ||
2554 | } | 2581 | } |
2555 | 2582 | ||
2556 | if (!nr_moved) { | 2583 | if (!nr_moved) { |
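Note on the redo loop: this is the heart of the change. When every task on the busiest runqueue is pinned by CPU affinity (all_pinned), that CPU is removed from the mask with cpu_clear(cpu_of(busiest), cpus) and the whole group/queue search is redone against the shrunken mask, terminating once the mask empties. A minimal standalone model of the shrink-and-retry loop, using a plain unsigned long as the cpumask and with busiest_in() and the pinned[] table as stand-ins for find_busiest_queue() and the all-pinned check:

    /* Sketch: search a mask for the busiest entry; if it is unusable,
     * clear it and retry until the mask is empty. */
    #include <stdio.h>

    static unsigned long loads[8] = { 3, 9, 1, 7, 0, 2, 5, 4 };
    static int pinned[8] = { 0, 1, 0, 0, 0, 0, 0, 0 }; /* cpu 1 is all-pinned */

    static int busiest_in(unsigned long mask)
    {
            int i, best = -1;
            for (i = 0; i < 8; i++)
                    if ((mask & (1UL << i)) &&
                        (best < 0 || loads[i] > loads[best]))
                            best = i;
            return best;
    }

    int main(void)
    {
            unsigned long cpus = 0xffUL;    /* CPU_MASK_ALL for 8 cpus */
            int busiest;

    redo:
            busiest = busiest_in(cpus);
            if (busiest < 0) {
                    puts("balanced: nothing left to pull from");
                    return 0;
            }
            if (pinned[busiest]) {          /* all tasks pinned: drop this cpu */
                    cpus &= ~(1UL << busiest);
                    if (cpus)
                            goto redo;
                    puts("balanced: every candidate was pinned");
                    return 0;
            }
            printf("pull from cpu %d (load %lu)\n", busiest, loads[busiest]);
            return 0;
    }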
@@ -2639,18 +2666,22 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd) | |||
2639 | unsigned long imbalance; | 2666 | unsigned long imbalance; |
2640 | int nr_moved = 0; | 2667 | int nr_moved = 0; |
2641 | int sd_idle = 0; | 2668 | int sd_idle = 0; |
2669 | cpumask_t cpus = CPU_MASK_ALL; | ||
2642 | 2670 | ||
2643 | if (sd->flags & SD_SHARE_CPUPOWER && !sched_smt_power_savings) | 2671 | if (sd->flags & SD_SHARE_CPUPOWER && !sched_smt_power_savings) |
2644 | sd_idle = 1; | 2672 | sd_idle = 1; |
2645 | 2673 | ||
2646 | schedstat_inc(sd, lb_cnt[NEWLY_IDLE]); | 2674 | schedstat_inc(sd, lb_cnt[NEWLY_IDLE]); |
2647 | group = find_busiest_group(sd, this_cpu, &imbalance, NEWLY_IDLE, &sd_idle); | 2675 | redo: |
2676 | group = find_busiest_group(sd, this_cpu, &imbalance, NEWLY_IDLE, | ||
2677 | &sd_idle, &cpus); | ||
2648 | if (!group) { | 2678 | if (!group) { |
2649 | schedstat_inc(sd, lb_nobusyg[NEWLY_IDLE]); | 2679 | schedstat_inc(sd, lb_nobusyg[NEWLY_IDLE]); |
2650 | goto out_balanced; | 2680 | goto out_balanced; |
2651 | } | 2681 | } |
2652 | 2682 | ||
2653 | busiest = find_busiest_queue(group, NEWLY_IDLE, imbalance); | 2683 | busiest = find_busiest_queue(group, NEWLY_IDLE, imbalance, |
2684 | &cpus); | ||
2654 | if (!busiest) { | 2685 | if (!busiest) { |
2655 | schedstat_inc(sd, lb_nobusyq[NEWLY_IDLE]); | 2686 | schedstat_inc(sd, lb_nobusyq[NEWLY_IDLE]); |
2656 | goto out_balanced; | 2687 | goto out_balanced; |
@@ -2668,6 +2699,12 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd) | |||
2668 | minus_1_or_zero(busiest->nr_running), | 2699 | minus_1_or_zero(busiest->nr_running), |
2669 | imbalance, sd, NEWLY_IDLE, NULL); | 2700 | imbalance, sd, NEWLY_IDLE, NULL); |
2670 | spin_unlock(&busiest->lock); | 2701 | spin_unlock(&busiest->lock); |
2702 | |||
2703 | if (!nr_moved) { | ||
2704 | cpu_clear(cpu_of(busiest), cpus); | ||
2705 | if (!cpus_empty(cpus)) | ||
2706 | goto redo; | ||
2707 | } | ||
2671 | } | 2708 | } |
2672 | 2709 | ||
2673 | if (!nr_moved) { | 2710 | if (!nr_moved) { |
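Note on the newly-idle path: load_balance_newidle() gets the same shrink-and-retry treatment, with one difference in the trigger. It retries on !nr_moved rather than on all_pinned, because this path passes NULL for the all_pinned argument of move_tasks() (visible in the hunk above); failing to move anything from the chosen queue is taken as reason enough to exclude that CPU and search again.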
@@ -6747,6 +6784,7 @@ void __init sched_init(void) | |||
6747 | rq->cpu_load[j] = 0; | 6784 | rq->cpu_load[j] = 0; |
6748 | rq->active_balance = 0; | 6785 | rq->active_balance = 0; |
6749 | rq->push_cpu = 0; | 6786 | rq->push_cpu = 0; |
6787 | rq->cpu = i; | ||
6750 | rq->migration_thread = NULL; | 6788 | rq->migration_thread = NULL; |
6751 | INIT_LIST_HEAD(&rq->migration_queue); | 6789 | INIT_LIST_HEAD(&rq->migration_queue); |
6752 | #endif | 6790 | #endif |
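Note on the init side: the back-pointer is filled in once per runqueue during boot, inside sched_init()'s per-CPU loop and within the same SMP-only block as the other balancing fields (per the #endif that closes the hunk). That single assignment is what lets cpu_of() remain a plain field read with no search or computation on the hot balancing path.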
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 362a0cc37138..fd43c3e6786b 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -943,6 +943,17 @@ static ctl_table vm_table[] = { | |||
943 | .extra1 = &zero, | 943 | .extra1 = &zero, |
944 | .extra2 = &one_hundred, | 944 | .extra2 = &one_hundred, |
945 | }, | 945 | }, |
946 | { | ||
947 | .ctl_name = VM_MIN_SLAB, | ||
948 | .procname = "min_slab_ratio", | ||
949 | .data = &sysctl_min_slab_ratio, | ||
950 | .maxlen = sizeof(sysctl_min_slab_ratio), | ||
951 | .mode = 0644, | ||
952 | .proc_handler = &sysctl_min_slab_ratio_sysctl_handler, | ||
953 | .strategy = &sysctl_intvec, | ||
954 | .extra1 = &zero, | ||
955 | .extra2 = &one_hundred, | ||
956 | }, | ||
946 | #endif | 957 | #endif |
947 | #ifdef CONFIG_X86_32 | 958 | #ifdef CONFIG_X86_32 |
948 | { | 959 | { |
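Note on the sysctl.c hunk: the new vm_table entry exposes sysctl_min_slab_ratio as /proc/sys/vm/min_slab_ratio (the path follows from the procname and the vm_table placement). The sysctl_intvec strategy with the zero/one_hundred bounds clamps writes to 0..100, and the dedicated proc_handler lets the kernel recompute derived per-zone thresholds when the value changes. A hedged userspace sketch of reading (and, commented out, writing) the knob; the file exists only on kernels built with the option guarded by the surrounding #ifdef, and writing requires root:

    /* Sketch: read /proc/sys/vm/min_slab_ratio.  The 0..100 clamp is
     * enforced kernel-side by the extra1/extra2 bounds in the entry. */
    #include <stdio.h>

    int main(void)
    {
            FILE *f = fopen("/proc/sys/vm/min_slab_ratio", "r+");
            int ratio;

            if (!f) {
                    perror("min_slab_ratio");
                    return 1;
            }
            if (fscanf(f, "%d", &ratio) == 1)
                    printf("current min_slab_ratio: %d%%\n", ratio);
            /* rewind(f); fprintf(f, "5\n");   -- uncomment to set it to 5% */
            fclose(f);
            return 0;
    }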