Diffstat (limited to 'kernel')
44 files changed, 879 insertions(+), 540 deletions(-)
diff --git a/kernel/audit.c b/kernel/audit.c
index 76c9a11b72..4e9d208296 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -151,7 +151,7 @@ struct audit_buffer {
 
 static void audit_set_pid(struct audit_buffer *ab, pid_t pid)
 {
-	struct nlmsghdr *nlh = (struct nlmsghdr *)ab->skb->data;
+	struct nlmsghdr *nlh = nlmsg_hdr(ab->skb);
 	nlh->nlmsg_pid = pid;
 }
 
@@ -750,7 +750,7 @@ static void audit_receive_skb(struct sk_buff *skb)
 	u32 rlen;
 
 	while (skb->len >= NLMSG_SPACE(0)) {
-		nlh = (struct nlmsghdr *)skb->data;
+		nlh = nlmsg_hdr(skb);
 		if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len)
 			return;
 		rlen = NLMSG_ALIGN(nlh->nlmsg_len);
@@ -795,7 +795,7 @@ static int __init audit_init(void)
 	printk(KERN_INFO "audit: initializing netlink socket (%s)\n",
 	       audit_default ? "enabled" : "disabled");
 	audit_sock = netlink_kernel_create(NETLINK_AUDIT, 0, audit_receive,
-					   THIS_MODULE);
+					   NULL, THIS_MODULE);
 	if (!audit_sock)
 		audit_panic("cannot initialize netlink socket");
 	else
@@ -1073,7 +1073,7 @@ static void audit_log_vformat(struct audit_buffer *ab, const char *fmt,
 		goto out;
 	}
 	va_copy(args2, args);
-	len = vsnprintf(skb->tail, avail, fmt, args);
+	len = vsnprintf(skb_tail_pointer(skb), avail, fmt, args);
 	if (len >= avail) {
 		/* The printk buffer is 1024 bytes long, so if we get
 		 * here and AUDIT_BUFSIZ is at least 1024, then we can
@@ -1082,7 +1082,7 @@ static void audit_log_vformat(struct audit_buffer *ab, const char *fmt,
 			max_t(unsigned, AUDIT_BUFSIZ, 1+len-avail));
 		if (!avail)
 			goto out;
-		len = vsnprintf(skb->tail, avail, fmt, args2);
+		len = vsnprintf(skb_tail_pointer(skb), avail, fmt, args2);
 	}
 	if (len > 0)
 		skb_put(skb, len);
@@ -1143,7 +1143,7 @@ void audit_log_hex(struct audit_buffer *ab, const unsigned char *buf,
 		return;
 	}
 
-	ptr = skb->tail;
+	ptr = skb_tail_pointer(skb);
 	for (i=0; i<len; i++) {
 		*ptr++ = hex[(buf[i] & 0xF0)>>4]; /* Upper nibble */
 		*ptr++ = hex[buf[i] & 0x0F];	  /* Lower nibble */
@@ -1175,7 +1175,7 @@ static void audit_log_n_string(struct audit_buffer *ab, size_t slen,
 		if (!avail)
 			return;
 	}
-	ptr = skb->tail;
+	ptr = skb_tail_pointer(skb);
 	*ptr++ = '"';
 	memcpy(ptr, string, slen);
 	ptr += slen;
@@ -1268,7 +1268,7 @@ void audit_log_end(struct audit_buffer *ab)
 		audit_log_lost("rate limit exceeded");
 	} else {
 		if (audit_pid) {
-			struct nlmsghdr *nlh = (struct nlmsghdr *)ab->skb->data;
+			struct nlmsghdr *nlh = nlmsg_hdr(ab->skb);
 			nlh->nlmsg_len = ab->skb->len - NLMSG_SPACE(0);
 			skb_queue_tail(&audit_skb_queue, ab->skb);
 			ab->skb = NULL;
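
The audit.c conversions are mechanical, so a short aside on what the new
helpers do may help review (a sketch of this kernel generation's
definitions, not part of the patch): nlmsg_hdr() and skb_tail_pointer()
wrap pointer arithmetic the audit code used to open-code, so the sk_buff
layout can switch to offsets on 64-bit without touching callers:

	/* roughly as in include/linux/netlink.h */
	static inline struct nlmsghdr *nlmsg_hdr(const struct sk_buff *skb)
	{
		return (struct nlmsghdr *)skb->data;
	}

	/* roughly as in include/linux/skbuff.h, non-offset variant */
	static inline unsigned char *skb_tail_pointer(const struct sk_buff *skb)
	{
		return skb->tail;
	}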
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 359955800d..628c7ac590 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -739,28 +739,26 @@ static inline void audit_free_context(struct audit_context *context)
 void audit_log_task_context(struct audit_buffer *ab)
 {
 	char *ctx = NULL;
-	ssize_t len = 0;
+	unsigned len;
+	int error;
+	u32 sid;
+
+	selinux_get_task_sid(current, &sid);
+	if (!sid)
+		return;
 
-	len = security_getprocattr(current, "current", NULL, 0);
-	if (len < 0) {
-		if (len != -EINVAL)
+	error = selinux_sid_to_string(sid, &ctx, &len);
+	if (error) {
+		if (error != -EINVAL)
 			goto error_path;
 		return;
 	}
 
-	ctx = kmalloc(len, GFP_KERNEL);
-	if (!ctx)
-		goto error_path;
-
-	len = security_getprocattr(current, "current", ctx, len);
-	if (len < 0 )
-		goto error_path;
-
 	audit_log_format(ab, " subj=%s", ctx);
+	kfree(ctx);
 	return;
 
 error_path:
-	kfree(ctx);
 	audit_panic("error in audit_log_task_context");
 	return;
 }
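
For reviewers tracking the ownership change here: selinux_sid_to_string()
allocates the context buffer itself, which is why kfree(ctx) moves from the
error path to the success path. A minimal sketch of the new contract,
simplified from the hunk above:

	u32 sid;
	char *ctx;
	unsigned len;

	selinux_get_task_sid(current, &sid);	/* sid == 0: no SELinux context */
	if (selinux_sid_to_string(sid, &ctx, &len))	/* allocates ctx on success */
		return;
	audit_log_format(ab, " subj=%s", ctx);
	kfree(ctx);				/* the caller owns the string */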
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 3d4206ada5..36e70845cf 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -254,6 +254,12 @@ int __cpuinit cpu_up(unsigned int cpu)
 }
 
 #ifdef CONFIG_SUSPEND_SMP
+/* Needed to prevent the microcode driver from requesting firmware in its CPU
+ * hotplug notifier during the suspend/resume.
+ */
+int suspend_cpu_hotplug;
+EXPORT_SYMBOL(suspend_cpu_hotplug);
+
 static cpumask_t frozen_cpus;
 
 int disable_nonboot_cpus(void)
@@ -261,16 +267,8 @@ int disable_nonboot_cpus(void)
 	int cpu, first_cpu, error = 0;
 
 	mutex_lock(&cpu_add_remove_lock);
-	first_cpu = first_cpu(cpu_present_map);
-	if (!cpu_online(first_cpu)) {
-		error = _cpu_up(first_cpu);
-		if (error) {
-			printk(KERN_ERR "Could not bring CPU%d up.\n",
-			       first_cpu);
-			goto out;
-		}
-	}
-
+	suspend_cpu_hotplug = 1;
+	first_cpu = first_cpu(cpu_online_map);
 	/* We take down all of the non-boot CPUs in one shot to avoid races
 	 * with the userspace trying to use the CPU hotplug at the same time
 	 */
@@ -296,7 +294,7 @@ int disable_nonboot_cpus(void)
 	} else {
 		printk(KERN_ERR "Non-boot CPUs are not disabled\n");
 	}
-out:
+	suspend_cpu_hotplug = 0;
 	mutex_unlock(&cpu_add_remove_lock);
 	return error;
 }
@@ -308,20 +306,22 @@ void enable_nonboot_cpus(void)
 	/* Allow everyone to use the CPU hotplug again */
 	mutex_lock(&cpu_add_remove_lock);
 	cpu_hotplug_disabled = 0;
-	mutex_unlock(&cpu_add_remove_lock);
 	if (cpus_empty(frozen_cpus))
-		return;
+		goto out;
 
+	suspend_cpu_hotplug = 1;
 	printk("Enabling non-boot CPUs ...\n");
 	for_each_cpu_mask(cpu, frozen_cpus) {
-		error = cpu_up(cpu);
+		error = _cpu_up(cpu);
 		if (!error) {
 			printk("CPU%d is up\n", cpu);
 			continue;
 		}
-		printk(KERN_WARNING "Error taking CPU%d up: %d\n",
-		       cpu, error);
+		printk(KERN_WARNING "Error taking CPU%d up: %d\n", cpu, error);
 	}
 	cpus_clear(frozen_cpus);
+	suspend_cpu_hotplug = 0;
+out:
+	mutex_unlock(&cpu_add_remove_lock);
 }
 #endif
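
suspend_cpu_hotplug is a plain flag with no locking of its own: it is set
and cleared under cpu_add_remove_lock and read from CPU hotplug notifiers.
A sketch of the kind of consumer it exists for (the real check lives in
the microcode driver; this callback and its helper are hypothetical):

	static int my_cpu_callback(struct notifier_block *nb,
				   unsigned long action, void *hcpu)
	{
		/* don't call request_firmware() while devices are suspended */
		if (suspend_cpu_hotplug)
			return NOTIFY_OK;
		return do_normal_cpu_work(action, hcpu);	/* hypothetical */
	}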
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index f382b0f775..d240349cbf 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2351,6 +2351,8 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
  * z's node is in our tasks mems_allowed, yes.  If it's not a
  * __GFP_HARDWALL request and this zone's nodes is in the nearest
  * mem_exclusive cpuset ancestor to this tasks cpuset, yes.
+ * If the task has been OOM killed and has access to memory reserves
+ * as specified by the TIF_MEMDIE flag, yes.
  * Otherwise, no.
  *
  * If __GFP_HARDWALL is set, cpuset_zone_allowed_softwall()
@@ -2368,7 +2370,8 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
  * calls get to this routine, we should just shut up and say 'yes'.
  *
  * GFP_USER allocations are marked with the __GFP_HARDWALL bit,
- * and do not allow allocations outside the current tasks cpuset.
+ * and do not allow allocations outside the current tasks cpuset
+ * unless the task has been OOM killed as is marked TIF_MEMDIE.
  * GFP_KERNEL allocations are not so marked, so can escape to the
  * nearest enclosing mem_exclusive ancestor cpuset.
  *
@@ -2392,6 +2395,7 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
  * affect that:
  *	in_interrupt - any node ok (current task context irrelevant)
  *	GFP_ATOMIC   - any node ok
+ *	TIF_MEMDIE   - any node ok
  *	GFP_KERNEL   - any node in enclosing mem_exclusive cpuset ok
  *	GFP_USER     - only nodes in current tasks mems allowed ok.
 *
@@ -2413,6 +2417,12 @@ int __cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask)
 	might_sleep_if(!(gfp_mask & __GFP_HARDWALL));
 	if (node_isset(node, current->mems_allowed))
 		return 1;
+	/*
+	 * Allow tasks that have access to memory reserves because they have
+	 * been OOM killed to get memory anywhere.
+	 */
+	if (unlikely(test_thread_flag(TIF_MEMDIE)))
+		return 1;
 	if (gfp_mask & __GFP_HARDWALL)	/* If hardwall request, stop here */
 		return 0;
 
@@ -2438,7 +2448,9 @@ int __cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask)
 *
 * If we're in interrupt, yes, we can always allocate.
 * If __GFP_THISNODE is set, yes, we can always allocate.  If zone
- * z's node is in our tasks mems_allowed, yes.   Otherwise, no.
+ * z's node is in our tasks mems_allowed, yes.   If the task has been
+ * OOM killed and has access to memory reserves as specified by the
+ * TIF_MEMDIE flag, yes.  Otherwise, no.
 *
 * The __GFP_THISNODE placement logic is really handled elsewhere,
 * by forcibly using a zonelist starting at a specified node, and by
@@ -2462,6 +2474,12 @@ int __cpuset_zone_allowed_hardwall(struct zone *z, gfp_t gfp_mask)
 	node = zone_to_nid(z);
 	if (node_isset(node, current->mems_allowed))
 		return 1;
+	/*
+	 * Allow tasks that have access to memory reserves because they have
+	 * been OOM killed to get memory anywhere.
+	 */
+	if (unlikely(test_thread_flag(TIF_MEMDIE)))
+		return 1;
 	return 0;
 }
 
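
TIF_MEMDIE is the flag the OOM killer sets on its victim so the dying task
can dip into memory reserves and exit quickly; the new checks simply extend
that exemption past cpuset boundaries. A sketch of the producer side
(the actual marking is done in mm/oom_kill.c):

	/* mark p as the OOM victim: it may now allocate from reserves anywhere */
	set_tsk_thread_flag(p, TIF_MEMDIE);
	force_sig(SIGKILL, p);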
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index 766d5912b2..c0148ae992 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -31,11 +31,7 @@ __setup("nodelayacct", delayacct_setup_disable);
 
 void delayacct_init(void)
 {
-	delayacct_cache = kmem_cache_create("delayacct_cache",
-					sizeof(struct task_delay_info),
-					0,
-					SLAB_PANIC,
-					NULL, NULL);
+	delayacct_cache = KMEM_CACHE(task_delay_info, SLAB_PANIC);
 	delayacct_tsk_init(&init_task);
 }
 
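
KMEM_CACHE() is new helper sugar: it derives the cache name, size, and
alignment from the struct type, so the call site cannot get them out of
sync. Roughly how it is defined in this kernel generation (see
include/linux/slab.h):

	#define KMEM_CACHE(__struct, __flags) kmem_cache_create(#__struct,\
			sizeof(struct __struct), __alignof__(struct __struct),\
			(__flags), NULL, NULL)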
diff --git a/kernel/exit.c b/kernel/exit.c
index f132349c03..92369240d9 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -790,7 +790,7 @@ static void exit_notify(struct task_struct *tsk)
 
 	pgrp = task_pgrp(tsk);
 	if ((task_pgrp(t) != pgrp) &&
-	    (task_session(t) != task_session(tsk)) &&
+	    (task_session(t) == task_session(tsk)) &&
 	    will_become_orphaned_pgrp(pgrp, tsk) &&
 	    has_stopped_jobs(pgrp)) {
 		__kill_pgrp_info(SIGHUP, SEND_SIG_PRIV, pgrp);
@@ -1033,6 +1033,8 @@ asmlinkage void sys_exit_group(int error_code)
 
 static int eligible_child(pid_t pid, int options, struct task_struct *p)
 {
+	int err;
+
 	if (pid > 0) {
 		if (p->pid != pid)
 			return 0;
@@ -1066,8 +1068,9 @@ static int eligible_child(pid_t pid, int options, struct task_struct *p)
 	if (delay_group_leader(p))
 		return 2;
 
-	if (security_task_wait(p))
-		return 0;
+	err = security_task_wait(p);
+	if (err)
+		return err;
 
 	return 1;
 }
@@ -1449,6 +1452,7 @@ static long do_wait(pid_t pid, int options, struct siginfo __user *infop,
 	DECLARE_WAITQUEUE(wait, current);
 	struct task_struct *tsk;
 	int flag, retval;
+	int allowed, denied;
 
 	add_wait_queue(&current->signal->wait_chldexit,&wait);
 repeat:
@@ -1457,6 +1461,7 @@ repeat:
 	 * match our criteria, even if we are not able to reap it yet.
 	 */
 	flag = 0;
+	allowed = denied = 0;
 	current->state = TASK_INTERRUPTIBLE;
 	read_lock(&tasklist_lock);
 	tsk = current;
@@ -1472,6 +1477,12 @@ repeat:
 			if (!ret)
 				continue;
 
+			if (unlikely(ret < 0)) {
+				denied = ret;
+				continue;
+			}
+			allowed = 1;
+
 			switch (p->state) {
 			case TASK_TRACED:
 				/*
@@ -1570,6 +1581,8 @@ check_continued:
 		goto repeat;
 	}
 	retval = -ECHILD;
+	if (unlikely(denied) && !allowed)
+		retval = denied;
 end:
 	current->state = TASK_RUNNING;
 	remove_wait_queue(&current->signal->wait_chldexit,&wait);
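
After this change eligible_child() has a three-valued contract instead of a
boolean one. A summary sketch of how do_wait() now interprets it, restating
the hunks above:

	ret = eligible_child(pid, options, p);
	if (!ret)		/* 0: not a child we're waiting for */
		continue;
	if (unlikely(ret < 0)) {/* <0: security_task_wait() denied the wait */
		denied = ret;	/* becomes the errno only if nothing matched */
		continue;
	}
	allowed = 1;		/* >0: eligible (2 = delayed group leader) */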
diff --git a/kernel/fork.c b/kernel/fork.c
index d154cc7864..b7d169def9 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -286,6 +286,8 @@ static inline int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 		if (retval)
 			goto out;
 	}
+	/* a new mm has just been created */
+	arch_dup_mmap(oldmm, mm);
 	retval = 0;
 out:
 	up_write(&mm->mmap_sem);
@@ -933,8 +935,8 @@ asmlinkage long sys_set_tid_address(int __user *tidptr)
 
 static inline void rt_mutex_init_task(struct task_struct *p)
 {
-#ifdef CONFIG_RT_MUTEXES
 	spin_lock_init(&p->pi_lock);
+#ifdef CONFIG_RT_MUTEXES
 	plist_head_init(&p->pi_waiters, &p->pi_lock);
 	p->pi_blocked_on = NULL;
 #endif
@@ -1423,8 +1425,7 @@ static void sighand_ctor(void *data, struct kmem_cache *cachep, unsigned long fl
 {
 	struct sighand_struct *sighand = data;
 
-	if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) ==
-	    SLAB_CTOR_CONSTRUCTOR)
+	if (flags & SLAB_CTOR_CONSTRUCTOR)
 		spin_lock_init(&sighand->siglock);
 }
 
diff --git a/kernel/futex.c b/kernel/futex.c
index e749e7df14..5a270b5e3f 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -565,6 +565,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
 	if (!pi_state)
 		return -EINVAL;
 
+	spin_lock(&pi_state->pi_mutex.wait_lock);
 	new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
 
 	/*
@@ -604,6 +605,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
 	pi_state->owner = new_owner;
 	spin_unlock_irq(&new_owner->pi_lock);
 
+	spin_unlock(&pi_state->pi_mutex.wait_lock);
 	rt_mutex_unlock(&pi_state->pi_mutex);
 
 	return 0;
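
The new lock pair is subtle enough to spell out: rt_mutex_next_owner()
walks the pi_mutex wait list, which is only stable under
pi_mutex.wait_lock, so the top waiter could previously vanish (timeout or
signal) between being picked as new_owner and the actual hand-off. A
sketch of the ordering these two hunks establish:

	spin_lock(&pi_state->pi_mutex.wait_lock);
	new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
	/* update the futex word and pi_state->owner under the pi_locks */
	spin_unlock(&pi_state->pi_mutex.wait_lock);
	rt_mutex_unlock(&pi_state->pi_mutex);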
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index ec4cb9f3e3..1b3033105b 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -59,6 +59,7 @@ ktime_t ktime_get(void)
 
 	return timespec_to_ktime(now);
 }
+EXPORT_SYMBOL_GPL(ktime_get);
 
 /**
  * ktime_get_real - get the real (wall-) time in ktime_t format
@@ -135,7 +136,7 @@ EXPORT_SYMBOL_GPL(ktime_get_ts);
 static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base)
 {
 	ktime_t xtim, tomono;
-	struct timespec xts;
+	struct timespec xts, tom;
 	unsigned long seq;
 
 	do {
@@ -145,10 +146,11 @@ static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base)
 #else
 		xts = xtime;
 #endif
+		tom = wall_to_monotonic;
 	} while (read_seqretry(&xtime_lock, seq));
 
 	xtim = timespec_to_ktime(xts);
-	tomono = timespec_to_ktime(wall_to_monotonic);
+	tomono = timespec_to_ktime(tom);
 	base->clock_base[CLOCK_REALTIME].softirq_time = xtim;
 	base->clock_base[CLOCK_MONOTONIC].softirq_time =
 		ktime_add(xtim, tomono);
@@ -277,6 +279,8 @@ ktime_t ktime_add_ns(const ktime_t kt, u64 nsec)
 
 	return ktime_add(kt, tmp);
 }
+
+EXPORT_SYMBOL_GPL(ktime_add_ns);
 # endif /* !CONFIG_KTIME_SCALAR */
 
 /*
@@ -458,6 +462,18 @@ void clock_was_set(void)
 }
 
 /*
+ * During resume we might have to reprogram the high resolution timer
+ * interrupt (on the local CPU):
+ */
+void hres_timers_resume(void)
+{
+	WARN_ON_ONCE(num_online_cpus() > 1);
+
+	/* Retrigger the CPU local events: */
+	retrigger_next_event(NULL);
+}
+
+/*
  * Check, whether the timer is on the callback pending list
 */
 static inline int hrtimer_cb_pending(const struct hrtimer *timer)
@@ -644,6 +660,12 @@ hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval)
 		orun++;
 	}
 	timer->expires = ktime_add(timer->expires, interval);
+	/*
+	 * Make sure, that the result did not wrap with a very large
+	 * interval.
+	 */
+	if (timer->expires.tv64 < 0)
+		timer->expires = ktime_set(KTIME_SEC_MAX, 0);
 
 	return orun;
 }
@@ -807,7 +829,12 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
 
 	timer_stats_hrtimer_set_start_info(timer);
 
-	enqueue_hrtimer(timer, new_base, base == new_base);
+	/*
+	 * Only allow reprogramming if the new base is on this CPU.
+	 * (it might still be on another CPU if the timer was pending)
+	 */
+	enqueue_hrtimer(timer, new_base,
+			new_base->cpu_base == &__get_cpu_var(hrtimer_bases));
 
 	unlock_hrtimer_base(timer, &flags);
 
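
The overflow clamp in hrtimer_forward() matters because that function is
the standard way to re-arm a periodic timer from its own callback, where a
huge (possibly user-supplied) interval must not wrap expires negative. A
sketch of the usual pattern (callback name and the one-second period are
hypothetical):

	static enum hrtimer_restart my_periodic_cb(struct hrtimer *timer)
	{
		/* push expiry forward by whole periods past 'now' */
		hrtimer_forward(timer, timer->base->get_time(), ktime_set(1, 0));
		return HRTIMER_RESTART;
	}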
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 0133f4f9e9..615ce97c6c 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -11,6 +11,7 @@
  */
 
 #include <linux/irq.h>
+#include <linux/msi.h>
 #include <linux/module.h>
 #include <linux/interrupt.h>
 #include <linux/kernel_stat.h>
@@ -185,6 +186,8 @@ int set_irq_msi(unsigned int irq, struct msi_desc *entry)
 	desc = irq_desc + irq;
 	spin_lock_irqsave(&desc->lock, flags);
 	desc->msi_desc = entry;
+	if (entry)
+		entry->irq = irq;
 	spin_unlock_irqrestore(&desc->lock, flags);
 	return 0;
 }
diff --git a/kernel/irq/devres.c b/kernel/irq/devres.c
index 85a430da0f..d8ee241115 100644
--- a/kernel/irq/devres.c
+++ b/kernel/irq/devres.c
@@ -54,7 +54,7 @@ int devm_request_irq(struct device *dev, unsigned int irq,
 
 	rc = request_irq(irq, handler, irqflags, devname, dev_id);
 	if (rc) {
-		kfree(dr);
+		devres_free(dr);
 		return rc;
 	}
 
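
This one-liner is a real bug fix: memory obtained from devres_alloc()
carries a hidden struct devres header in front of the pointer handed back,
so it must be released through devres_free(), never raw kfree(). The
surrounding pattern, sketched from this function:

	dr = devres_alloc(devm_irq_release, sizeof(struct irq_devres),
			  GFP_KERNEL);
	if (!dr)
		return -ENOMEM;
	rc = request_irq(irq, handler, irqflags, devname, dev_id);
	if (rc) {
		devres_free(dr);	/* undoes devres_alloc() */
		return rc;
	}
	/* record irq/dev_id in *dr, then: */
	devres_add(dev, dr);		/* ownership now follows the device */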
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 6f294ff4f9..5a0de84097 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -267,27 +267,33 @@ const char *kallsyms_lookup(unsigned long addr,
 	return NULL;
 }
 
-/* Replace "%s" in format with address, or returns -errno. */
-void __print_symbol(const char *fmt, unsigned long address)
+/* Look up a kernel symbol and return it in a text buffer. */
+int sprint_symbol(char *buffer, unsigned long address)
 {
 	char *modname;
 	const char *name;
 	unsigned long offset, size;
 	char namebuf[KSYM_NAME_LEN+1];
-	char buffer[sizeof("%s+%#lx/%#lx [%s]") + KSYM_NAME_LEN +
-		    2*(BITS_PER_LONG*3/10) + MODULE_NAME_LEN + 1];
 
 	name = kallsyms_lookup(address, &size, &offset, &modname, namebuf);
-
 	if (!name)
-		sprintf(buffer, "0x%lx", address);
+		return sprintf(buffer, "0x%lx", address);
 	else {
 		if (modname)
-			sprintf(buffer, "%s+%#lx/%#lx [%s]", name, offset,
+			return sprintf(buffer, "%s+%#lx/%#lx [%s]", name, offset,
 				size, modname);
 		else
-			sprintf(buffer, "%s+%#lx/%#lx", name, offset, size);
+			return sprintf(buffer, "%s+%#lx/%#lx", name, offset, size);
 	}
+}
+
+/* Look up a kernel symbol and print it to the kernel messages. */
+void __print_symbol(const char *fmt, unsigned long address)
+{
+	char buffer[KSYM_SYMBOL_LEN];
+
+	sprint_symbol(buffer, address);
+
 	printk(fmt, buffer);
 }
 
@@ -452,3 +458,4 @@ static int __init kallsyms_init(void)
 __initcall(kallsyms_init);
 
 EXPORT_SYMBOL(__print_symbol);
+EXPORT_SYMBOL_GPL(sprint_symbol);
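
Splitting sprint_symbol() out of __print_symbol() gives callers a way to
get the "name+offset/size [module]" string without printing it. A minimal
usage sketch (the pointer being resolved is hypothetical):

	char buf[KSYM_SYMBOL_LEN];

	sprint_symbol(buf, (unsigned long)&some_function);
	printk(KERN_DEBUG "resolved to %s\n", buf);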
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index e0ffe4ab09..559deca5ed 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -24,18 +24,18 @@ static struct subsys_attribute _name##_attr = \
 
 #if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET)
 /* current uevent sequence number */
-static ssize_t uevent_seqnum_show(struct subsystem *subsys, char *page)
+static ssize_t uevent_seqnum_show(struct kset *kset, char *page)
 {
 	return sprintf(page, "%llu\n", (unsigned long long)uevent_seqnum);
 }
 KERNEL_ATTR_RO(uevent_seqnum);
 
 /* uevent helper program, used during early boo */
-static ssize_t uevent_helper_show(struct subsystem *subsys, char *page)
+static ssize_t uevent_helper_show(struct kset *kset, char *page)
 {
 	return sprintf(page, "%s\n", uevent_helper);
 }
-static ssize_t uevent_helper_store(struct subsystem *subsys, const char *page, size_t count)
+static ssize_t uevent_helper_store(struct kset *kset, const char *page, size_t count)
 {
 	if (count+1 > UEVENT_HELPER_PATH_LEN)
 		return -ENOENT;
@@ -49,13 +49,13 @@ KERNEL_ATTR_RW(uevent_helper);
 #endif
 
 #ifdef CONFIG_KEXEC
-static ssize_t kexec_loaded_show(struct subsystem *subsys, char *page)
+static ssize_t kexec_loaded_show(struct kset *kset, char *page)
 {
 	return sprintf(page, "%d\n", !!kexec_image);
 }
 KERNEL_ATTR_RO(kexec_loaded);
 
-static ssize_t kexec_crash_loaded_show(struct subsystem *subsys, char *page)
+static ssize_t kexec_crash_loaded_show(struct kset *kset, char *page)
 {
 	return sprintf(page, "%d\n", !!kexec_crash_image);
 }
@@ -85,7 +85,7 @@ static int __init ksysfs_init(void)
 {
 	int error = subsystem_register(&kernel_subsys);
 	if (!error)
-		error = sysfs_create_group(&kernel_subsys.kset.kobj,
+		error = sysfs_create_group(&kernel_subsys.kobj,
 					   &kernel_attr_group);
 
 	return error;
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 8dc24c92dc..7065a687ac 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -2742,6 +2742,10 @@ void debug_show_all_locks(void)
 	int count = 10;
 	int unlock = 1;
 
+	if (unlikely(!debug_locks)) {
+		printk("INFO: lockdep is turned off.\n");
+		return;
+	}
 	printk("\nShowing all locks held in the system:\n");
 
 	/*
@@ -2785,6 +2789,10 @@ EXPORT_SYMBOL_GPL(debug_show_all_locks);
 
 void debug_show_held_locks(struct task_struct *task)
 {
+	if (unlikely(!debug_locks)) {
+		printk("INFO: lockdep is turned off.\n");
+		return;
+	}
 	lockdep_print_held_locks(task);
 }
 
diff --git a/kernel/module.c b/kernel/module.c
index fbc51de644..1eb8ca565b 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -45,6 +45,8 @@
 #include <asm/cacheflush.h>
 #include <linux/license.h>
 
+extern int module_sysfs_initialized;
+
 #if 0
 #define DEBUGP printk
 #else
@@ -346,10 +348,10 @@ static void *percpu_modalloc(unsigned long size, unsigned long align,
 	unsigned int i;
 	void *ptr;
 
-	if (align > SMP_CACHE_BYTES) {
-		printk(KERN_WARNING "%s: per-cpu alignment %li > %i\n",
-		       name, align, SMP_CACHE_BYTES);
-		align = SMP_CACHE_BYTES;
+	if (align > PAGE_SIZE) {
+		printk(KERN_WARNING "%s: per-cpu alignment %li > %li\n",
+		       name, align, PAGE_SIZE);
+		align = PAGE_SIZE;
 	}
 
 	ptr = __per_cpu_start;
@@ -430,7 +432,7 @@ static int percpu_modinit(void)
 	pcpu_size = kmalloc(sizeof(pcpu_size[0]) * pcpu_num_allocated,
 			    GFP_KERNEL);
 	/* Static in-kernel percpu data (used). */
-	pcpu_size[0] = -ALIGN(__per_cpu_end-__per_cpu_start, SMP_CACHE_BYTES);
+	pcpu_size[0] = -(__per_cpu_end-__per_cpu_start);
 	/* Free room. */
 	pcpu_size[1] = PERCPU_ENOUGH_ROOM + pcpu_size[0];
 	if (pcpu_size[1] < 0) {
@@ -1117,8 +1119,8 @@ int mod_sysfs_init(struct module *mod)
 {
 	int err;
 
-	if (!module_subsys.kset.subsys) {
-		printk(KERN_ERR "%s: module_subsys not initialized\n",
+	if (!module_sysfs_initialized) {
+		printk(KERN_ERR "%s: module sysfs not initialized\n",
 		       mod->name);
 		err = -EINVAL;
 		goto out;
@@ -1148,8 +1150,10 @@ int mod_sysfs_setup(struct module *mod,
 		goto out;
 
 	mod->holders_dir = kobject_add_dir(&mod->mkobj.kobj, "holders");
-	if (!mod->holders_dir)
+	if (!mod->holders_dir) {
+		err = -ENOMEM;
 		goto out_unreg;
+	}
 
 	err = module_param_sysfs_setup(mod, kparam, num_params);
 	if (err)
@@ -2383,9 +2387,14 @@ void module_add_driver(struct module *mod, struct device_driver *drv)
 		struct kobject *mkobj;
 
 		/* Lookup built-in module entry in /sys/modules */
-		mkobj = kset_find_obj(&module_subsys.kset, drv->mod_name);
-		if (mkobj)
+		mkobj = kset_find_obj(&module_subsys, drv->mod_name);
+		if (mkobj) {
 			mk = container_of(mkobj, struct module_kobject, kobj);
+			/* remember our module structure */
+			drv->mkobj = mk;
+			/* kset_find_obj took a reference */
+			kobject_put(mkobj);
+		}
 	}
 
 	if (!mk)
@@ -2405,17 +2414,22 @@ EXPORT_SYMBOL(module_add_driver);
 
 void module_remove_driver(struct device_driver *drv)
 {
+	struct module_kobject *mk = NULL;
 	char *driver_name;
 
 	if (!drv)
 		return;
 
 	sysfs_remove_link(&drv->kobj, "module");
-	if (drv->owner && drv->owner->mkobj.drivers_dir) {
+
+	if (drv->owner)
+		mk = &drv->owner->mkobj;
+	else if (drv->mkobj)
+		mk = drv->mkobj;
+	if (mk && mk->drivers_dir) {
 		driver_name = make_driver_name(drv);
 		if (driver_name) {
-			sysfs_remove_link(drv->owner->mkobj.drivers_dir,
-					  driver_name);
+			sysfs_remove_link(mk->drivers_dir, driver_name);
 			kfree(driver_name);
 		}
 	}
diff --git a/kernel/params.c b/kernel/params.c
index e265b13195..312172320b 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -356,6 +356,10 @@ int param_set_copystring(const char *val, struct kernel_param *kp)
 {
 	struct kparam_string *kps = kp->arg;
 
+	if (!val) {
+		printk(KERN_ERR "%s: missing param set value\n", kp->name);
+		return -EINVAL;
+	}
 	if (strlen(val)+1 > kps->maxlen) {
 		printk(KERN_ERR "%s: string doesn't fit in %u chars.\n",
 		       kp->name, kps->maxlen-1);
@@ -687,6 +691,7 @@ static struct kset_uevent_ops module_uevent_ops = {
 };
 
 decl_subsys(module, &module_ktype, &module_uevent_ops);
+int module_sysfs_initialized;
 
 static struct kobj_type module_ktype = {
 	.sysfs_ops =	&module_sysfs_ops,
@@ -705,6 +710,7 @@ static int __init param_sysfs_init(void)
 			__FILE__, __LINE__, ret);
 		return ret;
 	}
+	module_sysfs_initialized = 1;
 
 	param_sysfs_builtin();
 
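
param_set_copystring() is the setter behind module_param_string(), and the
new NULL check covers a parameter given on the command line without any
"=value", in which case the param core hands the setter a NULL val.
Declaring such a string parameter looks like this (names hypothetical):

	static char ifname[16];
	module_param_string(ifname, ifname, sizeof(ifname), 0444);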
diff --git a/kernel/pid.c b/kernel/pid.c
index 78f2aee90f..9c80bc23d6 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -412,7 +412,5 @@ void __init pidmap_init(void)
 	set_bit(0, init_pid_ns.pidmap[0].page);
 	atomic_dec(&init_pid_ns.pidmap[0].nr_free);
 
-	pid_cachep = kmem_cache_create("pid", sizeof(struct pid),
-				       __alignof__(struct pid),
-				       SLAB_PANIC, NULL, NULL);
+	pid_cachep = KMEM_CACHE(pid, SLAB_PANIC);
 }
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 51a4dd0f1b..877721708f 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -78,17 +78,22 @@ config PM_SYSFS_DEPRECATED
 	  are likely to be bus or driver specific.
 
 config SOFTWARE_SUSPEND
-	bool "Software Suspend"
+	bool "Software Suspend (Hibernation)"
 	depends on PM && SWAP && ((X86 && (!SMP || SUSPEND_SMP)) || ((FRV || PPC32) && !SMP))
 	---help---
-	  Enable the suspend to disk (STD) functionality.
+	  Enable the suspend to disk (STD) functionality, which is usually
+	  called "hibernation" in user interfaces.  STD checkpoints the
+	  system and powers it off; and restores that checkpoint on reboot.
 
 	  You can suspend your machine with 'echo disk > /sys/power/state'.
 	  Alternatively, you can use the additional userland tools available
 	  from <http://suspend.sf.net>.
 
 	  In principle it does not require ACPI or APM, although for example
-	  ACPI will be used if available.
+	  ACPI will be used for the final steps when it is available.  One
+	  of the reasons to use software suspend is that the firmware hooks
+	  for suspend states like suspend-to-RAM (STR) often don't work very
+	  well with Linux.
 
 	  It creates an image which is saved in your active swap. Upon the next
 	  boot, pass the 'resume=/dev/swappartition' argument to the kernel to
diff --git a/kernel/power/console.c b/kernel/power/console.c
index 623786d441..89bcf4973e 100644
--- a/kernel/power/console.c
+++ b/kernel/power/console.c
@@ -27,7 +27,15 @@ int pm_prepare_console(void)
 		return 1;
 	}
 
-	set_console(SUSPEND_CONSOLE);
+	if (set_console(SUSPEND_CONSOLE)) {
+		/*
+		 * We're unable to switch to the SUSPEND_CONSOLE.
+		 * Let the calling function know so it can decide
+		 * what to do.
+		 */
+		release_console_sem();
+		return 1;
+	}
 	release_console_sem();
 
 	if (vt_waitactive(SUSPEND_CONSOLE)) {
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index 406b20adb2..06331374d8 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -39,7 +39,13 @@ static inline int platform_prepare(void)
 {
 	int error = 0;
 
-	if (pm_disk_mode == PM_DISK_PLATFORM) {
+	switch (pm_disk_mode) {
+	case PM_DISK_TEST:
+	case PM_DISK_TESTPROC:
+	case PM_DISK_SHUTDOWN:
+	case PM_DISK_REBOOT:
+		break;
+	default:
 		if (pm_ops && pm_ops->prepare)
 			error = pm_ops->prepare(PM_SUSPEND_DISK);
 	}
@@ -48,40 +54,48 @@ static inline int platform_prepare(void)
 
 /**
  *	power_down - Shut machine down for hibernate.
- *	@mode:		Suspend-to-disk mode
  *
- *	Use the platform driver, if configured so, and return gracefully if it
- *	fails.
- *	Otherwise, try to power off and reboot. If they fail, halt the machine,
- *	there ain't no turning back.
+ *	Use the platform driver, if configured so; otherwise try
+ *	to power off or reboot.
 */
 
-static void power_down(suspend_disk_method_t mode)
+static void power_down(void)
 {
-	switch(mode) {
-	case PM_DISK_PLATFORM:
-		if (pm_ops && pm_ops->enter) {
-			kernel_shutdown_prepare(SYSTEM_SUSPEND_DISK);
-			pm_ops->enter(PM_SUSPEND_DISK);
-			break;
-		}
+	switch (pm_disk_mode) {
+	case PM_DISK_TEST:
+	case PM_DISK_TESTPROC:
+		break;
 	case PM_DISK_SHUTDOWN:
 		kernel_power_off();
 		break;
 	case PM_DISK_REBOOT:
 		kernel_restart(NULL);
 		break;
+	default:
+		if (pm_ops && pm_ops->enter) {
+			kernel_shutdown_prepare(SYSTEM_SUSPEND_DISK);
+			pm_ops->enter(PM_SUSPEND_DISK);
+			break;
+		}
 	}
 	kernel_halt();
-	/* Valid image is on the disk, if we continue we risk serious data corruption
-	   after resume. */
+	/*
+	 * Valid image is on the disk, if we continue we risk serious data
+	 * corruption after resume.
+	 */
 	printk(KERN_CRIT "Please power me down manually\n");
 	while(1);
 }
 
 static inline void platform_finish(void)
 {
-	if (pm_disk_mode == PM_DISK_PLATFORM) {
+	switch (pm_disk_mode) {
+	case PM_DISK_TEST:
+	case PM_DISK_TESTPROC:
+	case PM_DISK_SHUTDOWN:
+	case PM_DISK_REBOOT:
+		break;
+	default:
 		if (pm_ops && pm_ops->finish)
 			pm_ops->finish(PM_SUSPEND_DISK);
 	}
@@ -108,8 +122,6 @@ static int prepare_processes(void)
 /**
  *	pm_suspend_disk - The granpappy of hibernation power management.
 *
- *	If we're going through the firmware, then get it over with quickly.
- *
 *	If not, then call swsusp to do its thing, then figure out how
 *	to power down the system.
 */
@@ -118,15 +130,25 @@ int pm_suspend_disk(void)
 {
 	int error;
 
+	/* The snapshot device should not be opened while we're running */
+	if (!atomic_add_unless(&snapshot_device_available, -1, 0))
+		return -EBUSY;
+
+	/* Allocate memory management structures */
+	error = create_basic_memory_bitmaps();
+	if (error)
+		goto Exit;
+
 	error = prepare_processes();
 	if (error)
-		return error;
+		goto Finish;
 
 	if (pm_disk_mode == PM_DISK_TESTPROC) {
 		printk("swsusp debug: Waiting for 5 seconds.\n");
 		mdelay(5000);
 		goto Thaw;
 	}
+
 	/* Free memory before shutting down devices. */
 	error = swsusp_shrink_memory();
 	if (error)
@@ -166,7 +188,7 @@ int pm_suspend_disk(void)
 	pr_debug("PM: writing image.\n");
 	error = swsusp_write();
 	if (!error)
-		power_down(pm_disk_mode);
+		power_down();
 	else {
 		swsusp_free();
 		goto Thaw;
@@ -184,6 +206,10 @@ int pm_suspend_disk(void)
 	resume_console();
  Thaw:
 	unprepare_processes();
+ Finish:
+	free_basic_memory_bitmaps();
+ Exit:
+	atomic_inc(&snapshot_device_available);
 	return error;
 }
 
@@ -227,25 +253,27 @@ static int software_resume(void)
 	}
 
 	pr_debug("PM: Checking swsusp image.\n");
-
 	error = swsusp_check();
 	if (error)
-		goto Done;
+		goto Unlock;
 
-	pr_debug("PM: Preparing processes for restore.\n");
+	/* The snapshot device should not be opened while we're running */
+	if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
+		error = -EBUSY;
+		goto Unlock;
+	}
 
+	error = create_basic_memory_bitmaps();
+	if (error)
+		goto Finish;
+
+	pr_debug("PM: Preparing processes for restore.\n");
 	error = prepare_processes();
 	if (error) {
 		swsusp_close();
 		goto Done;
 	}
 
-	error = platform_prepare();
-	if (error) {
-		swsusp_free();
-		goto Thaw;
-	}
-
 	pr_debug("PM: Reading swsusp image.\n");
 
 	error = swsusp_read();
@@ -268,14 +296,17 @@ static int software_resume(void)
 	enable_nonboot_cpus();
  Free:
 	swsusp_free();
-	platform_finish();
 	device_resume();
 	resume_console();
  Thaw:
 	printk(KERN_ERR "PM: Restore failed, recovering.\n");
 	unprepare_processes();
 Done:
+	free_basic_memory_bitmaps();
+ Finish:
+	atomic_inc(&snapshot_device_available);
 	/* For success case, the suspend path will release the lock */
+ Unlock:
 	mutex_unlock(&pm_mutex);
 	pr_debug("PM: Resume from disk failed.\n");
 	return 0;
@@ -285,7 +316,6 @@ late_initcall(software_resume);
 
 
 static const char * const pm_disk_modes[] = {
-	[PM_DISK_FIRMWARE]	= "firmware",
 	[PM_DISK_PLATFORM]	= "platform",
 	[PM_DISK_SHUTDOWN]	= "shutdown",
 	[PM_DISK_REBOOT]	= "reboot",
@@ -296,37 +326,62 @@ static const char * const pm_disk_modes[] = { | |||
296 | /** | 326 | /** |
297 | * disk - Control suspend-to-disk mode | 327 | * disk - Control suspend-to-disk mode |
298 | * | 328 | * |
299 | * Suspend-to-disk can be handled in several ways. The greatest | 329 | * Suspend-to-disk can be handled in several ways. We have a few options |
300 | * distinction is who writes memory to disk - the firmware or the OS. | 330 | * for putting the system to sleep - using the platform driver (e.g. ACPI |
301 | * If the firmware does it, we assume that it also handles suspending | 331 | * or other pm_ops), powering off the system or rebooting the system |
302 | * the system. | 332 | * (for testing) as well as the two test modes. |
303 | * If the OS does it, then we have three options for putting the system | ||
304 | * to sleep - using the platform driver (e.g. ACPI or other PM registers), | ||
305 | * powering off the system or rebooting the system (for testing). | ||
306 | * | 333 | * |
307 | * The system will support either 'firmware' or 'platform', and that is | 334 | * The system can support 'platform', and that is known a priori (and |
308 | * known a priori (and encoded in pm_ops). But, the user may choose | 335 | * encoded in pm_ops). However, the user may choose 'shutdown' or 'reboot' |
309 | * 'shutdown' or 'reboot' as alternatives. | 336 | * as alternatives, as well as the test modes 'test' and 'testproc'. |
310 | * | 337 | * |
311 | * show() will display what the mode is currently set to. | 338 | * show() will display what the mode is currently set to. |
312 | * store() will accept one of | 339 | * store() will accept one of |
313 | * | 340 | * |
314 | * 'firmware' | ||
315 | * 'platform' | 341 | * 'platform' |
316 | * 'shutdown' | 342 | * 'shutdown' |
317 | * 'reboot' | 343 | * 'reboot' |
344 | * 'test' | ||
345 | * 'testproc' | ||
318 | * | 346 | * |
319 | * It will only change to 'firmware' or 'platform' if the system | 347 | * It will only change to 'platform' if the system |
320 | * supports it (as determined from pm_ops->pm_disk_mode). | 348 | * supports it (as determined from pm_ops->pm_disk_mode). |
321 | */ | 349 | */ |
322 | 350 | ||
323 | static ssize_t disk_show(struct subsystem * subsys, char * buf) | 351 | static ssize_t disk_show(struct kset *kset, char *buf) |
324 | { | 352 | { |
325 | return sprintf(buf, "%s\n", pm_disk_modes[pm_disk_mode]); | 353 | int i; |
354 | char *start = buf; | ||
355 | |||
356 | for (i = PM_DISK_PLATFORM; i < PM_DISK_MAX; i++) { | ||
357 | if (!pm_disk_modes[i]) | ||
358 | continue; | ||
359 | switch (i) { | ||
360 | case PM_DISK_SHUTDOWN: | ||
361 | case PM_DISK_REBOOT: | ||
362 | case PM_DISK_TEST: | ||
363 | case PM_DISK_TESTPROC: | ||
364 | break; | ||
365 | default: | ||
366 | if (pm_ops && pm_ops->enter && | ||
367 | (i == pm_ops->pm_disk_mode)) | ||
368 | break; | ||
369 | /* not a valid mode, continue with loop */ | ||
370 | continue; | ||
371 | } | ||
372 | if (i == pm_disk_mode) | ||
373 | buf += sprintf(buf, "[%s]", pm_disk_modes[i]); | ||
374 | else | ||
375 | buf += sprintf(buf, "%s", pm_disk_modes[i]); | ||
376 | if (i+1 != PM_DISK_MAX) | ||
377 | buf += sprintf(buf, " "); | ||
378 | } | ||
379 | buf += sprintf(buf, "\n"); | ||
380 | return buf-start; | ||
326 | } | 381 | } |
327 | 382 | ||
328 | 383 | ||
329 | static ssize_t disk_store(struct subsystem * s, const char * buf, size_t n) | 384 | static ssize_t disk_store(struct kset *kset, const char *buf, size_t n) |
330 | { | 385 | { |
331 | int error = 0; | 386 | int error = 0; |
332 | int i; | 387 | int i; |
@@ -338,17 +393,21 @@ static ssize_t disk_store(struct subsystem * s, const char * buf, size_t n) | |||
338 | len = p ? p - buf : n; | 393 | len = p ? p - buf : n; |
339 | 394 | ||
340 | mutex_lock(&pm_mutex); | 395 | mutex_lock(&pm_mutex); |
341 | for (i = PM_DISK_FIRMWARE; i < PM_DISK_MAX; i++) { | 396 | for (i = PM_DISK_PLATFORM; i < PM_DISK_MAX; i++) { |
342 | if (!strncmp(buf, pm_disk_modes[i], len)) { | 397 | if (!strncmp(buf, pm_disk_modes[i], len)) { |
343 | mode = i; | 398 | mode = i; |
344 | break; | 399 | break; |
345 | } | 400 | } |
346 | } | 401 | } |
347 | if (mode) { | 402 | if (mode) { |
348 | if (mode == PM_DISK_SHUTDOWN || mode == PM_DISK_REBOOT || | 403 | switch (mode) { |
349 | mode == PM_DISK_TEST || mode == PM_DISK_TESTPROC) { | 404 | case PM_DISK_SHUTDOWN: |
405 | case PM_DISK_REBOOT: | ||
406 | case PM_DISK_TEST: | ||
407 | case PM_DISK_TESTPROC: | ||
350 | pm_disk_mode = mode; | 408 | pm_disk_mode = mode; |
351 | } else { | 409 | break; |
410 | default: | ||
352 | if (pm_ops && pm_ops->enter && | 411 | if (pm_ops && pm_ops->enter && |
353 | (mode == pm_ops->pm_disk_mode)) | 412 | (mode == pm_ops->pm_disk_mode)) |
354 | pm_disk_mode = mode; | 413 | pm_disk_mode = mode; |
@@ -367,13 +426,13 @@ static ssize_t disk_store(struct subsystem * s, const char * buf, size_t n) | |||
367 | 426 | ||
368 | power_attr(disk); | 427 | power_attr(disk); |
369 | 428 | ||
370 | static ssize_t resume_show(struct subsystem * subsys, char *buf) | 429 | static ssize_t resume_show(struct kset *kset, char *buf) |
371 | { | 430 | { |
372 | return sprintf(buf,"%d:%d\n", MAJOR(swsusp_resume_device), | 431 | return sprintf(buf,"%d:%d\n", MAJOR(swsusp_resume_device), |
373 | MINOR(swsusp_resume_device)); | 432 | MINOR(swsusp_resume_device)); |
374 | } | 433 | } |
375 | 434 | ||
376 | static ssize_t resume_store(struct subsystem *subsys, const char *buf, size_t n) | 435 | static ssize_t resume_store(struct kset *kset, const char *buf, size_t n) |
377 | { | 436 | { |
378 | unsigned int maj, min; | 437 | unsigned int maj, min; |
379 | dev_t res; | 438 | dev_t res; |
@@ -399,12 +458,12 @@ static ssize_t resume_store(struct subsystem *subsys, const char *buf, size_t n) | |||
399 | 458 | ||
400 | power_attr(resume); | 459 | power_attr(resume); |
401 | 460 | ||
402 | static ssize_t image_size_show(struct subsystem * subsys, char *buf) | 461 | static ssize_t image_size_show(struct kset *kset, char *buf) |
403 | { | 462 | { |
404 | return sprintf(buf, "%lu\n", image_size); | 463 | return sprintf(buf, "%lu\n", image_size); |
405 | } | 464 | } |
406 | 465 | ||
407 | static ssize_t image_size_store(struct subsystem * subsys, const char * buf, size_t n) | 466 | static ssize_t image_size_store(struct kset *kset, const char *buf, size_t n) |
408 | { | 467 | { |
409 | unsigned long size; | 468 | unsigned long size; |
410 | 469 | ||
@@ -433,7 +492,7 @@ static struct attribute_group attr_group = { | |||
433 | 492 | ||
434 | static int __init pm_disk_init(void) | 493 | static int __init pm_disk_init(void) |
435 | { | 494 | { |
436 | return sysfs_create_group(&power_subsys.kset.kobj,&attr_group); | 495 | return sysfs_create_group(&power_subsys.kobj, &attr_group); |
437 | } | 496 | } |
438 | 497 | ||
439 | core_initcall(pm_disk_init); | 498 | core_initcall(pm_disk_init); |
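Net effect on the /sys/power/disk interface: show() now lists every mode the kernel will accept, with the active one bracketed, instead of printing only the current mode. A minimal userspace sketch of driving it (not part of the patch; assumes the post-patch kernel and root privileges for the write):

#include <stdio.h>

int main(void)
{
	char modes[128];
	FILE *f = fopen("/sys/power/disk", "r");

	if (!f)
		return 1;
	if (fgets(modes, sizeof(modes), f))
		/* e.g. "[shutdown] reboot test testproc" */
		printf("modes: %s", modes);
	fclose(f);

	f = fopen("/sys/power/disk", "w");	/* needs root */
	if (f) {
		fputs("reboot\n", f);	/* must be one of the listed modes */
		fclose(f);
	}
	return 0;
}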
diff --git a/kernel/power/main.c b/kernel/power/main.c index a064dfd887..f6dda685e7 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c | |||
@@ -30,7 +30,7 @@ | |||
30 | DEFINE_MUTEX(pm_mutex); | 30 | DEFINE_MUTEX(pm_mutex); |
31 | 31 | ||
32 | struct pm_ops *pm_ops; | 32 | struct pm_ops *pm_ops; |
33 | suspend_disk_method_t pm_disk_mode = PM_DISK_PLATFORM; | 33 | suspend_disk_method_t pm_disk_mode = PM_DISK_SHUTDOWN; |
34 | 34 | ||
35 | /** | 35 | /** |
36 | * pm_set_ops - Set the global power method table. | 36 | * pm_set_ops - Set the global power method table. |
@@ -41,9 +41,26 @@ void pm_set_ops(struct pm_ops * ops) | |||
41 | { | 41 | { |
42 | mutex_lock(&pm_mutex); | 42 | mutex_lock(&pm_mutex); |
43 | pm_ops = ops; | 43 | pm_ops = ops; |
44 | if (ops && ops->pm_disk_mode != PM_DISK_INVALID) { | ||
45 | pm_disk_mode = ops->pm_disk_mode; | ||
46 | } else | ||
47 | pm_disk_mode = PM_DISK_SHUTDOWN; | ||
44 | mutex_unlock(&pm_mutex); | 48 | mutex_unlock(&pm_mutex); |
45 | } | 49 | } |
46 | 50 | ||
51 | /** | ||
52 | * pm_valid_only_mem - generic memory-only valid callback | ||
53 | * | ||
54 | * pm_ops drivers that implement mem suspend only and only need | ||
55 | * to check for that in their .valid callback can use this instead | ||
56 | * of rolling their own .valid callback. | ||
57 | */ | ||
58 | int pm_valid_only_mem(suspend_state_t state) | ||
59 | { | ||
60 | return state == PM_SUSPEND_MEM; | ||
61 | } | ||
62 | |||
63 | |||
47 | static inline void pm_finish(suspend_state_t state) | 64 | static inline void pm_finish(suspend_state_t state) |
48 | { | 65 | { |
49 | if (pm_ops->finish) | 66 | if (pm_ops->finish) |
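A sketch of how a driver would use the new helper (hypothetical platform code, not from this patch): a pm_ops implementation that only supports suspend-to-RAM points .valid at pm_valid_only_mem instead of open-coding the check.

#include <linux/init.h>
#include <linux/pm.h>

/* hypothetical: program the platform's sleep state */
static int my_pm_enter(suspend_state_t state)
{
	return 0;
}

static struct pm_ops my_pm_ops = {
	.valid	= pm_valid_only_mem,
	.enter	= my_pm_enter,
};

static int __init my_pm_init(void)
{
	/* pm_disk_mode is left 0 (PM_DISK_INVALID), so the new
	 * pm_set_ops() falls back to PM_DISK_SHUTDOWN */
	pm_set_ops(&my_pm_ops);
	return 0;
}
late_initcall(my_pm_init);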
@@ -111,13 +128,24 @@ static int suspend_prepare(suspend_state_t state) | |||
111 | return error; | 128 | return error; |
112 | } | 129 | } |
113 | 130 | ||
131 | /* default implementation */ | ||
132 | void __attribute__ ((weak)) arch_suspend_disable_irqs(void) | ||
133 | { | ||
134 | local_irq_disable(); | ||
135 | } | ||
136 | |||
137 | /* default implementation */ | ||
138 | void __attribute__ ((weak)) arch_suspend_enable_irqs(void) | ||
139 | { | ||
140 | local_irq_enable(); | ||
141 | } | ||
114 | 142 | ||
115 | int suspend_enter(suspend_state_t state) | 143 | int suspend_enter(suspend_state_t state) |
116 | { | 144 | { |
117 | int error = 0; | 145 | int error = 0; |
118 | unsigned long flags; | ||
119 | 146 | ||
120 | local_irq_save(flags); | 147 | arch_suspend_disable_irqs(); |
148 | BUG_ON(!irqs_disabled()); | ||
121 | 149 | ||
122 | if ((error = device_power_down(PMSG_SUSPEND))) { | 150 | if ((error = device_power_down(PMSG_SUSPEND))) { |
123 | printk(KERN_ERR "Some devices failed to power down\n"); | 151 | printk(KERN_ERR "Some devices failed to power down\n"); |
@@ -126,7 +154,8 @@ int suspend_enter(suspend_state_t state) | |||
126 | error = pm_ops->enter(state); | 154 | error = pm_ops->enter(state); |
127 | device_power_up(); | 155 | device_power_up(); |
128 | Done: | 156 | Done: |
129 | local_irq_restore(flags); | 157 | arch_suspend_enable_irqs(); |
158 | BUG_ON(irqs_disabled()); | ||
130 | return error; | 159 | return error; |
131 | } | 160 | } |
132 | 161 | ||
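The weak-symbol hooks replace the unconditional local_irq_save() so that an architecture can interpose its own sequencing around the IRQ switch; the two BUG_ON()s pin down the contract (interrupts off after the disable hook, on again after the enable hook). A hypothetical override, for illustration only:

#include <linux/irqflags.h>

/* non-weak definitions in arch code take precedence over the
 * weak defaults added above */
void arch_suspend_disable_irqs(void)
{
	/* arch-specific quiescing could go here */
	local_irq_disable();
}

void arch_suspend_enable_irqs(void)
{
	local_irq_enable();
	/* arch-specific restart work could go here */
}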
@@ -155,22 +184,26 @@ static void suspend_finish(suspend_state_t state) | |||
155 | static const char * const pm_states[PM_SUSPEND_MAX] = { | 184 | static const char * const pm_states[PM_SUSPEND_MAX] = { |
156 | [PM_SUSPEND_STANDBY] = "standby", | 185 | [PM_SUSPEND_STANDBY] = "standby", |
157 | [PM_SUSPEND_MEM] = "mem", | 186 | [PM_SUSPEND_MEM] = "mem", |
158 | #ifdef CONFIG_SOFTWARE_SUSPEND | ||
159 | [PM_SUSPEND_DISK] = "disk", | 187 | [PM_SUSPEND_DISK] = "disk", |
160 | #endif | ||
161 | }; | 188 | }; |
162 | 189 | ||
163 | static inline int valid_state(suspend_state_t state) | 190 | static inline int valid_state(suspend_state_t state) |
164 | { | 191 | { |
165 | /* Suspend-to-disk does not really need low-level support. | 192 | /* Suspend-to-disk does not really need low-level support. |
166 | * It can work with reboot if needed. */ | 193 | * It can work with shutdown/reboot if needed. If it isn't |
194 | * configured, then it cannot be supported. | ||
195 | */ | ||
167 | if (state == PM_SUSPEND_DISK) | 196 | if (state == PM_SUSPEND_DISK) |
197 | #ifdef CONFIG_SOFTWARE_SUSPEND | ||
168 | return 1; | 198 | return 1; |
199 | #else | ||
200 | return 0; | ||
201 | #endif | ||
169 | 202 | ||
170 | /* all other states need lowlevel support and need to be | 203 | /* all other states need lowlevel support and need to be |
171 | * valid to the lowlevel implementation; no valid callback | 204 | * valid to the lowlevel implementation; no valid callback |
172 | * implies that all are valid. */ | 205 | * implies that none are valid. */ |
173 | if (!pm_ops || (pm_ops->valid && !pm_ops->valid(state))) | 206 | if (!pm_ops || !pm_ops->valid || !pm_ops->valid(state)) |
174 | return 0; | 207 | return 0; |
175 | return 1; | 208 | return 1; |
176 | } | 209 | } |
@@ -215,15 +248,6 @@ static int enter_state(suspend_state_t state) | |||
215 | return error; | 248 | return error; |
216 | } | 249 | } |
217 | 250 | ||
218 | /* | ||
219 | * This is main interface to the outside world. It needs to be | ||
220 | * called from process context. | ||
221 | */ | ||
222 | int software_suspend(void) | ||
223 | { | ||
224 | return enter_state(PM_SUSPEND_DISK); | ||
225 | } | ||
226 | |||
227 | 251 | ||
228 | /** | 252 | /** |
229 | * pm_suspend - Externally visible function for suspending system. | 253 | * pm_suspend - Externally visible function for suspending system. |
@@ -256,7 +280,7 @@ decl_subsys(power,NULL,NULL); | |||
256 | * proper enumerated value, and initiates a suspend transition. | 280 | * proper enumerated value, and initiates a suspend transition. |
257 | */ | 281 | */ |
258 | 282 | ||
259 | static ssize_t state_show(struct subsystem * subsys, char * buf) | 283 | static ssize_t state_show(struct kset *kset, char *buf) |
260 | { | 284 | { |
261 | int i; | 285 | int i; |
262 | char * s = buf; | 286 | char * s = buf; |
@@ -269,7 +293,7 @@ static ssize_t state_show(struct subsystem * subsys, char * buf) | |||
269 | return (s - buf); | 293 | return (s - buf); |
270 | } | 294 | } |
271 | 295 | ||
272 | static ssize_t state_store(struct subsystem * subsys, const char * buf, size_t n) | 296 | static ssize_t state_store(struct kset *kset, const char *buf, size_t n) |
273 | { | 297 | { |
274 | suspend_state_t state = PM_SUSPEND_STANDBY; | 298 | suspend_state_t state = PM_SUSPEND_STANDBY; |
275 | const char * const *s; | 299 | const char * const *s; |
@@ -296,13 +320,13 @@ power_attr(state); | |||
296 | #ifdef CONFIG_PM_TRACE | 320 | #ifdef CONFIG_PM_TRACE |
297 | int pm_trace_enabled; | 321 | int pm_trace_enabled; |
298 | 322 | ||
299 | static ssize_t pm_trace_show(struct subsystem * subsys, char * buf) | 323 | static ssize_t pm_trace_show(struct kset *kset, char *buf) |
300 | { | 324 | { |
301 | return sprintf(buf, "%d\n", pm_trace_enabled); | 325 | return sprintf(buf, "%d\n", pm_trace_enabled); |
302 | } | 326 | } |
303 | 327 | ||
304 | static ssize_t | 328 | static ssize_t |
305 | pm_trace_store(struct subsystem * subsys, const char * buf, size_t n) | 329 | pm_trace_store(struct kset *kset, const char *buf, size_t n) |
306 | { | 330 | { |
307 | int val; | 331 | int val; |
308 | 332 | ||
@@ -336,7 +360,7 @@ static int __init pm_init(void) | |||
336 | { | 360 | { |
337 | int error = subsystem_register(&power_subsys); | 361 | int error = subsystem_register(&power_subsys); |
338 | if (!error) | 362 | if (!error) |
339 | error = sysfs_create_group(&power_subsys.kset.kobj,&attr_group); | 363 | error = sysfs_create_group(&power_subsys.kobj,&attr_group); |
340 | return error; | 364 | return error; |
341 | } | 365 | } |
342 | 366 | ||
diff --git a/kernel/power/power.h b/kernel/power/power.h index eb461b816b..34b4354278 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h | |||
@@ -14,8 +14,18 @@ struct swsusp_info { | |||
14 | 14 | ||
15 | 15 | ||
16 | #ifdef CONFIG_SOFTWARE_SUSPEND | 16 | #ifdef CONFIG_SOFTWARE_SUSPEND |
17 | extern int pm_suspend_disk(void); | 17 | /* |
18 | * Keep some memory free so that I/O operations can succeed without paging | ||
19 | * [Might this be more than 4 MB?] | ||
20 | */ | ||
21 | #define PAGES_FOR_IO ((4096 * 1024) >> PAGE_SHIFT) | ||
22 | /* | ||
23 | * Keep 1 MB of memory free so that device drivers can allocate some pages in | ||
24 | * their .suspend() routines without breaking the suspend to disk. | ||
25 | */ | ||
26 | #define SPARE_PAGES ((1024 * 1024) >> PAGE_SHIFT) | ||
18 | 27 | ||
28 | extern int pm_suspend_disk(void); | ||
19 | #else | 29 | #else |
20 | static inline int pm_suspend_disk(void) | 30 | static inline int pm_suspend_disk(void) |
21 | { | 31 | { |
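With the common 4 KB page size (PAGE_SHIFT == 12), the two reserves above work out to:

	PAGES_FOR_IO = (4096 * 1024) >> 12 = 1024 page frames (4 MB)
	SPARE_PAGES  = (1024 * 1024) >> 12 =  256 page frames (1 MB)

so swsusp_shrink_memory() in kernel/power/swsusp.c (below) now keeps 1280 extra page frames free on top of the saveable data.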
@@ -23,6 +33,8 @@ static inline int pm_suspend_disk(void) | |||
23 | } | 33 | } |
24 | #endif | 34 | #endif |
25 | 35 | ||
36 | extern int pfn_is_nosave(unsigned long); | ||
37 | |||
26 | extern struct mutex pm_mutex; | 38 | extern struct mutex pm_mutex; |
27 | 39 | ||
28 | #define power_attr(_name) \ | 40 | #define power_attr(_name) \ |
@@ -35,10 +47,7 @@ static struct subsys_attribute _name##_attr = { \ | |||
35 | .store = _name##_store, \ | 47 | .store = _name##_store, \ |
36 | } | 48 | } |
37 | 49 | ||
38 | extern struct subsystem power_subsys; | 50 | extern struct kset power_subsys; |
39 | |||
40 | /* References to section boundaries */ | ||
41 | extern const void __nosave_begin, __nosave_end; | ||
42 | 51 | ||
43 | /* Preferred image size in bytes (default 500 MB) */ | 52 | /* Preferred image size in bytes (default 500 MB) */ |
44 | extern unsigned long image_size; | 53 | extern unsigned long image_size; |
@@ -49,6 +58,8 @@ extern sector_t swsusp_resume_block; | |||
49 | extern asmlinkage int swsusp_arch_suspend(void); | 58 | extern asmlinkage int swsusp_arch_suspend(void); |
50 | extern asmlinkage int swsusp_arch_resume(void); | 59 | extern asmlinkage int swsusp_arch_resume(void); |
51 | 60 | ||
61 | extern int create_basic_memory_bitmaps(void); | ||
62 | extern void free_basic_memory_bitmaps(void); | ||
52 | extern unsigned int count_data_pages(void); | 63 | extern unsigned int count_data_pages(void); |
53 | 64 | ||
54 | /** | 65 | /** |
@@ -139,30 +150,12 @@ struct resume_swap_area { | |||
139 | #define PMOPS_ENTER 2 | 150 | #define PMOPS_ENTER 2 |
140 | #define PMOPS_FINISH 3 | 151 | #define PMOPS_FINISH 3 |
141 | 152 | ||
142 | /** | 153 | /* If unset, the snapshot device cannot be open. */ |
143 | * The bitmap is used for tracing allocated swap pages | 154 | extern atomic_t snapshot_device_available; |
144 | * | ||
145 | * The entire bitmap consists of a number of bitmap_page | ||
146 | * structures linked with the help of the .next member. | ||
147 | * Thus each page can be allocated individually, so we only | ||
148 | * need to make 0-order memory allocations to create | ||
149 | * the bitmap. | ||
150 | */ | ||
151 | |||
152 | #define BITMAP_PAGE_SIZE (PAGE_SIZE - sizeof(void *)) | ||
153 | #define BITMAP_PAGE_CHUNKS (BITMAP_PAGE_SIZE / sizeof(long)) | ||
154 | #define BITS_PER_CHUNK (sizeof(long) * 8) | ||
155 | #define BITMAP_PAGE_BITS (BITMAP_PAGE_CHUNKS * BITS_PER_CHUNK) | ||
156 | |||
157 | struct bitmap_page { | ||
158 | unsigned long chunks[BITMAP_PAGE_CHUNKS]; | ||
159 | struct bitmap_page *next; | ||
160 | }; | ||
161 | 155 | ||
162 | extern void free_bitmap(struct bitmap_page *bitmap); | 156 | extern sector_t alloc_swapdev_block(int swap); |
163 | extern struct bitmap_page *alloc_bitmap(unsigned int nr_bits); | 157 | extern void free_all_swap_pages(int swap); |
164 | extern sector_t alloc_swapdev_block(int swap, struct bitmap_page *bitmap); | 158 | extern int swsusp_swap_in_use(void); |
165 | extern void free_all_swap_pages(int swap, struct bitmap_page *bitmap); | ||
166 | 159 | ||
167 | extern int swsusp_check(void); | 160 | extern int swsusp_check(void); |
168 | extern int swsusp_shrink_memory(void); | 161 | extern int swsusp_shrink_memory(void); |
diff --git a/kernel/power/process.c b/kernel/power/process.c index 6d566bf708..0eb5c420e8 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c | |||
@@ -47,8 +47,10 @@ void refrigerator(void) | |||
47 | recalc_sigpending(); /* We sent a fake signal, clean it up */ | 47 | recalc_sigpending(); /* We sent a fake signal, clean it up */ |
48 | spin_unlock_irq(¤t->sighand->siglock); | 48 | spin_unlock_irq(¤t->sighand->siglock); |
49 | 49 | ||
50 | while (frozen(current)) { | 50 | for (;;) { |
51 | current->state = TASK_UNINTERRUPTIBLE; | 51 | set_current_state(TASK_UNINTERRUPTIBLE); |
52 | if (!frozen(current)) | ||
53 | break; | ||
52 | schedule(); | 54 | schedule(); |
53 | } | 55 | } |
54 | pr_debug("%s left refrigerator\n", current->comm); | 56 | pr_debug("%s left refrigerator\n", current->comm); |
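The old loop stored TASK_UNINTERRUPTIBLE with a plain assignment after testing frozen(); a thaw that cleared the flag and issued its wake-up between the test and the store could be lost, leaving the task asleep for good. The rewrite follows the canonical wait-loop shape, a sketch of which (not from the patch) is:

	for (;;) {
		/* set_current_state() implies a memory barrier, so the
		 * condition is re-checked only after the new state is
		 * visible to the waker */
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (condition)		/* here: !frozen(current) */
			break;
		schedule();
	}
	__set_current_state(TASK_RUNNING);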
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index fc53ad0681..128da11f01 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/kernel.h> | 21 | #include <linux/kernel.h> |
22 | #include <linux/pm.h> | 22 | #include <linux/pm.h> |
23 | #include <linux/device.h> | 23 | #include <linux/device.h> |
24 | #include <linux/init.h> | ||
24 | #include <linux/bootmem.h> | 25 | #include <linux/bootmem.h> |
25 | #include <linux/syscalls.h> | 26 | #include <linux/syscalls.h> |
26 | #include <linux/console.h> | 27 | #include <linux/console.h> |
@@ -34,6 +35,10 @@ | |||
34 | 35 | ||
35 | #include "power.h" | 36 | #include "power.h" |
36 | 37 | ||
38 | static int swsusp_page_is_free(struct page *); | ||
39 | static void swsusp_set_page_forbidden(struct page *); | ||
40 | static void swsusp_unset_page_forbidden(struct page *); | ||
41 | |||
37 | /* List of PBEs needed for restoring the pages that were allocated before | 42 | /* List of PBEs needed for restoring the pages that were allocated before |
38 | * the suspend and included in the suspend image, but have also been | 43 | * the suspend and included in the suspend image, but have also been |
39 | * allocated by the "resume" kernel, so their contents cannot be written | 44 | * allocated by the "resume" kernel, so their contents cannot be written |
@@ -67,15 +72,15 @@ static void *get_image_page(gfp_t gfp_mask, int safe_needed) | |||
67 | 72 | ||
68 | res = (void *)get_zeroed_page(gfp_mask); | 73 | res = (void *)get_zeroed_page(gfp_mask); |
69 | if (safe_needed) | 74 | if (safe_needed) |
70 | while (res && PageNosaveFree(virt_to_page(res))) { | 75 | while (res && swsusp_page_is_free(virt_to_page(res))) { |
71 | /* The page is unsafe, mark it for swsusp_free() */ | 76 | /* The page is unsafe, mark it for swsusp_free() */ |
72 | SetPageNosave(virt_to_page(res)); | 77 | swsusp_set_page_forbidden(virt_to_page(res)); |
73 | allocated_unsafe_pages++; | 78 | allocated_unsafe_pages++; |
74 | res = (void *)get_zeroed_page(gfp_mask); | 79 | res = (void *)get_zeroed_page(gfp_mask); |
75 | } | 80 | } |
76 | if (res) { | 81 | if (res) { |
77 | SetPageNosave(virt_to_page(res)); | 82 | swsusp_set_page_forbidden(virt_to_page(res)); |
78 | SetPageNosaveFree(virt_to_page(res)); | 83 | swsusp_set_page_free(virt_to_page(res)); |
79 | } | 84 | } |
80 | return res; | 85 | return res; |
81 | } | 86 | } |
@@ -91,8 +96,8 @@ static struct page *alloc_image_page(gfp_t gfp_mask) | |||
91 | 96 | ||
92 | page = alloc_page(gfp_mask); | 97 | page = alloc_page(gfp_mask); |
93 | if (page) { | 98 | if (page) { |
94 | SetPageNosave(page); | 99 | swsusp_set_page_forbidden(page); |
95 | SetPageNosaveFree(page); | 100 | swsusp_set_page_free(page); |
96 | } | 101 | } |
97 | return page; | 102 | return page; |
98 | } | 103 | } |
@@ -110,9 +115,9 @@ static inline void free_image_page(void *addr, int clear_nosave_free) | |||
110 | 115 | ||
111 | page = virt_to_page(addr); | 116 | page = virt_to_page(addr); |
112 | 117 | ||
113 | ClearPageNosave(page); | 118 | swsusp_unset_page_forbidden(page); |
114 | if (clear_nosave_free) | 119 | if (clear_nosave_free) |
115 | ClearPageNosaveFree(page); | 120 | swsusp_unset_page_free(page); |
116 | 121 | ||
117 | __free_page(page); | 122 | __free_page(page); |
118 | } | 123 | } |
@@ -224,11 +229,6 @@ static void chain_free(struct chain_allocator *ca, int clear_page_nosave) | |||
224 | * of type unsigned long each). It also contains the pfns that | 229 | * of type unsigned long each). It also contains the pfns that |
225 | * correspond to the start and end of the represented memory area and | 230 | * correspond to the start and end of the represented memory area and |
226 | * the number of bit chunks in the block. | 231 | * the number of bit chunks in the block. |
227 | * | ||
228 | * NOTE: Memory bitmaps are used for two types of operations only: | ||
229 | * "set a bit" and "find the next bit set". Moreover, the searching | ||
230 | * is always carried out after all of the "set a bit" operations | ||
231 | * on given bitmap. | ||
232 | */ | 232 | */ |
233 | 233 | ||
234 | #define BM_END_OF_MAP (~0UL) | 234 | #define BM_END_OF_MAP (~0UL) |
@@ -443,15 +443,13 @@ static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free) | |||
443 | } | 443 | } |
444 | 444 | ||
445 | /** | 445 | /** |
446 | * memory_bm_set_bit - set the bit in the bitmap @bm that corresponds | 446 | * memory_bm_find_bit - find the bit in the bitmap @bm that corresponds |
447 | * to given pfn. The cur_zone_bm member of @bm and the cur_block member | 447 | * to given pfn. The cur_zone_bm member of @bm and the cur_block member |
448 | * of @bm->cur_zone_bm are updated. | 448 | * of @bm->cur_zone_bm are updated. |
449 | * | ||
450 | * If the bit cannot be set, the function returns -EINVAL . | ||
451 | */ | 449 | */ |
452 | 450 | ||
453 | static int | 451 | static void memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn, |
454 | memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn) | 452 | void **addr, unsigned int *bit_nr) |
455 | { | 453 | { |
456 | struct zone_bitmap *zone_bm; | 454 | struct zone_bitmap *zone_bm; |
457 | struct bm_block *bb; | 455 | struct bm_block *bb; |
@@ -463,8 +461,8 @@ memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn) | |||
463 | /* We don't assume that the zones are sorted by pfns */ | 461 | /* We don't assume that the zones are sorted by pfns */ |
464 | while (pfn < zone_bm->start_pfn || pfn >= zone_bm->end_pfn) { | 462 | while (pfn < zone_bm->start_pfn || pfn >= zone_bm->end_pfn) { |
465 | zone_bm = zone_bm->next; | 463 | zone_bm = zone_bm->next; |
466 | if (unlikely(!zone_bm)) | 464 | |
467 | return -EINVAL; | 465 | BUG_ON(!zone_bm); |
468 | } | 466 | } |
469 | bm->cur.zone_bm = zone_bm; | 467 | bm->cur.zone_bm = zone_bm; |
470 | } | 468 | } |
@@ -475,13 +473,40 @@ memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn) | |||
475 | 473 | ||
476 | while (pfn >= bb->end_pfn) { | 474 | while (pfn >= bb->end_pfn) { |
477 | bb = bb->next; | 475 | bb = bb->next; |
478 | if (unlikely(!bb)) | 476 | |
479 | return -EINVAL; | 477 | BUG_ON(!bb); |
480 | } | 478 | } |
481 | zone_bm->cur_block = bb; | 479 | zone_bm->cur_block = bb; |
482 | pfn -= bb->start_pfn; | 480 | pfn -= bb->start_pfn; |
483 | set_bit(pfn % BM_BITS_PER_CHUNK, bb->data + pfn / BM_BITS_PER_CHUNK); | 481 | *bit_nr = pfn % BM_BITS_PER_CHUNK; |
484 | return 0; | 482 | *addr = bb->data + pfn / BM_BITS_PER_CHUNK; |
483 | } | ||
484 | |||
485 | static void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn) | ||
486 | { | ||
487 | void *addr; | ||
488 | unsigned int bit; | ||
489 | |||
490 | memory_bm_find_bit(bm, pfn, &addr, &bit); | ||
491 | set_bit(bit, addr); | ||
492 | } | ||
493 | |||
494 | static void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn) | ||
495 | { | ||
496 | void *addr; | ||
497 | unsigned int bit; | ||
498 | |||
499 | memory_bm_find_bit(bm, pfn, &addr, &bit); | ||
500 | clear_bit(bit, addr); | ||
501 | } | ||
502 | |||
503 | static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn) | ||
504 | { | ||
505 | void *addr; | ||
506 | unsigned int bit; | ||
507 | |||
508 | memory_bm_find_bit(bm, pfn, &addr, &bit); | ||
509 | return test_bit(bit, addr); | ||
485 | } | 510 | } |
486 | 511 | ||
487 | /* Two auxiliary functions for memory_bm_next_pfn */ | 512 | /* Two auxiliary functions for memory_bm_next_pfn */ |
@@ -564,6 +589,199 @@ static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm) | |||
564 | } | 589 | } |
565 | 590 | ||
566 | /** | 591 | /** |
592 | * This structure represents a range of page frames the contents of which | ||
593 | * should not be saved during the suspend. | ||
594 | */ | ||
595 | |||
596 | struct nosave_region { | ||
597 | struct list_head list; | ||
598 | unsigned long start_pfn; | ||
599 | unsigned long end_pfn; | ||
600 | }; | ||
601 | |||
602 | static LIST_HEAD(nosave_regions); | ||
603 | |||
604 | /** | ||
605 | * register_nosave_region - register a range of page frames the contents | ||
606 | * of which should not be saved during the suspend (to be used in the early | ||
607 | * initialization code) | ||
608 | */ | ||
609 | |||
610 | void __init | ||
611 | register_nosave_region(unsigned long start_pfn, unsigned long end_pfn) | ||
612 | { | ||
613 | struct nosave_region *region; | ||
614 | |||
615 | if (start_pfn >= end_pfn) | ||
616 | return; | ||
617 | |||
618 | if (!list_empty(&nosave_regions)) { | ||
619 | /* Try to extend the previous region (they should be sorted) */ | ||
620 | region = list_entry(nosave_regions.prev, | ||
621 | struct nosave_region, list); | ||
622 | if (region->end_pfn == start_pfn) { | ||
623 | region->end_pfn = end_pfn; | ||
624 | goto Report; | ||
625 | } | ||
626 | } | ||
627 | /* This allocation cannot fail */ | ||
628 | region = alloc_bootmem_low(sizeof(struct nosave_region)); | ||
629 | region->start_pfn = start_pfn; | ||
630 | region->end_pfn = end_pfn; | ||
631 | list_add_tail(®ion->list, &nosave_regions); | ||
632 | Report: | ||
633 | printk("swsusp: Registered nosave memory region: %016lx - %016lx\n", | ||
634 | start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT); | ||
635 | } | ||
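A sketch of a caller (hypothetical; start_phys and end_phys are placeholder names): early arch setup code would convert a physical range to page frame numbers and register it before the bitmaps exist.

	/* hypothetical early-boot code */
	register_nosave_region(PFN_DOWN(start_phys), PFN_UP(end_phys));

Because registrations are expected to arrive sorted, an adjacent range simply extends the previous entry instead of allocating a new bootmem node.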
636 | |||
637 | /* | ||
638 | * Set bits in this map correspond to the page frames the contents of which | ||
639 | * should not be saved during the suspend. | ||
640 | */ | ||
641 | static struct memory_bitmap *forbidden_pages_map; | ||
642 | |||
643 | /* Set bits in this map correspond to free page frames. */ | ||
644 | static struct memory_bitmap *free_pages_map; | ||
645 | |||
646 | /* | ||
647 | * Each page frame allocated for creating the image is marked by setting the | ||
648 | * corresponding bits in forbidden_pages_map and free_pages_map simultaneously | ||
649 | */ | ||
650 | |||
651 | void swsusp_set_page_free(struct page *page) | ||
652 | { | ||
653 | if (free_pages_map) | ||
654 | memory_bm_set_bit(free_pages_map, page_to_pfn(page)); | ||
655 | } | ||
656 | |||
657 | static int swsusp_page_is_free(struct page *page) | ||
658 | { | ||
659 | return free_pages_map ? | ||
660 | memory_bm_test_bit(free_pages_map, page_to_pfn(page)) : 0; | ||
661 | } | ||
662 | |||
663 | void swsusp_unset_page_free(struct page *page) | ||
664 | { | ||
665 | if (free_pages_map) | ||
666 | memory_bm_clear_bit(free_pages_map, page_to_pfn(page)); | ||
667 | } | ||
668 | |||
669 | static void swsusp_set_page_forbidden(struct page *page) | ||
670 | { | ||
671 | if (forbidden_pages_map) | ||
672 | memory_bm_set_bit(forbidden_pages_map, page_to_pfn(page)); | ||
673 | } | ||
674 | |||
675 | int swsusp_page_is_forbidden(struct page *page) | ||
676 | { | ||
677 | return forbidden_pages_map ? | ||
678 | memory_bm_test_bit(forbidden_pages_map, page_to_pfn(page)) : 0; | ||
679 | } | ||
680 | |||
681 | static void swsusp_unset_page_forbidden(struct page *page) | ||
682 | { | ||
683 | if (forbidden_pages_map) | ||
684 | memory_bm_clear_bit(forbidden_pages_map, page_to_pfn(page)); | ||
685 | } | ||
686 | |||
687 | /** | ||
688 | * mark_nosave_pages - in the given bitmap, set the bits corresponding | ||
689 | * to the page frames the contents of which should not be saved. | ||
690 | */ | ||
691 | |||
692 | static void mark_nosave_pages(struct memory_bitmap *bm) | ||
693 | { | ||
694 | struct nosave_region *region; | ||
695 | |||
696 | if (list_empty(&nosave_regions)) | ||
697 | return; | ||
698 | |||
699 | list_for_each_entry(region, &nosave_regions, list) { | ||
700 | unsigned long pfn; | ||
701 | |||
702 | printk("swsusp: Marking nosave pages: %016lx - %016lx\n", | ||
703 | region->start_pfn << PAGE_SHIFT, | ||
704 | region->end_pfn << PAGE_SHIFT); | ||
705 | |||
706 | for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++) | ||
707 | memory_bm_set_bit(bm, pfn); | ||
708 | } | ||
709 | } | ||
710 | |||
711 | /** | ||
712 | * create_basic_memory_bitmaps - create bitmaps needed for marking page | ||
713 | * frames that should not be saved and free page frames. The pointers | ||
714 | * forbidden_pages_map and free_pages_map are only modified if everything | ||
715 | * goes well, because we don't want the bits to be used before both bitmaps | ||
716 | * are set up. | ||
717 | */ | ||
718 | |||
719 | int create_basic_memory_bitmaps(void) | ||
720 | { | ||
721 | struct memory_bitmap *bm1, *bm2; | ||
722 | int error = 0; | ||
723 | |||
724 | BUG_ON(forbidden_pages_map || free_pages_map); | ||
725 | |||
726 | bm1 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL); | ||
727 | if (!bm1) | ||
728 | return -ENOMEM; | ||
729 | |||
730 | error = memory_bm_create(bm1, GFP_KERNEL, PG_ANY); | ||
731 | if (error) | ||
732 | goto Free_first_object; | ||
733 | |||
734 | bm2 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL); | ||
735 | if (!bm2) | ||
736 | goto Free_first_bitmap; | ||
737 | |||
738 | error = memory_bm_create(bm2, GFP_KERNEL, PG_ANY); | ||
739 | if (error) | ||
740 | goto Free_second_object; | ||
741 | |||
742 | forbidden_pages_map = bm1; | ||
743 | free_pages_map = bm2; | ||
744 | mark_nosave_pages(forbidden_pages_map); | ||
745 | |||
746 | printk("swsusp: Basic memory bitmaps created\n"); | ||
747 | |||
748 | return 0; | ||
749 | |||
750 | Free_second_object: | ||
751 | kfree(bm2); | ||
752 | Free_first_bitmap: | ||
753 | memory_bm_free(bm1, PG_UNSAFE_CLEAR); | ||
754 | Free_first_object: | ||
755 | kfree(bm1); | ||
756 | return -ENOMEM; | ||
757 | } | ||
758 | |||
759 | /** | ||
760 | * free_basic_memory_bitmaps - free memory bitmaps allocated by | ||
761 | * create_basic_memory_bitmaps(). The auxiliary pointers are necessary | ||
762 | * so that the bitmaps themselves are not referred to while they are being | ||
763 | * freed. | ||
764 | */ | ||
765 | |||
766 | void free_basic_memory_bitmaps(void) | ||
767 | { | ||
768 | struct memory_bitmap *bm1, *bm2; | ||
769 | |||
770 | BUG_ON(!(forbidden_pages_map && free_pages_map)); | ||
771 | |||
772 | bm1 = forbidden_pages_map; | ||
773 | bm2 = free_pages_map; | ||
774 | forbidden_pages_map = NULL; | ||
775 | free_pages_map = NULL; | ||
776 | memory_bm_free(bm1, PG_UNSAFE_CLEAR); | ||
777 | kfree(bm1); | ||
778 | memory_bm_free(bm2, PG_UNSAFE_CLEAR); | ||
779 | kfree(bm2); | ||
780 | |||
781 | printk("swsusp: Basic memory bitmaps freed\n"); | ||
782 | } | ||
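The intended pairing, in sketch form (the snapshot device in kernel/power/user.c below follows exactly this shape):

	if (create_basic_memory_bitmaps())
		return -ENOMEM;
	/* ... snapshot work; the swsusp_*_page_* helpers above
	 * are now live ... */
	free_basic_memory_bitmaps();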
783 | |||
784 | /** | ||
567 | * snapshot_additional_pages - estimate the number of additional pages | 785 | * snapshot_additional_pages - estimate the number of additional pages |
568 | * needed for setting up the suspend image data structures for a given | 786 |
569 | * zone (usually the returned value is greater than the exact number) | 787 | * zone (usually the returned value is greater than the exact number) |
@@ -615,7 +833,8 @@ static struct page *saveable_highmem_page(unsigned long pfn) | |||
615 | 833 | ||
616 | BUG_ON(!PageHighMem(page)); | 834 | BUG_ON(!PageHighMem(page)); |
617 | 835 | ||
618 | if (PageNosave(page) || PageReserved(page) || PageNosaveFree(page)) | 836 | if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page) || |
837 | PageReserved(page)) | ||
619 | return NULL; | 838 | return NULL; |
620 | 839 | ||
621 | return page; | 840 | return page; |
@@ -651,17 +870,6 @@ static inline unsigned int count_highmem_pages(void) { return 0; } | |||
651 | #endif /* CONFIG_HIGHMEM */ | 870 | #endif /* CONFIG_HIGHMEM */ |
652 | 871 | ||
653 | /** | 872 | /** |
654 | * pfn_is_nosave - check if given pfn is in the 'nosave' section | ||
655 | */ | ||
656 | |||
657 | static inline int pfn_is_nosave(unsigned long pfn) | ||
658 | { | ||
659 | unsigned long nosave_begin_pfn = __pa(&__nosave_begin) >> PAGE_SHIFT; | ||
660 | unsigned long nosave_end_pfn = PAGE_ALIGN(__pa(&__nosave_end)) >> PAGE_SHIFT; | ||
661 | return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn); | ||
662 | } | ||
663 | |||
664 | /** | ||
665 | * saveable - Determine whether a non-highmem page should be included in | 873 | * saveable - Determine whether a non-highmem page should be included in |
666 | * the suspend image. | 874 | * the suspend image. |
667 | * | 875 | * |
@@ -681,7 +889,7 @@ static struct page *saveable_page(unsigned long pfn) | |||
681 | 889 | ||
682 | BUG_ON(PageHighMem(page)); | 890 | BUG_ON(PageHighMem(page)); |
683 | 891 | ||
684 | if (PageNosave(page) || PageNosaveFree(page)) | 892 | if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page)) |
685 | return NULL; | 893 | return NULL; |
686 | 894 | ||
687 | if (PageReserved(page) && pfn_is_nosave(pfn)) | 895 | if (PageReserved(page) && pfn_is_nosave(pfn)) |
@@ -821,9 +1029,10 @@ void swsusp_free(void) | |||
821 | if (pfn_valid(pfn)) { | 1029 | if (pfn_valid(pfn)) { |
822 | struct page *page = pfn_to_page(pfn); | 1030 | struct page *page = pfn_to_page(pfn); |
823 | 1031 | ||
824 | if (PageNosave(page) && PageNosaveFree(page)) { | 1032 | if (swsusp_page_is_forbidden(page) && |
825 | ClearPageNosave(page); | 1033 | swsusp_page_is_free(page)) { |
826 | ClearPageNosaveFree(page); | 1034 | swsusp_unset_page_forbidden(page); |
1035 | swsusp_unset_page_free(page); | ||
827 | __free_page(page); | 1036 | __free_page(page); |
828 | } | 1037 | } |
829 | } | 1038 | } |
@@ -1146,7 +1355,7 @@ static int mark_unsafe_pages(struct memory_bitmap *bm) | |||
1146 | max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; | 1355 | max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; |
1147 | for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) | 1356 | for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) |
1148 | if (pfn_valid(pfn)) | 1357 | if (pfn_valid(pfn)) |
1149 | ClearPageNosaveFree(pfn_to_page(pfn)); | 1358 | swsusp_unset_page_free(pfn_to_page(pfn)); |
1150 | } | 1359 | } |
1151 | 1360 | ||
1152 | /* Mark pages that correspond to the "original" pfns as "unsafe" */ | 1361 | /* Mark pages that correspond to the "original" pfns as "unsafe" */ |
@@ -1155,7 +1364,7 @@ static int mark_unsafe_pages(struct memory_bitmap *bm) | |||
1155 | pfn = memory_bm_next_pfn(bm); | 1364 | pfn = memory_bm_next_pfn(bm); |
1156 | if (likely(pfn != BM_END_OF_MAP)) { | 1365 | if (likely(pfn != BM_END_OF_MAP)) { |
1157 | if (likely(pfn_valid(pfn))) | 1366 | if (likely(pfn_valid(pfn))) |
1158 | SetPageNosaveFree(pfn_to_page(pfn)); | 1367 | swsusp_set_page_free(pfn_to_page(pfn)); |
1159 | else | 1368 | else |
1160 | return -EFAULT; | 1369 | return -EFAULT; |
1161 | } | 1370 | } |
@@ -1321,14 +1530,14 @@ prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p) | |||
1321 | struct page *page; | 1530 | struct page *page; |
1322 | 1531 | ||
1323 | page = alloc_page(__GFP_HIGHMEM); | 1532 | page = alloc_page(__GFP_HIGHMEM); |
1324 | if (!PageNosaveFree(page)) { | 1533 | if (!swsusp_page_is_free(page)) { |
1325 | /* The page is "safe", set its bit in the bitmap */ | 1534 | /* The page is "safe", set its bit in the bitmap */ |
1326 | memory_bm_set_bit(bm, page_to_pfn(page)); | 1535 | memory_bm_set_bit(bm, page_to_pfn(page)); |
1327 | safe_highmem_pages++; | 1536 | safe_highmem_pages++; |
1328 | } | 1537 | } |
1329 | /* Mark the page as allocated */ | 1538 | /* Mark the page as allocated */ |
1330 | SetPageNosave(page); | 1539 | swsusp_set_page_forbidden(page); |
1331 | SetPageNosaveFree(page); | 1540 | swsusp_set_page_free(page); |
1332 | } | 1541 | } |
1333 | memory_bm_position_reset(bm); | 1542 | memory_bm_position_reset(bm); |
1334 | safe_highmem_bm = bm; | 1543 | safe_highmem_bm = bm; |
@@ -1360,7 +1569,7 @@ get_highmem_page_buffer(struct page *page, struct chain_allocator *ca) | |||
1360 | struct highmem_pbe *pbe; | 1569 | struct highmem_pbe *pbe; |
1361 | void *kaddr; | 1570 | void *kaddr; |
1362 | 1571 | ||
1363 | if (PageNosave(page) && PageNosaveFree(page)) { | 1572 | if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page)) { |
1364 | /* We have allocated the "original" page frame and we can | 1573 | /* We have allocated the "original" page frame and we can |
1365 | * use it directly to store the loaded page. | 1574 | * use it directly to store the loaded page. |
1366 | */ | 1575 | */ |
@@ -1522,14 +1731,14 @@ prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm) | |||
1522 | error = -ENOMEM; | 1731 | error = -ENOMEM; |
1523 | goto Free; | 1732 | goto Free; |
1524 | } | 1733 | } |
1525 | if (!PageNosaveFree(virt_to_page(lp))) { | 1734 | if (!swsusp_page_is_free(virt_to_page(lp))) { |
1526 | /* The page is "safe", add it to the list */ | 1735 | /* The page is "safe", add it to the list */ |
1527 | lp->next = safe_pages_list; | 1736 | lp->next = safe_pages_list; |
1528 | safe_pages_list = lp; | 1737 | safe_pages_list = lp; |
1529 | } | 1738 | } |
1530 | /* Mark the page as allocated */ | 1739 | /* Mark the page as allocated */ |
1531 | SetPageNosave(virt_to_page(lp)); | 1740 | swsusp_set_page_forbidden(virt_to_page(lp)); |
1532 | SetPageNosaveFree(virt_to_page(lp)); | 1741 | swsusp_set_page_free(virt_to_page(lp)); |
1533 | nr_pages--; | 1742 | nr_pages--; |
1534 | } | 1743 | } |
1535 | /* Free the reserved safe pages so that chain_alloc() can use them */ | 1744 | /* Free the reserved safe pages so that chain_alloc() can use them */ |
@@ -1558,7 +1767,7 @@ static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca) | |||
1558 | if (PageHighMem(page)) | 1767 | if (PageHighMem(page)) |
1559 | return get_highmem_page_buffer(page, ca); | 1768 | return get_highmem_page_buffer(page, ca); |
1560 | 1769 | ||
1561 | if (PageNosave(page) && PageNosaveFree(page)) | 1770 | if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page)) |
1562 | /* We have allocated the "original" page frame and we can | 1771 | /* We have allocated the "original" page frame and we can |
1563 | * use it directly to store the loaded page. | 1772 | * use it directly to store the loaded page. |
1564 | */ | 1773 | */ |
diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 3581f8f86a..e83ed9945a 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c | |||
@@ -33,12 +33,14 @@ extern char resume_file[]; | |||
33 | 33 | ||
34 | #define SWSUSP_SIG "S1SUSPEND" | 34 | #define SWSUSP_SIG "S1SUSPEND" |
35 | 35 | ||
36 | static struct swsusp_header { | 36 | struct swsusp_header { |
37 | char reserved[PAGE_SIZE - 20 - sizeof(sector_t)]; | 37 | char reserved[PAGE_SIZE - 20 - sizeof(sector_t)]; |
38 | sector_t image; | 38 | sector_t image; |
39 | char orig_sig[10]; | 39 | char orig_sig[10]; |
40 | char sig[10]; | 40 | char sig[10]; |
41 | } __attribute__((packed, aligned(PAGE_SIZE))) swsusp_header; | 41 | } __attribute__((packed)); |
42 | |||
43 | static struct swsusp_header *swsusp_header; | ||
42 | 44 | ||
43 | /* | 45 | /* |
44 | * General things | 46 | * General things |
@@ -141,14 +143,14 @@ static int mark_swapfiles(sector_t start) | |||
141 | { | 143 | { |
142 | int error; | 144 | int error; |
143 | 145 | ||
144 | bio_read_page(swsusp_resume_block, &swsusp_header, NULL); | 146 | bio_read_page(swsusp_resume_block, swsusp_header, NULL); |
145 | if (!memcmp("SWAP-SPACE",swsusp_header.sig, 10) || | 147 | if (!memcmp("SWAP-SPACE",swsusp_header->sig, 10) || |
146 | !memcmp("SWAPSPACE2",swsusp_header.sig, 10)) { | 148 | !memcmp("SWAPSPACE2",swsusp_header->sig, 10)) { |
147 | memcpy(swsusp_header.orig_sig,swsusp_header.sig, 10); | 149 | memcpy(swsusp_header->orig_sig,swsusp_header->sig, 10); |
148 | memcpy(swsusp_header.sig,SWSUSP_SIG, 10); | 150 | memcpy(swsusp_header->sig,SWSUSP_SIG, 10); |
149 | swsusp_header.image = start; | 151 | swsusp_header->image = start; |
150 | error = bio_write_page(swsusp_resume_block, | 152 | error = bio_write_page(swsusp_resume_block, |
151 | &swsusp_header, NULL); | 153 | swsusp_header, NULL); |
152 | } else { | 154 | } else { |
153 | printk(KERN_ERR "swsusp: Swap header not found!\n"); | 155 | printk(KERN_ERR "swsusp: Swap header not found!\n"); |
154 | error = -ENODEV; | 156 | error = -ENODEV; |
@@ -241,7 +243,6 @@ struct swap_map_page { | |||
241 | struct swap_map_handle { | 243 | struct swap_map_handle { |
242 | struct swap_map_page *cur; | 244 | struct swap_map_page *cur; |
243 | sector_t cur_swap; | 245 | sector_t cur_swap; |
244 | struct bitmap_page *bitmap; | ||
245 | unsigned int k; | 246 | unsigned int k; |
246 | }; | 247 | }; |
247 | 248 | ||
@@ -250,9 +251,6 @@ static void release_swap_writer(struct swap_map_handle *handle) | |||
250 | if (handle->cur) | 251 | if (handle->cur) |
251 | free_page((unsigned long)handle->cur); | 252 | free_page((unsigned long)handle->cur); |
252 | handle->cur = NULL; | 253 | handle->cur = NULL; |
253 | if (handle->bitmap) | ||
254 | free_bitmap(handle->bitmap); | ||
255 | handle->bitmap = NULL; | ||
256 | } | 254 | } |
257 | 255 | ||
258 | static int get_swap_writer(struct swap_map_handle *handle) | 256 | static int get_swap_writer(struct swap_map_handle *handle) |
@@ -260,12 +258,7 @@ static int get_swap_writer(struct swap_map_handle *handle) | |||
260 | handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_KERNEL); | 258 | handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_KERNEL); |
261 | if (!handle->cur) | 259 | if (!handle->cur) |
262 | return -ENOMEM; | 260 | return -ENOMEM; |
263 | handle->bitmap = alloc_bitmap(count_swap_pages(root_swap, 0)); | 261 | handle->cur_swap = alloc_swapdev_block(root_swap); |
264 | if (!handle->bitmap) { | ||
265 | release_swap_writer(handle); | ||
266 | return -ENOMEM; | ||
267 | } | ||
268 | handle->cur_swap = alloc_swapdev_block(root_swap, handle->bitmap); | ||
269 | if (!handle->cur_swap) { | 262 | if (!handle->cur_swap) { |
270 | release_swap_writer(handle); | 263 | release_swap_writer(handle); |
271 | return -ENOSPC; | 264 | return -ENOSPC; |
@@ -282,7 +275,7 @@ static int swap_write_page(struct swap_map_handle *handle, void *buf, | |||
282 | 275 | ||
283 | if (!handle->cur) | 276 | if (!handle->cur) |
284 | return -EINVAL; | 277 | return -EINVAL; |
285 | offset = alloc_swapdev_block(root_swap, handle->bitmap); | 278 | offset = alloc_swapdev_block(root_swap); |
286 | error = write_page(buf, offset, bio_chain); | 279 | error = write_page(buf, offset, bio_chain); |
287 | if (error) | 280 | if (error) |
288 | return error; | 281 | return error; |
@@ -291,7 +284,7 @@ static int swap_write_page(struct swap_map_handle *handle, void *buf, | |||
291 | error = wait_on_bio_chain(bio_chain); | 284 | error = wait_on_bio_chain(bio_chain); |
292 | if (error) | 285 | if (error) |
293 | goto out; | 286 | goto out; |
294 | offset = alloc_swapdev_block(root_swap, handle->bitmap); | 287 | offset = alloc_swapdev_block(root_swap); |
295 | if (!offset) | 288 | if (!offset) |
296 | return -ENOSPC; | 289 | return -ENOSPC; |
297 | handle->cur->next_swap = offset; | 290 | handle->cur->next_swap = offset; |
@@ -428,7 +421,8 @@ int swsusp_write(void) | |||
428 | } | 421 | } |
429 | } | 422 | } |
430 | if (error) | 423 | if (error) |
431 | free_all_swap_pages(root_swap, handle.bitmap); | 424 | free_all_swap_pages(root_swap); |
425 | |||
432 | release_swap_writer(&handle); | 426 | release_swap_writer(&handle); |
433 | out: | 427 | out: |
434 | swsusp_close(); | 428 | swsusp_close(); |
@@ -564,7 +558,7 @@ int swsusp_read(void) | |||
564 | if (error < PAGE_SIZE) | 558 | if (error < PAGE_SIZE) |
565 | return error < 0 ? error : -EFAULT; | 559 | return error < 0 ? error : -EFAULT; |
566 | header = (struct swsusp_info *)data_of(snapshot); | 560 | header = (struct swsusp_info *)data_of(snapshot); |
567 | error = get_swap_reader(&handle, swsusp_header.image); | 561 | error = get_swap_reader(&handle, swsusp_header->image); |
568 | if (!error) | 562 | if (!error) |
569 | error = swap_read_page(&handle, header, NULL); | 563 | error = swap_read_page(&handle, header, NULL); |
570 | if (!error) | 564 | if (!error) |
@@ -591,17 +585,17 @@ int swsusp_check(void) | |||
591 | resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_READ); | 585 | resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_READ); |
592 | if (!IS_ERR(resume_bdev)) { | 586 | if (!IS_ERR(resume_bdev)) { |
593 | set_blocksize(resume_bdev, PAGE_SIZE); | 587 | set_blocksize(resume_bdev, PAGE_SIZE); |
594 | memset(&swsusp_header, 0, sizeof(swsusp_header)); | 588 | memset(swsusp_header, 0, PAGE_SIZE); |
595 | error = bio_read_page(swsusp_resume_block, | 589 | error = bio_read_page(swsusp_resume_block, |
596 | &swsusp_header, NULL); | 590 | swsusp_header, NULL); |
597 | if (error) | 591 | if (error) |
598 | return error; | 592 | return error; |
599 | 593 | ||
600 | if (!memcmp(SWSUSP_SIG, swsusp_header.sig, 10)) { | 594 | if (!memcmp(SWSUSP_SIG, swsusp_header->sig, 10)) { |
601 | memcpy(swsusp_header.sig, swsusp_header.orig_sig, 10); | 595 | memcpy(swsusp_header->sig, swsusp_header->orig_sig, 10); |
602 | /* Reset swap signature now */ | 596 | /* Reset swap signature now */ |
603 | error = bio_write_page(swsusp_resume_block, | 597 | error = bio_write_page(swsusp_resume_block, |
604 | &swsusp_header, NULL); | 598 | swsusp_header, NULL); |
605 | } else { | 599 | } else { |
606 | return -EINVAL; | 600 | return -EINVAL; |
607 | } | 601 | } |
@@ -632,3 +626,13 @@ void swsusp_close(void) | |||
632 | 626 | ||
633 | blkdev_put(resume_bdev); | 627 | blkdev_put(resume_bdev); |
634 | } | 628 | } |
629 | |||
630 | static int swsusp_header_init(void) | ||
631 | { | ||
632 | swsusp_header = (struct swsusp_header *)__get_free_page(GFP_KERNEL); | ||
633 | if (!swsusp_header) | ||
634 | panic("Could not allocate memory for swsusp_header\n"); | ||
635 | return 0; | ||
636 | } | ||
637 | |||
638 | core_initcall(swsusp_header_init); | ||
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index 7fb834397a..5da304c8f1 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c | |||
@@ -50,6 +50,7 @@ | |||
50 | #include <linux/syscalls.h> | 50 | #include <linux/syscalls.h> |
51 | #include <linux/highmem.h> | 51 | #include <linux/highmem.h> |
52 | #include <linux/time.h> | 52 | #include <linux/time.h> |
53 | #include <linux/rbtree.h> | ||
53 | 54 | ||
54 | #include "power.h" | 55 | #include "power.h" |
55 | 56 | ||
@@ -74,72 +75,69 @@ static inline unsigned int count_highmem_pages(void) { return 0; } | |||
74 | /** | 75 | /** |
75 | * The following functions are used for tracing the allocated | 76 | * The following functions are used for tracing the allocated |
76 | * swap pages, so that they can be freed in case of an error. | 77 | * swap pages, so that they can be freed in case of an error. |
77 | * | ||
78 | * The functions operate on a linked bitmap structure defined | ||
79 | * in power.h | ||
80 | */ | 78 | */ |
81 | 79 | ||
82 | void free_bitmap(struct bitmap_page *bitmap) | 80 | struct swsusp_extent { |
83 | { | 81 | struct rb_node node; |
84 | struct bitmap_page *bp; | 82 | unsigned long start; |
83 | unsigned long end; | ||
84 | }; | ||
85 | 85 | ||
86 | while (bitmap) { | 86 | static struct rb_root swsusp_extents = RB_ROOT; |
87 | bp = bitmap->next; | ||
88 | free_page((unsigned long)bitmap); | ||
89 | bitmap = bp; | ||
90 | } | ||
91 | } | ||
92 | 87 | ||
93 | struct bitmap_page *alloc_bitmap(unsigned int nr_bits) | 88 | static int swsusp_extents_insert(unsigned long swap_offset) |
94 | { | 89 | { |
95 | struct bitmap_page *bitmap, *bp; | 90 | struct rb_node **new = &(swsusp_extents.rb_node); |
96 | unsigned int n; | 91 | struct rb_node *parent = NULL; |
97 | 92 | struct swsusp_extent *ext; | |
98 | if (!nr_bits) | 93 | |
99 | return NULL; | 94 | /* Figure out where to put the new node */ |
100 | 95 | while (*new) { | |
101 | bitmap = (struct bitmap_page *)get_zeroed_page(GFP_KERNEL); | 96 | ext = container_of(*new, struct swsusp_extent, node); |
102 | bp = bitmap; | 97 | parent = *new; |
103 | for (n = BITMAP_PAGE_BITS; n < nr_bits; n += BITMAP_PAGE_BITS) { | 98 | if (swap_offset < ext->start) { |
104 | bp->next = (struct bitmap_page *)get_zeroed_page(GFP_KERNEL); | 99 | /* Try to merge */ |
105 | bp = bp->next; | 100 | if (swap_offset == ext->start - 1) { |
106 | if (!bp) { | 101 | ext->start--; |
107 | free_bitmap(bitmap); | 102 | return 0; |
108 | return NULL; | 103 | } |
104 | new = &((*new)->rb_left); | ||
105 | } else if (swap_offset > ext->end) { | ||
106 | /* Try to merge */ | ||
107 | if (swap_offset == ext->end + 1) { | ||
108 | ext->end++; | ||
109 | return 0; | ||
110 | } | ||
111 | new = &((*new)->rb_right); | ||
112 | } else { | ||
113 | /* It already is in the tree */ | ||
114 | return -EINVAL; | ||
109 | } | 115 | } |
110 | } | 116 | } |
111 | return bitmap; | 117 | /* Add the new node and rebalance the tree. */ |
112 | } | 118 | ext = kzalloc(sizeof(struct swsusp_extent), GFP_KERNEL); |
113 | 119 | if (!ext) | |
114 | static int bitmap_set(struct bitmap_page *bitmap, unsigned long bit) | 120 | return -ENOMEM; |
115 | { | 121 | |
116 | unsigned int n; | 122 | ext->start = swap_offset; |
117 | 123 | ext->end = swap_offset; | |
118 | n = BITMAP_PAGE_BITS; | 124 | rb_link_node(&ext->node, parent, new); |
119 | while (bitmap && n <= bit) { | 125 | rb_insert_color(&ext->node, &swsusp_extents); |
120 | n += BITMAP_PAGE_BITS; | ||
121 | bitmap = bitmap->next; | ||
122 | } | ||
123 | if (!bitmap) | ||
124 | return -EINVAL; | ||
125 | n -= BITMAP_PAGE_BITS; | ||
126 | bit -= n; | ||
127 | n = 0; | ||
128 | while (bit >= BITS_PER_CHUNK) { | ||
129 | bit -= BITS_PER_CHUNK; | ||
130 | n++; | ||
131 | } | ||
132 | bitmap->chunks[n] |= (1UL << bit); | ||
133 | return 0; | 126 | return 0; |
134 | } | 127 | } |
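A short worked trace of the merge logic: allocating swap offsets 100, 101, 103 in that order yields the extents [100,101] and [103,103] (101 extends the first node's end, 103 cannot merge and becomes a new right child); re-inserting 100 falls into the "already is in the tree" branch and returns -EINVAL, whereupon the caller below releases the duplicate entry with swap_free().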
135 | 128 | ||
136 | sector_t alloc_swapdev_block(int swap, struct bitmap_page *bitmap) | 129 | /** |
130 | * alloc_swapdev_block - allocate a swap page and register that it has | ||
131 | * been allocated, so that it can be freed in case of an error. | ||
132 | */ | ||
133 | |||
134 | sector_t alloc_swapdev_block(int swap) | ||
137 | { | 135 | { |
138 | unsigned long offset; | 136 | unsigned long offset; |
139 | 137 | ||
140 | offset = swp_offset(get_swap_page_of_type(swap)); | 138 | offset = swp_offset(get_swap_page_of_type(swap)); |
141 | if (offset) { | 139 | if (offset) { |
142 | if (bitmap_set(bitmap, offset)) | 140 | if (swsusp_extents_insert(offset)) |
143 | swap_free(swp_entry(swap, offset)); | 141 | swap_free(swp_entry(swap, offset)); |
144 | else | 142 | else |
145 | return swapdev_block(swap, offset); | 143 | return swapdev_block(swap, offset); |
@@ -147,23 +145,34 @@ sector_t alloc_swapdev_block(int swap, struct bitmap_page *bitmap) | |||
147 | return 0; | 145 | return 0; |
148 | } | 146 | } |
149 | 147 | ||
150 | void free_all_swap_pages(int swap, struct bitmap_page *bitmap) | 148 | /** |
149 | * free_all_swap_pages - free swap pages allocated for saving image data. | ||
150 | * It also frees the extents used to register which swap entries had been | ||
151 | * allocated. | ||
152 | */ | ||
153 | |||
154 | void free_all_swap_pages(int swap) | ||
151 | { | 155 | { |
152 | unsigned int bit, n; | 156 | struct rb_node *node; |
153 | unsigned long test; | 157 | |
154 | 158 | while ((node = swsusp_extents.rb_node)) { | |
155 | bit = 0; | 159 | struct swsusp_extent *ext; |
156 | while (bitmap) { | 160 | unsigned long offset; |
157 | for (n = 0; n < BITMAP_PAGE_CHUNKS; n++) | 161 | |
158 | for (test = 1UL; test; test <<= 1) { | 162 | ext = container_of(node, struct swsusp_extent, node); |
159 | if (bitmap->chunks[n] & test) | 163 | rb_erase(node, &swsusp_extents); |
160 | swap_free(swp_entry(swap, bit)); | 164 | for (offset = ext->start; offset <= ext->end; offset++) |
161 | bit++; | 165 | swap_free(swp_entry(swap, offset)); |
162 | } | 166 | |
163 | bitmap = bitmap->next; | 167 | kfree(ext); |
164 | } | 168 | } |
165 | } | 169 | } |
166 | 170 | ||
171 | int swsusp_swap_in_use(void) | ||
172 | { | ||
173 | return (swsusp_extents.rb_node != NULL); | ||
174 | } | ||
175 | |||
167 | /** | 176 | /** |
168 | * swsusp_show_speed - print the time elapsed between two events represented by | 177 | * swsusp_show_speed - print the time elapsed between two events represented by |
169 | * @start and @stop | 178 | * @start and @stop |
@@ -224,18 +233,18 @@ int swsusp_shrink_memory(void) | |||
224 | long size, highmem_size; | 233 | long size, highmem_size; |
225 | 234 | ||
226 | highmem_size = count_highmem_pages(); | 235 | highmem_size = count_highmem_pages(); |
227 | size = count_data_pages() + PAGES_FOR_IO; | 236 | size = count_data_pages() + PAGES_FOR_IO + SPARE_PAGES; |
228 | tmp = size; | 237 | tmp = size; |
229 | size += highmem_size; | 238 | size += highmem_size; |
230 | for_each_zone (zone) | 239 | for_each_zone (zone) |
231 | if (populated_zone(zone)) { | 240 | if (populated_zone(zone)) { |
241 | tmp += snapshot_additional_pages(zone); | ||
232 | if (is_highmem(zone)) { | 242 | if (is_highmem(zone)) { |
233 | highmem_size -= | 243 | highmem_size -= |
234 | zone_page_state(zone, NR_FREE_PAGES); | 244 | zone_page_state(zone, NR_FREE_PAGES); |
235 | } else { | 245 | } else { |
236 | tmp -= zone_page_state(zone, NR_FREE_PAGES); | 246 | tmp -= zone_page_state(zone, NR_FREE_PAGES); |
237 | tmp += zone->lowmem_reserve[ZONE_NORMAL]; | 247 | tmp += zone->lowmem_reserve[ZONE_NORMAL]; |
238 | tmp += snapshot_additional_pages(zone); | ||
239 | } | 248 | } |
240 | } | 249 | } |
241 | 250 | ||
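Worked through with 4 KB pages: for, say, 100000 saveable data pages the loop now starts from size = 100000 + 1024 (PAGES_FOR_IO) + 256 (SPARE_PAGES) = 101280 page frames, and snapshot_additional_pages() is now charged for highmem zones as well, since that accounting moved above the is_highmem() branch.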
diff --git a/kernel/power/user.c b/kernel/power/user.c index dd09efe7df..040560d9c3 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c | |||
@@ -33,25 +33,29 @@ | |||
33 | static struct snapshot_data { | 33 | static struct snapshot_data { |
34 | struct snapshot_handle handle; | 34 | struct snapshot_handle handle; |
35 | int swap; | 35 | int swap; |
36 | struct bitmap_page *bitmap; | ||
37 | int mode; | 36 | int mode; |
38 | char frozen; | 37 | char frozen; |
39 | char ready; | 38 | char ready; |
40 | char platform_suspend; | 39 | char platform_suspend; |
41 | } snapshot_state; | 40 | } snapshot_state; |
42 | 41 | ||
43 | static atomic_t device_available = ATOMIC_INIT(1); | 42 | atomic_t snapshot_device_available = ATOMIC_INIT(1); |
44 | 43 | ||
45 | static int snapshot_open(struct inode *inode, struct file *filp) | 44 | static int snapshot_open(struct inode *inode, struct file *filp) |
46 | { | 45 | { |
47 | struct snapshot_data *data; | 46 | struct snapshot_data *data; |
48 | 47 | ||
49 | if (!atomic_add_unless(&device_available, -1, 0)) | 48 | if (!atomic_add_unless(&snapshot_device_available, -1, 0)) |
50 | return -EBUSY; | 49 | return -EBUSY; |
51 | 50 | ||
52 | if ((filp->f_flags & O_ACCMODE) == O_RDWR) | 51 | if ((filp->f_flags & O_ACCMODE) == O_RDWR) { |
52 | atomic_inc(&snapshot_device_available); | ||
53 | return -ENOSYS; | 53 | return -ENOSYS; |
54 | 54 | } | |
55 | if (create_basic_memory_bitmaps()) { | ||
56 | atomic_inc(&snapshot_device_available); | ||
57 | return -ENOMEM; | ||
58 | } | ||
55 | nonseekable_open(inode, filp); | 59 | nonseekable_open(inode, filp); |
56 | data = &snapshot_state; | 60 | data = &snapshot_state; |
57 | filp->private_data = data; | 61 | filp->private_data = data; |
@@ -64,7 +68,6 @@ static int snapshot_open(struct inode *inode, struct file *filp) | |||
64 | data->swap = -1; | 68 | data->swap = -1; |
65 | data->mode = O_WRONLY; | 69 | data->mode = O_WRONLY; |
66 | } | 70 | } |
67 | data->bitmap = NULL; | ||
68 | data->frozen = 0; | 71 | data->frozen = 0; |
69 | data->ready = 0; | 72 | data->ready = 0; |
70 | data->platform_suspend = 0; | 73 | data->platform_suspend = 0; |
@@ -77,16 +80,15 @@ static int snapshot_release(struct inode *inode, struct file *filp) | |||
77 | struct snapshot_data *data; | 80 | struct snapshot_data *data; |
78 | 81 | ||
79 | swsusp_free(); | 82 | swsusp_free(); |
83 | free_basic_memory_bitmaps(); | ||
80 | data = filp->private_data; | 84 | data = filp->private_data; |
81 | free_all_swap_pages(data->swap, data->bitmap); | 85 | free_all_swap_pages(data->swap); |
82 | free_bitmap(data->bitmap); | ||
83 | if (data->frozen) { | 86 | if (data->frozen) { |
84 | mutex_lock(&pm_mutex); | 87 | mutex_lock(&pm_mutex); |
85 | thaw_processes(); | 88 | thaw_processes(); |
86 | enable_nonboot_cpus(); | ||
87 | mutex_unlock(&pm_mutex); | 89 | mutex_unlock(&pm_mutex); |
88 | } | 90 | } |
89 | atomic_inc(&device_available); | 91 | atomic_inc(&snapshot_device_available); |
90 | return 0; | 92 | return 0; |
91 | } | 93 | } |
92 | 94 | ||
@@ -294,14 +296,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, | |||
294 | error = -ENODEV; | 296 | error = -ENODEV; |
295 | break; | 297 | break; |
296 | } | 298 | } |
297 | if (!data->bitmap) { | 299 | offset = alloc_swapdev_block(data->swap); |
298 | data->bitmap = alloc_bitmap(count_swap_pages(data->swap, 0)); | ||
299 | if (!data->bitmap) { | ||
300 | error = -ENOMEM; | ||
301 | break; | ||
302 | } | ||
303 | } | ||
304 | offset = alloc_swapdev_block(data->swap, data->bitmap); | ||
305 | if (offset) { | 300 | if (offset) { |
306 | offset <<= PAGE_SHIFT; | 301 | offset <<= PAGE_SHIFT; |
307 | error = put_user(offset, (sector_t __user *)arg); | 302 | error = put_user(offset, (sector_t __user *)arg); |
@@ -315,13 +310,11 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, | |||
315 | error = -ENODEV; | 310 | error = -ENODEV; |
316 | break; | 311 | break; |
317 | } | 312 | } |
318 | free_all_swap_pages(data->swap, data->bitmap); | 313 | free_all_swap_pages(data->swap); |
319 | free_bitmap(data->bitmap); | ||
320 | data->bitmap = NULL; | ||
321 | break; | 314 | break; |
322 | 315 | ||
323 | case SNAPSHOT_SET_SWAP_FILE: | 316 | case SNAPSHOT_SET_SWAP_FILE: |
324 | if (!data->bitmap) { | 317 | if (!swsusp_swap_in_use()) { |
325 | /* | 318 | /* |
326 | * User space encodes device types as two-byte values, | 319 | * User space encodes device types as two-byte values, |
327 | * so we need to recode them | 320 | * so we need to recode them |
@@ -368,9 +361,12 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, | |||
368 | if (error) { | 361 | if (error) { |
369 | printk(KERN_ERR "Failed to suspend some devices.\n"); | 362 | printk(KERN_ERR "Failed to suspend some devices.\n"); |
370 | } else { | 363 | } else { |
371 | /* Enter S3, system is already frozen */ | 364 | error = disable_nonboot_cpus(); |
372 | suspend_enter(PM_SUSPEND_MEM); | 365 | if (!error) { |
373 | 366 | /* Enter S3, system is already frozen */ | |
367 | suspend_enter(PM_SUSPEND_MEM); | ||
368 | enable_nonboot_cpus(); | ||
369 | } | ||
374 | /* Wake up devices */ | 370 | /* Wake up devices */ |
375 | device_resume(); | 371 | device_resume(); |
376 | } | 372 | } |
@@ -417,7 +413,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, | |||
417 | break; | 413 | break; |
418 | 414 | ||
419 | case SNAPSHOT_SET_SWAP_AREA: | 415 | case SNAPSHOT_SET_SWAP_AREA: |
420 | if (data->bitmap) { | 416 | if (swsusp_swap_in_use()) { |
421 | error = -EPERM; | 417 | error = -EPERM; |
422 | } else { | 418 | } else { |
423 | struct resume_swap_area swap_area; | 419 | struct resume_swap_area swap_area; |
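The snapshot device open path above serializes access with an atomic counter: atomic_add_unless(&v, -1, 0) refuses to decrement past zero, so a second opener sees -EBUSY, and every failure path taken after the decrement must re-increment before returning. A minimal sketch of the pattern, assuming a counter initialized to 1 and hypothetical setup/teardown helpers standing in for create_basic_memory_bitmaps() and free_basic_memory_bitmaps():

#include <linux/fs.h>
#include <asm/atomic.h>

static atomic_t device_free = ATOMIC_INIT(1);	/* 1 == not in use */

static int example_open(struct inode *inode, struct file *filp)
{
	/* Decrements only while the counter is non-zero. */
	if (!atomic_add_unless(&device_free, -1, 0))
		return -EBUSY;

	if (example_setup()) {			/* hypothetical helper */
		atomic_inc(&device_free);	/* undo on the error path */
		return -ENOMEM;
	}
	return nonseekable_open(inode, filp);
}

static int example_release(struct inode *inode, struct file *filp)
{
	example_teardown();			/* hypothetical helper */
	atomic_inc(&device_free);		/* device is available again */
	return 0;
}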
diff --git a/kernel/resource.c b/kernel/resource.c index bdb55a33f9..9bd14fd3e6 100644 --- a/kernel/resource.c +++ b/kernel/resource.c | |||
@@ -213,27 +213,6 @@ int request_resource(struct resource *root, struct resource *new) | |||
213 | EXPORT_SYMBOL(request_resource); | 213 | EXPORT_SYMBOL(request_resource); |
214 | 214 | ||
215 | /** | 215 | /** |
216 | * ____request_resource - reserve a resource, with resource conflict returned | ||
217 | * @root: root resource descriptor | ||
218 | * @new: resource descriptor desired by caller | ||
219 | * | ||
220 | * Returns: | ||
221 | * On success, NULL is returned. | ||
222 | * On error, a pointer to the conflicting resource is returned. | ||
223 | */ | ||
224 | struct resource *____request_resource(struct resource *root, struct resource *new) | ||
225 | { | ||
226 | struct resource *conflict; | ||
227 | |||
228 | write_lock(&resource_lock); | ||
229 | conflict = __request_resource(root, new); | ||
230 | write_unlock(&resource_lock); | ||
231 | return conflict; | ||
232 | } | ||
233 | |||
234 | EXPORT_SYMBOL(____request_resource); | ||
235 | |||
236 | /** | ||
237 | * release_resource - release a previously reserved resource | 216 | * release_resource - release a previously reserved resource |
238 | * @old: resource pointer | 217 | * @old: resource pointer |
239 | */ | 218 | */ |
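The unused ____request_resource() variant, which handed the conflicting resource back to the caller, is removed; the surviving request_resource() reports a conflict as an error code instead. A hedged sketch of a typical caller (the region below is hypothetical):

#include <linux/ioport.h>

static struct resource example_region = {
	.name	= "example",		/* hypothetical region */
	.start	= 0x1000,
	.end	= 0x1fff,
	.flags	= IORESOURCE_MEM,
};

static int example_claim(void)
{
	/* Returns 0 on success, -EBUSY if the range is already claimed. */
	return request_resource(&iomem_resource, &example_region);
}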
diff --git a/kernel/sched.c b/kernel/sched.c index a4ca632c47..0227f1625a 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -4687,32 +4687,10 @@ out_unlock: | |||
4687 | return retval; | 4687 | return retval; |
4688 | } | 4688 | } |
4689 | 4689 | ||
4690 | static inline struct task_struct *eldest_child(struct task_struct *p) | ||
4691 | { | ||
4692 | if (list_empty(&p->children)) | ||
4693 | return NULL; | ||
4694 | return list_entry(p->children.next,struct task_struct,sibling); | ||
4695 | } | ||
4696 | |||
4697 | static inline struct task_struct *older_sibling(struct task_struct *p) | ||
4698 | { | ||
4699 | if (p->sibling.prev==&p->parent->children) | ||
4700 | return NULL; | ||
4701 | return list_entry(p->sibling.prev,struct task_struct,sibling); | ||
4702 | } | ||
4703 | |||
4704 | static inline struct task_struct *younger_sibling(struct task_struct *p) | ||
4705 | { | ||
4706 | if (p->sibling.next==&p->parent->children) | ||
4707 | return NULL; | ||
4708 | return list_entry(p->sibling.next,struct task_struct,sibling); | ||
4709 | } | ||
4710 | |||
4711 | static const char stat_nam[] = "RSDTtZX"; | 4690 | static const char stat_nam[] = "RSDTtZX"; |
4712 | 4691 | ||
4713 | static void show_task(struct task_struct *p) | 4692 | static void show_task(struct task_struct *p) |
4714 | { | 4693 | { |
4715 | struct task_struct *relative; | ||
4716 | unsigned long free = 0; | 4694 | unsigned long free = 0; |
4717 | unsigned state; | 4695 | unsigned state; |
4718 | 4696 | ||
@@ -4738,19 +4716,7 @@ static void show_task(struct task_struct *p) | |||
4738 | free = (unsigned long)n - (unsigned long)end_of_stack(p); | 4716 | free = (unsigned long)n - (unsigned long)end_of_stack(p); |
4739 | } | 4717 | } |
4740 | #endif | 4718 | #endif |
4741 | printk("%5lu %5d %6d ", free, p->pid, p->parent->pid); | 4719 | printk("%5lu %5d %6d", free, p->pid, p->parent->pid); |
4742 | if ((relative = eldest_child(p))) | ||
4743 | printk("%5d ", relative->pid); | ||
4744 | else | ||
4745 | printk(" "); | ||
4746 | if ((relative = younger_sibling(p))) | ||
4747 | printk("%7d", relative->pid); | ||
4748 | else | ||
4749 | printk(" "); | ||
4750 | if ((relative = older_sibling(p))) | ||
4751 | printk(" %5d", relative->pid); | ||
4752 | else | ||
4753 | printk(" "); | ||
4754 | if (!p->mm) | 4720 | if (!p->mm) |
4755 | printk(" (L-TLB)\n"); | 4721 | printk(" (L-TLB)\n"); |
4756 | else | 4722 | else |
@@ -4780,7 +4746,7 @@ void show_state_filter(unsigned long state_filter) | |||
4780 | * console might take a lot of time: | 4746 | * console might take a lot of time: |
4781 | */ | 4747 | */ |
4782 | touch_nmi_watchdog(); | 4748 | touch_nmi_watchdog(); |
4783 | if (p->state & state_filter) | 4749 | if (!state_filter || (p->state & state_filter)) |
4784 | show_task(p); | 4750 | show_task(p); |
4785 | } while_each_thread(g, p); | 4751 | } while_each_thread(g, p); |
4786 | 4752 | ||
@@ -5278,6 +5244,11 @@ int __init migration_init(void) | |||
5278 | #endif | 5244 | #endif |
5279 | 5245 | ||
5280 | #ifdef CONFIG_SMP | 5246 | #ifdef CONFIG_SMP |
5247 | |||
5248 | /* Number of possible processor ids */ | ||
5249 | int nr_cpu_ids __read_mostly = NR_CPUS; | ||
5250 | EXPORT_SYMBOL(nr_cpu_ids); | ||
5251 | |||
5281 | #undef SCHED_DOMAIN_DEBUG | 5252 | #undef SCHED_DOMAIN_DEBUG |
5282 | #ifdef SCHED_DOMAIN_DEBUG | 5253 | #ifdef SCHED_DOMAIN_DEBUG |
5283 | static void sched_domain_debug(struct sched_domain *sd, int cpu) | 5254 | static void sched_domain_debug(struct sched_domain *sd, int cpu) |
@@ -6760,6 +6731,7 @@ int in_sched_functions(unsigned long addr) | |||
6760 | void __init sched_init(void) | 6731 | void __init sched_init(void) |
6761 | { | 6732 | { |
6762 | int i, j, k; | 6733 | int i, j, k; |
6734 | int highest_cpu = 0; | ||
6763 | 6735 | ||
6764 | for_each_possible_cpu(i) { | 6736 | for_each_possible_cpu(i) { |
6765 | struct prio_array *array; | 6737 | struct prio_array *array; |
@@ -6794,11 +6766,13 @@ void __init sched_init(void) | |||
6794 | // delimiter for bitsearch | 6766 | // delimiter for bitsearch |
6795 | __set_bit(MAX_PRIO, array->bitmap); | 6767 | __set_bit(MAX_PRIO, array->bitmap); |
6796 | } | 6768 | } |
6769 | highest_cpu = i; | ||
6797 | } | 6770 | } |
6798 | 6771 | ||
6799 | set_load_weight(&init_task); | 6772 | set_load_weight(&init_task); |
6800 | 6773 | ||
6801 | #ifdef CONFIG_SMP | 6774 | #ifdef CONFIG_SMP |
6775 | nr_cpu_ids = highest_cpu + 1; | ||
6802 | open_softirq(SCHED_SOFTIRQ, run_rebalance_domains, NULL); | 6776 | open_softirq(SCHED_SOFTIRQ, run_rebalance_domains, NULL); |
6803 | #endif | 6777 | #endif |
6804 | 6778 | ||
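sched_init() already visits every possible CPU, so the hunk above records the highest id seen and derives nr_cpu_ids from it, letting later code bound scans by possible ids instead of the compile-time NR_CPUS ceiling. A sketch of the derivation, assuming possible CPU ids enumerate in ascending order:

#include <linux/cpumask.h>

int nr_cpu_ids __read_mostly = NR_CPUS;	/* safe default before boot */

static void __init compute_nr_cpu_ids(void)
{
	int cpu, highest = 0;

	for_each_possible_cpu(cpu)
		highest = cpu;		/* last possible id is the highest */
	nr_cpu_ids = highest + 1;	/* ids run 0..highest */
}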
diff --git a/kernel/signal.c b/kernel/signal.c index 3670225ecb..2b4087d545 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
@@ -2636,9 +2636,5 @@ __attribute__((weak)) const char *arch_vma_name(struct vm_area_struct *vma) | |||
2636 | 2636 | ||
2637 | void __init signals_init(void) | 2637 | void __init signals_init(void) |
2638 | { | 2638 | { |
2639 | sigqueue_cachep = | 2639 | sigqueue_cachep = KMEM_CACHE(sigqueue, SLAB_PANIC); |
2640 | kmem_cache_create("sigqueue", | ||
2641 | sizeof(struct sigqueue), | ||
2642 | __alignof__(struct sigqueue), | ||
2643 | SLAB_PANIC, NULL, NULL); | ||
2644 | } | 2640 | } |
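KMEM_CACHE() derives the cache name, object size and alignment from the struct itself, collapsing the open-coded kmem_cache_create() call. Roughly what the macro expands to in this kernel generation (a sketch; the canonical definition lives in <linux/slab.h>):

#define KMEM_CACHE(__struct, __flags)				\
	kmem_cache_create(#__struct, sizeof(struct __struct),	\
			  __alignof__(struct __struct),		\
			  (__flags), NULL, NULL)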
diff --git a/kernel/sys.c b/kernel/sys.c index 123b165080..fe1f3ab204 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
@@ -881,7 +881,7 @@ asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void __user | |||
881 | #ifdef CONFIG_SOFTWARE_SUSPEND | 881 | #ifdef CONFIG_SOFTWARE_SUSPEND |
882 | case LINUX_REBOOT_CMD_SW_SUSPEND: | 882 | case LINUX_REBOOT_CMD_SW_SUSPEND: |
883 | { | 883 | { |
884 | int ret = software_suspend(); | 884 | int ret = pm_suspend(PM_SUSPEND_DISK); |
885 | unlock_kernel(); | 885 | unlock_kernel(); |
886 | return ret; | 886 | return ret; |
887 | } | 887 | } |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 1b255df4fc..c904748f22 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -1676,7 +1676,7 @@ static int proc_dointvec_taint(ctl_table *table, int write, struct file *filp, | |||
1676 | { | 1676 | { |
1677 | int op; | 1677 | int op; |
1678 | 1678 | ||
1679 | if (!capable(CAP_SYS_ADMIN)) | 1679 | if (write && !capable(CAP_SYS_ADMIN)) |
1680 | return -EPERM; | 1680 | return -EPERM; |
1681 | 1681 | ||
1682 | op = OP_OR; | 1682 | op = OP_OR; |
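Gating the capability check on write leaves reads of the taint word unprivileged while setting bits still requires CAP_SYS_ADMIN. A small userspace check of the behavior (a sketch; runs as any user):

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/kernel/tainted", "r");
	int taint;

	if (f && fscanf(f, "%d", &taint) == 1)
		printf("tainted = %d\n", taint);	/* no privilege needed */
	if (f)
		fclose(f);
	return 0;
}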
diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 4c3476fa05..906cae7715 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c | |||
@@ -102,7 +102,7 @@ static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp, | |||
102 | */ | 102 | */ |
103 | static int send_reply(struct sk_buff *skb, pid_t pid) | 103 | static int send_reply(struct sk_buff *skb, pid_t pid) |
104 | { | 104 | { |
105 | struct genlmsghdr *genlhdr = nlmsg_data((struct nlmsghdr *)skb->data); | 105 | struct genlmsghdr *genlhdr = nlmsg_data(nlmsg_hdr(skb)); |
106 | void *reply = genlmsg_data(genlhdr); | 106 | void *reply = genlmsg_data(genlhdr); |
107 | int rc; | 107 | int rc; |
108 | 108 | ||
@@ -121,7 +121,7 @@ static int send_reply(struct sk_buff *skb, pid_t pid) | |||
121 | static void send_cpu_listeners(struct sk_buff *skb, | 121 | static void send_cpu_listeners(struct sk_buff *skb, |
122 | struct listener_list *listeners) | 122 | struct listener_list *listeners) |
123 | { | 123 | { |
124 | struct genlmsghdr *genlhdr = nlmsg_data((struct nlmsghdr *)skb->data); | 124 | struct genlmsghdr *genlhdr = nlmsg_data(nlmsg_hdr(skb)); |
125 | struct listener *s, *tmp; | 125 | struct listener *s, *tmp; |
126 | struct sk_buff *skb_next, *skb_cur = skb; | 126 | struct sk_buff *skb_next, *skb_cur = skb; |
127 | void *reply = genlmsg_data(genlhdr); | 127 | void *reply = genlmsg_data(genlhdr); |
@@ -524,9 +524,7 @@ void __init taskstats_init_early(void) | |||
524 | { | 524 | { |
525 | unsigned int i; | 525 | unsigned int i; |
526 | 526 | ||
527 | taskstats_cache = kmem_cache_create("taskstats_cache", | 527 | taskstats_cache = KMEM_CACHE(taskstats, SLAB_PANIC); |
528 | sizeof(struct taskstats), | ||
529 | 0, SLAB_PANIC, NULL, NULL); | ||
530 | for_each_possible_cpu(i) { | 528 | for_each_possible_cpu(i) { |
531 | INIT_LIST_HEAD(&(per_cpu(listener_array, i).list)); | 529 | INIT_LIST_HEAD(&(per_cpu(listener_array, i).list)); |
532 | init_rwsem(&(per_cpu(listener_array, i).sem)); | 530 | init_rwsem(&(per_cpu(listener_array, i).sem)); |
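nlmsg_hdr() replaces the open-coded casts of skb->data in both reply paths. The accessor is essentially a typed wrapper over the head of the buffer (a sketch of its shape; the real definition is in <linux/netlink.h>):

static inline struct nlmsghdr *nlmsg_hdr(const struct sk_buff *skb)
{
	return (struct nlmsghdr *)skb->data;
}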
diff --git a/kernel/time.c b/kernel/time.c index c6c80ea5d0..ba18ec4899 100644 --- a/kernel/time.c +++ b/kernel/time.c | |||
@@ -452,6 +452,7 @@ struct timespec ns_to_timespec(const s64 nsec) | |||
452 | 452 | ||
453 | return ts; | 453 | return ts; |
454 | } | 454 | } |
455 | EXPORT_SYMBOL(ns_to_timespec); | ||
455 | 456 | ||
456 | /** | 457 | /** |
457 | * ns_to_timeval - Convert nanoseconds to timeval | 458 | * ns_to_timeval - Convert nanoseconds to timeval |
@@ -469,6 +470,7 @@ struct timeval ns_to_timeval(const s64 nsec) | |||
469 | 470 | ||
470 | return tv; | 471 | return tv; |
471 | } | 472 | } |
473 | EXPORT_SYMBOL(ns_to_timeval); | ||
472 | 474 | ||
473 | /* | 475 | /* |
474 | * Convert jiffies to milliseconds and back. | 476 | * Convert jiffies to milliseconds and back. |
@@ -635,6 +637,7 @@ timeval_to_jiffies(const struct timeval *value) | |||
635 | (((u64)usec * USEC_CONVERSION + USEC_ROUND) >> | 637 | (((u64)usec * USEC_CONVERSION + USEC_ROUND) >> |
636 | (USEC_JIFFIE_SC - SEC_JIFFIE_SC))) >> SEC_JIFFIE_SC; | 638 | (USEC_JIFFIE_SC - SEC_JIFFIE_SC))) >> SEC_JIFFIE_SC; |
637 | } | 639 | } |
640 | EXPORT_SYMBOL(timeval_to_jiffies); | ||
638 | 641 | ||
639 | void jiffies_to_timeval(const unsigned long jiffies, struct timeval *value) | 642 | void jiffies_to_timeval(const unsigned long jiffies, struct timeval *value) |
640 | { | 643 | { |
@@ -649,6 +652,7 @@ void jiffies_to_timeval(const unsigned long jiffies, struct timeval *value) | |||
649 | tv_usec /= NSEC_PER_USEC; | 652 | tv_usec /= NSEC_PER_USEC; |
650 | value->tv_usec = tv_usec; | 653 | value->tv_usec = tv_usec; |
651 | } | 654 | } |
655 | EXPORT_SYMBOL(jiffies_to_timeval); | ||
652 | 656 | ||
653 | /* | 657 | /* |
654 | * Convert jiffies/jiffies_64 to clock_t and back. | 658 | * Convert jiffies/jiffies_64 to clock_t and back. |
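Exporting the timespec/timeval/jiffies conversion helpers makes them callable from modules. A minimal module-side sketch that round-trips through two of the newly exported symbols:

#include <linux/time.h>
#include <linux/jiffies.h>

static unsigned long ns_to_jiffies_example(s64 ns)
{
	struct timeval tv = ns_to_timeval(ns);	/* exported above */

	return timeval_to_jiffies(&tv);		/* exported above */
}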
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 67932ea78c..76212b2a99 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c | |||
@@ -274,72 +274,3 @@ void clockevents_notify(unsigned long reason, void *arg) | |||
274 | } | 274 | } |
275 | EXPORT_SYMBOL_GPL(clockevents_notify); | 275 | EXPORT_SYMBOL_GPL(clockevents_notify); |
276 | 276 | ||
277 | #ifdef CONFIG_SYSFS | ||
278 | |||
279 | /** | ||
280 | * clockevents_show_registered - sysfs interface for listing clockevents | ||
281 | * @dev: unused | ||
282 | * @buf: char buffer to be filled with clock events list | ||
283 | * | ||
284 | * Provides sysfs interface for listing registered clock event devices | ||
285 | */ | ||
286 | static ssize_t clockevents_show_registered(struct sys_device *dev, char *buf) | ||
287 | { | ||
288 | struct list_head *tmp; | ||
289 | char *p = buf; | ||
290 | int cpu; | ||
291 | |||
292 | spin_lock(&clockevents_lock); | ||
293 | |||
294 | list_for_each(tmp, &clockevent_devices) { | ||
295 | struct clock_event_device *ce; | ||
296 | |||
297 | ce = list_entry(tmp, struct clock_event_device, list); | ||
298 | p += sprintf(p, "%-20s F:%04x M:%d", ce->name, | ||
299 | ce->features, ce->mode); | ||
300 | p += sprintf(p, " C:"); | ||
301 | if (!cpus_equal(ce->cpumask, cpu_possible_map)) { | ||
302 | for_each_cpu_mask(cpu, ce->cpumask) | ||
303 | p += sprintf(p, " %d", cpu); | ||
304 | } else { | ||
305 | /* | ||
306 | * FIXME: Add the cpu which is handling this sucker | ||
307 | */ | ||
308 | } | ||
309 | p += sprintf(p, "\n"); | ||
310 | } | ||
311 | |||
312 | spin_unlock(&clockevents_lock); | ||
313 | |||
314 | return p - buf; | ||
315 | } | ||
316 | |||
317 | /* | ||
318 | * Sysfs setup bits: | ||
319 | */ | ||
320 | static SYSDEV_ATTR(registered, 0600, | ||
321 | clockevents_show_registered, NULL); | ||
322 | |||
323 | static struct sysdev_class clockevents_sysclass = { | ||
324 | set_kset_name("clockevents"), | ||
325 | }; | ||
326 | |||
327 | static struct sys_device clockevents_sys_device = { | ||
328 | .id = 0, | ||
329 | .cls = &clockevents_sysclass, | ||
330 | }; | ||
331 | |||
332 | static int __init clockevents_sysfs_init(void) | ||
333 | { | ||
334 | int error = sysdev_class_register(&clockevents_sysclass); | ||
335 | |||
336 | if (!error) | ||
337 | error = sysdev_register(&clockevents_sys_device); | ||
338 | if (!error) | ||
339 | error = sysdev_create_file( | ||
340 | &clockevents_sys_device, | ||
341 | &attr_registered); | ||
342 | return error; | ||
343 | } | ||
344 | device_initcall(clockevents_sysfs_init); | ||
345 | #endif | ||
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 5b0e46b56f..fe5c7db242 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c | |||
@@ -151,7 +151,8 @@ static void clocksource_check_watchdog(struct clocksource *cs) | |||
151 | watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL; | 151 | watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL; |
152 | add_timer(&watchdog_timer); | 152 | add_timer(&watchdog_timer); |
153 | } | 153 | } |
154 | } else if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) { | 154 | } else { |
155 | if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) | ||
155 | cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES; | 156 | cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES; |
156 | 157 | ||
157 | if (!watchdog || cs->rating > watchdog->rating) { | 158 | if (!watchdog || cs->rating > watchdog->rating) { |
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c index 3be8da8fed..4c256fdb88 100644 --- a/kernel/time/jiffies.c +++ b/kernel/time/jiffies.c | |||
@@ -69,4 +69,4 @@ static int __init init_jiffies_clocksource(void) | |||
69 | return clocksource_register(&clocksource_jiffies); | 69 | return clocksource_register(&clocksource_jiffies); |
70 | } | 70 | } |
71 | 71 | ||
72 | module_init(init_jiffies_clocksource); | 72 | core_initcall(init_jiffies_clocksource); |
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index eb12509e00..cb25649c6f 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c | |||
@@ -32,7 +32,7 @@ static u64 tick_length, tick_length_base; | |||
32 | /* TIME_ERROR prevents overwriting the CMOS clock */ | 32 | /* TIME_ERROR prevents overwriting the CMOS clock */ |
33 | static int time_state = TIME_OK; /* clock synchronization status */ | 33 | static int time_state = TIME_OK; /* clock synchronization status */ |
34 | int time_status = STA_UNSYNC; /* clock status bits */ | 34 | int time_status = STA_UNSYNC; /* clock status bits */ |
35 | static long time_offset; /* time adjustment (ns) */ | 35 | static s64 time_offset; /* time adjustment (ns) */ |
36 | static long time_constant = 2; /* pll time constant */ | 36 | static long time_constant = 2; /* pll time constant */ |
37 | long time_maxerror = NTP_PHASE_LIMIT; /* maximum error (us) */ | 37 | long time_maxerror = NTP_PHASE_LIMIT; /* maximum error (us) */ |
38 | long time_esterror = NTP_PHASE_LIMIT; /* estimated error (us) */ | 38 | long time_esterror = NTP_PHASE_LIMIT; /* estimated error (us) */ |
@@ -196,7 +196,7 @@ void __attribute__ ((weak)) notify_arch_cmos_timer(void) | |||
196 | */ | 196 | */ |
197 | int do_adjtimex(struct timex *txc) | 197 | int do_adjtimex(struct timex *txc) |
198 | { | 198 | { |
199 | long ltemp, mtemp, save_adjust; | 199 | long mtemp, save_adjust, rem; |
200 | s64 freq_adj, temp64; | 200 | s64 freq_adj, temp64; |
201 | int result; | 201 | int result; |
202 | 202 | ||
@@ -277,14 +277,14 @@ int do_adjtimex(struct timex *txc) | |||
277 | time_adjust = txc->offset; | 277 | time_adjust = txc->offset; |
278 | } | 278 | } |
279 | else if (time_status & STA_PLL) { | 279 | else if (time_status & STA_PLL) { |
280 | ltemp = txc->offset * NSEC_PER_USEC; | 280 | time_offset = txc->offset * NSEC_PER_USEC; |
281 | 281 | ||
282 | /* | 282 | /* |
283 | * Scale the phase adjustment and | 283 | * Scale the phase adjustment and |
284 | * clamp to the operating range. | 284 | * clamp to the operating range. |
285 | */ | 285 | */ |
286 | time_offset = min(ltemp, MAXPHASE * NSEC_PER_USEC); | 286 | time_offset = min(time_offset, (s64)MAXPHASE * NSEC_PER_USEC); |
287 | time_offset = max(time_offset, -MAXPHASE * NSEC_PER_USEC); | 287 | time_offset = max(time_offset, (s64)-MAXPHASE * NSEC_PER_USEC); |
288 | 288 | ||
289 | /* | 289 | /* |
290 | * Select whether the frequency is to be controlled | 290 | * Select whether the frequency is to be controlled |
@@ -297,11 +297,11 @@ int do_adjtimex(struct timex *txc) | |||
297 | mtemp = xtime.tv_sec - time_reftime; | 297 | mtemp = xtime.tv_sec - time_reftime; |
298 | time_reftime = xtime.tv_sec; | 298 | time_reftime = xtime.tv_sec; |
299 | 299 | ||
300 | freq_adj = (s64)time_offset * mtemp; | 300 | freq_adj = time_offset * mtemp; |
301 | freq_adj = shift_right(freq_adj, time_constant * 2 + | 301 | freq_adj = shift_right(freq_adj, time_constant * 2 + |
302 | (SHIFT_PLL + 2) * 2 - SHIFT_NSEC); | 302 | (SHIFT_PLL + 2) * 2 - SHIFT_NSEC); |
303 | if (mtemp >= MINSEC && (time_status & STA_FLL || mtemp > MAXSEC)) { | 303 | if (mtemp >= MINSEC && (time_status & STA_FLL || mtemp > MAXSEC)) { |
304 | temp64 = (s64)time_offset << (SHIFT_NSEC - SHIFT_FLL); | 304 | temp64 = time_offset << (SHIFT_NSEC - SHIFT_FLL); |
305 | if (time_offset < 0) { | 305 | if (time_offset < 0) { |
306 | temp64 = -temp64; | 306 | temp64 = -temp64; |
307 | do_div(temp64, mtemp); | 307 | do_div(temp64, mtemp); |
@@ -314,8 +314,10 @@ int do_adjtimex(struct timex *txc) | |||
314 | freq_adj += time_freq; | 314 | freq_adj += time_freq; |
315 | freq_adj = min(freq_adj, (s64)MAXFREQ_NSEC); | 315 | freq_adj = min(freq_adj, (s64)MAXFREQ_NSEC); |
316 | time_freq = max(freq_adj, (s64)-MAXFREQ_NSEC); | 316 | time_freq = max(freq_adj, (s64)-MAXFREQ_NSEC); |
317 | time_offset = (time_offset / NTP_INTERVAL_FREQ) | 317 | time_offset = div_long_long_rem_signed(time_offset, |
318 | << SHIFT_UPDATE; | 318 | NTP_INTERVAL_FREQ, |
319 | &rem); | ||
320 | time_offset <<= SHIFT_UPDATE; | ||
319 | } /* STA_PLL */ | 321 | } /* STA_PLL */ |
320 | } /* txc->modes & ADJ_OFFSET */ | 322 | } /* txc->modes & ADJ_OFFSET */ |
321 | if (txc->modes & ADJ_TICK) | 323 | if (txc->modes & ADJ_TICK) |
@@ -328,12 +330,12 @@ leave: if ((time_status & (STA_UNSYNC|STA_CLOCKERR)) != 0) | |||
328 | result = TIME_ERROR; | 330 | result = TIME_ERROR; |
329 | 331 | ||
330 | if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT) | 332 | if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT) |
331 | txc->offset = save_adjust; | 333 | txc->offset = save_adjust; |
332 | else | 334 | else |
333 | txc->offset = shift_right(time_offset, SHIFT_UPDATE) | 335 | txc->offset = ((long)shift_right(time_offset, SHIFT_UPDATE)) * |
334 | * NTP_INTERVAL_FREQ / 1000; | 336 | NTP_INTERVAL_FREQ / 1000; |
335 | txc->freq = (time_freq / NSEC_PER_USEC) | 337 | txc->freq = (time_freq / NSEC_PER_USEC) << |
336 | << (SHIFT_USEC - SHIFT_NSEC); | 338 | (SHIFT_USEC - SHIFT_NSEC); |
337 | txc->maxerror = time_maxerror; | 339 | txc->maxerror = time_maxerror; |
338 | txc->esterror = time_esterror; | 340 | txc->esterror = time_esterror; |
339 | txc->status = time_status; | 341 | txc->status = time_status; |
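Widening time_offset to s64 keeps the microsecond-to-nanosecond multiply and the later shifts from overflowing a 32-bit long; the explicit (s64) casts on the clamp limits make min()/max() compare in 64 bits. A standalone sketch of the clamp (the MAXPHASE value is illustrative):

#include <stdint.h>

#define NSEC_PER_USEC	1000
#define MAXPHASE	500000			/* us; illustrative value */

static int64_t clamp_phase_ns(long usec_offset)
{
	/* Widen before multiplying: a large user-supplied offset
	 * would overflow a 32-bit long here. */
	int64_t ns = (int64_t)usec_offset * NSEC_PER_USEC;

	if (ns > (int64_t)MAXPHASE * NSEC_PER_USEC)
		ns = (int64_t)MAXPHASE * NSEC_PER_USEC;
	if (ns < (int64_t)-MAXPHASE * NSEC_PER_USEC)
		ns = (int64_t)-MAXPHASE * NSEC_PER_USEC;
	return ns;
}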
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 5567745470..eadfce2fff 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c | |||
@@ -307,12 +307,19 @@ int tick_resume_broadcast(void) | |||
307 | spin_lock_irqsave(&tick_broadcast_lock, flags); | 307 | spin_lock_irqsave(&tick_broadcast_lock, flags); |
308 | 308 | ||
309 | bc = tick_broadcast_device.evtdev; | 309 | bc = tick_broadcast_device.evtdev; |
310 | if (bc) { | ||
311 | if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC && | ||
312 | !cpus_empty(tick_broadcast_mask)) | ||
313 | tick_broadcast_start_periodic(bc); | ||
314 | 310 | ||
315 | broadcast = cpu_isset(smp_processor_id(), tick_broadcast_mask); | 311 | if (bc) { |
312 | switch (tick_broadcast_device.mode) { | ||
313 | case TICKDEV_MODE_PERIODIC: | ||
314 | if (!cpus_empty(tick_broadcast_mask)) | ||
315 | tick_broadcast_start_periodic(bc); | ||
316 | broadcast = cpu_isset(smp_processor_id(), | ||
317 | tick_broadcast_mask); | ||
318 | break; | ||
319 | case TICKDEV_MODE_ONESHOT: | ||
320 | broadcast = tick_resume_broadcast_oneshot(bc); | ||
321 | break; | ||
322 | } | ||
316 | } | 323 | } |
317 | spin_unlock_irqrestore(&tick_broadcast_lock, flags); | 324 | spin_unlock_irqrestore(&tick_broadcast_lock, flags); |
318 | 325 | ||
@@ -347,6 +354,16 @@ static int tick_broadcast_set_event(ktime_t expires, int force) | |||
347 | } | 354 | } |
348 | } | 355 | } |
349 | 356 | ||
357 | int tick_resume_broadcast_oneshot(struct clock_event_device *bc) | ||
358 | { | ||
359 | clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT); | ||
360 | |||
361 | if (!cpus_empty(tick_broadcast_oneshot_mask)) | ||
362 | tick_broadcast_set_event(ktime_get(), 1); | ||
363 | |||
364 | return cpu_isset(smp_processor_id(), tick_broadcast_oneshot_mask); | ||
365 | } | ||
366 | |||
350 | /* | 367 | /* |
351 | * Reprogram the broadcast device: | 368 | * Reprogram the broadcast device: |
352 | * | 369 | * |
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 43ba1bdec1..bfda3f7f07 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c | |||
@@ -298,18 +298,17 @@ static void tick_shutdown(unsigned int *cpup) | |||
298 | spin_unlock_irqrestore(&tick_device_lock, flags); | 298 | spin_unlock_irqrestore(&tick_device_lock, flags); |
299 | } | 299 | } |
300 | 300 | ||
301 | static void tick_suspend_periodic(void) | 301 | static void tick_suspend(void) |
302 | { | 302 | { |
303 | struct tick_device *td = &__get_cpu_var(tick_cpu_device); | 303 | struct tick_device *td = &__get_cpu_var(tick_cpu_device); |
304 | unsigned long flags; | 304 | unsigned long flags; |
305 | 305 | ||
306 | spin_lock_irqsave(&tick_device_lock, flags); | 306 | spin_lock_irqsave(&tick_device_lock, flags); |
307 | if (td->mode == TICKDEV_MODE_PERIODIC) | 307 | clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_SHUTDOWN); |
308 | clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_SHUTDOWN); | ||
309 | spin_unlock_irqrestore(&tick_device_lock, flags); | 308 | spin_unlock_irqrestore(&tick_device_lock, flags); |
310 | } | 309 | } |
311 | 310 | ||
312 | static void tick_resume_periodic(void) | 311 | static void tick_resume(void) |
313 | { | 312 | { |
314 | struct tick_device *td = &__get_cpu_var(tick_cpu_device); | 313 | struct tick_device *td = &__get_cpu_var(tick_cpu_device); |
315 | unsigned long flags; | 314 | unsigned long flags; |
@@ -317,6 +316,8 @@ static void tick_resume_periodic(void) | |||
317 | spin_lock_irqsave(&tick_device_lock, flags); | 316 | spin_lock_irqsave(&tick_device_lock, flags); |
318 | if (td->mode == TICKDEV_MODE_PERIODIC) | 317 | if (td->mode == TICKDEV_MODE_PERIODIC) |
319 | tick_setup_periodic(td->evtdev, 0); | 318 | tick_setup_periodic(td->evtdev, 0); |
319 | else | ||
320 | tick_resume_oneshot(); | ||
320 | spin_unlock_irqrestore(&tick_device_lock, flags); | 321 | spin_unlock_irqrestore(&tick_device_lock, flags); |
321 | } | 322 | } |
322 | 323 | ||
@@ -348,13 +349,13 @@ static int tick_notify(struct notifier_block *nb, unsigned long reason, | |||
348 | break; | 349 | break; |
349 | 350 | ||
350 | case CLOCK_EVT_NOTIFY_SUSPEND: | 351 | case CLOCK_EVT_NOTIFY_SUSPEND: |
351 | tick_suspend_periodic(); | 352 | tick_suspend(); |
352 | tick_suspend_broadcast(); | 353 | tick_suspend_broadcast(); |
353 | break; | 354 | break; |
354 | 355 | ||
355 | case CLOCK_EVT_NOTIFY_RESUME: | 356 | case CLOCK_EVT_NOTIFY_RESUME: |
356 | if (!tick_resume_broadcast()) | 357 | if (!tick_resume_broadcast()) |
357 | tick_resume_periodic(); | 358 | tick_resume(); |
358 | break; | 359 | break; |
359 | 360 | ||
360 | default: | 361 | default: |
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index 75890efd24..c9d203bde5 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h | |||
@@ -19,12 +19,13 @@ extern void tick_setup_oneshot(struct clock_event_device *newdev, | |||
19 | extern int tick_program_event(ktime_t expires, int force); | 19 | extern int tick_program_event(ktime_t expires, int force); |
20 | extern void tick_oneshot_notify(void); | 20 | extern void tick_oneshot_notify(void); |
21 | extern int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *)); | 21 | extern int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *)); |
22 | 22 | extern void tick_resume_oneshot(void); | |
23 | # ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST | 23 | # ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST |
24 | extern void tick_broadcast_setup_oneshot(struct clock_event_device *bc); | 24 | extern void tick_broadcast_setup_oneshot(struct clock_event_device *bc); |
25 | extern void tick_broadcast_oneshot_control(unsigned long reason); | 25 | extern void tick_broadcast_oneshot_control(unsigned long reason); |
26 | extern void tick_broadcast_switch_to_oneshot(void); | 26 | extern void tick_broadcast_switch_to_oneshot(void); |
27 | extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup); | 27 | extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup); |
28 | extern int tick_resume_broadcast_oneshot(struct clock_event_device *bc); | ||
28 | # else /* BROADCAST */ | 29 | # else /* BROADCAST */ |
29 | static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) | 30 | static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) |
30 | { | 31 | { |
@@ -43,6 +44,10 @@ void tick_setup_oneshot(struct clock_event_device *newdev, | |||
43 | { | 44 | { |
44 | BUG(); | 45 | BUG(); |
45 | } | 46 | } |
47 | static inline void tick_resume_oneshot(void) | ||
48 | { | ||
49 | BUG(); | ||
50 | } | ||
46 | static inline int tick_program_event(ktime_t expires, int force) | 51 | static inline int tick_program_event(ktime_t expires, int force) |
47 | { | 52 | { |
48 | return 0; | 53 | return 0; |
@@ -54,6 +59,10 @@ static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) | |||
54 | } | 59 | } |
55 | static inline void tick_broadcast_oneshot_control(unsigned long reason) { } | 60 | static inline void tick_broadcast_oneshot_control(unsigned long reason) { } |
56 | static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { } | 61 | static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { } |
62 | static inline int tick_resume_broadcast_oneshot(struct clock_event_device *bc) | ||
63 | { | ||
64 | return 0; | ||
65 | } | ||
57 | #endif /* !TICK_ONESHOT */ | 66 | #endif /* !TICK_ONESHOT */ |
58 | 67 | ||
59 | /* | 68 | /* |
diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c index 2e8b7ff863..f6997ab0c3 100644 --- a/kernel/time/tick-oneshot.c +++ b/kernel/time/tick-oneshot.c | |||
@@ -41,6 +41,18 @@ int tick_program_event(ktime_t expires, int force) | |||
41 | } | 41 | } |
42 | 42 | ||
43 | /** | 43 | /** |
44 | * tick_resume_oneshot - resume oneshot mode | ||
45 | */ | ||
46 | void tick_resume_oneshot(void) | ||
47 | { | ||
48 | struct tick_device *td = &__get_cpu_var(tick_cpu_device); | ||
49 | struct clock_event_device *dev = td->evtdev; | ||
50 | |||
51 | clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT); | ||
52 | tick_program_event(ktime_get(), 1); | ||
53 | } | ||
54 | |||
55 | /** | ||
44 | * tick_setup_oneshot - setup the event device for oneshot mode (hres or nohz) | 56 | * tick_setup_oneshot - setup the event device for oneshot mode (hres or nohz) |
45 | */ | 57 | */ |
46 | void tick_setup_oneshot(struct clock_event_device *newdev, | 58 | void tick_setup_oneshot(struct clock_event_device *newdev, |
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index f82c635c3d..59df5e8555 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c | |||
@@ -194,9 +194,9 @@ print_tickdevice(struct seq_file *m, struct tick_device *td) | |||
194 | return; | 194 | return; |
195 | } | 195 | } |
196 | SEQ_printf(m, "%s\n", dev->name); | 196 | SEQ_printf(m, "%s\n", dev->name); |
197 | SEQ_printf(m, " max_delta_ns: %ld\n", dev->max_delta_ns); | 197 | SEQ_printf(m, " max_delta_ns: %lu\n", dev->max_delta_ns); |
198 | SEQ_printf(m, " min_delta_ns: %ld\n", dev->min_delta_ns); | 198 | SEQ_printf(m, " min_delta_ns: %lu\n", dev->min_delta_ns); |
199 | SEQ_printf(m, " mult: %ld\n", dev->mult); | 199 | SEQ_printf(m, " mult: %lu\n", dev->mult); |
200 | SEQ_printf(m, " shift: %d\n", dev->shift); | 200 | SEQ_printf(m, " shift: %d\n", dev->shift); |
201 | SEQ_printf(m, " mode: %d\n", dev->mode); | 201 | SEQ_printf(m, " mode: %d\n", dev->mode); |
202 | SEQ_printf(m, " next_event: %Ld nsecs\n", | 202 | SEQ_printf(m, " next_event: %Ld nsecs\n", |
diff --git a/kernel/timer.c b/kernel/timer.c index 797cccb864..b22bd39740 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
@@ -505,6 +505,8 @@ out: | |||
505 | return ret; | 505 | return ret; |
506 | } | 506 | } |
507 | 507 | ||
508 | EXPORT_SYMBOL(try_to_del_timer_sync); | ||
509 | |||
508 | /** | 510 | /** |
509 | * del_timer_sync - deactivate a timer and wait for the handler to finish. | 511 | * del_timer_sync - deactivate a timer and wait for the handler to finish. |
510 | * @timer: the timer to be deactivated | 512 | * @timer: the timer to be deactivated |
@@ -695,15 +697,28 @@ static unsigned long cmp_next_hrtimer_event(unsigned long now, | |||
695 | { | 697 | { |
696 | ktime_t hr_delta = hrtimer_get_next_event(); | 698 | ktime_t hr_delta = hrtimer_get_next_event(); |
697 | struct timespec tsdelta; | 699 | struct timespec tsdelta; |
700 | unsigned long delta; | ||
698 | 701 | ||
699 | if (hr_delta.tv64 == KTIME_MAX) | 702 | if (hr_delta.tv64 == KTIME_MAX) |
700 | return expires; | 703 | return expires; |
701 | 704 | ||
702 | if (hr_delta.tv64 <= TICK_NSEC) | 705 | /* |
703 | return now; | 706 | * Expired timer available, let it expire in the next tick |
707 | */ | ||
708 | if (hr_delta.tv64 <= 0) | ||
709 | return now + 1; | ||
704 | 710 | ||
705 | tsdelta = ktime_to_timespec(hr_delta); | 711 | tsdelta = ktime_to_timespec(hr_delta); |
706 | now += timespec_to_jiffies(&tsdelta); | 712 | delta = timespec_to_jiffies(&tsdelta); |
713 | /* | ||
714 | * Take rounding errors into account and make sure that it | ||
715 | * expires in the next tick. Otherwise we go into an endless | ||
716 | * ping pong due to tick_nohz_stop_sched_tick() retriggering | ||
717 | * the timer softirq | ||
718 | */ | ||
719 | if (delta < 1) | ||
720 | delta = 1; | ||
721 | now += delta; | ||
707 | if (time_before(now, expires)) | 722 | if (time_before(now, expires)) |
708 | return now; | 723 | return now; |
709 | return expires; | 724 | return expires; |
@@ -1003,7 +1018,7 @@ static int timekeeping_resume(struct sys_device *dev) | |||
1003 | clockevents_notify(CLOCK_EVT_NOTIFY_RESUME, NULL); | 1018 | clockevents_notify(CLOCK_EVT_NOTIFY_RESUME, NULL); |
1004 | 1019 | ||
1005 | /* Resume hrtimers */ | 1020 | /* Resume hrtimers */ |
1006 | clock_was_set(); | 1021 | hres_timers_resume(); |
1007 | 1022 | ||
1008 | return 0; | 1023 | return 0; |
1009 | } | 1024 | } |
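The cmp_next_hrtimer_event() change clamps the converted delta to at least one jiffy: a sub-tick hrtimer delta would otherwise round down to zero, and tick_nohz_stop_sched_tick() would keep retriggering the timer softirq without the timer ever expiring. A standalone sketch of the rounding fix (the tick length is illustrative):

#include <stdint.h>

#define TICK_NSEC	1000000		/* 1 kHz tick; illustrative */

static unsigned long ns_delta_to_ticks(int64_t ns)
{
	unsigned long ticks;

	if (ns <= 0)
		return 1;	/* already expired: fire on the next tick */

	ticks = ns / TICK_NSEC;
	if (ticks < 1)
		ticks = 1;	/* sub-tick delta still waits one tick */
	return ticks;
}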