diff options
Diffstat (limited to 'kernel')
49 files changed, 1885 insertions, 845 deletions
diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz index 248e1c396f8b..4af15802ccd4 100644 --- a/kernel/Kconfig.hz +++ b/kernel/Kconfig.hz | |||
@@ -7,7 +7,7 @@ choice | |||
7 | default HZ_250 | 7 | default HZ_250 |
8 | help | 8 | help |
9 | Allows the configuration of the timer frequency. It is customary | 9 | Allows the configuration of the timer frequency. It is customary |
10 | to have the timer interrupt run at 1000 HZ but 100 HZ may be more | 10 | to have the timer interrupt run at 1000 Hz but 100 Hz may be more |
11 | beneficial for servers and NUMA systems that do not need to have | 11 | beneficial for servers and NUMA systems that do not need to have |
12 | a fast response for user interaction and that may experience bus | 12 | a fast response for user interaction and that may experience bus |
13 | contention and cacheline bounces as a result of timer interrupts. | 13 | contention and cacheline bounces as a result of timer interrupts. |
@@ -19,21 +19,30 @@ choice | |||
19 | config HZ_100 | 19 | config HZ_100 |
20 | bool "100 HZ" | 20 | bool "100 HZ" |
21 | help | 21 | help |
22 | 100 HZ is a typical choice for servers, SMP and NUMA systems | 22 | 100 Hz is a typical choice for servers, SMP and NUMA systems |
23 | with lots of processors that may show reduced performance if | 23 | with lots of processors that may show reduced performance if |
24 | too many timer interrupts are occurring. | 24 | too many timer interrupts are occurring. |
25 | 25 | ||
26 | config HZ_250 | 26 | config HZ_250 |
27 | bool "250 HZ" | 27 | bool "250 HZ" |
28 | help | 28 | help |
29 | 250 HZ is a good compromise choice allowing server performance | 29 | 250 Hz is a good compromise choice allowing server performance |
30 | while also showing good interactive responsiveness even | 30 | while also showing good interactive responsiveness even |
31 | on SMP and NUMA systems. | 31 | on SMP and NUMA systems. If you are going to be using NTSC video |
32 | or multimedia, select 300 Hz instead. | ||
33 | |||
34 | config HZ_300 | ||
35 | bool "300 HZ" | ||
36 | help | ||
37 | 300 Hz is a good compromise choice allowing server performance | ||
38 | while also showing good interactive responsiveness even | ||
39 | on SMP and NUMA systems, while being exactly divisible by both PAL and | ||
40 | NTSC frame rates for video and multimedia work. | ||
32 | 41 | ||
33 | config HZ_1000 | 42 | config HZ_1000 |
34 | bool "1000 HZ" | 43 | bool "1000 HZ" |
35 | help | 44 | help |
36 | 1000 HZ is the preferred choice for desktop systems and other | 45 | 1000 Hz is the preferred choice for desktop systems and other |
37 | systems requiring fast interactive responses to events. | 46 | systems requiring fast interactive responses to events. |
38 | 47 | ||
39 | endchoice | 48 | endchoice |
@@ -42,5 +51,6 @@ config HZ | |||
42 | int | 51 | int |
43 | default 100 if HZ_100 | 52 | default 100 if HZ_100 |
44 | default 250 if HZ_250 | 53 | default 250 if HZ_250 |
54 | default 300 if HZ_300 | ||
45 | default 1000 if HZ_1000 | 55 | default 1000 if HZ_1000 |
46 | 56 | ||
diff --git a/kernel/acct.c b/kernel/acct.c index 0aad5ca36a81..dc12db8600e7 100644 --- a/kernel/acct.c +++ b/kernel/acct.c | |||
@@ -89,7 +89,8 @@ struct acct_glbs { | |||
89 | struct timer_list timer; | 89 | struct timer_list timer; |
90 | }; | 90 | }; |
91 | 91 | ||
92 | static struct acct_glbs acct_globals __cacheline_aligned = {SPIN_LOCK_UNLOCKED}; | 92 | static struct acct_glbs acct_globals __cacheline_aligned = |
93 | {__SPIN_LOCK_UNLOCKED(acct_globals.lock)}; | ||
93 | 94 | ||
94 | /* | 95 | /* |
95 | * Called whenever the timer says to check the free space. | 96 | * Called whenever the timer says to check the free space. |
diff --git a/kernel/audit.c b/kernel/audit.c index 98106f6078b0..d9b690ac684b 100644 --- a/kernel/audit.c +++ b/kernel/audit.c | |||
@@ -57,6 +57,7 @@ | |||
57 | #include <linux/netlink.h> | 57 | #include <linux/netlink.h> |
58 | #include <linux/selinux.h> | 58 | #include <linux/selinux.h> |
59 | #include <linux/inotify.h> | 59 | #include <linux/inotify.h> |
60 | #include <linux/freezer.h> | ||
60 | 61 | ||
61 | #include "audit.h" | 62 | #include "audit.h" |
62 | 63 | ||
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index 4f40d923af8e..2e896f8ae29e 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c | |||
@@ -636,10 +636,9 @@ static struct audit_rule *audit_krule_to_rule(struct audit_krule *krule) | |||
636 | struct audit_rule *rule; | 636 | struct audit_rule *rule; |
637 | int i; | 637 | int i; |
638 | 638 | ||
639 | rule = kmalloc(sizeof(*rule), GFP_KERNEL); | 639 | rule = kzalloc(sizeof(*rule), GFP_KERNEL); |
640 | if (unlikely(!rule)) | 640 | if (unlikely(!rule)) |
641 | return NULL; | 641 | return NULL; |
642 | memset(rule, 0, sizeof(*rule)); | ||
643 | 642 | ||
644 | rule->flags = krule->flags | krule->listnr; | 643 | rule->flags = krule->flags | krule->listnr; |
645 | rule->action = krule->action; | 644 | rule->action = krule->action; |
diff --git a/kernel/auditsc.c b/kernel/auditsc.c index ab97e5101232..40722e26de98 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c | |||
@@ -731,7 +731,7 @@ static inline void audit_free_context(struct audit_context *context) | |||
731 | printk(KERN_ERR "audit: freed %d contexts\n", count); | 731 | printk(KERN_ERR "audit: freed %d contexts\n", count); |
732 | } | 732 | } |
733 | 733 | ||
734 | static void audit_log_task_context(struct audit_buffer *ab) | 734 | void audit_log_task_context(struct audit_buffer *ab) |
735 | { | 735 | { |
736 | char *ctx = NULL; | 736 | char *ctx = NULL; |
737 | ssize_t len = 0; | 737 | ssize_t len = 0; |
@@ -760,6 +760,8 @@ error_path: | |||
760 | return; | 760 | return; |
761 | } | 761 | } |
762 | 762 | ||
763 | EXPORT_SYMBOL(audit_log_task_context); | ||
764 | |||
763 | static void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk) | 765 | static void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk) |
764 | { | 766 | { |
765 | char name[sizeof(tsk->comm)]; | 767 | char name[sizeof(tsk->comm)]; |
@@ -1488,6 +1490,8 @@ uid_t audit_get_loginuid(struct audit_context *ctx) | |||
1488 | return ctx ? ctx->loginuid : -1; | 1490 | return ctx ? ctx->loginuid : -1; |
1489 | } | 1491 | } |
1490 | 1492 | ||
1493 | EXPORT_SYMBOL(audit_get_loginuid); | ||
1494 | |||
1491 | /** | 1495 | /** |
1492 | * __audit_mq_open - record audit data for a POSIX MQ open | 1496 | * __audit_mq_open - record audit data for a POSIX MQ open |
1493 | * @oflag: open flag | 1497 | * @oflag: open flag |
diff --git a/kernel/configs.c b/kernel/configs.c index f9e31974f4ad..8fa1fb28f8a7 100644 --- a/kernel/configs.c +++ b/kernel/configs.c | |||
@@ -75,7 +75,7 @@ ikconfig_read_current(struct file *file, char __user *buf, | |||
75 | return count; | 75 | return count; |
76 | } | 76 | } |
77 | 77 | ||
78 | static struct file_operations ikconfig_file_ops = { | 78 | static const struct file_operations ikconfig_file_ops = { |
79 | .owner = THIS_MODULE, | 79 | .owner = THIS_MODULE, |
80 | .read = ikconfig_read_current, | 80 | .read = ikconfig_read_current, |
81 | }; | 81 | }; |
diff --git a/kernel/cpu.c b/kernel/cpu.c index 272254f20d97..9124669f4586 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c | |||
@@ -270,11 +270,7 @@ int disable_nonboot_cpus(void) | |||
270 | goto out; | 270 | goto out; |
271 | } | 271 | } |
272 | } | 272 | } |
273 | error = set_cpus_allowed(current, cpumask_of_cpu(first_cpu)); | 273 | |
274 | if (error) { | ||
275 | printk(KERN_ERR "Could not run on CPU%d\n", first_cpu); | ||
276 | goto out; | ||
277 | } | ||
278 | /* We take down all of the non-boot CPUs in one shot to avoid races | 274 | /* We take down all of the non-boot CPUs in one shot to avoid races |
279 | * with the userspace trying to use the CPU hotplug at the same time | 275 | * with the userspace trying to use the CPU hotplug at the same time |
280 | */ | 276 | */ |
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 6313c38c930e..0a6b4d89f9a0 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
@@ -729,9 +729,11 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial) | |||
729 | } | 729 | } |
730 | 730 | ||
731 | /* Remaining checks don't apply to root cpuset */ | 731 | /* Remaining checks don't apply to root cpuset */ |
732 | if ((par = cur->parent) == NULL) | 732 | if (cur == &top_cpuset) |
733 | return 0; | 733 | return 0; |
734 | 734 | ||
735 | par = cur->parent; | ||
736 | |||
735 | /* We must be a subset of our parent cpuset */ | 737 | /* We must be a subset of our parent cpuset */ |
736 | if (!is_cpuset_subset(trial, par)) | 738 | if (!is_cpuset_subset(trial, par)) |
737 | return -EACCES; | 739 | return -EACCES; |
@@ -1060,10 +1062,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, char *buf) | |||
1060 | cpu_exclusive_changed = | 1062 | cpu_exclusive_changed = |
1061 | (is_cpu_exclusive(cs) != is_cpu_exclusive(&trialcs)); | 1063 | (is_cpu_exclusive(cs) != is_cpu_exclusive(&trialcs)); |
1062 | mutex_lock(&callback_mutex); | 1064 | mutex_lock(&callback_mutex); |
1063 | if (turning_on) | 1065 | cs->flags = trialcs.flags; |
1064 | set_bit(bit, &cs->flags); | ||
1065 | else | ||
1066 | clear_bit(bit, &cs->flags); | ||
1067 | mutex_unlock(&callback_mutex); | 1066 | mutex_unlock(&callback_mutex); |
1068 | 1067 | ||
1069 | if (cpu_exclusive_changed) | 1068 | if (cpu_exclusive_changed) |
@@ -1281,7 +1280,8 @@ typedef enum { | |||
1281 | FILE_TASKLIST, | 1280 | FILE_TASKLIST, |
1282 | } cpuset_filetype_t; | 1281 | } cpuset_filetype_t; |
1283 | 1282 | ||
1284 | static ssize_t cpuset_common_file_write(struct file *file, const char __user *userbuf, | 1283 | static ssize_t cpuset_common_file_write(struct file *file, |
1284 | const char __user *userbuf, | ||
1285 | size_t nbytes, loff_t *unused_ppos) | 1285 | size_t nbytes, loff_t *unused_ppos) |
1286 | { | 1286 | { |
1287 | struct cpuset *cs = __d_cs(file->f_dentry->d_parent); | 1287 | struct cpuset *cs = __d_cs(file->f_dentry->d_parent); |
@@ -1292,7 +1292,7 @@ static ssize_t cpuset_common_file_write(struct file *file, const char __user *us | |||
1292 | int retval = 0; | 1292 | int retval = 0; |
1293 | 1293 | ||
1294 | /* Crude upper limit on largest legitimate cpulist user might write. */ | 1294 | /* Crude upper limit on largest legitimate cpulist user might write. */ |
1295 | if (nbytes > 100 + 6 * NR_CPUS) | 1295 | if (nbytes > 100 + 6 * max(NR_CPUS, MAX_NUMNODES)) |
1296 | return -E2BIG; | 1296 | return -E2BIG; |
1297 | 1297 | ||
1298 | /* +1 for nul-terminator */ | 1298 | /* +1 for nul-terminator */ |
@@ -1532,7 +1532,7 @@ static int cpuset_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
1532 | return simple_rename(old_dir, old_dentry, new_dir, new_dentry); | 1532 | return simple_rename(old_dir, old_dentry, new_dir, new_dentry); |
1533 | } | 1533 | } |
1534 | 1534 | ||
1535 | static struct file_operations cpuset_file_operations = { | 1535 | static const struct file_operations cpuset_file_operations = { |
1536 | .read = cpuset_file_read, | 1536 | .read = cpuset_file_read, |
1537 | .write = cpuset_file_write, | 1537 | .write = cpuset_file_write, |
1538 | .llseek = generic_file_llseek, | 1538 | .llseek = generic_file_llseek, |
@@ -2045,7 +2045,6 @@ out: | |||
2045 | return err; | 2045 | return err; |
2046 | } | 2046 | } |
2047 | 2047 | ||
2048 | #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_MEMORY_HOTPLUG) | ||
2049 | /* | 2048 | /* |
2050 | * If common_cpu_mem_hotplug_unplug(), below, unplugs any CPUs | 2049 | * If common_cpu_mem_hotplug_unplug(), below, unplugs any CPUs |
2051 | * or memory nodes, we need to walk over the cpuset hierarchy, | 2050 | * or memory nodes, we need to walk over the cpuset hierarchy, |
@@ -2109,9 +2108,7 @@ static void common_cpu_mem_hotplug_unplug(void) | |||
2109 | mutex_unlock(&callback_mutex); | 2108 | mutex_unlock(&callback_mutex); |
2110 | mutex_unlock(&manage_mutex); | 2109 | mutex_unlock(&manage_mutex); |
2111 | } | 2110 | } |
2112 | #endif | ||
2113 | 2111 | ||
2114 | #ifdef CONFIG_HOTPLUG_CPU | ||
2115 | /* | 2112 | /* |
2116 | * The top_cpuset tracks what CPUs and Memory Nodes are online, | 2113 | * The top_cpuset tracks what CPUs and Memory Nodes are online, |
2117 | * period. This is necessary in order to make cpusets transparent | 2114 | * period. This is necessary in order to make cpusets transparent |
@@ -2128,7 +2125,6 @@ static int cpuset_handle_cpuhp(struct notifier_block *nb, | |||
2128 | common_cpu_mem_hotplug_unplug(); | 2125 | common_cpu_mem_hotplug_unplug(); |
2129 | return 0; | 2126 | return 0; |
2130 | } | 2127 | } |
2131 | #endif | ||
2132 | 2128 | ||
2133 | #ifdef CONFIG_MEMORY_HOTPLUG | 2129 | #ifdef CONFIG_MEMORY_HOTPLUG |
2134 | /* | 2130 | /* |
@@ -2610,7 +2606,7 @@ static int cpuset_open(struct inode *inode, struct file *file) | |||
2610 | return single_open(file, proc_cpuset_show, pid); | 2606 | return single_open(file, proc_cpuset_show, pid); |
2611 | } | 2607 | } |
2612 | 2608 | ||
2613 | struct file_operations proc_cpuset_operations = { | 2609 | const struct file_operations proc_cpuset_operations = { |
2614 | .open = cpuset_open, | 2610 | .open = cpuset_open, |
2615 | .read = seq_read, | 2611 | .read = seq_read, |
2616 | .llseek = seq_lseek, | 2612 | .llseek = seq_lseek, |
diff --git a/kernel/delayacct.c b/kernel/delayacct.c index 66a0ea48751d..766d5912b26a 100644 --- a/kernel/delayacct.c +++ b/kernel/delayacct.c | |||
@@ -20,7 +20,7 @@ | |||
20 | #include <linux/delayacct.h> | 20 | #include <linux/delayacct.h> |
21 | 21 | ||
22 | int delayacct_on __read_mostly = 1; /* Delay accounting turned on/off */ | 22 | int delayacct_on __read_mostly = 1; /* Delay accounting turned on/off */ |
23 | kmem_cache_t *delayacct_cache; | 23 | struct kmem_cache *delayacct_cache; |
24 | 24 | ||
25 | static int __init delayacct_setup_disable(char *str) | 25 | static int __init delayacct_setup_disable(char *str) |
26 | { | 26 | { |
@@ -41,7 +41,7 @@ void delayacct_init(void) | |||
41 | 41 | ||
42 | void __delayacct_tsk_init(struct task_struct *tsk) | 42 | void __delayacct_tsk_init(struct task_struct *tsk) |
43 | { | 43 | { |
44 | tsk->delays = kmem_cache_zalloc(delayacct_cache, SLAB_KERNEL); | 44 | tsk->delays = kmem_cache_zalloc(delayacct_cache, GFP_KERNEL); |
45 | if (tsk->delays) | 45 | if (tsk->delays) |
46 | spin_lock_init(&tsk->delays->lock); | 46 | spin_lock_init(&tsk->delays->lock); |
47 | } | 47 | } |
diff --git a/kernel/dma.c b/kernel/dma.c index 2020644c938a..937b13ca33ba 100644 --- a/kernel/dma.c +++ b/kernel/dma.c | |||
@@ -140,7 +140,7 @@ static int proc_dma_open(struct inode *inode, struct file *file) | |||
140 | return single_open(file, proc_dma_show, NULL); | 140 | return single_open(file, proc_dma_show, NULL); |
141 | } | 141 | } |
142 | 142 | ||
143 | static struct file_operations proc_dma_operations = { | 143 | static const struct file_operations proc_dma_operations = { |
144 | .open = proc_dma_open, | 144 | .open = proc_dma_open, |
145 | .read = seq_read, | 145 | .read = seq_read, |
146 | .llseek = seq_lseek, | 146 | .llseek = seq_lseek, |
diff --git a/kernel/exit.c b/kernel/exit.c index 06de6c4e8ca3..4e3f919edc48 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
@@ -850,9 +850,7 @@ static void exit_notify(struct task_struct *tsk) | |||
850 | fastcall NORET_TYPE void do_exit(long code) | 850 | fastcall NORET_TYPE void do_exit(long code) |
851 | { | 851 | { |
852 | struct task_struct *tsk = current; | 852 | struct task_struct *tsk = current; |
853 | struct taskstats *tidstats; | ||
854 | int group_dead; | 853 | int group_dead; |
855 | unsigned int mycpu; | ||
856 | 854 | ||
857 | profile_task_exit(tsk); | 855 | profile_task_exit(tsk); |
858 | 856 | ||
@@ -890,8 +888,6 @@ fastcall NORET_TYPE void do_exit(long code) | |||
890 | current->comm, current->pid, | 888 | current->comm, current->pid, |
891 | preempt_count()); | 889 | preempt_count()); |
892 | 890 | ||
893 | taskstats_exit_alloc(&tidstats, &mycpu); | ||
894 | |||
895 | acct_update_integrals(tsk); | 891 | acct_update_integrals(tsk); |
896 | if (tsk->mm) { | 892 | if (tsk->mm) { |
897 | update_hiwater_rss(tsk->mm); | 893 | update_hiwater_rss(tsk->mm); |
@@ -911,8 +907,8 @@ fastcall NORET_TYPE void do_exit(long code) | |||
911 | #endif | 907 | #endif |
912 | if (unlikely(tsk->audit_context)) | 908 | if (unlikely(tsk->audit_context)) |
913 | audit_free(tsk); | 909 | audit_free(tsk); |
914 | taskstats_exit_send(tsk, tidstats, group_dead, mycpu); | 910 | |
915 | taskstats_exit_free(tidstats); | 911 | taskstats_exit(tsk, group_dead); |
916 | 912 | ||
917 | exit_mm(tsk); | 913 | exit_mm(tsk); |
918 | 914 | ||
diff --git a/kernel/fork.c b/kernel/fork.c index 8cdd3e72ba55..7f2e31ba33af 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -82,26 +82,26 @@ int nr_processes(void) | |||
82 | #ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR | 82 | #ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR |
83 | # define alloc_task_struct() kmem_cache_alloc(task_struct_cachep, GFP_KERNEL) | 83 | # define alloc_task_struct() kmem_cache_alloc(task_struct_cachep, GFP_KERNEL) |
84 | # define free_task_struct(tsk) kmem_cache_free(task_struct_cachep, (tsk)) | 84 | # define free_task_struct(tsk) kmem_cache_free(task_struct_cachep, (tsk)) |
85 | static kmem_cache_t *task_struct_cachep; | 85 | static struct kmem_cache *task_struct_cachep; |
86 | #endif | 86 | #endif |
87 | 87 | ||
88 | /* SLAB cache for signal_struct structures (tsk->signal) */ | 88 | /* SLAB cache for signal_struct structures (tsk->signal) */ |
89 | static kmem_cache_t *signal_cachep; | 89 | static struct kmem_cache *signal_cachep; |
90 | 90 | ||
91 | /* SLAB cache for sighand_struct structures (tsk->sighand) */ | 91 | /* SLAB cache for sighand_struct structures (tsk->sighand) */ |
92 | kmem_cache_t *sighand_cachep; | 92 | struct kmem_cache *sighand_cachep; |
93 | 93 | ||
94 | /* SLAB cache for files_struct structures (tsk->files) */ | 94 | /* SLAB cache for files_struct structures (tsk->files) */ |
95 | kmem_cache_t *files_cachep; | 95 | struct kmem_cache *files_cachep; |
96 | 96 | ||
97 | /* SLAB cache for fs_struct structures (tsk->fs) */ | 97 | /* SLAB cache for fs_struct structures (tsk->fs) */ |
98 | kmem_cache_t *fs_cachep; | 98 | struct kmem_cache *fs_cachep; |
99 | 99 | ||
100 | /* SLAB cache for vm_area_struct structures */ | 100 | /* SLAB cache for vm_area_struct structures */ |
101 | kmem_cache_t *vm_area_cachep; | 101 | struct kmem_cache *vm_area_cachep; |
102 | 102 | ||
103 | /* SLAB cache for mm_struct structures (tsk->mm) */ | 103 | /* SLAB cache for mm_struct structures (tsk->mm) */ |
104 | static kmem_cache_t *mm_cachep; | 104 | static struct kmem_cache *mm_cachep; |
105 | 105 | ||
106 | void free_task(struct task_struct *tsk) | 106 | void free_task(struct task_struct *tsk) |
107 | { | 107 | { |
@@ -237,7 +237,7 @@ static inline int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) | |||
237 | goto fail_nomem; | 237 | goto fail_nomem; |
238 | charge = len; | 238 | charge = len; |
239 | } | 239 | } |
240 | tmp = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); | 240 | tmp = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); |
241 | if (!tmp) | 241 | if (!tmp) |
242 | goto fail_nomem; | 242 | goto fail_nomem; |
243 | *tmp = *mpnt; | 243 | *tmp = *mpnt; |
@@ -319,7 +319,7 @@ static inline void mm_free_pgd(struct mm_struct * mm) | |||
319 | 319 | ||
320 | __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock); | 320 | __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock); |
321 | 321 | ||
322 | #define allocate_mm() (kmem_cache_alloc(mm_cachep, SLAB_KERNEL)) | 322 | #define allocate_mm() (kmem_cache_alloc(mm_cachep, GFP_KERNEL)) |
323 | #define free_mm(mm) (kmem_cache_free(mm_cachep, (mm))) | 323 | #define free_mm(mm) (kmem_cache_free(mm_cachep, (mm))) |
324 | 324 | ||
325 | #include <linux/init_task.h> | 325 | #include <linux/init_task.h> |
@@ -448,7 +448,16 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm) | |||
448 | tsk->vfork_done = NULL; | 448 | tsk->vfork_done = NULL; |
449 | complete(vfork_done); | 449 | complete(vfork_done); |
450 | } | 450 | } |
451 | if (tsk->clear_child_tid && atomic_read(&mm->mm_users) > 1) { | 451 | |
452 | /* | ||
453 | * If we're exiting normally, clear a user-space tid field if | ||
454 | * requested. We leave this alone when dying by signal, to leave | ||
455 | * the value intact in a core dump, and to save the unnecessary | ||
456 | * trouble otherwise. Userland only wants this done for a sys_exit. | ||
457 | */ | ||
458 | if (tsk->clear_child_tid | ||
459 | && !(tsk->flags & PF_SIGNALED) | ||
460 | && atomic_read(&mm->mm_users) > 1) { | ||
452 | u32 __user * tidptr = tsk->clear_child_tid; | 461 | u32 __user * tidptr = tsk->clear_child_tid; |
453 | tsk->clear_child_tid = NULL; | 462 | tsk->clear_child_tid = NULL; |
454 | 463 | ||
@@ -479,6 +488,10 @@ static struct mm_struct *dup_mm(struct task_struct *tsk) | |||
479 | 488 | ||
480 | memcpy(mm, oldmm, sizeof(*mm)); | 489 | memcpy(mm, oldmm, sizeof(*mm)); |
481 | 490 | ||
491 | /* Initializing for Swap token stuff */ | ||
492 | mm->token_priority = 0; | ||
493 | mm->last_interval = 0; | ||
494 | |||
482 | if (!mm_init(mm)) | 495 | if (!mm_init(mm)) |
483 | goto fail_nomem; | 496 | goto fail_nomem; |
484 | 497 | ||
@@ -542,6 +555,10 @@ static int copy_mm(unsigned long clone_flags, struct task_struct * tsk) | |||
542 | goto fail_nomem; | 555 | goto fail_nomem; |
543 | 556 | ||
544 | good_mm: | 557 | good_mm: |
558 | /* Initializing for Swap token stuff */ | ||
559 | mm->token_priority = 0; | ||
560 | mm->last_interval = 0; | ||
561 | |||
545 | tsk->mm = mm; | 562 | tsk->mm = mm; |
546 | tsk->active_mm = mm; | 563 | tsk->active_mm = mm; |
547 | return 0; | 564 | return 0; |
@@ -613,7 +630,7 @@ static struct files_struct *alloc_files(void) | |||
613 | struct files_struct *newf; | 630 | struct files_struct *newf; |
614 | struct fdtable *fdt; | 631 | struct fdtable *fdt; |
615 | 632 | ||
616 | newf = kmem_cache_alloc(files_cachep, SLAB_KERNEL); | 633 | newf = kmem_cache_alloc(files_cachep, GFP_KERNEL); |
617 | if (!newf) | 634 | if (!newf) |
618 | goto out; | 635 | goto out; |
619 | 636 | ||
@@ -830,7 +847,6 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts | |||
830 | if (clone_flags & CLONE_THREAD) { | 847 | if (clone_flags & CLONE_THREAD) { |
831 | atomic_inc(¤t->signal->count); | 848 | atomic_inc(¤t->signal->count); |
832 | atomic_inc(¤t->signal->live); | 849 | atomic_inc(¤t->signal->live); |
833 | taskstats_tgid_alloc(current); | ||
834 | return 0; | 850 | return 0; |
835 | } | 851 | } |
836 | sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL); | 852 | sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL); |
@@ -1303,7 +1319,7 @@ fork_out: | |||
1303 | return ERR_PTR(retval); | 1319 | return ERR_PTR(retval); |
1304 | } | 1320 | } |
1305 | 1321 | ||
1306 | struct pt_regs * __devinit __attribute__((weak)) idle_regs(struct pt_regs *regs) | 1322 | noinline struct pt_regs * __devinit __attribute__((weak)) idle_regs(struct pt_regs *regs) |
1307 | { | 1323 | { |
1308 | memset(regs, 0, sizeof(struct pt_regs)); | 1324 | memset(regs, 0, sizeof(struct pt_regs)); |
1309 | return regs; | 1325 | return regs; |
@@ -1413,7 +1429,7 @@ long do_fork(unsigned long clone_flags, | |||
1413 | #define ARCH_MIN_MMSTRUCT_ALIGN 0 | 1429 | #define ARCH_MIN_MMSTRUCT_ALIGN 0 |
1414 | #endif | 1430 | #endif |
1415 | 1431 | ||
1416 | static void sighand_ctor(void *data, kmem_cache_t *cachep, unsigned long flags) | 1432 | static void sighand_ctor(void *data, struct kmem_cache *cachep, unsigned long flags) |
1417 | { | 1433 | { |
1418 | struct sighand_struct *sighand = data; | 1434 | struct sighand_struct *sighand = data; |
1419 | 1435 | ||
diff --git a/kernel/futex.c b/kernel/futex.c index 93ef30ba209f..95989a3b4168 100644 --- a/kernel/futex.c +++ b/kernel/futex.c | |||
@@ -282,9 +282,9 @@ static inline int get_futex_value_locked(u32 *dest, u32 __user *from) | |||
282 | { | 282 | { |
283 | int ret; | 283 | int ret; |
284 | 284 | ||
285 | inc_preempt_count(); | 285 | pagefault_disable(); |
286 | ret = __copy_from_user_inatomic(dest, from, sizeof(u32)); | 286 | ret = __copy_from_user_inatomic(dest, from, sizeof(u32)); |
287 | dec_preempt_count(); | 287 | pagefault_enable(); |
288 | 288 | ||
289 | return ret ? -EFAULT : 0; | 289 | return ret ? -EFAULT : 0; |
290 | } | 290 | } |
@@ -324,12 +324,11 @@ static int refill_pi_state_cache(void) | |||
324 | if (likely(current->pi_state_cache)) | 324 | if (likely(current->pi_state_cache)) |
325 | return 0; | 325 | return 0; |
326 | 326 | ||
327 | pi_state = kmalloc(sizeof(*pi_state), GFP_KERNEL); | 327 | pi_state = kzalloc(sizeof(*pi_state), GFP_KERNEL); |
328 | 328 | ||
329 | if (!pi_state) | 329 | if (!pi_state) |
330 | return -ENOMEM; | 330 | return -ENOMEM; |
331 | 331 | ||
332 | memset(pi_state, 0, sizeof(*pi_state)); | ||
333 | INIT_LIST_HEAD(&pi_state->list); | 332 | INIT_LIST_HEAD(&pi_state->list); |
334 | /* pi_mutex gets initialized later */ | 333 | /* pi_mutex gets initialized later */ |
335 | pi_state->owner = NULL; | 334 | pi_state->owner = NULL; |
@@ -553,7 +552,7 @@ static void wake_futex(struct futex_q *q) | |||
553 | * at the end of wake_up_all() does not prevent this store from | 552 | * at the end of wake_up_all() does not prevent this store from |
554 | * moving. | 553 | * moving. |
555 | */ | 554 | */ |
556 | wmb(); | 555 | smp_wmb(); |
557 | q->lock_ptr = NULL; | 556 | q->lock_ptr = NULL; |
558 | } | 557 | } |
559 | 558 | ||
@@ -585,9 +584,9 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this) | |||
585 | if (!(uval & FUTEX_OWNER_DIED)) { | 584 | if (!(uval & FUTEX_OWNER_DIED)) { |
586 | newval = FUTEX_WAITERS | new_owner->pid; | 585 | newval = FUTEX_WAITERS | new_owner->pid; |
587 | 586 | ||
588 | inc_preempt_count(); | 587 | pagefault_disable(); |
589 | curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval); | 588 | curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval); |
590 | dec_preempt_count(); | 589 | pagefault_enable(); |
591 | if (curval == -EFAULT) | 590 | if (curval == -EFAULT) |
592 | return -EFAULT; | 591 | return -EFAULT; |
593 | if (curval != uval) | 592 | if (curval != uval) |
@@ -618,9 +617,9 @@ static int unlock_futex_pi(u32 __user *uaddr, u32 uval) | |||
618 | * There is no waiter, so we unlock the futex. The owner died | 617 | * There is no waiter, so we unlock the futex. The owner died |
619 | * bit does not have to be preserved here. We are the owner: | 618 | * bit does not have to be preserved here. We are the owner: |
620 | */ | 619 | */ |
621 | inc_preempt_count(); | 620 | pagefault_disable(); |
622 | oldval = futex_atomic_cmpxchg_inatomic(uaddr, uval, 0); | 621 | oldval = futex_atomic_cmpxchg_inatomic(uaddr, uval, 0); |
623 | dec_preempt_count(); | 622 | pagefault_enable(); |
624 | 623 | ||
625 | if (oldval == -EFAULT) | 624 | if (oldval == -EFAULT) |
626 | return oldval; | 625 | return oldval; |
@@ -1158,9 +1157,9 @@ static int futex_lock_pi(u32 __user *uaddr, int detect, unsigned long sec, | |||
1158 | */ | 1157 | */ |
1159 | newval = current->pid; | 1158 | newval = current->pid; |
1160 | 1159 | ||
1161 | inc_preempt_count(); | 1160 | pagefault_disable(); |
1162 | curval = futex_atomic_cmpxchg_inatomic(uaddr, 0, newval); | 1161 | curval = futex_atomic_cmpxchg_inatomic(uaddr, 0, newval); |
1163 | dec_preempt_count(); | 1162 | pagefault_enable(); |
1164 | 1163 | ||
1165 | if (unlikely(curval == -EFAULT)) | 1164 | if (unlikely(curval == -EFAULT)) |
1166 | goto uaddr_faulted; | 1165 | goto uaddr_faulted; |
@@ -1183,9 +1182,9 @@ static int futex_lock_pi(u32 __user *uaddr, int detect, unsigned long sec, | |||
1183 | uval = curval; | 1182 | uval = curval; |
1184 | newval = uval | FUTEX_WAITERS; | 1183 | newval = uval | FUTEX_WAITERS; |
1185 | 1184 | ||
1186 | inc_preempt_count(); | 1185 | pagefault_disable(); |
1187 | curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval); | 1186 | curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval); |
1188 | dec_preempt_count(); | 1187 | pagefault_enable(); |
1189 | 1188 | ||
1190 | if (unlikely(curval == -EFAULT)) | 1189 | if (unlikely(curval == -EFAULT)) |
1191 | goto uaddr_faulted; | 1190 | goto uaddr_faulted; |
@@ -1215,10 +1214,10 @@ static int futex_lock_pi(u32 __user *uaddr, int detect, unsigned long sec, | |||
1215 | newval = current->pid | | 1214 | newval = current->pid | |
1216 | FUTEX_OWNER_DIED | FUTEX_WAITERS; | 1215 | FUTEX_OWNER_DIED | FUTEX_WAITERS; |
1217 | 1216 | ||
1218 | inc_preempt_count(); | 1217 | pagefault_disable(); |
1219 | curval = futex_atomic_cmpxchg_inatomic(uaddr, | 1218 | curval = futex_atomic_cmpxchg_inatomic(uaddr, |
1220 | uval, newval); | 1219 | uval, newval); |
1221 | dec_preempt_count(); | 1220 | pagefault_enable(); |
1222 | 1221 | ||
1223 | if (unlikely(curval == -EFAULT)) | 1222 | if (unlikely(curval == -EFAULT)) |
1224 | goto uaddr_faulted; | 1223 | goto uaddr_faulted; |
@@ -1390,9 +1389,9 @@ retry_locked: | |||
1390 | * anyone else up: | 1389 | * anyone else up: |
1391 | */ | 1390 | */ |
1392 | if (!(uval & FUTEX_OWNER_DIED)) { | 1391 | if (!(uval & FUTEX_OWNER_DIED)) { |
1393 | inc_preempt_count(); | 1392 | pagefault_disable(); |
1394 | uval = futex_atomic_cmpxchg_inatomic(uaddr, current->pid, 0); | 1393 | uval = futex_atomic_cmpxchg_inatomic(uaddr, current->pid, 0); |
1395 | dec_preempt_count(); | 1394 | pagefault_enable(); |
1396 | } | 1395 | } |
1397 | 1396 | ||
1398 | if (unlikely(uval == -EFAULT)) | 1397 | if (unlikely(uval == -EFAULT)) |
@@ -1493,7 +1492,7 @@ static unsigned int futex_poll(struct file *filp, | |||
1493 | return ret; | 1492 | return ret; |
1494 | } | 1493 | } |
1495 | 1494 | ||
1496 | static struct file_operations futex_fops = { | 1495 | static const struct file_operations futex_fops = { |
1497 | .release = futex_close, | 1496 | .release = futex_close, |
1498 | .poll = futex_poll, | 1497 | .poll = futex_poll, |
1499 | }; | 1498 | }; |
@@ -1858,10 +1857,16 @@ static struct file_system_type futex_fs_type = { | |||
1858 | 1857 | ||
1859 | static int __init init(void) | 1858 | static int __init init(void) |
1860 | { | 1859 | { |
1861 | unsigned int i; | 1860 | int i = register_filesystem(&futex_fs_type); |
1861 | |||
1862 | if (i) | ||
1863 | return i; | ||
1862 | 1864 | ||
1863 | register_filesystem(&futex_fs_type); | ||
1864 | futex_mnt = kern_mount(&futex_fs_type); | 1865 | futex_mnt = kern_mount(&futex_fs_type); |
1866 | if (IS_ERR(futex_mnt)) { | ||
1867 | unregister_filesystem(&futex_fs_type); | ||
1868 | return PTR_ERR(futex_mnt); | ||
1869 | } | ||
1865 | 1870 | ||
1866 | for (i = 0; i < ARRAY_SIZE(futex_queues); i++) { | 1871 | for (i = 0; i < ARRAY_SIZE(futex_queues); i++) { |
1867 | INIT_LIST_HEAD(&futex_queues[i].chain); | 1872 | INIT_LIST_HEAD(&futex_queues[i].chain); |
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index a681912bc89a..aff1f0fabb0d 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c | |||
@@ -54,7 +54,7 @@ struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned = { | |||
54 | .chip = &no_irq_chip, | 54 | .chip = &no_irq_chip, |
55 | .handle_irq = handle_bad_irq, | 55 | .handle_irq = handle_bad_irq, |
56 | .depth = 1, | 56 | .depth = 1, |
57 | .lock = SPIN_LOCK_UNLOCKED, | 57 | .lock = __SPIN_LOCK_UNLOCKED(irq_desc->lock), |
58 | #ifdef CONFIG_SMP | 58 | #ifdef CONFIG_SMP |
59 | .affinity = CPU_MASK_ALL | 59 | .affinity = CPU_MASK_ALL |
60 | #endif | 60 | #endif |
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index eeac3e313b2b..ab63cfc42992 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/proc_fs.h> | 20 | #include <linux/proc_fs.h> |
21 | #include <linux/sched.h> /* for cond_resched */ | 21 | #include <linux/sched.h> /* for cond_resched */ |
22 | #include <linux/mm.h> | 22 | #include <linux/mm.h> |
23 | #include <linux/ctype.h> | ||
23 | 24 | ||
24 | #include <asm/sections.h> | 25 | #include <asm/sections.h> |
25 | 26 | ||
@@ -301,13 +302,6 @@ struct kallsym_iter | |||
301 | char name[KSYM_NAME_LEN+1]; | 302 | char name[KSYM_NAME_LEN+1]; |
302 | }; | 303 | }; |
303 | 304 | ||
304 | /* Only label it "global" if it is exported. */ | ||
305 | static void upcase_if_global(struct kallsym_iter *iter) | ||
306 | { | ||
307 | if (is_exported(iter->name, iter->owner)) | ||
308 | iter->type += 'A' - 'a'; | ||
309 | } | ||
310 | |||
311 | static int get_ksymbol_mod(struct kallsym_iter *iter) | 305 | static int get_ksymbol_mod(struct kallsym_iter *iter) |
312 | { | 306 | { |
313 | iter->owner = module_get_kallsym(iter->pos - kallsyms_num_syms, | 307 | iter->owner = module_get_kallsym(iter->pos - kallsyms_num_syms, |
@@ -316,7 +310,10 @@ static int get_ksymbol_mod(struct kallsym_iter *iter) | |||
316 | if (iter->owner == NULL) | 310 | if (iter->owner == NULL) |
317 | return 0; | 311 | return 0; |
318 | 312 | ||
319 | upcase_if_global(iter); | 313 | /* Label it "global" if it is exported, "local" if not exported. */ |
314 | iter->type = is_exported(iter->name, iter->owner) | ||
315 | ? toupper(iter->type) : tolower(iter->type); | ||
316 | |||
320 | return 1; | 317 | return 1; |
321 | } | 318 | } |
322 | 319 | ||
@@ -401,7 +398,7 @@ static int s_show(struct seq_file *m, void *p) | |||
401 | return 0; | 398 | return 0; |
402 | } | 399 | } |
403 | 400 | ||
404 | static struct seq_operations kallsyms_op = { | 401 | static const struct seq_operations kallsyms_op = { |
405 | .start = s_start, | 402 | .start = s_start, |
406 | .next = s_next, | 403 | .next = s_next, |
407 | .stop = s_stop, | 404 | .stop = s_stop, |
@@ -436,7 +433,7 @@ static int kallsyms_release(struct inode *inode, struct file *file) | |||
436 | return seq_release(inode, file); | 433 | return seq_release(inode, file); |
437 | } | 434 | } |
438 | 435 | ||
439 | static struct file_operations kallsyms_operations = { | 436 | static const struct file_operations kallsyms_operations = { |
440 | .open = kallsyms_open, | 437 | .open = kallsyms_open, |
441 | .read = seq_read, | 438 | .read = seq_read, |
442 | .llseek = seq_lseek, | 439 | .llseek = seq_lseek, |
diff --git a/kernel/kexec.c b/kernel/kexec.c index 05aada293592..2a59c8a01ae0 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c | |||
@@ -20,6 +20,8 @@ | |||
20 | #include <linux/syscalls.h> | 20 | #include <linux/syscalls.h> |
21 | #include <linux/ioport.h> | 21 | #include <linux/ioport.h> |
22 | #include <linux/hardirq.h> | 22 | #include <linux/hardirq.h> |
23 | #include <linux/elf.h> | ||
24 | #include <linux/elfcore.h> | ||
23 | 25 | ||
24 | #include <asm/page.h> | 26 | #include <asm/page.h> |
25 | #include <asm/uaccess.h> | 27 | #include <asm/uaccess.h> |
@@ -108,11 +110,10 @@ static int do_kimage_alloc(struct kimage **rimage, unsigned long entry, | |||
108 | 110 | ||
109 | /* Allocate a controlling structure */ | 111 | /* Allocate a controlling structure */ |
110 | result = -ENOMEM; | 112 | result = -ENOMEM; |
111 | image = kmalloc(sizeof(*image), GFP_KERNEL); | 113 | image = kzalloc(sizeof(*image), GFP_KERNEL); |
112 | if (!image) | 114 | if (!image) |
113 | goto out; | 115 | goto out; |
114 | 116 | ||
115 | memset(image, 0, sizeof(*image)); | ||
116 | image->head = 0; | 117 | image->head = 0; |
117 | image->entry = &image->head; | 118 | image->entry = &image->head; |
118 | image->last_entry = &image->head; | 119 | image->last_entry = &image->head; |
@@ -1068,6 +1069,60 @@ void crash_kexec(struct pt_regs *regs) | |||
1068 | } | 1069 | } |
1069 | } | 1070 | } |
1070 | 1071 | ||
1072 | static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data, | ||
1073 | size_t data_len) | ||
1074 | { | ||
1075 | struct elf_note note; | ||
1076 | |||
1077 | note.n_namesz = strlen(name) + 1; | ||
1078 | note.n_descsz = data_len; | ||
1079 | note.n_type = type; | ||
1080 | memcpy(buf, &note, sizeof(note)); | ||
1081 | buf += (sizeof(note) + 3)/4; | ||
1082 | memcpy(buf, name, note.n_namesz); | ||
1083 | buf += (note.n_namesz + 3)/4; | ||
1084 | memcpy(buf, data, note.n_descsz); | ||
1085 | buf += (note.n_descsz + 3)/4; | ||
1086 | |||
1087 | return buf; | ||
1088 | } | ||
1089 | |||
1090 | static void final_note(u32 *buf) | ||
1091 | { | ||
1092 | struct elf_note note; | ||
1093 | |||
1094 | note.n_namesz = 0; | ||
1095 | note.n_descsz = 0; | ||
1096 | note.n_type = 0; | ||
1097 | memcpy(buf, &note, sizeof(note)); | ||
1098 | } | ||
1099 | |||
1100 | void crash_save_cpu(struct pt_regs *regs, int cpu) | ||
1101 | { | ||
1102 | struct elf_prstatus prstatus; | ||
1103 | u32 *buf; | ||
1104 | |||
1105 | if ((cpu < 0) || (cpu >= NR_CPUS)) | ||
1106 | return; | ||
1107 | |||
1108 | /* Using ELF notes here is opportunistic. | ||
1109 | * I need a well defined structure format | ||
1110 | * for the data I pass, and I need tags | ||
1111 | * on the data to indicate what information I have | ||
1112 | * squirrelled away. ELF notes happen to provide | ||
1113 | * all of that, so there is no need to invent something new. | ||
1114 | */ | ||
1115 | buf = (u32*)per_cpu_ptr(crash_notes, cpu); | ||
1116 | if (!buf) | ||
1117 | return; | ||
1118 | memset(&prstatus, 0, sizeof(prstatus)); | ||
1119 | prstatus.pr_pid = current->pid; | ||
1120 | elf_core_copy_regs(&prstatus.pr_reg, regs); | ||
1121 | buf = append_elf_note(buf, "CORE", NT_PRSTATUS, &prstatus, | ||
1122 | sizeof(prstatus)); | ||
1123 | final_note(buf); | ||
1124 | } | ||
1125 | |||
1071 | static int __init crash_notes_memory_init(void) | 1126 | static int __init crash_notes_memory_init(void) |
1072 | { | 1127 | { |
1073 | /* Allocate memory for saving cpu registers. */ | 1128 | /* Allocate memory for saving cpu registers. */ |
diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 610c837ad9e0..17ec4afb0994 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c | |||
@@ -38,6 +38,7 @@ | |||
38 | #include <linux/module.h> | 38 | #include <linux/module.h> |
39 | #include <linux/moduleloader.h> | 39 | #include <linux/moduleloader.h> |
40 | #include <linux/kallsyms.h> | 40 | #include <linux/kallsyms.h> |
41 | #include <linux/freezer.h> | ||
41 | #include <asm-generic/sections.h> | 42 | #include <asm-generic/sections.h> |
42 | #include <asm/cacheflush.h> | 43 | #include <asm/cacheflush.h> |
43 | #include <asm/errno.h> | 44 | #include <asm/errno.h> |
@@ -83,9 +84,36 @@ struct kprobe_insn_page { | |||
83 | kprobe_opcode_t *insns; /* Page of instruction slots */ | 84 | kprobe_opcode_t *insns; /* Page of instruction slots */ |
84 | char slot_used[INSNS_PER_PAGE]; | 85 | char slot_used[INSNS_PER_PAGE]; |
85 | int nused; | 86 | int nused; |
87 | int ngarbage; | ||
86 | }; | 88 | }; |
87 | 89 | ||
88 | static struct hlist_head kprobe_insn_pages; | 90 | static struct hlist_head kprobe_insn_pages; |
91 | static int kprobe_garbage_slots; | ||
92 | static int collect_garbage_slots(void); | ||
93 | |||
94 | static int __kprobes check_safety(void) | ||
95 | { | ||
96 | int ret = 0; | ||
97 | #if defined(CONFIG_PREEMPT) && defined(CONFIG_PM) | ||
98 | ret = freeze_processes(); | ||
99 | if (ret == 0) { | ||
100 | struct task_struct *p, *q; | ||
101 | do_each_thread(p, q) { | ||
102 | if (p != current && p->state == TASK_RUNNING && | ||
103 | p->pid != 0) { | ||
104 | printk("Check failed: %s is running\n",p->comm); | ||
105 | ret = -1; | ||
106 | goto loop_end; | ||
107 | } | ||
108 | } while_each_thread(p, q); | ||
109 | } | ||
110 | loop_end: | ||
111 | thaw_processes(); | ||
112 | #else | ||
113 | synchronize_sched(); | ||
114 | #endif | ||
115 | return ret; | ||
116 | } | ||
89 | 117 | ||
90 | /** | 118 | /** |
91 | * get_insn_slot() - Find a slot on an executable page for an instruction. | 119 | * get_insn_slot() - Find a slot on an executable page for an instruction. |
@@ -96,6 +124,7 @@ kprobe_opcode_t __kprobes *get_insn_slot(void) | |||
96 | struct kprobe_insn_page *kip; | 124 | struct kprobe_insn_page *kip; |
97 | struct hlist_node *pos; | 125 | struct hlist_node *pos; |
98 | 126 | ||
127 | retry: | ||
99 | hlist_for_each(pos, &kprobe_insn_pages) { | 128 | hlist_for_each(pos, &kprobe_insn_pages) { |
100 | kip = hlist_entry(pos, struct kprobe_insn_page, hlist); | 129 | kip = hlist_entry(pos, struct kprobe_insn_page, hlist); |
101 | if (kip->nused < INSNS_PER_PAGE) { | 130 | if (kip->nused < INSNS_PER_PAGE) { |
@@ -112,7 +141,11 @@ kprobe_opcode_t __kprobes *get_insn_slot(void) | |||
112 | } | 141 | } |
113 | } | 142 | } |
114 | 143 | ||
115 | /* All out of space. Need to allocate a new page. Use slot 0.*/ | 144 | /* If there are any garbage slots, collect it and try again. */ |
145 | if (kprobe_garbage_slots && collect_garbage_slots() == 0) { | ||
146 | goto retry; | ||
147 | } | ||
148 | /* All out of space. Need to allocate a new page. Use slot 0. */ | ||
116 | kip = kmalloc(sizeof(struct kprobe_insn_page), GFP_KERNEL); | 149 | kip = kmalloc(sizeof(struct kprobe_insn_page), GFP_KERNEL); |
117 | if (!kip) { | 150 | if (!kip) { |
118 | return NULL; | 151 | return NULL; |
@@ -133,10 +166,62 @@ kprobe_opcode_t __kprobes *get_insn_slot(void) | |||
133 | memset(kip->slot_used, 0, INSNS_PER_PAGE); | 166 | memset(kip->slot_used, 0, INSNS_PER_PAGE); |
134 | kip->slot_used[0] = 1; | 167 | kip->slot_used[0] = 1; |
135 | kip->nused = 1; | 168 | kip->nused = 1; |
169 | kip->ngarbage = 0; | ||
136 | return kip->insns; | 170 | return kip->insns; |
137 | } | 171 | } |
138 | 172 | ||
139 | void __kprobes free_insn_slot(kprobe_opcode_t *slot) | 173 | /* Return 1 if all garbages are collected, otherwise 0. */ |
174 | static int __kprobes collect_one_slot(struct kprobe_insn_page *kip, int idx) | ||
175 | { | ||
176 | kip->slot_used[idx] = 0; | ||
177 | kip->nused--; | ||
178 | if (kip->nused == 0) { | ||
179 | /* | ||
180 | * Page is no longer in use. Free it unless | ||
181 | * it's the last one. We keep the last one | ||
182 | * so as not to have to set it up again the | ||
183 | * next time somebody inserts a probe. | ||
184 | */ | ||
185 | hlist_del(&kip->hlist); | ||
186 | if (hlist_empty(&kprobe_insn_pages)) { | ||
187 | INIT_HLIST_NODE(&kip->hlist); | ||
188 | hlist_add_head(&kip->hlist, | ||
189 | &kprobe_insn_pages); | ||
190 | } else { | ||
191 | module_free(NULL, kip->insns); | ||
192 | kfree(kip); | ||
193 | } | ||
194 | return 1; | ||
195 | } | ||
196 | return 0; | ||
197 | } | ||
198 | |||
199 | static int __kprobes collect_garbage_slots(void) | ||
200 | { | ||
201 | struct kprobe_insn_page *kip; | ||
202 | struct hlist_node *pos, *next; | ||
203 | |||
204 | /* Ensure no-one is preepmted on the garbages */ | ||
205 | if (check_safety() != 0) | ||
206 | return -EAGAIN; | ||
207 | |||
208 | hlist_for_each_safe(pos, next, &kprobe_insn_pages) { | ||
209 | int i; | ||
210 | kip = hlist_entry(pos, struct kprobe_insn_page, hlist); | ||
211 | if (kip->ngarbage == 0) | ||
212 | continue; | ||
213 | kip->ngarbage = 0; /* we will collect all garbages */ | ||
214 | for (i = 0; i < INSNS_PER_PAGE; i++) { | ||
215 | if (kip->slot_used[i] == -1 && | ||
216 | collect_one_slot(kip, i)) | ||
217 | break; | ||
218 | } | ||
219 | } | ||
220 | kprobe_garbage_slots = 0; | ||
221 | return 0; | ||
222 | } | ||
223 | |||
224 | void __kprobes free_insn_slot(kprobe_opcode_t * slot, int dirty) | ||
140 | { | 225 | { |
141 | struct kprobe_insn_page *kip; | 226 | struct kprobe_insn_page *kip; |
142 | struct hlist_node *pos; | 227 | struct hlist_node *pos; |
@@ -146,28 +231,18 @@ void __kprobes free_insn_slot(kprobe_opcode_t *slot) | |||
146 | if (kip->insns <= slot && | 231 | if (kip->insns <= slot && |
147 | slot < kip->insns + (INSNS_PER_PAGE * MAX_INSN_SIZE)) { | 232 | slot < kip->insns + (INSNS_PER_PAGE * MAX_INSN_SIZE)) { |
148 | int i = (slot - kip->insns) / MAX_INSN_SIZE; | 233 | int i = (slot - kip->insns) / MAX_INSN_SIZE; |
149 | kip->slot_used[i] = 0; | 234 | if (dirty) { |
150 | kip->nused--; | 235 | kip->slot_used[i] = -1; |
151 | if (kip->nused == 0) { | 236 | kip->ngarbage++; |
152 | /* | 237 | } else { |
153 | * Page is no longer in use. Free it unless | 238 | collect_one_slot(kip, i); |
154 | * it's the last one. We keep the last one | ||
155 | * so as not to have to set it up again the | ||
156 | * next time somebody inserts a probe. | ||
157 | */ | ||
158 | hlist_del(&kip->hlist); | ||
159 | if (hlist_empty(&kprobe_insn_pages)) { | ||
160 | INIT_HLIST_NODE(&kip->hlist); | ||
161 | hlist_add_head(&kip->hlist, | ||
162 | &kprobe_insn_pages); | ||
163 | } else { | ||
164 | module_free(NULL, kip->insns); | ||
165 | kfree(kip); | ||
166 | } | ||
167 | } | 239 | } |
168 | return; | 240 | break; |
169 | } | 241 | } |
170 | } | 242 | } |
243 | if (dirty && (++kprobe_garbage_slots > INSNS_PER_PAGE)) { | ||
244 | collect_garbage_slots(); | ||
245 | } | ||
171 | } | 246 | } |
172 | #endif | 247 | #endif |
173 | 248 | ||
diff --git a/kernel/lockdep.c b/kernel/lockdep.c index c9fefdb1a7db..b02032476dc2 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c | |||
@@ -140,13 +140,6 @@ void lockdep_on(void) | |||
140 | 140 | ||
141 | EXPORT_SYMBOL(lockdep_on); | 141 | EXPORT_SYMBOL(lockdep_on); |
142 | 142 | ||
143 | int lockdep_internal(void) | ||
144 | { | ||
145 | return current->lockdep_recursion != 0; | ||
146 | } | ||
147 | |||
148 | EXPORT_SYMBOL(lockdep_internal); | ||
149 | |||
150 | /* | 143 | /* |
151 | * Debugging switches: | 144 | * Debugging switches: |
152 | */ | 145 | */ |
@@ -228,17 +221,15 @@ static int save_trace(struct stack_trace *trace) | |||
228 | trace->skip = 3; | 221 | trace->skip = 3; |
229 | trace->all_contexts = 0; | 222 | trace->all_contexts = 0; |
230 | 223 | ||
231 | /* Make sure to not recurse in case the the unwinder needs to tak | ||
232 | e locks. */ | ||
233 | lockdep_off(); | ||
234 | save_stack_trace(trace, NULL); | 224 | save_stack_trace(trace, NULL); |
235 | lockdep_on(); | ||
236 | 225 | ||
237 | trace->max_entries = trace->nr_entries; | 226 | trace->max_entries = trace->nr_entries; |
238 | 227 | ||
239 | nr_stack_trace_entries += trace->nr_entries; | 228 | nr_stack_trace_entries += trace->nr_entries; |
240 | if (DEBUG_LOCKS_WARN_ON(nr_stack_trace_entries > MAX_STACK_TRACE_ENTRIES)) | 229 | if (DEBUG_LOCKS_WARN_ON(nr_stack_trace_entries > MAX_STACK_TRACE_ENTRIES)) { |
230 | __raw_spin_unlock(&hash_lock); | ||
241 | return 0; | 231 | return 0; |
232 | } | ||
242 | 233 | ||
243 | if (nr_stack_trace_entries == MAX_STACK_TRACE_ENTRIES) { | 234 | if (nr_stack_trace_entries == MAX_STACK_TRACE_ENTRIES) { |
244 | __raw_spin_unlock(&hash_lock); | 235 | __raw_spin_unlock(&hash_lock); |
@@ -357,7 +348,7 @@ get_usage_chars(struct lock_class *class, char *c1, char *c2, char *c3, char *c4 | |||
357 | 348 | ||
358 | static void print_lock_name(struct lock_class *class) | 349 | static void print_lock_name(struct lock_class *class) |
359 | { | 350 | { |
360 | char str[128], c1, c2, c3, c4; | 351 | char str[KSYM_NAME_LEN + 1], c1, c2, c3, c4; |
361 | const char *name; | 352 | const char *name; |
362 | 353 | ||
363 | get_usage_chars(class, &c1, &c2, &c3, &c4); | 354 | get_usage_chars(class, &c1, &c2, &c3, &c4); |
@@ -379,7 +370,7 @@ static void print_lock_name(struct lock_class *class) | |||
379 | static void print_lockdep_cache(struct lockdep_map *lock) | 370 | static void print_lockdep_cache(struct lockdep_map *lock) |
380 | { | 371 | { |
381 | const char *name; | 372 | const char *name; |
382 | char str[128]; | 373 | char str[KSYM_NAME_LEN + 1]; |
383 | 374 | ||
384 | name = lock->name; | 375 | name = lock->name; |
385 | if (!name) | 376 | if (!name) |
@@ -449,7 +440,9 @@ static void print_lock_dependencies(struct lock_class *class, int depth) | |||
449 | print_lock_class_header(class, depth); | 440 | print_lock_class_header(class, depth); |
450 | 441 | ||
451 | list_for_each_entry(entry, &class->locks_after, entry) { | 442 | list_for_each_entry(entry, &class->locks_after, entry) { |
452 | DEBUG_LOCKS_WARN_ON(!entry->class); | 443 | if (DEBUG_LOCKS_WARN_ON(!entry->class)) |
444 | return; | ||
445 | |||
453 | print_lock_dependencies(entry->class, depth + 1); | 446 | print_lock_dependencies(entry->class, depth + 1); |
454 | 447 | ||
455 | printk("%*s ... acquired at:\n",depth,""); | 448 | printk("%*s ... acquired at:\n",depth,""); |
@@ -474,7 +467,8 @@ static int add_lock_to_list(struct lock_class *class, struct lock_class *this, | |||
474 | return 0; | 467 | return 0; |
475 | 468 | ||
476 | entry->class = this; | 469 | entry->class = this; |
477 | save_trace(&entry->trace); | 470 | if (!save_trace(&entry->trace)) |
471 | return 0; | ||
478 | 472 | ||
479 | /* | 473 | /* |
480 | * Since we never remove from the dependency list, the list can | 474 | * Since we never remove from the dependency list, the list can |
@@ -562,8 +556,12 @@ static noinline int print_circular_bug_tail(void) | |||
562 | if (debug_locks_silent) | 556 | if (debug_locks_silent) |
563 | return 0; | 557 | return 0; |
564 | 558 | ||
559 | /* hash_lock unlocked by the header */ | ||
560 | __raw_spin_lock(&hash_lock); | ||
565 | this.class = check_source->class; | 561 | this.class = check_source->class; |
566 | save_trace(&this.trace); | 562 | if (!save_trace(&this.trace)) |
563 | return 0; | ||
564 | __raw_spin_unlock(&hash_lock); | ||
567 | print_circular_bug_entry(&this, 0); | 565 | print_circular_bug_entry(&this, 0); |
568 | 566 | ||
569 | printk("\nother info that might help us debug this:\n\n"); | 567 | printk("\nother info that might help us debug this:\n\n"); |
@@ -966,14 +964,11 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, | |||
966 | &prev->class->locks_after, next->acquire_ip); | 964 | &prev->class->locks_after, next->acquire_ip); |
967 | if (!ret) | 965 | if (!ret) |
968 | return 0; | 966 | return 0; |
969 | /* | 967 | |
970 | * Return value of 2 signals 'dependency already added', | ||
971 | * in that case we dont have to add the backlink either. | ||
972 | */ | ||
973 | if (ret == 2) | ||
974 | return 2; | ||
975 | ret = add_lock_to_list(next->class, prev->class, | 968 | ret = add_lock_to_list(next->class, prev->class, |
976 | &next->class->locks_before, next->acquire_ip); | 969 | &next->class->locks_before, next->acquire_ip); |
970 | if (!ret) | ||
971 | return 0; | ||
977 | 972 | ||
978 | /* | 973 | /* |
979 | * Debugging printouts: | 974 | * Debugging printouts: |
@@ -1025,7 +1020,8 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next) | |||
1025 | * added: | 1020 | * added: |
1026 | */ | 1021 | */ |
1027 | if (hlock->read != 2) { | 1022 | if (hlock->read != 2) { |
1028 | check_prev_add(curr, hlock, next); | 1023 | if (!check_prev_add(curr, hlock, next)) |
1024 | return 0; | ||
1029 | /* | 1025 | /* |
1030 | * Stop after the first non-trylock entry, | 1026 | * Stop after the first non-trylock entry, |
1031 | * as non-trylock entries have added their | 1027 | * as non-trylock entries have added their |
@@ -1182,6 +1178,7 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) | |||
1182 | struct lockdep_subclass_key *key; | 1178 | struct lockdep_subclass_key *key; |
1183 | struct list_head *hash_head; | 1179 | struct list_head *hash_head; |
1184 | struct lock_class *class; | 1180 | struct lock_class *class; |
1181 | unsigned long flags; | ||
1185 | 1182 | ||
1186 | class = look_up_lock_class(lock, subclass); | 1183 | class = look_up_lock_class(lock, subclass); |
1187 | if (likely(class)) | 1184 | if (likely(class)) |
@@ -1203,6 +1200,7 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) | |||
1203 | key = lock->key->subkeys + subclass; | 1200 | key = lock->key->subkeys + subclass; |
1204 | hash_head = classhashentry(key); | 1201 | hash_head = classhashentry(key); |
1205 | 1202 | ||
1203 | raw_local_irq_save(flags); | ||
1206 | __raw_spin_lock(&hash_lock); | 1204 | __raw_spin_lock(&hash_lock); |
1207 | /* | 1205 | /* |
1208 | * We have to do the hash-walk again, to avoid races | 1206 | * We have to do the hash-walk again, to avoid races |
@@ -1217,6 +1215,7 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) | |||
1217 | */ | 1215 | */ |
1218 | if (nr_lock_classes >= MAX_LOCKDEP_KEYS) { | 1216 | if (nr_lock_classes >= MAX_LOCKDEP_KEYS) { |
1219 | __raw_spin_unlock(&hash_lock); | 1217 | __raw_spin_unlock(&hash_lock); |
1218 | raw_local_irq_restore(flags); | ||
1220 | debug_locks_off(); | 1219 | debug_locks_off(); |
1221 | printk("BUG: MAX_LOCKDEP_KEYS too low!\n"); | 1220 | printk("BUG: MAX_LOCKDEP_KEYS too low!\n"); |
1222 | printk("turning off the locking correctness validator.\n"); | 1221 | printk("turning off the locking correctness validator.\n"); |
@@ -1239,15 +1238,18 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) | |||
1239 | 1238 | ||
1240 | if (verbose(class)) { | 1239 | if (verbose(class)) { |
1241 | __raw_spin_unlock(&hash_lock); | 1240 | __raw_spin_unlock(&hash_lock); |
1241 | raw_local_irq_restore(flags); | ||
1242 | printk("\nnew class %p: %s", class->key, class->name); | 1242 | printk("\nnew class %p: %s", class->key, class->name); |
1243 | if (class->name_version > 1) | 1243 | if (class->name_version > 1) |
1244 | printk("#%d", class->name_version); | 1244 | printk("#%d", class->name_version); |
1245 | printk("\n"); | 1245 | printk("\n"); |
1246 | dump_stack(); | 1246 | dump_stack(); |
1247 | raw_local_irq_save(flags); | ||
1247 | __raw_spin_lock(&hash_lock); | 1248 | __raw_spin_lock(&hash_lock); |
1248 | } | 1249 | } |
1249 | out_unlock_set: | 1250 | out_unlock_set: |
1250 | __raw_spin_unlock(&hash_lock); | 1251 | __raw_spin_unlock(&hash_lock); |
1252 | raw_local_irq_restore(flags); | ||
1251 | 1253 | ||
1252 | if (!subclass || force) | 1254 | if (!subclass || force) |
1253 | lock->class_cache = class; | 1255 | lock->class_cache = class; |
@@ -1728,6 +1730,7 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this, | |||
1728 | debug_atomic_dec(&nr_unused_locks); | 1730 | debug_atomic_dec(&nr_unused_locks); |
1729 | break; | 1731 | break; |
1730 | default: | 1732 | default: |
1733 | __raw_spin_unlock(&hash_lock); | ||
1731 | debug_locks_off(); | 1734 | debug_locks_off(); |
1732 | WARN_ON(1); | 1735 | WARN_ON(1); |
1733 | return 0; | 1736 | return 0; |
@@ -2645,6 +2648,7 @@ void debug_check_no_locks_freed(const void *mem_from, unsigned long mem_len) | |||
2645 | } | 2648 | } |
2646 | local_irq_restore(flags); | 2649 | local_irq_restore(flags); |
2647 | } | 2650 | } |
2651 | EXPORT_SYMBOL_GPL(debug_check_no_locks_freed); | ||
2648 | 2652 | ||
2649 | static void print_held_locks_bug(struct task_struct *curr) | 2653 | static void print_held_locks_bug(struct task_struct *curr) |
2650 | { | 2654 | { |
diff --git a/kernel/lockdep_internals.h b/kernel/lockdep_internals.h index eab043c83bb2..8ce09bc4613d 100644 --- a/kernel/lockdep_internals.h +++ b/kernel/lockdep_internals.h | |||
@@ -20,7 +20,7 @@ | |||
20 | #define MAX_LOCKDEP_KEYS_BITS 11 | 20 | #define MAX_LOCKDEP_KEYS_BITS 11 |
21 | #define MAX_LOCKDEP_KEYS (1UL << MAX_LOCKDEP_KEYS_BITS) | 21 | #define MAX_LOCKDEP_KEYS (1UL << MAX_LOCKDEP_KEYS_BITS) |
22 | 22 | ||
23 | #define MAX_LOCKDEP_CHAINS_BITS 13 | 23 | #define MAX_LOCKDEP_CHAINS_BITS 14 |
24 | #define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS) | 24 | #define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS) |
25 | 25 | ||
26 | /* | 26 | /* |
diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c index f6e72eaab3fa..b554b40a4aa6 100644 --- a/kernel/lockdep_proc.c +++ b/kernel/lockdep_proc.c | |||
@@ -113,7 +113,7 @@ static int l_show(struct seq_file *m, void *v) | |||
113 | return 0; | 113 | return 0; |
114 | } | 114 | } |
115 | 115 | ||
116 | static struct seq_operations lockdep_ops = { | 116 | static const struct seq_operations lockdep_ops = { |
117 | .start = l_start, | 117 | .start = l_start, |
118 | .next = l_next, | 118 | .next = l_next, |
119 | .stop = l_stop, | 119 | .stop = l_stop, |
@@ -135,7 +135,7 @@ static int lockdep_open(struct inode *inode, struct file *file) | |||
135 | return res; | 135 | return res; |
136 | } | 136 | } |
137 | 137 | ||
138 | static struct file_operations proc_lockdep_operations = { | 138 | static const struct file_operations proc_lockdep_operations = { |
139 | .open = lockdep_open, | 139 | .open = lockdep_open, |
140 | .read = seq_read, | 140 | .read = seq_read, |
141 | .llseek = seq_lseek, | 141 | .llseek = seq_lseek, |
@@ -319,7 +319,7 @@ static int lockdep_stats_open(struct inode *inode, struct file *file) | |||
319 | return single_open(file, lockdep_stats_show, NULL); | 319 | return single_open(file, lockdep_stats_show, NULL); |
320 | } | 320 | } |
321 | 321 | ||
322 | static struct file_operations proc_lockdep_stats_operations = { | 322 | static const struct file_operations proc_lockdep_stats_operations = { |
323 | .open = lockdep_stats_open, | 323 | .open = lockdep_stats_open, |
324 | .read = seq_read, | 324 | .read = seq_read, |
325 | .llseek = seq_lseek, | 325 | .llseek = seq_lseek, |
diff --git a/kernel/module.c b/kernel/module.c index e2d09d604ca0..d9eae45d0145 100644 --- a/kernel/module.c +++ b/kernel/module.c | |||
@@ -2209,7 +2209,7 @@ static int m_show(struct seq_file *m, void *p) | |||
2209 | Where refcount is a number or -, and deps is a comma-separated list | 2209 | Where refcount is a number or -, and deps is a comma-separated list |
2210 | of depends or -. | 2210 | of depends or -. |
2211 | */ | 2211 | */ |
2212 | struct seq_operations modules_op = { | 2212 | const struct seq_operations modules_op = { |
2213 | .start = m_start, | 2213 | .start = m_start, |
2214 | .next = m_next, | 2214 | .next = m_next, |
2215 | .stop = m_stop, | 2215 | .stop = m_stop, |
diff --git a/kernel/mutex-debug.c b/kernel/mutex-debug.c index 18651641a7b5..841539d72c55 100644 --- a/kernel/mutex-debug.c +++ b/kernel/mutex-debug.c | |||
@@ -77,6 +77,9 @@ void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, | |||
77 | 77 | ||
78 | void debug_mutex_unlock(struct mutex *lock) | 78 | void debug_mutex_unlock(struct mutex *lock) |
79 | { | 79 | { |
80 | if (unlikely(!debug_locks)) | ||
81 | return; | ||
82 | |||
80 | DEBUG_LOCKS_WARN_ON(lock->owner != current_thread_info()); | 83 | DEBUG_LOCKS_WARN_ON(lock->owner != current_thread_info()); |
81 | DEBUG_LOCKS_WARN_ON(lock->magic != lock); | 84 | DEBUG_LOCKS_WARN_ON(lock->magic != lock); |
82 | DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next); | 85 | DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next); |
diff --git a/kernel/pid.c b/kernel/pid.c index b914392085f9..a48879b0b921 100644 --- a/kernel/pid.c +++ b/kernel/pid.c | |||
@@ -31,7 +31,7 @@ | |||
31 | #define pid_hashfn(nr) hash_long((unsigned long)nr, pidhash_shift) | 31 | #define pid_hashfn(nr) hash_long((unsigned long)nr, pidhash_shift) |
32 | static struct hlist_head *pid_hash; | 32 | static struct hlist_head *pid_hash; |
33 | static int pidhash_shift; | 33 | static int pidhash_shift; |
34 | static kmem_cache_t *pid_cachep; | 34 | static struct kmem_cache *pid_cachep; |
35 | 35 | ||
36 | int pid_max = PID_MAX_DEFAULT; | 36 | int pid_max = PID_MAX_DEFAULT; |
37 | 37 | ||
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index 9cbb5d1be06f..5fe87de10ff0 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c | |||
@@ -70,7 +70,7 @@ | |||
70 | /* | 70 | /* |
71 | * Lets keep our timers in a slab cache :-) | 71 | * Lets keep our timers in a slab cache :-) |
72 | */ | 72 | */ |
73 | static kmem_cache_t *posix_timers_cache; | 73 | static struct kmem_cache *posix_timers_cache; |
74 | static struct idr posix_timers_id; | 74 | static struct idr posix_timers_id; |
75 | static DEFINE_SPINLOCK(idr_lock); | 75 | static DEFINE_SPINLOCK(idr_lock); |
76 | 76 | ||
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 825068ca3479..710ed084e7c5 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig | |||
@@ -78,7 +78,7 @@ config PM_SYSFS_DEPRECATED | |||
78 | 78 | ||
79 | config SOFTWARE_SUSPEND | 79 | config SOFTWARE_SUSPEND |
80 | bool "Software Suspend" | 80 | bool "Software Suspend" |
81 | depends on PM && SWAP && ((X86 && (!SMP || SUSPEND_SMP) && !X86_PAE) || ((FRV || PPC32) && !SMP)) | 81 | depends on PM && SWAP && ((X86 && (!SMP || SUSPEND_SMP)) || ((FRV || PPC32) && !SMP)) |
82 | ---help--- | 82 | ---help--- |
83 | Enable the possibility of suspending the machine. | 83 | Enable the possibility of suspending the machine. |
84 | It doesn't need ACPI or APM. | 84 | It doesn't need ACPI or APM. |
diff --git a/kernel/power/disk.c b/kernel/power/disk.c index b1fb7866b0b3..0b00f56c2ad0 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/pm.h> | 20 | #include <linux/pm.h> |
21 | #include <linux/console.h> | 21 | #include <linux/console.h> |
22 | #include <linux/cpu.h> | 22 | #include <linux/cpu.h> |
23 | #include <linux/freezer.h> | ||
23 | 24 | ||
24 | #include "power.h" | 25 | #include "power.h" |
25 | 26 | ||
@@ -27,6 +28,23 @@ | |||
27 | static int noresume = 0; | 28 | static int noresume = 0; |
28 | char resume_file[256] = CONFIG_PM_STD_PARTITION; | 29 | char resume_file[256] = CONFIG_PM_STD_PARTITION; |
29 | dev_t swsusp_resume_device; | 30 | dev_t swsusp_resume_device; |
31 | sector_t swsusp_resume_block; | ||
32 | |||
33 | /** | ||
34 | * platform_prepare - prepare the machine for hibernation using the | ||
35 | * platform driver if so configured and return an error code if it fails | ||
36 | */ | ||
37 | |||
38 | static inline int platform_prepare(void) | ||
39 | { | ||
40 | int error = 0; | ||
41 | |||
42 | if (pm_disk_mode == PM_DISK_PLATFORM) { | ||
43 | if (pm_ops && pm_ops->prepare) | ||
44 | error = pm_ops->prepare(PM_SUSPEND_DISK); | ||
45 | } | ||
46 | return error; | ||
47 | } | ||
30 | 48 | ||
31 | /** | 49 | /** |
32 | * power_down - Shut machine down for hibernate. | 50 | * power_down - Shut machine down for hibernate. |
@@ -40,12 +58,10 @@ dev_t swsusp_resume_device; | |||
40 | 58 | ||
41 | static void power_down(suspend_disk_method_t mode) | 59 | static void power_down(suspend_disk_method_t mode) |
42 | { | 60 | { |
43 | int error = 0; | ||
44 | |||
45 | switch(mode) { | 61 | switch(mode) { |
46 | case PM_DISK_PLATFORM: | 62 | case PM_DISK_PLATFORM: |
47 | kernel_shutdown_prepare(SYSTEM_SUSPEND_DISK); | 63 | kernel_shutdown_prepare(SYSTEM_SUSPEND_DISK); |
48 | error = pm_ops->enter(PM_SUSPEND_DISK); | 64 | pm_ops->enter(PM_SUSPEND_DISK); |
49 | break; | 65 | break; |
50 | case PM_DISK_SHUTDOWN: | 66 | case PM_DISK_SHUTDOWN: |
51 | kernel_power_off(); | 67 | kernel_power_off(); |
@@ -90,12 +106,18 @@ static int prepare_processes(void) | |||
90 | goto thaw; | 106 | goto thaw; |
91 | } | 107 | } |
92 | 108 | ||
109 | error = platform_prepare(); | ||
110 | if (error) | ||
111 | goto thaw; | ||
112 | |||
93 | /* Free memory before shutting down devices. */ | 113 | /* Free memory before shutting down devices. */ |
94 | if (!(error = swsusp_shrink_memory())) | 114 | if (!(error = swsusp_shrink_memory())) |
95 | return 0; | 115 | return 0; |
96 | thaw: | 116 | |
117 | platform_finish(); | ||
118 | thaw: | ||
97 | thaw_processes(); | 119 | thaw_processes(); |
98 | enable_cpus: | 120 | enable_cpus: |
99 | enable_nonboot_cpus(); | 121 | enable_nonboot_cpus(); |
100 | pm_restore_console(); | 122 | pm_restore_console(); |
101 | return error; | 123 | return error; |
@@ -127,7 +149,7 @@ int pm_suspend_disk(void) | |||
127 | return error; | 149 | return error; |
128 | 150 | ||
129 | if (pm_disk_mode == PM_DISK_TESTPROC) | 151 | if (pm_disk_mode == PM_DISK_TESTPROC) |
130 | goto Thaw; | 152 | return 0; |
131 | 153 | ||
132 | suspend_console(); | 154 | suspend_console(); |
133 | error = device_suspend(PMSG_FREEZE); | 155 | error = device_suspend(PMSG_FREEZE); |
@@ -189,10 +211,10 @@ static int software_resume(void) | |||
189 | { | 211 | { |
190 | int error; | 212 | int error; |
191 | 213 | ||
192 | down(&pm_sem); | 214 | mutex_lock(&pm_mutex); |
193 | if (!swsusp_resume_device) { | 215 | if (!swsusp_resume_device) { |
194 | if (!strlen(resume_file)) { | 216 | if (!strlen(resume_file)) { |
195 | up(&pm_sem); | 217 | mutex_unlock(&pm_mutex); |
196 | return -ENOENT; | 218 | return -ENOENT; |
197 | } | 219 | } |
198 | swsusp_resume_device = name_to_dev_t(resume_file); | 220 | swsusp_resume_device = name_to_dev_t(resume_file); |
@@ -207,7 +229,7 @@ static int software_resume(void) | |||
207 | * FIXME: If noresume is specified, we need to find the partition | 229 | * FIXME: If noresume is specified, we need to find the partition |
208 | * and reset it back to normal swap space. | 230 | * and reset it back to normal swap space. |
209 | */ | 231 | */ |
210 | up(&pm_sem); | 232 | mutex_unlock(&pm_mutex); |
211 | return 0; | 233 | return 0; |
212 | } | 234 | } |
213 | 235 | ||
@@ -251,7 +273,7 @@ static int software_resume(void) | |||
251 | unprepare_processes(); | 273 | unprepare_processes(); |
252 | Done: | 274 | Done: |
253 | /* For success case, the suspend path will release the lock */ | 275 | /* For success case, the suspend path will release the lock */ |
254 | up(&pm_sem); | 276 | mutex_unlock(&pm_mutex); |
255 | pr_debug("PM: Resume from disk failed.\n"); | 277 | pr_debug("PM: Resume from disk failed.\n"); |
256 | return 0; | 278 | return 0; |
257 | } | 279 | } |
@@ -312,7 +334,7 @@ static ssize_t disk_store(struct subsystem * s, const char * buf, size_t n) | |||
312 | p = memchr(buf, '\n', n); | 334 | p = memchr(buf, '\n', n); |
313 | len = p ? p - buf : n; | 335 | len = p ? p - buf : n; |
314 | 336 | ||
315 | down(&pm_sem); | 337 | mutex_lock(&pm_mutex); |
316 | for (i = PM_DISK_FIRMWARE; i < PM_DISK_MAX; i++) { | 338 | for (i = PM_DISK_FIRMWARE; i < PM_DISK_MAX; i++) { |
317 | if (!strncmp(buf, pm_disk_modes[i], len)) { | 339 | if (!strncmp(buf, pm_disk_modes[i], len)) { |
318 | mode = i; | 340 | mode = i; |
@@ -336,7 +358,7 @@ static ssize_t disk_store(struct subsystem * s, const char * buf, size_t n) | |||
336 | 358 | ||
337 | pr_debug("PM: suspend-to-disk mode set to '%s'\n", | 359 | pr_debug("PM: suspend-to-disk mode set to '%s'\n", |
338 | pm_disk_modes[mode]); | 360 | pm_disk_modes[mode]); |
339 | up(&pm_sem); | 361 | mutex_unlock(&pm_mutex); |
340 | return error ? error : n; | 362 | return error ? error : n; |
341 | } | 363 | } |
342 | 364 | ||
@@ -361,14 +383,14 @@ static ssize_t resume_store(struct subsystem *subsys, const char *buf, size_t n) | |||
361 | if (maj != MAJOR(res) || min != MINOR(res)) | 383 | if (maj != MAJOR(res) || min != MINOR(res)) |
362 | goto out; | 384 | goto out; |
363 | 385 | ||
364 | down(&pm_sem); | 386 | mutex_lock(&pm_mutex); |
365 | swsusp_resume_device = res; | 387 | swsusp_resume_device = res; |
366 | up(&pm_sem); | 388 | mutex_unlock(&pm_mutex); |
367 | printk("Attempting manual resume\n"); | 389 | printk("Attempting manual resume\n"); |
368 | noresume = 0; | 390 | noresume = 0; |
369 | software_resume(); | 391 | software_resume(); |
370 | ret = n; | 392 | ret = n; |
371 | out: | 393 | out: |
372 | return ret; | 394 | return ret; |
373 | } | 395 | } |
374 | 396 | ||
@@ -423,6 +445,19 @@ static int __init resume_setup(char *str) | |||
423 | return 1; | 445 | return 1; |
424 | } | 446 | } |
425 | 447 | ||
448 | static int __init resume_offset_setup(char *str) | ||
449 | { | ||
450 | unsigned long long offset; | ||
451 | |||
452 | if (noresume) | ||
453 | return 1; | ||
454 | |||
455 | if (sscanf(str, "%llu", &offset) == 1) | ||
456 | swsusp_resume_block = offset; | ||
457 | |||
458 | return 1; | ||
459 | } | ||
460 | |||
426 | static int __init noresume_setup(char *str) | 461 | static int __init noresume_setup(char *str) |
427 | { | 462 | { |
428 | noresume = 1; | 463 | noresume = 1; |
@@ -430,4 +465,5 @@ static int __init noresume_setup(char *str) | |||
430 | } | 465 | } |
431 | 466 | ||
432 | __setup("noresume", noresume_setup); | 467 | __setup("noresume", noresume_setup); |
468 | __setup("resume_offset=", resume_offset_setup); | ||
433 | __setup("resume=", resume_setup); | 469 | __setup("resume=", resume_setup); |
diff --git a/kernel/power/main.c b/kernel/power/main.c index 873228c71dab..500eb87f643d 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c | |||
@@ -8,6 +8,7 @@ | |||
8 | * | 8 | * |
9 | */ | 9 | */ |
10 | 10 | ||
11 | #include <linux/module.h> | ||
11 | #include <linux/suspend.h> | 12 | #include <linux/suspend.h> |
12 | #include <linux/kobject.h> | 13 | #include <linux/kobject.h> |
13 | #include <linux/string.h> | 14 | #include <linux/string.h> |
@@ -18,13 +19,14 @@ | |||
18 | #include <linux/console.h> | 19 | #include <linux/console.h> |
19 | #include <linux/cpu.h> | 20 | #include <linux/cpu.h> |
20 | #include <linux/resume-trace.h> | 21 | #include <linux/resume-trace.h> |
22 | #include <linux/freezer.h> | ||
21 | 23 | ||
22 | #include "power.h" | 24 | #include "power.h" |
23 | 25 | ||
24 | /*This is just an arbitrary number */ | 26 | /*This is just an arbitrary number */ |
25 | #define FREE_PAGE_NUMBER (100) | 27 | #define FREE_PAGE_NUMBER (100) |
26 | 28 | ||
27 | DECLARE_MUTEX(pm_sem); | 29 | DEFINE_MUTEX(pm_mutex); |
28 | 30 | ||
29 | struct pm_ops *pm_ops; | 31 | struct pm_ops *pm_ops; |
30 | suspend_disk_method_t pm_disk_mode = PM_DISK_SHUTDOWN; | 32 | suspend_disk_method_t pm_disk_mode = PM_DISK_SHUTDOWN; |
@@ -36,9 +38,9 @@ suspend_disk_method_t pm_disk_mode = PM_DISK_SHUTDOWN; | |||
36 | 38 | ||
37 | void pm_set_ops(struct pm_ops * ops) | 39 | void pm_set_ops(struct pm_ops * ops) |
38 | { | 40 | { |
39 | down(&pm_sem); | 41 | mutex_lock(&pm_mutex); |
40 | pm_ops = ops; | 42 | pm_ops = ops; |
41 | up(&pm_sem); | 43 | mutex_unlock(&pm_mutex); |
42 | } | 44 | } |
43 | 45 | ||
44 | 46 | ||
@@ -182,7 +184,7 @@ static int enter_state(suspend_state_t state) | |||
182 | 184 | ||
183 | if (!valid_state(state)) | 185 | if (!valid_state(state)) |
184 | return -ENODEV; | 186 | return -ENODEV; |
185 | if (down_trylock(&pm_sem)) | 187 | if (!mutex_trylock(&pm_mutex)) |
186 | return -EBUSY; | 188 | return -EBUSY; |
187 | 189 | ||
188 | if (state == PM_SUSPEND_DISK) { | 190 | if (state == PM_SUSPEND_DISK) { |
@@ -200,7 +202,7 @@ static int enter_state(suspend_state_t state) | |||
200 | pr_debug("PM: Finishing wakeup.\n"); | 202 | pr_debug("PM: Finishing wakeup.\n"); |
201 | suspend_finish(state); | 203 | suspend_finish(state); |
202 | Unlock: | 204 | Unlock: |
203 | up(&pm_sem); | 205 | mutex_unlock(&pm_mutex); |
204 | return error; | 206 | return error; |
205 | } | 207 | } |
206 | 208 | ||
@@ -229,7 +231,7 @@ int pm_suspend(suspend_state_t state) | |||
229 | return -EINVAL; | 231 | return -EINVAL; |
230 | } | 232 | } |
231 | 233 | ||
232 | 234 | EXPORT_SYMBOL(pm_suspend); | |
233 | 235 | ||
234 | decl_subsys(power,NULL,NULL); | 236 | decl_subsys(power,NULL,NULL); |
235 | 237 | ||
diff --git a/kernel/power/power.h b/kernel/power/power.h index bfe999f7b272..eb461b816bf4 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h | |||
@@ -22,7 +22,9 @@ static inline int pm_suspend_disk(void) | |||
22 | return -EPERM; | 22 | return -EPERM; |
23 | } | 23 | } |
24 | #endif | 24 | #endif |
25 | extern struct semaphore pm_sem; | 25 | |
26 | extern struct mutex pm_mutex; | ||
27 | |||
26 | #define power_attr(_name) \ | 28 | #define power_attr(_name) \ |
27 | static struct subsys_attribute _name##_attr = { \ | 29 | static struct subsys_attribute _name##_attr = { \ |
28 | .attr = { \ | 30 | .attr = { \ |
@@ -42,6 +44,7 @@ extern const void __nosave_begin, __nosave_end; | |||
42 | extern unsigned long image_size; | 44 | extern unsigned long image_size; |
43 | extern int in_suspend; | 45 | extern int in_suspend; |
44 | extern dev_t swsusp_resume_device; | 46 | extern dev_t swsusp_resume_device; |
47 | extern sector_t swsusp_resume_block; | ||
45 | 48 | ||
46 | extern asmlinkage int swsusp_arch_suspend(void); | 49 | extern asmlinkage int swsusp_arch_suspend(void); |
47 | extern asmlinkage int swsusp_arch_resume(void); | 50 | extern asmlinkage int swsusp_arch_resume(void); |
@@ -102,8 +105,18 @@ struct snapshot_handle { | |||
102 | extern unsigned int snapshot_additional_pages(struct zone *zone); | 105 | extern unsigned int snapshot_additional_pages(struct zone *zone); |
103 | extern int snapshot_read_next(struct snapshot_handle *handle, size_t count); | 106 | extern int snapshot_read_next(struct snapshot_handle *handle, size_t count); |
104 | extern int snapshot_write_next(struct snapshot_handle *handle, size_t count); | 107 | extern int snapshot_write_next(struct snapshot_handle *handle, size_t count); |
108 | extern void snapshot_write_finalize(struct snapshot_handle *handle); | ||
105 | extern int snapshot_image_loaded(struct snapshot_handle *handle); | 109 | extern int snapshot_image_loaded(struct snapshot_handle *handle); |
106 | extern void snapshot_free_unused_memory(struct snapshot_handle *handle); | 110 | |
111 | /* | ||
112 | * This structure is used to pass the values needed for the identification | ||
113 | * of the resume swap area from a user space to the kernel via the | ||
114 | * SNAPSHOT_SET_SWAP_AREA ioctl | ||
115 | */ | ||
116 | struct resume_swap_area { | ||
117 | loff_t offset; | ||
118 | u_int32_t dev; | ||
119 | } __attribute__((packed)); | ||
107 | 120 | ||
108 | #define SNAPSHOT_IOC_MAGIC '3' | 121 | #define SNAPSHOT_IOC_MAGIC '3' |
109 | #define SNAPSHOT_FREEZE _IO(SNAPSHOT_IOC_MAGIC, 1) | 122 | #define SNAPSHOT_FREEZE _IO(SNAPSHOT_IOC_MAGIC, 1) |
@@ -117,7 +130,14 @@ extern void snapshot_free_unused_memory(struct snapshot_handle *handle); | |||
117 | #define SNAPSHOT_FREE_SWAP_PAGES _IO(SNAPSHOT_IOC_MAGIC, 9) | 130 | #define SNAPSHOT_FREE_SWAP_PAGES _IO(SNAPSHOT_IOC_MAGIC, 9) |
118 | #define SNAPSHOT_SET_SWAP_FILE _IOW(SNAPSHOT_IOC_MAGIC, 10, unsigned int) | 131 | #define SNAPSHOT_SET_SWAP_FILE _IOW(SNAPSHOT_IOC_MAGIC, 10, unsigned int) |
119 | #define SNAPSHOT_S2RAM _IO(SNAPSHOT_IOC_MAGIC, 11) | 132 | #define SNAPSHOT_S2RAM _IO(SNAPSHOT_IOC_MAGIC, 11) |
120 | #define SNAPSHOT_IOC_MAXNR 11 | 133 | #define SNAPSHOT_PMOPS _IOW(SNAPSHOT_IOC_MAGIC, 12, unsigned int) |
134 | #define SNAPSHOT_SET_SWAP_AREA _IOW(SNAPSHOT_IOC_MAGIC, 13, \ | ||
135 | struct resume_swap_area) | ||
136 | #define SNAPSHOT_IOC_MAXNR 13 | ||
137 | |||
138 | #define PMOPS_PREPARE 1 | ||
139 | #define PMOPS_ENTER 2 | ||
140 | #define PMOPS_FINISH 3 | ||
121 | 141 | ||
122 | /** | 142 | /** |
123 | * The bitmap is used for tracing allocated swap pages | 143 | * The bitmap is used for tracing allocated swap pages |
@@ -141,7 +161,7 @@ struct bitmap_page { | |||
141 | 161 | ||
142 | extern void free_bitmap(struct bitmap_page *bitmap); | 162 | extern void free_bitmap(struct bitmap_page *bitmap); |
143 | extern struct bitmap_page *alloc_bitmap(unsigned int nr_bits); | 163 | extern struct bitmap_page *alloc_bitmap(unsigned int nr_bits); |
144 | extern unsigned long alloc_swap_page(int swap, struct bitmap_page *bitmap); | 164 | extern sector_t alloc_swapdev_block(int swap, struct bitmap_page *bitmap); |
145 | extern void free_all_swap_pages(int swap, struct bitmap_page *bitmap); | 165 | extern void free_all_swap_pages(int swap, struct bitmap_page *bitmap); |
146 | 166 | ||
147 | extern int swsusp_check(void); | 167 | extern int swsusp_check(void); |
@@ -153,3 +173,7 @@ extern int swsusp_read(void); | |||
153 | extern int swsusp_write(void); | 173 | extern int swsusp_write(void); |
154 | extern void swsusp_close(void); | 174 | extern void swsusp_close(void); |
155 | extern int suspend_enter(suspend_state_t state); | 175 | extern int suspend_enter(suspend_state_t state); |
176 | |||
177 | struct timeval; | ||
178 | extern void swsusp_show_speed(struct timeval *, struct timeval *, | ||
179 | unsigned int, char *); | ||
diff --git a/kernel/power/process.c b/kernel/power/process.c index 72e72d2c61e6..99eeb119b06d 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c | |||
@@ -13,12 +13,15 @@ | |||
13 | #include <linux/suspend.h> | 13 | #include <linux/suspend.h> |
14 | #include <linux/module.h> | 14 | #include <linux/module.h> |
15 | #include <linux/syscalls.h> | 15 | #include <linux/syscalls.h> |
16 | #include <linux/freezer.h> | ||
16 | 17 | ||
17 | /* | 18 | /* |
18 | * Timeout for stopping processes | 19 | * Timeout for stopping processes |
19 | */ | 20 | */ |
20 | #define TIMEOUT (20 * HZ) | 21 | #define TIMEOUT (20 * HZ) |
21 | 22 | ||
23 | #define FREEZER_KERNEL_THREADS 0 | ||
24 | #define FREEZER_USER_SPACE 1 | ||
22 | 25 | ||
23 | static inline int freezeable(struct task_struct * p) | 26 | static inline int freezeable(struct task_struct * p) |
24 | { | 27 | { |
@@ -39,7 +42,6 @@ void refrigerator(void) | |||
39 | long save; | 42 | long save; |
40 | save = current->state; | 43 | save = current->state; |
41 | pr_debug("%s entered refrigerator\n", current->comm); | 44 | pr_debug("%s entered refrigerator\n", current->comm); |
42 | printk("="); | ||
43 | 45 | ||
44 | frozen_process(current); | 46 | frozen_process(current); |
45 | spin_lock_irq(¤t->sighand->siglock); | 47 | spin_lock_irq(¤t->sighand->siglock); |
@@ -79,96 +81,136 @@ static void cancel_freezing(struct task_struct *p) | |||
79 | } | 81 | } |
80 | } | 82 | } |
81 | 83 | ||
82 | /* 0 = success, else # of processes that we failed to stop */ | 84 | static inline int is_user_space(struct task_struct *p) |
83 | int freeze_processes(void) | 85 | { |
86 | return p->mm && !(p->flags & PF_BORROWED_MM); | ||
87 | } | ||
88 | |||
89 | static unsigned int try_to_freeze_tasks(int freeze_user_space) | ||
84 | { | 90 | { |
85 | int todo, nr_user, user_frozen; | ||
86 | unsigned long start_time; | ||
87 | struct task_struct *g, *p; | 91 | struct task_struct *g, *p; |
92 | unsigned long end_time; | ||
93 | unsigned int todo; | ||
88 | 94 | ||
89 | printk( "Stopping tasks: " ); | 95 | end_time = jiffies + TIMEOUT; |
90 | start_time = jiffies; | ||
91 | user_frozen = 0; | ||
92 | do { | 96 | do { |
93 | nr_user = todo = 0; | 97 | todo = 0; |
94 | read_lock(&tasklist_lock); | 98 | read_lock(&tasklist_lock); |
95 | do_each_thread(g, p) { | 99 | do_each_thread(g, p) { |
96 | if (!freezeable(p)) | 100 | if (!freezeable(p)) |
97 | continue; | 101 | continue; |
102 | |||
98 | if (frozen(p)) | 103 | if (frozen(p)) |
99 | continue; | 104 | continue; |
100 | if (p->state == TASK_TRACED && frozen(p->parent)) { | 105 | |
106 | if (p->state == TASK_TRACED && | ||
107 | (frozen(p->parent) || | ||
108 | p->parent->state == TASK_STOPPED)) { | ||
101 | cancel_freezing(p); | 109 | cancel_freezing(p); |
102 | continue; | 110 | continue; |
103 | } | 111 | } |
104 | if (p->mm && !(p->flags & PF_BORROWED_MM)) { | 112 | if (is_user_space(p)) { |
105 | /* The task is a user-space one. | 113 | if (!freeze_user_space) |
106 | * Freeze it unless there's a vfork completion | 114 | continue; |
107 | * pending | 115 | |
116 | /* Freeze the task unless there is a vfork | ||
117 | * completion pending | ||
108 | */ | 118 | */ |
109 | if (!p->vfork_done) | 119 | if (!p->vfork_done) |
110 | freeze_process(p); | 120 | freeze_process(p); |
111 | nr_user++; | ||
112 | } else { | 121 | } else { |
113 | /* Freeze only if the user space is frozen */ | 122 | if (freeze_user_space) |
114 | if (user_frozen) | 123 | continue; |
115 | freeze_process(p); | 124 | |
116 | todo++; | 125 | freeze_process(p); |
117 | } | 126 | } |
127 | todo++; | ||
118 | } while_each_thread(g, p); | 128 | } while_each_thread(g, p); |
119 | read_unlock(&tasklist_lock); | 129 | read_unlock(&tasklist_lock); |
120 | todo += nr_user; | ||
121 | if (!user_frozen && !nr_user) { | ||
122 | sys_sync(); | ||
123 | start_time = jiffies; | ||
124 | } | ||
125 | user_frozen = !nr_user; | ||
126 | yield(); /* Yield is okay here */ | 130 | yield(); /* Yield is okay here */ |
127 | if (todo && time_after(jiffies, start_time + TIMEOUT)) | 131 | if (todo && time_after(jiffies, end_time)) |
128 | break; | 132 | break; |
129 | } while(todo); | 133 | } while (todo); |
130 | 134 | ||
131 | /* This does not unfreeze processes that are already frozen | ||
132 | * (we have slightly ugly calling convention in that respect, | ||
133 | * and caller must call thaw_processes() if something fails), | ||
134 | * but it cleans up leftover PF_FREEZE requests. | ||
135 | */ | ||
136 | if (todo) { | 135 | if (todo) { |
137 | printk( "\n" ); | 136 | /* This does not unfreeze processes that are already frozen |
138 | printk(KERN_ERR " stopping tasks timed out " | 137 | * (we have slightly ugly calling convention in that respect, |
139 | "after %d seconds (%d tasks remaining):\n", | 138 | * and caller must call thaw_processes() if something fails), |
140 | TIMEOUT / HZ, todo); | 139 | * but it cleans up leftover PF_FREEZE requests. |
140 | */ | ||
141 | printk("\n"); | ||
142 | printk(KERN_ERR "Stopping %s timed out after %d seconds " | ||
143 | "(%d tasks refusing to freeze):\n", | ||
144 | freeze_user_space ? "user space processes" : | ||
145 | "kernel threads", | ||
146 | TIMEOUT / HZ, todo); | ||
141 | read_lock(&tasklist_lock); | 147 | read_lock(&tasklist_lock); |
142 | do_each_thread(g, p) { | 148 | do_each_thread(g, p) { |
149 | if (is_user_space(p) == !freeze_user_space) | ||
150 | continue; | ||
151 | |||
143 | if (freezeable(p) && !frozen(p)) | 152 | if (freezeable(p) && !frozen(p)) |
144 | printk(KERN_ERR " %s\n", p->comm); | 153 | printk(KERN_ERR " %s\n", p->comm); |
154 | |||
145 | cancel_freezing(p); | 155 | cancel_freezing(p); |
146 | } while_each_thread(g, p); | 156 | } while_each_thread(g, p); |
147 | read_unlock(&tasklist_lock); | 157 | read_unlock(&tasklist_lock); |
148 | return todo; | ||
149 | } | 158 | } |
150 | 159 | ||
151 | printk( "|\n" ); | 160 | return todo; |
161 | } | ||
162 | |||
163 | /** | ||
164 | * freeze_processes - tell processes to enter the refrigerator | ||
165 | * | ||
166 | * Returns 0 on success, or the number of processes that didn't freeze, | ||
167 | * although they were told to. | ||
168 | */ | ||
169 | int freeze_processes(void) | ||
170 | { | ||
171 | unsigned int nr_unfrozen; | ||
172 | |||
173 | printk("Stopping tasks ... "); | ||
174 | nr_unfrozen = try_to_freeze_tasks(FREEZER_USER_SPACE); | ||
175 | if (nr_unfrozen) | ||
176 | return nr_unfrozen; | ||
177 | |||
178 | sys_sync(); | ||
179 | nr_unfrozen = try_to_freeze_tasks(FREEZER_KERNEL_THREADS); | ||
180 | if (nr_unfrozen) | ||
181 | return nr_unfrozen; | ||
182 | |||
183 | printk("done.\n"); | ||
152 | BUG_ON(in_atomic()); | 184 | BUG_ON(in_atomic()); |
153 | return 0; | 185 | return 0; |
154 | } | 186 | } |
155 | 187 | ||
156 | void thaw_processes(void) | 188 | static void thaw_tasks(int thaw_user_space) |
157 | { | 189 | { |
158 | struct task_struct *g, *p; | 190 | struct task_struct *g, *p; |
159 | 191 | ||
160 | printk( "Restarting tasks..." ); | ||
161 | read_lock(&tasklist_lock); | 192 | read_lock(&tasklist_lock); |
162 | do_each_thread(g, p) { | 193 | do_each_thread(g, p) { |
163 | if (!freezeable(p)) | 194 | if (!freezeable(p)) |
164 | continue; | 195 | continue; |
196 | |||
197 | if (is_user_space(p) == !thaw_user_space) | ||
198 | continue; | ||
199 | |||
165 | if (!thaw_process(p)) | 200 | if (!thaw_process(p)) |
166 | printk(KERN_INFO " Strange, %s not stopped\n", p->comm ); | 201 | printk(KERN_WARNING " Strange, %s not stopped\n", |
202 | p->comm ); | ||
167 | } while_each_thread(g, p); | 203 | } while_each_thread(g, p); |
168 | |||
169 | read_unlock(&tasklist_lock); | 204 | read_unlock(&tasklist_lock); |
205 | } | ||
206 | |||
207 | void thaw_processes(void) | ||
208 | { | ||
209 | printk("Restarting tasks ... "); | ||
210 | thaw_tasks(FREEZER_KERNEL_THREADS); | ||
211 | thaw_tasks(FREEZER_USER_SPACE); | ||
170 | schedule(); | 212 | schedule(); |
171 | printk( " done\n" ); | 213 | printk("done.\n"); |
172 | } | 214 | } |
173 | 215 | ||
174 | EXPORT_SYMBOL(refrigerator); | 216 | EXPORT_SYMBOL(refrigerator); |
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 99f9b7d177d6..c024606221c4 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c | |||
@@ -1,15 +1,15 @@ | |||
1 | /* | 1 | /* |
2 | * linux/kernel/power/snapshot.c | 2 | * linux/kernel/power/snapshot.c |
3 | * | 3 | * |
4 | * This file provide system snapshot/restore functionality. | 4 | * This file provides system snapshot/restore functionality for swsusp. |
5 | * | 5 | * |
6 | * Copyright (C) 1998-2005 Pavel Machek <pavel@suse.cz> | 6 | * Copyright (C) 1998-2005 Pavel Machek <pavel@suse.cz> |
7 | * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl> | ||
7 | * | 8 | * |
8 | * This file is released under the GPLv2, and is based on swsusp.c. | 9 | * This file is released under the GPLv2. |
9 | * | 10 | * |
10 | */ | 11 | */ |
11 | 12 | ||
12 | |||
13 | #include <linux/version.h> | 13 | #include <linux/version.h> |
14 | #include <linux/module.h> | 14 | #include <linux/module.h> |
15 | #include <linux/mm.h> | 15 | #include <linux/mm.h> |
@@ -34,137 +34,24 @@ | |||
34 | 34 | ||
35 | #include "power.h" | 35 | #include "power.h" |
36 | 36 | ||
37 | /* List of PBEs used for creating and restoring the suspend image */ | 37 | /* List of PBEs needed for restoring the pages that were allocated before |
38 | * the suspend and included in the suspend image, but have also been | ||
39 | * allocated by the "resume" kernel, so their contents cannot be written | ||
40 | * directly to their "original" page frames. | ||
41 | */ | ||
38 | struct pbe *restore_pblist; | 42 | struct pbe *restore_pblist; |
39 | 43 | ||
40 | static unsigned int nr_copy_pages; | 44 | /* Pointer to an auxiliary buffer (1 page) */ |
41 | static unsigned int nr_meta_pages; | ||
42 | static void *buffer; | 45 | static void *buffer; |
43 | 46 | ||
44 | #ifdef CONFIG_HIGHMEM | ||
45 | unsigned int count_highmem_pages(void) | ||
46 | { | ||
47 | struct zone *zone; | ||
48 | unsigned long zone_pfn; | ||
49 | unsigned int n = 0; | ||
50 | |||
51 | for_each_zone (zone) | ||
52 | if (is_highmem(zone)) { | ||
53 | mark_free_pages(zone); | ||
54 | for (zone_pfn = 0; zone_pfn < zone->spanned_pages; zone_pfn++) { | ||
55 | struct page *page; | ||
56 | unsigned long pfn = zone_pfn + zone->zone_start_pfn; | ||
57 | if (!pfn_valid(pfn)) | ||
58 | continue; | ||
59 | page = pfn_to_page(pfn); | ||
60 | if (PageReserved(page)) | ||
61 | continue; | ||
62 | if (PageNosaveFree(page)) | ||
63 | continue; | ||
64 | n++; | ||
65 | } | ||
66 | } | ||
67 | return n; | ||
68 | } | ||
69 | |||
70 | struct highmem_page { | ||
71 | char *data; | ||
72 | struct page *page; | ||
73 | struct highmem_page *next; | ||
74 | }; | ||
75 | |||
76 | static struct highmem_page *highmem_copy; | ||
77 | |||
78 | static int save_highmem_zone(struct zone *zone) | ||
79 | { | ||
80 | unsigned long zone_pfn; | ||
81 | mark_free_pages(zone); | ||
82 | for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) { | ||
83 | struct page *page; | ||
84 | struct highmem_page *save; | ||
85 | void *kaddr; | ||
86 | unsigned long pfn = zone_pfn + zone->zone_start_pfn; | ||
87 | |||
88 | if (!(pfn%10000)) | ||
89 | printk("."); | ||
90 | if (!pfn_valid(pfn)) | ||
91 | continue; | ||
92 | page = pfn_to_page(pfn); | ||
93 | /* | ||
94 | * This condition results from rvmalloc() sans vmalloc_32() | ||
95 | * and architectural memory reservations. This should be | ||
96 | * corrected eventually when the cases giving rise to this | ||
97 | * are better understood. | ||
98 | */ | ||
99 | if (PageReserved(page)) | ||
100 | continue; | ||
101 | BUG_ON(PageNosave(page)); | ||
102 | if (PageNosaveFree(page)) | ||
103 | continue; | ||
104 | save = kmalloc(sizeof(struct highmem_page), GFP_ATOMIC); | ||
105 | if (!save) | ||
106 | return -ENOMEM; | ||
107 | save->next = highmem_copy; | ||
108 | save->page = page; | ||
109 | save->data = (void *) get_zeroed_page(GFP_ATOMIC); | ||
110 | if (!save->data) { | ||
111 | kfree(save); | ||
112 | return -ENOMEM; | ||
113 | } | ||
114 | kaddr = kmap_atomic(page, KM_USER0); | ||
115 | memcpy(save->data, kaddr, PAGE_SIZE); | ||
116 | kunmap_atomic(kaddr, KM_USER0); | ||
117 | highmem_copy = save; | ||
118 | } | ||
119 | return 0; | ||
120 | } | ||
121 | |||
122 | int save_highmem(void) | ||
123 | { | ||
124 | struct zone *zone; | ||
125 | int res = 0; | ||
126 | |||
127 | pr_debug("swsusp: Saving Highmem"); | ||
128 | drain_local_pages(); | ||
129 | for_each_zone (zone) { | ||
130 | if (is_highmem(zone)) | ||
131 | res = save_highmem_zone(zone); | ||
132 | if (res) | ||
133 | return res; | ||
134 | } | ||
135 | printk("\n"); | ||
136 | return 0; | ||
137 | } | ||
138 | |||
139 | int restore_highmem(void) | ||
140 | { | ||
141 | printk("swsusp: Restoring Highmem\n"); | ||
142 | while (highmem_copy) { | ||
143 | struct highmem_page *save = highmem_copy; | ||
144 | void *kaddr; | ||
145 | highmem_copy = save->next; | ||
146 | |||
147 | kaddr = kmap_atomic(save->page, KM_USER0); | ||
148 | memcpy(kaddr, save->data, PAGE_SIZE); | ||
149 | kunmap_atomic(kaddr, KM_USER0); | ||
150 | free_page((long) save->data); | ||
151 | kfree(save); | ||
152 | } | ||
153 | return 0; | ||
154 | } | ||
155 | #else | ||
156 | static inline unsigned int count_highmem_pages(void) {return 0;} | ||
157 | static inline int save_highmem(void) {return 0;} | ||
158 | static inline int restore_highmem(void) {return 0;} | ||
159 | #endif | ||
160 | |||
161 | /** | 47 | /** |
162 | * @safe_needed - on resume, for storing the PBE list and the image, | 48 | * @safe_needed - on resume, for storing the PBE list and the image, |
163 | * we can only use memory pages that do not conflict with the pages | 49 | * we can only use memory pages that do not conflict with the pages |
164 | * used before suspend. | 50 | * used before suspend. The unsafe pages have PageNosaveFree set |
51 | * and we count them using unsafe_pages. | ||
165 | * | 52 | * |
166 | * The unsafe pages are marked with the PG_nosave_free flag | 53 | * Each allocated image page is marked as PageNosave and PageNosaveFree |
167 | * and we count them using unsafe_pages | 54 | * so that swsusp_free() can release it. |
168 | */ | 55 | */ |
169 | 56 | ||
170 | #define PG_ANY 0 | 57 | #define PG_ANY 0 |
@@ -174,7 +61,7 @@ static inline int restore_highmem(void) {return 0;} | |||
174 | 61 | ||
175 | static unsigned int allocated_unsafe_pages; | 62 | static unsigned int allocated_unsafe_pages; |
176 | 63 | ||
177 | static void *alloc_image_page(gfp_t gfp_mask, int safe_needed) | 64 | static void *get_image_page(gfp_t gfp_mask, int safe_needed) |
178 | { | 65 | { |
179 | void *res; | 66 | void *res; |
180 | 67 | ||
@@ -195,20 +82,39 @@ static void *alloc_image_page(gfp_t gfp_mask, int safe_needed) | |||
195 | 82 | ||
196 | unsigned long get_safe_page(gfp_t gfp_mask) | 83 | unsigned long get_safe_page(gfp_t gfp_mask) |
197 | { | 84 | { |
198 | return (unsigned long)alloc_image_page(gfp_mask, PG_SAFE); | 85 | return (unsigned long)get_image_page(gfp_mask, PG_SAFE); |
86 | } | ||
87 | |||
88 | static struct page *alloc_image_page(gfp_t gfp_mask) | ||
89 | { | ||
90 | struct page *page; | ||
91 | |||
92 | page = alloc_page(gfp_mask); | ||
93 | if (page) { | ||
94 | SetPageNosave(page); | ||
95 | SetPageNosaveFree(page); | ||
96 | } | ||
97 | return page; | ||
199 | } | 98 | } |
200 | 99 | ||
201 | /** | 100 | /** |
202 | * free_image_page - free page represented by @addr, allocated with | 101 | * free_image_page - free page represented by @addr, allocated with |
203 | * alloc_image_page (page flags set by it must be cleared) | 102 | * get_image_page (page flags set by it must be cleared) |
204 | */ | 103 | */ |
205 | 104 | ||
206 | static inline void free_image_page(void *addr, int clear_nosave_free) | 105 | static inline void free_image_page(void *addr, int clear_nosave_free) |
207 | { | 106 | { |
208 | ClearPageNosave(virt_to_page(addr)); | 107 | struct page *page; |
108 | |||
109 | BUG_ON(!virt_addr_valid(addr)); | ||
110 | |||
111 | page = virt_to_page(addr); | ||
112 | |||
113 | ClearPageNosave(page); | ||
209 | if (clear_nosave_free) | 114 | if (clear_nosave_free) |
210 | ClearPageNosaveFree(virt_to_page(addr)); | 115 | ClearPageNosaveFree(page); |
211 | free_page((unsigned long)addr); | 116 | |
117 | __free_page(page); | ||
212 | } | 118 | } |
213 | 119 | ||
214 | /* struct linked_page is used to build chains of pages */ | 120 | /* struct linked_page is used to build chains of pages */ |
@@ -269,7 +175,7 @@ static void *chain_alloc(struct chain_allocator *ca, unsigned int size) | |||
269 | if (LINKED_PAGE_DATA_SIZE - ca->used_space < size) { | 175 | if (LINKED_PAGE_DATA_SIZE - ca->used_space < size) { |
270 | struct linked_page *lp; | 176 | struct linked_page *lp; |
271 | 177 | ||
272 | lp = alloc_image_page(ca->gfp_mask, ca->safe_needed); | 178 | lp = get_image_page(ca->gfp_mask, ca->safe_needed); |
273 | if (!lp) | 179 | if (!lp) |
274 | return NULL; | 180 | return NULL; |
275 | 181 | ||
@@ -446,8 +352,8 @@ memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed) | |||
446 | 352 | ||
447 | /* Compute the number of zones */ | 353 | /* Compute the number of zones */ |
448 | nr = 0; | 354 | nr = 0; |
449 | for_each_zone (zone) | 355 | for_each_zone(zone) |
450 | if (populated_zone(zone) && !is_highmem(zone)) | 356 | if (populated_zone(zone)) |
451 | nr++; | 357 | nr++; |
452 | 358 | ||
453 | /* Allocate the list of zones bitmap objects */ | 359 | /* Allocate the list of zones bitmap objects */ |
@@ -459,10 +365,10 @@ memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed) | |||
459 | } | 365 | } |
460 | 366 | ||
461 | /* Initialize the zone bitmap objects */ | 367 | /* Initialize the zone bitmap objects */ |
462 | for_each_zone (zone) { | 368 | for_each_zone(zone) { |
463 | unsigned long pfn; | 369 | unsigned long pfn; |
464 | 370 | ||
465 | if (!populated_zone(zone) || is_highmem(zone)) | 371 | if (!populated_zone(zone)) |
466 | continue; | 372 | continue; |
467 | 373 | ||
468 | zone_bm->start_pfn = zone->zone_start_pfn; | 374 | zone_bm->start_pfn = zone->zone_start_pfn; |
@@ -481,7 +387,7 @@ memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed) | |||
481 | while (bb) { | 387 | while (bb) { |
482 | unsigned long *ptr; | 388 | unsigned long *ptr; |
483 | 389 | ||
484 | ptr = alloc_image_page(gfp_mask, safe_needed); | 390 | ptr = get_image_page(gfp_mask, safe_needed); |
485 | bb->data = ptr; | 391 | bb->data = ptr; |
486 | if (!ptr) | 392 | if (!ptr) |
487 | goto Free; | 393 | goto Free; |
@@ -505,7 +411,7 @@ memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed) | |||
505 | memory_bm_position_reset(bm); | 411 | memory_bm_position_reset(bm); |
506 | return 0; | 412 | return 0; |
507 | 413 | ||
508 | Free: | 414 | Free: |
509 | bm->p_list = ca.chain; | 415 | bm->p_list = ca.chain; |
510 | memory_bm_free(bm, PG_UNSAFE_CLEAR); | 416 | memory_bm_free(bm, PG_UNSAFE_CLEAR); |
511 | return -ENOMEM; | 417 | return -ENOMEM; |
@@ -651,7 +557,7 @@ static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm) | |||
651 | memory_bm_position_reset(bm); | 557 | memory_bm_position_reset(bm); |
652 | return BM_END_OF_MAP; | 558 | return BM_END_OF_MAP; |
653 | 559 | ||
654 | Return_pfn: | 560 | Return_pfn: |
655 | bm->cur.chunk = chunk; | 561 | bm->cur.chunk = chunk; |
656 | bm->cur.bit = bit; | 562 | bm->cur.bit = bit; |
657 | return bb->start_pfn + chunk * BM_BITS_PER_CHUNK + bit; | 563 | return bb->start_pfn + chunk * BM_BITS_PER_CHUNK + bit; |
@@ -669,10 +575,82 @@ unsigned int snapshot_additional_pages(struct zone *zone) | |||
669 | 575 | ||
670 | res = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK); | 576 | res = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK); |
671 | res += DIV_ROUND_UP(res * sizeof(struct bm_block), PAGE_SIZE); | 577 | res += DIV_ROUND_UP(res * sizeof(struct bm_block), PAGE_SIZE); |
672 | return res; | 578 | return 2 * res; |
579 | } | ||
580 | |||
581 | #ifdef CONFIG_HIGHMEM | ||
582 | /** | ||
583 | * count_free_highmem_pages - compute the total number of free highmem | ||
584 | * pages, system-wide. | ||
585 | */ | ||
586 | |||
587 | static unsigned int count_free_highmem_pages(void) | ||
588 | { | ||
589 | struct zone *zone; | ||
590 | unsigned int cnt = 0; | ||
591 | |||
592 | for_each_zone(zone) | ||
593 | if (populated_zone(zone) && is_highmem(zone)) | ||
594 | cnt += zone->free_pages; | ||
595 | |||
596 | return cnt; | ||
597 | } | ||
598 | |||
599 | /** | ||
600 | * saveable_highmem_page - Determine whether a highmem page should be | ||
601 | * included in the suspend image. | ||
602 | * | ||
603 | * We should save the page if it isn't Nosave or NosaveFree, or Reserved, | ||
604 | * and it isn't a part of a free chunk of pages. | ||
605 | */ | ||
606 | |||
607 | static struct page *saveable_highmem_page(unsigned long pfn) | ||
608 | { | ||
609 | struct page *page; | ||
610 | |||
611 | if (!pfn_valid(pfn)) | ||
612 | return NULL; | ||
613 | |||
614 | page = pfn_to_page(pfn); | ||
615 | |||
616 | BUG_ON(!PageHighMem(page)); | ||
617 | |||
618 | if (PageNosave(page) || PageReserved(page) || PageNosaveFree(page)) | ||
619 | return NULL; | ||
620 | |||
621 | return page; | ||
673 | } | 622 | } |
674 | 623 | ||
675 | /** | 624 | /** |
625 | * count_highmem_pages - compute the total number of saveable highmem | ||
626 | * pages. | ||
627 | */ | ||
628 | |||
629 | unsigned int count_highmem_pages(void) | ||
630 | { | ||
631 | struct zone *zone; | ||
632 | unsigned int n = 0; | ||
633 | |||
634 | for_each_zone(zone) { | ||
635 | unsigned long pfn, max_zone_pfn; | ||
636 | |||
637 | if (!is_highmem(zone)) | ||
638 | continue; | ||
639 | |||
640 | mark_free_pages(zone); | ||
641 | max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; | ||
642 | for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) | ||
643 | if (saveable_highmem_page(pfn)) | ||
644 | n++; | ||
645 | } | ||
646 | return n; | ||
647 | } | ||
648 | #else | ||
649 | static inline void *saveable_highmem_page(unsigned long pfn) { return NULL; } | ||
650 | static inline unsigned int count_highmem_pages(void) { return 0; } | ||
651 | #endif /* CONFIG_HIGHMEM */ | ||
652 | |||
653 | /** | ||
676 | * pfn_is_nosave - check if given pfn is in the 'nosave' section | 654 | * pfn_is_nosave - check if given pfn is in the 'nosave' section |
677 | */ | 655 | */ |
678 | 656 | ||
@@ -684,12 +662,12 @@ static inline int pfn_is_nosave(unsigned long pfn) | |||
684 | } | 662 | } |
685 | 663 | ||
686 | /** | 664 | /** |
687 | * saveable - Determine whether a page should be cloned or not. | 665 | * saveable - Determine whether a non-highmem page should be included in |
688 | * @pfn: The page | 666 | * the suspend image. |
689 | * | 667 | * |
690 | * We save a page if it isn't Nosave, and is not in the range of pages | 668 | * We should save the page if it isn't Nosave, and is not in the range |
691 | * statically defined as 'unsaveable', and it | 669 | * of pages statically defined as 'unsaveable', and it isn't a part of |
692 | * isn't a part of a free chunk of pages. | 670 | * a free chunk of pages. |
693 | */ | 671 | */ |
694 | 672 | ||
695 | static struct page *saveable_page(unsigned long pfn) | 673 | static struct page *saveable_page(unsigned long pfn) |
@@ -701,76 +679,130 @@ static struct page *saveable_page(unsigned long pfn) | |||
701 | 679 | ||
702 | page = pfn_to_page(pfn); | 680 | page = pfn_to_page(pfn); |
703 | 681 | ||
704 | if (PageNosave(page)) | 682 | BUG_ON(PageHighMem(page)); |
683 | |||
684 | if (PageNosave(page) || PageNosaveFree(page)) | ||
705 | return NULL; | 685 | return NULL; |
686 | |||
706 | if (PageReserved(page) && pfn_is_nosave(pfn)) | 687 | if (PageReserved(page) && pfn_is_nosave(pfn)) |
707 | return NULL; | 688 | return NULL; |
708 | if (PageNosaveFree(page)) | ||
709 | return NULL; | ||
710 | 689 | ||
711 | return page; | 690 | return page; |
712 | } | 691 | } |
713 | 692 | ||
693 | /** | ||
694 | * count_data_pages - compute the total number of saveable non-highmem | ||
695 | * pages. | ||
696 | */ | ||
697 | |||
714 | unsigned int count_data_pages(void) | 698 | unsigned int count_data_pages(void) |
715 | { | 699 | { |
716 | struct zone *zone; | 700 | struct zone *zone; |
717 | unsigned long pfn, max_zone_pfn; | 701 | unsigned long pfn, max_zone_pfn; |
718 | unsigned int n = 0; | 702 | unsigned int n = 0; |
719 | 703 | ||
720 | for_each_zone (zone) { | 704 | for_each_zone(zone) { |
721 | if (is_highmem(zone)) | 705 | if (is_highmem(zone)) |
722 | continue; | 706 | continue; |
707 | |||
723 | mark_free_pages(zone); | 708 | mark_free_pages(zone); |
724 | max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; | 709 | max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; |
725 | for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) | 710 | for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) |
726 | n += !!saveable_page(pfn); | 711 | if(saveable_page(pfn)) |
712 | n++; | ||
727 | } | 713 | } |
728 | return n; | 714 | return n; |
729 | } | 715 | } |
730 | 716 | ||
731 | static inline void copy_data_page(long *dst, long *src) | 717 | /* This is needed, because copy_page and memcpy are not usable for copying |
718 | * task structs. | ||
719 | */ | ||
720 | static inline void do_copy_page(long *dst, long *src) | ||
732 | { | 721 | { |
733 | int n; | 722 | int n; |
734 | 723 | ||
735 | /* copy_page and memcpy are not usable for copying task structs. */ | ||
736 | for (n = PAGE_SIZE / sizeof(long); n; n--) | 724 | for (n = PAGE_SIZE / sizeof(long); n; n--) |
737 | *dst++ = *src++; | 725 | *dst++ = *src++; |
738 | } | 726 | } |
739 | 727 | ||
728 | #ifdef CONFIG_HIGHMEM | ||
729 | static inline struct page * | ||
730 | page_is_saveable(struct zone *zone, unsigned long pfn) | ||
731 | { | ||
732 | return is_highmem(zone) ? | ||
733 | saveable_highmem_page(pfn) : saveable_page(pfn); | ||
734 | } | ||
735 | |||
736 | static inline void | ||
737 | copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) | ||
738 | { | ||
739 | struct page *s_page, *d_page; | ||
740 | void *src, *dst; | ||
741 | |||
742 | s_page = pfn_to_page(src_pfn); | ||
743 | d_page = pfn_to_page(dst_pfn); | ||
744 | if (PageHighMem(s_page)) { | ||
745 | src = kmap_atomic(s_page, KM_USER0); | ||
746 | dst = kmap_atomic(d_page, KM_USER1); | ||
747 | do_copy_page(dst, src); | ||
748 | kunmap_atomic(src, KM_USER0); | ||
749 | kunmap_atomic(dst, KM_USER1); | ||
750 | } else { | ||
751 | src = page_address(s_page); | ||
752 | if (PageHighMem(d_page)) { | ||
753 | /* Page pointed to by src may contain some kernel | ||
754 | * data modified by kmap_atomic() | ||
755 | */ | ||
756 | do_copy_page(buffer, src); | ||
757 | dst = kmap_atomic(pfn_to_page(dst_pfn), KM_USER0); | ||
758 | memcpy(dst, buffer, PAGE_SIZE); | ||
759 | kunmap_atomic(dst, KM_USER0); | ||
760 | } else { | ||
761 | dst = page_address(d_page); | ||
762 | do_copy_page(dst, src); | ||
763 | } | ||
764 | } | ||
765 | } | ||
766 | #else | ||
767 | #define page_is_saveable(zone, pfn) saveable_page(pfn) | ||
768 | |||
769 | static inline void | ||
770 | copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) | ||
771 | { | ||
772 | do_copy_page(page_address(pfn_to_page(dst_pfn)), | ||
773 | page_address(pfn_to_page(src_pfn))); | ||
774 | } | ||
775 | #endif /* CONFIG_HIGHMEM */ | ||
776 | |||
740 | static void | 777 | static void |
741 | copy_data_pages(struct memory_bitmap *copy_bm, struct memory_bitmap *orig_bm) | 778 | copy_data_pages(struct memory_bitmap *copy_bm, struct memory_bitmap *orig_bm) |
742 | { | 779 | { |
743 | struct zone *zone; | 780 | struct zone *zone; |
744 | unsigned long pfn; | 781 | unsigned long pfn; |
745 | 782 | ||
746 | for_each_zone (zone) { | 783 | for_each_zone(zone) { |
747 | unsigned long max_zone_pfn; | 784 | unsigned long max_zone_pfn; |
748 | 785 | ||
749 | if (is_highmem(zone)) | ||
750 | continue; | ||
751 | |||
752 | mark_free_pages(zone); | 786 | mark_free_pages(zone); |
753 | max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; | 787 | max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; |
754 | for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) | 788 | for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) |
755 | if (saveable_page(pfn)) | 789 | if (page_is_saveable(zone, pfn)) |
756 | memory_bm_set_bit(orig_bm, pfn); | 790 | memory_bm_set_bit(orig_bm, pfn); |
757 | } | 791 | } |
758 | memory_bm_position_reset(orig_bm); | 792 | memory_bm_position_reset(orig_bm); |
759 | memory_bm_position_reset(copy_bm); | 793 | memory_bm_position_reset(copy_bm); |
760 | do { | 794 | do { |
761 | pfn = memory_bm_next_pfn(orig_bm); | 795 | pfn = memory_bm_next_pfn(orig_bm); |
762 | if (likely(pfn != BM_END_OF_MAP)) { | 796 | if (likely(pfn != BM_END_OF_MAP)) |
763 | struct page *page; | 797 | copy_data_page(memory_bm_next_pfn(copy_bm), pfn); |
764 | void *src; | ||
765 | |||
766 | page = pfn_to_page(pfn); | ||
767 | src = page_address(page); | ||
768 | page = pfn_to_page(memory_bm_next_pfn(copy_bm)); | ||
769 | copy_data_page(page_address(page), src); | ||
770 | } | ||
771 | } while (pfn != BM_END_OF_MAP); | 798 | } while (pfn != BM_END_OF_MAP); |
772 | } | 799 | } |
773 | 800 | ||
801 | /* Total number of image pages */ | ||
802 | static unsigned int nr_copy_pages; | ||
803 | /* Number of pages needed for saving the original pfns of the image pages */ | ||
804 | static unsigned int nr_meta_pages; | ||
805 | |||
774 | /** | 806 | /** |
775 | * swsusp_free - free pages allocated for the suspend. | 807 | * swsusp_free - free pages allocated for the suspend. |
776 | * | 808 | * |
@@ -792,7 +824,7 @@ void swsusp_free(void) | |||
792 | if (PageNosave(page) && PageNosaveFree(page)) { | 824 | if (PageNosave(page) && PageNosaveFree(page)) { |
793 | ClearPageNosave(page); | 825 | ClearPageNosave(page); |
794 | ClearPageNosaveFree(page); | 826 | ClearPageNosaveFree(page); |
795 | free_page((long) page_address(page)); | 827 | __free_page(page); |
796 | } | 828 | } |
797 | } | 829 | } |
798 | } | 830 | } |
@@ -802,34 +834,108 @@ void swsusp_free(void) | |||
802 | buffer = NULL; | 834 | buffer = NULL; |
803 | } | 835 | } |
804 | 836 | ||
837 | #ifdef CONFIG_HIGHMEM | ||
838 | /** | ||
839 | * count_pages_for_highmem - compute the number of non-highmem pages | ||
840 | * that will be necessary for creating copies of highmem pages. | ||
841 | */ | ||
842 | |||
843 | static unsigned int count_pages_for_highmem(unsigned int nr_highmem) | ||
844 | { | ||
845 | unsigned int free_highmem = count_free_highmem_pages(); | ||
846 | |||
847 | if (free_highmem >= nr_highmem) | ||
848 | nr_highmem = 0; | ||
849 | else | ||
850 | nr_highmem -= free_highmem; | ||
851 | |||
852 | return nr_highmem; | ||
853 | } | ||
854 | #else | ||
855 | static unsigned int | ||
856 | count_pages_for_highmem(unsigned int nr_highmem) { return 0; } | ||
857 | #endif /* CONFIG_HIGHMEM */ | ||
805 | 858 | ||
806 | /** | 859 | /** |
807 | * enough_free_mem - Make sure we enough free memory to snapshot. | 860 | * enough_free_mem - Make sure we have enough free memory for the |
808 | * | 861 | * snapshot image. |
809 | * Returns TRUE or FALSE after checking the number of available | ||
810 | * free pages. | ||
811 | */ | 862 | */ |
812 | 863 | ||
813 | static int enough_free_mem(unsigned int nr_pages) | 864 | static int enough_free_mem(unsigned int nr_pages, unsigned int nr_highmem) |
814 | { | 865 | { |
815 | struct zone *zone; | 866 | struct zone *zone; |
816 | unsigned int free = 0, meta = 0; | 867 | unsigned int free = 0, meta = 0; |
817 | 868 | ||
818 | for_each_zone (zone) | 869 | for_each_zone(zone) { |
819 | if (!is_highmem(zone)) { | 870 | meta += snapshot_additional_pages(zone); |
871 | if (!is_highmem(zone)) | ||
820 | free += zone->free_pages; | 872 | free += zone->free_pages; |
821 | meta += snapshot_additional_pages(zone); | 873 | } |
822 | } | ||
823 | 874 | ||
824 | pr_debug("swsusp: pages needed: %u + %u + %u, available pages: %u\n", | 875 | nr_pages += count_pages_for_highmem(nr_highmem); |
876 | pr_debug("swsusp: Normal pages needed: %u + %u + %u, available pages: %u\n", | ||
825 | nr_pages, PAGES_FOR_IO, meta, free); | 877 | nr_pages, PAGES_FOR_IO, meta, free); |
826 | 878 | ||
827 | return free > nr_pages + PAGES_FOR_IO + meta; | 879 | return free > nr_pages + PAGES_FOR_IO + meta; |
828 | } | 880 | } |
829 | 881 | ||
882 | #ifdef CONFIG_HIGHMEM | ||
883 | /** | ||
884 | * get_highmem_buffer - if there are some highmem pages in the suspend | ||
885 | * image, we may need the buffer to copy them and/or load their data. | ||
886 | */ | ||
887 | |||
888 | static inline int get_highmem_buffer(int safe_needed) | ||
889 | { | ||
890 | buffer = get_image_page(GFP_ATOMIC | __GFP_COLD, safe_needed); | ||
891 | return buffer ? 0 : -ENOMEM; | ||
892 | } | ||
893 | |||
894 | /** | ||
895 | * alloc_highmem_image_pages - allocate some highmem pages for the image. | ||
896 | * Try to allocate as many pages as needed, but if the number of free | ||
897 | * highmem pages is lesser than that, allocate them all. | ||
898 | */ | ||
899 | |||
900 | static inline unsigned int | ||
901 | alloc_highmem_image_pages(struct memory_bitmap *bm, unsigned int nr_highmem) | ||
902 | { | ||
903 | unsigned int to_alloc = count_free_highmem_pages(); | ||
904 | |||
905 | if (to_alloc > nr_highmem) | ||
906 | to_alloc = nr_highmem; | ||
907 | |||
908 | nr_highmem -= to_alloc; | ||
909 | while (to_alloc-- > 0) { | ||
910 | struct page *page; | ||
911 | |||
912 | page = alloc_image_page(__GFP_HIGHMEM); | ||
913 | memory_bm_set_bit(bm, page_to_pfn(page)); | ||
914 | } | ||
915 | return nr_highmem; | ||
916 | } | ||
917 | #else | ||
918 | static inline int get_highmem_buffer(int safe_needed) { return 0; } | ||
919 | |||
920 | static inline unsigned int | ||
921 | alloc_highmem_image_pages(struct memory_bitmap *bm, unsigned int n) { return 0; } | ||
922 | #endif /* CONFIG_HIGHMEM */ | ||
923 | |||
924 | /** | ||
925 | * swsusp_alloc - allocate memory for the suspend image | ||
926 | * | ||
927 | * We first try to allocate as many highmem pages as there are | ||
928 | * saveable highmem pages in the system. If that fails, we allocate | ||
929 | * non-highmem pages for the copies of the remaining highmem ones. | ||
930 | * | ||
931 | * In this approach it is likely that the copies of highmem pages will | ||
932 | * also be located in the high memory, because of the way in which | ||
933 | * copy_data_pages() works. | ||
934 | */ | ||
935 | |||
830 | static int | 936 | static int |
831 | swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm, | 937 | swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm, |
832 | unsigned int nr_pages) | 938 | unsigned int nr_pages, unsigned int nr_highmem) |
833 | { | 939 | { |
834 | int error; | 940 | int error; |
835 | 941 | ||
@@ -841,46 +947,61 @@ swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm, | |||
841 | if (error) | 947 | if (error) |
842 | goto Free; | 948 | goto Free; |
843 | 949 | ||
950 | if (nr_highmem > 0) { | ||
951 | error = get_highmem_buffer(PG_ANY); | ||
952 | if (error) | ||
953 | goto Free; | ||
954 | |||
955 | nr_pages += alloc_highmem_image_pages(copy_bm, nr_highmem); | ||
956 | } | ||
844 | while (nr_pages-- > 0) { | 957 | while (nr_pages-- > 0) { |
845 | struct page *page = alloc_page(GFP_ATOMIC | __GFP_COLD); | 958 | struct page *page = alloc_image_page(GFP_ATOMIC | __GFP_COLD); |
959 | |||
846 | if (!page) | 960 | if (!page) |
847 | goto Free; | 961 | goto Free; |
848 | 962 | ||
849 | SetPageNosave(page); | ||
850 | SetPageNosaveFree(page); | ||
851 | memory_bm_set_bit(copy_bm, page_to_pfn(page)); | 963 | memory_bm_set_bit(copy_bm, page_to_pfn(page)); |
852 | } | 964 | } |
853 | return 0; | 965 | return 0; |
854 | 966 | ||
855 | Free: | 967 | Free: |
856 | swsusp_free(); | 968 | swsusp_free(); |
857 | return -ENOMEM; | 969 | return -ENOMEM; |
858 | } | 970 | } |
859 | 971 | ||
860 | /* Memory bitmap used for marking saveable pages */ | 972 | /* Memory bitmap used for marking saveable pages (during suspend) or the |
973 | * suspend image pages (during resume) | ||
974 | */ | ||
861 | static struct memory_bitmap orig_bm; | 975 | static struct memory_bitmap orig_bm; |
862 | /* Memory bitmap used for marking allocated pages that will contain the copies | 976 | /* Memory bitmap used on suspend for marking allocated pages that will contain |
863 | * of saveable pages | 977 | * the copies of saveable pages. During resume it is initially used for |
978 | * marking the suspend image pages, but then its set bits are duplicated in | ||
979 | * @orig_bm and it is released. Next, on systems with high memory, it may be | ||
980 | * used for marking "safe" highmem pages, but it has to be reinitialized for | ||
981 | * this purpose. | ||
864 | */ | 982 | */ |
865 | static struct memory_bitmap copy_bm; | 983 | static struct memory_bitmap copy_bm; |
866 | 984 | ||
867 | asmlinkage int swsusp_save(void) | 985 | asmlinkage int swsusp_save(void) |
868 | { | 986 | { |
869 | unsigned int nr_pages; | 987 | unsigned int nr_pages, nr_highmem; |
870 | 988 | ||
871 | pr_debug("swsusp: critical section: \n"); | 989 | printk("swsusp: critical section: \n"); |
872 | 990 | ||
873 | drain_local_pages(); | 991 | drain_local_pages(); |
874 | nr_pages = count_data_pages(); | 992 | nr_pages = count_data_pages(); |
875 | printk("swsusp: Need to copy %u pages\n", nr_pages); | 993 | nr_highmem = count_highmem_pages(); |
994 | printk("swsusp: Need to copy %u pages\n", nr_pages + nr_highmem); | ||
876 | 995 | ||
877 | if (!enough_free_mem(nr_pages)) { | 996 | if (!enough_free_mem(nr_pages, nr_highmem)) { |
878 | printk(KERN_ERR "swsusp: Not enough free memory\n"); | 997 | printk(KERN_ERR "swsusp: Not enough free memory\n"); |
879 | return -ENOMEM; | 998 | return -ENOMEM; |
880 | } | 999 | } |
881 | 1000 | ||
882 | if (swsusp_alloc(&orig_bm, ©_bm, nr_pages)) | 1001 | if (swsusp_alloc(&orig_bm, ©_bm, nr_pages, nr_highmem)) { |
1002 | printk(KERN_ERR "swsusp: Memory allocation failed\n"); | ||
883 | return -ENOMEM; | 1003 | return -ENOMEM; |
1004 | } | ||
884 | 1005 | ||
885 | /* During allocating of suspend pagedir, new cold pages may appear. | 1006 | /* During allocating of suspend pagedir, new cold pages may appear. |
886 | * Kill them. | 1007 | * Kill them. |
@@ -894,10 +1015,12 @@ asmlinkage int swsusp_save(void) | |||
894 | * touch swap space! Except we must write out our image of course. | 1015 | * touch swap space! Except we must write out our image of course. |
895 | */ | 1016 | */ |
896 | 1017 | ||
1018 | nr_pages += nr_highmem; | ||
897 | nr_copy_pages = nr_pages; | 1019 | nr_copy_pages = nr_pages; |
898 | nr_meta_pages = (nr_pages * sizeof(long) + PAGE_SIZE - 1) >> PAGE_SHIFT; | 1020 | nr_meta_pages = DIV_ROUND_UP(nr_pages * sizeof(long), PAGE_SIZE); |
899 | 1021 | ||
900 | printk("swsusp: critical section/: done (%d pages copied)\n", nr_pages); | 1022 | printk("swsusp: critical section/: done (%d pages copied)\n", nr_pages); |
1023 | |||
901 | return 0; | 1024 | return 0; |
902 | } | 1025 | } |
903 | 1026 | ||
@@ -960,7 +1083,7 @@ int snapshot_read_next(struct snapshot_handle *handle, size_t count) | |||
960 | 1083 | ||
961 | if (!buffer) { | 1084 | if (!buffer) { |
962 | /* This makes the buffer be freed by swsusp_free() */ | 1085 | /* This makes the buffer be freed by swsusp_free() */ |
963 | buffer = alloc_image_page(GFP_ATOMIC, PG_ANY); | 1086 | buffer = get_image_page(GFP_ATOMIC, PG_ANY); |
964 | if (!buffer) | 1087 | if (!buffer) |
965 | return -ENOMEM; | 1088 | return -ENOMEM; |
966 | } | 1089 | } |
@@ -975,9 +1098,23 @@ int snapshot_read_next(struct snapshot_handle *handle, size_t count) | |||
975 | memset(buffer, 0, PAGE_SIZE); | 1098 | memset(buffer, 0, PAGE_SIZE); |
976 | pack_pfns(buffer, &orig_bm); | 1099 | pack_pfns(buffer, &orig_bm); |
977 | } else { | 1100 | } else { |
978 | unsigned long pfn = memory_bm_next_pfn(©_bm); | 1101 | struct page *page; |
979 | 1102 | ||
980 | handle->buffer = page_address(pfn_to_page(pfn)); | 1103 | page = pfn_to_page(memory_bm_next_pfn(©_bm)); |
1104 | if (PageHighMem(page)) { | ||
1105 | /* Highmem pages are copied to the buffer, | ||
1106 | * because we can't return with a kmapped | ||
1107 | * highmem page (we may not be called again). | ||
1108 | */ | ||
1109 | void *kaddr; | ||
1110 | |||
1111 | kaddr = kmap_atomic(page, KM_USER0); | ||
1112 | memcpy(buffer, kaddr, PAGE_SIZE); | ||
1113 | kunmap_atomic(kaddr, KM_USER0); | ||
1114 | handle->buffer = buffer; | ||
1115 | } else { | ||
1116 | handle->buffer = page_address(page); | ||
1117 | } | ||
981 | } | 1118 | } |
982 | handle->prev = handle->cur; | 1119 | handle->prev = handle->cur; |
983 | } | 1120 | } |
@@ -1005,7 +1142,7 @@ static int mark_unsafe_pages(struct memory_bitmap *bm) | |||
1005 | unsigned long pfn, max_zone_pfn; | 1142 | unsigned long pfn, max_zone_pfn; |
1006 | 1143 | ||
1007 | /* Clear page flags */ | 1144 | /* Clear page flags */ |
1008 | for_each_zone (zone) { | 1145 | for_each_zone(zone) { |
1009 | max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; | 1146 | max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; |
1010 | for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) | 1147 | for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) |
1011 | if (pfn_valid(pfn)) | 1148 | if (pfn_valid(pfn)) |
@@ -1101,6 +1238,218 @@ unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm) | |||
1101 | } | 1238 | } |
1102 | } | 1239 | } |
1103 | 1240 | ||
1241 | /* List of "safe" pages that may be used to store data loaded from the suspend | ||
1242 | * image | ||
1243 | */ | ||
1244 | static struct linked_page *safe_pages_list; | ||
1245 | |||
1246 | #ifdef CONFIG_HIGHMEM | ||
1247 | /* struct highmem_pbe is used for creating the list of highmem pages that | ||
1248 | * should be restored atomically during the resume from disk, because the page | ||
1249 | * frames they have occupied before the suspend are in use. | ||
1250 | */ | ||
1251 | struct highmem_pbe { | ||
1252 | struct page *copy_page; /* data is here now */ | ||
1253 | struct page *orig_page; /* data was here before the suspend */ | ||
1254 | struct highmem_pbe *next; | ||
1255 | }; | ||
1256 | |||
1257 | /* List of highmem PBEs needed for restoring the highmem pages that were | ||
1258 | * allocated before the suspend and included in the suspend image, but have | ||
1259 | * also been allocated by the "resume" kernel, so their contents cannot be | ||
1260 | * written directly to their "original" page frames. | ||
1261 | */ | ||
1262 | static struct highmem_pbe *highmem_pblist; | ||
1263 | |||
1264 | /** | ||
1265 | * count_highmem_image_pages - compute the number of highmem pages in the | ||
1266 | * suspend image. The bits in the memory bitmap @bm that correspond to the | ||
1267 | * image pages are assumed to be set. | ||
1268 | */ | ||
1269 | |||
1270 | static unsigned int count_highmem_image_pages(struct memory_bitmap *bm) | ||
1271 | { | ||
1272 | unsigned long pfn; | ||
1273 | unsigned int cnt = 0; | ||
1274 | |||
1275 | memory_bm_position_reset(bm); | ||
1276 | pfn = memory_bm_next_pfn(bm); | ||
1277 | while (pfn != BM_END_OF_MAP) { | ||
1278 | if (PageHighMem(pfn_to_page(pfn))) | ||
1279 | cnt++; | ||
1280 | |||
1281 | pfn = memory_bm_next_pfn(bm); | ||
1282 | } | ||
1283 | return cnt; | ||
1284 | } | ||
1285 | |||
1286 | /** | ||
1287 | * prepare_highmem_image - try to allocate as many highmem pages as | ||
1288 | * there are highmem image pages (@nr_highmem_p points to the variable | ||
1289 | * containing the number of highmem image pages). The pages that are | ||
1290 | * "safe" (ie. will not be overwritten when the suspend image is | ||
1291 | * restored) have the corresponding bits set in @bm (it must be | ||
1292 | * unitialized). | ||
1293 | * | ||
1294 | * NOTE: This function should not be called if there are no highmem | ||
1295 | * image pages. | ||
1296 | */ | ||
1297 | |||
1298 | static unsigned int safe_highmem_pages; | ||
1299 | |||
1300 | static struct memory_bitmap *safe_highmem_bm; | ||
1301 | |||
1302 | static int | ||
1303 | prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p) | ||
1304 | { | ||
1305 | unsigned int to_alloc; | ||
1306 | |||
1307 | if (memory_bm_create(bm, GFP_ATOMIC, PG_SAFE)) | ||
1308 | return -ENOMEM; | ||
1309 | |||
1310 | if (get_highmem_buffer(PG_SAFE)) | ||
1311 | return -ENOMEM; | ||
1312 | |||
1313 | to_alloc = count_free_highmem_pages(); | ||
1314 | if (to_alloc > *nr_highmem_p) | ||
1315 | to_alloc = *nr_highmem_p; | ||
1316 | else | ||
1317 | *nr_highmem_p = to_alloc; | ||
1318 | |||
1319 | safe_highmem_pages = 0; | ||
1320 | while (to_alloc-- > 0) { | ||
1321 | struct page *page; | ||
1322 | |||
1323 | page = alloc_page(__GFP_HIGHMEM); | ||
1324 | if (!PageNosaveFree(page)) { | ||
1325 | /* The page is "safe", set its bit the bitmap */ | ||
1326 | memory_bm_set_bit(bm, page_to_pfn(page)); | ||
1327 | safe_highmem_pages++; | ||
1328 | } | ||
1329 | /* Mark the page as allocated */ | ||
1330 | SetPageNosave(page); | ||
1331 | SetPageNosaveFree(page); | ||
1332 | } | ||
1333 | memory_bm_position_reset(bm); | ||
1334 | safe_highmem_bm = bm; | ||
1335 | return 0; | ||
1336 | } | ||
1337 | |||
1338 | /** | ||
1339 | * get_highmem_page_buffer - for given highmem image page find the buffer | ||
1340 | * that suspend_write_next() should set for its caller to write to. | ||
1341 | * | ||
1342 | * If the page is to be saved to its "original" page frame or a copy of | ||
1343 | * the page is to be made in the highmem, @buffer is returned. Otherwise, | ||
1344 | * the copy of the page is to be made in normal memory, so the address of | ||
1345 | * the copy is returned. | ||
1346 | * | ||
1347 | * If @buffer is returned, the caller of suspend_write_next() will write | ||
1348 | * the page's contents to @buffer, so they will have to be copied to the | ||
1349 | * right location on the next call to suspend_write_next() and it is done | ||
1350 | * with the help of copy_last_highmem_page(). For this purpose, if | ||
1351 | * @buffer is returned, @last_highmem page is set to the page to which | ||
1352 | * the data will have to be copied from @buffer. | ||
1353 | */ | ||
1354 | |||
1355 | static struct page *last_highmem_page; | ||
1356 | |||
1357 | static void * | ||
1358 | get_highmem_page_buffer(struct page *page, struct chain_allocator *ca) | ||
1359 | { | ||
1360 | struct highmem_pbe *pbe; | ||
1361 | void *kaddr; | ||
1362 | |||
1363 | if (PageNosave(page) && PageNosaveFree(page)) { | ||
1364 | /* We have allocated the "original" page frame and we can | ||
1365 | * use it directly to store the loaded page. | ||
1366 | */ | ||
1367 | last_highmem_page = page; | ||
1368 | return buffer; | ||
1369 | } | ||
1370 | /* The "original" page frame has not been allocated and we have to | ||
1371 | * use a "safe" page frame to store the loaded page. | ||
1372 | */ | ||
1373 | pbe = chain_alloc(ca, sizeof(struct highmem_pbe)); | ||
1374 | if (!pbe) { | ||
1375 | swsusp_free(); | ||
1376 | return NULL; | ||
1377 | } | ||
1378 | pbe->orig_page = page; | ||
1379 | if (safe_highmem_pages > 0) { | ||
1380 | struct page *tmp; | ||
1381 | |||
1382 | /* Copy of the page will be stored in high memory */ | ||
1383 | kaddr = buffer; | ||
1384 | tmp = pfn_to_page(memory_bm_next_pfn(safe_highmem_bm)); | ||
1385 | safe_highmem_pages--; | ||
1386 | last_highmem_page = tmp; | ||
1387 | pbe->copy_page = tmp; | ||
1388 | } else { | ||
1389 | /* Copy of the page will be stored in normal memory */ | ||
1390 | kaddr = safe_pages_list; | ||
1391 | safe_pages_list = safe_pages_list->next; | ||
1392 | pbe->copy_page = virt_to_page(kaddr); | ||
1393 | } | ||
1394 | pbe->next = highmem_pblist; | ||
1395 | highmem_pblist = pbe; | ||
1396 | return kaddr; | ||
1397 | } | ||
1398 | |||
1399 | /** | ||
1400 | * copy_last_highmem_page - copy the contents of a highmem image from | ||
1401 | * @buffer, where the caller of snapshot_write_next() has place them, | ||
1402 | * to the right location represented by @last_highmem_page . | ||
1403 | */ | ||
1404 | |||
1405 | static void copy_last_highmem_page(void) | ||
1406 | { | ||
1407 | if (last_highmem_page) { | ||
1408 | void *dst; | ||
1409 | |||
1410 | dst = kmap_atomic(last_highmem_page, KM_USER0); | ||
1411 | memcpy(dst, buffer, PAGE_SIZE); | ||
1412 | kunmap_atomic(dst, KM_USER0); | ||
1413 | last_highmem_page = NULL; | ||
1414 | } | ||
1415 | } | ||
1416 | |||
1417 | static inline int last_highmem_page_copied(void) | ||
1418 | { | ||
1419 | return !last_highmem_page; | ||
1420 | } | ||
1421 | |||
1422 | static inline void free_highmem_data(void) | ||
1423 | { | ||
1424 | if (safe_highmem_bm) | ||
1425 | memory_bm_free(safe_highmem_bm, PG_UNSAFE_CLEAR); | ||
1426 | |||
1427 | if (buffer) | ||
1428 | free_image_page(buffer, PG_UNSAFE_CLEAR); | ||
1429 | } | ||
1430 | #else | ||
1431 | static inline int get_safe_write_buffer(void) { return 0; } | ||
1432 | |||
1433 | static unsigned int | ||
1434 | count_highmem_image_pages(struct memory_bitmap *bm) { return 0; } | ||
1435 | |||
1436 | static inline int | ||
1437 | prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p) | ||
1438 | { | ||
1439 | return 0; | ||
1440 | } | ||
1441 | |||
1442 | static inline void * | ||
1443 | get_highmem_page_buffer(struct page *page, struct chain_allocator *ca) | ||
1444 | { | ||
1445 | return NULL; | ||
1446 | } | ||
1447 | |||
1448 | static inline void copy_last_highmem_page(void) {} | ||
1449 | static inline int last_highmem_page_copied(void) { return 1; } | ||
1450 | static inline void free_highmem_data(void) {} | ||
1451 | #endif /* CONFIG_HIGHMEM */ | ||
1452 | |||
1104 | /** | 1453 | /** |
1105 | * prepare_image - use the memory bitmap @bm to mark the pages that will | 1454 | * prepare_image - use the memory bitmap @bm to mark the pages that will |
1106 | * be overwritten in the process of restoring the system memory state | 1455 | * be overwritten in the process of restoring the system memory state |
@@ -1110,20 +1459,25 @@ unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm) | |||
1110 | * The idea is to allocate a new memory bitmap first and then allocate | 1459 | * The idea is to allocate a new memory bitmap first and then allocate |
1111 | * as many pages as needed for the image data, but not to assign these | 1460 | * as many pages as needed for the image data, but not to assign these |
1112 | * pages to specific tasks initially. Instead, we just mark them as | 1461 | * pages to specific tasks initially. Instead, we just mark them as |
1113 | * allocated and create a list of "safe" pages that will be used later. | 1462 | * allocated and create a lists of "safe" pages that will be used |
1463 | * later. On systems with high memory a list of "safe" highmem pages is | ||
1464 | * also created. | ||
1114 | */ | 1465 | */ |
1115 | 1466 | ||
1116 | #define PBES_PER_LINKED_PAGE (LINKED_PAGE_DATA_SIZE / sizeof(struct pbe)) | 1467 | #define PBES_PER_LINKED_PAGE (LINKED_PAGE_DATA_SIZE / sizeof(struct pbe)) |
1117 | 1468 | ||
1118 | static struct linked_page *safe_pages_list; | ||
1119 | |||
1120 | static int | 1469 | static int |
1121 | prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm) | 1470 | prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm) |
1122 | { | 1471 | { |
1123 | unsigned int nr_pages; | 1472 | unsigned int nr_pages, nr_highmem; |
1124 | struct linked_page *sp_list, *lp; | 1473 | struct linked_page *sp_list, *lp; |
1125 | int error; | 1474 | int error; |
1126 | 1475 | ||
1476 | /* If there is no highmem, the buffer will not be necessary */ | ||
1477 | free_image_page(buffer, PG_UNSAFE_CLEAR); | ||
1478 | buffer = NULL; | ||
1479 | |||
1480 | nr_highmem = count_highmem_image_pages(bm); | ||
1127 | error = mark_unsafe_pages(bm); | 1481 | error = mark_unsafe_pages(bm); |
1128 | if (error) | 1482 | if (error) |
1129 | goto Free; | 1483 | goto Free; |
@@ -1134,6 +1488,11 @@ prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm) | |||
1134 | 1488 | ||
1135 | duplicate_memory_bitmap(new_bm, bm); | 1489 | duplicate_memory_bitmap(new_bm, bm); |
1136 | memory_bm_free(bm, PG_UNSAFE_KEEP); | 1490 | memory_bm_free(bm, PG_UNSAFE_KEEP); |
1491 | if (nr_highmem > 0) { | ||
1492 | error = prepare_highmem_image(bm, &nr_highmem); | ||
1493 | if (error) | ||
1494 | goto Free; | ||
1495 | } | ||
1137 | /* Reserve some safe pages for potential later use. | 1496 | /* Reserve some safe pages for potential later use. |
1138 | * | 1497 | * |
1139 | * NOTE: This way we make sure there will be enough safe pages for the | 1498 | * NOTE: This way we make sure there will be enough safe pages for the |
@@ -1142,10 +1501,10 @@ prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm) | |||
1142 | */ | 1501 | */ |
1143 | sp_list = NULL; | 1502 | sp_list = NULL; |
1144 | /* nr_copy_pages cannot be lesser than allocated_unsafe_pages */ | 1503 | /* nr_copy_pages cannot be lesser than allocated_unsafe_pages */ |
1145 | nr_pages = nr_copy_pages - allocated_unsafe_pages; | 1504 | nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages; |
1146 | nr_pages = DIV_ROUND_UP(nr_pages, PBES_PER_LINKED_PAGE); | 1505 | nr_pages = DIV_ROUND_UP(nr_pages, PBES_PER_LINKED_PAGE); |
1147 | while (nr_pages > 0) { | 1506 | while (nr_pages > 0) { |
1148 | lp = alloc_image_page(GFP_ATOMIC, PG_SAFE); | 1507 | lp = get_image_page(GFP_ATOMIC, PG_SAFE); |
1149 | if (!lp) { | 1508 | if (!lp) { |
1150 | error = -ENOMEM; | 1509 | error = -ENOMEM; |
1151 | goto Free; | 1510 | goto Free; |
@@ -1156,7 +1515,7 @@ prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm) | |||
1156 | } | 1515 | } |
1157 | /* Preallocate memory for the image */ | 1516 | /* Preallocate memory for the image */ |
1158 | safe_pages_list = NULL; | 1517 | safe_pages_list = NULL; |
1159 | nr_pages = nr_copy_pages - allocated_unsafe_pages; | 1518 | nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages; |
1160 | while (nr_pages > 0) { | 1519 | while (nr_pages > 0) { |
1161 | lp = (struct linked_page *)get_zeroed_page(GFP_ATOMIC); | 1520 | lp = (struct linked_page *)get_zeroed_page(GFP_ATOMIC); |
1162 | if (!lp) { | 1521 | if (!lp) { |
@@ -1181,7 +1540,7 @@ prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm) | |||
1181 | } | 1540 | } |
1182 | return 0; | 1541 | return 0; |
1183 | 1542 | ||
1184 | Free: | 1543 | Free: |
1185 | swsusp_free(); | 1544 | swsusp_free(); |
1186 | return error; | 1545 | return error; |
1187 | } | 1546 | } |
@@ -1196,6 +1555,9 @@ static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca) | |||
1196 | struct pbe *pbe; | 1555 | struct pbe *pbe; |
1197 | struct page *page = pfn_to_page(memory_bm_next_pfn(bm)); | 1556 | struct page *page = pfn_to_page(memory_bm_next_pfn(bm)); |
1198 | 1557 | ||
1558 | if (PageHighMem(page)) | ||
1559 | return get_highmem_page_buffer(page, ca); | ||
1560 | |||
1199 | if (PageNosave(page) && PageNosaveFree(page)) | 1561 | if (PageNosave(page) && PageNosaveFree(page)) |
1200 | /* We have allocated the "original" page frame and we can | 1562 | /* We have allocated the "original" page frame and we can |
1201 | * use it directly to store the loaded page. | 1563 | * use it directly to store the loaded page. |
@@ -1210,12 +1572,12 @@ static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca) | |||
1210 | swsusp_free(); | 1572 | swsusp_free(); |
1211 | return NULL; | 1573 | return NULL; |
1212 | } | 1574 | } |
1213 | pbe->orig_address = (unsigned long)page_address(page); | 1575 | pbe->orig_address = page_address(page); |
1214 | pbe->address = (unsigned long)safe_pages_list; | 1576 | pbe->address = safe_pages_list; |
1215 | safe_pages_list = safe_pages_list->next; | 1577 | safe_pages_list = safe_pages_list->next; |
1216 | pbe->next = restore_pblist; | 1578 | pbe->next = restore_pblist; |
1217 | restore_pblist = pbe; | 1579 | restore_pblist = pbe; |
1218 | return (void *)pbe->address; | 1580 | return pbe->address; |
1219 | } | 1581 | } |
1220 | 1582 | ||
1221 | /** | 1583 | /** |
@@ -1249,14 +1611,16 @@ int snapshot_write_next(struct snapshot_handle *handle, size_t count) | |||
1249 | if (handle->prev && handle->cur > nr_meta_pages + nr_copy_pages) | 1611 | if (handle->prev && handle->cur > nr_meta_pages + nr_copy_pages) |
1250 | return 0; | 1612 | return 0; |
1251 | 1613 | ||
1252 | if (!buffer) { | 1614 | if (handle->offset == 0) { |
1253 | /* This makes the buffer be freed by swsusp_free() */ | 1615 | if (!buffer) |
1254 | buffer = alloc_image_page(GFP_ATOMIC, PG_ANY); | 1616 | /* This makes the buffer be freed by swsusp_free() */ |
1617 | buffer = get_image_page(GFP_ATOMIC, PG_ANY); | ||
1618 | |||
1255 | if (!buffer) | 1619 | if (!buffer) |
1256 | return -ENOMEM; | 1620 | return -ENOMEM; |
1257 | } | 1621 | |
1258 | if (!handle->offset) | ||
1259 | handle->buffer = buffer; | 1622 | handle->buffer = buffer; |
1623 | } | ||
1260 | handle->sync_read = 1; | 1624 | handle->sync_read = 1; |
1261 | if (handle->prev < handle->cur) { | 1625 | if (handle->prev < handle->cur) { |
1262 | if (handle->prev == 0) { | 1626 | if (handle->prev == 0) { |
@@ -1284,8 +1648,10 @@ int snapshot_write_next(struct snapshot_handle *handle, size_t count) | |||
1284 | return -ENOMEM; | 1648 | return -ENOMEM; |
1285 | } | 1649 | } |
1286 | } else { | 1650 | } else { |
1651 | copy_last_highmem_page(); | ||
1287 | handle->buffer = get_buffer(&orig_bm, &ca); | 1652 | handle->buffer = get_buffer(&orig_bm, &ca); |
1288 | handle->sync_read = 0; | 1653 | if (handle->buffer != buffer) |
1654 | handle->sync_read = 0; | ||
1289 | } | 1655 | } |
1290 | handle->prev = handle->cur; | 1656 | handle->prev = handle->cur; |
1291 | } | 1657 | } |
@@ -1301,15 +1667,73 @@ int snapshot_write_next(struct snapshot_handle *handle, size_t count) | |||
1301 | return count; | 1667 | return count; |
1302 | } | 1668 | } |
1303 | 1669 | ||
1670 | /** | ||
1671 | * snapshot_write_finalize - must be called after the last call to | ||
1672 | * snapshot_write_next() in case the last page in the image happens | ||
1673 | * to be a highmem page and its contents should be stored in the | ||
1674 | * highmem. Additionally, it releases the memory that will not be | ||
1675 | * used any more. | ||
1676 | */ | ||
1677 | |||
1678 | void snapshot_write_finalize(struct snapshot_handle *handle) | ||
1679 | { | ||
1680 | copy_last_highmem_page(); | ||
1681 | /* Free only if we have loaded the image entirely */ | ||
1682 | if (handle->prev && handle->cur > nr_meta_pages + nr_copy_pages) { | ||
1683 | memory_bm_free(&orig_bm, PG_UNSAFE_CLEAR); | ||
1684 | free_highmem_data(); | ||
1685 | } | ||
1686 | } | ||
1687 | |||
1304 | int snapshot_image_loaded(struct snapshot_handle *handle) | 1688 | int snapshot_image_loaded(struct snapshot_handle *handle) |
1305 | { | 1689 | { |
1306 | return !(!nr_copy_pages || | 1690 | return !(!nr_copy_pages || !last_highmem_page_copied() || |
1307 | handle->cur <= nr_meta_pages + nr_copy_pages); | 1691 | handle->cur <= nr_meta_pages + nr_copy_pages); |
1308 | } | 1692 | } |
1309 | 1693 | ||
1310 | void snapshot_free_unused_memory(struct snapshot_handle *handle) | 1694 | #ifdef CONFIG_HIGHMEM |
1695 | /* Assumes that @buf is ready and points to a "safe" page */ | ||
1696 | static inline void | ||
1697 | swap_two_pages_data(struct page *p1, struct page *p2, void *buf) | ||
1311 | { | 1698 | { |
1312 | /* Free only if we have loaded the image entirely */ | 1699 | void *kaddr1, *kaddr2; |
1313 | if (handle->prev && handle->cur > nr_meta_pages + nr_copy_pages) | 1700 | |
1314 | memory_bm_free(&orig_bm, PG_UNSAFE_CLEAR); | 1701 | kaddr1 = kmap_atomic(p1, KM_USER0); |
1702 | kaddr2 = kmap_atomic(p2, KM_USER1); | ||
1703 | memcpy(buf, kaddr1, PAGE_SIZE); | ||
1704 | memcpy(kaddr1, kaddr2, PAGE_SIZE); | ||
1705 | memcpy(kaddr2, buf, PAGE_SIZE); | ||
1706 | kunmap_atomic(kaddr1, KM_USER0); | ||
1707 | kunmap_atomic(kaddr2, KM_USER1); | ||
1708 | } | ||
1709 | |||
1710 | /** | ||
1711 | * restore_highmem - for each highmem page that was allocated before | ||
1712 | * the suspend and included in the suspend image, and also has been | ||
1713 | * allocated by the "resume" kernel, swap its current (i.e. "before | |
1714 | * resume") contents with the previous (i.e. "before suspend") one. | |
1715 | * | ||
1716 | * If the resume eventually fails, we can call this function once | ||
1717 | * again and restore the "before resume" highmem state. | ||
1718 | */ | ||
1719 | |||
1720 | int restore_highmem(void) | ||
1721 | { | ||
1722 | struct highmem_pbe *pbe = highmem_pblist; | ||
1723 | void *buf; | ||
1724 | |||
1725 | if (!pbe) | ||
1726 | return 0; | ||
1727 | |||
1728 | buf = get_image_page(GFP_ATOMIC, PG_SAFE); | ||
1729 | if (!buf) | ||
1730 | return -ENOMEM; | ||
1731 | |||
1732 | while (pbe) { | ||
1733 | swap_two_pages_data(pbe->copy_page, pbe->orig_page, buf); | ||
1734 | pbe = pbe->next; | ||
1735 | } | ||
1736 | free_image_page(buf, PG_UNSAFE_CLEAR); | ||
1737 | return 0; | ||
1315 | } | 1738 | } |
1739 | #endif /* CONFIG_HIGHMEM */ | ||
diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 1a3b0dd2c3fc..f133d4a6d817 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c | |||
@@ -34,34 +34,123 @@ extern char resume_file[]; | |||
34 | #define SWSUSP_SIG "S1SUSPEND" | 34 | #define SWSUSP_SIG "S1SUSPEND" |
35 | 35 | ||
36 | static struct swsusp_header { | 36 | static struct swsusp_header { |
37 | char reserved[PAGE_SIZE - 20 - sizeof(swp_entry_t)]; | 37 | char reserved[PAGE_SIZE - 20 - sizeof(sector_t)]; |
38 | swp_entry_t image; | 38 | sector_t image; |
39 | char orig_sig[10]; | 39 | char orig_sig[10]; |
40 | char sig[10]; | 40 | char sig[10]; |
41 | } __attribute__((packed, aligned(PAGE_SIZE))) swsusp_header; | 41 | } __attribute__((packed, aligned(PAGE_SIZE))) swsusp_header; |
42 | 42 | ||
43 | /* | 43 | /* |
44 | * Saving part... | 44 | * General things |
45 | */ | 45 | */ |
46 | 46 | ||
47 | static unsigned short root_swap = 0xffff; | 47 | static unsigned short root_swap = 0xffff; |
48 | static struct block_device *resume_bdev; | ||
49 | |||
50 | /** | ||
51 | * submit - submit BIO request. | ||
52 | * @rw: READ or WRITE. | ||
53 | * @page_off: physical offset of page. | |
54 | * @page: page we're reading or writing. | ||
55 | * @bio_chain: list of pending bios (for async reading) | |
56 | * | ||
57 | * Straight from the textbook - allocate and initialize the bio. | ||
58 | * If we're reading, make sure the page is marked as dirty. | ||
59 | * Then submit it and, if @bio_chain == NULL, wait. | ||
60 | */ | ||
61 | static int submit(int rw, pgoff_t page_off, struct page *page, | ||
62 | struct bio **bio_chain) | ||
63 | { | ||
64 | struct bio *bio; | ||
65 | |||
66 | bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1); | ||
67 | if (!bio) | ||
68 | return -ENOMEM; | ||
69 | bio->bi_sector = page_off * (PAGE_SIZE >> 9); | ||
70 | bio->bi_bdev = resume_bdev; | ||
71 | bio->bi_end_io = end_swap_bio_read; | ||
72 | |||
73 | if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { | ||
74 | printk("swsusp: ERROR: adding page to bio at %ld\n", page_off); | ||
75 | bio_put(bio); | ||
76 | return -EFAULT; | ||
77 | } | ||
78 | |||
79 | lock_page(page); | ||
80 | bio_get(bio); | ||
81 | |||
82 | if (bio_chain == NULL) { | ||
83 | submit_bio(rw | (1 << BIO_RW_SYNC), bio); | ||
84 | wait_on_page_locked(page); | ||
85 | if (rw == READ) | ||
86 | bio_set_pages_dirty(bio); | ||
87 | bio_put(bio); | ||
88 | } else { | ||
89 | if (rw == READ) | ||
90 | get_page(page); /* These pages are freed later */ | ||
91 | bio->bi_private = *bio_chain; | ||
92 | *bio_chain = bio; | ||
93 | submit_bio(rw | (1 << BIO_RW_SYNC), bio); | ||
94 | } | ||
95 | return 0; | ||
96 | } | ||
97 | |||
98 | static int bio_read_page(pgoff_t page_off, void *addr, struct bio **bio_chain) | ||
99 | { | ||
100 | return submit(READ, page_off, virt_to_page(addr), bio_chain); | ||
101 | } | ||
102 | |||
103 | static int bio_write_page(pgoff_t page_off, void *addr, struct bio **bio_chain) | ||
104 | { | ||
105 | return submit(WRITE, page_off, virt_to_page(addr), bio_chain); | ||
106 | } | ||
107 | |||
108 | static int wait_on_bio_chain(struct bio **bio_chain) | ||
109 | { | ||
110 | struct bio *bio; | ||
111 | struct bio *next_bio; | ||
112 | int ret = 0; | ||
113 | |||
114 | if (bio_chain == NULL) | ||
115 | return 0; | ||
116 | |||
117 | bio = *bio_chain; | ||
118 | if (bio == NULL) | ||
119 | return 0; | ||
120 | while (bio) { | ||
121 | struct page *page; | ||
122 | |||
123 | next_bio = bio->bi_private; | ||
124 | page = bio->bi_io_vec[0].bv_page; | ||
125 | wait_on_page_locked(page); | ||
126 | if (!PageUptodate(page) || PageError(page)) | ||
127 | ret = -EIO; | ||
128 | put_page(page); | ||
129 | bio_put(bio); | ||
130 | bio = next_bio; | ||
131 | } | ||
132 | *bio_chain = NULL; | ||
133 | return ret; | ||
134 | } | ||
135 | |||
136 | /* | ||
137 | * Saving part | ||
138 | */ | ||
48 | 139 | ||
49 | static int mark_swapfiles(swp_entry_t start) | 140 | static int mark_swapfiles(sector_t start) |
50 | { | 141 | { |
51 | int error; | 142 | int error; |
52 | 143 | ||
53 | rw_swap_page_sync(READ, swp_entry(root_swap, 0), | 144 | bio_read_page(swsusp_resume_block, &swsusp_header, NULL); |
54 | virt_to_page((unsigned long)&swsusp_header), NULL); | ||
55 | if (!memcmp("SWAP-SPACE",swsusp_header.sig, 10) || | 145 | if (!memcmp("SWAP-SPACE",swsusp_header.sig, 10) || |
56 | !memcmp("SWAPSPACE2",swsusp_header.sig, 10)) { | 146 | !memcmp("SWAPSPACE2",swsusp_header.sig, 10)) { |
57 | memcpy(swsusp_header.orig_sig,swsusp_header.sig, 10); | 147 | memcpy(swsusp_header.orig_sig,swsusp_header.sig, 10); |
58 | memcpy(swsusp_header.sig,SWSUSP_SIG, 10); | 148 | memcpy(swsusp_header.sig,SWSUSP_SIG, 10); |
59 | swsusp_header.image = start; | 149 | swsusp_header.image = start; |
60 | error = rw_swap_page_sync(WRITE, swp_entry(root_swap, 0), | 150 | error = bio_write_page(swsusp_resume_block, |
61 | virt_to_page((unsigned long)&swsusp_header), | 151 | &swsusp_header, NULL); |
62 | NULL); | ||
63 | } else { | 152 | } else { |
64 | pr_debug("swsusp: Partition is not swap space.\n"); | 153 | printk(KERN_ERR "swsusp: Swap header not found!\n"); |
65 | error = -ENODEV; | 154 | error = -ENODEV; |
66 | } | 155 | } |
67 | return error; | 156 | return error; |
@@ -74,12 +163,21 @@ static int mark_swapfiles(swp_entry_t start) | |||
74 | 163 | ||
75 | static int swsusp_swap_check(void) /* This is called before saving image */ | 164 | static int swsusp_swap_check(void) /* This is called before saving image */ |
76 | { | 165 | { |
77 | int res = swap_type_of(swsusp_resume_device); | 166 | int res; |
167 | |||
168 | res = swap_type_of(swsusp_resume_device, swsusp_resume_block); | ||
169 | if (res < 0) | ||
170 | return res; | ||
171 | |||
172 | root_swap = res; | ||
173 | resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_WRITE); | ||
174 | if (IS_ERR(resume_bdev)) | ||
175 | return PTR_ERR(resume_bdev); | ||
176 | |||
177 | res = set_blocksize(resume_bdev, PAGE_SIZE); | ||
178 | if (res < 0) | ||
179 | blkdev_put(resume_bdev); | ||
78 | 180 | ||
79 | if (res >= 0) { | ||
80 | root_swap = res; | ||
81 | return 0; | ||
82 | } | ||
83 | return res; | 181 | return res; |
84 | } | 182 | } |
85 | 183 | ||
@@ -90,36 +188,26 @@ static int swsusp_swap_check(void) /* This is called before saving image */ | |||
90 | * @bio_chain: Link the next write BIO here | 188 | * @bio_chain: Link the next write BIO here |
91 | */ | 189 | */ |
92 | 190 | ||
93 | static int write_page(void *buf, unsigned long offset, struct bio **bio_chain) | 191 | static int write_page(void *buf, sector_t offset, struct bio **bio_chain) |
94 | { | 192 | { |
95 | swp_entry_t entry; | 193 | void *src; |
96 | int error = -ENOSPC; | 194 | |
97 | 195 | if (!offset) | |
98 | if (offset) { | 196 | return -ENOSPC; |
99 | struct page *page = virt_to_page(buf); | 197 | |
100 | 198 | if (bio_chain) { | |
101 | if (bio_chain) { | 199 | src = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH); |
102 | /* | 200 | if (src) { |
103 | * Whether or not we successfully allocated a copy page, | 201 | memcpy(src, buf, PAGE_SIZE); |
104 | * we take a ref on the page here. It gets undone in | 202 | } else { |
105 | * wait_on_bio_chain(). | 203 | WARN_ON_ONCE(1); |
106 | */ | 204 | bio_chain = NULL; /* Go synchronous */ |
107 | struct page *page_copy; | 205 | src = buf; |
108 | page_copy = alloc_page(GFP_ATOMIC); | ||
109 | if (page_copy == NULL) { | ||
110 | WARN_ON_ONCE(1); | ||
111 | bio_chain = NULL; /* Go synchronous */ | ||
112 | get_page(page); | ||
113 | } else { | ||
114 | memcpy(page_address(page_copy), | ||
115 | page_address(page), PAGE_SIZE); | ||
116 | page = page_copy; | ||
117 | } | ||
118 | } | 206 | } |
119 | entry = swp_entry(root_swap, offset); | 207 | } else { |
120 | error = rw_swap_page_sync(WRITE, entry, page, bio_chain); | 208 | src = buf; |
121 | } | 209 | } |
122 | return error; | 210 | return bio_write_page(offset, src, bio_chain); |
123 | } | 211 | } |
124 | 212 | ||
125 | /* | 213 | /* |
@@ -137,11 +225,11 @@ static int write_page(void *buf, unsigned long offset, struct bio **bio_chain) | |||
137 | * at a time. | 225 | * at a time. |
138 | */ | 226 | */ |
139 | 227 | ||
140 | #define MAP_PAGE_ENTRIES (PAGE_SIZE / sizeof(long) - 1) | 228 | #define MAP_PAGE_ENTRIES (PAGE_SIZE / sizeof(sector_t) - 1) |
141 | 229 | ||
142 | struct swap_map_page { | 230 | struct swap_map_page { |
143 | unsigned long entries[MAP_PAGE_ENTRIES]; | 231 | sector_t entries[MAP_PAGE_ENTRIES]; |
144 | unsigned long next_swap; | 232 | sector_t next_swap; |
145 | }; | 233 | }; |
146 | 234 | ||
147 | /** | 235 | /** |
@@ -151,7 +239,7 @@ struct swap_map_page { | |||
151 | 239 | ||
152 | struct swap_map_handle { | 240 | struct swap_map_handle { |
153 | struct swap_map_page *cur; | 241 | struct swap_map_page *cur; |
154 | unsigned long cur_swap; | 242 | sector_t cur_swap; |
155 | struct bitmap_page *bitmap; | 243 | struct bitmap_page *bitmap; |
156 | unsigned int k; | 244 | unsigned int k; |
157 | }; | 245 | }; |
@@ -166,26 +254,6 @@ static void release_swap_writer(struct swap_map_handle *handle) | |||
166 | handle->bitmap = NULL; | 254 | handle->bitmap = NULL; |
167 | } | 255 | } |
168 | 256 | ||
169 | static void show_speed(struct timeval *start, struct timeval *stop, | ||
170 | unsigned nr_pages, char *msg) | ||
171 | { | ||
172 | s64 elapsed_centisecs64; | ||
173 | int centisecs; | ||
174 | int k; | ||
175 | int kps; | ||
176 | |||
177 | elapsed_centisecs64 = timeval_to_ns(stop) - timeval_to_ns(start); | ||
178 | do_div(elapsed_centisecs64, NSEC_PER_SEC / 100); | ||
179 | centisecs = elapsed_centisecs64; | ||
180 | if (centisecs == 0) | ||
181 | centisecs = 1; /* avoid div-by-zero */ | ||
182 | k = nr_pages * (PAGE_SIZE / 1024); | ||
183 | kps = (k * 100) / centisecs; | ||
184 | printk("%s %d kbytes in %d.%02d seconds (%d.%02d MB/s)\n", msg, k, | ||
185 | centisecs / 100, centisecs % 100, | ||
186 | kps / 1000, (kps % 1000) / 10); | ||
187 | } | ||
188 | |||
189 | static int get_swap_writer(struct swap_map_handle *handle) | 257 | static int get_swap_writer(struct swap_map_handle *handle) |
190 | { | 258 | { |
191 | handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_KERNEL); | 259 | handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_KERNEL); |
@@ -196,7 +264,7 @@ static int get_swap_writer(struct swap_map_handle *handle) | |||
196 | release_swap_writer(handle); | 264 | release_swap_writer(handle); |
197 | return -ENOMEM; | 265 | return -ENOMEM; |
198 | } | 266 | } |
199 | handle->cur_swap = alloc_swap_page(root_swap, handle->bitmap); | 267 | handle->cur_swap = alloc_swapdev_block(root_swap, handle->bitmap); |
200 | if (!handle->cur_swap) { | 268 | if (!handle->cur_swap) { |
201 | release_swap_writer(handle); | 269 | release_swap_writer(handle); |
202 | return -ENOSPC; | 270 | return -ENOSPC; |
@@ -205,43 +273,15 @@ static int get_swap_writer(struct swap_map_handle *handle) | |||
205 | return 0; | 273 | return 0; |
206 | } | 274 | } |
207 | 275 | ||
208 | static int wait_on_bio_chain(struct bio **bio_chain) | ||
209 | { | ||
210 | struct bio *bio; | ||
211 | struct bio *next_bio; | ||
212 | int ret = 0; | ||
213 | |||
214 | if (bio_chain == NULL) | ||
215 | return 0; | ||
216 | |||
217 | bio = *bio_chain; | ||
218 | if (bio == NULL) | ||
219 | return 0; | ||
220 | while (bio) { | ||
221 | struct page *page; | ||
222 | |||
223 | next_bio = bio->bi_private; | ||
224 | page = bio->bi_io_vec[0].bv_page; | ||
225 | wait_on_page_locked(page); | ||
226 | if (!PageUptodate(page) || PageError(page)) | ||
227 | ret = -EIO; | ||
228 | put_page(page); | ||
229 | bio_put(bio); | ||
230 | bio = next_bio; | ||
231 | } | ||
232 | *bio_chain = NULL; | ||
233 | return ret; | ||
234 | } | ||
235 | |||
236 | static int swap_write_page(struct swap_map_handle *handle, void *buf, | 276 | static int swap_write_page(struct swap_map_handle *handle, void *buf, |
237 | struct bio **bio_chain) | 277 | struct bio **bio_chain) |
238 | { | 278 | { |
239 | int error = 0; | 279 | int error = 0; |
240 | unsigned long offset; | 280 | sector_t offset; |
241 | 281 | ||
242 | if (!handle->cur) | 282 | if (!handle->cur) |
243 | return -EINVAL; | 283 | return -EINVAL; |
244 | offset = alloc_swap_page(root_swap, handle->bitmap); | 284 | offset = alloc_swapdev_block(root_swap, handle->bitmap); |
245 | error = write_page(buf, offset, bio_chain); | 285 | error = write_page(buf, offset, bio_chain); |
246 | if (error) | 286 | if (error) |
247 | return error; | 287 | return error; |
@@ -250,7 +290,7 @@ static int swap_write_page(struct swap_map_handle *handle, void *buf, | |||
250 | error = wait_on_bio_chain(bio_chain); | 290 | error = wait_on_bio_chain(bio_chain); |
251 | if (error) | 291 | if (error) |
252 | goto out; | 292 | goto out; |
253 | offset = alloc_swap_page(root_swap, handle->bitmap); | 293 | offset = alloc_swapdev_block(root_swap, handle->bitmap); |
254 | if (!offset) | 294 | if (!offset) |
255 | return -ENOSPC; | 295 | return -ENOSPC; |
256 | handle->cur->next_swap = offset; | 296 | handle->cur->next_swap = offset; |
@@ -261,7 +301,7 @@ static int swap_write_page(struct swap_map_handle *handle, void *buf, | |||
261 | handle->cur_swap = offset; | 301 | handle->cur_swap = offset; |
262 | handle->k = 0; | 302 | handle->k = 0; |
263 | } | 303 | } |
264 | out: | 304 | out: |
265 | return error; | 305 | return error; |
266 | } | 306 | } |
267 | 307 | ||
@@ -315,7 +355,7 @@ static int save_image(struct swap_map_handle *handle, | |||
315 | error = err2; | 355 | error = err2; |
316 | if (!error) | 356 | if (!error) |
317 | printk("\b\b\b\bdone\n"); | 357 | printk("\b\b\b\bdone\n"); |
318 | show_speed(&start, &stop, nr_to_write, "Wrote"); | 358 | swsusp_show_speed(&start, &stop, nr_to_write, "Wrote"); |
319 | return error; | 359 | return error; |
320 | } | 360 | } |
321 | 361 | ||
@@ -350,100 +390,50 @@ int swsusp_write(void) | |||
350 | struct swsusp_info *header; | 390 | struct swsusp_info *header; |
351 | int error; | 391 | int error; |
352 | 392 | ||
353 | if ((error = swsusp_swap_check())) { | 393 | error = swsusp_swap_check(); |
394 | if (error) { | ||
354 | printk(KERN_ERR "swsusp: Cannot find swap device, try " | 395 | printk(KERN_ERR "swsusp: Cannot find swap device, try " |
355 | "swapon -a.\n"); | 396 | "swapon -a.\n"); |
356 | return error; | 397 | return error; |
357 | } | 398 | } |
358 | memset(&snapshot, 0, sizeof(struct snapshot_handle)); | 399 | memset(&snapshot, 0, sizeof(struct snapshot_handle)); |
359 | error = snapshot_read_next(&snapshot, PAGE_SIZE); | 400 | error = snapshot_read_next(&snapshot, PAGE_SIZE); |
360 | if (error < PAGE_SIZE) | 401 | if (error < PAGE_SIZE) { |
361 | return error < 0 ? error : -EFAULT; | 402 | if (error >= 0) |
403 | error = -EFAULT; | ||
404 | |||
405 | goto out; | ||
406 | } | ||
362 | header = (struct swsusp_info *)data_of(snapshot); | 407 | header = (struct swsusp_info *)data_of(snapshot); |
363 | if (!enough_swap(header->pages)) { | 408 | if (!enough_swap(header->pages)) { |
364 | printk(KERN_ERR "swsusp: Not enough free swap\n"); | 409 | printk(KERN_ERR "swsusp: Not enough free swap\n"); |
365 | return -ENOSPC; | 410 | error = -ENOSPC; |
411 | goto out; | ||
366 | } | 412 | } |
367 | error = get_swap_writer(&handle); | 413 | error = get_swap_writer(&handle); |
368 | if (!error) { | 414 | if (!error) { |
369 | unsigned long start = handle.cur_swap; | 415 | sector_t start = handle.cur_swap; |
416 | |||
370 | error = swap_write_page(&handle, header, NULL); | 417 | error = swap_write_page(&handle, header, NULL); |
371 | if (!error) | 418 | if (!error) |
372 | error = save_image(&handle, &snapshot, | 419 | error = save_image(&handle, &snapshot, |
373 | header->pages - 1); | 420 | header->pages - 1); |
421 | |||
374 | if (!error) { | 422 | if (!error) { |
375 | flush_swap_writer(&handle); | 423 | flush_swap_writer(&handle); |
376 | printk("S"); | 424 | printk("S"); |
377 | error = mark_swapfiles(swp_entry(root_swap, start)); | 425 | error = mark_swapfiles(start); |
378 | printk("|\n"); | 426 | printk("|\n"); |
379 | } | 427 | } |
380 | } | 428 | } |
381 | if (error) | 429 | if (error) |
382 | free_all_swap_pages(root_swap, handle.bitmap); | 430 | free_all_swap_pages(root_swap, handle.bitmap); |
383 | release_swap_writer(&handle); | 431 | release_swap_writer(&handle); |
432 | out: | ||
433 | swsusp_close(); | ||
384 | return error; | 434 | return error; |
385 | } | 435 | } |
386 | 436 | ||
387 | static struct block_device *resume_bdev; | ||
388 | |||
389 | /** | ||
390 | * submit - submit BIO request. | ||
391 | * @rw: READ or WRITE. | ||
392 | * @off physical offset of page. | ||
393 | * @page: page we're reading or writing. | ||
394 | * @bio_chain: list of pending biod (for async reading) | ||
395 | * | ||
396 | * Straight from the textbook - allocate and initialize the bio. | ||
397 | * If we're reading, make sure the page is marked as dirty. | ||
398 | * Then submit it and, if @bio_chain == NULL, wait. | ||
399 | */ | ||
400 | static int submit(int rw, pgoff_t page_off, struct page *page, | ||
401 | struct bio **bio_chain) | ||
402 | { | ||
403 | struct bio *bio; | ||
404 | |||
405 | bio = bio_alloc(GFP_ATOMIC, 1); | ||
406 | if (!bio) | ||
407 | return -ENOMEM; | ||
408 | bio->bi_sector = page_off * (PAGE_SIZE >> 9); | ||
409 | bio->bi_bdev = resume_bdev; | ||
410 | bio->bi_end_io = end_swap_bio_read; | ||
411 | |||
412 | if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { | ||
413 | printk("swsusp: ERROR: adding page to bio at %ld\n", page_off); | ||
414 | bio_put(bio); | ||
415 | return -EFAULT; | ||
416 | } | ||
417 | |||
418 | lock_page(page); | ||
419 | bio_get(bio); | ||
420 | |||
421 | if (bio_chain == NULL) { | ||
422 | submit_bio(rw | (1 << BIO_RW_SYNC), bio); | ||
423 | wait_on_page_locked(page); | ||
424 | if (rw == READ) | ||
425 | bio_set_pages_dirty(bio); | ||
426 | bio_put(bio); | ||
427 | } else { | ||
428 | if (rw == READ) | ||
429 | get_page(page); /* These pages are freed later */ | ||
430 | bio->bi_private = *bio_chain; | ||
431 | *bio_chain = bio; | ||
432 | submit_bio(rw | (1 << BIO_RW_SYNC), bio); | ||
433 | } | ||
434 | return 0; | ||
435 | } | ||
436 | |||
437 | static int bio_read_page(pgoff_t page_off, void *addr, struct bio **bio_chain) | ||
438 | { | ||
439 | return submit(READ, page_off, virt_to_page(addr), bio_chain); | ||
440 | } | ||
441 | |||
442 | static int bio_write_page(pgoff_t page_off, void *addr) | ||
443 | { | ||
444 | return submit(WRITE, page_off, virt_to_page(addr), NULL); | ||
445 | } | ||
446 | |||
447 | /** | 437 | /** |
448 | * The following functions allow us to read data using a swap map | 438 | * The following functions allow us to read data using a swap map |
449 | * in a file-alike way | 439 | * in a file-alike way |
@@ -456,17 +446,18 @@ static void release_swap_reader(struct swap_map_handle *handle) | |||
456 | handle->cur = NULL; | 446 | handle->cur = NULL; |
457 | } | 447 | } |
458 | 448 | ||
459 | static int get_swap_reader(struct swap_map_handle *handle, | 449 | static int get_swap_reader(struct swap_map_handle *handle, sector_t start) |
460 | swp_entry_t start) | ||
461 | { | 450 | { |
462 | int error; | 451 | int error; |
463 | 452 | ||
464 | if (!swp_offset(start)) | 453 | if (!start) |
465 | return -EINVAL; | 454 | return -EINVAL; |
466 | handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_ATOMIC); | 455 | |
456 | handle->cur = (struct swap_map_page *)get_zeroed_page(__GFP_WAIT | __GFP_HIGH); | ||
467 | if (!handle->cur) | 457 | if (!handle->cur) |
468 | return -ENOMEM; | 458 | return -ENOMEM; |
469 | error = bio_read_page(swp_offset(start), handle->cur, NULL); | 459 | |
460 | error = bio_read_page(start, handle->cur, NULL); | ||
470 | if (error) { | 461 | if (error) { |
471 | release_swap_reader(handle); | 462 | release_swap_reader(handle); |
472 | return error; | 463 | return error; |
@@ -478,7 +469,7 @@ static int get_swap_reader(struct swap_map_handle *handle, | |||
478 | static int swap_read_page(struct swap_map_handle *handle, void *buf, | 469 | static int swap_read_page(struct swap_map_handle *handle, void *buf, |
479 | struct bio **bio_chain) | 470 | struct bio **bio_chain) |
480 | { | 471 | { |
481 | unsigned long offset; | 472 | sector_t offset; |
482 | int error; | 473 | int error; |
483 | 474 | ||
484 | if (!handle->cur) | 475 | if (!handle->cur) |
@@ -547,11 +538,11 @@ static int load_image(struct swap_map_handle *handle, | |||
547 | error = err2; | 538 | error = err2; |
548 | if (!error) { | 539 | if (!error) { |
549 | printk("\b\b\b\bdone\n"); | 540 | printk("\b\b\b\bdone\n"); |
550 | snapshot_free_unused_memory(snapshot); | 541 | snapshot_write_finalize(snapshot); |
551 | if (!snapshot_image_loaded(snapshot)) | 542 | if (!snapshot_image_loaded(snapshot)) |
552 | error = -ENODATA; | 543 | error = -ENODATA; |
553 | } | 544 | } |
554 | show_speed(&start, &stop, nr_to_read, "Read"); | 545 | swsusp_show_speed(&start, &stop, nr_to_read, "Read"); |
555 | return error; | 546 | return error; |
556 | } | 547 | } |
557 | 548 | ||
@@ -600,12 +591,16 @@ int swsusp_check(void) | |||
600 | if (!IS_ERR(resume_bdev)) { | 591 | if (!IS_ERR(resume_bdev)) { |
601 | set_blocksize(resume_bdev, PAGE_SIZE); | 592 | set_blocksize(resume_bdev, PAGE_SIZE); |
602 | memset(&swsusp_header, 0, sizeof(swsusp_header)); | 593 | memset(&swsusp_header, 0, sizeof(swsusp_header)); |
603 | if ((error = bio_read_page(0, &swsusp_header, NULL))) | 594 | error = bio_read_page(swsusp_resume_block, |
595 | &swsusp_header, NULL); | ||
596 | if (error) | ||
604 | return error; | 597 | return error; |
598 | |||
605 | if (!memcmp(SWSUSP_SIG, swsusp_header.sig, 10)) { | 599 | if (!memcmp(SWSUSP_SIG, swsusp_header.sig, 10)) { |
606 | memcpy(swsusp_header.sig, swsusp_header.orig_sig, 10); | 600 | memcpy(swsusp_header.sig, swsusp_header.orig_sig, 10); |
607 | /* Reset swap signature now */ | 601 | /* Reset swap signature now */ |
608 | error = bio_write_page(0, &swsusp_header); | 602 | error = bio_write_page(swsusp_resume_block, |
603 | &swsusp_header, NULL); | ||
609 | } else { | 604 | } else { |
610 | return -EINVAL; | 605 | return -EINVAL; |
611 | } | 606 | } |
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index 0b66659dc516..31aa0390c777 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c | |||
@@ -49,6 +49,7 @@ | |||
49 | #include <linux/bootmem.h> | 49 | #include <linux/bootmem.h> |
50 | #include <linux/syscalls.h> | 50 | #include <linux/syscalls.h> |
51 | #include <linux/highmem.h> | 51 | #include <linux/highmem.h> |
52 | #include <linux/time.h> | ||
52 | 53 | ||
53 | #include "power.h" | 54 | #include "power.h" |
54 | 55 | ||
@@ -64,10 +65,8 @@ int in_suspend __nosavedata = 0; | |||
64 | 65 | ||
65 | #ifdef CONFIG_HIGHMEM | 66 | #ifdef CONFIG_HIGHMEM |
66 | unsigned int count_highmem_pages(void); | 67 | unsigned int count_highmem_pages(void); |
67 | int save_highmem(void); | ||
68 | int restore_highmem(void); | 68 | int restore_highmem(void); |
69 | #else | 69 | #else |
70 | static inline int save_highmem(void) { return 0; } | ||
71 | static inline int restore_highmem(void) { return 0; } | 70 | static inline int restore_highmem(void) { return 0; } |
72 | static inline unsigned int count_highmem_pages(void) { return 0; } | 71 | static inline unsigned int count_highmem_pages(void) { return 0; } |
73 | #endif | 72 | #endif |
@@ -134,18 +133,18 @@ static int bitmap_set(struct bitmap_page *bitmap, unsigned long bit) | |||
134 | return 0; | 133 | return 0; |
135 | } | 134 | } |
136 | 135 | ||
137 | unsigned long alloc_swap_page(int swap, struct bitmap_page *bitmap) | 136 | sector_t alloc_swapdev_block(int swap, struct bitmap_page *bitmap) |
138 | { | 137 | { |
139 | unsigned long offset; | 138 | unsigned long offset; |
140 | 139 | ||
141 | offset = swp_offset(get_swap_page_of_type(swap)); | 140 | offset = swp_offset(get_swap_page_of_type(swap)); |
142 | if (offset) { | 141 | if (offset) { |
143 | if (bitmap_set(bitmap, offset)) { | 142 | if (bitmap_set(bitmap, offset)) |
144 | swap_free(swp_entry(swap, offset)); | 143 | swap_free(swp_entry(swap, offset)); |
145 | offset = 0; | 144 | else |
146 | } | 145 | return swapdev_block(swap, offset); |
147 | } | 146 | } |
148 | return offset; | 147 | return 0; |
149 | } | 148 | } |
150 | 149 | ||
151 | void free_all_swap_pages(int swap, struct bitmap_page *bitmap) | 150 | void free_all_swap_pages(int swap, struct bitmap_page *bitmap) |
@@ -166,6 +165,34 @@ void free_all_swap_pages(int swap, struct bitmap_page *bitmap) | |||
166 | } | 165 | } |
167 | 166 | ||
168 | /** | 167 | /** |
168 | * swsusp_show_speed - print the time elapsed between two events represented by | ||
169 | * @start and @stop | ||
170 | * | ||
171 | * @nr_pages - number of pages processed between @start and @stop | ||
172 | * @msg - introductory message to print | ||
173 | */ | ||
174 | |||
175 | void swsusp_show_speed(struct timeval *start, struct timeval *stop, | ||
176 | unsigned nr_pages, char *msg) | ||
177 | { | ||
178 | s64 elapsed_centisecs64; | ||
179 | int centisecs; | ||
180 | int k; | ||
181 | int kps; | ||
182 | |||
183 | elapsed_centisecs64 = timeval_to_ns(stop) - timeval_to_ns(start); | ||
184 | do_div(elapsed_centisecs64, NSEC_PER_SEC / 100); | ||
185 | centisecs = elapsed_centisecs64; | ||
186 | if (centisecs == 0) | ||
187 | centisecs = 1; /* avoid div-by-zero */ | ||
188 | k = nr_pages * (PAGE_SIZE / 1024); | ||
189 | kps = (k * 100) / centisecs; | ||
190 | printk("%s %d kbytes in %d.%02d seconds (%d.%02d MB/s)\n", msg, k, | ||
191 | centisecs / 100, centisecs % 100, | ||
192 | kps / 1000, (kps % 1000) / 10); | ||
193 | } | ||
194 | |||
195 | /** | ||
169 | * swsusp_shrink_memory - Try to free as much memory as needed | 196 | * swsusp_shrink_memory - Try to free as much memory as needed |
170 | * | 197 | * |
171 | * ... but do not OOM-kill anyone | 198 | * ... but do not OOM-kill anyone |
@@ -184,23 +211,37 @@ static inline unsigned long __shrink_memory(long tmp) | |||
184 | 211 | ||
185 | int swsusp_shrink_memory(void) | 212 | int swsusp_shrink_memory(void) |
186 | { | 213 | { |
187 | long size, tmp; | 214 | long tmp; |
188 | struct zone *zone; | 215 | struct zone *zone; |
189 | unsigned long pages = 0; | 216 | unsigned long pages = 0; |
190 | unsigned int i = 0; | 217 | unsigned int i = 0; |
191 | char *p = "-\\|/"; | 218 | char *p = "-\\|/"; |
219 | struct timeval start, stop; | ||
192 | 220 | ||
193 | printk("Shrinking memory... "); | 221 | printk("Shrinking memory... "); |
222 | do_gettimeofday(&start); | ||
194 | do { | 223 | do { |
195 | size = 2 * count_highmem_pages(); | 224 | long size, highmem_size; |
196 | size += size / 50 + count_data_pages() + PAGES_FOR_IO; | 225 | |
226 | highmem_size = count_highmem_pages(); | ||
227 | size = count_data_pages() + PAGES_FOR_IO; | ||
197 | tmp = size; | 228 | tmp = size; |
229 | size += highmem_size; | ||
198 | for_each_zone (zone) | 230 | for_each_zone (zone) |
199 | if (!is_highmem(zone) && populated_zone(zone)) { | 231 | if (populated_zone(zone)) { |
200 | tmp -= zone->free_pages; | 232 | if (is_highmem(zone)) { |
201 | tmp += zone->lowmem_reserve[ZONE_NORMAL]; | 233 | highmem_size -= zone->free_pages; |
202 | tmp += snapshot_additional_pages(zone); | 234 | } else { |
235 | tmp -= zone->free_pages; | ||
236 | tmp += zone->lowmem_reserve[ZONE_NORMAL]; | ||
237 | tmp += snapshot_additional_pages(zone); | ||
238 | } | ||
203 | } | 239 | } |
240 | |||
241 | if (highmem_size < 0) | ||
242 | highmem_size = 0; | ||
243 | |||
244 | tmp += highmem_size; | ||
204 | if (tmp > 0) { | 245 | if (tmp > 0) { |
205 | tmp = __shrink_memory(tmp); | 246 | tmp = __shrink_memory(tmp); |
206 | if (!tmp) | 247 | if (!tmp) |
@@ -212,7 +253,9 @@ int swsusp_shrink_memory(void) | |||
212 | } | 253 | } |
213 | printk("\b%c", p[i++%4]); | 254 | printk("\b%c", p[i++%4]); |
214 | } while (tmp > 0); | 255 | } while (tmp > 0); |
256 | do_gettimeofday(&stop); | ||
215 | printk("\bdone (%lu pages freed)\n", pages); | 257 | printk("\bdone (%lu pages freed)\n", pages); |
258 | swsusp_show_speed(&start, &stop, pages, "Freed"); | ||
216 | 259 | ||
217 | return 0; | 260 | return 0; |
218 | } | 261 | } |
@@ -223,6 +266,7 @@ int swsusp_suspend(void) | |||
223 | 266 | ||
224 | if ((error = arch_prepare_suspend())) | 267 | if ((error = arch_prepare_suspend())) |
225 | return error; | 268 | return error; |
269 | |||
226 | local_irq_disable(); | 270 | local_irq_disable(); |
227 | /* At this point, device_suspend() has been called, but *not* | 271 | /* At this point, device_suspend() has been called, but *not* |
228 | * device_power_down(). We *must* device_power_down() now. | 272 | * device_power_down(). We *must* device_power_down() now. |
@@ -235,23 +279,16 @@ int swsusp_suspend(void) | |||
235 | goto Enable_irqs; | 279 | goto Enable_irqs; |
236 | } | 280 | } |
237 | 281 | ||
238 | if ((error = save_highmem())) { | ||
239 | printk(KERN_ERR "swsusp: Not enough free pages for highmem\n"); | ||
240 | goto Restore_highmem; | ||
241 | } | ||
242 | |||
243 | save_processor_state(); | 282 | save_processor_state(); |
244 | if ((error = swsusp_arch_suspend())) | 283 | if ((error = swsusp_arch_suspend())) |
245 | printk(KERN_ERR "Error %d suspending\n", error); | 284 | printk(KERN_ERR "Error %d suspending\n", error); |
246 | /* Restore control flow magically appears here */ | 285 | /* Restore control flow magically appears here */ |
247 | restore_processor_state(); | 286 | restore_processor_state(); |
248 | Restore_highmem: | ||
249 | restore_highmem(); | ||
250 | /* NOTE: device_power_up() is just a resume() for devices | 287 | /* NOTE: device_power_up() is just a resume() for devices |
251 | * that suspended with irqs off ... no overall powerup. | 288 | * that suspended with irqs off ... no overall powerup. |
252 | */ | 289 | */ |
253 | device_power_up(); | 290 | device_power_up(); |
254 | Enable_irqs: | 291 | Enable_irqs: |
255 | local_irq_enable(); | 292 | local_irq_enable(); |
256 | return error; | 293 | return error; |
257 | } | 294 | } |
@@ -268,18 +305,23 @@ int swsusp_resume(void) | |||
268 | printk(KERN_ERR "Some devices failed to power down, very bad\n"); | 305 | printk(KERN_ERR "Some devices failed to power down, very bad\n"); |
269 | /* We'll ignore saved state, but this gets preempt count (etc) right */ | 306 | /* We'll ignore saved state, but this gets preempt count (etc) right */ |
270 | save_processor_state(); | 307 | save_processor_state(); |
271 | error = swsusp_arch_resume(); | 308 | error = restore_highmem(); |
272 | /* Code below is only ever reached in case of failure. Otherwise | 309 | if (!error) { |
273 | * execution continues at place where swsusp_arch_suspend was called | 310 | error = swsusp_arch_resume(); |
274 | */ | 311 | /* The code below is only ever reached in case of a failure. |
275 | BUG_ON(!error); | 312 | * Otherwise execution continues at place where |
313 | * swsusp_arch_suspend() was called | ||
314 | */ | ||
315 | BUG_ON(!error); | ||
316 | /* This call to restore_highmem() undoes the previous one */ | ||
317 | restore_highmem(); | ||
318 | } | ||
276 | /* The only reason why swsusp_arch_resume() can fail is memory being | 319 | /* The only reason why swsusp_arch_resume() can fail is memory being |
277 | * very tight, so we have to free it as soon as we can to avoid | 320 | * very tight, so we have to free it as soon as we can to avoid |
278 | * subsequent failures | 321 | * subsequent failures |
279 | */ | 322 | */ |
280 | swsusp_free(); | 323 | swsusp_free(); |
281 | restore_processor_state(); | 324 | restore_processor_state(); |
282 | restore_highmem(); | ||
283 | touch_softlockup_watchdog(); | 325 | touch_softlockup_watchdog(); |
284 | device_power_up(); | 326 | device_power_up(); |
285 | local_irq_enable(); | 327 | local_irq_enable(); |
diff --git a/kernel/power/user.c b/kernel/power/user.c index d991d3b0e5a4..89443b85163b 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c | |||
@@ -11,6 +11,7 @@ | |||
11 | 11 | ||
12 | #include <linux/suspend.h> | 12 | #include <linux/suspend.h> |
13 | #include <linux/syscalls.h> | 13 | #include <linux/syscalls.h> |
14 | #include <linux/reboot.h> | ||
14 | #include <linux/string.h> | 15 | #include <linux/string.h> |
15 | #include <linux/device.h> | 16 | #include <linux/device.h> |
16 | #include <linux/miscdevice.h> | 17 | #include <linux/miscdevice.h> |
@@ -21,6 +22,7 @@ | |||
21 | #include <linux/fs.h> | 22 | #include <linux/fs.h> |
22 | #include <linux/console.h> | 23 | #include <linux/console.h> |
23 | #include <linux/cpu.h> | 24 | #include <linux/cpu.h> |
25 | #include <linux/freezer.h> | ||
24 | 26 | ||
25 | #include <asm/uaccess.h> | 27 | #include <asm/uaccess.h> |
26 | 28 | ||
@@ -54,7 +56,8 @@ static int snapshot_open(struct inode *inode, struct file *filp) | |||
54 | filp->private_data = data; | 56 | filp->private_data = data; |
55 | memset(&data->handle, 0, sizeof(struct snapshot_handle)); | 57 | memset(&data->handle, 0, sizeof(struct snapshot_handle)); |
56 | if ((filp->f_flags & O_ACCMODE) == O_RDONLY) { | 58 | if ((filp->f_flags & O_ACCMODE) == O_RDONLY) { |
57 | data->swap = swsusp_resume_device ? swap_type_of(swsusp_resume_device) : -1; | 59 | data->swap = swsusp_resume_device ? |
60 | swap_type_of(swsusp_resume_device, 0) : -1; | ||
58 | data->mode = O_RDONLY; | 61 | data->mode = O_RDONLY; |
59 | } else { | 62 | } else { |
60 | data->swap = -1; | 63 | data->swap = -1; |
@@ -76,10 +79,10 @@ static int snapshot_release(struct inode *inode, struct file *filp) | |||
76 | free_all_swap_pages(data->swap, data->bitmap); | 79 | free_all_swap_pages(data->swap, data->bitmap); |
77 | free_bitmap(data->bitmap); | 80 | free_bitmap(data->bitmap); |
78 | if (data->frozen) { | 81 | if (data->frozen) { |
79 | down(&pm_sem); | 82 | mutex_lock(&pm_mutex); |
80 | thaw_processes(); | 83 | thaw_processes(); |
81 | enable_nonboot_cpus(); | 84 | enable_nonboot_cpus(); |
82 | up(&pm_sem); | 85 | mutex_unlock(&pm_mutex); |
83 | } | 86 | } |
84 | atomic_inc(&device_available); | 87 | atomic_inc(&device_available); |
85 | return 0; | 88 | return 0; |
@@ -124,7 +127,8 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, | |||
124 | { | 127 | { |
125 | int error = 0; | 128 | int error = 0; |
126 | struct snapshot_data *data; | 129 | struct snapshot_data *data; |
127 | loff_t offset, avail; | 130 | loff_t avail; |
131 | sector_t offset; | ||
128 | 132 | ||
129 | if (_IOC_TYPE(cmd) != SNAPSHOT_IOC_MAGIC) | 133 | if (_IOC_TYPE(cmd) != SNAPSHOT_IOC_MAGIC) |
130 | return -ENOTTY; | 134 | return -ENOTTY; |
@@ -140,7 +144,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, | |||
140 | case SNAPSHOT_FREEZE: | 144 | case SNAPSHOT_FREEZE: |
141 | if (data->frozen) | 145 | if (data->frozen) |
142 | break; | 146 | break; |
143 | down(&pm_sem); | 147 | mutex_lock(&pm_mutex); |
144 | error = disable_nonboot_cpus(); | 148 | error = disable_nonboot_cpus(); |
145 | if (!error) { | 149 | if (!error) { |
146 | error = freeze_processes(); | 150 | error = freeze_processes(); |
@@ -150,7 +154,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, | |||
150 | error = -EBUSY; | 154 | error = -EBUSY; |
151 | } | 155 | } |
152 | } | 156 | } |
153 | up(&pm_sem); | 157 | mutex_unlock(&pm_mutex); |
154 | if (!error) | 158 | if (!error) |
155 | data->frozen = 1; | 159 | data->frozen = 1; |
156 | break; | 160 | break; |
@@ -158,10 +162,10 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, | |||
158 | case SNAPSHOT_UNFREEZE: | 162 | case SNAPSHOT_UNFREEZE: |
159 | if (!data->frozen) | 163 | if (!data->frozen) |
160 | break; | 164 | break; |
161 | down(&pm_sem); | 165 | mutex_lock(&pm_mutex); |
162 | thaw_processes(); | 166 | thaw_processes(); |
163 | enable_nonboot_cpus(); | 167 | enable_nonboot_cpus(); |
164 | up(&pm_sem); | 168 | mutex_unlock(&pm_mutex); |
165 | data->frozen = 0; | 169 | data->frozen = 0; |
166 | break; | 170 | break; |
167 | 171 | ||
@@ -170,7 +174,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, | |||
170 | error = -EPERM; | 174 | error = -EPERM; |
171 | break; | 175 | break; |
172 | } | 176 | } |
173 | down(&pm_sem); | 177 | mutex_lock(&pm_mutex); |
174 | /* Free memory before shutting down devices. */ | 178 | /* Free memory before shutting down devices. */ |
175 | error = swsusp_shrink_memory(); | 179 | error = swsusp_shrink_memory(); |
176 | if (!error) { | 180 | if (!error) { |
@@ -183,7 +187,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, | |||
183 | } | 187 | } |
184 | resume_console(); | 188 | resume_console(); |
185 | } | 189 | } |
186 | up(&pm_sem); | 190 | mutex_unlock(&pm_mutex); |
187 | if (!error) | 191 | if (!error) |
188 | error = put_user(in_suspend, (unsigned int __user *)arg); | 192 | error = put_user(in_suspend, (unsigned int __user *)arg); |
189 | if (!error) | 193 | if (!error) |
@@ -191,13 +195,13 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, | |||
191 | break; | 195 | break; |
192 | 196 | ||
193 | case SNAPSHOT_ATOMIC_RESTORE: | 197 | case SNAPSHOT_ATOMIC_RESTORE: |
198 | snapshot_write_finalize(&data->handle); | ||
194 | if (data->mode != O_WRONLY || !data->frozen || | 199 | if (data->mode != O_WRONLY || !data->frozen || |
195 | !snapshot_image_loaded(&data->handle)) { | 200 | !snapshot_image_loaded(&data->handle)) { |
196 | error = -EPERM; | 201 | error = -EPERM; |
197 | break; | 202 | break; |
198 | } | 203 | } |
199 | snapshot_free_unused_memory(&data->handle); | 204 | mutex_lock(&pm_mutex); |
200 | down(&pm_sem); | ||
201 | pm_prepare_console(); | 205 | pm_prepare_console(); |
202 | suspend_console(); | 206 | suspend_console(); |
203 | error = device_suspend(PMSG_PRETHAW); | 207 | error = device_suspend(PMSG_PRETHAW); |
@@ -207,7 +211,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, | |||
207 | } | 211 | } |
208 | resume_console(); | 212 | resume_console(); |
209 | pm_restore_console(); | 213 | pm_restore_console(); |
210 | up(&pm_sem); | 214 | mutex_unlock(&pm_mutex); |
211 | break; | 215 | break; |
212 | 216 | ||
213 | case SNAPSHOT_FREE: | 217 | case SNAPSHOT_FREE: |
@@ -238,10 +242,10 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, | |||
238 | break; | 242 | break; |
239 | } | 243 | } |
240 | } | 244 | } |
241 | offset = alloc_swap_page(data->swap, data->bitmap); | 245 | offset = alloc_swapdev_block(data->swap, data->bitmap); |
242 | if (offset) { | 246 | if (offset) { |
243 | offset <<= PAGE_SHIFT; | 247 | offset <<= PAGE_SHIFT; |
244 | error = put_user(offset, (loff_t __user *)arg); | 248 | error = put_user(offset, (sector_t __user *)arg); |
245 | } else { | 249 | } else { |
246 | error = -ENOSPC; | 250 | error = -ENOSPC; |
247 | } | 251 | } |
@@ -264,7 +268,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, | |||
264 | * so we need to recode them | 268 | * so we need to recode them |
265 | */ | 269 | */ |
266 | if (old_decode_dev(arg)) { | 270 | if (old_decode_dev(arg)) { |
267 | data->swap = swap_type_of(old_decode_dev(arg)); | 271 | data->swap = swap_type_of(old_decode_dev(arg), 0); |
268 | if (data->swap < 0) | 272 | if (data->swap < 0) |
269 | error = -ENODEV; | 273 | error = -ENODEV; |
270 | } else { | 274 | } else { |
@@ -282,7 +286,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, | |||
282 | break; | 286 | break; |
283 | } | 287 | } |
284 | 288 | ||
285 | if (down_trylock(&pm_sem)) { | 289 | if (!mutex_trylock(&pm_mutex)) { |
286 | error = -EBUSY; | 290 | error = -EBUSY; |
287 | break; | 291 | break; |
288 | } | 292 | } |
@@ -309,8 +313,66 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, | |||
309 | if (pm_ops->finish) | 313 | if (pm_ops->finish) |
310 | pm_ops->finish(PM_SUSPEND_MEM); | 314 | pm_ops->finish(PM_SUSPEND_MEM); |
311 | 315 | ||
312 | OutS3: | 316 | OutS3: |
313 | up(&pm_sem); | 317 | mutex_unlock(&pm_mutex); |
318 | break; | ||
319 | |||
320 | case SNAPSHOT_PMOPS: | ||
321 | switch (arg) { | ||
322 | |||
323 | case PMOPS_PREPARE: | ||
324 | if (pm_ops->prepare) { | ||
325 | error = pm_ops->prepare(PM_SUSPEND_DISK); | ||
326 | } | ||
327 | break; | ||
328 | |||
329 | case PMOPS_ENTER: | ||
330 | kernel_shutdown_prepare(SYSTEM_SUSPEND_DISK); | ||
331 | error = pm_ops->enter(PM_SUSPEND_DISK); | ||
332 | break; | ||
333 | |||
334 | case PMOPS_FINISH: | ||
335 | if (pm_ops && pm_ops->finish) { | ||
336 | pm_ops->finish(PM_SUSPEND_DISK); | ||
337 | } | ||
338 | break; | ||
339 | |||
340 | default: | ||
341 | printk(KERN_ERR "SNAPSHOT_PMOPS: invalid argument %ld\n", arg); | ||
342 | error = -EINVAL; | ||
343 | |||
344 | } | ||
345 | break; | ||
346 | |||
347 | case SNAPSHOT_SET_SWAP_AREA: | ||
348 | if (data->bitmap) { | ||
349 | error = -EPERM; | ||
350 | } else { | ||
351 | struct resume_swap_area swap_area; | ||
352 | dev_t swdev; | ||
353 | |||
354 | error = copy_from_user(&swap_area, (void __user *)arg, | ||
355 | sizeof(struct resume_swap_area)); | ||
356 | if (error) { | ||
357 | error = -EFAULT; | ||
358 | break; | ||
359 | } | ||
360 | |||
361 | /* | ||
362 | * User space encodes device types as two-byte values, | ||
363 | * so we need to recode them | ||
364 | */ | ||
365 | swdev = old_decode_dev(swap_area.dev); | ||
366 | if (swdev) { | ||
367 | offset = swap_area.offset; | ||
368 | data->swap = swap_type_of(swdev, offset); | ||
369 | if (data->swap < 0) | ||
370 | error = -ENODEV; | ||
371 | } else { | ||
372 | data->swap = -1; | ||
373 | error = -EINVAL; | ||
374 | } | ||
375 | } | ||
314 | break; | 376 | break; |
315 | 377 | ||
316 | default: | 378 | default: |
@@ -321,7 +383,7 @@ OutS3: | |||
321 | return error; | 383 | return error; |
322 | } | 384 | } |
323 | 385 | ||
324 | static struct file_operations snapshot_fops = { | 386 | static const struct file_operations snapshot_fops = { |
325 | .open = snapshot_open, | 387 | .open = snapshot_open, |
326 | .release = snapshot_release, | 388 | .release = snapshot_release, |
327 | .read = snapshot_read, | 389 | .read = snapshot_read, |
diff --git a/kernel/printk.c b/kernel/printk.c index 66426552fbfe..185bb45eacf7 100644 --- a/kernel/printk.c +++ b/kernel/printk.c | |||
@@ -53,8 +53,6 @@ int console_printk[4] = { | |||
53 | DEFAULT_CONSOLE_LOGLEVEL, /* default_console_loglevel */ | 53 | DEFAULT_CONSOLE_LOGLEVEL, /* default_console_loglevel */ |
54 | }; | 54 | }; |
55 | 55 | ||
56 | EXPORT_UNUSED_SYMBOL(console_printk); /* June 2006 */ | ||
57 | |||
58 | /* | 56 | /* |
59 | * Low level drivers may need that to know if they can schedule in | 57 | * Low level drivers may need that to know if they can schedule in |
60 | * their unblank() callback or not. So let's export it. | 58 | * their unblank() callback or not. So let's export it. |
@@ -335,13 +333,25 @@ static void __call_console_drivers(unsigned long start, unsigned long end) | |||
335 | } | 333 | } |
336 | } | 334 | } |
337 | 335 | ||
336 | static int __read_mostly ignore_loglevel; | ||
337 | |||
338 | int __init ignore_loglevel_setup(char *str) | ||
339 | { | ||
340 | ignore_loglevel = 1; | ||
341 | printk(KERN_INFO "debug: ignoring loglevel setting.\n"); | ||
342 | |||
343 | return 1; | ||
344 | } | ||
345 | |||
346 | __setup("ignore_loglevel", ignore_loglevel_setup); | ||
347 | |||
338 | /* | 348 | /* |
339 | * Write out chars from start to end - 1 inclusive | 349 | * Write out chars from start to end - 1 inclusive |
340 | */ | 350 | */ |
341 | static void _call_console_drivers(unsigned long start, | 351 | static void _call_console_drivers(unsigned long start, |
342 | unsigned long end, int msg_log_level) | 352 | unsigned long end, int msg_log_level) |
343 | { | 353 | { |
344 | if (msg_log_level < console_loglevel && | 354 | if ((msg_log_level < console_loglevel || ignore_loglevel) && |
345 | console_drivers && start != end) { | 355 | console_drivers && start != end) { |
346 | if ((start & LOG_BUF_MASK) > (end & LOG_BUF_MASK)) { | 356 | if ((start & LOG_BUF_MASK) > (end & LOG_BUF_MASK)) { |
347 | /* wrapped write */ | 357 | /* wrapped write */ |
@@ -631,12 +641,7 @@ EXPORT_SYMBOL(vprintk); | |||
631 | 641 | ||
632 | asmlinkage long sys_syslog(int type, char __user *buf, int len) | 642 | asmlinkage long sys_syslog(int type, char __user *buf, int len) |
633 | { | 643 | { |
634 | return 0; | 644 | return -ENOSYS; |
635 | } | ||
636 | |||
637 | int do_syslog(int type, char __user *buf, int len) | ||
638 | { | ||
639 | return 0; | ||
640 | } | 645 | } |
641 | 646 | ||
642 | static void call_console_drivers(unsigned long start, unsigned long end) | 647 | static void call_console_drivers(unsigned long start, unsigned long end) |
@@ -777,7 +782,6 @@ int is_console_locked(void) | |||
777 | { | 782 | { |
778 | return console_locked; | 783 | return console_locked; |
779 | } | 784 | } |
780 | EXPORT_UNUSED_SYMBOL(is_console_locked); /* June 2006 */ | ||
781 | 785 | ||
782 | /** | 786 | /** |
783 | * release_console_sem - unlock the console system | 787 | * release_console_sem - unlock the console system |
diff --git a/kernel/profile.c b/kernel/profile.c index f940b462eec9..fb5e03d57e9d 100644 --- a/kernel/profile.c +++ b/kernel/profile.c | |||
@@ -40,7 +40,7 @@ int (*timer_hook)(struct pt_regs *) __read_mostly; | |||
40 | 40 | ||
41 | static atomic_t *prof_buffer; | 41 | static atomic_t *prof_buffer; |
42 | static unsigned long prof_len, prof_shift; | 42 | static unsigned long prof_len, prof_shift; |
43 | static int prof_on __read_mostly; | 43 | int prof_on __read_mostly; |
44 | static cpumask_t prof_cpu_mask = CPU_MASK_ALL; | 44 | static cpumask_t prof_cpu_mask = CPU_MASK_ALL; |
45 | #ifdef CONFIG_SMP | 45 | #ifdef CONFIG_SMP |
46 | static DEFINE_PER_CPU(struct profile_hit *[2], cpu_profile_hits); | 46 | static DEFINE_PER_CPU(struct profile_hit *[2], cpu_profile_hits); |
@@ -51,9 +51,19 @@ static DEFINE_MUTEX(profile_flip_mutex); | |||
51 | static int __init profile_setup(char * str) | 51 | static int __init profile_setup(char * str) |
52 | { | 52 | { |
53 | static char __initdata schedstr[] = "schedule"; | 53 | static char __initdata schedstr[] = "schedule"; |
54 | static char __initdata sleepstr[] = "sleep"; | ||
54 | int par; | 55 | int par; |
55 | 56 | ||
56 | if (!strncmp(str, schedstr, strlen(schedstr))) { | 57 | if (!strncmp(str, sleepstr, strlen(sleepstr))) { |
58 | prof_on = SLEEP_PROFILING; | ||
59 | if (str[strlen(sleepstr)] == ',') | ||
60 | str += strlen(sleepstr) + 1; | ||
61 | if (get_option(&str, &par)) | ||
62 | prof_shift = par; | ||
63 | printk(KERN_INFO | ||
64 | "kernel sleep profiling enabled (shift: %ld)\n", | ||
65 | prof_shift); | ||
66 | } else if (!strncmp(str, sleepstr, strlen(sleepstr))) { | ||
57 | prof_on = SCHED_PROFILING; | 67 | prof_on = SCHED_PROFILING; |
58 | if (str[strlen(schedstr)] == ',') | 68 | if (str[strlen(schedstr)] == ',') |
59 | str += strlen(schedstr) + 1; | 69 | str += strlen(schedstr) + 1; |
@@ -204,7 +214,8 @@ EXPORT_SYMBOL_GPL(profile_event_unregister); | |||
204 | * positions to which hits are accounted during short intervals (e.g. | 214 | * positions to which hits are accounted during short intervals (e.g. |
205 | * several seconds) is usually very small. Exclusion from buffer | 215 | * several seconds) is usually very small. Exclusion from buffer |
206 | * flipping is provided by interrupt disablement (note that for | 216 | * flipping is provided by interrupt disablement (note that for |
207 | * SCHED_PROFILING profile_hit() may be called from process context). | 217 | * SCHED_PROFILING or SLEEP_PROFILING profile_hit() may be called from |
218 | * process context). | ||
208 | * The hash function is meant to be lightweight as opposed to strong, | 219 | * The hash function is meant to be lightweight as opposed to strong, |
209 | * and was vaguely inspired by ppc64 firmware-supported inverted | 220 | * and was vaguely inspired by ppc64 firmware-supported inverted |
210 | * pagetable hash functions, but uses a full hashtable full of finite | 221 | * pagetable hash functions, but uses a full hashtable full of finite |
@@ -257,7 +268,7 @@ static void profile_discard_flip_buffers(void) | |||
257 | mutex_unlock(&profile_flip_mutex); | 268 | mutex_unlock(&profile_flip_mutex); |
258 | } | 269 | } |
259 | 270 | ||
260 | void profile_hit(int type, void *__pc) | 271 | void profile_hits(int type, void *__pc, unsigned int nr_hits) |
261 | { | 272 | { |
262 | unsigned long primary, secondary, flags, pc = (unsigned long)__pc; | 273 | unsigned long primary, secondary, flags, pc = (unsigned long)__pc; |
263 | int i, j, cpu; | 274 | int i, j, cpu; |
@@ -274,21 +285,31 @@ void profile_hit(int type, void *__pc) | |||
274 | put_cpu(); | 285 | put_cpu(); |
275 | return; | 286 | return; |
276 | } | 287 | } |
288 | /* | ||
289 | * We buffer the global profiler buffer into a per-CPU | ||
290 | * queue and thus reduce the number of global (and possibly | ||
291 | * NUMA-alien) accesses. The write-queue is self-coalescing: | ||
292 | */ | ||
277 | local_irq_save(flags); | 293 | local_irq_save(flags); |
278 | do { | 294 | do { |
279 | for (j = 0; j < PROFILE_GRPSZ; ++j) { | 295 | for (j = 0; j < PROFILE_GRPSZ; ++j) { |
280 | if (hits[i + j].pc == pc) { | 296 | if (hits[i + j].pc == pc) { |
281 | hits[i + j].hits++; | 297 | hits[i + j].hits += nr_hits; |
282 | goto out; | 298 | goto out; |
283 | } else if (!hits[i + j].hits) { | 299 | } else if (!hits[i + j].hits) { |
284 | hits[i + j].pc = pc; | 300 | hits[i + j].pc = pc; |
285 | hits[i + j].hits = 1; | 301 | hits[i + j].hits = nr_hits; |
286 | goto out; | 302 | goto out; |
287 | } | 303 | } |
288 | } | 304 | } |
289 | i = (i + secondary) & (NR_PROFILE_HIT - 1); | 305 | i = (i + secondary) & (NR_PROFILE_HIT - 1); |
290 | } while (i != primary); | 306 | } while (i != primary); |
291 | atomic_inc(&prof_buffer[pc]); | 307 | |
308 | /* | ||
309 | * Add the current hit(s) and flush the write-queue out | ||
310 | * to the global buffer: | ||
311 | */ | ||
312 | atomic_add(nr_hits, &prof_buffer[pc]); | ||
292 | for (i = 0; i < NR_PROFILE_HIT; ++i) { | 313 | for (i = 0; i < NR_PROFILE_HIT; ++i) { |
293 | atomic_add(hits[i].hits, &prof_buffer[hits[i].pc]); | 314 | atomic_add(hits[i].hits, &prof_buffer[hits[i].pc]); |
294 | hits[i].pc = hits[i].hits = 0; | 315 | hits[i].pc = hits[i].hits = 0; |
@@ -298,7 +319,6 @@ out: | |||
298 | put_cpu(); | 319 | put_cpu(); |
299 | } | 320 | } |
300 | 321 | ||
301 | #ifdef CONFIG_HOTPLUG_CPU | ||
302 | static int __devinit profile_cpu_callback(struct notifier_block *info, | 322 | static int __devinit profile_cpu_callback(struct notifier_block *info, |
303 | unsigned long action, void *__cpu) | 323 | unsigned long action, void *__cpu) |
304 | { | 324 | { |
@@ -351,19 +371,19 @@ static int __devinit profile_cpu_callback(struct notifier_block *info, | |||
351 | } | 371 | } |
352 | return NOTIFY_OK; | 372 | return NOTIFY_OK; |
353 | } | 373 | } |
354 | #endif /* CONFIG_HOTPLUG_CPU */ | ||
355 | #else /* !CONFIG_SMP */ | 374 | #else /* !CONFIG_SMP */ |
356 | #define profile_flip_buffers() do { } while (0) | 375 | #define profile_flip_buffers() do { } while (0) |
357 | #define profile_discard_flip_buffers() do { } while (0) | 376 | #define profile_discard_flip_buffers() do { } while (0) |
377 | #define profile_cpu_callback NULL | ||
358 | 378 | ||
359 | void profile_hit(int type, void *__pc) | 379 | void profile_hits(int type, void *__pc, unsigned int nr_hits) |
360 | { | 380 | { |
361 | unsigned long pc; | 381 | unsigned long pc; |
362 | 382 | ||
363 | if (prof_on != type || !prof_buffer) | 383 | if (prof_on != type || !prof_buffer) |
364 | return; | 384 | return; |
365 | pc = ((unsigned long)__pc - (unsigned long)_stext) >> prof_shift; | 385 | pc = ((unsigned long)__pc - (unsigned long)_stext) >> prof_shift; |
366 | atomic_inc(&prof_buffer[min(pc, prof_len - 1)]); | 386 | atomic_add(nr_hits, &prof_buffer[min(pc, prof_len - 1)]); |
367 | } | 387 | } |
368 | #endif /* !CONFIG_SMP */ | 388 | #endif /* !CONFIG_SMP */ |
369 | 389 | ||
@@ -442,7 +462,8 @@ read_profile(struct file *file, char __user *buf, size_t count, loff_t *ppos) | |||
442 | read = 0; | 462 | read = 0; |
443 | 463 | ||
444 | while (p < sizeof(unsigned int) && count > 0) { | 464 | while (p < sizeof(unsigned int) && count > 0) { |
445 | put_user(*((char *)(&sample_step)+p),buf); | 465 | if (put_user(*((char *)(&sample_step)+p),buf)) |
466 | return -EFAULT; | ||
446 | buf++; p++; count--; read++; | 467 | buf++; p++; count--; read++; |
447 | } | 468 | } |
448 | pnt = (char *)prof_buffer + p - sizeof(atomic_t); | 469 | pnt = (char *)prof_buffer + p - sizeof(atomic_t); |
@@ -480,7 +501,7 @@ static ssize_t write_profile(struct file *file, const char __user *buf, | |||
480 | return count; | 501 | return count; |
481 | } | 502 | } |
482 | 503 | ||
483 | static struct file_operations proc_profile_operations = { | 504 | static const struct file_operations proc_profile_operations = { |
484 | .read = read_profile, | 505 | .read = read_profile, |
485 | .write = write_profile, | 506 | .write = write_profile, |
486 | }; | 507 | }; |
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 26bb5ffe1ef1..3554b76da84c 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c | |||
@@ -235,12 +235,14 @@ static void rcu_do_batch(struct rcu_data *rdp) | |||
235 | 235 | ||
236 | list = rdp->donelist; | 236 | list = rdp->donelist; |
237 | while (list) { | 237 | while (list) { |
238 | next = rdp->donelist = list->next; | 238 | next = list->next; |
239 | prefetch(next); | ||
239 | list->func(list); | 240 | list->func(list); |
240 | list = next; | 241 | list = next; |
241 | if (++count >= rdp->blimit) | 242 | if (++count >= rdp->blimit) |
242 | break; | 243 | break; |
243 | } | 244 | } |
245 | rdp->donelist = list; | ||
244 | 246 | ||
245 | local_irq_disable(); | 247 | local_irq_disable(); |
246 | rdp->qlen -= count; | 248 | rdp->qlen -= count; |
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index e2bda18f6f42..c52f981ea008 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c | |||
@@ -401,7 +401,7 @@ static void srcu_torture_cleanup(void) | |||
401 | cleanup_srcu_struct(&srcu_ctl); | 401 | cleanup_srcu_struct(&srcu_ctl); |
402 | } | 402 | } |
403 | 403 | ||
404 | static int srcu_torture_read_lock(void) | 404 | static int srcu_torture_read_lock(void) __acquires(&srcu_ctl) |
405 | { | 405 | { |
406 | return srcu_read_lock(&srcu_ctl); | 406 | return srcu_read_lock(&srcu_ctl); |
407 | } | 407 | } |
@@ -419,7 +419,7 @@ static void srcu_read_delay(struct rcu_random_state *rrsp) | |||
419 | schedule_timeout_interruptible(longdelay); | 419 | schedule_timeout_interruptible(longdelay); |
420 | } | 420 | } |
421 | 421 | ||
422 | static void srcu_torture_read_unlock(int idx) | 422 | static void srcu_torture_read_unlock(int idx) __releases(&srcu_ctl) |
423 | { | 423 | { |
424 | srcu_read_unlock(&srcu_ctl, idx); | 424 | srcu_read_unlock(&srcu_ctl, idx); |
425 | } | 425 | } |
diff --git a/kernel/relay.c b/kernel/relay.c index 2b92e8ece85b..75a3a9a7efc2 100644 --- a/kernel/relay.c +++ b/kernel/relay.c | |||
@@ -1013,7 +1013,7 @@ static ssize_t relay_file_sendfile(struct file *filp, | |||
1013 | actor, &desc); | 1013 | actor, &desc); |
1014 | } | 1014 | } |
1015 | 1015 | ||
1016 | struct file_operations relay_file_operations = { | 1016 | const struct file_operations relay_file_operations = { |
1017 | .open = relay_file_open, | 1017 | .open = relay_file_open, |
1018 | .poll = relay_file_poll, | 1018 | .poll = relay_file_poll, |
1019 | .mmap = relay_file_mmap, | 1019 | .mmap = relay_file_mmap, |
diff --git a/kernel/resource.c b/kernel/resource.c index 6de60c12143e..7b9a497419d9 100644 --- a/kernel/resource.c +++ b/kernel/resource.c | |||
@@ -88,7 +88,7 @@ static int r_show(struct seq_file *m, void *v) | |||
88 | return 0; | 88 | return 0; |
89 | } | 89 | } |
90 | 90 | ||
91 | static struct seq_operations resource_op = { | 91 | static const struct seq_operations resource_op = { |
92 | .start = r_start, | 92 | .start = r_start, |
93 | .next = r_next, | 93 | .next = r_next, |
94 | .stop = r_stop, | 94 | .stop = r_stop, |
@@ -115,14 +115,14 @@ static int iomem_open(struct inode *inode, struct file *file) | |||
115 | return res; | 115 | return res; |
116 | } | 116 | } |
117 | 117 | ||
118 | static struct file_operations proc_ioports_operations = { | 118 | static const struct file_operations proc_ioports_operations = { |
119 | .open = ioports_open, | 119 | .open = ioports_open, |
120 | .read = seq_read, | 120 | .read = seq_read, |
121 | .llseek = seq_lseek, | 121 | .llseek = seq_lseek, |
122 | .release = seq_release, | 122 | .release = seq_release, |
123 | }; | 123 | }; |
124 | 124 | ||
125 | static struct file_operations proc_iomem_operations = { | 125 | static const struct file_operations proc_iomem_operations = { |
126 | .open = iomem_open, | 126 | .open = iomem_open, |
127 | .read = seq_read, | 127 | .read = seq_read, |
128 | .llseek = seq_lseek, | 128 | .llseek = seq_lseek, |
diff --git a/kernel/rtmutex-tester.c b/kernel/rtmutex-tester.c index 6dcea9dd8c94..015fc633c96c 100644 --- a/kernel/rtmutex-tester.c +++ b/kernel/rtmutex-tester.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <linux/spinlock.h> | 13 | #include <linux/spinlock.h> |
14 | #include <linux/sysdev.h> | 14 | #include <linux/sysdev.h> |
15 | #include <linux/timer.h> | 15 | #include <linux/timer.h> |
16 | #include <linux/freezer.h> | ||
16 | 17 | ||
17 | #include "rtmutex.h" | 18 | #include "rtmutex.h" |
18 | 19 | ||
diff --git a/kernel/sched.c b/kernel/sched.c index 3399701c680e..f385eff4682d 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -34,7 +34,7 @@ | |||
34 | #include <linux/security.h> | 34 | #include <linux/security.h> |
35 | #include <linux/notifier.h> | 35 | #include <linux/notifier.h> |
36 | #include <linux/profile.h> | 36 | #include <linux/profile.h> |
37 | #include <linux/suspend.h> | 37 | #include <linux/freezer.h> |
38 | #include <linux/vmalloc.h> | 38 | #include <linux/vmalloc.h> |
39 | #include <linux/blkdev.h> | 39 | #include <linux/blkdev.h> |
40 | #include <linux/delay.h> | 40 | #include <linux/delay.h> |
@@ -505,7 +505,7 @@ static int schedstat_open(struct inode *inode, struct file *file) | |||
505 | return res; | 505 | return res; |
506 | } | 506 | } |
507 | 507 | ||
508 | struct file_operations proc_schedstat_operations = { | 508 | const struct file_operations proc_schedstat_operations = { |
509 | .open = schedstat_open, | 509 | .open = schedstat_open, |
510 | .read = seq_read, | 510 | .read = seq_read, |
511 | .llseek = seq_lseek, | 511 | .llseek = seq_lseek, |
@@ -948,6 +948,17 @@ static void activate_task(struct task_struct *p, struct rq *rq, int local) | |||
948 | } | 948 | } |
949 | #endif | 949 | #endif |
950 | 950 | ||
951 | /* | ||
952 | * Sleep time is in units of nanosecs, so shift by 20 to get a | ||
953 | * milliseconds-range estimation of the amount of time that the task | ||
954 | * spent sleeping: | ||
955 | */ | ||
956 | if (unlikely(prof_on == SLEEP_PROFILING)) { | ||
957 | if (p->state == TASK_UNINTERRUPTIBLE) | ||
958 | profile_hits(SLEEP_PROFILING, (void *)get_wchan(p), | ||
959 | (now - p->timestamp) >> 20); | ||
960 | } | ||
961 | |||
951 | if (!rt_task(p)) | 962 | if (!rt_task(p)) |
952 | p->prio = recalc_task_prio(p, now); | 963 | p->prio = recalc_task_prio(p, now); |
953 | 964 | ||
@@ -3333,6 +3344,7 @@ asmlinkage void __sched schedule(void) | |||
3333 | printk(KERN_ERR "BUG: scheduling while atomic: " | 3344 | printk(KERN_ERR "BUG: scheduling while atomic: " |
3334 | "%s/0x%08x/%d\n", | 3345 | "%s/0x%08x/%d\n", |
3335 | current->comm, preempt_count(), current->pid); | 3346 | current->comm, preempt_count(), current->pid); |
3347 | debug_show_held_locks(current); | ||
3336 | dump_stack(); | 3348 | dump_stack(); |
3337 | } | 3349 | } |
3338 | profile_hit(SCHED_PROFILING, __builtin_return_address(0)); | 3350 | profile_hit(SCHED_PROFILING, __builtin_return_address(0)); |
@@ -4804,18 +4816,18 @@ static void show_task(struct task_struct *p) | |||
4804 | show_stack(p, NULL); | 4816 | show_stack(p, NULL); |
4805 | } | 4817 | } |
4806 | 4818 | ||
4807 | void show_state(void) | 4819 | void show_state_filter(unsigned long state_filter) |
4808 | { | 4820 | { |
4809 | struct task_struct *g, *p; | 4821 | struct task_struct *g, *p; |
4810 | 4822 | ||
4811 | #if (BITS_PER_LONG == 32) | 4823 | #if (BITS_PER_LONG == 32) |
4812 | printk("\n" | 4824 | printk("\n" |
4813 | " sibling\n"); | 4825 | " free sibling\n"); |
4814 | printk(" task PC pid father child younger older\n"); | 4826 | printk(" task PC stack pid father child younger older\n"); |
4815 | #else | 4827 | #else |
4816 | printk("\n" | 4828 | printk("\n" |
4817 | " sibling\n"); | 4829 | " free sibling\n"); |
4818 | printk(" task PC pid father child younger older\n"); | 4830 | printk(" task PC stack pid father child younger older\n"); |
4819 | #endif | 4831 | #endif |
4820 | read_lock(&tasklist_lock); | 4832 | read_lock(&tasklist_lock); |
4821 | do_each_thread(g, p) { | 4833 | do_each_thread(g, p) { |
@@ -4824,11 +4836,16 @@ void show_state(void) | |||
4824 | * console might take alot of time: | 4836 | * console might take alot of time: |
4825 | */ | 4837 | */ |
4826 | touch_nmi_watchdog(); | 4838 | touch_nmi_watchdog(); |
4827 | show_task(p); | 4839 | if (p->state & state_filter) |
4840 | show_task(p); | ||
4828 | } while_each_thread(g, p); | 4841 | } while_each_thread(g, p); |
4829 | 4842 | ||
4830 | read_unlock(&tasklist_lock); | 4843 | read_unlock(&tasklist_lock); |
4831 | debug_show_all_locks(); | 4844 | /* |
4845 | * Only show locks if all tasks are dumped: | ||
4846 | */ | ||
4847 | if (state_filter == -1) | ||
4848 | debug_show_all_locks(); | ||
4832 | } | 4849 | } |
4833 | 4850 | ||
4834 | /** | 4851 | /** |
@@ -6723,8 +6740,6 @@ SYSDEV_ATTR(sched_smt_power_savings, 0644, sched_smt_power_savings_show, | |||
6723 | sched_smt_power_savings_store); | 6740 | sched_smt_power_savings_store); |
6724 | #endif | 6741 | #endif |
6725 | 6742 | ||
6726 | |||
6727 | #ifdef CONFIG_HOTPLUG_CPU | ||
6728 | /* | 6743 | /* |
6729 | * Force a reinitialization of the sched domains hierarchy. The domains | 6744 | * Force a reinitialization of the sched domains hierarchy. The domains |
6730 | * and groups cannot be updated in place without racing with the balancing | 6745 | * and groups cannot be updated in place without racing with the balancing |
@@ -6757,7 +6772,6 @@ static int update_sched_domains(struct notifier_block *nfb, | |||
6757 | 6772 | ||
6758 | return NOTIFY_OK; | 6773 | return NOTIFY_OK; |
6759 | } | 6774 | } |
6760 | #endif | ||
6761 | 6775 | ||
6762 | void __init sched_init_smp(void) | 6776 | void __init sched_init_smp(void) |
6763 | { | 6777 | { |
@@ -6867,6 +6881,7 @@ void __might_sleep(char *file, int line) | |||
6867 | " context at %s:%d\n", file, line); | 6881 | " context at %s:%d\n", file, line); |
6868 | printk("in_atomic():%d, irqs_disabled():%d\n", | 6882 | printk("in_atomic():%d, irqs_disabled():%d\n", |
6869 | in_atomic(), irqs_disabled()); | 6883 | in_atomic(), irqs_disabled()); |
6884 | debug_show_held_locks(current); | ||
6870 | dump_stack(); | 6885 | dump_stack(); |
6871 | } | 6886 | } |
6872 | #endif | 6887 | #endif |
diff --git a/kernel/signal.c b/kernel/signal.c index df18c167a2a7..ec81defde339 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
@@ -23,6 +23,7 @@ | |||
23 | #include <linux/ptrace.h> | 23 | #include <linux/ptrace.h> |
24 | #include <linux/signal.h> | 24 | #include <linux/signal.h> |
25 | #include <linux/capability.h> | 25 | #include <linux/capability.h> |
26 | #include <linux/freezer.h> | ||
26 | #include <asm/param.h> | 27 | #include <asm/param.h> |
27 | #include <asm/uaccess.h> | 28 | #include <asm/uaccess.h> |
28 | #include <asm/unistd.h> | 29 | #include <asm/unistd.h> |
@@ -33,7 +34,7 @@ | |||
33 | * SLAB caches for signal bits. | 34 | * SLAB caches for signal bits. |
34 | */ | 35 | */ |
35 | 36 | ||
36 | static kmem_cache_t *sigqueue_cachep; | 37 | static struct kmem_cache *sigqueue_cachep; |
37 | 38 | ||
38 | /* | 39 | /* |
39 | * In POSIX a signal is sent either to a specific thread (Linux task) | 40 | * In POSIX a signal is sent either to a specific thread (Linux task) |
@@ -1133,8 +1134,7 @@ int kill_pid_info(int sig, struct siginfo *info, struct pid *pid) | |||
1133 | return error; | 1134 | return error; |
1134 | } | 1135 | } |
1135 | 1136 | ||
1136 | int | 1137 | static int kill_proc_info(int sig, struct siginfo *info, pid_t pid) |
1137 | kill_proc_info(int sig, struct siginfo *info, pid_t pid) | ||
1138 | { | 1138 | { |
1139 | int error; | 1139 | int error; |
1140 | rcu_read_lock(); | 1140 | rcu_read_lock(); |
diff --git a/kernel/softirq.c b/kernel/softirq.c index bf25015dce16..918e52df090e 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
@@ -574,8 +574,6 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb, | |||
574 | 574 | ||
575 | switch (action) { | 575 | switch (action) { |
576 | case CPU_UP_PREPARE: | 576 | case CPU_UP_PREPARE: |
577 | BUG_ON(per_cpu(tasklet_vec, hotcpu).list); | ||
578 | BUG_ON(per_cpu(tasklet_hi_vec, hotcpu).list); | ||
579 | p = kthread_create(ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu); | 577 | p = kthread_create(ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu); |
580 | if (IS_ERR(p)) { | 578 | if (IS_ERR(p)) { |
581 | printk("ksoftirqd for %i failed\n", hotcpu); | 579 | printk("ksoftirqd for %i failed\n", hotcpu); |
diff --git a/kernel/sys.c b/kernel/sys.c index c87b461de38d..a0c1a29a507f 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
@@ -1102,14 +1102,14 @@ asmlinkage long sys_setreuid(uid_t ruid, uid_t euid) | |||
1102 | asmlinkage long sys_setuid(uid_t uid) | 1102 | asmlinkage long sys_setuid(uid_t uid) |
1103 | { | 1103 | { |
1104 | int old_euid = current->euid; | 1104 | int old_euid = current->euid; |
1105 | int old_ruid, old_suid, new_ruid, new_suid; | 1105 | int old_ruid, old_suid, new_suid; |
1106 | int retval; | 1106 | int retval; |
1107 | 1107 | ||
1108 | retval = security_task_setuid(uid, (uid_t)-1, (uid_t)-1, LSM_SETID_ID); | 1108 | retval = security_task_setuid(uid, (uid_t)-1, (uid_t)-1, LSM_SETID_ID); |
1109 | if (retval) | 1109 | if (retval) |
1110 | return retval; | 1110 | return retval; |
1111 | 1111 | ||
1112 | old_ruid = new_ruid = current->uid; | 1112 | old_ruid = current->uid; |
1113 | old_suid = current->suid; | 1113 | old_suid = current->suid; |
1114 | new_suid = old_suid; | 1114 | new_suid = old_suid; |
1115 | 1115 | ||
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 09e569f4792b..8e9f00fd6d18 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -54,6 +54,7 @@ extern int proc_nr_files(ctl_table *table, int write, struct file *filp, | |||
54 | 54 | ||
55 | #ifdef CONFIG_X86 | 55 | #ifdef CONFIG_X86 |
56 | #include <asm/nmi.h> | 56 | #include <asm/nmi.h> |
57 | #include <asm/stacktrace.h> | ||
57 | #endif | 58 | #endif |
58 | 59 | ||
59 | #if defined(CONFIG_SYSCTL) | 60 | #if defined(CONFIG_SYSCTL) |
@@ -170,7 +171,7 @@ static ssize_t proc_readsys(struct file *, char __user *, size_t, loff_t *); | |||
170 | static ssize_t proc_writesys(struct file *, const char __user *, size_t, loff_t *); | 171 | static ssize_t proc_writesys(struct file *, const char __user *, size_t, loff_t *); |
171 | static int proc_opensys(struct inode *, struct file *); | 172 | static int proc_opensys(struct inode *, struct file *); |
172 | 173 | ||
173 | struct file_operations proc_sys_file_operations = { | 174 | const struct file_operations proc_sys_file_operations = { |
174 | .open = proc_opensys, | 175 | .open = proc_opensys, |
175 | .read = proc_readsys, | 176 | .read = proc_readsys, |
176 | .write = proc_writesys, | 177 | .write = proc_writesys, |
@@ -707,6 +708,14 @@ static ctl_table kern_table[] = { | |||
707 | .mode = 0444, | 708 | .mode = 0444, |
708 | .proc_handler = &proc_dointvec, | 709 | .proc_handler = &proc_dointvec, |
709 | }, | 710 | }, |
711 | { | ||
712 | .ctl_name = CTL_UNNUMBERED, | ||
713 | .procname = "kstack_depth_to_print", | ||
714 | .data = &kstack_depth_to_print, | ||
715 | .maxlen = sizeof(int), | ||
716 | .mode = 0644, | ||
717 | .proc_handler = &proc_dointvec, | ||
718 | }, | ||
710 | #endif | 719 | #endif |
711 | #if defined(CONFIG_MMU) | 720 | #if defined(CONFIG_MMU) |
712 | { | 721 | { |
@@ -977,17 +986,6 @@ static ctl_table vm_table[] = { | |||
977 | .extra1 = &zero, | 986 | .extra1 = &zero, |
978 | }, | 987 | }, |
979 | #endif | 988 | #endif |
980 | #ifdef CONFIG_SWAP | ||
981 | { | ||
982 | .ctl_name = VM_SWAP_TOKEN_TIMEOUT, | ||
983 | .procname = "swap_token_timeout", | ||
984 | .data = &swap_token_default_timeout, | ||
985 | .maxlen = sizeof(swap_token_default_timeout), | ||
986 | .mode = 0644, | ||
987 | .proc_handler = &proc_dointvec_jiffies, | ||
988 | .strategy = &sysctl_jiffies, | ||
989 | }, | ||
990 | #endif | ||
991 | #ifdef CONFIG_NUMA | 989 | #ifdef CONFIG_NUMA |
992 | { | 990 | { |
993 | .ctl_name = VM_ZONE_RECLAIM_MODE, | 991 | .ctl_name = VM_ZONE_RECLAIM_MODE, |
@@ -1886,7 +1884,7 @@ static int __do_proc_dointvec(void *tbl_data, ctl_table *table, | |||
1886 | p = buf; | 1884 | p = buf; |
1887 | if (*p == '-' && left > 1) { | 1885 | if (*p == '-' && left > 1) { |
1888 | neg = 1; | 1886 | neg = 1; |
1889 | left--, p++; | 1887 | p++; |
1890 | } | 1888 | } |
1891 | if (*p < '0' || *p > '9') | 1889 | if (*p < '0' || *p > '9') |
1892 | break; | 1890 | break; |
@@ -2137,7 +2135,7 @@ static int __do_proc_doulongvec_minmax(void *data, ctl_table *table, int write, | |||
2137 | p = buf; | 2135 | p = buf; |
2138 | if (*p == '-' && left > 1) { | 2136 | if (*p == '-' && left > 1) { |
2139 | neg = 1; | 2137 | neg = 1; |
2140 | left--, p++; | 2138 | p++; |
2141 | } | 2139 | } |
2142 | if (*p < '0' || *p > '9') | 2140 | if (*p < '0' || *p > '9') |
2143 | break; | 2141 | break; |
diff --git a/kernel/taskstats.c b/kernel/taskstats.c index d3d28919d4b4..4c3476fa058d 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c | |||
@@ -34,7 +34,7 @@ | |||
34 | 34 | ||
35 | static DEFINE_PER_CPU(__u32, taskstats_seqnum) = { 0 }; | 35 | static DEFINE_PER_CPU(__u32, taskstats_seqnum) = { 0 }; |
36 | static int family_registered; | 36 | static int family_registered; |
37 | kmem_cache_t *taskstats_cache; | 37 | struct kmem_cache *taskstats_cache; |
38 | 38 | ||
39 | static struct genl_family family = { | 39 | static struct genl_family family = { |
40 | .id = GENL_ID_GENERATE, | 40 | .id = GENL_ID_GENERATE, |
@@ -69,7 +69,7 @@ enum actions { | |||
69 | }; | 69 | }; |
70 | 70 | ||
71 | static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp, | 71 | static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp, |
72 | void **replyp, size_t size) | 72 | size_t size) |
73 | { | 73 | { |
74 | struct sk_buff *skb; | 74 | struct sk_buff *skb; |
75 | void *reply; | 75 | void *reply; |
@@ -94,7 +94,6 @@ static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp, | |||
94 | } | 94 | } |
95 | 95 | ||
96 | *skbp = skb; | 96 | *skbp = skb; |
97 | *replyp = reply; | ||
98 | return 0; | 97 | return 0; |
99 | } | 98 | } |
100 | 99 | ||
@@ -119,10 +118,10 @@ static int send_reply(struct sk_buff *skb, pid_t pid) | |||
119 | /* | 118 | /* |
120 | * Send taskstats data in @skb to listeners registered for @cpu's exit data | 119 | * Send taskstats data in @skb to listeners registered for @cpu's exit data |
121 | */ | 120 | */ |
122 | static void send_cpu_listeners(struct sk_buff *skb, unsigned int cpu) | 121 | static void send_cpu_listeners(struct sk_buff *skb, |
122 | struct listener_list *listeners) | ||
123 | { | 123 | { |
124 | struct genlmsghdr *genlhdr = nlmsg_data((struct nlmsghdr *)skb->data); | 124 | struct genlmsghdr *genlhdr = nlmsg_data((struct nlmsghdr *)skb->data); |
125 | struct listener_list *listeners; | ||
126 | struct listener *s, *tmp; | 125 | struct listener *s, *tmp; |
127 | struct sk_buff *skb_next, *skb_cur = skb; | 126 | struct sk_buff *skb_next, *skb_cur = skb; |
128 | void *reply = genlmsg_data(genlhdr); | 127 | void *reply = genlmsg_data(genlhdr); |
@@ -135,7 +134,6 @@ static void send_cpu_listeners(struct sk_buff *skb, unsigned int cpu) | |||
135 | } | 134 | } |
136 | 135 | ||
137 | rc = 0; | 136 | rc = 0; |
138 | listeners = &per_cpu(listener_array, cpu); | ||
139 | down_read(&listeners->sem); | 137 | down_read(&listeners->sem); |
140 | list_for_each_entry(s, &listeners->list, list) { | 138 | list_for_each_entry(s, &listeners->list, list) { |
141 | skb_next = NULL; | 139 | skb_next = NULL; |
@@ -186,6 +184,7 @@ static int fill_pid(pid_t pid, struct task_struct *tsk, | |||
186 | } else | 184 | } else |
187 | get_task_struct(tsk); | 185 | get_task_struct(tsk); |
188 | 186 | ||
187 | memset(stats, 0, sizeof(*stats)); | ||
189 | /* | 188 | /* |
190 | * Each accounting subsystem adds calls to its functions to | 189 | * Each accounting subsystem adds calls to its functions to |
191 | * fill in relevant parts of struct taskstsats as follows | 190 | * fill in relevant parts of struct taskstsats as follows |
@@ -228,6 +227,8 @@ static int fill_tgid(pid_t tgid, struct task_struct *first, | |||
228 | 227 | ||
229 | if (first->signal->stats) | 228 | if (first->signal->stats) |
230 | memcpy(stats, first->signal->stats, sizeof(*stats)); | 229 | memcpy(stats, first->signal->stats, sizeof(*stats)); |
230 | else | ||
231 | memset(stats, 0, sizeof(*stats)); | ||
231 | 232 | ||
232 | tsk = first; | 233 | tsk = first; |
233 | do { | 234 | do { |
@@ -344,14 +345,36 @@ static int parse(struct nlattr *na, cpumask_t *mask) | |||
344 | return ret; | 345 | return ret; |
345 | } | 346 | } |
346 | 347 | ||
348 | static struct taskstats *mk_reply(struct sk_buff *skb, int type, u32 pid) | ||
349 | { | ||
350 | struct nlattr *na, *ret; | ||
351 | int aggr; | ||
352 | |||
353 | aggr = (type == TASKSTATS_TYPE_PID) | ||
354 | ? TASKSTATS_TYPE_AGGR_PID | ||
355 | : TASKSTATS_TYPE_AGGR_TGID; | ||
356 | |||
357 | na = nla_nest_start(skb, aggr); | ||
358 | if (!na) | ||
359 | goto err; | ||
360 | if (nla_put(skb, type, sizeof(pid), &pid) < 0) | ||
361 | goto err; | ||
362 | ret = nla_reserve(skb, TASKSTATS_TYPE_STATS, sizeof(struct taskstats)); | ||
363 | if (!ret) | ||
364 | goto err; | ||
365 | nla_nest_end(skb, na); | ||
366 | |||
367 | return nla_data(ret); | ||
368 | err: | ||
369 | return NULL; | ||
370 | } | ||
371 | |||
347 | static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info) | 372 | static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info) |
348 | { | 373 | { |
349 | int rc = 0; | 374 | int rc = 0; |
350 | struct sk_buff *rep_skb; | 375 | struct sk_buff *rep_skb; |
351 | struct taskstats stats; | 376 | struct taskstats *stats; |
352 | void *reply; | ||
353 | size_t size; | 377 | size_t size; |
354 | struct nlattr *na; | ||
355 | cpumask_t mask; | 378 | cpumask_t mask; |
356 | 379 | ||
357 | rc = parse(info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK], &mask); | 380 | rc = parse(info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK], &mask); |
@@ -372,83 +395,71 @@ static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info) | |||
372 | size = nla_total_size(sizeof(u32)) + | 395 | size = nla_total_size(sizeof(u32)) + |
373 | nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); | 396 | nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); |
374 | 397 | ||
375 | memset(&stats, 0, sizeof(stats)); | 398 | rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size); |
376 | rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, &reply, size); | ||
377 | if (rc < 0) | 399 | if (rc < 0) |
378 | return rc; | 400 | return rc; |
379 | 401 | ||
402 | rc = -EINVAL; | ||
380 | if (info->attrs[TASKSTATS_CMD_ATTR_PID]) { | 403 | if (info->attrs[TASKSTATS_CMD_ATTR_PID]) { |
381 | u32 pid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_PID]); | 404 | u32 pid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_PID]); |
382 | rc = fill_pid(pid, NULL, &stats); | 405 | stats = mk_reply(rep_skb, TASKSTATS_TYPE_PID, pid); |
383 | if (rc < 0) | 406 | if (!stats) |
384 | goto err; | 407 | goto err; |
385 | 408 | ||
386 | na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_PID); | 409 | rc = fill_pid(pid, NULL, stats); |
387 | NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_PID, pid); | 410 | if (rc < 0) |
388 | NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS, | 411 | goto err; |
389 | stats); | ||
390 | } else if (info->attrs[TASKSTATS_CMD_ATTR_TGID]) { | 412 | } else if (info->attrs[TASKSTATS_CMD_ATTR_TGID]) { |
391 | u32 tgid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_TGID]); | 413 | u32 tgid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_TGID]); |
392 | rc = fill_tgid(tgid, NULL, &stats); | 414 | stats = mk_reply(rep_skb, TASKSTATS_TYPE_TGID, tgid); |
393 | if (rc < 0) | 415 | if (!stats) |
394 | goto err; | 416 | goto err; |
395 | 417 | ||
396 | na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_TGID); | 418 | rc = fill_tgid(tgid, NULL, stats); |
397 | NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_TGID, tgid); | 419 | if (rc < 0) |
398 | NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS, | 420 | goto err; |
399 | stats); | 421 | } else |
400 | } else { | ||
401 | rc = -EINVAL; | ||
402 | goto err; | 422 | goto err; |
403 | } | ||
404 | |||
405 | nla_nest_end(rep_skb, na); | ||
406 | 423 | ||
407 | return send_reply(rep_skb, info->snd_pid); | 424 | return send_reply(rep_skb, info->snd_pid); |
408 | |||
409 | nla_put_failure: | ||
410 | rc = genlmsg_cancel(rep_skb, reply); | ||
411 | err: | 425 | err: |
412 | nlmsg_free(rep_skb); | 426 | nlmsg_free(rep_skb); |
413 | return rc; | 427 | return rc; |
414 | } | 428 | } |
415 | 429 | ||
416 | void taskstats_exit_alloc(struct taskstats **ptidstats, unsigned int *mycpu) | 430 | static struct taskstats *taskstats_tgid_alloc(struct task_struct *tsk) |
417 | { | 431 | { |
418 | struct listener_list *listeners; | 432 | struct signal_struct *sig = tsk->signal; |
419 | struct taskstats *tmp; | 433 | struct taskstats *stats; |
420 | /* | ||
421 | * This is the cpu on which the task is exiting currently and will | ||
422 | * be the one for which the exit event is sent, even if the cpu | ||
423 | * on which this function is running changes later. | ||
424 | */ | ||
425 | *mycpu = raw_smp_processor_id(); | ||
426 | 434 | ||
427 | *ptidstats = NULL; | 435 | if (sig->stats || thread_group_empty(tsk)) |
428 | tmp = kmem_cache_zalloc(taskstats_cache, SLAB_KERNEL); | 436 | goto ret; |
429 | if (!tmp) | ||
430 | return; | ||
431 | 437 | ||
432 | listeners = &per_cpu(listener_array, *mycpu); | 438 | /* No problem if kmem_cache_zalloc() fails */ |
433 | down_read(&listeners->sem); | 439 | stats = kmem_cache_zalloc(taskstats_cache, GFP_KERNEL); |
434 | if (!list_empty(&listeners->list)) { | 440 | |
435 | *ptidstats = tmp; | 441 | spin_lock_irq(&tsk->sighand->siglock); |
436 | tmp = NULL; | 442 | if (!sig->stats) { |
443 | sig->stats = stats; | ||
444 | stats = NULL; | ||
437 | } | 445 | } |
438 | up_read(&listeners->sem); | 446 | spin_unlock_irq(&tsk->sighand->siglock); |
439 | kfree(tmp); | 447 | |
448 | if (stats) | ||
449 | kmem_cache_free(taskstats_cache, stats); | ||
450 | ret: | ||
451 | return sig->stats; | ||
440 | } | 452 | } |
441 | 453 | ||
442 | /* Send pid data out on exit */ | 454 | /* Send pid data out on exit */ |
443 | void taskstats_exit_send(struct task_struct *tsk, struct taskstats *tidstats, | 455 | void taskstats_exit(struct task_struct *tsk, int group_dead) |
444 | int group_dead, unsigned int mycpu) | ||
445 | { | 456 | { |
446 | int rc; | 457 | int rc; |
458 | struct listener_list *listeners; | ||
459 | struct taskstats *stats; | ||
447 | struct sk_buff *rep_skb; | 460 | struct sk_buff *rep_skb; |
448 | void *reply; | ||
449 | size_t size; | 461 | size_t size; |
450 | int is_thread_group; | 462 | int is_thread_group; |
451 | struct nlattr *na; | ||
452 | 463 | ||
453 | if (!family_registered) | 464 | if (!family_registered) |
454 | return; | 465 | return; |
@@ -459,7 +470,7 @@ void taskstats_exit_send(struct task_struct *tsk, struct taskstats *tidstats, | |||
459 | size = nla_total_size(sizeof(u32)) + | 470 | size = nla_total_size(sizeof(u32)) + |
460 | nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); | 471 | nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); |
461 | 472 | ||
462 | is_thread_group = (tsk->signal->stats != NULL); | 473 | is_thread_group = !!taskstats_tgid_alloc(tsk); |
463 | if (is_thread_group) { | 474 | if (is_thread_group) { |
464 | /* PID + STATS + TGID + STATS */ | 475 | /* PID + STATS + TGID + STATS */ |
465 | size = 2 * size; | 476 | size = 2 * size; |
@@ -467,49 +478,39 @@ void taskstats_exit_send(struct task_struct *tsk, struct taskstats *tidstats, | |||
467 | fill_tgid_exit(tsk); | 478 | fill_tgid_exit(tsk); |
468 | } | 479 | } |
469 | 480 | ||
470 | if (!tidstats) | 481 | listeners = &__raw_get_cpu_var(listener_array); |
482 | if (list_empty(&listeners->list)) | ||
471 | return; | 483 | return; |
472 | 484 | ||
473 | rc = prepare_reply(NULL, TASKSTATS_CMD_NEW, &rep_skb, &reply, size); | 485 | rc = prepare_reply(NULL, TASKSTATS_CMD_NEW, &rep_skb, size); |
474 | if (rc < 0) | ||
475 | goto ret; | ||
476 | |||
477 | rc = fill_pid(tsk->pid, tsk, tidstats); | ||
478 | if (rc < 0) | 486 | if (rc < 0) |
479 | goto err_skb; | 487 | return; |
480 | 488 | ||
481 | na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_PID); | 489 | stats = mk_reply(rep_skb, TASKSTATS_TYPE_PID, tsk->pid); |
482 | NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_PID, (u32)tsk->pid); | 490 | if (!stats) |
483 | NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS, | 491 | goto err; |
484 | *tidstats); | ||
485 | nla_nest_end(rep_skb, na); | ||
486 | 492 | ||
487 | if (!is_thread_group) | 493 | rc = fill_pid(tsk->pid, tsk, stats); |
488 | goto send; | 494 | if (rc < 0) |
495 | goto err; | ||
489 | 496 | ||
490 | /* | 497 | /* |
491 | * Doesn't matter if tsk is the leader or the last group member leaving | 498 | * Doesn't matter if tsk is the leader or the last group member leaving |
492 | */ | 499 | */ |
493 | if (!group_dead) | 500 | if (!is_thread_group || !group_dead) |
494 | goto send; | 501 | goto send; |
495 | 502 | ||
496 | na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_TGID); | 503 | stats = mk_reply(rep_skb, TASKSTATS_TYPE_TGID, tsk->tgid); |
497 | NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_TGID, (u32)tsk->tgid); | 504 | if (!stats) |
498 | /* No locking needed for tsk->signal->stats since group is dead */ | 505 | goto err; |
499 | NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS, | 506 | |
500 | *tsk->signal->stats); | 507 | memcpy(stats, tsk->signal->stats, sizeof(*stats)); |
501 | nla_nest_end(rep_skb, na); | ||
502 | 508 | ||
503 | send: | 509 | send: |
504 | send_cpu_listeners(rep_skb, mycpu); | 510 | send_cpu_listeners(rep_skb, listeners); |
505 | return; | 511 | return; |
506 | 512 | err: | |
507 | nla_put_failure: | ||
508 | genlmsg_cancel(rep_skb, reply); | ||
509 | err_skb: | ||
510 | nlmsg_free(rep_skb); | 513 | nlmsg_free(rep_skb); |
511 | ret: | ||
512 | return; | ||
513 | } | 514 | } |
514 | 515 | ||
515 | static struct genl_ops taskstats_ops = { | 516 | static struct genl_ops taskstats_ops = { |
diff --git a/kernel/unwind.c b/kernel/unwind.c index ed0a21d4a902..09c261329249 100644 --- a/kernel/unwind.c +++ b/kernel/unwind.c | |||
@@ -14,11 +14,12 @@ | |||
14 | #include <linux/bootmem.h> | 14 | #include <linux/bootmem.h> |
15 | #include <linux/sort.h> | 15 | #include <linux/sort.h> |
16 | #include <linux/stop_machine.h> | 16 | #include <linux/stop_machine.h> |
17 | #include <linux/uaccess.h> | ||
17 | #include <asm/sections.h> | 18 | #include <asm/sections.h> |
18 | #include <asm/uaccess.h> | 19 | #include <asm/uaccess.h> |
19 | #include <asm/unaligned.h> | 20 | #include <asm/unaligned.h> |
20 | 21 | ||
21 | extern char __start_unwind[], __end_unwind[]; | 22 | extern const char __start_unwind[], __end_unwind[]; |
22 | extern const u8 __start_unwind_hdr[], __end_unwind_hdr[]; | 23 | extern const u8 __start_unwind_hdr[], __end_unwind_hdr[]; |
23 | 24 | ||
24 | #define MAX_STACK_DEPTH 8 | 25 | #define MAX_STACK_DEPTH 8 |
@@ -94,6 +95,7 @@ static const struct { | |||
94 | 95 | ||
95 | typedef unsigned long uleb128_t; | 96 | typedef unsigned long uleb128_t; |
96 | typedef signed long sleb128_t; | 97 | typedef signed long sleb128_t; |
98 | #define sleb128abs __builtin_labs | ||
97 | 99 | ||
98 | static struct unwind_table { | 100 | static struct unwind_table { |
99 | struct { | 101 | struct { |
@@ -135,6 +137,17 @@ struct unwind_state { | |||
135 | 137 | ||
136 | static const struct cfa badCFA = { ARRAY_SIZE(reg_info), 1 }; | 138 | static const struct cfa badCFA = { ARRAY_SIZE(reg_info), 1 }; |
137 | 139 | ||
140 | static unsigned unwind_debug; | ||
141 | static int __init unwind_debug_setup(char *s) | ||
142 | { | ||
143 | unwind_debug = simple_strtoul(s, NULL, 0); | ||
144 | return 1; | ||
145 | } | ||
146 | __setup("unwind_debug=", unwind_debug_setup); | ||
147 | #define dprintk(lvl, fmt, args...) \ | ||
148 | ((void)(lvl > unwind_debug \ | ||
149 | || printk(KERN_DEBUG "unwind: " fmt "\n", ##args))) | ||
150 | |||
138 | static struct unwind_table *find_table(unsigned long pc) | 151 | static struct unwind_table *find_table(unsigned long pc) |
139 | { | 152 | { |
140 | struct unwind_table *table; | 153 | struct unwind_table *table; |
@@ -151,7 +164,9 @@ static struct unwind_table *find_table(unsigned long pc) | |||
151 | 164 | ||
152 | static unsigned long read_pointer(const u8 **pLoc, | 165 | static unsigned long read_pointer(const u8 **pLoc, |
153 | const void *end, | 166 | const void *end, |
154 | signed ptrType); | 167 | signed ptrType, |
168 | unsigned long text_base, | ||
169 | unsigned long data_base); | ||
155 | 170 | ||
156 | static void init_unwind_table(struct unwind_table *table, | 171 | static void init_unwind_table(struct unwind_table *table, |
157 | const char *name, | 172 | const char *name, |
@@ -176,10 +191,13 @@ static void init_unwind_table(struct unwind_table *table, | |||
176 | /* See if the linker provided table looks valid. */ | 191 | /* See if the linker provided table looks valid. */ |
177 | if (header_size <= 4 | 192 | if (header_size <= 4 |
178 | || header_start[0] != 1 | 193 | || header_start[0] != 1 |
179 | || (void *)read_pointer(&ptr, end, header_start[1]) != table_start | 194 | || (void *)read_pointer(&ptr, end, header_start[1], 0, 0) |
180 | || header_start[2] == DW_EH_PE_omit | 195 | != table_start |
181 | || read_pointer(&ptr, end, header_start[2]) <= 0 | 196 | || !read_pointer(&ptr, end, header_start[2], 0, 0) |
182 | || header_start[3] == DW_EH_PE_omit) | 197 | || !read_pointer(&ptr, end, header_start[3], 0, |
198 | (unsigned long)header_start) | ||
199 | || !read_pointer(&ptr, end, header_start[3], 0, | ||
200 | (unsigned long)header_start)) | ||
183 | header_start = NULL; | 201 | header_start = NULL; |
184 | table->hdrsz = header_size; | 202 | table->hdrsz = header_size; |
185 | smp_wmb(); | 203 | smp_wmb(); |
@@ -269,7 +287,7 @@ static void __init setup_unwind_table(struct unwind_table *table, | |||
269 | ptr = (const u8 *)(fde + 2); | 287 | ptr = (const u8 *)(fde + 2); |
270 | if (!read_pointer(&ptr, | 288 | if (!read_pointer(&ptr, |
271 | (const u8 *)(fde + 1) + *fde, | 289 | (const u8 *)(fde + 1) + *fde, |
272 | ptrType)) | 290 | ptrType, 0, 0)) |
273 | return; | 291 | return; |
274 | ++n; | 292 | ++n; |
275 | } | 293 | } |
@@ -279,6 +297,7 @@ static void __init setup_unwind_table(struct unwind_table *table, | |||
279 | 297 | ||
280 | hdrSize = 4 + sizeof(unsigned long) + sizeof(unsigned int) | 298 | hdrSize = 4 + sizeof(unsigned long) + sizeof(unsigned int) |
281 | + 2 * n * sizeof(unsigned long); | 299 | + 2 * n * sizeof(unsigned long); |
300 | dprintk(2, "Binary lookup table size for %s: %lu bytes", table->name, hdrSize); | ||
282 | header = alloc(hdrSize); | 301 | header = alloc(hdrSize); |
283 | if (!header) | 302 | if (!header) |
284 | return; | 303 | return; |
@@ -303,7 +322,7 @@ static void __init setup_unwind_table(struct unwind_table *table, | |||
303 | ptr = (const u8 *)(fde + 2); | 322 | ptr = (const u8 *)(fde + 2); |
304 | header->table[n].start = read_pointer(&ptr, | 323 | header->table[n].start = read_pointer(&ptr, |
305 | (const u8 *)(fde + 1) + *fde, | 324 | (const u8 *)(fde + 1) + *fde, |
306 | fde_pointer_type(cie)); | 325 | fde_pointer_type(cie), 0, 0); |
307 | header->table[n].fde = (unsigned long)fde; | 326 | header->table[n].fde = (unsigned long)fde; |
308 | ++n; | 327 | ++n; |
309 | } | 328 | } |
@@ -486,7 +505,9 @@ static const u32 *cie_for_fde(const u32 *fde, const struct unwind_table *table) | |||
486 | 505 | ||
487 | static unsigned long read_pointer(const u8 **pLoc, | 506 | static unsigned long read_pointer(const u8 **pLoc, |
488 | const void *end, | 507 | const void *end, |
489 | signed ptrType) | 508 | signed ptrType, |
509 | unsigned long text_base, | ||
510 | unsigned long data_base) | ||
490 | { | 511 | { |
491 | unsigned long value = 0; | 512 | unsigned long value = 0; |
492 | union { | 513 | union { |
@@ -498,13 +519,17 @@ static unsigned long read_pointer(const u8 **pLoc, | |||
498 | const unsigned long *pul; | 519 | const unsigned long *pul; |
499 | } ptr; | 520 | } ptr; |
500 | 521 | ||
501 | if (ptrType < 0 || ptrType == DW_EH_PE_omit) | 522 | if (ptrType < 0 || ptrType == DW_EH_PE_omit) { |
523 | dprintk(1, "Invalid pointer encoding %02X (%p,%p).", ptrType, *pLoc, end); | ||
502 | return 0; | 524 | return 0; |
525 | } | ||
503 | ptr.p8 = *pLoc; | 526 | ptr.p8 = *pLoc; |
504 | switch(ptrType & DW_EH_PE_FORM) { | 527 | switch(ptrType & DW_EH_PE_FORM) { |
505 | case DW_EH_PE_data2: | 528 | case DW_EH_PE_data2: |
506 | if (end < (const void *)(ptr.p16u + 1)) | 529 | if (end < (const void *)(ptr.p16u + 1)) { |
530 | dprintk(1, "Data16 overrun (%p,%p).", ptr.p8, end); | ||
507 | return 0; | 531 | return 0; |
532 | } | ||
508 | if(ptrType & DW_EH_PE_signed) | 533 | if(ptrType & DW_EH_PE_signed) |
509 | value = get_unaligned(ptr.p16s++); | 534 | value = get_unaligned(ptr.p16s++); |
510 | else | 535 | else |
@@ -512,8 +537,10 @@ static unsigned long read_pointer(const u8 **pLoc, | |||
512 | break; | 537 | break; |
513 | case DW_EH_PE_data4: | 538 | case DW_EH_PE_data4: |
514 | #ifdef CONFIG_64BIT | 539 | #ifdef CONFIG_64BIT |
515 | if (end < (const void *)(ptr.p32u + 1)) | 540 | if (end < (const void *)(ptr.p32u + 1)) { |
541 | dprintk(1, "Data32 overrun (%p,%p).", ptr.p8, end); | ||
516 | return 0; | 542 | return 0; |
543 | } | ||
517 | if(ptrType & DW_EH_PE_signed) | 544 | if(ptrType & DW_EH_PE_signed) |
518 | value = get_unaligned(ptr.p32s++); | 545 | value = get_unaligned(ptr.p32s++); |
519 | else | 546 | else |
@@ -525,8 +552,10 @@ static unsigned long read_pointer(const u8 **pLoc, | |||
525 | BUILD_BUG_ON(sizeof(u32) != sizeof(value)); | 552 | BUILD_BUG_ON(sizeof(u32) != sizeof(value)); |
526 | #endif | 553 | #endif |
527 | case DW_EH_PE_native: | 554 | case DW_EH_PE_native: |
528 | if (end < (const void *)(ptr.pul + 1)) | 555 | if (end < (const void *)(ptr.pul + 1)) { |
556 | dprintk(1, "DataUL overrun (%p,%p).", ptr.p8, end); | ||
529 | return 0; | 557 | return 0; |
558 | } | ||
530 | value = get_unaligned(ptr.pul++); | 559 | value = get_unaligned(ptr.pul++); |
531 | break; | 560 | break; |
532 | case DW_EH_PE_leb128: | 561 | case DW_EH_PE_leb128: |
@@ -534,10 +563,14 @@ static unsigned long read_pointer(const u8 **pLoc, | |||
534 | value = ptrType & DW_EH_PE_signed | 563 | value = ptrType & DW_EH_PE_signed |
535 | ? get_sleb128(&ptr.p8, end) | 564 | ? get_sleb128(&ptr.p8, end) |
536 | : get_uleb128(&ptr.p8, end); | 565 | : get_uleb128(&ptr.p8, end); |
537 | if ((const void *)ptr.p8 > end) | 566 | if ((const void *)ptr.p8 > end) { |
567 | dprintk(1, "DataLEB overrun (%p,%p).", ptr.p8, end); | ||
538 | return 0; | 568 | return 0; |
569 | } | ||
539 | break; | 570 | break; |
540 | default: | 571 | default: |
572 | dprintk(2, "Cannot decode pointer type %02X (%p,%p).", | ||
573 | ptrType, ptr.p8, end); | ||
541 | return 0; | 574 | return 0; |
542 | } | 575 | } |
543 | switch(ptrType & DW_EH_PE_ADJUST) { | 576 | switch(ptrType & DW_EH_PE_ADJUST) { |
@@ -546,12 +579,33 @@ static unsigned long read_pointer(const u8 **pLoc, | |||
546 | case DW_EH_PE_pcrel: | 579 | case DW_EH_PE_pcrel: |
547 | value += (unsigned long)*pLoc; | 580 | value += (unsigned long)*pLoc; |
548 | break; | 581 | break; |
582 | case DW_EH_PE_textrel: | ||
583 | if (likely(text_base)) { | ||
584 | value += text_base; | ||
585 | break; | ||
586 | } | ||
587 | dprintk(2, "Text-relative encoding %02X (%p,%p), but zero text base.", | ||
588 | ptrType, *pLoc, end); | ||
589 | return 0; | ||
590 | case DW_EH_PE_datarel: | ||
591 | if (likely(data_base)) { | ||
592 | value += data_base; | ||
593 | break; | ||
594 | } | ||
595 | dprintk(2, "Data-relative encoding %02X (%p,%p), but zero data base.", | ||
596 | ptrType, *pLoc, end); | ||
597 | return 0; | ||
549 | default: | 598 | default: |
599 | dprintk(2, "Cannot adjust pointer type %02X (%p,%p).", | ||
600 | ptrType, *pLoc, end); | ||
550 | return 0; | 601 | return 0; |
551 | } | 602 | } |
552 | if ((ptrType & DW_EH_PE_indirect) | 603 | if ((ptrType & DW_EH_PE_indirect) |
553 | && __get_user(value, (unsigned long *)value)) | 604 | && probe_kernel_address((unsigned long *)value, value)) { |
605 | dprintk(1, "Cannot read indirect value %lx (%p,%p).", | ||
606 | value, *pLoc, end); | ||
554 | return 0; | 607 | return 0; |
608 | } | ||
555 | *pLoc = ptr.p8; | 609 | *pLoc = ptr.p8; |
556 | 610 | ||
557 | return value; | 611 | return value; |
@@ -594,7 +648,8 @@ static signed fde_pointer_type(const u32 *cie) | |||
594 | case 'P': { | 648 | case 'P': { |
595 | signed ptrType = *ptr++; | 649 | signed ptrType = *ptr++; |
596 | 650 | ||
597 | if (!read_pointer(&ptr, end, ptrType) || ptr > end) | 651 | if (!read_pointer(&ptr, end, ptrType, 0, 0) |
652 | || ptr > end) | ||
598 | return -1; | 653 | return -1; |
599 | } | 654 | } |
600 | break; | 655 | break; |
@@ -654,7 +709,8 @@ static int processCFI(const u8 *start, | |||
654 | case DW_CFA_nop: | 709 | case DW_CFA_nop: |
655 | break; | 710 | break; |
656 | case DW_CFA_set_loc: | 711 | case DW_CFA_set_loc: |
657 | if ((state->loc = read_pointer(&ptr.p8, end, ptrType)) == 0) | 712 | state->loc = read_pointer(&ptr.p8, end, ptrType, 0, 0); |
713 | if (state->loc == 0) | ||
658 | result = 0; | 714 | result = 0; |
659 | break; | 715 | break; |
660 | case DW_CFA_advance_loc1: | 716 | case DW_CFA_advance_loc1: |
@@ -700,8 +756,10 @@ static int processCFI(const u8 *start, | |||
700 | state->label = NULL; | 756 | state->label = NULL; |
701 | return 1; | 757 | return 1; |
702 | } | 758 | } |
703 | if (state->stackDepth >= MAX_STACK_DEPTH) | 759 | if (state->stackDepth >= MAX_STACK_DEPTH) { |
760 | dprintk(1, "State stack overflow (%p,%p).", ptr.p8, end); | ||
704 | return 0; | 761 | return 0; |
762 | } | ||
705 | state->stack[state->stackDepth++] = ptr.p8; | 763 | state->stack[state->stackDepth++] = ptr.p8; |
706 | break; | 764 | break; |
707 | case DW_CFA_restore_state: | 765 | case DW_CFA_restore_state: |
@@ -716,8 +774,10 @@ static int processCFI(const u8 *start, | |||
716 | result = processCFI(start, end, 0, ptrType, state); | 774 | result = processCFI(start, end, 0, ptrType, state); |
717 | state->loc = loc; | 775 | state->loc = loc; |
718 | state->label = label; | 776 | state->label = label; |
719 | } else | 777 | } else { |
778 | dprintk(1, "State stack underflow (%p,%p).", ptr.p8, end); | ||
720 | return 0; | 779 | return 0; |
780 | } | ||
721 | break; | 781 | break; |
722 | case DW_CFA_def_cfa: | 782 | case DW_CFA_def_cfa: |
723 | state->cfa.reg = get_uleb128(&ptr.p8, end); | 783 | state->cfa.reg = get_uleb128(&ptr.p8, end); |
@@ -749,6 +809,7 @@ static int processCFI(const u8 *start, | |||
749 | break; | 809 | break; |
750 | case DW_CFA_GNU_window_save: | 810 | case DW_CFA_GNU_window_save: |
751 | default: | 811 | default: |
812 | dprintk(1, "Unrecognized CFI op %02X (%p,%p).", ptr.p8[-1], ptr.p8 - 1, end); | ||
752 | result = 0; | 813 | result = 0; |
753 | break; | 814 | break; |
754 | } | 815 | } |
@@ -764,12 +825,17 @@ static int processCFI(const u8 *start, | |||
764 | set_rule(*ptr.p8++ & 0x3f, Nowhere, 0, state); | 825 | set_rule(*ptr.p8++ & 0x3f, Nowhere, 0, state); |
765 | break; | 826 | break; |
766 | } | 827 | } |
767 | if (ptr.p8 > end) | 828 | if (ptr.p8 > end) { |
829 | dprintk(1, "Data overrun (%p,%p).", ptr.p8, end); | ||
768 | result = 0; | 830 | result = 0; |
831 | } | ||
769 | if (result && targetLoc != 0 && targetLoc < state->loc) | 832 | if (result && targetLoc != 0 && targetLoc < state->loc) |
770 | return 1; | 833 | return 1; |
771 | } | 834 | } |
772 | 835 | ||
836 | if (result && ptr.p8 < end) | ||
837 | dprintk(1, "Data underrun (%p,%p).", ptr.p8, end); | ||
838 | |||
773 | return result | 839 | return result |
774 | && ptr.p8 == end | 840 | && ptr.p8 == end |
775 | && (targetLoc == 0 | 841 | && (targetLoc == 0 |
@@ -786,7 +852,7 @@ int unwind(struct unwind_frame_info *frame) | |||
786 | #define FRAME_REG(r, t) (((t *)frame)[reg_info[r].offs]) | 852 | #define FRAME_REG(r, t) (((t *)frame)[reg_info[r].offs]) |
787 | const u32 *fde = NULL, *cie = NULL; | 853 | const u32 *fde = NULL, *cie = NULL; |
788 | const u8 *ptr = NULL, *end = NULL; | 854 | const u8 *ptr = NULL, *end = NULL; |
789 | unsigned long pc = UNW_PC(frame) - frame->call_frame; | 855 | unsigned long pc = UNW_PC(frame) - frame->call_frame, sp; |
790 | unsigned long startLoc = 0, endLoc = 0, cfa; | 856 | unsigned long startLoc = 0, endLoc = 0, cfa; |
791 | unsigned i; | 857 | unsigned i; |
792 | signed ptrType = -1; | 858 | signed ptrType = -1; |
@@ -813,9 +879,9 @@ int unwind(struct unwind_frame_info *frame) | |||
813 | ptr = hdr + 4; | 879 | ptr = hdr + 4; |
814 | end = hdr + table->hdrsz; | 880 | end = hdr + table->hdrsz; |
815 | if (tableSize | 881 | if (tableSize |
816 | && read_pointer(&ptr, end, hdr[1]) | 882 | && read_pointer(&ptr, end, hdr[1], 0, 0) |
817 | == (unsigned long)table->address | 883 | == (unsigned long)table->address |
818 | && (i = read_pointer(&ptr, end, hdr[2])) > 0 | 884 | && (i = read_pointer(&ptr, end, hdr[2], 0, 0)) > 0 |
819 | && i == (end - ptr) / (2 * tableSize) | 885 | && i == (end - ptr) / (2 * tableSize) |
820 | && !((end - ptr) % (2 * tableSize))) { | 886 | && !((end - ptr) % (2 * tableSize))) { |
821 | do { | 887 | do { |
@@ -823,7 +889,8 @@ int unwind(struct unwind_frame_info *frame) | |||
823 | 889 | ||
824 | startLoc = read_pointer(&cur, | 890 | startLoc = read_pointer(&cur, |
825 | cur + tableSize, | 891 | cur + tableSize, |
826 | hdr[3]); | 892 | hdr[3], 0, |
893 | (unsigned long)hdr); | ||
827 | if (pc < startLoc) | 894 | if (pc < startLoc) |
828 | i /= 2; | 895 | i /= 2; |
829 | else { | 896 | else { |
@@ -834,13 +901,17 @@ int unwind(struct unwind_frame_info *frame) | |||
834 | if (i == 1 | 901 | if (i == 1 |
835 | && (startLoc = read_pointer(&ptr, | 902 | && (startLoc = read_pointer(&ptr, |
836 | ptr + tableSize, | 903 | ptr + tableSize, |
837 | hdr[3])) != 0 | 904 | hdr[3], 0, |
905 | (unsigned long)hdr)) != 0 | ||
838 | && pc >= startLoc) | 906 | && pc >= startLoc) |
839 | fde = (void *)read_pointer(&ptr, | 907 | fde = (void *)read_pointer(&ptr, |
840 | ptr + tableSize, | 908 | ptr + tableSize, |
841 | hdr[3]); | 909 | hdr[3], 0, |
910 | (unsigned long)hdr); | ||
842 | } | 911 | } |
843 | } | 912 | } |
913 | if(hdr && !fde) | ||
914 | dprintk(3, "Binary lookup for %lx failed.", pc); | ||
844 | 915 | ||
845 | if (fde != NULL) { | 916 | if (fde != NULL) { |
846 | cie = cie_for_fde(fde, table); | 917 | cie = cie_for_fde(fde, table); |
@@ -851,17 +922,19 @@ int unwind(struct unwind_frame_info *frame) | |||
851 | && (ptrType = fde_pointer_type(cie)) >= 0 | 922 | && (ptrType = fde_pointer_type(cie)) >= 0 |
852 | && read_pointer(&ptr, | 923 | && read_pointer(&ptr, |
853 | (const u8 *)(fde + 1) + *fde, | 924 | (const u8 *)(fde + 1) + *fde, |
854 | ptrType) == startLoc) { | 925 | ptrType, 0, 0) == startLoc) { |
855 | if (!(ptrType & DW_EH_PE_indirect)) | 926 | if (!(ptrType & DW_EH_PE_indirect)) |
856 | ptrType &= DW_EH_PE_FORM|DW_EH_PE_signed; | 927 | ptrType &= DW_EH_PE_FORM|DW_EH_PE_signed; |
857 | endLoc = startLoc | 928 | endLoc = startLoc |
858 | + read_pointer(&ptr, | 929 | + read_pointer(&ptr, |
859 | (const u8 *)(fde + 1) + *fde, | 930 | (const u8 *)(fde + 1) + *fde, |
860 | ptrType); | 931 | ptrType, 0, 0); |
861 | if(pc >= endLoc) | 932 | if(pc >= endLoc) |
862 | fde = NULL; | 933 | fde = NULL; |
863 | } else | 934 | } else |
864 | fde = NULL; | 935 | fde = NULL; |
936 | if(!fde) | ||
937 | dprintk(1, "Binary lookup result for %lx discarded.", pc); | ||
865 | } | 938 | } |
866 | if (fde == NULL) { | 939 | if (fde == NULL) { |
867 | for (fde = table->address, tableSize = table->size; | 940 | for (fde = table->address, tableSize = table->size; |
@@ -881,7 +954,7 @@ int unwind(struct unwind_frame_info *frame) | |||
881 | ptr = (const u8 *)(fde + 2); | 954 | ptr = (const u8 *)(fde + 2); |
882 | startLoc = read_pointer(&ptr, | 955 | startLoc = read_pointer(&ptr, |
883 | (const u8 *)(fde + 1) + *fde, | 956 | (const u8 *)(fde + 1) + *fde, |
884 | ptrType); | 957 | ptrType, 0, 0); |
885 | if (!startLoc) | 958 | if (!startLoc) |
886 | continue; | 959 | continue; |
887 | if (!(ptrType & DW_EH_PE_indirect)) | 960 | if (!(ptrType & DW_EH_PE_indirect)) |
@@ -889,10 +962,12 @@ int unwind(struct unwind_frame_info *frame) | |||
889 | endLoc = startLoc | 962 | endLoc = startLoc |
890 | + read_pointer(&ptr, | 963 | + read_pointer(&ptr, |
891 | (const u8 *)(fde + 1) + *fde, | 964 | (const u8 *)(fde + 1) + *fde, |
892 | ptrType); | 965 | ptrType, 0, 0); |
893 | if (pc >= startLoc && pc < endLoc) | 966 | if (pc >= startLoc && pc < endLoc) |
894 | break; | 967 | break; |
895 | } | 968 | } |
969 | if(!fde) | ||
970 | dprintk(3, "Linear lookup for %lx failed.", pc); | ||
896 | } | 971 | } |
897 | } | 972 | } |
898 | if (cie != NULL) { | 973 | if (cie != NULL) { |
@@ -926,6 +1001,8 @@ int unwind(struct unwind_frame_info *frame) | |||
926 | if (ptr >= end || *ptr) | 1001 | if (ptr >= end || *ptr) |
927 | cie = NULL; | 1002 | cie = NULL; |
928 | } | 1003 | } |
1004 | if(!cie) | ||
1005 | dprintk(1, "CIE unusable (%p,%p).", ptr, end); | ||
929 | ++ptr; | 1006 | ++ptr; |
930 | } | 1007 | } |
931 | if (cie != NULL) { | 1008 | if (cie != NULL) { |
@@ -935,7 +1012,12 @@ int unwind(struct unwind_frame_info *frame) | |||
935 | state.dataAlign = get_sleb128(&ptr, end); | 1012 | state.dataAlign = get_sleb128(&ptr, end); |
936 | if (state.codeAlign == 0 || state.dataAlign == 0 || ptr >= end) | 1013 | if (state.codeAlign == 0 || state.dataAlign == 0 || ptr >= end) |
937 | cie = NULL; | 1014 | cie = NULL; |
938 | else { | 1015 | else if (UNW_PC(frame) % state.codeAlign |
1016 | || UNW_SP(frame) % sleb128abs(state.dataAlign)) { | ||
1017 | dprintk(1, "Input pointer(s) misaligned (%lx,%lx).", | ||
1018 | UNW_PC(frame), UNW_SP(frame)); | ||
1019 | return -EPERM; | ||
1020 | } else { | ||
939 | retAddrReg = state.version <= 1 ? *ptr++ : get_uleb128(&ptr, end); | 1021 | retAddrReg = state.version <= 1 ? *ptr++ : get_uleb128(&ptr, end); |
940 | /* skip augmentation */ | 1022 | /* skip augmentation */ |
941 | if (((const char *)(cie + 2))[1] == 'z') { | 1023 | if (((const char *)(cie + 2))[1] == 'z') { |
@@ -949,6 +1031,8 @@ int unwind(struct unwind_frame_info *frame) | |||
949 | || reg_info[retAddrReg].width != sizeof(unsigned long)) | 1031 | || reg_info[retAddrReg].width != sizeof(unsigned long)) |
950 | cie = NULL; | 1032 | cie = NULL; |
951 | } | 1033 | } |
1034 | if(!cie) | ||
1035 | dprintk(1, "CIE validation failed (%p,%p).", ptr, end); | ||
952 | } | 1036 | } |
953 | if (cie != NULL) { | 1037 | if (cie != NULL) { |
954 | state.cieStart = ptr; | 1038 | state.cieStart = ptr; |
@@ -962,11 +1046,15 @@ int unwind(struct unwind_frame_info *frame) | |||
962 | if ((ptr += augSize) > end) | 1046 | if ((ptr += augSize) > end) |
963 | fde = NULL; | 1047 | fde = NULL; |
964 | } | 1048 | } |
1049 | if(!fde) | ||
1050 | dprintk(1, "FDE validation failed (%p,%p).", ptr, end); | ||
965 | } | 1051 | } |
966 | if (cie == NULL || fde == NULL) { | 1052 | if (cie == NULL || fde == NULL) { |
967 | #ifdef CONFIG_FRAME_POINTER | 1053 | #ifdef CONFIG_FRAME_POINTER |
968 | unsigned long top, bottom; | 1054 | unsigned long top, bottom; |
969 | 1055 | ||
1056 | if ((UNW_SP(frame) | UNW_FP(frame)) % sizeof(unsigned long)) | ||
1057 | return -EPERM; | ||
970 | top = STACK_TOP(frame->task); | 1058 | top = STACK_TOP(frame->task); |
971 | bottom = STACK_BOTTOM(frame->task); | 1059 | bottom = STACK_BOTTOM(frame->task); |
972 | # if FRAME_RETADDR_OFFSET < 0 | 1060 | # if FRAME_RETADDR_OFFSET < 0 |
@@ -982,18 +1070,19 @@ int unwind(struct unwind_frame_info *frame) | |||
982 | & (sizeof(unsigned long) - 1))) { | 1070 | & (sizeof(unsigned long) - 1))) { |
983 | unsigned long link; | 1071 | unsigned long link; |
984 | 1072 | ||
985 | if (!__get_user(link, | 1073 | if (!probe_kernel_address( |
986 | (unsigned long *)(UNW_FP(frame) | 1074 | (unsigned long *)(UNW_FP(frame) |
987 | + FRAME_LINK_OFFSET)) | 1075 | + FRAME_LINK_OFFSET), |
1076 | link) | ||
988 | # if FRAME_RETADDR_OFFSET < 0 | 1077 | # if FRAME_RETADDR_OFFSET < 0 |
989 | && link > bottom && link < UNW_FP(frame) | 1078 | && link > bottom && link < UNW_FP(frame) |
990 | # else | 1079 | # else |
991 | && link > UNW_FP(frame) && link < bottom | 1080 | && link > UNW_FP(frame) && link < bottom |
992 | # endif | 1081 | # endif |
993 | && !(link & (sizeof(link) - 1)) | 1082 | && !(link & (sizeof(link) - 1)) |
994 | && !__get_user(UNW_PC(frame), | 1083 | && !probe_kernel_address( |
995 | (unsigned long *)(UNW_FP(frame) | 1084 | (unsigned long *)(UNW_FP(frame) |
996 | + FRAME_RETADDR_OFFSET))) { | 1085 | + FRAME_RETADDR_OFFSET), UNW_PC(frame))) { |
997 | UNW_SP(frame) = UNW_FP(frame) + FRAME_RETADDR_OFFSET | 1086 | UNW_SP(frame) = UNW_FP(frame) + FRAME_RETADDR_OFFSET |
998 | # if FRAME_RETADDR_OFFSET < 0 | 1087 | # if FRAME_RETADDR_OFFSET < 0 |
999 | - | 1088 | - |
@@ -1016,8 +1105,11 @@ int unwind(struct unwind_frame_info *frame) | |||
1016 | || state.regs[retAddrReg].where == Nowhere | 1105 | || state.regs[retAddrReg].where == Nowhere |
1017 | || state.cfa.reg >= ARRAY_SIZE(reg_info) | 1106 | || state.cfa.reg >= ARRAY_SIZE(reg_info) |
1018 | || reg_info[state.cfa.reg].width != sizeof(unsigned long) | 1107 | || reg_info[state.cfa.reg].width != sizeof(unsigned long) |
1019 | || state.cfa.offs % sizeof(unsigned long)) | 1108 | || FRAME_REG(state.cfa.reg, unsigned long) % sizeof(unsigned long) |
1109 | || state.cfa.offs % sizeof(unsigned long)) { | ||
1110 | dprintk(1, "Unusable unwind info (%p,%p).", ptr, end); | ||
1020 | return -EIO; | 1111 | return -EIO; |
1112 | } | ||
1021 | /* update frame */ | 1113 | /* update frame */ |
1022 | #ifndef CONFIG_AS_CFI_SIGNAL_FRAME | 1114 | #ifndef CONFIG_AS_CFI_SIGNAL_FRAME |
1023 | if(frame->call_frame | 1115 | if(frame->call_frame |
@@ -1036,10 +1128,14 @@ int unwind(struct unwind_frame_info *frame) | |||
1036 | #else | 1128 | #else |
1037 | # define CASES CASE(8); CASE(16); CASE(32); CASE(64) | 1129 | # define CASES CASE(8); CASE(16); CASE(32); CASE(64) |
1038 | #endif | 1130 | #endif |
1131 | pc = UNW_PC(frame); | ||
1132 | sp = UNW_SP(frame); | ||
1039 | for (i = 0; i < ARRAY_SIZE(state.regs); ++i) { | 1133 | for (i = 0; i < ARRAY_SIZE(state.regs); ++i) { |
1040 | if (REG_INVALID(i)) { | 1134 | if (REG_INVALID(i)) { |
1041 | if (state.regs[i].where == Nowhere) | 1135 | if (state.regs[i].where == Nowhere) |
1042 | continue; | 1136 | continue; |
1137 | dprintk(1, "Cannot restore register %u (%d).", | ||
1138 | i, state.regs[i].where); | ||
1043 | return -EIO; | 1139 | return -EIO; |
1044 | } | 1140 | } |
1045 | switch(state.regs[i].where) { | 1141 | switch(state.regs[i].where) { |
@@ -1048,8 +1144,11 @@ int unwind(struct unwind_frame_info *frame) | |||
1048 | case Register: | 1144 | case Register: |
1049 | if (state.regs[i].value >= ARRAY_SIZE(reg_info) | 1145 | if (state.regs[i].value >= ARRAY_SIZE(reg_info) |
1050 | || REG_INVALID(state.regs[i].value) | 1146 | || REG_INVALID(state.regs[i].value) |
1051 | || reg_info[i].width > reg_info[state.regs[i].value].width) | 1147 | || reg_info[i].width > reg_info[state.regs[i].value].width) { |
1148 | dprintk(1, "Cannot restore register %u from register %lu.", | ||
1149 | i, state.regs[i].value); | ||
1052 | return -EIO; | 1150 | return -EIO; |
1151 | } | ||
1053 | switch(reg_info[state.regs[i].value].width) { | 1152 | switch(reg_info[state.regs[i].value].width) { |
1054 | #define CASE(n) \ | 1153 | #define CASE(n) \ |
1055 | case sizeof(u##n): \ | 1154 | case sizeof(u##n): \ |
@@ -1059,6 +1158,9 @@ int unwind(struct unwind_frame_info *frame) | |||
1059 | CASES; | 1158 | CASES; |
1060 | #undef CASE | 1159 | #undef CASE |
1061 | default: | 1160 | default: |
1161 | dprintk(1, "Unsupported register size %u (%lu).", | ||
1162 | reg_info[state.regs[i].value].width, | ||
1163 | state.regs[i].value); | ||
1062 | return -EIO; | 1164 | return -EIO; |
1063 | } | 1165 | } |
1064 | break; | 1166 | break; |
@@ -1083,12 +1185,17 @@ int unwind(struct unwind_frame_info *frame) | |||
1083 | CASES; | 1185 | CASES; |
1084 | #undef CASE | 1186 | #undef CASE |
1085 | default: | 1187 | default: |
1188 | dprintk(1, "Unsupported register size %u (%u).", | ||
1189 | reg_info[i].width, i); | ||
1086 | return -EIO; | 1190 | return -EIO; |
1087 | } | 1191 | } |
1088 | break; | 1192 | break; |
1089 | case Value: | 1193 | case Value: |
1090 | if (reg_info[i].width != sizeof(unsigned long)) | 1194 | if (reg_info[i].width != sizeof(unsigned long)) { |
1195 | dprintk(1, "Unsupported value size %u (%u).", | ||
1196 | reg_info[i].width, i); | ||
1091 | return -EIO; | 1197 | return -EIO; |
1198 | } | ||
1092 | FRAME_REG(i, unsigned long) = cfa + state.regs[i].value | 1199 | FRAME_REG(i, unsigned long) = cfa + state.regs[i].value |
1093 | * state.dataAlign; | 1200 | * state.dataAlign; |
1094 | break; | 1201 | break; |
@@ -1100,15 +1207,20 @@ int unwind(struct unwind_frame_info *frame) | |||
1100 | % sizeof(unsigned long) | 1207 | % sizeof(unsigned long) |
1101 | || addr < startLoc | 1208 | || addr < startLoc |
1102 | || addr + sizeof(unsigned long) < addr | 1209 | || addr + sizeof(unsigned long) < addr |
1103 | || addr + sizeof(unsigned long) > endLoc) | 1210 | || addr + sizeof(unsigned long) > endLoc) { |
1211 | dprintk(1, "Bad memory location %lx (%lx).", | ||
1212 | addr, state.regs[i].value); | ||
1104 | return -EIO; | 1213 | return -EIO; |
1214 | } | ||
1105 | switch(reg_info[i].width) { | 1215 | switch(reg_info[i].width) { |
1106 | #define CASE(n) case sizeof(u##n): \ | 1216 | #define CASE(n) case sizeof(u##n): \ |
1107 | __get_user(FRAME_REG(i, u##n), (u##n *)addr); \ | 1217 | probe_kernel_address((u##n *)addr, FRAME_REG(i, u##n)); \ |
1108 | break | 1218 | break |
1109 | CASES; | 1219 | CASES; |
1110 | #undef CASE | 1220 | #undef CASE |
1111 | default: | 1221 | default: |
1222 | dprintk(1, "Unsupported memory size %u (%u).", | ||
1223 | reg_info[i].width, i); | ||
1112 | return -EIO; | 1224 | return -EIO; |
1113 | } | 1225 | } |
1114 | } | 1226 | } |
@@ -1116,6 +1228,17 @@ int unwind(struct unwind_frame_info *frame) | |||
1116 | } | 1228 | } |
1117 | } | 1229 | } |
1118 | 1230 | ||
1231 | if (UNW_PC(frame) % state.codeAlign | ||
1232 | || UNW_SP(frame) % sleb128abs(state.dataAlign)) { | ||
1233 | dprintk(1, "Output pointer(s) misaligned (%lx,%lx).", | ||
1234 | UNW_PC(frame), UNW_SP(frame)); | ||
1235 | return -EIO; | ||
1236 | } | ||
1237 | if (pc == UNW_PC(frame) && sp == UNW_SP(frame)) { | ||
1238 | dprintk(1, "No progress (%lx,%lx).", pc, sp); | ||
1239 | return -EIO; | ||
1240 | } | ||
1241 | |||
1119 | return 0; | 1242 | return 0; |
1120 | #undef CASES | 1243 | #undef CASES |
1121 | #undef FRAME_REG | 1244 | #undef FRAME_REG |
diff --git a/kernel/user.c b/kernel/user.c index 220e586127a0..4869563080e9 100644 --- a/kernel/user.c +++ b/kernel/user.c | |||
@@ -26,7 +26,7 @@ | |||
26 | #define __uidhashfn(uid) (((uid >> UIDHASH_BITS) + uid) & UIDHASH_MASK) | 26 | #define __uidhashfn(uid) (((uid >> UIDHASH_BITS) + uid) & UIDHASH_MASK) |
27 | #define uidhashentry(uid) (uidhash_table + __uidhashfn((uid))) | 27 | #define uidhashentry(uid) (uidhash_table + __uidhashfn((uid))) |
28 | 28 | ||
29 | static kmem_cache_t *uid_cachep; | 29 | static struct kmem_cache *uid_cachep; |
30 | static struct list_head uidhash_table[UIDHASH_SZ]; | 30 | static struct list_head uidhash_table[UIDHASH_SZ]; |
31 | 31 | ||
32 | /* | 32 | /* |
@@ -132,7 +132,7 @@ struct user_struct * alloc_uid(uid_t uid) | |||
132 | if (!up) { | 132 | if (!up) { |
133 | struct user_struct *new; | 133 | struct user_struct *new; |
134 | 134 | ||
135 | new = kmem_cache_alloc(uid_cachep, SLAB_KERNEL); | 135 | new = kmem_cache_alloc(uid_cachep, GFP_KERNEL); |
136 | if (!new) | 136 | if (!new) |
137 | return NULL; | 137 | return NULL; |
138 | new->uid = uid; | 138 | new->uid = uid; |
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 8d1e7cb8a51a..6b186750e9be 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
@@ -29,6 +29,9 @@ | |||
29 | #include <linux/kthread.h> | 29 | #include <linux/kthread.h> |
30 | #include <linux/hardirq.h> | 30 | #include <linux/hardirq.h> |
31 | #include <linux/mempolicy.h> | 31 | #include <linux/mempolicy.h> |
32 | #include <linux/freezer.h> | ||
33 | #include <linux/kallsyms.h> | ||
34 | #include <linux/debug_locks.h> | ||
32 | 35 | ||
33 | /* | 36 | /* |
34 | * The per-CPU workqueue (if single thread, we always use the first | 37 | * The per-CPU workqueue (if single thread, we always use the first |
@@ -55,6 +58,8 @@ struct cpu_workqueue_struct { | |||
55 | struct task_struct *thread; | 58 | struct task_struct *thread; |
56 | 59 | ||
57 | int run_depth; /* Detect run_workqueue() recursion depth */ | 60 | int run_depth; /* Detect run_workqueue() recursion depth */ |
61 | |||
62 | int freezeable; /* Freeze the thread during suspend */ | ||
58 | } ____cacheline_aligned; | 63 | } ____cacheline_aligned; |
59 | 64 | ||
60 | /* | 65 | /* |
@@ -103,6 +108,79 @@ static inline void *get_wq_data(struct work_struct *work) | |||
103 | return (void *) (work->management & WORK_STRUCT_WQ_DATA_MASK); | 108 | return (void *) (work->management & WORK_STRUCT_WQ_DATA_MASK); |
104 | } | 109 | } |
105 | 110 | ||
111 | static int __run_work(struct cpu_workqueue_struct *cwq, struct work_struct *work) | ||
112 | { | ||
113 | int ret = 0; | ||
114 | unsigned long flags; | ||
115 | |||
116 | spin_lock_irqsave(&cwq->lock, flags); | ||
117 | /* | ||
118 | * We need to re-validate the work info after we've gotten | ||
119 | * the cpu_workqueue lock. We can run the work now iff: | ||
120 | * | ||
121 | * - the wq_data still matches the cpu_workqueue_struct | ||
122 | * - AND the work is still marked pending | ||
123 | * - AND the work is still on a list (which will be this | ||
124 | * workqueue_struct list) | ||
125 | * | ||
126 | * All these conditions are important, because we | ||
127 | * need to protect against the work being run right | ||
128 | * now on another CPU (all but the last one might be | ||
129 | * true if it's currently running and has not been | ||
130 | * released yet, for example). | ||
131 | */ | ||
132 | if (get_wq_data(work) == cwq | ||
133 | && work_pending(work) | ||
134 | && !list_empty(&work->entry)) { | ||
135 | work_func_t f = work->func; | ||
136 | list_del_init(&work->entry); | ||
137 | spin_unlock_irqrestore(&cwq->lock, flags); | ||
138 | |||
139 | if (!test_bit(WORK_STRUCT_NOAUTOREL, &work->management)) | ||
140 | work_release(work); | ||
141 | f(work); | ||
142 | |||
143 | spin_lock_irqsave(&cwq->lock, flags); | ||
144 | cwq->remove_sequence++; | ||
145 | wake_up(&cwq->work_done); | ||
146 | ret = 1; | ||
147 | } | ||
148 | spin_unlock_irqrestore(&cwq->lock, flags); | ||
149 | return ret; | ||
150 | } | ||
151 | |||
152 | /** | ||
153 | * run_scheduled_work - run scheduled work synchronously | ||
154 | * @work: work to run | ||
155 | * | ||
156 | * This checks if the work was pending, and runs it | ||
157 | * synchronously if so. It returns a boolean to indicate | ||
158 | * whether it had any scheduled work to run or not. | ||
159 | * | ||
160 | * NOTE! This _only_ works for normal work_structs. You | ||
161 | * CANNOT use this for delayed work, because the wq data | ||
162 | * for delayed work will not point properly to the per- | ||
163 | * CPU workqueue struct, but will change! | ||
164 | */ | ||
165 | int fastcall run_scheduled_work(struct work_struct *work) | ||
166 | { | ||
167 | for (;;) { | ||
168 | struct cpu_workqueue_struct *cwq; | ||
169 | |||
170 | if (!work_pending(work)) | ||
171 | return 0; | ||
172 | if (list_empty(&work->entry)) | ||
173 | return 0; | ||
174 | /* NOTE! This depends intimately on __queue_work! */ | ||
175 | cwq = get_wq_data(work); | ||
176 | if (!cwq) | ||
177 | return 0; | ||
178 | if (__run_work(cwq, work)) | ||
179 | return 1; | ||
180 | } | ||
181 | } | ||
182 | EXPORT_SYMBOL(run_scheduled_work); | ||
183 | |||
106 | /* Preempt must be disabled. */ | 184 | /* Preempt must be disabled. */ |
107 | static void __queue_work(struct cpu_workqueue_struct *cwq, | 185 | static void __queue_work(struct cpu_workqueue_struct *cwq, |
108 | struct work_struct *work) | 186 | struct work_struct *work) |
@@ -250,6 +328,17 @@ static void run_workqueue(struct cpu_workqueue_struct *cwq) | |||
250 | work_release(work); | 328 | work_release(work); |
251 | f(work); | 329 | f(work); |
252 | 330 | ||
331 | if (unlikely(in_atomic() || lockdep_depth(current) > 0)) { | ||
332 | printk(KERN_ERR "BUG: workqueue leaked lock or atomic: " | ||
333 | "%s/0x%08x/%d\n", | ||
334 | current->comm, preempt_count(), | ||
335 | current->pid); | ||
336 | printk(KERN_ERR " last function: "); | ||
337 | print_symbol("%s\n", (unsigned long)f); | ||
338 | debug_show_held_locks(current); | ||
339 | dump_stack(); | ||
340 | } | ||
341 | |||
253 | spin_lock_irqsave(&cwq->lock, flags); | 342 | spin_lock_irqsave(&cwq->lock, flags); |
254 | cwq->remove_sequence++; | 343 | cwq->remove_sequence++; |
255 | wake_up(&cwq->work_done); | 344 | wake_up(&cwq->work_done); |
@@ -265,7 +354,8 @@ static int worker_thread(void *__cwq) | |||
265 | struct k_sigaction sa; | 354 | struct k_sigaction sa; |
266 | sigset_t blocked; | 355 | sigset_t blocked; |
267 | 356 | ||
268 | current->flags |= PF_NOFREEZE; | 357 | if (!cwq->freezeable) |
358 | current->flags |= PF_NOFREEZE; | ||
269 | 359 | ||
270 | set_user_nice(current, -5); | 360 | set_user_nice(current, -5); |
271 | 361 | ||
@@ -288,6 +378,9 @@ static int worker_thread(void *__cwq) | |||
288 | 378 | ||
289 | set_current_state(TASK_INTERRUPTIBLE); | 379 | set_current_state(TASK_INTERRUPTIBLE); |
290 | while (!kthread_should_stop()) { | 380 | while (!kthread_should_stop()) { |
381 | if (cwq->freezeable) | ||
382 | try_to_freeze(); | ||
383 | |||
291 | add_wait_queue(&cwq->more_work, &wait); | 384 | add_wait_queue(&cwq->more_work, &wait); |
292 | if (list_empty(&cwq->worklist)) | 385 | if (list_empty(&cwq->worklist)) |
293 | schedule(); | 386 | schedule(); |
@@ -364,7 +457,7 @@ void fastcall flush_workqueue(struct workqueue_struct *wq) | |||
364 | EXPORT_SYMBOL_GPL(flush_workqueue); | 457 | EXPORT_SYMBOL_GPL(flush_workqueue); |
365 | 458 | ||
366 | static struct task_struct *create_workqueue_thread(struct workqueue_struct *wq, | 459 | static struct task_struct *create_workqueue_thread(struct workqueue_struct *wq, |
367 | int cpu) | 460 | int cpu, int freezeable) |
368 | { | 461 | { |
369 | struct cpu_workqueue_struct *cwq = per_cpu_ptr(wq->cpu_wq, cpu); | 462 | struct cpu_workqueue_struct *cwq = per_cpu_ptr(wq->cpu_wq, cpu); |
370 | struct task_struct *p; | 463 | struct task_struct *p; |
@@ -374,6 +467,7 @@ static struct task_struct *create_workqueue_thread(struct workqueue_struct *wq, | |||
374 | cwq->thread = NULL; | 467 | cwq->thread = NULL; |
375 | cwq->insert_sequence = 0; | 468 | cwq->insert_sequence = 0; |
376 | cwq->remove_sequence = 0; | 469 | cwq->remove_sequence = 0; |
470 | cwq->freezeable = freezeable; | ||
377 | INIT_LIST_HEAD(&cwq->worklist); | 471 | INIT_LIST_HEAD(&cwq->worklist); |
378 | init_waitqueue_head(&cwq->more_work); | 472 | init_waitqueue_head(&cwq->more_work); |
379 | init_waitqueue_head(&cwq->work_done); | 473 | init_waitqueue_head(&cwq->work_done); |
@@ -389,7 +483,7 @@ static struct task_struct *create_workqueue_thread(struct workqueue_struct *wq, | |||
389 | } | 483 | } |
390 | 484 | ||
391 | struct workqueue_struct *__create_workqueue(const char *name, | 485 | struct workqueue_struct *__create_workqueue(const char *name, |
392 | int singlethread) | 486 | int singlethread, int freezeable) |
393 | { | 487 | { |
394 | int cpu, destroy = 0; | 488 | int cpu, destroy = 0; |
395 | struct workqueue_struct *wq; | 489 | struct workqueue_struct *wq; |
@@ -409,7 +503,7 @@ struct workqueue_struct *__create_workqueue(const char *name, | |||
409 | mutex_lock(&workqueue_mutex); | 503 | mutex_lock(&workqueue_mutex); |
410 | if (singlethread) { | 504 | if (singlethread) { |
411 | INIT_LIST_HEAD(&wq->list); | 505 | INIT_LIST_HEAD(&wq->list); |
412 | p = create_workqueue_thread(wq, singlethread_cpu); | 506 | p = create_workqueue_thread(wq, singlethread_cpu, freezeable); |
413 | if (!p) | 507 | if (!p) |
414 | destroy = 1; | 508 | destroy = 1; |
415 | else | 509 | else |
@@ -417,7 +511,7 @@ struct workqueue_struct *__create_workqueue(const char *name, | |||
417 | } else { | 511 | } else { |
418 | list_add(&wq->list, &workqueues); | 512 | list_add(&wq->list, &workqueues); |
419 | for_each_online_cpu(cpu) { | 513 | for_each_online_cpu(cpu) { |
420 | p = create_workqueue_thread(wq, cpu); | 514 | p = create_workqueue_thread(wq, cpu, freezeable); |
421 | if (p) { | 515 | if (p) { |
422 | kthread_bind(p, cpu); | 516 | kthread_bind(p, cpu); |
423 | wake_up_process(p); | 517 | wake_up_process(p); |
@@ -634,7 +728,6 @@ int current_is_keventd(void) | |||
634 | 728 | ||
635 | } | 729 | } |
636 | 730 | ||
637 | #ifdef CONFIG_HOTPLUG_CPU | ||
638 | /* Take the work from this (downed) CPU. */ | 731 | /* Take the work from this (downed) CPU. */ |
639 | static void take_over_work(struct workqueue_struct *wq, unsigned int cpu) | 732 | static void take_over_work(struct workqueue_struct *wq, unsigned int cpu) |
640 | { | 733 | { |
@@ -667,7 +760,7 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb, | |||
667 | mutex_lock(&workqueue_mutex); | 760 | mutex_lock(&workqueue_mutex); |
668 | /* Create a new workqueue thread for it. */ | 761 | /* Create a new workqueue thread for it. */ |
669 | list_for_each_entry(wq, &workqueues, list) { | 762 | list_for_each_entry(wq, &workqueues, list) { |
670 | if (!create_workqueue_thread(wq, hotcpu)) { | 763 | if (!create_workqueue_thread(wq, hotcpu, 0)) { |
671 | printk("workqueue for %i failed\n", hotcpu); | 764 | printk("workqueue for %i failed\n", hotcpu); |
672 | return NOTIFY_BAD; | 765 | return NOTIFY_BAD; |
673 | } | 766 | } |
@@ -717,7 +810,6 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb, | |||
717 | 810 | ||
718 | return NOTIFY_OK; | 811 | return NOTIFY_OK; |
719 | } | 812 | } |
720 | #endif | ||
721 | 813 | ||
722 | void init_workqueues(void) | 814 | void init_workqueues(void) |
723 | { | 815 | { |