diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/acct.c | 2 | ||||
-rw-r--r-- | kernel/auditsc.c | 1 | ||||
-rw-r--r-- | kernel/compat.c | 23 | ||||
-rw-r--r-- | kernel/cpuset.c | 16 | ||||
-rw-r--r-- | kernel/exit.c | 5 | ||||
-rw-r--r-- | kernel/fork.c | 5 | ||||
-rw-r--r-- | kernel/futex.c | 8 | ||||
-rw-r--r-- | kernel/irq/handle.c | 5 | ||||
-rw-r--r-- | kernel/irq/migration.c | 4 | ||||
-rw-r--r-- | kernel/irq/proc.c | 3 | ||||
-rw-r--r-- | kernel/irq/spurious.c | 12 | ||||
-rw-r--r-- | kernel/kexec.c | 6 | ||||
-rw-r--r-- | kernel/ksysfs.c | 19 | ||||
-rw-r--r-- | kernel/power/main.c | 2 | ||||
-rw-r--r-- | kernel/power/power.h | 6 | ||||
-rw-r--r-- | kernel/power/snapshot.c | 260 | ||||
-rw-r--r-- | kernel/power/swsusp.c | 32 | ||||
-rw-r--r-- | kernel/rcupdate.c | 13 | ||||
-rw-r--r-- | kernel/sched.c | 12 | ||||
-rw-r--r-- | kernel/sys.c | 70 | ||||
-rw-r--r-- | kernel/sys_ni.c | 2 | ||||
-rw-r--r-- | kernel/sysctl.c | 11 | ||||
-rw-r--r-- | kernel/timer.c | 30 | ||||
-rw-r--r-- | kernel/user.c | 2 | ||||
-rw-r--r-- | kernel/workqueue.c | 4 |
25 files changed, 378 insertions, 175 deletions
diff --git a/kernel/acct.c b/kernel/acct.c index b327f4d20104..6802020e0ceb 100644 --- a/kernel/acct.c +++ b/kernel/acct.c | |||
@@ -118,7 +118,7 @@ static int check_free_space(struct file *file) | |||
118 | spin_unlock(&acct_globals.lock); | 118 | spin_unlock(&acct_globals.lock); |
119 | 119 | ||
120 | /* May block */ | 120 | /* May block */ |
121 | if (vfs_statfs(file->f_dentry->d_inode->i_sb, &sbuf)) | 121 | if (vfs_statfs(file->f_dentry, &sbuf)) |
122 | return res; | 122 | return res; |
123 | suspend = sbuf.f_blocks * SUSPEND; | 123 | suspend = sbuf.f_blocks * SUSPEND; |
124 | resume = sbuf.f_blocks * RESUME; | 124 | resume = sbuf.f_blocks * RESUME; |
diff --git a/kernel/auditsc.c b/kernel/auditsc.c index b097ccb4eb7e..9ebd96fda295 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c | |||
@@ -1558,6 +1558,7 @@ int __audit_ipc_obj(struct kern_ipc_perm *ipcp) | |||
1558 | * @uid: msgq user id | 1558 | * @uid: msgq user id |
1559 | * @gid: msgq group id | 1559 | * @gid: msgq group id |
1560 | * @mode: msgq mode (permissions) | 1560 | * @mode: msgq mode (permissions) |
1561 | * @ipcp: in-kernel IPC permissions | ||
1561 | * | 1562 | * |
1562 | * Returns 0 for success or NULL context or < 0 on error. | 1563 | * Returns 0 for success or NULL context or < 0 on error. |
1563 | */ | 1564 | */ |
diff --git a/kernel/compat.c b/kernel/compat.c index c1601a84f8d8..2f672332430f 100644 --- a/kernel/compat.c +++ b/kernel/compat.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/unistd.h> | 21 | #include <linux/unistd.h> |
22 | #include <linux/security.h> | 22 | #include <linux/security.h> |
23 | #include <linux/timex.h> | 23 | #include <linux/timex.h> |
24 | #include <linux/migrate.h> | ||
24 | 25 | ||
25 | #include <asm/uaccess.h> | 26 | #include <asm/uaccess.h> |
26 | 27 | ||
@@ -934,3 +935,25 @@ asmlinkage long compat_sys_adjtimex(struct compat_timex __user *utp) | |||
934 | 935 | ||
935 | return ret; | 936 | return ret; |
936 | } | 937 | } |
938 | |||
939 | #ifdef CONFIG_NUMA | ||
940 | asmlinkage long compat_sys_move_pages(pid_t pid, unsigned long nr_pages, | ||
941 | compat_uptr_t __user *pages32, | ||
942 | const int __user *nodes, | ||
943 | int __user *status, | ||
944 | int flags) | ||
945 | { | ||
946 | const void __user * __user *pages; | ||
947 | int i; | ||
948 | |||
949 | pages = compat_alloc_user_space(nr_pages * sizeof(void *)); | ||
950 | for (i = 0; i < nr_pages; i++) { | ||
951 | compat_uptr_t p; | ||
952 | |||
953 | if (get_user(p, pages32 + i) || | ||
954 | put_user(compat_ptr(p), pages + i)) | ||
955 | return -EFAULT; | ||
956 | } | ||
957 | return sys_move_pages(pid, nr_pages, pages, nodes, status, flags); | ||
958 | } | ||
959 | #endif | ||
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index ab81fdd4572b..b602f73fb38d 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
@@ -41,6 +41,7 @@ | |||
41 | #include <linux/rcupdate.h> | 41 | #include <linux/rcupdate.h> |
42 | #include <linux/sched.h> | 42 | #include <linux/sched.h> |
43 | #include <linux/seq_file.h> | 43 | #include <linux/seq_file.h> |
44 | #include <linux/security.h> | ||
44 | #include <linux/slab.h> | 45 | #include <linux/slab.h> |
45 | #include <linux/smp_lock.h> | 46 | #include <linux/smp_lock.h> |
46 | #include <linux/spinlock.h> | 47 | #include <linux/spinlock.h> |
@@ -392,11 +393,11 @@ static int cpuset_fill_super(struct super_block *sb, void *unused_data, | |||
392 | return 0; | 393 | return 0; |
393 | } | 394 | } |
394 | 395 | ||
395 | static struct super_block *cpuset_get_sb(struct file_system_type *fs_type, | 396 | static int cpuset_get_sb(struct file_system_type *fs_type, |
396 | int flags, const char *unused_dev_name, | 397 | int flags, const char *unused_dev_name, |
397 | void *data) | 398 | void *data, struct vfsmount *mnt) |
398 | { | 399 | { |
399 | return get_sb_single(fs_type, flags, data, cpuset_fill_super); | 400 | return get_sb_single(fs_type, flags, data, cpuset_fill_super, mnt); |
400 | } | 401 | } |
401 | 402 | ||
402 | static struct file_system_type cpuset_fs_type = { | 403 | static struct file_system_type cpuset_fs_type = { |
@@ -1177,6 +1178,7 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf) | |||
1177 | cpumask_t cpus; | 1178 | cpumask_t cpus; |
1178 | nodemask_t from, to; | 1179 | nodemask_t from, to; |
1179 | struct mm_struct *mm; | 1180 | struct mm_struct *mm; |
1181 | int retval; | ||
1180 | 1182 | ||
1181 | if (sscanf(pidbuf, "%d", &pid) != 1) | 1183 | if (sscanf(pidbuf, "%d", &pid) != 1) |
1182 | return -EIO; | 1184 | return -EIO; |
@@ -1205,6 +1207,12 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf) | |||
1205 | get_task_struct(tsk); | 1207 | get_task_struct(tsk); |
1206 | } | 1208 | } |
1207 | 1209 | ||
1210 | retval = security_task_setscheduler(tsk, 0, NULL); | ||
1211 | if (retval) { | ||
1212 | put_task_struct(tsk); | ||
1213 | return retval; | ||
1214 | } | ||
1215 | |||
1208 | mutex_lock(&callback_mutex); | 1216 | mutex_lock(&callback_mutex); |
1209 | 1217 | ||
1210 | task_lock(tsk); | 1218 | task_lock(tsk); |
diff --git a/kernel/exit.c b/kernel/exit.c index e06d0c10a24e..a3baf92462bd 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
@@ -579,7 +579,7 @@ static void exit_mm(struct task_struct * tsk) | |||
579 | down_read(&mm->mmap_sem); | 579 | down_read(&mm->mmap_sem); |
580 | } | 580 | } |
581 | atomic_inc(&mm->mm_count); | 581 | atomic_inc(&mm->mm_count); |
582 | if (mm != tsk->active_mm) BUG(); | 582 | BUG_ON(mm != tsk->active_mm); |
583 | /* more a memory barrier than a real lock */ | 583 | /* more a memory barrier than a real lock */ |
584 | task_lock(tsk); | 584 | task_lock(tsk); |
585 | tsk->mm = NULL; | 585 | tsk->mm = NULL; |
@@ -1530,8 +1530,7 @@ check_continued: | |||
1530 | if (options & __WNOTHREAD) | 1530 | if (options & __WNOTHREAD) |
1531 | break; | 1531 | break; |
1532 | tsk = next_thread(tsk); | 1532 | tsk = next_thread(tsk); |
1533 | if (tsk->signal != current->signal) | 1533 | BUG_ON(tsk->signal != current->signal); |
1534 | BUG(); | ||
1535 | } while (tsk != current); | 1534 | } while (tsk != current); |
1536 | 1535 | ||
1537 | read_unlock(&tasklist_lock); | 1536 | read_unlock(&tasklist_lock); |
diff --git a/kernel/fork.c b/kernel/fork.c index ac8100e3088a..49adc0e8d47c 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -368,6 +368,8 @@ void fastcall __mmdrop(struct mm_struct *mm) | |||
368 | */ | 368 | */ |
369 | void mmput(struct mm_struct *mm) | 369 | void mmput(struct mm_struct *mm) |
370 | { | 370 | { |
371 | might_sleep(); | ||
372 | |||
371 | if (atomic_dec_and_test(&mm->mm_users)) { | 373 | if (atomic_dec_and_test(&mm->mm_users)) { |
372 | exit_aio(mm); | 374 | exit_aio(mm); |
373 | exit_mmap(mm); | 375 | exit_mmap(mm); |
@@ -623,6 +625,7 @@ out: | |||
623 | /* | 625 | /* |
624 | * Allocate a new files structure and copy contents from the | 626 | * Allocate a new files structure and copy contents from the |
625 | * passed in files structure. | 627 | * passed in files structure. |
628 | * errorp will be valid only when the returned files_struct is NULL. | ||
626 | */ | 629 | */ |
627 | static struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) | 630 | static struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) |
628 | { | 631 | { |
@@ -631,6 +634,7 @@ static struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) | |||
631 | int open_files, size, i, expand; | 634 | int open_files, size, i, expand; |
632 | struct fdtable *old_fdt, *new_fdt; | 635 | struct fdtable *old_fdt, *new_fdt; |
633 | 636 | ||
637 | *errorp = -ENOMEM; | ||
634 | newf = alloc_files(); | 638 | newf = alloc_files(); |
635 | if (!newf) | 639 | if (!newf) |
636 | goto out; | 640 | goto out; |
@@ -744,7 +748,6 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk) | |||
744 | * break this. | 748 | * break this. |
745 | */ | 749 | */ |
746 | tsk->files = NULL; | 750 | tsk->files = NULL; |
747 | error = -ENOMEM; | ||
748 | newf = dup_fd(oldf, &error); | 751 | newf = dup_fd(oldf, &error); |
749 | if (!newf) | 752 | if (!newf) |
750 | goto out; | 753 | goto out; |
diff --git a/kernel/futex.c b/kernel/futex.c index 5699c512057b..e1a380c77a5a 100644 --- a/kernel/futex.c +++ b/kernel/futex.c | |||
@@ -1056,11 +1056,11 @@ asmlinkage long sys_futex(u32 __user *uaddr, int op, int val, | |||
1056 | (unsigned long)uaddr2, val2, val3); | 1056 | (unsigned long)uaddr2, val2, val3); |
1057 | } | 1057 | } |
1058 | 1058 | ||
1059 | static struct super_block * | 1059 | static int futexfs_get_sb(struct file_system_type *fs_type, |
1060 | futexfs_get_sb(struct file_system_type *fs_type, | 1060 | int flags, const char *dev_name, void *data, |
1061 | int flags, const char *dev_name, void *data) | 1061 | struct vfsmount *mnt) |
1062 | { | 1062 | { |
1063 | return get_sb_pseudo(fs_type, "futex", NULL, 0xBAD1DEA); | 1063 | return get_sb_pseudo(fs_type, "futex", NULL, 0xBAD1DEA, mnt); |
1064 | } | 1064 | } |
1065 | 1065 | ||
1066 | static struct file_system_type futex_fs_type = { | 1066 | static struct file_system_type futex_fs_type = { |
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 51df337b37db..0f6530117105 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c | |||
@@ -76,10 +76,11 @@ irqreturn_t no_action(int cpl, void *dev_id, struct pt_regs *regs) | |||
76 | /* | 76 | /* |
77 | * Have got an event to handle: | 77 | * Have got an event to handle: |
78 | */ | 78 | */ |
79 | fastcall int handle_IRQ_event(unsigned int irq, struct pt_regs *regs, | 79 | fastcall irqreturn_t handle_IRQ_event(unsigned int irq, struct pt_regs *regs, |
80 | struct irqaction *action) | 80 | struct irqaction *action) |
81 | { | 81 | { |
82 | int ret, retval = 0, status = 0; | 82 | irqreturn_t ret, retval = IRQ_NONE; |
83 | unsigned int status = 0; | ||
83 | 84 | ||
84 | if (!(action->flags & SA_INTERRUPT)) | 85 | if (!(action->flags & SA_INTERRUPT)) |
85 | local_irq_enable(); | 86 | local_irq_enable(); |
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c index 134f9f2e0e39..a12d00eb5e7c 100644 --- a/kernel/irq/migration.c +++ b/kernel/irq/migration.c | |||
@@ -30,7 +30,7 @@ void move_native_irq(int irq) | |||
30 | 30 | ||
31 | desc->move_irq = 0; | 31 | desc->move_irq = 0; |
32 | 32 | ||
33 | if (likely(cpus_empty(pending_irq_cpumask[irq]))) | 33 | if (unlikely(cpus_empty(pending_irq_cpumask[irq]))) |
34 | return; | 34 | return; |
35 | 35 | ||
36 | if (!desc->handler->set_affinity) | 36 | if (!desc->handler->set_affinity) |
@@ -49,7 +49,7 @@ void move_native_irq(int irq) | |||
49 | * cause some ioapics to mal-function. | 49 | * cause some ioapics to mal-function. |
50 | * Being paranoid i guess! | 50 | * Being paranoid i guess! |
51 | */ | 51 | */ |
52 | if (unlikely(!cpus_empty(tmp))) { | 52 | if (likely(!cpus_empty(tmp))) { |
53 | if (likely(!(desc->status & IRQ_DISABLED))) | 53 | if (likely(!(desc->status & IRQ_DISABLED))) |
54 | desc->handler->disable(irq); | 54 | desc->handler->disable(irq); |
55 | 55 | ||
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index d03b5eef8ce0..afacd6f585fa 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c | |||
@@ -24,6 +24,8 @@ static struct proc_dir_entry *smp_affinity_entry[NR_IRQS]; | |||
24 | #ifdef CONFIG_GENERIC_PENDING_IRQ | 24 | #ifdef CONFIG_GENERIC_PENDING_IRQ |
25 | void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val) | 25 | void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val) |
26 | { | 26 | { |
27 | set_balance_irq_affinity(irq, mask_val); | ||
28 | |||
27 | /* | 29 | /* |
28 | * Save these away for later use. Re-progam when the | 30 | * Save these away for later use. Re-progam when the |
29 | * interrupt is pending | 31 | * interrupt is pending |
@@ -33,6 +35,7 @@ void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val) | |||
33 | #else | 35 | #else |
34 | void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val) | 36 | void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val) |
35 | { | 37 | { |
38 | set_balance_irq_affinity(irq, mask_val); | ||
36 | irq_affinity[irq] = mask_val; | 39 | irq_affinity[irq] = mask_val; |
37 | irq_desc[irq].handler->set_affinity(irq, mask_val); | 40 | irq_desc[irq].handler->set_affinity(irq, mask_val); |
38 | } | 41 | } |
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c index 7df9abd5ec86..b2fb3c18d06b 100644 --- a/kernel/irq/spurious.c +++ b/kernel/irq/spurious.c | |||
@@ -11,7 +11,7 @@ | |||
11 | #include <linux/kallsyms.h> | 11 | #include <linux/kallsyms.h> |
12 | #include <linux/interrupt.h> | 12 | #include <linux/interrupt.h> |
13 | 13 | ||
14 | static int irqfixup; | 14 | static int irqfixup __read_mostly; |
15 | 15 | ||
16 | /* | 16 | /* |
17 | * Recovery handler for misrouted interrupts. | 17 | * Recovery handler for misrouted interrupts. |
@@ -136,9 +136,9 @@ static void report_bad_irq(unsigned int irq, irq_desc_t *desc, irqreturn_t actio | |||
136 | void note_interrupt(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret, | 136 | void note_interrupt(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret, |
137 | struct pt_regs *regs) | 137 | struct pt_regs *regs) |
138 | { | 138 | { |
139 | if (action_ret != IRQ_HANDLED) { | 139 | if (unlikely(action_ret != IRQ_HANDLED)) { |
140 | desc->irqs_unhandled++; | 140 | desc->irqs_unhandled++; |
141 | if (action_ret != IRQ_NONE) | 141 | if (unlikely(action_ret != IRQ_NONE)) |
142 | report_bad_irq(irq, desc, action_ret); | 142 | report_bad_irq(irq, desc, action_ret); |
143 | } | 143 | } |
144 | 144 | ||
@@ -152,11 +152,11 @@ void note_interrupt(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret, | |||
152 | } | 152 | } |
153 | 153 | ||
154 | desc->irq_count++; | 154 | desc->irq_count++; |
155 | if (desc->irq_count < 100000) | 155 | if (likely(desc->irq_count < 100000)) |
156 | return; | 156 | return; |
157 | 157 | ||
158 | desc->irq_count = 0; | 158 | desc->irq_count = 0; |
159 | if (desc->irqs_unhandled > 99900) { | 159 | if (unlikely(desc->irqs_unhandled > 99900)) { |
160 | /* | 160 | /* |
161 | * The interrupt is stuck | 161 | * The interrupt is stuck |
162 | */ | 162 | */ |
@@ -171,7 +171,7 @@ void note_interrupt(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret, | |||
171 | desc->irqs_unhandled = 0; | 171 | desc->irqs_unhandled = 0; |
172 | } | 172 | } |
173 | 173 | ||
174 | int noirqdebug; | 174 | int noirqdebug __read_mostly; |
175 | 175 | ||
176 | int __init noirqdebug_setup(char *str) | 176 | int __init noirqdebug_setup(char *str) |
177 | { | 177 | { |
diff --git a/kernel/kexec.c b/kernel/kexec.c index bf39d28e4c0e..58f0f382597c 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c | |||
@@ -902,14 +902,14 @@ static int kimage_load_segment(struct kimage *image, | |||
902 | * kexec does not sync, or unmount filesystems so if you need | 902 | * kexec does not sync, or unmount filesystems so if you need |
903 | * that to happen you need to do that yourself. | 903 | * that to happen you need to do that yourself. |
904 | */ | 904 | */ |
905 | struct kimage *kexec_image = NULL; | 905 | struct kimage *kexec_image; |
906 | static struct kimage *kexec_crash_image = NULL; | 906 | struct kimage *kexec_crash_image; |
907 | /* | 907 | /* |
908 | * A home grown binary mutex. | 908 | * A home grown binary mutex. |
909 | * Nothing can wait so this mutex is safe to use | 909 | * Nothing can wait so this mutex is safe to use |
910 | * in interrupt context :) | 910 | * in interrupt context :) |
911 | */ | 911 | */ |
912 | static int kexec_lock = 0; | 912 | static int kexec_lock; |
913 | 913 | ||
914 | asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments, | 914 | asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments, |
915 | struct kexec_segment __user *segments, | 915 | struct kexec_segment __user *segments, |
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c index f119e098e67b..9e28478a17a5 100644 --- a/kernel/ksysfs.c +++ b/kernel/ksysfs.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/sysfs.h> | 14 | #include <linux/sysfs.h> |
15 | #include <linux/module.h> | 15 | #include <linux/module.h> |
16 | #include <linux/init.h> | 16 | #include <linux/init.h> |
17 | #include <linux/kexec.h> | ||
17 | 18 | ||
18 | #define KERNEL_ATTR_RO(_name) \ | 19 | #define KERNEL_ATTR_RO(_name) \ |
19 | static struct subsys_attribute _name##_attr = __ATTR_RO(_name) | 20 | static struct subsys_attribute _name##_attr = __ATTR_RO(_name) |
@@ -48,6 +49,20 @@ static ssize_t uevent_helper_store(struct subsystem *subsys, const char *page, s | |||
48 | KERNEL_ATTR_RW(uevent_helper); | 49 | KERNEL_ATTR_RW(uevent_helper); |
49 | #endif | 50 | #endif |
50 | 51 | ||
52 | #ifdef CONFIG_KEXEC | ||
53 | static ssize_t kexec_loaded_show(struct subsystem *subsys, char *page) | ||
54 | { | ||
55 | return sprintf(page, "%d\n", !!kexec_image); | ||
56 | } | ||
57 | KERNEL_ATTR_RO(kexec_loaded); | ||
58 | |||
59 | static ssize_t kexec_crash_loaded_show(struct subsystem *subsys, char *page) | ||
60 | { | ||
61 | return sprintf(page, "%d\n", !!kexec_crash_image); | ||
62 | } | ||
63 | KERNEL_ATTR_RO(kexec_crash_loaded); | ||
64 | #endif /* CONFIG_KEXEC */ | ||
65 | |||
51 | decl_subsys(kernel, NULL, NULL); | 66 | decl_subsys(kernel, NULL, NULL); |
52 | EXPORT_SYMBOL_GPL(kernel_subsys); | 67 | EXPORT_SYMBOL_GPL(kernel_subsys); |
53 | 68 | ||
@@ -56,6 +71,10 @@ static struct attribute * kernel_attrs[] = { | |||
56 | &uevent_seqnum_attr.attr, | 71 | &uevent_seqnum_attr.attr, |
57 | &uevent_helper_attr.attr, | 72 | &uevent_helper_attr.attr, |
58 | #endif | 73 | #endif |
74 | #ifdef CONFIG_KEXEC | ||
75 | &kexec_loaded_attr.attr, | ||
76 | &kexec_crash_loaded_attr.attr, | ||
77 | #endif | ||
59 | NULL | 78 | NULL |
60 | }; | 79 | }; |
61 | 80 | ||
diff --git a/kernel/power/main.c b/kernel/power/main.c index 0a907f0dc56b..cdf0f07af92f 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c | |||
@@ -15,7 +15,7 @@ | |||
15 | #include <linux/errno.h> | 15 | #include <linux/errno.h> |
16 | #include <linux/init.h> | 16 | #include <linux/init.h> |
17 | #include <linux/pm.h> | 17 | #include <linux/pm.h> |
18 | 18 | #include <linux/console.h> | |
19 | 19 | ||
20 | #include "power.h" | 20 | #include "power.h" |
21 | 21 | ||
diff --git a/kernel/power/power.h b/kernel/power/power.h index f06f12f21767..98c41423f3b1 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h | |||
@@ -55,7 +55,7 @@ struct snapshot_handle { | |||
55 | unsigned int page; | 55 | unsigned int page; |
56 | unsigned int page_offset; | 56 | unsigned int page_offset; |
57 | unsigned int prev; | 57 | unsigned int prev; |
58 | struct pbe *pbe; | 58 | struct pbe *pbe, *last_pbe; |
59 | void *buffer; | 59 | void *buffer; |
60 | unsigned int buf_offset; | 60 | unsigned int buf_offset; |
61 | }; | 61 | }; |
@@ -105,6 +105,10 @@ extern struct bitmap_page *alloc_bitmap(unsigned int nr_bits); | |||
105 | extern unsigned long alloc_swap_page(int swap, struct bitmap_page *bitmap); | 105 | extern unsigned long alloc_swap_page(int swap, struct bitmap_page *bitmap); |
106 | extern void free_all_swap_pages(int swap, struct bitmap_page *bitmap); | 106 | extern void free_all_swap_pages(int swap, struct bitmap_page *bitmap); |
107 | 107 | ||
108 | extern unsigned int count_special_pages(void); | ||
109 | extern int save_special_mem(void); | ||
110 | extern int restore_special_mem(void); | ||
111 | |||
108 | extern int swsusp_check(void); | 112 | extern int swsusp_check(void); |
109 | extern int swsusp_shrink_memory(void); | 113 | extern int swsusp_shrink_memory(void); |
110 | extern void swsusp_free(void); | 114 | extern void swsusp_free(void); |
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 3eeedbb13b78..3d9284100b22 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c | |||
@@ -39,8 +39,90 @@ static unsigned int nr_copy_pages; | |||
39 | static unsigned int nr_meta_pages; | 39 | static unsigned int nr_meta_pages; |
40 | static unsigned long *buffer; | 40 | static unsigned long *buffer; |
41 | 41 | ||
42 | struct arch_saveable_page { | ||
43 | unsigned long start; | ||
44 | unsigned long end; | ||
45 | char *data; | ||
46 | struct arch_saveable_page *next; | ||
47 | }; | ||
48 | static struct arch_saveable_page *arch_pages; | ||
49 | |||
50 | int swsusp_add_arch_pages(unsigned long start, unsigned long end) | ||
51 | { | ||
52 | struct arch_saveable_page *tmp; | ||
53 | |||
54 | while (start < end) { | ||
55 | tmp = kzalloc(sizeof(struct arch_saveable_page), GFP_KERNEL); | ||
56 | if (!tmp) | ||
57 | return -ENOMEM; | ||
58 | tmp->start = start; | ||
59 | tmp->end = ((start >> PAGE_SHIFT) + 1) << PAGE_SHIFT; | ||
60 | if (tmp->end > end) | ||
61 | tmp->end = end; | ||
62 | tmp->next = arch_pages; | ||
63 | start = tmp->end; | ||
64 | arch_pages = tmp; | ||
65 | } | ||
66 | return 0; | ||
67 | } | ||
68 | |||
69 | static unsigned int count_arch_pages(void) | ||
70 | { | ||
71 | unsigned int count = 0; | ||
72 | struct arch_saveable_page *tmp = arch_pages; | ||
73 | while (tmp) { | ||
74 | count++; | ||
75 | tmp = tmp->next; | ||
76 | } | ||
77 | return count; | ||
78 | } | ||
79 | |||
80 | static int save_arch_mem(void) | ||
81 | { | ||
82 | char *kaddr; | ||
83 | struct arch_saveable_page *tmp = arch_pages; | ||
84 | int offset; | ||
85 | |||
86 | pr_debug("swsusp: Saving arch specific memory"); | ||
87 | while (tmp) { | ||
88 | tmp->data = (char *)__get_free_page(GFP_ATOMIC); | ||
89 | if (!tmp->data) | ||
90 | return -ENOMEM; | ||
91 | offset = tmp->start - (tmp->start & PAGE_MASK); | ||
92 | /* arch pages might haven't a 'struct page' */ | ||
93 | kaddr = kmap_atomic_pfn(tmp->start >> PAGE_SHIFT, KM_USER0); | ||
94 | memcpy(tmp->data + offset, kaddr + offset, | ||
95 | tmp->end - tmp->start); | ||
96 | kunmap_atomic(kaddr, KM_USER0); | ||
97 | |||
98 | tmp = tmp->next; | ||
99 | } | ||
100 | return 0; | ||
101 | } | ||
102 | |||
103 | static int restore_arch_mem(void) | ||
104 | { | ||
105 | char *kaddr; | ||
106 | struct arch_saveable_page *tmp = arch_pages; | ||
107 | int offset; | ||
108 | |||
109 | while (tmp) { | ||
110 | if (!tmp->data) | ||
111 | continue; | ||
112 | offset = tmp->start - (tmp->start & PAGE_MASK); | ||
113 | kaddr = kmap_atomic_pfn(tmp->start >> PAGE_SHIFT, KM_USER0); | ||
114 | memcpy(kaddr + offset, tmp->data + offset, | ||
115 | tmp->end - tmp->start); | ||
116 | kunmap_atomic(kaddr, KM_USER0); | ||
117 | free_page((long)tmp->data); | ||
118 | tmp->data = NULL; | ||
119 | tmp = tmp->next; | ||
120 | } | ||
121 | return 0; | ||
122 | } | ||
123 | |||
42 | #ifdef CONFIG_HIGHMEM | 124 | #ifdef CONFIG_HIGHMEM |
43 | unsigned int count_highmem_pages(void) | 125 | static unsigned int count_highmem_pages(void) |
44 | { | 126 | { |
45 | struct zone *zone; | 127 | struct zone *zone; |
46 | unsigned long zone_pfn; | 128 | unsigned long zone_pfn; |
@@ -117,7 +199,7 @@ static int save_highmem_zone(struct zone *zone) | |||
117 | return 0; | 199 | return 0; |
118 | } | 200 | } |
119 | 201 | ||
120 | int save_highmem(void) | 202 | static int save_highmem(void) |
121 | { | 203 | { |
122 | struct zone *zone; | 204 | struct zone *zone; |
123 | int res = 0; | 205 | int res = 0; |
@@ -134,7 +216,7 @@ int save_highmem(void) | |||
134 | return 0; | 216 | return 0; |
135 | } | 217 | } |
136 | 218 | ||
137 | int restore_highmem(void) | 219 | static int restore_highmem(void) |
138 | { | 220 | { |
139 | printk("swsusp: Restoring Highmem\n"); | 221 | printk("swsusp: Restoring Highmem\n"); |
140 | while (highmem_copy) { | 222 | while (highmem_copy) { |
@@ -150,8 +232,35 @@ int restore_highmem(void) | |||
150 | } | 232 | } |
151 | return 0; | 233 | return 0; |
152 | } | 234 | } |
235 | #else | ||
236 | static inline unsigned int count_highmem_pages(void) {return 0;} | ||
237 | static inline int save_highmem(void) {return 0;} | ||
238 | static inline int restore_highmem(void) {return 0;} | ||
153 | #endif | 239 | #endif |
154 | 240 | ||
241 | unsigned int count_special_pages(void) | ||
242 | { | ||
243 | return count_arch_pages() + count_highmem_pages(); | ||
244 | } | ||
245 | |||
246 | int save_special_mem(void) | ||
247 | { | ||
248 | int ret; | ||
249 | ret = save_arch_mem(); | ||
250 | if (!ret) | ||
251 | ret = save_highmem(); | ||
252 | return ret; | ||
253 | } | ||
254 | |||
255 | int restore_special_mem(void) | ||
256 | { | ||
257 | int ret; | ||
258 | ret = restore_arch_mem(); | ||
259 | if (!ret) | ||
260 | ret = restore_highmem(); | ||
261 | return ret; | ||
262 | } | ||
263 | |||
155 | static int pfn_is_nosave(unsigned long pfn) | 264 | static int pfn_is_nosave(unsigned long pfn) |
156 | { | 265 | { |
157 | unsigned long nosave_begin_pfn = __pa(&__nosave_begin) >> PAGE_SHIFT; | 266 | unsigned long nosave_begin_pfn = __pa(&__nosave_begin) >> PAGE_SHIFT; |
@@ -177,7 +286,6 @@ static int saveable(struct zone *zone, unsigned long *zone_pfn) | |||
177 | return 0; | 286 | return 0; |
178 | 287 | ||
179 | page = pfn_to_page(pfn); | 288 | page = pfn_to_page(pfn); |
180 | BUG_ON(PageReserved(page) && PageNosave(page)); | ||
181 | if (PageNosave(page)) | 289 | if (PageNosave(page)) |
182 | return 0; | 290 | return 0; |
183 | if (PageReserved(page) && pfn_is_nosave(pfn)) | 291 | if (PageReserved(page) && pfn_is_nosave(pfn)) |
@@ -293,62 +401,29 @@ static inline void create_pbe_list(struct pbe *pblist, unsigned int nr_pages) | |||
293 | } | 401 | } |
294 | } | 402 | } |
295 | 403 | ||
296 | /** | 404 | static unsigned int unsafe_pages; |
297 | * On resume it is necessary to trace and eventually free the unsafe | ||
298 | * pages that have been allocated, because they are needed for I/O | ||
299 | * (on x86-64 we likely will "eat" these pages once again while | ||
300 | * creating the temporary page translation tables) | ||
301 | */ | ||
302 | |||
303 | struct eaten_page { | ||
304 | struct eaten_page *next; | ||
305 | char padding[PAGE_SIZE - sizeof(void *)]; | ||
306 | }; | ||
307 | |||
308 | static struct eaten_page *eaten_pages = NULL; | ||
309 | |||
310 | static void release_eaten_pages(void) | ||
311 | { | ||
312 | struct eaten_page *p, *q; | ||
313 | |||
314 | p = eaten_pages; | ||
315 | while (p) { | ||
316 | q = p->next; | ||
317 | /* We don't want swsusp_free() to free this page again */ | ||
318 | ClearPageNosave(virt_to_page(p)); | ||
319 | free_page((unsigned long)p); | ||
320 | p = q; | ||
321 | } | ||
322 | eaten_pages = NULL; | ||
323 | } | ||
324 | 405 | ||
325 | /** | 406 | /** |
326 | * @safe_needed - on resume, for storing the PBE list and the image, | 407 | * @safe_needed - on resume, for storing the PBE list and the image, |
327 | * we can only use memory pages that do not conflict with the pages | 408 | * we can only use memory pages that do not conflict with the pages |
328 | * which had been used before suspend. | 409 | * used before suspend. |
329 | * | 410 | * |
330 | * The unsafe pages are marked with the PG_nosave_free flag | 411 | * The unsafe pages are marked with the PG_nosave_free flag |
331 | * | 412 | * and we count them using unsafe_pages |
332 | * Allocated but unusable (ie eaten) memory pages should be marked | ||
333 | * so that swsusp_free() can release them | ||
334 | */ | 413 | */ |
335 | 414 | ||
336 | static inline void *alloc_image_page(gfp_t gfp_mask, int safe_needed) | 415 | static inline void *alloc_image_page(gfp_t gfp_mask, int safe_needed) |
337 | { | 416 | { |
338 | void *res; | 417 | void *res; |
339 | 418 | ||
419 | res = (void *)get_zeroed_page(gfp_mask); | ||
340 | if (safe_needed) | 420 | if (safe_needed) |
341 | do { | 421 | while (res && PageNosaveFree(virt_to_page(res))) { |
422 | /* The page is unsafe, mark it for swsusp_free() */ | ||
423 | SetPageNosave(virt_to_page(res)); | ||
424 | unsafe_pages++; | ||
342 | res = (void *)get_zeroed_page(gfp_mask); | 425 | res = (void *)get_zeroed_page(gfp_mask); |
343 | if (res && PageNosaveFree(virt_to_page(res))) { | 426 | } |
344 | /* This is for swsusp_free() */ | ||
345 | SetPageNosave(virt_to_page(res)); | ||
346 | ((struct eaten_page *)res)->next = eaten_pages; | ||
347 | eaten_pages = res; | ||
348 | } | ||
349 | } while (res && PageNosaveFree(virt_to_page(res))); | ||
350 | else | ||
351 | res = (void *)get_zeroed_page(gfp_mask); | ||
352 | if (res) { | 427 | if (res) { |
353 | SetPageNosave(virt_to_page(res)); | 428 | SetPageNosave(virt_to_page(res)); |
354 | SetPageNosaveFree(virt_to_page(res)); | 429 | SetPageNosaveFree(virt_to_page(res)); |
@@ -374,7 +449,8 @@ unsigned long get_safe_page(gfp_t gfp_mask) | |||
374 | * On each page we set up a list of struct_pbe elements. | 449 | * On each page we set up a list of struct_pbe elements. |
375 | */ | 450 | */ |
376 | 451 | ||
377 | struct pbe *alloc_pagedir(unsigned int nr_pages, gfp_t gfp_mask, int safe_needed) | 452 | static struct pbe *alloc_pagedir(unsigned int nr_pages, gfp_t gfp_mask, |
453 | int safe_needed) | ||
378 | { | 454 | { |
379 | unsigned int num; | 455 | unsigned int num; |
380 | struct pbe *pblist, *pbe; | 456 | struct pbe *pblist, *pbe; |
@@ -642,6 +718,8 @@ static int mark_unsafe_pages(struct pbe *pblist) | |||
642 | return -EFAULT; | 718 | return -EFAULT; |
643 | } | 719 | } |
644 | 720 | ||
721 | unsafe_pages = 0; | ||
722 | |||
645 | return 0; | 723 | return 0; |
646 | } | 724 | } |
647 | 725 | ||
@@ -719,42 +797,99 @@ static inline struct pbe *unpack_orig_addresses(unsigned long *buf, | |||
719 | } | 797 | } |
720 | 798 | ||
721 | /** | 799 | /** |
722 | * create_image - use metadata contained in the PBE list | 800 | * prepare_image - use metadata contained in the PBE list |
723 | * pointed to by pagedir_nosave to mark the pages that will | 801 | * pointed to by pagedir_nosave to mark the pages that will |
724 | * be overwritten in the process of restoring the system | 802 | * be overwritten in the process of restoring the system |
725 | * memory state from the image and allocate memory for | 803 | * memory state from the image ("unsafe" pages) and allocate |
726 | * the image avoiding these pages | 804 | * memory for the image |
805 | * | ||
806 | * The idea is to allocate the PBE list first and then | ||
807 | * allocate as many pages as it's needed for the image data, | ||
808 | * but not to assign these pages to the PBEs initially. | ||
809 | * Instead, we just mark them as allocated and create a list | ||
810 | * of "safe" pages which will be used later | ||
727 | */ | 811 | */ |
728 | 812 | ||
729 | static int create_image(struct snapshot_handle *handle) | 813 | struct safe_page { |
814 | struct safe_page *next; | ||
815 | char padding[PAGE_SIZE - sizeof(void *)]; | ||
816 | }; | ||
817 | |||
818 | static struct safe_page *safe_pages; | ||
819 | |||
820 | static int prepare_image(struct snapshot_handle *handle) | ||
730 | { | 821 | { |
731 | int error = 0; | 822 | int error = 0; |
732 | struct pbe *p, *pblist; | 823 | unsigned int nr_pages = nr_copy_pages; |
824 | struct pbe *p, *pblist = NULL; | ||
733 | 825 | ||
734 | p = pagedir_nosave; | 826 | p = pagedir_nosave; |
735 | error = mark_unsafe_pages(p); | 827 | error = mark_unsafe_pages(p); |
736 | if (!error) { | 828 | if (!error) { |
737 | pblist = alloc_pagedir(nr_copy_pages, GFP_ATOMIC, 1); | 829 | pblist = alloc_pagedir(nr_pages, GFP_ATOMIC, 1); |
738 | if (pblist) | 830 | if (pblist) |
739 | copy_page_backup_list(pblist, p); | 831 | copy_page_backup_list(pblist, p); |
740 | free_pagedir(p, 0); | 832 | free_pagedir(p, 0); |
741 | if (!pblist) | 833 | if (!pblist) |
742 | error = -ENOMEM; | 834 | error = -ENOMEM; |
743 | } | 835 | } |
744 | if (!error) | 836 | safe_pages = NULL; |
745 | error = alloc_data_pages(pblist, GFP_ATOMIC, 1); | 837 | if (!error && nr_pages > unsafe_pages) { |
838 | nr_pages -= unsafe_pages; | ||
839 | while (nr_pages--) { | ||
840 | struct safe_page *ptr; | ||
841 | |||
842 | ptr = (struct safe_page *)get_zeroed_page(GFP_ATOMIC); | ||
843 | if (!ptr) { | ||
844 | error = -ENOMEM; | ||
845 | break; | ||
846 | } | ||
847 | if (!PageNosaveFree(virt_to_page(ptr))) { | ||
848 | /* The page is "safe", add it to the list */ | ||
849 | ptr->next = safe_pages; | ||
850 | safe_pages = ptr; | ||
851 | } | ||
852 | /* Mark the page as allocated */ | ||
853 | SetPageNosave(virt_to_page(ptr)); | ||
854 | SetPageNosaveFree(virt_to_page(ptr)); | ||
855 | } | ||
856 | } | ||
746 | if (!error) { | 857 | if (!error) { |
747 | release_eaten_pages(); | ||
748 | pagedir_nosave = pblist; | 858 | pagedir_nosave = pblist; |
749 | } else { | 859 | } else { |
750 | pagedir_nosave = NULL; | ||
751 | handle->pbe = NULL; | 860 | handle->pbe = NULL; |
752 | nr_copy_pages = 0; | 861 | swsusp_free(); |
753 | nr_meta_pages = 0; | ||
754 | } | 862 | } |
755 | return error; | 863 | return error; |
756 | } | 864 | } |
757 | 865 | ||
866 | static void *get_buffer(struct snapshot_handle *handle) | ||
867 | { | ||
868 | struct pbe *pbe = handle->pbe, *last = handle->last_pbe; | ||
869 | struct page *page = virt_to_page(pbe->orig_address); | ||
870 | |||
871 | if (PageNosave(page) && PageNosaveFree(page)) { | ||
872 | /* | ||
873 | * We have allocated the "original" page frame and we can | ||
874 | * use it directly to store the read page | ||
875 | */ | ||
876 | pbe->address = 0; | ||
877 | if (last && last->next) | ||
878 | last->next = NULL; | ||
879 | return (void *)pbe->orig_address; | ||
880 | } | ||
881 | /* | ||
882 | * The "original" page frame has not been allocated and we have to | ||
883 | * use a "safe" page frame to store the read page | ||
884 | */ | ||
885 | pbe->address = (unsigned long)safe_pages; | ||
886 | safe_pages = safe_pages->next; | ||
887 | if (last) | ||
888 | last->next = pbe; | ||
889 | handle->last_pbe = pbe; | ||
890 | return (void *)pbe->address; | ||
891 | } | ||
892 | |||
758 | /** | 893 | /** |
759 | * snapshot_write_next - used for writing the system memory snapshot. | 894 | * snapshot_write_next - used for writing the system memory snapshot. |
760 | * | 895 | * |
@@ -799,15 +934,16 @@ int snapshot_write_next(struct snapshot_handle *handle, size_t count) | |||
799 | } else if (handle->prev <= nr_meta_pages) { | 934 | } else if (handle->prev <= nr_meta_pages) { |
800 | handle->pbe = unpack_orig_addresses(buffer, handle->pbe); | 935 | handle->pbe = unpack_orig_addresses(buffer, handle->pbe); |
801 | if (!handle->pbe) { | 936 | if (!handle->pbe) { |
802 | error = create_image(handle); | 937 | error = prepare_image(handle); |
803 | if (error) | 938 | if (error) |
804 | return error; | 939 | return error; |
805 | handle->pbe = pagedir_nosave; | 940 | handle->pbe = pagedir_nosave; |
806 | handle->buffer = (void *)handle->pbe->address; | 941 | handle->last_pbe = NULL; |
942 | handle->buffer = get_buffer(handle); | ||
807 | } | 943 | } |
808 | } else { | 944 | } else { |
809 | handle->pbe = handle->pbe->next; | 945 | handle->pbe = handle->pbe->next; |
810 | handle->buffer = (void *)handle->pbe->address; | 946 | handle->buffer = get_buffer(handle); |
811 | } | 947 | } |
812 | handle->prev = handle->page; | 948 | handle->prev = handle->page; |
813 | } | 949 | } |
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index c4016cbbd3e0..f0ee4e7780d6 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c | |||
@@ -62,16 +62,6 @@ unsigned long image_size = 500 * 1024 * 1024; | |||
62 | 62 | ||
63 | int in_suspend __nosavedata = 0; | 63 | int in_suspend __nosavedata = 0; |
64 | 64 | ||
65 | #ifdef CONFIG_HIGHMEM | ||
66 | unsigned int count_highmem_pages(void); | ||
67 | int save_highmem(void); | ||
68 | int restore_highmem(void); | ||
69 | #else | ||
70 | static int save_highmem(void) { return 0; } | ||
71 | static int restore_highmem(void) { return 0; } | ||
72 | static unsigned int count_highmem_pages(void) { return 0; } | ||
73 | #endif | ||
74 | |||
75 | /** | 65 | /** |
76 | * The following functions are used for tracing the allocated | 66 | * The following functions are used for tracing the allocated |
77 | * swap pages, so that they can be freed in case of an error. | 67 | * swap pages, so that they can be freed in case of an error. |
@@ -175,6 +165,12 @@ void free_all_swap_pages(int swap, struct bitmap_page *bitmap) | |||
175 | */ | 165 | */ |
176 | 166 | ||
177 | #define SHRINK_BITE 10000 | 167 | #define SHRINK_BITE 10000 |
168 | static inline unsigned long __shrink_memory(long tmp) | ||
169 | { | ||
170 | if (tmp > SHRINK_BITE) | ||
171 | tmp = SHRINK_BITE; | ||
172 | return shrink_all_memory(tmp); | ||
173 | } | ||
178 | 174 | ||
179 | int swsusp_shrink_memory(void) | 175 | int swsusp_shrink_memory(void) |
180 | { | 176 | { |
@@ -186,21 +182,23 @@ int swsusp_shrink_memory(void) | |||
186 | 182 | ||
187 | printk("Shrinking memory... "); | 183 | printk("Shrinking memory... "); |
188 | do { | 184 | do { |
189 | size = 2 * count_highmem_pages(); | 185 | size = 2 * count_special_pages(); |
190 | size += size / 50 + count_data_pages(); | 186 | size += size / 50 + count_data_pages(); |
191 | size += (size + PBES_PER_PAGE - 1) / PBES_PER_PAGE + | 187 | size += (size + PBES_PER_PAGE - 1) / PBES_PER_PAGE + |
192 | PAGES_FOR_IO; | 188 | PAGES_FOR_IO; |
193 | tmp = size; | 189 | tmp = size; |
194 | for_each_zone (zone) | 190 | for_each_zone (zone) |
195 | if (!is_highmem(zone)) | 191 | if (!is_highmem(zone) && populated_zone(zone)) { |
196 | tmp -= zone->free_pages; | 192 | tmp -= zone->free_pages; |
193 | tmp += zone->lowmem_reserve[ZONE_NORMAL]; | ||
194 | } | ||
197 | if (tmp > 0) { | 195 | if (tmp > 0) { |
198 | tmp = shrink_all_memory(SHRINK_BITE); | 196 | tmp = __shrink_memory(tmp); |
199 | if (!tmp) | 197 | if (!tmp) |
200 | return -ENOMEM; | 198 | return -ENOMEM; |
201 | pages += tmp; | 199 | pages += tmp; |
202 | } else if (size > image_size / PAGE_SIZE) { | 200 | } else if (size > image_size / PAGE_SIZE) { |
203 | tmp = shrink_all_memory(SHRINK_BITE); | 201 | tmp = __shrink_memory(size - (image_size / PAGE_SIZE)); |
204 | pages += tmp; | 202 | pages += tmp; |
205 | } | 203 | } |
206 | printk("\b%c", p[i++%4]); | 204 | printk("\b%c", p[i++%4]); |
@@ -228,7 +226,7 @@ int swsusp_suspend(void) | |||
228 | goto Enable_irqs; | 226 | goto Enable_irqs; |
229 | } | 227 | } |
230 | 228 | ||
231 | if ((error = save_highmem())) { | 229 | if ((error = save_special_mem())) { |
232 | printk(KERN_ERR "swsusp: Not enough free pages for highmem\n"); | 230 | printk(KERN_ERR "swsusp: Not enough free pages for highmem\n"); |
233 | goto Restore_highmem; | 231 | goto Restore_highmem; |
234 | } | 232 | } |
@@ -239,7 +237,7 @@ int swsusp_suspend(void) | |||
239 | /* Restore control flow magically appears here */ | 237 | /* Restore control flow magically appears here */ |
240 | restore_processor_state(); | 238 | restore_processor_state(); |
241 | Restore_highmem: | 239 | Restore_highmem: |
242 | restore_highmem(); | 240 | restore_special_mem(); |
243 | device_power_up(); | 241 | device_power_up(); |
244 | Enable_irqs: | 242 | Enable_irqs: |
245 | local_irq_enable(); | 243 | local_irq_enable(); |
@@ -265,7 +263,7 @@ int swsusp_resume(void) | |||
265 | */ | 263 | */ |
266 | swsusp_free(); | 264 | swsusp_free(); |
267 | restore_processor_state(); | 265 | restore_processor_state(); |
268 | restore_highmem(); | 266 | restore_special_mem(); |
269 | touch_softlockup_watchdog(); | 267 | touch_softlockup_watchdog(); |
270 | device_power_up(); | 268 | device_power_up(); |
271 | local_irq_enable(); | 269 | local_irq_enable(); |
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 2058f88c7bbb..20e9710fc21c 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c | |||
@@ -612,14 +612,6 @@ void synchronize_rcu(void) | |||
612 | wait_for_completion(&rcu.completion); | 612 | wait_for_completion(&rcu.completion); |
613 | } | 613 | } |
614 | 614 | ||
615 | /* | ||
616 | * Deprecated, use synchronize_rcu() or synchronize_sched() instead. | ||
617 | */ | ||
618 | void synchronize_kernel(void) | ||
619 | { | ||
620 | synchronize_rcu(); | ||
621 | } | ||
622 | |||
623 | module_param(blimit, int, 0); | 615 | module_param(blimit, int, 0); |
624 | module_param(qhimark, int, 0); | 616 | module_param(qhimark, int, 0); |
625 | module_param(qlowmark, int, 0); | 617 | module_param(qlowmark, int, 0); |
@@ -627,7 +619,6 @@ module_param(qlowmark, int, 0); | |||
627 | module_param(rsinterval, int, 0); | 619 | module_param(rsinterval, int, 0); |
628 | #endif | 620 | #endif |
629 | EXPORT_SYMBOL_GPL(rcu_batches_completed); | 621 | EXPORT_SYMBOL_GPL(rcu_batches_completed); |
630 | EXPORT_SYMBOL_GPL_FUTURE(call_rcu); /* WARNING: GPL-only in April 2006. */ | 622 | EXPORT_SYMBOL_GPL(call_rcu); |
631 | EXPORT_SYMBOL_GPL_FUTURE(call_rcu_bh); /* WARNING: GPL-only in April 2006. */ | 623 | EXPORT_SYMBOL_GPL(call_rcu_bh); |
632 | EXPORT_SYMBOL_GPL(synchronize_rcu); | 624 | EXPORT_SYMBOL_GPL(synchronize_rcu); |
633 | EXPORT_SYMBOL_GPL_FUTURE(synchronize_kernel); /* WARNING: GPL-only in April 2006. */ | ||
diff --git a/kernel/sched.c b/kernel/sched.c index c13f1bd2df7d..5dbc42694477 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -3886,6 +3886,10 @@ long sched_setaffinity(pid_t pid, cpumask_t new_mask) | |||
3886 | !capable(CAP_SYS_NICE)) | 3886 | !capable(CAP_SYS_NICE)) |
3887 | goto out_unlock; | 3887 | goto out_unlock; |
3888 | 3888 | ||
3889 | retval = security_task_setscheduler(p, 0, NULL); | ||
3890 | if (retval) | ||
3891 | goto out_unlock; | ||
3892 | |||
3889 | cpus_allowed = cpuset_cpus_allowed(p); | 3893 | cpus_allowed = cpuset_cpus_allowed(p); |
3890 | cpus_and(new_mask, new_mask, cpus_allowed); | 3894 | cpus_and(new_mask, new_mask, cpus_allowed); |
3891 | retval = set_cpus_allowed(p, new_mask); | 3895 | retval = set_cpus_allowed(p, new_mask); |
@@ -3954,7 +3958,10 @@ long sched_getaffinity(pid_t pid, cpumask_t *mask) | |||
3954 | if (!p) | 3958 | if (!p) |
3955 | goto out_unlock; | 3959 | goto out_unlock; |
3956 | 3960 | ||
3957 | retval = 0; | 3961 | retval = security_task_getscheduler(p); |
3962 | if (retval) | ||
3963 | goto out_unlock; | ||
3964 | |||
3958 | cpus_and(*mask, p->cpus_allowed, cpu_online_map); | 3965 | cpus_and(*mask, p->cpus_allowed, cpu_online_map); |
3959 | 3966 | ||
3960 | out_unlock: | 3967 | out_unlock: |
@@ -4046,6 +4053,9 @@ asmlinkage long sys_sched_yield(void) | |||
4046 | 4053 | ||
4047 | static inline void __cond_resched(void) | 4054 | static inline void __cond_resched(void) |
4048 | { | 4055 | { |
4056 | #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP | ||
4057 | __might_sleep(__FILE__, __LINE__); | ||
4058 | #endif | ||
4049 | /* | 4059 | /* |
4050 | * The BKS might be reacquired before we have dropped | 4060 | * The BKS might be reacquired before we have dropped |
4051 | * PREEMPT_ACTIVE, which could trigger a second | 4061 | * PREEMPT_ACTIVE, which could trigger a second |
diff --git a/kernel/sys.c b/kernel/sys.c index 0b6ec0e7936f..90930b28d2ca 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
@@ -13,7 +13,6 @@ | |||
13 | #include <linux/notifier.h> | 13 | #include <linux/notifier.h> |
14 | #include <linux/reboot.h> | 14 | #include <linux/reboot.h> |
15 | #include <linux/prctl.h> | 15 | #include <linux/prctl.h> |
16 | #include <linux/init.h> | ||
17 | #include <linux/highuid.h> | 16 | #include <linux/highuid.h> |
18 | #include <linux/fs.h> | 17 | #include <linux/fs.h> |
19 | #include <linux/kernel.h> | 18 | #include <linux/kernel.h> |
@@ -57,6 +56,12 @@ | |||
57 | #ifndef GET_FPEXC_CTL | 56 | #ifndef GET_FPEXC_CTL |
58 | # define GET_FPEXC_CTL(a,b) (-EINVAL) | 57 | # define GET_FPEXC_CTL(a,b) (-EINVAL) |
59 | #endif | 58 | #endif |
59 | #ifndef GET_ENDIAN | ||
60 | # define GET_ENDIAN(a,b) (-EINVAL) | ||
61 | #endif | ||
62 | #ifndef SET_ENDIAN | ||
63 | # define SET_ENDIAN(a,b) (-EINVAL) | ||
64 | #endif | ||
60 | 65 | ||
61 | /* | 66 | /* |
62 | * this is where the system-wide overflow UID and GID are defined, for | 67 | * this is where the system-wide overflow UID and GID are defined, for |
@@ -1860,23 +1865,20 @@ out: | |||
1860 | * fields when reaping, so a sample either gets all the additions of a | 1865 | * fields when reaping, so a sample either gets all the additions of a |
1861 | * given child after it's reaped, or none so this sample is before reaping. | 1866 | * given child after it's reaped, or none so this sample is before reaping. |
1862 | * | 1867 | * |
1863 | * tasklist_lock locking optimisation: | 1868 | * Locking: |
1864 | * If we are current and single threaded, we do not need to take the tasklist | 1869 | * We need to take the siglock for CHILDREN, SELF and BOTH |
1865 | * lock or the siglock. No one else can take our signal_struct away, | 1870 | * for the cases current multithreaded, non-current single threaded and |
1866 | * no one else can reap the children to update signal->c* counters, and | 1871 | * non-current multithreaded. Thread traversal is now safe with |
1867 | * no one else can race with the signal-> fields. | 1872 | * the siglock held. |
1868 | * If we do not take the tasklist_lock, the signal-> fields could be read | 1873 | * Strictly speaking, we do not need to take the siglock if we are current and |
1869 | * out of order while another thread was just exiting. So we place a | 1874 | * single threaded, as no one else can take our signal_struct away, no one |
1870 | * read memory barrier when we avoid the lock. On the writer side, | 1875 | * else can reap the children to update signal->c* counters, and no one else |
1871 | * write memory barrier is implied in __exit_signal as __exit_signal releases | 1876 | * can race with the signal-> fields. If we do not take any lock, the |
1872 | * the siglock spinlock after updating the signal-> fields. | 1877 | * signal-> fields could be read out of order while another thread was just |
1873 | * | 1878 | * exiting. So we should place a read memory barrier when we avoid the lock. |
1874 | * We don't really need the siglock when we access the non c* fields | 1879 | * On the writer side, write memory barrier is implied in __exit_signal |
1875 | * of the signal_struct (for RUSAGE_SELF) even in multithreaded | 1880 | * as __exit_signal releases the siglock spinlock after updating the signal-> |
1876 | * case, since we take the tasklist lock for read and the non c* signal-> | 1881 | * fields. But we don't do this yet to keep things simple. |
1877 | * fields are updated only in __exit_signal, which is called with | ||
1878 | * tasklist_lock taken for write, hence these two threads cannot execute | ||
1879 | * concurrently. | ||
1880 | * | 1882 | * |
1881 | */ | 1883 | */ |
1882 | 1884 | ||
@@ -1885,35 +1887,25 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) | |||
1885 | struct task_struct *t; | 1887 | struct task_struct *t; |
1886 | unsigned long flags; | 1888 | unsigned long flags; |
1887 | cputime_t utime, stime; | 1889 | cputime_t utime, stime; |
1888 | int need_lock = 0; | ||
1889 | 1890 | ||
1890 | memset((char *) r, 0, sizeof *r); | 1891 | memset((char *) r, 0, sizeof *r); |
1891 | utime = stime = cputime_zero; | 1892 | utime = stime = cputime_zero; |
1892 | 1893 | ||
1893 | if (p != current || !thread_group_empty(p)) | 1894 | rcu_read_lock(); |
1894 | need_lock = 1; | 1895 | if (!lock_task_sighand(p, &flags)) { |
1895 | 1896 | rcu_read_unlock(); | |
1896 | if (need_lock) { | 1897 | return; |
1897 | read_lock(&tasklist_lock); | 1898 | } |
1898 | if (unlikely(!p->signal)) { | ||
1899 | read_unlock(&tasklist_lock); | ||
1900 | return; | ||
1901 | } | ||
1902 | } else | ||
1903 | /* See locking comments above */ | ||
1904 | smp_rmb(); | ||
1905 | 1899 | ||
1906 | switch (who) { | 1900 | switch (who) { |
1907 | case RUSAGE_BOTH: | 1901 | case RUSAGE_BOTH: |
1908 | case RUSAGE_CHILDREN: | 1902 | case RUSAGE_CHILDREN: |
1909 | spin_lock_irqsave(&p->sighand->siglock, flags); | ||
1910 | utime = p->signal->cutime; | 1903 | utime = p->signal->cutime; |
1911 | stime = p->signal->cstime; | 1904 | stime = p->signal->cstime; |
1912 | r->ru_nvcsw = p->signal->cnvcsw; | 1905 | r->ru_nvcsw = p->signal->cnvcsw; |
1913 | r->ru_nivcsw = p->signal->cnivcsw; | 1906 | r->ru_nivcsw = p->signal->cnivcsw; |
1914 | r->ru_minflt = p->signal->cmin_flt; | 1907 | r->ru_minflt = p->signal->cmin_flt; |
1915 | r->ru_majflt = p->signal->cmaj_flt; | 1908 | r->ru_majflt = p->signal->cmaj_flt; |
1916 | spin_unlock_irqrestore(&p->sighand->siglock, flags); | ||
1917 | 1909 | ||
1918 | if (who == RUSAGE_CHILDREN) | 1910 | if (who == RUSAGE_CHILDREN) |
1919 | break; | 1911 | break; |
@@ -1941,8 +1933,9 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) | |||
1941 | BUG(); | 1933 | BUG(); |
1942 | } | 1934 | } |
1943 | 1935 | ||
1944 | if (need_lock) | 1936 | unlock_task_sighand(p, &flags); |
1945 | read_unlock(&tasklist_lock); | 1937 | rcu_read_unlock(); |
1938 | |||
1946 | cputime_to_timeval(utime, &r->ru_utime); | 1939 | cputime_to_timeval(utime, &r->ru_utime); |
1947 | cputime_to_timeval(stime, &r->ru_stime); | 1940 | cputime_to_timeval(stime, &r->ru_stime); |
1948 | } | 1941 | } |
@@ -2057,6 +2050,13 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, | |||
2057 | return -EFAULT; | 2050 | return -EFAULT; |
2058 | return 0; | 2051 | return 0; |
2059 | } | 2052 | } |
2053 | case PR_GET_ENDIAN: | ||
2054 | error = GET_ENDIAN(current, arg2); | ||
2055 | break; | ||
2056 | case PR_SET_ENDIAN: | ||
2057 | error = SET_ENDIAN(current, arg2); | ||
2058 | break; | ||
2059 | |||
2060 | default: | 2060 | default: |
2061 | error = -EINVAL; | 2061 | error = -EINVAL; |
2062 | break; | 2062 | break; |
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 5433195040f1..6991bece67e8 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c | |||
@@ -87,6 +87,7 @@ cond_syscall(sys_inotify_init); | |||
87 | cond_syscall(sys_inotify_add_watch); | 87 | cond_syscall(sys_inotify_add_watch); |
88 | cond_syscall(sys_inotify_rm_watch); | 88 | cond_syscall(sys_inotify_rm_watch); |
89 | cond_syscall(sys_migrate_pages); | 89 | cond_syscall(sys_migrate_pages); |
90 | cond_syscall(sys_move_pages); | ||
90 | cond_syscall(sys_chown16); | 91 | cond_syscall(sys_chown16); |
91 | cond_syscall(sys_fchown16); | 92 | cond_syscall(sys_fchown16); |
92 | cond_syscall(sys_getegid16); | 93 | cond_syscall(sys_getegid16); |
@@ -132,3 +133,4 @@ cond_syscall(sys_mincore); | |||
132 | cond_syscall(sys_madvise); | 133 | cond_syscall(sys_madvise); |
133 | cond_syscall(sys_mremap); | 134 | cond_syscall(sys_mremap); |
134 | cond_syscall(sys_remap_file_pages); | 135 | cond_syscall(sys_remap_file_pages); |
136 | cond_syscall(compat_sys_move_pages); | ||
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 0d656e61621d..eb8bd214e7d7 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -59,6 +59,7 @@ extern int proc_nr_files(ctl_table *table, int write, struct file *filp, | |||
59 | extern int C_A_D; | 59 | extern int C_A_D; |
60 | extern int sysctl_overcommit_memory; | 60 | extern int sysctl_overcommit_memory; |
61 | extern int sysctl_overcommit_ratio; | 61 | extern int sysctl_overcommit_ratio; |
62 | extern int sysctl_panic_on_oom; | ||
62 | extern int max_threads; | 63 | extern int max_threads; |
63 | extern int sysrq_enabled; | 64 | extern int sysrq_enabled; |
64 | extern int core_uses_pid; | 65 | extern int core_uses_pid; |
@@ -398,7 +399,7 @@ static ctl_table kern_table[] = { | |||
398 | .strategy = &sysctl_string, | 399 | .strategy = &sysctl_string, |
399 | }, | 400 | }, |
400 | #endif | 401 | #endif |
401 | #ifdef CONFIG_HOTPLUG | 402 | #if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET) |
402 | { | 403 | { |
403 | .ctl_name = KERN_HOTPLUG, | 404 | .ctl_name = KERN_HOTPLUG, |
404 | .procname = "hotplug", | 405 | .procname = "hotplug", |
@@ -702,6 +703,14 @@ static ctl_table vm_table[] = { | |||
702 | .proc_handler = &proc_dointvec, | 703 | .proc_handler = &proc_dointvec, |
703 | }, | 704 | }, |
704 | { | 705 | { |
706 | .ctl_name = VM_PANIC_ON_OOM, | ||
707 | .procname = "panic_on_oom", | ||
708 | .data = &sysctl_panic_on_oom, | ||
709 | .maxlen = sizeof(sysctl_panic_on_oom), | ||
710 | .mode = 0644, | ||
711 | .proc_handler = &proc_dointvec, | ||
712 | }, | ||
713 | { | ||
705 | .ctl_name = VM_OVERCOMMIT_RATIO, | 714 | .ctl_name = VM_OVERCOMMIT_RATIO, |
706 | .procname = "overcommit_ratio", | 715 | .procname = "overcommit_ratio", |
707 | .data = &sysctl_overcommit_ratio, | 716 | .data = &sysctl_overcommit_ratio, |
diff --git a/kernel/timer.c b/kernel/timer.c index 9e49deed468c..f35b3939e937 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
@@ -383,23 +383,19 @@ EXPORT_SYMBOL(del_timer_sync); | |||
383 | static int cascade(tvec_base_t *base, tvec_t *tv, int index) | 383 | static int cascade(tvec_base_t *base, tvec_t *tv, int index) |
384 | { | 384 | { |
385 | /* cascade all the timers from tv up one level */ | 385 | /* cascade all the timers from tv up one level */ |
386 | struct list_head *head, *curr; | 386 | struct timer_list *timer, *tmp; |
387 | struct list_head tv_list; | ||
388 | |||
389 | list_replace_init(tv->vec + index, &tv_list); | ||
387 | 390 | ||
388 | head = tv->vec + index; | ||
389 | curr = head->next; | ||
390 | /* | 391 | /* |
391 | * We are removing _all_ timers from the list, so we don't have to | 392 | * We are removing _all_ timers from the list, so we |
392 | * detach them individually, just clear the list afterwards. | 393 | * don't have to detach them individually. |
393 | */ | 394 | */ |
394 | while (curr != head) { | 395 | list_for_each_entry_safe(timer, tmp, &tv_list, entry) { |
395 | struct timer_list *tmp; | 396 | BUG_ON(timer->base != base); |
396 | 397 | internal_add_timer(base, timer); | |
397 | tmp = list_entry(curr, struct timer_list, entry); | ||
398 | BUG_ON(tmp->base != base); | ||
399 | curr = curr->next; | ||
400 | internal_add_timer(base, tmp); | ||
401 | } | 398 | } |
402 | INIT_LIST_HEAD(head); | ||
403 | 399 | ||
404 | return index; | 400 | return index; |
405 | } | 401 | } |
@@ -419,10 +415,10 @@ static inline void __run_timers(tvec_base_t *base) | |||
419 | 415 | ||
420 | spin_lock_irq(&base->lock); | 416 | spin_lock_irq(&base->lock); |
421 | while (time_after_eq(jiffies, base->timer_jiffies)) { | 417 | while (time_after_eq(jiffies, base->timer_jiffies)) { |
422 | struct list_head work_list = LIST_HEAD_INIT(work_list); | 418 | struct list_head work_list; |
423 | struct list_head *head = &work_list; | 419 | struct list_head *head = &work_list; |
424 | int index = base->timer_jiffies & TVR_MASK; | 420 | int index = base->timer_jiffies & TVR_MASK; |
425 | 421 | ||
426 | /* | 422 | /* |
427 | * Cascade timers: | 423 | * Cascade timers: |
428 | */ | 424 | */ |
@@ -431,8 +427,8 @@ static inline void __run_timers(tvec_base_t *base) | |||
431 | (!cascade(base, &base->tv3, INDEX(1))) && | 427 | (!cascade(base, &base->tv3, INDEX(1))) && |
432 | !cascade(base, &base->tv4, INDEX(2))) | 428 | !cascade(base, &base->tv4, INDEX(2))) |
433 | cascade(base, &base->tv5, INDEX(3)); | 429 | cascade(base, &base->tv5, INDEX(3)); |
434 | ++base->timer_jiffies; | 430 | ++base->timer_jiffies; |
435 | list_splice_init(base->tv1.vec + index, &work_list); | 431 | list_replace_init(base->tv1.vec + index, &work_list); |
436 | while (!list_empty(head)) { | 432 | while (!list_empty(head)) { |
437 | void (*fn)(unsigned long); | 433 | void (*fn)(unsigned long); |
438 | unsigned long data; | 434 | unsigned long data; |
diff --git a/kernel/user.c b/kernel/user.c index 4b1eb745afa1..6408c0424291 100644 --- a/kernel/user.c +++ b/kernel/user.c | |||
@@ -148,7 +148,7 @@ struct user_struct * alloc_uid(uid_t uid) | |||
148 | new->mq_bytes = 0; | 148 | new->mq_bytes = 0; |
149 | new->locked_shm = 0; | 149 | new->locked_shm = 0; |
150 | 150 | ||
151 | if (alloc_uid_keyring(new) < 0) { | 151 | if (alloc_uid_keyring(new, current) < 0) { |
152 | kmem_cache_free(uid_cachep, new); | 152 | kmem_cache_free(uid_cachep, new); |
153 | return NULL; | 153 | return NULL; |
154 | } | 154 | } |
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 880fb415a8f6..740c5abceb07 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
@@ -531,11 +531,11 @@ int current_is_keventd(void) | |||
531 | static void take_over_work(struct workqueue_struct *wq, unsigned int cpu) | 531 | static void take_over_work(struct workqueue_struct *wq, unsigned int cpu) |
532 | { | 532 | { |
533 | struct cpu_workqueue_struct *cwq = per_cpu_ptr(wq->cpu_wq, cpu); | 533 | struct cpu_workqueue_struct *cwq = per_cpu_ptr(wq->cpu_wq, cpu); |
534 | LIST_HEAD(list); | 534 | struct list_head list; |
535 | struct work_struct *work; | 535 | struct work_struct *work; |
536 | 536 | ||
537 | spin_lock_irq(&cwq->lock); | 537 | spin_lock_irq(&cwq->lock); |
538 | list_splice_init(&cwq->worklist, &list); | 538 | list_replace_init(&cwq->worklist, &list); |
539 | 539 | ||
540 | while (!list_empty(&list)) { | 540 | while (!list_empty(&list)) { |
541 | printk("Taking work for %s\n", wq->name); | 541 | printk("Taking work for %s\n", wq->name); |