Diffstat (limited to 'kernel')
 kernel/acct.c           |   2
 kernel/auditsc.c        |   1
 kernel/compat.c         |  23
 kernel/cpuset.c         |  16
 kernel/exit.c           |   5
 kernel/fork.c           |   5
 kernel/futex.c          |   8
 kernel/irq/handle.c     |   5
 kernel/irq/migration.c  |   4
 kernel/irq/proc.c       |   3
 kernel/irq/spurious.c   |  12
 kernel/kexec.c          |   6
 kernel/ksysfs.c         |  19
 kernel/power/main.c     |   2
 kernel/power/power.h    |   6
 kernel/power/snapshot.c | 260
 kernel/power/swsusp.c   |  32
 kernel/rcupdate.c       |  13
 kernel/sched.c          |  12
 kernel/sys.c            |  70
 kernel/sys_ni.c         |   2
 kernel/sysctl.c         |  11
 kernel/timer.c          |  30
 kernel/user.c           |   2
 kernel/workqueue.c      |   4
 25 files changed, 378 insertions(+), 175 deletions(-)
diff --git a/kernel/acct.c b/kernel/acct.c
index b327f4d20104..6802020e0ceb 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -118,7 +118,7 @@ static int check_free_space(struct file *file)
 	spin_unlock(&acct_globals.lock);
 
 	/* May block */
-	if (vfs_statfs(file->f_dentry->d_inode->i_sb, &sbuf))
+	if (vfs_statfs(file->f_dentry, &sbuf))
 		return res;
 	suspend = sbuf.f_blocks * SUSPEND;
 	resume = sbuf.f_blocks * RESUME;
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index b097ccb4eb7e..9ebd96fda295 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1558,6 +1558,7 @@ int __audit_ipc_obj(struct kern_ipc_perm *ipcp)
  * @uid: msgq user id
  * @gid: msgq group id
  * @mode: msgq mode (permissions)
+ * @ipcp: in-kernel IPC permissions
  *
  * Returns 0 for success or NULL context or < 0 on error.
  */
diff --git a/kernel/compat.c b/kernel/compat.c
index c1601a84f8d8..2f672332430f 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -21,6 +21,7 @@
 #include <linux/unistd.h>
 #include <linux/security.h>
 #include <linux/timex.h>
+#include <linux/migrate.h>
 
 #include <asm/uaccess.h>
 
@@ -934,3 +935,25 @@ asmlinkage long compat_sys_adjtimex(struct compat_timex __user *utp)
 
 	return ret;
 }
+
+#ifdef CONFIG_NUMA
+asmlinkage long compat_sys_move_pages(pid_t pid, unsigned long nr_pages,
+		compat_uptr_t __user *pages32,
+		const int __user *nodes,
+		int __user *status,
+		int flags)
+{
+	const void __user * __user *pages;
+	int i;
+
+	pages = compat_alloc_user_space(nr_pages * sizeof(void *));
+	for (i = 0; i < nr_pages; i++) {
+		compat_uptr_t p;
+
+		if (get_user(p, pages32 + i) ||
+			put_user(compat_ptr(p), pages + i))
+			return -EFAULT;
+	}
+	return sys_move_pages(pid, nr_pages, pages, nodes, status, flags);
+}
+#endif
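The wrapper above only widens the 32-bit pointer array; the nodes and status arrays already have the same layout on both ABIs. For orientation only, a minimal user-space sketch of driving the new syscall directly follows; it assumes the installed headers define __NR_move_pages, and MPOL_MF_MOVE is spelled out locally rather than pulled from libnuma:

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/syscall.h>

#define MPOL_MF_MOVE	(1 << 1)	/* value from <linux/mempolicy.h> */

int main(void)
{
	long psz = sysconf(_SC_PAGESIZE);
	void *pages[1];
	int nodes[1] = { 1 };		/* destination NUMA node */
	int status[1] = { -1 };
	void *buf;

	if (posix_memalign(&buf, psz, psz))
		return 1;
	((char *)buf)[0] = 1;		/* fault the page in first */
	pages[0] = buf;
	/* pid 0 means "the calling process" */
	if (syscall(__NR_move_pages, 0, 1UL, pages, nodes, status,
		    MPOL_MF_MOVE) < 0)
		perror("move_pages");
	else
		printf("page now on node %d\n", status[0]);
	free(buf);
	return 0;
}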
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index ab81fdd4572b..b602f73fb38d 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -41,6 +41,7 @@
 #include <linux/rcupdate.h>
 #include <linux/sched.h>
 #include <linux/seq_file.h>
+#include <linux/security.h>
 #include <linux/slab.h>
 #include <linux/smp_lock.h>
 #include <linux/spinlock.h>
@@ -392,11 +393,11 @@ static int cpuset_fill_super(struct super_block *sb, void *unused_data,
 	return 0;
 }
 
-static struct super_block *cpuset_get_sb(struct file_system_type *fs_type,
-					 int flags, const char *unused_dev_name,
-					 void *data)
+static int cpuset_get_sb(struct file_system_type *fs_type,
+			 int flags, const char *unused_dev_name,
+			 void *data, struct vfsmount *mnt)
 {
-	return get_sb_single(fs_type, flags, data, cpuset_fill_super);
+	return get_sb_single(fs_type, flags, data, cpuset_fill_super, mnt);
 }
 
 static struct file_system_type cpuset_fs_type = {
@@ -1177,6 +1178,7 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf)
 	cpumask_t cpus;
 	nodemask_t from, to;
 	struct mm_struct *mm;
+	int retval;
 
 	if (sscanf(pidbuf, "%d", &pid) != 1)
 		return -EIO;
@@ -1205,6 +1207,12 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf)
 		get_task_struct(tsk);
 	}
 
+	retval = security_task_setscheduler(tsk, 0, NULL);
+	if (retval) {
+		put_task_struct(tsk);
+		return retval;
+	}
+
 	mutex_lock(&callback_mutex);
 
 	task_lock(tsk);
diff --git a/kernel/exit.c b/kernel/exit.c
index e06d0c10a24e..a3baf92462bd 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -579,7 +579,7 @@ static void exit_mm(struct task_struct * tsk)
 		down_read(&mm->mmap_sem);
 	}
 	atomic_inc(&mm->mm_count);
-	if (mm != tsk->active_mm) BUG();
+	BUG_ON(mm != tsk->active_mm);
 	/* more a memory barrier than a real lock */
 	task_lock(tsk);
 	tsk->mm = NULL;
@@ -1530,8 +1530,7 @@ check_continued:
 		if (options & __WNOTHREAD)
 			break;
 		tsk = next_thread(tsk);
-		if (tsk->signal != current->signal)
-			BUG();
+		BUG_ON(tsk->signal != current->signal);
 	} while (tsk != current);
 
 	read_unlock(&tasklist_lock);
diff --git a/kernel/fork.c b/kernel/fork.c
index ac8100e3088a..49adc0e8d47c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -368,6 +368,8 @@ void fastcall __mmdrop(struct mm_struct *mm)
  */
 void mmput(struct mm_struct *mm)
 {
+	might_sleep();
+
 	if (atomic_dec_and_test(&mm->mm_users)) {
 		exit_aio(mm);
 		exit_mmap(mm);
@@ -623,6 +625,7 @@ out:
 /*
  * Allocate a new files structure and copy contents from the
  * passed in files structure.
+ * errorp will be valid only when the returned files_struct is NULL.
  */
 static struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
 {
@@ -631,6 +634,7 @@ static struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
 	int open_files, size, i, expand;
 	struct fdtable *old_fdt, *new_fdt;
 
+	*errorp = -ENOMEM;
 	newf = alloc_files();
 	if (!newf)
 		goto out;
@@ -744,7 +748,6 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
 	 * break this.
 	 */
 	tsk->files = NULL;
-	error = -ENOMEM;
 	newf = dup_fd(oldf, &error);
 	if (!newf)
 		goto out;
diff --git a/kernel/futex.c b/kernel/futex.c
index 5699c512057b..e1a380c77a5a 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1056,11 +1056,11 @@ asmlinkage long sys_futex(u32 __user *uaddr, int op, int val,
 				      (unsigned long)uaddr2, val2, val3);
 }
 
-static struct super_block *
-futexfs_get_sb(struct file_system_type *fs_type,
-	       int flags, const char *dev_name, void *data)
+static int futexfs_get_sb(struct file_system_type *fs_type,
+			  int flags, const char *dev_name, void *data,
+			  struct vfsmount *mnt)
 {
-	return get_sb_pseudo(fs_type, "futex", NULL, 0xBAD1DEA);
+	return get_sb_pseudo(fs_type, "futex", NULL, 0xBAD1DEA, mnt);
 }
 
 static struct file_system_type futex_fs_type = {
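The cpuset and futex hunks are two sites of the same tree-wide VFS change: ->get_sb() now returns an int and passes its result back through a struct vfsmount *. As a hedged sketch of what a minimal pseudo-filesystem looks like under the new signature ("examplefs" and its magic number are invented for illustration):

#include <linux/fs.h>
#include <linux/mount.h>

static int examplefs_get_sb(struct file_system_type *fs_type,
			    int flags, const char *dev_name, void *data,
			    struct vfsmount *mnt)
{
	/* Errors now come back as a plain int; the mount itself is
	 * filled into *mnt by the get_sb_* helper. */
	return get_sb_pseudo(fs_type, "example:", NULL, 0x4578414D, mnt);
}

static struct file_system_type example_fs_type = {
	.name		= "examplefs",
	.get_sb		= examplefs_get_sb,
	.kill_sb	= kill_anon_super,
};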
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 51df337b37db..0f6530117105 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -76,10 +76,11 @@ irqreturn_t no_action(int cpl, void *dev_id, struct pt_regs *regs)
 /*
  * Have got an event to handle:
  */
-fastcall int handle_IRQ_event(unsigned int irq, struct pt_regs *regs,
-			      struct irqaction *action)
+fastcall irqreturn_t handle_IRQ_event(unsigned int irq, struct pt_regs *regs,
+				      struct irqaction *action)
 {
-	int ret, retval = 0, status = 0;
+	irqreturn_t ret, retval = IRQ_NONE;
+	unsigned int status = 0;
 
 	if (!(action->flags & SA_INTERRUPT))
 		local_irq_enable();
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c
index 134f9f2e0e39..a12d00eb5e7c 100644
--- a/kernel/irq/migration.c
+++ b/kernel/irq/migration.c
@@ -30,7 +30,7 @@ void move_native_irq(int irq)
 
 	desc->move_irq = 0;
 
-	if (likely(cpus_empty(pending_irq_cpumask[irq])))
+	if (unlikely(cpus_empty(pending_irq_cpumask[irq])))
 		return;
 
 	if (!desc->handler->set_affinity)
@@ -49,7 +49,7 @@ void move_native_irq(int irq)
 	 * cause some ioapics to mal-function.
 	 * Being paranoid i guess!
 	 */
-	if (unlikely(!cpus_empty(tmp))) {
+	if (likely(!cpus_empty(tmp))) {
 		if (likely(!(desc->status & IRQ_DISABLED)))
 			desc->handler->disable(irq);
 
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index d03b5eef8ce0..afacd6f585fa 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -24,6 +24,8 @@ static struct proc_dir_entry *smp_affinity_entry[NR_IRQS];
 #ifdef CONFIG_GENERIC_PENDING_IRQ
 void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val)
 {
+	set_balance_irq_affinity(irq, mask_val);
+
 	/*
 	 * Save these away for later use. Re-progam when the
 	 * interrupt is pending
@@ -33,6 +35,7 @@ void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val)
 #else
 void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val)
 {
+	set_balance_irq_affinity(irq, mask_val);
 	irq_affinity[irq] = mask_val;
 	irq_desc[irq].handler->set_affinity(irq, mask_val);
 }
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index 7df9abd5ec86..b2fb3c18d06b 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -11,7 +11,7 @@
 #include <linux/kallsyms.h>
 #include <linux/interrupt.h>
 
-static int irqfixup;
+static int irqfixup __read_mostly;
 
 /*
  * Recovery handler for misrouted interrupts.
@@ -136,9 +136,9 @@ static void report_bad_irq(unsigned int irq, irq_desc_t *desc, irqreturn_t actio
 void note_interrupt(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret,
 		    struct pt_regs *regs)
 {
-	if (action_ret != IRQ_HANDLED) {
+	if (unlikely(action_ret != IRQ_HANDLED)) {
 		desc->irqs_unhandled++;
-		if (action_ret != IRQ_NONE)
+		if (unlikely(action_ret != IRQ_NONE))
 			report_bad_irq(irq, desc, action_ret);
 	}
 
@@ -152,11 +152,11 @@ void note_interrupt(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret,
 	}
 
 	desc->irq_count++;
-	if (desc->irq_count < 100000)
+	if (likely(desc->irq_count < 100000))
 		return;
 
 	desc->irq_count = 0;
-	if (desc->irqs_unhandled > 99900) {
+	if (unlikely(desc->irqs_unhandled > 99900)) {
 		/*
 		 * The interrupt is stuck
 		 */
@@ -171,7 +171,7 @@ void note_interrupt(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret,
 	desc->irqs_unhandled = 0;
 }
 
-int noirqdebug;
+int noirqdebug __read_mostly;
 
 int __init noirqdebug_setup(char *str)
 {
diff --git a/kernel/kexec.c b/kernel/kexec.c
index bf39d28e4c0e..58f0f382597c 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -902,14 +902,14 @@ static int kimage_load_segment(struct kimage *image,
  * kexec does not sync, or unmount filesystems so if you need
  * that to happen you need to do that yourself.
  */
-struct kimage *kexec_image = NULL;
-static struct kimage *kexec_crash_image = NULL;
+struct kimage *kexec_image;
+struct kimage *kexec_crash_image;
 /*
  * A home grown binary mutex.
  * Nothing can wait so this mutex is safe to use
  * in interrupt context :)
  */
-static int kexec_lock = 0;
+static int kexec_lock;
 
 asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments,
 			       struct kexec_segment __user *segments,
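The kexec.c hunk is mostly cosmetic: objects with static storage duration are zero-initialized by the C standard, so the explicit = NULL and = 0 are redundant, and dropping them keeps the objects in .bss on toolchains that used to place explicitly initialized data in .data. A two-line illustration with invented names:

static int lock_explicit = 0;	/* legal, but the initializer is redundant */
static int lock_implicit;	/* same initial value, guaranteed by C */

Note that kexec_crash_image also loses its static qualifier here, which is what lets ksysfs.c below reference it.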
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index f119e098e67b..9e28478a17a5 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -14,6 +14,7 @@
 #include <linux/sysfs.h>
 #include <linux/module.h>
 #include <linux/init.h>
+#include <linux/kexec.h>
 
 #define KERNEL_ATTR_RO(_name) \
 static struct subsys_attribute _name##_attr = __ATTR_RO(_name)
@@ -48,6 +49,20 @@ static ssize_t uevent_helper_store(struct subsystem *subsys, const char *page, s
 KERNEL_ATTR_RW(uevent_helper);
 #endif
 
+#ifdef CONFIG_KEXEC
+static ssize_t kexec_loaded_show(struct subsystem *subsys, char *page)
+{
+	return sprintf(page, "%d\n", !!kexec_image);
+}
+KERNEL_ATTR_RO(kexec_loaded);
+
+static ssize_t kexec_crash_loaded_show(struct subsystem *subsys, char *page)
+{
+	return sprintf(page, "%d\n", !!kexec_crash_image);
+}
+KERNEL_ATTR_RO(kexec_crash_loaded);
+#endif /* CONFIG_KEXEC */
+
 decl_subsys(kernel, NULL, NULL);
 EXPORT_SYMBOL_GPL(kernel_subsys);
 
53 68
@@ -56,6 +71,10 @@ static struct attribute * kernel_attrs[] = {
56 &uevent_seqnum_attr.attr, 71 &uevent_seqnum_attr.attr,
57 &uevent_helper_attr.attr, 72 &uevent_helper_attr.attr,
58#endif 73#endif
74#ifdef CONFIG_KEXEC
75 &kexec_loaded_attr.attr,
76 &kexec_crash_loaded_attr.attr,
77#endif
59 NULL 78 NULL
60}; 79};
61 80
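Both attributes are plain read-only integers, so checking them from user space is an ordinary file read. A small sketch (the sysfs paths follow directly from the attribute names above):

#include <stdio.h>

static int read_flag(const char *path)
{
	FILE *f = fopen(path, "r");
	int v = -1;

	if (f) {
		if (fscanf(f, "%d", &v) != 1)
			v = -1;
		fclose(f);
	}
	return v;
}

int main(void)
{
	printf("kexec_loaded=%d\n",
	       read_flag("/sys/kernel/kexec_loaded"));
	printf("kexec_crash_loaded=%d\n",
	       read_flag("/sys/kernel/kexec_crash_loaded"));
	return 0;
}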
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 0a907f0dc56b..cdf0f07af92f 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -15,7 +15,7 @@
 #include <linux/errno.h>
 #include <linux/init.h>
 #include <linux/pm.h>
-
+#include <linux/console.h>
 
 #include "power.h"
 
diff --git a/kernel/power/power.h b/kernel/power/power.h
index f06f12f21767..98c41423f3b1 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -55,7 +55,7 @@ struct snapshot_handle {
55 unsigned int page; 55 unsigned int page;
56 unsigned int page_offset; 56 unsigned int page_offset;
57 unsigned int prev; 57 unsigned int prev;
58 struct pbe *pbe; 58 struct pbe *pbe, *last_pbe;
59 void *buffer; 59 void *buffer;
60 unsigned int buf_offset; 60 unsigned int buf_offset;
61}; 61};
@@ -105,6 +105,10 @@ extern struct bitmap_page *alloc_bitmap(unsigned int nr_bits);
 extern unsigned long alloc_swap_page(int swap, struct bitmap_page *bitmap);
 extern void free_all_swap_pages(int swap, struct bitmap_page *bitmap);
 
+extern unsigned int count_special_pages(void);
+extern int save_special_mem(void);
+extern int restore_special_mem(void);
+
 extern int swsusp_check(void);
 extern int swsusp_shrink_memory(void);
 extern void swsusp_free(void);
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 3eeedbb13b78..3d9284100b22 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -39,8 +39,90 @@ static unsigned int nr_copy_pages;
 static unsigned int nr_meta_pages;
 static unsigned long *buffer;
 
+struct arch_saveable_page {
+	unsigned long start;
+	unsigned long end;
+	char *data;
+	struct arch_saveable_page *next;
+};
+static struct arch_saveable_page *arch_pages;
+
+int swsusp_add_arch_pages(unsigned long start, unsigned long end)
+{
+	struct arch_saveable_page *tmp;
+
+	while (start < end) {
+		tmp = kzalloc(sizeof(struct arch_saveable_page), GFP_KERNEL);
+		if (!tmp)
+			return -ENOMEM;
+		tmp->start = start;
+		tmp->end = ((start >> PAGE_SHIFT) + 1) << PAGE_SHIFT;
+		if (tmp->end > end)
+			tmp->end = end;
+		tmp->next = arch_pages;
+		start = tmp->end;
+		arch_pages = tmp;
+	}
+	return 0;
+}
+
+static unsigned int count_arch_pages(void)
+{
+	unsigned int count = 0;
+	struct arch_saveable_page *tmp = arch_pages;
+	while (tmp) {
+		count++;
+		tmp = tmp->next;
+	}
+	return count;
+}
+
+static int save_arch_mem(void)
+{
+	char *kaddr;
+	struct arch_saveable_page *tmp = arch_pages;
+	int offset;
+
+	pr_debug("swsusp: Saving arch specific memory");
+	while (tmp) {
+		tmp->data = (char *)__get_free_page(GFP_ATOMIC);
+		if (!tmp->data)
+			return -ENOMEM;
+		offset = tmp->start - (tmp->start & PAGE_MASK);
+		/* arch pages might not have a 'struct page' */
+		kaddr = kmap_atomic_pfn(tmp->start >> PAGE_SHIFT, KM_USER0);
+		memcpy(tmp->data + offset, kaddr + offset,
+				tmp->end - tmp->start);
+		kunmap_atomic(kaddr, KM_USER0);
+
+		tmp = tmp->next;
+	}
+	return 0;
+}
+
+static int restore_arch_mem(void)
+{
+	char *kaddr;
+	struct arch_saveable_page *tmp = arch_pages;
+	int offset;
+
+	while (tmp) {
+		if (!tmp->data)
+			continue;
+		offset = tmp->start - (tmp->start & PAGE_MASK);
+		kaddr = kmap_atomic_pfn(tmp->start >> PAGE_SHIFT, KM_USER0);
+		memcpy(kaddr + offset, tmp->data + offset,
+				tmp->end - tmp->start);
+		kunmap_atomic(kaddr, KM_USER0);
+		free_page((long)tmp->data);
+		tmp->data = NULL;
+		tmp = tmp->next;
+	}
+	return 0;
+}
+
 #ifdef CONFIG_HIGHMEM
-unsigned int count_highmem_pages(void)
+static unsigned int count_highmem_pages(void)
 {
 	struct zone *zone;
 	unsigned long zone_pfn;
@@ -117,7 +199,7 @@ static int save_highmem_zone(struct zone *zone)
 		return 0;
 }
 
-int save_highmem(void)
+static int save_highmem(void)
 {
 	struct zone *zone;
 	int res = 0;
@@ -134,7 +216,7 @@ int save_highmem(void)
 	return 0;
 }
 
-int restore_highmem(void)
+static int restore_highmem(void)
 {
 	printk("swsusp: Restoring Highmem\n");
 	while (highmem_copy) {
@@ -150,8 +232,35 @@ int restore_highmem(void)
 	}
 	return 0;
 }
+#else
+static inline unsigned int count_highmem_pages(void) {return 0;}
+static inline int save_highmem(void) {return 0;}
+static inline int restore_highmem(void) {return 0;}
 #endif
 
+unsigned int count_special_pages(void)
+{
+	return count_arch_pages() + count_highmem_pages();
+}
+
+int save_special_mem(void)
+{
+	int ret;
+	ret = save_arch_mem();
+	if (!ret)
+		ret = save_highmem();
+	return ret;
+}
+
+int restore_special_mem(void)
+{
+	int ret;
+	ret = restore_arch_mem();
+	if (!ret)
+		ret = restore_highmem();
+	return ret;
+}
+
 static int pfn_is_nosave(unsigned long pfn)
 {
 	unsigned long nosave_begin_pfn = __pa(&__nosave_begin) >> PAGE_SHIFT;
@@ -177,7 +286,6 @@ static int saveable(struct zone *zone, unsigned long *zone_pfn)
 		return 0;
 
 	page = pfn_to_page(pfn);
-	BUG_ON(PageReserved(page) && PageNosave(page));
 	if (PageNosave(page))
 		return 0;
 	if (PageReserved(page) && pfn_is_nosave(pfn))
@@ -293,62 +401,29 @@ static inline void create_pbe_list(struct pbe *pblist, unsigned int nr_pages)
 	}
 }
 
-/**
- *	On resume it is necessary to trace and eventually free the unsafe
- *	pages that have been allocated, because they are needed for I/O
- *	(on x86-64 we likely will "eat" these pages once again while
- *	creating the temporary page translation tables)
- */
-
-struct eaten_page {
-	struct eaten_page *next;
-	char padding[PAGE_SIZE - sizeof(void *)];
-};
-
-static struct eaten_page *eaten_pages = NULL;
-
-static void release_eaten_pages(void)
-{
-	struct eaten_page *p, *q;
-
-	p = eaten_pages;
-	while (p) {
-		q = p->next;
-		/* We don't want swsusp_free() to free this page again */
-		ClearPageNosave(virt_to_page(p));
-		free_page((unsigned long)p);
-		p = q;
-	}
-	eaten_pages = NULL;
-}
+static unsigned int unsafe_pages;
 
 /**
  *	@safe_needed - on resume, for storing the PBE list and the image,
  *	we can only use memory pages that do not conflict with the pages
- *	which had been used before suspend.
+ *	used before suspend.
  *
  *	The unsafe pages are marked with the PG_nosave_free flag
- *
- *	Allocated but unusable (ie eaten) memory pages should be marked
- *	so that swsusp_free() can release them
+ *	and we count them using unsafe_pages
  */
 
 static inline void *alloc_image_page(gfp_t gfp_mask, int safe_needed)
 {
 	void *res;
 
+	res = (void *)get_zeroed_page(gfp_mask);
 	if (safe_needed)
-		do {
+		while (res && PageNosaveFree(virt_to_page(res))) {
+			/* The page is unsafe, mark it for swsusp_free() */
+			SetPageNosave(virt_to_page(res));
+			unsafe_pages++;
 			res = (void *)get_zeroed_page(gfp_mask);
-			if (res && PageNosaveFree(virt_to_page(res))) {
-				/* This is for swsusp_free() */
-				SetPageNosave(virt_to_page(res));
-				((struct eaten_page *)res)->next = eaten_pages;
-				eaten_pages = res;
-			}
-		} while (res && PageNosaveFree(virt_to_page(res)));
-	else
-		res = (void *)get_zeroed_page(gfp_mask);
+		}
 	if (res) {
 		SetPageNosave(virt_to_page(res));
 		SetPageNosaveFree(virt_to_page(res));
@@ -374,7 +449,8 @@ unsigned long get_safe_page(gfp_t gfp_mask)
  *	On each page we set up a list of struct_pbe elements.
  */
 
-struct pbe *alloc_pagedir(unsigned int nr_pages, gfp_t gfp_mask, int safe_needed)
+static struct pbe *alloc_pagedir(unsigned int nr_pages, gfp_t gfp_mask,
+				 int safe_needed)
 {
 	unsigned int num;
 	struct pbe *pblist, *pbe;
@@ -642,6 +718,8 @@ static int mark_unsafe_pages(struct pbe *pblist)
 		return -EFAULT;
 	}
 
+	unsafe_pages = 0;
+
 	return 0;
 }
 
@@ -719,42 +797,99 @@ static inline struct pbe *unpack_orig_addresses(unsigned long *buf,
 }
 
 /**
- *	create_image - use metadata contained in the PBE list
+ *	prepare_image - use metadata contained in the PBE list
  *	pointed to by pagedir_nosave to mark the pages that will
  *	be overwritten in the process of restoring the system
- *	memory state from the image and allocate memory for
- *	the image avoiding these pages
+ *	memory state from the image ("unsafe" pages) and allocate
+ *	memory for the image
+ *
+ *	The idea is to allocate the PBE list first and then
+ *	allocate as many pages as needed for the image data,
+ *	but not to assign these pages to the PBEs initially.
+ *	Instead, we just mark them as allocated and create a list
+ *	of "safe" pages which will be used later
  */
 
-static int create_image(struct snapshot_handle *handle)
+struct safe_page {
+	struct safe_page *next;
+	char padding[PAGE_SIZE - sizeof(void *)];
+};
+
+static struct safe_page *safe_pages;
+
+static int prepare_image(struct snapshot_handle *handle)
 {
 	int error = 0;
-	struct pbe *p, *pblist;
+	unsigned int nr_pages = nr_copy_pages;
+	struct pbe *p, *pblist = NULL;
 
 	p = pagedir_nosave;
 	error = mark_unsafe_pages(p);
 	if (!error) {
-		pblist = alloc_pagedir(nr_copy_pages, GFP_ATOMIC, 1);
+		pblist = alloc_pagedir(nr_pages, GFP_ATOMIC, 1);
 		if (pblist)
 			copy_page_backup_list(pblist, p);
 		free_pagedir(p, 0);
 		if (!pblist)
 			error = -ENOMEM;
 	}
-	if (!error)
-		error = alloc_data_pages(pblist, GFP_ATOMIC, 1);
+	safe_pages = NULL;
+	if (!error && nr_pages > unsafe_pages) {
+		nr_pages -= unsafe_pages;
+		while (nr_pages--) {
+			struct safe_page *ptr;
+
+			ptr = (struct safe_page *)get_zeroed_page(GFP_ATOMIC);
+			if (!ptr) {
+				error = -ENOMEM;
+				break;
+			}
+			if (!PageNosaveFree(virt_to_page(ptr))) {
+				/* The page is "safe", add it to the list */
+				ptr->next = safe_pages;
+				safe_pages = ptr;
+			}
+			/* Mark the page as allocated */
+			SetPageNosave(virt_to_page(ptr));
+			SetPageNosaveFree(virt_to_page(ptr));
+		}
+	}
 	if (!error) {
-		release_eaten_pages();
 		pagedir_nosave = pblist;
 	} else {
-		pagedir_nosave = NULL;
 		handle->pbe = NULL;
-		nr_copy_pages = 0;
-		nr_meta_pages = 0;
+		swsusp_free();
 	}
 	return error;
 }
 
+static void *get_buffer(struct snapshot_handle *handle)
+{
+	struct pbe *pbe = handle->pbe, *last = handle->last_pbe;
+	struct page *page = virt_to_page(pbe->orig_address);
+
+	if (PageNosave(page) && PageNosaveFree(page)) {
+		/*
+		 * We have allocated the "original" page frame and we can
+		 * use it directly to store the read page
+		 */
+		pbe->address = 0;
+		if (last && last->next)
+			last->next = NULL;
+		return (void *)pbe->orig_address;
+	}
+	/*
+	 * The "original" page frame has not been allocated and we have to
+	 * use a "safe" page frame to store the read page
+	 */
+	pbe->address = (unsigned long)safe_pages;
+	safe_pages = safe_pages->next;
+	if (last)
+		last->next = pbe;
+	handle->last_pbe = pbe;
+	return (void *)pbe->address;
+}
+
 /**
  *	snapshot_write_next - used for writing the system memory snapshot.
  *
@@ -799,15 +934,16 @@ int snapshot_write_next(struct snapshot_handle *handle, size_t count)
 	} else if (handle->prev <= nr_meta_pages) {
 		handle->pbe = unpack_orig_addresses(buffer, handle->pbe);
 		if (!handle->pbe) {
-			error = create_image(handle);
+			error = prepare_image(handle);
 			if (error)
 				return error;
 			handle->pbe = pagedir_nosave;
-			handle->buffer = (void *)handle->pbe->address;
+			handle->last_pbe = NULL;
+			handle->buffer = get_buffer(handle);
 		}
 	} else {
 		handle->pbe = handle->pbe->next;
-		handle->buffer = (void *)handle->pbe->address;
+		handle->buffer = get_buffer(handle);
 	}
 	handle->prev = handle->page;
 }
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c
index c4016cbbd3e0..f0ee4e7780d6 100644
--- a/kernel/power/swsusp.c
+++ b/kernel/power/swsusp.c
@@ -62,16 +62,6 @@ unsigned long image_size = 500 * 1024 * 1024;
 
 int in_suspend __nosavedata = 0;
 
-#ifdef CONFIG_HIGHMEM
-unsigned int count_highmem_pages(void);
-int save_highmem(void);
-int restore_highmem(void);
-#else
-static int save_highmem(void) { return 0; }
-static int restore_highmem(void) { return 0; }
-static unsigned int count_highmem_pages(void) { return 0; }
-#endif
-
 /**
  *	The following functions are used for tracing the allocated
  *	swap pages, so that they can be freed in case of an error.
@@ -175,6 +165,12 @@ void free_all_swap_pages(int swap, struct bitmap_page *bitmap)
  */
 
 #define SHRINK_BITE	10000
+static inline unsigned long __shrink_memory(long tmp)
+{
+	if (tmp > SHRINK_BITE)
+		tmp = SHRINK_BITE;
+	return shrink_all_memory(tmp);
+}
 
 int swsusp_shrink_memory(void)
 {
@@ -186,21 +182,23 @@ int swsusp_shrink_memory(void)
 
 	printk("Shrinking memory...  ");
 	do {
-		size = 2 * count_highmem_pages();
+		size = 2 * count_special_pages();
 		size += size / 50 + count_data_pages();
 		size += (size + PBES_PER_PAGE - 1) / PBES_PER_PAGE +
 			PAGES_FOR_IO;
 		tmp = size;
 		for_each_zone (zone)
-			if (!is_highmem(zone))
+			if (!is_highmem(zone) && populated_zone(zone)) {
 				tmp -= zone->free_pages;
+				tmp += zone->lowmem_reserve[ZONE_NORMAL];
+			}
 		if (tmp > 0) {
-			tmp = shrink_all_memory(SHRINK_BITE);
+			tmp = __shrink_memory(tmp);
 			if (!tmp)
 				return -ENOMEM;
 			pages += tmp;
 		} else if (size > image_size / PAGE_SIZE) {
-			tmp = shrink_all_memory(SHRINK_BITE);
+			tmp = __shrink_memory(size - (image_size / PAGE_SIZE));
 			pages += tmp;
 		}
 		printk("\b%c", p[i++%4]);
@@ -228,7 +226,7 @@ int swsusp_suspend(void)
 		goto Enable_irqs;
 	}
 
-	if ((error = save_highmem())) {
+	if ((error = save_special_mem())) {
 		printk(KERN_ERR "swsusp: Not enough free pages for highmem\n");
 		goto Restore_highmem;
 	}
@@ -239,7 +237,7 @@ int swsusp_suspend(void)
 	/* Restore control flow magically appears here */
 	restore_processor_state();
 Restore_highmem:
-	restore_highmem();
+	restore_special_mem();
 	device_power_up();
 Enable_irqs:
 	local_irq_enable();
@@ -265,7 +263,7 @@ int swsusp_resume(void)
 	 */
 	swsusp_free();
 	restore_processor_state();
-	restore_highmem();
+	restore_special_mem();
 	touch_softlockup_watchdog();
 	device_power_up();
 	local_irq_enable();
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 2058f88c7bbb..20e9710fc21c 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -612,14 +612,6 @@ void synchronize_rcu(void)
 	wait_for_completion(&rcu.completion);
 }
 
-/*
- * Deprecated, use synchronize_rcu() or synchronize_sched() instead.
- */
-void synchronize_kernel(void)
-{
-	synchronize_rcu();
-}
-
 module_param(blimit, int, 0);
 module_param(qhimark, int, 0);
 module_param(qlowmark, int, 0);
@@ -627,7 +619,6 @@ module_param(qlowmark, int, 0);
 module_param(rsinterval, int, 0);
 #endif
 EXPORT_SYMBOL_GPL(rcu_batches_completed);
-EXPORT_SYMBOL_GPL_FUTURE(call_rcu);	/* WARNING: GPL-only in April 2006. */
-EXPORT_SYMBOL_GPL_FUTURE(call_rcu_bh);	/* WARNING: GPL-only in April 2006. */
+EXPORT_SYMBOL_GPL(call_rcu);
+EXPORT_SYMBOL_GPL(call_rcu_bh);
 EXPORT_SYMBOL_GPL(synchronize_rcu);
-EXPORT_SYMBOL_GPL_FUTURE(synchronize_kernel);	/* WARNING: GPL-only in April 2006. */
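With the EXPORT_SYMBOL_GPL_FUTURE transition period over, call_rcu() and call_rcu_bh() become GPL-only exports, and the long-deprecated synchronize_kernel() goes away entirely. The calling pattern for modules is unchanged; a minimal reclaim sketch with an invented struct foo:

#include <linux/rcupdate.h>
#include <linux/slab.h>

struct foo {
	int data;
	struct rcu_head rcu;
};

static void foo_reclaim(struct rcu_head *head)
{
	kfree(container_of(head, struct foo, rcu));
}

/* Call after f has been unlinked from every RCU-protected structure;
 * readers still traversing f keep it alive until the grace period ends. */
static void foo_release(struct foo *f)
{
	call_rcu(&f->rcu, foo_reclaim);
}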
diff --git a/kernel/sched.c b/kernel/sched.c
index c13f1bd2df7d..5dbc42694477 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3886,6 +3886,10 @@ long sched_setaffinity(pid_t pid, cpumask_t new_mask)
 			!capable(CAP_SYS_NICE))
 		goto out_unlock;
 
+	retval = security_task_setscheduler(p, 0, NULL);
+	if (retval)
+		goto out_unlock;
+
 	cpus_allowed = cpuset_cpus_allowed(p);
 	cpus_and(new_mask, new_mask, cpus_allowed);
 	retval = set_cpus_allowed(p, new_mask);
@@ -3954,7 +3958,10 @@ long sched_getaffinity(pid_t pid, cpumask_t *mask)
 	if (!p)
 		goto out_unlock;
 
-	retval = 0;
+	retval = security_task_getscheduler(p);
+	if (retval)
+		goto out_unlock;
+
 	cpus_and(*mask, p->cpus_allowed, cpu_online_map);
 
 out_unlock:
@@ -4046,6 +4053,9 @@ asmlinkage long sys_sched_yield(void)
 
 static inline void __cond_resched(void)
 {
+#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
+	__might_sleep(__FILE__, __LINE__);
+#endif
 	/*
 	 * The BKS might be reacquired before we have dropped
 	 * PREEMPT_ACTIVE, which could trigger a second
diff --git a/kernel/sys.c b/kernel/sys.c
index 0b6ec0e7936f..90930b28d2ca 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -13,7 +13,6 @@
 #include <linux/notifier.h>
 #include <linux/reboot.h>
 #include <linux/prctl.h>
-#include <linux/init.h>
 #include <linux/highuid.h>
 #include <linux/fs.h>
 #include <linux/kernel.h>
@@ -57,6 +56,12 @@
 #ifndef GET_FPEXC_CTL
 # define GET_FPEXC_CTL(a,b)	(-EINVAL)
 #endif
+#ifndef GET_ENDIAN
+# define GET_ENDIAN(a,b)	(-EINVAL)
+#endif
+#ifndef SET_ENDIAN
+# define SET_ENDIAN(a,b)	(-EINVAL)
+#endif
 
 /*
  * this is where the system-wide overflow UID and GID are defined, for
@@ -1860,23 +1865,20 @@ out:
  * fields when reaping, so a sample either gets all the additions of a
  * given child after it's reaped, or none so this sample is before reaping.
  *
- * tasklist_lock locking optimisation:
- * If we are current and single threaded, we do not need to take the tasklist
- * lock or the siglock.  No one else can take our signal_struct away,
- * no one else can reap the children to update signal->c* counters, and
- * no one else can race with the signal-> fields.
- * If we do not take the tasklist_lock, the signal-> fields could be read
- * out of order while another thread was just exiting. So we place a
- * read memory barrier when we avoid the lock.  On the writer side,
- * write memory barrier is implied in __exit_signal as __exit_signal releases
- * the siglock spinlock after updating the signal-> fields.
- *
- * We don't really need the siglock when we access the non c* fields
- * of the signal_struct (for RUSAGE_SELF) even in multithreaded
- * case, since we take the tasklist lock for read and the non c* signal->
- * fields are updated only in __exit_signal, which is called with
- * tasklist_lock taken for write, hence these two threads cannot execute
- * concurrently.
+ * Locking:
+ * We need to take the siglock for CHILDREN, SELF and BOTH
+ * for the cases current multithreaded, non-current single threaded
+ * non-current multithreaded.  Thread traversal is now safe with
+ * the siglock held.
+ * Strictly speaking, we do not need to take the siglock if we are current and
+ * single threaded, as no one else can take our signal_struct away, no one
+ * else can reap the children to update signal->c* counters, and no one else
+ * can race with the signal-> fields. If we do not take any lock, the
+ * signal-> fields could be read out of order while another thread was just
+ * exiting. So we should place a read memory barrier when we avoid the lock.
+ * On the writer side, write memory barrier is implied in __exit_signal
+ * as __exit_signal releases the siglock spinlock after updating the signal->
+ * fields. But we don't do this yet to keep things simple.
  *
  */
 
@@ -1885,35 +1887,25 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
 	struct task_struct *t;
 	unsigned long flags;
 	cputime_t utime, stime;
-	int need_lock = 0;
 
 	memset((char *) r, 0, sizeof *r);
 	utime = stime = cputime_zero;
 
-	if (p != current || !thread_group_empty(p))
-		need_lock = 1;
-
-	if (need_lock) {
-		read_lock(&tasklist_lock);
-		if (unlikely(!p->signal)) {
-			read_unlock(&tasklist_lock);
-			return;
-		}
-	} else
-		/* See locking comments above */
-		smp_rmb();
+	rcu_read_lock();
+	if (!lock_task_sighand(p, &flags)) {
+		rcu_read_unlock();
+		return;
+	}
 
 	switch (who) {
 		case RUSAGE_BOTH:
 		case RUSAGE_CHILDREN:
-			spin_lock_irqsave(&p->sighand->siglock, flags);
 			utime = p->signal->cutime;
 			stime = p->signal->cstime;
 			r->ru_nvcsw = p->signal->cnvcsw;
 			r->ru_nivcsw = p->signal->cnivcsw;
 			r->ru_minflt = p->signal->cmin_flt;
 			r->ru_majflt = p->signal->cmaj_flt;
-			spin_unlock_irqrestore(&p->sighand->siglock, flags);
 
 			if (who == RUSAGE_CHILDREN)
 				break;
@@ -1941,8 +1933,9 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
 		BUG();
 	}
 
-	if (need_lock)
-		read_unlock(&tasklist_lock);
+	unlock_task_sighand(p, &flags);
+	rcu_read_unlock();
+
 	cputime_to_timeval(utime, &r->ru_utime);
 	cputime_to_timeval(stime, &r->ru_stime);
 }
@@ -2057,6 +2050,13 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
 			return -EFAULT;
 		return 0;
 	}
+	case PR_GET_ENDIAN:
+		error = GET_ENDIAN(current, arg2);
+		break;
+	case PR_SET_ENDIAN:
+		error = SET_ENDIAN(current, arg2);
+		break;
+
 	default:
 		error = -EINVAL;
 		break;
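The k_getrusage() rewrite above replaces the conditional tasklist_lock dance with lock_task_sighand(), which either returns with the target's siglock held or reports that the task is already past exit, so ->signal is never dereferenced after it is freed. The pairing the patch relies on, sketched in isolation:

#include <linux/sched.h>

static void sample_signal_stats(struct task_struct *p)
{
	unsigned long flags;

	rcu_read_lock();	/* keeps p->sighand from being freed under us */
	if (lock_task_sighand(p, &flags)) {
		/* p->signal is stable here; the siglock serializes
		 * against __exit_signal() and child reaping */
		unlock_task_sighand(p, &flags);
	}
	rcu_read_unlock();
}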
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 5433195040f1..6991bece67e8 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -87,6 +87,7 @@ cond_syscall(sys_inotify_init);
 cond_syscall(sys_inotify_add_watch);
 cond_syscall(sys_inotify_rm_watch);
 cond_syscall(sys_migrate_pages);
+cond_syscall(sys_move_pages);
 cond_syscall(sys_chown16);
 cond_syscall(sys_fchown16);
 cond_syscall(sys_getegid16);
@@ -132,3 +133,4 @@ cond_syscall(sys_mincore);
 cond_syscall(sys_madvise);
 cond_syscall(sys_mremap);
 cond_syscall(sys_remap_file_pages);
+cond_syscall(compat_sys_move_pages);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 0d656e61621d..eb8bd214e7d7 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -59,6 +59,7 @@ extern int proc_nr_files(ctl_table *table, int write, struct file *filp,
59extern int C_A_D; 59extern int C_A_D;
60extern int sysctl_overcommit_memory; 60extern int sysctl_overcommit_memory;
61extern int sysctl_overcommit_ratio; 61extern int sysctl_overcommit_ratio;
62extern int sysctl_panic_on_oom;
62extern int max_threads; 63extern int max_threads;
63extern int sysrq_enabled; 64extern int sysrq_enabled;
64extern int core_uses_pid; 65extern int core_uses_pid;
@@ -398,7 +399,7 @@ static ctl_table kern_table[] = {
 		.strategy	= &sysctl_string,
 	},
 #endif
-#ifdef CONFIG_HOTPLUG
+#if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET)
 	{
 		.ctl_name	= KERN_HOTPLUG,
 		.procname	= "hotplug",
@@ -702,6 +703,14 @@ static ctl_table vm_table[] = {
 		.proc_handler	= &proc_dointvec,
 	},
 	{
+		.ctl_name	= VM_PANIC_ON_OOM,
+		.procname	= "panic_on_oom",
+		.data		= &sysctl_panic_on_oom,
+		.maxlen		= sizeof(sysctl_panic_on_oom),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
 		.ctl_name	= VM_OVERCOMMIT_RATIO,
 		.procname	= "overcommit_ratio",
 		.data		= &sysctl_overcommit_ratio,
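The new knob surfaces as /proc/sys/vm/panic_on_oom (sysctl name vm.panic_on_oom). Flipping it at run time, equivalent to sysctl -w vm.panic_on_oom=1:

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/vm/panic_on_oom", "w");

	if (!f) {
		perror("panic_on_oom");
		return 1;
	}
	fputs("1\n", f);
	return fclose(f) ? 1 : 0;
}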
diff --git a/kernel/timer.c b/kernel/timer.c
index 9e49deed468c..f35b3939e937 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -383,23 +383,19 @@ EXPORT_SYMBOL(del_timer_sync);
 static int cascade(tvec_base_t *base, tvec_t *tv, int index)
 {
 	/* cascade all the timers from tv up one level */
-	struct list_head *head, *curr;
+	struct timer_list *timer, *tmp;
+	struct list_head tv_list;
+
+	list_replace_init(tv->vec + index, &tv_list);
 
-	head = tv->vec + index;
-	curr = head->next;
 	/*
-	 * We are removing _all_ timers from the list, so we don't have to
-	 * detach them individually, just clear the list afterwards.
+	 * We are removing _all_ timers from the list, so we
+	 * don't have to detach them individually.
 	 */
-	while (curr != head) {
-		struct timer_list *tmp;
-
-		tmp = list_entry(curr, struct timer_list, entry);
-		BUG_ON(tmp->base != base);
-		curr = curr->next;
-		internal_add_timer(base, tmp);
+	list_for_each_entry_safe(timer, tmp, &tv_list, entry) {
+		BUG_ON(timer->base != base);
+		internal_add_timer(base, timer);
 	}
-	INIT_LIST_HEAD(head);
 
 	return index;
 }
@@ -419,10 +415,10 @@ static inline void __run_timers(tvec_base_t *base)
 
 	spin_lock_irq(&base->lock);
 	while (time_after_eq(jiffies, base->timer_jiffies)) {
-		struct list_head work_list = LIST_HEAD_INIT(work_list);
+		struct list_head work_list;
 		struct list_head *head = &work_list;
 		int index = base->timer_jiffies & TVR_MASK;
 
 		/*
 		 * Cascade timers:
 		 */
@@ -431,8 +427,8 @@ static inline void __run_timers(tvec_base_t *base)
 			(!cascade(base, &base->tv3, INDEX(1))) &&
 				!cascade(base, &base->tv4, INDEX(2)))
 			cascade(base, &base->tv5, INDEX(3));
 		++base->timer_jiffies;
-		list_splice_init(base->tv1.vec + index, &work_list);
+		list_replace_init(base->tv1.vec + index, &work_list);
 		while (!list_empty(head)) {
 			void (*fn)(unsigned long);
 			unsigned long data;
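Both the cascade() rewrite above and the take_over_work() change in workqueue.c below lean on the list_replace_init() helper added by the same series. Paraphrased from that era's <linux/list.h>, it detaches an entire list in O(1) and leaves the old head empty:

static inline void list_replace(struct list_head *old,
				struct list_head *new)
{
	new->next = old->next;
	new->next->prev = new;
	new->prev = old->prev;
	new->prev->next = new;
}

static inline void list_replace_init(struct list_head *old,
				     struct list_head *new)
{
	list_replace(old, new);
	INIT_LIST_HEAD(old);
}

Unlike list_splice_init(), the destination head needs no prior initialization, which is why work_list above can drop its LIST_HEAD_INIT; the pointer updates also work out so that an empty source yields an empty destination rather than a dangling one.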
diff --git a/kernel/user.c b/kernel/user.c
index 4b1eb745afa1..6408c0424291 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -148,7 +148,7 @@ struct user_struct * alloc_uid(uid_t uid)
 	new->mq_bytes = 0;
 	new->locked_shm = 0;
 
-	if (alloc_uid_keyring(new) < 0) {
+	if (alloc_uid_keyring(new, current) < 0) {
 		kmem_cache_free(uid_cachep, new);
 		return NULL;
 	}
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 880fb415a8f6..740c5abceb07 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -531,11 +531,11 @@ int current_is_keventd(void)
 static void take_over_work(struct workqueue_struct *wq, unsigned int cpu)
 {
 	struct cpu_workqueue_struct *cwq = per_cpu_ptr(wq->cpu_wq, cpu);
-	LIST_HEAD(list);
+	struct list_head list;
 	struct work_struct *work;
 
 	spin_lock_irq(&cwq->lock);
-	list_splice_init(&cwq->worklist, &list);
+	list_replace_init(&cwq->worklist, &list);
 
 	while (!list_empty(&list)) {
 		printk("Taking work for %s\n", wq->name);