diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/acct.c | 92 | ||||
-rw-r--r-- | kernel/cpu.c | 33 | ||||
-rw-r--r-- | kernel/exit.c | 2 | ||||
-rw-r--r-- | kernel/fork.c | 9 | ||||
-rw-r--r-- | kernel/futex.c | 5 | ||||
-rw-r--r-- | kernel/irq/manage.c | 1 | ||||
-rw-r--r-- | kernel/kprobes.c | 134 | ||||
-rw-r--r-- | kernel/module.c | 1 | ||||
-rw-r--r-- | kernel/posix-cpu-timers.c | 6 | ||||
-rw-r--r-- | kernel/power/main.c | 2 | ||||
-rw-r--r-- | kernel/power/power.h | 6 | ||||
-rw-r--r-- | kernel/power/snapshot.c | 100 | ||||
-rw-r--r-- | kernel/power/swsusp.c | 251 | ||||
-rw-r--r-- | kernel/printk.c | 1 | ||||
-rw-r--r-- | kernel/ptrace.c | 84 | ||||
-rw-r--r-- | kernel/sched.c | 167 | ||||
-rw-r--r-- | kernel/softirq.c | 3 | ||||
-rw-r--r-- | kernel/softlockup.c | 6 | ||||
-rw-r--r-- | kernel/sys.c | 26 | ||||
-rw-r--r-- | kernel/sysctl.c | 141 | ||||
-rw-r--r-- | kernel/workqueue.c | 2 |
21 files changed, 676 insertions, 396 deletions
diff --git a/kernel/acct.c b/kernel/acct.c index 2e3f4a47e7..6312d6bd43 100644 --- a/kernel/acct.c +++ b/kernel/acct.c | |||
@@ -54,6 +54,7 @@ | |||
54 | #include <linux/jiffies.h> | 54 | #include <linux/jiffies.h> |
55 | #include <linux/times.h> | 55 | #include <linux/times.h> |
56 | #include <linux/syscalls.h> | 56 | #include <linux/syscalls.h> |
57 | #include <linux/mount.h> | ||
57 | #include <asm/uaccess.h> | 58 | #include <asm/uaccess.h> |
58 | #include <asm/div64.h> | 59 | #include <asm/div64.h> |
59 | #include <linux/blkdev.h> /* sector_div */ | 60 | #include <linux/blkdev.h> /* sector_div */ |
@@ -192,6 +193,7 @@ static void acct_file_reopen(struct file *file) | |||
192 | add_timer(&acct_globals.timer); | 193 | add_timer(&acct_globals.timer); |
193 | } | 194 | } |
194 | if (old_acct) { | 195 | if (old_acct) { |
196 | mnt_unpin(old_acct->f_vfsmnt); | ||
195 | spin_unlock(&acct_globals.lock); | 197 | spin_unlock(&acct_globals.lock); |
196 | do_acct_process(0, old_acct); | 198 | do_acct_process(0, old_acct); |
197 | filp_close(old_acct, NULL); | 199 | filp_close(old_acct, NULL); |
@@ -199,6 +201,42 @@ static void acct_file_reopen(struct file *file) | |||
199 | } | 201 | } |
200 | } | 202 | } |
201 | 203 | ||
204 | static int acct_on(char *name) | ||
205 | { | ||
206 | struct file *file; | ||
207 | int error; | ||
208 | |||
209 | /* Difference from BSD - they don't do O_APPEND */ | ||
210 | file = filp_open(name, O_WRONLY|O_APPEND|O_LARGEFILE, 0); | ||
211 | if (IS_ERR(file)) | ||
212 | return PTR_ERR(file); | ||
213 | |||
214 | if (!S_ISREG(file->f_dentry->d_inode->i_mode)) { | ||
215 | filp_close(file, NULL); | ||
216 | return -EACCES; | ||
217 | } | ||
218 | |||
219 | if (!file->f_op->write) { | ||
220 | filp_close(file, NULL); | ||
221 | return -EIO; | ||
222 | } | ||
223 | |||
224 | error = security_acct(file); | ||
225 | if (error) { | ||
226 | filp_close(file, NULL); | ||
227 | return error; | ||
228 | } | ||
229 | |||
230 | spin_lock(&acct_globals.lock); | ||
231 | mnt_pin(file->f_vfsmnt); | ||
232 | acct_file_reopen(file); | ||
233 | spin_unlock(&acct_globals.lock); | ||
234 | |||
235 | mntput(file->f_vfsmnt); /* it's pinned, now give up active reference */ | ||
236 | |||
237 | return 0; | ||
238 | } | ||
239 | |||
202 | /** | 240 | /** |
203 | * sys_acct - enable/disable process accounting | 241 | * sys_acct - enable/disable process accounting |
204 | * @name: file name for accounting records or NULL to shutdown accounting | 242 | * @name: file name for accounting records or NULL to shutdown accounting |
@@ -212,47 +250,41 @@ static void acct_file_reopen(struct file *file) | |||
212 | */ | 250 | */ |
213 | asmlinkage long sys_acct(const char __user *name) | 251 | asmlinkage long sys_acct(const char __user *name) |
214 | { | 252 | { |
215 | struct file *file = NULL; | ||
216 | char *tmp; | ||
217 | int error; | 253 | int error; |
218 | 254 | ||
219 | if (!capable(CAP_SYS_PACCT)) | 255 | if (!capable(CAP_SYS_PACCT)) |
220 | return -EPERM; | 256 | return -EPERM; |
221 | 257 | ||
222 | if (name) { | 258 | if (name) { |
223 | tmp = getname(name); | 259 | char *tmp = getname(name); |
224 | if (IS_ERR(tmp)) { | 260 | if (IS_ERR(tmp)) |
225 | return (PTR_ERR(tmp)); | 261 | return (PTR_ERR(tmp)); |
226 | } | 262 | error = acct_on(tmp); |
227 | /* Difference from BSD - they don't do O_APPEND */ | ||
228 | file = filp_open(tmp, O_WRONLY|O_APPEND|O_LARGEFILE, 0); | ||
229 | putname(tmp); | 263 | putname(tmp); |
230 | if (IS_ERR(file)) { | 264 | } else { |
231 | return (PTR_ERR(file)); | 265 | error = security_acct(NULL); |
232 | } | 266 | if (!error) { |
233 | if (!S_ISREG(file->f_dentry->d_inode->i_mode)) { | 267 | spin_lock(&acct_globals.lock); |
234 | filp_close(file, NULL); | 268 | acct_file_reopen(NULL); |
235 | return (-EACCES); | 269 | spin_unlock(&acct_globals.lock); |
236 | } | ||
237 | |||
238 | if (!file->f_op->write) { | ||
239 | filp_close(file, NULL); | ||
240 | return (-EIO); | ||
241 | } | 270 | } |
242 | } | 271 | } |
272 | return error; | ||
273 | } | ||
243 | 274 | ||
244 | error = security_acct(file); | 275 | /** |
245 | if (error) { | 276 | * acct_auto_close_mnt - turn off a filesystem's accounting if it is on |
246 | if (file) | 277 | * @m: vfsmount being shut down |
247 | filp_close(file, NULL); | 278 | * |
248 | return error; | 279 | * If the accounting is turned on for a file in the subtree pointed to |
249 | } | 280 | * by m, turn accounting off. Done when m is about to die. |
250 | 281 | */ | |
282 | void acct_auto_close_mnt(struct vfsmount *m) | ||
283 | { | ||
251 | spin_lock(&acct_globals.lock); | 284 | spin_lock(&acct_globals.lock); |
252 | acct_file_reopen(file); | 285 | if (acct_globals.file && acct_globals.file->f_vfsmnt == m) |
286 | acct_file_reopen(NULL); | ||
253 | spin_unlock(&acct_globals.lock); | 287 | spin_unlock(&acct_globals.lock); |
254 | |||
255 | return (0); | ||
256 | } | 288 | } |
257 | 289 | ||
258 | /** | 290 | /** |
@@ -266,8 +298,8 @@ void acct_auto_close(struct super_block *sb) | |||
266 | { | 298 | { |
267 | spin_lock(&acct_globals.lock); | 299 | spin_lock(&acct_globals.lock); |
268 | if (acct_globals.file && | 300 | if (acct_globals.file && |
269 | acct_globals.file->f_dentry->d_inode->i_sb == sb) { | 301 | acct_globals.file->f_vfsmnt->mnt_sb == sb) { |
270 | acct_file_reopen((struct file *)NULL); | 302 | acct_file_reopen(NULL); |
271 | } | 303 | } |
272 | spin_unlock(&acct_globals.lock); | 304 | spin_unlock(&acct_globals.lock); |
273 | } | 305 | } |
diff --git a/kernel/cpu.c b/kernel/cpu.c index 3619e93918..d61ba88f34 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c | |||
@@ -21,6 +21,24 @@ EXPORT_SYMBOL_GPL(cpucontrol); | |||
21 | 21 | ||
22 | static struct notifier_block *cpu_chain; | 22 | static struct notifier_block *cpu_chain; |
23 | 23 | ||
24 | /* | ||
25 | * Used by callers to check whether they need to acquire the cpucontrol | ||
26 | * or not to protect a cpu from being removed. It's sometimes required to | ||
27 | * call these functions both for normal operations, and in response to | ||
28 | * a cpu being added/removed. If the context of the call is in the same | ||
29 | * thread context as a CPU hotplug thread, we don't need to take the lock | ||
30 | * since it's already protected. | ||
31 | * check drivers/cpufreq/cpufreq.c for its usage - Ashok Raj | ||
32 | */ | ||
33 | |||
34 | int current_in_cpu_hotplug(void) | ||
35 | { | ||
36 | return (current->flags & PF_HOTPLUG_CPU); | ||
37 | } | ||
38 | |||
39 | EXPORT_SYMBOL_GPL(current_in_cpu_hotplug); | ||
40 | |||
41 | |||
24 | /* Need to know about CPUs going up/down? */ | 42 | /* Need to know about CPUs going up/down? */ |
25 | int register_cpu_notifier(struct notifier_block *nb) | 43 | int register_cpu_notifier(struct notifier_block *nb) |
26 | { | 44 | { |
@@ -94,6 +112,13 @@ int cpu_down(unsigned int cpu) | |||
94 | goto out; | 112 | goto out; |
95 | } | 113 | } |
96 | 114 | ||
115 | /* | ||
116 | * Leave a trace in current->flags indicating we are already in | ||
117 | * process of performing CPU hotplug. Callers can check if cpucontrol | ||
118 | * is already acquired by current thread, and if so not cause | ||
119 | * a deadlock by not acquiring the lock | ||
120 | */ | ||
121 | current->flags |= PF_HOTPLUG_CPU; | ||
97 | err = notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE, | 122 | err = notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE, |
98 | (void *)(long)cpu); | 123 | (void *)(long)cpu); |
99 | if (err == NOTIFY_BAD) { | 124 | if (err == NOTIFY_BAD) { |
@@ -146,6 +171,7 @@ out_thread: | |||
146 | out_allowed: | 171 | out_allowed: |
147 | set_cpus_allowed(current, old_allowed); | 172 | set_cpus_allowed(current, old_allowed); |
148 | out: | 173 | out: |
174 | current->flags &= ~PF_HOTPLUG_CPU; | ||
149 | unlock_cpu_hotplug(); | 175 | unlock_cpu_hotplug(); |
150 | return err; | 176 | return err; |
151 | } | 177 | } |
@@ -163,6 +189,12 @@ int __devinit cpu_up(unsigned int cpu) | |||
163 | ret = -EINVAL; | 189 | ret = -EINVAL; |
164 | goto out; | 190 | goto out; |
165 | } | 191 | } |
192 | |||
193 | /* | ||
194 | * Leave a trace in current->flags indicating we are already in | ||
195 | * process of performing CPU hotplug. | ||
196 | */ | ||
197 | current->flags |= PF_HOTPLUG_CPU; | ||
166 | ret = notifier_call_chain(&cpu_chain, CPU_UP_PREPARE, hcpu); | 198 | ret = notifier_call_chain(&cpu_chain, CPU_UP_PREPARE, hcpu); |
167 | if (ret == NOTIFY_BAD) { | 199 | if (ret == NOTIFY_BAD) { |
168 | printk("%s: attempt to bring up CPU %u failed\n", | 200 | printk("%s: attempt to bring up CPU %u failed\n", |
@@ -185,6 +217,7 @@ out_notify: | |||
185 | if (ret != 0) | 217 | if (ret != 0) |
186 | notifier_call_chain(&cpu_chain, CPU_UP_CANCELED, hcpu); | 218 | notifier_call_chain(&cpu_chain, CPU_UP_CANCELED, hcpu); |
187 | out: | 219 | out: |
220 | current->flags &= ~PF_HOTPLUG_CPU; | ||
188 | up(&cpucontrol); | 221 | up(&cpucontrol); |
189 | return ret; | 222 | return ret; |
190 | } | 223 | } |
diff --git a/kernel/exit.c b/kernel/exit.c index 537394b25e..452a1d1161 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
@@ -28,6 +28,7 @@ | |||
28 | #include <linux/cpuset.h> | 28 | #include <linux/cpuset.h> |
29 | #include <linux/syscalls.h> | 29 | #include <linux/syscalls.h> |
30 | #include <linux/signal.h> | 30 | #include <linux/signal.h> |
31 | #include <linux/cn_proc.h> | ||
31 | 32 | ||
32 | #include <asm/uaccess.h> | 33 | #include <asm/uaccess.h> |
33 | #include <asm/unistd.h> | 34 | #include <asm/unistd.h> |
@@ -863,6 +864,7 @@ fastcall NORET_TYPE void do_exit(long code) | |||
863 | module_put(tsk->binfmt->module); | 864 | module_put(tsk->binfmt->module); |
864 | 865 | ||
865 | tsk->exit_code = code; | 866 | tsk->exit_code = code; |
867 | proc_exit_connector(tsk); | ||
866 | exit_notify(tsk); | 868 | exit_notify(tsk); |
867 | #ifdef CONFIG_NUMA | 869 | #ifdef CONFIG_NUMA |
868 | mpol_free(tsk->mempolicy); | 870 | mpol_free(tsk->mempolicy); |
diff --git a/kernel/fork.c b/kernel/fork.c index 8a069612ea..158710d225 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -42,6 +42,7 @@ | |||
42 | #include <linux/profile.h> | 42 | #include <linux/profile.h> |
43 | #include <linux/rmap.h> | 43 | #include <linux/rmap.h> |
44 | #include <linux/acct.h> | 44 | #include <linux/acct.h> |
45 | #include <linux/cn_proc.h> | ||
45 | 46 | ||
46 | #include <asm/pgtable.h> | 47 | #include <asm/pgtable.h> |
47 | #include <asm/pgalloc.h> | 48 | #include <asm/pgalloc.h> |
@@ -469,13 +470,6 @@ static int copy_mm(unsigned long clone_flags, struct task_struct * tsk) | |||
469 | if (clone_flags & CLONE_VM) { | 470 | if (clone_flags & CLONE_VM) { |
470 | atomic_inc(&oldmm->mm_users); | 471 | atomic_inc(&oldmm->mm_users); |
471 | mm = oldmm; | 472 | mm = oldmm; |
472 | /* | ||
473 | * There are cases where the PTL is held to ensure no | ||
474 | * new threads start up in user mode using an mm, which | ||
475 | * allows optimizing out ipis; the tlb_gather_mmu code | ||
476 | * is an example. | ||
477 | */ | ||
478 | spin_unlock_wait(&oldmm->page_table_lock); | ||
479 | goto good_mm; | 473 | goto good_mm; |
480 | } | 474 | } |
481 | 475 | ||
@@ -1143,6 +1137,7 @@ static task_t *copy_process(unsigned long clone_flags, | |||
1143 | __get_cpu_var(process_counts)++; | 1137 | __get_cpu_var(process_counts)++; |
1144 | } | 1138 | } |
1145 | 1139 | ||
1140 | proc_fork_connector(p); | ||
1146 | if (!current->signal->tty && p->signal->tty) | 1141 | if (!current->signal->tty && p->signal->tty) |
1147 | p->signal->tty = NULL; | 1142 | p->signal->tty = NULL; |
1148 | 1143 | ||
diff --git a/kernel/futex.c b/kernel/futex.c index 3b4d5ad44c..aca8d10704 100644 --- a/kernel/futex.c +++ b/kernel/futex.c | |||
@@ -365,6 +365,11 @@ retry: | |||
365 | if (bh1 != bh2) | 365 | if (bh1 != bh2) |
366 | spin_unlock(&bh2->lock); | 366 | spin_unlock(&bh2->lock); |
367 | 367 | ||
368 | if (unlikely(op_ret != -EFAULT)) { | ||
369 | ret = op_ret; | ||
370 | goto out; | ||
371 | } | ||
372 | |||
368 | /* futex_atomic_op_inuser needs to both read and write | 373 | /* futex_atomic_op_inuser needs to both read and write |
369 | * *(int __user *)uaddr2, but we can't modify it | 374 | * *(int __user *)uaddr2, but we can't modify it |
370 | * non-atomically. Therefore, if get_user below is not | 375 | * non-atomically. Therefore, if get_user below is not |
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 1cfdb08ddf..3bd7226d15 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c | |||
@@ -24,6 +24,7 @@ cpumask_t __cacheline_aligned pending_irq_cpumask[NR_IRQS]; | |||
24 | 24 | ||
25 | /** | 25 | /** |
26 | * synchronize_irq - wait for pending IRQ handlers (on other CPUs) | 26 | * synchronize_irq - wait for pending IRQ handlers (on other CPUs) |
27 | * @irq: interrupt number to wait for | ||
27 | * | 28 | * |
28 | * This function waits for any pending IRQ handlers for this interrupt | 29 | * This function waits for any pending IRQ handlers for this interrupt |
29 | * to complete before returning. If you use this function while | 30 | * to complete before returning. If you use this function while |
diff --git a/kernel/kprobes.c b/kernel/kprobes.c index ce4915dd68..5beda378cc 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c | |||
@@ -32,7 +32,6 @@ | |||
32 | * <prasanna@in.ibm.com> added function-return probes. | 32 | * <prasanna@in.ibm.com> added function-return probes. |
33 | */ | 33 | */ |
34 | #include <linux/kprobes.h> | 34 | #include <linux/kprobes.h> |
35 | #include <linux/spinlock.h> | ||
36 | #include <linux/hash.h> | 35 | #include <linux/hash.h> |
37 | #include <linux/init.h> | 36 | #include <linux/init.h> |
38 | #include <linux/slab.h> | 37 | #include <linux/slab.h> |
@@ -49,9 +48,9 @@ | |||
49 | static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE]; | 48 | static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE]; |
50 | static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE]; | 49 | static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE]; |
51 | 50 | ||
52 | unsigned int kprobe_cpu = NR_CPUS; | 51 | static DEFINE_SPINLOCK(kprobe_lock); /* Protects kprobe_table */ |
53 | static DEFINE_SPINLOCK(kprobe_lock); | 52 | DEFINE_SPINLOCK(kretprobe_lock); /* Protects kretprobe_inst_table */ |
54 | static struct kprobe *curr_kprobe; | 53 | static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL; |
55 | 54 | ||
56 | /* | 55 | /* |
57 | * kprobe->ainsn.insn points to the copy of the instruction to be | 56 | * kprobe->ainsn.insn points to the copy of the instruction to be |
@@ -153,50 +152,31 @@ void __kprobes free_insn_slot(kprobe_opcode_t *slot) | |||
153 | } | 152 | } |
154 | } | 153 | } |
155 | 154 | ||
156 | /* Locks kprobe: irqs must be disabled */ | 155 | /* We have preemption disabled.. so it is safe to use __ versions */ |
157 | void __kprobes lock_kprobes(void) | 156 | static inline void set_kprobe_instance(struct kprobe *kp) |
158 | { | 157 | { |
159 | unsigned long flags = 0; | 158 | __get_cpu_var(kprobe_instance) = kp; |
160 | |||
161 | /* Avoiding local interrupts to happen right after we take the kprobe_lock | ||
162 | * and before we get a chance to update kprobe_cpu, this to prevent | ||
163 | * deadlock when we have a kprobe on ISR routine and a kprobe on task | ||
164 | * routine | ||
165 | */ | ||
166 | local_irq_save(flags); | ||
167 | |||
168 | spin_lock(&kprobe_lock); | ||
169 | kprobe_cpu = smp_processor_id(); | ||
170 | |||
171 | local_irq_restore(flags); | ||
172 | } | 159 | } |
173 | 160 | ||
174 | void __kprobes unlock_kprobes(void) | 161 | static inline void reset_kprobe_instance(void) |
175 | { | 162 | { |
176 | unsigned long flags = 0; | 163 | __get_cpu_var(kprobe_instance) = NULL; |
177 | |||
178 | /* Avoiding local interrupts to happen right after we update | ||
179 | * kprobe_cpu and before we get a a chance to release kprobe_lock, | ||
180 | * this to prevent deadlock when we have a kprobe on ISR routine and | ||
181 | * a kprobe on task routine | ||
182 | */ | ||
183 | local_irq_save(flags); | ||
184 | |||
185 | kprobe_cpu = NR_CPUS; | ||
186 | spin_unlock(&kprobe_lock); | ||
187 | |||
188 | local_irq_restore(flags); | ||
189 | } | 164 | } |
190 | 165 | ||
191 | /* You have to be holding the kprobe_lock */ | 166 | /* |
167 | * This routine is called either: | ||
168 | * - under the kprobe_lock spinlock - during kprobe_[un]register() | ||
169 | * OR | ||
170 | * - with preemption disabled - from arch/xxx/kernel/kprobes.c | ||
171 | */ | ||
192 | struct kprobe __kprobes *get_kprobe(void *addr) | 172 | struct kprobe __kprobes *get_kprobe(void *addr) |
193 | { | 173 | { |
194 | struct hlist_head *head; | 174 | struct hlist_head *head; |
195 | struct hlist_node *node; | 175 | struct hlist_node *node; |
176 | struct kprobe *p; | ||
196 | 177 | ||
197 | head = &kprobe_table[hash_ptr(addr, KPROBE_HASH_BITS)]; | 178 | head = &kprobe_table[hash_ptr(addr, KPROBE_HASH_BITS)]; |
198 | hlist_for_each(node, head) { | 179 | hlist_for_each_entry_rcu(p, node, head, hlist) { |
199 | struct kprobe *p = hlist_entry(node, struct kprobe, hlist); | ||
200 | if (p->addr == addr) | 180 | if (p->addr == addr) |
201 | return p; | 181 | return p; |
202 | } | 182 | } |
@@ -211,13 +191,13 @@ static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs) | |||
211 | { | 191 | { |
212 | struct kprobe *kp; | 192 | struct kprobe *kp; |
213 | 193 | ||
214 | list_for_each_entry(kp, &p->list, list) { | 194 | list_for_each_entry_rcu(kp, &p->list, list) { |
215 | if (kp->pre_handler) { | 195 | if (kp->pre_handler) { |
216 | curr_kprobe = kp; | 196 | set_kprobe_instance(kp); |
217 | if (kp->pre_handler(kp, regs)) | 197 | if (kp->pre_handler(kp, regs)) |
218 | return 1; | 198 | return 1; |
219 | } | 199 | } |
220 | curr_kprobe = NULL; | 200 | reset_kprobe_instance(); |
221 | } | 201 | } |
222 | return 0; | 202 | return 0; |
223 | } | 203 | } |
@@ -227,11 +207,11 @@ static void __kprobes aggr_post_handler(struct kprobe *p, struct pt_regs *regs, | |||
227 | { | 207 | { |
228 | struct kprobe *kp; | 208 | struct kprobe *kp; |
229 | 209 | ||
230 | list_for_each_entry(kp, &p->list, list) { | 210 | list_for_each_entry_rcu(kp, &p->list, list) { |
231 | if (kp->post_handler) { | 211 | if (kp->post_handler) { |
232 | curr_kprobe = kp; | 212 | set_kprobe_instance(kp); |
233 | kp->post_handler(kp, regs, flags); | 213 | kp->post_handler(kp, regs, flags); |
234 | curr_kprobe = NULL; | 214 | reset_kprobe_instance(); |
235 | } | 215 | } |
236 | } | 216 | } |
237 | return; | 217 | return; |
@@ -240,12 +220,14 @@ static void __kprobes aggr_post_handler(struct kprobe *p, struct pt_regs *regs, | |||
240 | static int __kprobes aggr_fault_handler(struct kprobe *p, struct pt_regs *regs, | 220 | static int __kprobes aggr_fault_handler(struct kprobe *p, struct pt_regs *regs, |
241 | int trapnr) | 221 | int trapnr) |
242 | { | 222 | { |
223 | struct kprobe *cur = __get_cpu_var(kprobe_instance); | ||
224 | |||
243 | /* | 225 | /* |
244 | * if we faulted "during" the execution of a user specified | 226 | * if we faulted "during" the execution of a user specified |
245 | * probe handler, invoke just that probe's fault handler | 227 | * probe handler, invoke just that probe's fault handler |
246 | */ | 228 | */ |
247 | if (curr_kprobe && curr_kprobe->fault_handler) { | 229 | if (cur && cur->fault_handler) { |
248 | if (curr_kprobe->fault_handler(curr_kprobe, regs, trapnr)) | 230 | if (cur->fault_handler(cur, regs, trapnr)) |
249 | return 1; | 231 | return 1; |
250 | } | 232 | } |
251 | return 0; | 233 | return 0; |
@@ -253,17 +235,18 @@ static int __kprobes aggr_fault_handler(struct kprobe *p, struct pt_regs *regs, | |||
253 | 235 | ||
254 | static int __kprobes aggr_break_handler(struct kprobe *p, struct pt_regs *regs) | 236 | static int __kprobes aggr_break_handler(struct kprobe *p, struct pt_regs *regs) |
255 | { | 237 | { |
256 | struct kprobe *kp = curr_kprobe; | 238 | struct kprobe *cur = __get_cpu_var(kprobe_instance); |
257 | if (curr_kprobe && kp->break_handler) { | 239 | int ret = 0; |
258 | if (kp->break_handler(kp, regs)) { | 240 | |
259 | curr_kprobe = NULL; | 241 | if (cur && cur->break_handler) { |
260 | return 1; | 242 | if (cur->break_handler(cur, regs)) |
261 | } | 243 | ret = 1; |
262 | } | 244 | } |
263 | curr_kprobe = NULL; | 245 | reset_kprobe_instance(); |
264 | return 0; | 246 | return ret; |
265 | } | 247 | } |
266 | 248 | ||
249 | /* Called with kretprobe_lock held */ | ||
267 | struct kretprobe_instance __kprobes *get_free_rp_inst(struct kretprobe *rp) | 250 | struct kretprobe_instance __kprobes *get_free_rp_inst(struct kretprobe *rp) |
268 | { | 251 | { |
269 | struct hlist_node *node; | 252 | struct hlist_node *node; |
@@ -273,6 +256,7 @@ struct kretprobe_instance __kprobes *get_free_rp_inst(struct kretprobe *rp) | |||
273 | return NULL; | 256 | return NULL; |
274 | } | 257 | } |
275 | 258 | ||
259 | /* Called with kretprobe_lock held */ | ||
276 | static struct kretprobe_instance __kprobes *get_used_rp_inst(struct kretprobe | 260 | static struct kretprobe_instance __kprobes *get_used_rp_inst(struct kretprobe |
277 | *rp) | 261 | *rp) |
278 | { | 262 | { |
@@ -283,6 +267,7 @@ static struct kretprobe_instance __kprobes *get_used_rp_inst(struct kretprobe | |||
283 | return NULL; | 267 | return NULL; |
284 | } | 268 | } |
285 | 269 | ||
270 | /* Called with kretprobe_lock held */ | ||
286 | void __kprobes add_rp_inst(struct kretprobe_instance *ri) | 271 | void __kprobes add_rp_inst(struct kretprobe_instance *ri) |
287 | { | 272 | { |
288 | /* | 273 | /* |
@@ -301,6 +286,7 @@ void __kprobes add_rp_inst(struct kretprobe_instance *ri) | |||
301 | hlist_add_head(&ri->uflist, &ri->rp->used_instances); | 286 | hlist_add_head(&ri->uflist, &ri->rp->used_instances); |
302 | } | 287 | } |
303 | 288 | ||
289 | /* Called with kretprobe_lock held */ | ||
304 | void __kprobes recycle_rp_inst(struct kretprobe_instance *ri) | 290 | void __kprobes recycle_rp_inst(struct kretprobe_instance *ri) |
305 | { | 291 | { |
306 | /* remove rp inst off the rprobe_inst_table */ | 292 | /* remove rp inst off the rprobe_inst_table */ |
@@ -334,13 +320,13 @@ void __kprobes kprobe_flush_task(struct task_struct *tk) | |||
334 | struct hlist_node *node, *tmp; | 320 | struct hlist_node *node, *tmp; |
335 | unsigned long flags = 0; | 321 | unsigned long flags = 0; |
336 | 322 | ||
337 | spin_lock_irqsave(&kprobe_lock, flags); | 323 | spin_lock_irqsave(&kretprobe_lock, flags); |
338 | head = kretprobe_inst_table_head(current); | 324 | head = kretprobe_inst_table_head(current); |
339 | hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { | 325 | hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { |
340 | if (ri->task == tk) | 326 | if (ri->task == tk) |
341 | recycle_rp_inst(ri); | 327 | recycle_rp_inst(ri); |
342 | } | 328 | } |
343 | spin_unlock_irqrestore(&kprobe_lock, flags); | 329 | spin_unlock_irqrestore(&kretprobe_lock, flags); |
344 | } | 330 | } |
345 | 331 | ||
346 | /* | 332 | /* |
@@ -351,9 +337,12 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p, | |||
351 | struct pt_regs *regs) | 337 | struct pt_regs *regs) |
352 | { | 338 | { |
353 | struct kretprobe *rp = container_of(p, struct kretprobe, kp); | 339 | struct kretprobe *rp = container_of(p, struct kretprobe, kp); |
340 | unsigned long flags = 0; | ||
354 | 341 | ||
355 | /*TODO: consider to only swap the RA after the last pre_handler fired */ | 342 | /*TODO: consider to only swap the RA after the last pre_handler fired */ |
343 | spin_lock_irqsave(&kretprobe_lock, flags); | ||
356 | arch_prepare_kretprobe(rp, regs); | 344 | arch_prepare_kretprobe(rp, regs); |
345 | spin_unlock_irqrestore(&kretprobe_lock, flags); | ||
357 | return 0; | 346 | return 0; |
358 | } | 347 | } |
359 | 348 | ||
@@ -384,13 +373,13 @@ static int __kprobes add_new_kprobe(struct kprobe *old_p, struct kprobe *p) | |||
384 | struct kprobe *kp; | 373 | struct kprobe *kp; |
385 | 374 | ||
386 | if (p->break_handler) { | 375 | if (p->break_handler) { |
387 | list_for_each_entry(kp, &old_p->list, list) { | 376 | list_for_each_entry_rcu(kp, &old_p->list, list) { |
388 | if (kp->break_handler) | 377 | if (kp->break_handler) |
389 | return -EEXIST; | 378 | return -EEXIST; |
390 | } | 379 | } |
391 | list_add_tail(&p->list, &old_p->list); | 380 | list_add_tail_rcu(&p->list, &old_p->list); |
392 | } else | 381 | } else |
393 | list_add(&p->list, &old_p->list); | 382 | list_add_rcu(&p->list, &old_p->list); |
394 | return 0; | 383 | return 0; |
395 | } | 384 | } |
396 | 385 | ||
@@ -408,18 +397,18 @@ static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p) | |||
408 | ap->break_handler = aggr_break_handler; | 397 | ap->break_handler = aggr_break_handler; |
409 | 398 | ||
410 | INIT_LIST_HEAD(&ap->list); | 399 | INIT_LIST_HEAD(&ap->list); |
411 | list_add(&p->list, &ap->list); | 400 | list_add_rcu(&p->list, &ap->list); |
412 | 401 | ||
413 | INIT_HLIST_NODE(&ap->hlist); | 402 | INIT_HLIST_NODE(&ap->hlist); |
414 | hlist_del(&p->hlist); | 403 | hlist_del_rcu(&p->hlist); |
415 | hlist_add_head(&ap->hlist, | 404 | hlist_add_head_rcu(&ap->hlist, |
416 | &kprobe_table[hash_ptr(ap->addr, KPROBE_HASH_BITS)]); | 405 | &kprobe_table[hash_ptr(ap->addr, KPROBE_HASH_BITS)]); |
417 | } | 406 | } |
418 | 407 | ||
419 | /* | 408 | /* |
420 | * This is the second or subsequent kprobe at the address - handle | 409 | * This is the second or subsequent kprobe at the address - handle |
421 | * the intricacies | 410 | * the intricacies |
422 | * TODO: Move kcalloc outside the spinlock | 411 | * TODO: Move kcalloc outside the spin_lock |
423 | */ | 412 | */ |
424 | static int __kprobes register_aggr_kprobe(struct kprobe *old_p, | 413 | static int __kprobes register_aggr_kprobe(struct kprobe *old_p, |
425 | struct kprobe *p) | 414 | struct kprobe *p) |
@@ -445,7 +434,7 @@ static int __kprobes register_aggr_kprobe(struct kprobe *old_p, | |||
445 | static inline void cleanup_kprobe(struct kprobe *p, unsigned long flags) | 434 | static inline void cleanup_kprobe(struct kprobe *p, unsigned long flags) |
446 | { | 435 | { |
447 | arch_disarm_kprobe(p); | 436 | arch_disarm_kprobe(p); |
448 | hlist_del(&p->hlist); | 437 | hlist_del_rcu(&p->hlist); |
449 | spin_unlock_irqrestore(&kprobe_lock, flags); | 438 | spin_unlock_irqrestore(&kprobe_lock, flags); |
450 | arch_remove_kprobe(p); | 439 | arch_remove_kprobe(p); |
451 | } | 440 | } |
@@ -453,11 +442,10 @@ static inline void cleanup_kprobe(struct kprobe *p, unsigned long flags) | |||
453 | static inline void cleanup_aggr_kprobe(struct kprobe *old_p, | 442 | static inline void cleanup_aggr_kprobe(struct kprobe *old_p, |
454 | struct kprobe *p, unsigned long flags) | 443 | struct kprobe *p, unsigned long flags) |
455 | { | 444 | { |
456 | list_del(&p->list); | 445 | list_del_rcu(&p->list); |
457 | if (list_empty(&old_p->list)) { | 446 | if (list_empty(&old_p->list)) |
458 | cleanup_kprobe(old_p, flags); | 447 | cleanup_kprobe(old_p, flags); |
459 | kfree(old_p); | 448 | else |
460 | } else | ||
461 | spin_unlock_irqrestore(&kprobe_lock, flags); | 449 | spin_unlock_irqrestore(&kprobe_lock, flags); |
462 | } | 450 | } |
463 | 451 | ||
@@ -480,9 +468,9 @@ int __kprobes register_kprobe(struct kprobe *p) | |||
480 | if ((ret = arch_prepare_kprobe(p)) != 0) | 468 | if ((ret = arch_prepare_kprobe(p)) != 0) |
481 | goto rm_kprobe; | 469 | goto rm_kprobe; |
482 | 470 | ||
471 | p->nmissed = 0; | ||
483 | spin_lock_irqsave(&kprobe_lock, flags); | 472 | spin_lock_irqsave(&kprobe_lock, flags); |
484 | old_p = get_kprobe(p->addr); | 473 | old_p = get_kprobe(p->addr); |
485 | p->nmissed = 0; | ||
486 | if (old_p) { | 474 | if (old_p) { |
487 | ret = register_aggr_kprobe(old_p, p); | 475 | ret = register_aggr_kprobe(old_p, p); |
488 | goto out; | 476 | goto out; |
@@ -490,7 +478,7 @@ int __kprobes register_kprobe(struct kprobe *p) | |||
490 | 478 | ||
491 | arch_copy_kprobe(p); | 479 | arch_copy_kprobe(p); |
492 | INIT_HLIST_NODE(&p->hlist); | 480 | INIT_HLIST_NODE(&p->hlist); |
493 | hlist_add_head(&p->hlist, | 481 | hlist_add_head_rcu(&p->hlist, |
494 | &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]); | 482 | &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]); |
495 | 483 | ||
496 | arch_arm_kprobe(p); | 484 | arch_arm_kprobe(p); |
@@ -511,10 +499,16 @@ void __kprobes unregister_kprobe(struct kprobe *p) | |||
511 | spin_lock_irqsave(&kprobe_lock, flags); | 499 | spin_lock_irqsave(&kprobe_lock, flags); |
512 | old_p = get_kprobe(p->addr); | 500 | old_p = get_kprobe(p->addr); |
513 | if (old_p) { | 501 | if (old_p) { |
502 | /* cleanup_*_kprobe() does the spin_unlock_irqrestore */ | ||
514 | if (old_p->pre_handler == aggr_pre_handler) | 503 | if (old_p->pre_handler == aggr_pre_handler) |
515 | cleanup_aggr_kprobe(old_p, p, flags); | 504 | cleanup_aggr_kprobe(old_p, p, flags); |
516 | else | 505 | else |
517 | cleanup_kprobe(p, flags); | 506 | cleanup_kprobe(p, flags); |
507 | |||
508 | synchronize_sched(); | ||
509 | if (old_p->pre_handler == aggr_pre_handler && | ||
510 | list_empty(&old_p->list)) | ||
511 | kfree(old_p); | ||
518 | } else | 512 | } else |
519 | spin_unlock_irqrestore(&kprobe_lock, flags); | 513 | spin_unlock_irqrestore(&kprobe_lock, flags); |
520 | } | 514 | } |
@@ -591,13 +585,13 @@ void __kprobes unregister_kretprobe(struct kretprobe *rp) | |||
591 | 585 | ||
592 | unregister_kprobe(&rp->kp); | 586 | unregister_kprobe(&rp->kp); |
593 | /* No race here */ | 587 | /* No race here */ |
594 | spin_lock_irqsave(&kprobe_lock, flags); | 588 | spin_lock_irqsave(&kretprobe_lock, flags); |
595 | free_rp_inst(rp); | 589 | free_rp_inst(rp); |
596 | while ((ri = get_used_rp_inst(rp)) != NULL) { | 590 | while ((ri = get_used_rp_inst(rp)) != NULL) { |
597 | ri->rp = NULL; | 591 | ri->rp = NULL; |
598 | hlist_del(&ri->uflist); | 592 | hlist_del(&ri->uflist); |
599 | } | 593 | } |
600 | spin_unlock_irqrestore(&kprobe_lock, flags); | 594 | spin_unlock_irqrestore(&kretprobe_lock, flags); |
601 | } | 595 | } |
602 | 596 | ||
603 | static int __init init_kprobes(void) | 597 | static int __init init_kprobes(void) |
diff --git a/kernel/module.c b/kernel/module.c index ff5c500ab6..2ea929d51a 100644 --- a/kernel/module.c +++ b/kernel/module.c | |||
@@ -37,6 +37,7 @@ | |||
37 | #include <linux/stop_machine.h> | 37 | #include <linux/stop_machine.h> |
38 | #include <linux/device.h> | 38 | #include <linux/device.h> |
39 | #include <linux/string.h> | 39 | #include <linux/string.h> |
40 | #include <linux/sched.h> | ||
40 | #include <asm/uaccess.h> | 41 | #include <asm/uaccess.h> |
41 | #include <asm/semaphore.h> | 42 | #include <asm/semaphore.h> |
42 | #include <asm/cacheflush.h> | 43 | #include <asm/cacheflush.h> |
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 91a8942649..84af54c39e 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c | |||
@@ -497,7 +497,7 @@ static void process_timer_rebalance(struct task_struct *p, | |||
497 | left = cputime_div(cputime_sub(expires.cpu, val.cpu), | 497 | left = cputime_div(cputime_sub(expires.cpu, val.cpu), |
498 | nthreads); | 498 | nthreads); |
499 | do { | 499 | do { |
500 | if (!unlikely(t->flags & PF_EXITING)) { | 500 | if (likely(!(t->flags & PF_EXITING))) { |
501 | ticks = cputime_add(prof_ticks(t), left); | 501 | ticks = cputime_add(prof_ticks(t), left); |
502 | if (cputime_eq(t->it_prof_expires, | 502 | if (cputime_eq(t->it_prof_expires, |
503 | cputime_zero) || | 503 | cputime_zero) || |
@@ -512,7 +512,7 @@ static void process_timer_rebalance(struct task_struct *p, | |||
512 | left = cputime_div(cputime_sub(expires.cpu, val.cpu), | 512 | left = cputime_div(cputime_sub(expires.cpu, val.cpu), |
513 | nthreads); | 513 | nthreads); |
514 | do { | 514 | do { |
515 | if (!unlikely(t->flags & PF_EXITING)) { | 515 | if (likely(!(t->flags & PF_EXITING))) { |
516 | ticks = cputime_add(virt_ticks(t), left); | 516 | ticks = cputime_add(virt_ticks(t), left); |
517 | if (cputime_eq(t->it_virt_expires, | 517 | if (cputime_eq(t->it_virt_expires, |
518 | cputime_zero) || | 518 | cputime_zero) || |
@@ -527,7 +527,7 @@ static void process_timer_rebalance(struct task_struct *p, | |||
527 | nsleft = expires.sched - val.sched; | 527 | nsleft = expires.sched - val.sched; |
528 | do_div(nsleft, nthreads); | 528 | do_div(nsleft, nthreads); |
529 | do { | 529 | do { |
530 | if (!unlikely(t->flags & PF_EXITING)) { | 530 | if (likely(!(t->flags & PF_EXITING))) { |
531 | ns = t->sched_time + nsleft; | 531 | ns = t->sched_time + nsleft; |
532 | if (t->it_sched_expires == 0 || | 532 | if (t->it_sched_expires == 0 || |
533 | t->it_sched_expires > ns) { | 533 | t->it_sched_expires > ns) { |
diff --git a/kernel/power/main.c b/kernel/power/main.c index 18d7d693fb..6ee2cad530 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c | |||
@@ -167,7 +167,7 @@ static int enter_state(suspend_state_t state) | |||
167 | { | 167 | { |
168 | int error; | 168 | int error; |
169 | 169 | ||
170 | if (pm_ops->valid && !pm_ops->valid(state)) | 170 | if (pm_ops && pm_ops->valid && !pm_ops->valid(state)) |
171 | return -ENODEV; | 171 | return -ENODEV; |
172 | if (down_trylock(&pm_sem)) | 172 | if (down_trylock(&pm_sem)) |
173 | return -EBUSY; | 173 | return -EBUSY; |
diff --git a/kernel/power/power.h b/kernel/power/power.h index d4fd96a135..6c042b5ee1 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h | |||
@@ -65,8 +65,8 @@ extern suspend_pagedir_t *pagedir_save; | |||
65 | extern asmlinkage int swsusp_arch_suspend(void); | 65 | extern asmlinkage int swsusp_arch_suspend(void); |
66 | extern asmlinkage int swsusp_arch_resume(void); | 66 | extern asmlinkage int swsusp_arch_resume(void); |
67 | 67 | ||
68 | extern int restore_highmem(void); | 68 | extern void free_pagedir(struct pbe *pblist); |
69 | extern struct pbe * alloc_pagedir(unsigned nr_pages); | 69 | extern struct pbe *alloc_pagedir(unsigned nr_pages, gfp_t gfp_mask, int safe_needed); |
70 | extern void create_pbe_list(struct pbe *pblist, unsigned nr_pages); | 70 | extern void create_pbe_list(struct pbe *pblist, unsigned nr_pages); |
71 | extern void swsusp_free(void); | 71 | extern void swsusp_free(void); |
72 | extern int enough_swap(unsigned nr_pages); | 72 | extern int alloc_data_pages(struct pbe *pblist, gfp_t gfp_mask, int safe_needed); |
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 42a6287043..4a6dbcefd3 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c | |||
@@ -88,8 +88,7 @@ static int save_highmem_zone(struct zone *zone) | |||
88 | return 0; | 88 | return 0; |
89 | } | 89 | } |
90 | 90 | ||
91 | 91 | int save_highmem(void) | |
92 | static int save_highmem(void) | ||
93 | { | 92 | { |
94 | struct zone *zone; | 93 | struct zone *zone; |
95 | int res = 0; | 94 | int res = 0; |
@@ -120,11 +119,7 @@ int restore_highmem(void) | |||
120 | } | 119 | } |
121 | return 0; | 120 | return 0; |
122 | } | 121 | } |
123 | #else | 122 | #endif |
124 | static int save_highmem(void) { return 0; } | ||
125 | int restore_highmem(void) { return 0; } | ||
126 | #endif /* CONFIG_HIGHMEM */ | ||
127 | |||
128 | 123 | ||
129 | static int pfn_is_nosave(unsigned long pfn) | 124 | static int pfn_is_nosave(unsigned long pfn) |
130 | { | 125 | { |
@@ -168,9 +163,8 @@ static unsigned count_data_pages(void) | |||
168 | { | 163 | { |
169 | struct zone *zone; | 164 | struct zone *zone; |
170 | unsigned long zone_pfn; | 165 | unsigned long zone_pfn; |
171 | unsigned n; | 166 | unsigned int n = 0; |
172 | 167 | ||
173 | n = 0; | ||
174 | for_each_zone (zone) { | 168 | for_each_zone (zone) { |
175 | if (is_highmem(zone)) | 169 | if (is_highmem(zone)) |
176 | continue; | 170 | continue; |
@@ -217,7 +211,7 @@ static void copy_data_pages(struct pbe *pblist) | |||
217 | * free_pagedir - free pages allocated with alloc_pagedir() | 211 | * free_pagedir - free pages allocated with alloc_pagedir() |
218 | */ | 212 | */ |
219 | 213 | ||
220 | static void free_pagedir(struct pbe *pblist) | 214 | void free_pagedir(struct pbe *pblist) |
221 | { | 215 | { |
222 | struct pbe *pbe; | 216 | struct pbe *pbe; |
223 | 217 | ||
@@ -250,10 +244,10 @@ static inline void fill_pb_page(struct pbe *pbpage) | |||
250 | * of memory pages allocated with alloc_pagedir() | 244 | * of memory pages allocated with alloc_pagedir() |
251 | */ | 245 | */ |
252 | 246 | ||
253 | void create_pbe_list(struct pbe *pblist, unsigned nr_pages) | 247 | void create_pbe_list(struct pbe *pblist, unsigned int nr_pages) |
254 | { | 248 | { |
255 | struct pbe *pbpage, *p; | 249 | struct pbe *pbpage, *p; |
256 | unsigned num = PBES_PER_PAGE; | 250 | unsigned int num = PBES_PER_PAGE; |
257 | 251 | ||
258 | for_each_pb_page (pbpage, pblist) { | 252 | for_each_pb_page (pbpage, pblist) { |
259 | if (num >= nr_pages) | 253 | if (num >= nr_pages) |
@@ -270,9 +264,30 @@ void create_pbe_list(struct pbe *pblist, unsigned nr_pages) | |||
270 | pr_debug("create_pbe_list(): initialized %d PBEs\n", num); | 264 | pr_debug("create_pbe_list(): initialized %d PBEs\n", num); |
271 | } | 265 | } |
272 | 266 | ||
273 | static void *alloc_image_page(void) | 267 | /** |
268 | * @safe_needed - on resume, for storing the PBE list and the image, | ||
269 | * we can only use memory pages that do not conflict with the pages | ||
270 | * which had been used before suspend. | ||
271 | * | ||
272 | * The unsafe pages are marked with the PG_nosave_free flag | ||
273 | * | ||
274 | * Allocated but unusable (ie eaten) memory pages should be marked | ||
275 | * so that swsusp_free() can release them | ||
276 | */ | ||
277 | |||
278 | static inline void *alloc_image_page(gfp_t gfp_mask, int safe_needed) | ||
274 | { | 279 | { |
275 | void *res = (void *)get_zeroed_page(GFP_ATOMIC | __GFP_COLD); | 280 | void *res; |
281 | |||
282 | if (safe_needed) | ||
283 | do { | ||
284 | res = (void *)get_zeroed_page(gfp_mask); | ||
285 | if (res && PageNosaveFree(virt_to_page(res))) | ||
286 | /* This is for swsusp_free() */ | ||
287 | SetPageNosave(virt_to_page(res)); | ||
288 | } while (res && PageNosaveFree(virt_to_page(res))); | ||
289 | else | ||
290 | res = (void *)get_zeroed_page(gfp_mask); | ||
276 | if (res) { | 291 | if (res) { |
277 | SetPageNosave(virt_to_page(res)); | 292 | SetPageNosave(virt_to_page(res)); |
278 | SetPageNosaveFree(virt_to_page(res)); | 293 | SetPageNosaveFree(virt_to_page(res)); |
@@ -280,6 +295,11 @@ static void *alloc_image_page(void) | |||
280 | return res; | 295 | return res; |
281 | } | 296 | } |
282 | 297 | ||
298 | unsigned long get_safe_page(gfp_t gfp_mask) | ||
299 | { | ||
300 | return (unsigned long)alloc_image_page(gfp_mask, 1); | ||
301 | } | ||
302 | |||
283 | /** | 303 | /** |
284 | * alloc_pagedir - Allocate the page directory. | 304 | * alloc_pagedir - Allocate the page directory. |
285 | * | 305 | * |
@@ -293,21 +313,21 @@ static void *alloc_image_page(void) | |||
293 | * On each page we set up a list of struct_pbe elements. | 313 | * On each page we set up a list of struct_pbe elements. |
294 | */ | 314 | */ |
295 | 315 | ||
296 | struct pbe *alloc_pagedir(unsigned nr_pages) | 316 | struct pbe *alloc_pagedir(unsigned int nr_pages, gfp_t gfp_mask, int safe_needed) |
297 | { | 317 | { |
298 | unsigned num; | 318 | unsigned int num; |
299 | struct pbe *pblist, *pbe; | 319 | struct pbe *pblist, *pbe; |
300 | 320 | ||
301 | if (!nr_pages) | 321 | if (!nr_pages) |
302 | return NULL; | 322 | return NULL; |
303 | 323 | ||
304 | pr_debug("alloc_pagedir(): nr_pages = %d\n", nr_pages); | 324 | pr_debug("alloc_pagedir(): nr_pages = %d\n", nr_pages); |
305 | pblist = alloc_image_page(); | 325 | pblist = alloc_image_page(gfp_mask, safe_needed); |
306 | /* FIXME: rewrite this ugly loop */ | 326 | /* FIXME: rewrite this ugly loop */ |
307 | for (pbe = pblist, num = PBES_PER_PAGE; pbe && num < nr_pages; | 327 | for (pbe = pblist, num = PBES_PER_PAGE; pbe && num < nr_pages; |
308 | pbe = pbe->next, num += PBES_PER_PAGE) { | 328 | pbe = pbe->next, num += PBES_PER_PAGE) { |
309 | pbe += PB_PAGE_SKIP; | 329 | pbe += PB_PAGE_SKIP; |
310 | pbe->next = alloc_image_page(); | 330 | pbe->next = alloc_image_page(gfp_mask, safe_needed); |
311 | } | 331 | } |
312 | if (!pbe) { /* get_zeroed_page() failed */ | 332 | if (!pbe) { /* get_zeroed_page() failed */ |
313 | free_pagedir(pblist); | 333 | free_pagedir(pblist); |
@@ -329,7 +349,7 @@ void swsusp_free(void) | |||
329 | for_each_zone(zone) { | 349 | for_each_zone(zone) { |
330 | for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) | 350 | for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) |
331 | if (pfn_valid(zone_pfn + zone->zone_start_pfn)) { | 351 | if (pfn_valid(zone_pfn + zone->zone_start_pfn)) { |
332 | struct page * page; | 352 | struct page *page; |
333 | page = pfn_to_page(zone_pfn + zone->zone_start_pfn); | 353 | page = pfn_to_page(zone_pfn + zone->zone_start_pfn); |
334 | if (PageNosave(page) && PageNosaveFree(page)) { | 354 | if (PageNosave(page) && PageNosaveFree(page)) { |
335 | ClearPageNosave(page); | 355 | ClearPageNosave(page); |
@@ -348,31 +368,39 @@ void swsusp_free(void) | |||
348 | * free pages. | 368 | * free pages. |
349 | */ | 369 | */ |
350 | 370 | ||
351 | static int enough_free_mem(unsigned nr_pages) | 371 | static int enough_free_mem(unsigned int nr_pages) |
352 | { | 372 | { |
353 | pr_debug("swsusp: available memory: %u pages\n", nr_free_pages()); | 373 | pr_debug("swsusp: available memory: %u pages\n", nr_free_pages()); |
354 | return nr_free_pages() > (nr_pages + PAGES_FOR_IO + | 374 | return nr_free_pages() > (nr_pages + PAGES_FOR_IO + |
355 | (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE); | 375 | (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE); |
356 | } | 376 | } |
357 | 377 | ||
378 | int alloc_data_pages(struct pbe *pblist, gfp_t gfp_mask, int safe_needed) | ||
379 | { | ||
380 | struct pbe *p; | ||
358 | 381 | ||
359 | static struct pbe *swsusp_alloc(unsigned nr_pages) | 382 | for_each_pbe (p, pblist) { |
383 | p->address = (unsigned long)alloc_image_page(gfp_mask, safe_needed); | ||
384 | if (!p->address) | ||
385 | return -ENOMEM; | ||
386 | } | ||
387 | return 0; | ||
388 | } | ||
389 | |||
390 | static struct pbe *swsusp_alloc(unsigned int nr_pages) | ||
360 | { | 391 | { |
361 | struct pbe *pblist, *p; | 392 | struct pbe *pblist; |
362 | 393 | ||
363 | if (!(pblist = alloc_pagedir(nr_pages))) { | 394 | if (!(pblist = alloc_pagedir(nr_pages, GFP_ATOMIC | __GFP_COLD, 0))) { |
364 | printk(KERN_ERR "suspend: Allocating pagedir failed.\n"); | 395 | printk(KERN_ERR "suspend: Allocating pagedir failed.\n"); |
365 | return NULL; | 396 | return NULL; |
366 | } | 397 | } |
367 | create_pbe_list(pblist, nr_pages); | 398 | create_pbe_list(pblist, nr_pages); |
368 | 399 | ||
369 | for_each_pbe (p, pblist) { | 400 | if (alloc_data_pages(pblist, GFP_ATOMIC | __GFP_COLD, 0)) { |
370 | p->address = (unsigned long)alloc_image_page(); | 401 | printk(KERN_ERR "suspend: Allocating image pages failed.\n"); |
371 | if (!p->address) { | 402 | swsusp_free(); |
372 | printk(KERN_ERR "suspend: Allocating image pages failed.\n"); | 403 | return NULL; |
373 | swsusp_free(); | ||
374 | return NULL; | ||
375 | } | ||
376 | } | 404 | } |
377 | 405 | ||
378 | return pblist; | 406 | return pblist; |
@@ -380,14 +408,9 @@ static struct pbe *swsusp_alloc(unsigned nr_pages) | |||
380 | 408 | ||
381 | asmlinkage int swsusp_save(void) | 409 | asmlinkage int swsusp_save(void) |
382 | { | 410 | { |
383 | unsigned nr_pages; | 411 | unsigned int nr_pages; |
384 | 412 | ||
385 | pr_debug("swsusp: critical section: \n"); | 413 | pr_debug("swsusp: critical section: \n"); |
386 | if (save_highmem()) { | ||
387 | printk(KERN_CRIT "swsusp: Not enough free pages for highmem\n"); | ||
388 | restore_highmem(); | ||
389 | return -ENOMEM; | ||
390 | } | ||
391 | 414 | ||
392 | drain_local_pages(); | 415 | drain_local_pages(); |
393 | nr_pages = count_data_pages(); | 416 | nr_pages = count_data_pages(); |
@@ -407,11 +430,6 @@ asmlinkage int swsusp_save(void) | |||
407 | return -ENOMEM; | 430 | return -ENOMEM; |
408 | } | 431 | } |
409 | 432 | ||
410 | if (!enough_swap(nr_pages)) { | ||
411 | printk(KERN_ERR "swsusp: Not enough free swap\n"); | ||
412 | return -ENOSPC; | ||
413 | } | ||
414 | |||
415 | pagedir_nosave = swsusp_alloc(nr_pages); | 433 | pagedir_nosave = swsusp_alloc(nr_pages); |
416 | if (!pagedir_nosave) | 434 | if (!pagedir_nosave) |
417 | return -ENOMEM; | 435 | return -ENOMEM; |
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index 12db1d2ad6..c05f46e734 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c | |||
@@ -73,6 +73,14 @@ | |||
73 | 73 | ||
74 | #include "power.h" | 74 | #include "power.h" |
75 | 75 | ||
76 | #ifdef CONFIG_HIGHMEM | ||
77 | int save_highmem(void); | ||
78 | int restore_highmem(void); | ||
79 | #else | ||
80 | static int save_highmem(void) { return 0; } | ||
81 | static int restore_highmem(void) { return 0; } | ||
82 | #endif | ||
83 | |||
76 | #define CIPHER "aes" | 84 | #define CIPHER "aes" |
77 | #define MAXKEY 32 | 85 | #define MAXKEY 32 |
78 | #define MAXIV 32 | 86 | #define MAXIV 32 |
@@ -85,18 +93,11 @@ unsigned int nr_copy_pages __nosavedata = 0; | |||
85 | /* Suspend pagedir is allocated before final copy, therefore it | 93 | /* Suspend pagedir is allocated before final copy, therefore it |
86 | must be freed after resume | 94 | must be freed after resume |
87 | 95 | ||
88 | Warning: this is evil. There are actually two pagedirs at time of | ||
89 | resume. One is "pagedir_save", which is empty frame allocated at | ||
90 | time of suspend, that must be freed. Second is "pagedir_nosave", | ||
91 | allocated at time of resume, that travels through memory not to | ||
92 | collide with anything. | ||
93 | |||
94 | Warning: this is even more evil than it seems. Pagedirs this file | 96 | Warning: this is even more evil than it seems. Pagedirs this file |
95 | talks about are completely different from page directories used by | 97 | talks about are completely different from page directories used by |
96 | MMU hardware. | 98 | MMU hardware. |
97 | */ | 99 | */ |
98 | suspend_pagedir_t *pagedir_nosave __nosavedata = NULL; | 100 | suspend_pagedir_t *pagedir_nosave __nosavedata = NULL; |
99 | suspend_pagedir_t *pagedir_save; | ||
100 | 101 | ||
101 | #define SWSUSP_SIG "S1SUSPEND" | 102 | #define SWSUSP_SIG "S1SUSPEND" |
102 | 103 | ||
@@ -122,8 +123,8 @@ static struct swsusp_info swsusp_info; | |||
122 | static unsigned short swapfile_used[MAX_SWAPFILES]; | 123 | static unsigned short swapfile_used[MAX_SWAPFILES]; |
123 | static unsigned short root_swap; | 124 | static unsigned short root_swap; |
124 | 125 | ||
125 | static int write_page(unsigned long addr, swp_entry_t * loc); | 126 | static int write_page(unsigned long addr, swp_entry_t *loc); |
126 | static int bio_read_page(pgoff_t page_off, void * page); | 127 | static int bio_read_page(pgoff_t page_off, void *page); |
127 | 128 | ||
128 | static u8 key_iv[MAXKEY+MAXIV]; | 129 | static u8 key_iv[MAXKEY+MAXIV]; |
129 | 130 | ||
@@ -355,7 +356,7 @@ static void lock_swapdevices(void) | |||
355 | * This is a partial improvement, since we will at least return other | 356 | * This is a partial improvement, since we will at least return other |
356 | * errors, though we need to eventually fix the damn code. | 357 | * errors, though we need to eventually fix the damn code. |
357 | */ | 358 | */ |
358 | static int write_page(unsigned long addr, swp_entry_t * loc) | 359 | static int write_page(unsigned long addr, swp_entry_t *loc) |
359 | { | 360 | { |
360 | swp_entry_t entry; | 361 | swp_entry_t entry; |
361 | int error = 0; | 362 | int error = 0; |
@@ -383,9 +384,9 @@ static int write_page(unsigned long addr, swp_entry_t * loc) | |||
383 | static void data_free(void) | 384 | static void data_free(void) |
384 | { | 385 | { |
385 | swp_entry_t entry; | 386 | swp_entry_t entry; |
386 | struct pbe * p; | 387 | struct pbe *p; |
387 | 388 | ||
388 | for_each_pbe(p, pagedir_nosave) { | 389 | for_each_pbe (p, pagedir_nosave) { |
389 | entry = p->swap_address; | 390 | entry = p->swap_address; |
390 | if (entry.val) | 391 | if (entry.val) |
391 | swap_free(entry); | 392 | swap_free(entry); |
@@ -492,8 +493,8 @@ static void free_pagedir_entries(void) | |||
492 | static int write_pagedir(void) | 493 | static int write_pagedir(void) |
493 | { | 494 | { |
494 | int error = 0; | 495 | int error = 0; |
495 | unsigned n = 0; | 496 | unsigned int n = 0; |
496 | struct pbe * pbe; | 497 | struct pbe *pbe; |
497 | 498 | ||
498 | printk( "Writing pagedir..."); | 499 | printk( "Writing pagedir..."); |
499 | for_each_pb_page (pbe, pagedir_nosave) { | 500 | for_each_pb_page (pbe, pagedir_nosave) { |
@@ -507,6 +508,26 @@ static int write_pagedir(void) | |||
507 | } | 508 | } |
508 | 509 | ||
509 | /** | 510 | /** |
511 | * enough_swap - Make sure we have enough swap to save the image. | ||
512 | * | ||
513 | * Returns TRUE or FALSE after checking the total amount of swap | ||
514 | * space avaiable. | ||
515 | * | ||
516 | * FIXME: si_swapinfo(&i) returns all swap devices information. | ||
517 | * We should only consider resume_device. | ||
518 | */ | ||
519 | |||
520 | static int enough_swap(unsigned int nr_pages) | ||
521 | { | ||
522 | struct sysinfo i; | ||
523 | |||
524 | si_swapinfo(&i); | ||
525 | pr_debug("swsusp: available swap: %lu pages\n", i.freeswap); | ||
526 | return i.freeswap > (nr_pages + PAGES_FOR_IO + | ||
527 | (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE); | ||
528 | } | ||
529 | |||
530 | /** | ||
510 | * write_suspend_image - Write entire image and metadata. | 531 | * write_suspend_image - Write entire image and metadata. |
511 | * | 532 | * |
512 | */ | 533 | */ |
@@ -514,6 +535,11 @@ static int write_suspend_image(void) | |||
514 | { | 535 | { |
515 | int error; | 536 | int error; |
516 | 537 | ||
538 | if (!enough_swap(nr_copy_pages)) { | ||
539 | printk(KERN_ERR "swsusp: Not enough free swap\n"); | ||
540 | return -ENOSPC; | ||
541 | } | ||
542 | |||
517 | init_header(); | 543 | init_header(); |
518 | if ((error = data_write())) | 544 | if ((error = data_write())) |
519 | goto FreeData; | 545 | goto FreeData; |
@@ -533,27 +559,6 @@ static int write_suspend_image(void) | |||
533 | goto Done; | 559 | goto Done; |
534 | } | 560 | } |
535 | 561 | ||
536 | /** | ||
537 | * enough_swap - Make sure we have enough swap to save the image. | ||
538 | * | ||
539 | * Returns TRUE or FALSE after checking the total amount of swap | ||
540 | * space available. | ||
541 | * | ||
542 | * FIXME: si_swapinfo(&i) returns all swap devices information. | ||
543 | * We should only consider resume_device. | ||
544 | */ | ||
545 | |||
546 | int enough_swap(unsigned nr_pages) | ||
547 | { | ||
548 | struct sysinfo i; | ||
549 | |||
550 | si_swapinfo(&i); | ||
551 | pr_debug("swsusp: available swap: %lu pages\n", i.freeswap); | ||
552 | return i.freeswap > (nr_pages + PAGES_FOR_IO + | ||
553 | (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE); | ||
554 | } | ||
555 | |||
556 | |||
557 | /* It is important _NOT_ to umount filesystems at this point. We want | 562 | /* It is important _NOT_ to umount filesystems at this point. We want |
558 | * them synced (in case something goes wrong) but we DO not want to mark | 563 | * them synced (in case something goes wrong) but we DO not want to mark |
559 | * filesystem clean: it is not. (And it does not matter, if we resume | 564 | * filesystem clean: it is not. (And it does not matter, if we resume |
@@ -563,12 +568,15 @@ int swsusp_write(void) | |||
563 | { | 568 | { |
564 | int error; | 569 | int error; |
565 | 570 | ||
571 | if ((error = swsusp_swap_check())) { | ||
572 | printk(KERN_ERR "swsusp: cannot find swap device, try swapon -a.\n"); | ||
573 | return error; | ||
574 | } | ||
566 | lock_swapdevices(); | 575 | lock_swapdevices(); |
567 | error = write_suspend_image(); | 576 | error = write_suspend_image(); |
568 | /* This will unlock ignored swap devices since writing is finished */ | 577 | /* This will unlock ignored swap devices since writing is finished */ |
569 | lock_swapdevices(); | 578 | lock_swapdevices(); |
570 | return error; | 579 | return error; |
571 | |||
572 | } | 580 | } |
573 | 581 | ||
574 | 582 | ||
@@ -576,6 +584,7 @@ int swsusp_write(void) | |||
576 | int swsusp_suspend(void) | 584 | int swsusp_suspend(void) |
577 | { | 585 | { |
578 | int error; | 586 | int error; |
587 | |||
579 | if ((error = arch_prepare_suspend())) | 588 | if ((error = arch_prepare_suspend())) |
580 | return error; | 589 | return error; |
581 | local_irq_disable(); | 590 | local_irq_disable(); |
@@ -587,15 +596,12 @@ int swsusp_suspend(void) | |||
587 | */ | 596 | */ |
588 | if ((error = device_power_down(PMSG_FREEZE))) { | 597 | if ((error = device_power_down(PMSG_FREEZE))) { |
589 | printk(KERN_ERR "Some devices failed to power down, aborting suspend\n"); | 598 | printk(KERN_ERR "Some devices failed to power down, aborting suspend\n"); |
590 | local_irq_enable(); | 599 | goto Enable_irqs; |
591 | return error; | ||
592 | } | 600 | } |
593 | 601 | ||
594 | if ((error = swsusp_swap_check())) { | 602 | if ((error = save_highmem())) { |
595 | printk(KERN_ERR "swsusp: cannot find swap device, try swapon -a.\n"); | 603 | printk(KERN_ERR "swsusp: Not enough free pages for highmem\n"); |
596 | device_power_up(); | 604 | goto Restore_highmem; |
597 | local_irq_enable(); | ||
598 | return error; | ||
599 | } | 605 | } |
600 | 606 | ||
601 | save_processor_state(); | 607 | save_processor_state(); |
@@ -603,8 +609,10 @@ int swsusp_suspend(void) | |||
603 | printk(KERN_ERR "Error %d suspending\n", error); | 609 | printk(KERN_ERR "Error %d suspending\n", error); |
604 | /* Restore control flow magically appears here */ | 610 | /* Restore control flow magically appears here */ |
605 | restore_processor_state(); | 611 | restore_processor_state(); |
612 | Restore_highmem: | ||
606 | restore_highmem(); | 613 | restore_highmem(); |
607 | device_power_up(); | 614 | device_power_up(); |
615 | Enable_irqs: | ||
608 | local_irq_enable(); | 616 | local_irq_enable(); |
609 | return error; | 617 | return error; |
610 | } | 618 | } |
@@ -636,127 +644,43 @@ int swsusp_resume(void) | |||
636 | } | 644 | } |
637 | 645 | ||
638 | /** | 646 | /** |
639 | * On resume, for storing the PBE list and the image, | 647 | * mark_unsafe_pages - mark the pages that cannot be used for storing |
640 | * we can only use memory pages that do not conflict with the pages | 648 | * the image during resume, because they conflict with the pages that |
641 | * which had been used before suspend. | 649 | * had been used before suspend |
642 | * | ||
643 | * We don't know which pages are usable until we allocate them. | ||
644 | * | ||
645 | * Allocated but unusable (ie eaten) memory pages are marked so that | ||
646 | * swsusp_free() can release them | ||
647 | */ | ||
648 | |||
649 | unsigned long get_safe_page(gfp_t gfp_mask) | ||
650 | { | ||
651 | unsigned long m; | ||
652 | |||
653 | do { | ||
654 | m = get_zeroed_page(gfp_mask); | ||
655 | if (m && PageNosaveFree(virt_to_page(m))) | ||
656 | /* This is for swsusp_free() */ | ||
657 | SetPageNosave(virt_to_page(m)); | ||
658 | } while (m && PageNosaveFree(virt_to_page(m))); | ||
659 | if (m) { | ||
660 | /* This is for swsusp_free() */ | ||
661 | SetPageNosave(virt_to_page(m)); | ||
662 | SetPageNosaveFree(virt_to_page(m)); | ||
663 | } | ||
664 | return m; | ||
665 | } | ||
666 | |||
667 | /** | ||
668 | * check_pagedir - We ensure here that pages that the PBEs point to | ||
669 | * won't collide with pages where we're going to restore from the loaded | ||
670 | * pages later | ||
671 | */ | ||
672 | |||
673 | static int check_pagedir(struct pbe *pblist) | ||
674 | { | ||
675 | struct pbe *p; | ||
676 | |||
677 | /* This is necessary, so that we can free allocated pages | ||
678 | * in case of failure | ||
679 | */ | ||
680 | for_each_pbe (p, pblist) | ||
681 | p->address = 0UL; | ||
682 | |||
683 | for_each_pbe (p, pblist) { | ||
684 | p->address = get_safe_page(GFP_ATOMIC); | ||
685 | if (!p->address) | ||
686 | return -ENOMEM; | ||
687 | } | ||
688 | return 0; | ||
689 | } | ||
690 | |||
691 | /** | ||
692 | * swsusp_pagedir_relocate - It is possible, that some memory pages | ||
693 | * occupied by the list of PBEs collide with pages where we're going to | ||
694 | * restore from the loaded pages later. We relocate them here. | ||
695 | */ | 650 | */ |
696 | 651 | ||
697 | static struct pbe * swsusp_pagedir_relocate(struct pbe *pblist) | 652 | static void mark_unsafe_pages(struct pbe *pblist) |
698 | { | 653 | { |
699 | struct zone *zone; | 654 | struct zone *zone; |
700 | unsigned long zone_pfn; | 655 | unsigned long zone_pfn; |
701 | struct pbe *pbpage, *tail, *p; | 656 | struct pbe *p; |
702 | void *m; | ||
703 | int rel = 0; | ||
704 | 657 | ||
705 | if (!pblist) /* a sanity check */ | 658 | if (!pblist) /* a sanity check */ |
706 | return NULL; | 659 | return; |
707 | |||
708 | pr_debug("swsusp: Relocating pagedir (%lu pages to check)\n", | ||
709 | swsusp_info.pagedir_pages); | ||
710 | 660 | ||
711 | /* Clear page flags */ | 661 | /* Clear page flags */ |
712 | |||
713 | for_each_zone (zone) { | 662 | for_each_zone (zone) { |
714 | for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) | 663 | for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) |
715 | if (pfn_valid(zone_pfn + zone->zone_start_pfn)) | 664 | if (pfn_valid(zone_pfn + zone->zone_start_pfn)) |
716 | ClearPageNosaveFree(pfn_to_page(zone_pfn + | 665 | ClearPageNosaveFree(pfn_to_page(zone_pfn + |
717 | zone->zone_start_pfn)); | 666 | zone->zone_start_pfn)); |
718 | } | 667 | } |
719 | 668 | ||
720 | /* Mark orig addresses */ | 669 | /* Mark orig addresses */ |
721 | |||
722 | for_each_pbe (p, pblist) | 670 | for_each_pbe (p, pblist) |
723 | SetPageNosaveFree(virt_to_page(p->orig_address)); | 671 | SetPageNosaveFree(virt_to_page(p->orig_address)); |
724 | 672 | ||
725 | tail = pblist + PB_PAGE_SKIP; | 673 | } |
726 | |||
727 | /* Relocate colliding pages */ | ||
728 | |||
729 | for_each_pb_page (pbpage, pblist) { | ||
730 | if (PageNosaveFree(virt_to_page((unsigned long)pbpage))) { | ||
731 | m = (void *)get_safe_page(GFP_ATOMIC | __GFP_COLD); | ||
732 | if (!m) | ||
733 | return NULL; | ||
734 | memcpy(m, (void *)pbpage, PAGE_SIZE); | ||
735 | if (pbpage == pblist) | ||
736 | pblist = (struct pbe *)m; | ||
737 | else | ||
738 | tail->next = (struct pbe *)m; | ||
739 | pbpage = (struct pbe *)m; | ||
740 | |||
741 | /* We have to link the PBEs again */ | ||
742 | for (p = pbpage; p < pbpage + PB_PAGE_SKIP; p++) | ||
743 | if (p->next) /* needed to save the end */ | ||
744 | p->next = p + 1; | ||
745 | |||
746 | rel++; | ||
747 | } | ||
748 | tail = pbpage + PB_PAGE_SKIP; | ||
749 | } | ||
750 | 674 | ||
751 | /* This is for swsusp_free() */ | 675 | static void copy_page_backup_list(struct pbe *dst, struct pbe *src) |
752 | for_each_pb_page (pbpage, pblist) { | 676 | { |
753 | SetPageNosave(virt_to_page(pbpage)); | 677 | /* We assume both lists contain the same number of elements */ |
754 | SetPageNosaveFree(virt_to_page(pbpage)); | 678 | while (src) { |
679 | dst->orig_address = src->orig_address; | ||
680 | dst->swap_address = src->swap_address; | ||
681 | dst = dst->next; | ||
682 | src = src->next; | ||
755 | } | 683 | } |
756 | |||
757 | printk("swsusp: Relocated %d pages\n", rel); | ||
758 | |||
759 | return pblist; | ||
760 | } | 684 | } |
761 | 685 | ||
762 | /* | 686 | /* |
@@ -770,7 +694,7 @@ static struct pbe * swsusp_pagedir_relocate(struct pbe *pblist) | |||
770 | 694 | ||
771 | static atomic_t io_done = ATOMIC_INIT(0); | 695 | static atomic_t io_done = ATOMIC_INIT(0); |
772 | 696 | ||
773 | static int end_io(struct bio * bio, unsigned int num, int err) | 697 | static int end_io(struct bio *bio, unsigned int num, int err) |
774 | { | 698 | { |
775 | if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) | 699 | if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) |
776 | panic("I/O error reading memory image"); | 700 | panic("I/O error reading memory image"); |
@@ -778,7 +702,7 @@ static int end_io(struct bio * bio, unsigned int num, int err) | |||
778 | return 0; | 702 | return 0; |
779 | } | 703 | } |
780 | 704 | ||
781 | static struct block_device * resume_bdev; | 705 | static struct block_device *resume_bdev; |
782 | 706 | ||
783 | /** | 707 | /** |
784 | * submit - submit BIO request. | 708 | * submit - submit BIO request. |
@@ -791,10 +715,10 @@ static struct block_device * resume_bdev; | |||
791 | * Then submit it and wait. | 715 | * Then submit it and wait. |
792 | */ | 716 | */ |
793 | 717 | ||
794 | static int submit(int rw, pgoff_t page_off, void * page) | 718 | static int submit(int rw, pgoff_t page_off, void *page) |
795 | { | 719 | { |
796 | int error = 0; | 720 | int error = 0; |
797 | struct bio * bio; | 721 | struct bio *bio; |
798 | 722 | ||
799 | bio = bio_alloc(GFP_ATOMIC, 1); | 723 | bio = bio_alloc(GFP_ATOMIC, 1); |
800 | if (!bio) | 724 | if (!bio) |
@@ -823,12 +747,12 @@ static int submit(int rw, pgoff_t page_off, void * page) | |||
823 | return error; | 747 | return error; |
824 | } | 748 | } |
825 | 749 | ||
826 | static int bio_read_page(pgoff_t page_off, void * page) | 750 | static int bio_read_page(pgoff_t page_off, void *page) |
827 | { | 751 | { |
828 | return submit(READ, page_off, page); | 752 | return submit(READ, page_off, page); |
829 | } | 753 | } |
830 | 754 | ||
831 | static int bio_write_page(pgoff_t page_off, void * page) | 755 | static int bio_write_page(pgoff_t page_off, void *page) |
832 | { | 756 | { |
833 | return submit(WRITE, page_off, page); | 757 | return submit(WRITE, page_off, page); |
834 | } | 758 | } |
@@ -838,7 +762,7 @@ static int bio_write_page(pgoff_t page_off, void * page) | |||
838 | * I really don't think that it's foolproof but more than nothing.. | 762 | * I really don't think that it's foolproof but more than nothing.. |
839 | */ | 763 | */ |
840 | 764 | ||
841 | static const char * sanity_check(void) | 765 | static const char *sanity_check(void) |
842 | { | 766 | { |
843 | dump_info(); | 767 | dump_info(); |
844 | if (swsusp_info.version_code != LINUX_VERSION_CODE) | 768 | if (swsusp_info.version_code != LINUX_VERSION_CODE) |
@@ -864,7 +788,7 @@ static const char * sanity_check(void) | |||
864 | 788 | ||
865 | static int check_header(void) | 789 | static int check_header(void) |
866 | { | 790 | { |
867 | const char * reason = NULL; | 791 | const char *reason = NULL; |
868 | int error; | 792 | int error; |
869 | 793 | ||
870 | if ((error = bio_read_page(swp_offset(swsusp_header.swsusp_info), &swsusp_info))) | 794 | if ((error = bio_read_page(swp_offset(swsusp_header.swsusp_info), &swsusp_info))) |
@@ -895,7 +819,7 @@ static int check_sig(void) | |||
895 | * Reset swap signature now. | 819 | * Reset swap signature now. |
896 | */ | 820 | */ |
897 | error = bio_write_page(0, &swsusp_header); | 821 | error = bio_write_page(0, &swsusp_header); |
898 | } else { | 822 | } else { |
899 | return -EINVAL; | 823 | return -EINVAL; |
900 | } | 824 | } |
901 | if (!error) | 825 | if (!error) |
@@ -912,7 +836,7 @@ static int check_sig(void) | |||
912 | 836 | ||
913 | static int data_read(struct pbe *pblist) | 837 | static int data_read(struct pbe *pblist) |
914 | { | 838 | { |
915 | struct pbe * p; | 839 | struct pbe *p; |
916 | int error = 0; | 840 | int error = 0; |
917 | int i = 0; | 841 | int i = 0; |
918 | int mod = swsusp_info.image_pages / 100; | 842 | int mod = swsusp_info.image_pages / 100; |
@@ -950,7 +874,7 @@ static int data_read(struct pbe *pblist) | |||
950 | static int read_pagedir(struct pbe *pblist) | 874 | static int read_pagedir(struct pbe *pblist) |
951 | { | 875 | { |
952 | struct pbe *pbpage, *p; | 876 | struct pbe *pbpage, *p; |
953 | unsigned i = 0; | 877 | unsigned int i = 0; |
954 | int error; | 878 | int error; |
955 | 879 | ||
956 | if (!pblist) | 880 | if (!pblist) |
@@ -997,20 +921,25 @@ static int read_suspend_image(void) | |||
997 | int error = 0; | 921 | int error = 0; |
998 | struct pbe *p; | 922 | struct pbe *p; |
999 | 923 | ||
1000 | if (!(p = alloc_pagedir(nr_copy_pages))) | 924 | if (!(p = alloc_pagedir(nr_copy_pages, GFP_ATOMIC, 0))) |
1001 | return -ENOMEM; | 925 | return -ENOMEM; |
1002 | 926 | ||
1003 | if ((error = read_pagedir(p))) | 927 | if ((error = read_pagedir(p))) |
1004 | return error; | 928 | return error; |
1005 | |||
1006 | create_pbe_list(p, nr_copy_pages); | 929 | create_pbe_list(p, nr_copy_pages); |
1007 | 930 | mark_unsafe_pages(p); | |
1008 | if (!(pagedir_nosave = swsusp_pagedir_relocate(p))) | 931 | pagedir_nosave = alloc_pagedir(nr_copy_pages, GFP_ATOMIC, 1); |
932 | if (pagedir_nosave) { | ||
933 | create_pbe_list(pagedir_nosave, nr_copy_pages); | ||
934 | copy_page_backup_list(pagedir_nosave, p); | ||
935 | } | ||
936 | free_pagedir(p); | ||
937 | if (!pagedir_nosave) | ||
1009 | return -ENOMEM; | 938 | return -ENOMEM; |
1010 | 939 | ||
1011 | /* Allocate memory for the image and read the data from swap */ | 940 | /* Allocate memory for the image and read the data from swap */ |
1012 | 941 | ||
1013 | error = check_pagedir(pagedir_nosave); | 942 | error = alloc_data_pages(pagedir_nosave, GFP_ATOMIC, 1); |
1014 | 943 | ||
1015 | if (!error) | 944 | if (!error) |
1016 | error = data_read(pagedir_nosave); | 945 | error = data_read(pagedir_nosave); |
diff --git a/kernel/printk.c b/kernel/printk.c index 3cb9708209..e9be027bc9 100644 --- a/kernel/printk.c +++ b/kernel/printk.c | |||
@@ -806,7 +806,6 @@ void console_unblank(void) | |||
806 | c->unblank(); | 806 | c->unblank(); |
807 | release_console_sem(); | 807 | release_console_sem(); |
808 | } | 808 | } |
809 | EXPORT_SYMBOL(console_unblank); | ||
810 | 809 | ||
811 | /* | 810 | /* |
812 | * Return the console tty driver structure and its associated index | 811 | * Return the console tty driver structure and its associated index |
diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 863eee8bff..b88d4186cd 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c | |||
@@ -155,7 +155,7 @@ int ptrace_attach(struct task_struct *task) | |||
155 | retval = -EPERM; | 155 | retval = -EPERM; |
156 | if (task->pid <= 1) | 156 | if (task->pid <= 1) |
157 | goto bad; | 157 | goto bad; |
158 | if (task == current) | 158 | if (task->tgid == current->tgid) |
159 | goto bad; | 159 | goto bad; |
160 | /* the same process cannot be attached many times */ | 160 | /* the same process cannot be attached many times */ |
161 | if (task->ptrace & PT_PTRACED) | 161 | if (task->ptrace & PT_PTRACED) |
@@ -406,3 +406,85 @@ int ptrace_request(struct task_struct *child, long request, | |||
406 | 406 | ||
407 | return ret; | 407 | return ret; |
408 | } | 408 | } |
409 | |||
410 | #ifndef __ARCH_SYS_PTRACE | ||
411 | static int ptrace_get_task_struct(long request, long pid, | ||
412 | struct task_struct **childp) | ||
413 | { | ||
414 | struct task_struct *child; | ||
415 | int ret; | ||
416 | |||
417 | /* | ||
418 | * Callers use child == NULL as an indication to exit early even | ||
419 | * when the return value is 0, so make sure it is NULL here. | ||
420 | */ | ||
421 | *childp = NULL; | ||
422 | |||
423 | if (request == PTRACE_TRACEME) { | ||
424 | /* | ||
425 | * Are we already being traced? | ||
426 | */ | ||
427 | if (current->ptrace & PT_PTRACED) | ||
428 | return -EPERM; | ||
429 | ret = security_ptrace(current->parent, current); | ||
430 | if (ret) | ||
431 | return -EPERM; | ||
432 | /* | ||
433 | * Set the ptrace bit in the process ptrace flags. | ||
434 | */ | ||
435 | current->ptrace |= PT_PTRACED; | ||
436 | return 0; | ||
437 | } | ||
438 | |||
439 | /* | ||
440 | * You may not mess with init | ||
441 | */ | ||
442 | if (pid == 1) | ||
443 | return -EPERM; | ||
444 | |||
445 | ret = -ESRCH; | ||
446 | read_lock(&tasklist_lock); | ||
447 | child = find_task_by_pid(pid); | ||
448 | if (child) | ||
449 | get_task_struct(child); | ||
450 | read_unlock(&tasklist_lock); | ||
451 | if (!child) | ||
452 | return -ESRCH; | ||
453 | |||
454 | *childp = child; | ||
455 | return 0; | ||
456 | } | ||
457 | |||
458 | asmlinkage long sys_ptrace(long request, long pid, long addr, long data) | ||
459 | { | ||
460 | struct task_struct *child; | ||
461 | long ret; | ||
462 | |||
463 | /* | ||
464 | * This lock_kernel fixes a subtle race with suid exec | ||
465 | */ | ||
466 | lock_kernel(); | ||
467 | ret = ptrace_get_task_struct(request, pid, &child); | ||
468 | if (!child) | ||
469 | goto out; | ||
470 | |||
471 | if (request == PTRACE_ATTACH) { | ||
472 | ret = ptrace_attach(child); | ||
473 | goto out; | ||
474 | } | ||
475 | |||
476 | ret = ptrace_check_attach(child, request == PTRACE_KILL); | ||
477 | if (ret < 0) | ||
478 | goto out_put_task_struct; | ||
479 | |||
480 | ret = arch_ptrace(child, request, addr, data); | ||
481 | if (ret < 0) | ||
482 | goto out_put_task_struct; | ||
483 | |||
484 | out_put_task_struct: | ||
485 | put_task_struct(child); | ||
486 | out: | ||
487 | unlock_kernel(); | ||
488 | return ret; | ||
489 | } | ||
490 | #endif /* __ARCH_SYS_PTRACE */ | ||
diff --git a/kernel/sched.c b/kernel/sched.c index 340dd238c1..b6506671b2 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -206,6 +206,7 @@ struct runqueue { | |||
206 | */ | 206 | */ |
207 | unsigned long nr_running; | 207 | unsigned long nr_running; |
208 | #ifdef CONFIG_SMP | 208 | #ifdef CONFIG_SMP |
209 | unsigned long prio_bias; | ||
209 | unsigned long cpu_load[3]; | 210 | unsigned long cpu_load[3]; |
210 | #endif | 211 | #endif |
211 | unsigned long long nr_switches; | 212 | unsigned long long nr_switches; |
@@ -659,13 +660,68 @@ static int effective_prio(task_t *p) | |||
659 | return prio; | 660 | return prio; |
660 | } | 661 | } |
661 | 662 | ||
663 | #ifdef CONFIG_SMP | ||
664 | static inline void inc_prio_bias(runqueue_t *rq, int prio) | ||
665 | { | ||
666 | rq->prio_bias += MAX_PRIO - prio; | ||
667 | } | ||
668 | |||
669 | static inline void dec_prio_bias(runqueue_t *rq, int prio) | ||
670 | { | ||
671 | rq->prio_bias -= MAX_PRIO - prio; | ||
672 | } | ||
673 | |||
674 | static inline void inc_nr_running(task_t *p, runqueue_t *rq) | ||
675 | { | ||
676 | rq->nr_running++; | ||
677 | if (rt_task(p)) { | ||
678 | if (p != rq->migration_thread) | ||
679 | /* | ||
680 | * The migration thread does the actual balancing. Do | ||
681 | * not bias by its priority as the ultra high priority | ||
682 | * will skew balancing adversely. | ||
683 | */ | ||
684 | inc_prio_bias(rq, p->prio); | ||
685 | } else | ||
686 | inc_prio_bias(rq, p->static_prio); | ||
687 | } | ||
688 | |||
689 | static inline void dec_nr_running(task_t *p, runqueue_t *rq) | ||
690 | { | ||
691 | rq->nr_running--; | ||
692 | if (rt_task(p)) { | ||
693 | if (p != rq->migration_thread) | ||
694 | dec_prio_bias(rq, p->prio); | ||
695 | } else | ||
696 | dec_prio_bias(rq, p->static_prio); | ||
697 | } | ||
698 | #else | ||
699 | static inline void inc_prio_bias(runqueue_t *rq, int prio) | ||
700 | { | ||
701 | } | ||
702 | |||
703 | static inline void dec_prio_bias(runqueue_t *rq, int prio) | ||
704 | { | ||
705 | } | ||
706 | |||
707 | static inline void inc_nr_running(task_t *p, runqueue_t *rq) | ||
708 | { | ||
709 | rq->nr_running++; | ||
710 | } | ||
711 | |||
712 | static inline void dec_nr_running(task_t *p, runqueue_t *rq) | ||
713 | { | ||
714 | rq->nr_running--; | ||
715 | } | ||
716 | #endif | ||
717 | |||
662 | /* | 718 | /* |
663 | * __activate_task - move a task to the runqueue. | 719 | * __activate_task - move a task to the runqueue. |
664 | */ | 720 | */ |
665 | static inline void __activate_task(task_t *p, runqueue_t *rq) | 721 | static inline void __activate_task(task_t *p, runqueue_t *rq) |
666 | { | 722 | { |
667 | enqueue_task(p, rq->active); | 723 | enqueue_task(p, rq->active); |
668 | rq->nr_running++; | 724 | inc_nr_running(p, rq); |
669 | } | 725 | } |
670 | 726 | ||
671 | /* | 727 | /* |
@@ -674,7 +730,7 @@ static inline void __activate_task(task_t *p, runqueue_t *rq) | |||
674 | static inline void __activate_idle_task(task_t *p, runqueue_t *rq) | 730 | static inline void __activate_idle_task(task_t *p, runqueue_t *rq) |
675 | { | 731 | { |
676 | enqueue_task_head(p, rq->active); | 732 | enqueue_task_head(p, rq->active); |
677 | rq->nr_running++; | 733 | inc_nr_running(p, rq); |
678 | } | 734 | } |
679 | 735 | ||
680 | static int recalc_task_prio(task_t *p, unsigned long long now) | 736 | static int recalc_task_prio(task_t *p, unsigned long long now) |
@@ -759,7 +815,8 @@ static void activate_task(task_t *p, runqueue_t *rq, int local) | |||
759 | } | 815 | } |
760 | #endif | 816 | #endif |
761 | 817 | ||
762 | p->prio = recalc_task_prio(p, now); | 818 | if (!rt_task(p)) |
819 | p->prio = recalc_task_prio(p, now); | ||
763 | 820 | ||
764 | /* | 821 | /* |
765 | * This checks to make sure it's not an uninterruptible task | 822 | * This checks to make sure it's not an uninterruptible task |
@@ -793,7 +850,7 @@ static void activate_task(task_t *p, runqueue_t *rq, int local) | |||
793 | */ | 850 | */ |
794 | static void deactivate_task(struct task_struct *p, runqueue_t *rq) | 851 | static void deactivate_task(struct task_struct *p, runqueue_t *rq) |
795 | { | 852 | { |
796 | rq->nr_running--; | 853 | dec_nr_running(p, rq); |
797 | dequeue_task(p, p->array); | 854 | dequeue_task(p, p->array); |
798 | p->array = NULL; | 855 | p->array = NULL; |
799 | } | 856 | } |
@@ -808,21 +865,28 @@ static void deactivate_task(struct task_struct *p, runqueue_t *rq) | |||
808 | #ifdef CONFIG_SMP | 865 | #ifdef CONFIG_SMP |
809 | static void resched_task(task_t *p) | 866 | static void resched_task(task_t *p) |
810 | { | 867 | { |
811 | int need_resched, nrpolling; | 868 | int cpu; |
812 | 869 | ||
813 | assert_spin_locked(&task_rq(p)->lock); | 870 | assert_spin_locked(&task_rq(p)->lock); |
814 | 871 | ||
815 | /* minimise the chance of sending an interrupt to poll_idle() */ | 872 | if (unlikely(test_tsk_thread_flag(p, TIF_NEED_RESCHED))) |
816 | nrpolling = test_tsk_thread_flag(p,TIF_POLLING_NRFLAG); | 873 | return; |
817 | need_resched = test_and_set_tsk_thread_flag(p,TIF_NEED_RESCHED); | 874 | |
818 | nrpolling |= test_tsk_thread_flag(p,TIF_POLLING_NRFLAG); | 875 | set_tsk_thread_flag(p, TIF_NEED_RESCHED); |
876 | |||
877 | cpu = task_cpu(p); | ||
878 | if (cpu == smp_processor_id()) | ||
879 | return; | ||
819 | 880 | ||
820 | if (!need_resched && !nrpolling && (task_cpu(p) != smp_processor_id())) | 881 | /* NEED_RESCHED must be visible before we test POLLING_NRFLAG */ |
821 | smp_send_reschedule(task_cpu(p)); | 882 | smp_mb(); |
883 | if (!test_tsk_thread_flag(p, TIF_POLLING_NRFLAG)) | ||
884 | smp_send_reschedule(cpu); | ||
822 | } | 885 | } |
823 | #else | 886 | #else |
824 | static inline void resched_task(task_t *p) | 887 | static inline void resched_task(task_t *p) |
825 | { | 888 | { |
889 | assert_spin_locked(&task_rq(p)->lock); | ||
826 | set_tsk_need_resched(p); | 890 | set_tsk_need_resched(p); |
827 | } | 891 | } |
828 | #endif | 892 | #endif |
@@ -930,27 +994,61 @@ void kick_process(task_t *p) | |||
930 | * We want to under-estimate the load of migration sources, to | 994 | * We want to under-estimate the load of migration sources, to |
931 | * balance conservatively. | 995 | * balance conservatively. |
932 | */ | 996 | */ |
933 | static inline unsigned long source_load(int cpu, int type) | 997 | static inline unsigned long __source_load(int cpu, int type, enum idle_type idle) |
934 | { | 998 | { |
935 | runqueue_t *rq = cpu_rq(cpu); | 999 | runqueue_t *rq = cpu_rq(cpu); |
936 | unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE; | 1000 | unsigned long running = rq->nr_running; |
1001 | unsigned long source_load, cpu_load = rq->cpu_load[type-1], | ||
1002 | load_now = running * SCHED_LOAD_SCALE; | ||
1003 | |||
937 | if (type == 0) | 1004 | if (type == 0) |
938 | return load_now; | 1005 | source_load = load_now; |
1006 | else | ||
1007 | source_load = min(cpu_load, load_now); | ||
1008 | |||
1009 | if (running > 1 || (idle == NOT_IDLE && running)) | ||
1010 | /* | ||
1011 | * If we are busy rebalancing the load is biased by | ||
1012 | * priority to create 'nice' support across cpus. When | ||
1013 | * idle rebalancing we should only bias the source_load if | ||
1014 | * there is more than one task running on that queue to | ||
1015 | * prevent idle rebalance from trying to pull tasks from a | ||
1016 | * queue with only one running task. | ||
1017 | */ | ||
1018 | source_load = source_load * rq->prio_bias / running; | ||
1019 | |||
1020 | return source_load; | ||
1021 | } | ||
939 | 1022 | ||
940 | return min(rq->cpu_load[type-1], load_now); | 1023 | static inline unsigned long source_load(int cpu, int type) |
1024 | { | ||
1025 | return __source_load(cpu, type, NOT_IDLE); | ||
941 | } | 1026 | } |
942 | 1027 | ||
943 | /* | 1028 | /* |
944 | * Return a high guess at the load of a migration-target cpu | 1029 | * Return a high guess at the load of a migration-target cpu |
945 | */ | 1030 | */ |
946 | static inline unsigned long target_load(int cpu, int type) | 1031 | static inline unsigned long __target_load(int cpu, int type, enum idle_type idle) |
947 | { | 1032 | { |
948 | runqueue_t *rq = cpu_rq(cpu); | 1033 | runqueue_t *rq = cpu_rq(cpu); |
949 | unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE; | 1034 | unsigned long running = rq->nr_running; |
1035 | unsigned long target_load, cpu_load = rq->cpu_load[type-1], | ||
1036 | load_now = running * SCHED_LOAD_SCALE; | ||
1037 | |||
950 | if (type == 0) | 1038 | if (type == 0) |
951 | return load_now; | 1039 | target_load = load_now; |
1040 | else | ||
1041 | target_load = max(cpu_load, load_now); | ||
1042 | |||
1043 | if (running > 1 || (idle == NOT_IDLE && running)) | ||
1044 | target_load = target_load * rq->prio_bias / running; | ||
1045 | |||
1046 | return target_load; | ||
1047 | } | ||
952 | 1048 | ||
953 | return max(rq->cpu_load[type-1], load_now); | 1049 | static inline unsigned long target_load(int cpu, int type) |
1050 | { | ||
1051 | return __target_load(cpu, type, NOT_IDLE); | ||
954 | } | 1052 | } |
955 | 1053 | ||
956 | /* | 1054 | /* |
@@ -1411,7 +1509,7 @@ void fastcall wake_up_new_task(task_t *p, unsigned long clone_flags) | |||
1411 | list_add_tail(&p->run_list, &current->run_list); | 1509 | list_add_tail(&p->run_list, &current->run_list); |
1412 | p->array = current->array; | 1510 | p->array = current->array; |
1413 | p->array->nr_active++; | 1511 | p->array->nr_active++; |
1414 | rq->nr_running++; | 1512 | inc_nr_running(p, rq); |
1415 | } | 1513 | } |
1416 | set_need_resched(); | 1514 | set_need_resched(); |
1417 | } else | 1515 | } else |
@@ -1468,7 +1566,7 @@ void fastcall sched_exit(task_t *p) | |||
1468 | * the sleep_avg of the parent as well. | 1566 | * the sleep_avg of the parent as well. |
1469 | */ | 1567 | */ |
1470 | rq = task_rq_lock(p->parent, &flags); | 1568 | rq = task_rq_lock(p->parent, &flags); |
1471 | if (p->first_time_slice) { | 1569 | if (p->first_time_slice && task_cpu(p) == task_cpu(p->parent)) { |
1472 | p->parent->time_slice += p->time_slice; | 1570 | p->parent->time_slice += p->time_slice; |
1473 | if (unlikely(p->parent->time_slice > task_timeslice(p))) | 1571 | if (unlikely(p->parent->time_slice > task_timeslice(p))) |
1474 | p->parent->time_slice = task_timeslice(p); | 1572 | p->parent->time_slice = task_timeslice(p); |
@@ -1756,9 +1854,9 @@ void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p, | |||
1756 | runqueue_t *this_rq, prio_array_t *this_array, int this_cpu) | 1854 | runqueue_t *this_rq, prio_array_t *this_array, int this_cpu) |
1757 | { | 1855 | { |
1758 | dequeue_task(p, src_array); | 1856 | dequeue_task(p, src_array); |
1759 | src_rq->nr_running--; | 1857 | dec_nr_running(p, src_rq); |
1760 | set_task_cpu(p, this_cpu); | 1858 | set_task_cpu(p, this_cpu); |
1761 | this_rq->nr_running++; | 1859 | inc_nr_running(p, this_rq); |
1762 | enqueue_task(p, this_array); | 1860 | enqueue_task(p, this_array); |
1763 | p->timestamp = (p->timestamp - src_rq->timestamp_last_tick) | 1861 | p->timestamp = (p->timestamp - src_rq->timestamp_last_tick) |
1764 | + this_rq->timestamp_last_tick; | 1862 | + this_rq->timestamp_last_tick; |
@@ -1937,9 +2035,9 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, | |||
1937 | 2035 | ||
1938 | /* Bias balancing toward cpus of our domain */ | 2036 | /* Bias balancing toward cpus of our domain */ |
1939 | if (local_group) | 2037 | if (local_group) |
1940 | load = target_load(i, load_idx); | 2038 | load = __target_load(i, load_idx, idle); |
1941 | else | 2039 | else |
1942 | load = source_load(i, load_idx); | 2040 | load = __source_load(i, load_idx, idle); |
1943 | 2041 | ||
1944 | avg_load += load; | 2042 | avg_load += load; |
1945 | } | 2043 | } |
@@ -2044,14 +2142,15 @@ out_balanced: | |||
2044 | /* | 2142 | /* |
2045 | * find_busiest_queue - find the busiest runqueue among the cpus in group. | 2143 | * find_busiest_queue - find the busiest runqueue among the cpus in group. |
2046 | */ | 2144 | */ |
2047 | static runqueue_t *find_busiest_queue(struct sched_group *group) | 2145 | static runqueue_t *find_busiest_queue(struct sched_group *group, |
2146 | enum idle_type idle) | ||
2048 | { | 2147 | { |
2049 | unsigned long load, max_load = 0; | 2148 | unsigned long load, max_load = 0; |
2050 | runqueue_t *busiest = NULL; | 2149 | runqueue_t *busiest = NULL; |
2051 | int i; | 2150 | int i; |
2052 | 2151 | ||
2053 | for_each_cpu_mask(i, group->cpumask) { | 2152 | for_each_cpu_mask(i, group->cpumask) { |
2054 | load = source_load(i, 0); | 2153 | load = __source_load(i, 0, idle); |
2055 | 2154 | ||
2056 | if (load > max_load) { | 2155 | if (load > max_load) { |
2057 | max_load = load; | 2156 | max_load = load; |
@@ -2095,7 +2194,7 @@ static int load_balance(int this_cpu, runqueue_t *this_rq, | |||
2095 | goto out_balanced; | 2194 | goto out_balanced; |
2096 | } | 2195 | } |
2097 | 2196 | ||
2098 | busiest = find_busiest_queue(group); | 2197 | busiest = find_busiest_queue(group, idle); |
2099 | if (!busiest) { | 2198 | if (!busiest) { |
2100 | schedstat_inc(sd, lb_nobusyq[idle]); | 2199 | schedstat_inc(sd, lb_nobusyq[idle]); |
2101 | goto out_balanced; | 2200 | goto out_balanced; |
@@ -2218,7 +2317,7 @@ static int load_balance_newidle(int this_cpu, runqueue_t *this_rq, | |||
2218 | goto out_balanced; | 2317 | goto out_balanced; |
2219 | } | 2318 | } |
2220 | 2319 | ||
2221 | busiest = find_busiest_queue(group); | 2320 | busiest = find_busiest_queue(group, NEWLY_IDLE); |
2222 | if (!busiest) { | 2321 | if (!busiest) { |
2223 | schedstat_inc(sd, lb_nobusyq[NEWLY_IDLE]); | 2322 | schedstat_inc(sd, lb_nobusyq[NEWLY_IDLE]); |
2224 | goto out_balanced; | 2323 | goto out_balanced; |
@@ -3451,8 +3550,10 @@ void set_user_nice(task_t *p, long nice) | |||
3451 | goto out_unlock; | 3550 | goto out_unlock; |
3452 | } | 3551 | } |
3453 | array = p->array; | 3552 | array = p->array; |
3454 | if (array) | 3553 | if (array) { |
3455 | dequeue_task(p, array); | 3554 | dequeue_task(p, array); |
3555 | dec_prio_bias(rq, p->static_prio); | ||
3556 | } | ||
3456 | 3557 | ||
3457 | old_prio = p->prio; | 3558 | old_prio = p->prio; |
3458 | new_prio = NICE_TO_PRIO(nice); | 3559 | new_prio = NICE_TO_PRIO(nice); |
@@ -3462,6 +3563,7 @@ void set_user_nice(task_t *p, long nice) | |||
3462 | 3563 | ||
3463 | if (array) { | 3564 | if (array) { |
3464 | enqueue_task(p, array); | 3565 | enqueue_task(p, array); |
3566 | inc_prio_bias(rq, p->static_prio); | ||
3465 | /* | 3567 | /* |
3466 | * If the task increased its priority or is running and | 3568 | * If the task increased its priority or is running and |
3467 | * lowered its priority, then reschedule its CPU: | 3569 | * lowered its priority, then reschedule its CPU: |
@@ -3563,8 +3665,6 @@ int idle_cpu(int cpu) | |||
3563 | return cpu_curr(cpu) == cpu_rq(cpu)->idle; | 3665 | return cpu_curr(cpu) == cpu_rq(cpu)->idle; |
3564 | } | 3666 | } |
3565 | 3667 | ||
3566 | EXPORT_SYMBOL_GPL(idle_cpu); | ||
3567 | |||
3568 | /** | 3668 | /** |
3569 | * idle_task - return the idle task for a given cpu. | 3669 | * idle_task - return the idle task for a given cpu. |
3570 | * @cpu: the processor in question. | 3670 | * @cpu: the processor in question. |
@@ -4680,7 +4780,8 @@ static int migration_call(struct notifier_block *nfb, unsigned long action, | |||
4680 | #ifdef CONFIG_HOTPLUG_CPU | 4780 | #ifdef CONFIG_HOTPLUG_CPU |
4681 | case CPU_UP_CANCELED: | 4781 | case CPU_UP_CANCELED: |
4682 | /* Unbind it from offline cpu so it can run. Fall thru. */ | 4782 | /* Unbind it from offline cpu so it can run. Fall thru. */ |
4683 | kthread_bind(cpu_rq(cpu)->migration_thread,smp_processor_id()); | 4783 | kthread_bind(cpu_rq(cpu)->migration_thread, |
4784 | any_online_cpu(cpu_online_map)); | ||
4684 | kthread_stop(cpu_rq(cpu)->migration_thread); | 4785 | kthread_stop(cpu_rq(cpu)->migration_thread); |
4685 | cpu_rq(cpu)->migration_thread = NULL; | 4786 | cpu_rq(cpu)->migration_thread = NULL; |
4686 | break; | 4787 | break; |
diff --git a/kernel/softirq.c b/kernel/softirq.c index f766b2fc48..ad3295cdde 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
@@ -470,7 +470,8 @@ static int __devinit cpu_callback(struct notifier_block *nfb, | |||
470 | #ifdef CONFIG_HOTPLUG_CPU | 470 | #ifdef CONFIG_HOTPLUG_CPU |
471 | case CPU_UP_CANCELED: | 471 | case CPU_UP_CANCELED: |
472 | /* Unbind so it can run. Fall thru. */ | 472 | /* Unbind so it can run. Fall thru. */ |
473 | kthread_bind(per_cpu(ksoftirqd, hotcpu), smp_processor_id()); | 473 | kthread_bind(per_cpu(ksoftirqd, hotcpu), |
474 | any_online_cpu(cpu_online_map)); | ||
474 | case CPU_DEAD: | 475 | case CPU_DEAD: |
475 | p = per_cpu(ksoftirqd, hotcpu); | 476 | p = per_cpu(ksoftirqd, hotcpu); |
476 | per_cpu(ksoftirqd, hotcpu) = NULL; | 477 | per_cpu(ksoftirqd, hotcpu) = NULL; |
diff --git a/kernel/softlockup.c b/kernel/softlockup.c index 75976209ce..c67189a25d 100644 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c | |||
@@ -73,9 +73,6 @@ void softlockup_tick(struct pt_regs *regs) | |||
73 | static int watchdog(void * __bind_cpu) | 73 | static int watchdog(void * __bind_cpu) |
74 | { | 74 | { |
75 | struct sched_param param = { .sched_priority = 99 }; | 75 | struct sched_param param = { .sched_priority = 99 }; |
76 | int this_cpu = (long) __bind_cpu; | ||
77 | |||
78 | printk("softlockup thread %d started up.\n", this_cpu); | ||
79 | 76 | ||
80 | sched_setscheduler(current, SCHED_FIFO, &param); | 77 | sched_setscheduler(current, SCHED_FIFO, &param); |
81 | current->flags |= PF_NOFREEZE; | 78 | current->flags |= PF_NOFREEZE; |
@@ -123,7 +120,8 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
123 | #ifdef CONFIG_HOTPLUG_CPU | 120 | #ifdef CONFIG_HOTPLUG_CPU |
124 | case CPU_UP_CANCELED: | 121 | case CPU_UP_CANCELED: |
125 | /* Unbind so it can run. Fall thru. */ | 122 | /* Unbind so it can run. Fall thru. */ |
126 | kthread_bind(per_cpu(watchdog_task, hotcpu), smp_processor_id()); | 123 | kthread_bind(per_cpu(watchdog_task, hotcpu), |
124 | any_online_cpu(cpu_online_map)); | ||
127 | case CPU_DEAD: | 125 | case CPU_DEAD: |
128 | p = per_cpu(watchdog_task, hotcpu); | 126 | p = per_cpu(watchdog_task, hotcpu); |
129 | per_cpu(watchdog_task, hotcpu) = NULL; | 127 | per_cpu(watchdog_task, hotcpu) = NULL; |
diff --git a/kernel/sys.c b/kernel/sys.c index 2fa1ed1812..c43b3e22bb 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
@@ -28,6 +28,7 @@ | |||
28 | #include <linux/suspend.h> | 28 | #include <linux/suspend.h> |
29 | #include <linux/tty.h> | 29 | #include <linux/tty.h> |
30 | #include <linux/signal.h> | 30 | #include <linux/signal.h> |
31 | #include <linux/cn_proc.h> | ||
31 | 32 | ||
32 | #include <linux/compat.h> | 33 | #include <linux/compat.h> |
33 | #include <linux/syscalls.h> | 34 | #include <linux/syscalls.h> |
@@ -375,18 +376,21 @@ void emergency_restart(void) | |||
375 | } | 376 | } |
376 | EXPORT_SYMBOL_GPL(emergency_restart); | 377 | EXPORT_SYMBOL_GPL(emergency_restart); |
377 | 378 | ||
378 | /** | ||
379 | * kernel_restart - reboot the system | ||
380 | * | ||
381 | * Shutdown everything and perform a clean reboot. | ||
382 | * This is not safe to call in interrupt context. | ||
383 | */ | ||
384 | void kernel_restart_prepare(char *cmd) | 379 | void kernel_restart_prepare(char *cmd) |
385 | { | 380 | { |
386 | notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd); | 381 | notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd); |
387 | system_state = SYSTEM_RESTART; | 382 | system_state = SYSTEM_RESTART; |
388 | device_shutdown(); | 383 | device_shutdown(); |
389 | } | 384 | } |
385 | |||
386 | /** | ||
387 | * kernel_restart - reboot the system | ||
388 | * @cmd: pointer to buffer containing command to execute for restart | ||
389 | * or %NULL | ||
390 | * | ||
391 | * Shutdown everything and perform a clean reboot. | ||
392 | * This is not safe to call in interrupt context. | ||
393 | */ | ||
390 | void kernel_restart(char *cmd) | 394 | void kernel_restart(char *cmd) |
391 | { | 395 | { |
392 | kernel_restart_prepare(cmd); | 396 | kernel_restart_prepare(cmd); |
@@ -623,6 +627,7 @@ asmlinkage long sys_setregid(gid_t rgid, gid_t egid) | |||
623 | current->egid = new_egid; | 627 | current->egid = new_egid; |
624 | current->gid = new_rgid; | 628 | current->gid = new_rgid; |
625 | key_fsgid_changed(current); | 629 | key_fsgid_changed(current); |
630 | proc_id_connector(current, PROC_EVENT_GID); | ||
626 | return 0; | 631 | return 0; |
627 | } | 632 | } |
628 | 633 | ||
@@ -662,6 +667,7 @@ asmlinkage long sys_setgid(gid_t gid) | |||
662 | return -EPERM; | 667 | return -EPERM; |
663 | 668 | ||
664 | key_fsgid_changed(current); | 669 | key_fsgid_changed(current); |
670 | proc_id_connector(current, PROC_EVENT_GID); | ||
665 | return 0; | 671 | return 0; |
666 | } | 672 | } |
667 | 673 | ||
@@ -751,6 +757,7 @@ asmlinkage long sys_setreuid(uid_t ruid, uid_t euid) | |||
751 | current->fsuid = current->euid; | 757 | current->fsuid = current->euid; |
752 | 758 | ||
753 | key_fsuid_changed(current); | 759 | key_fsuid_changed(current); |
760 | proc_id_connector(current, PROC_EVENT_UID); | ||
754 | 761 | ||
755 | return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_RE); | 762 | return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_RE); |
756 | } | 763 | } |
@@ -798,6 +805,7 @@ asmlinkage long sys_setuid(uid_t uid) | |||
798 | current->suid = new_suid; | 805 | current->suid = new_suid; |
799 | 806 | ||
800 | key_fsuid_changed(current); | 807 | key_fsuid_changed(current); |
808 | proc_id_connector(current, PROC_EVENT_UID); | ||
801 | 809 | ||
802 | return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_ID); | 810 | return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_ID); |
803 | } | 811 | } |
@@ -846,6 +854,7 @@ asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) | |||
846 | current->suid = suid; | 854 | current->suid = suid; |
847 | 855 | ||
848 | key_fsuid_changed(current); | 856 | key_fsuid_changed(current); |
857 | proc_id_connector(current, PROC_EVENT_UID); | ||
849 | 858 | ||
850 | return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_RES); | 859 | return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_RES); |
851 | } | 860 | } |
@@ -898,6 +907,7 @@ asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid) | |||
898 | current->sgid = sgid; | 907 | current->sgid = sgid; |
899 | 908 | ||
900 | key_fsgid_changed(current); | 909 | key_fsgid_changed(current); |
910 | proc_id_connector(current, PROC_EVENT_GID); | ||
901 | return 0; | 911 | return 0; |
902 | } | 912 | } |
903 | 913 | ||
@@ -940,6 +950,7 @@ asmlinkage long sys_setfsuid(uid_t uid) | |||
940 | } | 950 | } |
941 | 951 | ||
942 | key_fsuid_changed(current); | 952 | key_fsuid_changed(current); |
953 | proc_id_connector(current, PROC_EVENT_UID); | ||
943 | 954 | ||
944 | security_task_post_setuid(old_fsuid, (uid_t)-1, (uid_t)-1, LSM_SETID_FS); | 955 | security_task_post_setuid(old_fsuid, (uid_t)-1, (uid_t)-1, LSM_SETID_FS); |
945 | 956 | ||
@@ -968,6 +979,7 @@ asmlinkage long sys_setfsgid(gid_t gid) | |||
968 | } | 979 | } |
969 | current->fsgid = gid; | 980 | current->fsgid = gid; |
970 | key_fsgid_changed(current); | 981 | key_fsgid_changed(current); |
982 | proc_id_connector(current, PROC_EVENT_GID); | ||
971 | } | 983 | } |
972 | return old_fsgid; | 984 | return old_fsgid; |
973 | } | 985 | } |
@@ -1485,8 +1497,6 @@ EXPORT_SYMBOL(in_egroup_p); | |||
1485 | 1497 | ||
1486 | DECLARE_RWSEM(uts_sem); | 1498 | DECLARE_RWSEM(uts_sem); |
1487 | 1499 | ||
1488 | EXPORT_SYMBOL(uts_sem); | ||
1489 | |||
1490 | asmlinkage long sys_newuname(struct new_utsname __user * name) | 1500 | asmlinkage long sys_newuname(struct new_utsname __user * name) |
1491 | { | 1501 | { |
1492 | int errno = 0; | 1502 | int errno = 0; |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 8e56e24955..9990e10192 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -169,7 +169,7 @@ struct file_operations proc_sys_file_operations = { | |||
169 | 169 | ||
170 | extern struct proc_dir_entry *proc_sys_root; | 170 | extern struct proc_dir_entry *proc_sys_root; |
171 | 171 | ||
172 | static void register_proc_table(ctl_table *, struct proc_dir_entry *); | 172 | static void register_proc_table(ctl_table *, struct proc_dir_entry *, void *); |
173 | static void unregister_proc_table(ctl_table *, struct proc_dir_entry *); | 173 | static void unregister_proc_table(ctl_table *, struct proc_dir_entry *); |
174 | #endif | 174 | #endif |
175 | 175 | ||
@@ -952,7 +952,7 @@ static ctl_table fs_table[] = { | |||
952 | .data = &aio_nr, | 952 | .data = &aio_nr, |
953 | .maxlen = sizeof(aio_nr), | 953 | .maxlen = sizeof(aio_nr), |
954 | .mode = 0444, | 954 | .mode = 0444, |
955 | .proc_handler = &proc_dointvec, | 955 | .proc_handler = &proc_doulongvec_minmax, |
956 | }, | 956 | }, |
957 | { | 957 | { |
958 | .ctl_name = FS_AIO_MAX_NR, | 958 | .ctl_name = FS_AIO_MAX_NR, |
@@ -960,7 +960,7 @@ static ctl_table fs_table[] = { | |||
960 | .data = &aio_max_nr, | 960 | .data = &aio_max_nr, |
961 | .maxlen = sizeof(aio_max_nr), | 961 | .maxlen = sizeof(aio_max_nr), |
962 | .mode = 0644, | 962 | .mode = 0644, |
963 | .proc_handler = &proc_dointvec, | 963 | .proc_handler = &proc_doulongvec_minmax, |
964 | }, | 964 | }, |
965 | #ifdef CONFIG_INOTIFY | 965 | #ifdef CONFIG_INOTIFY |
966 | { | 966 | { |
@@ -992,10 +992,51 @@ static ctl_table dev_table[] = { | |||
992 | 992 | ||
993 | extern void init_irq_proc (void); | 993 | extern void init_irq_proc (void); |
994 | 994 | ||
995 | static DEFINE_SPINLOCK(sysctl_lock); | ||
996 | |||
997 | /* called under sysctl_lock */ | ||
998 | static int use_table(struct ctl_table_header *p) | ||
999 | { | ||
1000 | if (unlikely(p->unregistering)) | ||
1001 | return 0; | ||
1002 | p->used++; | ||
1003 | return 1; | ||
1004 | } | ||
1005 | |||
1006 | /* called under sysctl_lock */ | ||
1007 | static void unuse_table(struct ctl_table_header *p) | ||
1008 | { | ||
1009 | if (!--p->used) | ||
1010 | if (unlikely(p->unregistering)) | ||
1011 | complete(p->unregistering); | ||
1012 | } | ||
1013 | |||
1014 | /* called under sysctl_lock, will reacquire if has to wait */ | ||
1015 | static void start_unregistering(struct ctl_table_header *p) | ||
1016 | { | ||
1017 | /* | ||
1018 | * if p->used is 0, nobody will ever touch that entry again; | ||
1019 | * we'll eliminate all paths to it before dropping sysctl_lock | ||
1020 | */ | ||
1021 | if (unlikely(p->used)) { | ||
1022 | struct completion wait; | ||
1023 | init_completion(&wait); | ||
1024 | p->unregistering = &wait; | ||
1025 | spin_unlock(&sysctl_lock); | ||
1026 | wait_for_completion(&wait); | ||
1027 | spin_lock(&sysctl_lock); | ||
1028 | } | ||
1029 | /* | ||
1030 | * do not remove from the list until nobody holds it; walking the | ||
1031 | * list in do_sysctl() relies on that. | ||
1032 | */ | ||
1033 | list_del_init(&p->ctl_entry); | ||
1034 | } | ||
1035 | |||
995 | void __init sysctl_init(void) | 1036 | void __init sysctl_init(void) |
996 | { | 1037 | { |
997 | #ifdef CONFIG_PROC_FS | 1038 | #ifdef CONFIG_PROC_FS |
998 | register_proc_table(root_table, proc_sys_root); | 1039 | register_proc_table(root_table, proc_sys_root, &root_table_header); |
999 | init_irq_proc(); | 1040 | init_irq_proc(); |
1000 | #endif | 1041 | #endif |
1001 | } | 1042 | } |
@@ -1004,6 +1045,7 @@ int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *ol | |||
1004 | void __user *newval, size_t newlen) | 1045 | void __user *newval, size_t newlen) |
1005 | { | 1046 | { |
1006 | struct list_head *tmp; | 1047 | struct list_head *tmp; |
1048 | int error = -ENOTDIR; | ||
1007 | 1049 | ||
1008 | if (nlen <= 0 || nlen >= CTL_MAXNAME) | 1050 | if (nlen <= 0 || nlen >= CTL_MAXNAME) |
1009 | return -ENOTDIR; | 1051 | return -ENOTDIR; |
@@ -1012,20 +1054,30 @@ int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *ol | |||
1012 | if (!oldlenp || get_user(old_len, oldlenp)) | 1054 | if (!oldlenp || get_user(old_len, oldlenp)) |
1013 | return -EFAULT; | 1055 | return -EFAULT; |
1014 | } | 1056 | } |
1057 | spin_lock(&sysctl_lock); | ||
1015 | tmp = &root_table_header.ctl_entry; | 1058 | tmp = &root_table_header.ctl_entry; |
1016 | do { | 1059 | do { |
1017 | struct ctl_table_header *head = | 1060 | struct ctl_table_header *head = |
1018 | list_entry(tmp, struct ctl_table_header, ctl_entry); | 1061 | list_entry(tmp, struct ctl_table_header, ctl_entry); |
1019 | void *context = NULL; | 1062 | void *context = NULL; |
1020 | int error = parse_table(name, nlen, oldval, oldlenp, | 1063 | |
1064 | if (!use_table(head)) | ||
1065 | continue; | ||
1066 | |||
1067 | spin_unlock(&sysctl_lock); | ||
1068 | |||
1069 | error = parse_table(name, nlen, oldval, oldlenp, | ||
1021 | newval, newlen, head->ctl_table, | 1070 | newval, newlen, head->ctl_table, |
1022 | &context); | 1071 | &context); |
1023 | kfree(context); | 1072 | kfree(context); |
1073 | |||
1074 | spin_lock(&sysctl_lock); | ||
1075 | unuse_table(head); | ||
1024 | if (error != -ENOTDIR) | 1076 | if (error != -ENOTDIR) |
1025 | return error; | 1077 | break; |
1026 | tmp = tmp->next; | 1078 | } while ((tmp = tmp->next) != &root_table_header.ctl_entry); |
1027 | } while (tmp != &root_table_header.ctl_entry); | 1079 | spin_unlock(&sysctl_lock); |
1028 | return -ENOTDIR; | 1080 | return error; |
1029 | } | 1081 | } |
1030 | 1082 | ||
1031 | asmlinkage long sys_sysctl(struct __sysctl_args __user *args) | 1083 | asmlinkage long sys_sysctl(struct __sysctl_args __user *args) |
@@ -1236,12 +1288,16 @@ struct ctl_table_header *register_sysctl_table(ctl_table * table, | |||
1236 | return NULL; | 1288 | return NULL; |
1237 | tmp->ctl_table = table; | 1289 | tmp->ctl_table = table; |
1238 | INIT_LIST_HEAD(&tmp->ctl_entry); | 1290 | INIT_LIST_HEAD(&tmp->ctl_entry); |
1291 | tmp->used = 0; | ||
1292 | tmp->unregistering = NULL; | ||
1293 | spin_lock(&sysctl_lock); | ||
1239 | if (insert_at_head) | 1294 | if (insert_at_head) |
1240 | list_add(&tmp->ctl_entry, &root_table_header.ctl_entry); | 1295 | list_add(&tmp->ctl_entry, &root_table_header.ctl_entry); |
1241 | else | 1296 | else |
1242 | list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry); | 1297 | list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry); |
1298 | spin_unlock(&sysctl_lock); | ||
1243 | #ifdef CONFIG_PROC_FS | 1299 | #ifdef CONFIG_PROC_FS |
1244 | register_proc_table(table, proc_sys_root); | 1300 | register_proc_table(table, proc_sys_root, tmp); |
1245 | #endif | 1301 | #endif |
1246 | return tmp; | 1302 | return tmp; |
1247 | } | 1303 | } |
@@ -1255,10 +1311,13 @@ struct ctl_table_header *register_sysctl_table(ctl_table * table, | |||
1255 | */ | 1311 | */ |
1256 | void unregister_sysctl_table(struct ctl_table_header * header) | 1312 | void unregister_sysctl_table(struct ctl_table_header * header) |
1257 | { | 1313 | { |
1258 | list_del(&header->ctl_entry); | 1314 | might_sleep(); |
1315 | spin_lock(&sysctl_lock); | ||
1316 | start_unregistering(header); | ||
1259 | #ifdef CONFIG_PROC_FS | 1317 | #ifdef CONFIG_PROC_FS |
1260 | unregister_proc_table(header->ctl_table, proc_sys_root); | 1318 | unregister_proc_table(header->ctl_table, proc_sys_root); |
1261 | #endif | 1319 | #endif |
1320 | spin_unlock(&sysctl_lock); | ||
1262 | kfree(header); | 1321 | kfree(header); |
1263 | } | 1322 | } |
1264 | 1323 | ||
@@ -1269,7 +1328,7 @@ void unregister_sysctl_table(struct ctl_table_header * header) | |||
1269 | #ifdef CONFIG_PROC_FS | 1328 | #ifdef CONFIG_PROC_FS |
1270 | 1329 | ||
1271 | /* Scan the sysctl entries in table and add them all into /proc */ | 1330 | /* Scan the sysctl entries in table and add them all into /proc */ |
1272 | static void register_proc_table(ctl_table * table, struct proc_dir_entry *root) | 1331 | static void register_proc_table(ctl_table * table, struct proc_dir_entry *root, void *set) |
1273 | { | 1332 | { |
1274 | struct proc_dir_entry *de; | 1333 | struct proc_dir_entry *de; |
1275 | int len; | 1334 | int len; |
@@ -1305,13 +1364,14 @@ static void register_proc_table(ctl_table * table, struct proc_dir_entry *root) | |||
1305 | de = create_proc_entry(table->procname, mode, root); | 1364 | de = create_proc_entry(table->procname, mode, root); |
1306 | if (!de) | 1365 | if (!de) |
1307 | continue; | 1366 | continue; |
1367 | de->set = set; | ||
1308 | de->data = (void *) table; | 1368 | de->data = (void *) table; |
1309 | if (table->proc_handler) | 1369 | if (table->proc_handler) |
1310 | de->proc_fops = &proc_sys_file_operations; | 1370 | de->proc_fops = &proc_sys_file_operations; |
1311 | } | 1371 | } |
1312 | table->de = de; | 1372 | table->de = de; |
1313 | if (de->mode & S_IFDIR) | 1373 | if (de->mode & S_IFDIR) |
1314 | register_proc_table(table->child, de); | 1374 | register_proc_table(table->child, de, set); |
1315 | } | 1375 | } |
1316 | } | 1376 | } |
1317 | 1377 | ||
@@ -1336,6 +1396,13 @@ static void unregister_proc_table(ctl_table * table, struct proc_dir_entry *root | |||
1336 | continue; | 1396 | continue; |
1337 | } | 1397 | } |
1338 | 1398 | ||
1399 | /* | ||
1400 | * In any case, mark the entry as goner; we'll keep it | ||
1401 | * around if it's busy, but we'll know to do nothing with | ||
1402 | * its fields. We are under sysctl_lock here. | ||
1403 | */ | ||
1404 | de->data = NULL; | ||
1405 | |||
1339 | /* Don't unregister proc entries that are still being used.. */ | 1406 | /* Don't unregister proc entries that are still being used.. */ |
1340 | if (atomic_read(&de->count)) | 1407 | if (atomic_read(&de->count)) |
1341 | continue; | 1408 | continue; |
@@ -1349,27 +1416,38 @@ static ssize_t do_rw_proc(int write, struct file * file, char __user * buf, | |||
1349 | size_t count, loff_t *ppos) | 1416 | size_t count, loff_t *ppos) |
1350 | { | 1417 | { |
1351 | int op; | 1418 | int op; |
1352 | struct proc_dir_entry *de; | 1419 | struct proc_dir_entry *de = PDE(file->f_dentry->d_inode); |
1353 | struct ctl_table *table; | 1420 | struct ctl_table *table; |
1354 | size_t res; | 1421 | size_t res; |
1355 | ssize_t error; | 1422 | ssize_t error = -ENOTDIR; |
1356 | |||
1357 | de = PDE(file->f_dentry->d_inode); | ||
1358 | if (!de || !de->data) | ||
1359 | return -ENOTDIR; | ||
1360 | table = (struct ctl_table *) de->data; | ||
1361 | if (!table || !table->proc_handler) | ||
1362 | return -ENOTDIR; | ||
1363 | op = (write ? 002 : 004); | ||
1364 | if (ctl_perm(table, op)) | ||
1365 | return -EPERM; | ||
1366 | 1423 | ||
1367 | res = count; | 1424 | spin_lock(&sysctl_lock); |
1368 | 1425 | if (de && de->data && use_table(de->set)) { | |
1369 | error = (*table->proc_handler) (table, write, file, buf, &res, ppos); | 1426 | /* |
1370 | if (error) | 1427 | * at that point we know that sysctl was not unregistered |
1371 | return error; | 1428 | * and won't be until we finish |
1372 | return res; | 1429 | */ |
1430 | spin_unlock(&sysctl_lock); | ||
1431 | table = (struct ctl_table *) de->data; | ||
1432 | if (!table || !table->proc_handler) | ||
1433 | goto out; | ||
1434 | error = -EPERM; | ||
1435 | op = (write ? 002 : 004); | ||
1436 | if (ctl_perm(table, op)) | ||
1437 | goto out; | ||
1438 | |||
1439 | /* careful: calling conventions are nasty here */ | ||
1440 | res = count; | ||
1441 | error = (*table->proc_handler)(table, write, file, | ||
1442 | buf, &res, ppos); | ||
1443 | if (!error) | ||
1444 | error = res; | ||
1445 | out: | ||
1446 | spin_lock(&sysctl_lock); | ||
1447 | unuse_table(de->set); | ||
1448 | } | ||
1449 | spin_unlock(&sysctl_lock); | ||
1450 | return error; | ||
1373 | } | 1451 | } |
1374 | 1452 | ||
1375 | static int proc_opensys(struct inode *inode, struct file *file) | 1453 | static int proc_opensys(struct inode *inode, struct file *file) |
@@ -1997,6 +2075,7 @@ int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp, | |||
1997 | * @filp: the file structure | 2075 | * @filp: the file structure |
1998 | * @buffer: the user buffer | 2076 | * @buffer: the user buffer |
1999 | * @lenp: the size of the user buffer | 2077 | * @lenp: the size of the user buffer |
2078 | * @ppos: pointer to the file position | ||
2000 | * | 2079 | * |
2001 | * Reads/writes up to table->maxlen/sizeof(unsigned int) integer | 2080 | * Reads/writes up to table->maxlen/sizeof(unsigned int) integer |
2002 | * values from/to the user buffer, treated as an ASCII string. | 2081 | * values from/to the user buffer, treated as an ASCII string. |
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 7cee222231..42df83d7fa 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
@@ -524,7 +524,7 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb, | |||
524 | list_for_each_entry(wq, &workqueues, list) { | 524 | list_for_each_entry(wq, &workqueues, list) { |
525 | /* Unbind so it can run. */ | 525 | /* Unbind so it can run. */ |
526 | kthread_bind(per_cpu_ptr(wq->cpu_wq, hotcpu)->thread, | 526 | kthread_bind(per_cpu_ptr(wq->cpu_wq, hotcpu)->thread, |
527 | smp_processor_id()); | 527 | any_online_cpu(cpu_online_map)); |
528 | cleanup_workqueue_thread(wq, hotcpu); | 528 | cleanup_workqueue_thread(wq, hotcpu); |
529 | } | 529 | } |
530 | break; | 530 | break; |