| author | Jeff Garzik <jgarzik@pobox.com> | 2005-11-10 04:12:10 -0500 |
|---|---|---|
| committer | Jeff Garzik <jgarzik@pobox.com> | 2005-11-10 04:12:10 -0500 |
| commit | 2f67bdb23d74a6c6fd4f98f64239c5c34d1833cc (patch) | |
| tree | fe533abe3e7c400848647b95e4806f5125c654c3 /kernel | |
| parent | d40d9d29c020f8466c96f8e3ad4b7c014ff1085d (diff) | |
| parent | 3b44f137b9a846c5452d9e6e1271b79b1dbcc942 (diff) | |
Merge branch 'master'
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/acct.c | 92 |
| -rw-r--r-- | kernel/cpu.c | 33 |
| -rw-r--r-- | kernel/exit.c | 2 |
| -rw-r--r-- | kernel/fork.c | 9 |
| -rw-r--r-- | kernel/futex.c | 5 |
| -rw-r--r-- | kernel/irq/manage.c | 1 |
| -rw-r--r-- | kernel/kprobes.c | 134 |
| -rw-r--r-- | kernel/module.c | 1 |
| -rw-r--r-- | kernel/posix-cpu-timers.c | 6 |
| -rw-r--r-- | kernel/power/main.c | 2 |
| -rw-r--r-- | kernel/power/power.h | 6 |
| -rw-r--r-- | kernel/power/snapshot.c | 100 |
| -rw-r--r-- | kernel/power/swsusp.c | 251 |
| -rw-r--r-- | kernel/printk.c | 1 |
| -rw-r--r-- | kernel/ptrace.c | 84 |
| -rw-r--r-- | kernel/sched.c | 165 |
| -rw-r--r-- | kernel/softirq.c | 3 |
| -rw-r--r-- | kernel/softlockup.c | 6 |
| -rw-r--r-- | kernel/sys.c | 26 |
| -rw-r--r-- | kernel/sysctl.c | 141 |
| -rw-r--r-- | kernel/workqueue.c | 2 |
21 files changed, 675 insertions, 395 deletions
diff --git a/kernel/acct.c b/kernel/acct.c index 2e3f4a47e7d0..6312d6bd43e3 100644 --- a/kernel/acct.c +++ b/kernel/acct.c | |||
| @@ -54,6 +54,7 @@ | |||
| 54 | #include <linux/jiffies.h> | 54 | #include <linux/jiffies.h> |
| 55 | #include <linux/times.h> | 55 | #include <linux/times.h> |
| 56 | #include <linux/syscalls.h> | 56 | #include <linux/syscalls.h> |
| 57 | #include <linux/mount.h> | ||
| 57 | #include <asm/uaccess.h> | 58 | #include <asm/uaccess.h> |
| 58 | #include <asm/div64.h> | 59 | #include <asm/div64.h> |
| 59 | #include <linux/blkdev.h> /* sector_div */ | 60 | #include <linux/blkdev.h> /* sector_div */ |
| @@ -192,6 +193,7 @@ static void acct_file_reopen(struct file *file) | |||
| 192 | add_timer(&acct_globals.timer); | 193 | add_timer(&acct_globals.timer); |
| 193 | } | 194 | } |
| 194 | if (old_acct) { | 195 | if (old_acct) { |
| 196 | mnt_unpin(old_acct->f_vfsmnt); | ||
| 195 | spin_unlock(&acct_globals.lock); | 197 | spin_unlock(&acct_globals.lock); |
| 196 | do_acct_process(0, old_acct); | 198 | do_acct_process(0, old_acct); |
| 197 | filp_close(old_acct, NULL); | 199 | filp_close(old_acct, NULL); |
| @@ -199,6 +201,42 @@ static void acct_file_reopen(struct file *file) | |||
| 199 | } | 201 | } |
| 200 | } | 202 | } |
| 201 | 203 | ||
| 204 | static int acct_on(char *name) | ||
| 205 | { | ||
| 206 | struct file *file; | ||
| 207 | int error; | ||
| 208 | |||
| 209 | /* Difference from BSD - they don't do O_APPEND */ | ||
| 210 | file = filp_open(name, O_WRONLY|O_APPEND|O_LARGEFILE, 0); | ||
| 211 | if (IS_ERR(file)) | ||
| 212 | return PTR_ERR(file); | ||
| 213 | |||
| 214 | if (!S_ISREG(file->f_dentry->d_inode->i_mode)) { | ||
| 215 | filp_close(file, NULL); | ||
| 216 | return -EACCES; | ||
| 217 | } | ||
| 218 | |||
| 219 | if (!file->f_op->write) { | ||
| 220 | filp_close(file, NULL); | ||
| 221 | return -EIO; | ||
| 222 | } | ||
| 223 | |||
| 224 | error = security_acct(file); | ||
| 225 | if (error) { | ||
| 226 | filp_close(file, NULL); | ||
| 227 | return error; | ||
| 228 | } | ||
| 229 | |||
| 230 | spin_lock(&acct_globals.lock); | ||
| 231 | mnt_pin(file->f_vfsmnt); | ||
| 232 | acct_file_reopen(file); | ||
| 233 | spin_unlock(&acct_globals.lock); | ||
| 234 | |||
| 235 | mntput(file->f_vfsmnt); /* it's pinned, now give up active reference */ | ||
| 236 | |||
| 237 | return 0; | ||
| 238 | } | ||
| 239 | |||
| 202 | /** | 240 | /** |
| 203 | * sys_acct - enable/disable process accounting | 241 | * sys_acct - enable/disable process accounting |
| 204 | * @name: file name for accounting records or NULL to shutdown accounting | 242 | * @name: file name for accounting records or NULL to shutdown accounting |
| @@ -212,47 +250,41 @@ static void acct_file_reopen(struct file *file) | |||
| 212 | */ | 250 | */ |
| 213 | asmlinkage long sys_acct(const char __user *name) | 251 | asmlinkage long sys_acct(const char __user *name) |
| 214 | { | 252 | { |
| 215 | struct file *file = NULL; | ||
| 216 | char *tmp; | ||
| 217 | int error; | 253 | int error; |
| 218 | 254 | ||
| 219 | if (!capable(CAP_SYS_PACCT)) | 255 | if (!capable(CAP_SYS_PACCT)) |
| 220 | return -EPERM; | 256 | return -EPERM; |
| 221 | 257 | ||
| 222 | if (name) { | 258 | if (name) { |
| 223 | tmp = getname(name); | 259 | char *tmp = getname(name); |
| 224 | if (IS_ERR(tmp)) { | 260 | if (IS_ERR(tmp)) |
| 225 | return (PTR_ERR(tmp)); | 261 | return (PTR_ERR(tmp)); |
| 226 | } | 262 | error = acct_on(tmp); |
| 227 | /* Difference from BSD - they don't do O_APPEND */ | ||
| 228 | file = filp_open(tmp, O_WRONLY|O_APPEND|O_LARGEFILE, 0); | ||
| 229 | putname(tmp); | 263 | putname(tmp); |
| 230 | if (IS_ERR(file)) { | 264 | } else { |
| 231 | return (PTR_ERR(file)); | 265 | error = security_acct(NULL); |
| 232 | } | 266 | if (!error) { |
| 233 | if (!S_ISREG(file->f_dentry->d_inode->i_mode)) { | 267 | spin_lock(&acct_globals.lock); |
| 234 | filp_close(file, NULL); | 268 | acct_file_reopen(NULL); |
| 235 | return (-EACCES); | 269 | spin_unlock(&acct_globals.lock); |
| 236 | } | ||
| 237 | |||
| 238 | if (!file->f_op->write) { | ||
| 239 | filp_close(file, NULL); | ||
| 240 | return (-EIO); | ||
| 241 | } | 270 | } |
| 242 | } | 271 | } |
| 272 | return error; | ||
| 273 | } | ||
| 243 | 274 | ||
| 244 | error = security_acct(file); | 275 | /** |
| 245 | if (error) { | 276 | * acct_auto_close - turn off a filesystem's accounting if it is on |
| 246 | if (file) | 277 | * @m: vfsmount being shut down |
| 247 | filp_close(file, NULL); | 278 | * |
| 248 | return error; | 279 | * If the accounting is turned on for a file in the subtree pointed to |
| 249 | } | 280 | * by m, turn accounting off. Done when m is about to die. |
| 250 | 281 | */ | |
| 282 | void acct_auto_close_mnt(struct vfsmount *m) | ||
| 283 | { | ||
| 251 | spin_lock(&acct_globals.lock); | 284 | spin_lock(&acct_globals.lock); |
| 252 | acct_file_reopen(file); | 285 | if (acct_globals.file && acct_globals.file->f_vfsmnt == m) |
| 286 | acct_file_reopen(NULL); | ||
| 253 | spin_unlock(&acct_globals.lock); | 287 | spin_unlock(&acct_globals.lock); |
| 254 | |||
| 255 | return (0); | ||
| 256 | } | 288 | } |
| 257 | 289 | ||
| 258 | /** | 290 | /** |
| @@ -266,8 +298,8 @@ void acct_auto_close(struct super_block *sb) | |||
| 266 | { | 298 | { |
| 267 | spin_lock(&acct_globals.lock); | 299 | spin_lock(&acct_globals.lock); |
| 268 | if (acct_globals.file && | 300 | if (acct_globals.file && |
| 269 | acct_globals.file->f_dentry->d_inode->i_sb == sb) { | 301 | acct_globals.file->f_vfsmnt->mnt_sb == sb) { |
| 270 | acct_file_reopen((struct file *)NULL); | 302 | acct_file_reopen(NULL); |
| 271 | } | 303 | } |
| 272 | spin_unlock(&acct_globals.lock); | 304 | spin_unlock(&acct_globals.lock); |
| 273 | } | 305 | } |
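
The acct.c hunks above split the "turn accounting on" half of sys_acct() into a separate acct_on() helper and pin the accounting file's vfsmount so the new acct_auto_close_mnt() can shut accounting down when that mount goes away. The userspace-visible interface is unchanged: the acct(2) syscall still takes a pathname to enable BSD process accounting or NULL to disable it. A minimal userspace sketch of that interface (the accounting file path is an assumption; the caller needs CAP_SYS_PACCT and a kernel built with CONFIG_BSD_PROCESS_ACCT):

```c
/* Toggle BSD process accounting via acct(2), the syscall backed by
 * the sys_acct()/acct_on() code in the hunk above. */
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        if (acct("/var/log/pacct") != 0) {      /* enable accounting */
                perror("acct(on)");
                return 1;
        }
        /* ... accounting records are appended as processes exit ... */
        if (acct(NULL) != 0) {                  /* disable accounting */
                perror("acct(off)");
                return 1;
        }
        return 0;
}
```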
diff --git a/kernel/cpu.c b/kernel/cpu.c index 3619e939182e..d61ba88f34e5 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c | |||
| @@ -21,6 +21,24 @@ EXPORT_SYMBOL_GPL(cpucontrol); | |||
| 21 | 21 | ||
| 22 | static struct notifier_block *cpu_chain; | 22 | static struct notifier_block *cpu_chain; |
| 23 | 23 | ||
| 24 | /* | ||
| 25 | * Used by callers to check whether they need to acquire cpucontrol | ||
| 26 | * to protect a CPU from being removed. It's sometimes required to | ||
| 27 | * call these functions both for normal operations and in response to | ||
| 28 | * a CPU being added/removed. If the call is made from the same | ||
| 29 | * thread context as a CPU hotplug thread, we don't need to take the lock | ||
| 30 | * since it's already protected. | ||
| 31 | * See drivers/cpufreq/cpufreq.c for its usage. - Ashok Raj | ||
| 32 | */ | ||
| 33 | |||
| 34 | int current_in_cpu_hotplug(void) | ||
| 35 | { | ||
| 36 | return (current->flags & PF_HOTPLUG_CPU); | ||
| 37 | } | ||
| 38 | |||
| 39 | EXPORT_SYMBOL_GPL(current_in_cpu_hotplug); | ||
| 40 | |||
| 41 | |||
| 24 | /* Need to know about CPUs going up/down? */ | 42 | /* Need to know about CPUs going up/down? */ |
| 25 | int register_cpu_notifier(struct notifier_block *nb) | 43 | int register_cpu_notifier(struct notifier_block *nb) |
| 26 | { | 44 | { |
| @@ -94,6 +112,13 @@ int cpu_down(unsigned int cpu) | |||
| 94 | goto out; | 112 | goto out; |
| 95 | } | 113 | } |
| 96 | 114 | ||
| 115 | /* | ||
| 116 | * Leave a trace in current->flags indicating we are already in the | ||
| 117 | * process of performing CPU hotplug. Callers can check whether cpucontrol | ||
| 118 | * is already held by the current thread and, if so, avoid a | ||
| 119 | * deadlock by not acquiring the lock again. | ||
| 120 | */ | ||
| 121 | current->flags |= PF_HOTPLUG_CPU; | ||
| 97 | err = notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE, | 122 | err = notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE, |
| 98 | (void *)(long)cpu); | 123 | (void *)(long)cpu); |
| 99 | if (err == NOTIFY_BAD) { | 124 | if (err == NOTIFY_BAD) { |
| @@ -146,6 +171,7 @@ out_thread: | |||
| 146 | out_allowed: | 171 | out_allowed: |
| 147 | set_cpus_allowed(current, old_allowed); | 172 | set_cpus_allowed(current, old_allowed); |
| 148 | out: | 173 | out: |
| 174 | current->flags &= ~PF_HOTPLUG_CPU; | ||
| 149 | unlock_cpu_hotplug(); | 175 | unlock_cpu_hotplug(); |
| 150 | return err; | 176 | return err; |
| 151 | } | 177 | } |
| @@ -163,6 +189,12 @@ int __devinit cpu_up(unsigned int cpu) | |||
| 163 | ret = -EINVAL; | 189 | ret = -EINVAL; |
| 164 | goto out; | 190 | goto out; |
| 165 | } | 191 | } |
| 192 | |||
| 193 | /* | ||
| 194 | * Leave a trace in current->flags indicating we are already in | ||
| 195 | * the process of performing CPU hotplug. | ||
| 196 | */ | ||
| 197 | current->flags |= PF_HOTPLUG_CPU; | ||
| 166 | ret = notifier_call_chain(&cpu_chain, CPU_UP_PREPARE, hcpu); | 198 | ret = notifier_call_chain(&cpu_chain, CPU_UP_PREPARE, hcpu); |
| 167 | if (ret == NOTIFY_BAD) { | 199 | if (ret == NOTIFY_BAD) { |
| 168 | printk("%s: attempt to bring up CPU %u failed\n", | 200 | printk("%s: attempt to bring up CPU %u failed\n", |
| @@ -185,6 +217,7 @@ out_notify: | |||
| 185 | if (ret != 0) | 217 | if (ret != 0) |
| 186 | notifier_call_chain(&cpu_chain, CPU_UP_CANCELED, hcpu); | 218 | notifier_call_chain(&cpu_chain, CPU_UP_CANCELED, hcpu); |
| 187 | out: | 219 | out: |
| 220 | current->flags &= ~PF_HOTPLUG_CPU; | ||
| 188 | up(&cpucontrol); | 221 | up(&cpucontrol); |
| 189 | return ret; | 222 | return ret; |
| 190 | } | 223 | } |
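
The new current_in_cpu_hotplug() flag lets code that can be invoked both normally and from within cpu_up()/cpu_down() decide whether it still needs to take cpucontrol; the comment above points at drivers/cpufreq/cpufreq.c for the real user. A hedged sketch of that caller pattern, not taken from this merge (the function name is hypothetical):

```c
/* Hypothetical caller illustrating the pattern described in the comment:
 * only take the hotplug lock when we are not already inside a CPU hotplug
 * operation on this thread, otherwise we would self-deadlock on cpucontrol. */
#include <linux/sched.h>
#include <linux/cpu.h>

void example_touch_cpus(void)
{
        int in_hotplug = current_in_cpu_hotplug();

        if (!in_hotplug)
                lock_cpu_hotplug();

        /* ... work that must not race with a CPU going away ... */

        if (!in_hotplug)
                unlock_cpu_hotplug();
}
```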
diff --git a/kernel/exit.c b/kernel/exit.c index 537394b25e8d..452a1d116178 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
| @@ -28,6 +28,7 @@ | |||
| 28 | #include <linux/cpuset.h> | 28 | #include <linux/cpuset.h> |
| 29 | #include <linux/syscalls.h> | 29 | #include <linux/syscalls.h> |
| 30 | #include <linux/signal.h> | 30 | #include <linux/signal.h> |
| 31 | #include <linux/cn_proc.h> | ||
| 31 | 32 | ||
| 32 | #include <asm/uaccess.h> | 33 | #include <asm/uaccess.h> |
| 33 | #include <asm/unistd.h> | 34 | #include <asm/unistd.h> |
| @@ -863,6 +864,7 @@ fastcall NORET_TYPE void do_exit(long code) | |||
| 863 | module_put(tsk->binfmt->module); | 864 | module_put(tsk->binfmt->module); |
| 864 | 865 | ||
| 865 | tsk->exit_code = code; | 866 | tsk->exit_code = code; |
| 867 | proc_exit_connector(tsk); | ||
| 866 | exit_notify(tsk); | 868 | exit_notify(tsk); |
| 867 | #ifdef CONFIG_NUMA | 869 | #ifdef CONFIG_NUMA |
| 868 | mpol_free(tsk->mempolicy); | 870 | mpol_free(tsk->mempolicy); |
diff --git a/kernel/fork.c b/kernel/fork.c index 8a069612eac3..158710d22566 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
| @@ -42,6 +42,7 @@ | |||
| 42 | #include <linux/profile.h> | 42 | #include <linux/profile.h> |
| 43 | #include <linux/rmap.h> | 43 | #include <linux/rmap.h> |
| 44 | #include <linux/acct.h> | 44 | #include <linux/acct.h> |
| 45 | #include <linux/cn_proc.h> | ||
| 45 | 46 | ||
| 46 | #include <asm/pgtable.h> | 47 | #include <asm/pgtable.h> |
| 47 | #include <asm/pgalloc.h> | 48 | #include <asm/pgalloc.h> |
| @@ -469,13 +470,6 @@ static int copy_mm(unsigned long clone_flags, struct task_struct * tsk) | |||
| 469 | if (clone_flags & CLONE_VM) { | 470 | if (clone_flags & CLONE_VM) { |
| 470 | atomic_inc(&oldmm->mm_users); | 471 | atomic_inc(&oldmm->mm_users); |
| 471 | mm = oldmm; | 472 | mm = oldmm; |
| 472 | /* | ||
| 473 | * There are cases where the PTL is held to ensure no | ||
| 474 | * new threads start up in user mode using an mm, which | ||
| 475 | * allows optimizing out ipis; the tlb_gather_mmu code | ||
| 476 | * is an example. | ||
| 477 | */ | ||
| 478 | spin_unlock_wait(&oldmm->page_table_lock); | ||
| 479 | goto good_mm; | 473 | goto good_mm; |
| 480 | } | 474 | } |
| 481 | 475 | ||
| @@ -1143,6 +1137,7 @@ static task_t *copy_process(unsigned long clone_flags, | |||
| 1143 | __get_cpu_var(process_counts)++; | 1137 | __get_cpu_var(process_counts)++; |
| 1144 | } | 1138 | } |
| 1145 | 1139 | ||
| 1140 | proc_fork_connector(p); | ||
| 1146 | if (!current->signal->tty && p->signal->tty) | 1141 | if (!current->signal->tty && p->signal->tty) |
| 1147 | p->signal->tty = NULL; | 1142 | p->signal->tty = NULL; |
| 1148 | 1143 | ||
diff --git a/kernel/futex.c b/kernel/futex.c index 3b4d5ad44cc6..aca8d10704f6 100644 --- a/kernel/futex.c +++ b/kernel/futex.c | |||
| @@ -365,6 +365,11 @@ retry: | |||
| 365 | if (bh1 != bh2) | 365 | if (bh1 != bh2) |
| 366 | spin_unlock(&bh2->lock); | 366 | spin_unlock(&bh2->lock); |
| 367 | 367 | ||
| 368 | if (unlikely(op_ret != -EFAULT)) { | ||
| 369 | ret = op_ret; | ||
| 370 | goto out; | ||
| 371 | } | ||
| 372 | |||
| 368 | /* futex_atomic_op_inuser needs to both read and write | 373 | /* futex_atomic_op_inuser needs to both read and write |
| 369 | * *(int __user *)uaddr2, but we can't modify it | 374 | * *(int __user *)uaddr2, but we can't modify it |
| 370 | * non-atomically. Therefore, if get_user below is not | 375 | * non-atomically. Therefore, if get_user below is not |
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 1cfdb08ddf20..3bd7226d15fa 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c | |||
| @@ -24,6 +24,7 @@ cpumask_t __cacheline_aligned pending_irq_cpumask[NR_IRQS]; | |||
| 24 | 24 | ||
| 25 | /** | 25 | /** |
| 26 | * synchronize_irq - wait for pending IRQ handlers (on other CPUs) | 26 | * synchronize_irq - wait for pending IRQ handlers (on other CPUs) |
| 27 | * @irq: interrupt number to wait for | ||
| 27 | * | 28 | * |
| 28 | * This function waits for any pending IRQ handlers for this interrupt | 29 | * This function waits for any pending IRQ handlers for this interrupt |
| 29 | * to complete before returning. If you use this function while | 30 | * to complete before returning. If you use this function while |
diff --git a/kernel/kprobes.c b/kernel/kprobes.c index ce4915dd683a..5beda378cc75 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c | |||
| @@ -32,7 +32,6 @@ | |||
| 32 | * <prasanna@in.ibm.com> added function-return probes. | 32 | * <prasanna@in.ibm.com> added function-return probes. |
| 33 | */ | 33 | */ |
| 34 | #include <linux/kprobes.h> | 34 | #include <linux/kprobes.h> |
| 35 | #include <linux/spinlock.h> | ||
| 36 | #include <linux/hash.h> | 35 | #include <linux/hash.h> |
| 37 | #include <linux/init.h> | 36 | #include <linux/init.h> |
| 38 | #include <linux/slab.h> | 37 | #include <linux/slab.h> |
| @@ -49,9 +48,9 @@ | |||
| 49 | static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE]; | 48 | static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE]; |
| 50 | static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE]; | 49 | static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE]; |
| 51 | 50 | ||
| 52 | unsigned int kprobe_cpu = NR_CPUS; | 51 | static DEFINE_SPINLOCK(kprobe_lock); /* Protects kprobe_table */ |
| 53 | static DEFINE_SPINLOCK(kprobe_lock); | 52 | DEFINE_SPINLOCK(kretprobe_lock); /* Protects kretprobe_inst_table */ |
| 54 | static struct kprobe *curr_kprobe; | 53 | static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL; |
| 55 | 54 | ||
| 56 | /* | 55 | /* |
| 57 | * kprobe->ainsn.insn points to the copy of the instruction to be | 56 | * kprobe->ainsn.insn points to the copy of the instruction to be |
| @@ -153,50 +152,31 @@ void __kprobes free_insn_slot(kprobe_opcode_t *slot) | |||
| 153 | } | 152 | } |
| 154 | } | 153 | } |
| 155 | 154 | ||
| 156 | /* Locks kprobe: irqs must be disabled */ | 155 | /* We have preemption disabled, so it is safe to use the __ versions */
| 157 | void __kprobes lock_kprobes(void) | 156 | static inline void set_kprobe_instance(struct kprobe *kp) |
| 158 | { | 157 | { |
| 159 | unsigned long flags = 0; | 158 | __get_cpu_var(kprobe_instance) = kp; |
| 160 | |||
| 161 | /* Avoiding local interrupts to happen right after we take the kprobe_lock | ||
| 162 | * and before we get a chance to update kprobe_cpu, this to prevent | ||
| 163 | * deadlock when we have a kprobe on ISR routine and a kprobe on task | ||
| 164 | * routine | ||
| 165 | */ | ||
| 166 | local_irq_save(flags); | ||
| 167 | |||
| 168 | spin_lock(&kprobe_lock); | ||
| 169 | kprobe_cpu = smp_processor_id(); | ||
| 170 | |||
| 171 | local_irq_restore(flags); | ||
| 172 | } | 159 | } |
| 173 | 160 | ||
| 174 | void __kprobes unlock_kprobes(void) | 161 | static inline void reset_kprobe_instance(void) |
| 175 | { | 162 | { |
| 176 | unsigned long flags = 0; | 163 | __get_cpu_var(kprobe_instance) = NULL; |
| 177 | |||
| 178 | /* Avoiding local interrupts to happen right after we update | ||
| 179 | * kprobe_cpu and before we get a a chance to release kprobe_lock, | ||
| 180 | * this to prevent deadlock when we have a kprobe on ISR routine and | ||
| 181 | * a kprobe on task routine | ||
| 182 | */ | ||
| 183 | local_irq_save(flags); | ||
| 184 | |||
| 185 | kprobe_cpu = NR_CPUS; | ||
| 186 | spin_unlock(&kprobe_lock); | ||
| 187 | |||
| 188 | local_irq_restore(flags); | ||
| 189 | } | 164 | } |
| 190 | 165 | ||
| 191 | /* You have to be holding the kprobe_lock */ | 166 | /* |
| 167 | * This routine is called either: | ||
| 168 | * - under the kprobe_lock spinlock - during kprobe_[un]register() | ||
| 169 | * OR | ||
| 170 | * - with preemption disabled - from arch/xxx/kernel/kprobes.c | ||
| 171 | */ | ||
| 192 | struct kprobe __kprobes *get_kprobe(void *addr) | 172 | struct kprobe __kprobes *get_kprobe(void *addr) |
| 193 | { | 173 | { |
| 194 | struct hlist_head *head; | 174 | struct hlist_head *head; |
| 195 | struct hlist_node *node; | 175 | struct hlist_node *node; |
| 176 | struct kprobe *p; | ||
| 196 | 177 | ||
| 197 | head = &kprobe_table[hash_ptr(addr, KPROBE_HASH_BITS)]; | 178 | head = &kprobe_table[hash_ptr(addr, KPROBE_HASH_BITS)]; |
| 198 | hlist_for_each(node, head) { | 179 | hlist_for_each_entry_rcu(p, node, head, hlist) { |
| 199 | struct kprobe *p = hlist_entry(node, struct kprobe, hlist); | ||
| 200 | if (p->addr == addr) | 180 | if (p->addr == addr) |
| 201 | return p; | 181 | return p; |
| 202 | } | 182 | } |
| @@ -211,13 +191,13 @@ static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs) | |||
| 211 | { | 191 | { |
| 212 | struct kprobe *kp; | 192 | struct kprobe *kp; |
| 213 | 193 | ||
| 214 | list_for_each_entry(kp, &p->list, list) { | 194 | list_for_each_entry_rcu(kp, &p->list, list) { |
| 215 | if (kp->pre_handler) { | 195 | if (kp->pre_handler) { |
| 216 | curr_kprobe = kp; | 196 | set_kprobe_instance(kp); |
| 217 | if (kp->pre_handler(kp, regs)) | 197 | if (kp->pre_handler(kp, regs)) |
| 218 | return 1; | 198 | return 1; |
| 219 | } | 199 | } |
| 220 | curr_kprobe = NULL; | 200 | reset_kprobe_instance(); |
| 221 | } | 201 | } |
| 222 | return 0; | 202 | return 0; |
| 223 | } | 203 | } |
| @@ -227,11 +207,11 @@ static void __kprobes aggr_post_handler(struct kprobe *p, struct pt_regs *regs, | |||
| 227 | { | 207 | { |
| 228 | struct kprobe *kp; | 208 | struct kprobe *kp; |
| 229 | 209 | ||
| 230 | list_for_each_entry(kp, &p->list, list) { | 210 | list_for_each_entry_rcu(kp, &p->list, list) { |
| 231 | if (kp->post_handler) { | 211 | if (kp->post_handler) { |
| 232 | curr_kprobe = kp; | 212 | set_kprobe_instance(kp); |
| 233 | kp->post_handler(kp, regs, flags); | 213 | kp->post_handler(kp, regs, flags); |
| 234 | curr_kprobe = NULL; | 214 | reset_kprobe_instance(); |
| 235 | } | 215 | } |
| 236 | } | 216 | } |
| 237 | return; | 217 | return; |
| @@ -240,12 +220,14 @@ static void __kprobes aggr_post_handler(struct kprobe *p, struct pt_regs *regs, | |||
| 240 | static int __kprobes aggr_fault_handler(struct kprobe *p, struct pt_regs *regs, | 220 | static int __kprobes aggr_fault_handler(struct kprobe *p, struct pt_regs *regs, |
| 241 | int trapnr) | 221 | int trapnr) |
| 242 | { | 222 | { |
| 223 | struct kprobe *cur = __get_cpu_var(kprobe_instance); | ||
| 224 | |||
| 243 | /* | 225 | /* |
| 244 | * if we faulted "during" the execution of a user specified | 226 | * if we faulted "during" the execution of a user specified |
| 245 | * probe handler, invoke just that probe's fault handler | 227 | * probe handler, invoke just that probe's fault handler |
| 246 | */ | 228 | */ |
| 247 | if (curr_kprobe && curr_kprobe->fault_handler) { | 229 | if (cur && cur->fault_handler) { |
| 248 | if (curr_kprobe->fault_handler(curr_kprobe, regs, trapnr)) | 230 | if (cur->fault_handler(cur, regs, trapnr)) |
| 249 | return 1; | 231 | return 1; |
| 250 | } | 232 | } |
| 251 | return 0; | 233 | return 0; |
| @@ -253,17 +235,18 @@ static int __kprobes aggr_fault_handler(struct kprobe *p, struct pt_regs *regs, | |||
| 253 | 235 | ||
| 254 | static int __kprobes aggr_break_handler(struct kprobe *p, struct pt_regs *regs) | 236 | static int __kprobes aggr_break_handler(struct kprobe *p, struct pt_regs *regs) |
| 255 | { | 237 | { |
| 256 | struct kprobe *kp = curr_kprobe; | 238 | struct kprobe *cur = __get_cpu_var(kprobe_instance); |
| 257 | if (curr_kprobe && kp->break_handler) { | 239 | int ret = 0; |
| 258 | if (kp->break_handler(kp, regs)) { | 240 | |
| 259 | curr_kprobe = NULL; | 241 | if (cur && cur->break_handler) { |
| 260 | return 1; | 242 | if (cur->break_handler(cur, regs)) |
| 261 | } | 243 | ret = 1; |
| 262 | } | 244 | } |
| 263 | curr_kprobe = NULL; | 245 | reset_kprobe_instance(); |
| 264 | return 0; | 246 | return ret; |
| 265 | } | 247 | } |
| 266 | 248 | ||
| 249 | /* Called with kretprobe_lock held */ | ||
| 267 | struct kretprobe_instance __kprobes *get_free_rp_inst(struct kretprobe *rp) | 250 | struct kretprobe_instance __kprobes *get_free_rp_inst(struct kretprobe *rp) |
| 268 | { | 251 | { |
| 269 | struct hlist_node *node; | 252 | struct hlist_node *node; |
| @@ -273,6 +256,7 @@ struct kretprobe_instance __kprobes *get_free_rp_inst(struct kretprobe *rp) | |||
| 273 | return NULL; | 256 | return NULL; |
| 274 | } | 257 | } |
| 275 | 258 | ||
| 259 | /* Called with kretprobe_lock held */ | ||
| 276 | static struct kretprobe_instance __kprobes *get_used_rp_inst(struct kretprobe | 260 | static struct kretprobe_instance __kprobes *get_used_rp_inst(struct kretprobe |
| 277 | *rp) | 261 | *rp) |
| 278 | { | 262 | { |
| @@ -283,6 +267,7 @@ static struct kretprobe_instance __kprobes *get_used_rp_inst(struct kretprobe | |||
| 283 | return NULL; | 267 | return NULL; |
| 284 | } | 268 | } |
| 285 | 269 | ||
| 270 | /* Called with kretprobe_lock held */ | ||
| 286 | void __kprobes add_rp_inst(struct kretprobe_instance *ri) | 271 | void __kprobes add_rp_inst(struct kretprobe_instance *ri) |
| 287 | { | 272 | { |
| 288 | /* | 273 | /* |
| @@ -301,6 +286,7 @@ void __kprobes add_rp_inst(struct kretprobe_instance *ri) | |||
| 301 | hlist_add_head(&ri->uflist, &ri->rp->used_instances); | 286 | hlist_add_head(&ri->uflist, &ri->rp->used_instances); |
| 302 | } | 287 | } |
| 303 | 288 | ||
| 289 | /* Called with kretprobe_lock held */ | ||
| 304 | void __kprobes recycle_rp_inst(struct kretprobe_instance *ri) | 290 | void __kprobes recycle_rp_inst(struct kretprobe_instance *ri) |
| 305 | { | 291 | { |
| 306 | /* remove rp inst off the rprobe_inst_table */ | 292 | /* remove rp inst off the rprobe_inst_table */ |
| @@ -334,13 +320,13 @@ void __kprobes kprobe_flush_task(struct task_struct *tk) | |||
| 334 | struct hlist_node *node, *tmp; | 320 | struct hlist_node *node, *tmp; |
| 335 | unsigned long flags = 0; | 321 | unsigned long flags = 0; |
| 336 | 322 | ||
| 337 | spin_lock_irqsave(&kprobe_lock, flags); | 323 | spin_lock_irqsave(&kretprobe_lock, flags); |
| 338 | head = kretprobe_inst_table_head(current); | 324 | head = kretprobe_inst_table_head(current); |
| 339 | hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { | 325 | hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { |
| 340 | if (ri->task == tk) | 326 | if (ri->task == tk) |
| 341 | recycle_rp_inst(ri); | 327 | recycle_rp_inst(ri); |
| 342 | } | 328 | } |
| 343 | spin_unlock_irqrestore(&kprobe_lock, flags); | 329 | spin_unlock_irqrestore(&kretprobe_lock, flags); |
| 344 | } | 330 | } |
| 345 | 331 | ||
| 346 | /* | 332 | /* |
| @@ -351,9 +337,12 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p, | |||
| 351 | struct pt_regs *regs) | 337 | struct pt_regs *regs) |
| 352 | { | 338 | { |
| 353 | struct kretprobe *rp = container_of(p, struct kretprobe, kp); | 339 | struct kretprobe *rp = container_of(p, struct kretprobe, kp); |
| 340 | unsigned long flags = 0; | ||
| 354 | 341 | ||
| 355 | /*TODO: consider to only swap the RA after the last pre_handler fired */ | 342 | /*TODO: consider to only swap the RA after the last pre_handler fired */ |
| 343 | spin_lock_irqsave(&kretprobe_lock, flags); | ||
| 356 | arch_prepare_kretprobe(rp, regs); | 344 | arch_prepare_kretprobe(rp, regs); |
| 345 | spin_unlock_irqrestore(&kretprobe_lock, flags); | ||
| 357 | return 0; | 346 | return 0; |
| 358 | } | 347 | } |
| 359 | 348 | ||
| @@ -384,13 +373,13 @@ static int __kprobes add_new_kprobe(struct kprobe *old_p, struct kprobe *p) | |||
| 384 | struct kprobe *kp; | 373 | struct kprobe *kp; |
| 385 | 374 | ||
| 386 | if (p->break_handler) { | 375 | if (p->break_handler) { |
| 387 | list_for_each_entry(kp, &old_p->list, list) { | 376 | list_for_each_entry_rcu(kp, &old_p->list, list) { |
| 388 | if (kp->break_handler) | 377 | if (kp->break_handler) |
| 389 | return -EEXIST; | 378 | return -EEXIST; |
| 390 | } | 379 | } |
| 391 | list_add_tail(&p->list, &old_p->list); | 380 | list_add_tail_rcu(&p->list, &old_p->list); |
| 392 | } else | 381 | } else |
| 393 | list_add(&p->list, &old_p->list); | 382 | list_add_rcu(&p->list, &old_p->list); |
| 394 | return 0; | 383 | return 0; |
| 395 | } | 384 | } |
| 396 | 385 | ||
| @@ -408,18 +397,18 @@ static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p) | |||
| 408 | ap->break_handler = aggr_break_handler; | 397 | ap->break_handler = aggr_break_handler; |
| 409 | 398 | ||
| 410 | INIT_LIST_HEAD(&ap->list); | 399 | INIT_LIST_HEAD(&ap->list); |
| 411 | list_add(&p->list, &ap->list); | 400 | list_add_rcu(&p->list, &ap->list); |
| 412 | 401 | ||
| 413 | INIT_HLIST_NODE(&ap->hlist); | 402 | INIT_HLIST_NODE(&ap->hlist); |
| 414 | hlist_del(&p->hlist); | 403 | hlist_del_rcu(&p->hlist); |
| 415 | hlist_add_head(&ap->hlist, | 404 | hlist_add_head_rcu(&ap->hlist, |
| 416 | &kprobe_table[hash_ptr(ap->addr, KPROBE_HASH_BITS)]); | 405 | &kprobe_table[hash_ptr(ap->addr, KPROBE_HASH_BITS)]); |
| 417 | } | 406 | } |
| 418 | 407 | ||
| 419 | /* | 408 | /* |
| 420 | * This is the second or subsequent kprobe at the address - handle | 409 | * This is the second or subsequent kprobe at the address - handle |
| 421 | * the intricacies | 410 | * the intricacies |
| 422 | * TODO: Move kcalloc outside the spinlock | 411 | * TODO: Move kcalloc outside the spin_lock |
| 423 | */ | 412 | */ |
| 424 | static int __kprobes register_aggr_kprobe(struct kprobe *old_p, | 413 | static int __kprobes register_aggr_kprobe(struct kprobe *old_p, |
| 425 | struct kprobe *p) | 414 | struct kprobe *p) |
| @@ -445,7 +434,7 @@ static int __kprobes register_aggr_kprobe(struct kprobe *old_p, | |||
| 445 | static inline void cleanup_kprobe(struct kprobe *p, unsigned long flags) | 434 | static inline void cleanup_kprobe(struct kprobe *p, unsigned long flags) |
| 446 | { | 435 | { |
| 447 | arch_disarm_kprobe(p); | 436 | arch_disarm_kprobe(p); |
| 448 | hlist_del(&p->hlist); | 437 | hlist_del_rcu(&p->hlist); |
| 449 | spin_unlock_irqrestore(&kprobe_lock, flags); | 438 | spin_unlock_irqrestore(&kprobe_lock, flags); |
| 450 | arch_remove_kprobe(p); | 439 | arch_remove_kprobe(p); |
| 451 | } | 440 | } |
| @@ -453,11 +442,10 @@ static inline void cleanup_kprobe(struct kprobe *p, unsigned long flags) | |||
| 453 | static inline void cleanup_aggr_kprobe(struct kprobe *old_p, | 442 | static inline void cleanup_aggr_kprobe(struct kprobe *old_p, |
| 454 | struct kprobe *p, unsigned long flags) | 443 | struct kprobe *p, unsigned long flags) |
| 455 | { | 444 | { |
| 456 | list_del(&p->list); | 445 | list_del_rcu(&p->list); |
| 457 | if (list_empty(&old_p->list)) { | 446 | if (list_empty(&old_p->list)) |
| 458 | cleanup_kprobe(old_p, flags); | 447 | cleanup_kprobe(old_p, flags); |
| 459 | kfree(old_p); | 448 | else |
| 460 | } else | ||
| 461 | spin_unlock_irqrestore(&kprobe_lock, flags); | 449 | spin_unlock_irqrestore(&kprobe_lock, flags); |
| 462 | } | 450 | } |
| 463 | 451 | ||
| @@ -480,9 +468,9 @@ int __kprobes register_kprobe(struct kprobe *p) | |||
| 480 | if ((ret = arch_prepare_kprobe(p)) != 0) | 468 | if ((ret = arch_prepare_kprobe(p)) != 0) |
| 481 | goto rm_kprobe; | 469 | goto rm_kprobe; |
| 482 | 470 | ||
| 471 | p->nmissed = 0; | ||
| 483 | spin_lock_irqsave(&kprobe_lock, flags); | 472 | spin_lock_irqsave(&kprobe_lock, flags); |
| 484 | old_p = get_kprobe(p->addr); | 473 | old_p = get_kprobe(p->addr); |
| 485 | p->nmissed = 0; | ||
| 486 | if (old_p) { | 474 | if (old_p) { |
| 487 | ret = register_aggr_kprobe(old_p, p); | 475 | ret = register_aggr_kprobe(old_p, p); |
| 488 | goto out; | 476 | goto out; |
| @@ -490,7 +478,7 @@ int __kprobes register_kprobe(struct kprobe *p) | |||
| 490 | 478 | ||
| 491 | arch_copy_kprobe(p); | 479 | arch_copy_kprobe(p); |
| 492 | INIT_HLIST_NODE(&p->hlist); | 480 | INIT_HLIST_NODE(&p->hlist); |
| 493 | hlist_add_head(&p->hlist, | 481 | hlist_add_head_rcu(&p->hlist, |
| 494 | &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]); | 482 | &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]); |
| 495 | 483 | ||
| 496 | arch_arm_kprobe(p); | 484 | arch_arm_kprobe(p); |
| @@ -511,10 +499,16 @@ void __kprobes unregister_kprobe(struct kprobe *p) | |||
| 511 | spin_lock_irqsave(&kprobe_lock, flags); | 499 | spin_lock_irqsave(&kprobe_lock, flags); |
| 512 | old_p = get_kprobe(p->addr); | 500 | old_p = get_kprobe(p->addr); |
| 513 | if (old_p) { | 501 | if (old_p) { |
| 502 | /* cleanup_*_kprobe() does the spin_unlock_irqrestore */ | ||
| 514 | if (old_p->pre_handler == aggr_pre_handler) | 503 | if (old_p->pre_handler == aggr_pre_handler) |
| 515 | cleanup_aggr_kprobe(old_p, p, flags); | 504 | cleanup_aggr_kprobe(old_p, p, flags); |
| 516 | else | 505 | else |
| 517 | cleanup_kprobe(p, flags); | 506 | cleanup_kprobe(p, flags); |
| 507 | |||
| 508 | synchronize_sched(); | ||
| 509 | if (old_p->pre_handler == aggr_pre_handler && | ||
| 510 | list_empty(&old_p->list)) | ||
| 511 | kfree(old_p); | ||
| 518 | } else | 512 | } else |
| 519 | spin_unlock_irqrestore(&kprobe_lock, flags); | 513 | spin_unlock_irqrestore(&kprobe_lock, flags); |
| 520 | } | 514 | } |
| @@ -591,13 +585,13 @@ void __kprobes unregister_kretprobe(struct kretprobe *rp) | |||
| 591 | 585 | ||
| 592 | unregister_kprobe(&rp->kp); | 586 | unregister_kprobe(&rp->kp); |
| 593 | /* No race here */ | 587 | /* No race here */ |
| 594 | spin_lock_irqsave(&kprobe_lock, flags); | 588 | spin_lock_irqsave(&kretprobe_lock, flags); |
| 595 | free_rp_inst(rp); | 589 | free_rp_inst(rp); |
| 596 | while ((ri = get_used_rp_inst(rp)) != NULL) { | 590 | while ((ri = get_used_rp_inst(rp)) != NULL) { |
| 597 | ri->rp = NULL; | 591 | ri->rp = NULL; |
| 598 | hlist_del(&ri->uflist); | 592 | hlist_del(&ri->uflist); |
| 599 | } | 593 | } |
| 600 | spin_unlock_irqrestore(&kprobe_lock, flags); | 594 | spin_unlock_irqrestore(&kretprobe_lock, flags); |
| 601 | } | 595 | } |
| 602 | 596 | ||
| 603 | static int __init init_kprobes(void) | 597 | static int __init init_kprobes(void) |
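
The kprobes.c hunks replace the global curr_kprobe/kprobe_cpu bookkeeping with a per-CPU kprobe_instance and switch the hash-table and aggregate-list traversals to the RCU list primitives, so probe hits no longer serialize on kprobe_lock, while kretprobe instances get their own kretprobe_lock. The registration API they service is unchanged; a minimal kprobe module of that era looks roughly like the sketch below (the probed symbol and the address lookup are illustrative assumptions, not part of this merge):

```c
/* Hypothetical module registering one kprobe; the handler signature matches
 * the aggr_pre_handler() wrapper shown in the diff above. */
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/kprobes.h>
#include <linux/kallsyms.h>

static int pre_handler(struct kprobe *p, struct pt_regs *regs)
{
        printk(KERN_INFO "kprobe hit at %p\n", p->addr);
        return 0;       /* let the probed instruction execute */
}

static struct kprobe kp = {
        .pre_handler = pre_handler,
};

static int __init probe_init(void)
{
        /* Illustrative: resolve a probe point by name. */
        kp.addr = (kprobe_opcode_t *)kallsyms_lookup_name("do_fork");
        if (!kp.addr)
                return -EINVAL;
        return register_kprobe(&kp);
}

static void __exit probe_exit(void)
{
        unregister_kprobe(&kp);
}

module_init(probe_init);
module_exit(probe_exit);
MODULE_LICENSE("GPL");
```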
diff --git a/kernel/module.c b/kernel/module.c index ff5c500ab625..2ea929d51ad0 100644 --- a/kernel/module.c +++ b/kernel/module.c | |||
| @@ -37,6 +37,7 @@ | |||
| 37 | #include <linux/stop_machine.h> | 37 | #include <linux/stop_machine.h> |
| 38 | #include <linux/device.h> | 38 | #include <linux/device.h> |
| 39 | #include <linux/string.h> | 39 | #include <linux/string.h> |
| 40 | #include <linux/sched.h> | ||
| 40 | #include <asm/uaccess.h> | 41 | #include <asm/uaccess.h> |
| 41 | #include <asm/semaphore.h> | 42 | #include <asm/semaphore.h> |
| 42 | #include <asm/cacheflush.h> | 43 | #include <asm/cacheflush.h> |
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 91a894264941..84af54c39e1b 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c | |||
| @@ -497,7 +497,7 @@ static void process_timer_rebalance(struct task_struct *p, | |||
| 497 | left = cputime_div(cputime_sub(expires.cpu, val.cpu), | 497 | left = cputime_div(cputime_sub(expires.cpu, val.cpu), |
| 498 | nthreads); | 498 | nthreads); |
| 499 | do { | 499 | do { |
| 500 | if (!unlikely(t->flags & PF_EXITING)) { | 500 | if (likely(!(t->flags & PF_EXITING))) { |
| 501 | ticks = cputime_add(prof_ticks(t), left); | 501 | ticks = cputime_add(prof_ticks(t), left); |
| 502 | if (cputime_eq(t->it_prof_expires, | 502 | if (cputime_eq(t->it_prof_expires, |
| 503 | cputime_zero) || | 503 | cputime_zero) || |
| @@ -512,7 +512,7 @@ static void process_timer_rebalance(struct task_struct *p, | |||
| 512 | left = cputime_div(cputime_sub(expires.cpu, val.cpu), | 512 | left = cputime_div(cputime_sub(expires.cpu, val.cpu), |
| 513 | nthreads); | 513 | nthreads); |
| 514 | do { | 514 | do { |
| 515 | if (!unlikely(t->flags & PF_EXITING)) { | 515 | if (likely(!(t->flags & PF_EXITING))) { |
| 516 | ticks = cputime_add(virt_ticks(t), left); | 516 | ticks = cputime_add(virt_ticks(t), left); |
| 517 | if (cputime_eq(t->it_virt_expires, | 517 | if (cputime_eq(t->it_virt_expires, |
| 518 | cputime_zero) || | 518 | cputime_zero) || |
| @@ -527,7 +527,7 @@ static void process_timer_rebalance(struct task_struct *p, | |||
| 527 | nsleft = expires.sched - val.sched; | 527 | nsleft = expires.sched - val.sched; |
| 528 | do_div(nsleft, nthreads); | 528 | do_div(nsleft, nthreads); |
| 529 | do { | 529 | do { |
| 530 | if (!unlikely(t->flags & PF_EXITING)) { | 530 | if (likely(!(t->flags & PF_EXITING))) { |
| 531 | ns = t->sched_time + nsleft; | 531 | ns = t->sched_time + nsleft; |
| 532 | if (t->it_sched_expires == 0 || | 532 | if (t->it_sched_expires == 0 || |
| 533 | t->it_sched_expires > ns) { | 533 | t->it_sched_expires > ns) { |
diff --git a/kernel/power/main.c b/kernel/power/main.c index 18d7d693fbba..6ee2cad530e8 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c | |||
| @@ -167,7 +167,7 @@ static int enter_state(suspend_state_t state) | |||
| 167 | { | 167 | { |
| 168 | int error; | 168 | int error; |
| 169 | 169 | ||
| 170 | if (pm_ops->valid && !pm_ops->valid(state)) | 170 | if (pm_ops && pm_ops->valid && !pm_ops->valid(state)) |
| 171 | return -ENODEV; | 171 | return -ENODEV; |
| 172 | if (down_trylock(&pm_sem)) | 172 | if (down_trylock(&pm_sem)) |
| 173 | return -EBUSY; | 173 | return -EBUSY; |
diff --git a/kernel/power/power.h b/kernel/power/power.h index d4fd96a135ab..6c042b5ee14b 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h | |||
| @@ -65,8 +65,8 @@ extern suspend_pagedir_t *pagedir_save; | |||
| 65 | extern asmlinkage int swsusp_arch_suspend(void); | 65 | extern asmlinkage int swsusp_arch_suspend(void); |
| 66 | extern asmlinkage int swsusp_arch_resume(void); | 66 | extern asmlinkage int swsusp_arch_resume(void); |
| 67 | 67 | ||
| 68 | extern int restore_highmem(void); | 68 | extern void free_pagedir(struct pbe *pblist); |
| 69 | extern struct pbe * alloc_pagedir(unsigned nr_pages); | 69 | extern struct pbe *alloc_pagedir(unsigned nr_pages, gfp_t gfp_mask, int safe_needed); |
| 70 | extern void create_pbe_list(struct pbe *pblist, unsigned nr_pages); | 70 | extern void create_pbe_list(struct pbe *pblist, unsigned nr_pages); |
| 71 | extern void swsusp_free(void); | 71 | extern void swsusp_free(void); |
| 72 | extern int enough_swap(unsigned nr_pages); | 72 | extern int alloc_data_pages(struct pbe *pblist, gfp_t gfp_mask, int safe_needed); |
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 42a628704398..4a6dbcefd378 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c | |||
| @@ -88,8 +88,7 @@ static int save_highmem_zone(struct zone *zone) | |||
| 88 | return 0; | 88 | return 0; |
| 89 | } | 89 | } |
| 90 | 90 | ||
| 91 | 91 | int save_highmem(void) | |
| 92 | static int save_highmem(void) | ||
| 93 | { | 92 | { |
| 94 | struct zone *zone; | 93 | struct zone *zone; |
| 95 | int res = 0; | 94 | int res = 0; |
| @@ -120,11 +119,7 @@ int restore_highmem(void) | |||
| 120 | } | 119 | } |
| 121 | return 0; | 120 | return 0; |
| 122 | } | 121 | } |
| 123 | #else | 122 | #endif |
| 124 | static int save_highmem(void) { return 0; } | ||
| 125 | int restore_highmem(void) { return 0; } | ||
| 126 | #endif /* CONFIG_HIGHMEM */ | ||
| 127 | |||
| 128 | 123 | ||
| 129 | static int pfn_is_nosave(unsigned long pfn) | 124 | static int pfn_is_nosave(unsigned long pfn) |
| 130 | { | 125 | { |
| @@ -168,9 +163,8 @@ static unsigned count_data_pages(void) | |||
| 168 | { | 163 | { |
| 169 | struct zone *zone; | 164 | struct zone *zone; |
| 170 | unsigned long zone_pfn; | 165 | unsigned long zone_pfn; |
| 171 | unsigned n; | 166 | unsigned int n = 0; |
| 172 | 167 | ||
| 173 | n = 0; | ||
| 174 | for_each_zone (zone) { | 168 | for_each_zone (zone) { |
| 175 | if (is_highmem(zone)) | 169 | if (is_highmem(zone)) |
| 176 | continue; | 170 | continue; |
| @@ -217,7 +211,7 @@ static void copy_data_pages(struct pbe *pblist) | |||
| 217 | * free_pagedir - free pages allocated with alloc_pagedir() | 211 | * free_pagedir - free pages allocated with alloc_pagedir() |
| 218 | */ | 212 | */ |
| 219 | 213 | ||
| 220 | static void free_pagedir(struct pbe *pblist) | 214 | void free_pagedir(struct pbe *pblist) |
| 221 | { | 215 | { |
| 222 | struct pbe *pbe; | 216 | struct pbe *pbe; |
| 223 | 217 | ||
| @@ -250,10 +244,10 @@ static inline void fill_pb_page(struct pbe *pbpage) | |||
| 250 | * of memory pages allocated with alloc_pagedir() | 244 | * of memory pages allocated with alloc_pagedir() |
| 251 | */ | 245 | */ |
| 252 | 246 | ||
| 253 | void create_pbe_list(struct pbe *pblist, unsigned nr_pages) | 247 | void create_pbe_list(struct pbe *pblist, unsigned int nr_pages) |
| 254 | { | 248 | { |
| 255 | struct pbe *pbpage, *p; | 249 | struct pbe *pbpage, *p; |
| 256 | unsigned num = PBES_PER_PAGE; | 250 | unsigned int num = PBES_PER_PAGE; |
| 257 | 251 | ||
| 258 | for_each_pb_page (pbpage, pblist) { | 252 | for_each_pb_page (pbpage, pblist) { |
| 259 | if (num >= nr_pages) | 253 | if (num >= nr_pages) |
| @@ -270,9 +264,30 @@ void create_pbe_list(struct pbe *pblist, unsigned nr_pages) | |||
| 270 | pr_debug("create_pbe_list(): initialized %d PBEs\n", num); | 264 | pr_debug("create_pbe_list(): initialized %d PBEs\n", num); |
| 271 | } | 265 | } |
| 272 | 266 | ||
| 273 | static void *alloc_image_page(void) | 267 | /** |
| 268 | * @safe_needed - on resume, for storing the PBE list and the image, | ||
| 269 | * we can only use memory pages that do not conflict with the pages | ||
| 270 | * which had been used before suspend. | ||
| 271 | * | ||
| 272 | * The unsafe pages are marked with the PG_nosave_free flag | ||
| 273 | * | ||
| 274 | * Allocated but unusable (i.e. eaten) memory pages should be marked | ||
| 275 | * so that swsusp_free() can release them | ||
| 276 | */ | ||
| 277 | |||
| 278 | static inline void *alloc_image_page(gfp_t gfp_mask, int safe_needed) | ||
| 274 | { | 279 | { |
| 275 | void *res = (void *)get_zeroed_page(GFP_ATOMIC | __GFP_COLD); | 280 | void *res; |
| 281 | |||
| 282 | if (safe_needed) | ||
| 283 | do { | ||
| 284 | res = (void *)get_zeroed_page(gfp_mask); | ||
| 285 | if (res && PageNosaveFree(virt_to_page(res))) | ||
| 286 | /* This is for swsusp_free() */ | ||
| 287 | SetPageNosave(virt_to_page(res)); | ||
| 288 | } while (res && PageNosaveFree(virt_to_page(res))); | ||
| 289 | else | ||
| 290 | res = (void *)get_zeroed_page(gfp_mask); | ||
| 276 | if (res) { | 291 | if (res) { |
| 277 | SetPageNosave(virt_to_page(res)); | 292 | SetPageNosave(virt_to_page(res)); |
| 278 | SetPageNosaveFree(virt_to_page(res)); | 293 | SetPageNosaveFree(virt_to_page(res)); |
| @@ -280,6 +295,11 @@ static void *alloc_image_page(void) | |||
| 280 | return res; | 295 | return res; |
| 281 | } | 296 | } |
| 282 | 297 | ||
| 298 | unsigned long get_safe_page(gfp_t gfp_mask) | ||
| 299 | { | ||
| 300 | return (unsigned long)alloc_image_page(gfp_mask, 1); | ||
| 301 | } | ||
| 302 | |||
| 283 | /** | 303 | /** |
| 284 | * alloc_pagedir - Allocate the page directory. | 304 | * alloc_pagedir - Allocate the page directory. |
| 285 | * | 305 | * |
| @@ -293,21 +313,21 @@ static void *alloc_image_page(void) | |||
| 293 | * On each page we set up a list of struct_pbe elements. | 313 | * On each page we set up a list of struct_pbe elements. |
| 294 | */ | 314 | */ |
| 295 | 315 | ||
| 296 | struct pbe *alloc_pagedir(unsigned nr_pages) | 316 | struct pbe *alloc_pagedir(unsigned int nr_pages, gfp_t gfp_mask, int safe_needed) |
| 297 | { | 317 | { |
| 298 | unsigned num; | 318 | unsigned int num; |
| 299 | struct pbe *pblist, *pbe; | 319 | struct pbe *pblist, *pbe; |
| 300 | 320 | ||
| 301 | if (!nr_pages) | 321 | if (!nr_pages) |
| 302 | return NULL; | 322 | return NULL; |
| 303 | 323 | ||
| 304 | pr_debug("alloc_pagedir(): nr_pages = %d\n", nr_pages); | 324 | pr_debug("alloc_pagedir(): nr_pages = %d\n", nr_pages); |
| 305 | pblist = alloc_image_page(); | 325 | pblist = alloc_image_page(gfp_mask, safe_needed); |
| 306 | /* FIXME: rewrite this ugly loop */ | 326 | /* FIXME: rewrite this ugly loop */ |
| 307 | for (pbe = pblist, num = PBES_PER_PAGE; pbe && num < nr_pages; | 327 | for (pbe = pblist, num = PBES_PER_PAGE; pbe && num < nr_pages; |
| 308 | pbe = pbe->next, num += PBES_PER_PAGE) { | 328 | pbe = pbe->next, num += PBES_PER_PAGE) { |
| 309 | pbe += PB_PAGE_SKIP; | 329 | pbe += PB_PAGE_SKIP; |
| 310 | pbe->next = alloc_image_page(); | 330 | pbe->next = alloc_image_page(gfp_mask, safe_needed); |
| 311 | } | 331 | } |
| 312 | if (!pbe) { /* get_zeroed_page() failed */ | 332 | if (!pbe) { /* get_zeroed_page() failed */ |
| 313 | free_pagedir(pblist); | 333 | free_pagedir(pblist); |
| @@ -329,7 +349,7 @@ void swsusp_free(void) | |||
| 329 | for_each_zone(zone) { | 349 | for_each_zone(zone) { |
| 330 | for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) | 350 | for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) |
| 331 | if (pfn_valid(zone_pfn + zone->zone_start_pfn)) { | 351 | if (pfn_valid(zone_pfn + zone->zone_start_pfn)) { |
| 332 | struct page * page; | 352 | struct page *page; |
| 333 | page = pfn_to_page(zone_pfn + zone->zone_start_pfn); | 353 | page = pfn_to_page(zone_pfn + zone->zone_start_pfn); |
| 334 | if (PageNosave(page) && PageNosaveFree(page)) { | 354 | if (PageNosave(page) && PageNosaveFree(page)) { |
| 335 | ClearPageNosave(page); | 355 | ClearPageNosave(page); |
| @@ -348,31 +368,39 @@ void swsusp_free(void) | |||
| 348 | * free pages. | 368 | * free pages. |
| 349 | */ | 369 | */ |
| 350 | 370 | ||
| 351 | static int enough_free_mem(unsigned nr_pages) | 371 | static int enough_free_mem(unsigned int nr_pages) |
| 352 | { | 372 | { |
| 353 | pr_debug("swsusp: available memory: %u pages\n", nr_free_pages()); | 373 | pr_debug("swsusp: available memory: %u pages\n", nr_free_pages()); |
| 354 | return nr_free_pages() > (nr_pages + PAGES_FOR_IO + | 374 | return nr_free_pages() > (nr_pages + PAGES_FOR_IO + |
| 355 | (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE); | 375 | (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE); |
| 356 | } | 376 | } |
| 357 | 377 | ||
| 378 | int alloc_data_pages(struct pbe *pblist, gfp_t gfp_mask, int safe_needed) | ||
| 379 | { | ||
| 380 | struct pbe *p; | ||
| 358 | 381 | ||
| 359 | static struct pbe *swsusp_alloc(unsigned nr_pages) | 382 | for_each_pbe (p, pblist) { |
| 383 | p->address = (unsigned long)alloc_image_page(gfp_mask, safe_needed); | ||
| 384 | if (!p->address) | ||
| 385 | return -ENOMEM; | ||
| 386 | } | ||
| 387 | return 0; | ||
| 388 | } | ||
| 389 | |||
| 390 | static struct pbe *swsusp_alloc(unsigned int nr_pages) | ||
| 360 | { | 391 | { |
| 361 | struct pbe *pblist, *p; | 392 | struct pbe *pblist; |
| 362 | 393 | ||
| 363 | if (!(pblist = alloc_pagedir(nr_pages))) { | 394 | if (!(pblist = alloc_pagedir(nr_pages, GFP_ATOMIC | __GFP_COLD, 0))) { |
| 364 | printk(KERN_ERR "suspend: Allocating pagedir failed.\n"); | 395 | printk(KERN_ERR "suspend: Allocating pagedir failed.\n"); |
| 365 | return NULL; | 396 | return NULL; |
| 366 | } | 397 | } |
| 367 | create_pbe_list(pblist, nr_pages); | 398 | create_pbe_list(pblist, nr_pages); |
| 368 | 399 | ||
| 369 | for_each_pbe (p, pblist) { | 400 | if (alloc_data_pages(pblist, GFP_ATOMIC | __GFP_COLD, 0)) { |
| 370 | p->address = (unsigned long)alloc_image_page(); | 401 | printk(KERN_ERR "suspend: Allocating image pages failed.\n"); |
| 371 | if (!p->address) { | 402 | swsusp_free(); |
| 372 | printk(KERN_ERR "suspend: Allocating image pages failed.\n"); | 403 | return NULL; |
| 373 | swsusp_free(); | ||
| 374 | return NULL; | ||
| 375 | } | ||
| 376 | } | 404 | } |
| 377 | 405 | ||
| 378 | return pblist; | 406 | return pblist; |
| @@ -380,14 +408,9 @@ static struct pbe *swsusp_alloc(unsigned nr_pages) | |||
| 380 | 408 | ||
| 381 | asmlinkage int swsusp_save(void) | 409 | asmlinkage int swsusp_save(void) |
| 382 | { | 410 | { |
| 383 | unsigned nr_pages; | 411 | unsigned int nr_pages; |
| 384 | 412 | ||
| 385 | pr_debug("swsusp: critical section: \n"); | 413 | pr_debug("swsusp: critical section: \n"); |
| 386 | if (save_highmem()) { | ||
| 387 | printk(KERN_CRIT "swsusp: Not enough free pages for highmem\n"); | ||
| 388 | restore_highmem(); | ||
| 389 | return -ENOMEM; | ||
| 390 | } | ||
| 391 | 414 | ||
| 392 | drain_local_pages(); | 415 | drain_local_pages(); |
| 393 | nr_pages = count_data_pages(); | 416 | nr_pages = count_data_pages(); |
| @@ -407,11 +430,6 @@ asmlinkage int swsusp_save(void) | |||
| 407 | return -ENOMEM; | 430 | return -ENOMEM; |
| 408 | } | 431 | } |
| 409 | 432 | ||
| 410 | if (!enough_swap(nr_pages)) { | ||
| 411 | printk(KERN_ERR "swsusp: Not enough free swap\n"); | ||
| 412 | return -ENOSPC; | ||
| 413 | } | ||
| 414 | |||
| 415 | pagedir_nosave = swsusp_alloc(nr_pages); | 433 | pagedir_nosave = swsusp_alloc(nr_pages); |
| 416 | if (!pagedir_nosave) | 434 | if (!pagedir_nosave) |
| 417 | return -ENOMEM; | 435 | return -ENOMEM; |
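
Both enough_free_mem() above and the enough_swap() check relocated into swsusp.c below use the same budget: the image pages themselves, plus PAGES_FOR_IO, plus one pagedir (PBE metadata) page per PBES_PER_PAGE image pages. A standalone arithmetic sketch with assumed constants (the real values are architecture and configuration dependent):

```c
/* Illustrative only: nr_pages, PAGES_FOR_IO and PBES_PER_PAGE below are
 * assumptions, not values taken from this merge. */
#include <stdio.h>

int main(void)
{
        unsigned int nr_pages = 30000;     /* assumed snapshot size, in pages */
        unsigned int pages_for_io = 1024;  /* assumed PAGES_FOR_IO            */
        unsigned int pbes_per_page = 127;  /* assumed PBES_PER_PAGE           */
        unsigned int pagedir = (nr_pages + pbes_per_page - 1) / pbes_per_page;

        printf("free pages/swap needed: %u (%u image + %u I/O + %u pagedir)\n",
               nr_pages + pages_for_io + pagedir,
               nr_pages, pages_for_io, pagedir);
        return 0;
}
```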
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index 12db1d2ad61f..c05f46e7348f 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c | |||
| @@ -73,6 +73,14 @@ | |||
| 73 | 73 | ||
| 74 | #include "power.h" | 74 | #include "power.h" |
| 75 | 75 | ||
| 76 | #ifdef CONFIG_HIGHMEM | ||
| 77 | int save_highmem(void); | ||
| 78 | int restore_highmem(void); | ||
| 79 | #else | ||
| 80 | static int save_highmem(void) { return 0; } | ||
| 81 | static int restore_highmem(void) { return 0; } | ||
| 82 | #endif | ||
| 83 | |||
| 76 | #define CIPHER "aes" | 84 | #define CIPHER "aes" |
| 77 | #define MAXKEY 32 | 85 | #define MAXKEY 32 |
| 78 | #define MAXIV 32 | 86 | #define MAXIV 32 |
| @@ -85,18 +93,11 @@ unsigned int nr_copy_pages __nosavedata = 0; | |||
| 85 | /* Suspend pagedir is allocated before final copy, therefore it | 93 | /* Suspend pagedir is allocated before final copy, therefore it |
| 86 | must be freed after resume | 94 | must be freed after resume |
| 87 | 95 | ||
| 88 | Warning: this is evil. There are actually two pagedirs at time of | ||
| 89 | resume. One is "pagedir_save", which is empty frame allocated at | ||
| 90 | time of suspend, that must be freed. Second is "pagedir_nosave", | ||
| 91 | allocated at time of resume, that travels through memory not to | ||
| 92 | collide with anything. | ||
| 93 | |||
| 94 | Warning: this is even more evil than it seems. Pagedirs this file | 96 | Warning: this is even more evil than it seems. Pagedirs this file |
| 95 | talks about are completely different from page directories used by | 97 | talks about are completely different from page directories used by |
| 96 | MMU hardware. | 98 | MMU hardware. |
| 97 | */ | 99 | */ |
| 98 | suspend_pagedir_t *pagedir_nosave __nosavedata = NULL; | 100 | suspend_pagedir_t *pagedir_nosave __nosavedata = NULL; |
| 99 | suspend_pagedir_t *pagedir_save; | ||
| 100 | 101 | ||
| 101 | #define SWSUSP_SIG "S1SUSPEND" | 102 | #define SWSUSP_SIG "S1SUSPEND" |
| 102 | 103 | ||
| @@ -122,8 +123,8 @@ static struct swsusp_info swsusp_info; | |||
| 122 | static unsigned short swapfile_used[MAX_SWAPFILES]; | 123 | static unsigned short swapfile_used[MAX_SWAPFILES]; |
| 123 | static unsigned short root_swap; | 124 | static unsigned short root_swap; |
| 124 | 125 | ||
| 125 | static int write_page(unsigned long addr, swp_entry_t * loc); | 126 | static int write_page(unsigned long addr, swp_entry_t *loc); |
| 126 | static int bio_read_page(pgoff_t page_off, void * page); | 127 | static int bio_read_page(pgoff_t page_off, void *page); |
| 127 | 128 | ||
| 128 | static u8 key_iv[MAXKEY+MAXIV]; | 129 | static u8 key_iv[MAXKEY+MAXIV]; |
| 129 | 130 | ||
| @@ -355,7 +356,7 @@ static void lock_swapdevices(void) | |||
| 355 | * This is a partial improvement, since we will at least return other | 356 | * This is a partial improvement, since we will at least return other |
| 356 | * errors, though we need to eventually fix the damn code. | 357 | * errors, though we need to eventually fix the damn code. |
| 357 | */ | 358 | */ |
| 358 | static int write_page(unsigned long addr, swp_entry_t * loc) | 359 | static int write_page(unsigned long addr, swp_entry_t *loc) |
| 359 | { | 360 | { |
| 360 | swp_entry_t entry; | 361 | swp_entry_t entry; |
| 361 | int error = 0; | 362 | int error = 0; |
| @@ -383,9 +384,9 @@ static int write_page(unsigned long addr, swp_entry_t * loc) | |||
| 383 | static void data_free(void) | 384 | static void data_free(void) |
| 384 | { | 385 | { |
| 385 | swp_entry_t entry; | 386 | swp_entry_t entry; |
| 386 | struct pbe * p; | 387 | struct pbe *p; |
| 387 | 388 | ||
| 388 | for_each_pbe(p, pagedir_nosave) { | 389 | for_each_pbe (p, pagedir_nosave) { |
| 389 | entry = p->swap_address; | 390 | entry = p->swap_address; |
| 390 | if (entry.val) | 391 | if (entry.val) |
| 391 | swap_free(entry); | 392 | swap_free(entry); |
| @@ -492,8 +493,8 @@ static void free_pagedir_entries(void) | |||
| 492 | static int write_pagedir(void) | 493 | static int write_pagedir(void) |
| 493 | { | 494 | { |
| 494 | int error = 0; | 495 | int error = 0; |
| 495 | unsigned n = 0; | 496 | unsigned int n = 0; |
| 496 | struct pbe * pbe; | 497 | struct pbe *pbe; |
| 497 | 498 | ||
| 498 | printk( "Writing pagedir..."); | 499 | printk( "Writing pagedir..."); |
| 499 | for_each_pb_page (pbe, pagedir_nosave) { | 500 | for_each_pb_page (pbe, pagedir_nosave) { |
| @@ -507,6 +508,26 @@ static int write_pagedir(void) | |||
| 507 | } | 508 | } |
| 508 | 509 | ||
| 509 | /** | 510 | /** |
| 511 | * enough_swap - Make sure we have enough swap to save the image. | ||
| 512 | * | ||
| 513 | * Returns TRUE or FALSE after checking the total amount of swap | ||
| 514 | * space available. | ||
| 515 | * | ||
| 516 | * FIXME: si_swapinfo(&i) returns information about all swap devices. | ||
| 517 | * We should only consider resume_device. | ||
| 518 | */ | ||
| 519 | |||
| 520 | static int enough_swap(unsigned int nr_pages) | ||
| 521 | { | ||
| 522 | struct sysinfo i; | ||
| 523 | |||
| 524 | si_swapinfo(&i); | ||
| 525 | pr_debug("swsusp: available swap: %lu pages\n", i.freeswap); | ||
| 526 | return i.freeswap > (nr_pages + PAGES_FOR_IO + | ||
| 527 | (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE); | ||
| 528 | } | ||
| 529 | |||
| 530 | /** | ||
| 510 | * write_suspend_image - Write entire image and metadata. | 531 | * write_suspend_image - Write entire image and metadata. |
| 511 | * | 532 | * |
| 512 | */ | 533 | */ |
| @@ -514,6 +535,11 @@ static int write_suspend_image(void) | |||
| 514 | { | 535 | { |
| 515 | int error; | 536 | int error; |
| 516 | 537 | ||
| 538 | if (!enough_swap(nr_copy_pages)) { | ||
| 539 | printk(KERN_ERR "swsusp: Not enough free swap\n"); | ||
| 540 | return -ENOSPC; | ||
| 541 | } | ||
| 542 | |||
| 517 | init_header(); | 543 | init_header(); |
| 518 | if ((error = data_write())) | 544 | if ((error = data_write())) |
| 519 | goto FreeData; | 545 | goto FreeData; |
| @@ -533,27 +559,6 @@ static int write_suspend_image(void) | |||
| 533 | goto Done; | 559 | goto Done; |
| 534 | } | 560 | } |
| 535 | 561 | ||
| 536 | /** | ||
| 537 | * enough_swap - Make sure we have enough swap to save the image. | ||
| 538 | * | ||
| 539 | * Returns TRUE or FALSE after checking the total amount of swap | ||
| 540 | * space avaiable. | ||
| 541 | * | ||
| 542 | * FIXME: si_swapinfo(&i) returns all swap devices information. | ||
| 543 | * We should only consider resume_device. | ||
| 544 | */ | ||
| 545 | |||
| 546 | int enough_swap(unsigned nr_pages) | ||
| 547 | { | ||
| 548 | struct sysinfo i; | ||
| 549 | |||
| 550 | si_swapinfo(&i); | ||
| 551 | pr_debug("swsusp: available swap: %lu pages\n", i.freeswap); | ||
| 552 | return i.freeswap > (nr_pages + PAGES_FOR_IO + | ||
| 553 | (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE); | ||
| 554 | } | ||
| 555 | |||
| 556 | |||
| 557 | /* It is important _NOT_ to umount filesystems at this point. We want | 562 | /* It is important _NOT_ to umount filesystems at this point. We want |
| 558 | * them synced (in case something goes wrong) but we DO not want to mark | 563 | * them synced (in case something goes wrong) but we DO not want to mark |
| 559 | * filesystem clean: it is not. (And it does not matter, if we resume | 564 | * filesystem clean: it is not. (And it does not matter, if we resume |
| @@ -563,12 +568,15 @@ int swsusp_write(void) | |||
| 563 | { | 568 | { |
| 564 | int error; | 569 | int error; |
| 565 | 570 | ||
| 571 | if ((error = swsusp_swap_check())) { | ||
| 572 | printk(KERN_ERR "swsusp: cannot find swap device, try swapon -a.\n"); | ||
| 573 | return error; | ||
| 574 | } | ||
| 566 | lock_swapdevices(); | 575 | lock_swapdevices(); |
| 567 | error = write_suspend_image(); | 576 | error = write_suspend_image(); |
| 568 | /* This will unlock ignored swap devices since writing is finished */ | 577 | /* This will unlock ignored swap devices since writing is finished */ |
| 569 | lock_swapdevices(); | 578 | lock_swapdevices(); |
| 570 | return error; | 579 | return error; |
| 571 | |||
| 572 | } | 580 | } |
| 573 | 581 | ||
| 574 | 582 | ||
| @@ -576,6 +584,7 @@ int swsusp_write(void) | |||
| 576 | int swsusp_suspend(void) | 584 | int swsusp_suspend(void) |
| 577 | { | 585 | { |
| 578 | int error; | 586 | int error; |
| 587 | |||
| 579 | if ((error = arch_prepare_suspend())) | 588 | if ((error = arch_prepare_suspend())) |
| 580 | return error; | 589 | return error; |
| 581 | local_irq_disable(); | 590 | local_irq_disable(); |
| @@ -587,15 +596,12 @@ int swsusp_suspend(void) | |||
| 587 | */ | 596 | */ |
| 588 | if ((error = device_power_down(PMSG_FREEZE))) { | 597 | if ((error = device_power_down(PMSG_FREEZE))) { |
| 589 | printk(KERN_ERR "Some devices failed to power down, aborting suspend\n"); | 598 | printk(KERN_ERR "Some devices failed to power down, aborting suspend\n"); |
| 590 | local_irq_enable(); | 599 | goto Enable_irqs; |
| 591 | return error; | ||
| 592 | } | 600 | } |
| 593 | 601 | ||
| 594 | if ((error = swsusp_swap_check())) { | 602 | if ((error = save_highmem())) { |
| 595 | printk(KERN_ERR "swsusp: cannot find swap device, try swapon -a.\n"); | 603 | printk(KERN_ERR "swsusp: Not enough free pages for highmem\n"); |
| 596 | device_power_up(); | 604 | goto Restore_highmem; |
| 597 | local_irq_enable(); | ||
| 598 | return error; | ||
| 599 | } | 605 | } |
| 600 | 606 | ||
| 601 | save_processor_state(); | 607 | save_processor_state(); |
| @@ -603,8 +609,10 @@ int swsusp_suspend(void) | |||
| 603 | printk(KERN_ERR "Error %d suspending\n", error); | 609 | printk(KERN_ERR "Error %d suspending\n", error); |
| 604 | /* Restore control flow magically appears here */ | 610 | /* Restore control flow magically appears here */ |
| 605 | restore_processor_state(); | 611 | restore_processor_state(); |
| 612 | Restore_highmem: | ||
| 606 | restore_highmem(); | 613 | restore_highmem(); |
| 607 | device_power_up(); | 614 | device_power_up(); |
| 615 | Enable_irqs: | ||
| 608 | local_irq_enable(); | 616 | local_irq_enable(); |
| 609 | return error; | 617 | return error; |
| 610 | } | 618 | } |
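The reworked `swsusp_suspend()` error path replaces duplicated `device_power_up()`/`local_irq_enable()` calls with stacked labels, so each failure point unwinds only what was already set up and the success path falls through the same teardown. A minimal standalone sketch of that goto-unwind idiom; the step and undo functions are invented for illustration:

```c
/* Illustrative sketch of the stacked-goto unwind idiom used above. */
#include <stdio.h>

static int step_a(void)  { puts("a up");     return 0; }
static int step_b(void)  { puts("b failed"); return -1; } /* simulate failure */
static void undo_a(void) { puts("a down"); }
static void undo_b(void) { puts("b down"); }

static int do_everything(void)
{
        int error;

        if ((error = step_a()))
                return error;           /* nothing to unwind yet */
        if ((error = step_b()))
                goto Undo_a;            /* only undo what was already set up */

        /* ... main work would go here ... */

        undo_b();
Undo_a:
        undo_a();
        return error;
}

int main(void)
{
        return do_everything() ? 1 : 0;
}
```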
| @@ -636,127 +644,43 @@ int swsusp_resume(void) | |||
| 636 | } | 644 | } |
| 637 | 645 | ||
| 638 | /** | 646 | /** |
| 639 | * On resume, for storing the PBE list and the image, | 647 | * mark_unsafe_pages - mark the pages that cannot be used for storing |
| 640 | * we can only use memory pages that do not conflict with the pages | 648 | * the image during resume, because they conflict with the pages that |
| 641 | * which had been used before suspend. | 649 | * had been used before suspend |
| 642 | * | ||
| 643 | * We don't know which pages are usable until we allocate them. | ||
| 644 | * | ||
| 645 | * Allocated but unusable (ie eaten) memory pages are marked so that | ||
| 646 | * swsusp_free() can release them | ||
| 647 | */ | ||
| 648 | |||
| 649 | unsigned long get_safe_page(gfp_t gfp_mask) | ||
| 650 | { | ||
| 651 | unsigned long m; | ||
| 652 | |||
| 653 | do { | ||
| 654 | m = get_zeroed_page(gfp_mask); | ||
| 655 | if (m && PageNosaveFree(virt_to_page(m))) | ||
| 656 | /* This is for swsusp_free() */ | ||
| 657 | SetPageNosave(virt_to_page(m)); | ||
| 658 | } while (m && PageNosaveFree(virt_to_page(m))); | ||
| 659 | if (m) { | ||
| 660 | /* This is for swsusp_free() */ | ||
| 661 | SetPageNosave(virt_to_page(m)); | ||
| 662 | SetPageNosaveFree(virt_to_page(m)); | ||
| 663 | } | ||
| 664 | return m; | ||
| 665 | } | ||
| 666 | |||
| 667 | /** | ||
| 668 | * check_pagedir - We ensure here that pages that the PBEs point to | ||
| 669 | * won't collide with pages where we're going to restore from the loaded | ||
| 670 | * pages later | ||
| 671 | */ | ||
| 672 | |||
| 673 | static int check_pagedir(struct pbe *pblist) | ||
| 674 | { | ||
| 675 | struct pbe *p; | ||
| 676 | |||
| 677 | /* This is necessary, so that we can free allocated pages | ||
| 678 | * in case of failure | ||
| 679 | */ | ||
| 680 | for_each_pbe (p, pblist) | ||
| 681 | p->address = 0UL; | ||
| 682 | |||
| 683 | for_each_pbe (p, pblist) { | ||
| 684 | p->address = get_safe_page(GFP_ATOMIC); | ||
| 685 | if (!p->address) | ||
| 686 | return -ENOMEM; | ||
| 687 | } | ||
| 688 | return 0; | ||
| 689 | } | ||
| 690 | |||
| 691 | /** | ||
| 692 | * swsusp_pagedir_relocate - It is possible, that some memory pages | ||
| 693 | * occupied by the list of PBEs collide with pages where we're going to | ||
| 694 | * restore from the loaded pages later. We relocate them here. | ||
| 695 | */ | 650 | */ |
| 696 | 651 | ||
| 697 | static struct pbe * swsusp_pagedir_relocate(struct pbe *pblist) | 652 | static void mark_unsafe_pages(struct pbe *pblist) |
| 698 | { | 653 | { |
| 699 | struct zone *zone; | 654 | struct zone *zone; |
| 700 | unsigned long zone_pfn; | 655 | unsigned long zone_pfn; |
| 701 | struct pbe *pbpage, *tail, *p; | 656 | struct pbe *p; |
| 702 | void *m; | ||
| 703 | int rel = 0; | ||
| 704 | 657 | ||
| 705 | if (!pblist) /* a sanity check */ | 658 | if (!pblist) /* a sanity check */ |
| 706 | return NULL; | 659 | return; |
| 707 | |||
| 708 | pr_debug("swsusp: Relocating pagedir (%lu pages to check)\n", | ||
| 709 | swsusp_info.pagedir_pages); | ||
| 710 | 660 | ||
| 711 | /* Clear page flags */ | 661 | /* Clear page flags */ |
| 712 | |||
| 713 | for_each_zone (zone) { | 662 | for_each_zone (zone) { |
| 714 | for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) | 663 | for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) |
| 715 | if (pfn_valid(zone_pfn + zone->zone_start_pfn)) | 664 | if (pfn_valid(zone_pfn + zone->zone_start_pfn)) |
| 716 | ClearPageNosaveFree(pfn_to_page(zone_pfn + | 665 | ClearPageNosaveFree(pfn_to_page(zone_pfn + |
| 717 | zone->zone_start_pfn)); | 666 | zone->zone_start_pfn)); |
| 718 | } | 667 | } |
| 719 | 668 | ||
| 720 | /* Mark orig addresses */ | 669 | /* Mark orig addresses */ |
| 721 | |||
| 722 | for_each_pbe (p, pblist) | 670 | for_each_pbe (p, pblist) |
| 723 | SetPageNosaveFree(virt_to_page(p->orig_address)); | 671 | SetPageNosaveFree(virt_to_page(p->orig_address)); |
| 724 | 672 | ||
| 725 | tail = pblist + PB_PAGE_SKIP; | 673 | } |
| 726 | |||
| 727 | /* Relocate colliding pages */ | ||
| 728 | |||
| 729 | for_each_pb_page (pbpage, pblist) { | ||
| 730 | if (PageNosaveFree(virt_to_page((unsigned long)pbpage))) { | ||
| 731 | m = (void *)get_safe_page(GFP_ATOMIC | __GFP_COLD); | ||
| 732 | if (!m) | ||
| 733 | return NULL; | ||
| 734 | memcpy(m, (void *)pbpage, PAGE_SIZE); | ||
| 735 | if (pbpage == pblist) | ||
| 736 | pblist = (struct pbe *)m; | ||
| 737 | else | ||
| 738 | tail->next = (struct pbe *)m; | ||
| 739 | pbpage = (struct pbe *)m; | ||
| 740 | |||
| 741 | /* We have to link the PBEs again */ | ||
| 742 | for (p = pbpage; p < pbpage + PB_PAGE_SKIP; p++) | ||
| 743 | if (p->next) /* needed to save the end */ | ||
| 744 | p->next = p + 1; | ||
| 745 | |||
| 746 | rel++; | ||
| 747 | } | ||
| 748 | tail = pbpage + PB_PAGE_SKIP; | ||
| 749 | } | ||
| 750 | 674 | ||
| 751 | /* This is for swsusp_free() */ | 675 | static void copy_page_backup_list(struct pbe *dst, struct pbe *src) |
| 752 | for_each_pb_page (pbpage, pblist) { | 676 | { |
| 753 | SetPageNosave(virt_to_page(pbpage)); | 677 | /* We assume both lists contain the same number of elements */ |
| 754 | SetPageNosaveFree(virt_to_page(pbpage)); | 678 | while (src) { |
| 679 | dst->orig_address = src->orig_address; | ||
| 680 | dst->swap_address = src->swap_address; | ||
| 681 | dst = dst->next; | ||
| 682 | src = src->next; | ||
| 755 | } | 683 | } |
| 756 | |||
| 757 | printk("swsusp: Relocated %d pages\n", rel); | ||
| 758 | |||
| 759 | return pblist; | ||
| 760 | } | 684 | } |
| 761 | 685 | ||
| 762 | /* | 686 | /* |
| @@ -770,7 +694,7 @@ static struct pbe * swsusp_pagedir_relocate(struct pbe *pblist) | |||
| 770 | 694 | ||
| 771 | static atomic_t io_done = ATOMIC_INIT(0); | 695 | static atomic_t io_done = ATOMIC_INIT(0); |
| 772 | 696 | ||
| 773 | static int end_io(struct bio * bio, unsigned int num, int err) | 697 | static int end_io(struct bio *bio, unsigned int num, int err) |
| 774 | { | 698 | { |
| 775 | if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) | 699 | if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) |
| 776 | panic("I/O error reading memory image"); | 700 | panic("I/O error reading memory image"); |
| @@ -778,7 +702,7 @@ static int end_io(struct bio * bio, unsigned int num, int err) | |||
| 778 | return 0; | 702 | return 0; |
| 779 | } | 703 | } |
| 780 | 704 | ||
| 781 | static struct block_device * resume_bdev; | 705 | static struct block_device *resume_bdev; |
| 782 | 706 | ||
| 783 | /** | 707 | /** |
| 784 | * submit - submit BIO request. | 708 | * submit - submit BIO request. |
| @@ -791,10 +715,10 @@ static struct block_device * resume_bdev; | |||
| 791 | * Then submit it and wait. | 715 | * Then submit it and wait. |
| 792 | */ | 716 | */ |
| 793 | 717 | ||
| 794 | static int submit(int rw, pgoff_t page_off, void * page) | 718 | static int submit(int rw, pgoff_t page_off, void *page) |
| 795 | { | 719 | { |
| 796 | int error = 0; | 720 | int error = 0; |
| 797 | struct bio * bio; | 721 | struct bio *bio; |
| 798 | 722 | ||
| 799 | bio = bio_alloc(GFP_ATOMIC, 1); | 723 | bio = bio_alloc(GFP_ATOMIC, 1); |
| 800 | if (!bio) | 724 | if (!bio) |
| @@ -823,12 +747,12 @@ static int submit(int rw, pgoff_t page_off, void * page) | |||
| 823 | return error; | 747 | return error; |
| 824 | } | 748 | } |
| 825 | 749 | ||
| 826 | static int bio_read_page(pgoff_t page_off, void * page) | 750 | static int bio_read_page(pgoff_t page_off, void *page) |
| 827 | { | 751 | { |
| 828 | return submit(READ, page_off, page); | 752 | return submit(READ, page_off, page); |
| 829 | } | 753 | } |
| 830 | 754 | ||
| 831 | static int bio_write_page(pgoff_t page_off, void * page) | 755 | static int bio_write_page(pgoff_t page_off, void *page) |
| 832 | { | 756 | { |
| 833 | return submit(WRITE, page_off, page); | 757 | return submit(WRITE, page_off, page); |
| 834 | } | 758 | } |
| @@ -838,7 +762,7 @@ static int bio_write_page(pgoff_t page_off, void * page) | |||
| 838 | * I really don't think that it's foolproof, but it's better than nothing. | 762 | * I really don't think that it's foolproof, but it's better than nothing. |
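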
| 839 | */ | 763 | */ |
| 840 | 764 | ||
| 841 | static const char * sanity_check(void) | 765 | static const char *sanity_check(void) |
| 842 | { | 766 | { |
| 843 | dump_info(); | 767 | dump_info(); |
| 844 | if (swsusp_info.version_code != LINUX_VERSION_CODE) | 768 | if (swsusp_info.version_code != LINUX_VERSION_CODE) |
| @@ -864,7 +788,7 @@ static const char * sanity_check(void) | |||
| 864 | 788 | ||
| 865 | static int check_header(void) | 789 | static int check_header(void) |
| 866 | { | 790 | { |
| 867 | const char * reason = NULL; | 791 | const char *reason = NULL; |
| 868 | int error; | 792 | int error; |
| 869 | 793 | ||
| 870 | if ((error = bio_read_page(swp_offset(swsusp_header.swsusp_info), &swsusp_info))) | 794 | if ((error = bio_read_page(swp_offset(swsusp_header.swsusp_info), &swsusp_info))) |
| @@ -895,7 +819,7 @@ static int check_sig(void) | |||
| 895 | * Reset swap signature now. | 819 | * Reset swap signature now. |
| 896 | */ | 820 | */ |
| 897 | error = bio_write_page(0, &swsusp_header); | 821 | error = bio_write_page(0, &swsusp_header); |
| 898 | } else { | 822 | } else { |
| 899 | return -EINVAL; | 823 | return -EINVAL; |
| 900 | } | 824 | } |
| 901 | if (!error) | 825 | if (!error) |
| @@ -912,7 +836,7 @@ static int check_sig(void) | |||
| 912 | 836 | ||
| 913 | static int data_read(struct pbe *pblist) | 837 | static int data_read(struct pbe *pblist) |
| 914 | { | 838 | { |
| 915 | struct pbe * p; | 839 | struct pbe *p; |
| 916 | int error = 0; | 840 | int error = 0; |
| 917 | int i = 0; | 841 | int i = 0; |
| 918 | int mod = swsusp_info.image_pages / 100; | 842 | int mod = swsusp_info.image_pages / 100; |
| @@ -950,7 +874,7 @@ static int data_read(struct pbe *pblist) | |||
| 950 | static int read_pagedir(struct pbe *pblist) | 874 | static int read_pagedir(struct pbe *pblist) |
| 951 | { | 875 | { |
| 952 | struct pbe *pbpage, *p; | 876 | struct pbe *pbpage, *p; |
| 953 | unsigned i = 0; | 877 | unsigned int i = 0; |
| 954 | int error; | 878 | int error; |
| 955 | 879 | ||
| 956 | if (!pblist) | 880 | if (!pblist) |
| @@ -997,20 +921,25 @@ static int read_suspend_image(void) | |||
| 997 | int error = 0; | 921 | int error = 0; |
| 998 | struct pbe *p; | 922 | struct pbe *p; |
| 999 | 923 | ||
| 1000 | if (!(p = alloc_pagedir(nr_copy_pages))) | 924 | if (!(p = alloc_pagedir(nr_copy_pages, GFP_ATOMIC, 0))) |
| 1001 | return -ENOMEM; | 925 | return -ENOMEM; |
| 1002 | 926 | ||
| 1003 | if ((error = read_pagedir(p))) | 927 | if ((error = read_pagedir(p))) |
| 1004 | return error; | 928 | return error; |
| 1005 | |||
| 1006 | create_pbe_list(p, nr_copy_pages); | 929 | create_pbe_list(p, nr_copy_pages); |
| 1007 | 930 | mark_unsafe_pages(p); | |
| 1008 | if (!(pagedir_nosave = swsusp_pagedir_relocate(p))) | 931 | pagedir_nosave = alloc_pagedir(nr_copy_pages, GFP_ATOMIC, 1); |
| 932 | if (pagedir_nosave) { | ||
| 933 | create_pbe_list(pagedir_nosave, nr_copy_pages); | ||
| 934 | copy_page_backup_list(pagedir_nosave, p); | ||
| 935 | } | ||
| 936 | free_pagedir(p); | ||
| 937 | if (!pagedir_nosave) | ||
| 1009 | return -ENOMEM; | 938 | return -ENOMEM; |
| 1010 | 939 | ||
| 1011 | /* Allocate memory for the image and read the data from swap */ | 940 | /* Allocate memory for the image and read the data from swap */ |
| 1012 | 941 | ||
| 1013 | error = check_pagedir(pagedir_nosave); | 942 | error = alloc_data_pages(pagedir_nosave, GFP_ATOMIC, 1); |
| 1014 | 943 | ||
| 1015 | if (!error) | 944 | if (!error) |
| 1016 | error = data_read(pagedir_nosave); | 945 | error = data_read(pagedir_nosave); |
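Instead of relocating colliding pagedir pages in place, `read_suspend_image()` now reads the saved pagedir into a temporary list, marks the pages it references as unusable, allocates a fresh pagedir in safe pages, copies the backup entries across with `copy_page_backup_list()`, and frees the temporary list before the data pages are allocated and read. A tiny userspace sketch of that lockstep copy between two equal-length lists; `struct entry` is a stand-in, not the kernel's `struct pbe`:

```c
/*
 * Minimal userspace sketch of the copy_page_backup_list() idea above:
 * walk two equal-length singly linked lists in lockstep and copy the
 * per-entry bookkeeping fields.
 */
#include <stdio.h>

struct entry {
        unsigned long orig_address;
        unsigned long swap_address;
        struct entry *next;
};

static void copy_backup_list(struct entry *dst, const struct entry *src)
{
        /* assumes both lists contain the same number of elements */
        while (src) {
                dst->orig_address = src->orig_address;
                dst->swap_address = src->swap_address;
                dst = dst->next;
                src = src->next;
        }
}

int main(void)
{
        struct entry a = { 0x1000, 0x10, NULL }, b = { 0x2000, 0x20, &a };
        struct entry x = { 0, 0, NULL },        y = { 0, 0, &x };

        copy_backup_list(&y, &b);
        printf("%lx %lx\n", x.orig_address, y.orig_address);
        return 0;
}
```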
diff --git a/kernel/printk.c b/kernel/printk.c index 3cb9708209bc..e9be027bc930 100644 --- a/kernel/printk.c +++ b/kernel/printk.c | |||
| @@ -806,7 +806,6 @@ void console_unblank(void) | |||
| 806 | c->unblank(); | 806 | c->unblank(); |
| 807 | release_console_sem(); | 807 | release_console_sem(); |
| 808 | } | 808 | } |
| 809 | EXPORT_SYMBOL(console_unblank); | ||
| 810 | 809 | ||
| 811 | /* | 810 | /* |
| 812 | * Return the console tty driver structure and its associated index | 811 | * Return the console tty driver structure and its associated index |
diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 863eee8bff47..b88d4186cd7a 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c | |||
| @@ -155,7 +155,7 @@ int ptrace_attach(struct task_struct *task) | |||
| 155 | retval = -EPERM; | 155 | retval = -EPERM; |
| 156 | if (task->pid <= 1) | 156 | if (task->pid <= 1) |
| 157 | goto bad; | 157 | goto bad; |
| 158 | if (task == current) | 158 | if (task->tgid == current->tgid) |
| 159 | goto bad; | 159 | goto bad; |
| 160 | /* the same process cannot be attached many times */ | 160 | /* the same process cannot be attached many times */ |
| 161 | if (task->ptrace & PT_PTRACED) | 161 | if (task->ptrace & PT_PTRACED) |
| @@ -406,3 +406,85 @@ int ptrace_request(struct task_struct *child, long request, | |||
| 406 | 406 | ||
| 407 | return ret; | 407 | return ret; |
| 408 | } | 408 | } |
| 409 | |||
| 410 | #ifndef __ARCH_SYS_PTRACE | ||
| 411 | static int ptrace_get_task_struct(long request, long pid, | ||
| 412 | struct task_struct **childp) | ||
| 413 | { | ||
| 414 | struct task_struct *child; | ||
| 415 | int ret; | ||
| 416 | |||
| 417 | /* | ||
| 418 | * Callers use child == NULL as an indication to exit early even | ||
| 419 | * when the return value is 0, so make sure it is non-NULL here. | ||
| 420 | */ | ||
| 421 | *childp = NULL; | ||
| 422 | |||
| 423 | if (request == PTRACE_TRACEME) { | ||
| 424 | /* | ||
| 425 | * Are we already being traced? | ||
| 426 | */ | ||
| 427 | if (current->ptrace & PT_PTRACED) | ||
| 428 | return -EPERM; | ||
| 429 | ret = security_ptrace(current->parent, current); | ||
| 430 | if (ret) | ||
| 431 | return -EPERM; | ||
| 432 | /* | ||
| 433 | * Set the ptrace bit in the process ptrace flags. | ||
| 434 | */ | ||
| 435 | current->ptrace |= PT_PTRACED; | ||
| 436 | return 0; | ||
| 437 | } | ||
| 438 | |||
| 439 | /* | ||
| 440 | * You may not mess with init | ||
| 441 | */ | ||
| 442 | if (pid == 1) | ||
| 443 | return -EPERM; | ||
| 444 | |||
| 445 | ret = -ESRCH; | ||
| 446 | read_lock(&tasklist_lock); | ||
| 447 | child = find_task_by_pid(pid); | ||
| 448 | if (child) | ||
| 449 | get_task_struct(child); | ||
| 450 | read_unlock(&tasklist_lock); | ||
| 451 | if (!child) | ||
| 452 | return -ESRCH; | ||
| 453 | |||
| 454 | *childp = child; | ||
| 455 | return 0; | ||
| 456 | } | ||
| 457 | |||
| 458 | asmlinkage long sys_ptrace(long request, long pid, long addr, long data) | ||
| 459 | { | ||
| 460 | struct task_struct *child; | ||
| 461 | long ret; | ||
| 462 | |||
| 463 | /* | ||
| 464 | * This lock_kernel fixes a subtle race with suid exec | ||
| 465 | */ | ||
| 466 | lock_kernel(); | ||
| 467 | ret = ptrace_get_task_struct(request, pid, &child); | ||
| 468 | if (!child) | ||
| 469 | goto out; | ||
| 470 | |||
| 471 | if (request == PTRACE_ATTACH) { | ||
| 472 | ret = ptrace_attach(child); | ||
| 473 | goto out; | ||
| 474 | } | ||
| 475 | |||
| 476 | ret = ptrace_check_attach(child, request == PTRACE_KILL); | ||
| 477 | if (ret < 0) | ||
| 478 | goto out_put_task_struct; | ||
| 479 | |||
| 480 | ret = arch_ptrace(child, request, addr, data); | ||
| 481 | if (ret < 0) | ||
| 482 | goto out_put_task_struct; | ||
| 483 | |||
| 484 | out_put_task_struct: | ||
| 485 | put_task_struct(child); | ||
| 486 | out: | ||
| 487 | unlock_kernel(); | ||
| 488 | return ret; | ||
| 489 | } | ||
| 490 | #endif /* __ARCH_SYS_PTRACE */ | ||
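The generic `sys_ptrace()` added above funnels lookups through `ptrace_get_task_struct()`, which takes a reference with `get_task_struct()` and hands the task back via an out parameter that callers also test for NULL, so every later exit drops the reference exactly once at `out_put_task_struct`. A userspace sketch of that lookup-and-reference pattern; the registry and all names are invented for illustration:

```c
/*
 * Sketch of a lookup helper that takes a reference and returns the object
 * through an out parameter; the caller drops the reference on every path.
 */
#include <stdio.h>

struct obj {
        int id;
        int refcount;
};

static struct obj registry[] = { { 1, 1 }, { 42, 1 } };

static int lookup_obj(int id, struct obj **out)
{
        size_t i;

        *out = NULL;    /* callers may rely on NULL to exit early */
        for (i = 0; i < sizeof(registry) / sizeof(registry[0]); i++) {
                if (registry[i].id == id) {
                        registry[i].refcount++;         /* "get" */
                        *out = &registry[i];
                        return 0;
                }
        }
        return -1;      /* not found */
}

static void put_obj(struct obj *o)
{
        o->refcount--;                                  /* "put" */
}

int main(void)
{
        struct obj *o;
        int ret = lookup_obj(42, &o);

        if (!o)
                goto out;       /* no reference taken, nothing to drop */

        if (o->id != 42) {      /* stand-in for a later failure */
                ret = -1;
                goto out_put;
        }
        printf("working with object %d\n", o->id);

out_put:
        put_obj(o);             /* dropped exactly once past the lookup */
out:
        return ret ? 1 : 0;
}
```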
diff --git a/kernel/sched.c b/kernel/sched.c index b4f4eb613537..b6506671b2be 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
| @@ -206,6 +206,7 @@ struct runqueue { | |||
| 206 | */ | 206 | */ |
| 207 | unsigned long nr_running; | 207 | unsigned long nr_running; |
| 208 | #ifdef CONFIG_SMP | 208 | #ifdef CONFIG_SMP |
| 209 | unsigned long prio_bias; | ||
| 209 | unsigned long cpu_load[3]; | 210 | unsigned long cpu_load[3]; |
| 210 | #endif | 211 | #endif |
| 211 | unsigned long long nr_switches; | 212 | unsigned long long nr_switches; |
| @@ -659,13 +660,68 @@ static int effective_prio(task_t *p) | |||
| 659 | return prio; | 660 | return prio; |
| 660 | } | 661 | } |
| 661 | 662 | ||
| 663 | #ifdef CONFIG_SMP | ||
| 664 | static inline void inc_prio_bias(runqueue_t *rq, int prio) | ||
| 665 | { | ||
| 666 | rq->prio_bias += MAX_PRIO - prio; | ||
| 667 | } | ||
| 668 | |||
| 669 | static inline void dec_prio_bias(runqueue_t *rq, int prio) | ||
| 670 | { | ||
| 671 | rq->prio_bias -= MAX_PRIO - prio; | ||
| 672 | } | ||
| 673 | |||
| 674 | static inline void inc_nr_running(task_t *p, runqueue_t *rq) | ||
| 675 | { | ||
| 676 | rq->nr_running++; | ||
| 677 | if (rt_task(p)) { | ||
| 678 | if (p != rq->migration_thread) | ||
| 679 | /* | ||
| 680 | * The migration thread does the actual balancing. Do | ||
| 681 | * not bias by its priority as the ultra high priority | ||
| 682 | * will skew balancing adversely. | ||
| 683 | */ | ||
| 684 | inc_prio_bias(rq, p->prio); | ||
| 685 | } else | ||
| 686 | inc_prio_bias(rq, p->static_prio); | ||
| 687 | } | ||
| 688 | |||
| 689 | static inline void dec_nr_running(task_t *p, runqueue_t *rq) | ||
| 690 | { | ||
| 691 | rq->nr_running--; | ||
| 692 | if (rt_task(p)) { | ||
| 693 | if (p != rq->migration_thread) | ||
| 694 | dec_prio_bias(rq, p->prio); | ||
| 695 | } else | ||
| 696 | dec_prio_bias(rq, p->static_prio); | ||
| 697 | } | ||
| 698 | #else | ||
| 699 | static inline void inc_prio_bias(runqueue_t *rq, int prio) | ||
| 700 | { | ||
| 701 | } | ||
| 702 | |||
| 703 | static inline void dec_prio_bias(runqueue_t *rq, int prio) | ||
| 704 | { | ||
| 705 | } | ||
| 706 | |||
| 707 | static inline void inc_nr_running(task_t *p, runqueue_t *rq) | ||
| 708 | { | ||
| 709 | rq->nr_running++; | ||
| 710 | } | ||
| 711 | |||
| 712 | static inline void dec_nr_running(task_t *p, runqueue_t *rq) | ||
| 713 | { | ||
| 714 | rq->nr_running--; | ||
| 715 | } | ||
| 716 | #endif | ||
| 717 | |||
| 662 | /* | 718 | /* |
| 663 | * __activate_task - move a task to the runqueue. | 719 | * __activate_task - move a task to the runqueue. |
| 664 | */ | 720 | */ |
| 665 | static inline void __activate_task(task_t *p, runqueue_t *rq) | 721 | static inline void __activate_task(task_t *p, runqueue_t *rq) |
| 666 | { | 722 | { |
| 667 | enqueue_task(p, rq->active); | 723 | enqueue_task(p, rq->active); |
| 668 | rq->nr_running++; | 724 | inc_nr_running(p, rq); |
| 669 | } | 725 | } |
| 670 | 726 | ||
| 671 | /* | 727 | /* |
| @@ -674,7 +730,7 @@ static inline void __activate_task(task_t *p, runqueue_t *rq) | |||
| 674 | static inline void __activate_idle_task(task_t *p, runqueue_t *rq) | 730 | static inline void __activate_idle_task(task_t *p, runqueue_t *rq) |
| 675 | { | 731 | { |
| 676 | enqueue_task_head(p, rq->active); | 732 | enqueue_task_head(p, rq->active); |
| 677 | rq->nr_running++; | 733 | inc_nr_running(p, rq); |
| 678 | } | 734 | } |
| 679 | 735 | ||
| 680 | static int recalc_task_prio(task_t *p, unsigned long long now) | 736 | static int recalc_task_prio(task_t *p, unsigned long long now) |
| @@ -759,7 +815,8 @@ static void activate_task(task_t *p, runqueue_t *rq, int local) | |||
| 759 | } | 815 | } |
| 760 | #endif | 816 | #endif |
| 761 | 817 | ||
| 762 | p->prio = recalc_task_prio(p, now); | 818 | if (!rt_task(p)) |
| 819 | p->prio = recalc_task_prio(p, now); | ||
| 763 | 820 | ||
| 764 | /* | 821 | /* |
| 765 | * This checks to make sure it's not an uninterruptible task | 822 | * This checks to make sure it's not an uninterruptible task |
| @@ -793,7 +850,7 @@ static void activate_task(task_t *p, runqueue_t *rq, int local) | |||
| 793 | */ | 850 | */ |
| 794 | static void deactivate_task(struct task_struct *p, runqueue_t *rq) | 851 | static void deactivate_task(struct task_struct *p, runqueue_t *rq) |
| 795 | { | 852 | { |
| 796 | rq->nr_running--; | 853 | dec_nr_running(p, rq); |
| 797 | dequeue_task(p, p->array); | 854 | dequeue_task(p, p->array); |
| 798 | p->array = NULL; | 855 | p->array = NULL; |
| 799 | } | 856 | } |
| @@ -808,21 +865,28 @@ static void deactivate_task(struct task_struct *p, runqueue_t *rq) | |||
| 808 | #ifdef CONFIG_SMP | 865 | #ifdef CONFIG_SMP |
| 809 | static void resched_task(task_t *p) | 866 | static void resched_task(task_t *p) |
| 810 | { | 867 | { |
| 811 | int need_resched, nrpolling; | 868 | int cpu; |
| 812 | 869 | ||
| 813 | assert_spin_locked(&task_rq(p)->lock); | 870 | assert_spin_locked(&task_rq(p)->lock); |
| 814 | 871 | ||
| 815 | /* minimise the chance of sending an interrupt to poll_idle() */ | 872 | if (unlikely(test_tsk_thread_flag(p, TIF_NEED_RESCHED))) |
| 816 | nrpolling = test_tsk_thread_flag(p,TIF_POLLING_NRFLAG); | 873 | return; |
| 817 | need_resched = test_and_set_tsk_thread_flag(p,TIF_NEED_RESCHED); | 874 | |
| 818 | nrpolling |= test_tsk_thread_flag(p,TIF_POLLING_NRFLAG); | 875 | set_tsk_thread_flag(p, TIF_NEED_RESCHED); |
| 876 | |||
| 877 | cpu = task_cpu(p); | ||
| 878 | if (cpu == smp_processor_id()) | ||
| 879 | return; | ||
| 819 | 880 | ||
| 820 | if (!need_resched && !nrpolling && (task_cpu(p) != smp_processor_id())) | 881 | /* NEED_RESCHED must be visible before we test POLLING_NRFLAG */ |
| 821 | smp_send_reschedule(task_cpu(p)); | 882 | smp_mb(); |
| 883 | if (!test_tsk_thread_flag(p, TIF_POLLING_NRFLAG)) | ||
| 884 | smp_send_reschedule(cpu); | ||
| 822 | } | 885 | } |
| 823 | #else | 886 | #else |
| 824 | static inline void resched_task(task_t *p) | 887 | static inline void resched_task(task_t *p) |
| 825 | { | 888 | { |
| 889 | assert_spin_locked(&task_rq(p)->lock); | ||
| 826 | set_tsk_need_resched(p); | 890 | set_tsk_need_resched(p); |
| 827 | } | 891 | } |
| 828 | #endif | 892 | #endif |
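The rewritten `resched_task()` above bails out if TIF_NEED_RESCHED is already set, otherwise sets it, returns early for the local CPU, and sends the IPI only when the target is not polling; the `smp_mb()` guarantees the flag store is visible before the polling flag is read, otherwise both sides could miss each other and the reschedule would be lost. A rough userspace analogue of that ordering using C11 atomics, not the kernel's primitives:

```c
/*
 * Rough analogue of the store/fence/load ordering in the new resched_task():
 * the need_resched store must be globally visible before polling is read.
 */
#include <stdatomic.h>
#include <stdio.h>

static atomic_int need_resched = 0;   /* set by the waker */
static atomic_int polling = 1;        /* set by the idle loop while it polls */

static void resched_remote(void)
{
        atomic_store_explicit(&need_resched, 1, memory_order_relaxed);

        /* full barrier: the store above is visible before the load below */
        atomic_thread_fence(memory_order_seq_cst);

        if (!atomic_load_explicit(&polling, memory_order_relaxed))
                puts("send IPI");     /* target is not polling, kick it */
        else
                puts("skip IPI");     /* poller will notice need_resched itself */
}

int main(void)
{
        resched_remote();
        return 0;
}
```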
| @@ -930,27 +994,61 @@ void kick_process(task_t *p) | |||
| 930 | * We want to under-estimate the load of migration sources, to | 994 | * We want to under-estimate the load of migration sources, to |
| 931 | * balance conservatively. | 995 | * balance conservatively. |
| 932 | */ | 996 | */ |
| 933 | static inline unsigned long source_load(int cpu, int type) | 997 | static inline unsigned long __source_load(int cpu, int type, enum idle_type idle) |
| 934 | { | 998 | { |
| 935 | runqueue_t *rq = cpu_rq(cpu); | 999 | runqueue_t *rq = cpu_rq(cpu); |
| 936 | unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE; | 1000 | unsigned long running = rq->nr_running; |
| 1001 | unsigned long source_load, cpu_load = rq->cpu_load[type-1], | ||
| 1002 | load_now = running * SCHED_LOAD_SCALE; | ||
| 1003 | |||
| 937 | if (type == 0) | 1004 | if (type == 0) |
| 938 | return load_now; | 1005 | source_load = load_now; |
| 1006 | else | ||
| 1007 | source_load = min(cpu_load, load_now); | ||
| 1008 | |||
| 1009 | if (running > 1 || (idle == NOT_IDLE && running)) | ||
| 1010 | /* | ||
| 1011 | * If we are busy rebalancing the load is biased by | ||
| 1012 | * priority to create 'nice' support across cpus. When | ||
| 1013 | * idle rebalancing we should only bias the source_load if | ||
| 1014 | * there is more than one task running on that queue to | ||
| 1015 | * prevent idle rebalance from trying to pull tasks from a | ||
| 1016 | * queue with only one running task. | ||
| 1017 | */ | ||
| 1018 | source_load = source_load * rq->prio_bias / running; | ||
| 1019 | |||
| 1020 | return source_load; | ||
| 1021 | } | ||
| 939 | 1022 | ||
| 940 | return min(rq->cpu_load[type-1], load_now); | 1023 | static inline unsigned long source_load(int cpu, int type) |
| 1024 | { | ||
| 1025 | return __source_load(cpu, type, NOT_IDLE); | ||
| 941 | } | 1026 | } |
| 942 | 1027 | ||
| 943 | /* | 1028 | /* |
| 944 | * Return a high guess at the load of a migration-target cpu | 1029 | * Return a high guess at the load of a migration-target cpu |
| 945 | */ | 1030 | */ |
| 946 | static inline unsigned long target_load(int cpu, int type) | 1031 | static inline unsigned long __target_load(int cpu, int type, enum idle_type idle) |
| 947 | { | 1032 | { |
| 948 | runqueue_t *rq = cpu_rq(cpu); | 1033 | runqueue_t *rq = cpu_rq(cpu); |
| 949 | unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE; | 1034 | unsigned long running = rq->nr_running; |
| 1035 | unsigned long target_load, cpu_load = rq->cpu_load[type-1], | ||
| 1036 | load_now = running * SCHED_LOAD_SCALE; | ||
| 1037 | |||
| 950 | if (type == 0) | 1038 | if (type == 0) |
| 951 | return load_now; | 1039 | target_load = load_now; |
| 1040 | else | ||
| 1041 | target_load = max(cpu_load, load_now); | ||
| 1042 | |||
| 1043 | if (running > 1 || (idle == NOT_IDLE && running)) | ||
| 1044 | target_load = target_load * rq->prio_bias / running; | ||
| 1045 | |||
| 1046 | return target_load; | ||
| 1047 | } | ||
| 952 | 1048 | ||
| 953 | return max(rq->cpu_load[type-1], load_now); | 1049 | static inline unsigned long target_load(int cpu, int type) |
| 1050 | { | ||
| 1051 | return __target_load(cpu, type, NOT_IDLE); | ||
| 954 | } | 1052 | } |
| 955 | 1053 | ||
| 956 | /* | 1054 | /* |
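`__source_load()` and `__target_load()` above scale the raw load by `prio_bias / nr_running` when the queue is busy (or has more than one runnable task during idle balancing); `prio_bias` is the per-runqueue sum of `MAX_PRIO - prio` maintained by `inc_nr_running()`/`dec_nr_running()`. A worked example of that arithmetic in plain C; `MAX_PRIO`, `SCHED_LOAD_SCALE`, and the sample priorities are illustrative values, not taken from the kernel headers:

```c
/* Worked example of the prio_bias load scaling; constants are placeholders. */
#include <stdio.h>

#define MAX_PRIO         140
#define SCHED_LOAD_SCALE 128UL

int main(void)
{
        /* static priorities of the queued tasks: a nice-0 and a nice-19 task */
        int prio[] = { 120, 139 };
        unsigned long running = 2, prio_bias = 0, load_now, biased;
        unsigned long i;

        for (i = 0; i < running; i++)
                prio_bias += MAX_PRIO - prio[i];   /* as inc_prio_bias() does */

        load_now = running * SCHED_LOAD_SCALE;

        /* busy rebalancing: bias the raw load by the queue's priority weight */
        biased = load_now * prio_bias / running;

        printf("raw load %lu, prio_bias %lu, biased load %lu\n",
               load_now, prio_bias, biased);
        return 0;
}
```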
| @@ -1411,7 +1509,7 @@ void fastcall wake_up_new_task(task_t *p, unsigned long clone_flags) | |||
| 1411 | list_add_tail(&p->run_list, ¤t->run_list); | 1509 | list_add_tail(&p->run_list, ¤t->run_list); |
| 1412 | p->array = current->array; | 1510 | p->array = current->array; |
| 1413 | p->array->nr_active++; | 1511 | p->array->nr_active++; |
| 1414 | rq->nr_running++; | 1512 | inc_nr_running(p, rq); |
| 1415 | } | 1513 | } |
| 1416 | set_need_resched(); | 1514 | set_need_resched(); |
| 1417 | } else | 1515 | } else |
| @@ -1756,9 +1854,9 @@ void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p, | |||
| 1756 | runqueue_t *this_rq, prio_array_t *this_array, int this_cpu) | 1854 | runqueue_t *this_rq, prio_array_t *this_array, int this_cpu) |
| 1757 | { | 1855 | { |
| 1758 | dequeue_task(p, src_array); | 1856 | dequeue_task(p, src_array); |
| 1759 | src_rq->nr_running--; | 1857 | dec_nr_running(p, src_rq); |
| 1760 | set_task_cpu(p, this_cpu); | 1858 | set_task_cpu(p, this_cpu); |
| 1761 | this_rq->nr_running++; | 1859 | inc_nr_running(p, this_rq); |
| 1762 | enqueue_task(p, this_array); | 1860 | enqueue_task(p, this_array); |
| 1763 | p->timestamp = (p->timestamp - src_rq->timestamp_last_tick) | 1861 | p->timestamp = (p->timestamp - src_rq->timestamp_last_tick) |
| 1764 | + this_rq->timestamp_last_tick; | 1862 | + this_rq->timestamp_last_tick; |
| @@ -1937,9 +2035,9 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, | |||
| 1937 | 2035 | ||
| 1938 | /* Bias balancing toward cpus of our domain */ | 2036 | /* Bias balancing toward cpus of our domain */ |
| 1939 | if (local_group) | 2037 | if (local_group) |
| 1940 | load = target_load(i, load_idx); | 2038 | load = __target_load(i, load_idx, idle); |
| 1941 | else | 2039 | else |
| 1942 | load = source_load(i, load_idx); | 2040 | load = __source_load(i, load_idx, idle); |
| 1943 | 2041 | ||
| 1944 | avg_load += load; | 2042 | avg_load += load; |
| 1945 | } | 2043 | } |
| @@ -2044,14 +2142,15 @@ out_balanced: | |||
| 2044 | /* | 2142 | /* |
| 2045 | * find_busiest_queue - find the busiest runqueue among the cpus in group. | 2143 | * find_busiest_queue - find the busiest runqueue among the cpus in group. |
| 2046 | */ | 2144 | */ |
| 2047 | static runqueue_t *find_busiest_queue(struct sched_group *group) | 2145 | static runqueue_t *find_busiest_queue(struct sched_group *group, |
| 2146 | enum idle_type idle) | ||
| 2048 | { | 2147 | { |
| 2049 | unsigned long load, max_load = 0; | 2148 | unsigned long load, max_load = 0; |
| 2050 | runqueue_t *busiest = NULL; | 2149 | runqueue_t *busiest = NULL; |
| 2051 | int i; | 2150 | int i; |
| 2052 | 2151 | ||
| 2053 | for_each_cpu_mask(i, group->cpumask) { | 2152 | for_each_cpu_mask(i, group->cpumask) { |
| 2054 | load = source_load(i, 0); | 2153 | load = __source_load(i, 0, idle); |
| 2055 | 2154 | ||
| 2056 | if (load > max_load) { | 2155 | if (load > max_load) { |
| 2057 | max_load = load; | 2156 | max_load = load; |
| @@ -2095,7 +2194,7 @@ static int load_balance(int this_cpu, runqueue_t *this_rq, | |||
| 2095 | goto out_balanced; | 2194 | goto out_balanced; |
| 2096 | } | 2195 | } |
| 2097 | 2196 | ||
| 2098 | busiest = find_busiest_queue(group); | 2197 | busiest = find_busiest_queue(group, idle); |
| 2099 | if (!busiest) { | 2198 | if (!busiest) { |
| 2100 | schedstat_inc(sd, lb_nobusyq[idle]); | 2199 | schedstat_inc(sd, lb_nobusyq[idle]); |
| 2101 | goto out_balanced; | 2200 | goto out_balanced; |
| @@ -2218,7 +2317,7 @@ static int load_balance_newidle(int this_cpu, runqueue_t *this_rq, | |||
| 2218 | goto out_balanced; | 2317 | goto out_balanced; |
| 2219 | } | 2318 | } |
| 2220 | 2319 | ||
| 2221 | busiest = find_busiest_queue(group); | 2320 | busiest = find_busiest_queue(group, NEWLY_IDLE); |
| 2222 | if (!busiest) { | 2321 | if (!busiest) { |
| 2223 | schedstat_inc(sd, lb_nobusyq[NEWLY_IDLE]); | 2322 | schedstat_inc(sd, lb_nobusyq[NEWLY_IDLE]); |
| 2224 | goto out_balanced; | 2323 | goto out_balanced; |
| @@ -3451,8 +3550,10 @@ void set_user_nice(task_t *p, long nice) | |||
| 3451 | goto out_unlock; | 3550 | goto out_unlock; |
| 3452 | } | 3551 | } |
| 3453 | array = p->array; | 3552 | array = p->array; |
| 3454 | if (array) | 3553 | if (array) { |
| 3455 | dequeue_task(p, array); | 3554 | dequeue_task(p, array); |
| 3555 | dec_prio_bias(rq, p->static_prio); | ||
| 3556 | } | ||
| 3456 | 3557 | ||
| 3457 | old_prio = p->prio; | 3558 | old_prio = p->prio; |
| 3458 | new_prio = NICE_TO_PRIO(nice); | 3559 | new_prio = NICE_TO_PRIO(nice); |
| @@ -3462,6 +3563,7 @@ void set_user_nice(task_t *p, long nice) | |||
| 3462 | 3563 | ||
| 3463 | if (array) { | 3564 | if (array) { |
| 3464 | enqueue_task(p, array); | 3565 | enqueue_task(p, array); |
| 3566 | inc_prio_bias(rq, p->static_prio); | ||
| 3465 | /* | 3567 | /* |
| 3466 | * If the task increased its priority or is running and | 3568 | * If the task increased its priority or is running and |
| 3467 | * lowered its priority, then reschedule its CPU: | 3569 | * lowered its priority, then reschedule its CPU: |
| @@ -3563,8 +3665,6 @@ int idle_cpu(int cpu) | |||
| 3563 | return cpu_curr(cpu) == cpu_rq(cpu)->idle; | 3665 | return cpu_curr(cpu) == cpu_rq(cpu)->idle; |
| 3564 | } | 3666 | } |
| 3565 | 3667 | ||
| 3566 | EXPORT_SYMBOL_GPL(idle_cpu); | ||
| 3567 | |||
| 3568 | /** | 3668 | /** |
| 3569 | * idle_task - return the idle task for a given cpu. | 3669 | * idle_task - return the idle task for a given cpu. |
| 3570 | * @cpu: the processor in question. | 3670 | * @cpu: the processor in question. |
| @@ -4680,7 +4780,8 @@ static int migration_call(struct notifier_block *nfb, unsigned long action, | |||
| 4680 | #ifdef CONFIG_HOTPLUG_CPU | 4780 | #ifdef CONFIG_HOTPLUG_CPU |
| 4681 | case CPU_UP_CANCELED: | 4781 | case CPU_UP_CANCELED: |
| 4682 | /* Unbind it from offline cpu so it can run. Fall thru. */ | 4782 | /* Unbind it from offline cpu so it can run. Fall thru. */ |
| 4683 | kthread_bind(cpu_rq(cpu)->migration_thread,smp_processor_id()); | 4783 | kthread_bind(cpu_rq(cpu)->migration_thread, |
| 4784 | any_online_cpu(cpu_online_map)); | ||
| 4684 | kthread_stop(cpu_rq(cpu)->migration_thread); | 4785 | kthread_stop(cpu_rq(cpu)->migration_thread); |
| 4685 | cpu_rq(cpu)->migration_thread = NULL; | 4786 | cpu_rq(cpu)->migration_thread = NULL; |
| 4686 | break; | 4787 | break; |
diff --git a/kernel/softirq.c b/kernel/softirq.c index f766b2fc48be..ad3295cdded5 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
| @@ -470,7 +470,8 @@ static int __devinit cpu_callback(struct notifier_block *nfb, | |||
| 470 | #ifdef CONFIG_HOTPLUG_CPU | 470 | #ifdef CONFIG_HOTPLUG_CPU |
| 471 | case CPU_UP_CANCELED: | 471 | case CPU_UP_CANCELED: |
| 472 | /* Unbind so it can run. Fall thru. */ | 472 | /* Unbind so it can run. Fall thru. */ |
| 473 | kthread_bind(per_cpu(ksoftirqd, hotcpu), smp_processor_id()); | 473 | kthread_bind(per_cpu(ksoftirqd, hotcpu), |
| 474 | any_online_cpu(cpu_online_map)); | ||
| 474 | case CPU_DEAD: | 475 | case CPU_DEAD: |
| 475 | p = per_cpu(ksoftirqd, hotcpu); | 476 | p = per_cpu(ksoftirqd, hotcpu); |
| 476 | per_cpu(ksoftirqd, hotcpu) = NULL; | 477 | per_cpu(ksoftirqd, hotcpu) = NULL; |
diff --git a/kernel/softlockup.c b/kernel/softlockup.c index 75976209cea7..c67189a25d52 100644 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c | |||
| @@ -73,9 +73,6 @@ void softlockup_tick(struct pt_regs *regs) | |||
| 73 | static int watchdog(void * __bind_cpu) | 73 | static int watchdog(void * __bind_cpu) |
| 74 | { | 74 | { |
| 75 | struct sched_param param = { .sched_priority = 99 }; | 75 | struct sched_param param = { .sched_priority = 99 }; |
| 76 | int this_cpu = (long) __bind_cpu; | ||
| 77 | |||
| 78 | printk("softlockup thread %d started up.\n", this_cpu); | ||
| 79 | 76 | ||
| 80 | sched_setscheduler(current, SCHED_FIFO, ¶m); | 77 | sched_setscheduler(current, SCHED_FIFO, ¶m); |
| 81 | current->flags |= PF_NOFREEZE; | 78 | current->flags |= PF_NOFREEZE; |
| @@ -123,7 +120,8 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
| 123 | #ifdef CONFIG_HOTPLUG_CPU | 120 | #ifdef CONFIG_HOTPLUG_CPU |
| 124 | case CPU_UP_CANCELED: | 121 | case CPU_UP_CANCELED: |
| 125 | /* Unbind so it can run. Fall thru. */ | 122 | /* Unbind so it can run. Fall thru. */ |
| 126 | kthread_bind(per_cpu(watchdog_task, hotcpu), smp_processor_id()); | 123 | kthread_bind(per_cpu(watchdog_task, hotcpu), |
| 124 | any_online_cpu(cpu_online_map)); | ||
| 127 | case CPU_DEAD: | 125 | case CPU_DEAD: |
| 128 | p = per_cpu(watchdog_task, hotcpu); | 126 | p = per_cpu(watchdog_task, hotcpu); |
| 129 | per_cpu(watchdog_task, hotcpu) = NULL; | 127 | per_cpu(watchdog_task, hotcpu) = NULL; |
diff --git a/kernel/sys.c b/kernel/sys.c index 2fa1ed18123c..c43b3e22bbda 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
| @@ -28,6 +28,7 @@ | |||
| 28 | #include <linux/suspend.h> | 28 | #include <linux/suspend.h> |
| 29 | #include <linux/tty.h> | 29 | #include <linux/tty.h> |
| 30 | #include <linux/signal.h> | 30 | #include <linux/signal.h> |
| 31 | #include <linux/cn_proc.h> | ||
| 31 | 32 | ||
| 32 | #include <linux/compat.h> | 33 | #include <linux/compat.h> |
| 33 | #include <linux/syscalls.h> | 34 | #include <linux/syscalls.h> |
| @@ -375,18 +376,21 @@ void emergency_restart(void) | |||
| 375 | } | 376 | } |
| 376 | EXPORT_SYMBOL_GPL(emergency_restart); | 377 | EXPORT_SYMBOL_GPL(emergency_restart); |
| 377 | 378 | ||
| 378 | /** | ||
| 379 | * kernel_restart - reboot the system | ||
| 380 | * | ||
| 381 | * Shutdown everything and perform a clean reboot. | ||
| 382 | * This is not safe to call in interrupt context. | ||
| 383 | */ | ||
| 384 | void kernel_restart_prepare(char *cmd) | 379 | void kernel_restart_prepare(char *cmd) |
| 385 | { | 380 | { |
| 386 | notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd); | 381 | notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd); |
| 387 | system_state = SYSTEM_RESTART; | 382 | system_state = SYSTEM_RESTART; |
| 388 | device_shutdown(); | 383 | device_shutdown(); |
| 389 | } | 384 | } |
| 385 | |||
| 386 | /** | ||
| 387 | * kernel_restart - reboot the system | ||
| 388 | * @cmd: pointer to buffer containing command to execute for restart | ||
| 389 | * or %NULL | ||
| 390 | * | ||
| 391 | * Shutdown everything and perform a clean reboot. | ||
| 392 | * This is not safe to call in interrupt context. | ||
| 393 | */ | ||
| 390 | void kernel_restart(char *cmd) | 394 | void kernel_restart(char *cmd) |
| 391 | { | 395 | { |
| 392 | kernel_restart_prepare(cmd); | 396 | kernel_restart_prepare(cmd); |
| @@ -623,6 +627,7 @@ asmlinkage long sys_setregid(gid_t rgid, gid_t egid) | |||
| 623 | current->egid = new_egid; | 627 | current->egid = new_egid; |
| 624 | current->gid = new_rgid; | 628 | current->gid = new_rgid; |
| 625 | key_fsgid_changed(current); | 629 | key_fsgid_changed(current); |
| 630 | proc_id_connector(current, PROC_EVENT_GID); | ||
| 626 | return 0; | 631 | return 0; |
| 627 | } | 632 | } |
| 628 | 633 | ||
| @@ -662,6 +667,7 @@ asmlinkage long sys_setgid(gid_t gid) | |||
| 662 | return -EPERM; | 667 | return -EPERM; |
| 663 | 668 | ||
| 664 | key_fsgid_changed(current); | 669 | key_fsgid_changed(current); |
| 670 | proc_id_connector(current, PROC_EVENT_GID); | ||
| 665 | return 0; | 671 | return 0; |
| 666 | } | 672 | } |
| 667 | 673 | ||
| @@ -751,6 +757,7 @@ asmlinkage long sys_setreuid(uid_t ruid, uid_t euid) | |||
| 751 | current->fsuid = current->euid; | 757 | current->fsuid = current->euid; |
| 752 | 758 | ||
| 753 | key_fsuid_changed(current); | 759 | key_fsuid_changed(current); |
| 760 | proc_id_connector(current, PROC_EVENT_UID); | ||
| 754 | 761 | ||
| 755 | return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_RE); | 762 | return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_RE); |
| 756 | } | 763 | } |
| @@ -798,6 +805,7 @@ asmlinkage long sys_setuid(uid_t uid) | |||
| 798 | current->suid = new_suid; | 805 | current->suid = new_suid; |
| 799 | 806 | ||
| 800 | key_fsuid_changed(current); | 807 | key_fsuid_changed(current); |
| 808 | proc_id_connector(current, PROC_EVENT_UID); | ||
| 801 | 809 | ||
| 802 | return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_ID); | 810 | return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_ID); |
| 803 | } | 811 | } |
| @@ -846,6 +854,7 @@ asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) | |||
| 846 | current->suid = suid; | 854 | current->suid = suid; |
| 847 | 855 | ||
| 848 | key_fsuid_changed(current); | 856 | key_fsuid_changed(current); |
| 857 | proc_id_connector(current, PROC_EVENT_UID); | ||
| 849 | 858 | ||
| 850 | return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_RES); | 859 | return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_RES); |
| 851 | } | 860 | } |
| @@ -898,6 +907,7 @@ asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid) | |||
| 898 | current->sgid = sgid; | 907 | current->sgid = sgid; |
| 899 | 908 | ||
| 900 | key_fsgid_changed(current); | 909 | key_fsgid_changed(current); |
| 910 | proc_id_connector(current, PROC_EVENT_GID); | ||
| 901 | return 0; | 911 | return 0; |
| 902 | } | 912 | } |
| 903 | 913 | ||
| @@ -940,6 +950,7 @@ asmlinkage long sys_setfsuid(uid_t uid) | |||
| 940 | } | 950 | } |
| 941 | 951 | ||
| 942 | key_fsuid_changed(current); | 952 | key_fsuid_changed(current); |
| 953 | proc_id_connector(current, PROC_EVENT_UID); | ||
| 943 | 954 | ||
| 944 | security_task_post_setuid(old_fsuid, (uid_t)-1, (uid_t)-1, LSM_SETID_FS); | 955 | security_task_post_setuid(old_fsuid, (uid_t)-1, (uid_t)-1, LSM_SETID_FS); |
| 945 | 956 | ||
| @@ -968,6 +979,7 @@ asmlinkage long sys_setfsgid(gid_t gid) | |||
| 968 | } | 979 | } |
| 969 | current->fsgid = gid; | 980 | current->fsgid = gid; |
| 970 | key_fsgid_changed(current); | 981 | key_fsgid_changed(current); |
| 982 | proc_id_connector(current, PROC_EVENT_GID); | ||
| 971 | } | 983 | } |
| 972 | return old_fsgid; | 984 | return old_fsgid; |
| 973 | } | 985 | } |
| @@ -1485,8 +1497,6 @@ EXPORT_SYMBOL(in_egroup_p); | |||
| 1485 | 1497 | ||
| 1486 | DECLARE_RWSEM(uts_sem); | 1498 | DECLARE_RWSEM(uts_sem); |
| 1487 | 1499 | ||
| 1488 | EXPORT_SYMBOL(uts_sem); | ||
| 1489 | |||
| 1490 | asmlinkage long sys_newuname(struct new_utsname __user * name) | 1500 | asmlinkage long sys_newuname(struct new_utsname __user * name) |
| 1491 | { | 1501 | { |
| 1492 | int errno = 0; | 1502 | int errno = 0; |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 8e56e2495542..9990e10192e8 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
| @@ -169,7 +169,7 @@ struct file_operations proc_sys_file_operations = { | |||
| 169 | 169 | ||
| 170 | extern struct proc_dir_entry *proc_sys_root; | 170 | extern struct proc_dir_entry *proc_sys_root; |
| 171 | 171 | ||
| 172 | static void register_proc_table(ctl_table *, struct proc_dir_entry *); | 172 | static void register_proc_table(ctl_table *, struct proc_dir_entry *, void *); |
| 173 | static void unregister_proc_table(ctl_table *, struct proc_dir_entry *); | 173 | static void unregister_proc_table(ctl_table *, struct proc_dir_entry *); |
| 174 | #endif | 174 | #endif |
| 175 | 175 | ||
| @@ -952,7 +952,7 @@ static ctl_table fs_table[] = { | |||
| 952 | .data = &aio_nr, | 952 | .data = &aio_nr, |
| 953 | .maxlen = sizeof(aio_nr), | 953 | .maxlen = sizeof(aio_nr), |
| 954 | .mode = 0444, | 954 | .mode = 0444, |
| 955 | .proc_handler = &proc_dointvec, | 955 | .proc_handler = &proc_doulongvec_minmax, |
| 956 | }, | 956 | }, |
| 957 | { | 957 | { |
| 958 | .ctl_name = FS_AIO_MAX_NR, | 958 | .ctl_name = FS_AIO_MAX_NR, |
| @@ -960,7 +960,7 @@ static ctl_table fs_table[] = { | |||
| 960 | .data = &aio_max_nr, | 960 | .data = &aio_max_nr, |
| 961 | .maxlen = sizeof(aio_max_nr), | 961 | .maxlen = sizeof(aio_max_nr), |
| 962 | .mode = 0644, | 962 | .mode = 0644, |
| 963 | .proc_handler = &proc_dointvec, | 963 | .proc_handler = &proc_doulongvec_minmax, |
| 964 | }, | 964 | }, |
| 965 | #ifdef CONFIG_INOTIFY | 965 | #ifdef CONFIG_INOTIFY |
| 966 | { | 966 | { |
| @@ -992,10 +992,51 @@ static ctl_table dev_table[] = { | |||
| 992 | 992 | ||
| 993 | extern void init_irq_proc (void); | 993 | extern void init_irq_proc (void); |
| 994 | 994 | ||
| 995 | static DEFINE_SPINLOCK(sysctl_lock); | ||
| 996 | |||
| 997 | /* called under sysctl_lock */ | ||
| 998 | static int use_table(struct ctl_table_header *p) | ||
| 999 | { | ||
| 1000 | if (unlikely(p->unregistering)) | ||
| 1001 | return 0; | ||
| 1002 | p->used++; | ||
| 1003 | return 1; | ||
| 1004 | } | ||
| 1005 | |||
| 1006 | /* called under sysctl_lock */ | ||
| 1007 | static void unuse_table(struct ctl_table_header *p) | ||
| 1008 | { | ||
| 1009 | if (!--p->used) | ||
| 1010 | if (unlikely(p->unregistering)) | ||
| 1011 | complete(p->unregistering); | ||
| 1012 | } | ||
| 1013 | |||
| 1014 | /* called under sysctl_lock, will reacquire if has to wait */ | ||
| 1015 | static void start_unregistering(struct ctl_table_header *p) | ||
| 1016 | { | ||
| 1017 | /* | ||
| 1018 | * if p->used is 0, nobody will ever touch that entry again; | ||
| 1019 | * we'll eliminate all paths to it before dropping sysctl_lock | ||
| 1020 | */ | ||
| 1021 | if (unlikely(p->used)) { | ||
| 1022 | struct completion wait; | ||
| 1023 | init_completion(&wait); | ||
| 1024 | p->unregistering = &wait; | ||
| 1025 | spin_unlock(&sysctl_lock); | ||
| 1026 | wait_for_completion(&wait); | ||
| 1027 | spin_lock(&sysctl_lock); | ||
| 1028 | } | ||
| 1029 | /* | ||
| 1030 | * do not remove from the list until nobody holds it; walking the | ||
| 1031 | * list in do_sysctl() relies on that. | ||
| 1032 | */ | ||
| 1033 | list_del_init(&p->ctl_entry); | ||
| 1034 | } | ||
| 1035 | |||
| 995 | void __init sysctl_init(void) | 1036 | void __init sysctl_init(void) |
| 996 | { | 1037 | { |
| 997 | #ifdef CONFIG_PROC_FS | 1038 | #ifdef CONFIG_PROC_FS |
| 998 | register_proc_table(root_table, proc_sys_root); | 1039 | register_proc_table(root_table, proc_sys_root, &root_table_header); |
| 999 | init_irq_proc(); | 1040 | init_irq_proc(); |
| 1000 | #endif | 1041 | #endif |
| 1001 | } | 1042 | } |
| @@ -1004,6 +1045,7 @@ int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *ol | |||
| 1004 | void __user *newval, size_t newlen) | 1045 | void __user *newval, size_t newlen) |
| 1005 | { | 1046 | { |
| 1006 | struct list_head *tmp; | 1047 | struct list_head *tmp; |
| 1048 | int error = -ENOTDIR; | ||
| 1007 | 1049 | ||
| 1008 | if (nlen <= 0 || nlen >= CTL_MAXNAME) | 1050 | if (nlen <= 0 || nlen >= CTL_MAXNAME) |
| 1009 | return -ENOTDIR; | 1051 | return -ENOTDIR; |
| @@ -1012,20 +1054,30 @@ int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *ol | |||
| 1012 | if (!oldlenp || get_user(old_len, oldlenp)) | 1054 | if (!oldlenp || get_user(old_len, oldlenp)) |
| 1013 | return -EFAULT; | 1055 | return -EFAULT; |
| 1014 | } | 1056 | } |
| 1057 | spin_lock(&sysctl_lock); | ||
| 1015 | tmp = &root_table_header.ctl_entry; | 1058 | tmp = &root_table_header.ctl_entry; |
| 1016 | do { | 1059 | do { |
| 1017 | struct ctl_table_header *head = | 1060 | struct ctl_table_header *head = |
| 1018 | list_entry(tmp, struct ctl_table_header, ctl_entry); | 1061 | list_entry(tmp, struct ctl_table_header, ctl_entry); |
| 1019 | void *context = NULL; | 1062 | void *context = NULL; |
| 1020 | int error = parse_table(name, nlen, oldval, oldlenp, | 1063 | |
| 1064 | if (!use_table(head)) | ||
| 1065 | continue; | ||
| 1066 | |||
| 1067 | spin_unlock(&sysctl_lock); | ||
| 1068 | |||
| 1069 | error = parse_table(name, nlen, oldval, oldlenp, | ||
| 1021 | newval, newlen, head->ctl_table, | 1070 | newval, newlen, head->ctl_table, |
| 1022 | &context); | 1071 | &context); |
| 1023 | kfree(context); | 1072 | kfree(context); |
| 1073 | |||
| 1074 | spin_lock(&sysctl_lock); | ||
| 1075 | unuse_table(head); | ||
| 1024 | if (error != -ENOTDIR) | 1076 | if (error != -ENOTDIR) |
| 1025 | return error; | 1077 | break; |
| 1026 | tmp = tmp->next; | 1078 | } while ((tmp = tmp->next) != &root_table_header.ctl_entry); |
| 1027 | } while (tmp != &root_table_header.ctl_entry); | 1079 | spin_unlock(&sysctl_lock); |
| 1028 | return -ENOTDIR; | 1080 | return error; |
| 1029 | } | 1081 | } |
| 1030 | 1082 | ||
| 1031 | asmlinkage long sys_sysctl(struct __sysctl_args __user *args) | 1083 | asmlinkage long sys_sysctl(struct __sysctl_args __user *args) |
| @@ -1236,12 +1288,16 @@ struct ctl_table_header *register_sysctl_table(ctl_table * table, | |||
| 1236 | return NULL; | 1288 | return NULL; |
| 1237 | tmp->ctl_table = table; | 1289 | tmp->ctl_table = table; |
| 1238 | INIT_LIST_HEAD(&tmp->ctl_entry); | 1290 | INIT_LIST_HEAD(&tmp->ctl_entry); |
| 1291 | tmp->used = 0; | ||
| 1292 | tmp->unregistering = NULL; | ||
| 1293 | spin_lock(&sysctl_lock); | ||
| 1239 | if (insert_at_head) | 1294 | if (insert_at_head) |
| 1240 | list_add(&tmp->ctl_entry, &root_table_header.ctl_entry); | 1295 | list_add(&tmp->ctl_entry, &root_table_header.ctl_entry); |
| 1241 | else | 1296 | else |
| 1242 | list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry); | 1297 | list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry); |
| 1298 | spin_unlock(&sysctl_lock); | ||
| 1243 | #ifdef CONFIG_PROC_FS | 1299 | #ifdef CONFIG_PROC_FS |
| 1244 | register_proc_table(table, proc_sys_root); | 1300 | register_proc_table(table, proc_sys_root, tmp); |
| 1245 | #endif | 1301 | #endif |
| 1246 | return tmp; | 1302 | return tmp; |
| 1247 | } | 1303 | } |
| @@ -1255,10 +1311,13 @@ struct ctl_table_header *register_sysctl_table(ctl_table * table, | |||
| 1255 | */ | 1311 | */ |
| 1256 | void unregister_sysctl_table(struct ctl_table_header * header) | 1312 | void unregister_sysctl_table(struct ctl_table_header * header) |
| 1257 | { | 1313 | { |
| 1258 | list_del(&header->ctl_entry); | 1314 | might_sleep(); |
| 1315 | spin_lock(&sysctl_lock); | ||
| 1316 | start_unregistering(header); | ||
| 1259 | #ifdef CONFIG_PROC_FS | 1317 | #ifdef CONFIG_PROC_FS |
| 1260 | unregister_proc_table(header->ctl_table, proc_sys_root); | 1318 | unregister_proc_table(header->ctl_table, proc_sys_root); |
| 1261 | #endif | 1319 | #endif |
| 1320 | spin_unlock(&sysctl_lock); | ||
| 1262 | kfree(header); | 1321 | kfree(header); |
| 1263 | } | 1322 | } |
| 1264 | 1323 | ||
| @@ -1269,7 +1328,7 @@ void unregister_sysctl_table(struct ctl_table_header * header) | |||
| 1269 | #ifdef CONFIG_PROC_FS | 1328 | #ifdef CONFIG_PROC_FS |
| 1270 | 1329 | ||
| 1271 | /* Scan the sysctl entries in table and add them all into /proc */ | 1330 | /* Scan the sysctl entries in table and add them all into /proc */ |
| 1272 | static void register_proc_table(ctl_table * table, struct proc_dir_entry *root) | 1331 | static void register_proc_table(ctl_table * table, struct proc_dir_entry *root, void *set) |
| 1273 | { | 1332 | { |
| 1274 | struct proc_dir_entry *de; | 1333 | struct proc_dir_entry *de; |
| 1275 | int len; | 1334 | int len; |
| @@ -1305,13 +1364,14 @@ static void register_proc_table(ctl_table * table, struct proc_dir_entry *root) | |||
| 1305 | de = create_proc_entry(table->procname, mode, root); | 1364 | de = create_proc_entry(table->procname, mode, root); |
| 1306 | if (!de) | 1365 | if (!de) |
| 1307 | continue; | 1366 | continue; |
| 1367 | de->set = set; | ||
| 1308 | de->data = (void *) table; | 1368 | de->data = (void *) table; |
| 1309 | if (table->proc_handler) | 1369 | if (table->proc_handler) |
| 1310 | de->proc_fops = &proc_sys_file_operations; | 1370 | de->proc_fops = &proc_sys_file_operations; |
| 1311 | } | 1371 | } |
| 1312 | table->de = de; | 1372 | table->de = de; |
| 1313 | if (de->mode & S_IFDIR) | 1373 | if (de->mode & S_IFDIR) |
| 1314 | register_proc_table(table->child, de); | 1374 | register_proc_table(table->child, de, set); |
| 1315 | } | 1375 | } |
| 1316 | } | 1376 | } |
| 1317 | 1377 | ||
| @@ -1336,6 +1396,13 @@ static void unregister_proc_table(ctl_table * table, struct proc_dir_entry *root | |||
| 1336 | continue; | 1396 | continue; |
| 1337 | } | 1397 | } |
| 1338 | 1398 | ||
| 1399 | /* | ||
| 1400 | * In any case, mark the entry as goner; we'll keep it | ||
| 1401 | * around if it's busy, but we'll know to do nothing with | ||
| 1402 | * its fields. We are under sysctl_lock here. | ||
| 1403 | */ | ||
| 1404 | de->data = NULL; | ||
| 1405 | |||
| 1339 | /* Don't unregister proc entries that are still being used.. */ | 1406 | /* Don't unregister proc entries that are still being used.. */ |
| 1340 | if (atomic_read(&de->count)) | 1407 | if (atomic_read(&de->count)) |
| 1341 | continue; | 1408 | continue; |
| @@ -1349,27 +1416,38 @@ static ssize_t do_rw_proc(int write, struct file * file, char __user * buf, | |||
| 1349 | size_t count, loff_t *ppos) | 1416 | size_t count, loff_t *ppos) |
| 1350 | { | 1417 | { |
| 1351 | int op; | 1418 | int op; |
| 1352 | struct proc_dir_entry *de; | 1419 | struct proc_dir_entry *de = PDE(file->f_dentry->d_inode); |
| 1353 | struct ctl_table *table; | 1420 | struct ctl_table *table; |
| 1354 | size_t res; | 1421 | size_t res; |
| 1355 | ssize_t error; | 1422 | ssize_t error = -ENOTDIR; |
| 1356 | |||
| 1357 | de = PDE(file->f_dentry->d_inode); | ||
| 1358 | if (!de || !de->data) | ||
| 1359 | return -ENOTDIR; | ||
| 1360 | table = (struct ctl_table *) de->data; | ||
| 1361 | if (!table || !table->proc_handler) | ||
| 1362 | return -ENOTDIR; | ||
| 1363 | op = (write ? 002 : 004); | ||
| 1364 | if (ctl_perm(table, op)) | ||
| 1365 | return -EPERM; | ||
| 1366 | 1423 | ||
| 1367 | res = count; | 1424 | spin_lock(&sysctl_lock); |
| 1368 | 1425 | if (de && de->data && use_table(de->set)) { | |
| 1369 | error = (*table->proc_handler) (table, write, file, buf, &res, ppos); | 1426 | /* |
| 1370 | if (error) | 1427 | * at that point we know that sysctl was not unregistered |
| 1371 | return error; | 1428 | * and won't be until we finish |
| 1372 | return res; | 1429 | */ |
| 1430 | spin_unlock(&sysctl_lock); | ||
| 1431 | table = (struct ctl_table *) de->data; | ||
| 1432 | if (!table || !table->proc_handler) | ||
| 1433 | goto out; | ||
| 1434 | error = -EPERM; | ||
| 1435 | op = (write ? 002 : 004); | ||
| 1436 | if (ctl_perm(table, op)) | ||
| 1437 | goto out; | ||
| 1438 | |||
| 1439 | /* careful: calling conventions are nasty here */ | ||
| 1440 | res = count; | ||
| 1441 | error = (*table->proc_handler)(table, write, file, | ||
| 1442 | buf, &res, ppos); | ||
| 1443 | if (!error) | ||
| 1444 | error = res; | ||
| 1445 | out: | ||
| 1446 | spin_lock(&sysctl_lock); | ||
| 1447 | unuse_table(de->set); | ||
| 1448 | } | ||
| 1449 | spin_unlock(&sysctl_lock); | ||
| 1450 | return error; | ||
| 1373 | } | 1451 | } |
| 1374 | 1452 | ||
| 1375 | static int proc_opensys(struct inode *inode, struct file *file) | 1453 | static int proc_opensys(struct inode *inode, struct file *file) |
| @@ -1997,6 +2075,7 @@ int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp, | |||
| 1997 | * @filp: the file structure | 2075 | * @filp: the file structure |
| 1998 | * @buffer: the user buffer | 2076 | * @buffer: the user buffer |
| 1999 | * @lenp: the size of the user buffer | 2077 | * @lenp: the size of the user buffer |
| 2078 | * @ppos: pointer to the file position | ||
| 2000 | * | 2079 | * |
| 2001 | * Reads/writes up to table->maxlen/sizeof(unsigned int) integer | 2080 | * Reads/writes up to table->maxlen/sizeof(unsigned int) integer |
| 2002 | * values from/to the user buffer, treated as an ASCII string. | 2081 | * values from/to the user buffer, treated as an ASCII string. |
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 7cee222231bc..42df83d7fad2 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
| @@ -524,7 +524,7 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb, | |||
| 524 | list_for_each_entry(wq, &workqueues, list) { | 524 | list_for_each_entry(wq, &workqueues, list) { |
| 525 | /* Unbind so it can run. */ | 525 | /* Unbind so it can run. */ |
| 526 | kthread_bind(per_cpu_ptr(wq->cpu_wq, hotcpu)->thread, | 526 | kthread_bind(per_cpu_ptr(wq->cpu_wq, hotcpu)->thread, |
| 527 | smp_processor_id()); | 527 | any_online_cpu(cpu_online_map)); |
| 528 | cleanup_workqueue_thread(wq, hotcpu); | 528 | cleanup_workqueue_thread(wq, hotcpu); |
| 529 | } | 529 | } |
| 530 | break; | 530 | break; |
