Diffstat (limited to 'kernel')
38 files changed, 1046 insertions, 163 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 864ff75d65f2..6aebdeb2aa34 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -100,6 +100,7 @@ obj-$(CONFIG_SLOW_WORK_DEBUG) += slow-work-debugfs.o
 obj-$(CONFIG_PERF_EVENTS) += perf_event.o
 obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
 obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o
+obj-$(CONFIG_PADATA) += padata.o
 
 ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
 # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 1fbcc748044a..aa3bee566446 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2936,14 +2936,17 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
 
        for_each_subsys(root, ss) {
                struct cgroup_subsys_state *css = ss->create(ss, cgrp);
+
                if (IS_ERR(css)) {
                        err = PTR_ERR(css);
                        goto err_destroy;
                }
                init_cgroup_css(css, ss, cgrp);
-               if (ss->use_id)
-                       if (alloc_css_id(ss, parent, cgrp))
+               if (ss->use_id) {
+                       err = alloc_css_id(ss, parent, cgrp);
+                       if (err)
                                goto err_destroy;
+               }
                /* At error, ->destroy() callback has to free assigned ID. */
        }
 
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 1c8ddd6ee940..677f25376a38 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -151,13 +151,13 @@ static inline void check_for_tasks(int cpu)
 
        write_lock_irq(&tasklist_lock);
        for_each_process(p) {
-               if (task_cpu(p) == cpu &&
+               if (task_cpu(p) == cpu && p->state == TASK_RUNNING &&
                    (!cputime_eq(p->utime, cputime_zero) ||
                     !cputime_eq(p->stime, cputime_zero)))
-                       printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d\
-                               (state = %ld, flags = %x) \n",
+                       printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d "
+                               "(state = %ld, flags = %x)\n",
                                p->comm, task_pid_nr(p), cpu,
                                p->state, p->flags);
        }
        write_unlock_irq(&tasklist_lock);
 }
diff --git a/kernel/cred.c b/kernel/cred.c
index dd76cfe5f5b0..1ed8ca18790c 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -224,7 +224,7 @@ struct cred *cred_alloc_blank(void)
 #ifdef CONFIG_KEYS
        new->tgcred = kzalloc(sizeof(*new->tgcred), GFP_KERNEL);
        if (!new->tgcred) {
-               kfree(new);
+               kmem_cache_free(cred_jar, new);
                return NULL;
        }
        atomic_set(&new->tgcred->usage, 1);
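The one-line fix above matters because `new` is taken from the `cred_jar` slab cache earlier in cred_alloc_blank(), so the error path must return it to that same cache rather than to kfree(). A minimal sketch of the pairing rule, with hypothetical foo_* names (not from this patch):

#include <linux/slab.h>

struct foo { int x; };

static struct kmem_cache *foo_cache;    /* assumed created with kmem_cache_create() */

static struct foo *foo_alloc(void)
{
        return kmem_cache_zalloc(foo_cache, GFP_KERNEL);
}

static void foo_free(struct foo *f)
{
        kmem_cache_free(foo_cache, f);  /* must match the allocating cache; plain kfree(f) is wrong here */
}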
diff --git a/kernel/fork.c b/kernel/fork.c
index 5b2959b3ffc2..f88bd984df35 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1241,21 +1241,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        /* Need tasklist lock for parent etc handling! */
        write_lock_irq(&tasklist_lock);
 
-       /*
-        * The task hasn't been attached yet, so its cpus_allowed mask will
-        * not be changed, nor will its assigned CPU.
-        *
-        * The cpus_allowed mask of the parent may have changed after it was
-        * copied first time - so re-copy it here, then check the child's CPU
-        * to ensure it is on a valid CPU (and if not, just force it back to
-        * parent's CPU). This avoids alot of nasty races.
-        */
-       p->cpus_allowed = current->cpus_allowed;
-       p->rt.nr_cpus_allowed = current->rt.nr_cpus_allowed;
-       if (unlikely(!cpu_isset(task_cpu(p), p->cpus_allowed) ||
-                       !cpu_online(task_cpu(p))))
-               set_task_cpu(p, smp_processor_id());
-
        /* CLONE_PARENT re-uses the old parent */
        if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) {
                p->real_parent = current->real_parent;
diff --git a/kernel/futex.c b/kernel/futex.c
index d9b3a2228f9d..e7a35f1039e7 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -530,8 +530,25 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
                        return -EINVAL;
 
                WARN_ON(!atomic_read(&pi_state->refcount));
-               WARN_ON(pid && pi_state->owner &&
-                               pi_state->owner->pid != pid);
+
+               /*
+                * When pi_state->owner is NULL then the owner died
+                * and another waiter is on the fly. pi_state->owner
+                * is fixed up by the task which acquires
+                * pi_state->rt_mutex.
+                *
+                * We do not check for pid == 0 which can happen when
+                * the owner died and robust_list_exit() cleared the
+                * TID.
+                */
+               if (pid && pi_state->owner) {
+                       /*
+                        * Bail out if user space manipulated the
+                        * futex value.
+                        */
+                       if (pid != task_pid_vnr(pi_state->owner))
+                               return -EINVAL;
+               }
 
                atomic_inc(&pi_state->refcount);
                *ps = pi_state;
@@ -758,6 +775,13 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
        if (!pi_state)
                return -EINVAL;
 
+       /*
+        * If current does not own the pi_state then the futex is
+        * inconsistent and user space fiddled with the futex value.
+        */
+       if (pi_state->owner != current)
+               return -EINVAL;
+
        raw_spin_lock(&pi_state->pi_mutex.wait_lock);
        new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
 
@@ -1971,7 +1995,7 @@ retry_private:
        /* Unqueue and drop the lock */
        unqueue_me_pi(&q);
 
-       goto out;
+       goto out_put_key;
 
 out_unlock_put_key:
        queue_unlock(&q, hb);
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index 50dbd5999588..967e66143e11 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -243,38 +243,70 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable)
  * ((per_cpu(nr_bp_flexible, *) > 1) + max(per_cpu(nr_cpu_bp_pinned, *))
  *   + max(per_cpu(nr_task_bp_pinned, *))) < HBP_NUM
  */
-int reserve_bp_slot(struct perf_event *bp)
+static int __reserve_bp_slot(struct perf_event *bp)
 {
        struct bp_busy_slots slots = {0};
-       int ret = 0;
-
-       mutex_lock(&nr_bp_mutex);
 
        fetch_bp_busy_slots(&slots, bp);
 
        /* Flexible counters need to keep at least one slot */
-       if (slots.pinned + (!!slots.flexible) == HBP_NUM) {
-               ret = -ENOSPC;
-               goto end;
-       }
+       if (slots.pinned + (!!slots.flexible) == HBP_NUM)
+               return -ENOSPC;
 
        toggle_bp_slot(bp, true);
 
-end:
+       return 0;
+}
+
+int reserve_bp_slot(struct perf_event *bp)
+{
+       int ret;
+
+       mutex_lock(&nr_bp_mutex);
+
+       ret = __reserve_bp_slot(bp);
+
        mutex_unlock(&nr_bp_mutex);
 
        return ret;
 }
 
+static void __release_bp_slot(struct perf_event *bp)
+{
+       toggle_bp_slot(bp, false);
+}
+
 void release_bp_slot(struct perf_event *bp)
 {
        mutex_lock(&nr_bp_mutex);
 
-       toggle_bp_slot(bp, false);
+       __release_bp_slot(bp);
 
        mutex_unlock(&nr_bp_mutex);
 }
 
+/*
+ * Allow the kernel debugger to reserve breakpoint slots without
+ * taking a lock using the dbg_* variant of for the reserve and
+ * release breakpoint slots.
+ */
+int dbg_reserve_bp_slot(struct perf_event *bp)
+{
+       if (mutex_is_locked(&nr_bp_mutex))
+               return -1;
+
+       return __reserve_bp_slot(bp);
+}
+
+int dbg_release_bp_slot(struct perf_event *bp)
+{
+       if (mutex_is_locked(&nr_bp_mutex))
+               return -1;
+
+       __release_bp_slot(bp);
+
+       return 0;
+}
 
 int register_perf_hw_breakpoint(struct perf_event *bp)
 {
@@ -296,6 +328,10 @@ int register_perf_hw_breakpoint(struct perf_event *bp)
        if (!bp->attr.disabled || !bp->overflow_handler)
                ret = arch_validate_hwbkpt_settings(bp, bp->ctx->task);
 
+       /* if arch_validate_hwbkpt_settings() fails then release bp slot */
+       if (ret)
+               release_bp_slot(bp);
+
        return ret;
 }
 
@@ -324,8 +360,8 @@ EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
 int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr)
 {
        u64 old_addr = bp->attr.bp_addr;
+       u64 old_len = bp->attr.bp_len;
        int old_type = bp->attr.bp_type;
-       int old_len = bp->attr.bp_len;
        int err = 0;
 
        perf_event_disable(bp);
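For context, slot reservation is what gates every user of this API: a registration fails with -ENOSPC once the HBP_NUM per-cpu debug-register slots are taken. A hedged sketch of a kernel-side caller in the style of the samples of this era (the watched symbol and handler are illustrative, not part of the patch):

#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>
#include <linux/kallsyms.h>

static struct perf_event **sample_hbp;  /* one event per cpu */

static void sample_hbp_handler(struct perf_event *bp, int nmi,
                               struct perf_sample_data *data,
                               struct pt_regs *regs)
{
        printk(KERN_INFO "watched variable was written to\n");
}

static int __init watch_pid_max(void)
{
        struct perf_event_attr attr;

        hw_breakpoint_init(&attr);
        attr.bp_addr = kallsyms_lookup_name("pid_max");  /* illustrative target */
        attr.bp_len = HW_BREAKPOINT_LEN_4;
        attr.bp_type = HW_BREAKPOINT_W;

        /* fails with -ENOSPC when reserve_bp_slot() finds no free slot */
        sample_hbp = register_wide_hw_breakpoint(&attr, sample_hbp_handler);
        return IS_ERR(sample_hbp) ? PTR_ERR(sample_hbp) : 0;
}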
diff --git a/kernel/kexec.c b/kernel/kexec.c
index a9a93d9ee7a7..ef077fb73155 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -32,6 +32,7 @@
 #include <linux/console.h>
 #include <linux/vmalloc.h>
 #include <linux/swap.h>
+#include <linux/kmsg_dump.h>
 
 #include <asm/page.h>
 #include <asm/uaccess.h>
@@ -1074,6 +1075,9 @@ void crash_kexec(struct pt_regs *regs)
        if (mutex_trylock(&kexec_mutex)) {
                if (kexec_crash_image) {
                        struct pt_regs fixed_regs;
+
+                       kmsg_dump(KMSG_DUMP_KEXEC);
+
                        crash_setup_regs(&fixed_regs, regs);
                        crash_save_vmcoreinfo();
                        machine_crash_shutdown(&fixed_regs);
diff --git a/kernel/kfifo.c b/kernel/kfifo.c
index 32c5c15d750d..35edbe22e9a9 100644
--- a/kernel/kfifo.c
+++ b/kernel/kfifo.c
@@ -80,7 +80,7 @@ int kfifo_alloc(struct kfifo *fifo, unsigned int size, gfp_t gfp_mask)
 
        buffer = kmalloc(size, gfp_mask);
        if (!buffer) {
-               _kfifo_init(fifo, 0, 0);
+               _kfifo_init(fifo, NULL, 0);
                return -ENOMEM;
        }
 
@@ -97,6 +97,7 @@ EXPORT_SYMBOL(kfifo_alloc);
 void kfifo_free(struct kfifo *fifo)
 {
        kfree(fifo->buffer);
+       _kfifo_init(fifo, NULL, 0);
 }
 EXPORT_SYMBOL(kfifo_free);
 
@@ -349,6 +350,7 @@ EXPORT_SYMBOL(__kfifo_from_user_n);
  * @fifo: the fifo to be used.
  * @from: pointer to the data to be added.
  * @len: the length of the data to be added.
+ * @total: the actual returned data length.
  *
  * This function copies at most @len bytes from the @from into the
  * FIFO depending and returns -EFAULT/0.
@@ -399,7 +401,7 @@ EXPORT_SYMBOL(__kfifo_to_user_n);
  * @fifo: the fifo to be used.
  * @to: where the data must be copied.
  * @len: the size of the destination buffer.
-@ @lenout: pointer to output variable with copied data
+ * @lenout: pointer to output variable with copied data
  *
  * This function copies at most @len bytes from the FIFO into the
  * @to buffer and 0 or -EFAULT.
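Resetting the fifo inside kfifo_free() means a buggy second use after free fails cleanly instead of touching freed memory. A small sketch of the kfifo API as this file exports it at this point in time (error handling trimmed; names are illustrative):

#include <linux/kfifo.h>

static struct kfifo fifo;

static int fifo_demo(void)
{
        unsigned char out[8];
        unsigned int copied;
        int err;

        err = kfifo_alloc(&fifo, PAGE_SIZE, GFP_KERNEL); /* kmalloc-backed; size rounded to a power of two */
        if (err)
                return err;

        kfifo_in(&fifo, "hello", 5);            /* returns bytes actually stored */
        copied = kfifo_out(&fifo, out, sizeof(out));    /* at most 5 here */

        kfifo_free(&fifo);      /* buffer freed, fifo reset to NULL/0 by this patch */
        return 0;
}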
diff --git a/kernel/kgdb.c b/kernel/kgdb.c
index 2eb517e23514..761fdd2b3034 100644
--- a/kernel/kgdb.c
+++ b/kernel/kgdb.c
@@ -583,6 +583,9 @@ static void kgdb_wait(struct pt_regs *regs)
        smp_wmb();
        atomic_set(&cpu_in_kgdb[cpu], 1);
 
+       /* Disable any cpu specific hw breakpoints */
+       kgdb_disable_hw_debug(regs);
+
        /* Wait till primary CPU is done with debugging */
        while (atomic_read(&passive_cpu_wait[cpu]))
                cpu_relax();
@@ -596,7 +599,7 @@ static void kgdb_wait(struct pt_regs *regs)
 
        /* Signal the primary CPU that we are done: */
        atomic_set(&cpu_in_kgdb[cpu], 0);
-       touch_softlockup_watchdog();
+       touch_softlockup_watchdog_sync();
        clocksource_touch_watchdog();
        local_irq_restore(flags);
 }
@@ -1450,7 +1453,7 @@ acquirelock:
            (kgdb_info[cpu].task &&
             kgdb_info[cpu].task->pid != kgdb_sstep_pid) && --sstep_tries) {
                atomic_set(&kgdb_active, -1);
-               touch_softlockup_watchdog();
+               touch_softlockup_watchdog_sync();
                clocksource_touch_watchdog();
                local_irq_restore(flags);
 
@@ -1550,7 +1553,7 @@ kgdb_restore:
        }
        /* Free kgdb_active */
        atomic_set(&kgdb_active, -1);
-       touch_softlockup_watchdog();
+       touch_softlockup_watchdog_sync();
        clocksource_touch_watchdog();
        local_irq_restore(flags);
 
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index b7df302a0204..c4b43430d393 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -93,6 +93,7 @@ static struct kprobe_blackpoint kprobe_blacklist[] = {
        {"native_get_debugreg",},
        {"irq_entries_start",},
        {"common_interrupt",},
+       {"mcount",},    /* mcount can be called from everywhere */
        {NULL}    /* Terminator */
 };
 
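The blacklist exists so that register_kprobe() refuses probe points that would recurse or deadlock, and mcount is hit by practically every function once ftrace is active. A hedged reminder of what a caller looks like (the probe target and handler names are illustrative):

#include <linux/module.h>
#include <linux/kprobes.h>

static int handler_pre(struct kprobe *p, struct pt_regs *regs)
{
        /* runs just before the probed instruction executes */
        return 0;
}

static struct kprobe kp = {
        .symbol_name    = "do_fork",    /* illustrative; blacklisted symbols like "mcount" are rejected */
        .pre_handler    = handler_pre,
};

static int __init kp_init(void)
{
        return register_kprobe(&kp);
}
module_init(kp_init);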
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 5feaddcdbe49..c62ec14609b9 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -2147,7 +2147,7 @@ check_usage_backwards(struct task_struct *curr, struct held_lock *this,
                return ret;
 
        return print_irq_inversion_bug(curr, &root, target_entry,
-                                       this, 1, irqclass);
+                                       this, 0, irqclass);
 }
 
 void print_irqtrace_events(struct task_struct *curr)
diff --git a/kernel/padata.c b/kernel/padata.c
new file mode 100644
index 000000000000..6f9bcb8313d6
--- /dev/null
+++ b/kernel/padata.c
@@ -0,0 +1,690 @@
+/*
+ * padata.c - generic interface to process data streams in parallel
+ *
+ * Copyright (C) 2008, 2009 secunet Security Networks AG
+ * Copyright (C) 2008, 2009 Steffen Klassert <steffen.klassert@secunet.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <linux/module.h>
+#include <linux/cpumask.h>
+#include <linux/err.h>
+#include <linux/cpu.h>
+#include <linux/padata.h>
+#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/rcupdate.h>
+
+#define MAX_SEQ_NR INT_MAX - NR_CPUS
+#define MAX_OBJ_NUM 10000 * NR_CPUS
+
+static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index)
+{
+       int cpu, target_cpu;
+
+       target_cpu = cpumask_first(pd->cpumask);
+       for (cpu = 0; cpu < cpu_index; cpu++)
+               target_cpu = cpumask_next(target_cpu, pd->cpumask);
+
+       return target_cpu;
+}
+
+static int padata_cpu_hash(struct padata_priv *padata)
+{
+       int cpu_index;
+       struct parallel_data *pd;
+
+       pd = padata->pd;
+
+       /*
+        * Hash the sequence numbers to the cpus by taking
+        * seq_nr mod. number of cpus in use.
+        */
+       cpu_index = padata->seq_nr % cpumask_weight(pd->cpumask);
+
+       return padata_index_to_cpu(pd, cpu_index);
+}
+
+static void padata_parallel_worker(struct work_struct *work)
+{
+       struct padata_queue *queue;
+       struct parallel_data *pd;
+       struct padata_instance *pinst;
+       LIST_HEAD(local_list);
+
+       local_bh_disable();
+       queue = container_of(work, struct padata_queue, pwork);
+       pd = queue->pd;
+       pinst = pd->pinst;
+
+       spin_lock(&queue->parallel.lock);
+       list_replace_init(&queue->parallel.list, &local_list);
+       spin_unlock(&queue->parallel.lock);
+
+       while (!list_empty(&local_list)) {
+               struct padata_priv *padata;
+
+               padata = list_entry(local_list.next,
+                                   struct padata_priv, list);
+
+               list_del_init(&padata->list);
+
+               padata->parallel(padata);
+       }
+
+       local_bh_enable();
+}
+
+/*
+ * padata_do_parallel - padata parallelization function
+ *
+ * @pinst: padata instance
+ * @padata: object to be parallelized
+ * @cb_cpu: cpu the serialization callback function will run on,
+ *          must be in the cpumask of padata.
+ *
+ * The parallelization callback function will run with BHs off.
+ * Note: Every object which is parallelized by padata_do_parallel
+ * must be seen by padata_do_serial.
+ */
+int padata_do_parallel(struct padata_instance *pinst,
+                      struct padata_priv *padata, int cb_cpu)
+{
+       int target_cpu, err;
+       struct padata_queue *queue;
+       struct parallel_data *pd;
+
+       rcu_read_lock_bh();
+
+       pd = rcu_dereference(pinst->pd);
+
+       err = 0;
+       if (!(pinst->flags & PADATA_INIT))
+               goto out;
+
+       err = -EBUSY;
+       if ((pinst->flags & PADATA_RESET))
+               goto out;
+
+       if (atomic_read(&pd->refcnt) >= MAX_OBJ_NUM)
+               goto out;
+
+       err = -EINVAL;
+       if (!cpumask_test_cpu(cb_cpu, pd->cpumask))
+               goto out;
+
+       err = -EINPROGRESS;
+       atomic_inc(&pd->refcnt);
+       padata->pd = pd;
+       padata->cb_cpu = cb_cpu;
+
+       if (unlikely(atomic_read(&pd->seq_nr) == pd->max_seq_nr))
+               atomic_set(&pd->seq_nr, -1);
+
+       padata->seq_nr = atomic_inc_return(&pd->seq_nr);
+
+       target_cpu = padata_cpu_hash(padata);
+       queue = per_cpu_ptr(pd->queue, target_cpu);
+
+       spin_lock(&queue->parallel.lock);
+       list_add_tail(&padata->list, &queue->parallel.list);
+       spin_unlock(&queue->parallel.lock);
+
+       queue_work_on(target_cpu, pinst->wq, &queue->pwork);
+
+out:
+       rcu_read_unlock_bh();
+
+       return err;
+}
+EXPORT_SYMBOL(padata_do_parallel);
+
+static struct padata_priv *padata_get_next(struct parallel_data *pd)
+{
+       int cpu, num_cpus, empty, calc_seq_nr;
+       int seq_nr, next_nr, overrun, next_overrun;
+       struct padata_queue *queue, *next_queue;
+       struct padata_priv *padata;
+       struct padata_list *reorder;
+
+       empty = 0;
+       next_nr = -1;
+       next_overrun = 0;
+       next_queue = NULL;
+
+       num_cpus = cpumask_weight(pd->cpumask);
+
+       for_each_cpu(cpu, pd->cpumask) {
+               queue = per_cpu_ptr(pd->queue, cpu);
+               reorder = &queue->reorder;
+
+               /*
+                * Calculate the seq_nr of the object that should be
+                * next in this queue.
+                */
+               overrun = 0;
+               calc_seq_nr = (atomic_read(&queue->num_obj) * num_cpus)
+                              + queue->cpu_index;
+
+               if (unlikely(calc_seq_nr > pd->max_seq_nr)) {
+                       calc_seq_nr = calc_seq_nr - pd->max_seq_nr - 1;
+                       overrun = 1;
+               }
+
+               if (!list_empty(&reorder->list)) {
+                       padata = list_entry(reorder->list.next,
+                                           struct padata_priv, list);
+
+                       seq_nr = padata->seq_nr;
+                       BUG_ON(calc_seq_nr != seq_nr);
+               } else {
+                       seq_nr = calc_seq_nr;
+                       empty++;
+               }
+
+               if (next_nr < 0 || seq_nr < next_nr
+                   || (next_overrun && !overrun)) {
+                       next_nr = seq_nr;
+                       next_overrun = overrun;
+                       next_queue = queue;
+               }
+       }
+
+       padata = NULL;
+
+       if (empty == num_cpus)
+               goto out;
+
+       reorder = &next_queue->reorder;
+
+       if (!list_empty(&reorder->list)) {
+               padata = list_entry(reorder->list.next,
+                                   struct padata_priv, list);
+
+               if (unlikely(next_overrun)) {
+                       for_each_cpu(cpu, pd->cpumask) {
+                               queue = per_cpu_ptr(pd->queue, cpu);
+                               atomic_set(&queue->num_obj, 0);
+                       }
+               }
+
+               spin_lock(&reorder->lock);
+               list_del_init(&padata->list);
+               atomic_dec(&pd->reorder_objects);
+               spin_unlock(&reorder->lock);
+
+               atomic_inc(&next_queue->num_obj);
+
+               goto out;
+       }
+
+       if (next_nr % num_cpus == next_queue->cpu_index) {
+               padata = ERR_PTR(-ENODATA);
+               goto out;
+       }
+
+       padata = ERR_PTR(-EINPROGRESS);
+out:
+       return padata;
+}
+
+static void padata_reorder(struct parallel_data *pd)
+{
+       struct padata_priv *padata;
+       struct padata_queue *queue;
+       struct padata_instance *pinst = pd->pinst;
+
+try_again:
+       if (!spin_trylock_bh(&pd->lock))
+               goto out;
+
+       while (1) {
+               padata = padata_get_next(pd);
+
+               if (!padata || PTR_ERR(padata) == -EINPROGRESS)
+                       break;
+
+               if (PTR_ERR(padata) == -ENODATA) {
+                       spin_unlock_bh(&pd->lock);
+                       goto out;
+               }
+
+               queue = per_cpu_ptr(pd->queue, padata->cb_cpu);
+
+               spin_lock(&queue->serial.lock);
+               list_add_tail(&padata->list, &queue->serial.list);
+               spin_unlock(&queue->serial.lock);
+
+               queue_work_on(padata->cb_cpu, pinst->wq, &queue->swork);
+       }
+
+       spin_unlock_bh(&pd->lock);
+
+       if (atomic_read(&pd->reorder_objects))
+               goto try_again;
+
+out:
+       return;
+}
+
+static void padata_serial_worker(struct work_struct *work)
+{
+       struct padata_queue *queue;
+       struct parallel_data *pd;
+       LIST_HEAD(local_list);
+
+       local_bh_disable();
+       queue = container_of(work, struct padata_queue, swork);
+       pd = queue->pd;
+
+       spin_lock(&queue->serial.lock);
+       list_replace_init(&queue->serial.list, &local_list);
+       spin_unlock(&queue->serial.lock);
+
+       while (!list_empty(&local_list)) {
+               struct padata_priv *padata;
+
+               padata = list_entry(local_list.next,
+                                   struct padata_priv, list);
+
+               list_del_init(&padata->list);
+
+               padata->serial(padata);
+               atomic_dec(&pd->refcnt);
+       }
+       local_bh_enable();
+}
+
+/*
+ * padata_do_serial - padata serialization function
+ *
+ * @padata: object to be serialized.
+ *
+ * padata_do_serial must be called for every parallelized object.
+ * The serialization callback function will run with BHs off.
+ */
+void padata_do_serial(struct padata_priv *padata)
+{
+       int cpu;
+       struct padata_queue *queue;
+       struct parallel_data *pd;
+
+       pd = padata->pd;
+
+       cpu = get_cpu();
+       queue = per_cpu_ptr(pd->queue, cpu);
+
+       spin_lock(&queue->reorder.lock);
+       atomic_inc(&pd->reorder_objects);
+       list_add_tail(&padata->list, &queue->reorder.list);
+       spin_unlock(&queue->reorder.lock);
+
+       put_cpu();
+
+       padata_reorder(pd);
+}
+EXPORT_SYMBOL(padata_do_serial);
+
+static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
+                                            const struct cpumask *cpumask)
+{
+       int cpu, cpu_index, num_cpus;
+       struct padata_queue *queue;
+       struct parallel_data *pd;
+
+       cpu_index = 0;
+
+       pd = kzalloc(sizeof(struct parallel_data), GFP_KERNEL);
+       if (!pd)
+               goto err;
+
+       pd->queue = alloc_percpu(struct padata_queue);
+       if (!pd->queue)
+               goto err_free_pd;
+
+       if (!alloc_cpumask_var(&pd->cpumask, GFP_KERNEL))
+               goto err_free_queue;
+
+       for_each_possible_cpu(cpu) {
+               queue = per_cpu_ptr(pd->queue, cpu);
+
+               queue->pd = pd;
+
+               if (cpumask_test_cpu(cpu, cpumask)
+                   && cpumask_test_cpu(cpu, cpu_active_mask)) {
+                       queue->cpu_index = cpu_index;
+                       cpu_index++;
+               } else
+                       queue->cpu_index = -1;
+
+               INIT_LIST_HEAD(&queue->reorder.list);
+               INIT_LIST_HEAD(&queue->parallel.list);
+               INIT_LIST_HEAD(&queue->serial.list);
+               spin_lock_init(&queue->reorder.lock);
+               spin_lock_init(&queue->parallel.lock);
+               spin_lock_init(&queue->serial.lock);
+
+               INIT_WORK(&queue->pwork, padata_parallel_worker);
+               INIT_WORK(&queue->swork, padata_serial_worker);
+               atomic_set(&queue->num_obj, 0);
+       }
+
+       cpumask_and(pd->cpumask, cpumask, cpu_active_mask);
+
+       num_cpus = cpumask_weight(pd->cpumask);
+       pd->max_seq_nr = (MAX_SEQ_NR / num_cpus) * num_cpus - 1;
+
+       atomic_set(&pd->seq_nr, -1);
+       atomic_set(&pd->reorder_objects, 0);
+       atomic_set(&pd->refcnt, 0);
+       pd->pinst = pinst;
+       spin_lock_init(&pd->lock);
+
+       return pd;
+
+err_free_queue:
+       free_percpu(pd->queue);
+err_free_pd:
+       kfree(pd);
+err:
+       return NULL;
+}
+
+static void padata_free_pd(struct parallel_data *pd)
+{
+       free_cpumask_var(pd->cpumask);
+       free_percpu(pd->queue);
+       kfree(pd);
+}
+
+static void padata_replace(struct padata_instance *pinst,
+                          struct parallel_data *pd_new)
+{
+       struct parallel_data *pd_old = pinst->pd;
+
+       pinst->flags |= PADATA_RESET;
+
+       rcu_assign_pointer(pinst->pd, pd_new);
+
+       synchronize_rcu();
+
+       while (atomic_read(&pd_old->refcnt) != 0)
+               yield();
+
+       flush_workqueue(pinst->wq);
+
+       padata_free_pd(pd_old);
+
+       pinst->flags &= ~PADATA_RESET;
+}
+
+/*
+ * padata_set_cpumask - set the cpumask that padata should use
+ *
+ * @pinst: padata instance
+ * @cpumask: the cpumask to use
+ */
+int padata_set_cpumask(struct padata_instance *pinst,
+                      cpumask_var_t cpumask)
+{
+       struct parallel_data *pd;
+       int err = 0;
+
+       might_sleep();
+
+       mutex_lock(&pinst->lock);
+
+       pd = padata_alloc_pd(pinst, cpumask);
+       if (!pd) {
+               err = -ENOMEM;
+               goto out;
+       }
+
+       cpumask_copy(pinst->cpumask, cpumask);
+
+       padata_replace(pinst, pd);
+
+out:
+       mutex_unlock(&pinst->lock);
+
+       return err;
+}
+EXPORT_SYMBOL(padata_set_cpumask);
+
+static int __padata_add_cpu(struct padata_instance *pinst, int cpu)
+{
+       struct parallel_data *pd;
+
+       if (cpumask_test_cpu(cpu, cpu_active_mask)) {
+               pd = padata_alloc_pd(pinst, pinst->cpumask);
+               if (!pd)
+                       return -ENOMEM;
+
+               padata_replace(pinst, pd);
+       }
+
+       return 0;
+}
+
+/*
+ * padata_add_cpu - add a cpu to the padata cpumask
+ *
+ * @pinst: padata instance
+ * @cpu: cpu to add
+ */
+int padata_add_cpu(struct padata_instance *pinst, int cpu)
+{
+       int err;
+
+       might_sleep();
+
+       mutex_lock(&pinst->lock);
+
+       cpumask_set_cpu(cpu, pinst->cpumask);
+       err = __padata_add_cpu(pinst, cpu);
+
+       mutex_unlock(&pinst->lock);
+
+       return err;
+}
+EXPORT_SYMBOL(padata_add_cpu);
+
+static int __padata_remove_cpu(struct padata_instance *pinst, int cpu)
+{
+       struct parallel_data *pd;
+
+       if (cpumask_test_cpu(cpu, cpu_online_mask)) {
+               pd = padata_alloc_pd(pinst, pinst->cpumask);
+               if (!pd)
+                       return -ENOMEM;
+
+               padata_replace(pinst, pd);
+       }
+
+       return 0;
+}
+
+/*
+ * padata_remove_cpu - remove a cpu from the padata cpumask
+ *
+ * @pinst: padata instance
+ * @cpu: cpu to remove
+ */
+int padata_remove_cpu(struct padata_instance *pinst, int cpu)
+{
+       int err;
+
+       might_sleep();
+
+       mutex_lock(&pinst->lock);
+
+       cpumask_clear_cpu(cpu, pinst->cpumask);
+       err = __padata_remove_cpu(pinst, cpu);
+
+       mutex_unlock(&pinst->lock);
+
+       return err;
+}
+EXPORT_SYMBOL(padata_remove_cpu);
+
+/*
+ * padata_start - start the parallel processing
+ *
+ * @pinst: padata instance to start
+ */
+void padata_start(struct padata_instance *pinst)
+{
+       might_sleep();
+
+       mutex_lock(&pinst->lock);
+       pinst->flags |= PADATA_INIT;
+       mutex_unlock(&pinst->lock);
+}
+EXPORT_SYMBOL(padata_start);
+
+/*
+ * padata_stop - stop the parallel processing
+ *
+ * @pinst: padata instance to stop
+ */
+void padata_stop(struct padata_instance *pinst)
+{
+       might_sleep();
+
+       mutex_lock(&pinst->lock);
+       pinst->flags &= ~PADATA_INIT;
+       mutex_unlock(&pinst->lock);
+}
+EXPORT_SYMBOL(padata_stop);
+
+static int __cpuinit padata_cpu_callback(struct notifier_block *nfb,
+                                        unsigned long action, void *hcpu)
+{
+       int err;
+       struct padata_instance *pinst;
+       int cpu = (unsigned long)hcpu;
+
+       pinst = container_of(nfb, struct padata_instance, cpu_notifier);
+
+       switch (action) {
+       case CPU_ONLINE:
+       case CPU_ONLINE_FROZEN:
+               if (!cpumask_test_cpu(cpu, pinst->cpumask))
+                       break;
+               mutex_lock(&pinst->lock);
+               err = __padata_add_cpu(pinst, cpu);
+               mutex_unlock(&pinst->lock);
+               if (err)
+                       return NOTIFY_BAD;
+               break;
+
+       case CPU_DOWN_PREPARE:
+       case CPU_DOWN_PREPARE_FROZEN:
+               if (!cpumask_test_cpu(cpu, pinst->cpumask))
+                       break;
+               mutex_lock(&pinst->lock);
+               err = __padata_remove_cpu(pinst, cpu);
+               mutex_unlock(&pinst->lock);
+               if (err)
+                       return NOTIFY_BAD;
+               break;
+
+       case CPU_UP_CANCELED:
+       case CPU_UP_CANCELED_FROZEN:
+               if (!cpumask_test_cpu(cpu, pinst->cpumask))
+                       break;
+               mutex_lock(&pinst->lock);
+               __padata_remove_cpu(pinst, cpu);
+               mutex_unlock(&pinst->lock);
+
+       case CPU_DOWN_FAILED:
+       case CPU_DOWN_FAILED_FROZEN:
+               if (!cpumask_test_cpu(cpu, pinst->cpumask))
+                       break;
+               mutex_lock(&pinst->lock);
+               __padata_add_cpu(pinst, cpu);
+               mutex_unlock(&pinst->lock);
+       }
+
+       return NOTIFY_OK;
+}
+
+/*
+ * padata_alloc - allocate and initialize a padata instance
+ *
+ * @cpumask: cpumask that padata uses for parallelization
+ * @wq: workqueue to use for the allocated padata instance
+ */
+struct padata_instance *padata_alloc(const struct cpumask *cpumask,
+                                    struct workqueue_struct *wq)
+{
+       int err;
+       struct padata_instance *pinst;
+       struct parallel_data *pd;
+
+       pinst = kzalloc(sizeof(struct padata_instance), GFP_KERNEL);
+       if (!pinst)
+               goto err;
+
+       pd = padata_alloc_pd(pinst, cpumask);
+       if (!pd)
+               goto err_free_inst;
+
+       rcu_assign_pointer(pinst->pd, pd);
+
+       pinst->wq = wq;
+
+       cpumask_copy(pinst->cpumask, cpumask);
+
+       pinst->flags = 0;
+
+       pinst->cpu_notifier.notifier_call = padata_cpu_callback;
+       pinst->cpu_notifier.priority = 0;
+       err = register_hotcpu_notifier(&pinst->cpu_notifier);
+       if (err)
+               goto err_free_pd;
+
+       mutex_init(&pinst->lock);
+
+       return pinst;
+
+err_free_pd:
+       padata_free_pd(pd);
+err_free_inst:
+       kfree(pinst);
+err:
+       return NULL;
+}
+EXPORT_SYMBOL(padata_alloc);
+
+/*
+ * padata_free - free a padata instance
+ *
+ * @ padata_inst: padata instance to free
+ */
+void padata_free(struct padata_instance *pinst)
+{
+       padata_stop(pinst);
+
+       synchronize_rcu();
+
+       while (atomic_read(&pinst->pd->refcnt) != 0)
+               yield();
+
+       unregister_hotcpu_notifier(&pinst->cpu_notifier);
+       padata_free_pd(pinst->pd);
+       kfree(pinst);
+}
+EXPORT_SYMBOL(padata_free);
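Since padata.c is entirely new in this diff, a usage sketch may help: a client embeds struct padata_priv in its own request, supplies parallel/serial callbacks, and feeds objects through padata_do_parallel(); submission order is restored before the serial callback runs on cb_cpu. All my_* names and the workqueue are illustrative, not part of the patch:

struct my_request {
        struct padata_priv padata;      /* embedded padata bookkeeping */
        /* ... payload ... */
};

static void my_parallel(struct padata_priv *padata)
{
        /* heavy work; runs with BHs off on some cpu of the instance cpumask */

        padata_do_serial(padata);       /* mandatory: hand the object back for ordering */
}

static void my_serial(struct padata_priv *padata)
{
        /* runs on cb_cpu, in the original submission order */
}

/* setup and submission (error handling trimmed; my_wq is a caller-owned workqueue): */
pinst = padata_alloc(cpu_possible_mask, my_wq);
padata_start(pinst);

req->padata.parallel = my_parallel;
req->padata.serial = my_serial;
err = padata_do_parallel(pinst, &req->padata, cb_cpu);  /* cb_cpu must be in the cpumask */
/* err == -EINPROGRESS means the object was queued successfully */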
diff --git a/kernel/panic.c b/kernel/panic.c
index 5827f7b97254..c787333282b8 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -75,7 +75,6 @@ NORET_TYPE void panic(const char * fmt, ...)
        dump_stack();
 #endif
 
-       kmsg_dump(KMSG_DUMP_PANIC);
        /*
         * If we have crashed and we have a crash kernel loaded let it handle
         * everything else.
@@ -83,6 +82,8 @@ NORET_TYPE void panic(const char * fmt, ...)
         */
        crash_kexec(NULL);
 
+       kmsg_dump(KMSG_DUMP_PANIC);
+
        /*
         * Note smp_send_stop is the usual smp shutdown function, which
         * unfortunately means it may not be hardened to work in a panic
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 603c0d8b5df1..2ae7409bf38f 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -3259,8 +3259,6 @@ static void perf_event_task_output(struct perf_event *event,
        task_event->event_id.tid = perf_event_tid(event, task);
        task_event->event_id.ptid = perf_event_tid(event, current);
 
-       task_event->event_id.time = perf_clock();
-
        perf_output_put(&handle, task_event->event_id);
 
        perf_output_end(&handle);
@@ -3268,6 +3266,9 @@ static void perf_event_task_output(struct perf_event *event,
 
 static int perf_event_task_match(struct perf_event *event)
 {
+       if (event->state < PERF_EVENT_STATE_INACTIVE)
+               return 0;
+
        if (event->cpu != -1 && event->cpu != smp_processor_id())
                return 0;
 
@@ -3297,7 +3298,7 @@ static void perf_event_task_event(struct perf_task_event *task_event)
        cpuctx = &get_cpu_var(perf_cpu_context);
        perf_event_task_ctx(&cpuctx->ctx, task_event);
        if (!ctx)
-               ctx = rcu_dereference(task_event->task->perf_event_ctxp);
+               ctx = rcu_dereference(current->perf_event_ctxp);
        if (ctx)
                perf_event_task_ctx(ctx, task_event);
        put_cpu_var(perf_cpu_context);
@@ -3328,6 +3329,7 @@ static void perf_event_task(struct task_struct *task,
                        /* .ppid */
                        /* .tid  */
                        /* .ptid */
+                       .time = perf_clock(),
                },
        };
 
@@ -3377,6 +3379,9 @@ static void perf_event_comm_output(struct perf_event *event,
 
 static int perf_event_comm_match(struct perf_event *event)
 {
+       if (event->state < PERF_EVENT_STATE_INACTIVE)
+               return 0;
+
        if (event->cpu != -1 && event->cpu != smp_processor_id())
                return 0;
 
@@ -3494,6 +3499,9 @@ static void perf_event_mmap_output(struct perf_event *event,
 static int perf_event_mmap_match(struct perf_event *event,
                                 struct perf_mmap_event *mmap_event)
 {
+       if (event->state < PERF_EVENT_STATE_INACTIVE)
+               return 0;
+
        if (event->cpu != -1 && event->cpu != smp_processor_id())
                return 0;
 
@@ -4571,7 +4579,7 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
        if (attr->type >= PERF_TYPE_MAX)
                return -EINVAL;
 
-       if (attr->__reserved_1 || attr->__reserved_2)
+       if (attr->__reserved_1)
                return -EINVAL;
 
        if (attr->sample_type & ~(PERF_SAMPLE_MAX-1))
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 91e09d3b2eb2..5c36ea9d55d2 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -27,6 +27,15 @@ config PM_DEBUG
          code. This is helpful when debugging and reporting PM bugs, like
          suspend support.
 
+config PM_ADVANCED_DEBUG
+       bool "Extra PM attributes in sysfs for low-level debugging/testing"
+       depends on PM_DEBUG
+       default n
+       ---help---
+       Add extra sysfs attributes allowing one to access some Power Management
+       fields of device objects from user space. If you are not a kernel
+       developer interested in debugging/testing Power Management, say "no".
+
 config PM_VERBOSE
        bool "Verbose Power Management debugging"
        depends on PM_DEBUG
@@ -85,6 +94,11 @@ config PM_SLEEP
        depends on SUSPEND || HIBERNATION || XEN_SAVE_RESTORE
        default y
 
+config PM_SLEEP_ADVANCED_DEBUG
+       bool
+       depends on PM_ADVANCED_DEBUG
+       default n
+
 config SUSPEND
        bool "Suspend to RAM and standby"
        depends on PM && ARCH_SUSPEND_POSSIBLE
@@ -222,3 +236,8 @@ config PM_RUNTIME
          and the bus type drivers of the buses the devices are on are
          responsible for the actual handling of the autosuspend requests and
          wake-up events.
+
+config PM_OPS
+       bool
+       depends on PM_SLEEP || PM_RUNTIME
+       default y
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 0998c7139053..b58800b21fc0 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -44,6 +44,32 @@ int pm_notifier_call_chain(unsigned long val)
                        == NOTIFY_BAD) ? -EINVAL : 0;
 }
 
+/* If set, devices may be suspended and resumed asynchronously. */
+int pm_async_enabled = 1;
+
+static ssize_t pm_async_show(struct kobject *kobj, struct kobj_attribute *attr,
+                            char *buf)
+{
+       return sprintf(buf, "%d\n", pm_async_enabled);
+}
+
+static ssize_t pm_async_store(struct kobject *kobj, struct kobj_attribute *attr,
+                             const char *buf, size_t n)
+{
+       unsigned long val;
+
+       if (strict_strtoul(buf, 10, &val))
+               return -EINVAL;
+
+       if (val > 1)
+               return -EINVAL;
+
+       pm_async_enabled = val;
+       return n;
+}
+
+power_attr(pm_async);
+
 #ifdef CONFIG_PM_DEBUG
 int pm_test_level = TEST_NONE;
 
@@ -208,9 +234,12 @@ static struct attribute * g[] = {
 #ifdef CONFIG_PM_TRACE
        &pm_trace_attr.attr,
 #endif
-#if defined(CONFIG_PM_SLEEP) && defined(CONFIG_PM_DEBUG)
+#ifdef CONFIG_PM_SLEEP
+       &pm_async_attr.attr,
+#ifdef CONFIG_PM_DEBUG
        &pm_test_attr.attr,
 #endif
+#endif
        NULL,
 };
 
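pm_async_show()/pm_async_store() become the /sys/power/pm_async file via the power_attr() helper; per pm_async_store() the file accepts only "0" or "1" (e.g. `echo 0 > /sys/power/pm_async` disables asynchronous suspend/resume). As an assumption about kernel/power/power.h of this era (not part of this patch), the macro expands roughly to:

/* Sketch of power_attr() as assumed from kernel/power/power.h; it wires
 * <name>_show/<name>_store to a 0644 sysfs attribute named <name>. */
#define power_attr(_name) \
static struct kobj_attribute _name##_attr = {  \
        .attr   = {                            \
                .name = __stringify(_name),    \
                .mode = 0644,                  \
        },                                     \
        .show   = _name##_show,                \
        .store  = _name##_store,               \
}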
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 36cb168e4330..830cadecbdfc 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -1181,7 +1181,7 @@ static void free_unnecessary_pages(void)
 
        memory_bm_position_reset(&copy_bm);
 
-       while (to_free_normal > 0 && to_free_highmem > 0) {
+       while (to_free_normal > 0 || to_free_highmem > 0) {
                unsigned long pfn = memory_bm_next_pfn(&copy_bm);
                struct page *page = pfn_to_page(pfn);
 
@@ -1500,7 +1500,7 @@ asmlinkage int swsusp_save(void)
 {
        unsigned int nr_pages, nr_highmem;
 
-       printk(KERN_INFO "PM: Creating hibernation image: \n");
+       printk(KERN_INFO "PM: Creating hibernation image:\n");
 
        drain_local_pages(NULL);
        nr_pages = count_data_pages();
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 09b2b0ae9e9d..1d575733d4e1 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -657,10 +657,6 @@ int swsusp_read(unsigned int *flags_p)
        struct swsusp_info *header;
 
        *flags_p = swsusp_header->flags;
-       if (IS_ERR(resume_bdev)) {
-               pr_debug("PM: Image device not initialised\n");
-               return PTR_ERR(resume_bdev);
-       }
 
        memset(&snapshot, 0, sizeof(struct snapshot_handle));
        error = snapshot_write_next(&snapshot, PAGE_SIZE);
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c
deleted file mode 100644
index 5b3601bd1893..000000000000
--- a/kernel/power/swsusp.c
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * linux/kernel/power/swsusp.c
- *
- * This file provides code to write suspend image to swap and read it back.
- *
- * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu>
- * Copyright (C) 1998,2001-2005 Pavel Machek <pavel@suse.cz>
- *
- * This file is released under the GPLv2.
- *
- * I'd like to thank the following people for their work:
- *
- * Pavel Machek <pavel@ucw.cz>:
- * Modifications, defectiveness pointing, being with me at the very beginning,
- * suspend to swap space, stop all tasks. Port to 2.4.18-ac and 2.5.17.
- *
- * Steve Doddi <dirk@loth.demon.co.uk>:
- * Support the possibility of hardware state restoring.
- *
- * Raph <grey.havens@earthling.net>:
- * Support for preserving states of network devices and virtual console
- * (including X and svgatextmode)
- *
- * Kurt Garloff <garloff@suse.de>:
- * Straightened the critical function in order to prevent compilers from
- * playing tricks with local variables.
- *
- * Andreas Mohr <a.mohr@mailto.de>
- *
- * Alex Badea <vampire@go.ro>:
- * Fixed runaway init
- *
- * Rafael J. Wysocki <rjw@sisk.pl>
- * Reworked the freeing of memory and the handling of swap
- *
- * More state savers are welcome. Especially for the scsi layer...
- *
- * For TODOs,FIXMEs also look in Documentation/power/swsusp.txt
- */
-
-#include <linux/mm.h>
-#include <linux/suspend.h>
-#include <linux/spinlock.h>
-#include <linux/kernel.h>
-#include <linux/major.h>
-#include <linux/swap.h>
-#include <linux/pm.h>
-#include <linux/swapops.h>
-#include <linux/bootmem.h>
-#include <linux/syscalls.h>
-#include <linux/highmem.h>
-#include <linux/time.h>
-#include <linux/rbtree.h>
-#include <linux/io.h>
-
-#include "power.h"
-
-int in_suspend __nosavedata = 0;
diff --git a/kernel/power/user.c b/kernel/power/user.c index bf0014d6a5f0..4d2289626a84 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c | |||
@@ -195,6 +195,15 @@ static ssize_t snapshot_write(struct file *filp, const char __user *buf, | |||
195 | return res; | 195 | return res; |
196 | } | 196 | } |
197 | 197 | ||
198 | static void snapshot_deprecated_ioctl(unsigned int cmd) | ||
199 | { | ||
200 | if (printk_ratelimit()) | ||
201 | printk(KERN_NOTICE "%pf: ioctl '%.8x' is deprecated and will " | ||
202 | "be removed soon, update your suspend-to-disk " | ||
203 | "utilities\n", | ||
204 | __builtin_return_address(0), cmd); | ||
205 | } | ||
206 | |||
198 | static long snapshot_ioctl(struct file *filp, unsigned int cmd, | 207 | static long snapshot_ioctl(struct file *filp, unsigned int cmd, |
199 | unsigned long arg) | 208 | unsigned long arg) |
200 | { | 209 | { |
@@ -246,8 +255,9 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, | |||
246 | data->frozen = 0; | 255 | data->frozen = 0; |
247 | break; | 256 | break; |
248 | 257 | ||
249 | case SNAPSHOT_CREATE_IMAGE: | ||
250 | case SNAPSHOT_ATOMIC_SNAPSHOT: | 258 | case SNAPSHOT_ATOMIC_SNAPSHOT: |
259 | snapshot_deprecated_ioctl(cmd); | ||
260 | case SNAPSHOT_CREATE_IMAGE: | ||
251 | if (data->mode != O_RDONLY || !data->frozen || data->ready) { | 261 | if (data->mode != O_RDONLY || !data->frozen || data->ready) { |
252 | error = -EPERM; | 262 | error = -EPERM; |
253 | break; | 263 | break; |
@@ -275,8 +285,9 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, | |||
275 | data->ready = 0; | 285 | data->ready = 0; |
276 | break; | 286 | break; |
277 | 287 | ||
278 | case SNAPSHOT_PREF_IMAGE_SIZE: | ||
279 | case SNAPSHOT_SET_IMAGE_SIZE: | 288 | case SNAPSHOT_SET_IMAGE_SIZE: |
289 | snapshot_deprecated_ioctl(cmd); | ||
290 | case SNAPSHOT_PREF_IMAGE_SIZE: | ||
280 | image_size = arg; | 291 | image_size = arg; |
281 | break; | 292 | break; |
282 | 293 | ||
@@ -290,15 +301,17 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, | |||
290 | error = put_user(size, (loff_t __user *)arg); | 301 | error = put_user(size, (loff_t __user *)arg); |
291 | break; | 302 | break; |
292 | 303 | ||
293 | case SNAPSHOT_AVAIL_SWAP_SIZE: | ||
294 | case SNAPSHOT_AVAIL_SWAP: | 304 | case SNAPSHOT_AVAIL_SWAP: |
305 | snapshot_deprecated_ioctl(cmd); | ||
306 | case SNAPSHOT_AVAIL_SWAP_SIZE: | ||
295 | size = count_swap_pages(data->swap, 1); | 307 | size = count_swap_pages(data->swap, 1); |
296 | size <<= PAGE_SHIFT; | 308 | size <<= PAGE_SHIFT; |
297 | error = put_user(size, (loff_t __user *)arg); | 309 | error = put_user(size, (loff_t __user *)arg); |
298 | break; | 310 | break; |
299 | 311 | ||
300 | case SNAPSHOT_ALLOC_SWAP_PAGE: | ||
301 | case SNAPSHOT_GET_SWAP_PAGE: | 312 | case SNAPSHOT_GET_SWAP_PAGE: |
313 | snapshot_deprecated_ioctl(cmd); | ||
314 | case SNAPSHOT_ALLOC_SWAP_PAGE: | ||
302 | if (data->swap < 0 || data->swap >= MAX_SWAPFILES) { | 315 | if (data->swap < 0 || data->swap >= MAX_SWAPFILES) { |
303 | error = -ENODEV; | 316 | error = -ENODEV; |
304 | break; | 317 | break; |
@@ -321,6 +334,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, | |||
321 | break; | 334 | break; |
322 | 335 | ||
323 | case SNAPSHOT_SET_SWAP_FILE: /* This ioctl is deprecated */ | 336 | case SNAPSHOT_SET_SWAP_FILE: /* This ioctl is deprecated */ |
337 | snapshot_deprecated_ioctl(cmd); | ||
324 | if (!swsusp_swap_in_use()) { | 338 | if (!swsusp_swap_in_use()) { |
325 | /* | 339 | /* |
326 | * User space encodes device types as two-byte values, | 340 | * User space encodes device types as two-byte values, |
@@ -362,6 +376,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, | |||
362 | break; | 376 | break; |
363 | 377 | ||
364 | case SNAPSHOT_PMOPS: /* This ioctl is deprecated */ | 378 | case SNAPSHOT_PMOPS: /* This ioctl is deprecated */ |
379 | snapshot_deprecated_ioctl(cmd); | ||
365 | error = -EINVAL; | 380 | error = -EINVAL; |
366 | 381 | ||
367 | switch (arg) { | 382 | switch (arg) { |
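
For context, a hedged userspace sketch of the migration the new notice asks for: a suspend-to-disk utility switches from the deprecated ioctl names to their replacements. This assumes the usual /dev/snapshot device and the constants from <linux/suspend_ioctls.h>; error handling is trimmed.

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/suspend_ioctls.h>

int main(void)
{
	int in_suspend = 0;
	int fd = open("/dev/snapshot", O_RDONLY);

	if (fd < 0) {
		perror("open /dev/snapshot");
		return 1;
	}
	/* processes must be frozen before an image can be created */
	if (ioctl(fd, SNAPSHOT_FREEZE) < 0)
		perror("SNAPSHOT_FREEZE");
	/* SNAPSHOT_ATOMIC_SNAPSHOT would now log the rate-limited notice;
	 * SNAPSHOT_CREATE_IMAGE is its replacement */
	if (ioctl(fd, SNAPSHOT_CREATE_IMAGE, &in_suspend) < 0)
		perror("SNAPSHOT_CREATE_IMAGE");
	ioctl(fd, SNAPSHOT_UNFREEZE);
	close(fd);
	return 0;
}
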
diff --git a/kernel/printk.c b/kernel/printk.c index 17463ca2e229..1751c456b71f 100644 --- a/kernel/printk.c +++ b/kernel/printk.c | |||
@@ -1467,6 +1467,7 @@ EXPORT_SYMBOL_GPL(kmsg_dump_unregister); | |||
1467 | static const char * const kmsg_reasons[] = { | 1467 | static const char * const kmsg_reasons[] = {
1468 | [KMSG_DUMP_OOPS] = "oops", | 1468 | [KMSG_DUMP_OOPS] = "oops", |
1469 | [KMSG_DUMP_PANIC] = "panic", | 1469 | [KMSG_DUMP_PANIC] = "panic", |
1470 | [KMSG_DUMP_KEXEC] = "kexec", | ||
1470 | }; | 1471 | }; |
1471 | 1472 | ||
1472 | static const char *kmsg_to_str(enum kmsg_dump_reason reason) | 1473 | static const char *kmsg_to_str(enum kmsg_dump_reason reason) |
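
With KMSG_DUMP_KEXEC in the table, a registered dumper can tell a crash kexec apart from a plain panic. A minimal sketch against this kernel's two-segment kmsg_dump callback; the dumper name and its policy are illustrative only.

#include <linux/kmsg_dump.h>
#include <linux/module.h>

static void my_dump(struct kmsg_dumper *dumper, enum kmsg_dump_reason reason,
		    const char *s1, unsigned long l1,
		    const char *s2, unsigned long l2)
{
	/* s1/s2 are the two chronological halves of the log ring buffer */
	if (reason == KMSG_DUMP_KEXEC)
		return;	/* e.g. skip slow flash writes, crash kernel dumps */
	/* ... persist s1[0..l1) then s2[0..l2) ... */
}

static struct kmsg_dumper my_dumper = {
	.dump	= my_dump,
};

static int __init my_dumper_init(void)
{
	return kmsg_dump_register(&my_dumper);
}
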
diff --git a/kernel/resource.c b/kernel/resource.c index af96c1e4b54b..24e9e60c1459 100644 --- a/kernel/resource.c +++ b/kernel/resource.c | |||
@@ -188,6 +188,36 @@ static int __release_resource(struct resource *old) | |||
188 | return -EINVAL; | 188 | return -EINVAL; |
189 | } | 189 | } |
190 | 190 | ||
191 | static void __release_child_resources(struct resource *r) | ||
192 | { | ||
193 | struct resource *tmp, *p; | ||
194 | resource_size_t size; | ||
195 | |||
196 | p = r->child; | ||
197 | r->child = NULL; | ||
198 | while (p) { | ||
199 | tmp = p; | ||
200 | p = p->sibling; | ||
201 | |||
202 | tmp->parent = NULL; | ||
203 | tmp->sibling = NULL; | ||
204 | __release_child_resources(tmp); | ||
205 | |||
206 | printk(KERN_DEBUG "release child resource %pR\n", tmp); | ||
207 | /* need to restore size, and keep flags */ | ||
208 | size = resource_size(tmp); | ||
209 | tmp->start = 0; | ||
210 | tmp->end = size - 1; | ||
211 | } | ||
212 | } | ||
213 | |||
214 | void release_child_resources(struct resource *r) | ||
215 | { | ||
216 | write_lock(&resource_lock); | ||
217 | __release_child_resources(r); | ||
218 | write_unlock(&resource_lock); | ||
219 | } | ||
220 | |||
191 | /** | 221 | /** |
192 | * request_resource - request and reserve an I/O or memory resource | 222 | * request_resource - request and reserve an I/O or memory resource |
193 | * @root: root resource descriptor | 223 | * @root: root resource descriptor |
@@ -303,8 +333,10 @@ int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages, | |||
303 | static int find_resource(struct resource *root, struct resource *new, | 333 | static int find_resource(struct resource *root, struct resource *new, |
304 | resource_size_t size, resource_size_t min, | 334 | resource_size_t size, resource_size_t min, |
305 | resource_size_t max, resource_size_t align, | 335 | resource_size_t max, resource_size_t align, |
306 | void (*alignf)(void *, struct resource *, | 336 | resource_size_t (*alignf)(void *, |
307 | resource_size_t, resource_size_t), | 337 | const struct resource *, |
338 | resource_size_t, | ||
339 | resource_size_t), | ||
308 | void *alignf_data) | 340 | void *alignf_data) |
309 | { | 341 | { |
310 | struct resource *this = root->child; | 342 | struct resource *this = root->child; |
@@ -330,7 +362,7 @@ static int find_resource(struct resource *root, struct resource *new, | |||
330 | tmp.end = max; | 362 | tmp.end = max; |
331 | tmp.start = ALIGN(tmp.start, align); | 363 | tmp.start = ALIGN(tmp.start, align); |
332 | if (alignf) | 364 | if (alignf) |
333 | alignf(alignf_data, &tmp, size, align); | 365 | tmp.start = alignf(alignf_data, &tmp, size, align); |
334 | if (tmp.start < tmp.end && tmp.end - tmp.start >= size - 1) { | 366 | if (tmp.start < tmp.end && tmp.end - tmp.start >= size - 1) { |
335 | new->start = tmp.start; | 367 | new->start = tmp.start; |
336 | new->end = tmp.start + size - 1; | 368 | new->end = tmp.start + size - 1; |
@@ -358,8 +390,10 @@ static int find_resource(struct resource *root, struct resource *new, | |||
358 | int allocate_resource(struct resource *root, struct resource *new, | 390 | int allocate_resource(struct resource *root, struct resource *new, |
359 | resource_size_t size, resource_size_t min, | 391 | resource_size_t size, resource_size_t min, |
360 | resource_size_t max, resource_size_t align, | 392 | resource_size_t max, resource_size_t align, |
361 | void (*alignf)(void *, struct resource *, | 393 | resource_size_t (*alignf)(void *, |
362 | resource_size_t, resource_size_t), | 394 | const struct resource *, |
395 | resource_size_t, | ||
396 | resource_size_t), | ||
363 | void *alignf_data) | 397 | void *alignf_data) |
364 | { | 398 | { |
365 | int err; | 399 | int err; |
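
The alignf hook now returns the candidate start instead of adjusting the resource in place. A sketch of a callback and call site under the new contract; my_alignf and its 0x300 constraint are hypothetical.

#include <linux/io.h>
#include <linux/ioport.h>

static resource_size_t my_alignf(void *data, const struct resource *res,
				 resource_size_t size, resource_size_t align)
{
	resource_size_t start = res->start;

	/* hypothetical constraint: dodge a legacy port alias */
	if ((res->flags & IORESOURCE_IO) && (start & 0x300))
		start = (start + 0x3ff) & ~(resource_size_t)0x3ff;
	return start;	/* find_resource() assigns this to tmp.start */
}

static int my_claim_ports(struct resource *res)
{
	/* 0x100 ports below IO_SPACE_LIMIT, 1K aligned */
	return allocate_resource(&ioport_resource, res, 0x100, 0,
				 IO_SPACE_LIMIT, 0x400, my_alignf, NULL);
}
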
diff --git a/kernel/sched.c b/kernel/sched.c index c535cc4f6428..3a8fb30a91b1 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -2320,14 +2320,12 @@ static int select_fallback_rq(int cpu, struct task_struct *p) | |||
2320 | } | 2320 | } |
2321 | 2321 | ||
2322 | /* | 2322 | /* |
2323 | * Called from: | 2323 | * Gets called from 3 sites (exec, fork, wakeup). Since it is called without
2324 | * holding rq->lock we need to ensure ->cpus_allowed is stable, this is done | ||
2325 | * by: | ||
2324 | * | 2326 | * |
2325 | * - fork, @p is stable because it isn't on the tasklist yet | 2327 | * exec: is unstable, retry loop |
2326 | * | 2328 | * fork & wake-up: serialize ->cpus_allowed against TASK_WAKING |
2327 | * - exec, @p is unstable, retry loop | ||
2328 | * | ||
2329 | * - wake-up, we serialize ->cpus_allowed against TASK_WAKING so | ||
2330 | * we should be good. | ||
2331 | */ | 2329 | */ |
2332 | static inline | 2330 | static inline |
2333 | int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags) | 2331 | int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags) |
@@ -2620,9 +2618,6 @@ void sched_fork(struct task_struct *p, int clone_flags) | |||
2620 | if (p->sched_class->task_fork) | 2618 | if (p->sched_class->task_fork) |
2621 | p->sched_class->task_fork(p); | 2619 | p->sched_class->task_fork(p); |
2622 | 2620 | ||
2623 | #ifdef CONFIG_SMP | ||
2624 | cpu = select_task_rq(p, SD_BALANCE_FORK, 0); | ||
2625 | #endif | ||
2626 | set_task_cpu(p, cpu); | 2621 | set_task_cpu(p, cpu); |
2627 | 2622 | ||
2628 | #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) | 2623 | #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) |
@@ -2652,6 +2647,21 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags) | |||
2652 | { | 2647 | { |
2653 | unsigned long flags; | 2648 | unsigned long flags; |
2654 | struct rq *rq; | 2649 | struct rq *rq; |
2650 | int cpu = get_cpu(); | ||
2651 | |||
2652 | #ifdef CONFIG_SMP | ||
2653 | /* | ||
2654 | * Fork balancing, do it here and not earlier because: | ||
2655 | * - cpus_allowed can change in the fork path | ||
2656 | * - any previously selected cpu might disappear through hotplug | ||
2657 | * | ||
2658 | * We still have TASK_WAKING but PF_STARTING is gone now, meaning | ||
2659 | * ->cpus_allowed is stable; we have preemption disabled, meaning | ||
2660 | * cpu_online_mask is stable. | ||
2661 | */ | ||
2662 | cpu = select_task_rq(p, SD_BALANCE_FORK, 0); | ||
2663 | set_task_cpu(p, cpu); | ||
2664 | #endif | ||
2655 | 2665 | ||
2656 | rq = task_rq_lock(p, &flags); | 2666 | rq = task_rq_lock(p, &flags); |
2657 | BUG_ON(p->state != TASK_WAKING); | 2667 | BUG_ON(p->state != TASK_WAKING); |
@@ -2665,6 +2675,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags) | |||
2665 | p->sched_class->task_woken(rq, p); | 2675 | p->sched_class->task_woken(rq, p); |
2666 | #endif | 2676 | #endif |
2667 | task_rq_unlock(rq, &flags); | 2677 | task_rq_unlock(rq, &flags); |
2678 | put_cpu(); | ||
2668 | } | 2679 | } |
2669 | 2680 | ||
2670 | #ifdef CONFIG_PREEMPT_NOTIFIERS | 2681 | #ifdef CONFIG_PREEMPT_NOTIFIERS |
@@ -5530,8 +5541,11 @@ need_resched_nonpreemptible: | |||
5530 | 5541 | ||
5531 | post_schedule(rq); | 5542 | post_schedule(rq); |
5532 | 5543 | ||
5533 | if (unlikely(reacquire_kernel_lock(current) < 0)) | 5544 | if (unlikely(reacquire_kernel_lock(current) < 0)) { |
5545 | prev = rq->curr; | ||
5546 | switch_count = &prev->nivcsw; | ||
5534 | goto need_resched_nonpreemptible; | 5547 | goto need_resched_nonpreemptible; |
5548 | } | ||
5535 | 5549 | ||
5536 | preempt_enable_no_resched(); | 5550 | preempt_enable_no_resched(); |
5537 | if (need_resched()) | 5551 | if (need_resched()) |
@@ -7136,14 +7150,18 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) | |||
7136 | * the ->cpus_allowed mask from under waking tasks, which would be | 7150 | * the ->cpus_allowed mask from under waking tasks, which would be |
7137 | * possible when we change rq->lock in ttwu(), so synchronize against | 7151 | * possible when we change rq->lock in ttwu(), so synchronize against |
7138 | * TASK_WAKING to avoid that. | 7152 | * TASK_WAKING to avoid that. |
7153 | * | ||
7154 | * Make an exception for freshly cloned tasks, since cpuset namespaces | ||
7155 | * might move the task about, we have to validate the target in | ||
7156 | * wake_up_new_task() anyway since the cpu might have gone away. | ||
7139 | */ | 7157 | */ |
7140 | again: | 7158 | again: |
7141 | while (p->state == TASK_WAKING) | 7159 | while (p->state == TASK_WAKING && !(p->flags & PF_STARTING)) |
7142 | cpu_relax(); | 7160 | cpu_relax(); |
7143 | 7161 | ||
7144 | rq = task_rq_lock(p, &flags); | 7162 | rq = task_rq_lock(p, &flags); |
7145 | 7163 | ||
7146 | if (p->state == TASK_WAKING) { | 7164 | if (p->state == TASK_WAKING && !(p->flags & PF_STARTING)) { |
7147 | task_rq_unlock(rq, &flags); | 7165 | task_rq_unlock(rq, &flags); |
7148 | goto again; | 7166 | goto again; |
7149 | } | 7167 | } |
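
The relocated fork balancing leans on the get_cpu()/put_cpu() pair added at the top of wake_up_new_task(): with preemption disabled, stop_machine based CPU hotplug cannot make progress, so cpu_online_mask is stable. The pattern in isolation, as a minimal sketch:

#include <linux/smp.h>
#include <linux/cpumask.h>

static int my_pick_online_cpu(void)
{
	int cpu = get_cpu();	/* disables preemption */

	/* cpu_online_mask cannot change until put_cpu() */
	if (!cpu_online(cpu))
		cpu = cpumask_any(cpu_online_mask);

	put_cpu();		/* re-enables preemption */
	return cpu;		/* may go stale once preemption is back on */
}
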
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 42ac3c9f66f6..8fe7ee81c552 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
@@ -1508,7 +1508,7 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag | |||
1508 | * If there's an idle sibling in this domain, make that | 1508 | * If there's an idle sibling in this domain, make that |
1509 | * the wake_affine target instead of the current cpu. | 1509 | * the wake_affine target instead of the current cpu. |
1510 | */ | 1510 | */ |
1511 | if (tmp->flags & SD_PREFER_SIBLING) | 1511 | if (tmp->flags & SD_SHARE_PKG_RESOURCES) |
1512 | target = select_idle_sibling(p, tmp, target); | 1512 | target = select_idle_sibling(p, tmp, target); |
1513 | 1513 | ||
1514 | if (target >= 0) { | 1514 | if (target >= 0) { |
diff --git a/kernel/smp.c b/kernel/smp.c index f10408422444..9867b6bfefce 100644 --- a/kernel/smp.c +++ b/kernel/smp.c | |||
@@ -12,8 +12,6 @@ | |||
12 | #include <linux/smp.h> | 12 | #include <linux/smp.h> |
13 | #include <linux/cpu.h> | 13 | #include <linux/cpu.h> |
14 | 14 | ||
15 | static DEFINE_PER_CPU(struct call_single_queue, call_single_queue); | ||
16 | |||
17 | static struct { | 15 | static struct { |
18 | struct list_head queue; | 16 | struct list_head queue; |
19 | raw_spinlock_t lock; | 17 | raw_spinlock_t lock; |
@@ -33,12 +31,14 @@ struct call_function_data { | |||
33 | cpumask_var_t cpumask; | 31 | cpumask_var_t cpumask; |
34 | }; | 32 | }; |
35 | 33 | ||
34 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_function_data, cfd_data); | ||
35 | |||
36 | struct call_single_queue { | 36 | struct call_single_queue { |
37 | struct list_head list; | 37 | struct list_head list; |
38 | raw_spinlock_t lock; | 38 | raw_spinlock_t lock; |
39 | }; | 39 | }; |
40 | 40 | ||
41 | static DEFINE_PER_CPU(struct call_function_data, cfd_data); | 41 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_single_queue, call_single_queue); |
42 | 42 | ||
43 | static int | 43 | static int |
44 | hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu) | 44 | hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu) |
@@ -256,7 +256,7 @@ void generic_smp_call_function_single_interrupt(void) | |||
256 | } | 256 | } |
257 | } | 257 | } |
258 | 258 | ||
259 | static DEFINE_PER_CPU(struct call_single_data, csd_data); | 259 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_single_data, csd_data); |
260 | 260 | ||
261 | /* | 261 | /* |
262 | * smp_call_function_single - Run a function on a specific CPU | 262 | * smp_call_function_single - Run a function on a specific CPU |
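
All three structures touched here are written by remote CPUs, so each per-cpu instance gets its own cache line; otherwise a cross-CPU enqueue could false-share with a neighbouring CPU's data. The declaration pattern, with a hypothetical structure:

#include <linux/percpu.h>
#include <linux/spinlock.h>
#include <linux/list.h>

/* hypothetical per-cpu queue that other cpus enqueue into */
struct my_remote_queue {
	struct list_head	list;
	raw_spinlock_t		lock;
};

/*
 * _SHARED_ALIGNED places each instance in the cacheline-aligned
 * per-cpu section, so a remote write to one cpu's queue never
 * dirties a line holding another cpu's data.
 */
static DEFINE_PER_CPU_SHARED_ALIGNED(struct my_remote_queue, my_remote_queue);
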
diff --git a/kernel/softirq.c b/kernel/softirq.c index a09502e2ef75..7c1a67ef0274 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
@@ -500,22 +500,17 @@ EXPORT_SYMBOL(tasklet_kill); | |||
500 | */ | 500 | */ |
501 | 501 | ||
502 | /* | 502 | /* |
503 | * The trampoline is called when the hrtimer expires. If this is | 503 | * The trampoline is called when the hrtimer expires. It schedules a tasklet |
504 | * called from the hrtimer interrupt then we schedule the tasklet as | 504 | * to run __tasklet_hrtimer_trampoline() which in turn will call the intended |
505 | * the timer callback function expects to run in softirq context. If | 505 | * hrtimer callback, but from softirq context. |
506 | * it's called in softirq context anyway (i.e. high resolution timers | ||
507 | * disabled) then the hrtimer callback is called right away. | ||
508 | */ | 506 | */ |
509 | static enum hrtimer_restart __hrtimer_tasklet_trampoline(struct hrtimer *timer) | 507 | static enum hrtimer_restart __hrtimer_tasklet_trampoline(struct hrtimer *timer) |
510 | { | 508 | { |
511 | struct tasklet_hrtimer *ttimer = | 509 | struct tasklet_hrtimer *ttimer = |
512 | container_of(timer, struct tasklet_hrtimer, timer); | 510 | container_of(timer, struct tasklet_hrtimer, timer); |
513 | 511 | ||
514 | if (hrtimer_is_hres_active(timer)) { | 512 | tasklet_hi_schedule(&ttimer->tasklet); |
515 | tasklet_hi_schedule(&ttimer->tasklet); | 513 | return HRTIMER_NORESTART; |
516 | return HRTIMER_NORESTART; | ||
517 | } | ||
518 | return ttimer->function(timer); | ||
519 | } | 514 | } |
520 | 515 | ||
521 | /* | 516 | /* |
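
After this simplification the supplied function always runs from the tasklet, i.e. in softirq context, with or without high resolution timers. A minimal usage sketch; the names are illustrative.

#include <linux/interrupt.h>

static struct tasklet_hrtimer my_thr;

/* now invoked from softirq context in both the hres and non-hres case */
static enum hrtimer_restart my_timer_fn(struct hrtimer *timer)
{
	/* ... work that relies on softirq context ... */
	return HRTIMER_NORESTART;
}

static void my_arm_timer(void)
{
	tasklet_hrtimer_init(&my_thr, my_timer_fn,
			     CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	tasklet_hrtimer_start(&my_thr, ktime_set(0, 10 * NSEC_PER_MSEC),
			      HRTIMER_MODE_REL);
}
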
diff --git a/kernel/softlockup.c b/kernel/softlockup.c index d22579087e27..0d4c7898ab80 100644 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c | |||
@@ -25,6 +25,7 @@ static DEFINE_SPINLOCK(print_lock); | |||
25 | static DEFINE_PER_CPU(unsigned long, softlockup_touch_ts); /* touch timestamp */ | 25 | static DEFINE_PER_CPU(unsigned long, softlockup_touch_ts); /* touch timestamp */ |
26 | static DEFINE_PER_CPU(unsigned long, softlockup_print_ts); /* print timestamp */ | 26 | static DEFINE_PER_CPU(unsigned long, softlockup_print_ts); /* print timestamp */ |
27 | static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog); | 27 | static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog); |
28 | static DEFINE_PER_CPU(bool, softlock_touch_sync); | ||
28 | 29 | ||
29 | static int __read_mostly did_panic; | 30 | static int __read_mostly did_panic; |
30 | int __read_mostly softlockup_thresh = 60; | 31 | int __read_mostly softlockup_thresh = 60; |
@@ -79,6 +80,12 @@ void touch_softlockup_watchdog(void) | |||
79 | } | 80 | } |
80 | EXPORT_SYMBOL(touch_softlockup_watchdog); | 81 | EXPORT_SYMBOL(touch_softlockup_watchdog); |
81 | 82 | ||
83 | void touch_softlockup_watchdog_sync(void) | ||
84 | { | ||
85 | __raw_get_cpu_var(softlock_touch_sync) = true; | ||
86 | __raw_get_cpu_var(softlockup_touch_ts) = 0; | ||
87 | } | ||
88 | |||
82 | void touch_all_softlockup_watchdogs(void) | 89 | void touch_all_softlockup_watchdogs(void) |
83 | { | 90 | { |
84 | int cpu; | 91 | int cpu; |
@@ -118,6 +125,14 @@ void softlockup_tick(void) | |||
118 | } | 125 | } |
119 | 126 | ||
120 | if (touch_ts == 0) { | 127 | if (touch_ts == 0) { |
128 | if (unlikely(per_cpu(softlock_touch_sync, this_cpu))) { | ||
129 | /* | ||
130 | * If the time stamp was touched atomically | ||
131 | * make sure the scheduler tick is up to date. | ||
132 | */ | ||
133 | per_cpu(softlock_touch_sync, this_cpu) = false; | ||
134 | sched_clock_tick(); | ||
135 | } | ||
121 | __touch_softlockup_watchdog(); | 136 | __touch_softlockup_watchdog(); |
122 | return; | 137 | return; |
123 | } | 138 | } |
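
The _sync variant is aimed at stop-the-machine contexts such as kgdb: besides zeroing the touch timestamp it asks softlockup_tick() to run sched_clock_tick(), since the per-cpu clock is stale after the stop. A hedged sketch of a debugger resume path using it:

/* hypothetical resume hook of a stop-the-machine debugger */
static void my_debugger_resume(void)
{
	/* keep the clocksource watchdog from flagging the TSC unstable */
	clocksource_touch_watchdog();
	/* zero the touch timestamp and request a sched_clock_tick()
	 * resync on the next softlockup_tick() */
	touch_softlockup_watchdog_sync();
}
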
diff --git a/kernel/sys.c b/kernel/sys.c index 26a6b73a6b85..18bde979f346 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
@@ -222,6 +222,7 @@ SYSCALL_DEFINE2(getpriority, int, which, int, who) | |||
222 | if (which > PRIO_USER || which < PRIO_PROCESS) | 222 | if (which > PRIO_USER || which < PRIO_PROCESS) |
223 | return -EINVAL; | 223 | return -EINVAL; |
224 | 224 | ||
225 | rcu_read_lock(); | ||
225 | read_lock(&tasklist_lock); | 226 | read_lock(&tasklist_lock); |
226 | switch (which) { | 227 | switch (which) { |
227 | case PRIO_PROCESS: | 228 | case PRIO_PROCESS: |
@@ -267,6 +268,7 @@ SYSCALL_DEFINE2(getpriority, int, which, int, who) | |||
267 | } | 268 | } |
268 | out_unlock: | 269 | out_unlock: |
269 | read_unlock(&tasklist_lock); | 270 | read_unlock(&tasklist_lock); |
271 | rcu_read_unlock(); | ||
270 | 272 | ||
271 | return retval; | 273 | return retval; |
272 | } | 274 | } |
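
The pid-hash lookups behind these PRIO_* cases (find_task_by_vpid() and friends) are RCU protected, hence the added rcu_read_lock() bracket around the existing tasklist lock. The locking shape in a minimal sketch:

#include <linux/sched.h>
#include <linux/rcupdate.h>

static long my_get_nice(pid_t pid)
{
	struct task_struct *p;
	long nice = -ESRCH;

	rcu_read_lock();		/* protects the pid-hash lookup */
	read_lock(&tasklist_lock);
	p = find_task_by_vpid(pid);
	if (p)
		nice = task_nice(p);
	read_unlock(&tasklist_lock);
	rcu_read_unlock();
	return nice;
}
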
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 6f740d9f0948..d7395fdfb9f3 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c | |||
@@ -259,7 +259,8 @@ void clockevents_notify(unsigned long reason, void *arg) | |||
259 | cpu = *((int *)arg); | 259 | cpu = *((int *)arg); |
260 | list_for_each_entry_safe(dev, tmp, &clockevent_devices, list) { | 260 | list_for_each_entry_safe(dev, tmp, &clockevent_devices, list) { |
261 | if (cpumask_test_cpu(cpu, dev->cpumask) && | 261 | if (cpumask_test_cpu(cpu, dev->cpumask) && |
262 | cpumask_weight(dev->cpumask) == 1) { | 262 | cpumask_weight(dev->cpumask) == 1 && |
263 | !tick_is_broadcast_device(dev)) { | ||
263 | BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED); | 264 | BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED); |
264 | list_del(&dev->list); | 265 | list_del(&dev->list); |
265 | } | 266 | } |
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index e85c23404d34..13700833c181 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c | |||
@@ -343,7 +343,19 @@ static void clocksource_resume_watchdog(void) | |||
343 | { | 343 | { |
344 | unsigned long flags; | 344 | unsigned long flags; |
345 | 345 | ||
346 | spin_lock_irqsave(&watchdog_lock, flags); | 346 | /* |
347 | * We use trylock here to avoid a potential deadlock when | ||
348 | * kgdb calls this code after the kernel has been stopped with | ||
349 | * watchdog_lock held. When watchdog_lock is held we just | ||
350 | * return and accept that the watchdog might trigger and mark | ||
351 | * the monitored clock source (usually TSC) unstable. | ||
352 | * | ||
353 | * This does not affect the other caller clocksource_resume() | ||
354 | * because at this point the kernel is UP, interrupts are | ||
355 | * disabled and nothing can hold watchdog_lock. | ||
356 | */ | ||
357 | if (!spin_trylock_irqsave(&watchdog_lock, flags)) | ||
358 | return; | ||
347 | clocksource_reset_watchdog(); | 359 | clocksource_reset_watchdog(); |
348 | spin_unlock_irqrestore(&watchdog_lock, flags); | 360 | spin_unlock_irqrestore(&watchdog_lock, flags); |
349 | } | 361 | } |
@@ -458,8 +470,8 @@ void clocksource_resume(void) | |||
458 | * clocksource_touch_watchdog - Update watchdog | 470 | * clocksource_touch_watchdog - Update watchdog |
459 | * | 471 | * |
460 | * Update the watchdog after exception contexts such as kgdb so as not | 472 | * Update the watchdog after exception contexts such as kgdb so as not |
461 | * to incorrectly trip the watchdog. | 473 | * to incorrectly trip the watchdog. This might fail when the kernel |
462 | * | 474 | * was stopped in code which holds watchdog_lock. |
463 | */ | 475 | */ |
464 | void clocksource_touch_watchdog(void) | 476 | void clocksource_touch_watchdog(void) |
465 | { | 477 | { |
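
The same bail-out idiom in isolation: a recovery path that can run while the stopped context holds the lock should trylock and give up, trading a possible spurious watchdog trip for a guaranteed deadlock. A minimal sketch with a hypothetical lock:

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(my_lock);

static void my_resume_fixup(void)
{
	unsigned long flags;

	/* the stopped context may already hold my_lock; skip the
	 * fixup rather than spin forever */
	if (!spin_trylock_irqsave(&my_lock, flags))
		return;
	/* ... fixup work under my_lock ... */
	spin_unlock_irqrestore(&my_lock, flags);
}
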
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 7faaa32fbf4f..e2ab064c6d41 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c | |||
@@ -880,6 +880,7 @@ void getboottime(struct timespec *ts) | |||
880 | 880 | ||
881 | set_normalized_timespec(ts, -boottime.tv_sec, -boottime.tv_nsec); | 881 | set_normalized_timespec(ts, -boottime.tv_sec, -boottime.tv_nsec); |
882 | } | 882 | } |
883 | EXPORT_SYMBOL_GPL(getboottime); | ||
883 | 884 | ||
884 | /** | 885 | /** |
885 | * monotonic_to_bootbased - Convert the monotonic time to boot based. | 886 | * monotonic_to_bootbased - Convert the monotonic time to boot based. |
@@ -889,6 +890,7 @@ void monotonic_to_bootbased(struct timespec *ts) | |||
889 | { | 890 | { |
890 | *ts = timespec_add_safe(*ts, total_sleep_time); | 891 | *ts = timespec_add_safe(*ts, total_sleep_time); |
891 | } | 892 | } |
893 | EXPORT_SYMBOL_GPL(monotonic_to_bootbased); | ||
892 | 894 | ||
893 | unsigned long get_seconds(void) | 895 | unsigned long get_seconds(void) |
894 | { | 896 | { |
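
These exports let modular code convert monotonic timestamps into boot-based ones (monotonic plus total sleep time). A hedged module-side helper:

#include <linux/time.h>

/* timestamp an event in boot-based time */
static void my_boottime_stamp(struct timespec *ts)
{
	ktime_get_ts(ts);		/* CLOCK_MONOTONIC */
	monotonic_to_bootbased(ts);	/* usable from modules now */
}
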
diff --git a/kernel/timer.c b/kernel/timer.c index 15533b792397..c61a7949387f 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
@@ -1198,6 +1198,7 @@ void update_process_times(int user_tick) | |||
1198 | run_local_timers(); | 1198 | run_local_timers(); |
1199 | rcu_check_callbacks(cpu, user_tick); | 1199 | rcu_check_callbacks(cpu, user_tick); |
1200 | printk_tick(); | 1200 | printk_tick(); |
1201 | perf_event_do_pending(); | ||
1201 | scheduler_tick(); | 1202 | scheduler_tick(); |
1202 | run_posix_cpu_timers(p); | 1203 | run_posix_cpu_timers(p); |
1203 | } | 1204 | } |
@@ -1209,8 +1210,6 @@ static void run_timer_softirq(struct softirq_action *h) | |||
1209 | { | 1210 | { |
1210 | struct tvec_base *base = __get_cpu_var(tvec_bases); | 1211 | struct tvec_base *base = __get_cpu_var(tvec_bases); |
1211 | 1212 | ||
1212 | perf_event_do_pending(); | ||
1213 | |||
1214 | hrtimer_run_pending(); | 1213 | hrtimer_run_pending(); |
1215 | 1214 | ||
1216 | if (time_after_eq(jiffies, base->timer_jiffies)) | 1215 | if (time_after_eq(jiffies, base->timer_jiffies)) |
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 6c22d8a2f289..60e2ce0181ee 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig | |||
@@ -27,9 +27,7 @@ config HAVE_FUNCTION_GRAPH_TRACER | |||
27 | config HAVE_FUNCTION_GRAPH_FP_TEST | 27 | config HAVE_FUNCTION_GRAPH_FP_TEST |
28 | bool | 28 | bool |
29 | help | 29 | help |
30 | An arch may pass in a unique value (frame pointer) to both the | 30 | See Documentation/trace/ftrace-design.txt |
31 | entering and exiting of a function. On exit, the value is compared | ||
32 | and if it does not match, then it will panic the kernel. | ||
33 | 31 | ||
34 | config HAVE_FUNCTION_TRACE_MCOUNT_TEST | 32 | config HAVE_FUNCTION_TRACE_MCOUNT_TEST |
35 | bool | 33 | bool |
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index edefe3b2801b..8c1b2d290718 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c | |||
@@ -464,6 +464,8 @@ struct ring_buffer_iter { | |||
464 | struct ring_buffer_per_cpu *cpu_buffer; | 464 | struct ring_buffer_per_cpu *cpu_buffer; |
465 | unsigned long head; | 465 | unsigned long head; |
466 | struct buffer_page *head_page; | 466 | struct buffer_page *head_page; |
467 | struct buffer_page *cache_reader_page; | ||
468 | unsigned long cache_read; | ||
467 | u64 read_stamp; | 469 | u64 read_stamp; |
468 | }; | 470 | }; |
469 | 471 | ||
@@ -2716,6 +2718,8 @@ static void rb_iter_reset(struct ring_buffer_iter *iter) | |||
2716 | iter->read_stamp = cpu_buffer->read_stamp; | 2718 | iter->read_stamp = cpu_buffer->read_stamp; |
2717 | else | 2719 | else |
2718 | iter->read_stamp = iter->head_page->page->time_stamp; | 2720 | iter->read_stamp = iter->head_page->page->time_stamp; |
2721 | iter->cache_reader_page = cpu_buffer->reader_page; | ||
2722 | iter->cache_read = cpu_buffer->read; | ||
2719 | } | 2723 | } |
2720 | 2724 | ||
2721 | /** | 2725 | /** |
@@ -3060,13 +3064,22 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) | |||
3060 | struct ring_buffer_event *event; | 3064 | struct ring_buffer_event *event; |
3061 | int nr_loops = 0; | 3065 | int nr_loops = 0; |
3062 | 3066 | ||
3063 | if (ring_buffer_iter_empty(iter)) | ||
3064 | return NULL; | ||
3065 | |||
3066 | cpu_buffer = iter->cpu_buffer; | 3067 | cpu_buffer = iter->cpu_buffer; |
3067 | buffer = cpu_buffer->buffer; | 3068 | buffer = cpu_buffer->buffer; |
3068 | 3069 | ||
3070 | /* | ||
3071 | * Check if someone performed a consuming read to | ||
3072 | * the buffer. A consuming read invalidates the iterator | ||
3073 | * and we need to reset the iterator in this case. | ||
3074 | */ | ||
3075 | if (unlikely(iter->cache_read != cpu_buffer->read || | ||
3076 | iter->cache_reader_page != cpu_buffer->reader_page)) | ||
3077 | rb_iter_reset(iter); | ||
3078 | |||
3069 | again: | 3079 | again: |
3080 | if (ring_buffer_iter_empty(iter)) | ||
3081 | return NULL; | ||
3082 | |||
3070 | /* | 3083 | /* |
3071 | * We repeat when a timestamp is encountered. | 3084 | * We repeat when a timestamp is encountered. |
3072 | * We can get multiple timestamps by nested interrupts or also | 3085 | * We can get multiple timestamps by nested interrupts or also |
@@ -3081,6 +3094,11 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) | |||
3081 | if (rb_per_cpu_empty(cpu_buffer)) | 3094 | if (rb_per_cpu_empty(cpu_buffer)) |
3082 | return NULL; | 3095 | return NULL; |
3083 | 3096 | ||
3097 | if (iter->head >= local_read(&iter->head_page->page->commit)) { | ||
3098 | rb_inc_iter(iter); | ||
3099 | goto again; | ||
3100 | } | ||
3101 | |||
3084 | event = rb_iter_head_event(iter); | 3102 | event = rb_iter_head_event(iter); |
3085 | 3103 | ||
3086 | switch (event->type_len) { | 3104 | switch (event->type_len) { |
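
The fix is a generation check: at reset the iterator caches the buffer's consume count and reader page, and any later mismatch means a consuming read ran concurrently, so the iterator restarts rather than walking recycled pages. The scheme reduced to a generic sketch:

struct my_buf {
	unsigned long consumed;		/* bumped by every consuming read */
	/* ... pages ... */
};

struct my_iter {
	struct my_buf *buf;
	unsigned long cache_consumed;
	unsigned long pos;
};

static void my_iter_reset(struct my_iter *it)
{
	it->pos = 0;
	it->cache_consumed = it->buf->consumed;
}

static void *my_iter_peek(struct my_iter *it)
{
	/* a consuming read since the last reset invalidates our position */
	if (it->cache_consumed != it->buf->consumed)
		my_iter_reset(it);
	/* ... return the event at it->pos, or NULL when none ... */
	return NULL;
}
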
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 0df1b0f2cb9e..eac6875cb990 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
@@ -951,6 +951,11 @@ void trace_find_cmdline(int pid, char comm[]) | |||
951 | return; | 951 | return; |
952 | } | 952 | } |
953 | 953 | ||
954 | if (WARN_ON_ONCE(pid < 0)) { | ||
955 | strcpy(comm, "<XXX>"); | ||
956 | return; | ||
957 | } | ||
958 | |||
954 | if (pid > PID_MAX_DEFAULT) { | 959 | if (pid > PID_MAX_DEFAULT) { |
955 | strcpy(comm, "<...>"); | 960 | strcpy(comm, "<...>"); |
956 | return; | 961 | return; |
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 6ea90c0e2c96..50b1b8239806 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c | |||
@@ -689,7 +689,7 @@ static int create_trace_probe(int argc, char **argv) | |||
689 | return -EINVAL; | 689 | return -EINVAL; |
690 | } | 690 | } |
691 | /* an address specified */ | 691 | /* an address specified */ |
692 | ret = strict_strtoul(&argv[0][2], 0, (unsigned long *)&addr); | 692 | ret = strict_strtoul(&argv[1][0], 0, (unsigned long *)&addr); |
693 | if (ret) { | 693 | if (ret) { |
694 | pr_info("Failed to parse address.\n"); | 694 | pr_info("Failed to parse address.\n"); |
695 | return ret; | 695 | return ret; |
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 678a5120ee30..f4bc9b27de5f 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c | |||
@@ -157,6 +157,7 @@ stack_max_size_write(struct file *filp, const char __user *ubuf, | |||
157 | unsigned long val, flags; | 157 | unsigned long val, flags; |
158 | char buf[64]; | 158 | char buf[64]; |
159 | int ret; | 159 | int ret; |
160 | int cpu; | ||
160 | 161 | ||
161 | if (count >= sizeof(buf)) | 162 | if (count >= sizeof(buf)) |
162 | return -EINVAL; | 163 | return -EINVAL; |
@@ -171,9 +172,20 @@ stack_max_size_write(struct file *filp, const char __user *ubuf, | |||
171 | return ret; | 172 | return ret; |
172 | 173 | ||
173 | local_irq_save(flags); | 174 | local_irq_save(flags); |
175 | |||
176 | /* | ||
177 | * In case we trace inside arch_spin_lock() or after (NMI), | ||
178 | * we would cause a circular lock, so we also need to increase | ||
179 | * the percpu trace_active here. | ||
180 | */ | ||
181 | cpu = smp_processor_id(); | ||
182 | per_cpu(trace_active, cpu)++; | ||
183 | |||
174 | arch_spin_lock(&max_stack_lock); | 184 | arch_spin_lock(&max_stack_lock); |
175 | *ptr = val; | 185 | *ptr = val; |
176 | arch_spin_unlock(&max_stack_lock); | 186 | arch_spin_unlock(&max_stack_lock); |
187 | |||
188 | per_cpu(trace_active, cpu)--; | ||
177 | local_irq_restore(flags); | 189 | local_irq_restore(flags); |
178 | 190 | ||
179 | return count; | 191 | return count; |
@@ -206,7 +218,13 @@ t_next(struct seq_file *m, void *v, loff_t *pos) | |||
206 | 218 | ||
207 | static void *t_start(struct seq_file *m, loff_t *pos) | 219 | static void *t_start(struct seq_file *m, loff_t *pos) |
208 | { | 220 | { |
221 | int cpu; | ||
222 | |||
209 | local_irq_disable(); | 223 | local_irq_disable(); |
224 | |||
225 | cpu = smp_processor_id(); | ||
226 | per_cpu(trace_active, cpu)++; | ||
227 | |||
210 | arch_spin_lock(&max_stack_lock); | 228 | arch_spin_lock(&max_stack_lock); |
211 | 229 | ||
212 | if (*pos == 0) | 230 | if (*pos == 0) |
@@ -217,7 +235,13 @@ static void *t_start(struct seq_file *m, loff_t *pos) | |||
217 | 235 | ||
218 | static void t_stop(struct seq_file *m, void *p) | 236 | static void t_stop(struct seq_file *m, void *p) |
219 | { | 237 | { |
238 | int cpu; | ||
239 | |||
220 | arch_spin_unlock(&max_stack_lock); | 240 | arch_spin_unlock(&max_stack_lock); |
241 | |||
242 | cpu = smp_processor_id(); | ||
243 | per_cpu(trace_active, cpu)--; | ||
244 | |||
221 | local_irq_enable(); | 245 | local_irq_enable(); |
222 | } | 246 | } |
223 | 247 | ||
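
All three paths now share one recursion guard: bump the per-cpu trace_active counter, which the stack tracer's probe tests before doing anything, and only then take max_stack_lock. A trace hit inside arch_spin_lock(), or from an NMI, then backs off instead of re-acquiring the lock. The guard consolidated into a single sketch; the declarations mirror trace_stack.c's own:

static DEFINE_PER_CPU(int, trace_active);	/* checked by the probe */
static arch_spinlock_t max_stack_lock =
	(arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;

static void my_locked_update(unsigned long *ptr, unsigned long val)
{
	unsigned long flags;
	int cpu;

	local_irq_save(flags);

	/* any stack-trace probe on this cpu now sees trace_active != 0
	 * and returns early, so it cannot re-take max_stack_lock */
	cpu = smp_processor_id();
	per_cpu(trace_active, cpu)++;

	arch_spin_lock(&max_stack_lock);
	*ptr = val;
	arch_spin_unlock(&max_stack_lock);

	per_cpu(trace_active, cpu)--;
	local_irq_restore(flags);
}
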