aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorFrederic Weisbecker <fweisbec@gmail.com>2010-02-27 10:18:46 -0500
committerFrederic Weisbecker <fweisbec@gmail.com>2010-02-27 10:18:46 -0500
commit018cbffe6819f6f8db20a0a3acd9bab9bfd667e4 (patch)
treefadde2521591998dc653fa094c636e8a547e620d /kernel
parent1dd2980d990068e20045b90c424518cc7f3657ff (diff)
parent60b341b778cc2929df16c0a504c91621b3c6a4ad (diff)
Merge commit 'v2.6.33' into perf/core
Merge reason: __percpu annotations need the corresponding sparse address space definition upstream. Conflicts: tools/perf/util/probe-event.c (trivial)
Diffstat (limited to 'kernel')
-rw-r--r--kernel/cgroup.c7
-rw-r--r--kernel/cpu.c10
-rw-r--r--kernel/cred.c2
-rw-r--r--kernel/fork.c15
-rw-r--r--kernel/futex.c30
-rw-r--r--kernel/hw_breakpoint.c54
-rw-r--r--kernel/kfifo.c6
-rw-r--r--kernel/kgdb.c9
-rw-r--r--kernel/lockdep.c2
-rw-r--r--kernel/perf_event.c13
-rw-r--r--kernel/sched.c39
-rw-r--r--kernel/softirq.c15
-rw-r--r--kernel/softlockup.c15
-rw-r--r--kernel/sys.c2
-rw-r--r--kernel/time/clocksource.c18
-rw-r--r--kernel/time/timekeeping.c2
-rw-r--r--kernel/trace/Kconfig4
-rw-r--r--kernel/trace/ring_buffer.c24
-rw-r--r--kernel/trace/trace.c5
-rw-r--r--kernel/trace/trace_kprobe.c2
-rw-r--r--kernel/trace/trace_stack.c24
21 files changed, 216 insertions, 82 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 1fbcc748044a..aa3bee566446 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2936,14 +2936,17 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
2936 2936
2937 for_each_subsys(root, ss) { 2937 for_each_subsys(root, ss) {
2938 struct cgroup_subsys_state *css = ss->create(ss, cgrp); 2938 struct cgroup_subsys_state *css = ss->create(ss, cgrp);
2939
2939 if (IS_ERR(css)) { 2940 if (IS_ERR(css)) {
2940 err = PTR_ERR(css); 2941 err = PTR_ERR(css);
2941 goto err_destroy; 2942 goto err_destroy;
2942 } 2943 }
2943 init_cgroup_css(css, ss, cgrp); 2944 init_cgroup_css(css, ss, cgrp);
2944 if (ss->use_id) 2945 if (ss->use_id) {
2945 if (alloc_css_id(ss, parent, cgrp)) 2946 err = alloc_css_id(ss, parent, cgrp);
2947 if (err)
2946 goto err_destroy; 2948 goto err_destroy;
2949 }
2947 /* At error, ->destroy() callback has to free assigned ID. */ 2950 /* At error, ->destroy() callback has to free assigned ID. */
2948 } 2951 }
2949 2952
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 1c8ddd6ee940..677f25376a38 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -151,13 +151,13 @@ static inline void check_for_tasks(int cpu)
151 151
152 write_lock_irq(&tasklist_lock); 152 write_lock_irq(&tasklist_lock);
153 for_each_process(p) { 153 for_each_process(p) {
154 if (task_cpu(p) == cpu && 154 if (task_cpu(p) == cpu && p->state == TASK_RUNNING &&
155 (!cputime_eq(p->utime, cputime_zero) || 155 (!cputime_eq(p->utime, cputime_zero) ||
156 !cputime_eq(p->stime, cputime_zero))) 156 !cputime_eq(p->stime, cputime_zero)))
157 printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d\ 157 printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d "
158 (state = %ld, flags = %x) \n", 158 "(state = %ld, flags = %x)\n",
159 p->comm, task_pid_nr(p), cpu, 159 p->comm, task_pid_nr(p), cpu,
160 p->state, p->flags); 160 p->state, p->flags);
161 } 161 }
162 write_unlock_irq(&tasklist_lock); 162 write_unlock_irq(&tasklist_lock);
163} 163}
diff --git a/kernel/cred.c b/kernel/cred.c
index dd76cfe5f5b0..1ed8ca18790c 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -224,7 +224,7 @@ struct cred *cred_alloc_blank(void)
224#ifdef CONFIG_KEYS 224#ifdef CONFIG_KEYS
225 new->tgcred = kzalloc(sizeof(*new->tgcred), GFP_KERNEL); 225 new->tgcred = kzalloc(sizeof(*new->tgcred), GFP_KERNEL);
226 if (!new->tgcred) { 226 if (!new->tgcred) {
227 kfree(new); 227 kmem_cache_free(cred_jar, new);
228 return NULL; 228 return NULL;
229 } 229 }
230 atomic_set(&new->tgcred->usage, 1); 230 atomic_set(&new->tgcred->usage, 1);
diff --git a/kernel/fork.c b/kernel/fork.c
index 5b2959b3ffc2..f88bd984df35 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1241,21 +1241,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1241 /* Need tasklist lock for parent etc handling! */ 1241 /* Need tasklist lock for parent etc handling! */
1242 write_lock_irq(&tasklist_lock); 1242 write_lock_irq(&tasklist_lock);
1243 1243
1244 /*
1245 * The task hasn't been attached yet, so its cpus_allowed mask will
1246 * not be changed, nor will its assigned CPU.
1247 *
1248 * The cpus_allowed mask of the parent may have changed after it was
1249 * copied first time - so re-copy it here, then check the child's CPU
1250 * to ensure it is on a valid CPU (and if not, just force it back to
1251 * parent's CPU). This avoids alot of nasty races.
1252 */
1253 p->cpus_allowed = current->cpus_allowed;
1254 p->rt.nr_cpus_allowed = current->rt.nr_cpus_allowed;
1255 if (unlikely(!cpu_isset(task_cpu(p), p->cpus_allowed) ||
1256 !cpu_online(task_cpu(p))))
1257 set_task_cpu(p, smp_processor_id());
1258
1259 /* CLONE_PARENT re-uses the old parent */ 1244 /* CLONE_PARENT re-uses the old parent */
1260 if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) { 1245 if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) {
1261 p->real_parent = current->real_parent; 1246 p->real_parent = current->real_parent;
diff --git a/kernel/futex.c b/kernel/futex.c
index d9b3a2228f9d..e7a35f1039e7 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -530,8 +530,25 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
530 return -EINVAL; 530 return -EINVAL;
531 531
532 WARN_ON(!atomic_read(&pi_state->refcount)); 532 WARN_ON(!atomic_read(&pi_state->refcount));
533 WARN_ON(pid && pi_state->owner && 533
534 pi_state->owner->pid != pid); 534 /*
535 * When pi_state->owner is NULL then the owner died
536 * and another waiter is on the fly. pi_state->owner
537 * is fixed up by the task which acquires
538 * pi_state->rt_mutex.
539 *
540 * We do not check for pid == 0 which can happen when
541 * the owner died and robust_list_exit() cleared the
542 * TID.
543 */
544 if (pid && pi_state->owner) {
545 /*
546 * Bail out if user space manipulated the
547 * futex value.
548 */
549 if (pid != task_pid_vnr(pi_state->owner))
550 return -EINVAL;
551 }
535 552
536 atomic_inc(&pi_state->refcount); 553 atomic_inc(&pi_state->refcount);
537 *ps = pi_state; 554 *ps = pi_state;
@@ -758,6 +775,13 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
758 if (!pi_state) 775 if (!pi_state)
759 return -EINVAL; 776 return -EINVAL;
760 777
778 /*
779 * If current does not own the pi_state then the futex is
780 * inconsistent and user space fiddled with the futex value.
781 */
782 if (pi_state->owner != current)
783 return -EINVAL;
784
761 raw_spin_lock(&pi_state->pi_mutex.wait_lock); 785 raw_spin_lock(&pi_state->pi_mutex.wait_lock);
762 new_owner = rt_mutex_next_owner(&pi_state->pi_mutex); 786 new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
763 787
@@ -1971,7 +1995,7 @@ retry_private:
1971 /* Unqueue and drop the lock */ 1995 /* Unqueue and drop the lock */
1972 unqueue_me_pi(&q); 1996 unqueue_me_pi(&q);
1973 1997
1974 goto out; 1998 goto out_put_key;
1975 1999
1976out_unlock_put_key: 2000out_unlock_put_key:
1977 queue_unlock(&q, hb); 2001 queue_unlock(&q, hb);
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index c030ae657f20..967e66143e11 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -243,38 +243,70 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable)
243 * ((per_cpu(nr_bp_flexible, *) > 1) + max(per_cpu(nr_cpu_bp_pinned, *)) 243 * ((per_cpu(nr_bp_flexible, *) > 1) + max(per_cpu(nr_cpu_bp_pinned, *))
244 * + max(per_cpu(nr_task_bp_pinned, *))) < HBP_NUM 244 * + max(per_cpu(nr_task_bp_pinned, *))) < HBP_NUM
245 */ 245 */
246int reserve_bp_slot(struct perf_event *bp) 246static int __reserve_bp_slot(struct perf_event *bp)
247{ 247{
248 struct bp_busy_slots slots = {0}; 248 struct bp_busy_slots slots = {0};
249 int ret = 0;
250
251 mutex_lock(&nr_bp_mutex);
252 249
253 fetch_bp_busy_slots(&slots, bp); 250 fetch_bp_busy_slots(&slots, bp);
254 251
255 /* Flexible counters need to keep at least one slot */ 252 /* Flexible counters need to keep at least one slot */
256 if (slots.pinned + (!!slots.flexible) == HBP_NUM) { 253 if (slots.pinned + (!!slots.flexible) == HBP_NUM)
257 ret = -ENOSPC; 254 return -ENOSPC;
258 goto end;
259 }
260 255
261 toggle_bp_slot(bp, true); 256 toggle_bp_slot(bp, true);
262 257
263end: 258 return 0;
259}
260
261int reserve_bp_slot(struct perf_event *bp)
262{
263 int ret;
264
265 mutex_lock(&nr_bp_mutex);
266
267 ret = __reserve_bp_slot(bp);
268
264 mutex_unlock(&nr_bp_mutex); 269 mutex_unlock(&nr_bp_mutex);
265 270
266 return ret; 271 return ret;
267} 272}
268 273
274static void __release_bp_slot(struct perf_event *bp)
275{
276 toggle_bp_slot(bp, false);
277}
278
269void release_bp_slot(struct perf_event *bp) 279void release_bp_slot(struct perf_event *bp)
270{ 280{
271 mutex_lock(&nr_bp_mutex); 281 mutex_lock(&nr_bp_mutex);
272 282
273 toggle_bp_slot(bp, false); 283 __release_bp_slot(bp);
274 284
275 mutex_unlock(&nr_bp_mutex); 285 mutex_unlock(&nr_bp_mutex);
276} 286}
277 287
288/*
289 * Allow the kernel debugger to reserve breakpoint slots without
290 * taking a lock using the dbg_* variant of for the reserve and
291 * release breakpoint slots.
292 */
293int dbg_reserve_bp_slot(struct perf_event *bp)
294{
295 if (mutex_is_locked(&nr_bp_mutex))
296 return -1;
297
298 return __reserve_bp_slot(bp);
299}
300
301int dbg_release_bp_slot(struct perf_event *bp)
302{
303 if (mutex_is_locked(&nr_bp_mutex))
304 return -1;
305
306 __release_bp_slot(bp);
307
308 return 0;
309}
278 310
279int register_perf_hw_breakpoint(struct perf_event *bp) 311int register_perf_hw_breakpoint(struct perf_event *bp)
280{ 312{
@@ -328,8 +360,8 @@ EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
328int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr) 360int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr)
329{ 361{
330 u64 old_addr = bp->attr.bp_addr; 362 u64 old_addr = bp->attr.bp_addr;
363 u64 old_len = bp->attr.bp_len;
331 int old_type = bp->attr.bp_type; 364 int old_type = bp->attr.bp_type;
332 int old_len = bp->attr.bp_len;
333 int err = 0; 365 int err = 0;
334 366
335 perf_event_disable(bp); 367 perf_event_disable(bp);
diff --git a/kernel/kfifo.c b/kernel/kfifo.c
index 32c5c15d750d..35edbe22e9a9 100644
--- a/kernel/kfifo.c
+++ b/kernel/kfifo.c
@@ -80,7 +80,7 @@ int kfifo_alloc(struct kfifo *fifo, unsigned int size, gfp_t gfp_mask)
80 80
81 buffer = kmalloc(size, gfp_mask); 81 buffer = kmalloc(size, gfp_mask);
82 if (!buffer) { 82 if (!buffer) {
83 _kfifo_init(fifo, 0, 0); 83 _kfifo_init(fifo, NULL, 0);
84 return -ENOMEM; 84 return -ENOMEM;
85 } 85 }
86 86
@@ -97,6 +97,7 @@ EXPORT_SYMBOL(kfifo_alloc);
97void kfifo_free(struct kfifo *fifo) 97void kfifo_free(struct kfifo *fifo)
98{ 98{
99 kfree(fifo->buffer); 99 kfree(fifo->buffer);
100 _kfifo_init(fifo, NULL, 0);
100} 101}
101EXPORT_SYMBOL(kfifo_free); 102EXPORT_SYMBOL(kfifo_free);
102 103
@@ -349,6 +350,7 @@ EXPORT_SYMBOL(__kfifo_from_user_n);
349 * @fifo: the fifo to be used. 350 * @fifo: the fifo to be used.
350 * @from: pointer to the data to be added. 351 * @from: pointer to the data to be added.
351 * @len: the length of the data to be added. 352 * @len: the length of the data to be added.
353 * @total: the actual returned data length.
352 * 354 *
353 * This function copies at most @len bytes from the @from into the 355 * This function copies at most @len bytes from the @from into the
354 * FIFO depending and returns -EFAULT/0. 356 * FIFO depending and returns -EFAULT/0.
@@ -399,7 +401,7 @@ EXPORT_SYMBOL(__kfifo_to_user_n);
399 * @fifo: the fifo to be used. 401 * @fifo: the fifo to be used.
400 * @to: where the data must be copied. 402 * @to: where the data must be copied.
401 * @len: the size of the destination buffer. 403 * @len: the size of the destination buffer.
402 @ @lenout: pointer to output variable with copied data 404 * @lenout: pointer to output variable with copied data
403 * 405 *
404 * This function copies at most @len bytes from the FIFO into the 406 * This function copies at most @len bytes from the FIFO into the
405 * @to buffer and 0 or -EFAULT. 407 * @to buffer and 0 or -EFAULT.
diff --git a/kernel/kgdb.c b/kernel/kgdb.c
index 2eb517e23514..761fdd2b3034 100644
--- a/kernel/kgdb.c
+++ b/kernel/kgdb.c
@@ -583,6 +583,9 @@ static void kgdb_wait(struct pt_regs *regs)
583 smp_wmb(); 583 smp_wmb();
584 atomic_set(&cpu_in_kgdb[cpu], 1); 584 atomic_set(&cpu_in_kgdb[cpu], 1);
585 585
586 /* Disable any cpu specific hw breakpoints */
587 kgdb_disable_hw_debug(regs);
588
586 /* Wait till primary CPU is done with debugging */ 589 /* Wait till primary CPU is done with debugging */
587 while (atomic_read(&passive_cpu_wait[cpu])) 590 while (atomic_read(&passive_cpu_wait[cpu]))
588 cpu_relax(); 591 cpu_relax();
@@ -596,7 +599,7 @@ static void kgdb_wait(struct pt_regs *regs)
596 599
597 /* Signal the primary CPU that we are done: */ 600 /* Signal the primary CPU that we are done: */
598 atomic_set(&cpu_in_kgdb[cpu], 0); 601 atomic_set(&cpu_in_kgdb[cpu], 0);
599 touch_softlockup_watchdog(); 602 touch_softlockup_watchdog_sync();
600 clocksource_touch_watchdog(); 603 clocksource_touch_watchdog();
601 local_irq_restore(flags); 604 local_irq_restore(flags);
602} 605}
@@ -1450,7 +1453,7 @@ acquirelock:
1450 (kgdb_info[cpu].task && 1453 (kgdb_info[cpu].task &&
1451 kgdb_info[cpu].task->pid != kgdb_sstep_pid) && --sstep_tries) { 1454 kgdb_info[cpu].task->pid != kgdb_sstep_pid) && --sstep_tries) {
1452 atomic_set(&kgdb_active, -1); 1455 atomic_set(&kgdb_active, -1);
1453 touch_softlockup_watchdog(); 1456 touch_softlockup_watchdog_sync();
1454 clocksource_touch_watchdog(); 1457 clocksource_touch_watchdog();
1455 local_irq_restore(flags); 1458 local_irq_restore(flags);
1456 1459
@@ -1550,7 +1553,7 @@ kgdb_restore:
1550 } 1553 }
1551 /* Free kgdb_active */ 1554 /* Free kgdb_active */
1552 atomic_set(&kgdb_active, -1); 1555 atomic_set(&kgdb_active, -1);
1553 touch_softlockup_watchdog(); 1556 touch_softlockup_watchdog_sync();
1554 clocksource_touch_watchdog(); 1557 clocksource_touch_watchdog();
1555 local_irq_restore(flags); 1558 local_irq_restore(flags);
1556 1559
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 5feaddcdbe49..c62ec14609b9 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -2147,7 +2147,7 @@ check_usage_backwards(struct task_struct *curr, struct held_lock *this,
2147 return ret; 2147 return ret;
2148 2148
2149 return print_irq_inversion_bug(curr, &root, target_entry, 2149 return print_irq_inversion_bug(curr, &root, target_entry,
2150 this, 1, irqclass); 2150 this, 0, irqclass);
2151} 2151}
2152 2152
2153void print_irqtrace_events(struct task_struct *curr) 2153void print_irqtrace_events(struct task_struct *curr)
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index aa6155b5e24c..a661e7991865 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -3407,8 +3407,6 @@ static void perf_event_task_output(struct perf_event *event,
3407 task_event->event_id.tid = perf_event_tid(event, task); 3407 task_event->event_id.tid = perf_event_tid(event, task);
3408 task_event->event_id.ptid = perf_event_tid(event, current); 3408 task_event->event_id.ptid = perf_event_tid(event, current);
3409 3409
3410 task_event->event_id.time = perf_clock();
3411
3412 perf_output_put(&handle, task_event->event_id); 3410 perf_output_put(&handle, task_event->event_id);
3413 3411
3414 perf_output_end(&handle); 3412 perf_output_end(&handle);
@@ -3416,7 +3414,7 @@ static void perf_event_task_output(struct perf_event *event,
3416 3414
3417static int perf_event_task_match(struct perf_event *event) 3415static int perf_event_task_match(struct perf_event *event)
3418{ 3416{
3419 if (event->state != PERF_EVENT_STATE_ACTIVE) 3417 if (event->state < PERF_EVENT_STATE_INACTIVE)
3420 return 0; 3418 return 0;
3421 3419
3422 if (event->cpu != -1 && event->cpu != smp_processor_id()) 3420 if (event->cpu != -1 && event->cpu != smp_processor_id())
@@ -3448,7 +3446,7 @@ static void perf_event_task_event(struct perf_task_event *task_event)
3448 cpuctx = &get_cpu_var(perf_cpu_context); 3446 cpuctx = &get_cpu_var(perf_cpu_context);
3449 perf_event_task_ctx(&cpuctx->ctx, task_event); 3447 perf_event_task_ctx(&cpuctx->ctx, task_event);
3450 if (!ctx) 3448 if (!ctx)
3451 ctx = rcu_dereference(task_event->task->perf_event_ctxp); 3449 ctx = rcu_dereference(current->perf_event_ctxp);
3452 if (ctx) 3450 if (ctx)
3453 perf_event_task_ctx(ctx, task_event); 3451 perf_event_task_ctx(ctx, task_event);
3454 put_cpu_var(perf_cpu_context); 3452 put_cpu_var(perf_cpu_context);
@@ -3479,6 +3477,7 @@ static void perf_event_task(struct task_struct *task,
3479 /* .ppid */ 3477 /* .ppid */
3480 /* .tid */ 3478 /* .tid */
3481 /* .ptid */ 3479 /* .ptid */
3480 .time = perf_clock(),
3482 }, 3481 },
3483 }; 3482 };
3484 3483
@@ -3528,7 +3527,7 @@ static void perf_event_comm_output(struct perf_event *event,
3528 3527
3529static int perf_event_comm_match(struct perf_event *event) 3528static int perf_event_comm_match(struct perf_event *event)
3530{ 3529{
3531 if (event->state != PERF_EVENT_STATE_ACTIVE) 3530 if (event->state < PERF_EVENT_STATE_INACTIVE)
3532 return 0; 3531 return 0;
3533 3532
3534 if (event->cpu != -1 && event->cpu != smp_processor_id()) 3533 if (event->cpu != -1 && event->cpu != smp_processor_id())
@@ -3648,7 +3647,7 @@ static void perf_event_mmap_output(struct perf_event *event,
3648static int perf_event_mmap_match(struct perf_event *event, 3647static int perf_event_mmap_match(struct perf_event *event,
3649 struct perf_mmap_event *mmap_event) 3648 struct perf_mmap_event *mmap_event)
3650{ 3649{
3651 if (event->state != PERF_EVENT_STATE_ACTIVE) 3650 if (event->state < PERF_EVENT_STATE_INACTIVE)
3652 return 0; 3651 return 0;
3653 3652
3654 if (event->cpu != -1 && event->cpu != smp_processor_id()) 3653 if (event->cpu != -1 && event->cpu != smp_processor_id())
@@ -4728,7 +4727,7 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
4728 if (attr->type >= PERF_TYPE_MAX) 4727 if (attr->type >= PERF_TYPE_MAX)
4729 return -EINVAL; 4728 return -EINVAL;
4730 4729
4731 if (attr->__reserved_1 || attr->__reserved_2) 4730 if (attr->__reserved_1)
4732 return -EINVAL; 4731 return -EINVAL;
4733 4732
4734 if (attr->sample_type & ~(PERF_SAMPLE_MAX-1)) 4733 if (attr->sample_type & ~(PERF_SAMPLE_MAX-1))
diff --git a/kernel/sched.c b/kernel/sched.c
index 7266b912139f..3e71ebb101c2 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2320,14 +2320,12 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
2320} 2320}
2321 2321
2322/* 2322/*
2323 * Called from: 2323 * Gets called from 3 sites (exec, fork, wakeup), since it is called without
2324 * holding rq->lock we need to ensure ->cpus_allowed is stable, this is done
2325 * by:
2324 * 2326 *
2325 * - fork, @p is stable because it isn't on the tasklist yet 2327 * exec: is unstable, retry loop
2326 * 2328 * fork & wake-up: serialize ->cpus_allowed against TASK_WAKING
2327 * - exec, @p is unstable, retry loop
2328 *
2329 * - wake-up, we serialize ->cpus_allowed against TASK_WAKING so
2330 * we should be good.
2331 */ 2329 */
2332static inline 2330static inline
2333int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags) 2331int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags)
@@ -2620,9 +2618,6 @@ void sched_fork(struct task_struct *p, int clone_flags)
2620 if (p->sched_class->task_fork) 2618 if (p->sched_class->task_fork)
2621 p->sched_class->task_fork(p); 2619 p->sched_class->task_fork(p);
2622 2620
2623#ifdef CONFIG_SMP
2624 cpu = select_task_rq(p, SD_BALANCE_FORK, 0);
2625#endif
2626 set_task_cpu(p, cpu); 2621 set_task_cpu(p, cpu);
2627 2622
2628#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) 2623#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
@@ -2652,6 +2647,21 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
2652{ 2647{
2653 unsigned long flags; 2648 unsigned long flags;
2654 struct rq *rq; 2649 struct rq *rq;
2650 int cpu = get_cpu();
2651
2652#ifdef CONFIG_SMP
2653 /*
2654 * Fork balancing, do it here and not earlier because:
2655 * - cpus_allowed can change in the fork path
2656 * - any previously selected cpu might disappear through hotplug
2657 *
2658 * We still have TASK_WAKING but PF_STARTING is gone now, meaning
2659 * ->cpus_allowed is stable, we have preemption disabled, meaning
2660 * cpu_online_mask is stable.
2661 */
2662 cpu = select_task_rq(p, SD_BALANCE_FORK, 0);
2663 set_task_cpu(p, cpu);
2664#endif
2655 2665
2656 rq = task_rq_lock(p, &flags); 2666 rq = task_rq_lock(p, &flags);
2657 BUG_ON(p->state != TASK_WAKING); 2667 BUG_ON(p->state != TASK_WAKING);
@@ -2665,6 +2675,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
2665 p->sched_class->task_woken(rq, p); 2675 p->sched_class->task_woken(rq, p);
2666#endif 2676#endif
2667 task_rq_unlock(rq, &flags); 2677 task_rq_unlock(rq, &flags);
2678 put_cpu();
2668} 2679}
2669 2680
2670#ifdef CONFIG_PREEMPT_NOTIFIERS 2681#ifdef CONFIG_PREEMPT_NOTIFIERS
@@ -7145,14 +7156,18 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
7145 * the ->cpus_allowed mask from under waking tasks, which would be 7156 * the ->cpus_allowed mask from under waking tasks, which would be
7146 * possible when we change rq->lock in ttwu(), so synchronize against 7157 * possible when we change rq->lock in ttwu(), so synchronize against
7147 * TASK_WAKING to avoid that. 7158 * TASK_WAKING to avoid that.
7159 *
7160 * Make an exception for freshly cloned tasks, since cpuset namespaces
7161 * might move the task about, we have to validate the target in
7162 * wake_up_new_task() anyway since the cpu might have gone away.
7148 */ 7163 */
7149again: 7164again:
7150 while (p->state == TASK_WAKING) 7165 while (p->state == TASK_WAKING && !(p->flags & PF_STARTING))
7151 cpu_relax(); 7166 cpu_relax();
7152 7167
7153 rq = task_rq_lock(p, &flags); 7168 rq = task_rq_lock(p, &flags);
7154 7169
7155 if (p->state == TASK_WAKING) { 7170 if (p->state == TASK_WAKING && !(p->flags & PF_STARTING)) {
7156 task_rq_unlock(rq, &flags); 7171 task_rq_unlock(rq, &flags);
7157 goto again; 7172 goto again;
7158 } 7173 }
diff --git a/kernel/softirq.c b/kernel/softirq.c
index a09502e2ef75..7c1a67ef0274 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -500,22 +500,17 @@ EXPORT_SYMBOL(tasklet_kill);
500 */ 500 */
501 501
502/* 502/*
503 * The trampoline is called when the hrtimer expires. If this is 503 * The trampoline is called when the hrtimer expires. It schedules a tasklet
504 * called from the hrtimer interrupt then we schedule the tasklet as 504 * to run __tasklet_hrtimer_trampoline() which in turn will call the intended
505 * the timer callback function expects to run in softirq context. If 505 * hrtimer callback, but from softirq context.
506 * it's called in softirq context anyway (i.e. high resolution timers
507 * disabled) then the hrtimer callback is called right away.
508 */ 506 */
509static enum hrtimer_restart __hrtimer_tasklet_trampoline(struct hrtimer *timer) 507static enum hrtimer_restart __hrtimer_tasklet_trampoline(struct hrtimer *timer)
510{ 508{
511 struct tasklet_hrtimer *ttimer = 509 struct tasklet_hrtimer *ttimer =
512 container_of(timer, struct tasklet_hrtimer, timer); 510 container_of(timer, struct tasklet_hrtimer, timer);
513 511
514 if (hrtimer_is_hres_active(timer)) { 512 tasklet_hi_schedule(&ttimer->tasklet);
515 tasklet_hi_schedule(&ttimer->tasklet); 513 return HRTIMER_NORESTART;
516 return HRTIMER_NORESTART;
517 }
518 return ttimer->function(timer);
519} 514}
520 515
521/* 516/*
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index d22579087e27..0d4c7898ab80 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -25,6 +25,7 @@ static DEFINE_SPINLOCK(print_lock);
25static DEFINE_PER_CPU(unsigned long, softlockup_touch_ts); /* touch timestamp */ 25static DEFINE_PER_CPU(unsigned long, softlockup_touch_ts); /* touch timestamp */
26static DEFINE_PER_CPU(unsigned long, softlockup_print_ts); /* print timestamp */ 26static DEFINE_PER_CPU(unsigned long, softlockup_print_ts); /* print timestamp */
27static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog); 27static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
28static DEFINE_PER_CPU(bool, softlock_touch_sync);
28 29
29static int __read_mostly did_panic; 30static int __read_mostly did_panic;
30int __read_mostly softlockup_thresh = 60; 31int __read_mostly softlockup_thresh = 60;
@@ -79,6 +80,12 @@ void touch_softlockup_watchdog(void)
79} 80}
80EXPORT_SYMBOL(touch_softlockup_watchdog); 81EXPORT_SYMBOL(touch_softlockup_watchdog);
81 82
83void touch_softlockup_watchdog_sync(void)
84{
85 __raw_get_cpu_var(softlock_touch_sync) = true;
86 __raw_get_cpu_var(softlockup_touch_ts) = 0;
87}
88
82void touch_all_softlockup_watchdogs(void) 89void touch_all_softlockup_watchdogs(void)
83{ 90{
84 int cpu; 91 int cpu;
@@ -118,6 +125,14 @@ void softlockup_tick(void)
118 } 125 }
119 126
120 if (touch_ts == 0) { 127 if (touch_ts == 0) {
128 if (unlikely(per_cpu(softlock_touch_sync, this_cpu))) {
129 /*
130 * If the time stamp was touched atomically
131 * make sure the scheduler tick is up to date.
132 */
133 per_cpu(softlock_touch_sync, this_cpu) = false;
134 sched_clock_tick();
135 }
121 __touch_softlockup_watchdog(); 136 __touch_softlockup_watchdog();
122 return; 137 return;
123 } 138 }
diff --git a/kernel/sys.c b/kernel/sys.c
index 26a6b73a6b85..18bde979f346 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -222,6 +222,7 @@ SYSCALL_DEFINE2(getpriority, int, which, int, who)
222 if (which > PRIO_USER || which < PRIO_PROCESS) 222 if (which > PRIO_USER || which < PRIO_PROCESS)
223 return -EINVAL; 223 return -EINVAL;
224 224
225 rcu_read_lock();
225 read_lock(&tasklist_lock); 226 read_lock(&tasklist_lock);
226 switch (which) { 227 switch (which) {
227 case PRIO_PROCESS: 228 case PRIO_PROCESS:
@@ -267,6 +268,7 @@ SYSCALL_DEFINE2(getpriority, int, which, int, who)
267 } 268 }
268out_unlock: 269out_unlock:
269 read_unlock(&tasklist_lock); 270 read_unlock(&tasklist_lock);
271 rcu_read_unlock();
270 272
271 return retval; 273 return retval;
272} 274}
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index e85c23404d34..13700833c181 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -343,7 +343,19 @@ static void clocksource_resume_watchdog(void)
343{ 343{
344 unsigned long flags; 344 unsigned long flags;
345 345
346 spin_lock_irqsave(&watchdog_lock, flags); 346 /*
347 * We use trylock here to avoid a potential dead lock when
348 * kgdb calls this code after the kernel has been stopped with
349 * watchdog_lock held. When watchdog_lock is held we just
350 * return and accept, that the watchdog might trigger and mark
351 * the monitored clock source (usually TSC) unstable.
352 *
353 * This does not affect the other caller clocksource_resume()
354 * because at this point the kernel is UP, interrupts are
355 * disabled and nothing can hold watchdog_lock.
356 */
357 if (!spin_trylock_irqsave(&watchdog_lock, flags))
358 return;
347 clocksource_reset_watchdog(); 359 clocksource_reset_watchdog();
348 spin_unlock_irqrestore(&watchdog_lock, flags); 360 spin_unlock_irqrestore(&watchdog_lock, flags);
349} 361}
@@ -458,8 +470,8 @@ void clocksource_resume(void)
458 * clocksource_touch_watchdog - Update watchdog 470 * clocksource_touch_watchdog - Update watchdog
459 * 471 *
460 * Update the watchdog after exception contexts such as kgdb so as not 472 * Update the watchdog after exception contexts such as kgdb so as not
461 * to incorrectly trip the watchdog. 473 * to incorrectly trip the watchdog. This might fail when the kernel
462 * 474 * was stopped in code which holds watchdog_lock.
463 */ 475 */
464void clocksource_touch_watchdog(void) 476void clocksource_touch_watchdog(void)
465{ 477{
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 7faaa32fbf4f..e2ab064c6d41 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -880,6 +880,7 @@ void getboottime(struct timespec *ts)
880 880
881 set_normalized_timespec(ts, -boottime.tv_sec, -boottime.tv_nsec); 881 set_normalized_timespec(ts, -boottime.tv_sec, -boottime.tv_nsec);
882} 882}
883EXPORT_SYMBOL_GPL(getboottime);
883 884
884/** 885/**
885 * monotonic_to_bootbased - Convert the monotonic time to boot based. 886 * monotonic_to_bootbased - Convert the monotonic time to boot based.
@@ -889,6 +890,7 @@ void monotonic_to_bootbased(struct timespec *ts)
889{ 890{
890 *ts = timespec_add_safe(*ts, total_sleep_time); 891 *ts = timespec_add_safe(*ts, total_sleep_time);
891} 892}
893EXPORT_SYMBOL_GPL(monotonic_to_bootbased);
892 894
893unsigned long get_seconds(void) 895unsigned long get_seconds(void)
894{ 896{
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 6c22d8a2f289..60e2ce0181ee 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -27,9 +27,7 @@ config HAVE_FUNCTION_GRAPH_TRACER
27config HAVE_FUNCTION_GRAPH_FP_TEST 27config HAVE_FUNCTION_GRAPH_FP_TEST
28 bool 28 bool
29 help 29 help
30 An arch may pass in a unique value (frame pointer) to both the 30 See Documentation/trace/ftrace-design.txt
31 entering and exiting of a function. On exit, the value is compared
32 and if it does not match, then it will panic the kernel.
33 31
34config HAVE_FUNCTION_TRACE_MCOUNT_TEST 32config HAVE_FUNCTION_TRACE_MCOUNT_TEST
35 bool 33 bool
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index edefe3b2801b..8c1b2d290718 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -464,6 +464,8 @@ struct ring_buffer_iter {
464 struct ring_buffer_per_cpu *cpu_buffer; 464 struct ring_buffer_per_cpu *cpu_buffer;
465 unsigned long head; 465 unsigned long head;
466 struct buffer_page *head_page; 466 struct buffer_page *head_page;
467 struct buffer_page *cache_reader_page;
468 unsigned long cache_read;
467 u64 read_stamp; 469 u64 read_stamp;
468}; 470};
469 471
@@ -2716,6 +2718,8 @@ static void rb_iter_reset(struct ring_buffer_iter *iter)
2716 iter->read_stamp = cpu_buffer->read_stamp; 2718 iter->read_stamp = cpu_buffer->read_stamp;
2717 else 2719 else
2718 iter->read_stamp = iter->head_page->page->time_stamp; 2720 iter->read_stamp = iter->head_page->page->time_stamp;
2721 iter->cache_reader_page = cpu_buffer->reader_page;
2722 iter->cache_read = cpu_buffer->read;
2719} 2723}
2720 2724
2721/** 2725/**
@@ -3060,13 +3064,22 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
3060 struct ring_buffer_event *event; 3064 struct ring_buffer_event *event;
3061 int nr_loops = 0; 3065 int nr_loops = 0;
3062 3066
3063 if (ring_buffer_iter_empty(iter))
3064 return NULL;
3065
3066 cpu_buffer = iter->cpu_buffer; 3067 cpu_buffer = iter->cpu_buffer;
3067 buffer = cpu_buffer->buffer; 3068 buffer = cpu_buffer->buffer;
3068 3069
3070 /*
3071 * Check if someone performed a consuming read to
3072 * the buffer. A consuming read invalidates the iterator
3073 * and we need to reset the iterator in this case.
3074 */
3075 if (unlikely(iter->cache_read != cpu_buffer->read ||
3076 iter->cache_reader_page != cpu_buffer->reader_page))
3077 rb_iter_reset(iter);
3078
3069 again: 3079 again:
3080 if (ring_buffer_iter_empty(iter))
3081 return NULL;
3082
3070 /* 3083 /*
3071 * We repeat when a timestamp is encountered. 3084 * We repeat when a timestamp is encountered.
3072 * We can get multiple timestamps by nested interrupts or also 3085 * We can get multiple timestamps by nested interrupts or also
@@ -3081,6 +3094,11 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
3081 if (rb_per_cpu_empty(cpu_buffer)) 3094 if (rb_per_cpu_empty(cpu_buffer))
3082 return NULL; 3095 return NULL;
3083 3096
3097 if (iter->head >= local_read(&iter->head_page->page->commit)) {
3098 rb_inc_iter(iter);
3099 goto again;
3100 }
3101
3084 event = rb_iter_head_event(iter); 3102 event = rb_iter_head_event(iter);
3085 3103
3086 switch (event->type_len) { 3104 switch (event->type_len) {
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 0df1b0f2cb9e..eac6875cb990 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -951,6 +951,11 @@ void trace_find_cmdline(int pid, char comm[])
951 return; 951 return;
952 } 952 }
953 953
954 if (WARN_ON_ONCE(pid < 0)) {
955 strcpy(comm, "<XXX>");
956 return;
957 }
958
954 if (pid > PID_MAX_DEFAULT) { 959 if (pid > PID_MAX_DEFAULT) {
955 strcpy(comm, "<...>"); 960 strcpy(comm, "<...>");
956 return; 961 return;
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 6178abf3637e..356c10227c98 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -673,7 +673,7 @@ static int create_trace_probe(int argc, char **argv)
673 return -EINVAL; 673 return -EINVAL;
674 } 674 }
675 /* an address specified */ 675 /* an address specified */
676 ret = strict_strtoul(&argv[0][2], 0, (unsigned long *)&addr); 676 ret = strict_strtoul(&argv[1][0], 0, (unsigned long *)&addr);
677 if (ret) { 677 if (ret) {
678 pr_info("Failed to parse address.\n"); 678 pr_info("Failed to parse address.\n");
679 return ret; 679 return ret;
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 678a5120ee30..f4bc9b27de5f 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -157,6 +157,7 @@ stack_max_size_write(struct file *filp, const char __user *ubuf,
157 unsigned long val, flags; 157 unsigned long val, flags;
158 char buf[64]; 158 char buf[64];
159 int ret; 159 int ret;
160 int cpu;
160 161
161 if (count >= sizeof(buf)) 162 if (count >= sizeof(buf))
162 return -EINVAL; 163 return -EINVAL;
@@ -171,9 +172,20 @@ stack_max_size_write(struct file *filp, const char __user *ubuf,
171 return ret; 172 return ret;
172 173
173 local_irq_save(flags); 174 local_irq_save(flags);
175
176 /*
177 * In case we trace inside arch_spin_lock() or after (NMI),
178 * we will cause circular lock, so we also need to increase
179 * the percpu trace_active here.
180 */
181 cpu = smp_processor_id();
182 per_cpu(trace_active, cpu)++;
183
174 arch_spin_lock(&max_stack_lock); 184 arch_spin_lock(&max_stack_lock);
175 *ptr = val; 185 *ptr = val;
176 arch_spin_unlock(&max_stack_lock); 186 arch_spin_unlock(&max_stack_lock);
187
188 per_cpu(trace_active, cpu)--;
177 local_irq_restore(flags); 189 local_irq_restore(flags);
178 190
179 return count; 191 return count;
@@ -206,7 +218,13 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
206 218
207static void *t_start(struct seq_file *m, loff_t *pos) 219static void *t_start(struct seq_file *m, loff_t *pos)
208{ 220{
221 int cpu;
222
209 local_irq_disable(); 223 local_irq_disable();
224
225 cpu = smp_processor_id();
226 per_cpu(trace_active, cpu)++;
227
210 arch_spin_lock(&max_stack_lock); 228 arch_spin_lock(&max_stack_lock);
211 229
212 if (*pos == 0) 230 if (*pos == 0)
@@ -217,7 +235,13 @@ static void *t_start(struct seq_file *m, loff_t *pos)
217 235
218static void t_stop(struct seq_file *m, void *p) 236static void t_stop(struct seq_file *m, void *p)
219{ 237{
238 int cpu;
239
220 arch_spin_unlock(&max_stack_lock); 240 arch_spin_unlock(&max_stack_lock);
241
242 cpu = smp_processor_id();
243 per_cpu(trace_active, cpu)--;
244
221 local_irq_enable(); 245 local_irq_enable();
222} 246}
223 247