Merge commit 'v2.6.33' into perf/core

Merge reason: __percpu annotations need the corresponding sparse address space definition upstream. Conflicts: tools/perf/util/probe-event.c (trivial)
author: Frederic Weisbecker <fweisbec@gmail.com> 2010-02-27 10:18:46 -0500
committer: Frederic Weisbecker <fweisbec@gmail.com> 2010-02-27 10:18:46 -0500
commit: 018cbffe6819f6f8db20a0a3acd9bab9bfd667e4 (patch)
tree: fadde2521591998dc653fa094c636e8a547e620d /kernel
parent: 1dd2980d990068e20045b90c424518cc7f3657ff (diff)
parent: 60b341b778cc2929df16c0a504c91621b3c6a4ad (diff)
21 files changed, 216 insertions, 82 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 1fbcc748044a..aa3bee566446 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2936,14 +2936,17 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
        for_each_subsys(root, ss) {
                struct cgroup_subsys_state *css = ss->create(ss, cgrp);
                if (IS_ERR(css)) {
                        err = PTR_ERR(css);
                        goto err_destroy;
                }
                init_cgroup_css(css, ss, cgrp);
-                if (ss->use_id)
+                if (ss->use_id) {
-                        if (alloc_css_id(ss, parent, cgrp))
+                        err = alloc_css_id(ss, parent, cgrp);
+                        if (err)
                                goto err_destroy;
+                }
                /* At error, ->destroy() callback has to free assigned ID. */
        }
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 1c8ddd6ee940..677f25376a38 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -151,13 +151,13 @@ static inline void check_for_tasks(int cpu)
        write_lock_irq(&tasklist_lock);
        for_each_process(p) {
-                if (task_cpu(p) == cpu &&
+                if (task_cpu(p) == cpu && p->state == TASK_RUNNING &&
                    (!cputime_eq(p->utime, cputime_zero) ||
                     !cputime_eq(p->stime, cputime_zero)))
-                        printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d\
+                        printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d "
-                                (state = %ld, flags = %x) \n",
+                                "(state = %ld, flags = %x)\n",
-                                 p->comm, task_pid_nr(p), cpu,
+                                p->comm, task_pid_nr(p), cpu,
-                                 p->state, p->flags);
+                                p->state, p->flags);
        }
        write_unlock_irq(&tasklist_lock);
 }
diff --git a/kernel/cred.c b/kernel/cred.c
index dd76cfe5f5b0..1ed8ca18790c 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -224,7 +224,7 @@ struct cred *cred_alloc_blank(void)
 #ifdef CONFIG_KEYS
        new->tgcred = kzalloc(sizeof(*new->tgcred), GFP_KERNEL);
        if (!new->tgcred) {
-                kfree(new);
+                kmem_cache_free(cred_jar, new);
                return NULL;
        }
        atomic_set(&new->tgcred->usage, 1);
diff --git a/kernel/fork.c b/kernel/fork.c
index 5b2959b3ffc2..f88bd984df35 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1241,21 +1241,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        /* Need tasklist lock for parent etc handling! */
        write_lock_irq(&tasklist_lock);
-        /*
-         * The task hasn't been attached yet, so its cpus_allowed mask will
-         * not be changed, nor will its assigned CPU.
-         *
-         * The cpus_allowed mask of the parent may have changed after it was
-         * copied first time - so re-copy it here, then check the child's CPU
-         * to ensure it is on a valid CPU (and if not, just force it back to
-         * parent's CPU). This avoids alot of nasty races.
-         */
-        p->cpus_allowed = current->cpus_allowed;
-        p->rt.nr_cpus_allowed = current->rt.nr_cpus_allowed;
-        if (unlikely(!cpu_isset(task_cpu(p), p->cpus_allowed) ||
-                        !cpu_online(task_cpu(p))))
-                set_task_cpu(p, smp_processor_id());
        /* CLONE_PARENT re-uses the old parent */
        if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) {
                p->real_parent = current->real_parent;
diff --git a/kernel/futex.c b/kernel/futex.c
index d9b3a2228f9d..e7a35f1039e7 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -530,8 +530,25 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
                                return -EINVAL;
                        WARN_ON(!atomic_read(&pi_state->refcount));
-                        WARN_ON(pid && pi_state->owner &&
-                                pi_state->owner->pid != pid);
+                        /*
+                         * When pi_state->owner is NULL then the owner died
+                         * and another waiter is on the fly. pi_state->owner
+                         * is fixed up by the task which acquires
+                         * pi_state->rt_mutex.
+                         *
+                         * We do not check for pid == 0 which can happen when
+                         * the owner died and robust_list_exit() cleared the
+                         * TID.
+                         */
+                        if (pid && pi_state->owner) {
+                                /*
+                                 * Bail out if user space manipulated the
+                                 * futex value.
+                                 */
+                                if (pid != task_pid_vnr(pi_state->owner))
+                                        return -EINVAL;
+                        }
                        atomic_inc(&pi_state->refcount);
                        *ps = pi_state;
@@ -758,6 +775,13 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
        if (!pi_state)
                return -EINVAL;
+        /*
+         * If current does not own the pi_state then the futex is
+         * inconsistent and user space fiddled with the futex value.
+         */
+        if (pi_state->owner != current)
+                return -EINVAL;
        raw_spin_lock(&pi_state->pi_mutex.wait_lock);
        new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
@@ -1971,7 +1995,7 @@ retry_private:
        /* Unqueue and drop the lock */
        unqueue_me_pi(&q);
-        goto out;
+        goto out_put_key;
 out_unlock_put_key:
        queue_unlock(&q, hb);
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index c030ae657f20..967e66143e11 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -243,38 +243,70 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable)
 *       ((per_cpu(nr_bp_flexible, *) > 1) + max(per_cpu(nr_cpu_bp_pinned, *))
 *            + max(per_cpu(nr_task_bp_pinned, *))) < HBP_NUM
 */
-int reserve_bp_slot(struct perf_event *bp)
+static int __reserve_bp_slot(struct perf_event *bp)
 {
        struct bp_busy_slots slots = {0};
-        int ret = 0;
-        mutex_lock(&nr_bp_mutex);
        fetch_bp_busy_slots(&slots, bp);
        /* Flexible counters need to keep at least one slot */
-        if (slots.pinned + (!!slots.flexible) == HBP_NUM) {
+        if (slots.pinned + (!!slots.flexible) == HBP_NUM)
-                ret = -ENOSPC;
+                return -ENOSPC;
-                goto end;
-        }
        toggle_bp_slot(bp, true);
-end:
+        return 0;
+}
+int reserve_bp_slot(struct perf_event *bp)
+{
+        int ret;
+        mutex_lock(&nr_bp_mutex);
+        ret = __reserve_bp_slot(bp);
        mutex_unlock(&nr_bp_mutex);
        return ret;
 }
+static void __release_bp_slot(struct perf_event *bp)
+{
+        toggle_bp_slot(bp, false);
+}
 void release_bp_slot(struct perf_event *bp)
 {
        mutex_lock(&nr_bp_mutex);
-        toggle_bp_slot(bp, false);
+        __release_bp_slot(bp);
        mutex_unlock(&nr_bp_mutex);
 }
+/*
+ * Allow the kernel debugger to reserve and release breakpoint slots
+ * without taking nr_bp_mutex by using the dbg_* variants of the
+ * reserve and release functions.
+ */
+int dbg_reserve_bp_slot(struct perf_event *bp)
+{
+        if (mutex_is_locked(&nr_bp_mutex))
+                return -1;
+        return __reserve_bp_slot(bp);
+}
+int dbg_release_bp_slot(struct perf_event *bp)
+{
+        if (mutex_is_locked(&nr_bp_mutex))
+                return -1;
+        __release_bp_slot(bp);
+        return 0;
+}
 int register_perf_hw_breakpoint(struct perf_event *bp)
 {
@@ -328,8 +360,8 @@ EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
 int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr)
 {
        u64 old_addr = bp->attr.bp_addr;
+        u64 old_len = bp->attr.bp_len;
        int old_type = bp->attr.bp_type;
-        int old_len = bp->attr.bp_len;
        int err = 0;
        perf_event_disable(bp);
diff --git a/kernel/kfifo.c b/kernel/kfifo.c
index 32c5c15d750d..35edbe22e9a9 100644
--- a/kernel/kfifo.c
+++ b/kernel/kfifo.c
@@ -80,7 +80,7 @@ int kfifo_alloc(struct kfifo *fifo, unsigned int size, gfp_t gfp_mask)
        buffer = kmalloc(size, gfp_mask);
        if (!buffer) {
-                _kfifo_init(fifo, 0, 0);
+                _kfifo_init(fifo, NULL, 0);
                return -ENOMEM;
        }
@@ -97,6 +97,7 @@ EXPORT_SYMBOL(kfifo_alloc);
 void kfifo_free(struct kfifo *fifo)
 {
        kfree(fifo->buffer);
+        _kfifo_init(fifo, NULL, 0);
 }
 EXPORT_SYMBOL(kfifo_free);
@@ -349,6 +350,7 @@ EXPORT_SYMBOL(__kfifo_from_user_n);
 * @fifo: the fifo to be used.
 * @from: pointer to the data to be added.
 * @len: the length of the data to be added.
+ * @total: the actual returned data length.
 *
 * This function copies at most @len bytes from the @from into the
 * FIFO depending and returns -EFAULT/0.
@@ -399,7 +401,7 @@ EXPORT_SYMBOL(__kfifo_to_user_n);
 * @fifo: the fifo to be used.
 * @to: where the data must be copied.
 * @len: the size of the destination buffer.
- @ @lenout: pointer to output variable with copied data
+ * @lenout: pointer to output variable with copied data
 *
 * This function copies at most @len bytes from the FIFO into the
 * @to buffer and 0 or -EFAULT.
diff --git a/kernel/kgdb.c b/kernel/kgdb.c
index 2eb517e23514..761fdd2b3034 100644
--- a/kernel/kgdb.c
+++ b/kernel/kgdb.c
@@ -583,6 +583,9 @@ static void kgdb_wait(struct pt_regs *regs)
        smp_wmb();
        atomic_set(&cpu_in_kgdb[cpu], 1);
+        /* Disable any cpu specific hw breakpoints */
+        kgdb_disable_hw_debug(regs);
        /* Wait till primary CPU is done with debugging */
        while (atomic_read(&passive_cpu_wait[cpu]))
                cpu_relax();
@@ -596,7 +599,7 @@ static void kgdb_wait(struct pt_regs *regs)
        /* Signal the primary CPU that we are done: */
        atomic_set(&cpu_in_kgdb[cpu], 0);
-        touch_softlockup_watchdog();
+        touch_softlockup_watchdog_sync();
        clocksource_touch_watchdog();
        local_irq_restore(flags);
 }
@@ -1450,7 +1453,7 @@ acquirelock:
            (kgdb_info[cpu].task &&
             kgdb_info[cpu].task->pid != kgdb_sstep_pid) && --sstep_tries) {
                atomic_set(&kgdb_active, -1);
-                touch_softlockup_watchdog();
+                touch_softlockup_watchdog_sync();
                clocksource_touch_watchdog();
                local_irq_restore(flags);
@@ -1550,7 +1553,7 @@ kgdb_restore:
        }
        /* Free kgdb_active */
        atomic_set(&kgdb_active, -1);
-        touch_softlockup_watchdog();
+        touch_softlockup_watchdog_sync();
        clocksource_touch_watchdog();
        local_irq_restore(flags);
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 5feaddcdbe49..c62ec14609b9 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -2147,7 +2147,7 @@ check_usage_backwards(struct task_struct *curr, struct held_lock *this,
                return ret;
        return print_irq_inversion_bug(curr, &root, target_entry,
-                                        this, 1, irqclass);
+                                        this, 0, irqclass);
 }
 void print_irqtrace_events(struct task_struct *curr)
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index aa6155b5e24c..a661e7991865 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -3407,8 +3407,6 @@ static void perf_event_task_output(struct perf_event *event,
        task_event->event_id.tid = perf_event_tid(event, task);
        task_event->event_id.ptid = perf_event_tid(event, current);
-        task_event->event_id.time = perf_clock();
        perf_output_put(&handle, task_event->event_id);
        perf_output_end(&handle);
@@ -3416,7 +3414,7 @@ static void perf_event_task_output(struct perf_event *event,
 static int perf_event_task_match(struct perf_event *event)
 {
-        if (event->state != PERF_EVENT_STATE_ACTIVE)
+        if (event->state < PERF_EVENT_STATE_INACTIVE)
                return 0;
        if (event->cpu != -1 && event->cpu != smp_processor_id())
@@ -3448,7 +3446,7 @@ static void perf_event_task_event(struct perf_task_event *task_event)
        cpuctx = &get_cpu_var(perf_cpu_context);
        perf_event_task_ctx(&cpuctx->ctx, task_event);
        if (!ctx)
-                ctx = rcu_dereference(task_event->task->perf_event_ctxp);
+                ctx = rcu_dereference(current->perf_event_ctxp);
        if (ctx)
                perf_event_task_ctx(ctx, task_event);
        put_cpu_var(perf_cpu_context);
@@ -3479,6 +3477,7 @@ static void perf_event_task(struct task_struct *task,
                        /* .ppid */
                        /* .tid  */
                        /* .ptid */
+                        .time = perf_clock(),
                },
        };
@@ -3528,7 +3527,7 @@ static void perf_event_comm_output(struct perf_event *event,
 static int perf_event_comm_match(struct perf_event *event)
 {
-        if (event->state != PERF_EVENT_STATE_ACTIVE)
+        if (event->state < PERF_EVENT_STATE_INACTIVE)
                return 0;
        if (event->cpu != -1 && event->cpu != smp_processor_id())
@@ -3648,7 +3647,7 @@ static void perf_event_mmap_output(struct perf_event *event,
 static int perf_event_mmap_match(struct perf_event *event,
                                   struct perf_mmap_event *mmap_event)
 {
-        if (event->state != PERF_EVENT_STATE_ACTIVE)
+        if (event->state < PERF_EVENT_STATE_INACTIVE)
                return 0;
        if (event->cpu != -1 && event->cpu != smp_processor_id())
@@ -4728,7 +4727,7 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
        if (attr->type >= PERF_TYPE_MAX)
                return -EINVAL;
-        if (attr->__reserved_1 || attr->__reserved_2)
+        if (attr->__reserved_1)
                return -EINVAL;
        if (attr->sample_type & ~(PERF_SAMPLE_MAX-1))
diff --git a/kernel/sched.c b/kernel/sched.c
index 7266b912139f..3e71ebb101c2 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2320,14 +2320,12 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
 }
 /*
- * Called from:
+ * Gets called from 3 sites (exec, fork, wakeup), since it is called without
+ * holding rq->lock we need to ensure ->cpus_allowed is stable, this is done
+ * by:
 *
- *  - fork, @p is stable because it isn't on the tasklist yet
+ *  exec:           is unstable, retry loop
- *
+ *  fork & wake-up: serialize ->cpus_allowed against TASK_WAKING
- *  - exec, @p is unstable, retry loop
- *
- *  - wake-up, we serialize ->cpus_allowed against TASK_WAKING so
- *             we should be good.
 */
 static inline
 int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags)
@@ -2620,9 +2618,6 @@ void sched_fork(struct task_struct *p, int clone_flags)
        if (p->sched_class->task_fork)
                p->sched_class->task_fork(p);
-#ifdef CONFIG_SMP
-        cpu = select_task_rq(p, SD_BALANCE_FORK, 0);
-#endif
        set_task_cpu(p, cpu);
 #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
@@ -2652,6 +2647,21 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
 {
        unsigned long flags;
        struct rq *rq;
+        int cpu = get_cpu();
+#ifdef CONFIG_SMP
+        /*
+         * Fork balancing, do it here and not earlier because:
+         *  - cpus_allowed can change in the fork path
+         *  - any previously selected cpu might disappear through hotplug
+         *
+         * We still have TASK_WAKING but PF_STARTING is gone now, meaning
+         * ->cpus_allowed is stable, we have preemption disabled, meaning
+         * cpu_online_mask is stable.
+         */
+        cpu = select_task_rq(p, SD_BALANCE_FORK, 0);
+        set_task_cpu(p, cpu);
+#endif
        rq = task_rq_lock(p, &flags);
        BUG_ON(p->state != TASK_WAKING);
@@ -2665,6 +2675,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
                p->sched_class->task_woken(rq, p);
 #endif
        task_rq_unlock(rq, &flags);
+        put_cpu();
 }
 #ifdef CONFIG_PREEMPT_NOTIFIERS
@@ -7145,14 +7156,18 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
         * the ->cpus_allowed mask from under waking tasks, which would be
         * possible when we change rq->lock in ttwu(), so synchronize against
         * TASK_WAKING to avoid that.
+         *
+         * Make an exception for freshly cloned tasks, since cpuset namespaces
+         * might move the task about; we have to validate the target in
+         * wake_up_new_task() anyway since the cpu might have gone away.
         */
 again:
-        while (p->state == TASK_WAKING)
+        while (p->state == TASK_WAKING && !(p->flags & PF_STARTING))
                cpu_relax();
        rq = task_rq_lock(p, &flags);
-        if (p->state == TASK_WAKING) {
+        if (p->state == TASK_WAKING && !(p->flags & PF_STARTING)) {
                task_rq_unlock(rq, &flags);
                goto again;
        }
diff --git a/kernel/softirq.c b/kernel/softirq.c
index a09502e2ef75..7c1a67ef0274 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -500,22 +500,17 @@ EXPORT_SYMBOL(tasklet_kill);
 */
 /*
- * The trampoline is called when the hrtimer expires. If this is
+ * The trampoline is called when the hrtimer expires. It schedules a tasklet
- * called from the hrtimer interrupt then we schedule the tasklet as
+ * to run __tasklet_hrtimer_trampoline() which in turn will call the intended
- * the timer callback function expects to run in softirq context. If
+ * hrtimer callback, but from softirq context.
- * it's called in softirq context anyway (i.e. high resolution timers
- * disabled) then the hrtimer callback is called right away.
 */
 static enum hrtimer_restart __hrtimer_tasklet_trampoline(struct hrtimer *timer)
 {
        struct tasklet_hrtimer *ttimer =
                container_of(timer, struct tasklet_hrtimer, timer);
-        if (hrtimer_is_hres_active(timer)) {
+        tasklet_hi_schedule(&ttimer->tasklet);
-                tasklet_hi_schedule(&ttimer->tasklet);
+        return HRTIMER_NORESTART;
-                return HRTIMER_NORESTART;
-        }
-        return ttimer->function(timer);
 }
 /*
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index d22579087e27..0d4c7898ab80 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -25,6 +25,7 @@ static DEFINE_SPINLOCK(print_lock);
 static DEFINE_PER_CPU(unsigned long, softlockup_touch_ts); /* touch timestamp */
 static DEFINE_PER_CPU(unsigned long, softlockup_print_ts); /* print timestamp */
 static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
+static DEFINE_PER_CPU(bool, softlock_touch_sync);
 static int __read_mostly did_panic;
 int __read_mostly softlockup_thresh = 60;
@@ -79,6 +80,12 @@ void touch_softlockup_watchdog(void)
 }
 EXPORT_SYMBOL(touch_softlockup_watchdog);
+void touch_softlockup_watchdog_sync(void)
+{
+        __raw_get_cpu_var(softlock_touch_sync) = true;
+        __raw_get_cpu_var(softlockup_touch_ts) = 0;
+}
 void touch_all_softlockup_watchdogs(void)
 {
        int cpu;
@@ -118,6 +125,14 @@ void softlockup_tick(void)
        }
        if (touch_ts == 0) {
+                if (unlikely(per_cpu(softlock_touch_sync, this_cpu))) {
+                        /*
+                         * If the time stamp was touched atomically
+                         * make sure the scheduler tick is up to date.
+                         */
+                        per_cpu(softlock_touch_sync, this_cpu) = false;
+                        sched_clock_tick();
+                }
                __touch_softlockup_watchdog();
                return;
        }
diff --git a/kernel/sys.c b/kernel/sys.c
index 26a6b73a6b85..18bde979f346 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -222,6 +222,7 @@ SYSCALL_DEFINE2(getpriority, int, which, int, who)
        if (which > PRIO_USER || which < PRIO_PROCESS)
                return -EINVAL;
+        rcu_read_lock();
        read_lock(&tasklist_lock);
        switch (which) {
                case PRIO_PROCESS:
@@ -267,6 +268,7 @@ SYSCALL_DEFINE2(getpriority, int, which, int, who)
        }
 out_unlock:
        read_unlock(&tasklist_lock);
+        rcu_read_unlock();
        return retval;
 }
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index e85c23404d34..13700833c181 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -343,7 +343,19 @@ static void clocksource_resume_watchdog(void)
 {
        unsigned long flags;
-        spin_lock_irqsave(&watchdog_lock, flags);
+        /*
+         * We use trylock here to avoid a potential deadlock when
+         * kgdb calls this code after the kernel has been stopped with
+         * watchdog_lock held. When watchdog_lock is held we just
+         * return and accept that the watchdog might trigger and mark
+         * the monitored clock source (usually TSC) unstable.
+         *
+         * This does not affect the other caller clocksource_resume()
+         * because at this point the kernel is UP, interrupts are
+         * disabled and nothing can hold watchdog_lock.
+         */
+        if (!spin_trylock_irqsave(&watchdog_lock, flags))
+                return;
        clocksource_reset_watchdog();
        spin_unlock_irqrestore(&watchdog_lock, flags);
 }
@@ -458,8 +470,8 @@ void clocksource_resume(void)
 * clocksource_touch_watchdog - Update watchdog
 *
 * Update the watchdog after exception contexts such as kgdb so as not
- * to incorrectly trip the watchdog.
+ * to incorrectly trip the watchdog. This might fail when the kernel
- *
+ * was stopped in code which holds watchdog_lock.
 */
 void clocksource_touch_watchdog(void)
 {
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 7faaa32fbf4f..e2ab064c6d41 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -880,6 +880,7 @@ void getboottime(struct timespec *ts)
        set_normalized_timespec(ts, -boottime.tv_sec, -boottime.tv_nsec);
 }
+EXPORT_SYMBOL_GPL(getboottime);
 /**
 * monotonic_to_bootbased - Convert the monotonic time to boot based.
@@ -889,6 +890,7 @@ void monotonic_to_bootbased(struct timespec *ts)
 {
        *ts = timespec_add_safe(*ts, total_sleep_time);
 }
+EXPORT_SYMBOL_GPL(monotonic_to_bootbased);
 unsigned long get_seconds(void)
 {
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 6c22d8a2f289..60e2ce0181ee 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -27,9 +27,7 @@ config HAVE_FUNCTION_GRAPH_TRACER
 config HAVE_FUNCTION_GRAPH_FP_TEST
        bool
        help
-         An arch may pass in a unique value (frame pointer) to both the
+          See Documentation/trace/ftrace-design.txt
-         entering and exiting of a function. On exit, the value is compared
-         and if it does not match, then it will panic the kernel.
 config HAVE_FUNCTION_TRACE_MCOUNT_TEST
        bool
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index edefe3b2801b..8c1b2d290718 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -464,6 +464,8 @@ struct ring_buffer_iter {
        struct ring_buffer_per_cpu      *cpu_buffer;
        unsigned long                   head;
        struct buffer_page              *head_page;
+        struct buffer_page              *cache_reader_page;
+        unsigned long                   cache_read;
        u64                             read_stamp;
 };
@@ -2716,6 +2718,8 @@ static void rb_iter_reset(struct ring_buffer_iter *iter)
                iter->read_stamp = cpu_buffer->read_stamp;
        else
                iter->read_stamp = iter->head_page->page->time_stamp;
+        iter->cache_reader_page = cpu_buffer->reader_page;
+        iter->cache_read = cpu_buffer->read;
 }
 /**
@@ -3060,13 +3064,22 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
        struct ring_buffer_event *event;
        int nr_loops = 0;
-        if (ring_buffer_iter_empty(iter))
-                return NULL;
        cpu_buffer = iter->cpu_buffer;
        buffer = cpu_buffer->buffer;
+        /*
+         * Check if someone performed a consuming read to
+         * the buffer. A consuming read invalidates the iterator
+         * and we need to reset the iterator in this case.
+         */
+        if (unlikely(iter->cache_read != cpu_buffer->read ||
+                     iter->cache_reader_page != cpu_buffer->reader_page))
+                rb_iter_reset(iter);
 again:
+        if (ring_buffer_iter_empty(iter))
+                return NULL;
        /*
         * We repeat when a timestamp is encountered.
         * We can get multiple timestamps by nested interrupts or also
@@ -3081,6 +3094,11 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
        if (rb_per_cpu_empty(cpu_buffer))
                return NULL;
+        if (iter->head >= local_read(&iter->head_page->page->commit)) {
+                rb_inc_iter(iter);
+                goto again;
+        }
        event = rb_iter_head_event(iter);
        switch (event->type_len) {
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 0df1b0f2cb9e..eac6875cb990 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -951,6 +951,11 @@ void trace_find_cmdline(int pid, char comm[])
                return;
        }
+        if (WARN_ON_ONCE(pid < 0)) {
+                strcpy(comm, "<XXX>");
+                return;
+        }
        if (pid > PID_MAX_DEFAULT) {
                strcpy(comm, "<...>");
                return;
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 6178abf3637e..356c10227c98 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -673,7 +673,7 @@ static int create_trace_probe(int argc, char **argv)
                        return -EINVAL;
                }
                /* an address specified */
-                ret = strict_strtoul(&argv[0][2], 0, (unsigned long *)&addr);
+                ret = strict_strtoul(&argv[1][0], 0, (unsigned long *)&addr);
                if (ret) {
                        pr_info("Failed to parse address.\n");
                        return ret;
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 678a5120ee30..f4bc9b27de5f 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -157,6 +157,7 @@ stack_max_size_write(struct file *filp, const char __user *ubuf,
        unsigned long val, flags;
        char buf[64];
        int ret;
+        int cpu;
        if (count >= sizeof(buf))
                return -EINVAL;
@@ -171,9 +172,20 @@ stack_max_size_write(struct file *filp, const char __user *ubuf,
                return ret;
        local_irq_save(flags);
+        /*
+         * In case we trace inside arch_spin_lock() or after (NMI),
+         * we would cause a circular lock, so we also need to increment
+         * the per-cpu trace_active counter here.
+         */
+        cpu = smp_processor_id();
+        per_cpu(trace_active, cpu)++;
        arch_spin_lock(&max_stack_lock);
        *ptr = val;
        arch_spin_unlock(&max_stack_lock);
+        per_cpu(trace_active, cpu)--;
        local_irq_restore(flags);
        return count;
@@ -206,7 +218,13 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
 static void *t_start(struct seq_file *m, loff_t *pos)
 {
+        int cpu;
        local_irq_disable();
+        cpu = smp_processor_id();
+        per_cpu(trace_active, cpu)++;
        arch_spin_lock(&max_stack_lock);
        if (*pos == 0)
@@ -217,7 +235,13 @@ static void *t_start(struct seq_file *m, loff_t *pos)
 static void t_stop(struct seq_file *m, void *p)
 {
+        int cpu;
        arch_spin_unlock(&max_stack_lock);
+        cpu = smp_processor_id();
+        per_cpu(trace_active, cpu)--;
        local_irq_enable();
 }
author	Frederic Weisbecker <fweisbec@gmail.com>	2010-02-27 10:18:46 -0500
committer	Frederic Weisbecker <fweisbec@gmail.com>	2010-02-27 10:18:46 -0500
commit	018cbffe6819f6f8db20a0a3acd9bab9bfd667e4 (patch)
tree	fadde2521591998dc653fa094c636e8a547e620d /kernel
parent	1dd2980d990068e20045b90c424518cc7f3657ff (diff)
parent	60b341b778cc2929df16c0a504c91621b3c6a4ad (diff)