Merge branch 'linus' into tracing/urgent

Merge reason: Merge up to almost-rc6 to pick up latest perfcounters (on which we'll queue up a dependent fix) Signed-off-by: Ingo Molnar <mingo@elte.hu>
author: Ingo Molnar <mingo@elte.hu> 2009-08-09 06:46:45 -0400
committer: Ingo Molnar <mingo@elte.hu> 2009-08-09 06:46:49 -0400
commit: e3560336be655c6791316482fe288b119f34c427 (patch)
tree: 43ca9a6b489aaa3918b773f78a7eda37458ef0a8 /kernel
parent: 26528e773ecc74fb1b61b7275f86f761cbb340ec (diff)
parent: 7b2aa037e878c939676675969983284a02958ae3 (diff)
12 files changed, 254 insertions, 116 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 3737a682cdf5..b6eadfe30e7b 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -47,6 +47,7 @@
 #include <linux/hash.h>
 #include <linux/namei.h>
 #include <linux/smp_lock.h>
+#include <linux/pid_namespace.h>
 #include <asm/atomic.h>
@@ -734,16 +735,28 @@ static void cgroup_d_remove_dir(struct dentry *dentry)
 * reference to css->refcnt. In general, this refcnt is expected to go down
 * to zero, soon.
 *
- * CGRP_WAIT_ON_RMDIR flag is modified under cgroup's inode->i_mutex;
+ * CGRP_WAIT_ON_RMDIR flag is set under cgroup's inode->i_mutex;
 */
 DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq);
-static void cgroup_wakeup_rmdir_waiters(const struct cgroup *cgrp)
+static void cgroup_wakeup_rmdir_waiter(struct cgroup *cgrp)
 {
-        if (unlikely(test_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags)))
+        if (unlikely(test_and_clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags)))
                wake_up_all(&cgroup_rmdir_waitq);
 }
+void cgroup_exclude_rmdir(struct cgroup_subsys_state *css)
+{
+        css_get(css);
+}
+void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css)
+{
+        cgroup_wakeup_rmdir_waiter(css->cgroup);
+        css_put(css);
+}
 static int rebind_subsystems(struct cgroupfs_root *root,
                              unsigned long final_bits)
 {
@@ -960,6 +973,7 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
        INIT_LIST_HEAD(&cgrp->children);
        INIT_LIST_HEAD(&cgrp->css_sets);
        INIT_LIST_HEAD(&cgrp->release_list);
+        INIT_LIST_HEAD(&cgrp->pids_list);
        init_rwsem(&cgrp->pids_mutex);
 }
 static void init_cgroup_root(struct cgroupfs_root *root)
@@ -1357,7 +1371,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
         * wake up rmdir() waiter. the rmdir should fail since the cgroup
         * is no longer empty.
         */
-        cgroup_wakeup_rmdir_waiters(cgrp);
+        cgroup_wakeup_rmdir_waiter(cgrp);
        return 0;
 }
@@ -2201,12 +2215,30 @@ err:
        return ret;
 }
+/*
+ * Cache pids for all threads in the same pid namespace that are
+ * opening the same "tasks" file.
+ */
+struct cgroup_pids {
+        /* The node in cgrp->pids_list */
+        struct list_head list;
+        /* The cgroup those pids belong to */
+        struct cgroup *cgrp;
+        /* The namespace those pids belong to */
+        struct pid_namespace *ns;
+        /* Array of process ids in the cgroup */
+        pid_t *tasks_pids;
+        /* How many files are using this tasks_pids array */
+        int use_count;
+        /* Length of the current tasks_pids array */
+        int length;
+};
 static int cmppid(const void *a, const void *b)
 {
        return *(pid_t *)a - *(pid_t *)b;
 }
 /*
 * seq_file methods for the "tasks" file. The seq_file position is the
 * next pid to display; the seq_file iterator is a pointer to the pid
@@ -2221,45 +2253,47 @@ static void *cgroup_tasks_start(struct seq_file *s, loff_t *pos)
         * after a seek to the start). Use a binary-search to find the
         * next pid to display, if any
         */
-        struct cgroup *cgrp = s->private;
+        struct cgroup_pids *cp = s->private;
+        struct cgroup *cgrp = cp->cgrp;
        int index = 0, pid = *pos;
        int *iter;
        down_read(&cgrp->pids_mutex);
        if (pid) {
-                int end = cgrp->pids_length;
+                int end = cp->length;
                while (index < end) {
                        int mid = (index + end) / 2;
-                        if (cgrp->tasks_pids[mid] == pid) {
+                        if (cp->tasks_pids[mid] == pid) {
                                index = mid;
                                break;
-                        } else if (cgrp->tasks_pids[mid] <= pid)
+                        } else if (cp->tasks_pids[mid] <= pid)
                                index = mid + 1;
                        else
                                end = mid;
                }
        }
        /* If we're off the end of the array, we're done */
-        if (index >= cgrp->pids_length)
+        if (index >= cp->length)
                return NULL;
        /* Update the abstract position to be the actual pid that we found */
-        iter = cgrp->tasks_pids + index;
+        iter = cp->tasks_pids + index;
        *pos = *iter;
        return iter;
 }
 static void cgroup_tasks_stop(struct seq_file *s, void *v)
 {
-        struct cgroup *cgrp = s->private;
+        struct cgroup_pids *cp = s->private;
+        struct cgroup *cgrp = cp->cgrp;
        up_read(&cgrp->pids_mutex);
 }
 static void *cgroup_tasks_next(struct seq_file *s, void *v, loff_t *pos)
 {
-        struct cgroup *cgrp = s->private;
+        struct cgroup_pids *cp = s->private;
        int *p = v;
-        int *end = cgrp->tasks_pids + cgrp->pids_length;
+        int *end = cp->tasks_pids + cp->length;
        /*
         * Advance to the next pid in the array. If this goes off the
@@ -2286,26 +2320,33 @@ static struct seq_operations cgroup_tasks_seq_operations = {
        .show = cgroup_tasks_show,
 };
-static void release_cgroup_pid_array(struct cgroup *cgrp)
+static void release_cgroup_pid_array(struct cgroup_pids *cp)
 {
+        struct cgroup *cgrp = cp->cgrp;
        down_write(&cgrp->pids_mutex);
-        BUG_ON(!cgrp->pids_use_count);
+        BUG_ON(!cp->use_count);
-        if (!--cgrp->pids_use_count) {
+        if (!--cp->use_count) {
-                kfree(cgrp->tasks_pids);
+                list_del(&cp->list);
-                cgrp->tasks_pids = NULL;
+                put_pid_ns(cp->ns);
-                cgrp->pids_length = 0;
+                kfree(cp->tasks_pids);
+                kfree(cp);
        }
        up_write(&cgrp->pids_mutex);
 }
 static int cgroup_tasks_release(struct inode *inode, struct file *file)
 {
-        struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
+        struct seq_file *seq;
+        struct cgroup_pids *cp;
        if (!(file->f_mode & FMODE_READ))
                return 0;
-        release_cgroup_pid_array(cgrp);
+        seq = file->private_data;
+        cp = seq->private;
+        release_cgroup_pid_array(cp);
        return seq_release(inode, file);
 }
@@ -2324,6 +2365,8 @@ static struct file_operations cgroup_tasks_operations = {
 static int cgroup_tasks_open(struct inode *unused, struct file *file)
 {
        struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
+        struct pid_namespace *ns = current->nsproxy->pid_ns;
+        struct cgroup_pids *cp;
        pid_t *pidarray;
        int npids;
        int retval;
@@ -2350,20 +2393,37 @@ static int cgroup_tasks_open(struct inode *unused, struct file *file)
         * array if necessary
         */
        down_write(&cgrp->pids_mutex);
-        kfree(cgrp->tasks_pids);
-        cgrp->tasks_pids = pidarray;
+        list_for_each_entry(cp, &cgrp->pids_list, list) {
-        cgrp->pids_length = npids;
+                if (ns == cp->ns)
-        cgrp->pids_use_count++;
+                        goto found;
+        }
+        cp = kzalloc(sizeof(*cp), GFP_KERNEL);
+        if (!cp) {
+                up_write(&cgrp->pids_mutex);
+                kfree(pidarray);
+                return -ENOMEM;
+        }
+        cp->cgrp = cgrp;
+        cp->ns = ns;
+        get_pid_ns(ns);
+        list_add(&cp->list, &cgrp->pids_list);
+found:
+        kfree(cp->tasks_pids);
+        cp->tasks_pids = pidarray;
+        cp->length = npids;
+        cp->use_count++;
        up_write(&cgrp->pids_mutex);
        file->f_op = &cgroup_tasks_operations;
        retval = seq_open(file, &cgroup_tasks_seq_operations);
        if (retval) {
-                release_cgroup_pid_array(cgrp);
+                release_cgroup_pid_array(cp);
                return retval;
        }
-        ((struct seq_file *)file->private_data)->private = cgrp;
+        ((struct seq_file *)file->private_data)->private = cp;
        return 0;
 }
@@ -2696,33 +2756,42 @@ again:
        mutex_unlock(&cgroup_mutex);
        /*
+         * In general, subsystem has no css->refcnt after pre_destroy(). But
+         * in racy cases, subsystem may have to get css->refcnt after
+         * pre_destroy() and it makes rmdir return with -EBUSY. This sometimes
+         * makes rmdir return -EBUSY too often. To avoid that, we use a waitqueue
+         * for cgroup's rmdir. CGRP_WAIT_ON_RMDIR is for synchronizing rmdir
+         * and subsystem's reference count handling. Please see css_get/put
+         * and css_tryget() and cgroup_wakeup_rmdir_waiter() implementation.
+         */
+        set_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
+        /*
         * Call pre_destroy handlers of subsys. Notify subsystems
         * that rmdir() request comes.
         */
        ret = cgroup_call_pre_destroy(cgrp);
-        if (ret)
+        if (ret) {
+                clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
                return ret;
+        }
        mutex_lock(&cgroup_mutex);
        parent = cgrp->parent;
        if (atomic_read(&cgrp->count) || !list_empty(&cgrp->children)) {
+                clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
                mutex_unlock(&cgroup_mutex);
                return -EBUSY;
        }
-        /*
-         * css_put/get is provided for subsys to grab refcnt to css. In typical
-         * case, subsystem has no reference after pre_destroy(). But, under
-         * hierarchy management, some *temporal* refcnt can be hold.
-         * To avoid returning -EBUSY to a user, waitqueue is used. If subsys
-         * is really busy, it should return -EBUSY at pre_destroy(). wake_up
-         * is called when css_put() is called and refcnt goes down to 0.
-         */
-        set_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
        prepare_to_wait(&cgroup_rmdir_waitq, &wait, TASK_INTERRUPTIBLE);
        if (!cgroup_clear_css_refs(cgrp)) {
                mutex_unlock(&cgroup_mutex);
-                schedule();
+                /*
+                 * Because someone may call cgroup_wakeup_rmdir_waiter() before
+                 * prepare_to_wait(), we need to check this flag.
+                 */
+                if (test_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags))
+                        schedule();
                finish_wait(&cgroup_rmdir_waitq, &wait);
                clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
                if (signal_pending(current))
@@ -3294,7 +3363,7 @@ void __css_put(struct cgroup_subsys_state *css)
                        set_bit(CGRP_RELEASABLE, &cgrp->flags);
                        check_for_release(cgrp);
                }
-                cgroup_wakeup_rmdir_waiters(cgrp);
+                cgroup_wakeup_rmdir_waiter(cgrp);
        }
        rcu_read_unlock();
 }
diff --git a/kernel/fork.c b/kernel/fork.c
index 9b42695f0d14..021e1138556e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -426,6 +426,7 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
        init_rwsem(&mm->mmap_sem);
        INIT_LIST_HEAD(&mm->mmlist);
        mm->flags = (current->mm) ? current->mm->flags : default_dump_filter;
+        mm->oom_adj = (current->mm) ? current->mm->oom_adj : 0;
        mm->core_state = NULL;
        mm->nr_ptes = 0;
        set_mm_counter(mm, file_rss, 0);
@@ -567,18 +568,18 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
         * the value intact in a core dump, and to save the unnecessary
         * trouble otherwise.  Userland only wants this done for a sys_exit.
         */
-        if (tsk->clear_child_tid
+        if (tsk->clear_child_tid) {
-            && !(tsk->flags & PF_SIGNALED)
+                if (!(tsk->flags & PF_SIGNALED) &&
-            && atomic_read(&mm->mm_users) > 1) {
+                    atomic_read(&mm->mm_users) > 1) {
-                u32 __user * tidptr = tsk->clear_child_tid;
+                        /*
+                         * We don't check the error code - if userspace has
+                         * not set up a proper pointer then tough luck.
+                         */
+                        put_user(0, tsk->clear_child_tid);
+                        sys_futex(tsk->clear_child_tid, FUTEX_WAKE,
+                                        1, NULL, NULL, 0);
+                }
                tsk->clear_child_tid = NULL;
-                /*
-                 * We don't check the error code - if userspace has
-                 * not set up a proper pointer then tough luck.
-                 */
-                put_user(0, tidptr);
-                sys_futex(tidptr, FUTEX_WAKE, 1, NULL, NULL, 0);
        }
 }
@@ -1268,6 +1269,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        write_unlock_irq(&tasklist_lock);
        proc_fork_connector(p);
        cgroup_post_fork(p);
+        perf_counter_fork(p);
        return p;
 bad_fork_free_pid:
@@ -1409,9 +1411,6 @@ long do_fork(unsigned long clone_flags,
                        init_completion(&vfork);
                }
-                if (!(clone_flags & CLONE_THREAD))
-                        perf_counter_fork(p);
                audit_finish_fork(p);
                tracehook_report_clone(regs, clone_flags, nr, p);
diff --git a/kernel/kexec.c b/kernel/kexec.c
index ae1c35201cc8..f336e2107f98 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -1228,7 +1228,7 @@ static int __init parse_crashkernel_mem(char 			*cmdline,
        } while (*cur++ == ',');
        if (*crash_size > 0) {
-                while (*cur != ' ' && *cur != '@')
+                while (*cur && *cur != ' ' && *cur != '@')
                        cur++;
                if (*cur == '@') {
                        cur++;
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 16b5739c516a..0540948e29ab 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -694,7 +694,7 @@ int __kprobes register_kprobe(struct kprobe *p)
        p->addr = addr;
        preempt_disable();
-        if (!__kernel_text_address((unsigned long) p->addr) ||
+        if (!kernel_text_address((unsigned long) p->addr) ||
            in_kprobes_functions((unsigned long) p->addr)) {
                preempt_enable();
                return -EINVAL;
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 950931041954..673c1aaf7332 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -42,6 +42,7 @@ static int perf_overcommit __read_mostly = 1;
 static atomic_t nr_counters __read_mostly;
 static atomic_t nr_mmap_counters __read_mostly;
 static atomic_t nr_comm_counters __read_mostly;
+static atomic_t nr_task_counters __read_mostly;
 /*
 * perf counter paranoia level:
@@ -1103,7 +1104,7 @@ static void perf_counter_sync_stat(struct perf_counter_context *ctx,
                __perf_counter_sync_stat(counter, next_counter);
                counter = list_next_entry(counter, event_entry);
-                next_counter = list_next_entry(counter, event_entry);
+                next_counter = list_next_entry(next_counter, event_entry);
        }
 }
@@ -1654,6 +1655,8 @@ static void free_counter(struct perf_counter *counter)
                        atomic_dec(&nr_mmap_counters);
                if (counter->attr.comm)
                        atomic_dec(&nr_comm_counters);
+                if (counter->attr.task)
+                        atomic_dec(&nr_task_counters);
        }
        if (counter->destroy)
@@ -1688,6 +1691,18 @@ static int perf_release(struct inode *inode, struct file *file)
        return 0;
 }
+static u64 perf_counter_read_tree(struct perf_counter *counter)
+{
+        struct perf_counter *child;
+        u64 total = 0;
+        total += perf_counter_read(counter);
+        list_for_each_entry(child, &counter->child_list, child_list)
+                total += perf_counter_read(child);
+        return total;
+}
 /*
 * Read the performance counter - simple non blocking version for now
 */
@@ -1707,7 +1722,7 @@ perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
        WARN_ON_ONCE(counter->ctx->parent_ctx);
        mutex_lock(&counter->child_mutex);
-        values[0] = perf_counter_read(counter);
+        values[0] = perf_counter_read_tree(counter);
        n = 1;
        if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
                values[n++] = counter->total_time_enabled +
@@ -2819,10 +2834,12 @@ perf_counter_read_event(struct perf_counter *counter,
 }
 /*
- * fork tracking
+ * task tracking -- fork/exit
+ *
+ * enabled by: attr.comm | attr.mmap | attr.task
 */
-struct perf_fork_event {
+struct perf_task_event {
        struct task_struct      *task;
        struct {
@@ -2830,37 +2847,42 @@ struct perf_fork_event {
                u32                             pid;
                u32                             ppid;
+                u32                             tid;
+                u32                             ptid;
        } event;
 };
-static void perf_counter_fork_output(struct perf_counter *counter,
+static void perf_counter_task_output(struct perf_counter *counter,
-                                     struct perf_fork_event *fork_event)
+                                     struct perf_task_event *task_event)
 {
        struct perf_output_handle handle;
-        int size = fork_event->event.header.size;
+        int size = task_event->event.header.size;
-        struct task_struct *task = fork_event->task;
+        struct task_struct *task = task_event->task;
        int ret = perf_output_begin(&handle, counter, size, 0, 0);
        if (ret)
                return;
-        fork_event->event.pid = perf_counter_pid(counter, task);
+        task_event->event.pid = perf_counter_pid(counter, task);
-        fork_event->event.ppid = perf_counter_pid(counter, task->real_parent);
+        task_event->event.ppid = perf_counter_pid(counter, task->real_parent);
-        perf_output_put(&handle, fork_event->event);
+        task_event->event.tid = perf_counter_tid(counter, task);
+        task_event->event.ptid = perf_counter_tid(counter, task->real_parent);
+        perf_output_put(&handle, task_event->event);
        perf_output_end(&handle);
 }
-static int perf_counter_fork_match(struct perf_counter *counter)
+static int perf_counter_task_match(struct perf_counter *counter)
 {
-        if (counter->attr.comm || counter->attr.mmap)
+        if (counter->attr.comm || counter->attr.mmap || counter->attr.task)
                return 1;
        return 0;
 }
-static void perf_counter_fork_ctx(struct perf_counter_context *ctx,
+static void perf_counter_task_ctx(struct perf_counter_context *ctx,
-                                  struct perf_fork_event *fork_event)
+                                  struct perf_task_event *task_event)
 {
        struct perf_counter *counter;
@@ -2869,19 +2891,19 @@ static void perf_counter_fork_ctx(struct perf_counter_context *ctx,
        rcu_read_lock();
        list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) {
-                if (perf_counter_fork_match(counter))
+                if (perf_counter_task_match(counter))
-                        perf_counter_fork_output(counter, fork_event);
+                        perf_counter_task_output(counter, task_event);
        }
        rcu_read_unlock();
 }
-static void perf_counter_fork_event(struct perf_fork_event *fork_event)
+static void perf_counter_task_event(struct perf_task_event *task_event)
 {
        struct perf_cpu_context *cpuctx;
        struct perf_counter_context *ctx;
        cpuctx = &get_cpu_var(perf_cpu_context);
-        perf_counter_fork_ctx(&cpuctx->ctx, fork_event);
+        perf_counter_task_ctx(&cpuctx->ctx, task_event);
        put_cpu_var(perf_cpu_context);
        rcu_read_lock();
@@ -2891,32 +2913,40 @@ static void perf_counter_fork_event(struct perf_fork_event *fork_event)
         */
        ctx = rcu_dereference(current->perf_counter_ctxp);
        if (ctx)
-                perf_counter_fork_ctx(ctx, fork_event);
+                perf_counter_task_ctx(ctx, task_event);
        rcu_read_unlock();
 }
-void perf_counter_fork(struct task_struct *task)
+static void perf_counter_task(struct task_struct *task, int new)
 {
-        struct perf_fork_event fork_event;
+        struct perf_task_event task_event;
        if (!atomic_read(&nr_comm_counters) &&
-            !atomic_read(&nr_mmap_counters))
+            !atomic_read(&nr_mmap_counters) &&
+            !atomic_read(&nr_task_counters))
                return;
-        fork_event = (struct perf_fork_event){
+        task_event = (struct perf_task_event){
                .task   = task,
                .event  = {
                        .header = {
-                                .type = PERF_EVENT_FORK,
+                                .type = new ? PERF_EVENT_FORK : PERF_EVENT_EXIT,
                                .misc = 0,
-                                .size = sizeof(fork_event.event),
+                                .size = sizeof(task_event.event),
                        },
                        /* .pid  */
                        /* .ppid */
+                        /* .tid  */
+                        /* .ptid */
                },
        };
-        perf_counter_fork_event(&fork_event);
+        perf_counter_task_event(&task_event);
+}
+void perf_counter_fork(struct task_struct *task)
+{
+        perf_counter_task(task, 1);
 }
 /*
@@ -3875,6 +3905,8 @@ done:
                        atomic_inc(&nr_mmap_counters);
                if (counter->attr.comm)
                        atomic_inc(&nr_comm_counters);
+                if (counter->attr.task)
+                        atomic_inc(&nr_task_counters);
        }
        return counter;
@@ -4236,8 +4268,10 @@ void perf_counter_exit_task(struct task_struct *child)
        struct perf_counter_context *child_ctx;
        unsigned long flags;
-        if (likely(!child->perf_counter_ctxp))
+        if (likely(!child->perf_counter_ctxp)) {
+                perf_counter_task(child, 0);
                return;
+        }
        local_irq_save(flags);
        /*
@@ -4255,15 +4289,22 @@ void perf_counter_exit_task(struct task_struct *child)
         * incremented the context's refcount before we do put_ctx below.
         */
        spin_lock(&child_ctx->lock);
-        child->perf_counter_ctxp = NULL;
        /*
         * If this context is a clone; unclone it so it can't get
         * swapped to another process while we're removing all
         * the counters from it.
         */
        unclone_ctx(child_ctx);
-        spin_unlock(&child_ctx->lock);
+        spin_unlock_irqrestore(&child_ctx->lock, flags);
-        local_irq_restore(flags);
+        /*
+         * Report the task dead after unscheduling the counters so that we
+         * won't get any samples after PERF_EVENT_EXIT. We can however still
+         * get a few PERF_EVENT_READ events.
+         */
+        perf_counter_task(child, 0);
+        child->perf_counter_ctxp = NULL;
        /*
         * We can recurse on the same lock type through:
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 052ec4d195c7..d089d052c4a9 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -202,6 +202,12 @@ static int no_timer_create(struct k_itimer *new_timer)
        return -EOPNOTSUPP;
 }
+static int no_nsleep(const clockid_t which_clock, int flags,
+                     struct timespec *tsave, struct timespec __user *rmtp)
+{
+        return -EOPNOTSUPP;
+}
 /*
 * Return nonzero if we know a priori this clockid_t value is bogus.
 */
@@ -254,6 +260,7 @@ static __init int init_posix_timers(void)
                .clock_get = posix_get_monotonic_raw,
                .clock_set = do_posix_clock_nosettime,
                .timer_create = no_timer_create,
+                .nsleep = no_nsleep,
        };
        register_posix_clock(CLOCK_REALTIME, &clock_realtime);
diff --git a/kernel/sched_cpupri.c b/kernel/sched_cpupri.c
index e6c251790dde..d014efbf947a 100644
--- a/kernel/sched_cpupri.c
+++ b/kernel/sched_cpupri.c
@@ -81,8 +81,21 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,
                if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids)
                        continue;
-                if (lowest_mask)
+                if (lowest_mask) {
                        cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask);
+                        /*
+                         * We have to ensure that we have at least one bit
+                         * still set in the array, since the map could have
+                         * been concurrently emptied between the first and
+                         * second reads of vec->mask.  If we hit this
+                         * condition, simply act as though we never hit this
+                         * priority level and continue on.
+                         */
+                        if (cpumask_any(lowest_mask) >= nr_cpu_ids)
+                                continue;
+                }
                return 1;
        }
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 9ffb2b2ceba4..652e8bdef9aa 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -611,9 +611,13 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 #ifdef CONFIG_SCHEDSTATS
+        struct task_struct *tsk = NULL;
+        if (entity_is_task(se))
+                tsk = task_of(se);
        if (se->sleep_start) {
                u64 delta = rq_of(cfs_rq)->clock - se->sleep_start;
-                struct task_struct *tsk = task_of(se);
                if ((s64)delta < 0)
                        delta = 0;
@@ -624,11 +628,11 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
                se->sleep_start = 0;
                se->sum_sleep_runtime += delta;
-                account_scheduler_latency(tsk, delta >> 10, 1);
+                if (tsk)
+                        account_scheduler_latency(tsk, delta >> 10, 1);
        }
        if (se->block_start) {
                u64 delta = rq_of(cfs_rq)->clock - se->block_start;
-                struct task_struct *tsk = task_of(se);
                if ((s64)delta < 0)
                        delta = 0;
@@ -639,17 +643,19 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
                se->block_start = 0;
                se->sum_sleep_runtime += delta;
-                /*
+                if (tsk) {
-                 * Blocking time is in units of nanosecs, so shift by 20 to
+                        /*
-                 * get a milliseconds-range estimation of the amount of
+                         * Blocking time is in units of nanosecs, so shift by
-                 * time that the task spent sleeping:
+                         * 20 to get a milliseconds-range estimation of the
-                 */
+                         * amount of time that the task spent sleeping:
-                if (unlikely(prof_on == SLEEP_PROFILING)) {
+                         */
+                        if (unlikely(prof_on == SLEEP_PROFILING)) {
-                        profile_hits(SLEEP_PROFILING, (void *)get_wchan(tsk),
+                                profile_hits(SLEEP_PROFILING,
-                                     delta >> 20);
+                                                (void *)get_wchan(tsk),
+                                                delta >> 20);
+                        }
+                        account_scheduler_latency(tsk, delta >> 10, 0);
                }
-                account_scheduler_latency(tsk, delta >> 10, 0);
        }
 #endif
 }
diff --git a/kernel/signal.c b/kernel/signal.c
index ccf1ceedaebe..64c5deeaca5d 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2454,11 +2454,9 @@ do_sigaltstack (const stack_t __user *uss, stack_t __user *uoss, unsigned long s
        stack_t oss;
        int error;
-        if (uoss) {
+        oss.ss_sp = (void __user *) current->sas_ss_sp;
-                oss.ss_sp = (void __user *) current->sas_ss_sp;
+        oss.ss_size = current->sas_ss_size;
-                oss.ss_size = current->sas_ss_size;
+        oss.ss_flags = sas_ss_flags(sp);
-                oss.ss_flags = sas_ss_flags(sp);
-        }
        if (uss) {
                void __user *ss_sp;
@@ -2466,10 +2464,12 @@ do_sigaltstack (const stack_t __user *uss, stack_t __user *uoss, unsigned long s
                int ss_flags;
                error = -EFAULT;
-                if (!access_ok(VERIFY_READ, uss, sizeof(*uss))
+                if (!access_ok(VERIFY_READ, uss, sizeof(*uss)))
-                    || __get_user(ss_sp, &uss->ss_sp)
+                        goto out;
-                    || __get_user(ss_flags, &uss->ss_flags)
+                error = __get_user(ss_sp, &uss->ss_sp) |
-                    || __get_user(ss_size, &uss->ss_size))
+                        __get_user(ss_flags, &uss->ss_flags) |
+                        __get_user(ss_size, &uss->ss_size);
+                if (error)
                        goto out;
                error = -EPERM;
@@ -2501,13 +2501,16 @@ do_sigaltstack (const stack_t __user *uss, stack_t __user *uoss, unsigned long s
                current->sas_ss_size = ss_size;
        }
+        error = 0;
        if (uoss) {
                error = -EFAULT;
-                if (copy_to_user(uoss, &oss, sizeof(oss)))
+                if (!access_ok(VERIFY_WRITE, uoss, sizeof(*uoss)))
                        goto out;
+                error = __put_user(oss.ss_sp, &uoss->ss_sp) |
+                        __put_user(oss.ss_size, &uoss->ss_size) |
+                        __put_user(oss.ss_flags, &uoss->ss_flags);
        }
-        error = 0;
 out:
        return error;
 }
diff --git a/kernel/smp.c b/kernel/smp.c
index ad63d8501207..94188b8ecc33 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -57,7 +57,7 @@ hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu)
                        return NOTIFY_BAD;
                break;
-#ifdef CONFIG_CPU_HOTPLUG
+#ifdef CONFIG_HOTPLUG_CPU
        case CPU_UP_CANCELED:
        case CPU_UP_CANCELED_FROZEN:
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c
index 5b5895afecfe..11ba5bb4ed0a 100644
--- a/kernel/trace/trace_event_profile.c
+++ b/kernel/trace/trace_event_profile.c
@@ -14,7 +14,7 @@ int ftrace_profile_enable(int event_id)
        mutex_lock(&event_mutex);
        list_for_each_entry(event, &ftrace_events, list) {
-                if (event->id == event_id) {
+                if (event->id == event_id && event->profile_enable) {
                        ret = event->profile_enable(event);
                        break;
                }
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 23d2972b22d6..e75276a49cf5 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -940,7 +940,7 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
                entry = trace_create_file("enable", 0644, call->dir, call,
                                          enable);
-        if (call->id)
+        if (call->id && call->profile_enable)
                entry = trace_create_file("id", 0444, call->dir, call,
                                          id);
author	Ingo Molnar <mingo@elte.hu>	2009-08-09 06:46:45 -0400
committer	Ingo Molnar <mingo@elte.hu>	2009-08-09 06:46:49 -0400
commit	e3560336be655c6791316482fe288b119f34c427 (patch)
tree	43ca9a6b489aaa3918b773f78a7eda37458ef0a8 /kernel
parent	26528e773ecc74fb1b61b7275f86f761cbb340ec (diff)
parent	7b2aa037e878c939676675969983284a02958ae3 (diff)