author     Ingo Molnar <mingo@kernel.org>  2016-05-12 03:18:13 -0400
committer  Ingo Molnar <mingo@kernel.org>  2016-05-12 03:18:13 -0400
commit     eb60b3e5e8dfdd590e586a6fc22daf2f63a7b7e6 (patch)
tree       1b06e2c1beca8f970685eb13096c7a12480526c6 /kernel
parent     58fe9c4621b7219e724c0b7af053112f974a08c3 (diff)
parent     53d3bc773eaa7ab1cf63585e76af7ee869d5e709 (diff)
Merge branch 'sched/urgent' into sched/core to pick up fixes
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/bpf/inode.c               7
-rw-r--r--  kernel/bpf/syscall.c            24
-rw-r--r--  kernel/bpf/verifier.c           77
-rw-r--r--  kernel/cgroup.c                  7
-rw-r--r--  kernel/cpuset.c                  4
-rw-r--r--  kernel/events/core.c            57
-rw-r--r--  kernel/kcov.c                    3
-rw-r--r--  kernel/kexec_core.c              7
-rw-r--r--  kernel/locking/lockdep.c        37
-rw-r--r--  kernel/locking/lockdep_proc.c    2
-rw-r--r--  kernel/sched/deadline.c          1
-rw-r--r--  kernel/sched/fair.c             29
-rw-r--r--  kernel/sched/rt.c                1
-rw-r--r--  kernel/trace/trace_events.c      9
-rw-r--r--  kernel/workqueue.c              29
15 files changed, 212 insertions(+), 82 deletions(-)
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index f2ece3c174a5..8f94ca1860cf 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -31,10 +31,10 @@ static void *bpf_any_get(void *raw, enum bpf_type type)
 {
 	switch (type) {
 	case BPF_TYPE_PROG:
-		atomic_inc(&((struct bpf_prog *)raw)->aux->refcnt);
+		raw = bpf_prog_inc(raw);
 		break;
 	case BPF_TYPE_MAP:
-		bpf_map_inc(raw, true);
+		raw = bpf_map_inc(raw, true);
 		break;
 	default:
 		WARN_ON_ONCE(1);
@@ -297,7 +297,8 @@ static void *bpf_obj_do_get(const struct filename *pathname,
 		goto out;
 
 	raw = bpf_any_get(inode->i_private, *type);
-	touch_atime(&path);
+	if (!IS_ERR(raw))
+		touch_atime(&path);
 
 	path_put(&path);
 	return raw;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index adc5e4bd74f8..cf5e9f7ad13a 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -218,11 +218,18 @@ struct bpf_map *__bpf_map_get(struct fd f)
 	return f.file->private_data;
 }
 
-void bpf_map_inc(struct bpf_map *map, bool uref)
+/* prog's and map's refcnt limit */
+#define BPF_MAX_REFCNT 32768
+
+struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref)
 {
-	atomic_inc(&map->refcnt);
+	if (atomic_inc_return(&map->refcnt) > BPF_MAX_REFCNT) {
+		atomic_dec(&map->refcnt);
+		return ERR_PTR(-EBUSY);
+	}
 	if (uref)
 		atomic_inc(&map->usercnt);
+	return map;
 }
 
 struct bpf_map *bpf_map_get_with_uref(u32 ufd)
@@ -234,7 +241,7 @@ struct bpf_map *bpf_map_get_with_uref(u32 ufd)
 	if (IS_ERR(map))
 		return map;
 
-	bpf_map_inc(map, true);
+	map = bpf_map_inc(map, true);
 	fdput(f);
 
 	return map;
@@ -658,6 +665,15 @@ static struct bpf_prog *__bpf_prog_get(struct fd f)
 	return f.file->private_data;
 }
 
+struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog)
+{
+	if (atomic_inc_return(&prog->aux->refcnt) > BPF_MAX_REFCNT) {
+		atomic_dec(&prog->aux->refcnt);
+		return ERR_PTR(-EBUSY);
+	}
+	return prog;
+}
+
 /* called by sockets/tracing/seccomp before attaching program to an event
  * pairs with bpf_prog_put()
  */
@@ -670,7 +686,7 @@ struct bpf_prog *bpf_prog_get(u32 ufd)
 	if (IS_ERR(prog))
 		return prog;
 
-	atomic_inc(&prog->aux->refcnt);
+	prog = bpf_prog_inc(prog);
 	fdput(f);
 
 	return prog;
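
Taken together, the bpf changes make every refcount-taking path failable: bpf_map_inc()/bpf_prog_inc() saturate at BPF_MAX_REFCNT and hand back ERR_PTR(-EBUSY), and callers such as bpf_any_get() now check IS_ERR() before side effects like touch_atime(). A minimal userspace sketch of the same saturating-get idiom, using C11 atomics and a re-creation of the kernel's ERR_PTR()/IS_ERR() pointer encoding; all names below are illustrative, not the kernel's:

/*
 * Sketch of the saturating-get idiom, assuming C11 <stdatomic.h>.
 * MAX_REFCNT mirrors BPF_MAX_REFCNT; 'struct object' is illustrative.
 */
#include <errno.h>
#include <stdatomic.h>
#include <stdio.h>

#define MAX_ERRNO	4095
#define MAX_REFCNT	32768

static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

struct object {
	atomic_int refcnt;
};

static struct object *object_get(struct object *obj)
{
	/* atomic_inc_return() analogue: bump, then inspect the result;
	 * saturate instead of risking overflow, undoing the increment. */
	if (atomic_fetch_add(&obj->refcnt, 1) + 1 > MAX_REFCNT) {
		atomic_fetch_sub(&obj->refcnt, 1);
		return ERR_PTR(-EBUSY);
	}
	return obj;
}

int main(void)
{
	struct object obj = { .refcnt = 1 };
	struct object *ref = object_get(&obj);

	if (IS_ERR(ref)) {	/* check before any side effect */
		fprintf(stderr, "get failed: %ld\n", PTR_ERR(ref));
		return 1;
	}
	printf("refcnt now %d\n", atomic_load(&obj.refcnt));
	return 0;
}

Returning the pointer (rather than void) forces the caller to consume the result, so a saturated refcount cannot be silently ignored.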
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 618ef77c302a..c5c17a62f509 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -239,16 +239,6 @@ static const char * const reg_type_str[] = {
 	[CONST_IMM]		= "imm",
 };
 
-static const struct {
-	int map_type;
-	int func_id;
-} func_limit[] = {
-	{BPF_MAP_TYPE_PROG_ARRAY, BPF_FUNC_tail_call},
-	{BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_read},
-	{BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_output},
-	{BPF_MAP_TYPE_STACK_TRACE, BPF_FUNC_get_stackid},
-};
-
 static void print_verifier_state(struct verifier_env *env)
 {
 	enum bpf_reg_type t;
@@ -921,27 +911,52 @@ static int check_func_arg(struct verifier_env *env, u32 regno,
 
 static int check_map_func_compatibility(struct bpf_map *map, int func_id)
 {
-	bool bool_map, bool_func;
-	int i;
-
 	if (!map)
 		return 0;
 
-	for (i = 0; i < ARRAY_SIZE(func_limit); i++) {
-		bool_map = (map->map_type == func_limit[i].map_type);
-		bool_func = (func_id == func_limit[i].func_id);
-		/* only when map & func pair match it can continue.
-		 * don't allow any other map type to be passed into
-		 * the special func;
-		 */
-		if (bool_func && bool_map != bool_func) {
-			verbose("cannot pass map_type %d into func %d\n",
-				map->map_type, func_id);
-			return -EINVAL;
-		}
+	/* We need a two way check, first is from map perspective ... */
+	switch (map->map_type) {
+	case BPF_MAP_TYPE_PROG_ARRAY:
+		if (func_id != BPF_FUNC_tail_call)
+			goto error;
+		break;
+	case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
+		if (func_id != BPF_FUNC_perf_event_read &&
+		    func_id != BPF_FUNC_perf_event_output)
+			goto error;
+		break;
+	case BPF_MAP_TYPE_STACK_TRACE:
+		if (func_id != BPF_FUNC_get_stackid)
+			goto error;
+		break;
+	default:
+		break;
+	}
+
+	/* ... and second from the function itself. */
+	switch (func_id) {
+	case BPF_FUNC_tail_call:
+		if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
+			goto error;
+		break;
+	case BPF_FUNC_perf_event_read:
+	case BPF_FUNC_perf_event_output:
+		if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
+			goto error;
+		break;
+	case BPF_FUNC_get_stackid:
+		if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
+			goto error;
+		break;
+	default:
+		break;
 	}
 
 	return 0;
+error:
+	verbose("cannot pass map_type %d into func %d\n",
+		map->map_type, func_id);
+	return -EINVAL;
 }
 
 static int check_call(struct verifier_env *env, int func_id)
@@ -2030,7 +2045,6 @@ static int replace_map_fd_with_map_ptr(struct verifier_env *env)
 			if (IS_ERR(map)) {
 				verbose("fd %d is not pointing to valid bpf_map\n",
 					insn->imm);
-				fdput(f);
 				return PTR_ERR(map);
 			}
 
@@ -2050,15 +2064,18 @@ static int replace_map_fd_with_map_ptr(struct verifier_env *env)
 				return -E2BIG;
 			}
 
-			/* remember this map */
-			env->used_maps[env->used_map_cnt++] = map;
-
 			/* hold the map. If the program is rejected by verifier,
 			 * the map will be released by release_maps() or it
 			 * will be used by the valid program until it's unloaded
 			 * and all maps are released in free_bpf_prog_info()
 			 */
-			bpf_map_inc(map, false);
+			map = bpf_map_inc(map, false);
+			if (IS_ERR(map)) {
+				fdput(f);
+				return PTR_ERR(map);
+			}
+			env->used_maps[env->used_map_cnt++] = map;
+
 			fdput(f);
 next_insn:
 			insn++;
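
The verifier rewrite replaces the func_limit[] table with an explicitly two-way check: the first switch constrains what a given map type may be passed to, the second constrains what a given helper may receive, so a newly added map type or helper fails closed until both sides are taught about it. A compact sketch of that validation shape, with invented enum values:

/*
 * Sketch of a two-way compatibility check between a resource type and
 * an operation, mirroring the shape of check_map_func_compatibility().
 * The enums and allowed pairings are invented for illustration.
 */
#include <stdio.h>

enum res_type { RES_A, RES_B, RES_PLAIN };
enum op_id    { OP_ONLY_A, OP_ONLY_B, OP_GENERIC };

static int check_compat(enum res_type type, enum op_id op)
{
	/* First direction: what may this resource type be passed to? */
	switch (type) {
	case RES_A:
		if (op != OP_ONLY_A)
			goto error;
		break;
	case RES_B:
		if (op != OP_ONLY_B)
			goto error;
		break;
	default:
		break;			/* unrestricted resource */
	}

	/* Second direction: what may this operation receive? */
	switch (op) {
	case OP_ONLY_A:
		if (type != RES_A)
			goto error;
		break;
	case OP_ONLY_B:
		if (type != RES_B)
			goto error;
		break;
	default:
		break;			/* unrestricted operation */
	}

	return 0;
error:
	fprintf(stderr, "cannot pass type %d into op %d\n", type, op);
	return -1;
}

int main(void)
{
	int ok  = check_compat(RES_A, OP_ONLY_A);	/* accepted */
	int bad = check_compat(RES_A, OP_GENERIC);	/* rejected by side one */

	printf("ok=%d bad=%d\n", ok, bad);
	return 0;
}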
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 671dc05c0b0f..909a7d31ffd3 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2825,9 +2825,10 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
 				    size_t nbytes, loff_t off, bool threadgroup)
 {
 	struct task_struct *tsk;
+	struct cgroup_subsys *ss;
 	struct cgroup *cgrp;
 	pid_t pid;
-	int ret;
+	int ssid, ret;
 
 	if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
 		return -EINVAL;
@@ -2875,8 +2876,10 @@ out_unlock_rcu:
 	rcu_read_unlock();
 out_unlock_threadgroup:
 	percpu_up_write(&cgroup_threadgroup_rwsem);
+	for_each_subsys(ss, ssid)
+		if (ss->post_attach)
+			ss->post_attach();
 	cgroup_kn_unlock(of->kn);
-	cpuset_post_attach_flush();
 	return ret ?: nbytes;
 }
 
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 00ab5c2b7c5b..1902956baba1 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -58,7 +58,6 @@
 #include <asm/uaccess.h>
 #include <linux/atomic.h>
 #include <linux/mutex.h>
-#include <linux/workqueue.h>
 #include <linux/cgroup.h>
 #include <linux/wait.h>
 
@@ -1016,7 +1015,7 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
 	}
 }
 
-void cpuset_post_attach_flush(void)
+static void cpuset_post_attach(void)
 {
 	flush_workqueue(cpuset_migrate_mm_wq);
 }
@@ -2087,6 +2086,7 @@ struct cgroup_subsys cpuset_cgrp_subsys = {
 	.can_attach	= cpuset_can_attach,
 	.cancel_attach	= cpuset_cancel_attach,
 	.attach		= cpuset_attach,
+	.post_attach	= cpuset_post_attach,
 	.bind		= cpuset_bind,
 	.legacy_cftypes	= files,
 	.early_init	= true,
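
The cgroup/cpuset pair converts a hard-coded cpuset_post_attach_flush() call in cgroup core into a generic ss->post_attach() callback: cpuset registers its flush as the hook, and __cgroup_procs_write() just walks all subsystems and calls the hook where one is set. The optional-callback iteration looks roughly like this self-contained sketch (names are illustrative):

/*
 * Sketch of the optional per-subsystem hook pattern behind
 * for_each_subsys()/ss->post_attach. Everything here is illustrative;
 * the real cpuset hook flushes a memory-migration workqueue.
 */
#include <stddef.h>
#include <stdio.h>

struct subsys {
	const char *name;
	void (*post_attach)(void);	/* optional, may be NULL */
};

static void cpuset_like_post_attach(void)
{
	puts("cpuset: flushing deferred mm migration");
}

static struct subsys subsystems[] = {
	{ .name = "cpu" },					/* no hook */
	{ .name = "cpuset", .post_attach = cpuset_like_post_attach },
};

int main(void)
{
	size_t ssid;

	/* Core code stays controller-agnostic: call each hook if set. */
	for (ssid = 0; ssid < sizeof(subsystems) / sizeof(subsystems[0]); ssid++)
		if (subsystems[ssid].post_attach)
			subsystems[ssid].post_attach();
	return 0;
}

Compared with the old direct call, a second controller that needs post-attach work now sets one struct field instead of patching cgroup.c.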
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 52bedc5a5aaa..c0ded2416615 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -351,7 +351,7 @@ static struct srcu_struct pmus_srcu;
  *   1 - disallow cpu events for unpriv
  *   2 - disallow kernel profiling for unpriv
  */
-int sysctl_perf_event_paranoid __read_mostly = 1;
+int sysctl_perf_event_paranoid __read_mostly = 2;
 
 /* Minimum for 512 kiB + 1 user control page */
 int sysctl_perf_event_mlock __read_mostly = 512 + (PAGE_SIZE / 1024); /* 'free' kiB per user */
@@ -412,7 +412,8 @@ int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write,
 	if (ret || !write)
 		return ret;
 
-	if (sysctl_perf_cpu_time_max_percent == 100) {
+	if (sysctl_perf_cpu_time_max_percent == 100 ||
+	    sysctl_perf_cpu_time_max_percent == 0) {
 		printk(KERN_WARNING
 		       "perf: Dynamic interrupt throttling disabled, can hang your system!\n");
 		WRITE_ONCE(perf_sample_allowed_ns, 0);
@@ -1105,6 +1106,7 @@ static void put_ctx(struct perf_event_context *ctx)
  * function.
  *
  * Lock order:
+ *    cred_guard_mutex
  *	task_struct::perf_event_mutex
  *	  perf_event_context::mutex
  *	    perf_event::child_mutex;
@@ -3420,7 +3422,6 @@ static struct task_struct *
 find_lively_task_by_vpid(pid_t vpid)
 {
 	struct task_struct *task;
-	int err;
 
 	rcu_read_lock();
 	if (!vpid)
@@ -3434,16 +3435,7 @@ find_lively_task_by_vpid(pid_t vpid)
 	if (!task)
 		return ERR_PTR(-ESRCH);
 
-	/* Reuse ptrace permission checks for now. */
-	err = -EACCES;
-	if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS))
-		goto errout;
-
 	return task;
-errout:
-	put_task_struct(task);
-	return ERR_PTR(err);
-
 }
 
 /*
@@ -8413,6 +8405,24 @@ SYSCALL_DEFINE5(perf_event_open,
 
 	get_online_cpus();
 
+	if (task) {
+		err = mutex_lock_interruptible(&task->signal->cred_guard_mutex);
+		if (err)
+			goto err_cpus;
+
+		/*
+		 * Reuse ptrace permission checks for now.
+		 *
+		 * We must hold cred_guard_mutex across this and any potential
+		 * perf_install_in_context() call for this new event to
+		 * serialize against exec() altering our credentials (and the
+		 * perf_event_exit_task() that could imply).
+		 */
+		err = -EACCES;
+		if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS))
+			goto err_cred;
+	}
+
 	if (flags & PERF_FLAG_PID_CGROUP)
 		cgroup_fd = pid;
 
@@ -8420,7 +8430,7 @@ SYSCALL_DEFINE5(perf_event_open,
 				 NULL, NULL, cgroup_fd);
 	if (IS_ERR(event)) {
 		err = PTR_ERR(event);
-		goto err_cpus;
+		goto err_cred;
 	}
 
 	if (is_sampling_event(event)) {
@@ -8479,11 +8489,6 @@ SYSCALL_DEFINE5(perf_event_open,
 			goto err_context;
 	}
 
-	if (task) {
-		put_task_struct(task);
-		task = NULL;
-	}
-
 	/*
 	 * Look up the group leader (we will attach this event to it):
 	 */
@@ -8581,6 +8586,11 @@ SYSCALL_DEFINE5(perf_event_open,
 
 	WARN_ON_ONCE(ctx->parent_ctx);
 
+	/*
+	 * This is the point of no return; we cannot fail hereafter. This is
+	 * where we start modifying current state.
+	 */
+
 	if (move_group) {
 		/*
 		 * See perf_event_ctx_lock() for comments on the details
@@ -8652,6 +8662,11 @@ SYSCALL_DEFINE5(perf_event_open,
 	mutex_unlock(&gctx->mutex);
 	mutex_unlock(&ctx->mutex);
 
+	if (task) {
+		mutex_unlock(&task->signal->cred_guard_mutex);
+		put_task_struct(task);
+	}
+
 	put_online_cpus();
 
 	mutex_lock(&current->perf_event_mutex);
@@ -8684,6 +8699,9 @@ err_alloc:
 	 */
 	if (!event_file)
 		free_event(event);
+err_cred:
+	if (task)
+		mutex_unlock(&task->signal->cred_guard_mutex);
 err_cpus:
 	put_online_cpus();
 err_task:
@@ -8968,6 +8986,9 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
 
 /*
  * When a child task exits, feed back event values to parent events.
+ *
+ * Can be called with cred_guard_mutex held when called from
+ * install_exec_creds().
 */
 void perf_event_exit_task(struct task_struct *child)
 {
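
The perf change is a TOCTOU fix: the ptrace_may_access() check moves out of find_lively_task_by_vpid() and under the target's cred_guard_mutex in perf_event_open(), which stays held across perf_install_in_context(), so a concurrent exec() cannot swap credentials between the check and the use; the new err_cred label unlocks it on every failure path. A pthread-based sketch of the check-and-use-under-one-lock shape; the target/credential types are invented for illustration:

/*
 * Sketch of check-and-use under one lock, assuming pthreads.
 * 'struct target', may_access() and the uid field are invented; the
 * kernel equivalents are task_struct, ptrace_may_access() and real
 * credentials guarded by cred_guard_mutex.
 */
#include <errno.h>
#include <pthread.h>
#include <stdio.h>

struct target {
	pthread_mutex_t cred_guard;	/* serializes credential changes */
	int uid;
};

static int may_access(const struct target *t)
{
	return t->uid == 0;		/* stand-in permission check */
}

static int attach_monitor(struct target *t)
{
	int err = pthread_mutex_lock(&t->cred_guard);

	if (err)
		return -err;

	if (!may_access(t)) {
		err = -EACCES;		/* unwind mirrors err_cred */
		goto out_unlock;
	}

	/* ... install monitoring state while credentials are pinned:
	 * nothing between the check and this point can race an exec(). */
	printf("attached to uid %d\n", t->uid);
	err = 0;

out_unlock:
	pthread_mutex_unlock(&t->cred_guard);
	return err;
}

int main(void)
{
	struct target t = { PTHREAD_MUTEX_INITIALIZER, 0 };

	return attach_monitor(&t) ? 1 : 0;
}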
diff --git a/kernel/kcov.c b/kernel/kcov.c
index 3efbee0834a8..a02f2dddd1d7 100644
--- a/kernel/kcov.c
+++ b/kernel/kcov.c
@@ -1,5 +1,6 @@
 #define pr_fmt(fmt) "kcov: " fmt
 
+#define DISABLE_BRANCH_PROFILING
 #include <linux/compiler.h>
 #include <linux/types.h>
 #include <linux/file.h>
@@ -43,7 +44,7 @@ struct kcov {
  * Entry point from instrumented code.
  * This is called once per basic-block/edge.
  */
-void __sanitizer_cov_trace_pc(void)
+void notrace __sanitizer_cov_trace_pc(void)
 {
 	struct task_struct *t;
 	enum kcov_mode mode;
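
Both kcov hunks defend against the same recursion: the coverage hook is itself code, so branch profiling of likely()/unlikely() inside it, or an ftrace hook on its entry, would call back into the tracer. DISABLE_BRANCH_PROFILING turns the profiling macros off for this file, and notrace, which the kernel defines on most configurations roughly as below, exempts the function from instrumentation:

/*
 * Roughly how the kernel spells notrace on most configurations
 * (include/linux/compiler.h); the attribute keeps the compiler from
 * emitting instrumentation calls for the function. The hook below is
 * an illustrative stand-in for __sanitizer_cov_trace_pc().
 */
#define notrace __attribute__((no_instrument_function))

static unsigned long hits;

/* If this function were itself instrumented, every entry would emit a
 * call back into it, recursing until the stack overflows. */
void notrace trace_hook(void)
{
	hits++;
}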
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index 8d34308ea449..1391d3ee3b86 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -1415,6 +1415,9 @@ static int __init crash_save_vmcoreinfo_init(void)
 	VMCOREINFO_OFFSET(page, lru);
 	VMCOREINFO_OFFSET(page, _mapcount);
 	VMCOREINFO_OFFSET(page, private);
+	VMCOREINFO_OFFSET(page, compound_dtor);
+	VMCOREINFO_OFFSET(page, compound_order);
+	VMCOREINFO_OFFSET(page, compound_head);
 	VMCOREINFO_OFFSET(pglist_data, node_zones);
 	VMCOREINFO_OFFSET(pglist_data, nr_zones);
 #ifdef CONFIG_FLAT_NODE_MEM_MAP
@@ -1447,8 +1450,8 @@ static int __init crash_save_vmcoreinfo_init(void)
 #ifdef CONFIG_X86
 	VMCOREINFO_NUMBER(KERNEL_IMAGE_SIZE);
 #endif
-#ifdef CONFIG_HUGETLBFS
-	VMCOREINFO_SYMBOL(free_huge_page);
+#ifdef CONFIG_HUGETLB_PAGE
+	VMCOREINFO_NUMBER(HUGETLB_PAGE_DTOR);
 #endif
 
 	arch_crash_save_vmcoreinfo();
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index d7f94f4c811d..68bc6a654ca3 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -2177,15 +2177,37 @@ cache_hit:
 	chain->irq_context = hlock->irq_context;
 	i = get_first_held_lock(curr, hlock);
 	chain->depth = curr->lockdep_depth + 1 - i;
+
+	BUILD_BUG_ON((1UL << 24) <= ARRAY_SIZE(chain_hlocks));
+	BUILD_BUG_ON((1UL << 6)  <= ARRAY_SIZE(curr->held_locks));
+	BUILD_BUG_ON((1UL << 8*sizeof(chain_hlocks[0])) <= ARRAY_SIZE(lock_classes));
+
 	if (likely(nr_chain_hlocks + chain->depth <= MAX_LOCKDEP_CHAIN_HLOCKS)) {
 		chain->base = nr_chain_hlocks;
-		nr_chain_hlocks += chain->depth;
 		for (j = 0; j < chain->depth - 1; j++, i++) {
 			int lock_id = curr->held_locks[i].class_idx - 1;
 			chain_hlocks[chain->base + j] = lock_id;
 		}
 		chain_hlocks[chain->base + j] = class - lock_classes;
 	}
+
+	if (nr_chain_hlocks < MAX_LOCKDEP_CHAIN_HLOCKS)
+		nr_chain_hlocks += chain->depth;
+
+#ifdef CONFIG_DEBUG_LOCKDEP
+	/*
+	 * Important for check_no_collision().
+	 */
+	if (unlikely(nr_chain_hlocks > MAX_LOCKDEP_CHAIN_HLOCKS)) {
+		if (debug_locks_off_graph_unlock())
+			return 0;
+
+		print_lockdep_off("BUG: MAX_LOCKDEP_CHAIN_HLOCKS too low!");
+		dump_stack();
+		return 0;
+	}
+#endif
+
 	hlist_add_head_rcu(&chain->entry, hash_head);
 	debug_atomic_inc(chain_lookup_misses);
 	inc_chains();
@@ -2933,6 +2955,11 @@ static int mark_irqflags(struct task_struct *curr, struct held_lock *hlock)
 	return 1;
 }
 
+static inline unsigned int task_irq_context(struct task_struct *task)
+{
+	return 2 * !!task->hardirq_context + !!task->softirq_context;
+}
+
 static int separate_irq_context(struct task_struct *curr,
 		struct held_lock *hlock)
 {
@@ -2941,8 +2968,6 @@ static int separate_irq_context(struct task_struct *curr,
 	/*
 	 * Keep track of points where we cross into an interrupt context:
 	 */
-	hlock->irq_context = 2*(curr->hardirq_context ? 1 : 0) +
-		curr->softirq_context;
 	if (depth) {
 		struct held_lock *prev_hlock;
 
@@ -2974,6 +2999,11 @@ static inline int mark_irqflags(struct task_struct *curr,
 	return 1;
 }
 
+static inline unsigned int task_irq_context(struct task_struct *task)
+{
+	return 0;
+}
+
 static inline int separate_irq_context(struct task_struct *curr,
 		struct held_lock *hlock)
 {
@@ -3242,6 +3272,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
 	hlock->acquire_ip = ip;
 	hlock->instance = lock;
 	hlock->nest_lock = nest_lock;
+	hlock->irq_context = task_irq_context(curr);
 	hlock->trylock = trylock;
 	hlock->read = read;
 	hlock->check = check;
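
task_irq_context() centralizes the two-bit irq-context encoding (2 * hardirq + softirq) that separate_irq_context() previously computed inline, and __lock_acquire() now stamps it on the held lock up front. The encoding can be checked in isolation:

/*
 * Standalone check of task_irq_context()'s two-bit encoding:
 * 0 = process context, 1 = softirq, 2 = hardirq, 3 = hardirq nested
 * over softirq. 'struct task_like' stands in for task_struct, whose
 * context fields are counters, hence the !! normalization.
 */
#include <assert.h>

struct task_like {
	unsigned int hardirq_context;
	unsigned int softirq_context;
};

static unsigned int task_irq_context(const struct task_like *task)
{
	return 2 * !!task->hardirq_context + !!task->softirq_context;
}

int main(void)
{
	assert(task_irq_context(&(struct task_like){ 0, 0 }) == 0);
	assert(task_irq_context(&(struct task_like){ 0, 1 }) == 1);
	assert(task_irq_context(&(struct task_like){ 1, 0 }) == 2);
	assert(task_irq_context(&(struct task_like){ 2, 3 }) == 3);
	return 0;
}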
diff --git a/kernel/locking/lockdep_proc.c b/kernel/locking/lockdep_proc.c
index dbb61a302548..a0f61effad25 100644
--- a/kernel/locking/lockdep_proc.c
+++ b/kernel/locking/lockdep_proc.c
@@ -141,6 +141,8 @@ static int lc_show(struct seq_file *m, void *v)
 	int i;
 
 	if (v == SEQ_START_TOKEN) {
+		if (nr_chain_hlocks > MAX_LOCKDEP_CHAIN_HLOCKS)
+			seq_printf(m, "(buggered) ");
 		seq_printf(m, "all lock chains:\n");
 		return 0;
 	}
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index ba53a87bb978..0ac6c84f3371 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1395,6 +1395,7 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
 				     !cpumask_test_cpu(later_rq->cpu,
 						       &task->cpus_allowed) ||
 				     task_running(rq, task) ||
+				     !dl_task(task) ||
 				     !task_on_rq_queued(task))) {
 			double_unlock_balance(rq, later_rq);
 			later_rq = NULL;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 51f7a4b62985..39fde3660f97 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3099,7 +3099,14 @@ static int idle_balance(struct rq *this_rq);
 
 #else /* CONFIG_SMP */
 
-static inline void update_load_avg(struct sched_entity *se, int update_tg) {}
+static inline void update_load_avg(struct sched_entity *se, int not_used)
+{
+	struct cfs_rq *cfs_rq = cfs_rq_of(se);
+	struct rq *rq = rq_of(cfs_rq);
+
+	cpufreq_trigger_update(rq_clock(rq));
+}
+
 static inline void
 enqueue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) {}
 static inline void
@@ -3250,25 +3257,17 @@ static inline void check_schedstat_required(void)
 static void
 enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 {
-	bool renorm = !(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_WAKING);
-	bool curr = cfs_rq->curr == se;
-
 	/*
-	 * If we're the current task, we must renormalise before calling
-	 * update_curr().
+	 * Update the normalized vruntime before updating min_vruntime
+	 * through calling update_curr().
 	 */
-	if (renorm && curr)
+	if (!(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_WAKING))
 		se->vruntime += cfs_rq->min_vruntime;
 
-	update_curr(cfs_rq);
-
 	/*
-	 * Otherwise, renormalise after, such that we're placed at the current
-	 * moment in time, instead of some random moment in the past.
+	 * Update run-time statistics of the 'current'.
 	 */
-	if (renorm && !curr)
-		se->vruntime += cfs_rq->min_vruntime;
-
+	update_curr(cfs_rq);
 	enqueue_entity_load_avg(cfs_rq, se);
 	account_entity_enqueue(cfs_rq, se);
 	update_cfs_shares(cfs_rq);
@@ -3284,7 +3283,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 		update_stats_enqueue(cfs_rq, se);
 		check_spread(cfs_rq, se);
 	}
-	if (!curr)
+	if (se != cfs_rq->curr)
 		__enqueue_entity(cfs_rq, se);
 	se->on_rq = 1;
 
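
The enqueue_entity() hunk restores the order in which a sleeping task's vruntime, kept relative to the queue's min_vruntime while dequeued, is re-based before update_curr() advances min_vruntime. The relative-storage idea in isolation:

/*
 * Sketch of CFS-style relative vruntime bookkeeping: while an entity
 * is off the queue its vruntime is kept relative to min_vruntime, and
 * enqueue re-bases it against the current min_vruntime. Plain structs,
 * no scheduler, illustrative only.
 */
#include <assert.h>

struct rq_like { unsigned long long min_vruntime; };
struct se_like { unsigned long long vruntime; };

static void dequeue(struct rq_like *rq, struct se_like *se)
{
	se->vruntime -= rq->min_vruntime;	/* make relative */
}

static void enqueue(struct rq_like *rq, struct se_like *se)
{
	se->vruntime += rq->min_vruntime;	/* re-base on wakeup */
}

int main(void)
{
	struct rq_like rq = { .min_vruntime = 1000 };
	struct se_like se = { .vruntime = 1200 };

	dequeue(&rq, &se);		/* stored as +200 */
	rq.min_vruntime = 5000;		/* queue kept running meanwhile */
	enqueue(&rq, &se);

	assert(se.vruntime == 5200);	/* no unfair credit from sleeping */
	return 0;
}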
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 68deaf901a12..67afa06cc8bc 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1729,6 +1729,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
 				     !cpumask_test_cpu(lowest_rq->cpu,
 						       tsk_cpus_allowed(task)) ||
 				     task_running(rq, task) ||
+				     !rt_task(task) ||
 				     !task_on_rq_queued(task))) {
 
 			double_unlock_balance(rq, lowest_rq);
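
This rt.c hunk and the deadline.c hunk above close the same window: find_lock_lowest_rq()/find_lock_later_rq() drop the run-queue lock to acquire a second queue, and by the time both locks are held the task may have left the scheduling class, so !rt_task()/!dl_task() joins the revalidation. The general drop-relock-revalidate pattern, sketched with pthreads and an illustrative state field:

/*
 * Sketch of drop/retake/revalidate, assuming pthreads. 'struct queue'
 * and its state field are illustrative; the kernel revalidates cpu
 * affinity, task_running(), the scheduling class and queued status
 * after the locks may have been dropped.
 */
#include <pthread.h>
#include <stdbool.h>

#define STATE_ELIGIBLE	1

struct queue {
	pthread_mutex_t lock;
	int state;		/* may change while unlocked */
};

static bool lock_both_and_validate(struct queue *src, struct queue *dst)
{
	/* Caller holds src->lock; lock ordering forces dropping it
	 * before taking both, which opens a window for changes. */
	pthread_mutex_unlock(&src->lock);
	pthread_mutex_lock(&dst->lock);
	pthread_mutex_lock(&src->lock);

	/* Recheck everything the earlier decision relied on. */
	if (src->state != STATE_ELIGIBLE) {
		pthread_mutex_unlock(&dst->lock);
		return false;	/* caller retries or gives up */
	}
	return true;		/* both locks held, decision still valid */
}

int main(void)
{
	struct queue a = { PTHREAD_MUTEX_INITIALIZER, STATE_ELIGIBLE };
	struct queue b = { PTHREAD_MUTEX_INITIALIZER, 0 };

	pthread_mutex_lock(&a.lock);
	if (lock_both_and_validate(&a, &b))
		pthread_mutex_unlock(&b.lock);
	pthread_mutex_unlock(&a.lock);
	return 0;
}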
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 05ddc0820771..6f965864cc02 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -2095,8 +2095,13 @@ event_create_dir(struct dentry *parent, struct trace_event_file *file)
 	trace_create_file("filter", 0644, file->dir, file,
 			  &ftrace_event_filter_fops);
 
-	trace_create_file("trigger", 0644, file->dir, file,
-			  &event_trigger_fops);
+	/*
+	 * Only event directories that can be enabled should have
+	 * triggers.
+	 */
+	if (!(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE))
+		trace_create_file("trigger", 0644, file->dir, file,
+				  &event_trigger_fops);
 
 	trace_create_file("format", 0444, file->dir, call,
 			  &ftrace_event_format_fops);
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 2232ae3e3ad6..3bfdff06eea7 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -666,6 +666,35 @@ static void set_work_pool_and_clear_pending(struct work_struct *work,
 	 */
 	smp_wmb();
 	set_work_data(work, (unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT, 0);
+	/*
+	 * The following mb guarantees that previous clear of a PENDING bit
+	 * will not be reordered with any speculative LOADS or STORES from
+	 * work->current_func, which is executed afterwards. This possible
+	 * reordering can lead to a missed execution on attempt to queue
+	 * the same @work. E.g. consider this case:
+	 *
+	 *   CPU#0                         CPU#1
+	 *   ----------------------------  --------------------------------
+	 *
+	 * 1  STORE event_indicated
+	 * 2  queue_work_on() {
+	 * 3    test_and_set_bit(PENDING)
+	 * 4  }                            set_..._and_clear_pending() {
+	 * 5                                 set_work_data() # clear bit
+	 * 6                                 smp_mb()
+	 * 7                               work->current_func() {
+	 * 8                                 LOAD event_indicated
+	 *                                 }
+	 *
+	 * Without an explicit full barrier speculative LOAD on line 8 can
+	 * be executed before CPU#0 does STORE on line 1. If that happens,
+	 * CPU#0 observes the PENDING bit is still set and new execution of
+	 * a @work is not queued in the hope that CPU#1 will eventually
+	 * finish the queued @work. Meanwhile CPU#1 does not see
+	 * event_indicated is set, because speculative LOAD was executed
+	 * before actual STORE.
+	 */
+	smp_mb();
 }
 
 static void clear_work_data(struct work_struct *work)
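
The scenario in the new comment can be shrunk to a C11-atomics sketch: the work side must issue a full barrier between clearing PENDING and reading the caller's flag, pairing with the queueing side's test_and_set_bit(), or the event can be lost with neither side re-running the work. A compressed model of the two sides (illustrative only, not the kernel code):

/*
 * Model of the barrier pairing described above, assuming C11 atomics.
 * queue_side() plays CPU#0, work_side() plays CPU#1; 'pending' and
 * 'event_indicated' stand in for the PENDING bit and the caller's flag.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool pending = true;	/* a work item is queued */
static atomic_bool event_indicated;

static void queue_side(void)		/* CPU#0 */
{
	atomic_store_explicit(&event_indicated, true, memory_order_relaxed);

	/* test_and_set_bit(PENDING): if it was still set, we skip the
	 * queue and count on the running work seeing our flag, which is
	 * only safe if the work side fences after clearing PENDING. */
	if (atomic_exchange(&pending, true))
		printf("CPU#0: not requeued, relying on running work\n");
}

static void work_side(void)		/* CPU#1 */
{
	atomic_store_explicit(&pending, false, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);	/* the added smp_mb() */

	if (atomic_load_explicit(&event_indicated, memory_order_relaxed))
		printf("CPU#1: event observed, work runs again\n");
}

int main(void)
{
	/* Run sequentially here; the lost-wakeup interleaving needs two
	 * CPUs, but the placement of the fence is the point. */
	queue_side();
	work_side();
	return 0;
}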