Diffstat (limited to 'kernel')
-rw-r--r--  kernel/audit.c                   2
-rw-r--r--  kernel/audit_tree.c              1
-rw-r--r--  kernel/auditfilter.c             3
-rw-r--r--  kernel/cgroup.c                 31
-rw-r--r--  kernel/context_tracking.c       41
-rw-r--r--  kernel/cpu.c                    55
-rw-r--r--  kernel/cpu/idle.c               17
-rw-r--r--  kernel/events/core.c           233
-rw-r--r--  kernel/events/hw_breakpoint.c    6
-rw-r--r--  kernel/events/internal.h         4
-rw-r--r--  kernel/exit.c                    2
-rw-r--r--  kernel/irq/irqdomain.c           9
-rw-r--r--  kernel/kmod.c                    5
-rw-r--r--  kernel/kprobes.c                30
-rw-r--r--  kernel/module.c                 21
-rw-r--r--  kernel/posix-cpu-timers.c      395
-rw-r--r--  kernel/printk.c                 91
-rw-r--r--  kernel/ptrace.c                 20
-rw-r--r--  kernel/range.c                  19
-rw-r--r--  kernel/rcutree.c                21
-rw-r--r--  kernel/rcutree.h                 2
-rw-r--r--  kernel/rcutree_plugin.h          4
-rw-r--r--  kernel/sched/core.c             23
-rw-r--r--  kernel/sched/cputime.c           6
-rw-r--r--  kernel/sched/stats.h            39
-rw-r--r--  kernel/softirq.c                13
-rw-r--r--  kernel/sys.c                    29
-rw-r--r--  kernel/time/ntp.c                1
-rw-r--r--  kernel/time/tick-broadcast.c    19
-rw-r--r--  kernel/time/tick-sched.c         2
-rw-r--r--  kernel/time/timekeeping.c        8
-rw-r--r--  kernel/trace/ftrace.c           18
-rw-r--r--  kernel/trace/ring_buffer.c       3
-rw-r--r--  kernel/trace/trace.c            27
-rw-r--r--  kernel/trace/trace.h             2
-rw-r--r--  kernel/trace/trace_events.c      4
-rw-r--r--  kernel/trace/trace_selftest.c    2
-rw-r--r--  kernel/workqueue.c              10
38 files changed, 691 insertions(+), 527 deletions(-)
diff --git a/kernel/audit.c b/kernel/audit.c
index 21c7fa615bd3..91e53d04b6a9 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -1056,7 +1056,7 @@ static inline void audit_get_stamp(struct audit_context *ctx,
1056static void wait_for_auditd(unsigned long sleep_time) 1056static void wait_for_auditd(unsigned long sleep_time)
1057{ 1057{
1058 DECLARE_WAITQUEUE(wait, current); 1058 DECLARE_WAITQUEUE(wait, current);
1059 set_current_state(TASK_INTERRUPTIBLE); 1059 set_current_state(TASK_UNINTERRUPTIBLE);
1060 add_wait_queue(&audit_backlog_wait, &wait); 1060 add_wait_queue(&audit_backlog_wait, &wait);
1061 1061
1062 if (audit_backlog_limit && 1062 if (audit_backlog_limit &&
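
Note: the hunk above only flips the sleep state of wait_for_auditd() from TASK_INTERRUPTIBLE to TASK_UNINTERRUPTIBLE inside the usual open-coded wait-queue sleep. For readers unfamiliar with that idiom, here is a minimal sketch of it; the queue name, function name and timeout are illustrative placeholders, not the audit code itself.

    #include <linux/sched.h>
    #include <linux/wait.h>

    /* Placeholder wait queue; audit uses audit_backlog_wait instead. */
    static DECLARE_WAIT_QUEUE_HEAD(my_wait_queue);

    static void sleep_on_queue(unsigned long timeout)
    {
            DECLARE_WAITQUEUE(wait, current);

            /* Uninterruptible: pending signals must not cut the sleep short. */
            set_current_state(TASK_UNINTERRUPTIBLE);
            add_wait_queue(&my_wait_queue, &wait);

            schedule_timeout(timeout);      /* sleep until timeout or wake_up() */

            remove_wait_queue(&my_wait_queue, &wait);
            __set_current_state(TASK_RUNNING);
    }
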
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index a291aa23fb3f..43c307dc9453 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -658,6 +658,7 @@ int audit_add_tree_rule(struct audit_krule *rule)
658 struct vfsmount *mnt; 658 struct vfsmount *mnt;
659 int err; 659 int err;
660 660
661 rule->tree = NULL;
661 list_for_each_entry(tree, &tree_list, list) { 662 list_for_each_entry(tree, &tree_list, list) {
662 if (!strcmp(seed->pathname, tree->pathname)) { 663 if (!strcmp(seed->pathname, tree->pathname)) {
663 put_tree(seed); 664 put_tree(seed);
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 83a2970295d1..6bd4a90d1991 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -1021,9 +1021,6 @@ static void audit_log_rule_change(char *action, struct audit_krule *rule, int re
1021 * @seq: netlink audit message sequence (serial) number 1021 * @seq: netlink audit message sequence (serial) number
1022 * @data: payload data 1022 * @data: payload data
1023 * @datasz: size of payload data 1023 * @datasz: size of payload data
1024 * @loginuid: loginuid of sender
1025 * @sessionid: sessionid for netlink audit message
1026 * @sid: SE Linux Security ID of sender
1027 */ 1024 */
1028int audit_receive_filter(int type, int pid, int seq, void *data, size_t datasz) 1025int audit_receive_filter(int type, int pid, int seq, void *data, size_t datasz)
1029{ 1026{
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 2a9926275f80..a7c9e6ddb979 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1686,11 +1686,14 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
1686 */ 1686 */
1687 cgroup_drop_root(opts.new_root); 1687 cgroup_drop_root(opts.new_root);
1688 1688
1689 if (((root->flags | opts.flags) & CGRP_ROOT_SANE_BEHAVIOR) && 1689 if (root->flags != opts.flags) {
1690 root->flags != opts.flags) { 1690 if ((root->flags | opts.flags) & CGRP_ROOT_SANE_BEHAVIOR) {
1691 pr_err("cgroup: sane_behavior: new mount options should match the existing superblock\n"); 1691 pr_err("cgroup: sane_behavior: new mount options should match the existing superblock\n");
1692 ret = -EINVAL; 1692 ret = -EINVAL;
1693 goto drop_new_super; 1693 goto drop_new_super;
1694 } else {
1695 pr_warning("cgroup: new mount options do not match the existing superblock, will be ignored\n");
1696 }
1694 } 1697 }
1695 1698
1696 /* no subsys rebinding, so refcounts don't change */ 1699 /* no subsys rebinding, so refcounts don't change */
@@ -2699,13 +2702,14 @@ static int cgroup_add_file(struct cgroup *cgrp, struct cgroup_subsys *subsys,
2699 goto out; 2702 goto out;
2700 } 2703 }
2701 2704
2705 cfe->type = (void *)cft;
2706 cfe->dentry = dentry;
2707 dentry->d_fsdata = cfe;
2708 simple_xattrs_init(&cfe->xattrs);
2709
2702 mode = cgroup_file_mode(cft); 2710 mode = cgroup_file_mode(cft);
2703 error = cgroup_create_file(dentry, mode | S_IFREG, cgrp->root->sb); 2711 error = cgroup_create_file(dentry, mode | S_IFREG, cgrp->root->sb);
2704 if (!error) { 2712 if (!error) {
2705 cfe->type = (void *)cft;
2706 cfe->dentry = dentry;
2707 dentry->d_fsdata = cfe;
2708 simple_xattrs_init(&cfe->xattrs);
2709 list_add_tail(&cfe->node, &parent->files); 2713 list_add_tail(&cfe->node, &parent->files);
2710 cfe = NULL; 2714 cfe = NULL;
2711 } 2715 }
@@ -2953,11 +2957,8 @@ struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos,
2953 WARN_ON_ONCE(!rcu_read_lock_held()); 2957 WARN_ON_ONCE(!rcu_read_lock_held());
2954 2958
2955 /* if first iteration, pretend we just visited @cgroup */ 2959 /* if first iteration, pretend we just visited @cgroup */
2956 if (!pos) { 2960 if (!pos)
2957 if (list_empty(&cgroup->children))
2958 return NULL;
2959 pos = cgroup; 2961 pos = cgroup;
2960 }
2961 2962
2962 /* visit the first child if exists */ 2963 /* visit the first child if exists */
2963 next = list_first_or_null_rcu(&pos->children, struct cgroup, sibling); 2964 next = list_first_or_null_rcu(&pos->children, struct cgroup, sibling);
@@ -2965,14 +2966,14 @@ struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos,
2965 return next; 2966 return next;
2966 2967
2967 /* no child, visit my or the closest ancestor's next sibling */ 2968 /* no child, visit my or the closest ancestor's next sibling */
2968 do { 2969 while (pos != cgroup) {
2969 next = list_entry_rcu(pos->sibling.next, struct cgroup, 2970 next = list_entry_rcu(pos->sibling.next, struct cgroup,
2970 sibling); 2971 sibling);
2971 if (&next->sibling != &pos->parent->children) 2972 if (&next->sibling != &pos->parent->children)
2972 return next; 2973 return next;
2973 2974
2974 pos = pos->parent; 2975 pos = pos->parent;
2975 } while (pos != cgroup); 2976 }
2976 2977
2977 return NULL; 2978 return NULL;
2978} 2979}
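
The rewritten cgroup_next_descendant_pre() above is a standard pre-order "next" step: visit the first child if there is one, otherwise climb towards the subtree root looking for the closest ancestor's next sibling. A small runnable userspace sketch of the same traversal logic follows; the node type and its parent/first_child/next_sibling fields are hypothetical stand-ins for the RCU-protected cgroup lists.

    #include <stdio.h>
    #include <stddef.h>

    /* Hypothetical tree node; cgroups use RCU-protected sibling lists. */
    struct node {
            const char *name;
            struct node *parent;
            struct node *first_child;
            struct node *next_sibling;
    };

    /* Pre-order successor of @pos among the descendants of @root. */
    static struct node *next_descendant_pre(struct node *pos, struct node *root)
    {
            if (!pos)                       /* first iteration: pretend we visited root */
                    pos = root;

            if (pos->first_child)           /* visit the first child if it exists */
                    return pos->first_child;

            while (pos != root) {           /* else the closest ancestor's next sibling */
                    if (pos->next_sibling)
                            return pos->next_sibling;
                    pos = pos->parent;
            }
            return NULL;                    /* walked back up to the subtree root */
    }

    int main(void)
    {
            struct node a = { "a" }, b = { "b" }, c = { "c" }, d = { "d" };

            /* a -> (b -> d, c) */
            a.first_child = &b;
            b.parent = &a; b.next_sibling = &c; b.first_child = &d;
            c.parent = &a;
            d.parent = &b;

            for (struct node *p = next_descendant_pre(NULL, &a); p;
                 p = next_descendant_pre(p, &a))
                    printf("%s\n", p->name);        /* prints: b d c */
            return 0;
    }
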
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
index 65349f07b878..383f8231e436 100644
--- a/kernel/context_tracking.c
+++ b/kernel/context_tracking.c
@@ -15,7 +15,6 @@
15 */ 15 */
16 16
17#include <linux/context_tracking.h> 17#include <linux/context_tracking.h>
18#include <linux/kvm_host.h>
19#include <linux/rcupdate.h> 18#include <linux/rcupdate.h>
20#include <linux/sched.h> 19#include <linux/sched.h>
21#include <linux/hardirq.h> 20#include <linux/hardirq.h>
@@ -71,6 +70,46 @@ void user_enter(void)
71 local_irq_restore(flags); 70 local_irq_restore(flags);
72} 71}
73 72
73#ifdef CONFIG_PREEMPT
74/**
75 * preempt_schedule_context - preempt_schedule called by tracing
76 *
77 * The tracing infrastructure uses preempt_enable_notrace to prevent
78 * recursion and tracing preempt enabling caused by the tracing
79 * infrastructure itself. But as tracing can happen in areas coming
80 * from userspace or just about to enter userspace, a preempt enable
81 * can occur before user_exit() is called. This will cause the scheduler
82 * to be called when the system is still in usermode.
83 *
84 * To prevent this, the preempt_enable_notrace will use this function
85 * instead of preempt_schedule() to exit user context if needed before
86 * calling the scheduler.
87 */
88void __sched notrace preempt_schedule_context(void)
89{
90 struct thread_info *ti = current_thread_info();
91 enum ctx_state prev_ctx;
92
93 if (likely(ti->preempt_count || irqs_disabled()))
94 return;
95
96 /*
97 * Need to disable preemption in case user_exit() is traced
98 * and the tracer calls preempt_enable_notrace() causing
99 * an infinite recursion.
100 */
101 preempt_disable_notrace();
102 prev_ctx = exception_enter();
103 preempt_enable_no_resched_notrace();
104
105 preempt_schedule();
106
107 preempt_disable_notrace();
108 exception_exit(prev_ctx);
109 preempt_enable_notrace();
110}
111EXPORT_SYMBOL_GPL(preempt_schedule_context);
112#endif /* CONFIG_PREEMPT */
74 113
75/** 114/**
76 * user_exit - Inform the context tracking that the CPU is 115 * user_exit - Inform the context tracking that the CPU is
diff --git a/kernel/cpu.c b/kernel/cpu.c
index b5e4ab2d427e..198a38883e64 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -133,6 +133,27 @@ static void cpu_hotplug_done(void)
133 mutex_unlock(&cpu_hotplug.lock); 133 mutex_unlock(&cpu_hotplug.lock);
134} 134}
135 135
136/*
137 * Wait for currently running CPU hotplug operations to complete (if any) and
138 * disable future CPU hotplug (from sysfs). The 'cpu_add_remove_lock' protects
139 * the 'cpu_hotplug_disabled' flag. The same lock is also acquired by the
140 * hotplug path before performing hotplug operations. So acquiring that lock
141 * guarantees mutual exclusion from any currently running hotplug operations.
142 */
143void cpu_hotplug_disable(void)
144{
145 cpu_maps_update_begin();
146 cpu_hotplug_disabled = 1;
147 cpu_maps_update_done();
148}
149
150void cpu_hotplug_enable(void)
151{
152 cpu_maps_update_begin();
153 cpu_hotplug_disabled = 0;
154 cpu_maps_update_done();
155}
156
136#else /* #if CONFIG_HOTPLUG_CPU */ 157#else /* #if CONFIG_HOTPLUG_CPU */
137static void cpu_hotplug_begin(void) {} 158static void cpu_hotplug_begin(void) {}
138static void cpu_hotplug_done(void) {} 159static void cpu_hotplug_done(void) {}
@@ -541,36 +562,6 @@ static int __init alloc_frozen_cpus(void)
541core_initcall(alloc_frozen_cpus); 562core_initcall(alloc_frozen_cpus);
542 563
543/* 564/*
544 * Prevent regular CPU hotplug from racing with the freezer, by disabling CPU
545 * hotplug when tasks are about to be frozen. Also, don't allow the freezer
546 * to continue until any currently running CPU hotplug operation gets
547 * completed.
548 * To modify the 'cpu_hotplug_disabled' flag, we need to acquire the
549 * 'cpu_add_remove_lock'. And this same lock is also taken by the regular
550 * CPU hotplug path and released only after it is complete. Thus, we
551 * (and hence the freezer) will block here until any currently running CPU
552 * hotplug operation gets completed.
553 */
554void cpu_hotplug_disable_before_freeze(void)
555{
556 cpu_maps_update_begin();
557 cpu_hotplug_disabled = 1;
558 cpu_maps_update_done();
559}
560
561
562/*
563 * When tasks have been thawed, re-enable regular CPU hotplug (which had been
564 * disabled while beginning to freeze tasks).
565 */
566void cpu_hotplug_enable_after_thaw(void)
567{
568 cpu_maps_update_begin();
569 cpu_hotplug_disabled = 0;
570 cpu_maps_update_done();
571}
572
573/*
574 * When callbacks for CPU hotplug notifications are being executed, we must 565 * When callbacks for CPU hotplug notifications are being executed, we must
575 * ensure that the state of the system with respect to the tasks being frozen 566 * ensure that the state of the system with respect to the tasks being frozen
576 * or not, as reported by the notification, remains unchanged *throughout the 567 * or not, as reported by the notification, remains unchanged *throughout the
@@ -589,12 +580,12 @@ cpu_hotplug_pm_callback(struct notifier_block *nb,
589 580
590 case PM_SUSPEND_PREPARE: 581 case PM_SUSPEND_PREPARE:
591 case PM_HIBERNATION_PREPARE: 582 case PM_HIBERNATION_PREPARE:
592 cpu_hotplug_disable_before_freeze(); 583 cpu_hotplug_disable();
593 break; 584 break;
594 585
595 case PM_POST_SUSPEND: 586 case PM_POST_SUSPEND:
596 case PM_POST_HIBERNATION: 587 case PM_POST_HIBERNATION:
597 cpu_hotplug_enable_after_thaw(); 588 cpu_hotplug_enable();
598 break; 589 break;
599 590
600 default: 591 default:
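
The hunks above turn the freezer-specific helpers into generic cpu_hotplug_disable()/cpu_hotplug_enable(). A hedged sketch of how some other caller might use the pair to keep the online CPU set stable around a sensitive operation; the function and its body are illustrative only and assume the declarations added to <linux/cpu.h> alongside this change.

    #include <linux/cpu.h>

    /* Illustrative only: keep the set of online CPUs stable around some work. */
    static void do_work_with_stable_cpus(void)
    {
            cpu_hotplug_disable();          /* further hotplug attempts fail with -EBUSY */

            /*
             * ... operate on the current set of online CPUs; none can be
             * brought up or torn down until we re-enable hotplug below ...
             */

            cpu_hotplug_enable();
    }
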
diff --git a/kernel/cpu/idle.c b/kernel/cpu/idle.c
index d5585f5e038e..e695c0a0bcb5 100644
--- a/kernel/cpu/idle.c
+++ b/kernel/cpu/idle.c
@@ -5,6 +5,7 @@
5#include <linux/cpu.h> 5#include <linux/cpu.h>
6#include <linux/tick.h> 6#include <linux/tick.h>
7#include <linux/mm.h> 7#include <linux/mm.h>
8#include <linux/stackprotector.h>
8 9
9#include <asm/tlb.h> 10#include <asm/tlb.h>
10 11
@@ -58,6 +59,7 @@ void __weak arch_cpu_idle_dead(void) { }
58void __weak arch_cpu_idle(void) 59void __weak arch_cpu_idle(void)
59{ 60{
60 cpu_idle_force_poll = 1; 61 cpu_idle_force_poll = 1;
62 local_irq_enable();
61} 63}
62 64
63/* 65/*
@@ -112,6 +114,21 @@ static void cpu_idle_loop(void)
112 114
113void cpu_startup_entry(enum cpuhp_state state) 115void cpu_startup_entry(enum cpuhp_state state)
114{ 116{
117 /*
118 * This #ifdef needs to die, but it's too late in the cycle to
119 * make this generic (arm and sh have never invoked the canary
120 * init for the non boot cpus!). Will be fixed in 3.11
121 */
122#ifdef CONFIG_X86
123 /*
124 * If we're the non-boot CPU, nothing set the stack canary up
125 * for us. The boot CPU already has it initialized but no harm
126 * in doing it again. This is a good place for updating it, as
127 * we wont ever return from this function (so the invalid
128 * canaries already on the stack wont ever trigger).
129 */
130 boot_init_stack_canary();
131#endif
115 current_set_polling(); 132 current_set_polling();
116 arch_cpu_idle_prepare(); 133 arch_cpu_idle_prepare();
117 cpu_idle_loop(); 134 cpu_idle_loop();
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 9dc297faf7c0..b391907d5352 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -196,9 +196,6 @@ static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
196static void update_context_time(struct perf_event_context *ctx); 196static void update_context_time(struct perf_event_context *ctx);
197static u64 perf_event_time(struct perf_event *event); 197static u64 perf_event_time(struct perf_event *event);
198 198
199static void ring_buffer_attach(struct perf_event *event,
200 struct ring_buffer *rb);
201
202void __weak perf_event_print_debug(void) { } 199void __weak perf_event_print_debug(void) { }
203 200
204extern __weak const char *perf_pmu_name(void) 201extern __weak const char *perf_pmu_name(void)
@@ -2918,6 +2915,7 @@ static void free_event_rcu(struct rcu_head *head)
2918} 2915}
2919 2916
2920static void ring_buffer_put(struct ring_buffer *rb); 2917static void ring_buffer_put(struct ring_buffer *rb);
2918static void ring_buffer_detach(struct perf_event *event, struct ring_buffer *rb);
2921 2919
2922static void free_event(struct perf_event *event) 2920static void free_event(struct perf_event *event)
2923{ 2921{
@@ -2942,15 +2940,30 @@ static void free_event(struct perf_event *event)
2942 if (has_branch_stack(event)) { 2940 if (has_branch_stack(event)) {
2943 static_key_slow_dec_deferred(&perf_sched_events); 2941 static_key_slow_dec_deferred(&perf_sched_events);
2944 /* is system-wide event */ 2942 /* is system-wide event */
2945 if (!(event->attach_state & PERF_ATTACH_TASK)) 2943 if (!(event->attach_state & PERF_ATTACH_TASK)) {
2946 atomic_dec(&per_cpu(perf_branch_stack_events, 2944 atomic_dec(&per_cpu(perf_branch_stack_events,
2947 event->cpu)); 2945 event->cpu));
2946 }
2948 } 2947 }
2949 } 2948 }
2950 2949
2951 if (event->rb) { 2950 if (event->rb) {
2952 ring_buffer_put(event->rb); 2951 struct ring_buffer *rb;
2953 event->rb = NULL; 2952
2953 /*
2954 * Can happen when we close an event with re-directed output.
2955 *
2956 * Since we have a 0 refcount, perf_mmap_close() will skip
2957 * over us; possibly making our ring_buffer_put() the last.
2958 */
2959 mutex_lock(&event->mmap_mutex);
2960 rb = event->rb;
2961 if (rb) {
2962 rcu_assign_pointer(event->rb, NULL);
2963 ring_buffer_detach(event, rb);
2964 ring_buffer_put(rb); /* could be last */
2965 }
2966 mutex_unlock(&event->mmap_mutex);
2954 } 2967 }
2955 2968
2956 if (is_cgroup_event(event)) 2969 if (is_cgroup_event(event))
@@ -3188,30 +3201,13 @@ static unsigned int perf_poll(struct file *file, poll_table *wait)
3188 unsigned int events = POLL_HUP; 3201 unsigned int events = POLL_HUP;
3189 3202
3190 /* 3203 /*
3191 * Race between perf_event_set_output() and perf_poll(): perf_poll() 3204 * Pin the event->rb by taking event->mmap_mutex; otherwise
3192 * grabs the rb reference but perf_event_set_output() overrides it. 3205 * perf_event_set_output() can swizzle our rb and make us miss wakeups.
3193 * Here is the timeline for two threads T1, T2:
3194 * t0: T1, rb = rcu_dereference(event->rb)
3195 * t1: T2, old_rb = event->rb
3196 * t2: T2, event->rb = new rb
3197 * t3: T2, ring_buffer_detach(old_rb)
3198 * t4: T1, ring_buffer_attach(rb1)
3199 * t5: T1, poll_wait(event->waitq)
3200 *
3201 * To avoid this problem, we grab mmap_mutex in perf_poll()
3202 * thereby ensuring that the assignment of the new ring buffer
3203 * and the detachment of the old buffer appear atomic to perf_poll()
3204 */ 3206 */
3205 mutex_lock(&event->mmap_mutex); 3207 mutex_lock(&event->mmap_mutex);
3206 3208 rb = event->rb;
3207 rcu_read_lock(); 3209 if (rb)
3208 rb = rcu_dereference(event->rb);
3209 if (rb) {
3210 ring_buffer_attach(event, rb);
3211 events = atomic_xchg(&rb->poll, 0); 3210 events = atomic_xchg(&rb->poll, 0);
3212 }
3213 rcu_read_unlock();
3214
3215 mutex_unlock(&event->mmap_mutex); 3211 mutex_unlock(&event->mmap_mutex);
3216 3212
3217 poll_wait(file, &event->waitq, wait); 3213 poll_wait(file, &event->waitq, wait);
@@ -3521,16 +3517,12 @@ static void ring_buffer_attach(struct perf_event *event,
3521 return; 3517 return;
3522 3518
3523 spin_lock_irqsave(&rb->event_lock, flags); 3519 spin_lock_irqsave(&rb->event_lock, flags);
3524 if (!list_empty(&event->rb_entry)) 3520 if (list_empty(&event->rb_entry))
3525 goto unlock; 3521 list_add(&event->rb_entry, &rb->event_list);
3526
3527 list_add(&event->rb_entry, &rb->event_list);
3528unlock:
3529 spin_unlock_irqrestore(&rb->event_lock, flags); 3522 spin_unlock_irqrestore(&rb->event_lock, flags);
3530} 3523}
3531 3524
3532static void ring_buffer_detach(struct perf_event *event, 3525static void ring_buffer_detach(struct perf_event *event, struct ring_buffer *rb)
3533 struct ring_buffer *rb)
3534{ 3526{
3535 unsigned long flags; 3527 unsigned long flags;
3536 3528
@@ -3549,13 +3541,10 @@ static void ring_buffer_wakeup(struct perf_event *event)
3549 3541
3550 rcu_read_lock(); 3542 rcu_read_lock();
3551 rb = rcu_dereference(event->rb); 3543 rb = rcu_dereference(event->rb);
3552 if (!rb) 3544 if (rb) {
3553 goto unlock; 3545 list_for_each_entry_rcu(event, &rb->event_list, rb_entry)
3554 3546 wake_up_all(&event->waitq);
3555 list_for_each_entry_rcu(event, &rb->event_list, rb_entry) 3547 }
3556 wake_up_all(&event->waitq);
3557
3558unlock:
3559 rcu_read_unlock(); 3548 rcu_read_unlock();
3560} 3549}
3561 3550
@@ -3584,18 +3573,10 @@ static struct ring_buffer *ring_buffer_get(struct perf_event *event)
3584 3573
3585static void ring_buffer_put(struct ring_buffer *rb) 3574static void ring_buffer_put(struct ring_buffer *rb)
3586{ 3575{
3587 struct perf_event *event, *n;
3588 unsigned long flags;
3589
3590 if (!atomic_dec_and_test(&rb->refcount)) 3576 if (!atomic_dec_and_test(&rb->refcount))
3591 return; 3577 return;
3592 3578
3593 spin_lock_irqsave(&rb->event_lock, flags); 3579 WARN_ON_ONCE(!list_empty(&rb->event_list));
3594 list_for_each_entry_safe(event, n, &rb->event_list, rb_entry) {
3595 list_del_init(&event->rb_entry);
3596 wake_up_all(&event->waitq);
3597 }
3598 spin_unlock_irqrestore(&rb->event_lock, flags);
3599 3580
3600 call_rcu(&rb->rcu_head, rb_free_rcu); 3581 call_rcu(&rb->rcu_head, rb_free_rcu);
3601} 3582}
@@ -3605,26 +3586,100 @@ static void perf_mmap_open(struct vm_area_struct *vma)
3605 struct perf_event *event = vma->vm_file->private_data; 3586 struct perf_event *event = vma->vm_file->private_data;
3606 3587
3607 atomic_inc(&event->mmap_count); 3588 atomic_inc(&event->mmap_count);
3589 atomic_inc(&event->rb->mmap_count);
3608} 3590}
3609 3591
3592/*
3593 * A buffer can be mmap()ed multiple times; either directly through the same
3594 * event, or through other events by use of perf_event_set_output().
3595 *
3596 * In order to undo the VM accounting done by perf_mmap() we need to destroy
3597 * the buffer here, where we still have a VM context. This means we need
3598 * to detach all events redirecting to us.
3599 */
3610static void perf_mmap_close(struct vm_area_struct *vma) 3600static void perf_mmap_close(struct vm_area_struct *vma)
3611{ 3601{
3612 struct perf_event *event = vma->vm_file->private_data; 3602 struct perf_event *event = vma->vm_file->private_data;
3613 3603
3614 if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) { 3604 struct ring_buffer *rb = event->rb;
3615 unsigned long size = perf_data_size(event->rb); 3605 struct user_struct *mmap_user = rb->mmap_user;
3616 struct user_struct *user = event->mmap_user; 3606 int mmap_locked = rb->mmap_locked;
3617 struct ring_buffer *rb = event->rb; 3607 unsigned long size = perf_data_size(rb);
3608
3609 atomic_dec(&rb->mmap_count);
3610
3611 if (!atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex))
3612 return;
3618 3613
3619 atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm); 3614 /* Detach current event from the buffer. */
3620 vma->vm_mm->pinned_vm -= event->mmap_locked; 3615 rcu_assign_pointer(event->rb, NULL);
3621 rcu_assign_pointer(event->rb, NULL); 3616 ring_buffer_detach(event, rb);
3622 ring_buffer_detach(event, rb); 3617 mutex_unlock(&event->mmap_mutex);
3618
3619 /* If there's still other mmap()s of this buffer, we're done. */
3620 if (atomic_read(&rb->mmap_count)) {
3621 ring_buffer_put(rb); /* can't be last */
3622 return;
3623 }
3624
3625 /*
3626 * No other mmap()s, detach from all other events that might redirect
3627 * into the now unreachable buffer. Somewhat complicated by the
3628 * fact that rb::event_lock otherwise nests inside mmap_mutex.
3629 */
3630again:
3631 rcu_read_lock();
3632 list_for_each_entry_rcu(event, &rb->event_list, rb_entry) {
3633 if (!atomic_long_inc_not_zero(&event->refcount)) {
3634 /*
3635 * This event is en-route to free_event() which will
3636 * detach it and remove it from the list.
3637 */
3638 continue;
3639 }
3640 rcu_read_unlock();
3641
3642 mutex_lock(&event->mmap_mutex);
3643 /*
3644 * Check we didn't race with perf_event_set_output() which can
3645 * swizzle the rb from under us while we were waiting to
3646 * acquire mmap_mutex.
3647 *
3648 * If we find a different rb; ignore this event, a next
3649 * iteration will no longer find it on the list. We have to
3650 * still restart the iteration to make sure we're not now
3651 * iterating the wrong list.
3652 */
3653 if (event->rb == rb) {
3654 rcu_assign_pointer(event->rb, NULL);
3655 ring_buffer_detach(event, rb);
3656 ring_buffer_put(rb); /* can't be last, we still have one */
3657 }
3623 mutex_unlock(&event->mmap_mutex); 3658 mutex_unlock(&event->mmap_mutex);
3659 put_event(event);
3624 3660
3625 ring_buffer_put(rb); 3661 /*
3626 free_uid(user); 3662 * Restart the iteration; either we're on the wrong list or
3663 * destroyed its integrity by doing a deletion.
3664 */
3665 goto again;
3627 } 3666 }
3667 rcu_read_unlock();
3668
3669 /*
3670 * It could be there's still a few 0-ref events on the list; they'll
3671 * get cleaned up by free_event() -- they'll also still have their
3672 * ref on the rb and will free it whenever they are done with it.
3673 *
3674 * Aside from that, this buffer is 'fully' detached and unmapped,
3675 * undo the VM accounting.
3676 */
3677
3678 atomic_long_sub((size >> PAGE_SHIFT) + 1, &mmap_user->locked_vm);
3679 vma->vm_mm->pinned_vm -= mmap_locked;
3680 free_uid(mmap_user);
3681
3682 ring_buffer_put(rb); /* could be last */
3628} 3683}
3629 3684
3630static const struct vm_operations_struct perf_mmap_vmops = { 3685static const struct vm_operations_struct perf_mmap_vmops = {
@@ -3674,12 +3729,24 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
3674 return -EINVAL; 3729 return -EINVAL;
3675 3730
3676 WARN_ON_ONCE(event->ctx->parent_ctx); 3731 WARN_ON_ONCE(event->ctx->parent_ctx);
3732again:
3677 mutex_lock(&event->mmap_mutex); 3733 mutex_lock(&event->mmap_mutex);
3678 if (event->rb) { 3734 if (event->rb) {
3679 if (event->rb->nr_pages == nr_pages) 3735 if (event->rb->nr_pages != nr_pages) {
3680 atomic_inc(&event->rb->refcount);
3681 else
3682 ret = -EINVAL; 3736 ret = -EINVAL;
3737 goto unlock;
3738 }
3739
3740 if (!atomic_inc_not_zero(&event->rb->mmap_count)) {
3741 /*
3742 * Raced against perf_mmap_close() through
3743 * perf_event_set_output(). Try again, hope for better
3744 * luck.
3745 */
3746 mutex_unlock(&event->mmap_mutex);
3747 goto again;
3748 }
3749
3683 goto unlock; 3750 goto unlock;
3684 } 3751 }
3685 3752
@@ -3720,12 +3787,16 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
3720 ret = -ENOMEM; 3787 ret = -ENOMEM;
3721 goto unlock; 3788 goto unlock;
3722 } 3789 }
3723 rcu_assign_pointer(event->rb, rb); 3790
3791 atomic_set(&rb->mmap_count, 1);
3792 rb->mmap_locked = extra;
3793 rb->mmap_user = get_current_user();
3724 3794
3725 atomic_long_add(user_extra, &user->locked_vm); 3795 atomic_long_add(user_extra, &user->locked_vm);
3726 event->mmap_locked = extra; 3796 vma->vm_mm->pinned_vm += extra;
3727 event->mmap_user = get_current_user(); 3797
3728 vma->vm_mm->pinned_vm += event->mmap_locked; 3798 ring_buffer_attach(event, rb);
3799 rcu_assign_pointer(event->rb, rb);
3729 3800
3730 perf_event_update_userpage(event); 3801 perf_event_update_userpage(event);
3731 3802
@@ -3734,7 +3805,11 @@ unlock:
3734 atomic_inc(&event->mmap_count); 3805 atomic_inc(&event->mmap_count);
3735 mutex_unlock(&event->mmap_mutex); 3806 mutex_unlock(&event->mmap_mutex);
3736 3807
3737 vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; 3808 /*
3809 * Since pinned accounting is per vm we cannot allow fork() to copy our
3810 * vma.
3811 */
3812 vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP;
3738 vma->vm_ops = &perf_mmap_vmops; 3813 vma->vm_ops = &perf_mmap_vmops;
3739 3814
3740 return ret; 3815 return ret;
@@ -6412,6 +6487,8 @@ set:
6412 if (atomic_read(&event->mmap_count)) 6487 if (atomic_read(&event->mmap_count))
6413 goto unlock; 6488 goto unlock;
6414 6489
6490 old_rb = event->rb;
6491
6415 if (output_event) { 6492 if (output_event) {
6416 /* get the rb we want to redirect to */ 6493 /* get the rb we want to redirect to */
6417 rb = ring_buffer_get(output_event); 6494 rb = ring_buffer_get(output_event);
@@ -6419,16 +6496,28 @@ set:
6419 goto unlock; 6496 goto unlock;
6420 } 6497 }
6421 6498
6422 old_rb = event->rb;
6423 rcu_assign_pointer(event->rb, rb);
6424 if (old_rb) 6499 if (old_rb)
6425 ring_buffer_detach(event, old_rb); 6500 ring_buffer_detach(event, old_rb);
6501
6502 if (rb)
6503 ring_buffer_attach(event, rb);
6504
6505 rcu_assign_pointer(event->rb, rb);
6506
6507 if (old_rb) {
6508 ring_buffer_put(old_rb);
6509 /*
6510 * Since we detached before setting the new rb, so that we
6511 * could attach the new rb, we could have missed a wakeup.
6512 * Provide it now.
6513 */
6514 wake_up_all(&event->waitq);
6515 }
6516
6426 ret = 0; 6517 ret = 0;
6427unlock: 6518unlock:
6428 mutex_unlock(&event->mmap_mutex); 6519 mutex_unlock(&event->mmap_mutex);
6429 6520
6430 if (old_rb)
6431 ring_buffer_put(old_rb);
6432out: 6521out:
6433 return ret; 6522 return ret;
6434} 6523}
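
Several of the perf_mmap()/perf_mmap_close() changes above revolve around one idiom: take a reference with atomic_inc_not_zero() and, if the object is already headed to zero, back off and retry. Below is a small self-contained userspace sketch of that "pin or retry" pattern using C11 atomics; the struct and function names are made up, the kernel uses atomic_t plus mmap_mutex.

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    /* Hypothetical refcounted object standing in for the perf ring buffer. */
    struct buf {
            atomic_int refcount;
    };

    /* Take a reference only if the object is still live (refcount > 0). */
    static bool buf_tryget(struct buf *b)
    {
            int old = atomic_load(&b->refcount);

            while (old != 0) {
                    if (atomic_compare_exchange_weak(&b->refcount, &old, old + 1))
                            return true;    /* pinned: refcount went old -> old + 1 */
            }
            return false;                   /* already zero: caller must retry or give up */
    }

    static void buf_put(struct buf *b)
    {
            if (atomic_fetch_sub(&b->refcount, 1) == 1)
                    printf("last reference dropped, free the buffer here\n");
    }

    int main(void)
    {
            struct buf b = { .refcount = 1 };

            if (buf_tryget(&b))             /* succeeds: 1 -> 2 */
                    buf_put(&b);            /* 2 -> 1 */
            buf_put(&b);                    /* 1 -> 0: prints the free message */

            struct buf dead = { .refcount = 0 };
            printf("tryget on dead buffer: %s\n", buf_tryget(&dead) ? "ok" : "failed");
            return 0;
    }
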
diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c
index a64f8aeb5c1f..20185ea64aa6 100644
--- a/kernel/events/hw_breakpoint.c
+++ b/kernel/events/hw_breakpoint.c
@@ -120,7 +120,7 @@ static int task_bp_pinned(int cpu, struct perf_event *bp, enum bp_type_idx type)
120 list_for_each_entry(iter, &bp_task_head, hw.bp_list) { 120 list_for_each_entry(iter, &bp_task_head, hw.bp_list) {
121 if (iter->hw.bp_target == tsk && 121 if (iter->hw.bp_target == tsk &&
122 find_slot_idx(iter) == type && 122 find_slot_idx(iter) == type &&
123 cpu == iter->cpu) 123 (iter->cpu < 0 || cpu == iter->cpu))
124 count += hw_breakpoint_weight(iter); 124 count += hw_breakpoint_weight(iter);
125 } 125 }
126 126
@@ -149,7 +149,7 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp,
149 return; 149 return;
150 } 150 }
151 151
152 for_each_online_cpu(cpu) { 152 for_each_possible_cpu(cpu) {
153 unsigned int nr; 153 unsigned int nr;
154 154
155 nr = per_cpu(nr_cpu_bp_pinned[type], cpu); 155 nr = per_cpu(nr_cpu_bp_pinned[type], cpu);
@@ -235,7 +235,7 @@ toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type,
235 if (cpu >= 0) { 235 if (cpu >= 0) {
236 toggle_bp_task_slot(bp, cpu, enable, type, weight); 236 toggle_bp_task_slot(bp, cpu, enable, type, weight);
237 } else { 237 } else {
238 for_each_online_cpu(cpu) 238 for_each_possible_cpu(cpu)
239 toggle_bp_task_slot(bp, cpu, enable, type, weight); 239 toggle_bp_task_slot(bp, cpu, enable, type, weight);
240 } 240 }
241 241
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index eb675c4d59df..ca6599723be5 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -31,6 +31,10 @@ struct ring_buffer {
31 spinlock_t event_lock; 31 spinlock_t event_lock;
32 struct list_head event_list; 32 struct list_head event_list;
33 33
34 atomic_t mmap_count;
35 unsigned long mmap_locked;
36 struct user_struct *mmap_user;
37
34 struct perf_event_mmap_page *user_page; 38 struct perf_event_mmap_page *user_page;
35 void *data_pages[0]; 39 void *data_pages[0];
36}; 40};
diff --git a/kernel/exit.c b/kernel/exit.c
index af2eb3cbd499..7bb73f9d09db 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -649,7 +649,6 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
649 * jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2) 649 * jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2)
650 */ 650 */
651 forget_original_parent(tsk); 651 forget_original_parent(tsk);
652 exit_task_namespaces(tsk);
653 652
654 write_lock_irq(&tasklist_lock); 653 write_lock_irq(&tasklist_lock);
655 if (group_dead) 654 if (group_dead)
@@ -795,6 +794,7 @@ void do_exit(long code)
795 exit_shm(tsk); 794 exit_shm(tsk);
796 exit_files(tsk); 795 exit_files(tsk);
797 exit_fs(tsk); 796 exit_fs(tsk);
797 exit_task_namespaces(tsk);
798 exit_task_work(tsk); 798 exit_task_work(tsk);
799 check_stack_usage(); 799 check_stack_usage();
800 exit_thread(); 800 exit_thread();
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 5a83dde8ca0c..54a4d5223238 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -143,7 +143,10 @@ static unsigned int irq_domain_legacy_revmap(struct irq_domain *domain,
143 * irq_domain_add_simple() - Allocate and register a simple irq_domain. 143 * irq_domain_add_simple() - Allocate and register a simple irq_domain.
144 * @of_node: pointer to interrupt controller's device tree node. 144 * @of_node: pointer to interrupt controller's device tree node.
145 * @size: total number of irqs in mapping 145 * @size: total number of irqs in mapping
146 * @first_irq: first number of irq block assigned to the domain 146 * @first_irq: first number of irq block assigned to the domain,
147 * pass zero to assign irqs on-the-fly. This will result in a
148 * linear IRQ domain so it is important to use irq_create_mapping()
149 * for each used IRQ, especially when SPARSE_IRQ is enabled.
147 * @ops: map/unmap domain callbacks 150 * @ops: map/unmap domain callbacks
148 * @host_data: Controller private data pointer 151 * @host_data: Controller private data pointer
149 * 152 *
@@ -191,6 +194,7 @@ struct irq_domain *irq_domain_add_simple(struct device_node *of_node,
191 /* A linear domain is the default */ 194 /* A linear domain is the default */
192 return irq_domain_add_linear(of_node, size, ops, host_data); 195 return irq_domain_add_linear(of_node, size, ops, host_data);
193} 196}
197EXPORT_SYMBOL_GPL(irq_domain_add_simple);
194 198
195/** 199/**
196 * irq_domain_add_legacy() - Allocate and register a legacy revmap irq_domain. 200 * irq_domain_add_legacy() - Allocate and register a legacy revmap irq_domain.
@@ -397,11 +401,12 @@ static void irq_domain_disassociate_many(struct irq_domain *domain,
397 while (count--) { 401 while (count--) {
398 int irq = irq_base + count; 402 int irq = irq_base + count;
399 struct irq_data *irq_data = irq_get_irq_data(irq); 403 struct irq_data *irq_data = irq_get_irq_data(irq);
400 irq_hw_number_t hwirq = irq_data->hwirq; 404 irq_hw_number_t hwirq;
401 405
402 if (WARN_ON(!irq_data || irq_data->domain != domain)) 406 if (WARN_ON(!irq_data || irq_data->domain != domain))
403 continue; 407 continue;
404 408
409 hwirq = irq_data->hwirq;
405 irq_set_status_flags(irq, IRQ_NOREQUEST); 410 irq_set_status_flags(irq, IRQ_NOREQUEST);
406 411
407 /* remove chip and handler */ 412 /* remove chip and handler */
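
The expanded kerneldoc above notes that passing first_irq == 0 to irq_domain_add_simple() yields a linear domain, so each hardware IRQ must then be mapped explicitly with irq_create_mapping(). A hedged driver-style sketch of that usage; the ops table, device node pointer, domain size and hwirq number are placeholders, not taken from this patch.

    #include <linux/errno.h>
    #include <linux/irq.h>
    #include <linux/irqdomain.h>
    #include <linux/of.h>

    /* Placeholder ops; a real controller fills in .map/.unmap/.xlate. */
    static const struct irq_domain_ops my_irq_ops = {
            .xlate = irq_domain_xlate_onecell,
    };

    static int my_controller_probe(struct device_node *np)
    {
            struct irq_domain *domain;
            unsigned int virq;

            /* first_irq == 0: a linear domain, virqs are allocated on demand */
            domain = irq_domain_add_simple(np, 32, 0, &my_irq_ops, NULL);
            if (!domain)
                    return -ENOMEM;

            /* ... so every used hwirq needs an explicit mapping, e.g. hwirq 5: */
            virq = irq_create_mapping(domain, 5);
            if (!virq)
                    return -EINVAL;

            return 0;
    }
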
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 1296e72e4161..8241906c4b61 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -569,6 +569,11 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
569 int retval = 0; 569 int retval = 0;
570 570
571 helper_lock(); 571 helper_lock();
572 if (!sub_info->path) {
573 retval = -EINVAL;
574 goto out;
575 }
576
572 if (sub_info->path[0] == '\0') 577 if (sub_info->path[0] == '\0')
573 goto out; 578 goto out;
574 579
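
With the added check, call_usermodehelper_exec() now fails cleanly with -EINVAL when a caller never set the helper path, instead of dereferencing NULL in the '\0' test below. A hedged sketch of a typical caller, to show where that path comes from; the helper binary and its arguments are made up.

    #include <linux/kmod.h>

    /* Illustrative caller; "/sbin/my-helper" and its arguments are made up. */
    static int run_my_helper(const char *event)
    {
            char *argv[] = { "/sbin/my-helper", (char *)event, NULL };
            char *envp[] = { "HOME=/", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL };

            /*
             * The first argument becomes sub_info->path; were it NULL, the new
             * check above would return -EINVAL rather than oops.
             */
            return call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
    }
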
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 3fed7f0cbcdf..bddf3b201a48 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -467,6 +467,7 @@ static struct kprobe *__kprobes get_optimized_kprobe(unsigned long addr)
467/* Optimization staging list, protected by kprobe_mutex */ 467/* Optimization staging list, protected by kprobe_mutex */
468static LIST_HEAD(optimizing_list); 468static LIST_HEAD(optimizing_list);
469static LIST_HEAD(unoptimizing_list); 469static LIST_HEAD(unoptimizing_list);
470static LIST_HEAD(freeing_list);
470 471
471static void kprobe_optimizer(struct work_struct *work); 472static void kprobe_optimizer(struct work_struct *work);
472static DECLARE_DELAYED_WORK(optimizing_work, kprobe_optimizer); 473static DECLARE_DELAYED_WORK(optimizing_work, kprobe_optimizer);
@@ -504,7 +505,7 @@ static __kprobes void do_optimize_kprobes(void)
504 * Unoptimize (replace a jump with a breakpoint and remove the breakpoint 505 * Unoptimize (replace a jump with a breakpoint and remove the breakpoint
505 * if need) kprobes listed on unoptimizing_list. 506 * if need) kprobes listed on unoptimizing_list.
506 */ 507 */
507static __kprobes void do_unoptimize_kprobes(struct list_head *free_list) 508static __kprobes void do_unoptimize_kprobes(void)
508{ 509{
509 struct optimized_kprobe *op, *tmp; 510 struct optimized_kprobe *op, *tmp;
510 511
@@ -515,9 +516,9 @@ static __kprobes void do_unoptimize_kprobes(struct list_head *free_list)
515 /* Ditto to do_optimize_kprobes */ 516 /* Ditto to do_optimize_kprobes */
516 get_online_cpus(); 517 get_online_cpus();
517 mutex_lock(&text_mutex); 518 mutex_lock(&text_mutex);
518 arch_unoptimize_kprobes(&unoptimizing_list, free_list); 519 arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list);
519 /* Loop free_list for disarming */ 520 /* Loop free_list for disarming */
520 list_for_each_entry_safe(op, tmp, free_list, list) { 521 list_for_each_entry_safe(op, tmp, &freeing_list, list) {
521 /* Disarm probes if marked disabled */ 522 /* Disarm probes if marked disabled */
522 if (kprobe_disabled(&op->kp)) 523 if (kprobe_disabled(&op->kp))
523 arch_disarm_kprobe(&op->kp); 524 arch_disarm_kprobe(&op->kp);
@@ -536,11 +537,11 @@ static __kprobes void do_unoptimize_kprobes(struct list_head *free_list)
536} 537}
537 538
538/* Reclaim all kprobes on the free_list */ 539/* Reclaim all kprobes on the free_list */
539static __kprobes void do_free_cleaned_kprobes(struct list_head *free_list) 540static __kprobes void do_free_cleaned_kprobes(void)
540{ 541{
541 struct optimized_kprobe *op, *tmp; 542 struct optimized_kprobe *op, *tmp;
542 543
543 list_for_each_entry_safe(op, tmp, free_list, list) { 544 list_for_each_entry_safe(op, tmp, &freeing_list, list) {
544 BUG_ON(!kprobe_unused(&op->kp)); 545 BUG_ON(!kprobe_unused(&op->kp));
545 list_del_init(&op->list); 546 list_del_init(&op->list);
546 free_aggr_kprobe(&op->kp); 547 free_aggr_kprobe(&op->kp);
@@ -556,8 +557,6 @@ static __kprobes void kick_kprobe_optimizer(void)
556/* Kprobe jump optimizer */ 557/* Kprobe jump optimizer */
557static __kprobes void kprobe_optimizer(struct work_struct *work) 558static __kprobes void kprobe_optimizer(struct work_struct *work)
558{ 559{
559 LIST_HEAD(free_list);
560
561 mutex_lock(&kprobe_mutex); 560 mutex_lock(&kprobe_mutex);
562 /* Lock modules while optimizing kprobes */ 561 /* Lock modules while optimizing kprobes */
563 mutex_lock(&module_mutex); 562 mutex_lock(&module_mutex);
@@ -566,7 +565,7 @@ static __kprobes void kprobe_optimizer(struct work_struct *work)
566 * Step 1: Unoptimize kprobes and collect cleaned (unused and disarmed) 565 * Step 1: Unoptimize kprobes and collect cleaned (unused and disarmed)
567 * kprobes before waiting for quiesence period. 566 * kprobes before waiting for quiesence period.
568 */ 567 */
569 do_unoptimize_kprobes(&free_list); 568 do_unoptimize_kprobes();
570 569
571 /* 570 /*
572 * Step 2: Wait for quiesence period to ensure all running interrupts 571 * Step 2: Wait for quiesence period to ensure all running interrupts
@@ -581,7 +580,7 @@ static __kprobes void kprobe_optimizer(struct work_struct *work)
581 do_optimize_kprobes(); 580 do_optimize_kprobes();
582 581
583 /* Step 4: Free cleaned kprobes after quiesence period */ 582 /* Step 4: Free cleaned kprobes after quiesence period */
584 do_free_cleaned_kprobes(&free_list); 583 do_free_cleaned_kprobes();
585 584
586 mutex_unlock(&module_mutex); 585 mutex_unlock(&module_mutex);
587 mutex_unlock(&kprobe_mutex); 586 mutex_unlock(&kprobe_mutex);
@@ -723,8 +722,19 @@ static void __kprobes kill_optimized_kprobe(struct kprobe *p)
723 if (!list_empty(&op->list)) 722 if (!list_empty(&op->list))
724 /* Dequeue from the (un)optimization queue */ 723 /* Dequeue from the (un)optimization queue */
725 list_del_init(&op->list); 724 list_del_init(&op->list);
726
727 op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; 725 op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
726
727 if (kprobe_unused(p)) {
728 /* Enqueue if it is unused */
729 list_add(&op->list, &freeing_list);
730 /*
731 * Remove unused probes from the hash list. After waiting
732 * for synchronization, this probe is reclaimed.
733 * (reclaiming is done by do_free_cleaned_kprobes().)
734 */
735 hlist_del_rcu(&op->kp.hlist);
736 }
737
728 /* Don't touch the code, because it is already freed. */ 738 /* Don't touch the code, because it is already freed. */
729 arch_remove_optimized_kprobe(op); 739 arch_remove_optimized_kprobe(op);
730} 740}
diff --git a/kernel/module.c b/kernel/module.c
index b049939177f6..cab4bce49c23 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2431,10 +2431,10 @@ static void kmemleak_load_module(const struct module *mod,
2431 kmemleak_scan_area(mod, sizeof(struct module), GFP_KERNEL); 2431 kmemleak_scan_area(mod, sizeof(struct module), GFP_KERNEL);
2432 2432
2433 for (i = 1; i < info->hdr->e_shnum; i++) { 2433 for (i = 1; i < info->hdr->e_shnum; i++) {
2434 const char *name = info->secstrings + info->sechdrs[i].sh_name; 2434 /* Scan all writable sections that's not executable */
2435 if (!(info->sechdrs[i].sh_flags & SHF_ALLOC)) 2435 if (!(info->sechdrs[i].sh_flags & SHF_ALLOC) ||
2436 continue; 2436 !(info->sechdrs[i].sh_flags & SHF_WRITE) ||
2437 if (!strstarts(name, ".data") && !strstarts(name, ".bss")) 2437 (info->sechdrs[i].sh_flags & SHF_EXECINSTR))
2438 continue; 2438 continue;
2439 2439
2440 kmemleak_scan_area((void *)info->sechdrs[i].sh_addr, 2440 kmemleak_scan_area((void *)info->sechdrs[i].sh_addr,
@@ -2769,24 +2769,11 @@ static void find_module_sections(struct module *mod, struct load_info *info)
2769 mod->trace_events = section_objs(info, "_ftrace_events", 2769 mod->trace_events = section_objs(info, "_ftrace_events",
2770 sizeof(*mod->trace_events), 2770 sizeof(*mod->trace_events),
2771 &mod->num_trace_events); 2771 &mod->num_trace_events);
2772 /*
2773 * This section contains pointers to allocated objects in the trace
2774 * code and not scanning it leads to false positives.
2775 */
2776 kmemleak_scan_area(mod->trace_events, sizeof(*mod->trace_events) *
2777 mod->num_trace_events, GFP_KERNEL);
2778#endif 2772#endif
2779#ifdef CONFIG_TRACING 2773#ifdef CONFIG_TRACING
2780 mod->trace_bprintk_fmt_start = section_objs(info, "__trace_printk_fmt", 2774 mod->trace_bprintk_fmt_start = section_objs(info, "__trace_printk_fmt",
2781 sizeof(*mod->trace_bprintk_fmt_start), 2775 sizeof(*mod->trace_bprintk_fmt_start),
2782 &mod->num_trace_bprintk_fmt); 2776 &mod->num_trace_bprintk_fmt);
2783 /*
2784 * This section contains pointers to allocated objects in the trace
2785 * code and not scanning it leads to false positives.
2786 */
2787 kmemleak_scan_area(mod->trace_bprintk_fmt_start,
2788 sizeof(*mod->trace_bprintk_fmt_start) *
2789 mod->num_trace_bprintk_fmt, GFP_KERNEL);
2790#endif 2777#endif
2791#ifdef CONFIG_FTRACE_MCOUNT_RECORD 2778#ifdef CONFIG_FTRACE_MCOUNT_RECORD
2792 /* sechdrs[0].sh_size is always zero */ 2779 /* sechdrs[0].sh_size is always zero */
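
The new kmemleak filter above scans every allocated, writable, non-executable module section rather than only those named ".data*" or ".bss*". The predicate itself is plain ELF section-flag arithmetic; here is a tiny runnable userspace illustration using <elf.h>, with fabricated section headers.

    #include <elf.h>
    #include <stdbool.h>
    #include <stdio.h>

    /* Same predicate as the new kmemleak filter: allocated, writable, not code. */
    static bool is_scannable_section(const Elf64_Shdr *shdr)
    {
            return (shdr->sh_flags & SHF_ALLOC) &&
                   (shdr->sh_flags & SHF_WRITE) &&
                   !(shdr->sh_flags & SHF_EXECINSTR);
    }

    int main(void)
    {
            /* Fabricated headers: one .data-like and one .text-like section. */
            Elf64_Shdr data = { .sh_flags = SHF_ALLOC | SHF_WRITE };
            Elf64_Shdr text = { .sh_flags = SHF_ALLOC | SHF_EXECINSTR };

            printf("data-like section scanned: %s\n",
                   is_scannable_section(&data) ? "yes" : "no");   /* yes */
            printf("text-like section scanned: %s\n",
                   is_scannable_section(&text) ? "yes" : "no");   /* no */
            return 0;
    }
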
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 42670e9b44e0..c7f31aa272f7 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -51,59 +51,28 @@ static int check_clock(const clockid_t which_clock)
51 return error; 51 return error;
52} 52}
53 53
54static inline union cpu_time_count 54static inline unsigned long long
55timespec_to_sample(const clockid_t which_clock, const struct timespec *tp) 55timespec_to_sample(const clockid_t which_clock, const struct timespec *tp)
56{ 56{
57 union cpu_time_count ret; 57 unsigned long long ret;
58 ret.sched = 0; /* high half always zero when .cpu used */ 58
59 ret = 0; /* high half always zero when .cpu used */
59 if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) { 60 if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
60 ret.sched = (unsigned long long)tp->tv_sec * NSEC_PER_SEC + tp->tv_nsec; 61 ret = (unsigned long long)tp->tv_sec * NSEC_PER_SEC + tp->tv_nsec;
61 } else { 62 } else {
62 ret.cpu = timespec_to_cputime(tp); 63 ret = cputime_to_expires(timespec_to_cputime(tp));
63 } 64 }
64 return ret; 65 return ret;
65} 66}
66 67
67static void sample_to_timespec(const clockid_t which_clock, 68static void sample_to_timespec(const clockid_t which_clock,
68 union cpu_time_count cpu, 69 unsigned long long expires,
69 struct timespec *tp) 70 struct timespec *tp)
70{ 71{
71 if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) 72 if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED)
72 *tp = ns_to_timespec(cpu.sched); 73 *tp = ns_to_timespec(expires);
73 else 74 else
74 cputime_to_timespec(cpu.cpu, tp); 75 cputime_to_timespec((__force cputime_t)expires, tp);
75}
76
77static inline int cpu_time_before(const clockid_t which_clock,
78 union cpu_time_count now,
79 union cpu_time_count then)
80{
81 if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
82 return now.sched < then.sched;
83 } else {
84 return now.cpu < then.cpu;
85 }
86}
87static inline void cpu_time_add(const clockid_t which_clock,
88 union cpu_time_count *acc,
89 union cpu_time_count val)
90{
91 if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
92 acc->sched += val.sched;
93 } else {
94 acc->cpu += val.cpu;
95 }
96}
97static inline union cpu_time_count cpu_time_sub(const clockid_t which_clock,
98 union cpu_time_count a,
99 union cpu_time_count b)
100{
101 if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
102 a.sched -= b.sched;
103 } else {
104 a.cpu -= b.cpu;
105 }
106 return a;
107} 76}
108 77
109/* 78/*
@@ -111,47 +80,31 @@ static inline union cpu_time_count cpu_time_sub(const clockid_t which_clock,
111 * given the current clock sample. 80 * given the current clock sample.
112 */ 81 */
113static void bump_cpu_timer(struct k_itimer *timer, 82static void bump_cpu_timer(struct k_itimer *timer,
114 union cpu_time_count now) 83 unsigned long long now)
115{ 84{
116 int i; 85 int i;
86 unsigned long long delta, incr;
117 87
118 if (timer->it.cpu.incr.sched == 0) 88 if (timer->it.cpu.incr == 0)
119 return; 89 return;
120 90
121 if (CPUCLOCK_WHICH(timer->it_clock) == CPUCLOCK_SCHED) { 91 if (now < timer->it.cpu.expires)
122 unsigned long long delta, incr; 92 return;
123 93
124 if (now.sched < timer->it.cpu.expires.sched) 94 incr = timer->it.cpu.incr;
125 return; 95 delta = now + incr - timer->it.cpu.expires;
126 incr = timer->it.cpu.incr.sched;
127 delta = now.sched + incr - timer->it.cpu.expires.sched;
128 /* Don't use (incr*2 < delta), incr*2 might overflow. */
129 for (i = 0; incr < delta - incr; i++)
130 incr = incr << 1;
131 for (; i >= 0; incr >>= 1, i--) {
132 if (delta < incr)
133 continue;
134 timer->it.cpu.expires.sched += incr;
135 timer->it_overrun += 1 << i;
136 delta -= incr;
137 }
138 } else {
139 cputime_t delta, incr;
140 96
141 if (now.cpu < timer->it.cpu.expires.cpu) 97 /* Don't use (incr*2 < delta), incr*2 might overflow. */
142 return; 98 for (i = 0; incr < delta - incr; i++)
143 incr = timer->it.cpu.incr.cpu; 99 incr = incr << 1;
144 delta = now.cpu + incr - timer->it.cpu.expires.cpu; 100
145 /* Don't use (incr*2 < delta), incr*2 might overflow. */ 101 for (; i >= 0; incr >>= 1, i--) {
146 for (i = 0; incr < delta - incr; i++) 102 if (delta < incr)
147 incr += incr; 103 continue;
148 for (; i >= 0; incr = incr >> 1, i--) { 104
149 if (delta < incr) 105 timer->it.cpu.expires += incr;
150 continue; 106 timer->it_overrun += 1 << i;
151 timer->it.cpu.expires.cpu += incr; 107 delta -= incr;
152 timer->it_overrun += 1 << i;
153 delta -= incr;
154 }
155 } 108 }
156} 109}
157 110
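
The consolidated bump_cpu_timer() above keeps the overflow-safe overrun computation: instead of testing incr*2 < delta (which can overflow), it doubles incr while incr < delta - incr, then walks back down, counting skipped periods in powers of two. A runnable userspace rendering of just that arithmetic follows, with hypothetical values; it mirrors the patched logic but is not the kernel function itself.

    #include <stdio.h>

    /*
     * Overflow-safe overrun accounting, mirroring bump_cpu_timer():
     * advance *expires past "now" in whole multiples of incr and return
     * how many periods were skipped.
     */
    static unsigned long long
    bump_expires(unsigned long long *expires, unsigned long long incr,
                 unsigned long long now)
    {
            unsigned long long delta, overruns = 0;
            int i;

            if (incr == 0 || now < *expires)
                    return 0;

            delta = now + incr - *expires;

            /* Don't use (incr*2 < delta): incr*2 might overflow. */
            for (i = 0; incr < delta - incr; i++)
                    incr = incr << 1;

            for (; i >= 0; incr >>= 1, i--) {
                    if (delta < incr)
                            continue;
                    *expires += incr;
                    overruns += 1ULL << i;
                    delta -= incr;
            }
            return overruns;
    }

    int main(void)
    {
            unsigned long long expires = 100;       /* hypothetical: period 30, now 1000 */
            unsigned long long overruns = bump_expires(&expires, 30, 1000);

            /* prints: overruns = 31, new expires = 1030 */
            printf("overruns = %llu, new expires = %llu\n", overruns, expires);
            return 0;
    }
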
@@ -170,21 +123,21 @@ static inline int task_cputime_zero(const struct task_cputime *cputime)
170 return 0; 123 return 0;
171} 124}
172 125
173static inline cputime_t prof_ticks(struct task_struct *p) 126static inline unsigned long long prof_ticks(struct task_struct *p)
174{ 127{
175 cputime_t utime, stime; 128 cputime_t utime, stime;
176 129
177 task_cputime(p, &utime, &stime); 130 task_cputime(p, &utime, &stime);
178 131
179 return utime + stime; 132 return cputime_to_expires(utime + stime);
180} 133}
181static inline cputime_t virt_ticks(struct task_struct *p) 134static inline unsigned long long virt_ticks(struct task_struct *p)
182{ 135{
183 cputime_t utime; 136 cputime_t utime;
184 137
185 task_cputime(p, &utime, NULL); 138 task_cputime(p, &utime, NULL);
186 139
187 return utime; 140 return cputime_to_expires(utime);
188} 141}
189 142
190static int 143static int
@@ -225,19 +178,19 @@ posix_cpu_clock_set(const clockid_t which_clock, const struct timespec *tp)
225 * Sample a per-thread clock for the given task. 178 * Sample a per-thread clock for the given task.
226 */ 179 */
227static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p, 180static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
228 union cpu_time_count *cpu) 181 unsigned long long *sample)
229{ 182{
230 switch (CPUCLOCK_WHICH(which_clock)) { 183 switch (CPUCLOCK_WHICH(which_clock)) {
231 default: 184 default:
232 return -EINVAL; 185 return -EINVAL;
233 case CPUCLOCK_PROF: 186 case CPUCLOCK_PROF:
234 cpu->cpu = prof_ticks(p); 187 *sample = prof_ticks(p);
235 break; 188 break;
236 case CPUCLOCK_VIRT: 189 case CPUCLOCK_VIRT:
237 cpu->cpu = virt_ticks(p); 190 *sample = virt_ticks(p);
238 break; 191 break;
239 case CPUCLOCK_SCHED: 192 case CPUCLOCK_SCHED:
240 cpu->sched = task_sched_runtime(p); 193 *sample = task_sched_runtime(p);
241 break; 194 break;
242 } 195 }
243 return 0; 196 return 0;
@@ -284,7 +237,7 @@ void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times)
284 */ 237 */
285static int cpu_clock_sample_group(const clockid_t which_clock, 238static int cpu_clock_sample_group(const clockid_t which_clock,
286 struct task_struct *p, 239 struct task_struct *p,
287 union cpu_time_count *cpu) 240 unsigned long long *sample)
288{ 241{
289 struct task_cputime cputime; 242 struct task_cputime cputime;
290 243
@@ -293,15 +246,15 @@ static int cpu_clock_sample_group(const clockid_t which_clock,
293 return -EINVAL; 246 return -EINVAL;
294 case CPUCLOCK_PROF: 247 case CPUCLOCK_PROF:
295 thread_group_cputime(p, &cputime); 248 thread_group_cputime(p, &cputime);
296 cpu->cpu = cputime.utime + cputime.stime; 249 *sample = cputime_to_expires(cputime.utime + cputime.stime);
297 break; 250 break;
298 case CPUCLOCK_VIRT: 251 case CPUCLOCK_VIRT:
299 thread_group_cputime(p, &cputime); 252 thread_group_cputime(p, &cputime);
300 cpu->cpu = cputime.utime; 253 *sample = cputime_to_expires(cputime.utime);
301 break; 254 break;
302 case CPUCLOCK_SCHED: 255 case CPUCLOCK_SCHED:
303 thread_group_cputime(p, &cputime); 256 thread_group_cputime(p, &cputime);
304 cpu->sched = cputime.sum_exec_runtime; 257 *sample = cputime.sum_exec_runtime;
305 break; 258 break;
306 } 259 }
307 return 0; 260 return 0;
@@ -312,7 +265,7 @@ static int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp)
312{ 265{
313 const pid_t pid = CPUCLOCK_PID(which_clock); 266 const pid_t pid = CPUCLOCK_PID(which_clock);
314 int error = -EINVAL; 267 int error = -EINVAL;
315 union cpu_time_count rtn; 268 unsigned long long rtn;
316 269
317 if (pid == 0) { 270 if (pid == 0) {
318 /* 271 /*
@@ -446,6 +399,15 @@ static int posix_cpu_timer_del(struct k_itimer *timer)
446 return ret; 399 return ret;
447} 400}
448 401
402static void cleanup_timers_list(struct list_head *head,
403 unsigned long long curr)
404{
405 struct cpu_timer_list *timer, *next;
406
407 list_for_each_entry_safe(timer, next, head, entry)
408 list_del_init(&timer->entry);
409}
410
449/* 411/*
450 * Clean out CPU timers still ticking when a thread exited. The task 412 * Clean out CPU timers still ticking when a thread exited. The task
451 * pointer is cleared, and the expiry time is replaced with the residual 413 * pointer is cleared, and the expiry time is replaced with the residual
@@ -456,37 +418,12 @@ static void cleanup_timers(struct list_head *head,
456 cputime_t utime, cputime_t stime, 418 cputime_t utime, cputime_t stime,
457 unsigned long long sum_exec_runtime) 419 unsigned long long sum_exec_runtime)
458{ 420{
459 struct cpu_timer_list *timer, *next;
460 cputime_t ptime = utime + stime;
461
462 list_for_each_entry_safe(timer, next, head, entry) {
463 list_del_init(&timer->entry);
464 if (timer->expires.cpu < ptime) {
465 timer->expires.cpu = 0;
466 } else {
467 timer->expires.cpu -= ptime;
468 }
469 }
470 421
471 ++head; 422 cputime_t ptime = utime + stime;
472 list_for_each_entry_safe(timer, next, head, entry) {
473 list_del_init(&timer->entry);
474 if (timer->expires.cpu < utime) {
475 timer->expires.cpu = 0;
476 } else {
477 timer->expires.cpu -= utime;
478 }
479 }
480 423
481 ++head; 424 cleanup_timers_list(head, cputime_to_expires(ptime));
482 list_for_each_entry_safe(timer, next, head, entry) { 425 cleanup_timers_list(++head, cputime_to_expires(utime));
483 list_del_init(&timer->entry); 426 cleanup_timers_list(++head, sum_exec_runtime);
484 if (timer->expires.sched < sum_exec_runtime) {
485 timer->expires.sched = 0;
486 } else {
487 timer->expires.sched -= sum_exec_runtime;
488 }
489 }
490} 427}
491 428
492/* 429/*
@@ -516,17 +453,21 @@ void posix_cpu_timers_exit_group(struct task_struct *tsk)
516 tsk->se.sum_exec_runtime + sig->sum_sched_runtime); 453 tsk->se.sum_exec_runtime + sig->sum_sched_runtime);
517} 454}
518 455
519static void clear_dead_task(struct k_itimer *timer, union cpu_time_count now) 456static void clear_dead_task(struct k_itimer *itimer, unsigned long long now)
520{ 457{
458 struct cpu_timer_list *timer = &itimer->it.cpu;
459
521 /* 460 /*
522 * That's all for this thread or process. 461 * That's all for this thread or process.
523 * We leave our residual in expires to be reported. 462 * We leave our residual in expires to be reported.
524 */ 463 */
525 put_task_struct(timer->it.cpu.task); 464 put_task_struct(timer->task);
526 timer->it.cpu.task = NULL; 465 timer->task = NULL;
527 timer->it.cpu.expires = cpu_time_sub(timer->it_clock, 466 if (timer->expires < now) {
528 timer->it.cpu.expires, 467 timer->expires = 0;
529 now); 468 } else {
469 timer->expires -= now;
470 }
530} 471}
531 472
532static inline int expires_gt(cputime_t expires, cputime_t new_exp) 473static inline int expires_gt(cputime_t expires, cputime_t new_exp)
@@ -558,14 +499,14 @@ static void arm_timer(struct k_itimer *timer)
558 499
559 listpos = head; 500 listpos = head;
560 list_for_each_entry(next, head, entry) { 501 list_for_each_entry(next, head, entry) {
561 if (cpu_time_before(timer->it_clock, nt->expires, next->expires)) 502 if (nt->expires < next->expires)
562 break; 503 break;
563 listpos = &next->entry; 504 listpos = &next->entry;
564 } 505 }
565 list_add(&nt->entry, listpos); 506 list_add(&nt->entry, listpos);
566 507
567 if (listpos == head) { 508 if (listpos == head) {
568 union cpu_time_count *exp = &nt->expires; 509 unsigned long long exp = nt->expires;
569 510
570 /* 511 /*
571 * We are the new earliest-expiring POSIX 1.b timer, hence 512 * We are the new earliest-expiring POSIX 1.b timer, hence
@@ -576,17 +517,17 @@ static void arm_timer(struct k_itimer *timer)
576 517
577 switch (CPUCLOCK_WHICH(timer->it_clock)) { 518 switch (CPUCLOCK_WHICH(timer->it_clock)) {
578 case CPUCLOCK_PROF: 519 case CPUCLOCK_PROF:
579 if (expires_gt(cputime_expires->prof_exp, exp->cpu)) 520 if (expires_gt(cputime_expires->prof_exp, expires_to_cputime(exp)))
580 cputime_expires->prof_exp = exp->cpu; 521 cputime_expires->prof_exp = expires_to_cputime(exp);
581 break; 522 break;
582 case CPUCLOCK_VIRT: 523 case CPUCLOCK_VIRT:
583 if (expires_gt(cputime_expires->virt_exp, exp->cpu)) 524 if (expires_gt(cputime_expires->virt_exp, expires_to_cputime(exp)))
584 cputime_expires->virt_exp = exp->cpu; 525 cputime_expires->virt_exp = expires_to_cputime(exp);
585 break; 526 break;
586 case CPUCLOCK_SCHED: 527 case CPUCLOCK_SCHED:
587 if (cputime_expires->sched_exp == 0 || 528 if (cputime_expires->sched_exp == 0 ||
588 cputime_expires->sched_exp > exp->sched) 529 cputime_expires->sched_exp > exp)
589 cputime_expires->sched_exp = exp->sched; 530 cputime_expires->sched_exp = exp;
590 break; 531 break;
591 } 532 }
592 } 533 }
@@ -601,20 +542,20 @@ static void cpu_timer_fire(struct k_itimer *timer)
601 /* 542 /*
602 * User don't want any signal. 543 * User don't want any signal.
603 */ 544 */
604 timer->it.cpu.expires.sched = 0; 545 timer->it.cpu.expires = 0;
605 } else if (unlikely(timer->sigq == NULL)) { 546 } else if (unlikely(timer->sigq == NULL)) {
606 /* 547 /*
607 * This a special case for clock_nanosleep, 548 * This a special case for clock_nanosleep,
608 * not a normal timer from sys_timer_create. 549 * not a normal timer from sys_timer_create.
609 */ 550 */
610 wake_up_process(timer->it_process); 551 wake_up_process(timer->it_process);
611 timer->it.cpu.expires.sched = 0; 552 timer->it.cpu.expires = 0;
612 } else if (timer->it.cpu.incr.sched == 0) { 553 } else if (timer->it.cpu.incr == 0) {
613 /* 554 /*
614 * One-shot timer. Clear it as soon as it's fired. 555 * One-shot timer. Clear it as soon as it's fired.
615 */ 556 */
616 posix_timer_event(timer, 0); 557 posix_timer_event(timer, 0);
617 timer->it.cpu.expires.sched = 0; 558 timer->it.cpu.expires = 0;
618 } else if (posix_timer_event(timer, ++timer->it_requeue_pending)) { 559 } else if (posix_timer_event(timer, ++timer->it_requeue_pending)) {
619 /* 560 /*
620 * The signal did not get queued because the signal 561 * The signal did not get queued because the signal
@@ -632,7 +573,7 @@ static void cpu_timer_fire(struct k_itimer *timer)
632 */ 573 */
633static int cpu_timer_sample_group(const clockid_t which_clock, 574static int cpu_timer_sample_group(const clockid_t which_clock,
634 struct task_struct *p, 575 struct task_struct *p,
635 union cpu_time_count *cpu) 576 unsigned long long *sample)
636{ 577{
637 struct task_cputime cputime; 578 struct task_cputime cputime;
638 579
@@ -641,13 +582,13 @@ static int cpu_timer_sample_group(const clockid_t which_clock,
641 default: 582 default:
642 return -EINVAL; 583 return -EINVAL;
643 case CPUCLOCK_PROF: 584 case CPUCLOCK_PROF:
644 cpu->cpu = cputime.utime + cputime.stime; 585 *sample = cputime_to_expires(cputime.utime + cputime.stime);
645 break; 586 break;
646 case CPUCLOCK_VIRT: 587 case CPUCLOCK_VIRT:
647 cpu->cpu = cputime.utime; 588 *sample = cputime_to_expires(cputime.utime);
648 break; 589 break;
649 case CPUCLOCK_SCHED: 590 case CPUCLOCK_SCHED:
650 cpu->sched = cputime.sum_exec_runtime + task_delta_exec(p); 591 *sample = cputime.sum_exec_runtime + task_delta_exec(p);
651 break; 592 break;
652 } 593 }
653 return 0; 594 return 0;
@@ -694,7 +635,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags,
694 struct itimerspec *new, struct itimerspec *old) 635 struct itimerspec *new, struct itimerspec *old)
695{ 636{
696 struct task_struct *p = timer->it.cpu.task; 637 struct task_struct *p = timer->it.cpu.task;
697 union cpu_time_count old_expires, new_expires, old_incr, val; 638 unsigned long long old_expires, new_expires, old_incr, val;
698 int ret; 639 int ret;
699 640
700 if (unlikely(p == NULL)) { 641 if (unlikely(p == NULL)) {
@@ -749,7 +690,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags,
749 } 690 }
750 691
751 if (old) { 692 if (old) {
752 if (old_expires.sched == 0) { 693 if (old_expires == 0) {
753 old->it_value.tv_sec = 0; 694 old->it_value.tv_sec = 0;
754 old->it_value.tv_nsec = 0; 695 old->it_value.tv_nsec = 0;
755 } else { 696 } else {
@@ -764,11 +705,8 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags,
764 * new setting. 705 * new setting.
765 */ 706 */
766 bump_cpu_timer(timer, val); 707 bump_cpu_timer(timer, val);
767 if (cpu_time_before(timer->it_clock, val, 708 if (val < timer->it.cpu.expires) {
768 timer->it.cpu.expires)) { 709 old_expires = timer->it.cpu.expires - val;
769 old_expires = cpu_time_sub(
770 timer->it_clock,
771 timer->it.cpu.expires, val);
772 sample_to_timespec(timer->it_clock, 710 sample_to_timespec(timer->it_clock,
773 old_expires, 711 old_expires,
774 &old->it_value); 712 &old->it_value);
@@ -791,8 +729,8 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags,
791 goto out; 729 goto out;
792 } 730 }
793 731
794 if (new_expires.sched != 0 && !(flags & TIMER_ABSTIME)) { 732 if (new_expires != 0 && !(flags & TIMER_ABSTIME)) {
795 cpu_time_add(timer->it_clock, &new_expires, val); 733 new_expires += val;
796 } 734 }
797 735
798 /* 736 /*
@@ -801,8 +739,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags,
801 * arm the timer (we'll just fake it for timer_gettime). 739 * arm the timer (we'll just fake it for timer_gettime).
802 */ 740 */
803 timer->it.cpu.expires = new_expires; 741 timer->it.cpu.expires = new_expires;
804 if (new_expires.sched != 0 && 742 if (new_expires != 0 && val < new_expires) {
805 cpu_time_before(timer->it_clock, val, new_expires)) {
806 arm_timer(timer); 743 arm_timer(timer);
807 } 744 }
808 745
@@ -826,8 +763,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags,
826 timer->it_overrun_last = 0; 763 timer->it_overrun_last = 0;
827 timer->it_overrun = -1; 764 timer->it_overrun = -1;
828 765
829 if (new_expires.sched != 0 && 766 if (new_expires != 0 && !(val < new_expires)) {
830 !cpu_time_before(timer->it_clock, val, new_expires)) {
831 /* 767 /*
832 * The designated time already passed, so we notify 768 * The designated time already passed, so we notify
833 * immediately, even if the thread never runs to 769 * immediately, even if the thread never runs to
@@ -849,7 +785,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags,
849 785
850static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) 786static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
851{ 787{
852 union cpu_time_count now; 788 unsigned long long now;
853 struct task_struct *p = timer->it.cpu.task; 789 struct task_struct *p = timer->it.cpu.task;
854 int clear_dead; 790 int clear_dead;
855 791
@@ -859,7 +795,7 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
859 sample_to_timespec(timer->it_clock, 795 sample_to_timespec(timer->it_clock,
860 timer->it.cpu.incr, &itp->it_interval); 796 timer->it.cpu.incr, &itp->it_interval);
861 797
862 if (timer->it.cpu.expires.sched == 0) { /* Timer not armed at all. */ 798 if (timer->it.cpu.expires == 0) { /* Timer not armed at all. */
863 itp->it_value.tv_sec = itp->it_value.tv_nsec = 0; 799 itp->it_value.tv_sec = itp->it_value.tv_nsec = 0;
864 return; 800 return;
865 } 801 }
@@ -891,7 +827,7 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
891 */ 827 */
892 put_task_struct(p); 828 put_task_struct(p);
893 timer->it.cpu.task = NULL; 829 timer->it.cpu.task = NULL;
894 timer->it.cpu.expires.sched = 0; 830 timer->it.cpu.expires = 0;
895 read_unlock(&tasklist_lock); 831 read_unlock(&tasklist_lock);
896 goto dead; 832 goto dead;
897 } else { 833 } else {
@@ -912,10 +848,9 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
912 goto dead; 848 goto dead;
913 } 849 }
914 850
915 if (cpu_time_before(timer->it_clock, now, timer->it.cpu.expires)) { 851 if (now < timer->it.cpu.expires) {
916 sample_to_timespec(timer->it_clock, 852 sample_to_timespec(timer->it_clock,
917 cpu_time_sub(timer->it_clock, 853 timer->it.cpu.expires - now,
918 timer->it.cpu.expires, now),
919 &itp->it_value); 854 &itp->it_value);
920 } else { 855 } else {
921 /* 856 /*
@@ -927,6 +862,28 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
927 } 862 }
928} 863}
929 864
865static unsigned long long
866check_timers_list(struct list_head *timers,
867 struct list_head *firing,
868 unsigned long long curr)
869{
870 int maxfire = 20;
871
872 while (!list_empty(timers)) {
873 struct cpu_timer_list *t;
874
875 t = list_first_entry(timers, struct cpu_timer_list, entry);
876
877 if (!--maxfire || curr < t->expires)
878 return t->expires;
879
880 t->firing = 1;
881 list_move_tail(&t->entry, firing);
882 }
883
884 return 0;
885}
886
930/* 887/*
931 * Check for any per-thread CPU timers that have fired and move them off 888 * Check for any per-thread CPU timers that have fired and move them off
932 * the tsk->cpu_timers[N] list onto the firing list. Here we update the 889 * the tsk->cpu_timers[N] list onto the firing list. Here we update the
@@ -935,54 +892,20 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
935static void check_thread_timers(struct task_struct *tsk, 892static void check_thread_timers(struct task_struct *tsk,
936 struct list_head *firing) 893 struct list_head *firing)
937{ 894{
938 int maxfire;
939 struct list_head *timers = tsk->cpu_timers; 895 struct list_head *timers = tsk->cpu_timers;
940 struct signal_struct *const sig = tsk->signal; 896 struct signal_struct *const sig = tsk->signal;
897 struct task_cputime *tsk_expires = &tsk->cputime_expires;
898 unsigned long long expires;
941 unsigned long soft; 899 unsigned long soft;
942 900
943 maxfire = 20; 901 expires = check_timers_list(timers, firing, prof_ticks(tsk));
944 tsk->cputime_expires.prof_exp = 0; 902 tsk_expires->prof_exp = expires_to_cputime(expires);
945 while (!list_empty(timers)) {
946 struct cpu_timer_list *t = list_first_entry(timers,
947 struct cpu_timer_list,
948 entry);
949 if (!--maxfire || prof_ticks(tsk) < t->expires.cpu) {
950 tsk->cputime_expires.prof_exp = t->expires.cpu;
951 break;
952 }
953 t->firing = 1;
954 list_move_tail(&t->entry, firing);
955 }
956 903
957 ++timers; 904 expires = check_timers_list(++timers, firing, virt_ticks(tsk));
958 maxfire = 20; 905 tsk_expires->virt_exp = expires_to_cputime(expires);
959 tsk->cputime_expires.virt_exp = 0;
960 while (!list_empty(timers)) {
961 struct cpu_timer_list *t = list_first_entry(timers,
962 struct cpu_timer_list,
963 entry);
964 if (!--maxfire || virt_ticks(tsk) < t->expires.cpu) {
965 tsk->cputime_expires.virt_exp = t->expires.cpu;
966 break;
967 }
968 t->firing = 1;
969 list_move_tail(&t->entry, firing);
970 }
971 906
972 ++timers; 907 tsk_expires->sched_exp = check_timers_list(++timers, firing,
973 maxfire = 20; 908 tsk->se.sum_exec_runtime);
974 tsk->cputime_expires.sched_exp = 0;
975 while (!list_empty(timers)) {
976 struct cpu_timer_list *t = list_first_entry(timers,
977 struct cpu_timer_list,
978 entry);
979 if (!--maxfire || tsk->se.sum_exec_runtime < t->expires.sched) {
980 tsk->cputime_expires.sched_exp = t->expires.sched;
981 break;
982 }
983 t->firing = 1;
984 list_move_tail(&t->entry, firing);
985 }
986 909
987 /* 910 /*
988 * Check for the special case thread timers. 911 * Check for the special case thread timers.
@@ -1030,7 +953,8 @@ static void stop_process_timers(struct signal_struct *sig)
1030static u32 onecputick; 953static u32 onecputick;
1031 954
1032static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it, 955static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it,
1033 cputime_t *expires, cputime_t cur_time, int signo) 956 unsigned long long *expires,
957 unsigned long long cur_time, int signo)
1034{ 958{
1035 if (!it->expires) 959 if (!it->expires)
1036 return; 960 return;
@@ -1066,9 +990,8 @@ static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it,
1066static void check_process_timers(struct task_struct *tsk, 990static void check_process_timers(struct task_struct *tsk,
1067 struct list_head *firing) 991 struct list_head *firing)
1068{ 992{
1069 int maxfire;
1070 struct signal_struct *const sig = tsk->signal; 993 struct signal_struct *const sig = tsk->signal;
1071 cputime_t utime, ptime, virt_expires, prof_expires; 994 unsigned long long utime, ptime, virt_expires, prof_expires;
1072 unsigned long long sum_sched_runtime, sched_expires; 995 unsigned long long sum_sched_runtime, sched_expires;
1073 struct list_head *timers = sig->cpu_timers; 996 struct list_head *timers = sig->cpu_timers;
1074 struct task_cputime cputime; 997 struct task_cputime cputime;
@@ -1078,52 +1001,13 @@ static void check_process_timers(struct task_struct *tsk,
1078 * Collect the current process totals. 1001 * Collect the current process totals.
1079 */ 1002 */
1080 thread_group_cputimer(tsk, &cputime); 1003 thread_group_cputimer(tsk, &cputime);
1081 utime = cputime.utime; 1004 utime = cputime_to_expires(cputime.utime);
1082 ptime = utime + cputime.stime; 1005 ptime = utime + cputime_to_expires(cputime.stime);
1083 sum_sched_runtime = cputime.sum_exec_runtime; 1006 sum_sched_runtime = cputime.sum_exec_runtime;
1084 maxfire = 20;
1085 prof_expires = 0;
1086 while (!list_empty(timers)) {
1087 struct cpu_timer_list *tl = list_first_entry(timers,
1088 struct cpu_timer_list,
1089 entry);
1090 if (!--maxfire || ptime < tl->expires.cpu) {
1091 prof_expires = tl->expires.cpu;
1092 break;
1093 }
1094 tl->firing = 1;
1095 list_move_tail(&tl->entry, firing);
1096 }
1097 1007
1098 ++timers; 1008 prof_expires = check_timers_list(timers, firing, ptime);
1099 maxfire = 20; 1009 virt_expires = check_timers_list(++timers, firing, utime);
1100 virt_expires = 0; 1010 sched_expires = check_timers_list(++timers, firing, sum_sched_runtime);
1101 while (!list_empty(timers)) {
1102 struct cpu_timer_list *tl = list_first_entry(timers,
1103 struct cpu_timer_list,
1104 entry);
1105 if (!--maxfire || utime < tl->expires.cpu) {
1106 virt_expires = tl->expires.cpu;
1107 break;
1108 }
1109 tl->firing = 1;
1110 list_move_tail(&tl->entry, firing);
1111 }
1112
1113 ++timers;
1114 maxfire = 20;
1115 sched_expires = 0;
1116 while (!list_empty(timers)) {
1117 struct cpu_timer_list *tl = list_first_entry(timers,
1118 struct cpu_timer_list,
1119 entry);
1120 if (!--maxfire || sum_sched_runtime < tl->expires.sched) {
1121 sched_expires = tl->expires.sched;
1122 break;
1123 }
1124 tl->firing = 1;
1125 list_move_tail(&tl->entry, firing);
1126 }
1127 1011
1128 /* 1012 /*
1129 * Check for the special case process timers. 1013 * Check for the special case process timers.
@@ -1162,8 +1046,8 @@ static void check_process_timers(struct task_struct *tsk,
1162 } 1046 }
1163 } 1047 }
1164 1048
1165 sig->cputime_expires.prof_exp = prof_expires; 1049 sig->cputime_expires.prof_exp = expires_to_cputime(prof_expires);
1166 sig->cputime_expires.virt_exp = virt_expires; 1050 sig->cputime_expires.virt_exp = expires_to_cputime(virt_expires);
1167 sig->cputime_expires.sched_exp = sched_expires; 1051 sig->cputime_expires.sched_exp = sched_expires;
1168 if (task_cputime_zero(&sig->cputime_expires)) 1052 if (task_cputime_zero(&sig->cputime_expires))
1169 stop_process_timers(sig); 1053 stop_process_timers(sig);
@@ -1176,7 +1060,7 @@ static void check_process_timers(struct task_struct *tsk,
1176void posix_cpu_timer_schedule(struct k_itimer *timer) 1060void posix_cpu_timer_schedule(struct k_itimer *timer)
1177{ 1061{
1178 struct task_struct *p = timer->it.cpu.task; 1062 struct task_struct *p = timer->it.cpu.task;
1179 union cpu_time_count now; 1063 unsigned long long now;
1180 1064
1181 if (unlikely(p == NULL)) 1065 if (unlikely(p == NULL))
1182 /* 1066 /*
@@ -1205,7 +1089,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
1205 */ 1089 */
1206 put_task_struct(p); 1090 put_task_struct(p);
1207 timer->it.cpu.task = p = NULL; 1091 timer->it.cpu.task = p = NULL;
1208 timer->it.cpu.expires.sched = 0; 1092 timer->it.cpu.expires = 0;
1209 goto out_unlock; 1093 goto out_unlock;
1210 } else if (unlikely(p->exit_state) && thread_group_empty(p)) { 1094 } else if (unlikely(p->exit_state) && thread_group_empty(p)) {
1211 /* 1095 /*
@@ -1213,6 +1097,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
1213 * not yet reaped. Take this opportunity to 1097 * not yet reaped. Take this opportunity to
1214 * drop our task ref. 1098 * drop our task ref.
1215 */ 1099 */
1100 cpu_timer_sample_group(timer->it_clock, p, &now);
1216 clear_dead_task(timer, now); 1101 clear_dead_task(timer, now);
1217 goto out_unlock; 1102 goto out_unlock;
1218 } 1103 }
@@ -1387,7 +1272,7 @@ void run_posix_cpu_timers(struct task_struct *tsk)
1387void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, 1272void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
1388 cputime_t *newval, cputime_t *oldval) 1273 cputime_t *newval, cputime_t *oldval)
1389{ 1274{
1390 union cpu_time_count now; 1275 unsigned long long now;
1391 1276
1392 BUG_ON(clock_idx == CPUCLOCK_SCHED); 1277 BUG_ON(clock_idx == CPUCLOCK_SCHED);
1393 cpu_timer_sample_group(clock_idx, tsk, &now); 1278 cpu_timer_sample_group(clock_idx, tsk, &now);
@@ -1399,17 +1284,17 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
1399 * it to be absolute. 1284 * it to be absolute.
1400 */ 1285 */
1401 if (*oldval) { 1286 if (*oldval) {
1402 if (*oldval <= now.cpu) { 1287 if (*oldval <= now) {
1403 /* Just about to fire. */ 1288 /* Just about to fire. */
1404 *oldval = cputime_one_jiffy; 1289 *oldval = cputime_one_jiffy;
1405 } else { 1290 } else {
1406 *oldval -= now.cpu; 1291 *oldval -= now;
1407 } 1292 }
1408 } 1293 }
1409 1294
1410 if (!*newval) 1295 if (!*newval)
1411 goto out; 1296 goto out;
1412 *newval += now.cpu; 1297 *newval += now;
1413 } 1298 }
1414 1299
1415 /* 1300 /*
@@ -1459,7 +1344,7 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
1459 } 1344 }
1460 1345
1461 while (!signal_pending(current)) { 1346 while (!signal_pending(current)) {
1462 if (timer.it.cpu.expires.sched == 0) { 1347 if (timer.it.cpu.expires == 0) {
1463 /* 1348 /*
1464 * Our timer fired and was reset, below 1349 * Our timer fired and was reset, below
1465 * deletion can not fail. 1350 * deletion can not fail.
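
The posix-cpu-timers hunks above fold three nearly identical "walk the sorted timer list, fire what is due, return the next expiry" loops into one helper, check_timers_list(), and replace the union cpu_time_count with a plain unsigned long long sample. Below is a minimal userspace sketch of that consolidation, not kernel code: the my_timer type and the hand-rolled singly linked list are illustration-only stand-ins for struct cpu_timer_list and the kernel list API.

#include <stdio.h>
#include <stdlib.h>

struct my_timer {                       /* stand-in for struct cpu_timer_list */
    unsigned long long expires;
    int firing;
    struct my_timer *next;
};

/*
 * Walk a list sorted by ->expires, mark entries that are already due as
 * firing and splice them onto *firing, and return the expiry of the first
 * entry still in the future (0 if none is left).  The maxfire cap bounds
 * the work done in one pass, mirroring the kernel helper above.  The kernel
 * uses list_move_tail() to keep firing order; this sketch just prepends.
 */
static unsigned long long
check_timers_list(struct my_timer **timers, struct my_timer **firing,
                  unsigned long long curr)
{
    int maxfire = 20;

    while (*timers) {
        struct my_timer *t = *timers;

        if (!--maxfire || curr < t->expires)
            return t->expires;

        t->firing = 1;
        *timers = t->next;              /* unlink from the armed list ... */
        t->next = *firing;              /* ... and push onto the firing list */
        *firing = t;
    }
    return 0;
}

int main(void)
{
    struct my_timer c = { 30, 0, NULL };
    struct my_timer b = { 20, 0, &c };
    struct my_timer a = { 10, 0, &b };
    struct my_timer *armed = &a, *firing = NULL;

    unsigned long long next = check_timers_list(&armed, &firing, 25ULL);
    printf("next expiry: %llu\n", next); /* 30: a and b were moved to firing */
    return 0;
}
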
diff --git a/kernel/printk.c b/kernel/printk.c
index fa36e1494420..8212c1aef125 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -363,6 +363,53 @@ static void log_store(int facility, int level,
363 log_next_seq++; 363 log_next_seq++;
364} 364}
365 365
366#ifdef CONFIG_SECURITY_DMESG_RESTRICT
367int dmesg_restrict = 1;
368#else
369int dmesg_restrict;
370#endif
371
372static int syslog_action_restricted(int type)
373{
374 if (dmesg_restrict)
375 return 1;
376 /*
377 * Unless restricted, we allow "read all" and "get buffer size"
378 * for everybody.
379 */
380 return type != SYSLOG_ACTION_READ_ALL &&
381 type != SYSLOG_ACTION_SIZE_BUFFER;
382}
383
384static int check_syslog_permissions(int type, bool from_file)
385{
386 /*
387 * If this is from /proc/kmsg and we've already opened it, then we've
388 * already done the capabilities checks at open time.
389 */
390 if (from_file && type != SYSLOG_ACTION_OPEN)
391 return 0;
392
393 if (syslog_action_restricted(type)) {
394 if (capable(CAP_SYSLOG))
395 return 0;
396 /*
397 * For historical reasons, accept CAP_SYS_ADMIN too, with
398 * a warning.
399 */
400 if (capable(CAP_SYS_ADMIN)) {
401 pr_warn_once("%s (%d): Attempt to access syslog with "
402 "CAP_SYS_ADMIN but no CAP_SYSLOG "
403 "(deprecated).\n",
404 current->comm, task_pid_nr(current));
405 return 0;
406 }
407 return -EPERM;
408 }
409 return security_syslog(type);
410}
411
412
366/* /dev/kmsg - userspace message inject/listen interface */ 413/* /dev/kmsg - userspace message inject/listen interface */
367struct devkmsg_user { 414struct devkmsg_user {
368 u64 seq; 415 u64 seq;
@@ -620,7 +667,8 @@ static int devkmsg_open(struct inode *inode, struct file *file)
620 if ((file->f_flags & O_ACCMODE) == O_WRONLY) 667 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
621 return 0; 668 return 0;
622 669
623 err = security_syslog(SYSLOG_ACTION_READ_ALL); 670 err = check_syslog_permissions(SYSLOG_ACTION_READ_ALL,
671 SYSLOG_FROM_READER);
624 if (err) 672 if (err)
625 return err; 673 return err;
626 674
@@ -813,45 +861,6 @@ static inline void boot_delay_msec(int level)
813} 861}
814#endif 862#endif
815 863
816#ifdef CONFIG_SECURITY_DMESG_RESTRICT
817int dmesg_restrict = 1;
818#else
819int dmesg_restrict;
820#endif
821
822static int syslog_action_restricted(int type)
823{
824 if (dmesg_restrict)
825 return 1;
826 /* Unless restricted, we allow "read all" and "get buffer size" for everybody */
827 return type != SYSLOG_ACTION_READ_ALL && type != SYSLOG_ACTION_SIZE_BUFFER;
828}
829
830static int check_syslog_permissions(int type, bool from_file)
831{
832 /*
833 * If this is from /proc/kmsg and we've already opened it, then we've
834 * already done the capabilities checks at open time.
835 */
836 if (from_file && type != SYSLOG_ACTION_OPEN)
837 return 0;
838
839 if (syslog_action_restricted(type)) {
840 if (capable(CAP_SYSLOG))
841 return 0;
842 /* For historical reasons, accept CAP_SYS_ADMIN too, with a warning */
843 if (capable(CAP_SYS_ADMIN)) {
844 printk_once(KERN_WARNING "%s (%d): "
845 "Attempt to access syslog with CAP_SYS_ADMIN "
846 "but no CAP_SYSLOG (deprecated).\n",
847 current->comm, task_pid_nr(current));
848 return 0;
849 }
850 return -EPERM;
851 }
852 return 0;
853}
854
855#if defined(CONFIG_PRINTK_TIME) 864#if defined(CONFIG_PRINTK_TIME)
856static bool printk_time = 1; 865static bool printk_time = 1;
857#else 866#else
@@ -1249,7 +1258,7 @@ out:
1249 1258
1250SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len) 1259SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len)
1251{ 1260{
1252 return do_syslog(type, buf, len, SYSLOG_FROM_CALL); 1261 return do_syslog(type, buf, len, SYSLOG_FROM_READER);
1253} 1262}
1254 1263
1255/* 1264/*
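
The printk.c change moves syslog_action_restricted() and check_syslog_permissions() above the /dev/kmsg code so devkmsg_open() can reuse the same gate instead of calling security_syslog() directly, and routes the syslog(2) path through SYSLOG_FROM_READER. A rough userspace sketch of the gating order follows; the capability checks are stubbed out as plain booleans (have_cap_syslog and have_cap_sys_admin are invented stand-ins for capable(), not real APIs), and the LSM hook is only noted in a comment.

#include <stdio.h>
#include <stdbool.h>

enum { ACT_OPEN, ACT_READ_ALL, ACT_SIZE_BUFFER, ACT_CLEAR };

static int dmesg_restrict;              /* 1 if CONFIG_SECURITY_DMESG_RESTRICT */
static bool have_cap_syslog;            /* stand-in for capable(CAP_SYSLOG) */
static bool have_cap_sys_admin;         /* stand-in for capable(CAP_SYS_ADMIN) */

static int action_restricted(int type)
{
    if (dmesg_restrict)
        return 1;
    /* Unless restricted, "read all" and "get size" are open to everybody. */
    return type != ACT_READ_ALL && type != ACT_SIZE_BUFFER;
}

static int check_permissions(int type, bool from_file)
{
    /* /proc/kmsg readers were already checked at open time. */
    if (from_file && type != ACT_OPEN)
        return 0;

    if (action_restricted(type)) {
        if (have_cap_syslog)
            return 0;
        if (have_cap_sys_admin) {
            fprintf(stderr, "CAP_SYS_ADMIN without CAP_SYSLOG (deprecated)\n");
            return 0;
        }
        return -1;                      /* -EPERM in the kernel */
    }
    return 0;                           /* the kernel additionally asks the LSM here */
}

int main(void)
{
    printf("%d\n", check_permissions(ACT_CLEAR, false));    /* -1: restricted */
    printf("%d\n", check_permissions(ACT_READ_ALL, false)); /* 0 unless dmesg_restrict */
    return 0;
}
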
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index aed981a3f69c..335a7ae697f5 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -665,20 +665,22 @@ static int ptrace_peek_siginfo(struct task_struct *child,
665 if (unlikely(is_compat_task())) { 665 if (unlikely(is_compat_task())) {
666 compat_siginfo_t __user *uinfo = compat_ptr(data); 666 compat_siginfo_t __user *uinfo = compat_ptr(data);
667 667
668 ret = copy_siginfo_to_user32(uinfo, &info); 668 if (copy_siginfo_to_user32(uinfo, &info) ||
669 ret |= __put_user(info.si_code, &uinfo->si_code); 669 __put_user(info.si_code, &uinfo->si_code)) {
670 ret = -EFAULT;
671 break;
672 }
673
670 } else 674 } else
671#endif 675#endif
672 { 676 {
673 siginfo_t __user *uinfo = (siginfo_t __user *) data; 677 siginfo_t __user *uinfo = (siginfo_t __user *) data;
674 678
675 ret = copy_siginfo_to_user(uinfo, &info); 679 if (copy_siginfo_to_user(uinfo, &info) ||
676 ret |= __put_user(info.si_code, &uinfo->si_code); 680 __put_user(info.si_code, &uinfo->si_code)) {
677 } 681 ret = -EFAULT;
678 682 break;
679 if (ret) { 683 }
680 ret = -EFAULT;
681 break;
682 } 684 }
683 685
684 data += sizeof(siginfo_t); 686 data += sizeof(siginfo_t);
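
The ptrace_peek_siginfo() hunk collapses the two copy-out steps into a single condition so a failure in either one immediately yields -EFAULT and breaks out of the loop, instead of OR-ing return values with different sign conventions. A tiny sketch of the same error-path shape; copy_main() and copy_extra() are hypothetical fallible helpers standing in for the copy_siginfo_to_user*() calls, and nothing else about the ptrace semantics is modelled.

#include <stdio.h>

/* Stand-ins: return 0 on success, nonzero on failure, like copy_*_to_user(). */
static int copy_main(int fail)  { return fail; }
static int copy_extra(int fail) { return fail; }

static int peek_loop(int n, int fail_at)
{
    int ret = 0;

    for (int i = 0; i < n; i++) {
        if (copy_main(i == fail_at) || copy_extra(0)) {
            ret = -14;                  /* -EFAULT */
            break;                      /* stop on the first fault */
        }
        ret++;                          /* records copied out so far */
    }
    return ret;
}

int main(void)
{
    printf("%d\n", peek_loop(5, -1));   /* 5: all copied */
    printf("%d\n", peek_loop(5, 2));    /* -14: fault on the third record */
    return 0;
}
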
diff --git a/kernel/range.c b/kernel/range.c
index 071b0ab455cb..322ea8e93e4b 100644
--- a/kernel/range.c
+++ b/kernel/range.c
@@ -4,7 +4,7 @@
4#include <linux/kernel.h> 4#include <linux/kernel.h>
5#include <linux/init.h> 5#include <linux/init.h>
6#include <linux/sort.h> 6#include <linux/sort.h>
7 7#include <linux/string.h>
8#include <linux/range.h> 8#include <linux/range.h>
9 9
10int add_range(struct range *range, int az, int nr_range, u64 start, u64 end) 10int add_range(struct range *range, int az, int nr_range, u64 start, u64 end)
@@ -32,9 +32,8 @@ int add_range_with_merge(struct range *range, int az, int nr_range,
32 if (start >= end) 32 if (start >= end)
33 return nr_range; 33 return nr_range;
34 34
35 /* Try to merge it with old one: */ 35 /* get new start/end: */
36 for (i = 0; i < nr_range; i++) { 36 for (i = 0; i < nr_range; i++) {
37 u64 final_start, final_end;
38 u64 common_start, common_end; 37 u64 common_start, common_end;
39 38
40 if (!range[i].end) 39 if (!range[i].end)
@@ -45,12 +44,16 @@ int add_range_with_merge(struct range *range, int az, int nr_range,
45 if (common_start > common_end) 44 if (common_start > common_end)
46 continue; 45 continue;
47 46
48 final_start = min(range[i].start, start); 47 /* new start/end, will add it back at last */
49 final_end = max(range[i].end, end); 48 start = min(range[i].start, start);
49 end = max(range[i].end, end);
50 50
51 range[i].start = final_start; 51 memmove(&range[i], &range[i + 1],
52 range[i].end = final_end; 52 (nr_range - (i + 1)) * sizeof(range[i]));
53 return nr_range; 53 range[nr_range - 1].start = 0;
54 range[nr_range - 1].end = 0;
55 nr_range--;
56 i--;
54 } 57 }
55 58
56 /* Need to add it: */ 59 /* Need to add it: */
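
add_range_with_merge() now folds every existing range that touches the new one into a running [start, end), compacts the array with memmove(), and re-checks the current slot (i--) because a later entry may overlap the widened range; the merged result is appended once at the end. A self-contained userspace version of that loop, assuming u64 is spelled unsigned long long and keeping the same struct shape as kernel/range.h:

#include <stdio.h>
#include <string.h>

struct range { unsigned long long start, end; };

static int add_range(struct range *range, int az, int nr,
                     unsigned long long start, unsigned long long end)
{
    if (start >= end || nr >= az)
        return nr;
    range[nr].start = start;
    range[nr].end = end;
    return nr + 1;
}

static int add_range_with_merge(struct range *range, int az, int nr,
                                unsigned long long start, unsigned long long end)
{
    int i;

    if (start >= end)
        return nr;

    /* Fold every existing range that intersects [start, end) into it. */
    for (i = 0; i < nr; i++) {
        unsigned long long cs, ce;

        if (!range[i].end)
            continue;
        cs = range[i].start > start ? range[i].start : start;
        ce = range[i].end < end ? range[i].end : end;
        if (cs > ce)
            continue;

        /* Widen the new range, drop the old slot, re-check slot i. */
        start = range[i].start < start ? range[i].start : start;
        end = range[i].end > end ? range[i].end : end;
        memmove(&range[i], &range[i + 1], (nr - (i + 1)) * sizeof(range[i]));
        range[nr - 1].start = range[nr - 1].end = 0;
        nr--;
        i--;
    }
    return add_range(range, az, nr, start, end);
}

int main(void)
{
    struct range r[8] = { { 0, 10 }, { 20, 30 }, { 40, 50 } };
    int nr = add_range_with_merge(r, 8, 3, 5, 45);  /* bridges all three */

    for (int i = 0; i < nr; i++)
        printf("[%llu, %llu)\n", r[i].start, r[i].end); /* [0, 50) */
    return 0;
}
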
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 16ea67925015..35380019f0fc 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -1451,9 +1451,9 @@ static int rcu_gp_init(struct rcu_state *rsp)
1451 rnp->grphi, rnp->qsmask); 1451 rnp->grphi, rnp->qsmask);
1452 raw_spin_unlock_irq(&rnp->lock); 1452 raw_spin_unlock_irq(&rnp->lock);
1453#ifdef CONFIG_PROVE_RCU_DELAY 1453#ifdef CONFIG_PROVE_RCU_DELAY
1454 if ((prandom_u32() % (rcu_num_nodes * 8)) == 0 && 1454 if ((prandom_u32() % (rcu_num_nodes + 1)) == 0 &&
1455 system_state == SYSTEM_RUNNING) 1455 system_state == SYSTEM_RUNNING)
1456 schedule_timeout_uninterruptible(2); 1456 udelay(200);
1457#endif /* #ifdef CONFIG_PROVE_RCU_DELAY */ 1457#endif /* #ifdef CONFIG_PROVE_RCU_DELAY */
1458 cond_resched(); 1458 cond_resched();
1459 } 1459 }
@@ -1613,6 +1613,14 @@ static int __noreturn rcu_gp_kthread(void *arg)
1613 } 1613 }
1614} 1614}
1615 1615
1616static void rsp_wakeup(struct irq_work *work)
1617{
1618 struct rcu_state *rsp = container_of(work, struct rcu_state, wakeup_work);
1619
1620 /* Wake up rcu_gp_kthread() to start the grace period. */
1621 wake_up(&rsp->gp_wq);
1622}
1623
1616/* 1624/*
1617 * Start a new RCU grace period if warranted, re-initializing the hierarchy 1625 * Start a new RCU grace period if warranted, re-initializing the hierarchy
1618 * in preparation for detecting the next grace period. The caller must hold 1626 * in preparation for detecting the next grace period. The caller must hold
@@ -1637,8 +1645,12 @@ rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
1637 } 1645 }
1638 rsp->gp_flags = RCU_GP_FLAG_INIT; 1646 rsp->gp_flags = RCU_GP_FLAG_INIT;
1639 1647
1640 /* Wake up rcu_gp_kthread() to start the grace period. */ 1648 /*
1641 wake_up(&rsp->gp_wq); 1649 * We can't do wakeups while holding the rnp->lock, as that
 1650 * could cause possible deadlocks with the rq->lock. Defer
1651 * the wakeup to interrupt context.
1652 */
1653 irq_work_queue(&rsp->wakeup_work);
1642} 1654}
1643 1655
1644/* 1656/*
@@ -3235,6 +3247,7 @@ static void __init rcu_init_one(struct rcu_state *rsp,
3235 3247
3236 rsp->rda = rda; 3248 rsp->rda = rda;
3237 init_waitqueue_head(&rsp->gp_wq); 3249 init_waitqueue_head(&rsp->gp_wq);
3250 init_irq_work(&rsp->wakeup_work, rsp_wakeup);
3238 rnp = rsp->level[rcu_num_lvls - 1]; 3251 rnp = rsp->level[rcu_num_lvls - 1];
3239 for_each_possible_cpu(i) { 3252 for_each_possible_cpu(i) {
3240 while (i > rnp->grphi) 3253 while (i > rnp->grphi)
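
The rcutree.c change stops calling wake_up(&rsp->gp_wq) while the rnp->lock is held (the wakeup path can take rq->lock and deadlock) and instead queues an irq_work that performs the wakeup later from interrupt context. A kernel-style sketch of that pattern, trimmed to the three pieces the diff adds (the field, the handler, the queueing call); my_state, my_wakeup, my_init and my_kick are hypothetical names, and this is a fragment, not a standalone program. The API calls themselves (init_irq_work, irq_work_queue, init_waitqueue_head, wake_up, container_of) are the ones the diff uses.

#include <linux/kernel.h>
#include <linux/irq_work.h>
#include <linux/wait.h>

struct my_state {                       /* hypothetical stand-in for rcu_state */
    wait_queue_head_t gp_wq;
    struct irq_work wakeup_work;
};

/* Runs later, in a context where the caller's spinlock is no longer held. */
static void my_wakeup(struct irq_work *work)
{
    struct my_state *st = container_of(work, struct my_state, wakeup_work);

    wake_up(&st->gp_wq);
}

static void my_init(struct my_state *st)
{
    init_waitqueue_head(&st->gp_wq);
    init_irq_work(&st->wakeup_work, my_wakeup);
}

/* Called with a spinlock held: defer the wakeup instead of doing it here. */
static void my_kick(struct my_state *st)
{
    irq_work_queue(&st->wakeup_work);
}
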
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index da77a8f57ff9..4df503470e42 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -27,6 +27,7 @@
27#include <linux/threads.h> 27#include <linux/threads.h>
28#include <linux/cpumask.h> 28#include <linux/cpumask.h>
29#include <linux/seqlock.h> 29#include <linux/seqlock.h>
30#include <linux/irq_work.h>
30 31
31/* 32/*
32 * Define shape of hierarchy based on NR_CPUS, CONFIG_RCU_FANOUT, and 33 * Define shape of hierarchy based on NR_CPUS, CONFIG_RCU_FANOUT, and
@@ -442,6 +443,7 @@ struct rcu_state {
442 char *name; /* Name of structure. */ 443 char *name; /* Name of structure. */
443 char abbr; /* Abbreviated name. */ 444 char abbr; /* Abbreviated name. */
444 struct list_head flavors; /* List of RCU flavors. */ 445 struct list_head flavors; /* List of RCU flavors. */
446 struct irq_work wakeup_work; /* Postponed wakeups */
445}; 447};
446 448
447/* Values for rcu_state structure's gp_flags field. */ 449/* Values for rcu_state structure's gp_flags field. */
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 170814dc418f..3db5a375d8dd 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -88,7 +88,7 @@ static void __init rcu_bootup_announce_oddness(void)
88#ifdef CONFIG_RCU_NOCB_CPU 88#ifdef CONFIG_RCU_NOCB_CPU
89#ifndef CONFIG_RCU_NOCB_CPU_NONE 89#ifndef CONFIG_RCU_NOCB_CPU_NONE
90 if (!have_rcu_nocb_mask) { 90 if (!have_rcu_nocb_mask) {
91 alloc_bootmem_cpumask_var(&rcu_nocb_mask); 91 zalloc_cpumask_var(&rcu_nocb_mask, GFP_KERNEL);
92 have_rcu_nocb_mask = true; 92 have_rcu_nocb_mask = true;
93 } 93 }
94#ifdef CONFIG_RCU_NOCB_CPU_ZERO 94#ifdef CONFIG_RCU_NOCB_CPU_ZERO
@@ -1667,7 +1667,7 @@ int rcu_needs_cpu(int cpu, unsigned long *dj)
1667 rdtp->last_accelerate = jiffies; 1667 rdtp->last_accelerate = jiffies;
1668 1668
1669 /* Request timer delay depending on laziness, and round. */ 1669 /* Request timer delay depending on laziness, and round. */
1670 if (rdtp->all_lazy) { 1670 if (!rdtp->all_lazy) {
1671 *dj = round_up(rcu_idle_gp_delay + jiffies, 1671 *dj = round_up(rcu_idle_gp_delay + jiffies,
1672 rcu_idle_gp_delay) - jiffies; 1672 rcu_idle_gp_delay) - jiffies;
1673 } else { 1673 } else {
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 58453b8272fd..e8b335016c52 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -633,7 +633,19 @@ void wake_up_nohz_cpu(int cpu)
633static inline bool got_nohz_idle_kick(void) 633static inline bool got_nohz_idle_kick(void)
634{ 634{
635 int cpu = smp_processor_id(); 635 int cpu = smp_processor_id();
636 return idle_cpu(cpu) && test_bit(NOHZ_BALANCE_KICK, nohz_flags(cpu)); 636
637 if (!test_bit(NOHZ_BALANCE_KICK, nohz_flags(cpu)))
638 return false;
639
640 if (idle_cpu(cpu) && !need_resched())
641 return true;
642
643 /*
644 * We can't run Idle Load Balance on this CPU for this time so we
645 * cancel it and clear NOHZ_BALANCE_KICK
646 */
647 clear_bit(NOHZ_BALANCE_KICK, nohz_flags(cpu));
648 return false;
637} 649}
638 650
639#else /* CONFIG_NO_HZ_COMMON */ 651#else /* CONFIG_NO_HZ_COMMON */
@@ -1393,8 +1405,9 @@ static void sched_ttwu_pending(void)
1393 1405
1394void scheduler_ipi(void) 1406void scheduler_ipi(void)
1395{ 1407{
1396 if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick() 1408 if (llist_empty(&this_rq()->wake_list)
1397 && !tick_nohz_full_cpu(smp_processor_id())) 1409 && !tick_nohz_full_cpu(smp_processor_id())
1410 && !got_nohz_idle_kick())
1398 return; 1411 return;
1399 1412
1400 /* 1413 /*
@@ -1417,7 +1430,7 @@ void scheduler_ipi(void)
1417 /* 1430 /*
1418 * Check if someone kicked us for doing the nohz idle load balance. 1431 * Check if someone kicked us for doing the nohz idle load balance.
1419 */ 1432 */
1420 if (unlikely(got_nohz_idle_kick() && !need_resched())) { 1433 if (unlikely(got_nohz_idle_kick())) {
1421 this_rq()->idle_balance = 1; 1434 this_rq()->idle_balance = 1;
1422 raise_softirq_irqoff(SCHED_SOFTIRQ); 1435 raise_softirq_irqoff(SCHED_SOFTIRQ);
1423 } 1436 }
@@ -4745,7 +4758,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
4745 */ 4758 */
4746 idle->sched_class = &idle_sched_class; 4759 idle->sched_class = &idle_sched_class;
4747 ftrace_graph_init_idle_task(idle, cpu); 4760 ftrace_graph_init_idle_task(idle, cpu);
4748 vtime_init_idle(idle); 4761 vtime_init_idle(idle, cpu);
4749#if defined(CONFIG_SMP) 4762#if defined(CONFIG_SMP)
4750 sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu); 4763 sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu);
4751#endif 4764#endif
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index cc2dc3eea8a3..b5ccba22603b 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -747,17 +747,17 @@ void arch_vtime_task_switch(struct task_struct *prev)
747 747
748 write_seqlock(&current->vtime_seqlock); 748 write_seqlock(&current->vtime_seqlock);
749 current->vtime_snap_whence = VTIME_SYS; 749 current->vtime_snap_whence = VTIME_SYS;
750 current->vtime_snap = sched_clock(); 750 current->vtime_snap = sched_clock_cpu(smp_processor_id());
751 write_sequnlock(&current->vtime_seqlock); 751 write_sequnlock(&current->vtime_seqlock);
752} 752}
753 753
754void vtime_init_idle(struct task_struct *t) 754void vtime_init_idle(struct task_struct *t, int cpu)
755{ 755{
756 unsigned long flags; 756 unsigned long flags;
757 757
758 write_seqlock_irqsave(&t->vtime_seqlock, flags); 758 write_seqlock_irqsave(&t->vtime_seqlock, flags);
759 t->vtime_snap_whence = VTIME_SYS; 759 t->vtime_snap_whence = VTIME_SYS;
760 t->vtime_snap = sched_clock(); 760 t->vtime_snap = sched_clock_cpu(cpu);
761 write_sequnlock_irqrestore(&t->vtime_seqlock, flags); 761 write_sequnlock_irqrestore(&t->vtime_seqlock, flags);
762} 762}
763 763
diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h
index 2ef90a51ec5e..71bac979d5ee 100644
--- a/kernel/sched/stats.h
+++ b/kernel/sched/stats.h
@@ -162,6 +162,39 @@ sched_info_switch(struct task_struct *prev, struct task_struct *next)
162 */ 162 */
163 163
164/** 164/**
165 * cputimer_running - return true if cputimer is running
166 *
167 * @tsk: Pointer to target task.
168 */
169static inline bool cputimer_running(struct task_struct *tsk)
170
171{
172 struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
173
174 if (!cputimer->running)
175 return false;
176
177 /*
178 * After we flush the task's sum_exec_runtime to sig->sum_sched_runtime
179 * in __exit_signal(), we won't account to the signal struct further
180 * cputime consumed by that task, even though the task can still be
181 * ticking after __exit_signal().
182 *
183 * In order to keep a consistent behaviour between thread group cputime
 184 * and thread group cputimer accounting, let's also ignore the cputime
185 * elapsing after __exit_signal() in any thread group timer running.
186 *
187 * This makes sure that POSIX CPU clocks and timers are synchronized, so
188 * that a POSIX CPU timer won't expire while the corresponding POSIX CPU
189 * clock delta is behind the expiring timer value.
190 */
191 if (unlikely(!tsk->sighand))
192 return false;
193
194 return true;
195}
196
197/**
165 * account_group_user_time - Maintain utime for a thread group. 198 * account_group_user_time - Maintain utime for a thread group.
166 * 199 *
167 * @tsk: Pointer to task structure. 200 * @tsk: Pointer to task structure.
@@ -176,7 +209,7 @@ static inline void account_group_user_time(struct task_struct *tsk,
176{ 209{
177 struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; 210 struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
178 211
179 if (!cputimer->running) 212 if (!cputimer_running(tsk))
180 return; 213 return;
181 214
182 raw_spin_lock(&cputimer->lock); 215 raw_spin_lock(&cputimer->lock);
@@ -199,7 +232,7 @@ static inline void account_group_system_time(struct task_struct *tsk,
199{ 232{
200 struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; 233 struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
201 234
202 if (!cputimer->running) 235 if (!cputimer_running(tsk))
203 return; 236 return;
204 237
205 raw_spin_lock(&cputimer->lock); 238 raw_spin_lock(&cputimer->lock);
@@ -222,7 +255,7 @@ static inline void account_group_exec_runtime(struct task_struct *tsk,
222{ 255{
223 struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; 256 struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
224 257
225 if (!cputimer->running) 258 if (!cputimer_running(tsk))
226 return; 259 return;
227 260
228 raw_spin_lock(&cputimer->lock); 261 raw_spin_lock(&cputimer->lock);
diff --git a/kernel/softirq.c b/kernel/softirq.c
index b5197dcb0dad..3d6833f125d3 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -195,8 +195,12 @@ void local_bh_enable_ip(unsigned long ip)
195EXPORT_SYMBOL(local_bh_enable_ip); 195EXPORT_SYMBOL(local_bh_enable_ip);
196 196
197/* 197/*
198 * We restart softirq processing for at most 2 ms, 198 * We restart softirq processing for at most MAX_SOFTIRQ_RESTART times,
199 * and if need_resched() is not set. 199 * but break the loop if need_resched() is set or after 2 ms.
200 * The MAX_SOFTIRQ_TIME provides a nice upper bound in most cases, but in
201 * certain cases, such as stop_machine(), jiffies may cease to
202 * increment and so we need the MAX_SOFTIRQ_RESTART limit as
203 * well to make sure we eventually return from this method.
200 * 204 *
201 * These limits have been established via experimentation. 205 * These limits have been established via experimentation.
202 * The two things to balance is latency against fairness - 206 * The two things to balance is latency against fairness -
@@ -204,6 +208,7 @@ EXPORT_SYMBOL(local_bh_enable_ip);
204 * should not be able to lock up the box. 208 * should not be able to lock up the box.
205 */ 209 */
206#define MAX_SOFTIRQ_TIME msecs_to_jiffies(2) 210#define MAX_SOFTIRQ_TIME msecs_to_jiffies(2)
211#define MAX_SOFTIRQ_RESTART 10
207 212
208asmlinkage void __do_softirq(void) 213asmlinkage void __do_softirq(void)
209{ 214{
@@ -212,6 +217,7 @@ asmlinkage void __do_softirq(void)
212 unsigned long end = jiffies + MAX_SOFTIRQ_TIME; 217 unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
213 int cpu; 218 int cpu;
214 unsigned long old_flags = current->flags; 219 unsigned long old_flags = current->flags;
220 int max_restart = MAX_SOFTIRQ_RESTART;
215 221
216 /* 222 /*
217 * Mask out PF_MEMALLOC s current task context is borrowed for the 223 * Mask out PF_MEMALLOC s current task context is borrowed for the
@@ -265,7 +271,8 @@ restart:
265 271
266 pending = local_softirq_pending(); 272 pending = local_softirq_pending();
267 if (pending) { 273 if (pending) {
268 if (time_before(jiffies, end) && !need_resched()) 274 if (time_before(jiffies, end) && !need_resched() &&
275 --max_restart)
269 goto restart; 276 goto restart;
270 277
271 wakeup_softirqd(); 278 wakeup_softirqd();
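
__do_softirq() previously restarted as long as jiffies stayed within the 2 ms budget, which can spin indefinitely if jiffies stops ticking (e.g. under stop_machine()); the patch adds MAX_SOFTIRQ_RESTART as a second, time-independent bound. A userspace sketch of combining a wall-clock budget with an iteration cap: clock_gettime() is a real POSIX call, while more_work_pending() and process_pending() are invented placeholders for the softirq handling itself.

#include <stdio.h>
#include <stdbool.h>
#include <time.h>

#define MAX_RESTART 10
#define BUDGET_NS   (2 * 1000 * 1000)   /* 2 ms, like MAX_SOFTIRQ_TIME */

static int pending = 25;                /* pretend there is queued work */

static bool more_work_pending(void) { return pending > 0; }
static void process_pending(void)   { pending -= 5; }

static long long now_ns(void)
{
    struct timespec ts;

    clock_gettime(CLOCK_MONOTONIC, &ts);
    return (long long)ts.tv_sec * 1000000000LL + ts.tv_nsec;
}

int main(void)
{
    long long end = now_ns() + BUDGET_NS;
    int max_restart = MAX_RESTART;

restart:
    process_pending();

    if (more_work_pending()) {
        /* Restart only while BOTH the time budget and the restart cap allow it. */
        if (now_ns() < end && --max_restart)
            goto restart;
        printf("deferring remaining work (pending=%d)\n", pending);
        return 0;
    }
    printf("all work done\n");
    return 0;
}
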
diff --git a/kernel/sys.c b/kernel/sys.c
index b95d3c72ba21..2bbd9a73b54c 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -362,6 +362,29 @@ int unregister_reboot_notifier(struct notifier_block *nb)
362} 362}
363EXPORT_SYMBOL(unregister_reboot_notifier); 363EXPORT_SYMBOL(unregister_reboot_notifier);
364 364
365/* Add backwards compatibility for stable trees. */
366#ifndef PF_NO_SETAFFINITY
367#define PF_NO_SETAFFINITY PF_THREAD_BOUND
368#endif
369
370static void migrate_to_reboot_cpu(void)
371{
372 /* The boot cpu is always logical cpu 0 */
373 int cpu = 0;
374
375 cpu_hotplug_disable();
376
377 /* Make certain the cpu I'm about to reboot on is online */
378 if (!cpu_online(cpu))
379 cpu = cpumask_first(cpu_online_mask);
380
381 /* Prevent races with other tasks migrating this task */
382 current->flags |= PF_NO_SETAFFINITY;
383
384 /* Make certain I only run on the appropriate processor */
385 set_cpus_allowed_ptr(current, cpumask_of(cpu));
386}
387
365/** 388/**
366 * kernel_restart - reboot the system 389 * kernel_restart - reboot the system
367 * @cmd: pointer to buffer containing command to execute for restart 390 * @cmd: pointer to buffer containing command to execute for restart
@@ -373,7 +396,7 @@ EXPORT_SYMBOL(unregister_reboot_notifier);
373void kernel_restart(char *cmd) 396void kernel_restart(char *cmd)
374{ 397{
375 kernel_restart_prepare(cmd); 398 kernel_restart_prepare(cmd);
376 disable_nonboot_cpus(); 399 migrate_to_reboot_cpu();
377 syscore_shutdown(); 400 syscore_shutdown();
378 if (!cmd) 401 if (!cmd)
379 printk(KERN_EMERG "Restarting system.\n"); 402 printk(KERN_EMERG "Restarting system.\n");
@@ -400,7 +423,7 @@ static void kernel_shutdown_prepare(enum system_states state)
400void kernel_halt(void) 423void kernel_halt(void)
401{ 424{
402 kernel_shutdown_prepare(SYSTEM_HALT); 425 kernel_shutdown_prepare(SYSTEM_HALT);
403 disable_nonboot_cpus(); 426 migrate_to_reboot_cpu();
404 syscore_shutdown(); 427 syscore_shutdown();
405 printk(KERN_EMERG "System halted.\n"); 428 printk(KERN_EMERG "System halted.\n");
406 kmsg_dump(KMSG_DUMP_HALT); 429 kmsg_dump(KMSG_DUMP_HALT);
@@ -419,7 +442,7 @@ void kernel_power_off(void)
419 kernel_shutdown_prepare(SYSTEM_POWER_OFF); 442 kernel_shutdown_prepare(SYSTEM_POWER_OFF);
420 if (pm_power_off_prepare) 443 if (pm_power_off_prepare)
421 pm_power_off_prepare(); 444 pm_power_off_prepare();
422 disable_nonboot_cpus(); 445 migrate_to_reboot_cpu();
423 syscore_shutdown(); 446 syscore_shutdown();
424 printk(KERN_EMERG "Power down.\n"); 447 printk(KERN_EMERG "Power down.\n");
425 kmsg_dump(KMSG_DUMP_POWEROFF); 448 kmsg_dump(KMSG_DUMP_POWEROFF);
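
kernel_restart(), kernel_halt() and kernel_power_off() now call migrate_to_reboot_cpu() instead of disable_nonboot_cpus(): the rebooting task pins itself to one online CPU (and blocks further affinity changes) rather than hot-unplugging every other CPU. The userspace analogue of "pin the current task to one CPU before a critical step" is sched_setaffinity(); a minimal sketch, assuming CPU 0 is online (in the kernel the code falls back to the first online CPU if it is not):

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
    cpu_set_t set;

    CPU_ZERO(&set);
    CPU_SET(0, &set);                   /* run only on CPU 0 from now on */

    if (sched_setaffinity(0, sizeof(set), &set)) {
        perror("sched_setaffinity");
        return 1;
    }
    printf("now pinned to CPU %d\n", sched_getcpu());
    /* ... perform the step that must stay on one CPU ... */
    return 0;
}
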
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 12ff13a838c6..8f5b3b98577b 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -874,7 +874,6 @@ static void hardpps_update_phase(long error)
874void __hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts) 874void __hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
875{ 875{
876 struct pps_normtime pts_norm, freq_norm; 876 struct pps_normtime pts_norm, freq_norm;
877 unsigned long flags;
878 877
879 pts_norm = pps_normalize_ts(*phase_ts); 878 pts_norm = pps_normalize_ts(*phase_ts);
880 879
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 4430fa695b48..6d3f91631de6 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -583,6 +583,12 @@ again:
583 } 583 }
584 } 584 }
585 585
586 /*
587 * Remove the current cpu from the pending mask. The event is
588 * delivered immediately in tick_do_broadcast() !
589 */
590 cpumask_clear_cpu(smp_processor_id(), tick_broadcast_pending_mask);
591
586 /* Take care of enforced broadcast requests */ 592 /* Take care of enforced broadcast requests */
587 cpumask_or(tmpmask, tmpmask, tick_broadcast_force_mask); 593 cpumask_or(tmpmask, tmpmask, tick_broadcast_force_mask);
588 cpumask_clear(tick_broadcast_force_mask); 594 cpumask_clear(tick_broadcast_force_mask);
@@ -654,8 +660,8 @@ void tick_broadcast_oneshot_control(unsigned long reason)
654 660
655 raw_spin_lock_irqsave(&tick_broadcast_lock, flags); 661 raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
656 if (reason == CLOCK_EVT_NOTIFY_BROADCAST_ENTER) { 662 if (reason == CLOCK_EVT_NOTIFY_BROADCAST_ENTER) {
657 WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask));
658 if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) { 663 if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) {
664 WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask));
659 clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN); 665 clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN);
660 /* 666 /*
661 * We only reprogram the broadcast timer if we 667 * We only reprogram the broadcast timer if we
@@ -672,8 +678,6 @@ void tick_broadcast_oneshot_control(unsigned long reason)
672 } else { 678 } else {
673 if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) { 679 if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) {
674 clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT); 680 clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
675 if (dev->next_event.tv64 == KTIME_MAX)
676 goto out;
677 /* 681 /*
678 * The cpu which was handling the broadcast 682 * The cpu which was handling the broadcast
679 * timer marked this cpu in the broadcast 683 * timer marked this cpu in the broadcast
@@ -688,6 +692,11 @@ void tick_broadcast_oneshot_control(unsigned long reason)
688 goto out; 692 goto out;
689 693
690 /* 694 /*
695 * Bail out if there is no next event.
696 */
697 if (dev->next_event.tv64 == KTIME_MAX)
698 goto out;
699 /*
691 * If the pending bit is not set, then we are 700 * If the pending bit is not set, then we are
692 * either the CPU handling the broadcast 701 * either the CPU handling the broadcast
693 * interrupt or we got woken by something else. 702 * interrupt or we got woken by something else.
@@ -771,10 +780,6 @@ void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
771 780
772 bc->event_handler = tick_handle_oneshot_broadcast; 781 bc->event_handler = tick_handle_oneshot_broadcast;
773 782
774 /* Take the do_timer update */
775 if (!tick_nohz_full_cpu(cpu))
776 tick_do_timer_cpu = cpu;
777
778 /* 783 /*
779 * We must be careful here. There might be other CPUs 784 * We must be careful here. There might be other CPUs
780 * waiting for periodic broadcast. We need to set the 785 * waiting for periodic broadcast. We need to set the
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index f4208138fbf4..0cf1c1453181 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -306,7 +306,7 @@ static int __cpuinit tick_nohz_cpu_down_callback(struct notifier_block *nfb,
306 * we can't safely shutdown that CPU. 306 * we can't safely shutdown that CPU.
307 */ 307 */
308 if (have_nohz_full_mask && tick_do_timer_cpu == cpu) 308 if (have_nohz_full_mask && tick_do_timer_cpu == cpu)
309 return -EINVAL; 309 return NOTIFY_BAD;
310 break; 310 break;
311 } 311 }
312 return NOTIFY_OK; 312 return NOTIFY_OK;
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 846d0a1f235e..48b9fffabdc2 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -991,6 +991,14 @@ static int timekeeping_suspend(void)
991 991
992 read_persistent_clock(&timekeeping_suspend_time); 992 read_persistent_clock(&timekeeping_suspend_time);
993 993
994 /*
995 * On some systems the persistent_clock can not be detected at
996 * timekeeping_init by its return value, so if we see a valid
997 * value returned, update the persistent_clock_exists flag.
998 */
999 if (timekeeping_suspend_time.tv_sec || timekeeping_suspend_time.tv_nsec)
1000 persistent_clock_exist = true;
1001
994 raw_spin_lock_irqsave(&timekeeper_lock, flags); 1002 raw_spin_lock_irqsave(&timekeeper_lock, flags);
995 write_seqcount_begin(&timekeeper_seq); 1003 write_seqcount_begin(&timekeeper_seq);
996 timekeeping_forward_now(tk); 1004 timekeeping_forward_now(tk);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index b549b0f5b977..6c508ff33c62 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -120,22 +120,22 @@ static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip);
120 120
121/* 121/*
122 * Traverse the ftrace_global_list, invoking all entries. The reason that we 122 * Traverse the ftrace_global_list, invoking all entries. The reason that we
123 * can use rcu_dereference_raw() is that elements removed from this list 123 * can use rcu_dereference_raw_notrace() is that elements removed from this list
124 * are simply leaked, so there is no need to interact with a grace-period 124 * are simply leaked, so there is no need to interact with a grace-period
125 * mechanism. The rcu_dereference_raw() calls are needed to handle 125 * mechanism. The rcu_dereference_raw_notrace() calls are needed to handle
126 * concurrent insertions into the ftrace_global_list. 126 * concurrent insertions into the ftrace_global_list.
127 * 127 *
128 * Silly Alpha and silly pointer-speculation compiler optimizations! 128 * Silly Alpha and silly pointer-speculation compiler optimizations!
129 */ 129 */
130#define do_for_each_ftrace_op(op, list) \ 130#define do_for_each_ftrace_op(op, list) \
131 op = rcu_dereference_raw(list); \ 131 op = rcu_dereference_raw_notrace(list); \
132 do 132 do
133 133
134/* 134/*
135 * Optimized for just a single item in the list (as that is the normal case). 135 * Optimized for just a single item in the list (as that is the normal case).
136 */ 136 */
137#define while_for_each_ftrace_op(op) \ 137#define while_for_each_ftrace_op(op) \
138 while (likely(op = rcu_dereference_raw((op)->next)) && \ 138 while (likely(op = rcu_dereference_raw_notrace((op)->next)) && \
139 unlikely((op) != &ftrace_list_end)) 139 unlikely((op) != &ftrace_list_end))
140 140
141static inline void ftrace_ops_init(struct ftrace_ops *ops) 141static inline void ftrace_ops_init(struct ftrace_ops *ops)
@@ -779,7 +779,7 @@ ftrace_find_profiled_func(struct ftrace_profile_stat *stat, unsigned long ip)
779 if (hlist_empty(hhd)) 779 if (hlist_empty(hhd))
780 return NULL; 780 return NULL;
781 781
782 hlist_for_each_entry_rcu(rec, hhd, node) { 782 hlist_for_each_entry_rcu_notrace(rec, hhd, node) {
783 if (rec->ip == ip) 783 if (rec->ip == ip)
784 return rec; 784 return rec;
785 } 785 }
@@ -1165,7 +1165,7 @@ ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip)
1165 1165
1166 hhd = &hash->buckets[key]; 1166 hhd = &hash->buckets[key];
1167 1167
1168 hlist_for_each_entry_rcu(entry, hhd, hlist) { 1168 hlist_for_each_entry_rcu_notrace(entry, hhd, hlist) {
1169 if (entry->ip == ip) 1169 if (entry->ip == ip)
1170 return entry; 1170 return entry;
1171 } 1171 }
@@ -1422,8 +1422,8 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip)
1422 struct ftrace_hash *notrace_hash; 1422 struct ftrace_hash *notrace_hash;
1423 int ret; 1423 int ret;
1424 1424
1425 filter_hash = rcu_dereference_raw(ops->filter_hash); 1425 filter_hash = rcu_dereference_raw_notrace(ops->filter_hash);
1426 notrace_hash = rcu_dereference_raw(ops->notrace_hash); 1426 notrace_hash = rcu_dereference_raw_notrace(ops->notrace_hash);
1427 1427
1428 if ((ftrace_hash_empty(filter_hash) || 1428 if ((ftrace_hash_empty(filter_hash) ||
1429 ftrace_lookup_ip(filter_hash, ip)) && 1429 ftrace_lookup_ip(filter_hash, ip)) &&
@@ -2920,7 +2920,7 @@ static void function_trace_probe_call(unsigned long ip, unsigned long parent_ip,
2920 * on the hash. rcu_read_lock is too dangerous here. 2920 * on the hash. rcu_read_lock is too dangerous here.
2921 */ 2921 */
2922 preempt_disable_notrace(); 2922 preempt_disable_notrace();
2923 hlist_for_each_entry_rcu(entry, hhd, node) { 2923 hlist_for_each_entry_rcu_notrace(entry, hhd, node) {
2924 if (entry->ip == ip) 2924 if (entry->ip == ip)
2925 entry->ops->func(ip, parent_ip, &entry->data); 2925 entry->ops->func(ip, parent_ip, &entry->data);
2926 } 2926 }
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index b59aea2c48c2..e444ff88f0a4 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -620,6 +620,9 @@ int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu,
620 if (cpu == RING_BUFFER_ALL_CPUS) 620 if (cpu == RING_BUFFER_ALL_CPUS)
621 work = &buffer->irq_work; 621 work = &buffer->irq_work;
622 else { 622 else {
623 if (!cpumask_test_cpu(cpu, buffer->cpumask))
624 return -EINVAL;
625
623 cpu_buffer = buffer->buffers[cpu]; 626 cpu_buffer = buffer->buffers[cpu];
624 work = &cpu_buffer->irq_work; 627 work = &cpu_buffer->irq_work;
625 } 628 }
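
ring_buffer_poll_wait() gains a cpumask_test_cpu() check so that a caller passing a CPU the buffer was never allocated for gets -EINVAL instead of indexing buffer->buffers[] out of range. The general shape, validate the index against the recorded set before dereferencing, in a small userspace sketch with invented names (struct buffers, present_mask, buffer_read are illustration-only):

#include <stdio.h>

#define NBUF 4

struct buffers {
    unsigned int present_mask;          /* bit n set => slot n was allocated */
    int data[NBUF];
};

static int buffer_read(struct buffers *b, int cpu, int *out)
{
    if (cpu < 0 || cpu >= NBUF || !(b->present_mask & (1u << cpu)))
        return -22;                     /* -EINVAL, as in the patch */

    *out = b->data[cpu];
    return 0;
}

int main(void)
{
    struct buffers b = { .present_mask = 0x5, .data = { 10, 0, 30, 0 } };
    int v;

    printf("%d\n", buffer_read(&b, 2, &v));  /* 0, v == 30 */
    printf("%d\n", buffer_read(&b, 1, &v));  /* -22: slot never allocated */
    return 0;
}
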
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index ae6fa2d1cdf7..e71a8be4a6ee 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -652,8 +652,6 @@ static struct {
652 ARCH_TRACE_CLOCKS 652 ARCH_TRACE_CLOCKS
653}; 653};
654 654
655int trace_clock_id;
656
657/* 655/*
658 * trace_parser_get_init - gets the buffer for trace parser 656 * trace_parser_get_init - gets the buffer for trace parser
659 */ 657 */
@@ -843,7 +841,15 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
843 841
844 memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN); 842 memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
845 max_data->pid = tsk->pid; 843 max_data->pid = tsk->pid;
846 max_data->uid = task_uid(tsk); 844 /*
845 * If tsk == current, then use current_uid(), as that does not use
846 * RCU. The irq tracer can be called out of RCU scope.
847 */
848 if (tsk == current)
849 max_data->uid = current_uid();
850 else
851 max_data->uid = task_uid(tsk);
852
847 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO; 853 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
848 max_data->policy = tsk->policy; 854 max_data->policy = tsk->policy;
849 max_data->rt_priority = tsk->rt_priority; 855 max_data->rt_priority = tsk->rt_priority;
@@ -2818,7 +2824,7 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot)
2818 iter->iter_flags |= TRACE_FILE_ANNOTATE; 2824 iter->iter_flags |= TRACE_FILE_ANNOTATE;
2819 2825
2820 /* Output in nanoseconds only if we are using a clock in nanoseconds. */ 2826 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
2821 if (trace_clocks[trace_clock_id].in_ns) 2827 if (trace_clocks[tr->clock_id].in_ns)
2822 iter->iter_flags |= TRACE_FILE_TIME_IN_NS; 2828 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
2823 2829
2824 /* stop the trace while dumping if we are not opening "snapshot" */ 2830 /* stop the trace while dumping if we are not opening "snapshot" */
@@ -3817,7 +3823,7 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
3817 iter->iter_flags |= TRACE_FILE_LAT_FMT; 3823 iter->iter_flags |= TRACE_FILE_LAT_FMT;
3818 3824
3819 /* Output in nanoseconds only if we are using a clock in nanoseconds. */ 3825 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3820 if (trace_clocks[trace_clock_id].in_ns) 3826 if (trace_clocks[tr->clock_id].in_ns)
3821 iter->iter_flags |= TRACE_FILE_TIME_IN_NS; 3827 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3822 3828
3823 iter->cpu_file = tc->cpu; 3829 iter->cpu_file = tc->cpu;
@@ -5087,7 +5093,7 @@ tracing_stats_read(struct file *filp, char __user *ubuf,
5087 cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu); 5093 cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
5088 trace_seq_printf(s, "bytes: %ld\n", cnt); 5094 trace_seq_printf(s, "bytes: %ld\n", cnt);
5089 5095
5090 if (trace_clocks[trace_clock_id].in_ns) { 5096 if (trace_clocks[tr->clock_id].in_ns) {
5091 /* local or global for trace_clock */ 5097 /* local or global for trace_clock */
5092 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu)); 5098 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5093 usec_rem = do_div(t, USEC_PER_SEC); 5099 usec_rem = do_div(t, USEC_PER_SEC);
@@ -6216,10 +6222,15 @@ __init static int tracer_alloc_buffers(void)
6216 6222
6217 trace_init_cmdlines(); 6223 trace_init_cmdlines();
6218 6224
6219 register_tracer(&nop_trace); 6225 /*
6220 6226 * register_tracer() might reference current_trace, so it
6227 * needs to be set before we register anything. This is
6228 * just a bootstrap of current_trace anyway.
6229 */
6221 global_trace.current_trace = &nop_trace; 6230 global_trace.current_trace = &nop_trace;
6222 6231
6232 register_tracer(&nop_trace);
6233
6223 /* All seems OK, enable tracing */ 6234 /* All seems OK, enable tracing */
6224 tracing_disabled = 0; 6235 tracing_disabled = 0;
6225 6236
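
trace.c drops the global trace_clock_id and reads the clock from the trace_array instance (tr->clock_id) instead, so separate trace instances can use different clocks; it also sets global_trace.current_trace before register_tracer(), since registration may dereference it. A tiny sketch of the "global knob becomes a per-instance field" refactor, with hypothetical names (clock_desc, trace_instance, output_in_ns):

#include <stdio.h>
#include <stdbool.h>

struct clock_desc { const char *name; bool in_ns; };

static const struct clock_desc clocks[] = {
    { "local",   true  },
    { "jiffies", false },
};

struct trace_instance {
    int clock_id;                       /* per-instance, instead of one global */
};

static bool output_in_ns(const struct trace_instance *tr)
{
    return clocks[tr->clock_id].in_ns;
}

int main(void)
{
    struct trace_instance a = { .clock_id = 0 };
    struct trace_instance b = { .clock_id = 1 };

    printf("%d %d\n", output_in_ns(&a), output_in_ns(&b)); /* 1 0 */
    return 0;
}
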
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 711ca7d3e7f1..20572ed88c5c 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -700,8 +700,6 @@ enum print_line_t print_trace_line(struct trace_iterator *iter);
700 700
701extern unsigned long trace_flags; 701extern unsigned long trace_flags;
702 702
703extern int trace_clock_id;
704
705/* Standard output formatting function used for function return traces */ 703/* Standard output formatting function used for function return traces */
706#ifdef CONFIG_FUNCTION_GRAPH_TRACER 704#ifdef CONFIG_FUNCTION_GRAPH_TRACER
707 705
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 7a0cf68027cc..27963e2bf4bf 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -2072,8 +2072,10 @@ event_enable_func(struct ftrace_hash *hash,
2072 out_reg: 2072 out_reg:
2073 /* Don't let event modules unload while probe registered */ 2073 /* Don't let event modules unload while probe registered */
2074 ret = try_module_get(file->event_call->mod); 2074 ret = try_module_get(file->event_call->mod);
2075 if (!ret) 2075 if (!ret) {
2076 ret = -EBUSY;
2076 goto out_free; 2077 goto out_free;
2078 }
2077 2079
2078 ret = __ftrace_event_enable_disable(file, 1, 1); 2080 ret = __ftrace_event_enable_disable(file, 1, 1);
2079 if (ret < 0) 2081 if (ret < 0)
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 55e2cf66967b..2901e3b88590 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -1159,7 +1159,7 @@ trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr)
1159 /* stop the tracing. */ 1159 /* stop the tracing. */
1160 tracing_stop(); 1160 tracing_stop();
1161 /* check the trace buffer */ 1161 /* check the trace buffer */
1162 ret = trace_test_buffer(tr, &count); 1162 ret = trace_test_buffer(&tr->trace_buffer, &count);
1163 trace->reset(tr); 1163 trace->reset(tr);
1164 tracing_start(); 1164 tracing_start();
1165 1165
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 1ae602809efb..ee8e29a2320c 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -296,7 +296,7 @@ static DEFINE_HASHTABLE(unbound_pool_hash, UNBOUND_POOL_HASH_ORDER);
296static struct workqueue_attrs *unbound_std_wq_attrs[NR_STD_WORKER_POOLS]; 296static struct workqueue_attrs *unbound_std_wq_attrs[NR_STD_WORKER_POOLS];
297 297
298struct workqueue_struct *system_wq __read_mostly; 298struct workqueue_struct *system_wq __read_mostly;
299EXPORT_SYMBOL_GPL(system_wq); 299EXPORT_SYMBOL(system_wq);
300struct workqueue_struct *system_highpri_wq __read_mostly; 300struct workqueue_struct *system_highpri_wq __read_mostly;
301EXPORT_SYMBOL_GPL(system_highpri_wq); 301EXPORT_SYMBOL_GPL(system_highpri_wq);
302struct workqueue_struct *system_long_wq __read_mostly; 302struct workqueue_struct *system_long_wq __read_mostly;
@@ -1411,7 +1411,7 @@ bool queue_work_on(int cpu, struct workqueue_struct *wq,
1411 local_irq_restore(flags); 1411 local_irq_restore(flags);
1412 return ret; 1412 return ret;
1413} 1413}
1414EXPORT_SYMBOL_GPL(queue_work_on); 1414EXPORT_SYMBOL(queue_work_on);
1415 1415
1416void delayed_work_timer_fn(unsigned long __data) 1416void delayed_work_timer_fn(unsigned long __data)
1417{ 1417{
@@ -1485,7 +1485,7 @@ bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
1485 local_irq_restore(flags); 1485 local_irq_restore(flags);
1486 return ret; 1486 return ret;
1487} 1487}
1488EXPORT_SYMBOL_GPL(queue_delayed_work_on); 1488EXPORT_SYMBOL(queue_delayed_work_on);
1489 1489
1490/** 1490/**
1491 * mod_delayed_work_on - modify delay of or queue a delayed work on specific CPU 1491 * mod_delayed_work_on - modify delay of or queue a delayed work on specific CPU
@@ -2059,6 +2059,7 @@ static bool manage_workers(struct worker *worker)
2059 if (unlikely(!mutex_trylock(&pool->manager_mutex))) { 2059 if (unlikely(!mutex_trylock(&pool->manager_mutex))) {
2060 spin_unlock_irq(&pool->lock); 2060 spin_unlock_irq(&pool->lock);
2061 mutex_lock(&pool->manager_mutex); 2061 mutex_lock(&pool->manager_mutex);
2062 spin_lock_irq(&pool->lock);
2062 ret = true; 2063 ret = true;
2063 } 2064 }
2064 2065
@@ -4904,7 +4905,8 @@ static void __init wq_numa_init(void)
4904 BUG_ON(!tbl); 4905 BUG_ON(!tbl);
4905 4906
4906 for_each_node(node) 4907 for_each_node(node)
4907 BUG_ON(!alloc_cpumask_var_node(&tbl[node], GFP_KERNEL, node)); 4908 BUG_ON(!alloc_cpumask_var_node(&tbl[node], GFP_KERNEL,
4909 node_online(node) ? node : NUMA_NO_NODE));
4908 4910
4909 for_each_possible_cpu(cpu) { 4911 for_each_possible_cpu(cpu) {
4910 node = cpu_to_node(cpu); 4912 node = cpu_to_node(cpu);