Diffstat (limited to 'kernel')
 kernel/cgroup.c                    |   4
 kernel/debug/debug_core.c          |   4
 kernel/events/core.c               | 287
 kernel/events/uprobes.c            |  31
 kernel/futex.c                     |  56
 kernel/hrtimer.c                   |   1
 kernel/kexec.c                     |   8
 kernel/kmod.c                      |   2
 kernel/ksysfs.c                    |   5
 kernel/locking/lockdep_internals.h |   6
 kernel/locking/locktorture.c       |  10
 kernel/locking/rtmutex.c           |  32
 kernel/locking/rwsem-xadd.c        |  49
 kernel/printk/printk.c             |   1
 kernel/rcu/rcutorture.c            | 217
 kernel/rcu/tiny_plugin.h           |   8
 kernel/rcu/tree.c                  | 331
 kernel/rcu/tree.h                  |  11
 kernel/rcu/tree_plugin.h           | 144
 kernel/rcu/update.c                |  30
 kernel/resource.c                  |   7
 kernel/sched/core.c                |  23
 kernel/sched/cpupri.c              |   6
 kernel/sched/wait.c                |   2
 kernel/softirq.c                   |   4
 kernel/sysctl.c                    |   4
 kernel/torture.c                   |  40
 27 files changed, 855 insertions(+), 468 deletions(-)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 3f1ca934a237..ceee0c54c6a4 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -33,6 +33,7 @@
33#include <linux/init_task.h> 33#include <linux/init_task.h>
34#include <linux/kernel.h> 34#include <linux/kernel.h>
35#include <linux/list.h> 35#include <linux/list.h>
36#include <linux/magic.h>
36#include <linux/mm.h> 37#include <linux/mm.h>
37#include <linux/mutex.h> 38#include <linux/mutex.h>
38#include <linux/mount.h> 39#include <linux/mount.h>
@@ -1604,7 +1605,8 @@ out_unlock:
1604 if (ret) 1605 if (ret)
1605 return ERR_PTR(ret); 1606 return ERR_PTR(ret);
1606 1607
1607 dentry = kernfs_mount(fs_type, flags, root->kf_root, &new_sb); 1608 dentry = kernfs_mount(fs_type, flags, root->kf_root,
1609 CGROUP_SUPER_MAGIC, &new_sb);
1608 if (IS_ERR(dentry) || !new_sb) 1610 if (IS_ERR(dentry) || !new_sb)
1609 cgroup_put(&root->cgrp); 1611 cgroup_put(&root->cgrp);
1610 return dentry; 1612 return dentry;
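
With the cgroup filesystem now passing CGROUP_SUPER_MAGIC to kernfs_mount(), a cgroup mount reports that magic through statfs(). A minimal userspace sketch (not part of the patch; the mount point is an assumption, adjust it to a hierarchy mounted on your system):

/*
 * Check that a cgroup mount reports CGROUP_SUPER_MAGIC via statfs().
 * The path below is a placeholder for whatever hierarchy is mounted.
 */
#include <stdio.h>
#include <sys/vfs.h>
#include <linux/magic.h>

int main(void)
{
	struct statfs st;

	if (statfs("/sys/fs/cgroup/cpu", &st) != 0) {
		perror("statfs");
		return 1;
	}
	printf("f_type = 0x%lx (CGROUP_SUPER_MAGIC = 0x%x)\n",
	       (unsigned long)st.f_type, CGROUP_SUPER_MAGIC);
	return 0;
}
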
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index 2956c8da1605..1adf62b39b96 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -534,7 +534,7 @@ return_normal:
534 kgdb_info[cpu].exception_state &= 534 kgdb_info[cpu].exception_state &=
535 ~(DCPU_WANT_MASTER | DCPU_IS_SLAVE); 535 ~(DCPU_WANT_MASTER | DCPU_IS_SLAVE);
536 kgdb_info[cpu].enter_kgdb--; 536 kgdb_info[cpu].enter_kgdb--;
537 smp_mb__before_atomic_dec(); 537 smp_mb__before_atomic();
538 atomic_dec(&slaves_in_kgdb); 538 atomic_dec(&slaves_in_kgdb);
539 dbg_touch_watchdogs(); 539 dbg_touch_watchdogs();
540 local_irq_restore(flags); 540 local_irq_restore(flags);
@@ -662,7 +662,7 @@ kgdb_restore:
662 kgdb_info[cpu].exception_state &= 662 kgdb_info[cpu].exception_state &=
663 ~(DCPU_WANT_MASTER | DCPU_IS_SLAVE); 663 ~(DCPU_WANT_MASTER | DCPU_IS_SLAVE);
664 kgdb_info[cpu].enter_kgdb--; 664 kgdb_info[cpu].enter_kgdb--;
665 smp_mb__before_atomic_dec(); 665 smp_mb__before_atomic();
666 atomic_dec(&masters_in_kgdb); 666 atomic_dec(&masters_in_kgdb);
667 /* Free kgdb_active */ 667 /* Free kgdb_active */
668 atomic_set(&kgdb_active, -1); 668 atomic_set(&kgdb_active, -1);
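
These hunks are part of the tree-wide rename of smp_mb__before_atomic_dec() and friends to the single smp_mb__before_atomic(), which orders prior accesses before a following atomic read-modify-write such as atomic_dec(). A loose userspace analog only (C11 atomics, not the kernel API; the counter and flag names are illustrative):

/*
 * Userspace analog of a full barrier placed before an atomic decrement,
 * the role smp_mb__before_atomic() plays ahead of atomic_dec() above.
 */
#include <stdatomic.h>
#include <stdio.h>

static atomic_int slaves_in_dbg = ATOMIC_VAR_INIT(3);	/* hypothetical counter */
static int exception_state;				/* data published before the dec */

int main(void)
{
	exception_state = 0;
	/* full fence: the plain store above is ordered before the decrement */
	atomic_thread_fence(memory_order_seq_cst);
	atomic_fetch_sub_explicit(&slaves_in_dbg, 1, memory_order_relaxed);

	printf("state=%d counter=%d\n", exception_state,
	       atomic_load(&slaves_in_dbg));
	return 0;
}
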
diff --git a/kernel/events/core.c b/kernel/events/core.c
index f83a71a3e46d..689237a0c5e8 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -39,6 +39,7 @@
39#include <linux/hw_breakpoint.h> 39#include <linux/hw_breakpoint.h>
40#include <linux/mm_types.h> 40#include <linux/mm_types.h>
41#include <linux/cgroup.h> 41#include <linux/cgroup.h>
42#include <linux/module.h>
42 43
43#include "internal.h" 44#include "internal.h"
44 45
@@ -1443,6 +1444,11 @@ group_sched_out(struct perf_event *group_event,
1443 cpuctx->exclusive = 0; 1444 cpuctx->exclusive = 0;
1444} 1445}
1445 1446
1447struct remove_event {
1448 struct perf_event *event;
1449 bool detach_group;
1450};
1451
1446/* 1452/*
1447 * Cross CPU call to remove a performance event 1453 * Cross CPU call to remove a performance event
1448 * 1454 *
@@ -1451,12 +1457,15 @@ group_sched_out(struct perf_event *group_event,
1451 */ 1457 */
1452static int __perf_remove_from_context(void *info) 1458static int __perf_remove_from_context(void *info)
1453{ 1459{
1454 struct perf_event *event = info; 1460 struct remove_event *re = info;
1461 struct perf_event *event = re->event;
1455 struct perf_event_context *ctx = event->ctx; 1462 struct perf_event_context *ctx = event->ctx;
1456 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); 1463 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
1457 1464
1458 raw_spin_lock(&ctx->lock); 1465 raw_spin_lock(&ctx->lock);
1459 event_sched_out(event, cpuctx, ctx); 1466 event_sched_out(event, cpuctx, ctx);
1467 if (re->detach_group)
1468 perf_group_detach(event);
1460 list_del_event(event, ctx); 1469 list_del_event(event, ctx);
1461 if (!ctx->nr_events && cpuctx->task_ctx == ctx) { 1470 if (!ctx->nr_events && cpuctx->task_ctx == ctx) {
1462 ctx->is_active = 0; 1471 ctx->is_active = 0;
@@ -1481,10 +1490,14 @@ static int __perf_remove_from_context(void *info)
1481 * When called from perf_event_exit_task, it's OK because the 1490 * When called from perf_event_exit_task, it's OK because the
1482 * context has been detached from its task. 1491 * context has been detached from its task.
1483 */ 1492 */
1484static void perf_remove_from_context(struct perf_event *event) 1493static void perf_remove_from_context(struct perf_event *event, bool detach_group)
1485{ 1494{
1486 struct perf_event_context *ctx = event->ctx; 1495 struct perf_event_context *ctx = event->ctx;
1487 struct task_struct *task = ctx->task; 1496 struct task_struct *task = ctx->task;
1497 struct remove_event re = {
1498 .event = event,
1499 .detach_group = detach_group,
1500 };
1488 1501
1489 lockdep_assert_held(&ctx->mutex); 1502 lockdep_assert_held(&ctx->mutex);
1490 1503
@@ -1493,12 +1506,12 @@ static void perf_remove_from_context(struct perf_event *event)
1493 * Per cpu events are removed via an smp call and 1506 * Per cpu events are removed via an smp call and
1494 * the removal is always successful. 1507 * the removal is always successful.
1495 */ 1508 */
1496 cpu_function_call(event->cpu, __perf_remove_from_context, event); 1509 cpu_function_call(event->cpu, __perf_remove_from_context, &re);
1497 return; 1510 return;
1498 } 1511 }
1499 1512
1500retry: 1513retry:
1501 if (!task_function_call(task, __perf_remove_from_context, event)) 1514 if (!task_function_call(task, __perf_remove_from_context, &re))
1502 return; 1515 return;
1503 1516
1504 raw_spin_lock_irq(&ctx->lock); 1517 raw_spin_lock_irq(&ctx->lock);
@@ -1515,6 +1528,8 @@ retry:
1515 * Since the task isn't running, its safe to remove the event, us 1528 * Since the task isn't running, its safe to remove the event, us
1516 * holding the ctx->lock ensures the task won't get scheduled in. 1529 * holding the ctx->lock ensures the task won't get scheduled in.
1517 */ 1530 */
1531 if (detach_group)
1532 perf_group_detach(event);
1518 list_del_event(event, ctx); 1533 list_del_event(event, ctx);
1519 raw_spin_unlock_irq(&ctx->lock); 1534 raw_spin_unlock_irq(&ctx->lock);
1520} 1535}
@@ -1663,6 +1678,8 @@ event_sched_in(struct perf_event *event,
1663 u64 tstamp = perf_event_time(event); 1678 u64 tstamp = perf_event_time(event);
1664 int ret = 0; 1679 int ret = 0;
1665 1680
1681 lockdep_assert_held(&ctx->lock);
1682
1666 if (event->state <= PERF_EVENT_STATE_OFF) 1683 if (event->state <= PERF_EVENT_STATE_OFF)
1667 return 0; 1684 return 0;
1668 1685
@@ -3178,7 +3195,8 @@ static void free_event_rcu(struct rcu_head *head)
3178} 3195}
3179 3196
3180static void ring_buffer_put(struct ring_buffer *rb); 3197static void ring_buffer_put(struct ring_buffer *rb);
3181static void ring_buffer_detach(struct perf_event *event, struct ring_buffer *rb); 3198static void ring_buffer_attach(struct perf_event *event,
3199 struct ring_buffer *rb);
3182 3200
3183static void unaccount_event_cpu(struct perf_event *event, int cpu) 3201static void unaccount_event_cpu(struct perf_event *event, int cpu)
3184{ 3202{
@@ -3229,17 +3247,19 @@ static void __free_event(struct perf_event *event)
3229 if (event->ctx) 3247 if (event->ctx)
3230 put_ctx(event->ctx); 3248 put_ctx(event->ctx);
3231 3249
3250 if (event->pmu)
3251 module_put(event->pmu->module);
3252
3232 call_rcu(&event->rcu_head, free_event_rcu); 3253 call_rcu(&event->rcu_head, free_event_rcu);
3233} 3254}
3234static void free_event(struct perf_event *event) 3255
3256static void _free_event(struct perf_event *event)
3235{ 3257{
3236 irq_work_sync(&event->pending); 3258 irq_work_sync(&event->pending);
3237 3259
3238 unaccount_event(event); 3260 unaccount_event(event);
3239 3261
3240 if (event->rb) { 3262 if (event->rb) {
3241 struct ring_buffer *rb;
3242
3243 /* 3263 /*
3244 * Can happen when we close an event with re-directed output. 3264 * Can happen when we close an event with re-directed output.
3245 * 3265 *
@@ -3247,57 +3267,38 @@ static void free_event(struct perf_event *event)
3247 * over us; possibly making our ring_buffer_put() the last. 3267 * over us; possibly making our ring_buffer_put() the last.
3248 */ 3268 */
3249 mutex_lock(&event->mmap_mutex); 3269 mutex_lock(&event->mmap_mutex);
3250 rb = event->rb; 3270 ring_buffer_attach(event, NULL);
3251 if (rb) {
3252 rcu_assign_pointer(event->rb, NULL);
3253 ring_buffer_detach(event, rb);
3254 ring_buffer_put(rb); /* could be last */
3255 }
3256 mutex_unlock(&event->mmap_mutex); 3271 mutex_unlock(&event->mmap_mutex);
3257 } 3272 }
3258 3273
3259 if (is_cgroup_event(event)) 3274 if (is_cgroup_event(event))
3260 perf_detach_cgroup(event); 3275 perf_detach_cgroup(event);
3261 3276
3262
3263 __free_event(event); 3277 __free_event(event);
3264} 3278}
3265 3279
3266int perf_event_release_kernel(struct perf_event *event) 3280/*
3281 * Used to free events which have a known refcount of 1, such as in error paths
3282 * where the event isn't exposed yet and inherited events.
3283 */
3284static void free_event(struct perf_event *event)
3267{ 3285{
3268 struct perf_event_context *ctx = event->ctx; 3286 if (WARN(atomic_long_cmpxchg(&event->refcount, 1, 0) != 1,
3269 3287 "unexpected event refcount: %ld; ptr=%p\n",
3270 WARN_ON_ONCE(ctx->parent_ctx); 3288 atomic_long_read(&event->refcount), event)) {
3271 /* 3289 /* leak to avoid use-after-free */
3272 * There are two ways this annotation is useful: 3290 return;
3273 * 3291 }
3274 * 1) there is a lock recursion from perf_event_exit_task
3275 * see the comment there.
3276 *
3277 * 2) there is a lock-inversion with mmap_sem through
3278 * perf_event_read_group(), which takes faults while
3279 * holding ctx->mutex, however this is called after
3280 * the last filedesc died, so there is no possibility
3281 * to trigger the AB-BA case.
3282 */
3283 mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING);
3284 raw_spin_lock_irq(&ctx->lock);
3285 perf_group_detach(event);
3286 raw_spin_unlock_irq(&ctx->lock);
3287 perf_remove_from_context(event);
3288 mutex_unlock(&ctx->mutex);
3289
3290 free_event(event);
3291 3292
3292 return 0; 3293 _free_event(event);
3293} 3294}
3294EXPORT_SYMBOL_GPL(perf_event_release_kernel);
3295 3295
3296/* 3296/*
3297 * Called when the last reference to the file is gone. 3297 * Called when the last reference to the file is gone.
3298 */ 3298 */
3299static void put_event(struct perf_event *event) 3299static void put_event(struct perf_event *event)
3300{ 3300{
3301 struct perf_event_context *ctx = event->ctx;
3301 struct task_struct *owner; 3302 struct task_struct *owner;
3302 3303
3303 if (!atomic_long_dec_and_test(&event->refcount)) 3304 if (!atomic_long_dec_and_test(&event->refcount))
@@ -3336,9 +3337,33 @@ static void put_event(struct perf_event *event)
3336 put_task_struct(owner); 3337 put_task_struct(owner);
3337 } 3338 }
3338 3339
3339 perf_event_release_kernel(event); 3340 WARN_ON_ONCE(ctx->parent_ctx);
3341 /*
3342 * There are two ways this annotation is useful:
3343 *
3344 * 1) there is a lock recursion from perf_event_exit_task
3345 * see the comment there.
3346 *
3347 * 2) there is a lock-inversion with mmap_sem through
3348 * perf_event_read_group(), which takes faults while
3349 * holding ctx->mutex, however this is called after
3350 * the last filedesc died, so there is no possibility
3351 * to trigger the AB-BA case.
3352 */
3353 mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING);
3354 perf_remove_from_context(event, true);
3355 mutex_unlock(&ctx->mutex);
3356
3357 _free_event(event);
3340} 3358}
3341 3359
3360int perf_event_release_kernel(struct perf_event *event)
3361{
3362 put_event(event);
3363 return 0;
3364}
3365EXPORT_SYMBOL_GPL(perf_event_release_kernel);
3366
3342static int perf_release(struct inode *inode, struct file *file) 3367static int perf_release(struct inode *inode, struct file *file)
3343{ 3368{
3344 put_event(file->private_data); 3369 put_event(file->private_data);
@@ -3839,28 +3864,47 @@ unlock:
3839static void ring_buffer_attach(struct perf_event *event, 3864static void ring_buffer_attach(struct perf_event *event,
3840 struct ring_buffer *rb) 3865 struct ring_buffer *rb)
3841{ 3866{
3867 struct ring_buffer *old_rb = NULL;
3842 unsigned long flags; 3868 unsigned long flags;
3843 3869
3844 if (!list_empty(&event->rb_entry)) 3870 if (event->rb) {
3845 return; 3871 /*
3872 * Should be impossible, we set this when removing
3873 * event->rb_entry and wait/clear when adding event->rb_entry.
3874 */
3875 WARN_ON_ONCE(event->rcu_pending);
3846 3876
3847 spin_lock_irqsave(&rb->event_lock, flags); 3877 old_rb = event->rb;
3848 if (list_empty(&event->rb_entry)) 3878 event->rcu_batches = get_state_synchronize_rcu();
3849 list_add(&event->rb_entry, &rb->event_list); 3879 event->rcu_pending = 1;
3850 spin_unlock_irqrestore(&rb->event_lock, flags);
3851}
3852 3880
3853static void ring_buffer_detach(struct perf_event *event, struct ring_buffer *rb) 3881 spin_lock_irqsave(&old_rb->event_lock, flags);
3854{ 3882 list_del_rcu(&event->rb_entry);
3855 unsigned long flags; 3883 spin_unlock_irqrestore(&old_rb->event_lock, flags);
3884 }
3856 3885
3857 if (list_empty(&event->rb_entry)) 3886 if (event->rcu_pending && rb) {
3858 return; 3887 cond_synchronize_rcu(event->rcu_batches);
3888 event->rcu_pending = 0;
3889 }
3890
3891 if (rb) {
3892 spin_lock_irqsave(&rb->event_lock, flags);
3893 list_add_rcu(&event->rb_entry, &rb->event_list);
3894 spin_unlock_irqrestore(&rb->event_lock, flags);
3895 }
3896
3897 rcu_assign_pointer(event->rb, rb);
3859 3898
3860 spin_lock_irqsave(&rb->event_lock, flags); 3899 if (old_rb) {
3861 list_del_init(&event->rb_entry); 3900 ring_buffer_put(old_rb);
3862 wake_up_all(&event->waitq); 3901 /*
3863 spin_unlock_irqrestore(&rb->event_lock, flags); 3902 * Since we detached before setting the new rb, so that we
3903 * could attach the new rb, we could have missed a wakeup.
3904 * Provide it now.
3905 */
3906 wake_up_all(&event->waitq);
3907 }
3864} 3908}
3865 3909
3866static void ring_buffer_wakeup(struct perf_event *event) 3910static void ring_buffer_wakeup(struct perf_event *event)
@@ -3929,7 +3973,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)
3929{ 3973{
3930 struct perf_event *event = vma->vm_file->private_data; 3974 struct perf_event *event = vma->vm_file->private_data;
3931 3975
3932 struct ring_buffer *rb = event->rb; 3976 struct ring_buffer *rb = ring_buffer_get(event);
3933 struct user_struct *mmap_user = rb->mmap_user; 3977 struct user_struct *mmap_user = rb->mmap_user;
3934 int mmap_locked = rb->mmap_locked; 3978 int mmap_locked = rb->mmap_locked;
3935 unsigned long size = perf_data_size(rb); 3979 unsigned long size = perf_data_size(rb);
@@ -3937,18 +3981,14 @@ static void perf_mmap_close(struct vm_area_struct *vma)
3937 atomic_dec(&rb->mmap_count); 3981 atomic_dec(&rb->mmap_count);
3938 3982
3939 if (!atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) 3983 if (!atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex))
3940 return; 3984 goto out_put;
3941 3985
3942 /* Detach current event from the buffer. */ 3986 ring_buffer_attach(event, NULL);
3943 rcu_assign_pointer(event->rb, NULL);
3944 ring_buffer_detach(event, rb);
3945 mutex_unlock(&event->mmap_mutex); 3987 mutex_unlock(&event->mmap_mutex);
3946 3988
3947 /* If there's still other mmap()s of this buffer, we're done. */ 3989 /* If there's still other mmap()s of this buffer, we're done. */
3948 if (atomic_read(&rb->mmap_count)) { 3990 if (atomic_read(&rb->mmap_count))
3949 ring_buffer_put(rb); /* can't be last */ 3991 goto out_put;
3950 return;
3951 }
3952 3992
3953 /* 3993 /*
3954 * No other mmap()s, detach from all other events that might redirect 3994 * No other mmap()s, detach from all other events that might redirect
@@ -3978,11 +4018,9 @@ again:
3978 * still restart the iteration to make sure we're not now 4018 * still restart the iteration to make sure we're not now
3979 * iterating the wrong list. 4019 * iterating the wrong list.
3980 */ 4020 */
3981 if (event->rb == rb) { 4021 if (event->rb == rb)
3982 rcu_assign_pointer(event->rb, NULL); 4022 ring_buffer_attach(event, NULL);
3983 ring_buffer_detach(event, rb); 4023
3984 ring_buffer_put(rb); /* can't be last, we still have one */
3985 }
3986 mutex_unlock(&event->mmap_mutex); 4024 mutex_unlock(&event->mmap_mutex);
3987 put_event(event); 4025 put_event(event);
3988 4026
@@ -4007,6 +4045,7 @@ again:
4007 vma->vm_mm->pinned_vm -= mmap_locked; 4045 vma->vm_mm->pinned_vm -= mmap_locked;
4008 free_uid(mmap_user); 4046 free_uid(mmap_user);
4009 4047
4048out_put:
4010 ring_buffer_put(rb); /* could be last */ 4049 ring_buffer_put(rb); /* could be last */
4011} 4050}
4012 4051
@@ -4124,7 +4163,6 @@ again:
4124 vma->vm_mm->pinned_vm += extra; 4163 vma->vm_mm->pinned_vm += extra;
4125 4164
4126 ring_buffer_attach(event, rb); 4165 ring_buffer_attach(event, rb);
4127 rcu_assign_pointer(event->rb, rb);
4128 4166
4129 perf_event_init_userpage(event); 4167 perf_event_init_userpage(event);
4130 perf_event_update_userpage(event); 4168 perf_event_update_userpage(event);
@@ -5408,6 +5446,9 @@ struct swevent_htable {
5408 5446
5409 /* Recursion avoidance in each contexts */ 5447 /* Recursion avoidance in each contexts */
5410 int recursion[PERF_NR_CONTEXTS]; 5448 int recursion[PERF_NR_CONTEXTS];
5449
5450 /* Keeps track of cpu being initialized/exited */
5451 bool online;
5411}; 5452};
5412 5453
5413static DEFINE_PER_CPU(struct swevent_htable, swevent_htable); 5454static DEFINE_PER_CPU(struct swevent_htable, swevent_htable);
@@ -5654,8 +5695,14 @@ static int perf_swevent_add(struct perf_event *event, int flags)
5654 hwc->state = !(flags & PERF_EF_START); 5695 hwc->state = !(flags & PERF_EF_START);
5655 5696
5656 head = find_swevent_head(swhash, event); 5697 head = find_swevent_head(swhash, event);
5657 if (WARN_ON_ONCE(!head)) 5698 if (!head) {
5699 /*
5700 * We can race with cpu hotplug code. Do not
5701 * WARN if the cpu just got unplugged.
5702 */
5703 WARN_ON_ONCE(swhash->online);
5658 return -EINVAL; 5704 return -EINVAL;
5705 }
5659 5706
5660 hlist_add_head_rcu(&event->hlist_entry, head); 5707 hlist_add_head_rcu(&event->hlist_entry, head);
5661 5708
@@ -6551,6 +6598,7 @@ free_pdc:
6551 free_percpu(pmu->pmu_disable_count); 6598 free_percpu(pmu->pmu_disable_count);
6552 goto unlock; 6599 goto unlock;
6553} 6600}
6601EXPORT_SYMBOL_GPL(perf_pmu_register);
6554 6602
6555void perf_pmu_unregister(struct pmu *pmu) 6603void perf_pmu_unregister(struct pmu *pmu)
6556{ 6604{
@@ -6572,6 +6620,7 @@ void perf_pmu_unregister(struct pmu *pmu)
6572 put_device(pmu->dev); 6620 put_device(pmu->dev);
6573 free_pmu_context(pmu); 6621 free_pmu_context(pmu);
6574} 6622}
6623EXPORT_SYMBOL_GPL(perf_pmu_unregister);
6575 6624
6576struct pmu *perf_init_event(struct perf_event *event) 6625struct pmu *perf_init_event(struct perf_event *event)
6577{ 6626{
@@ -6585,6 +6634,10 @@ struct pmu *perf_init_event(struct perf_event *event)
6585 pmu = idr_find(&pmu_idr, event->attr.type); 6634 pmu = idr_find(&pmu_idr, event->attr.type);
6586 rcu_read_unlock(); 6635 rcu_read_unlock();
6587 if (pmu) { 6636 if (pmu) {
6637 if (!try_module_get(pmu->module)) {
6638 pmu = ERR_PTR(-ENODEV);
6639 goto unlock;
6640 }
6588 event->pmu = pmu; 6641 event->pmu = pmu;
6589 ret = pmu->event_init(event); 6642 ret = pmu->event_init(event);
6590 if (ret) 6643 if (ret)
@@ -6593,6 +6646,10 @@ struct pmu *perf_init_event(struct perf_event *event)
6593 } 6646 }
6594 6647
6595 list_for_each_entry_rcu(pmu, &pmus, entry) { 6648 list_for_each_entry_rcu(pmu, &pmus, entry) {
6649 if (!try_module_get(pmu->module)) {
6650 pmu = ERR_PTR(-ENODEV);
6651 goto unlock;
6652 }
6596 event->pmu = pmu; 6653 event->pmu = pmu;
6597 ret = pmu->event_init(event); 6654 ret = pmu->event_init(event);
6598 if (!ret) 6655 if (!ret)
@@ -6771,6 +6828,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
6771err_pmu: 6828err_pmu:
6772 if (event->destroy) 6829 if (event->destroy)
6773 event->destroy(event); 6830 event->destroy(event);
6831 module_put(pmu->module);
6774err_ns: 6832err_ns:
6775 if (event->ns) 6833 if (event->ns)
6776 put_pid_ns(event->ns); 6834 put_pid_ns(event->ns);
@@ -6914,7 +6972,7 @@ err_size:
6914static int 6972static int
6915perf_event_set_output(struct perf_event *event, struct perf_event *output_event) 6973perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
6916{ 6974{
6917 struct ring_buffer *rb = NULL, *old_rb = NULL; 6975 struct ring_buffer *rb = NULL;
6918 int ret = -EINVAL; 6976 int ret = -EINVAL;
6919 6977
6920 if (!output_event) 6978 if (!output_event)
@@ -6942,8 +7000,6 @@ set:
6942 if (atomic_read(&event->mmap_count)) 7000 if (atomic_read(&event->mmap_count))
6943 goto unlock; 7001 goto unlock;
6944 7002
6945 old_rb = event->rb;
6946
6947 if (output_event) { 7003 if (output_event) {
6948 /* get the rb we want to redirect to */ 7004 /* get the rb we want to redirect to */
6949 rb = ring_buffer_get(output_event); 7005 rb = ring_buffer_get(output_event);
@@ -6951,23 +7007,7 @@ set:
6951 goto unlock; 7007 goto unlock;
6952 } 7008 }
6953 7009
6954 if (old_rb) 7010 ring_buffer_attach(event, rb);
6955 ring_buffer_detach(event, old_rb);
6956
6957 if (rb)
6958 ring_buffer_attach(event, rb);
6959
6960 rcu_assign_pointer(event->rb, rb);
6961
6962 if (old_rb) {
6963 ring_buffer_put(old_rb);
6964 /*
6965 * Since we detached before setting the new rb, so that we
6966 * could attach the new rb, we could have missed a wakeup.
6967 * Provide it now.
6968 */
6969 wake_up_all(&event->waitq);
6970 }
6971 7011
6972 ret = 0; 7012 ret = 0;
6973unlock: 7013unlock:
@@ -7018,6 +7058,9 @@ SYSCALL_DEFINE5(perf_event_open,
7018 if (attr.freq) { 7058 if (attr.freq) {
7019 if (attr.sample_freq > sysctl_perf_event_sample_rate) 7059 if (attr.sample_freq > sysctl_perf_event_sample_rate)
7020 return -EINVAL; 7060 return -EINVAL;
7061 } else {
7062 if (attr.sample_period & (1ULL << 63))
7063 return -EINVAL;
7021 } 7064 }
7022 7065
7023 /* 7066 /*
@@ -7055,20 +7098,26 @@ SYSCALL_DEFINE5(perf_event_open,
7055 } 7098 }
7056 } 7099 }
7057 7100
7101 if (task && group_leader &&
7102 group_leader->attr.inherit != attr.inherit) {
7103 err = -EINVAL;
7104 goto err_task;
7105 }
7106
7058 get_online_cpus(); 7107 get_online_cpus();
7059 7108
7060 event = perf_event_alloc(&attr, cpu, task, group_leader, NULL, 7109 event = perf_event_alloc(&attr, cpu, task, group_leader, NULL,
7061 NULL, NULL); 7110 NULL, NULL);
7062 if (IS_ERR(event)) { 7111 if (IS_ERR(event)) {
7063 err = PTR_ERR(event); 7112 err = PTR_ERR(event);
7064 goto err_task; 7113 goto err_cpus;
7065 } 7114 }
7066 7115
7067 if (flags & PERF_FLAG_PID_CGROUP) { 7116 if (flags & PERF_FLAG_PID_CGROUP) {
7068 err = perf_cgroup_connect(pid, event, &attr, group_leader); 7117 err = perf_cgroup_connect(pid, event, &attr, group_leader);
7069 if (err) { 7118 if (err) {
7070 __free_event(event); 7119 __free_event(event);
7071 goto err_task; 7120 goto err_cpus;
7072 } 7121 }
7073 } 7122 }
7074 7123
@@ -7165,7 +7214,7 @@ SYSCALL_DEFINE5(perf_event_open,
7165 struct perf_event_context *gctx = group_leader->ctx; 7214 struct perf_event_context *gctx = group_leader->ctx;
7166 7215
7167 mutex_lock(&gctx->mutex); 7216 mutex_lock(&gctx->mutex);
7168 perf_remove_from_context(group_leader); 7217 perf_remove_from_context(group_leader, false);
7169 7218
7170 /* 7219 /*
7171 * Removing from the context ends up with disabled 7220 * Removing from the context ends up with disabled
@@ -7175,7 +7224,7 @@ SYSCALL_DEFINE5(perf_event_open,
7175 perf_event__state_init(group_leader); 7224 perf_event__state_init(group_leader);
7176 list_for_each_entry(sibling, &group_leader->sibling_list, 7225 list_for_each_entry(sibling, &group_leader->sibling_list,
7177 group_entry) { 7226 group_entry) {
7178 perf_remove_from_context(sibling); 7227 perf_remove_from_context(sibling, false);
7179 perf_event__state_init(sibling); 7228 perf_event__state_init(sibling);
7180 put_ctx(gctx); 7229 put_ctx(gctx);
7181 } 7230 }
@@ -7230,8 +7279,9 @@ err_context:
7230 put_ctx(ctx); 7279 put_ctx(ctx);
7231err_alloc: 7280err_alloc:
7232 free_event(event); 7281 free_event(event);
7233err_task: 7282err_cpus:
7234 put_online_cpus(); 7283 put_online_cpus();
7284err_task:
7235 if (task) 7285 if (task)
7236 put_task_struct(task); 7286 put_task_struct(task);
7237err_group_fd: 7287err_group_fd:
@@ -7305,7 +7355,7 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
7305 mutex_lock(&src_ctx->mutex); 7355 mutex_lock(&src_ctx->mutex);
7306 list_for_each_entry_safe(event, tmp, &src_ctx->event_list, 7356 list_for_each_entry_safe(event, tmp, &src_ctx->event_list,
7307 event_entry) { 7357 event_entry) {
7308 perf_remove_from_context(event); 7358 perf_remove_from_context(event, false);
7309 unaccount_event_cpu(event, src_cpu); 7359 unaccount_event_cpu(event, src_cpu);
7310 put_ctx(src_ctx); 7360 put_ctx(src_ctx);
7311 list_add(&event->migrate_entry, &events); 7361 list_add(&event->migrate_entry, &events);
@@ -7367,13 +7417,7 @@ __perf_event_exit_task(struct perf_event *child_event,
7367 struct perf_event_context *child_ctx, 7417 struct perf_event_context *child_ctx,
7368 struct task_struct *child) 7418 struct task_struct *child)
7369{ 7419{
7370 if (child_event->parent) { 7420 perf_remove_from_context(child_event, true);
7371 raw_spin_lock_irq(&child_ctx->lock);
7372 perf_group_detach(child_event);
7373 raw_spin_unlock_irq(&child_ctx->lock);
7374 }
7375
7376 perf_remove_from_context(child_event);
7377 7421
7378 /* 7422 /*
7379 * It can happen that the parent exits first, and has events 7423 * It can happen that the parent exits first, and has events
@@ -7388,7 +7432,7 @@ __perf_event_exit_task(struct perf_event *child_event,
7388 7432
7389static void perf_event_exit_task_context(struct task_struct *child, int ctxn) 7433static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
7390{ 7434{
7391 struct perf_event *child_event, *tmp; 7435 struct perf_event *child_event;
7392 struct perf_event_context *child_ctx; 7436 struct perf_event_context *child_ctx;
7393 unsigned long flags; 7437 unsigned long flags;
7394 7438
@@ -7442,24 +7486,9 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
7442 */ 7486 */
7443 mutex_lock(&child_ctx->mutex); 7487 mutex_lock(&child_ctx->mutex);
7444 7488
7445again: 7489 list_for_each_entry_rcu(child_event, &child_ctx->event_list, event_entry)
7446 list_for_each_entry_safe(child_event, tmp, &child_ctx->pinned_groups,
7447 group_entry)
7448 __perf_event_exit_task(child_event, child_ctx, child); 7490 __perf_event_exit_task(child_event, child_ctx, child);
7449 7491
7450 list_for_each_entry_safe(child_event, tmp, &child_ctx->flexible_groups,
7451 group_entry)
7452 __perf_event_exit_task(child_event, child_ctx, child);
7453
7454 /*
7455 * If the last event was a group event, it will have appended all
7456 * its siblings to the list, but we obtained 'tmp' before that which
7457 * will still point to the list head terminating the iteration.
7458 */
7459 if (!list_empty(&child_ctx->pinned_groups) ||
7460 !list_empty(&child_ctx->flexible_groups))
7461 goto again;
7462
7463 mutex_unlock(&child_ctx->mutex); 7492 mutex_unlock(&child_ctx->mutex);
7464 7493
7465 put_ctx(child_ctx); 7494 put_ctx(child_ctx);
@@ -7724,6 +7753,8 @@ int perf_event_init_context(struct task_struct *child, int ctxn)
7724 * swapped under us. 7753 * swapped under us.
7725 */ 7754 */
7726 parent_ctx = perf_pin_task_context(parent, ctxn); 7755 parent_ctx = perf_pin_task_context(parent, ctxn);
7756 if (!parent_ctx)
7757 return 0;
7727 7758
7728 /* 7759 /*
7729 * No need to check if parent_ctx != NULL here; since we saw 7760 * No need to check if parent_ctx != NULL here; since we saw
@@ -7835,6 +7866,7 @@ static void perf_event_init_cpu(int cpu)
7835 struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu); 7866 struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
7836 7867
7837 mutex_lock(&swhash->hlist_mutex); 7868 mutex_lock(&swhash->hlist_mutex);
7869 swhash->online = true;
7838 if (swhash->hlist_refcount > 0) { 7870 if (swhash->hlist_refcount > 0) {
7839 struct swevent_hlist *hlist; 7871 struct swevent_hlist *hlist;
7840 7872
@@ -7857,14 +7889,14 @@ static void perf_pmu_rotate_stop(struct pmu *pmu)
7857 7889
7858static void __perf_event_exit_context(void *__info) 7890static void __perf_event_exit_context(void *__info)
7859{ 7891{
7892 struct remove_event re = { .detach_group = false };
7860 struct perf_event_context *ctx = __info; 7893 struct perf_event_context *ctx = __info;
7861 struct perf_event *event;
7862 7894
7863 perf_pmu_rotate_stop(ctx->pmu); 7895 perf_pmu_rotate_stop(ctx->pmu);
7864 7896
7865 rcu_read_lock(); 7897 rcu_read_lock();
7866 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) 7898 list_for_each_entry_rcu(re.event, &ctx->event_list, event_entry)
7867 __perf_remove_from_context(event); 7899 __perf_remove_from_context(&re);
7868 rcu_read_unlock(); 7900 rcu_read_unlock();
7869} 7901}
7870 7902
@@ -7892,6 +7924,7 @@ static void perf_event_exit_cpu(int cpu)
7892 perf_event_exit_cpu_context(cpu); 7924 perf_event_exit_cpu_context(cpu);
7893 7925
7894 mutex_lock(&swhash->hlist_mutex); 7926 mutex_lock(&swhash->hlist_mutex);
7927 swhash->online = false;
7895 swevent_hlist_release(swhash); 7928 swevent_hlist_release(swhash);
7896 mutex_unlock(&swhash->hlist_mutex); 7929 mutex_unlock(&swhash->hlist_mutex);
7897} 7930}
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 04709b66369d..d1edc5e6fd03 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -60,8 +60,6 @@ static struct percpu_rw_semaphore dup_mmap_sem;
60 60
61/* Have a copy of original instruction */ 61/* Have a copy of original instruction */
62#define UPROBE_COPY_INSN 0 62#define UPROBE_COPY_INSN 0
63/* Can skip singlestep */
64#define UPROBE_SKIP_SSTEP 1
65 63
66struct uprobe { 64struct uprobe {
67 struct rb_node rb_node; /* node in the rb tree */ 65 struct rb_node rb_node; /* node in the rb tree */
@@ -491,12 +489,9 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)
491 uprobe->offset = offset; 489 uprobe->offset = offset;
492 init_rwsem(&uprobe->register_rwsem); 490 init_rwsem(&uprobe->register_rwsem);
493 init_rwsem(&uprobe->consumer_rwsem); 491 init_rwsem(&uprobe->consumer_rwsem);
494 /* For now assume that the instruction need not be single-stepped */
495 __set_bit(UPROBE_SKIP_SSTEP, &uprobe->flags);
496 492
497 /* add to uprobes_tree, sorted on inode:offset */ 493 /* add to uprobes_tree, sorted on inode:offset */
498 cur_uprobe = insert_uprobe(uprobe); 494 cur_uprobe = insert_uprobe(uprobe);
499
500 /* a uprobe exists for this inode:offset combination */ 495 /* a uprobe exists for this inode:offset combination */
501 if (cur_uprobe) { 496 if (cur_uprobe) {
502 kfree(uprobe); 497 kfree(uprobe);
@@ -1628,20 +1623,6 @@ bool uprobe_deny_signal(void)
1628 return true; 1623 return true;
1629} 1624}
1630 1625
1631/*
1632 * Avoid singlestepping the original instruction if the original instruction
1633 * is a NOP or can be emulated.
1634 */
1635static bool can_skip_sstep(struct uprobe *uprobe, struct pt_regs *regs)
1636{
1637 if (test_bit(UPROBE_SKIP_SSTEP, &uprobe->flags)) {
1638 if (arch_uprobe_skip_sstep(&uprobe->arch, regs))
1639 return true;
1640 clear_bit(UPROBE_SKIP_SSTEP, &uprobe->flags);
1641 }
1642 return false;
1643}
1644
1645static void mmf_recalc_uprobes(struct mm_struct *mm) 1626static void mmf_recalc_uprobes(struct mm_struct *mm)
1646{ 1627{
1647 struct vm_area_struct *vma; 1628 struct vm_area_struct *vma;
@@ -1868,13 +1849,13 @@ static void handle_swbp(struct pt_regs *regs)
1868 1849
1869 handler_chain(uprobe, regs); 1850 handler_chain(uprobe, regs);
1870 1851
1871 if (can_skip_sstep(uprobe, regs)) 1852 if (arch_uprobe_skip_sstep(&uprobe->arch, regs))
1872 goto out; 1853 goto out;
1873 1854
1874 if (!pre_ssout(uprobe, regs, bp_vaddr)) 1855 if (!pre_ssout(uprobe, regs, bp_vaddr))
1875 return; 1856 return;
1876 1857
1877 /* can_skip_sstep() succeeded, or restart if can't singlestep */ 1858 /* arch_uprobe_skip_sstep() succeeded, or restart if can't singlestep */
1878out: 1859out:
1879 put_uprobe(uprobe); 1860 put_uprobe(uprobe);
1880} 1861}
@@ -1886,10 +1867,11 @@ out:
1886static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs) 1867static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs)
1887{ 1868{
1888 struct uprobe *uprobe; 1869 struct uprobe *uprobe;
1870 int err = 0;
1889 1871
1890 uprobe = utask->active_uprobe; 1872 uprobe = utask->active_uprobe;
1891 if (utask->state == UTASK_SSTEP_ACK) 1873 if (utask->state == UTASK_SSTEP_ACK)
1892 arch_uprobe_post_xol(&uprobe->arch, regs); 1874 err = arch_uprobe_post_xol(&uprobe->arch, regs);
1893 else if (utask->state == UTASK_SSTEP_TRAPPED) 1875 else if (utask->state == UTASK_SSTEP_TRAPPED)
1894 arch_uprobe_abort_xol(&uprobe->arch, regs); 1876 arch_uprobe_abort_xol(&uprobe->arch, regs);
1895 else 1877 else
@@ -1903,6 +1885,11 @@ static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs)
1903 spin_lock_irq(&current->sighand->siglock); 1885 spin_lock_irq(&current->sighand->siglock);
1904 recalc_sigpending(); /* see uprobe_deny_signal() */ 1886 recalc_sigpending(); /* see uprobe_deny_signal() */
1905 spin_unlock_irq(&current->sighand->siglock); 1887 spin_unlock_irq(&current->sighand->siglock);
1888
1889 if (unlikely(err)) {
1890 uprobe_warn(current, "execute the probed insn, sending SIGILL.");
1891 force_sig_info(SIGILL, SEND_SIG_FORCED, current);
1892 }
1906} 1893}
1907 1894
1908/* 1895/*
diff --git a/kernel/futex.c b/kernel/futex.c
index 5f589279e462..89bc9d59ac65 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -267,7 +267,7 @@ static inline void futex_get_mm(union futex_key *key)
267 * get_futex_key() implies a full barrier. This is relied upon 267 * get_futex_key() implies a full barrier. This is relied upon
268 * as full barrier (B), see the ordering comment above. 268 * as full barrier (B), see the ordering comment above.
269 */ 269 */
270 smp_mb__after_atomic_inc(); 270 smp_mb__after_atomic();
271} 271}
272 272
273/* 273/*
@@ -280,7 +280,7 @@ static inline void hb_waiters_inc(struct futex_hash_bucket *hb)
280 /* 280 /*
281 * Full barrier (A), see the ordering comment above. 281 * Full barrier (A), see the ordering comment above.
282 */ 282 */
283 smp_mb__after_atomic_inc(); 283 smp_mb__after_atomic();
284#endif 284#endif
285} 285}
286 286
@@ -745,7 +745,8 @@ void exit_pi_state_list(struct task_struct *curr)
745 745
746static int 746static int
747lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, 747lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
748 union futex_key *key, struct futex_pi_state **ps) 748 union futex_key *key, struct futex_pi_state **ps,
749 struct task_struct *task)
749{ 750{
750 struct futex_pi_state *pi_state = NULL; 751 struct futex_pi_state *pi_state = NULL;
751 struct futex_q *this, *next; 752 struct futex_q *this, *next;
@@ -786,6 +787,16 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
786 return -EINVAL; 787 return -EINVAL;
787 } 788 }
788 789
790 /*
791 * Protect against a corrupted uval. If uval
792 * is 0x80000000 then pid is 0 and the waiter
793 * bit is set. So the deadlock check in the
794 * calling code has failed and we did not fall
795 * into the check above due to !pid.
796 */
797 if (task && pi_state->owner == task)
798 return -EDEADLK;
799
789 atomic_inc(&pi_state->refcount); 800 atomic_inc(&pi_state->refcount);
790 *ps = pi_state; 801 *ps = pi_state;
791 802
@@ -803,6 +814,11 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
803 if (!p) 814 if (!p)
804 return -ESRCH; 815 return -ESRCH;
805 816
817 if (!p->mm) {
818 put_task_struct(p);
819 return -EPERM;
820 }
821
806 /* 822 /*
807 * We need to look at the task state flags to figure out, 823 * We need to look at the task state flags to figure out,
808 * whether the task is exiting. To protect against the do_exit 824 * whether the task is exiting. To protect against the do_exit
@@ -935,7 +951,7 @@ retry:
935 * We dont have the lock. Look up the PI state (or create it if 951 * We dont have the lock. Look up the PI state (or create it if
936 * we are the first waiter): 952 * we are the first waiter):
937 */ 953 */
938 ret = lookup_pi_state(uval, hb, key, ps); 954 ret = lookup_pi_state(uval, hb, key, ps, task);
939 955
940 if (unlikely(ret)) { 956 if (unlikely(ret)) {
941 switch (ret) { 957 switch (ret) {
@@ -1347,7 +1363,7 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
1347 * 1363 *
1348 * Return: 1364 * Return:
1349 * 0 - failed to acquire the lock atomically; 1365 * 0 - failed to acquire the lock atomically;
1350 * 1 - acquired the lock; 1366 * >0 - acquired the lock, return value is vpid of the top_waiter
1351 * <0 - error 1367 * <0 - error
1352 */ 1368 */
1353static int futex_proxy_trylock_atomic(u32 __user *pifutex, 1369static int futex_proxy_trylock_atomic(u32 __user *pifutex,
@@ -1358,7 +1374,7 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex,
1358{ 1374{
1359 struct futex_q *top_waiter = NULL; 1375 struct futex_q *top_waiter = NULL;
1360 u32 curval; 1376 u32 curval;
1361 int ret; 1377 int ret, vpid;
1362 1378
1363 if (get_futex_value_locked(&curval, pifutex)) 1379 if (get_futex_value_locked(&curval, pifutex))
1364 return -EFAULT; 1380 return -EFAULT;
@@ -1386,11 +1402,13 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex,
1386 * the contended case or if set_waiters is 1. The pi_state is returned 1402 * the contended case or if set_waiters is 1. The pi_state is returned
1387 * in ps in contended cases. 1403 * in ps in contended cases.
1388 */ 1404 */
1405 vpid = task_pid_vnr(top_waiter->task);
1389 ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task, 1406 ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
1390 set_waiters); 1407 set_waiters);
1391 if (ret == 1) 1408 if (ret == 1) {
1392 requeue_pi_wake_futex(top_waiter, key2, hb2); 1409 requeue_pi_wake_futex(top_waiter, key2, hb2);
1393 1410 return vpid;
1411 }
1394 return ret; 1412 return ret;
1395} 1413}
1396 1414
@@ -1421,7 +1439,6 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
1421 struct futex_pi_state *pi_state = NULL; 1439 struct futex_pi_state *pi_state = NULL;
1422 struct futex_hash_bucket *hb1, *hb2; 1440 struct futex_hash_bucket *hb1, *hb2;
1423 struct futex_q *this, *next; 1441 struct futex_q *this, *next;
1424 u32 curval2;
1425 1442
1426 if (requeue_pi) { 1443 if (requeue_pi) {
1427 /* 1444 /*
@@ -1509,16 +1526,25 @@ retry_private:
1509 * At this point the top_waiter has either taken uaddr2 or is 1526 * At this point the top_waiter has either taken uaddr2 or is
1510 * waiting on it. If the former, then the pi_state will not 1527 * waiting on it. If the former, then the pi_state will not
1511 * exist yet, look it up one more time to ensure we have a 1528 * exist yet, look it up one more time to ensure we have a
1512 * reference to it. 1529 * reference to it. If the lock was taken, ret contains the
1530 * vpid of the top waiter task.
1513 */ 1531 */
1514 if (ret == 1) { 1532 if (ret > 0) {
1515 WARN_ON(pi_state); 1533 WARN_ON(pi_state);
1516 drop_count++; 1534 drop_count++;
1517 task_count++; 1535 task_count++;
1518 ret = get_futex_value_locked(&curval2, uaddr2); 1536 /*
1519 if (!ret) 1537 * If we acquired the lock, then the user
1520 ret = lookup_pi_state(curval2, hb2, &key2, 1538 * space value of uaddr2 should be vpid. It
1521 &pi_state); 1539 * cannot be changed by the top waiter as it
1540 * is blocked on hb2 lock if it tries to do
1541 * so. If something fiddled with it behind our
1542 * back the pi state lookup might unearth
1543 * it. So we rather use the known value than
1544 * rereading and handing potential crap to
1545 * lookup_pi_state.
1546 */
1547 ret = lookup_pi_state(ret, hb2, &key2, &pi_state, NULL);
1522 } 1548 }
1523 1549
1524 switch (ret) { 1550 switch (ret) {
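
The futex changes harden lookup_pi_state() against corrupted user-space values (-EDEADLK when the pi_state owner is the calling task, -EPERM when the presumed owner has no mm). A hedged demo of the kernel's PI-futex deadlock reporting in general, not of the new lookup_pi_state() path specifically: a futex word carrying the caller's own TID claims the caller already owns the lock, so FUTEX_LOCK_PI fails with EDEADLK.

/*
 * Demo (not part of the patch): store our own TID in the futex word and
 * then try to take the PI lock; the kernel's owner-is-caller check
 * returns EDEADLK.
 */
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <stdint.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/futex.h>

int main(void)
{
	uint32_t futex_word = (uint32_t)syscall(SYS_gettid);
	long ret;

	ret = syscall(SYS_futex, &futex_word, FUTEX_LOCK_PI, 0, NULL, NULL, 0);
	if (ret == -1)
		printf("FUTEX_LOCK_PI: %s\n", strerror(errno));	/* EDEADLK */
	return 0;
}
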
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index e0501fe7140d..3ab28993f6e0 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -1039,6 +1039,7 @@ int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
1039 1039
1040 return ret; 1040 return ret;
1041} 1041}
1042EXPORT_SYMBOL_GPL(__hrtimer_start_range_ns);
1042 1043
1043/** 1044/**
1044 * hrtimer_start_range_ns - (re)start an hrtimer on the current CPU 1045 * hrtimer_start_range_ns - (re)start an hrtimer on the current CPU
diff --git a/kernel/kexec.c b/kernel/kexec.c
index c8380ad203bc..28c57069ef68 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -1683,6 +1683,14 @@ int kernel_kexec(void)
1683 kexec_in_progress = true; 1683 kexec_in_progress = true;
1684 kernel_restart_prepare(NULL); 1684 kernel_restart_prepare(NULL);
1685 migrate_to_reboot_cpu(); 1685 migrate_to_reboot_cpu();
1686
1687 /*
1688 * migrate_to_reboot_cpu() disables CPU hotplug assuming that
1689 * no further code needs to use CPU hotplug (which is true in
1690 * the reboot case). However, the kexec path depends on using
1691 * CPU hotplug again; so re-enable it here.
1692 */
1693 cpu_hotplug_enable();
1686 printk(KERN_EMERG "Starting new kernel\n"); 1694 printk(KERN_EMERG "Starting new kernel\n");
1687 machine_shutdown(); 1695 machine_shutdown();
1688 } 1696 }
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 6b375af4958d..0ac67a5861c5 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -498,7 +498,7 @@ int __usermodehelper_disable(enum umh_disable_depth depth)
498static void helper_lock(void) 498static void helper_lock(void)
499{ 499{
500 atomic_inc(&running_helpers); 500 atomic_inc(&running_helpers);
501 smp_mb__after_atomic_inc(); 501 smp_mb__after_atomic();
502} 502}
503 503
504static void helper_unlock(void) 504static void helper_unlock(void)
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index 2495a9b14ac8..6683ccef9fff 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -37,6 +37,7 @@ static ssize_t uevent_seqnum_show(struct kobject *kobj,
37} 37}
38KERNEL_ATTR_RO(uevent_seqnum); 38KERNEL_ATTR_RO(uevent_seqnum);
39 39
40#ifdef CONFIG_UEVENT_HELPER
40/* uevent helper program, used during early boot */ 41/* uevent helper program, used during early boot */
41static ssize_t uevent_helper_show(struct kobject *kobj, 42static ssize_t uevent_helper_show(struct kobject *kobj,
42 struct kobj_attribute *attr, char *buf) 43 struct kobj_attribute *attr, char *buf)
@@ -56,7 +57,7 @@ static ssize_t uevent_helper_store(struct kobject *kobj,
56 return count; 57 return count;
57} 58}
58KERNEL_ATTR_RW(uevent_helper); 59KERNEL_ATTR_RW(uevent_helper);
59 60#endif
60 61
61#ifdef CONFIG_PROFILING 62#ifdef CONFIG_PROFILING
62static ssize_t profiling_show(struct kobject *kobj, 63static ssize_t profiling_show(struct kobject *kobj,
@@ -189,7 +190,9 @@ EXPORT_SYMBOL_GPL(kernel_kobj);
189static struct attribute * kernel_attrs[] = { 190static struct attribute * kernel_attrs[] = {
190 &fscaps_attr.attr, 191 &fscaps_attr.attr,
191 &uevent_seqnum_attr.attr, 192 &uevent_seqnum_attr.attr,
193#ifdef CONFIG_UEVENT_HELPER
192 &uevent_helper_attr.attr, 194 &uevent_helper_attr.attr,
195#endif
193#ifdef CONFIG_PROFILING 196#ifdef CONFIG_PROFILING
194 &profiling_attr.attr, 197 &profiling_attr.attr,
195#endif 198#endif
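
With the uevent helper attribute now conditional on CONFIG_UEVENT_HELPER, userspace readers should treat a missing /sys/kernel/uevent_helper as "not compiled in" rather than as an error. A minimal sketch (not part of the patch):

/* Tolerate the absence of /sys/kernel/uevent_helper on kernels built
 * without CONFIG_UEVENT_HELPER. */
#include <stdio.h>
#include <errno.h>

int main(void)
{
	char buf[256] = "";
	FILE *f = fopen("/sys/kernel/uevent_helper", "r");

	if (!f) {
		if (errno == ENOENT)
			printf("uevent helper support not compiled in\n");
		else
			perror("fopen");
		return 0;
	}
	if (fgets(buf, sizeof(buf), f))
		printf("uevent helper: %s\n", buf);
	fclose(f);
	return 0;
}
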
diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h
index 4f560cfedc8f..51c4b24b6328 100644
--- a/kernel/locking/lockdep_internals.h
+++ b/kernel/locking/lockdep_internals.h
@@ -54,9 +54,9 @@ enum {
54 * table (if it's not there yet), and we check it for lock order 54 * table (if it's not there yet), and we check it for lock order
55 * conflicts and deadlocks. 55 * conflicts and deadlocks.
56 */ 56 */
57#define MAX_LOCKDEP_ENTRIES 16384UL 57#define MAX_LOCKDEP_ENTRIES 32768UL
58 58
59#define MAX_LOCKDEP_CHAINS_BITS 15 59#define MAX_LOCKDEP_CHAINS_BITS 16
60#define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS) 60#define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS)
61 61
62#define MAX_LOCKDEP_CHAIN_HLOCKS (MAX_LOCKDEP_CHAINS*5) 62#define MAX_LOCKDEP_CHAIN_HLOCKS (MAX_LOCKDEP_CHAINS*5)
@@ -65,7 +65,7 @@ enum {
65 * Stack-trace: tightly packed array of stack backtrace 65 * Stack-trace: tightly packed array of stack backtrace
66 * addresses. Protected by the hash_lock. 66 * addresses. Protected by the hash_lock.
67 */ 67 */
68#define MAX_STACK_TRACE_ENTRIES 262144UL 68#define MAX_STACK_TRACE_ENTRIES 524288UL
69 69
70extern struct list_head all_lock_classes; 70extern struct list_head all_lock_classes;
71extern struct lock_chain lock_chains[]; 71extern struct lock_chain lock_chains[];
diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c
index 23343be46e91..0955b885d0dc 100644
--- a/kernel/locking/locktorture.c
+++ b/kernel/locking/locktorture.c
@@ -82,14 +82,14 @@ struct lock_writer_stress_stats {
82}; 82};
83static struct lock_writer_stress_stats *lwsa; 83static struct lock_writer_stress_stats *lwsa;
84 84
85#if defined(MODULE) || defined(CONFIG_LOCK_TORTURE_TEST_RUNNABLE) 85#if defined(MODULE)
86#define LOCKTORTURE_RUNNABLE_INIT 1 86#define LOCKTORTURE_RUNNABLE_INIT 1
87#else 87#else
88#define LOCKTORTURE_RUNNABLE_INIT 0 88#define LOCKTORTURE_RUNNABLE_INIT 0
89#endif 89#endif
90int locktorture_runnable = LOCKTORTURE_RUNNABLE_INIT; 90int locktorture_runnable = LOCKTORTURE_RUNNABLE_INIT;
91module_param(locktorture_runnable, int, 0444); 91module_param(locktorture_runnable, int, 0444);
92MODULE_PARM_DESC(locktorture_runnable, "Start locktorture at boot"); 92MODULE_PARM_DESC(locktorture_runnable, "Start locktorture at module init");
93 93
94/* Forward reference. */ 94/* Forward reference. */
95static void lock_torture_cleanup(void); 95static void lock_torture_cleanup(void);
@@ -219,7 +219,8 @@ static int lock_torture_writer(void *arg)
219 set_user_nice(current, MAX_NICE); 219 set_user_nice(current, MAX_NICE);
220 220
221 do { 221 do {
222 schedule_timeout_uninterruptible(1); 222 if ((torture_random(&rand) & 0xfffff) == 0)
223 schedule_timeout_uninterruptible(1);
223 cur_ops->writelock(); 224 cur_ops->writelock();
224 if (WARN_ON_ONCE(lock_is_write_held)) 225 if (WARN_ON_ONCE(lock_is_write_held))
225 lwsp->n_write_lock_fail++; 226 lwsp->n_write_lock_fail++;
@@ -354,7 +355,8 @@ static int __init lock_torture_init(void)
354 &lock_busted_ops, &spin_lock_ops, &spin_lock_irq_ops, 355 &lock_busted_ops, &spin_lock_ops, &spin_lock_irq_ops,
355 }; 356 };
356 357
357 torture_init_begin(torture_type, verbose, &locktorture_runnable); 358 if (!torture_init_begin(torture_type, verbose, &locktorture_runnable))
359 return -EBUSY;
358 360
359 /* Process args and tell the world that the torturer is on the job. */ 361 /* Process args and tell the world that the torturer is on the job. */
360 for (i = 0; i < ARRAY_SIZE(torture_ops); i++) { 362 for (i = 0; i < ARRAY_SIZE(torture_ops); i++) {
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index aa4dff04b594..a620d4d08ca6 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -343,9 +343,16 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
343 * top_waiter can be NULL, when we are in the deboosting 343 * top_waiter can be NULL, when we are in the deboosting
344 * mode! 344 * mode!
345 */ 345 */
346 if (top_waiter && (!task_has_pi_waiters(task) || 346 if (top_waiter) {
347 top_waiter != task_top_pi_waiter(task))) 347 if (!task_has_pi_waiters(task))
348 goto out_unlock_pi; 348 goto out_unlock_pi;
349 /*
350 * If deadlock detection is off, we stop here if we
351 * are not the top pi waiter of the task.
352 */
353 if (!detect_deadlock && top_waiter != task_top_pi_waiter(task))
354 goto out_unlock_pi;
355 }
349 356
350 /* 357 /*
351 * When deadlock detection is off then we check, if further 358 * When deadlock detection is off then we check, if further
@@ -361,7 +368,12 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
361 goto retry; 368 goto retry;
362 } 369 }
363 370
364 /* Deadlock detection */ 371 /*
372 * Deadlock detection. If the lock is the same as the original
373 * lock which caused us to walk the lock chain or if the
374 * current lock is owned by the task which initiated the chain
375 * walk, we detected a deadlock.
376 */
365 if (lock == orig_lock || rt_mutex_owner(lock) == top_task) { 377 if (lock == orig_lock || rt_mutex_owner(lock) == top_task) {
366 debug_rt_mutex_deadlock(deadlock_detect, orig_waiter, lock); 378 debug_rt_mutex_deadlock(deadlock_detect, orig_waiter, lock);
367 raw_spin_unlock(&lock->wait_lock); 379 raw_spin_unlock(&lock->wait_lock);
@@ -527,6 +539,18 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
527 unsigned long flags; 539 unsigned long flags;
528 int chain_walk = 0, res; 540 int chain_walk = 0, res;
529 541
542 /*
543 * Early deadlock detection. We really don't want the task to
544 * enqueue on itself just to untangle the mess later. It's not
545 * only an optimization. We drop the locks, so another waiter
546 * can come in before the chain walk detects the deadlock. So
547 * the other will detect the deadlock and return -EDEADLOCK,
548 * which is wrong, as the other waiter is not in a deadlock
549 * situation.
550 */
551 if (detect_deadlock && owner == task)
552 return -EDEADLK;
553
530 raw_spin_lock_irqsave(&task->pi_lock, flags); 554 raw_spin_lock_irqsave(&task->pi_lock, flags);
531 __rt_mutex_adjust_prio(task); 555 __rt_mutex_adjust_prio(task);
532 waiter->task = task; 556 waiter->task = task;
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index 1d66e08e897d..b4219ff87b8c 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -12,6 +12,55 @@
12#include <linux/export.h> 12#include <linux/export.h>
13 13
14/* 14/*
15 * Guide to the rw_semaphore's count field for common values.
16 * (32-bit case illustrated, similar for 64-bit)
17 *
18 * 0x0000000X (1) X readers active or attempting lock, no writer waiting
19 * X = #active_readers + #readers attempting to lock
20 * (X*ACTIVE_BIAS)
21 *
22 * 0x00000000 rwsem is unlocked, and no one is waiting for the lock or
23 * attempting to read lock or write lock.
24 *
25 * 0xffff000X (1) X readers active or attempting lock, with waiters for lock
26 * X = #active readers + # readers attempting lock
27 * (X*ACTIVE_BIAS + WAITING_BIAS)
28 * (2) 1 writer attempting lock, no waiters for lock
29 * X-1 = #active readers + #readers attempting lock
30 * ((X-1)*ACTIVE_BIAS + ACTIVE_WRITE_BIAS)
31 * (3) 1 writer active, no waiters for lock
32 * X-1 = #active readers + #readers attempting lock
33 * ((X-1)*ACTIVE_BIAS + ACTIVE_WRITE_BIAS)
34 *
35 * 0xffff0001 (1) 1 reader active or attempting lock, waiters for lock
36 * (WAITING_BIAS + ACTIVE_BIAS)
37 * (2) 1 writer active or attempting lock, no waiters for lock
38 * (ACTIVE_WRITE_BIAS)
39 *
40 * 0xffff0000 (1) There are writers or readers queued but none active
41 * or in the process of attempting lock.
42 * (WAITING_BIAS)
43 * Note: writer can attempt to steal lock for this count by adding
44 * ACTIVE_WRITE_BIAS in cmpxchg and checking the old count
45 *
46 * 0xfffe0001 (1) 1 writer active, or attempting lock. Waiters on queue.
47 * (ACTIVE_WRITE_BIAS + WAITING_BIAS)
48 *
49 * Note: Readers attempt to lock by adding ACTIVE_BIAS in down_read and checking
50 * the count becomes more than 0 for successful lock acquisition,
51 * i.e. the case where there are only readers or nobody has lock.
52 * (1st and 2nd case above).
53 *
54 * Writers attempt to lock by adding ACTIVE_WRITE_BIAS in down_write and
55 * checking the count becomes ACTIVE_WRITE_BIAS for successful lock
56 * acquisition (i.e. nobody else has lock or attempts lock). If
57 * unsuccessful, in rwsem_down_write_failed, we'll check to see if there
58 * are only waiters but none active (5th case above), and attempt to
59 * steal the lock.
60 *
61 */
62
63/*
15 * Initialize an rwsem: 64 * Initialize an rwsem:
16 */ 65 */
17void __init_rwsem(struct rw_semaphore *sem, const char *name, 66void __init_rwsem(struct rw_semaphore *sem, const char *name,
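
The new comment documents how the rwsem count field encodes readers, writers and waiters. A standalone sketch (not part of the patch) that reproduces the 32-bit arithmetic with the bias values the 32-bit rwsem implementation uses, printing several of the states from the table above:

/* Reproduce the 32-bit rwsem count encodings documented above. */
#include <stdio.h>
#include <stdint.h>

#define RWSEM_ACTIVE_BIAS	 0x00000001L
#define RWSEM_WAITING_BIAS	(-0x00010000L)
#define RWSEM_ACTIVE_WRITE_BIAS	(RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)

static void show(const char *what, long count)
{
	printf("%-30s 0x%08x\n", what, (unsigned int)(uint32_t)count);
}

int main(void)
{
	show("unlocked", 0);
	show("3 readers, no waiters", 3 * RWSEM_ACTIVE_BIAS);
	show("3 readers, waiters queued", 3 * RWSEM_ACTIVE_BIAS + RWSEM_WAITING_BIAS);
	show("1 writer, no waiters", RWSEM_ACTIVE_WRITE_BIAS);
	show("waiters queued, none active", RWSEM_WAITING_BIAS);
	show("1 writer, waiters queued", RWSEM_ACTIVE_WRITE_BIAS + RWSEM_WAITING_BIAS);
	return 0;
}

A writer steals the lock from the "waiters queued, none active" state by cmpxchg()ing ACTIVE_WRITE_BIAS onto the count, exactly as the note in the comment describes.
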
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 7228258b85ec..221229cf0190 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -2413,6 +2413,7 @@ int unregister_console(struct console *console)
2413 if (console_drivers != NULL && console->flags & CON_CONSDEV) 2413 if (console_drivers != NULL && console->flags & CON_CONSDEV)
2414 console_drivers->flags |= CON_CONSDEV; 2414 console_drivers->flags |= CON_CONSDEV;
2415 2415
2416 console->flags &= ~CON_ENABLED;
2416 console_unlock(); 2417 console_unlock();
2417 console_sysfs_notify(); 2418 console_sysfs_notify();
2418 return res; 2419 return res;
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index bd30bc61bc05..7fa34f86e5ba 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -58,9 +58,11 @@ torture_param(int, fqs_duration, 0,
58 "Duration of fqs bursts (us), 0 to disable"); 58 "Duration of fqs bursts (us), 0 to disable");
59torture_param(int, fqs_holdoff, 0, "Holdoff time within fqs bursts (us)"); 59torture_param(int, fqs_holdoff, 0, "Holdoff time within fqs bursts (us)");
60torture_param(int, fqs_stutter, 3, "Wait time between fqs bursts (s)"); 60torture_param(int, fqs_stutter, 3, "Wait time between fqs bursts (s)");
61torture_param(bool, gp_cond, false, "Use conditional/async GP wait primitives");
61torture_param(bool, gp_exp, false, "Use expedited GP wait primitives"); 62torture_param(bool, gp_exp, false, "Use expedited GP wait primitives");
62torture_param(bool, gp_normal, false, 63torture_param(bool, gp_normal, false,
63 "Use normal (non-expedited) GP wait primitives"); 64 "Use normal (non-expedited) GP wait primitives");
65torture_param(bool, gp_sync, false, "Use synchronous GP wait primitives");
64torture_param(int, irqreader, 1, "Allow RCU readers from irq handlers"); 66torture_param(int, irqreader, 1, "Allow RCU readers from irq handlers");
65torture_param(int, n_barrier_cbs, 0, 67torture_param(int, n_barrier_cbs, 0,
66 "# of callbacks/kthreads for barrier testing"); 68 "# of callbacks/kthreads for barrier testing");
@@ -138,6 +140,18 @@ static long n_barrier_attempts;
138static long n_barrier_successes; 140static long n_barrier_successes;
139static struct list_head rcu_torture_removed; 141static struct list_head rcu_torture_removed;
140 142
143static int rcu_torture_writer_state;
144#define RTWS_FIXED_DELAY 0
145#define RTWS_DELAY 1
146#define RTWS_REPLACE 2
147#define RTWS_DEF_FREE 3
148#define RTWS_EXP_SYNC 4
149#define RTWS_COND_GET 5
150#define RTWS_COND_SYNC 6
151#define RTWS_SYNC 7
152#define RTWS_STUTTER 8
153#define RTWS_STOPPING 9
154
141#if defined(MODULE) || defined(CONFIG_RCU_TORTURE_TEST_RUNNABLE) 155#if defined(MODULE) || defined(CONFIG_RCU_TORTURE_TEST_RUNNABLE)
142#define RCUTORTURE_RUNNABLE_INIT 1 156#define RCUTORTURE_RUNNABLE_INIT 1
143#else 157#else
@@ -214,6 +228,7 @@ rcu_torture_free(struct rcu_torture *p)
214 */ 228 */
215 229
216struct rcu_torture_ops { 230struct rcu_torture_ops {
231 int ttype;
217 void (*init)(void); 232 void (*init)(void);
218 int (*readlock)(void); 233 int (*readlock)(void);
219 void (*read_delay)(struct torture_random_state *rrsp); 234 void (*read_delay)(struct torture_random_state *rrsp);
@@ -222,6 +237,8 @@ struct rcu_torture_ops {
222 void (*deferred_free)(struct rcu_torture *p); 237 void (*deferred_free)(struct rcu_torture *p);
223 void (*sync)(void); 238 void (*sync)(void);
224 void (*exp_sync)(void); 239 void (*exp_sync)(void);
240 unsigned long (*get_state)(void);
241 void (*cond_sync)(unsigned long oldstate);
225 void (*call)(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); 242 void (*call)(struct rcu_head *head, void (*func)(struct rcu_head *rcu));
226 void (*cb_barrier)(void); 243 void (*cb_barrier)(void);
227 void (*fqs)(void); 244 void (*fqs)(void);
@@ -273,10 +290,48 @@ static int rcu_torture_completed(void)
273 return rcu_batches_completed(); 290 return rcu_batches_completed();
274} 291}
275 292
293/*
294 * Update callback in the pipe. This should be invoked after a grace period.
295 */
296static bool
297rcu_torture_pipe_update_one(struct rcu_torture *rp)
298{
299 int i;
300
301 i = rp->rtort_pipe_count;
302 if (i > RCU_TORTURE_PIPE_LEN)
303 i = RCU_TORTURE_PIPE_LEN;
304 atomic_inc(&rcu_torture_wcount[i]);
305 if (++rp->rtort_pipe_count >= RCU_TORTURE_PIPE_LEN) {
306 rp->rtort_mbtest = 0;
307 return true;
308 }
309 return false;
310}
311
312/*
313 * Update all callbacks in the pipe. Suitable for synchronous grace-period
314 * primitives.
315 */
316static void
317rcu_torture_pipe_update(struct rcu_torture *old_rp)
318{
319 struct rcu_torture *rp;
320 struct rcu_torture *rp1;
321
322 if (old_rp)
323 list_add(&old_rp->rtort_free, &rcu_torture_removed);
324 list_for_each_entry_safe(rp, rp1, &rcu_torture_removed, rtort_free) {
325 if (rcu_torture_pipe_update_one(rp)) {
326 list_del(&rp->rtort_free);
327 rcu_torture_free(rp);
328 }
329 }
330}
331
276static void 332static void
277rcu_torture_cb(struct rcu_head *p) 333rcu_torture_cb(struct rcu_head *p)
278{ 334{
279 int i;
280 struct rcu_torture *rp = container_of(p, struct rcu_torture, rtort_rcu); 335 struct rcu_torture *rp = container_of(p, struct rcu_torture, rtort_rcu);
281 336
282 if (torture_must_stop_irq()) { 337 if (torture_must_stop_irq()) {
@@ -284,16 +339,10 @@ rcu_torture_cb(struct rcu_head *p)
284 /* The next initialization will pick up the pieces. */ 339 /* The next initialization will pick up the pieces. */
285 return; 340 return;
286 } 341 }
287 i = rp->rtort_pipe_count; 342 if (rcu_torture_pipe_update_one(rp))
288 if (i > RCU_TORTURE_PIPE_LEN)
289 i = RCU_TORTURE_PIPE_LEN;
290 atomic_inc(&rcu_torture_wcount[i]);
291 if (++rp->rtort_pipe_count >= RCU_TORTURE_PIPE_LEN) {
292 rp->rtort_mbtest = 0;
293 rcu_torture_free(rp); 343 rcu_torture_free(rp);
294 } else { 344 else
295 cur_ops->deferred_free(rp); 345 cur_ops->deferred_free(rp);
296 }
297} 346}
298 347
299static int rcu_no_completed(void) 348static int rcu_no_completed(void)
@@ -312,6 +361,7 @@ static void rcu_sync_torture_init(void)
312} 361}
313 362
314static struct rcu_torture_ops rcu_ops = { 363static struct rcu_torture_ops rcu_ops = {
364 .ttype = RCU_FLAVOR,
315 .init = rcu_sync_torture_init, 365 .init = rcu_sync_torture_init,
316 .readlock = rcu_torture_read_lock, 366 .readlock = rcu_torture_read_lock,
317 .read_delay = rcu_read_delay, 367 .read_delay = rcu_read_delay,
@@ -320,6 +370,8 @@ static struct rcu_torture_ops rcu_ops = {
320 .deferred_free = rcu_torture_deferred_free, 370 .deferred_free = rcu_torture_deferred_free,
321 .sync = synchronize_rcu, 371 .sync = synchronize_rcu,
322 .exp_sync = synchronize_rcu_expedited, 372 .exp_sync = synchronize_rcu_expedited,
373 .get_state = get_state_synchronize_rcu,
374 .cond_sync = cond_synchronize_rcu,
323 .call = call_rcu, 375 .call = call_rcu,
324 .cb_barrier = rcu_barrier, 376 .cb_barrier = rcu_barrier,
325 .fqs = rcu_force_quiescent_state, 377 .fqs = rcu_force_quiescent_state,
@@ -355,6 +407,7 @@ static void rcu_bh_torture_deferred_free(struct rcu_torture *p)
355} 407}
356 408
357static struct rcu_torture_ops rcu_bh_ops = { 409static struct rcu_torture_ops rcu_bh_ops = {
410 .ttype = RCU_BH_FLAVOR,
358 .init = rcu_sync_torture_init, 411 .init = rcu_sync_torture_init,
359 .readlock = rcu_bh_torture_read_lock, 412 .readlock = rcu_bh_torture_read_lock,
360 .read_delay = rcu_read_delay, /* just reuse rcu's version. */ 413 .read_delay = rcu_read_delay, /* just reuse rcu's version. */
@@ -397,6 +450,7 @@ call_rcu_busted(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
397} 450}
398 451
399static struct rcu_torture_ops rcu_busted_ops = { 452static struct rcu_torture_ops rcu_busted_ops = {
453 .ttype = INVALID_RCU_FLAVOR,
400 .init = rcu_sync_torture_init, 454 .init = rcu_sync_torture_init,
401 .readlock = rcu_torture_read_lock, 455 .readlock = rcu_torture_read_lock,
402 .read_delay = rcu_read_delay, /* just reuse rcu's version. */ 456 .read_delay = rcu_read_delay, /* just reuse rcu's version. */
@@ -479,9 +533,11 @@ static void srcu_torture_stats(char *page)
479 page += sprintf(page, "%s%s per-CPU(idx=%d):", 533 page += sprintf(page, "%s%s per-CPU(idx=%d):",
480 torture_type, TORTURE_FLAG, idx); 534 torture_type, TORTURE_FLAG, idx);
481 for_each_possible_cpu(cpu) { 535 for_each_possible_cpu(cpu) {
482 page += sprintf(page, " %d(%lu,%lu)", cpu, 536 long c0, c1;
483 per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[!idx], 537
484 per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[idx]); 538 c0 = (long)per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[!idx];
539 c1 = (long)per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[idx];
540 page += sprintf(page, " %d(%ld,%ld)", cpu, c0, c1);
485 } 541 }
486 sprintf(page, "\n"); 542 sprintf(page, "\n");
487} 543}
@@ -492,6 +548,7 @@ static void srcu_torture_synchronize_expedited(void)
492} 548}
493 549
494static struct rcu_torture_ops srcu_ops = { 550static struct rcu_torture_ops srcu_ops = {
551 .ttype = SRCU_FLAVOR,
495 .init = rcu_sync_torture_init, 552 .init = rcu_sync_torture_init,
496 .readlock = srcu_torture_read_lock, 553 .readlock = srcu_torture_read_lock,
497 .read_delay = srcu_read_delay, 554 .read_delay = srcu_read_delay,
@@ -527,6 +584,7 @@ static void rcu_sched_torture_deferred_free(struct rcu_torture *p)
527} 584}
528 585
529static struct rcu_torture_ops sched_ops = { 586static struct rcu_torture_ops sched_ops = {
587 .ttype = RCU_SCHED_FLAVOR,
530 .init = rcu_sync_torture_init, 588 .init = rcu_sync_torture_init,
531 .readlock = sched_torture_read_lock, 589 .readlock = sched_torture_read_lock,
532 .read_delay = rcu_read_delay, /* just reuse rcu's version. */ 590 .read_delay = rcu_read_delay, /* just reuse rcu's version. */
@@ -688,23 +746,59 @@ rcu_torture_fqs(void *arg)
688static int 746static int
689rcu_torture_writer(void *arg) 747rcu_torture_writer(void *arg)
690{ 748{
691 bool exp; 749 unsigned long gp_snap;
750 bool gp_cond1 = gp_cond, gp_exp1 = gp_exp, gp_normal1 = gp_normal;
751 bool gp_sync1 = gp_sync;
692 int i; 752 int i;
693 struct rcu_torture *rp; 753 struct rcu_torture *rp;
694 struct rcu_torture *rp1;
695 struct rcu_torture *old_rp; 754 struct rcu_torture *old_rp;
696 static DEFINE_TORTURE_RANDOM(rand); 755 static DEFINE_TORTURE_RANDOM(rand);
756 int synctype[] = { RTWS_DEF_FREE, RTWS_EXP_SYNC,
757 RTWS_COND_GET, RTWS_SYNC };
758 int nsynctypes = 0;
697 759
698 VERBOSE_TOROUT_STRING("rcu_torture_writer task started"); 760 VERBOSE_TOROUT_STRING("rcu_torture_writer task started");
699 set_user_nice(current, MAX_NICE); 761
762 /* Initialize synctype[] array. If none set, take default. */
763 if (!gp_cond1 && !gp_exp1 && !gp_normal1 && !gp_sync)
764 gp_cond1 = gp_exp1 = gp_normal1 = gp_sync1 = true;
765 if (gp_cond1 && cur_ops->get_state && cur_ops->cond_sync)
766 synctype[nsynctypes++] = RTWS_COND_GET;
767 else if (gp_cond && (!cur_ops->get_state || !cur_ops->cond_sync))
768 pr_alert("rcu_torture_writer: gp_cond without primitives.\n");
769 if (gp_exp1 && cur_ops->exp_sync)
770 synctype[nsynctypes++] = RTWS_EXP_SYNC;
771 else if (gp_exp && !cur_ops->exp_sync)
772 pr_alert("rcu_torture_writer: gp_exp without primitives.\n");
773 if (gp_normal1 && cur_ops->deferred_free)
774 synctype[nsynctypes++] = RTWS_DEF_FREE;
775 else if (gp_normal && !cur_ops->deferred_free)
776 pr_alert("rcu_torture_writer: gp_normal without primitives.\n");
777 if (gp_sync1 && cur_ops->sync)
778 synctype[nsynctypes++] = RTWS_SYNC;
779 else if (gp_sync && !cur_ops->sync)
780 pr_alert("rcu_torture_writer: gp_sync without primitives.\n");
781 if (WARN_ONCE(nsynctypes == 0,
782 "rcu_torture_writer: No update-side primitives.\n")) {
783 /*
 784 * No update-side primitives, so don't try updating.
785 * The resulting test won't be testing much, hence the
786 * above WARN_ONCE().
787 */
788 rcu_torture_writer_state = RTWS_STOPPING;
789 torture_kthread_stopping("rcu_torture_writer");
790 }
700 791
701 do { 792 do {
793 rcu_torture_writer_state = RTWS_FIXED_DELAY;
702 schedule_timeout_uninterruptible(1); 794 schedule_timeout_uninterruptible(1);
703 rp = rcu_torture_alloc(); 795 rp = rcu_torture_alloc();
704 if (rp == NULL) 796 if (rp == NULL)
705 continue; 797 continue;
706 rp->rtort_pipe_count = 0; 798 rp->rtort_pipe_count = 0;
799 rcu_torture_writer_state = RTWS_DELAY;
707 udelay(torture_random(&rand) & 0x3ff); 800 udelay(torture_random(&rand) & 0x3ff);
801 rcu_torture_writer_state = RTWS_REPLACE;
708 old_rp = rcu_dereference_check(rcu_torture_current, 802 old_rp = rcu_dereference_check(rcu_torture_current,
709 current == writer_task); 803 current == writer_task);
710 rp->rtort_mbtest = 1; 804 rp->rtort_mbtest = 1;
@@ -716,35 +810,42 @@ rcu_torture_writer(void *arg)
716 i = RCU_TORTURE_PIPE_LEN; 810 i = RCU_TORTURE_PIPE_LEN;
717 atomic_inc(&rcu_torture_wcount[i]); 811 atomic_inc(&rcu_torture_wcount[i]);
718 old_rp->rtort_pipe_count++; 812 old_rp->rtort_pipe_count++;
719 if (gp_normal == gp_exp) 813 switch (synctype[torture_random(&rand) % nsynctypes]) {
720 exp = !!(torture_random(&rand) & 0x80); 814 case RTWS_DEF_FREE:
721 else 815 rcu_torture_writer_state = RTWS_DEF_FREE;
722 exp = gp_exp;
723 if (!exp) {
724 cur_ops->deferred_free(old_rp); 816 cur_ops->deferred_free(old_rp);
725 } else { 817 break;
818 case RTWS_EXP_SYNC:
819 rcu_torture_writer_state = RTWS_EXP_SYNC;
726 cur_ops->exp_sync(); 820 cur_ops->exp_sync();
727 list_add(&old_rp->rtort_free, 821 rcu_torture_pipe_update(old_rp);
728 &rcu_torture_removed); 822 break;
729 list_for_each_entry_safe(rp, rp1, 823 case RTWS_COND_GET:
730 &rcu_torture_removed, 824 rcu_torture_writer_state = RTWS_COND_GET;
731 rtort_free) { 825 gp_snap = cur_ops->get_state();
732 i = rp->rtort_pipe_count; 826 i = torture_random(&rand) % 16;
733 if (i > RCU_TORTURE_PIPE_LEN) 827 if (i != 0)
734 i = RCU_TORTURE_PIPE_LEN; 828 schedule_timeout_interruptible(i);
735 atomic_inc(&rcu_torture_wcount[i]); 829 udelay(torture_random(&rand) % 1000);
736 if (++rp->rtort_pipe_count >= 830 rcu_torture_writer_state = RTWS_COND_SYNC;
737 RCU_TORTURE_PIPE_LEN) { 831 cur_ops->cond_sync(gp_snap);
738 rp->rtort_mbtest = 0; 832 rcu_torture_pipe_update(old_rp);
739 list_del(&rp->rtort_free); 833 break;
740 rcu_torture_free(rp); 834 case RTWS_SYNC:
741 } 835 rcu_torture_writer_state = RTWS_SYNC;
742 } 836 cur_ops->sync();
837 rcu_torture_pipe_update(old_rp);
838 break;
839 default:
840 WARN_ON_ONCE(1);
841 break;
743 } 842 }
744 } 843 }
745 rcutorture_record_progress(++rcu_torture_current_version); 844 rcutorture_record_progress(++rcu_torture_current_version);
845 rcu_torture_writer_state = RTWS_STUTTER;
746 stutter_wait("rcu_torture_writer"); 846 stutter_wait("rcu_torture_writer");
747 } while (!torture_must_stop()); 847 } while (!torture_must_stop());
848 rcu_torture_writer_state = RTWS_STOPPING;
748 torture_kthread_stopping("rcu_torture_writer"); 849 torture_kthread_stopping("rcu_torture_writer");
749 return 0; 850 return 0;
750} 851}
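[Editor's note: not part of the patch.] The RTWS_COND_GET/RTWS_COND_SYNC states above exercise the conditional grace-period primitives. A minimal sketch of the snapshot-then-conditionally-wait pattern that get_state_synchronize_rcu() and cond_synchronize_rcu() provide; "struct foo" and example_cond_free() are invented names, not rcutorture symbols:

#include <linux/rcupdate.h>
#include <linux/slab.h>

struct foo {
        int data;
};

static void example_cond_free(struct foo *old)
{
        unsigned long snap;

        snap = get_state_synchronize_rcu();     /* snapshot the grace-period state */
        /* ...unrelated update-side work can run here, overlapping the GP... */
        cond_synchronize_rcu(snap);             /* blocks only if no full GP elapsed since snap */
        kfree(old);                             /* all readers that could see 'old' are done */
}

The writer loop randomizes among this pattern, call_rcu()-style deferred freeing, expedited waits, and fully synchronous waits, so every available update-side path gets coverage.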
@@ -784,7 +885,7 @@ rcu_torture_fakewriter(void *arg)
784 return 0; 885 return 0;
785} 886}
786 887
787void rcutorture_trace_dump(void) 888static void rcutorture_trace_dump(void)
788{ 889{
789 static atomic_t beenhere = ATOMIC_INIT(0); 890 static atomic_t beenhere = ATOMIC_INIT(0);
790 891
@@ -918,11 +1019,13 @@ rcu_torture_reader(void *arg)
918 __this_cpu_inc(rcu_torture_batch[completed]); 1019 __this_cpu_inc(rcu_torture_batch[completed]);
919 preempt_enable(); 1020 preempt_enable();
920 cur_ops->readunlock(idx); 1021 cur_ops->readunlock(idx);
921 schedule(); 1022 cond_resched();
922 stutter_wait("rcu_torture_reader"); 1023 stutter_wait("rcu_torture_reader");
923 } while (!torture_must_stop()); 1024 } while (!torture_must_stop());
924 if (irqreader && cur_ops->irq_capable) 1025 if (irqreader && cur_ops->irq_capable) {
925 del_timer_sync(&t); 1026 del_timer_sync(&t);
1027 destroy_timer_on_stack(&t);
1028 }
926 torture_kthread_stopping("rcu_torture_reader"); 1029 torture_kthread_stopping("rcu_torture_reader");
927 return 0; 1030 return 0;
928} 1031}
@@ -937,6 +1040,7 @@ rcu_torture_printk(char *page)
937 int i; 1040 int i;
938 long pipesummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 }; 1041 long pipesummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 };
939 long batchsummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 }; 1042 long batchsummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 };
1043 static unsigned long rtcv_snap = ULONG_MAX;
940 1044
941 for_each_possible_cpu(cpu) { 1045 for_each_possible_cpu(cpu) {
942 for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) { 1046 for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) {
@@ -997,6 +1101,22 @@ rcu_torture_printk(char *page)
997 page += sprintf(page, "\n"); 1101 page += sprintf(page, "\n");
998 if (cur_ops->stats) 1102 if (cur_ops->stats)
999 cur_ops->stats(page); 1103 cur_ops->stats(page);
1104 if (rtcv_snap == rcu_torture_current_version &&
1105 rcu_torture_current != NULL) {
1106 int __maybe_unused flags;
1107 unsigned long __maybe_unused gpnum;
1108 unsigned long __maybe_unused completed;
1109
1110 rcutorture_get_gp_data(cur_ops->ttype,
1111 &flags, &gpnum, &completed);
1112 page += sprintf(page,
1113 "??? Writer stall state %d g%lu c%lu f%#x\n",
1114 rcu_torture_writer_state,
1115 gpnum, completed, flags);
1116 show_rcu_gp_kthreads();
1117 rcutorture_trace_dump();
1118 }
1119 rtcv_snap = rcu_torture_current_version;
1000} 1120}
1001 1121
1002/* 1122/*
@@ -1146,7 +1266,7 @@ static int __init rcu_torture_stall_init(void)
1146} 1266}
1147 1267
1148/* Callback function for RCU barrier testing. */ 1268/* Callback function for RCU barrier testing. */
1149void rcu_torture_barrier_cbf(struct rcu_head *rcu) 1269static void rcu_torture_barrier_cbf(struct rcu_head *rcu)
1150{ 1270{
1151 atomic_inc(&barrier_cbs_invoked); 1271 atomic_inc(&barrier_cbs_invoked);
1152} 1272}
@@ -1416,7 +1536,8 @@ rcu_torture_init(void)
1416 &rcu_ops, &rcu_bh_ops, &rcu_busted_ops, &srcu_ops, &sched_ops, 1536 &rcu_ops, &rcu_bh_ops, &rcu_busted_ops, &srcu_ops, &sched_ops,
1417 }; 1537 };
1418 1538
1419 torture_init_begin(torture_type, verbose, &rcutorture_runnable); 1539 if (!torture_init_begin(torture_type, verbose, &rcutorture_runnable))
1540 return -EBUSY;
1420 1541
1421 /* Process args and tell the world that the torturer is on the job. */ 1542 /* Process args and tell the world that the torturer is on the job. */
1422 for (i = 0; i < ARRAY_SIZE(torture_ops); i++) { 1543 for (i = 0; i < ARRAY_SIZE(torture_ops); i++) {
@@ -1441,10 +1562,13 @@ rcu_torture_init(void)
1441 if (cur_ops->init) 1562 if (cur_ops->init)
1442 cur_ops->init(); /* no "goto unwind" prior to this point!!! */ 1563 cur_ops->init(); /* no "goto unwind" prior to this point!!! */
1443 1564
1444 if (nreaders >= 0) 1565 if (nreaders >= 0) {
1445 nrealreaders = nreaders; 1566 nrealreaders = nreaders;
1446 else 1567 } else {
1447 nrealreaders = 2 * num_online_cpus(); 1568 nrealreaders = num_online_cpus() - 1;
1569 if (nrealreaders <= 0)
1570 nrealreaders = 1;
1571 }
1448 rcu_torture_print_module_parms(cur_ops, "Start of test"); 1572 rcu_torture_print_module_parms(cur_ops, "Start of test");
1449 1573
1450 /* Set up the freelist. */ 1574 /* Set up the freelist. */
@@ -1533,7 +1657,8 @@ rcu_torture_init(void)
1533 fqs_duration = 0; 1657 fqs_duration = 0;
1534 if (fqs_duration) { 1658 if (fqs_duration) {
1535 /* Create the fqs thread */ 1659 /* Create the fqs thread */
1536 torture_create_kthread(rcu_torture_fqs, NULL, fqs_task); 1660 firsterr = torture_create_kthread(rcu_torture_fqs, NULL,
1661 fqs_task);
1537 if (firsterr) 1662 if (firsterr)
1538 goto unwind; 1663 goto unwind;
1539 } 1664 }
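[Editor's note: not part of the patch.] The new rcu_torture_writer_state plus the rtcv_snap check in rcu_torture_printk() act as a simple progress watchdog: if the writer's version counter has not advanced between two stats passes, the test dumps the writer state and grace-period data. The idea, stripped to its essentials with invented names:

#include <linux/kernel.h>
#include <linux/printk.h>

static unsigned long example_last_version = ULONG_MAX;

static void example_check_writer_progress(unsigned long version, int state)
{
        /* Complain only if nothing has moved since the previous stats pass. */
        if (example_last_version == version)
                pr_alert("??? writer stalled: version %lu, state %d\n",
                         version, state);
        example_last_version = version;
}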
diff --git a/kernel/rcu/tiny_plugin.h b/kernel/rcu/tiny_plugin.h
index 431528520562..858c56569127 100644
--- a/kernel/rcu/tiny_plugin.h
+++ b/kernel/rcu/tiny_plugin.h
@@ -144,7 +144,7 @@ static void check_cpu_stall(struct rcu_ctrlblk *rcp)
144 return; 144 return;
145 rcp->ticks_this_gp++; 145 rcp->ticks_this_gp++;
146 j = jiffies; 146 j = jiffies;
147 js = rcp->jiffies_stall; 147 js = ACCESS_ONCE(rcp->jiffies_stall);
148 if (*rcp->curtail && ULONG_CMP_GE(j, js)) { 148 if (*rcp->curtail && ULONG_CMP_GE(j, js)) {
149 pr_err("INFO: %s stall on CPU (%lu ticks this GP) idle=%llx (t=%lu jiffies q=%ld)\n", 149 pr_err("INFO: %s stall on CPU (%lu ticks this GP) idle=%llx (t=%lu jiffies q=%ld)\n",
150 rcp->name, rcp->ticks_this_gp, rcu_dynticks_nesting, 150 rcp->name, rcp->ticks_this_gp, rcu_dynticks_nesting,
@@ -152,17 +152,17 @@ static void check_cpu_stall(struct rcu_ctrlblk *rcp)
152 dump_stack(); 152 dump_stack();
153 } 153 }
154 if (*rcp->curtail && ULONG_CMP_GE(j, js)) 154 if (*rcp->curtail && ULONG_CMP_GE(j, js))
155 rcp->jiffies_stall = jiffies + 155 ACCESS_ONCE(rcp->jiffies_stall) = jiffies +
156 3 * rcu_jiffies_till_stall_check() + 3; 156 3 * rcu_jiffies_till_stall_check() + 3;
157 else if (ULONG_CMP_GE(j, js)) 157 else if (ULONG_CMP_GE(j, js))
158 rcp->jiffies_stall = jiffies + rcu_jiffies_till_stall_check(); 158 ACCESS_ONCE(rcp->jiffies_stall) = jiffies + rcu_jiffies_till_stall_check();
159} 159}
160 160
161static void reset_cpu_stall_ticks(struct rcu_ctrlblk *rcp) 161static void reset_cpu_stall_ticks(struct rcu_ctrlblk *rcp)
162{ 162{
163 rcp->ticks_this_gp = 0; 163 rcp->ticks_this_gp = 0;
164 rcp->gp_start = jiffies; 164 rcp->gp_start = jiffies;
165 rcp->jiffies_stall = jiffies + rcu_jiffies_till_stall_check(); 165 ACCESS_ONCE(rcp->jiffies_stall) = jiffies + rcu_jiffies_till_stall_check();
166} 166}
167 167
168static void check_cpu_stalls(void) 168static void check_cpu_stalls(void)
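[Editor's note: not part of the patch.] The tiny_plugin.h hunks wrap every lockless read and write of ->jiffies_stall in ACCESS_ONCE() so the compiler cannot tear, refetch, or cache those accesses. A rough illustration of the idiom only; the "deadline" field and helpers are invented:

#include <linux/compiler.h>
#include <linux/jiffies.h>
#include <linux/rcupdate.h>     /* ULONG_CMP_GE() */
#include <linux/types.h>

struct example_ctrl {
        unsigned long deadline;         /* read and written without a lock */
};

static void example_arm(struct example_ctrl *p, unsigned long delta)
{
        ACCESS_ONCE(p->deadline) = jiffies + delta;     /* single, untorn store */
}

static bool example_expired(struct example_ctrl *p)
{
        return ULONG_CMP_GE(jiffies, ACCESS_ONCE(p->deadline)); /* single load */
}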
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 0c47e300210a..f1ba77363fbb 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -101,7 +101,7 @@ DEFINE_PER_CPU(struct rcu_data, sname##_data)
101RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched); 101RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched);
102RCU_STATE_INITIALIZER(rcu_bh, 'b', call_rcu_bh); 102RCU_STATE_INITIALIZER(rcu_bh, 'b', call_rcu_bh);
103 103
104static struct rcu_state *rcu_state; 104static struct rcu_state *rcu_state_p;
105LIST_HEAD(rcu_struct_flavors); 105LIST_HEAD(rcu_struct_flavors);
106 106
107/* Increase (but not decrease) the CONFIG_RCU_FANOUT_LEAF at boot time. */ 107/* Increase (but not decrease) the CONFIG_RCU_FANOUT_LEAF at boot time. */
@@ -243,7 +243,7 @@ static ulong jiffies_till_next_fqs = ULONG_MAX;
243module_param(jiffies_till_first_fqs, ulong, 0644); 243module_param(jiffies_till_first_fqs, ulong, 0644);
244module_param(jiffies_till_next_fqs, ulong, 0644); 244module_param(jiffies_till_next_fqs, ulong, 0644);
245 245
246static void rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp, 246static bool rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
247 struct rcu_data *rdp); 247 struct rcu_data *rdp);
248static void force_qs_rnp(struct rcu_state *rsp, 248static void force_qs_rnp(struct rcu_state *rsp,
249 int (*f)(struct rcu_data *rsp, bool *isidle, 249 int (*f)(struct rcu_data *rsp, bool *isidle,
@@ -271,6 +271,15 @@ long rcu_batches_completed_bh(void)
271EXPORT_SYMBOL_GPL(rcu_batches_completed_bh); 271EXPORT_SYMBOL_GPL(rcu_batches_completed_bh);
272 272
273/* 273/*
274 * Force a quiescent state.
275 */
276void rcu_force_quiescent_state(void)
277{
278 force_quiescent_state(rcu_state_p);
279}
280EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
281
282/*
274 * Force a quiescent state for RCU BH. 283 * Force a quiescent state for RCU BH.
275 */ 284 */
276void rcu_bh_force_quiescent_state(void) 285void rcu_bh_force_quiescent_state(void)
@@ -280,6 +289,21 @@ void rcu_bh_force_quiescent_state(void)
280EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state); 289EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state);
281 290
282/* 291/*
292 * Show the state of the grace-period kthreads.
293 */
294void show_rcu_gp_kthreads(void)
295{
296 struct rcu_state *rsp;
297
298 for_each_rcu_flavor(rsp) {
299 pr_info("%s: wait state: %d ->state: %#lx\n",
300 rsp->name, rsp->gp_state, rsp->gp_kthread->state);
301 /* sched_show_task(rsp->gp_kthread); */
302 }
303}
304EXPORT_SYMBOL_GPL(show_rcu_gp_kthreads);
305
306/*
283 * Record the number of times rcutorture tests have been initiated and 307 * Record the number of times rcutorture tests have been initiated and
284 * terminated. This information allows the debugfs tracing stats to be 308 * terminated. This information allows the debugfs tracing stats to be
285 * correlated to the rcutorture messages, even when the rcutorture module 309 * correlated to the rcutorture messages, even when the rcutorture module
@@ -294,6 +318,39 @@ void rcutorture_record_test_transition(void)
294EXPORT_SYMBOL_GPL(rcutorture_record_test_transition); 318EXPORT_SYMBOL_GPL(rcutorture_record_test_transition);
295 319
296/* 320/*
321 * Send along grace-period-related data for rcutorture diagnostics.
322 */
323void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags,
324 unsigned long *gpnum, unsigned long *completed)
325{
326 struct rcu_state *rsp = NULL;
327
328 switch (test_type) {
329 case RCU_FLAVOR:
330 rsp = rcu_state_p;
331 break;
332 case RCU_BH_FLAVOR:
333 rsp = &rcu_bh_state;
334 break;
335 case RCU_SCHED_FLAVOR:
336 rsp = &rcu_sched_state;
337 break;
338 default:
339 break;
340 }
341 if (rsp != NULL) {
342 *flags = ACCESS_ONCE(rsp->gp_flags);
343 *gpnum = ACCESS_ONCE(rsp->gpnum);
344 *completed = ACCESS_ONCE(rsp->completed);
345 return;
346 }
347 *flags = 0;
348 *gpnum = 0;
349 *completed = 0;
350}
351EXPORT_SYMBOL_GPL(rcutorture_get_gp_data);
352
353/*
297 * Record the number of writer passes through the current rcutorture test. 354 * Record the number of writer passes through the current rcutorture test.
298 * This is also used to correlate debugfs tracing stats with the rcutorture 355 * This is also used to correlate debugfs tracing stats with the rcutorture
299 * messages. 356 * messages.
@@ -324,6 +381,28 @@ cpu_has_callbacks_ready_to_invoke(struct rcu_data *rdp)
324} 381}
325 382
326/* 383/*
384 * Return the root node of the specified rcu_state structure.
385 */
386static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
387{
388 return &rsp->node[0];
389}
390
391/*
392 * Is there any need for future grace periods?
393 * Interrupts must be disabled. If the caller does not hold the root
394 * rnp_node structure's ->lock, the results are advisory only.
395 */
396static int rcu_future_needs_gp(struct rcu_state *rsp)
397{
398 struct rcu_node *rnp = rcu_get_root(rsp);
399 int idx = (ACCESS_ONCE(rnp->completed) + 1) & 0x1;
400 int *fp = &rnp->need_future_gp[idx];
401
402 return ACCESS_ONCE(*fp);
403}
404
405/*
327 * Does the current CPU require a not-yet-started grace period? 406 * Does the current CPU require a not-yet-started grace period?
328 * The caller must have disabled interrupts to prevent races with 407 * The caller must have disabled interrupts to prevent races with
329 * normal callback registry. 408 * normal callback registry.
@@ -335,7 +414,7 @@ cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
335 414
336 if (rcu_gp_in_progress(rsp)) 415 if (rcu_gp_in_progress(rsp))
337 return 0; /* No, a grace period is already in progress. */ 416 return 0; /* No, a grace period is already in progress. */
338 if (rcu_nocb_needs_gp(rsp)) 417 if (rcu_future_needs_gp(rsp))
339 return 1; /* Yes, a no-CBs CPU needs one. */ 418 return 1; /* Yes, a no-CBs CPU needs one. */
340 if (!rdp->nxttail[RCU_NEXT_TAIL]) 419 if (!rdp->nxttail[RCU_NEXT_TAIL])
341 return 0; /* No, this is a no-CBs (or offline) CPU. */ 420 return 0; /* No, this is a no-CBs (or offline) CPU. */
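[Editor's note: not part of the patch.] The new rcu_future_needs_gp() helper above consults a two-slot array: a request for grace period number C is parked in slot (C & 0x1), so the check can run locklessly and is advisory unless the root rcu_node lock is held. Reduced to its essentials, with an invented structure name:

#include <linux/compiler.h>

struct example_node {
        unsigned long completed;        /* number of the most recently finished GP */
        int need_future_gp[2];          /* pending requests, indexed by GP number & 0x1 */
};

static int example_future_gp_needed(struct example_node *rnp)
{
        int idx = (ACCESS_ONCE(rnp->completed) + 1) & 0x1;

        return ACCESS_ONCE(rnp->need_future_gp[idx]);   /* advisory without rnp->lock */
}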
@@ -350,14 +429,6 @@ cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
350} 429}
351 430
352/* 431/*
353 * Return the root node of the specified rcu_state structure.
354 */
355static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
356{
357 return &rsp->node[0];
358}
359
360/*
361 * rcu_eqs_enter_common - current CPU is moving towards extended quiescent state 432 * rcu_eqs_enter_common - current CPU is moving towards extended quiescent state
362 * 433 *
363 * If the new value of the ->dynticks_nesting counter now is zero, 434 * If the new value of the ->dynticks_nesting counter now is zero,
@@ -387,9 +458,9 @@ static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval,
387 } 458 }
388 rcu_prepare_for_idle(smp_processor_id()); 459 rcu_prepare_for_idle(smp_processor_id());
389 /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */ 460 /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
390 smp_mb__before_atomic_inc(); /* See above. */ 461 smp_mb__before_atomic(); /* See above. */
391 atomic_inc(&rdtp->dynticks); 462 atomic_inc(&rdtp->dynticks);
392 smp_mb__after_atomic_inc(); /* Force ordering with next sojourn. */ 463 smp_mb__after_atomic(); /* Force ordering with next sojourn. */
393 WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); 464 WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
394 465
395 /* 466 /*
@@ -507,10 +578,10 @@ void rcu_irq_exit(void)
507static void rcu_eqs_exit_common(struct rcu_dynticks *rdtp, long long oldval, 578static void rcu_eqs_exit_common(struct rcu_dynticks *rdtp, long long oldval,
508 int user) 579 int user)
509{ 580{
510 smp_mb__before_atomic_inc(); /* Force ordering w/previous sojourn. */ 581 smp_mb__before_atomic(); /* Force ordering w/previous sojourn. */
511 atomic_inc(&rdtp->dynticks); 582 atomic_inc(&rdtp->dynticks);
512 /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ 583 /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
513 smp_mb__after_atomic_inc(); /* See above. */ 584 smp_mb__after_atomic(); /* See above. */
514 WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); 585 WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
515 rcu_cleanup_after_idle(smp_processor_id()); 586 rcu_cleanup_after_idle(smp_processor_id());
516 trace_rcu_dyntick(TPS("End"), oldval, rdtp->dynticks_nesting); 587 trace_rcu_dyntick(TPS("End"), oldval, rdtp->dynticks_nesting);
@@ -635,10 +706,10 @@ void rcu_nmi_enter(void)
635 (atomic_read(&rdtp->dynticks) & 0x1)) 706 (atomic_read(&rdtp->dynticks) & 0x1))
636 return; 707 return;
637 rdtp->dynticks_nmi_nesting++; 708 rdtp->dynticks_nmi_nesting++;
638 smp_mb__before_atomic_inc(); /* Force delay from prior write. */ 709 smp_mb__before_atomic(); /* Force delay from prior write. */
639 atomic_inc(&rdtp->dynticks); 710 atomic_inc(&rdtp->dynticks);
640 /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ 711 /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
641 smp_mb__after_atomic_inc(); /* See above. */ 712 smp_mb__after_atomic(); /* See above. */
642 WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); 713 WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
643} 714}
644 715
@@ -657,9 +728,9 @@ void rcu_nmi_exit(void)
657 --rdtp->dynticks_nmi_nesting != 0) 728 --rdtp->dynticks_nmi_nesting != 0)
658 return; 729 return;
659 /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */ 730 /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
660 smp_mb__before_atomic_inc(); /* See above. */ 731 smp_mb__before_atomic(); /* See above. */
661 atomic_inc(&rdtp->dynticks); 732 atomic_inc(&rdtp->dynticks);
662 smp_mb__after_atomic_inc(); /* Force delay to next write. */ 733 smp_mb__after_atomic(); /* Force delay to next write. */
663 WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); 734 WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
664} 735}
665 736
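[Editor's note: not part of the patch.] These hunks are part of the tree-wide rename from smp_mb__{before,after}_atomic_{inc,dec}() to the generic smp_mb__before_atomic()/smp_mb__after_atomic(); the ordering guarantees around the ->dynticks counter are unchanged. A bare sketch of the pairing, using an illustrative counter rather than RCU's:

#include <linux/atomic.h>

static atomic_t example_phase = ATOMIC_INIT(0);

static void example_flip_phase(void)
{
        /* Earlier accesses must be visible before the counter changes... */
        smp_mb__before_atomic();
        atomic_inc(&example_phase);     /* atomic_inc() alone is not a full barrier */
        /* ...and the change must be visible before anything that follows. */
        smp_mb__after_atomic();
}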
@@ -758,7 +829,12 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp,
758{ 829{
759 rdp->dynticks_snap = atomic_add_return(0, &rdp->dynticks->dynticks); 830 rdp->dynticks_snap = atomic_add_return(0, &rdp->dynticks->dynticks);
760 rcu_sysidle_check_cpu(rdp, isidle, maxj); 831 rcu_sysidle_check_cpu(rdp, isidle, maxj);
761 return (rdp->dynticks_snap & 0x1) == 0; 832 if ((rdp->dynticks_snap & 0x1) == 0) {
833 trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("dti"));
834 return 1;
835 } else {
836 return 0;
837 }
762} 838}
763 839
764/* 840/*
@@ -834,7 +910,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp,
834 * we will beat on the first one until it gets unstuck, then move 910 * we will beat on the first one until it gets unstuck, then move
835 * to the next. Only do this for the primary flavor of RCU. 911 * to the next. Only do this for the primary flavor of RCU.
836 */ 912 */
837 if (rdp->rsp == rcu_state && 913 if (rdp->rsp == rcu_state_p &&
838 ULONG_CMP_GE(jiffies, rdp->rsp->jiffies_resched)) { 914 ULONG_CMP_GE(jiffies, rdp->rsp->jiffies_resched)) {
839 rdp->rsp->jiffies_resched += 5; 915 rdp->rsp->jiffies_resched += 5;
840 resched_cpu(rdp->cpu); 916 resched_cpu(rdp->cpu);
@@ -851,7 +927,7 @@ static void record_gp_stall_check_time(struct rcu_state *rsp)
851 rsp->gp_start = j; 927 rsp->gp_start = j;
852 smp_wmb(); /* Record start time before stall time. */ 928 smp_wmb(); /* Record start time before stall time. */
853 j1 = rcu_jiffies_till_stall_check(); 929 j1 = rcu_jiffies_till_stall_check();
854 rsp->jiffies_stall = j + j1; 930 ACCESS_ONCE(rsp->jiffies_stall) = j + j1;
855 rsp->jiffies_resched = j + j1 / 2; 931 rsp->jiffies_resched = j + j1 / 2;
856} 932}
857 933
@@ -890,12 +966,12 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
890 /* Only let one CPU complain about others per time interval. */ 966 /* Only let one CPU complain about others per time interval. */
891 967
892 raw_spin_lock_irqsave(&rnp->lock, flags); 968 raw_spin_lock_irqsave(&rnp->lock, flags);
893 delta = jiffies - rsp->jiffies_stall; 969 delta = jiffies - ACCESS_ONCE(rsp->jiffies_stall);
894 if (delta < RCU_STALL_RAT_DELAY || !rcu_gp_in_progress(rsp)) { 970 if (delta < RCU_STALL_RAT_DELAY || !rcu_gp_in_progress(rsp)) {
895 raw_spin_unlock_irqrestore(&rnp->lock, flags); 971 raw_spin_unlock_irqrestore(&rnp->lock, flags);
896 return; 972 return;
897 } 973 }
898 rsp->jiffies_stall = jiffies + 3 * rcu_jiffies_till_stall_check() + 3; 974 ACCESS_ONCE(rsp->jiffies_stall) = jiffies + 3 * rcu_jiffies_till_stall_check() + 3;
899 raw_spin_unlock_irqrestore(&rnp->lock, flags); 975 raw_spin_unlock_irqrestore(&rnp->lock, flags);
900 976
901 /* 977 /*
@@ -932,9 +1008,9 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
932 print_cpu_stall_info_end(); 1008 print_cpu_stall_info_end();
933 for_each_possible_cpu(cpu) 1009 for_each_possible_cpu(cpu)
934 totqlen += per_cpu_ptr(rsp->rda, cpu)->qlen; 1010 totqlen += per_cpu_ptr(rsp->rda, cpu)->qlen;
935 pr_cont("(detected by %d, t=%ld jiffies, g=%lu, c=%lu, q=%lu)\n", 1011 pr_cont("(detected by %d, t=%ld jiffies, g=%ld, c=%ld, q=%lu)\n",
936 smp_processor_id(), (long)(jiffies - rsp->gp_start), 1012 smp_processor_id(), (long)(jiffies - rsp->gp_start),
937 rsp->gpnum, rsp->completed, totqlen); 1013 (long)rsp->gpnum, (long)rsp->completed, totqlen);
938 if (ndetected == 0) 1014 if (ndetected == 0)
939 pr_err("INFO: Stall ended before state dump start\n"); 1015 pr_err("INFO: Stall ended before state dump start\n");
940 else if (!trigger_all_cpu_backtrace()) 1016 else if (!trigger_all_cpu_backtrace())
@@ -947,12 +1023,6 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
947 force_quiescent_state(rsp); /* Kick them all. */ 1023 force_quiescent_state(rsp); /* Kick them all. */
948} 1024}
949 1025
950/*
951 * This function really isn't for public consumption, but RCU is special in
952 * that context switches can allow the state machine to make progress.
953 */
954extern void resched_cpu(int cpu);
955
956static void print_cpu_stall(struct rcu_state *rsp) 1026static void print_cpu_stall(struct rcu_state *rsp)
957{ 1027{
958 int cpu; 1028 int cpu;
@@ -971,14 +1041,15 @@ static void print_cpu_stall(struct rcu_state *rsp)
971 print_cpu_stall_info_end(); 1041 print_cpu_stall_info_end();
972 for_each_possible_cpu(cpu) 1042 for_each_possible_cpu(cpu)
973 totqlen += per_cpu_ptr(rsp->rda, cpu)->qlen; 1043 totqlen += per_cpu_ptr(rsp->rda, cpu)->qlen;
974 pr_cont(" (t=%lu jiffies g=%lu c=%lu q=%lu)\n", 1044 pr_cont(" (t=%lu jiffies g=%ld c=%ld q=%lu)\n",
975 jiffies - rsp->gp_start, rsp->gpnum, rsp->completed, totqlen); 1045 jiffies - rsp->gp_start,
1046 (long)rsp->gpnum, (long)rsp->completed, totqlen);
976 if (!trigger_all_cpu_backtrace()) 1047 if (!trigger_all_cpu_backtrace())
977 dump_stack(); 1048 dump_stack();
978 1049
979 raw_spin_lock_irqsave(&rnp->lock, flags); 1050 raw_spin_lock_irqsave(&rnp->lock, flags);
980 if (ULONG_CMP_GE(jiffies, rsp->jiffies_stall)) 1051 if (ULONG_CMP_GE(jiffies, ACCESS_ONCE(rsp->jiffies_stall)))
981 rsp->jiffies_stall = jiffies + 1052 ACCESS_ONCE(rsp->jiffies_stall) = jiffies +
982 3 * rcu_jiffies_till_stall_check() + 3; 1053 3 * rcu_jiffies_till_stall_check() + 3;
983 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1054 raw_spin_unlock_irqrestore(&rnp->lock, flags);
984 1055
@@ -1062,7 +1133,7 @@ void rcu_cpu_stall_reset(void)
1062 struct rcu_state *rsp; 1133 struct rcu_state *rsp;
1063 1134
1064 for_each_rcu_flavor(rsp) 1135 for_each_rcu_flavor(rsp)
1065 rsp->jiffies_stall = jiffies + ULONG_MAX / 2; 1136 ACCESS_ONCE(rsp->jiffies_stall) = jiffies + ULONG_MAX / 2;
1066} 1137}
1067 1138
1068/* 1139/*
@@ -1123,15 +1194,18 @@ static void trace_rcu_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
1123/* 1194/*
1124 * Start some future grace period, as needed to handle newly arrived 1195 * Start some future grace period, as needed to handle newly arrived
1125 * callbacks. The required future grace periods are recorded in each 1196 * callbacks. The required future grace periods are recorded in each
1126 * rcu_node structure's ->need_future_gp field. 1197 * rcu_node structure's ->need_future_gp field. Returns true if there
1198 * is reason to awaken the grace-period kthread.
1127 * 1199 *
1128 * The caller must hold the specified rcu_node structure's ->lock. 1200 * The caller must hold the specified rcu_node structure's ->lock.
1129 */ 1201 */
1130static unsigned long __maybe_unused 1202static bool __maybe_unused
1131rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp) 1203rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
1204 unsigned long *c_out)
1132{ 1205{
1133 unsigned long c; 1206 unsigned long c;
1134 int i; 1207 int i;
1208 bool ret = false;
1135 struct rcu_node *rnp_root = rcu_get_root(rdp->rsp); 1209 struct rcu_node *rnp_root = rcu_get_root(rdp->rsp);
1136 1210
1137 /* 1211 /*
@@ -1142,7 +1216,7 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp)
1142 trace_rcu_future_gp(rnp, rdp, c, TPS("Startleaf")); 1216 trace_rcu_future_gp(rnp, rdp, c, TPS("Startleaf"));
1143 if (rnp->need_future_gp[c & 0x1]) { 1217 if (rnp->need_future_gp[c & 0x1]) {
1144 trace_rcu_future_gp(rnp, rdp, c, TPS("Prestartleaf")); 1218 trace_rcu_future_gp(rnp, rdp, c, TPS("Prestartleaf"));
1145 return c; 1219 goto out;
1146 } 1220 }
1147 1221
1148 /* 1222 /*
@@ -1156,7 +1230,7 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp)
1156 ACCESS_ONCE(rnp->gpnum) != ACCESS_ONCE(rnp->completed)) { 1230 ACCESS_ONCE(rnp->gpnum) != ACCESS_ONCE(rnp->completed)) {
1157 rnp->need_future_gp[c & 0x1]++; 1231 rnp->need_future_gp[c & 0x1]++;
1158 trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleaf")); 1232 trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleaf"));
1159 return c; 1233 goto out;
1160 } 1234 }
1161 1235
1162 /* 1236 /*
@@ -1197,12 +1271,15 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp)
1197 trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleafroot")); 1271 trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleafroot"));
1198 } else { 1272 } else {
1199 trace_rcu_future_gp(rnp, rdp, c, TPS("Startedroot")); 1273 trace_rcu_future_gp(rnp, rdp, c, TPS("Startedroot"));
1200 rcu_start_gp_advanced(rdp->rsp, rnp_root, rdp); 1274 ret = rcu_start_gp_advanced(rdp->rsp, rnp_root, rdp);
1201 } 1275 }
1202unlock_out: 1276unlock_out:
1203 if (rnp != rnp_root) 1277 if (rnp != rnp_root)
1204 raw_spin_unlock(&rnp_root->lock); 1278 raw_spin_unlock(&rnp_root->lock);
1205 return c; 1279out:
1280 if (c_out != NULL)
1281 *c_out = c;
1282 return ret;
1206} 1283}
1207 1284
1208/* 1285/*
@@ -1226,25 +1303,43 @@ static int rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
1226} 1303}
1227 1304
1228/* 1305/*
1306 * Awaken the grace-period kthread for the specified flavor of RCU.
1307 * Don't do a self-awaken, and don't bother awakening when there is
1308 * nothing for the grace-period kthread to do (as in several CPUs
1309 * raced to awaken, and we lost), and finally don't try to awaken
1310 * a kthread that has not yet been created.
1311 */
1312static void rcu_gp_kthread_wake(struct rcu_state *rsp)
1313{
1314 if (current == rsp->gp_kthread ||
1315 !ACCESS_ONCE(rsp->gp_flags) ||
1316 !rsp->gp_kthread)
1317 return;
1318 wake_up(&rsp->gp_wq);
1319}
1320
1321/*
1229 * If there is room, assign a ->completed number to any callbacks on 1322 * If there is room, assign a ->completed number to any callbacks on
1230 * this CPU that have not already been assigned. Also accelerate any 1323 * this CPU that have not already been assigned. Also accelerate any
1231 * callbacks that were previously assigned a ->completed number that has 1324 * callbacks that were previously assigned a ->completed number that has
1232 * since proven to be too conservative, which can happen if callbacks get 1325 * since proven to be too conservative, which can happen if callbacks get
1233 * assigned a ->completed number while RCU is idle, but with reference to 1326 * assigned a ->completed number while RCU is idle, but with reference to
1234 * a non-root rcu_node structure. This function is idempotent, so it does 1327 * a non-root rcu_node structure. This function is idempotent, so it does
 1235 * not hurt to call it repeatedly. 1328 * not hurt to call it repeatedly. Returns a flag saying that we should
1329 * awaken the RCU grace-period kthread.
1236 * 1330 *
1237 * The caller must hold rnp->lock with interrupts disabled. 1331 * The caller must hold rnp->lock with interrupts disabled.
1238 */ 1332 */
1239static void rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp, 1333static bool rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
1240 struct rcu_data *rdp) 1334 struct rcu_data *rdp)
1241{ 1335{
1242 unsigned long c; 1336 unsigned long c;
1243 int i; 1337 int i;
1338 bool ret;
1244 1339
1245 /* If the CPU has no callbacks, nothing to do. */ 1340 /* If the CPU has no callbacks, nothing to do. */
1246 if (!rdp->nxttail[RCU_NEXT_TAIL] || !*rdp->nxttail[RCU_DONE_TAIL]) 1341 if (!rdp->nxttail[RCU_NEXT_TAIL] || !*rdp->nxttail[RCU_DONE_TAIL])
1247 return; 1342 return false;
1248 1343
1249 /* 1344 /*
1250 * Starting from the sublist containing the callbacks most 1345 * Starting from the sublist containing the callbacks most
@@ -1273,7 +1368,7 @@ static void rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
1273 * be grouped into. 1368 * be grouped into.
1274 */ 1369 */
1275 if (++i >= RCU_NEXT_TAIL) 1370 if (++i >= RCU_NEXT_TAIL)
1276 return; 1371 return false;
1277 1372
1278 /* 1373 /*
1279 * Assign all subsequent callbacks' ->completed number to the next 1374 * Assign all subsequent callbacks' ->completed number to the next
@@ -1285,13 +1380,14 @@ static void rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
1285 rdp->nxtcompleted[i] = c; 1380 rdp->nxtcompleted[i] = c;
1286 } 1381 }
1287 /* Record any needed additional grace periods. */ 1382 /* Record any needed additional grace periods. */
1288 rcu_start_future_gp(rnp, rdp); 1383 ret = rcu_start_future_gp(rnp, rdp, NULL);
1289 1384
1290 /* Trace depending on how much we were able to accelerate. */ 1385 /* Trace depending on how much we were able to accelerate. */
1291 if (!*rdp->nxttail[RCU_WAIT_TAIL]) 1386 if (!*rdp->nxttail[RCU_WAIT_TAIL])
1292 trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("AccWaitCB")); 1387 trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("AccWaitCB"));
1293 else 1388 else
1294 trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("AccReadyCB")); 1389 trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("AccReadyCB"));
1390 return ret;
1295} 1391}
1296 1392
1297/* 1393/*
@@ -1300,17 +1396,18 @@ static void rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
1300 * assign ->completed numbers to any callbacks in the RCU_NEXT_TAIL 1396 * assign ->completed numbers to any callbacks in the RCU_NEXT_TAIL
1301 * sublist. This function is idempotent, so it does not hurt to 1397 * sublist. This function is idempotent, so it does not hurt to
1302 * invoke it repeatedly. As long as it is not invoked -too- often... 1398 * invoke it repeatedly. As long as it is not invoked -too- often...
1399 * Returns true if the RCU grace-period kthread needs to be awakened.
1303 * 1400 *
1304 * The caller must hold rnp->lock with interrupts disabled. 1401 * The caller must hold rnp->lock with interrupts disabled.
1305 */ 1402 */
1306static void rcu_advance_cbs(struct rcu_state *rsp, struct rcu_node *rnp, 1403static bool rcu_advance_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
1307 struct rcu_data *rdp) 1404 struct rcu_data *rdp)
1308{ 1405{
1309 int i, j; 1406 int i, j;
1310 1407
1311 /* If the CPU has no callbacks, nothing to do. */ 1408 /* If the CPU has no callbacks, nothing to do. */
1312 if (!rdp->nxttail[RCU_NEXT_TAIL] || !*rdp->nxttail[RCU_DONE_TAIL]) 1409 if (!rdp->nxttail[RCU_NEXT_TAIL] || !*rdp->nxttail[RCU_DONE_TAIL])
1313 return; 1410 return false;
1314 1411
1315 /* 1412 /*
1316 * Find all callbacks whose ->completed numbers indicate that they 1413 * Find all callbacks whose ->completed numbers indicate that they
@@ -1334,26 +1431,30 @@ static void rcu_advance_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
1334 } 1431 }
1335 1432
1336 /* Classify any remaining callbacks. */ 1433 /* Classify any remaining callbacks. */
1337 rcu_accelerate_cbs(rsp, rnp, rdp); 1434 return rcu_accelerate_cbs(rsp, rnp, rdp);
1338} 1435}
1339 1436
1340/* 1437/*
1341 * Update CPU-local rcu_data state to record the beginnings and ends of 1438 * Update CPU-local rcu_data state to record the beginnings and ends of
1342 * grace periods. The caller must hold the ->lock of the leaf rcu_node 1439 * grace periods. The caller must hold the ->lock of the leaf rcu_node
1343 * structure corresponding to the current CPU, and must have irqs disabled. 1440 * structure corresponding to the current CPU, and must have irqs disabled.
1441 * Returns true if the grace-period kthread needs to be awakened.
1344 */ 1442 */
1345static void __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) 1443static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp,
1444 struct rcu_data *rdp)
1346{ 1445{
1446 bool ret;
1447
1347 /* Handle the ends of any preceding grace periods first. */ 1448 /* Handle the ends of any preceding grace periods first. */
1348 if (rdp->completed == rnp->completed) { 1449 if (rdp->completed == rnp->completed) {
1349 1450
1350 /* No grace period end, so just accelerate recent callbacks. */ 1451 /* No grace period end, so just accelerate recent callbacks. */
1351 rcu_accelerate_cbs(rsp, rnp, rdp); 1452 ret = rcu_accelerate_cbs(rsp, rnp, rdp);
1352 1453
1353 } else { 1454 } else {
1354 1455
1355 /* Advance callbacks. */ 1456 /* Advance callbacks. */
1356 rcu_advance_cbs(rsp, rnp, rdp); 1457 ret = rcu_advance_cbs(rsp, rnp, rdp);
1357 1458
1358 /* Remember that we saw this grace-period completion. */ 1459 /* Remember that we saw this grace-period completion. */
1359 rdp->completed = rnp->completed; 1460 rdp->completed = rnp->completed;
@@ -1372,11 +1473,13 @@ static void __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp, struc
1372 rdp->qs_pending = !!(rnp->qsmask & rdp->grpmask); 1473 rdp->qs_pending = !!(rnp->qsmask & rdp->grpmask);
1373 zero_cpu_stall_ticks(rdp); 1474 zero_cpu_stall_ticks(rdp);
1374 } 1475 }
1476 return ret;
1375} 1477}
1376 1478
1377static void note_gp_changes(struct rcu_state *rsp, struct rcu_data *rdp) 1479static void note_gp_changes(struct rcu_state *rsp, struct rcu_data *rdp)
1378{ 1480{
1379 unsigned long flags; 1481 unsigned long flags;
1482 bool needwake;
1380 struct rcu_node *rnp; 1483 struct rcu_node *rnp;
1381 1484
1382 local_irq_save(flags); 1485 local_irq_save(flags);
@@ -1388,8 +1491,10 @@ static void note_gp_changes(struct rcu_state *rsp, struct rcu_data *rdp)
1388 return; 1491 return;
1389 } 1492 }
1390 smp_mb__after_unlock_lock(); 1493 smp_mb__after_unlock_lock();
1391 __note_gp_changes(rsp, rnp, rdp); 1494 needwake = __note_gp_changes(rsp, rnp, rdp);
1392 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1495 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1496 if (needwake)
1497 rcu_gp_kthread_wake(rsp);
1393} 1498}
1394 1499
1395/* 1500/*
@@ -1403,12 +1508,12 @@ static int rcu_gp_init(struct rcu_state *rsp)
1403 rcu_bind_gp_kthread(); 1508 rcu_bind_gp_kthread();
1404 raw_spin_lock_irq(&rnp->lock); 1509 raw_spin_lock_irq(&rnp->lock);
1405 smp_mb__after_unlock_lock(); 1510 smp_mb__after_unlock_lock();
1406 if (rsp->gp_flags == 0) { 1511 if (!ACCESS_ONCE(rsp->gp_flags)) {
1407 /* Spurious wakeup, tell caller to go back to sleep. */ 1512 /* Spurious wakeup, tell caller to go back to sleep. */
1408 raw_spin_unlock_irq(&rnp->lock); 1513 raw_spin_unlock_irq(&rnp->lock);
1409 return 0; 1514 return 0;
1410 } 1515 }
1411 rsp->gp_flags = 0; /* Clear all flags: New grace period. */ 1516 ACCESS_ONCE(rsp->gp_flags) = 0; /* Clear all flags: New grace period. */
1412 1517
1413 if (WARN_ON_ONCE(rcu_gp_in_progress(rsp))) { 1518 if (WARN_ON_ONCE(rcu_gp_in_progress(rsp))) {
1414 /* 1519 /*
@@ -1453,7 +1558,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
1453 WARN_ON_ONCE(rnp->completed != rsp->completed); 1558 WARN_ON_ONCE(rnp->completed != rsp->completed);
1454 ACCESS_ONCE(rnp->completed) = rsp->completed; 1559 ACCESS_ONCE(rnp->completed) = rsp->completed;
1455 if (rnp == rdp->mynode) 1560 if (rnp == rdp->mynode)
1456 __note_gp_changes(rsp, rnp, rdp); 1561 (void)__note_gp_changes(rsp, rnp, rdp);
1457 rcu_preempt_boost_start_gp(rnp); 1562 rcu_preempt_boost_start_gp(rnp);
1458 trace_rcu_grace_period_init(rsp->name, rnp->gpnum, 1563 trace_rcu_grace_period_init(rsp->name, rnp->gpnum,
1459 rnp->level, rnp->grplo, 1564 rnp->level, rnp->grplo,
@@ -1501,7 +1606,7 @@ static int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in)
1501 if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) { 1606 if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
1502 raw_spin_lock_irq(&rnp->lock); 1607 raw_spin_lock_irq(&rnp->lock);
1503 smp_mb__after_unlock_lock(); 1608 smp_mb__after_unlock_lock();
1504 rsp->gp_flags &= ~RCU_GP_FLAG_FQS; 1609 ACCESS_ONCE(rsp->gp_flags) &= ~RCU_GP_FLAG_FQS;
1505 raw_spin_unlock_irq(&rnp->lock); 1610 raw_spin_unlock_irq(&rnp->lock);
1506 } 1611 }
1507 return fqs_state; 1612 return fqs_state;
@@ -1513,6 +1618,7 @@ static int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in)
1513static void rcu_gp_cleanup(struct rcu_state *rsp) 1618static void rcu_gp_cleanup(struct rcu_state *rsp)
1514{ 1619{
1515 unsigned long gp_duration; 1620 unsigned long gp_duration;
1621 bool needgp = false;
1516 int nocb = 0; 1622 int nocb = 0;
1517 struct rcu_data *rdp; 1623 struct rcu_data *rdp;
1518 struct rcu_node *rnp = rcu_get_root(rsp); 1624 struct rcu_node *rnp = rcu_get_root(rsp);
@@ -1548,7 +1654,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
1548 ACCESS_ONCE(rnp->completed) = rsp->gpnum; 1654 ACCESS_ONCE(rnp->completed) = rsp->gpnum;
1549 rdp = this_cpu_ptr(rsp->rda); 1655 rdp = this_cpu_ptr(rsp->rda);
1550 if (rnp == rdp->mynode) 1656 if (rnp == rdp->mynode)
1551 __note_gp_changes(rsp, rnp, rdp); 1657 needgp = __note_gp_changes(rsp, rnp, rdp) || needgp;
1552 /* smp_mb() provided by prior unlock-lock pair. */ 1658 /* smp_mb() provided by prior unlock-lock pair. */
1553 nocb += rcu_future_gp_cleanup(rsp, rnp); 1659 nocb += rcu_future_gp_cleanup(rsp, rnp);
1554 raw_spin_unlock_irq(&rnp->lock); 1660 raw_spin_unlock_irq(&rnp->lock);
@@ -1564,9 +1670,10 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
1564 trace_rcu_grace_period(rsp->name, rsp->completed, TPS("end")); 1670 trace_rcu_grace_period(rsp->name, rsp->completed, TPS("end"));
1565 rsp->fqs_state = RCU_GP_IDLE; 1671 rsp->fqs_state = RCU_GP_IDLE;
1566 rdp = this_cpu_ptr(rsp->rda); 1672 rdp = this_cpu_ptr(rsp->rda);
1567 rcu_advance_cbs(rsp, rnp, rdp); /* Reduce false positives below. */ 1673 /* Advance CBs to reduce false positives below. */
1568 if (cpu_needs_another_gp(rsp, rdp)) { 1674 needgp = rcu_advance_cbs(rsp, rnp, rdp) || needgp;
1569 rsp->gp_flags = RCU_GP_FLAG_INIT; 1675 if (needgp || cpu_needs_another_gp(rsp, rdp)) {
1676 ACCESS_ONCE(rsp->gp_flags) = RCU_GP_FLAG_INIT;
1570 trace_rcu_grace_period(rsp->name, 1677 trace_rcu_grace_period(rsp->name,
1571 ACCESS_ONCE(rsp->gpnum), 1678 ACCESS_ONCE(rsp->gpnum),
1572 TPS("newreq")); 1679 TPS("newreq"));
@@ -1593,6 +1700,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
1593 trace_rcu_grace_period(rsp->name, 1700 trace_rcu_grace_period(rsp->name,
1594 ACCESS_ONCE(rsp->gpnum), 1701 ACCESS_ONCE(rsp->gpnum),
1595 TPS("reqwait")); 1702 TPS("reqwait"));
1703 rsp->gp_state = RCU_GP_WAIT_GPS;
1596 wait_event_interruptible(rsp->gp_wq, 1704 wait_event_interruptible(rsp->gp_wq,
1597 ACCESS_ONCE(rsp->gp_flags) & 1705 ACCESS_ONCE(rsp->gp_flags) &
1598 RCU_GP_FLAG_INIT); 1706 RCU_GP_FLAG_INIT);
@@ -1620,6 +1728,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
1620 trace_rcu_grace_period(rsp->name, 1728 trace_rcu_grace_period(rsp->name,
1621 ACCESS_ONCE(rsp->gpnum), 1729 ACCESS_ONCE(rsp->gpnum),
1622 TPS("fqswait")); 1730 TPS("fqswait"));
1731 rsp->gp_state = RCU_GP_WAIT_FQS;
1623 ret = wait_event_interruptible_timeout(rsp->gp_wq, 1732 ret = wait_event_interruptible_timeout(rsp->gp_wq,
1624 ((gf = ACCESS_ONCE(rsp->gp_flags)) & 1733 ((gf = ACCESS_ONCE(rsp->gp_flags)) &
1625 RCU_GP_FLAG_FQS) || 1734 RCU_GP_FLAG_FQS) ||
@@ -1665,14 +1774,6 @@ static int __noreturn rcu_gp_kthread(void *arg)
1665 } 1774 }
1666} 1775}
1667 1776
1668static void rsp_wakeup(struct irq_work *work)
1669{
1670 struct rcu_state *rsp = container_of(work, struct rcu_state, wakeup_work);
1671
1672 /* Wake up rcu_gp_kthread() to start the grace period. */
1673 wake_up(&rsp->gp_wq);
1674}
1675
1676/* 1777/*
1677 * Start a new RCU grace period if warranted, re-initializing the hierarchy 1778 * Start a new RCU grace period if warranted, re-initializing the hierarchy
1678 * in preparation for detecting the next grace period. The caller must hold 1779 * in preparation for detecting the next grace period. The caller must hold
@@ -1681,8 +1782,10 @@ static void rsp_wakeup(struct irq_work *work)
1681 * Note that it is legal for a dying CPU (which is marked as offline) to 1782 * Note that it is legal for a dying CPU (which is marked as offline) to
1682 * invoke this function. This can happen when the dying CPU reports its 1783 * invoke this function. This can happen when the dying CPU reports its
1683 * quiescent state. 1784 * quiescent state.
1785 *
1786 * Returns true if the grace-period kthread must be awakened.
1684 */ 1787 */
1685static void 1788static bool
1686rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp, 1789rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
1687 struct rcu_data *rdp) 1790 struct rcu_data *rdp)
1688{ 1791{
@@ -1693,20 +1796,18 @@ rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
1693 * or a grace period is already in progress. 1796 * or a grace period is already in progress.
1694 * Either way, don't start a new grace period. 1797 * Either way, don't start a new grace period.
1695 */ 1798 */
1696 return; 1799 return false;
1697 } 1800 }
1698 rsp->gp_flags = RCU_GP_FLAG_INIT; 1801 ACCESS_ONCE(rsp->gp_flags) = RCU_GP_FLAG_INIT;
1699 trace_rcu_grace_period(rsp->name, ACCESS_ONCE(rsp->gpnum), 1802 trace_rcu_grace_period(rsp->name, ACCESS_ONCE(rsp->gpnum),
1700 TPS("newreq")); 1803 TPS("newreq"));
1701 1804
1702 /* 1805 /*
1703 * We can't do wakeups while holding the rnp->lock, as that 1806 * We can't do wakeups while holding the rnp->lock, as that
1704 * could cause possible deadlocks with the rq->lock. Defer 1807 * could cause possible deadlocks with the rq->lock. Defer
1705 * the wakeup to interrupt context. And don't bother waking 1808 * the wakeup to our caller.
1706 * up the running kthread.
1707 */ 1809 */
1708 if (current != rsp->gp_kthread) 1810 return true;
1709 irq_work_queue(&rsp->wakeup_work);
1710} 1811}
1711 1812
1712/* 1813/*
@@ -1715,12 +1816,14 @@ rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
1715 * is invoked indirectly from rcu_advance_cbs(), which would result in 1816 * is invoked indirectly from rcu_advance_cbs(), which would result in
1716 * endless recursion -- or would do so if it wasn't for the self-deadlock 1817 * endless recursion -- or would do so if it wasn't for the self-deadlock
1717 * that is encountered beforehand. 1818 * that is encountered beforehand.
1819 *
1820 * Returns true if the grace-period kthread needs to be awakened.
1718 */ 1821 */
1719static void 1822static bool rcu_start_gp(struct rcu_state *rsp)
1720rcu_start_gp(struct rcu_state *rsp)
1721{ 1823{
1722 struct rcu_data *rdp = this_cpu_ptr(rsp->rda); 1824 struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
1723 struct rcu_node *rnp = rcu_get_root(rsp); 1825 struct rcu_node *rnp = rcu_get_root(rsp);
1826 bool ret = false;
1724 1827
1725 /* 1828 /*
1726 * If there is no grace period in progress right now, any 1829 * If there is no grace period in progress right now, any
@@ -1730,8 +1833,9 @@ rcu_start_gp(struct rcu_state *rsp)
1730 * resulting in pointless grace periods. So, advance callbacks 1833 * resulting in pointless grace periods. So, advance callbacks
1731 * then start the grace period! 1834 * then start the grace period!
1732 */ 1835 */
1733 rcu_advance_cbs(rsp, rnp, rdp); 1836 ret = rcu_advance_cbs(rsp, rnp, rdp) || ret;
1734 rcu_start_gp_advanced(rsp, rnp, rdp); 1837 ret = rcu_start_gp_advanced(rsp, rnp, rdp) || ret;
1838 return ret;
1735} 1839}
1736 1840
1737/* 1841/*
@@ -1820,6 +1924,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp)
1820{ 1924{
1821 unsigned long flags; 1925 unsigned long flags;
1822 unsigned long mask; 1926 unsigned long mask;
1927 bool needwake;
1823 struct rcu_node *rnp; 1928 struct rcu_node *rnp;
1824 1929
1825 rnp = rdp->mynode; 1930 rnp = rdp->mynode;
@@ -1848,9 +1953,11 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp)
1848 * This GP can't end until cpu checks in, so all of our 1953 * This GP can't end until cpu checks in, so all of our
1849 * callbacks can be processed during the next GP. 1954 * callbacks can be processed during the next GP.
1850 */ 1955 */
1851 rcu_accelerate_cbs(rsp, rnp, rdp); 1956 needwake = rcu_accelerate_cbs(rsp, rnp, rdp);
1852 1957
1853 rcu_report_qs_rnp(mask, rsp, rnp, flags); /* rlses rnp->lock */ 1958 rcu_report_qs_rnp(mask, rsp, rnp, flags); /* rlses rnp->lock */
1959 if (needwake)
1960 rcu_gp_kthread_wake(rsp);
1854 } 1961 }
1855} 1962}
1856 1963
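[Editor's note: not part of the patch.] The recurring shape in these tree.c hunks is that functions called with rnp->lock held no longer wake the grace-period kthread directly; they return a needwake flag, and the caller invokes rcu_gp_kthread_wake() only after dropping the lock, so no wakeup (and no potential rq->lock deadlock) happens under a raw spinlock. In outline, with invented names:

#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/wait.h>

struct example_state {
        spinlock_t lock;
        bool work_pending;
        wait_queue_head_t wq;
};

/* Decide under the lock whether a wakeup is needed, but do not issue it. */
static bool example_update(struct example_state *sp)
{
        /* ...modify state protected by sp->lock... */
        return sp->work_pending;
}

static void example_caller(struct example_state *sp)
{
        unsigned long flags;
        bool needwake;

        spin_lock_irqsave(&sp->lock, flags);
        needwake = example_update(sp);
        spin_unlock_irqrestore(&sp->lock, flags);
        if (needwake)
                wake_up(&sp->wq);       /* safe: no spinlocks held here */
}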
@@ -1951,7 +2058,7 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
1951static void rcu_adopt_orphan_cbs(struct rcu_state *rsp, unsigned long flags) 2058static void rcu_adopt_orphan_cbs(struct rcu_state *rsp, unsigned long flags)
1952{ 2059{
1953 int i; 2060 int i;
1954 struct rcu_data *rdp = __this_cpu_ptr(rsp->rda); 2061 struct rcu_data *rdp = raw_cpu_ptr(rsp->rda);
1955 2062
1956 /* No-CBs CPUs are handled specially. */ 2063 /* No-CBs CPUs are handled specially. */
1957 if (rcu_nocb_adopt_orphan_cbs(rsp, rdp, flags)) 2064 if (rcu_nocb_adopt_orphan_cbs(rsp, rdp, flags))
@@ -2320,7 +2427,7 @@ static void force_quiescent_state(struct rcu_state *rsp)
2320 raw_spin_unlock_irqrestore(&rnp_old->lock, flags); 2427 raw_spin_unlock_irqrestore(&rnp_old->lock, flags);
2321 return; /* Someone beat us to it. */ 2428 return; /* Someone beat us to it. */
2322 } 2429 }
2323 rsp->gp_flags |= RCU_GP_FLAG_FQS; 2430 ACCESS_ONCE(rsp->gp_flags) |= RCU_GP_FLAG_FQS;
2324 raw_spin_unlock_irqrestore(&rnp_old->lock, flags); 2431 raw_spin_unlock_irqrestore(&rnp_old->lock, flags);
2325 wake_up(&rsp->gp_wq); /* Memory barrier implied by wake_up() path. */ 2432 wake_up(&rsp->gp_wq); /* Memory barrier implied by wake_up() path. */
2326} 2433}
@@ -2334,7 +2441,8 @@ static void
2334__rcu_process_callbacks(struct rcu_state *rsp) 2441__rcu_process_callbacks(struct rcu_state *rsp)
2335{ 2442{
2336 unsigned long flags; 2443 unsigned long flags;
2337 struct rcu_data *rdp = __this_cpu_ptr(rsp->rda); 2444 bool needwake;
2445 struct rcu_data *rdp = raw_cpu_ptr(rsp->rda);
2338 2446
2339 WARN_ON_ONCE(rdp->beenonline == 0); 2447 WARN_ON_ONCE(rdp->beenonline == 0);
2340 2448
@@ -2345,8 +2453,10 @@ __rcu_process_callbacks(struct rcu_state *rsp)
2345 local_irq_save(flags); 2453 local_irq_save(flags);
2346 if (cpu_needs_another_gp(rsp, rdp)) { 2454 if (cpu_needs_another_gp(rsp, rdp)) {
2347 raw_spin_lock(&rcu_get_root(rsp)->lock); /* irqs disabled. */ 2455 raw_spin_lock(&rcu_get_root(rsp)->lock); /* irqs disabled. */
2348 rcu_start_gp(rsp); 2456 needwake = rcu_start_gp(rsp);
2349 raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags); 2457 raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags);
2458 if (needwake)
2459 rcu_gp_kthread_wake(rsp);
2350 } else { 2460 } else {
2351 local_irq_restore(flags); 2461 local_irq_restore(flags);
2352 } 2462 }
@@ -2404,6 +2514,8 @@ static void invoke_rcu_core(void)
2404static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp, 2514static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
2405 struct rcu_head *head, unsigned long flags) 2515 struct rcu_head *head, unsigned long flags)
2406{ 2516{
2517 bool needwake;
2518
2407 /* 2519 /*
2408 * If called from an extended quiescent state, invoke the RCU 2520 * If called from an extended quiescent state, invoke the RCU
2409 * core in order to force a re-evaluation of RCU's idleness. 2521 * core in order to force a re-evaluation of RCU's idleness.
@@ -2433,8 +2545,10 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
2433 2545
2434 raw_spin_lock(&rnp_root->lock); 2546 raw_spin_lock(&rnp_root->lock);
2435 smp_mb__after_unlock_lock(); 2547 smp_mb__after_unlock_lock();
2436 rcu_start_gp(rsp); 2548 needwake = rcu_start_gp(rsp);
2437 raw_spin_unlock(&rnp_root->lock); 2549 raw_spin_unlock(&rnp_root->lock);
2550 if (needwake)
2551 rcu_gp_kthread_wake(rsp);
2438 } else { 2552 } else {
2439 /* Give the grace period a kick. */ 2553 /* Give the grace period a kick. */
2440 rdp->blimit = LONG_MAX; 2554 rdp->blimit = LONG_MAX;
@@ -2537,6 +2651,20 @@ void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
2537EXPORT_SYMBOL_GPL(call_rcu_bh); 2651EXPORT_SYMBOL_GPL(call_rcu_bh);
2538 2652
2539/* 2653/*
2654 * Queue an RCU callback for lazy invocation after a grace period.
2655 * This will likely be later named something like "call_rcu_lazy()",
2656 * but this change will require some way of tagging the lazy RCU
2657 * callbacks in the list of pending callbacks. Until then, this
2658 * function may only be called from __kfree_rcu().
2659 */
2660void kfree_call_rcu(struct rcu_head *head,
2661 void (*func)(struct rcu_head *rcu))
2662{
2663 __call_rcu(head, func, rcu_state_p, -1, 1);
2664}
2665EXPORT_SYMBOL_GPL(kfree_call_rcu);
2666
2667/*
2540 * Because a context switch is a grace period for RCU-sched and RCU-bh, 2668 * Because a context switch is a grace period for RCU-sched and RCU-bh,
2541 * any blocking grace-period wait automatically implies a grace period 2669 * any blocking grace-period wait automatically implies a grace period
2542 * if there is only one CPU online at any point time during execution 2670 * if there is only one CPU online at any point time during execution
@@ -2659,7 +2787,7 @@ unsigned long get_state_synchronize_rcu(void)
2659 * time-consuming work between get_state_synchronize_rcu() 2787 * time-consuming work between get_state_synchronize_rcu()
2660 * and cond_synchronize_rcu(). 2788 * and cond_synchronize_rcu().
2661 */ 2789 */
2662 return smp_load_acquire(&rcu_state->gpnum); 2790 return smp_load_acquire(&rcu_state_p->gpnum);
2663} 2791}
2664EXPORT_SYMBOL_GPL(get_state_synchronize_rcu); 2792EXPORT_SYMBOL_GPL(get_state_synchronize_rcu);
2665 2793
@@ -2685,7 +2813,7 @@ void cond_synchronize_rcu(unsigned long oldstate)
2685 * Ensure that this load happens before any RCU-destructive 2813 * Ensure that this load happens before any RCU-destructive
2686 * actions the caller might carry out after we return. 2814 * actions the caller might carry out after we return.
2687 */ 2815 */
2688 newstate = smp_load_acquire(&rcu_state->completed); 2816 newstate = smp_load_acquire(&rcu_state_p->completed);
2689 if (ULONG_CMP_GE(oldstate, newstate)) 2817 if (ULONG_CMP_GE(oldstate, newstate))
2690 synchronize_rcu(); 2818 synchronize_rcu();
2691} 2819}
@@ -2790,7 +2918,7 @@ void synchronize_sched_expedited(void)
2790 s = atomic_long_read(&rsp->expedited_done); 2918 s = atomic_long_read(&rsp->expedited_done);
2791 if (ULONG_CMP_GE((ulong)s, (ulong)firstsnap)) { 2919 if (ULONG_CMP_GE((ulong)s, (ulong)firstsnap)) {
2792 /* ensure test happens before caller kfree */ 2920 /* ensure test happens before caller kfree */
2793 smp_mb__before_atomic_inc(); /* ^^^ */ 2921 smp_mb__before_atomic(); /* ^^^ */
2794 atomic_long_inc(&rsp->expedited_workdone1); 2922 atomic_long_inc(&rsp->expedited_workdone1);
2795 return; 2923 return;
2796 } 2924 }
@@ -2808,7 +2936,7 @@ void synchronize_sched_expedited(void)
2808 s = atomic_long_read(&rsp->expedited_done); 2936 s = atomic_long_read(&rsp->expedited_done);
2809 if (ULONG_CMP_GE((ulong)s, (ulong)firstsnap)) { 2937 if (ULONG_CMP_GE((ulong)s, (ulong)firstsnap)) {
2810 /* ensure test happens before caller kfree */ 2938 /* ensure test happens before caller kfree */
2811 smp_mb__before_atomic_inc(); /* ^^^ */ 2939 smp_mb__before_atomic(); /* ^^^ */
2812 atomic_long_inc(&rsp->expedited_workdone2); 2940 atomic_long_inc(&rsp->expedited_workdone2);
2813 return; 2941 return;
2814 } 2942 }
@@ -2837,7 +2965,7 @@ void synchronize_sched_expedited(void)
2837 s = atomic_long_read(&rsp->expedited_done); 2965 s = atomic_long_read(&rsp->expedited_done);
2838 if (ULONG_CMP_GE((ulong)s, (ulong)snap)) { 2966 if (ULONG_CMP_GE((ulong)s, (ulong)snap)) {
2839 /* ensure test happens before caller kfree */ 2967 /* ensure test happens before caller kfree */
2840 smp_mb__before_atomic_inc(); /* ^^^ */ 2968 smp_mb__before_atomic(); /* ^^^ */
2841 atomic_long_inc(&rsp->expedited_done_lost); 2969 atomic_long_inc(&rsp->expedited_done_lost);
2842 break; 2970 break;
2843 } 2971 }
@@ -2988,7 +3116,7 @@ static void rcu_barrier_callback(struct rcu_head *rhp)
2988static void rcu_barrier_func(void *type) 3116static void rcu_barrier_func(void *type)
2989{ 3117{
2990 struct rcu_state *rsp = type; 3118 struct rcu_state *rsp = type;
2991 struct rcu_data *rdp = __this_cpu_ptr(rsp->rda); 3119 struct rcu_data *rdp = raw_cpu_ptr(rsp->rda);
2992 3120
2993 _rcu_barrier_trace(rsp, "IRQ", -1, rsp->n_barrier_done); 3121 _rcu_barrier_trace(rsp, "IRQ", -1, rsp->n_barrier_done);
2994 atomic_inc(&rsp->barrier_cpu_count); 3122 atomic_inc(&rsp->barrier_cpu_count);
@@ -3160,7 +3288,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
3160 * that this CPU cannot possibly have any RCU callbacks in flight yet. 3288 * that this CPU cannot possibly have any RCU callbacks in flight yet.
3161 */ 3289 */
3162static void 3290static void
3163rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) 3291rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
3164{ 3292{
3165 unsigned long flags; 3293 unsigned long flags;
3166 unsigned long mask; 3294 unsigned long mask;
@@ -3173,7 +3301,6 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
3173 /* Set up local state, ensuring consistent view of global state. */ 3301 /* Set up local state, ensuring consistent view of global state. */
3174 raw_spin_lock_irqsave(&rnp->lock, flags); 3302 raw_spin_lock_irqsave(&rnp->lock, flags);
3175 rdp->beenonline = 1; /* We have now been online. */ 3303 rdp->beenonline = 1; /* We have now been online. */
3176 rdp->preemptible = preemptible;
3177 rdp->qlen_last_fqs_check = 0; 3304 rdp->qlen_last_fqs_check = 0;
3178 rdp->n_force_qs_snap = rsp->n_force_qs; 3305 rdp->n_force_qs_snap = rsp->n_force_qs;
3179 rdp->blimit = blimit; 3306 rdp->blimit = blimit;
@@ -3217,8 +3344,7 @@ static void rcu_prepare_cpu(int cpu)
3217 struct rcu_state *rsp; 3344 struct rcu_state *rsp;
3218 3345
3219 for_each_rcu_flavor(rsp) 3346 for_each_rcu_flavor(rsp)
3220 rcu_init_percpu_data(cpu, rsp, 3347 rcu_init_percpu_data(cpu, rsp);
3221 strcmp(rsp->name, "rcu_preempt") == 0);
3222} 3348}
3223 3349
3224/* 3350/*
@@ -3228,7 +3354,7 @@ static int rcu_cpu_notify(struct notifier_block *self,
3228 unsigned long action, void *hcpu) 3354 unsigned long action, void *hcpu)
3229{ 3355{
3230 long cpu = (long)hcpu; 3356 long cpu = (long)hcpu;
3231 struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); 3357 struct rcu_data *rdp = per_cpu_ptr(rcu_state_p->rda, cpu);
3232 struct rcu_node *rnp = rdp->mynode; 3358 struct rcu_node *rnp = rdp->mynode;
3233 struct rcu_state *rsp; 3359 struct rcu_state *rsp;
3234 3360
@@ -3402,8 +3528,8 @@ static void __init rcu_init_one(struct rcu_state *rsp,
3402 rnp->qsmaskinit = 0; 3528 rnp->qsmaskinit = 0;
3403 rnp->grplo = j * cpustride; 3529 rnp->grplo = j * cpustride;
3404 rnp->grphi = (j + 1) * cpustride - 1; 3530 rnp->grphi = (j + 1) * cpustride - 1;
3405 if (rnp->grphi >= NR_CPUS) 3531 if (rnp->grphi >= nr_cpu_ids)
3406 rnp->grphi = NR_CPUS - 1; 3532 rnp->grphi = nr_cpu_ids - 1;
3407 if (i == 0) { 3533 if (i == 0) {
3408 rnp->grpnum = 0; 3534 rnp->grpnum = 0;
3409 rnp->grpmask = 0; 3535 rnp->grpmask = 0;
@@ -3422,7 +3548,6 @@ static void __init rcu_init_one(struct rcu_state *rsp,
3422 3548
3423 rsp->rda = rda; 3549 rsp->rda = rda;
3424 init_waitqueue_head(&rsp->gp_wq); 3550 init_waitqueue_head(&rsp->gp_wq);
3425 init_irq_work(&rsp->wakeup_work, rsp_wakeup);
3426 rnp = rsp->level[rcu_num_lvls - 1]; 3551 rnp = rsp->level[rcu_num_lvls - 1];
3427 for_each_possible_cpu(i) { 3552 for_each_possible_cpu(i) {
3428 while (i > rnp->grphi) 3553 while (i > rnp->grphi)
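
The kernel/rcu/tree.c hunks above convert rcu_start_gp(), rcu_advance_cbs(), rcu_accelerate_cbs() and rcu_start_future_gp() to return whether the grace-period kthread needs waking, so each caller invokes rcu_gp_kthread_wake() only after the relevant rcu_node lock has been dropped; the irq_work-based rsp_wakeup()/wakeup_work deferral is removed. A minimal userspace sketch of that "record the wakeup, do it outside the lock" pattern, assuming pthreads and made-up names (gp_ctl, start_gp_locked, queue_work) that are not kernel APIs:

#include <pthread.h>
#include <stdbool.h>

/* Hypothetical analogue of the needwake pattern: the helper only records
 * that the worker must be woken; the caller performs the wakeup after
 * dropping the lock, so the woken worker never contends for it. */
struct gp_ctl {
	pthread_mutex_t lock;
	pthread_cond_t  wake;     /* stands in for rcu_gp_kthread_wake() */
	int             pending;  /* stands in for callbacks needing a GP */
	bool            gp_in_progress;
};

/* Caller must hold ctl->lock; returns true if the worker needs a wakeup. */
static bool start_gp_locked(struct gp_ctl *ctl)
{
	if (ctl->gp_in_progress || ctl->pending == 0)
		return false;
	ctl->gp_in_progress = true;
	return true;              /* defer the actual wakeup to the caller */
}

static void queue_work(struct gp_ctl *ctl)
{
	bool needwake;

	pthread_mutex_lock(&ctl->lock);
	ctl->pending++;
	needwake = start_gp_locked(ctl);
	pthread_mutex_unlock(&ctl->lock);
	if (needwake)
		pthread_cond_signal(&ctl->wake);  /* wakeup happens lock-free */
}
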
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 75dc3c39a02a..bf2c1e669691 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -252,7 +252,6 @@ struct rcu_data {
252 bool passed_quiesce; /* User-mode/idle loop etc. */ 252 bool passed_quiesce; /* User-mode/idle loop etc. */
253 bool qs_pending; /* Core waits for quiesc state. */ 253 bool qs_pending; /* Core waits for quiesc state. */
254 bool beenonline; /* CPU online at least once. */ 254 bool beenonline; /* CPU online at least once. */
255 bool preemptible; /* Preemptible RCU? */
256 struct rcu_node *mynode; /* This CPU's leaf of hierarchy */ 255 struct rcu_node *mynode; /* This CPU's leaf of hierarchy */
257 unsigned long grpmask; /* Mask to apply to leaf qsmask. */ 256 unsigned long grpmask; /* Mask to apply to leaf qsmask. */
258#ifdef CONFIG_RCU_CPU_STALL_INFO 257#ifdef CONFIG_RCU_CPU_STALL_INFO
@@ -406,7 +405,8 @@ struct rcu_state {
406 unsigned long completed; /* # of last completed gp. */ 405 unsigned long completed; /* # of last completed gp. */
407 struct task_struct *gp_kthread; /* Task for grace periods. */ 406 struct task_struct *gp_kthread; /* Task for grace periods. */
408 wait_queue_head_t gp_wq; /* Where GP task waits. */ 407 wait_queue_head_t gp_wq; /* Where GP task waits. */
409 int gp_flags; /* Commands for GP task. */ 408 short gp_flags; /* Commands for GP task. */
409 short gp_state; /* GP kthread sleep state. */
410 410
411 /* End of fields guarded by root rcu_node's lock. */ 411 /* End of fields guarded by root rcu_node's lock. */
412 412
@@ -462,13 +462,17 @@ struct rcu_state {
462 const char *name; /* Name of structure. */ 462 const char *name; /* Name of structure. */
463 char abbr; /* Abbreviated name. */ 463 char abbr; /* Abbreviated name. */
464 struct list_head flavors; /* List of RCU flavors. */ 464 struct list_head flavors; /* List of RCU flavors. */
465 struct irq_work wakeup_work; /* Postponed wakeups */
466}; 465};
467 466
468/* Values for rcu_state structure's gp_flags field. */ 467/* Values for rcu_state structure's gp_flags field. */
469#define RCU_GP_FLAG_INIT 0x1 /* Need grace-period initialization. */ 468#define RCU_GP_FLAG_INIT 0x1 /* Need grace-period initialization. */
470#define RCU_GP_FLAG_FQS 0x2 /* Need grace-period quiescent-state forcing. */ 469#define RCU_GP_FLAG_FQS 0x2 /* Need grace-period quiescent-state forcing. */
471 470
 471/* Values for rcu_state structure's gp_state field. */
472#define RCU_GP_WAIT_INIT 0 /* Initial state. */
473#define RCU_GP_WAIT_GPS 1 /* Wait for grace-period start. */
474#define RCU_GP_WAIT_FQS 2 /* Wait for force-quiescent-state time. */
475
472extern struct list_head rcu_struct_flavors; 476extern struct list_head rcu_struct_flavors;
473 477
474/* Sequence through rcu_state structures for each RCU flavor. */ 478/* Sequence through rcu_state structures for each RCU flavor. */
@@ -547,7 +551,6 @@ static void print_cpu_stall_info(struct rcu_state *rsp, int cpu);
547static void print_cpu_stall_info_end(void); 551static void print_cpu_stall_info_end(void);
548static void zero_cpu_stall_ticks(struct rcu_data *rdp); 552static void zero_cpu_stall_ticks(struct rcu_data *rdp);
549static void increment_cpu_stall_ticks(void); 553static void increment_cpu_stall_ticks(void);
550static int rcu_nocb_needs_gp(struct rcu_state *rsp);
551static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq); 554static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq);
552static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp); 555static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp);
553static void rcu_init_one_nocb(struct rcu_node *rnp); 556static void rcu_init_one_nocb(struct rcu_node *rnp);
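
The kernel/rcu/tree.h hunk narrows gp_flags to a short and adds a gp_state field with the RCU_GP_WAIT_* values, recording what the grace-period kthread is currently sleeping on, presumably so diagnostics can say which wait it is stuck in. A small sketch of that bookkeeping idea, with made-up names (gp_worker, gp_record_sleep, gp_report_stall) that only mirror the RCU_GP_WAIT_* constants:

#include <stdio.h>

/* Hypothetical sketch: record what the worker is about to sleep on so a
 * stall report can name the wait. Everything except the three state names
 * is invented for illustration. */
enum gp_wait_state { GP_WAIT_INIT, GP_WAIT_GPS, GP_WAIT_FQS };

struct gp_worker {
	volatile enum gp_wait_state state;
};

static const char *const gp_state_names[] = {
	[GP_WAIT_INIT] = "no grace period yet",
	[GP_WAIT_GPS]  = "waiting for grace-period start",
	[GP_WAIT_FQS]  = "waiting for force-quiescent-state time",
};

static void gp_record_sleep(struct gp_worker *w, enum gp_wait_state s)
{
	w->state = s;           /* update before blocking; read by diagnostics */
}

static void gp_report_stall(const struct gp_worker *w)
{
	fprintf(stderr, "grace-period kthread stalled: %s\n",
		gp_state_names[w->state]);
}
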
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 962d1d589929..cbc2c45265e2 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -116,7 +116,7 @@ static void __init rcu_bootup_announce_oddness(void)
116#ifdef CONFIG_TREE_PREEMPT_RCU 116#ifdef CONFIG_TREE_PREEMPT_RCU
117 117
118RCU_STATE_INITIALIZER(rcu_preempt, 'p', call_rcu); 118RCU_STATE_INITIALIZER(rcu_preempt, 'p', call_rcu);
119static struct rcu_state *rcu_state = &rcu_preempt_state; 119static struct rcu_state *rcu_state_p = &rcu_preempt_state;
120 120
121static int rcu_preempted_readers_exp(struct rcu_node *rnp); 121static int rcu_preempted_readers_exp(struct rcu_node *rnp);
122 122
@@ -149,15 +149,6 @@ long rcu_batches_completed(void)
149EXPORT_SYMBOL_GPL(rcu_batches_completed); 149EXPORT_SYMBOL_GPL(rcu_batches_completed);
150 150
151/* 151/*
152 * Force a quiescent state for preemptible RCU.
153 */
154void rcu_force_quiescent_state(void)
155{
156 force_quiescent_state(&rcu_preempt_state);
157}
158EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
159
160/*
161 * Record a preemptible-RCU quiescent state for the specified CPU. Note 152 * Record a preemptible-RCU quiescent state for the specified CPU. Note
162 * that this just means that the task currently running on the CPU is 153 * that this just means that the task currently running on the CPU is
163 * not in a quiescent state. There might be any number of tasks blocked 154 * not in a quiescent state. There might be any number of tasks blocked
@@ -688,20 +679,6 @@ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
688} 679}
689EXPORT_SYMBOL_GPL(call_rcu); 680EXPORT_SYMBOL_GPL(call_rcu);
690 681
691/*
692 * Queue an RCU callback for lazy invocation after a grace period.
693 * This will likely be later named something like "call_rcu_lazy()",
694 * but this change will require some way of tagging the lazy RCU
695 * callbacks in the list of pending callbacks. Until then, this
696 * function may only be called from __kfree_rcu().
697 */
698void kfree_call_rcu(struct rcu_head *head,
699 void (*func)(struct rcu_head *rcu))
700{
701 __call_rcu(head, func, &rcu_preempt_state, -1, 1);
702}
703EXPORT_SYMBOL_GPL(kfree_call_rcu);
704
705/** 682/**
706 * synchronize_rcu - wait until a grace period has elapsed. 683 * synchronize_rcu - wait until a grace period has elapsed.
707 * 684 *
@@ -970,7 +947,7 @@ void exit_rcu(void)
970 947
971#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ 948#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */
972 949
973static struct rcu_state *rcu_state = &rcu_sched_state; 950static struct rcu_state *rcu_state_p = &rcu_sched_state;
974 951
975/* 952/*
976 * Tell them what RCU they are running. 953 * Tell them what RCU they are running.
@@ -991,16 +968,6 @@ long rcu_batches_completed(void)
991EXPORT_SYMBOL_GPL(rcu_batches_completed); 968EXPORT_SYMBOL_GPL(rcu_batches_completed);
992 969
993/* 970/*
994 * Force a quiescent state for RCU, which, because there is no preemptible
995 * RCU, becomes the same as rcu-sched.
996 */
997void rcu_force_quiescent_state(void)
998{
999 rcu_sched_force_quiescent_state();
1000}
1001EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
1002
1003/*
1004 * Because preemptible RCU does not exist, we never have to check for 971 * Because preemptible RCU does not exist, we never have to check for
1005 * CPUs being in quiescent states. 972 * CPUs being in quiescent states.
1006 */ 973 */
@@ -1080,22 +1047,6 @@ static void rcu_preempt_check_callbacks(int cpu)
1080} 1047}
1081 1048
1082/* 1049/*
1083 * Queue an RCU callback for lazy invocation after a grace period.
1084 * This will likely be later named something like "call_rcu_lazy()",
1085 * but this change will require some way of tagging the lazy RCU
1086 * callbacks in the list of pending callbacks. Until then, this
1087 * function may only be called from __kfree_rcu().
1088 *
1089 * Because there is no preemptible RCU, we use RCU-sched instead.
1090 */
1091void kfree_call_rcu(struct rcu_head *head,
1092 void (*func)(struct rcu_head *rcu))
1093{
1094 __call_rcu(head, func, &rcu_sched_state, -1, 1);
1095}
1096EXPORT_SYMBOL_GPL(kfree_call_rcu);
1097
1098/*
1099 * Wait for an rcu-preempt grace period, but make it happen quickly. 1050 * Wait for an rcu-preempt grace period, but make it happen quickly.
1100 * But because preemptible RCU does not exist, map to rcu-sched. 1051 * But because preemptible RCU does not exist, map to rcu-sched.
1101 */ 1052 */
@@ -1517,11 +1468,11 @@ static int __init rcu_spawn_kthreads(void)
1517 for_each_possible_cpu(cpu) 1468 for_each_possible_cpu(cpu)
1518 per_cpu(rcu_cpu_has_work, cpu) = 0; 1469 per_cpu(rcu_cpu_has_work, cpu) = 0;
1519 BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec)); 1470 BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec));
1520 rnp = rcu_get_root(rcu_state); 1471 rnp = rcu_get_root(rcu_state_p);
1521 (void)rcu_spawn_one_boost_kthread(rcu_state, rnp); 1472 (void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp);
1522 if (NUM_RCU_NODES > 1) { 1473 if (NUM_RCU_NODES > 1) {
1523 rcu_for_each_leaf_node(rcu_state, rnp) 1474 rcu_for_each_leaf_node(rcu_state_p, rnp)
1524 (void)rcu_spawn_one_boost_kthread(rcu_state, rnp); 1475 (void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp);
1525 } 1476 }
1526 return 0; 1477 return 0;
1527} 1478}
@@ -1529,12 +1480,12 @@ early_initcall(rcu_spawn_kthreads);
1529 1480
1530static void rcu_prepare_kthreads(int cpu) 1481static void rcu_prepare_kthreads(int cpu)
1531{ 1482{
1532 struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); 1483 struct rcu_data *rdp = per_cpu_ptr(rcu_state_p->rda, cpu);
1533 struct rcu_node *rnp = rdp->mynode; 1484 struct rcu_node *rnp = rdp->mynode;
1534 1485
1535 /* Fire up the incoming CPU's kthread and leaf rcu_node kthread. */ 1486 /* Fire up the incoming CPU's kthread and leaf rcu_node kthread. */
1536 if (rcu_scheduler_fully_active) 1487 if (rcu_scheduler_fully_active)
1537 (void)rcu_spawn_one_boost_kthread(rcu_state, rnp); 1488 (void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp);
1538} 1489}
1539 1490
1540#else /* #ifdef CONFIG_RCU_BOOST */ 1491#else /* #ifdef CONFIG_RCU_BOOST */
@@ -1744,6 +1695,7 @@ int rcu_needs_cpu(int cpu, unsigned long *dj)
1744static void rcu_prepare_for_idle(int cpu) 1695static void rcu_prepare_for_idle(int cpu)
1745{ 1696{
1746#ifndef CONFIG_RCU_NOCB_CPU_ALL 1697#ifndef CONFIG_RCU_NOCB_CPU_ALL
1698 bool needwake;
1747 struct rcu_data *rdp; 1699 struct rcu_data *rdp;
1748 struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); 1700 struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
1749 struct rcu_node *rnp; 1701 struct rcu_node *rnp;
@@ -1792,8 +1744,10 @@ static void rcu_prepare_for_idle(int cpu)
1792 rnp = rdp->mynode; 1744 rnp = rdp->mynode;
1793 raw_spin_lock(&rnp->lock); /* irqs already disabled. */ 1745 raw_spin_lock(&rnp->lock); /* irqs already disabled. */
1794 smp_mb__after_unlock_lock(); 1746 smp_mb__after_unlock_lock();
1795 rcu_accelerate_cbs(rsp, rnp, rdp); 1747 needwake = rcu_accelerate_cbs(rsp, rnp, rdp);
1796 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 1748 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
1749 if (needwake)
1750 rcu_gp_kthread_wake(rsp);
1797 } 1751 }
1798#endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */ 1752#endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */
1799} 1753}
@@ -1855,7 +1809,7 @@ static void rcu_oom_notify_cpu(void *unused)
1855 struct rcu_data *rdp; 1809 struct rcu_data *rdp;
1856 1810
1857 for_each_rcu_flavor(rsp) { 1811 for_each_rcu_flavor(rsp) {
1858 rdp = __this_cpu_ptr(rsp->rda); 1812 rdp = raw_cpu_ptr(rsp->rda);
1859 if (rdp->qlen_lazy != 0) { 1813 if (rdp->qlen_lazy != 0) {
1860 atomic_inc(&oom_callback_count); 1814 atomic_inc(&oom_callback_count);
1861 rsp->call(&rdp->oom_head, rcu_oom_callback); 1815 rsp->call(&rdp->oom_head, rcu_oom_callback);
@@ -1997,7 +1951,7 @@ static void increment_cpu_stall_ticks(void)
1997 struct rcu_state *rsp; 1951 struct rcu_state *rsp;
1998 1952
1999 for_each_rcu_flavor(rsp) 1953 for_each_rcu_flavor(rsp)
2000 __this_cpu_ptr(rsp->rda)->ticks_this_gp++; 1954 raw_cpu_inc(rsp->rda->ticks_this_gp);
2001} 1955}
2002 1956
2003#else /* #ifdef CONFIG_RCU_CPU_STALL_INFO */ 1957#else /* #ifdef CONFIG_RCU_CPU_STALL_INFO */
@@ -2068,19 +2022,6 @@ static int __init parse_rcu_nocb_poll(char *arg)
2068early_param("rcu_nocb_poll", parse_rcu_nocb_poll); 2022early_param("rcu_nocb_poll", parse_rcu_nocb_poll);
2069 2023
2070/* 2024/*
2071 * Do any no-CBs CPUs need another grace period?
2072 *
2073 * Interrupts must be disabled. If the caller does not hold the root
2074 * rnp_node structure's ->lock, the results are advisory only.
2075 */
2076static int rcu_nocb_needs_gp(struct rcu_state *rsp)
2077{
2078 struct rcu_node *rnp = rcu_get_root(rsp);
2079
2080 return rnp->need_future_gp[(ACCESS_ONCE(rnp->completed) + 1) & 0x1];
2081}
2082
2083/*
2084 * Wake up any no-CBs CPUs' kthreads that were waiting on the just-ended 2025 * Wake up any no-CBs CPUs' kthreads that were waiting on the just-ended
2085 * grace period. 2026 * grace period.
2086 */ 2027 */
@@ -2109,7 +2050,7 @@ static void rcu_init_one_nocb(struct rcu_node *rnp)
2109} 2050}
2110 2051
2111#ifndef CONFIG_RCU_NOCB_CPU_ALL 2052#ifndef CONFIG_RCU_NOCB_CPU_ALL
2112/* Is the specified CPU a no-CPUs CPU? */ 2053/* Is the specified CPU a no-CBs CPU? */
2113bool rcu_is_nocb_cpu(int cpu) 2054bool rcu_is_nocb_cpu(int cpu)
2114{ 2055{
2115 if (have_rcu_nocb_mask) 2056 if (have_rcu_nocb_mask)
@@ -2243,12 +2184,15 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp)
2243 unsigned long c; 2184 unsigned long c;
2244 bool d; 2185 bool d;
2245 unsigned long flags; 2186 unsigned long flags;
2187 bool needwake;
2246 struct rcu_node *rnp = rdp->mynode; 2188 struct rcu_node *rnp = rdp->mynode;
2247 2189
2248 raw_spin_lock_irqsave(&rnp->lock, flags); 2190 raw_spin_lock_irqsave(&rnp->lock, flags);
2249 smp_mb__after_unlock_lock(); 2191 smp_mb__after_unlock_lock();
2250 c = rcu_start_future_gp(rnp, rdp); 2192 needwake = rcu_start_future_gp(rnp, rdp, &c);
2251 raw_spin_unlock_irqrestore(&rnp->lock, flags); 2193 raw_spin_unlock_irqrestore(&rnp->lock, flags);
2194 if (needwake)
2195 rcu_gp_kthread_wake(rdp->rsp);
2252 2196
2253 /* 2197 /*
2254 * Wait for the grace period. Do so interruptibly to avoid messing 2198 * Wait for the grace period. Do so interruptibly to avoid messing
@@ -2402,11 +2346,6 @@ static bool init_nocb_callback_list(struct rcu_data *rdp)
2402 2346
2403#else /* #ifdef CONFIG_RCU_NOCB_CPU */ 2347#else /* #ifdef CONFIG_RCU_NOCB_CPU */
2404 2348
2405static int rcu_nocb_needs_gp(struct rcu_state *rsp)
2406{
2407 return 0;
2408}
2409
2410static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) 2349static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
2411{ 2350{
2412} 2351}
@@ -2523,9 +2462,9 @@ static void rcu_sysidle_enter(struct rcu_dynticks *rdtp, int irq)
2523 /* Record start of fully idle period. */ 2462 /* Record start of fully idle period. */
2524 j = jiffies; 2463 j = jiffies;
2525 ACCESS_ONCE(rdtp->dynticks_idle_jiffies) = j; 2464 ACCESS_ONCE(rdtp->dynticks_idle_jiffies) = j;
2526 smp_mb__before_atomic_inc(); 2465 smp_mb__before_atomic();
2527 atomic_inc(&rdtp->dynticks_idle); 2466 atomic_inc(&rdtp->dynticks_idle);
2528 smp_mb__after_atomic_inc(); 2467 smp_mb__after_atomic();
2529 WARN_ON_ONCE(atomic_read(&rdtp->dynticks_idle) & 0x1); 2468 WARN_ON_ONCE(atomic_read(&rdtp->dynticks_idle) & 0x1);
2530} 2469}
2531 2470
@@ -2590,9 +2529,9 @@ static void rcu_sysidle_exit(struct rcu_dynticks *rdtp, int irq)
2590 } 2529 }
2591 2530
2592 /* Record end of idle period. */ 2531 /* Record end of idle period. */
2593 smp_mb__before_atomic_inc(); 2532 smp_mb__before_atomic();
2594 atomic_inc(&rdtp->dynticks_idle); 2533 atomic_inc(&rdtp->dynticks_idle);
2595 smp_mb__after_atomic_inc(); 2534 smp_mb__after_atomic();
2596 WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks_idle) & 0x1)); 2535 WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks_idle) & 0x1));
2597 2536
2598 /* 2537 /*
@@ -2657,20 +2596,6 @@ static bool is_sysidle_rcu_state(struct rcu_state *rsp)
2657} 2596}
2658 2597
2659/* 2598/*
2660 * Bind the grace-period kthread for the sysidle flavor of RCU to the
2661 * timekeeping CPU.
2662 */
2663static void rcu_bind_gp_kthread(void)
2664{
2665 int cpu = ACCESS_ONCE(tick_do_timer_cpu);
2666
2667 if (cpu < 0 || cpu >= nr_cpu_ids)
2668 return;
2669 if (raw_smp_processor_id() != cpu)
2670 set_cpus_allowed_ptr(current, cpumask_of(cpu));
2671}
2672
2673/*
2674 * Return a delay in jiffies based on the number of CPUs, rcu_node 2599 * Return a delay in jiffies based on the number of CPUs, rcu_node
2675 * leaf fanout, and jiffies tick rate. The idea is to allow larger 2600 * leaf fanout, and jiffies tick rate. The idea is to allow larger
2676 * systems more time to transition to full-idle state in order to 2601 * systems more time to transition to full-idle state in order to
@@ -2734,7 +2659,8 @@ static void rcu_sysidle(unsigned long j)
2734static void rcu_sysidle_cancel(void) 2659static void rcu_sysidle_cancel(void)
2735{ 2660{
2736 smp_mb(); 2661 smp_mb();
2737 ACCESS_ONCE(full_sysidle_state) = RCU_SYSIDLE_NOT; 2662 if (full_sysidle_state > RCU_SYSIDLE_SHORT)
2663 ACCESS_ONCE(full_sysidle_state) = RCU_SYSIDLE_NOT;
2738} 2664}
2739 2665
2740/* 2666/*
@@ -2880,10 +2806,6 @@ static bool is_sysidle_rcu_state(struct rcu_state *rsp)
2880 return false; 2806 return false;
2881} 2807}
2882 2808
2883static void rcu_bind_gp_kthread(void)
2884{
2885}
2886
2887static void rcu_sysidle_report_gp(struct rcu_state *rsp, int isidle, 2809static void rcu_sysidle_report_gp(struct rcu_state *rsp, int isidle,
2888 unsigned long maxj) 2810 unsigned long maxj)
2889{ 2811{
@@ -2914,3 +2836,19 @@ static bool rcu_nohz_full_cpu(struct rcu_state *rsp)
2914#endif /* #ifdef CONFIG_NO_HZ_FULL */ 2836#endif /* #ifdef CONFIG_NO_HZ_FULL */
2915 return 0; 2837 return 0;
2916} 2838}
2839
2840/*
2841 * Bind the grace-period kthread for the sysidle flavor of RCU to the
2842 * timekeeping CPU.
2843 */
2844static void rcu_bind_gp_kthread(void)
2845{
2846#ifdef CONFIG_NO_HZ_FULL
2847 int cpu = ACCESS_ONCE(tick_do_timer_cpu);
2848
2849 if (cpu < 0 || cpu >= nr_cpu_ids)
2850 return;
2851 if (raw_smp_processor_id() != cpu)
2852 set_cpus_allowed_ptr(current, cpumask_of(cpu));
2853#endif /* #ifdef CONFIG_NO_HZ_FULL */
2854}
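
The relocated rcu_bind_gp_kthread() above pins the grace-period kthread to the timekeeping CPU (tick_do_timer_cpu) whenever CONFIG_NO_HZ_FULL is enabled, so its periodic work stays off the CPUs that are meant to remain tick-free. A rough userspace analogue using pthread affinity, with bind_to_housekeeping_cpu() and the hard-coded CPU argument being assumptions for illustration only:

#define _GNU_SOURCE
#include <pthread.h>
#include <sched.h>

/* Hypothetical analogue of rcu_bind_gp_kthread(): pin the calling thread
 * to one housekeeping CPU so its background work never disturbs CPUs that
 * should stay idle/tick-free. Returns 0 on success, errno-style on error. */
static int bind_to_housekeeping_cpu(int cpu)
{
	cpu_set_t set;

	CPU_ZERO(&set);
	CPU_SET(cpu, &set);
	return pthread_setaffinity_np(pthread_self(), sizeof(set), &set);
}
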
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 4c0a9b0af469..a2aeb4df0f60 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -320,6 +320,18 @@ int rcu_jiffies_till_stall_check(void)
320 return till_stall_check * HZ + RCU_STALL_DELAY_DELTA; 320 return till_stall_check * HZ + RCU_STALL_DELAY_DELTA;
321} 321}
322 322
323void rcu_sysrq_start(void)
324{
325 if (!rcu_cpu_stall_suppress)
326 rcu_cpu_stall_suppress = 2;
327}
328
329void rcu_sysrq_end(void)
330{
331 if (rcu_cpu_stall_suppress == 2)
332 rcu_cpu_stall_suppress = 0;
333}
334
323static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr) 335static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr)
324{ 336{
325 rcu_cpu_stall_suppress = 1; 337 rcu_cpu_stall_suppress = 1;
@@ -338,3 +350,21 @@ static int __init check_cpu_stall_init(void)
338early_initcall(check_cpu_stall_init); 350early_initcall(check_cpu_stall_init);
339 351
340#endif /* #ifdef CONFIG_RCU_STALL_COMMON */ 352#endif /* #ifdef CONFIG_RCU_STALL_COMMON */
353
354/*
355 * Hooks for cond_resched() and friends to avoid RCU CPU stall warnings.
356 */
357
358DEFINE_PER_CPU(int, rcu_cond_resched_count);
359
360/*
361 * Report a set of RCU quiescent states, for use by cond_resched()
362 * and friends. Out of line due to being called infrequently.
363 */
364void rcu_resched(void)
365{
366 preempt_disable();
367 __this_cpu_write(rcu_cond_resched_count, 0);
368 rcu_note_context_switch(smp_processor_id());
369 preempt_enable();
370}
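
The kernel/rcu/update.c additions give sysrq a way to suppress stall warnings temporarily and add rcu_resched(), the out-of-line slow path that resets the per-CPU rcu_cond_resched_count and reports a quiescent state via rcu_note_context_switch(). The cheap fast-path check lives in a header not shown here. A single-threaded sketch of that fast-path/slow-path split, where the 256 threshold, maybe_report_qs() and report_quiescent_state() are all made up:

#include <stdio.h>

/* Hypothetical sketch of the rcu_cond_resched() idea: a cheap counter
 * check decides whether the (rarer) out-of-line slow path should reset
 * the counter and report a quiescent state. */
static _Thread_local int cond_resched_count;

static void report_quiescent_state(void)
{
	cond_resched_count = 0;
	puts("quiescent state reported");  /* stands in for rcu_note_context_switch() */
}

static inline void maybe_report_qs(void)
{
	if (++cond_resched_count >= 256)   /* cheap fast path */
		report_quiescent_state();  /* infrequent slow path */
}
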
diff --git a/kernel/resource.c b/kernel/resource.c
index 8957d686e29b..3c2237ac32db 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -1288,13 +1288,10 @@ int iomem_map_sanity_check(resource_size_t addr, unsigned long size)
1288 if (p->flags & IORESOURCE_BUSY) 1288 if (p->flags & IORESOURCE_BUSY)
1289 continue; 1289 continue;
1290 1290
1291 printk(KERN_WARNING "resource map sanity check conflict: " 1291 printk(KERN_WARNING "resource sanity check: requesting [mem %#010llx-%#010llx], which spans more than %s %pR\n",
1292 "0x%llx 0x%llx 0x%llx 0x%llx %s\n",
1293 (unsigned long long)addr, 1292 (unsigned long long)addr,
1294 (unsigned long long)(addr + size - 1), 1293 (unsigned long long)(addr + size - 1),
1295 (unsigned long long)p->start, 1294 p->name, p);
1296 (unsigned long long)p->end,
1297 p->name);
1298 err = -1; 1295 err = -1;
1299 break; 1296 break;
1300 } 1297 }
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 321d800e4baa..913c6d6cc2c1 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -90,6 +90,22 @@
90#define CREATE_TRACE_POINTS 90#define CREATE_TRACE_POINTS
91#include <trace/events/sched.h> 91#include <trace/events/sched.h>
92 92
93#ifdef smp_mb__before_atomic
94void __smp_mb__before_atomic(void)
95{
96 smp_mb__before_atomic();
97}
98EXPORT_SYMBOL(__smp_mb__before_atomic);
99#endif
100
101#ifdef smp_mb__after_atomic
102void __smp_mb__after_atomic(void)
103{
104 smp_mb__after_atomic();
105}
106EXPORT_SYMBOL(__smp_mb__after_atomic);
107#endif
108
93void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period) 109void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period)
94{ 110{
95 unsigned long delta; 111 unsigned long delta;
@@ -4101,6 +4117,7 @@ static void __cond_resched(void)
4101 4117
4102int __sched _cond_resched(void) 4118int __sched _cond_resched(void)
4103{ 4119{
4120 rcu_cond_resched();
4104 if (should_resched()) { 4121 if (should_resched()) {
4105 __cond_resched(); 4122 __cond_resched();
4106 return 1; 4123 return 1;
@@ -4119,15 +4136,18 @@ EXPORT_SYMBOL(_cond_resched);
4119 */ 4136 */
4120int __cond_resched_lock(spinlock_t *lock) 4137int __cond_resched_lock(spinlock_t *lock)
4121{ 4138{
4139 bool need_rcu_resched = rcu_should_resched();
4122 int resched = should_resched(); 4140 int resched = should_resched();
4123 int ret = 0; 4141 int ret = 0;
4124 4142
4125 lockdep_assert_held(lock); 4143 lockdep_assert_held(lock);
4126 4144
4127 if (spin_needbreak(lock) || resched) { 4145 if (spin_needbreak(lock) || resched || need_rcu_resched) {
4128 spin_unlock(lock); 4146 spin_unlock(lock);
4129 if (resched) 4147 if (resched)
4130 __cond_resched(); 4148 __cond_resched();
4149 else if (unlikely(need_rcu_resched))
4150 rcu_resched();
4131 else 4151 else
4132 cpu_relax(); 4152 cpu_relax();
4133 ret = 1; 4153 ret = 1;
@@ -4141,6 +4161,7 @@ int __sched __cond_resched_softirq(void)
4141{ 4161{
4142 BUG_ON(!in_softirq()); 4162 BUG_ON(!in_softirq());
4143 4163
4164 rcu_cond_resched(); /* BH disabled OK, just recording QSes. */
4144 if (should_resched()) { 4165 if (should_resched()) {
4145 local_bh_enable(); 4166 local_bh_enable();
4146 __cond_resched(); 4167 __cond_resched();
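
The kernel/sched/core.c hunks hook RCU into the cond_resched() family: _cond_resched() and __cond_resched_softirq() report quiescent states via rcu_cond_resched(), and __cond_resched_lock() now drops the lock even when only RCU, not the scheduler, wants a breather. A userspace sketch of that lock-dropping pattern, where should_yield(), rcu_wants_break() and cond_resched_lock() are stand-ins rather than kernel APIs:

#include <pthread.h>
#include <sched.h>
#include <stdbool.h>

/* Hypothetical analogue of __cond_resched_lock(): in a long loop that holds
 * a lock, briefly drop it when either the scheduler wants the CPU or some
 * other subsystem (RCU in the patch) has asked for a break. */
static bool should_yield(void)     { return false; }  /* placeholder policy */
static bool rcu_wants_break(void)  { return false; }  /* placeholder policy */

static int cond_resched_lock(pthread_mutex_t *lock)
{
	bool need_break = rcu_wants_break();

	if (should_yield() || need_break) {
		pthread_mutex_unlock(lock);
		if (should_yield())
			sched_yield();   /* give the CPU away */
		/* else: dropping the lock was the point for the RCU case */
		pthread_mutex_lock(lock);
		return 1;
	}
	return 0;
}
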
diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c
index 8834243abee2..981fcd7dc394 100644
--- a/kernel/sched/cpupri.c
+++ b/kernel/sched/cpupri.c
@@ -165,7 +165,7 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri)
165 * do a write memory barrier, and then update the count, to 165 * do a write memory barrier, and then update the count, to
166 * make sure the vector is visible when count is set. 166 * make sure the vector is visible when count is set.
167 */ 167 */
168 smp_mb__before_atomic_inc(); 168 smp_mb__before_atomic();
169 atomic_inc(&(vec)->count); 169 atomic_inc(&(vec)->count);
170 do_mb = 1; 170 do_mb = 1;
171 } 171 }
@@ -185,14 +185,14 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri)
185 * the new priority vec. 185 * the new priority vec.
186 */ 186 */
187 if (do_mb) 187 if (do_mb)
188 smp_mb__after_atomic_inc(); 188 smp_mb__after_atomic();
189 189
190 /* 190 /*
191 * When removing from the vector, we decrement the counter first 191 * When removing from the vector, we decrement the counter first
192 * do a memory barrier and then clear the mask. 192 * do a memory barrier and then clear the mask.
193 */ 193 */
194 atomic_dec(&(vec)->count); 194 atomic_dec(&(vec)->count);
195 smp_mb__after_atomic_inc(); 195 smp_mb__after_atomic();
196 cpumask_clear_cpu(cpu, vec->mask); 196 cpumask_clear_cpu(cpu, vec->mask);
197 } 197 }
198 198
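
The cpupri.c hunk only renames the barrier wrappers, but the ordering it preserves is worth spelling out: the vector's mask must be visible before the count that advertises it, and the count must be decremented before the mask bit is cleared. A C11 sketch of that ordering, where struct prio_vec, vec_add() and vec_del() are invented and the seq_cst fences stand in for the kernel's full barriers around atomics:

#include <stdatomic.h>
#include <stdbool.h>

/* Hypothetical sketch: publish the mask before the count on add, retire the
 * count before clearing the mask on delete. Real cpupri uses cpumasks and
 * smp_mb__{before,after}_atomic(). */
struct prio_vec {
	atomic_int  count;
	atomic_bool mask;      /* one "CPU" for brevity */
};

static void vec_add(struct prio_vec *v)
{
	atomic_store_explicit(&v->mask, true, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);   /* mask visible before count */
	atomic_fetch_add_explicit(&v->count, 1, memory_order_relaxed);
}

static void vec_del(struct prio_vec *v)
{
	atomic_fetch_sub_explicit(&v->count, 1, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);   /* count retired before mask */
	atomic_store_explicit(&v->mask, false, memory_order_relaxed);
}
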
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index 7d50f794e248..0ffa20ae657b 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -394,7 +394,7 @@ EXPORT_SYMBOL(__wake_up_bit);
394 * 394 *
395 * In order for this to function properly, as it uses waitqueue_active() 395 * In order for this to function properly, as it uses waitqueue_active()
396 * internally, some kind of memory barrier must be done prior to calling 396 * internally, some kind of memory barrier must be done prior to calling
397 * this. Typically, this will be smp_mb__after_clear_bit(), but in some 397 * this. Typically, this will be smp_mb__after_atomic(), but in some
398 * cases where bitflags are manipulated non-atomically under a lock, one 398 * cases where bitflags are manipulated non-atomically under a lock, one
 399 * may need to use a less regular barrier, such as fs/inode.c's smp_mb(), 399
400 * because spin_unlock() does not guarantee a memory barrier. 400 * because spin_unlock() does not guarantee a memory barrier.
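
The wait.c comment above warns about a classic lost-wakeup hazard: a waker that checks waitqueue_active() must order "make the condition true" before "look for waiters", otherwise it can see a stale waiter count and skip a wakeup a newly arrived waiter needs. A C11 sketch of the waker side, where wake_up_waiters(), busy_bit and nr_waiters are invented for illustration:

#include <stdatomic.h>
#include <stdbool.h>

/* Hypothetical waker: clear the condition bit, full fence, then check for
 * waiters. The fence plays the role of smp_mb__after_atomic() here. */
static atomic_bool busy_bit = true;
static atomic_int  nr_waiters;

static void wake_up_waiters(void)
{
	/* in a real program: futex wake or pthread_cond_broadcast() */
}

static void clear_bit_and_wake(void)
{
	atomic_store_explicit(&busy_bit, false, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);  /* the barrier the comment demands */
	if (atomic_load_explicit(&nr_waiters, memory_order_relaxed) > 0)
		wake_up_waiters();                  /* safe to skip only after the fence */
}
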
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 92f24f5e8d52..5918d227730f 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -232,7 +232,6 @@ asmlinkage __visible void __do_softirq(void)
232 bool in_hardirq; 232 bool in_hardirq;
233 __u32 pending; 233 __u32 pending;
234 int softirq_bit; 234 int softirq_bit;
235 int cpu;
236 235
237 /* 236 /*
 238 * Mask out PF_MEMALLOC as current task context is borrowed for the 237
@@ -247,7 +246,6 @@ asmlinkage __visible void __do_softirq(void)
247 __local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET); 246 __local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);
248 in_hardirq = lockdep_softirq_start(); 247 in_hardirq = lockdep_softirq_start();
249 248
250 cpu = smp_processor_id();
251restart: 249restart:
252 /* Reset the pending bitmask before enabling irqs */ 250 /* Reset the pending bitmask before enabling irqs */
253 set_softirq_pending(0); 251 set_softirq_pending(0);
@@ -276,11 +274,11 @@ restart:
276 prev_count, preempt_count()); 274 prev_count, preempt_count());
277 preempt_count_set(prev_count); 275 preempt_count_set(prev_count);
278 } 276 }
279 rcu_bh_qs(cpu);
280 h++; 277 h++;
281 pending >>= softirq_bit; 278 pending >>= softirq_bit;
282 } 279 }
283 280
281 rcu_bh_qs(smp_processor_id());
284 local_irq_disable(); 282 local_irq_disable();
285 283
286 pending = local_softirq_pending(); 284 pending = local_softirq_pending();
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 74f5b580fe34..bc966a8ffc3e 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -643,7 +643,7 @@ static struct ctl_table kern_table[] = {
643 .extra2 = &one, 643 .extra2 = &one,
644 }, 644 },
645#endif 645#endif
646 646#ifdef CONFIG_UEVENT_HELPER
647 { 647 {
648 .procname = "hotplug", 648 .procname = "hotplug",
649 .data = &uevent_helper, 649 .data = &uevent_helper,
@@ -651,7 +651,7 @@ static struct ctl_table kern_table[] = {
651 .mode = 0644, 651 .mode = 0644,
652 .proc_handler = proc_dostring, 652 .proc_handler = proc_dostring,
653 }, 653 },
654 654#endif
655#ifdef CONFIG_CHR_DEV_SG 655#ifdef CONFIG_CHR_DEV_SG
656 { 656 {
657 .procname = "sg-big-buff", 657 .procname = "sg-big-buff",
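
The sysctl.c hunk wraps the "hotplug" entry in #ifdef CONFIG_UEVENT_HELPER so the knob disappears along with the uevent-helper support it controls. A tiny sketch of that conditionally-compiled table-entry pattern, where struct knob, knobs[] and HAVE_UEVENT_HELPER are stand-ins, not kernel names:

#include <stddef.h>

/* Hypothetical sketch: a table entry exists only when the matching feature
 * is built in, so the table shrinks when the feature is configured out. */
struct knob {
	const char *name;
	int        *data;
};

static int uevent_helper_enabled;

static struct knob knobs[] = {
#ifdef HAVE_UEVENT_HELPER
	{ .name = "hotplug", .data = &uevent_helper_enabled },
#endif
	{ .name = NULL }   /* terminator, like the empty ctl_table entry */
};
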
diff --git a/kernel/torture.c b/kernel/torture.c
index acc9afc2f26e..40bb511cca48 100644
--- a/kernel/torture.c
+++ b/kernel/torture.c
@@ -335,13 +335,8 @@ static void torture_shuffle_tasks(void)
335 shuffle_idle_cpu = cpumask_next(shuffle_idle_cpu, shuffle_tmp_mask); 335 shuffle_idle_cpu = cpumask_next(shuffle_idle_cpu, shuffle_tmp_mask);
336 if (shuffle_idle_cpu >= nr_cpu_ids) 336 if (shuffle_idle_cpu >= nr_cpu_ids)
337 shuffle_idle_cpu = -1; 337 shuffle_idle_cpu = -1;
338 if (shuffle_idle_cpu != -1) { 338 else
339 cpumask_clear_cpu(shuffle_idle_cpu, shuffle_tmp_mask); 339 cpumask_clear_cpu(shuffle_idle_cpu, shuffle_tmp_mask);
340 if (cpumask_empty(shuffle_tmp_mask)) {
341 put_online_cpus();
342 return;
343 }
344 }
345 340
346 mutex_lock(&shuffle_task_mutex); 341 mutex_lock(&shuffle_task_mutex);
347 list_for_each_entry(stp, &shuffle_task_list, st_l) 342 list_for_each_entry(stp, &shuffle_task_list, st_l)
@@ -533,7 +528,11 @@ void stutter_wait(const char *title)
533 while (ACCESS_ONCE(stutter_pause_test) || 528 while (ACCESS_ONCE(stutter_pause_test) ||
534 (torture_runnable && !ACCESS_ONCE(*torture_runnable))) { 529 (torture_runnable && !ACCESS_ONCE(*torture_runnable))) {
535 if (stutter_pause_test) 530 if (stutter_pause_test)
536 schedule_timeout_interruptible(1); 531 if (ACCESS_ONCE(stutter_pause_test) == 1)
532 schedule_timeout_interruptible(1);
533 else
534 while (ACCESS_ONCE(stutter_pause_test))
535 cond_resched();
537 else 536 else
538 schedule_timeout_interruptible(round_jiffies_relative(HZ)); 537 schedule_timeout_interruptible(round_jiffies_relative(HZ));
539 torture_shutdown_absorb(title); 538 torture_shutdown_absorb(title);
@@ -550,7 +549,11 @@ static int torture_stutter(void *arg)
550 VERBOSE_TOROUT_STRING("torture_stutter task started"); 549 VERBOSE_TOROUT_STRING("torture_stutter task started");
551 do { 550 do {
552 if (!torture_must_stop()) { 551 if (!torture_must_stop()) {
553 schedule_timeout_interruptible(stutter); 552 if (stutter > 1) {
553 schedule_timeout_interruptible(stutter - 1);
554 ACCESS_ONCE(stutter_pause_test) = 2;
555 }
556 schedule_timeout_interruptible(1);
554 ACCESS_ONCE(stutter_pause_test) = 1; 557 ACCESS_ONCE(stutter_pause_test) = 1;
555 } 558 }
556 if (!torture_must_stop()) 559 if (!torture_must_stop())
@@ -596,21 +599,27 @@ static void torture_stutter_cleanup(void)
596 * The runnable parameter points to a flag that controls whether or not 599 * The runnable parameter points to a flag that controls whether or not
597 * the test is currently runnable. If there is no such flag, pass in NULL. 600 * the test is currently runnable. If there is no such flag, pass in NULL.
598 */ 601 */
599void __init torture_init_begin(char *ttype, bool v, int *runnable) 602bool torture_init_begin(char *ttype, bool v, int *runnable)
600{ 603{
601 mutex_lock(&fullstop_mutex); 604 mutex_lock(&fullstop_mutex);
605 if (torture_type != NULL) {
606 pr_alert("torture_init_begin: refusing %s init: %s running",
607 ttype, torture_type);
608 mutex_unlock(&fullstop_mutex);
609 return false;
610 }
602 torture_type = ttype; 611 torture_type = ttype;
603 verbose = v; 612 verbose = v;
604 torture_runnable = runnable; 613 torture_runnable = runnable;
605 fullstop = FULLSTOP_DONTSTOP; 614 fullstop = FULLSTOP_DONTSTOP;
606 615 return true;
607} 616}
608EXPORT_SYMBOL_GPL(torture_init_begin); 617EXPORT_SYMBOL_GPL(torture_init_begin);
609 618
610/* 619/*
611 * Tell the torture module that initialization is complete. 620 * Tell the torture module that initialization is complete.
612 */ 621 */
613void __init torture_init_end(void) 622void torture_init_end(void)
614{ 623{
615 mutex_unlock(&fullstop_mutex); 624 mutex_unlock(&fullstop_mutex);
616 register_reboot_notifier(&torture_shutdown_nb); 625 register_reboot_notifier(&torture_shutdown_nb);
@@ -642,6 +651,9 @@ bool torture_cleanup(void)
642 torture_shuffle_cleanup(); 651 torture_shuffle_cleanup();
643 torture_stutter_cleanup(); 652 torture_stutter_cleanup();
644 torture_onoff_cleanup(); 653 torture_onoff_cleanup();
654 mutex_lock(&fullstop_mutex);
655 torture_type = NULL;
656 mutex_unlock(&fullstop_mutex);
645 return false; 657 return false;
646} 658}
647EXPORT_SYMBOL_GPL(torture_cleanup); 659EXPORT_SYMBOL_GPL(torture_cleanup);
@@ -674,8 +686,10 @@ EXPORT_SYMBOL_GPL(torture_must_stop_irq);
674 */ 686 */
675void torture_kthread_stopping(char *title) 687void torture_kthread_stopping(char *title)
676{ 688{
677 if (verbose) 689 char buf[128];
678 VERBOSE_TOROUT_STRING(title); 690
691 snprintf(buf, sizeof(buf), "Stopping %s", title);
692 VERBOSE_TOROUT_STRING(buf);
679 while (!kthread_should_stop()) { 693 while (!kthread_should_stop()) {
680 torture_shutdown_absorb(title); 694 torture_shutdown_absorb(title);
681 schedule_timeout_uninterruptible(1); 695 schedule_timeout_uninterruptible(1);
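
The torture.c changes make torture_init_begin() return bool and refuse a second concurrent torture test, with torture_cleanup() clearing the recorded type under fullstop_mutex. A userspace sketch of that "single active registration" pattern, where test_init_begin(), test_init_end(), test_cleanup() and active_type are invented names; as in the kernel code, the lock stays held from a successful init_begin until init_end:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/* Hypothetical sketch: only one test type may be registered at a time; a
 * second registration attempt is refused instead of clobbering the first. */
static pthread_mutex_t init_lock = PTHREAD_MUTEX_INITIALIZER;
static const char *active_type;    /* NULL means no test is running */

static bool test_init_begin(const char *type)
{
	pthread_mutex_lock(&init_lock);
	if (active_type != NULL) {
		fprintf(stderr, "refusing %s init: %s already running\n",
			type, active_type);
		pthread_mutex_unlock(&init_lock);
		return false;
	}
	active_type = type;
	return true;               /* lock held until test_init_end() */
}

static void test_init_end(void)
{
	pthread_mutex_unlock(&init_lock);
}

static void test_cleanup(void)
{
	pthread_mutex_lock(&init_lock);
	active_type = NULL;
	pthread_mutex_unlock(&init_lock);
}
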