Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Makefile | 1
-rw-r--r--  kernel/audit.c | 43
-rw-r--r--  kernel/audit.h | 2
-rw-r--r--  kernel/audit_tree.c | 2
-rw-r--r--  kernel/audit_watch.c | 2
-rw-r--r--  kernel/auditfilter.c | 10
-rw-r--r--  kernel/cgroup.c | 71
-rw-r--r--  kernel/cpuset.c | 10
-rw-r--r--  kernel/events/core.c | 12
-rw-r--r--  kernel/futex.c | 90
-rw-r--r--  kernel/irq/devres.c | 45
-rw-r--r--  kernel/irq/irqdesc.c | 1
-rw-r--r--  kernel/irq/irqdomain.c | 1
-rw-r--r--  kernel/irq/manage.c | 3
-rw-r--r--  kernel/ksysfs.c | 2
-rw-r--r--  kernel/locking/Makefile | 3
-rw-r--r--  kernel/locking/lockdep.c | 17
-rw-r--r--  kernel/locking/locktorture.c | 452
-rw-r--r--  kernel/locking/mcs_spinlock.c | 178
-rw-r--r--  kernel/locking/mcs_spinlock.h | 129
-rw-r--r--  kernel/locking/mutex-debug.c | 6
-rw-r--r--  kernel/locking/mutex.c | 94
-rw-r--r--  kernel/notifier.c | 2
-rw-r--r--  kernel/power/console.c | 1
-rw-r--r--  kernel/printk/printk.c | 2
-rw-r--r--  kernel/profile.c | 4
-rw-r--r--  kernel/rcu/Makefile | 2
-rw-r--r--  kernel/rcu/rcu.h | 7
-rw-r--r--  kernel/rcu/rcutorture.c (renamed from kernel/rcu/torture.c) | 996
-rw-r--r--  kernel/rcu/srcu.c | 11
-rw-r--r--  kernel/rcu/tiny.c | 8
-rw-r--r--  kernel/rcu/tiny_plugin.h | 4
-rw-r--r--  kernel/rcu/tree.c | 80
-rw-r--r--  kernel/rcu/tree.h | 4
-rw-r--r--  kernel/rcu/tree_plugin.h | 19
-rw-r--r--  kernel/rcu/tree_trace.c | 6
-rw-r--r--  kernel/rcu/update.c | 5
-rw-r--r--  kernel/sched/clock.c | 4
-rw-r--r--  kernel/sched/core.c | 37
-rw-r--r--  kernel/sched/cpudeadline.c | 6
-rw-r--r--  kernel/sched/deadline.c | 20
-rw-r--r--  kernel/sched/fair.c | 10
-rw-r--r--  kernel/sched/rt.c | 8
-rw-r--r--  kernel/sched/sched.h | 1
-rw-r--r--  kernel/stop_machine.c | 2
-rw-r--r--  kernel/time/jiffies.c | 6
-rw-r--r--  kernel/time/sched_clock.c | 46
-rw-r--r--  kernel/time/tick-broadcast.c | 1
-rw-r--r--  kernel/time/timekeeping.c | 3
-rw-r--r--  kernel/torture.c | 719
-rw-r--r--  kernel/trace/ring_buffer.c | 7
-rw-r--r--  kernel/trace/trace.c | 27
-rw-r--r--  kernel/trace/trace_events.c | 16
-rw-r--r--  kernel/trace/trace_export.c | 7
-rw-r--r--  kernel/tracepoint.c | 7
-rw-r--r--  kernel/user_namespace.c | 2
-rw-r--r--  kernel/workqueue.c | 7
57 files changed, 2197 insertions, 1064 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index bc010ee272b6..5c0e7666811d 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -93,6 +93,7 @@ obj-$(CONFIG_PADATA) += padata.o
93obj-$(CONFIG_CRASH_DUMP) += crash_dump.o 93obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
94obj-$(CONFIG_JUMP_LABEL) += jump_label.o 94obj-$(CONFIG_JUMP_LABEL) += jump_label.o
95obj-$(CONFIG_CONTEXT_TRACKING) += context_tracking.o 95obj-$(CONFIG_CONTEXT_TRACKING) += context_tracking.o
96obj-$(CONFIG_TORTURE_TEST) += torture.o
96 97
97$(obj)/configs.o: $(obj)/config_data.h 98$(obj)/configs.o: $(obj)/config_data.h
98 99
diff --git a/kernel/audit.c b/kernel/audit.c
index 34c5a2310fbf..95a20f3f52f1 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -182,7 +182,7 @@ struct audit_buffer {
182 182
183struct audit_reply { 183struct audit_reply {
184 __u32 portid; 184 __u32 portid;
185 pid_t pid; 185 struct net *net;
186 struct sk_buff *skb; 186 struct sk_buff *skb;
187}; 187};
188 188
@@ -500,7 +500,7 @@ int audit_send_list(void *_dest)
500{ 500{
501 struct audit_netlink_list *dest = _dest; 501 struct audit_netlink_list *dest = _dest;
502 struct sk_buff *skb; 502 struct sk_buff *skb;
503 struct net *net = get_net_ns_by_pid(dest->pid); 503 struct net *net = dest->net;
504 struct audit_net *aunet = net_generic(net, audit_net_id); 504 struct audit_net *aunet = net_generic(net, audit_net_id);
505 505
506 /* wait for parent to finish and send an ACK */ 506 /* wait for parent to finish and send an ACK */
@@ -510,6 +510,7 @@ int audit_send_list(void *_dest)
510 while ((skb = __skb_dequeue(&dest->q)) != NULL) 510 while ((skb = __skb_dequeue(&dest->q)) != NULL)
511 netlink_unicast(aunet->nlsk, skb, dest->portid, 0); 511 netlink_unicast(aunet->nlsk, skb, dest->portid, 0);
512 512
513 put_net(net);
513 kfree(dest); 514 kfree(dest);
514 515
515 return 0; 516 return 0;
@@ -543,7 +544,7 @@ out_kfree_skb:
543static int audit_send_reply_thread(void *arg) 544static int audit_send_reply_thread(void *arg)
544{ 545{
545 struct audit_reply *reply = (struct audit_reply *)arg; 546 struct audit_reply *reply = (struct audit_reply *)arg;
546 struct net *net = get_net_ns_by_pid(reply->pid); 547 struct net *net = reply->net;
547 struct audit_net *aunet = net_generic(net, audit_net_id); 548 struct audit_net *aunet = net_generic(net, audit_net_id);
548 549
549 mutex_lock(&audit_cmd_mutex); 550 mutex_lock(&audit_cmd_mutex);
@@ -552,12 +553,13 @@ static int audit_send_reply_thread(void *arg)
552 /* Ignore failure. It'll only happen if the sender goes away, 553 /* Ignore failure. It'll only happen if the sender goes away,
553 because our timeout is set to infinite. */ 554 because our timeout is set to infinite. */
554 netlink_unicast(aunet->nlsk , reply->skb, reply->portid, 0); 555 netlink_unicast(aunet->nlsk , reply->skb, reply->portid, 0);
556 put_net(net);
555 kfree(reply); 557 kfree(reply);
556 return 0; 558 return 0;
557} 559}
558/** 560/**
559 * audit_send_reply - send an audit reply message via netlink 561 * audit_send_reply - send an audit reply message via netlink
560 * @portid: netlink port to which to send reply 562 * @request_skb: skb of request we are replying to (used to target the reply)
561 * @seq: sequence number 563 * @seq: sequence number
562 * @type: audit message type 564 * @type: audit message type
563 * @done: done (last) flag 565 * @done: done (last) flag
@@ -568,9 +570,11 @@ static int audit_send_reply_thread(void *arg)
568 * Allocates an skb, builds the netlink message, and sends it to the port id. 570 * Allocates an skb, builds the netlink message, and sends it to the port id.
569 * No failure notifications. 571 * No failure notifications.
570 */ 572 */
571static void audit_send_reply(__u32 portid, int seq, int type, int done, 573static void audit_send_reply(struct sk_buff *request_skb, int seq, int type, int done,
572 int multi, const void *payload, int size) 574 int multi, const void *payload, int size)
573{ 575{
576 u32 portid = NETLINK_CB(request_skb).portid;
577 struct net *net = sock_net(NETLINK_CB(request_skb).sk);
574 struct sk_buff *skb; 578 struct sk_buff *skb;
575 struct task_struct *tsk; 579 struct task_struct *tsk;
576 struct audit_reply *reply = kmalloc(sizeof(struct audit_reply), 580 struct audit_reply *reply = kmalloc(sizeof(struct audit_reply),
@@ -583,8 +587,8 @@ static void audit_send_reply(__u32 portid, int seq, int type, int done,
583 if (!skb) 587 if (!skb)
584 goto out; 588 goto out;
585 589
590 reply->net = get_net(net);
586 reply->portid = portid; 591 reply->portid = portid;
587 reply->pid = task_pid_vnr(current);
588 reply->skb = skb; 592 reply->skb = skb;
589 593
590 tsk = kthread_run(audit_send_reply_thread, reply, "audit_send_reply"); 594 tsk = kthread_run(audit_send_reply_thread, reply, "audit_send_reply");
@@ -604,9 +608,19 @@ static int audit_netlink_ok(struct sk_buff *skb, u16 msg_type)
604 int err = 0; 608 int err = 0;
605 609
606 /* Only support the initial namespaces for now. */ 610 /* Only support the initial namespaces for now. */
611 /*
612 * We return ECONNREFUSED because it tricks userspace into thinking
613 * that audit was not configured into the kernel. Lots of users
614 * configure their PAM stack (because that's what the distro does)
615 * to reject login if unable to send messages to audit. If we return
616 * ECONNREFUSED the PAM stack thinks the kernel does not have audit
617 * configured in and will let login proceed. If we return EPERM
618 * userspace will reject all logins. This should be removed when we
619 * support non init namespaces!!
620 */
607 if ((current_user_ns() != &init_user_ns) || 621 if ((current_user_ns() != &init_user_ns) ||
608 (task_active_pid_ns(current) != &init_pid_ns)) 622 (task_active_pid_ns(current) != &init_pid_ns))
609 return -EPERM; 623 return -ECONNREFUSED;
610 624
611 switch (msg_type) { 625 switch (msg_type) {
612 case AUDIT_LIST: 626 case AUDIT_LIST:
@@ -673,8 +687,7 @@ static int audit_get_feature(struct sk_buff *skb)
673 687
674 seq = nlmsg_hdr(skb)->nlmsg_seq; 688 seq = nlmsg_hdr(skb)->nlmsg_seq;
675 689
676 audit_send_reply(NETLINK_CB(skb).portid, seq, AUDIT_GET, 0, 0, 690 audit_send_reply(skb, seq, AUDIT_GET, 0, 0, &af, sizeof(af));
677 &af, sizeof(af));
678 691
679 return 0; 692 return 0;
680} 693}
@@ -794,8 +807,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
794 s.backlog = skb_queue_len(&audit_skb_queue); 807 s.backlog = skb_queue_len(&audit_skb_queue);
795 s.version = AUDIT_VERSION_LATEST; 808 s.version = AUDIT_VERSION_LATEST;
796 s.backlog_wait_time = audit_backlog_wait_time; 809 s.backlog_wait_time = audit_backlog_wait_time;
797 audit_send_reply(NETLINK_CB(skb).portid, seq, AUDIT_GET, 0, 0, 810 audit_send_reply(skb, seq, AUDIT_GET, 0, 0, &s, sizeof(s));
798 &s, sizeof(s));
799 break; 811 break;
800 } 812 }
801 case AUDIT_SET: { 813 case AUDIT_SET: {
@@ -905,7 +917,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
905 seq, data, nlmsg_len(nlh)); 917 seq, data, nlmsg_len(nlh));
906 break; 918 break;
907 case AUDIT_LIST_RULES: 919 case AUDIT_LIST_RULES:
908 err = audit_list_rules_send(NETLINK_CB(skb).portid, seq); 920 err = audit_list_rules_send(skb, seq);
909 break; 921 break;
910 case AUDIT_TRIM: 922 case AUDIT_TRIM:
911 audit_trim_trees(); 923 audit_trim_trees();
@@ -970,8 +982,8 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
970 memcpy(sig_data->ctx, ctx, len); 982 memcpy(sig_data->ctx, ctx, len);
971 security_release_secctx(ctx, len); 983 security_release_secctx(ctx, len);
972 } 984 }
973 audit_send_reply(NETLINK_CB(skb).portid, seq, AUDIT_SIGNAL_INFO, 985 audit_send_reply(skb, seq, AUDIT_SIGNAL_INFO, 0, 0,
974 0, 0, sig_data, sizeof(*sig_data) + len); 986 sig_data, sizeof(*sig_data) + len);
975 kfree(sig_data); 987 kfree(sig_data);
976 break; 988 break;
977 case AUDIT_TTY_GET: { 989 case AUDIT_TTY_GET: {
@@ -983,8 +995,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
983 s.log_passwd = tsk->signal->audit_tty_log_passwd; 995 s.log_passwd = tsk->signal->audit_tty_log_passwd;
984 spin_unlock(&tsk->sighand->siglock); 996 spin_unlock(&tsk->sighand->siglock);
985 997
986 audit_send_reply(NETLINK_CB(skb).portid, seq, 998 audit_send_reply(skb, seq, AUDIT_TTY_GET, 0, 0, &s, sizeof(s));
987 AUDIT_TTY_GET, 0, 0, &s, sizeof(s));
988 break; 999 break;
989 } 1000 }
990 case AUDIT_TTY_SET: { 1001 case AUDIT_TTY_SET: {
diff --git a/kernel/audit.h b/kernel/audit.h
index 57cc64d67718..8df132214606 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -247,7 +247,7 @@ extern void audit_panic(const char *message);
247 247
248struct audit_netlink_list { 248struct audit_netlink_list {
249 __u32 portid; 249 __u32 portid;
250 pid_t pid; 250 struct net *net;
251 struct sk_buff_head q; 251 struct sk_buff_head q;
252}; 252};
253 253
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 67ccf0e7cca9..135944a7b28a 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -916,7 +916,7 @@ static int audit_tree_handle_event(struct fsnotify_group *group,
916 struct fsnotify_mark *inode_mark, 916 struct fsnotify_mark *inode_mark,
917 struct fsnotify_mark *vfsmount_mark, 917 struct fsnotify_mark *vfsmount_mark,
918 u32 mask, void *data, int data_type, 918 u32 mask, void *data, int data_type,
919 const unsigned char *file_name) 919 const unsigned char *file_name, u32 cookie)
920{ 920{
921 return 0; 921 return 0;
922} 922}
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index 2596fac5dcb4..70b4554d2fbe 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -471,7 +471,7 @@ static int audit_watch_handle_event(struct fsnotify_group *group,
471 struct fsnotify_mark *inode_mark, 471 struct fsnotify_mark *inode_mark,
472 struct fsnotify_mark *vfsmount_mark, 472 struct fsnotify_mark *vfsmount_mark,
473 u32 mask, void *data, int data_type, 473 u32 mask, void *data, int data_type,
474 const unsigned char *dname) 474 const unsigned char *dname, u32 cookie)
475{ 475{
476 struct inode *inode; 476 struct inode *inode;
477 struct audit_parent *parent; 477 struct audit_parent *parent;
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 14a78cca384e..92062fd6cc8c 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -29,6 +29,8 @@
29#include <linux/sched.h> 29#include <linux/sched.h>
30#include <linux/slab.h> 30#include <linux/slab.h>
31#include <linux/security.h> 31#include <linux/security.h>
32#include <net/net_namespace.h>
33#include <net/sock.h>
32#include "audit.h" 34#include "audit.h"
33 35
34/* 36/*
@@ -1065,11 +1067,13 @@ int audit_rule_change(int type, __u32 portid, int seq, void *data,
1065 1067
1066/** 1068/**
1067 * audit_list_rules_send - list the audit rules 1069 * audit_list_rules_send - list the audit rules
1068 * @portid: target portid for netlink audit messages 1070 * @request_skb: skb of request we are replying to (used to target the reply)
1069 * @seq: netlink audit message sequence (serial) number 1071 * @seq: netlink audit message sequence (serial) number
1070 */ 1072 */
1071int audit_list_rules_send(__u32 portid, int seq) 1073int audit_list_rules_send(struct sk_buff *request_skb, int seq)
1072{ 1074{
1075 u32 portid = NETLINK_CB(request_skb).portid;
1076 struct net *net = sock_net(NETLINK_CB(request_skb).sk);
1073 struct task_struct *tsk; 1077 struct task_struct *tsk;
1074 struct audit_netlink_list *dest; 1078 struct audit_netlink_list *dest;
1075 int err = 0; 1079 int err = 0;
@@ -1083,8 +1087,8 @@ int audit_list_rules_send(__u32 portid, int seq)
1083 dest = kmalloc(sizeof(struct audit_netlink_list), GFP_KERNEL); 1087 dest = kmalloc(sizeof(struct audit_netlink_list), GFP_KERNEL);
1084 if (!dest) 1088 if (!dest)
1085 return -ENOMEM; 1089 return -ENOMEM;
1090 dest->net = get_net(net);
1086 dest->portid = portid; 1091 dest->portid = portid;
1087 dest->pid = task_pid_vnr(current);
1088 skb_queue_head_init(&dest->q); 1092 skb_queue_head_init(&dest->q);
1089 1093
1090 mutex_lock(&audit_filter_mutex); 1094 mutex_lock(&audit_filter_mutex);
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index e2f46ba37f72..0c753ddd223b 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -886,7 +886,9 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode)
886 * per-subsystem and moved to css->id so that lookups are 886 * per-subsystem and moved to css->id so that lookups are
887 * successful until the target css is released. 887 * successful until the target css is released.
888 */ 888 */
889 mutex_lock(&cgroup_mutex);
889 idr_remove(&cgrp->root->cgroup_idr, cgrp->id); 890 idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
891 mutex_unlock(&cgroup_mutex);
890 cgrp->id = -1; 892 cgrp->id = -1;
891 893
892 call_rcu(&cgrp->rcu_head, cgroup_free_rcu); 894 call_rcu(&cgrp->rcu_head, cgroup_free_rcu);
@@ -1566,10 +1568,10 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
1566 mutex_lock(&cgroup_mutex); 1568 mutex_lock(&cgroup_mutex);
1567 mutex_lock(&cgroup_root_mutex); 1569 mutex_lock(&cgroup_root_mutex);
1568 1570
1569 root_cgrp->id = idr_alloc(&root->cgroup_idr, root_cgrp, 1571 ret = idr_alloc(&root->cgroup_idr, root_cgrp, 0, 1, GFP_KERNEL);
1570 0, 1, GFP_KERNEL); 1572 if (ret < 0)
1571 if (root_cgrp->id < 0)
1572 goto unlock_drop; 1573 goto unlock_drop;
1574 root_cgrp->id = ret;
1573 1575
1574 /* Check for name clashes with existing mounts */ 1576 /* Check for name clashes with existing mounts */
1575 ret = -EBUSY; 1577 ret = -EBUSY;
@@ -2763,10 +2765,7 @@ static int cgroup_cfts_commit(struct cftype *cfts, bool is_add)
2763 */ 2765 */
2764 update_before = cgroup_serial_nr_next; 2766 update_before = cgroup_serial_nr_next;
2765 2767
2766 mutex_unlock(&cgroup_mutex);
2767
2768 /* add/rm files for all cgroups created before */ 2768 /* add/rm files for all cgroups created before */
2769 rcu_read_lock();
2770 css_for_each_descendant_pre(css, cgroup_css(root, ss)) { 2769 css_for_each_descendant_pre(css, cgroup_css(root, ss)) {
2771 struct cgroup *cgrp = css->cgroup; 2770 struct cgroup *cgrp = css->cgroup;
2772 2771
@@ -2775,23 +2774,19 @@ static int cgroup_cfts_commit(struct cftype *cfts, bool is_add)
2775 2774
2776 inode = cgrp->dentry->d_inode; 2775 inode = cgrp->dentry->d_inode;
2777 dget(cgrp->dentry); 2776 dget(cgrp->dentry);
2778 rcu_read_unlock();
2779
2780 dput(prev); 2777 dput(prev);
2781 prev = cgrp->dentry; 2778 prev = cgrp->dentry;
2782 2779
2780 mutex_unlock(&cgroup_mutex);
2783 mutex_lock(&inode->i_mutex); 2781 mutex_lock(&inode->i_mutex);
2784 mutex_lock(&cgroup_mutex); 2782 mutex_lock(&cgroup_mutex);
2785 if (cgrp->serial_nr < update_before && !cgroup_is_dead(cgrp)) 2783 if (cgrp->serial_nr < update_before && !cgroup_is_dead(cgrp))
2786 ret = cgroup_addrm_files(cgrp, cfts, is_add); 2784 ret = cgroup_addrm_files(cgrp, cfts, is_add);
2787 mutex_unlock(&cgroup_mutex);
2788 mutex_unlock(&inode->i_mutex); 2785 mutex_unlock(&inode->i_mutex);
2789
2790 rcu_read_lock();
2791 if (ret) 2786 if (ret)
2792 break; 2787 break;
2793 } 2788 }
2794 rcu_read_unlock(); 2789 mutex_unlock(&cgroup_mutex);
2795 dput(prev); 2790 dput(prev);
2796 deactivate_super(sb); 2791 deactivate_super(sb);
2797 return ret; 2792 return ret;
@@ -2910,9 +2905,14 @@ static void cgroup_enable_task_cg_lists(void)
2910 * We should check if the process is exiting, otherwise 2905 * We should check if the process is exiting, otherwise
2911 * it will race with cgroup_exit() in that the list 2906 * it will race with cgroup_exit() in that the list
2912 * entry won't be deleted though the process has exited. 2907 * entry won't be deleted though the process has exited.
2908 * Do it while holding siglock so that we don't end up
2909 * racing against cgroup_exit().
2913 */ 2910 */
2911 spin_lock_irq(&p->sighand->siglock);
2914 if (!(p->flags & PF_EXITING) && list_empty(&p->cg_list)) 2912 if (!(p->flags & PF_EXITING) && list_empty(&p->cg_list))
2915 list_add(&p->cg_list, &task_css_set(p)->tasks); 2913 list_add(&p->cg_list, &task_css_set(p)->tasks);
2914 spin_unlock_irq(&p->sighand->siglock);
2915
2916 task_unlock(p); 2916 task_unlock(p);
2917 } while_each_thread(g, p); 2917 } while_each_thread(g, p);
2918 read_unlock(&tasklist_lock); 2918 read_unlock(&tasklist_lock);
@@ -4112,17 +4112,17 @@ static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss)
4112 4112
4113 err = percpu_ref_init(&css->refcnt, css_release); 4113 err = percpu_ref_init(&css->refcnt, css_release);
4114 if (err) 4114 if (err)
4115 goto err_free; 4115 goto err_free_css;
4116 4116
4117 init_css(css, ss, cgrp); 4117 init_css(css, ss, cgrp);
4118 4118
4119 err = cgroup_populate_dir(cgrp, 1 << ss->subsys_id); 4119 err = cgroup_populate_dir(cgrp, 1 << ss->subsys_id);
4120 if (err) 4120 if (err)
4121 goto err_free; 4121 goto err_free_percpu_ref;
4122 4122
4123 err = online_css(css); 4123 err = online_css(css);
4124 if (err) 4124 if (err)
4125 goto err_free; 4125 goto err_clear_dir;
4126 4126
4127 dget(cgrp->dentry); 4127 dget(cgrp->dentry);
4128 css_get(css->parent); 4128 css_get(css->parent);
@@ -4138,8 +4138,11 @@ static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss)
4138 4138
4139 return 0; 4139 return 0;
4140 4140
4141err_free: 4141err_clear_dir:
4142 cgroup_clear_dir(css->cgroup, 1 << css->ss->subsys_id);
4143err_free_percpu_ref:
4142 percpu_ref_cancel_init(&css->refcnt); 4144 percpu_ref_cancel_init(&css->refcnt);
4145err_free_css:
4143 ss->css_free(css); 4146 ss->css_free(css);
4144 return err; 4147 return err;
4145} 4148}
@@ -4158,7 +4161,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
4158 struct cgroup *cgrp; 4161 struct cgroup *cgrp;
4159 struct cgroup_name *name; 4162 struct cgroup_name *name;
4160 struct cgroupfs_root *root = parent->root; 4163 struct cgroupfs_root *root = parent->root;
4161 int ssid, err = 0; 4164 int ssid, err;
4162 struct cgroup_subsys *ss; 4165 struct cgroup_subsys *ss;
4163 struct super_block *sb = root->sb; 4166 struct super_block *sb = root->sb;
4164 4167
@@ -4168,19 +4171,13 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
4168 return -ENOMEM; 4171 return -ENOMEM;
4169 4172
4170 name = cgroup_alloc_name(dentry); 4173 name = cgroup_alloc_name(dentry);
4171 if (!name) 4174 if (!name) {
4175 err = -ENOMEM;
4172 goto err_free_cgrp; 4176 goto err_free_cgrp;
4177 }
4173 rcu_assign_pointer(cgrp->name, name); 4178 rcu_assign_pointer(cgrp->name, name);
4174 4179
4175 /* 4180 /*
4176 * Temporarily set the pointer to NULL, so idr_find() won't return
4177 * a half-baked cgroup.
4178 */
4179 cgrp->id = idr_alloc(&root->cgroup_idr, NULL, 1, 0, GFP_KERNEL);
4180 if (cgrp->id < 0)
4181 goto err_free_name;
4182
4183 /*
4184 * Only live parents can have children. Note that the liveliness 4181 * Only live parents can have children. Note that the liveliness
4185 * check isn't strictly necessary because cgroup_mkdir() and 4182 * check isn't strictly necessary because cgroup_mkdir() and
4186 * cgroup_rmdir() are fully synchronized by i_mutex; however, do it 4183 * cgroup_rmdir() are fully synchronized by i_mutex; however, do it
@@ -4189,7 +4186,17 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
4189 */ 4186 */
4190 if (!cgroup_lock_live_group(parent)) { 4187 if (!cgroup_lock_live_group(parent)) {
4191 err = -ENODEV; 4188 err = -ENODEV;
4192 goto err_free_id; 4189 goto err_free_name;
4190 }
4191
4192 /*
4193 * Temporarily set the pointer to NULL, so idr_find() won't return
4194 * a half-baked cgroup.
4195 */
4196 cgrp->id = idr_alloc(&root->cgroup_idr, NULL, 1, 0, GFP_KERNEL);
4197 if (cgrp->id < 0) {
4198 err = -ENOMEM;
4199 goto err_unlock;
4193 } 4200 }
4194 4201
4195 /* Grab a reference on the superblock so the hierarchy doesn't 4202 /* Grab a reference on the superblock so the hierarchy doesn't
@@ -4221,7 +4228,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
4221 */ 4228 */
4222 err = cgroup_create_file(dentry, S_IFDIR | mode, sb); 4229 err = cgroup_create_file(dentry, S_IFDIR | mode, sb);
4223 if (err < 0) 4230 if (err < 0)
4224 goto err_unlock; 4231 goto err_free_id;
4225 lockdep_assert_held(&dentry->d_inode->i_mutex); 4232 lockdep_assert_held(&dentry->d_inode->i_mutex);
4226 4233
4227 cgrp->serial_nr = cgroup_serial_nr_next++; 4234 cgrp->serial_nr = cgroup_serial_nr_next++;
@@ -4257,12 +4264,12 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
4257 4264
4258 return 0; 4265 return 0;
4259 4266
4260err_unlock:
4261 mutex_unlock(&cgroup_mutex);
4262 /* Release the reference count that we took on the superblock */
4263 deactivate_super(sb);
4264err_free_id: 4267err_free_id:
4265 idr_remove(&root->cgroup_idr, cgrp->id); 4268 idr_remove(&root->cgroup_idr, cgrp->id);
4269 /* Release the reference count that we took on the superblock */
4270 deactivate_super(sb);
4271err_unlock:
4272 mutex_unlock(&cgroup_mutex);
4266err_free_name: 4273err_free_name:
4267 kfree(rcu_dereference_raw(cgrp->name)); 4274 kfree(rcu_dereference_raw(cgrp->name));
4268err_free_cgrp: 4275err_free_cgrp:
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 4410ac6a55f1..e6b1b66afe52 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -974,12 +974,6 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
974 * Temporarilly set tasks mems_allowed to target nodes of migration, 974 * Temporarilly set tasks mems_allowed to target nodes of migration,
975 * so that the migration code can allocate pages on these nodes. 975 * so that the migration code can allocate pages on these nodes.
976 * 976 *
977 * Call holding cpuset_mutex, so current's cpuset won't change
978 * during this call, as manage_mutex holds off any cpuset_attach()
979 * calls. Therefore we don't need to take task_lock around the
980 * call to guarantee_online_mems(), as we know no one is changing
981 * our task's cpuset.
982 *
983 * While the mm_struct we are migrating is typically from some 977 * While the mm_struct we are migrating is typically from some
984 * other task, the task_struct mems_allowed that we are hacking 978 * other task, the task_struct mems_allowed that we are hacking
985 * is for our current task, which must allocate new pages for that 979 * is for our current task, which must allocate new pages for that
@@ -996,8 +990,10 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
996 990
997 do_migrate_pages(mm, from, to, MPOL_MF_MOVE_ALL); 991 do_migrate_pages(mm, from, to, MPOL_MF_MOVE_ALL);
998 992
993 rcu_read_lock();
999 mems_cs = effective_nodemask_cpuset(task_cs(tsk)); 994 mems_cs = effective_nodemask_cpuset(task_cs(tsk));
1000 guarantee_online_mems(mems_cs, &tsk->mems_allowed); 995 guarantee_online_mems(mems_cs, &tsk->mems_allowed);
996 rcu_read_unlock();
1001} 997}
1002 998
1003/* 999/*
@@ -2486,9 +2482,9 @@ int __cpuset_node_allowed_softwall(int node, gfp_t gfp_mask)
2486 2482
2487 task_lock(current); 2483 task_lock(current);
2488 cs = nearest_hardwall_ancestor(task_cs(current)); 2484 cs = nearest_hardwall_ancestor(task_cs(current));
2485 allowed = node_isset(node, cs->mems_allowed);
2489 task_unlock(current); 2486 task_unlock(current);
2490 2487
2491 allowed = node_isset(node, cs->mems_allowed);
2492 mutex_unlock(&callback_mutex); 2488 mutex_unlock(&callback_mutex);
2493 return allowed; 2489 return allowed;
2494} 2490}
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 56003c6edfd3..fa0b2d4ad83c 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7856,14 +7856,14 @@ static void perf_pmu_rotate_stop(struct pmu *pmu)
7856static void __perf_event_exit_context(void *__info) 7856static void __perf_event_exit_context(void *__info)
7857{ 7857{
7858 struct perf_event_context *ctx = __info; 7858 struct perf_event_context *ctx = __info;
7859 struct perf_event *event, *tmp; 7859 struct perf_event *event;
7860 7860
7861 perf_pmu_rotate_stop(ctx->pmu); 7861 perf_pmu_rotate_stop(ctx->pmu);
7862 7862
7863 list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry) 7863 rcu_read_lock();
7864 __perf_remove_from_context(event); 7864 list_for_each_entry_rcu(event, &ctx->event_list, event_entry)
7865 list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, group_entry)
7866 __perf_remove_from_context(event); 7865 __perf_remove_from_context(event);
7866 rcu_read_unlock();
7867} 7867}
7868 7868
7869static void perf_event_exit_cpu_context(int cpu) 7869static void perf_event_exit_cpu_context(int cpu)
@@ -7887,11 +7887,11 @@ static void perf_event_exit_cpu(int cpu)
7887{ 7887{
7888 struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu); 7888 struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
7889 7889
7890 perf_event_exit_cpu_context(cpu);
7891
7890 mutex_lock(&swhash->hlist_mutex); 7892 mutex_lock(&swhash->hlist_mutex);
7891 swevent_hlist_release(swhash); 7893 swevent_hlist_release(swhash);
7892 mutex_unlock(&swhash->hlist_mutex); 7894 mutex_unlock(&swhash->hlist_mutex);
7893
7894 perf_event_exit_cpu_context(cpu);
7895} 7895}
7896#else 7896#else
7897static inline void perf_event_exit_cpu(int cpu) { } 7897static inline void perf_event_exit_cpu(int cpu) { }
diff --git a/kernel/futex.c b/kernel/futex.c
index 44a1261cb9ff..67dacaf93e56 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -157,7 +157,9 @@
157 * enqueue. 157 * enqueue.
158 */ 158 */
159 159
160#ifndef CONFIG_HAVE_FUTEX_CMPXCHG
160int __read_mostly futex_cmpxchg_enabled; 161int __read_mostly futex_cmpxchg_enabled;
162#endif
161 163
162/* 164/*
163 * Futex flags used to encode options to functions and preserve them across 165 * Futex flags used to encode options to functions and preserve them across
@@ -234,6 +236,7 @@ static const struct futex_q futex_q_init = {
234 * waiting on a futex. 236 * waiting on a futex.
235 */ 237 */
236struct futex_hash_bucket { 238struct futex_hash_bucket {
239 atomic_t waiters;
237 spinlock_t lock; 240 spinlock_t lock;
238 struct plist_head chain; 241 struct plist_head chain;
239} ____cacheline_aligned_in_smp; 242} ____cacheline_aligned_in_smp;
@@ -253,22 +256,37 @@ static inline void futex_get_mm(union futex_key *key)
253 smp_mb__after_atomic_inc(); 256 smp_mb__after_atomic_inc();
254} 257}
255 258
256static inline bool hb_waiters_pending(struct futex_hash_bucket *hb) 259/*
260 * Reflects a new waiter being added to the waitqueue.
261 */
262static inline void hb_waiters_inc(struct futex_hash_bucket *hb)
257{ 263{
258#ifdef CONFIG_SMP 264#ifdef CONFIG_SMP
265 atomic_inc(&hb->waiters);
259 /* 266 /*
260 * Tasks trying to enter the critical region are most likely 267 * Full barrier (A), see the ordering comment above.
261 * potential waiters that will be added to the plist. Ensure
262 * that wakers won't miss to-be-slept tasks in the window between
263 * the wait call and the actual plist_add.
264 */ 268 */
265 if (spin_is_locked(&hb->lock)) 269 smp_mb__after_atomic_inc();
266 return true; 270#endif
267 smp_rmb(); /* Make sure we check the lock state first */ 271}
272
273/*
274 * Reflects a waiter being removed from the waitqueue by wakeup
275 * paths.
276 */
277static inline void hb_waiters_dec(struct futex_hash_bucket *hb)
278{
279#ifdef CONFIG_SMP
280 atomic_dec(&hb->waiters);
281#endif
282}
268 283
269 return !plist_head_empty(&hb->chain); 284static inline int hb_waiters_pending(struct futex_hash_bucket *hb)
285{
286#ifdef CONFIG_SMP
287 return atomic_read(&hb->waiters);
270#else 288#else
271 return true; 289 return 1;
272#endif 290#endif
273} 291}
274 292
@@ -954,6 +972,7 @@ static void __unqueue_futex(struct futex_q *q)
954 972
955 hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock); 973 hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock);
956 plist_del(&q->list, &hb->chain); 974 plist_del(&q->list, &hb->chain);
975 hb_waiters_dec(hb);
957} 976}
958 977
959/* 978/*
@@ -1257,7 +1276,9 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
1257 */ 1276 */
1258 if (likely(&hb1->chain != &hb2->chain)) { 1277 if (likely(&hb1->chain != &hb2->chain)) {
1259 plist_del(&q->list, &hb1->chain); 1278 plist_del(&q->list, &hb1->chain);
1279 hb_waiters_dec(hb1);
1260 plist_add(&q->list, &hb2->chain); 1280 plist_add(&q->list, &hb2->chain);
1281 hb_waiters_inc(hb2);
1261 q->lock_ptr = &hb2->lock; 1282 q->lock_ptr = &hb2->lock;
1262 } 1283 }
1263 get_futex_key_refs(key2); 1284 get_futex_key_refs(key2);
@@ -1600,6 +1621,17 @@ static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)
1600 struct futex_hash_bucket *hb; 1621 struct futex_hash_bucket *hb;
1601 1622
1602 hb = hash_futex(&q->key); 1623 hb = hash_futex(&q->key);
1624
1625 /*
1626 * Increment the counter before taking the lock so that
1627 * a potential waker won't miss a to-be-slept task that is
1628 * waiting for the spinlock. This is safe as all queue_lock()
1629 * users end up calling queue_me(). Similarly, for housekeeping,
1630 * decrement the counter at queue_unlock() when some error has
1631 * occurred and we don't end up adding the task to the list.
1632 */
1633 hb_waiters_inc(hb);
1634
1603 q->lock_ptr = &hb->lock; 1635 q->lock_ptr = &hb->lock;
1604 1636
1605 spin_lock(&hb->lock); /* implies MB (A) */ 1637 spin_lock(&hb->lock); /* implies MB (A) */
@@ -1611,6 +1643,7 @@ queue_unlock(struct futex_hash_bucket *hb)
1611 __releases(&hb->lock) 1643 __releases(&hb->lock)
1612{ 1644{
1613 spin_unlock(&hb->lock); 1645 spin_unlock(&hb->lock);
1646 hb_waiters_dec(hb);
1614} 1647}
1615 1648
1616/** 1649/**
@@ -2342,6 +2375,7 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
2342 * Unqueue the futex_q and determine which it was. 2375 * Unqueue the futex_q and determine which it was.
2343 */ 2376 */
2344 plist_del(&q->list, &hb->chain); 2377 plist_del(&q->list, &hb->chain);
2378 hb_waiters_dec(hb);
2345 2379
2346 /* Handle spurious wakeups gracefully */ 2380 /* Handle spurious wakeups gracefully */
2347 ret = -EWOULDBLOCK; 2381 ret = -EWOULDBLOCK;
@@ -2843,9 +2877,28 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
2843 return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); 2877 return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
2844} 2878}
2845 2879
2846static int __init futex_init(void) 2880static void __init futex_detect_cmpxchg(void)
2847{ 2881{
2882#ifndef CONFIG_HAVE_FUTEX_CMPXCHG
2848 u32 curval; 2883 u32 curval;
2884
2885 /*
2886 * This will fail and we want it. Some arch implementations do
2887 * runtime detection of the futex_atomic_cmpxchg_inatomic()
2888 * functionality. We want to know that before we call in any
2889 * of the complex code paths. Also we want to prevent
2890 * registration of robust lists in that case. NULL is
2891 * guaranteed to fault and we get -EFAULT on functional
2892 * implementation, the non-functional ones will return
2893 * -ENOSYS.
2894 */
2895 if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT)
2896 futex_cmpxchg_enabled = 1;
2897#endif
2898}
2899
2900static int __init futex_init(void)
2901{
2849 unsigned int futex_shift; 2902 unsigned int futex_shift;
2850 unsigned long i; 2903 unsigned long i;
2851 2904
@@ -2861,20 +2914,11 @@ static int __init futex_init(void)
2861 &futex_shift, NULL, 2914 &futex_shift, NULL,
2862 futex_hashsize, futex_hashsize); 2915 futex_hashsize, futex_hashsize);
2863 futex_hashsize = 1UL << futex_shift; 2916 futex_hashsize = 1UL << futex_shift;
2864 /* 2917
2865 * This will fail and we want it. Some arch implementations do 2918 futex_detect_cmpxchg();
2866 * runtime detection of the futex_atomic_cmpxchg_inatomic()
2867 * functionality. We want to know that before we call in any
2868 * of the complex code paths. Also we want to prevent
2869 * registration of robust lists in that case. NULL is
2870 * guaranteed to fault and we get -EFAULT on functional
2871 * implementation, the non-functional ones will return
2872 * -ENOSYS.
2873 */
2874 if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT)
2875 futex_cmpxchg_enabled = 1;
2876 2919
2877 for (i = 0; i < futex_hashsize; i++) { 2920 for (i = 0; i < futex_hashsize; i++) {
2921 atomic_set(&futex_queues[i].waiters, 0);
2878 plist_head_init(&futex_queues[i].chain); 2922 plist_head_init(&futex_queues[i].chain);
2879 spin_lock_init(&futex_queues[i].lock); 2923 spin_lock_init(&futex_queues[i].lock);
2880 } 2924 }
diff --git a/kernel/irq/devres.c b/kernel/irq/devres.c
index bd8e788d71e0..1ef0606797c9 100644
--- a/kernel/irq/devres.c
+++ b/kernel/irq/devres.c
@@ -73,6 +73,51 @@ int devm_request_threaded_irq(struct device *dev, unsigned int irq,
73EXPORT_SYMBOL(devm_request_threaded_irq); 73EXPORT_SYMBOL(devm_request_threaded_irq);
74 74
75/** 75/**
76 * devm_request_any_context_irq - allocate an interrupt line for a managed device
77 * @dev: device to request interrupt for
78 * @irq: Interrupt line to allocate
79 * @handler: Function to be called when the IRQ occurs
80 * @thread_fn: function to be called in a threaded interrupt context. NULL
81 * for devices which handle everything in @handler
82 * @irqflags: Interrupt type flags
83 * @devname: An ascii name for the claiming device
84 * @dev_id: A cookie passed back to the handler function
85 *
86 * Except for the extra @dev argument, this function takes the
87 * same arguments and performs the same function as
88 * request_any_context_irq(). IRQs requested with this function will be
89 * automatically freed on driver detach.
90 *
91 * If an IRQ allocated with this function needs to be freed
92 * separately, devm_free_irq() must be used.
93 */
94int devm_request_any_context_irq(struct device *dev, unsigned int irq,
95 irq_handler_t handler, unsigned long irqflags,
96 const char *devname, void *dev_id)
97{
98 struct irq_devres *dr;
99 int rc;
100
101 dr = devres_alloc(devm_irq_release, sizeof(struct irq_devres),
102 GFP_KERNEL);
103 if (!dr)
104 return -ENOMEM;
105
106 rc = request_any_context_irq(irq, handler, irqflags, devname, dev_id);
107 if (rc) {
108 devres_free(dr);
109 return rc;
110 }
111
112 dr->irq = irq;
113 dr->dev_id = dev_id;
114 devres_add(dev, dr);
115
116 return 0;
117}
118EXPORT_SYMBOL(devm_request_any_context_irq);
119
120/**
76 * devm_free_irq - free an interrupt 121 * devm_free_irq - free an interrupt
77 * @dev: device to free interrupt for 122 * @dev: device to free interrupt for
78 * @irq: Interrupt line to free 123 * @irq: Interrupt line to free
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 192a302d6cfd..8ab8e9390297 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -274,6 +274,7 @@ struct irq_desc *irq_to_desc(unsigned int irq)
274{ 274{
275 return (irq < NR_IRQS) ? irq_desc + irq : NULL; 275 return (irq < NR_IRQS) ? irq_desc + irq : NULL;
276} 276}
277EXPORT_SYMBOL(irq_to_desc);
277 278
278static void free_desc(unsigned int irq) 279static void free_desc(unsigned int irq)
279{ 280{
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index cf68bb36fe58..f14033700c25 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -10,6 +10,7 @@
10#include <linux/mutex.h> 10#include <linux/mutex.h>
11#include <linux/of.h> 11#include <linux/of.h>
12#include <linux/of_address.h> 12#include <linux/of_address.h>
13#include <linux/of_irq.h>
13#include <linux/topology.h> 14#include <linux/topology.h>
14#include <linux/seq_file.h> 15#include <linux/seq_file.h>
15#include <linux/slab.h> 16#include <linux/slab.h>
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 481a13c43b17..d3bf660cb57f 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -802,8 +802,7 @@ static irqreturn_t irq_thread_fn(struct irq_desc *desc,
802 802
803static void wake_threads_waitq(struct irq_desc *desc) 803static void wake_threads_waitq(struct irq_desc *desc)
804{ 804{
805 if (atomic_dec_and_test(&desc->threads_active) && 805 if (atomic_dec_and_test(&desc->threads_active))
806 waitqueue_active(&desc->wait_for_threads))
807 wake_up(&desc->wait_for_threads); 806 wake_up(&desc->wait_for_threads);
808} 807}
809 808
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index d945a949760f..e660964086e2 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -19,6 +19,8 @@
19#include <linux/sched.h> 19#include <linux/sched.h>
20#include <linux/capability.h> 20#include <linux/capability.h>
21 21
22#include <linux/rcupdate.h> /* rcu_expedited */
23
22#define KERNEL_ATTR_RO(_name) \ 24#define KERNEL_ATTR_RO(_name) \
23static struct kobj_attribute _name##_attr = __ATTR_RO(_name) 25static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
24 26
diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile
index baab8e5e7f66..306a76b51e0f 100644
--- a/kernel/locking/Makefile
+++ b/kernel/locking/Makefile
@@ -1,5 +1,5 @@
1 1
2obj-y += mutex.o semaphore.o rwsem.o lglock.o 2obj-y += mutex.o semaphore.o rwsem.o lglock.o mcs_spinlock.o
3 3
4ifdef CONFIG_FUNCTION_TRACER 4ifdef CONFIG_FUNCTION_TRACER
5CFLAGS_REMOVE_lockdep.o = -pg 5CFLAGS_REMOVE_lockdep.o = -pg
@@ -23,3 +23,4 @@ obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o
23obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o 23obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
24obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o 24obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o
25obj-$(CONFIG_PERCPU_RWSEM) += percpu-rwsem.o 25obj-$(CONFIG_PERCPU_RWSEM) += percpu-rwsem.o
26obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index eb8a54783fa0..bf0c6b0dd9c5 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -1936,12 +1936,12 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next)
1936 1936
1937 for (;;) { 1937 for (;;) {
1938 int distance = curr->lockdep_depth - depth + 1; 1938 int distance = curr->lockdep_depth - depth + 1;
1939 hlock = curr->held_locks + depth-1; 1939 hlock = curr->held_locks + depth - 1;
1940 /* 1940 /*
1941 * Only non-recursive-read entries get new dependencies 1941 * Only non-recursive-read entries get new dependencies
1942 * added: 1942 * added:
1943 */ 1943 */
1944 if (hlock->read != 2) { 1944 if (hlock->read != 2 && hlock->check) {
1945 if (!check_prev_add(curr, hlock, next, 1945 if (!check_prev_add(curr, hlock, next,
1946 distance, trylock_loop)) 1946 distance, trylock_loop))
1947 return 0; 1947 return 0;
@@ -2098,7 +2098,7 @@ static int validate_chain(struct task_struct *curr, struct lockdep_map *lock,
2098 * (If lookup_chain_cache() returns with 1 it acquires 2098 * (If lookup_chain_cache() returns with 1 it acquires
2099 * graph_lock for us) 2099 * graph_lock for us)
2100 */ 2100 */
2101 if (!hlock->trylock && (hlock->check == 2) && 2101 if (!hlock->trylock && hlock->check &&
2102 lookup_chain_cache(curr, hlock, chain_key)) { 2102 lookup_chain_cache(curr, hlock, chain_key)) {
2103 /* 2103 /*
2104 * Check whether last held lock: 2104 * Check whether last held lock:
@@ -2517,7 +2517,7 @@ mark_held_locks(struct task_struct *curr, enum mark_type mark)
2517 2517
2518 BUG_ON(usage_bit >= LOCK_USAGE_STATES); 2518 BUG_ON(usage_bit >= LOCK_USAGE_STATES);
2519 2519
2520 if (hlock_class(hlock)->key == __lockdep_no_validate__.subkeys) 2520 if (!hlock->check)
2521 continue; 2521 continue;
2522 2522
2523 if (!mark_lock(curr, hlock, usage_bit)) 2523 if (!mark_lock(curr, hlock, usage_bit))
@@ -3055,9 +3055,6 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
3055 int class_idx; 3055 int class_idx;
3056 u64 chain_key; 3056 u64 chain_key;
3057 3057
3058 if (!prove_locking)
3059 check = 1;
3060
3061 if (unlikely(!debug_locks)) 3058 if (unlikely(!debug_locks))
3062 return 0; 3059 return 0;
3063 3060
@@ -3069,8 +3066,8 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
3069 if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) 3066 if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
3070 return 0; 3067 return 0;
3071 3068
3072 if (lock->key == &__lockdep_no_validate__) 3069 if (!prove_locking || lock->key == &__lockdep_no_validate__)
3073 check = 1; 3070 check = 0;
3074 3071
3075 if (subclass < NR_LOCKDEP_CACHING_CLASSES) 3072 if (subclass < NR_LOCKDEP_CACHING_CLASSES)
3076 class = lock->class_cache[subclass]; 3073 class = lock->class_cache[subclass];
@@ -3138,7 +3135,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
3138 hlock->holdtime_stamp = lockstat_clock(); 3135 hlock->holdtime_stamp = lockstat_clock();
3139#endif 3136#endif
3140 3137
3141 if (check == 2 && !mark_irqflags(curr, hlock)) 3138 if (check && !mark_irqflags(curr, hlock))
3142 return 0; 3139 return 0;
3143 3140
3144 /* mark it as used: */ 3141 /* mark it as used: */
diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c
new file mode 100644
index 000000000000..f26b1a18e34e
--- /dev/null
+++ b/kernel/locking/locktorture.c
@@ -0,0 +1,452 @@
1/*
2 * Module-based torture test facility for locking
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, you can access it online at
16 * http://www.gnu.org/licenses/gpl-2.0.html.
17 *
18 * Copyright (C) IBM Corporation, 2014
19 *
20 * Author: Paul E. McKenney <paulmck@us.ibm.com>
21 * Based on kernel/rcu/torture.c.
22 */
23#include <linux/types.h>
24#include <linux/kernel.h>
25#include <linux/init.h>
26#include <linux/module.h>
27#include <linux/kthread.h>
28#include <linux/err.h>
29#include <linux/spinlock.h>
30#include <linux/smp.h>
31#include <linux/interrupt.h>
32#include <linux/sched.h>
33#include <linux/atomic.h>
34#include <linux/bitops.h>
35#include <linux/completion.h>
36#include <linux/moduleparam.h>
37#include <linux/percpu.h>
38#include <linux/notifier.h>
39#include <linux/reboot.h>
40#include <linux/freezer.h>
41#include <linux/cpu.h>
42#include <linux/delay.h>
43#include <linux/stat.h>
44#include <linux/slab.h>
45#include <linux/trace_clock.h>
46#include <asm/byteorder.h>
47#include <linux/torture.h>
48
49MODULE_LICENSE("GPL");
50MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com>");
51
52torture_param(int, nwriters_stress, -1,
53 "Number of write-locking stress-test threads");
54torture_param(int, onoff_holdoff, 0, "Time after boot before CPU hotplugs (s)");
55torture_param(int, onoff_interval, 0,
56 "Time between CPU hotplugs (s), 0=disable");
57torture_param(int, shuffle_interval, 3,
58 "Number of jiffies between shuffles, 0=disable");
59torture_param(int, shutdown_secs, 0, "Shutdown time (j), <= zero to disable.");
60torture_param(int, stat_interval, 60,
61 "Number of seconds between stats printk()s");
62torture_param(int, stutter, 5, "Number of jiffies to run/halt test, 0=disable");
63torture_param(bool, verbose, true,
64 "Enable verbose debugging printk()s");
65
66static char *torture_type = "spin_lock";
67module_param(torture_type, charp, 0444);
68MODULE_PARM_DESC(torture_type,
69 "Type of lock to torture (spin_lock, spin_lock_irq, ...)");
70
71static atomic_t n_lock_torture_errors;
72
73static struct task_struct *stats_task;
74static struct task_struct **writer_tasks;
75
76static int nrealwriters_stress;
77static bool lock_is_write_held;
78
79struct lock_writer_stress_stats {
80 long n_write_lock_fail;
81 long n_write_lock_acquired;
82};
83static struct lock_writer_stress_stats *lwsa;
84
85#if defined(MODULE) || defined(CONFIG_LOCK_TORTURE_TEST_RUNNABLE)
86#define LOCKTORTURE_RUNNABLE_INIT 1
87#else
88#define LOCKTORTURE_RUNNABLE_INIT 0
89#endif
90int locktorture_runnable = LOCKTORTURE_RUNNABLE_INIT;
91module_param(locktorture_runnable, int, 0444);
92MODULE_PARM_DESC(locktorture_runnable, "Start locktorture at boot");
93
94/* Forward reference. */
95static void lock_torture_cleanup(void);
96
97/*
98 * Operations vector for selecting different types of tests.
99 */
100struct lock_torture_ops {
101 void (*init)(void);
102 int (*writelock)(void);
103 void (*write_delay)(struct torture_random_state *trsp);
104 void (*writeunlock)(void);
105 unsigned long flags;
106 const char *name;
107};
108
109static struct lock_torture_ops *cur_ops;
110
111/*
112 * Definitions for lock torture testing.
113 */
114
115static int torture_lock_busted_write_lock(void)
116{
117 return 0; /* BUGGY, do not use in real life!!! */
118}
119
120static void torture_lock_busted_write_delay(struct torture_random_state *trsp)
121{
122 const unsigned long longdelay_us = 100;
123
124 /* We want a long delay occasionally to force massive contention. */
125 if (!(torture_random(trsp) %
126 (nrealwriters_stress * 2000 * longdelay_us)))
127 mdelay(longdelay_us);
128#ifdef CONFIG_PREEMPT
129 if (!(torture_random(trsp) % (nrealwriters_stress * 20000)))
130 preempt_schedule(); /* Allow test to be preempted. */
131#endif
132}
133
134static void torture_lock_busted_write_unlock(void)
135{
136 /* BUGGY, do not use in real life!!! */
137}
138
139static struct lock_torture_ops lock_busted_ops = {
140 .writelock = torture_lock_busted_write_lock,
141 .write_delay = torture_lock_busted_write_delay,
142 .writeunlock = torture_lock_busted_write_unlock,
143 .name = "lock_busted"
144};
145
146static DEFINE_SPINLOCK(torture_spinlock);
147
148static int torture_spin_lock_write_lock(void) __acquires(torture_spinlock)
149{
150 spin_lock(&torture_spinlock);
151 return 0;
152}
153
154static void torture_spin_lock_write_delay(struct torture_random_state *trsp)
155{
156 const unsigned long shortdelay_us = 2;
157 const unsigned long longdelay_us = 100;
158
159 /* We want a short delay mostly to emulate likely code, and
160 * we want a long delay occasionally to force massive contention.
161 */
162 if (!(torture_random(trsp) %
163 (nrealwriters_stress * 2000 * longdelay_us)))
164 mdelay(longdelay_us);
165 if (!(torture_random(trsp) %
166 (nrealwriters_stress * 2 * shortdelay_us)))
167 udelay(shortdelay_us);
168#ifdef CONFIG_PREEMPT
169 if (!(torture_random(trsp) % (nrealwriters_stress * 20000)))
170 preempt_schedule(); /* Allow test to be preempted. */
171#endif
172}
173
174static void torture_spin_lock_write_unlock(void) __releases(torture_spinlock)
175{
176 spin_unlock(&torture_spinlock);
177}
178
179static struct lock_torture_ops spin_lock_ops = {
180 .writelock = torture_spin_lock_write_lock,
181 .write_delay = torture_spin_lock_write_delay,
182 .writeunlock = torture_spin_lock_write_unlock,
183 .name = "spin_lock"
184};
185
186static int torture_spin_lock_write_lock_irq(void)
187__acquires(torture_spinlock_irq)
188{
189 unsigned long flags;
190
191 spin_lock_irqsave(&torture_spinlock, flags);
192 cur_ops->flags = flags;
193 return 0;
194}
195
196static void torture_lock_spin_write_unlock_irq(void)
197__releases(torture_spinlock)
198{
199 spin_unlock_irqrestore(&torture_spinlock, cur_ops->flags);
200}
201
202static struct lock_torture_ops spin_lock_irq_ops = {
203 .writelock = torture_spin_lock_write_lock_irq,
204 .write_delay = torture_spin_lock_write_delay,
205 .writeunlock = torture_lock_spin_write_unlock_irq,
206 .name = "spin_lock_irq"
207};
208
209/*
210 * Lock torture writer kthread. Repeatedly acquires and releases
211 * the lock, checking for duplicate acquisitions.
212 */
213static int lock_torture_writer(void *arg)
214{
215 struct lock_writer_stress_stats *lwsp = arg;
216 static DEFINE_TORTURE_RANDOM(rand);
217
218 VERBOSE_TOROUT_STRING("lock_torture_writer task started");
219 set_user_nice(current, 19);
220
221 do {
222 schedule_timeout_uninterruptible(1);
223 cur_ops->writelock();
224 if (WARN_ON_ONCE(lock_is_write_held))
225 lwsp->n_write_lock_fail++;
226 lock_is_write_held = 1;
227 lwsp->n_write_lock_acquired++;
228 cur_ops->write_delay(&rand);
229 lock_is_write_held = 0;
230 cur_ops->writeunlock();
231 stutter_wait("lock_torture_writer");
232 } while (!torture_must_stop());
233 torture_kthread_stopping("lock_torture_writer");
234 return 0;
235}
236
237/*
238 * Create an lock-torture-statistics message in the specified buffer.
239 */
240static void lock_torture_printk(char *page)
241{
242 bool fail = 0;
243 int i;
244 long max = 0;
245 long min = lwsa[0].n_write_lock_acquired;
246 long long sum = 0;
247
248 for (i = 0; i < nrealwriters_stress; i++) {
249 if (lwsa[i].n_write_lock_fail)
250 fail = true;
251 sum += lwsa[i].n_write_lock_acquired;
252 if (max < lwsa[i].n_write_lock_fail)
253 max = lwsa[i].n_write_lock_fail;
254 if (min > lwsa[i].n_write_lock_fail)
255 min = lwsa[i].n_write_lock_fail;
256 }
257 page += sprintf(page, "%s%s ", torture_type, TORTURE_FLAG);
258 page += sprintf(page,
259 "Writes: Total: %lld Max/Min: %ld/%ld %s Fail: %d %s\n",
260 sum, max, min, max / 2 > min ? "???" : "",
261 fail, fail ? "!!!" : "");
262 if (fail)
263 atomic_inc(&n_lock_torture_errors);
264}
265
266/*
267 * Print torture statistics. Caller must ensure that there is only one
268 * call to this function at a given time!!! This is normally accomplished
269 * by relying on the module system to only have one copy of the module
270 * loaded, and then by giving the lock_torture_stats kthread full control
271 * (or the init/cleanup functions when lock_torture_stats thread is not
272 * running).
273 */
274static void lock_torture_stats_print(void)
275{
276 int size = nrealwriters_stress * 200 + 8192;
277 char *buf;
278
279 buf = kmalloc(size, GFP_KERNEL);
280 if (!buf) {
281 pr_err("lock_torture_stats_print: Out of memory, need: %d",
282 size);
283 return;
284 }
285 lock_torture_printk(buf);
286 pr_alert("%s", buf);
287 kfree(buf);
288}
289
290/*
291 * Periodically prints torture statistics, if periodic statistics printing
292 * was specified via the stat_interval module parameter.
293 *
294 * No need to worry about fullstop here, since this one doesn't reference
295 * volatile state or register callbacks.
296 */
297static int lock_torture_stats(void *arg)
298{
299 VERBOSE_TOROUT_STRING("lock_torture_stats task started");
300 do {
301 schedule_timeout_interruptible(stat_interval * HZ);
302 lock_torture_stats_print();
303 torture_shutdown_absorb("lock_torture_stats");
304 } while (!torture_must_stop());
305 torture_kthread_stopping("lock_torture_stats");
306 return 0;
307}
308
309static inline void
310lock_torture_print_module_parms(struct lock_torture_ops *cur_ops,
311 const char *tag)
312{
313 pr_alert("%s" TORTURE_FLAG
314 "--- %s: nwriters_stress=%d stat_interval=%d verbose=%d shuffle_interval=%d stutter=%d shutdown_secs=%d onoff_interval=%d onoff_holdoff=%d\n",
315 torture_type, tag, nrealwriters_stress, stat_interval, verbose,
316 shuffle_interval, stutter, shutdown_secs,
317 onoff_interval, onoff_holdoff);
318}
319
320static void lock_torture_cleanup(void)
321{
322 int i;
323
324 if (torture_cleanup())
325 return;
326
327 if (writer_tasks) {
328 for (i = 0; i < nrealwriters_stress; i++)
329 torture_stop_kthread(lock_torture_writer,
330 writer_tasks[i]);
331 kfree(writer_tasks);
332 writer_tasks = NULL;
333 }
334
335 torture_stop_kthread(lock_torture_stats, stats_task);
336 lock_torture_stats_print(); /* -After- the stats thread is stopped! */
337
338 if (atomic_read(&n_lock_torture_errors))
339 lock_torture_print_module_parms(cur_ops,
340 "End of test: FAILURE");
341 else if (torture_onoff_failures())
342 lock_torture_print_module_parms(cur_ops,
343 "End of test: LOCK_HOTPLUG");
344 else
345 lock_torture_print_module_parms(cur_ops,
346 "End of test: SUCCESS");
347}
348
349static int __init lock_torture_init(void)
350{
351 int i;
352 int firsterr = 0;
353 static struct lock_torture_ops *torture_ops[] = {
354 &lock_busted_ops, &spin_lock_ops, &spin_lock_irq_ops,
355 };
356
357 torture_init_begin(torture_type, verbose, &locktorture_runnable);
358
359 /* Process args and tell the world that the torturer is on the job. */
360 for (i = 0; i < ARRAY_SIZE(torture_ops); i++) {
361 cur_ops = torture_ops[i];
362 if (strcmp(torture_type, cur_ops->name) == 0)
363 break;
364 }
365 if (i == ARRAY_SIZE(torture_ops)) {
366 pr_alert("lock-torture: invalid torture type: \"%s\"\n",
367 torture_type);
368 pr_alert("lock-torture types:");
369 for (i = 0; i < ARRAY_SIZE(torture_ops); i++)
370 pr_alert(" %s", torture_ops[i]->name);
371 pr_alert("\n");
372 torture_init_end();
373 return -EINVAL;
374 }
375 if (cur_ops->init)
376 cur_ops->init(); /* no "goto unwind" prior to this point!!! */
377
378 if (nwriters_stress >= 0)
379 nrealwriters_stress = nwriters_stress;
380 else
381 nrealwriters_stress = 2 * num_online_cpus();
382 lock_torture_print_module_parms(cur_ops, "Start of test");
383
384 /* Initialize the statistics so that each run gets its own numbers. */
385
386 lock_is_write_held = 0;
387 lwsa = kmalloc(sizeof(*lwsa) * nrealwriters_stress, GFP_KERNEL);
388 if (lwsa == NULL) {
389 VERBOSE_TOROUT_STRING("lwsa: Out of memory");
390 firsterr = -ENOMEM;
391 goto unwind;
392 }
393 for (i = 0; i < nrealwriters_stress; i++) {
394 lwsa[i].n_write_lock_fail = 0;
395 lwsa[i].n_write_lock_acquired = 0;
396 }
397
398 /* Start up the kthreads. */
399
400 if (onoff_interval > 0) {
401 firsterr = torture_onoff_init(onoff_holdoff * HZ,
402 onoff_interval * HZ);
403 if (firsterr)
404 goto unwind;
405 }
406 if (shuffle_interval > 0) {
407 firsterr = torture_shuffle_init(shuffle_interval);
408 if (firsterr)
409 goto unwind;
410 }
411 if (shutdown_secs > 0) {
412 firsterr = torture_shutdown_init(shutdown_secs,
413 lock_torture_cleanup);
414 if (firsterr)
415 goto unwind;
416 }
417 if (stutter > 0) {
418 firsterr = torture_stutter_init(stutter);
419 if (firsterr)
420 goto unwind;
421 }
422
423 writer_tasks = kzalloc(nrealwriters_stress * sizeof(writer_tasks[0]),
424 GFP_KERNEL);
425 if (writer_tasks == NULL) {
426 VERBOSE_TOROUT_ERRSTRING("writer_tasks: Out of memory");
427 firsterr = -ENOMEM;
428 goto unwind;
429 }
430 for (i = 0; i < nrealwriters_stress; i++) {
431 firsterr = torture_create_kthread(lock_torture_writer, &lwsa[i],
432 writer_tasks[i]);
433 if (firsterr)
434 goto unwind;
435 }
436 if (stat_interval > 0) {
437 firsterr = torture_create_kthread(lock_torture_stats, NULL,
438 stats_task);
439 if (firsterr)
440 goto unwind;
441 }
442 torture_init_end();
443 return 0;
444
445unwind:
446 torture_init_end();
447 lock_torture_cleanup();
448 return firsterr;
449}
450
451module_init(lock_torture_init);
452module_exit(lock_torture_cleanup);
diff --git a/kernel/locking/mcs_spinlock.c b/kernel/locking/mcs_spinlock.c
new file mode 100644
index 000000000000..838dc9e00669
--- /dev/null
+++ b/kernel/locking/mcs_spinlock.c
@@ -0,0 +1,178 @@
1
2#include <linux/percpu.h>
3#include <linux/mutex.h>
4#include <linux/sched.h>
5#include "mcs_spinlock.h"
6
7#ifdef CONFIG_SMP
8
9/*
10 * An MCS-like lock especially tailored for the optimistic spinning done by sleeping
11 * lock implementations (mutex, rwsem, etc).
12 *
13 * Using a single mcs node per CPU is safe because sleeping locks should not be
14 * called from interrupt context and we have preemption disabled while
15 * spinning.
16 */
17static DEFINE_PER_CPU_SHARED_ALIGNED(struct optimistic_spin_queue, osq_node);
18
19/*
20 * Get a stable @node->next pointer, either for unlock() or unqueue() purposes.
21 * Can return NULL in case we were the last queued and we updated @lock instead.
22 */
23static inline struct optimistic_spin_queue *
24osq_wait_next(struct optimistic_spin_queue **lock,
25 struct optimistic_spin_queue *node,
26 struct optimistic_spin_queue *prev)
27{
28 struct optimistic_spin_queue *next = NULL;
29
30 for (;;) {
31 if (*lock == node && cmpxchg(lock, node, prev) == node) {
32 /*
33 * We were the last queued, we moved @lock back. @prev
34 * will now observe @lock and will complete its
35 * unlock()/unqueue().
36 */
37 break;
38 }
39
40 /*
41 * We must xchg() the @node->next value, because if we were to
42 * leave it in, a concurrent unlock()/unqueue() from
43 * @node->next might complete Step-A and think its @prev is
44 * still valid.
45 *
46 * If the concurrent unlock()/unqueue() wins the race, we'll
47 * wait for either @lock to point to us, through its Step-B, or
48 * wait for a new @node->next from its Step-C.
49 */
50 if (node->next) {
51 next = xchg(&node->next, NULL);
52 if (next)
53 break;
54 }
55
56 arch_mutex_cpu_relax();
57 }
58
59 return next;
60}
61
62bool osq_lock(struct optimistic_spin_queue **lock)
63{
64 struct optimistic_spin_queue *node = this_cpu_ptr(&osq_node);
65 struct optimistic_spin_queue *prev, *next;
66
67 node->locked = 0;
68 node->next = NULL;
69
70 node->prev = prev = xchg(lock, node);
71 if (likely(prev == NULL))
72 return true;
73
74 ACCESS_ONCE(prev->next) = node;
75
76 /*
77 * Normally @prev is untouchable after the above store, because at that
78 * moment unlock() can proceed and wipe the node element from the stack.
79 *
80 * However, since our nodes are static per-cpu storage, we're
81 * guaranteed their existence -- this allows us to apply
82 * cmpxchg in an attempt to undo our queueing.
83 */
84
85 while (!smp_load_acquire(&node->locked)) {
86 /*
87 * If we need to reschedule, bail out so that we can block.
88 */
89 if (need_resched())
90 goto unqueue;
91
92 arch_mutex_cpu_relax();
93 }
94 return true;
95
96unqueue:
97 /*
98 * Step - A -- stabilize @prev
99 *
100 * Undo our @prev->next assignment; this will make @prev's
101 * unlock()/unqueue() wait for a next pointer since @lock points to us
102 * (or later).
103 */
104
105 for (;;) {
106 if (prev->next == node &&
107 cmpxchg(&prev->next, node, NULL) == node)
108 break;
109
110 /*
111 * We can only fail the cmpxchg() racing against an unlock(),
112 * in which case we should observe @node->locked becoming
113 * true.
114 */
115 if (smp_load_acquire(&node->locked))
116 return true;
117
118 arch_mutex_cpu_relax();
119
120 /*
121 * Or we race against a concurrent unqueue()'s step-B, in which
122 * case its step-C will write us a new @node->prev pointer.
123 */
124 prev = ACCESS_ONCE(node->prev);
125 }
126
127 /*
128 * Step - B -- stabilize @next
129 *
130 * Similar to unlock(), wait for @node->next or move @lock from @node
131 * back to @prev.
132 */
133
134 next = osq_wait_next(lock, node, prev);
135 if (!next)
136 return false;
137
138 /*
139 * Step - C -- unlink
140 *
141 * @prev is stable because it's still waiting for a new @prev->next
142 * pointer, @next is stable because our @node->next pointer is NULL and
143 * it will wait in Step-A.
144 */
145
146 ACCESS_ONCE(next->prev) = prev;
147 ACCESS_ONCE(prev->next) = next;
148
149 return false;
150}
151
152void osq_unlock(struct optimistic_spin_queue **lock)
153{
154 struct optimistic_spin_queue *node = this_cpu_ptr(&osq_node);
155 struct optimistic_spin_queue *next;
156
157 /*
158 * Fast path for the uncontended case.
159 */
160 if (likely(cmpxchg(lock, node, NULL) == node))
161 return;
162
163 /*
164 * Second most likely case.
165 */
166 next = xchg(&node->next, NULL);
167 if (next) {
168 ACCESS_ONCE(next->locked) = 1;
169 return;
170 }
171
172 next = osq_wait_next(lock, node, NULL);
173 if (next)
174 ACCESS_ONCE(next->locked) = 1;
175}
176
177#endif
178
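
Editor's note: a point worth drawing out of the file above is that the lock word itself is the tail of the waiter queue. Both osq_unlock() and osq_wait_next() hinge on one cmpxchg that asks "does the tail still point at my node?" and, if so, swings it to NULL (unlock) or back to @prev (unqueue), meaning there is no successor to hand anything to. A tiny user-space C11 sketch of just that check, with hypothetical names rather than the kernel code:

#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Hypothetical stand-in for struct optimistic_spin_queue. */
struct qnode {
	_Atomic(struct qnode *) next;
	atomic_int locked;
};

static _Atomic(struct qnode *) tail = NULL;	/* the "lock" word doubles as queue tail */

/*
 * If the tail still points at @node, swing it to @replacement (NULL when
 * unlocking, @prev when unqueueing) and report that nobody queued behind us.
 * If the cmpxchg fails, a successor exists (or is busy linking itself in)
 * and the caller must wait for @node->next instead, as osq_wait_next() does.
 */
static bool tail_still_me(struct qnode *node, struct qnode *replacement)
{
	struct qnode *expected = node;

	return atomic_compare_exchange_strong(&tail, &expected, replacement);
}

int main(void)
{
	struct qnode a, b;

	atomic_init(&a.next, NULL);
	atomic_init(&a.locked, 0);
	atomic_init(&b.next, NULL);
	atomic_init(&b.locked, 0);

	atomic_store(&tail, &a);		/* queue is just [a] */
	printf("a last? %s\n", tail_still_me(&a, NULL) ? "yes" : "no");

	atomic_store(&tail, &a);
	atomic_store(&tail, &b);		/* b has queued behind a */
	printf("a last? %s\n", tail_still_me(&a, NULL) ? "yes" : "no");
	return 0;
}
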
diff --git a/kernel/locking/mcs_spinlock.h b/kernel/locking/mcs_spinlock.h
new file mode 100644
index 000000000000..a2dbac4aca6b
--- /dev/null
+++ b/kernel/locking/mcs_spinlock.h
@@ -0,0 +1,129 @@
1/*
2 * MCS lock defines
3 *
4 * This file contains the main data structure and API definitions of MCS lock.
5 *
6 * The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spin-lock
7 * with the desirable properties of being fair, and of having each cpu
8 * that is trying to acquire the lock spin on a local variable.
9 * It avoids the expensive cache-line bouncing that common test-and-set spin-lock
10 * implementations incur.
11 */
12#ifndef __LINUX_MCS_SPINLOCK_H
13#define __LINUX_MCS_SPINLOCK_H
14
15#include <asm/mcs_spinlock.h>
16
17struct mcs_spinlock {
18 struct mcs_spinlock *next;
19 int locked; /* 1 if lock acquired */
20};
21
22#ifndef arch_mcs_spin_lock_contended
23/*
24 * Using smp_load_acquire() provides a memory barrier that ensures
25 * subsequent operations happen after the lock is acquired.
26 */
27#define arch_mcs_spin_lock_contended(l) \
28do { \
29 while (!(smp_load_acquire(l))) \
30 arch_mutex_cpu_relax(); \
31} while (0)
32#endif
33
34#ifndef arch_mcs_spin_unlock_contended
35/*
36 * smp_store_release() provides a memory barrier to ensure all
37 * operations in the critical section have been completed before
38 * unlocking.
39 */
40#define arch_mcs_spin_unlock_contended(l) \
41 smp_store_release((l), 1)
42#endif
43
44/*
45 * Note: the smp_load_acquire/smp_store_release pair is not
46 * sufficient to form a full memory barrier across
47 * cpus for many architectures (except x86) for mcs_unlock and mcs_lock.
48 * For applications that need a full barrier across multiple cpus
49 * with mcs_unlock and mcs_lock pair, smp_mb__after_unlock_lock() should be
50 * used after mcs_lock.
51 */
52
53/*
54 * In order to acquire the lock, the caller should declare a local node and
55 * pass a reference of the node to this function in addition to the lock.
56 * If the lock has already been acquired, then this will proceed to spin
57 * on this node->locked until the previous lock holder sets the node->locked
58 * in mcs_spin_unlock().
59 *
60 * We don't inline mcs_spin_lock() so that perf can correctly account for the
61 * time spent in this lock function.
62 */
63static inline
64void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
65{
66 struct mcs_spinlock *prev;
67
68 /* Init node */
69 node->locked = 0;
70 node->next = NULL;
71
72 prev = xchg(lock, node);
73 if (likely(prev == NULL)) {
74 /*
75 * Lock acquired; there is no need to set node->locked to 1. Threads
76 * only spin on their own node->locked value for lock acquisition.
77 * However, since this thread can immediately acquire the lock
78 * and does not proceed to spin on its own node->locked, this
79 * value won't be used. If a debug mode is needed to
80 * audit lock status, then set node->locked value here.
81 */
82 return;
83 }
84 ACCESS_ONCE(prev->next) = node;
85
86 /* Wait until the lock holder passes the lock down. */
87 arch_mcs_spin_lock_contended(&node->locked);
88}
89
90/*
91 * Releases the lock. The caller should pass in the corresponding node that
92 * was used to acquire the lock.
93 */
94static inline
95void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
96{
97 struct mcs_spinlock *next = ACCESS_ONCE(node->next);
98
99 if (likely(!next)) {
100 /*
101 * Release the lock by setting it to NULL
102 */
103 if (likely(cmpxchg(lock, node, NULL) == node))
104 return;
105 /* Wait until the next pointer is set */
106 while (!(next = ACCESS_ONCE(node->next)))
107 arch_mutex_cpu_relax();
108 }
109
110 /* Pass lock to next waiter. */
111 arch_mcs_spin_unlock_contended(&next->locked);
112}
113
114/*
115 * Cancellable version of the MCS lock above.
116 *
117 * Intended for adaptive spinning of sleeping locks:
118 * mutex_lock()/rwsem_down_{read,write}() etc.
119 */
120
121struct optimistic_spin_queue {
122 struct optimistic_spin_queue *next, *prev;
123 int locked; /* 1 if lock acquired */
124};
125
126extern bool osq_lock(struct optimistic_spin_queue **lock);
127extern void osq_unlock(struct optimistic_spin_queue **lock);
128
129#endif /* __LINUX_MCS_SPINLOCK_H */
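
Editor's note: for experimenting with the algorithm outside the kernel, here is a rough user-space analogue of the MCS lock described above, built on C11 atomics and pthreads. It is an illustrative sketch, not the kernel implementation: the acquire load and release store stand in for arch_mcs_spin_lock_contended()/arch_mcs_spin_unlock_contended(), and arch_mutex_cpu_relax() degenerates to a bare busy-wait.

#include <stdatomic.h>
#include <stddef.h>
#include <pthread.h>
#include <stdio.h>

struct mcs_node {
	_Atomic(struct mcs_node *) next;
	atomic_int locked;		/* becomes 1 when the lock is handed to us */
};

static _Atomic(struct mcs_node *) lock_tail = NULL;	/* NULL == unlocked */

static void mcs_lock(struct mcs_node *node)
{
	struct mcs_node *prev;

	atomic_store(&node->next, NULL);
	atomic_store(&node->locked, 0);

	/* Swap ourselves in as the new queue tail; NULL means it was free. */
	prev = atomic_exchange(&lock_tail, node);
	if (!prev)
		return;				/* uncontended: lock acquired */

	/* Link behind the old tail, then spin on our own flag only. */
	atomic_store(&prev->next, node);
	while (!atomic_load_explicit(&node->locked, memory_order_acquire))
		;				/* cpu_relax() in the kernel */
}

static void mcs_unlock(struct mcs_node *node)
{
	struct mcs_node *next = atomic_load(&node->next);

	if (!next) {
		struct mcs_node *expected = node;

		/* No successor visible: try to mark the lock free. */
		if (atomic_compare_exchange_strong(&lock_tail, &expected, NULL))
			return;
		/* A successor is linking itself in; wait for node->next. */
		while (!(next = atomic_load(&node->next)))
			;
	}
	/* Hand the lock over; release pairs with the acquire spin above. */
	atomic_store_explicit(&next->locked, 1, memory_order_release);
}

static long counter;				/* protected by the MCS lock */

static void *worker(void *arg)
{
	struct mcs_node node;
	int i;

	(void)arg;
	for (i = 0; i < 100000; i++) {
		mcs_lock(&node);
		counter++;			/* critical section */
		mcs_unlock(&node);
	}
	return NULL;
}

int main(void)
{
	pthread_t t[4];
	int i;

	for (i = 0; i < 4; i++)
		pthread_create(&t[i], NULL, worker, NULL);
	for (i = 0; i < 4; i++)
		pthread_join(t[i], NULL);
	printf("counter = %ld (expect 400000)\n", counter);
	return 0;
}

Build with something like cc -std=c11 -pthread; each thread spins only on its own stack-resident node->locked, which is exactly the cache-locality property the header comment above highlights.
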
diff --git a/kernel/locking/mutex-debug.c b/kernel/locking/mutex-debug.c
index faf6f5b53e77..e1191c996c59 100644
--- a/kernel/locking/mutex-debug.c
+++ b/kernel/locking/mutex-debug.c
@@ -83,6 +83,12 @@ void debug_mutex_unlock(struct mutex *lock)
83 83
84 DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next); 84 DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next);
85 mutex_clear_owner(lock); 85 mutex_clear_owner(lock);
86
87 /*
88 * __mutex_slowpath_needs_to_unlock() is explicitly 0 for debug
89 * mutexes so that we can do it here after we've verified state.
90 */
91 atomic_set(&lock->count, 1);
86} 92}
87 93
88void debug_mutex_init(struct mutex *lock, const char *name, 94void debug_mutex_init(struct mutex *lock, const char *name,
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 4dd6e4c219de..14fe72cc8ce7 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -25,6 +25,7 @@
25#include <linux/spinlock.h> 25#include <linux/spinlock.h>
26#include <linux/interrupt.h> 26#include <linux/interrupt.h>
27#include <linux/debug_locks.h> 27#include <linux/debug_locks.h>
28#include "mcs_spinlock.h"
28 29
29/* 30/*
30 * In the DEBUG case we are using the "NULL fastpath" for mutexes, 31 * In the DEBUG case we are using the "NULL fastpath" for mutexes,
@@ -33,6 +34,13 @@
33#ifdef CONFIG_DEBUG_MUTEXES 34#ifdef CONFIG_DEBUG_MUTEXES
34# include "mutex-debug.h" 35# include "mutex-debug.h"
35# include <asm-generic/mutex-null.h> 36# include <asm-generic/mutex-null.h>
37/*
38 * Must be 0 for the debug case so we do not do the unlock outside of the
39 * wait_lock region. debug_mutex_unlock() will do the actual unlock in this
40 * case.
41 */
42# undef __mutex_slowpath_needs_to_unlock
43# define __mutex_slowpath_needs_to_unlock() 0
36#else 44#else
37# include "mutex.h" 45# include "mutex.h"
38# include <asm/mutex.h> 46# include <asm/mutex.h>
@@ -52,7 +60,7 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
52 INIT_LIST_HEAD(&lock->wait_list); 60 INIT_LIST_HEAD(&lock->wait_list);
53 mutex_clear_owner(lock); 61 mutex_clear_owner(lock);
54#ifdef CONFIG_MUTEX_SPIN_ON_OWNER 62#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
55 lock->spin_mlock = NULL; 63 lock->osq = NULL;
56#endif 64#endif
57 65
58 debug_mutex_init(lock, name, key); 66 debug_mutex_init(lock, name, key);
@@ -111,54 +119,7 @@ EXPORT_SYMBOL(mutex_lock);
111 * more or less simultaneously, the spinners need to acquire a MCS lock 119 * more or less simultaneously, the spinners need to acquire a MCS lock
112 * first before spinning on the owner field. 120 * first before spinning on the owner field.
113 * 121 *
114 * We don't inline mspin_lock() so that perf can correctly account for the
115 * time spent in this lock function.
116 */ 122 */
117struct mspin_node {
118 struct mspin_node *next ;
119 int locked; /* 1 if lock acquired */
120};
121#define MLOCK(mutex) ((struct mspin_node **)&((mutex)->spin_mlock))
122
123static noinline
124void mspin_lock(struct mspin_node **lock, struct mspin_node *node)
125{
126 struct mspin_node *prev;
127
128 /* Init node */
129 node->locked = 0;
130 node->next = NULL;
131
132 prev = xchg(lock, node);
133 if (likely(prev == NULL)) {
134 /* Lock acquired */
135 node->locked = 1;
136 return;
137 }
138 ACCESS_ONCE(prev->next) = node;
139 smp_wmb();
140 /* Wait until the lock holder passes the lock down */
141 while (!ACCESS_ONCE(node->locked))
142 arch_mutex_cpu_relax();
143}
144
145static void mspin_unlock(struct mspin_node **lock, struct mspin_node *node)
146{
147 struct mspin_node *next = ACCESS_ONCE(node->next);
148
149 if (likely(!next)) {
150 /*
151 * Release the lock by setting it to NULL
152 */
153 if (cmpxchg(lock, node, NULL) == node)
154 return;
155 /* Wait until the next pointer is set */
156 while (!(next = ACCESS_ONCE(node->next)))
157 arch_mutex_cpu_relax();
158 }
159 ACCESS_ONCE(next->locked) = 1;
160 smp_wmb();
161}
162 123
163/* 124/*
164 * Mutex spinning code migrated from kernel/sched/core.c 125 * Mutex spinning code migrated from kernel/sched/core.c
@@ -212,6 +173,9 @@ static inline int mutex_can_spin_on_owner(struct mutex *lock)
212 struct task_struct *owner; 173 struct task_struct *owner;
213 int retval = 1; 174 int retval = 1;
214 175
176 if (need_resched())
177 return 0;
178
215 rcu_read_lock(); 179 rcu_read_lock();
216 owner = ACCESS_ONCE(lock->owner); 180 owner = ACCESS_ONCE(lock->owner);
217 if (owner) 181 if (owner)
@@ -446,9 +410,11 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
446 if (!mutex_can_spin_on_owner(lock)) 410 if (!mutex_can_spin_on_owner(lock))
447 goto slowpath; 411 goto slowpath;
448 412
413 if (!osq_lock(&lock->osq))
414 goto slowpath;
415
449 for (;;) { 416 for (;;) {
450 struct task_struct *owner; 417 struct task_struct *owner;
451 struct mspin_node node;
452 418
453 if (use_ww_ctx && ww_ctx->acquired > 0) { 419 if (use_ww_ctx && ww_ctx->acquired > 0) {
454 struct ww_mutex *ww; 420 struct ww_mutex *ww;
@@ -463,19 +429,16 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
463 * performed the optimistic spinning cannot be done. 429 * performed the optimistic spinning cannot be done.
464 */ 430 */
465 if (ACCESS_ONCE(ww->ctx)) 431 if (ACCESS_ONCE(ww->ctx))
466 goto slowpath; 432 break;
467 } 433 }
468 434
469 /* 435 /*
470 * If there's an owner, wait for it to either 436 * If there's an owner, wait for it to either
471 * release the lock or go to sleep. 437 * release the lock or go to sleep.
472 */ 438 */
473 mspin_lock(MLOCK(lock), &node);
474 owner = ACCESS_ONCE(lock->owner); 439 owner = ACCESS_ONCE(lock->owner);
475 if (owner && !mutex_spin_on_owner(lock, owner)) { 440 if (owner && !mutex_spin_on_owner(lock, owner))
476 mspin_unlock(MLOCK(lock), &node); 441 break;
477 goto slowpath;
478 }
479 442
480 if ((atomic_read(&lock->count) == 1) && 443 if ((atomic_read(&lock->count) == 1) &&
481 (atomic_cmpxchg(&lock->count, 1, 0) == 1)) { 444 (atomic_cmpxchg(&lock->count, 1, 0) == 1)) {
@@ -488,11 +451,10 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
488 } 451 }
489 452
490 mutex_set_owner(lock); 453 mutex_set_owner(lock);
491 mspin_unlock(MLOCK(lock), &node); 454 osq_unlock(&lock->osq);
492 preempt_enable(); 455 preempt_enable();
493 return 0; 456 return 0;
494 } 457 }
495 mspin_unlock(MLOCK(lock), &node);
496 458
497 /* 459 /*
498 * When there's no owner, we might have preempted between the 460 * When there's no owner, we might have preempted between the
@@ -501,7 +463,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
501 * the owner complete. 463 * the owner complete.
502 */ 464 */
503 if (!owner && (need_resched() || rt_task(task))) 465 if (!owner && (need_resched() || rt_task(task)))
504 goto slowpath; 466 break;
505 467
506 /* 468 /*
507 * The cpu_relax() call is a compiler barrier which forces 469 * The cpu_relax() call is a compiler barrier which forces
@@ -511,7 +473,15 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
511 */ 473 */
512 arch_mutex_cpu_relax(); 474 arch_mutex_cpu_relax();
513 } 475 }
476 osq_unlock(&lock->osq);
514slowpath: 477slowpath:
478 /*
479 * If we fell out of the spin path because of need_resched(),
480 * reschedule now, before we try-lock the mutex. This avoids getting
481 * scheduled out right after we obtained the mutex.
482 */
483 if (need_resched())
484 schedule_preempt_disabled();
515#endif 485#endif
516 spin_lock_mutex(&lock->wait_lock, flags); 486 spin_lock_mutex(&lock->wait_lock, flags);
517 487
@@ -717,10 +687,6 @@ __mutex_unlock_common_slowpath(atomic_t *lock_count, int nested)
717 struct mutex *lock = container_of(lock_count, struct mutex, count); 687 struct mutex *lock = container_of(lock_count, struct mutex, count);
718 unsigned long flags; 688 unsigned long flags;
719 689
720 spin_lock_mutex(&lock->wait_lock, flags);
721 mutex_release(&lock->dep_map, nested, _RET_IP_);
722 debug_mutex_unlock(lock);
723
724 /* 690 /*
725 * some architectures leave the lock unlocked in the fastpath failure 691 * some architectures leave the lock unlocked in the fastpath failure
726 * case, others need to leave it locked. In the later case we have to 692 * case, others need to leave it locked. In the later case we have to
@@ -729,6 +695,10 @@ __mutex_unlock_common_slowpath(atomic_t *lock_count, int nested)
729 if (__mutex_slowpath_needs_to_unlock()) 695 if (__mutex_slowpath_needs_to_unlock())
730 atomic_set(&lock->count, 1); 696 atomic_set(&lock->count, 1);
731 697
698 spin_lock_mutex(&lock->wait_lock, flags);
699 mutex_release(&lock->dep_map, nested, _RET_IP_);
700 debug_mutex_unlock(lock);
701
732 if (!list_empty(&lock->wait_list)) { 702 if (!list_empty(&lock->wait_list)) {
733 /* get the first entry from the wait-list: */ 703 /* get the first entry from the wait-list: */
734 struct mutex_waiter *waiter = 704 struct mutex_waiter *waiter =
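
Editor's note: taken together, the mutex.c hunks make the optimistic-spin phase (a) serialized by the cancellable osq lock, so only one spinner at a time pounds on the owner field, and (b) quick to give up: need_resched(), a blocking owner, or an observed ww_ctx now break out to the blocking slowpath, with a schedule_preempt_disabled() before the try-lock so the task is not scheduled out right after acquiring the mutex. A loose user-space analogue of that spin-then-block shape (pthreads, illustrative only, not the kernel algorithm):

#include <pthread.h>
#include <sched.h>
#include <stdio.h>

static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;

/*
 * Optimistically spin on trylock for a bounded number of attempts, then
 * fall back to a blocking lock -- loosely analogous to spinning on the
 * mutex owner before entering the wait-list slowpath.
 */
static void adaptive_lock(void)
{
	int spins;

	for (spins = 0; spins < 1000; spins++) {
		if (pthread_mutex_trylock(&m) == 0)
			return;			/* acquired while spinning */
		sched_yield();			/* crude stand-in for cpu_relax() */
	}
	pthread_mutex_lock(&m);			/* blocking slowpath */
}

static void adaptive_unlock(void)
{
	pthread_mutex_unlock(&m);
}

int main(void)
{
	adaptive_lock();
	puts("in critical section");
	adaptive_unlock();
	return 0;
}
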
diff --git a/kernel/notifier.c b/kernel/notifier.c
index 2d5cc4ccff7f..db4c8b08a50c 100644
--- a/kernel/notifier.c
+++ b/kernel/notifier.c
@@ -309,7 +309,7 @@ int __blocking_notifier_call_chain(struct blocking_notifier_head *nh,
309 * racy then it does not matter what the result of the test 309 * racy then it does not matter what the result of the test
310 * is, we re-check the list after having taken the lock anyway: 310 * is, we re-check the list after having taken the lock anyway:
311 */ 311 */
312 if (rcu_dereference_raw(nh->head)) { 312 if (rcu_access_pointer(nh->head)) {
313 down_read(&nh->rwsem); 313 down_read(&nh->rwsem);
314 ret = notifier_call_chain(&nh->head, val, v, nr_to_call, 314 ret = notifier_call_chain(&nh->head, val, v, nr_to_call,
315 nr_calls); 315 nr_calls);
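
Editor's note: the rcu_dereference_raw() -> rcu_access_pointer() switch reflects that this code only tests the pointer's value and never dereferences it; as the surrounding comment notes, a racy answer is harmless because the list is re-checked after the lock is taken. A small user-space sketch of that unlocked-peek-then-recheck pattern (hypothetical names, C11 atomics plus a POSIX rwlock, not the kernel's RCU machinery):

#include <pthread.h>
#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>

struct node {
	struct node *next;
};

static _Atomic(struct node *) head = NULL;	/* writers publish under the lock */
static pthread_rwlock_t list_lock = PTHREAD_RWLOCK_INITIALIZER;

/*
 * Unlocked peek: only the pointer value is tested, never dereferenced.
 * The answer may be stale; losing the race merely costs one lock/unlock,
 * because the list is walked again under the lock below.
 */
static int list_maybe_nonempty(void)
{
	return atomic_load_explicit(&head, memory_order_relaxed) != NULL;
}

static void visit_all(void (*fn)(struct node *))
{
	struct node *n;

	if (!list_maybe_nonempty())
		return;				/* common empty case: no locking */

	pthread_rwlock_rdlock(&list_lock);
	for (n = atomic_load(&head); n; n = n->next)	/* re-checked under lock */
		fn(n);
	pthread_rwlock_unlock(&list_lock);
}

static struct node n1;

static void print_one(struct node *n)
{
	printf("visiting %p\n", (void *)n);
}

int main(void)
{
	visit_all(print_one);			/* empty list: returns immediately */

	pthread_rwlock_wrlock(&list_lock);
	n1.next = NULL;
	atomic_store(&head, &n1);		/* publish one element */
	pthread_rwlock_unlock(&list_lock);

	visit_all(print_one);			/* now takes the lock and walks */
	return 0;
}
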
diff --git a/kernel/power/console.c b/kernel/power/console.c
index eacb8bd8cab4..aba9c545a0e3 100644
--- a/kernel/power/console.c
+++ b/kernel/power/console.c
@@ -9,6 +9,7 @@
9#include <linux/kbd_kern.h> 9#include <linux/kbd_kern.h>
10#include <linux/vt.h> 10#include <linux/vt.h>
11#include <linux/module.h> 11#include <linux/module.h>
12#include <linux/slab.h>
12#include "power.h" 13#include "power.h"
13 14
14#define SUSPEND_CONSOLE (MAX_NR_CONSOLES-1) 15#define SUSPEND_CONSOLE (MAX_NR_CONSOLES-1)
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index b1d255f04135..4dae9cbe9259 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -1076,7 +1076,6 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
1076 next_seq = log_next_seq; 1076 next_seq = log_next_seq;
1077 1077
1078 len = 0; 1078 len = 0;
1079 prev = 0;
1080 while (len >= 0 && seq < next_seq) { 1079 while (len >= 0 && seq < next_seq) {
1081 struct printk_log *msg = log_from_idx(idx); 1080 struct printk_log *msg = log_from_idx(idx);
1082 int textlen; 1081 int textlen;
@@ -2788,7 +2787,6 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
2788 next_idx = idx; 2787 next_idx = idx;
2789 2788
2790 l = 0; 2789 l = 0;
2791 prev = 0;
2792 while (seq < dumper->next_seq) { 2790 while (seq < dumper->next_seq) {
2793 struct printk_log *msg = log_from_idx(idx); 2791 struct printk_log *msg = log_from_idx(idx);
2794 2792
diff --git a/kernel/profile.c b/kernel/profile.c
index 6631e1ef55ab..ebdd9c1a86b4 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -549,14 +549,14 @@ static int create_hash_tables(void)
549 struct page *page; 549 struct page *page;
550 550
551 page = alloc_pages_exact_node(node, 551 page = alloc_pages_exact_node(node,
552 GFP_KERNEL | __GFP_ZERO | GFP_THISNODE, 552 GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE,
553 0); 553 0);
554 if (!page) 554 if (!page)
555 goto out_cleanup; 555 goto out_cleanup;
556 per_cpu(cpu_profile_hits, cpu)[1] 556 per_cpu(cpu_profile_hits, cpu)[1]
557 = (struct profile_hit *)page_address(page); 557 = (struct profile_hit *)page_address(page);
558 page = alloc_pages_exact_node(node, 558 page = alloc_pages_exact_node(node,
559 GFP_KERNEL | __GFP_ZERO | GFP_THISNODE, 559 GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE,
560 0); 560 0);
561 if (!page) 561 if (!page)
562 goto out_cleanup; 562 goto out_cleanup;
diff --git a/kernel/rcu/Makefile b/kernel/rcu/Makefile
index 01e9ec37a3e3..807ccfbf69b3 100644
--- a/kernel/rcu/Makefile
+++ b/kernel/rcu/Makefile
@@ -1,5 +1,5 @@
1obj-y += update.o srcu.o 1obj-y += update.o srcu.o
2obj-$(CONFIG_RCU_TORTURE_TEST) += torture.o 2obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
3obj-$(CONFIG_TREE_RCU) += tree.o 3obj-$(CONFIG_TREE_RCU) += tree.o
4obj-$(CONFIG_TREE_PREEMPT_RCU) += tree.o 4obj-$(CONFIG_TREE_PREEMPT_RCU) += tree.o
5obj-$(CONFIG_TREE_RCU_TRACE) += tree_trace.o 5obj-$(CONFIG_TREE_RCU_TRACE) += tree_trace.o
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 79c3877e9c5b..bfda2726ca45 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -12,8 +12,8 @@
12 * GNU General Public License for more details. 12 * GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software 15 * along with this program; if not, you can access it online at
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 16 * http://www.gnu.org/licenses/gpl-2.0.html.
17 * 17 *
18 * Copyright IBM Corporation, 2011 18 * Copyright IBM Corporation, 2011
19 * 19 *
@@ -23,6 +23,7 @@
23#ifndef __LINUX_RCU_H 23#ifndef __LINUX_RCU_H
24#define __LINUX_RCU_H 24#define __LINUX_RCU_H
25 25
26#include <trace/events/rcu.h>
26#ifdef CONFIG_RCU_TRACE 27#ifdef CONFIG_RCU_TRACE
27#define RCU_TRACE(stmt) stmt 28#define RCU_TRACE(stmt) stmt
28#else /* #ifdef CONFIG_RCU_TRACE */ 29#else /* #ifdef CONFIG_RCU_TRACE */
@@ -116,8 +117,6 @@ static inline bool __rcu_reclaim(const char *rn, struct rcu_head *head)
116 } 117 }
117} 118}
118 119
119extern int rcu_expedited;
120
121#ifdef CONFIG_RCU_STALL_COMMON 120#ifdef CONFIG_RCU_STALL_COMMON
122 121
123extern int rcu_cpu_stall_suppress; 122extern int rcu_cpu_stall_suppress;
diff --git a/kernel/rcu/torture.c b/kernel/rcu/rcutorture.c
index 732f8ae3086a..f59d48597dde 100644
--- a/kernel/rcu/torture.c
+++ b/kernel/rcu/rcutorture.c
@@ -12,8 +12,8 @@
12 * GNU General Public License for more details. 12 * GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software 15 * along with this program; if not, you can access it online at
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 16 * http://www.gnu.org/licenses/gpl-2.0.html.
17 * 17 *
18 * Copyright (C) IBM Corporation, 2005, 2006 18 * Copyright (C) IBM Corporation, 2005, 2006
19 * 19 *
@@ -48,110 +48,58 @@
48#include <linux/slab.h> 48#include <linux/slab.h>
49#include <linux/trace_clock.h> 49#include <linux/trace_clock.h>
50#include <asm/byteorder.h> 50#include <asm/byteorder.h>
51#include <linux/torture.h>
51 52
52MODULE_LICENSE("GPL"); 53MODULE_LICENSE("GPL");
53MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and Josh Triplett <josh@freedesktop.org>"); 54MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and Josh Triplett <josh@freedesktop.org>");
54 55
55MODULE_ALIAS("rcutorture"); 56
56#ifdef MODULE_PARAM_PREFIX 57torture_param(int, fqs_duration, 0,
57#undef MODULE_PARAM_PREFIX 58 "Duration of fqs bursts (us), 0 to disable");
58#endif 59torture_param(int, fqs_holdoff, 0, "Holdoff time within fqs bursts (us)");
59#define MODULE_PARAM_PREFIX "rcutorture." 60torture_param(int, fqs_stutter, 3, "Wait time between fqs bursts (s)");
60 61torture_param(bool, gp_exp, false, "Use expedited GP wait primitives");
61static int fqs_duration; 62torture_param(bool, gp_normal, false,
62module_param(fqs_duration, int, 0444); 63 "Use normal (non-expedited) GP wait primitives");
63MODULE_PARM_DESC(fqs_duration, "Duration of fqs bursts (us), 0 to disable"); 64torture_param(int, irqreader, 1, "Allow RCU readers from irq handlers");
64static int fqs_holdoff; 65torture_param(int, n_barrier_cbs, 0,
65module_param(fqs_holdoff, int, 0444); 66 "# of callbacks/kthreads for barrier testing");
66MODULE_PARM_DESC(fqs_holdoff, "Holdoff time within fqs bursts (us)"); 67torture_param(int, nfakewriters, 4, "Number of RCU fake writer threads");
67static int fqs_stutter = 3; 68torture_param(int, nreaders, -1, "Number of RCU reader threads");
68module_param(fqs_stutter, int, 0444); 69torture_param(int, object_debug, 0,
69MODULE_PARM_DESC(fqs_stutter, "Wait time between fqs bursts (s)"); 70 "Enable debug-object double call_rcu() testing");
70static bool gp_exp; 71torture_param(int, onoff_holdoff, 0, "Time after boot before CPU hotplugs (s)");
71module_param(gp_exp, bool, 0444); 72torture_param(int, onoff_interval, 0,
72MODULE_PARM_DESC(gp_exp, "Use expedited GP wait primitives"); 73 "Time between CPU hotplugs (s), 0=disable");
73static bool gp_normal; 74torture_param(int, shuffle_interval, 3, "Number of seconds between shuffles");
74module_param(gp_normal, bool, 0444); 75torture_param(int, shutdown_secs, 0, "Shutdown time (s), <= zero to disable.");
75MODULE_PARM_DESC(gp_normal, "Use normal (non-expedited) GP wait primitives"); 76torture_param(int, stall_cpu, 0, "Stall duration (s), zero to disable.");
76static int irqreader = 1; 77torture_param(int, stall_cpu_holdoff, 10,
77module_param(irqreader, int, 0444); 78 "Time to wait before starting stall (s).");
78MODULE_PARM_DESC(irqreader, "Allow RCU readers from irq handlers"); 79torture_param(int, stat_interval, 60,
79static int n_barrier_cbs; 80 "Number of seconds between stats printk()s");
80module_param(n_barrier_cbs, int, 0444); 81torture_param(int, stutter, 5, "Number of seconds to run/halt test");
81MODULE_PARM_DESC(n_barrier_cbs, "# of callbacks/kthreads for barrier testing"); 82torture_param(int, test_boost, 1, "Test RCU prio boost: 0=no, 1=maybe, 2=yes.");
82static int nfakewriters = 4; 83torture_param(int, test_boost_duration, 4,
83module_param(nfakewriters, int, 0444); 84 "Duration of each boost test, seconds.");
84MODULE_PARM_DESC(nfakewriters, "Number of RCU fake writer threads"); 85torture_param(int, test_boost_interval, 7,
85static int nreaders = -1; 86 "Interval between boost tests, seconds.");
86module_param(nreaders, int, 0444); 87torture_param(bool, test_no_idle_hz, true,
87MODULE_PARM_DESC(nreaders, "Number of RCU reader threads"); 88 "Test support for tickless idle CPUs");
88static int object_debug; 89torture_param(bool, verbose, true,
89module_param(object_debug, int, 0444); 90 "Enable verbose debugging printk()s");
90MODULE_PARM_DESC(object_debug, "Enable debug-object double call_rcu() testing"); 91
91static int onoff_holdoff;
92module_param(onoff_holdoff, int, 0444);
93MODULE_PARM_DESC(onoff_holdoff, "Time after boot before CPU hotplugs (s)");
94static int onoff_interval;
95module_param(onoff_interval, int, 0444);
96MODULE_PARM_DESC(onoff_interval, "Time between CPU hotplugs (s), 0=disable");
97static int shuffle_interval = 3;
98module_param(shuffle_interval, int, 0444);
99MODULE_PARM_DESC(shuffle_interval, "Number of seconds between shuffles");
100static int shutdown_secs;
101module_param(shutdown_secs, int, 0444);
102MODULE_PARM_DESC(shutdown_secs, "Shutdown time (s), <= zero to disable.");
103static int stall_cpu;
104module_param(stall_cpu, int, 0444);
105MODULE_PARM_DESC(stall_cpu, "Stall duration (s), zero to disable.");
106static int stall_cpu_holdoff = 10;
107module_param(stall_cpu_holdoff, int, 0444);
108MODULE_PARM_DESC(stall_cpu_holdoff, "Time to wait before starting stall (s).");
109static int stat_interval = 60;
110module_param(stat_interval, int, 0644);
111MODULE_PARM_DESC(stat_interval, "Number of seconds between stats printk()s");
112static int stutter = 5;
113module_param(stutter, int, 0444);
114MODULE_PARM_DESC(stutter, "Number of seconds to run/halt test");
115static int test_boost = 1;
116module_param(test_boost, int, 0444);
117MODULE_PARM_DESC(test_boost, "Test RCU prio boost: 0=no, 1=maybe, 2=yes.");
118static int test_boost_duration = 4;
119module_param(test_boost_duration, int, 0444);
120MODULE_PARM_DESC(test_boost_duration, "Duration of each boost test, seconds.");
121static int test_boost_interval = 7;
122module_param(test_boost_interval, int, 0444);
123MODULE_PARM_DESC(test_boost_interval, "Interval between boost tests, seconds.");
124static bool test_no_idle_hz = true;
125module_param(test_no_idle_hz, bool, 0444);
126MODULE_PARM_DESC(test_no_idle_hz, "Test support for tickless idle CPUs");
127static char *torture_type = "rcu"; 92static char *torture_type = "rcu";
128module_param(torture_type, charp, 0444); 93module_param(torture_type, charp, 0444);
129MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, ...)"); 94MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, ...)");
130static bool verbose;
131module_param(verbose, bool, 0444);
132MODULE_PARM_DESC(verbose, "Enable verbose debugging printk()s");
133
134#define TORTURE_FLAG "-torture:"
135#define PRINTK_STRING(s) \
136 do { pr_alert("%s" TORTURE_FLAG s "\n", torture_type); } while (0)
137#define VERBOSE_PRINTK_STRING(s) \
138 do { if (verbose) pr_alert("%s" TORTURE_FLAG s "\n", torture_type); } while (0)
139#define VERBOSE_PRINTK_ERRSTRING(s) \
140 do { if (verbose) pr_alert("%s" TORTURE_FLAG "!!! " s "\n", torture_type); } while (0)
141 95
142static int nrealreaders; 96static int nrealreaders;
143static struct task_struct *writer_task; 97static struct task_struct *writer_task;
144static struct task_struct **fakewriter_tasks; 98static struct task_struct **fakewriter_tasks;
145static struct task_struct **reader_tasks; 99static struct task_struct **reader_tasks;
146static struct task_struct *stats_task; 100static struct task_struct *stats_task;
147static struct task_struct *shuffler_task;
148static struct task_struct *stutter_task;
149static struct task_struct *fqs_task; 101static struct task_struct *fqs_task;
150static struct task_struct *boost_tasks[NR_CPUS]; 102static struct task_struct *boost_tasks[NR_CPUS];
151static struct task_struct *shutdown_task;
152#ifdef CONFIG_HOTPLUG_CPU
153static struct task_struct *onoff_task;
154#endif /* #ifdef CONFIG_HOTPLUG_CPU */
155static struct task_struct *stall_task; 103static struct task_struct *stall_task;
156static struct task_struct **barrier_cbs_tasks; 104static struct task_struct **barrier_cbs_tasks;
157static struct task_struct *barrier_task; 105static struct task_struct *barrier_task;
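
Editor's note: each torture_param() line on the new side of this hunk replaces the three-statement pattern still visible on the old side: a static variable definition, a module_param() call, and a MODULE_PARM_DESC(). The macro itself comes from <linux/torture.h>, which is outside this hunk; as a rough orientation-only sketch (an assumption, not the actual kernel definition), it presumably bundles those statements along these lines:

/* Hypothetical sketch only -- see <linux/torture.h> for the real macro. */
#define torture_param(type, name, init, msg)			\
	static type name = init;				\
	module_param(name, type, 0444);				\
	MODULE_PARM_DESC(name, msg)

Note that the old stat_interval parameter was registered with mode 0644 while the others used 0444; the new side no longer spells out a mode per parameter, so any such distinction would have to live inside the macro.
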
@@ -170,10 +118,10 @@ static struct rcu_torture __rcu *rcu_torture_current;
170static unsigned long rcu_torture_current_version; 118static unsigned long rcu_torture_current_version;
171static struct rcu_torture rcu_tortures[10 * RCU_TORTURE_PIPE_LEN]; 119static struct rcu_torture rcu_tortures[10 * RCU_TORTURE_PIPE_LEN];
172static DEFINE_SPINLOCK(rcu_torture_lock); 120static DEFINE_SPINLOCK(rcu_torture_lock);
173static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], rcu_torture_count) = 121static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1],
174 { 0 }; 122 rcu_torture_count) = { 0 };
175static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], rcu_torture_batch) = 123static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1],
176 { 0 }; 124 rcu_torture_batch) = { 0 };
177static atomic_t rcu_torture_wcount[RCU_TORTURE_PIPE_LEN + 1]; 125static atomic_t rcu_torture_wcount[RCU_TORTURE_PIPE_LEN + 1];
178static atomic_t n_rcu_torture_alloc; 126static atomic_t n_rcu_torture_alloc;
179static atomic_t n_rcu_torture_alloc_fail; 127static atomic_t n_rcu_torture_alloc_fail;
@@ -186,22 +134,9 @@ static long n_rcu_torture_boost_rterror;
186static long n_rcu_torture_boost_failure; 134static long n_rcu_torture_boost_failure;
187static long n_rcu_torture_boosts; 135static long n_rcu_torture_boosts;
188static long n_rcu_torture_timers; 136static long n_rcu_torture_timers;
189static long n_offline_attempts;
190static long n_offline_successes;
191static unsigned long sum_offline;
192static int min_offline = -1;
193static int max_offline;
194static long n_online_attempts;
195static long n_online_successes;
196static unsigned long sum_online;
197static int min_online = -1;
198static int max_online;
199static long n_barrier_attempts; 137static long n_barrier_attempts;
200static long n_barrier_successes; 138static long n_barrier_successes;
201static struct list_head rcu_torture_removed; 139static struct list_head rcu_torture_removed;
202static cpumask_var_t shuffle_tmp_mask;
203
204static int stutter_pause_test;
205 140
206#if defined(MODULE) || defined(CONFIG_RCU_TORTURE_TEST_RUNNABLE) 141#if defined(MODULE) || defined(CONFIG_RCU_TORTURE_TEST_RUNNABLE)
207#define RCUTORTURE_RUNNABLE_INIT 1 142#define RCUTORTURE_RUNNABLE_INIT 1
@@ -232,7 +167,6 @@ static u64 notrace rcu_trace_clock_local(void)
232} 167}
233#endif /* #else #ifdef CONFIG_RCU_TRACE */ 168#endif /* #else #ifdef CONFIG_RCU_TRACE */
234 169
235static unsigned long shutdown_time; /* jiffies to system shutdown. */
236static unsigned long boost_starttime; /* jiffies of next boost test start. */ 170static unsigned long boost_starttime; /* jiffies of next boost test start. */
237DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */ 171DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */
238 /* and boost task create/destroy. */ 172 /* and boost task create/destroy. */
@@ -242,51 +176,6 @@ static atomic_t barrier_cbs_invoked; /* Barrier callbacks invoked. */
242static wait_queue_head_t *barrier_cbs_wq; /* Coordinate barrier testing. */ 176static wait_queue_head_t *barrier_cbs_wq; /* Coordinate barrier testing. */
243static DECLARE_WAIT_QUEUE_HEAD(barrier_wq); 177static DECLARE_WAIT_QUEUE_HEAD(barrier_wq);
244 178
245/* Mediate rmmod and system shutdown. Concurrent rmmod & shutdown illegal! */
246
247#define FULLSTOP_DONTSTOP 0 /* Normal operation. */
248#define FULLSTOP_SHUTDOWN 1 /* System shutdown with rcutorture running. */
249#define FULLSTOP_RMMOD 2 /* Normal rmmod of rcutorture. */
250static int fullstop = FULLSTOP_RMMOD;
251/*
252 * Protect fullstop transitions and spawning of kthreads.
253 */
254static DEFINE_MUTEX(fullstop_mutex);
255
256/* Forward reference. */
257static void rcu_torture_cleanup(void);
258
259/*
260 * Detect and respond to a system shutdown.
261 */
262static int
263rcutorture_shutdown_notify(struct notifier_block *unused1,
264 unsigned long unused2, void *unused3)
265{
266 mutex_lock(&fullstop_mutex);
267 if (fullstop == FULLSTOP_DONTSTOP)
268 fullstop = FULLSTOP_SHUTDOWN;
269 else
270 pr_warn(/* but going down anyway, so... */
271 "Concurrent 'rmmod rcutorture' and shutdown illegal!\n");
272 mutex_unlock(&fullstop_mutex);
273 return NOTIFY_DONE;
274}
275
276/*
277 * Absorb kthreads into a kernel function that won't return, so that
278 * they won't ever access module text or data again.
279 */
280static void rcutorture_shutdown_absorb(const char *title)
281{
282 if (ACCESS_ONCE(fullstop) == FULLSTOP_SHUTDOWN) {
283 pr_notice(
284 "rcutorture thread %s parking due to system shutdown\n",
285 title);
286 schedule_timeout_uninterruptible(MAX_SCHEDULE_TIMEOUT);
287 }
288}
289
290/* 179/*
291 * Allocate an element from the rcu_tortures pool. 180 * Allocate an element from the rcu_tortures pool.
292 */ 181 */
@@ -320,44 +209,6 @@ rcu_torture_free(struct rcu_torture *p)
320 spin_unlock_bh(&rcu_torture_lock); 209 spin_unlock_bh(&rcu_torture_lock);
321} 210}
322 211
323struct rcu_random_state {
324 unsigned long rrs_state;
325 long rrs_count;
326};
327
328#define RCU_RANDOM_MULT 39916801 /* prime */
329#define RCU_RANDOM_ADD 479001701 /* prime */
330#define RCU_RANDOM_REFRESH 10000
331
332#define DEFINE_RCU_RANDOM(name) struct rcu_random_state name = { 0, 0 }
333
334/*
335 * Crude but fast random-number generator. Uses a linear congruential
336 * generator, with occasional help from cpu_clock().
337 */
338static unsigned long
339rcu_random(struct rcu_random_state *rrsp)
340{
341 if (--rrsp->rrs_count < 0) {
342 rrsp->rrs_state += (unsigned long)local_clock();
343 rrsp->rrs_count = RCU_RANDOM_REFRESH;
344 }
345 rrsp->rrs_state = rrsp->rrs_state * RCU_RANDOM_MULT + RCU_RANDOM_ADD;
346 return swahw32(rrsp->rrs_state);
347}
348
349static void
350rcu_stutter_wait(const char *title)
351{
352 while (stutter_pause_test || !rcutorture_runnable) {
353 if (rcutorture_runnable)
354 schedule_timeout_interruptible(1);
355 else
356 schedule_timeout_interruptible(round_jiffies_relative(HZ));
357 rcutorture_shutdown_absorb(title);
358 }
359}
360
361/* 212/*
362 * Operations vector for selecting different types of tests. 213 * Operations vector for selecting different types of tests.
363 */ 214 */
@@ -365,7 +216,7 @@ rcu_stutter_wait(const char *title)
365struct rcu_torture_ops { 216struct rcu_torture_ops {
366 void (*init)(void); 217 void (*init)(void);
367 int (*readlock)(void); 218 int (*readlock)(void);
368 void (*read_delay)(struct rcu_random_state *rrsp); 219 void (*read_delay)(struct torture_random_state *rrsp);
369 void (*readunlock)(int idx); 220 void (*readunlock)(int idx);
370 int (*completed)(void); 221 int (*completed)(void);
371 void (*deferred_free)(struct rcu_torture *p); 222 void (*deferred_free)(struct rcu_torture *p);
@@ -392,7 +243,7 @@ static int rcu_torture_read_lock(void) __acquires(RCU)
392 return 0; 243 return 0;
393} 244}
394 245
395static void rcu_read_delay(struct rcu_random_state *rrsp) 246static void rcu_read_delay(struct torture_random_state *rrsp)
396{ 247{
397 const unsigned long shortdelay_us = 200; 248 const unsigned long shortdelay_us = 200;
398 const unsigned long longdelay_ms = 50; 249 const unsigned long longdelay_ms = 50;
@@ -401,12 +252,13 @@ static void rcu_read_delay(struct rcu_random_state *rrsp)
401 * period, and we want a long delay occasionally to trigger 252 * period, and we want a long delay occasionally to trigger
402 * force_quiescent_state. */ 253 * force_quiescent_state. */
403 254
404 if (!(rcu_random(rrsp) % (nrealreaders * 2000 * longdelay_ms))) 255 if (!(torture_random(rrsp) % (nrealreaders * 2000 * longdelay_ms)))
405 mdelay(longdelay_ms); 256 mdelay(longdelay_ms);
406 if (!(rcu_random(rrsp) % (nrealreaders * 2 * shortdelay_us))) 257 if (!(torture_random(rrsp) % (nrealreaders * 2 * shortdelay_us)))
407 udelay(shortdelay_us); 258 udelay(shortdelay_us);
408#ifdef CONFIG_PREEMPT 259#ifdef CONFIG_PREEMPT
409 if (!preempt_count() && !(rcu_random(rrsp) % (nrealreaders * 20000))) 260 if (!preempt_count() &&
261 !(torture_random(rrsp) % (nrealreaders * 20000)))
410 preempt_schedule(); /* No QS if preempt_disable() in effect */ 262 preempt_schedule(); /* No QS if preempt_disable() in effect */
411#endif 263#endif
412} 264}
@@ -427,7 +279,7 @@ rcu_torture_cb(struct rcu_head *p)
427 int i; 279 int i;
428 struct rcu_torture *rp = container_of(p, struct rcu_torture, rtort_rcu); 280 struct rcu_torture *rp = container_of(p, struct rcu_torture, rtort_rcu);
429 281
430 if (fullstop != FULLSTOP_DONTSTOP) { 282 if (torture_must_stop_irq()) {
431 /* Test is ending, just drop callbacks on the floor. */ 283 /* Test is ending, just drop callbacks on the floor. */
432 /* The next initialization will pick up the pieces. */ 284 /* The next initialization will pick up the pieces. */
433 return; 285 return;
@@ -520,6 +372,48 @@ static struct rcu_torture_ops rcu_bh_ops = {
520}; 372};
521 373
522/* 374/*
375 * Don't even think about trying any of these in real life!!!
376 * The names include "busted", and they really mean it!
377 * The only purpose of these functions is to provide a buggy RCU
378 * implementation to make sure that rcutorture correctly emits
379 * buggy-RCU error messages.
380 */
381static void rcu_busted_torture_deferred_free(struct rcu_torture *p)
382{
383 /* This is a deliberate bug for testing purposes only! */
384 rcu_torture_cb(&p->rtort_rcu);
385}
386
387static void synchronize_rcu_busted(void)
388{
389 /* This is a deliberate bug for testing purposes only! */
390}
391
392static void
393call_rcu_busted(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
394{
395 /* This is a deliberate bug for testing purposes only! */
396 func(head);
397}
398
399static struct rcu_torture_ops rcu_busted_ops = {
400 .init = rcu_sync_torture_init,
401 .readlock = rcu_torture_read_lock,
402 .read_delay = rcu_read_delay, /* just reuse rcu's version. */
403 .readunlock = rcu_torture_read_unlock,
404 .completed = rcu_no_completed,
405 .deferred_free = rcu_busted_torture_deferred_free,
406 .sync = synchronize_rcu_busted,
407 .exp_sync = synchronize_rcu_busted,
408 .call = call_rcu_busted,
409 .cb_barrier = NULL,
410 .fqs = NULL,
411 .stats = NULL,
412 .irq_capable = 1,
413 .name = "rcu_busted"
414};
415
416/*
523 * Definitions for srcu torture testing. 417 * Definitions for srcu torture testing.
524 */ 418 */
525 419
@@ -530,7 +424,7 @@ static int srcu_torture_read_lock(void) __acquires(&srcu_ctl)
530 return srcu_read_lock(&srcu_ctl); 424 return srcu_read_lock(&srcu_ctl);
531} 425}
532 426
533static void srcu_read_delay(struct rcu_random_state *rrsp) 427static void srcu_read_delay(struct torture_random_state *rrsp)
534{ 428{
535 long delay; 429 long delay;
536 const long uspertick = 1000000 / HZ; 430 const long uspertick = 1000000 / HZ;
@@ -538,7 +432,8 @@ static void srcu_read_delay(struct rcu_random_state *rrsp)
538 432
539 /* We want there to be long-running readers, but not all the time. */ 433 /* We want there to be long-running readers, but not all the time. */
540 434
541 delay = rcu_random(rrsp) % (nrealreaders * 2 * longdelay * uspertick); 435 delay = torture_random(rrsp) %
436 (nrealreaders * 2 * longdelay * uspertick);
542 if (!delay) 437 if (!delay)
543 schedule_timeout_interruptible(longdelay); 438 schedule_timeout_interruptible(longdelay);
544 else 439 else
@@ -677,12 +572,12 @@ static int rcu_torture_boost(void *arg)
677 struct rcu_boost_inflight rbi = { .inflight = 0 }; 572 struct rcu_boost_inflight rbi = { .inflight = 0 };
678 struct sched_param sp; 573 struct sched_param sp;
679 574
680 VERBOSE_PRINTK_STRING("rcu_torture_boost started"); 575 VERBOSE_TOROUT_STRING("rcu_torture_boost started");
681 576
682 /* Set real-time priority. */ 577 /* Set real-time priority. */
683 sp.sched_priority = 1; 578 sp.sched_priority = 1;
684 if (sched_setscheduler(current, SCHED_FIFO, &sp) < 0) { 579 if (sched_setscheduler(current, SCHED_FIFO, &sp) < 0) {
685 VERBOSE_PRINTK_STRING("rcu_torture_boost RT prio failed!"); 580 VERBOSE_TOROUT_STRING("rcu_torture_boost RT prio failed!");
686 n_rcu_torture_boost_rterror++; 581 n_rcu_torture_boost_rterror++;
687 } 582 }
688 583
@@ -693,9 +588,8 @@ static int rcu_torture_boost(void *arg)
693 oldstarttime = boost_starttime; 588 oldstarttime = boost_starttime;
694 while (ULONG_CMP_LT(jiffies, oldstarttime)) { 589 while (ULONG_CMP_LT(jiffies, oldstarttime)) {
695 schedule_timeout_interruptible(oldstarttime - jiffies); 590 schedule_timeout_interruptible(oldstarttime - jiffies);
696 rcu_stutter_wait("rcu_torture_boost"); 591 stutter_wait("rcu_torture_boost");
697 if (kthread_should_stop() || 592 if (torture_must_stop())
698 fullstop != FULLSTOP_DONTSTOP)
699 goto checkwait; 593 goto checkwait;
700 } 594 }
701 595
@@ -710,15 +604,14 @@ static int rcu_torture_boost(void *arg)
710 call_rcu(&rbi.rcu, rcu_torture_boost_cb); 604 call_rcu(&rbi.rcu, rcu_torture_boost_cb);
711 if (jiffies - call_rcu_time > 605 if (jiffies - call_rcu_time >
712 test_boost_duration * HZ - HZ / 2) { 606 test_boost_duration * HZ - HZ / 2) {
713 VERBOSE_PRINTK_STRING("rcu_torture_boost boosting failed"); 607 VERBOSE_TOROUT_STRING("rcu_torture_boost boosting failed");
714 n_rcu_torture_boost_failure++; 608 n_rcu_torture_boost_failure++;
715 } 609 }
716 call_rcu_time = jiffies; 610 call_rcu_time = jiffies;
717 } 611 }
718 cond_resched(); 612 cond_resched();
719 rcu_stutter_wait("rcu_torture_boost"); 613 stutter_wait("rcu_torture_boost");
720 if (kthread_should_stop() || 614 if (torture_must_stop())
721 fullstop != FULLSTOP_DONTSTOP)
722 goto checkwait; 615 goto checkwait;
723 } 616 }
724 617
@@ -742,16 +635,17 @@ static int rcu_torture_boost(void *arg)
742 } 635 }
743 636
744 /* Go do the stutter. */ 637 /* Go do the stutter. */
745checkwait: rcu_stutter_wait("rcu_torture_boost"); 638checkwait: stutter_wait("rcu_torture_boost");
746 } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); 639 } while (!torture_must_stop());
747 640
748 /* Clean up and exit. */ 641 /* Clean up and exit. */
749 VERBOSE_PRINTK_STRING("rcu_torture_boost task stopping"); 642 while (!kthread_should_stop() || rbi.inflight) {
750 rcutorture_shutdown_absorb("rcu_torture_boost"); 643 torture_shutdown_absorb("rcu_torture_boost");
751 while (!kthread_should_stop() || rbi.inflight)
752 schedule_timeout_uninterruptible(1); 644 schedule_timeout_uninterruptible(1);
645 }
753 smp_mb(); /* order accesses to ->inflight before stack-frame death. */ 646 smp_mb(); /* order accesses to ->inflight before stack-frame death. */
754 destroy_rcu_head_on_stack(&rbi.rcu); 647 destroy_rcu_head_on_stack(&rbi.rcu);
648 torture_kthread_stopping("rcu_torture_boost");
755 return 0; 649 return 0;
756} 650}
757 651
@@ -766,7 +660,7 @@ rcu_torture_fqs(void *arg)
766 unsigned long fqs_resume_time; 660 unsigned long fqs_resume_time;
767 int fqs_burst_remaining; 661 int fqs_burst_remaining;
768 662
769 VERBOSE_PRINTK_STRING("rcu_torture_fqs task started"); 663 VERBOSE_TOROUT_STRING("rcu_torture_fqs task started");
770 do { 664 do {
771 fqs_resume_time = jiffies + fqs_stutter * HZ; 665 fqs_resume_time = jiffies + fqs_stutter * HZ;
772 while (ULONG_CMP_LT(jiffies, fqs_resume_time) && 666 while (ULONG_CMP_LT(jiffies, fqs_resume_time) &&
@@ -780,12 +674,9 @@ rcu_torture_fqs(void *arg)
780 udelay(fqs_holdoff); 674 udelay(fqs_holdoff);
781 fqs_burst_remaining -= fqs_holdoff; 675 fqs_burst_remaining -= fqs_holdoff;
782 } 676 }
783 rcu_stutter_wait("rcu_torture_fqs"); 677 stutter_wait("rcu_torture_fqs");
784 } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); 678 } while (!torture_must_stop());
785 VERBOSE_PRINTK_STRING("rcu_torture_fqs task stopping"); 679 torture_kthread_stopping("rcu_torture_fqs");
786 rcutorture_shutdown_absorb("rcu_torture_fqs");
787 while (!kthread_should_stop())
788 schedule_timeout_uninterruptible(1);
789 return 0; 680 return 0;
790} 681}
791 682
@@ -802,9 +693,9 @@ rcu_torture_writer(void *arg)
802 struct rcu_torture *rp; 693 struct rcu_torture *rp;
803 struct rcu_torture *rp1; 694 struct rcu_torture *rp1;
804 struct rcu_torture *old_rp; 695 struct rcu_torture *old_rp;
805 static DEFINE_RCU_RANDOM(rand); 696 static DEFINE_TORTURE_RANDOM(rand);
806 697
807 VERBOSE_PRINTK_STRING("rcu_torture_writer task started"); 698 VERBOSE_TOROUT_STRING("rcu_torture_writer task started");
808 set_user_nice(current, 19); 699 set_user_nice(current, 19);
809 700
810 do { 701 do {
@@ -813,7 +704,7 @@ rcu_torture_writer(void *arg)
813 if (rp == NULL) 704 if (rp == NULL)
814 continue; 705 continue;
815 rp->rtort_pipe_count = 0; 706 rp->rtort_pipe_count = 0;
816 udelay(rcu_random(&rand) & 0x3ff); 707 udelay(torture_random(&rand) & 0x3ff);
817 old_rp = rcu_dereference_check(rcu_torture_current, 708 old_rp = rcu_dereference_check(rcu_torture_current,
818 current == writer_task); 709 current == writer_task);
819 rp->rtort_mbtest = 1; 710 rp->rtort_mbtest = 1;
@@ -826,7 +717,7 @@ rcu_torture_writer(void *arg)
826 atomic_inc(&rcu_torture_wcount[i]); 717 atomic_inc(&rcu_torture_wcount[i]);
827 old_rp->rtort_pipe_count++; 718 old_rp->rtort_pipe_count++;
828 if (gp_normal == gp_exp) 719 if (gp_normal == gp_exp)
829 exp = !!(rcu_random(&rand) & 0x80); 720 exp = !!(torture_random(&rand) & 0x80);
830 else 721 else
831 exp = gp_exp; 722 exp = gp_exp;
832 if (!exp) { 723 if (!exp) {
@@ -852,12 +743,9 @@ rcu_torture_writer(void *arg)
852 } 743 }
853 } 744 }
854 rcutorture_record_progress(++rcu_torture_current_version); 745 rcutorture_record_progress(++rcu_torture_current_version);
855 rcu_stutter_wait("rcu_torture_writer"); 746 stutter_wait("rcu_torture_writer");
856 } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); 747 } while (!torture_must_stop());
857 VERBOSE_PRINTK_STRING("rcu_torture_writer task stopping"); 748 torture_kthread_stopping("rcu_torture_writer");
858 rcutorture_shutdown_absorb("rcu_torture_writer");
859 while (!kthread_should_stop())
860 schedule_timeout_uninterruptible(1);
861 return 0; 749 return 0;
862} 750}
863 751
@@ -868,19 +756,19 @@ rcu_torture_writer(void *arg)
868static int 756static int
869rcu_torture_fakewriter(void *arg) 757rcu_torture_fakewriter(void *arg)
870{ 758{
871 DEFINE_RCU_RANDOM(rand); 759 DEFINE_TORTURE_RANDOM(rand);
872 760
873 VERBOSE_PRINTK_STRING("rcu_torture_fakewriter task started"); 761 VERBOSE_TOROUT_STRING("rcu_torture_fakewriter task started");
874 set_user_nice(current, 19); 762 set_user_nice(current, 19);
875 763
876 do { 764 do {
877 schedule_timeout_uninterruptible(1 + rcu_random(&rand)%10); 765 schedule_timeout_uninterruptible(1 + torture_random(&rand)%10);
878 udelay(rcu_random(&rand) & 0x3ff); 766 udelay(torture_random(&rand) & 0x3ff);
879 if (cur_ops->cb_barrier != NULL && 767 if (cur_ops->cb_barrier != NULL &&
880 rcu_random(&rand) % (nfakewriters * 8) == 0) { 768 torture_random(&rand) % (nfakewriters * 8) == 0) {
881 cur_ops->cb_barrier(); 769 cur_ops->cb_barrier();
882 } else if (gp_normal == gp_exp) { 770 } else if (gp_normal == gp_exp) {
883 if (rcu_random(&rand) & 0x80) 771 if (torture_random(&rand) & 0x80)
884 cur_ops->sync(); 772 cur_ops->sync();
885 else 773 else
886 cur_ops->exp_sync(); 774 cur_ops->exp_sync();
@@ -889,13 +777,10 @@ rcu_torture_fakewriter(void *arg)
889 } else { 777 } else {
890 cur_ops->exp_sync(); 778 cur_ops->exp_sync();
891 } 779 }
892 rcu_stutter_wait("rcu_torture_fakewriter"); 780 stutter_wait("rcu_torture_fakewriter");
893 } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); 781 } while (!torture_must_stop());
894 782
895 VERBOSE_PRINTK_STRING("rcu_torture_fakewriter task stopping"); 783 torture_kthread_stopping("rcu_torture_fakewriter");
896 rcutorture_shutdown_absorb("rcu_torture_fakewriter");
897 while (!kthread_should_stop())
898 schedule_timeout_uninterruptible(1);
899 return 0; 784 return 0;
900} 785}
901 786
@@ -921,7 +806,7 @@ static void rcu_torture_timer(unsigned long unused)
921 int idx; 806 int idx;
922 int completed; 807 int completed;
923 int completed_end; 808 int completed_end;
924 static DEFINE_RCU_RANDOM(rand); 809 static DEFINE_TORTURE_RANDOM(rand);
925 static DEFINE_SPINLOCK(rand_lock); 810 static DEFINE_SPINLOCK(rand_lock);
926 struct rcu_torture *p; 811 struct rcu_torture *p;
927 int pipe_count; 812 int pipe_count;
@@ -980,13 +865,13 @@ rcu_torture_reader(void *arg)
980 int completed; 865 int completed;
981 int completed_end; 866 int completed_end;
982 int idx; 867 int idx;
983 DEFINE_RCU_RANDOM(rand); 868 DEFINE_TORTURE_RANDOM(rand);
984 struct rcu_torture *p; 869 struct rcu_torture *p;
985 int pipe_count; 870 int pipe_count;
986 struct timer_list t; 871 struct timer_list t;
987 unsigned long long ts; 872 unsigned long long ts;
988 873
989 VERBOSE_PRINTK_STRING("rcu_torture_reader task started"); 874 VERBOSE_TOROUT_STRING("rcu_torture_reader task started");
990 set_user_nice(current, 19); 875 set_user_nice(current, 19);
991 if (irqreader && cur_ops->irq_capable) 876 if (irqreader && cur_ops->irq_capable)
992 setup_timer_on_stack(&t, rcu_torture_timer, 0); 877 setup_timer_on_stack(&t, rcu_torture_timer, 0);
@@ -1034,14 +919,11 @@ rcu_torture_reader(void *arg)
1034 preempt_enable(); 919 preempt_enable();
1035 cur_ops->readunlock(idx); 920 cur_ops->readunlock(idx);
1036 schedule(); 921 schedule();
1037 rcu_stutter_wait("rcu_torture_reader"); 922 stutter_wait("rcu_torture_reader");
1038 } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); 923 } while (!torture_must_stop());
1039 VERBOSE_PRINTK_STRING("rcu_torture_reader task stopping");
1040 rcutorture_shutdown_absorb("rcu_torture_reader");
1041 if (irqreader && cur_ops->irq_capable) 924 if (irqreader && cur_ops->irq_capable)
1042 del_timer_sync(&t); 925 del_timer_sync(&t);
1043 while (!kthread_should_stop()) 926 torture_kthread_stopping("rcu_torture_reader");
1044 schedule_timeout_uninterruptible(1);
1045 return 0; 927 return 0;
1046} 928}
1047 929
@@ -1083,13 +965,7 @@ rcu_torture_printk(char *page)
1083 n_rcu_torture_boost_failure, 965 n_rcu_torture_boost_failure,
1084 n_rcu_torture_boosts, 966 n_rcu_torture_boosts,
1085 n_rcu_torture_timers); 967 n_rcu_torture_timers);
1086 page += sprintf(page, 968 page = torture_onoff_stats(page);
1087 "onoff: %ld/%ld:%ld/%ld %d,%d:%d,%d %lu:%lu (HZ=%d) ",
1088 n_online_successes, n_online_attempts,
1089 n_offline_successes, n_offline_attempts,
1090 min_online, max_online,
1091 min_offline, max_offline,
1092 sum_online, sum_offline, HZ);
1093 page += sprintf(page, "barrier: %ld/%ld:%ld", 969 page += sprintf(page, "barrier: %ld/%ld:%ld",
1094 n_barrier_successes, 970 n_barrier_successes,
1095 n_barrier_attempts, 971 n_barrier_attempts,
@@ -1150,123 +1026,17 @@ rcu_torture_stats_print(void)
1150/* 1026/*
1151 * Periodically prints torture statistics, if periodic statistics printing 1027 * Periodically prints torture statistics, if periodic statistics printing
1152 * was specified via the stat_interval module parameter. 1028 * was specified via the stat_interval module parameter.
1153 *
1154 * No need to worry about fullstop here, since this one doesn't reference
1155 * volatile state or register callbacks.
1156 */ 1029 */
1157static int 1030static int
1158rcu_torture_stats(void *arg) 1031rcu_torture_stats(void *arg)
1159{ 1032{
1160 VERBOSE_PRINTK_STRING("rcu_torture_stats task started"); 1033 VERBOSE_TOROUT_STRING("rcu_torture_stats task started");
1161 do { 1034 do {
1162 schedule_timeout_interruptible(stat_interval * HZ); 1035 schedule_timeout_interruptible(stat_interval * HZ);
1163 rcu_torture_stats_print(); 1036 rcu_torture_stats_print();
1164 rcutorture_shutdown_absorb("rcu_torture_stats"); 1037 torture_shutdown_absorb("rcu_torture_stats");
1165 } while (!kthread_should_stop()); 1038 } while (!torture_must_stop());
1166 VERBOSE_PRINTK_STRING("rcu_torture_stats task stopping"); 1039 torture_kthread_stopping("rcu_torture_stats");
1167 return 0;
1168}
1169
1170static int rcu_idle_cpu; /* Force all torture tasks off this CPU */
1171
1172/* Shuffle tasks such that we allow @rcu_idle_cpu to become idle. A special case
1173 * is when @rcu_idle_cpu = -1, when we allow the tasks to run on all CPUs.
1174 */
1175static void rcu_torture_shuffle_tasks(void)
1176{
1177 int i;
1178
1179 cpumask_setall(shuffle_tmp_mask);
1180 get_online_cpus();
1181
1182 /* No point in shuffling if there is only one online CPU (ex: UP) */
1183 if (num_online_cpus() == 1) {
1184 put_online_cpus();
1185 return;
1186 }
1187
1188 if (rcu_idle_cpu != -1)
1189 cpumask_clear_cpu(rcu_idle_cpu, shuffle_tmp_mask);
1190
1191 set_cpus_allowed_ptr(current, shuffle_tmp_mask);
1192
1193 if (reader_tasks) {
1194 for (i = 0; i < nrealreaders; i++)
1195 if (reader_tasks[i])
1196 set_cpus_allowed_ptr(reader_tasks[i],
1197 shuffle_tmp_mask);
1198 }
1199 if (fakewriter_tasks) {
1200 for (i = 0; i < nfakewriters; i++)
1201 if (fakewriter_tasks[i])
1202 set_cpus_allowed_ptr(fakewriter_tasks[i],
1203 shuffle_tmp_mask);
1204 }
1205 if (writer_task)
1206 set_cpus_allowed_ptr(writer_task, shuffle_tmp_mask);
1207 if (stats_task)
1208 set_cpus_allowed_ptr(stats_task, shuffle_tmp_mask);
1209 if (stutter_task)
1210 set_cpus_allowed_ptr(stutter_task, shuffle_tmp_mask);
1211 if (fqs_task)
1212 set_cpus_allowed_ptr(fqs_task, shuffle_tmp_mask);
1213 if (shutdown_task)
1214 set_cpus_allowed_ptr(shutdown_task, shuffle_tmp_mask);
1215#ifdef CONFIG_HOTPLUG_CPU
1216 if (onoff_task)
1217 set_cpus_allowed_ptr(onoff_task, shuffle_tmp_mask);
1218#endif /* #ifdef CONFIG_HOTPLUG_CPU */
1219 if (stall_task)
1220 set_cpus_allowed_ptr(stall_task, shuffle_tmp_mask);
1221 if (barrier_cbs_tasks)
1222 for (i = 0; i < n_barrier_cbs; i++)
1223 if (barrier_cbs_tasks[i])
1224 set_cpus_allowed_ptr(barrier_cbs_tasks[i],
1225 shuffle_tmp_mask);
1226 if (barrier_task)
1227 set_cpus_allowed_ptr(barrier_task, shuffle_tmp_mask);
1228
1229 if (rcu_idle_cpu == -1)
1230 rcu_idle_cpu = num_online_cpus() - 1;
1231 else
1232 rcu_idle_cpu--;
1233
1234 put_online_cpus();
1235}
1236
1237/* Shuffle tasks across CPUs, with the intent of allowing each CPU in the
1238 * system to become idle at a time and cut off its timer ticks. This is meant
1239 * to test the support for such tickless idle CPU in RCU.
1240 */
1241static int
1242rcu_torture_shuffle(void *arg)
1243{
1244 VERBOSE_PRINTK_STRING("rcu_torture_shuffle task started");
1245 do {
1246 schedule_timeout_interruptible(shuffle_interval * HZ);
1247 rcu_torture_shuffle_tasks();
1248 rcutorture_shutdown_absorb("rcu_torture_shuffle");
1249 } while (!kthread_should_stop());
1250 VERBOSE_PRINTK_STRING("rcu_torture_shuffle task stopping");
1251 return 0;
1252}
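The shuffler above and the stutter kthread just below are removed outright; their generic replacements live in kernel/torture.c and are hooked up through torture_shuffle_init() and torture_stutter_init() in rcu_torture_init() further down. The shuffler's job is to keep one rotating CPU clear of torture kthreads so that it can go tickless; condensed from the code being deleted here, one pass amounts to:

/* Condensed sketch of one shuffle pass, reusing the variables of the removed
 * code; the per-kthread set_cpus_allowed_ptr() calls are elided. */
cpumask_setall(shuffle_tmp_mask);
get_online_cpus();
if (rcu_idle_cpu != -1)
	cpumask_clear_cpu(rcu_idle_cpu, shuffle_tmp_mask);	/* spare one CPU */
set_cpus_allowed_ptr(current, shuffle_tmp_mask);	/* likewise for each torture kthread */
rcu_idle_cpu = (rcu_idle_cpu == -1) ? num_online_cpus() - 1
				    : rcu_idle_cpu - 1;	/* rotate the spared CPU */
put_online_cpus();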
1253
1254/* Cause the rcutorture test to "stutter", starting and stopping all
1255 * threads periodically.
1256 */
1257static int
1258rcu_torture_stutter(void *arg)
1259{
1260 VERBOSE_PRINTK_STRING("rcu_torture_stutter task started");
1261 do {
1262 schedule_timeout_interruptible(stutter * HZ);
1263 stutter_pause_test = 1;
1264 if (!kthread_should_stop())
1265 schedule_timeout_interruptible(stutter * HZ);
1266 stutter_pause_test = 0;
1267 rcutorture_shutdown_absorb("rcu_torture_stutter");
1268 } while (!kthread_should_stop());
1269 VERBOSE_PRINTK_STRING("rcu_torture_stutter task stopping");
1270 return 0; 1040 return 0;
1271} 1041}
1272 1042
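Whether converted in place (rcu_torture_stats above, the barrier and stall kthreads further down) or re-homed in kernel/torture.c, the torture kthreads now share one shape: torture_shutdown_absorb() parks the thread while a system shutdown is in flight, torture_must_stop() is the single loop condition, and torture_kthread_stopping() is the handshake with torture_stop_kthread(). A minimal sketch — the "example" name, interval, and do_one_pass() are placeholders, not code from this patch:

static int example_torture_kthread(void *unused)
{
	VERBOSE_TOROUT_STRING("example task started");
	do {
		schedule_timeout_interruptible(interval * HZ);
		do_one_pass();				/* e.g. rcu_torture_stats_print() */
		torture_shutdown_absorb("example");	/* park if shutting down */
	} while (!torture_must_stop());
	torture_kthread_stopping("example");		/* wait for torture_stop_kthread() */
	return 0;
}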
@@ -1293,10 +1063,6 @@ rcu_torture_print_module_parms(struct rcu_torture_ops *cur_ops, const char *tag)
1293 onoff_interval, onoff_holdoff); 1063 onoff_interval, onoff_holdoff);
1294} 1064}
1295 1065
1296static struct notifier_block rcutorture_shutdown_nb = {
1297 .notifier_call = rcutorture_shutdown_notify,
1298};
1299
1300static void rcutorture_booster_cleanup(int cpu) 1066static void rcutorture_booster_cleanup(int cpu)
1301{ 1067{
1302 struct task_struct *t; 1068 struct task_struct *t;
@@ -1304,14 +1070,12 @@ static void rcutorture_booster_cleanup(int cpu)
1304 if (boost_tasks[cpu] == NULL) 1070 if (boost_tasks[cpu] == NULL)
1305 return; 1071 return;
1306 mutex_lock(&boost_mutex); 1072 mutex_lock(&boost_mutex);
1307 VERBOSE_PRINTK_STRING("Stopping rcu_torture_boost task");
1308 t = boost_tasks[cpu]; 1073 t = boost_tasks[cpu];
1309 boost_tasks[cpu] = NULL; 1074 boost_tasks[cpu] = NULL;
1310 mutex_unlock(&boost_mutex); 1075 mutex_unlock(&boost_mutex);
1311 1076
1312 /* This must be outside of the mutex, otherwise deadlock! */ 1077 /* This must be outside of the mutex, otherwise deadlock! */
1313 kthread_stop(t); 1078 torture_stop_kthread(rcu_torture_boost, t);
1314 boost_tasks[cpu] = NULL;
1315} 1079}
1316 1080
1317static int rcutorture_booster_init(int cpu) 1081static int rcutorture_booster_init(int cpu)
@@ -1323,13 +1087,13 @@ static int rcutorture_booster_init(int cpu)
1323 1087
1324 /* Don't allow time recalculation while creating a new task. */ 1088 /* Don't allow time recalculation while creating a new task. */
1325 mutex_lock(&boost_mutex); 1089 mutex_lock(&boost_mutex);
1326 VERBOSE_PRINTK_STRING("Creating rcu_torture_boost task"); 1090 VERBOSE_TOROUT_STRING("Creating rcu_torture_boost task");
1327 boost_tasks[cpu] = kthread_create_on_node(rcu_torture_boost, NULL, 1091 boost_tasks[cpu] = kthread_create_on_node(rcu_torture_boost, NULL,
1328 cpu_to_node(cpu), 1092 cpu_to_node(cpu),
1329 "rcu_torture_boost"); 1093 "rcu_torture_boost");
1330 if (IS_ERR(boost_tasks[cpu])) { 1094 if (IS_ERR(boost_tasks[cpu])) {
1331 retval = PTR_ERR(boost_tasks[cpu]); 1095 retval = PTR_ERR(boost_tasks[cpu]);
1332 VERBOSE_PRINTK_STRING("rcu_torture_boost task create failed"); 1096 VERBOSE_TOROUT_STRING("rcu_torture_boost task create failed");
1333 n_rcu_torture_boost_ktrerror++; 1097 n_rcu_torture_boost_ktrerror++;
1334 boost_tasks[cpu] = NULL; 1098 boost_tasks[cpu] = NULL;
1335 mutex_unlock(&boost_mutex); 1099 mutex_unlock(&boost_mutex);
@@ -1342,175 +1106,6 @@ static int rcutorture_booster_init(int cpu)
1342} 1106}
1343 1107
1344/* 1108/*
1345 * Cause the rcutorture test to shutdown the system after the test has
1346 * run for the time specified by the shutdown_secs module parameter.
1347 */
1348static int
1349rcu_torture_shutdown(void *arg)
1350{
1351 long delta;
1352 unsigned long jiffies_snap;
1353
1354 VERBOSE_PRINTK_STRING("rcu_torture_shutdown task started");
1355 jiffies_snap = ACCESS_ONCE(jiffies);
1356 while (ULONG_CMP_LT(jiffies_snap, shutdown_time) &&
1357 !kthread_should_stop()) {
1358 delta = shutdown_time - jiffies_snap;
1359 if (verbose)
1360 pr_alert("%s" TORTURE_FLAG
1361 "rcu_torture_shutdown task: %lu jiffies remaining\n",
1362 torture_type, delta);
1363 schedule_timeout_interruptible(delta);
1364 jiffies_snap = ACCESS_ONCE(jiffies);
1365 }
1366 if (kthread_should_stop()) {
1367 VERBOSE_PRINTK_STRING("rcu_torture_shutdown task stopping");
1368 return 0;
1369 }
1370
1371 /* OK, shut down the system. */
1372
1373 VERBOSE_PRINTK_STRING("rcu_torture_shutdown task shutting down system");
1374 shutdown_task = NULL; /* Avoid self-kill deadlock. */
1375 rcu_torture_cleanup(); /* Get the success/failure message. */
1376 kernel_power_off(); /* Shut down the system. */
1377 return 0;
1378}
1379
1380#ifdef CONFIG_HOTPLUG_CPU
1381
1382/*
1383 * Execute random CPU-hotplug operations at the interval specified
1384 * by the onoff_interval.
1385 */
1386static int
1387rcu_torture_onoff(void *arg)
1388{
1389 int cpu;
1390 unsigned long delta;
1391 int maxcpu = -1;
1392 DEFINE_RCU_RANDOM(rand);
1393 int ret;
1394 unsigned long starttime;
1395
1396 VERBOSE_PRINTK_STRING("rcu_torture_onoff task started");
1397 for_each_online_cpu(cpu)
1398 maxcpu = cpu;
1399 WARN_ON(maxcpu < 0);
1400 if (onoff_holdoff > 0) {
1401 VERBOSE_PRINTK_STRING("rcu_torture_onoff begin holdoff");
1402 schedule_timeout_interruptible(onoff_holdoff * HZ);
1403 VERBOSE_PRINTK_STRING("rcu_torture_onoff end holdoff");
1404 }
1405 while (!kthread_should_stop()) {
1406 cpu = (rcu_random(&rand) >> 4) % (maxcpu + 1);
1407 if (cpu_online(cpu) && cpu_is_hotpluggable(cpu)) {
1408 if (verbose)
1409 pr_alert("%s" TORTURE_FLAG
1410 "rcu_torture_onoff task: offlining %d\n",
1411 torture_type, cpu);
1412 starttime = jiffies;
1413 n_offline_attempts++;
1414 ret = cpu_down(cpu);
1415 if (ret) {
1416 if (verbose)
1417 pr_alert("%s" TORTURE_FLAG
1418 "rcu_torture_onoff task: offline %d failed: errno %d\n",
1419 torture_type, cpu, ret);
1420 } else {
1421 if (verbose)
1422 pr_alert("%s" TORTURE_FLAG
1423 "rcu_torture_onoff task: offlined %d\n",
1424 torture_type, cpu);
1425 n_offline_successes++;
1426 delta = jiffies - starttime;
1427 sum_offline += delta;
1428 if (min_offline < 0) {
1429 min_offline = delta;
1430 max_offline = delta;
1431 }
1432 if (min_offline > delta)
1433 min_offline = delta;
1434 if (max_offline < delta)
1435 max_offline = delta;
1436 }
1437 } else if (cpu_is_hotpluggable(cpu)) {
1438 if (verbose)
1439 pr_alert("%s" TORTURE_FLAG
1440 "rcu_torture_onoff task: onlining %d\n",
1441 torture_type, cpu);
1442 starttime = jiffies;
1443 n_online_attempts++;
1444 ret = cpu_up(cpu);
1445 if (ret) {
1446 if (verbose)
1447 pr_alert("%s" TORTURE_FLAG
1448 "rcu_torture_onoff task: online %d failed: errno %d\n",
1449 torture_type, cpu, ret);
1450 } else {
1451 if (verbose)
1452 pr_alert("%s" TORTURE_FLAG
1453 "rcu_torture_onoff task: onlined %d\n",
1454 torture_type, cpu);
1455 n_online_successes++;
1456 delta = jiffies - starttime;
1457 sum_online += delta;
1458 if (min_online < 0) {
1459 min_online = delta;
1460 max_online = delta;
1461 }
1462 if (min_online > delta)
1463 min_online = delta;
1464 if (max_online < delta)
1465 max_online = delta;
1466 }
1467 }
1468 schedule_timeout_interruptible(onoff_interval * HZ);
1469 }
1470 VERBOSE_PRINTK_STRING("rcu_torture_onoff task stopping");
1471 return 0;
1472}
1473
1474static int
1475rcu_torture_onoff_init(void)
1476{
1477 int ret;
1478
1479 if (onoff_interval <= 0)
1480 return 0;
1481 onoff_task = kthread_run(rcu_torture_onoff, NULL, "rcu_torture_onoff");
1482 if (IS_ERR(onoff_task)) {
1483 ret = PTR_ERR(onoff_task);
1484 onoff_task = NULL;
1485 return ret;
1486 }
1487 return 0;
1488}
1489
1490static void rcu_torture_onoff_cleanup(void)
1491{
1492 if (onoff_task == NULL)
1493 return;
1494 VERBOSE_PRINTK_STRING("Stopping rcu_torture_onoff task");
1495 kthread_stop(onoff_task);
1496 onoff_task = NULL;
1497}
1498
1499#else /* #ifdef CONFIG_HOTPLUG_CPU */
1500
1501static int
1502rcu_torture_onoff_init(void)
1503{
1504 return 0;
1505}
1506
1507static void rcu_torture_onoff_cleanup(void)
1508{
1509}
1510
1511#endif /* #else #ifdef CONFIG_HOTPLUG_CPU */
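The shutdown and CPU-hotplug (onoff) kthreads deleted above move wholesale into kernel/torture.c and are re-armed below through torture_shutdown_init() and torture_onoff_init(); the statistics they kept now come back via torture_onoff_stats() in the stats-print path. For reading that output, the latency bookkeeping the removed loop performed for every successful online or offline transition boils down to the following (hypothetical helper, written only to summarize the duplicated arithmetic):

static void onoff_record(unsigned long starttime, unsigned long *sum,
			 long *min, long *max)
{
	unsigned long delta = jiffies - starttime;

	*sum += delta;
	if (*min < 0)
		*min = *max = delta;	/* first sample seeds both bounds */
	if (*min > delta)
		*min = delta;
	if (*max < delta)
		*max = delta;
}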
1512
1513/*
1514 * CPU-stall kthread. It waits as specified by stall_cpu_holdoff, then 1109 * CPU-stall kthread. It waits as specified by stall_cpu_holdoff, then
1515 * induces a CPU stall for the time specified by stall_cpu. 1110 * induces a CPU stall for the time specified by stall_cpu.
1516 */ 1111 */
@@ -1518,11 +1113,11 @@ static int rcu_torture_stall(void *args)
1518{ 1113{
1519 unsigned long stop_at; 1114 unsigned long stop_at;
1520 1115
1521 VERBOSE_PRINTK_STRING("rcu_torture_stall task started"); 1116 VERBOSE_TOROUT_STRING("rcu_torture_stall task started");
1522 if (stall_cpu_holdoff > 0) { 1117 if (stall_cpu_holdoff > 0) {
1523 VERBOSE_PRINTK_STRING("rcu_torture_stall begin holdoff"); 1118 VERBOSE_TOROUT_STRING("rcu_torture_stall begin holdoff");
1524 schedule_timeout_interruptible(stall_cpu_holdoff * HZ); 1119 schedule_timeout_interruptible(stall_cpu_holdoff * HZ);
1525 VERBOSE_PRINTK_STRING("rcu_torture_stall end holdoff"); 1120 VERBOSE_TOROUT_STRING("rcu_torture_stall end holdoff");
1526 } 1121 }
1527 if (!kthread_should_stop()) { 1122 if (!kthread_should_stop()) {
1528 stop_at = get_seconds() + stall_cpu; 1123 stop_at = get_seconds() + stall_cpu;
@@ -1536,7 +1131,7 @@ static int rcu_torture_stall(void *args)
1536 rcu_read_unlock(); 1131 rcu_read_unlock();
1537 pr_alert("rcu_torture_stall end.\n"); 1132 pr_alert("rcu_torture_stall end.\n");
1538 } 1133 }
1539 rcutorture_shutdown_absorb("rcu_torture_stall"); 1134 torture_shutdown_absorb("rcu_torture_stall");
1540 while (!kthread_should_stop()) 1135 while (!kthread_should_stop())
1541 schedule_timeout_interruptible(10 * HZ); 1136 schedule_timeout_interruptible(10 * HZ);
1542 return 0; 1137 return 0;
@@ -1545,27 +1140,9 @@ static int rcu_torture_stall(void *args)
1545/* Spawn CPU-stall kthread, if stall_cpu specified. */ 1140/* Spawn CPU-stall kthread, if stall_cpu specified. */
1546static int __init rcu_torture_stall_init(void) 1141static int __init rcu_torture_stall_init(void)
1547{ 1142{
1548 int ret;
1549
1550 if (stall_cpu <= 0) 1143 if (stall_cpu <= 0)
1551 return 0; 1144 return 0;
1552 stall_task = kthread_run(rcu_torture_stall, NULL, "rcu_torture_stall"); 1145 return torture_create_kthread(rcu_torture_stall, NULL, stall_task);
1553 if (IS_ERR(stall_task)) {
1554 ret = PTR_ERR(stall_task);
1555 stall_task = NULL;
1556 return ret;
1557 }
1558 return 0;
1559}
1560
1561/* Clean up after the CPU-stall kthread, if one was spawned. */
1562static void rcu_torture_stall_cleanup(void)
1563{
1564 if (stall_task == NULL)
1565 return;
1566 VERBOSE_PRINTK_STRING("Stopping rcu_torture_stall_task.");
1567 kthread_stop(stall_task);
1568 stall_task = NULL;
1569} 1146}
1570 1147
1571/* Callback function for RCU barrier testing. */ 1148/* Callback function for RCU barrier testing. */
@@ -1583,28 +1160,24 @@ static int rcu_torture_barrier_cbs(void *arg)
1583 struct rcu_head rcu; 1160 struct rcu_head rcu;
1584 1161
1585 init_rcu_head_on_stack(&rcu); 1162 init_rcu_head_on_stack(&rcu);
1586 VERBOSE_PRINTK_STRING("rcu_torture_barrier_cbs task started"); 1163 VERBOSE_TOROUT_STRING("rcu_torture_barrier_cbs task started");
1587 set_user_nice(current, 19); 1164 set_user_nice(current, 19);
1588 do { 1165 do {
1589 wait_event(barrier_cbs_wq[myid], 1166 wait_event(barrier_cbs_wq[myid],
1590 (newphase = 1167 (newphase =
1591 ACCESS_ONCE(barrier_phase)) != lastphase || 1168 ACCESS_ONCE(barrier_phase)) != lastphase ||
1592 kthread_should_stop() || 1169 torture_must_stop());
1593 fullstop != FULLSTOP_DONTSTOP);
1594 lastphase = newphase; 1170 lastphase = newphase;
1595 smp_mb(); /* ensure barrier_phase load before ->call(). */ 1171 smp_mb(); /* ensure barrier_phase load before ->call(). */
1596 if (kthread_should_stop() || fullstop != FULLSTOP_DONTSTOP) 1172 if (torture_must_stop())
1597 break; 1173 break;
1598 cur_ops->call(&rcu, rcu_torture_barrier_cbf); 1174 cur_ops->call(&rcu, rcu_torture_barrier_cbf);
1599 if (atomic_dec_and_test(&barrier_cbs_count)) 1175 if (atomic_dec_and_test(&barrier_cbs_count))
1600 wake_up(&barrier_wq); 1176 wake_up(&barrier_wq);
1601 } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); 1177 } while (!torture_must_stop());
1602 VERBOSE_PRINTK_STRING("rcu_torture_barrier_cbs task stopping");
1603 rcutorture_shutdown_absorb("rcu_torture_barrier_cbs");
1604 while (!kthread_should_stop())
1605 schedule_timeout_interruptible(1);
1606 cur_ops->cb_barrier(); 1178 cur_ops->cb_barrier();
1607 destroy_rcu_head_on_stack(&rcu); 1179 destroy_rcu_head_on_stack(&rcu);
1180 torture_kthread_stopping("rcu_torture_barrier_cbs");
1608 return 0; 1181 return 0;
1609} 1182}
1610 1183
@@ -1613,7 +1186,7 @@ static int rcu_torture_barrier(void *arg)
1613{ 1186{
1614 int i; 1187 int i;
1615 1188
1616 VERBOSE_PRINTK_STRING("rcu_torture_barrier task starting"); 1189 VERBOSE_TOROUT_STRING("rcu_torture_barrier task starting");
1617 do { 1190 do {
1618 atomic_set(&barrier_cbs_invoked, 0); 1191 atomic_set(&barrier_cbs_invoked, 0);
1619 atomic_set(&barrier_cbs_count, n_barrier_cbs); 1192 atomic_set(&barrier_cbs_count, n_barrier_cbs);
@@ -1623,9 +1196,8 @@ static int rcu_torture_barrier(void *arg)
1623 wake_up(&barrier_cbs_wq[i]); 1196 wake_up(&barrier_cbs_wq[i]);
1624 wait_event(barrier_wq, 1197 wait_event(barrier_wq,
1625 atomic_read(&barrier_cbs_count) == 0 || 1198 atomic_read(&barrier_cbs_count) == 0 ||
1626 kthread_should_stop() || 1199 torture_must_stop());
1627 fullstop != FULLSTOP_DONTSTOP); 1200 if (torture_must_stop())
1628 if (kthread_should_stop() || fullstop != FULLSTOP_DONTSTOP)
1629 break; 1201 break;
1630 n_barrier_attempts++; 1202 n_barrier_attempts++;
1631 cur_ops->cb_barrier(); /* Implies smp_mb() for wait_event(). */ 1203 cur_ops->cb_barrier(); /* Implies smp_mb() for wait_event(). */
@@ -1635,11 +1207,8 @@ static int rcu_torture_barrier(void *arg)
1635 } 1207 }
1636 n_barrier_successes++; 1208 n_barrier_successes++;
1637 schedule_timeout_interruptible(HZ / 10); 1209 schedule_timeout_interruptible(HZ / 10);
1638 } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); 1210 } while (!torture_must_stop());
1639 VERBOSE_PRINTK_STRING("rcu_torture_barrier task stopping"); 1211 torture_kthread_stopping("rcu_torture_barrier");
1640 rcutorture_shutdown_absorb("rcu_torture_barrier");
1641 while (!kthread_should_stop())
1642 schedule_timeout_interruptible(1);
1643 return 0; 1212 return 0;
1644} 1213}
1645 1214
@@ -1672,24 +1241,13 @@ static int rcu_torture_barrier_init(void)
1672 return -ENOMEM; 1241 return -ENOMEM;
1673 for (i = 0; i < n_barrier_cbs; i++) { 1242 for (i = 0; i < n_barrier_cbs; i++) {
1674 init_waitqueue_head(&barrier_cbs_wq[i]); 1243 init_waitqueue_head(&barrier_cbs_wq[i]);
1675 barrier_cbs_tasks[i] = kthread_run(rcu_torture_barrier_cbs, 1244 ret = torture_create_kthread(rcu_torture_barrier_cbs,
1676 (void *)(long)i, 1245 (void *)(long)i,
1677 "rcu_torture_barrier_cbs"); 1246 barrier_cbs_tasks[i]);
1678 if (IS_ERR(barrier_cbs_tasks[i])) { 1247 if (ret)
1679 ret = PTR_ERR(barrier_cbs_tasks[i]);
1680 VERBOSE_PRINTK_ERRSTRING("Failed to create rcu_torture_barrier_cbs");
1681 barrier_cbs_tasks[i] = NULL;
1682 return ret; 1248 return ret;
1683 }
1684 } 1249 }
1685 barrier_task = kthread_run(rcu_torture_barrier, NULL, 1250 return torture_create_kthread(rcu_torture_barrier, NULL, barrier_task);
1686 "rcu_torture_barrier");
1687 if (IS_ERR(barrier_task)) {
1688 ret = PTR_ERR(barrier_task);
1689 VERBOSE_PRINTK_ERRSTRING("Failed to create rcu_torture_barrier");
1690 barrier_task = NULL;
1691 }
1692 return 0;
1693} 1251}
1694 1252
1695/* Clean up after RCU barrier testing. */ 1253/* Clean up after RCU barrier testing. */
@@ -1697,19 +1255,11 @@ static void rcu_torture_barrier_cleanup(void)
1697{ 1255{
1698 int i; 1256 int i;
1699 1257
1700 if (barrier_task != NULL) { 1258 torture_stop_kthread(rcu_torture_barrier, barrier_task);
1701 VERBOSE_PRINTK_STRING("Stopping rcu_torture_barrier task");
1702 kthread_stop(barrier_task);
1703 barrier_task = NULL;
1704 }
1705 if (barrier_cbs_tasks != NULL) { 1259 if (barrier_cbs_tasks != NULL) {
1706 for (i = 0; i < n_barrier_cbs; i++) { 1260 for (i = 0; i < n_barrier_cbs; i++)
1707 if (barrier_cbs_tasks[i] != NULL) { 1261 torture_stop_kthread(rcu_torture_barrier_cbs,
1708 VERBOSE_PRINTK_STRING("Stopping rcu_torture_barrier_cbs task"); 1262 barrier_cbs_tasks[i]);
1709 kthread_stop(barrier_cbs_tasks[i]);
1710 barrier_cbs_tasks[i] = NULL;
1711 }
1712 }
1713 kfree(barrier_cbs_tasks); 1263 kfree(barrier_cbs_tasks);
1714 barrier_cbs_tasks = NULL; 1264 barrier_cbs_tasks = NULL;
1715 } 1265 }
@@ -1747,90 +1297,42 @@ rcu_torture_cleanup(void)
1747{ 1297{
1748 int i; 1298 int i;
1749 1299
1750 mutex_lock(&fullstop_mutex);
1751 rcutorture_record_test_transition(); 1300 rcutorture_record_test_transition();
1752 if (fullstop == FULLSTOP_SHUTDOWN) { 1301 if (torture_cleanup()) {
1753 pr_warn(/* but going down anyway, so... */
1754 "Concurrent 'rmmod rcutorture' and shutdown illegal!\n");
1755 mutex_unlock(&fullstop_mutex);
1756 schedule_timeout_uninterruptible(10);
1757 if (cur_ops->cb_barrier != NULL) 1302 if (cur_ops->cb_barrier != NULL)
1758 cur_ops->cb_barrier(); 1303 cur_ops->cb_barrier();
1759 return; 1304 return;
1760 } 1305 }
1761 fullstop = FULLSTOP_RMMOD;
1762 mutex_unlock(&fullstop_mutex);
1763 unregister_reboot_notifier(&rcutorture_shutdown_nb);
1764 rcu_torture_barrier_cleanup();
1765 rcu_torture_stall_cleanup();
1766 if (stutter_task) {
1767 VERBOSE_PRINTK_STRING("Stopping rcu_torture_stutter task");
1768 kthread_stop(stutter_task);
1769 }
1770 stutter_task = NULL;
1771 if (shuffler_task) {
1772 VERBOSE_PRINTK_STRING("Stopping rcu_torture_shuffle task");
1773 kthread_stop(shuffler_task);
1774 free_cpumask_var(shuffle_tmp_mask);
1775 }
1776 shuffler_task = NULL;
1777 1306
1778 if (writer_task) { 1307 rcu_torture_barrier_cleanup();
1779 VERBOSE_PRINTK_STRING("Stopping rcu_torture_writer task"); 1308 torture_stop_kthread(rcu_torture_stall, stall_task);
1780 kthread_stop(writer_task); 1309 torture_stop_kthread(rcu_torture_writer, writer_task);
1781 }
1782 writer_task = NULL;
1783 1310
1784 if (reader_tasks) { 1311 if (reader_tasks) {
1785 for (i = 0; i < nrealreaders; i++) { 1312 for (i = 0; i < nrealreaders; i++)
1786 if (reader_tasks[i]) { 1313 torture_stop_kthread(rcu_torture_reader,
1787 VERBOSE_PRINTK_STRING( 1314 reader_tasks[i]);
1788 "Stopping rcu_torture_reader task");
1789 kthread_stop(reader_tasks[i]);
1790 }
1791 reader_tasks[i] = NULL;
1792 }
1793 kfree(reader_tasks); 1315 kfree(reader_tasks);
1794 reader_tasks = NULL;
1795 } 1316 }
1796 rcu_torture_current = NULL; 1317 rcu_torture_current = NULL;
1797 1318
1798 if (fakewriter_tasks) { 1319 if (fakewriter_tasks) {
1799 for (i = 0; i < nfakewriters; i++) { 1320 for (i = 0; i < nfakewriters; i++) {
1800 if (fakewriter_tasks[i]) { 1321 torture_stop_kthread(rcu_torture_fakewriter,
1801 VERBOSE_PRINTK_STRING( 1322 fakewriter_tasks[i]);
1802 "Stopping rcu_torture_fakewriter task");
1803 kthread_stop(fakewriter_tasks[i]);
1804 }
1805 fakewriter_tasks[i] = NULL;
1806 } 1323 }
1807 kfree(fakewriter_tasks); 1324 kfree(fakewriter_tasks);
1808 fakewriter_tasks = NULL; 1325 fakewriter_tasks = NULL;
1809 } 1326 }
1810 1327
1811 if (stats_task) { 1328 torture_stop_kthread(rcu_torture_stats, stats_task);
1812 VERBOSE_PRINTK_STRING("Stopping rcu_torture_stats task"); 1329 torture_stop_kthread(rcu_torture_fqs, fqs_task);
1813 kthread_stop(stats_task);
1814 }
1815 stats_task = NULL;
1816
1817 if (fqs_task) {
1818 VERBOSE_PRINTK_STRING("Stopping rcu_torture_fqs task");
1819 kthread_stop(fqs_task);
1820 }
1821 fqs_task = NULL;
1822 if ((test_boost == 1 && cur_ops->can_boost) || 1330 if ((test_boost == 1 && cur_ops->can_boost) ||
1823 test_boost == 2) { 1331 test_boost == 2) {
1824 unregister_cpu_notifier(&rcutorture_cpu_nb); 1332 unregister_cpu_notifier(&rcutorture_cpu_nb);
1825 for_each_possible_cpu(i) 1333 for_each_possible_cpu(i)
1826 rcutorture_booster_cleanup(i); 1334 rcutorture_booster_cleanup(i);
1827 } 1335 }
1828 if (shutdown_task != NULL) {
1829 VERBOSE_PRINTK_STRING("Stopping rcu_torture_shutdown task");
1830 kthread_stop(shutdown_task);
1831 }
1832 shutdown_task = NULL;
1833 rcu_torture_onoff_cleanup();
1834 1336
1835 /* Wait for all RCU callbacks to fire. */ 1337 /* Wait for all RCU callbacks to fire. */
1836 1338
@@ -1841,8 +1343,7 @@ rcu_torture_cleanup(void)
1841 1343
1842 if (atomic_read(&n_rcu_torture_error) || n_rcu_torture_barrier_error) 1344 if (atomic_read(&n_rcu_torture_error) || n_rcu_torture_barrier_error)
1843 rcu_torture_print_module_parms(cur_ops, "End of test: FAILURE"); 1345 rcu_torture_print_module_parms(cur_ops, "End of test: FAILURE");
1844 else if (n_online_successes != n_online_attempts || 1346 else if (torture_onoff_failures())
1845 n_offline_successes != n_offline_attempts)
1846 rcu_torture_print_module_parms(cur_ops, 1347 rcu_torture_print_module_parms(cur_ops,
1847 "End of test: RCU_HOTPLUG"); 1348 "End of test: RCU_HOTPLUG");
1848 else 1349 else
@@ -1911,12 +1412,11 @@ rcu_torture_init(void)
1911 int i; 1412 int i;
1912 int cpu; 1413 int cpu;
1913 int firsterr = 0; 1414 int firsterr = 0;
1914 int retval;
1915 static struct rcu_torture_ops *torture_ops[] = { 1415 static struct rcu_torture_ops *torture_ops[] = {
1916 &rcu_ops, &rcu_bh_ops, &srcu_ops, &sched_ops, 1416 &rcu_ops, &rcu_bh_ops, &rcu_busted_ops, &srcu_ops, &sched_ops,
1917 }; 1417 };
1918 1418
1919 mutex_lock(&fullstop_mutex); 1419 torture_init_begin(torture_type, verbose, &rcutorture_runnable);
1920 1420
1921 /* Process args and tell the world that the torturer is on the job. */ 1421 /* Process args and tell the world that the torturer is on the job. */
1922 for (i = 0; i < ARRAY_SIZE(torture_ops); i++) { 1422 for (i = 0; i < ARRAY_SIZE(torture_ops); i++) {
@@ -1931,7 +1431,7 @@ rcu_torture_init(void)
1931 for (i = 0; i < ARRAY_SIZE(torture_ops); i++) 1431 for (i = 0; i < ARRAY_SIZE(torture_ops); i++)
1932 pr_alert(" %s", torture_ops[i]->name); 1432 pr_alert(" %s", torture_ops[i]->name);
1933 pr_alert("\n"); 1433 pr_alert("\n");
1934 mutex_unlock(&fullstop_mutex); 1434 torture_init_end();
1935 return -EINVAL; 1435 return -EINVAL;
1936 } 1436 }
1937 if (cur_ops->fqs == NULL && fqs_duration != 0) { 1437 if (cur_ops->fqs == NULL && fqs_duration != 0) {
@@ -1946,7 +1446,6 @@ rcu_torture_init(void)
1946 else 1446 else
1947 nrealreaders = 2 * num_online_cpus(); 1447 nrealreaders = 2 * num_online_cpus();
1948 rcu_torture_print_module_parms(cur_ops, "Start of test"); 1448 rcu_torture_print_module_parms(cur_ops, "Start of test");
1949 fullstop = FULLSTOP_DONTSTOP;
1950 1449
1951 /* Set up the freelist. */ 1450 /* Set up the freelist. */
1952 1451
@@ -1982,108 +1481,61 @@ rcu_torture_init(void)
1982 1481
1983 /* Start up the kthreads. */ 1482 /* Start up the kthreads. */
1984 1483
1985 VERBOSE_PRINTK_STRING("Creating rcu_torture_writer task"); 1484 firsterr = torture_create_kthread(rcu_torture_writer, NULL,
1986 writer_task = kthread_create(rcu_torture_writer, NULL, 1485 writer_task);
1987 "rcu_torture_writer"); 1486 if (firsterr)
1988 if (IS_ERR(writer_task)) {
1989 firsterr = PTR_ERR(writer_task);
1990 VERBOSE_PRINTK_ERRSTRING("Failed to create writer");
1991 writer_task = NULL;
1992 goto unwind; 1487 goto unwind;
1993 }
1994 wake_up_process(writer_task);
1995 fakewriter_tasks = kzalloc(nfakewriters * sizeof(fakewriter_tasks[0]), 1488 fakewriter_tasks = kzalloc(nfakewriters * sizeof(fakewriter_tasks[0]),
1996 GFP_KERNEL); 1489 GFP_KERNEL);
1997 if (fakewriter_tasks == NULL) { 1490 if (fakewriter_tasks == NULL) {
1998 VERBOSE_PRINTK_ERRSTRING("out of memory"); 1491 VERBOSE_TOROUT_ERRSTRING("out of memory");
1999 firsterr = -ENOMEM; 1492 firsterr = -ENOMEM;
2000 goto unwind; 1493 goto unwind;
2001 } 1494 }
2002 for (i = 0; i < nfakewriters; i++) { 1495 for (i = 0; i < nfakewriters; i++) {
2003 VERBOSE_PRINTK_STRING("Creating rcu_torture_fakewriter task"); 1496 firsterr = torture_create_kthread(rcu_torture_fakewriter,
2004 fakewriter_tasks[i] = kthread_run(rcu_torture_fakewriter, NULL, 1497 NULL, fakewriter_tasks[i]);
2005 "rcu_torture_fakewriter"); 1498 if (firsterr)
2006 if (IS_ERR(fakewriter_tasks[i])) {
2007 firsterr = PTR_ERR(fakewriter_tasks[i]);
2008 VERBOSE_PRINTK_ERRSTRING("Failed to create fakewriter");
2009 fakewriter_tasks[i] = NULL;
2010 goto unwind; 1499 goto unwind;
2011 }
2012 } 1500 }
2013 reader_tasks = kzalloc(nrealreaders * sizeof(reader_tasks[0]), 1501 reader_tasks = kzalloc(nrealreaders * sizeof(reader_tasks[0]),
2014 GFP_KERNEL); 1502 GFP_KERNEL);
2015 if (reader_tasks == NULL) { 1503 if (reader_tasks == NULL) {
2016 VERBOSE_PRINTK_ERRSTRING("out of memory"); 1504 VERBOSE_TOROUT_ERRSTRING("out of memory");
2017 firsterr = -ENOMEM; 1505 firsterr = -ENOMEM;
2018 goto unwind; 1506 goto unwind;
2019 } 1507 }
2020 for (i = 0; i < nrealreaders; i++) { 1508 for (i = 0; i < nrealreaders; i++) {
2021 VERBOSE_PRINTK_STRING("Creating rcu_torture_reader task"); 1509 firsterr = torture_create_kthread(rcu_torture_reader, NULL,
2022 reader_tasks[i] = kthread_run(rcu_torture_reader, NULL, 1510 reader_tasks[i]);
2023 "rcu_torture_reader"); 1511 if (firsterr)
2024 if (IS_ERR(reader_tasks[i])) {
2025 firsterr = PTR_ERR(reader_tasks[i]);
2026 VERBOSE_PRINTK_ERRSTRING("Failed to create reader");
2027 reader_tasks[i] = NULL;
2028 goto unwind; 1512 goto unwind;
2029 }
2030 } 1513 }
2031 if (stat_interval > 0) { 1514 if (stat_interval > 0) {
2032 VERBOSE_PRINTK_STRING("Creating rcu_torture_stats task"); 1515 firsterr = torture_create_kthread(rcu_torture_stats, NULL,
2033 stats_task = kthread_run(rcu_torture_stats, NULL, 1516 stats_task);
2034 "rcu_torture_stats"); 1517 if (firsterr)
2035 if (IS_ERR(stats_task)) {
2036 firsterr = PTR_ERR(stats_task);
2037 VERBOSE_PRINTK_ERRSTRING("Failed to create stats");
2038 stats_task = NULL;
2039 goto unwind; 1518 goto unwind;
2040 }
2041 } 1519 }
2042 if (test_no_idle_hz) { 1520 if (test_no_idle_hz) {
2043 rcu_idle_cpu = num_online_cpus() - 1; 1521 firsterr = torture_shuffle_init(shuffle_interval * HZ);
2044 1522 if (firsterr)
2045 if (!alloc_cpumask_var(&shuffle_tmp_mask, GFP_KERNEL)) {
2046 firsterr = -ENOMEM;
2047 VERBOSE_PRINTK_ERRSTRING("Failed to alloc mask");
2048 goto unwind;
2049 }
2050
2051 /* Create the shuffler thread */
2052 shuffler_task = kthread_run(rcu_torture_shuffle, NULL,
2053 "rcu_torture_shuffle");
2054 if (IS_ERR(shuffler_task)) {
2055 free_cpumask_var(shuffle_tmp_mask);
2056 firsterr = PTR_ERR(shuffler_task);
2057 VERBOSE_PRINTK_ERRSTRING("Failed to create shuffler");
2058 shuffler_task = NULL;
2059 goto unwind; 1523 goto unwind;
2060 }
2061 } 1524 }
2062 if (stutter < 0) 1525 if (stutter < 0)
2063 stutter = 0; 1526 stutter = 0;
2064 if (stutter) { 1527 if (stutter) {
2065 /* Create the stutter thread */ 1528 firsterr = torture_stutter_init(stutter * HZ);
2066 stutter_task = kthread_run(rcu_torture_stutter, NULL, 1529 if (firsterr)
2067 "rcu_torture_stutter");
2068 if (IS_ERR(stutter_task)) {
2069 firsterr = PTR_ERR(stutter_task);
2070 VERBOSE_PRINTK_ERRSTRING("Failed to create stutter");
2071 stutter_task = NULL;
2072 goto unwind; 1530 goto unwind;
2073 }
2074 } 1531 }
2075 if (fqs_duration < 0) 1532 if (fqs_duration < 0)
2076 fqs_duration = 0; 1533 fqs_duration = 0;
2077 if (fqs_duration) { 1534 if (fqs_duration) {
2078 /* Create the stutter thread */ 1535 /* Create the fqs thread */
2079 fqs_task = kthread_run(rcu_torture_fqs, NULL, 1536 firsterr = torture_create_kthread(rcu_torture_fqs, NULL, fqs_task);
2080 "rcu_torture_fqs"); 1537 if (firsterr)
2081 if (IS_ERR(fqs_task)) {
2082 firsterr = PTR_ERR(fqs_task);
2083 VERBOSE_PRINTK_ERRSTRING("Failed to create fqs");
2084 fqs_task = NULL;
2085 goto unwind; 1538 goto unwind;
2086 }
2087 } 1539 }
2088 if (test_boost_interval < 1) 1540 if (test_boost_interval < 1)
2089 test_boost_interval = 1; 1541 test_boost_interval = 1;
@@ -2097,49 +1549,31 @@ rcu_torture_init(void)
2097 for_each_possible_cpu(i) { 1549 for_each_possible_cpu(i) {
2098 if (cpu_is_offline(i)) 1550 if (cpu_is_offline(i))
2099 continue; /* Heuristic: CPU can go offline. */ 1551 continue; /* Heuristic: CPU can go offline. */
2100 retval = rcutorture_booster_init(i); 1552 firsterr = rcutorture_booster_init(i);
2101 if (retval < 0) { 1553 if (firsterr)
2102 firsterr = retval;
2103 goto unwind; 1554 goto unwind;
2104 }
2105 } 1555 }
2106 } 1556 }
2107 if (shutdown_secs > 0) { 1557 firsterr = torture_shutdown_init(shutdown_secs, rcu_torture_cleanup);
2108 shutdown_time = jiffies + shutdown_secs * HZ; 1558 if (firsterr)
2109 shutdown_task = kthread_create(rcu_torture_shutdown, NULL,
2110 "rcu_torture_shutdown");
2111 if (IS_ERR(shutdown_task)) {
2112 firsterr = PTR_ERR(shutdown_task);
2113 VERBOSE_PRINTK_ERRSTRING("Failed to create shutdown");
2114 shutdown_task = NULL;
2115 goto unwind;
2116 }
2117 wake_up_process(shutdown_task);
2118 }
2119 i = rcu_torture_onoff_init();
2120 if (i != 0) {
2121 firsterr = i;
2122 goto unwind; 1559 goto unwind;
2123 } 1560 firsterr = torture_onoff_init(onoff_holdoff * HZ, onoff_interval * HZ);
2124 register_reboot_notifier(&rcutorture_shutdown_nb); 1561 if (firsterr)
2125 i = rcu_torture_stall_init();
2126 if (i != 0) {
2127 firsterr = i;
2128 goto unwind; 1562 goto unwind;
2129 } 1563 firsterr = rcu_torture_stall_init();
2130 retval = rcu_torture_barrier_init(); 1564 if (firsterr)
2131 if (retval != 0) { 1565 goto unwind;
2132 firsterr = retval; 1566 firsterr = rcu_torture_barrier_init();
1567 if (firsterr)
2133 goto unwind; 1568 goto unwind;
2134 }
2135 if (object_debug) 1569 if (object_debug)
2136 rcu_test_debug_objects(); 1570 rcu_test_debug_objects();
2137 rcutorture_record_test_transition(); 1571 rcutorture_record_test_transition();
2138 mutex_unlock(&fullstop_mutex); 1572 torture_init_end();
2139 return 0; 1573 return 0;
2140 1574
2141unwind: 1575unwind:
2142 mutex_unlock(&fullstop_mutex); 1576 torture_init_end();
2143 rcu_torture_cleanup(); 1577 rcu_torture_cleanup();
2144 return firsterr; 1578 return firsterr;
2145} 1579}
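Taken together, rcu_torture_init() and rcu_torture_cleanup() now lean on a small kernel/torture.c API: torture_init_begin()/torture_init_end() replace the fullstop_mutex handling, torture_cleanup() the fullstop state machine, and every kthread is managed by the create/stop pair. Condensed to its idiom (writer_task is just the first instance above; judging by the deleted boilerplate, the helpers take over the error reporting and the NULL handling):

/* creation: returns 0 or an errno, so every site is two lines */
firsterr = torture_create_kthread(rcu_torture_writer, NULL, writer_task);
if (firsterr)
	goto unwind;

/* teardown: stops the thread if it was created and clears the pointer,
 * replacing the old if-non-NULL / kthread_stop() / NULL sequences */
torture_stop_kthread(rcu_torture_writer, writer_task);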
diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c
index 3318d8284384..c639556f3fa0 100644
--- a/kernel/rcu/srcu.c
+++ b/kernel/rcu/srcu.c
@@ -12,8 +12,8 @@
12 * GNU General Public License for more details. 12 * GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software 15 * along with this program; if not, you can access it online at
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 16 * http://www.gnu.org/licenses/gpl-2.0.html.
17 * 17 *
18 * Copyright (C) IBM Corporation, 2006 18 * Copyright (C) IBM Corporation, 2006
19 * Copyright (C) Fujitsu, 2012 19 * Copyright (C) Fujitsu, 2012
@@ -36,8 +36,6 @@
36#include <linux/delay.h> 36#include <linux/delay.h>
37#include <linux/srcu.h> 37#include <linux/srcu.h>
38 38
39#include <trace/events/rcu.h>
40
41#include "rcu.h" 39#include "rcu.h"
42 40
43/* 41/*
@@ -398,7 +396,7 @@ void call_srcu(struct srcu_struct *sp, struct rcu_head *head,
398 rcu_batch_queue(&sp->batch_queue, head); 396 rcu_batch_queue(&sp->batch_queue, head);
399 if (!sp->running) { 397 if (!sp->running) {
400 sp->running = true; 398 sp->running = true;
401 schedule_delayed_work(&sp->work, 0); 399 queue_delayed_work(system_power_efficient_wq, &sp->work, 0);
402 } 400 }
403 spin_unlock_irqrestore(&sp->queue_lock, flags); 401 spin_unlock_irqrestore(&sp->queue_lock, flags);
404} 402}
@@ -674,7 +672,8 @@ static void srcu_reschedule(struct srcu_struct *sp)
674 } 672 }
675 673
676 if (pending) 674 if (pending)
677 schedule_delayed_work(&sp->work, SRCU_INTERVAL); 675 queue_delayed_work(system_power_efficient_wq,
676 &sp->work, SRCU_INTERVAL);
678} 677}
679 678
680/* 679/*
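Both SRCU queueing sites now target the power-efficient system workqueue. Since schedule_delayed_work() is just queue_delayed_work() on system_wq, the whole delta is the choice of queue:

/* before: implicitly the per-CPU system_wq */
schedule_delayed_work(&sp->work, SRCU_INTERVAL);

/* after: let the workqueue core place the work power-efficiently;
 * system_power_efficient_wq behaves like system_wq unless the
 * power-efficient workqueue mode is enabled */
queue_delayed_work(system_power_efficient_wq, &sp->work, SRCU_INTERVAL);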
diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c
index 1254f312d024..d9efcc13008c 100644
--- a/kernel/rcu/tiny.c
+++ b/kernel/rcu/tiny.c
@@ -12,8 +12,8 @@
12 * GNU General Public License for more details. 12 * GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software 15 * along with this program; if not, you can access it online at
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 16 * http://www.gnu.org/licenses/gpl-2.0.html.
17 * 17 *
18 * Copyright IBM Corporation, 2008 18 * Copyright IBM Corporation, 2008
19 * 19 *
@@ -37,10 +37,6 @@
37#include <linux/prefetch.h> 37#include <linux/prefetch.h>
38#include <linux/ftrace_event.h> 38#include <linux/ftrace_event.h>
39 39
40#ifdef CONFIG_RCU_TRACE
41#include <trace/events/rcu.h>
42#endif /* #else #ifdef CONFIG_RCU_TRACE */
43
44#include "rcu.h" 40#include "rcu.h"
45 41
46/* Forward declarations for tiny_plugin.h. */ 42/* Forward declarations for tiny_plugin.h. */
diff --git a/kernel/rcu/tiny_plugin.h b/kernel/rcu/tiny_plugin.h
index 280d06cae352..431528520562 100644
--- a/kernel/rcu/tiny_plugin.h
+++ b/kernel/rcu/tiny_plugin.h
@@ -14,8 +14,8 @@
14 * GNU General Public License for more details. 14 * GNU General Public License for more details.
15 * 15 *
16 * You should have received a copy of the GNU General Public License 16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software 17 * along with this program; if not, you can access it online at
18 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 18 * http://www.gnu.org/licenses/gpl-2.0.html.
19 * 19 *
20 * Copyright (c) 2010 Linaro 20 * Copyright (c) 2010 Linaro
21 * 21 *
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index b3d116cd072d..0c47e300210a 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -12,8 +12,8 @@
12 * GNU General Public License for more details. 12 * GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software 15 * along with this program; if not, you can access it online at
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 16 * http://www.gnu.org/licenses/gpl-2.0.html.
17 * 17 *
18 * Copyright IBM Corporation, 2008 18 * Copyright IBM Corporation, 2008
19 * 19 *
@@ -58,8 +58,6 @@
58#include <linux/suspend.h> 58#include <linux/suspend.h>
59 59
60#include "tree.h" 60#include "tree.h"
61#include <trace/events/rcu.h>
62
63#include "rcu.h" 61#include "rcu.h"
64 62
65MODULE_ALIAS("rcutree"); 63MODULE_ALIAS("rcutree");
@@ -837,7 +835,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp,
837 * to the next. Only do this for the primary flavor of RCU. 835 * to the next. Only do this for the primary flavor of RCU.
838 */ 836 */
839 if (rdp->rsp == rcu_state && 837 if (rdp->rsp == rcu_state &&
840 ULONG_CMP_GE(ACCESS_ONCE(jiffies), rdp->rsp->jiffies_resched)) { 838 ULONG_CMP_GE(jiffies, rdp->rsp->jiffies_resched)) {
841 rdp->rsp->jiffies_resched += 5; 839 rdp->rsp->jiffies_resched += 5;
842 resched_cpu(rdp->cpu); 840 resched_cpu(rdp->cpu);
843 } 841 }
@@ -847,7 +845,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp,
847 845
848static void record_gp_stall_check_time(struct rcu_state *rsp) 846static void record_gp_stall_check_time(struct rcu_state *rsp)
849{ 847{
850 unsigned long j = ACCESS_ONCE(jiffies); 848 unsigned long j = jiffies;
851 unsigned long j1; 849 unsigned long j1;
852 850
853 rsp->gp_start = j; 851 rsp->gp_start = j;
@@ -1005,7 +1003,7 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
1005 1003
1006 if (rcu_cpu_stall_suppress || !rcu_gp_in_progress(rsp)) 1004 if (rcu_cpu_stall_suppress || !rcu_gp_in_progress(rsp))
1007 return; 1005 return;
1008 j = ACCESS_ONCE(jiffies); 1006 j = jiffies;
1009 1007
1010 /* 1008 /*
1011 * Lots of memory barriers to reject false positives. 1009 * Lots of memory barriers to reject false positives.
@@ -1423,13 +1421,14 @@ static int rcu_gp_init(struct rcu_state *rsp)
1423 1421
1424 /* Advance to a new grace period and initialize state. */ 1422 /* Advance to a new grace period and initialize state. */
1425 record_gp_stall_check_time(rsp); 1423 record_gp_stall_check_time(rsp);
1426 smp_wmb(); /* Record GP times before starting GP. */ 1424 /* Record GP times before starting GP, hence smp_store_release(). */
1427 rsp->gpnum++; 1425 smp_store_release(&rsp->gpnum, rsp->gpnum + 1);
1428 trace_rcu_grace_period(rsp->name, rsp->gpnum, TPS("start")); 1426 trace_rcu_grace_period(rsp->name, rsp->gpnum, TPS("start"));
1429 raw_spin_unlock_irq(&rnp->lock); 1427 raw_spin_unlock_irq(&rnp->lock);
1430 1428
1431 /* Exclude any concurrent CPU-hotplug operations. */ 1429 /* Exclude any concurrent CPU-hotplug operations. */
1432 mutex_lock(&rsp->onoff_mutex); 1430 mutex_lock(&rsp->onoff_mutex);
1431 smp_mb__after_unlock_lock(); /* ->gpnum increment before GP! */
1433 1432
1434 /* 1433 /*
1435 * Set the quiescent-state-needed bits in all the rcu_node 1434 * Set the quiescent-state-needed bits in all the rcu_node
@@ -1557,10 +1556,11 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
1557 } 1556 }
1558 rnp = rcu_get_root(rsp); 1557 rnp = rcu_get_root(rsp);
1559 raw_spin_lock_irq(&rnp->lock); 1558 raw_spin_lock_irq(&rnp->lock);
1560 smp_mb__after_unlock_lock(); 1559 smp_mb__after_unlock_lock(); /* Order GP before ->completed update. */
1561 rcu_nocb_gp_set(rnp, nocb); 1560 rcu_nocb_gp_set(rnp, nocb);
1562 1561
1563 rsp->completed = rsp->gpnum; /* Declare grace period done. */ 1562 /* Declare grace period done. */
1563 ACCESS_ONCE(rsp->completed) = rsp->gpnum;
1564 trace_rcu_grace_period(rsp->name, rsp->completed, TPS("end")); 1564 trace_rcu_grace_period(rsp->name, rsp->completed, TPS("end"));
1565 rsp->fqs_state = RCU_GP_IDLE; 1565 rsp->fqs_state = RCU_GP_IDLE;
1566 rdp = this_cpu_ptr(rsp->rda); 1566 rdp = this_cpu_ptr(rsp->rda);
@@ -2304,7 +2304,7 @@ static void force_quiescent_state(struct rcu_state *rsp)
2304 if (rnp_old != NULL) 2304 if (rnp_old != NULL)
2305 raw_spin_unlock(&rnp_old->fqslock); 2305 raw_spin_unlock(&rnp_old->fqslock);
2306 if (ret) { 2306 if (ret) {
2307 rsp->n_force_qs_lh++; 2307 ACCESS_ONCE(rsp->n_force_qs_lh)++;
2308 return; 2308 return;
2309 } 2309 }
2310 rnp_old = rnp; 2310 rnp_old = rnp;
@@ -2316,7 +2316,7 @@ static void force_quiescent_state(struct rcu_state *rsp)
2316 smp_mb__after_unlock_lock(); 2316 smp_mb__after_unlock_lock();
2317 raw_spin_unlock(&rnp_old->fqslock); 2317 raw_spin_unlock(&rnp_old->fqslock);
2318 if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) { 2318 if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
2319 rsp->n_force_qs_lh++; 2319 ACCESS_ONCE(rsp->n_force_qs_lh)++;
2320 raw_spin_unlock_irqrestore(&rnp_old->lock, flags); 2320 raw_spin_unlock_irqrestore(&rnp_old->lock, flags);
2321 return; /* Someone beat us to it. */ 2321 return; /* Someone beat us to it. */
2322 } 2322 }
@@ -2639,6 +2639,58 @@ void synchronize_rcu_bh(void)
2639} 2639}
2640EXPORT_SYMBOL_GPL(synchronize_rcu_bh); 2640EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
2641 2641
2642/**
2643 * get_state_synchronize_rcu - Snapshot current RCU state
2644 *
2645 * Returns a cookie that is used by a later call to cond_synchronize_rcu()
2646 * to determine whether or not a full grace period has elapsed in the
2647 * meantime.
2648 */
2649unsigned long get_state_synchronize_rcu(void)
2650{
2651 /*
2652 * Any prior manipulation of RCU-protected data must happen
2653 * before the load from ->gpnum.
2654 */
2655 smp_mb(); /* ^^^ */
2656
2657 /*
2658 * Make sure this load happens before the purportedly
2659 * time-consuming work between get_state_synchronize_rcu()
2660 * and cond_synchronize_rcu().
2661 */
2662 return smp_load_acquire(&rcu_state->gpnum);
2663}
2664EXPORT_SYMBOL_GPL(get_state_synchronize_rcu);
2665
2666/**
2667 * cond_synchronize_rcu - Conditionally wait for an RCU grace period
2668 *
2669 * @oldstate: return value from earlier call to get_state_synchronize_rcu()
2670 *
2671 * If a full RCU grace period has elapsed since the earlier call to
2672 * get_state_synchronize_rcu(), just return. Otherwise, invoke
2673 * synchronize_rcu() to wait for a full grace period.
2674 *
2675 * Yes, this function does not take counter wrap into account. But
2676 * counter wrap is harmless. If the counter wraps, we have waited for
2677 * more than 2 billion grace periods (and way more on a 64-bit system!),
2678 * so waiting for one additional grace period should be just fine.
2679 */
2680void cond_synchronize_rcu(unsigned long oldstate)
2681{
2682 unsigned long newstate;
2683
2684 /*
2685 * Ensure that this load happens before any RCU-destructive
2686 * actions the caller might carry out after we return.
2687 */
2688 newstate = smp_load_acquire(&rcu_state->completed);
2689 if (ULONG_CMP_GE(oldstate, newstate))
2690 synchronize_rcu();
2691}
2692EXPORT_SYMBOL_GPL(cond_synchronize_rcu);
2693
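The two new exports form a snapshot/conditional-wait pair: the smp_load_acquire() of ->gpnum here pairs with the smp_store_release() added to rcu_gp_init() earlier in this patch, and cond_synchronize_rcu() then compares the snapshot against ->completed. A typical caller (hypothetical, not part of this diff) would look like:

unsigned long gp_snap;

gp_snap = get_state_synchronize_rcu();	/* snapshot before the slow work */
do_something_lengthy();			/* placeholder for the caller's work */
cond_synchronize_rcu(gp_snap);		/* returns immediately if a full grace
					 * period elapsed meanwhile, otherwise
					 * blocks in synchronize_rcu() */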
2642static int synchronize_sched_expedited_cpu_stop(void *data) 2694static int synchronize_sched_expedited_cpu_stop(void *data)
2643{ 2695{
2644 /* 2696 /*
@@ -2880,7 +2932,7 @@ static int rcu_pending(int cpu)
2880 * non-NULL, store an indication of whether all callbacks are lazy. 2932 * non-NULL, store an indication of whether all callbacks are lazy.
2881 * (If there are no callbacks, all of them are deemed to be lazy.) 2933 * (If there are no callbacks, all of them are deemed to be lazy.)
2882 */ 2934 */
2883static int rcu_cpu_has_callbacks(int cpu, bool *all_lazy) 2935static int __maybe_unused rcu_cpu_has_callbacks(int cpu, bool *all_lazy)
2884{ 2936{
2885 bool al = true; 2937 bool al = true;
2886 bool hc = false; 2938 bool hc = false;
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 8c19873f1ac9..75dc3c39a02a 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -13,8 +13,8 @@
13 * GNU General Public License for more details. 13 * GNU General Public License for more details.
14 * 14 *
15 * You should have received a copy of the GNU General Public License 15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software 16 * along with this program; if not, you can access it online at
17 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 17 * http://www.gnu.org/licenses/gpl-2.0.html.
18 * 18 *
19 * Copyright IBM Corporation, 2008 19 * Copyright IBM Corporation, 2008
20 * 20 *
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 6e2ef4b2b920..962d1d589929 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -14,8 +14,8 @@
14 * GNU General Public License for more details. 14 * GNU General Public License for more details.
15 * 15 *
16 * You should have received a copy of the GNU General Public License 16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software 17 * along with this program; if not, you can access it online at
18 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 18 * http://www.gnu.org/licenses/gpl-2.0.html.
19 * 19 *
20 * Copyright Red Hat, 2009 20 * Copyright Red Hat, 2009
21 * Copyright IBM Corporation, 2009 21 * Copyright IBM Corporation, 2009
@@ -1586,11 +1586,13 @@ static void rcu_prepare_kthreads(int cpu)
1586 * Because we not have RCU_FAST_NO_HZ, just check whether this CPU needs 1586 * Because we not have RCU_FAST_NO_HZ, just check whether this CPU needs
1587 * any flavor of RCU. 1587 * any flavor of RCU.
1588 */ 1588 */
1589#ifndef CONFIG_RCU_NOCB_CPU_ALL
1589int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) 1590int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies)
1590{ 1591{
1591 *delta_jiffies = ULONG_MAX; 1592 *delta_jiffies = ULONG_MAX;
1592 return rcu_cpu_has_callbacks(cpu, NULL); 1593 return rcu_cpu_has_callbacks(cpu, NULL);
1593} 1594}
1595#endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */
1594 1596
1595/* 1597/*
1596 * Because we do not have RCU_FAST_NO_HZ, don't bother cleaning up 1598 * Because we do not have RCU_FAST_NO_HZ, don't bother cleaning up
@@ -1656,7 +1658,7 @@ extern int tick_nohz_active;
1656 * only if it has been awhile since the last time we did so. Afterwards, 1658 * only if it has been awhile since the last time we did so. Afterwards,
1657 * if there are any callbacks ready for immediate invocation, return true. 1659 * if there are any callbacks ready for immediate invocation, return true.
1658 */ 1660 */
1659static bool rcu_try_advance_all_cbs(void) 1661static bool __maybe_unused rcu_try_advance_all_cbs(void)
1660{ 1662{
1661 bool cbs_ready = false; 1663 bool cbs_ready = false;
1662 struct rcu_data *rdp; 1664 struct rcu_data *rdp;
@@ -1696,6 +1698,7 @@ static bool rcu_try_advance_all_cbs(void)
1696 * 1698 *
1697 * The caller must have disabled interrupts. 1699 * The caller must have disabled interrupts.
1698 */ 1700 */
1701#ifndef CONFIG_RCU_NOCB_CPU_ALL
1699int rcu_needs_cpu(int cpu, unsigned long *dj) 1702int rcu_needs_cpu(int cpu, unsigned long *dj)
1700{ 1703{
1701 struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); 1704 struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
@@ -1726,6 +1729,7 @@ int rcu_needs_cpu(int cpu, unsigned long *dj)
1726 } 1729 }
1727 return 0; 1730 return 0;
1728} 1731}
1732#endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */
1729 1733
1730/* 1734/*
1731 * Prepare a CPU for idle from an RCU perspective. The first major task 1735 * Prepare a CPU for idle from an RCU perspective. The first major task
@@ -1739,6 +1743,7 @@ int rcu_needs_cpu(int cpu, unsigned long *dj)
1739 */ 1743 */
1740static void rcu_prepare_for_idle(int cpu) 1744static void rcu_prepare_for_idle(int cpu)
1741{ 1745{
1746#ifndef CONFIG_RCU_NOCB_CPU_ALL
1742 struct rcu_data *rdp; 1747 struct rcu_data *rdp;
1743 struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); 1748 struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
1744 struct rcu_node *rnp; 1749 struct rcu_node *rnp;
@@ -1790,6 +1795,7 @@ static void rcu_prepare_for_idle(int cpu)
1790 rcu_accelerate_cbs(rsp, rnp, rdp); 1795 rcu_accelerate_cbs(rsp, rnp, rdp);
1791 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 1796 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
1792 } 1797 }
1798#endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */
1793} 1799}
1794 1800
1795/* 1801/*
@@ -1799,11 +1805,12 @@ static void rcu_prepare_for_idle(int cpu)
1799 */ 1805 */
1800static void rcu_cleanup_after_idle(int cpu) 1806static void rcu_cleanup_after_idle(int cpu)
1801{ 1807{
1802 1808#ifndef CONFIG_RCU_NOCB_CPU_ALL
1803 if (rcu_is_nocb_cpu(cpu)) 1809 if (rcu_is_nocb_cpu(cpu))
1804 return; 1810 return;
1805 if (rcu_try_advance_all_cbs()) 1811 if (rcu_try_advance_all_cbs())
1806 invoke_rcu_core(); 1812 invoke_rcu_core();
1813#endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */
1807} 1814}
1808 1815
1809/* 1816/*
@@ -2101,6 +2108,7 @@ static void rcu_init_one_nocb(struct rcu_node *rnp)
2101 init_waitqueue_head(&rnp->nocb_gp_wq[1]); 2108 init_waitqueue_head(&rnp->nocb_gp_wq[1]);
2102} 2109}
2103 2110
2111#ifndef CONFIG_RCU_NOCB_CPU_ALL
2104/* Is the specified CPU a no-CPUs CPU? */ 2112/* Is the specified CPU a no-CPUs CPU? */
2105bool rcu_is_nocb_cpu(int cpu) 2113bool rcu_is_nocb_cpu(int cpu)
2106{ 2114{
@@ -2108,6 +2116,7 @@ bool rcu_is_nocb_cpu(int cpu)
2108 return cpumask_test_cpu(cpu, rcu_nocb_mask); 2116 return cpumask_test_cpu(cpu, rcu_nocb_mask);
2109 return false; 2117 return false;
2110} 2118}
2119#endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */
2111 2120
2112/* 2121/*
2113 * Enqueue the specified string of rcu_head structures onto the specified 2122 * Enqueue the specified string of rcu_head structures onto the specified
@@ -2893,7 +2902,7 @@ static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp)
2893 * CPU unless the grace period has extended for too long. 2902 * CPU unless the grace period has extended for too long.
2894 * 2903 *
2895 * This code relies on the fact that all NO_HZ_FULL CPUs are also 2904 * This code relies on the fact that all NO_HZ_FULL CPUs are also
2896 * CONFIG_RCU_NOCB_CPUs. 2905 * CONFIG_RCU_NOCB_CPU CPUs.
2897 */ 2906 */
2898static bool rcu_nohz_full_cpu(struct rcu_state *rsp) 2907static bool rcu_nohz_full_cpu(struct rcu_state *rsp)
2899{ 2908{
diff --git a/kernel/rcu/tree_trace.c b/kernel/rcu/tree_trace.c
index 4def475336d4..5cdc62e1beeb 100644
--- a/kernel/rcu/tree_trace.c
+++ b/kernel/rcu/tree_trace.c
@@ -12,8 +12,8 @@
12 * GNU General Public License for more details. 12 * GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software 15 * along with this program; if not, you can access it online at
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 16 * http://www.gnu.org/licenses/gpl-2.0.html.
17 * 17 *
18 * Copyright IBM Corporation, 2008 18 * Copyright IBM Corporation, 2008
19 * 19 *
@@ -273,7 +273,7 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)
273 seq_printf(m, "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu oqlen=%ld/%ld\n", 273 seq_printf(m, "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu oqlen=%ld/%ld\n",
274 rsp->n_force_qs, rsp->n_force_qs_ngp, 274 rsp->n_force_qs, rsp->n_force_qs_ngp,
275 rsp->n_force_qs - rsp->n_force_qs_ngp, 275 rsp->n_force_qs - rsp->n_force_qs_ngp,
276 rsp->n_force_qs_lh, rsp->qlen_lazy, rsp->qlen); 276 ACCESS_ONCE(rsp->n_force_qs_lh), rsp->qlen_lazy, rsp->qlen);
277 for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < rcu_num_nodes; rnp++) { 277 for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < rcu_num_nodes; rnp++) {
278 if (rnp->level != level) { 278 if (rnp->level != level) {
279 seq_puts(m, "\n"); 279 seq_puts(m, "\n");
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index c54609faf233..4c0a9b0af469 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -12,8 +12,8 @@
12 * GNU General Public License for more details. 12 * GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software 15 * along with this program; if not, you can access it online at
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 16 * http://www.gnu.org/licenses/gpl-2.0.html.
17 * 17 *
18 * Copyright IBM Corporation, 2001 18 * Copyright IBM Corporation, 2001
19 * 19 *
@@ -49,7 +49,6 @@
49#include <linux/module.h> 49#include <linux/module.h>
50 50
51#define CREATE_TRACE_POINTS 51#define CREATE_TRACE_POINTS
52#include <trace/events/rcu.h>
53 52
54#include "rcu.h" 53#include "rcu.h"
55 54
diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c
index 43c2bcc35761..b30a2924ef14 100644
--- a/kernel/sched/clock.c
+++ b/kernel/sched/clock.c
@@ -301,14 +301,14 @@ u64 sched_clock_cpu(int cpu)
301 if (unlikely(!sched_clock_running)) 301 if (unlikely(!sched_clock_running))
302 return 0ull; 302 return 0ull;
303 303
304 preempt_disable(); 304 preempt_disable_notrace();
305 scd = cpu_sdc(cpu); 305 scd = cpu_sdc(cpu);
306 306
307 if (cpu != smp_processor_id()) 307 if (cpu != smp_processor_id())
308 clock = sched_clock_remote(scd); 308 clock = sched_clock_remote(scd);
309 else 309 else
310 clock = sched_clock_local(scd); 310 clock = sched_clock_local(scd);
311 preempt_enable(); 311 preempt_enable_notrace();
312 312
313 return clock; 313 return clock;
314} 314}
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index b46131ef6aab..f5c6635b806c 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1952,7 +1952,7 @@ static int dl_overflow(struct task_struct *p, int policy,
1952{ 1952{
1953 1953
1954 struct dl_bw *dl_b = dl_bw_of(task_cpu(p)); 1954 struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
1955 u64 period = attr->sched_period; 1955 u64 period = attr->sched_period ?: attr->sched_deadline;
1956 u64 runtime = attr->sched_runtime; 1956 u64 runtime = attr->sched_runtime;
1957 u64 new_bw = dl_policy(policy) ? to_ratio(period, runtime) : 0; 1957 u64 new_bw = dl_policy(policy) ? to_ratio(period, runtime) : 0;
1958 int cpus, err = -1; 1958 int cpus, err = -1;
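The only change to dl_overflow() is the GCC ?: shorthand: a zero sched_period now means an implicit-deadline task, so the relative deadline doubles as the period when the requested bandwidth is computed. Spelled out:

u64 period = attr->sched_period;

if (!period)				/* implicit-deadline task */
	period = attr->sched_deadline;	/* bandwidth becomes runtime/deadline */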
@@ -3338,6 +3338,15 @@ recheck:
3338 return -EPERM; 3338 return -EPERM;
3339 } 3339 }
3340 3340
3341 /*
3342 * Can't set/change SCHED_DEADLINE policy at all for now
3343 * (safest behavior); in the future we would like to allow
3344 * unprivileged DL tasks to increase their relative deadline
3345 * or reduce their runtime (both ways reducing utilization)
3346 */
3347 if (dl_policy(policy))
3348 return -EPERM;
3349
3341 /* 3350 /*
3342 * Treat SCHED_IDLE as nice 20. Only allow a switch to 3351 * Treat SCHED_IDLE as nice 20. Only allow a switch to
3343 * SCHED_NORMAL if the RLIMIT_NICE would normally permit it. 3352 * SCHED_NORMAL if the RLIMIT_NICE would normally permit it.
@@ -3661,13 +3670,14 @@ SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param)
3661 * @pid: the pid in question. 3670 * @pid: the pid in question.
3662 * @uattr: structure containing the extended parameters. 3671 * @uattr: structure containing the extended parameters.
3663 */ 3672 */
3664SYSCALL_DEFINE2(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr) 3673SYSCALL_DEFINE3(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr,
3674 unsigned int, flags)
3665{ 3675{
3666 struct sched_attr attr; 3676 struct sched_attr attr;
3667 struct task_struct *p; 3677 struct task_struct *p;
3668 int retval; 3678 int retval;
3669 3679
3670 if (!uattr || pid < 0) 3680 if (!uattr || pid < 0 || flags)
3671 return -EINVAL; 3681 return -EINVAL;
3672 3682
3673 if (sched_copy_attr(uattr, &attr)) 3683 if (sched_copy_attr(uattr, &attr))
@@ -3786,7 +3796,7 @@ static int sched_read_attr(struct sched_attr __user *uattr,
3786 attr->size = usize; 3796 attr->size = usize;
3787 } 3797 }
3788 3798
3789 ret = copy_to_user(uattr, attr, usize); 3799 ret = copy_to_user(uattr, attr, attr->size);
3790 if (ret) 3800 if (ret)
3791 return -EFAULT; 3801 return -EFAULT;
3792 3802
@@ -3804,8 +3814,8 @@ err_size:
3804 * @uattr: structure containing the extended parameters. 3814 * @uattr: structure containing the extended parameters.
3805 * @size: sizeof(attr) for fwd/bwd comp. 3815 * @size: sizeof(attr) for fwd/bwd comp.
3806 */ 3816 */
3807SYSCALL_DEFINE3(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr, 3817SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr,
3808 unsigned int, size) 3818 unsigned int, size, unsigned int, flags)
3809{ 3819{
3810 struct sched_attr attr = { 3820 struct sched_attr attr = {
3811 .size = sizeof(struct sched_attr), 3821 .size = sizeof(struct sched_attr),
@@ -3814,7 +3824,7 @@ SYSCALL_DEFINE3(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr,
3814 int retval; 3824 int retval;
3815 3825
3816 if (!uattr || pid < 0 || size > PAGE_SIZE || 3826 if (!uattr || pid < 0 || size > PAGE_SIZE ||
3817 size < SCHED_ATTR_SIZE_VER0) 3827 size < SCHED_ATTR_SIZE_VER0 || flags)
3818 return -EINVAL; 3828 return -EINVAL;
3819 3829
3820 rcu_read_lock(); 3830 rcu_read_lock();
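Both syscalls gain a trailing flags word that must be zero for now (anything else returns -EINVAL), leaving ABI room for later extensions. glibc does not wrap these calls at the time of this patch, so userspace goes through syscall(2); a hedged sketch, with the sched_attr layout copied locally because installed headers may not yet provide it:

#include <stdint.h>
#include <sys/types.h>
#include <sys/syscall.h>
#include <unistd.h>

struct sched_attr {			/* local copy of the uapi layout */
	uint32_t size;
	uint32_t sched_policy;
	uint64_t sched_flags;
	int32_t  sched_nice;		/* SCHED_NORMAL, SCHED_BATCH */
	uint32_t sched_priority;	/* SCHED_FIFO, SCHED_RR */
	uint64_t sched_runtime;		/* SCHED_DEADLINE, in nanoseconds */
	uint64_t sched_deadline;
	uint64_t sched_period;
};

static int sched_setattr_raw(pid_t pid, struct sched_attr *attr)
{
	return syscall(__NR_sched_setattr, pid, attr, 0);	/* flags must be 0 */
}

static int sched_getattr_raw(pid_t pid, struct sched_attr *attr)
{
	return syscall(__NR_sched_getattr, pid, attr, sizeof(*attr), 0);
}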
@@ -7422,6 +7432,7 @@ static int sched_dl_global_constraints(void)
7422 u64 period = global_rt_period(); 7432 u64 period = global_rt_period();
7423 u64 new_bw = to_ratio(period, runtime); 7433 u64 new_bw = to_ratio(period, runtime);
7424 int cpu, ret = 0; 7434 int cpu, ret = 0;
7435 unsigned long flags;
7425 7436
7426 /* 7437 /*
7427 * Here we want to check the bandwidth not being set to some 7438 * Here we want to check the bandwidth not being set to some
@@ -7435,10 +7446,10 @@ static int sched_dl_global_constraints(void)
7435 for_each_possible_cpu(cpu) { 7446 for_each_possible_cpu(cpu) {
7436 struct dl_bw *dl_b = dl_bw_of(cpu); 7447 struct dl_bw *dl_b = dl_bw_of(cpu);
7437 7448
7438 raw_spin_lock(&dl_b->lock); 7449 raw_spin_lock_irqsave(&dl_b->lock, flags);
7439 if (new_bw < dl_b->total_bw) 7450 if (new_bw < dl_b->total_bw)
7440 ret = -EBUSY; 7451 ret = -EBUSY;
7441 raw_spin_unlock(&dl_b->lock); 7452 raw_spin_unlock_irqrestore(&dl_b->lock, flags);
7442 7453
7443 if (ret) 7454 if (ret)
7444 break; 7455 break;
@@ -7451,6 +7462,7 @@ static void sched_dl_do_global(void)
7451{ 7462{
7452 u64 new_bw = -1; 7463 u64 new_bw = -1;
7453 int cpu; 7464 int cpu;
7465 unsigned long flags;
7454 7466
7455 def_dl_bandwidth.dl_period = global_rt_period(); 7467 def_dl_bandwidth.dl_period = global_rt_period();
7456 def_dl_bandwidth.dl_runtime = global_rt_runtime(); 7468 def_dl_bandwidth.dl_runtime = global_rt_runtime();
@@ -7464,9 +7476,9 @@ static void sched_dl_do_global(void)
7464 for_each_possible_cpu(cpu) { 7476 for_each_possible_cpu(cpu) {
7465 struct dl_bw *dl_b = dl_bw_of(cpu); 7477 struct dl_bw *dl_b = dl_bw_of(cpu);
7466 7478
7467 raw_spin_lock(&dl_b->lock); 7479 raw_spin_lock_irqsave(&dl_b->lock, flags);
7468 dl_b->bw = new_bw; 7480 dl_b->bw = new_bw;
7469 raw_spin_unlock(&dl_b->lock); 7481 raw_spin_unlock_irqrestore(&dl_b->lock, flags);
7470 } 7482 }
7471} 7483}
7472 7484
@@ -7475,7 +7487,8 @@ static int sched_rt_global_validate(void)
7475 if (sysctl_sched_rt_period <= 0) 7487 if (sysctl_sched_rt_period <= 0)
7476 return -EINVAL; 7488 return -EINVAL;
7477 7489
7478 if (sysctl_sched_rt_runtime > sysctl_sched_rt_period) 7490 if ((sysctl_sched_rt_runtime != RUNTIME_INF) &&
7491 (sysctl_sched_rt_runtime > sysctl_sched_rt_period))
7479 return -EINVAL; 7492 return -EINVAL;
7480 7493
7481 return 0; 7494 return 0;
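
The sched/core.c hunks above give sched_setattr() and sched_getattr() a trailing flags argument that must be zero for now, copy only attr->size bytes back to userspace, and refuse SCHED_DEADLINE for unprivileged callers. A minimal userspace sketch of calling the new prototypes follows; the syscall numbers are the x86_64 ones and, together with the locally mirrored struct sched_attr, are assumptions for illustration rather than part of this patch.

/* Hypothetical userspace sketch (not from the patch): exercising the new
 * sched_setattr()/sched_getattr() prototypes, whose trailing 'flags'
 * argument must be zero or the kernel returns -EINVAL.  Needs root
 * (CAP_SYS_NICE), since unprivileged SCHED_DEADLINE is -EPERM above. */
#define _GNU_SOURCE
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef SYS_sched_setattr       /* x86_64 numbers; an assumption for old libcs */
#define SYS_sched_setattr 314
#define SYS_sched_getattr 315
#endif
#ifndef SCHED_DEADLINE
#define SCHED_DEADLINE 6
#endif

struct sched_attr {             /* mirrors include/uapi/linux/sched.h */
        uint32_t size;
        uint32_t sched_policy;
        uint64_t sched_flags;
        int32_t  sched_nice;
        uint32_t sched_priority;
        uint64_t sched_runtime;  /* ns */
        uint64_t sched_deadline; /* ns */
        uint64_t sched_period;   /* ns */
};

int main(void)
{
        struct sched_attr attr;

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.sched_policy   = SCHED_DEADLINE;
        attr.sched_runtime  = 10 * 1000 * 1000; /* 10 ms of runtime ... */
        attr.sched_deadline = 30 * 1000 * 1000; /* ... by a 30 ms deadline ... */
        attr.sched_period   = 30 * 1000 * 1000; /* ... every 30 ms period */

        /* The last argument is the new 'flags'; anything but 0 is -EINVAL. */
        if (syscall(SYS_sched_setattr, 0, &attr, 0))
                perror("sched_setattr");

        if (syscall(SYS_sched_getattr, 0, &attr, sizeof(attr), 0))
                perror("sched_getattr");
        else
                printf("policy %u runtime %llu ns\n", attr.sched_policy,
                       (unsigned long long)attr.sched_runtime);
        return 0;
}
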
diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c
index 045fc74e3f09..5b9bb42b2d47 100644
--- a/kernel/sched/cpudeadline.c
+++ b/kernel/sched/cpudeadline.c
@@ -70,7 +70,7 @@ static void cpudl_heapify(struct cpudl *cp, int idx)
70 70
71static void cpudl_change_key(struct cpudl *cp, int idx, u64 new_dl) 71static void cpudl_change_key(struct cpudl *cp, int idx, u64 new_dl)
72{ 72{
73 WARN_ON(idx > num_present_cpus() || idx == IDX_INVALID); 73 WARN_ON(idx == IDX_INVALID || !cpu_present(idx));
74 74
75 if (dl_time_before(new_dl, cp->elements[idx].dl)) { 75 if (dl_time_before(new_dl, cp->elements[idx].dl)) {
76 cp->elements[idx].dl = new_dl; 76 cp->elements[idx].dl = new_dl;
@@ -117,7 +117,7 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p,
117 } 117 }
118 118
119out: 119out:
120 WARN_ON(best_cpu > num_present_cpus() && best_cpu != -1); 120 WARN_ON(best_cpu != -1 && !cpu_present(best_cpu));
121 121
122 return best_cpu; 122 return best_cpu;
123} 123}
@@ -137,7 +137,7 @@ void cpudl_set(struct cpudl *cp, int cpu, u64 dl, int is_valid)
137 int old_idx, new_cpu; 137 int old_idx, new_cpu;
138 unsigned long flags; 138 unsigned long flags;
139 139
140 WARN_ON(cpu > num_present_cpus()); 140 WARN_ON(!cpu_present(cpu));
141 141
142 raw_spin_lock_irqsave(&cp->lock, flags); 142 raw_spin_lock_irqsave(&cp->lock, flags);
143 old_idx = cp->cpu_to_idx[cpu]; 143 old_idx = cp->cpu_to_idx[cpu];
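
The cpudeadline.c hunks replace comparisons against num_present_cpus() with cpu_present() checks; comparing a CPU number to the count of present CPUs misfires whenever CPU numbering is sparse. A standalone illustration (plain C, not kernel code, with a made-up present mask) of the two failure modes:

/* Illustration only: a sparse "present" mask {0, 1, 4}.  The old check
 * compared a CPU number against the *count* of present CPUs, which is
 * neither necessary nor sufficient when the numbering has holes. */
#include <stdbool.h>
#include <stdio.h>

static const unsigned long present_mask = (1UL << 0) | (1UL << 1) | (1UL << 4);

static bool cpu_present(int cpu)   { return present_mask & (1UL << cpu); }
static int  num_present_cpus(void) { return __builtin_popcountl(present_mask); }

int main(void)
{
        /* cpu 4 is present, but 4 > num_present_cpus() == 3: old WARN fires. */
        printf("cpu 4: old check %s, new check %s\n",
               4 > num_present_cpus() ? "warns" : "ok",
               cpu_present(4) ? "ok" : "warns");

        /* cpu 2 is absent, but 2 <= 3: the old check stays silent. */
        printf("cpu 2: old check %s, new check %s\n",
               2 > num_present_cpus() ? "warns" : "ok",
               cpu_present(2) ? "ok" : "warns");
        return 0;
}
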
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 0dd5e0971a07..6e79b3faa4cd 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -121,7 +121,7 @@ static inline void dl_clear_overload(struct rq *rq)
121 121
122static void update_dl_migration(struct dl_rq *dl_rq) 122static void update_dl_migration(struct dl_rq *dl_rq)
123{ 123{
124 if (dl_rq->dl_nr_migratory && dl_rq->dl_nr_total > 1) { 124 if (dl_rq->dl_nr_migratory && dl_rq->dl_nr_running > 1) {
125 if (!dl_rq->overloaded) { 125 if (!dl_rq->overloaded) {
126 dl_set_overload(rq_of_dl_rq(dl_rq)); 126 dl_set_overload(rq_of_dl_rq(dl_rq));
127 dl_rq->overloaded = 1; 127 dl_rq->overloaded = 1;
@@ -135,9 +135,7 @@ static void update_dl_migration(struct dl_rq *dl_rq)
135static void inc_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) 135static void inc_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
136{ 136{
137 struct task_struct *p = dl_task_of(dl_se); 137 struct task_struct *p = dl_task_of(dl_se);
138 dl_rq = &rq_of_dl_rq(dl_rq)->dl;
139 138
140 dl_rq->dl_nr_total++;
141 if (p->nr_cpus_allowed > 1) 139 if (p->nr_cpus_allowed > 1)
142 dl_rq->dl_nr_migratory++; 140 dl_rq->dl_nr_migratory++;
143 141
@@ -147,9 +145,7 @@ static void inc_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
147static void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) 145static void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
148{ 146{
149 struct task_struct *p = dl_task_of(dl_se); 147 struct task_struct *p = dl_task_of(dl_se);
150 dl_rq = &rq_of_dl_rq(dl_rq)->dl;
151 148
152 dl_rq->dl_nr_total--;
153 if (p->nr_cpus_allowed > 1) 149 if (p->nr_cpus_allowed > 1)
154 dl_rq->dl_nr_migratory--; 150 dl_rq->dl_nr_migratory--;
155 151
@@ -566,6 +562,8 @@ int dl_runtime_exceeded(struct rq *rq, struct sched_dl_entity *dl_se)
566 return 1; 562 return 1;
567} 563}
568 564
565extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq);
566
569/* 567/*
570 * Update the current task's runtime statistics (provided it is still 568 * Update the current task's runtime statistics (provided it is still
571 * a -deadline task and has not been removed from the dl_rq). 569 * a -deadline task and has not been removed from the dl_rq).
@@ -629,11 +627,13 @@ static void update_curr_dl(struct rq *rq)
629 struct rt_rq *rt_rq = &rq->rt; 627 struct rt_rq *rt_rq = &rq->rt;
630 628
631 raw_spin_lock(&rt_rq->rt_runtime_lock); 629 raw_spin_lock(&rt_rq->rt_runtime_lock);
632 rt_rq->rt_time += delta_exec;
633 /* 630 /*
634 * We'll let actual RT tasks worry about the overflow here, we 631 * We'll let actual RT tasks worry about the overflow here, we
635 * have our own CBS to keep us inline -- see above. 632 * have our own CBS to keep us inline; only account when RT
633 * bandwidth is relevant.
636 */ 634 */
635 if (sched_rt_bandwidth_account(rt_rq))
636 rt_rq->rt_time += delta_exec;
637 raw_spin_unlock(&rt_rq->rt_runtime_lock); 637 raw_spin_unlock(&rt_rq->rt_runtime_lock);
638 } 638 }
639} 639}
@@ -717,6 +717,7 @@ void inc_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
717 717
718 WARN_ON(!dl_prio(prio)); 718 WARN_ON(!dl_prio(prio));
719 dl_rq->dl_nr_running++; 719 dl_rq->dl_nr_running++;
720 inc_nr_running(rq_of_dl_rq(dl_rq));
720 721
721 inc_dl_deadline(dl_rq, deadline); 722 inc_dl_deadline(dl_rq, deadline);
722 inc_dl_migration(dl_se, dl_rq); 723 inc_dl_migration(dl_se, dl_rq);
@@ -730,6 +731,7 @@ void dec_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
730 WARN_ON(!dl_prio(prio)); 731 WARN_ON(!dl_prio(prio));
731 WARN_ON(!dl_rq->dl_nr_running); 732 WARN_ON(!dl_rq->dl_nr_running);
732 dl_rq->dl_nr_running--; 733 dl_rq->dl_nr_running--;
734 dec_nr_running(rq_of_dl_rq(dl_rq));
733 735
734 dec_dl_deadline(dl_rq, dl_se->deadline); 736 dec_dl_deadline(dl_rq, dl_se->deadline);
735 dec_dl_migration(dl_se, dl_rq); 737 dec_dl_migration(dl_se, dl_rq);
@@ -836,8 +838,6 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
836 838
837 if (!task_current(rq, p) && p->nr_cpus_allowed > 1) 839 if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
838 enqueue_pushable_dl_task(rq, p); 840 enqueue_pushable_dl_task(rq, p);
839
840 inc_nr_running(rq);
841} 841}
842 842
843static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags) 843static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags)
@@ -850,8 +850,6 @@ static void dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags)
850{ 850{
851 update_curr_dl(rq); 851 update_curr_dl(rq);
852 __dequeue_task_dl(rq, p, flags); 852 __dequeue_task_dl(rq, p, flags);
853
854 dec_nr_running(rq);
855} 853}
856 854
857/* 855/*
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 966cc2bfcb77..9b4c4f320130 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1757,6 +1757,8 @@ void task_numa_work(struct callback_head *work)
1757 start = end; 1757 start = end;
1758 if (pages <= 0) 1758 if (pages <= 0)
1759 goto out; 1759 goto out;
1760
1761 cond_resched();
1760 } while (end != vma->vm_end); 1762 } while (end != vma->vm_end);
1761 } 1763 }
1762 1764
@@ -6999,15 +7001,15 @@ static void switched_from_fair(struct rq *rq, struct task_struct *p)
6999 struct cfs_rq *cfs_rq = cfs_rq_of(se); 7001 struct cfs_rq *cfs_rq = cfs_rq_of(se);
7000 7002
7001 /* 7003 /*
7002 * Ensure the task's vruntime is normalized, so that when its 7004 * Ensure the task's vruntime is normalized, so that when it's
7003 * switched back to the fair class the enqueue_entity(.flags=0) will 7005 * switched back to the fair class the enqueue_entity(.flags=0) will
7004 * do the right thing. 7006 * do the right thing.
7005 * 7007 *
7006 * If it was on_rq, then the dequeue_entity(.flags=0) will already 7008 * If it's on_rq, then the dequeue_entity(.flags=0) will already
7007 * have normalized the vruntime, if it was !on_rq, then only when 7009 * have normalized the vruntime, if it's !on_rq, then only when
7008 * the task is sleeping will it still have non-normalized vruntime. 7010 * the task is sleeping will it still have non-normalized vruntime.
7009 */ 7011 */
7010 if (!se->on_rq && p->state != TASK_RUNNING) { 7012 if (!p->on_rq && p->state != TASK_RUNNING) {
7011 /* 7013 /*
7012 * Fix up our vruntime so that the current sleep doesn't 7014 * Fix up our vruntime so that the current sleep doesn't
7013 * cause 'unlimited' sleep bonus. 7015 * cause 'unlimited' sleep bonus.
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index a2740b775b45..1999021042c7 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -538,6 +538,14 @@ static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
538 538
539#endif /* CONFIG_RT_GROUP_SCHED */ 539#endif /* CONFIG_RT_GROUP_SCHED */
540 540
541bool sched_rt_bandwidth_account(struct rt_rq *rt_rq)
542{
543 struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
544
545 return (hrtimer_active(&rt_b->rt_period_timer) ||
546 rt_rq->rt_time < rt_b->rt_runtime);
547}
548
541#ifdef CONFIG_SMP 549#ifdef CONFIG_SMP
542/* 550/*
543 * We ran out of runtime, see if we can borrow some from our neighbours. 551 * We ran out of runtime, see if we can borrow some from our neighbours.
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index c2119fd20f8b..f964add50f38 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -462,7 +462,6 @@ struct dl_rq {
462 } earliest_dl; 462 } earliest_dl;
463 463
464 unsigned long dl_nr_migratory; 464 unsigned long dl_nr_migratory;
465 unsigned long dl_nr_total;
466 int overloaded; 465 int overloaded;
467 466
468 /* 467 /*
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 84571e09c907..01fbae5b97b7 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -293,7 +293,7 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *
293 */ 293 */
294 smp_call_function_single(min(cpu1, cpu2), 294 smp_call_function_single(min(cpu1, cpu2),
295 &irq_cpu_stop_queue_work, 295 &irq_cpu_stop_queue_work,
296 &call_args, 0); 296 &call_args, 1);
297 lg_local_unlock(&stop_cpus_lock); 297 lg_local_unlock(&stop_cpus_lock);
298 preempt_enable(); 298 preempt_enable();
299 299
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
index 7a925ba456fb..a6a5bf53e86d 100644
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -51,7 +51,13 @@
51 * HZ shrinks, so values greater than 8 overflow 32bits when 51 * HZ shrinks, so values greater than 8 overflow 32bits when
52 * HZ=100. 52 * HZ=100.
53 */ 53 */
54#if HZ < 34
55#define JIFFIES_SHIFT 6
56#elif HZ < 67
57#define JIFFIES_SHIFT 7
58#else
54#define JIFFIES_SHIFT 8 59#define JIFFIES_SHIFT 8
60#endif
55 61
56static cycle_t jiffies_read(struct clocksource *cs) 62static cycle_t jiffies_read(struct clocksource *cs)
57{ 63{
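
The new JIFFIES_SHIFT ladder exists because the jiffies clocksource multiplier is (roughly) the nanoseconds-per-jiffy value scaled left by JIFFIES_SHIFT, and that product must fit the 32-bit mult field; as HZ shrinks, nanoseconds-per-jiffy grows and the affordable shift drops. A small userspace sketch of that bound; the patch keeps the historical shift of 8 for larger HZ, and its HZ < 34 / HZ < 67 cutoffs leave some headroom below the exact limit computed here.

/* Sketch (userspace, illustration only) of the 32-bit overflow the comment
 * above refers to: at HZ=100 a shift of 8 still fits (1e7 << 8 < 2^32),
 * but lower HZ values need a smaller shift. */
#include <stdint.h>
#include <stdio.h>

static unsigned int max_jiffies_shift(unsigned int hz)
{
        uint64_t nsec_per_jiffy = 1000000000ULL / hz;
        unsigned int shift = 0;

        while ((nsec_per_jiffy << (shift + 1)) <= UINT32_MAX)
                shift++;
        return shift;
}

int main(void)
{
        unsigned int hz[] = { 24, 32, 48, 64, 100 };

        for (unsigned int i = 0; i < sizeof(hz) / sizeof(hz[0]); i++)
                printf("HZ=%3u: largest shift that still fits 32 bits = %u\n",
                       hz[i], max_jiffies_shift(hz[i]));
        return 0;
}
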
diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
index 0abb36464281..4d23dc4d8139 100644
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -116,20 +116,42 @@ static enum hrtimer_restart sched_clock_poll(struct hrtimer *hrt)
116void __init sched_clock_register(u64 (*read)(void), int bits, 116void __init sched_clock_register(u64 (*read)(void), int bits,
117 unsigned long rate) 117 unsigned long rate)
118{ 118{
119 u64 res, wrap, new_mask, new_epoch, cyc, ns;
120 u32 new_mult, new_shift;
121 ktime_t new_wrap_kt;
119 unsigned long r; 122 unsigned long r;
120 u64 res, wrap;
121 char r_unit; 123 char r_unit;
122 124
123 if (cd.rate > rate) 125 if (cd.rate > rate)
124 return; 126 return;
125 127
126 WARN_ON(!irqs_disabled()); 128 WARN_ON(!irqs_disabled());
127 read_sched_clock = read;
128 sched_clock_mask = CLOCKSOURCE_MASK(bits);
129 cd.rate = rate;
130 129
131 /* calculate the mult/shift to convert counter ticks to ns. */ 130 /* calculate the mult/shift to convert counter ticks to ns. */
132 clocks_calc_mult_shift(&cd.mult, &cd.shift, rate, NSEC_PER_SEC, 3600); 131 clocks_calc_mult_shift(&new_mult, &new_shift, rate, NSEC_PER_SEC, 3600);
132
133 new_mask = CLOCKSOURCE_MASK(bits);
134
135 /* calculate how many ns until we wrap */
136 wrap = clocks_calc_max_nsecs(new_mult, new_shift, 0, new_mask);
137 new_wrap_kt = ns_to_ktime(wrap - (wrap >> 3));
138
139 /* update epoch for new counter and update epoch_ns from old counter*/
140 new_epoch = read();
141 cyc = read_sched_clock();
142 ns = cd.epoch_ns + cyc_to_ns((cyc - cd.epoch_cyc) & sched_clock_mask,
143 cd.mult, cd.shift);
144
145 raw_write_seqcount_begin(&cd.seq);
146 read_sched_clock = read;
147 sched_clock_mask = new_mask;
148 cd.rate = rate;
149 cd.wrap_kt = new_wrap_kt;
150 cd.mult = new_mult;
151 cd.shift = new_shift;
152 cd.epoch_cyc = new_epoch;
153 cd.epoch_ns = ns;
154 raw_write_seqcount_end(&cd.seq);
133 155
134 r = rate; 156 r = rate;
135 if (r >= 4000000) { 157 if (r >= 4000000) {
@@ -141,22 +163,12 @@ void __init sched_clock_register(u64 (*read)(void), int bits,
141 } else 163 } else
142 r_unit = ' '; 164 r_unit = ' ';
143 165
144 /* calculate how many ns until we wrap */
145 wrap = clocks_calc_max_nsecs(cd.mult, cd.shift, 0, sched_clock_mask);
146 cd.wrap_kt = ns_to_ktime(wrap - (wrap >> 3));
147
148 /* calculate the ns resolution of this counter */ 166 /* calculate the ns resolution of this counter */
149 res = cyc_to_ns(1ULL, cd.mult, cd.shift); 167 res = cyc_to_ns(1ULL, new_mult, new_shift);
168
150 pr_info("sched_clock: %u bits at %lu%cHz, resolution %lluns, wraps every %lluns\n", 169 pr_info("sched_clock: %u bits at %lu%cHz, resolution %lluns, wraps every %lluns\n",
151 bits, r, r_unit, res, wrap); 170 bits, r, r_unit, res, wrap);
152 171
153 update_sched_clock();
154
155 /*
156 * Ensure that sched_clock() starts off at 0ns
157 */
158 cd.epoch_ns = 0;
159
160 /* Enable IRQ time accounting if we have a fast enough sched_clock */ 172 /* Enable IRQ time accounting if we have a fast enough sched_clock */
161 if (irqtime > 0 || (irqtime == -1 && rate >= 1000000)) 173 if (irqtime > 0 || (irqtime == -1 && rate >= 1000000))
162 enable_sched_clock_irqtime(); 174 enable_sched_clock_irqtime();
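
The rewritten sched_clock_register() computes the new mult/shift, mask, and wrap point into locals, carries the time already accumulated on the old counter into epoch_ns, and only then publishes everything inside a single raw_write_seqcount_begin()/end() section, so readers never observe a half-updated clock. A userspace sketch of the arithmetic, assuming cyc_to_ns() is the usual fixed-point (cyc * mult) >> shift conversion:

/* Illustration only (userspace): the fixed-point conversion and epoch
 * carry-over performed above.  cyc_to_ns() is assumed to be
 * (cyc * mult) >> shift, and clocks_calc_mult_shift() is assumed to pick
 * mult/shift for a given counter rate. */
#include <stdint.h>
#include <stdio.h>

static inline uint64_t cyc_to_ns(uint64_t cyc, uint32_t mult, uint32_t shift)
{
        return (cyc * mult) >> shift;
}

int main(void)
{
        /* Old 32-bit counter at 1 MHz: 1 tick = 1000 ns (mult=1000, shift=0). */
        uint32_t old_mult = 1000, old_shift = 0;
        uint64_t old_mask = 0xffffffffULL;
        uint64_t epoch_cyc = 123456, epoch_ns = 0;
        uint64_t cyc_now = 125456;      /* 2000 old ticks later */

        /* Carry the elapsed time forward before switching counters ... */
        uint64_t ns = epoch_ns +
                cyc_to_ns((cyc_now - epoch_cyc) & old_mask, old_mult, old_shift);

        /* ... so the new counter continues from that epoch instead of 0. */
        printf("epoch_ns handed to the new clock: %llu ns\n",
               (unsigned long long)ns);        /* 2000000 ns */
        return 0;
}
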
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 43780ab5e279..98977a57ac72 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -756,6 +756,7 @@ out:
756static void tick_broadcast_clear_oneshot(int cpu) 756static void tick_broadcast_clear_oneshot(int cpu)
757{ 757{
758 cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask); 758 cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
759 cpumask_clear_cpu(cpu, tick_broadcast_pending_mask);
759} 760}
760 761
761static void tick_broadcast_init_next_event(struct cpumask *mask, 762static void tick_broadcast_init_next_event(struct cpumask *mask,
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 0aa4ce81bc16..5b40279ecd71 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -1435,7 +1435,8 @@ void update_wall_time(void)
1435out: 1435out:
1436 raw_spin_unlock_irqrestore(&timekeeper_lock, flags); 1436 raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
1437 if (clock_set) 1437 if (clock_set)
1438 clock_was_set(); 1438 /* Have to call _delayed version, since in irq context*/
1439 clock_was_set_delayed();
1439} 1440}
1440 1441
1441/** 1442/**
diff --git a/kernel/torture.c b/kernel/torture.c
new file mode 100644
index 000000000000..acc9afc2f26e
--- /dev/null
+++ b/kernel/torture.c
@@ -0,0 +1,719 @@
1/*
2 * Common functions for in-kernel torture tests.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, you can access it online at
16 * http://www.gnu.org/licenses/gpl-2.0.html.
17 *
18 * Copyright (C) IBM Corporation, 2014
19 *
20 * Author: Paul E. McKenney <paulmck@us.ibm.com>
21 * Based on kernel/rcu/torture.c.
22 */
23#include <linux/types.h>
24#include <linux/kernel.h>
25#include <linux/init.h>
26#include <linux/module.h>
27#include <linux/kthread.h>
28#include <linux/err.h>
29#include <linux/spinlock.h>
30#include <linux/smp.h>
31#include <linux/interrupt.h>
32#include <linux/sched.h>
33#include <linux/atomic.h>
34#include <linux/bitops.h>
35#include <linux/completion.h>
36#include <linux/moduleparam.h>
37#include <linux/percpu.h>
38#include <linux/notifier.h>
39#include <linux/reboot.h>
40#include <linux/freezer.h>
41#include <linux/cpu.h>
42#include <linux/delay.h>
43#include <linux/stat.h>
44#include <linux/slab.h>
45#include <linux/trace_clock.h>
46#include <asm/byteorder.h>
47#include <linux/torture.h>
48
49MODULE_LICENSE("GPL");
50MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com>");
51
52static char *torture_type;
53static bool verbose;
54
55/* Mediate rmmod and system shutdown. Concurrent rmmod & shutdown illegal! */
56#define FULLSTOP_DONTSTOP 0 /* Normal operation. */
57#define FULLSTOP_SHUTDOWN 1 /* System shutdown with torture running. */
58#define FULLSTOP_RMMOD 2 /* Normal rmmod of torture. */
59static int fullstop = FULLSTOP_RMMOD;
60static DEFINE_MUTEX(fullstop_mutex);
61static int *torture_runnable;
62
63#ifdef CONFIG_HOTPLUG_CPU
64
65/*
66 * Variables for online-offline handling. Only present if CPU hotplug
67 * is enabled, otherwise does nothing.
68 */
69
70static struct task_struct *onoff_task;
71static long onoff_holdoff;
72static long onoff_interval;
73static long n_offline_attempts;
74static long n_offline_successes;
75static unsigned long sum_offline;
76static int min_offline = -1;
77static int max_offline;
78static long n_online_attempts;
79static long n_online_successes;
80static unsigned long sum_online;
81static int min_online = -1;
82static int max_online;
83
84/*
85 * Execute random CPU-hotplug operations at the interval specified
86 * by the onoff_interval.
87 */
88static int
89torture_onoff(void *arg)
90{
91 int cpu;
92 unsigned long delta;
93 int maxcpu = -1;
94 DEFINE_TORTURE_RANDOM(rand);
95 int ret;
96 unsigned long starttime;
97
98 VERBOSE_TOROUT_STRING("torture_onoff task started");
99 for_each_online_cpu(cpu)
100 maxcpu = cpu;
101 WARN_ON(maxcpu < 0);
102 if (onoff_holdoff > 0) {
103 VERBOSE_TOROUT_STRING("torture_onoff begin holdoff");
104 schedule_timeout_interruptible(onoff_holdoff);
105 VERBOSE_TOROUT_STRING("torture_onoff end holdoff");
106 }
107 while (!torture_must_stop()) {
108 cpu = (torture_random(&rand) >> 4) % (maxcpu + 1);
109 if (cpu_online(cpu) && cpu_is_hotpluggable(cpu)) {
110 if (verbose)
111 pr_alert("%s" TORTURE_FLAG
112 "torture_onoff task: offlining %d\n",
113 torture_type, cpu);
114 starttime = jiffies;
115 n_offline_attempts++;
116 ret = cpu_down(cpu);
117 if (ret) {
118 if (verbose)
119 pr_alert("%s" TORTURE_FLAG
120 "torture_onoff task: offline %d failed: errno %d\n",
121 torture_type, cpu, ret);
122 } else {
123 if (verbose)
124 pr_alert("%s" TORTURE_FLAG
125 "torture_onoff task: offlined %d\n",
126 torture_type, cpu);
127 n_offline_successes++;
128 delta = jiffies - starttime;
129 sum_offline += delta;
130 if (min_offline < 0) {
131 min_offline = delta;
132 max_offline = delta;
133 }
134 if (min_offline > delta)
135 min_offline = delta;
136 if (max_offline < delta)
137 max_offline = delta;
138 }
139 } else if (cpu_is_hotpluggable(cpu)) {
140 if (verbose)
141 pr_alert("%s" TORTURE_FLAG
142 "torture_onoff task: onlining %d\n",
143 torture_type, cpu);
144 starttime = jiffies;
145 n_online_attempts++;
146 ret = cpu_up(cpu);
147 if (ret) {
148 if (verbose)
149 pr_alert("%s" TORTURE_FLAG
150 "torture_onoff task: online %d failed: errno %d\n",
151 torture_type, cpu, ret);
152 } else {
153 if (verbose)
154 pr_alert("%s" TORTURE_FLAG
155 "torture_onoff task: onlined %d\n",
156 torture_type, cpu);
157 n_online_successes++;
158 delta = jiffies - starttime;
159 sum_online += delta;
160 if (min_online < 0) {
161 min_online = delta;
162 max_online = delta;
163 }
164 if (min_online > delta)
165 min_online = delta;
166 if (max_online < delta)
167 max_online = delta;
168 }
169 }
170 schedule_timeout_interruptible(onoff_interval);
171 }
172 torture_kthread_stopping("torture_onoff");
173 return 0;
174}
175
176#endif /* #ifdef CONFIG_HOTPLUG_CPU */
177
178/*
179 * Initiate online-offline handling.
180 */
181int torture_onoff_init(long ooholdoff, long oointerval)
182{
183 int ret = 0;
184
185#ifdef CONFIG_HOTPLUG_CPU
186 onoff_holdoff = ooholdoff;
187 onoff_interval = oointerval;
188 if (onoff_interval <= 0)
189 return 0;
190 ret = torture_create_kthread(torture_onoff, NULL, onoff_task);
191#endif /* #ifdef CONFIG_HOTPLUG_CPU */
192 return ret;
193}
194EXPORT_SYMBOL_GPL(torture_onoff_init);
195
196/*
197 * Clean up after online/offline testing.
198 */
199static void torture_onoff_cleanup(void)
200{
201#ifdef CONFIG_HOTPLUG_CPU
202 if (onoff_task == NULL)
203 return;
204 VERBOSE_TOROUT_STRING("Stopping torture_onoff task");
205 kthread_stop(onoff_task);
206 onoff_task = NULL;
207#endif /* #ifdef CONFIG_HOTPLUG_CPU */
208}
209EXPORT_SYMBOL_GPL(torture_onoff_cleanup);
210
211/*
212 * Print online/offline testing statistics.
213 */
214char *torture_onoff_stats(char *page)
215{
216#ifdef CONFIG_HOTPLUG_CPU
217 page += sprintf(page,
218 "onoff: %ld/%ld:%ld/%ld %d,%d:%d,%d %lu:%lu (HZ=%d) ",
219 n_online_successes, n_online_attempts,
220 n_offline_successes, n_offline_attempts,
221 min_online, max_online,
222 min_offline, max_offline,
223 sum_online, sum_offline, HZ);
224#endif /* #ifdef CONFIG_HOTPLUG_CPU */
225 return page;
226}
227EXPORT_SYMBOL_GPL(torture_onoff_stats);
228
229/*
230 * Were all the online/offline operations successful?
231 */
232bool torture_onoff_failures(void)
233{
234#ifdef CONFIG_HOTPLUG_CPU
235 return n_online_successes != n_online_attempts ||
236 n_offline_successes != n_offline_attempts;
237#else /* #ifdef CONFIG_HOTPLUG_CPU */
238 return false;
239#endif /* #else #ifdef CONFIG_HOTPLUG_CPU */
240}
241EXPORT_SYMBOL_GPL(torture_onoff_failures);
242
243#define TORTURE_RANDOM_MULT 39916801 /* prime */
244#define TORTURE_RANDOM_ADD 479001701 /* prime */
245#define TORTURE_RANDOM_REFRESH 10000
246
247/*
248 * Crude but fast random-number generator. Uses a linear congruential
249 * generator, with occasional help from cpu_clock().
250 */
251unsigned long
252torture_random(struct torture_random_state *trsp)
253{
254 if (--trsp->trs_count < 0) {
255 trsp->trs_state += (unsigned long)local_clock();
256 trsp->trs_count = TORTURE_RANDOM_REFRESH;
257 }
258 trsp->trs_state = trsp->trs_state * TORTURE_RANDOM_MULT +
259 TORTURE_RANDOM_ADD;
260 return swahw32(trsp->trs_state);
261}
262EXPORT_SYMBOL_GPL(torture_random);
263
264/*
265 * Variables for shuffling. The idea is to ensure that each CPU stays
266 * idle for an extended period to test interactions with dyntick idle,
 267 * as well as interactions with any per-CPU variables.
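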
268 */
269struct shuffle_task {
270 struct list_head st_l;
271 struct task_struct *st_t;
272};
273
274static long shuffle_interval; /* In jiffies. */
275static struct task_struct *shuffler_task;
276static cpumask_var_t shuffle_tmp_mask;
277static int shuffle_idle_cpu; /* Force all torture tasks off this CPU */
278static struct list_head shuffle_task_list = LIST_HEAD_INIT(shuffle_task_list);
279static DEFINE_MUTEX(shuffle_task_mutex);
280
281/*
282 * Register a task to be shuffled. If there is no memory, just splat
283 * and don't bother registering.
284 */
285void torture_shuffle_task_register(struct task_struct *tp)
286{
287 struct shuffle_task *stp;
288
289 if (WARN_ON_ONCE(tp == NULL))
290 return;
291 stp = kmalloc(sizeof(*stp), GFP_KERNEL);
292 if (WARN_ON_ONCE(stp == NULL))
293 return;
294 stp->st_t = tp;
295 mutex_lock(&shuffle_task_mutex);
296 list_add(&stp->st_l, &shuffle_task_list);
297 mutex_unlock(&shuffle_task_mutex);
298}
299EXPORT_SYMBOL_GPL(torture_shuffle_task_register);
300
301/*
302 * Unregister all tasks, for example, at the end of the torture run.
303 */
304static void torture_shuffle_task_unregister_all(void)
305{
306 struct shuffle_task *stp;
307 struct shuffle_task *p;
308
309 mutex_lock(&shuffle_task_mutex);
310 list_for_each_entry_safe(stp, p, &shuffle_task_list, st_l) {
311 list_del(&stp->st_l);
312 kfree(stp);
313 }
314 mutex_unlock(&shuffle_task_mutex);
315}
316
317/* Shuffle tasks such that we allow shuffle_idle_cpu to become idle.
318 * A special case is when shuffle_idle_cpu = -1, in which case we allow
319 * the tasks to run on all CPUs.
320 */
321static void torture_shuffle_tasks(void)
322{
323 struct shuffle_task *stp;
324
325 cpumask_setall(shuffle_tmp_mask);
326 get_online_cpus();
327
328 /* No point in shuffling if there is only one online CPU (ex: UP) */
329 if (num_online_cpus() == 1) {
330 put_online_cpus();
331 return;
332 }
333
334 /* Advance to the next CPU. Upon overflow, don't idle any CPUs. */
335 shuffle_idle_cpu = cpumask_next(shuffle_idle_cpu, shuffle_tmp_mask);
336 if (shuffle_idle_cpu >= nr_cpu_ids)
337 shuffle_idle_cpu = -1;
338 if (shuffle_idle_cpu != -1) {
339 cpumask_clear_cpu(shuffle_idle_cpu, shuffle_tmp_mask);
340 if (cpumask_empty(shuffle_tmp_mask)) {
341 put_online_cpus();
342 return;
343 }
344 }
345
346 mutex_lock(&shuffle_task_mutex);
347 list_for_each_entry(stp, &shuffle_task_list, st_l)
348 set_cpus_allowed_ptr(stp->st_t, shuffle_tmp_mask);
349 mutex_unlock(&shuffle_task_mutex);
350
351 put_online_cpus();
352}
353
354/* Shuffle tasks across CPUs, with the intent of allowing each CPU in the
355 * system to become idle at a time and cut off its timer ticks. This is meant
 356 * to test the support for such tickless idle CPUs in RCU.
357 */
358static int torture_shuffle(void *arg)
359{
360 VERBOSE_TOROUT_STRING("torture_shuffle task started");
361 do {
362 schedule_timeout_interruptible(shuffle_interval);
363 torture_shuffle_tasks();
364 torture_shutdown_absorb("torture_shuffle");
365 } while (!torture_must_stop());
366 torture_kthread_stopping("torture_shuffle");
367 return 0;
368}
369
370/*
371 * Start the shuffler, with shuffint in jiffies.
372 */
373int torture_shuffle_init(long shuffint)
374{
375 shuffle_interval = shuffint;
376
377 shuffle_idle_cpu = -1;
378
379 if (!alloc_cpumask_var(&shuffle_tmp_mask, GFP_KERNEL)) {
380 VERBOSE_TOROUT_ERRSTRING("Failed to alloc mask");
381 return -ENOMEM;
382 }
383
384 /* Create the shuffler thread */
385 return torture_create_kthread(torture_shuffle, NULL, shuffler_task);
386}
387EXPORT_SYMBOL_GPL(torture_shuffle_init);
388
389/*
390 * Stop the shuffling.
391 */
392static void torture_shuffle_cleanup(void)
393{
394 torture_shuffle_task_unregister_all();
395 if (shuffler_task) {
396 VERBOSE_TOROUT_STRING("Stopping torture_shuffle task");
397 kthread_stop(shuffler_task);
398 free_cpumask_var(shuffle_tmp_mask);
399 }
400 shuffler_task = NULL;
401}
402EXPORT_SYMBOL_GPL(torture_shuffle_cleanup);
403
404/*
405 * Variables for auto-shutdown. This allows "lights out" torture runs
406 * to be fully scripted.
407 */
408static int shutdown_secs; /* desired test duration in seconds. */
409static struct task_struct *shutdown_task;
410static unsigned long shutdown_time; /* jiffies to system shutdown. */
411static void (*torture_shutdown_hook)(void);
412
413/*
414 * Absorb kthreads into a kernel function that won't return, so that
415 * they won't ever access module text or data again.
416 */
417void torture_shutdown_absorb(const char *title)
418{
419 while (ACCESS_ONCE(fullstop) == FULLSTOP_SHUTDOWN) {
420 pr_notice("torture thread %s parking due to system shutdown\n",
421 title);
422 schedule_timeout_uninterruptible(MAX_SCHEDULE_TIMEOUT);
423 }
424}
425EXPORT_SYMBOL_GPL(torture_shutdown_absorb);
426
427/*
428 * Cause the torture test to shutdown the system after the test has
429 * run for the time specified by the shutdown_secs parameter.
430 */
431static int torture_shutdown(void *arg)
432{
433 long delta;
434 unsigned long jiffies_snap;
435
436 VERBOSE_TOROUT_STRING("torture_shutdown task started");
437 jiffies_snap = jiffies;
438 while (ULONG_CMP_LT(jiffies_snap, shutdown_time) &&
439 !torture_must_stop()) {
440 delta = shutdown_time - jiffies_snap;
441 if (verbose)
442 pr_alert("%s" TORTURE_FLAG
443 "torture_shutdown task: %lu jiffies remaining\n",
444 torture_type, delta);
445 schedule_timeout_interruptible(delta);
446 jiffies_snap = jiffies;
447 }
448 if (torture_must_stop()) {
449 torture_kthread_stopping("torture_shutdown");
450 return 0;
451 }
452
453 /* OK, shut down the system. */
454
455 VERBOSE_TOROUT_STRING("torture_shutdown task shutting down system");
456 shutdown_task = NULL; /* Avoid self-kill deadlock. */
457 if (torture_shutdown_hook)
458 torture_shutdown_hook();
459 else
460 VERBOSE_TOROUT_STRING("No torture_shutdown_hook(), skipping.");
461 kernel_power_off(); /* Shut down the system. */
462 return 0;
463}
464
465/*
466 * Start up the shutdown task.
467 */
468int torture_shutdown_init(int ssecs, void (*cleanup)(void))
469{
470 int ret = 0;
471
472 shutdown_secs = ssecs;
473 torture_shutdown_hook = cleanup;
474 if (shutdown_secs > 0) {
475 shutdown_time = jiffies + shutdown_secs * HZ;
476 ret = torture_create_kthread(torture_shutdown, NULL,
477 shutdown_task);
478 }
479 return ret;
480}
481EXPORT_SYMBOL_GPL(torture_shutdown_init);
482
483/*
484 * Detect and respond to a system shutdown.
485 */
486static int torture_shutdown_notify(struct notifier_block *unused1,
487 unsigned long unused2, void *unused3)
488{
489 mutex_lock(&fullstop_mutex);
490 if (ACCESS_ONCE(fullstop) == FULLSTOP_DONTSTOP) {
491 VERBOSE_TOROUT_STRING("Unscheduled system shutdown detected");
492 ACCESS_ONCE(fullstop) = FULLSTOP_SHUTDOWN;
493 } else {
494 pr_warn("Concurrent rmmod and shutdown illegal!\n");
495 }
496 mutex_unlock(&fullstop_mutex);
497 return NOTIFY_DONE;
498}
499
500static struct notifier_block torture_shutdown_nb = {
501 .notifier_call = torture_shutdown_notify,
502};
503
504/*
505 * Shut down the shutdown task. Say what??? Heh! This can happen if
506 * the torture module gets an rmmod before the shutdown time arrives. ;-)
507 */
508static void torture_shutdown_cleanup(void)
509{
510 unregister_reboot_notifier(&torture_shutdown_nb);
511 if (shutdown_task != NULL) {
512 VERBOSE_TOROUT_STRING("Stopping torture_shutdown task");
513 kthread_stop(shutdown_task);
514 }
515 shutdown_task = NULL;
516}
517
518/*
519 * Variables for stuttering, which means to periodically pause and
520 * restart testing in order to catch bugs that appear when load is
521 * suddenly applied to or removed from the system.
522 */
523static struct task_struct *stutter_task;
524static int stutter_pause_test;
525static int stutter;
526
527/*
528 * Block until the stutter interval ends. This must be called periodically
529 * by all running kthreads that need to be subject to stuttering.
530 */
531void stutter_wait(const char *title)
532{
533 while (ACCESS_ONCE(stutter_pause_test) ||
534 (torture_runnable && !ACCESS_ONCE(*torture_runnable))) {
535 if (stutter_pause_test)
536 schedule_timeout_interruptible(1);
537 else
538 schedule_timeout_interruptible(round_jiffies_relative(HZ));
539 torture_shutdown_absorb(title);
540 }
541}
542EXPORT_SYMBOL_GPL(stutter_wait);
543
544/*
545 * Cause the torture test to "stutter", starting and stopping all
546 * threads periodically.
547 */
548static int torture_stutter(void *arg)
549{
550 VERBOSE_TOROUT_STRING("torture_stutter task started");
551 do {
552 if (!torture_must_stop()) {
553 schedule_timeout_interruptible(stutter);
554 ACCESS_ONCE(stutter_pause_test) = 1;
555 }
556 if (!torture_must_stop())
557 schedule_timeout_interruptible(stutter);
558 ACCESS_ONCE(stutter_pause_test) = 0;
559 torture_shutdown_absorb("torture_stutter");
560 } while (!torture_must_stop());
561 torture_kthread_stopping("torture_stutter");
562 return 0;
563}
564
565/*
566 * Initialize and kick off the torture_stutter kthread.
567 */
568int torture_stutter_init(int s)
569{
570 int ret;
571
572 stutter = s;
573 ret = torture_create_kthread(torture_stutter, NULL, stutter_task);
574 return ret;
575}
576EXPORT_SYMBOL_GPL(torture_stutter_init);
577
578/*
579 * Cleanup after the torture_stutter kthread.
580 */
581static void torture_stutter_cleanup(void)
582{
583 if (!stutter_task)
584 return;
585 VERBOSE_TOROUT_STRING("Stopping torture_stutter task");
586 kthread_stop(stutter_task);
587 stutter_task = NULL;
588}
589
590/*
591 * Initialize torture module. Please note that this is -not- invoked via
592 * the usual module_init() mechanism, but rather by an explicit call from
593 * the client torture module. This call must be paired with a later
594 * torture_init_end().
595 *
596 * The runnable parameter points to a flag that controls whether or not
597 * the test is currently runnable. If there is no such flag, pass in NULL.
598 */
599void __init torture_init_begin(char *ttype, bool v, int *runnable)
600{
601 mutex_lock(&fullstop_mutex);
602 torture_type = ttype;
603 verbose = v;
604 torture_runnable = runnable;
605 fullstop = FULLSTOP_DONTSTOP;
606
607}
608EXPORT_SYMBOL_GPL(torture_init_begin);
609
610/*
611 * Tell the torture module that initialization is complete.
612 */
613void __init torture_init_end(void)
614{
615 mutex_unlock(&fullstop_mutex);
616 register_reboot_notifier(&torture_shutdown_nb);
617}
618EXPORT_SYMBOL_GPL(torture_init_end);
619
620/*
621 * Clean up torture module. Please note that this is -not- invoked via
622 * the usual module_exit() mechanism, but rather by an explicit call from
623 * the client torture module. Returns true if a race with system shutdown
624 * is detected, otherwise, all kthreads started by functions in this file
625 * will be shut down.
626 *
627 * This must be called before the caller starts shutting down its own
628 * kthreads.
629 */
630bool torture_cleanup(void)
631{
632 mutex_lock(&fullstop_mutex);
633 if (ACCESS_ONCE(fullstop) == FULLSTOP_SHUTDOWN) {
634 pr_warn("Concurrent rmmod and shutdown illegal!\n");
635 mutex_unlock(&fullstop_mutex);
636 schedule_timeout_uninterruptible(10);
637 return true;
638 }
639 ACCESS_ONCE(fullstop) = FULLSTOP_RMMOD;
640 mutex_unlock(&fullstop_mutex);
641 torture_shutdown_cleanup();
642 torture_shuffle_cleanup();
643 torture_stutter_cleanup();
644 torture_onoff_cleanup();
645 return false;
646}
647EXPORT_SYMBOL_GPL(torture_cleanup);
648
649/*
650 * Is it time for the current torture test to stop?
651 */
652bool torture_must_stop(void)
653{
654 return torture_must_stop_irq() || kthread_should_stop();
655}
656EXPORT_SYMBOL_GPL(torture_must_stop);
657
658/*
659 * Is it time for the current torture test to stop? This is the irq-safe
660 * version, hence no check for kthread_should_stop().
661 */
662bool torture_must_stop_irq(void)
663{
664 return ACCESS_ONCE(fullstop) != FULLSTOP_DONTSTOP;
665}
666EXPORT_SYMBOL_GPL(torture_must_stop_irq);
667
668/*
669 * Each kthread must wait for kthread_should_stop() before returning from
670 * its top-level function, otherwise segfaults ensue. This function
671 * prints a "stopping" message and waits for kthread_should_stop(), and
672 * should be called from all torture kthreads immediately prior to
673 * returning.
674 */
675void torture_kthread_stopping(char *title)
676{
677 if (verbose)
678 VERBOSE_TOROUT_STRING(title);
679 while (!kthread_should_stop()) {
680 torture_shutdown_absorb(title);
681 schedule_timeout_uninterruptible(1);
682 }
683}
684EXPORT_SYMBOL_GPL(torture_kthread_stopping);
685
686/*
687 * Create a generic torture kthread that is immediately runnable. If you
688 * need the kthread to be stopped so that you can do something to it before
689 * it starts, you will need to open-code your own.
690 */
691int _torture_create_kthread(int (*fn)(void *arg), void *arg, char *s, char *m,
692 char *f, struct task_struct **tp)
693{
694 int ret = 0;
695
696 VERBOSE_TOROUT_STRING(m);
697 *tp = kthread_run(fn, arg, s);
698 if (IS_ERR(*tp)) {
699 ret = PTR_ERR(*tp);
700 VERBOSE_TOROUT_ERRSTRING(f);
701 *tp = NULL;
702 }
703 torture_shuffle_task_register(*tp);
704 return ret;
705}
706EXPORT_SYMBOL_GPL(_torture_create_kthread);
707
708/*
709 * Stop a generic kthread, emitting a message.
710 */
711void _torture_stop_kthread(char *m, struct task_struct **tp)
712{
713 if (*tp == NULL)
714 return;
715 VERBOSE_TOROUT_STRING(m);
716 kthread_stop(*tp);
717 *tp = NULL;
718}
719EXPORT_SYMBOL_GPL(_torture_stop_kthread);
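
The new kernel/torture.c is a library rather than a self-contained test: a client torture module is expected to call torture_init_begin()/torture_init_end() around its setup, start the optional onoff/shuffle/stutter/shutdown helpers, spawn its own kthreads through torture_create_kthread(), and undo everything via torture_cleanup(). A hypothetical client sketch follows; the module name, parameter values, and the no-op workload are invented for illustration and are not taken from any in-tree user.

/* Hypothetical client module (illustration only) built on the API that
 * kernel/torture.c exports above.  The "work" being tortured is a no-op. */
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/torture.h>

static bool verbose;
module_param(verbose, bool, 0444);
static int example_runnable = 1;

static struct task_struct *example_task;

static int example_torture_kthread(void *arg)
{
        DEFINE_TORTURE_RANDOM(rand);

        do {
                /* ... exercise the code under test here ... */
                schedule_timeout_interruptible(1 + torture_random(&rand) % HZ);
                stutter_wait("example_torture_kthread");
        } while (!torture_must_stop());
        torture_kthread_stopping("example_torture_kthread");
        return 0;
}

static int __init example_torture_init(void)
{
        int ret;

        torture_init_begin("example", verbose, &example_runnable);
        ret = torture_onoff_init(5 * HZ, HZ / 10);      /* holdoff, interval */
        if (!ret)
                ret = torture_shuffle_init(3 * HZ);
        if (!ret)
                ret = torture_stutter_init(5 * HZ);
        if (!ret)
                ret = torture_shutdown_init(0, NULL);   /* no auto-shutdown */
        if (!ret)
                ret = torture_create_kthread(example_torture_kthread, NULL,
                                             example_task);
        torture_init_end();
        return ret;
}

static void __exit example_torture_exit(void)
{
        if (torture_cleanup())
                return;         /* racing with system shutdown */
        _torture_stop_kthread("Stopping example_torture_kthread",
                              &example_task);
}

module_init(example_torture_init);
module_exit(example_torture_exit);
MODULE_LICENSE("GPL");
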
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 294b8a271a04..fc4da2d97f9b 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -2397,6 +2397,13 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
2397 write &= RB_WRITE_MASK; 2397 write &= RB_WRITE_MASK;
2398 tail = write - length; 2398 tail = write - length;
2399 2399
2400 /*
2401 * If this is the first commit on the page, then it has the same
2402 * timestamp as the page itself.
2403 */
2404 if (!tail)
2405 delta = 0;
2406
2400 /* See if we shot pass the end of this buffer page */ 2407 /* See if we shot pass the end of this buffer page */
2401 if (unlikely(write > BUF_PAGE_SIZE)) 2408 if (unlikely(write > BUF_PAGE_SIZE))
2402 return rb_move_tail(cpu_buffer, length, tail, 2409 return rb_move_tail(cpu_buffer, length, tail,
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 815c878f409b..24c1f2382557 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1600,15 +1600,31 @@ void trace_buffer_unlock_commit(struct ring_buffer *buffer,
1600} 1600}
1601EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit); 1601EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit);
1602 1602
1603static struct ring_buffer *temp_buffer;
1604
1603struct ring_buffer_event * 1605struct ring_buffer_event *
1604trace_event_buffer_lock_reserve(struct ring_buffer **current_rb, 1606trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
1605 struct ftrace_event_file *ftrace_file, 1607 struct ftrace_event_file *ftrace_file,
1606 int type, unsigned long len, 1608 int type, unsigned long len,
1607 unsigned long flags, int pc) 1609 unsigned long flags, int pc)
1608{ 1610{
1611 struct ring_buffer_event *entry;
1612
1609 *current_rb = ftrace_file->tr->trace_buffer.buffer; 1613 *current_rb = ftrace_file->tr->trace_buffer.buffer;
1610 return trace_buffer_lock_reserve(*current_rb, 1614 entry = trace_buffer_lock_reserve(*current_rb,
1611 type, len, flags, pc); 1615 type, len, flags, pc);
1616 /*
1617 * If tracing is off, but we have triggers enabled
1618 * we still need to look at the event data. Use the temp_buffer
 1619 * to store the trace event for the trigger to use. It's recursive
1620 * safe and will not be recorded anywhere.
1621 */
1622 if (!entry && ftrace_file->flags & FTRACE_EVENT_FL_TRIGGER_COND) {
1623 *current_rb = temp_buffer;
1624 entry = trace_buffer_lock_reserve(*current_rb,
1625 type, len, flags, pc);
1626 }
1627 return entry;
1612} 1628}
1613EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve); 1629EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
1614 1630
@@ -6494,11 +6510,16 @@ __init static int tracer_alloc_buffers(void)
6494 6510
6495 raw_spin_lock_init(&global_trace.start_lock); 6511 raw_spin_lock_init(&global_trace.start_lock);
6496 6512
6513 /* Used for event triggers */
6514 temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
6515 if (!temp_buffer)
6516 goto out_free_cpumask;
6517
6497 /* TODO: make the number of buffers hot pluggable with CPUS */ 6518 /* TODO: make the number of buffers hot pluggable with CPUS */
6498 if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) { 6519 if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
6499 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n"); 6520 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
6500 WARN_ON(1); 6521 WARN_ON(1);
6501 goto out_free_cpumask; 6522 goto out_free_temp_buffer;
6502 } 6523 }
6503 6524
6504 if (global_trace.buffer_disabled) 6525 if (global_trace.buffer_disabled)
@@ -6540,6 +6561,8 @@ __init static int tracer_alloc_buffers(void)
6540 6561
6541 return 0; 6562 return 0;
6542 6563
6564out_free_temp_buffer:
6565 ring_buffer_free(temp_buffer);
6543out_free_cpumask: 6566out_free_cpumask:
6544 free_percpu(global_trace.trace_buffer.data); 6567 free_percpu(global_trace.trace_buffer.data);
6545#ifdef CONFIG_TRACER_MAX_TRACE 6568#ifdef CONFIG_TRACER_MAX_TRACE
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index e71ffd4eccb5..7b16d40bd64d 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -27,12 +27,6 @@
27 27
28DEFINE_MUTEX(event_mutex); 28DEFINE_MUTEX(event_mutex);
29 29
30DEFINE_MUTEX(event_storage_mutex);
31EXPORT_SYMBOL_GPL(event_storage_mutex);
32
33char event_storage[EVENT_STORAGE_SIZE];
34EXPORT_SYMBOL_GPL(event_storage);
35
36LIST_HEAD(ftrace_events); 30LIST_HEAD(ftrace_events);
37static LIST_HEAD(ftrace_common_fields); 31static LIST_HEAD(ftrace_common_fields);
38 32
@@ -1777,6 +1771,16 @@ static void trace_module_add_events(struct module *mod)
1777{ 1771{
1778 struct ftrace_event_call **call, **start, **end; 1772 struct ftrace_event_call **call, **start, **end;
1779 1773
1774 if (!mod->num_trace_events)
1775 return;
1776
1777 /* Don't add infrastructure for mods without tracepoints */
1778 if (trace_module_has_bad_taint(mod)) {
1779 pr_err("%s: module has bad taint, not creating trace events\n",
1780 mod->name);
1781 return;
1782 }
1783
1780 start = mod->trace_events; 1784 start = mod->trace_events;
1781 end = mod->trace_events + mod->num_trace_events; 1785 end = mod->trace_events + mod->num_trace_events;
1782 1786
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index 7c3e3e72e2b6..ee0a5098ac43 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -95,15 +95,12 @@ static void __always_unused ____ftrace_check_##name(void) \
95#undef __array 95#undef __array
96#define __array(type, item, len) \ 96#define __array(type, item, len) \
97 do { \ 97 do { \
98 char *type_str = #type"["__stringify(len)"]"; \
98 BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ 99 BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \
99 mutex_lock(&event_storage_mutex); \ 100 ret = trace_define_field(event_call, type_str, #item, \
100 snprintf(event_storage, sizeof(event_storage), \
101 "%s[%d]", #type, len); \
102 ret = trace_define_field(event_call, event_storage, #item, \
103 offsetof(typeof(field), item), \ 101 offsetof(typeof(field), item), \
104 sizeof(field.item), \ 102 sizeof(field.item), \
105 is_signed_type(type), filter_type); \ 103 is_signed_type(type), filter_type); \
106 mutex_unlock(&event_storage_mutex); \
107 if (ret) \ 104 if (ret) \
108 return ret; \ 105 return ret; \
109 } while (0); 106 } while (0);
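
The __array() rewrite in trace_export.c drops the shared event_storage buffer and its mutex by building the "type[len]" string at compile time through string-literal pasting with __stringify(). A userspace sketch of that preprocessor trick, with __stringify() redefined locally the way linux/stringify.h defines it:

/* Illustration only: the compile-time string that replaces the old
 * snprintf() into the shared event_storage buffer. */
#include <stdio.h>

#define __stringify_1(x...)     #x
#define __stringify(x...)       __stringify_1(x)

#define TYPE_STR(type, len)     #type "[" __stringify(len) "]"

int main(void)
{
        /* For __array(char, buf, 16) this is the literal "char[16]",
         * so no runtime buffer or locking is needed to build it. */
        const char *type_str = TYPE_STR(char, 16);

        printf("%s\n", type_str);
        return 0;
}
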
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index 29f26540e9c9..031cc5655a51 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -631,6 +631,11 @@ void tracepoint_iter_reset(struct tracepoint_iter *iter)
631EXPORT_SYMBOL_GPL(tracepoint_iter_reset); 631EXPORT_SYMBOL_GPL(tracepoint_iter_reset);
632 632
633#ifdef CONFIG_MODULES 633#ifdef CONFIG_MODULES
634bool trace_module_has_bad_taint(struct module *mod)
635{
636 return mod->taints & ~((1 << TAINT_OOT_MODULE) | (1 << TAINT_CRAP));
637}
638
634static int tracepoint_module_coming(struct module *mod) 639static int tracepoint_module_coming(struct module *mod)
635{ 640{
636 struct tp_module *tp_mod, *iter; 641 struct tp_module *tp_mod, *iter;
@@ -641,7 +646,7 @@ static int tracepoint_module_coming(struct module *mod)
641 * module headers (for forced load), to make sure we don't cause a crash. 646 * module headers (for forced load), to make sure we don't cause a crash.
642 * Staging and out-of-tree GPL modules are fine. 647 * Staging and out-of-tree GPL modules are fine.
643 */ 648 */
644 if (mod->taints & ~((1 << TAINT_OOT_MODULE) | (1 << TAINT_CRAP))) 649 if (trace_module_has_bad_taint(mod))
645 return 0; 650 return 0;
646 mutex_lock(&tracepoints_mutex); 651 mutex_lock(&tracepoints_mutex);
647 tp_mod = kmalloc(sizeof(struct tp_module), GFP_KERNEL); 652 tp_mod = kmalloc(sizeof(struct tp_module), GFP_KERNEL);
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 240fb62cf394..dd06439b9c84 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -225,7 +225,7 @@ static u32 map_id_up(struct uid_gid_map *map, u32 id)
225 * 225 *
226 * When there is no mapping defined for the user-namespace uid 226 * When there is no mapping defined for the user-namespace uid
227 * pair INVALID_UID is returned. Callers are expected to test 227 * pair INVALID_UID is returned. Callers are expected to test
228 * for and handle handle INVALID_UID being returned. INVALID_UID 228 * for and handle INVALID_UID being returned. INVALID_UID
229 * may be tested for using uid_valid(). 229 * may be tested for using uid_valid().
230 */ 230 */
231kuid_t make_kuid(struct user_namespace *ns, uid_t uid) 231kuid_t make_kuid(struct user_namespace *ns, uid_t uid)
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 82ef9f3b7473..193e977a10ea 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1851,6 +1851,12 @@ static void destroy_worker(struct worker *worker)
1851 if (worker->flags & WORKER_IDLE) 1851 if (worker->flags & WORKER_IDLE)
1852 pool->nr_idle--; 1852 pool->nr_idle--;
1853 1853
1854 /*
1855 * Once WORKER_DIE is set, the kworker may destroy itself at any
1856 * point. Pin to ensure the task stays until we're done with it.
1857 */
1858 get_task_struct(worker->task);
1859
1854 list_del_init(&worker->entry); 1860 list_del_init(&worker->entry);
1855 worker->flags |= WORKER_DIE; 1861 worker->flags |= WORKER_DIE;
1856 1862
@@ -1859,6 +1865,7 @@ static void destroy_worker(struct worker *worker)
1859 spin_unlock_irq(&pool->lock); 1865 spin_unlock_irq(&pool->lock);
1860 1866
1861 kthread_stop(worker->task); 1867 kthread_stop(worker->task);
1868 put_task_struct(worker->task);
1862 kfree(worker); 1869 kfree(worker);
1863 1870
1864 spin_lock_irq(&pool->lock); 1871 spin_lock_irq(&pool->lock);
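
The workqueue fix takes a task_struct reference before WORKER_DIE is set and the pool lock is dropped, so the kworker cannot be reaped and freed before the subsequent kthread_stop(). A hypothetical module sketch of the same pin-then-stop ordering, for a kthread that may also exit on its own; the names and the exit flag are invented for illustration.

/* Hypothetical module (illustration only) mirroring the ordering that the
 * hunk above adds: pin the task, let it exit, then kthread_stop() safely. */
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/delay.h>

static struct task_struct *demo_task;
static bool demo_should_exit;

static int demo_thread(void *unused)
{
        /* May leave either because kthread_stop() was called or because
         * demo_should_exit was set, i.e. it can exit on its own. */
        while (!kthread_should_stop() && !ACCESS_ONCE(demo_should_exit))
                msleep(100);
        return 0;
}

static int __init demo_init(void)
{
        demo_task = kthread_run(demo_thread, NULL, "pin_demo");
        return IS_ERR(demo_task) ? PTR_ERR(demo_task) : 0;
}

static void __exit demo_exit(void)
{
        /* Pin first: once the exit flag is visible the thread may finish
         * and be reaped at any time, and kthread_stop() must not be handed
         * a freed task_struct (the race the hunk above closes for kworkers). */
        get_task_struct(demo_task);
        ACCESS_ONCE(demo_should_exit) = true;
        kthread_stop(demo_task);
        put_task_struct(demo_task);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");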