path: root/kernel
author     Ingo Molnar <mingo@kernel.org>  2014-07-17 05:45:29 -0400
committer  Ingo Molnar <mingo@kernel.org>  2014-07-17 05:45:29 -0400
commit     b5e4111f027c4be85dbe97e090530d03c55c4cf4 (patch)
tree       11e0a37cb59314f4e9a7b2810124a4a7a33140e5 /kernel
parent     72d5305dcb3637913c2c37e847a4de9028e49244 (diff)
parent     9de8033f1bbcce5ed23fe5da9ca1a5060207f7ed (diff)
Merge branch 'locking/urgent' into locking/core, before applying larger changes and to refresh the branch with fixes
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Kconfig.locks              9
-rw-r--r--  kernel/cgroup.c                  58
-rw-r--r--  kernel/context_tracking.c         3
-rw-r--r--  kernel/cpuset.c                  20
-rw-r--r--  kernel/events/core.c             37
-rw-r--r--  kernel/events/uprobes.c           6
-rw-r--r--  kernel/fork.c                     2
-rw-r--r--  kernel/irq/irqdesc.c              4
-rw-r--r--  kernel/kexec.c                    1
-rw-r--r--  kernel/locking/mcs_spinlock.c    64
-rw-r--r--  kernel/locking/mcs_spinlock.h     9
-rw-r--r--  kernel/locking/mutex.c            2
-rw-r--r--  kernel/locking/rwsem-spinlock.c  28
-rw-r--r--  kernel/locking/rwsem-xadd.c      16
-rw-r--r--  kernel/locking/rwsem.c            2
-rw-r--r--  kernel/power/hibernate.c         37
-rw-r--r--  kernel/power/main.c               6
-rw-r--r--  kernel/power/user.c               3
-rw-r--r--  kernel/printk/printk.c           44
-rw-r--r--  kernel/smp.c                     57
-rw-r--r--  kernel/sysctl.c                  18
-rw-r--r--  kernel/trace/trace.c              2
-rw-r--r--  kernel/trace/trace_uprobe.c      46
-rw-r--r--  kernel/tracepoint.c              26
-rw-r--r--  kernel/watchdog.c                41
-rw-r--r--  kernel/workqueue.c                3
26 files changed, 397 insertions, 147 deletions
diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks
index 35536d9c0964..76768ee812b2 100644
--- a/kernel/Kconfig.locks
+++ b/kernel/Kconfig.locks
@@ -220,9 +220,16 @@ config INLINE_WRITE_UNLOCK_IRQRESTORE
 
 endif
 
+config ARCH_SUPPORTS_ATOMIC_RMW
+        bool
+
 config MUTEX_SPIN_ON_OWNER
         def_bool y
-        depends on SMP && !DEBUG_MUTEXES
+        depends on SMP && !DEBUG_MUTEXES && ARCH_SUPPORTS_ATOMIC_RMW
+
+config RWSEM_SPIN_ON_OWNER
+        def_bool y
+        depends on SMP && RWSEM_XCHGADD_ALGORITHM && ARCH_SUPPORTS_ATOMIC_RMW
 
 config ARCH_USE_QUEUE_RWLOCK
         bool
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 7868fc3c0bc5..70776aec2562 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1648,10 +1648,13 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
                         int flags, const char *unused_dev_name,
                         void *data)
 {
+        struct super_block *pinned_sb = NULL;
+        struct cgroup_subsys *ss;
         struct cgroup_root *root;
         struct cgroup_sb_opts opts;
         struct dentry *dentry;
         int ret;
+        int i;
         bool new_sb;
 
         /*
@@ -1677,6 +1680,27 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
                 goto out_unlock;
         }
 
+        /*
+         * Destruction of cgroup root is asynchronous, so subsystems may
+         * still be dying after the previous unmount.  Let's drain the
+         * dying subsystems.  We just need to ensure that the ones
+         * unmounted previously finish dying and don't care about new ones
+         * starting.  Testing ref liveliness is good enough.
+         */
+        for_each_subsys(ss, i) {
+                if (!(opts.subsys_mask & (1 << i)) ||
+                    ss->root == &cgrp_dfl_root)
+                        continue;
+
+                if (!percpu_ref_tryget_live(&ss->root->cgrp.self.refcnt)) {
+                        mutex_unlock(&cgroup_mutex);
+                        msleep(10);
+                        ret = restart_syscall();
+                        goto out_free;
+                }
+                cgroup_put(&ss->root->cgrp);
+        }
+
         for_each_root(root) {
                 bool name_match = false;
 
@@ -1717,15 +1741,23 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
                 }
 
                 /*
-                 * A root's lifetime is governed by its root cgroup.
-                 * tryget_live failure indicate that the root is being
-                 * destroyed.  Wait for destruction to complete so that the
-                 * subsystems are free.  We can use wait_queue for the wait
-                 * but this path is super cold.  Let's just sleep for a bit
-                 * and retry.
+                 * We want to reuse @root whose lifetime is governed by its
+                 * ->cgrp.  Let's check whether @root is alive and keep it
+                 * that way.  As cgroup_kill_sb() can happen anytime, we
+                 * want to block it by pinning the sb so that @root doesn't
+                 * get killed before mount is complete.
+                 *
+                 * With the sb pinned, tryget_live can reliably indicate
+                 * whether @root can be reused.  If it's being killed,
+                 * drain it.  We can use wait_queue for the wait but this
+                 * path is super cold.  Let's just sleep a bit and retry.
                  */
-                if (!percpu_ref_tryget_live(&root->cgrp.self.refcnt)) {
+                pinned_sb = kernfs_pin_sb(root->kf_root, NULL);
+                if (IS_ERR(pinned_sb) ||
+                    !percpu_ref_tryget_live(&root->cgrp.self.refcnt)) {
                         mutex_unlock(&cgroup_mutex);
+                        if (!IS_ERR_OR_NULL(pinned_sb))
+                                deactivate_super(pinned_sb);
                         msleep(10);
                         ret = restart_syscall();
                         goto out_free;
@@ -1770,6 +1802,16 @@ out_free:
                                 CGROUP_SUPER_MAGIC, &new_sb);
         if (IS_ERR(dentry) || !new_sb)
                 cgroup_put(&root->cgrp);
+
+        /*
+         * If @pinned_sb, we're reusing an existing root and holding an
+         * extra ref on its sb.  Mount is complete.  Put the extra ref.
+         */
+        if (pinned_sb) {
+                WARN_ON(new_sb);
+                deactivate_super(pinned_sb);
+        }
+
         return dentry;
 }
 
@@ -3328,7 +3370,7 @@ bool css_has_online_children(struct cgroup_subsys_state *css)
 
         rcu_read_lock();
         css_for_each_child(child, css) {
-                if (css->flags & CSS_ONLINE) {
+                if (child->flags & CSS_ONLINE) {
                         ret = true;
                         break;
                 }
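
The drain-and-retry pattern above is worth calling out: because cgroup root destruction is asynchronous, mount simply backs off and restarts the whole syscall when it races with a dying root. A minimal sketch of the idiom, assuming cgroup_mutex is held and obj is a hypothetical percpu-refcounted object:

    /* Back-off idiom used in cgroup_mount() above (obj is illustrative). */
    if (!percpu_ref_tryget_live(&obj->refcnt)) {
            mutex_unlock(&cgroup_mutex);
            msleep(10);                  /* let the destruction finish */
            return restart_syscall();    /* re-enter the mount from scratch */
    }
    /* got a live ref; drop it once the pin is no longer needed */
    percpu_ref_put(&obj->refcnt);

The sleep is acceptable because this path is, as the comment says, "super cold".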
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
index 019d45008448..5664985c46a0 100644
--- a/kernel/context_tracking.c
+++ b/kernel/context_tracking.c
@@ -19,6 +19,7 @@
 #include <linux/sched.h>
 #include <linux/hardirq.h>
 #include <linux/export.h>
+#include <linux/kprobes.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/context_tracking.h>
@@ -104,6 +105,7 @@ void context_tracking_user_enter(void)
         }
         local_irq_restore(flags);
 }
+NOKPROBE_SYMBOL(context_tracking_user_enter);
 
 #ifdef CONFIG_PREEMPT
 /**
@@ -181,6 +183,7 @@ void context_tracking_user_exit(void)
         }
         local_irq_restore(flags);
 }
+NOKPROBE_SYMBOL(context_tracking_user_exit);
 
 /**
  * __context_tracking_task_switch - context switch the syscall callbacks
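
NOKPROBE_SYMBOL() records a function in the kprobe blacklist so kprobes will refuse to probe it; context_tracking_user_enter/exit run on the user/kernel boundary, where a recursive kprobe trap would be unsafe. A hedged sketch of the usage pattern (my_boundary_hook is a made-up name):

    #include <linux/kprobes.h>

    static void my_boundary_hook(void)
    {
            /* runs while context-tracking state is being switched */
    }
    NOKPROBE_SYMBOL(my_boundary_hook);  /* keep kprobes out of this path */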
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index f6b33c696224..116a4164720a 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1181,7 +1181,13 @@ done:
 
 int current_cpuset_is_being_rebound(void)
 {
-        return task_cs(current) == cpuset_being_rebound;
+        int ret;
+
+        rcu_read_lock();
+        ret = task_cs(current) == cpuset_being_rebound;
+        rcu_read_unlock();
+
+        return ret;
 }
 
 static int update_relax_domain_level(struct cpuset *cs, s64 val)
@@ -1617,7 +1623,17 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
          * resources, wait for the previously scheduled operations before
          * proceeding, so that we don't end up keep removing tasks added
          * after execution capability is restored.
+         *
+         * cpuset_hotplug_work calls back into cgroup core via
+         * cgroup_transfer_tasks() and waiting for it from a cgroupfs
+         * operation like this one can lead to a deadlock through kernfs
+         * active_ref protection.  Let's break the protection.  Losing the
+         * protection is okay as we check whether @cs is online after
+         * grabbing cpuset_mutex anyway.  This only happens on the legacy
+         * hierarchies.
          */
+        css_get(&cs->css);
+        kernfs_break_active_protection(of->kn);
         flush_work(&cpuset_hotplug_work);
 
         mutex_lock(&cpuset_mutex);
@@ -1645,6 +1661,8 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
         free_trial_cpuset(trialcs);
 out_unlock:
         mutex_unlock(&cpuset_mutex);
+        kernfs_unbreak_active_protection(of->kn);
+        css_put(&cs->css);
         return retval ?: nbytes;
 }
 
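
The current_cpuset_is_being_rebound() change is the standard fix for calling an RCU-protected accessor (task_cs() dereferences the task's css set) outside a read-side critical section: take rcu_read_lock() around the dereference and return a plain value computed inside it. As a generic sketch, with some_ptr as an illustrative RCU-protected pointer:

    rcu_read_lock();
    ret = rcu_dereference(some_ptr) == expected;  /* deref only under the lock */
    rcu_read_unlock();

    return ret;   /* the plain int survives after the critical section ends */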
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 5fa58e4cffac..a33d9a2bcbd7 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -40,6 +40,7 @@
 #include <linux/mm_types.h>
 #include <linux/cgroup.h>
 #include <linux/module.h>
+#include <linux/mman.h>
 
 #include "internal.h"
 
@@ -5128,6 +5129,7 @@ struct perf_mmap_event {
         int             maj, min;
         u64             ino;
         u64             ino_generation;
+        u32             prot, flags;
 
         struct {
                 struct perf_event_header        header;
@@ -5169,6 +5171,8 @@ static void perf_event_mmap_output(struct perf_event *event,
                 mmap_event->event_id.header.size += sizeof(mmap_event->min);
                 mmap_event->event_id.header.size += sizeof(mmap_event->ino);
                 mmap_event->event_id.header.size += sizeof(mmap_event->ino_generation);
+                mmap_event->event_id.header.size += sizeof(mmap_event->prot);
+                mmap_event->event_id.header.size += sizeof(mmap_event->flags);
         }
 
         perf_event_header__init_id(&mmap_event->event_id.header, &sample, event);
@@ -5187,6 +5191,8 @@ static void perf_event_mmap_output(struct perf_event *event,
                 perf_output_put(&handle, mmap_event->min);
                 perf_output_put(&handle, mmap_event->ino);
                 perf_output_put(&handle, mmap_event->ino_generation);
+                perf_output_put(&handle, mmap_event->prot);
+                perf_output_put(&handle, mmap_event->flags);
         }
 
         __output_copy(&handle, mmap_event->file_name,
@@ -5205,6 +5211,7 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
         struct file *file = vma->vm_file;
         int maj = 0, min = 0;
         u64 ino = 0, gen = 0;
+        u32 prot = 0, flags = 0;
         unsigned int size;
         char tmp[16];
         char *buf = NULL;
@@ -5235,6 +5242,28 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
                 gen = inode->i_generation;
                 maj = MAJOR(dev);
                 min = MINOR(dev);
+
+                if (vma->vm_flags & VM_READ)
+                        prot |= PROT_READ;
+                if (vma->vm_flags & VM_WRITE)
+                        prot |= PROT_WRITE;
+                if (vma->vm_flags & VM_EXEC)
+                        prot |= PROT_EXEC;
+
+                if (vma->vm_flags & VM_MAYSHARE)
+                        flags = MAP_SHARED;
+                else
+                        flags = MAP_PRIVATE;
+
+                if (vma->vm_flags & VM_DENYWRITE)
+                        flags |= MAP_DENYWRITE;
+                if (vma->vm_flags & VM_MAYEXEC)
+                        flags |= MAP_EXECUTABLE;
+                if (vma->vm_flags & VM_LOCKED)
+                        flags |= MAP_LOCKED;
+                if (vma->vm_flags & VM_HUGETLB)
+                        flags |= MAP_HUGETLB;
+
                 goto got_name;
         } else {
                 name = (char *)arch_vma_name(vma);
@@ -5275,6 +5304,8 @@ got_name:
         mmap_event->min = min;
         mmap_event->ino = ino;
         mmap_event->ino_generation = gen;
+        mmap_event->prot = prot;
+        mmap_event->flags = flags;
 
         if (!(vma->vm_flags & VM_EXEC))
                 mmap_event->event_id.header.misc |= PERF_RECORD_MISC_MMAP_DATA;
@@ -5315,6 +5346,8 @@ void perf_event_mmap(struct vm_area_struct *vma)
                 /* .min (attr_mmap2 only) */
                 /* .ino (attr_mmap2 only) */
                 /* .ino_generation (attr_mmap2 only) */
+                /* .prot (attr_mmap2 only) */
+                /* .flags (attr_mmap2 only) */
         };
 
         perf_event_mmap_event(&mmap_event);
@@ -6897,10 +6930,6 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
         if (ret)
                 return -EFAULT;
 
-        /* disabled for now */
-        if (attr->mmap2)
-                return -EINVAL;
-
         if (attr->__reserved_1)
                 return -EINVAL;
 
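
The new prot/flags fields translate the VM_* bits in vma->vm_flags back into the PROT_* and MAP_* constants userspace passed to mmap(), which is why <linux/mman.h> is now included; with the fields populated, the final hunk drops the "disabled for now" guard and lets PERF_RECORD_MMAP2 be requested. A condensed sketch of the protection half of that mapping, wrapped in a hypothetical helper:

    static u32 vma_prot_bits(unsigned long vm_flags)
    {
            u32 prot = 0;   /* same logic as the hunk above */

            if (vm_flags & VM_READ)
                    prot |= PROT_READ;
            if (vm_flags & VM_WRITE)
                    prot |= PROT_WRITE;
            if (vm_flags & VM_EXEC)
                    prot |= PROT_EXEC;
            return prot;
    }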
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index c445e392e93f..6f3254e8c137 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -846,7 +846,7 @@ static void __uprobe_unregister(struct uprobe *uprobe, struct uprobe_consumer *u
 {
         int err;
 
-        if (!consumer_del(uprobe, uc))  /* WARN? */
+        if (WARN_ON(!consumer_del(uprobe, uc)))
                 return;
 
         err = register_for_each_vma(uprobe, NULL);
@@ -927,7 +927,7 @@ int uprobe_apply(struct inode *inode, loff_t offset,
         int ret = -ENOENT;
 
         uprobe = find_uprobe(inode, offset);
-        if (!uprobe)
+        if (WARN_ON(!uprobe))
                 return ret;
 
         down_write(&uprobe->register_rwsem);
@@ -952,7 +952,7 @@ void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consume
         struct uprobe *uprobe;
 
         uprobe = find_uprobe(inode, offset);
-        if (!uprobe)
+        if (WARN_ON(!uprobe))
                 return;
 
         down_write(&uprobe->register_rwsem);
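
Folding the check into WARN_ON() keeps the early return while making the "should never happen" case loud. The idiom relies on WARN_ON(cond) evaluating to the truth value of cond, so it can sit directly in the condition:

    uprobe = find_uprobe(inode, offset);
    if (WARN_ON(!uprobe))   /* emits a one-shot warning splat and still bails */
            return;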
diff --git a/kernel/fork.c b/kernel/fork.c
index d2799d1fc952..6a13c46cd87d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1487,7 +1487,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 
         total_forks++;
         spin_unlock(&current->sighand->siglock);
+        syscall_tracepoint_update(p);
         write_unlock_irq(&tasklist_lock);
+
         proc_fork_connector(p);
         cgroup_post_fork(p);
         if (clone_flags & CLONE_THREAD)
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 7339e42a85ab..1487a123db5c 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -455,9 +455,9 @@ EXPORT_SYMBOL_GPL(irq_alloc_hwirqs);
  */
 void irq_free_hwirqs(unsigned int from, int cnt)
 {
-        int i;
+        int i, j;
 
-        for (i = from; cnt > 0; i++, cnt--) {
+        for (i = from, j = cnt; j > 0; i++, j--) {
                 irq_set_status_flags(i, _IRQ_NOREQUEST | _IRQ_NOPROBE);
                 arch_teardown_hwirq(i);
         }
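
The point of the j copy is that cnt itself is no longer consumed by the loop; per the upstream changelog this matters because irq_free_hwirqs() goes on to pass cnt to irq_free_descs(from, cnt), which previously received 0 and leaked the descriptors. A sketch of the fixed shape (the trailing call is reconstructed from that changelog, not shown in the hunk):

    void irq_free_hwirqs(unsigned int from, int cnt)
    {
            int i, j;

            for (i = from, j = cnt; j > 0; i++, j--) {
                    irq_set_status_flags(i, _IRQ_NOREQUEST | _IRQ_NOPROBE);
                    arch_teardown_hwirq(i);
            }
            irq_free_descs(from, cnt);   /* cnt is still intact here */
    }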
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 6748688813d0..369f41a94124 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -1617,6 +1617,7 @@ static int __init crash_save_vmcoreinfo_init(void)
 #ifdef CONFIG_MEMORY_FAILURE
         VMCOREINFO_NUMBER(PG_hwpoison);
 #endif
+        VMCOREINFO_NUMBER(PG_head_mask);
         VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE);
 
         arch_crash_save_vmcoreinfo();
diff --git a/kernel/locking/mcs_spinlock.c b/kernel/locking/mcs_spinlock.c
index 838dc9e00669..be9ee1559fca 100644
--- a/kernel/locking/mcs_spinlock.c
+++ b/kernel/locking/mcs_spinlock.c
@@ -14,21 +14,47 @@
  * called from interrupt context and we have preemption disabled while
  * spinning.
  */
-static DEFINE_PER_CPU_SHARED_ALIGNED(struct optimistic_spin_queue, osq_node);
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct optimistic_spin_node, osq_node);
+
+/*
+ * We use the value 0 to represent "no CPU", thus the encoded value
+ * will be the CPU number incremented by 1.
+ */
+static inline int encode_cpu(int cpu_nr)
+{
+        return cpu_nr + 1;
+}
+
+static inline struct optimistic_spin_node *decode_cpu(int encoded_cpu_val)
+{
+        int cpu_nr = encoded_cpu_val - 1;
+
+        return per_cpu_ptr(&osq_node, cpu_nr);
+}
 
 /*
  * Get a stable @node->next pointer, either for unlock() or unqueue() purposes.
  * Can return NULL in case we were the last queued and we updated @lock instead.
  */
-static inline struct optimistic_spin_queue *
-osq_wait_next(struct optimistic_spin_queue **lock,
-              struct optimistic_spin_queue *node,
-              struct optimistic_spin_queue *prev)
+static inline struct optimistic_spin_node *
+osq_wait_next(struct optimistic_spin_queue *lock,
+              struct optimistic_spin_node *node,
+              struct optimistic_spin_node *prev)
 {
-        struct optimistic_spin_queue *next = NULL;
+        struct optimistic_spin_node *next = NULL;
+        int curr = encode_cpu(smp_processor_id());
+        int old;
+
+        /*
+         * If there is a prev node in queue, then the 'old' value will be
+         * the prev node's CPU #, else it's set to OSQ_UNLOCKED_VAL since if
+         * we're currently last in queue, then the queue will then become empty.
+         */
+        old = prev ? prev->cpu : OSQ_UNLOCKED_VAL;
 
         for (;;) {
-                if (*lock == node && cmpxchg(lock, node, prev) == node) {
+                if (atomic_read(&lock->tail) == curr &&
+                    atomic_cmpxchg(&lock->tail, curr, old) == curr) {
                         /*
                          * We were the last queued, we moved @lock back. @prev
                          * will now observe @lock and will complete its
@@ -59,18 +85,23 @@ osq_wait_next(struct optimistic_spin_queue **lock,
         return next;
 }
 
-bool osq_lock(struct optimistic_spin_queue **lock)
+bool osq_lock(struct optimistic_spin_queue *lock)
 {
-        struct optimistic_spin_queue *node = this_cpu_ptr(&osq_node);
-        struct optimistic_spin_queue *prev, *next;
+        struct optimistic_spin_node *node = this_cpu_ptr(&osq_node);
+        struct optimistic_spin_node *prev, *next;
+        int curr = encode_cpu(smp_processor_id());
+        int old;
 
         node->locked = 0;
         node->next = NULL;
+        node->cpu = curr;
 
-        node->prev = prev = xchg(lock, node);
-        if (likely(prev == NULL))
+        old = atomic_xchg(&lock->tail, curr);
+        if (old == OSQ_UNLOCKED_VAL)
                 return true;
 
+        prev = decode_cpu(old);
+        node->prev = prev;
         ACCESS_ONCE(prev->next) = node;
 
         /*
@@ -149,20 +180,21 @@ unqueue:
         return false;
 }
 
-void osq_unlock(struct optimistic_spin_queue **lock)
+void osq_unlock(struct optimistic_spin_queue *lock)
 {
-        struct optimistic_spin_queue *node = this_cpu_ptr(&osq_node);
-        struct optimistic_spin_queue *next;
+        struct optimistic_spin_node *node, *next;
+        int curr = encode_cpu(smp_processor_id());
 
         /*
          * Fast path for the uncontended case.
          */
-        if (likely(cmpxchg(lock, node, NULL) == node))
+        if (likely(atomic_cmpxchg(&lock->tail, curr, OSQ_UNLOCKED_VAL) == curr))
                 return;
 
         /*
         * Second most likely case.
         */
+        node = this_cpu_ptr(&osq_node);
         next = xchg(&node->next, NULL);
         if (next) {
                 ACCESS_ONCE(next->locked) = 1;
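
With the tail stored as an encoded CPU number instead of a pointer, the lock word shrinks to a single atomic_t; elsewhere in this series the queue head is reduced to (roughly) the following, with 0 reserved for "unlocked":

    struct optimistic_spin_queue {
            /*
             * Stores an encoded value of the CPU # of the tail node in the
             * queue; OSQ_UNLOCKED_VAL (0) means no CPU is queued.
             */
            atomic_t tail;
    };

    #define OSQ_UNLOCKED_VAL (0)

The per-CPU node is then recovered from the encoded value via decode_cpu() above, so the queue links never need to live in the lock word itself.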
diff --git a/kernel/locking/mcs_spinlock.h b/kernel/locking/mcs_spinlock.h
index a2dbac4aca6b..74356dc0ce29 100644
--- a/kernel/locking/mcs_spinlock.h
+++ b/kernel/locking/mcs_spinlock.h
@@ -118,12 +118,13 @@ void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
  * mutex_lock()/rwsem_down_{read,write}() etc.
  */
 
-struct optimistic_spin_queue {
-        struct optimistic_spin_queue *next, *prev;
+struct optimistic_spin_node {
+        struct optimistic_spin_node *next, *prev;
         int locked; /* 1 if lock acquired */
+        int cpu; /* encoded CPU # value */
 };
 
-extern bool osq_lock(struct optimistic_spin_queue **lock);
-extern void osq_unlock(struct optimistic_spin_queue **lock);
+extern bool osq_lock(struct optimistic_spin_queue *lock);
+extern void osq_unlock(struct optimistic_spin_queue *lock);
 
 #endif /* __LINUX_MCS_SPINLOCK_H */
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 11b103d87b27..d3100521388c 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -54,7 +54,7 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
         INIT_LIST_HEAD(&lock->wait_list);
         mutex_clear_owner(lock);
 #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
-        lock->osq = NULL;
+        osq_lock_init(&lock->osq);
 #endif
 
         debug_mutex_init(lock, name, key);
diff --git a/kernel/locking/rwsem-spinlock.c b/kernel/locking/rwsem-spinlock.c
index 9be8a9144978..2c93571162cb 100644
--- a/kernel/locking/rwsem-spinlock.c
+++ b/kernel/locking/rwsem-spinlock.c
@@ -26,7 +26,7 @@ int rwsem_is_locked(struct rw_semaphore *sem)
         unsigned long flags;
 
         if (raw_spin_trylock_irqsave(&sem->wait_lock, flags)) {
-                ret = (sem->activity != 0);
+                ret = (sem->count != 0);
                 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
         }
         return ret;
@@ -46,7 +46,7 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name,
         debug_check_no_locks_freed((void *)sem, sizeof(*sem));
         lockdep_init_map(&sem->dep_map, name, key, 0);
 #endif
-        sem->activity = 0;
+        sem->count = 0;
         raw_spin_lock_init(&sem->wait_lock);
         INIT_LIST_HEAD(&sem->wait_list);
 }
@@ -95,7 +95,7 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wakewrite)
                 waiter = list_entry(next, struct rwsem_waiter, list);
         } while (waiter->type != RWSEM_WAITING_FOR_WRITE);
 
-        sem->activity += woken;
+        sem->count += woken;
 
  out:
         return sem;
@@ -126,9 +126,9 @@ void __sched __down_read(struct rw_semaphore *sem)
 
         raw_spin_lock_irqsave(&sem->wait_lock, flags);
 
-        if (sem->activity >= 0 && list_empty(&sem->wait_list)) {
+        if (sem->count >= 0 && list_empty(&sem->wait_list)) {
                 /* granted */
-                sem->activity++;
+                sem->count++;
                 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
                 goto out;
         }
@@ -170,9 +170,9 @@ int __down_read_trylock(struct rw_semaphore *sem)
 
         raw_spin_lock_irqsave(&sem->wait_lock, flags);
 
-        if (sem->activity >= 0 && list_empty(&sem->wait_list)) {
+        if (sem->count >= 0 && list_empty(&sem->wait_list)) {
                 /* granted */
-                sem->activity++;
+                sem->count++;
                 ret = 1;
         }
 
@@ -206,7 +206,7 @@ void __sched __down_write_nested(struct rw_semaphore *sem, int subclass)
          * itself into sleep and waiting for system woke it or someone
          * else in the head of the wait list up.
          */
-        if (sem->activity == 0)
+        if (sem->count == 0)
                 break;
         set_task_state(tsk, TASK_UNINTERRUPTIBLE);
         raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
@@ -214,7 +214,7 @@ void __sched __down_write_nested(struct rw_semaphore *sem, int subclass)
                 raw_spin_lock_irqsave(&sem->wait_lock, flags);
         }
         /* got the lock */
-        sem->activity = -1;
+        sem->count = -1;
         list_del(&waiter.list);
 
         raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
@@ -235,9 +235,9 @@ int __down_write_trylock(struct rw_semaphore *sem)
 
         raw_spin_lock_irqsave(&sem->wait_lock, flags);
 
-        if (sem->activity == 0) {
+        if (sem->count == 0) {
                 /* got the lock */
-                sem->activity = -1;
+                sem->count = -1;
                 ret = 1;
         }
 
@@ -255,7 +255,7 @@ void __up_read(struct rw_semaphore *sem)
 
         raw_spin_lock_irqsave(&sem->wait_lock, flags);
 
-        if (--sem->activity == 0 && !list_empty(&sem->wait_list))
+        if (--sem->count == 0 && !list_empty(&sem->wait_list))
                 sem = __rwsem_wake_one_writer(sem);
 
         raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
@@ -270,7 +270,7 @@ void __up_write(struct rw_semaphore *sem)
 
         raw_spin_lock_irqsave(&sem->wait_lock, flags);
 
-        sem->activity = 0;
+        sem->count = 0;
         if (!list_empty(&sem->wait_list))
                 sem = __rwsem_do_wake(sem, 1);
 
@@ -287,7 +287,7 @@ void __downgrade_write(struct rw_semaphore *sem)
 
         raw_spin_lock_irqsave(&sem->wait_lock, flags);
 
-        sem->activity = 1;
+        sem->count = 1;
         if (!list_empty(&sem->wait_list))
                 sem = __rwsem_do_wake(sem, 0);
 
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index dacc32142fcc..a2391ac135c8 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -82,9 +82,9 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name,
         sem->count = RWSEM_UNLOCKED_VALUE;
         raw_spin_lock_init(&sem->wait_lock);
         INIT_LIST_HEAD(&sem->wait_list);
-#ifdef CONFIG_SMP
+#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
         sem->owner = NULL;
-        sem->osq = NULL;
+        osq_lock_init(&sem->osq);
 #endif
 }
 
@@ -262,7 +262,7 @@ static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem)
         return false;
 }
 
-#ifdef CONFIG_SMP
+#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
 /*
  * Try to acquire write lock before the writer has been put on wait queue.
  */
@@ -285,10 +285,10 @@ static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
 static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
 {
         struct task_struct *owner;
-        bool on_cpu = true;
+        bool on_cpu = false;
 
         if (need_resched())
-                return 0;
+                return false;
 
         rcu_read_lock();
         owner = ACCESS_ONCE(sem->owner);
@@ -297,9 +297,9 @@ static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
         rcu_read_unlock();
 
         /*
-         * If sem->owner is not set, the rwsem owner may have
-         * just acquired it and not set the owner yet or the rwsem
-         * has been released.
+         * If sem->owner is not set, yet we have just recently entered the
+         * slowpath, then there is a possibility reader(s) may have the lock.
+         * To be safe, avoid spinning in these situations.
          */
         return on_cpu;
 }
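
Flipping the on_cpu default to false makes the conservative case explicit: if sem->owner is NULL the lock may be held by readers (which never set owner), so the would-be spinner bails out instead of optimistically spinning. The elided context presumably only overrides the default when a writer owner is visibly running, roughly:

    rcu_read_lock();
    owner = ACCESS_ONCE(sem->owner);
    if (owner)
            on_cpu = owner->on_cpu;   /* spin only on a running writer */
    rcu_read_unlock();

    return on_cpu;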
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index 42f806de49d4..e2d3bc7f03b4 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -12,7 +12,7 @@
 
 #include <linux/atomic.h>
 
-#if defined(CONFIG_SMP) && defined(CONFIG_RWSEM_XCHGADD_ALGORITHM)
+#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
 static inline void rwsem_set_owner(struct rw_semaphore *sem)
 {
         sem->owner = current;
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 49e0a20fd010..fcc2611d3f14 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -35,6 +35,7 @@
 
 static int nocompress;
 static int noresume;
+static int nohibernate;
 static int resume_wait;
 static unsigned int resume_delay;
 static char resume_file[256] = CONFIG_PM_STD_PARTITION;
@@ -62,6 +63,11 @@ bool freezer_test_done;
 
 static const struct platform_hibernation_ops *hibernation_ops;
 
+bool hibernation_available(void)
+{
+        return (nohibernate == 0);
+}
+
 /**
  * hibernation_set_ops - Set the global hibernate operations.
  * @ops: Hibernation operations to use in subsequent hibernation transitions.
@@ -642,6 +648,11 @@ int hibernate(void)
 {
         int error;
 
+        if (!hibernation_available()) {
+                pr_debug("PM: Hibernation not available.\n");
+                return -EPERM;
+        }
+
         lock_system_sleep();
         /* The snapshot device should not be opened while we're running */
         if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
@@ -734,7 +745,7 @@ static int software_resume(void)
         /*
          * If the user said "noresume".. bail out early.
          */
-        if (noresume)
+        if (noresume || !hibernation_available())
                 return 0;
 
         /*
@@ -900,6 +911,9 @@ static ssize_t disk_show(struct kobject *kobj, struct kobj_attribute *attr,
         int i;
         char *start = buf;
 
+        if (!hibernation_available())
+                return sprintf(buf, "[disabled]\n");
+
         for (i = HIBERNATION_FIRST; i <= HIBERNATION_MAX; i++) {
                 if (!hibernation_modes[i])
                         continue;
@@ -934,6 +948,9 @@ static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr,
         char *p;
         int mode = HIBERNATION_INVALID;
 
+        if (!hibernation_available())
+                return -EPERM;
+
         p = memchr(buf, '\n', n);
         len = p ? p - buf : n;
 
@@ -1101,6 +1118,10 @@ static int __init hibernate_setup(char *str)
                 noresume = 1;
         else if (!strncmp(str, "nocompress", 10))
                 nocompress = 1;
+        else if (!strncmp(str, "no", 2)) {
+                noresume = 1;
+                nohibernate = 1;
+        }
         return 1;
 }
 
@@ -1125,9 +1146,23 @@ static int __init resumedelay_setup(char *str)
         return 1;
 }
 
+static int __init nohibernate_setup(char *str)
+{
+        noresume = 1;
+        nohibernate = 1;
+        return 1;
+}
+
+static int __init kaslr_nohibernate_setup(char *str)
+{
+        return nohibernate_setup(str);
+}
+
 __setup("noresume", noresume_setup);
 __setup("resume_offset=", resume_offset_setup);
 __setup("resume=", resume_setup);
 __setup("hibernate=", hibernate_setup);
 __setup("resumewait", resumewait_setup);
 __setup("resumedelay=", resumedelay_setup);
+__setup("nohibernate", nohibernate_setup);
+__setup("kaslr", kaslr_nohibernate_setup);
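
Net effect of the setup hooks: hibernation can now be switched off from the kernel command line, and booting with KASLR does it implicitly (a base-address-randomized kernel cannot safely resume an image laid out for a different address). Illustrative boot parameters under this patch:

    nohibernate      # disable hibernation and resume support
    kaslr            # randomize the kernel base; implies nohibernate
    hibernate=no     # same effect via the existing hibernate= option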
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 573410d6647e..8e90f330f139 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -300,13 +300,11 @@ static ssize_t state_show(struct kobject *kobj, struct kobj_attribute *attr,
                 s += sprintf(s,"%s ", pm_states[i].label);
 
 #endif
-#ifdef CONFIG_HIBERNATION
-        s += sprintf(s, "%s\n", "disk");
-#else
+        if (hibernation_available())
+                s += sprintf(s, "disk ");
         if (s != buf)
                 /* convert the last space to a newline */
                 *(s-1) = '\n';
-#endif
         return (s - buf);
 }
 
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 98d357584cd6..526e8911460a 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -49,6 +49,9 @@ static int snapshot_open(struct inode *inode, struct file *filp)
         struct snapshot_data *data;
         int error;
 
+        if (!hibernation_available())
+                return -EPERM;
+
         lock_system_sleep();
 
         if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index ea2d5f6962ed..13e839dbca07 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -1416,9 +1416,10 @@ static int have_callable_console(void)
 /*
  * Can we actually use the console at this time on this cpu?
  *
- * Console drivers may assume that per-cpu resources have been allocated. So
- * unless they're explicitly marked as being able to cope (CON_ANYTIME) don't
- * call them until this CPU is officially up.
+ * Console drivers may assume that per-cpu resources have
+ * been allocated. So unless they're explicitly marked as
+ * being able to cope (CON_ANYTIME) don't call them until
+ * this CPU is officially up.
  */
 static inline int can_use_console(unsigned int cpu)
 {
@@ -1431,10 +1432,8 @@ static inline int can_use_console(unsigned int cpu)
  * console_lock held, and 'console_locked' set) if it
  * is successful, false otherwise.
  */
-static int console_trylock_for_printk(void)
+static int console_trylock_for_printk(unsigned int cpu)
 {
-        unsigned int cpu = smp_processor_id();
-
         if (!console_trylock())
                 return 0;
         /*
@@ -1609,8 +1608,7 @@ asmlinkage int vprintk_emit(int facility, int level,
                  */
                 if (!oops_in_progress && !lockdep_recursing(current)) {
                         recursion_bug = 1;
-                        local_irq_restore(flags);
-                        return 0;
+                        goto out_restore_irqs;
                 }
                 zap_locks();
         }
@@ -1718,27 +1716,21 @@ asmlinkage int vprintk_emit(int facility, int level,
 
         logbuf_cpu = UINT_MAX;
         raw_spin_unlock(&logbuf_lock);
-        lockdep_on();
-        local_irq_restore(flags);
 
         /* If called from the scheduler, we can not call up(). */
-        if (in_sched)
-                return printed_len;
-
-        /*
-         * Disable preemption to avoid being preempted while holding
-         * console_sem which would prevent anyone from printing to console
-         */
-        preempt_disable();
-        /*
-         * Try to acquire and then immediately release the console semaphore.
-         * The release will print out buffers and wake up /dev/kmsg and syslog()
-         * users.
-         */
-        if (console_trylock_for_printk())
-                console_unlock();
-        preempt_enable();
+        if (!in_sched) {
+                /*
+                 * Try to acquire and then immediately release the console
+                 * semaphore.  The release will print out buffers and wake up
+                 * /dev/kmsg and syslog() users.
+                 */
+                if (console_trylock_for_printk(this_cpu))
+                        console_unlock();
+        }
 
+        lockdep_on();
+out_restore_irqs:
+        local_irq_restore(flags);
         return printed_len;
 }
 EXPORT_SYMBOL(vprintk_emit);
diff --git a/kernel/smp.c b/kernel/smp.c
index 306f8180b0d5..80c33f8de14f 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -29,6 +29,8 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_function_data, cfd_data);
 
 static DEFINE_PER_CPU_SHARED_ALIGNED(struct llist_head, call_single_queue);
 
+static void flush_smp_call_function_queue(bool warn_cpu_offline);
+
 static int
 hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu)
 {
@@ -51,12 +53,27 @@ hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu)
 #ifdef CONFIG_HOTPLUG_CPU
         case CPU_UP_CANCELED:
         case CPU_UP_CANCELED_FROZEN:
+                /* Fall-through to the CPU_DEAD[_FROZEN] case. */
 
         case CPU_DEAD:
         case CPU_DEAD_FROZEN:
                 free_cpumask_var(cfd->cpumask);
                 free_percpu(cfd->csd);
                 break;
+
+        case CPU_DYING:
+        case CPU_DYING_FROZEN:
+                /*
+                 * The IPIs for the smp-call-function callbacks queued by other
+                 * CPUs might arrive late, either due to hardware latencies or
+                 * because this CPU disabled interrupts (inside stop-machine)
+                 * before the IPIs were sent. So flush out any pending callbacks
+                 * explicitly (without waiting for the IPIs to arrive), to
+                 * ensure that the outgoing CPU doesn't go offline with work
+                 * still pending.
+                 */
+                flush_smp_call_function_queue(false);
+                break;
 #endif
         };
 
@@ -177,23 +194,47 @@ static int generic_exec_single(int cpu, struct call_single_data *csd,
         return 0;
 }
 
-/*
- * Invoked by arch to handle an IPI for call function single. Must be
- * called from the arch with interrupts disabled.
+/**
+ * generic_smp_call_function_single_interrupt - Execute SMP IPI callbacks
+ *
+ * Invoked by arch to handle an IPI for call function single.
+ * Must be called with interrupts disabled.
  */
 void generic_smp_call_function_single_interrupt(void)
 {
+        flush_smp_call_function_queue(true);
+}
+
+/**
+ * flush_smp_call_function_queue - Flush pending smp-call-function callbacks
+ *
+ * @warn_cpu_offline: If set to 'true', warn if callbacks were queued on an
+ *                    offline CPU. Skip this check if set to 'false'.
+ *
+ * Flush any pending smp-call-function callbacks queued on this CPU. This is
+ * invoked by the generic IPI handler, as well as by a CPU about to go offline,
+ * to ensure that all pending IPI callbacks are run before it goes completely
+ * offline.
+ *
+ * Loop through the call_single_queue and run all the queued callbacks.
+ * Must be called with interrupts disabled.
+ */
+static void flush_smp_call_function_queue(bool warn_cpu_offline)
+{
+        struct llist_head *head;
         struct llist_node *entry;
         struct call_single_data *csd, *csd_next;
         static bool warned;
 
-        entry = llist_del_all(&__get_cpu_var(call_single_queue));
+        WARN_ON(!irqs_disabled());
+
+        head = &__get_cpu_var(call_single_queue);
+        entry = llist_del_all(head);
         entry = llist_reverse_order(entry);
 
-        /*
-         * Shouldn't receive this interrupt on a cpu that is not yet online.
-         */
-        if (unlikely(!cpu_online(smp_processor_id()) && !warned)) {
+        /* There shouldn't be any pending callbacks on an offline CPU. */
+        if (unlikely(warn_cpu_offline && !cpu_online(smp_processor_id()) &&
+                     !warned && !llist_empty(head))) {
                 warned = true;
                 WARN(1, "IPI on offline CPU %d\n", smp_processor_id());
 
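
flush_smp_call_function_queue() centralizes the drain so the IPI handler and the dying CPU share one code path. The core of it is the usual lock-free llist idiom; a sketch, using the variables declared above:

    head = &__get_cpu_var(call_single_queue);
    entry = llist_del_all(head);           /* atomically detach everything */
    entry = llist_reverse_order(entry);    /* restore FIFO submission order */
    llist_for_each_entry_safe(csd, csd_next, entry, llist)
            csd->func(csd->info);          /* run each callback, IRQs off */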
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index ba9ed453c4ed..75b22e22a72c 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -136,7 +136,6 @@ static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
 /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
 static int maxolduid = 65535;
 static int minolduid;
-static int min_percpu_pagelist_fract = 8;
 
 static int ngroups_max = NGROUPS_MAX;
 static const int cap_last_cap = CAP_LAST_CAP;
@@ -152,10 +151,6 @@ static unsigned long hung_task_timeout_max = (LONG_MAX/HZ);
 #ifdef CONFIG_SPARC
 #endif
 
-#ifdef CONFIG_SPARC64
-extern int sysctl_tsb_ratio;
-#endif
-
 #ifdef __hppa__
 extern int pwrsw_enabled;
 #endif
@@ -865,6 +860,17 @@ static struct ctl_table kern_table[] = {
                 .extra1         = &zero,
                 .extra2         = &one,
         },
+#ifdef CONFIG_SMP
+        {
+                .procname       = "softlockup_all_cpu_backtrace",
+                .data           = &sysctl_softlockup_all_cpu_backtrace,
+                .maxlen         = sizeof(int),
+                .mode           = 0644,
+                .proc_handler   = proc_dointvec_minmax,
+                .extra1         = &zero,
+                .extra2         = &one,
+        },
+#endif /* CONFIG_SMP */
         {
                 .procname       = "nmi_watchdog",
                 .data           = &watchdog_user_enabled,
@@ -1321,7 +1327,7 @@ static struct ctl_table vm_table[] = {
                 .maxlen         = sizeof(percpu_pagelist_fraction),
                 .mode           = 0644,
                 .proc_handler   = percpu_pagelist_fraction_sysctl_handler,
-                .extra1         = &min_percpu_pagelist_fract,
+                .extra1         = &zero,
         },
 #ifdef CONFIG_MMU
         {
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 384ede311717..f243444a3772 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1396,7 +1396,6 @@ void tracing_start(void)
 
         arch_spin_unlock(&global_trace.max_lock);
 
-        ftrace_start();
  out:
         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
 }
@@ -1443,7 +1442,6 @@ void tracing_stop(void)
         struct ring_buffer *buffer;
         unsigned long flags;
 
-        ftrace_stop();
         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
         if (global_trace.stop_count++)
                 goto out;
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 04fdb5de823c..3c9b97e6b1f4 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -893,6 +893,9 @@ probe_event_enable(struct trace_uprobe *tu, struct ftrace_event_file *file,
         int ret;
 
         if (file) {
+                if (tu->tp.flags & TP_FLAG_PROFILE)
+                        return -EINTR;
+
                 link = kmalloc(sizeof(*link), GFP_KERNEL);
                 if (!link)
                         return -ENOMEM;
@@ -901,29 +904,40 @@ probe_event_enable(struct trace_uprobe *tu, struct ftrace_event_file *file,
                 list_add_tail_rcu(&link->list, &tu->tp.files);
 
                 tu->tp.flags |= TP_FLAG_TRACE;
-        } else
-                tu->tp.flags |= TP_FLAG_PROFILE;
+        } else {
+                if (tu->tp.flags & TP_FLAG_TRACE)
+                        return -EINTR;
 
-        ret = uprobe_buffer_enable();
-        if (ret < 0)
-                return ret;
+                tu->tp.flags |= TP_FLAG_PROFILE;
+        }
 
         WARN_ON(!uprobe_filter_is_empty(&tu->filter));
 
         if (enabled)
                 return 0;
 
+        ret = uprobe_buffer_enable();
+        if (ret)
+                goto err_flags;
+
         tu->consumer.filter = filter;
         ret = uprobe_register(tu->inode, tu->offset, &tu->consumer);
-        if (ret) {
-                if (file) {
-                        list_del(&link->list);
-                        kfree(link);
-                        tu->tp.flags &= ~TP_FLAG_TRACE;
-                } else
-                        tu->tp.flags &= ~TP_FLAG_PROFILE;
-        }
+        if (ret)
+                goto err_buffer;
 
+        return 0;
+
+ err_buffer:
+        uprobe_buffer_disable();
+
+ err_flags:
+        if (file) {
+                list_del(&link->list);
+                kfree(link);
+                tu->tp.flags &= ~TP_FLAG_TRACE;
+        } else {
+                tu->tp.flags &= ~TP_FLAG_PROFILE;
+        }
         return ret;
 }
 
@@ -1201,12 +1215,6 @@ static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs)
 
         current->utask->vaddr = (unsigned long) &udd;
 
-#ifdef CONFIG_PERF_EVENTS
-        if ((tu->tp.flags & TP_FLAG_TRACE) == 0 &&
-            !uprobe_perf_filter(&tu->consumer, 0, current->mm))
-                return UPROBE_HANDLER_REMOVE;
-#endif
-
         if (WARN_ON_ONCE(!uprobe_cpu_buffer))
                 return 0;
 
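
The rewrite converts the enable path to the standard goto-unwind shape: resources are released in reverse order of acquisition (buffer first, then the flags/link set up earlier), and the TRACE/PROFILE exclusivity check now fails fast with -EINTR before anything is allocated. The generic form of the idiom, with illustrative names:

    ret = acquire_a();
    if (ret)
            goto err_none;
    ret = acquire_b();
    if (ret)
            goto err_a;     /* unwind only what has been acquired */
    return 0;

    err_a:
            release_a();
    err_none:
            return ret;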
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index 33cbd8c203f8..3490407dc7b7 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -492,33 +492,29 @@ static int sys_tracepoint_refcount;
 
 void syscall_regfunc(void)
 {
-        unsigned long flags;
-        struct task_struct *g, *t;
+        struct task_struct *p, *t;
 
         if (!sys_tracepoint_refcount) {
-                read_lock_irqsave(&tasklist_lock, flags);
-                do_each_thread(g, t) {
-                        /* Skip kernel threads. */
-                        if (t->mm)
-                                set_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT);
-                } while_each_thread(g, t);
-                read_unlock_irqrestore(&tasklist_lock, flags);
+                read_lock(&tasklist_lock);
+                for_each_process_thread(p, t) {
+                        set_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT);
+                }
+                read_unlock(&tasklist_lock);
         }
         sys_tracepoint_refcount++;
 }
 
 void syscall_unregfunc(void)
 {
-        unsigned long flags;
-        struct task_struct *g, *t;
+        struct task_struct *p, *t;
 
         sys_tracepoint_refcount--;
         if (!sys_tracepoint_refcount) {
-                read_lock_irqsave(&tasklist_lock, flags);
-                do_each_thread(g, t) {
+                read_lock(&tasklist_lock);
+                for_each_process_thread(p, t) {
                         clear_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT);
-                } while_each_thread(g, t);
-                read_unlock_irqrestore(&tasklist_lock, flags);
+                }
+                read_unlock(&tasklist_lock);
         }
 }
 #endif
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 516203e665fc..c3319bd1b040 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -31,6 +31,12 @@
 
 int watchdog_user_enabled = 1;
 int __read_mostly watchdog_thresh = 10;
+#ifdef CONFIG_SMP
+int __read_mostly sysctl_softlockup_all_cpu_backtrace;
+#else
+#define sysctl_softlockup_all_cpu_backtrace 0
+#endif
+
 static int __read_mostly watchdog_running;
 static u64 __read_mostly sample_period;
 
@@ -47,6 +53,7 @@ static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
 static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
 static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
 #endif
+static unsigned long soft_lockup_nmi_warn;
 
 /* boot commands */
 /*
@@ -95,6 +102,15 @@ static int __init nosoftlockup_setup(char *str)
 }
 __setup("nosoftlockup", nosoftlockup_setup);
 /*  */
+#ifdef CONFIG_SMP
+static int __init softlockup_all_cpu_backtrace_setup(char *str)
+{
+        sysctl_softlockup_all_cpu_backtrace =
+                !!simple_strtol(str, NULL, 0);
+        return 1;
+}
+__setup("softlockup_all_cpu_backtrace=", softlockup_all_cpu_backtrace_setup);
+#endif
 
 /*
  * Hard-lockup warnings should be triggered after just a few seconds. Soft-
@@ -271,6 +287,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
         unsigned long touch_ts = __this_cpu_read(watchdog_touch_ts);
         struct pt_regs *regs = get_irq_regs();
         int duration;
+        int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace;
 
         /* kick the hardlockup detector */
         watchdog_interrupt_count();
@@ -317,6 +334,17 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
                 if (__this_cpu_read(soft_watchdog_warn) == true)
                         return HRTIMER_RESTART;
 
+                if (softlockup_all_cpu_backtrace) {
+                        /* Prevent multiple soft-lockup reports if one cpu is already
+                         * engaged in dumping cpu back traces
+                         */
+                        if (test_and_set_bit(0, &soft_lockup_nmi_warn)) {
+                                /* Someone else will report us. Let's give up */
+                                __this_cpu_write(soft_watchdog_warn, true);
+                                return HRTIMER_RESTART;
+                        }
+                }
+
                 printk(KERN_EMERG "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
                         smp_processor_id(), duration,
                         current->comm, task_pid_nr(current));
@@ -327,6 +355,17 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
                 else
                         dump_stack();
 
+                if (softlockup_all_cpu_backtrace) {
+                        /* Avoid generating two back traces for current
+                         * given that one is already made above
+                         */
+                        trigger_allbutself_cpu_backtrace();
+
+                        clear_bit(0, &soft_lockup_nmi_warn);
+                        /* Barrier to sync with other cpus */
+                        smp_mb__after_atomic();
+                }
+
                 if (softlockup_panic)
                         panic("softlockup: hung tasks");
                 __this_cpu_write(soft_watchdog_warn, true);
@@ -527,10 +566,8 @@ static void update_timers_all_cpus(void)
         int cpu;
 
         get_online_cpus();
-        preempt_disable();
         for_each_online_cpu(cpu)
                 update_timers(cpu);
-        preempt_enable();
         put_online_cpus();
 }
 
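
soft_lockup_nmi_warn acts as a global "one reporter at a time" latch: test_and_set_bit() atomically elects the CPU that dumps all backtraces, and every other CPU defers to it. A minimal sketch of the pattern:

    if (test_and_set_bit(0, &soft_lockup_nmi_warn))
            return;                          /* another CPU is already reporting */

    trigger_allbutself_cpu_backtrace();      /* we won the race; do the dump */

    clear_bit(0, &soft_lockup_nmi_warn);
    smp_mb__after_atomic();                  /* order the release against other CPUs */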
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 6203d2900877..35974ac69600 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -3284,6 +3284,7 @@ int workqueue_sysfs_register(struct workqueue_struct *wq)
                 }
         }
 
+        dev_set_uevent_suppress(&wq_dev->dev, false);
         kobject_uevent(&wq_dev->dev.kobj, KOBJ_ADD);
         return 0;
 }
@@ -4879,7 +4880,7 @@ static void __init wq_numa_init(void)
         BUG_ON(!tbl);
 
         for_each_node(node)
-                BUG_ON(!alloc_cpumask_var_node(&tbl[node], GFP_KERNEL,
+                BUG_ON(!zalloc_cpumask_var_node(&tbl[node], GFP_KERNEL,
                                 node_online(node) ? node : NUMA_NO_NODE));
 
         for_each_possible_cpu(cpu) {
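
The zalloc_cpumask_var_node() switch matters because the loop that follows only sets bits for the CPUs it visits; the z-variant hands back an already-zeroed mask, so untouched bits cannot contain allocator garbage. Sketch of the difference, using a hypothetical mask:

    cpumask_var_t mask;

    if (!zalloc_cpumask_var_node(&mask, GFP_KERNEL, node))
            return -ENOMEM;          /* mask starts out all-clear */
    cpumask_set_cpu(cpu, mask);      /* only bits set here are ever 1 */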