about | summary | refs | log | tree | commit | diff | stats
path: root/kernel
diff options
context:
space:
mode:
author Ingo Molnar <mingo@kernel.org> 2014-07-16 09:10:07 -0400
committer Ingo Molnar <mingo@kernel.org> 2014-07-16 09:10:07 -0400
commit d26fad5b38e1c4667d4f2604936e59c837caa54d (patch)
tree 04b524a69a0129c181567445bff18847a1b44721 /kernel
parent e720fff6341fe4b95e5a93c939bd3c77fa55ced4 (diff)
parent 1795cd9b3a91d4b5473c97f491d63892442212ab (diff)
Merge tag 'v3.16-rc5' into sched/core, to refresh the branch before applying bigger tree-wide changes
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/cgroup.c 58
-rw-r--r-- kernel/context_tracking.c 3
-rw-r--r-- kernel/cpuset.c 20
-rw-r--r-- kernel/events/core.c 37
-rw-r--r-- kernel/events/uprobes.c 6
-rw-r--r-- kernel/fork.c 2
-rw-r--r-- kernel/irq/irqdesc.c 4
-rw-r--r-- kernel/kexec.c 1
-rw-r--r-- kernel/locking/rtmutex-debug.h 5
-rw-r--r-- kernel/locking/rtmutex.c 243
-rw-r--r-- kernel/locking/rtmutex.h 5
-rw-r--r-- kernel/power/hibernate.c 37
-rw-r--r-- kernel/power/main.c 6
-rw-r--r-- kernel/power/user.c 3
-rw-r--r-- kernel/printk/printk.c 44
-rw-r--r-- kernel/smp.c 57
-rw-r--r-- kernel/sysctl.c 18
-rw-r--r-- kernel/trace/trace.c 2
-rw-r--r-- kernel/trace/trace_uprobe.c 46
-rw-r--r-- kernel/tracepoint.c 26
-rw-r--r-- kernel/watchdog.c 41
-rw-r--r-- kernel/workqueue.c 3
22 files changed, 530 insertions, 137 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 7868fc3c0bc5..70776aec2562 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1648,10 +1648,13 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
1648 int flags, const char *unused_dev_name, 1648 int flags, const char *unused_dev_name,
1649 void *data) 1649 void *data)
1650{ 1650{
1651 struct super_block *pinned_sb = NULL;
1652 struct cgroup_subsys *ss;
1651 struct cgroup_root *root; 1653 struct cgroup_root *root;
1652 struct cgroup_sb_opts opts; 1654 struct cgroup_sb_opts opts;
1653 struct dentry *dentry; 1655 struct dentry *dentry;
1654 int ret; 1656 int ret;
1657 int i;
1655 bool new_sb; 1658 bool new_sb;
1656 1659
1657 /* 1660 /*
@@ -1677,6 +1680,27 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
1677 goto out_unlock; 1680 goto out_unlock;
1678 } 1681 }
1679 1682
1683 /*
1684 * Destruction of cgroup root is asynchronous, so subsystems may
1685 * still be dying after the previous unmount. Let's drain the
1686 * dying subsystems. We just need to ensure that the ones
1687 * unmounted previously finish dying and don't care about new ones
1688 * starting. Testing ref liveliness is good enough.
1689 */
1690 for_each_subsys(ss, i) {
1691 if (!(opts.subsys_mask & (1 << i)) ||
1692 ss->root == &cgrp_dfl_root)
1693 continue;
1694
1695 if (!percpu_ref_tryget_live(&ss->root->cgrp.self.refcnt)) {
1696 mutex_unlock(&cgroup_mutex);
1697 msleep(10);
1698 ret = restart_syscall();
1699 goto out_free;
1700 }
1701 cgroup_put(&ss->root->cgrp);
1702 }
1703
1680 for_each_root(root) { 1704 for_each_root(root) {
1681 bool name_match = false; 1705 bool name_match = false;
1682 1706
@@ -1717,15 +1741,23 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
1717 } 1741 }
1718 1742
1719 /* 1743 /*
1720 * A root's lifetime is governed by its root cgroup. 1744 * We want to reuse @root whose lifetime is governed by its
1721 * tryget_live failure indicate that the root is being 1745 * ->cgrp. Let's check whether @root is alive and keep it
1722 * destroyed. Wait for destruction to complete so that the 1746 * that way. As cgroup_kill_sb() can happen anytime, we
1723 * subsystems are free. We can use wait_queue for the wait 1747 * want to block it by pinning the sb so that @root doesn't
1724 * but this path is super cold. Let's just sleep for a bit 1748 * get killed before mount is complete.
1725 * and retry. 1749 *
1750 * With the sb pinned, tryget_live can reliably indicate
1751 * whether @root can be reused. If it's being killed,
1752 * drain it. We can use wait_queue for the wait but this
1753 * path is super cold. Let's just sleep a bit and retry.
1726 */ 1754 */
1727 if (!percpu_ref_tryget_live(&root->cgrp.self.refcnt)) { 1755 pinned_sb = kernfs_pin_sb(root->kf_root, NULL);
1756 if (IS_ERR(pinned_sb) ||
1757 !percpu_ref_tryget_live(&root->cgrp.self.refcnt)) {
1728 mutex_unlock(&cgroup_mutex); 1758 mutex_unlock(&cgroup_mutex);
1759 if (!IS_ERR_OR_NULL(pinned_sb))
1760 deactivate_super(pinned_sb);
1729 msleep(10); 1761 msleep(10);
1730 ret = restart_syscall(); 1762 ret = restart_syscall();
1731 goto out_free; 1763 goto out_free;
@@ -1770,6 +1802,16 @@ out_free:
1770 CGROUP_SUPER_MAGIC, &new_sb); 1802 CGROUP_SUPER_MAGIC, &new_sb);
1771 if (IS_ERR(dentry) || !new_sb) 1803 if (IS_ERR(dentry) || !new_sb)
1772 cgroup_put(&root->cgrp); 1804 cgroup_put(&root->cgrp);
1805
1806 /*
1807 * If @pinned_sb, we're reusing an existing root and holding an
1808 * extra ref on its sb. Mount is complete. Put the extra ref.
1809 */
1810 if (pinned_sb) {
1811 WARN_ON(new_sb);
1812 deactivate_super(pinned_sb);
1813 }
1814
1773 return dentry; 1815 return dentry;
1774} 1816}
1775 1817
@@ -3328,7 +3370,7 @@ bool css_has_online_children(struct cgroup_subsys_state *css)
3328 3370
3329 rcu_read_lock(); 3371 rcu_read_lock();
3330 css_for_each_child(child, css) { 3372 css_for_each_child(child, css) {
3331 if (css->flags & CSS_ONLINE) { 3373 if (child->flags & CSS_ONLINE) {
3332 ret = true; 3374 ret = true;
3333 break; 3375 break;
3334 } 3376 }
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
index 019d45008448..5664985c46a0 100644
--- a/kernel/context_tracking.c
+++ b/kernel/context_tracking.c
@@ -19,6 +19,7 @@
19#include <linux/sched.h> 19#include <linux/sched.h>
20#include <linux/hardirq.h> 20#include <linux/hardirq.h>
21#include <linux/export.h> 21#include <linux/export.h>
22#include <linux/kprobes.h>
22 23
23#define CREATE_TRACE_POINTS 24#define CREATE_TRACE_POINTS
24#include <trace/events/context_tracking.h> 25#include <trace/events/context_tracking.h>
@@ -104,6 +105,7 @@ void context_tracking_user_enter(void)
104 } 105 }
105 local_irq_restore(flags); 106 local_irq_restore(flags);
106} 107}
108NOKPROBE_SYMBOL(context_tracking_user_enter);
107 109
108#ifdef CONFIG_PREEMPT 110#ifdef CONFIG_PREEMPT
109/** 111/**
@@ -181,6 +183,7 @@ void context_tracking_user_exit(void)
181 } 183 }
182 local_irq_restore(flags); 184 local_irq_restore(flags);
183} 185}
186NOKPROBE_SYMBOL(context_tracking_user_exit);
184 187
185/** 188/**
186 * __context_tracking_task_switch - context switch the syscall callbacks 189 * __context_tracking_task_switch - context switch the syscall callbacks
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index f6b33c696224..116a4164720a 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1181,7 +1181,13 @@ done:
1181 1181
1182int current_cpuset_is_being_rebound(void) 1182int current_cpuset_is_being_rebound(void)
1183{ 1183{
1184 return task_cs(current) == cpuset_being_rebound; 1184 int ret;
1185
1186 rcu_read_lock();
1187 ret = task_cs(current) == cpuset_being_rebound;
1188 rcu_read_unlock();
1189
1190 return ret;
1185} 1191}
1186 1192
1187static int update_relax_domain_level(struct cpuset *cs, s64 val) 1193static int update_relax_domain_level(struct cpuset *cs, s64 val)
@@ -1617,7 +1623,17 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
1617 * resources, wait for the previously scheduled operations before 1623 * resources, wait for the previously scheduled operations before
1618 * proceeding, so that we don't end up keep removing tasks added 1624 * proceeding, so that we don't end up keep removing tasks added
1619 * after execution capability is restored. 1625 * after execution capability is restored.
1626 *
1627 * cpuset_hotplug_work calls back into cgroup core via
1628 * cgroup_transfer_tasks() and waiting for it from a cgroupfs
1629 * operation like this one can lead to a deadlock through kernfs
1630 * active_ref protection. Let's break the protection. Losing the
1631 * protection is okay as we check whether @cs is online after
1632 * grabbing cpuset_mutex anyway. This only happens on the legacy
1633 * hierarchies.
1620 */ 1634 */
1635 css_get(&cs->css);
1636 kernfs_break_active_protection(of->kn);
1621 flush_work(&cpuset_hotplug_work); 1637 flush_work(&cpuset_hotplug_work);
1622 1638
1623 mutex_lock(&cpuset_mutex); 1639 mutex_lock(&cpuset_mutex);
@@ -1645,6 +1661,8 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
1645 free_trial_cpuset(trialcs); 1661 free_trial_cpuset(trialcs);
1646out_unlock: 1662out_unlock:
1647 mutex_unlock(&cpuset_mutex); 1663 mutex_unlock(&cpuset_mutex);
1664 kernfs_unbreak_active_protection(of->kn);
1665 css_put(&cs->css);
1648 return retval ?: nbytes; 1666 return retval ?: nbytes;
1649} 1667}
1650 1668
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 5fa58e4cffac..a33d9a2bcbd7 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -40,6 +40,7 @@
40#include <linux/mm_types.h> 40#include <linux/mm_types.h>
41#include <linux/cgroup.h> 41#include <linux/cgroup.h>
42#include <linux/module.h> 42#include <linux/module.h>
43#include <linux/mman.h>
43 44
44#include "internal.h" 45#include "internal.h"
45 46
@@ -5128,6 +5129,7 @@ struct perf_mmap_event {
5128 int maj, min; 5129 int maj, min;
5129 u64 ino; 5130 u64 ino;
5130 u64 ino_generation; 5131 u64 ino_generation;
5132 u32 prot, flags;
5131 5133
5132 struct { 5134 struct {
5133 struct perf_event_header header; 5135 struct perf_event_header header;
@@ -5169,6 +5171,8 @@ static void perf_event_mmap_output(struct perf_event *event,
5169 mmap_event->event_id.header.size += sizeof(mmap_event->min); 5171 mmap_event->event_id.header.size += sizeof(mmap_event->min);
5170 mmap_event->event_id.header.size += sizeof(mmap_event->ino); 5172 mmap_event->event_id.header.size += sizeof(mmap_event->ino);
5171 mmap_event->event_id.header.size += sizeof(mmap_event->ino_generation); 5173 mmap_event->event_id.header.size += sizeof(mmap_event->ino_generation);
5174 mmap_event->event_id.header.size += sizeof(mmap_event->prot);
5175 mmap_event->event_id.header.size += sizeof(mmap_event->flags);
5172 } 5176 }
5173 5177
5174 perf_event_header__init_id(&mmap_event->event_id.header, &sample, event); 5178 perf_event_header__init_id(&mmap_event->event_id.header, &sample, event);
@@ -5187,6 +5191,8 @@ static void perf_event_mmap_output(struct perf_event *event,
5187 perf_output_put(&handle, mmap_event->min); 5191 perf_output_put(&handle, mmap_event->min);
5188 perf_output_put(&handle, mmap_event->ino); 5192 perf_output_put(&handle, mmap_event->ino);
5189 perf_output_put(&handle, mmap_event->ino_generation); 5193 perf_output_put(&handle, mmap_event->ino_generation);
5194 perf_output_put(&handle, mmap_event->prot);
5195 perf_output_put(&handle, mmap_event->flags);
5190 } 5196 }
5191 5197
5192 __output_copy(&handle, mmap_event->file_name, 5198 __output_copy(&handle, mmap_event->file_name,
@@ -5205,6 +5211,7 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
5205 struct file *file = vma->vm_file; 5211 struct file *file = vma->vm_file;
5206 int maj = 0, min = 0; 5212 int maj = 0, min = 0;
5207 u64 ino = 0, gen = 0; 5213 u64 ino = 0, gen = 0;
5214 u32 prot = 0, flags = 0;
5208 unsigned int size; 5215 unsigned int size;
5209 char tmp[16]; 5216 char tmp[16];
5210 char *buf = NULL; 5217 char *buf = NULL;
@@ -5235,6 +5242,28 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
5235 gen = inode->i_generation; 5242 gen = inode->i_generation;
5236 maj = MAJOR(dev); 5243 maj = MAJOR(dev);
5237 min = MINOR(dev); 5244 min = MINOR(dev);
5245
5246 if (vma->vm_flags & VM_READ)
5247 prot |= PROT_READ;
5248 if (vma->vm_flags & VM_WRITE)
5249 prot |= PROT_WRITE;
5250 if (vma->vm_flags & VM_EXEC)
5251 prot |= PROT_EXEC;
5252
5253 if (vma->vm_flags & VM_MAYSHARE)
5254 flags = MAP_SHARED;
5255 else
5256 flags = MAP_PRIVATE;
5257
5258 if (vma->vm_flags & VM_DENYWRITE)
5259 flags |= MAP_DENYWRITE;
5260 if (vma->vm_flags & VM_MAYEXEC)
5261 flags |= MAP_EXECUTABLE;
5262 if (vma->vm_flags & VM_LOCKED)
5263 flags |= MAP_LOCKED;
5264 if (vma->vm_flags & VM_HUGETLB)
5265 flags |= MAP_HUGETLB;
5266
5238 goto got_name; 5267 goto got_name;
5239 } else { 5268 } else {
5240 name = (char *)arch_vma_name(vma); 5269 name = (char *)arch_vma_name(vma);
@@ -5275,6 +5304,8 @@ got_name:
5275 mmap_event->min = min; 5304 mmap_event->min = min;
5276 mmap_event->ino = ino; 5305 mmap_event->ino = ino;
5277 mmap_event->ino_generation = gen; 5306 mmap_event->ino_generation = gen;
5307 mmap_event->prot = prot;
5308 mmap_event->flags = flags;
5278 5309
5279 if (!(vma->vm_flags & VM_EXEC)) 5310 if (!(vma->vm_flags & VM_EXEC))
5280 mmap_event->event_id.header.misc |= PERF_RECORD_MISC_MMAP_DATA; 5311 mmap_event->event_id.header.misc |= PERF_RECORD_MISC_MMAP_DATA;
@@ -5315,6 +5346,8 @@ void perf_event_mmap(struct vm_area_struct *vma)
5315 /* .min (attr_mmap2 only) */ 5346 /* .min (attr_mmap2 only) */
5316 /* .ino (attr_mmap2 only) */ 5347 /* .ino (attr_mmap2 only) */
5317 /* .ino_generation (attr_mmap2 only) */ 5348 /* .ino_generation (attr_mmap2 only) */
5349 /* .prot (attr_mmap2 only) */
5350 /* .flags (attr_mmap2 only) */
5318 }; 5351 };
5319 5352
5320 perf_event_mmap_event(&mmap_event); 5353 perf_event_mmap_event(&mmap_event);
@@ -6897,10 +6930,6 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
6897 if (ret) 6930 if (ret)
6898 return -EFAULT; 6931 return -EFAULT;
6899 6932
6900 /* disabled for now */
6901 if (attr->mmap2)
6902 return -EINVAL;
6903
6904 if (attr->__reserved_1) 6933 if (attr->__reserved_1)
6905 return -EINVAL; 6934 return -EINVAL;
6906 6935
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index c445e392e93f..6f3254e8c137 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -846,7 +846,7 @@ static void __uprobe_unregister(struct uprobe *uprobe, struct uprobe_consumer *u
846{ 846{
847 int err; 847 int err;
848 848
849 if (!consumer_del(uprobe, uc)) /* WARN? */ 849 if (WARN_ON(!consumer_del(uprobe, uc)))
850 return; 850 return;
851 851
852 err = register_for_each_vma(uprobe, NULL); 852 err = register_for_each_vma(uprobe, NULL);
@@ -927,7 +927,7 @@ int uprobe_apply(struct inode *inode, loff_t offset,
927 int ret = -ENOENT; 927 int ret = -ENOENT;
928 928
929 uprobe = find_uprobe(inode, offset); 929 uprobe = find_uprobe(inode, offset);
930 if (!uprobe) 930 if (WARN_ON(!uprobe))
931 return ret; 931 return ret;
932 932
933 down_write(&uprobe->register_rwsem); 933 down_write(&uprobe->register_rwsem);
@@ -952,7 +952,7 @@ void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consume
952 struct uprobe *uprobe; 952 struct uprobe *uprobe;
953 953
954 uprobe = find_uprobe(inode, offset); 954 uprobe = find_uprobe(inode, offset);
955 if (!uprobe) 955 if (WARN_ON(!uprobe))
956 return; 956 return;
957 957
958 down_write(&uprobe->register_rwsem); 958 down_write(&uprobe->register_rwsem);
diff --git a/kernel/fork.c b/kernel/fork.c
index 6ff87f4429a4..962885edbe53 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1486,7 +1486,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1486 1486
1487 total_forks++; 1487 total_forks++;
1488 spin_unlock(&current->sighand->siglock); 1488 spin_unlock(&current->sighand->siglock);
1489 syscall_tracepoint_update(p);
1489 write_unlock_irq(&tasklist_lock); 1490 write_unlock_irq(&tasklist_lock);
1491
1490 proc_fork_connector(p); 1492 proc_fork_connector(p);
1491 cgroup_post_fork(p); 1493 cgroup_post_fork(p);
1492 if (clone_flags & CLONE_THREAD) 1494 if (clone_flags & CLONE_THREAD)
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 7339e42a85ab..1487a123db5c 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -455,9 +455,9 @@ EXPORT_SYMBOL_GPL(irq_alloc_hwirqs);
455 */ 455 */
456void irq_free_hwirqs(unsigned int from, int cnt) 456void irq_free_hwirqs(unsigned int from, int cnt)
457{ 457{
458 int i; 458 int i, j;
459 459
460 for (i = from; cnt > 0; i++, cnt--) { 460 for (i = from, j = cnt; j > 0; i++, j--) {
461 irq_set_status_flags(i, _IRQ_NOREQUEST | _IRQ_NOPROBE); 461 irq_set_status_flags(i, _IRQ_NOREQUEST | _IRQ_NOPROBE);
462 arch_teardown_hwirq(i); 462 arch_teardown_hwirq(i);
463 } 463 }
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 6748688813d0..369f41a94124 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -1617,6 +1617,7 @@ static int __init crash_save_vmcoreinfo_init(void)
1617#ifdef CONFIG_MEMORY_FAILURE 1617#ifdef CONFIG_MEMORY_FAILURE
1618 VMCOREINFO_NUMBER(PG_hwpoison); 1618 VMCOREINFO_NUMBER(PG_hwpoison);
1619#endif 1619#endif
1620 VMCOREINFO_NUMBER(PG_head_mask);
1620 VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE); 1621 VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE);
1621 1622
1622 arch_crash_save_vmcoreinfo(); 1623 arch_crash_save_vmcoreinfo();
diff --git a/kernel/locking/rtmutex-debug.h b/kernel/locking/rtmutex-debug.h
index 14193d596d78..ab29b6a22669 100644
--- a/kernel/locking/rtmutex-debug.h
+++ b/kernel/locking/rtmutex-debug.h
@@ -31,3 +31,8 @@ static inline int debug_rt_mutex_detect_deadlock(struct rt_mutex_waiter *waiter,
31{ 31{
32 return (waiter != NULL); 32 return (waiter != NULL);
33} 33}
34
35static inline void rt_mutex_print_deadlock(struct rt_mutex_waiter *w)
36{
37 debug_rt_mutex_print_deadlock(w);
38}
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index a620d4d08ca6..fc605941b9b8 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -83,6 +83,47 @@ static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
83 owner = *p; 83 owner = *p;
84 } while (cmpxchg(p, owner, owner | RT_MUTEX_HAS_WAITERS) != owner); 84 } while (cmpxchg(p, owner, owner | RT_MUTEX_HAS_WAITERS) != owner);
85} 85}
86
87/*
88 * Safe fastpath aware unlock:
89 * 1) Clear the waiters bit
90 * 2) Drop lock->wait_lock
91 * 3) Try to unlock the lock with cmpxchg
92 */
93static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock)
94 __releases(lock->wait_lock)
95{
96 struct task_struct *owner = rt_mutex_owner(lock);
97
98 clear_rt_mutex_waiters(lock);
99 raw_spin_unlock(&lock->wait_lock);
100 /*
101 * If a new waiter comes in between the unlock and the cmpxchg
102 * we have two situations:
103 *
104 * unlock(wait_lock);
105 * lock(wait_lock);
106 * cmpxchg(p, owner, 0) == owner
107 * mark_rt_mutex_waiters(lock);
108 * acquire(lock);
109 * or:
110 *
111 * unlock(wait_lock);
112 * lock(wait_lock);
113 * mark_rt_mutex_waiters(lock);
114 *
115 * cmpxchg(p, owner, 0) != owner
116 * enqueue_waiter();
117 * unlock(wait_lock);
118 * lock(wait_lock);
119 * wake waiter();
120 * unlock(wait_lock);
121 * lock(wait_lock);
122 * acquire(lock);
123 */
124 return rt_mutex_cmpxchg(lock, owner, NULL);
125}
126
86#else 127#else
87# define rt_mutex_cmpxchg(l,c,n) (0) 128# define rt_mutex_cmpxchg(l,c,n) (0)
88static inline void mark_rt_mutex_waiters(struct rt_mutex *lock) 129static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
@@ -90,6 +131,17 @@ static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
90 lock->owner = (struct task_struct *) 131 lock->owner = (struct task_struct *)
91 ((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS); 132 ((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS);
92} 133}
134
135/*
136 * Simple slow path only version: lock->owner is protected by lock->wait_lock.
137 */
138static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock)
139 __releases(lock->wait_lock)
140{
141 lock->owner = NULL;
142 raw_spin_unlock(&lock->wait_lock);
143 return true;
144}
93#endif 145#endif
94 146
95static inline int 147static inline int
@@ -260,27 +312,36 @@ static void rt_mutex_adjust_prio(struct task_struct *task)
260 */ 312 */
261int max_lock_depth = 1024; 313int max_lock_depth = 1024;
262 314
315static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p)
316{
317 return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL;
318}
319
263/* 320/*
264 * Adjust the priority chain. Also used for deadlock detection. 321 * Adjust the priority chain. Also used for deadlock detection.
265 * Decreases task's usage by one - may thus free the task. 322 * Decreases task's usage by one - may thus free the task.
266 * 323 *
267 * @task: the task owning the mutex (owner) for which a chain walk is probably 324 * @task: the task owning the mutex (owner) for which a chain walk is
268 * needed 325 * probably needed
269 * @deadlock_detect: do we have to carry out deadlock detection? 326 * @deadlock_detect: do we have to carry out deadlock detection?
270 * @orig_lock: the mutex (can be NULL if we are walking the chain to recheck 327 * @orig_lock: the mutex (can be NULL if we are walking the chain to recheck
271 * things for a task that has just got its priority adjusted, and 328 * things for a task that has just got its priority adjusted, and
272 * is waiting on a mutex) 329 * is waiting on a mutex)
330 * @next_lock: the mutex on which the owner of @orig_lock was blocked before
331 * we dropped its pi_lock. Is never dereferenced, only used for
332 * comparison to detect lock chain changes.
273 * @orig_waiter: rt_mutex_waiter struct for the task that has just donated 333 * @orig_waiter: rt_mutex_waiter struct for the task that has just donated
274 * its priority to the mutex owner (can be NULL in the case 334 * its priority to the mutex owner (can be NULL in the case
275 * depicted above or if the top waiter is gone away and we are 335 * depicted above or if the top waiter is gone away and we are
276 * actually deboosting the owner) 336 * actually deboosting the owner)
277 * @top_task: the current top waiter 337 * @top_task: the current top waiter
278 * 338 *
279 * Returns 0 or -EDEADLK. 339 * Returns 0 or -EDEADLK.
280 */ 340 */
281static int rt_mutex_adjust_prio_chain(struct task_struct *task, 341static int rt_mutex_adjust_prio_chain(struct task_struct *task,
282 int deadlock_detect, 342 int deadlock_detect,
283 struct rt_mutex *orig_lock, 343 struct rt_mutex *orig_lock,
344 struct rt_mutex *next_lock,
284 struct rt_mutex_waiter *orig_waiter, 345 struct rt_mutex_waiter *orig_waiter,
285 struct task_struct *top_task) 346 struct task_struct *top_task)
286{ 347{
@@ -314,7 +375,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
314 } 375 }
315 put_task_struct(task); 376 put_task_struct(task);
316 377
317 return deadlock_detect ? -EDEADLK : 0; 378 return -EDEADLK;
318 } 379 }
319 retry: 380 retry:
320 /* 381 /*
@@ -339,6 +400,18 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
339 goto out_unlock_pi; 400 goto out_unlock_pi;
340 401
341 /* 402 /*
403 * We dropped all locks after taking a refcount on @task, so
404 * the task might have moved on in the lock chain or even left
405 * the chain completely and blocks now on an unrelated lock or
406 * on @orig_lock.
407 *
408 * We stored the lock on which @task was blocked in @next_lock,
409 * so we can detect the chain change.
410 */
411 if (next_lock != waiter->lock)
412 goto out_unlock_pi;
413
414 /*
342 * Drop out, when the task has no waiters. Note, 415 * Drop out, when the task has no waiters. Note,
343 * top_waiter can be NULL, when we are in the deboosting 416 * top_waiter can be NULL, when we are in the deboosting
344 * mode! 417 * mode!
@@ -377,7 +450,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
377 if (lock == orig_lock || rt_mutex_owner(lock) == top_task) { 450 if (lock == orig_lock || rt_mutex_owner(lock) == top_task) {
378 debug_rt_mutex_deadlock(deadlock_detect, orig_waiter, lock); 451 debug_rt_mutex_deadlock(deadlock_detect, orig_waiter, lock);
379 raw_spin_unlock(&lock->wait_lock); 452 raw_spin_unlock(&lock->wait_lock);
380 ret = deadlock_detect ? -EDEADLK : 0; 453 ret = -EDEADLK;
381 goto out_unlock_pi; 454 goto out_unlock_pi;
382 } 455 }
383 456
@@ -422,11 +495,26 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
422 __rt_mutex_adjust_prio(task); 495 __rt_mutex_adjust_prio(task);
423 } 496 }
424 497
498 /*
499 * Check whether the task which owns the current lock is pi
500 * blocked itself. If yes we store a pointer to the lock for
501 * the lock chain change detection above. After we dropped
502 * task->pi_lock next_lock cannot be dereferenced anymore.
503 */
504 next_lock = task_blocked_on_lock(task);
505
425 raw_spin_unlock_irqrestore(&task->pi_lock, flags); 506 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
426 507
427 top_waiter = rt_mutex_top_waiter(lock); 508 top_waiter = rt_mutex_top_waiter(lock);
428 raw_spin_unlock(&lock->wait_lock); 509 raw_spin_unlock(&lock->wait_lock);
429 510
511 /*
512 * We reached the end of the lock chain. Stop right here. No
513 * point to go back just to figure that out.
514 */
515 if (!next_lock)
516 goto out_put_task;
517
430 if (!detect_deadlock && waiter != top_waiter) 518 if (!detect_deadlock && waiter != top_waiter)
431 goto out_put_task; 519 goto out_put_task;
432 520
@@ -536,8 +624,9 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
536{ 624{
537 struct task_struct *owner = rt_mutex_owner(lock); 625 struct task_struct *owner = rt_mutex_owner(lock);
538 struct rt_mutex_waiter *top_waiter = waiter; 626 struct rt_mutex_waiter *top_waiter = waiter;
539 unsigned long flags; 627 struct rt_mutex *next_lock;
540 int chain_walk = 0, res; 628 int chain_walk = 0, res;
629 unsigned long flags;
541 630
542 /* 631 /*
543 * Early deadlock detection. We really don't want the task to 632 * Early deadlock detection. We really don't want the task to
@@ -548,7 +637,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
548 * which is wrong, as the other waiter is not in a deadlock 637 * which is wrong, as the other waiter is not in a deadlock
549 * situation. 638 * situation.
550 */ 639 */
551 if (detect_deadlock && owner == task) 640 if (owner == task)
552 return -EDEADLK; 641 return -EDEADLK;
553 642
554 raw_spin_lock_irqsave(&task->pi_lock, flags); 643 raw_spin_lock_irqsave(&task->pi_lock, flags);
@@ -569,20 +658,28 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
569 if (!owner) 658 if (!owner)
570 return 0; 659 return 0;
571 660
661 raw_spin_lock_irqsave(&owner->pi_lock, flags);
572 if (waiter == rt_mutex_top_waiter(lock)) { 662 if (waiter == rt_mutex_top_waiter(lock)) {
573 raw_spin_lock_irqsave(&owner->pi_lock, flags);
574 rt_mutex_dequeue_pi(owner, top_waiter); 663 rt_mutex_dequeue_pi(owner, top_waiter);
575 rt_mutex_enqueue_pi(owner, waiter); 664 rt_mutex_enqueue_pi(owner, waiter);
576 665
577 __rt_mutex_adjust_prio(owner); 666 __rt_mutex_adjust_prio(owner);
578 if (owner->pi_blocked_on) 667 if (owner->pi_blocked_on)
579 chain_walk = 1; 668 chain_walk = 1;
580 raw_spin_unlock_irqrestore(&owner->pi_lock, flags); 669 } else if (debug_rt_mutex_detect_deadlock(waiter, detect_deadlock)) {
581 }
582 else if (debug_rt_mutex_detect_deadlock(waiter, detect_deadlock))
583 chain_walk = 1; 670 chain_walk = 1;
671 }
584 672
585 if (!chain_walk) 673 /* Store the lock on which owner is blocked or NULL */
674 next_lock = task_blocked_on_lock(owner);
675
676 raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
677 /*
678 * Even if full deadlock detection is on, if the owner is not
679 * blocked itself, we can avoid finding this out in the chain
680 * walk.
681 */
682 if (!chain_walk || !next_lock)
586 return 0; 683 return 0;
587 684
588 /* 685 /*
@@ -594,8 +691,8 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
594 691
595 raw_spin_unlock(&lock->wait_lock); 692 raw_spin_unlock(&lock->wait_lock);
596 693
597 res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, waiter, 694 res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock,
598 task); 695 next_lock, waiter, task);
599 696
600 raw_spin_lock(&lock->wait_lock); 697 raw_spin_lock(&lock->wait_lock);
601 698
@@ -605,7 +702,8 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
605/* 702/*
606 * Wake up the next waiter on the lock. 703 * Wake up the next waiter on the lock.
607 * 704 *
608 * Remove the top waiter from the current tasks waiter list and wake it up. 705 * Remove the top waiter from the current tasks pi waiter list and
706 * wake it up.
609 * 707 *
610 * Called with lock->wait_lock held. 708 * Called with lock->wait_lock held.
611 */ 709 */
@@ -626,10 +724,23 @@ static void wakeup_next_waiter(struct rt_mutex *lock)
626 */ 724 */
627 rt_mutex_dequeue_pi(current, waiter); 725 rt_mutex_dequeue_pi(current, waiter);
628 726
629 rt_mutex_set_owner(lock, NULL); 727 /*
728 * As we are waking up the top waiter, and the waiter stays
729 * queued on the lock until it gets the lock, this lock
730 * obviously has waiters. Just set the bit here and this has
731 * the added benefit of forcing all new tasks into the
732 * slow path making sure no task of lower priority than
733 * the top waiter can steal this lock.
734 */
735 lock->owner = (void *) RT_MUTEX_HAS_WAITERS;
630 736
631 raw_spin_unlock_irqrestore(&current->pi_lock, flags); 737 raw_spin_unlock_irqrestore(&current->pi_lock, flags);
632 738
739 /*
740 * It's safe to dereference waiter as it cannot go away as
741 * long as we hold lock->wait_lock. The waiter task needs to
742 * acquire it in order to dequeue the waiter.
743 */
633 wake_up_process(waiter->task); 744 wake_up_process(waiter->task);
634} 745}
635 746
@@ -644,8 +755,8 @@ static void remove_waiter(struct rt_mutex *lock,
644{ 755{
645 int first = (waiter == rt_mutex_top_waiter(lock)); 756 int first = (waiter == rt_mutex_top_waiter(lock));
646 struct task_struct *owner = rt_mutex_owner(lock); 757 struct task_struct *owner = rt_mutex_owner(lock);
758 struct rt_mutex *next_lock = NULL;
647 unsigned long flags; 759 unsigned long flags;
648 int chain_walk = 0;
649 760
650 raw_spin_lock_irqsave(&current->pi_lock, flags); 761 raw_spin_lock_irqsave(&current->pi_lock, flags);
651 rt_mutex_dequeue(lock, waiter); 762 rt_mutex_dequeue(lock, waiter);
@@ -669,13 +780,13 @@ static void remove_waiter(struct rt_mutex *lock,
669 } 780 }
670 __rt_mutex_adjust_prio(owner); 781 __rt_mutex_adjust_prio(owner);
671 782
672 if (owner->pi_blocked_on) 783 /* Store the lock on which owner is blocked or NULL */
673 chain_walk = 1; 784 next_lock = task_blocked_on_lock(owner);
674 785
675 raw_spin_unlock_irqrestore(&owner->pi_lock, flags); 786 raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
676 } 787 }
677 788
678 if (!chain_walk) 789 if (!next_lock)
679 return; 790 return;
680 791
681 /* gets dropped in rt_mutex_adjust_prio_chain()! */ 792 /* gets dropped in rt_mutex_adjust_prio_chain()! */
@@ -683,7 +794,7 @@ static void remove_waiter(struct rt_mutex *lock,
683 794
684 raw_spin_unlock(&lock->wait_lock); 795 raw_spin_unlock(&lock->wait_lock);
685 796
686 rt_mutex_adjust_prio_chain(owner, 0, lock, NULL, current); 797 rt_mutex_adjust_prio_chain(owner, 0, lock, next_lock, NULL, current);
687 798
688 raw_spin_lock(&lock->wait_lock); 799 raw_spin_lock(&lock->wait_lock);
689} 800}
@@ -696,6 +807,7 @@ static void remove_waiter(struct rt_mutex *lock,
696void rt_mutex_adjust_pi(struct task_struct *task) 807void rt_mutex_adjust_pi(struct task_struct *task)
697{ 808{
698 struct rt_mutex_waiter *waiter; 809 struct rt_mutex_waiter *waiter;
810 struct rt_mutex *next_lock;
699 unsigned long flags; 811 unsigned long flags;
700 812
701 raw_spin_lock_irqsave(&task->pi_lock, flags); 813 raw_spin_lock_irqsave(&task->pi_lock, flags);
@@ -706,12 +818,13 @@ void rt_mutex_adjust_pi(struct task_struct *task)
706 raw_spin_unlock_irqrestore(&task->pi_lock, flags); 818 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
707 return; 819 return;
708 } 820 }
709 821 next_lock = waiter->lock;
710 raw_spin_unlock_irqrestore(&task->pi_lock, flags); 822 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
711 823
712 /* gets dropped in rt_mutex_adjust_prio_chain()! */ 824 /* gets dropped in rt_mutex_adjust_prio_chain()! */
713 get_task_struct(task); 825 get_task_struct(task);
714 rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task); 826
827 rt_mutex_adjust_prio_chain(task, 0, NULL, next_lock, NULL, task);
715} 828}
716 829
717/** 830/**
@@ -763,6 +876,26 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state,
763 return ret; 876 return ret;
764} 877}
765 878
879static void rt_mutex_handle_deadlock(int res, int detect_deadlock,
880 struct rt_mutex_waiter *w)
881{
882 /*
883 * If the result is not -EDEADLOCK or the caller requested
884 * deadlock detection, nothing to do here.
885 */
886 if (res != -EDEADLOCK || detect_deadlock)
887 return;
888
889 /*
890 * Yell loudly and stop the task right here.
891 */
892 rt_mutex_print_deadlock(w);
893 while (1) {
894 set_current_state(TASK_INTERRUPTIBLE);
895 schedule();
896 }
897}
898
766/* 899/*
767 * Slow path lock function: 900 * Slow path lock function:
768 */ 901 */
@@ -802,8 +935,10 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
802 935
803 set_current_state(TASK_RUNNING); 936 set_current_state(TASK_RUNNING);
804 937
805 if (unlikely(ret)) 938 if (unlikely(ret)) {
806 remove_waiter(lock, &waiter); 939 remove_waiter(lock, &waiter);
940 rt_mutex_handle_deadlock(ret, detect_deadlock, &waiter);
941 }
807 942
808 /* 943 /*
809 * try_to_take_rt_mutex() sets the waiter bit 944 * try_to_take_rt_mutex() sets the waiter bit
@@ -859,12 +994,49 @@ rt_mutex_slowunlock(struct rt_mutex *lock)
859 994
860 rt_mutex_deadlock_account_unlock(current); 995 rt_mutex_deadlock_account_unlock(current);
861 996
862 if (!rt_mutex_has_waiters(lock)) { 997 /*
863 lock->owner = NULL; 998 * We must be careful here if the fast path is enabled. If we
864 raw_spin_unlock(&lock->wait_lock); 999 * have no waiters queued we cannot set owner to NULL here
865 return; 1000 * because of:
1001 *
1002 * foo->lock->owner = NULL;
1003 * rtmutex_lock(foo->lock); <- fast path
1004 * free = atomic_dec_and_test(foo->refcnt);
1005 * rtmutex_unlock(foo->lock); <- fast path
1006 * if (free)
1007 * kfree(foo);
1008 * raw_spin_unlock(foo->lock->wait_lock);
1009 *
1010 * So for the fastpath enabled kernel:
1011 *
1012 * Nothing can set the waiters bit as long as we hold
1013 * lock->wait_lock. So we do the following sequence:
1014 *
1015 * owner = rt_mutex_owner(lock);
1016 * clear_rt_mutex_waiters(lock);
1017 * raw_spin_unlock(&lock->wait_lock);
1018 * if (cmpxchg(&lock->owner, owner, 0) == owner)
1019 * return;
1020 * goto retry;
1021 *
1022 * The fastpath disabled variant is simple as all access to
1023 * lock->owner is serialized by lock->wait_lock:
1024 *
1025 * lock->owner = NULL;
1026 * raw_spin_unlock(&lock->wait_lock);
1027 */
1028 while (!rt_mutex_has_waiters(lock)) {
1029 /* Drops lock->wait_lock ! */
1030 if (unlock_rt_mutex_safe(lock) == true)
1031 return;
1032 /* Relock the rtmutex and try again */
1033 raw_spin_lock(&lock->wait_lock);
866 } 1034 }
867 1035
1036 /*
1037 * The wakeup next waiter path does not suffer from the above
1038 * race. See the comments there.
1039 */
868 wakeup_next_waiter(lock); 1040 wakeup_next_waiter(lock);
869 1041
870 raw_spin_unlock(&lock->wait_lock); 1042 raw_spin_unlock(&lock->wait_lock);
@@ -1112,7 +1284,8 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
1112 return 1; 1284 return 1;
1113 } 1285 }
1114 1286
1115 ret = task_blocks_on_rt_mutex(lock, waiter, task, detect_deadlock); 1287 /* We enforce deadlock detection for futexes */
1288 ret = task_blocks_on_rt_mutex(lock, waiter, task, 1);
1116 1289
1117 if (ret && !rt_mutex_owner(lock)) { 1290 if (ret && !rt_mutex_owner(lock)) {
1118 /* 1291 /*
diff --git a/kernel/locking/rtmutex.h b/kernel/locking/rtmutex.h
index a1a1dd06421d..f6a1f3c133b1 100644
--- a/kernel/locking/rtmutex.h
+++ b/kernel/locking/rtmutex.h
@@ -24,3 +24,8 @@
24#define debug_rt_mutex_print_deadlock(w) do { } while (0) 24#define debug_rt_mutex_print_deadlock(w) do { } while (0)
25#define debug_rt_mutex_detect_deadlock(w,d) (d) 25#define debug_rt_mutex_detect_deadlock(w,d) (d)
26#define debug_rt_mutex_reset_waiter(w) do { } while (0) 26#define debug_rt_mutex_reset_waiter(w) do { } while (0)
27
28static inline void rt_mutex_print_deadlock(struct rt_mutex_waiter *w)
29{
30 WARN(1, "rtmutex deadlock detected\n");
31}
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 49e0a20fd010..fcc2611d3f14 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -35,6 +35,7 @@
35 35
36static int nocompress; 36static int nocompress;
37static int noresume; 37static int noresume;
38static int nohibernate;
38static int resume_wait; 39static int resume_wait;
39static unsigned int resume_delay; 40static unsigned int resume_delay;
40static char resume_file[256] = CONFIG_PM_STD_PARTITION; 41static char resume_file[256] = CONFIG_PM_STD_PARTITION;
@@ -62,6 +63,11 @@ bool freezer_test_done;
62 63
63static const struct platform_hibernation_ops *hibernation_ops; 64static const struct platform_hibernation_ops *hibernation_ops;
64 65
66bool hibernation_available(void)
67{
68 return (nohibernate == 0);
69}
70
65/** 71/**
66 * hibernation_set_ops - Set the global hibernate operations. 72 * hibernation_set_ops - Set the global hibernate operations.
67 * @ops: Hibernation operations to use in subsequent hibernation transitions. 73 * @ops: Hibernation operations to use in subsequent hibernation transitions.
@@ -642,6 +648,11 @@ int hibernate(void)
642{ 648{
643 int error; 649 int error;
644 650
651 if (!hibernation_available()) {
652 pr_debug("PM: Hibernation not available.\n");
653 return -EPERM;
654 }
655
645 lock_system_sleep(); 656 lock_system_sleep();
646 /* The snapshot device should not be opened while we're running */ 657 /* The snapshot device should not be opened while we're running */
647 if (!atomic_add_unless(&snapshot_device_available, -1, 0)) { 658 if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
@@ -734,7 +745,7 @@ static int software_resume(void)
734 /* 745 /*
735 * If the user said "noresume".. bail out early. 746 * If the user said "noresume".. bail out early.
736 */ 747 */
737 if (noresume) 748 if (noresume || !hibernation_available())
738 return 0; 749 return 0;
739 750
740 /* 751 /*
@@ -900,6 +911,9 @@ static ssize_t disk_show(struct kobject *kobj, struct kobj_attribute *attr,
900 int i; 911 int i;
901 char *start = buf; 912 char *start = buf;
902 913
914 if (!hibernation_available())
915 return sprintf(buf, "[disabled]\n");
916
903 for (i = HIBERNATION_FIRST; i <= HIBERNATION_MAX; i++) { 917 for (i = HIBERNATION_FIRST; i <= HIBERNATION_MAX; i++) {
904 if (!hibernation_modes[i]) 918 if (!hibernation_modes[i])
905 continue; 919 continue;
@@ -934,6 +948,9 @@ static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr,
934 char *p; 948 char *p;
935 int mode = HIBERNATION_INVALID; 949 int mode = HIBERNATION_INVALID;
936 950
951 if (!hibernation_available())
952 return -EPERM;
953
937 p = memchr(buf, '\n', n); 954 p = memchr(buf, '\n', n);
938 len = p ? p - buf : n; 955 len = p ? p - buf : n;
939 956
@@ -1101,6 +1118,10 @@ static int __init hibernate_setup(char *str)
1101 noresume = 1; 1118 noresume = 1;
1102 else if (!strncmp(str, "nocompress", 10)) 1119 else if (!strncmp(str, "nocompress", 10))
1103 nocompress = 1; 1120 nocompress = 1;
1121 else if (!strncmp(str, "no", 2)) {
1122 noresume = 1;
1123 nohibernate = 1;
1124 }
1104 return 1; 1125 return 1;
1105} 1126}
1106 1127
@@ -1125,9 +1146,23 @@ static int __init resumedelay_setup(char *str)
1125 return 1; 1146 return 1;
1126} 1147}
1127 1148
1149static int __init nohibernate_setup(char *str)
1150{
1151 noresume = 1;
1152 nohibernate = 1;
1153 return 1;
1154}
1155
1156static int __init kaslr_nohibernate_setup(char *str)
1157{
1158 return nohibernate_setup(str);
1159}
1160
1128__setup("noresume", noresume_setup); 1161__setup("noresume", noresume_setup);
1129__setup("resume_offset=", resume_offset_setup); 1162__setup("resume_offset=", resume_offset_setup);
1130__setup("resume=", resume_setup); 1163__setup("resume=", resume_setup);
1131__setup("hibernate=", hibernate_setup); 1164__setup("hibernate=", hibernate_setup);
1132__setup("resumewait", resumewait_setup); 1165__setup("resumewait", resumewait_setup);
1133__setup("resumedelay=", resumedelay_setup); 1166__setup("resumedelay=", resumedelay_setup);
1167__setup("nohibernate", nohibernate_setup);
1168__setup("kaslr", kaslr_nohibernate_setup);
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 573410d6647e..8e90f330f139 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -300,13 +300,11 @@ static ssize_t state_show(struct kobject *kobj, struct kobj_attribute *attr,
300 s += sprintf(s,"%s ", pm_states[i].label); 300 s += sprintf(s,"%s ", pm_states[i].label);
301 301
302#endif 302#endif
303#ifdef CONFIG_HIBERNATION 303 if (hibernation_available())
304 s += sprintf(s, "%s\n", "disk"); 304 s += sprintf(s, "disk ");
305#else
306 if (s != buf) 305 if (s != buf)
307 /* convert the last space to a newline */ 306 /* convert the last space to a newline */
308 *(s-1) = '\n'; 307 *(s-1) = '\n';
309#endif
310 return (s - buf); 308 return (s - buf);
311} 309}
312 310
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 98d357584cd6..526e8911460a 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -49,6 +49,9 @@ static int snapshot_open(struct inode *inode, struct file *filp)
49 struct snapshot_data *data; 49 struct snapshot_data *data;
50 int error; 50 int error;
51 51
52 if (!hibernation_available())
53 return -EPERM;
54
52 lock_system_sleep(); 55 lock_system_sleep();
53 56
54 if (!atomic_add_unless(&snapshot_device_available, -1, 0)) { 57 if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index ea2d5f6962ed..13e839dbca07 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -1416,9 +1416,10 @@ static int have_callable_console(void)
1416/* 1416/*
1417 * Can we actually use the console at this time on this cpu? 1417 * Can we actually use the console at this time on this cpu?
1418 * 1418 *
1419 * Console drivers may assume that per-cpu resources have been allocated. So 1419 * Console drivers may assume that per-cpu resources have
1420 * unless they're explicitly marked as being able to cope (CON_ANYTIME) don't 1420 * been allocated. So unless they're explicitly marked as
1421 * call them until this CPU is officially up. 1421 * being able to cope (CON_ANYTIME) don't call them until
1422 * this CPU is officially up.
1422 */ 1423 */
1423static inline int can_use_console(unsigned int cpu) 1424static inline int can_use_console(unsigned int cpu)
1424{ 1425{
@@ -1431,10 +1432,8 @@ static inline int can_use_console(unsigned int cpu)
1431 * console_lock held, and 'console_locked' set) if it 1432 * console_lock held, and 'console_locked' set) if it
1432 * is successful, false otherwise. 1433 * is successful, false otherwise.
1433 */ 1434 */
1434static int console_trylock_for_printk(void) 1435static int console_trylock_for_printk(unsigned int cpu)
1435{ 1436{
1436 unsigned int cpu = smp_processor_id();
1437
1438 if (!console_trylock()) 1437 if (!console_trylock())
1439 return 0; 1438 return 0;
1440 /* 1439 /*
@@ -1609,8 +1608,7 @@ asmlinkage int vprintk_emit(int facility, int level,
1609 */ 1608 */
1610 if (!oops_in_progress && !lockdep_recursing(current)) { 1609 if (!oops_in_progress && !lockdep_recursing(current)) {
1611 recursion_bug = 1; 1610 recursion_bug = 1;
1612 local_irq_restore(flags); 1611 goto out_restore_irqs;
1613 return 0;
1614 } 1612 }
1615 zap_locks(); 1613 zap_locks();
1616 } 1614 }
@@ -1718,27 +1716,21 @@ asmlinkage int vprintk_emit(int facility, int level,
1718 1716
1719 logbuf_cpu = UINT_MAX; 1717 logbuf_cpu = UINT_MAX;
1720 raw_spin_unlock(&logbuf_lock); 1718 raw_spin_unlock(&logbuf_lock);
1721 lockdep_on();
1722 local_irq_restore(flags);
1723 1719
1724 /* If called from the scheduler, we can not call up(). */ 1720 /* If called from the scheduler, we can not call up(). */
1725 if (in_sched) 1721 if (!in_sched) {
1726 return printed_len; 1722 /*
1727 1723 * Try to acquire and then immediately release the console
1728 /* 1724 * semaphore. The release will print out buffers and wake up
1729 * Disable preemption to avoid being preempted while holding 1725 * /dev/kmsg and syslog() users.
1730 * console_sem which would prevent anyone from printing to console 1726 */
1731 */ 1727 if (console_trylock_for_printk(this_cpu))
1732 preempt_disable(); 1728 console_unlock();
1733 /* 1729 }
1734 * Try to acquire and then immediately release the console semaphore.
1735 * The release will print out buffers and wake up /dev/kmsg and syslog()
1736 * users.
1737 */
1738 if (console_trylock_for_printk())
1739 console_unlock();
1740 preempt_enable();
1741 1730
1731 lockdep_on();
1732out_restore_irqs:
1733 local_irq_restore(flags);
1742 return printed_len; 1734 return printed_len;
1743} 1735}
1744EXPORT_SYMBOL(vprintk_emit); 1736EXPORT_SYMBOL(vprintk_emit);
diff --git a/kernel/smp.c b/kernel/smp.c
index a1812d184aed..487653b5844f 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -30,6 +30,8 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_function_data, cfd_data);
30 30
31static DEFINE_PER_CPU_SHARED_ALIGNED(struct llist_head, call_single_queue); 31static DEFINE_PER_CPU_SHARED_ALIGNED(struct llist_head, call_single_queue);
32 32
33static void flush_smp_call_function_queue(bool warn_cpu_offline);
34
33static int 35static int
34hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu) 36hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu)
35{ 37{
@@ -52,12 +54,27 @@ hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu)
52#ifdef CONFIG_HOTPLUG_CPU 54#ifdef CONFIG_HOTPLUG_CPU
53 case CPU_UP_CANCELED: 55 case CPU_UP_CANCELED:
54 case CPU_UP_CANCELED_FROZEN: 56 case CPU_UP_CANCELED_FROZEN:
57 /* Fall-through to the CPU_DEAD[_FROZEN] case. */
55 58
56 case CPU_DEAD: 59 case CPU_DEAD:
57 case CPU_DEAD_FROZEN: 60 case CPU_DEAD_FROZEN:
58 free_cpumask_var(cfd->cpumask); 61 free_cpumask_var(cfd->cpumask);
59 free_percpu(cfd->csd); 62 free_percpu(cfd->csd);
60 break; 63 break;
64
65 case CPU_DYING:
66 case CPU_DYING_FROZEN:
67 /*
68 * The IPIs for the smp-call-function callbacks queued by other
69 * CPUs might arrive late, either due to hardware latencies or
70 * because this CPU disabled interrupts (inside stop-machine)
71 * before the IPIs were sent. So flush out any pending callbacks
72 * explicitly (without waiting for the IPIs to arrive), to
73 * ensure that the outgoing CPU doesn't go offline with work
74 * still pending.
75 */
76 flush_smp_call_function_queue(false);
77 break;
61#endif 78#endif
62 }; 79 };
63 80
@@ -178,23 +195,47 @@ static int generic_exec_single(int cpu, struct call_single_data *csd,
178 return 0; 195 return 0;
179} 196}
180 197
181/* 198/**
182 * Invoked by arch to handle an IPI for call function single. Must be 199 * generic_smp_call_function_single_interrupt - Execute SMP IPI callbacks
183 * called from the arch with interrupts disabled. 200 *
201 * Invoked by arch to handle an IPI for call function single.
202 * Must be called with interrupts disabled.
184 */ 203 */
185void generic_smp_call_function_single_interrupt(void) 204void generic_smp_call_function_single_interrupt(void)
186{ 205{
206 flush_smp_call_function_queue(true);
207}
208
209/**
210 * flush_smp_call_function_queue - Flush pending smp-call-function callbacks
211 *
212 * @warn_cpu_offline: If set to 'true', warn if callbacks were queued on an
213 * offline CPU. Skip this check if set to 'false'.
214 *
215 * Flush any pending smp-call-function callbacks queued on this CPU. This is
216 * invoked by the generic IPI handler, as well as by a CPU about to go offline,
217 * to ensure that all pending IPI callbacks are run before it goes completely
218 * offline.
219 *
220 * Loop through the call_single_queue and run all the queued callbacks.
221 * Must be called with interrupts disabled.
222 */
223static void flush_smp_call_function_queue(bool warn_cpu_offline)
224{
225 struct llist_head *head;
187 struct llist_node *entry; 226 struct llist_node *entry;
188 struct call_single_data *csd, *csd_next; 227 struct call_single_data *csd, *csd_next;
189 static bool warned; 228 static bool warned;
190 229
191 entry = llist_del_all(&__get_cpu_var(call_single_queue)); 230 WARN_ON(!irqs_disabled());
231
232 head = &__get_cpu_var(call_single_queue);
233 entry = llist_del_all(head);
192 entry = llist_reverse_order(entry); 234 entry = llist_reverse_order(entry);
193 235
194 /* 236 /* There shouldn't be any pending callbacks on an offline CPU. */
195 * Shouldn't receive this interrupt on a cpu that is not yet online. 237 if (unlikely(warn_cpu_offline && !cpu_online(smp_processor_id()) &&
196 */ 238 !warned && !llist_empty(head))) {
197 if (unlikely(!cpu_online(smp_processor_id()) && !warned)) {
198 warned = true; 239 warned = true;
199 WARN(1, "IPI on offline CPU %d\n", smp_processor_id()); 240 WARN(1, "IPI on offline CPU %d\n", smp_processor_id());
200 241
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index ba9ed453c4ed..75b22e22a72c 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -136,7 +136,6 @@ static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
136/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ 136/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
137static int maxolduid = 65535; 137static int maxolduid = 65535;
138static int minolduid; 138static int minolduid;
139static int min_percpu_pagelist_fract = 8;
140 139
141static int ngroups_max = NGROUPS_MAX; 140static int ngroups_max = NGROUPS_MAX;
142static const int cap_last_cap = CAP_LAST_CAP; 141static const int cap_last_cap = CAP_LAST_CAP;
@@ -152,10 +151,6 @@ static unsigned long hung_task_timeout_max = (LONG_MAX/HZ);
152#ifdef CONFIG_SPARC 151#ifdef CONFIG_SPARC
153#endif 152#endif
154 153
155#ifdef CONFIG_SPARC64
156extern int sysctl_tsb_ratio;
157#endif
158
159#ifdef __hppa__ 154#ifdef __hppa__
160extern int pwrsw_enabled; 155extern int pwrsw_enabled;
161#endif 156#endif
@@ -865,6 +860,17 @@ static struct ctl_table kern_table[] = {
865 .extra1 = &zero, 860 .extra1 = &zero,
866 .extra2 = &one, 861 .extra2 = &one,
867 }, 862 },
863#ifdef CONFIG_SMP
864 {
865 .procname = "softlockup_all_cpu_backtrace",
866 .data = &sysctl_softlockup_all_cpu_backtrace,
867 .maxlen = sizeof(int),
868 .mode = 0644,
869 .proc_handler = proc_dointvec_minmax,
870 .extra1 = &zero,
871 .extra2 = &one,
872 },
873#endif /* CONFIG_SMP */
868 { 874 {
869 .procname = "nmi_watchdog", 875 .procname = "nmi_watchdog",
870 .data = &watchdog_user_enabled, 876 .data = &watchdog_user_enabled,
@@ -1321,7 +1327,7 @@ static struct ctl_table vm_table[] = {
1321 .maxlen = sizeof(percpu_pagelist_fraction), 1327 .maxlen = sizeof(percpu_pagelist_fraction),
1322 .mode = 0644, 1328 .mode = 0644,
1323 .proc_handler = percpu_pagelist_fraction_sysctl_handler, 1329 .proc_handler = percpu_pagelist_fraction_sysctl_handler,
1324 .extra1 = &min_percpu_pagelist_fract, 1330 .extra1 = &zero,
1325 }, 1331 },
1326#ifdef CONFIG_MMU 1332#ifdef CONFIG_MMU
1327 { 1333 {
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 384ede311717..f243444a3772 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1396,7 +1396,6 @@ void tracing_start(void)
1396 1396
1397 arch_spin_unlock(&global_trace.max_lock); 1397 arch_spin_unlock(&global_trace.max_lock);
1398 1398
1399 ftrace_start();
1400 out: 1399 out:
1401 raw_spin_unlock_irqrestore(&global_trace.start_lock, flags); 1400 raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1402} 1401}
@@ -1443,7 +1442,6 @@ void tracing_stop(void)
1443 struct ring_buffer *buffer; 1442 struct ring_buffer *buffer;
1444 unsigned long flags; 1443 unsigned long flags;
1445 1444
1446 ftrace_stop();
1447 raw_spin_lock_irqsave(&global_trace.start_lock, flags); 1445 raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1448 if (global_trace.stop_count++) 1446 if (global_trace.stop_count++)
1449 goto out; 1447 goto out;
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 04fdb5de823c..3c9b97e6b1f4 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -893,6 +893,9 @@ probe_event_enable(struct trace_uprobe *tu, struct ftrace_event_file *file,
893 int ret; 893 int ret;
894 894
895 if (file) { 895 if (file) {
896 if (tu->tp.flags & TP_FLAG_PROFILE)
897 return -EINTR;
898
896 link = kmalloc(sizeof(*link), GFP_KERNEL); 899 link = kmalloc(sizeof(*link), GFP_KERNEL);
897 if (!link) 900 if (!link)
898 return -ENOMEM; 901 return -ENOMEM;
@@ -901,29 +904,40 @@ probe_event_enable(struct trace_uprobe *tu, struct ftrace_event_file *file,
901 list_add_tail_rcu(&link->list, &tu->tp.files); 904 list_add_tail_rcu(&link->list, &tu->tp.files);
902 905
903 tu->tp.flags |= TP_FLAG_TRACE; 906 tu->tp.flags |= TP_FLAG_TRACE;
904 } else 907 } else {
905 tu->tp.flags |= TP_FLAG_PROFILE; 908 if (tu->tp.flags & TP_FLAG_TRACE)
909 return -EINTR;
906 910
907 ret = uprobe_buffer_enable(); 911 tu->tp.flags |= TP_FLAG_PROFILE;
908 if (ret < 0) 912 }
909 return ret;
910 913
911 WARN_ON(!uprobe_filter_is_empty(&tu->filter)); 914 WARN_ON(!uprobe_filter_is_empty(&tu->filter));
912 915
913 if (enabled) 916 if (enabled)
914 return 0; 917 return 0;
915 918
919 ret = uprobe_buffer_enable();
920 if (ret)
921 goto err_flags;
922
916 tu->consumer.filter = filter; 923 tu->consumer.filter = filter;
917 ret = uprobe_register(tu->inode, tu->offset, &tu->consumer); 924 ret = uprobe_register(tu->inode, tu->offset, &tu->consumer);
918 if (ret) { 925 if (ret)
919 if (file) { 926 goto err_buffer;
920 list_del(&link->list);
921 kfree(link);
922 tu->tp.flags &= ~TP_FLAG_TRACE;
923 } else
924 tu->tp.flags &= ~TP_FLAG_PROFILE;
925 }
926 927
928 return 0;
929
930 err_buffer:
931 uprobe_buffer_disable();
932
933 err_flags:
934 if (file) {
935 list_del(&link->list);
936 kfree(link);
937 tu->tp.flags &= ~TP_FLAG_TRACE;
938 } else {
939 tu->tp.flags &= ~TP_FLAG_PROFILE;
940 }
927 return ret; 941 return ret;
928} 942}
929 943
@@ -1201,12 +1215,6 @@ static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs)
1201 1215
1202 current->utask->vaddr = (unsigned long) &udd; 1216 current->utask->vaddr = (unsigned long) &udd;
1203 1217
1204#ifdef CONFIG_PERF_EVENTS
1205 if ((tu->tp.flags & TP_FLAG_TRACE) == 0 &&
1206 !uprobe_perf_filter(&tu->consumer, 0, current->mm))
1207 return UPROBE_HANDLER_REMOVE;
1208#endif
1209
1210 if (WARN_ON_ONCE(!uprobe_cpu_buffer)) 1218 if (WARN_ON_ONCE(!uprobe_cpu_buffer))
1211 return 0; 1219 return 0;
1212 1220
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index 33cbd8c203f8..3490407dc7b7 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -492,33 +492,29 @@ static int sys_tracepoint_refcount;
492 492
493void syscall_regfunc(void) 493void syscall_regfunc(void)
494{ 494{
495 unsigned long flags; 495 struct task_struct *p, *t;
496 struct task_struct *g, *t;
497 496
498 if (!sys_tracepoint_refcount) { 497 if (!sys_tracepoint_refcount) {
499 read_lock_irqsave(&tasklist_lock, flags); 498 read_lock(&tasklist_lock);
500 do_each_thread(g, t) { 499 for_each_process_thread(p, t) {
501 /* Skip kernel threads. */ 500 set_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT);
502 if (t->mm) 501 }
503 set_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT); 502 read_unlock(&tasklist_lock);
504 } while_each_thread(g, t);
505 read_unlock_irqrestore(&tasklist_lock, flags);
506 } 503 }
507 sys_tracepoint_refcount++; 504 sys_tracepoint_refcount++;
508} 505}
509 506
510void syscall_unregfunc(void) 507void syscall_unregfunc(void)
511{ 508{
512 unsigned long flags; 509 struct task_struct *p, *t;
513 struct task_struct *g, *t;
514 510
515 sys_tracepoint_refcount--; 511 sys_tracepoint_refcount--;
516 if (!sys_tracepoint_refcount) { 512 if (!sys_tracepoint_refcount) {
517 read_lock_irqsave(&tasklist_lock, flags); 513 read_lock(&tasklist_lock);
518 do_each_thread(g, t) { 514 for_each_process_thread(p, t) {
519 clear_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT); 515 clear_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT);
520 } while_each_thread(g, t); 516 }
521 read_unlock_irqrestore(&tasklist_lock, flags); 517 read_unlock(&tasklist_lock);
522 } 518 }
523} 519}
524#endif 520#endif
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 516203e665fc..c3319bd1b040 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -31,6 +31,12 @@
31 31
32int watchdog_user_enabled = 1; 32int watchdog_user_enabled = 1;
33int __read_mostly watchdog_thresh = 10; 33int __read_mostly watchdog_thresh = 10;
34#ifdef CONFIG_SMP
35int __read_mostly sysctl_softlockup_all_cpu_backtrace;
36#else
37#define sysctl_softlockup_all_cpu_backtrace 0
38#endif
39
34static int __read_mostly watchdog_running; 40static int __read_mostly watchdog_running;
35static u64 __read_mostly sample_period; 41static u64 __read_mostly sample_period;
36 42
@@ -47,6 +53,7 @@ static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
47static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved); 53static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
48static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); 54static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
49#endif 55#endif
56static unsigned long soft_lockup_nmi_warn;
50 57
51/* boot commands */ 58/* boot commands */
52/* 59/*
@@ -95,6 +102,15 @@ static int __init nosoftlockup_setup(char *str)
95} 102}
96__setup("nosoftlockup", nosoftlockup_setup); 103__setup("nosoftlockup", nosoftlockup_setup);
97/* */ 104/* */
105#ifdef CONFIG_SMP
106static int __init softlockup_all_cpu_backtrace_setup(char *str)
107{
108 sysctl_softlockup_all_cpu_backtrace =
109 !!simple_strtol(str, NULL, 0);
110 return 1;
111}
112__setup("softlockup_all_cpu_backtrace=", softlockup_all_cpu_backtrace_setup);
113#endif
98 114
99/* 115/*
100 * Hard-lockup warnings should be triggered after just a few seconds. Soft- 116 * Hard-lockup warnings should be triggered after just a few seconds. Soft-
@@ -271,6 +287,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
271 unsigned long touch_ts = __this_cpu_read(watchdog_touch_ts); 287 unsigned long touch_ts = __this_cpu_read(watchdog_touch_ts);
272 struct pt_regs *regs = get_irq_regs(); 288 struct pt_regs *regs = get_irq_regs();
273 int duration; 289 int duration;
290 int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace;
274 291
275 /* kick the hardlockup detector */ 292 /* kick the hardlockup detector */
276 watchdog_interrupt_count(); 293 watchdog_interrupt_count();
@@ -317,6 +334,17 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
317 if (__this_cpu_read(soft_watchdog_warn) == true) 334 if (__this_cpu_read(soft_watchdog_warn) == true)
318 return HRTIMER_RESTART; 335 return HRTIMER_RESTART;
319 336
337 if (softlockup_all_cpu_backtrace) {
338 /* Prevent multiple soft-lockup reports if one cpu is already
339 * engaged in dumping cpu back traces
340 */
341 if (test_and_set_bit(0, &soft_lockup_nmi_warn)) {
342 /* Someone else will report us. Let's give up */
343 __this_cpu_write(soft_watchdog_warn, true);
344 return HRTIMER_RESTART;
345 }
346 }
347
320 printk(KERN_EMERG "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n", 348 printk(KERN_EMERG "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
321 smp_processor_id(), duration, 349 smp_processor_id(), duration,
322 current->comm, task_pid_nr(current)); 350 current->comm, task_pid_nr(current));
@@ -327,6 +355,17 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
327 else 355 else
328 dump_stack(); 356 dump_stack();
329 357
358 if (softlockup_all_cpu_backtrace) {
359 /* Avoid generating two back traces for current
360 * given that one is already made above
361 */
362 trigger_allbutself_cpu_backtrace();
363
364 clear_bit(0, &soft_lockup_nmi_warn);
365 /* Barrier to sync with other cpus */
366 smp_mb__after_atomic();
367 }
368
330 if (softlockup_panic) 369 if (softlockup_panic)
331 panic("softlockup: hung tasks"); 370 panic("softlockup: hung tasks");
332 __this_cpu_write(soft_watchdog_warn, true); 371 __this_cpu_write(soft_watchdog_warn, true);
@@ -527,10 +566,8 @@ static void update_timers_all_cpus(void)
527 int cpu; 566 int cpu;
528 567
529 get_online_cpus(); 568 get_online_cpus();
530 preempt_disable();
531 for_each_online_cpu(cpu) 569 for_each_online_cpu(cpu)
532 update_timers(cpu); 570 update_timers(cpu);
533 preempt_enable();
534 put_online_cpus(); 571 put_online_cpus();
535} 572}
536 573
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 6203d2900877..35974ac69600 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -3284,6 +3284,7 @@ int workqueue_sysfs_register(struct workqueue_struct *wq)
3284 } 3284 }
3285 } 3285 }
3286 3286
3287 dev_set_uevent_suppress(&wq_dev->dev, false);
3287 kobject_uevent(&wq_dev->dev.kobj, KOBJ_ADD); 3288 kobject_uevent(&wq_dev->dev.kobj, KOBJ_ADD);
3288 return 0; 3289 return 0;
3289} 3290}
@@ -4879,7 +4880,7 @@ static void __init wq_numa_init(void)
4879 BUG_ON(!tbl); 4880 BUG_ON(!tbl);
4880 4881
4881 for_each_node(node) 4882 for_each_node(node)
4882 BUG_ON(!alloc_cpumask_var_node(&tbl[node], GFP_KERNEL, 4883 BUG_ON(!zalloc_cpumask_var_node(&tbl[node], GFP_KERNEL,
4883 node_online(node) ? node : NUMA_NO_NODE)); 4884 node_online(node) ? node : NUMA_NO_NODE));
4884 4885
4885 for_each_possible_cpu(cpu) { 4886 for_each_possible_cpu(cpu) {