diff options
| author | Ingo Molnar <mingo@kernel.org> | 2014-07-17 05:45:29 -0400 |
|---|---|---|
| committer | Ingo Molnar <mingo@kernel.org> | 2014-07-17 05:45:29 -0400 |
| commit | b5e4111f027c4be85dbe97e090530d03c55c4cf4 (patch) | |
| tree | 11e0a37cb59314f4e9a7b2810124a4a7a33140e5 /kernel | |
| parent | 72d5305dcb3637913c2c37e847a4de9028e49244 (diff) | |
| parent | 9de8033f1bbcce5ed23fe5da9ca1a5060207f7ed (diff) | |
Merge branch 'locking/urgent' into locking/core, before applying larger changes and to refresh the branch with fixes
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/Kconfig.locks | 9 | ||||
| -rw-r--r-- | kernel/cgroup.c | 58 | ||||
| -rw-r--r-- | kernel/context_tracking.c | 3 | ||||
| -rw-r--r-- | kernel/cpuset.c | 20 | ||||
| -rw-r--r-- | kernel/events/core.c | 37 | ||||
| -rw-r--r-- | kernel/events/uprobes.c | 6 | ||||
| -rw-r--r-- | kernel/fork.c | 2 | ||||
| -rw-r--r-- | kernel/irq/irqdesc.c | 4 | ||||
| -rw-r--r-- | kernel/kexec.c | 1 | ||||
| -rw-r--r-- | kernel/locking/mcs_spinlock.c | 64 | ||||
| -rw-r--r-- | kernel/locking/mcs_spinlock.h | 9 | ||||
| -rw-r--r-- | kernel/locking/mutex.c | 2 | ||||
| -rw-r--r-- | kernel/locking/rwsem-spinlock.c | 28 | ||||
| -rw-r--r-- | kernel/locking/rwsem-xadd.c | 16 | ||||
| -rw-r--r-- | kernel/locking/rwsem.c | 2 | ||||
| -rw-r--r-- | kernel/power/hibernate.c | 37 | ||||
| -rw-r--r-- | kernel/power/main.c | 6 | ||||
| -rw-r--r-- | kernel/power/user.c | 3 | ||||
| -rw-r--r-- | kernel/printk/printk.c | 44 | ||||
| -rw-r--r-- | kernel/smp.c | 57 | ||||
| -rw-r--r-- | kernel/sysctl.c | 18 | ||||
| -rw-r--r-- | kernel/trace/trace.c | 2 | ||||
| -rw-r--r-- | kernel/trace/trace_uprobe.c | 46 | ||||
| -rw-r--r-- | kernel/tracepoint.c | 26 | ||||
| -rw-r--r-- | kernel/watchdog.c | 41 | ||||
| -rw-r--r-- | kernel/workqueue.c | 3 |
26 files changed, 397 insertions, 147 deletions
diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks index 35536d9c0964..76768ee812b2 100644 --- a/kernel/Kconfig.locks +++ b/kernel/Kconfig.locks | |||
| @@ -220,9 +220,16 @@ config INLINE_WRITE_UNLOCK_IRQRESTORE | |||
| 220 | 220 | ||
| 221 | endif | 221 | endif |
| 222 | 222 | ||
| 223 | config ARCH_SUPPORTS_ATOMIC_RMW | ||
| 224 | bool | ||
| 225 | |||
| 223 | config MUTEX_SPIN_ON_OWNER | 226 | config MUTEX_SPIN_ON_OWNER |
| 224 | def_bool y | 227 | def_bool y |
| 225 | depends on SMP && !DEBUG_MUTEXES | 228 | depends on SMP && !DEBUG_MUTEXES && ARCH_SUPPORTS_ATOMIC_RMW |
| 229 | |||
| 230 | config RWSEM_SPIN_ON_OWNER | ||
| 231 | def_bool y | ||
| 232 | depends on SMP && RWSEM_XCHGADD_ALGORITHM && ARCH_SUPPORTS_ATOMIC_RMW | ||
| 226 | 233 | ||
| 227 | config ARCH_USE_QUEUE_RWLOCK | 234 | config ARCH_USE_QUEUE_RWLOCK |
| 228 | bool | 235 | bool |
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 7868fc3c0bc5..70776aec2562 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
| @@ -1648,10 +1648,13 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
| 1648 | int flags, const char *unused_dev_name, | 1648 | int flags, const char *unused_dev_name, |
| 1649 | void *data) | 1649 | void *data) |
| 1650 | { | 1650 | { |
| 1651 | struct super_block *pinned_sb = NULL; | ||
| 1652 | struct cgroup_subsys *ss; | ||
| 1651 | struct cgroup_root *root; | 1653 | struct cgroup_root *root; |
| 1652 | struct cgroup_sb_opts opts; | 1654 | struct cgroup_sb_opts opts; |
| 1653 | struct dentry *dentry; | 1655 | struct dentry *dentry; |
| 1654 | int ret; | 1656 | int ret; |
| 1657 | int i; | ||
| 1655 | bool new_sb; | 1658 | bool new_sb; |
| 1656 | 1659 | ||
| 1657 | /* | 1660 | /* |
| @@ -1677,6 +1680,27 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
| 1677 | goto out_unlock; | 1680 | goto out_unlock; |
| 1678 | } | 1681 | } |
| 1679 | 1682 | ||
| 1683 | /* | ||
| 1684 | * Destruction of cgroup root is asynchronous, so subsystems may | ||
| 1685 | * still be dying after the previous unmount. Let's drain the | ||
| 1686 | * dying subsystems. We just need to ensure that the ones | ||
| 1687 | * unmounted previously finish dying and don't care about new ones | ||
| 1688 | * starting. Testing ref liveliness is good enough. | ||
| 1689 | */ | ||
| 1690 | for_each_subsys(ss, i) { | ||
| 1691 | if (!(opts.subsys_mask & (1 << i)) || | ||
| 1692 | ss->root == &cgrp_dfl_root) | ||
| 1693 | continue; | ||
| 1694 | |||
| 1695 | if (!percpu_ref_tryget_live(&ss->root->cgrp.self.refcnt)) { | ||
| 1696 | mutex_unlock(&cgroup_mutex); | ||
| 1697 | msleep(10); | ||
| 1698 | ret = restart_syscall(); | ||
| 1699 | goto out_free; | ||
| 1700 | } | ||
| 1701 | cgroup_put(&ss->root->cgrp); | ||
| 1702 | } | ||
| 1703 | |||
| 1680 | for_each_root(root) { | 1704 | for_each_root(root) { |
| 1681 | bool name_match = false; | 1705 | bool name_match = false; |
| 1682 | 1706 | ||
| @@ -1717,15 +1741,23 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
| 1717 | } | 1741 | } |
| 1718 | 1742 | ||
| 1719 | /* | 1743 | /* |
| 1720 | * A root's lifetime is governed by its root cgroup. | 1744 | * We want to reuse @root whose lifetime is governed by its |
| 1721 | * tryget_live failure indicate that the root is being | 1745 | * ->cgrp. Let's check whether @root is alive and keep it |
| 1722 | * destroyed. Wait for destruction to complete so that the | 1746 | * that way. As cgroup_kill_sb() can happen anytime, we |
| 1723 | * subsystems are free. We can use wait_queue for the wait | 1747 | * want to block it by pinning the sb so that @root doesn't |
| 1724 | * but this path is super cold. Let's just sleep for a bit | 1748 | * get killed before mount is complete. |
| 1725 | * and retry. | 1749 | * |
| 1750 | * With the sb pinned, tryget_live can reliably indicate | ||
| 1751 | * whether @root can be reused. If it's being killed, | ||
| 1752 | * drain it. We can use wait_queue for the wait but this | ||
| 1753 | * path is super cold. Let's just sleep a bit and retry. | ||
| 1726 | */ | 1754 | */ |
| 1727 | if (!percpu_ref_tryget_live(&root->cgrp.self.refcnt)) { | 1755 | pinned_sb = kernfs_pin_sb(root->kf_root, NULL); |
| 1756 | if (IS_ERR(pinned_sb) || | ||
| 1757 | !percpu_ref_tryget_live(&root->cgrp.self.refcnt)) { | ||
| 1728 | mutex_unlock(&cgroup_mutex); | 1758 | mutex_unlock(&cgroup_mutex); |
| 1759 | if (!IS_ERR_OR_NULL(pinned_sb)) | ||
| 1760 | deactivate_super(pinned_sb); | ||
| 1729 | msleep(10); | 1761 | msleep(10); |
| 1730 | ret = restart_syscall(); | 1762 | ret = restart_syscall(); |
| 1731 | goto out_free; | 1763 | goto out_free; |
| @@ -1770,6 +1802,16 @@ out_free: | |||
| 1770 | CGROUP_SUPER_MAGIC, &new_sb); | 1802 | CGROUP_SUPER_MAGIC, &new_sb); |
| 1771 | if (IS_ERR(dentry) || !new_sb) | 1803 | if (IS_ERR(dentry) || !new_sb) |
| 1772 | cgroup_put(&root->cgrp); | 1804 | cgroup_put(&root->cgrp); |
| 1805 | |||
| 1806 | /* | ||
| 1807 | * If @pinned_sb, we're reusing an existing root and holding an | ||
| 1808 | * extra ref on its sb. Mount is complete. Put the extra ref. | ||
| 1809 | */ | ||
| 1810 | if (pinned_sb) { | ||
| 1811 | WARN_ON(new_sb); | ||
| 1812 | deactivate_super(pinned_sb); | ||
| 1813 | } | ||
| 1814 | |||
| 1773 | return dentry; | 1815 | return dentry; |
| 1774 | } | 1816 | } |
| 1775 | 1817 | ||
| @@ -3328,7 +3370,7 @@ bool css_has_online_children(struct cgroup_subsys_state *css) | |||
| 3328 | 3370 | ||
| 3329 | rcu_read_lock(); | 3371 | rcu_read_lock(); |
| 3330 | css_for_each_child(child, css) { | 3372 | css_for_each_child(child, css) { |
| 3331 | if (css->flags & CSS_ONLINE) { | 3373 | if (child->flags & CSS_ONLINE) { |
| 3332 | ret = true; | 3374 | ret = true; |
| 3333 | break; | 3375 | break; |
| 3334 | } | 3376 | } |
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c index 019d45008448..5664985c46a0 100644 --- a/kernel/context_tracking.c +++ b/kernel/context_tracking.c | |||
| @@ -19,6 +19,7 @@ | |||
| 19 | #include <linux/sched.h> | 19 | #include <linux/sched.h> |
| 20 | #include <linux/hardirq.h> | 20 | #include <linux/hardirq.h> |
| 21 | #include <linux/export.h> | 21 | #include <linux/export.h> |
| 22 | #include <linux/kprobes.h> | ||
| 22 | 23 | ||
| 23 | #define CREATE_TRACE_POINTS | 24 | #define CREATE_TRACE_POINTS |
| 24 | #include <trace/events/context_tracking.h> | 25 | #include <trace/events/context_tracking.h> |
| @@ -104,6 +105,7 @@ void context_tracking_user_enter(void) | |||
| 104 | } | 105 | } |
| 105 | local_irq_restore(flags); | 106 | local_irq_restore(flags); |
| 106 | } | 107 | } |
| 108 | NOKPROBE_SYMBOL(context_tracking_user_enter); | ||
| 107 | 109 | ||
| 108 | #ifdef CONFIG_PREEMPT | 110 | #ifdef CONFIG_PREEMPT |
| 109 | /** | 111 | /** |
| @@ -181,6 +183,7 @@ void context_tracking_user_exit(void) | |||
| 181 | } | 183 | } |
| 182 | local_irq_restore(flags); | 184 | local_irq_restore(flags); |
| 183 | } | 185 | } |
| 186 | NOKPROBE_SYMBOL(context_tracking_user_exit); | ||
| 184 | 187 | ||
| 185 | /** | 188 | /** |
| 186 | * __context_tracking_task_switch - context switch the syscall callbacks | 189 | * __context_tracking_task_switch - context switch the syscall callbacks |
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index f6b33c696224..116a4164720a 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
| @@ -1181,7 +1181,13 @@ done: | |||
| 1181 | 1181 | ||
| 1182 | int current_cpuset_is_being_rebound(void) | 1182 | int current_cpuset_is_being_rebound(void) |
| 1183 | { | 1183 | { |
| 1184 | return task_cs(current) == cpuset_being_rebound; | 1184 | int ret; |
| 1185 | |||
| 1186 | rcu_read_lock(); | ||
| 1187 | ret = task_cs(current) == cpuset_being_rebound; | ||
| 1188 | rcu_read_unlock(); | ||
| 1189 | |||
| 1190 | return ret; | ||
| 1185 | } | 1191 | } |
| 1186 | 1192 | ||
| 1187 | static int update_relax_domain_level(struct cpuset *cs, s64 val) | 1193 | static int update_relax_domain_level(struct cpuset *cs, s64 val) |
| @@ -1617,7 +1623,17 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of, | |||
| 1617 | * resources, wait for the previously scheduled operations before | 1623 | * resources, wait for the previously scheduled operations before |
| 1618 | * proceeding, so that we don't end up keep removing tasks added | 1624 | * proceeding, so that we don't end up keep removing tasks added |
| 1619 | * after execution capability is restored. | 1625 | * after execution capability is restored. |
| 1626 | * | ||
| 1627 | * cpuset_hotplug_work calls back into cgroup core via | ||
| 1628 | * cgroup_transfer_tasks() and waiting for it from a cgroupfs | ||
| 1629 | * operation like this one can lead to a deadlock through kernfs | ||
| 1630 | * active_ref protection. Let's break the protection. Losing the | ||
| 1631 | * protection is okay as we check whether @cs is online after | ||
| 1632 | * grabbing cpuset_mutex anyway. This only happens on the legacy | ||
| 1633 | * hierarchies. | ||
| 1620 | */ | 1634 | */ |
| 1635 | css_get(&cs->css); | ||
| 1636 | kernfs_break_active_protection(of->kn); | ||
| 1621 | flush_work(&cpuset_hotplug_work); | 1637 | flush_work(&cpuset_hotplug_work); |
| 1622 | 1638 | ||
| 1623 | mutex_lock(&cpuset_mutex); | 1639 | mutex_lock(&cpuset_mutex); |
| @@ -1645,6 +1661,8 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of, | |||
| 1645 | free_trial_cpuset(trialcs); | 1661 | free_trial_cpuset(trialcs); |
| 1646 | out_unlock: | 1662 | out_unlock: |
| 1647 | mutex_unlock(&cpuset_mutex); | 1663 | mutex_unlock(&cpuset_mutex); |
| 1664 | kernfs_unbreak_active_protection(of->kn); | ||
| 1665 | css_put(&cs->css); | ||
| 1648 | return retval ?: nbytes; | 1666 | return retval ?: nbytes; |
| 1649 | } | 1667 | } |
| 1650 | 1668 | ||
diff --git a/kernel/events/core.c b/kernel/events/core.c index 5fa58e4cffac..a33d9a2bcbd7 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c | |||
| @@ -40,6 +40,7 @@ | |||
| 40 | #include <linux/mm_types.h> | 40 | #include <linux/mm_types.h> |
| 41 | #include <linux/cgroup.h> | 41 | #include <linux/cgroup.h> |
| 42 | #include <linux/module.h> | 42 | #include <linux/module.h> |
| 43 | #include <linux/mman.h> | ||
| 43 | 44 | ||
| 44 | #include "internal.h" | 45 | #include "internal.h" |
| 45 | 46 | ||
| @@ -5128,6 +5129,7 @@ struct perf_mmap_event { | |||
| 5128 | int maj, min; | 5129 | int maj, min; |
| 5129 | u64 ino; | 5130 | u64 ino; |
| 5130 | u64 ino_generation; | 5131 | u64 ino_generation; |
| 5132 | u32 prot, flags; | ||
| 5131 | 5133 | ||
| 5132 | struct { | 5134 | struct { |
| 5133 | struct perf_event_header header; | 5135 | struct perf_event_header header; |
| @@ -5169,6 +5171,8 @@ static void perf_event_mmap_output(struct perf_event *event, | |||
| 5169 | mmap_event->event_id.header.size += sizeof(mmap_event->min); | 5171 | mmap_event->event_id.header.size += sizeof(mmap_event->min); |
| 5170 | mmap_event->event_id.header.size += sizeof(mmap_event->ino); | 5172 | mmap_event->event_id.header.size += sizeof(mmap_event->ino); |
| 5171 | mmap_event->event_id.header.size += sizeof(mmap_event->ino_generation); | 5173 | mmap_event->event_id.header.size += sizeof(mmap_event->ino_generation); |
| 5174 | mmap_event->event_id.header.size += sizeof(mmap_event->prot); | ||
| 5175 | mmap_event->event_id.header.size += sizeof(mmap_event->flags); | ||
| 5172 | } | 5176 | } |
| 5173 | 5177 | ||
| 5174 | perf_event_header__init_id(&mmap_event->event_id.header, &sample, event); | 5178 | perf_event_header__init_id(&mmap_event->event_id.header, &sample, event); |
| @@ -5187,6 +5191,8 @@ static void perf_event_mmap_output(struct perf_event *event, | |||
| 5187 | perf_output_put(&handle, mmap_event->min); | 5191 | perf_output_put(&handle, mmap_event->min); |
| 5188 | perf_output_put(&handle, mmap_event->ino); | 5192 | perf_output_put(&handle, mmap_event->ino); |
| 5189 | perf_output_put(&handle, mmap_event->ino_generation); | 5193 | perf_output_put(&handle, mmap_event->ino_generation); |
| 5194 | perf_output_put(&handle, mmap_event->prot); | ||
| 5195 | perf_output_put(&handle, mmap_event->flags); | ||
| 5190 | } | 5196 | } |
| 5191 | 5197 | ||
| 5192 | __output_copy(&handle, mmap_event->file_name, | 5198 | __output_copy(&handle, mmap_event->file_name, |
| @@ -5205,6 +5211,7 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) | |||
| 5205 | struct file *file = vma->vm_file; | 5211 | struct file *file = vma->vm_file; |
| 5206 | int maj = 0, min = 0; | 5212 | int maj = 0, min = 0; |
| 5207 | u64 ino = 0, gen = 0; | 5213 | u64 ino = 0, gen = 0; |
| 5214 | u32 prot = 0, flags = 0; | ||
| 5208 | unsigned int size; | 5215 | unsigned int size; |
| 5209 | char tmp[16]; | 5216 | char tmp[16]; |
| 5210 | char *buf = NULL; | 5217 | char *buf = NULL; |
| @@ -5235,6 +5242,28 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) | |||
| 5235 | gen = inode->i_generation; | 5242 | gen = inode->i_generation; |
| 5236 | maj = MAJOR(dev); | 5243 | maj = MAJOR(dev); |
| 5237 | min = MINOR(dev); | 5244 | min = MINOR(dev); |
| 5245 | |||
| 5246 | if (vma->vm_flags & VM_READ) | ||
| 5247 | prot |= PROT_READ; | ||
| 5248 | if (vma->vm_flags & VM_WRITE) | ||
| 5249 | prot |= PROT_WRITE; | ||
| 5250 | if (vma->vm_flags & VM_EXEC) | ||
| 5251 | prot |= PROT_EXEC; | ||
| 5252 | |||
| 5253 | if (vma->vm_flags & VM_MAYSHARE) | ||
| 5254 | flags = MAP_SHARED; | ||
| 5255 | else | ||
| 5256 | flags = MAP_PRIVATE; | ||
| 5257 | |||
| 5258 | if (vma->vm_flags & VM_DENYWRITE) | ||
| 5259 | flags |= MAP_DENYWRITE; | ||
| 5260 | if (vma->vm_flags & VM_MAYEXEC) | ||
| 5261 | flags |= MAP_EXECUTABLE; | ||
| 5262 | if (vma->vm_flags & VM_LOCKED) | ||
| 5263 | flags |= MAP_LOCKED; | ||
| 5264 | if (vma->vm_flags & VM_HUGETLB) | ||
| 5265 | flags |= MAP_HUGETLB; | ||
| 5266 | |||
| 5238 | goto got_name; | 5267 | goto got_name; |
| 5239 | } else { | 5268 | } else { |
| 5240 | name = (char *)arch_vma_name(vma); | 5269 | name = (char *)arch_vma_name(vma); |
| @@ -5275,6 +5304,8 @@ got_name: | |||
| 5275 | mmap_event->min = min; | 5304 | mmap_event->min = min; |
| 5276 | mmap_event->ino = ino; | 5305 | mmap_event->ino = ino; |
| 5277 | mmap_event->ino_generation = gen; | 5306 | mmap_event->ino_generation = gen; |
| 5307 | mmap_event->prot = prot; | ||
| 5308 | mmap_event->flags = flags; | ||
| 5278 | 5309 | ||
| 5279 | if (!(vma->vm_flags & VM_EXEC)) | 5310 | if (!(vma->vm_flags & VM_EXEC)) |
| 5280 | mmap_event->event_id.header.misc |= PERF_RECORD_MISC_MMAP_DATA; | 5311 | mmap_event->event_id.header.misc |= PERF_RECORD_MISC_MMAP_DATA; |
| @@ -5315,6 +5346,8 @@ void perf_event_mmap(struct vm_area_struct *vma) | |||
| 5315 | /* .min (attr_mmap2 only) */ | 5346 | /* .min (attr_mmap2 only) */ |
| 5316 | /* .ino (attr_mmap2 only) */ | 5347 | /* .ino (attr_mmap2 only) */ |
| 5317 | /* .ino_generation (attr_mmap2 only) */ | 5348 | /* .ino_generation (attr_mmap2 only) */ |
| 5349 | /* .prot (attr_mmap2 only) */ | ||
| 5350 | /* .flags (attr_mmap2 only) */ | ||
| 5318 | }; | 5351 | }; |
| 5319 | 5352 | ||
| 5320 | perf_event_mmap_event(&mmap_event); | 5353 | perf_event_mmap_event(&mmap_event); |
| @@ -6897,10 +6930,6 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr, | |||
| 6897 | if (ret) | 6930 | if (ret) |
| 6898 | return -EFAULT; | 6931 | return -EFAULT; |
| 6899 | 6932 | ||
| 6900 | /* disabled for now */ | ||
| 6901 | if (attr->mmap2) | ||
| 6902 | return -EINVAL; | ||
| 6903 | |||
| 6904 | if (attr->__reserved_1) | 6933 | if (attr->__reserved_1) |
| 6905 | return -EINVAL; | 6934 | return -EINVAL; |
| 6906 | 6935 | ||
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index c445e392e93f..6f3254e8c137 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c | |||
| @@ -846,7 +846,7 @@ static void __uprobe_unregister(struct uprobe *uprobe, struct uprobe_consumer *u | |||
| 846 | { | 846 | { |
| 847 | int err; | 847 | int err; |
| 848 | 848 | ||
| 849 | if (!consumer_del(uprobe, uc)) /* WARN? */ | 849 | if (WARN_ON(!consumer_del(uprobe, uc))) |
| 850 | return; | 850 | return; |
| 851 | 851 | ||
| 852 | err = register_for_each_vma(uprobe, NULL); | 852 | err = register_for_each_vma(uprobe, NULL); |
| @@ -927,7 +927,7 @@ int uprobe_apply(struct inode *inode, loff_t offset, | |||
| 927 | int ret = -ENOENT; | 927 | int ret = -ENOENT; |
| 928 | 928 | ||
| 929 | uprobe = find_uprobe(inode, offset); | 929 | uprobe = find_uprobe(inode, offset); |
| 930 | if (!uprobe) | 930 | if (WARN_ON(!uprobe)) |
| 931 | return ret; | 931 | return ret; |
| 932 | 932 | ||
| 933 | down_write(&uprobe->register_rwsem); | 933 | down_write(&uprobe->register_rwsem); |
| @@ -952,7 +952,7 @@ void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consume | |||
| 952 | struct uprobe *uprobe; | 952 | struct uprobe *uprobe; |
| 953 | 953 | ||
| 954 | uprobe = find_uprobe(inode, offset); | 954 | uprobe = find_uprobe(inode, offset); |
| 955 | if (!uprobe) | 955 | if (WARN_ON(!uprobe)) |
| 956 | return; | 956 | return; |
| 957 | 957 | ||
| 958 | down_write(&uprobe->register_rwsem); | 958 | down_write(&uprobe->register_rwsem); |
diff --git a/kernel/fork.c b/kernel/fork.c index d2799d1fc952..6a13c46cd87d 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
| @@ -1487,7 +1487,9 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
| 1487 | 1487 | ||
| 1488 | total_forks++; | 1488 | total_forks++; |
| 1489 | spin_unlock(&current->sighand->siglock); | 1489 | spin_unlock(&current->sighand->siglock); |
| 1490 | syscall_tracepoint_update(p); | ||
| 1490 | write_unlock_irq(&tasklist_lock); | 1491 | write_unlock_irq(&tasklist_lock); |
| 1492 | |||
| 1491 | proc_fork_connector(p); | 1493 | proc_fork_connector(p); |
| 1492 | cgroup_post_fork(p); | 1494 | cgroup_post_fork(p); |
| 1493 | if (clone_flags & CLONE_THREAD) | 1495 | if (clone_flags & CLONE_THREAD) |
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 7339e42a85ab..1487a123db5c 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c | |||
| @@ -455,9 +455,9 @@ EXPORT_SYMBOL_GPL(irq_alloc_hwirqs); | |||
| 455 | */ | 455 | */ |
| 456 | void irq_free_hwirqs(unsigned int from, int cnt) | 456 | void irq_free_hwirqs(unsigned int from, int cnt) |
| 457 | { | 457 | { |
| 458 | int i; | 458 | int i, j; |
| 459 | 459 | ||
| 460 | for (i = from; cnt > 0; i++, cnt--) { | 460 | for (i = from, j = cnt; j > 0; i++, j--) { |
| 461 | irq_set_status_flags(i, _IRQ_NOREQUEST | _IRQ_NOPROBE); | 461 | irq_set_status_flags(i, _IRQ_NOREQUEST | _IRQ_NOPROBE); |
| 462 | arch_teardown_hwirq(i); | 462 | arch_teardown_hwirq(i); |
| 463 | } | 463 | } |
diff --git a/kernel/kexec.c b/kernel/kexec.c index 6748688813d0..369f41a94124 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c | |||
| @@ -1617,6 +1617,7 @@ static int __init crash_save_vmcoreinfo_init(void) | |||
| 1617 | #ifdef CONFIG_MEMORY_FAILURE | 1617 | #ifdef CONFIG_MEMORY_FAILURE |
| 1618 | VMCOREINFO_NUMBER(PG_hwpoison); | 1618 | VMCOREINFO_NUMBER(PG_hwpoison); |
| 1619 | #endif | 1619 | #endif |
| 1620 | VMCOREINFO_NUMBER(PG_head_mask); | ||
| 1620 | VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE); | 1621 | VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE); |
| 1621 | 1622 | ||
| 1622 | arch_crash_save_vmcoreinfo(); | 1623 | arch_crash_save_vmcoreinfo(); |
diff --git a/kernel/locking/mcs_spinlock.c b/kernel/locking/mcs_spinlock.c index 838dc9e00669..be9ee1559fca 100644 --- a/kernel/locking/mcs_spinlock.c +++ b/kernel/locking/mcs_spinlock.c | |||
| @@ -14,21 +14,47 @@ | |||
| 14 | * called from interrupt context and we have preemption disabled while | 14 | * called from interrupt context and we have preemption disabled while |
| 15 | * spinning. | 15 | * spinning. |
| 16 | */ | 16 | */ |
| 17 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct optimistic_spin_queue, osq_node); | 17 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct optimistic_spin_node, osq_node); |
| 18 | |||
| 19 | /* | ||
| 20 | * We use the value 0 to represent "no CPU", thus the encoded value | ||
| 21 | * will be the CPU number incremented by 1. | ||
| 22 | */ | ||
| 23 | static inline int encode_cpu(int cpu_nr) | ||
| 24 | { | ||
| 25 | return cpu_nr + 1; | ||
| 26 | } | ||
| 27 | |||
| 28 | static inline struct optimistic_spin_node *decode_cpu(int encoded_cpu_val) | ||
| 29 | { | ||
| 30 | int cpu_nr = encoded_cpu_val - 1; | ||
| 31 | |||
| 32 | return per_cpu_ptr(&osq_node, cpu_nr); | ||
| 33 | } | ||
| 18 | 34 | ||
| 19 | /* | 35 | /* |
| 20 | * Get a stable @node->next pointer, either for unlock() or unqueue() purposes. | 36 | * Get a stable @node->next pointer, either for unlock() or unqueue() purposes. |
| 21 | * Can return NULL in case we were the last queued and we updated @lock instead. | 37 | * Can return NULL in case we were the last queued and we updated @lock instead. |
| 22 | */ | 38 | */ |
| 23 | static inline struct optimistic_spin_queue * | 39 | static inline struct optimistic_spin_node * |
| 24 | osq_wait_next(struct optimistic_spin_queue **lock, | 40 | osq_wait_next(struct optimistic_spin_queue *lock, |
| 25 | struct optimistic_spin_queue *node, | 41 | struct optimistic_spin_node *node, |
| 26 | struct optimistic_spin_queue *prev) | 42 | struct optimistic_spin_node *prev) |
| 27 | { | 43 | { |
| 28 | struct optimistic_spin_queue *next = NULL; | 44 | struct optimistic_spin_node *next = NULL; |
| 45 | int curr = encode_cpu(smp_processor_id()); | ||
| 46 | int old; | ||
| 47 | |||
| 48 | /* | ||
| 49 | * If there is a prev node in queue, then the 'old' value will be | ||
| 50 | * the prev node's CPU #, else it's set to OSQ_UNLOCKED_VAL since if | ||
| 51 | * we're currently last in queue, then the queue will then become empty. | ||
| 52 | */ | ||
| 53 | old = prev ? prev->cpu : OSQ_UNLOCKED_VAL; | ||
| 29 | 54 | ||
| 30 | for (;;) { | 55 | for (;;) { |
| 31 | if (*lock == node && cmpxchg(lock, node, prev) == node) { | 56 | if (atomic_read(&lock->tail) == curr && |
| 57 | atomic_cmpxchg(&lock->tail, curr, old) == curr) { | ||
| 32 | /* | 58 | /* |
| 33 | * We were the last queued, we moved @lock back. @prev | 59 | * We were the last queued, we moved @lock back. @prev |
| 34 | * will now observe @lock and will complete its | 60 | * will now observe @lock and will complete its |
| @@ -59,18 +85,23 @@ osq_wait_next(struct optimistic_spin_queue **lock, | |||
| 59 | return next; | 85 | return next; |
| 60 | } | 86 | } |
| 61 | 87 | ||
| 62 | bool osq_lock(struct optimistic_spin_queue **lock) | 88 | bool osq_lock(struct optimistic_spin_queue *lock) |
| 63 | { | 89 | { |
| 64 | struct optimistic_spin_queue *node = this_cpu_ptr(&osq_node); | 90 | struct optimistic_spin_node *node = this_cpu_ptr(&osq_node); |
| 65 | struct optimistic_spin_queue *prev, *next; | 91 | struct optimistic_spin_node *prev, *next; |
| 92 | int curr = encode_cpu(smp_processor_id()); | ||
| 93 | int old; | ||
| 66 | 94 | ||
| 67 | node->locked = 0; | 95 | node->locked = 0; |
| 68 | node->next = NULL; | 96 | node->next = NULL; |
| 97 | node->cpu = curr; | ||
| 69 | 98 | ||
| 70 | node->prev = prev = xchg(lock, node); | 99 | old = atomic_xchg(&lock->tail, curr); |
| 71 | if (likely(prev == NULL)) | 100 | if (old == OSQ_UNLOCKED_VAL) |
| 72 | return true; | 101 | return true; |
| 73 | 102 | ||
| 103 | prev = decode_cpu(old); | ||
| 104 | node->prev = prev; | ||
| 74 | ACCESS_ONCE(prev->next) = node; | 105 | ACCESS_ONCE(prev->next) = node; |
| 75 | 106 | ||
| 76 | /* | 107 | /* |
| @@ -149,20 +180,21 @@ unqueue: | |||
| 149 | return false; | 180 | return false; |
| 150 | } | 181 | } |
| 151 | 182 | ||
| 152 | void osq_unlock(struct optimistic_spin_queue **lock) | 183 | void osq_unlock(struct optimistic_spin_queue *lock) |
| 153 | { | 184 | { |
| 154 | struct optimistic_spin_queue *node = this_cpu_ptr(&osq_node); | 185 | struct optimistic_spin_node *node, *next; |
| 155 | struct optimistic_spin_queue *next; | 186 | int curr = encode_cpu(smp_processor_id()); |
| 156 | 187 | ||
| 157 | /* | 188 | /* |
| 158 | * Fast path for the uncontended case. | 189 | * Fast path for the uncontended case. |
| 159 | */ | 190 | */ |
| 160 | if (likely(cmpxchg(lock, node, NULL) == node)) | 191 | if (likely(atomic_cmpxchg(&lock->tail, curr, OSQ_UNLOCKED_VAL) == curr)) |
| 161 | return; | 192 | return; |
| 162 | 193 | ||
| 163 | /* | 194 | /* |
| 164 | * Second most likely case. | 195 | * Second most likely case. |
| 165 | */ | 196 | */ |
| 197 | node = this_cpu_ptr(&osq_node); | ||
| 166 | next = xchg(&node->next, NULL); | 198 | next = xchg(&node->next, NULL); |
| 167 | if (next) { | 199 | if (next) { |
| 168 | ACCESS_ONCE(next->locked) = 1; | 200 | ACCESS_ONCE(next->locked) = 1; |
diff --git a/kernel/locking/mcs_spinlock.h b/kernel/locking/mcs_spinlock.h index a2dbac4aca6b..74356dc0ce29 100644 --- a/kernel/locking/mcs_spinlock.h +++ b/kernel/locking/mcs_spinlock.h | |||
| @@ -118,12 +118,13 @@ void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) | |||
| 118 | * mutex_lock()/rwsem_down_{read,write}() etc. | 118 | * mutex_lock()/rwsem_down_{read,write}() etc. |
| 119 | */ | 119 | */ |
| 120 | 120 | ||
| 121 | struct optimistic_spin_queue { | 121 | struct optimistic_spin_node { |
| 122 | struct optimistic_spin_queue *next, *prev; | 122 | struct optimistic_spin_node *next, *prev; |
| 123 | int locked; /* 1 if lock acquired */ | 123 | int locked; /* 1 if lock acquired */ |
| 124 | int cpu; /* encoded CPU # value */ | ||
| 124 | }; | 125 | }; |
| 125 | 126 | ||
| 126 | extern bool osq_lock(struct optimistic_spin_queue **lock); | 127 | extern bool osq_lock(struct optimistic_spin_queue *lock); |
| 127 | extern void osq_unlock(struct optimistic_spin_queue **lock); | 128 | extern void osq_unlock(struct optimistic_spin_queue *lock); |
| 128 | 129 | ||
| 129 | #endif /* __LINUX_MCS_SPINLOCK_H */ | 130 | #endif /* __LINUX_MCS_SPINLOCK_H */ |
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index 11b103d87b27..d3100521388c 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c | |||
| @@ -54,7 +54,7 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key) | |||
| 54 | INIT_LIST_HEAD(&lock->wait_list); | 54 | INIT_LIST_HEAD(&lock->wait_list); |
| 55 | mutex_clear_owner(lock); | 55 | mutex_clear_owner(lock); |
| 56 | #ifdef CONFIG_MUTEX_SPIN_ON_OWNER | 56 | #ifdef CONFIG_MUTEX_SPIN_ON_OWNER |
| 57 | lock->osq = NULL; | 57 | osq_lock_init(&lock->osq); |
| 58 | #endif | 58 | #endif |
| 59 | 59 | ||
| 60 | debug_mutex_init(lock, name, key); | 60 | debug_mutex_init(lock, name, key); |
diff --git a/kernel/locking/rwsem-spinlock.c b/kernel/locking/rwsem-spinlock.c index 9be8a9144978..2c93571162cb 100644 --- a/kernel/locking/rwsem-spinlock.c +++ b/kernel/locking/rwsem-spinlock.c | |||
| @@ -26,7 +26,7 @@ int rwsem_is_locked(struct rw_semaphore *sem) | |||
| 26 | unsigned long flags; | 26 | unsigned long flags; |
| 27 | 27 | ||
| 28 | if (raw_spin_trylock_irqsave(&sem->wait_lock, flags)) { | 28 | if (raw_spin_trylock_irqsave(&sem->wait_lock, flags)) { |
| 29 | ret = (sem->activity != 0); | 29 | ret = (sem->count != 0); |
| 30 | raw_spin_unlock_irqrestore(&sem->wait_lock, flags); | 30 | raw_spin_unlock_irqrestore(&sem->wait_lock, flags); |
| 31 | } | 31 | } |
| 32 | return ret; | 32 | return ret; |
| @@ -46,7 +46,7 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name, | |||
| 46 | debug_check_no_locks_freed((void *)sem, sizeof(*sem)); | 46 | debug_check_no_locks_freed((void *)sem, sizeof(*sem)); |
| 47 | lockdep_init_map(&sem->dep_map, name, key, 0); | 47 | lockdep_init_map(&sem->dep_map, name, key, 0); |
| 48 | #endif | 48 | #endif |
| 49 | sem->activity = 0; | 49 | sem->count = 0; |
| 50 | raw_spin_lock_init(&sem->wait_lock); | 50 | raw_spin_lock_init(&sem->wait_lock); |
| 51 | INIT_LIST_HEAD(&sem->wait_list); | 51 | INIT_LIST_HEAD(&sem->wait_list); |
| 52 | } | 52 | } |
| @@ -95,7 +95,7 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wakewrite) | |||
| 95 | waiter = list_entry(next, struct rwsem_waiter, list); | 95 | waiter = list_entry(next, struct rwsem_waiter, list); |
| 96 | } while (waiter->type != RWSEM_WAITING_FOR_WRITE); | 96 | } while (waiter->type != RWSEM_WAITING_FOR_WRITE); |
| 97 | 97 | ||
| 98 | sem->activity += woken; | 98 | sem->count += woken; |
| 99 | 99 | ||
| 100 | out: | 100 | out: |
| 101 | return sem; | 101 | return sem; |
| @@ -126,9 +126,9 @@ void __sched __down_read(struct rw_semaphore *sem) | |||
| 126 | 126 | ||
| 127 | raw_spin_lock_irqsave(&sem->wait_lock, flags); | 127 | raw_spin_lock_irqsave(&sem->wait_lock, flags); |
| 128 | 128 | ||
| 129 | if (sem->activity >= 0 && list_empty(&sem->wait_list)) { | 129 | if (sem->count >= 0 && list_empty(&sem->wait_list)) { |
| 130 | /* granted */ | 130 | /* granted */ |
| 131 | sem->activity++; | 131 | sem->count++; |
| 132 | raw_spin_unlock_irqrestore(&sem->wait_lock, flags); | 132 | raw_spin_unlock_irqrestore(&sem->wait_lock, flags); |
| 133 | goto out; | 133 | goto out; |
| 134 | } | 134 | } |
| @@ -170,9 +170,9 @@ int __down_read_trylock(struct rw_semaphore *sem) | |||
| 170 | 170 | ||
| 171 | raw_spin_lock_irqsave(&sem->wait_lock, flags); | 171 | raw_spin_lock_irqsave(&sem->wait_lock, flags); |
| 172 | 172 | ||
| 173 | if (sem->activity >= 0 && list_empty(&sem->wait_list)) { | 173 | if (sem->count >= 0 && list_empty(&sem->wait_list)) { |
| 174 | /* granted */ | 174 | /* granted */ |
| 175 | sem->activity++; | 175 | sem->count++; |
| 176 | ret = 1; | 176 | ret = 1; |
| 177 | } | 177 | } |
| 178 | 178 | ||
| @@ -206,7 +206,7 @@ void __sched __down_write_nested(struct rw_semaphore *sem, int subclass) | |||
| 206 | * itself into sleep and waiting for system woke it or someone | 206 | * itself into sleep and waiting for system woke it or someone |
| 207 | * else in the head of the wait list up. | 207 | * else in the head of the wait list up. |
| 208 | */ | 208 | */ |
| 209 | if (sem->activity == 0) | 209 | if (sem->count == 0) |
| 210 | break; | 210 | break; |
| 211 | set_task_state(tsk, TASK_UNINTERRUPTIBLE); | 211 | set_task_state(tsk, TASK_UNINTERRUPTIBLE); |
| 212 | raw_spin_unlock_irqrestore(&sem->wait_lock, flags); | 212 | raw_spin_unlock_irqrestore(&sem->wait_lock, flags); |
| @@ -214,7 +214,7 @@ void __sched __down_write_nested(struct rw_semaphore *sem, int subclass) | |||
| 214 | raw_spin_lock_irqsave(&sem->wait_lock, flags); | 214 | raw_spin_lock_irqsave(&sem->wait_lock, flags); |
| 215 | } | 215 | } |
| 216 | /* got the lock */ | 216 | /* got the lock */ |
| 217 | sem->activity = -1; | 217 | sem->count = -1; |
| 218 | list_del(&waiter.list); | 218 | list_del(&waiter.list); |
| 219 | 219 | ||
| 220 | raw_spin_unlock_irqrestore(&sem->wait_lock, flags); | 220 | raw_spin_unlock_irqrestore(&sem->wait_lock, flags); |
| @@ -235,9 +235,9 @@ int __down_write_trylock(struct rw_semaphore *sem) | |||
| 235 | 235 | ||
| 236 | raw_spin_lock_irqsave(&sem->wait_lock, flags); | 236 | raw_spin_lock_irqsave(&sem->wait_lock, flags); |
| 237 | 237 | ||
| 238 | if (sem->activity == 0) { | 238 | if (sem->count == 0) { |
| 239 | /* got the lock */ | 239 | /* got the lock */ |
| 240 | sem->activity = -1; | 240 | sem->count = -1; |
| 241 | ret = 1; | 241 | ret = 1; |
| 242 | } | 242 | } |
| 243 | 243 | ||
| @@ -255,7 +255,7 @@ void __up_read(struct rw_semaphore *sem) | |||
| 255 | 255 | ||
| 256 | raw_spin_lock_irqsave(&sem->wait_lock, flags); | 256 | raw_spin_lock_irqsave(&sem->wait_lock, flags); |
| 257 | 257 | ||
| 258 | if (--sem->activity == 0 && !list_empty(&sem->wait_list)) | 258 | if (--sem->count == 0 && !list_empty(&sem->wait_list)) |
| 259 | sem = __rwsem_wake_one_writer(sem); | 259 | sem = __rwsem_wake_one_writer(sem); |
| 260 | 260 | ||
| 261 | raw_spin_unlock_irqrestore(&sem->wait_lock, flags); | 261 | raw_spin_unlock_irqrestore(&sem->wait_lock, flags); |
| @@ -270,7 +270,7 @@ void __up_write(struct rw_semaphore *sem) | |||
| 270 | 270 | ||
| 271 | raw_spin_lock_irqsave(&sem->wait_lock, flags); | 271 | raw_spin_lock_irqsave(&sem->wait_lock, flags); |
| 272 | 272 | ||
| 273 | sem->activity = 0; | 273 | sem->count = 0; |
| 274 | if (!list_empty(&sem->wait_list)) | 274 | if (!list_empty(&sem->wait_list)) |
| 275 | sem = __rwsem_do_wake(sem, 1); | 275 | sem = __rwsem_do_wake(sem, 1); |
| 276 | 276 | ||
| @@ -287,7 +287,7 @@ void __downgrade_write(struct rw_semaphore *sem) | |||
| 287 | 287 | ||
| 288 | raw_spin_lock_irqsave(&sem->wait_lock, flags); | 288 | raw_spin_lock_irqsave(&sem->wait_lock, flags); |
| 289 | 289 | ||
| 290 | sem->activity = 1; | 290 | sem->count = 1; |
| 291 | if (!list_empty(&sem->wait_list)) | 291 | if (!list_empty(&sem->wait_list)) |
| 292 | sem = __rwsem_do_wake(sem, 0); | 292 | sem = __rwsem_do_wake(sem, 0); |
| 293 | 293 | ||
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index dacc32142fcc..a2391ac135c8 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c | |||
| @@ -82,9 +82,9 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name, | |||
| 82 | sem->count = RWSEM_UNLOCKED_VALUE; | 82 | sem->count = RWSEM_UNLOCKED_VALUE; |
| 83 | raw_spin_lock_init(&sem->wait_lock); | 83 | raw_spin_lock_init(&sem->wait_lock); |
| 84 | INIT_LIST_HEAD(&sem->wait_list); | 84 | INIT_LIST_HEAD(&sem->wait_list); |
| 85 | #ifdef CONFIG_SMP | 85 | #ifdef CONFIG_RWSEM_SPIN_ON_OWNER |
| 86 | sem->owner = NULL; | 86 | sem->owner = NULL; |
| 87 | sem->osq = NULL; | 87 | osq_lock_init(&sem->osq); |
| 88 | #endif | 88 | #endif |
| 89 | } | 89 | } |
| 90 | 90 | ||
| @@ -262,7 +262,7 @@ static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem) | |||
| 262 | return false; | 262 | return false; |
| 263 | } | 263 | } |
| 264 | 264 | ||
| 265 | #ifdef CONFIG_SMP | 265 | #ifdef CONFIG_RWSEM_SPIN_ON_OWNER |
| 266 | /* | 266 | /* |
| 267 | * Try to acquire write lock before the writer has been put on wait queue. | 267 | * Try to acquire write lock before the writer has been put on wait queue. |
| 268 | */ | 268 | */ |
| @@ -285,10 +285,10 @@ static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem) | |||
| 285 | static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem) | 285 | static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem) |
| 286 | { | 286 | { |
| 287 | struct task_struct *owner; | 287 | struct task_struct *owner; |
| 288 | bool on_cpu = true; | 288 | bool on_cpu = false; |
| 289 | 289 | ||
| 290 | if (need_resched()) | 290 | if (need_resched()) |
| 291 | return 0; | 291 | return false; |
| 292 | 292 | ||
| 293 | rcu_read_lock(); | 293 | rcu_read_lock(); |
| 294 | owner = ACCESS_ONCE(sem->owner); | 294 | owner = ACCESS_ONCE(sem->owner); |
| @@ -297,9 +297,9 @@ static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem) | |||
| 297 | rcu_read_unlock(); | 297 | rcu_read_unlock(); |
| 298 | 298 | ||
| 299 | /* | 299 | /* |
| 300 | * If sem->owner is not set, the rwsem owner may have | 300 | * If sem->owner is not set, yet we have just recently entered the |
| 301 | * just acquired it and not set the owner yet or the rwsem | 301 | * slowpath, then there is a possibility reader(s) may have the lock. |
| 302 | * has been released. | 302 | * To be safe, avoid spinning in these situations. |
| 303 | */ | 303 | */ |
| 304 | return on_cpu; | 304 | return on_cpu; |
| 305 | } | 305 | } |
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index 42f806de49d4..e2d3bc7f03b4 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c | |||
| @@ -12,7 +12,7 @@ | |||
| 12 | 12 | ||
| 13 | #include <linux/atomic.h> | 13 | #include <linux/atomic.h> |
| 14 | 14 | ||
| 15 | #if defined(CONFIG_SMP) && defined(CONFIG_RWSEM_XCHGADD_ALGORITHM) | 15 | #ifdef CONFIG_RWSEM_SPIN_ON_OWNER |
| 16 | static inline void rwsem_set_owner(struct rw_semaphore *sem) | 16 | static inline void rwsem_set_owner(struct rw_semaphore *sem) |
| 17 | { | 17 | { |
| 18 | sem->owner = current; | 18 | sem->owner = current; |
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 49e0a20fd010..fcc2611d3f14 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c | |||
| @@ -35,6 +35,7 @@ | |||
| 35 | 35 | ||
| 36 | static int nocompress; | 36 | static int nocompress; |
| 37 | static int noresume; | 37 | static int noresume; |
| 38 | static int nohibernate; | ||
| 38 | static int resume_wait; | 39 | static int resume_wait; |
| 39 | static unsigned int resume_delay; | 40 | static unsigned int resume_delay; |
| 40 | static char resume_file[256] = CONFIG_PM_STD_PARTITION; | 41 | static char resume_file[256] = CONFIG_PM_STD_PARTITION; |
| @@ -62,6 +63,11 @@ bool freezer_test_done; | |||
| 62 | 63 | ||
| 63 | static const struct platform_hibernation_ops *hibernation_ops; | 64 | static const struct platform_hibernation_ops *hibernation_ops; |
| 64 | 65 | ||
| 66 | bool hibernation_available(void) | ||
| 67 | { | ||
| 68 | return (nohibernate == 0); | ||
| 69 | } | ||
| 70 | |||
| 65 | /** | 71 | /** |
| 66 | * hibernation_set_ops - Set the global hibernate operations. | 72 | * hibernation_set_ops - Set the global hibernate operations. |
| 67 | * @ops: Hibernation operations to use in subsequent hibernation transitions. | 73 | * @ops: Hibernation operations to use in subsequent hibernation transitions. |
| @@ -642,6 +648,11 @@ int hibernate(void) | |||
| 642 | { | 648 | { |
| 643 | int error; | 649 | int error; |
| 644 | 650 | ||
| 651 | if (!hibernation_available()) { | ||
| 652 | pr_debug("PM: Hibernation not available.\n"); | ||
| 653 | return -EPERM; | ||
| 654 | } | ||
| 655 | |||
| 645 | lock_system_sleep(); | 656 | lock_system_sleep(); |
| 646 | /* The snapshot device should not be opened while we're running */ | 657 | /* The snapshot device should not be opened while we're running */ |
| 647 | if (!atomic_add_unless(&snapshot_device_available, -1, 0)) { | 658 | if (!atomic_add_unless(&snapshot_device_available, -1, 0)) { |
| @@ -734,7 +745,7 @@ static int software_resume(void) | |||
| 734 | /* | 745 | /* |
| 735 | * If the user said "noresume".. bail out early. | 746 | * If the user said "noresume".. bail out early. |
| 736 | */ | 747 | */ |
| 737 | if (noresume) | 748 | if (noresume || !hibernation_available()) |
| 738 | return 0; | 749 | return 0; |
| 739 | 750 | ||
| 740 | /* | 751 | /* |
| @@ -900,6 +911,9 @@ static ssize_t disk_show(struct kobject *kobj, struct kobj_attribute *attr, | |||
| 900 | int i; | 911 | int i; |
| 901 | char *start = buf; | 912 | char *start = buf; |
| 902 | 913 | ||
| 914 | if (!hibernation_available()) | ||
| 915 | return sprintf(buf, "[disabled]\n"); | ||
| 916 | |||
| 903 | for (i = HIBERNATION_FIRST; i <= HIBERNATION_MAX; i++) { | 917 | for (i = HIBERNATION_FIRST; i <= HIBERNATION_MAX; i++) { |
| 904 | if (!hibernation_modes[i]) | 918 | if (!hibernation_modes[i]) |
| 905 | continue; | 919 | continue; |
| @@ -934,6 +948,9 @@ static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr, | |||
| 934 | char *p; | 948 | char *p; |
| 935 | int mode = HIBERNATION_INVALID; | 949 | int mode = HIBERNATION_INVALID; |
| 936 | 950 | ||
| 951 | if (!hibernation_available()) | ||
| 952 | return -EPERM; | ||
| 953 | |||
| 937 | p = memchr(buf, '\n', n); | 954 | p = memchr(buf, '\n', n); |
| 938 | len = p ? p - buf : n; | 955 | len = p ? p - buf : n; |
| 939 | 956 | ||
| @@ -1101,6 +1118,10 @@ static int __init hibernate_setup(char *str) | |||
| 1101 | noresume = 1; | 1118 | noresume = 1; |
| 1102 | else if (!strncmp(str, "nocompress", 10)) | 1119 | else if (!strncmp(str, "nocompress", 10)) |
| 1103 | nocompress = 1; | 1120 | nocompress = 1; |
| 1121 | else if (!strncmp(str, "no", 2)) { | ||
| 1122 | noresume = 1; | ||
| 1123 | nohibernate = 1; | ||
| 1124 | } | ||
| 1104 | return 1; | 1125 | return 1; |
| 1105 | } | 1126 | } |
| 1106 | 1127 | ||
| @@ -1125,9 +1146,23 @@ static int __init resumedelay_setup(char *str) | |||
| 1125 | return 1; | 1146 | return 1; |
| 1126 | } | 1147 | } |
| 1127 | 1148 | ||
| 1149 | static int __init nohibernate_setup(char *str) | ||
| 1150 | { | ||
| 1151 | noresume = 1; | ||
| 1152 | nohibernate = 1; | ||
| 1153 | return 1; | ||
| 1154 | } | ||
| 1155 | |||
| 1156 | static int __init kaslr_nohibernate_setup(char *str) | ||
| 1157 | { | ||
| 1158 | return nohibernate_setup(str); | ||
| 1159 | } | ||
| 1160 | |||
| 1128 | __setup("noresume", noresume_setup); | 1161 | __setup("noresume", noresume_setup); |
| 1129 | __setup("resume_offset=", resume_offset_setup); | 1162 | __setup("resume_offset=", resume_offset_setup); |
| 1130 | __setup("resume=", resume_setup); | 1163 | __setup("resume=", resume_setup); |
| 1131 | __setup("hibernate=", hibernate_setup); | 1164 | __setup("hibernate=", hibernate_setup); |
| 1132 | __setup("resumewait", resumewait_setup); | 1165 | __setup("resumewait", resumewait_setup); |
| 1133 | __setup("resumedelay=", resumedelay_setup); | 1166 | __setup("resumedelay=", resumedelay_setup); |
| 1167 | __setup("nohibernate", nohibernate_setup); | ||
| 1168 | __setup("kaslr", kaslr_nohibernate_setup); | ||
diff --git a/kernel/power/main.c b/kernel/power/main.c index 573410d6647e..8e90f330f139 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c | |||
| @@ -300,13 +300,11 @@ static ssize_t state_show(struct kobject *kobj, struct kobj_attribute *attr, | |||
| 300 | s += sprintf(s,"%s ", pm_states[i].label); | 300 | s += sprintf(s,"%s ", pm_states[i].label); |
| 301 | 301 | ||
| 302 | #endif | 302 | #endif |
| 303 | #ifdef CONFIG_HIBERNATION | 303 | if (hibernation_available()) |
| 304 | s += sprintf(s, "%s\n", "disk"); | 304 | s += sprintf(s, "disk "); |
| 305 | #else | ||
| 306 | if (s != buf) | 305 | if (s != buf) |
| 307 | /* convert the last space to a newline */ | 306 | /* convert the last space to a newline */ |
| 308 | *(s-1) = '\n'; | 307 | *(s-1) = '\n'; |
| 309 | #endif | ||
| 310 | return (s - buf); | 308 | return (s - buf); |
| 311 | } | 309 | } |
| 312 | 310 | ||
diff --git a/kernel/power/user.c b/kernel/power/user.c index 98d357584cd6..526e8911460a 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c | |||
| @@ -49,6 +49,9 @@ static int snapshot_open(struct inode *inode, struct file *filp) | |||
| 49 | struct snapshot_data *data; | 49 | struct snapshot_data *data; |
| 50 | int error; | 50 | int error; |
| 51 | 51 | ||
| 52 | if (!hibernation_available()) | ||
| 53 | return -EPERM; | ||
| 54 | |||
| 52 | lock_system_sleep(); | 55 | lock_system_sleep(); |
| 53 | 56 | ||
| 54 | if (!atomic_add_unless(&snapshot_device_available, -1, 0)) { | 57 | if (!atomic_add_unless(&snapshot_device_available, -1, 0)) { |
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index ea2d5f6962ed..13e839dbca07 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c | |||
| @@ -1416,9 +1416,10 @@ static int have_callable_console(void) | |||
| 1416 | /* | 1416 | /* |
| 1417 | * Can we actually use the console at this time on this cpu? | 1417 | * Can we actually use the console at this time on this cpu? |
| 1418 | * | 1418 | * |
| 1419 | * Console drivers may assume that per-cpu resources have been allocated. So | 1419 | * Console drivers may assume that per-cpu resources have |
| 1420 | * unless they're explicitly marked as being able to cope (CON_ANYTIME) don't | 1420 | * been allocated. So unless they're explicitly marked as |
| 1421 | * call them until this CPU is officially up. | 1421 | * being able to cope (CON_ANYTIME) don't call them until |
| 1422 | * this CPU is officially up. | ||
| 1422 | */ | 1423 | */ |
| 1423 | static inline int can_use_console(unsigned int cpu) | 1424 | static inline int can_use_console(unsigned int cpu) |
| 1424 | { | 1425 | { |
| @@ -1431,10 +1432,8 @@ static inline int can_use_console(unsigned int cpu) | |||
| 1431 | * console_lock held, and 'console_locked' set) if it | 1432 | * console_lock held, and 'console_locked' set) if it |
| 1432 | * is successful, false otherwise. | 1433 | * is successful, false otherwise. |
| 1433 | */ | 1434 | */ |
| 1434 | static int console_trylock_for_printk(void) | 1435 | static int console_trylock_for_printk(unsigned int cpu) |
| 1435 | { | 1436 | { |
| 1436 | unsigned int cpu = smp_processor_id(); | ||
| 1437 | |||
| 1438 | if (!console_trylock()) | 1437 | if (!console_trylock()) |
| 1439 | return 0; | 1438 | return 0; |
| 1440 | /* | 1439 | /* |
| @@ -1609,8 +1608,7 @@ asmlinkage int vprintk_emit(int facility, int level, | |||
| 1609 | */ | 1608 | */ |
| 1610 | if (!oops_in_progress && !lockdep_recursing(current)) { | 1609 | if (!oops_in_progress && !lockdep_recursing(current)) { |
| 1611 | recursion_bug = 1; | 1610 | recursion_bug = 1; |
| 1612 | local_irq_restore(flags); | 1611 | goto out_restore_irqs; |
| 1613 | return 0; | ||
| 1614 | } | 1612 | } |
| 1615 | zap_locks(); | 1613 | zap_locks(); |
| 1616 | } | 1614 | } |
| @@ -1718,27 +1716,21 @@ asmlinkage int vprintk_emit(int facility, int level, | |||
| 1718 | 1716 | ||
| 1719 | logbuf_cpu = UINT_MAX; | 1717 | logbuf_cpu = UINT_MAX; |
| 1720 | raw_spin_unlock(&logbuf_lock); | 1718 | raw_spin_unlock(&logbuf_lock); |
| 1721 | lockdep_on(); | ||
| 1722 | local_irq_restore(flags); | ||
| 1723 | 1719 | ||
| 1724 | /* If called from the scheduler, we can not call up(). */ | 1720 | /* If called from the scheduler, we can not call up(). */ |
| 1725 | if (in_sched) | 1721 | if (!in_sched) { |
| 1726 | return printed_len; | 1722 | /* |
| 1727 | 1723 | * Try to acquire and then immediately release the console | |
| 1728 | /* | 1724 | * semaphore. The release will print out buffers and wake up |
| 1729 | * Disable preemption to avoid being preempted while holding | 1725 | * /dev/kmsg and syslog() users. |
| 1730 | * console_sem which would prevent anyone from printing to console | 1726 | */ |
| 1731 | */ | 1727 | if (console_trylock_for_printk(this_cpu)) |
| 1732 | preempt_disable(); | 1728 | console_unlock(); |
| 1733 | /* | 1729 | } |
| 1734 | * Try to acquire and then immediately release the console semaphore. | ||
| 1735 | * The release will print out buffers and wake up /dev/kmsg and syslog() | ||
| 1736 | * users. | ||
| 1737 | */ | ||
| 1738 | if (console_trylock_for_printk()) | ||
| 1739 | console_unlock(); | ||
| 1740 | preempt_enable(); | ||
| 1741 | 1730 | ||
| 1731 | lockdep_on(); | ||
| 1732 | out_restore_irqs: | ||
| 1733 | local_irq_restore(flags); | ||
| 1742 | return printed_len; | 1734 | return printed_len; |
| 1743 | } | 1735 | } |
| 1744 | EXPORT_SYMBOL(vprintk_emit); | 1736 | EXPORT_SYMBOL(vprintk_emit); |
diff --git a/kernel/smp.c b/kernel/smp.c index 306f8180b0d5..80c33f8de14f 100644 --- a/kernel/smp.c +++ b/kernel/smp.c | |||
| @@ -29,6 +29,8 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_function_data, cfd_data); | |||
| 29 | 29 | ||
| 30 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct llist_head, call_single_queue); | 30 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct llist_head, call_single_queue); |
| 31 | 31 | ||
| 32 | static void flush_smp_call_function_queue(bool warn_cpu_offline); | ||
| 33 | |||
| 32 | static int | 34 | static int |
| 33 | hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu) | 35 | hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu) |
| 34 | { | 36 | { |
| @@ -51,12 +53,27 @@ hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
| 51 | #ifdef CONFIG_HOTPLUG_CPU | 53 | #ifdef CONFIG_HOTPLUG_CPU |
| 52 | case CPU_UP_CANCELED: | 54 | case CPU_UP_CANCELED: |
| 53 | case CPU_UP_CANCELED_FROZEN: | 55 | case CPU_UP_CANCELED_FROZEN: |
| 56 | /* Fall-through to the CPU_DEAD[_FROZEN] case. */ | ||
| 54 | 57 | ||
| 55 | case CPU_DEAD: | 58 | case CPU_DEAD: |
| 56 | case CPU_DEAD_FROZEN: | 59 | case CPU_DEAD_FROZEN: |
| 57 | free_cpumask_var(cfd->cpumask); | 60 | free_cpumask_var(cfd->cpumask); |
| 58 | free_percpu(cfd->csd); | 61 | free_percpu(cfd->csd); |
| 59 | break; | 62 | break; |
| 63 | |||
| 64 | case CPU_DYING: | ||
| 65 | case CPU_DYING_FROZEN: | ||
| 66 | /* | ||
| 67 | * The IPIs for the smp-call-function callbacks queued by other | ||
| 68 | * CPUs might arrive late, either due to hardware latencies or | ||
| 69 | * because this CPU disabled interrupts (inside stop-machine) | ||
| 70 | * before the IPIs were sent. So flush out any pending callbacks | ||
| 71 | * explicitly (without waiting for the IPIs to arrive), to | ||
| 72 | * ensure that the outgoing CPU doesn't go offline with work | ||
| 73 | * still pending. | ||
| 74 | */ | ||
| 75 | flush_smp_call_function_queue(false); | ||
| 76 | break; | ||
| 60 | #endif | 77 | #endif |
| 61 | }; | 78 | }; |
| 62 | 79 | ||
| @@ -177,23 +194,47 @@ static int generic_exec_single(int cpu, struct call_single_data *csd, | |||
| 177 | return 0; | 194 | return 0; |
| 178 | } | 195 | } |
| 179 | 196 | ||
| 180 | /* | 197 | /** |
| 181 | * Invoked by arch to handle an IPI for call function single. Must be | 198 | * generic_smp_call_function_single_interrupt - Execute SMP IPI callbacks |
| 182 | * called from the arch with interrupts disabled. | 199 | * |
| 200 | * Invoked by arch to handle an IPI for call function single. | ||
| 201 | * Must be called with interrupts disabled. | ||
| 183 | */ | 202 | */ |
| 184 | void generic_smp_call_function_single_interrupt(void) | 203 | void generic_smp_call_function_single_interrupt(void) |
| 185 | { | 204 | { |
| 205 | flush_smp_call_function_queue(true); | ||
| 206 | } | ||
| 207 | |||
| 208 | /** | ||
| 209 | * flush_smp_call_function_queue - Flush pending smp-call-function callbacks | ||
| 210 | * | ||
| 211 | * @warn_cpu_offline: If set to 'true', warn if callbacks were queued on an | ||
| 212 | * offline CPU. Skip this check if set to 'false'. | ||
| 213 | * | ||
| 214 | * Flush any pending smp-call-function callbacks queued on this CPU. This is | ||
| 215 | * invoked by the generic IPI handler, as well as by a CPU about to go offline, | ||
| 216 | * to ensure that all pending IPI callbacks are run before it goes completely | ||
| 217 | * offline. | ||
| 218 | * | ||
| 219 | * Loop through the call_single_queue and run all the queued callbacks. | ||
| 220 | * Must be called with interrupts disabled. | ||
| 221 | */ | ||
| 222 | static void flush_smp_call_function_queue(bool warn_cpu_offline) | ||
| 223 | { | ||
| 224 | struct llist_head *head; | ||
| 186 | struct llist_node *entry; | 225 | struct llist_node *entry; |
| 187 | struct call_single_data *csd, *csd_next; | 226 | struct call_single_data *csd, *csd_next; |
| 188 | static bool warned; | 227 | static bool warned; |
| 189 | 228 | ||
| 190 | entry = llist_del_all(&__get_cpu_var(call_single_queue)); | 229 | WARN_ON(!irqs_disabled()); |
| 230 | |||
| 231 | head = &__get_cpu_var(call_single_queue); | ||
| 232 | entry = llist_del_all(head); | ||
| 191 | entry = llist_reverse_order(entry); | 233 | entry = llist_reverse_order(entry); |
| 192 | 234 | ||
| 193 | /* | 235 | /* There shouldn't be any pending callbacks on an offline CPU. */ |
| 194 | * Shouldn't receive this interrupt on a cpu that is not yet online. | 236 | if (unlikely(warn_cpu_offline && !cpu_online(smp_processor_id()) && |
| 195 | */ | 237 | !warned && !llist_empty(head))) { |
| 196 | if (unlikely(!cpu_online(smp_processor_id()) && !warned)) { | ||
| 197 | warned = true; | 238 | warned = true; |
| 198 | WARN(1, "IPI on offline CPU %d\n", smp_processor_id()); | 239 | WARN(1, "IPI on offline CPU %d\n", smp_processor_id()); |
| 199 | 240 | ||
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index ba9ed453c4ed..75b22e22a72c 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
| @@ -136,7 +136,6 @@ static unsigned long dirty_bytes_min = 2 * PAGE_SIZE; | |||
| 136 | /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ | 136 | /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ |
| 137 | static int maxolduid = 65535; | 137 | static int maxolduid = 65535; |
| 138 | static int minolduid; | 138 | static int minolduid; |
| 139 | static int min_percpu_pagelist_fract = 8; | ||
| 140 | 139 | ||
| 141 | static int ngroups_max = NGROUPS_MAX; | 140 | static int ngroups_max = NGROUPS_MAX; |
| 142 | static const int cap_last_cap = CAP_LAST_CAP; | 141 | static const int cap_last_cap = CAP_LAST_CAP; |
| @@ -152,10 +151,6 @@ static unsigned long hung_task_timeout_max = (LONG_MAX/HZ); | |||
| 152 | #ifdef CONFIG_SPARC | 151 | #ifdef CONFIG_SPARC |
| 153 | #endif | 152 | #endif |
| 154 | 153 | ||
| 155 | #ifdef CONFIG_SPARC64 | ||
| 156 | extern int sysctl_tsb_ratio; | ||
| 157 | #endif | ||
| 158 | |||
| 159 | #ifdef __hppa__ | 154 | #ifdef __hppa__ |
| 160 | extern int pwrsw_enabled; | 155 | extern int pwrsw_enabled; |
| 161 | #endif | 156 | #endif |
| @@ -865,6 +860,17 @@ static struct ctl_table kern_table[] = { | |||
| 865 | .extra1 = &zero, | 860 | .extra1 = &zero, |
| 866 | .extra2 = &one, | 861 | .extra2 = &one, |
| 867 | }, | 862 | }, |
| 863 | #ifdef CONFIG_SMP | ||
| 864 | { | ||
| 865 | .procname = "softlockup_all_cpu_backtrace", | ||
| 866 | .data = &sysctl_softlockup_all_cpu_backtrace, | ||
| 867 | .maxlen = sizeof(int), | ||
| 868 | .mode = 0644, | ||
| 869 | .proc_handler = proc_dointvec_minmax, | ||
| 870 | .extra1 = &zero, | ||
| 871 | .extra2 = &one, | ||
| 872 | }, | ||
| 873 | #endif /* CONFIG_SMP */ | ||
| 868 | { | 874 | { |
| 869 | .procname = "nmi_watchdog", | 875 | .procname = "nmi_watchdog", |
| 870 | .data = &watchdog_user_enabled, | 876 | .data = &watchdog_user_enabled, |
| @@ -1321,7 +1327,7 @@ static struct ctl_table vm_table[] = { | |||
| 1321 | .maxlen = sizeof(percpu_pagelist_fraction), | 1327 | .maxlen = sizeof(percpu_pagelist_fraction), |
| 1322 | .mode = 0644, | 1328 | .mode = 0644, |
| 1323 | .proc_handler = percpu_pagelist_fraction_sysctl_handler, | 1329 | .proc_handler = percpu_pagelist_fraction_sysctl_handler, |
| 1324 | .extra1 = &min_percpu_pagelist_fract, | 1330 | .extra1 = &zero, |
| 1325 | }, | 1331 | }, |
| 1326 | #ifdef CONFIG_MMU | 1332 | #ifdef CONFIG_MMU |
| 1327 | { | 1333 | { |
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 384ede311717..f243444a3772 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
| @@ -1396,7 +1396,6 @@ void tracing_start(void) | |||
| 1396 | 1396 | ||
| 1397 | arch_spin_unlock(&global_trace.max_lock); | 1397 | arch_spin_unlock(&global_trace.max_lock); |
| 1398 | 1398 | ||
| 1399 | ftrace_start(); | ||
| 1400 | out: | 1399 | out: |
| 1401 | raw_spin_unlock_irqrestore(&global_trace.start_lock, flags); | 1400 | raw_spin_unlock_irqrestore(&global_trace.start_lock, flags); |
| 1402 | } | 1401 | } |
| @@ -1443,7 +1442,6 @@ void tracing_stop(void) | |||
| 1443 | struct ring_buffer *buffer; | 1442 | struct ring_buffer *buffer; |
| 1444 | unsigned long flags; | 1443 | unsigned long flags; |
| 1445 | 1444 | ||
| 1446 | ftrace_stop(); | ||
| 1447 | raw_spin_lock_irqsave(&global_trace.start_lock, flags); | 1445 | raw_spin_lock_irqsave(&global_trace.start_lock, flags); |
| 1448 | if (global_trace.stop_count++) | 1446 | if (global_trace.stop_count++) |
| 1449 | goto out; | 1447 | goto out; |
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 04fdb5de823c..3c9b97e6b1f4 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c | |||
| @@ -893,6 +893,9 @@ probe_event_enable(struct trace_uprobe *tu, struct ftrace_event_file *file, | |||
| 893 | int ret; | 893 | int ret; |
| 894 | 894 | ||
| 895 | if (file) { | 895 | if (file) { |
| 896 | if (tu->tp.flags & TP_FLAG_PROFILE) | ||
| 897 | return -EINTR; | ||
| 898 | |||
| 896 | link = kmalloc(sizeof(*link), GFP_KERNEL); | 899 | link = kmalloc(sizeof(*link), GFP_KERNEL); |
| 897 | if (!link) | 900 | if (!link) |
| 898 | return -ENOMEM; | 901 | return -ENOMEM; |
| @@ -901,29 +904,40 @@ probe_event_enable(struct trace_uprobe *tu, struct ftrace_event_file *file, | |||
| 901 | list_add_tail_rcu(&link->list, &tu->tp.files); | 904 | list_add_tail_rcu(&link->list, &tu->tp.files); |
| 902 | 905 | ||
| 903 | tu->tp.flags |= TP_FLAG_TRACE; | 906 | tu->tp.flags |= TP_FLAG_TRACE; |
| 904 | } else | 907 | } else { |
| 905 | tu->tp.flags |= TP_FLAG_PROFILE; | 908 | if (tu->tp.flags & TP_FLAG_TRACE) |
| 909 | return -EINTR; | ||
| 906 | 910 | ||
| 907 | ret = uprobe_buffer_enable(); | 911 | tu->tp.flags |= TP_FLAG_PROFILE; |
| 908 | if (ret < 0) | 912 | } |
| 909 | return ret; | ||
| 910 | 913 | ||
| 911 | WARN_ON(!uprobe_filter_is_empty(&tu->filter)); | 914 | WARN_ON(!uprobe_filter_is_empty(&tu->filter)); |
| 912 | 915 | ||
| 913 | if (enabled) | 916 | if (enabled) |
| 914 | return 0; | 917 | return 0; |
| 915 | 918 | ||
| 919 | ret = uprobe_buffer_enable(); | ||
| 920 | if (ret) | ||
| 921 | goto err_flags; | ||
| 922 | |||
| 916 | tu->consumer.filter = filter; | 923 | tu->consumer.filter = filter; |
| 917 | ret = uprobe_register(tu->inode, tu->offset, &tu->consumer); | 924 | ret = uprobe_register(tu->inode, tu->offset, &tu->consumer); |
| 918 | if (ret) { | 925 | if (ret) |
| 919 | if (file) { | 926 | goto err_buffer; |
| 920 | list_del(&link->list); | ||
| 921 | kfree(link); | ||
| 922 | tu->tp.flags &= ~TP_FLAG_TRACE; | ||
| 923 | } else | ||
| 924 | tu->tp.flags &= ~TP_FLAG_PROFILE; | ||
| 925 | } | ||
| 926 | 927 | ||
| 928 | return 0; | ||
| 929 | |||
| 930 | err_buffer: | ||
| 931 | uprobe_buffer_disable(); | ||
| 932 | |||
| 933 | err_flags: | ||
| 934 | if (file) { | ||
| 935 | list_del(&link->list); | ||
| 936 | kfree(link); | ||
| 937 | tu->tp.flags &= ~TP_FLAG_TRACE; | ||
| 938 | } else { | ||
| 939 | tu->tp.flags &= ~TP_FLAG_PROFILE; | ||
| 940 | } | ||
| 927 | return ret; | 941 | return ret; |
| 928 | } | 942 | } |
| 929 | 943 | ||
| @@ -1201,12 +1215,6 @@ static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs) | |||
| 1201 | 1215 | ||
| 1202 | current->utask->vaddr = (unsigned long) &udd; | 1216 | current->utask->vaddr = (unsigned long) &udd; |
| 1203 | 1217 | ||
| 1204 | #ifdef CONFIG_PERF_EVENTS | ||
| 1205 | if ((tu->tp.flags & TP_FLAG_TRACE) == 0 && | ||
| 1206 | !uprobe_perf_filter(&tu->consumer, 0, current->mm)) | ||
| 1207 | return UPROBE_HANDLER_REMOVE; | ||
| 1208 | #endif | ||
| 1209 | |||
| 1210 | if (WARN_ON_ONCE(!uprobe_cpu_buffer)) | 1218 | if (WARN_ON_ONCE(!uprobe_cpu_buffer)) |
| 1211 | return 0; | 1219 | return 0; |
| 1212 | 1220 | ||
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index 33cbd8c203f8..3490407dc7b7 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c | |||
| @@ -492,33 +492,29 @@ static int sys_tracepoint_refcount; | |||
| 492 | 492 | ||
| 493 | void syscall_regfunc(void) | 493 | void syscall_regfunc(void) |
| 494 | { | 494 | { |
| 495 | unsigned long flags; | 495 | struct task_struct *p, *t; |
| 496 | struct task_struct *g, *t; | ||
| 497 | 496 | ||
| 498 | if (!sys_tracepoint_refcount) { | 497 | if (!sys_tracepoint_refcount) { |
| 499 | read_lock_irqsave(&tasklist_lock, flags); | 498 | read_lock(&tasklist_lock); |
| 500 | do_each_thread(g, t) { | 499 | for_each_process_thread(p, t) { |
| 501 | /* Skip kernel threads. */ | 500 | set_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT); |
| 502 | if (t->mm) | 501 | } |
| 503 | set_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT); | 502 | read_unlock(&tasklist_lock); |
| 504 | } while_each_thread(g, t); | ||
| 505 | read_unlock_irqrestore(&tasklist_lock, flags); | ||
| 506 | } | 503 | } |
| 507 | sys_tracepoint_refcount++; | 504 | sys_tracepoint_refcount++; |
| 508 | } | 505 | } |
| 509 | 506 | ||
| 510 | void syscall_unregfunc(void) | 507 | void syscall_unregfunc(void) |
| 511 | { | 508 | { |
| 512 | unsigned long flags; | 509 | struct task_struct *p, *t; |
| 513 | struct task_struct *g, *t; | ||
| 514 | 510 | ||
| 515 | sys_tracepoint_refcount--; | 511 | sys_tracepoint_refcount--; |
| 516 | if (!sys_tracepoint_refcount) { | 512 | if (!sys_tracepoint_refcount) { |
| 517 | read_lock_irqsave(&tasklist_lock, flags); | 513 | read_lock(&tasklist_lock); |
| 518 | do_each_thread(g, t) { | 514 | for_each_process_thread(p, t) { |
| 519 | clear_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT); | 515 | clear_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT); |
| 520 | } while_each_thread(g, t); | 516 | } |
| 521 | read_unlock_irqrestore(&tasklist_lock, flags); | 517 | read_unlock(&tasklist_lock); |
| 522 | } | 518 | } |
| 523 | } | 519 | } |
| 524 | #endif | 520 | #endif |
diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 516203e665fc..c3319bd1b040 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c | |||
| @@ -31,6 +31,12 @@ | |||
| 31 | 31 | ||
| 32 | int watchdog_user_enabled = 1; | 32 | int watchdog_user_enabled = 1; |
| 33 | int __read_mostly watchdog_thresh = 10; | 33 | int __read_mostly watchdog_thresh = 10; |
| 34 | #ifdef CONFIG_SMP | ||
| 35 | int __read_mostly sysctl_softlockup_all_cpu_backtrace; | ||
| 36 | #else | ||
| 37 | #define sysctl_softlockup_all_cpu_backtrace 0 | ||
| 38 | #endif | ||
| 39 | |||
| 34 | static int __read_mostly watchdog_running; | 40 | static int __read_mostly watchdog_running; |
| 35 | static u64 __read_mostly sample_period; | 41 | static u64 __read_mostly sample_period; |
| 36 | 42 | ||
| @@ -47,6 +53,7 @@ static DEFINE_PER_CPU(bool, watchdog_nmi_touch); | |||
| 47 | static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved); | 53 | static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved); |
| 48 | static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); | 54 | static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); |
| 49 | #endif | 55 | #endif |
| 56 | static unsigned long soft_lockup_nmi_warn; | ||
| 50 | 57 | ||
| 51 | /* boot commands */ | 58 | /* boot commands */ |
| 52 | /* | 59 | /* |
| @@ -95,6 +102,15 @@ static int __init nosoftlockup_setup(char *str) | |||
| 95 | } | 102 | } |
| 96 | __setup("nosoftlockup", nosoftlockup_setup); | 103 | __setup("nosoftlockup", nosoftlockup_setup); |
| 97 | /* */ | 104 | /* */ |
| 105 | #ifdef CONFIG_SMP | ||
| 106 | static int __init softlockup_all_cpu_backtrace_setup(char *str) | ||
| 107 | { | ||
| 108 | sysctl_softlockup_all_cpu_backtrace = | ||
| 109 | !!simple_strtol(str, NULL, 0); | ||
| 110 | return 1; | ||
| 111 | } | ||
| 112 | __setup("softlockup_all_cpu_backtrace=", softlockup_all_cpu_backtrace_setup); | ||
| 113 | #endif | ||
| 98 | 114 | ||
| 99 | /* | 115 | /* |
| 100 | * Hard-lockup warnings should be triggered after just a few seconds. Soft- | 116 | * Hard-lockup warnings should be triggered after just a few seconds. Soft- |
| @@ -271,6 +287,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) | |||
| 271 | unsigned long touch_ts = __this_cpu_read(watchdog_touch_ts); | 287 | unsigned long touch_ts = __this_cpu_read(watchdog_touch_ts); |
| 272 | struct pt_regs *regs = get_irq_regs(); | 288 | struct pt_regs *regs = get_irq_regs(); |
| 273 | int duration; | 289 | int duration; |
| 290 | int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace; | ||
| 274 | 291 | ||
| 275 | /* kick the hardlockup detector */ | 292 | /* kick the hardlockup detector */ |
| 276 | watchdog_interrupt_count(); | 293 | watchdog_interrupt_count(); |
| @@ -317,6 +334,17 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) | |||
| 317 | if (__this_cpu_read(soft_watchdog_warn) == true) | 334 | if (__this_cpu_read(soft_watchdog_warn) == true) |
| 318 | return HRTIMER_RESTART; | 335 | return HRTIMER_RESTART; |
| 319 | 336 | ||
| 337 | if (softlockup_all_cpu_backtrace) { | ||
| 338 | /* Prevent multiple soft-lockup reports if one cpu is already | ||
| 339 | * engaged in dumping cpu back traces | ||
| 340 | */ | ||
| 341 | if (test_and_set_bit(0, &soft_lockup_nmi_warn)) { | ||
| 342 | /* Someone else will report us. Let's give up */ | ||
| 343 | __this_cpu_write(soft_watchdog_warn, true); | ||
| 344 | return HRTIMER_RESTART; | ||
| 345 | } | ||
| 346 | } | ||
| 347 | |||
| 320 | printk(KERN_EMERG "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n", | 348 | printk(KERN_EMERG "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n", |
| 321 | smp_processor_id(), duration, | 349 | smp_processor_id(), duration, |
| 322 | current->comm, task_pid_nr(current)); | 350 | current->comm, task_pid_nr(current)); |
| @@ -327,6 +355,17 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) | |||
| 327 | else | 355 | else |
| 328 | dump_stack(); | 356 | dump_stack(); |
| 329 | 357 | ||
| 358 | if (softlockup_all_cpu_backtrace) { | ||
| 359 | /* Avoid generating two back traces for current | ||
| 360 | * given that one is already made above | ||
| 361 | */ | ||
| 362 | trigger_allbutself_cpu_backtrace(); | ||
| 363 | |||
| 364 | clear_bit(0, &soft_lockup_nmi_warn); | ||
| 365 | /* Barrier to sync with other cpus */ | ||
| 366 | smp_mb__after_atomic(); | ||
| 367 | } | ||
| 368 | |||
| 330 | if (softlockup_panic) | 369 | if (softlockup_panic) |
| 331 | panic("softlockup: hung tasks"); | 370 | panic("softlockup: hung tasks"); |
| 332 | __this_cpu_write(soft_watchdog_warn, true); | 371 | __this_cpu_write(soft_watchdog_warn, true); |
| @@ -527,10 +566,8 @@ static void update_timers_all_cpus(void) | |||
| 527 | int cpu; | 566 | int cpu; |
| 528 | 567 | ||
| 529 | get_online_cpus(); | 568 | get_online_cpus(); |
| 530 | preempt_disable(); | ||
| 531 | for_each_online_cpu(cpu) | 569 | for_each_online_cpu(cpu) |
| 532 | update_timers(cpu); | 570 | update_timers(cpu); |
| 533 | preempt_enable(); | ||
| 534 | put_online_cpus(); | 571 | put_online_cpus(); |
| 535 | } | 572 | } |
| 536 | 573 | ||
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 6203d2900877..35974ac69600 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
| @@ -3284,6 +3284,7 @@ int workqueue_sysfs_register(struct workqueue_struct *wq) | |||
| 3284 | } | 3284 | } |
| 3285 | } | 3285 | } |
| 3286 | 3286 | ||
| 3287 | dev_set_uevent_suppress(&wq_dev->dev, false); | ||
| 3287 | kobject_uevent(&wq_dev->dev.kobj, KOBJ_ADD); | 3288 | kobject_uevent(&wq_dev->dev.kobj, KOBJ_ADD); |
| 3288 | return 0; | 3289 | return 0; |
| 3289 | } | 3290 | } |
| @@ -4879,7 +4880,7 @@ static void __init wq_numa_init(void) | |||
| 4879 | BUG_ON(!tbl); | 4880 | BUG_ON(!tbl); |
| 4880 | 4881 | ||
| 4881 | for_each_node(node) | 4882 | for_each_node(node) |
| 4882 | BUG_ON(!alloc_cpumask_var_node(&tbl[node], GFP_KERNEL, | 4883 | BUG_ON(!zalloc_cpumask_var_node(&tbl[node], GFP_KERNEL, |
| 4883 | node_online(node) ? node : NUMA_NO_NODE)); | 4884 | node_online(node) ? node : NUMA_NO_NODE)); |
| 4884 | 4885 | ||
| 4885 | for_each_possible_cpu(cpu) { | 4886 | for_each_possible_cpu(cpu) { |
