author	David S. Miller <davem@davemloft.net>	2014-07-16 17:09:34 -0400
committer	David S. Miller <davem@davemloft.net>	2014-07-16 17:09:34 -0400
commit	1a98c69af1ecd97bfd1f4e4539924a9192434e36 (patch)
tree	a243defcf921ea174f8e43fce11d06830a6a9c36 /kernel
parent	7a575f6b907ea5d207d2b5010293c189616eae34 (diff)
parent	b6603fe574af289dbe9eb9fb4c540bca04f5a053 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'kernel')
-rw-r--r--	kernel/cgroup.c			58
-rw-r--r--	kernel/cpuset.c			20
-rw-r--r--	kernel/events/core.c		2
-rw-r--r--	kernel/events/uprobes.c		6
-rw-r--r--	kernel/irq/irqdesc.c		4
-rw-r--r--	kernel/printk/printk.c		44
-rw-r--r--	kernel/trace/trace.c		2
-rw-r--r--	kernel/trace/trace_uprobe.c	46
-rw-r--r--	kernel/workqueue.c		3
9 files changed, 122 insertions(+), 63 deletions(-)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 7868fc3c0bc5..70776aec2562 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1648,10 +1648,13 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
 			 int flags, const char *unused_dev_name,
 			 void *data)
 {
+	struct super_block *pinned_sb = NULL;
+	struct cgroup_subsys *ss;
 	struct cgroup_root *root;
 	struct cgroup_sb_opts opts;
 	struct dentry *dentry;
 	int ret;
+	int i;
 	bool new_sb;
 
 	/*
@@ -1677,6 +1680,27 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
 		goto out_unlock;
 	}
 
+	/*
+	 * Destruction of cgroup root is asynchronous, so subsystems may
+	 * still be dying after the previous unmount.  Let's drain the
+	 * dying subsystems.  We just need to ensure that the ones
+	 * unmounted previously finish dying and don't care about new ones
+	 * starting.  Testing ref liveliness is good enough.
+	 */
+	for_each_subsys(ss, i) {
+		if (!(opts.subsys_mask & (1 << i)) ||
+		    ss->root == &cgrp_dfl_root)
+			continue;
+
+		if (!percpu_ref_tryget_live(&ss->root->cgrp.self.refcnt)) {
+			mutex_unlock(&cgroup_mutex);
+			msleep(10);
+			ret = restart_syscall();
+			goto out_free;
+		}
+		cgroup_put(&ss->root->cgrp);
+	}
+
 	for_each_root(root) {
 		bool name_match = false;
 
@@ -1717,15 +1741,23 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
 		}
 
 		/*
-		 * A root's lifetime is governed by its root cgroup.
-		 * tryget_live failure indicate that the root is being
-		 * destroyed.  Wait for destruction to complete so that the
-		 * subsystems are free.  We can use wait_queue for the wait
-		 * but this path is super cold.  Let's just sleep for a bit
-		 * and retry.
+		 * We want to reuse @root whose lifetime is governed by its
+		 * ->cgrp.  Let's check whether @root is alive and keep it
+		 * that way.  As cgroup_kill_sb() can happen anytime, we
+		 * want to block it by pinning the sb so that @root doesn't
+		 * get killed before mount is complete.
+		 *
+		 * With the sb pinned, tryget_live can reliably indicate
+		 * whether @root can be reused.  If it's being killed,
+		 * drain it.  We can use wait_queue for the wait but this
+		 * path is super cold.  Let's just sleep a bit and retry.
 		 */
-		if (!percpu_ref_tryget_live(&root->cgrp.self.refcnt)) {
+		pinned_sb = kernfs_pin_sb(root->kf_root, NULL);
+		if (IS_ERR(pinned_sb) ||
+		    !percpu_ref_tryget_live(&root->cgrp.self.refcnt)) {
 			mutex_unlock(&cgroup_mutex);
+			if (!IS_ERR_OR_NULL(pinned_sb))
+				deactivate_super(pinned_sb);
 			msleep(10);
 			ret = restart_syscall();
 			goto out_free;
@@ -1770,6 +1802,16 @@ out_free:
 					 CGROUP_SUPER_MAGIC, &new_sb);
 	if (IS_ERR(dentry) || !new_sb)
 		cgroup_put(&root->cgrp);
+
+	/*
+	 * If @pinned_sb, we're reusing an existing root and holding an
+	 * extra ref on its sb.  Mount is complete.  Put the extra ref.
+	 */
+	if (pinned_sb) {
+		WARN_ON(new_sb);
+		deactivate_super(pinned_sb);
+	}
+
 	return dentry;
 }
 
@@ -3328,7 +3370,7 @@ bool css_has_online_children(struct cgroup_subsys_state *css)
 
 	rcu_read_lock();
 	css_for_each_child(child, css) {
-		if (css->flags & CSS_ONLINE) {
+		if (child->flags & CSS_ONLINE) {
 			ret = true;
 			break;
 		}
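
Note on the cgroup.c hunks above: reuse of an existing hierarchy now hinges on the percpu_ref tryget-live idiom; if the reference cannot be taken because the root (or a still-dying subsystem) is being torn down, the code drops cgroup_mutex, sleeps 10ms and restarts the syscall instead of waiting on a queue. The last hunk is a separate one-character fix: css_has_online_children() must test the loop variable child, not the css it iterates over. A minimal userspace C sketch of the tryget-live/retry pattern follows; obj_tryget_live() and obj_put() are illustrative names, not kernel APIs.

/*
 * Userspace analogue of the tryget-live / drain-and-retry pattern.
 * obj_tryget_live() and obj_put() are illustrative, not kernel APIs.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

struct obj {
	atomic_long refcnt;		/* <= 0 means the object is dying */
};

static bool obj_tryget_live(struct obj *o)
{
	long v = atomic_load(&o->refcnt);

	/* Only take a reference while the object is still live. */
	while (v > 0) {
		if (atomic_compare_exchange_weak(&o->refcnt, &v, v + 1))
			return true;
	}
	return false;
}

static void obj_put(struct obj *o)
{
	atomic_fetch_sub(&o->refcnt, 1);
}

int main(void)
{
	struct obj root = { .refcnt = 1 };

	for (;;) {
		if (obj_tryget_live(&root)) {
			/* ... reuse the existing root ... */
			obj_put(&root);
			break;
		}
		/* Dying: back off and retry, like msleep(10) + restart_syscall(). */
		usleep(10 * 1000);
	}
	puts("reused live root");
	return 0;
}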
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index f6b33c696224..116a4164720a 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1181,7 +1181,13 @@ done:
 
 int current_cpuset_is_being_rebound(void)
 {
-	return task_cs(current) == cpuset_being_rebound;
+	int ret;
+
+	rcu_read_lock();
+	ret = task_cs(current) == cpuset_being_rebound;
+	rcu_read_unlock();
+
+	return ret;
 }
 
 static int update_relax_domain_level(struct cpuset *cs, s64 val)
@@ -1617,7 +1623,17 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
 	 * resources, wait for the previously scheduled operations before
 	 * proceeding, so that we don't end up keep removing tasks added
 	 * after execution capability is restored.
+	 *
+	 * cpuset_hotplug_work calls back into cgroup core via
+	 * cgroup_transfer_tasks() and waiting for it from a cgroupfs
+	 * operation like this one can lead to a deadlock through kernfs
+	 * active_ref protection.  Let's break the protection.  Losing the
+	 * protection is okay as we check whether @cs is online after
+	 * grabbing cpuset_mutex anyway.  This only happens on the legacy
+	 * hierarchies.
 	 */
+	css_get(&cs->css);
+	kernfs_break_active_protection(of->kn);
 	flush_work(&cpuset_hotplug_work);
 
 	mutex_lock(&cpuset_mutex);
@@ -1645,6 +1661,8 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
 	free_trial_cpuset(trialcs);
 out_unlock:
 	mutex_unlock(&cpuset_mutex);
+	kernfs_unbreak_active_protection(of->kn);
+	css_put(&cs->css);
 	return retval ?: nbytes;
 }
 
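
The cpuset.c changes do two things: current_cpuset_is_being_rebound() now performs the task_cs() comparison under rcu_read_lock(), and cpuset_write_resmask() pins the css and breaks kernfs active protection before flushing cpuset_hotplug_work, so the flush cannot deadlock through active_ref; liveness is re-checked under cpuset_mutex afterwards. Below is a compilable skeleton of that ordering; every name in it is a stand-in, not a kernel API (build with: cc -pthread).

/*
 * Skeleton of the ordering cpuset_write_resmask() establishes: pin the
 * object and drop the (kernfs) protection before the blocking flush,
 * then re-check that it is still online under the mutex.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct cs { int refcnt; bool online; };

static pthread_mutex_t cpuset_mutex = PTHREAD_MUTEX_INITIALIZER;

static void cs_get(struct cs *c) { __atomic_add_fetch(&c->refcnt, 1, __ATOMIC_RELAXED); }
static void cs_put(struct cs *c) { __atomic_sub_fetch(&c->refcnt, 1, __ATOMIC_RELAXED); }
static void flush_hotplug_work(void) { /* may block and take other locks */ }

static int write_resmask(struct cs *c)
{
	int ret = 0;

	cs_get(c);			/* keep @c alive across the flush */
	/* kernfs_break_active_protection(of->kn) would go here */
	flush_hotplug_work();

	pthread_mutex_lock(&cpuset_mutex);
	if (!c->online) {		/* re-check: the protection was dropped */
		ret = -1;
		goto out_unlock;
	}
	/* ... apply the new cpus/mems mask ... */
out_unlock:
	pthread_mutex_unlock(&cpuset_mutex);
	/* kernfs_unbreak_active_protection(of->kn) would go here */
	cs_put(c);
	return ret;
}

int main(void)
{
	struct cs c = { .refcnt = 1, .online = true };

	printf("write_resmask -> %d\n", write_resmask(&c));
	return 0;
}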
diff --git a/kernel/events/core.c b/kernel/events/core.c
index a33d9a2bcbd7..b0c95f0f06fd 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2320,7 +2320,7 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
 	next_parent = rcu_dereference(next_ctx->parent_ctx);
 
 	/* If neither context have a parent context; they cannot be clones. */
-	if (!parent && !next_parent)
+	if (!parent || !next_parent)
 		goto unlock;
 
 	if (next_parent == ctx || next_ctx == parent || next_parent == parent) {
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index c445e392e93f..6f3254e8c137 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -846,7 +846,7 @@ static void __uprobe_unregister(struct uprobe *uprobe, struct uprobe_consumer *u
 {
 	int err;
 
-	if (!consumer_del(uprobe, uc))	/* WARN? */
+	if (WARN_ON(!consumer_del(uprobe, uc)))
 		return;
 
 	err = register_for_each_vma(uprobe, NULL);
@@ -927,7 +927,7 @@ int uprobe_apply(struct inode *inode, loff_t offset,
 	int ret = -ENOENT;
 
 	uprobe = find_uprobe(inode, offset);
-	if (!uprobe)
+	if (WARN_ON(!uprobe))
 		return ret;
 
 	down_write(&uprobe->register_rwsem);
@@ -952,7 +952,7 @@ void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consume
 	struct uprobe *uprobe;
 
 	uprobe = find_uprobe(inode, offset);
-	if (!uprobe)
+	if (WARN_ON(!uprobe))
 		return;
 
 	down_write(&uprobe->register_rwsem);
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 7339e42a85ab..1487a123db5c 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -455,9 +455,9 @@ EXPORT_SYMBOL_GPL(irq_alloc_hwirqs);
  */
 void irq_free_hwirqs(unsigned int from, int cnt)
 {
-	int i;
+	int i, j;
 
-	for (i = from; cnt > 0; i++, cnt--) {
+	for (i = from, j = cnt; j > 0; i++, j--) {
 		irq_set_status_flags(i, _IRQ_NOREQUEST | _IRQ_NOPROBE);
 		arch_teardown_hwirq(i);
 	}
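
The irq_free_hwirqs() fix iterates on a copy (j) so that cnt still holds the caller's value once the loop finishes; presumably cnt is needed again later in the function, where the original loop had already decremented it to zero (that later use is outside this hunk). A small standalone illustration with hypothetical names:

/*
 * Minimal illustration of the irq_free_hwirqs() fix: iterate on copies
 * so the @cnt parameter keeps the caller's value for later use.
 * release_range() and its printouts are hypothetical.
 */
#include <stdio.h>

static void release_range(unsigned int from, int cnt)
{
	int i, j;

	for (i = from, j = cnt; j > 0; i++, j--)
		printf("tearing down hwirq %d\n", i);

	/* @cnt is intact here; the buggy version would print 0. */
	printf("freeing %d descriptors starting at %u\n", cnt, from);
}

int main(void)
{
	release_range(16, 4);
	return 0;
}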
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index ea2d5f6962ed..13e839dbca07 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -1416,9 +1416,10 @@ static int have_callable_console(void)
 /*
  * Can we actually use the console at this time on this cpu?
  *
- * Console drivers may assume that per-cpu resources have been allocated. So
- * unless they're explicitly marked as being able to cope (CON_ANYTIME) don't
- * call them until this CPU is officially up.
+ * Console drivers may assume that per-cpu resources have
+ * been allocated. So unless they're explicitly marked as
+ * being able to cope (CON_ANYTIME) don't call them until
+ * this CPU is officially up.
  */
 static inline int can_use_console(unsigned int cpu)
 {
@@ -1431,10 +1432,8 @@ static inline int can_use_console(unsigned int cpu)
  * console_lock held, and 'console_locked' set) if it
  * is successful, false otherwise.
  */
-static int console_trylock_for_printk(void)
+static int console_trylock_for_printk(unsigned int cpu)
 {
-	unsigned int cpu = smp_processor_id();
-
 	if (!console_trylock())
 		return 0;
 	/*
@@ -1609,8 +1608,7 @@ asmlinkage int vprintk_emit(int facility, int level,
 	 */
 	if (!oops_in_progress && !lockdep_recursing(current)) {
 		recursion_bug = 1;
-		local_irq_restore(flags);
-		return 0;
+		goto out_restore_irqs;
 	}
 		zap_locks();
 	}
@@ -1718,27 +1716,21 @@ asmlinkage int vprintk_emit(int facility, int level,
 
 	logbuf_cpu = UINT_MAX;
 	raw_spin_unlock(&logbuf_lock);
-	lockdep_on();
-	local_irq_restore(flags);
 
 	/* If called from the scheduler, we can not call up(). */
-	if (in_sched)
-		return printed_len;
-
-	/*
-	 * Disable preemption to avoid being preempted while holding
-	 * console_sem which would prevent anyone from printing to console
-	 */
-	preempt_disable();
-	/*
-	 * Try to acquire and then immediately release the console semaphore.
-	 * The release will print out buffers and wake up /dev/kmsg and syslog()
-	 * users.
-	 */
-	if (console_trylock_for_printk())
-		console_unlock();
-	preempt_enable();
+	if (!in_sched) {
+		/*
+		 * Try to acquire and then immediately release the console
+		 * semaphore.  The release will print out buffers and wake up
+		 * /dev/kmsg and syslog() users.
+		 */
+		if (console_trylock_for_printk(this_cpu))
+			console_unlock();
+	}
 
+	lockdep_on();
+out_restore_irqs:
+	local_irq_restore(flags);
 	return printed_len;
 }
 EXPORT_SYMBOL(vprintk_emit);
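
The vprintk_emit() hunks move lockdep_on() and local_irq_restore() down to a single out_restore_irqs exit, so the recursion bail-out and the normal path both leave with interrupts restored in one place, and console_trylock_for_printk() now receives the cpu determined earlier instead of calling smp_processor_id() itself; the separate preempt_disable()/preempt_enable() pair goes away because interrupts are still disabled across the trylock at that point. A userspace sketch of the single-exit shape (the lock helpers below are stand-ins, not the real API):

/*
 * Userspace sketch of the single-exit shape of vprintk_emit() after the
 * change: every path leaves through one label that restores IRQ state.
 */
#include <stdbool.h>
#include <stdio.h>

static bool trylock_console(void) { return true; }
static void unlock_console(void)  { puts("flushed console"); }

static int emit(bool recursing, bool in_sched)
{
	int printed_len = 0;

	/* local_irq_save(flags) would go here */

	if (recursing)
		goto out_restore_irqs;		/* no early return any more */

	printed_len = 42;			/* ... format and store the message ... */

	/* If called from the scheduler, we can not take console_sem. */
	if (!in_sched && trylock_console())
		unlock_console();		/* prints buffers, wakes readers */

out_restore_irqs:
	/* local_irq_restore(flags) would go here */
	return printed_len;
}

int main(void)
{
	printf("normal: %d\n", emit(false, false));
	printf("recursing: %d\n", emit(true, false));
	return 0;
}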
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 384ede311717..f243444a3772 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1396,7 +1396,6 @@ void tracing_start(void)
 
 	arch_spin_unlock(&global_trace.max_lock);
 
-	ftrace_start();
  out:
 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
 }
@@ -1443,7 +1442,6 @@ void tracing_stop(void)
 	struct ring_buffer *buffer;
 	unsigned long flags;
 
-	ftrace_stop();
 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
 	if (global_trace.stop_count++)
 		goto out;
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 04fdb5de823c..3c9b97e6b1f4 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -893,6 +893,9 @@ probe_event_enable(struct trace_uprobe *tu, struct ftrace_event_file *file,
 	int ret;
 
 	if (file) {
+		if (tu->tp.flags & TP_FLAG_PROFILE)
+			return -EINTR;
+
 		link = kmalloc(sizeof(*link), GFP_KERNEL);
 		if (!link)
 			return -ENOMEM;
@@ -901,29 +904,40 @@ probe_event_enable(struct trace_uprobe *tu, struct ftrace_event_file *file,
 		list_add_tail_rcu(&link->list, &tu->tp.files);
 
 		tu->tp.flags |= TP_FLAG_TRACE;
-	} else
-		tu->tp.flags |= TP_FLAG_PROFILE;
+	} else {
+		if (tu->tp.flags & TP_FLAG_TRACE)
+			return -EINTR;
 
-	ret = uprobe_buffer_enable();
-	if (ret < 0)
-		return ret;
+		tu->tp.flags |= TP_FLAG_PROFILE;
+	}
 
 	WARN_ON(!uprobe_filter_is_empty(&tu->filter));
 
 	if (enabled)
 		return 0;
 
+	ret = uprobe_buffer_enable();
+	if (ret)
+		goto err_flags;
+
 	tu->consumer.filter = filter;
 	ret = uprobe_register(tu->inode, tu->offset, &tu->consumer);
-	if (ret) {
-		if (file) {
-			list_del(&link->list);
-			kfree(link);
-			tu->tp.flags &= ~TP_FLAG_TRACE;
-		} else
-			tu->tp.flags &= ~TP_FLAG_PROFILE;
-	}
+	if (ret)
+		goto err_buffer;
 
+	return 0;
+
+ err_buffer:
+	uprobe_buffer_disable();
+
+ err_flags:
+	if (file) {
+		list_del(&link->list);
+		kfree(link);
+		tu->tp.flags &= ~TP_FLAG_TRACE;
+	} else {
+		tu->tp.flags &= ~TP_FLAG_PROFILE;
+	}
 	return ret;
 }
 
@@ -1201,12 +1215,6 @@ static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs)
 
 	current->utask->vaddr = (unsigned long) &udd;
 
-#ifdef CONFIG_PERF_EVENTS
-	if ((tu->tp.flags & TP_FLAG_TRACE) == 0 &&
-	    !uprobe_perf_filter(&tu->consumer, 0, current->mm))
-		return UPROBE_HANDLER_REMOVE;
-#endif
-
 	if (WARN_ON_ONCE(!uprobe_cpu_buffer))
 		return 0;
 
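
probe_event_enable() above gains two things: a guard so TP_FLAG_TRACE and TP_FLAG_PROFILE cannot be enabled at the same time (returning -EINTR), and a conventional unwind path where uprobe_buffer_enable() and uprobe_register() failures jump to err_buffer/err_flags labels that undo the earlier steps in reverse order. A self-contained C illustration of that labelled-unwind shape; the "resources" below are made up.

/*
 * Labelled-unwind shape used by the reworked probe_event_enable():
 * set state up in order, undo it in reverse on failure.
 */
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

static int flags;				/* stands in for tu->tp.flags */

static int enable_probe(bool buffer_fails, bool register_fails)
{
	int ret = 0;
	char *link = malloc(32);		/* step 1: the trace link */

	if (!link)
		return -1;
	flags |= 1;				/* step 2: mark TRACE enabled */

	if (buffer_fails) {			/* step 3: per-cpu buffers */
		ret = -2;
		goto err_flags;
	}

	if (register_fails) {			/* step 4: uprobe_register() */
		ret = -3;
		goto err_buffer;
	}
	/* On success the link stays owned by the probe (freed on disable). */
	return 0;

err_buffer:
	puts("disable buffers");		/* undo step 3 */
err_flags:
	flags &= ~1;				/* undo step 2 */
	free(link);				/* undo step 1 */
	return ret;
}

int main(void)
{
	printf("ok: %d\n", enable_probe(false, false));
	printf("buffer failure: %d\n", enable_probe(true, false));
	printf("register failure: %d\n", enable_probe(false, true));
	return 0;
}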
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 6203d2900877..35974ac69600 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -3284,6 +3284,7 @@ int workqueue_sysfs_register(struct workqueue_struct *wq)
 		}
 	}
 
+	dev_set_uevent_suppress(&wq_dev->dev, false);
 	kobject_uevent(&wq_dev->dev.kobj, KOBJ_ADD);
 	return 0;
 }
@@ -4879,7 +4880,7 @@ static void __init wq_numa_init(void)
 	BUG_ON(!tbl);
 
 	for_each_node(node)
-		BUG_ON(!alloc_cpumask_var_node(&tbl[node], GFP_KERNEL,
+		BUG_ON(!zalloc_cpumask_var_node(&tbl[node], GFP_KERNEL,
 				node_online(node) ? node : NUMA_NO_NODE));
 
 	for_each_possible_cpu(cpu) {