about | summary | refs | log | tree | commit | diff | stats
path: root/kernel
diff options
context:
space:
mode:
author Paul Mundt <lethal@linux-sh.org> 2011-05-31 00:10:26 -0400
committer Paul Mundt <lethal@linux-sh.org> 2011-05-31 00:10:26 -0400
commit 8181d3ef26ed1d9eb21e2cdcac374e1f457fdc06 (patch)
tree 1a081f09ebcf2a84de899ddeadd0e4c5e48b50d2 /kernel
parent 54525552c6ccfd867e819845da14be994e303218 (diff)
parent 55922c9d1b84b89cb946c777fddccb3247e7df2c (diff)
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6 into sh-fixes-for-linus
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/Makefile | 1
-rw-r--r-- kernel/capability.c | 4
-rw-r--r-- kernel/cgroup.c | 587
-rw-r--r-- kernel/cgroup_freezer.c | 26
-rw-r--r-- kernel/compat.c | 8
-rw-r--r-- kernel/cpuset.c | 107
-rw-r--r-- kernel/cred.c | 8
-rw-r--r-- kernel/events/core.c | 8
-rw-r--r-- kernel/fork.c | 92
-rw-r--r-- kernel/hrtimer.c | 2
-rw-r--r-- kernel/irq/proc.c | 55
-rw-r--r-- kernel/jump_label.c | 18
-rw-r--r-- kernel/kmod.c | 100
-rw-r--r-- kernel/kthread.c | 4
-rw-r--r-- kernel/mutex.c | 25
-rw-r--r-- kernel/ns_cgroup.c | 118
-rw-r--r-- kernel/nsproxy.c | 46
-rw-r--r-- kernel/pm_qos_params.c | 70
-rw-r--r-- kernel/posix-timers.c | 25
-rw-r--r-- kernel/power/hibernate.c | 220
-rw-r--r-- kernel/printk.c | 87
-rw-r--r-- kernel/profile.c | 16
-rw-r--r-- kernel/ptrace.c | 2
-rw-r--r-- kernel/rcutree.c | 164
-rw-r--r-- kernel/rcutree.h | 30
-rw-r--r-- kernel/rcutree_plugin.h | 24
-rw-r--r-- kernel/rcutree_trace.c | 12
-rw-r--r-- kernel/sched.c | 94
-rw-r--r-- kernel/sched_fair.c | 5
-rw-r--r-- kernel/sched_rt.c | 10
-rw-r--r-- kernel/sched_stats.h | 4
-rw-r--r-- kernel/signal.c | 6
-rw-r--r-- kernel/sysctl.c | 8
-rw-r--r-- kernel/trace/ftrace.c | 31
-rw-r--r-- kernel/trace/ring_buffer.c | 10
-rw-r--r-- kernel/trace/trace.h | 15
-rw-r--r-- kernel/trace/trace_events.c | 7
-rw-r--r-- kernel/trace/trace_output.c | 27
-rw-r--r-- kernel/utsname.c | 39
-rw-r--r-- kernel/watchdog.c | 9
-rw-r--r-- kernel/workqueue.c | 4
41 files changed, 1328 insertions, 800 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index e9cf19155b46..2d64cfcc8b42 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -61,7 +61,6 @@ obj-$(CONFIG_COMPAT) += compat.o
61obj-$(CONFIG_CGROUPS) += cgroup.o 61obj-$(CONFIG_CGROUPS) += cgroup.o
62obj-$(CONFIG_CGROUP_FREEZER) += cgroup_freezer.o 62obj-$(CONFIG_CGROUP_FREEZER) += cgroup_freezer.o
63obj-$(CONFIG_CPUSETS) += cpuset.o 63obj-$(CONFIG_CPUSETS) += cpuset.o
64obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o
65obj-$(CONFIG_UTS_NS) += utsname.o 64obj-$(CONFIG_UTS_NS) += utsname.o
66obj-$(CONFIG_USER_NS) += user_namespace.o 65obj-$(CONFIG_USER_NS) += user_namespace.o
67obj-$(CONFIG_PID_NS) += pid_namespace.o 66obj-$(CONFIG_PID_NS) += pid_namespace.o
diff --git a/kernel/capability.c b/kernel/capability.c
index 32a80e08ff4b..283c529f8b1c 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -22,12 +22,8 @@
22 */ 22 */
23 23
24const kernel_cap_t __cap_empty_set = CAP_EMPTY_SET; 24const kernel_cap_t __cap_empty_set = CAP_EMPTY_SET;
25const kernel_cap_t __cap_full_set = CAP_FULL_SET;
26const kernel_cap_t __cap_init_eff_set = CAP_INIT_EFF_SET;
27 25
28EXPORT_SYMBOL(__cap_empty_set); 26EXPORT_SYMBOL(__cap_empty_set);
29EXPORT_SYMBOL(__cap_full_set);
30EXPORT_SYMBOL(__cap_init_eff_set);
31 27
32int file_caps_enabled = 1; 28int file_caps_enabled = 1;
33 29
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 909a35510af5..2731d115d725 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -57,6 +57,7 @@
57#include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */ 57#include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */
58#include <linux/eventfd.h> 58#include <linux/eventfd.h>
59#include <linux/poll.h> 59#include <linux/poll.h>
60#include <linux/flex_array.h> /* used in cgroup_attach_proc */
60 61
61#include <asm/atomic.h> 62#include <asm/atomic.h>
62 63
@@ -1735,6 +1736,76 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
1735} 1736}
1736EXPORT_SYMBOL_GPL(cgroup_path); 1737EXPORT_SYMBOL_GPL(cgroup_path);
1737 1738
1739/*
1740 * cgroup_task_migrate - move a task from one cgroup to another.
1741 *
1742 * 'guarantee' is set if the caller promises that a new css_set for the task
1743 * will already exist. If not set, this function might sleep, and can fail with
1744 * -ENOMEM. Otherwise, it can only fail with -ESRCH.
1745 */
1746static int cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp,
1747 struct task_struct *tsk, bool guarantee)
1748{
1749 struct css_set *oldcg;
1750 struct css_set *newcg;
1751
1752 /*
1753 * get old css_set. we need to take task_lock and refcount it, because
1754 * an exiting task can change its css_set to init_css_set and drop its
1755 * old one without taking cgroup_mutex.
1756 */
1757 task_lock(tsk);
1758 oldcg = tsk->cgroups;
1759 get_css_set(oldcg);
1760 task_unlock(tsk);
1761
1762 /* locate or allocate a new css_set for this task. */
1763 if (guarantee) {
1764 /* we know the css_set we want already exists. */
1765 struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT];
1766 read_lock(&css_set_lock);
1767 newcg = find_existing_css_set(oldcg, cgrp, template);
1768 BUG_ON(!newcg);
1769 get_css_set(newcg);
1770 read_unlock(&css_set_lock);
1771 } else {
1772 might_sleep();
1773 /* find_css_set will give us newcg already referenced. */
1774 newcg = find_css_set(oldcg, cgrp);
1775 if (!newcg) {
1776 put_css_set(oldcg);
1777 return -ENOMEM;
1778 }
1779 }
1780 put_css_set(oldcg);
1781
1782 /* if PF_EXITING is set, the tsk->cgroups pointer is no longer safe. */
1783 task_lock(tsk);
1784 if (tsk->flags & PF_EXITING) {
1785 task_unlock(tsk);
1786 put_css_set(newcg);
1787 return -ESRCH;
1788 }
1789 rcu_assign_pointer(tsk->cgroups, newcg);
1790 task_unlock(tsk);
1791
1792 /* Update the css_set linked lists if we're using them */
1793 write_lock(&css_set_lock);
1794 if (!list_empty(&tsk->cg_list))
1795 list_move(&tsk->cg_list, &newcg->tasks);
1796 write_unlock(&css_set_lock);
1797
1798 /*
1799 * We just gained a reference on oldcg by taking it from the task. As
1800 * trading it for newcg is protected by cgroup_mutex, we're safe to drop
1801 * it here; it will be freed under RCU.
1802 */
1803 put_css_set(oldcg);
1804
1805 set_bit(CGRP_RELEASABLE, &oldcgrp->flags);
1806 return 0;
1807}
1808
1738/** 1809/**
1739 * cgroup_attach_task - attach task 'tsk' to cgroup 'cgrp' 1810 * cgroup_attach_task - attach task 'tsk' to cgroup 'cgrp'
1740 * @cgrp: the cgroup the task is attaching to 1811 * @cgrp: the cgroup the task is attaching to
@@ -1745,11 +1816,9 @@ EXPORT_SYMBOL_GPL(cgroup_path);
1745 */ 1816 */
1746int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) 1817int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
1747{ 1818{
1748 int retval = 0; 1819 int retval;
1749 struct cgroup_subsys *ss, *failed_ss = NULL; 1820 struct cgroup_subsys *ss, *failed_ss = NULL;
1750 struct cgroup *oldcgrp; 1821 struct cgroup *oldcgrp;
1751 struct css_set *cg;
1752 struct css_set *newcg;
1753 struct cgroupfs_root *root = cgrp->root; 1822 struct cgroupfs_root *root = cgrp->root;
1754 1823
1755 /* Nothing to do if the task is already in that cgroup */ 1824 /* Nothing to do if the task is already in that cgroup */
@@ -1759,7 +1828,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
1759 1828
1760 for_each_subsys(root, ss) { 1829 for_each_subsys(root, ss) {
1761 if (ss->can_attach) { 1830 if (ss->can_attach) {
1762 retval = ss->can_attach(ss, cgrp, tsk, false); 1831 retval = ss->can_attach(ss, cgrp, tsk);
1763 if (retval) { 1832 if (retval) {
1764 /* 1833 /*
1765 * Remember on which subsystem the can_attach() 1834 * Remember on which subsystem the can_attach()
@@ -1771,46 +1840,29 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
1771 goto out; 1840 goto out;
1772 } 1841 }
1773 } 1842 }
1843 if (ss->can_attach_task) {
1844 retval = ss->can_attach_task(cgrp, tsk);
1845 if (retval) {
1846 failed_ss = ss;
1847 goto out;
1848 }
1849 }
1774 } 1850 }
1775 1851
1776 task_lock(tsk); 1852 retval = cgroup_task_migrate(cgrp, oldcgrp, tsk, false);
1777 cg = tsk->cgroups; 1853 if (retval)
1778 get_css_set(cg);
1779 task_unlock(tsk);
1780 /*
1781 * Locate or allocate a new css_set for this task,
1782 * based on its final set of cgroups
1783 */
1784 newcg = find_css_set(cg, cgrp);
1785 put_css_set(cg);
1786 if (!newcg) {
1787 retval = -ENOMEM;
1788 goto out;
1789 }
1790
1791 task_lock(tsk);
1792 if (tsk->flags & PF_EXITING) {
1793 task_unlock(tsk);
1794 put_css_set(newcg);
1795 retval = -ESRCH;
1796 goto out; 1854 goto out;
1797 }
1798 rcu_assign_pointer(tsk->cgroups, newcg);
1799 task_unlock(tsk);
1800
1801 /* Update the css_set linked lists if we're using them */
1802 write_lock(&css_set_lock);
1803 if (!list_empty(&tsk->cg_list))
1804 list_move(&tsk->cg_list, &newcg->tasks);
1805 write_unlock(&css_set_lock);
1806 1855
1807 for_each_subsys(root, ss) { 1856 for_each_subsys(root, ss) {
1857 if (ss->pre_attach)
1858 ss->pre_attach(cgrp);
1859 if (ss->attach_task)
1860 ss->attach_task(cgrp, tsk);
1808 if (ss->attach) 1861 if (ss->attach)
1809 ss->attach(ss, cgrp, oldcgrp, tsk, false); 1862 ss->attach(ss, cgrp, oldcgrp, tsk);
1810 } 1863 }
1811 set_bit(CGRP_RELEASABLE, &oldcgrp->flags); 1864
1812 synchronize_rcu(); 1865 synchronize_rcu();
1813 put_css_set(cg);
1814 1866
1815 /* 1867 /*
1816 * wake up rmdir() waiter. the rmdir should fail since the cgroup 1868 * wake up rmdir() waiter. the rmdir should fail since the cgroup
@@ -1829,7 +1881,7 @@ out:
1829 */ 1881 */
1830 break; 1882 break;
1831 if (ss->cancel_attach) 1883 if (ss->cancel_attach)
1832 ss->cancel_attach(ss, cgrp, tsk, false); 1884 ss->cancel_attach(ss, cgrp, tsk);
1833 } 1885 }
1834 } 1886 }
1835 return retval; 1887 return retval;
@@ -1860,49 +1912,370 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
1860EXPORT_SYMBOL_GPL(cgroup_attach_task_all); 1912EXPORT_SYMBOL_GPL(cgroup_attach_task_all);
1861 1913
1862/* 1914/*
1863 * Attach task with pid 'pid' to cgroup 'cgrp'. Call with cgroup_mutex 1915 * cgroup_attach_proc works in two stages, the first of which prefetches all
1864 * held. May take task_lock of task 1916 * new css_sets needed (to make sure we have enough memory before committing
1917 * to the move) and stores them in a list of entries of the following type.
1918 * TODO: possible optimization: use css_set->rcu_head for chaining instead
1919 */
1920struct cg_list_entry {
1921 struct css_set *cg;
1922 struct list_head links;
1923};
1924
1925static bool css_set_check_fetched(struct cgroup *cgrp,
1926 struct task_struct *tsk, struct css_set *cg,
1927 struct list_head *newcg_list)
1928{
1929 struct css_set *newcg;
1930 struct cg_list_entry *cg_entry;
1931 struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT];
1932
1933 read_lock(&css_set_lock);
1934 newcg = find_existing_css_set(cg, cgrp, template);
1935 if (newcg)
1936 get_css_set(newcg);
1937 read_unlock(&css_set_lock);
1938
1939 /* doesn't exist at all? */
1940 if (!newcg)
1941 return false;
1942 /* see if it's already in the list */
1943 list_for_each_entry(cg_entry, newcg_list, links) {
1944 if (cg_entry->cg == newcg) {
1945 put_css_set(newcg);
1946 return true;
1947 }
1948 }
1949
1950 /* not found */
1951 put_css_set(newcg);
1952 return false;
1953}
1954
1955/*
1956 * Find the new css_set and store it in the list in preparation for moving the
1957 * given task to the given cgroup. Returns 0 or -ENOMEM.
1958 */
1959static int css_set_prefetch(struct cgroup *cgrp, struct css_set *cg,
1960 struct list_head *newcg_list)
1961{
1962 struct css_set *newcg;
1963 struct cg_list_entry *cg_entry;
1964
1965 /* ensure a new css_set will exist for this thread */
1966 newcg = find_css_set(cg, cgrp);
1967 if (!newcg)
1968 return -ENOMEM;
1969 /* add it to the list */
1970 cg_entry = kmalloc(sizeof(struct cg_list_entry), GFP_KERNEL);
1971 if (!cg_entry) {
1972 put_css_set(newcg);
1973 return -ENOMEM;
1974 }
1975 cg_entry->cg = newcg;
1976 list_add(&cg_entry->links, newcg_list);
1977 return 0;
1978}
1979
1980/**
1981 * cgroup_attach_proc - attach all threads in a threadgroup to a cgroup
1982 * @cgrp: the cgroup to attach to
1983 * @leader: the threadgroup leader task_struct of the group to be attached
1984 *
1985 * Call holding cgroup_mutex and the threadgroup_fork_lock of the leader. Will
1986 * take task_lock of each thread in leader's threadgroup individually in turn.
1987 */
1988int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
1989{
1990 int retval, i, group_size;
1991 struct cgroup_subsys *ss, *failed_ss = NULL;
1992 bool cancel_failed_ss = false;
1993 /* guaranteed to be initialized later, but the compiler needs this */
1994 struct cgroup *oldcgrp = NULL;
1995 struct css_set *oldcg;
1996 struct cgroupfs_root *root = cgrp->root;
1997 /* threadgroup list cursor and array */
1998 struct task_struct *tsk;
1999 struct flex_array *group;
2000 /*
2001 * we need to make sure we have css_sets for all the tasks we're
2002 * going to move -before- we actually start moving them, so that in
2003 * case we get an ENOMEM we can bail out before making any changes.
2004 */
2005 struct list_head newcg_list;
2006 struct cg_list_entry *cg_entry, *temp_nobe;
2007
2008 /*
2009 * step 0: in order to do expensive, possibly blocking operations for
2010 * every thread, we cannot iterate the thread group list, since it needs
2011 * rcu or tasklist locked. instead, build an array of all threads in the
2012 * group - threadgroup_fork_lock prevents new threads from appearing,
2013 * and if threads exit, this will just be an over-estimate.
2014 */
2015 group_size = get_nr_threads(leader);
2016 /* flex_array supports very large thread-groups better than kmalloc. */
2017 group = flex_array_alloc(sizeof(struct task_struct *), group_size,
2018 GFP_KERNEL);
2019 if (!group)
2020 return -ENOMEM;
2021 /* pre-allocate to guarantee space while iterating in rcu read-side. */
2022 retval = flex_array_prealloc(group, 0, group_size - 1, GFP_KERNEL);
2023 if (retval)
2024 goto out_free_group_list;
2025
2026 /* prevent changes to the threadgroup list while we take a snapshot. */
2027 rcu_read_lock();
2028 if (!thread_group_leader(leader)) {
2029 /*
2030 * a race with de_thread from another thread's exec() may strip
2031 * us of our leadership, making while_each_thread unsafe to use
2032 * on this task. if this happens, there is no choice but to
2033 * throw this task away and try again (from cgroup_procs_write);
2034 * this is "double-double-toil-and-trouble-check locking".
2035 */
2036 rcu_read_unlock();
2037 retval = -EAGAIN;
2038 goto out_free_group_list;
2039 }
2040 /* take a reference on each task in the group to go in the array. */
2041 tsk = leader;
2042 i = 0;
2043 do {
2044 /* as per above, nr_threads may decrease, but not increase. */
2045 BUG_ON(i >= group_size);
2046 get_task_struct(tsk);
2047 /*
2048 * saying GFP_ATOMIC has no effect here because we did prealloc
2049 * earlier, but it's good form to communicate our expectations.
2050 */
2051 retval = flex_array_put_ptr(group, i, tsk, GFP_ATOMIC);
2052 BUG_ON(retval != 0);
2053 i++;
2054 } while_each_thread(leader, tsk);
2055 /* remember the number of threads in the array for later. */
2056 group_size = i;
2057 rcu_read_unlock();
2058
2059 /*
2060 * step 1: check that we can legitimately attach to the cgroup.
2061 */
2062 for_each_subsys(root, ss) {
2063 if (ss->can_attach) {
2064 retval = ss->can_attach(ss, cgrp, leader);
2065 if (retval) {
2066 failed_ss = ss;
2067 goto out_cancel_attach;
2068 }
2069 }
2070 /* a callback to be run on every thread in the threadgroup. */
2071 if (ss->can_attach_task) {
2072 /* run on each task in the threadgroup. */
2073 for (i = 0; i < group_size; i++) {
2074 tsk = flex_array_get_ptr(group, i);
2075 retval = ss->can_attach_task(cgrp, tsk);
2076 if (retval) {
2077 failed_ss = ss;
2078 cancel_failed_ss = true;
2079 goto out_cancel_attach;
2080 }
2081 }
2082 }
2083 }
2084
2085 /*
2086 * step 2: make sure css_sets exist for all threads to be migrated.
2087 * we use find_css_set, which allocates a new one if necessary.
2088 */
2089 INIT_LIST_HEAD(&newcg_list);
2090 for (i = 0; i < group_size; i++) {
2091 tsk = flex_array_get_ptr(group, i);
2092 /* nothing to do if this task is already in the cgroup */
2093 oldcgrp = task_cgroup_from_root(tsk, root);
2094 if (cgrp == oldcgrp)
2095 continue;
2096 /* get old css_set pointer */
2097 task_lock(tsk);
2098 if (tsk->flags & PF_EXITING) {
2099 /* ignore this task if it's going away */
2100 task_unlock(tsk);
2101 continue;
2102 }
2103 oldcg = tsk->cgroups;
2104 get_css_set(oldcg);
2105 task_unlock(tsk);
2106 /* see if the new one for us is already in the list? */
2107 if (css_set_check_fetched(cgrp, tsk, oldcg, &newcg_list)) {
2108 /* was already there, nothing to do. */
2109 put_css_set(oldcg);
2110 } else {
2111 /* we don't already have it. get new one. */
2112 retval = css_set_prefetch(cgrp, oldcg, &newcg_list);
2113 put_css_set(oldcg);
2114 if (retval)
2115 goto out_list_teardown;
2116 }
2117 }
2118
2119 /*
2120 * step 3: now that we're guaranteed success wrt the css_sets, proceed
2121 * to move all tasks to the new cgroup, calling ss->attach_task for each
2122 * one along the way. there are no failure cases after here, so this is
2123 * the commit point.
2124 */
2125 for_each_subsys(root, ss) {
2126 if (ss->pre_attach)
2127 ss->pre_attach(cgrp);
2128 }
2129 for (i = 0; i < group_size; i++) {
2130 tsk = flex_array_get_ptr(group, i);
2131 /* leave current thread as it is if it's already there */
2132 oldcgrp = task_cgroup_from_root(tsk, root);
2133 if (cgrp == oldcgrp)
2134 continue;
2135 /* attach each task to each subsystem */
2136 for_each_subsys(root, ss) {
2137 if (ss->attach_task)
2138 ss->attach_task(cgrp, tsk);
2139 }
2140 /* if the thread is PF_EXITING, it can just get skipped. */
2141 retval = cgroup_task_migrate(cgrp, oldcgrp, tsk, true);
2142 BUG_ON(retval != 0 && retval != -ESRCH);
2143 }
2144 /* nothing is sensitive to fork() after this point. */
2145
2146 /*
2147 * step 4: do expensive, non-thread-specific subsystem callbacks.
2148 * TODO: if ever a subsystem needs to know the oldcgrp for each task
2149 * being moved, this call will need to be reworked to communicate that.
2150 */
2151 for_each_subsys(root, ss) {
2152 if (ss->attach)
2153 ss->attach(ss, cgrp, oldcgrp, leader);
2154 }
2155
2156 /*
2157 * step 5: success! and cleanup
2158 */
2159 synchronize_rcu();
2160 cgroup_wakeup_rmdir_waiter(cgrp);
2161 retval = 0;
2162out_list_teardown:
2163 /* clean up the list of prefetched css_sets. */
2164 list_for_each_entry_safe(cg_entry, temp_nobe, &newcg_list, links) {
2165 list_del(&cg_entry->links);
2166 put_css_set(cg_entry->cg);
2167 kfree(cg_entry);
2168 }
2169out_cancel_attach:
2170 /* same deal as in cgroup_attach_task */
2171 if (retval) {
2172 for_each_subsys(root, ss) {
2173 if (ss == failed_ss) {
2174 if (cancel_failed_ss && ss->cancel_attach)
2175 ss->cancel_attach(ss, cgrp, leader);
2176 break;
2177 }
2178 if (ss->cancel_attach)
2179 ss->cancel_attach(ss, cgrp, leader);
2180 }
2181 }
2182 /* clean up the array of referenced threads in the group. */
2183 for (i = 0; i < group_size; i++) {
2184 tsk = flex_array_get_ptr(group, i);
2185 put_task_struct(tsk);
2186 }
2187out_free_group_list:
2188 flex_array_free(group);
2189 return retval;
2190}
2191
2192/*
2193 * Find the task_struct of the task to attach by vpid and pass it along to the
2194 * function to attach either it or all tasks in its threadgroup. Will take
2195 * cgroup_mutex; may take task_lock of task.
1865 */ 2196 */
1866static int attach_task_by_pid(struct cgroup *cgrp, u64 pid) 2197static int attach_task_by_pid(struct cgroup *cgrp, u64 pid, bool threadgroup)
1867{ 2198{
1868 struct task_struct *tsk; 2199 struct task_struct *tsk;
1869 const struct cred *cred = current_cred(), *tcred; 2200 const struct cred *cred = current_cred(), *tcred;
1870 int ret; 2201 int ret;
1871 2202
2203 if (!cgroup_lock_live_group(cgrp))
2204 return -ENODEV;
2205
1872 if (pid) { 2206 if (pid) {
1873 rcu_read_lock(); 2207 rcu_read_lock();
1874 tsk = find_task_by_vpid(pid); 2208 tsk = find_task_by_vpid(pid);
1875 if (!tsk || tsk->flags & PF_EXITING) { 2209 if (!tsk) {
1876 rcu_read_unlock(); 2210 rcu_read_unlock();
2211 cgroup_unlock();
2212 return -ESRCH;
2213 }
2214 if (threadgroup) {
2215 /*
2216 * RCU protects this access, since tsk was found in the
2217 * tid map. a race with de_thread may cause group_leader
2218 * to stop being the leader, but cgroup_attach_proc will
2219 * detect it later.
2220 */
2221 tsk = tsk->group_leader;
2222 } else if (tsk->flags & PF_EXITING) {
2223 /* optimization for the single-task-only case */
2224 rcu_read_unlock();
2225 cgroup_unlock();
1877 return -ESRCH; 2226 return -ESRCH;
1878 } 2227 }
1879 2228
2229 /*
2230 * even if we're attaching all tasks in the thread group, we
2231 * only need to check permissions on one of them.
2232 */
1880 tcred = __task_cred(tsk); 2233 tcred = __task_cred(tsk);
1881 if (cred->euid && 2234 if (cred->euid &&
1882 cred->euid != tcred->uid && 2235 cred->euid != tcred->uid &&
1883 cred->euid != tcred->suid) { 2236 cred->euid != tcred->suid) {
1884 rcu_read_unlock(); 2237 rcu_read_unlock();
2238 cgroup_unlock();
1885 return -EACCES; 2239 return -EACCES;
1886 } 2240 }
1887 get_task_struct(tsk); 2241 get_task_struct(tsk);
1888 rcu_read_unlock(); 2242 rcu_read_unlock();
1889 } else { 2243 } else {
1890 tsk = current; 2244 if (threadgroup)
2245 tsk = current->group_leader;
2246 else
2247 tsk = current;
1891 get_task_struct(tsk); 2248 get_task_struct(tsk);
1892 } 2249 }
1893 2250
1894 ret = cgroup_attach_task(cgrp, tsk); 2251 if (threadgroup) {
2252 threadgroup_fork_write_lock(tsk);
2253 ret = cgroup_attach_proc(cgrp, tsk);
2254 threadgroup_fork_write_unlock(tsk);
2255 } else {
2256 ret = cgroup_attach_task(cgrp, tsk);
2257 }
1895 put_task_struct(tsk); 2258 put_task_struct(tsk);
2259 cgroup_unlock();
1896 return ret; 2260 return ret;
1897} 2261}
1898 2262
1899static int cgroup_tasks_write(struct cgroup *cgrp, struct cftype *cft, u64 pid) 2263static int cgroup_tasks_write(struct cgroup *cgrp, struct cftype *cft, u64 pid)
1900{ 2264{
2265 return attach_task_by_pid(cgrp, pid, false);
2266}
2267
2268static int cgroup_procs_write(struct cgroup *cgrp, struct cftype *cft, u64 tgid)
2269{
1901 int ret; 2270 int ret;
1902 if (!cgroup_lock_live_group(cgrp)) 2271 do {
1903 return -ENODEV; 2272 /*
1904 ret = attach_task_by_pid(cgrp, pid); 2273 * attach_proc fails with -EAGAIN if threadgroup leadership
1905 cgroup_unlock(); 2274 * changes in the middle of the operation, in which case we need
2275 * to find the task_struct for the new leader and start over.
2276 */
2277 ret = attach_task_by_pid(cgrp, tgid, true);
2278 } while (ret == -EAGAIN);
1906 return ret; 2279 return ret;
1907} 2280}
1908 2281
@@ -3259,9 +3632,9 @@ static struct cftype files[] = {
3259 { 3632 {
3260 .name = CGROUP_FILE_GENERIC_PREFIX "procs", 3633 .name = CGROUP_FILE_GENERIC_PREFIX "procs",
3261 .open = cgroup_procs_open, 3634 .open = cgroup_procs_open,
3262 /* .write_u64 = cgroup_procs_write, TODO */ 3635 .write_u64 = cgroup_procs_write,
3263 .release = cgroup_pidlist_release, 3636 .release = cgroup_pidlist_release,
3264 .mode = S_IRUGO, 3637 .mode = S_IRUGO | S_IWUSR,
3265 }, 3638 },
3266 { 3639 {
3267 .name = "notify_on_release", 3640 .name = "notify_on_release",
@@ -4257,122 +4630,6 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks)
4257} 4630}
4258 4631
4259/** 4632/**
4260 * cgroup_clone - clone the cgroup the given subsystem is attached to
4261 * @tsk: the task to be moved
4262 * @subsys: the given subsystem
4263 * @nodename: the name for the new cgroup
4264 *
4265 * Duplicate the current cgroup in the hierarchy that the given
4266 * subsystem is attached to, and move this task into the new
4267 * child.
4268 */
4269int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
4270 char *nodename)
4271{
4272 struct dentry *dentry;
4273 int ret = 0;
4274 struct cgroup *parent, *child;
4275 struct inode *inode;
4276 struct css_set *cg;
4277 struct cgroupfs_root *root;
4278 struct cgroup_subsys *ss;
4279
4280 /* We shouldn't be called by an unregistered subsystem */
4281 BUG_ON(!subsys->active);
4282
4283 /* First figure out what hierarchy and cgroup we're dealing
4284 * with, and pin them so we can drop cgroup_mutex */
4285 mutex_lock(&cgroup_mutex);
4286 again:
4287 root = subsys->root;
4288 if (root == &rootnode) {
4289 mutex_unlock(&cgroup_mutex);
4290 return 0;
4291 }
4292
4293 /* Pin the hierarchy */
4294 if (!atomic_inc_not_zero(&root->sb->s_active)) {
4295 /* We race with the final deactivate_super() */
4296 mutex_unlock(&cgroup_mutex);
4297 return 0;
4298 }
4299
4300 /* Keep the cgroup alive */
4301 task_lock(tsk);
4302 parent = task_cgroup(tsk, subsys->subsys_id);
4303 cg = tsk->cgroups;
4304 get_css_set(cg);
4305 task_unlock(tsk);
4306
4307 mutex_unlock(&cgroup_mutex);
4308
4309 /* Now do the VFS work to create a cgroup */
4310 inode = parent->dentry->d_inode;
4311
4312 /* Hold the parent directory mutex across this operation to
4313 * stop anyone else deleting the new cgroup */
4314 mutex_lock(&inode->i_mutex);
4315 dentry = lookup_one_len(nodename, parent->dentry, strlen(nodename));
4316 if (IS_ERR(dentry)) {
4317 printk(KERN_INFO
4318 "cgroup: Couldn't allocate dentry for %s: %ld\n", nodename,
4319 PTR_ERR(dentry));
4320 ret = PTR_ERR(dentry);
4321 goto out_release;
4322 }
4323
4324 /* Create the cgroup directory, which also creates the cgroup */
4325 ret = vfs_mkdir(inode, dentry, 0755);
4326 child = __d_cgrp(dentry);
4327 dput(dentry);
4328 if (ret) {
4329 printk(KERN_INFO
4330 "Failed to create cgroup %s: %d\n", nodename,
4331 ret);
4332 goto out_release;
4333 }
4334
4335 /* The cgroup now exists. Retake cgroup_mutex and check
4336 * that we're still in the same state that we thought we
4337 * were. */
4338 mutex_lock(&cgroup_mutex);
4339 if ((root != subsys->root) ||
4340 (parent != task_cgroup(tsk, subsys->subsys_id))) {
4341 /* Aargh, we raced ... */
4342 mutex_unlock(&inode->i_mutex);
4343 put_css_set(cg);
4344
4345 deactivate_super(root->sb);
4346 /* The cgroup is still accessible in the VFS, but
4347 * we're not going to try to rmdir() it at this
4348 * point. */
4349 printk(KERN_INFO
4350 "Race in cgroup_clone() - leaking cgroup %s\n",
4351 nodename);
4352 goto again;
4353 }
4354
4355 /* do any required auto-setup */
4356 for_each_subsys(root, ss) {
4357 if (ss->post_clone)
4358 ss->post_clone(ss, child);
4359 }
4360
4361 /* All seems fine. Finish by moving the task into the new cgroup */
4362 ret = cgroup_attach_task(child, tsk);
4363 mutex_unlock(&cgroup_mutex);
4364
4365 out_release:
4366 mutex_unlock(&inode->i_mutex);
4367
4368 mutex_lock(&cgroup_mutex);
4369 put_css_set(cg);
4370 mutex_unlock(&cgroup_mutex);
4371 deactivate_super(root->sb);
4372 return ret;
4373}
4374
4375/**
4376 * cgroup_is_descendant - see if @cgrp is a descendant of @task's cgrp 4633 * cgroup_is_descendant - see if @cgrp is a descendant of @task's cgrp
4377 * @cgrp: the cgroup in question 4634 * @cgrp: the cgroup in question
4378 * @task: the task in question 4635 * @task: the task in question
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index e7bebb7c6c38..e691818d7e45 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -160,7 +160,7 @@ static void freezer_destroy(struct cgroup_subsys *ss,
160 */ 160 */
161static int freezer_can_attach(struct cgroup_subsys *ss, 161static int freezer_can_attach(struct cgroup_subsys *ss,
162 struct cgroup *new_cgroup, 162 struct cgroup *new_cgroup,
163 struct task_struct *task, bool threadgroup) 163 struct task_struct *task)
164{ 164{
165 struct freezer *freezer; 165 struct freezer *freezer;
166 166
@@ -172,26 +172,17 @@ static int freezer_can_attach(struct cgroup_subsys *ss,
172 if (freezer->state != CGROUP_THAWED) 172 if (freezer->state != CGROUP_THAWED)
173 return -EBUSY; 173 return -EBUSY;
174 174
175 return 0;
176}
177
178static int freezer_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
179{
175 rcu_read_lock(); 180 rcu_read_lock();
176 if (__cgroup_freezing_or_frozen(task)) { 181 if (__cgroup_freezing_or_frozen(tsk)) {
177 rcu_read_unlock(); 182 rcu_read_unlock();
178 return -EBUSY; 183 return -EBUSY;
179 } 184 }
180 rcu_read_unlock(); 185 rcu_read_unlock();
181
182 if (threadgroup) {
183 struct task_struct *c;
184
185 rcu_read_lock();
186 list_for_each_entry_rcu(c, &task->thread_group, thread_group) {
187 if (__cgroup_freezing_or_frozen(c)) {
188 rcu_read_unlock();
189 return -EBUSY;
190 }
191 }
192 rcu_read_unlock();
193 }
194
195 return 0; 186 return 0;
196} 187}
197 188
@@ -390,6 +381,9 @@ struct cgroup_subsys freezer_subsys = {
390 .populate = freezer_populate, 381 .populate = freezer_populate,
391 .subsys_id = freezer_subsys_id, 382 .subsys_id = freezer_subsys_id,
392 .can_attach = freezer_can_attach, 383 .can_attach = freezer_can_attach,
384 .can_attach_task = freezer_can_attach_task,
385 .pre_attach = NULL,
386 .attach_task = NULL,
393 .attach = NULL, 387 .attach = NULL,
394 .fork = freezer_fork, 388 .fork = freezer_fork,
395 .exit = NULL, 389 .exit = NULL,
diff --git a/kernel/compat.c b/kernel/compat.c
index 9214dcd087b7..fc9eb093acd5 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -293,6 +293,8 @@ asmlinkage long compat_sys_times(struct compat_tms __user *tbuf)
293 return compat_jiffies_to_clock_t(jiffies); 293 return compat_jiffies_to_clock_t(jiffies);
294} 294}
295 295
296#ifdef __ARCH_WANT_SYS_SIGPENDING
297
296/* 298/*
297 * Assumption: old_sigset_t and compat_old_sigset_t are both 299 * Assumption: old_sigset_t and compat_old_sigset_t are both
298 * types that can be passed to put_user()/get_user(). 300 * types that can be passed to put_user()/get_user().
@@ -312,6 +314,10 @@ asmlinkage long compat_sys_sigpending(compat_old_sigset_t __user *set)
312 return ret; 314 return ret;
313} 315}
314 316
317#endif
318
319#ifdef __ARCH_WANT_SYS_SIGPROCMASK
320
315asmlinkage long compat_sys_sigprocmask(int how, compat_old_sigset_t __user *set, 321asmlinkage long compat_sys_sigprocmask(int how, compat_old_sigset_t __user *set,
316 compat_old_sigset_t __user *oset) 322 compat_old_sigset_t __user *oset)
317{ 323{
@@ -333,6 +339,8 @@ asmlinkage long compat_sys_sigprocmask(int how, compat_old_sigset_t __user *set,
333 return ret; 339 return ret;
334} 340}
335 341
342#endif
343
336asmlinkage long compat_sys_setrlimit(unsigned int resource, 344asmlinkage long compat_sys_setrlimit(unsigned int resource,
337 struct compat_rlimit __user *rlim) 345 struct compat_rlimit __user *rlim)
338{ 346{
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 2bb8c2e98fff..9c9b7545c810 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1367,14 +1367,10 @@ static int fmeter_getrate(struct fmeter *fmp)
1367 return val; 1367 return val;
1368} 1368}
1369 1369
1370/* Protected by cgroup_lock */
1371static cpumask_var_t cpus_attach;
1372
1373/* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */ 1370/* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */
1374static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cont, 1371static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cont,
1375 struct task_struct *tsk, bool threadgroup) 1372 struct task_struct *tsk)
1376{ 1373{
1377 int ret;
1378 struct cpuset *cs = cgroup_cs(cont); 1374 struct cpuset *cs = cgroup_cs(cont);
1379 1375
1380 if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)) 1376 if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))
@@ -1391,29 +1387,42 @@ static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cont,
1391 if (tsk->flags & PF_THREAD_BOUND) 1387 if (tsk->flags & PF_THREAD_BOUND)
1392 return -EINVAL; 1388 return -EINVAL;
1393 1389
1394 ret = security_task_setscheduler(tsk);
1395 if (ret)
1396 return ret;
1397 if (threadgroup) {
1398 struct task_struct *c;
1399
1400 rcu_read_lock();
1401 list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
1402 ret = security_task_setscheduler(c);
1403 if (ret) {
1404 rcu_read_unlock();
1405 return ret;
1406 }
1407 }
1408 rcu_read_unlock();
1409 }
1410 return 0; 1390 return 0;
1411} 1391}
1412 1392
1413static void cpuset_attach_task(struct task_struct *tsk, nodemask_t *to, 1393static int cpuset_can_attach_task(struct cgroup *cgrp, struct task_struct *task)
1414 struct cpuset *cs) 1394{
1395 return security_task_setscheduler(task);
1396}
1397
1398/*
1399 * Protected by cgroup_lock. The nodemasks must be stored globally because
1400 * dynamically allocating them is not allowed in pre_attach, and they must
1401 * persist among pre_attach, attach_task, and attach.
1402 */
1403static cpumask_var_t cpus_attach;
1404static nodemask_t cpuset_attach_nodemask_from;
1405static nodemask_t cpuset_attach_nodemask_to;
1406
1407/* Set-up work for before attaching each task. */
1408static void cpuset_pre_attach(struct cgroup *cont)
1409{
1410 struct cpuset *cs = cgroup_cs(cont);
1411
1412 if (cs == &top_cpuset)
1413 cpumask_copy(cpus_attach, cpu_possible_mask);
1414 else
1415 guarantee_online_cpus(cs, cpus_attach);
1416
1417 guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
1418}
1419
1420/* Per-thread attachment work. */
1421static void cpuset_attach_task(struct cgroup *cont, struct task_struct *tsk)
1415{ 1422{
1416 int err; 1423 int err;
1424 struct cpuset *cs = cgroup_cs(cont);
1425
1417 /* 1426 /*
1418 * can_attach beforehand should guarantee that this doesn't fail. 1427 * can_attach beforehand should guarantee that this doesn't fail.
1419 * TODO: have a better way to handle failure here 1428 * TODO: have a better way to handle failure here
@@ -1421,45 +1430,29 @@ static void cpuset_attach_task(struct task_struct *tsk, nodemask_t *to,
1421 err = set_cpus_allowed_ptr(tsk, cpus_attach); 1430 err = set_cpus_allowed_ptr(tsk, cpus_attach);
1422 WARN_ON_ONCE(err); 1431 WARN_ON_ONCE(err);
1423 1432
1424 cpuset_change_task_nodemask(tsk, to); 1433 cpuset_change_task_nodemask(tsk, &cpuset_attach_nodemask_to);
1425 cpuset_update_task_spread_flag(cs, tsk); 1434 cpuset_update_task_spread_flag(cs, tsk);
1426
1427} 1435}
1428 1436
1429static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cont, 1437static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cont,
1430 struct cgroup *oldcont, struct task_struct *tsk, 1438 struct cgroup *oldcont, struct task_struct *tsk)
1431 bool threadgroup)
1432{ 1439{
1433 struct mm_struct *mm; 1440 struct mm_struct *mm;
1434 struct cpuset *cs = cgroup_cs(cont); 1441 struct cpuset *cs = cgroup_cs(cont);
1435 struct cpuset *oldcs = cgroup_cs(oldcont); 1442 struct cpuset *oldcs = cgroup_cs(oldcont);
1436 static nodemask_t to; /* protected by cgroup_mutex */
1437 1443
1438 if (cs == &top_cpuset) { 1444 /*
1439 cpumask_copy(cpus_attach, cpu_possible_mask); 1445 * Change mm, possibly for multiple threads in a threadgroup. This is
1440 } else { 1446 * expensive and may sleep.
1441 guarantee_online_cpus(cs, cpus_attach); 1447 */
1442 } 1448 cpuset_attach_nodemask_from = oldcs->mems_allowed;
1443 guarantee_online_mems(cs, &to); 1449 cpuset_attach_nodemask_to = cs->mems_allowed;
1444
1445 /* do per-task migration stuff possibly for each in the threadgroup */
1446 cpuset_attach_task(tsk, &to, cs);
1447 if (threadgroup) {
1448 struct task_struct *c;
1449 rcu_read_lock();
1450 list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
1451 cpuset_attach_task(c, &to, cs);
1452 }
1453 rcu_read_unlock();
1454 }
1455
1456 /* change mm; only needs to be done once even if threadgroup */
1457 to = cs->mems_allowed;
1458 mm = get_task_mm(tsk); 1450 mm = get_task_mm(tsk);
1459 if (mm) { 1451 if (mm) {
1460 mpol_rebind_mm(mm, &to); 1452 mpol_rebind_mm(mm, &cpuset_attach_nodemask_to);
1461 if (is_memory_migrate(cs)) 1453 if (is_memory_migrate(cs))
1462 cpuset_migrate_mm(mm, &oldcs->mems_allowed, &to); 1454 cpuset_migrate_mm(mm, &cpuset_attach_nodemask_from,
1455 &cpuset_attach_nodemask_to);
1463 mmput(mm); 1456 mmput(mm);
1464 } 1457 }
1465} 1458}
@@ -1809,10 +1802,9 @@ static int cpuset_populate(struct cgroup_subsys *ss, struct cgroup *cont)
1809} 1802}
1810 1803
1811/* 1804/*
1812 * post_clone() is called at the end of cgroup_clone(). 1805 * post_clone() is called during cgroup_create() when the
1813 * 'cgroup' was just created automatically as a result of 1806 * clone_children mount argument was specified. The cgroup
1814 * a cgroup_clone(), and the current task is about to 1807 * can not yet have any tasks.
1815 * be moved into 'cgroup'.
1816 * 1808 *
1817 * Currently we refuse to set up the cgroup - thereby 1809 * Currently we refuse to set up the cgroup - thereby
1818 * refusing the task to be entered, and as a result refusing 1810 * refusing the task to be entered, and as a result refusing
@@ -1911,6 +1903,9 @@ struct cgroup_subsys cpuset_subsys = {
1911 .create = cpuset_create, 1903 .create = cpuset_create,
1912 .destroy = cpuset_destroy, 1904 .destroy = cpuset_destroy,
1913 .can_attach = cpuset_can_attach, 1905 .can_attach = cpuset_can_attach,
1906 .can_attach_task = cpuset_can_attach_task,
1907 .pre_attach = cpuset_pre_attach,
1908 .attach_task = cpuset_attach_task,
1914 .attach = cpuset_attach, 1909 .attach = cpuset_attach,
1915 .populate = cpuset_populate, 1910 .populate = cpuset_populate,
1916 .post_clone = cpuset_post_clone, 1911 .post_clone = cpuset_post_clone,
@@ -2195,7 +2190,7 @@ int cpuset_cpus_allowed_fallback(struct task_struct *tsk)
2195 rcu_read_lock(); 2190 rcu_read_lock();
2196 cs = task_cs(tsk); 2191 cs = task_cs(tsk);
2197 if (cs) 2192 if (cs)
2198 cpumask_copy(&tsk->cpus_allowed, cs->cpus_allowed); 2193 do_set_cpus_allowed(tsk, cs->cpus_allowed);
2199 rcu_read_unlock(); 2194 rcu_read_unlock();
2200 2195
2201 /* 2196 /*
@@ -2222,7 +2217,7 @@ int cpuset_cpus_allowed_fallback(struct task_struct *tsk)
2222 * Like above we can temporary set any mask and rely on 2217 * Like above we can temporary set any mask and rely on
2223 * set_cpus_allowed_ptr() as synchronization point. 2218 * set_cpus_allowed_ptr() as synchronization point.
2224 */ 2219 */
2225 cpumask_copy(&tsk->cpus_allowed, cpu_possible_mask); 2220 do_set_cpus_allowed(tsk, cpu_possible_mask);
2226 cpu = cpumask_any(cpu_active_mask); 2221 cpu = cpumask_any(cpu_active_mask);
2227 } 2222 }
2228 2223
diff --git a/kernel/cred.c b/kernel/cred.c
index 8093c16b84b1..174fa84eca30 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -1,4 +1,4 @@
1/* Task credentials management - see Documentation/credentials.txt 1/* Task credentials management - see Documentation/security/credentials.txt
2 * 2 *
3 * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. 3 * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com) 4 * Written by David Howells (dhowells@redhat.com)
@@ -49,10 +49,10 @@ struct cred init_cred = {
49 .magic = CRED_MAGIC, 49 .magic = CRED_MAGIC,
50#endif 50#endif
51 .securebits = SECUREBITS_DEFAULT, 51 .securebits = SECUREBITS_DEFAULT,
52 .cap_inheritable = CAP_INIT_INH_SET, 52 .cap_inheritable = CAP_EMPTY_SET,
53 .cap_permitted = CAP_FULL_SET, 53 .cap_permitted = CAP_FULL_SET,
54 .cap_effective = CAP_INIT_EFF_SET, 54 .cap_effective = CAP_FULL_SET,
55 .cap_bset = CAP_INIT_BSET, 55 .cap_bset = CAP_FULL_SET,
56 .user = INIT_USER, 56 .user = INIT_USER,
57 .user_ns = &init_user_ns, 57 .user_ns = &init_user_ns,
58 .group_info = &init_groups, 58 .group_info = &init_groups,
diff --git a/kernel/events/core.c b/kernel/events/core.c
index c09767f7db3e..d863b3c057bb 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5028,6 +5028,14 @@ static int __perf_event_overflow(struct perf_event *event, int nmi,
5028 else 5028 else
5029 perf_event_output(event, nmi, data, regs); 5029 perf_event_output(event, nmi, data, regs);
5030 5030
5031 if (event->fasync && event->pending_kill) {
5032 if (nmi) {
5033 event->pending_wakeup = 1;
5034 irq_work_queue(&event->pending);
5035 } else
5036 perf_event_wakeup(event);
5037 }
5038
5031 return ret; 5039 return ret;
5032} 5040}
5033 5041
diff --git a/kernel/fork.c b/kernel/fork.c
index 2b44d82b8237..0276c30401a0 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -59,7 +59,6 @@
59#include <linux/taskstats_kern.h> 59#include <linux/taskstats_kern.h>
60#include <linux/random.h> 60#include <linux/random.h>
61#include <linux/tty.h> 61#include <linux/tty.h>
62#include <linux/proc_fs.h>
63#include <linux/blkdev.h> 62#include <linux/blkdev.h>
64#include <linux/fs_struct.h> 63#include <linux/fs_struct.h>
65#include <linux/magic.h> 64#include <linux/magic.h>
@@ -383,15 +382,14 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
383 get_file(file); 382 get_file(file);
384 if (tmp->vm_flags & VM_DENYWRITE) 383 if (tmp->vm_flags & VM_DENYWRITE)
385 atomic_dec(&inode->i_writecount); 384 atomic_dec(&inode->i_writecount);
386 spin_lock(&mapping->i_mmap_lock); 385 mutex_lock(&mapping->i_mmap_mutex);
387 if (tmp->vm_flags & VM_SHARED) 386 if (tmp->vm_flags & VM_SHARED)
388 mapping->i_mmap_writable++; 387 mapping->i_mmap_writable++;
389 tmp->vm_truncate_count = mpnt->vm_truncate_count;
390 flush_dcache_mmap_lock(mapping); 388 flush_dcache_mmap_lock(mapping);
391 /* insert tmp into the share list, just after mpnt */ 389 /* insert tmp into the share list, just after mpnt */
392 vma_prio_tree_add(tmp, mpnt); 390 vma_prio_tree_add(tmp, mpnt);
393 flush_dcache_mmap_unlock(mapping); 391 flush_dcache_mmap_unlock(mapping);
394 spin_unlock(&mapping->i_mmap_lock); 392 mutex_unlock(&mapping->i_mmap_mutex);
395 } 393 }
396 394
397 /* 395 /*
@@ -522,11 +520,12 @@ struct mm_struct * mm_alloc(void)
522 struct mm_struct * mm; 520 struct mm_struct * mm;
523 521
524 mm = allocate_mm(); 522 mm = allocate_mm();
525 if (mm) { 523 if (!mm)
526 memset(mm, 0, sizeof(*mm)); 524 return NULL;
527 mm = mm_init(mm, current); 525
528 } 526 memset(mm, 0, sizeof(*mm));
529 return mm; 527 mm_init_cpumask(mm);
528 return mm_init(mm, current);
530} 529}
531 530
532/* 531/*
@@ -573,6 +572,57 @@ void mmput(struct mm_struct *mm)
573} 572}
574EXPORT_SYMBOL_GPL(mmput); 573EXPORT_SYMBOL_GPL(mmput);
575 574
575/*
576 * We added or removed a vma mapping the executable. The vmas are only mapped
577 * during exec and are not mapped with the mmap system call.
578 * Callers must hold down_write() on the mm's mmap_sem for these
579 */
580void added_exe_file_vma(struct mm_struct *mm)
581{
582 mm->num_exe_file_vmas++;
583}
584
585void removed_exe_file_vma(struct mm_struct *mm)
586{
587 mm->num_exe_file_vmas--;
588 if ((mm->num_exe_file_vmas == 0) && mm->exe_file){
589 fput(mm->exe_file);
590 mm->exe_file = NULL;
591 }
592
593}
594
595void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
596{
597 if (new_exe_file)
598 get_file(new_exe_file);
599 if (mm->exe_file)
600 fput(mm->exe_file);
601 mm->exe_file = new_exe_file;
602 mm->num_exe_file_vmas = 0;
603}
604
605struct file *get_mm_exe_file(struct mm_struct *mm)
606{
607 struct file *exe_file;
608
609 /* We need mmap_sem to protect against races with removal of
610 * VM_EXECUTABLE vmas */
611 down_read(&mm->mmap_sem);
612 exe_file = mm->exe_file;
613 if (exe_file)
614 get_file(exe_file);
615 up_read(&mm->mmap_sem);
616 return exe_file;
617}
618
619static void dup_mm_exe_file(struct mm_struct *oldmm, struct mm_struct *newmm)
620{
621 /* It's safe to write the exe_file pointer without exe_file_lock because
622 * this is called during fork when the task is not yet in /proc */
623 newmm->exe_file = get_mm_exe_file(oldmm);
624}
625
576/** 626/**
577 * get_task_mm - acquire a reference to the task's mm 627 * get_task_mm - acquire a reference to the task's mm
578 * 628 *
@@ -679,6 +729,7 @@ struct mm_struct *dup_mm(struct task_struct *tsk)
679 goto fail_nomem; 729 goto fail_nomem;
680 730
681 memcpy(mm, oldmm, sizeof(*mm)); 731 memcpy(mm, oldmm, sizeof(*mm));
732 mm_init_cpumask(mm);
682 733
683 /* Initializing for Swap token stuff */ 734 /* Initializing for Swap token stuff */
684 mm->token_priority = 0; 735 mm->token_priority = 0;
@@ -927,6 +978,10 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
927 tty_audit_fork(sig); 978 tty_audit_fork(sig);
928 sched_autogroup_fork(sig); 979 sched_autogroup_fork(sig);
929 980
981#ifdef CONFIG_CGROUPS
982 init_rwsem(&sig->threadgroup_fork_lock);
983#endif
984
930 sig->oom_adj = current->signal->oom_adj; 985 sig->oom_adj = current->signal->oom_adj;
931 sig->oom_score_adj = current->signal->oom_score_adj; 986 sig->oom_score_adj = current->signal->oom_score_adj;
932 sig->oom_score_adj_min = current->signal->oom_score_adj_min; 987 sig->oom_score_adj_min = current->signal->oom_score_adj_min;
@@ -1108,6 +1163,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1108 monotonic_to_bootbased(&p->real_start_time); 1163 monotonic_to_bootbased(&p->real_start_time);
1109 p->io_context = NULL; 1164 p->io_context = NULL;
1110 p->audit_context = NULL; 1165 p->audit_context = NULL;
1166 if (clone_flags & CLONE_THREAD)
1167 threadgroup_fork_read_lock(current);
1111 cgroup_fork(p); 1168 cgroup_fork(p);
1112#ifdef CONFIG_NUMA 1169#ifdef CONFIG_NUMA
1113 p->mempolicy = mpol_dup(p->mempolicy); 1170 p->mempolicy = mpol_dup(p->mempolicy);
@@ -1193,12 +1250,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1193 if (clone_flags & CLONE_THREAD) 1250 if (clone_flags & CLONE_THREAD)
1194 p->tgid = current->tgid; 1251 p->tgid = current->tgid;
1195 1252
1196 if (current->nsproxy != p->nsproxy) {
1197 retval = ns_cgroup_clone(p, pid);
1198 if (retval)
1199 goto bad_fork_free_pid;
1200 }
1201
1202 p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; 1253 p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
1203 /* 1254 /*
1204 * Clear TID on mm_release()? 1255 * Clear TID on mm_release()?
@@ -1312,6 +1363,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1312 write_unlock_irq(&tasklist_lock); 1363 write_unlock_irq(&tasklist_lock);
1313 proc_fork_connector(p); 1364 proc_fork_connector(p);
1314 cgroup_post_fork(p); 1365 cgroup_post_fork(p);
1366 if (clone_flags & CLONE_THREAD)
1367 threadgroup_fork_read_unlock(current);
1315 perf_event_fork(p); 1368 perf_event_fork(p);
1316 return p; 1369 return p;
1317 1370
@@ -1350,6 +1403,8 @@ bad_fork_cleanup_policy:
1350 mpol_put(p->mempolicy); 1403 mpol_put(p->mempolicy);
1351bad_fork_cleanup_cgroup: 1404bad_fork_cleanup_cgroup:
1352#endif 1405#endif
1406 if (clone_flags & CLONE_THREAD)
1407 threadgroup_fork_read_unlock(current);
1353 cgroup_exit(p, cgroup_callbacks_done); 1408 cgroup_exit(p, cgroup_callbacks_done);
1354 delayacct_tsk_free(p); 1409 delayacct_tsk_free(p);
1355 module_put(task_thread_info(p)->exec_domain->module); 1410 module_put(task_thread_info(p)->exec_domain->module);
@@ -1507,6 +1562,13 @@ void __init proc_caches_init(void)
1507 fs_cachep = kmem_cache_create("fs_cache", 1562 fs_cachep = kmem_cache_create("fs_cache",
1508 sizeof(struct fs_struct), 0, 1563 sizeof(struct fs_struct), 0,
1509 SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL); 1564 SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
1565 /*
1566 * FIXME! The "sizeof(struct mm_struct)" currently includes the
1567 * whole struct cpumask for the OFFSTACK case. We could change
1568 * this to *only* allocate as much of it as required by the
1569 * maximum number of CPU's we can ever have. The cpumask_allocation
1570 * is at the end of the structure, exactly for that reason.
1571 */
1510 mm_cachep = kmem_cache_create("mm_struct", 1572 mm_cachep = kmem_cache_create("mm_struct",
1511 sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN, 1573 sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
1512 SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL); 1574 SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index c541ee527ecb..a9205e32a059 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -748,7 +748,7 @@ static inline void retrigger_next_event(void *arg) { }
748 */ 748 */
749void clock_was_set(void) 749void clock_was_set(void)
750{ 750{
751#ifdef CONFIG_HIGHRES_TIMERS 751#ifdef CONFIG_HIGH_RES_TIMERS
752 /* Retrigger the CPU local events everywhere */ 752 /* Retrigger the CPU local events everywhere */
753 on_each_cpu(retrigger_next_event, NULL, 1); 753 on_each_cpu(retrigger_next_event, NULL, 1);
754#endif 754#endif
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 834899f2500f..4bd4faa6323a 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -19,7 +19,7 @@ static struct proc_dir_entry *root_irq_dir;
19 19
20#ifdef CONFIG_SMP 20#ifdef CONFIG_SMP
21 21
22static int irq_affinity_proc_show(struct seq_file *m, void *v) 22static int show_irq_affinity(int type, struct seq_file *m, void *v)
23{ 23{
24 struct irq_desc *desc = irq_to_desc((long)m->private); 24 struct irq_desc *desc = irq_to_desc((long)m->private);
25 const struct cpumask *mask = desc->irq_data.affinity; 25 const struct cpumask *mask = desc->irq_data.affinity;
@@ -28,7 +28,10 @@ static int irq_affinity_proc_show(struct seq_file *m, void *v)
28 if (irqd_is_setaffinity_pending(&desc->irq_data)) 28 if (irqd_is_setaffinity_pending(&desc->irq_data))
29 mask = desc->pending_mask; 29 mask = desc->pending_mask;
30#endif 30#endif
31 seq_cpumask(m, mask); 31 if (type)
32 seq_cpumask_list(m, mask);
33 else
34 seq_cpumask(m, mask);
32 seq_putc(m, '\n'); 35 seq_putc(m, '\n');
33 return 0; 36 return 0;
34} 37}
@@ -59,7 +62,18 @@ static int irq_affinity_hint_proc_show(struct seq_file *m, void *v)
59#endif 62#endif
60 63
61int no_irq_affinity; 64int no_irq_affinity;
62static ssize_t irq_affinity_proc_write(struct file *file, 65static int irq_affinity_proc_show(struct seq_file *m, void *v)
66{
67 return show_irq_affinity(0, m, v);
68}
69
70static int irq_affinity_list_proc_show(struct seq_file *m, void *v)
71{
72 return show_irq_affinity(1, m, v);
73}
74
75
76static ssize_t write_irq_affinity(int type, struct file *file,
63 const char __user *buffer, size_t count, loff_t *pos) 77 const char __user *buffer, size_t count, loff_t *pos)
64{ 78{
65 unsigned int irq = (int)(long)PDE(file->f_path.dentry->d_inode)->data; 79 unsigned int irq = (int)(long)PDE(file->f_path.dentry->d_inode)->data;
@@ -72,7 +86,10 @@ static ssize_t irq_affinity_proc_write(struct file *file,
72 if (!alloc_cpumask_var(&new_value, GFP_KERNEL)) 86 if (!alloc_cpumask_var(&new_value, GFP_KERNEL))
73 return -ENOMEM; 87 return -ENOMEM;
74 88
75 err = cpumask_parse_user(buffer, count, new_value); 89 if (type)
90 err = cpumask_parselist_user(buffer, count, new_value);
91 else
92 err = cpumask_parse_user(buffer, count, new_value);
76 if (err) 93 if (err)
77 goto free_cpumask; 94 goto free_cpumask;
78 95
@@ -100,11 +117,28 @@ free_cpumask:
100 return err; 117 return err;
101} 118}
102 119
120static ssize_t irq_affinity_proc_write(struct file *file,
121 const char __user *buffer, size_t count, loff_t *pos)
122{
123 return write_irq_affinity(0, file, buffer, count, pos);
124}
125
126static ssize_t irq_affinity_list_proc_write(struct file *file,
127 const char __user *buffer, size_t count, loff_t *pos)
128{
129 return write_irq_affinity(1, file, buffer, count, pos);
130}
131
103static int irq_affinity_proc_open(struct inode *inode, struct file *file) 132static int irq_affinity_proc_open(struct inode *inode, struct file *file)
104{ 133{
105 return single_open(file, irq_affinity_proc_show, PDE(inode)->data); 134 return single_open(file, irq_affinity_proc_show, PDE(inode)->data);
106} 135}
107 136
137static int irq_affinity_list_proc_open(struct inode *inode, struct file *file)
138{
139 return single_open(file, irq_affinity_list_proc_show, PDE(inode)->data);
140}
141
108static int irq_affinity_hint_proc_open(struct inode *inode, struct file *file) 142static int irq_affinity_hint_proc_open(struct inode *inode, struct file *file)
109{ 143{
110 return single_open(file, irq_affinity_hint_proc_show, PDE(inode)->data); 144 return single_open(file, irq_affinity_hint_proc_show, PDE(inode)->data);
@@ -125,6 +159,14 @@ static const struct file_operations irq_affinity_hint_proc_fops = {
125 .release = single_release, 159 .release = single_release,
126}; 160};
127 161
162static const struct file_operations irq_affinity_list_proc_fops = {
163 .open = irq_affinity_list_proc_open,
164 .read = seq_read,
165 .llseek = seq_lseek,
166 .release = single_release,
167 .write = irq_affinity_list_proc_write,
168};
169
128static int default_affinity_show(struct seq_file *m, void *v) 170static int default_affinity_show(struct seq_file *m, void *v)
129{ 171{
130 seq_cpumask(m, irq_default_affinity); 172 seq_cpumask(m, irq_default_affinity);
@@ -289,6 +331,10 @@ void register_irq_proc(unsigned int irq, struct irq_desc *desc)
289 proc_create_data("affinity_hint", 0400, desc->dir, 331 proc_create_data("affinity_hint", 0400, desc->dir,
290 &irq_affinity_hint_proc_fops, (void *)(long)irq); 332 &irq_affinity_hint_proc_fops, (void *)(long)irq);
291 333
334 /* create /proc/irq/<irq>/smp_affinity_list */
335 proc_create_data("smp_affinity_list", 0600, desc->dir,
336 &irq_affinity_list_proc_fops, (void *)(long)irq);
337
292 proc_create_data("node", 0444, desc->dir, 338 proc_create_data("node", 0444, desc->dir,
293 &irq_node_proc_fops, (void *)(long)irq); 339 &irq_node_proc_fops, (void *)(long)irq);
294#endif 340#endif
@@ -306,6 +352,7 @@ void unregister_irq_proc(unsigned int irq, struct irq_desc *desc)
306#ifdef CONFIG_SMP 352#ifdef CONFIG_SMP
307 remove_proc_entry("smp_affinity", desc->dir); 353 remove_proc_entry("smp_affinity", desc->dir);
308 remove_proc_entry("affinity_hint", desc->dir); 354 remove_proc_entry("affinity_hint", desc->dir);
355 remove_proc_entry("smp_affinity_list", desc->dir);
309 remove_proc_entry("node", desc->dir); 356 remove_proc_entry("node", desc->dir);
310#endif 357#endif
311 remove_proc_entry("spurious", desc->dir); 358 remove_proc_entry("spurious", desc->dir);
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 74d1c099fbd1..fa27e750dbc0 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -105,9 +105,12 @@ static int __jump_label_text_reserved(struct jump_entry *iter_start,
105} 105}
106 106
107static void __jump_label_update(struct jump_label_key *key, 107static void __jump_label_update(struct jump_label_key *key,
108 struct jump_entry *entry, int enable) 108 struct jump_entry *entry,
109 struct jump_entry *stop, int enable)
109{ 110{
110 for (; entry->key == (jump_label_t)(unsigned long)key; entry++) { 111 for (; (entry < stop) &&
112 (entry->key == (jump_label_t)(unsigned long)key);
113 entry++) {
111 /* 114 /*
112 * entry->code set to 0 invalidates module init text sections 115 * entry->code set to 0 invalidates module init text sections
113 * kernel_text_address() verifies we are not in core kernel 116 * kernel_text_address() verifies we are not in core kernel
@@ -181,7 +184,11 @@ static void __jump_label_mod_update(struct jump_label_key *key, int enable)
181 struct jump_label_mod *mod = key->next; 184 struct jump_label_mod *mod = key->next;
182 185
183 while (mod) { 186 while (mod) {
184 __jump_label_update(key, mod->entries, enable); 187 struct module *m = mod->mod;
188
189 __jump_label_update(key, mod->entries,
190 m->jump_entries + m->num_jump_entries,
191 enable);
185 mod = mod->next; 192 mod = mod->next;
186 } 193 }
187} 194}
@@ -245,7 +252,8 @@ static int jump_label_add_module(struct module *mod)
245 key->next = jlm; 252 key->next = jlm;
246 253
247 if (jump_label_enabled(key)) 254 if (jump_label_enabled(key))
248 __jump_label_update(key, iter, JUMP_LABEL_ENABLE); 255 __jump_label_update(key, iter, iter_stop,
256 JUMP_LABEL_ENABLE);
249 } 257 }
250 258
251 return 0; 259 return 0;
@@ -371,7 +379,7 @@ static void jump_label_update(struct jump_label_key *key, int enable)
371 379
372 /* if there are no users, entry can be NULL */ 380 /* if there are no users, entry can be NULL */
373 if (entry) 381 if (entry)
374 __jump_label_update(key, entry, enable); 382 __jump_label_update(key, entry, __stop___jump_table, enable);
375 383
376#ifdef CONFIG_MODULES 384#ifdef CONFIG_MODULES
377 __jump_label_mod_update(key, enable); 385 __jump_label_mod_update(key, enable);
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 5ae0ff38425f..ad6a81c58b44 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -25,6 +25,7 @@
25#include <linux/kmod.h> 25#include <linux/kmod.h>
26#include <linux/slab.h> 26#include <linux/slab.h>
27#include <linux/completion.h> 27#include <linux/completion.h>
28#include <linux/cred.h>
28#include <linux/file.h> 29#include <linux/file.h>
29#include <linux/fdtable.h> 30#include <linux/fdtable.h>
30#include <linux/workqueue.h> 31#include <linux/workqueue.h>
@@ -43,6 +44,13 @@ extern int max_threads;
43 44
44static struct workqueue_struct *khelper_wq; 45static struct workqueue_struct *khelper_wq;
45 46
47#define CAP_BSET (void *)1
48#define CAP_PI (void *)2
49
50static kernel_cap_t usermodehelper_bset = CAP_FULL_SET;
51static kernel_cap_t usermodehelper_inheritable = CAP_FULL_SET;
52static DEFINE_SPINLOCK(umh_sysctl_lock);
53
46#ifdef CONFIG_MODULES 54#ifdef CONFIG_MODULES
47 55
48/* 56/*
@@ -132,6 +140,7 @@ EXPORT_SYMBOL(__request_module);
132static int ____call_usermodehelper(void *data) 140static int ____call_usermodehelper(void *data)
133{ 141{
134 struct subprocess_info *sub_info = data; 142 struct subprocess_info *sub_info = data;
143 struct cred *new;
135 int retval; 144 int retval;
136 145
137 spin_lock_irq(&current->sighand->siglock); 146 spin_lock_irq(&current->sighand->siglock);
@@ -153,6 +162,19 @@ static int ____call_usermodehelper(void *data)
153 goto fail; 162 goto fail;
154 } 163 }
155 164
165 retval = -ENOMEM;
166 new = prepare_kernel_cred(current);
167 if (!new)
168 goto fail;
169
170 spin_lock(&umh_sysctl_lock);
171 new->cap_bset = cap_intersect(usermodehelper_bset, new->cap_bset);
172 new->cap_inheritable = cap_intersect(usermodehelper_inheritable,
173 new->cap_inheritable);
174 spin_unlock(&umh_sysctl_lock);
175
176 commit_creds(new);
177
156 retval = kernel_execve(sub_info->path, 178 retval = kernel_execve(sub_info->path,
157 (const char *const *)sub_info->argv, 179 (const char *const *)sub_info->argv,
158 (const char *const *)sub_info->envp); 180 (const char *const *)sub_info->envp);
@@ -420,6 +442,84 @@ unlock:
420} 442}
421EXPORT_SYMBOL(call_usermodehelper_exec); 443EXPORT_SYMBOL(call_usermodehelper_exec);
422 444
445static int proc_cap_handler(struct ctl_table *table, int write,
446 void __user *buffer, size_t *lenp, loff_t *ppos)
447{
448 struct ctl_table t;
449 unsigned long cap_array[_KERNEL_CAPABILITY_U32S];
450 kernel_cap_t new_cap;
451 int err, i;
452
453 if (write && (!capable(CAP_SETPCAP) ||
454 !capable(CAP_SYS_MODULE)))
455 return -EPERM;
456
457 /*
458 * convert from the global kernel_cap_t to the ulong array to print to
459 * userspace if this is a read.
460 */
461 spin_lock(&umh_sysctl_lock);
462 for (i = 0; i < _KERNEL_CAPABILITY_U32S; i++) {
463 if (table->data == CAP_BSET)
464 cap_array[i] = usermodehelper_bset.cap[i];
465 else if (table->data == CAP_PI)
466 cap_array[i] = usermodehelper_inheritable.cap[i];
467 else
468 BUG();
469 }
470 spin_unlock(&umh_sysctl_lock);
471
472 t = *table;
473 t.data = &cap_array;
474
475 /*
476 * actually read or write and array of ulongs from userspace. Remember
477 * these are least significant 32 bits first
478 */
479 err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos);
480 if (err < 0)
481 return err;
482
483 /*
484 * convert from the sysctl array of ulongs to the kernel_cap_t
485 * internal representation
486 */
487 for (i = 0; i < _KERNEL_CAPABILITY_U32S; i++)
488 new_cap.cap[i] = cap_array[i];
489
490 /*
491 * Drop everything not in the new_cap (but don't add things)
492 */
493 spin_lock(&umh_sysctl_lock);
494 if (write) {
495 if (table->data == CAP_BSET)
496 usermodehelper_bset = cap_intersect(usermodehelper_bset, new_cap);
497 if (table->data == CAP_PI)
498 usermodehelper_inheritable = cap_intersect(usermodehelper_inheritable, new_cap);
499 }
500 spin_unlock(&umh_sysctl_lock);
501
502 return 0;
503}
504
505struct ctl_table usermodehelper_table[] = {
506 {
507 .procname = "bset",
508 .data = CAP_BSET,
509 .maxlen = _KERNEL_CAPABILITY_U32S * sizeof(unsigned long),
510 .mode = 0600,
511 .proc_handler = proc_cap_handler,
512 },
513 {
514 .procname = "inheritable",
515 .data = CAP_PI,
516 .maxlen = _KERNEL_CAPABILITY_U32S * sizeof(unsigned long),
517 .mode = 0600,
518 .proc_handler = proc_cap_handler,
519 },
520 { }
521};
522
423void __init usermodehelper_init(void) 523void __init usermodehelper_init(void)
424{ 524{
425 khelper_wq = create_singlethread_workqueue("khelper"); 525 khelper_wq = create_singlethread_workqueue("khelper");
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 3b34d2732bce..4ba7cccb4994 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -202,8 +202,8 @@ void kthread_bind(struct task_struct *p, unsigned int cpu)
202 return; 202 return;
203 } 203 }
204 204
205 p->cpus_allowed = cpumask_of_cpu(cpu); 205 /* It's safe because the task is inactive. */
206 p->rt.nr_cpus_allowed = 1; 206 do_set_cpus_allowed(p, cpumask_of(cpu));
207 p->flags |= PF_THREAD_BOUND; 207 p->flags |= PF_THREAD_BOUND;
208} 208}
209EXPORT_SYMBOL(kthread_bind); 209EXPORT_SYMBOL(kthread_bind);
diff --git a/kernel/mutex.c b/kernel/mutex.c
index 2c938e2337cd..d607ed5dd441 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -131,14 +131,14 @@ EXPORT_SYMBOL(mutex_unlock);
131 */ 131 */
132static inline int __sched 132static inline int __sched
133__mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, 133__mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
134 unsigned long ip) 134 struct lockdep_map *nest_lock, unsigned long ip)
135{ 135{
136 struct task_struct *task = current; 136 struct task_struct *task = current;
137 struct mutex_waiter waiter; 137 struct mutex_waiter waiter;
138 unsigned long flags; 138 unsigned long flags;
139 139
140 preempt_disable(); 140 preempt_disable();
141 mutex_acquire(&lock->dep_map, subclass, 0, ip); 141 mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip);
142 142
143#ifdef CONFIG_MUTEX_SPIN_ON_OWNER 143#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
144 /* 144 /*
@@ -269,16 +269,25 @@ void __sched
269mutex_lock_nested(struct mutex *lock, unsigned int subclass) 269mutex_lock_nested(struct mutex *lock, unsigned int subclass)
270{ 270{
271 might_sleep(); 271 might_sleep();
272 __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass, _RET_IP_); 272 __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass, NULL, _RET_IP_);
273} 273}
274 274
275EXPORT_SYMBOL_GPL(mutex_lock_nested); 275EXPORT_SYMBOL_GPL(mutex_lock_nested);
276 276
277void __sched
278_mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest)
279{
280 might_sleep();
281 __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, nest, _RET_IP_);
282}
283
284EXPORT_SYMBOL_GPL(_mutex_lock_nest_lock);
285
277int __sched 286int __sched
278mutex_lock_killable_nested(struct mutex *lock, unsigned int subclass) 287mutex_lock_killable_nested(struct mutex *lock, unsigned int subclass)
279{ 288{
280 might_sleep(); 289 might_sleep();
281 return __mutex_lock_common(lock, TASK_KILLABLE, subclass, _RET_IP_); 290 return __mutex_lock_common(lock, TASK_KILLABLE, subclass, NULL, _RET_IP_);
282} 291}
283EXPORT_SYMBOL_GPL(mutex_lock_killable_nested); 292EXPORT_SYMBOL_GPL(mutex_lock_killable_nested);
284 293
@@ -287,7 +296,7 @@ mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass)
287{ 296{
288 might_sleep(); 297 might_sleep();
289 return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 298 return __mutex_lock_common(lock, TASK_INTERRUPTIBLE,
290 subclass, _RET_IP_); 299 subclass, NULL, _RET_IP_);
291} 300}
292 301
293EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested); 302EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested);
@@ -393,7 +402,7 @@ __mutex_lock_slowpath(atomic_t *lock_count)
393{ 402{
394 struct mutex *lock = container_of(lock_count, struct mutex, count); 403 struct mutex *lock = container_of(lock_count, struct mutex, count);
395 404
396 __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, _RET_IP_); 405 __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, NULL, _RET_IP_);
397} 406}
398 407
399static noinline int __sched 408static noinline int __sched
@@ -401,7 +410,7 @@ __mutex_lock_killable_slowpath(atomic_t *lock_count)
401{ 410{
402 struct mutex *lock = container_of(lock_count, struct mutex, count); 411 struct mutex *lock = container_of(lock_count, struct mutex, count);
403 412
404 return __mutex_lock_common(lock, TASK_KILLABLE, 0, _RET_IP_); 413 return __mutex_lock_common(lock, TASK_KILLABLE, 0, NULL, _RET_IP_);
405} 414}
406 415
407static noinline int __sched 416static noinline int __sched
@@ -409,7 +418,7 @@ __mutex_lock_interruptible_slowpath(atomic_t *lock_count)
409{ 418{
410 struct mutex *lock = container_of(lock_count, struct mutex, count); 419 struct mutex *lock = container_of(lock_count, struct mutex, count);
411 420
412 return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0, _RET_IP_); 421 return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0, NULL, _RET_IP_);
413} 422}
414#endif 423#endif
415 424
diff --git a/kernel/ns_cgroup.c b/kernel/ns_cgroup.c
deleted file mode 100644
index 2c98ad94ba0e..000000000000
--- a/kernel/ns_cgroup.c
+++ /dev/null
@@ -1,118 +0,0 @@
1/*
2 * ns_cgroup.c - namespace cgroup subsystem
3 *
4 * Copyright 2006, 2007 IBM Corp
5 */
6
7#include <linux/module.h>
8#include <linux/cgroup.h>
9#include <linux/fs.h>
10#include <linux/proc_fs.h>
11#include <linux/slab.h>
12#include <linux/nsproxy.h>
13
14struct ns_cgroup {
15 struct cgroup_subsys_state css;
16};
17
18struct cgroup_subsys ns_subsys;
19
20static inline struct ns_cgroup *cgroup_to_ns(
21 struct cgroup *cgroup)
22{
23 return container_of(cgroup_subsys_state(cgroup, ns_subsys_id),
24 struct ns_cgroup, css);
25}
26
27int ns_cgroup_clone(struct task_struct *task, struct pid *pid)
28{
29 char name[PROC_NUMBUF];
30
31 snprintf(name, PROC_NUMBUF, "%d", pid_vnr(pid));
32 return cgroup_clone(task, &ns_subsys, name);
33}
34
35/*
36 * Rules:
37 * 1. you can only enter a cgroup which is a descendant of your current
38 * cgroup
39 * 2. you can only place another process into a cgroup if
40 * a. you have CAP_SYS_ADMIN
41 * b. your cgroup is an ancestor of task's destination cgroup
42 * (hence either you are in the same cgroup as task, or in an
43 * ancestor cgroup thereof)
44 */
45static int ns_can_attach(struct cgroup_subsys *ss, struct cgroup *new_cgroup,
46 struct task_struct *task, bool threadgroup)
47{
48 if (current != task) {
49 if (!capable(CAP_SYS_ADMIN))
50 return -EPERM;
51
52 if (!cgroup_is_descendant(new_cgroup, current))
53 return -EPERM;
54 }
55
56 if (!cgroup_is_descendant(new_cgroup, task))
57 return -EPERM;
58
59 if (threadgroup) {
60 struct task_struct *c;
61 rcu_read_lock();
62 list_for_each_entry_rcu(c, &task->thread_group, thread_group) {
63 if (!cgroup_is_descendant(new_cgroup, c)) {
64 rcu_read_unlock();
65 return -EPERM;
66 }
67 }
68 rcu_read_unlock();
69 }
70
71 return 0;
72}
73
74/*
75 * Rules: you can only create a cgroup if
76 * 1. you are capable(CAP_SYS_ADMIN)
77 * 2. the target cgroup is a descendant of your own cgroup
78 */
79static struct cgroup_subsys_state *ns_create(struct cgroup_subsys *ss,
80 struct cgroup *cgroup)
81{
82 struct ns_cgroup *ns_cgroup;
83
84 if (!capable(CAP_SYS_ADMIN))
85 return ERR_PTR(-EPERM);
86 if (!cgroup_is_descendant(cgroup, current))
87 return ERR_PTR(-EPERM);
88 if (test_bit(CGRP_CLONE_CHILDREN, &cgroup->flags)) {
89 printk("ns_cgroup can't be created with parent "
90 "'clone_children' set.\n");
91 return ERR_PTR(-EINVAL);
92 }
93
94 printk_once("ns_cgroup deprecated: consider using the "
95 "'clone_children' flag without the ns_cgroup.\n");
96
97 ns_cgroup = kzalloc(sizeof(*ns_cgroup), GFP_KERNEL);
98 if (!ns_cgroup)
99 return ERR_PTR(-ENOMEM);
100 return &ns_cgroup->css;
101}
102
103static void ns_destroy(struct cgroup_subsys *ss,
104 struct cgroup *cgroup)
105{
106 struct ns_cgroup *ns_cgroup;
107
108 ns_cgroup = cgroup_to_ns(cgroup);
109 kfree(ns_cgroup);
110}
111
112struct cgroup_subsys ns_subsys = {
113 .name = "ns",
114 .can_attach = ns_can_attach,
115 .create = ns_create,
116 .destroy = ns_destroy,
117 .subsys_id = ns_subsys_id,
118};
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index a05d191ffdd9..d6a00f3de15d 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -22,6 +22,9 @@
22#include <linux/pid_namespace.h> 22#include <linux/pid_namespace.h>
23#include <net/net_namespace.h> 23#include <net/net_namespace.h>
24#include <linux/ipc_namespace.h> 24#include <linux/ipc_namespace.h>
25#include <linux/proc_fs.h>
26#include <linux/file.h>
27#include <linux/syscalls.h>
25 28
26static struct kmem_cache *nsproxy_cachep; 29static struct kmem_cache *nsproxy_cachep;
27 30
@@ -198,10 +201,6 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags,
198 goto out; 201 goto out;
199 } 202 }
200 203
201 err = ns_cgroup_clone(current, task_pid(current));
202 if (err)
203 put_nsproxy(*new_nsp);
204
205out: 204out:
206 return err; 205 return err;
207} 206}
@@ -233,6 +232,45 @@ void exit_task_namespaces(struct task_struct *p)
233 switch_task_namespaces(p, NULL); 232 switch_task_namespaces(p, NULL);
234} 233}
235 234
235SYSCALL_DEFINE2(setns, int, fd, int, nstype)
236{
237 const struct proc_ns_operations *ops;
238 struct task_struct *tsk = current;
239 struct nsproxy *new_nsproxy;
240 struct proc_inode *ei;
241 struct file *file;
242 int err;
243
244 if (!capable(CAP_SYS_ADMIN))
245 return -EPERM;
246
247 file = proc_ns_fget(fd);
248 if (IS_ERR(file))
249 return PTR_ERR(file);
250
251 err = -EINVAL;
252 ei = PROC_I(file->f_dentry->d_inode);
253 ops = ei->ns_ops;
254 if (nstype && (ops->type != nstype))
255 goto out;
256
257 new_nsproxy = create_new_namespaces(0, tsk, tsk->fs);
258 if (IS_ERR(new_nsproxy)) {
259 err = PTR_ERR(new_nsproxy);
260 goto out;
261 }
262
263 err = ops->install(new_nsproxy, ei->ns);
264 if (err) {
265 free_nsproxy(new_nsproxy);
266 goto out;
267 }
268 switch_task_namespaces(tsk, new_nsproxy);
269out:
270 fput(file);
271 return err;
272}
273
236static int __init nsproxy_cache_init(void) 274static int __init nsproxy_cache_init(void)
237{ 275{
238 nsproxy_cachep = KMEM_CACHE(nsproxy, SLAB_PANIC); 276 nsproxy_cachep = KMEM_CACHE(nsproxy, SLAB_PANIC);
diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c
index beb184689af9..6824ca7d4d0c 100644
--- a/kernel/pm_qos_params.c
+++ b/kernel/pm_qos_params.c
@@ -40,6 +40,7 @@
40#include <linux/string.h> 40#include <linux/string.h>
41#include <linux/platform_device.h> 41#include <linux/platform_device.h>
42#include <linux/init.h> 42#include <linux/init.h>
43#include <linux/kernel.h>
43 44
44#include <linux/uaccess.h> 45#include <linux/uaccess.h>
45 46
@@ -53,11 +54,17 @@ enum pm_qos_type {
53 PM_QOS_MIN /* return the smallest value */ 54 PM_QOS_MIN /* return the smallest value */
54}; 55};
55 56
57/*
58 * Note: The lockless read path depends on the CPU accessing
59 * target_value atomically. Atomic access is only guaranteed on all CPU
60 * types linux supports for 32 bit quantities
61 */
56struct pm_qos_object { 62struct pm_qos_object {
57 struct plist_head requests; 63 struct plist_head requests;
58 struct blocking_notifier_head *notifiers; 64 struct blocking_notifier_head *notifiers;
59 struct miscdevice pm_qos_power_miscdev; 65 struct miscdevice pm_qos_power_miscdev;
60 char *name; 66 char *name;
67 s32 target_value; /* Do not change to 64 bit */
61 s32 default_value; 68 s32 default_value;
62 enum pm_qos_type type; 69 enum pm_qos_type type;
63}; 70};
@@ -70,7 +77,8 @@ static struct pm_qos_object cpu_dma_pm_qos = {
70 .requests = PLIST_HEAD_INIT(cpu_dma_pm_qos.requests, pm_qos_lock), 77 .requests = PLIST_HEAD_INIT(cpu_dma_pm_qos.requests, pm_qos_lock),
71 .notifiers = &cpu_dma_lat_notifier, 78 .notifiers = &cpu_dma_lat_notifier,
72 .name = "cpu_dma_latency", 79 .name = "cpu_dma_latency",
73 .default_value = 2000 * USEC_PER_SEC, 80 .target_value = PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE,
81 .default_value = PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE,
74 .type = PM_QOS_MIN, 82 .type = PM_QOS_MIN,
75}; 83};
76 84
@@ -79,7 +87,8 @@ static struct pm_qos_object network_lat_pm_qos = {
79 .requests = PLIST_HEAD_INIT(network_lat_pm_qos.requests, pm_qos_lock), 87 .requests = PLIST_HEAD_INIT(network_lat_pm_qos.requests, pm_qos_lock),
80 .notifiers = &network_lat_notifier, 88 .notifiers = &network_lat_notifier,
81 .name = "network_latency", 89 .name = "network_latency",
82 .default_value = 2000 * USEC_PER_SEC, 90 .target_value = PM_QOS_NETWORK_LAT_DEFAULT_VALUE,
91 .default_value = PM_QOS_NETWORK_LAT_DEFAULT_VALUE,
83 .type = PM_QOS_MIN 92 .type = PM_QOS_MIN
84}; 93};
85 94
@@ -89,7 +98,8 @@ static struct pm_qos_object network_throughput_pm_qos = {
89 .requests = PLIST_HEAD_INIT(network_throughput_pm_qos.requests, pm_qos_lock), 98 .requests = PLIST_HEAD_INIT(network_throughput_pm_qos.requests, pm_qos_lock),
90 .notifiers = &network_throughput_notifier, 99 .notifiers = &network_throughput_notifier,
91 .name = "network_throughput", 100 .name = "network_throughput",
92 .default_value = 0, 101 .target_value = PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE,
102 .default_value = PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE,
93 .type = PM_QOS_MAX, 103 .type = PM_QOS_MAX,
94}; 104};
95 105
@@ -135,6 +145,16 @@ static inline int pm_qos_get_value(struct pm_qos_object *o)
135 } 145 }
136} 146}
137 147
148static inline s32 pm_qos_read_value(struct pm_qos_object *o)
149{
150 return o->target_value;
151}
152
153static inline void pm_qos_set_value(struct pm_qos_object *o, s32 value)
154{
155 o->target_value = value;
156}
157
138static void update_target(struct pm_qos_object *o, struct plist_node *node, 158static void update_target(struct pm_qos_object *o, struct plist_node *node,
139 int del, int value) 159 int del, int value)
140{ 160{
@@ -159,6 +179,7 @@ static void update_target(struct pm_qos_object *o, struct plist_node *node,
159 plist_add(node, &o->requests); 179 plist_add(node, &o->requests);
160 } 180 }
161 curr_value = pm_qos_get_value(o); 181 curr_value = pm_qos_get_value(o);
182 pm_qos_set_value(o, curr_value);
162 spin_unlock_irqrestore(&pm_qos_lock, flags); 183 spin_unlock_irqrestore(&pm_qos_lock, flags);
163 184
164 if (prev_value != curr_value) 185 if (prev_value != curr_value)
@@ -193,18 +214,11 @@ static int find_pm_qos_object_by_minor(int minor)
193 * pm_qos_request - returns current system wide qos expectation 214 * pm_qos_request - returns current system wide qos expectation
194 * @pm_qos_class: identification of which qos value is requested 215 * @pm_qos_class: identification of which qos value is requested
195 * 216 *
196 * This function returns the current target value in an atomic manner. 217 * This function returns the current target value.
197 */ 218 */
198int pm_qos_request(int pm_qos_class) 219int pm_qos_request(int pm_qos_class)
199{ 220{
200 unsigned long flags; 221 return pm_qos_read_value(pm_qos_array[pm_qos_class]);
201 int value;
202
203 spin_lock_irqsave(&pm_qos_lock, flags);
204 value = pm_qos_get_value(pm_qos_array[pm_qos_class]);
205 spin_unlock_irqrestore(&pm_qos_lock, flags);
206
207 return value;
208} 222}
209EXPORT_SYMBOL_GPL(pm_qos_request); 223EXPORT_SYMBOL_GPL(pm_qos_request);
210 224
@@ -404,24 +418,36 @@ static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf,
404 size_t count, loff_t *f_pos) 418 size_t count, loff_t *f_pos)
405{ 419{
406 s32 value; 420 s32 value;
407 int x;
408 char ascii_value[11];
409 struct pm_qos_request_list *pm_qos_req; 421 struct pm_qos_request_list *pm_qos_req;
410 422
411 if (count == sizeof(s32)) { 423 if (count == sizeof(s32)) {
412 if (copy_from_user(&value, buf, sizeof(s32))) 424 if (copy_from_user(&value, buf, sizeof(s32)))
413 return -EFAULT; 425 return -EFAULT;
414 } else if (count == 11) { /* len('0x12345678/0') */ 426 } else if (count <= 11) { /* ASCII perhaps? */
415 if (copy_from_user(ascii_value, buf, 11)) 427 char ascii_value[11];
428 unsigned long int ulval;
429 int ret;
430
431 if (copy_from_user(ascii_value, buf, count))
416 return -EFAULT; 432 return -EFAULT;
417 if (strlen(ascii_value) != 10) 433
418 return -EINVAL; 434 if (count > 10) {
419 x = sscanf(ascii_value, "%x", &value); 435 if (ascii_value[10] == '\n')
420 if (x != 1) 436 ascii_value[10] = '\0';
437 else
438 return -EINVAL;
439 } else {
440 ascii_value[count] = '\0';
441 }
442 ret = strict_strtoul(ascii_value, 16, &ulval);
443 if (ret) {
444 pr_debug("%s, 0x%lx, 0x%x\n", ascii_value, ulval, ret);
421 return -EINVAL; 445 return -EINVAL;
422 pr_debug("%s, %d, 0x%x\n", ascii_value, x, value); 446 }
423 } else 447 value = (s32)lower_32_bits(ulval);
448 } else {
424 return -EINVAL; 449 return -EINVAL;
450 }
425 451
426 pm_qos_req = filp->private_data; 452 pm_qos_req = filp->private_data;
427 pm_qos_update_request(pm_qos_req, value); 453 pm_qos_update_request(pm_qos_req, value);
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index a1b5edf1bf92..4556182527f3 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -491,6 +491,13 @@ static struct k_itimer * alloc_posix_timer(void)
491 return tmr; 491 return tmr;
492} 492}
493 493
494static void k_itimer_rcu_free(struct rcu_head *head)
495{
496 struct k_itimer *tmr = container_of(head, struct k_itimer, it.rcu);
497
498 kmem_cache_free(posix_timers_cache, tmr);
499}
500
494#define IT_ID_SET 1 501#define IT_ID_SET 1
495#define IT_ID_NOT_SET 0 502#define IT_ID_NOT_SET 0
496static void release_posix_timer(struct k_itimer *tmr, int it_id_set) 503static void release_posix_timer(struct k_itimer *tmr, int it_id_set)
@@ -503,7 +510,7 @@ static void release_posix_timer(struct k_itimer *tmr, int it_id_set)
503 } 510 }
504 put_pid(tmr->it_pid); 511 put_pid(tmr->it_pid);
505 sigqueue_free(tmr->sigq); 512 sigqueue_free(tmr->sigq);
506 kmem_cache_free(posix_timers_cache, tmr); 513 call_rcu(&tmr->it.rcu, k_itimer_rcu_free);
507} 514}
508 515
509static struct k_clock *clockid_to_kclock(const clockid_t id) 516static struct k_clock *clockid_to_kclock(const clockid_t id)
@@ -631,22 +638,18 @@ out:
631static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags) 638static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags)
632{ 639{
633 struct k_itimer *timr; 640 struct k_itimer *timr;
634 /* 641
635 * Watch out here. We do a irqsave on the idr_lock and pass the 642 rcu_read_lock();
636 * flags part over to the timer lock. Must not let interrupts in
637 * while we are moving the lock.
638 */
639 spin_lock_irqsave(&idr_lock, *flags);
640 timr = idr_find(&posix_timers_id, (int)timer_id); 643 timr = idr_find(&posix_timers_id, (int)timer_id);
641 if (timr) { 644 if (timr) {
642 spin_lock(&timr->it_lock); 645 spin_lock_irqsave(&timr->it_lock, *flags);
643 if (timr->it_signal == current->signal) { 646 if (timr->it_signal == current->signal) {
644 spin_unlock(&idr_lock); 647 rcu_read_unlock();
645 return timr; 648 return timr;
646 } 649 }
647 spin_unlock(&timr->it_lock); 650 spin_unlock_irqrestore(&timr->it_lock, *flags);
648 } 651 }
649 spin_unlock_irqrestore(&idr_lock, *flags); 652 rcu_read_unlock();
650 653
651 return NULL; 654 return NULL;
652} 655}
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index f9bec56d8825..8f7b1db1ece1 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -25,7 +25,6 @@
25#include <linux/gfp.h> 25#include <linux/gfp.h>
26#include <linux/syscore_ops.h> 26#include <linux/syscore_ops.h>
27#include <scsi/scsi_scan.h> 27#include <scsi/scsi_scan.h>
28#include <asm/suspend.h>
29 28
30#include "power.h" 29#include "power.h"
31 30
@@ -55,10 +54,9 @@ static int hibernation_mode = HIBERNATION_SHUTDOWN;
55static const struct platform_hibernation_ops *hibernation_ops; 54static const struct platform_hibernation_ops *hibernation_ops;
56 55
57/** 56/**
58 * hibernation_set_ops - set the global hibernate operations 57 * hibernation_set_ops - Set the global hibernate operations.
59 * @ops: the hibernation operations to use in subsequent hibernation transitions 58 * @ops: Hibernation operations to use in subsequent hibernation transitions.
60 */ 59 */
61
62void hibernation_set_ops(const struct platform_hibernation_ops *ops) 60void hibernation_set_ops(const struct platform_hibernation_ops *ops)
63{ 61{
64 if (ops && !(ops->begin && ops->end && ops->pre_snapshot 62 if (ops && !(ops->begin && ops->end && ops->pre_snapshot
@@ -115,10 +113,9 @@ static int hibernation_test(int level) { return 0; }
115#endif /* !CONFIG_PM_DEBUG */ 113#endif /* !CONFIG_PM_DEBUG */
116 114
117/** 115/**
118 * platform_begin - tell the platform driver that we're starting 116 * platform_begin - Call platform to start hibernation.
119 * hibernation 117 * @platform_mode: Whether or not to use the platform driver.
120 */ 118 */
121
122static int platform_begin(int platform_mode) 119static int platform_begin(int platform_mode)
123{ 120{
124 return (platform_mode && hibernation_ops) ? 121 return (platform_mode && hibernation_ops) ?
@@ -126,10 +123,9 @@ static int platform_begin(int platform_mode)
126} 123}
127 124
128/** 125/**
129 * platform_end - tell the platform driver that we've entered the 126 * platform_end - Call platform to finish transition to the working state.
130 * working state 127 * @platform_mode: Whether or not to use the platform driver.
131 */ 128 */
132
133static void platform_end(int platform_mode) 129static void platform_end(int platform_mode)
134{ 130{
135 if (platform_mode && hibernation_ops) 131 if (platform_mode && hibernation_ops)
@@ -137,8 +133,11 @@ static void platform_end(int platform_mode)
137} 133}
138 134
139/** 135/**
140 * platform_pre_snapshot - prepare the machine for hibernation using the 136 * platform_pre_snapshot - Call platform to prepare the machine for hibernation.
141 * platform driver if so configured and return an error code if it fails 137 * @platform_mode: Whether or not to use the platform driver.
138 *
139 * Use the platform driver to prepare the system for creating a hibernate image,
140 * if so configured, and return an error code if that fails.
142 */ 141 */
143 142
144static int platform_pre_snapshot(int platform_mode) 143static int platform_pre_snapshot(int platform_mode)
@@ -148,10 +147,14 @@ static int platform_pre_snapshot(int platform_mode)
148} 147}
149 148
150/** 149/**
151 * platform_leave - prepare the machine for switching to the normal mode 150 * platform_leave - Call platform to prepare a transition to the working state.
152 * of operation using the platform driver (called with interrupts disabled) 151 * @platform_mode: Whether or not to use the platform driver.
152 *
153 * Use the platform driver to prepare the machine for switching to the
154 * normal mode of operation.
155 *
156 * This routine is called on one CPU with interrupts disabled.
153 */ 157 */
154
155static void platform_leave(int platform_mode) 158static void platform_leave(int platform_mode)
156{ 159{
157 if (platform_mode && hibernation_ops) 160 if (platform_mode && hibernation_ops)
@@ -159,10 +162,14 @@ static void platform_leave(int platform_mode)
159} 162}
160 163
161/** 164/**
162 * platform_finish - switch the machine to the normal mode of operation 165 * platform_finish - Call platform to switch the system to the working state.
163 * using the platform driver (must be called after platform_prepare()) 166 * @platform_mode: Whether or not to use the platform driver.
167 *
168 * Use the platform driver to switch the machine to the normal mode of
169 * operation.
170 *
171 * This routine must be called after platform_prepare().
164 */ 172 */
165
166static void platform_finish(int platform_mode) 173static void platform_finish(int platform_mode)
167{ 174{
168 if (platform_mode && hibernation_ops) 175 if (platform_mode && hibernation_ops)
@@ -170,11 +177,15 @@ static void platform_finish(int platform_mode)
170} 177}
171 178
172/** 179/**
173 * platform_pre_restore - prepare the platform for the restoration from a 180 * platform_pre_restore - Prepare for hibernate image restoration.
174 * hibernation image. If the restore fails after this function has been 181 * @platform_mode: Whether or not to use the platform driver.
175 * called, platform_restore_cleanup() must be called. 182 *
183 * Use the platform driver to prepare the system for resume from a hibernation
184 * image.
185 *
186 * If the restore fails after this function has been called,
187 * platform_restore_cleanup() must be called.
176 */ 188 */
177
178static int platform_pre_restore(int platform_mode) 189static int platform_pre_restore(int platform_mode)
179{ 190{
180 return (platform_mode && hibernation_ops) ? 191 return (platform_mode && hibernation_ops) ?
@@ -182,12 +193,16 @@ static int platform_pre_restore(int platform_mode)
182} 193}
183 194
184/** 195/**
185 * platform_restore_cleanup - switch the platform to the normal mode of 196 * platform_restore_cleanup - Switch to the working state after failing restore.
186 * operation after a failing restore. If platform_pre_restore() has been 197 * @platform_mode: Whether or not to use the platform driver.
187 * called before the failing restore, this function must be called too, 198 *
188 * regardless of the result of platform_pre_restore(). 199 * Use the platform driver to switch the system to the normal mode of operation
200 * after a failing restore.
201 *
202 * If platform_pre_restore() has been called before the failing restore, this
203 * function must be called too, regardless of the result of
204 * platform_pre_restore().
189 */ 205 */
190
191static void platform_restore_cleanup(int platform_mode) 206static void platform_restore_cleanup(int platform_mode)
192{ 207{
193 if (platform_mode && hibernation_ops) 208 if (platform_mode && hibernation_ops)
@@ -195,10 +210,9 @@ static void platform_restore_cleanup(int platform_mode)
195} 210}
196 211
197/** 212/**
198 * platform_recover - recover the platform from a failure to suspend 213 * platform_recover - Recover from a failure to suspend devices.
199 * devices. 214 * @platform_mode: Whether or not to use the platform driver.
200 */ 215 */
201
202static void platform_recover(int platform_mode) 216static void platform_recover(int platform_mode)
203{ 217{
204 if (platform_mode && hibernation_ops && hibernation_ops->recover) 218 if (platform_mode && hibernation_ops && hibernation_ops->recover)
@@ -206,13 +220,12 @@ static void platform_recover(int platform_mode)
206} 220}
207 221
208/** 222/**
209 * swsusp_show_speed - print the time elapsed between two events. 223 * swsusp_show_speed - Print time elapsed between two events during hibernation.
210 * @start: Starting event. 224 * @start: Starting event.
211 * @stop: Final event. 225 * @stop: Final event.
212 * @nr_pages - number of pages processed between @start and @stop 226 * @nr_pages: Number of memory pages processed between @start and @stop.
213 * @msg - introductory message to print 227 * @msg: Additional diagnostic message to print.
214 */ 228 */
215
216void swsusp_show_speed(struct timeval *start, struct timeval *stop, 229void swsusp_show_speed(struct timeval *start, struct timeval *stop,
217 unsigned nr_pages, char *msg) 230 unsigned nr_pages, char *msg)
218{ 231{
@@ -235,25 +248,18 @@ void swsusp_show_speed(struct timeval *start, struct timeval *stop,
235} 248}
236 249
237/** 250/**
238 * create_image - freeze devices that need to be frozen with interrupts 251 * create_image - Create a hibernation image.
239 * off, create the hibernation image and thaw those devices. Control 252 * @platform_mode: Whether or not to use the platform driver.
240 * reappears in this routine after a restore. 253 *
254 * Execute device drivers' .freeze_noirq() callbacks, create a hibernation image
255 * and execute the drivers' .thaw_noirq() callbacks.
256 *
257 * Control reappears in this routine after the subsequent restore.
241 */ 258 */
242
243static int create_image(int platform_mode) 259static int create_image(int platform_mode)
244{ 260{
245 int error; 261 int error;
246 262
247 error = arch_prepare_suspend();
248 if (error)
249 return error;
250
251 /* At this point, dpm_suspend_start() has been called, but *not*
252 * dpm_suspend_noirq(). We *must* call dpm_suspend_noirq() now.
253 * Otherwise, drivers for some devices (e.g. interrupt controllers)
254 * become desynchronized with the actual state of the hardware
255 * at resume time, and evil weirdness ensues.
256 */
257 error = dpm_suspend_noirq(PMSG_FREEZE); 263 error = dpm_suspend_noirq(PMSG_FREEZE);
258 if (error) { 264 if (error) {
259 printk(KERN_ERR "PM: Some devices failed to power down, " 265 printk(KERN_ERR "PM: Some devices failed to power down, "
@@ -297,9 +303,6 @@ static int create_image(int platform_mode)
297 303
298 Power_up: 304 Power_up:
299 syscore_resume(); 305 syscore_resume();
300 /* NOTE: dpm_resume_noirq() is just a resume() for devices
301 * that suspended with irqs off ... no overall powerup.
302 */
303 306
304 Enable_irqs: 307 Enable_irqs:
305 local_irq_enable(); 308 local_irq_enable();
@@ -317,14 +320,11 @@ static int create_image(int platform_mode)
317} 320}
318 321
319/** 322/**
320 * hibernation_snapshot - quiesce devices and create the hibernation 323 * hibernation_snapshot - Quiesce devices and create a hibernation image.
321 * snapshot image. 324 * @platform_mode: If set, use platform driver to prepare for the transition.
322 * @platform_mode - if set, use the platform driver, if available, to
323 * prepare the platform firmware for the power transition.
324 * 325 *
325 * Must be called with pm_mutex held 326 * This routine must be called with pm_mutex held.
326 */ 327 */
327
328int hibernation_snapshot(int platform_mode) 328int hibernation_snapshot(int platform_mode)
329{ 329{
330 pm_message_t msg = PMSG_RECOVER; 330 pm_message_t msg = PMSG_RECOVER;
@@ -384,13 +384,14 @@ int hibernation_snapshot(int platform_mode)
384} 384}
385 385
386/** 386/**
387 * resume_target_kernel - prepare devices that need to be suspended with 387 * resume_target_kernel - Restore system state from a hibernation image.
388 * interrupts off, restore the contents of highmem that have not been 388 * @platform_mode: Whether or not to use the platform driver.
389 * restored yet from the image and run the low level code that will restore 389 *
390 * the remaining contents of memory and switch to the just restored target 390 * Execute device drivers' .freeze_noirq() callbacks, restore the contents of
391 * kernel. 391 * highmem that have not been restored yet from the image and run the low-level
392 * code that will restore the remaining contents of memory and switch to the
393 * just restored target kernel.
392 */ 394 */
393
394static int resume_target_kernel(bool platform_mode) 395static int resume_target_kernel(bool platform_mode)
395{ 396{
396 int error; 397 int error;
@@ -416,24 +417,26 @@ static int resume_target_kernel(bool platform_mode)
416 if (error) 417 if (error)
417 goto Enable_irqs; 418 goto Enable_irqs;
418 419
419 /* We'll ignore saved state, but this gets preempt count (etc) right */
420 save_processor_state(); 420 save_processor_state();
421 error = restore_highmem(); 421 error = restore_highmem();
422 if (!error) { 422 if (!error) {
423 error = swsusp_arch_resume(); 423 error = swsusp_arch_resume();
424 /* 424 /*
425 * The code below is only ever reached in case of a failure. 425 * The code below is only ever reached in case of a failure.
426 * Otherwise execution continues at place where 426 * Otherwise, execution continues at the place where
427 * swsusp_arch_suspend() was called 427 * swsusp_arch_suspend() was called.
428 */ 428 */
429 BUG_ON(!error); 429 BUG_ON(!error);
430 /* This call to restore_highmem() undos the previous one */ 430 /*
431 * This call to restore_highmem() reverts the changes made by
432 * the previous one.
433 */
431 restore_highmem(); 434 restore_highmem();
432 } 435 }
433 /* 436 /*
434 * The only reason why swsusp_arch_resume() can fail is memory being 437 * The only reason why swsusp_arch_resume() can fail is memory being
435 * very tight, so we have to free it as soon as we can to avoid 438 * very tight, so we have to free it as soon as we can to avoid
436 * subsequent failures 439 * subsequent failures.
437 */ 440 */
438 swsusp_free(); 441 swsusp_free();
439 restore_processor_state(); 442 restore_processor_state();
@@ -456,14 +459,12 @@ static int resume_target_kernel(bool platform_mode)
456} 459}
457 460
458/** 461/**
459 * hibernation_restore - quiesce devices and restore the hibernation 462 * hibernation_restore - Quiesce devices and restore from a hibernation image.
460 * snapshot image. If successful, control returns in hibernation_snaphot() 463 * @platform_mode: If set, use platform driver to prepare for the transition.
461 * @platform_mode - if set, use the platform driver, if available, to
462 * prepare the platform firmware for the transition.
463 * 464 *
464 * Must be called with pm_mutex held 465 * This routine must be called with pm_mutex held. If it is successful, control
466 * reappears in the restored target kernel in hibernation_snapshot().
465 */ 467 */
466
467int hibernation_restore(int platform_mode) 468int hibernation_restore(int platform_mode)
468{ 469{
469 int error; 470 int error;
@@ -483,10 +484,8 @@ int hibernation_restore(int platform_mode)
483} 484}
484 485
485/** 486/**
486 * hibernation_platform_enter - enter the hibernation state using the 487 * hibernation_platform_enter - Power off the system using the platform driver.
487 * platform driver (if available)
488 */ 488 */
489
490int hibernation_platform_enter(void) 489int hibernation_platform_enter(void)
491{ 490{
492 int error; 491 int error;
@@ -557,12 +556,12 @@ int hibernation_platform_enter(void)
557} 556}
558 557
559/** 558/**
560 * power_down - Shut the machine down for hibernation. 559 * power_down - Shut the machine down for hibernation.
561 * 560 *
562 * Use the platform driver, if configured so; otherwise try 561 * Use the platform driver, if configured, to put the system into the sleep
563 * to power off or reboot. 562 * state corresponding to hibernation, or try to power it off or reboot,
563 * depending on the value of hibernation_mode.
564 */ 564 */
565
566static void power_down(void) 565static void power_down(void)
567{ 566{
568 switch (hibernation_mode) { 567 switch (hibernation_mode) {
@@ -599,9 +598,8 @@ static int prepare_processes(void)
599} 598}
600 599
601/** 600/**
602 * hibernate - The granpappy of the built-in hibernation management 601 * hibernate - Carry out system hibernation, including saving the image.
603 */ 602 */
604
605int hibernate(void) 603int hibernate(void)
606{ 604{
607 int error; 605 int error;
@@ -679,17 +677,20 @@ int hibernate(void)
679 677
680 678
681/** 679/**
682 * software_resume - Resume from a saved image. 680 * software_resume - Resume from a saved hibernation image.
683 * 681 *
684 * Called as a late_initcall (so all devices are discovered and 682 * This routine is called as a late initcall, when all devices have been
685 * initialized), we call swsusp to see if we have a saved image or not. 683 * discovered and initialized already.
686 * If so, we quiesce devices, the restore the saved image. We will
687 * return above (in hibernate() ) if everything goes well.
688 * Otherwise, we fail gracefully and return to the normally
689 * scheduled program.
690 * 684 *
685 * The image reading code is called to see if there is a hibernation image
686 * available for reading. If that is the case, devices are quiesced and the
687 * contents of memory are restored from the saved image.
688 *
689 * If this is successful, control reappears in the restored target kernel in
690 * hibernation_snapshot() which returns to hibernate(). Otherwise, the routine
691 * attempts to recover gracefully and make the kernel return to the normal mode
692 * of operation.
691 */ 693 */
692
693static int software_resume(void) 694static int software_resume(void)
694{ 695{
695 int error; 696 int error;
@@ -819,21 +820,17 @@ static const char * const hibernation_modes[] = {
819 [HIBERNATION_TESTPROC] = "testproc", 820 [HIBERNATION_TESTPROC] = "testproc",
820}; 821};
821 822
822/** 823/*
823 * disk - Control hibernation mode 824 * /sys/power/disk - Control hibernation mode.
824 *
825 * Suspend-to-disk can be handled in several ways. We have a few options
826 * for putting the system to sleep - using the platform driver (e.g. ACPI
827 * or other hibernation_ops), powering off the system or rebooting the
828 * system (for testing) as well as the two test modes.
829 * 825 *
830 * The system can support 'platform', and that is known a priori (and 826 * Hibernation can be handled in several ways. There are a few different ways
831 * encoded by the presence of hibernation_ops). However, the user may 827 * to put the system into the sleep state: using the platform driver (e.g. ACPI
832 * choose 'shutdown' or 'reboot' as alternatives, as well as one fo the 828 * or other hibernation_ops), powering it off or rebooting it (for testing
833 * test modes, 'test' or 'testproc'. 829 * mostly), or using one of the two available test modes.
834 * 830 *
835 * show() will display what the mode is currently set to. 831 * The sysfs file /sys/power/disk provides an interface for selecting the
836 * store() will accept one of 832 * hibernation mode to use. Reading from this file causes the available modes
833 * to be printed. There are 5 modes that can be supported:
837 * 834 *
838 * 'platform' 835 * 'platform'
839 * 'shutdown' 836 * 'shutdown'
@@ -841,8 +838,14 @@ static const char * const hibernation_modes[] = {
841 * 'test' 838 * 'test'
842 * 'testproc' 839 * 'testproc'
843 * 840 *
844 * It will only change to 'platform' if the system 841 * If a platform hibernation driver is in use, 'platform' will be supported
845 * supports it (as determined by having hibernation_ops). 842 * and will be used by default. Otherwise, 'shutdown' will be used by default.
843 * The selected option (i.e. the one corresponding to the current value of
844 * hibernation_mode) is enclosed in square brackets.
845 *
846 * To select a given hibernation mode it is necessary to write the mode's
847 * string representation (as returned by reading from /sys/power/disk) back
848 * into /sys/power/disk.
846 */ 849 */
847 850
848static ssize_t disk_show(struct kobject *kobj, struct kobj_attribute *attr, 851static ssize_t disk_show(struct kobject *kobj, struct kobj_attribute *attr,
@@ -875,7 +878,6 @@ static ssize_t disk_show(struct kobject *kobj, struct kobj_attribute *attr,
875 return buf-start; 878 return buf-start;
876} 879}
877 880
878
879static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr, 881static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr,
880 const char *buf, size_t n) 882 const char *buf, size_t n)
881{ 883{
diff --git a/kernel/printk.c b/kernel/printk.c
index da8ca817eae3..35185392173f 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -31,6 +31,7 @@
31#include <linux/smp.h> 31#include <linux/smp.h>
32#include <linux/security.h> 32#include <linux/security.h>
33#include <linux/bootmem.h> 33#include <linux/bootmem.h>
34#include <linux/memblock.h>
34#include <linux/syscalls.h> 35#include <linux/syscalls.h>
35#include <linux/kexec.h> 36#include <linux/kexec.h>
36#include <linux/kdb.h> 37#include <linux/kdb.h>
@@ -167,46 +168,74 @@ void log_buf_kexec_setup(void)
167} 168}
168#endif 169#endif
169 170
171/* requested log_buf_len from kernel cmdline */
172static unsigned long __initdata new_log_buf_len;
173
174/* save requested log_buf_len since it's too early to process it */
170static int __init log_buf_len_setup(char *str) 175static int __init log_buf_len_setup(char *str)
171{ 176{
172 unsigned size = memparse(str, &str); 177 unsigned size = memparse(str, &str);
173 unsigned long flags;
174 178
175 if (size) 179 if (size)
176 size = roundup_pow_of_two(size); 180 size = roundup_pow_of_two(size);
177 if (size > log_buf_len) { 181 if (size > log_buf_len)
178 unsigned start, dest_idx, offset; 182 new_log_buf_len = size;
179 char *new_log_buf;
180 183
181 new_log_buf = alloc_bootmem(size); 184 return 0;
182 if (!new_log_buf) { 185}
183 printk(KERN_WARNING "log_buf_len: allocation failed\n"); 186early_param("log_buf_len", log_buf_len_setup);
184 goto out;
185 }
186 187
187 spin_lock_irqsave(&logbuf_lock, flags); 188void __init setup_log_buf(int early)
188 log_buf_len = size; 189{
189 log_buf = new_log_buf; 190 unsigned long flags;
190 191 unsigned start, dest_idx, offset;
191 offset = start = min(con_start, log_start); 192 char *new_log_buf;
192 dest_idx = 0; 193 int free;
193 while (start != log_end) { 194
194 log_buf[dest_idx] = __log_buf[start & (__LOG_BUF_LEN - 1)]; 195 if (!new_log_buf_len)
195 start++; 196 return;
196 dest_idx++; 197
197 } 198 if (early) {
198 log_start -= offset; 199 unsigned long mem;
199 con_start -= offset;
200 log_end -= offset;
201 spin_unlock_irqrestore(&logbuf_lock, flags);
202 200
203 printk(KERN_NOTICE "log_buf_len: %d\n", log_buf_len); 201 mem = memblock_alloc(new_log_buf_len, PAGE_SIZE);
202 if (mem == MEMBLOCK_ERROR)
203 return;
204 new_log_buf = __va(mem);
205 } else {
206 new_log_buf = alloc_bootmem_nopanic(new_log_buf_len);
204 } 207 }
205out:
206 return 1;
207}
208 208
209__setup("log_buf_len=", log_buf_len_setup); 209 if (unlikely(!new_log_buf)) {
210 pr_err("log_buf_len: %ld bytes not available\n",
211 new_log_buf_len);
212 return;
213 }
214
215 spin_lock_irqsave(&logbuf_lock, flags);
216 log_buf_len = new_log_buf_len;
217 log_buf = new_log_buf;
218 new_log_buf_len = 0;
219 free = __LOG_BUF_LEN - log_end;
220
221 offset = start = min(con_start, log_start);
222 dest_idx = 0;
223 while (start != log_end) {
224 unsigned log_idx_mask = start & (__LOG_BUF_LEN - 1);
225
226 log_buf[dest_idx] = __log_buf[log_idx_mask];
227 start++;
228 dest_idx++;
229 }
230 log_start -= offset;
231 con_start -= offset;
232 log_end -= offset;
233 spin_unlock_irqrestore(&logbuf_lock, flags);
234
235 pr_info("log_buf_len: %d\n", log_buf_len);
236 pr_info("early log buf free: %d(%d%%)\n",
237 free, (free * 100) / __LOG_BUF_LEN);
238}
210 239
211#ifdef CONFIG_BOOT_PRINTK_DELAY 240#ifdef CONFIG_BOOT_PRINTK_DELAY
212 241
diff --git a/kernel/profile.c b/kernel/profile.c
index 14c9f87b9fc9..961b389fe52f 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -303,14 +303,12 @@ static void profile_discard_flip_buffers(void)
303 mutex_unlock(&profile_flip_mutex); 303 mutex_unlock(&profile_flip_mutex);
304} 304}
305 305
306void profile_hits(int type, void *__pc, unsigned int nr_hits) 306static void do_profile_hits(int type, void *__pc, unsigned int nr_hits)
307{ 307{
308 unsigned long primary, secondary, flags, pc = (unsigned long)__pc; 308 unsigned long primary, secondary, flags, pc = (unsigned long)__pc;
309 int i, j, cpu; 309 int i, j, cpu;
310 struct profile_hit *hits; 310 struct profile_hit *hits;
311 311
312 if (prof_on != type || !prof_buffer)
313 return;
314 pc = min((pc - (unsigned long)_stext) >> prof_shift, prof_len - 1); 312 pc = min((pc - (unsigned long)_stext) >> prof_shift, prof_len - 1);
315 i = primary = (pc & (NR_PROFILE_GRP - 1)) << PROFILE_GRPSHIFT; 313 i = primary = (pc & (NR_PROFILE_GRP - 1)) << PROFILE_GRPSHIFT;
316 secondary = (~(pc << 1) & (NR_PROFILE_GRP - 1)) << PROFILE_GRPSHIFT; 314 secondary = (~(pc << 1) & (NR_PROFILE_GRP - 1)) << PROFILE_GRPSHIFT;
@@ -417,16 +415,20 @@ out_free:
417#define profile_discard_flip_buffers() do { } while (0) 415#define profile_discard_flip_buffers() do { } while (0)
418#define profile_cpu_callback NULL 416#define profile_cpu_callback NULL
419 417
420void profile_hits(int type, void *__pc, unsigned int nr_hits) 418static void do_profile_hits(int type, void *__pc, unsigned int nr_hits)
421{ 419{
422 unsigned long pc; 420 unsigned long pc;
423
424 if (prof_on != type || !prof_buffer)
425 return;
426 pc = ((unsigned long)__pc - (unsigned long)_stext) >> prof_shift; 421 pc = ((unsigned long)__pc - (unsigned long)_stext) >> prof_shift;
427 atomic_add(nr_hits, &prof_buffer[min(pc, prof_len - 1)]); 422 atomic_add(nr_hits, &prof_buffer[min(pc, prof_len - 1)]);
428} 423}
429#endif /* !CONFIG_SMP */ 424#endif /* !CONFIG_SMP */
425
426void profile_hits(int type, void *__pc, unsigned int nr_hits)
427{
428 if (prof_on != type || !prof_buffer)
429 return;
430 do_profile_hits(type, __pc, nr_hits);
431}
430EXPORT_SYMBOL_GPL(profile_hits); 432EXPORT_SYMBOL_GPL(profile_hits);
431 433
432void profile_tick(int type) 434void profile_tick(int type)
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 7a81fc071344..2df115790cd9 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -562,7 +562,7 @@ static int ptrace_resume(struct task_struct *child, long request,
562 } 562 }
563 563
564 child->exit_code = data; 564 child->exit_code = data;
565 wake_up_process(child); 565 wake_up_state(child, __TASK_TRACED);
566 566
567 return 0; 567 return 0;
568} 568}
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index f07d2f03181a..77a7671dd147 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -36,7 +36,7 @@
36#include <linux/interrupt.h> 36#include <linux/interrupt.h>
37#include <linux/sched.h> 37#include <linux/sched.h>
38#include <linux/nmi.h> 38#include <linux/nmi.h>
39#include <asm/atomic.h> 39#include <linux/atomic.h>
40#include <linux/bitops.h> 40#include <linux/bitops.h>
41#include <linux/module.h> 41#include <linux/module.h>
42#include <linux/completion.h> 42#include <linux/completion.h>
@@ -95,7 +95,6 @@ static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task);
95DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status); 95DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
96DEFINE_PER_CPU(int, rcu_cpu_kthread_cpu); 96DEFINE_PER_CPU(int, rcu_cpu_kthread_cpu);
97DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); 97DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
98static DEFINE_PER_CPU(wait_queue_head_t, rcu_cpu_wq);
99DEFINE_PER_CPU(char, rcu_cpu_has_work); 98DEFINE_PER_CPU(char, rcu_cpu_has_work);
100static char rcu_kthreads_spawnable; 99static char rcu_kthreads_spawnable;
101 100
@@ -163,7 +162,7 @@ EXPORT_SYMBOL_GPL(rcu_note_context_switch);
163#ifdef CONFIG_NO_HZ 162#ifdef CONFIG_NO_HZ
164DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { 163DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
165 .dynticks_nesting = 1, 164 .dynticks_nesting = 1,
166 .dynticks = 1, 165 .dynticks = ATOMIC_INIT(1),
167}; 166};
168#endif /* #ifdef CONFIG_NO_HZ */ 167#endif /* #ifdef CONFIG_NO_HZ */
169 168
@@ -322,13 +321,25 @@ void rcu_enter_nohz(void)
322 unsigned long flags; 321 unsigned long flags;
323 struct rcu_dynticks *rdtp; 322 struct rcu_dynticks *rdtp;
324 323
325 smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */
326 local_irq_save(flags); 324 local_irq_save(flags);
327 rdtp = &__get_cpu_var(rcu_dynticks); 325 rdtp = &__get_cpu_var(rcu_dynticks);
328 rdtp->dynticks++; 326 if (--rdtp->dynticks_nesting) {
329 rdtp->dynticks_nesting--; 327 local_irq_restore(flags);
330 WARN_ON_ONCE(rdtp->dynticks & 0x1); 328 return;
329 }
330 /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
331 smp_mb__before_atomic_inc(); /* See above. */
332 atomic_inc(&rdtp->dynticks);
333 smp_mb__after_atomic_inc(); /* Force ordering with next sojourn. */
334 WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
331 local_irq_restore(flags); 335 local_irq_restore(flags);
336
337 /* If the interrupt queued a callback, get out of dyntick mode. */
338 if (in_irq() &&
339 (__get_cpu_var(rcu_sched_data).nxtlist ||
340 __get_cpu_var(rcu_bh_data).nxtlist ||
341 rcu_preempt_needs_cpu(smp_processor_id())))
342 set_need_resched();
332} 343}
333 344
334/* 345/*
@@ -344,11 +355,16 @@ void rcu_exit_nohz(void)
344 355
345 local_irq_save(flags); 356 local_irq_save(flags);
346 rdtp = &__get_cpu_var(rcu_dynticks); 357 rdtp = &__get_cpu_var(rcu_dynticks);
347 rdtp->dynticks++; 358 if (rdtp->dynticks_nesting++) {
348 rdtp->dynticks_nesting++; 359 local_irq_restore(flags);
349 WARN_ON_ONCE(!(rdtp->dynticks & 0x1)); 360 return;
361 }
362 smp_mb__before_atomic_inc(); /* Force ordering w/previous sojourn. */
363 atomic_inc(&rdtp->dynticks);
364 /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
365 smp_mb__after_atomic_inc(); /* See above. */
366 WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
350 local_irq_restore(flags); 367 local_irq_restore(flags);
351 smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */
352} 368}
353 369
354/** 370/**
@@ -362,11 +378,15 @@ void rcu_nmi_enter(void)
362{ 378{
363 struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); 379 struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks);
364 380
365 if (rdtp->dynticks & 0x1) 381 if (rdtp->dynticks_nmi_nesting == 0 &&
382 (atomic_read(&rdtp->dynticks) & 0x1))
366 return; 383 return;
367 rdtp->dynticks_nmi++; 384 rdtp->dynticks_nmi_nesting++;
368 WARN_ON_ONCE(!(rdtp->dynticks_nmi & 0x1)); 385 smp_mb__before_atomic_inc(); /* Force delay from prior write. */
369 smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */ 386 atomic_inc(&rdtp->dynticks);
387 /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
388 smp_mb__after_atomic_inc(); /* See above. */
389 WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
370} 390}
371 391
372/** 392/**
@@ -380,11 +400,14 @@ void rcu_nmi_exit(void)
380{ 400{
381 struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); 401 struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks);
382 402
383 if (rdtp->dynticks & 0x1) 403 if (rdtp->dynticks_nmi_nesting == 0 ||
404 --rdtp->dynticks_nmi_nesting != 0)
384 return; 405 return;
385 smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */ 406 /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
386 rdtp->dynticks_nmi++; 407 smp_mb__before_atomic_inc(); /* See above. */
387 WARN_ON_ONCE(rdtp->dynticks_nmi & 0x1); 408 atomic_inc(&rdtp->dynticks);
409 smp_mb__after_atomic_inc(); /* Force delay to next write. */
410 WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
388} 411}
389 412
390/** 413/**
@@ -395,13 +418,7 @@ void rcu_nmi_exit(void)
395 */ 418 */
396void rcu_irq_enter(void) 419void rcu_irq_enter(void)
397{ 420{
398 struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); 421 rcu_exit_nohz();
399
400 if (rdtp->dynticks_nesting++)
401 return;
402 rdtp->dynticks++;
403 WARN_ON_ONCE(!(rdtp->dynticks & 0x1));
404 smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */
405} 422}
406 423
407/** 424/**
@@ -413,18 +430,7 @@ void rcu_irq_enter(void)
413 */ 430 */
414void rcu_irq_exit(void) 431void rcu_irq_exit(void)
415{ 432{
416 struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); 433 rcu_enter_nohz();
417
418 if (--rdtp->dynticks_nesting)
419 return;
420 smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */
421 rdtp->dynticks++;
422 WARN_ON_ONCE(rdtp->dynticks & 0x1);
423
424 /* If the interrupt queued a callback, get out of dyntick mode. */
425 if (__this_cpu_read(rcu_sched_data.nxtlist) ||
426 __this_cpu_read(rcu_bh_data.nxtlist))
427 set_need_resched();
428} 434}
429 435
430#ifdef CONFIG_SMP 436#ifdef CONFIG_SMP
@@ -436,19 +442,8 @@ void rcu_irq_exit(void)
436 */ 442 */
437static int dyntick_save_progress_counter(struct rcu_data *rdp) 443static int dyntick_save_progress_counter(struct rcu_data *rdp)
438{ 444{
439 int ret; 445 rdp->dynticks_snap = atomic_add_return(0, &rdp->dynticks->dynticks);
440 int snap; 446 return 0;
441 int snap_nmi;
442
443 snap = rdp->dynticks->dynticks;
444 snap_nmi = rdp->dynticks->dynticks_nmi;
445 smp_mb(); /* Order sampling of snap with end of grace period. */
446 rdp->dynticks_snap = snap;
447 rdp->dynticks_nmi_snap = snap_nmi;
448 ret = ((snap & 0x1) == 0) && ((snap_nmi & 0x1) == 0);
449 if (ret)
450 rdp->dynticks_fqs++;
451 return ret;
452} 447}
453 448
454/* 449/*
@@ -459,16 +454,11 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp)
459 */ 454 */
460static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) 455static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
461{ 456{
462 long curr; 457 unsigned long curr;
463 long curr_nmi; 458 unsigned long snap;
464 long snap;
465 long snap_nmi;
466 459
467 curr = rdp->dynticks->dynticks; 460 curr = (unsigned long)atomic_add_return(0, &rdp->dynticks->dynticks);
468 snap = rdp->dynticks_snap; 461 snap = (unsigned long)rdp->dynticks_snap;
469 curr_nmi = rdp->dynticks->dynticks_nmi;
470 snap_nmi = rdp->dynticks_nmi_snap;
471 smp_mb(); /* force ordering with cpu entering/leaving dynticks. */
472 462
473 /* 463 /*
474 * If the CPU passed through or entered a dynticks idle phase with 464 * If the CPU passed through or entered a dynticks idle phase with
@@ -478,8 +468,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
478 * read-side critical section that started before the beginning 468 * read-side critical section that started before the beginning
479 * of the current RCU grace period. 469 * of the current RCU grace period.
480 */ 470 */
481 if ((curr != snap || (curr & 0x1) == 0) && 471 if ((curr & 0x1) == 0 || ULONG_CMP_GE(curr, snap + 2)) {
482 (curr_nmi != snap_nmi || (curr_nmi & 0x1) == 0)) {
483 rdp->dynticks_fqs++; 472 rdp->dynticks_fqs++;
484 return 1; 473 return 1;
485 } 474 }
@@ -908,6 +897,12 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
908 unsigned long gp_duration; 897 unsigned long gp_duration;
909 898
910 WARN_ON_ONCE(!rcu_gp_in_progress(rsp)); 899 WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
900
901 /*
902 * Ensure that all grace-period and pre-grace-period activity
903 * is seen before the assignment to rsp->completed.
904 */
905 smp_mb(); /* See above block comment. */
911 gp_duration = jiffies - rsp->gp_start; 906 gp_duration = jiffies - rsp->gp_start;
912 if (gp_duration > rsp->gp_max) 907 if (gp_duration > rsp->gp_max)
913 rsp->gp_max = gp_duration; 908 rsp->gp_max = gp_duration;
@@ -1455,25 +1450,11 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
1455 */ 1450 */
1456static void rcu_process_callbacks(void) 1451static void rcu_process_callbacks(void)
1457{ 1452{
1458 /*
1459 * Memory references from any prior RCU read-side critical sections
1460 * executed by the interrupted code must be seen before any RCU
1461 * grace-period manipulations below.
1462 */
1463 smp_mb(); /* See above block comment. */
1464
1465 __rcu_process_callbacks(&rcu_sched_state, 1453 __rcu_process_callbacks(&rcu_sched_state,
1466 &__get_cpu_var(rcu_sched_data)); 1454 &__get_cpu_var(rcu_sched_data));
1467 __rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data)); 1455 __rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data));
1468 rcu_preempt_process_callbacks(); 1456 rcu_preempt_process_callbacks();
1469 1457
1470 /*
1471 * Memory references from any later RCU read-side critical sections
1472 * executed by the interrupted code must be seen after any RCU
1473 * grace-period manipulations above.
1474 */
1475 smp_mb(); /* See above block comment. */
1476
1477 /* If we are last CPU on way to dyntick-idle mode, accelerate it. */ 1458 /* If we are last CPU on way to dyntick-idle mode, accelerate it. */
1478 rcu_needs_cpu_flush(); 1459 rcu_needs_cpu_flush();
1479} 1460}
@@ -1494,7 +1475,7 @@ static void invoke_rcu_cpu_kthread(void)
1494 local_irq_restore(flags); 1475 local_irq_restore(flags);
1495 return; 1476 return;
1496 } 1477 }
1497 wake_up(&__get_cpu_var(rcu_cpu_wq)); 1478 wake_up_process(__this_cpu_read(rcu_cpu_kthread_task));
1498 local_irq_restore(flags); 1479 local_irq_restore(flags);
1499} 1480}
1500 1481
@@ -1544,13 +1525,10 @@ static void rcu_cpu_kthread_setrt(int cpu, int to_rt)
1544 */ 1525 */
1545static void rcu_cpu_kthread_timer(unsigned long arg) 1526static void rcu_cpu_kthread_timer(unsigned long arg)
1546{ 1527{
1547 unsigned long flags;
1548 struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, arg); 1528 struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, arg);
1549 struct rcu_node *rnp = rdp->mynode; 1529 struct rcu_node *rnp = rdp->mynode;
1550 1530
1551 raw_spin_lock_irqsave(&rnp->lock, flags); 1531 atomic_or(rdp->grpmask, &rnp->wakemask);
1552 rnp->wakemask |= rdp->grpmask;
1553 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1554 invoke_rcu_node_kthread(rnp); 1532 invoke_rcu_node_kthread(rnp);
1555} 1533}
1556 1534
@@ -1617,14 +1595,12 @@ static int rcu_cpu_kthread(void *arg)
1617 unsigned long flags; 1595 unsigned long flags;
1618 int spincnt = 0; 1596 int spincnt = 0;
1619 unsigned int *statusp = &per_cpu(rcu_cpu_kthread_status, cpu); 1597 unsigned int *statusp = &per_cpu(rcu_cpu_kthread_status, cpu);
1620 wait_queue_head_t *wqp = &per_cpu(rcu_cpu_wq, cpu);
1621 char work; 1598 char work;
1622 char *workp = &per_cpu(rcu_cpu_has_work, cpu); 1599 char *workp = &per_cpu(rcu_cpu_has_work, cpu);
1623 1600
1624 for (;;) { 1601 for (;;) {
1625 *statusp = RCU_KTHREAD_WAITING; 1602 *statusp = RCU_KTHREAD_WAITING;
1626 wait_event_interruptible(*wqp, 1603 rcu_wait(*workp != 0 || kthread_should_stop());
1627 *workp != 0 || kthread_should_stop());
1628 local_bh_disable(); 1604 local_bh_disable();
1629 if (rcu_cpu_kthread_should_stop(cpu)) { 1605 if (rcu_cpu_kthread_should_stop(cpu)) {
1630 local_bh_enable(); 1606 local_bh_enable();
@@ -1672,10 +1648,10 @@ static int __cpuinit rcu_spawn_one_cpu_kthread(int cpu)
1672 if (IS_ERR(t)) 1648 if (IS_ERR(t))
1673 return PTR_ERR(t); 1649 return PTR_ERR(t);
1674 kthread_bind(t, cpu); 1650 kthread_bind(t, cpu);
1651 set_task_state(t, TASK_INTERRUPTIBLE);
1675 per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu; 1652 per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu;
1676 WARN_ON_ONCE(per_cpu(rcu_cpu_kthread_task, cpu) != NULL); 1653 WARN_ON_ONCE(per_cpu(rcu_cpu_kthread_task, cpu) != NULL);
1677 per_cpu(rcu_cpu_kthread_task, cpu) = t; 1654 per_cpu(rcu_cpu_kthread_task, cpu) = t;
1678 wake_up_process(t);
1679 sp.sched_priority = RCU_KTHREAD_PRIO; 1655 sp.sched_priority = RCU_KTHREAD_PRIO;
1680 sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); 1656 sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
1681 return 0; 1657 return 0;
@@ -1698,11 +1674,10 @@ static int rcu_node_kthread(void *arg)
1698 1674
1699 for (;;) { 1675 for (;;) {
1700 rnp->node_kthread_status = RCU_KTHREAD_WAITING; 1676 rnp->node_kthread_status = RCU_KTHREAD_WAITING;
1701 wait_event_interruptible(rnp->node_wq, rnp->wakemask != 0); 1677 rcu_wait(atomic_read(&rnp->wakemask) != 0);
1702 rnp->node_kthread_status = RCU_KTHREAD_RUNNING; 1678 rnp->node_kthread_status = RCU_KTHREAD_RUNNING;
1703 raw_spin_lock_irqsave(&rnp->lock, flags); 1679 raw_spin_lock_irqsave(&rnp->lock, flags);
1704 mask = rnp->wakemask; 1680 mask = atomic_xchg(&rnp->wakemask, 0);
1705 rnp->wakemask = 0;
1706 rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */ 1681 rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */
1707 for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) { 1682 for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) {
1708 if ((mask & 0x1) == 0) 1683 if ((mask & 0x1) == 0)
@@ -1781,9 +1756,9 @@ static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp,
1781 if (IS_ERR(t)) 1756 if (IS_ERR(t))
1782 return PTR_ERR(t); 1757 return PTR_ERR(t);
1783 raw_spin_lock_irqsave(&rnp->lock, flags); 1758 raw_spin_lock_irqsave(&rnp->lock, flags);
1759 set_task_state(t, TASK_INTERRUPTIBLE);
1784 rnp->node_kthread_task = t; 1760 rnp->node_kthread_task = t;
1785 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1761 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1786 wake_up_process(t);
1787 sp.sched_priority = 99; 1762 sp.sched_priority = 99;
1788 sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); 1763 sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
1789 } 1764 }
@@ -1800,21 +1775,16 @@ static int __init rcu_spawn_kthreads(void)
1800 1775
1801 rcu_kthreads_spawnable = 1; 1776 rcu_kthreads_spawnable = 1;
1802 for_each_possible_cpu(cpu) { 1777 for_each_possible_cpu(cpu) {
1803 init_waitqueue_head(&per_cpu(rcu_cpu_wq, cpu));
1804 per_cpu(rcu_cpu_has_work, cpu) = 0; 1778 per_cpu(rcu_cpu_has_work, cpu) = 0;
1805 if (cpu_online(cpu)) 1779 if (cpu_online(cpu))
1806 (void)rcu_spawn_one_cpu_kthread(cpu); 1780 (void)rcu_spawn_one_cpu_kthread(cpu);
1807 } 1781 }
1808 rnp = rcu_get_root(rcu_state); 1782 rnp = rcu_get_root(rcu_state);
1809 init_waitqueue_head(&rnp->node_wq);
1810 rcu_init_boost_waitqueue(rnp);
1811 (void)rcu_spawn_one_node_kthread(rcu_state, rnp); 1783 (void)rcu_spawn_one_node_kthread(rcu_state, rnp);
1812 if (NUM_RCU_NODES > 1) 1784 if (NUM_RCU_NODES > 1) {
1813 rcu_for_each_leaf_node(rcu_state, rnp) { 1785 rcu_for_each_leaf_node(rcu_state, rnp)
1814 init_waitqueue_head(&rnp->node_wq);
1815 rcu_init_boost_waitqueue(rnp);
1816 (void)rcu_spawn_one_node_kthread(rcu_state, rnp); 1786 (void)rcu_spawn_one_node_kthread(rcu_state, rnp);
1817 } 1787 }
1818 return 0; 1788 return 0;
1819} 1789}
1820early_initcall(rcu_spawn_kthreads); 1790early_initcall(rcu_spawn_kthreads);
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 257664815d5d..7b9a08b4aaea 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -84,11 +84,9 @@
84 * Dynticks per-CPU state. 84 * Dynticks per-CPU state.
85 */ 85 */
86struct rcu_dynticks { 86struct rcu_dynticks {
87 int dynticks_nesting; /* Track nesting level, sort of. */ 87 int dynticks_nesting; /* Track irq/process nesting level. */
88 int dynticks; /* Even value for dynticks-idle, else odd. */ 88 int dynticks_nmi_nesting; /* Track NMI nesting level. */
89 int dynticks_nmi; /* Even value for either dynticks-idle or */ 89 atomic_t dynticks; /* Even value for dynticks-idle, else odd. */
90 /* not in nmi handler, else odd. So this */
91 /* remains even for nmi from irq handler. */
92}; 90};
93 91
94/* RCU's kthread states for tracing. */ 92/* RCU's kthread states for tracing. */
@@ -121,7 +119,9 @@ struct rcu_node {
121 /* elements that need to drain to allow the */ 119 /* elements that need to drain to allow the */
122 /* current expedited grace period to */ 120 /* current expedited grace period to */
123 /* complete (only for TREE_PREEMPT_RCU). */ 121 /* complete (only for TREE_PREEMPT_RCU). */
124 unsigned long wakemask; /* CPUs whose kthread needs to be awakened. */ 122 atomic_t wakemask; /* CPUs whose kthread needs to be awakened. */
123 /* Since this has meaning only for leaf */
124 /* rcu_node structures, 32 bits suffices. */
125 unsigned long qsmaskinit; 125 unsigned long qsmaskinit;
126 /* Per-GP initial value for qsmask & expmask. */ 126 /* Per-GP initial value for qsmask & expmask. */
127 unsigned long grpmask; /* Mask to apply to parent qsmask. */ 127 unsigned long grpmask; /* Mask to apply to parent qsmask. */
@@ -159,9 +159,6 @@ struct rcu_node {
159 struct task_struct *boost_kthread_task; 159 struct task_struct *boost_kthread_task;
160 /* kthread that takes care of priority */ 160 /* kthread that takes care of priority */
161 /* boosting for this rcu_node structure. */ 161 /* boosting for this rcu_node structure. */
162 wait_queue_head_t boost_wq;
163 /* Wait queue on which to park the boost */
164 /* kthread. */
165 unsigned int boost_kthread_status; 162 unsigned int boost_kthread_status;
166 /* State of boost_kthread_task for tracing. */ 163 /* State of boost_kthread_task for tracing. */
167 unsigned long n_tasks_boosted; 164 unsigned long n_tasks_boosted;
@@ -188,9 +185,6 @@ struct rcu_node {
188 /* kthread that takes care of this rcu_node */ 185 /* kthread that takes care of this rcu_node */
189 /* structure, for example, awakening the */ 186 /* structure, for example, awakening the */
190 /* per-CPU kthreads as needed. */ 187 /* per-CPU kthreads as needed. */
191 wait_queue_head_t node_wq;
192 /* Wait queue on which to park the per-node */
193 /* kthread. */
194 unsigned int node_kthread_status; 188 unsigned int node_kthread_status;
195 /* State of node_kthread_task for tracing. */ 189 /* State of node_kthread_task for tracing. */
196} ____cacheline_internodealigned_in_smp; 190} ____cacheline_internodealigned_in_smp;
@@ -284,7 +278,6 @@ struct rcu_data {
284 /* 3) dynticks interface. */ 278 /* 3) dynticks interface. */
285 struct rcu_dynticks *dynticks; /* Shared per-CPU dynticks state. */ 279 struct rcu_dynticks *dynticks; /* Shared per-CPU dynticks state. */
286 int dynticks_snap; /* Per-GP tracking for dynticks. */ 280 int dynticks_snap; /* Per-GP tracking for dynticks. */
287 int dynticks_nmi_snap; /* Per-GP tracking for dynticks_nmi. */
288#endif /* #ifdef CONFIG_NO_HZ */ 281#endif /* #ifdef CONFIG_NO_HZ */
289 282
290 /* 4) reasons this CPU needed to be kicked by force_quiescent_state */ 283 /* 4) reasons this CPU needed to be kicked by force_quiescent_state */
@@ -337,6 +330,16 @@ struct rcu_data {
337 /* scheduling clock irq */ 330 /* scheduling clock irq */
338 /* before ratting on them. */ 331 /* before ratting on them. */
339 332
333#define rcu_wait(cond) \
334do { \
335 for (;;) { \
336 set_current_state(TASK_INTERRUPTIBLE); \
337 if (cond) \
338 break; \
339 schedule(); \
340 } \
341 __set_current_state(TASK_RUNNING); \
342} while (0)
340 343
341/* 344/*
342 * RCU global state, including node hierarchy. This hierarchy is 345 * RCU global state, including node hierarchy. This hierarchy is
@@ -446,7 +449,6 @@ static void __cpuinit rcu_preempt_init_percpu_data(int cpu);
446static void rcu_preempt_send_cbs_to_online(void); 449static void rcu_preempt_send_cbs_to_online(void);
447static void __init __rcu_init_preempt(void); 450static void __init __rcu_init_preempt(void);
448static void rcu_needs_cpu_flush(void); 451static void rcu_needs_cpu_flush(void);
449static void __init rcu_init_boost_waitqueue(struct rcu_node *rnp);
450static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); 452static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
451static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, 453static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp,
452 cpumask_var_t cm); 454 cpumask_var_t cm);
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 3f6559a5f5cd..a767b7dac365 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -1196,8 +1196,7 @@ static int rcu_boost_kthread(void *arg)
1196 1196
1197 for (;;) { 1197 for (;;) {
1198 rnp->boost_kthread_status = RCU_KTHREAD_WAITING; 1198 rnp->boost_kthread_status = RCU_KTHREAD_WAITING;
1199 wait_event_interruptible(rnp->boost_wq, rnp->boost_tasks || 1199 rcu_wait(rnp->boost_tasks || rnp->exp_tasks);
1200 rnp->exp_tasks);
1201 rnp->boost_kthread_status = RCU_KTHREAD_RUNNING; 1200 rnp->boost_kthread_status = RCU_KTHREAD_RUNNING;
1202 more2boost = rcu_boost(rnp); 1201 more2boost = rcu_boost(rnp);
1203 if (more2boost) 1202 if (more2boost)
@@ -1275,14 +1274,6 @@ static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
1275} 1274}
1276 1275
1277/* 1276/*
1278 * Initialize the RCU-boost waitqueue.
1279 */
1280static void __init rcu_init_boost_waitqueue(struct rcu_node *rnp)
1281{
1282 init_waitqueue_head(&rnp->boost_wq);
1283}
1284
1285/*
1286 * Create an RCU-boost kthread for the specified node if one does not 1277 * Create an RCU-boost kthread for the specified node if one does not
1287 * already exist. We only create this kthread for preemptible RCU. 1278 * already exist. We only create this kthread for preemptible RCU.
1288 * Returns zero if all is well, a negated errno otherwise. 1279 * Returns zero if all is well, a negated errno otherwise.
@@ -1304,9 +1295,9 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
1304 if (IS_ERR(t)) 1295 if (IS_ERR(t))
1305 return PTR_ERR(t); 1296 return PTR_ERR(t);
1306 raw_spin_lock_irqsave(&rnp->lock, flags); 1297 raw_spin_lock_irqsave(&rnp->lock, flags);
1298 set_task_state(t, TASK_INTERRUPTIBLE);
1307 rnp->boost_kthread_task = t; 1299 rnp->boost_kthread_task = t;
1308 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1300 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1309 wake_up_process(t);
1310 sp.sched_priority = RCU_KTHREAD_PRIO; 1301 sp.sched_priority = RCU_KTHREAD_PRIO;
1311 sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); 1302 sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
1312 return 0; 1303 return 0;
@@ -1328,10 +1319,6 @@ static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
1328{ 1319{
1329} 1320}
1330 1321
1331static void __init rcu_init_boost_waitqueue(struct rcu_node *rnp)
1332{
1333}
1334
1335static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, 1322static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
1336 struct rcu_node *rnp, 1323 struct rcu_node *rnp,
1337 int rnp_index) 1324 int rnp_index)
@@ -1520,7 +1507,6 @@ int rcu_needs_cpu(int cpu)
1520{ 1507{
1521 int c = 0; 1508 int c = 0;
1522 int snap; 1509 int snap;
1523 int snap_nmi;
1524 int thatcpu; 1510 int thatcpu;
1525 1511
1526 /* Check for being in the holdoff period. */ 1512 /* Check for being in the holdoff period. */
@@ -1531,10 +1517,10 @@ int rcu_needs_cpu(int cpu)
1531 for_each_online_cpu(thatcpu) { 1517 for_each_online_cpu(thatcpu) {
1532 if (thatcpu == cpu) 1518 if (thatcpu == cpu)
1533 continue; 1519 continue;
1534 snap = per_cpu(rcu_dynticks, thatcpu).dynticks; 1520 snap = atomic_add_return(0, &per_cpu(rcu_dynticks,
1535 snap_nmi = per_cpu(rcu_dynticks, thatcpu).dynticks_nmi; 1521 thatcpu).dynticks);
1536 smp_mb(); /* Order sampling of snap with end of grace period. */ 1522 smp_mb(); /* Order sampling of snap with end of grace period. */
1537 if (((snap & 0x1) != 0) || ((snap_nmi & 0x1) != 0)) { 1523 if ((snap & 0x1) != 0) {
1538 per_cpu(rcu_dyntick_drain, cpu) = 0; 1524 per_cpu(rcu_dyntick_drain, cpu) = 0;
1539 per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1; 1525 per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1;
1540 return rcu_needs_cpu_quick_check(cpu); 1526 return rcu_needs_cpu_quick_check(cpu);
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c
index aa0fd72b4bc7..9678cc3650f5 100644
--- a/kernel/rcutree_trace.c
+++ b/kernel/rcutree_trace.c
@@ -69,10 +69,10 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
69 rdp->passed_quiesc, rdp->passed_quiesc_completed, 69 rdp->passed_quiesc, rdp->passed_quiesc_completed,
70 rdp->qs_pending); 70 rdp->qs_pending);
71#ifdef CONFIG_NO_HZ 71#ifdef CONFIG_NO_HZ
72 seq_printf(m, " dt=%d/%d dn=%d df=%lu", 72 seq_printf(m, " dt=%d/%d/%d df=%lu",
73 rdp->dynticks->dynticks, 73 atomic_read(&rdp->dynticks->dynticks),
74 rdp->dynticks->dynticks_nesting, 74 rdp->dynticks->dynticks_nesting,
75 rdp->dynticks->dynticks_nmi, 75 rdp->dynticks->dynticks_nmi_nesting,
76 rdp->dynticks_fqs); 76 rdp->dynticks_fqs);
77#endif /* #ifdef CONFIG_NO_HZ */ 77#endif /* #ifdef CONFIG_NO_HZ */
78 seq_printf(m, " of=%lu ri=%lu", rdp->offline_fqs, rdp->resched_ipi); 78 seq_printf(m, " of=%lu ri=%lu", rdp->offline_fqs, rdp->resched_ipi);
@@ -141,9 +141,9 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp)
141 rdp->qs_pending); 141 rdp->qs_pending);
142#ifdef CONFIG_NO_HZ 142#ifdef CONFIG_NO_HZ
143 seq_printf(m, ",%d,%d,%d,%lu", 143 seq_printf(m, ",%d,%d,%d,%lu",
144 rdp->dynticks->dynticks, 144 atomic_read(&rdp->dynticks->dynticks),
145 rdp->dynticks->dynticks_nesting, 145 rdp->dynticks->dynticks_nesting,
146 rdp->dynticks->dynticks_nmi, 146 rdp->dynticks->dynticks_nmi_nesting,
147 rdp->dynticks_fqs); 147 rdp->dynticks_fqs);
148#endif /* #ifdef CONFIG_NO_HZ */ 148#endif /* #ifdef CONFIG_NO_HZ */
149 seq_printf(m, ",%lu,%lu", rdp->offline_fqs, rdp->resched_ipi); 149 seq_printf(m, ",%lu,%lu", rdp->offline_fqs, rdp->resched_ipi);
@@ -167,7 +167,7 @@ static int show_rcudata_csv(struct seq_file *m, void *unused)
167{ 167{
168 seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pqc\",\"pq\","); 168 seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pqc\",\"pq\",");
169#ifdef CONFIG_NO_HZ 169#ifdef CONFIG_NO_HZ
170 seq_puts(m, "\"dt\",\"dt nesting\",\"dn\",\"df\","); 170 seq_puts(m, "\"dt\",\"dt nesting\",\"dt NMI nesting\",\"df\",");
171#endif /* #ifdef CONFIG_NO_HZ */ 171#endif /* #ifdef CONFIG_NO_HZ */
172 seq_puts(m, "\"of\",\"ri\",\"ql\",\"b\",\"ci\",\"co\",\"ca\"\n"); 172 seq_puts(m, "\"of\",\"ri\",\"ql\",\"b\",\"ci\",\"co\",\"ca\"\n");
173#ifdef CONFIG_TREE_PREEMPT_RCU 173#ifdef CONFIG_TREE_PREEMPT_RCU
diff --git a/kernel/sched.c b/kernel/sched.c
index 2d12893b8b0f..cbb3a0eee58e 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2573,7 +2573,26 @@ static void ttwu_queue_remote(struct task_struct *p, int cpu)
2573 if (!next) 2573 if (!next)
2574 smp_send_reschedule(cpu); 2574 smp_send_reschedule(cpu);
2575} 2575}
2576#endif 2576
2577#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
2578static int ttwu_activate_remote(struct task_struct *p, int wake_flags)
2579{
2580 struct rq *rq;
2581 int ret = 0;
2582
2583 rq = __task_rq_lock(p);
2584 if (p->on_cpu) {
2585 ttwu_activate(rq, p, ENQUEUE_WAKEUP);
2586 ttwu_do_wakeup(rq, p, wake_flags);
2587 ret = 1;
2588 }
2589 __task_rq_unlock(rq);
2590
2591 return ret;
2592
2593}
2594#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
2595#endif /* CONFIG_SMP */
2577 2596
2578static void ttwu_queue(struct task_struct *p, int cpu) 2597static void ttwu_queue(struct task_struct *p, int cpu)
2579{ 2598{
@@ -2631,17 +2650,17 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
2631 while (p->on_cpu) { 2650 while (p->on_cpu) {
2632#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW 2651#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
2633 /* 2652 /*
2634 * If called from interrupt context we could have landed in the 2653 * In case the architecture enables interrupts in
2635 * middle of schedule(), in this case we should take care not 2654 * context_switch(), we cannot busy wait, since that
2636 * to spin on ->on_cpu if p is current, since that would 2655 * would lead to deadlocks when an interrupt hits and
2637 * deadlock. 2656 * tries to wake up @prev. So bail and do a complete
2657 * remote wakeup.
2638 */ 2658 */
2639 if (p == current) { 2659 if (ttwu_activate_remote(p, wake_flags))
2640 ttwu_queue(p, cpu);
2641 goto stat; 2660 goto stat;
2642 } 2661#else
2643#endif
2644 cpu_relax(); 2662 cpu_relax();
2663#endif
2645 } 2664 }
2646 /* 2665 /*
2647 * Pairs with the smp_wmb() in finish_lock_switch(). 2666 * Pairs with the smp_wmb() in finish_lock_switch().
@@ -5841,7 +5860,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
5841 idle->state = TASK_RUNNING; 5860 idle->state = TASK_RUNNING;
5842 idle->se.exec_start = sched_clock(); 5861 idle->se.exec_start = sched_clock();
5843 5862
5844 cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu)); 5863 do_set_cpus_allowed(idle, cpumask_of(cpu));
5845 /* 5864 /*
5846 * We're having a chicken and egg problem, even though we are 5865 * We're having a chicken and egg problem, even though we are
5847 * holding rq->lock, the cpu isn't yet set to this cpu so the 5866 * holding rq->lock, the cpu isn't yet set to this cpu so the
@@ -5929,6 +5948,16 @@ static inline void sched_init_granularity(void)
5929} 5948}
5930 5949
5931#ifdef CONFIG_SMP 5950#ifdef CONFIG_SMP
5951void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
5952{
5953 if (p->sched_class && p->sched_class->set_cpus_allowed)
5954 p->sched_class->set_cpus_allowed(p, new_mask);
5955 else {
5956 cpumask_copy(&p->cpus_allowed, new_mask);
5957 p->rt.nr_cpus_allowed = cpumask_weight(new_mask);
5958 }
5959}
5960
5932/* 5961/*
5933 * This is how migration works: 5962 * This is how migration works:
5934 * 5963 *
@@ -5974,12 +6003,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
5974 goto out; 6003 goto out;
5975 } 6004 }
5976 6005
5977 if (p->sched_class->set_cpus_allowed) 6006 do_set_cpus_allowed(p, new_mask);
5978 p->sched_class->set_cpus_allowed(p, new_mask);
5979 else {
5980 cpumask_copy(&p->cpus_allowed, new_mask);
5981 p->rt.nr_cpus_allowed = cpumask_weight(new_mask);
5982 }
5983 6007
5984 /* Can the task run on the task's current CPU? If so, we're done */ 6008 /* Can the task run on the task's current CPU? If so, we're done */
5985 if (cpumask_test_cpu(task_cpu(p), new_mask)) 6009 if (cpumask_test_cpu(task_cpu(p), new_mask))
@@ -8764,42 +8788,10 @@ cpu_cgroup_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
8764 return 0; 8788 return 0;
8765} 8789}
8766 8790
8767static int
8768cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
8769 struct task_struct *tsk, bool threadgroup)
8770{
8771 int retval = cpu_cgroup_can_attach_task(cgrp, tsk);
8772 if (retval)
8773 return retval;
8774 if (threadgroup) {
8775 struct task_struct *c;
8776 rcu_read_lock();
8777 list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
8778 retval = cpu_cgroup_can_attach_task(cgrp, c);
8779 if (retval) {
8780 rcu_read_unlock();
8781 return retval;
8782 }
8783 }
8784 rcu_read_unlock();
8785 }
8786 return 0;
8787}
8788
8789static void 8791static void
8790cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, 8792cpu_cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
8791 struct cgroup *old_cont, struct task_struct *tsk,
8792 bool threadgroup)
8793{ 8793{
8794 sched_move_task(tsk); 8794 sched_move_task(tsk);
8795 if (threadgroup) {
8796 struct task_struct *c;
8797 rcu_read_lock();
8798 list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
8799 sched_move_task(c);
8800 }
8801 rcu_read_unlock();
8802 }
8803} 8795}
8804 8796
8805static void 8797static void
@@ -8887,8 +8879,8 @@ struct cgroup_subsys cpu_cgroup_subsys = {
8887 .name = "cpu", 8879 .name = "cpu",
8888 .create = cpu_cgroup_create, 8880 .create = cpu_cgroup_create,
8889 .destroy = cpu_cgroup_destroy, 8881 .destroy = cpu_cgroup_destroy,
8890 .can_attach = cpu_cgroup_can_attach, 8882 .can_attach_task = cpu_cgroup_can_attach_task,
8891 .attach = cpu_cgroup_attach, 8883 .attach_task = cpu_cgroup_attach_task,
8892 .exit = cpu_cgroup_exit, 8884 .exit = cpu_cgroup_exit,
8893 .populate = cpu_cgroup_populate, 8885 .populate = cpu_cgroup_populate,
8894 .subsys_id = cpu_cgroup_subsys_id, 8886 .subsys_id = cpu_cgroup_subsys_id,
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index e32a9b70ee9c..433491c2dc8f 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1076,8 +1076,6 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
1076 se->on_rq = 0; 1076 se->on_rq = 0;
1077 update_cfs_load(cfs_rq, 0); 1077 update_cfs_load(cfs_rq, 0);
1078 account_entity_dequeue(cfs_rq, se); 1078 account_entity_dequeue(cfs_rq, se);
1079 update_min_vruntime(cfs_rq);
1080 update_cfs_shares(cfs_rq);
1081 1079
1082 /* 1080 /*
1083 * Normalize the entity after updating the min_vruntime because the 1081 * Normalize the entity after updating the min_vruntime because the
@@ -1086,6 +1084,9 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
1086 */ 1084 */
1087 if (!(flags & DEQUEUE_SLEEP)) 1085 if (!(flags & DEQUEUE_SLEEP))
1088 se->vruntime -= cfs_rq->min_vruntime; 1086 se->vruntime -= cfs_rq->min_vruntime;
1087
1088 update_min_vruntime(cfs_rq);
1089 update_cfs_shares(cfs_rq);
1089} 1090}
1090 1091
1091/* 1092/*
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 64b2a37c07d0..88725c939e0b 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -1263,6 +1263,7 @@ static int find_lowest_rq(struct task_struct *task)
1263 if (!cpumask_test_cpu(this_cpu, lowest_mask)) 1263 if (!cpumask_test_cpu(this_cpu, lowest_mask))
1264 this_cpu = -1; /* Skip this_cpu opt if not among lowest */ 1264 this_cpu = -1; /* Skip this_cpu opt if not among lowest */
1265 1265
1266 rcu_read_lock();
1266 for_each_domain(cpu, sd) { 1267 for_each_domain(cpu, sd) {
1267 if (sd->flags & SD_WAKE_AFFINE) { 1268 if (sd->flags & SD_WAKE_AFFINE) {
1268 int best_cpu; 1269 int best_cpu;
@@ -1272,15 +1273,20 @@ static int find_lowest_rq(struct task_struct *task)
1272 * remote processor. 1273 * remote processor.
1273 */ 1274 */
1274 if (this_cpu != -1 && 1275 if (this_cpu != -1 &&
1275 cpumask_test_cpu(this_cpu, sched_domain_span(sd))) 1276 cpumask_test_cpu(this_cpu, sched_domain_span(sd))) {
1277 rcu_read_unlock();
1276 return this_cpu; 1278 return this_cpu;
1279 }
1277 1280
1278 best_cpu = cpumask_first_and(lowest_mask, 1281 best_cpu = cpumask_first_and(lowest_mask,
1279 sched_domain_span(sd)); 1282 sched_domain_span(sd));
1280 if (best_cpu < nr_cpu_ids) 1283 if (best_cpu < nr_cpu_ids) {
1284 rcu_read_unlock();
1281 return best_cpu; 1285 return best_cpu;
1286 }
1282 } 1287 }
1283 } 1288 }
1289 rcu_read_unlock();
1284 1290
1285 /* 1291 /*
1286 * And finally, if there were no matches within the domains 1292 * And finally, if there were no matches within the domains
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h
index 48ddf431db0e..331e01bcd026 100644
--- a/kernel/sched_stats.h
+++ b/kernel/sched_stats.h
@@ -37,7 +37,7 @@ static int show_schedstat(struct seq_file *seq, void *v)
37 37
38#ifdef CONFIG_SMP 38#ifdef CONFIG_SMP
39 /* domain-specific stats */ 39 /* domain-specific stats */
40 preempt_disable(); 40 rcu_read_lock();
41 for_each_domain(cpu, sd) { 41 for_each_domain(cpu, sd) {
42 enum cpu_idle_type itype; 42 enum cpu_idle_type itype;
43 43
@@ -64,7 +64,7 @@ static int show_schedstat(struct seq_file *seq, void *v)
64 sd->ttwu_wake_remote, sd->ttwu_move_affine, 64 sd->ttwu_wake_remote, sd->ttwu_move_affine,
65 sd->ttwu_move_balance); 65 sd->ttwu_move_balance);
66 } 66 }
67 preempt_enable(); 67 rcu_read_unlock();
68#endif 68#endif
69 } 69 }
70 kfree(mask_str); 70 kfree(mask_str);
diff --git a/kernel/signal.c b/kernel/signal.c
index ad5e818baacc..86c32b884f8e 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -3023,8 +3023,10 @@ SYSCALL_DEFINE2(signal, int, sig, __sighandler_t, handler)
3023 3023
3024SYSCALL_DEFINE0(pause) 3024SYSCALL_DEFINE0(pause)
3025{ 3025{
3026 current->state = TASK_INTERRUPTIBLE; 3026 while (!signal_pending(current)) {
3027 schedule(); 3027 current->state = TASK_INTERRUPTIBLE;
3028 schedule();
3029 }
3028 return -ERESTARTNOHAND; 3030 return -ERESTARTNOHAND;
3029} 3031}
3030 3032
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 3dd0c46fa3bb..4fc92445a29c 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -56,6 +56,7 @@
56#include <linux/kprobes.h> 56#include <linux/kprobes.h>
57#include <linux/pipe_fs_i.h> 57#include <linux/pipe_fs_i.h>
58#include <linux/oom.h> 58#include <linux/oom.h>
59#include <linux/kmod.h>
59 60
60#include <asm/uaccess.h> 61#include <asm/uaccess.h>
61#include <asm/processor.h> 62#include <asm/processor.h>
@@ -616,6 +617,11 @@ static struct ctl_table kern_table[] = {
616 .child = random_table, 617 .child = random_table,
617 }, 618 },
618 { 619 {
620 .procname = "usermodehelper",
621 .mode = 0555,
622 .child = usermodehelper_table,
623 },
624 {
619 .procname = "overflowuid", 625 .procname = "overflowuid",
620 .data = &overflowuid, 626 .data = &overflowuid,
621 .maxlen = sizeof(int), 627 .maxlen = sizeof(int),
@@ -1500,7 +1506,7 @@ static struct ctl_table fs_table[] = {
1500 1506
1501static struct ctl_table debug_table[] = { 1507static struct ctl_table debug_table[] = {
1502#if defined(CONFIG_X86) || defined(CONFIG_PPC) || defined(CONFIG_SPARC) || \ 1508#if defined(CONFIG_X86) || defined(CONFIG_PPC) || defined(CONFIG_SPARC) || \
1503 defined(CONFIG_S390) 1509 defined(CONFIG_S390) || defined(CONFIG_TILE)
1504 { 1510 {
1505 .procname = "exception-trace", 1511 .procname = "exception-trace",
1506 .data = &show_unhandled_signals, 1512 .data = &show_unhandled_signals,
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index d017c2c82c44..1ee417fcbfa5 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -109,12 +109,18 @@ ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip);
109static void ftrace_global_list_func(unsigned long ip, 109static void ftrace_global_list_func(unsigned long ip,
110 unsigned long parent_ip) 110 unsigned long parent_ip)
111{ 111{
112 struct ftrace_ops *op = rcu_dereference_raw(ftrace_global_list); /*see above*/ 112 struct ftrace_ops *op;
113
114 if (unlikely(trace_recursion_test(TRACE_GLOBAL_BIT)))
115 return;
113 116
117 trace_recursion_set(TRACE_GLOBAL_BIT);
118 op = rcu_dereference_raw(ftrace_global_list); /*see above*/
114 while (op != &ftrace_list_end) { 119 while (op != &ftrace_list_end) {
115 op->func(ip, parent_ip); 120 op->func(ip, parent_ip);
116 op = rcu_dereference_raw(op->next); /*see above*/ 121 op = rcu_dereference_raw(op->next); /*see above*/
117 }; 122 };
123 trace_recursion_clear(TRACE_GLOBAL_BIT);
118} 124}
119 125
120static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip) 126static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip)
@@ -1638,12 +1644,12 @@ static void ftrace_startup_enable(int command)
1638 ftrace_run_update_code(command); 1644 ftrace_run_update_code(command);
1639} 1645}
1640 1646
1641static void ftrace_startup(struct ftrace_ops *ops, int command) 1647static int ftrace_startup(struct ftrace_ops *ops, int command)
1642{ 1648{
1643 bool hash_enable = true; 1649 bool hash_enable = true;
1644 1650
1645 if (unlikely(ftrace_disabled)) 1651 if (unlikely(ftrace_disabled))
1646 return; 1652 return -ENODEV;
1647 1653
1648 ftrace_start_up++; 1654 ftrace_start_up++;
1649 command |= FTRACE_ENABLE_CALLS; 1655 command |= FTRACE_ENABLE_CALLS;
@@ -1662,6 +1668,8 @@ static void ftrace_startup(struct ftrace_ops *ops, int command)
1662 ftrace_hash_rec_enable(ops, 1); 1668 ftrace_hash_rec_enable(ops, 1);
1663 1669
1664 ftrace_startup_enable(command); 1670 ftrace_startup_enable(command);
1671
1672 return 0;
1665} 1673}
1666 1674
1667static void ftrace_shutdown(struct ftrace_ops *ops, int command) 1675static void ftrace_shutdown(struct ftrace_ops *ops, int command)
@@ -2501,7 +2509,7 @@ static void __enable_ftrace_function_probe(void)
2501 2509
2502 ret = __register_ftrace_function(&trace_probe_ops); 2510 ret = __register_ftrace_function(&trace_probe_ops);
2503 if (!ret) 2511 if (!ret)
2504 ftrace_startup(&trace_probe_ops, 0); 2512 ret = ftrace_startup(&trace_probe_ops, 0);
2505 2513
2506 ftrace_probe_registered = 1; 2514 ftrace_probe_registered = 1;
2507} 2515}
@@ -3466,7 +3474,11 @@ device_initcall(ftrace_nodyn_init);
3466static inline int ftrace_init_dyn_debugfs(struct dentry *d_tracer) { return 0; } 3474static inline int ftrace_init_dyn_debugfs(struct dentry *d_tracer) { return 0; }
3467static inline void ftrace_startup_enable(int command) { } 3475static inline void ftrace_startup_enable(int command) { }
3468/* Keep as macros so we do not need to define the commands */ 3476/* Keep as macros so we do not need to define the commands */
3469# define ftrace_startup(ops, command) do { } while (0) 3477# define ftrace_startup(ops, command) \
3478 ({ \
3479 (ops)->flags |= FTRACE_OPS_FL_ENABLED; \
3480 0; \
3481 })
3470# define ftrace_shutdown(ops, command) do { } while (0) 3482# define ftrace_shutdown(ops, command) do { } while (0)
3471# define ftrace_startup_sysctl() do { } while (0) 3483# define ftrace_startup_sysctl() do { } while (0)
3472# define ftrace_shutdown_sysctl() do { } while (0) 3484# define ftrace_shutdown_sysctl() do { } while (0)
@@ -3484,6 +3496,10 @@ ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip)
3484{ 3496{
3485 struct ftrace_ops *op; 3497 struct ftrace_ops *op;
3486 3498
3499 if (unlikely(trace_recursion_test(TRACE_INTERNAL_BIT)))
3500 return;
3501
3502 trace_recursion_set(TRACE_INTERNAL_BIT);
3487 /* 3503 /*
3488 * Some of the ops may be dynamically allocated, 3504 * Some of the ops may be dynamically allocated,
3489 * they must be freed after a synchronize_sched(). 3505 * they must be freed after a synchronize_sched().
@@ -3496,6 +3512,7 @@ ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip)
3496 op = rcu_dereference_raw(op->next); 3512 op = rcu_dereference_raw(op->next);
3497 }; 3513 };
3498 preempt_enable_notrace(); 3514 preempt_enable_notrace();
3515 trace_recursion_clear(TRACE_INTERNAL_BIT);
3499} 3516}
3500 3517
3501static void clear_ftrace_swapper(void) 3518static void clear_ftrace_swapper(void)
@@ -3799,7 +3816,7 @@ int register_ftrace_function(struct ftrace_ops *ops)
3799 3816
3800 ret = __register_ftrace_function(ops); 3817 ret = __register_ftrace_function(ops);
3801 if (!ret) 3818 if (!ret)
3802 ftrace_startup(ops, 0); 3819 ret = ftrace_startup(ops, 0);
3803 3820
3804 3821
3805 out_unlock: 3822 out_unlock:
@@ -4045,7 +4062,7 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc,
4045 ftrace_graph_return = retfunc; 4062 ftrace_graph_return = retfunc;
4046 ftrace_graph_entry = entryfunc; 4063 ftrace_graph_entry = entryfunc;
4047 4064
4048 ftrace_startup(&global_ops, FTRACE_START_FUNC_RET); 4065 ret = ftrace_startup(&global_ops, FTRACE_START_FUNC_RET);
4049 4066
4050out: 4067out:
4051 mutex_unlock(&ftrace_lock); 4068 mutex_unlock(&ftrace_lock);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 0ef7b4b2a1f7..b0c7aa407943 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -2216,7 +2216,7 @@ static noinline void trace_recursive_fail(void)
2216 2216
2217 printk_once(KERN_WARNING "Tracing recursion: depth[%ld]:" 2217 printk_once(KERN_WARNING "Tracing recursion: depth[%ld]:"
2218 "HC[%lu]:SC[%lu]:NMI[%lu]\n", 2218 "HC[%lu]:SC[%lu]:NMI[%lu]\n",
2219 current->trace_recursion, 2219 trace_recursion_buffer(),
2220 hardirq_count() >> HARDIRQ_SHIFT, 2220 hardirq_count() >> HARDIRQ_SHIFT,
2221 softirq_count() >> SOFTIRQ_SHIFT, 2221 softirq_count() >> SOFTIRQ_SHIFT,
2222 in_nmi()); 2222 in_nmi());
@@ -2226,9 +2226,9 @@ static noinline void trace_recursive_fail(void)
2226 2226
2227static inline int trace_recursive_lock(void) 2227static inline int trace_recursive_lock(void)
2228{ 2228{
2229 current->trace_recursion++; 2229 trace_recursion_inc();
2230 2230
2231 if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH)) 2231 if (likely(trace_recursion_buffer() < TRACE_RECURSIVE_DEPTH))
2232 return 0; 2232 return 0;
2233 2233
2234 trace_recursive_fail(); 2234 trace_recursive_fail();
@@ -2238,9 +2238,9 @@ static inline int trace_recursive_lock(void)
2238 2238
2239static inline void trace_recursive_unlock(void) 2239static inline void trace_recursive_unlock(void)
2240{ 2240{
2241 WARN_ON_ONCE(!current->trace_recursion); 2241 WARN_ON_ONCE(!trace_recursion_buffer());
2242 2242
2243 current->trace_recursion--; 2243 trace_recursion_dec();
2244} 2244}
2245 2245
2246#else 2246#else
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 6b69c4bd306f..229f8591f61d 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -784,4 +784,19 @@ extern const char *__stop___trace_bprintk_fmt[];
784 FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print)) 784 FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print))
785#include "trace_entries.h" 785#include "trace_entries.h"
786 786
787/* Only current can touch trace_recursion */
788#define trace_recursion_inc() do { (current)->trace_recursion++; } while (0)
789#define trace_recursion_dec() do { (current)->trace_recursion--; } while (0)
790
791/* Ring buffer has the 10 LSB bits to count */
792#define trace_recursion_buffer() ((current)->trace_recursion & 0x3ff)
793
794/* for function tracing recursion */
795#define TRACE_INTERNAL_BIT (1<<11)
796#define TRACE_GLOBAL_BIT (1<<12)
797
798#define trace_recursion_set(bit) do { (current)->trace_recursion |= (bit); } while (0)
799#define trace_recursion_clear(bit) do { (current)->trace_recursion &= ~(bit); } while (0)
800#define trace_recursion_test(bit) ((current)->trace_recursion & (bit))
801
787#endif /* _LINUX_KERNEL_TRACE_H */ 802#endif /* _LINUX_KERNEL_TRACE_H */
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 2fe110341359..686ec399f2a8 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -1657,7 +1657,12 @@ static struct ftrace_ops trace_ops __initdata =
1657 1657
1658static __init void event_trace_self_test_with_function(void) 1658static __init void event_trace_self_test_with_function(void)
1659{ 1659{
1660 register_ftrace_function(&trace_ops); 1660 int ret;
1661 ret = register_ftrace_function(&trace_ops);
1662 if (WARN_ON(ret < 0)) {
1663 pr_info("Failed to enable function tracer for event tests\n");
1664 return;
1665 }
1661 pr_info("Running tests again, along with the function tracer\n"); 1666 pr_info("Running tests again, along with the function tracer\n");
1662 event_trace_self_tests(); 1667 event_trace_self_tests();
1663 unregister_ftrace_function(&trace_ops); 1668 unregister_ftrace_function(&trace_ops);
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index cf535ccedc86..e37de492a9e1 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -353,6 +353,33 @@ ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val,
353} 353}
354EXPORT_SYMBOL(ftrace_print_symbols_seq); 354EXPORT_SYMBOL(ftrace_print_symbols_seq);
355 355
356#if BITS_PER_LONG == 32
357const char *
358ftrace_print_symbols_seq_u64(struct trace_seq *p, unsigned long long val,
359 const struct trace_print_flags_u64 *symbol_array)
360{
361 int i;
362 const char *ret = p->buffer + p->len;
363
364 for (i = 0; symbol_array[i].name; i++) {
365
366 if (val != symbol_array[i].mask)
367 continue;
368
369 trace_seq_puts(p, symbol_array[i].name);
370 break;
371 }
372
373 if (!p->len)
374 trace_seq_printf(p, "0x%llx", val);
375
376 trace_seq_putc(p, 0);
377
378 return ret;
379}
380EXPORT_SYMBOL(ftrace_print_symbols_seq_u64);
381#endif
382
356const char * 383const char *
357ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len) 384ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len)
358{ 385{
diff --git a/kernel/utsname.c b/kernel/utsname.c
index 44646179eaba..bff131b9510a 100644
--- a/kernel/utsname.c
+++ b/kernel/utsname.c
@@ -15,6 +15,7 @@
15#include <linux/err.h> 15#include <linux/err.h>
16#include <linux/slab.h> 16#include <linux/slab.h>
17#include <linux/user_namespace.h> 17#include <linux/user_namespace.h>
18#include <linux/proc_fs.h>
18 19
19static struct uts_namespace *create_uts_ns(void) 20static struct uts_namespace *create_uts_ns(void)
20{ 21{
@@ -79,3 +80,41 @@ void free_uts_ns(struct kref *kref)
79 put_user_ns(ns->user_ns); 80 put_user_ns(ns->user_ns);
80 kfree(ns); 81 kfree(ns);
81} 82}
83
84static void *utsns_get(struct task_struct *task)
85{
86 struct uts_namespace *ns = NULL;
87 struct nsproxy *nsproxy;
88
89 rcu_read_lock();
90 nsproxy = task_nsproxy(task);
91 if (nsproxy) {
92 ns = nsproxy->uts_ns;
93 get_uts_ns(ns);
94 }
95 rcu_read_unlock();
96
97 return ns;
98}
99
100static void utsns_put(void *ns)
101{
102 put_uts_ns(ns);
103}
104
105static int utsns_install(struct nsproxy *nsproxy, void *ns)
106{
107 get_uts_ns(ns);
108 put_uts_ns(nsproxy->uts_ns);
109 nsproxy->uts_ns = ns;
110 return 0;
111}
112
113const struct proc_ns_operations utsns_operations = {
114 .name = "uts",
115 .type = CLONE_NEWUTS,
116 .get = utsns_get,
117 .put = utsns_put,
118 .install = utsns_install,
119};
120
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 7daa4b072e9f..3d0c56ad4792 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -415,15 +415,13 @@ static void watchdog_nmi_disable(int cpu) { return; }
415#endif /* CONFIG_HARDLOCKUP_DETECTOR */ 415#endif /* CONFIG_HARDLOCKUP_DETECTOR */
416 416
417/* prepare/enable/disable routines */ 417/* prepare/enable/disable routines */
418static int watchdog_prepare_cpu(int cpu) 418static void watchdog_prepare_cpu(int cpu)
419{ 419{
420 struct hrtimer *hrtimer = &per_cpu(watchdog_hrtimer, cpu); 420 struct hrtimer *hrtimer = &per_cpu(watchdog_hrtimer, cpu);
421 421
422 WARN_ON(per_cpu(softlockup_watchdog, cpu)); 422 WARN_ON(per_cpu(softlockup_watchdog, cpu));
423 hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 423 hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
424 hrtimer->function = watchdog_timer_fn; 424 hrtimer->function = watchdog_timer_fn;
425
426 return 0;
427} 425}
428 426
429static int watchdog_enable(int cpu) 427static int watchdog_enable(int cpu)
@@ -542,17 +540,16 @@ static int __cpuinit
542cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) 540cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
543{ 541{
544 int hotcpu = (unsigned long)hcpu; 542 int hotcpu = (unsigned long)hcpu;
545 int err = 0;
546 543
547 switch (action) { 544 switch (action) {
548 case CPU_UP_PREPARE: 545 case CPU_UP_PREPARE:
549 case CPU_UP_PREPARE_FROZEN: 546 case CPU_UP_PREPARE_FROZEN:
550 err = watchdog_prepare_cpu(hotcpu); 547 watchdog_prepare_cpu(hotcpu);
551 break; 548 break;
552 case CPU_ONLINE: 549 case CPU_ONLINE:
553 case CPU_ONLINE_FROZEN: 550 case CPU_ONLINE_FROZEN:
554 if (watchdog_enabled) 551 if (watchdog_enabled)
555 err = watchdog_enable(hotcpu); 552 watchdog_enable(hotcpu);
556 break; 553 break;
557#ifdef CONFIG_HOTPLUG_CPU 554#ifdef CONFIG_HOTPLUG_CPU
558 case CPU_UP_CANCELED: 555 case CPU_UP_CANCELED:
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index e3378e8d3a5c..0400553f0d04 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -2866,9 +2866,7 @@ static int alloc_cwqs(struct workqueue_struct *wq)
2866 } 2866 }
2867 } 2867 }
2868 2868
2869 /* just in case, make sure it's actually aligned 2869 /* just in case, make sure it's actually aligned */
2870 * - this is affected by PERCPU() alignment in vmlinux.lds.S
2871 */
2872 BUG_ON(!IS_ALIGNED(wq->cpu_wq.v, align)); 2870 BUG_ON(!IS_ALIGNED(wq->cpu_wq.v, align));
2873 return wq->cpu_wq.v ? 0 : -ENOMEM; 2871 return wq->cpu_wq.v ? 0 : -ENOMEM;
2874} 2872}