author     Paul Mundt <lethal@linux-sh.org>    2011-05-31 00:10:26 -0400
committer  Paul Mundt <lethal@linux-sh.org>    2011-05-31 00:10:26 -0400
commit     8181d3ef26ed1d9eb21e2cdcac374e1f457fdc06
tree       1a081f09ebcf2a84de899ddeadd0e4c5e48b50d2 /kernel
parent     54525552c6ccfd867e819845da14be994e303218
parent     55922c9d1b84b89cb946c777fddccb3247e7df2c
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6 into sh-fixes-for-linus
Diffstat (limited to 'kernel')
41 files changed, 1328 insertions, 800 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index e9cf19155b46..2d64cfcc8b42 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -61,7 +61,6 @@ obj-$(CONFIG_COMPAT) += compat.o | |||
61 | obj-$(CONFIG_CGROUPS) += cgroup.o | 61 | obj-$(CONFIG_CGROUPS) += cgroup.o |
62 | obj-$(CONFIG_CGROUP_FREEZER) += cgroup_freezer.o | 62 | obj-$(CONFIG_CGROUP_FREEZER) += cgroup_freezer.o |
63 | obj-$(CONFIG_CPUSETS) += cpuset.o | 63 | obj-$(CONFIG_CPUSETS) += cpuset.o |
64 | obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o | ||
65 | obj-$(CONFIG_UTS_NS) += utsname.o | 64 | obj-$(CONFIG_UTS_NS) += utsname.o |
66 | obj-$(CONFIG_USER_NS) += user_namespace.o | 65 | obj-$(CONFIG_USER_NS) += user_namespace.o |
67 | obj-$(CONFIG_PID_NS) += pid_namespace.o | 66 | obj-$(CONFIG_PID_NS) += pid_namespace.o |
diff --git a/kernel/capability.c b/kernel/capability.c
index 32a80e08ff4b..283c529f8b1c 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -22,12 +22,8 @@ | |||
22 | */ | 22 | */ |
23 | 23 | ||
24 | const kernel_cap_t __cap_empty_set = CAP_EMPTY_SET; | 24 | const kernel_cap_t __cap_empty_set = CAP_EMPTY_SET; |
25 | const kernel_cap_t __cap_full_set = CAP_FULL_SET; | ||
26 | const kernel_cap_t __cap_init_eff_set = CAP_INIT_EFF_SET; | ||
27 | 25 | ||
28 | EXPORT_SYMBOL(__cap_empty_set); | 26 | EXPORT_SYMBOL(__cap_empty_set); |
29 | EXPORT_SYMBOL(__cap_full_set); | ||
30 | EXPORT_SYMBOL(__cap_init_eff_set); | ||
31 | 27 | ||
32 | int file_caps_enabled = 1; | 28 | int file_caps_enabled = 1; |
33 | 29 | ||
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 909a35510af5..2731d115d725 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -57,6 +57,7 @@ | |||
57 | #include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */ | 57 | #include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */ |
58 | #include <linux/eventfd.h> | 58 | #include <linux/eventfd.h> |
59 | #include <linux/poll.h> | 59 | #include <linux/poll.h> |
60 | #include <linux/flex_array.h> /* used in cgroup_attach_proc */ | ||
60 | 61 | ||
61 | #include <asm/atomic.h> | 62 | #include <asm/atomic.h> |
62 | 63 | ||
@@ -1735,6 +1736,76 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen) | |||
1735 | } | 1736 | } |
1736 | EXPORT_SYMBOL_GPL(cgroup_path); | 1737 | EXPORT_SYMBOL_GPL(cgroup_path); |
1737 | 1738 | ||
1739 | /* | ||
1740 | * cgroup_task_migrate - move a task from one cgroup to another. | ||
1741 | * | ||
1742 | * 'guarantee' is set if the caller promises that a new css_set for the task | ||
1743 | * will already exist. If not set, this function might sleep, and can fail with | ||
1744 | * -ENOMEM. Otherwise, it can only fail with -ESRCH. | ||
1745 | */ | ||
1746 | static int cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp, | ||
1747 | struct task_struct *tsk, bool guarantee) | ||
1748 | { | ||
1749 | struct css_set *oldcg; | ||
1750 | struct css_set *newcg; | ||
1751 | |||
1752 | /* | ||
1753 | * get old css_set. we need to take task_lock and refcount it, because | ||
1754 | * an exiting task can change its css_set to init_css_set and drop its | ||
1755 | * old one without taking cgroup_mutex. | ||
1756 | */ | ||
1757 | task_lock(tsk); | ||
1758 | oldcg = tsk->cgroups; | ||
1759 | get_css_set(oldcg); | ||
1760 | task_unlock(tsk); | ||
1761 | |||
1762 | /* locate or allocate a new css_set for this task. */ | ||
1763 | if (guarantee) { | ||
1764 | /* we know the css_set we want already exists. */ | ||
1765 | struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT]; | ||
1766 | read_lock(&css_set_lock); | ||
1767 | newcg = find_existing_css_set(oldcg, cgrp, template); | ||
1768 | BUG_ON(!newcg); | ||
1769 | get_css_set(newcg); | ||
1770 | read_unlock(&css_set_lock); | ||
1771 | } else { | ||
1772 | might_sleep(); | ||
1773 | /* find_css_set will give us newcg already referenced. */ | ||
1774 | newcg = find_css_set(oldcg, cgrp); | ||
1775 | if (!newcg) { | ||
1776 | put_css_set(oldcg); | ||
1777 | return -ENOMEM; | ||
1778 | } | ||
1779 | } | ||
1780 | put_css_set(oldcg); | ||
1781 | |||
1782 | /* if PF_EXITING is set, the tsk->cgroups pointer is no longer safe. */ | ||
1783 | task_lock(tsk); | ||
1784 | if (tsk->flags & PF_EXITING) { | ||
1785 | task_unlock(tsk); | ||
1786 | put_css_set(newcg); | ||
1787 | return -ESRCH; | ||
1788 | } | ||
1789 | rcu_assign_pointer(tsk->cgroups, newcg); | ||
1790 | task_unlock(tsk); | ||
1791 | |||
1792 | /* Update the css_set linked lists if we're using them */ | ||
1793 | write_lock(&css_set_lock); | ||
1794 | if (!list_empty(&tsk->cg_list)) | ||
1795 | list_move(&tsk->cg_list, &newcg->tasks); | ||
1796 | write_unlock(&css_set_lock); | ||
1797 | |||
1798 | /* | ||
1799 | * We just gained a reference on oldcg by taking it from the task. As | ||
1800 | * trading it for newcg is protected by cgroup_mutex, we're safe to drop | ||
1801 | * it here; it will be freed under RCU. | ||
1802 | */ | ||
1803 | put_css_set(oldcg); | ||
1804 | |||
1805 | set_bit(CGRP_RELEASABLE, &oldcgrp->flags); | ||
1806 | return 0; | ||
1807 | } | ||
1808 | |||
1738 | /** | 1809 | /** |
1739 | * cgroup_attach_task - attach task 'tsk' to cgroup 'cgrp' | 1810 | * cgroup_attach_task - attach task 'tsk' to cgroup 'cgrp' |
1740 | * @cgrp: the cgroup the task is attaching to | 1811 | * @cgrp: the cgroup the task is attaching to |
@@ -1745,11 +1816,9 @@ EXPORT_SYMBOL_GPL(cgroup_path); | |||
1745 | */ | 1816 | */ |
1746 | int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) | 1817 | int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) |
1747 | { | 1818 | { |
1748 | int retval = 0; | 1819 | int retval; |
1749 | struct cgroup_subsys *ss, *failed_ss = NULL; | 1820 | struct cgroup_subsys *ss, *failed_ss = NULL; |
1750 | struct cgroup *oldcgrp; | 1821 | struct cgroup *oldcgrp; |
1751 | struct css_set *cg; | ||
1752 | struct css_set *newcg; | ||
1753 | struct cgroupfs_root *root = cgrp->root; | 1822 | struct cgroupfs_root *root = cgrp->root; |
1754 | 1823 | ||
1755 | /* Nothing to do if the task is already in that cgroup */ | 1824 | /* Nothing to do if the task is already in that cgroup */ |
@@ -1759,7 +1828,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) | |||
1759 | 1828 | ||
1760 | for_each_subsys(root, ss) { | 1829 | for_each_subsys(root, ss) { |
1761 | if (ss->can_attach) { | 1830 | if (ss->can_attach) { |
1762 | retval = ss->can_attach(ss, cgrp, tsk, false); | 1831 | retval = ss->can_attach(ss, cgrp, tsk); |
1763 | if (retval) { | 1832 | if (retval) { |
1764 | /* | 1833 | /* |
1765 | * Remember on which subsystem the can_attach() | 1834 | * Remember on which subsystem the can_attach() |
@@ -1771,46 +1840,29 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) | |||
1771 | goto out; | 1840 | goto out; |
1772 | } | 1841 | } |
1773 | } | 1842 | } |
1843 | if (ss->can_attach_task) { | ||
1844 | retval = ss->can_attach_task(cgrp, tsk); | ||
1845 | if (retval) { | ||
1846 | failed_ss = ss; | ||
1847 | goto out; | ||
1848 | } | ||
1849 | } | ||
1774 | } | 1850 | } |
1775 | 1851 | ||
1776 | task_lock(tsk); | 1852 | retval = cgroup_task_migrate(cgrp, oldcgrp, tsk, false); |
1777 | cg = tsk->cgroups; | 1853 | if (retval) |
1778 | get_css_set(cg); | ||
1779 | task_unlock(tsk); | ||
1780 | /* | ||
1781 | * Locate or allocate a new css_set for this task, | ||
1782 | * based on its final set of cgroups | ||
1783 | */ | ||
1784 | newcg = find_css_set(cg, cgrp); | ||
1785 | put_css_set(cg); | ||
1786 | if (!newcg) { | ||
1787 | retval = -ENOMEM; | ||
1788 | goto out; | ||
1789 | } | ||
1790 | |||
1791 | task_lock(tsk); | ||
1792 | if (tsk->flags & PF_EXITING) { | ||
1793 | task_unlock(tsk); | ||
1794 | put_css_set(newcg); | ||
1795 | retval = -ESRCH; | ||
1796 | goto out; | 1854 | goto out; |
1797 | } | ||
1798 | rcu_assign_pointer(tsk->cgroups, newcg); | ||
1799 | task_unlock(tsk); | ||
1800 | |||
1801 | /* Update the css_set linked lists if we're using them */ | ||
1802 | write_lock(&css_set_lock); | ||
1803 | if (!list_empty(&tsk->cg_list)) | ||
1804 | list_move(&tsk->cg_list, &newcg->tasks); | ||
1805 | write_unlock(&css_set_lock); | ||
1806 | 1855 | ||
1807 | for_each_subsys(root, ss) { | 1856 | for_each_subsys(root, ss) { |
1857 | if (ss->pre_attach) | ||
1858 | ss->pre_attach(cgrp); | ||
1859 | if (ss->attach_task) | ||
1860 | ss->attach_task(cgrp, tsk); | ||
1808 | if (ss->attach) | 1861 | if (ss->attach) |
1809 | ss->attach(ss, cgrp, oldcgrp, tsk, false); | 1862 | ss->attach(ss, cgrp, oldcgrp, tsk); |
1810 | } | 1863 | } |
1811 | set_bit(CGRP_RELEASABLE, &oldcgrp->flags); | 1864 | |
1812 | synchronize_rcu(); | 1865 | synchronize_rcu(); |
1813 | put_css_set(cg); | ||
1814 | 1866 | ||
1815 | /* | 1867 | /* |
1816 | * wake up rmdir() waiter. the rmdir should fail since the cgroup | 1868 | * wake up rmdir() waiter. the rmdir should fail since the cgroup |
@@ -1829,7 +1881,7 @@ out: | |||
1829 | */ | 1881 | */ |
1830 | break; | 1882 | break; |
1831 | if (ss->cancel_attach) | 1883 | if (ss->cancel_attach) |
1832 | ss->cancel_attach(ss, cgrp, tsk, false); | 1884 | ss->cancel_attach(ss, cgrp, tsk); |
1833 | } | 1885 | } |
1834 | } | 1886 | } |
1835 | return retval; | 1887 | return retval; |
@@ -1860,49 +1912,370 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk) | |||
1860 | EXPORT_SYMBOL_GPL(cgroup_attach_task_all); | 1912 | EXPORT_SYMBOL_GPL(cgroup_attach_task_all); |
1861 | 1913 | ||
1862 | /* | 1914 | /* |
1863 | * Attach task with pid 'pid' to cgroup 'cgrp'. Call with cgroup_mutex | 1915 | * cgroup_attach_proc works in two stages, the first of which prefetches all |
1864 | * held. May take task_lock of task | 1916 | * new css_sets needed (to make sure we have enough memory before committing |
1917 | * to the move) and stores them in a list of entries of the following type. | ||
1918 | * TODO: possible optimization: use css_set->rcu_head for chaining instead | ||
1919 | */ | ||
1920 | struct cg_list_entry { | ||
1921 | struct css_set *cg; | ||
1922 | struct list_head links; | ||
1923 | }; | ||
1924 | |||
1925 | static bool css_set_check_fetched(struct cgroup *cgrp, | ||
1926 | struct task_struct *tsk, struct css_set *cg, | ||
1927 | struct list_head *newcg_list) | ||
1928 | { | ||
1929 | struct css_set *newcg; | ||
1930 | struct cg_list_entry *cg_entry; | ||
1931 | struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT]; | ||
1932 | |||
1933 | read_lock(&css_set_lock); | ||
1934 | newcg = find_existing_css_set(cg, cgrp, template); | ||
1935 | if (newcg) | ||
1936 | get_css_set(newcg); | ||
1937 | read_unlock(&css_set_lock); | ||
1938 | |||
1939 | /* doesn't exist at all? */ | ||
1940 | if (!newcg) | ||
1941 | return false; | ||
1942 | /* see if it's already in the list */ | ||
1943 | list_for_each_entry(cg_entry, newcg_list, links) { | ||
1944 | if (cg_entry->cg == newcg) { | ||
1945 | put_css_set(newcg); | ||
1946 | return true; | ||
1947 | } | ||
1948 | } | ||
1949 | |||
1950 | /* not found */ | ||
1951 | put_css_set(newcg); | ||
1952 | return false; | ||
1953 | } | ||
1954 | |||
1955 | /* | ||
1956 | * Find the new css_set and store it in the list in preparation for moving the | ||
1957 | * given task to the given cgroup. Returns 0 or -ENOMEM. | ||
1958 | */ | ||
1959 | static int css_set_prefetch(struct cgroup *cgrp, struct css_set *cg, | ||
1960 | struct list_head *newcg_list) | ||
1961 | { | ||
1962 | struct css_set *newcg; | ||
1963 | struct cg_list_entry *cg_entry; | ||
1964 | |||
1965 | /* ensure a new css_set will exist for this thread */ | ||
1966 | newcg = find_css_set(cg, cgrp); | ||
1967 | if (!newcg) | ||
1968 | return -ENOMEM; | ||
1969 | /* add it to the list */ | ||
1970 | cg_entry = kmalloc(sizeof(struct cg_list_entry), GFP_KERNEL); | ||
1971 | if (!cg_entry) { | ||
1972 | put_css_set(newcg); | ||
1973 | return -ENOMEM; | ||
1974 | } | ||
1975 | cg_entry->cg = newcg; | ||
1976 | list_add(&cg_entry->links, newcg_list); | ||
1977 | return 0; | ||
1978 | } | ||
1979 | |||
1980 | /** | ||
1981 | * cgroup_attach_proc - attach all threads in a threadgroup to a cgroup | ||
1982 | * @cgrp: the cgroup to attach to | ||
1983 | * @leader: the threadgroup leader task_struct of the group to be attached | ||
1984 | * | ||
1985 | * Call holding cgroup_mutex and the threadgroup_fork_lock of the leader. Will | ||
1986 | * take task_lock of each thread in leader's threadgroup individually in turn. | ||
1987 | */ | ||
1988 | int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) | ||
1989 | { | ||
1990 | int retval, i, group_size; | ||
1991 | struct cgroup_subsys *ss, *failed_ss = NULL; | ||
1992 | bool cancel_failed_ss = false; | ||
1993 | /* guaranteed to be initialized later, but the compiler needs this */ | ||
1994 | struct cgroup *oldcgrp = NULL; | ||
1995 | struct css_set *oldcg; | ||
1996 | struct cgroupfs_root *root = cgrp->root; | ||
1997 | /* threadgroup list cursor and array */ | ||
1998 | struct task_struct *tsk; | ||
1999 | struct flex_array *group; | ||
2000 | /* | ||
2001 | * we need to make sure we have css_sets for all the tasks we're | ||
2002 | * going to move -before- we actually start moving them, so that in | ||
2003 | * case we get an ENOMEM we can bail out before making any changes. | ||
2004 | */ | ||
2005 | struct list_head newcg_list; | ||
2006 | struct cg_list_entry *cg_entry, *temp_nobe; | ||
2007 | |||
2008 | /* | ||
2009 | * step 0: in order to do expensive, possibly blocking operations for | ||
2010 | * every thread, we cannot iterate the thread group list, since it needs | ||
2011 | * rcu or tasklist locked. instead, build an array of all threads in the | ||
2012 | * group - threadgroup_fork_lock prevents new threads from appearing, | ||
2013 | * and if threads exit, this will just be an over-estimate. | ||
2014 | */ | ||
2015 | group_size = get_nr_threads(leader); | ||
2016 | /* flex_array supports very large thread-groups better than kmalloc. */ | ||
2017 | group = flex_array_alloc(sizeof(struct task_struct *), group_size, | ||
2018 | GFP_KERNEL); | ||
2019 | if (!group) | ||
2020 | return -ENOMEM; | ||
2021 | /* pre-allocate to guarantee space while iterating in rcu read-side. */ | ||
2022 | retval = flex_array_prealloc(group, 0, group_size - 1, GFP_KERNEL); | ||
2023 | if (retval) | ||
2024 | goto out_free_group_list; | ||
2025 | |||
2026 | /* prevent changes to the threadgroup list while we take a snapshot. */ | ||
2027 | rcu_read_lock(); | ||
2028 | if (!thread_group_leader(leader)) { | ||
2029 | /* | ||
2030 | * a race with de_thread from another thread's exec() may strip | ||
2031 | * us of our leadership, making while_each_thread unsafe to use | ||
2032 | * on this task. if this happens, there is no choice but to | ||
2033 | * throw this task away and try again (from cgroup_procs_write); | ||
2034 | * this is "double-double-toil-and-trouble-check locking". | ||
2035 | */ | ||
2036 | rcu_read_unlock(); | ||
2037 | retval = -EAGAIN; | ||
2038 | goto out_free_group_list; | ||
2039 | } | ||
2040 | /* take a reference on each task in the group to go in the array. */ | ||
2041 | tsk = leader; | ||
2042 | i = 0; | ||
2043 | do { | ||
2044 | /* as per above, nr_threads may decrease, but not increase. */ | ||
2045 | BUG_ON(i >= group_size); | ||
2046 | get_task_struct(tsk); | ||
2047 | /* | ||
2048 | * saying GFP_ATOMIC has no effect here because we did prealloc | ||
2049 | * earlier, but it's good form to communicate our expectations. | ||
2050 | */ | ||
2051 | retval = flex_array_put_ptr(group, i, tsk, GFP_ATOMIC); | ||
2052 | BUG_ON(retval != 0); | ||
2053 | i++; | ||
2054 | } while_each_thread(leader, tsk); | ||
2055 | /* remember the number of threads in the array for later. */ | ||
2056 | group_size = i; | ||
2057 | rcu_read_unlock(); | ||
2058 | |||
2059 | /* | ||
2060 | * step 1: check that we can legitimately attach to the cgroup. | ||
2061 | */ | ||
2062 | for_each_subsys(root, ss) { | ||
2063 | if (ss->can_attach) { | ||
2064 | retval = ss->can_attach(ss, cgrp, leader); | ||
2065 | if (retval) { | ||
2066 | failed_ss = ss; | ||
2067 | goto out_cancel_attach; | ||
2068 | } | ||
2069 | } | ||
2070 | /* a callback to be run on every thread in the threadgroup. */ | ||
2071 | if (ss->can_attach_task) { | ||
2072 | /* run on each task in the threadgroup. */ | ||
2073 | for (i = 0; i < group_size; i++) { | ||
2074 | tsk = flex_array_get_ptr(group, i); | ||
2075 | retval = ss->can_attach_task(cgrp, tsk); | ||
2076 | if (retval) { | ||
2077 | failed_ss = ss; | ||
2078 | cancel_failed_ss = true; | ||
2079 | goto out_cancel_attach; | ||
2080 | } | ||
2081 | } | ||
2082 | } | ||
2083 | } | ||
2084 | |||
2085 | /* | ||
2086 | * step 2: make sure css_sets exist for all threads to be migrated. | ||
2087 | * we use find_css_set, which allocates a new one if necessary. | ||
2088 | */ | ||
2089 | INIT_LIST_HEAD(&newcg_list); | ||
2090 | for (i = 0; i < group_size; i++) { | ||
2091 | tsk = flex_array_get_ptr(group, i); | ||
2092 | /* nothing to do if this task is already in the cgroup */ | ||
2093 | oldcgrp = task_cgroup_from_root(tsk, root); | ||
2094 | if (cgrp == oldcgrp) | ||
2095 | continue; | ||
2096 | /* get old css_set pointer */ | ||
2097 | task_lock(tsk); | ||
2098 | if (tsk->flags & PF_EXITING) { | ||
2099 | /* ignore this task if it's going away */ | ||
2100 | task_unlock(tsk); | ||
2101 | continue; | ||
2102 | } | ||
2103 | oldcg = tsk->cgroups; | ||
2104 | get_css_set(oldcg); | ||
2105 | task_unlock(tsk); | ||
2106 | /* see if the new one for us is already in the list? */ | ||
2107 | if (css_set_check_fetched(cgrp, tsk, oldcg, &newcg_list)) { | ||
2108 | /* was already there, nothing to do. */ | ||
2109 | put_css_set(oldcg); | ||
2110 | } else { | ||
2111 | /* we don't already have it. get new one. */ | ||
2112 | retval = css_set_prefetch(cgrp, oldcg, &newcg_list); | ||
2113 | put_css_set(oldcg); | ||
2114 | if (retval) | ||
2115 | goto out_list_teardown; | ||
2116 | } | ||
2117 | } | ||
2118 | |||
2119 | /* | ||
2120 | * step 3: now that we're guaranteed success wrt the css_sets, proceed | ||
2121 | * to move all tasks to the new cgroup, calling ss->attach_task for each | ||
2122 | * one along the way. there are no failure cases after here, so this is | ||
2123 | * the commit point. | ||
2124 | */ | ||
2125 | for_each_subsys(root, ss) { | ||
2126 | if (ss->pre_attach) | ||
2127 | ss->pre_attach(cgrp); | ||
2128 | } | ||
2129 | for (i = 0; i < group_size; i++) { | ||
2130 | tsk = flex_array_get_ptr(group, i); | ||
2131 | /* leave current thread as it is if it's already there */ | ||
2132 | oldcgrp = task_cgroup_from_root(tsk, root); | ||
2133 | if (cgrp == oldcgrp) | ||
2134 | continue; | ||
2135 | /* attach each task to each subsystem */ | ||
2136 | for_each_subsys(root, ss) { | ||
2137 | if (ss->attach_task) | ||
2138 | ss->attach_task(cgrp, tsk); | ||
2139 | } | ||
2140 | /* if the thread is PF_EXITING, it can just get skipped. */ | ||
2141 | retval = cgroup_task_migrate(cgrp, oldcgrp, tsk, true); | ||
2142 | BUG_ON(retval != 0 && retval != -ESRCH); | ||
2143 | } | ||
2144 | /* nothing is sensitive to fork() after this point. */ | ||
2145 | |||
2146 | /* | ||
2147 | * step 4: do expensive, non-thread-specific subsystem callbacks. | ||
2148 | * TODO: if ever a subsystem needs to know the oldcgrp for each task | ||
2149 | * being moved, this call will need to be reworked to communicate that. | ||
2150 | */ | ||
2151 | for_each_subsys(root, ss) { | ||
2152 | if (ss->attach) | ||
2153 | ss->attach(ss, cgrp, oldcgrp, leader); | ||
2154 | } | ||
2155 | |||
2156 | /* | ||
2157 | * step 5: success! and cleanup | ||
2158 | */ | ||
2159 | synchronize_rcu(); | ||
2160 | cgroup_wakeup_rmdir_waiter(cgrp); | ||
2161 | retval = 0; | ||
2162 | out_list_teardown: | ||
2163 | /* clean up the list of prefetched css_sets. */ | ||
2164 | list_for_each_entry_safe(cg_entry, temp_nobe, &newcg_list, links) { | ||
2165 | list_del(&cg_entry->links); | ||
2166 | put_css_set(cg_entry->cg); | ||
2167 | kfree(cg_entry); | ||
2168 | } | ||
2169 | out_cancel_attach: | ||
2170 | /* same deal as in cgroup_attach_task */ | ||
2171 | if (retval) { | ||
2172 | for_each_subsys(root, ss) { | ||
2173 | if (ss == failed_ss) { | ||
2174 | if (cancel_failed_ss && ss->cancel_attach) | ||
2175 | ss->cancel_attach(ss, cgrp, leader); | ||
2176 | break; | ||
2177 | } | ||
2178 | if (ss->cancel_attach) | ||
2179 | ss->cancel_attach(ss, cgrp, leader); | ||
2180 | } | ||
2181 | } | ||
2182 | /* clean up the array of referenced threads in the group. */ | ||
2183 | for (i = 0; i < group_size; i++) { | ||
2184 | tsk = flex_array_get_ptr(group, i); | ||
2185 | put_task_struct(tsk); | ||
2186 | } | ||
2187 | out_free_group_list: | ||
2188 | flex_array_free(group); | ||
2189 | return retval; | ||
2190 | } | ||
2191 | |||
2192 | /* | ||
2193 | * Find the task_struct of the task to attach by vpid and pass it along to the | ||
2194 | * function to attach either it or all tasks in its threadgroup. Will take | ||
2195 | * cgroup_mutex; may take task_lock of task. | ||
1865 | */ | 2196 | */ |
1866 | static int attach_task_by_pid(struct cgroup *cgrp, u64 pid) | 2197 | static int attach_task_by_pid(struct cgroup *cgrp, u64 pid, bool threadgroup) |
1867 | { | 2198 | { |
1868 | struct task_struct *tsk; | 2199 | struct task_struct *tsk; |
1869 | const struct cred *cred = current_cred(), *tcred; | 2200 | const struct cred *cred = current_cred(), *tcred; |
1870 | int ret; | 2201 | int ret; |
1871 | 2202 | ||
2203 | if (!cgroup_lock_live_group(cgrp)) | ||
2204 | return -ENODEV; | ||
2205 | |||
1872 | if (pid) { | 2206 | if (pid) { |
1873 | rcu_read_lock(); | 2207 | rcu_read_lock(); |
1874 | tsk = find_task_by_vpid(pid); | 2208 | tsk = find_task_by_vpid(pid); |
1875 | if (!tsk || tsk->flags & PF_EXITING) { | 2209 | if (!tsk) { |
1876 | rcu_read_unlock(); | 2210 | rcu_read_unlock(); |
2211 | cgroup_unlock(); | ||
2212 | return -ESRCH; | ||
2213 | } | ||
2214 | if (threadgroup) { | ||
2215 | /* | ||
2216 | * RCU protects this access, since tsk was found in the | ||
2217 | * tid map. a race with de_thread may cause group_leader | ||
2218 | * to stop being the leader, but cgroup_attach_proc will | ||
2219 | * detect it later. | ||
2220 | */ | ||
2221 | tsk = tsk->group_leader; | ||
2222 | } else if (tsk->flags & PF_EXITING) { | ||
2223 | /* optimization for the single-task-only case */ | ||
2224 | rcu_read_unlock(); | ||
2225 | cgroup_unlock(); | ||
1877 | return -ESRCH; | 2226 | return -ESRCH; |
1878 | } | 2227 | } |
1879 | 2228 | ||
2229 | /* | ||
2230 | * even if we're attaching all tasks in the thread group, we | ||
2231 | * only need to check permissions on one of them. | ||
2232 | */ | ||
1880 | tcred = __task_cred(tsk); | 2233 | tcred = __task_cred(tsk); |
1881 | if (cred->euid && | 2234 | if (cred->euid && |
1882 | cred->euid != tcred->uid && | 2235 | cred->euid != tcred->uid && |
1883 | cred->euid != tcred->suid) { | 2236 | cred->euid != tcred->suid) { |
1884 | rcu_read_unlock(); | 2237 | rcu_read_unlock(); |
2238 | cgroup_unlock(); | ||
1885 | return -EACCES; | 2239 | return -EACCES; |
1886 | } | 2240 | } |
1887 | get_task_struct(tsk); | 2241 | get_task_struct(tsk); |
1888 | rcu_read_unlock(); | 2242 | rcu_read_unlock(); |
1889 | } else { | 2243 | } else { |
1890 | tsk = current; | 2244 | if (threadgroup) |
2245 | tsk = current->group_leader; | ||
2246 | else | ||
2247 | tsk = current; | ||
1891 | get_task_struct(tsk); | 2248 | get_task_struct(tsk); |
1892 | } | 2249 | } |
1893 | 2250 | ||
1894 | ret = cgroup_attach_task(cgrp, tsk); | 2251 | if (threadgroup) { |
2252 | threadgroup_fork_write_lock(tsk); | ||
2253 | ret = cgroup_attach_proc(cgrp, tsk); | ||
2254 | threadgroup_fork_write_unlock(tsk); | ||
2255 | } else { | ||
2256 | ret = cgroup_attach_task(cgrp, tsk); | ||
2257 | } | ||
1895 | put_task_struct(tsk); | 2258 | put_task_struct(tsk); |
2259 | cgroup_unlock(); | ||
1896 | return ret; | 2260 | return ret; |
1897 | } | 2261 | } |
1898 | 2262 | ||
1899 | static int cgroup_tasks_write(struct cgroup *cgrp, struct cftype *cft, u64 pid) | 2263 | static int cgroup_tasks_write(struct cgroup *cgrp, struct cftype *cft, u64 pid) |
1900 | { | 2264 | { |
2265 | return attach_task_by_pid(cgrp, pid, false); | ||
2266 | } | ||
2267 | |||
2268 | static int cgroup_procs_write(struct cgroup *cgrp, struct cftype *cft, u64 tgid) | ||
2269 | { | ||
1901 | int ret; | 2270 | int ret; |
1902 | if (!cgroup_lock_live_group(cgrp)) | 2271 | do { |
1903 | return -ENODEV; | 2272 | /* |
1904 | ret = attach_task_by_pid(cgrp, pid); | 2273 | * attach_proc fails with -EAGAIN if threadgroup leadership |
1905 | cgroup_unlock(); | 2274 | * changes in the middle of the operation, in which case we need |
2275 | * to find the task_struct for the new leader and start over. | ||
2276 | */ | ||
2277 | ret = attach_task_by_pid(cgrp, tgid, true); | ||
2278 | } while (ret == -EAGAIN); | ||
1906 | return ret; | 2279 | return ret; |
1907 | } | 2280 | } |
1908 | 2281 | ||
@@ -3259,9 +3632,9 @@ static struct cftype files[] = { | |||
3259 | { | 3632 | { |
3260 | .name = CGROUP_FILE_GENERIC_PREFIX "procs", | 3633 | .name = CGROUP_FILE_GENERIC_PREFIX "procs", |
3261 | .open = cgroup_procs_open, | 3634 | .open = cgroup_procs_open, |
3262 | /* .write_u64 = cgroup_procs_write, TODO */ | 3635 | .write_u64 = cgroup_procs_write, |
3263 | .release = cgroup_pidlist_release, | 3636 | .release = cgroup_pidlist_release, |
3264 | .mode = S_IRUGO, | 3637 | .mode = S_IRUGO | S_IWUSR, |
3265 | }, | 3638 | }, |
3266 | { | 3639 | { |
3267 | .name = "notify_on_release", | 3640 | .name = "notify_on_release", |
@@ -4257,122 +4630,6 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks) | |||
4257 | } | 4630 | } |
4258 | 4631 | ||
4259 | /** | 4632 | /** |
4260 | * cgroup_clone - clone the cgroup the given subsystem is attached to | ||
4261 | * @tsk: the task to be moved | ||
4262 | * @subsys: the given subsystem | ||
4263 | * @nodename: the name for the new cgroup | ||
4264 | * | ||
4265 | * Duplicate the current cgroup in the hierarchy that the given | ||
4266 | * subsystem is attached to, and move this task into the new | ||
4267 | * child. | ||
4268 | */ | ||
4269 | int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys, | ||
4270 | char *nodename) | ||
4271 | { | ||
4272 | struct dentry *dentry; | ||
4273 | int ret = 0; | ||
4274 | struct cgroup *parent, *child; | ||
4275 | struct inode *inode; | ||
4276 | struct css_set *cg; | ||
4277 | struct cgroupfs_root *root; | ||
4278 | struct cgroup_subsys *ss; | ||
4279 | |||
4280 | /* We shouldn't be called by an unregistered subsystem */ | ||
4281 | BUG_ON(!subsys->active); | ||
4282 | |||
4283 | /* First figure out what hierarchy and cgroup we're dealing | ||
4284 | * with, and pin them so we can drop cgroup_mutex */ | ||
4285 | mutex_lock(&cgroup_mutex); | ||
4286 | again: | ||
4287 | root = subsys->root; | ||
4288 | if (root == &rootnode) { | ||
4289 | mutex_unlock(&cgroup_mutex); | ||
4290 | return 0; | ||
4291 | } | ||
4292 | |||
4293 | /* Pin the hierarchy */ | ||
4294 | if (!atomic_inc_not_zero(&root->sb->s_active)) { | ||
4295 | /* We race with the final deactivate_super() */ | ||
4296 | mutex_unlock(&cgroup_mutex); | ||
4297 | return 0; | ||
4298 | } | ||
4299 | |||
4300 | /* Keep the cgroup alive */ | ||
4301 | task_lock(tsk); | ||
4302 | parent = task_cgroup(tsk, subsys->subsys_id); | ||
4303 | cg = tsk->cgroups; | ||
4304 | get_css_set(cg); | ||
4305 | task_unlock(tsk); | ||
4306 | |||
4307 | mutex_unlock(&cgroup_mutex); | ||
4308 | |||
4309 | /* Now do the VFS work to create a cgroup */ | ||
4310 | inode = parent->dentry->d_inode; | ||
4311 | |||
4312 | /* Hold the parent directory mutex across this operation to | ||
4313 | * stop anyone else deleting the new cgroup */ | ||
4314 | mutex_lock(&inode->i_mutex); | ||
4315 | dentry = lookup_one_len(nodename, parent->dentry, strlen(nodename)); | ||
4316 | if (IS_ERR(dentry)) { | ||
4317 | printk(KERN_INFO | ||
4318 | "cgroup: Couldn't allocate dentry for %s: %ld\n", nodename, | ||
4319 | PTR_ERR(dentry)); | ||
4320 | ret = PTR_ERR(dentry); | ||
4321 | goto out_release; | ||
4322 | } | ||
4323 | |||
4324 | /* Create the cgroup directory, which also creates the cgroup */ | ||
4325 | ret = vfs_mkdir(inode, dentry, 0755); | ||
4326 | child = __d_cgrp(dentry); | ||
4327 | dput(dentry); | ||
4328 | if (ret) { | ||
4329 | printk(KERN_INFO | ||
4330 | "Failed to create cgroup %s: %d\n", nodename, | ||
4331 | ret); | ||
4332 | goto out_release; | ||
4333 | } | ||
4334 | |||
4335 | /* The cgroup now exists. Retake cgroup_mutex and check | ||
4336 | * that we're still in the same state that we thought we | ||
4337 | * were. */ | ||
4338 | mutex_lock(&cgroup_mutex); | ||
4339 | if ((root != subsys->root) || | ||
4340 | (parent != task_cgroup(tsk, subsys->subsys_id))) { | ||
4341 | /* Aargh, we raced ... */ | ||
4342 | mutex_unlock(&inode->i_mutex); | ||
4343 | put_css_set(cg); | ||
4344 | |||
4345 | deactivate_super(root->sb); | ||
4346 | /* The cgroup is still accessible in the VFS, but | ||
4347 | * we're not going to try to rmdir() it at this | ||
4348 | * point. */ | ||
4349 | printk(KERN_INFO | ||
4350 | "Race in cgroup_clone() - leaking cgroup %s\n", | ||
4351 | nodename); | ||
4352 | goto again; | ||
4353 | } | ||
4354 | |||
4355 | /* do any required auto-setup */ | ||
4356 | for_each_subsys(root, ss) { | ||
4357 | if (ss->post_clone) | ||
4358 | ss->post_clone(ss, child); | ||
4359 | } | ||
4360 | |||
4361 | /* All seems fine. Finish by moving the task into the new cgroup */ | ||
4362 | ret = cgroup_attach_task(child, tsk); | ||
4363 | mutex_unlock(&cgroup_mutex); | ||
4364 | |||
4365 | out_release: | ||
4366 | mutex_unlock(&inode->i_mutex); | ||
4367 | |||
4368 | mutex_lock(&cgroup_mutex); | ||
4369 | put_css_set(cg); | ||
4370 | mutex_unlock(&cgroup_mutex); | ||
4371 | deactivate_super(root->sb); | ||
4372 | return ret; | ||
4373 | } | ||
4374 | |||
4375 | /** | ||
4376 | * cgroup_is_descendant - see if @cgrp is a descendant of @task's cgrp | 4633 | * cgroup_is_descendant - see if @cgrp is a descendant of @task's cgrp |
4377 | * @cgrp: the cgroup in question | 4634 | * @cgrp: the cgroup in question |
4378 | * @task: the task in question | 4635 | * @task: the task in question |
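The cgroup.c changes above split attachment into a whole-group check (can_attach) plus new per-thread hooks (can_attach_task, pre_attach, attach_task), with attach still doing the expensive, non-thread-specific work after the migration commits. The sketch below is not part of this commit: it only illustrates, for a hypothetical "example" controller, how the new hooks slot into a cgroup_subsys using the same signatures that freezer and cpuset adopt later in this diff. example_subsys_id and all example_* names are hypothetical; a real controller would also need create/destroy callbacks and an entry in include/linux/cgroup_subsys.h.

#include <linux/cgroup.h>

/* whole-group check, called once per attach operation */
static int example_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
			      struct task_struct *task)
{
	return 0;
}

/* per-thread check, called for every thread being moved */
static int example_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
{
	return 0;
}

/* one-time setup before the per-thread attach_task calls */
static void example_pre_attach(struct cgroup *cgrp)
{
}

/* per-thread attach work; runs once per thread and cannot fail */
static void example_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
{
}

/* expensive, non-thread-specific work after the migration commits */
static void example_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
			   struct cgroup *oldcgrp, struct task_struct *tsk)
{
}

struct cgroup_subsys example_subsys = {
	.name		 = "example",
	.can_attach	 = example_can_attach,
	.can_attach_task = example_can_attach_task,
	.pre_attach	 = example_pre_attach,
	.attach_task	 = example_attach_task,
	.attach		 = example_attach,
	.subsys_id	 = example_subsys_id,	/* hypothetical */
};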
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index e7bebb7c6c38..e691818d7e45 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -160,7 +160,7 @@ static void freezer_destroy(struct cgroup_subsys *ss, | |||
160 | */ | 160 | */ |
161 | static int freezer_can_attach(struct cgroup_subsys *ss, | 161 | static int freezer_can_attach(struct cgroup_subsys *ss, |
162 | struct cgroup *new_cgroup, | 162 | struct cgroup *new_cgroup, |
163 | struct task_struct *task, bool threadgroup) | 163 | struct task_struct *task) |
164 | { | 164 | { |
165 | struct freezer *freezer; | 165 | struct freezer *freezer; |
166 | 166 | ||
@@ -172,26 +172,17 @@ static int freezer_can_attach(struct cgroup_subsys *ss, | |||
172 | if (freezer->state != CGROUP_THAWED) | 172 | if (freezer->state != CGROUP_THAWED) |
173 | return -EBUSY; | 173 | return -EBUSY; |
174 | 174 | ||
175 | return 0; | ||
176 | } | ||
177 | |||
178 | static int freezer_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk) | ||
179 | { | ||
175 | rcu_read_lock(); | 180 | rcu_read_lock(); |
176 | if (__cgroup_freezing_or_frozen(task)) { | 181 | if (__cgroup_freezing_or_frozen(tsk)) { |
177 | rcu_read_unlock(); | 182 | rcu_read_unlock(); |
178 | return -EBUSY; | 183 | return -EBUSY; |
179 | } | 184 | } |
180 | rcu_read_unlock(); | 185 | rcu_read_unlock(); |
181 | |||
182 | if (threadgroup) { | ||
183 | struct task_struct *c; | ||
184 | |||
185 | rcu_read_lock(); | ||
186 | list_for_each_entry_rcu(c, &task->thread_group, thread_group) { | ||
187 | if (__cgroup_freezing_or_frozen(c)) { | ||
188 | rcu_read_unlock(); | ||
189 | return -EBUSY; | ||
190 | } | ||
191 | } | ||
192 | rcu_read_unlock(); | ||
193 | } | ||
194 | |||
195 | return 0; | 186 | return 0; |
196 | } | 187 | } |
197 | 188 | ||
@@ -390,6 +381,9 @@ struct cgroup_subsys freezer_subsys = { | |||
390 | .populate = freezer_populate, | 381 | .populate = freezer_populate, |
391 | .subsys_id = freezer_subsys_id, | 382 | .subsys_id = freezer_subsys_id, |
392 | .can_attach = freezer_can_attach, | 383 | .can_attach = freezer_can_attach, |
384 | .can_attach_task = freezer_can_attach_task, | ||
385 | .pre_attach = NULL, | ||
386 | .attach_task = NULL, | ||
393 | .attach = NULL, | 387 | .attach = NULL, |
394 | .fork = freezer_fork, | 388 | .fork = freezer_fork, |
395 | .exit = NULL, | 389 | .exit = NULL, |
diff --git a/kernel/compat.c b/kernel/compat.c
index 9214dcd087b7..fc9eb093acd5 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -293,6 +293,8 @@ asmlinkage long compat_sys_times(struct compat_tms __user *tbuf) | |||
293 | return compat_jiffies_to_clock_t(jiffies); | 293 | return compat_jiffies_to_clock_t(jiffies); |
294 | } | 294 | } |
295 | 295 | ||
296 | #ifdef __ARCH_WANT_SYS_SIGPENDING | ||
297 | |||
296 | /* | 298 | /* |
297 | * Assumption: old_sigset_t and compat_old_sigset_t are both | 299 | * Assumption: old_sigset_t and compat_old_sigset_t are both |
298 | * types that can be passed to put_user()/get_user(). | 300 | * types that can be passed to put_user()/get_user(). |
@@ -312,6 +314,10 @@ asmlinkage long compat_sys_sigpending(compat_old_sigset_t __user *set) | |||
312 | return ret; | 314 | return ret; |
313 | } | 315 | } |
314 | 316 | ||
317 | #endif | ||
318 | |||
319 | #ifdef __ARCH_WANT_SYS_SIGPROCMASK | ||
320 | |||
315 | asmlinkage long compat_sys_sigprocmask(int how, compat_old_sigset_t __user *set, | 321 | asmlinkage long compat_sys_sigprocmask(int how, compat_old_sigset_t __user *set, |
316 | compat_old_sigset_t __user *oset) | 322 | compat_old_sigset_t __user *oset) |
317 | { | 323 | { |
@@ -333,6 +339,8 @@ asmlinkage long compat_sys_sigprocmask(int how, compat_old_sigset_t __user *set, | |||
333 | return ret; | 339 | return ret; |
334 | } | 340 | } |
335 | 341 | ||
342 | #endif | ||
343 | |||
336 | asmlinkage long compat_sys_setrlimit(unsigned int resource, | 344 | asmlinkage long compat_sys_setrlimit(unsigned int resource, |
337 | struct compat_rlimit __user *rlim) | 345 | struct compat_rlimit __user *rlim) |
338 | { | 346 | { |
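The compat.c hunks fence compat_sys_sigpending() and compat_sys_sigprocmask() behind __ARCH_WANT_SYS_SIGPENDING / __ARCH_WANT_SYS_SIGPROCMASK, so only architectures that opt into the legacy sigmask syscalls build the compat versions. The opt-in is a pair of defines in the architecture's unistd.h; roughly (exact placement varies by architecture, shown here only as an assumption about the arch side):

/* e.g. in arch/<arch>/include/asm/unistd.h */
#define __ARCH_WANT_SYS_SIGPENDING
#define __ARCH_WANT_SYS_SIGPROCMASK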
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 2bb8c2e98fff..9c9b7545c810 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1367,14 +1367,10 @@ static int fmeter_getrate(struct fmeter *fmp) | |||
1367 | return val; | 1367 | return val; |
1368 | } | 1368 | } |
1369 | 1369 | ||
1370 | /* Protected by cgroup_lock */ | ||
1371 | static cpumask_var_t cpus_attach; | ||
1372 | |||
1373 | /* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */ | 1370 | /* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */ |
1374 | static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cont, | 1371 | static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cont, |
1375 | struct task_struct *tsk, bool threadgroup) | 1372 | struct task_struct *tsk) |
1376 | { | 1373 | { |
1377 | int ret; | ||
1378 | struct cpuset *cs = cgroup_cs(cont); | 1374 | struct cpuset *cs = cgroup_cs(cont); |
1379 | 1375 | ||
1380 | if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)) | 1376 | if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)) |
@@ -1391,29 +1387,42 @@ static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cont, | |||
1391 | if (tsk->flags & PF_THREAD_BOUND) | 1387 | if (tsk->flags & PF_THREAD_BOUND) |
1392 | return -EINVAL; | 1388 | return -EINVAL; |
1393 | 1389 | ||
1394 | ret = security_task_setscheduler(tsk); | ||
1395 | if (ret) | ||
1396 | return ret; | ||
1397 | if (threadgroup) { | ||
1398 | struct task_struct *c; | ||
1399 | |||
1400 | rcu_read_lock(); | ||
1401 | list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) { | ||
1402 | ret = security_task_setscheduler(c); | ||
1403 | if (ret) { | ||
1404 | rcu_read_unlock(); | ||
1405 | return ret; | ||
1406 | } | ||
1407 | } | ||
1408 | rcu_read_unlock(); | ||
1409 | } | ||
1410 | return 0; | 1390 | return 0; |
1411 | } | 1391 | } |
1412 | 1392 | ||
1413 | static void cpuset_attach_task(struct task_struct *tsk, nodemask_t *to, | 1393 | static int cpuset_can_attach_task(struct cgroup *cgrp, struct task_struct *task) |
1414 | struct cpuset *cs) | 1394 | { |
1395 | return security_task_setscheduler(task); | ||
1396 | } | ||
1397 | |||
1398 | /* | ||
1399 | * Protected by cgroup_lock. The nodemasks must be stored globally because | ||
1400 | * dynamically allocating them is not allowed in pre_attach, and they must | ||
1401 | * persist among pre_attach, attach_task, and attach. | ||
1402 | */ | ||
1403 | static cpumask_var_t cpus_attach; | ||
1404 | static nodemask_t cpuset_attach_nodemask_from; | ||
1405 | static nodemask_t cpuset_attach_nodemask_to; | ||
1406 | |||
1407 | /* Set-up work for before attaching each task. */ | ||
1408 | static void cpuset_pre_attach(struct cgroup *cont) | ||
1409 | { | ||
1410 | struct cpuset *cs = cgroup_cs(cont); | ||
1411 | |||
1412 | if (cs == &top_cpuset) | ||
1413 | cpumask_copy(cpus_attach, cpu_possible_mask); | ||
1414 | else | ||
1415 | guarantee_online_cpus(cs, cpus_attach); | ||
1416 | |||
1417 | guarantee_online_mems(cs, &cpuset_attach_nodemask_to); | ||
1418 | } | ||
1419 | |||
1420 | /* Per-thread attachment work. */ | ||
1421 | static void cpuset_attach_task(struct cgroup *cont, struct task_struct *tsk) | ||
1415 | { | 1422 | { |
1416 | int err; | 1423 | int err; |
1424 | struct cpuset *cs = cgroup_cs(cont); | ||
1425 | |||
1417 | /* | 1426 | /* |
1418 | * can_attach beforehand should guarantee that this doesn't fail. | 1427 | * can_attach beforehand should guarantee that this doesn't fail. |
1419 | * TODO: have a better way to handle failure here | 1428 | * TODO: have a better way to handle failure here |
@@ -1421,45 +1430,29 @@ static void cpuset_attach_task(struct task_struct *tsk, nodemask_t *to, | |||
1421 | err = set_cpus_allowed_ptr(tsk, cpus_attach); | 1430 | err = set_cpus_allowed_ptr(tsk, cpus_attach); |
1422 | WARN_ON_ONCE(err); | 1431 | WARN_ON_ONCE(err); |
1423 | 1432 | ||
1424 | cpuset_change_task_nodemask(tsk, to); | 1433 | cpuset_change_task_nodemask(tsk, &cpuset_attach_nodemask_to); |
1425 | cpuset_update_task_spread_flag(cs, tsk); | 1434 | cpuset_update_task_spread_flag(cs, tsk); |
1426 | |||
1427 | } | 1435 | } |
1428 | 1436 | ||
1429 | static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cont, | 1437 | static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cont, |
1430 | struct cgroup *oldcont, struct task_struct *tsk, | 1438 | struct cgroup *oldcont, struct task_struct *tsk) |
1431 | bool threadgroup) | ||
1432 | { | 1439 | { |
1433 | struct mm_struct *mm; | 1440 | struct mm_struct *mm; |
1434 | struct cpuset *cs = cgroup_cs(cont); | 1441 | struct cpuset *cs = cgroup_cs(cont); |
1435 | struct cpuset *oldcs = cgroup_cs(oldcont); | 1442 | struct cpuset *oldcs = cgroup_cs(oldcont); |
1436 | static nodemask_t to; /* protected by cgroup_mutex */ | ||
1437 | 1443 | ||
1438 | if (cs == &top_cpuset) { | 1444 | /* |
1439 | cpumask_copy(cpus_attach, cpu_possible_mask); | 1445 | * Change mm, possibly for multiple threads in a threadgroup. This is |
1440 | } else { | 1446 | * expensive and may sleep. |
1441 | guarantee_online_cpus(cs, cpus_attach); | 1447 | */ |
1442 | } | 1448 | cpuset_attach_nodemask_from = oldcs->mems_allowed; |
1443 | guarantee_online_mems(cs, &to); | 1449 | cpuset_attach_nodemask_to = cs->mems_allowed; |
1444 | |||
1445 | /* do per-task migration stuff possibly for each in the threadgroup */ | ||
1446 | cpuset_attach_task(tsk, &to, cs); | ||
1447 | if (threadgroup) { | ||
1448 | struct task_struct *c; | ||
1449 | rcu_read_lock(); | ||
1450 | list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) { | ||
1451 | cpuset_attach_task(c, &to, cs); | ||
1452 | } | ||
1453 | rcu_read_unlock(); | ||
1454 | } | ||
1455 | |||
1456 | /* change mm; only needs to be done once even if threadgroup */ | ||
1457 | to = cs->mems_allowed; | ||
1458 | mm = get_task_mm(tsk); | 1450 | mm = get_task_mm(tsk); |
1459 | if (mm) { | 1451 | if (mm) { |
1460 | mpol_rebind_mm(mm, &to); | 1452 | mpol_rebind_mm(mm, &cpuset_attach_nodemask_to); |
1461 | if (is_memory_migrate(cs)) | 1453 | if (is_memory_migrate(cs)) |
1462 | cpuset_migrate_mm(mm, &oldcs->mems_allowed, &to); | 1454 | cpuset_migrate_mm(mm, &cpuset_attach_nodemask_from, |
1455 | &cpuset_attach_nodemask_to); | ||
1463 | mmput(mm); | 1456 | mmput(mm); |
1464 | } | 1457 | } |
1465 | } | 1458 | } |
@@ -1809,10 +1802,9 @@ static int cpuset_populate(struct cgroup_subsys *ss, struct cgroup *cont) | |||
1809 | } | 1802 | } |
1810 | 1803 | ||
1811 | /* | 1804 | /* |
1812 | * post_clone() is called at the end of cgroup_clone(). | 1805 | * post_clone() is called during cgroup_create() when the |
1813 | * 'cgroup' was just created automatically as a result of | 1806 | * clone_children mount argument was specified. The cgroup |
1814 | * a cgroup_clone(), and the current task is about to | 1807 | * can not yet have any tasks. |
1815 | * be moved into 'cgroup'. | ||
1816 | * | 1808 | * |
1817 | * Currently we refuse to set up the cgroup - thereby | 1809 | * Currently we refuse to set up the cgroup - thereby |
1818 | * refusing the task to be entered, and as a result refusing | 1810 | * refusing the task to be entered, and as a result refusing |
@@ -1911,6 +1903,9 @@ struct cgroup_subsys cpuset_subsys = { | |||
1911 | .create = cpuset_create, | 1903 | .create = cpuset_create, |
1912 | .destroy = cpuset_destroy, | 1904 | .destroy = cpuset_destroy, |
1913 | .can_attach = cpuset_can_attach, | 1905 | .can_attach = cpuset_can_attach, |
1906 | .can_attach_task = cpuset_can_attach_task, | ||
1907 | .pre_attach = cpuset_pre_attach, | ||
1908 | .attach_task = cpuset_attach_task, | ||
1914 | .attach = cpuset_attach, | 1909 | .attach = cpuset_attach, |
1915 | .populate = cpuset_populate, | 1910 | .populate = cpuset_populate, |
1916 | .post_clone = cpuset_post_clone, | 1911 | .post_clone = cpuset_post_clone, |
@@ -2195,7 +2190,7 @@ int cpuset_cpus_allowed_fallback(struct task_struct *tsk) | |||
2195 | rcu_read_lock(); | 2190 | rcu_read_lock(); |
2196 | cs = task_cs(tsk); | 2191 | cs = task_cs(tsk); |
2197 | if (cs) | 2192 | if (cs) |
2198 | cpumask_copy(&tsk->cpus_allowed, cs->cpus_allowed); | 2193 | do_set_cpus_allowed(tsk, cs->cpus_allowed); |
2199 | rcu_read_unlock(); | 2194 | rcu_read_unlock(); |
2200 | 2195 | ||
2201 | /* | 2196 | /* |
@@ -2222,7 +2217,7 @@ int cpuset_cpus_allowed_fallback(struct task_struct *tsk) | |||
2222 | * Like above we can temporary set any mask and rely on | 2217 | * Like above we can temporary set any mask and rely on |
2223 | * set_cpus_allowed_ptr() as synchronization point. | 2218 | * set_cpus_allowed_ptr() as synchronization point. |
2224 | */ | 2219 | */ |
2225 | cpumask_copy(&tsk->cpus_allowed, cpu_possible_mask); | 2220 | do_set_cpus_allowed(tsk, cpu_possible_mask); |
2226 | cpu = cpumask_any(cpu_active_mask); | 2221 | cpu = cpumask_any(cpu_active_mask); |
2227 | } | 2222 | } |
2228 | 2223 | ||
diff --git a/kernel/cred.c b/kernel/cred.c
index 8093c16b84b1..174fa84eca30 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -1,4 +1,4 @@ | |||
1 | /* Task credentials management - see Documentation/credentials.txt | 1 | /* Task credentials management - see Documentation/security/credentials.txt |
2 | * | 2 | * |
3 | * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. | 3 | * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. |
4 | * Written by David Howells (dhowells@redhat.com) | 4 | * Written by David Howells (dhowells@redhat.com) |
@@ -49,10 +49,10 @@ struct cred init_cred = { | |||
49 | .magic = CRED_MAGIC, | 49 | .magic = CRED_MAGIC, |
50 | #endif | 50 | #endif |
51 | .securebits = SECUREBITS_DEFAULT, | 51 | .securebits = SECUREBITS_DEFAULT, |
52 | .cap_inheritable = CAP_INIT_INH_SET, | 52 | .cap_inheritable = CAP_EMPTY_SET, |
53 | .cap_permitted = CAP_FULL_SET, | 53 | .cap_permitted = CAP_FULL_SET, |
54 | .cap_effective = CAP_INIT_EFF_SET, | 54 | .cap_effective = CAP_FULL_SET, |
55 | .cap_bset = CAP_INIT_BSET, | 55 | .cap_bset = CAP_FULL_SET, |
56 | .user = INIT_USER, | 56 | .user = INIT_USER, |
57 | .user_ns = &init_user_ns, | 57 | .user_ns = &init_user_ns, |
58 | .group_info = &init_groups, | 58 | .group_info = &init_groups, |
diff --git a/kernel/events/core.c b/kernel/events/core.c
index c09767f7db3e..d863b3c057bb 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5028,6 +5028,14 @@ static int __perf_event_overflow(struct perf_event *event, int nmi, | |||
5028 | else | 5028 | else |
5029 | perf_event_output(event, nmi, data, regs); | 5029 | perf_event_output(event, nmi, data, regs); |
5030 | 5030 | ||
5031 | if (event->fasync && event->pending_kill) { | ||
5032 | if (nmi) { | ||
5033 | event->pending_wakeup = 1; | ||
5034 | irq_work_queue(&event->pending); | ||
5035 | } else | ||
5036 | perf_event_wakeup(event); | ||
5037 | } | ||
5038 | |||
5031 | return ret; | 5039 | return ret; |
5032 | } | 5040 | } |
5033 | 5041 | ||
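The events/core.c hunk above makes __perf_event_overflow wake fasync consumers when an overflow carries a pending_kill, deferring through irq_work when the overflow fires in NMI context. A minimal userspace sketch of consuming that via SIGIO, assuming perf_fd was obtained elsewhere from perf_event_open() with a sampling period (the helper names here are illustrative, not from this commit):

#include <fcntl.h>
#include <signal.h>
#include <string.h>
#include <unistd.h>

static void on_sigio(int sig)
{
	/* an overflow notification arrived; drain the mmap ring buffer */
}

/* arm asynchronous SIGIO delivery on an existing perf event fd */
static int arm_sigio(int perf_fd)
{
	struct sigaction sa;

	memset(&sa, 0, sizeof(sa));
	sa.sa_handler = on_sigio;
	if (sigaction(SIGIO, &sa, NULL))
		return -1;
	if (fcntl(perf_fd, F_SETOWN, getpid()))
		return -1;
	return fcntl(perf_fd, F_SETFL, fcntl(perf_fd, F_GETFL) | O_ASYNC);
}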
diff --git a/kernel/fork.c b/kernel/fork.c
index 2b44d82b8237..0276c30401a0 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -59,7 +59,6 @@ | |||
59 | #include <linux/taskstats_kern.h> | 59 | #include <linux/taskstats_kern.h> |
60 | #include <linux/random.h> | 60 | #include <linux/random.h> |
61 | #include <linux/tty.h> | 61 | #include <linux/tty.h> |
62 | #include <linux/proc_fs.h> | ||
63 | #include <linux/blkdev.h> | 62 | #include <linux/blkdev.h> |
64 | #include <linux/fs_struct.h> | 63 | #include <linux/fs_struct.h> |
65 | #include <linux/magic.h> | 64 | #include <linux/magic.h> |
@@ -383,15 +382,14 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) | |||
383 | get_file(file); | 382 | get_file(file); |
384 | if (tmp->vm_flags & VM_DENYWRITE) | 383 | if (tmp->vm_flags & VM_DENYWRITE) |
385 | atomic_dec(&inode->i_writecount); | 384 | atomic_dec(&inode->i_writecount); |
386 | spin_lock(&mapping->i_mmap_lock); | 385 | mutex_lock(&mapping->i_mmap_mutex); |
387 | if (tmp->vm_flags & VM_SHARED) | 386 | if (tmp->vm_flags & VM_SHARED) |
388 | mapping->i_mmap_writable++; | 387 | mapping->i_mmap_writable++; |
389 | tmp->vm_truncate_count = mpnt->vm_truncate_count; | ||
390 | flush_dcache_mmap_lock(mapping); | 388 | flush_dcache_mmap_lock(mapping); |
391 | /* insert tmp into the share list, just after mpnt */ | 389 | /* insert tmp into the share list, just after mpnt */ |
392 | vma_prio_tree_add(tmp, mpnt); | 390 | vma_prio_tree_add(tmp, mpnt); |
393 | flush_dcache_mmap_unlock(mapping); | 391 | flush_dcache_mmap_unlock(mapping); |
394 | spin_unlock(&mapping->i_mmap_lock); | 392 | mutex_unlock(&mapping->i_mmap_mutex); |
395 | } | 393 | } |
396 | 394 | ||
397 | /* | 395 | /* |
@@ -522,11 +520,12 @@ struct mm_struct * mm_alloc(void) | |||
522 | struct mm_struct * mm; | 520 | struct mm_struct * mm; |
523 | 521 | ||
524 | mm = allocate_mm(); | 522 | mm = allocate_mm(); |
525 | if (mm) { | 523 | if (!mm) |
526 | memset(mm, 0, sizeof(*mm)); | 524 | return NULL; |
527 | mm = mm_init(mm, current); | 525 | |
528 | } | 526 | memset(mm, 0, sizeof(*mm)); |
529 | return mm; | 527 | mm_init_cpumask(mm); |
528 | return mm_init(mm, current); | ||
530 | } | 529 | } |
531 | 530 | ||
532 | /* | 531 | /* |
@@ -573,6 +572,57 @@ void mmput(struct mm_struct *mm) | |||
573 | } | 572 | } |
574 | EXPORT_SYMBOL_GPL(mmput); | 573 | EXPORT_SYMBOL_GPL(mmput); |
575 | 574 | ||
575 | /* | ||
576 | * We added or removed a vma mapping the executable. The vmas are only mapped | ||
577 | * during exec and are not mapped with the mmap system call. | ||
578 | * Callers must hold down_write() on the mm's mmap_sem for these | ||
579 | */ | ||
580 | void added_exe_file_vma(struct mm_struct *mm) | ||
581 | { | ||
582 | mm->num_exe_file_vmas++; | ||
583 | } | ||
584 | |||
585 | void removed_exe_file_vma(struct mm_struct *mm) | ||
586 | { | ||
587 | mm->num_exe_file_vmas--; | ||
588 | if ((mm->num_exe_file_vmas == 0) && mm->exe_file){ | ||
589 | fput(mm->exe_file); | ||
590 | mm->exe_file = NULL; | ||
591 | } | ||
592 | |||
593 | } | ||
594 | |||
595 | void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file) | ||
596 | { | ||
597 | if (new_exe_file) | ||
598 | get_file(new_exe_file); | ||
599 | if (mm->exe_file) | ||
600 | fput(mm->exe_file); | ||
601 | mm->exe_file = new_exe_file; | ||
602 | mm->num_exe_file_vmas = 0; | ||
603 | } | ||
604 | |||
605 | struct file *get_mm_exe_file(struct mm_struct *mm) | ||
606 | { | ||
607 | struct file *exe_file; | ||
608 | |||
609 | /* We need mmap_sem to protect against races with removal of | ||
610 | * VM_EXECUTABLE vmas */ | ||
611 | down_read(&mm->mmap_sem); | ||
612 | exe_file = mm->exe_file; | ||
613 | if (exe_file) | ||
614 | get_file(exe_file); | ||
615 | up_read(&mm->mmap_sem); | ||
616 | return exe_file; | ||
617 | } | ||
618 | |||
619 | static void dup_mm_exe_file(struct mm_struct *oldmm, struct mm_struct *newmm) | ||
620 | { | ||
621 | /* It's safe to write the exe_file pointer without exe_file_lock because | ||
622 | * this is called during fork when the task is not yet in /proc */ | ||
623 | newmm->exe_file = get_mm_exe_file(oldmm); | ||
624 | } | ||
625 | |||
576 | /** | 626 | /** |
577 | * get_task_mm - acquire a reference to the task's mm | 627 | * get_task_mm - acquire a reference to the task's mm |
578 | * | 628 | * |
@@ -679,6 +729,7 @@ struct mm_struct *dup_mm(struct task_struct *tsk) | |||
679 | goto fail_nomem; | 729 | goto fail_nomem; |
680 | 730 | ||
681 | memcpy(mm, oldmm, sizeof(*mm)); | 731 | memcpy(mm, oldmm, sizeof(*mm)); |
732 | mm_init_cpumask(mm); | ||
682 | 733 | ||
683 | /* Initializing for Swap token stuff */ | 734 | /* Initializing for Swap token stuff */ |
684 | mm->token_priority = 0; | 735 | mm->token_priority = 0; |
@@ -927,6 +978,10 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) | |||
927 | tty_audit_fork(sig); | 978 | tty_audit_fork(sig); |
928 | sched_autogroup_fork(sig); | 979 | sched_autogroup_fork(sig); |
929 | 980 | ||
981 | #ifdef CONFIG_CGROUPS | ||
982 | init_rwsem(&sig->threadgroup_fork_lock); | ||
983 | #endif | ||
984 | |||
930 | sig->oom_adj = current->signal->oom_adj; | 985 | sig->oom_adj = current->signal->oom_adj; |
931 | sig->oom_score_adj = current->signal->oom_score_adj; | 986 | sig->oom_score_adj = current->signal->oom_score_adj; |
932 | sig->oom_score_adj_min = current->signal->oom_score_adj_min; | 987 | sig->oom_score_adj_min = current->signal->oom_score_adj_min; |
@@ -1108,6 +1163,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1108 | monotonic_to_bootbased(&p->real_start_time); | 1163 | monotonic_to_bootbased(&p->real_start_time); |
1109 | p->io_context = NULL; | 1164 | p->io_context = NULL; |
1110 | p->audit_context = NULL; | 1165 | p->audit_context = NULL; |
1166 | if (clone_flags & CLONE_THREAD) | ||
1167 | threadgroup_fork_read_lock(current); | ||
1111 | cgroup_fork(p); | 1168 | cgroup_fork(p); |
1112 | #ifdef CONFIG_NUMA | 1169 | #ifdef CONFIG_NUMA |
1113 | p->mempolicy = mpol_dup(p->mempolicy); | 1170 | p->mempolicy = mpol_dup(p->mempolicy); |
@@ -1193,12 +1250,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1193 | if (clone_flags & CLONE_THREAD) | 1250 | if (clone_flags & CLONE_THREAD) |
1194 | p->tgid = current->tgid; | 1251 | p->tgid = current->tgid; |
1195 | 1252 | ||
1196 | if (current->nsproxy != p->nsproxy) { | ||
1197 | retval = ns_cgroup_clone(p, pid); | ||
1198 | if (retval) | ||
1199 | goto bad_fork_free_pid; | ||
1200 | } | ||
1201 | |||
1202 | p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; | 1253 | p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; |
1203 | /* | 1254 | /* |
1204 | * Clear TID on mm_release()? | 1255 | * Clear TID on mm_release()? |
@@ -1312,6 +1363,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1312 | write_unlock_irq(&tasklist_lock); | 1363 | write_unlock_irq(&tasklist_lock); |
1313 | proc_fork_connector(p); | 1364 | proc_fork_connector(p); |
1314 | cgroup_post_fork(p); | 1365 | cgroup_post_fork(p); |
1366 | if (clone_flags & CLONE_THREAD) | ||
1367 | threadgroup_fork_read_unlock(current); | ||
1315 | perf_event_fork(p); | 1368 | perf_event_fork(p); |
1316 | return p; | 1369 | return p; |
1317 | 1370 | ||
@@ -1350,6 +1403,8 @@ bad_fork_cleanup_policy: | |||
1350 | mpol_put(p->mempolicy); | 1403 | mpol_put(p->mempolicy); |
1351 | bad_fork_cleanup_cgroup: | 1404 | bad_fork_cleanup_cgroup: |
1352 | #endif | 1405 | #endif |
1406 | if (clone_flags & CLONE_THREAD) | ||
1407 | threadgroup_fork_read_unlock(current); | ||
1353 | cgroup_exit(p, cgroup_callbacks_done); | 1408 | cgroup_exit(p, cgroup_callbacks_done); |
1354 | delayacct_tsk_free(p); | 1409 | delayacct_tsk_free(p); |
1355 | module_put(task_thread_info(p)->exec_domain->module); | 1410 | module_put(task_thread_info(p)->exec_domain->module); |
@@ -1507,6 +1562,13 @@ void __init proc_caches_init(void) | |||
1507 | fs_cachep = kmem_cache_create("fs_cache", | 1562 | fs_cachep = kmem_cache_create("fs_cache", |
1508 | sizeof(struct fs_struct), 0, | 1563 | sizeof(struct fs_struct), 0, |
1509 | SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL); | 1564 | SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL); |
1565 | /* | ||
1566 | * FIXME! The "sizeof(struct mm_struct)" currently includes the | ||
1567 | * whole struct cpumask for the OFFSTACK case. We could change | ||
1568 | * this to *only* allocate as much of it as required by the | ||
1569 | * maximum number of CPU's we can ever have. The cpumask_allocation | ||
1570 | * is at the end of the structure, exactly for that reason. | ||
1571 | */ | ||
1510 | mm_cachep = kmem_cache_create("mm_struct", | 1572 | mm_cachep = kmem_cache_create("mm_struct", |
1511 | sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN, | 1573 | sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN, |
1512 | SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL); | 1574 | SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL); |
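The fork.c changes bracket cgroup_fork()/cgroup_post_fork() with the new signal_struct rw_semaphore for CLONE_THREAD forks, pairing with the write-side lock that attach_task_by_pid() takes around cgroup_attach_proc() earlier in this diff. The lock helpers themselves are not in this hunk (they belong to the sched.h side of the series); consistent with the init_rwsem() call shown above, they would look roughly like the sketch below (guarded by CONFIG_CGROUPS in the real series):

static inline void threadgroup_fork_read_lock(struct task_struct *tsk)
{
	down_read(&tsk->signal->threadgroup_fork_lock);
}

static inline void threadgroup_fork_read_unlock(struct task_struct *tsk)
{
	up_read(&tsk->signal->threadgroup_fork_lock);
}

static inline void threadgroup_fork_write_lock(struct task_struct *tsk)
{
	down_write(&tsk->signal->threadgroup_fork_lock);
}

static inline void threadgroup_fork_write_unlock(struct task_struct *tsk)
{
	up_write(&tsk->signal->threadgroup_fork_lock);
}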
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index c541ee527ecb..a9205e32a059 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -748,7 +748,7 @@ static inline void retrigger_next_event(void *arg) { } | |||
748 | */ | 748 | */ |
749 | void clock_was_set(void) | 749 | void clock_was_set(void) |
750 | { | 750 | { |
751 | #ifdef CONFIG_HIGHRES_TIMERS | 751 | #ifdef CONFIG_HIGH_RES_TIMERS |
752 | /* Retrigger the CPU local events everywhere */ | 752 | /* Retrigger the CPU local events everywhere */ |
753 | on_each_cpu(retrigger_next_event, NULL, 1); | 753 | on_each_cpu(retrigger_next_event, NULL, 1); |
754 | #endif | 754 | #endif |
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 834899f2500f..4bd4faa6323a 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -19,7 +19,7 @@ static struct proc_dir_entry *root_irq_dir; | |||
19 | 19 | ||
20 | #ifdef CONFIG_SMP | 20 | #ifdef CONFIG_SMP |
21 | 21 | ||
22 | static int irq_affinity_proc_show(struct seq_file *m, void *v) | 22 | static int show_irq_affinity(int type, struct seq_file *m, void *v) |
23 | { | 23 | { |
24 | struct irq_desc *desc = irq_to_desc((long)m->private); | 24 | struct irq_desc *desc = irq_to_desc((long)m->private); |
25 | const struct cpumask *mask = desc->irq_data.affinity; | 25 | const struct cpumask *mask = desc->irq_data.affinity; |
@@ -28,7 +28,10 @@ static int irq_affinity_proc_show(struct seq_file *m, void *v) | |||
28 | if (irqd_is_setaffinity_pending(&desc->irq_data)) | 28 | if (irqd_is_setaffinity_pending(&desc->irq_data)) |
29 | mask = desc->pending_mask; | 29 | mask = desc->pending_mask; |
30 | #endif | 30 | #endif |
31 | seq_cpumask(m, mask); | 31 | if (type) |
32 | seq_cpumask_list(m, mask); | ||
33 | else | ||
34 | seq_cpumask(m, mask); | ||
32 | seq_putc(m, '\n'); | 35 | seq_putc(m, '\n'); |
33 | return 0; | 36 | return 0; |
34 | } | 37 | } |
@@ -59,7 +62,18 @@ static int irq_affinity_hint_proc_show(struct seq_file *m, void *v) | |||
59 | #endif | 62 | #endif |
60 | 63 | ||
61 | int no_irq_affinity; | 64 | int no_irq_affinity; |
62 | static ssize_t irq_affinity_proc_write(struct file *file, | 65 | static int irq_affinity_proc_show(struct seq_file *m, void *v) |
66 | { | ||
67 | return show_irq_affinity(0, m, v); | ||
68 | } | ||
69 | |||
70 | static int irq_affinity_list_proc_show(struct seq_file *m, void *v) | ||
71 | { | ||
72 | return show_irq_affinity(1, m, v); | ||
73 | } | ||
74 | |||
75 | |||
76 | static ssize_t write_irq_affinity(int type, struct file *file, | ||
63 | const char __user *buffer, size_t count, loff_t *pos) | 77 | const char __user *buffer, size_t count, loff_t *pos) |
64 | { | 78 | { |
65 | unsigned int irq = (int)(long)PDE(file->f_path.dentry->d_inode)->data; | 79 | unsigned int irq = (int)(long)PDE(file->f_path.dentry->d_inode)->data; |
@@ -72,7 +86,10 @@ static ssize_t irq_affinity_proc_write(struct file *file, | |||
72 | if (!alloc_cpumask_var(&new_value, GFP_KERNEL)) | 86 | if (!alloc_cpumask_var(&new_value, GFP_KERNEL)) |
73 | return -ENOMEM; | 87 | return -ENOMEM; |
74 | 88 | ||
75 | err = cpumask_parse_user(buffer, count, new_value); | 89 | if (type) |
90 | err = cpumask_parselist_user(buffer, count, new_value); | ||
91 | else | ||
92 | err = cpumask_parse_user(buffer, count, new_value); | ||
76 | if (err) | 93 | if (err) |
77 | goto free_cpumask; | 94 | goto free_cpumask; |
78 | 95 | ||
@@ -100,11 +117,28 @@ free_cpumask: | |||
100 | return err; | 117 | return err; |
101 | } | 118 | } |
102 | 119 | ||
120 | static ssize_t irq_affinity_proc_write(struct file *file, | ||
121 | const char __user *buffer, size_t count, loff_t *pos) | ||
122 | { | ||
123 | return write_irq_affinity(0, file, buffer, count, pos); | ||
124 | } | ||
125 | |||
126 | static ssize_t irq_affinity_list_proc_write(struct file *file, | ||
127 | const char __user *buffer, size_t count, loff_t *pos) | ||
128 | { | ||
129 | return write_irq_affinity(1, file, buffer, count, pos); | ||
130 | } | ||
131 | |||
103 | static int irq_affinity_proc_open(struct inode *inode, struct file *file) | 132 | static int irq_affinity_proc_open(struct inode *inode, struct file *file) |
104 | { | 133 | { |
105 | return single_open(file, irq_affinity_proc_show, PDE(inode)->data); | 134 | return single_open(file, irq_affinity_proc_show, PDE(inode)->data); |
106 | } | 135 | } |
107 | 136 | ||
137 | static int irq_affinity_list_proc_open(struct inode *inode, struct file *file) | ||
138 | { | ||
139 | return single_open(file, irq_affinity_list_proc_show, PDE(inode)->data); | ||
140 | } | ||
141 | |||
108 | static int irq_affinity_hint_proc_open(struct inode *inode, struct file *file) | 142 | static int irq_affinity_hint_proc_open(struct inode *inode, struct file *file) |
109 | { | 143 | { |
110 | return single_open(file, irq_affinity_hint_proc_show, PDE(inode)->data); | 144 | return single_open(file, irq_affinity_hint_proc_show, PDE(inode)->data); |
@@ -125,6 +159,14 @@ static const struct file_operations irq_affinity_hint_proc_fops = { | |||
125 | .release = single_release, | 159 | .release = single_release, |
126 | }; | 160 | }; |
127 | 161 | ||
162 | static const struct file_operations irq_affinity_list_proc_fops = { | ||
163 | .open = irq_affinity_list_proc_open, | ||
164 | .read = seq_read, | ||
165 | .llseek = seq_lseek, | ||
166 | .release = single_release, | ||
167 | .write = irq_affinity_list_proc_write, | ||
168 | }; | ||
169 | |||
128 | static int default_affinity_show(struct seq_file *m, void *v) | 170 | static int default_affinity_show(struct seq_file *m, void *v) |
129 | { | 171 | { |
130 | seq_cpumask(m, irq_default_affinity); | 172 | seq_cpumask(m, irq_default_affinity); |
@@ -289,6 +331,10 @@ void register_irq_proc(unsigned int irq, struct irq_desc *desc) | |||
289 | proc_create_data("affinity_hint", 0400, desc->dir, | 331 | proc_create_data("affinity_hint", 0400, desc->dir, |
290 | &irq_affinity_hint_proc_fops, (void *)(long)irq); | 332 | &irq_affinity_hint_proc_fops, (void *)(long)irq); |
291 | 333 | ||
334 | /* create /proc/irq/<irq>/smp_affinity_list */ | ||
335 | proc_create_data("smp_affinity_list", 0600, desc->dir, | ||
336 | &irq_affinity_list_proc_fops, (void *)(long)irq); | ||
337 | |||
292 | proc_create_data("node", 0444, desc->dir, | 338 | proc_create_data("node", 0444, desc->dir, |
293 | &irq_node_proc_fops, (void *)(long)irq); | 339 | &irq_node_proc_fops, (void *)(long)irq); |
294 | #endif | 340 | #endif |
@@ -306,6 +352,7 @@ void unregister_irq_proc(unsigned int irq, struct irq_desc *desc) | |||
306 | #ifdef CONFIG_SMP | 352 | #ifdef CONFIG_SMP |
307 | remove_proc_entry("smp_affinity", desc->dir); | 353 | remove_proc_entry("smp_affinity", desc->dir); |
308 | remove_proc_entry("affinity_hint", desc->dir); | 354 | remove_proc_entry("affinity_hint", desc->dir); |
355 | remove_proc_entry("smp_affinity_list", desc->dir); | ||
309 | remove_proc_entry("node", desc->dir); | 356 | remove_proc_entry("node", desc->dir); |
310 | #endif | 357 | #endif |
311 | remove_proc_entry("spurious", desc->dir); | 358 | remove_proc_entry("spurious", desc->dir); |
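The hunk above adds /proc/irq/<N>/smp_affinity_list, which prints and accepts CPU ranges such as "0-3,8" instead of the hex bitmask used by smp_affinity. A minimal userspace sketch; the IRQ number and CPU list are illustrative, and writing requires root:

/* Sketch: read and update an IRQ's affinity via the new list-format file. */
#include <stdio.h>

int main(void)
{
	const char *path = "/proc/irq/19/smp_affinity_list";
	char buf[64];
	FILE *f;

	f = fopen(path, "r");
	if (!f) {
		perror("open for read");
		return 1;
	}
	if (fgets(buf, sizeof(buf), f))
		printf("current affinity list: %s", buf);
	fclose(f);

	f = fopen(path, "w");		/* needs root */
	if (!f) {
		perror("open for write");
		return 1;
	}
	fputs("0-3\n", f);		/* same syntax cpumask_parselist_user() accepts */
	fclose(f);
	return 0;
}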
diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 74d1c099fbd1..fa27e750dbc0 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c | |||
@@ -105,9 +105,12 @@ static int __jump_label_text_reserved(struct jump_entry *iter_start, | |||
105 | } | 105 | } |
106 | 106 | ||
107 | static void __jump_label_update(struct jump_label_key *key, | 107 | static void __jump_label_update(struct jump_label_key *key, |
108 | struct jump_entry *entry, int enable) | 108 | struct jump_entry *entry, |
109 | struct jump_entry *stop, int enable) | ||
109 | { | 110 | { |
110 | for (; entry->key == (jump_label_t)(unsigned long)key; entry++) { | 111 | for (; (entry < stop) && |
112 | (entry->key == (jump_label_t)(unsigned long)key); | ||
113 | entry++) { | ||
111 | /* | 114 | /* |
112 | * entry->code set to 0 invalidates module init text sections | 115 | * entry->code set to 0 invalidates module init text sections |
113 | * kernel_text_address() verifies we are not in core kernel | 116 | * kernel_text_address() verifies we are not in core kernel |
@@ -181,7 +184,11 @@ static void __jump_label_mod_update(struct jump_label_key *key, int enable) | |||
181 | struct jump_label_mod *mod = key->next; | 184 | struct jump_label_mod *mod = key->next; |
182 | 185 | ||
183 | while (mod) { | 186 | while (mod) { |
184 | __jump_label_update(key, mod->entries, enable); | 187 | struct module *m = mod->mod; |
188 | |||
189 | __jump_label_update(key, mod->entries, | ||
190 | m->jump_entries + m->num_jump_entries, | ||
191 | enable); | ||
185 | mod = mod->next; | 192 | mod = mod->next; |
186 | } | 193 | } |
187 | } | 194 | } |
@@ -245,7 +252,8 @@ static int jump_label_add_module(struct module *mod) | |||
245 | key->next = jlm; | 252 | key->next = jlm; |
246 | 253 | ||
247 | if (jump_label_enabled(key)) | 254 | if (jump_label_enabled(key)) |
248 | __jump_label_update(key, iter, JUMP_LABEL_ENABLE); | 255 | __jump_label_update(key, iter, iter_stop, |
256 | JUMP_LABEL_ENABLE); | ||
249 | } | 257 | } |
250 | 258 | ||
251 | return 0; | 259 | return 0; |
@@ -371,7 +379,7 @@ static void jump_label_update(struct jump_label_key *key, int enable) | |||
371 | 379 | ||
372 | /* if there are no users, entry can be NULL */ | 380 | /* if there are no users, entry can be NULL */ |
373 | if (entry) | 381 | if (entry) |
374 | __jump_label_update(key, entry, enable); | 382 | __jump_label_update(key, entry, __stop___jump_table, enable); |
375 | 383 | ||
376 | #ifdef CONFIG_MODULES | 384 | #ifdef CONFIG_MODULES |
377 | __jump_label_mod_update(key, enable); | 385 | __jump_label_mod_update(key, enable); |
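The bug addressed above is that __jump_label_update() stopped only on a key mismatch and could therefore walk past the end of one module's jump table into unrelated entries; the added stop pointer bounds the walk. A generic standalone sketch of the pattern (names and data are illustrative, not the kernel's):

/* Sketch of bounded iteration over a packed table section: pairing the
 * content test with an explicit [start, stop) bound cannot run past the
 * section end, while the content test alone can.
 */
#include <stdio.h>

struct entry { unsigned long key; };

static void update_entries(unsigned long key,
			   struct entry *start, struct entry *stop)
{
	struct entry *e;

	for (e = start; e < stop && e->key == key; e++)
		printf("patching entry at %p\n", (void *)e);
}

int main(void)
{
	struct entry table[] = { {1}, {1}, {2}, {2} };

	/* only the two key==1 entries are touched, and never past table+4 */
	update_entries(1, table, table + 4);
	return 0;
}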
diff --git a/kernel/kmod.c b/kernel/kmod.c index 5ae0ff38425f..ad6a81c58b44 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <linux/kmod.h> | 25 | #include <linux/kmod.h> |
26 | #include <linux/slab.h> | 26 | #include <linux/slab.h> |
27 | #include <linux/completion.h> | 27 | #include <linux/completion.h> |
28 | #include <linux/cred.h> | ||
28 | #include <linux/file.h> | 29 | #include <linux/file.h> |
29 | #include <linux/fdtable.h> | 30 | #include <linux/fdtable.h> |
30 | #include <linux/workqueue.h> | 31 | #include <linux/workqueue.h> |
@@ -43,6 +44,13 @@ extern int max_threads; | |||
43 | 44 | ||
44 | static struct workqueue_struct *khelper_wq; | 45 | static struct workqueue_struct *khelper_wq; |
45 | 46 | ||
47 | #define CAP_BSET (void *)1 | ||
48 | #define CAP_PI (void *)2 | ||
49 | |||
50 | static kernel_cap_t usermodehelper_bset = CAP_FULL_SET; | ||
51 | static kernel_cap_t usermodehelper_inheritable = CAP_FULL_SET; | ||
52 | static DEFINE_SPINLOCK(umh_sysctl_lock); | ||
53 | |||
46 | #ifdef CONFIG_MODULES | 54 | #ifdef CONFIG_MODULES |
47 | 55 | ||
48 | /* | 56 | /* |
@@ -132,6 +140,7 @@ EXPORT_SYMBOL(__request_module); | |||
132 | static int ____call_usermodehelper(void *data) | 140 | static int ____call_usermodehelper(void *data) |
133 | { | 141 | { |
134 | struct subprocess_info *sub_info = data; | 142 | struct subprocess_info *sub_info = data; |
143 | struct cred *new; | ||
135 | int retval; | 144 | int retval; |
136 | 145 | ||
137 | spin_lock_irq(¤t->sighand->siglock); | 146 | spin_lock_irq(¤t->sighand->siglock); |
@@ -153,6 +162,19 @@ static int ____call_usermodehelper(void *data) | |||
153 | goto fail; | 162 | goto fail; |
154 | } | 163 | } |
155 | 164 | ||
165 | retval = -ENOMEM; | ||
166 | new = prepare_kernel_cred(current); | ||
167 | if (!new) | ||
168 | goto fail; | ||
169 | |||
170 | spin_lock(&umh_sysctl_lock); | ||
171 | new->cap_bset = cap_intersect(usermodehelper_bset, new->cap_bset); | ||
172 | new->cap_inheritable = cap_intersect(usermodehelper_inheritable, | ||
173 | new->cap_inheritable); | ||
174 | spin_unlock(&umh_sysctl_lock); | ||
175 | |||
176 | commit_creds(new); | ||
177 | |||
156 | retval = kernel_execve(sub_info->path, | 178 | retval = kernel_execve(sub_info->path, |
157 | (const char *const *)sub_info->argv, | 179 | (const char *const *)sub_info->argv, |
158 | (const char *const *)sub_info->envp); | 180 | (const char *const *)sub_info->envp); |
@@ -420,6 +442,84 @@ unlock: | |||
420 | } | 442 | } |
421 | EXPORT_SYMBOL(call_usermodehelper_exec); | 443 | EXPORT_SYMBOL(call_usermodehelper_exec); |
422 | 444 | ||
445 | static int proc_cap_handler(struct ctl_table *table, int write, | ||
446 | void __user *buffer, size_t *lenp, loff_t *ppos) | ||
447 | { | ||
448 | struct ctl_table t; | ||
449 | unsigned long cap_array[_KERNEL_CAPABILITY_U32S]; | ||
450 | kernel_cap_t new_cap; | ||
451 | int err, i; | ||
452 | |||
453 | if (write && (!capable(CAP_SETPCAP) || | ||
454 | !capable(CAP_SYS_MODULE))) | ||
455 | return -EPERM; | ||
456 | |||
457 | /* | ||
458 | * convert from the global kernel_cap_t to the ulong array to print to | ||
459 | * userspace if this is a read. | ||
460 | */ | ||
461 | spin_lock(&umh_sysctl_lock); | ||
462 | for (i = 0; i < _KERNEL_CAPABILITY_U32S; i++) { | ||
463 | if (table->data == CAP_BSET) | ||
464 | cap_array[i] = usermodehelper_bset.cap[i]; | ||
465 | else if (table->data == CAP_PI) | ||
466 | cap_array[i] = usermodehelper_inheritable.cap[i]; | ||
467 | else | ||
468 | BUG(); | ||
469 | } | ||
470 | spin_unlock(&umh_sysctl_lock); | ||
471 | |||
472 | t = *table; | ||
473 | t.data = &cap_array; | ||
474 | |||
475 | /* | ||
476 | * actually read or write an array of ulongs from userspace. Remember | ||
477 | * these are least significant 32 bits first | ||
478 | */ | ||
479 | err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos); | ||
480 | if (err < 0) | ||
481 | return err; | ||
482 | |||
483 | /* | ||
484 | * convert from the sysctl array of ulongs to the kernel_cap_t | ||
485 | * internal representation | ||
486 | */ | ||
487 | for (i = 0; i < _KERNEL_CAPABILITY_U32S; i++) | ||
488 | new_cap.cap[i] = cap_array[i]; | ||
489 | |||
490 | /* | ||
491 | * Drop everything not in the new_cap (but don't add things) | ||
492 | */ | ||
493 | spin_lock(&umh_sysctl_lock); | ||
494 | if (write) { | ||
495 | if (table->data == CAP_BSET) | ||
496 | usermodehelper_bset = cap_intersect(usermodehelper_bset, new_cap); | ||
497 | if (table->data == CAP_PI) | ||
498 | usermodehelper_inheritable = cap_intersect(usermodehelper_inheritable, new_cap); | ||
499 | } | ||
500 | spin_unlock(&umh_sysctl_lock); | ||
501 | |||
502 | return 0; | ||
503 | } | ||
504 | |||
505 | struct ctl_table usermodehelper_table[] = { | ||
506 | { | ||
507 | .procname = "bset", | ||
508 | .data = CAP_BSET, | ||
509 | .maxlen = _KERNEL_CAPABILITY_U32S * sizeof(unsigned long), | ||
510 | .mode = 0600, | ||
511 | .proc_handler = proc_cap_handler, | ||
512 | }, | ||
513 | { | ||
514 | .procname = "inheritable", | ||
515 | .data = CAP_PI, | ||
516 | .maxlen = _KERNEL_CAPABILITY_U32S * sizeof(unsigned long), | ||
517 | .mode = 0600, | ||
518 | .proc_handler = proc_cap_handler, | ||
519 | }, | ||
520 | { } | ||
521 | }; | ||
522 | |||
423 | void __init usermodehelper_init(void) | 523 | void __init usermodehelper_init(void) |
424 | { | 524 | { |
425 | khelper_wq = create_singlethread_workqueue("khelper"); | 525 | khelper_wq = create_singlethread_workqueue("khelper"); |
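The new usermodehelper_table exposes two capability masks that clamp the bounding and inheritable sets of every kernel-spawned usermode helper; writes can only clear bits. Assuming the table ends up registered under /proc/sys/kernel/usermodehelper/ (the registration is not part of this hunk) and the current two-word capability layout, a hedged userspace sketch for inspecting the bounding set:

/* Sketch: inspect the usermode-helper capability bounding set.
 * The sysctl path is an assumption based on the table name above; writing
 * additionally requires CAP_SETPCAP and CAP_SYS_MODULE.
 */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/kernel/usermodehelper/bset", "r");
	unsigned long lo, hi;

	if (!f) {
		perror("open");
		return 1;
	}
	/* two words, least significant 32 bits first, printed as ulongs */
	if (fscanf(f, "%lu %lu", &lo, &hi) == 2)
		printf("usermodehelper bset: 0x%08lx%08lx\n", hi, lo);
	fclose(f);
	return 0;
}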
diff --git a/kernel/kthread.c b/kernel/kthread.c index 3b34d2732bce..4ba7cccb4994 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c | |||
@@ -202,8 +202,8 @@ void kthread_bind(struct task_struct *p, unsigned int cpu) | |||
202 | return; | 202 | return; |
203 | } | 203 | } |
204 | 204 | ||
205 | p->cpus_allowed = cpumask_of_cpu(cpu); | 205 | /* It's safe because the task is inactive. */ |
206 | p->rt.nr_cpus_allowed = 1; | 206 | do_set_cpus_allowed(p, cpumask_of(cpu)); |
207 | p->flags |= PF_THREAD_BOUND; | 207 | p->flags |= PF_THREAD_BOUND; |
208 | } | 208 | } |
209 | EXPORT_SYMBOL(kthread_bind); | 209 | EXPORT_SYMBOL(kthread_bind); |
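kthread_bind() above now goes through do_set_cpus_allowed() instead of poking cpus_allowed directly; the calling pattern is unchanged: create the thread, bind it while it is still inactive, then wake it. A kernel-style sketch of that sequence (thread function and name are illustrative, error handling trimmed):

#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/err.h>

static int my_worker(void *data)
{
	while (!kthread_should_stop())
		schedule_timeout_interruptible(HZ);
	return 0;
}

static struct task_struct *start_pinned_worker(unsigned int cpu)
{
	struct task_struct *p;

	p = kthread_create(my_worker, NULL, "my_worker/%u", cpu);
	if (IS_ERR(p))
		return p;

	kthread_bind(p, cpu);	/* task has not run yet, so the bind is safe */
	wake_up_process(p);
	return p;
}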
diff --git a/kernel/mutex.c b/kernel/mutex.c index 2c938e2337cd..d607ed5dd441 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c | |||
@@ -131,14 +131,14 @@ EXPORT_SYMBOL(mutex_unlock); | |||
131 | */ | 131 | */ |
132 | static inline int __sched | 132 | static inline int __sched |
133 | __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, | 133 | __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, |
134 | unsigned long ip) | 134 | struct lockdep_map *nest_lock, unsigned long ip) |
135 | { | 135 | { |
136 | struct task_struct *task = current; | 136 | struct task_struct *task = current; |
137 | struct mutex_waiter waiter; | 137 | struct mutex_waiter waiter; |
138 | unsigned long flags; | 138 | unsigned long flags; |
139 | 139 | ||
140 | preempt_disable(); | 140 | preempt_disable(); |
141 | mutex_acquire(&lock->dep_map, subclass, 0, ip); | 141 | mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip); |
142 | 142 | ||
143 | #ifdef CONFIG_MUTEX_SPIN_ON_OWNER | 143 | #ifdef CONFIG_MUTEX_SPIN_ON_OWNER |
144 | /* | 144 | /* |
@@ -269,16 +269,25 @@ void __sched | |||
269 | mutex_lock_nested(struct mutex *lock, unsigned int subclass) | 269 | mutex_lock_nested(struct mutex *lock, unsigned int subclass) |
270 | { | 270 | { |
271 | might_sleep(); | 271 | might_sleep(); |
272 | __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass, _RET_IP_); | 272 | __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass, NULL, _RET_IP_); |
273 | } | 273 | } |
274 | 274 | ||
275 | EXPORT_SYMBOL_GPL(mutex_lock_nested); | 275 | EXPORT_SYMBOL_GPL(mutex_lock_nested); |
276 | 276 | ||
277 | void __sched | ||
278 | _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest) | ||
279 | { | ||
280 | might_sleep(); | ||
281 | __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, nest, _RET_IP_); | ||
282 | } | ||
283 | |||
284 | EXPORT_SYMBOL_GPL(_mutex_lock_nest_lock); | ||
285 | |||
277 | int __sched | 286 | int __sched |
278 | mutex_lock_killable_nested(struct mutex *lock, unsigned int subclass) | 287 | mutex_lock_killable_nested(struct mutex *lock, unsigned int subclass) |
279 | { | 288 | { |
280 | might_sleep(); | 289 | might_sleep(); |
281 | return __mutex_lock_common(lock, TASK_KILLABLE, subclass, _RET_IP_); | 290 | return __mutex_lock_common(lock, TASK_KILLABLE, subclass, NULL, _RET_IP_); |
282 | } | 291 | } |
283 | EXPORT_SYMBOL_GPL(mutex_lock_killable_nested); | 292 | EXPORT_SYMBOL_GPL(mutex_lock_killable_nested); |
284 | 293 | ||
@@ -287,7 +296,7 @@ mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass) | |||
287 | { | 296 | { |
288 | might_sleep(); | 297 | might_sleep(); |
289 | return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, | 298 | return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, |
290 | subclass, _RET_IP_); | 299 | subclass, NULL, _RET_IP_); |
291 | } | 300 | } |
292 | 301 | ||
293 | EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested); | 302 | EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested); |
@@ -393,7 +402,7 @@ __mutex_lock_slowpath(atomic_t *lock_count) | |||
393 | { | 402 | { |
394 | struct mutex *lock = container_of(lock_count, struct mutex, count); | 403 | struct mutex *lock = container_of(lock_count, struct mutex, count); |
395 | 404 | ||
396 | __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, _RET_IP_); | 405 | __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, NULL, _RET_IP_); |
397 | } | 406 | } |
398 | 407 | ||
399 | static noinline int __sched | 408 | static noinline int __sched |
@@ -401,7 +410,7 @@ __mutex_lock_killable_slowpath(atomic_t *lock_count) | |||
401 | { | 410 | { |
402 | struct mutex *lock = container_of(lock_count, struct mutex, count); | 411 | struct mutex *lock = container_of(lock_count, struct mutex, count); |
403 | 412 | ||
404 | return __mutex_lock_common(lock, TASK_KILLABLE, 0, _RET_IP_); | 413 | return __mutex_lock_common(lock, TASK_KILLABLE, 0, NULL, _RET_IP_); |
405 | } | 414 | } |
406 | 415 | ||
407 | static noinline int __sched | 416 | static noinline int __sched |
@@ -409,7 +418,7 @@ __mutex_lock_interruptible_slowpath(atomic_t *lock_count) | |||
409 | { | 418 | { |
410 | struct mutex *lock = container_of(lock_count, struct mutex, count); | 419 | struct mutex *lock = container_of(lock_count, struct mutex, count); |
411 | 420 | ||
412 | return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0, _RET_IP_); | 421 | return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0, NULL, _RET_IP_); |
413 | } | 422 | } |
414 | #endif | 423 | #endif |
415 | 424 | ||
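_mutex_lock_nest_lock() above gives lockdep a way to accept taking many mutexes of the same class, provided they are all nested under one outer lock. The usual entry point is the mutex_lock_nest_lock() wrapper, which is assumed here to come from the header side of this series; the data structures below are illustrative, not from the kernel:

#include <linux/mutex.h>
#include <linux/list.h>

struct child {
	struct mutex lock;
	struct list_head node;
};

struct parent {
	struct mutex big_lock;
	struct list_head children;
};

/* Take every child lock while holding the parent lock; the nest_lock
 * annotation tells lockdep the parent serializes these acquisitions, so the
 * repeated child lock class is not reported as a potential deadlock.
 */
static void lock_all_children(struct parent *p)
{
	struct child *c;

	mutex_lock(&p->big_lock);
	list_for_each_entry(c, &p->children, node)
		mutex_lock_nest_lock(&c->lock, &p->big_lock);
}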
diff --git a/kernel/ns_cgroup.c b/kernel/ns_cgroup.c deleted file mode 100644 index 2c98ad94ba0e..000000000000 --- a/kernel/ns_cgroup.c +++ /dev/null | |||
@@ -1,118 +0,0 @@ | |||
1 | /* | ||
2 | * ns_cgroup.c - namespace cgroup subsystem | ||
3 | * | ||
4 | * Copyright 2006, 2007 IBM Corp | ||
5 | */ | ||
6 | |||
7 | #include <linux/module.h> | ||
8 | #include <linux/cgroup.h> | ||
9 | #include <linux/fs.h> | ||
10 | #include <linux/proc_fs.h> | ||
11 | #include <linux/slab.h> | ||
12 | #include <linux/nsproxy.h> | ||
13 | |||
14 | struct ns_cgroup { | ||
15 | struct cgroup_subsys_state css; | ||
16 | }; | ||
17 | |||
18 | struct cgroup_subsys ns_subsys; | ||
19 | |||
20 | static inline struct ns_cgroup *cgroup_to_ns( | ||
21 | struct cgroup *cgroup) | ||
22 | { | ||
23 | return container_of(cgroup_subsys_state(cgroup, ns_subsys_id), | ||
24 | struct ns_cgroup, css); | ||
25 | } | ||
26 | |||
27 | int ns_cgroup_clone(struct task_struct *task, struct pid *pid) | ||
28 | { | ||
29 | char name[PROC_NUMBUF]; | ||
30 | |||
31 | snprintf(name, PROC_NUMBUF, "%d", pid_vnr(pid)); | ||
32 | return cgroup_clone(task, &ns_subsys, name); | ||
33 | } | ||
34 | |||
35 | /* | ||
36 | * Rules: | ||
37 | * 1. you can only enter a cgroup which is a descendant of your current | ||
38 | * cgroup | ||
39 | * 2. you can only place another process into a cgroup if | ||
40 | * a. you have CAP_SYS_ADMIN | ||
41 | * b. your cgroup is an ancestor of task's destination cgroup | ||
42 | * (hence either you are in the same cgroup as task, or in an | ||
43 | * ancestor cgroup thereof) | ||
44 | */ | ||
45 | static int ns_can_attach(struct cgroup_subsys *ss, struct cgroup *new_cgroup, | ||
46 | struct task_struct *task, bool threadgroup) | ||
47 | { | ||
48 | if (current != task) { | ||
49 | if (!capable(CAP_SYS_ADMIN)) | ||
50 | return -EPERM; | ||
51 | |||
52 | if (!cgroup_is_descendant(new_cgroup, current)) | ||
53 | return -EPERM; | ||
54 | } | ||
55 | |||
56 | if (!cgroup_is_descendant(new_cgroup, task)) | ||
57 | return -EPERM; | ||
58 | |||
59 | if (threadgroup) { | ||
60 | struct task_struct *c; | ||
61 | rcu_read_lock(); | ||
62 | list_for_each_entry_rcu(c, &task->thread_group, thread_group) { | ||
63 | if (!cgroup_is_descendant(new_cgroup, c)) { | ||
64 | rcu_read_unlock(); | ||
65 | return -EPERM; | ||
66 | } | ||
67 | } | ||
68 | rcu_read_unlock(); | ||
69 | } | ||
70 | |||
71 | return 0; | ||
72 | } | ||
73 | |||
74 | /* | ||
75 | * Rules: you can only create a cgroup if | ||
76 | * 1. you are capable(CAP_SYS_ADMIN) | ||
77 | * 2. the target cgroup is a descendant of your own cgroup | ||
78 | */ | ||
79 | static struct cgroup_subsys_state *ns_create(struct cgroup_subsys *ss, | ||
80 | struct cgroup *cgroup) | ||
81 | { | ||
82 | struct ns_cgroup *ns_cgroup; | ||
83 | |||
84 | if (!capable(CAP_SYS_ADMIN)) | ||
85 | return ERR_PTR(-EPERM); | ||
86 | if (!cgroup_is_descendant(cgroup, current)) | ||
87 | return ERR_PTR(-EPERM); | ||
88 | if (test_bit(CGRP_CLONE_CHILDREN, &cgroup->flags)) { | ||
89 | printk("ns_cgroup can't be created with parent " | ||
90 | "'clone_children' set.\n"); | ||
91 | return ERR_PTR(-EINVAL); | ||
92 | } | ||
93 | |||
94 | printk_once("ns_cgroup deprecated: consider using the " | ||
95 | "'clone_children' flag without the ns_cgroup.\n"); | ||
96 | |||
97 | ns_cgroup = kzalloc(sizeof(*ns_cgroup), GFP_KERNEL); | ||
98 | if (!ns_cgroup) | ||
99 | return ERR_PTR(-ENOMEM); | ||
100 | return &ns_cgroup->css; | ||
101 | } | ||
102 | |||
103 | static void ns_destroy(struct cgroup_subsys *ss, | ||
104 | struct cgroup *cgroup) | ||
105 | { | ||
106 | struct ns_cgroup *ns_cgroup; | ||
107 | |||
108 | ns_cgroup = cgroup_to_ns(cgroup); | ||
109 | kfree(ns_cgroup); | ||
110 | } | ||
111 | |||
112 | struct cgroup_subsys ns_subsys = { | ||
113 | .name = "ns", | ||
114 | .can_attach = ns_can_attach, | ||
115 | .create = ns_create, | ||
116 | .destroy = ns_destroy, | ||
117 | .subsys_id = ns_subsys_id, | ||
118 | }; | ||
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index a05d191ffdd9..d6a00f3de15d 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c | |||
@@ -22,6 +22,9 @@ | |||
22 | #include <linux/pid_namespace.h> | 22 | #include <linux/pid_namespace.h> |
23 | #include <net/net_namespace.h> | 23 | #include <net/net_namespace.h> |
24 | #include <linux/ipc_namespace.h> | 24 | #include <linux/ipc_namespace.h> |
25 | #include <linux/proc_fs.h> | ||
26 | #include <linux/file.h> | ||
27 | #include <linux/syscalls.h> | ||
25 | 28 | ||
26 | static struct kmem_cache *nsproxy_cachep; | 29 | static struct kmem_cache *nsproxy_cachep; |
27 | 30 | ||
@@ -198,10 +201,6 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags, | |||
198 | goto out; | 201 | goto out; |
199 | } | 202 | } |
200 | 203 | ||
201 | err = ns_cgroup_clone(current, task_pid(current)); | ||
202 | if (err) | ||
203 | put_nsproxy(*new_nsp); | ||
204 | |||
205 | out: | 204 | out: |
206 | return err; | 205 | return err; |
207 | } | 206 | } |
@@ -233,6 +232,45 @@ void exit_task_namespaces(struct task_struct *p) | |||
233 | switch_task_namespaces(p, NULL); | 232 | switch_task_namespaces(p, NULL); |
234 | } | 233 | } |
235 | 234 | ||
235 | SYSCALL_DEFINE2(setns, int, fd, int, nstype) | ||
236 | { | ||
237 | const struct proc_ns_operations *ops; | ||
238 | struct task_struct *tsk = current; | ||
239 | struct nsproxy *new_nsproxy; | ||
240 | struct proc_inode *ei; | ||
241 | struct file *file; | ||
242 | int err; | ||
243 | |||
244 | if (!capable(CAP_SYS_ADMIN)) | ||
245 | return -EPERM; | ||
246 | |||
247 | file = proc_ns_fget(fd); | ||
248 | if (IS_ERR(file)) | ||
249 | return PTR_ERR(file); | ||
250 | |||
251 | err = -EINVAL; | ||
252 | ei = PROC_I(file->f_dentry->d_inode); | ||
253 | ops = ei->ns_ops; | ||
254 | if (nstype && (ops->type != nstype)) | ||
255 | goto out; | ||
256 | |||
257 | new_nsproxy = create_new_namespaces(0, tsk, tsk->fs); | ||
258 | if (IS_ERR(new_nsproxy)) { | ||
259 | err = PTR_ERR(new_nsproxy); | ||
260 | goto out; | ||
261 | } | ||
262 | |||
263 | err = ops->install(new_nsproxy, ei->ns); | ||
264 | if (err) { | ||
265 | free_nsproxy(new_nsproxy); | ||
266 | goto out; | ||
267 | } | ||
268 | switch_task_namespaces(tsk, new_nsproxy); | ||
269 | out: | ||
270 | fput(file); | ||
271 | return err; | ||
272 | } | ||
273 | |||
236 | static int __init nsproxy_cache_init(void) | 274 | static int __init nsproxy_cache_init(void) |
237 | { | 275 | { |
238 | nsproxy_cachep = KMEM_CACHE(nsproxy, SLAB_PANIC); | 276 | nsproxy_cachep = KMEM_CACHE(nsproxy, SLAB_PANIC); |
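The new setns() syscall takes a file descriptor obtained from one of the /proc/<pid>/ns/* files and an optional nstype that verifies what kind of namespace the fd refers to (0 accepts any type). A hedged userspace sketch; the target pid is illustrative, glibc had no wrapper at this point so the raw syscall is used, and __NR_setns may be missing from older headers:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

int main(void)
{
	int fd = open("/proc/1234/ns/uts", O_RDONLY);

	if (fd < 0) {
		perror("open ns file");
		return 1;
	}
	/* nstype 0 means "any namespace type"; requires CAP_SYS_ADMIN */
	if (syscall(__NR_setns, fd, 0) < 0) {
		perror("setns");
		return 1;
	}
	close(fd);
	printf("now running in the target UTS namespace\n");
	return 0;
}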
diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c index beb184689af9..6824ca7d4d0c 100644 --- a/kernel/pm_qos_params.c +++ b/kernel/pm_qos_params.c | |||
@@ -40,6 +40,7 @@ | |||
40 | #include <linux/string.h> | 40 | #include <linux/string.h> |
41 | #include <linux/platform_device.h> | 41 | #include <linux/platform_device.h> |
42 | #include <linux/init.h> | 42 | #include <linux/init.h> |
43 | #include <linux/kernel.h> | ||
43 | 44 | ||
44 | #include <linux/uaccess.h> | 45 | #include <linux/uaccess.h> |
45 | 46 | ||
@@ -53,11 +54,17 @@ enum pm_qos_type { | |||
53 | PM_QOS_MIN /* return the smallest value */ | 54 | PM_QOS_MIN /* return the smallest value */ |
54 | }; | 55 | }; |
55 | 56 | ||
57 | /* | ||
58 | * Note: The lockless read path depends on the CPU accessing | ||
59 | * target_value atomically. Atomic access is only guaranteed on all CPU | ||
60 | * types linux supports for 32 bit quantites | ||
61 | */ | ||
56 | struct pm_qos_object { | 62 | struct pm_qos_object { |
57 | struct plist_head requests; | 63 | struct plist_head requests; |
58 | struct blocking_notifier_head *notifiers; | 64 | struct blocking_notifier_head *notifiers; |
59 | struct miscdevice pm_qos_power_miscdev; | 65 | struct miscdevice pm_qos_power_miscdev; |
60 | char *name; | 66 | char *name; |
67 | s32 target_value; /* Do not change to 64 bit */ | ||
61 | s32 default_value; | 68 | s32 default_value; |
62 | enum pm_qos_type type; | 69 | enum pm_qos_type type; |
63 | }; | 70 | }; |
@@ -70,7 +77,8 @@ static struct pm_qos_object cpu_dma_pm_qos = { | |||
70 | .requests = PLIST_HEAD_INIT(cpu_dma_pm_qos.requests, pm_qos_lock), | 77 | .requests = PLIST_HEAD_INIT(cpu_dma_pm_qos.requests, pm_qos_lock), |
71 | .notifiers = &cpu_dma_lat_notifier, | 78 | .notifiers = &cpu_dma_lat_notifier, |
72 | .name = "cpu_dma_latency", | 79 | .name = "cpu_dma_latency", |
73 | .default_value = 2000 * USEC_PER_SEC, | 80 | .target_value = PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE, |
81 | .default_value = PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE, | ||
74 | .type = PM_QOS_MIN, | 82 | .type = PM_QOS_MIN, |
75 | }; | 83 | }; |
76 | 84 | ||
@@ -79,7 +87,8 @@ static struct pm_qos_object network_lat_pm_qos = { | |||
79 | .requests = PLIST_HEAD_INIT(network_lat_pm_qos.requests, pm_qos_lock), | 87 | .requests = PLIST_HEAD_INIT(network_lat_pm_qos.requests, pm_qos_lock), |
80 | .notifiers = &network_lat_notifier, | 88 | .notifiers = &network_lat_notifier, |
81 | .name = "network_latency", | 89 | .name = "network_latency", |
82 | .default_value = 2000 * USEC_PER_SEC, | 90 | .target_value = PM_QOS_NETWORK_LAT_DEFAULT_VALUE, |
91 | .default_value = PM_QOS_NETWORK_LAT_DEFAULT_VALUE, | ||
83 | .type = PM_QOS_MIN | 92 | .type = PM_QOS_MIN |
84 | }; | 93 | }; |
85 | 94 | ||
@@ -89,7 +98,8 @@ static struct pm_qos_object network_throughput_pm_qos = { | |||
89 | .requests = PLIST_HEAD_INIT(network_throughput_pm_qos.requests, pm_qos_lock), | 98 | .requests = PLIST_HEAD_INIT(network_throughput_pm_qos.requests, pm_qos_lock), |
90 | .notifiers = &network_throughput_notifier, | 99 | .notifiers = &network_throughput_notifier, |
91 | .name = "network_throughput", | 100 | .name = "network_throughput", |
92 | .default_value = 0, | 101 | .target_value = PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE, |
102 | .default_value = PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE, | ||
93 | .type = PM_QOS_MAX, | 103 | .type = PM_QOS_MAX, |
94 | }; | 104 | }; |
95 | 105 | ||
@@ -135,6 +145,16 @@ static inline int pm_qos_get_value(struct pm_qos_object *o) | |||
135 | } | 145 | } |
136 | } | 146 | } |
137 | 147 | ||
148 | static inline s32 pm_qos_read_value(struct pm_qos_object *o) | ||
149 | { | ||
150 | return o->target_value; | ||
151 | } | ||
152 | |||
153 | static inline void pm_qos_set_value(struct pm_qos_object *o, s32 value) | ||
154 | { | ||
155 | o->target_value = value; | ||
156 | } | ||
157 | |||
138 | static void update_target(struct pm_qos_object *o, struct plist_node *node, | 158 | static void update_target(struct pm_qos_object *o, struct plist_node *node, |
139 | int del, int value) | 159 | int del, int value) |
140 | { | 160 | { |
@@ -159,6 +179,7 @@ static void update_target(struct pm_qos_object *o, struct plist_node *node, | |||
159 | plist_add(node, &o->requests); | 179 | plist_add(node, &o->requests); |
160 | } | 180 | } |
161 | curr_value = pm_qos_get_value(o); | 181 | curr_value = pm_qos_get_value(o); |
182 | pm_qos_set_value(o, curr_value); | ||
162 | spin_unlock_irqrestore(&pm_qos_lock, flags); | 183 | spin_unlock_irqrestore(&pm_qos_lock, flags); |
163 | 184 | ||
164 | if (prev_value != curr_value) | 185 | if (prev_value != curr_value) |
@@ -193,18 +214,11 @@ static int find_pm_qos_object_by_minor(int minor) | |||
193 | * pm_qos_request - returns current system wide qos expectation | 214 | * pm_qos_request - returns current system wide qos expectation |
194 | * @pm_qos_class: identification of which qos value is requested | 215 | * @pm_qos_class: identification of which qos value is requested |
195 | * | 216 | * |
196 | * This function returns the current target value in an atomic manner. | 217 | * This function returns the current target value. |
197 | */ | 218 | */ |
198 | int pm_qos_request(int pm_qos_class) | 219 | int pm_qos_request(int pm_qos_class) |
199 | { | 220 | { |
200 | unsigned long flags; | 221 | return pm_qos_read_value(pm_qos_array[pm_qos_class]); |
201 | int value; | ||
202 | |||
203 | spin_lock_irqsave(&pm_qos_lock, flags); | ||
204 | value = pm_qos_get_value(pm_qos_array[pm_qos_class]); | ||
205 | spin_unlock_irqrestore(&pm_qos_lock, flags); | ||
206 | |||
207 | return value; | ||
208 | } | 222 | } |
209 | EXPORT_SYMBOL_GPL(pm_qos_request); | 223 | EXPORT_SYMBOL_GPL(pm_qos_request); |
210 | 224 | ||
@@ -404,24 +418,36 @@ static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf, | |||
404 | size_t count, loff_t *f_pos) | 418 | size_t count, loff_t *f_pos) |
405 | { | 419 | { |
406 | s32 value; | 420 | s32 value; |
407 | int x; | ||
408 | char ascii_value[11]; | ||
409 | struct pm_qos_request_list *pm_qos_req; | 421 | struct pm_qos_request_list *pm_qos_req; |
410 | 422 | ||
411 | if (count == sizeof(s32)) { | 423 | if (count == sizeof(s32)) { |
412 | if (copy_from_user(&value, buf, sizeof(s32))) | 424 | if (copy_from_user(&value, buf, sizeof(s32))) |
413 | return -EFAULT; | 425 | return -EFAULT; |
414 | } else if (count == 11) { /* len('0x12345678/0') */ | 426 | } else if (count <= 11) { /* ASCII perhaps? */ |
415 | if (copy_from_user(ascii_value, buf, 11)) | 427 | char ascii_value[11]; |
428 | unsigned long int ulval; | ||
429 | int ret; | ||
430 | |||
431 | if (copy_from_user(ascii_value, buf, count)) | ||
416 | return -EFAULT; | 432 | return -EFAULT; |
417 | if (strlen(ascii_value) != 10) | 433 | |
418 | return -EINVAL; | 434 | if (count > 10) { |
419 | x = sscanf(ascii_value, "%x", &value); | 435 | if (ascii_value[10] == '\n') |
420 | if (x != 1) | 436 | ascii_value[10] = '\0'; |
437 | else | ||
438 | return -EINVAL; | ||
439 | } else { | ||
440 | ascii_value[count] = '\0'; | ||
441 | } | ||
442 | ret = strict_strtoul(ascii_value, 16, &ulval); | ||
443 | if (ret) { | ||
444 | pr_debug("%s, 0x%lx, 0x%x\n", ascii_value, ulval, ret); | ||
421 | return -EINVAL; | 445 | return -EINVAL; |
422 | pr_debug("%s, %d, 0x%x\n", ascii_value, x, value); | 446 | } |
423 | } else | 447 | value = (s32)lower_32_bits(ulval); |
448 | } else { | ||
424 | return -EINVAL; | 449 | return -EINVAL; |
450 | } | ||
425 | 451 | ||
426 | pm_qos_req = filp->private_data; | 452 | pm_qos_req = filp->private_data; |
427 | pm_qos_update_request(pm_qos_req, value); | 453 | pm_qos_update_request(pm_qos_req, value); |
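The rewritten pm_qos_power_write() above accepts either a raw 32-bit binary value or an ASCII hex string of up to ten characters, optionally newline-terminated, so short writes such as "0x32\n" now work. Userspace holds a constraint by keeping the misc device open. A hedged sketch against /dev/cpu_dma_latency, requesting a 50 usec bound:

/* Sketch: request a cpu_dma_latency bound using the ASCII path added above;
 * the request stays in force until the file descriptor is closed.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/dev/cpu_dma_latency", O_RDWR);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* hex string, parsed by strict_strtoul(..., 16, ...) in the kernel */
	if (write(fd, "0x32\n", 5) != 5) {
		perror("write");
		close(fd);
		return 1;
	}
	pause();		/* keep the fd (and the request) alive */
	close(fd);
	return 0;
}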
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index a1b5edf1bf92..4556182527f3 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c | |||
@@ -491,6 +491,13 @@ static struct k_itimer * alloc_posix_timer(void) | |||
491 | return tmr; | 491 | return tmr; |
492 | } | 492 | } |
493 | 493 | ||
494 | static void k_itimer_rcu_free(struct rcu_head *head) | ||
495 | { | ||
496 | struct k_itimer *tmr = container_of(head, struct k_itimer, it.rcu); | ||
497 | |||
498 | kmem_cache_free(posix_timers_cache, tmr); | ||
499 | } | ||
500 | |||
494 | #define IT_ID_SET 1 | 501 | #define IT_ID_SET 1 |
495 | #define IT_ID_NOT_SET 0 | 502 | #define IT_ID_NOT_SET 0 |
496 | static void release_posix_timer(struct k_itimer *tmr, int it_id_set) | 503 | static void release_posix_timer(struct k_itimer *tmr, int it_id_set) |
@@ -503,7 +510,7 @@ static void release_posix_timer(struct k_itimer *tmr, int it_id_set) | |||
503 | } | 510 | } |
504 | put_pid(tmr->it_pid); | 511 | put_pid(tmr->it_pid); |
505 | sigqueue_free(tmr->sigq); | 512 | sigqueue_free(tmr->sigq); |
506 | kmem_cache_free(posix_timers_cache, tmr); | 513 | call_rcu(&tmr->it.rcu, k_itimer_rcu_free); |
507 | } | 514 | } |
508 | 515 | ||
509 | static struct k_clock *clockid_to_kclock(const clockid_t id) | 516 | static struct k_clock *clockid_to_kclock(const clockid_t id) |
@@ -631,22 +638,18 @@ out: | |||
631 | static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags) | 638 | static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags) |
632 | { | 639 | { |
633 | struct k_itimer *timr; | 640 | struct k_itimer *timr; |
634 | /* | 641 | |
635 | * Watch out here. We do a irqsave on the idr_lock and pass the | 642 | rcu_read_lock(); |
636 | * flags part over to the timer lock. Must not let interrupts in | ||
637 | * while we are moving the lock. | ||
638 | */ | ||
639 | spin_lock_irqsave(&idr_lock, *flags); | ||
640 | timr = idr_find(&posix_timers_id, (int)timer_id); | 643 | timr = idr_find(&posix_timers_id, (int)timer_id); |
641 | if (timr) { | 644 | if (timr) { |
642 | spin_lock(&timr->it_lock); | 645 | spin_lock_irqsave(&timr->it_lock, *flags); |
643 | if (timr->it_signal == current->signal) { | 646 | if (timr->it_signal == current->signal) { |
644 | spin_unlock(&idr_lock); | 647 | rcu_read_unlock(); |
645 | return timr; | 648 | return timr; |
646 | } | 649 | } |
647 | spin_unlock(&timr->it_lock); | 650 | spin_unlock_irqrestore(&timr->it_lock, *flags); |
648 | } | 651 | } |
649 | spin_unlock_irqrestore(&idr_lock, *flags); | 652 | rcu_read_unlock(); |
650 | 653 | ||
651 | return NULL; | 654 | return NULL; |
652 | } | 655 | } |
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index f9bec56d8825..8f7b1db1ece1 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c | |||
@@ -25,7 +25,6 @@ | |||
25 | #include <linux/gfp.h> | 25 | #include <linux/gfp.h> |
26 | #include <linux/syscore_ops.h> | 26 | #include <linux/syscore_ops.h> |
27 | #include <scsi/scsi_scan.h> | 27 | #include <scsi/scsi_scan.h> |
28 | #include <asm/suspend.h> | ||
29 | 28 | ||
30 | #include "power.h" | 29 | #include "power.h" |
31 | 30 | ||
@@ -55,10 +54,9 @@ static int hibernation_mode = HIBERNATION_SHUTDOWN; | |||
55 | static const struct platform_hibernation_ops *hibernation_ops; | 54 | static const struct platform_hibernation_ops *hibernation_ops; |
56 | 55 | ||
57 | /** | 56 | /** |
58 | * hibernation_set_ops - set the global hibernate operations | 57 | * hibernation_set_ops - Set the global hibernate operations. |
59 | * @ops: the hibernation operations to use in subsequent hibernation transitions | 58 | * @ops: Hibernation operations to use in subsequent hibernation transitions. |
60 | */ | 59 | */ |
61 | |||
62 | void hibernation_set_ops(const struct platform_hibernation_ops *ops) | 60 | void hibernation_set_ops(const struct platform_hibernation_ops *ops) |
63 | { | 61 | { |
64 | if (ops && !(ops->begin && ops->end && ops->pre_snapshot | 62 | if (ops && !(ops->begin && ops->end && ops->pre_snapshot |
@@ -115,10 +113,9 @@ static int hibernation_test(int level) { return 0; } | |||
115 | #endif /* !CONFIG_PM_DEBUG */ | 113 | #endif /* !CONFIG_PM_DEBUG */ |
116 | 114 | ||
117 | /** | 115 | /** |
118 | * platform_begin - tell the platform driver that we're starting | 116 | * platform_begin - Call platform to start hibernation. |
119 | * hibernation | 117 | * @platform_mode: Whether or not to use the platform driver. |
120 | */ | 118 | */ |
121 | |||
122 | static int platform_begin(int platform_mode) | 119 | static int platform_begin(int platform_mode) |
123 | { | 120 | { |
124 | return (platform_mode && hibernation_ops) ? | 121 | return (platform_mode && hibernation_ops) ? |
@@ -126,10 +123,9 @@ static int platform_begin(int platform_mode) | |||
126 | } | 123 | } |
127 | 124 | ||
128 | /** | 125 | /** |
129 | * platform_end - tell the platform driver that we've entered the | 126 | * platform_end - Call platform to finish transition to the working state. |
130 | * working state | 127 | * @platform_mode: Whether or not to use the platform driver. |
131 | */ | 128 | */ |
132 | |||
133 | static void platform_end(int platform_mode) | 129 | static void platform_end(int platform_mode) |
134 | { | 130 | { |
135 | if (platform_mode && hibernation_ops) | 131 | if (platform_mode && hibernation_ops) |
@@ -137,8 +133,11 @@ static void platform_end(int platform_mode) | |||
137 | } | 133 | } |
138 | 134 | ||
139 | /** | 135 | /** |
140 | * platform_pre_snapshot - prepare the machine for hibernation using the | 136 | * platform_pre_snapshot - Call platform to prepare the machine for hibernation. |
141 | * platform driver if so configured and return an error code if it fails | 137 | * @platform_mode: Whether or not to use the platform driver. |
138 | * | ||
139 | * Use the platform driver to prepare the system for creating a hibernate image, | ||
140 | * if so configured, and return an error code if that fails. | ||
142 | */ | 141 | */ |
143 | 142 | ||
144 | static int platform_pre_snapshot(int platform_mode) | 143 | static int platform_pre_snapshot(int platform_mode) |
@@ -148,10 +147,14 @@ static int platform_pre_snapshot(int platform_mode) | |||
148 | } | 147 | } |
149 | 148 | ||
150 | /** | 149 | /** |
151 | * platform_leave - prepare the machine for switching to the normal mode | 150 | * platform_leave - Call platform to prepare a transition to the working state. |
152 | * of operation using the platform driver (called with interrupts disabled) | 151 | * @platform_mode: Whether or not to use the platform driver. |
152 | * | ||
153 | * Use the platform driver to prepare the machine for switching to the | ||
154 | * normal mode of operation. | ||
155 | * | ||
156 | * This routine is called on one CPU with interrupts disabled. | ||
153 | */ | 157 | */ |
154 | |||
155 | static void platform_leave(int platform_mode) | 158 | static void platform_leave(int platform_mode) |
156 | { | 159 | { |
157 | if (platform_mode && hibernation_ops) | 160 | if (platform_mode && hibernation_ops) |
@@ -159,10 +162,14 @@ static void platform_leave(int platform_mode) | |||
159 | } | 162 | } |
160 | 163 | ||
161 | /** | 164 | /** |
162 | * platform_finish - switch the machine to the normal mode of operation | 165 | * platform_finish - Call platform to switch the system to the working state. |
163 | * using the platform driver (must be called after platform_prepare()) | 166 | * @platform_mode: Whether or not to use the platform driver. |
167 | * | ||
168 | * Use the platform driver to switch the machine to the normal mode of | ||
169 | * operation. | ||
170 | * | ||
171 | * This routine must be called after platform_prepare(). | ||
164 | */ | 172 | */ |
165 | |||
166 | static void platform_finish(int platform_mode) | 173 | static void platform_finish(int platform_mode) |
167 | { | 174 | { |
168 | if (platform_mode && hibernation_ops) | 175 | if (platform_mode && hibernation_ops) |
@@ -170,11 +177,15 @@ static void platform_finish(int platform_mode) | |||
170 | } | 177 | } |
171 | 178 | ||
172 | /** | 179 | /** |
173 | * platform_pre_restore - prepare the platform for the restoration from a | 180 | * platform_pre_restore - Prepare for hibernate image restoration. |
174 | * hibernation image. If the restore fails after this function has been | 181 | * @platform_mode: Whether or not to use the platform driver. |
175 | * called, platform_restore_cleanup() must be called. | 182 | * |
183 | * Use the platform driver to prepare the system for resume from a hibernation | ||
184 | * image. | ||
185 | * | ||
186 | * If the restore fails after this function has been called, | ||
187 | * platform_restore_cleanup() must be called. | ||
176 | */ | 188 | */ |
177 | |||
178 | static int platform_pre_restore(int platform_mode) | 189 | static int platform_pre_restore(int platform_mode) |
179 | { | 190 | { |
180 | return (platform_mode && hibernation_ops) ? | 191 | return (platform_mode && hibernation_ops) ? |
@@ -182,12 +193,16 @@ static int platform_pre_restore(int platform_mode) | |||
182 | } | 193 | } |
183 | 194 | ||
184 | /** | 195 | /** |
185 | * platform_restore_cleanup - switch the platform to the normal mode of | 196 | * platform_restore_cleanup - Switch to the working state after failing restore. |
186 | * operation after a failing restore. If platform_pre_restore() has been | 197 | * @platform_mode: Whether or not to use the platform driver. |
187 | * called before the failing restore, this function must be called too, | 198 | * |
188 | * regardless of the result of platform_pre_restore(). | 199 | * Use the platform driver to switch the system to the normal mode of operation |
200 | * after a failing restore. | ||
201 | * | ||
202 | * If platform_pre_restore() has been called before the failing restore, this | ||
203 | * function must be called too, regardless of the result of | ||
204 | * platform_pre_restore(). | ||
189 | */ | 205 | */ |
190 | |||
191 | static void platform_restore_cleanup(int platform_mode) | 206 | static void platform_restore_cleanup(int platform_mode) |
192 | { | 207 | { |
193 | if (platform_mode && hibernation_ops) | 208 | if (platform_mode && hibernation_ops) |
@@ -195,10 +210,9 @@ static void platform_restore_cleanup(int platform_mode) | |||
195 | } | 210 | } |
196 | 211 | ||
197 | /** | 212 | /** |
198 | * platform_recover - recover the platform from a failure to suspend | 213 | * platform_recover - Recover from a failure to suspend devices. |
199 | * devices. | 214 | * @platform_mode: Whether or not to use the platform driver. |
200 | */ | 215 | */ |
201 | |||
202 | static void platform_recover(int platform_mode) | 216 | static void platform_recover(int platform_mode) |
203 | { | 217 | { |
204 | if (platform_mode && hibernation_ops && hibernation_ops->recover) | 218 | if (platform_mode && hibernation_ops && hibernation_ops->recover) |
@@ -206,13 +220,12 @@ static void platform_recover(int platform_mode) | |||
206 | } | 220 | } |
207 | 221 | ||
208 | /** | 222 | /** |
209 | * swsusp_show_speed - print the time elapsed between two events. | 223 | * swsusp_show_speed - Print time elapsed between two events during hibernation. |
210 | * @start: Starting event. | 224 | * @start: Starting event. |
211 | * @stop: Final event. | 225 | * @stop: Final event. |
212 | * @nr_pages - number of pages processed between @start and @stop | 226 | * @nr_pages: Number of memory pages processed between @start and @stop. |
213 | * @msg - introductory message to print | 227 | * @msg: Additional diagnostic message to print. |
214 | */ | 228 | */ |
215 | |||
216 | void swsusp_show_speed(struct timeval *start, struct timeval *stop, | 229 | void swsusp_show_speed(struct timeval *start, struct timeval *stop, |
217 | unsigned nr_pages, char *msg) | 230 | unsigned nr_pages, char *msg) |
218 | { | 231 | { |
@@ -235,25 +248,18 @@ void swsusp_show_speed(struct timeval *start, struct timeval *stop, | |||
235 | } | 248 | } |
236 | 249 | ||
237 | /** | 250 | /** |
238 | * create_image - freeze devices that need to be frozen with interrupts | 251 | * create_image - Create a hibernation image. |
239 | * off, create the hibernation image and thaw those devices. Control | 252 | * @platform_mode: Whether or not to use the platform driver. |
240 | * reappears in this routine after a restore. | 253 | * |
254 | * Execute device drivers' .freeze_noirq() callbacks, create a hibernation image | ||
255 | * and execute the drivers' .thaw_noirq() callbacks. | ||
256 | * | ||
257 | * Control reappears in this routine after the subsequent restore. | ||
241 | */ | 258 | */ |
242 | |||
243 | static int create_image(int platform_mode) | 259 | static int create_image(int platform_mode) |
244 | { | 260 | { |
245 | int error; | 261 | int error; |
246 | 262 | ||
247 | error = arch_prepare_suspend(); | ||
248 | if (error) | ||
249 | return error; | ||
250 | |||
251 | /* At this point, dpm_suspend_start() has been called, but *not* | ||
252 | * dpm_suspend_noirq(). We *must* call dpm_suspend_noirq() now. | ||
253 | * Otherwise, drivers for some devices (e.g. interrupt controllers) | ||
254 | * become desynchronized with the actual state of the hardware | ||
255 | * at resume time, and evil weirdness ensues. | ||
256 | */ | ||
257 | error = dpm_suspend_noirq(PMSG_FREEZE); | 263 | error = dpm_suspend_noirq(PMSG_FREEZE); |
258 | if (error) { | 264 | if (error) { |
259 | printk(KERN_ERR "PM: Some devices failed to power down, " | 265 | printk(KERN_ERR "PM: Some devices failed to power down, " |
@@ -297,9 +303,6 @@ static int create_image(int platform_mode) | |||
297 | 303 | ||
298 | Power_up: | 304 | Power_up: |
299 | syscore_resume(); | 305 | syscore_resume(); |
300 | /* NOTE: dpm_resume_noirq() is just a resume() for devices | ||
301 | * that suspended with irqs off ... no overall powerup. | ||
302 | */ | ||
303 | 306 | ||
304 | Enable_irqs: | 307 | Enable_irqs: |
305 | local_irq_enable(); | 308 | local_irq_enable(); |
@@ -317,14 +320,11 @@ static int create_image(int platform_mode) | |||
317 | } | 320 | } |
318 | 321 | ||
319 | /** | 322 | /** |
320 | * hibernation_snapshot - quiesce devices and create the hibernation | 323 | * hibernation_snapshot - Quiesce devices and create a hibernation image. |
321 | * snapshot image. | 324 | * @platform_mode: If set, use platform driver to prepare for the transition. |
322 | * @platform_mode - if set, use the platform driver, if available, to | ||
323 | * prepare the platform firmware for the power transition. | ||
324 | * | 325 | * |
325 | * Must be called with pm_mutex held | 326 | * This routine must be called with pm_mutex held. |
326 | */ | 327 | */ |
327 | |||
328 | int hibernation_snapshot(int platform_mode) | 328 | int hibernation_snapshot(int platform_mode) |
329 | { | 329 | { |
330 | pm_message_t msg = PMSG_RECOVER; | 330 | pm_message_t msg = PMSG_RECOVER; |
@@ -384,13 +384,14 @@ int hibernation_snapshot(int platform_mode) | |||
384 | } | 384 | } |
385 | 385 | ||
386 | /** | 386 | /** |
387 | * resume_target_kernel - prepare devices that need to be suspended with | 387 | * resume_target_kernel - Restore system state from a hibernation image. |
388 | * interrupts off, restore the contents of highmem that have not been | 388 | * @platform_mode: Whether or not to use the platform driver. |
389 | * restored yet from the image and run the low level code that will restore | 389 | * |
390 | * the remaining contents of memory and switch to the just restored target | 390 | * Execute device drivers' .freeze_noirq() callbacks, restore the contents of |
391 | * kernel. | 391 | * highmem that have not been restored yet from the image and run the low-level |
392 | * code that will restore the remaining contents of memory and switch to the | ||
393 | * just restored target kernel. | ||
392 | */ | 394 | */ |
393 | |||
394 | static int resume_target_kernel(bool platform_mode) | 395 | static int resume_target_kernel(bool platform_mode) |
395 | { | 396 | { |
396 | int error; | 397 | int error; |
@@ -416,24 +417,26 @@ static int resume_target_kernel(bool platform_mode) | |||
416 | if (error) | 417 | if (error) |
417 | goto Enable_irqs; | 418 | goto Enable_irqs; |
418 | 419 | ||
419 | /* We'll ignore saved state, but this gets preempt count (etc) right */ | ||
420 | save_processor_state(); | 420 | save_processor_state(); |
421 | error = restore_highmem(); | 421 | error = restore_highmem(); |
422 | if (!error) { | 422 | if (!error) { |
423 | error = swsusp_arch_resume(); | 423 | error = swsusp_arch_resume(); |
424 | /* | 424 | /* |
425 | * The code below is only ever reached in case of a failure. | 425 | * The code below is only ever reached in case of a failure. |
426 | * Otherwise execution continues at place where | 426 | * Otherwise, execution continues at the place where |
427 | * swsusp_arch_suspend() was called | 427 | * swsusp_arch_suspend() was called. |
428 | */ | 428 | */ |
429 | BUG_ON(!error); | 429 | BUG_ON(!error); |
430 | /* This call to restore_highmem() undos the previous one */ | 430 | /* |
431 | * This call to restore_highmem() reverts the changes made by | ||
432 | * the previous one. | ||
433 | */ | ||
431 | restore_highmem(); | 434 | restore_highmem(); |
432 | } | 435 | } |
433 | /* | 436 | /* |
434 | * The only reason why swsusp_arch_resume() can fail is memory being | 437 | * The only reason why swsusp_arch_resume() can fail is memory being |
435 | * very tight, so we have to free it as soon as we can to avoid | 438 | * very tight, so we have to free it as soon as we can to avoid |
436 | * subsequent failures | 439 | * subsequent failures. |
437 | */ | 440 | */ |
438 | swsusp_free(); | 441 | swsusp_free(); |
439 | restore_processor_state(); | 442 | restore_processor_state(); |
@@ -456,14 +459,12 @@ static int resume_target_kernel(bool platform_mode) | |||
456 | } | 459 | } |
457 | 460 | ||
458 | /** | 461 | /** |
459 | * hibernation_restore - quiesce devices and restore the hibernation | 462 | * hibernation_restore - Quiesce devices and restore from a hibernation image. |
460 | * snapshot image. If successful, control returns in hibernation_snaphot() | 463 | * @platform_mode: If set, use platform driver to prepare for the transition. |
461 | * @platform_mode - if set, use the platform driver, if available, to | ||
462 | * prepare the platform firmware for the transition. | ||
463 | * | 464 | * |
464 | * Must be called with pm_mutex held | 465 | * This routine must be called with pm_mutex held. If it is successful, control |
466 | * reappears in the restored target kernel in hibernation_snapshot(). | ||
465 | */ | 467 | */ |
466 | |||
467 | int hibernation_restore(int platform_mode) | 468 | int hibernation_restore(int platform_mode) |
468 | { | 469 | { |
469 | int error; | 470 | int error; |
@@ -483,10 +484,8 @@ int hibernation_restore(int platform_mode) | |||
483 | } | 484 | } |
484 | 485 | ||
485 | /** | 486 | /** |
486 | * hibernation_platform_enter - enter the hibernation state using the | 487 | * hibernation_platform_enter - Power off the system using the platform driver. |
487 | * platform driver (if available) | ||
488 | */ | 488 | */ |
489 | |||
490 | int hibernation_platform_enter(void) | 489 | int hibernation_platform_enter(void) |
491 | { | 490 | { |
492 | int error; | 491 | int error; |
@@ -557,12 +556,12 @@ int hibernation_platform_enter(void) | |||
557 | } | 556 | } |
558 | 557 | ||
559 | /** | 558 | /** |
560 | * power_down - Shut the machine down for hibernation. | 559 | * power_down - Shut the machine down for hibernation. |
561 | * | 560 | * |
562 | * Use the platform driver, if configured so; otherwise try | 561 | * Use the platform driver, if configured, to put the system into the sleep |
563 | * to power off or reboot. | 562 | * state corresponding to hibernation, or try to power it off or reboot, |
563 | * depending on the value of hibernation_mode. | ||
564 | */ | 564 | */ |
565 | |||
566 | static void power_down(void) | 565 | static void power_down(void) |
567 | { | 566 | { |
568 | switch (hibernation_mode) { | 567 | switch (hibernation_mode) { |
@@ -599,9 +598,8 @@ static int prepare_processes(void) | |||
599 | } | 598 | } |
600 | 599 | ||
601 | /** | 600 | /** |
602 | * hibernate - The granpappy of the built-in hibernation management | 601 | * hibernate - Carry out system hibernation, including saving the image. |
603 | */ | 602 | */ |
604 | |||
605 | int hibernate(void) | 603 | int hibernate(void) |
606 | { | 604 | { |
607 | int error; | 605 | int error; |
@@ -679,17 +677,20 @@ int hibernate(void) | |||
679 | 677 | ||
680 | 678 | ||
681 | /** | 679 | /** |
682 | * software_resume - Resume from a saved image. | 680 | * software_resume - Resume from a saved hibernation image. |
683 | * | 681 | * |
684 | * Called as a late_initcall (so all devices are discovered and | 682 | * This routine is called as a late initcall, when all devices have been |
685 | * initialized), we call swsusp to see if we have a saved image or not. | 683 | * discovered and initialized already. |
686 | * If so, we quiesce devices, the restore the saved image. We will | ||
687 | * return above (in hibernate() ) if everything goes well. | ||
688 | * Otherwise, we fail gracefully and return to the normally | ||
689 | * scheduled program. | ||
690 | * | 684 | * |
685 | * The image reading code is called to see if there is a hibernation image | ||
686 | * available for reading. If that is the case, devices are quiesced and the | ||
687 | * contents of memory is restored from the saved image. | ||
688 | * | ||
689 | * If this is successful, control reappears in the restored target kernel in | ||
689 | * hibernation_snapshot() which returns to hibernate(). Otherwise, the routine | ||
691 | * attempts to recover gracefully and make the kernel return to the normal mode | ||
692 | * of operation. | ||
691 | */ | 693 | */ |
692 | |||
693 | static int software_resume(void) | 694 | static int software_resume(void) |
694 | { | 695 | { |
695 | int error; | 696 | int error; |
@@ -819,21 +820,17 @@ static const char * const hibernation_modes[] = { | |||
819 | [HIBERNATION_TESTPROC] = "testproc", | 820 | [HIBERNATION_TESTPROC] = "testproc", |
820 | }; | 821 | }; |
821 | 822 | ||
822 | /** | 823 | /* |
823 | * disk - Control hibernation mode | 824 | * /sys/power/disk - Control hibernation mode. |
824 | * | ||
825 | * Suspend-to-disk can be handled in several ways. We have a few options | ||
826 | * for putting the system to sleep - using the platform driver (e.g. ACPI | ||
827 | * or other hibernation_ops), powering off the system or rebooting the | ||
828 | * system (for testing) as well as the two test modes. | ||
829 | * | 825 | * |
830 | * The system can support 'platform', and that is known a priori (and | 826 | * Hibernation can be handled in several ways. There are a few different ways |
831 | * encoded by the presence of hibernation_ops). However, the user may | 827 | * to put the system into the sleep state: using the platform driver (e.g. ACPI |
832 | * choose 'shutdown' or 'reboot' as alternatives, as well as one fo the | 828 | * or other hibernation_ops), powering it off or rebooting it (for testing |
833 | * test modes, 'test' or 'testproc'. | 829 | * mostly), or using one of the two available test modes. |
834 | * | 830 | * |
835 | * show() will display what the mode is currently set to. | 831 | * The sysfs file /sys/power/disk provides an interface for selecting the |
836 | * store() will accept one of | 832 | * hibernation mode to use. Reading from this file causes the available modes |
833 | * to be printed. There are 5 modes that can be supported: | ||
837 | * | 834 | * |
838 | * 'platform' | 835 | * 'platform' |
839 | * 'shutdown' | 836 | * 'shutdown' |
@@ -841,8 +838,14 @@ static const char * const hibernation_modes[] = { | |||
841 | * 'test' | 838 | * 'test' |
842 | * 'testproc' | 839 | * 'testproc' |
843 | * | 840 | * |
844 | * It will only change to 'platform' if the system | 841 | * If a platform hibernation driver is in use, 'platform' will be supported |
845 | * supports it (as determined by having hibernation_ops). | 842 | * and will be used by default. Otherwise, 'shutdown' will be used by default. |
843 | * The selected option (i.e. the one corresponding to the current value of | ||
844 | * hibernation_mode) is enclosed in square brackets. | ||
845 | * | ||
846 | * To select a given hibernation mode it is necessary to write the mode's | ||
847 | * string representation (as returned by reading from /sys/power/disk) back | ||
848 | * into /sys/power/disk. | ||
846 | */ | 849 | */ |
847 | 850 | ||
848 | static ssize_t disk_show(struct kobject *kobj, struct kobj_attribute *attr, | 851 | static ssize_t disk_show(struct kobject *kobj, struct kobj_attribute *attr, |
@@ -875,7 +878,6 @@ static ssize_t disk_show(struct kobject *kobj, struct kobj_attribute *attr, | |||
875 | return buf-start; | 878 | return buf-start; |
876 | } | 879 | } |
877 | 880 | ||
878 | |||
879 | static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr, | 881 | static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr, |
880 | const char *buf, size_t n) | 882 | const char *buf, size_t n) |
881 | { | 883 | { |
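
The reworked comment above documents /sys/power/disk as a plain read/write text interface: reading lists the supported modes with the active one shown in brackets, and writing one of those strings back selects it. A minimal userspace sketch of that protocol follows; the path is the standard sysfs location, writing requires root, and the choice of "shutdown" is only an example.

#include <stdio.h>

int main(void)
{
	char modes[256];
	FILE *f = fopen("/sys/power/disk", "r");

	if (!f || !fgets(modes, sizeof(modes), f)) {
		perror("/sys/power/disk");
		return 1;
	}
	fclose(f);
	printf("available modes: %s", modes);	/* active mode shown in brackets */

	f = fopen("/sys/power/disk", "w");	/* selecting a mode needs root */
	if (!f)
		return 1;
	fputs("shutdown\n", f);			/* example: pick the shutdown mode */
	return fclose(f) ? 1 : 0;
}
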
diff --git a/kernel/printk.c b/kernel/printk.c index da8ca817eae3..35185392173f 100644 --- a/kernel/printk.c +++ b/kernel/printk.c | |||
@@ -31,6 +31,7 @@ | |||
31 | #include <linux/smp.h> | 31 | #include <linux/smp.h> |
32 | #include <linux/security.h> | 32 | #include <linux/security.h> |
33 | #include <linux/bootmem.h> | 33 | #include <linux/bootmem.h> |
34 | #include <linux/memblock.h> | ||
34 | #include <linux/syscalls.h> | 35 | #include <linux/syscalls.h> |
35 | #include <linux/kexec.h> | 36 | #include <linux/kexec.h> |
36 | #include <linux/kdb.h> | 37 | #include <linux/kdb.h> |
@@ -167,46 +168,74 @@ void log_buf_kexec_setup(void) | |||
167 | } | 168 | } |
168 | #endif | 169 | #endif |
169 | 170 | ||
171 | /* requested log_buf_len from kernel cmdline */ | ||
172 | static unsigned long __initdata new_log_buf_len; | ||
173 | |||
174 | /* save requested log_buf_len since it's too early to process it */ | ||
170 | static int __init log_buf_len_setup(char *str) | 175 | static int __init log_buf_len_setup(char *str) |
171 | { | 176 | { |
172 | unsigned size = memparse(str, &str); | 177 | unsigned size = memparse(str, &str); |
173 | unsigned long flags; | ||
174 | 178 | ||
175 | if (size) | 179 | if (size) |
176 | size = roundup_pow_of_two(size); | 180 | size = roundup_pow_of_two(size); |
177 | if (size > log_buf_len) { | 181 | if (size > log_buf_len) |
178 | unsigned start, dest_idx, offset; | 182 | new_log_buf_len = size; |
179 | char *new_log_buf; | ||
180 | 183 | ||
181 | new_log_buf = alloc_bootmem(size); | 184 | return 0; |
182 | if (!new_log_buf) { | 185 | } |
183 | printk(KERN_WARNING "log_buf_len: allocation failed\n"); | 186 | early_param("log_buf_len", log_buf_len_setup); |
184 | goto out; | ||
185 | } | ||
186 | 187 | ||
187 | spin_lock_irqsave(&logbuf_lock, flags); | 188 | void __init setup_log_buf(int early) |
188 | log_buf_len = size; | 189 | { |
189 | log_buf = new_log_buf; | 190 | unsigned long flags; |
190 | 191 | unsigned start, dest_idx, offset; | |
191 | offset = start = min(con_start, log_start); | 192 | char *new_log_buf; |
192 | dest_idx = 0; | 193 | int free; |
193 | while (start != log_end) { | 194 | |
194 | log_buf[dest_idx] = __log_buf[start & (__LOG_BUF_LEN - 1)]; | 195 | if (!new_log_buf_len) |
195 | start++; | 196 | return; |
196 | dest_idx++; | 197 | |
197 | } | 198 | if (early) { |
198 | log_start -= offset; | 199 | unsigned long mem; |
199 | con_start -= offset; | ||
200 | log_end -= offset; | ||
201 | spin_unlock_irqrestore(&logbuf_lock, flags); | ||
202 | 200 | ||
203 | printk(KERN_NOTICE "log_buf_len: %d\n", log_buf_len); | 201 | mem = memblock_alloc(new_log_buf_len, PAGE_SIZE); |
202 | if (mem == MEMBLOCK_ERROR) | ||
203 | return; | ||
204 | new_log_buf = __va(mem); | ||
205 | } else { | ||
206 | new_log_buf = alloc_bootmem_nopanic(new_log_buf_len); | ||
204 | } | 207 | } |
205 | out: | ||
206 | return 1; | ||
207 | } | ||
208 | 208 | ||
209 | __setup("log_buf_len=", log_buf_len_setup); | 209 | if (unlikely(!new_log_buf)) { |
210 | pr_err("log_buf_len: %ld bytes not available\n", | ||
211 | new_log_buf_len); | ||
212 | return; | ||
213 | } | ||
214 | |||
215 | spin_lock_irqsave(&logbuf_lock, flags); | ||
216 | log_buf_len = new_log_buf_len; | ||
217 | log_buf = new_log_buf; | ||
218 | new_log_buf_len = 0; | ||
219 | free = __LOG_BUF_LEN - log_end; | ||
220 | |||
221 | offset = start = min(con_start, log_start); | ||
222 | dest_idx = 0; | ||
223 | while (start != log_end) { | ||
224 | unsigned log_idx_mask = start & (__LOG_BUF_LEN - 1); | ||
225 | |||
226 | log_buf[dest_idx] = __log_buf[log_idx_mask]; | ||
227 | start++; | ||
228 | dest_idx++; | ||
229 | } | ||
230 | log_start -= offset; | ||
231 | con_start -= offset; | ||
232 | log_end -= offset; | ||
233 | spin_unlock_irqrestore(&logbuf_lock, flags); | ||
234 | |||
235 | pr_info("log_buf_len: %d\n", log_buf_len); | ||
236 | pr_info("early log buf free: %d(%d%%)\n", | ||
237 | free, (free * 100) / __LOG_BUF_LEN); | ||
238 | } | ||
210 | 239 | ||
211 | #ifdef CONFIG_BOOT_PRINTK_DELAY | 240 | #ifdef CONFIG_BOOT_PRINTK_DELAY |
212 | 241 | ||
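
setup_log_buf() migrates the contents of the static __LOG_BUF_LEN buffer into the newly allocated one by walking indices from min(con_start, log_start) to log_end and masking with (__LOG_BUF_LEN - 1), which only works because the static buffer size is a power of two. Below is a standalone sketch of that masked copy; OLD_LEN, old_buf and copy_ring are invented names, not kernel symbols.

#include <stdio.h>

#define OLD_LEN 16	/* must be a power of two, like __LOG_BUF_LEN */

static char old_buf[OLD_LEN];
static char new_buf[64];

/*
 * Copy the live region [start, end) of a power-of-two ring buffer into a
 * larger flat buffer, the way setup_log_buf() migrates the early log.
 */
static unsigned int copy_ring(unsigned int start, unsigned int end)
{
	unsigned int dest = 0;

	while (start != end) {
		new_buf[dest++] = old_buf[start & (OLD_LEN - 1)];
		start++;
	}
	return dest;	/* number of bytes migrated */
}

int main(void)
{
	unsigned int i, n;

	for (i = 0; i < 20; i++)	/* write 20 bytes so the small ring wraps */
		old_buf[i & (OLD_LEN - 1)] = 'a' + i;
	n = copy_ring(4, 20);		/* logical bytes 0-3 were overwritten by the wrap */
	printf("migrated %u bytes: %.*s\n", n, (int)n, new_buf);
	return 0;
}
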
diff --git a/kernel/profile.c b/kernel/profile.c index 14c9f87b9fc9..961b389fe52f 100644 --- a/kernel/profile.c +++ b/kernel/profile.c | |||
@@ -303,14 +303,12 @@ static void profile_discard_flip_buffers(void) | |||
303 | mutex_unlock(&profile_flip_mutex); | 303 | mutex_unlock(&profile_flip_mutex); |
304 | } | 304 | } |
305 | 305 | ||
306 | void profile_hits(int type, void *__pc, unsigned int nr_hits) | 306 | static void do_profile_hits(int type, void *__pc, unsigned int nr_hits) |
307 | { | 307 | { |
308 | unsigned long primary, secondary, flags, pc = (unsigned long)__pc; | 308 | unsigned long primary, secondary, flags, pc = (unsigned long)__pc; |
309 | int i, j, cpu; | 309 | int i, j, cpu; |
310 | struct profile_hit *hits; | 310 | struct profile_hit *hits; |
311 | 311 | ||
312 | if (prof_on != type || !prof_buffer) | ||
313 | return; | ||
314 | pc = min((pc - (unsigned long)_stext) >> prof_shift, prof_len - 1); | 312 | pc = min((pc - (unsigned long)_stext) >> prof_shift, prof_len - 1); |
315 | i = primary = (pc & (NR_PROFILE_GRP - 1)) << PROFILE_GRPSHIFT; | 313 | i = primary = (pc & (NR_PROFILE_GRP - 1)) << PROFILE_GRPSHIFT; |
316 | secondary = (~(pc << 1) & (NR_PROFILE_GRP - 1)) << PROFILE_GRPSHIFT; | 314 | secondary = (~(pc << 1) & (NR_PROFILE_GRP - 1)) << PROFILE_GRPSHIFT; |
@@ -417,16 +415,20 @@ out_free: | |||
417 | #define profile_discard_flip_buffers() do { } while (0) | 415 | #define profile_discard_flip_buffers() do { } while (0) |
418 | #define profile_cpu_callback NULL | 416 | #define profile_cpu_callback NULL |
419 | 417 | ||
420 | void profile_hits(int type, void *__pc, unsigned int nr_hits) | 418 | static void do_profile_hits(int type, void *__pc, unsigned int nr_hits) |
421 | { | 419 | { |
422 | unsigned long pc; | 420 | unsigned long pc; |
423 | |||
424 | if (prof_on != type || !prof_buffer) | ||
425 | return; | ||
426 | pc = ((unsigned long)__pc - (unsigned long)_stext) >> prof_shift; | 421 | pc = ((unsigned long)__pc - (unsigned long)_stext) >> prof_shift; |
427 | atomic_add(nr_hits, &prof_buffer[min(pc, prof_len - 1)]); | 422 | atomic_add(nr_hits, &prof_buffer[min(pc, prof_len - 1)]); |
428 | } | 423 | } |
429 | #endif /* !CONFIG_SMP */ | 424 | #endif /* !CONFIG_SMP */ |
425 | |||
426 | void profile_hits(int type, void *__pc, unsigned int nr_hits) | ||
427 | { | ||
428 | if (prof_on != type || !prof_buffer) | ||
429 | return; | ||
430 | do_profile_hits(type, __pc, nr_hits); | ||
431 | } | ||
430 | EXPORT_SYMBOL_GPL(profile_hits); | 432 | EXPORT_SYMBOL_GPL(profile_hits); |
431 | 433 | ||
432 | void profile_tick(int type) | 434 | void profile_tick(int type) |
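
The profile.c hunks hoist the "is this profile type enabled" check out of the two config-specific implementations into a single exported wrapper, so the guard exists in exactly one place. A minimal model of that wrapper-plus-helper split; all names here are invented for illustration.

#include <stdio.h>

static int prof_on = 1;			/* stands in for the real enable flag */

/* Config-specific fast path: assumes the caller already validated state. */
static void do_record_hit(unsigned long pc, unsigned int nr_hits)
{
	printf("pc=%#lx hits=%u\n", pc, nr_hits);
}

/* Public entry point: one copy of the guard, shared by every variant. */
void record_hit(int type, unsigned long pc, unsigned int nr_hits)
{
	if (prof_on != type)
		return;
	do_record_hit(pc, nr_hits);
}

int main(void)
{
	record_hit(1, 0x1234, 3);	/* recorded */
	record_hit(2, 0x5678, 1);	/* filtered out by the guard */
	return 0;
}
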
diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 7a81fc071344..2df115790cd9 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c | |||
@@ -562,7 +562,7 @@ static int ptrace_resume(struct task_struct *child, long request, | |||
562 | } | 562 | } |
563 | 563 | ||
564 | child->exit_code = data; | 564 | child->exit_code = data; |
565 | wake_up_process(child); | 565 | wake_up_state(child, __TASK_TRACED); |
566 | 566 | ||
567 | return 0; | 567 | return 0; |
568 | } | 568 | } |
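
Switching ptrace_resume() to wake_up_state(child, __TASK_TRACED) means the tracee is only woken if it is still sleeping in the traced state; a task that has since moved into some other sleep is left undisturbed. A toy model of a state-masked wakeup, with an invented task structure (this is not the kernel's implementation, just the idea):

#include <stdio.h>

#define STATE_RUNNING	0u
#define STATE_SLEEPING	0x1u
#define STATE_TRACED	0x8u

struct toy_task {
	const char *name;
	unsigned int state;
};

/* Wake the task only if its current state matches the given mask,
 * mirroring the wake_up_state() semantics relied on above. */
static int wake_if_state(struct toy_task *t, unsigned int mask)
{
	if (!(t->state & mask))
		return 0;		/* not in the expected state: no-op */
	t->state = STATE_RUNNING;
	return 1;
}

int main(void)
{
	struct toy_task tracee  = { "tracee",  STATE_TRACED };
	struct toy_task sleeper = { "sleeper", STATE_SLEEPING };

	printf("%s woken: %d\n", tracee.name,  wake_if_state(&tracee,  STATE_TRACED));
	printf("%s woken: %d\n", sleeper.name, wake_if_state(&sleeper, STATE_TRACED));
	return 0;
}
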
diff --git a/kernel/rcutree.c b/kernel/rcutree.c index f07d2f03181a..77a7671dd147 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c | |||
@@ -36,7 +36,7 @@ | |||
36 | #include <linux/interrupt.h> | 36 | #include <linux/interrupt.h> |
37 | #include <linux/sched.h> | 37 | #include <linux/sched.h> |
38 | #include <linux/nmi.h> | 38 | #include <linux/nmi.h> |
39 | #include <asm/atomic.h> | 39 | #include <linux/atomic.h> |
40 | #include <linux/bitops.h> | 40 | #include <linux/bitops.h> |
41 | #include <linux/module.h> | 41 | #include <linux/module.h> |
42 | #include <linux/completion.h> | 42 | #include <linux/completion.h> |
@@ -95,7 +95,6 @@ static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task); | |||
95 | DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status); | 95 | DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status); |
96 | DEFINE_PER_CPU(int, rcu_cpu_kthread_cpu); | 96 | DEFINE_PER_CPU(int, rcu_cpu_kthread_cpu); |
97 | DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); | 97 | DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); |
98 | static DEFINE_PER_CPU(wait_queue_head_t, rcu_cpu_wq); | ||
99 | DEFINE_PER_CPU(char, rcu_cpu_has_work); | 98 | DEFINE_PER_CPU(char, rcu_cpu_has_work); |
100 | static char rcu_kthreads_spawnable; | 99 | static char rcu_kthreads_spawnable; |
101 | 100 | ||
@@ -163,7 +162,7 @@ EXPORT_SYMBOL_GPL(rcu_note_context_switch); | |||
163 | #ifdef CONFIG_NO_HZ | 162 | #ifdef CONFIG_NO_HZ |
164 | DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { | 163 | DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { |
165 | .dynticks_nesting = 1, | 164 | .dynticks_nesting = 1, |
166 | .dynticks = 1, | 165 | .dynticks = ATOMIC_INIT(1), |
167 | }; | 166 | }; |
168 | #endif /* #ifdef CONFIG_NO_HZ */ | 167 | #endif /* #ifdef CONFIG_NO_HZ */ |
169 | 168 | ||
@@ -322,13 +321,25 @@ void rcu_enter_nohz(void) | |||
322 | unsigned long flags; | 321 | unsigned long flags; |
323 | struct rcu_dynticks *rdtp; | 322 | struct rcu_dynticks *rdtp; |
324 | 323 | ||
325 | smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */ | ||
326 | local_irq_save(flags); | 324 | local_irq_save(flags); |
327 | rdtp = &__get_cpu_var(rcu_dynticks); | 325 | rdtp = &__get_cpu_var(rcu_dynticks); |
328 | rdtp->dynticks++; | 326 | if (--rdtp->dynticks_nesting) { |
329 | rdtp->dynticks_nesting--; | 327 | local_irq_restore(flags); |
330 | WARN_ON_ONCE(rdtp->dynticks & 0x1); | 328 | return; |
329 | } | ||
330 | /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */ | ||
331 | smp_mb__before_atomic_inc(); /* See above. */ | ||
332 | atomic_inc(&rdtp->dynticks); | ||
333 | smp_mb__after_atomic_inc(); /* Force ordering with next sojourn. */ | ||
334 | WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); | ||
331 | local_irq_restore(flags); | 335 | local_irq_restore(flags); |
336 | |||
337 | /* If the interrupt queued a callback, get out of dyntick mode. */ | ||
338 | if (in_irq() && | ||
339 | (__get_cpu_var(rcu_sched_data).nxtlist || | ||
340 | __get_cpu_var(rcu_bh_data).nxtlist || | ||
341 | rcu_preempt_needs_cpu(smp_processor_id()))) | ||
342 | set_need_resched(); | ||
332 | } | 343 | } |
333 | 344 | ||
334 | /* | 345 | /* |
@@ -344,11 +355,16 @@ void rcu_exit_nohz(void) | |||
344 | 355 | ||
345 | local_irq_save(flags); | 356 | local_irq_save(flags); |
346 | rdtp = &__get_cpu_var(rcu_dynticks); | 357 | rdtp = &__get_cpu_var(rcu_dynticks); |
347 | rdtp->dynticks++; | 358 | if (rdtp->dynticks_nesting++) { |
348 | rdtp->dynticks_nesting++; | 359 | local_irq_restore(flags); |
349 | WARN_ON_ONCE(!(rdtp->dynticks & 0x1)); | 360 | return; |
361 | } | ||
362 | smp_mb__before_atomic_inc(); /* Force ordering w/previous sojourn. */ | ||
363 | atomic_inc(&rdtp->dynticks); | ||
364 | /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ | ||
365 | smp_mb__after_atomic_inc(); /* See above. */ | ||
366 | WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); | ||
350 | local_irq_restore(flags); | 367 | local_irq_restore(flags); |
351 | smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */ | ||
352 | } | 368 | } |
353 | 369 | ||
354 | /** | 370 | /** |
@@ -362,11 +378,15 @@ void rcu_nmi_enter(void) | |||
362 | { | 378 | { |
363 | struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); | 379 | struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); |
364 | 380 | ||
365 | if (rdtp->dynticks & 0x1) | 381 | if (rdtp->dynticks_nmi_nesting == 0 && |
382 | (atomic_read(&rdtp->dynticks) & 0x1)) | ||
366 | return; | 383 | return; |
367 | rdtp->dynticks_nmi++; | 384 | rdtp->dynticks_nmi_nesting++; |
368 | WARN_ON_ONCE(!(rdtp->dynticks_nmi & 0x1)); | 385 | smp_mb__before_atomic_inc(); /* Force delay from prior write. */ |
369 | smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */ | 386 | atomic_inc(&rdtp->dynticks); |
387 | /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ | ||
388 | smp_mb__after_atomic_inc(); /* See above. */ | ||
389 | WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); | ||
370 | } | 390 | } |
371 | 391 | ||
372 | /** | 392 | /** |
@@ -380,11 +400,14 @@ void rcu_nmi_exit(void) | |||
380 | { | 400 | { |
381 | struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); | 401 | struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); |
382 | 402 | ||
383 | if (rdtp->dynticks & 0x1) | 403 | if (rdtp->dynticks_nmi_nesting == 0 || |
404 | --rdtp->dynticks_nmi_nesting != 0) | ||
384 | return; | 405 | return; |
385 | smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */ | 406 | /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */ |
386 | rdtp->dynticks_nmi++; | 407 | smp_mb__before_atomic_inc(); /* See above. */ |
387 | WARN_ON_ONCE(rdtp->dynticks_nmi & 0x1); | 408 | atomic_inc(&rdtp->dynticks); |
409 | smp_mb__after_atomic_inc(); /* Force delay to next write. */ | ||
410 | WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); | ||
388 | } | 411 | } |
389 | 412 | ||
390 | /** | 413 | /** |
@@ -395,13 +418,7 @@ void rcu_nmi_exit(void) | |||
395 | */ | 418 | */ |
396 | void rcu_irq_enter(void) | 419 | void rcu_irq_enter(void) |
397 | { | 420 | { |
398 | struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); | 421 | rcu_exit_nohz(); |
399 | |||
400 | if (rdtp->dynticks_nesting++) | ||
401 | return; | ||
402 | rdtp->dynticks++; | ||
403 | WARN_ON_ONCE(!(rdtp->dynticks & 0x1)); | ||
404 | smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */ | ||
405 | } | 422 | } |
406 | 423 | ||
407 | /** | 424 | /** |
@@ -413,18 +430,7 @@ void rcu_irq_enter(void) | |||
413 | */ | 430 | */ |
414 | void rcu_irq_exit(void) | 431 | void rcu_irq_exit(void) |
415 | { | 432 | { |
416 | struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); | 433 | rcu_enter_nohz(); |
417 | |||
418 | if (--rdtp->dynticks_nesting) | ||
419 | return; | ||
420 | smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */ | ||
421 | rdtp->dynticks++; | ||
422 | WARN_ON_ONCE(rdtp->dynticks & 0x1); | ||
423 | |||
424 | /* If the interrupt queued a callback, get out of dyntick mode. */ | ||
425 | if (__this_cpu_read(rcu_sched_data.nxtlist) || | ||
426 | __this_cpu_read(rcu_bh_data.nxtlist)) | ||
427 | set_need_resched(); | ||
428 | } | 434 | } |
429 | 435 | ||
430 | #ifdef CONFIG_SMP | 436 | #ifdef CONFIG_SMP |
@@ -436,19 +442,8 @@ void rcu_irq_exit(void) | |||
436 | */ | 442 | */ |
437 | static int dyntick_save_progress_counter(struct rcu_data *rdp) | 443 | static int dyntick_save_progress_counter(struct rcu_data *rdp) |
438 | { | 444 | { |
439 | int ret; | 445 | rdp->dynticks_snap = atomic_add_return(0, &rdp->dynticks->dynticks); |
440 | int snap; | 446 | return 0; |
441 | int snap_nmi; | ||
442 | |||
443 | snap = rdp->dynticks->dynticks; | ||
444 | snap_nmi = rdp->dynticks->dynticks_nmi; | ||
445 | smp_mb(); /* Order sampling of snap with end of grace period. */ | ||
446 | rdp->dynticks_snap = snap; | ||
447 | rdp->dynticks_nmi_snap = snap_nmi; | ||
448 | ret = ((snap & 0x1) == 0) && ((snap_nmi & 0x1) == 0); | ||
449 | if (ret) | ||
450 | rdp->dynticks_fqs++; | ||
451 | return ret; | ||
452 | } | 447 | } |
453 | 448 | ||
454 | /* | 449 | /* |
@@ -459,16 +454,11 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp) | |||
459 | */ | 454 | */ |
460 | static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) | 455 | static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) |
461 | { | 456 | { |
462 | long curr; | 457 | unsigned long curr; |
463 | long curr_nmi; | 458 | unsigned long snap; |
464 | long snap; | ||
465 | long snap_nmi; | ||
466 | 459 | ||
467 | curr = rdp->dynticks->dynticks; | 460 | curr = (unsigned long)atomic_add_return(0, &rdp->dynticks->dynticks); |
468 | snap = rdp->dynticks_snap; | 461 | snap = (unsigned long)rdp->dynticks_snap; |
469 | curr_nmi = rdp->dynticks->dynticks_nmi; | ||
470 | snap_nmi = rdp->dynticks_nmi_snap; | ||
471 | smp_mb(); /* force ordering with cpu entering/leaving dynticks. */ | ||
472 | 462 | ||
473 | /* | 463 | /* |
474 | * If the CPU passed through or entered a dynticks idle phase with | 464 | * If the CPU passed through or entered a dynticks idle phase with |
@@ -478,8 +468,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) | |||
478 | * read-side critical section that started before the beginning | 468 | * read-side critical section that started before the beginning |
479 | * of the current RCU grace period. | 469 | * of the current RCU grace period. |
480 | */ | 470 | */ |
481 | if ((curr != snap || (curr & 0x1) == 0) && | 471 | if ((curr & 0x1) == 0 || ULONG_CMP_GE(curr, snap + 2)) { |
482 | (curr_nmi != snap_nmi || (curr_nmi & 0x1) == 0)) { | ||
483 | rdp->dynticks_fqs++; | 472 | rdp->dynticks_fqs++; |
484 | return 1; | 473 | return 1; |
485 | } | 474 | } |
@@ -908,6 +897,12 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags) | |||
908 | unsigned long gp_duration; | 897 | unsigned long gp_duration; |
909 | 898 | ||
910 | WARN_ON_ONCE(!rcu_gp_in_progress(rsp)); | 899 | WARN_ON_ONCE(!rcu_gp_in_progress(rsp)); |
900 | |||
901 | /* | ||
902 | * Ensure that all grace-period and pre-grace-period activity | ||
903 | * is seen before the assignment to rsp->completed. | ||
904 | */ | ||
905 | smp_mb(); /* See above block comment. */ | ||
911 | gp_duration = jiffies - rsp->gp_start; | 906 | gp_duration = jiffies - rsp->gp_start; |
912 | if (gp_duration > rsp->gp_max) | 907 | if (gp_duration > rsp->gp_max) |
913 | rsp->gp_max = gp_duration; | 908 | rsp->gp_max = gp_duration; |
@@ -1455,25 +1450,11 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) | |||
1455 | */ | 1450 | */ |
1456 | static void rcu_process_callbacks(void) | 1451 | static void rcu_process_callbacks(void) |
1457 | { | 1452 | { |
1458 | /* | ||
1459 | * Memory references from any prior RCU read-side critical sections | ||
1460 | * executed by the interrupted code must be seen before any RCU | ||
1461 | * grace-period manipulations below. | ||
1462 | */ | ||
1463 | smp_mb(); /* See above block comment. */ | ||
1464 | |||
1465 | __rcu_process_callbacks(&rcu_sched_state, | 1453 | __rcu_process_callbacks(&rcu_sched_state, |
1466 | &__get_cpu_var(rcu_sched_data)); | 1454 | &__get_cpu_var(rcu_sched_data)); |
1467 | __rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data)); | 1455 | __rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data)); |
1468 | rcu_preempt_process_callbacks(); | 1456 | rcu_preempt_process_callbacks(); |
1469 | 1457 | ||
1470 | /* | ||
1471 | * Memory references from any later RCU read-side critical sections | ||
1472 | * executed by the interrupted code must be seen after any RCU | ||
1473 | * grace-period manipulations above. | ||
1474 | */ | ||
1475 | smp_mb(); /* See above block comment. */ | ||
1476 | |||
1477 | /* If we are last CPU on way to dyntick-idle mode, accelerate it. */ | 1458 | /* If we are last CPU on way to dyntick-idle mode, accelerate it. */ |
1478 | rcu_needs_cpu_flush(); | 1459 | rcu_needs_cpu_flush(); |
1479 | } | 1460 | } |
@@ -1494,7 +1475,7 @@ static void invoke_rcu_cpu_kthread(void) | |||
1494 | local_irq_restore(flags); | 1475 | local_irq_restore(flags); |
1495 | return; | 1476 | return; |
1496 | } | 1477 | } |
1497 | wake_up(&__get_cpu_var(rcu_cpu_wq)); | 1478 | wake_up_process(__this_cpu_read(rcu_cpu_kthread_task)); |
1498 | local_irq_restore(flags); | 1479 | local_irq_restore(flags); |
1499 | } | 1480 | } |
1500 | 1481 | ||
@@ -1544,13 +1525,10 @@ static void rcu_cpu_kthread_setrt(int cpu, int to_rt) | |||
1544 | */ | 1525 | */ |
1545 | static void rcu_cpu_kthread_timer(unsigned long arg) | 1526 | static void rcu_cpu_kthread_timer(unsigned long arg) |
1546 | { | 1527 | { |
1547 | unsigned long flags; | ||
1548 | struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, arg); | 1528 | struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, arg); |
1549 | struct rcu_node *rnp = rdp->mynode; | 1529 | struct rcu_node *rnp = rdp->mynode; |
1550 | 1530 | ||
1551 | raw_spin_lock_irqsave(&rnp->lock, flags); | 1531 | atomic_or(rdp->grpmask, &rnp->wakemask); |
1552 | rnp->wakemask |= rdp->grpmask; | ||
1553 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | ||
1554 | invoke_rcu_node_kthread(rnp); | 1532 | invoke_rcu_node_kthread(rnp); |
1555 | } | 1533 | } |
1556 | 1534 | ||
@@ -1617,14 +1595,12 @@ static int rcu_cpu_kthread(void *arg) | |||
1617 | unsigned long flags; | 1595 | unsigned long flags; |
1618 | int spincnt = 0; | 1596 | int spincnt = 0; |
1619 | unsigned int *statusp = &per_cpu(rcu_cpu_kthread_status, cpu); | 1597 | unsigned int *statusp = &per_cpu(rcu_cpu_kthread_status, cpu); |
1620 | wait_queue_head_t *wqp = &per_cpu(rcu_cpu_wq, cpu); | ||
1621 | char work; | 1598 | char work; |
1622 | char *workp = &per_cpu(rcu_cpu_has_work, cpu); | 1599 | char *workp = &per_cpu(rcu_cpu_has_work, cpu); |
1623 | 1600 | ||
1624 | for (;;) { | 1601 | for (;;) { |
1625 | *statusp = RCU_KTHREAD_WAITING; | 1602 | *statusp = RCU_KTHREAD_WAITING; |
1626 | wait_event_interruptible(*wqp, | 1603 | rcu_wait(*workp != 0 || kthread_should_stop()); |
1627 | *workp != 0 || kthread_should_stop()); | ||
1628 | local_bh_disable(); | 1604 | local_bh_disable(); |
1629 | if (rcu_cpu_kthread_should_stop(cpu)) { | 1605 | if (rcu_cpu_kthread_should_stop(cpu)) { |
1630 | local_bh_enable(); | 1606 | local_bh_enable(); |
@@ -1672,10 +1648,10 @@ static int __cpuinit rcu_spawn_one_cpu_kthread(int cpu) | |||
1672 | if (IS_ERR(t)) | 1648 | if (IS_ERR(t)) |
1673 | return PTR_ERR(t); | 1649 | return PTR_ERR(t); |
1674 | kthread_bind(t, cpu); | 1650 | kthread_bind(t, cpu); |
1651 | set_task_state(t, TASK_INTERRUPTIBLE); | ||
1675 | per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu; | 1652 | per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu; |
1676 | WARN_ON_ONCE(per_cpu(rcu_cpu_kthread_task, cpu) != NULL); | 1653 | WARN_ON_ONCE(per_cpu(rcu_cpu_kthread_task, cpu) != NULL); |
1677 | per_cpu(rcu_cpu_kthread_task, cpu) = t; | 1654 | per_cpu(rcu_cpu_kthread_task, cpu) = t; |
1678 | wake_up_process(t); | ||
1679 | sp.sched_priority = RCU_KTHREAD_PRIO; | 1655 | sp.sched_priority = RCU_KTHREAD_PRIO; |
1680 | sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); | 1656 | sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); |
1681 | return 0; | 1657 | return 0; |
@@ -1698,11 +1674,10 @@ static int rcu_node_kthread(void *arg) | |||
1698 | 1674 | ||
1699 | for (;;) { | 1675 | for (;;) { |
1700 | rnp->node_kthread_status = RCU_KTHREAD_WAITING; | 1676 | rnp->node_kthread_status = RCU_KTHREAD_WAITING; |
1701 | wait_event_interruptible(rnp->node_wq, rnp->wakemask != 0); | 1677 | rcu_wait(atomic_read(&rnp->wakemask) != 0); |
1702 | rnp->node_kthread_status = RCU_KTHREAD_RUNNING; | 1678 | rnp->node_kthread_status = RCU_KTHREAD_RUNNING; |
1703 | raw_spin_lock_irqsave(&rnp->lock, flags); | 1679 | raw_spin_lock_irqsave(&rnp->lock, flags); |
1704 | mask = rnp->wakemask; | 1680 | mask = atomic_xchg(&rnp->wakemask, 0); |
1705 | rnp->wakemask = 0; | ||
1706 | rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */ | 1681 | rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */ |
1707 | for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) { | 1682 | for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) { |
1708 | if ((mask & 0x1) == 0) | 1683 | if ((mask & 0x1) == 0) |
@@ -1781,9 +1756,9 @@ static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp, | |||
1781 | if (IS_ERR(t)) | 1756 | if (IS_ERR(t)) |
1782 | return PTR_ERR(t); | 1757 | return PTR_ERR(t); |
1783 | raw_spin_lock_irqsave(&rnp->lock, flags); | 1758 | raw_spin_lock_irqsave(&rnp->lock, flags); |
1759 | set_task_state(t, TASK_INTERRUPTIBLE); | ||
1784 | rnp->node_kthread_task = t; | 1760 | rnp->node_kthread_task = t; |
1785 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1761 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
1786 | wake_up_process(t); | ||
1787 | sp.sched_priority = 99; | 1762 | sp.sched_priority = 99; |
1788 | sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); | 1763 | sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); |
1789 | } | 1764 | } |
@@ -1800,21 +1775,16 @@ static int __init rcu_spawn_kthreads(void) | |||
1800 | 1775 | ||
1801 | rcu_kthreads_spawnable = 1; | 1776 | rcu_kthreads_spawnable = 1; |
1802 | for_each_possible_cpu(cpu) { | 1777 | for_each_possible_cpu(cpu) { |
1803 | init_waitqueue_head(&per_cpu(rcu_cpu_wq, cpu)); | ||
1804 | per_cpu(rcu_cpu_has_work, cpu) = 0; | 1778 | per_cpu(rcu_cpu_has_work, cpu) = 0; |
1805 | if (cpu_online(cpu)) | 1779 | if (cpu_online(cpu)) |
1806 | (void)rcu_spawn_one_cpu_kthread(cpu); | 1780 | (void)rcu_spawn_one_cpu_kthread(cpu); |
1807 | } | 1781 | } |
1808 | rnp = rcu_get_root(rcu_state); | 1782 | rnp = rcu_get_root(rcu_state); |
1809 | init_waitqueue_head(&rnp->node_wq); | ||
1810 | rcu_init_boost_waitqueue(rnp); | ||
1811 | (void)rcu_spawn_one_node_kthread(rcu_state, rnp); | 1783 | (void)rcu_spawn_one_node_kthread(rcu_state, rnp); |
1812 | if (NUM_RCU_NODES > 1) | 1784 | if (NUM_RCU_NODES > 1) { |
1813 | rcu_for_each_leaf_node(rcu_state, rnp) { | 1785 | rcu_for_each_leaf_node(rcu_state, rnp) |
1814 | init_waitqueue_head(&rnp->node_wq); | ||
1815 | rcu_init_boost_waitqueue(rnp); | ||
1816 | (void)rcu_spawn_one_node_kthread(rcu_state, rnp); | 1786 | (void)rcu_spawn_one_node_kthread(rcu_state, rnp); |
1817 | } | 1787 | } |
1818 | return 0; | 1788 | return 0; |
1819 | } | 1789 | } |
1820 | early_initcall(rcu_spawn_kthreads); | 1790 | early_initcall(rcu_spawn_kthreads); |
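
Most of the rcutree.c churn comes from collapsing the separate dynticks and dynticks_nmi counters into one atomic_t whose parity encodes the CPU state: the counter is incremented on every idle entry and exit, so an even value means dyntick-idle, an odd value means non-idle, and a counter that has advanced by at least two since a snapshot implies the CPU passed through a quiescent state. A self-contained C11 sketch of that parity/snapshot logic, single-threaded and with invented names, to make the invariant concrete:

#include <stdatomic.h>
#include <stdio.h>

static atomic_uint dynticks = 1;	/* odd: the CPU starts out non-idle */

static void enter_idle(void) { atomic_fetch_add(&dynticks, 1); }	/* -> even */
static void exit_idle(void)  { atomic_fetch_add(&dynticks, 1); }	/* -> odd  */

/* Grace-period side: snapshot the counter, later decide whether the CPU
 * was idle, or passed through idle, since the snapshot was taken. */
static unsigned int snapshot(void) { return atomic_load(&dynticks); }

static int in_quiescent_state(unsigned int curr, unsigned int snap)
{
	return (curr & 0x1) == 0 || curr - snap >= 2;
}

int main(void)
{
	unsigned int snap = snapshot();

	enter_idle();
	exit_idle();			/* counter advanced by 2 */
	printf("QS observed: %d\n", in_quiescent_state(snapshot(), snap));
	return 0;
}

The unsigned subtraction stands in, loosely, for the kernel's ULONG_CMP_GE(curr, snap + 2) comparison, which tolerates counter wraparound.
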
diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 257664815d5d..7b9a08b4aaea 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h | |||
@@ -84,11 +84,9 @@ | |||
84 | * Dynticks per-CPU state. | 84 | * Dynticks per-CPU state. |
85 | */ | 85 | */ |
86 | struct rcu_dynticks { | 86 | struct rcu_dynticks { |
87 | int dynticks_nesting; /* Track nesting level, sort of. */ | 87 | int dynticks_nesting; /* Track irq/process nesting level. */ |
88 | int dynticks; /* Even value for dynticks-idle, else odd. */ | 88 | int dynticks_nmi_nesting; /* Track NMI nesting level. */ |
89 | int dynticks_nmi; /* Even value for either dynticks-idle or */ | 89 | atomic_t dynticks; /* Even value for dynticks-idle, else odd. */ |
90 | /* not in nmi handler, else odd. So this */ | ||
91 | /* remains even for nmi from irq handler. */ | ||
92 | }; | 90 | }; |
93 | 91 | ||
94 | /* RCU's kthread states for tracing. */ | 92 | /* RCU's kthread states for tracing. */ |
@@ -121,7 +119,9 @@ struct rcu_node { | |||
121 | /* elements that need to drain to allow the */ | 119 | /* elements that need to drain to allow the */ |
122 | /* current expedited grace period to */ | 120 | /* current expedited grace period to */ |
123 | /* complete (only for TREE_PREEMPT_RCU). */ | 121 | /* complete (only for TREE_PREEMPT_RCU). */ |
124 | unsigned long wakemask; /* CPUs whose kthread needs to be awakened. */ | 122 | atomic_t wakemask; /* CPUs whose kthread needs to be awakened. */ |
123 | /* Since this has meaning only for leaf */ | ||
124 | /* rcu_node structures, 32 bits suffices. */ | ||
125 | unsigned long qsmaskinit; | 125 | unsigned long qsmaskinit; |
126 | /* Per-GP initial value for qsmask & expmask. */ | 126 | /* Per-GP initial value for qsmask & expmask. */ |
127 | unsigned long grpmask; /* Mask to apply to parent qsmask. */ | 127 | unsigned long grpmask; /* Mask to apply to parent qsmask. */ |
@@ -159,9 +159,6 @@ struct rcu_node { | |||
159 | struct task_struct *boost_kthread_task; | 159 | struct task_struct *boost_kthread_task; |
160 | /* kthread that takes care of priority */ | 160 | /* kthread that takes care of priority */ |
161 | /* boosting for this rcu_node structure. */ | 161 | /* boosting for this rcu_node structure. */ |
162 | wait_queue_head_t boost_wq; | ||
163 | /* Wait queue on which to park the boost */ | ||
164 | /* kthread. */ | ||
165 | unsigned int boost_kthread_status; | 162 | unsigned int boost_kthread_status; |
166 | /* State of boost_kthread_task for tracing. */ | 163 | /* State of boost_kthread_task for tracing. */ |
167 | unsigned long n_tasks_boosted; | 164 | unsigned long n_tasks_boosted; |
@@ -188,9 +185,6 @@ struct rcu_node { | |||
188 | /* kthread that takes care of this rcu_node */ | 185 | /* kthread that takes care of this rcu_node */ |
189 | /* structure, for example, awakening the */ | 186 | /* structure, for example, awakening the */ |
190 | /* per-CPU kthreads as needed. */ | 187 | /* per-CPU kthreads as needed. */ |
191 | wait_queue_head_t node_wq; | ||
192 | /* Wait queue on which to park the per-node */ | ||
193 | /* kthread. */ | ||
194 | unsigned int node_kthread_status; | 188 | unsigned int node_kthread_status; |
195 | /* State of node_kthread_task for tracing. */ | 189 | /* State of node_kthread_task for tracing. */ |
196 | } ____cacheline_internodealigned_in_smp; | 190 | } ____cacheline_internodealigned_in_smp; |
@@ -284,7 +278,6 @@ struct rcu_data { | |||
284 | /* 3) dynticks interface. */ | 278 | /* 3) dynticks interface. */ |
285 | struct rcu_dynticks *dynticks; /* Shared per-CPU dynticks state. */ | 279 | struct rcu_dynticks *dynticks; /* Shared per-CPU dynticks state. */ |
286 | int dynticks_snap; /* Per-GP tracking for dynticks. */ | 280 | int dynticks_snap; /* Per-GP tracking for dynticks. */ |
287 | int dynticks_nmi_snap; /* Per-GP tracking for dynticks_nmi. */ | ||
288 | #endif /* #ifdef CONFIG_NO_HZ */ | 281 | #endif /* #ifdef CONFIG_NO_HZ */ |
289 | 282 | ||
290 | /* 4) reasons this CPU needed to be kicked by force_quiescent_state */ | 283 | /* 4) reasons this CPU needed to be kicked by force_quiescent_state */ |
@@ -337,6 +330,16 @@ struct rcu_data { | |||
337 | /* scheduling clock irq */ | 330 | /* scheduling clock irq */ |
338 | /* before ratting on them. */ | 331 | /* before ratting on them. */ |
339 | 332 | ||
333 | #define rcu_wait(cond) \ | ||
334 | do { \ | ||
335 | for (;;) { \ | ||
336 | set_current_state(TASK_INTERRUPTIBLE); \ | ||
337 | if (cond) \ | ||
338 | break; \ | ||
339 | schedule(); \ | ||
340 | } \ | ||
341 | __set_current_state(TASK_RUNNING); \ | ||
342 | } while (0) | ||
340 | 343 | ||
341 | /* | 344 | /* |
342 | * RCU global state, including node hierarchy. This hierarchy is | 345 | * RCU global state, including node hierarchy. This hierarchy is |
@@ -446,7 +449,6 @@ static void __cpuinit rcu_preempt_init_percpu_data(int cpu); | |||
446 | static void rcu_preempt_send_cbs_to_online(void); | 449 | static void rcu_preempt_send_cbs_to_online(void); |
447 | static void __init __rcu_init_preempt(void); | 450 | static void __init __rcu_init_preempt(void); |
448 | static void rcu_needs_cpu_flush(void); | 451 | static void rcu_needs_cpu_flush(void); |
449 | static void __init rcu_init_boost_waitqueue(struct rcu_node *rnp); | ||
450 | static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); | 452 | static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); |
451 | static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, | 453 | static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, |
452 | cpumask_var_t cm); | 454 | cpumask_var_t cm); |
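
The new rcu_wait() macro replaces the waitqueues that used to park the RCU kthreads: the thread marks itself TASK_INTERRUPTIBLE, re-tests the condition, and calls schedule() until the condition holds, with wake_up_process() doing the waking on the other side. A rough userspace analogue of the same "test, sleep, re-test" loop; sched_yield() stands in for schedule() and the task-state handling is deliberately omitted, so treat this only as a shape sketch.

#include <pthread.h>
#include <sched.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_int work_ready;

/* Userspace stand-in for rcu_wait(cond): re-test the condition around every
 * "sleep" so a wakeup that races with the test is never lost. */
#define toy_rcu_wait(cond)			\
	do {					\
		while (!(cond))			\
			sched_yield();		\
	} while (0)

static void *worker(void *unused)
{
	(void)unused;
	toy_rcu_wait(atomic_load(&work_ready) != 0);
	puts("worker: condition satisfied, running");
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, worker, NULL);
	atomic_store(&work_ready, 1);		/* the "wake_up_process()" side */
	pthread_join(t, NULL);
	return 0;
}
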
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 3f6559a5f5cd..a767b7dac365 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h | |||
@@ -1196,8 +1196,7 @@ static int rcu_boost_kthread(void *arg) | |||
1196 | 1196 | ||
1197 | for (;;) { | 1197 | for (;;) { |
1198 | rnp->boost_kthread_status = RCU_KTHREAD_WAITING; | 1198 | rnp->boost_kthread_status = RCU_KTHREAD_WAITING; |
1199 | wait_event_interruptible(rnp->boost_wq, rnp->boost_tasks || | 1199 | rcu_wait(rnp->boost_tasks || rnp->exp_tasks); |
1200 | rnp->exp_tasks); | ||
1201 | rnp->boost_kthread_status = RCU_KTHREAD_RUNNING; | 1200 | rnp->boost_kthread_status = RCU_KTHREAD_RUNNING; |
1202 | more2boost = rcu_boost(rnp); | 1201 | more2boost = rcu_boost(rnp); |
1203 | if (more2boost) | 1202 | if (more2boost) |
@@ -1275,14 +1274,6 @@ static void rcu_preempt_boost_start_gp(struct rcu_node *rnp) | |||
1275 | } | 1274 | } |
1276 | 1275 | ||
1277 | /* | 1276 | /* |
1278 | * Initialize the RCU-boost waitqueue. | ||
1279 | */ | ||
1280 | static void __init rcu_init_boost_waitqueue(struct rcu_node *rnp) | ||
1281 | { | ||
1282 | init_waitqueue_head(&rnp->boost_wq); | ||
1283 | } | ||
1284 | |||
1285 | /* | ||
1286 | * Create an RCU-boost kthread for the specified node if one does not | 1277 | * Create an RCU-boost kthread for the specified node if one does not |
1287 | * already exist. We only create this kthread for preemptible RCU. | 1278 | * already exist. We only create this kthread for preemptible RCU. |
1288 | * Returns zero if all is well, a negated errno otherwise. | 1279 | * Returns zero if all is well, a negated errno otherwise. |
@@ -1304,9 +1295,9 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, | |||
1304 | if (IS_ERR(t)) | 1295 | if (IS_ERR(t)) |
1305 | return PTR_ERR(t); | 1296 | return PTR_ERR(t); |
1306 | raw_spin_lock_irqsave(&rnp->lock, flags); | 1297 | raw_spin_lock_irqsave(&rnp->lock, flags); |
1298 | set_task_state(t, TASK_INTERRUPTIBLE); | ||
1307 | rnp->boost_kthread_task = t; | 1299 | rnp->boost_kthread_task = t; |
1308 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1300 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
1309 | wake_up_process(t); | ||
1310 | sp.sched_priority = RCU_KTHREAD_PRIO; | 1301 | sp.sched_priority = RCU_KTHREAD_PRIO; |
1311 | sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); | 1302 | sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); |
1312 | return 0; | 1303 | return 0; |
@@ -1328,10 +1319,6 @@ static void rcu_preempt_boost_start_gp(struct rcu_node *rnp) | |||
1328 | { | 1319 | { |
1329 | } | 1320 | } |
1330 | 1321 | ||
1331 | static void __init rcu_init_boost_waitqueue(struct rcu_node *rnp) | ||
1332 | { | ||
1333 | } | ||
1334 | |||
1335 | static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, | 1322 | static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, |
1336 | struct rcu_node *rnp, | 1323 | struct rcu_node *rnp, |
1337 | int rnp_index) | 1324 | int rnp_index) |
@@ -1520,7 +1507,6 @@ int rcu_needs_cpu(int cpu) | |||
1520 | { | 1507 | { |
1521 | int c = 0; | 1508 | int c = 0; |
1522 | int snap; | 1509 | int snap; |
1523 | int snap_nmi; | ||
1524 | int thatcpu; | 1510 | int thatcpu; |
1525 | 1511 | ||
1526 | /* Check for being in the holdoff period. */ | 1512 | /* Check for being in the holdoff period. */ |
@@ -1531,10 +1517,10 @@ int rcu_needs_cpu(int cpu) | |||
1531 | for_each_online_cpu(thatcpu) { | 1517 | for_each_online_cpu(thatcpu) { |
1532 | if (thatcpu == cpu) | 1518 | if (thatcpu == cpu) |
1533 | continue; | 1519 | continue; |
1534 | snap = per_cpu(rcu_dynticks, thatcpu).dynticks; | 1520 | snap = atomic_add_return(0, &per_cpu(rcu_dynticks, |
1535 | snap_nmi = per_cpu(rcu_dynticks, thatcpu).dynticks_nmi; | 1521 | thatcpu).dynticks); |
1536 | smp_mb(); /* Order sampling of snap with end of grace period. */ | 1522 | smp_mb(); /* Order sampling of snap with end of grace period. */ |
1537 | if (((snap & 0x1) != 0) || ((snap_nmi & 0x1) != 0)) { | 1523 | if ((snap & 0x1) != 0) { |
1538 | per_cpu(rcu_dyntick_drain, cpu) = 0; | 1524 | per_cpu(rcu_dyntick_drain, cpu) = 0; |
1539 | per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1; | 1525 | per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1; |
1540 | return rcu_needs_cpu_quick_check(cpu); | 1526 | return rcu_needs_cpu_quick_check(cpu); |
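
rcu_needs_cpu() now samples a remote CPU's dynticks counter with atomic_add_return(0, ...), an atomic read-modify-write that adds nothing but provides full ordering, and then inspects the low bit to tell whether that CPU is currently in dyntick-idle (even) or not (odd). A C11 illustration of the "add zero for an ordered read" idiom; the variable names are invented.

#include <stdatomic.h>
#include <stdio.h>

static atomic_uint remote_dynticks = 5;	/* odd: the remote CPU is non-idle */

/* atomic_fetch_add(..., 0) returns the current value while acting as a
 * full read-modify-write barrier, the userspace cousin of
 * atomic_add_return(0, ...). */
static unsigned int ordered_read(atomic_uint *v)
{
	return atomic_fetch_add(v, 0);
}

int main(void)
{
	unsigned int snap = ordered_read(&remote_dynticks);

	if (snap & 0x1)
		puts("remote CPU is non-idle (counter odd)");
	else
		puts("remote CPU is in dyntick-idle (counter even)");
	return 0;
}
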
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index aa0fd72b4bc7..9678cc3650f5 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c | |||
@@ -69,10 +69,10 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp) | |||
69 | rdp->passed_quiesc, rdp->passed_quiesc_completed, | 69 | rdp->passed_quiesc, rdp->passed_quiesc_completed, |
70 | rdp->qs_pending); | 70 | rdp->qs_pending); |
71 | #ifdef CONFIG_NO_HZ | 71 | #ifdef CONFIG_NO_HZ |
72 | seq_printf(m, " dt=%d/%d dn=%d df=%lu", | 72 | seq_printf(m, " dt=%d/%d/%d df=%lu", |
73 | rdp->dynticks->dynticks, | 73 | atomic_read(&rdp->dynticks->dynticks), |
74 | rdp->dynticks->dynticks_nesting, | 74 | rdp->dynticks->dynticks_nesting, |
75 | rdp->dynticks->dynticks_nmi, | 75 | rdp->dynticks->dynticks_nmi_nesting, |
76 | rdp->dynticks_fqs); | 76 | rdp->dynticks_fqs); |
77 | #endif /* #ifdef CONFIG_NO_HZ */ | 77 | #endif /* #ifdef CONFIG_NO_HZ */ |
78 | seq_printf(m, " of=%lu ri=%lu", rdp->offline_fqs, rdp->resched_ipi); | 78 | seq_printf(m, " of=%lu ri=%lu", rdp->offline_fqs, rdp->resched_ipi); |
@@ -141,9 +141,9 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp) | |||
141 | rdp->qs_pending); | 141 | rdp->qs_pending); |
142 | #ifdef CONFIG_NO_HZ | 142 | #ifdef CONFIG_NO_HZ |
143 | seq_printf(m, ",%d,%d,%d,%lu", | 143 | seq_printf(m, ",%d,%d,%d,%lu", |
144 | rdp->dynticks->dynticks, | 144 | atomic_read(&rdp->dynticks->dynticks), |
145 | rdp->dynticks->dynticks_nesting, | 145 | rdp->dynticks->dynticks_nesting, |
146 | rdp->dynticks->dynticks_nmi, | 146 | rdp->dynticks->dynticks_nmi_nesting, |
147 | rdp->dynticks_fqs); | 147 | rdp->dynticks_fqs); |
148 | #endif /* #ifdef CONFIG_NO_HZ */ | 148 | #endif /* #ifdef CONFIG_NO_HZ */ |
149 | seq_printf(m, ",%lu,%lu", rdp->offline_fqs, rdp->resched_ipi); | 149 | seq_printf(m, ",%lu,%lu", rdp->offline_fqs, rdp->resched_ipi); |
@@ -167,7 +167,7 @@ static int show_rcudata_csv(struct seq_file *m, void *unused) | |||
167 | { | 167 | { |
168 | seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pqc\",\"pq\","); | 168 | seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pqc\",\"pq\","); |
169 | #ifdef CONFIG_NO_HZ | 169 | #ifdef CONFIG_NO_HZ |
170 | seq_puts(m, "\"dt\",\"dt nesting\",\"dn\",\"df\","); | 170 | seq_puts(m, "\"dt\",\"dt nesting\",\"dt NMI nesting\",\"df\","); |
171 | #endif /* #ifdef CONFIG_NO_HZ */ | 171 | #endif /* #ifdef CONFIG_NO_HZ */ |
172 | seq_puts(m, "\"of\",\"ri\",\"ql\",\"b\",\"ci\",\"co\",\"ca\"\n"); | 172 | seq_puts(m, "\"of\",\"ri\",\"ql\",\"b\",\"ci\",\"co\",\"ca\"\n"); |
173 | #ifdef CONFIG_TREE_PREEMPT_RCU | 173 | #ifdef CONFIG_TREE_PREEMPT_RCU |
diff --git a/kernel/sched.c b/kernel/sched.c index 2d12893b8b0f..cbb3a0eee58e 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -2573,7 +2573,26 @@ static void ttwu_queue_remote(struct task_struct *p, int cpu) | |||
2573 | if (!next) | 2573 | if (!next) |
2574 | smp_send_reschedule(cpu); | 2574 | smp_send_reschedule(cpu); |
2575 | } | 2575 | } |
2576 | #endif | 2576 | |
2577 | #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW | ||
2578 | static int ttwu_activate_remote(struct task_struct *p, int wake_flags) | ||
2579 | { | ||
2580 | struct rq *rq; | ||
2581 | int ret = 0; | ||
2582 | |||
2583 | rq = __task_rq_lock(p); | ||
2584 | if (p->on_cpu) { | ||
2585 | ttwu_activate(rq, p, ENQUEUE_WAKEUP); | ||
2586 | ttwu_do_wakeup(rq, p, wake_flags); | ||
2587 | ret = 1; | ||
2588 | } | ||
2589 | __task_rq_unlock(rq); | ||
2590 | |||
2591 | return ret; | ||
2592 | |||
2593 | } | ||
2594 | #endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ | ||
2595 | #endif /* CONFIG_SMP */ | ||
2577 | 2596 | ||
2578 | static void ttwu_queue(struct task_struct *p, int cpu) | 2597 | static void ttwu_queue(struct task_struct *p, int cpu) |
2579 | { | 2598 | { |
@@ -2631,17 +2650,17 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) | |||
2631 | while (p->on_cpu) { | 2650 | while (p->on_cpu) { |
2632 | #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW | 2651 | #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW |
2633 | /* | 2652 | /* |
2634 | * If called from interrupt context we could have landed in the | 2653 | * In case the architecture enables interrupts in |
2635 | * middle of schedule(), in this case we should take care not | 2654 | * context_switch(), we cannot busy wait, since that |
2636 | * to spin on ->on_cpu if p is current, since that would | 2655 | * would lead to deadlocks when an interrupt hits and |
2637 | * deadlock. | 2656 | * tries to wake up @prev. So bail and do a complete |
2657 | * remote wakeup. | ||
2638 | */ | 2658 | */ |
2639 | if (p == current) { | 2659 | if (ttwu_activate_remote(p, wake_flags)) |
2640 | ttwu_queue(p, cpu); | ||
2641 | goto stat; | 2660 | goto stat; |
2642 | } | 2661 | #else |
2643 | #endif | ||
2644 | cpu_relax(); | 2662 | cpu_relax(); |
2663 | #endif | ||
2645 | } | 2664 | } |
2646 | /* | 2665 | /* |
2647 | * Pairs with the smp_wmb() in finish_lock_switch(). | 2666 | * Pairs with the smp_wmb() in finish_lock_switch(). |
@@ -5841,7 +5860,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) | |||
5841 | idle->state = TASK_RUNNING; | 5860 | idle->state = TASK_RUNNING; |
5842 | idle->se.exec_start = sched_clock(); | 5861 | idle->se.exec_start = sched_clock(); |
5843 | 5862 | ||
5844 | cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu)); | 5863 | do_set_cpus_allowed(idle, cpumask_of(cpu)); |
5845 | /* | 5864 | /* |
5846 | * We're having a chicken and egg problem, even though we are | 5865 | * We're having a chicken and egg problem, even though we are |
5847 | * holding rq->lock, the cpu isn't yet set to this cpu so the | 5866 | * holding rq->lock, the cpu isn't yet set to this cpu so the |
@@ -5929,6 +5948,16 @@ static inline void sched_init_granularity(void) | |||
5929 | } | 5948 | } |
5930 | 5949 | ||
5931 | #ifdef CONFIG_SMP | 5950 | #ifdef CONFIG_SMP |
5951 | void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) | ||
5952 | { | ||
5953 | if (p->sched_class && p->sched_class->set_cpus_allowed) | ||
5954 | p->sched_class->set_cpus_allowed(p, new_mask); | ||
5955 | else { | ||
5956 | cpumask_copy(&p->cpus_allowed, new_mask); | ||
5957 | p->rt.nr_cpus_allowed = cpumask_weight(new_mask); | ||
5958 | } | ||
5959 | } | ||
5960 | |||
5932 | /* | 5961 | /* |
5933 | * This is how migration works: | 5962 | * This is how migration works: |
5934 | * | 5963 | * |
@@ -5974,12 +6003,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) | |||
5974 | goto out; | 6003 | goto out; |
5975 | } | 6004 | } |
5976 | 6005 | ||
5977 | if (p->sched_class->set_cpus_allowed) | 6006 | do_set_cpus_allowed(p, new_mask); |
5978 | p->sched_class->set_cpus_allowed(p, new_mask); | ||
5979 | else { | ||
5980 | cpumask_copy(&p->cpus_allowed, new_mask); | ||
5981 | p->rt.nr_cpus_allowed = cpumask_weight(new_mask); | ||
5982 | } | ||
5983 | 6007 | ||
5984 | /* Can the task run on the task's current CPU? If so, we're done */ | 6008 | /* Can the task run on the task's current CPU? If so, we're done */ |
5985 | if (cpumask_test_cpu(task_cpu(p), new_mask)) | 6009 | if (cpumask_test_cpu(task_cpu(p), new_mask)) |
@@ -8764,42 +8788,10 @@ cpu_cgroup_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk) | |||
8764 | return 0; | 8788 | return 0; |
8765 | } | 8789 | } |
8766 | 8790 | ||
8767 | static int | ||
8768 | cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, | ||
8769 | struct task_struct *tsk, bool threadgroup) | ||
8770 | { | ||
8771 | int retval = cpu_cgroup_can_attach_task(cgrp, tsk); | ||
8772 | if (retval) | ||
8773 | return retval; | ||
8774 | if (threadgroup) { | ||
8775 | struct task_struct *c; | ||
8776 | rcu_read_lock(); | ||
8777 | list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) { | ||
8778 | retval = cpu_cgroup_can_attach_task(cgrp, c); | ||
8779 | if (retval) { | ||
8780 | rcu_read_unlock(); | ||
8781 | return retval; | ||
8782 | } | ||
8783 | } | ||
8784 | rcu_read_unlock(); | ||
8785 | } | ||
8786 | return 0; | ||
8787 | } | ||
8788 | |||
8789 | static void | 8791 | static void |
8790 | cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, | 8792 | cpu_cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) |
8791 | struct cgroup *old_cont, struct task_struct *tsk, | ||
8792 | bool threadgroup) | ||
8793 | { | 8793 | { |
8794 | sched_move_task(tsk); | 8794 | sched_move_task(tsk); |
8795 | if (threadgroup) { | ||
8796 | struct task_struct *c; | ||
8797 | rcu_read_lock(); | ||
8798 | list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) { | ||
8799 | sched_move_task(c); | ||
8800 | } | ||
8801 | rcu_read_unlock(); | ||
8802 | } | ||
8803 | } | 8795 | } |
8804 | 8796 | ||
8805 | static void | 8797 | static void |
@@ -8887,8 +8879,8 @@ struct cgroup_subsys cpu_cgroup_subsys = { | |||
8887 | .name = "cpu", | 8879 | .name = "cpu", |
8888 | .create = cpu_cgroup_create, | 8880 | .create = cpu_cgroup_create, |
8889 | .destroy = cpu_cgroup_destroy, | 8881 | .destroy = cpu_cgroup_destroy, |
8890 | .can_attach = cpu_cgroup_can_attach, | 8882 | .can_attach_task = cpu_cgroup_can_attach_task, |
8891 | .attach = cpu_cgroup_attach, | 8883 | .attach_task = cpu_cgroup_attach_task, |
8892 | .exit = cpu_cgroup_exit, | 8884 | .exit = cpu_cgroup_exit, |
8893 | .populate = cpu_cgroup_populate, | 8885 | .populate = cpu_cgroup_populate, |
8894 | .subsys_id = cpu_cgroup_subsys_id, | 8886 | .subsys_id = cpu_cgroup_subsys_id, |
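
Two independent changes land in sched.c: the cpumask bookkeeping that set_cpus_allowed_ptr() used to open-code moves into do_set_cpus_allowed() so init_idle() can share it, and ttwu_activate_remote() handles wakeups on architectures that run context switches with interrupts enabled. Below is a small model of the first refactoring, with one helper shared by two callers; the toy types are not the scheduler's.

#include <stdio.h>

struct toy_task {
	const char *name;
	unsigned long cpus_allowed;	/* toy cpumask: one bit per CPU */
	int nr_cpus_allowed;
};

/* Single place that knows how to update the mask and its derived count,
 * analogous to do_set_cpus_allowed(). */
static void toy_set_cpus_allowed(struct toy_task *p, unsigned long mask)
{
	p->cpus_allowed = mask;
	p->nr_cpus_allowed = __builtin_popcountl(mask);
}

static void init_idle_task(struct toy_task *idle, int cpu)
{
	toy_set_cpus_allowed(idle, 1UL << cpu);		/* pin to one CPU */
}

static void migrate_task(struct toy_task *p, unsigned long new_mask)
{
	toy_set_cpus_allowed(p, new_mask);		/* same helper, no duplication */
}

int main(void)
{
	struct toy_task idle = { "idle", 0, 0 }, worker = { "worker", 0, 0 };

	init_idle_task(&idle, 2);
	migrate_task(&worker, 0xfUL);
	printf("%s: %d cpu(s), %s: %d cpu(s)\n",
	       idle.name, idle.nr_cpus_allowed, worker.name, worker.nr_cpus_allowed);
	return 0;
}
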
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index e32a9b70ee9c..433491c2dc8f 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
@@ -1076,8 +1076,6 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) | |||
1076 | se->on_rq = 0; | 1076 | se->on_rq = 0; |
1077 | update_cfs_load(cfs_rq, 0); | 1077 | update_cfs_load(cfs_rq, 0); |
1078 | account_entity_dequeue(cfs_rq, se); | 1078 | account_entity_dequeue(cfs_rq, se); |
1079 | update_min_vruntime(cfs_rq); | ||
1080 | update_cfs_shares(cfs_rq); | ||
1081 | 1079 | ||
1082 | /* | 1080 | /* |
1083 | * Normalize the entity after updating the min_vruntime because the | 1081 | * Normalize the entity after updating the min_vruntime because the |
@@ -1086,6 +1084,9 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) | |||
1086 | */ | 1084 | */ |
1087 | if (!(flags & DEQUEUE_SLEEP)) | 1085 | if (!(flags & DEQUEUE_SLEEP)) |
1088 | se->vruntime -= cfs_rq->min_vruntime; | 1086 | se->vruntime -= cfs_rq->min_vruntime; |
1087 | |||
1088 | update_min_vruntime(cfs_rq); | ||
1089 | update_cfs_shares(cfs_rq); | ||
1089 | } | 1090 | } |
1090 | 1091 | ||
1091 | /* | 1092 | /* |
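
The sched_fair.c hunk delays update_min_vruntime() and update_cfs_shares() until after the entity's vruntime has been normalized, so a departing entity is measured against the floor that was in effect while it was still enqueued rather than against a floor that may already have advanced past it. A toy arithmetic sketch of why the ordering matters; the numbers are made up.

#include <stdio.h>

int main(void)
{
	double min_vruntime = 100.0;	/* queue-wide floor before the dequeue */
	double se_vruntime  = 105.0;	/* absolute vruntime of the entity     */
	double advanced_min = 120.0;	/* floor after the entity has left     */

	/* Order used by the patched code: normalize first. */
	double rel_ok  = se_vruntime - min_vruntime;	/*  +5 */

	/* Old order: the floor may already have advanced past the entity,
	 * so the relative value goes negative and the entity gets an
	 * unfair boost when it is re-enqueued later. */
	double rel_bad = se_vruntime - advanced_min;	/* -15 */

	printf("normalize-then-update: %+.1f, update-then-normalize: %+.1f\n",
	       rel_ok, rel_bad);
	return 0;
}
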
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 64b2a37c07d0..88725c939e0b 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c | |||
@@ -1263,6 +1263,7 @@ static int find_lowest_rq(struct task_struct *task) | |||
1263 | if (!cpumask_test_cpu(this_cpu, lowest_mask)) | 1263 | if (!cpumask_test_cpu(this_cpu, lowest_mask)) |
1264 | this_cpu = -1; /* Skip this_cpu opt if not among lowest */ | 1264 | this_cpu = -1; /* Skip this_cpu opt if not among lowest */ |
1265 | 1265 | ||
1266 | rcu_read_lock(); | ||
1266 | for_each_domain(cpu, sd) { | 1267 | for_each_domain(cpu, sd) { |
1267 | if (sd->flags & SD_WAKE_AFFINE) { | 1268 | if (sd->flags & SD_WAKE_AFFINE) { |
1268 | int best_cpu; | 1269 | int best_cpu; |
@@ -1272,15 +1273,20 @@ static int find_lowest_rq(struct task_struct *task) | |||
1272 | * remote processor. | 1273 | * remote processor. |
1273 | */ | 1274 | */ |
1274 | if (this_cpu != -1 && | 1275 | if (this_cpu != -1 && |
1275 | cpumask_test_cpu(this_cpu, sched_domain_span(sd))) | 1276 | cpumask_test_cpu(this_cpu, sched_domain_span(sd))) { |
1277 | rcu_read_unlock(); | ||
1276 | return this_cpu; | 1278 | return this_cpu; |
1279 | } | ||
1277 | 1280 | ||
1278 | best_cpu = cpumask_first_and(lowest_mask, | 1281 | best_cpu = cpumask_first_and(lowest_mask, |
1279 | sched_domain_span(sd)); | 1282 | sched_domain_span(sd)); |
1280 | if (best_cpu < nr_cpu_ids) | 1283 | if (best_cpu < nr_cpu_ids) { |
1284 | rcu_read_unlock(); | ||
1281 | return best_cpu; | 1285 | return best_cpu; |
1286 | } | ||
1282 | } | 1287 | } |
1283 | } | 1288 | } |
1289 | rcu_read_unlock(); | ||
1284 | 1290 | ||
1285 | /* | 1291 | /* |
1286 | * And finally, if there were no matches within the domains | 1292 | * And finally, if there were no matches within the domains |
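
find_lowest_rq() now holds rcu_read_lock() across the sched-domain walk, and the hunk adds an rcu_read_unlock() in front of each early return inside the loop. The sketch below shows an equivalent single-exit shape, which avoids repeating the unlock; the RCU calls are stubbed out so the example compiles outside the kernel, and this is a restructured illustration rather than the kernel's exact control flow.

#include <stdio.h>

/* Stand-ins for the RCU read-side primitives, just to make the shape
 * compilable outside the kernel. */
static void toy_rcu_read_lock(void)   { }
static void toy_rcu_read_unlock(void) { }

static int find_best(const int *candidates, int n)
{
	int best = -1;
	int i;

	toy_rcu_read_lock();
	for (i = 0; i < n; i++) {
		if (candidates[i] >= 0) {
			best = candidates[i];
			break;		/* fall through to the single unlock */
		}
	}
	toy_rcu_read_unlock();
	return best;
}

int main(void)
{
	int cands[] = { -1, -1, 3, 7 };

	printf("best = %d\n", find_best(cands, 4));
	return 0;
}
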
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h index 48ddf431db0e..331e01bcd026 100644 --- a/kernel/sched_stats.h +++ b/kernel/sched_stats.h | |||
@@ -37,7 +37,7 @@ static int show_schedstat(struct seq_file *seq, void *v) | |||
37 | 37 | ||
38 | #ifdef CONFIG_SMP | 38 | #ifdef CONFIG_SMP |
39 | /* domain-specific stats */ | 39 | /* domain-specific stats */ |
40 | preempt_disable(); | 40 | rcu_read_lock(); |
41 | for_each_domain(cpu, sd) { | 41 | for_each_domain(cpu, sd) { |
42 | enum cpu_idle_type itype; | 42 | enum cpu_idle_type itype; |
43 | 43 | ||
@@ -64,7 +64,7 @@ static int show_schedstat(struct seq_file *seq, void *v) | |||
64 | sd->ttwu_wake_remote, sd->ttwu_move_affine, | 64 | sd->ttwu_wake_remote, sd->ttwu_move_affine, |
65 | sd->ttwu_move_balance); | 65 | sd->ttwu_move_balance); |
66 | } | 66 | } |
67 | preempt_enable(); | 67 | rcu_read_unlock(); |
68 | #endif | 68 | #endif |
69 | } | 69 | } |
70 | kfree(mask_str); | 70 | kfree(mask_str); |
diff --git a/kernel/signal.c b/kernel/signal.c index ad5e818baacc..86c32b884f8e 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
@@ -3023,8 +3023,10 @@ SYSCALL_DEFINE2(signal, int, sig, __sighandler_t, handler) | |||
3023 | 3023 | ||
3024 | SYSCALL_DEFINE0(pause) | 3024 | SYSCALL_DEFINE0(pause) |
3025 | { | 3025 | { |
3026 | current->state = TASK_INTERRUPTIBLE; | 3026 | while (!signal_pending(current)) { |
3027 | schedule(); | 3027 | current->state = TASK_INTERRUPTIBLE; |
3028 | schedule(); | ||
3029 | } | ||
3028 | return -ERESTARTNOHAND; | 3030 | return -ERESTARTNOHAND; |
3029 | } | 3031 | } |
3030 | 3032 | ||
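
The pause() fix wraps the sleep in a while (!signal_pending(current)) loop, so a wakeup that does not actually deliver a signal simply puts the task back to sleep instead of letting the syscall return early. The same "loop around the sleeping primitive" discipline is standard in userspace; a sketch with POSIX condition variables:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  cond = PTHREAD_COND_INITIALIZER;
static int event_pending;

/* Always re-test the predicate after waking: pthread_cond_wait() (like a
 * bare schedule() in the kernel) may return without the event having
 * actually occurred. */
static void wait_for_event(void)
{
	pthread_mutex_lock(&lock);
	while (!event_pending)
		pthread_cond_wait(&cond, &lock);
	pthread_mutex_unlock(&lock);
}

static void *poster(void *unused)
{
	(void)unused;
	pthread_mutex_lock(&lock);
	event_pending = 1;
	pthread_cond_signal(&cond);
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, poster, NULL);
	wait_for_event();
	puts("event observed; no early return on a spurious wakeup");
	pthread_join(t, NULL);
	return 0;
}
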
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 3dd0c46fa3bb..4fc92445a29c 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -56,6 +56,7 @@ | |||
56 | #include <linux/kprobes.h> | 56 | #include <linux/kprobes.h> |
57 | #include <linux/pipe_fs_i.h> | 57 | #include <linux/pipe_fs_i.h> |
58 | #include <linux/oom.h> | 58 | #include <linux/oom.h> |
59 | #include <linux/kmod.h> | ||
59 | 60 | ||
60 | #include <asm/uaccess.h> | 61 | #include <asm/uaccess.h> |
61 | #include <asm/processor.h> | 62 | #include <asm/processor.h> |
@@ -616,6 +617,11 @@ static struct ctl_table kern_table[] = { | |||
616 | .child = random_table, | 617 | .child = random_table, |
617 | }, | 618 | }, |
618 | { | 619 | { |
620 | .procname = "usermodehelper", | ||
621 | .mode = 0555, | ||
622 | .child = usermodehelper_table, | ||
623 | }, | ||
624 | { | ||
619 | .procname = "overflowuid", | 625 | .procname = "overflowuid", |
620 | .data = &overflowuid, | 626 | .data = &overflowuid, |
621 | .maxlen = sizeof(int), | 627 | .maxlen = sizeof(int), |
@@ -1500,7 +1506,7 @@ static struct ctl_table fs_table[] = { | |||
1500 | 1506 | ||
1501 | static struct ctl_table debug_table[] = { | 1507 | static struct ctl_table debug_table[] = { |
1502 | #if defined(CONFIG_X86) || defined(CONFIG_PPC) || defined(CONFIG_SPARC) || \ | 1508 | #if defined(CONFIG_X86) || defined(CONFIG_PPC) || defined(CONFIG_SPARC) || \ |
1503 | defined(CONFIG_S390) | 1509 | defined(CONFIG_S390) || defined(CONFIG_TILE) |
1504 | { | 1510 | { |
1505 | .procname = "exception-trace", | 1511 | .procname = "exception-trace", |
1506 | .data = &show_unhandled_signals, | 1512 | .data = &show_unhandled_signals, |
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index d017c2c82c44..1ee417fcbfa5 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c | |||
@@ -109,12 +109,18 @@ ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip); | |||
109 | static void ftrace_global_list_func(unsigned long ip, | 109 | static void ftrace_global_list_func(unsigned long ip, |
110 | unsigned long parent_ip) | 110 | unsigned long parent_ip) |
111 | { | 111 | { |
112 | struct ftrace_ops *op = rcu_dereference_raw(ftrace_global_list); /*see above*/ | 112 | struct ftrace_ops *op; |
113 | |||
114 | if (unlikely(trace_recursion_test(TRACE_GLOBAL_BIT))) | ||
115 | return; | ||
113 | 116 | ||
117 | trace_recursion_set(TRACE_GLOBAL_BIT); | ||
118 | op = rcu_dereference_raw(ftrace_global_list); /*see above*/ | ||
114 | while (op != &ftrace_list_end) { | 119 | while (op != &ftrace_list_end) { |
115 | op->func(ip, parent_ip); | 120 | op->func(ip, parent_ip); |
116 | op = rcu_dereference_raw(op->next); /*see above*/ | 121 | op = rcu_dereference_raw(op->next); /*see above*/ |
117 | }; | 122 | }; |
123 | trace_recursion_clear(TRACE_GLOBAL_BIT); | ||
118 | } | 124 | } |
119 | 125 | ||
120 | static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip) | 126 | static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip) |
@@ -1638,12 +1644,12 @@ static void ftrace_startup_enable(int command) | |||
1638 | ftrace_run_update_code(command); | 1644 | ftrace_run_update_code(command); |
1639 | } | 1645 | } |
1640 | 1646 | ||
1641 | static void ftrace_startup(struct ftrace_ops *ops, int command) | 1647 | static int ftrace_startup(struct ftrace_ops *ops, int command) |
1642 | { | 1648 | { |
1643 | bool hash_enable = true; | 1649 | bool hash_enable = true; |
1644 | 1650 | ||
1645 | if (unlikely(ftrace_disabled)) | 1651 | if (unlikely(ftrace_disabled)) |
1646 | return; | 1652 | return -ENODEV; |
1647 | 1653 | ||
1648 | ftrace_start_up++; | 1654 | ftrace_start_up++; |
1649 | command |= FTRACE_ENABLE_CALLS; | 1655 | command |= FTRACE_ENABLE_CALLS; |
@@ -1662,6 +1668,8 @@ static void ftrace_startup(struct ftrace_ops *ops, int command) | |||
1662 | ftrace_hash_rec_enable(ops, 1); | 1668 | ftrace_hash_rec_enable(ops, 1); |
1663 | 1669 | ||
1664 | ftrace_startup_enable(command); | 1670 | ftrace_startup_enable(command); |
1671 | |||
1672 | return 0; | ||
1665 | } | 1673 | } |
1666 | 1674 | ||
1667 | static void ftrace_shutdown(struct ftrace_ops *ops, int command) | 1675 | static void ftrace_shutdown(struct ftrace_ops *ops, int command) |
@@ -2501,7 +2509,7 @@ static void __enable_ftrace_function_probe(void) | |||
2501 | 2509 | ||
2502 | ret = __register_ftrace_function(&trace_probe_ops); | 2510 | ret = __register_ftrace_function(&trace_probe_ops); |
2503 | if (!ret) | 2511 | if (!ret) |
2504 | ftrace_startup(&trace_probe_ops, 0); | 2512 | ret = ftrace_startup(&trace_probe_ops, 0); |
2505 | 2513 | ||
2506 | ftrace_probe_registered = 1; | 2514 | ftrace_probe_registered = 1; |
2507 | } | 2515 | } |
@@ -3466,7 +3474,11 @@ device_initcall(ftrace_nodyn_init); | |||
3466 | static inline int ftrace_init_dyn_debugfs(struct dentry *d_tracer) { return 0; } | 3474 | static inline int ftrace_init_dyn_debugfs(struct dentry *d_tracer) { return 0; } |
3467 | static inline void ftrace_startup_enable(int command) { } | 3475 | static inline void ftrace_startup_enable(int command) { } |
3468 | /* Keep as macros so we do not need to define the commands */ | 3476 | /* Keep as macros so we do not need to define the commands */ |
3469 | # define ftrace_startup(ops, command) do { } while (0) | 3477 | # define ftrace_startup(ops, command) \ |
3478 | ({ \ | ||
3479 | (ops)->flags |= FTRACE_OPS_FL_ENABLED; \ | ||
3480 | 0; \ | ||
3481 | }) | ||
3470 | # define ftrace_shutdown(ops, command) do { } while (0) | 3482 | # define ftrace_shutdown(ops, command) do { } while (0) |
3471 | # define ftrace_startup_sysctl() do { } while (0) | 3483 | # define ftrace_startup_sysctl() do { } while (0) |
3472 | # define ftrace_shutdown_sysctl() do { } while (0) | 3484 | # define ftrace_shutdown_sysctl() do { } while (0) |
@@ -3484,6 +3496,10 @@ ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip) | |||
3484 | { | 3496 | { |
3485 | struct ftrace_ops *op; | 3497 | struct ftrace_ops *op; |
3486 | 3498 | ||
3499 | if (unlikely(trace_recursion_test(TRACE_INTERNAL_BIT))) | ||
3500 | return; | ||
3501 | |||
3502 | trace_recursion_set(TRACE_INTERNAL_BIT); | ||
3487 | /* | 3503 | /* |
3488 | * Some of the ops may be dynamically allocated, | 3504 | * Some of the ops may be dynamically allocated, |
3489 | * they must be freed after a synchronize_sched(). | 3505 | * they must be freed after a synchronize_sched(). |
@@ -3496,6 +3512,7 @@ ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip) | |||
3496 | op = rcu_dereference_raw(op->next); | 3512 | op = rcu_dereference_raw(op->next); |
3497 | }; | 3513 | }; |
3498 | preempt_enable_notrace(); | 3514 | preempt_enable_notrace(); |
3515 | trace_recursion_clear(TRACE_INTERNAL_BIT); | ||
3499 | } | 3516 | } |
3500 | 3517 | ||
3501 | static void clear_ftrace_swapper(void) | 3518 | static void clear_ftrace_swapper(void) |
@@ -3799,7 +3816,7 @@ int register_ftrace_function(struct ftrace_ops *ops) | |||
3799 | 3816 | ||
3800 | ret = __register_ftrace_function(ops); | 3817 | ret = __register_ftrace_function(ops); |
3801 | if (!ret) | 3818 | if (!ret) |
3802 | ftrace_startup(ops, 0); | 3819 | ret = ftrace_startup(ops, 0); |
3803 | 3820 | ||
3804 | 3821 | ||
3805 | out_unlock: | 3822 | out_unlock: |
@@ -4045,7 +4062,7 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc, | |||
4045 | ftrace_graph_return = retfunc; | 4062 | ftrace_graph_return = retfunc; |
4046 | ftrace_graph_entry = entryfunc; | 4063 | ftrace_graph_entry = entryfunc; |
4047 | 4064 | ||
4048 | ftrace_startup(&global_ops, FTRACE_START_FUNC_RET); | 4065 | ret = ftrace_startup(&global_ops, FTRACE_START_FUNC_RET); |
4049 | 4066 | ||
4050 | out: | 4067 | out: |
4051 | mutex_unlock(&ftrace_lock); | 4068 | mutex_unlock(&ftrace_lock); |
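The ftrace.c hunks above do two things: ftrace_startup() now returns an error code that register_ftrace_function(), register_ftrace_graph() and the probe path propagate, and ftrace_ops_list_func() gains a per-task recursion guard built on TRACE_INTERNAL_BIT. Below is a minimal standalone C model of that guard, not kernel code; the variable task_trace_recursion stands in for current->trace_recursion and the helper macros mirror the ones added in trace.h.

/*
 * Standalone model (userspace, compiles on its own) of the recursion guard
 * added to ftrace_ops_list_func(): a per-task bit is tested and set before
 * walking the ops list, so a tracer that itself triggers tracing cannot
 * re-enter the list walk.
 */
#include <stdio.h>

#define TRACE_INTERNAL_BIT (1 << 11)

static unsigned long task_trace_recursion;   /* models current->trace_recursion */

#define trace_recursion_set(bit)   (task_trace_recursion |= (bit))
#define trace_recursion_clear(bit) (task_trace_recursion &= ~(bit))
#define trace_recursion_test(bit)  (task_trace_recursion & (bit))

static void call_registered_ops(unsigned long ip)
{
	printf("op->func(%#lx)\n", ip);
	/* An op that traced something here would re-enter list_func(),
	 * hit the bit test, and return immediately instead of recursing. */
}

static void list_func(unsigned long ip)
{
	if (trace_recursion_test(TRACE_INTERNAL_BIT))
		return;				/* already inside the walk: bail out */

	trace_recursion_set(TRACE_INTERNAL_BIT);
	call_registered_ops(ip);
	trace_recursion_clear(TRACE_INTERNAL_BIT);
}

int main(void)
{
	list_func(0xc0de);
	return 0;
}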
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 0ef7b4b2a1f7..b0c7aa407943 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c | |||
@@ -2216,7 +2216,7 @@ static noinline void trace_recursive_fail(void) | |||
2216 | 2216 | ||
2217 | printk_once(KERN_WARNING "Tracing recursion: depth[%ld]:" | 2217 | printk_once(KERN_WARNING "Tracing recursion: depth[%ld]:" |
2218 | "HC[%lu]:SC[%lu]:NMI[%lu]\n", | 2218 | "HC[%lu]:SC[%lu]:NMI[%lu]\n", |
2219 | current->trace_recursion, | 2219 | trace_recursion_buffer(), |
2220 | hardirq_count() >> HARDIRQ_SHIFT, | 2220 | hardirq_count() >> HARDIRQ_SHIFT, |
2221 | softirq_count() >> SOFTIRQ_SHIFT, | 2221 | softirq_count() >> SOFTIRQ_SHIFT, |
2222 | in_nmi()); | 2222 | in_nmi()); |
@@ -2226,9 +2226,9 @@ static noinline void trace_recursive_fail(void) | |||
2226 | 2226 | ||
2227 | static inline int trace_recursive_lock(void) | 2227 | static inline int trace_recursive_lock(void) |
2228 | { | 2228 | { |
2229 | current->trace_recursion++; | 2229 | trace_recursion_inc(); |
2230 | 2230 | ||
2231 | if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH)) | 2231 | if (likely(trace_recursion_buffer() < TRACE_RECURSIVE_DEPTH)) |
2232 | return 0; | 2232 | return 0; |
2233 | 2233 | ||
2234 | trace_recursive_fail(); | 2234 | trace_recursive_fail(); |
@@ -2238,9 +2238,9 @@ static inline int trace_recursive_lock(void) | |||
2238 | 2238 | ||
2239 | static inline void trace_recursive_unlock(void) | 2239 | static inline void trace_recursive_unlock(void) |
2240 | { | 2240 | { |
2241 | WARN_ON_ONCE(!current->trace_recursion); | 2241 | WARN_ON_ONCE(!trace_recursion_buffer()); |
2242 | 2242 | ||
2243 | current->trace_recursion--; | 2243 | trace_recursion_dec(); |
2244 | } | 2244 | } |
2245 | 2245 | ||
2246 | #else | 2246 | #else |
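The ring_buffer.c hunks replace the open-coded current->trace_recursion accesses with the new accessors, which confine the nesting counter to the low bits of the word. The sketch below is a hedged standalone model of the lock/unlock pair; the depth limit and the decrement on the failure path are illustrative choices for the model, not a copy of the kernel's exact logic.

/* Standalone model of the ring-buffer recursion accounting: the low 10 bits
 * of the per-task word count nesting depth, and reservation is refused once
 * a fixed limit is exceeded. */
#include <assert.h>
#include <stdio.h>

#define TRACE_RECURSIVE_DEPTH 16		/* illustrative limit */

static unsigned long task_trace_recursion;	/* models current->trace_recursion */

#define trace_recursion_inc()    (task_trace_recursion++)
#define trace_recursion_dec()    (task_trace_recursion--)
#define trace_recursion_buffer() (task_trace_recursion & 0x3ff)

static int trace_recursive_lock(void)
{
	trace_recursion_inc();
	if (trace_recursion_buffer() < TRACE_RECURSIVE_DEPTH)
		return 0;
	trace_recursion_dec();			/* too deep: undo and refuse (model choice) */
	return -1;
}

static void trace_recursive_unlock(void)
{
	assert(trace_recursion_buffer() != 0);
	trace_recursion_dec();
}

int main(void)
{
	if (!trace_recursive_lock()) {
		puts("reserved a ring buffer event");
		trace_recursive_unlock();
	}
	return 0;
}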
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 6b69c4bd306f..229f8591f61d 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h | |||
@@ -784,4 +784,19 @@ extern const char *__stop___trace_bprintk_fmt[]; | |||
784 | FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print)) | 784 | FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print)) |
785 | #include "trace_entries.h" | 785 | #include "trace_entries.h" |
786 | 786 | ||
787 | /* Only current can touch trace_recursion */ | ||
788 | #define trace_recursion_inc() do { (current)->trace_recursion++; } while (0) | ||
789 | #define trace_recursion_dec() do { (current)->trace_recursion--; } while (0) | ||
790 | |||
791 | /* Ring buffer has the 10 LSB bits to count */ | ||
792 | #define trace_recursion_buffer() ((current)->trace_recursion & 0x3ff) | ||
793 | |||
794 | /* for function tracing recursion */ | ||
795 | #define TRACE_INTERNAL_BIT (1<<11) | ||
796 | #define TRACE_GLOBAL_BIT (1<<12) | ||
797 | |||
798 | #define trace_recursion_set(bit) do { (current)->trace_recursion |= (bit); } while (0) | ||
799 | #define trace_recursion_clear(bit) do { (current)->trace_recursion &= ~(bit); } while (0) | ||
800 | #define trace_recursion_test(bit) ((current)->trace_recursion & (bit)) | ||
801 | |||
787 | #endif /* _LINUX_KERNEL_TRACE_H */ | 802 | #endif /* _LINUX_KERNEL_TRACE_H */ |
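The new trace.h macros split a single per-task word between a 10-bit nesting counter for the ring buffer and two flag bits for the function tracers. A quick standalone check (plain C, not kernel code) that the counter mask and the flag bits occupy disjoint bits:

#include <stdio.h>

#define COUNTER_MASK       0x3ffUL		/* trace_recursion_buffer() mask, bits 0-9 */
#define TRACE_INTERNAL_BIT (1UL << 11)
#define TRACE_GLOBAL_BIT   (1UL << 12)

int main(void)
{
	printf("counter mask = %#lx (max countable depth %lu)\n", COUNTER_MASK, COUNTER_MASK);
	printf("internal bit = %#lx\n", TRACE_INTERNAL_BIT);
	printf("global bit   = %#lx\n", TRACE_GLOBAL_BIT);
	/* 0 means the flag bits can never corrupt the depth counter */
	printf("overlap      = %#lx\n", COUNTER_MASK & (TRACE_INTERNAL_BIT | TRACE_GLOBAL_BIT));
	return 0;
}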
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 2fe110341359..686ec399f2a8 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c | |||
@@ -1657,7 +1657,12 @@ static struct ftrace_ops trace_ops __initdata = | |||
1657 | 1657 | ||
1658 | static __init void event_trace_self_test_with_function(void) | 1658 | static __init void event_trace_self_test_with_function(void) |
1659 | { | 1659 | { |
1660 | register_ftrace_function(&trace_ops); | 1660 | int ret; |
1661 | ret = register_ftrace_function(&trace_ops); | ||
1662 | if (WARN_ON(ret < 0)) { | ||
1663 | pr_info("Failed to enable function tracer for event tests\n"); | ||
1664 | return; | ||
1665 | } | ||
1661 | pr_info("Running tests again, along with the function tracer\n"); | 1666 | pr_info("Running tests again, along with the function tracer\n"); |
1662 | event_trace_self_tests(); | 1667 | event_trace_self_tests(); |
1663 | unregister_ftrace_function(&trace_ops); | 1668 | unregister_ftrace_function(&trace_ops); |
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index cf535ccedc86..e37de492a9e1 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c | |||
@@ -353,6 +353,33 @@ ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val, | |||
353 | } | 353 | } |
354 | EXPORT_SYMBOL(ftrace_print_symbols_seq); | 354 | EXPORT_SYMBOL(ftrace_print_symbols_seq); |
355 | 355 | ||
356 | #if BITS_PER_LONG == 32 | ||
357 | const char * | ||
358 | ftrace_print_symbols_seq_u64(struct trace_seq *p, unsigned long long val, | ||
359 | const struct trace_print_flags_u64 *symbol_array) | ||
360 | { | ||
361 | int i; | ||
362 | const char *ret = p->buffer + p->len; | ||
363 | |||
364 | for (i = 0; symbol_array[i].name; i++) { | ||
365 | |||
366 | if (val != symbol_array[i].mask) | ||
367 | continue; | ||
368 | |||
369 | trace_seq_puts(p, symbol_array[i].name); | ||
370 | break; | ||
371 | } | ||
372 | |||
373 | if (!p->len) | ||
374 | trace_seq_printf(p, "0x%llx", val); | ||
375 | |||
376 | trace_seq_putc(p, 0); | ||
377 | |||
378 | return ret; | ||
379 | } | ||
380 | EXPORT_SYMBOL(ftrace_print_symbols_seq_u64); | ||
381 | #endif | ||
382 | |||
356 | const char * | 383 | const char * |
357 | ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len) | 384 | ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len) |
358 | { | 385 | { |
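ftrace_print_symbols_seq_u64() gives 32-bit kernels an exact-match lookup over 64-bit values, falling back to a hex dump when nothing matches. The following userspace model reproduces only that lookup; the struct name sym64 and the example table are hypothetical stand-ins for struct trace_print_flags_u64 and a real tracepoint's symbol array.

#include <stdio.h>

struct sym64 {					/* models struct trace_print_flags_u64 */
	unsigned long long mask;
	const char *name;
};

static const struct sym64 example_codes[] = {	/* hypothetical 64-bit code table */
	{ 0x000ULL, "EXCEPTION" },
	{ 0x001ULL, "EXTERNAL_INTERRUPT" },
	{ 0, NULL }				/* NULL name terminates the scan */
};

static void print_sym_u64(unsigned long long val, const struct sym64 *tbl)
{
	for (; tbl->name; tbl++) {
		if (val == tbl->mask) {
			printf("%s\n", tbl->name);
			return;
		}
	}
	printf("0x%llx\n", val);		/* no match: print the raw value */
}

int main(void)
{
	print_sym_u64(0x001ULL, example_codes);
	print_sym_u64(0xdeadULL, example_codes);
	return 0;
}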
diff --git a/kernel/utsname.c b/kernel/utsname.c index 44646179eaba..bff131b9510a 100644 --- a/kernel/utsname.c +++ b/kernel/utsname.c | |||
@@ -15,6 +15,7 @@ | |||
15 | #include <linux/err.h> | 15 | #include <linux/err.h> |
16 | #include <linux/slab.h> | 16 | #include <linux/slab.h> |
17 | #include <linux/user_namespace.h> | 17 | #include <linux/user_namespace.h> |
18 | #include <linux/proc_fs.h> | ||
18 | 19 | ||
19 | static struct uts_namespace *create_uts_ns(void) | 20 | static struct uts_namespace *create_uts_ns(void) |
20 | { | 21 | { |
@@ -79,3 +80,41 @@ void free_uts_ns(struct kref *kref) | |||
79 | put_user_ns(ns->user_ns); | 80 | put_user_ns(ns->user_ns); |
80 | kfree(ns); | 81 | kfree(ns); |
81 | } | 82 | } |
83 | |||
84 | static void *utsns_get(struct task_struct *task) | ||
85 | { | ||
86 | struct uts_namespace *ns = NULL; | ||
87 | struct nsproxy *nsproxy; | ||
88 | |||
89 | rcu_read_lock(); | ||
90 | nsproxy = task_nsproxy(task); | ||
91 | if (nsproxy) { | ||
92 | ns = nsproxy->uts_ns; | ||
93 | get_uts_ns(ns); | ||
94 | } | ||
95 | rcu_read_unlock(); | ||
96 | |||
97 | return ns; | ||
98 | } | ||
99 | |||
100 | static void utsns_put(void *ns) | ||
101 | { | ||
102 | put_uts_ns(ns); | ||
103 | } | ||
104 | |||
105 | static int utsns_install(struct nsproxy *nsproxy, void *ns) | ||
106 | { | ||
107 | get_uts_ns(ns); | ||
108 | put_uts_ns(nsproxy->uts_ns); | ||
109 | nsproxy->uts_ns = ns; | ||
110 | return 0; | ||
111 | } | ||
112 | |||
113 | const struct proc_ns_operations utsns_operations = { | ||
114 | .name = "uts", | ||
115 | .type = CLONE_NEWUTS, | ||
116 | .get = utsns_get, | ||
117 | .put = utsns_put, | ||
118 | .install = utsns_install, | ||
119 | }; | ||
120 | |||
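The new utsns_operations hook the UTS namespace into the generic proc_ns_operations interface: get() pins the task's namespace under RCU, put() releases it, and install() swaps it into a nsproxy while keeping reference counts balanced. The sketch below models only that install contract in userspace; the struct layouts and refcount helpers are illustrative, not the kernel types.

#include <stdio.h>

struct uts_ns { int refcount; const char *name; };
struct nsproxy { struct uts_ns *uts_ns; };

static void get_ns(struct uts_ns *ns) { ns->refcount++; }
static void put_ns(struct uts_ns *ns)
{
	if (--ns->refcount == 0)
		printf("freeing %s\n", ns->name);
}

static int utsns_install(struct nsproxy *p, struct uts_ns *ns)
{
	get_ns(ns);			/* the nsproxy becomes a new owner */
	put_ns(p->uts_ns);		/* drop the previously installed namespace */
	p->uts_ns = ns;
	return 0;
}

int main(void)
{
	struct uts_ns init_ns = { .refcount = 1, .name = "init_uts_ns" };
	struct uts_ns new_ns  = { .refcount = 1, .name = "new_uts_ns" };
	struct nsproxy proxy  = { .uts_ns = &init_ns };

	utsns_install(&proxy, &new_ns);
	printf("proxy now uses %s (refcount %d)\n",
	       proxy.uts_ns->name, proxy.uts_ns->refcount);
	return 0;
}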
diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 7daa4b072e9f..3d0c56ad4792 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c | |||
@@ -415,15 +415,13 @@ static void watchdog_nmi_disable(int cpu) { return; } | |||
415 | #endif /* CONFIG_HARDLOCKUP_DETECTOR */ | 415 | #endif /* CONFIG_HARDLOCKUP_DETECTOR */ |
416 | 416 | ||
417 | /* prepare/enable/disable routines */ | 417 | /* prepare/enable/disable routines */ |
418 | static int watchdog_prepare_cpu(int cpu) | 418 | static void watchdog_prepare_cpu(int cpu) |
419 | { | 419 | { |
420 | struct hrtimer *hrtimer = &per_cpu(watchdog_hrtimer, cpu); | 420 | struct hrtimer *hrtimer = &per_cpu(watchdog_hrtimer, cpu); |
421 | 421 | ||
422 | WARN_ON(per_cpu(softlockup_watchdog, cpu)); | 422 | WARN_ON(per_cpu(softlockup_watchdog, cpu)); |
423 | hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | 423 | hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); |
424 | hrtimer->function = watchdog_timer_fn; | 424 | hrtimer->function = watchdog_timer_fn; |
425 | |||
426 | return 0; | ||
427 | } | 425 | } |
428 | 426 | ||
429 | static int watchdog_enable(int cpu) | 427 | static int watchdog_enable(int cpu) |
@@ -542,17 +540,16 @@ static int __cpuinit | |||
542 | cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) | 540 | cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) |
543 | { | 541 | { |
544 | int hotcpu = (unsigned long)hcpu; | 542 | int hotcpu = (unsigned long)hcpu; |
545 | int err = 0; | ||
546 | 543 | ||
547 | switch (action) { | 544 | switch (action) { |
548 | case CPU_UP_PREPARE: | 545 | case CPU_UP_PREPARE: |
549 | case CPU_UP_PREPARE_FROZEN: | 546 | case CPU_UP_PREPARE_FROZEN: |
550 | err = watchdog_prepare_cpu(hotcpu); | 547 | watchdog_prepare_cpu(hotcpu); |
551 | break; | 548 | break; |
552 | case CPU_ONLINE: | 549 | case CPU_ONLINE: |
553 | case CPU_ONLINE_FROZEN: | 550 | case CPU_ONLINE_FROZEN: |
554 | if (watchdog_enabled) | 551 | if (watchdog_enabled) |
555 | err = watchdog_enable(hotcpu); | 552 | watchdog_enable(hotcpu); |
556 | break; | 553 | break; |
557 | #ifdef CONFIG_HOTPLUG_CPU | 554 | #ifdef CONFIG_HOTPLUG_CPU |
558 | case CPU_UP_CANCELED: | 555 | case CPU_UP_CANCELED: |
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index e3378e8d3a5c..0400553f0d04 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
@@ -2866,9 +2866,7 @@ static int alloc_cwqs(struct workqueue_struct *wq) | |||
2866 | } | 2866 | } |
2867 | } | 2867 | } |
2868 | 2868 | ||
2869 | /* just in case, make sure it's actually aligned | 2869 | /* just in case, make sure it's actually aligned */ |
2870 | * - this is affected by PERCPU() alignment in vmlinux.lds.S | ||
2871 | */ | ||
2872 | BUG_ON(!IS_ALIGNED(wq->cpu_wq.v, align)); | 2870 | BUG_ON(!IS_ALIGNED(wq->cpu_wq.v, align)); |
2873 | return wq->cpu_wq.v ? 0 : -ENOMEM; | 2871 | return wq->cpu_wq.v ? 0 : -ENOMEM; |
2874 | } | 2872 | } |
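The workqueue hunk only trims a stale comment; the BUG_ON(!IS_ALIGNED(...)) check it documented stays. For reference, IS_ALIGNED() reduces to a mask test against a power-of-two alignment, as this small standalone check illustrates; the alignment and pointer values are arbitrary examples.

#include <assert.h>
#include <stdio.h>

#define IS_ALIGNED(x, a) (((x) & ((a) - 1)) == 0)	/* valid for power-of-two 'a' */

int main(void)
{
	unsigned long align = 256;		/* hypothetical cwq alignment */
	unsigned long ptr   = 0x1000;		/* 4096 is 256-byte aligned */

	assert(IS_ALIGNED(ptr, align));
	assert(!IS_ALIGNED(ptr + 8, align));
	printf("%#lx aligned to %lu: %d\n", ptr, align, IS_ALIGNED(ptr, align));
	return 0;
}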