author | Ingo Molnar <mingo@kernel.org> | 2012-03-26 11:18:44 -0400
committer | Ingo Molnar <mingo@kernel.org> | 2012-03-26 11:19:03 -0400
commit | 7fd52392c56361a40f0c630a82b36b95ca31eac6 (patch)
tree | 14091de24c6b28ea4cae9826f98aeedb7be091f5 /kernel
parent | b01c3a0010aabadf745f3e7fdb9cab682e0a28a2 (diff)
parent | e22057c8599373e5caef0bc42bdb95d2a361ab0d (diff)
Merge branch 'linus' into perf/urgent
Merge reason: we need to fix a non-trivial merge conflict.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
44 files changed, 1391 insertions, 1385 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 2d9de86b7e76..cb41b9547c9f 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -27,7 +27,6 @@ obj-y += power/
27 | 27 | ||
28 | obj-$(CONFIG_FREEZER) += freezer.o | 28 | obj-$(CONFIG_FREEZER) += freezer.o |
29 | obj-$(CONFIG_PROFILING) += profile.o | 29 | obj-$(CONFIG_PROFILING) += profile.o |
30 | obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o | ||
31 | obj-$(CONFIG_STACKTRACE) += stacktrace.o | 30 | obj-$(CONFIG_STACKTRACE) += stacktrace.o |
32 | obj-y += time/ | 31 | obj-y += time/ |
33 | obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o | 32 | obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o |
diff --git a/kernel/audit.c b/kernel/audit.c
index bb0eb5bb9a0a..1c7f2c61416b 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -1418,7 +1418,7 @@ void audit_log_untrustedstring(struct audit_buffer *ab, const char *string)
1418 | 1418 | ||
1419 | /* This is a helper-function to print the escaped d_path */ | 1419 | /* This is a helper-function to print the escaped d_path */ |
1420 | void audit_log_d_path(struct audit_buffer *ab, const char *prefix, | 1420 | void audit_log_d_path(struct audit_buffer *ab, const char *prefix, |
1421 | struct path *path) | 1421 | const struct path *path) |
1422 | { | 1422 | { |
1423 | char *p, *pathname; | 1423 | char *p, *pathname; |
1424 | 1424 | ||
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index a5d3b5325f77..f4ea4b6f3cf1 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -818,7 +818,7 @@ static int cgroup_call_pre_destroy(struct cgroup *cgrp)
818 | 818 | ||
819 | for_each_subsys(cgrp->root, ss) | 819 | for_each_subsys(cgrp->root, ss) |
820 | if (ss->pre_destroy) { | 820 | if (ss->pre_destroy) { |
821 | ret = ss->pre_destroy(ss, cgrp); | 821 | ret = ss->pre_destroy(cgrp); |
822 | if (ret) | 822 | if (ret) |
823 | break; | 823 | break; |
824 | } | 824 | } |
@@ -846,7 +846,7 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode)
846 | * Release the subsystem state objects. | 846 | * Release the subsystem state objects. |
847 | */ | 847 | */ |
848 | for_each_subsys(cgrp->root, ss) | 848 | for_each_subsys(cgrp->root, ss) |
849 | ss->destroy(ss, cgrp); | 849 | ss->destroy(cgrp); |
850 | 850 | ||
851 | cgrp->root->number_of_cgroups--; | 851 | cgrp->root->number_of_cgroups--; |
852 | mutex_unlock(&cgroup_mutex); | 852 | mutex_unlock(&cgroup_mutex); |
@@ -1015,7 +1015,7 @@ static int rebind_subsystems(struct cgroupfs_root *root,
1015 | list_move(&ss->sibling, &root->subsys_list); | 1015 | list_move(&ss->sibling, &root->subsys_list); |
1016 | ss->root = root; | 1016 | ss->root = root; |
1017 | if (ss->bind) | 1017 | if (ss->bind) |
1018 | ss->bind(ss, cgrp); | 1018 | ss->bind(cgrp); |
1019 | mutex_unlock(&ss->hierarchy_mutex); | 1019 | mutex_unlock(&ss->hierarchy_mutex); |
1020 | /* refcount was already taken, and we're keeping it */ | 1020 | /* refcount was already taken, and we're keeping it */ |
1021 | } else if (bit & removed_bits) { | 1021 | } else if (bit & removed_bits) { |
@@ -1025,7 +1025,7 @@ static int rebind_subsystems(struct cgroupfs_root *root,
1025 | BUG_ON(cgrp->subsys[i]->cgroup != cgrp); | 1025 | BUG_ON(cgrp->subsys[i]->cgroup != cgrp); |
1026 | mutex_lock(&ss->hierarchy_mutex); | 1026 | mutex_lock(&ss->hierarchy_mutex); |
1027 | if (ss->bind) | 1027 | if (ss->bind) |
1028 | ss->bind(ss, dummytop); | 1028 | ss->bind(dummytop); |
1029 | dummytop->subsys[i]->cgroup = dummytop; | 1029 | dummytop->subsys[i]->cgroup = dummytop; |
1030 | cgrp->subsys[i] = NULL; | 1030 | cgrp->subsys[i] = NULL; |
1031 | subsys[i]->root = &rootnode; | 1031 | subsys[i]->root = &rootnode; |
@@ -1472,7 +1472,6 @@ static int cgroup_get_rootdir(struct super_block *sb)
1472 | 1472 | ||
1473 | struct inode *inode = | 1473 | struct inode *inode = |
1474 | cgroup_new_inode(S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR, sb); | 1474 | cgroup_new_inode(S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR, sb); |
1475 | struct dentry *dentry; | ||
1476 | 1475 | ||
1477 | if (!inode) | 1476 | if (!inode) |
1478 | return -ENOMEM; | 1477 | return -ENOMEM; |
@@ -1481,12 +1480,9 @@ static int cgroup_get_rootdir(struct super_block *sb)
1481 | inode->i_op = &cgroup_dir_inode_operations; | 1480 | inode->i_op = &cgroup_dir_inode_operations; |
1482 | /* directories start off with i_nlink == 2 (for "." entry) */ | 1481 | /* directories start off with i_nlink == 2 (for "." entry) */ |
1483 | inc_nlink(inode); | 1482 | inc_nlink(inode); |
1484 | dentry = d_alloc_root(inode); | 1483 | sb->s_root = d_make_root(inode); |
1485 | if (!dentry) { | 1484 | if (!sb->s_root) |
1486 | iput(inode); | ||
1487 | return -ENOMEM; | 1485 | return -ENOMEM; |
1488 | } | ||
1489 | sb->s_root = dentry; | ||
1490 | /* for everything else we want ->d_op set */ | 1486 | /* for everything else we want ->d_op set */ |
1491 | sb->s_d_op = &cgroup_dops; | 1487 | sb->s_d_op = &cgroup_dops; |
1492 | return 0; | 1488 | return 0; |
@@ -1763,6 +1759,7 @@ EXPORT_SYMBOL_GPL(cgroup_path);
1763 | struct task_and_cgroup { | 1759 | struct task_and_cgroup { |
1764 | struct task_struct *task; | 1760 | struct task_struct *task; |
1765 | struct cgroup *cgrp; | 1761 | struct cgroup *cgrp; |
1762 | struct css_set *cg; | ||
1766 | }; | 1763 | }; |
1767 | 1764 | ||
1768 | struct cgroup_taskset { | 1765 | struct cgroup_taskset { |
@@ -1843,11 +1840,10 @@ EXPORT_SYMBOL_GPL(cgroup_taskset_size);
1843 | * will already exist. If not set, this function might sleep, and can fail with | 1840 | * will already exist. If not set, this function might sleep, and can fail with |
1844 | * -ENOMEM. Must be called with cgroup_mutex and threadgroup locked. | 1841 | * -ENOMEM. Must be called with cgroup_mutex and threadgroup locked. |
1845 | */ | 1842 | */ |
1846 | static int cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp, | 1843 | static void cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp, |
1847 | struct task_struct *tsk, bool guarantee) | 1844 | struct task_struct *tsk, struct css_set *newcg) |
1848 | { | 1845 | { |
1849 | struct css_set *oldcg; | 1846 | struct css_set *oldcg; |
1850 | struct css_set *newcg; | ||
1851 | 1847 | ||
1852 | /* | 1848 | /* |
1853 | * We are synchronized through threadgroup_lock() against PF_EXITING | 1849 | * We are synchronized through threadgroup_lock() against PF_EXITING |
@@ -1857,23 +1853,6 @@ static int cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp,
1857 | WARN_ON_ONCE(tsk->flags & PF_EXITING); | 1853 | WARN_ON_ONCE(tsk->flags & PF_EXITING); |
1858 | oldcg = tsk->cgroups; | 1854 | oldcg = tsk->cgroups; |
1859 | 1855 | ||
1860 | /* locate or allocate a new css_set for this task. */ | ||
1861 | if (guarantee) { | ||
1862 | /* we know the css_set we want already exists. */ | ||
1863 | struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT]; | ||
1864 | read_lock(&css_set_lock); | ||
1865 | newcg = find_existing_css_set(oldcg, cgrp, template); | ||
1866 | BUG_ON(!newcg); | ||
1867 | get_css_set(newcg); | ||
1868 | read_unlock(&css_set_lock); | ||
1869 | } else { | ||
1870 | might_sleep(); | ||
1871 | /* find_css_set will give us newcg already referenced. */ | ||
1872 | newcg = find_css_set(oldcg, cgrp); | ||
1873 | if (!newcg) | ||
1874 | return -ENOMEM; | ||
1875 | } | ||
1876 | |||
1877 | task_lock(tsk); | 1856 | task_lock(tsk); |
1878 | rcu_assign_pointer(tsk->cgroups, newcg); | 1857 | rcu_assign_pointer(tsk->cgroups, newcg); |
1879 | task_unlock(tsk); | 1858 | task_unlock(tsk); |
@@ -1892,7 +1871,6 @@ static int cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp,
1892 | put_css_set(oldcg); | 1871 | put_css_set(oldcg); |
1893 | 1872 | ||
1894 | set_bit(CGRP_RELEASABLE, &oldcgrp->flags); | 1873 | set_bit(CGRP_RELEASABLE, &oldcgrp->flags); |
1895 | return 0; | ||
1896 | } | 1874 | } |
1897 | 1875 | ||
1898 | /** | 1876 | /** |
@@ -1910,6 +1888,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
1910 | struct cgroup *oldcgrp; | 1888 | struct cgroup *oldcgrp; |
1911 | struct cgroupfs_root *root = cgrp->root; | 1889 | struct cgroupfs_root *root = cgrp->root; |
1912 | struct cgroup_taskset tset = { }; | 1890 | struct cgroup_taskset tset = { }; |
1891 | struct css_set *newcg; | ||
1913 | 1892 | ||
1914 | /* @tsk either already exited or can't exit until the end */ | 1893 | /* @tsk either already exited or can't exit until the end */ |
1915 | if (tsk->flags & PF_EXITING) | 1894 | if (tsk->flags & PF_EXITING) |
@@ -1925,7 +1904,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
1925 | 1904 | ||
1926 | for_each_subsys(root, ss) { | 1905 | for_each_subsys(root, ss) { |
1927 | if (ss->can_attach) { | 1906 | if (ss->can_attach) { |
1928 | retval = ss->can_attach(ss, cgrp, &tset); | 1907 | retval = ss->can_attach(cgrp, &tset); |
1929 | if (retval) { | 1908 | if (retval) { |
1930 | /* | 1909 | /* |
1931 | * Remember on which subsystem the can_attach() | 1910 | * Remember on which subsystem the can_attach() |
@@ -1939,13 +1918,17 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
1939 | } | 1918 | } |
1940 | } | 1919 | } |
1941 | 1920 | ||
1942 | retval = cgroup_task_migrate(cgrp, oldcgrp, tsk, false); | 1921 | newcg = find_css_set(tsk->cgroups, cgrp); |
1943 | if (retval) | 1922 | if (!newcg) { |
1923 | retval = -ENOMEM; | ||
1944 | goto out; | 1924 | goto out; |
1925 | } | ||
1926 | |||
1927 | cgroup_task_migrate(cgrp, oldcgrp, tsk, newcg); | ||
1945 | 1928 | ||
1946 | for_each_subsys(root, ss) { | 1929 | for_each_subsys(root, ss) { |
1947 | if (ss->attach) | 1930 | if (ss->attach) |
1948 | ss->attach(ss, cgrp, &tset); | 1931 | ss->attach(cgrp, &tset); |
1949 | } | 1932 | } |
1950 | 1933 | ||
1951 | synchronize_rcu(); | 1934 | synchronize_rcu(); |
@@ -1967,7 +1950,7 @@ out:
1967 | */ | 1950 | */ |
1968 | break; | 1951 | break; |
1969 | if (ss->cancel_attach) | 1952 | if (ss->cancel_attach) |
1970 | ss->cancel_attach(ss, cgrp, &tset); | 1953 | ss->cancel_attach(cgrp, &tset); |
1971 | } | 1954 | } |
1972 | } | 1955 | } |
1973 | return retval; | 1956 | return retval; |
@@ -1997,66 +1980,6 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
1997 | } | 1980 | } |
1998 | EXPORT_SYMBOL_GPL(cgroup_attach_task_all); | 1981 | EXPORT_SYMBOL_GPL(cgroup_attach_task_all); |
1999 | 1982 | ||
2000 | /* | ||
2001 | * cgroup_attach_proc works in two stages, the first of which prefetches all | ||
2002 | * new css_sets needed (to make sure we have enough memory before committing | ||
2003 | * to the move) and stores them in a list of entries of the following type. | ||
2004 | * TODO: possible optimization: use css_set->rcu_head for chaining instead | ||
2005 | */ | ||
2006 | struct cg_list_entry { | ||
2007 | struct css_set *cg; | ||
2008 | struct list_head links; | ||
2009 | }; | ||
2010 | |||
2011 | static bool css_set_check_fetched(struct cgroup *cgrp, | ||
2012 | struct task_struct *tsk, struct css_set *cg, | ||
2013 | struct list_head *newcg_list) | ||
2014 | { | ||
2015 | struct css_set *newcg; | ||
2016 | struct cg_list_entry *cg_entry; | ||
2017 | struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT]; | ||
2018 | |||
2019 | read_lock(&css_set_lock); | ||
2020 | newcg = find_existing_css_set(cg, cgrp, template); | ||
2021 | read_unlock(&css_set_lock); | ||
2022 | |||
2023 | /* doesn't exist at all? */ | ||
2024 | if (!newcg) | ||
2025 | return false; | ||
2026 | /* see if it's already in the list */ | ||
2027 | list_for_each_entry(cg_entry, newcg_list, links) | ||
2028 | if (cg_entry->cg == newcg) | ||
2029 | return true; | ||
2030 | |||
2031 | /* not found */ | ||
2032 | return false; | ||
2033 | } | ||
2034 | |||
2035 | /* | ||
2036 | * Find the new css_set and store it in the list in preparation for moving the | ||
2037 | * given task to the given cgroup. Returns 0 or -ENOMEM. | ||
2038 | */ | ||
2039 | static int css_set_prefetch(struct cgroup *cgrp, struct css_set *cg, | ||
2040 | struct list_head *newcg_list) | ||
2041 | { | ||
2042 | struct css_set *newcg; | ||
2043 | struct cg_list_entry *cg_entry; | ||
2044 | |||
2045 | /* ensure a new css_set will exist for this thread */ | ||
2046 | newcg = find_css_set(cg, cgrp); | ||
2047 | if (!newcg) | ||
2048 | return -ENOMEM; | ||
2049 | /* add it to the list */ | ||
2050 | cg_entry = kmalloc(sizeof(struct cg_list_entry), GFP_KERNEL); | ||
2051 | if (!cg_entry) { | ||
2052 | put_css_set(newcg); | ||
2053 | return -ENOMEM; | ||
2054 | } | ||
2055 | cg_entry->cg = newcg; | ||
2056 | list_add(&cg_entry->links, newcg_list); | ||
2057 | return 0; | ||
2058 | } | ||
2059 | |||
2060 | /** | 1983 | /** |
2061 | * cgroup_attach_proc - attach all threads in a threadgroup to a cgroup | 1984 | * cgroup_attach_proc - attach all threads in a threadgroup to a cgroup |
2062 | * @cgrp: the cgroup to attach to | 1985 | * @cgrp: the cgroup to attach to |
@@ -2070,20 +1993,12 @@ static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
2070 | int retval, i, group_size; | 1993 | int retval, i, group_size; |
2071 | struct cgroup_subsys *ss, *failed_ss = NULL; | 1994 | struct cgroup_subsys *ss, *failed_ss = NULL; |
2072 | /* guaranteed to be initialized later, but the compiler needs this */ | 1995 | /* guaranteed to be initialized later, but the compiler needs this */ |
2073 | struct css_set *oldcg; | ||
2074 | struct cgroupfs_root *root = cgrp->root; | 1996 | struct cgroupfs_root *root = cgrp->root; |
2075 | /* threadgroup list cursor and array */ | 1997 | /* threadgroup list cursor and array */ |
2076 | struct task_struct *tsk; | 1998 | struct task_struct *tsk; |
2077 | struct task_and_cgroup *tc; | 1999 | struct task_and_cgroup *tc; |
2078 | struct flex_array *group; | 2000 | struct flex_array *group; |
2079 | struct cgroup_taskset tset = { }; | 2001 | struct cgroup_taskset tset = { }; |
2080 | /* | ||
2081 | * we need to make sure we have css_sets for all the tasks we're | ||
2082 | * going to move -before- we actually start moving them, so that in | ||
2083 | * case we get an ENOMEM we can bail out before making any changes. | ||
2084 | */ | ||
2085 | struct list_head newcg_list; | ||
2086 | struct cg_list_entry *cg_entry, *temp_nobe; | ||
2087 | 2002 | ||
2088 | /* | 2003 | /* |
2089 | * step 0: in order to do expensive, possibly blocking operations for | 2004 | * step 0: in order to do expensive, possibly blocking operations for |
@@ -2102,23 +2017,14 @@ static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
2102 | if (retval) | 2017 | if (retval) |
2103 | goto out_free_group_list; | 2018 | goto out_free_group_list; |
2104 | 2019 | ||
2105 | /* prevent changes to the threadgroup list while we take a snapshot. */ | ||
2106 | read_lock(&tasklist_lock); | ||
2107 | if (!thread_group_leader(leader)) { | ||
2108 | /* | ||
2109 | * a race with de_thread from another thread's exec() may strip | ||
2110 | * us of our leadership, making while_each_thread unsafe to use | ||
2111 | * on this task. if this happens, there is no choice but to | ||
2112 | * throw this task away and try again (from cgroup_procs_write); | ||
2113 | * this is "double-double-toil-and-trouble-check locking". | ||
2114 | */ | ||
2115 | read_unlock(&tasklist_lock); | ||
2116 | retval = -EAGAIN; | ||
2117 | goto out_free_group_list; | ||
2118 | } | ||
2119 | |||
2120 | tsk = leader; | 2020 | tsk = leader; |
2121 | i = 0; | 2021 | i = 0; |
2022 | /* | ||
2023 | * Prevent freeing of tasks while we take a snapshot. Tasks that are | ||
2024 | * already PF_EXITING could be freed from underneath us unless we | ||
2025 | * take an rcu_read_lock. | ||
2026 | */ | ||
2027 | rcu_read_lock(); | ||
2122 | do { | 2028 | do { |
2123 | struct task_and_cgroup ent; | 2029 | struct task_and_cgroup ent; |
2124 | 2030 | ||
@@ -2128,24 +2034,24 @@ static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
2128 | 2034 | ||
2129 | /* as per above, nr_threads may decrease, but not increase. */ | 2035 | /* as per above, nr_threads may decrease, but not increase. */ |
2130 | BUG_ON(i >= group_size); | 2036 | BUG_ON(i >= group_size); |
2131 | /* | ||
2132 | * saying GFP_ATOMIC has no effect here because we did prealloc | ||
2133 | * earlier, but it's good form to communicate our expectations. | ||
2134 | */ | ||
2135 | ent.task = tsk; | 2037 | ent.task = tsk; |
2136 | ent.cgrp = task_cgroup_from_root(tsk, root); | 2038 | ent.cgrp = task_cgroup_from_root(tsk, root); |
2137 | /* nothing to do if this task is already in the cgroup */ | 2039 | /* nothing to do if this task is already in the cgroup */ |
2138 | if (ent.cgrp == cgrp) | 2040 | if (ent.cgrp == cgrp) |
2139 | continue; | 2041 | continue; |
2042 | /* | ||
2043 | * saying GFP_ATOMIC has no effect here because we did prealloc | ||
2044 | * earlier, but it's good form to communicate our expectations. | ||
2045 | */ | ||
2140 | retval = flex_array_put(group, i, &ent, GFP_ATOMIC); | 2046 | retval = flex_array_put(group, i, &ent, GFP_ATOMIC); |
2141 | BUG_ON(retval != 0); | 2047 | BUG_ON(retval != 0); |
2142 | i++; | 2048 | i++; |
2143 | } while_each_thread(leader, tsk); | 2049 | } while_each_thread(leader, tsk); |
2050 | rcu_read_unlock(); | ||
2144 | /* remember the number of threads in the array for later. */ | 2051 | /* remember the number of threads in the array for later. */ |
2145 | group_size = i; | 2052 | group_size = i; |
2146 | tset.tc_array = group; | 2053 | tset.tc_array = group; |
2147 | tset.tc_array_len = group_size; | 2054 | tset.tc_array_len = group_size; |
2148 | read_unlock(&tasklist_lock); | ||
2149 | 2055 | ||
2150 | /* methods shouldn't be called if no task is actually migrating */ | 2056 | /* methods shouldn't be called if no task is actually migrating */ |
2151 | retval = 0; | 2057 | retval = 0; |
@@ -2157,7 +2063,7 @@ static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
2157 | */ | 2063 | */ |
2158 | for_each_subsys(root, ss) { | 2064 | for_each_subsys(root, ss) { |
2159 | if (ss->can_attach) { | 2065 | if (ss->can_attach) { |
2160 | retval = ss->can_attach(ss, cgrp, &tset); | 2066 | retval = ss->can_attach(cgrp, &tset); |
2161 | if (retval) { | 2067 | if (retval) { |
2162 | failed_ss = ss; | 2068 | failed_ss = ss; |
2163 | goto out_cancel_attach; | 2069 | goto out_cancel_attach; |
@@ -2169,17 +2075,12 @@ static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
2169 | * step 2: make sure css_sets exist for all threads to be migrated. | 2075 | * step 2: make sure css_sets exist for all threads to be migrated. |
2170 | * we use find_css_set, which allocates a new one if necessary. | 2076 | * we use find_css_set, which allocates a new one if necessary. |
2171 | */ | 2077 | */ |
2172 | INIT_LIST_HEAD(&newcg_list); | ||
2173 | for (i = 0; i < group_size; i++) { | 2078 | for (i = 0; i < group_size; i++) { |
2174 | tc = flex_array_get(group, i); | 2079 | tc = flex_array_get(group, i); |
2175 | oldcg = tc->task->cgroups; | 2080 | tc->cg = find_css_set(tc->task->cgroups, cgrp); |
2176 | 2081 | if (!tc->cg) { | |
2177 | /* if we don't already have it in the list get a new one */ | 2082 | retval = -ENOMEM; |
2178 | if (!css_set_check_fetched(cgrp, tc->task, oldcg, | 2083 | goto out_put_css_set_refs; |
2179 | &newcg_list)) { | ||
2180 | retval = css_set_prefetch(cgrp, oldcg, &newcg_list); | ||
2181 | if (retval) | ||
2182 | goto out_list_teardown; | ||
2183 | } | 2084 | } |
2184 | } | 2085 | } |
2185 | 2086 | ||
@@ -2190,8 +2091,7 @@ static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
2190 | */ | 2091 | */ |
2191 | for (i = 0; i < group_size; i++) { | 2092 | for (i = 0; i < group_size; i++) { |
2192 | tc = flex_array_get(group, i); | 2093 | tc = flex_array_get(group, i); |
2193 | retval = cgroup_task_migrate(cgrp, tc->cgrp, tc->task, true); | 2094 | cgroup_task_migrate(cgrp, tc->cgrp, tc->task, tc->cg); |
2194 | BUG_ON(retval); | ||
2195 | } | 2095 | } |
2196 | /* nothing is sensitive to fork() after this point. */ | 2096 | /* nothing is sensitive to fork() after this point. */ |
2197 | 2097 | ||
@@ -2200,7 +2100,7 @@ static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
2200 | */ | 2100 | */ |
2201 | for_each_subsys(root, ss) { | 2101 | for_each_subsys(root, ss) { |
2202 | if (ss->attach) | 2102 | if (ss->attach) |
2203 | ss->attach(ss, cgrp, &tset); | 2103 | ss->attach(cgrp, &tset); |
2204 | } | 2104 | } |
2205 | 2105 | ||
2206 | /* | 2106 | /* |
@@ -2209,21 +2109,22 @@ static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
2209 | synchronize_rcu(); | 2109 | synchronize_rcu(); |
2210 | cgroup_wakeup_rmdir_waiter(cgrp); | 2110 | cgroup_wakeup_rmdir_waiter(cgrp); |
2211 | retval = 0; | 2111 | retval = 0; |
2212 | out_list_teardown: | 2112 | out_put_css_set_refs: |
2213 | /* clean up the list of prefetched css_sets. */ | 2113 | if (retval) { |
2214 | list_for_each_entry_safe(cg_entry, temp_nobe, &newcg_list, links) { | 2114 | for (i = 0; i < group_size; i++) { |
2215 | list_del(&cg_entry->links); | 2115 | tc = flex_array_get(group, i); |
2216 | put_css_set(cg_entry->cg); | 2116 | if (!tc->cg) |
2217 | kfree(cg_entry); | 2117 | break; |
2118 | put_css_set(tc->cg); | ||
2119 | } | ||
2218 | } | 2120 | } |
2219 | out_cancel_attach: | 2121 | out_cancel_attach: |
2220 | /* same deal as in cgroup_attach_task */ | ||
2221 | if (retval) { | 2122 | if (retval) { |
2222 | for_each_subsys(root, ss) { | 2123 | for_each_subsys(root, ss) { |
2223 | if (ss == failed_ss) | 2124 | if (ss == failed_ss) |
2224 | break; | 2125 | break; |
2225 | if (ss->cancel_attach) | 2126 | if (ss->cancel_attach) |
2226 | ss->cancel_attach(ss, cgrp, &tset); | 2127 | ss->cancel_attach(cgrp, &tset); |
2227 | } | 2128 | } |
2228 | } | 2129 | } |
2229 | out_free_group_list: | 2130 | out_free_group_list: |
@@ -2245,22 +2146,14 @@ static int attach_task_by_pid(struct cgroup *cgrp, u64 pid, bool threadgroup)
2245 | if (!cgroup_lock_live_group(cgrp)) | 2146 | if (!cgroup_lock_live_group(cgrp)) |
2246 | return -ENODEV; | 2147 | return -ENODEV; |
2247 | 2148 | ||
2149 | retry_find_task: | ||
2150 | rcu_read_lock(); | ||
2248 | if (pid) { | 2151 | if (pid) { |
2249 | rcu_read_lock(); | ||
2250 | tsk = find_task_by_vpid(pid); | 2152 | tsk = find_task_by_vpid(pid); |
2251 | if (!tsk) { | 2153 | if (!tsk) { |
2252 | rcu_read_unlock(); | 2154 | rcu_read_unlock(); |
2253 | cgroup_unlock(); | 2155 | ret= -ESRCH; |
2254 | return -ESRCH; | 2156 | goto out_unlock_cgroup; |
2255 | } | ||
2256 | if (threadgroup) { | ||
2257 | /* | ||
2258 | * RCU protects this access, since tsk was found in the | ||
2259 | * tid map. a race with de_thread may cause group_leader | ||
2260 | * to stop being the leader, but cgroup_attach_proc will | ||
2261 | * detect it later. | ||
2262 | */ | ||
2263 | tsk = tsk->group_leader; | ||
2264 | } | 2157 | } |
2265 | /* | 2158 | /* |
2266 | * even if we're attaching all tasks in the thread group, we | 2159 | * even if we're attaching all tasks in the thread group, we |
@@ -2271,29 +2164,38 @@ static int attach_task_by_pid(struct cgroup *cgrp, u64 pid, bool threadgroup)
2271 | cred->euid != tcred->uid && | 2164 | cred->euid != tcred->uid && |
2272 | cred->euid != tcred->suid) { | 2165 | cred->euid != tcred->suid) { |
2273 | rcu_read_unlock(); | 2166 | rcu_read_unlock(); |
2274 | cgroup_unlock(); | 2167 | ret = -EACCES; |
2275 | return -EACCES; | 2168 | goto out_unlock_cgroup; |
2276 | } | 2169 | } |
2277 | get_task_struct(tsk); | 2170 | } else |
2278 | rcu_read_unlock(); | 2171 | tsk = current; |
2279 | } else { | ||
2280 | if (threadgroup) | ||
2281 | tsk = current->group_leader; | ||
2282 | else | ||
2283 | tsk = current; | ||
2284 | get_task_struct(tsk); | ||
2285 | } | ||
2286 | |||
2287 | threadgroup_lock(tsk); | ||
2288 | 2172 | ||
2289 | if (threadgroup) | 2173 | if (threadgroup) |
2174 | tsk = tsk->group_leader; | ||
2175 | get_task_struct(tsk); | ||
2176 | rcu_read_unlock(); | ||
2177 | |||
2178 | threadgroup_lock(tsk); | ||
2179 | if (threadgroup) { | ||
2180 | if (!thread_group_leader(tsk)) { | ||
2181 | /* | ||
2182 | * a race with de_thread from another thread's exec() | ||
2183 | * may strip us of our leadership, if this happens, | ||
2184 | * there is no choice but to throw this task away and | ||
2185 | * try again; this is | ||
2186 | * "double-double-toil-and-trouble-check locking". | ||
2187 | */ | ||
2188 | threadgroup_unlock(tsk); | ||
2189 | put_task_struct(tsk); | ||
2190 | goto retry_find_task; | ||
2191 | } | ||
2290 | ret = cgroup_attach_proc(cgrp, tsk); | 2192 | ret = cgroup_attach_proc(cgrp, tsk); |
2291 | else | 2193 | } else |
2292 | ret = cgroup_attach_task(cgrp, tsk); | 2194 | ret = cgroup_attach_task(cgrp, tsk); |
2293 | |||
2294 | threadgroup_unlock(tsk); | 2195 | threadgroup_unlock(tsk); |
2295 | 2196 | ||
2296 | put_task_struct(tsk); | 2197 | put_task_struct(tsk); |
2198 | out_unlock_cgroup: | ||
2297 | cgroup_unlock(); | 2199 | cgroup_unlock(); |
2298 | return ret; | 2200 | return ret; |
2299 | } | 2201 | } |
@@ -2305,16 +2207,7 @@ static int cgroup_tasks_write(struct cgroup *cgrp, struct cftype *cft, u64 pid)
2305 | 2207 | ||
2306 | static int cgroup_procs_write(struct cgroup *cgrp, struct cftype *cft, u64 tgid) | 2208 | static int cgroup_procs_write(struct cgroup *cgrp, struct cftype *cft, u64 tgid) |
2307 | { | 2209 | { |
2308 | int ret; | 2210 | return attach_task_by_pid(cgrp, tgid, true); |
2309 | do { | ||
2310 | /* | ||
2311 | * attach_proc fails with -EAGAIN if threadgroup leadership | ||
2312 | * changes in the middle of the operation, in which case we need | ||
2313 | * to find the task_struct for the new leader and start over. | ||
2314 | */ | ||
2315 | ret = attach_task_by_pid(cgrp, tgid, true); | ||
2316 | } while (ret == -EAGAIN); | ||
2317 | return ret; | ||
2318 | } | 2211 | } |
2319 | 2212 | ||
2320 | /** | 2213 | /** |
@@ -2804,15 +2697,20 @@ static void cgroup_advance_iter(struct cgroup *cgrp,
2804 | * using their cgroups capability, we don't maintain the lists running | 2697 | * using their cgroups capability, we don't maintain the lists running |
2805 | * through each css_set to its tasks until we see the list actually | 2698 | * through each css_set to its tasks until we see the list actually |
2806 | * used - in other words after the first call to cgroup_iter_start(). | 2699 | * used - in other words after the first call to cgroup_iter_start(). |
2807 | * | ||
2808 | * The tasklist_lock is not held here, as do_each_thread() and | ||
2809 | * while_each_thread() are protected by RCU. | ||
2810 | */ | 2700 | */ |
2811 | static void cgroup_enable_task_cg_lists(void) | 2701 | static void cgroup_enable_task_cg_lists(void) |
2812 | { | 2702 | { |
2813 | struct task_struct *p, *g; | 2703 | struct task_struct *p, *g; |
2814 | write_lock(&css_set_lock); | 2704 | write_lock(&css_set_lock); |
2815 | use_task_css_set_links = 1; | 2705 | use_task_css_set_links = 1; |
2706 | /* | ||
2707 | * We need tasklist_lock because RCU is not safe against | ||
2708 | * while_each_thread(). Besides, a forking task that has passed | ||
2709 | * cgroup_post_fork() without seeing use_task_css_set_links = 1 | ||
2710 | * is not guaranteed to have its child immediately visible in the | ||
2711 | * tasklist if we walk through it with RCU. | ||
2712 | */ | ||
2713 | read_lock(&tasklist_lock); | ||
2816 | do_each_thread(g, p) { | 2714 | do_each_thread(g, p) { |
2817 | task_lock(p); | 2715 | task_lock(p); |
2818 | /* | 2716 | /* |
@@ -2824,6 +2722,7 @@ static void cgroup_enable_task_cg_lists(void)
2824 | list_add(&p->cg_list, &p->cgroups->tasks); | 2722 | list_add(&p->cg_list, &p->cgroups->tasks); |
2825 | task_unlock(p); | 2723 | task_unlock(p); |
2826 | } while_each_thread(g, p); | 2724 | } while_each_thread(g, p); |
2725 | read_unlock(&tasklist_lock); | ||
2827 | write_unlock(&css_set_lock); | 2726 | write_unlock(&css_set_lock); |
2828 | } | 2727 | } |
2829 | 2728 | ||
@@ -3043,6 +2942,38 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan)
3043 | * | 2942 | * |
3044 | */ | 2943 | */ |
3045 | 2944 | ||
2945 | /* which pidlist file are we talking about? */ | ||
2946 | enum cgroup_filetype { | ||
2947 | CGROUP_FILE_PROCS, | ||
2948 | CGROUP_FILE_TASKS, | ||
2949 | }; | ||
2950 | |||
2951 | /* | ||
2952 | * A pidlist is a list of pids that virtually represents the contents of one | ||
2953 | * of the cgroup files ("procs" or "tasks"). We keep a list of such pidlists, | ||
2954 | * a pair (one each for procs, tasks) for each pid namespace that's relevant | ||
2955 | * to the cgroup. | ||
2956 | */ | ||
2957 | struct cgroup_pidlist { | ||
2958 | /* | ||
2959 | * used to find which pidlist is wanted. doesn't change as long as | ||
2960 | * this particular list stays in the list. | ||
2961 | */ | ||
2962 | struct { enum cgroup_filetype type; struct pid_namespace *ns; } key; | ||
2963 | /* array of xids */ | ||
2964 | pid_t *list; | ||
2965 | /* how many elements the above list has */ | ||
2966 | int length; | ||
2967 | /* how many files are using the current array */ | ||
2968 | int use_count; | ||
2969 | /* each of these stored in a list by its cgroup */ | ||
2970 | struct list_head links; | ||
2971 | /* pointer to the cgroup we belong to, for list removal purposes */ | ||
2972 | struct cgroup *owner; | ||
2973 | /* protects the other fields */ | ||
2974 | struct rw_semaphore mutex; | ||
2975 | }; | ||
2976 | |||
3046 | /* | 2977 | /* |
3047 | * The following two functions "fix" the issue where there are more pids | 2978 | * The following two functions "fix" the issue where there are more pids |
3048 | * than kmalloc will give memory for; in such cases, we use vmalloc/vfree. | 2979 | * than kmalloc will give memory for; in such cases, we use vmalloc/vfree. |
@@ -3827,7 +3758,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
3827 | set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags); | 3758 | set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags); |
3828 | 3759 | ||
3829 | for_each_subsys(root, ss) { | 3760 | for_each_subsys(root, ss) { |
3830 | struct cgroup_subsys_state *css = ss->create(ss, cgrp); | 3761 | struct cgroup_subsys_state *css = ss->create(cgrp); |
3831 | 3762 | ||
3832 | if (IS_ERR(css)) { | 3763 | if (IS_ERR(css)) { |
3833 | err = PTR_ERR(css); | 3764 | err = PTR_ERR(css); |
@@ -3841,7 +3772,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
3841 | } | 3772 | } |
3842 | /* At error, ->destroy() callback has to free assigned ID. */ | 3773 | /* At error, ->destroy() callback has to free assigned ID. */ |
3843 | if (clone_children(parent) && ss->post_clone) | 3774 | if (clone_children(parent) && ss->post_clone) |
3844 | ss->post_clone(ss, cgrp); | 3775 | ss->post_clone(cgrp); |
3845 | } | 3776 | } |
3846 | 3777 | ||
3847 | cgroup_lock_hierarchy(root); | 3778 | cgroup_lock_hierarchy(root); |
@@ -3875,7 +3806,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
3875 | 3806 | ||
3876 | for_each_subsys(root, ss) { | 3807 | for_each_subsys(root, ss) { |
3877 | if (cgrp->subsys[ss->subsys_id]) | 3808 | if (cgrp->subsys[ss->subsys_id]) |
3878 | ss->destroy(ss, cgrp); | 3809 | ss->destroy(cgrp); |
3879 | } | 3810 | } |
3880 | 3811 | ||
3881 | mutex_unlock(&cgroup_mutex); | 3812 | mutex_unlock(&cgroup_mutex); |
@@ -4099,7 +4030,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
4099 | /* Create the top cgroup state for this subsystem */ | 4030 | /* Create the top cgroup state for this subsystem */ |
4100 | list_add(&ss->sibling, &rootnode.subsys_list); | 4031 | list_add(&ss->sibling, &rootnode.subsys_list); |
4101 | ss->root = &rootnode; | 4032 | ss->root = &rootnode; |
4102 | css = ss->create(ss, dummytop); | 4033 | css = ss->create(dummytop); |
4103 | /* We don't handle early failures gracefully */ | 4034 | /* We don't handle early failures gracefully */ |
4104 | BUG_ON(IS_ERR(css)); | 4035 | BUG_ON(IS_ERR(css)); |
4105 | init_cgroup_css(css, ss, dummytop); | 4036 | init_cgroup_css(css, ss, dummytop); |
@@ -4188,7 +4119,7 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
4188 | * no ss->create seems to need anything important in the ss struct, so | 4119 | * no ss->create seems to need anything important in the ss struct, so |
4189 | * this can happen first (i.e. before the rootnode attachment). | 4120 | * this can happen first (i.e. before the rootnode attachment). |
4190 | */ | 4121 | */ |
4191 | css = ss->create(ss, dummytop); | 4122 | css = ss->create(dummytop); |
4192 | if (IS_ERR(css)) { | 4123 | if (IS_ERR(css)) { |
4193 | /* failure case - need to deassign the subsys[] slot. */ | 4124 | /* failure case - need to deassign the subsys[] slot. */ |
4194 | subsys[i] = NULL; | 4125 | subsys[i] = NULL; |
@@ -4206,7 +4137,7 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
4206 | int ret = cgroup_init_idr(ss, css); | 4137 | int ret = cgroup_init_idr(ss, css); |
4207 | if (ret) { | 4138 | if (ret) { |
4208 | dummytop->subsys[ss->subsys_id] = NULL; | 4139 | dummytop->subsys[ss->subsys_id] = NULL; |
4209 | ss->destroy(ss, dummytop); | 4140 | ss->destroy(dummytop); |
4210 | subsys[i] = NULL; | 4141 | subsys[i] = NULL; |
4211 | mutex_unlock(&cgroup_mutex); | 4142 | mutex_unlock(&cgroup_mutex); |
4212 | return ret; | 4143 | return ret; |
@@ -4304,7 +4235,7 @@ void cgroup_unload_subsys(struct cgroup_subsys *ss)
4304 | * pointer to find their state. note that this also takes care of | 4235 | * pointer to find their state. note that this also takes care of |
4305 | * freeing the css_id. | 4236 | * freeing the css_id. |
4306 | */ | 4237 | */ |
4307 | ss->destroy(ss, dummytop); | 4238 | ss->destroy(dummytop); |
4308 | dummytop->subsys[ss->subsys_id] = NULL; | 4239 | dummytop->subsys[ss->subsys_id] = NULL; |
4309 | 4240 | ||
4310 | mutex_unlock(&cgroup_mutex); | 4241 | mutex_unlock(&cgroup_mutex); |
@@ -4580,7 +4511,7 @@ void cgroup_fork_callbacks(struct task_struct *child)
4580 | for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) { | 4511 | for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) { |
4581 | struct cgroup_subsys *ss = subsys[i]; | 4512 | struct cgroup_subsys *ss = subsys[i]; |
4582 | if (ss->fork) | 4513 | if (ss->fork) |
4583 | ss->fork(ss, child); | 4514 | ss->fork(child); |
4584 | } | 4515 | } |
4585 | } | 4516 | } |
4586 | } | 4517 | } |
@@ -4596,6 +4527,17 @@ void cgroup_fork_callbacks(struct task_struct *child)
4596 | */ | 4527 | */ |
4597 | void cgroup_post_fork(struct task_struct *child) | 4528 | void cgroup_post_fork(struct task_struct *child) |
4598 | { | 4529 | { |
4530 | /* | ||
4531 | * use_task_css_set_links is set to 1 before we walk the tasklist | ||
4532 | * under the tasklist_lock and we read it here after we added the child | ||
4533 | * to the tasklist under the tasklist_lock as well. If the child wasn't | ||
4534 | * yet in the tasklist when we walked through it from | ||
4535 | * cgroup_enable_task_cg_lists(), then use_task_css_set_links value | ||
4536 | * should be visible now due to the paired locking and barriers implied | ||
4537 | * by LOCK/UNLOCK: it is written before the tasklist_lock unlock | ||
4538 | * in cgroup_enable_task_cg_lists() and read here after the tasklist_lock | ||
4539 | * lock on fork. | ||
4540 | */ | ||
4599 | if (use_task_css_set_links) { | 4541 | if (use_task_css_set_links) { |
4600 | write_lock(&css_set_lock); | 4542 | write_lock(&css_set_lock); |
4601 | if (list_empty(&child->cg_list)) { | 4543 | if (list_empty(&child->cg_list)) { |
@@ -4682,7 +4624,7 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks)
4682 | struct cgroup *old_cgrp = | 4624 | struct cgroup *old_cgrp = |
4683 | rcu_dereference_raw(cg->subsys[i])->cgroup; | 4625 | rcu_dereference_raw(cg->subsys[i])->cgroup; |
4684 | struct cgroup *cgrp = task_cgroup(tsk, i); | 4626 | struct cgroup *cgrp = task_cgroup(tsk, i); |
4685 | ss->exit(ss, cgrp, old_cgrp, tsk); | 4627 | ss->exit(cgrp, old_cgrp, tsk); |
4686 | } | 4628 | } |
4687 | } | 4629 | } |
4688 | } | 4630 | } |
@@ -4939,9 +4881,9 @@ void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css)
4939 | 4881 | ||
4940 | rcu_assign_pointer(id->css, NULL); | 4882 | rcu_assign_pointer(id->css, NULL); |
4941 | rcu_assign_pointer(css->id, NULL); | 4883 | rcu_assign_pointer(css->id, NULL); |
4942 | write_lock(&ss->id_lock); | 4884 | spin_lock(&ss->id_lock); |
4943 | idr_remove(&ss->idr, id->id); | 4885 | idr_remove(&ss->idr, id->id); |
4944 | write_unlock(&ss->id_lock); | 4886 | spin_unlock(&ss->id_lock); |
4945 | kfree_rcu(id, rcu_head); | 4887 | kfree_rcu(id, rcu_head); |
4946 | } | 4888 | } |
4947 | EXPORT_SYMBOL_GPL(free_css_id); | 4889 | EXPORT_SYMBOL_GPL(free_css_id); |
@@ -4967,10 +4909,10 @@ static struct css_id *get_new_cssid(struct cgroup_subsys *ss, int depth)
4967 | error = -ENOMEM; | 4909 | error = -ENOMEM; |
4968 | goto err_out; | 4910 | goto err_out; |
4969 | } | 4911 | } |
4970 | write_lock(&ss->id_lock); | 4912 | spin_lock(&ss->id_lock); |
4971 | /* Don't use 0. allocates an ID of 1-65535 */ | 4913 | /* Don't use 0. allocates an ID of 1-65535 */ |
4972 | error = idr_get_new_above(&ss->idr, newid, 1, &myid); | 4914 | error = idr_get_new_above(&ss->idr, newid, 1, &myid); |
4973 | write_unlock(&ss->id_lock); | 4915 | spin_unlock(&ss->id_lock); |
4974 | 4916 | ||
4975 | /* Returns error when there are no free spaces for new ID.*/ | 4917 | /* Returns error when there are no free spaces for new ID.*/ |
4976 | if (error) { | 4918 | if (error) { |
@@ -4985,9 +4927,9 @@ static struct css_id *get_new_cssid(struct cgroup_subsys *ss, int depth)
4985 | return newid; | 4927 | return newid; |
4986 | remove_idr: | 4928 | remove_idr: |
4987 | error = -ENOSPC; | 4929 | error = -ENOSPC; |
4988 | write_lock(&ss->id_lock); | 4930 | spin_lock(&ss->id_lock); |
4989 | idr_remove(&ss->idr, myid); | 4931 | idr_remove(&ss->idr, myid); |
4990 | write_unlock(&ss->id_lock); | 4932 | spin_unlock(&ss->id_lock); |
4991 | err_out: | 4933 | err_out: |
4992 | kfree(newid); | 4934 | kfree(newid); |
4993 | return ERR_PTR(error); | 4935 | return ERR_PTR(error); |
@@ -4999,7 +4941,7 @@ static int __init_or_module cgroup_init_idr(struct cgroup_subsys *ss,
4999 | { | 4941 | { |
5000 | struct css_id *newid; | 4942 | struct css_id *newid; |
5001 | 4943 | ||
5002 | rwlock_init(&ss->id_lock); | 4944 | spin_lock_init(&ss->id_lock); |
5003 | idr_init(&ss->idr); | 4945 | idr_init(&ss->idr); |
5004 | 4946 | ||
5005 | newid = get_new_cssid(ss, 0); | 4947 | newid = get_new_cssid(ss, 0); |
@@ -5087,6 +5029,8 @@ css_get_next(struct cgroup_subsys *ss, int id,
5087 | return NULL; | 5029 | return NULL; |
5088 | 5030 | ||
5089 | BUG_ON(!ss->use_id); | 5031 | BUG_ON(!ss->use_id); |
5032 | WARN_ON_ONCE(!rcu_read_lock_held()); | ||
5033 | |||
5090 | /* fill start point for scan */ | 5034 | /* fill start point for scan */ |
5091 | tmpid = id; | 5035 | tmpid = id; |
5092 | while (1) { | 5036 | while (1) { |
@@ -5094,10 +5038,7 @@ css_get_next(struct cgroup_subsys *ss, int id,
5094 | * scan next entry from bitmap(tree), tmpid is updated after | 5038 | * scan next entry from bitmap(tree), tmpid is updated after |
5095 | * idr_get_next(). | 5039 | * idr_get_next(). |
5096 | */ | 5040 | */ |
5097 | read_lock(&ss->id_lock); | ||
5098 | tmp = idr_get_next(&ss->idr, &tmpid); | 5041 | tmp = idr_get_next(&ss->idr, &tmpid); |
5099 | read_unlock(&ss->id_lock); | ||
5100 | |||
5101 | if (!tmp) | 5042 | if (!tmp) |
5102 | break; | 5043 | break; |
5103 | if (tmp->depth >= depth && tmp->stack[depth] == rootid) { | 5044 | if (tmp->depth >= depth && tmp->stack[depth] == rootid) { |
@@ -5137,8 +5078,7 @@ struct cgroup_subsys_state *cgroup_css_from_dir(struct file *f, int id)
5137 | } | 5078 | } |
5138 | 5079 | ||
5139 | #ifdef CONFIG_CGROUP_DEBUG | 5080 | #ifdef CONFIG_CGROUP_DEBUG |
5140 | static struct cgroup_subsys_state *debug_create(struct cgroup_subsys *ss, | 5081 | static struct cgroup_subsys_state *debug_create(struct cgroup *cont) |
5141 | struct cgroup *cont) | ||
5142 | { | 5082 | { |
5143 | struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL); | 5083 | struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL); |
5144 | 5084 | ||
@@ -5148,7 +5088,7 @@ static struct cgroup_subsys_state *debug_create(struct cgroup_subsys *ss,
5148 | return css; | 5088 | return css; |
5149 | } | 5089 | } |
5150 | 5090 | ||
5151 | static void debug_destroy(struct cgroup_subsys *ss, struct cgroup *cont) | 5091 | static void debug_destroy(struct cgroup *cont) |
5152 | { | 5092 | { |
5153 | kfree(cont->subsys[debug_subsys_id]); | 5093 | kfree(cont->subsys[debug_subsys_id]); |
5154 | } | 5094 | } |
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index fc0646b78a64..f86e93920b62 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -128,8 +128,7 @@ struct cgroup_subsys freezer_subsys;
128 | * task->alloc_lock (inside __thaw_task(), prevents race with refrigerator()) | 128 | * task->alloc_lock (inside __thaw_task(), prevents race with refrigerator()) |
129 | * sighand->siglock | 129 | * sighand->siglock |
130 | */ | 130 | */ |
131 | static struct cgroup_subsys_state *freezer_create(struct cgroup_subsys *ss, | 131 | static struct cgroup_subsys_state *freezer_create(struct cgroup *cgroup) |
132 | struct cgroup *cgroup) | ||
133 | { | 132 | { |
134 | struct freezer *freezer; | 133 | struct freezer *freezer; |
135 | 134 | ||
@@ -142,8 +141,7 @@ static struct cgroup_subsys_state *freezer_create(struct cgroup_subsys *ss,
142 | return &freezer->css; | 141 | return &freezer->css; |
143 | } | 142 | } |
144 | 143 | ||
145 | static void freezer_destroy(struct cgroup_subsys *ss, | 144 | static void freezer_destroy(struct cgroup *cgroup) |
146 | struct cgroup *cgroup) | ||
147 | { | 145 | { |
148 | struct freezer *freezer = cgroup_freezer(cgroup); | 146 | struct freezer *freezer = cgroup_freezer(cgroup); |
149 | 147 | ||
@@ -164,8 +162,7 @@ static bool is_task_frozen_enough(struct task_struct *task)
164 | * a write to that file racing against an attach, and hence the | 162 | * a write to that file racing against an attach, and hence the |
165 | * can_attach() result will remain valid until the attach completes. | 163 | * can_attach() result will remain valid until the attach completes. |
166 | */ | 164 | */ |
167 | static int freezer_can_attach(struct cgroup_subsys *ss, | 165 | static int freezer_can_attach(struct cgroup *new_cgroup, |
168 | struct cgroup *new_cgroup, | ||
169 | struct cgroup_taskset *tset) | 166 | struct cgroup_taskset *tset) |
170 | { | 167 | { |
171 | struct freezer *freezer; | 168 | struct freezer *freezer; |
@@ -185,7 +182,7 @@ static int freezer_can_attach(struct cgroup_subsys *ss,
185 | return 0; | 182 | return 0; |
186 | } | 183 | } |
187 | 184 | ||
188 | static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task) | 185 | static void freezer_fork(struct task_struct *task) |
189 | { | 186 | { |
190 | struct freezer *freezer; | 187 | struct freezer *freezer; |
191 | 188 | ||
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index a09ac2b9a661..1010cc61931f 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -964,7 +964,6 @@ static void cpuset_change_task_nodemask(struct task_struct *tsk,
964 | { | 964 | { |
965 | bool need_loop; | 965 | bool need_loop; |
966 | 966 | ||
967 | repeat: | ||
968 | /* | 967 | /* |
969 | * Allow tasks that have access to memory reserves because they have | 968 | * Allow tasks that have access to memory reserves because they have |
970 | * been OOM killed to get memory anywhere. | 969 | * been OOM killed to get memory anywhere. |
@@ -983,45 +982,19 @@ repeat:
983 | */ | 982 | */ |
984 | need_loop = task_has_mempolicy(tsk) || | 983 | need_loop = task_has_mempolicy(tsk) || |
985 | !nodes_intersects(*newmems, tsk->mems_allowed); | 984 | !nodes_intersects(*newmems, tsk->mems_allowed); |
986 | nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems); | ||
987 | mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP1); | ||
988 | 985 | ||
989 | /* | 986 | if (need_loop) |
990 | * ensure checking ->mems_allowed_change_disable after setting all new | 987 | write_seqcount_begin(&tsk->mems_allowed_seq); |
991 | * allowed nodes. | ||
992 | * | ||
993 | * the read-side task can see an nodemask with new allowed nodes and | ||
994 | * old allowed nodes. and if it allocates page when cpuset clears newly | ||
995 | * disallowed ones continuous, it can see the new allowed bits. | ||
996 | * | ||
997 | * And if setting all new allowed nodes is after the checking, setting | ||
998 | * all new allowed nodes and clearing newly disallowed ones will be done | ||
999 | * continuous, and the read-side task may find no node to alloc page. | ||
1000 | */ | ||
1001 | smp_mb(); | ||
1002 | 988 | ||
1003 | /* | 989 | nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems); |
1004 | * Allocation of memory is very fast, we needn't sleep when waiting | 990 | mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP1); |
1005 | * for the read-side. | ||
1006 | */ | ||
1007 | while (need_loop && ACCESS_ONCE(tsk->mems_allowed_change_disable)) { | ||
1008 | task_unlock(tsk); | ||
1009 | if (!task_curr(tsk)) | ||
1010 | yield(); | ||
1011 | goto repeat; | ||
1012 | } | ||
1013 | |||
1014 | /* | ||
1015 | * ensure checking ->mems_allowed_change_disable before clearing all new | ||
1016 | * disallowed nodes. | ||
1017 | * | ||
1018 | * if clearing newly disallowed bits before the checking, the read-side | ||
1019 | * task may find no node to alloc page. | ||
1020 | */ | ||
1021 | smp_mb(); | ||
1022 | 991 | ||
1023 | mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP2); | 992 | mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP2); |
1024 | tsk->mems_allowed = *newmems; | 993 | tsk->mems_allowed = *newmems; |
994 | |||
995 | if (need_loop) | ||
996 | write_seqcount_end(&tsk->mems_allowed_seq); | ||
997 | |||
1025 | task_unlock(tsk); | 998 | task_unlock(tsk); |
1026 | } | 999 | } |
1027 | 1000 | ||
@@ -1399,8 +1372,7 @@ static nodemask_t cpuset_attach_nodemask_from;
1399 | static nodemask_t cpuset_attach_nodemask_to; | 1372 | static nodemask_t cpuset_attach_nodemask_to; |
1400 | 1373 | ||
1401 | /* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */ | 1374 | /* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */ |
1402 | static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, | 1375 | static int cpuset_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) |
1403 | struct cgroup_taskset *tset) | ||
1404 | { | 1376 | { |
1405 | struct cpuset *cs = cgroup_cs(cgrp); | 1377 | struct cpuset *cs = cgroup_cs(cgrp); |
1406 | struct task_struct *task; | 1378 | struct task_struct *task; |
@@ -1436,8 +1408,7 @@ static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
1436 | return 0; | 1408 | return 0; |
1437 | } | 1409 | } |
1438 | 1410 | ||
1439 | static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, | 1411 | static void cpuset_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) |
1440 | struct cgroup_taskset *tset) | ||
1441 | { | 1412 | { |
1442 | struct mm_struct *mm; | 1413 | struct mm_struct *mm; |
1443 | struct task_struct *task; | 1414 | struct task_struct *task; |
@@ -1833,8 +1804,7 @@ static int cpuset_populate(struct cgroup_subsys *ss, struct cgroup *cont)
1833 | * (and likewise for mems) to the new cgroup. Called with cgroup_mutex | 1804 | * (and likewise for mems) to the new cgroup. Called with cgroup_mutex |
1834 | * held. | 1805 | * held. |
1835 | */ | 1806 | */ |
1836 | static void cpuset_post_clone(struct cgroup_subsys *ss, | 1807 | static void cpuset_post_clone(struct cgroup *cgroup) |
1837 | struct cgroup *cgroup) | ||
1838 | { | 1808 | { |
1839 | struct cgroup *parent, *child; | 1809 | struct cgroup *parent, *child; |
1840 | struct cpuset *cs, *parent_cs; | 1810 | struct cpuset *cs, *parent_cs; |
@@ -1857,13 +1827,10 @@ static void cpuset_post_clone(struct cgroup_subsys *ss,
1857 | 1827 | ||
1858 | /* | 1828 | /* |
1859 | * cpuset_create - create a cpuset | 1829 | * cpuset_create - create a cpuset |
1860 | * ss: cpuset cgroup subsystem | ||
1861 | * cont: control group that the new cpuset will be part of | 1830 | * cont: control group that the new cpuset will be part of |
1862 | */ | 1831 | */ |
1863 | 1832 | ||
1864 | static struct cgroup_subsys_state *cpuset_create( | 1833 | static struct cgroup_subsys_state *cpuset_create(struct cgroup *cont) |
1865 | struct cgroup_subsys *ss, | ||
1866 | struct cgroup *cont) | ||
1867 | { | 1834 | { |
1868 | struct cpuset *cs; | 1835 | struct cpuset *cs; |
1869 | struct cpuset *parent; | 1836 | struct cpuset *parent; |
@@ -1902,7 +1869,7 @@ static struct cgroup_subsys_state *cpuset_create(
1902 | * will call async_rebuild_sched_domains(). | 1869 | * will call async_rebuild_sched_domains(). |
1903 | */ | 1870 | */ |
1904 | 1871 | ||
1905 | static void cpuset_destroy(struct cgroup_subsys *ss, struct cgroup *cont) | 1872 | static void cpuset_destroy(struct cgroup *cont) |
1906 | { | 1873 | { |
1907 | struct cpuset *cs = cgroup_cs(cont); | 1874 | struct cpuset *cs = cgroup_cs(cont); |
1908 | 1875 | ||
diff --git a/kernel/cred.c b/kernel/cred.c
index 5791612a4045..97b36eeca4c9 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -16,6 +16,7 @@
16 | #include <linux/keyctl.h> | 16 | #include <linux/keyctl.h> |
17 | #include <linux/init_task.h> | 17 | #include <linux/init_task.h> |
18 | #include <linux/security.h> | 18 | #include <linux/security.h> |
19 | #include <linux/binfmts.h> | ||
19 | #include <linux/cn_proc.h> | 20 | #include <linux/cn_proc.h> |
20 | 21 | ||
21 | #if 0 | 22 | #if 0 |
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index 0d7c08784efb..3f88a45e6f0a 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -41,6 +41,7 @@
41 | #include <linux/delay.h> | 41 | #include <linux/delay.h> |
42 | #include <linux/sched.h> | 42 | #include <linux/sched.h> |
43 | #include <linux/sysrq.h> | 43 | #include <linux/sysrq.h> |
44 | #include <linux/reboot.h> | ||
44 | #include <linux/init.h> | 45 | #include <linux/init.h> |
45 | #include <linux/kgdb.h> | 46 | #include <linux/kgdb.h> |
46 | #include <linux/kdb.h> | 47 | #include <linux/kdb.h> |
@@ -75,6 +76,8 @@ static int exception_level;
75 | struct kgdb_io *dbg_io_ops; | 76 | struct kgdb_io *dbg_io_ops; |
76 | static DEFINE_SPINLOCK(kgdb_registration_lock); | 77 | static DEFINE_SPINLOCK(kgdb_registration_lock); |
77 | 78 | ||
79 | /* Action for the reboot notifiter, a global allow kdb to change it */ | ||
80 | static int kgdbreboot; | ||
78 | /* kgdb console driver is loaded */ | 81 | /* kgdb console driver is loaded */ |
79 | static int kgdb_con_registered; | 82 | static int kgdb_con_registered; |
80 | /* determine if kgdb console output should be used */ | 83 | /* determine if kgdb console output should be used */ |
@@ -96,6 +99,7 @@ static int __init opt_kgdb_con(char *str)
96 | early_param("kgdbcon", opt_kgdb_con); | 99 | early_param("kgdbcon", opt_kgdb_con); |
97 | 100 | ||
98 | module_param(kgdb_use_con, int, 0644); | 101 | module_param(kgdb_use_con, int, 0644); |
102 | module_param(kgdbreboot, int, 0644); | ||
99 | 103 | ||
100 | /* | 104 | /* |
101 | * Holds information about breakpoints in a kernel. These breakpoints are | 105 | * Holds information about breakpoints in a kernel. These breakpoints are |
@@ -784,6 +788,33 @@ void __init dbg_late_init(void)
784 | kdb_init(KDB_INIT_FULL); | 788 | kdb_init(KDB_INIT_FULL); |
785 | } | 789 | } |
786 | 790 | ||
791 | static int | ||
792 | dbg_notify_reboot(struct notifier_block *this, unsigned long code, void *x) | ||
793 | { | ||
794 | /* | ||
795 | * Take the following action on reboot notify depending on value: | ||
796 | * 1 == Enter debugger | ||
797 | * 0 == [the default] detatch debug client | ||
798 | * -1 == Do nothing... and use this until the board resets | ||
799 | */ | ||
800 | switch (kgdbreboot) { | ||
801 | case 1: | ||
802 | kgdb_breakpoint(); | ||
803 | case -1: | ||
804 | goto done; | ||
805 | } | ||
806 | if (!dbg_kdb_mode) | ||
807 | gdbstub_exit(code); | ||
808 | done: | ||
809 | return NOTIFY_DONE; | ||
810 | } | ||
811 | |||
812 | static struct notifier_block dbg_reboot_notifier = { | ||
813 | .notifier_call = dbg_notify_reboot, | ||
814 | .next = NULL, | ||
815 | .priority = INT_MAX, | ||
816 | }; | ||
817 | |||
787 | static void kgdb_register_callbacks(void) | 818 | static void kgdb_register_callbacks(void) |
788 | { | 819 | { |
789 | if (!kgdb_io_module_registered) { | 820 | if (!kgdb_io_module_registered) { |
@@ -791,6 +822,7 @@ static void kgdb_register_callbacks(void)
791 | kgdb_arch_init(); | 822 | kgdb_arch_init(); |
792 | if (!dbg_is_early) | 823 | if (!dbg_is_early) |
793 | kgdb_arch_late(); | 824 | kgdb_arch_late(); |
825 | register_reboot_notifier(&dbg_reboot_notifier); | ||
794 | atomic_notifier_chain_register(&panic_notifier_list, | 826 | atomic_notifier_chain_register(&panic_notifier_list, |
795 | &kgdb_panic_event_nb); | 827 | &kgdb_panic_event_nb); |
796 | #ifdef CONFIG_MAGIC_SYSRQ | 828 | #ifdef CONFIG_MAGIC_SYSRQ |
@@ -812,6 +844,7 @@ static void kgdb_unregister_callbacks(void)
812 | */ | 844 | */ |
813 | if (kgdb_io_module_registered) { | 845 | if (kgdb_io_module_registered) { |
814 | kgdb_io_module_registered = 0; | 846 | kgdb_io_module_registered = 0; |
847 | unregister_reboot_notifier(&dbg_reboot_notifier); | ||
815 | atomic_notifier_chain_unregister(&panic_notifier_list, | 848 | atomic_notifier_chain_unregister(&panic_notifier_list, |
816 | &kgdb_panic_event_nb); | 849 | &kgdb_panic_event_nb); |
817 | kgdb_arch_exit(); | 850 | kgdb_arch_exit(); |
diff --git a/kernel/debug/gdbstub.c b/kernel/debug/gdbstub.c
index c22d8c28ad84..ce615e064482 100644
--- a/kernel/debug/gdbstub.c
+++ b/kernel/debug/gdbstub.c
@@ -1111,6 +1111,13 @@ void gdbstub_exit(int status)
1111 | unsigned char checksum, ch, buffer[3]; | 1111 | unsigned char checksum, ch, buffer[3]; |
1112 | int loop; | 1112 | int loop; |
1113 | 1113 | ||
1114 | if (!kgdb_connected) | ||
1115 | return; | ||
1116 | kgdb_connected = 0; | ||
1117 | |||
1118 | if (!dbg_io_ops || dbg_kdb_mode) | ||
1119 | return; | ||
1120 | |||
1114 | buffer[0] = 'W'; | 1121 | buffer[0] = 'W'; |
1115 | buffer[1] = hex_asc_hi(status); | 1122 | buffer[1] = hex_asc_hi(status); |
1116 | buffer[2] = hex_asc_lo(status); | 1123 | buffer[2] = hex_asc_lo(status); |
@@ -1129,5 +1136,6 @@ void gdbstub_exit(int status)
1129 | dbg_io_ops->write_char(hex_asc_lo(checksum)); | 1136 | dbg_io_ops->write_char(hex_asc_lo(checksum)); |
1130 | 1137 | ||
1131 | /* make sure the output is flushed, lest the bootloader clobber it */ | 1138 | /* make sure the output is flushed, lest the bootloader clobber it */ |
1132 | dbg_io_ops->flush(); | 1139 | if (dbg_io_ops->flush) |
1140 | dbg_io_ops->flush(); | ||
1133 | } | 1141 | } |
diff --git a/kernel/debug/kdb/kdb_bp.c b/kernel/debug/kdb/kdb_bp.c
index 20059ef4459a..8418c2f8ec5d 100644
--- a/kernel/debug/kdb/kdb_bp.c
+++ b/kernel/debug/kdb/kdb_bp.c
@@ -153,6 +153,13 @@ static int _kdb_bp_install(struct pt_regs *regs, kdb_bp_t *bp)
153 | } else { | 153 | } else { |
154 | kdb_printf("%s: failed to set breakpoint at 0x%lx\n", | 154 | kdb_printf("%s: failed to set breakpoint at 0x%lx\n", |
155 | __func__, bp->bp_addr); | 155 | __func__, bp->bp_addr); |
156 | #ifdef CONFIG_DEBUG_RODATA | ||
157 | if (!bp->bp_type) { | ||
158 | kdb_printf("Software breakpoints are unavailable.\n" | ||
159 | " Change the kernel CONFIG_DEBUG_RODATA=n\n" | ||
160 | " OR use hw breaks: help bph\n"); | ||
161 | } | ||
162 | #endif | ||
156 | return 1; | 163 | return 1; |
157 | } | 164 | } |
158 | return 0; | 165 | return 0; |
diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c index 4802eb5840e1..9b5f17da1c56 100644 --- a/kernel/debug/kdb/kdb_io.c +++ b/kernel/debug/kdb/kdb_io.c | |||
@@ -689,7 +689,7 @@ kdb_printit: | |||
689 | if (!dbg_kdb_mode && kgdb_connected) { | 689 | if (!dbg_kdb_mode && kgdb_connected) { |
690 | gdbstub_msg_write(kdb_buffer, retlen); | 690 | gdbstub_msg_write(kdb_buffer, retlen); |
691 | } else { | 691 | } else { |
692 | if (!dbg_io_ops->is_console) { | 692 | if (dbg_io_ops && !dbg_io_ops->is_console) { |
693 | len = strlen(kdb_buffer); | 693 | len = strlen(kdb_buffer); |
694 | cp = kdb_buffer; | 694 | cp = kdb_buffer; |
695 | while (len--) { | 695 | while (len--) { |
diff --git a/kernel/debug/kdb/kdb_keyboard.c b/kernel/debug/kdb/kdb_keyboard.c index 4bca634975c0..118527aa60ea 100644 --- a/kernel/debug/kdb/kdb_keyboard.c +++ b/kernel/debug/kdb/kdb_keyboard.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #define KBD_STAT_MOUSE_OBF 0x20 /* Mouse output buffer full */ | 25 | #define KBD_STAT_MOUSE_OBF 0x20 /* Mouse output buffer full */ |
26 | 26 | ||
27 | static int kbd_exists; | 27 | static int kbd_exists; |
28 | static int kbd_last_ret; | ||
28 | 29 | ||
29 | /* | 30 | /* |
30 | * Check if the keyboard controller has a keypress for us. | 31 | * Check if the keyboard controller has a keypress for us. |
@@ -90,8 +91,11 @@ int kdb_get_kbd_char(void) | |||
90 | return -1; | 91 | return -1; |
91 | } | 92 | } |
92 | 93 | ||
93 | if ((scancode & 0x80) != 0) | 94 | if ((scancode & 0x80) != 0) { |
95 | if (scancode == 0x9c) | ||
96 | kbd_last_ret = 0; | ||
94 | return -1; | 97 | return -1; |
98 | } | ||
95 | 99 | ||
96 | scancode &= 0x7f; | 100 | scancode &= 0x7f; |
97 | 101 | ||
@@ -178,35 +182,82 @@ int kdb_get_kbd_char(void) | |||
178 | return -1; /* ignore unprintables */ | 182 | return -1; /* ignore unprintables */ |
179 | } | 183 | } |
180 | 184 | ||
181 | if ((scancode & 0x7f) == 0x1c) { | 185 | if (scancode == 0x1c) { |
182 | /* | 186 | kbd_last_ret = 1; |
183 | * enter key. All done. Absorb the release scancode. | 187 | return 13; |
184 | */ | 188 | } |
189 | |||
190 | return keychar & 0xff; | ||
191 | } | ||
192 | EXPORT_SYMBOL_GPL(kdb_get_kbd_char); | ||
193 | |||
194 | /* | ||
195 | * Best effort cleanup of ENTER break codes on leaving KDB. Called on | ||
196 | * exiting KDB, when we know we processed an ENTER or KP ENTER scan | ||
197 | * code. | ||
198 | */ | ||
199 | void kdb_kbd_cleanup_state(void) | ||
200 | { | ||
201 | int scancode, scanstatus; | ||
202 | |||
203 | /* | ||
204 | * Nothing to clean up, since either | ||
205 | * ENTER was never pressed, or has already | ||
206 | * gotten cleaned up. | ||
207 | */ | ||
208 | if (!kbd_last_ret) | ||
209 | return; | ||
210 | |||
211 | kbd_last_ret = 0; | ||
212 | /* | ||
213 | * Enter key. Need to absorb the break code here, lest it gets | ||
214 | * leaked out if we exit KDB as the result of processing 'g'. | ||
215 | * | ||
216 | * This has several interesting implications: | ||
217 | * + Need to handle KP ENTER, which has break code 0xe0 0x9c. | ||
218 | * + Need to handle repeat ENTER and repeat KP ENTER. Repeats | ||
219 | * only get a break code at the end of the repeated | ||
220 | * sequence. This means we can't propagate the repeated key | ||
221 | * press, and must swallow it away. | ||
222 | * + Need to handle possible PS/2 mouse input. | ||
223 | * + Need to handle mashed keys. | ||
224 | */ | ||
225 | |||
226 | while (1) { | ||
185 | while ((inb(KBD_STATUS_REG) & KBD_STAT_OBF) == 0) | 227 | while ((inb(KBD_STATUS_REG) & KBD_STAT_OBF) == 0) |
186 | ; | 228 | cpu_relax(); |
187 | 229 | ||
188 | /* | 230 | /* |
189 | * Fetch the scancode | 231 | * Fetch the scancode. |
190 | */ | 232 | */ |
191 | scancode = inb(KBD_DATA_REG); | 233 | scancode = inb(KBD_DATA_REG); |
192 | scanstatus = inb(KBD_STATUS_REG); | 234 | scanstatus = inb(KBD_STATUS_REG); |
193 | 235 | ||
194 | while (scanstatus & KBD_STAT_MOUSE_OBF) { | 236 | /* |
195 | scancode = inb(KBD_DATA_REG); | 237 | * Skip mouse input. |
196 | scanstatus = inb(KBD_STATUS_REG); | 238 | */ |
197 | } | 239 | if (scanstatus & KBD_STAT_MOUSE_OBF) |
240 | continue; | ||
198 | 241 | ||
199 | if (scancode != 0x9c) { | 242 | /* |
200 | /* | 243 | * If we see 0xe0, this is either a break code for KP |
201 | * Wasn't an enter-release, why not? | 244 | * ENTER, or a repeat make for KP ENTER. Either way, |
202 | */ | 245 | * since the second byte is equivalent to an ENTER, |
203 | kdb_printf("kdb: expected enter got 0x%x status 0x%x\n", | 246 | * skip the 0xe0 and try again. |
204 | scancode, scanstatus); | 247 | * |
205 | } | 248 | * If we see 0x1c, this must be a repeat ENTER or KP |
249 | * ENTER (and we swallowed 0xe0 before). Try again. | ||
250 | * | ||
251 | * We can also see make and break codes for other keys | ||
252 | * mashed before or after pressing ENTER. Thus, if we | ||
253 | * see anything other than 0x9c, we have to try again. | ||
254 | * | ||
255 | * Note, if you held some key as ENTER was depressed, | ||
256 | * that break code would get leaked out. | ||
257 | */ | ||
258 | if (scancode != 0x9c) | ||
259 | continue; | ||
206 | 260 | ||
207 | return 13; | 261 | return; |
208 | } | 262 | } |
209 | |||
210 | return keychar & 0xff; | ||
211 | } | 263 | } |
212 | EXPORT_SYMBOL_GPL(kdb_get_kbd_char); | ||
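The rewritten keyboard handling remembers whether the last key KDB consumed was ENTER (kbd_last_ret) and, on leaving the debugger, drains any pending break codes so they are not leaked to whoever owns the console next. The in-code comment spells out the tricky cases; as a quick user-space reminder of the scancode-set-1 values it relies on (illustrative only, not part of the patch): a release is the press code with bit 7 set, ENTER is 0x1c/0x9c, and keypad ENTER sends the same pair behind an 0xe0 prefix.

#include <stdio.h>

/* Illustration only: the scancode-set-1 facts the cleanup loop relies on. */
int main(void)
{
	/* KP ENTER press + release as seen on the wire: e0 1c e0 9c */
	unsigned char seq[] = { 0xe0, 0x1c, 0xe0, 0x9c };
	size_t i;

	for (i = 0; i < sizeof(seq); i++) {
		unsigned char sc = seq[i];

		if (sc == 0xe0)
			printf("0x%02x: extended-key prefix\n", sc);
		else if (sc & 0x80)
			printf("0x%02x: break code (release of 0x%02x)\n",
			       sc, sc & 0x7f);
		else
			printf("0x%02x: make code (press)\n", sc);
	}
	return 0;
}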
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index e2ae7349437f..67b847dfa2bb 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c | |||
@@ -1400,6 +1400,9 @@ int kdb_main_loop(kdb_reason_t reason, kdb_reason_t reason2, int error, | |||
1400 | if (KDB_STATE(DOING_SS)) | 1400 | if (KDB_STATE(DOING_SS)) |
1401 | KDB_STATE_CLEAR(SSBPT); | 1401 | KDB_STATE_CLEAR(SSBPT); |
1402 | 1402 | ||
1403 | /* Clean up any keyboard devices before leaving */ | ||
1404 | kdb_kbd_cleanup_state(); | ||
1405 | |||
1403 | return result; | 1406 | return result; |
1404 | } | 1407 | } |
1405 | 1408 | ||
diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h index e381d105b40b..47c4e56e513b 100644 --- a/kernel/debug/kdb/kdb_private.h +++ b/kernel/debug/kdb/kdb_private.h | |||
@@ -246,6 +246,13 @@ extern void debug_kusage(void); | |||
246 | 246 | ||
247 | extern void kdb_set_current_task(struct task_struct *); | 247 | extern void kdb_set_current_task(struct task_struct *); |
248 | extern struct task_struct *kdb_current_task; | 248 | extern struct task_struct *kdb_current_task; |
249 | |||
250 | #ifdef CONFIG_KDB_KEYBOARD | ||
251 | extern void kdb_kbd_cleanup_state(void); | ||
252 | #else /* ! CONFIG_KDB_KEYBOARD */ | ||
253 | #define kdb_kbd_cleanup_state() | ||
254 | #endif /* ! CONFIG_KDB_KEYBOARD */ | ||
255 | |||
249 | #ifdef CONFIG_MODULES | 256 | #ifdef CONFIG_MODULES |
250 | extern struct list_head *kdb_modules; | 257 | extern struct list_head *kdb_modules; |
251 | #endif /* CONFIG_MODULES */ | 258 | #endif /* CONFIG_MODULES */ |
diff --git a/kernel/debug/kdb/kdb_support.c b/kernel/debug/kdb/kdb_support.c index 7d6fb40d2188..d35cc2d3a4cc 100644 --- a/kernel/debug/kdb/kdb_support.c +++ b/kernel/debug/kdb/kdb_support.c | |||
@@ -384,9 +384,9 @@ static int kdb_getphys(void *res, unsigned long addr, size_t size) | |||
384 | if (!pfn_valid(pfn)) | 384 | if (!pfn_valid(pfn)) |
385 | return 1; | 385 | return 1; |
386 | page = pfn_to_page(pfn); | 386 | page = pfn_to_page(pfn); |
387 | vaddr = kmap_atomic(page, KM_KDB); | 387 | vaddr = kmap_atomic(page); |
388 | memcpy(res, vaddr + (addr & (PAGE_SIZE - 1)), size); | 388 | memcpy(res, vaddr + (addr & (PAGE_SIZE - 1)), size); |
389 | kunmap_atomic(vaddr, KM_KDB); | 389 | kunmap_atomic(vaddr); |
390 | 390 | ||
391 | return 0; | 391 | return 0; |
392 | } | 392 | } |
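kdb_getphys() is updated for the kmap_atomic() API cleanup: the per-CPU KM_* slot argument is gone, and kunmap_atomic() now takes the mapped address instead of the slot. A self-contained sketch of the new calling convention (a hypothetical helper, not from this patch):

#include <linux/highmem.h>
#include <linux/string.h>

/* Illustration of the new kmap_atomic() convention used above: no KM_*
 * slot argument, and kunmap_atomic() takes the mapped address. */
static void example_copy_from_page(void *dst, struct page *page,
				   unsigned int offset, size_t len)
{
	char *vaddr = kmap_atomic(page);

	memcpy(dst, vaddr + offset, len);
	kunmap_atomic(vaddr);
}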
diff --git a/kernel/events/core.c b/kernel/events/core.c index 3f92a19aa11e..a6a9ec4cd8f5 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c | |||
@@ -7154,8 +7154,7 @@ unlock: | |||
7154 | device_initcall(perf_event_sysfs_init); | 7154 | device_initcall(perf_event_sysfs_init); |
7155 | 7155 | ||
7156 | #ifdef CONFIG_CGROUP_PERF | 7156 | #ifdef CONFIG_CGROUP_PERF |
7157 | static struct cgroup_subsys_state *perf_cgroup_create( | 7157 | static struct cgroup_subsys_state *perf_cgroup_create(struct cgroup *cont) |
7158 | struct cgroup_subsys *ss, struct cgroup *cont) | ||
7159 | { | 7158 | { |
7160 | struct perf_cgroup *jc; | 7159 | struct perf_cgroup *jc; |
7161 | 7160 | ||
@@ -7172,8 +7171,7 @@ static struct cgroup_subsys_state *perf_cgroup_create( | |||
7172 | return &jc->css; | 7171 | return &jc->css; |
7173 | } | 7172 | } |
7174 | 7173 | ||
7175 | static void perf_cgroup_destroy(struct cgroup_subsys *ss, | 7174 | static void perf_cgroup_destroy(struct cgroup *cont) |
7176 | struct cgroup *cont) | ||
7177 | { | 7175 | { |
7178 | struct perf_cgroup *jc; | 7176 | struct perf_cgroup *jc; |
7179 | jc = container_of(cgroup_subsys_state(cont, perf_subsys_id), | 7177 | jc = container_of(cgroup_subsys_state(cont, perf_subsys_id), |
@@ -7189,8 +7187,7 @@ static int __perf_cgroup_move(void *info) | |||
7189 | return 0; | 7187 | return 0; |
7190 | } | 7188 | } |
7191 | 7189 | ||
7192 | static void perf_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, | 7190 | static void perf_cgroup_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) |
7193 | struct cgroup_taskset *tset) | ||
7194 | { | 7191 | { |
7195 | struct task_struct *task; | 7192 | struct task_struct *task; |
7196 | 7193 | ||
@@ -7198,8 +7195,8 @@ static void perf_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, | |||
7198 | task_function_call(task, __perf_cgroup_move, task); | 7195 | task_function_call(task, __perf_cgroup_move, task); |
7199 | } | 7196 | } |
7200 | 7197 | ||
7201 | static void perf_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp, | 7198 | static void perf_cgroup_exit(struct cgroup *cgrp, struct cgroup *old_cgrp, |
7202 | struct cgroup *old_cgrp, struct task_struct *task) | 7199 | struct task_struct *task) |
7203 | { | 7200 | { |
7204 | /* | 7201 | /* |
7205 | * cgroup_exit() is called in the copy_process() failure path. | 7202 | * cgroup_exit() is called in the copy_process() failure path. |
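These perf hunks are part of the tree-wide cgroup API change visible throughout this merge: every cgroup_subsys callback (create, destroy, attach, exit, and so on) loses its struct cgroup_subsys * argument, since the subsystem is implied by the callback itself. A hypothetical subsystem written against the new signatures would look roughly like this; the example_* names are illustrative only and do not exist in the tree.

#include <linux/cgroup.h>
#include <linux/err.h>
#include <linux/slab.h>

struct example_cgroup {
	struct cgroup_subsys_state css;
};

static struct cgroup_subsys_state *example_create(struct cgroup *cgrp)
{
	struct example_cgroup *ec = kzalloc(sizeof(*ec), GFP_KERNEL);

	return ec ? &ec->css : ERR_PTR(-ENOMEM);
}

static void example_destroy(struct cgroup *cgrp)
{
	/* In a real subsystem: look the state up via
	 * cgroup_subsys_state(cgrp, <subsys id>) and kfree() it. */
}

static void example_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
{
	struct task_struct *task;

	cgroup_taskset_for_each(task, cgrp, tset)
		;	/* per-task migration work goes here */
}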
diff --git a/kernel/exit.c b/kernel/exit.c index 752d2c0abd19..3db1909faed9 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
@@ -52,6 +52,7 @@ | |||
52 | #include <linux/hw_breakpoint.h> | 52 | #include <linux/hw_breakpoint.h> |
53 | #include <linux/oom.h> | 53 | #include <linux/oom.h> |
54 | #include <linux/writeback.h> | 54 | #include <linux/writeback.h> |
55 | #include <linux/shm.h> | ||
55 | 56 | ||
56 | #include <asm/uaccess.h> | 57 | #include <asm/uaccess.h> |
57 | #include <asm/unistd.h> | 58 | #include <asm/unistd.h> |
@@ -424,7 +425,7 @@ void daemonize(const char *name, ...) | |||
424 | */ | 425 | */ |
425 | exit_mm(current); | 426 | exit_mm(current); |
426 | /* | 427 | /* |
427 | * We don't want to have TIF_FREEZE set if the system-wide hibernation | 428 | * We don't want to get frozen, in case system-wide hibernation |
428 | * or suspend transition begins right now. | 429 | * or suspend transition begins right now. |
429 | */ | 430 | */ |
430 | current->flags |= (PF_NOFREEZE | PF_KTHREAD); | 431 | current->flags |= (PF_NOFREEZE | PF_KTHREAD); |
@@ -686,11 +687,11 @@ static void exit_mm(struct task_struct * tsk) | |||
686 | } | 687 | } |
687 | 688 | ||
688 | /* | 689 | /* |
689 | * When we die, we re-parent all our children. | 690 | * When we die, we re-parent all our children, and try to: |
690 | * Try to give them to another thread in our thread | 691 | * 1. give them to another thread in our thread group, if such a member exists |
691 | * group, and if no such member exists, give it to | 692 | * 2. give it to the first ancestor process which prctl'd itself as a |
692 | * the child reaper process (ie "init") in our pid | 693 | * child_subreaper for its children (like a service manager) |
693 | * space. | 694 | * 3. give it to the init process (PID 1) in our pid namespace |
694 | */ | 695 | */ |
695 | static struct task_struct *find_new_reaper(struct task_struct *father) | 696 | static struct task_struct *find_new_reaper(struct task_struct *father) |
696 | __releases(&tasklist_lock) | 697 | __releases(&tasklist_lock) |
@@ -710,8 +711,11 @@ static struct task_struct *find_new_reaper(struct task_struct *father) | |||
710 | 711 | ||
711 | if (unlikely(pid_ns->child_reaper == father)) { | 712 | if (unlikely(pid_ns->child_reaper == father)) { |
712 | write_unlock_irq(&tasklist_lock); | 713 | write_unlock_irq(&tasklist_lock); |
713 | if (unlikely(pid_ns == &init_pid_ns)) | 714 | if (unlikely(pid_ns == &init_pid_ns)) { |
714 | panic("Attempted to kill init!"); | 715 | panic("Attempted to kill init! exitcode=0x%08x\n", |
716 | father->signal->group_exit_code ?: | ||
717 | father->exit_code); | ||
718 | } | ||
715 | 719 | ||
716 | zap_pid_ns_processes(pid_ns); | 720 | zap_pid_ns_processes(pid_ns); |
717 | write_lock_irq(&tasklist_lock); | 721 | write_lock_irq(&tasklist_lock); |
@@ -721,6 +725,29 @@ static struct task_struct *find_new_reaper(struct task_struct *father) | |||
721 | * forget_original_parent() must move them somewhere. | 725 | * forget_original_parent() must move them somewhere. |
722 | */ | 726 | */ |
723 | pid_ns->child_reaper = init_pid_ns.child_reaper; | 727 | pid_ns->child_reaper = init_pid_ns.child_reaper; |
728 | } else if (father->signal->has_child_subreaper) { | ||
729 | struct task_struct *reaper; | ||
730 | |||
731 | /* | ||
732 | * Find the first ancestor marked as child_subreaper. | ||
733 | * Note that the code below checks same_thread_group(reaper, | ||
734 | * pid_ns->child_reaper). This is what we need to DTRT in a | ||
735 | * PID namespace. However we still need the check above, see | ||
736 | * http://marc.info/?l=linux-kernel&m=131385460420380 | ||
737 | */ | ||
738 | for (reaper = father->real_parent; | ||
739 | reaper != &init_task; | ||
740 | reaper = reaper->real_parent) { | ||
741 | if (same_thread_group(reaper, pid_ns->child_reaper)) | ||
742 | break; | ||
743 | if (!reaper->signal->is_child_subreaper) | ||
744 | continue; | ||
745 | thread = reaper; | ||
746 | do { | ||
747 | if (!(thread->flags & PF_EXITING)) | ||
748 | return reaper; | ||
749 | } while_each_thread(reaper, thread); | ||
750 | } | ||
724 | } | 751 | } |
725 | 752 | ||
726 | return pid_ns->child_reaper; | 753 | return pid_ns->child_reaper; |
@@ -818,25 +845,6 @@ static void exit_notify(struct task_struct *tsk, int group_dead) | |||
818 | if (group_dead) | 845 | if (group_dead) |
819 | kill_orphaned_pgrp(tsk->group_leader, NULL); | 846 | kill_orphaned_pgrp(tsk->group_leader, NULL); |
820 | 847 | ||
821 | /* Let father know we died | ||
822 | * | ||
823 | * Thread signals are configurable, but you aren't going to use | ||
824 | * that to send signals to arbitrary processes. | ||
825 | * That stops right now. | ||
826 | * | ||
827 | * If the parent exec id doesn't match the exec id we saved | ||
828 | * when we started then we know the parent has changed security | ||
829 | * domain. | ||
830 | * | ||
831 | * If our self_exec id doesn't match our parent_exec_id then | ||
832 | * we have changed execution domain as these two values started | ||
833 | * the same after a fork. | ||
834 | */ | ||
835 | if (thread_group_leader(tsk) && tsk->exit_signal != SIGCHLD && | ||
836 | (tsk->parent_exec_id != tsk->real_parent->self_exec_id || | ||
837 | tsk->self_exec_id != tsk->parent_exec_id)) | ||
838 | tsk->exit_signal = SIGCHLD; | ||
839 | |||
840 | if (unlikely(tsk->ptrace)) { | 848 | if (unlikely(tsk->ptrace)) { |
841 | int sig = thread_group_leader(tsk) && | 849 | int sig = thread_group_leader(tsk) && |
842 | thread_group_empty(tsk) && | 850 | thread_group_empty(tsk) && |
@@ -953,7 +961,7 @@ void do_exit(long code) | |||
953 | acct_update_integrals(tsk); | 961 | acct_update_integrals(tsk); |
954 | /* sync mm's RSS info before statistics gathering */ | 962 | /* sync mm's RSS info before statistics gathering */ |
955 | if (tsk->mm) | 963 | if (tsk->mm) |
956 | sync_mm_rss(tsk, tsk->mm); | 964 | sync_mm_rss(tsk->mm); |
957 | group_dead = atomic_dec_and_test(&tsk->signal->live); | 965 | group_dead = atomic_dec_and_test(&tsk->signal->live); |
958 | if (group_dead) { | 966 | if (group_dead) { |
959 | hrtimer_cancel(&tsk->signal->real_timer); | 967 | hrtimer_cancel(&tsk->signal->real_timer); |
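find_new_reaper() now implements the three-step policy documented above, and panic() on a dying init includes the exit code to ease debugging. Step 2 of the policy is driven from user space by the PR_SET_CHILD_SUBREAPER prctl introduced in the same series; a minimal, hypothetical demonstration (not kernel code) is sketched below. On a kernel with this change the grandchild reports the subreaper's pid; on older kernels the prctl fails and it reports init's pid instead.

#include <stdio.h>
#include <stdlib.h>
#include <sys/prctl.h>
#include <sys/wait.h>
#include <unistd.h>

#ifndef PR_SET_CHILD_SUBREAPER	/* added to the uapi headers with this series */
#define PR_SET_CHILD_SUBREAPER 36
#endif

/* Hypothetical demo of rule 2 above: a service-manager-like process marks
 * itself as a child subreaper, so orphaned descendants are reparented to
 * it instead of to init. */
int main(void)
{
	if (prctl(PR_SET_CHILD_SUBREAPER, 1) != 0)
		perror("prctl");	/* older kernel: falls back to init */

	if (fork() == 0) {			/* intermediate child */
		if (fork() == 0) {		/* grandchild */
			sleep(1);		/* let the middle process die */
			printf("grandchild's parent is now pid %d\n",
			       (int)getppid());
			exit(0);
		}
		_exit(0);			/* orphan the grandchild */
	}

	wait(NULL);	/* reap the intermediate child ... */
	wait(NULL);	/* ... and then the reparented grandchild */
	return 0;
}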
diff --git a/kernel/fork.c b/kernel/fork.c index 26a7a6707fa7..b9372a0bff18 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -193,6 +193,7 @@ void __put_task_struct(struct task_struct *tsk) | |||
193 | WARN_ON(atomic_read(&tsk->usage)); | 193 | WARN_ON(atomic_read(&tsk->usage)); |
194 | WARN_ON(tsk == current); | 194 | WARN_ON(tsk == current); |
195 | 195 | ||
196 | security_task_free(tsk); | ||
196 | exit_creds(tsk); | 197 | exit_creds(tsk); |
197 | delayacct_tsk_free(tsk); | 198 | delayacct_tsk_free(tsk); |
198 | put_signal_struct(tsk->signal); | 199 | put_signal_struct(tsk->signal); |
@@ -355,7 +356,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) | |||
355 | charge = 0; | 356 | charge = 0; |
356 | if (mpnt->vm_flags & VM_ACCOUNT) { | 357 | if (mpnt->vm_flags & VM_ACCOUNT) { |
357 | unsigned int len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT; | 358 | unsigned int len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT; |
358 | if (security_vm_enough_memory(len)) | 359 | if (security_vm_enough_memory_mm(oldmm, len)) /* sic */ |
359 | goto fail_nomem; | 360 | goto fail_nomem; |
360 | charge = len; | 361 | charge = len; |
361 | } | 362 | } |
@@ -511,6 +512,23 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p) | |||
511 | return NULL; | 512 | return NULL; |
512 | } | 513 | } |
513 | 514 | ||
515 | static void check_mm(struct mm_struct *mm) | ||
516 | { | ||
517 | int i; | ||
518 | |||
519 | for (i = 0; i < NR_MM_COUNTERS; i++) { | ||
520 | long x = atomic_long_read(&mm->rss_stat.count[i]); | ||
521 | |||
522 | if (unlikely(x)) | ||
523 | printk(KERN_ALERT "BUG: Bad rss-counter state " | ||
524 | "mm:%p idx:%d val:%ld\n", mm, i, x); | ||
525 | } | ||
526 | |||
527 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||
528 | VM_BUG_ON(mm->pmd_huge_pte); | ||
529 | #endif | ||
530 | } | ||
531 | |||
514 | /* | 532 | /* |
515 | * Allocate and initialize an mm_struct. | 533 | * Allocate and initialize an mm_struct. |
516 | */ | 534 | */ |
@@ -538,9 +556,7 @@ void __mmdrop(struct mm_struct *mm) | |||
538 | mm_free_pgd(mm); | 556 | mm_free_pgd(mm); |
539 | destroy_context(mm); | 557 | destroy_context(mm); |
540 | mmu_notifier_mm_destroy(mm); | 558 | mmu_notifier_mm_destroy(mm); |
541 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | 559 | check_mm(mm); |
542 | VM_BUG_ON(mm->pmd_huge_pte); | ||
543 | #endif | ||
544 | free_mm(mm); | 560 | free_mm(mm); |
545 | } | 561 | } |
546 | EXPORT_SYMBOL_GPL(__mmdrop); | 562 | EXPORT_SYMBOL_GPL(__mmdrop); |
@@ -1035,6 +1051,9 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) | |||
1035 | sig->oom_score_adj = current->signal->oom_score_adj; | 1051 | sig->oom_score_adj = current->signal->oom_score_adj; |
1036 | sig->oom_score_adj_min = current->signal->oom_score_adj_min; | 1052 | sig->oom_score_adj_min = current->signal->oom_score_adj_min; |
1037 | 1053 | ||
1054 | sig->has_child_subreaper = current->signal->has_child_subreaper || | ||
1055 | current->signal->is_child_subreaper; | ||
1056 | |||
1038 | mutex_init(&sig->cred_guard_mutex); | 1057 | mutex_init(&sig->cred_guard_mutex); |
1039 | 1058 | ||
1040 | return 0; | 1059 | return 0; |
@@ -1222,6 +1241,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1222 | #ifdef CONFIG_CPUSETS | 1241 | #ifdef CONFIG_CPUSETS |
1223 | p->cpuset_mem_spread_rotor = NUMA_NO_NODE; | 1242 | p->cpuset_mem_spread_rotor = NUMA_NO_NODE; |
1224 | p->cpuset_slab_spread_rotor = NUMA_NO_NODE; | 1243 | p->cpuset_slab_spread_rotor = NUMA_NO_NODE; |
1244 | seqcount_init(&p->mems_allowed_seq); | ||
1225 | #endif | 1245 | #endif |
1226 | #ifdef CONFIG_TRACE_IRQFLAGS | 1246 | #ifdef CONFIG_TRACE_IRQFLAGS |
1227 | p->irq_events = 0; | 1247 | p->irq_events = 0; |
@@ -1340,7 +1360,13 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1340 | clear_all_latency_tracing(p); | 1360 | clear_all_latency_tracing(p); |
1341 | 1361 | ||
1342 | /* ok, now we should be set up.. */ | 1362 | /* ok, now we should be set up.. */ |
1343 | p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CSIGNAL); | 1363 | if (clone_flags & CLONE_THREAD) |
1364 | p->exit_signal = -1; | ||
1365 | else if (clone_flags & CLONE_PARENT) | ||
1366 | p->exit_signal = current->group_leader->exit_signal; | ||
1367 | else | ||
1368 | p->exit_signal = (clone_flags & CSIGNAL); | ||
1369 | |||
1344 | p->pdeath_signal = 0; | 1370 | p->pdeath_signal = 0; |
1345 | p->exit_state = 0; | 1371 | p->exit_state = 0; |
1346 | 1372 | ||
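Besides propagating has_child_subreaper and adding the check_mm() sanity pass, copy_process() now picks the child's exit_signal with an explicit three-way rule instead of the old one-liner: threads get none, CLONE_PARENT children inherit the caller's group leader's exit signal, and ordinary children take the signal encoded in the low byte of the clone flags. The stand-alone restatement below (illustration only, with CSIGNAL defined locally) makes the behavioral difference for CLONE_PARENT easy to test:

#define _GNU_SOURCE	/* for the CLONE_* flags in <sched.h> */
#include <assert.h>
#include <sched.h>
#include <signal.h>

#ifndef CSIGNAL
#define CSIGNAL 0x000000ff	/* low byte of the clone flags = exit signal */
#endif

/* Stand-alone restatement (illustration only) of the new exit_signal
 * selection in copy_process(). */
static int pick_exit_signal(unsigned long clone_flags, int leader_exit_signal)
{
	if (clone_flags & CLONE_THREAD)
		return -1;			/* threads do not signal the parent */
	if (clone_flags & CLONE_PARENT)
		return leader_exit_signal;	/* inherit from the leader */
	return clone_flags & CSIGNAL;
}

int main(void)
{
	assert(pick_exit_signal(CLONE_THREAD | CLONE_VM, SIGCHLD) == -1);
	/* CLONE_PARENT now ignores the low byte and inherits SIGCHLD: */
	assert(pick_exit_signal(CLONE_PARENT | SIGUSR1, SIGCHLD) == SIGCHLD);
	assert(pick_exit_signal(SIGCHLD, SIGCHLD) == SIGCHLD);
	return 0;
}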
diff --git a/kernel/freezer.c b/kernel/freezer.c index 9815b8d1eed5..11f82a4d4eae 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c | |||
@@ -99,9 +99,9 @@ static void fake_signal_wake_up(struct task_struct *p) | |||
99 | * freeze_task - send a freeze request to given task | 99 | * freeze_task - send a freeze request to given task |
100 | * @p: task to send the request to | 100 | * @p: task to send the request to |
101 | * | 101 | * |
102 | * If @p is freezing, the freeze request is sent by setting %TIF_FREEZE | 102 | * If @p is freezing, the freeze request is sent either by sending a fake |
103 | * flag and either sending a fake signal to it or waking it up, depending | 103 | * signal (if it's not a kernel thread) or waking it up (if it's a kernel |
104 | * on whether it has %PF_FREEZER_NOSIG set. | 104 | * thread). |
105 | * | 105 | * |
106 | * RETURNS: | 106 | * RETURNS: |
107 | * %false, if @p is not freezing or already frozen; %true, otherwise | 107 | * %false, if @p is not freezing or already frozen; %true, otherwise |
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 1f9e26526b69..af48e59bc2ff 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c | |||
@@ -1,189 +1,793 @@ | |||
1 | #include <linux/debugfs.h> | ||
2 | #include <linux/hardirq.h> | ||
3 | #include <linux/interrupt.h> | ||
1 | #include <linux/irq.h> | 4 | #include <linux/irq.h> |
5 | #include <linux/irqdesc.h> | ||
2 | #include <linux/irqdomain.h> | 6 | #include <linux/irqdomain.h> |
3 | #include <linux/module.h> | 7 | #include <linux/module.h> |
4 | #include <linux/mutex.h> | 8 | #include <linux/mutex.h> |
5 | #include <linux/of.h> | 9 | #include <linux/of.h> |
6 | #include <linux/of_address.h> | 10 | #include <linux/of_address.h> |
11 | #include <linux/seq_file.h> | ||
7 | #include <linux/slab.h> | 12 | #include <linux/slab.h> |
13 | #include <linux/smp.h> | ||
14 | #include <linux/fs.h> | ||
15 | |||
16 | #define IRQ_DOMAIN_MAP_LEGACY 0 /* driver allocated fixed range of irqs. | ||
17 | * ie. legacy 8259, gets irqs 1..15 */ | ||
18 | #define IRQ_DOMAIN_MAP_NOMAP 1 /* no fast reverse mapping */ | ||
19 | #define IRQ_DOMAIN_MAP_LINEAR 2 /* linear map of interrupts */ | ||
20 | #define IRQ_DOMAIN_MAP_TREE 3 /* radix tree */ | ||
8 | 21 | ||
9 | static LIST_HEAD(irq_domain_list); | 22 | static LIST_HEAD(irq_domain_list); |
10 | static DEFINE_MUTEX(irq_domain_mutex); | 23 | static DEFINE_MUTEX(irq_domain_mutex); |
11 | 24 | ||
25 | static DEFINE_MUTEX(revmap_trees_mutex); | ||
26 | static unsigned int irq_virq_count = NR_IRQS; | ||
27 | static struct irq_domain *irq_default_domain; | ||
28 | |||
12 | /** | 29 | /** |
13 | * irq_domain_add() - Register an irq_domain | 30 | * irq_domain_alloc() - Allocate a new irq_domain data structure |
14 | * @domain: ptr to initialized irq_domain structure | 31 | * @of_node: optional device-tree node of the interrupt controller |
32 | * @revmap_type: type of reverse mapping to use | ||
33 | * @ops: map/unmap domain callbacks | ||
34 | * @host_data: Controller private data pointer | ||
15 | * | 35 | * |
16 | * Registers an irq_domain structure. The irq_domain must at a minimum be | 36 | * Allocates and initializes an irq_domain structure. Caller is expected to |
17 | * initialized with an ops structure pointer, and either a ->to_irq hook or | 37 | * register allocated irq_domain with irq_domain_register(). Returns pointer |
18 | * a valid irq_base value. Everything else is optional. | 38 | * to IRQ domain, or NULL on failure. |
19 | */ | 39 | */ |
20 | void irq_domain_add(struct irq_domain *domain) | 40 | static struct irq_domain *irq_domain_alloc(struct device_node *of_node, |
41 | unsigned int revmap_type, | ||
42 | const struct irq_domain_ops *ops, | ||
43 | void *host_data) | ||
21 | { | 44 | { |
22 | struct irq_data *d; | 45 | struct irq_domain *domain; |
23 | int hwirq, irq; | ||
24 | 46 | ||
25 | /* | 47 | domain = kzalloc(sizeof(*domain), GFP_KERNEL); |
26 | * This assumes that the irq_domain owner has already allocated | 48 | if (WARN_ON(!domain)) |
27 | * the irq_descs. This block will be removed when support for dynamic | 49 | return NULL; |
28 | * allocation of irq_descs is added to irq_domain. | 50 | |
29 | */ | 51 | /* Fill structure */ |
30 | irq_domain_for_each_irq(domain, hwirq, irq) { | 52 | domain->revmap_type = revmap_type; |
31 | d = irq_get_irq_data(irq); | 53 | domain->ops = ops; |
32 | if (!d) { | 54 | domain->host_data = host_data; |
33 | WARN(1, "error: assigning domain to non existant irq_desc"); | 55 | domain->of_node = of_node_get(of_node); |
34 | return; | 56 | |
35 | } | 57 | return domain; |
36 | if (d->domain) { | 58 | } |
37 | /* things are broken; just report, don't clean up */ | 59 | |
38 | WARN(1, "error: irq_desc already assigned to a domain"); | 60 | static void irq_domain_add(struct irq_domain *domain) |
39 | return; | 61 | { |
62 | mutex_lock(&irq_domain_mutex); | ||
63 | list_add(&domain->link, &irq_domain_list); | ||
64 | mutex_unlock(&irq_domain_mutex); | ||
65 | pr_debug("irq: Allocated domain of type %d @0x%p\n", | ||
66 | domain->revmap_type, domain); | ||
67 | } | ||
68 | |||
69 | static unsigned int irq_domain_legacy_revmap(struct irq_domain *domain, | ||
70 | irq_hw_number_t hwirq) | ||
71 | { | ||
72 | irq_hw_number_t first_hwirq = domain->revmap_data.legacy.first_hwirq; | ||
73 | int size = domain->revmap_data.legacy.size; | ||
74 | |||
75 | if (WARN_ON(hwirq < first_hwirq || hwirq >= first_hwirq + size)) | ||
76 | return 0; | ||
77 | return hwirq - first_hwirq + domain->revmap_data.legacy.first_irq; | ||
78 | } | ||
79 | |||
80 | /** | ||
81 | * irq_domain_add_legacy() - Allocate and register a legacy revmap irq_domain. | ||
82 | * @of_node: pointer to interrupt controller's device tree node. | ||
83 | * @size: total number of irqs in legacy mapping | ||
84 | * @first_irq: first number of irq block assigned to the domain | ||
85 | * @first_hwirq: first hwirq number to use for the translation. Should normally | ||
86 | * be '0', but a positive integer can be used if the effective | ||
87 | * hwirqs numbering does not begin at zero. | ||
88 | * @ops: map/unmap domain callbacks | ||
89 | * @host_data: Controller private data pointer | ||
90 | * | ||
91 | * Note: the map() callback will be called before this function returns | ||
92 | * for all legacy interrupts except 0 (which is always the invalid irq for | ||
93 | * a legacy controller). | ||
94 | */ | ||
95 | struct irq_domain *irq_domain_add_legacy(struct device_node *of_node, | ||
96 | unsigned int size, | ||
97 | unsigned int first_irq, | ||
98 | irq_hw_number_t first_hwirq, | ||
99 | const struct irq_domain_ops *ops, | ||
100 | void *host_data) | ||
101 | { | ||
102 | struct irq_domain *domain; | ||
103 | unsigned int i; | ||
104 | |||
105 | domain = irq_domain_alloc(of_node, IRQ_DOMAIN_MAP_LEGACY, ops, host_data); | ||
106 | if (!domain) | ||
107 | return NULL; | ||
108 | |||
109 | domain->revmap_data.legacy.first_irq = first_irq; | ||
110 | domain->revmap_data.legacy.first_hwirq = first_hwirq; | ||
111 | domain->revmap_data.legacy.size = size; | ||
112 | |||
113 | mutex_lock(&irq_domain_mutex); | ||
114 | /* Verify that all the irqs are available */ | ||
115 | for (i = 0; i < size; i++) { | ||
116 | int irq = first_irq + i; | ||
117 | struct irq_data *irq_data = irq_get_irq_data(irq); | ||
118 | |||
119 | if (WARN_ON(!irq_data || irq_data->domain)) { | ||
120 | mutex_unlock(&irq_domain_mutex); | ||
121 | of_node_put(domain->of_node); | ||
122 | kfree(domain); | ||
123 | return NULL; | ||
40 | } | 124 | } |
41 | d->domain = domain; | ||
42 | d->hwirq = hwirq; | ||
43 | } | 125 | } |
44 | 126 | ||
45 | mutex_lock(&irq_domain_mutex); | 127 | /* Claim all of the irqs before registering a legacy domain */ |
46 | list_add(&domain->list, &irq_domain_list); | 128 | for (i = 0; i < size; i++) { |
129 | struct irq_data *irq_data = irq_get_irq_data(first_irq + i); | ||
130 | irq_data->hwirq = first_hwirq + i; | ||
131 | irq_data->domain = domain; | ||
132 | } | ||
47 | mutex_unlock(&irq_domain_mutex); | 133 | mutex_unlock(&irq_domain_mutex); |
134 | |||
135 | for (i = 0; i < size; i++) { | ||
136 | int irq = first_irq + i; | ||
137 | int hwirq = first_hwirq + i; | ||
138 | |||
139 | /* IRQ0 gets ignored */ | ||
140 | if (!irq) | ||
141 | continue; | ||
142 | |||
143 | /* Legacy flags are left to default at this point, | ||
144 | * one can then use irq_create_mapping() to | ||
145 | * explicitly change them | ||
146 | */ | ||
147 | ops->map(domain, irq, hwirq); | ||
148 | |||
149 | /* Clear norequest flags */ | ||
150 | irq_clear_status_flags(irq, IRQ_NOREQUEST); | ||
151 | } | ||
152 | |||
153 | irq_domain_add(domain); | ||
154 | return domain; | ||
155 | } | ||
156 | |||
157 | /** | ||
158 | * irq_domain_add_linear() - Allocate and register a linear revmap irq_domain. | ||
159 | * @of_node: pointer to interrupt controller's device tree node. | ||
160 | * @ops: map/unmap domain callbacks | ||
161 | * @host_data: Controller private data pointer | ||
162 | */ | ||
163 | struct irq_domain *irq_domain_add_linear(struct device_node *of_node, | ||
164 | unsigned int size, | ||
165 | const struct irq_domain_ops *ops, | ||
166 | void *host_data) | ||
167 | { | ||
168 | struct irq_domain *domain; | ||
169 | unsigned int *revmap; | ||
170 | |||
171 | revmap = kzalloc(sizeof(*revmap) * size, GFP_KERNEL); | ||
172 | if (WARN_ON(!revmap)) | ||
173 | return NULL; | ||
174 | |||
175 | domain = irq_domain_alloc(of_node, IRQ_DOMAIN_MAP_LINEAR, ops, host_data); | ||
176 | if (!domain) { | ||
177 | kfree(revmap); | ||
178 | return NULL; | ||
179 | } | ||
180 | domain->revmap_data.linear.size = size; | ||
181 | domain->revmap_data.linear.revmap = revmap; | ||
182 | irq_domain_add(domain); | ||
183 | return domain; | ||
184 | } | ||
185 | |||
186 | struct irq_domain *irq_domain_add_nomap(struct device_node *of_node, | ||
187 | const struct irq_domain_ops *ops, | ||
188 | void *host_data) | ||
189 | { | ||
190 | struct irq_domain *domain = irq_domain_alloc(of_node, | ||
191 | IRQ_DOMAIN_MAP_NOMAP, ops, host_data); | ||
192 | if (domain) | ||
193 | irq_domain_add(domain); | ||
194 | return domain; | ||
195 | } | ||
196 | |||
197 | /** | ||
198 | * irq_domain_add_tree() | ||
199 | * @of_node: pointer to interrupt controller's device tree node. | ||
200 | * @ops: map/unmap domain callbacks | ||
201 | * | ||
202 | * Note: The radix tree will be allocated later during boot automatically | ||
203 | * (the reverse mapping will use the slow path until that happens). | ||
204 | */ | ||
205 | struct irq_domain *irq_domain_add_tree(struct device_node *of_node, | ||
206 | const struct irq_domain_ops *ops, | ||
207 | void *host_data) | ||
208 | { | ||
209 | struct irq_domain *domain = irq_domain_alloc(of_node, | ||
210 | IRQ_DOMAIN_MAP_TREE, ops, host_data); | ||
211 | if (domain) { | ||
212 | INIT_RADIX_TREE(&domain->revmap_data.tree, GFP_KERNEL); | ||
213 | irq_domain_add(domain); | ||
214 | } | ||
215 | return domain; | ||
48 | } | 216 | } |
49 | 217 | ||
50 | /** | 218 | /** |
51 | * irq_domain_del() - Unregister an irq_domain | 219 | * irq_find_host() - Locates a domain for a given device node |
52 | * @domain: ptr to registered irq_domain. | 220 | * @node: device-tree node of the interrupt controller |
53 | */ | 221 | */ |
54 | void irq_domain_del(struct irq_domain *domain) | 222 | struct irq_domain *irq_find_host(struct device_node *node) |
55 | { | 223 | { |
56 | struct irq_data *d; | 224 | struct irq_domain *h, *found = NULL; |
57 | int hwirq, irq; | 225 | int rc; |
58 | 226 | ||
227 | /* We might want to match the legacy controller last since | ||
228 | * it might potentially be set to match all interrupts in | ||
229 | * the absence of a device node. This isn't a problem so far | ||
230 | * yet though... | ||
231 | */ | ||
59 | mutex_lock(&irq_domain_mutex); | 232 | mutex_lock(&irq_domain_mutex); |
60 | list_del(&domain->list); | 233 | list_for_each_entry(h, &irq_domain_list, link) { |
234 | if (h->ops->match) | ||
235 | rc = h->ops->match(h, node); | ||
236 | else | ||
237 | rc = (h->of_node != NULL) && (h->of_node == node); | ||
238 | |||
239 | if (rc) { | ||
240 | found = h; | ||
241 | break; | ||
242 | } | ||
243 | } | ||
61 | mutex_unlock(&irq_domain_mutex); | 244 | mutex_unlock(&irq_domain_mutex); |
245 | return found; | ||
246 | } | ||
247 | EXPORT_SYMBOL_GPL(irq_find_host); | ||
248 | |||
249 | /** | ||
250 | * irq_set_default_host() - Set a "default" irq domain | ||
251 | * @domain: default domain pointer | ||
252 | * | ||
253 | * For convenience, it's possible to set a "default" domain that will be used | ||
254 | * whenever NULL is passed to irq_create_mapping(). It makes life easier for | ||
255 | * platforms that want to manipulate a few hard coded interrupt numbers that | ||
256 | * aren't properly represented in the device-tree. | ||
257 | */ | ||
258 | void irq_set_default_host(struct irq_domain *domain) | ||
259 | { | ||
260 | pr_debug("irq: Default domain set to @0x%p\n", domain); | ||
261 | |||
262 | irq_default_domain = domain; | ||
263 | } | ||
264 | |||
265 | /** | ||
266 | * irq_set_virq_count() - Set the maximum number of linux irqs | ||
267 | * @count: number of linux irqs, capped with NR_IRQS | ||
268 | * | ||
269 | * This is mainly for use by platforms like iSeries who want to program | ||
270 | * the virtual irq number in the controller to avoid the reverse mapping | ||
271 | */ | ||
272 | void irq_set_virq_count(unsigned int count) | ||
273 | { | ||
274 | pr_debug("irq: Trying to set virq count to %d\n", count); | ||
62 | 275 | ||
63 | /* Clear the irq_domain assignments */ | 276 | BUG_ON(count < NUM_ISA_INTERRUPTS); |
64 | irq_domain_for_each_irq(domain, hwirq, irq) { | 277 | if (count < NR_IRQS) |
65 | d = irq_get_irq_data(irq); | 278 | irq_virq_count = count; |
66 | d->domain = NULL; | 279 | } |
280 | |||
281 | static int irq_setup_virq(struct irq_domain *domain, unsigned int virq, | ||
282 | irq_hw_number_t hwirq) | ||
283 | { | ||
284 | struct irq_data *irq_data = irq_get_irq_data(virq); | ||
285 | |||
286 | irq_data->hwirq = hwirq; | ||
287 | irq_data->domain = domain; | ||
288 | if (domain->ops->map(domain, virq, hwirq)) { | ||
289 | pr_debug("irq: -> mapping failed, freeing\n"); | ||
290 | irq_data->domain = NULL; | ||
291 | irq_data->hwirq = 0; | ||
292 | return -1; | ||
67 | } | 293 | } |
294 | |||
295 | irq_clear_status_flags(virq, IRQ_NOREQUEST); | ||
296 | |||
297 | return 0; | ||
68 | } | 298 | } |
69 | 299 | ||
70 | #if defined(CONFIG_OF_IRQ) | ||
71 | /** | 300 | /** |
72 | * irq_create_of_mapping() - Map a linux irq number from a DT interrupt spec | 301 | * irq_create_direct_mapping() - Allocate an irq for direct mapping |
302 | * @domain: domain to allocate the irq for or NULL for default domain | ||
73 | * | 303 | * |
74 | * Used by the device tree interrupt mapping code to translate a device tree | 304 | * This routine is used for irq controllers which can choose the hardware |
75 | * interrupt specifier to a valid linux irq number. Returns either a valid | 305 | * interrupt numbers they generate. In such a case it's simplest to use |
76 | * linux IRQ number or 0. | 306 | * the linux irq as the hardware interrupt number. |
307 | */ | ||
308 | unsigned int irq_create_direct_mapping(struct irq_domain *domain) | ||
309 | { | ||
310 | unsigned int virq; | ||
311 | |||
312 | if (domain == NULL) | ||
313 | domain = irq_default_domain; | ||
314 | |||
315 | BUG_ON(domain == NULL); | ||
316 | WARN_ON(domain->revmap_type != IRQ_DOMAIN_MAP_NOMAP); | ||
317 | |||
318 | virq = irq_alloc_desc_from(1, 0); | ||
319 | if (!virq) { | ||
320 | pr_debug("irq: create_direct virq allocation failed\n"); | ||
321 | return 0; | ||
322 | } | ||
323 | if (virq >= irq_virq_count) { | ||
324 | pr_err("ERROR: no free irqs available below %i maximum\n", | ||
325 | irq_virq_count); | ||
326 | irq_free_desc(virq); | ||
327 | return 0; | ||
328 | } | ||
329 | |||
330 | pr_debug("irq: create_direct obtained virq %d\n", virq); | ||
331 | |||
332 | if (irq_setup_virq(domain, virq, virq)) { | ||
333 | irq_free_desc(virq); | ||
334 | return 0; | ||
335 | } | ||
336 | |||
337 | return virq; | ||
338 | } | ||
339 | |||
340 | /** | ||
341 | * irq_create_mapping() - Map a hardware interrupt into linux irq space | ||
342 | * @domain: domain owning this hardware interrupt or NULL for default domain | ||
343 | * @hwirq: hardware irq number in that domain space | ||
77 | * | 344 | * |
78 | * When the caller no longer need the irq number returned by this function it | 345 | * Only one mapping per hardware interrupt is permitted. Returns a linux |
79 | * should arrange to call irq_dispose_mapping(). | 346 | * irq number. |
347 | * If the sense/trigger is to be specified, set_irq_type() should be called | ||
348 | * on the number returned from that call. | ||
80 | */ | 349 | */ |
350 | unsigned int irq_create_mapping(struct irq_domain *domain, | ||
351 | irq_hw_number_t hwirq) | ||
352 | { | ||
353 | unsigned int virq, hint; | ||
354 | |||
355 | pr_debug("irq: irq_create_mapping(0x%p, 0x%lx)\n", domain, hwirq); | ||
356 | |||
357 | /* Look for default domain if necessary */ | ||
358 | if (domain == NULL) | ||
359 | domain = irq_default_domain; | ||
360 | if (domain == NULL) { | ||
361 | printk(KERN_WARNING "irq_create_mapping called for" | ||
362 | " NULL domain, hwirq=%lx\n", hwirq); | ||
363 | WARN_ON(1); | ||
364 | return 0; | ||
365 | } | ||
366 | pr_debug("irq: -> using domain @%p\n", domain); | ||
367 | |||
368 | /* Check if mapping already exists */ | ||
369 | virq = irq_find_mapping(domain, hwirq); | ||
370 | if (virq) { | ||
371 | pr_debug("irq: -> existing mapping on virq %d\n", virq); | ||
372 | return virq; | ||
373 | } | ||
374 | |||
375 | /* Get a virtual interrupt number */ | ||
376 | if (domain->revmap_type == IRQ_DOMAIN_MAP_LEGACY) | ||
377 | return irq_domain_legacy_revmap(domain, hwirq); | ||
378 | |||
379 | /* Allocate a virtual interrupt number */ | ||
380 | hint = hwirq % irq_virq_count; | ||
381 | if (hint == 0) | ||
382 | hint++; | ||
383 | virq = irq_alloc_desc_from(hint, 0); | ||
384 | if (!virq) | ||
385 | virq = irq_alloc_desc_from(1, 0); | ||
386 | if (!virq) { | ||
387 | pr_debug("irq: -> virq allocation failed\n"); | ||
388 | return 0; | ||
389 | } | ||
390 | |||
391 | if (irq_setup_virq(domain, virq, hwirq)) { | ||
392 | if (domain->revmap_type != IRQ_DOMAIN_MAP_LEGACY) | ||
393 | irq_free_desc(virq); | ||
394 | return 0; | ||
395 | } | ||
396 | |||
397 | pr_debug("irq: irq %lu on domain %s mapped to virtual irq %u\n", | ||
398 | hwirq, domain->of_node ? domain->of_node->full_name : "null", virq); | ||
399 | |||
400 | return virq; | ||
401 | } | ||
402 | EXPORT_SYMBOL_GPL(irq_create_mapping); | ||
403 | |||
81 | unsigned int irq_create_of_mapping(struct device_node *controller, | 404 | unsigned int irq_create_of_mapping(struct device_node *controller, |
82 | const u32 *intspec, unsigned int intsize) | 405 | const u32 *intspec, unsigned int intsize) |
83 | { | 406 | { |
84 | struct irq_domain *domain; | 407 | struct irq_domain *domain; |
85 | unsigned long hwirq; | 408 | irq_hw_number_t hwirq; |
86 | unsigned int irq, type; | 409 | unsigned int type = IRQ_TYPE_NONE; |
87 | int rc = -EINVAL; | 410 | unsigned int virq; |
88 | 411 | ||
89 | /* Find a domain which can translate the irq spec */ | 412 | domain = controller ? irq_find_host(controller) : irq_default_domain; |
90 | mutex_lock(&irq_domain_mutex); | 413 | if (!domain) { |
91 | list_for_each_entry(domain, &irq_domain_list, list) { | 414 | #ifdef CONFIG_MIPS |
92 | if (!domain->ops->dt_translate) | 415 | /* |
93 | continue; | 416 | * Workaround to avoid breaking interrupt controller drivers |
94 | rc = domain->ops->dt_translate(domain, controller, | 417 | * that don't yet register an irq_domain. This is temporary |
95 | intspec, intsize, &hwirq, &type); | 418 | * code. ~~~gcl, Feb 24, 2012 |
96 | if (rc == 0) | 419 | * |
97 | break; | 420 | * Scheduled for removal in Linux v3.6. That should be enough |
421 | * time. | ||
422 | */ | ||
423 | if (intsize > 0) | ||
424 | return intspec[0]; | ||
425 | #endif | ||
426 | printk(KERN_WARNING "irq: no irq domain found for %s !\n", | ||
427 | controller->full_name); | ||
428 | return 0; | ||
98 | } | 429 | } |
99 | mutex_unlock(&irq_domain_mutex); | ||
100 | 430 | ||
101 | if (rc != 0) | 431 | /* If domain has no translation, then we assume interrupt line */ |
102 | return 0; | 432 | if (domain->ops->xlate == NULL) |
433 | hwirq = intspec[0]; | ||
434 | else { | ||
435 | if (domain->ops->xlate(domain, controller, intspec, intsize, | ||
436 | &hwirq, &type)) | ||
437 | return 0; | ||
438 | } | ||
439 | |||
440 | /* Create mapping */ | ||
441 | virq = irq_create_mapping(domain, hwirq); | ||
442 | if (!virq) | ||
443 | return virq; | ||
103 | 444 | ||
104 | irq = irq_domain_to_irq(domain, hwirq); | 445 | /* Set type if specified and different than the current one */ |
105 | if (type != IRQ_TYPE_NONE) | 446 | if (type != IRQ_TYPE_NONE && |
106 | irq_set_irq_type(irq, type); | 447 | type != (irqd_get_trigger_type(irq_get_irq_data(virq)))) |
107 | pr_debug("%s: mapped hwirq=%i to irq=%i, flags=%x\n", | 448 | irq_set_irq_type(virq, type); |
108 | controller->full_name, (int)hwirq, irq, type); | 449 | return virq; |
109 | return irq; | ||
110 | } | 450 | } |
111 | EXPORT_SYMBOL_GPL(irq_create_of_mapping); | 451 | EXPORT_SYMBOL_GPL(irq_create_of_mapping); |
112 | 452 | ||
113 | /** | 453 | /** |
114 | * irq_dispose_mapping() - Discard a mapping created by irq_create_of_mapping() | 454 | * irq_dispose_mapping() - Unmap an interrupt |
115 | * @irq: linux irq number to be discarded | 455 | * @virq: linux irq number of the interrupt to unmap |
456 | */ | ||
457 | void irq_dispose_mapping(unsigned int virq) | ||
458 | { | ||
459 | struct irq_data *irq_data = irq_get_irq_data(virq); | ||
460 | struct irq_domain *domain; | ||
461 | irq_hw_number_t hwirq; | ||
462 | |||
463 | if (!virq || !irq_data) | ||
464 | return; | ||
465 | |||
466 | domain = irq_data->domain; | ||
467 | if (WARN_ON(domain == NULL)) | ||
468 | return; | ||
469 | |||
470 | /* Never unmap legacy interrupts */ | ||
471 | if (domain->revmap_type == IRQ_DOMAIN_MAP_LEGACY) | ||
472 | return; | ||
473 | |||
474 | irq_set_status_flags(virq, IRQ_NOREQUEST); | ||
475 | |||
476 | /* remove chip and handler */ | ||
477 | irq_set_chip_and_handler(virq, NULL, NULL); | ||
478 | |||
479 | /* Make sure it's completed */ | ||
480 | synchronize_irq(virq); | ||
481 | |||
482 | /* Tell the PIC about it */ | ||
483 | if (domain->ops->unmap) | ||
484 | domain->ops->unmap(domain, virq); | ||
485 | smp_mb(); | ||
486 | |||
487 | /* Clear reverse map */ | ||
488 | hwirq = irq_data->hwirq; | ||
489 | switch(domain->revmap_type) { | ||
490 | case IRQ_DOMAIN_MAP_LINEAR: | ||
491 | if (hwirq < domain->revmap_data.linear.size) | ||
492 | domain->revmap_data.linear.revmap[hwirq] = 0; | ||
493 | break; | ||
494 | case IRQ_DOMAIN_MAP_TREE: | ||
495 | mutex_lock(&revmap_trees_mutex); | ||
496 | radix_tree_delete(&domain->revmap_data.tree, hwirq); | ||
497 | mutex_unlock(&revmap_trees_mutex); | ||
498 | break; | ||
499 | } | ||
500 | |||
501 | irq_free_desc(virq); | ||
502 | } | ||
503 | EXPORT_SYMBOL_GPL(irq_dispose_mapping); | ||
504 | |||
505 | /** | ||
506 | * irq_find_mapping() - Find a linux irq from an hw irq number. | ||
507 | * @domain: domain owning this hardware interrupt | ||
508 | * @hwirq: hardware irq number in that domain space | ||
509 | * | ||
510 | * This is a slow path, for use by generic code. It's expected that an | ||
511 | * irq controller implementation directly calls the appropriate low level | ||
512 | * mapping function. | ||
513 | */ | ||
514 | unsigned int irq_find_mapping(struct irq_domain *domain, | ||
515 | irq_hw_number_t hwirq) | ||
516 | { | ||
517 | unsigned int i; | ||
518 | unsigned int hint = hwirq % irq_virq_count; | ||
519 | |||
520 | /* Look for default domain if necessary */ | ||
521 | if (domain == NULL) | ||
522 | domain = irq_default_domain; | ||
523 | if (domain == NULL) | ||
524 | return 0; | ||
525 | |||
526 | /* legacy -> bail early */ | ||
527 | if (domain->revmap_type == IRQ_DOMAIN_MAP_LEGACY) | ||
528 | return irq_domain_legacy_revmap(domain, hwirq); | ||
529 | |||
530 | /* Slow path does a linear search of the map */ | ||
531 | if (hint == 0) | ||
532 | hint = 1; | ||
533 | i = hint; | ||
534 | do { | ||
535 | struct irq_data *data = irq_get_irq_data(i); | ||
536 | if (data && (data->domain == domain) && (data->hwirq == hwirq)) | ||
537 | return i; | ||
538 | i++; | ||
539 | if (i >= irq_virq_count) | ||
540 | i = 1; | ||
541 | } while(i != hint); | ||
542 | return 0; | ||
543 | } | ||
544 | EXPORT_SYMBOL_GPL(irq_find_mapping); | ||
545 | |||
546 | /** | ||
547 | * irq_radix_revmap_lookup() - Find a linux irq from a hw irq number. | ||
548 | * @domain: domain owning this hardware interrupt | ||
549 | * @hwirq: hardware irq number in that domain space | ||
116 | * | 550 | * |
117 | * Calling this function indicates the caller no longer needs a reference to | 551 | * This is a fast path, for use by irq controller code that uses radix tree |
118 | * the linux irq number returned by a prior call to irq_create_of_mapping(). | 552 | * revmaps |
119 | */ | 553 | */ |
120 | void irq_dispose_mapping(unsigned int irq) | 554 | unsigned int irq_radix_revmap_lookup(struct irq_domain *domain, |
555 | irq_hw_number_t hwirq) | ||
121 | { | 556 | { |
557 | struct irq_data *irq_data; | ||
558 | |||
559 | if (WARN_ON_ONCE(domain->revmap_type != IRQ_DOMAIN_MAP_TREE)) | ||
560 | return irq_find_mapping(domain, hwirq); | ||
561 | |||
562 | /* | ||
563 | * Freeing an irq can delete nodes along the path to | ||
564 | * do the lookup via call_rcu. | ||
565 | */ | ||
566 | rcu_read_lock(); | ||
567 | irq_data = radix_tree_lookup(&domain->revmap_data.tree, hwirq); | ||
568 | rcu_read_unlock(); | ||
569 | |||
122 | /* | 570 | /* |
123 | * nothing yet; will be filled when support for dynamic allocation of | 571 | * If found in radix tree, then fine. |
124 | * irq_descs is added to irq_domain | 572 | * Else fallback to linear lookup - this should not happen in practice |
573 | * as it means that we failed to insert the node in the radix tree. | ||
125 | */ | 574 | */ |
575 | return irq_data ? irq_data->irq : irq_find_mapping(domain, hwirq); | ||
126 | } | 576 | } |
127 | EXPORT_SYMBOL_GPL(irq_dispose_mapping); | ||
128 | 577 | ||
129 | int irq_domain_simple_dt_translate(struct irq_domain *d, | 578 | /** |
130 | struct device_node *controller, | 579 | * irq_radix_revmap_insert() - Insert a hw irq to linux irq number mapping. |
131 | const u32 *intspec, unsigned int intsize, | 580 | * @domain: domain owning this hardware interrupt |
132 | unsigned long *out_hwirq, unsigned int *out_type) | 581 | * @virq: linux irq number |
582 | * @hwirq: hardware irq number in that domain space | ||
583 | * | ||
584 | * This is for use by irq controllers that use a radix tree reverse | ||
585 | * mapping for fast lookup. | ||
586 | */ | ||
587 | void irq_radix_revmap_insert(struct irq_domain *domain, unsigned int virq, | ||
588 | irq_hw_number_t hwirq) | ||
133 | { | 589 | { |
134 | if (d->of_node != controller) | 590 | struct irq_data *irq_data = irq_get_irq_data(virq); |
135 | return -EINVAL; | 591 | |
136 | if (intsize < 1) | 592 | if (WARN_ON(domain->revmap_type != IRQ_DOMAIN_MAP_TREE)) |
137 | return -EINVAL; | 593 | return; |
138 | if (d->nr_irq && ((intspec[0] < d->hwirq_base) || | 594 | |
139 | (intspec[0] >= d->hwirq_base + d->nr_irq))) | 595 | if (virq) { |
140 | return -EINVAL; | 596 | mutex_lock(&revmap_trees_mutex); |
597 | radix_tree_insert(&domain->revmap_data.tree, hwirq, irq_data); | ||
598 | mutex_unlock(&revmap_trees_mutex); | ||
599 | } | ||
600 | } | ||
601 | |||
602 | /** | ||
603 | * irq_linear_revmap() - Find a linux irq from a hw irq number. | ||
604 | * @domain: domain owning this hardware interrupt | ||
605 | * @hwirq: hardware irq number in that domain space | ||
606 | * | ||
607 | * This is a fast path, for use by irq controller code that uses linear | ||
608 | * revmaps. It does fallback to the slow path if the revmap doesn't exist | ||
609 | * yet and will create the revmap entry with appropriate locking | ||
610 | */ | ||
611 | unsigned int irq_linear_revmap(struct irq_domain *domain, | ||
612 | irq_hw_number_t hwirq) | ||
613 | { | ||
614 | unsigned int *revmap; | ||
615 | |||
616 | if (WARN_ON_ONCE(domain->revmap_type != IRQ_DOMAIN_MAP_LINEAR)) | ||
617 | return irq_find_mapping(domain, hwirq); | ||
618 | |||
619 | /* Check revmap bounds */ | ||
620 | if (unlikely(hwirq >= domain->revmap_data.linear.size)) | ||
621 | return irq_find_mapping(domain, hwirq); | ||
622 | |||
623 | /* Check if revmap was allocated */ | ||
624 | revmap = domain->revmap_data.linear.revmap; | ||
625 | if (unlikely(revmap == NULL)) | ||
626 | return irq_find_mapping(domain, hwirq); | ||
627 | |||
628 | /* Fill up revmap with slow path if no mapping found */ | ||
629 | if (unlikely(!revmap[hwirq])) | ||
630 | revmap[hwirq] = irq_find_mapping(domain, hwirq); | ||
631 | |||
632 | return revmap[hwirq]; | ||
633 | } | ||
634 | |||
635 | #ifdef CONFIG_VIRQ_DEBUG | ||
636 | static int virq_debug_show(struct seq_file *m, void *private) | ||
637 | { | ||
638 | unsigned long flags; | ||
639 | struct irq_desc *desc; | ||
640 | const char *p; | ||
641 | static const char none[] = "none"; | ||
642 | void *data; | ||
643 | int i; | ||
644 | |||
645 | seq_printf(m, "%-5s %-7s %-15s %-18s %s\n", "virq", "hwirq", | ||
646 | "chip name", "chip data", "domain name"); | ||
647 | |||
648 | for (i = 1; i < nr_irqs; i++) { | ||
649 | desc = irq_to_desc(i); | ||
650 | if (!desc) | ||
651 | continue; | ||
652 | |||
653 | raw_spin_lock_irqsave(&desc->lock, flags); | ||
654 | |||
655 | if (desc->action && desc->action->handler) { | ||
656 | struct irq_chip *chip; | ||
657 | |||
658 | seq_printf(m, "%5d ", i); | ||
659 | seq_printf(m, "0x%05lx ", desc->irq_data.hwirq); | ||
660 | |||
661 | chip = irq_desc_get_chip(desc); | ||
662 | if (chip && chip->name) | ||
663 | p = chip->name; | ||
664 | else | ||
665 | p = none; | ||
666 | seq_printf(m, "%-15s ", p); | ||
667 | |||
668 | data = irq_desc_get_chip_data(desc); | ||
669 | seq_printf(m, "0x%16p ", data); | ||
670 | |||
671 | if (desc->irq_data.domain->of_node) | ||
672 | p = desc->irq_data.domain->of_node->full_name; | ||
673 | else | ||
674 | p = none; | ||
675 | seq_printf(m, "%s\n", p); | ||
676 | } | ||
677 | |||
678 | raw_spin_unlock_irqrestore(&desc->lock, flags); | ||
679 | } | ||
680 | |||
681 | return 0; | ||
682 | } | ||
141 | 683 | ||
684 | static int virq_debug_open(struct inode *inode, struct file *file) | ||
685 | { | ||
686 | return single_open(file, virq_debug_show, inode->i_private); | ||
687 | } | ||
688 | |||
689 | static const struct file_operations virq_debug_fops = { | ||
690 | .open = virq_debug_open, | ||
691 | .read = seq_read, | ||
692 | .llseek = seq_lseek, | ||
693 | .release = single_release, | ||
694 | }; | ||
695 | |||
696 | static int __init irq_debugfs_init(void) | ||
697 | { | ||
698 | if (debugfs_create_file("virq_mapping", S_IRUGO, powerpc_debugfs_root, | ||
699 | NULL, &virq_debug_fops) == NULL) | ||
700 | return -ENOMEM; | ||
701 | |||
702 | return 0; | ||
703 | } | ||
704 | __initcall(irq_debugfs_init); | ||
705 | #endif /* CONFIG_VIRQ_DEBUG */ | ||
706 | |||
707 | int irq_domain_simple_map(struct irq_domain *d, unsigned int irq, | ||
708 | irq_hw_number_t hwirq) | ||
709 | { | ||
710 | return 0; | ||
711 | } | ||
712 | |||
713 | /** | ||
714 | * irq_domain_xlate_onecell() - Generic xlate for direct one cell bindings | ||
715 | * | ||
716 | * Device Tree IRQ specifier translation function which works with one cell | ||
717 | * bindings where the cell value maps directly to the hwirq number. | ||
718 | */ | ||
719 | int irq_domain_xlate_onecell(struct irq_domain *d, struct device_node *ctrlr, | ||
720 | const u32 *intspec, unsigned int intsize, | ||
721 | unsigned long *out_hwirq, unsigned int *out_type) | ||
722 | { | ||
723 | if (WARN_ON(intsize < 1)) | ||
724 | return -EINVAL; | ||
142 | *out_hwirq = intspec[0]; | 725 | *out_hwirq = intspec[0]; |
143 | *out_type = IRQ_TYPE_NONE; | 726 | *out_type = IRQ_TYPE_NONE; |
144 | if (intsize > 1) | ||
145 | *out_type = intspec[1] & IRQ_TYPE_SENSE_MASK; | ||
146 | return 0; | 727 | return 0; |
147 | } | 728 | } |
729 | EXPORT_SYMBOL_GPL(irq_domain_xlate_onecell); | ||
148 | 730 | ||
149 | /** | 731 | /** |
150 | * irq_domain_create_simple() - Set up a 'simple' translation range | 732 | * irq_domain_xlate_twocell() - Generic xlate for direct two cell bindings |
733 | * | ||
734 | * Device Tree IRQ specifier translation function which works with two cell | ||
735 | * bindings where the cell values map directly to the hwirq number | ||
736 | * and linux irq flags. | ||
151 | */ | 737 | */ |
152 | void irq_domain_add_simple(struct device_node *controller, int irq_base) | 738 | int irq_domain_xlate_twocell(struct irq_domain *d, struct device_node *ctrlr, |
739 | const u32 *intspec, unsigned int intsize, | ||
740 | irq_hw_number_t *out_hwirq, unsigned int *out_type) | ||
153 | { | 741 | { |
154 | struct irq_domain *domain; | 742 | if (WARN_ON(intsize < 2)) |
155 | 743 | return -EINVAL; | |
156 | domain = kzalloc(sizeof(*domain), GFP_KERNEL); | 744 | *out_hwirq = intspec[0]; |
157 | if (!domain) { | 745 | *out_type = intspec[1] & IRQ_TYPE_SENSE_MASK; |
158 | WARN_ON(1); | 746 | return 0; |
159 | return; | 747 | } |
160 | } | 748 | EXPORT_SYMBOL_GPL(irq_domain_xlate_twocell); |
161 | 749 | ||
162 | domain->irq_base = irq_base; | 750 | /** |
163 | domain->of_node = of_node_get(controller); | 751 | * irq_domain_xlate_onetwocell() - Generic xlate for one or two cell bindings |
164 | domain->ops = &irq_domain_simple_ops; | 752 | * |
165 | irq_domain_add(domain); | 753 | * Device Tree IRQ specifier translation function which works with either one |
754 | * or two cell bindings where the cell values map directly to the hwirq number | ||
755 | * and linux irq flags. | ||
756 | * | ||
757 | * Note: don't use this function unless your interrupt controller explicitly | ||
758 | * supports both one and two cell bindings. For the majority of controllers | ||
759 | * the _onecell() or _twocell() variants above should be used. | ||
760 | */ | ||
761 | int irq_domain_xlate_onetwocell(struct irq_domain *d, | ||
762 | struct device_node *ctrlr, | ||
763 | const u32 *intspec, unsigned int intsize, | ||
764 | unsigned long *out_hwirq, unsigned int *out_type) | ||
765 | { | ||
766 | if (WARN_ON(intsize < 1)) | ||
767 | return -EINVAL; | ||
768 | *out_hwirq = intspec[0]; | ||
769 | *out_type = (intsize > 1) ? intspec[1] : IRQ_TYPE_NONE; | ||
770 | return 0; | ||
166 | } | 771 | } |
167 | EXPORT_SYMBOL_GPL(irq_domain_add_simple); | 772 | EXPORT_SYMBOL_GPL(irq_domain_xlate_onetwocell); |
168 | 773 | ||
774 | const struct irq_domain_ops irq_domain_simple_ops = { | ||
775 | .map = irq_domain_simple_map, | ||
776 | .xlate = irq_domain_xlate_onetwocell, | ||
777 | }; | ||
778 | EXPORT_SYMBOL_GPL(irq_domain_simple_ops); | ||
779 | |||
780 | #ifdef CONFIG_OF_IRQ | ||
169 | void irq_domain_generate_simple(const struct of_device_id *match, | 781 | void irq_domain_generate_simple(const struct of_device_id *match, |
170 | u64 phys_base, unsigned int irq_start) | 782 | u64 phys_base, unsigned int irq_start) |
171 | { | 783 | { |
172 | struct device_node *node; | 784 | struct device_node *node; |
173 | pr_info("looking for phys_base=%llx, irq_start=%i\n", | 785 | pr_debug("looking for phys_base=%llx, irq_start=%i\n", |
174 | (unsigned long long) phys_base, (int) irq_start); | 786 | (unsigned long long) phys_base, (int) irq_start); |
175 | node = of_find_matching_node_by_address(NULL, match, phys_base); | 787 | node = of_find_matching_node_by_address(NULL, match, phys_base); |
176 | if (node) | 788 | if (node) |
177 | irq_domain_add_simple(node, irq_start); | 789 | irq_domain_add_legacy(node, 32, irq_start, 0, |
178 | else | 790 | &irq_domain_simple_ops, NULL); |
179 | pr_info("no node found\n"); | ||
180 | } | 791 | } |
181 | EXPORT_SYMBOL_GPL(irq_domain_generate_simple); | 792 | EXPORT_SYMBOL_GPL(irq_domain_generate_simple); |
182 | #endif /* CONFIG_OF_IRQ */ | 793 | #endif |
183 | |||
184 | struct irq_domain_ops irq_domain_simple_ops = { | ||
185 | #ifdef CONFIG_OF_IRQ | ||
186 | .dt_translate = irq_domain_simple_dt_translate, | ||
187 | #endif /* CONFIG_OF_IRQ */ | ||
188 | }; | ||
189 | EXPORT_SYMBOL_GPL(irq_domain_simple_ops); | ||
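Taken together, the xlate helpers added above all do the same small job: pull the hardware IRQ number out of the first specifier cell and, when a second cell is present, mask it down to the trigger type. A minimal userspace sketch of the two-cell case follows; IRQ_TYPE_SENSE_MASK and the error value are assumptions standing in for the kernel definitions, and xlate_twocell() is an illustrative analogue, not the kernel function.

#include <stdint.h>
#include <stdio.h>

/* Illustrative userspace analogue of the two-cell translation above:
 * cell 0 is the hardware IRQ number, cell 1 carries the trigger type.
 * IRQ_TYPE_SENSE_MASK is assumed to be the low four bits, as in the
 * kernel's IRQ_TYPE_* flags; this is a sketch, not kernel code. */
#define IRQ_TYPE_SENSE_MASK 0x0000000f

static int xlate_twocell(const uint32_t *intspec, unsigned int intsize,
                         unsigned long *out_hwirq, unsigned int *out_type)
{
        if (intsize < 2)
                return -1;              /* -EINVAL in the kernel version */
        *out_hwirq = intspec[0];
        *out_type = intspec[1] & IRQ_TYPE_SENSE_MASK;
        return 0;
}

int main(void)
{
        uint32_t spec[] = { 29, 0x4 };  /* hwirq 29, example trigger bits */
        unsigned long hwirq;
        unsigned int type;

        if (!xlate_twocell(spec, 2, &hwirq, &type))
                printf("hwirq=%lu type=%#x\n", hwirq, type);
        return 0;
}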
diff --git a/kernel/kexec.c b/kernel/kexec.c index 7b0886786701..a6a675cb9818 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c | |||
@@ -1546,13 +1546,13 @@ int kernel_kexec(void) | |||
1546 | if (error) | 1546 | if (error) |
1547 | goto Resume_console; | 1547 | goto Resume_console; |
1548 | /* At this point, dpm_suspend_start() has been called, | 1548 | /* At this point, dpm_suspend_start() has been called, |
1549 | * but *not* dpm_suspend_noirq(). We *must* call | 1549 | * but *not* dpm_suspend_end(). We *must* call |
1550 | * dpm_suspend_noirq() now. Otherwise, drivers for | 1550 | * dpm_suspend_end() now. Otherwise, drivers for |
1551 | * some devices (e.g. interrupt controllers) become | 1551 | * some devices (e.g. interrupt controllers) become |
1552 | * desynchronized with the actual state of the | 1552 | * desynchronized with the actual state of the |
1553 | * hardware at resume time, and evil weirdness ensues. | 1553 | * hardware at resume time, and evil weirdness ensues. |
1554 | */ | 1554 | */ |
1555 | error = dpm_suspend_noirq(PMSG_FREEZE); | 1555 | error = dpm_suspend_end(PMSG_FREEZE); |
1556 | if (error) | 1556 | if (error) |
1557 | goto Resume_devices; | 1557 | goto Resume_devices; |
1558 | error = disable_nonboot_cpus(); | 1558 | error = disable_nonboot_cpus(); |
@@ -1579,7 +1579,7 @@ int kernel_kexec(void) | |||
1579 | local_irq_enable(); | 1579 | local_irq_enable(); |
1580 | Enable_cpus: | 1580 | Enable_cpus: |
1581 | enable_nonboot_cpus(); | 1581 | enable_nonboot_cpus(); |
1582 | dpm_resume_noirq(PMSG_RESTORE); | 1582 | dpm_resume_start(PMSG_RESTORE); |
1583 | Resume_devices: | 1583 | Resume_devices: |
1584 | dpm_resume_end(PMSG_RESTORE); | 1584 | dpm_resume_end(PMSG_RESTORE); |
1585 | Resume_console: | 1585 | Resume_console: |
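The comment the kexec hunk updates is really about ordering: every suspend phase that succeeded has to be undone by its counterpart, in reverse, on the error path. A rough userspace sketch of that ladder follows; all of the dpm_* and CPU functions here are stub stand-ins with made-up behaviour, not the kernel calls.

#include <stdio.h>

/* Userspace sketch of the phase ordering the comment above insists on:
 * each suspend step that succeeds must be undone by its counterpart in
 * reverse order on the error path. The names mirror the kexec path only
 * loosely; everything below is a stub, not a kernel function. */
static int dpm_suspend_start_stub(void) { puts("suspend_start"); return 0; }
static int dpm_suspend_end_stub(void)   { puts("suspend_end");   return 0; }
static int disable_nonboot_cpus_stub(void) { puts("cpus off"); return -1; }
static void enable_nonboot_cpus_stub(void) { puts("cpus on"); }
static void dpm_resume_start_stub(void) { puts("resume_start"); }
static void dpm_resume_end_stub(void)   { puts("resume_end"); }

static int kexec_like_sequence(void)
{
        int error;

        error = dpm_suspend_start_stub();
        if (error)
                return error;
        error = dpm_suspend_end_stub();      /* must follow suspend_start */
        if (error)
                goto Resume_devices;
        error = disable_nonboot_cpus_stub();
        if (error)
                goto Enable_cpus;

        /* ... the actual kexec jump would happen here ... */

Enable_cpus:
        enable_nonboot_cpus_stub();
        dpm_resume_start_stub();             /* undoes dpm_suspend_end() */
Resume_devices:
        dpm_resume_end_stub();               /* undoes dpm_suspend_start() */
        return error;
}

int main(void) { return kexec_like_sequence() ? 1 : 0; }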
diff --git a/kernel/kmod.c b/kernel/kmod.c index a0a88543934e..957a7aab8ebc 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c | |||
@@ -60,6 +60,43 @@ static DECLARE_RWSEM(umhelper_sem); | |||
60 | */ | 60 | */ |
61 | char modprobe_path[KMOD_PATH_LEN] = "/sbin/modprobe"; | 61 | char modprobe_path[KMOD_PATH_LEN] = "/sbin/modprobe"; |
62 | 62 | ||
63 | static void free_modprobe_argv(struct subprocess_info *info) | ||
64 | { | ||
65 | kfree(info->argv[3]); /* check call_modprobe() */ | ||
66 | kfree(info->argv); | ||
67 | } | ||
68 | |||
69 | static int call_modprobe(char *module_name, int wait) | ||
70 | { | ||
71 | static char *envp[] = { | ||
72 | "HOME=/", | ||
73 | "TERM=linux", | ||
74 | "PATH=/sbin:/usr/sbin:/bin:/usr/bin", | ||
75 | NULL | ||
76 | }; | ||
77 | |||
78 | char **argv = kmalloc(sizeof(char *[5]), GFP_KERNEL); | ||
79 | if (!argv) | ||
80 | goto out; | ||
81 | |||
82 | module_name = kstrdup(module_name, GFP_KERNEL); | ||
83 | if (!module_name) | ||
84 | goto free_argv; | ||
85 | |||
86 | argv[0] = modprobe_path; | ||
87 | argv[1] = "-q"; | ||
88 | argv[2] = "--"; | ||
89 | argv[3] = module_name; /* check free_modprobe_argv() */ | ||
90 | argv[4] = NULL; | ||
91 | |||
92 | return call_usermodehelper_fns(modprobe_path, argv, envp, | ||
93 | wait | UMH_KILLABLE, NULL, free_modprobe_argv, NULL); | ||
94 | free_argv: | ||
95 | kfree(argv); | ||
96 | out: | ||
97 | return -ENOMEM; | ||
98 | } | ||
99 | |||
63 | /** | 100 | /** |
64 | * __request_module - try to load a kernel module | 101 | * __request_module - try to load a kernel module |
65 | * @wait: wait (or not) for the operation to complete | 102 | * @wait: wait (or not) for the operation to complete |
@@ -81,11 +118,6 @@ int __request_module(bool wait, const char *fmt, ...) | |||
81 | char module_name[MODULE_NAME_LEN]; | 118 | char module_name[MODULE_NAME_LEN]; |
82 | unsigned int max_modprobes; | 119 | unsigned int max_modprobes; |
83 | int ret; | 120 | int ret; |
84 | char *argv[] = { modprobe_path, "-q", "--", module_name, NULL }; | ||
85 | static char *envp[] = { "HOME=/", | ||
86 | "TERM=linux", | ||
87 | "PATH=/sbin:/usr/sbin:/bin:/usr/bin", | ||
88 | NULL }; | ||
89 | static atomic_t kmod_concurrent = ATOMIC_INIT(0); | 121 | static atomic_t kmod_concurrent = ATOMIC_INIT(0); |
90 | #define MAX_KMOD_CONCURRENT 50 /* Completely arbitrary value - KAO */ | 122 | #define MAX_KMOD_CONCURRENT 50 /* Completely arbitrary value - KAO */ |
91 | static int kmod_loop_msg; | 123 | static int kmod_loop_msg; |
@@ -128,9 +160,7 @@ int __request_module(bool wait, const char *fmt, ...) | |||
128 | 160 | ||
129 | trace_module_request(module_name, wait, _RET_IP_); | 161 | trace_module_request(module_name, wait, _RET_IP_); |
130 | 162 | ||
131 | ret = call_usermodehelper_fns(modprobe_path, argv, envp, | 163 | ret = call_modprobe(module_name, wait ? UMH_WAIT_PROC : UMH_WAIT_EXEC); |
132 | wait ? UMH_WAIT_PROC : UMH_WAIT_EXEC, | ||
133 | NULL, NULL, NULL); | ||
134 | 164 | ||
135 | atomic_dec(&kmod_concurrent); | 165 | atomic_dec(&kmod_concurrent); |
136 | return ret; | 166 | return ret; |
@@ -188,7 +218,7 @@ static int ____call_usermodehelper(void *data) | |||
188 | /* Exec failed? */ | 218 | /* Exec failed? */ |
189 | fail: | 219 | fail: |
190 | sub_info->retval = retval; | 220 | sub_info->retval = retval; |
191 | do_exit(0); | 221 | return 0; |
192 | } | 222 | } |
193 | 223 | ||
194 | void call_usermodehelper_freeinfo(struct subprocess_info *info) | 224 | void call_usermodehelper_freeinfo(struct subprocess_info *info) |
@@ -199,6 +229,19 @@ void call_usermodehelper_freeinfo(struct subprocess_info *info) | |||
199 | } | 229 | } |
200 | EXPORT_SYMBOL(call_usermodehelper_freeinfo); | 230 | EXPORT_SYMBOL(call_usermodehelper_freeinfo); |
201 | 231 | ||
232 | static void umh_complete(struct subprocess_info *sub_info) | ||
233 | { | ||
234 | struct completion *comp = xchg(&sub_info->complete, NULL); | ||
235 | /* | ||
236 | * See call_usermodehelper_exec(). If xchg() returns NULL | ||
237 | * we own sub_info, the UMH_KILLABLE caller has gone away. | ||
238 | */ | ||
239 | if (comp) | ||
240 | complete(comp); | ||
241 | else | ||
242 | call_usermodehelper_freeinfo(sub_info); | ||
243 | } | ||
244 | |||
202 | /* Keventd can't block, but this (a child) can. */ | 245 | /* Keventd can't block, but this (a child) can. */ |
203 | static int wait_for_helper(void *data) | 246 | static int wait_for_helper(void *data) |
204 | { | 247 | { |
@@ -235,7 +278,7 @@ static int wait_for_helper(void *data) | |||
235 | sub_info->retval = ret; | 278 | sub_info->retval = ret; |
236 | } | 279 | } |
237 | 280 | ||
238 | complete(sub_info->complete); | 281 | umh_complete(sub_info); |
239 | return 0; | 282 | return 0; |
240 | } | 283 | } |
241 | 284 | ||
@@ -244,7 +287,7 @@ static void __call_usermodehelper(struct work_struct *work) | |||
244 | { | 287 | { |
245 | struct subprocess_info *sub_info = | 288 | struct subprocess_info *sub_info = |
246 | container_of(work, struct subprocess_info, work); | 289 | container_of(work, struct subprocess_info, work); |
247 | enum umh_wait wait = sub_info->wait; | 290 | int wait = sub_info->wait & ~UMH_KILLABLE; |
248 | pid_t pid; | 291 | pid_t pid; |
249 | 292 | ||
250 | /* CLONE_VFORK: wait until the usermode helper has execve'd | 293 | /* CLONE_VFORK: wait until the usermode helper has execve'd |
@@ -269,7 +312,7 @@ static void __call_usermodehelper(struct work_struct *work) | |||
269 | case UMH_WAIT_EXEC: | 312 | case UMH_WAIT_EXEC: |
270 | if (pid < 0) | 313 | if (pid < 0) |
271 | sub_info->retval = pid; | 314 | sub_info->retval = pid; |
272 | complete(sub_info->complete); | 315 | umh_complete(sub_info); |
273 | } | 316 | } |
274 | } | 317 | } |
275 | 318 | ||
@@ -435,8 +478,7 @@ EXPORT_SYMBOL(call_usermodehelper_setfns); | |||
435 | * asynchronously if wait is not set, and runs as a child of keventd. | 478 | * asynchronously if wait is not set, and runs as a child of keventd. |
436 | * (ie. it runs with full root capabilities). | 479 | * (ie. it runs with full root capabilities). |
437 | */ | 480 | */ |
438 | int call_usermodehelper_exec(struct subprocess_info *sub_info, | 481 | int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait) |
439 | enum umh_wait wait) | ||
440 | { | 482 | { |
441 | DECLARE_COMPLETION_ONSTACK(done); | 483 | DECLARE_COMPLETION_ONSTACK(done); |
442 | int retval = 0; | 484 | int retval = 0; |
@@ -456,9 +498,21 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, | |||
456 | queue_work(khelper_wq, &sub_info->work); | 498 | queue_work(khelper_wq, &sub_info->work); |
457 | if (wait == UMH_NO_WAIT) /* task has freed sub_info */ | 499 | if (wait == UMH_NO_WAIT) /* task has freed sub_info */ |
458 | goto unlock; | 500 | goto unlock; |
501 | |||
502 | if (wait & UMH_KILLABLE) { | ||
503 | retval = wait_for_completion_killable(&done); | ||
504 | if (!retval) | ||
505 | goto wait_done; | ||
506 | |||
507 | /* umh_complete() will see NULL and free sub_info */ | ||
508 | if (xchg(&sub_info->complete, NULL)) | ||
509 | goto unlock; | ||
510 | /* fallthrough, umh_complete() was already called */ | ||
511 | } | ||
512 | |||
459 | wait_for_completion(&done); | 513 | wait_for_completion(&done); |
514 | wait_done: | ||
460 | retval = sub_info->retval; | 515 | retval = sub_info->retval; |
461 | |||
462 | out: | 516 | out: |
463 | call_usermodehelper_freeinfo(sub_info); | 517 | call_usermodehelper_freeinfo(sub_info); |
464 | unlock: | 518 | unlock: |
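The interesting piece of the kmod.c change is the ownership handoff behind UMH_KILLABLE: both the helper (umh_complete()) and the interrupted waiter atomically swap sub_info->complete with NULL, and whichever side still sees the old pointer knows the other side is still around, while the side that sees NULL knows it has been left alone with the object. A small userspace sketch of that xchg() pattern, using C11 atomics and illustrative names, is below.

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

/* Userspace sketch of the xchg() ownership handoff used by umh_complete()
 * and the UMH_KILLABLE path above: both sides atomically swap the shared
 * pointer with NULL, and exactly one of them ends up responsible for the
 * final free. Names and the payload are illustrative only. */
struct sub_info {
        _Atomic(void *) complete;       /* stands in for sub_info->complete */
        char *payload;
};

/* Called by the helper when it finishes. */
static void helper_complete(struct sub_info *s)
{
        void *comp = atomic_exchange(&s->complete, NULL);
        if (comp) {
                /* waiter is still waiting: wake it, it will free s */
                printf("helper: signalling waiter\n");
        } else {
                /* waiter already gave up: the helper owns s now */
                printf("helper: waiter gone, freeing payload\n");
                free(s->payload);
        }
}

/* Called by the waiter when it is interrupted and stops waiting. */
static void waiter_abort(struct sub_info *s)
{
        if (atomic_exchange(&s->complete, NULL)) {
                /* we cleared the pointer first: helper will free s later */
                printf("waiter: interrupted, leaving cleanup to the helper\n");
        } else {
                /* helper already completed: cleanup falls to the waiter */
                printf("waiter: helper finished first, freeing payload\n");
                free(s->payload);
        }
}

int main(void)
{
        static int token;
        struct sub_info s = { .payload = malloc(16) };

        atomic_store(&s.complete, &token);
        waiter_abort(&s);       /* waiter bails out first ... */
        helper_complete(&s);    /* ... so the helper ends up freeing */
        return 0;
}

Running the two calls in the opposite order flips which side frees the payload, which is the whole point of the handoff: the object is freed exactly once, no matter who loses the race.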
diff --git a/kernel/padata.c b/kernel/padata.c index b45259931512..6f10eb285ece 100644 --- a/kernel/padata.c +++ b/kernel/padata.c | |||
@@ -29,7 +29,6 @@ | |||
29 | #include <linux/sysfs.h> | 29 | #include <linux/sysfs.h> |
30 | #include <linux/rcupdate.h> | 30 | #include <linux/rcupdate.h> |
31 | 31 | ||
32 | #define MAX_SEQ_NR (INT_MAX - NR_CPUS) | ||
33 | #define MAX_OBJ_NUM 1000 | 32 | #define MAX_OBJ_NUM 1000 |
34 | 33 | ||
35 | static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index) | 34 | static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index) |
@@ -43,18 +42,19 @@ static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index) | |||
43 | return target_cpu; | 42 | return target_cpu; |
44 | } | 43 | } |
45 | 44 | ||
46 | static int padata_cpu_hash(struct padata_priv *padata) | 45 | static int padata_cpu_hash(struct parallel_data *pd) |
47 | { | 46 | { |
48 | int cpu_index; | 47 | int cpu_index; |
49 | struct parallel_data *pd; | ||
50 | |||
51 | pd = padata->pd; | ||
52 | 48 | ||
53 | /* | 49 | /* |
54 | * Hash the sequence numbers to the cpus by taking | 50 | * Hash the sequence numbers to the cpus by taking |
55 | * seq_nr mod. number of cpus in use. | 51 | * seq_nr mod. number of cpus in use. |
56 | */ | 52 | */ |
57 | cpu_index = padata->seq_nr % cpumask_weight(pd->cpumask.pcpu); | 53 | |
54 | spin_lock(&pd->seq_lock); | ||
55 | cpu_index = pd->seq_nr % cpumask_weight(pd->cpumask.pcpu); | ||
56 | pd->seq_nr++; | ||
57 | spin_unlock(&pd->seq_lock); | ||
58 | 58 | ||
59 | return padata_index_to_cpu(pd, cpu_index); | 59 | return padata_index_to_cpu(pd, cpu_index); |
60 | } | 60 | } |
@@ -132,12 +132,7 @@ int padata_do_parallel(struct padata_instance *pinst, | |||
132 | padata->pd = pd; | 132 | padata->pd = pd; |
133 | padata->cb_cpu = cb_cpu; | 133 | padata->cb_cpu = cb_cpu; |
134 | 134 | ||
135 | if (unlikely(atomic_read(&pd->seq_nr) == pd->max_seq_nr)) | 135 | target_cpu = padata_cpu_hash(pd); |
136 | atomic_set(&pd->seq_nr, -1); | ||
137 | |||
138 | padata->seq_nr = atomic_inc_return(&pd->seq_nr); | ||
139 | |||
140 | target_cpu = padata_cpu_hash(padata); | ||
141 | queue = per_cpu_ptr(pd->pqueue, target_cpu); | 136 | queue = per_cpu_ptr(pd->pqueue, target_cpu); |
142 | 137 | ||
143 | spin_lock(&queue->parallel.lock); | 138 | spin_lock(&queue->parallel.lock); |
@@ -173,7 +168,7 @@ EXPORT_SYMBOL(padata_do_parallel); | |||
173 | static struct padata_priv *padata_get_next(struct parallel_data *pd) | 168 | static struct padata_priv *padata_get_next(struct parallel_data *pd) |
174 | { | 169 | { |
175 | int cpu, num_cpus; | 170 | int cpu, num_cpus; |
176 | int next_nr, next_index; | 171 | unsigned int next_nr, next_index; |
177 | struct padata_parallel_queue *queue, *next_queue; | 172 | struct padata_parallel_queue *queue, *next_queue; |
178 | struct padata_priv *padata; | 173 | struct padata_priv *padata; |
179 | struct padata_list *reorder; | 174 | struct padata_list *reorder; |
@@ -189,14 +184,6 @@ static struct padata_priv *padata_get_next(struct parallel_data *pd) | |||
189 | cpu = padata_index_to_cpu(pd, next_index); | 184 | cpu = padata_index_to_cpu(pd, next_index); |
190 | next_queue = per_cpu_ptr(pd->pqueue, cpu); | 185 | next_queue = per_cpu_ptr(pd->pqueue, cpu); |
191 | 186 | ||
192 | if (unlikely(next_nr > pd->max_seq_nr)) { | ||
193 | next_nr = next_nr - pd->max_seq_nr - 1; | ||
194 | next_index = next_nr % num_cpus; | ||
195 | cpu = padata_index_to_cpu(pd, next_index); | ||
196 | next_queue = per_cpu_ptr(pd->pqueue, cpu); | ||
197 | pd->processed = 0; | ||
198 | } | ||
199 | |||
200 | padata = NULL; | 187 | padata = NULL; |
201 | 188 | ||
202 | reorder = &next_queue->reorder; | 189 | reorder = &next_queue->reorder; |
@@ -205,8 +192,6 @@ static struct padata_priv *padata_get_next(struct parallel_data *pd) | |||
205 | padata = list_entry(reorder->list.next, | 192 | padata = list_entry(reorder->list.next, |
206 | struct padata_priv, list); | 193 | struct padata_priv, list); |
207 | 194 | ||
208 | BUG_ON(next_nr != padata->seq_nr); | ||
209 | |||
210 | spin_lock(&reorder->lock); | 195 | spin_lock(&reorder->lock); |
211 | list_del_init(&padata->list); | 196 | list_del_init(&padata->list); |
212 | atomic_dec(&pd->reorder_objects); | 197 | atomic_dec(&pd->reorder_objects); |
@@ -230,6 +215,7 @@ out: | |||
230 | 215 | ||
231 | static void padata_reorder(struct parallel_data *pd) | 216 | static void padata_reorder(struct parallel_data *pd) |
232 | { | 217 | { |
218 | int cb_cpu; | ||
233 | struct padata_priv *padata; | 219 | struct padata_priv *padata; |
234 | struct padata_serial_queue *squeue; | 220 | struct padata_serial_queue *squeue; |
235 | struct padata_instance *pinst = pd->pinst; | 221 | struct padata_instance *pinst = pd->pinst; |
@@ -270,13 +256,14 @@ static void padata_reorder(struct parallel_data *pd) | |||
270 | return; | 256 | return; |
271 | } | 257 | } |
272 | 258 | ||
273 | squeue = per_cpu_ptr(pd->squeue, padata->cb_cpu); | 259 | cb_cpu = padata->cb_cpu; |
260 | squeue = per_cpu_ptr(pd->squeue, cb_cpu); | ||
274 | 261 | ||
275 | spin_lock(&squeue->serial.lock); | 262 | spin_lock(&squeue->serial.lock); |
276 | list_add_tail(&padata->list, &squeue->serial.list); | 263 | list_add_tail(&padata->list, &squeue->serial.list); |
277 | spin_unlock(&squeue->serial.lock); | 264 | spin_unlock(&squeue->serial.lock); |
278 | 265 | ||
279 | queue_work_on(padata->cb_cpu, pinst->wq, &squeue->work); | 266 | queue_work_on(cb_cpu, pinst->wq, &squeue->work); |
280 | } | 267 | } |
281 | 268 | ||
282 | spin_unlock_bh(&pd->lock); | 269 | spin_unlock_bh(&pd->lock); |
@@ -400,7 +387,7 @@ static void padata_init_squeues(struct parallel_data *pd) | |||
400 | /* Initialize all percpu queues used by parallel workers */ | 387 | /* Initialize all percpu queues used by parallel workers */ |
401 | static void padata_init_pqueues(struct parallel_data *pd) | 388 | static void padata_init_pqueues(struct parallel_data *pd) |
402 | { | 389 | { |
403 | int cpu_index, num_cpus, cpu; | 390 | int cpu_index, cpu; |
404 | struct padata_parallel_queue *pqueue; | 391 | struct padata_parallel_queue *pqueue; |
405 | 392 | ||
406 | cpu_index = 0; | 393 | cpu_index = 0; |
@@ -415,9 +402,6 @@ static void padata_init_pqueues(struct parallel_data *pd) | |||
415 | INIT_WORK(&pqueue->work, padata_parallel_worker); | 402 | INIT_WORK(&pqueue->work, padata_parallel_worker); |
416 | atomic_set(&pqueue->num_obj, 0); | 403 | atomic_set(&pqueue->num_obj, 0); |
417 | } | 404 | } |
418 | |||
419 | num_cpus = cpumask_weight(pd->cpumask.pcpu); | ||
420 | pd->max_seq_nr = num_cpus ? (MAX_SEQ_NR / num_cpus) * num_cpus - 1 : 0; | ||
421 | } | 405 | } |
422 | 406 | ||
423 | /* Allocate and initialize the internal cpumask dependend resources. */ | 407 | /* Allocate and initialize the internal cpumask dependend resources. */ |
@@ -444,7 +428,7 @@ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst, | |||
444 | padata_init_pqueues(pd); | 428 | padata_init_pqueues(pd); |
445 | padata_init_squeues(pd); | 429 | padata_init_squeues(pd); |
446 | setup_timer(&pd->timer, padata_reorder_timer, (unsigned long)pd); | 430 | setup_timer(&pd->timer, padata_reorder_timer, (unsigned long)pd); |
447 | atomic_set(&pd->seq_nr, -1); | 431 | pd->seq_nr = 0; |
448 | atomic_set(&pd->reorder_objects, 0); | 432 | atomic_set(&pd->reorder_objects, 0); |
449 | atomic_set(&pd->refcnt, 0); | 433 | atomic_set(&pd->refcnt, 0); |
450 | pd->pinst = pinst; | 434 | pd->pinst = pinst; |
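With the per-object seq_nr gone, padata_cpu_hash() is just a locked counter reduced modulo the number of parallel CPUs. A userspace sketch of that round-robin selection follows; struct pd, the mutex and the CPU count are stand-ins for the kernel's parallel_data, and the unbounded counter is presumably what lets the old MAX_SEQ_NR wrap handling disappear.

#include <pthread.h>
#include <stdio.h>

/* Userspace sketch of the new padata_cpu_hash() logic above: a plain
 * counter protected by a lock, hashed onto the active CPUs by modulo.
 * Only "seq_nr % num_cpus" is ever used, so the counter itself does not
 * need an explicit upper bound. */
struct pd {
        pthread_mutex_t seq_lock;
        unsigned int seq_nr;
        unsigned int num_cpus;
};

static unsigned int cpu_hash(struct pd *pd)
{
        unsigned int idx;

        pthread_mutex_lock(&pd->seq_lock);
        idx = pd->seq_nr % pd->num_cpus;
        pd->seq_nr++;
        pthread_mutex_unlock(&pd->seq_lock);
        return idx;
}

int main(void)
{
        struct pd pd = { PTHREAD_MUTEX_INITIALIZER, 0, 4 };

        for (int i = 0; i < 8; i++)
                printf("object %d -> cpu index %u\n", i, cpu_hash(&pd));
        return 0;
}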
diff --git a/kernel/params.c b/kernel/params.c index 4bc965d8a1fe..47f5bf12434a 100644 --- a/kernel/params.c +++ b/kernel/params.c | |||
@@ -15,7 +15,6 @@ | |||
15 | along with this program; if not, write to the Free Software | 15 | along with this program; if not, write to the Free Software |
16 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 16 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
17 | */ | 17 | */ |
18 | #include <linux/module.h> | ||
19 | #include <linux/kernel.h> | 18 | #include <linux/kernel.h> |
20 | #include <linux/string.h> | 19 | #include <linux/string.h> |
21 | #include <linux/errno.h> | 20 | #include <linux/errno.h> |
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index a8968396046d..17b232869a04 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c | |||
@@ -168,13 +168,9 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns) | |||
168 | while (nr > 0) { | 168 | while (nr > 0) { |
169 | rcu_read_lock(); | 169 | rcu_read_lock(); |
170 | 170 | ||
171 | /* | ||
172 | * Any nested-container's init processes won't ignore the | ||
173 | * SEND_SIG_NOINFO signal, see send_signal()->si_fromuser(). | ||
174 | */ | ||
175 | task = pid_task(find_vpid(nr), PIDTYPE_PID); | 171 | task = pid_task(find_vpid(nr), PIDTYPE_PID); |
176 | if (task) | 172 | if (task && !__fatal_signal_pending(task)) |
177 | send_sig_info(SIGKILL, SEND_SIG_NOINFO, task); | 173 | send_sig_info(SIGKILL, SEND_SIG_FORCED, task); |
178 | 174 | ||
179 | rcu_read_unlock(); | 175 | rcu_read_unlock(); |
180 | 176 | ||
diff --git a/kernel/power/Makefile b/kernel/power/Makefile index 07e0e28ffba7..66d808ec5252 100644 --- a/kernel/power/Makefile +++ b/kernel/power/Makefile | |||
@@ -1,7 +1,8 @@ | |||
1 | 1 | ||
2 | ccflags-$(CONFIG_PM_DEBUG) := -DDEBUG | 2 | ccflags-$(CONFIG_PM_DEBUG) := -DDEBUG |
3 | 3 | ||
4 | obj-$(CONFIG_PM) += main.o qos.o | 4 | obj-y += qos.o |
5 | obj-$(CONFIG_PM) += main.o | ||
5 | obj-$(CONFIG_VT_CONSOLE_SLEEP) += console.o | 6 | obj-$(CONFIG_VT_CONSOLE_SLEEP) += console.o |
6 | obj-$(CONFIG_FREEZER) += process.o | 7 | obj-$(CONFIG_FREEZER) += process.o |
7 | obj-$(CONFIG_SUSPEND) += suspend.o | 8 | obj-$(CONFIG_SUSPEND) += suspend.o |
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 6d6d28870335..0a186cfde788 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c | |||
@@ -245,8 +245,8 @@ void swsusp_show_speed(struct timeval *start, struct timeval *stop, | |||
245 | * create_image - Create a hibernation image. | 245 | * create_image - Create a hibernation image. |
246 | * @platform_mode: Whether or not to use the platform driver. | 246 | * @platform_mode: Whether or not to use the platform driver. |
247 | * | 247 | * |
248 | * Execute device drivers' .freeze_noirq() callbacks, create a hibernation image | 248 | * Execute device drivers' "late" and "noirq" freeze callbacks, create a |
249 | * and execute the drivers' .thaw_noirq() callbacks. | 249 | * hibernation image and run the drivers' "noirq" and "early" thaw callbacks. |
250 | * | 250 | * |
251 | * Control reappears in this routine after the subsequent restore. | 251 | * Control reappears in this routine after the subsequent restore. |
252 | */ | 252 | */ |
@@ -254,7 +254,7 @@ static int create_image(int platform_mode) | |||
254 | { | 254 | { |
255 | int error; | 255 | int error; |
256 | 256 | ||
257 | error = dpm_suspend_noirq(PMSG_FREEZE); | 257 | error = dpm_suspend_end(PMSG_FREEZE); |
258 | if (error) { | 258 | if (error) { |
259 | printk(KERN_ERR "PM: Some devices failed to power down, " | 259 | printk(KERN_ERR "PM: Some devices failed to power down, " |
260 | "aborting hibernation\n"); | 260 | "aborting hibernation\n"); |
@@ -306,7 +306,7 @@ static int create_image(int platform_mode) | |||
306 | Platform_finish: | 306 | Platform_finish: |
307 | platform_finish(platform_mode); | 307 | platform_finish(platform_mode); |
308 | 308 | ||
309 | dpm_resume_noirq(in_suspend ? | 309 | dpm_resume_start(in_suspend ? |
310 | (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); | 310 | (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); |
311 | 311 | ||
312 | return error; | 312 | return error; |
@@ -343,13 +343,13 @@ int hibernation_snapshot(int platform_mode) | |||
343 | * successful freezer test. | 343 | * successful freezer test. |
344 | */ | 344 | */ |
345 | freezer_test_done = true; | 345 | freezer_test_done = true; |
346 | goto Cleanup; | 346 | goto Thaw; |
347 | } | 347 | } |
348 | 348 | ||
349 | error = dpm_prepare(PMSG_FREEZE); | 349 | error = dpm_prepare(PMSG_FREEZE); |
350 | if (error) { | 350 | if (error) { |
351 | dpm_complete(PMSG_RECOVER); | 351 | dpm_complete(PMSG_RECOVER); |
352 | goto Cleanup; | 352 | goto Thaw; |
353 | } | 353 | } |
354 | 354 | ||
355 | suspend_console(); | 355 | suspend_console(); |
@@ -385,6 +385,8 @@ int hibernation_snapshot(int platform_mode) | |||
385 | platform_end(platform_mode); | 385 | platform_end(platform_mode); |
386 | return error; | 386 | return error; |
387 | 387 | ||
388 | Thaw: | ||
389 | thaw_kernel_threads(); | ||
388 | Cleanup: | 390 | Cleanup: |
389 | swsusp_free(); | 391 | swsusp_free(); |
390 | goto Close; | 392 | goto Close; |
@@ -394,16 +396,16 @@ int hibernation_snapshot(int platform_mode) | |||
394 | * resume_target_kernel - Restore system state from a hibernation image. | 396 | * resume_target_kernel - Restore system state from a hibernation image. |
395 | * @platform_mode: Whether or not to use the platform driver. | 397 | * @platform_mode: Whether or not to use the platform driver. |
396 | * | 398 | * |
397 | * Execute device drivers' .freeze_noirq() callbacks, restore the contents of | 399 | * Execute device drivers' "noirq" and "late" freeze callbacks, restore the |
398 | * highmem that have not been restored yet from the image and run the low-level | 400 | * contents of highmem that have not been restored yet from the image and run |
399 | * code that will restore the remaining contents of memory and switch to the | 401 | * the low-level code that will restore the remaining contents of memory and |
400 | * just restored target kernel. | 402 | * switch to the just restored target kernel. |
401 | */ | 403 | */ |
402 | static int resume_target_kernel(bool platform_mode) | 404 | static int resume_target_kernel(bool platform_mode) |
403 | { | 405 | { |
404 | int error; | 406 | int error; |
405 | 407 | ||
406 | error = dpm_suspend_noirq(PMSG_QUIESCE); | 408 | error = dpm_suspend_end(PMSG_QUIESCE); |
407 | if (error) { | 409 | if (error) { |
408 | printk(KERN_ERR "PM: Some devices failed to power down, " | 410 | printk(KERN_ERR "PM: Some devices failed to power down, " |
409 | "aborting resume\n"); | 411 | "aborting resume\n"); |
@@ -460,7 +462,7 @@ static int resume_target_kernel(bool platform_mode) | |||
460 | Cleanup: | 462 | Cleanup: |
461 | platform_restore_cleanup(platform_mode); | 463 | platform_restore_cleanup(platform_mode); |
462 | 464 | ||
463 | dpm_resume_noirq(PMSG_RECOVER); | 465 | dpm_resume_start(PMSG_RECOVER); |
464 | 466 | ||
465 | return error; | 467 | return error; |
466 | } | 468 | } |
@@ -518,7 +520,7 @@ int hibernation_platform_enter(void) | |||
518 | goto Resume_devices; | 520 | goto Resume_devices; |
519 | } | 521 | } |
520 | 522 | ||
521 | error = dpm_suspend_noirq(PMSG_HIBERNATE); | 523 | error = dpm_suspend_end(PMSG_HIBERNATE); |
522 | if (error) | 524 | if (error) |
523 | goto Resume_devices; | 525 | goto Resume_devices; |
524 | 526 | ||
@@ -549,7 +551,7 @@ int hibernation_platform_enter(void) | |||
549 | Platform_finish: | 551 | Platform_finish: |
550 | hibernation_ops->finish(); | 552 | hibernation_ops->finish(); |
551 | 553 | ||
552 | dpm_resume_noirq(PMSG_RESTORE); | 554 | dpm_resume_start(PMSG_RESTORE); |
553 | 555 | ||
554 | Resume_devices: | 556 | Resume_devices: |
555 | entering_platform_hibernation = false; | 557 | entering_platform_hibernation = false; |
@@ -616,7 +618,7 @@ int hibernate(void) | |||
616 | /* Allocate memory management structures */ | 618 | /* Allocate memory management structures */ |
617 | error = create_basic_memory_bitmaps(); | 619 | error = create_basic_memory_bitmaps(); |
618 | if (error) | 620 | if (error) |
619 | goto Exit; | 621 | goto Enable_umh; |
620 | 622 | ||
621 | printk(KERN_INFO "PM: Syncing filesystems ... "); | 623 | printk(KERN_INFO "PM: Syncing filesystems ... "); |
622 | sys_sync(); | 624 | sys_sync(); |
@@ -624,15 +626,11 @@ int hibernate(void) | |||
624 | 626 | ||
625 | error = freeze_processes(); | 627 | error = freeze_processes(); |
626 | if (error) | 628 | if (error) |
627 | goto Finish; | 629 | goto Free_bitmaps; |
628 | 630 | ||
629 | error = hibernation_snapshot(hibernation_mode == HIBERNATION_PLATFORM); | 631 | error = hibernation_snapshot(hibernation_mode == HIBERNATION_PLATFORM); |
630 | if (error) | 632 | if (error || freezer_test_done) |
631 | goto Thaw; | ||
632 | if (freezer_test_done) { | ||
633 | freezer_test_done = false; | ||
634 | goto Thaw; | 633 | goto Thaw; |
635 | } | ||
636 | 634 | ||
637 | if (in_suspend) { | 635 | if (in_suspend) { |
638 | unsigned int flags = 0; | 636 | unsigned int flags = 0; |
@@ -657,8 +655,13 @@ int hibernate(void) | |||
657 | 655 | ||
658 | Thaw: | 656 | Thaw: |
659 | thaw_processes(); | 657 | thaw_processes(); |
660 | Finish: | 658 | |
659 | /* Don't bother checking whether freezer_test_done is true */ | ||
660 | freezer_test_done = false; | ||
661 | |||
662 | Free_bitmaps: | ||
661 | free_basic_memory_bitmaps(); | 663 | free_basic_memory_bitmaps(); |
664 | Enable_umh: | ||
662 | usermodehelper_enable(); | 665 | usermodehelper_enable(); |
663 | Exit: | 666 | Exit: |
664 | pm_notifier_call_chain(PM_POST_HIBERNATION); | 667 | pm_notifier_call_chain(PM_POST_HIBERNATION); |
diff --git a/kernel/power/main.c b/kernel/power/main.c index 9824b41e5a18..1c12581f1c62 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c | |||
@@ -165,16 +165,20 @@ static int suspend_stats_show(struct seq_file *s, void *unused) | |||
165 | last_errno %= REC_FAILED_NUM; | 165 | last_errno %= REC_FAILED_NUM; |
166 | last_step = suspend_stats.last_failed_step + REC_FAILED_NUM - 1; | 166 | last_step = suspend_stats.last_failed_step + REC_FAILED_NUM - 1; |
167 | last_step %= REC_FAILED_NUM; | 167 | last_step %= REC_FAILED_NUM; |
168 | seq_printf(s, "%s: %d\n%s: %d\n%s: %d\n%s: %d\n" | 168 | seq_printf(s, "%s: %d\n%s: %d\n%s: %d\n%s: %d\n%s: %d\n" |
169 | "%s: %d\n%s: %d\n%s: %d\n%s: %d\n", | 169 | "%s: %d\n%s: %d\n%s: %d\n%s: %d\n%s: %d\n", |
170 | "success", suspend_stats.success, | 170 | "success", suspend_stats.success, |
171 | "fail", suspend_stats.fail, | 171 | "fail", suspend_stats.fail, |
172 | "failed_freeze", suspend_stats.failed_freeze, | 172 | "failed_freeze", suspend_stats.failed_freeze, |
173 | "failed_prepare", suspend_stats.failed_prepare, | 173 | "failed_prepare", suspend_stats.failed_prepare, |
174 | "failed_suspend", suspend_stats.failed_suspend, | 174 | "failed_suspend", suspend_stats.failed_suspend, |
175 | "failed_suspend_late", | ||
176 | suspend_stats.failed_suspend_late, | ||
175 | "failed_suspend_noirq", | 177 | "failed_suspend_noirq", |
176 | suspend_stats.failed_suspend_noirq, | 178 | suspend_stats.failed_suspend_noirq, |
177 | "failed_resume", suspend_stats.failed_resume, | 179 | "failed_resume", suspend_stats.failed_resume, |
180 | "failed_resume_early", | ||
181 | suspend_stats.failed_resume_early, | ||
178 | "failed_resume_noirq", | 182 | "failed_resume_noirq", |
179 | suspend_stats.failed_resume_noirq); | 183 | suspend_stats.failed_resume_noirq); |
180 | seq_printf(s, "failures:\n last_failed_dev:\t%-s\n", | 184 | seq_printf(s, "failures:\n last_failed_dev:\t%-s\n", |
@@ -287,16 +291,10 @@ static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr, | |||
287 | 291 | ||
288 | #ifdef CONFIG_SUSPEND | 292 | #ifdef CONFIG_SUSPEND |
289 | for (s = &pm_states[state]; state < PM_SUSPEND_MAX; s++, state++) { | 293 | for (s = &pm_states[state]; state < PM_SUSPEND_MAX; s++, state++) { |
290 | if (*s && len == strlen(*s) && !strncmp(buf, *s, len)) | 294 | if (*s && len == strlen(*s) && !strncmp(buf, *s, len)) { |
295 | error = pm_suspend(state); | ||
291 | break; | 296 | break; |
292 | } | 297 | } |
293 | if (state < PM_SUSPEND_MAX && *s) { | ||
294 | error = enter_state(state); | ||
295 | if (error) { | ||
296 | suspend_stats.fail++; | ||
297 | dpm_save_failed_errno(error); | ||
298 | } else | ||
299 | suspend_stats.success++; | ||
300 | } | 298 | } |
301 | #endif | 299 | #endif |
302 | 300 | ||
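The reworked state_store() loop above matches the written buffer against the pm_states[] table by length plus strncmp(), because the incoming sysfs buffer is length-delimited rather than a clean NUL-terminated word. A userspace sketch of that matching, with a made-up state table, is below.

#include <stdio.h>
#include <string.h>

/* Userspace sketch of the state_store() matching above: compare by
 * "same length and same bytes" rather than plain strcmp(). The state
 * table is a stand-in for pm_states[], index 0 left unused. */
static const char *const states[] = { NULL, "freeze", "standby", "mem" };
#define NSTATES (sizeof(states) / sizeof(states[0]))

static int match_state(const char *buf, size_t len)
{
        for (size_t s = 1; s < NSTATES; s++) {
                if (states[s] && len == strlen(states[s]) &&
                    !strncmp(buf, states[s], len))
                        return (int)s;   /* would call pm_suspend(s) here */
        }
        return -1;
}

int main(void)
{
        const char buf[] = "mem\n";      /* echo mem > /sys/power/state */
        size_t len = strcspn(buf, "\n"); /* trim the trailing newline */

        printf("matched state index: %d\n", match_state(buf, len));
        return 0;
}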
diff --git a/kernel/power/power.h b/kernel/power/power.h index 21724eee5206..98f3622d7407 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h | |||
@@ -177,13 +177,11 @@ extern const char *const pm_states[]; | |||
177 | 177 | ||
178 | extern bool valid_state(suspend_state_t state); | 178 | extern bool valid_state(suspend_state_t state); |
179 | extern int suspend_devices_and_enter(suspend_state_t state); | 179 | extern int suspend_devices_and_enter(suspend_state_t state); |
180 | extern int enter_state(suspend_state_t state); | ||
181 | #else /* !CONFIG_SUSPEND */ | 180 | #else /* !CONFIG_SUSPEND */ |
182 | static inline int suspend_devices_and_enter(suspend_state_t state) | 181 | static inline int suspend_devices_and_enter(suspend_state_t state) |
183 | { | 182 | { |
184 | return -ENOSYS; | 183 | return -ENOSYS; |
185 | } | 184 | } |
186 | static inline int enter_state(suspend_state_t state) { return -ENOSYS; } | ||
187 | static inline bool valid_state(suspend_state_t state) { return false; } | 185 | static inline bool valid_state(suspend_state_t state) { return false; } |
188 | #endif /* !CONFIG_SUSPEND */ | 186 | #endif /* !CONFIG_SUSPEND */ |
189 | 187 | ||
@@ -234,16 +232,14 @@ static inline int suspend_freeze_processes(void) | |||
234 | int error; | 232 | int error; |
235 | 233 | ||
236 | error = freeze_processes(); | 234 | error = freeze_processes(); |
237 | |||
238 | /* | 235 | /* |
239 | * freeze_processes() automatically thaws every task if freezing | 236 | * freeze_processes() automatically thaws every task if freezing |
240 | * fails. So we need not do anything extra upon error. | 237 | * fails. So we need not do anything extra upon error. |
241 | */ | 238 | */ |
242 | if (error) | 239 | if (error) |
243 | goto Finish; | 240 | return error; |
244 | 241 | ||
245 | error = freeze_kernel_threads(); | 242 | error = freeze_kernel_threads(); |
246 | |||
247 | /* | 243 | /* |
248 | * freeze_kernel_threads() thaws only kernel threads upon freezing | 244 | * freeze_kernel_threads() thaws only kernel threads upon freezing |
249 | * failure. So we have to thaw the userspace tasks ourselves. | 245 | * failure. So we have to thaw the userspace tasks ourselves. |
@@ -251,7 +247,6 @@ static inline int suspend_freeze_processes(void) | |||
251 | if (error) | 247 | if (error) |
252 | thaw_processes(); | 248 | thaw_processes(); |
253 | 249 | ||
254 | Finish: | ||
255 | return error; | 250 | return error; |
256 | } | 251 | } |
257 | 252 | ||
diff --git a/kernel/power/process.c b/kernel/power/process.c index 7e426459e60a..0d2aeb226108 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c | |||
@@ -53,11 +53,9 @@ static int try_to_freeze_tasks(bool user_only) | |||
53 | * It is "frozen enough". If the task does wake | 53 | * It is "frozen enough". If the task does wake |
54 | * up, it will immediately call try_to_freeze. | 54 | * up, it will immediately call try_to_freeze. |
55 | * | 55 | * |
56 | * Because freeze_task() goes through p's | 56 | * Because freeze_task() goes through p's scheduler lock, it's |
57 | * scheduler lock after setting TIF_FREEZE, it's | 57 | * guaranteed that TASK_STOPPED/TRACED -> TASK_RUNNING |
58 | * guaranteed that either we see TASK_RUNNING or | 58 | * transition can't race with task state testing here. |
59 | * try_to_stop() after schedule() in ptrace/signal | ||
60 | * stop sees TIF_FREEZE. | ||
61 | */ | 59 | */ |
62 | if (!task_is_stopped_or_traced(p) && | 60 | if (!task_is_stopped_or_traced(p) && |
63 | !freezer_should_skip(p)) | 61 | !freezer_should_skip(p)) |
@@ -98,13 +96,15 @@ static int try_to_freeze_tasks(bool user_only) | |||
98 | elapsed_csecs / 100, elapsed_csecs % 100, | 96 | elapsed_csecs / 100, elapsed_csecs % 100, |
99 | todo - wq_busy, wq_busy); | 97 | todo - wq_busy, wq_busy); |
100 | 98 | ||
101 | read_lock(&tasklist_lock); | 99 | if (!wakeup) { |
102 | do_each_thread(g, p) { | 100 | read_lock(&tasklist_lock); |
103 | if (!wakeup && !freezer_should_skip(p) && | 101 | do_each_thread(g, p) { |
104 | p != current && freezing(p) && !frozen(p)) | 102 | if (p != current && !freezer_should_skip(p) |
105 | sched_show_task(p); | 103 | && freezing(p) && !frozen(p)) |
106 | } while_each_thread(g, p); | 104 | sched_show_task(p); |
107 | read_unlock(&tasklist_lock); | 105 | } while_each_thread(g, p); |
106 | read_unlock(&tasklist_lock); | ||
107 | } | ||
108 | } else { | 108 | } else { |
109 | printk("(elapsed %d.%02d seconds) ", elapsed_csecs / 100, | 109 | printk("(elapsed %d.%02d seconds) ", elapsed_csecs / 100, |
110 | elapsed_csecs % 100); | 110 | elapsed_csecs % 100); |
diff --git a/kernel/power/qos.c b/kernel/power/qos.c index 995e3bd3417b..d6d6dbd1ecc0 100644 --- a/kernel/power/qos.c +++ b/kernel/power/qos.c | |||
@@ -469,21 +469,18 @@ static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf, | |||
469 | static int __init pm_qos_power_init(void) | 469 | static int __init pm_qos_power_init(void) |
470 | { | 470 | { |
471 | int ret = 0; | 471 | int ret = 0; |
472 | int i; | ||
472 | 473 | ||
473 | ret = register_pm_qos_misc(&cpu_dma_pm_qos); | 474 | BUILD_BUG_ON(ARRAY_SIZE(pm_qos_array) != PM_QOS_NUM_CLASSES); |
474 | if (ret < 0) { | 475 | |
475 | printk(KERN_ERR "pm_qos_param: cpu_dma_latency setup failed\n"); | 476 | for (i = 1; i < PM_QOS_NUM_CLASSES; i++) { |
476 | return ret; | 477 | ret = register_pm_qos_misc(pm_qos_array[i]); |
477 | } | 478 | if (ret < 0) { |
478 | ret = register_pm_qos_misc(&network_lat_pm_qos); | 479 | printk(KERN_ERR "pm_qos_param: %s setup failed\n", |
479 | if (ret < 0) { | 480 | pm_qos_array[i]->name); |
480 | printk(KERN_ERR "pm_qos_param: network_latency setup failed\n"); | 481 | return ret; |
481 | return ret; | 482 | } |
482 | } | 483 | } |
483 | ret = register_pm_qos_misc(&network_throughput_pm_qos); | ||
484 | if (ret < 0) | ||
485 | printk(KERN_ERR | ||
486 | "pm_qos_param: network_throughput setup failed\n"); | ||
487 | 484 | ||
488 | return ret; | 485 | return ret; |
489 | } | 486 | } |
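pm_qos_power_init() now registers its misc devices from a table instead of three hand-rolled call sites, with a BUILD_BUG_ON tying the table length to the number of classes. A sketch of that table-driven pattern follows; the class names and the stub register function are illustrative only.

#include <stdio.h>

/* Sketch of the table-driven registration adopted above: iterate an
 * array of class descriptors and report the failing entry by name,
 * instead of repeating the register/print/return block per class.
 * Index 0 is left unused, as in pm_qos_array. */
struct qos_class { const char *name; };

static struct qos_class classes[] = {
        { NULL },
        { "cpu_dma_latency" },
        { "network_latency" },
        { "network_throughput" },
};
#define NUM_CLASSES (sizeof(classes) / sizeof(classes[0]))

static int register_misc(struct qos_class *c)
{
        printf("registering %s\n", c->name);
        return 0;               /* pretend registration succeeded */
}

int main(void)
{
        for (unsigned int i = 1; i < NUM_CLASSES; i++) {
                int ret = register_misc(&classes[i]);
                if (ret < 0) {
                        fprintf(stderr, "%s setup failed\n", classes[i].name);
                        return 1;
                }
        }
        return 0;
}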
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 6a768e537001..0de28576807d 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c | |||
@@ -711,9 +711,10 @@ static void mark_nosave_pages(struct memory_bitmap *bm) | |||
711 | list_for_each_entry(region, &nosave_regions, list) { | 711 | list_for_each_entry(region, &nosave_regions, list) { |
712 | unsigned long pfn; | 712 | unsigned long pfn; |
713 | 713 | ||
714 | pr_debug("PM: Marking nosave pages: %016lx - %016lx\n", | 714 | pr_debug("PM: Marking nosave pages: [mem %#010llx-%#010llx]\n", |
715 | region->start_pfn << PAGE_SHIFT, | 715 | (unsigned long long) region->start_pfn << PAGE_SHIFT, |
716 | region->end_pfn << PAGE_SHIFT); | 716 | ((unsigned long long) region->end_pfn << PAGE_SHIFT) |
717 | - 1); | ||
717 | 718 | ||
718 | for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++) | 719 | for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++) |
719 | if (pfn_valid(pfn)) { | 720 | if (pfn_valid(pfn)) { |
@@ -1000,20 +1001,20 @@ static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) | |||
1000 | s_page = pfn_to_page(src_pfn); | 1001 | s_page = pfn_to_page(src_pfn); |
1001 | d_page = pfn_to_page(dst_pfn); | 1002 | d_page = pfn_to_page(dst_pfn); |
1002 | if (PageHighMem(s_page)) { | 1003 | if (PageHighMem(s_page)) { |
1003 | src = kmap_atomic(s_page, KM_USER0); | 1004 | src = kmap_atomic(s_page); |
1004 | dst = kmap_atomic(d_page, KM_USER1); | 1005 | dst = kmap_atomic(d_page); |
1005 | do_copy_page(dst, src); | 1006 | do_copy_page(dst, src); |
1006 | kunmap_atomic(dst, KM_USER1); | 1007 | kunmap_atomic(dst); |
1007 | kunmap_atomic(src, KM_USER0); | 1008 | kunmap_atomic(src); |
1008 | } else { | 1009 | } else { |
1009 | if (PageHighMem(d_page)) { | 1010 | if (PageHighMem(d_page)) { |
1010 | /* Page pointed to by src may contain some kernel | 1011 | /* Page pointed to by src may contain some kernel |
1011 | * data modified by kmap_atomic() | 1012 | * data modified by kmap_atomic() |
1012 | */ | 1013 | */ |
1013 | safe_copy_page(buffer, s_page); | 1014 | safe_copy_page(buffer, s_page); |
1014 | dst = kmap_atomic(d_page, KM_USER0); | 1015 | dst = kmap_atomic(d_page); |
1015 | copy_page(dst, buffer); | 1016 | copy_page(dst, buffer); |
1016 | kunmap_atomic(dst, KM_USER0); | 1017 | kunmap_atomic(dst); |
1017 | } else { | 1018 | } else { |
1018 | safe_copy_page(page_address(d_page), s_page); | 1019 | safe_copy_page(page_address(d_page), s_page); |
1019 | } | 1020 | } |
@@ -1728,9 +1729,9 @@ int snapshot_read_next(struct snapshot_handle *handle) | |||
1728 | */ | 1729 | */ |
1729 | void *kaddr; | 1730 | void *kaddr; |
1730 | 1731 | ||
1731 | kaddr = kmap_atomic(page, KM_USER0); | 1732 | kaddr = kmap_atomic(page); |
1732 | copy_page(buffer, kaddr); | 1733 | copy_page(buffer, kaddr); |
1733 | kunmap_atomic(kaddr, KM_USER0); | 1734 | kunmap_atomic(kaddr); |
1734 | handle->buffer = buffer; | 1735 | handle->buffer = buffer; |
1735 | } else { | 1736 | } else { |
1736 | handle->buffer = page_address(page); | 1737 | handle->buffer = page_address(page); |
@@ -2014,9 +2015,9 @@ static void copy_last_highmem_page(void) | |||
2014 | if (last_highmem_page) { | 2015 | if (last_highmem_page) { |
2015 | void *dst; | 2016 | void *dst; |
2016 | 2017 | ||
2017 | dst = kmap_atomic(last_highmem_page, KM_USER0); | 2018 | dst = kmap_atomic(last_highmem_page); |
2018 | copy_page(dst, buffer); | 2019 | copy_page(dst, buffer); |
2019 | kunmap_atomic(dst, KM_USER0); | 2020 | kunmap_atomic(dst); |
2020 | last_highmem_page = NULL; | 2021 | last_highmem_page = NULL; |
2021 | } | 2022 | } |
2022 | } | 2023 | } |
@@ -2309,13 +2310,13 @@ swap_two_pages_data(struct page *p1, struct page *p2, void *buf) | |||
2309 | { | 2310 | { |
2310 | void *kaddr1, *kaddr2; | 2311 | void *kaddr1, *kaddr2; |
2311 | 2312 | ||
2312 | kaddr1 = kmap_atomic(p1, KM_USER0); | 2313 | kaddr1 = kmap_atomic(p1); |
2313 | kaddr2 = kmap_atomic(p2, KM_USER1); | 2314 | kaddr2 = kmap_atomic(p2); |
2314 | copy_page(buf, kaddr1); | 2315 | copy_page(buf, kaddr1); |
2315 | copy_page(kaddr1, kaddr2); | 2316 | copy_page(kaddr1, kaddr2); |
2316 | copy_page(kaddr2, buf); | 2317 | copy_page(kaddr2, buf); |
2317 | kunmap_atomic(kaddr2, KM_USER1); | 2318 | kunmap_atomic(kaddr2); |
2318 | kunmap_atomic(kaddr1, KM_USER0); | 2319 | kunmap_atomic(kaddr1); |
2319 | } | 2320 | } |
2320 | 2321 | ||
2321 | /** | 2322 | /** |
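All the snapshot.c hunks above are the same mechanical conversion: kmap_atomic()/kunmap_atomic() lose their KM_USER* slot argument because the mapping slots are now handed out from a small per-CPU stack, which only requires that unmaps happen in strict reverse order of the maps. The userspace sketch below models that stack discipline; the slot pool and assertions are illustrative, not the kernel implementation.

#include <assert.h>
#include <stdio.h>

/* Userspace sketch of why the KM_USER0/KM_USER1 arguments could be
 * dropped above: instead of the caller naming a fixed slot, mappings
 * come from a small stack, so the only rule is last mapped, first
 * unmapped -- exactly how the hunks above pair their calls. */
#define NR_SLOTS 4

static void *slots[NR_SLOTS];
static int depth;

static void *map_atomic(void *page)
{
        assert(depth < NR_SLOTS);
        slots[depth] = page;            /* pretend this is the mapping */
        return slots[depth++];
}

static void unmap_atomic(void *addr)
{
        assert(depth > 0 && slots[depth - 1] == addr);  /* LIFO only */
        depth--;
}

int main(void)
{
        char page1, page2;
        void *src = map_atomic(&page1);
        void *dst = map_atomic(&page2);

        /* ... copy between the two mappings here ... */
        unmap_atomic(dst);              /* reverse order of mapping */
        unmap_atomic(src);
        printf("nested map/unmap ok\n");
        return 0;
}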
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 4fd51beed879..88e5c967370d 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c | |||
@@ -37,8 +37,8 @@ const char *const pm_states[PM_SUSPEND_MAX] = { | |||
37 | static const struct platform_suspend_ops *suspend_ops; | 37 | static const struct platform_suspend_ops *suspend_ops; |
38 | 38 | ||
39 | /** | 39 | /** |
40 | * suspend_set_ops - Set the global suspend method table. | 40 | * suspend_set_ops - Set the global suspend method table. |
41 | * @ops: Pointer to ops structure. | 41 | * @ops: Suspend operations to use. |
42 | */ | 42 | */ |
43 | void suspend_set_ops(const struct platform_suspend_ops *ops) | 43 | void suspend_set_ops(const struct platform_suspend_ops *ops) |
44 | { | 44 | { |
@@ -58,11 +58,11 @@ bool valid_state(suspend_state_t state) | |||
58 | } | 58 | } |
59 | 59 | ||
60 | /** | 60 | /** |
61 | * suspend_valid_only_mem - generic memory-only valid callback | 61 | * suspend_valid_only_mem - Generic memory-only valid callback. |
62 | * | 62 | * |
63 | * Platform drivers that implement mem suspend only and only need | 63 | * Platform drivers that implement mem suspend only and only need to check for |
64 | * to check for that in their .valid callback can use this instead | 64 | * that in their .valid() callback can use this instead of rolling their own |
65 | * of rolling their own .valid callback. | 65 | * .valid() callback. |
66 | */ | 66 | */ |
67 | int suspend_valid_only_mem(suspend_state_t state) | 67 | int suspend_valid_only_mem(suspend_state_t state) |
68 | { | 68 | { |
@@ -83,10 +83,11 @@ static int suspend_test(int level) | |||
83 | } | 83 | } |
84 | 84 | ||
85 | /** | 85 | /** |
86 | * suspend_prepare - Do prep work before entering low-power state. | 86 | * suspend_prepare - Prepare for entering system sleep state. |
87 | * | 87 | * |
88 | * This is common code that is called for each state that we're entering. | 88 | * Common code run for every system sleep state that can be entered (except for |
89 | * Run suspend notifiers, allocate a console and stop all processes. | 89 | * hibernation). Run suspend notifiers, allocate the "suspend" console and |
90 | * freeze processes. | ||
90 | */ | 91 | */ |
91 | static int suspend_prepare(void) | 92 | static int suspend_prepare(void) |
92 | { | 93 | { |
@@ -131,9 +132,9 @@ void __attribute__ ((weak)) arch_suspend_enable_irqs(void) | |||
131 | } | 132 | } |
132 | 133 | ||
133 | /** | 134 | /** |
134 | * suspend_enter - enter the desired system sleep state. | 135 | * suspend_enter - Make the system enter the given sleep state. |
135 | * @state: State to enter | 136 | * @state: System sleep state to enter. |
136 | * @wakeup: Returns information that suspend should not be entered again. | 137 | * @wakeup: Returns information that the sleep state should not be re-entered. |
137 | * | 138 | * |
138 | * This function should be called after devices have been suspended. | 139 | * This function should be called after devices have been suspended. |
139 | */ | 140 | */ |
@@ -147,7 +148,7 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) | |||
147 | goto Platform_finish; | 148 | goto Platform_finish; |
148 | } | 149 | } |
149 | 150 | ||
150 | error = dpm_suspend_noirq(PMSG_SUSPEND); | 151 | error = dpm_suspend_end(PMSG_SUSPEND); |
151 | if (error) { | 152 | if (error) { |
152 | printk(KERN_ERR "PM: Some devices failed to power down\n"); | 153 | printk(KERN_ERR "PM: Some devices failed to power down\n"); |
153 | goto Platform_finish; | 154 | goto Platform_finish; |
@@ -189,7 +190,7 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) | |||
189 | if (suspend_ops->wake) | 190 | if (suspend_ops->wake) |
190 | suspend_ops->wake(); | 191 | suspend_ops->wake(); |
191 | 192 | ||
192 | dpm_resume_noirq(PMSG_RESUME); | 193 | dpm_resume_start(PMSG_RESUME); |
193 | 194 | ||
194 | Platform_finish: | 195 | Platform_finish: |
195 | if (suspend_ops->finish) | 196 | if (suspend_ops->finish) |
@@ -199,9 +200,8 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) | |||
199 | } | 200 | } |
200 | 201 | ||
201 | /** | 202 | /** |
202 | * suspend_devices_and_enter - suspend devices and enter the desired system | 203 | * suspend_devices_and_enter - Suspend devices and enter system sleep state. |
203 | * sleep state. | 204 | * @state: System sleep state to enter. |
204 | * @state: state to enter | ||
205 | */ | 205 | */ |
206 | int suspend_devices_and_enter(suspend_state_t state) | 206 | int suspend_devices_and_enter(suspend_state_t state) |
207 | { | 207 | { |
@@ -251,10 +251,10 @@ int suspend_devices_and_enter(suspend_state_t state) | |||
251 | } | 251 | } |
252 | 252 | ||
253 | /** | 253 | /** |
254 | * suspend_finish - Do final work before exiting suspend sequence. | 254 | * suspend_finish - Clean up before finishing the suspend sequence. |
255 | * | 255 | * |
256 | * Call platform code to clean up, restart processes, and free the | 256 | * Call platform code to clean up, restart processes, and free the console that |
257 | * console that we've allocated. This is not called for suspend-to-disk. | 257 | * we've allocated. This routine is not called for hibernation. |
258 | */ | 258 | */ |
259 | static void suspend_finish(void) | 259 | static void suspend_finish(void) |
260 | { | 260 | { |
@@ -265,16 +265,14 @@ static void suspend_finish(void) | |||
265 | } | 265 | } |
266 | 266 | ||
267 | /** | 267 | /** |
268 | * enter_state - Do common work of entering low-power state. | 268 | * enter_state - Do common work needed to enter system sleep state. |
269 | * @state: pm_state structure for state we're entering. | 269 | * @state: System sleep state to enter. |
270 | * | 270 | * |
271 | * Make sure we're the only ones trying to enter a sleep state. Fail | 271 | * Make sure that no one else is trying to put the system into a sleep state. |
272 | * if someone has beat us to it, since we don't want anything weird to | 272 | * Fail if that's not the case. Otherwise, prepare for system suspend, make the |
273 | * happen when we wake up. | 273 | * system enter the given sleep state and clean up after wakeup. |
274 | * Then, do the setup for suspend, enter the state, and cleaup (after | ||
275 | * we've woken up). | ||
276 | */ | 274 | */ |
277 | int enter_state(suspend_state_t state) | 275 | static int enter_state(suspend_state_t state) |
278 | { | 276 | { |
279 | int error; | 277 | int error; |
280 | 278 | ||
@@ -310,24 +308,26 @@ int enter_state(suspend_state_t state) | |||
310 | } | 308 | } |
311 | 309 | ||
312 | /** | 310 | /** |
313 | * pm_suspend - Externally visible function for suspending system. | 311 | * pm_suspend - Externally visible function for suspending the system. |
314 | * @state: Enumerated value of state to enter. | 312 | * @state: System sleep state to enter. |
315 | * | 313 | * |
316 | * Determine whether or not value is within range, get state | 314 | * Check if the value of @state represents one of the supported states, |
317 | * structure, and enter (above). | 315 | * execute enter_state() and update system suspend statistics. |
318 | */ | 316 | */ |
319 | int pm_suspend(suspend_state_t state) | 317 | int pm_suspend(suspend_state_t state) |
320 | { | 318 | { |
321 | int ret; | 319 | int error; |
322 | if (state > PM_SUSPEND_ON && state < PM_SUSPEND_MAX) { | 320 | |
323 | ret = enter_state(state); | 321 | if (state <= PM_SUSPEND_ON || state >= PM_SUSPEND_MAX) |
324 | if (ret) { | 322 | return -EINVAL; |
325 | suspend_stats.fail++; | 323 | |
326 | dpm_save_failed_errno(ret); | 324 | error = enter_state(state); |
327 | } else | 325 | if (error) { |
328 | suspend_stats.success++; | 326 | suspend_stats.fail++; |
329 | return ret; | 327 | dpm_save_failed_errno(error); |
328 | } else { | ||
329 | suspend_stats.success++; | ||
330 | } | 330 | } |
331 | return -EINVAL; | 331 | return error; |
332 | } | 332 | } |
333 | EXPORT_SYMBOL(pm_suspend); | 333 | EXPORT_SYMBOL(pm_suspend); |
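The rewritten pm_suspend() trades the nested range check for an early return and keeps the suspend statistics next to the one call that can fail. A toy sketch of that control flow, with stand-in state values and a fake enter_state(), is below.

#include <stdio.h>

/* Sketch of the reworked pm_suspend() control flow above: reject
 * out-of-range states with an early return, then record success or
 * failure exactly once around the single call that does the work.
 * State values and the stats structure are stand-ins. */
enum { SUSPEND_ON, SUSPEND_FREEZE, SUSPEND_MEM, SUSPEND_MAX };

static struct { int success, fail; } suspend_stats;

static int enter_state_stub(int state)
{
        return state == SUSPEND_MEM ? 0 : -1;   /* pretend only "mem" works */
}

static int pm_suspend_like(int state)
{
        int error;

        if (state <= SUSPEND_ON || state >= SUSPEND_MAX)
                return -1;                      /* -EINVAL in the kernel */

        error = enter_state_stub(state);
        if (error)
                suspend_stats.fail++;
        else
                suspend_stats.success++;
        return error;
}

int main(void)
{
        pm_suspend_like(SUSPEND_MEM);
        pm_suspend_like(SUSPEND_FREEZE);
        printf("success=%d fail=%d\n", suspend_stats.success, suspend_stats.fail);
        return 0;
}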
diff --git a/kernel/power/user.c b/kernel/power/user.c index 3e100075b13c..33c4329205af 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c | |||
@@ -249,16 +249,10 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, | |||
249 | } | 249 | } |
250 | pm_restore_gfp_mask(); | 250 | pm_restore_gfp_mask(); |
251 | error = hibernation_snapshot(data->platform_support); | 251 | error = hibernation_snapshot(data->platform_support); |
252 | if (error) { | 252 | if (!error) { |
253 | thaw_kernel_threads(); | ||
254 | } else { | ||
255 | error = put_user(in_suspend, (int __user *)arg); | 253 | error = put_user(in_suspend, (int __user *)arg); |
256 | if (!error && !freezer_test_done) | 254 | data->ready = !freezer_test_done && !error; |
257 | data->ready = 1; | 255 | freezer_test_done = false; |
258 | if (freezer_test_done) { | ||
259 | freezer_test_done = false; | ||
260 | thaw_kernel_threads(); | ||
261 | } | ||
262 | } | 256 | } |
263 | break; | 257 | break; |
264 | 258 | ||
diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 00ab2ca5ed11..ee8d49b9c309 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c | |||
@@ -231,26 +231,22 @@ bool ptrace_may_access(struct task_struct *task, unsigned int mode) | |||
231 | } | 231 | } |
232 | 232 | ||
233 | static int ptrace_attach(struct task_struct *task, long request, | 233 | static int ptrace_attach(struct task_struct *task, long request, |
234 | unsigned long addr, | ||
234 | unsigned long flags) | 235 | unsigned long flags) |
235 | { | 236 | { |
236 | bool seize = (request == PTRACE_SEIZE); | 237 | bool seize = (request == PTRACE_SEIZE); |
237 | int retval; | 238 | int retval; |
238 | 239 | ||
239 | /* | ||
240 | * SEIZE will enable new ptrace behaviors which will be implemented | ||
241 | * gradually. SEIZE_DEVEL is used to prevent applications | ||
242 | * expecting full SEIZE behaviors trapping on kernel commits which | ||
243 | * are still in the process of implementing them. | ||
244 | * | ||
245 | * Only test programs for new ptrace behaviors being implemented | ||
246 | * should set SEIZE_DEVEL. If unset, SEIZE will fail with -EIO. | ||
247 | * | ||
248 | * Once SEIZE behaviors are completely implemented, this flag and | ||
249 | * the following test will be removed. | ||
250 | */ | ||
251 | retval = -EIO; | 240 | retval = -EIO; |
252 | if (seize && !(flags & PTRACE_SEIZE_DEVEL)) | 241 | if (seize) { |
253 | goto out; | 242 | if (addr != 0) |
243 | goto out; | ||
244 | if (flags & ~(unsigned long)PTRACE_O_MASK) | ||
245 | goto out; | ||
246 | flags = PT_PTRACED | PT_SEIZED | (flags << PT_OPT_FLAG_SHIFT); | ||
247 | } else { | ||
248 | flags = PT_PTRACED; | ||
249 | } | ||
254 | 250 | ||
255 | audit_ptrace(task); | 251 | audit_ptrace(task); |
256 | 252 | ||
@@ -262,7 +258,7 @@ static int ptrace_attach(struct task_struct *task, long request, | |||
262 | 258 | ||
263 | /* | 259 | /* |
264 | * Protect exec's credential calculations against our interference; | 260 | * Protect exec's credential calculations against our interference; |
265 | * interference; SUID, SGID and LSM creds get determined differently | 261 | * SUID, SGID and LSM creds get determined differently |
266 | * under ptrace. | 262 | * under ptrace. |
267 | */ | 263 | */ |
268 | retval = -ERESTARTNOINTR; | 264 | retval = -ERESTARTNOINTR; |
@@ -282,11 +278,11 @@ static int ptrace_attach(struct task_struct *task, long request, | |||
282 | if (task->ptrace) | 278 | if (task->ptrace) |
283 | goto unlock_tasklist; | 279 | goto unlock_tasklist; |
284 | 280 | ||
285 | task->ptrace = PT_PTRACED; | ||
286 | if (seize) | 281 | if (seize) |
287 | task->ptrace |= PT_SEIZED; | 282 | flags |= PT_SEIZED; |
288 | if (ns_capable(task_user_ns(task), CAP_SYS_PTRACE)) | 283 | if (ns_capable(task_user_ns(task), CAP_SYS_PTRACE)) |
289 | task->ptrace |= PT_PTRACE_CAP; | 284 | flags |= PT_PTRACE_CAP; |
285 | task->ptrace = flags; | ||
290 | 286 | ||
291 | __ptrace_link(task, current); | 287 | __ptrace_link(task, current); |
292 | 288 | ||
@@ -528,30 +524,18 @@ int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long ds | |||
528 | 524 | ||
529 | static int ptrace_setoptions(struct task_struct *child, unsigned long data) | 525 | static int ptrace_setoptions(struct task_struct *child, unsigned long data) |
530 | { | 526 | { |
531 | child->ptrace &= ~PT_TRACE_MASK; | 527 | unsigned flags; |
532 | 528 | ||
533 | if (data & PTRACE_O_TRACESYSGOOD) | 529 | if (data & ~(unsigned long)PTRACE_O_MASK) |
534 | child->ptrace |= PT_TRACESYSGOOD; | 530 | return -EINVAL; |
535 | |||
536 | if (data & PTRACE_O_TRACEFORK) | ||
537 | child->ptrace |= PT_TRACE_FORK; | ||
538 | |||
539 | if (data & PTRACE_O_TRACEVFORK) | ||
540 | child->ptrace |= PT_TRACE_VFORK; | ||
541 | |||
542 | if (data & PTRACE_O_TRACECLONE) | ||
543 | child->ptrace |= PT_TRACE_CLONE; | ||
544 | |||
545 | if (data & PTRACE_O_TRACEEXEC) | ||
546 | child->ptrace |= PT_TRACE_EXEC; | ||
547 | |||
548 | if (data & PTRACE_O_TRACEVFORKDONE) | ||
549 | child->ptrace |= PT_TRACE_VFORK_DONE; | ||
550 | 531 | ||
551 | if (data & PTRACE_O_TRACEEXIT) | 532 | /* Avoid intermediate state when all opts are cleared */ |
552 | child->ptrace |= PT_TRACE_EXIT; | 533 | flags = child->ptrace; |
534 | flags &= ~(PTRACE_O_MASK << PT_OPT_FLAG_SHIFT); | ||
535 | flags |= (data << PT_OPT_FLAG_SHIFT); | ||
536 | child->ptrace = flags; | ||
553 | 537 | ||
554 | return (data & ~PTRACE_O_MASK) ? -EINVAL : 0; | 538 | return 0; |
555 | } | 539 | } |
556 | 540 | ||
557 | static int ptrace_getsiginfo(struct task_struct *child, siginfo_t *info) | 541 | static int ptrace_getsiginfo(struct task_struct *child, siginfo_t *info) |
@@ -891,7 +875,7 @@ SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr, | |||
891 | } | 875 | } |
892 | 876 | ||
893 | if (request == PTRACE_ATTACH || request == PTRACE_SEIZE) { | 877 | if (request == PTRACE_ATTACH || request == PTRACE_SEIZE) { |
894 | ret = ptrace_attach(child, request, data); | 878 | ret = ptrace_attach(child, request, addr, data); |
895 | /* | 879 | /* |
896 | * Some architectures need to do book-keeping after | 880 | * Some architectures need to do book-keeping after |
897 | * a ptrace attach. | 881 | * a ptrace attach. |
@@ -1034,7 +1018,7 @@ asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid, | |||
1034 | } | 1018 | } |
1035 | 1019 | ||
1036 | if (request == PTRACE_ATTACH || request == PTRACE_SEIZE) { | 1020 | if (request == PTRACE_ATTACH || request == PTRACE_SEIZE) { |
1037 | ret = ptrace_attach(child, request, data); | 1021 | ret = ptrace_attach(child, request, addr, data); |
1038 | /* | 1022 | /* |
1039 | * Some architectures need to do book-keeping after | 1023 | * Some architectures need to do book-keeping after |
1040 | * a ptrace attach. | 1024 | * a ptrace attach. |
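ptrace_setoptions() now validates the user-supplied option bits against PTRACE_O_MASK and then shifts them into the task's flag word in a single assignment, so no reader ever sees the options momentarily cleared. A userspace sketch of that packing follows; the mask, shift and flag constants are illustrative rather than the kernel's values.

#include <stdio.h>

/* Userspace sketch of the flag packing done by ptrace_setoptions() above:
 * user-visible option bits are validated against a mask and then shifted
 * into the high bits of the per-task flags word in one assignment, so the
 * "all options cleared" intermediate state is never visible. Constants
 * here are made up for illustration. */
#define PTRACE_O_MASK      0x0000007fUL
#define PT_OPT_FLAG_SHIFT  3
#define PT_PTRACED         0x00000001UL

static int set_options(unsigned long *task_flags, unsigned long data)
{
        unsigned long flags;

        if (data & ~PTRACE_O_MASK)
                return -1;                      /* -EINVAL in the kernel */

        flags = *task_flags;
        flags &= ~(PTRACE_O_MASK << PT_OPT_FLAG_SHIFT);
        flags |= data << PT_OPT_FLAG_SHIFT;
        *task_flags = flags;                    /* one visible update */
        return 0;
}

int main(void)
{
        unsigned long task_flags = PT_PTRACED;

        set_options(&task_flags, 0x05);         /* two example option bits */
        printf("task flags: %#lx\n", task_flags);
        return 0;
}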
diff --git a/kernel/resource.c b/kernel/resource.c index 7640b3a947d0..7e8ea66a8c01 100644 --- a/kernel/resource.c +++ b/kernel/resource.c | |||
@@ -749,6 +749,7 @@ int adjust_resource(struct resource *res, resource_size_t start, resource_size_t | |||
749 | write_unlock(&resource_lock); | 749 | write_unlock(&resource_lock); |
750 | return result; | 750 | return result; |
751 | } | 751 | } |
752 | EXPORT_SYMBOL(adjust_resource); | ||
752 | 753 | ||
753 | static void __init __reserve_region_with_split(struct resource *root, | 754 | static void __init __reserve_region_with_split(struct resource *root, |
754 | resource_size_t start, resource_size_t end, | 755 | resource_size_t start, resource_size_t end, |
@@ -792,8 +793,6 @@ void __init reserve_region_with_split(struct resource *root, | |||
792 | write_unlock(&resource_lock); | 793 | write_unlock(&resource_lock); |
793 | } | 794 | } |
794 | 795 | ||
795 | EXPORT_SYMBOL(adjust_resource); | ||
796 | |||
797 | /** | 796 | /** |
798 | * resource_alignment - calculate resource's alignment | 797 | * resource_alignment - calculate resource's alignment |
799 | * @res: resource pointer | 798 | * @res: resource pointer |
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index d2bd4647586c..503d6426126d 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
@@ -71,6 +71,7 @@ | |||
71 | #include <linux/ftrace.h> | 71 | #include <linux/ftrace.h> |
72 | #include <linux/slab.h> | 72 | #include <linux/slab.h> |
73 | #include <linux/init_task.h> | 73 | #include <linux/init_task.h> |
74 | #include <linux/binfmts.h> | ||
74 | 75 | ||
75 | #include <asm/tlb.h> | 76 | #include <asm/tlb.h> |
76 | #include <asm/irq_regs.h> | 77 | #include <asm/irq_regs.h> |
@@ -7571,8 +7572,7 @@ static inline struct task_group *cgroup_tg(struct cgroup *cgrp) | |||
7571 | struct task_group, css); | 7572 | struct task_group, css); |
7572 | } | 7573 | } |
7573 | 7574 | ||
7574 | static struct cgroup_subsys_state * | 7575 | static struct cgroup_subsys_state *cpu_cgroup_create(struct cgroup *cgrp) |
7575 | cpu_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cgrp) | ||
7576 | { | 7576 | { |
7577 | struct task_group *tg, *parent; | 7577 | struct task_group *tg, *parent; |
7578 | 7578 | ||
@@ -7589,15 +7589,14 @@ cpu_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cgrp) | |||
7589 | return &tg->css; | 7589 | return &tg->css; |
7590 | } | 7590 | } |
7591 | 7591 | ||
7592 | static void | 7592 | static void cpu_cgroup_destroy(struct cgroup *cgrp) |
7593 | cpu_cgroup_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp) | ||
7594 | { | 7593 | { |
7595 | struct task_group *tg = cgroup_tg(cgrp); | 7594 | struct task_group *tg = cgroup_tg(cgrp); |
7596 | 7595 | ||
7597 | sched_destroy_group(tg); | 7596 | sched_destroy_group(tg); |
7598 | } | 7597 | } |
7599 | 7598 | ||
7600 | static int cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, | 7599 | static int cpu_cgroup_can_attach(struct cgroup *cgrp, |
7601 | struct cgroup_taskset *tset) | 7600 | struct cgroup_taskset *tset) |
7602 | { | 7601 | { |
7603 | struct task_struct *task; | 7602 | struct task_struct *task; |
@@ -7615,7 +7614,7 @@ static int cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, | |||
7615 | return 0; | 7614 | return 0; |
7616 | } | 7615 | } |
7617 | 7616 | ||
7618 | static void cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, | 7617 | static void cpu_cgroup_attach(struct cgroup *cgrp, |
7619 | struct cgroup_taskset *tset) | 7618 | struct cgroup_taskset *tset) |
7620 | { | 7619 | { |
7621 | struct task_struct *task; | 7620 | struct task_struct *task; |
@@ -7625,8 +7624,8 @@ static void cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, | |||
7625 | } | 7624 | } |
7626 | 7625 | ||
7627 | static void | 7626 | static void |
7628 | cpu_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp, | 7627 | cpu_cgroup_exit(struct cgroup *cgrp, struct cgroup *old_cgrp, |
7629 | struct cgroup *old_cgrp, struct task_struct *task) | 7628 | struct task_struct *task) |
7630 | { | 7629 | { |
7631 | /* | 7630 | /* |
7632 | * cgroup_exit() is called in the copy_process() failure path. | 7631 | * cgroup_exit() is called in the copy_process() failure path. |
@@ -7976,8 +7975,7 @@ struct cgroup_subsys cpu_cgroup_subsys = { | |||
7976 | */ | 7975 | */ |
7977 | 7976 | ||
7978 | /* create a new cpu accounting group */ | 7977 | /* create a new cpu accounting group */ |
7979 | static struct cgroup_subsys_state *cpuacct_create( | 7978 | static struct cgroup_subsys_state *cpuacct_create(struct cgroup *cgrp) |
7980 | struct cgroup_subsys *ss, struct cgroup *cgrp) | ||
7981 | { | 7979 | { |
7982 | struct cpuacct *ca; | 7980 | struct cpuacct *ca; |
7983 | 7981 | ||
@@ -8007,8 +8005,7 @@ out: | |||
8007 | } | 8005 | } |
8008 | 8006 | ||
8009 | /* destroy an existing cpu accounting group */ | 8007 | /* destroy an existing cpu accounting group */ |
8010 | static void | 8008 | static void cpuacct_destroy(struct cgroup *cgrp) |
8011 | cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp) | ||
8012 | { | 8009 | { |
8013 | struct cpuacct *ca = cgroup_ca(cgrp); | 8010 | struct cpuacct *ca = cgroup_ca(cgrp); |
8014 | 8011 | ||
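
The scheduler hunks above are part of a tree-wide change to the cgroup subsystem callbacks: the struct cgroup_subsys *ss argument is dropped, since each callback already belongs to exactly one subsystem. Collected from the hunks above (a summary of the new convention, not additional code), the cpu controller's methods end up with these prototypes:

static struct cgroup_subsys_state *cpu_cgroup_create(struct cgroup *cgrp);
static void cpu_cgroup_destroy(struct cgroup *cgrp);
static int  cpu_cgroup_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset);
static void cpu_cgroup_attach(struct cgroup *cgrp, struct cgroup_taskset *tset);
static void cpu_cgroup_exit(struct cgroup *cgrp, struct cgroup *old_cgrp,
			    struct task_struct *task);
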
diff --git a/kernel/signal.c b/kernel/signal.c index 8511e39813c7..d523da02dd14 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
@@ -58,21 +58,20 @@ static int sig_handler_ignored(void __user *handler, int sig) | |||
58 | (handler == SIG_DFL && sig_kernel_ignore(sig)); | 58 | (handler == SIG_DFL && sig_kernel_ignore(sig)); |
59 | } | 59 | } |
60 | 60 | ||
61 | static int sig_task_ignored(struct task_struct *t, int sig, | 61 | static int sig_task_ignored(struct task_struct *t, int sig, bool force) |
62 | int from_ancestor_ns) | ||
63 | { | 62 | { |
64 | void __user *handler; | 63 | void __user *handler; |
65 | 64 | ||
66 | handler = sig_handler(t, sig); | 65 | handler = sig_handler(t, sig); |
67 | 66 | ||
68 | if (unlikely(t->signal->flags & SIGNAL_UNKILLABLE) && | 67 | if (unlikely(t->signal->flags & SIGNAL_UNKILLABLE) && |
69 | handler == SIG_DFL && !from_ancestor_ns) | 68 | handler == SIG_DFL && !force) |
70 | return 1; | 69 | return 1; |
71 | 70 | ||
72 | return sig_handler_ignored(handler, sig); | 71 | return sig_handler_ignored(handler, sig); |
73 | } | 72 | } |
74 | 73 | ||
75 | static int sig_ignored(struct task_struct *t, int sig, int from_ancestor_ns) | 74 | static int sig_ignored(struct task_struct *t, int sig, bool force) |
76 | { | 75 | { |
77 | /* | 76 | /* |
78 | * Blocked signals are never ignored, since the | 77 | * Blocked signals are never ignored, since the |
@@ -82,7 +81,7 @@ static int sig_ignored(struct task_struct *t, int sig, int from_ancestor_ns) | |||
82 | if (sigismember(&t->blocked, sig) || sigismember(&t->real_blocked, sig)) | 81 | if (sigismember(&t->blocked, sig) || sigismember(&t->real_blocked, sig)) |
83 | return 0; | 82 | return 0; |
84 | 83 | ||
85 | if (!sig_task_ignored(t, sig, from_ancestor_ns)) | 84 | if (!sig_task_ignored(t, sig, force)) |
86 | return 0; | 85 | return 0; |
87 | 86 | ||
88 | /* | 87 | /* |
@@ -855,7 +854,7 @@ static void ptrace_trap_notify(struct task_struct *t) | |||
855 | * Returns true if the signal should be actually delivered, otherwise | 854 | * Returns true if the signal should be actually delivered, otherwise |
856 | * it should be dropped. | 855 | * it should be dropped. |
857 | */ | 856 | */ |
858 | static int prepare_signal(int sig, struct task_struct *p, int from_ancestor_ns) | 857 | static int prepare_signal(int sig, struct task_struct *p, bool force) |
859 | { | 858 | { |
860 | struct signal_struct *signal = p->signal; | 859 | struct signal_struct *signal = p->signal; |
861 | struct task_struct *t; | 860 | struct task_struct *t; |
@@ -915,7 +914,7 @@ static int prepare_signal(int sig, struct task_struct *p, int from_ancestor_ns) | |||
915 | } | 914 | } |
916 | } | 915 | } |
917 | 916 | ||
918 | return !sig_ignored(p, sig, from_ancestor_ns); | 917 | return !sig_ignored(p, sig, force); |
919 | } | 918 | } |
920 | 919 | ||
921 | /* | 920 | /* |
@@ -1059,7 +1058,8 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t, | |||
1059 | assert_spin_locked(&t->sighand->siglock); | 1058 | assert_spin_locked(&t->sighand->siglock); |
1060 | 1059 | ||
1061 | result = TRACE_SIGNAL_IGNORED; | 1060 | result = TRACE_SIGNAL_IGNORED; |
1062 | if (!prepare_signal(sig, t, from_ancestor_ns)) | 1061 | if (!prepare_signal(sig, t, |
1062 | from_ancestor_ns || (info == SEND_SIG_FORCED))) | ||
1063 | goto ret; | 1063 | goto ret; |
1064 | 1064 | ||
1065 | pending = group ? &t->signal->shared_pending : &t->pending; | 1065 | pending = group ? &t->signal->shared_pending : &t->pending; |
@@ -1601,7 +1601,7 @@ int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group) | |||
1601 | 1601 | ||
1602 | ret = 1; /* the signal is ignored */ | 1602 | ret = 1; /* the signal is ignored */ |
1603 | result = TRACE_SIGNAL_IGNORED; | 1603 | result = TRACE_SIGNAL_IGNORED; |
1604 | if (!prepare_signal(sig, t, 0)) | 1604 | if (!prepare_signal(sig, t, false)) |
1605 | goto out; | 1605 | goto out; |
1606 | 1606 | ||
1607 | ret = 0; | 1607 | ret = 0; |
@@ -1652,6 +1652,15 @@ bool do_notify_parent(struct task_struct *tsk, int sig) | |||
1652 | BUG_ON(!tsk->ptrace && | 1652 | BUG_ON(!tsk->ptrace && |
1653 | (tsk->group_leader != tsk || !thread_group_empty(tsk))); | 1653 | (tsk->group_leader != tsk || !thread_group_empty(tsk))); |
1654 | 1654 | ||
1655 | if (sig != SIGCHLD) { | ||
1656 | /* | ||
1657 | * This is only possible if parent == real_parent. | ||
1658 | * Check if it has changed security domain. | ||
1659 | */ | ||
1660 | if (tsk->parent_exec_id != tsk->parent->self_exec_id) | ||
1661 | sig = SIGCHLD; | ||
1662 | } | ||
1663 | |||
1655 | info.si_signo = sig; | 1664 | info.si_signo = sig; |
1656 | info.si_errno = 0; | 1665 | info.si_errno = 0; |
1657 | /* | 1666 | /* |
diff --git a/kernel/sys.c b/kernel/sys.c index 888d227fd195..9eb7fcab8df6 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
@@ -1962,6 +1962,14 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, | |||
1962 | case PR_SET_MM: | 1962 | case PR_SET_MM: |
1963 | error = prctl_set_mm(arg2, arg3, arg4, arg5); | 1963 | error = prctl_set_mm(arg2, arg3, arg4, arg5); |
1964 | break; | 1964 | break; |
1965 | case PR_SET_CHILD_SUBREAPER: | ||
1966 | me->signal->is_child_subreaper = !!arg2; | ||
1967 | error = 0; | ||
1968 | break; | ||
1969 | case PR_GET_CHILD_SUBREAPER: | ||
1970 | error = put_user(me->signal->is_child_subreaper, | ||
1971 | (int __user *) arg2); | ||
1972 | break; | ||
1965 | default: | 1973 | default: |
1966 | error = -EINVAL; | 1974 | error = -EINVAL; |
1967 | break; | 1975 | break; |
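
The prctl() additions above expose the new child-subreaper flag to user space. A minimal sketch of a caller follows (not part of the patch; the fallback constants assume the values 36 and 37 used for this pair, for builds whose <sys/prctl.h> predates the feature):

#include <stdio.h>
#include <sys/prctl.h>

#ifndef PR_SET_CHILD_SUBREAPER		/* header may predate the feature */
#define PR_SET_CHILD_SUBREAPER	36
#define PR_GET_CHILD_SUBREAPER	37
#endif

int main(void)
{
	int reaper = 0;

	/*
	 * Mark this process as a child sub-reaper: orphaned descendants are
	 * re-parented to it rather than to init.
	 */
	if (prctl(PR_SET_CHILD_SUBREAPER, 1) != 0) {
		perror("PR_SET_CHILD_SUBREAPER");
		return 1;
	}

	/* The GET variant writes the flag back through the pointer in arg2,
	 * matching the put_user() in the hunk above. */
	prctl(PR_GET_CHILD_SUBREAPER, &reaper);
	printf("child_subreaper = %d\n", reaper);
	return 0;
}
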
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index f487f257e05e..d48ff4fd44c3 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -58,6 +58,7 @@ | |||
58 | #include <linux/oom.h> | 58 | #include <linux/oom.h> |
59 | #include <linux/kmod.h> | 59 | #include <linux/kmod.h> |
60 | #include <linux/capability.h> | 60 | #include <linux/capability.h> |
61 | #include <linux/binfmts.h> | ||
61 | 62 | ||
62 | #include <asm/uaccess.h> | 63 | #include <asm/uaccess.h> |
63 | #include <asm/processor.h> | 64 | #include <asm/processor.h> |
@@ -192,20 +193,6 @@ static int sysrq_sysctl_handler(ctl_table *table, int write, | |||
192 | 193 | ||
193 | #endif | 194 | #endif |
194 | 195 | ||
195 | static struct ctl_table root_table[]; | ||
196 | static struct ctl_table_root sysctl_table_root; | ||
197 | static struct ctl_table_header root_table_header = { | ||
198 | {{.count = 1, | ||
199 | .ctl_table = root_table, | ||
200 | .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),}}, | ||
201 | .root = &sysctl_table_root, | ||
202 | .set = &sysctl_table_root.default_set, | ||
203 | }; | ||
204 | static struct ctl_table_root sysctl_table_root = { | ||
205 | .root_list = LIST_HEAD_INIT(sysctl_table_root.root_list), | ||
206 | .default_set.list = LIST_HEAD_INIT(root_table_header.ctl_entry), | ||
207 | }; | ||
208 | |||
209 | static struct ctl_table kern_table[]; | 196 | static struct ctl_table kern_table[]; |
210 | static struct ctl_table vm_table[]; | 197 | static struct ctl_table vm_table[]; |
211 | static struct ctl_table fs_table[]; | 198 | static struct ctl_table fs_table[]; |
@@ -222,7 +209,7 @@ int sysctl_legacy_va_layout; | |||
222 | 209 | ||
223 | /* The default sysctl tables: */ | 210 | /* The default sysctl tables: */ |
224 | 211 | ||
225 | static struct ctl_table root_table[] = { | 212 | static struct ctl_table sysctl_base_table[] = { |
226 | { | 213 | { |
227 | .procname = "kernel", | 214 | .procname = "kernel", |
228 | .mode = 0555, | 215 | .mode = 0555, |
@@ -1559,490 +1546,12 @@ static struct ctl_table dev_table[] = { | |||
1559 | { } | 1546 | { } |
1560 | }; | 1547 | }; |
1561 | 1548 | ||
1562 | static DEFINE_SPINLOCK(sysctl_lock); | 1549 | int __init sysctl_init(void) |
1563 | |||
1564 | /* called under sysctl_lock */ | ||
1565 | static int use_table(struct ctl_table_header *p) | ||
1566 | { | ||
1567 | if (unlikely(p->unregistering)) | ||
1568 | return 0; | ||
1569 | p->used++; | ||
1570 | return 1; | ||
1571 | } | ||
1572 | |||
1573 | /* called under sysctl_lock */ | ||
1574 | static void unuse_table(struct ctl_table_header *p) | ||
1575 | { | ||
1576 | if (!--p->used) | ||
1577 | if (unlikely(p->unregistering)) | ||
1578 | complete(p->unregistering); | ||
1579 | } | ||
1580 | |||
1581 | /* called under sysctl_lock, will reacquire if has to wait */ | ||
1582 | static void start_unregistering(struct ctl_table_header *p) | ||
1583 | { | ||
1584 | /* | ||
1585 | * if p->used is 0, nobody will ever touch that entry again; | ||
1586 | * we'll eliminate all paths to it before dropping sysctl_lock | ||
1587 | */ | ||
1588 | if (unlikely(p->used)) { | ||
1589 | struct completion wait; | ||
1590 | init_completion(&wait); | ||
1591 | p->unregistering = &wait; | ||
1592 | spin_unlock(&sysctl_lock); | ||
1593 | wait_for_completion(&wait); | ||
1594 | spin_lock(&sysctl_lock); | ||
1595 | } else { | ||
1596 | /* anything non-NULL; we'll never dereference it */ | ||
1597 | p->unregistering = ERR_PTR(-EINVAL); | ||
1598 | } | ||
1599 | /* | ||
1600 | * do not remove from the list until nobody holds it; walking the | ||
1601 | * list in do_sysctl() relies on that. | ||
1602 | */ | ||
1603 | list_del_init(&p->ctl_entry); | ||
1604 | } | ||
1605 | |||
1606 | void sysctl_head_get(struct ctl_table_header *head) | ||
1607 | { | ||
1608 | spin_lock(&sysctl_lock); | ||
1609 | head->count++; | ||
1610 | spin_unlock(&sysctl_lock); | ||
1611 | } | ||
1612 | |||
1613 | void sysctl_head_put(struct ctl_table_header *head) | ||
1614 | { | ||
1615 | spin_lock(&sysctl_lock); | ||
1616 | if (!--head->count) | ||
1617 | kfree_rcu(head, rcu); | ||
1618 | spin_unlock(&sysctl_lock); | ||
1619 | } | ||
1620 | |||
1621 | struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head) | ||
1622 | { | ||
1623 | if (!head) | ||
1624 | BUG(); | ||
1625 | spin_lock(&sysctl_lock); | ||
1626 | if (!use_table(head)) | ||
1627 | head = ERR_PTR(-ENOENT); | ||
1628 | spin_unlock(&sysctl_lock); | ||
1629 | return head; | ||
1630 | } | ||
1631 | |||
1632 | void sysctl_head_finish(struct ctl_table_header *head) | ||
1633 | { | ||
1634 | if (!head) | ||
1635 | return; | ||
1636 | spin_lock(&sysctl_lock); | ||
1637 | unuse_table(head); | ||
1638 | spin_unlock(&sysctl_lock); | ||
1639 | } | ||
1640 | |||
1641 | static struct ctl_table_set * | ||
1642 | lookup_header_set(struct ctl_table_root *root, struct nsproxy *namespaces) | ||
1643 | { | ||
1644 | struct ctl_table_set *set = &root->default_set; | ||
1645 | if (root->lookup) | ||
1646 | set = root->lookup(root, namespaces); | ||
1647 | return set; | ||
1648 | } | ||
1649 | |||
1650 | static struct list_head * | ||
1651 | lookup_header_list(struct ctl_table_root *root, struct nsproxy *namespaces) | ||
1652 | { | ||
1653 | struct ctl_table_set *set = lookup_header_set(root, namespaces); | ||
1654 | return &set->list; | ||
1655 | } | ||
1656 | |||
1657 | struct ctl_table_header *__sysctl_head_next(struct nsproxy *namespaces, | ||
1658 | struct ctl_table_header *prev) | ||
1659 | { | ||
1660 | struct ctl_table_root *root; | ||
1661 | struct list_head *header_list; | ||
1662 | struct ctl_table_header *head; | ||
1663 | struct list_head *tmp; | ||
1664 | |||
1665 | spin_lock(&sysctl_lock); | ||
1666 | if (prev) { | ||
1667 | head = prev; | ||
1668 | tmp = &prev->ctl_entry; | ||
1669 | unuse_table(prev); | ||
1670 | goto next; | ||
1671 | } | ||
1672 | tmp = &root_table_header.ctl_entry; | ||
1673 | for (;;) { | ||
1674 | head = list_entry(tmp, struct ctl_table_header, ctl_entry); | ||
1675 | |||
1676 | if (!use_table(head)) | ||
1677 | goto next; | ||
1678 | spin_unlock(&sysctl_lock); | ||
1679 | return head; | ||
1680 | next: | ||
1681 | root = head->root; | ||
1682 | tmp = tmp->next; | ||
1683 | header_list = lookup_header_list(root, namespaces); | ||
1684 | if (tmp != header_list) | ||
1685 | continue; | ||
1686 | |||
1687 | do { | ||
1688 | root = list_entry(root->root_list.next, | ||
1689 | struct ctl_table_root, root_list); | ||
1690 | if (root == &sysctl_table_root) | ||
1691 | goto out; | ||
1692 | header_list = lookup_header_list(root, namespaces); | ||
1693 | } while (list_empty(header_list)); | ||
1694 | tmp = header_list->next; | ||
1695 | } | ||
1696 | out: | ||
1697 | spin_unlock(&sysctl_lock); | ||
1698 | return NULL; | ||
1699 | } | ||
1700 | |||
1701 | struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev) | ||
1702 | { | ||
1703 | return __sysctl_head_next(current->nsproxy, prev); | ||
1704 | } | ||
1705 | |||
1706 | void register_sysctl_root(struct ctl_table_root *root) | ||
1707 | { | ||
1708 | spin_lock(&sysctl_lock); | ||
1709 | list_add_tail(&root->root_list, &sysctl_table_root.root_list); | ||
1710 | spin_unlock(&sysctl_lock); | ||
1711 | } | ||
1712 | |||
1713 | /* | ||
1714 | * sysctl_perm does NOT grant the superuser all rights automatically, because | ||
1715 | * some sysctl variables are readonly even to root. | ||
1716 | */ | ||
1717 | |||
1718 | static int test_perm(int mode, int op) | ||
1719 | { | ||
1720 | if (!current_euid()) | ||
1721 | mode >>= 6; | ||
1722 | else if (in_egroup_p(0)) | ||
1723 | mode >>= 3; | ||
1724 | if ((op & ~mode & (MAY_READ|MAY_WRITE|MAY_EXEC)) == 0) | ||
1725 | return 0; | ||
1726 | return -EACCES; | ||
1727 | } | ||
1728 | |||
1729 | int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int op) | ||
1730 | { | ||
1731 | int mode; | ||
1732 | |||
1733 | if (root->permissions) | ||
1734 | mode = root->permissions(root, current->nsproxy, table); | ||
1735 | else | ||
1736 | mode = table->mode; | ||
1737 | |||
1738 | return test_perm(mode, op); | ||
1739 | } | ||
1740 | |||
1741 | static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table) | ||
1742 | { | ||
1743 | for (; table->procname; table++) { | ||
1744 | table->parent = parent; | ||
1745 | if (table->child) | ||
1746 | sysctl_set_parent(table, table->child); | ||
1747 | } | ||
1748 | } | ||
1749 | |||
1750 | static __init int sysctl_init(void) | ||
1751 | { | 1550 | { |
1752 | sysctl_set_parent(NULL, root_table); | 1551 | register_sysctl_table(sysctl_base_table); |
1753 | #ifdef CONFIG_SYSCTL_SYSCALL_CHECK | ||
1754 | sysctl_check_table(current->nsproxy, root_table); | ||
1755 | #endif | ||
1756 | return 0; | 1552 | return 0; |
1757 | } | 1553 | } |
1758 | 1554 | ||
1759 | core_initcall(sysctl_init); | ||
1760 | |||
1761 | static struct ctl_table *is_branch_in(struct ctl_table *branch, | ||
1762 | struct ctl_table *table) | ||
1763 | { | ||
1764 | struct ctl_table *p; | ||
1765 | const char *s = branch->procname; | ||
1766 | |||
1767 | /* branch should have named subdirectory as its first element */ | ||
1768 | if (!s || !branch->child) | ||
1769 | return NULL; | ||
1770 | |||
1771 | /* ... and nothing else */ | ||
1772 | if (branch[1].procname) | ||
1773 | return NULL; | ||
1774 | |||
1775 | /* table should contain subdirectory with the same name */ | ||
1776 | for (p = table; p->procname; p++) { | ||
1777 | if (!p->child) | ||
1778 | continue; | ||
1779 | if (p->procname && strcmp(p->procname, s) == 0) | ||
1780 | return p; | ||
1781 | } | ||
1782 | return NULL; | ||
1783 | } | ||
1784 | |||
1785 | /* see if attaching q to p would be an improvement */ | ||
1786 | static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q) | ||
1787 | { | ||
1788 | struct ctl_table *to = p->ctl_table, *by = q->ctl_table; | ||
1789 | struct ctl_table *next; | ||
1790 | int is_better = 0; | ||
1791 | int not_in_parent = !p->attached_by; | ||
1792 | |||
1793 | while ((next = is_branch_in(by, to)) != NULL) { | ||
1794 | if (by == q->attached_by) | ||
1795 | is_better = 1; | ||
1796 | if (to == p->attached_by) | ||
1797 | not_in_parent = 1; | ||
1798 | by = by->child; | ||
1799 | to = next->child; | ||
1800 | } | ||
1801 | |||
1802 | if (is_better && not_in_parent) { | ||
1803 | q->attached_by = by; | ||
1804 | q->attached_to = to; | ||
1805 | q->parent = p; | ||
1806 | } | ||
1807 | } | ||
1808 | |||
1809 | /** | ||
1810 | * __register_sysctl_paths - register a sysctl hierarchy | ||
1811 | * @root: List of sysctl headers to register on | ||
1812 | * @namespaces: Data to compute which lists of sysctl entries are visible | ||
1813 | * @path: The path to the directory the sysctl table is in. | ||
1814 | * @table: the top-level table structure | ||
1815 | * | ||
1816 | * Register a sysctl table hierarchy. @table should be a filled in ctl_table | ||
1817 | * array. A completely 0 filled entry terminates the table. | ||
1818 | * | ||
1819 | * The members of the &struct ctl_table structure are used as follows: | ||
1820 | * | ||
1821 | * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not | ||
1822 | * enter a sysctl file | ||
1823 | * | ||
1824 | * data - a pointer to data for use by proc_handler | ||
1825 | * | ||
1826 | * maxlen - the maximum size in bytes of the data | ||
1827 | * | ||
1828 | * mode - the file permissions for the /proc/sys file, and for sysctl(2) | ||
1829 | * | ||
1830 | * child - a pointer to the child sysctl table if this entry is a directory, or | ||
1831 | * %NULL. | ||
1832 | * | ||
1833 | * proc_handler - the text handler routine (described below) | ||
1834 | * | ||
1835 | * de - for internal use by the sysctl routines | ||
1836 | * | ||
1837 | * extra1, extra2 - extra pointers usable by the proc handler routines | ||
1838 | * | ||
1839 | * Leaf nodes in the sysctl tree will be represented by a single file | ||
1840 | * under /proc; non-leaf nodes will be represented by directories. | ||
1841 | * | ||
1842 | * sysctl(2) can automatically manage read and write requests through | ||
1843 | * the sysctl table. The data and maxlen fields of the ctl_table | ||
1844 | * struct enable minimal validation of the values being written to be | ||
1845 | * performed, and the mode field allows minimal authentication. | ||
1846 | * | ||
1847 | * There must be a proc_handler routine for any terminal nodes | ||
1848 | * mirrored under /proc/sys (non-terminals are handled by a built-in | ||
1849 | * directory handler). Several default handlers are available to | ||
1850 | * cover common cases - | ||
1851 | * | ||
1852 | * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(), | ||
1853 | * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(), | ||
1854 | * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax() | ||
1855 | * | ||
1856 | * It is the handler's job to read the input buffer from user memory | ||
1857 | * and process it. The handler should return 0 on success. | ||
1858 | * | ||
1859 | * This routine returns %NULL on a failure to register, and a pointer | ||
1860 | * to the table header on success. | ||
1861 | */ | ||
1862 | struct ctl_table_header *__register_sysctl_paths( | ||
1863 | struct ctl_table_root *root, | ||
1864 | struct nsproxy *namespaces, | ||
1865 | const struct ctl_path *path, struct ctl_table *table) | ||
1866 | { | ||
1867 | struct ctl_table_header *header; | ||
1868 | struct ctl_table *new, **prevp; | ||
1869 | unsigned int n, npath; | ||
1870 | struct ctl_table_set *set; | ||
1871 | |||
1872 | /* Count the path components */ | ||
1873 | for (npath = 0; path[npath].procname; ++npath) | ||
1874 | ; | ||
1875 | |||
1876 | /* | ||
1877 | * For each path component, allocate a 2-element ctl_table array. | ||
1878 | * The first array element will be filled with the sysctl entry | ||
1879 | * for this, the second will be the sentinel (procname == 0). | ||
1880 | * | ||
1881 | * We allocate everything in one go so that we don't have to | ||
1882 | * worry about freeing additional memory in unregister_sysctl_table. | ||
1883 | */ | ||
1884 | header = kzalloc(sizeof(struct ctl_table_header) + | ||
1885 | (2 * npath * sizeof(struct ctl_table)), GFP_KERNEL); | ||
1886 | if (!header) | ||
1887 | return NULL; | ||
1888 | |||
1889 | new = (struct ctl_table *) (header + 1); | ||
1890 | |||
1891 | /* Now connect the dots */ | ||
1892 | prevp = &header->ctl_table; | ||
1893 | for (n = 0; n < npath; ++n, ++path) { | ||
1894 | /* Copy the procname */ | ||
1895 | new->procname = path->procname; | ||
1896 | new->mode = 0555; | ||
1897 | |||
1898 | *prevp = new; | ||
1899 | prevp = &new->child; | ||
1900 | |||
1901 | new += 2; | ||
1902 | } | ||
1903 | *prevp = table; | ||
1904 | header->ctl_table_arg = table; | ||
1905 | |||
1906 | INIT_LIST_HEAD(&header->ctl_entry); | ||
1907 | header->used = 0; | ||
1908 | header->unregistering = NULL; | ||
1909 | header->root = root; | ||
1910 | sysctl_set_parent(NULL, header->ctl_table); | ||
1911 | header->count = 1; | ||
1912 | #ifdef CONFIG_SYSCTL_SYSCALL_CHECK | ||
1913 | if (sysctl_check_table(namespaces, header->ctl_table)) { | ||
1914 | kfree(header); | ||
1915 | return NULL; | ||
1916 | } | ||
1917 | #endif | ||
1918 | spin_lock(&sysctl_lock); | ||
1919 | header->set = lookup_header_set(root, namespaces); | ||
1920 | header->attached_by = header->ctl_table; | ||
1921 | header->attached_to = root_table; | ||
1922 | header->parent = &root_table_header; | ||
1923 | for (set = header->set; set; set = set->parent) { | ||
1924 | struct ctl_table_header *p; | ||
1925 | list_for_each_entry(p, &set->list, ctl_entry) { | ||
1926 | if (p->unregistering) | ||
1927 | continue; | ||
1928 | try_attach(p, header); | ||
1929 | } | ||
1930 | } | ||
1931 | header->parent->count++; | ||
1932 | list_add_tail(&header->ctl_entry, &header->set->list); | ||
1933 | spin_unlock(&sysctl_lock); | ||
1934 | |||
1935 | return header; | ||
1936 | } | ||
1937 | |||
1938 | /** | ||
1939 | * register_sysctl_table_path - register a sysctl table hierarchy | ||
1940 | * @path: The path to the directory the sysctl table is in. | ||
1941 | * @table: the top-level table structure | ||
1942 | * | ||
1943 | * Register a sysctl table hierarchy. @table should be a filled in ctl_table | ||
1944 | * array. A completely 0 filled entry terminates the table. | ||
1945 | * | ||
1946 | * See __register_sysctl_paths for more details. | ||
1947 | */ | ||
1948 | struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path, | ||
1949 | struct ctl_table *table) | ||
1950 | { | ||
1951 | return __register_sysctl_paths(&sysctl_table_root, current->nsproxy, | ||
1952 | path, table); | ||
1953 | } | ||
1954 | |||
1955 | /** | ||
1956 | * register_sysctl_table - register a sysctl table hierarchy | ||
1957 | * @table: the top-level table structure | ||
1958 | * | ||
1959 | * Register a sysctl table hierarchy. @table should be a filled in ctl_table | ||
1960 | * array. A completely 0 filled entry terminates the table. | ||
1961 | * | ||
1962 | * See register_sysctl_paths for more details. | ||
1963 | */ | ||
1964 | struct ctl_table_header *register_sysctl_table(struct ctl_table *table) | ||
1965 | { | ||
1966 | static const struct ctl_path null_path[] = { {} }; | ||
1967 | |||
1968 | return register_sysctl_paths(null_path, table); | ||
1969 | } | ||
1970 | |||
1971 | /** | ||
1972 | * unregister_sysctl_table - unregister a sysctl table hierarchy | ||
1973 | * @header: the header returned from register_sysctl_table | ||
1974 | * | ||
1975 | * Unregisters the sysctl table and all children. proc entries may not | ||
1976 | * actually be removed until they are no longer used by anyone. | ||
1977 | */ | ||
1978 | void unregister_sysctl_table(struct ctl_table_header * header) | ||
1979 | { | ||
1980 | might_sleep(); | ||
1981 | |||
1982 | if (header == NULL) | ||
1983 | return; | ||
1984 | |||
1985 | spin_lock(&sysctl_lock); | ||
1986 | start_unregistering(header); | ||
1987 | if (!--header->parent->count) { | ||
1988 | WARN_ON(1); | ||
1989 | kfree_rcu(header->parent, rcu); | ||
1990 | } | ||
1991 | if (!--header->count) | ||
1992 | kfree_rcu(header, rcu); | ||
1993 | spin_unlock(&sysctl_lock); | ||
1994 | } | ||
1995 | |||
1996 | int sysctl_is_seen(struct ctl_table_header *p) | ||
1997 | { | ||
1998 | struct ctl_table_set *set = p->set; | ||
1999 | int res; | ||
2000 | spin_lock(&sysctl_lock); | ||
2001 | if (p->unregistering) | ||
2002 | res = 0; | ||
2003 | else if (!set->is_seen) | ||
2004 | res = 1; | ||
2005 | else | ||
2006 | res = set->is_seen(set); | ||
2007 | spin_unlock(&sysctl_lock); | ||
2008 | return res; | ||
2009 | } | ||
2010 | |||
2011 | void setup_sysctl_set(struct ctl_table_set *p, | ||
2012 | struct ctl_table_set *parent, | ||
2013 | int (*is_seen)(struct ctl_table_set *)) | ||
2014 | { | ||
2015 | INIT_LIST_HEAD(&p->list); | ||
2016 | p->parent = parent ? parent : &sysctl_table_root.default_set; | ||
2017 | p->is_seen = is_seen; | ||
2018 | } | ||
2019 | |||
2020 | #else /* !CONFIG_SYSCTL */ | ||
2021 | struct ctl_table_header *register_sysctl_table(struct ctl_table * table) | ||
2022 | { | ||
2023 | return NULL; | ||
2024 | } | ||
2025 | |||
2026 | struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path, | ||
2027 | struct ctl_table *table) | ||
2028 | { | ||
2029 | return NULL; | ||
2030 | } | ||
2031 | |||
2032 | void unregister_sysctl_table(struct ctl_table_header * table) | ||
2033 | { | ||
2034 | } | ||
2035 | |||
2036 | void setup_sysctl_set(struct ctl_table_set *p, | ||
2037 | struct ctl_table_set *parent, | ||
2038 | int (*is_seen)(struct ctl_table_set *)) | ||
2039 | { | ||
2040 | } | ||
2041 | |||
2042 | void sysctl_head_put(struct ctl_table_header *head) | ||
2043 | { | ||
2044 | } | ||
2045 | |||
2046 | #endif /* CONFIG_SYSCTL */ | 1555 | #endif /* CONFIG_SYSCTL */ |
2047 | 1556 | ||
2048 | /* | 1557 | /* |
@@ -3008,6 +2517,3 @@ EXPORT_SYMBOL(proc_dointvec_ms_jiffies); | |||
3008 | EXPORT_SYMBOL(proc_dostring); | 2517 | EXPORT_SYMBOL(proc_dostring); |
3009 | EXPORT_SYMBOL(proc_doulongvec_minmax); | 2518 | EXPORT_SYMBOL(proc_doulongvec_minmax); |
3010 | EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax); | 2519 | EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax); |
3011 | EXPORT_SYMBOL(register_sysctl_table); | ||
3012 | EXPORT_SYMBOL(register_sysctl_paths); | ||
3013 | EXPORT_SYMBOL(unregister_sysctl_table); | ||
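
Most of kernel/sysctl.c's registration machinery (header refcounting, __register_sysctl_paths(), the root table plumbing) is removed here, and the former root_table becomes sysctl_base_table, registered through the ordinary register_sysctl_table() path from sysctl_init(). The removed kernel-doc above still describes the registration interface as callers see it; as a reminder of that interface, a sketch of a user follows (not code from this patch; the example_* names are made up):

#include <linux/errno.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/sysctl.h>

static int example_value;			/* backing storage for the knob */

static struct ctl_table example_table[] = {
	{
		.procname	= "example_value",	/* /proc/sys/kernel/example_value */
		.data		= &example_value,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{ }					/* zero-filled entry terminates the table */
};

static struct ctl_path example_path[] = {
	{ .procname = "kernel" },
	{ }
};

static struct ctl_table_header *example_header;

static int __init example_sysctl_init(void)
{
	example_header = register_sysctl_paths(example_path, example_table);
	return example_header ? 0 : -ENOMEM;	/* NULL means registration failed */
}

static void __exit example_sysctl_exit(void)
{
	unregister_sysctl_table(example_header);
}

module_init(example_sysctl_init);
module_exit(example_sysctl_exit);
MODULE_LICENSE("GPL");

The zero-filled sentinel entry and the NULL-on-failure return follow the rules spelled out in the removed comment block above.
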
diff --git a/kernel/sysctl_check.c b/kernel/sysctl_check.c deleted file mode 100644 index 362da653813d..000000000000 --- a/kernel/sysctl_check.c +++ /dev/null | |||
@@ -1,160 +0,0 @@ | |||
1 | #include <linux/stat.h> | ||
2 | #include <linux/sysctl.h> | ||
3 | #include "../fs/xfs/xfs_sysctl.h" | ||
4 | #include <linux/sunrpc/debug.h> | ||
5 | #include <linux/string.h> | ||
6 | #include <net/ip_vs.h> | ||
7 | |||
8 | |||
9 | static int sysctl_depth(struct ctl_table *table) | ||
10 | { | ||
11 | struct ctl_table *tmp; | ||
12 | int depth; | ||
13 | |||
14 | depth = 0; | ||
15 | for (tmp = table; tmp->parent; tmp = tmp->parent) | ||
16 | depth++; | ||
17 | |||
18 | return depth; | ||
19 | } | ||
20 | |||
21 | static struct ctl_table *sysctl_parent(struct ctl_table *table, int n) | ||
22 | { | ||
23 | int i; | ||
24 | |||
25 | for (i = 0; table && i < n; i++) | ||
26 | table = table->parent; | ||
27 | |||
28 | return table; | ||
29 | } | ||
30 | |||
31 | |||
32 | static void sysctl_print_path(struct ctl_table *table) | ||
33 | { | ||
34 | struct ctl_table *tmp; | ||
35 | int depth, i; | ||
36 | depth = sysctl_depth(table); | ||
37 | if (table->procname) { | ||
38 | for (i = depth; i >= 0; i--) { | ||
39 | tmp = sysctl_parent(table, i); | ||
40 | printk("/%s", tmp->procname?tmp->procname:""); | ||
41 | } | ||
42 | } | ||
43 | printk(" "); | ||
44 | } | ||
45 | |||
46 | static struct ctl_table *sysctl_check_lookup(struct nsproxy *namespaces, | ||
47 | struct ctl_table *table) | ||
48 | { | ||
49 | struct ctl_table_header *head; | ||
50 | struct ctl_table *ref, *test; | ||
51 | int depth, cur_depth; | ||
52 | |||
53 | depth = sysctl_depth(table); | ||
54 | |||
55 | for (head = __sysctl_head_next(namespaces, NULL); head; | ||
56 | head = __sysctl_head_next(namespaces, head)) { | ||
57 | cur_depth = depth; | ||
58 | ref = head->ctl_table; | ||
59 | repeat: | ||
60 | test = sysctl_parent(table, cur_depth); | ||
61 | for (; ref->procname; ref++) { | ||
62 | int match = 0; | ||
63 | if (cur_depth && !ref->child) | ||
64 | continue; | ||
65 | |||
66 | if (test->procname && ref->procname && | ||
67 | (strcmp(test->procname, ref->procname) == 0)) | ||
68 | match++; | ||
69 | |||
70 | if (match) { | ||
71 | if (cur_depth != 0) { | ||
72 | cur_depth--; | ||
73 | ref = ref->child; | ||
74 | goto repeat; | ||
75 | } | ||
76 | goto out; | ||
77 | } | ||
78 | } | ||
79 | } | ||
80 | ref = NULL; | ||
81 | out: | ||
82 | sysctl_head_finish(head); | ||
83 | return ref; | ||
84 | } | ||
85 | |||
86 | static void set_fail(const char **fail, struct ctl_table *table, const char *str) | ||
87 | { | ||
88 | if (*fail) { | ||
89 | printk(KERN_ERR "sysctl table check failed: "); | ||
90 | sysctl_print_path(table); | ||
91 | printk(" %s\n", *fail); | ||
92 | dump_stack(); | ||
93 | } | ||
94 | *fail = str; | ||
95 | } | ||
96 | |||
97 | static void sysctl_check_leaf(struct nsproxy *namespaces, | ||
98 | struct ctl_table *table, const char **fail) | ||
99 | { | ||
100 | struct ctl_table *ref; | ||
101 | |||
102 | ref = sysctl_check_lookup(namespaces, table); | ||
103 | if (ref && (ref != table)) | ||
104 | set_fail(fail, table, "Sysctl already exists"); | ||
105 | } | ||
106 | |||
107 | int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table) | ||
108 | { | ||
109 | int error = 0; | ||
110 | for (; table->procname; table++) { | ||
111 | const char *fail = NULL; | ||
112 | |||
113 | if (table->parent) { | ||
114 | if (!table->parent->procname) | ||
115 | set_fail(&fail, table, "Parent without procname"); | ||
116 | } | ||
117 | if (table->child) { | ||
118 | if (table->data) | ||
119 | set_fail(&fail, table, "Directory with data?"); | ||
120 | if (table->maxlen) | ||
121 | set_fail(&fail, table, "Directory with maxlen?"); | ||
122 | if ((table->mode & (S_IRUGO|S_IXUGO)) != table->mode) | ||
123 | set_fail(&fail, table, "Writable sysctl directory"); | ||
124 | if (table->proc_handler) | ||
125 | set_fail(&fail, table, "Directory with proc_handler"); | ||
126 | if (table->extra1) | ||
127 | set_fail(&fail, table, "Directory with extra1"); | ||
128 | if (table->extra2) | ||
129 | set_fail(&fail, table, "Directory with extra2"); | ||
130 | } else { | ||
131 | if ((table->proc_handler == proc_dostring) || | ||
132 | (table->proc_handler == proc_dointvec) || | ||
133 | (table->proc_handler == proc_dointvec_minmax) || | ||
134 | (table->proc_handler == proc_dointvec_jiffies) || | ||
135 | (table->proc_handler == proc_dointvec_userhz_jiffies) || | ||
136 | (table->proc_handler == proc_dointvec_ms_jiffies) || | ||
137 | (table->proc_handler == proc_doulongvec_minmax) || | ||
138 | (table->proc_handler == proc_doulongvec_ms_jiffies_minmax)) { | ||
139 | if (!table->data) | ||
140 | set_fail(&fail, table, "No data"); | ||
141 | if (!table->maxlen) | ||
142 | set_fail(&fail, table, "No maxlen"); | ||
143 | } | ||
144 | #ifdef CONFIG_PROC_SYSCTL | ||
145 | if (!table->proc_handler) | ||
146 | set_fail(&fail, table, "No proc_handler"); | ||
147 | #endif | ||
148 | sysctl_check_leaf(namespaces, table, &fail); | ||
149 | } | ||
150 | if (table->mode > 0777) | ||
151 | set_fail(&fail, table, "bogus .mode"); | ||
152 | if (fail) { | ||
153 | set_fail(&fail, table, NULL); | ||
154 | error = -EINVAL; | ||
155 | } | ||
156 | if (table->child) | ||
157 | error |= sysctl_check_table(namespaces, table->child); | ||
158 | } | ||
159 | return error; | ||
160 | } | ||
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 403c2a092830..15be32e19c6e 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c | |||
@@ -1260,6 +1260,8 @@ ktime_t ktime_get_monotonic_offset(void) | |||
1260 | 1260 | ||
1261 | return timespec_to_ktime(wtom); | 1261 | return timespec_to_ktime(wtom); |
1262 | } | 1262 | } |
1263 | EXPORT_SYMBOL_GPL(ktime_get_monotonic_offset); | ||
1264 | |||
1263 | 1265 | ||
1264 | /** | 1266 | /** |
1265 | * xtime_update() - advances the timekeeping infrastructure | 1267 | * xtime_update() - advances the timekeeping infrastructure |
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index c5a01873567d..859fae6b1825 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c | |||
@@ -264,7 +264,7 @@ void *trace_seq_reserve(struct trace_seq *s, size_t len) | |||
264 | return ret; | 264 | return ret; |
265 | } | 265 | } |
266 | 266 | ||
267 | int trace_seq_path(struct trace_seq *s, struct path *path) | 267 | int trace_seq_path(struct trace_seq *s, const struct path *path) |
268 | { | 268 | { |
269 | unsigned char *p; | 269 | unsigned char *p; |
270 | 270 | ||
diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 14bc092fb12c..df30ee08bdd4 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c | |||
@@ -9,6 +9,8 @@ | |||
9 | * to those contributors as well. | 9 | * to those contributors as well. |
10 | */ | 10 | */ |
11 | 11 | ||
12 | #define pr_fmt(fmt) "NMI watchdog: " fmt | ||
13 | |||
12 | #include <linux/mm.h> | 14 | #include <linux/mm.h> |
13 | #include <linux/cpu.h> | 15 | #include <linux/cpu.h> |
14 | #include <linux/nmi.h> | 16 | #include <linux/nmi.h> |
@@ -319,11 +321,9 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) | |||
319 | */ | 321 | */ |
320 | static int watchdog(void *unused) | 322 | static int watchdog(void *unused) |
321 | { | 323 | { |
322 | struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; | 324 | struct sched_param param = { .sched_priority = 0 }; |
323 | struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer); | 325 | struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer); |
324 | 326 | ||
325 | sched_setscheduler(current, SCHED_FIFO, ¶m); | ||
326 | |||
327 | /* initialize timestamp */ | 327 | /* initialize timestamp */ |
328 | __touch_watchdog(); | 328 | __touch_watchdog(); |
329 | 329 | ||
@@ -349,8 +349,11 @@ static int watchdog(void *unused) | |||
349 | 349 | ||
350 | set_current_state(TASK_INTERRUPTIBLE); | 350 | set_current_state(TASK_INTERRUPTIBLE); |
351 | } | 351 | } |
352 | /* | ||
353 | * Drop the policy/priority elevation during thread exit to avoid a | ||
354 | * scheduling latency spike. | ||
355 | */ | ||
352 | __set_current_state(TASK_RUNNING); | 356 | __set_current_state(TASK_RUNNING); |
353 | param.sched_priority = 0; | ||
354 | sched_setscheduler(current, SCHED_NORMAL, ¶m); | 357 | sched_setscheduler(current, SCHED_NORMAL, ¶m); |
355 | return 0; | 358 | return 0; |
356 | } | 359 | } |
@@ -376,18 +379,20 @@ static int watchdog_nmi_enable(int cpu) | |||
376 | /* Try to register using hardware perf events */ | 379 | /* Try to register using hardware perf events */ |
377 | event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL); | 380 | event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL); |
378 | if (!IS_ERR(event)) { | 381 | if (!IS_ERR(event)) { |
379 | printk(KERN_INFO "NMI watchdog enabled, takes one hw-pmu counter.\n"); | 382 | pr_info("enabled, takes one hw-pmu counter.\n"); |
380 | goto out_save; | 383 | goto out_save; |
381 | } | 384 | } |
382 | 385 | ||
383 | 386 | ||
384 | /* vary the KERN level based on the returned errno */ | 387 | /* vary the KERN level based on the returned errno */ |
385 | if (PTR_ERR(event) == -EOPNOTSUPP) | 388 | if (PTR_ERR(event) == -EOPNOTSUPP) |
386 | printk(KERN_INFO "NMI watchdog disabled (cpu%i): not supported (no LAPIC?)\n", cpu); | 389 | pr_info("disabled (cpu%i): not supported (no LAPIC?)\n", cpu); |
387 | else if (PTR_ERR(event) == -ENOENT) | 390 | else if (PTR_ERR(event) == -ENOENT) |
388 | printk(KERN_WARNING "NMI watchdog disabled (cpu%i): hardware events not enabled\n", cpu); | 391 | pr_warning("disabled (cpu%i): hardware events not enabled\n", |
392 | cpu); | ||
389 | else | 393 | else |
390 | printk(KERN_ERR "NMI watchdog disabled (cpu%i): unable to create perf event: %ld\n", cpu, PTR_ERR(event)); | 394 | pr_err("disabled (cpu%i): unable to create perf event: %ld\n", |
395 | cpu, PTR_ERR(event)); | ||
391 | return PTR_ERR(event); | 396 | return PTR_ERR(event); |
392 | 397 | ||
393 | /* success path */ | 398 | /* success path */ |
@@ -439,9 +444,10 @@ static int watchdog_enable(int cpu) | |||
439 | 444 | ||
440 | /* create the watchdog thread */ | 445 | /* create the watchdog thread */ |
441 | if (!p) { | 446 | if (!p) { |
447 | struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; | ||
442 | p = kthread_create_on_node(watchdog, NULL, cpu_to_node(cpu), "watchdog/%d", cpu); | 448 | p = kthread_create_on_node(watchdog, NULL, cpu_to_node(cpu), "watchdog/%d", cpu); |
443 | if (IS_ERR(p)) { | 449 | if (IS_ERR(p)) { |
444 | printk(KERN_ERR "softlockup watchdog for %i failed\n", cpu); | 450 | pr_err("softlockup watchdog for %i failed\n", cpu); |
445 | if (!err) { | 451 | if (!err) { |
446 | /* if hardlockup hasn't already set this */ | 452 | /* if hardlockup hasn't already set this */ |
447 | err = PTR_ERR(p); | 453 | err = PTR_ERR(p); |
@@ -450,6 +456,7 @@ static int watchdog_enable(int cpu) | |||
450 | } | 456 | } |
451 | goto out; | 457 | goto out; |
452 | } | 458 | } |
459 | sched_setscheduler(p, SCHED_FIFO, ¶m); | ||
453 | kthread_bind(p, cpu); | 460 | kthread_bind(p, cpu); |
454 | per_cpu(watchdog_touch_ts, cpu) = 0; | 461 | per_cpu(watchdog_touch_ts, cpu) = 0; |
455 | per_cpu(softlockup_watchdog, cpu) = p; | 462 | per_cpu(softlockup_watchdog, cpu) = p; |
@@ -496,7 +503,7 @@ static void watchdog_enable_all_cpus(void) | |||
496 | watchdog_enabled = 1; | 503 | watchdog_enabled = 1; |
497 | 504 | ||
498 | if (!watchdog_enabled) | 505 | if (!watchdog_enabled) |
499 | printk(KERN_ERR "watchdog: failed to be enabled on some cpus\n"); | 506 | pr_err("failed to be enabled on some cpus\n"); |
500 | 507 | ||
501 | } | 508 | } |
502 | 509 | ||
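
The watchdog.c messages are converted from open-coded printk(KERN_*) calls to the pr_*() helpers, with pr_fmt() supplying a common prefix, and the SCHED_FIFO elevation moves from the watchdog thread itself into watchdog_enable(), so the kthread starts life already at RT priority. As a small illustration of the pr_fmt() convention only (not code from the patch; example_report is a made-up name):

#define pr_fmt(fmt) "NMI watchdog: " fmt	/* must precede the printk.h include */

#include <linux/kernel.h>

static void example_report(int cpu)
{
	/* pr_info() expands to printk(KERN_INFO pr_fmt(...)), so this prints
	 * "NMI watchdog: disabled (cpu0): not supported (no LAPIC?)" */
	pr_info("disabled (cpu%i): not supported (no LAPIC?)\n", cpu);
}
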
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index f2c5638bb5ab..5abf42f63c08 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
@@ -476,13 +476,8 @@ static struct cpu_workqueue_struct *get_cwq(unsigned int cpu, | |||
476 | struct workqueue_struct *wq) | 476 | struct workqueue_struct *wq) |
477 | { | 477 | { |
478 | if (!(wq->flags & WQ_UNBOUND)) { | 478 | if (!(wq->flags & WQ_UNBOUND)) { |
479 | if (likely(cpu < nr_cpu_ids)) { | 479 | if (likely(cpu < nr_cpu_ids)) |
480 | #ifdef CONFIG_SMP | ||
481 | return per_cpu_ptr(wq->cpu_wq.pcpu, cpu); | 480 | return per_cpu_ptr(wq->cpu_wq.pcpu, cpu); |
482 | #else | ||
483 | return wq->cpu_wq.single; | ||
484 | #endif | ||
485 | } | ||
486 | } else if (likely(cpu == WORK_CPU_UNBOUND)) | 481 | } else if (likely(cpu == WORK_CPU_UNBOUND)) |
487 | return wq->cpu_wq.single; | 482 | return wq->cpu_wq.single; |
488 | return NULL; | 483 | return NULL; |
@@ -2899,13 +2894,8 @@ static int alloc_cwqs(struct workqueue_struct *wq) | |||
2899 | const size_t size = sizeof(struct cpu_workqueue_struct); | 2894 | const size_t size = sizeof(struct cpu_workqueue_struct); |
2900 | const size_t align = max_t(size_t, 1 << WORK_STRUCT_FLAG_BITS, | 2895 | const size_t align = max_t(size_t, 1 << WORK_STRUCT_FLAG_BITS, |
2901 | __alignof__(unsigned long long)); | 2896 | __alignof__(unsigned long long)); |
2902 | #ifdef CONFIG_SMP | ||
2903 | bool percpu = !(wq->flags & WQ_UNBOUND); | ||
2904 | #else | ||
2905 | bool percpu = false; | ||
2906 | #endif | ||
2907 | 2897 | ||
2908 | if (percpu) | 2898 | if (!(wq->flags & WQ_UNBOUND)) |
2909 | wq->cpu_wq.pcpu = __alloc_percpu(size, align); | 2899 | wq->cpu_wq.pcpu = __alloc_percpu(size, align); |
2910 | else { | 2900 | else { |
2911 | void *ptr; | 2901 | void *ptr; |
@@ -2929,13 +2919,7 @@ static int alloc_cwqs(struct workqueue_struct *wq) | |||
2929 | 2919 | ||
2930 | static void free_cwqs(struct workqueue_struct *wq) | 2920 | static void free_cwqs(struct workqueue_struct *wq) |
2931 | { | 2921 | { |
2932 | #ifdef CONFIG_SMP | 2922 | if (!(wq->flags & WQ_UNBOUND)) |
2933 | bool percpu = !(wq->flags & WQ_UNBOUND); | ||
2934 | #else | ||
2935 | bool percpu = false; | ||
2936 | #endif | ||
2937 | |||
2938 | if (percpu) | ||
2939 | free_percpu(wq->cpu_wq.pcpu); | 2923 | free_percpu(wq->cpu_wq.pcpu); |
2940 | else if (wq->cpu_wq.single) { | 2924 | else if (wq->cpu_wq.single) { |
2941 | /* the pointer to free is stored right after the cwq */ | 2925 | /* the pointer to free is stored right after the cwq */ |