Diffstat (limited to 'kernel/cpuset.c')
 kernel/cpuset.c | 122 +++++++++++++++++++++++--------------------------
 1 file changed, 57 insertions(+), 65 deletions(-)
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index f33c7153b6d7..7bb63eea6eb8 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -302,10 +302,10 @@ static void guarantee_online_cpus(const struct cpuset *cs,
  * are online, with memory.  If none are online with memory, walk
  * up the cpuset hierarchy until we find one that does have some
  * online mems.  If we get all the way to the top and still haven't
- * found any online mems, return node_states[N_HIGH_MEMORY].
+ * found any online mems, return node_states[N_MEMORY].
  *
  * One way or another, we guarantee to return some non-empty subset
- * of node_states[N_HIGH_MEMORY].
+ * of node_states[N_MEMORY].
  *
  * Call with callback_mutex held.
  */
@@ -313,14 +313,14 @@ static void guarantee_online_cpus(const struct cpuset *cs,
 static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask)
 {
 	while (cs && !nodes_intersects(cs->mems_allowed,
-					node_states[N_HIGH_MEMORY]))
+					node_states[N_MEMORY]))
 		cs = cs->parent;
 	if (cs)
 		nodes_and(*pmask, cs->mems_allowed,
-			  node_states[N_HIGH_MEMORY]);
+			  node_states[N_MEMORY]);
 	else
-		*pmask = node_states[N_HIGH_MEMORY];
-	BUG_ON(!nodes_intersects(*pmask, node_states[N_HIGH_MEMORY]));
+		*pmask = node_states[N_MEMORY];
+	BUG_ON(!nodes_intersects(*pmask, node_states[N_MEMORY]));
 }
 
 /*
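
To make the fallback semantics of guarantee_online_mems() concrete, here is a
minimal userspace analogue: walk up the hierarchy until some allowed node has
memory, otherwise fall back to the global mask, so the result is never empty.
The node_set and toy_cpuset types are hypothetical stand-ins for the kernel's
nodemask_t and struct cpuset, not kernel API.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Userspace stand-in for nodemask_t: one bit per memory node. */
typedef uint64_t node_set;

struct toy_cpuset {
	node_set mems_allowed;
	struct toy_cpuset *parent;
};

/* Mirrors guarantee_online_mems(): walk up until some allowed node
 * has memory, else fall back to the global N_MEMORY mask. */
static node_set toy_online_mems(const struct toy_cpuset *cs, node_set n_memory)
{
	while (cs && !(cs->mems_allowed & n_memory))
		cs = cs->parent;
	return cs ? (cs->mems_allowed & n_memory) : n_memory;
}

int main(void)
{
	node_set n_memory = 0x3;	/* nodes 0 and 1 have memory */
	struct toy_cpuset top = { .mems_allowed = 0x3, .parent = NULL };
	struct toy_cpuset child = { .mems_allowed = 0x4, .parent = &top };

	/* child's only node (2) has no memory, so we fall back to top */
	node_set got = toy_online_mems(&child, n_memory);
	assert(got == 0x3);
	printf("resolved mems: 0x%llx\n", (unsigned long long)got);
	return 0;
}
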
@@ -1100,7 +1100,7 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
 		return -ENOMEM;
 
 	/*
-	 * top_cpuset.mems_allowed tracks node_stats[N_HIGH_MEMORY];
+	 * top_cpuset.mems_allowed tracks node_stats[N_MEMORY];
 	 * it's read-only
 	 */
 	if (cs == &top_cpuset) {
@@ -1122,7 +1122,7 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
 		goto done;
 
 	if (!nodes_subset(trialcs->mems_allowed,
-				node_states[N_HIGH_MEMORY])) {
+				node_states[N_MEMORY])) {
 		retval = -EINVAL;
 		goto done;
 	}
@@ -1784,56 +1784,20 @@ static struct cftype files[] = {
 };
 
 /*
- * post_clone() is called during cgroup_create() when the
- * clone_children mount argument was specified.  The cgroup
- * can not yet have any tasks.
- *
- * Currently we refuse to set up the cgroup - thereby
- * refusing the task to be entered, and as a result refusing
- * the sys_unshare() or clone() which initiated it - if any
- * sibling cpusets have exclusive cpus or mem.
- *
- * If this becomes a problem for some users who wish to
- * allow that scenario, then cpuset_post_clone() could be
- * changed to grant parent->cpus_allowed-sibling_cpus_exclusive
- * (and likewise for mems) to the new cgroup.  Called with cgroup_mutex
- * held.
- */
-static void cpuset_post_clone(struct cgroup *cgroup)
-{
-	struct cgroup *parent, *child;
-	struct cpuset *cs, *parent_cs;
-
-	parent = cgroup->parent;
-	list_for_each_entry(child, &parent->children, sibling) {
-		cs = cgroup_cs(child);
-		if (is_mem_exclusive(cs) || is_cpu_exclusive(cs))
-			return;
-	}
-	cs = cgroup_cs(cgroup);
-	parent_cs = cgroup_cs(parent);
-
-	mutex_lock(&callback_mutex);
-	cs->mems_allowed = parent_cs->mems_allowed;
-	cpumask_copy(cs->cpus_allowed, parent_cs->cpus_allowed);
-	mutex_unlock(&callback_mutex);
-	return;
-}
-
-/*
- * cpuset_create - create a cpuset
+ * cpuset_css_alloc - allocate a cpuset css
  *	cont:	control group that the new cpuset will be part of
  */
 
-static struct cgroup_subsys_state *cpuset_create(struct cgroup *cont)
+static struct cgroup_subsys_state *cpuset_css_alloc(struct cgroup *cont)
 {
-	struct cpuset *cs;
-	struct cpuset *parent;
+	struct cgroup *parent_cg = cont->parent;
+	struct cgroup *tmp_cg;
+	struct cpuset *parent, *cs;
 
-	if (!cont->parent) {
+	if (!parent_cg)
 		return &top_cpuset.css;
-	}
-	parent = cgroup_cs(cont->parent);
+	parent = cgroup_cs(parent_cg);
+
 	cs = kmalloc(sizeof(*cs), GFP_KERNEL);
 	if (!cs)
 		return ERR_PTR(-ENOMEM);
@@ -1855,7 +1819,36 @@ static struct cgroup_subsys_state *cpuset_create(struct cgroup *cont)
 
 	cs->parent = parent;
 	number_of_cpusets++;
-	return &cs->css ;
+
+	if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &cont->flags))
+		goto skip_clone;
+
+	/*
+	 * Clone @parent's configuration if CGRP_CPUSET_CLONE_CHILDREN is
+	 * set.  This flag handling is implemented in cgroup core for
+	 * histrical reasons - the flag may be specified during mount.
+	 *
+	 * Currently, if any sibling cpusets have exclusive cpus or mem, we
+	 * refuse to clone the configuration - thereby refusing the task to
+	 * be entered, and as a result refusing the sys_unshare() or
+	 * clone() which initiated it.  If this becomes a problem for some
+	 * users who wish to allow that scenario, then this could be
+	 * changed to grant parent->cpus_allowed-sibling_cpus_exclusive
+	 * (and likewise for mems) to the new cgroup.
+	 */
+	list_for_each_entry(tmp_cg, &parent_cg->children, sibling) {
+		struct cpuset *tmp_cs = cgroup_cs(tmp_cg);
+
+		if (is_mem_exclusive(tmp_cs) || is_cpu_exclusive(tmp_cs))
+			goto skip_clone;
+	}
+
+	mutex_lock(&callback_mutex);
+	cs->mems_allowed = parent->mems_allowed;
+	cpumask_copy(cs->cpus_allowed, parent->cpus_allowed);
+	mutex_unlock(&callback_mutex);
+skip_clone:
+	return &cs->css;
 }
 
 /*
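
The sibling-exclusivity rule in the comment above reduces to one scan over the
parent's children: cloning is allowed only if no existing sibling holds
exclusive cpus or mems. A minimal userspace sketch of that decision follows;
the toy_cs type and may_clone_parent() are hypothetical illustrations, not the
kernel structures.

#include <stdbool.h>
#include <stdio.h>

struct toy_cs {
	bool cpu_exclusive;
	bool mem_exclusive;
};

/* A new child may clone its parent's configuration only if no
 * sibling is exclusive; otherwise we bail, mirroring skip_clone. */
static bool may_clone_parent(const struct toy_cs *siblings, int n)
{
	for (int i = 0; i < n; i++)
		if (siblings[i].cpu_exclusive || siblings[i].mem_exclusive)
			return false;
	return true;
}

int main(void)
{
	struct toy_cs sibs[] = { { false, false }, { true, false } };

	printf("clone allowed: %s\n",
	       may_clone_parent(sibs, 2) ? "yes" : "no");	/* "no" */
	return 0;
}
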
@@ -1864,7 +1857,7 @@ static struct cgroup_subsys_state *cpuset_create(struct cgroup *cont)
  * will call async_rebuild_sched_domains().
  */
 
-static void cpuset_destroy(struct cgroup *cont)
+static void cpuset_css_free(struct cgroup *cont)
 {
 	struct cpuset *cs = cgroup_cs(cont);
 
@@ -1878,11 +1871,10 @@ static void cpuset_destroy(struct cgroup *cont)
 
 struct cgroup_subsys cpuset_subsys = {
 	.name = "cpuset",
-	.create = cpuset_create,
-	.destroy = cpuset_destroy,
+	.css_alloc = cpuset_css_alloc,
+	.css_free = cpuset_css_free,
 	.can_attach = cpuset_can_attach,
 	.attach = cpuset_attach,
-	.post_clone = cpuset_post_clone,
 	.subsys_id = cpuset_subsys_id,
 	.base_cftypes = files,
 	.early_init = 1,
@@ -2034,7 +2026,7 @@ static struct cpuset *cpuset_next(struct list_head *queue)
  * before dropping down to the next.  It always processes a node before
  * any of its children.
  *
- * In the case of memory hot-unplug, it will remove nodes from N_HIGH_MEMORY
+ * In the case of memory hot-unplug, it will remove nodes from N_MEMORY
  * if all present pages from a node are offlined.
  */
 static void
@@ -2073,7 +2065,7 @@ scan_cpusets_upon_hotplug(struct cpuset *root, enum hotplug_event event)
 
 		/* Continue past cpusets with all mems online */
 		if (nodes_subset(cp->mems_allowed,
-				 node_states[N_HIGH_MEMORY]))
+				 node_states[N_MEMORY]))
 			continue;
 
 		oldmems = cp->mems_allowed;
@@ -2081,7 +2073,7 @@ scan_cpusets_upon_hotplug(struct cpuset *root, enum hotplug_event event)
 		/* Remove offline mems from this cpuset. */
 		mutex_lock(&callback_mutex);
 		nodes_and(cp->mems_allowed, cp->mems_allowed,
-			  node_states[N_HIGH_MEMORY]);
+			  node_states[N_MEMORY]);
 		mutex_unlock(&callback_mutex);
 
 		/* Move tasks from the empty cpuset to a parent */
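
The hot-unplug path above is a mask intersection followed by an emptiness
check: each cpuset keeps only the nodes that survived, and a cpuset left with
nothing must hand its tasks to an ancestor. A minimal userspace sketch, with
node_set as a hypothetical stand-in for nodemask_t:

#include <stdint.h>
#include <stdio.h>

typedef uint64_t node_set;

int main(void)
{
	node_set n_memory = 0x1;	/* node 1 was just offlined */
	node_set cp_mems  = 0x2;	/* this cpuset only allowed node 1 */

	cp_mems &= n_memory;		/* nodes_and(cp->mems_allowed, ...) */
	if (!cp_mems)
		printf("cpuset now empty: move tasks to parent\n");
	return 0;
}
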
@@ -2134,8 +2126,8 @@ void cpuset_update_active_cpus(bool cpu_online)
 
 #ifdef CONFIG_MEMORY_HOTPLUG
 /*
- * Keep top_cpuset.mems_allowed tracking node_states[N_HIGH_MEMORY].
- * Call this routine anytime after node_states[N_HIGH_MEMORY] changes.
+ * Keep top_cpuset.mems_allowed tracking node_states[N_MEMORY].
+ * Call this routine anytime after node_states[N_MEMORY] changes.
  * See cpuset_update_active_cpus() for CPU hotplug handling.
  */
 static int cpuset_track_online_nodes(struct notifier_block *self,
@@ -2148,7 +2140,7 @@ static int cpuset_track_online_nodes(struct notifier_block *self,
 	case MEM_ONLINE:
 		oldmems = top_cpuset.mems_allowed;
 		mutex_lock(&callback_mutex);
-		top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
+		top_cpuset.mems_allowed = node_states[N_MEMORY];
 		mutex_unlock(&callback_mutex);
 		update_tasks_nodemask(&top_cpuset, &oldmems, NULL);
 		break;
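
The MEM_ONLINE arm amounts to: the top-level cpuset re-adopts the global mask,
then tasks are told their allowed mems changed. A compact userspace sketch of
that arm; the toy_* names and globals are hypothetical, not kernel API:

#include <stdint.h>
#include <stdio.h>

typedef uint64_t node_set;

static node_set n_memory;	/* stand-in for node_states[N_MEMORY] */
static node_set top_mems;	/* stand-in for top_cpuset.mems_allowed */

/* On node online: re-read the global mask and propagate the change. */
static void toy_mem_online(node_set new_global)
{
	node_set oldmems = top_mems;

	n_memory = new_global;
	top_mems = n_memory;
	printf("mems: 0x%llx -> 0x%llx\n",
	       (unsigned long long)oldmems, (unsigned long long)top_mems);
}

int main(void)
{
	top_mems = n_memory = 0x1;
	toy_mem_online(0x3);	/* node 1 came online */
	return 0;
}
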
@@ -2177,7 +2169,7 @@ static int cpuset_track_online_nodes(struct notifier_block *self,
 void __init cpuset_init_smp(void)
 {
 	cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
-	top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
+	top_cpuset.mems_allowed = node_states[N_MEMORY];
 
 	hotplug_memory_notifier(cpuset_track_online_nodes, 10);
 
@@ -2245,7 +2237,7 @@ void cpuset_init_current_mems_allowed(void)
  *
  * Description: Returns the nodemask_t mems_allowed of the cpuset
  * attached to the specified @tsk.  Guaranteed to return some non-empty
- * subset of node_states[N_HIGH_MEMORY], even if this means going outside the
+ * subset of node_states[N_MEMORY], even if this means going outside the
  * tasks cpuset.
  **/
 