diff options
Diffstat (limited to 'kernel/cpuset.c')
-rw-r--r-- | kernel/cpuset.c | 122 |
1 files changed, 57 insertions, 65 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index f33c7153b6d7..7bb63eea6eb8 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
@@ -302,10 +302,10 @@ static void guarantee_online_cpus(const struct cpuset *cs, | |||
302 | * are online, with memory. If none are online with memory, walk | 302 | * are online, with memory. If none are online with memory, walk |
303 | * up the cpuset hierarchy until we find one that does have some | 303 | * up the cpuset hierarchy until we find one that does have some |
304 | * online mems. If we get all the way to the top and still haven't | 304 | * online mems. If we get all the way to the top and still haven't |
305 | * found any online mems, return node_states[N_HIGH_MEMORY]. | 305 | * found any online mems, return node_states[N_MEMORY]. |
306 | * | 306 | * |
307 | * One way or another, we guarantee to return some non-empty subset | 307 | * One way or another, we guarantee to return some non-empty subset |
308 | * of node_states[N_HIGH_MEMORY]. | 308 | * of node_states[N_MEMORY]. |
309 | * | 309 | * |
310 | * Call with callback_mutex held. | 310 | * Call with callback_mutex held. |
311 | */ | 311 | */ |
@@ -313,14 +313,14 @@ static void guarantee_online_cpus(const struct cpuset *cs, | |||
313 | static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask) | 313 | static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask) |
314 | { | 314 | { |
315 | while (cs && !nodes_intersects(cs->mems_allowed, | 315 | while (cs && !nodes_intersects(cs->mems_allowed, |
316 | node_states[N_HIGH_MEMORY])) | 316 | node_states[N_MEMORY])) |
317 | cs = cs->parent; | 317 | cs = cs->parent; |
318 | if (cs) | 318 | if (cs) |
319 | nodes_and(*pmask, cs->mems_allowed, | 319 | nodes_and(*pmask, cs->mems_allowed, |
320 | node_states[N_HIGH_MEMORY]); | 320 | node_states[N_MEMORY]); |
321 | else | 321 | else |
322 | *pmask = node_states[N_HIGH_MEMORY]; | 322 | *pmask = node_states[N_MEMORY]; |
323 | BUG_ON(!nodes_intersects(*pmask, node_states[N_HIGH_MEMORY])); | 323 | BUG_ON(!nodes_intersects(*pmask, node_states[N_MEMORY])); |
324 | } | 324 | } |
325 | 325 | ||
326 | /* | 326 | /* |
@@ -1100,7 +1100,7 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs, | |||
1100 | return -ENOMEM; | 1100 | return -ENOMEM; |
1101 | 1101 | ||
1102 | /* | 1102 | /* |
1103 | * top_cpuset.mems_allowed tracks node_stats[N_HIGH_MEMORY]; | 1103 | * top_cpuset.mems_allowed tracks node_stats[N_MEMORY]; |
1104 | * it's read-only | 1104 | * it's read-only |
1105 | */ | 1105 | */ |
1106 | if (cs == &top_cpuset) { | 1106 | if (cs == &top_cpuset) { |
@@ -1122,7 +1122,7 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs, | |||
1122 | goto done; | 1122 | goto done; |
1123 | 1123 | ||
1124 | if (!nodes_subset(trialcs->mems_allowed, | 1124 | if (!nodes_subset(trialcs->mems_allowed, |
1125 | node_states[N_HIGH_MEMORY])) { | 1125 | node_states[N_MEMORY])) { |
1126 | retval = -EINVAL; | 1126 | retval = -EINVAL; |
1127 | goto done; | 1127 | goto done; |
1128 | } | 1128 | } |
@@ -1784,56 +1784,20 @@ static struct cftype files[] = { | |||
1784 | }; | 1784 | }; |
1785 | 1785 | ||
1786 | /* | 1786 | /* |
1787 | * post_clone() is called during cgroup_create() when the | 1787 | * cpuset_css_alloc - allocate a cpuset css |
1788 | * clone_children mount argument was specified. The cgroup | ||
1789 | * can not yet have any tasks. | ||
1790 | * | ||
1791 | * Currently we refuse to set up the cgroup - thereby | ||
1792 | * refusing the task to be entered, and as a result refusing | ||
1793 | * the sys_unshare() or clone() which initiated it - if any | ||
1794 | * sibling cpusets have exclusive cpus or mem. | ||
1795 | * | ||
1796 | * If this becomes a problem for some users who wish to | ||
1797 | * allow that scenario, then cpuset_post_clone() could be | ||
1798 | * changed to grant parent->cpus_allowed-sibling_cpus_exclusive | ||
1799 | * (and likewise for mems) to the new cgroup. Called with cgroup_mutex | ||
1800 | * held. | ||
1801 | */ | ||
1802 | static void cpuset_post_clone(struct cgroup *cgroup) | ||
1803 | { | ||
1804 | struct cgroup *parent, *child; | ||
1805 | struct cpuset *cs, *parent_cs; | ||
1806 | |||
1807 | parent = cgroup->parent; | ||
1808 | list_for_each_entry(child, &parent->children, sibling) { | ||
1809 | cs = cgroup_cs(child); | ||
1810 | if (is_mem_exclusive(cs) || is_cpu_exclusive(cs)) | ||
1811 | return; | ||
1812 | } | ||
1813 | cs = cgroup_cs(cgroup); | ||
1814 | parent_cs = cgroup_cs(parent); | ||
1815 | |||
1816 | mutex_lock(&callback_mutex); | ||
1817 | cs->mems_allowed = parent_cs->mems_allowed; | ||
1818 | cpumask_copy(cs->cpus_allowed, parent_cs->cpus_allowed); | ||
1819 | mutex_unlock(&callback_mutex); | ||
1820 | return; | ||
1821 | } | ||
1822 | |||
1823 | /* | ||
1824 | * cpuset_create - create a cpuset | ||
1825 | * cont: control group that the new cpuset will be part of | 1788 | * cont: control group that the new cpuset will be part of |
1826 | */ | 1789 | */ |
1827 | 1790 | ||
1828 | static struct cgroup_subsys_state *cpuset_create(struct cgroup *cont) | 1791 | static struct cgroup_subsys_state *cpuset_css_alloc(struct cgroup *cont) |
1829 | { | 1792 | { |
1830 | struct cpuset *cs; | 1793 | struct cgroup *parent_cg = cont->parent; |
1831 | struct cpuset *parent; | 1794 | struct cgroup *tmp_cg; |
1795 | struct cpuset *parent, *cs; | ||
1832 | 1796 | ||
1833 | if (!cont->parent) { | 1797 | if (!parent_cg) |
1834 | return &top_cpuset.css; | 1798 | return &top_cpuset.css; |
1835 | } | 1799 | parent = cgroup_cs(parent_cg); |
1836 | parent = cgroup_cs(cont->parent); | 1800 | |
1837 | cs = kmalloc(sizeof(*cs), GFP_KERNEL); | 1801 | cs = kmalloc(sizeof(*cs), GFP_KERNEL); |
1838 | if (!cs) | 1802 | if (!cs) |
1839 | return ERR_PTR(-ENOMEM); | 1803 | return ERR_PTR(-ENOMEM); |
@@ -1855,7 +1819,36 @@ static struct cgroup_subsys_state *cpuset_create(struct cgroup *cont) | |||
1855 | 1819 | ||
1856 | cs->parent = parent; | 1820 | cs->parent = parent; |
1857 | number_of_cpusets++; | 1821 | number_of_cpusets++; |
1858 | return &cs->css ; | 1822 | |
1823 | if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &cont->flags)) | ||
1824 | goto skip_clone; | ||
1825 | |||
1826 | /* | ||
1827 | * Clone @parent's configuration if CGRP_CPUSET_CLONE_CHILDREN is | ||
1828 | * set. This flag handling is implemented in cgroup core for | ||
1829 | * histrical reasons - the flag may be specified during mount. | ||
1830 | * | ||
1831 | * Currently, if any sibling cpusets have exclusive cpus or mem, we | ||
1832 | * refuse to clone the configuration - thereby refusing the task to | ||
1833 | * be entered, and as a result refusing the sys_unshare() or | ||
1834 | * clone() which initiated it. If this becomes a problem for some | ||
1835 | * users who wish to allow that scenario, then this could be | ||
1836 | * changed to grant parent->cpus_allowed-sibling_cpus_exclusive | ||
1837 | * (and likewise for mems) to the new cgroup. | ||
1838 | */ | ||
1839 | list_for_each_entry(tmp_cg, &parent_cg->children, sibling) { | ||
1840 | struct cpuset *tmp_cs = cgroup_cs(tmp_cg); | ||
1841 | |||
1842 | if (is_mem_exclusive(tmp_cs) || is_cpu_exclusive(tmp_cs)) | ||
1843 | goto skip_clone; | ||
1844 | } | ||
1845 | |||
1846 | mutex_lock(&callback_mutex); | ||
1847 | cs->mems_allowed = parent->mems_allowed; | ||
1848 | cpumask_copy(cs->cpus_allowed, parent->cpus_allowed); | ||
1849 | mutex_unlock(&callback_mutex); | ||
1850 | skip_clone: | ||
1851 | return &cs->css; | ||
1859 | } | 1852 | } |
1860 | 1853 | ||
1861 | /* | 1854 | /* |
@@ -1864,7 +1857,7 @@ static struct cgroup_subsys_state *cpuset_create(struct cgroup *cont) | |||
1864 | * will call async_rebuild_sched_domains(). | 1857 | * will call async_rebuild_sched_domains(). |
1865 | */ | 1858 | */ |
1866 | 1859 | ||
1867 | static void cpuset_destroy(struct cgroup *cont) | 1860 | static void cpuset_css_free(struct cgroup *cont) |
1868 | { | 1861 | { |
1869 | struct cpuset *cs = cgroup_cs(cont); | 1862 | struct cpuset *cs = cgroup_cs(cont); |
1870 | 1863 | ||
@@ -1878,11 +1871,10 @@ static void cpuset_destroy(struct cgroup *cont) | |||
1878 | 1871 | ||
1879 | struct cgroup_subsys cpuset_subsys = { | 1872 | struct cgroup_subsys cpuset_subsys = { |
1880 | .name = "cpuset", | 1873 | .name = "cpuset", |
1881 | .create = cpuset_create, | 1874 | .css_alloc = cpuset_css_alloc, |
1882 | .destroy = cpuset_destroy, | 1875 | .css_free = cpuset_css_free, |
1883 | .can_attach = cpuset_can_attach, | 1876 | .can_attach = cpuset_can_attach, |
1884 | .attach = cpuset_attach, | 1877 | .attach = cpuset_attach, |
1885 | .post_clone = cpuset_post_clone, | ||
1886 | .subsys_id = cpuset_subsys_id, | 1878 | .subsys_id = cpuset_subsys_id, |
1887 | .base_cftypes = files, | 1879 | .base_cftypes = files, |
1888 | .early_init = 1, | 1880 | .early_init = 1, |
@@ -2034,7 +2026,7 @@ static struct cpuset *cpuset_next(struct list_head *queue) | |||
2034 | * before dropping down to the next. It always processes a node before | 2026 | * before dropping down to the next. It always processes a node before |
2035 | * any of its children. | 2027 | * any of its children. |
2036 | * | 2028 | * |
2037 | * In the case of memory hot-unplug, it will remove nodes from N_HIGH_MEMORY | 2029 | * In the case of memory hot-unplug, it will remove nodes from N_MEMORY |
2038 | * if all present pages from a node are offlined. | 2030 | * if all present pages from a node are offlined. |
2039 | */ | 2031 | */ |
2040 | static void | 2032 | static void |
@@ -2073,7 +2065,7 @@ scan_cpusets_upon_hotplug(struct cpuset *root, enum hotplug_event event) | |||
2073 | 2065 | ||
2074 | /* Continue past cpusets with all mems online */ | 2066 | /* Continue past cpusets with all mems online */ |
2075 | if (nodes_subset(cp->mems_allowed, | 2067 | if (nodes_subset(cp->mems_allowed, |
2076 | node_states[N_HIGH_MEMORY])) | 2068 | node_states[N_MEMORY])) |
2077 | continue; | 2069 | continue; |
2078 | 2070 | ||
2079 | oldmems = cp->mems_allowed; | 2071 | oldmems = cp->mems_allowed; |
@@ -2081,7 +2073,7 @@ scan_cpusets_upon_hotplug(struct cpuset *root, enum hotplug_event event) | |||
2081 | /* Remove offline mems from this cpuset. */ | 2073 | /* Remove offline mems from this cpuset. */ |
2082 | mutex_lock(&callback_mutex); | 2074 | mutex_lock(&callback_mutex); |
2083 | nodes_and(cp->mems_allowed, cp->mems_allowed, | 2075 | nodes_and(cp->mems_allowed, cp->mems_allowed, |
2084 | node_states[N_HIGH_MEMORY]); | 2076 | node_states[N_MEMORY]); |
2085 | mutex_unlock(&callback_mutex); | 2077 | mutex_unlock(&callback_mutex); |
2086 | 2078 | ||
2087 | /* Move tasks from the empty cpuset to a parent */ | 2079 | /* Move tasks from the empty cpuset to a parent */ |
@@ -2134,8 +2126,8 @@ void cpuset_update_active_cpus(bool cpu_online) | |||
2134 | 2126 | ||
2135 | #ifdef CONFIG_MEMORY_HOTPLUG | 2127 | #ifdef CONFIG_MEMORY_HOTPLUG |
2136 | /* | 2128 | /* |
2137 | * Keep top_cpuset.mems_allowed tracking node_states[N_HIGH_MEMORY]. | 2129 | * Keep top_cpuset.mems_allowed tracking node_states[N_MEMORY]. |
2138 | * Call this routine anytime after node_states[N_HIGH_MEMORY] changes. | 2130 | * Call this routine anytime after node_states[N_MEMORY] changes. |
2139 | * See cpuset_update_active_cpus() for CPU hotplug handling. | 2131 | * See cpuset_update_active_cpus() for CPU hotplug handling. |
2140 | */ | 2132 | */ |
2141 | static int cpuset_track_online_nodes(struct notifier_block *self, | 2133 | static int cpuset_track_online_nodes(struct notifier_block *self, |
@@ -2148,7 +2140,7 @@ static int cpuset_track_online_nodes(struct notifier_block *self, | |||
2148 | case MEM_ONLINE: | 2140 | case MEM_ONLINE: |
2149 | oldmems = top_cpuset.mems_allowed; | 2141 | oldmems = top_cpuset.mems_allowed; |
2150 | mutex_lock(&callback_mutex); | 2142 | mutex_lock(&callback_mutex); |
2151 | top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; | 2143 | top_cpuset.mems_allowed = node_states[N_MEMORY]; |
2152 | mutex_unlock(&callback_mutex); | 2144 | mutex_unlock(&callback_mutex); |
2153 | update_tasks_nodemask(&top_cpuset, &oldmems, NULL); | 2145 | update_tasks_nodemask(&top_cpuset, &oldmems, NULL); |
2154 | break; | 2146 | break; |
@@ -2177,7 +2169,7 @@ static int cpuset_track_online_nodes(struct notifier_block *self, | |||
2177 | void __init cpuset_init_smp(void) | 2169 | void __init cpuset_init_smp(void) |
2178 | { | 2170 | { |
2179 | cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask); | 2171 | cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask); |
2180 | top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; | 2172 | top_cpuset.mems_allowed = node_states[N_MEMORY]; |
2181 | 2173 | ||
2182 | hotplug_memory_notifier(cpuset_track_online_nodes, 10); | 2174 | hotplug_memory_notifier(cpuset_track_online_nodes, 10); |
2183 | 2175 | ||
@@ -2245,7 +2237,7 @@ void cpuset_init_current_mems_allowed(void) | |||
2245 | * | 2237 | * |
2246 | * Description: Returns the nodemask_t mems_allowed of the cpuset | 2238 | * Description: Returns the nodemask_t mems_allowed of the cpuset |
2247 | * attached to the specified @tsk. Guaranteed to return some non-empty | 2239 | * attached to the specified @tsk. Guaranteed to return some non-empty |
2248 | * subset of node_states[N_HIGH_MEMORY], even if this means going outside the | 2240 | * subset of node_states[N_MEMORY], even if this means going outside the |
2249 | * tasks cpuset. | 2241 | * tasks cpuset. |
2250 | **/ | 2242 | **/ |
2251 | 2243 | ||