 kernel/cpuset.c | 221 ++++++++++++++++++++++++----------------------
 1 file changed, 104 insertions(+), 117 deletions(-)
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index c5edc6b3eb28..3d448e646a4a 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -148,12 +148,6 @@ typedef enum {
 	CS_SPREAD_SLAB,
 } cpuset_flagbits_t;
 
-/* the type of hotplug event */
-enum hotplug_event {
-	CPUSET_CPU_OFFLINE,
-	CPUSET_MEM_OFFLINE,
-};
-
 /* convenient tests for these bits */
 static inline bool is_cpuset_online(const struct cpuset *cs)
 {
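The enum removed above is dead weight after this patch: the rewritten hotplug path (next hunk) never needs to be told which kind of event fired, because each cpuset's masks are diffed against top_cpuset and whatever is missing is treated as offlined. A minimal userspace sketch of that mask-difference idea, using plain unsigned longs in place of the kernel's cpumask_t/nodemask_t types:

#include <stdio.h>

int main(void)
{
	unsigned long top_cpus = 0x0ful;	/* CPUs 0-3 still online */
	unsigned long cs_cpus  = 0x1cul;	/* cpuset allows CPUs 2-4 */

	/* off = cs & ~top: whatever the cpuset has that top has lost */
	unsigned long off_cpus = cs_cpus & ~top_cpus;

	if (off_cpus) {
		cs_cpus &= ~off_cpus;		/* drop the offlined CPUs */
		printf("offlined %#lx, remaining %#lx\n", off_cpus, cs_cpus);
	}
	return 0;
}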
@@ -2059,116 +2053,131 @@ static struct cpuset *cpuset_next(struct list_head *queue)
 	return cp;
 }
 
-
-/*
- * Walk the specified cpuset subtree upon a hotplug operation (CPU/Memory
- * online/offline) and update the cpusets accordingly.
- * For regular CPU/Mem hotplug, look for empty cpusets; the tasks of such
- * cpuset must be moved to a parent cpuset.
- *
- * Called with cgroup_mutex held.  We take callback_mutex to modify
- * cpus_allowed and mems_allowed.
+/**
+ * cpuset_propagate_hotplug - propagate CPU/memory hotplug to a cpuset
+ * @cs: cpuset in interest
  *
- * This walk processes the tree from top to bottom, completing one layer
- * before dropping down to the next.  It always processes a node before
- * any of its children.
+ * Compare @cs's cpu and mem masks against top_cpuset and if some have gone
+ * offline, update @cs accordingly.  If @cs ends up with no CPU or memory,
+ * all its tasks are moved to the nearest ancestor with both resources.
  *
- * In the case of memory hot-unplug, it will remove nodes from N_MEMORY
- * if all present pages from a node are offlined.
+ * Should be called with cgroup_mutex held.
  */
-static void
-scan_cpusets_upon_hotplug(struct cpuset *root, enum hotplug_event event)
+static void cpuset_propagate_hotplug(struct cpuset *cs)
 {
-	LIST_HEAD(queue);
-	struct cpuset *cp;		/* scans cpusets being updated */
-	static nodemask_t oldmems;	/* protected by cgroup_mutex */
-
-	list_add_tail((struct list_head *)&root->stack_list, &queue);
-
-	switch (event) {
-	case CPUSET_CPU_OFFLINE:
-		while ((cp = cpuset_next(&queue)) != NULL) {
+	static cpumask_t off_cpus;
+	static nodemask_t off_mems, tmp_mems;
 
-			/* Continue past cpusets with all cpus online */
-			if (cpumask_subset(cp->cpus_allowed, cpu_active_mask))
-				continue;
+	WARN_ON_ONCE(!cgroup_lock_is_held());
 
-			/* Remove offline cpus from this cpuset. */
-			mutex_lock(&callback_mutex);
-			cpumask_and(cp->cpus_allowed, cp->cpus_allowed,
-							cpu_active_mask);
-			mutex_unlock(&callback_mutex);
+	cpumask_andnot(&off_cpus, cs->cpus_allowed, top_cpuset.cpus_allowed);
+	nodes_andnot(off_mems, cs->mems_allowed, top_cpuset.mems_allowed);
 
-			/* Move tasks from the empty cpuset to a parent */
-			if (cpumask_empty(cp->cpus_allowed))
-				remove_tasks_in_empty_cpuset(cp);
-			else
-				update_tasks_cpumask(cp, NULL);
-		}
-		break;
-
-	case CPUSET_MEM_OFFLINE:
-		while ((cp = cpuset_next(&queue)) != NULL) {
-
-			/* Continue past cpusets with all mems online */
-			if (nodes_subset(cp->mems_allowed,
-					node_states[N_MEMORY]))
-				continue;
-
-			oldmems = cp->mems_allowed;
-
-			/* Remove offline mems from this cpuset. */
-			mutex_lock(&callback_mutex);
-			nodes_and(cp->mems_allowed, cp->mems_allowed,
-						node_states[N_MEMORY]);
-			mutex_unlock(&callback_mutex);
+	/* remove offline cpus from @cs */
+	if (!cpumask_empty(&off_cpus)) {
+		mutex_lock(&callback_mutex);
+		cpumask_andnot(cs->cpus_allowed, cs->cpus_allowed, &off_cpus);
+		mutex_unlock(&callback_mutex);
+		update_tasks_cpumask(cs, NULL);
+	}
 
-			/* Move tasks from the empty cpuset to a parent */
-			if (nodes_empty(cp->mems_allowed))
-				remove_tasks_in_empty_cpuset(cp);
-			else
-				update_tasks_nodemask(cp, &oldmems, NULL);
-		}
+	/* remove offline mems from @cs */
+	if (!nodes_empty(off_mems)) {
+		tmp_mems = cs->mems_allowed;
+		mutex_lock(&callback_mutex);
+		nodes_andnot(cs->mems_allowed, cs->mems_allowed, off_mems);
+		mutex_unlock(&callback_mutex);
+		update_tasks_nodemask(cs, &tmp_mems, NULL);
 	}
+
+	if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))
+		remove_tasks_in_empty_cpuset(cs);
 }
 
-/*
- * The top_cpuset tracks what CPUs and Memory Nodes are online,
- * period.  This is necessary in order to make cpusets transparent
- * (of no affect) on systems that are actively using CPU hotplug
- * but making no active use of cpusets.
- *
- * The only exception to this is suspend/resume, where we don't
- * modify cpusets at all.
+/**
+ * cpuset_handle_hotplug - handle CPU/memory hot[un]plug
  *
- * This routine ensures that top_cpuset.cpus_allowed tracks
- * cpu_active_mask on each CPU hotplug (cpuhp) event.
+ * This function is called after either CPU or memory configuration has
+ * changed and updates cpuset accordingly.  The top_cpuset is always
+ * synchronized to cpu_active_mask and N_MEMORY, which is necessary in
+ * order to make cpusets transparent (of no affect) on systems that are
+ * actively using CPU hotplug but making no active use of cpusets.
  *
- * Called within get_online_cpus().  Needs to call cgroup_lock()
- * before calling generate_sched_domains().
+ * Non-root cpusets are only affected by offlining.  If any CPUs or memory
+ * nodes have been taken down, cpuset_propagate_hotplug() is invoked on all
+ * descendants.
  *
- * @cpu_online: Indicates whether this is a CPU online event (true) or
- * a CPU offline event (false).
+ * Note that CPU offlining during suspend is ignored.  We don't modify
+ * cpusets across suspend/resume cycles at all.
  */
-void cpuset_update_active_cpus(bool cpu_online)
+static void cpuset_handle_hotplug(void)
 {
-	struct sched_domain_attr *attr;
-	cpumask_var_t *doms;
-	int ndoms;
+	static cpumask_t new_cpus, tmp_cpus;
+	static nodemask_t new_mems, tmp_mems;
+	bool cpus_updated, mems_updated;
+	bool cpus_offlined, mems_offlined;
 
 	cgroup_lock();
-	mutex_lock(&callback_mutex);
-	cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
-	mutex_unlock(&callback_mutex);
 
-	if (!cpu_online)
-		scan_cpusets_upon_hotplug(&top_cpuset, CPUSET_CPU_OFFLINE);
+	/* fetch the available cpus/mems and find out which changed how */
+	cpumask_copy(&new_cpus, cpu_active_mask);
+	new_mems = node_states[N_MEMORY];
+
+	cpus_updated = !cpumask_equal(top_cpuset.cpus_allowed, &new_cpus);
+	cpus_offlined = cpumask_andnot(&tmp_cpus, top_cpuset.cpus_allowed,
+				       &new_cpus);
+
+	mems_updated = !nodes_equal(top_cpuset.mems_allowed, new_mems);
+	nodes_andnot(tmp_mems, top_cpuset.mems_allowed, new_mems);
+	mems_offlined = !nodes_empty(tmp_mems);
+
+	/* synchronize cpus_allowed to cpu_active_mask */
+	if (cpus_updated) {
+		mutex_lock(&callback_mutex);
+		cpumask_copy(top_cpuset.cpus_allowed, &new_cpus);
+		mutex_unlock(&callback_mutex);
+		/* we don't mess with cpumasks of tasks in top_cpuset */
+	}
+
+	/* synchronize mems_allowed to N_MEMORY */
+	if (mems_updated) {
+		tmp_mems = top_cpuset.mems_allowed;
+		mutex_lock(&callback_mutex);
+		top_cpuset.mems_allowed = new_mems;
+		mutex_unlock(&callback_mutex);
+		update_tasks_nodemask(&top_cpuset, &tmp_mems, NULL);
+	}
+
+	/* if cpus or mems went down, we need to propagate to descendants */
+	if (cpus_offlined || mems_offlined) {
+		struct cpuset *cs;
+		LIST_HEAD(queue);
+
+		list_add_tail(&top_cpuset.stack_list, &queue);
+		while ((cs = cpuset_next(&queue)))
+			if (cs != &top_cpuset)
+				cpuset_propagate_hotplug(cs);
+	}
 
-	ndoms = generate_sched_domains(&doms, &attr);
 	cgroup_unlock();
 
-	/* Have scheduler rebuild the domains */
-	partition_sched_domains(ndoms, doms, attr);
+	/* rebuild sched domains if cpus_allowed has changed */
+	if (cpus_updated) {
+		struct sched_domain_attr *attr;
+		cpumask_var_t *doms;
+		int ndoms;
+
+		cgroup_lock();
+		ndoms = generate_sched_domains(&doms, &attr);
+		cgroup_unlock();
+
+		partition_sched_domains(ndoms, doms, attr);
+	}
+}
+
+void cpuset_update_active_cpus(bool cpu_online)
+{
+	cpuset_handle_hotplug();
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG
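The new cpuset_handle_hotplug() above is reconciliation rather than event handling: fetch what is online now, diff it against what top_cpuset last recorded, sync top_cpuset, and walk the descendants only when something was actually taken down (onlining can never empty a child's mask, so the walk is skipped in that case). A compressed userspace sketch of that control flow, with hypothetical toy_* types standing in for the kernel structures:

#include <stdbool.h>
#include <stdio.h>

struct toy_cpuset {
	unsigned long cpus;
	struct toy_cpuset *child;	/* child chain stands in for the subtree */
};

/* mirrors cpuset_propagate_hotplug(): strip whatever top no longer has */
static void toy_propagate(struct toy_cpuset *cs, unsigned long top_cpus)
{
	unsigned long off = cs->cpus & ~top_cpus;

	if (off)
		cs->cpus &= ~off;
}

/* mirrors cpuset_handle_hotplug(): reconcile top first, then descendants */
static void toy_handle_hotplug(struct toy_cpuset *top, unsigned long new_cpus)
{
	bool cpus_offlined = (top->cpus & ~new_cpus) != 0;

	top->cpus = new_cpus;		/* top always tracks what's online */

	if (cpus_offlined)		/* onlining never shrinks a child */
		for (struct toy_cpuset *cs = top->child; cs; cs = cs->child)
			toy_propagate(cs, top->cpus);
}

int main(void)
{
	struct toy_cpuset child = { .cpus = 0x18ul, .child = NULL };
	struct toy_cpuset top = { .cpus = 0x1ful, .child = &child };

	toy_handle_hotplug(&top, 0x0ful);	/* CPU 4 goes down */
	printf("top %#lx child %#lx\n", top.cpus, child.cpus);	/* 0xf 0x8 */
	return 0;
}

The sketch flattens the hierarchy into a single child chain; the real code walks the full subtree via cpuset_next() and takes callback_mutex around every mask update.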
@@ -2180,29 +2189,7 @@ void cpuset_update_active_cpus(bool cpu_online)
 static int cpuset_track_online_nodes(struct notifier_block *self,
 				unsigned long action, void *arg)
 {
-	static nodemask_t oldmems;	/* protected by cgroup_mutex */
-
-	cgroup_lock();
-	switch (action) {
-	case MEM_ONLINE:
-		oldmems = top_cpuset.mems_allowed;
-		mutex_lock(&callback_mutex);
-		top_cpuset.mems_allowed = node_states[N_MEMORY];
-		mutex_unlock(&callback_mutex);
-		update_tasks_nodemask(&top_cpuset, &oldmems, NULL);
-		break;
-	case MEM_OFFLINE:
-		/*
-		 * needn't update top_cpuset.mems_allowed explicitly because
-		 * scan_cpusets_upon_hotplug() will update it.
-		 */
-		scan_cpusets_upon_hotplug(&top_cpuset, CPUSET_MEM_OFFLINE);
-		break;
-	default:
-		break;
-	}
-	cgroup_unlock();
-
+	cpuset_handle_hotplug();
 	return NOTIFY_OK;
 }
 #endif
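With this last hunk both entry points, cpuset_update_active_cpus() and the memory notifier, funnel into the same handler and can ignore their event arguments entirely: the handler recomputes everything from the current online state instead of interpreting MEM_ONLINE vs MEM_OFFLINE. A small sketch of that funnel, again under hypothetical toy_* names:

#include <stdbool.h>
#include <stdio.h>

static void toy_reconcile(void)	/* stands in for cpuset_handle_hotplug() */
{
	puts("re-sync masks against whatever is online right now");
}

/* CPU path: the bool survives for callers but is no longer consulted */
static void toy_update_active_cpus(bool cpu_online)
{
	(void)cpu_online;
	toy_reconcile();
}

/* memory-notifier path: the action code is likewise irrelevant now */
static int toy_track_online_nodes(unsigned long action)
{
	(void)action;
	toy_reconcile();
	return 0;			/* NOTIFY_OK stand-in */
}

int main(void)
{
	toy_update_active_cpus(false);	/* a CPU went down */
	toy_track_online_nodes(0);	/* a memory node changed state */
	return 0;
}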