 kernel/cpuset.c | 221 +++++++++++++++++++++++++++++++++++-----------------------------
 1 file changed, 104 insertions(+), 117 deletions(-)
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index c5edc6b3eb28..3d448e646a4a 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -148,12 +148,6 @@ typedef enum {
 	CS_SPREAD_SLAB,
 } cpuset_flagbits_t;
 
-/* the type of hotplug event */
-enum hotplug_event {
-	CPUSET_CPU_OFFLINE,
-	CPUSET_MEM_OFFLINE,
-};
-
 /* convenient tests for these bits */
 static inline bool is_cpuset_online(const struct cpuset *cs)
 {
@@ -2059,116 +2053,131 @@ static struct cpuset *cpuset_next(struct list_head *queue)
 	return cp;
 }
 
-
-/*
- * Walk the specified cpuset subtree upon a hotplug operation (CPU/Memory
- * online/offline) and update the cpusets accordingly.
- * For regular CPU/Mem hotplug, look for empty cpusets; the tasks of such
- * cpuset must be moved to a parent cpuset.
- *
- * Called with cgroup_mutex held.  We take callback_mutex to modify
- * cpus_allowed and mems_allowed.
+/**
+ * cpuset_propagate_hotplug - propagate CPU/memory hotplug to a cpuset
+ * @cs: cpuset in interest
  *
- * This walk processes the tree from top to bottom, completing one layer
- * before dropping down to the next.  It always processes a node before
- * any of its children.
+ * Compare @cs's cpu and mem masks against top_cpuset and if some have gone
+ * offline, update @cs accordingly.  If @cs ends up with no CPU or memory,
+ * all its tasks are moved to the nearest ancestor with both resources.
  *
- * In the case of memory hot-unplug, it will remove nodes from N_MEMORY
- * if all present pages from a node are offlined.
+ * Should be called with cgroup_mutex held.
  */
-static void
-scan_cpusets_upon_hotplug(struct cpuset *root, enum hotplug_event event)
+static void cpuset_propagate_hotplug(struct cpuset *cs)
 {
-	LIST_HEAD(queue);
-	struct cpuset *cp;		/* scans cpusets being updated */
-	static nodemask_t oldmems;	/* protected by cgroup_mutex */
-
-	list_add_tail((struct list_head *)&root->stack_list, &queue);
-
-	switch (event) {
-	case CPUSET_CPU_OFFLINE:
-		while ((cp = cpuset_next(&queue)) != NULL) {
+	static cpumask_t off_cpus;
+	static nodemask_t off_mems, tmp_mems;
 
-			/* Continue past cpusets with all cpus online */
-			if (cpumask_subset(cp->cpus_allowed, cpu_active_mask))
-				continue;
+	WARN_ON_ONCE(!cgroup_lock_is_held());
 
-			/* Remove offline cpus from this cpuset. */
-			mutex_lock(&callback_mutex);
-			cpumask_and(cp->cpus_allowed, cp->cpus_allowed,
-				    cpu_active_mask);
-			mutex_unlock(&callback_mutex);
+	cpumask_andnot(&off_cpus, cs->cpus_allowed, top_cpuset.cpus_allowed);
+	nodes_andnot(off_mems, cs->mems_allowed, top_cpuset.mems_allowed);
 
-			/* Move tasks from the empty cpuset to a parent */
-			if (cpumask_empty(cp->cpus_allowed))
-				remove_tasks_in_empty_cpuset(cp);
-			else
-				update_tasks_cpumask(cp, NULL);
-		}
-		break;
-
-	case CPUSET_MEM_OFFLINE:
-		while ((cp = cpuset_next(&queue)) != NULL) {
-
-			/* Continue past cpusets with all mems online */
-			if (nodes_subset(cp->mems_allowed,
-					 node_states[N_MEMORY]))
-				continue;
-
-			oldmems = cp->mems_allowed;
-
-			/* Remove offline mems from this cpuset. */
-			mutex_lock(&callback_mutex);
-			nodes_and(cp->mems_allowed, cp->mems_allowed,
-				  node_states[N_MEMORY]);
-			mutex_unlock(&callback_mutex);
+	/* remove offline cpus from @cs */
+	if (!cpumask_empty(&off_cpus)) {
+		mutex_lock(&callback_mutex);
+		cpumask_andnot(cs->cpus_allowed, cs->cpus_allowed, &off_cpus);
+		mutex_unlock(&callback_mutex);
+		update_tasks_cpumask(cs, NULL);
+	}
 
-			/* Move tasks from the empty cpuset to a parent */
-			if (nodes_empty(cp->mems_allowed))
-				remove_tasks_in_empty_cpuset(cp);
-			else
-				update_tasks_nodemask(cp, &oldmems, NULL);
-		}
+	/* remove offline mems from @cs */
+	if (!nodes_empty(off_mems)) {
+		tmp_mems = cs->mems_allowed;
+		mutex_lock(&callback_mutex);
+		nodes_andnot(cs->mems_allowed, cs->mems_allowed, off_mems);
+		mutex_unlock(&callback_mutex);
+		update_tasks_nodemask(cs, &tmp_mems, NULL);
 	}
+
+	if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))
+		remove_tasks_in_empty_cpuset(cs);
 }
 
-/*
- * The top_cpuset tracks what CPUs and Memory Nodes are online,
- * period.  This is necessary in order to make cpusets transparent
- * (of no affect) on systems that are actively using CPU hotplug
- * but making no active use of cpusets.
- *
- * The only exception to this is suspend/resume, where we don't
- * modify cpusets at all.
+/**
+ * cpuset_handle_hotplug - handle CPU/memory hot[un]plug
  *
- * This routine ensures that top_cpuset.cpus_allowed tracks
- * cpu_active_mask on each CPU hotplug (cpuhp) event.
+ * This function is called after either CPU or memory configuration has
+ * changed and updates cpuset accordingly.  The top_cpuset is always
+ * synchronized to cpu_active_mask and N_MEMORY, which is necessary in
+ * order to make cpusets transparent (of no affect) on systems that are
+ * actively using CPU hotplug but making no active use of cpusets.
  *
- * Called within get_online_cpus().  Needs to call cgroup_lock()
- * before calling generate_sched_domains().
+ * Non-root cpusets are only affected by offlining.  If any CPUs or memory
+ * nodes have been taken down, cpuset_propagate_hotplug() is invoked on all
+ * descendants.
  *
- * @cpu_online: Indicates whether this is a CPU online event (true) or
- * a CPU offline event (false).
+ * Note that CPU offlining during suspend is ignored.  We don't modify
+ * cpusets across suspend/resume cycles at all.
  */
-void cpuset_update_active_cpus(bool cpu_online)
+static void cpuset_handle_hotplug(void)
 {
-	struct sched_domain_attr *attr;
-	cpumask_var_t *doms;
-	int ndoms;
+	static cpumask_t new_cpus, tmp_cpus;
+	static nodemask_t new_mems, tmp_mems;
+	bool cpus_updated, mems_updated;
+	bool cpus_offlined, mems_offlined;
 
 	cgroup_lock();
-	mutex_lock(&callback_mutex);
-	cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
-	mutex_unlock(&callback_mutex);
 
-	if (!cpu_online)
-		scan_cpusets_upon_hotplug(&top_cpuset, CPUSET_CPU_OFFLINE);
+	/* fetch the available cpus/mems and find out which changed how */
+	cpumask_copy(&new_cpus, cpu_active_mask);
+	new_mems = node_states[N_MEMORY];
+
+	cpus_updated = !cpumask_equal(top_cpuset.cpus_allowed, &new_cpus);
+	cpus_offlined = cpumask_andnot(&tmp_cpus, top_cpuset.cpus_allowed,
+				       &new_cpus);
+
+	mems_updated = !nodes_equal(top_cpuset.mems_allowed, new_mems);
+	nodes_andnot(tmp_mems, top_cpuset.mems_allowed, new_mems);
+	mems_offlined = !nodes_empty(tmp_mems);
+
+	/* synchronize cpus_allowed to cpu_active_mask */
+	if (cpus_updated) {
+		mutex_lock(&callback_mutex);
+		cpumask_copy(top_cpuset.cpus_allowed, &new_cpus);
+		mutex_unlock(&callback_mutex);
+		/* we don't mess with cpumasks of tasks in top_cpuset */
+	}
+
+	/* synchronize mems_allowed to N_MEMORY */
+	if (mems_updated) {
+		tmp_mems = top_cpuset.mems_allowed;
+		mutex_lock(&callback_mutex);
+		top_cpuset.mems_allowed = new_mems;
+		mutex_unlock(&callback_mutex);
+		update_tasks_nodemask(&top_cpuset, &tmp_mems, NULL);
+	}
+
+	/* if cpus or mems went down, we need to propagate to descendants */
+	if (cpus_offlined || mems_offlined) {
+		struct cpuset *cs;
+		LIST_HEAD(queue);
+
+		list_add_tail(&top_cpuset.stack_list, &queue);
+		while ((cs = cpuset_next(&queue)))
+			if (cs != &top_cpuset)
+				cpuset_propagate_hotplug(cs);
+	}
 
-	ndoms = generate_sched_domains(&doms, &attr);
 	cgroup_unlock();
 
-	/* Have scheduler rebuild the domains */
-	partition_sched_domains(ndoms, doms, attr);
+	/* rebuild sched domains if cpus_allowed has changed */
+	if (cpus_updated) {
+		struct sched_domain_attr *attr;
+		cpumask_var_t *doms;
+		int ndoms;
+
+		cgroup_lock();
+		ndoms = generate_sched_domains(&doms, &attr);
+		cgroup_unlock();
+
+		partition_sched_domains(ndoms, doms, attr);
+	}
+}
+
+void cpuset_update_active_cpus(bool cpu_online)
+{
+	cpuset_handle_hotplug();
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG
@@ -2180,29 +2189,7 @@ void cpuset_update_active_cpus(bool cpu_online)
 static int cpuset_track_online_nodes(struct notifier_block *self,
 				unsigned long action, void *arg)
 {
-	static nodemask_t oldmems;	/* protected by cgroup_mutex */
-
-	cgroup_lock();
-	switch (action) {
-	case MEM_ONLINE:
-		oldmems = top_cpuset.mems_allowed;
-		mutex_lock(&callback_mutex);
-		top_cpuset.mems_allowed = node_states[N_MEMORY];
-		mutex_unlock(&callback_mutex);
-		update_tasks_nodemask(&top_cpuset, &oldmems, NULL);
-		break;
-	case MEM_OFFLINE:
-		/*
-		 * needn't update top_cpuset.mems_allowed explicitly because
-		 * scan_cpusets_upon_hotplug() will update it.
-		 */
-		scan_cpusets_upon_hotplug(&top_cpuset, CPUSET_MEM_OFFLINE);
-		break;
-	default:
-		break;
-	}
-	cgroup_unlock();
-
+	cpuset_handle_hotplug();
 	return NOTIFY_OK;
 }
 #endif