Diffstat (limited to 'kernel/cpuset.c')
 kernel/cpuset.c | 39 +++++++++++++++++++++++++++++++++++----
 1 file changed, 35 insertions(+), 4 deletions(-)
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 3d448e646a4a..658eb1a32084 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -260,6 +260,13 @@ static char cpuset_nodelist[CPUSET_NODELIST_LEN];
 static DEFINE_SPINLOCK(cpuset_buffer_lock);
 
 /*
+ * CPU / memory hotplug is handled asynchronously.
+ */
+static void cpuset_hotplug_workfn(struct work_struct *work);
+
+static DECLARE_WORK(cpuset_hotplug_work, cpuset_hotplug_workfn);
+
+/*
  * This is ugly, but preserves the userspace API for existing cpuset
  * users.  If someone tries to mount the "cpuset" filesystem, we
  * silently switch it to mount "cgroup" instead
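
The hunk above introduces the standard deferred-work idiom: forward-declare the work function so a work item can be statically bound to it, then queue that item from contexts where running the processing inline would be unsafe. A minimal stand-alone sketch of the same idiom (illustrative only; the demo_* names are invented and are not part of this patch):

    #include <linux/module.h>
    #include <linux/workqueue.h>

    /* Forward declaration so the work item can be bound before the body. */
    static void demo_workfn(struct work_struct *work);

    /* Statically define a work item that runs demo_workfn(). */
    static DECLARE_WORK(demo_work, demo_workfn);

    static void demo_workfn(struct work_struct *work)
    {
            /* Runs later, in process context, on the system workqueue. */
            pr_info("deferred work executed\n");
    }

    static int __init demo_init(void)
    {
            schedule_work(&demo_work);      /* queue and return immediately */
            return 0;
    }

    static void __exit demo_exit(void)
    {
            flush_work(&demo_work);         /* wait for any queued instance */
    }

    module_init(demo_init);
    module_exit(demo_exit);
    MODULE_LICENSE("GPL");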
@@ -1565,6 +1572,19 @@ static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft,
 	struct cpuset *cs = cgroup_cs(cgrp);
 	struct cpuset *trialcs;
 
+	/*
+	 * CPU or memory hotunplug may leave @cs w/o any execution
+	 * resources, in which case the hotplug code asynchronously updates
+	 * configuration and transfers all tasks to the nearest ancestor
+	 * which can execute.
+	 *
+	 * As writes to "cpus" or "mems" may restore @cs's execution
+	 * resources, wait for the previously scheduled operations before
+	 * proceeding, so that we don't end up repeatedly removing tasks
+	 * added after execution capability is restored.
+	 */
+	flush_work(&cpuset_hotplug_work);
+
 	if (!cgroup_lock_live_group(cgrp))
 		return -ENODEV;
 
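
This hunk closes a race between the asynchronous hotplug work and configuration writes: without the flush, a still-pending work item could run after the write and evict tasks from a cpuset whose resources the write had just restored. The ordering can be sketched as follows (hypothetical names; demo_apply_config() stands in for the real trialcs update logic):

    #include <linux/workqueue.h>

    static void demo_hotplug_workfn(struct work_struct *work);
    static DECLARE_WORK(demo_hotplug_work, demo_hotplug_workfn);

    static void demo_hotplug_workfn(struct work_struct *work)
    {
            /* Would empty dead groups and migrate their tasks away. */
    }

    static int demo_apply_config(const char *buf)
    {
            /* Would validate and install the new cpus/mems masks. */
            return 0;
    }

    /* Writer path: invoked when a user updates "cpus" or "mems". */
    static int demo_write_config(const char *buf)
    {
            /*
             * Wait for any queued hotplug processing first; if it ran
             * after this write, it could undo the restoration the
             * write just performed.
             */
            flush_work(&demo_hotplug_work);
            return demo_apply_config(buf);
    }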
@@ -2095,7 +2115,7 @@ static void cpuset_propagate_hotplug(struct cpuset *cs)
 }
 
 /**
- * cpuset_handle_hotplug - handle CPU/memory hot[un]plug
+ * cpuset_hotplug_workfn - handle CPU/memory hotunplug for a cpuset
  *
  * This function is called after either CPU or memory configuration has
  * changed and updates cpuset accordingly.  The top_cpuset is always
@@ -2110,7 +2130,7 @@ static void cpuset_propagate_hotplug(struct cpuset *cs)
  * Note that CPU offlining during suspend is ignored.  We don't modify
  * cpusets across suspend/resume cycles at all.
  */
-static void cpuset_handle_hotplug(void)
+static void cpuset_hotplug_workfn(struct work_struct *work)
 {
 	static cpumask_t new_cpus, tmp_cpus;
 	static nodemask_t new_mems, tmp_mems;
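
The two hunks above are the mechanical half of the conversion: the handler is renamed and given the work-function signature. A work function always receives the work_struct that fired; items embedded in a larger object recover their container with container_of(), while a static DECLARE_WORK() item, as in this patch, simply ignores the argument. A brief sketch of the embedded case (demo_* names are invented):

    #include <linux/kernel.h>
    #include <linux/workqueue.h>

    struct demo_device {
            int id;
            struct work_struct work;        /* embedded work item */
    };

    static void demo_device_workfn(struct work_struct *work)
    {
            /* Recover the owning object from the embedded member. */
            struct demo_device *dev = container_of(work, struct demo_device, work);

            pr_info("work for device %d\n", dev->id);
    }

    static void demo_device_init(struct demo_device *dev)
    {
            INIT_WORK(&dev->work, demo_device_workfn);
    }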
@@ -2177,7 +2197,18 @@ static void cpuset_handle_hotplug(void)
 
 void cpuset_update_active_cpus(bool cpu_online)
 {
-	cpuset_handle_hotplug();
+	/*
+	 * We're inside a cpu hotplug critical region which usually nests
+	 * inside cgroup synchronization.  Bounce actual hotplug processing
+	 * to a work item to avoid reverse locking order.
+	 *
+	 * We still need to do partition_sched_domains() synchronously;
+	 * otherwise, the scheduler will get confused and put tasks on the
+	 * dead CPU.  Fall back to the default single domain.
+	 * cpuset_hotplug_workfn() will rebuild it as necessary.
+	 */
+	partition_sched_domains(1, NULL, NULL);
+	schedule_work(&cpuset_hotplug_work);
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG
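
This is the core of the patch: inside the CPU hotplug critical region the established lock order is cgroup_mutex -> hotplug, so taking cgroup_mutex here would invert it. Only the part the scheduler needs immediately, partition_sched_domains(), runs synchronously; everything else is bounced to the work item, which executes outside the critical region and may lock freely. The general shape of the pattern, as a sketch (demo_outer stands in for cgroup_mutex; all demo_* names are invented):

    #include <linux/mutex.h>
    #include <linux/workqueue.h>

    static DEFINE_MUTEX(demo_outer);        /* stands in for cgroup_mutex */

    static void demo_deferred_workfn(struct work_struct *work)
    {
            /*
             * Safe: the work item runs outside the hotplug critical
             * region, so taking the outer lock here cannot invert
             * the demo_outer -> hotplug ordering.
             */
            mutex_lock(&demo_outer);
            /* ... full hotplug propagation would go here ... */
            mutex_unlock(&demo_outer);
    }

    static DECLARE_WORK(demo_deferred_work, demo_deferred_workfn);

    /* Called with the hotplug lock held. */
    static void demo_cpu_hotplug_callback(void)
    {
            /* Do only the minimal synchronous part; defer the rest. */
            schedule_work(&demo_deferred_work);
    }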
@@ -2189,7 +2220,7 @@ void cpuset_update_active_cpus(bool cpu_online)
 static int cpuset_track_online_nodes(struct notifier_block *self,
                                      unsigned long action, void *arg)
 {
-	cpuset_handle_hotplug();
+	schedule_work(&cpuset_hotplug_work);
 	return NOTIFY_OK;
 }
 #endif
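
The memory hotplug notifier gets the same treatment: it now only queues the shared work item, so both CPU and memory events funnel through one deferred path. For reference, a notifier that defers this way looks roughly like the following (a sketch assuming the generic memory-notifier API; demo_* names are invented):

    #include <linux/memory.h>
    #include <linux/notifier.h>
    #include <linux/workqueue.h>

    static void demo_mem_workfn(struct work_struct *work)
    {
            /* Would recompute masks for the new set of online nodes. */
    }

    static DECLARE_WORK(demo_mem_work, demo_mem_workfn);

    /* Runs in the memory hotplug path: stay lightweight, defer the rest. */
    static int demo_mem_callback(struct notifier_block *self,
                                 unsigned long action, void *arg)
    {
            schedule_work(&demo_mem_work);
            return NOTIFY_OK;
    }

    static struct notifier_block demo_mem_nb = {
            .notifier_call = demo_mem_callback,
    };

    /* Registered at init time with register_memory_notifier(&demo_mem_nb). */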