diff options
author | Dmitry Adamushko <dmitry.adamushko@gmail.com> | 2008-07-12 20:10:29 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-07-13 05:37:02 -0400 |
commit | 3e84050c81ffb4961ef43d20e1fb1d7607167d83 (patch) | |
tree | 62ae82ba6cc923222ab0053efcb8b2dec9b91ca0 /kernel/cpuset.c | |
parent | b1e387348a2a70954312b102d0589c3e2ca3dba1 (diff) |
cpusets, hotplug, scheduler: fix scheduler domain breakage
Commit f18f982ab ("sched: CPU hotplug events must not destroy scheduler
domains created by the cpusets") introduced a hotplug-related problem as
described below:
Upon CPU_DOWN_PREPARE,
update_sched_domains() -> detach_destroy_domains(&cpu_online_map)
does the following:
/*
* Force a reinitialization of the sched domains hierarchy. The domains
* and groups cannot be updated in place without racing with the balancing
* code, so we temporarily attach all running cpus to the NULL domain
* which will prevent rebalancing while the sched domains are recalculated.
*/
The sched-domains should be rebuilt once a CPU_DOWN operation has
completed, i.e. either upon CPU_DEAD{_FROZEN} (on success) or
CPU_DOWN_FAILED{_FROZEN} (on failure -- restoring things to their
initial state). That's what update_sched_domains() also does, but only
for the !CPUSETS case.
With f18f982ab, sched-domains' reinitialization is delegated to
CPUSETS code:
cpuset_handle_cpuhp() -> common_cpu_mem_hotplug_unplug() ->
rebuild_sched_domains()
When it is called for CPU_UP_PREPARE, and if its callback runs after
update_sched_domains(), it just negates all the work done by
update_sched_domains() -- i.e. a soon-to-be-offline cpu is included in
the sched-domains, which makes it visible to the load-balancer
while the CPU_DOWN operation is in progress.
__migrate_live_tasks() moves the tasks off a 'dead' cpu (it's already
"offline" when this function is called).
try_to_wake_up() is called for one of these tasks from another CPU ->
the load-balancer (wake_idle()) picks up a "dead" CPU and places the
task on it. Then e.g. BUG_ON(rq->nr_running) detects this a bit later
-> oops.
Signed-off-by: Dmitry Adamushko <dmitry.adamushko@gmail.com>
Tested-by: Vegard Nossum <vegard.nossum@gmail.com>
Cc: Paul Menage <menage@google.com>
Cc: Max Krasnyansky <maxk@qualcomm.com>
Cc: Paul Jackson <pj@sgi.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: miaox@cn.fujitsu.com
Cc: rostedt@goodmis.org
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/cpuset.c')
-rw-r--r-- | kernel/cpuset.c | 24 |
1 files changed, 18 insertions, 6 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 9fceb97e989c..798b3ab054eb 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
@@ -1882,7 +1882,7 @@ static void scan_for_empty_cpusets(const struct cpuset *root) | |||
1882 | * in order to minimize text size. | 1882 | * in order to minimize text size. |
1883 | */ | 1883 | */ |
1884 | 1884 | ||
1885 | static void common_cpu_mem_hotplug_unplug(void) | 1885 | static void common_cpu_mem_hotplug_unplug(int rebuild_sd) |
1886 | { | 1886 | { |
1887 | cgroup_lock(); | 1887 | cgroup_lock(); |
1888 | 1888 | ||
@@ -1894,7 +1894,8 @@ static void common_cpu_mem_hotplug_unplug(void) | |||
1894 | * Scheduler destroys domains on hotplug events. | 1894 | * Scheduler destroys domains on hotplug events. |
1895 | * Rebuild them based on the current settings. | 1895 | * Rebuild them based on the current settings. |
1896 | */ | 1896 | */ |
1897 | rebuild_sched_domains(); | 1897 | if (rebuild_sd) |
1898 | rebuild_sched_domains(); | ||
1898 | 1899 | ||
1899 | cgroup_unlock(); | 1900 | cgroup_unlock(); |
1900 | } | 1901 | } |
@@ -1912,11 +1913,22 @@ static void common_cpu_mem_hotplug_unplug(void) | |||
1912 | static int cpuset_handle_cpuhp(struct notifier_block *unused_nb, | 1913 | static int cpuset_handle_cpuhp(struct notifier_block *unused_nb, |
1913 | unsigned long phase, void *unused_cpu) | 1914 | unsigned long phase, void *unused_cpu) |
1914 | { | 1915 | { |
1915 | if (phase == CPU_DYING || phase == CPU_DYING_FROZEN) | 1916 | switch (phase) { |
1917 | case CPU_UP_CANCELED: | ||
1918 | case CPU_UP_CANCELED_FROZEN: | ||
1919 | case CPU_DOWN_FAILED: | ||
1920 | case CPU_DOWN_FAILED_FROZEN: | ||
1921 | case CPU_ONLINE: | ||
1922 | case CPU_ONLINE_FROZEN: | ||
1923 | case CPU_DEAD: | ||
1924 | case CPU_DEAD_FROZEN: | ||
1925 | common_cpu_mem_hotplug_unplug(1); | ||
1926 | break; | ||
1927 | default: | ||
1916 | return NOTIFY_DONE; | 1928 | return NOTIFY_DONE; |
1929 | } | ||
1917 | 1930 | ||
1918 | common_cpu_mem_hotplug_unplug(); | 1931 | return NOTIFY_OK; |
1919 | return 0; | ||
1920 | } | 1932 | } |
1921 | 1933 | ||
1922 | #ifdef CONFIG_MEMORY_HOTPLUG | 1934 | #ifdef CONFIG_MEMORY_HOTPLUG |
@@ -1929,7 +1941,7 @@ static int cpuset_handle_cpuhp(struct notifier_block *unused_nb, | |||
1929 | 1941 | ||
1930 | void cpuset_track_online_nodes(void) | 1942 | void cpuset_track_online_nodes(void) |
1931 | { | 1943 | { |
1932 | common_cpu_mem_hotplug_unplug(); | 1944 | common_cpu_mem_hotplug_unplug(0); |
1933 | } | 1945 | } |
1934 | #endif | 1946 | #endif |
1935 | 1947 | ||