From 37340746a66e5e7feed5945f28cb75d90a8fd9f6 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Thu, 5 Jun 2008 22:46:32 -0700 Subject: cpusets: fix bug when adding nonexistent cpu or mem Adding a nonexistent cpu to a cpuset will be omitted quietly. It should return -EINVAL. Example: (real_nr_cpus <= 4 < NR_CPUS or cpu#4 was just offline) # cat cpus 0-1 # /bin/echo 4 > cpus # /bin/echo $? 0 # cat cpus # The same occurs when add a nonexistent mem. This patch will fix this bug. And when *buf == "", the check is unneeded. Signed-off-by: Lai Jiangshan Acked-by: Paul Jackson Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cpuset.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'kernel/cpuset.c') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 86ea9e34e326..039baa4cd90c 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -797,8 +797,10 @@ static int update_cpumask(struct cpuset *cs, char *buf) retval = cpulist_parse(buf, trialcs.cpus_allowed); if (retval < 0) return retval; + + if (!cpus_subset(trialcs.cpus_allowed, cpu_online_map)) + return -EINVAL; } - cpus_and(trialcs.cpus_allowed, trialcs.cpus_allowed, cpu_online_map); retval = validate_change(cs, &trialcs); if (retval < 0) return retval; @@ -932,9 +934,11 @@ static int update_nodemask(struct cpuset *cs, char *buf) retval = nodelist_parse(buf, trialcs.mems_allowed); if (retval < 0) goto done; + + if (!nodes_subset(trialcs.mems_allowed, + node_states[N_HIGH_MEMORY])) + return -EINVAL; } - nodes_and(trialcs.mems_allowed, trialcs.mems_allowed, - node_states[N_HIGH_MEMORY]); oldmem = cs->mems_allowed; if (nodes_equal(oldmem, trialcs.mems_allowed)) { retval = 0; /* Too easy - nothing to do */ -- cgit v1.2.2 From f18f982abf183e91f435990d337164c7a43d1e6d Mon Sep 17 00:00:00 2001 From: Max Krasnyansky Date: Thu, 29 May 2008 11:17:01 -0700 Subject: sched: CPU hotplug events must not destroy scheduler domains created by the cpusets First issue is not related to the cpusets. We're simply leaking doms_cur. It's allocated in arch_init_sched_domains() which is called for every hotplug event. So we just keep reallocation doms_cur without freeing it. I introduced free_sched_domains() function that cleans things up. Second issue is that sched domains created by the cpusets are completely destroyed by the CPU hotplug events. For all CPU hotplug events scheduler attaches all CPUs to the NULL domain and then puts them all into the single domain thereby destroying domains created by the cpusets (partition_sched_domains). The solution is simple, when cpusets are enabled scheduler should not create default domain and instead let cpusets do that. Which is exactly what the patch does. Signed-off-by: Max Krasnyansky Cc: pj@sgi.com Cc: menage@google.com Cc: rostedt@goodmis.org Acked-by: Peter Zijlstra Signed-off-by: Thomas Gleixner --- kernel/cpuset.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'kernel/cpuset.c') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 039baa4cd90c..bceb89557973 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1890,6 +1890,12 @@ static void common_cpu_mem_hotplug_unplug(void) top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; scan_for_empty_cpusets(&top_cpuset); + /* + * Scheduler destroys domains on hotplug events. + * Rebuild them based on the current settings. + */ + rebuild_sched_domains(); + cgroup_unlock(); } -- cgit v1.2.2 From 30e0e178193d4221abc9926b07a4c7661c7cc4a9 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 13 May 2008 10:27:17 +0800 Subject: cpuset: limit the input of cpuset.sched_relax_domain_level We allow the inputs to be [-1 ... SD_LV_MAX), and return -EINVAL for inputs outside this range. Signed-off-by: Li Zefan Acked-by: Paul Menage Acked-by: Paul Jackson Acked-by: Hidetoshi Seto Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/cpuset.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/cpuset.c') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 039baa4cd90c..66103a119bfe 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1037,8 +1037,8 @@ int current_cpuset_is_being_rebound(void) static int update_relax_domain_level(struct cpuset *cs, s64 val) { - if ((int)val < 0) - val = -1; + if (val < -1 || val >= SD_LV_MAX) + return -EINVAL; if (val != cs->relax_domain_level) { cs->relax_domain_level = val; -- cgit v1.2.2 From 3e84050c81ffb4961ef43d20e1fb1d7607167d83 Mon Sep 17 00:00:00 2001 From: Dmitry Adamushko Date: Sun, 13 Jul 2008 02:10:29 +0200 Subject: cpusets, hotplug, scheduler: fix scheduler domain breakage Commit f18f982ab ("sched: CPU hotplug events must not destroy scheduler domains created by the cpusets") introduced a hotplug-related problem as described below: Upon CPU_DOWN_PREPARE, update_sched_domains() -> detach_destroy_domains(&cpu_online_map) does the following: /* * Force a reinitialization of the sched domains hierarchy. The domains * and groups cannot be updated in place without racing with the balancing * code, so we temporarily attach all running cpus to the NULL domain * which will prevent rebalancing while the sched domains are recalculated. */ The sched-domains should be rebuilt when a CPU_DOWN ops. has been completed, effectively either upon CPU_DEAD{_FROZEN} (upon success) or CPU_DOWN_FAILED{_FROZEN} (upon failure -- restore the things to their initial state). That's what update_sched_domains() also does but only for !CPUSETS case. With f18f982ab, sched-domains' reinitialization is delegated to CPUSETS code: cpuset_handle_cpuhp() -> common_cpu_mem_hotplug_unplug() -> rebuild_sched_domains() Being called for CPU_UP_PREPARE and if its callback is called after update_sched_domains()), it just negates all the work done by update_sched_domains() -- i.e. a soon-to-be-offline cpu is included in the sched-domains and that makes it visible for the load-balancer while the CPU_DOWN ops. is in progress. __migrate_live_tasks() moves the tasks off a 'dead' cpu (it's already "offline" when this function is called). try_to_wake_up() is called for one of these tasks from another CPU -> the load-balancer (wake_idle()) picks up a "dead" CPU and places the task on it. Then e.g. BUG_ON(rq->nr_running) detects this a bit later -> oops. Signed-off-by: Dmitry Adamushko Tested-by: Vegard Nossum Cc: Paul Menage Cc: Max Krasnyansky Cc: Paul Jackson Cc: Peter Zijlstra Cc: miaox@cn.fujitsu.com Cc: rostedt@goodmis.org Cc: Linus Torvalds Signed-off-by: Ingo Molnar --- kernel/cpuset.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) (limited to 'kernel/cpuset.c') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 9fceb97e989c..798b3ab054eb 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1882,7 +1882,7 @@ static void scan_for_empty_cpusets(const struct cpuset *root) * in order to minimize text size. */ -static void common_cpu_mem_hotplug_unplug(void) +static void common_cpu_mem_hotplug_unplug(int rebuild_sd) { cgroup_lock(); @@ -1894,7 +1894,8 @@ static void common_cpu_mem_hotplug_unplug(void) * Scheduler destroys domains on hotplug events. * Rebuild them based on the current settings. */ - rebuild_sched_domains(); + if (rebuild_sd) + rebuild_sched_domains(); cgroup_unlock(); } @@ -1912,11 +1913,22 @@ static void common_cpu_mem_hotplug_unplug(void) static int cpuset_handle_cpuhp(struct notifier_block *unused_nb, unsigned long phase, void *unused_cpu) { - if (phase == CPU_DYING || phase == CPU_DYING_FROZEN) + switch (phase) { + case CPU_UP_CANCELED: + case CPU_UP_CANCELED_FROZEN: + case CPU_DOWN_FAILED: + case CPU_DOWN_FAILED_FROZEN: + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + case CPU_DEAD: + case CPU_DEAD_FROZEN: + common_cpu_mem_hotplug_unplug(1); + break; + default: return NOTIFY_DONE; + } - common_cpu_mem_hotplug_unplug(); - return 0; + return NOTIFY_OK; } #ifdef CONFIG_MEMORY_HOTPLUG @@ -1929,7 +1941,7 @@ static int cpuset_handle_cpuhp(struct notifier_block *unused_nb, void cpuset_track_online_nodes(void) { - common_cpu_mem_hotplug_unplug(); + common_cpu_mem_hotplug_unplug(0); } #endif -- cgit v1.2.2