aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/sched.c
diff options
context:
space:
mode:
authorPaul Jackson <pj@sgi.com>2007-10-19 02:40:20 -0400
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2007-10-19 14:53:41 -0400
commit029190c515f15f512ac85de8fc686d4dbd0ae731 (patch)
treea946f9223d17e945141fef81f94a75b38e2cc6ef /kernel/sched.c
parent2f2a3a46fcafa7a12d61454f67f932dfe7d84c60 (diff)
cpuset sched_load_balance flag
Add a new per-cpuset flag called 'sched_load_balance'. When enabled in a cpuset (the default value) it tells the kernel scheduler that the scheduler should provide the normal load balancing on the CPUs in that cpuset, sometimes moving tasks from one CPU to a second CPU if the second CPU is less loaded and if that task is allowed to run there. When disabled (write "0" to the file) then it tells the kernel scheduler that load balancing is not required for the CPUs in that cpuset. Now even if this flag is disabled for some cpuset, the kernel may still have to load balance some or all the CPUs in that cpuset, if some overlapping cpuset has its sched_load_balance flag enabled. If there are some CPUs that are not in any cpuset whose sched_load_balance flag is enabled, the kernel scheduler will not load balance tasks to those CPUs. Moreover the kernel will partition the 'sched domains' (non-overlapping sets of CPUs over which load balancing is attempted) into the finest granularity partition that it can find, while still keeping any two CPUs that are in the same shed_load_balance enabled cpuset in the same element of the partition. This serves two purposes: 1) It provides a mechanism for real time isolation of some CPUs, and 2) it can be used to improve performance on systems with many CPUs by supporting configurations in which load balancing is not done across all CPUs at once, but rather only done in several smaller disjoint sets of CPUs. This mechanism replaces the earlier overloading of the per-cpuset flag 'cpu_exclusive', which overloading was removed in an earlier patch: cpuset-remove-sched-domain-hooks-from-cpusets See further the Documentation and comments in the code itself. [akpm@linux-foundation.org: don't be weird] Signed-off-by: Paul Jackson <pj@sgi.com> Acked-by: Ingo Molnar <mingo@elte.hu> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel/sched.c')
-rw-r--r--kernel/sched.c95
1 files changed, 81 insertions, 14 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 5d5e107ebc4e..39d6354af489 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -6376,26 +6376,31 @@ error:
6376 return -ENOMEM; 6376 return -ENOMEM;
6377#endif 6377#endif
6378} 6378}
6379
6380static cpumask_t *doms_cur; /* current sched domains */
6381static int ndoms_cur; /* number of sched domains in 'doms_cur' */
6382
6383/*
6384 * Special case: If a kmalloc of a doms_cur partition (array of
6385 * cpumask_t) fails, then fallback to a single sched domain,
6386 * as determined by the single cpumask_t fallback_doms.
6387 */
6388static cpumask_t fallback_doms;
6389
6379/* 6390/*
6380 * Set up scheduler domains and groups. Callers must hold the hotplug lock. 6391 * Set up scheduler domains and groups. Callers must hold the hotplug lock.
6392 * For now this just excludes isolated cpus, but could be used to
6393 * exclude other special cases in the future.
6381 */ 6394 */
6382static int arch_init_sched_domains(const cpumask_t *cpu_map) 6395static int arch_init_sched_domains(const cpumask_t *cpu_map)
6383{ 6396{
6384 cpumask_t cpu_default_map; 6397 ndoms_cur = 1;
6385 int err; 6398 doms_cur = kmalloc(sizeof(cpumask_t), GFP_KERNEL);
6386 6399 if (!doms_cur)
6387 /* 6400 doms_cur = &fallback_doms;
6388 * Setup mask for cpus without special case scheduling requirements. 6401 cpus_andnot(*doms_cur, *cpu_map, cpu_isolated_map);
6389 * For now this just excludes isolated cpus, but could be used to
6390 * exclude other special cases in the future.
6391 */
6392 cpus_andnot(cpu_default_map, *cpu_map, cpu_isolated_map);
6393
6394 err = build_sched_domains(&cpu_default_map);
6395
6396 register_sched_domain_sysctl(); 6402 register_sched_domain_sysctl();
6397 6403 return build_sched_domains(doms_cur);
6398 return err;
6399} 6404}
6400 6405
6401static void arch_destroy_sched_domains(const cpumask_t *cpu_map) 6406static void arch_destroy_sched_domains(const cpumask_t *cpu_map)
@@ -6419,6 +6424,68 @@ static void detach_destroy_domains(const cpumask_t *cpu_map)
6419 arch_destroy_sched_domains(cpu_map); 6424 arch_destroy_sched_domains(cpu_map);
6420} 6425}
6421 6426
6427/*
6428 * Partition sched domains as specified by the 'ndoms_new'
6429 * cpumasks in the array doms_new[] of cpumasks. This compares
6430 * doms_new[] to the current sched domain partitioning, doms_cur[].
6431 * It destroys each deleted domain and builds each new domain.
6432 *
6433 * 'doms_new' is an array of cpumask_t's of length 'ndoms_new'.
6434 * The masks don't intersect (don't overlap.) We should setup one
6435 * sched domain for each mask. CPUs not in any of the cpumasks will
6436 * not be load balanced. If the same cpumask appears both in the
6437 * current 'doms_cur' domains and in the new 'doms_new', we can leave
6438 * it as it is.
6439 *
6440 * The passed in 'doms_new' should be kmalloc'd. This routine takes
6441 * ownership of it and will kfree it when done with it. If the caller
6442 * failed the kmalloc call, then it can pass in doms_new == NULL,
6443 * and partition_sched_domains() will fallback to the single partition
6444 * 'fallback_doms'.
6445 *
6446 * Call with hotplug lock held
6447 */
6448void partition_sched_domains(int ndoms_new, cpumask_t *doms_new)
6449{
6450 int i, j;
6451
6452 if (doms_new == NULL) {
6453 ndoms_new = 1;
6454 doms_new = &fallback_doms;
6455 cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map);
6456 }
6457
6458 /* Destroy deleted domains */
6459 for (i = 0; i < ndoms_cur; i++) {
6460 for (j = 0; j < ndoms_new; j++) {
6461 if (cpus_equal(doms_cur[i], doms_new[j]))
6462 goto match1;
6463 }
6464 /* no match - a current sched domain not in new doms_new[] */
6465 detach_destroy_domains(doms_cur + i);
6466match1:
6467 ;
6468 }
6469
6470 /* Build new domains */
6471 for (i = 0; i < ndoms_new; i++) {
6472 for (j = 0; j < ndoms_cur; j++) {
6473 if (cpus_equal(doms_new[i], doms_cur[j]))
6474 goto match2;
6475 }
6476 /* no match - add a new doms_new */
6477 build_sched_domains(doms_new + i);
6478match2:
6479 ;
6480 }
6481
6482 /* Remember the new sched domains */
6483 if (doms_cur != &fallback_doms)
6484 kfree(doms_cur);
6485 doms_cur = doms_new;
6486 ndoms_cur = ndoms_new;
6487}
6488
6422#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) 6489#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
6423static int arch_reinit_sched_domains(void) 6490static int arch_reinit_sched_domains(void)
6424{ 6491{