aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/sched.c
diff options
context:
space:
mode:
author Milton Miller <miltonm@bga.com> 2007-10-24 12:23:48 -0400
committer Ingo Molnar <mingo@elte.hu> 2007-10-24 12:23:48 -0400
commit 7378547f2c83ca16a30d0a7c488a43a688ea0888 (patch)
tree 6565a913ab6e649683fc49f094c35a9176434636 /kernel/sched.c
parent c9927c2bf4f45bb85e8b502ab3fb79ad6483c244 (diff)
sched: fix sched_domain sysctl registration again
commit 029190c515f15f512ac85de8fc686d4dbd0ae731 (cpuset sched_load_balance flag) was not tested with SCHED_DEBUG enabled as committed, as it dereferences NULL when used and it reordered the sysctl registration to cause it to never show any domains or their tunables. Fixes: 1) restore arch_init_sched_domains ordering: we can't walk the domains before we build them; presently we register cpus with empty directories (no domain directories or files). 2) make unregister_sched_domain_sysctl do nothing when already unregistered: detach_destroy_domains is now called one set of cpus at a time, and unregister_sysctl_table dereferences NULL if called with a null. While the function would always dereference null if called twice, in the previous code it was always called once and then was followed by a register. So only the hidden bug of the sysctl_root_table not being allocated followed by an attempt to free it would have shown the error. 3) always call unregister and register in partition_sched_domains: the code is "smart" about unregistering only needed domains. Since we aren't guaranteed any calls to unregister, always unregister. Without calling register on the way out we will not have a table or any sysctl tree. 4) warn if register is called without unregistering: the previous table memory is lost, leaving pointers to the later freed memory in sysctl and leaking the memory of the tables. Before this patch on a 2-core 4-thread box compiled for SMT and NUMA, the domains appear empty (there are actually 3 levels per cpu). 
And as soon as there are two domains, a null pointer is dereferenced (the call-trace entry marked "unreliable" in this case is stack garbage): bu19a:~# ls -R /proc/sys/kernel/sched_domain/ /proc/sys/kernel/sched_domain/: cpu0 cpu1 cpu2 cpu3 /proc/sys/kernel/sched_domain/cpu0: /proc/sys/kernel/sched_domain/cpu1: /proc/sys/kernel/sched_domain/cpu2: /proc/sys/kernel/sched_domain/cpu3: bu19a:~# mkdir /dev/cpuset bu19a:~# mount -tcpuset cpuset /dev/cpuset/ bu19a:~# cd /dev/cpuset/ bu19a:/dev/cpuset# echo 0 > sched_load_balance bu19a:/dev/cpuset# mkdir one bu19a:/dev/cpuset# echo 1 > one/cpus bu19a:/dev/cpuset# echo 0 > one/sched_load_balance Unable to handle kernel paging request for data at address 0x00000018 Faulting instruction address: 0xc00000000006b608 NIP: c00000000006b608 LR: c00000000006b604 CTR: 0000000000000000 REGS: c000000018d973f0 TRAP: 0300 Not tainted (2.6.23-bml) MSR: 9000000000009032 <EE,ME,IR,DR> CR: 28242442 XER: 00000000 DAR: 0000000000000018, DSISR: 0000000040000000 TASK = c00000001912e340[1987] 'bash' THREAD: c000000018d94000 CPU: 2 .. NIP [c00000000006b608] .unregister_sysctl_table+0x38/0x110 LR [c00000000006b604] .unregister_sysctl_table+0x34/0x110 Call Trace: [c000000018d97670] [c000000007017270] 0xc000000007017270 (unreliable) [c000000018d97720] [c000000000058710] .detach_destroy_domains+0x30/0xb0 [c000000018d977b0] [c00000000005cf1c] .partition_sched_domains+0x1bc/0x230 [c000000018d97870] [c00000000009fdc4] .rebuild_sched_domains+0xb4/0x4c0 [c000000018d97970] [c0000000000a02e8] .update_flag+0x118/0x170 [c000000018d97a80] [c0000000000a1768] .cpuset_common_file_write+0x568/0x820 [c000000018d97c00] [c00000000009d95c] .cgroup_file_write+0x7c/0x180 [c000000018d97cf0] [c0000000000e76b8] .vfs_write+0xe8/0x1b0 [c000000018d97d90] [c0000000000e810c] .sys_write+0x4c/0x90 [c000000018d97e30] [c00000000000852c] syscall_exit+0x0/0x40 Signed-off-by: Milton Miller <miltonm@bga.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/sched.c')
-rw-r--r-- kernel/sched.c | 25
1 files changed, 20 insertions, 5 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 2810e562a991..e51f0eabfef2 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5461,11 +5461,12 @@ static void register_sched_domain_sysctl(void)
5461 struct ctl_table *entry = sd_alloc_ctl_entry(cpu_num + 1); 5461 struct ctl_table *entry = sd_alloc_ctl_entry(cpu_num + 1);
5462 char buf[32]; 5462 char buf[32];
5463 5463
5464 WARN_ON(sd_ctl_dir[0].child);
5465 sd_ctl_dir[0].child = entry;
5466
5464 if (entry == NULL) 5467 if (entry == NULL)
5465 return; 5468 return;
5466 5469
5467 sd_ctl_dir[0].child = entry;
5468
5469 for_each_online_cpu(i) { 5470 for_each_online_cpu(i) {
5470 snprintf(buf, 32, "cpu%d", i); 5471 snprintf(buf, 32, "cpu%d", i);
5471 entry->procname = kstrdup(buf, GFP_KERNEL); 5472 entry->procname = kstrdup(buf, GFP_KERNEL);
@@ -5473,14 +5474,19 @@ static void register_sched_domain_sysctl(void)
5473 entry->child = sd_alloc_ctl_cpu_table(i); 5474 entry->child = sd_alloc_ctl_cpu_table(i);
5474 entry++; 5475 entry++;
5475 } 5476 }
5477
5478 WARN_ON(sd_sysctl_header);
5476 sd_sysctl_header = register_sysctl_table(sd_ctl_root); 5479 sd_sysctl_header = register_sysctl_table(sd_ctl_root);
5477} 5480}
5478 5481
5482/* may be called multiple times per register */
5479static void unregister_sched_domain_sysctl(void) 5483static void unregister_sched_domain_sysctl(void)
5480{ 5484{
5481 unregister_sysctl_table(sd_sysctl_header); 5485 if (sd_sysctl_header)
5486 unregister_sysctl_table(sd_sysctl_header);
5482 sd_sysctl_header = NULL; 5487 sd_sysctl_header = NULL;
5483 sd_free_ctl_entry(&sd_ctl_dir[0].child); 5488 if (sd_ctl_dir[0].child)
5489 sd_free_ctl_entry(&sd_ctl_dir[0].child);
5484} 5490}
5485#else 5491#else
5486static void register_sched_domain_sysctl(void) 5492static void register_sched_domain_sysctl(void)
@@ -6424,13 +6430,17 @@ static cpumask_t fallback_doms;
6424 */ 6430 */
6425static int arch_init_sched_domains(const cpumask_t *cpu_map) 6431static int arch_init_sched_domains(const cpumask_t *cpu_map)
6426{ 6432{
6433 int err;
6434
6427 ndoms_cur = 1; 6435 ndoms_cur = 1;
6428 doms_cur = kmalloc(sizeof(cpumask_t), GFP_KERNEL); 6436 doms_cur = kmalloc(sizeof(cpumask_t), GFP_KERNEL);
6429 if (!doms_cur) 6437 if (!doms_cur)
6430 doms_cur = &fallback_doms; 6438 doms_cur = &fallback_doms;
6431 cpus_andnot(*doms_cur, *cpu_map, cpu_isolated_map); 6439 cpus_andnot(*doms_cur, *cpu_map, cpu_isolated_map);
6440 err = build_sched_domains(doms_cur);
6432 register_sched_domain_sysctl(); 6441 register_sched_domain_sysctl();
6433 return build_sched_domains(doms_cur); 6442
6443 return err;
6434} 6444}
6435 6445
6436static void arch_destroy_sched_domains(const cpumask_t *cpu_map) 6446static void arch_destroy_sched_domains(const cpumask_t *cpu_map)
@@ -6479,6 +6489,9 @@ void partition_sched_domains(int ndoms_new, cpumask_t *doms_new)
6479{ 6489{
6480 int i, j; 6490 int i, j;
6481 6491
6492 /* always unregister in case we don't destroy any domains */
6493 unregister_sched_domain_sysctl();
6494
6482 if (doms_new == NULL) { 6495 if (doms_new == NULL) {
6483 ndoms_new = 1; 6496 ndoms_new = 1;
6484 doms_new = &fallback_doms; 6497 doms_new = &fallback_doms;
@@ -6514,6 +6527,8 @@ match2:
6514 kfree(doms_cur); 6527 kfree(doms_cur);
6515 doms_cur = doms_new; 6528 doms_cur = doms_new;
6516 ndoms_cur = ndoms_new; 6529 ndoms_cur = ndoms_new;
6530
6531 register_sched_domain_sysctl();
6517} 6532}
6518 6533
6519#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) 6534#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)