aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com> 2013-12-30 06:36:04 -0500
committer: Benjamin Herrenschmidt <benh@kernel.crashing.org> 2014-01-14 21:58:40 -0500
commit: 68fb18aacb410aff26c24a3d73d27ad496f0a548 (patch)
tree: 1dcd84a72e812f535d5a7e6ab79c742d4f50ca04
parent: d4edc5b6c480a0917e61d93d55531d7efa6230be (diff)
powerpc: Add debug checks to catch invalid cpu-to-node mappings
There have been some weird bugs in the past where the kernel tried to associate threads of the same core to different NUMA nodes, and things went haywire after that point (as expected).

But unfortunately, root-causing such issues has been quite challenging, due to the lack of appropriate debug checks in the kernel. These bugs usually lead to some odd soft-lockups in the scheduler's build-sched-domain code in the CPU hotplug path, which makes it very hard to trace them back to the incorrect cpu-to-node mappings.

So add appropriate debug checks to catch such invalid cpu-to-node mappings as early as possible.

Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
-rw-r--r-- arch/powerpc/mm/numa.c 26
1 files changed, 24 insertions, 2 deletions
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 6847d509162f..4f50c6a9e68f 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -570,16 +570,38 @@ out:
570 return nid; 570 return nid;
571} 571}
572 572
573static void verify_cpu_node_mapping(int cpu, int node)
574{
575 int base, sibling, i;
576
577 /* Verify that all the threads in the core belong to the same node */
578 base = cpu_first_thread_sibling(cpu);
579
580 for (i = 0; i < threads_per_core; i++) {
581 sibling = base + i;
582
583 if (sibling == cpu || cpu_is_offline(sibling))
584 continue;
585
586 if (cpu_to_node(sibling) != node) {
587 WARN(1, "CPU thread siblings %d and %d don't belong"
588 " to the same node!\n", cpu, sibling);
589 break;
590 }
591 }
592}
593
573static int cpu_numa_callback(struct notifier_block *nfb, unsigned long action, 594static int cpu_numa_callback(struct notifier_block *nfb, unsigned long action,
574 void *hcpu) 595 void *hcpu)
575{ 596{
576 unsigned long lcpu = (unsigned long)hcpu; 597 unsigned long lcpu = (unsigned long)hcpu;
577 int ret = NOTIFY_DONE; 598 int ret = NOTIFY_DONE, nid;
578 599
579 switch (action) { 600 switch (action) {
580 case CPU_UP_PREPARE: 601 case CPU_UP_PREPARE:
581 case CPU_UP_PREPARE_FROZEN: 602 case CPU_UP_PREPARE_FROZEN:
582 numa_setup_cpu(lcpu); 603 nid = numa_setup_cpu(lcpu);
604 verify_cpu_node_mapping((int)lcpu, nid);
583 ret = NOTIFY_OK; 605 ret = NOTIFY_OK;
584 break; 606 break;
585#ifdef CONFIG_HOTPLUG_CPU 607#ifdef CONFIG_HOTPLUG_CPU