diff options
author | Tejun Heo <tj@kernel.org> | 2011-01-23 08:37:39 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2011-01-28 08:54:09 -0500 |
commit | bbc9e2f452d9c4b166d1f9a78d941d80173312fe (patch) | |
tree | d75d41187b296235f833e942ed8c1dd938a7bae4 /arch/x86/mm | |
parent | 89e5dc218e084e13a3996db6693b01478912f4ee (diff) |
x86: Unify cpu/apicid <-> NUMA node mapping between 32 and 64bit
The mapping between cpu/apicid and node is done via
apicid_to_node[] on 64bit and apicid_2_node[] +
apic->x86_32_numa_cpu_node() on 32bit. This difference makes it
difficult to further unify 32 and 64bit NUMA handling.
This patch unifies it by replacing both apicid_to_node[] and
apicid_2_node[] with __apicid_to_node[] array, which is accessed
by two accessors - set_apicid_to_node() and numa_cpu_node(). On
64bit, numa_cpu_node() always consults __apicid_to_node[]
directly, while 32bit goes through the apic->x86_32_numa_cpu_node()
method to allow apic implementations to override it.
srat_detect_node() for AMD CPUs contains a workaround for broken
NUMA configurations which assumes a relationship between APIC ID,
HT node ID and NUMA topology. Leave it to access
__apicid_to_node[] directly, as mapping through the CPU might result
in an undesirable behavior change. The comment is reformatted and
updated to note the ugliness.
Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Pekka Enberg <penberg@kernel.org>
Cc: eric.dumazet@gmail.com
Cc: yinghai@kernel.org
Cc: brgerst@gmail.com
Cc: gorcunov@gmail.com
Cc: shaohui.zheng@intel.com
Cc: rientjes@google.com
LKML-Reference: <1295789862-25482-14-git-send-email-tj@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: David Rientjes <rientjes@google.com>
Diffstat (limited to 'arch/x86/mm')
-rw-r--r-- | arch/x86/mm/amdtopology_64.c | 4 | ||||
-rw-r--r-- | arch/x86/mm/numa.c | 6 | ||||
-rw-r--r-- | arch/x86/mm/numa_32.c | 6 | ||||
-rw-r--r-- | arch/x86/mm/numa_64.c | 26 | ||||
-rw-r--r-- | arch/x86/mm/srat_32.c | 2 | ||||
-rw-r--r-- | arch/x86/mm/srat_64.c | 12 |
6 files changed, 31 insertions, 25 deletions
diff --git a/arch/x86/mm/amdtopology_64.c b/arch/x86/mm/amdtopology_64.c index f21962c435e..c7fae38c408 100644 --- a/arch/x86/mm/amdtopology_64.c +++ b/arch/x86/mm/amdtopology_64.c | |||
@@ -247,7 +247,7 @@ void __init amd_fake_nodes(const struct bootnode *nodes, int nr_nodes) | |||
247 | __acpi_map_pxm_to_node(nid, i); | 247 | __acpi_map_pxm_to_node(nid, i); |
248 | #endif | 248 | #endif |
249 | } | 249 | } |
250 | memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node)); | 250 | memcpy(__apicid_to_node, fake_apicid_to_node, sizeof(__apicid_to_node)); |
251 | } | 251 | } |
252 | #endif /* CONFIG_NUMA_EMU */ | 252 | #endif /* CONFIG_NUMA_EMU */ |
253 | 253 | ||
@@ -285,7 +285,7 @@ int __init amd_scan_nodes(void) | |||
285 | nodes[i].start >> PAGE_SHIFT, | 285 | nodes[i].start >> PAGE_SHIFT, |
286 | nodes[i].end >> PAGE_SHIFT); | 286 | nodes[i].end >> PAGE_SHIFT); |
287 | for (j = apicid_base; j < cores + apicid_base; j++) | 287 | for (j = apicid_base; j < cores + apicid_base; j++) |
288 | apicid_to_node[(i << bits) + j] = i; | 288 | set_apicid_to_node((i << bits) + j, i); |
289 | setup_node_bootmem(i, nodes[i].start, nodes[i].end); | 289 | setup_node_bootmem(i, nodes[i].start, nodes[i].end); |
290 | } | 290 | } |
291 | 291 | ||
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index ebf6d7887a3..480b3571c8b 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c | |||
@@ -26,8 +26,12 @@ static __init int numa_setup(char *opt) | |||
26 | early_param("numa", numa_setup); | 26 | early_param("numa", numa_setup); |
27 | 27 | ||
28 | /* | 28 | /* |
29 | * Which logical CPUs are on which nodes | 29 | * apicid, cpu, node mappings |
30 | */ | 30 | */ |
31 | s16 __apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = { | ||
32 | [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE | ||
33 | }; | ||
34 | |||
31 | cpumask_var_t node_to_cpumask_map[MAX_NUMNODES]; | 35 | cpumask_var_t node_to_cpumask_map[MAX_NUMNODES]; |
32 | EXPORT_SYMBOL(node_to_cpumask_map); | 36 | EXPORT_SYMBOL(node_to_cpumask_map); |
33 | 37 | ||
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index 84a3e4c9f27..8d91d227be0 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c | |||
@@ -110,6 +110,12 @@ void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); | |||
110 | 110 | ||
111 | static unsigned long kva_start_pfn; | 111 | static unsigned long kva_start_pfn; |
112 | static unsigned long kva_pages; | 112 | static unsigned long kva_pages; |
113 | |||
114 | int __cpuinit numa_cpu_node(int cpu) | ||
115 | { | ||
116 | return apic->x86_32_numa_cpu_node(cpu); | ||
117 | } | ||
118 | |||
113 | /* | 119 | /* |
114 | * FLAT - support for basic PC memory model with discontig enabled, essentially | 120 | * FLAT - support for basic PC memory model with discontig enabled, essentially |
115 | * a single node with all available processors in it with a flat | 121 | * a single node with all available processors in it with a flat |
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 95ea1551eeb..1e1026f61a5 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c | |||
@@ -26,10 +26,6 @@ EXPORT_SYMBOL(node_data); | |||
26 | 26 | ||
27 | struct memnode memnode; | 27 | struct memnode memnode; |
28 | 28 | ||
29 | s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = { | ||
30 | [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE | ||
31 | }; | ||
32 | |||
33 | static unsigned long __initdata nodemap_addr; | 29 | static unsigned long __initdata nodemap_addr; |
34 | static unsigned long __initdata nodemap_size; | 30 | static unsigned long __initdata nodemap_size; |
35 | 31 | ||
@@ -716,12 +712,8 @@ void __init init_cpu_to_node(void) | |||
716 | BUG_ON(cpu_to_apicid == NULL); | 712 | BUG_ON(cpu_to_apicid == NULL); |
717 | 713 | ||
718 | for_each_possible_cpu(cpu) { | 714 | for_each_possible_cpu(cpu) { |
719 | int node; | 715 | int node = numa_cpu_node(cpu); |
720 | u16 apicid = cpu_to_apicid[cpu]; | ||
721 | 716 | ||
722 | if (apicid == BAD_APICID) | ||
723 | continue; | ||
724 | node = apicid_to_node[apicid]; | ||
725 | if (node == NUMA_NO_NODE) | 717 | if (node == NUMA_NO_NODE) |
726 | continue; | 718 | continue; |
727 | if (!node_online(node)) | 719 | if (!node_online(node)) |
@@ -731,6 +723,14 @@ void __init init_cpu_to_node(void) | |||
731 | } | 723 | } |
732 | #endif | 724 | #endif |
733 | 725 | ||
726 | int __cpuinit numa_cpu_node(int cpu) | ||
727 | { | ||
728 | int apicid = early_per_cpu(x86_cpu_to_apicid, cpu); | ||
729 | |||
730 | if (apicid != BAD_APICID) | ||
731 | return __apicid_to_node[apicid]; | ||
732 | return NUMA_NO_NODE; | ||
733 | } | ||
734 | 734 | ||
735 | void __cpuinit numa_set_node(int cpu, int node) | 735 | void __cpuinit numa_set_node(int cpu, int node) |
736 | { | 736 | { |
@@ -776,13 +776,9 @@ void __cpuinit numa_remove_cpu(int cpu) | |||
776 | void __cpuinit numa_add_cpu(int cpu) | 776 | void __cpuinit numa_add_cpu(int cpu) |
777 | { | 777 | { |
778 | unsigned long addr; | 778 | unsigned long addr; |
779 | u16 apicid; | 779 | int physnid, nid; |
780 | int physnid; | ||
781 | int nid = NUMA_NO_NODE; | ||
782 | 780 | ||
783 | apicid = early_per_cpu(x86_cpu_to_apicid, cpu); | 781 | nid = numa_cpu_node(cpu); |
784 | if (apicid != BAD_APICID) | ||
785 | nid = apicid_to_node[apicid]; | ||
786 | if (nid == NUMA_NO_NODE) | 782 | if (nid == NUMA_NO_NODE) |
787 | nid = early_cpu_to_node(cpu); | 783 | nid = early_cpu_to_node(cpu); |
788 | BUG_ON(nid == NUMA_NO_NODE || !node_online(nid)); | 784 | BUG_ON(nid == NUMA_NO_NODE || !node_online(nid)); |
diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c index 6027a481000..48651c6f657 100644 --- a/arch/x86/mm/srat_32.c +++ b/arch/x86/mm/srat_32.c | |||
@@ -255,7 +255,7 @@ int __init get_memcfg_from_srat(void) | |||
255 | num_memory_chunks); | 255 | num_memory_chunks); |
256 | 256 | ||
257 | for (i = 0; i < MAX_LOCAL_APIC; i++) | 257 | for (i = 0; i < MAX_LOCAL_APIC; i++) |
258 | apicid_2_node[i] = pxm_to_node(apicid_to_pxm[i]); | 258 | set_apicid_to_node(i, pxm_to_node(apicid_to_pxm[i])); |
259 | 259 | ||
260 | for (j = 0; j < num_memory_chunks; j++){ | 260 | for (j = 0; j < num_memory_chunks; j++){ |
261 | struct node_memory_chunk_s * chunk = &node_memory_chunk[j]; | 261 | struct node_memory_chunk_s * chunk = &node_memory_chunk[j]; |
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index 603d285d1da..9a97261a241 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c | |||
@@ -79,7 +79,7 @@ static __init void bad_srat(void) | |||
79 | printk(KERN_ERR "SRAT: SRAT not used.\n"); | 79 | printk(KERN_ERR "SRAT: SRAT not used.\n"); |
80 | acpi_numa = -1; | 80 | acpi_numa = -1; |
81 | for (i = 0; i < MAX_LOCAL_APIC; i++) | 81 | for (i = 0; i < MAX_LOCAL_APIC; i++) |
82 | apicid_to_node[i] = NUMA_NO_NODE; | 82 | set_apicid_to_node(i, NUMA_NO_NODE); |
83 | for (i = 0; i < MAX_NUMNODES; i++) { | 83 | for (i = 0; i < MAX_NUMNODES; i++) { |
84 | nodes[i].start = nodes[i].end = 0; | 84 | nodes[i].start = nodes[i].end = 0; |
85 | nodes_add[i].start = nodes_add[i].end = 0; | 85 | nodes_add[i].start = nodes_add[i].end = 0; |
@@ -138,7 +138,7 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa) | |||
138 | printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node); | 138 | printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node); |
139 | return; | 139 | return; |
140 | } | 140 | } |
141 | apicid_to_node[apic_id] = node; | 141 | set_apicid_to_node(apic_id, node); |
142 | node_set(node, cpu_nodes_parsed); | 142 | node_set(node, cpu_nodes_parsed); |
143 | acpi_numa = 1; | 143 | acpi_numa = 1; |
144 | printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n", | 144 | printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n", |
@@ -178,7 +178,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) | |||
178 | return; | 178 | return; |
179 | } | 179 | } |
180 | 180 | ||
181 | apicid_to_node[apic_id] = node; | 181 | set_apicid_to_node(apic_id, node); |
182 | node_set(node, cpu_nodes_parsed); | 182 | node_set(node, cpu_nodes_parsed); |
183 | acpi_numa = 1; | 183 | acpi_numa = 1; |
184 | printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u\n", | 184 | printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u\n", |
@@ -521,7 +521,7 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes) | |||
521 | * node, it must now point to the fake node ID. | 521 | * node, it must now point to the fake node ID. |
522 | */ | 522 | */ |
523 | for (j = 0; j < MAX_LOCAL_APIC; j++) | 523 | for (j = 0; j < MAX_LOCAL_APIC; j++) |
524 | if (apicid_to_node[j] == nid && | 524 | if (__apicid_to_node[j] == nid && |
525 | fake_apicid_to_node[j] == NUMA_NO_NODE) | 525 | fake_apicid_to_node[j] == NUMA_NO_NODE) |
526 | fake_apicid_to_node[j] = i; | 526 | fake_apicid_to_node[j] = i; |
527 | } | 527 | } |
@@ -532,13 +532,13 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes) | |||
532 | * value. | 532 | * value. |
533 | */ | 533 | */ |
534 | for (i = 0; i < MAX_LOCAL_APIC; i++) | 534 | for (i = 0; i < MAX_LOCAL_APIC; i++) |
535 | if (apicid_to_node[i] != NUMA_NO_NODE && | 535 | if (__apicid_to_node[i] != NUMA_NO_NODE && |
536 | fake_apicid_to_node[i] == NUMA_NO_NODE) | 536 | fake_apicid_to_node[i] == NUMA_NO_NODE) |
537 | fake_apicid_to_node[i] = 0; | 537 | fake_apicid_to_node[i] = 0; |
538 | 538 | ||
539 | for (i = 0; i < num_nodes; i++) | 539 | for (i = 0; i < num_nodes; i++) |
540 | __acpi_map_pxm_to_node(fake_node_to_pxm_map[i], i); | 540 | __acpi_map_pxm_to_node(fake_node_to_pxm_map[i], i); |
541 | memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node)); | 541 | memcpy(__apicid_to_node, fake_apicid_to_node, sizeof(__apicid_to_node)); |
542 | 542 | ||
543 | nodes_clear(nodes_parsed); | 543 | nodes_clear(nodes_parsed); |
544 | for (i = 0; i < num_nodes; i++) | 544 | for (i = 0; i < num_nodes; i++) |