diff options
author | Suresh Siddha <suresh.b.siddha@intel.com> | 2008-03-25 13:14:35 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-04-19 13:19:55 -0400 |
commit | 6ec6e0d9f2fd7cb6ca6bc3bfab5ae7b5cdd8c36f (patch) | |
tree | bf05991fd8ecf8acd76fc48f5613ddc7bcb6926f /arch/x86 | |
parent | 8705a49c35be088a50b8d5fc5e1aa24d6711fd5b (diff) |
srat, x86: add support for nodes spanning other nodes
For example, if the physical address layout on a two-node system with 8 GB
memory is something like:
node 0: 0-2GB, 4-6GB
node 1: 2-4GB, 6-8GB
Current kernels fail to boot/detect this NUMA topology.
ACPI SRAT tables can expose such a topology, which needs to be supported.
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/Kconfig | 9 | ||||
-rw-r--r-- | arch/x86/mm/k8topology_64.c | 2 | ||||
-rw-r--r-- | arch/x86/mm/numa_64.c | 16 | ||||
-rw-r--r-- | arch/x86/mm/srat_64.c | 32 |
4 files changed, 42 insertions, 17 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 2a59dbb28248..07cf77113565 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -903,6 +903,15 @@ config X86_64_ACPI_NUMA | |||
903 | help | 903 | help |
904 | Enable ACPI SRAT based node topology detection. | 904 | Enable ACPI SRAT based node topology detection. |
905 | 905 | ||
906 | # Some NUMA nodes have memory ranges that span | ||
907 | # other nodes. Even though a pfn is valid and | ||
908 | # between a node's start and end pfns, it may not | ||
909 | # reside on that node. See memmap_init_zone() | ||
910 | # for details. | ||
911 | config NODES_SPAN_OTHER_NODES | ||
912 | def_bool y | ||
913 | depends on X86_64_ACPI_NUMA | ||
914 | |||
906 | config NUMA_EMU | 915 | config NUMA_EMU |
907 | bool "NUMA emulation" | 916 | bool "NUMA emulation" |
908 | depends on X86_64 && NUMA | 917 | depends on X86_64 && NUMA |
diff --git a/arch/x86/mm/k8topology_64.c b/arch/x86/mm/k8topology_64.c index 7a2ebce87df5..86808e666f9c 100644 --- a/arch/x86/mm/k8topology_64.c +++ b/arch/x86/mm/k8topology_64.c | |||
@@ -164,7 +164,7 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) | |||
164 | if (!found) | 164 | if (!found) |
165 | return -1; | 165 | return -1; |
166 | 166 | ||
167 | memnode_shift = compute_hash_shift(nodes, 8); | 167 | memnode_shift = compute_hash_shift(nodes, 8, NULL); |
168 | if (memnode_shift < 0) { | 168 | if (memnode_shift < 0) { |
169 | printk(KERN_ERR "No NUMA node hash function found. Contact maintainer\n"); | 169 | printk(KERN_ERR "No NUMA node hash function found. Contact maintainer\n"); |
170 | return -1; | 170 | return -1; |
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 2ea56f48f29b..cb3170186355 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c | |||
@@ -60,7 +60,7 @@ unsigned long __initdata nodemap_size; | |||
60 | * -1 if node overlap or lost ram (shift too big) | 60 | * -1 if node overlap or lost ram (shift too big) |
61 | */ | 61 | */ |
62 | static int __init populate_memnodemap(const struct bootnode *nodes, | 62 | static int __init populate_memnodemap(const struct bootnode *nodes, |
63 | int numnodes, int shift) | 63 | int numnodes, int shift, int *nodeids) |
64 | { | 64 | { |
65 | unsigned long addr, end; | 65 | unsigned long addr, end; |
66 | int i, res = -1; | 66 | int i, res = -1; |
@@ -76,7 +76,12 @@ static int __init populate_memnodemap(const struct bootnode *nodes, | |||
76 | do { | 76 | do { |
77 | if (memnodemap[addr >> shift] != NUMA_NO_NODE) | 77 | if (memnodemap[addr >> shift] != NUMA_NO_NODE) |
78 | return -1; | 78 | return -1; |
79 | memnodemap[addr >> shift] = i; | 79 | |
80 | if (!nodeids) | ||
81 | memnodemap[addr >> shift] = i; | ||
82 | else | ||
83 | memnodemap[addr >> shift] = nodeids[i]; | ||
84 | |||
80 | addr += (1UL << shift); | 85 | addr += (1UL << shift); |
81 | } while (addr < end); | 86 | } while (addr < end); |
82 | res = 1; | 87 | res = 1; |
@@ -139,7 +144,8 @@ static int __init extract_lsb_from_nodes(const struct bootnode *nodes, | |||
139 | return i; | 144 | return i; |
140 | } | 145 | } |
141 | 146 | ||
142 | int __init compute_hash_shift(struct bootnode *nodes, int numnodes) | 147 | int __init compute_hash_shift(struct bootnode *nodes, int numnodes, |
148 | int *nodeids) | ||
143 | { | 149 | { |
144 | int shift; | 150 | int shift; |
145 | 151 | ||
@@ -149,7 +155,7 @@ int __init compute_hash_shift(struct bootnode *nodes, int numnodes) | |||
149 | printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n", | 155 | printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n", |
150 | shift); | 156 | shift); |
151 | 157 | ||
152 | if (populate_memnodemap(nodes, numnodes, shift) != 1) { | 158 | if (populate_memnodemap(nodes, numnodes, shift, nodeids) != 1) { |
153 | printk(KERN_INFO "Your memory is not aligned you need to " | 159 | printk(KERN_INFO "Your memory is not aligned you need to " |
154 | "rebuild your kernel with a bigger NODEMAPSIZE " | 160 | "rebuild your kernel with a bigger NODEMAPSIZE " |
155 | "shift=%d\n", shift); | 161 | "shift=%d\n", shift); |
@@ -462,7 +468,7 @@ done: | |||
462 | } | 468 | } |
463 | } | 469 | } |
464 | out: | 470 | out: |
465 | memnode_shift = compute_hash_shift(nodes, num_nodes); | 471 | memnode_shift = compute_hash_shift(nodes, num_nodes, NULL); |
466 | if (memnode_shift < 0) { | 472 | if (memnode_shift < 0) { |
467 | memnode_shift = 0; | 473 | memnode_shift = 0; |
468 | printk(KERN_ERR "No NUMA hash function found. NUMA emulation " | 474 | printk(KERN_ERR "No NUMA hash function found. NUMA emulation " |
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index 1bae9c855ceb..fb43d89f46f3 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c | |||
@@ -32,6 +32,10 @@ static struct bootnode nodes_add[MAX_NUMNODES]; | |||
32 | static int found_add_area __initdata; | 32 | static int found_add_area __initdata; |
33 | int hotadd_percent __initdata = 0; | 33 | int hotadd_percent __initdata = 0; |
34 | 34 | ||
35 | static int num_node_memblks __initdata; | ||
36 | static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata; | ||
37 | static int memblk_nodeid[NR_NODE_MEMBLKS] __initdata; | ||
38 | |||
35 | /* Too small nodes confuse the VM badly. Usually they result | 39 | /* Too small nodes confuse the VM badly. Usually they result |
36 | from BIOS bugs. */ | 40 | from BIOS bugs. */ |
37 | #define NODE_MIN_SIZE (4*1024*1024) | 41 | #define NODE_MIN_SIZE (4*1024*1024) |
@@ -41,17 +45,17 @@ static __init int setup_node(int pxm) | |||
41 | return acpi_map_pxm_to_node(pxm); | 45 | return acpi_map_pxm_to_node(pxm); |
42 | } | 46 | } |
43 | 47 | ||
44 | static __init int conflicting_nodes(unsigned long start, unsigned long end) | 48 | static __init int conflicting_memblks(unsigned long start, unsigned long end) |
45 | { | 49 | { |
46 | int i; | 50 | int i; |
47 | for_each_node_mask(i, nodes_parsed) { | 51 | for (i = 0; i < num_node_memblks; i++) { |
48 | struct bootnode *nd = &nodes[i]; | 52 | struct bootnode *nd = &node_memblk_range[i]; |
49 | if (nd->start == nd->end) | 53 | if (nd->start == nd->end) |
50 | continue; | 54 | continue; |
51 | if (nd->end > start && nd->start < end) | 55 | if (nd->end > start && nd->start < end) |
52 | return i; | 56 | return memblk_nodeid[i]; |
53 | if (nd->end == end && nd->start == start) | 57 | if (nd->end == end && nd->start == start) |
54 | return i; | 58 | return memblk_nodeid[i]; |
55 | } | 59 | } |
56 | return -1; | 60 | return -1; |
57 | } | 61 | } |
@@ -258,7 +262,7 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) | |||
258 | bad_srat(); | 262 | bad_srat(); |
259 | return; | 263 | return; |
260 | } | 264 | } |
261 | i = conflicting_nodes(start, end); | 265 | i = conflicting_memblks(start, end); |
262 | if (i == node) { | 266 | if (i == node) { |
263 | printk(KERN_WARNING | 267 | printk(KERN_WARNING |
264 | "SRAT: Warning: PXM %d (%lx-%lx) overlaps with itself (%Lx-%Lx)\n", | 268 | "SRAT: Warning: PXM %d (%lx-%lx) overlaps with itself (%Lx-%Lx)\n", |
@@ -283,10 +287,10 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) | |||
283 | nd->end = end; | 287 | nd->end = end; |
284 | } | 288 | } |
285 | 289 | ||
286 | printk(KERN_INFO "SRAT: Node %u PXM %u %Lx-%Lx\n", node, pxm, | 290 | printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm, |
287 | nd->start, nd->end); | 291 | start, end); |
288 | e820_register_active_regions(node, nd->start >> PAGE_SHIFT, | 292 | e820_register_active_regions(node, start >> PAGE_SHIFT, |
289 | nd->end >> PAGE_SHIFT); | 293 | end >> PAGE_SHIFT); |
290 | push_node_boundaries(node, nd->start >> PAGE_SHIFT, | 294 | push_node_boundaries(node, nd->start >> PAGE_SHIFT, |
291 | nd->end >> PAGE_SHIFT); | 295 | nd->end >> PAGE_SHIFT); |
292 | 296 | ||
@@ -298,6 +302,11 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) | |||
298 | if ((nd->start | nd->end) == 0) | 302 | if ((nd->start | nd->end) == 0) |
299 | node_clear(node, nodes_parsed); | 303 | node_clear(node, nodes_parsed); |
300 | } | 304 | } |
305 | |||
306 | node_memblk_range[num_node_memblks].start = start; | ||
307 | node_memblk_range[num_node_memblks].end = end; | ||
308 | memblk_nodeid[num_node_memblks] = node; | ||
309 | num_node_memblks++; | ||
301 | } | 310 | } |
302 | 311 | ||
303 | /* Sanity check to catch more bad SRATs (they are amazingly common). | 312 | /* Sanity check to catch more bad SRATs (they are amazingly common). |
@@ -368,7 +377,8 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) | |||
368 | return -1; | 377 | return -1; |
369 | } | 378 | } |
370 | 379 | ||
371 | memnode_shift = compute_hash_shift(nodes, MAX_NUMNODES); | 380 | memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks, |
381 | memblk_nodeid); | ||
372 | if (memnode_shift < 0) { | 382 | if (memnode_shift < 0) { |
373 | printk(KERN_ERR | 383 | printk(KERN_ERR |
374 | "SRAT: No NUMA node hash function found. Contact maintainer\n"); | 384 | "SRAT: No NUMA node hash function found. Contact maintainer\n"); |