aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSuresh Siddha <suresh.b.siddha@intel.com>2008-03-25 13:14:35 -0400
committerIngo Molnar <mingo@elte.hu>2008-04-19 13:19:55 -0400
commit6ec6e0d9f2fd7cb6ca6bc3bfab5ae7b5cdd8c36f (patch)
treebf05991fd8ecf8acd76fc48f5613ddc7bcb6926f
parent8705a49c35be088a50b8d5fc5e1aa24d6711fd5b (diff)
srat, x86: add support for nodes spanning other nodes
For example, If the physical address layout on a two node system with 8 GB memory is something like: node 0: 0-2GB, 4-6GB node 1: 2-4GB, 6-8GB Current kernels fail to boot/detect this NUMA topology. ACPI SRAT tables can expose such a topology which needs to be supported. Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com> Signed-off-by: Ingo Molnar <mingo@elte.hu> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-rw-r--r--arch/x86/Kconfig9
-rw-r--r--arch/x86/mm/k8topology_64.c2
-rw-r--r--arch/x86/mm/numa_64.c16
-rw-r--r--arch/x86/mm/srat_64.c32
-rw-r--r--include/asm-x86/numa_64.h3
5 files changed, 44 insertions, 18 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 2a59dbb28248..07cf77113565 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -903,6 +903,15 @@ config X86_64_ACPI_NUMA
903 help 903 help
904 Enable ACPI SRAT based node topology detection. 904 Enable ACPI SRAT based node topology detection.
905 905
906# Some NUMA nodes have memory ranges that span
907# other nodes. Even though a pfn is valid and
908# between a node's start and end pfns, it may not
909# reside on that node. See memmap_init_zone()
910# for details.
911config NODES_SPAN_OTHER_NODES
912 def_bool y
913 depends on X86_64_ACPI_NUMA
914
906config NUMA_EMU 915config NUMA_EMU
907 bool "NUMA emulation" 916 bool "NUMA emulation"
908 depends on X86_64 && NUMA 917 depends on X86_64 && NUMA
diff --git a/arch/x86/mm/k8topology_64.c b/arch/x86/mm/k8topology_64.c
index 7a2ebce87df5..86808e666f9c 100644
--- a/arch/x86/mm/k8topology_64.c
+++ b/arch/x86/mm/k8topology_64.c
@@ -164,7 +164,7 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
164 if (!found) 164 if (!found)
165 return -1; 165 return -1;
166 166
167 memnode_shift = compute_hash_shift(nodes, 8); 167 memnode_shift = compute_hash_shift(nodes, 8, NULL);
168 if (memnode_shift < 0) { 168 if (memnode_shift < 0) {
169 printk(KERN_ERR "No NUMA node hash function found. Contact maintainer\n"); 169 printk(KERN_ERR "No NUMA node hash function found. Contact maintainer\n");
170 return -1; 170 return -1;
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 2ea56f48f29b..cb3170186355 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -60,7 +60,7 @@ unsigned long __initdata nodemap_size;
60 * -1 if node overlap or lost ram (shift too big) 60 * -1 if node overlap or lost ram (shift too big)
61 */ 61 */
62static int __init populate_memnodemap(const struct bootnode *nodes, 62static int __init populate_memnodemap(const struct bootnode *nodes,
63 int numnodes, int shift) 63 int numnodes, int shift, int *nodeids)
64{ 64{
65 unsigned long addr, end; 65 unsigned long addr, end;
66 int i, res = -1; 66 int i, res = -1;
@@ -76,7 +76,12 @@ static int __init populate_memnodemap(const struct bootnode *nodes,
76 do { 76 do {
77 if (memnodemap[addr >> shift] != NUMA_NO_NODE) 77 if (memnodemap[addr >> shift] != NUMA_NO_NODE)
78 return -1; 78 return -1;
79 memnodemap[addr >> shift] = i; 79
80 if (!nodeids)
81 memnodemap[addr >> shift] = i;
82 else
83 memnodemap[addr >> shift] = nodeids[i];
84
80 addr += (1UL << shift); 85 addr += (1UL << shift);
81 } while (addr < end); 86 } while (addr < end);
82 res = 1; 87 res = 1;
@@ -139,7 +144,8 @@ static int __init extract_lsb_from_nodes(const struct bootnode *nodes,
139 return i; 144 return i;
140} 145}
141 146
142int __init compute_hash_shift(struct bootnode *nodes, int numnodes) 147int __init compute_hash_shift(struct bootnode *nodes, int numnodes,
148 int *nodeids)
143{ 149{
144 int shift; 150 int shift;
145 151
@@ -149,7 +155,7 @@ int __init compute_hash_shift(struct bootnode *nodes, int numnodes)
149 printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n", 155 printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n",
150 shift); 156 shift);
151 157
152 if (populate_memnodemap(nodes, numnodes, shift) != 1) { 158 if (populate_memnodemap(nodes, numnodes, shift, nodeids) != 1) {
153 printk(KERN_INFO "Your memory is not aligned you need to " 159 printk(KERN_INFO "Your memory is not aligned you need to "
154 "rebuild your kernel with a bigger NODEMAPSIZE " 160 "rebuild your kernel with a bigger NODEMAPSIZE "
155 "shift=%d\n", shift); 161 "shift=%d\n", shift);
@@ -462,7 +468,7 @@ done:
462 } 468 }
463 } 469 }
464out: 470out:
465 memnode_shift = compute_hash_shift(nodes, num_nodes); 471 memnode_shift = compute_hash_shift(nodes, num_nodes, NULL);
466 if (memnode_shift < 0) { 472 if (memnode_shift < 0) {
467 memnode_shift = 0; 473 memnode_shift = 0;
468 printk(KERN_ERR "No NUMA hash function found. NUMA emulation " 474 printk(KERN_ERR "No NUMA hash function found. NUMA emulation "
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 1bae9c855ceb..fb43d89f46f3 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -32,6 +32,10 @@ static struct bootnode nodes_add[MAX_NUMNODES];
32static int found_add_area __initdata; 32static int found_add_area __initdata;
33int hotadd_percent __initdata = 0; 33int hotadd_percent __initdata = 0;
34 34
35static int num_node_memblks __initdata;
36static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata;
37static int memblk_nodeid[NR_NODE_MEMBLKS] __initdata;
38
35/* Too small nodes confuse the VM badly. Usually they result 39/* Too small nodes confuse the VM badly. Usually they result
36 from BIOS bugs. */ 40 from BIOS bugs. */
37#define NODE_MIN_SIZE (4*1024*1024) 41#define NODE_MIN_SIZE (4*1024*1024)
@@ -41,17 +45,17 @@ static __init int setup_node(int pxm)
41 return acpi_map_pxm_to_node(pxm); 45 return acpi_map_pxm_to_node(pxm);
42} 46}
43 47
44static __init int conflicting_nodes(unsigned long start, unsigned long end) 48static __init int conflicting_memblks(unsigned long start, unsigned long end)
45{ 49{
46 int i; 50 int i;
47 for_each_node_mask(i, nodes_parsed) { 51 for (i = 0; i < num_node_memblks; i++) {
48 struct bootnode *nd = &nodes[i]; 52 struct bootnode *nd = &node_memblk_range[i];
49 if (nd->start == nd->end) 53 if (nd->start == nd->end)
50 continue; 54 continue;
51 if (nd->end > start && nd->start < end) 55 if (nd->end > start && nd->start < end)
52 return i; 56 return memblk_nodeid[i];
53 if (nd->end == end && nd->start == start) 57 if (nd->end == end && nd->start == start)
54 return i; 58 return memblk_nodeid[i];
55 } 59 }
56 return -1; 60 return -1;
57} 61}
@@ -258,7 +262,7 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
258 bad_srat(); 262 bad_srat();
259 return; 263 return;
260 } 264 }
261 i = conflicting_nodes(start, end); 265 i = conflicting_memblks(start, end);
262 if (i == node) { 266 if (i == node) {
263 printk(KERN_WARNING 267 printk(KERN_WARNING
264 "SRAT: Warning: PXM %d (%lx-%lx) overlaps with itself (%Lx-%Lx)\n", 268 "SRAT: Warning: PXM %d (%lx-%lx) overlaps with itself (%Lx-%Lx)\n",
@@ -283,10 +287,10 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
283 nd->end = end; 287 nd->end = end;
284 } 288 }
285 289
286 printk(KERN_INFO "SRAT: Node %u PXM %u %Lx-%Lx\n", node, pxm, 290 printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm,
287 nd->start, nd->end); 291 start, end);
288 e820_register_active_regions(node, nd->start >> PAGE_SHIFT, 292 e820_register_active_regions(node, start >> PAGE_SHIFT,
289 nd->end >> PAGE_SHIFT); 293 end >> PAGE_SHIFT);
290 push_node_boundaries(node, nd->start >> PAGE_SHIFT, 294 push_node_boundaries(node, nd->start >> PAGE_SHIFT,
291 nd->end >> PAGE_SHIFT); 295 nd->end >> PAGE_SHIFT);
292 296
@@ -298,6 +302,11 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
298 if ((nd->start | nd->end) == 0) 302 if ((nd->start | nd->end) == 0)
299 node_clear(node, nodes_parsed); 303 node_clear(node, nodes_parsed);
300 } 304 }
305
306 node_memblk_range[num_node_memblks].start = start;
307 node_memblk_range[num_node_memblks].end = end;
308 memblk_nodeid[num_node_memblks] = node;
309 num_node_memblks++;
301} 310}
302 311
303/* Sanity check to catch more bad SRATs (they are amazingly common). 312/* Sanity check to catch more bad SRATs (they are amazingly common).
@@ -368,7 +377,8 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
368 return -1; 377 return -1;
369 } 378 }
370 379
371 memnode_shift = compute_hash_shift(nodes, MAX_NUMNODES); 380 memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks,
381 memblk_nodeid);
372 if (memnode_shift < 0) { 382 if (memnode_shift < 0) {
373 printk(KERN_ERR 383 printk(KERN_ERR
374 "SRAT: No NUMA node hash function found. Contact maintainer\n"); 384 "SRAT: No NUMA node hash function found. Contact maintainer\n");
diff --git a/include/asm-x86/numa_64.h b/include/asm-x86/numa_64.h
index 32c22ae0709f..22e87c9f6a80 100644
--- a/include/asm-x86/numa_64.h
+++ b/include/asm-x86/numa_64.h
@@ -9,7 +9,8 @@ struct bootnode {
9 u64 end; 9 u64 end;
10}; 10};
11 11
12extern int compute_hash_shift(struct bootnode *nodes, int numnodes); 12extern int compute_hash_shift(struct bootnode *nodes, int numblks,
13 int *nodeids);
13 14
14#define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT)) 15#define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT))
15 16