author     Tejun Heo <tj@kernel.org>                 2011-07-12 03:45:34 -0400
committer  H. Peter Anvin <hpa@linux.intel.com>      2011-07-13 00:58:29 -0400
commit     1e01979c8f502ac13e3cdece4f38712c5944e6e8 (patch)
tree       d47c4700bfdcffc3f7f68b19d50c588c20689b48 /arch
parent     d0ead157387f19801beb1b419568723b2e9b7c79 (diff)
x86, numa: Implement pfn -> nid mapping granularity check
SPARSEMEM w/o VMEMMAP and DISCONTIGMEM, both used only on 32bit, use
sections array to map pfn to nid which is limited in granularity.  If
NUMA nodes are laid out such that the mapping cannot be accurate, boot
will fail triggering BUG_ON() in mminit_verify_page_links().

On 32bit, this granularity is 512MiB w/ PAE and SPARSEMEM.  This seems
to have been granular enough until commit 2706a0bf7b (x86, NUMA:
Enable CONFIG_AMD_NUMA on 32bit too).  Apparently, there is a machine
which aligns NUMA nodes to 128MiB and has only AMD NUMA but not SRAT.
This led to the following BUG_ON().

 On node 0 totalpages: 2096615
   DMA zone: 32 pages used for memmap
   DMA zone: 0 pages reserved
   DMA zone: 3927 pages, LIFO batch:0
   Normal zone: 1740 pages used for memmap
   Normal zone: 220978 pages, LIFO batch:31
   HighMem zone: 16405 pages used for memmap
   HighMem zone: 1853533 pages, LIFO batch:31
 BUG: Int 6: CR2 (null)
      EDI (null)  ESI 00000002  EBP 00000002  ESP c1543ecc
      EBX f2400000  EDX 00000006  ECX (null)  EAX 00000001
      err (null)  EIP c16209aa   CS 00000060  flg 00010002
 Stack: f2400000 00220000 f7200800 c1620613 00220000 01000000 04400000 00238000
          (null) f7200000 00000002 f7200b58 f7200800 c1620929 000375fe   (null)
        f7200b80 c16395f0 00200a02 f7200a80   (null) 000375fe 00000002   (null)
 Pid: 0, comm: swapper Not tainted 2.6.39-rc5-00181-g2706a0b #17
 Call Trace:
  [<c136b1e5>] ? early_fault+0x2e/0x2e
  [<c16209aa>] ? mminit_verify_page_links+0x12/0x42
  [<c1620613>] ? memmap_init_zone+0xaf/0x10c
  [<c1620929>] ? free_area_init_node+0x2b9/0x2e3
  [<c1607e99>] ? free_area_init_nodes+0x3f2/0x451
  [<c1601d80>] ? paging_init+0x112/0x118
  [<c15f578d>] ? setup_arch+0x791/0x82f
  [<c15f43d9>] ? start_kernel+0x6a/0x257

This patch implements node_map_pfn_alignment() which determines the
maximum internode alignment, and updates numa_register_memblks() to
reject the NUMA configuration if that alignment exceeds the pfn -> nid
mapping granularity of the memory model as determined by
PAGES_PER_SECTION.

This makes the problematic machine boot w/ flatmem by rejecting the
NUMA config and provides protection against crazy NUMA configurations.

Signed-off-by: Tejun Heo <tj@kernel.org>
Link: http://lkml.kernel.org/r/20110712074534.GB2872@htj.dyndns.org
LKML-Reference: <20110628174613.GP478@escobedo.osrc.amd.com>
Reported-and-Tested-by: Hans Rosenfeld <hans.rosenfeld@amd.com>
Cc: Conny Seidel <conny.seidel@amd.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
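To put numbers on the failure mode: with 4KiB pages, 32bit PAE SPARSEMEM
sections span 512MiB (SECTION_SIZE_BITS = 29), so PAGES_PER_SECTION is
131072 pages, while 128MiB node alignment is only 32768 pages.  The
standalone userspace sketch below reproduces the new check with those
values; the constants are illustrative copies, not the kernel's own
definitions, and main() is of course not kernel code.

/*
 * Standalone illustration of the granularity check added by this patch,
 * fed with the values from the failing machine: 4KiB pages, 512MiB
 * sections (32bit PAE SPARSEMEM), 128MiB internode alignment.
 */
#include <stdio.h>

#define PAGE_SHIFT              12      /* 4KiB pages */
#define SECTION_SIZE_BITS       29      /* 512MiB sections on 32bit PAE */
#define PAGES_PER_SECTION       (1UL << (SECTION_SIZE_BITS - PAGE_SHIFT))
#define PFN_PHYS(x)             ((unsigned long long)(x) << PAGE_SHIFT)

int main(void)
{
        /* 128MiB internode alignment expressed in pages: 32768 */
        unsigned long pfn_align = (128ULL << 20) >> PAGE_SHIFT;

        /* mirrors the test added to numa_register_memblks() below */
        if (pfn_align && pfn_align < PAGES_PER_SECTION) {
                printf("Node alignment %lluMB < min %lluMB, rejecting NUMA config\n",
                       PFN_PHYS(pfn_align) >> 20,
                       PFN_PHYS(PAGES_PER_SECTION) >> 20);
                return 1;
        }
        return 0;
}

This prints "Node alignment 128MB < min 512MB, rejecting NUMA config",
matching the KERN_WARNING in the patch; with 64MiB non-PAE sections
(SECTION_SIZE_BITS = 26) the same 128MiB alignment would pass.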
Diffstat (limited to 'arch')
-rw-r--r--  arch/x86/mm/numa.c | 15
1 file changed, 15 insertions(+), 0 deletions(-)
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index f5510d889a22..fbeaaf416610 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -496,6 +496,7 @@ static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi)
 
 static int __init numa_register_memblks(struct numa_meminfo *mi)
 {
+        unsigned long uninitialized_var(pfn_align);
         int i, nid;
 
         /* Account for nodes with cpus and no memory */
@@ -511,6 +512,20 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
 
         /* for out of order entries */
         sort_node_map();
+
+        /*
+         * If sections array is gonna be used for pfn -> nid mapping, check
+         * whether its granularity is fine enough.
+         */
+#ifdef NODE_NOT_IN_PAGE_FLAGS
+        pfn_align = node_map_pfn_alignment();
+        if (pfn_align && pfn_align < PAGES_PER_SECTION) {
+                printk(KERN_WARNING "Node alignment %LuMB < min %LuMB, rejecting NUMA config\n",
+                       PFN_PHYS(pfn_align) >> 20,
+                       PFN_PHYS(PAGES_PER_SECTION) >> 20);
+                return -EINVAL;
+        }
+#endif
         if (!numa_meminfo_cover_memory(mi))
                 return -EINVAL;
 
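For reference, a simplified userspace sketch of the idea behind
node_map_pfn_alignment() as the message describes it: at each boundary
between ranges belonging to different nodes, widen a power-of-two mask
until it becomes too coarse to separate the two nodes, then accumulate
the masks and return the resulting alignment in pages.  The struct
range array is a hypothetical stand-in for the kernel's early node map;
the in-tree function (added to mm/page_alloc.c by this commit, outside
the 'arch' filter shown here) differs in its iteration helpers.

/*
 * Simplified sketch of node_map_pfn_alignment().  struct range is a
 * stand-in for the kernel's early node map; ranges are assumed sorted
 * by pfn and non-overlapping.
 */
#include <stdio.h>

struct range { unsigned long start_pfn, end_pfn; int nid; };

static unsigned long node_map_pfn_alignment(const struct range *map, int n)
{
        unsigned long accl_mask = 0, last_end = 0;
        int last_nid = -1;

        for (int i = 0; i < n; i++) {
                unsigned long start = map[i].start_pfn, mask;

                /* skip until we are at a boundary between two nodes */
                if (!start || last_nid < 0 || last_nid == map[i].nid) {
                        last_nid = map[i].nid;
                        last_end = map[i].end_pfn;
                        continue;
                }

                /*
                 * Start with a mask granular enough to pin-point the
                 * boundary pfn, then coarsen it until it can no longer
                 * separate the current node from the previous one.
                 */
                mask = ~((1UL << __builtin_ctzl(start)) - 1);
                while (mask && last_end <= (start & (mask << 1)))
                        mask <<= 1;

                accl_mask |= mask;      /* accumulate internode masks */
                last_nid = map[i].nid;
                last_end = map[i].end_pfn;
        }

        /* convert the mask to a page count; 0 means a single node */
        return ~accl_mask + 1;
}

int main(void)
{
        /* two nodes split at the 128MiB boundary (pfn 32768, 4KiB pages) */
        struct range map[] = { { 0, 32768, 0 }, { 32768, 65536, 1 } };

        /* prints 32768 pages, i.e. 128MiB */
        printf("max internode alignment: %lu pages\n",
               node_map_pfn_alignment(map, 2));
        return 0;
}

Fed the two 128MiB-aligned nodes from the report, this returns 32768
pages, which numa_register_memblks() compares against PAGES_PER_SECTION
(131072 pages on 32bit PAE); that is how the machine above ends up
rejecting the NUMA config and booting w/ flatmem.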