aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/mm
diff options
context:
space:
mode:
authorLuiz Capitulino <lcapitulino@redhat.com>2014-08-22 16:27:36 -0400
committerIngo Molnar <mingo@kernel.org>2014-09-16 02:55:10 -0400
commit8b375f64dcf45ba5cfb36398b69b877dc35410fa (patch)
treeb5665a940292929fc3e21d4d93dbba9d532460e0 /arch/x86/mm
parent9661d5bcd058fe15b4138a00d96bd36516134543 (diff)
x86/mm/numa: Drop dead code and rename setup_node_data() to alloc_node_data()
The setup_node_data() function allocates a pg_data_t object, inserts it into the node_data[] array and initializes the following fields: node_id, node_start_pfn and node_spanned_pages. However, a few function calls later during the kernel boot, free_area_init_node() re-initializes those fields, possibly with different values. This means that the initialization done by setup_node_data() is not used. This causes a small glitch when running Linux as a hyperv numa guest: SRAT: PXM 0 -> APIC 0x00 -> Node 0 SRAT: PXM 0 -> APIC 0x01 -> Node 0 SRAT: PXM 1 -> APIC 0x02 -> Node 1 SRAT: PXM 1 -> APIC 0x03 -> Node 1 SRAT: Node 0 PXM 0 [mem 0x00000000-0x7fffffff] SRAT: Node 1 PXM 1 [mem 0x80200000-0xf7ffffff] SRAT: Node 1 PXM 1 [mem 0x100000000-0x1081fffff] NUMA: Node 1 [mem 0x80200000-0xf7ffffff] + [mem 0x100000000-0x1081fffff] -> [mem 0x80200000-0x1081fffff] Initmem setup node 0 [mem 0x00000000-0x7fffffff] NODE_DATA [mem 0x7ffdc000-0x7ffeffff] Initmem setup node 1 [mem 0x80800000-0x1081fffff] NODE_DATA [mem 0x1081ea000-0x1081fdfff] crashkernel: memory value expected [ffffea0000000000-ffffea0001ffffff] PMD -> [ffff88007de00000-ffff88007fdfffff] on node 0 [ffffea0002000000-ffffea00043fffff] PMD -> [ffff880105600000-ffff8801077fffff] on node 1 Zone ranges: DMA [mem 0x00001000-0x00ffffff] DMA32 [mem 0x01000000-0xffffffff] Normal [mem 0x100000000-0x1081fffff] Movable zone start for each node Early memory node ranges node 0: [mem 0x00001000-0x0009efff] node 0: [mem 0x00100000-0x7ffeffff] node 1: [mem 0x80200000-0xf7ffffff] node 1: [mem 0x100000000-0x1081fffff] On node 0 totalpages: 524174 DMA zone: 64 pages used for memmap DMA zone: 21 pages reserved DMA zone: 3998 pages, LIFO batch:0 DMA32 zone: 8128 pages used for memmap DMA32 zone: 520176 pages, LIFO batch:31 On node 1 totalpages: 524288 DMA32 zone: 7672 pages used for memmap DMA32 zone: 491008 pages, LIFO batch:31 Normal zone: 520 pages used for memmap Normal zone: 33280 pages, LIFO batch:7 In this dmesg, the SRAT table reports that the memory range for node 1 starts at 0x80200000. 
However, the line starting with "Initmem" reports that node 1 memory range starts at 0x80800000. The "Initmem" line is reported by setup_node_data() and is wrong, because the kernel ends up using the range as reported in the SRAT table. This commit drops all that dead code from setup_node_data(), renames it to alloc_node_data() and adds a printk() to free_area_init_node() so that we report a node's memory range accurately. Here's the same dmesg section with this patch applied: SRAT: PXM 0 -> APIC 0x00 -> Node 0 SRAT: PXM 0 -> APIC 0x01 -> Node 0 SRAT: PXM 1 -> APIC 0x02 -> Node 1 SRAT: PXM 1 -> APIC 0x03 -> Node 1 SRAT: Node 0 PXM 0 [mem 0x00000000-0x7fffffff] SRAT: Node 1 PXM 1 [mem 0x80200000-0xf7ffffff] SRAT: Node 1 PXM 1 [mem 0x100000000-0x1081fffff] NUMA: Node 1 [mem 0x80200000-0xf7ffffff] + [mem 0x100000000-0x1081fffff] -> [mem 0x80200000-0x1081fffff] NODE_DATA(0) allocated [mem 0x7ffdc000-0x7ffeffff] NODE_DATA(1) allocated [mem 0x1081ea000-0x1081fdfff] crashkernel: memory value expected [ffffea0000000000-ffffea0001ffffff] PMD -> [ffff88007de00000-ffff88007fdfffff] on node 0 [ffffea0002000000-ffffea00043fffff] PMD -> [ffff880105600000-ffff8801077fffff] on node 1 Zone ranges: DMA [mem 0x00001000-0x00ffffff] DMA32 [mem 0x01000000-0xffffffff] Normal [mem 0x100000000-0x1081fffff] Movable zone start for each node Early memory node ranges node 0: [mem 0x00001000-0x0009efff] node 0: [mem 0x00100000-0x7ffeffff] node 1: [mem 0x80200000-0xf7ffffff] node 1: [mem 0x100000000-0x1081fffff] Initmem setup node 0 [mem 0x00001000-0x7ffeffff] On node 0 totalpages: 524174 DMA zone: 64 pages used for memmap DMA zone: 21 pages reserved DMA zone: 3998 pages, LIFO batch:0 DMA32 zone: 8128 pages used for memmap DMA32 zone: 520176 pages, LIFO batch:31 Initmem setup node 1 [mem 0x80200000-0x1081fffff] On node 1 totalpages: 524288 DMA32 zone: 7672 pages used for memmap DMA32 zone: 491008 pages, LIFO batch:31 Normal zone: 520 pages used for memmap Normal zone: 33280 pages, LIFO batch:7 
This commit was tested on a two node bare-metal NUMA machine and Linux as a numa guest on hyperv and qemu/kvm. PS: The wrong memory range reported by setup_node_data() seems to be harmless in the current kernel because it's just not used. However, that bad range is used in kernel 2.6.32 to initialize the old boot memory allocator, which causes a crash during boot. Signed-off-by: Luiz Capitulino <lcapitulino@redhat.com> Acked-by: Rik van Riel <riel@redhat.com> Cc: Andi Kleen <andi@firstfloor.org> Cc: David Rientjes <rientjes@google.com> Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com> Cc: Yinghai Lu <yinghai@kernel.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch/x86/mm')
-rw-r--r--arch/x86/mm/numa.c34
1 files changed, 14 insertions, 20 deletions
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index a32b706c401a..d221374d5ce8 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -185,8 +185,8 @@ int __init numa_add_memblk(int nid, u64 start, u64 end)
185 return numa_add_memblk_to(nid, start, end, &numa_meminfo); 185 return numa_add_memblk_to(nid, start, end, &numa_meminfo);
186} 186}
187 187
188/* Initialize NODE_DATA for a node on the local memory */ 188/* Allocate NODE_DATA for a node on the local memory */
189static void __init setup_node_data(int nid, u64 start, u64 end) 189static void __init alloc_node_data(int nid)
190{ 190{
191 const size_t nd_size = roundup(sizeof(pg_data_t), PAGE_SIZE); 191 const size_t nd_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
192 u64 nd_pa; 192 u64 nd_pa;
@@ -194,18 +194,6 @@ static void __init setup_node_data(int nid, u64 start, u64 end)
194 int tnid; 194 int tnid;
195 195
196 /* 196 /*
197 * Don't confuse VM with a node that doesn't have the
198 * minimum amount of memory:
199 */
200 if (end && (end - start) < NODE_MIN_SIZE)
201 return;
202
203 start = roundup(start, ZONE_ALIGN);
204
205 printk(KERN_INFO "Initmem setup node %d [mem %#010Lx-%#010Lx]\n",
206 nid, start, end - 1);
207
208 /*
209 * Allocate node data. Try node-local memory and then any node. 197 * Allocate node data. Try node-local memory and then any node.
210 * Never allocate in DMA zone. 198 * Never allocate in DMA zone.
211 */ 199 */
@@ -222,7 +210,7 @@ static void __init setup_node_data(int nid, u64 start, u64 end)
222 nd = __va(nd_pa); 210 nd = __va(nd_pa);
223 211
224 /* report and initialize */ 212 /* report and initialize */
225 printk(KERN_INFO " NODE_DATA [mem %#010Lx-%#010Lx]\n", 213 printk(KERN_INFO "NODE_DATA(%d) allocated [mem %#010Lx-%#010Lx]\n", nid,
226 nd_pa, nd_pa + nd_size - 1); 214 nd_pa, nd_pa + nd_size - 1);
227 tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT); 215 tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT);
228 if (tnid != nid) 216 if (tnid != nid)
@@ -230,9 +218,6 @@ static void __init setup_node_data(int nid, u64 start, u64 end)
230 218
231 node_data[nid] = nd; 219 node_data[nid] = nd;
232 memset(NODE_DATA(nid), 0, sizeof(pg_data_t)); 220 memset(NODE_DATA(nid), 0, sizeof(pg_data_t));
233 NODE_DATA(nid)->node_id = nid;
234 NODE_DATA(nid)->node_start_pfn = start >> PAGE_SHIFT;
235 NODE_DATA(nid)->node_spanned_pages = (end - start) >> PAGE_SHIFT;
236 221
237 node_set_online(nid); 222 node_set_online(nid);
238} 223}
@@ -523,8 +508,17 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
523 end = max(mi->blk[i].end, end); 508 end = max(mi->blk[i].end, end);
524 } 509 }
525 510
526 if (start < end) 511 if (start >= end)
527 setup_node_data(nid, start, end); 512 continue;
513
514 /*
515 * Don't confuse VM with a node that doesn't have the
516 * minimum amount of memory:
517 */
518 if (end && (end - start) < NODE_MIN_SIZE)
519 continue;
520
521 alloc_node_data(nid);
528 } 522 }
529 523
530 /* Dump memblock with node info and return. */ 524 /* Dump memblock with node info and return. */