path: root/arch/x86/mm/numa_64.c
Diffstat (limited to 'arch/x86/mm/numa_64.c')

-rw-r--r--	arch/x86/mm/numa_64.c	274
1 file changed, 124 insertions(+), 150 deletions(-)
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 3d6926ba8995..dc3b1f7e1451 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -1,7 +1,7 @@
 /*
  * Generic VM initialization for x86-64 NUMA setups.
  * Copyright 2002,2003 Andi Kleen, SuSE Labs.
  */
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/string.h>
@@ -11,35 +11,45 @@
 #include <linux/ctype.h>
 #include <linux/module.h>
 #include <linux/nodemask.h>
+#include <linux/sched.h>
 
 #include <asm/e820.h>
 #include <asm/proto.h>
 #include <asm/dma.h>
 #include <asm/numa.h>
 #include <asm/acpi.h>
+#include <asm/k8.h>
 
 #ifndef Dprintk
 #define Dprintk(x...)
 #endif
 
 struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
+EXPORT_SYMBOL(node_data);
+
 bootmem_data_t plat_node_bdata[MAX_NUMNODES];
 
 struct memnode memnode;
 
-unsigned char cpu_to_node[NR_CPUS] __read_mostly = {
+int x86_cpu_to_node_map_init[NR_CPUS] = {
 	[0 ... NR_CPUS-1] = NUMA_NO_NODE
 };
-unsigned char apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
-	[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
+void *x86_cpu_to_node_map_early_ptr;
+DEFINE_PER_CPU(int, x86_cpu_to_node_map) = NUMA_NO_NODE;
+EXPORT_PER_CPU_SYMBOL(x86_cpu_to_node_map);
+EXPORT_SYMBOL(x86_cpu_to_node_map_early_ptr);
+
+s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
+	[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
 };
-cpumask_t node_to_cpumask[MAX_NUMNODES] __read_mostly;
+
+cpumask_t node_to_cpumask_map[MAX_NUMNODES] __read_mostly;
+EXPORT_SYMBOL(node_to_cpumask_map);
 
 int numa_off __initdata;
 unsigned long __initdata nodemap_addr;
 unsigned long __initdata nodemap_size;
 
-
 /*
  * Given a shift value, try to populate memnodemap[]
  * Returns :
@@ -47,14 +57,13 @@ unsigned long __initdata nodemap_size;
  * 0 if memnodmap[] too small (of shift too small)
  * -1 if node overlap or lost ram (shift too big)
  */
-static int __init
-populate_memnodemap(const struct bootnode *nodes, int numnodes, int shift)
+static int __init populate_memnodemap(const struct bootnode *nodes,
+				      int numnodes, int shift)
 {
-	int i;
-	int res = -1;
 	unsigned long addr, end;
+	int i, res = -1;
 
-	memset(memnodemap, 0xff, memnodemapsize);
+	memset(memnodemap, 0xff, sizeof(s16)*memnodemapsize);
 	for (i = 0; i < numnodes; i++) {
 		addr = nodes[i].start;
 		end = nodes[i].end;
@@ -63,13 +72,13 @@ populate_memnodemap(const struct bootnode *nodes, int numnodes, int shift)
 		if ((end >> shift) >= memnodemapsize)
 			return 0;
 		do {
-			if (memnodemap[addr >> shift] != 0xff)
+			if (memnodemap[addr >> shift] != NUMA_NO_NODE)
 				return -1;
 			memnodemap[addr >> shift] = i;
 			addr += (1UL << shift);
 		} while (addr < end);
 		res = 1;
 	}
 	return res;
 }
 
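populate_memnodemap() fills the table consumed by the physical-address-to-node hash: a lookup is a single shift and index. A minimal sketch of that consuming side, modeled on phys_to_nid() from the mmzone headers of this era (not part of this diff); note that the memset above writes 0xff bytes, which an s16 entry reads back as -1, i.e. NUMA_NO_NODE in this series:

static inline int sketch_phys_to_nid(unsigned long addr)
{
	/* memnodemap[] entries are s16 node ids after this change */
	return memnodemap[addr >> memnode_shift];
}
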
@@ -78,12 +87,12 @@ static int __init allocate_cachealigned_memnodemap(void)
 	unsigned long pad, pad_addr;
 
 	memnodemap = memnode.embedded_map;
-	if (memnodemapsize <= 48)
+	if (memnodemapsize <= ARRAY_SIZE(memnode.embedded_map))
 		return 0;
 
 	pad = L1_CACHE_BYTES - 1;
 	pad_addr = 0x8000;
-	nodemap_size = pad + memnodemapsize;
+	nodemap_size = pad + sizeof(s16) * memnodemapsize;
 	nodemap_addr = find_e820_area(pad_addr, end_pfn<<PAGE_SHIFT,
 				      nodemap_size);
 	if (nodemap_addr == -1UL) {
@@ -94,6 +103,7 @@ static int __init allocate_cachealigned_memnodemap(void)
 	}
 	pad_addr = (nodemap_addr + pad) & ~pad;
 	memnodemap = phys_to_virt(pad_addr);
+	reserve_early(nodemap_addr, nodemap_addr + nodemap_size);
 
 	printk(KERN_DEBUG "NUMA: Allocated memnodemap from %lx - %lx\n",
 	       nodemap_addr, nodemap_addr + nodemap_size);
@@ -104,8 +114,8 @@ static int __init allocate_cachealigned_memnodemap(void)
  * The LSB of all start and end addresses in the node map is the value of the
  * maximum possible shift.
  */
-static int __init
-extract_lsb_from_nodes (const struct bootnode *nodes, int numnodes)
+static int __init extract_lsb_from_nodes(const struct bootnode *nodes,
+					 int numnodes)
 {
 	int i, nodes_used = 0;
 	unsigned long start, end;
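extract_lsb_from_nodes() implements the rule stated in the comment above: the lowest set bit across every node start and end bounds the usable shift. A self-contained sketch (helper name hypothetical), with a worked example: two nodes covering [0, 4GB) and [4GB, 8GB) OR together to 0x300000000, giving shift 32 and a two-entry map for 8GB of RAM:

static int __init sketch_lsb_shift(const struct bootnode *nodes, int numnodes)
{
	unsigned long bits = 0;
	int i;

	/* OR all boundaries; lowest set bit = maximum usable shift */
	for (i = 0; i < numnodes; i++)
		bits |= nodes[i].start | nodes[i].end;
	return bits ? __builtin_ctzl(bits) : 63;
}
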
@@ -140,51 +150,50 @@ int __init compute_hash_shift(struct bootnode *nodes, int numnodes)
 		 shift);
 
 	if (populate_memnodemap(nodes, numnodes, shift) != 1) {
-		printk(KERN_INFO
-		       "Your memory is not aligned you need to rebuild your kernel "
-		       "with a bigger NODEMAPSIZE shift=%d\n",
-		       shift);
+		printk(KERN_INFO "Your memory is not aligned you need to "
+		       "rebuild your kernel with a bigger NODEMAPSIZE "
+		       "shift=%d\n", shift);
 		return -1;
 	}
 	return shift;
 }
 
-#ifdef CONFIG_SPARSEMEM
 int early_pfn_to_nid(unsigned long pfn)
 {
 	return phys_to_nid(pfn << PAGE_SHIFT);
 }
-#endif
 
-static void * __init
-early_node_mem(int nodeid, unsigned long start, unsigned long end,
-	       unsigned long size)
+static void * __init early_node_mem(int nodeid, unsigned long start,
+				    unsigned long end, unsigned long size)
 {
 	unsigned long mem = find_e820_area(start, end, size);
 	void *ptr;
+
 	if (mem != -1L)
 		return __va(mem);
 	ptr = __alloc_bootmem_nopanic(size,
 				SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS));
 	if (ptr == NULL) {
 		printk(KERN_ERR "Cannot find %lu bytes in node %d\n",
 		       size, nodeid);
 		return NULL;
 	}
 	return ptr;
 }
 
 /* Initialize bootmem allocator for a node */
-void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
+void __init setup_node_bootmem(int nodeid, unsigned long start,
+			       unsigned long end)
 {
-	unsigned long start_pfn, end_pfn, bootmap_pages, bootmap_size, bootmap_start;
-	unsigned long nodedata_phys;
+	unsigned long start_pfn, end_pfn, bootmap_pages, bootmap_size;
+	unsigned long bootmap_start, nodedata_phys;
 	void *bootmap;
 	const int pgdat_size = round_up(sizeof(pg_data_t), PAGE_SIZE);
 
 	start = round_up(start, ZONE_ALIGN);
 
-	printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid, start, end);
+	printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid,
+	       start, end);
 
 	start_pfn = start >> PAGE_SHIFT;
 	end_pfn = end >> PAGE_SHIFT;
@@ -200,75 +209,55 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
 	NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn;
 
 	/* Find a place for the bootmem map */
 	bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
 	bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE);
 	bootmap = early_node_mem(nodeid, bootmap_start, end,
 				 bootmap_pages<<PAGE_SHIFT);
 	if (bootmap == NULL)  {
 		if (nodedata_phys < start || nodedata_phys >= end)
-			free_bootmem((unsigned long)node_data[nodeid],pgdat_size);
+			free_bootmem((unsigned long)node_data[nodeid],
+				     pgdat_size);
 		node_data[nodeid] = NULL;
 		return;
 	}
 	bootmap_start = __pa(bootmap);
 	Dprintk("bootmap start %lu pages %lu\n", bootmap_start, bootmap_pages);
 
 	bootmap_size = init_bootmem_node(NODE_DATA(nodeid),
 					 bootmap_start >> PAGE_SHIFT,
 					 start_pfn, end_pfn);
 
 	free_bootmem_with_active_regions(nodeid, end);
 
 	reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size);
-	reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, bootmap_pages<<PAGE_SHIFT);
+	reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start,
+			     bootmap_pages<<PAGE_SHIFT);
 #ifdef CONFIG_ACPI_NUMA
 	srat_reserve_add_area(nodeid);
 #endif
 	node_set_online(nodeid);
 }
-
-/* Initialize final allocator for a zone */
-void __init setup_node_zones(int nodeid)
-{
-	unsigned long start_pfn, end_pfn, memmapsize, limit;
-
-	start_pfn = node_start_pfn(nodeid);
-	end_pfn = node_end_pfn(nodeid);
-
-	Dprintk(KERN_INFO "Setting up memmap for node %d %lx-%lx\n",
-		nodeid, start_pfn, end_pfn);
-
-	/* Try to allocate mem_map at end to not fill up precious <4GB
-	   memory. */
-	memmapsize = sizeof(struct page) * (end_pfn-start_pfn);
-	limit = end_pfn << PAGE_SHIFT;
-#ifdef CONFIG_FLAT_NODE_MEM_MAP
-	NODE_DATA(nodeid)->node_mem_map =
-		__alloc_bootmem_core(NODE_DATA(nodeid)->bdata,
-				     memmapsize, SMP_CACHE_BYTES,
-				     round_down(limit - memmapsize, PAGE_SIZE),
-				     limit);
-#endif
-}
 
+/*
+ * There are unfortunately some poorly designed mainboards around that
+ * only connect memory to a single CPU. This breaks the 1:1 cpu->node
+ * mapping. To avoid this fill in the mapping for all possible CPUs,
+ * as the number of CPUs is not known yet. We round robin the existing
+ * nodes.
+ */
 void __init numa_init_array(void)
 {
 	int rr, i;
-	/* There are unfortunately some poorly designed mainboards around
-	   that only connect memory to a single CPU. This breaks the 1:1 cpu->node
-	   mapping. To avoid this fill in the mapping for all possible
-	   CPUs, as the number of CPUs is not known yet.
-	   We round robin the existing nodes. */
+
 	rr = first_node(node_online_map);
 	for (i = 0; i < NR_CPUS; i++) {
-		if (cpu_to_node(i) != NUMA_NO_NODE)
+		if (early_cpu_to_node(i) != NUMA_NO_NODE)
 			continue;
 		numa_set_node(i, rr);
 		rr = next_node(rr, node_online_map);
 		if (rr == MAX_NUMNODES)
 			rr = first_node(node_online_map);
 	}
-
 }
 
 #ifdef CONFIG_NUMA_EMU
@@ -276,15 +265,17 @@ void __init numa_init_array(void)
 char *cmdline __initdata;
 
 /*
- * Setups up nid to range from addr to addr + size. If the end boundary is
- * greater than max_addr, then max_addr is used instead. The return value is 0
- * if there is additional memory left for allocation past addr and -1 otherwise.
- * addr is adjusted to be at the end of the node.
+ * Setups up nid to range from addr to addr + size. If the end
+ * boundary is greater than max_addr, then max_addr is used instead.
+ * The return value is 0 if there is additional memory left for
+ * allocation past addr and -1 otherwise. addr is adjusted to be at
+ * the end of the node.
  */
 static int __init setup_node_range(int nid, struct bootnode *nodes, u64 *addr,
 				   u64 size, u64 max_addr)
 {
 	int ret = 0;
+
 	nodes[nid].start = *addr;
 	*addr += size;
 	if (*addr >= max_addr) {
@@ -335,6 +326,7 @@ static int __init split_nodes_equally(struct bootnode *nodes, u64 *addr,
 
 	for (i = node_start; i < num_nodes + node_start; i++) {
 		u64 end = *addr + size;
+
 		if (i < big)
 			end += FAKE_NODE_MIN_SIZE;
 		/*
@@ -380,14 +372,9 @@ static int __init split_nodes_by_size(struct bootnode *nodes, u64 *addr,
 static int __init numa_emulation(unsigned long start_pfn, unsigned long end_pfn)
 {
 	struct bootnode nodes[MAX_NUMNODES];
-	u64 addr = start_pfn << PAGE_SHIFT;
+	u64 size, addr = start_pfn << PAGE_SHIFT;
 	u64 max_addr = end_pfn << PAGE_SHIFT;
-	int num_nodes = 0;
-	int coeff_flag;
-	int coeff = -1;
-	int num = 0;
-	u64 size;
-	int i;
+	int num_nodes = 0, num = 0, coeff_flag, coeff = -1, i;
 
 	memset(&nodes, 0, sizeof(nodes));
 	/*
@@ -395,8 +382,9 @@ static int __init numa_emulation(unsigned long start_pfn, unsigned long end_pfn)
 	 * system RAM into N fake nodes.
 	 */
 	if (!strchr(cmdline, '*') && !strchr(cmdline, ',')) {
-		num_nodes = split_nodes_equally(nodes, &addr, max_addr, 0,
-						simple_strtol(cmdline, NULL, 0));
+		long n = simple_strtol(cmdline, NULL, 0);
+
+		num_nodes = split_nodes_equally(nodes, &addr, max_addr, 0, n);
 		if (num_nodes < 0)
 			return num_nodes;
 		goto out;
@@ -483,46 +471,47 @@ out:
 	for_each_node_mask(i, node_possible_map) {
 		e820_register_active_regions(i, nodes[i].start >> PAGE_SHIFT,
 						nodes[i].end >> PAGE_SHIFT);
 		setup_node_bootmem(i, nodes[i].start, nodes[i].end);
 	}
 	acpi_fake_nodes(nodes, num_nodes);
 	numa_init_array();
 	return 0;
 }
 #endif /* CONFIG_NUMA_EMU */
 
 void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
 {
 	int i;
 
 	nodes_clear(node_possible_map);
 
 #ifdef CONFIG_NUMA_EMU
 	if (cmdline && !numa_emulation(start_pfn, end_pfn))
 		return;
 	nodes_clear(node_possible_map);
 #endif
 
 #ifdef CONFIG_ACPI_NUMA
 	if (!numa_off && !acpi_scan_nodes(start_pfn << PAGE_SHIFT,
 					  end_pfn << PAGE_SHIFT))
 		return;
 	nodes_clear(node_possible_map);
 #endif
 
 #ifdef CONFIG_K8_NUMA
-	if (!numa_off && !k8_scan_nodes(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT))
+	if (!numa_off && !k8_scan_nodes(start_pfn<<PAGE_SHIFT,
+					end_pfn<<PAGE_SHIFT))
 		return;
 	nodes_clear(node_possible_map);
 #endif
 	printk(KERN_INFO "%s\n",
 	       numa_off ? "NUMA turned off" : "No NUMA configuration found");
 
 	printk(KERN_INFO "Faking a node at %016lx-%016lx\n",
 	       start_pfn << PAGE_SHIFT,
 	       end_pfn << PAGE_SHIFT);
 	/* setup dummy node covering all memory */
 	memnode_shift = 63;
 	memnodemap = memnode.embedded_map;
 	memnodemap[0] = 0;
 	nodes_clear(node_online_map);
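The dummy-node fallback above leans on the same hash described earlier: with memnode_shift = 63, every physical address shifts down to index 0 of the embedded map, which the next hunk sets to node 0, so a single table entry covers all of memory. In sketch form:

/* any physical addr < (1UL << 63), so addr >> 63 == 0 always */
static inline int sketch_dummy_phys_to_nid(unsigned long addr)
{
	return memnodemap[addr >> 63];	/* always memnodemap[0] == 0 */
}
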
@@ -530,36 +519,48 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
 	node_set(0, node_possible_map);
 	for (i = 0; i < NR_CPUS; i++)
 		numa_set_node(i, 0);
-	node_to_cpumask[0] = cpumask_of_cpu(0);
+	/* cpumask_of_cpu() may not be available during early startup */
+	memset(&node_to_cpumask_map[0], 0, sizeof(node_to_cpumask_map[0]));
+	cpu_set(0, node_to_cpumask_map[0]);
 	e820_register_active_regions(0, start_pfn, end_pfn);
 	setup_node_bootmem(0, start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT);
 }
 
 __cpuinit void numa_add_cpu(int cpu)
 {
-	set_bit(cpu, &node_to_cpumask[cpu_to_node(cpu)]);
+	set_bit(cpu,
+		(unsigned long *)&node_to_cpumask_map[early_cpu_to_node(cpu)]);
 }
 
 void __cpuinit numa_set_node(int cpu, int node)
 {
+	int *cpu_to_node_map = x86_cpu_to_node_map_early_ptr;
+
 	cpu_pda(cpu)->nodenumber = node;
-	cpu_to_node(cpu) = node;
+
+	if(cpu_to_node_map)
+		cpu_to_node_map[cpu] = node;
+	else if(per_cpu_offset(cpu))
+		per_cpu(x86_cpu_to_node_map, cpu) = node;
+	else
+		Dprintk(KERN_INFO "Setting node for non-present cpu %d\n", cpu);
 }
 
 unsigned long __init numa_free_all_bootmem(void)
 {
-	int i;
 	unsigned long pages = 0;
-	for_each_online_node(i) {
+	int i;
+
+	for_each_online_node(i)
 		pages += free_all_bootmem_node(NODE_DATA(i));
-	}
+
 	return pages;
 }
 
 void __init paging_init(void)
 {
-	int i;
 	unsigned long max_zone_pfns[MAX_NR_ZONES];
+
 	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
 	max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
 	max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
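numa_set_node() above writes through whichever mapping is live: the flat array behind x86_cpu_to_node_map_early_ptr during early boot, then the per-cpu variable once per_cpu_offset(cpu) is valid. The early_cpu_to_node() now called from numa_init_array() and numa_add_cpu() plausibly mirrors this on the read side; a sketch under that assumption (the real helper lives in the topology headers, not in this diff):

static inline int sketch_early_cpu_to_node(int cpu)
{
	int *map = x86_cpu_to_node_map_early_ptr;

	if (map)				/* early boot: flat init array */
		return map[cpu];
	if (per_cpu_offset(cpu))		/* per-cpu area already set up */
		return per_cpu(x86_cpu_to_node_map, cpu);
	return NUMA_NO_NODE;			/* CPU not instantiated yet */
}
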
@@ -568,32 +569,27 @@ void __init paging_init(void)
 	sparse_memory_present_with_active_regions(MAX_NUMNODES);
 	sparse_init();
 
-	for_each_online_node(i) {
-		setup_node_zones(i);
-	}
-
 	free_area_init_nodes(max_zone_pfns);
 }
 
 static __init int numa_setup(char *opt)
 {
 	if (!opt)
 		return -EINVAL;
-	if (!strncmp(opt,"off",3))
+	if (!strncmp(opt, "off", 3))
 		numa_off = 1;
 #ifdef CONFIG_NUMA_EMU
 	if (!strncmp(opt, "fake=", 5))
 		cmdline = opt + 5;
 #endif
 #ifdef CONFIG_ACPI_NUMA
-	if (!strncmp(opt,"noacpi",6))
+	if (!strncmp(opt, "noacpi", 6))
 		acpi_numa = -1;
-	if (!strncmp(opt,"hotadd=", 7))
+	if (!strncmp(opt, "hotadd=", 7))
 		hotadd_percent = simple_strtoul(opt+7, NULL, 10);
 #endif
 	return 0;
 }
-
 early_param("numa", numa_setup);
 
 /*
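For reference, the command-line forms numa_setup() accepts, matching the strncmp() prefixes above (values illustrative; the '*' and ',' variants of fake= are dispatched to the coefficient parser in numa_emulation()):

numa=off          /* disable NUMA discovery */
numa=fake=4       /* CONFIG_NUMA_EMU: split RAM into 4 fake nodes */
numa=noacpi       /* CONFIG_ACPI_NUMA: ignore ACPI SRAT information */
numa=hotadd=10    /* CONFIG_ACPI_NUMA: percent of memory reserved for hotadd */
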
@@ -611,38 +607,16 @@ early_param("numa", numa_setup);
 void __init init_cpu_to_node(void)
 {
 	int i;
-	for (i = 0; i < NR_CPUS; i++) {
-		u8 apicid = x86_cpu_to_apicid_init[i];
+
+	for (i = 0; i < NR_CPUS; i++) {
+		u16 apicid = x86_cpu_to_apicid_init[i];
+
 		if (apicid == BAD_APICID)
 			continue;
 		if (apicid_to_node[apicid] == NUMA_NO_NODE)
 			continue;
-		numa_set_node(i,apicid_to_node[apicid]);
+		numa_set_node(i, apicid_to_node[apicid]);
 	}
 }
 
-EXPORT_SYMBOL(cpu_to_node);
-EXPORT_SYMBOL(node_to_cpumask);
-EXPORT_SYMBOL(memnode);
-EXPORT_SYMBOL(node_data);
-
-#ifdef CONFIG_DISCONTIGMEM
-/*
- * Functions to convert PFNs from/to per node page addresses.
- * These are out of line because they are quite big.
- * They could be all tuned by pre caching more state.
- * Should do that.
- */
 
-int pfn_valid(unsigned long pfn)
-{
-	unsigned nid;
-	if (pfn >= num_physpages)
-		return 0;
-	nid = pfn_to_nid(pfn);
-	if (nid == 0xff)
-		return 0;
-	return pfn >= node_start_pfn(nid) && (pfn) < node_end_pfn(nid);
-}
-EXPORT_SYMBOL(pfn_valid);
-#endif