diff options
Diffstat (limited to 'arch/x86/mm/numa_64.c')
-rw-r--r-- | arch/x86/mm/numa_64.c | 93 |
1 files changed, 33 insertions, 60 deletions
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index c5066d519e5d..b432d5781773 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c | |||
@@ -27,30 +27,17 @@ | |||
27 | struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; | 27 | struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; |
28 | EXPORT_SYMBOL(node_data); | 28 | EXPORT_SYMBOL(node_data); |
29 | 29 | ||
30 | bootmem_data_t plat_node_bdata[MAX_NUMNODES]; | 30 | static bootmem_data_t plat_node_bdata[MAX_NUMNODES]; |
31 | 31 | ||
32 | struct memnode memnode; | 32 | struct memnode memnode; |
33 | 33 | ||
34 | #ifdef CONFIG_SMP | ||
35 | int x86_cpu_to_node_map_init[NR_CPUS] = { | ||
36 | [0 ... NR_CPUS-1] = NUMA_NO_NODE | ||
37 | }; | ||
38 | void *x86_cpu_to_node_map_early_ptr; | ||
39 | EXPORT_SYMBOL(x86_cpu_to_node_map_early_ptr); | ||
40 | #endif | ||
41 | DEFINE_PER_CPU(int, x86_cpu_to_node_map) = NUMA_NO_NODE; | ||
42 | EXPORT_PER_CPU_SYMBOL(x86_cpu_to_node_map); | ||
43 | |||
44 | s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = { | 34 | s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = { |
45 | [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE | 35 | [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE |
46 | }; | 36 | }; |
47 | 37 | ||
48 | cpumask_t node_to_cpumask_map[MAX_NUMNODES] __read_mostly; | ||
49 | EXPORT_SYMBOL(node_to_cpumask_map); | ||
50 | |||
51 | int numa_off __initdata; | 38 | int numa_off __initdata; |
52 | unsigned long __initdata nodemap_addr; | 39 | static unsigned long __initdata nodemap_addr; |
53 | unsigned long __initdata nodemap_size; | 40 | static unsigned long __initdata nodemap_size; |
54 | 41 | ||
55 | /* | 42 | /* |
56 | * Given a shift value, try to populate memnodemap[] | 43 | * Given a shift value, try to populate memnodemap[] |
@@ -99,7 +86,7 @@ static int __init allocate_cachealigned_memnodemap(void) | |||
99 | 86 | ||
100 | addr = 0x8000; | 87 | addr = 0x8000; |
101 | nodemap_size = round_up(sizeof(s16) * memnodemapsize, L1_CACHE_BYTES); | 88 | nodemap_size = round_up(sizeof(s16) * memnodemapsize, L1_CACHE_BYTES); |
102 | nodemap_addr = find_e820_area(addr, end_pfn<<PAGE_SHIFT, | 89 | nodemap_addr = find_e820_area(addr, max_pfn<<PAGE_SHIFT, |
103 | nodemap_size, L1_CACHE_BYTES); | 90 | nodemap_size, L1_CACHE_BYTES); |
104 | if (nodemap_addr == -1UL) { | 91 | if (nodemap_addr == -1UL) { |
105 | printk(KERN_ERR | 92 | printk(KERN_ERR |
@@ -192,7 +179,7 @@ static void * __init early_node_mem(int nodeid, unsigned long start, | |||
192 | void __init setup_node_bootmem(int nodeid, unsigned long start, | 179 | void __init setup_node_bootmem(int nodeid, unsigned long start, |
193 | unsigned long end) | 180 | unsigned long end) |
194 | { | 181 | { |
195 | unsigned long start_pfn, end_pfn, bootmap_pages, bootmap_size; | 182 | unsigned long start_pfn, last_pfn, bootmap_pages, bootmap_size; |
196 | unsigned long bootmap_start, nodedata_phys; | 183 | unsigned long bootmap_start, nodedata_phys; |
197 | void *bootmap; | 184 | void *bootmap; |
198 | const int pgdat_size = round_up(sizeof(pg_data_t), PAGE_SIZE); | 185 | const int pgdat_size = round_up(sizeof(pg_data_t), PAGE_SIZE); |
@@ -204,7 +191,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, | |||
204 | start, end); | 191 | start, end); |
205 | 192 | ||
206 | start_pfn = start >> PAGE_SHIFT; | 193 | start_pfn = start >> PAGE_SHIFT; |
207 | end_pfn = end >> PAGE_SHIFT; | 194 | last_pfn = end >> PAGE_SHIFT; |
208 | 195 | ||
209 | node_data[nodeid] = early_node_mem(nodeid, start, end, pgdat_size, | 196 | node_data[nodeid] = early_node_mem(nodeid, start, end, pgdat_size, |
210 | SMP_CACHE_BYTES); | 197 | SMP_CACHE_BYTES); |
@@ -217,7 +204,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, | |||
217 | memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t)); | 204 | memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t)); |
218 | NODE_DATA(nodeid)->bdata = &plat_node_bdata[nodeid]; | 205 | NODE_DATA(nodeid)->bdata = &plat_node_bdata[nodeid]; |
219 | NODE_DATA(nodeid)->node_start_pfn = start_pfn; | 206 | NODE_DATA(nodeid)->node_start_pfn = start_pfn; |
220 | NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn; | 207 | NODE_DATA(nodeid)->node_spanned_pages = last_pfn - start_pfn; |
221 | 208 | ||
222 | /* | 209 | /* |
223 | * Find a place for the bootmem map | 210 | * Find a place for the bootmem map |
@@ -226,14 +213,14 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, | |||
226 | * early_node_mem will get that with find_e820_area instead | 213 | * early_node_mem will get that with find_e820_area instead |
227 | * of alloc_bootmem, that could clash with reserved range | 214 | * of alloc_bootmem, that could clash with reserved range |
228 | */ | 215 | */ |
229 | bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn); | 216 | bootmap_pages = bootmem_bootmap_pages(last_pfn - start_pfn); |
230 | nid = phys_to_nid(nodedata_phys); | 217 | nid = phys_to_nid(nodedata_phys); |
231 | if (nid == nodeid) | 218 | if (nid == nodeid) |
232 | bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE); | 219 | bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE); |
233 | else | 220 | else |
234 | bootmap_start = round_up(start, PAGE_SIZE); | 221 | bootmap_start = round_up(start, PAGE_SIZE); |
235 | /* | 222 | /* |
236 | * SMP_CAHCE_BYTES could be enough, but init_bootmem_node like | 223 | * SMP_CACHE_BYTES could be enough, but init_bootmem_node like |
237 | * to use that to align to PAGE_SIZE | 224 | * to use that to align to PAGE_SIZE |
238 | */ | 225 | */ |
239 | bootmap = early_node_mem(nodeid, bootmap_start, end, | 226 | bootmap = early_node_mem(nodeid, bootmap_start, end, |
@@ -248,7 +235,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, | |||
248 | 235 | ||
249 | bootmap_size = init_bootmem_node(NODE_DATA(nodeid), | 236 | bootmap_size = init_bootmem_node(NODE_DATA(nodeid), |
250 | bootmap_start >> PAGE_SHIFT, | 237 | bootmap_start >> PAGE_SHIFT, |
251 | start_pfn, end_pfn); | 238 | start_pfn, last_pfn); |
252 | 239 | ||
253 | printk(KERN_INFO " bootmap [%016lx - %016lx] pages %lx\n", | 240 | printk(KERN_INFO " bootmap [%016lx - %016lx] pages %lx\n", |
254 | bootmap_start, bootmap_start + bootmap_size - 1, | 241 | bootmap_start, bootmap_start + bootmap_size - 1, |
@@ -309,7 +296,7 @@ void __init numa_init_array(void) | |||
309 | 296 | ||
310 | #ifdef CONFIG_NUMA_EMU | 297 | #ifdef CONFIG_NUMA_EMU |
311 | /* Numa emulation */ | 298 | /* Numa emulation */ |
312 | char *cmdline __initdata; | 299 | static char *cmdline __initdata; |
313 | 300 | ||
314 | /* | 301 | /* |
315 | * Setups up nid to range from addr to addr + size. If the end | 302 | * Setups up nid to range from addr to addr + size. If the end |
@@ -413,15 +400,15 @@ static int __init split_nodes_by_size(struct bootnode *nodes, u64 *addr, | |||
413 | } | 400 | } |
414 | 401 | ||
415 | /* | 402 | /* |
416 | * Sets up the system RAM area from start_pfn to end_pfn according to the | 403 | * Sets up the system RAM area from start_pfn to last_pfn according to the |
417 | * numa=fake command-line option. | 404 | * numa=fake command-line option. |
418 | */ | 405 | */ |
419 | static struct bootnode nodes[MAX_NUMNODES] __initdata; | 406 | static struct bootnode nodes[MAX_NUMNODES] __initdata; |
420 | 407 | ||
421 | static int __init numa_emulation(unsigned long start_pfn, unsigned long end_pfn) | 408 | static int __init numa_emulation(unsigned long start_pfn, unsigned long last_pfn) |
422 | { | 409 | { |
423 | u64 size, addr = start_pfn << PAGE_SHIFT; | 410 | u64 size, addr = start_pfn << PAGE_SHIFT; |
424 | u64 max_addr = end_pfn << PAGE_SHIFT; | 411 | u64 max_addr = last_pfn << PAGE_SHIFT; |
425 | int num_nodes = 0, num = 0, coeff_flag, coeff = -1, i; | 412 | int num_nodes = 0, num = 0, coeff_flag, coeff = -1, i; |
426 | 413 | ||
427 | memset(&nodes, 0, sizeof(nodes)); | 414 | memset(&nodes, 0, sizeof(nodes)); |
@@ -527,7 +514,7 @@ out: | |||
527 | } | 514 | } |
528 | #endif /* CONFIG_NUMA_EMU */ | 515 | #endif /* CONFIG_NUMA_EMU */ |
529 | 516 | ||
530 | void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn) | 517 | void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn) |
531 | { | 518 | { |
532 | int i; | 519 | int i; |
533 | 520 | ||
@@ -535,7 +522,7 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn) | |||
535 | nodes_clear(node_online_map); | 522 | nodes_clear(node_online_map); |
536 | 523 | ||
537 | #ifdef CONFIG_NUMA_EMU | 524 | #ifdef CONFIG_NUMA_EMU |
538 | if (cmdline && !numa_emulation(start_pfn, end_pfn)) | 525 | if (cmdline && !numa_emulation(start_pfn, last_pfn)) |
539 | return; | 526 | return; |
540 | nodes_clear(node_possible_map); | 527 | nodes_clear(node_possible_map); |
541 | nodes_clear(node_online_map); | 528 | nodes_clear(node_online_map); |
@@ -543,7 +530,7 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn) | |||
543 | 530 | ||
544 | #ifdef CONFIG_ACPI_NUMA | 531 | #ifdef CONFIG_ACPI_NUMA |
545 | if (!numa_off && !acpi_scan_nodes(start_pfn << PAGE_SHIFT, | 532 | if (!numa_off && !acpi_scan_nodes(start_pfn << PAGE_SHIFT, |
546 | end_pfn << PAGE_SHIFT)) | 533 | last_pfn << PAGE_SHIFT)) |
547 | return; | 534 | return; |
548 | nodes_clear(node_possible_map); | 535 | nodes_clear(node_possible_map); |
549 | nodes_clear(node_online_map); | 536 | nodes_clear(node_online_map); |
@@ -551,7 +538,7 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn) | |||
551 | 538 | ||
552 | #ifdef CONFIG_K8_NUMA | 539 | #ifdef CONFIG_K8_NUMA |
553 | if (!numa_off && !k8_scan_nodes(start_pfn<<PAGE_SHIFT, | 540 | if (!numa_off && !k8_scan_nodes(start_pfn<<PAGE_SHIFT, |
554 | end_pfn<<PAGE_SHIFT)) | 541 | last_pfn<<PAGE_SHIFT)) |
555 | return; | 542 | return; |
556 | nodes_clear(node_possible_map); | 543 | nodes_clear(node_possible_map); |
557 | nodes_clear(node_online_map); | 544 | nodes_clear(node_online_map); |
@@ -561,7 +548,7 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn) | |||
561 | 548 | ||
562 | printk(KERN_INFO "Faking a node at %016lx-%016lx\n", | 549 | printk(KERN_INFO "Faking a node at %016lx-%016lx\n", |
563 | start_pfn << PAGE_SHIFT, | 550 | start_pfn << PAGE_SHIFT, |
564 | end_pfn << PAGE_SHIFT); | 551 | last_pfn << PAGE_SHIFT); |
565 | /* setup dummy node covering all memory */ | 552 | /* setup dummy node covering all memory */ |
566 | memnode_shift = 63; | 553 | memnode_shift = 63; |
567 | memnodemap = memnode.embedded_map; | 554 | memnodemap = memnode.embedded_map; |
@@ -570,29 +557,8 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn) | |||
570 | node_set(0, node_possible_map); | 557 | node_set(0, node_possible_map); |
571 | for (i = 0; i < NR_CPUS; i++) | 558 | for (i = 0; i < NR_CPUS; i++) |
572 | numa_set_node(i, 0); | 559 | numa_set_node(i, 0); |
573 | /* cpumask_of_cpu() may not be available during early startup */ | 560 | e820_register_active_regions(0, start_pfn, last_pfn); |
574 | memset(&node_to_cpumask_map[0], 0, sizeof(node_to_cpumask_map[0])); | 561 | setup_node_bootmem(0, start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT); |
575 | cpu_set(0, node_to_cpumask_map[0]); | ||
576 | e820_register_active_regions(0, start_pfn, end_pfn); | ||
577 | setup_node_bootmem(0, start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT); | ||
578 | } | ||
579 | |||
580 | __cpuinit void numa_add_cpu(int cpu) | ||
581 | { | ||
582 | set_bit(cpu, | ||
583 | (unsigned long *)&node_to_cpumask_map[early_cpu_to_node(cpu)]); | ||
584 | } | ||
585 | |||
586 | void __cpuinit numa_set_node(int cpu, int node) | ||
587 | { | ||
588 | int *cpu_to_node_map = x86_cpu_to_node_map_early_ptr; | ||
589 | |||
590 | if(cpu_to_node_map) | ||
591 | cpu_to_node_map[cpu] = node; | ||
592 | else if(per_cpu_offset(cpu)) | ||
593 | per_cpu(x86_cpu_to_node_map, cpu) = node; | ||
594 | else | ||
595 | Dprintk(KERN_INFO "Setting node for non-present cpu %d\n", cpu); | ||
596 | } | 562 | } |
597 | 563 | ||
598 | unsigned long __init numa_free_all_bootmem(void) | 564 | unsigned long __init numa_free_all_bootmem(void) |
@@ -613,7 +579,7 @@ void __init paging_init(void) | |||
613 | memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); | 579 | memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); |
614 | max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN; | 580 | max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN; |
615 | max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN; | 581 | max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN; |
616 | max_zone_pfns[ZONE_NORMAL] = end_pfn; | 582 | max_zone_pfns[ZONE_NORMAL] = max_pfn; |
617 | 583 | ||
618 | sparse_memory_present_with_active_regions(MAX_NUMNODES); | 584 | sparse_memory_present_with_active_regions(MAX_NUMNODES); |
619 | sparse_init(); | 585 | sparse_init(); |
@@ -641,6 +607,7 @@ static __init int numa_setup(char *opt) | |||
641 | } | 607 | } |
642 | early_param("numa", numa_setup); | 608 | early_param("numa", numa_setup); |
643 | 609 | ||
610 | #ifdef CONFIG_NUMA | ||
644 | /* | 611 | /* |
645 | * Setup early cpu_to_node. | 612 | * Setup early cpu_to_node. |
646 | * | 613 | * |
@@ -652,14 +619,19 @@ early_param("numa", numa_setup); | |||
652 | * is already initialized in a round robin manner at numa_init_array, | 619 | * is already initialized in a round robin manner at numa_init_array, |
653 | * prior to this call, and this initialization is good enough | 620 | * prior to this call, and this initialization is good enough |
654 | * for the fake NUMA cases. | 621 | * for the fake NUMA cases. |
622 | * | ||
623 | * Called before the per_cpu areas are setup. | ||
655 | */ | 624 | */ |
656 | void __init init_cpu_to_node(void) | 625 | void __init init_cpu_to_node(void) |
657 | { | 626 | { |
658 | int i; | 627 | int cpu; |
628 | u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); | ||
659 | 629 | ||
660 | for (i = 0; i < NR_CPUS; i++) { | 630 | BUG_ON(cpu_to_apicid == NULL); |
631 | |||
632 | for_each_possible_cpu(cpu) { | ||
661 | int node; | 633 | int node; |
662 | u16 apicid = x86_cpu_to_apicid_init[i]; | 634 | u16 apicid = cpu_to_apicid[cpu]; |
663 | 635 | ||
664 | if (apicid == BAD_APICID) | 636 | if (apicid == BAD_APICID) |
665 | continue; | 637 | continue; |
@@ -668,8 +640,9 @@ void __init init_cpu_to_node(void) | |||
668 | continue; | 640 | continue; |
669 | if (!node_online(node)) | 641 | if (!node_online(node)) |
670 | continue; | 642 | continue; |
671 | numa_set_node(i, node); | 643 | numa_set_node(cpu, node); |
672 | } | 644 | } |
673 | } | 645 | } |
646 | #endif | ||
674 | 647 | ||
675 | 648 | ||