diff options
| author | Trond Myklebust <Trond.Myklebust@netapp.com> | 2006-03-23 23:44:19 -0500 |
|---|---|---|
| committer | Trond Myklebust <Trond.Myklebust@netapp.com> | 2006-03-23 23:44:19 -0500 |
| commit | 1ebbe2b20091d306453a5cf480a87e6cd28ae76f (patch) | |
| tree | f5cd7a0fa69b8b1938cb5a0faed2e7b0628072a5 /arch/powerpc/mm/numa.c | |
| parent | ac58c9059da8886b5e8cde012a80266b18ca146e (diff) | |
| parent | 674a396c6d2ba0341ebdd7c1c9950f32f018e2dd (diff) | |
Merge branch 'linus'
Diffstat (limited to 'arch/powerpc/mm/numa.c')
| -rw-r--r-- | arch/powerpc/mm/numa.c | 160 |
1 files changed, 74 insertions, 86 deletions
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 2863a912bcd0..e89b22aa539e 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c | |||
| @@ -129,10 +129,12 @@ void __init get_region(unsigned int nid, unsigned long *start_pfn, | |||
| 129 | *start_pfn = 0; | 129 | *start_pfn = 0; |
| 130 | } | 130 | } |
| 131 | 131 | ||
| 132 | static inline void map_cpu_to_node(int cpu, int node) | 132 | static void __cpuinit map_cpu_to_node(int cpu, int node) |
| 133 | { | 133 | { |
| 134 | numa_cpu_lookup_table[cpu] = node; | 134 | numa_cpu_lookup_table[cpu] = node; |
| 135 | 135 | ||
| 136 | dbg("adding cpu %d to node %d\n", cpu, node); | ||
| 137 | |||
| 136 | if (!(cpu_isset(cpu, numa_cpumask_lookup_table[node]))) | 138 | if (!(cpu_isset(cpu, numa_cpumask_lookup_table[node]))) |
| 137 | cpu_set(cpu, numa_cpumask_lookup_table[node]); | 139 | cpu_set(cpu, numa_cpumask_lookup_table[node]); |
| 138 | } | 140 | } |
| @@ -153,7 +155,7 @@ static void unmap_cpu_from_node(unsigned long cpu) | |||
| 153 | } | 155 | } |
| 154 | #endif /* CONFIG_HOTPLUG_CPU */ | 156 | #endif /* CONFIG_HOTPLUG_CPU */ |
| 155 | 157 | ||
| 156 | static struct device_node *find_cpu_node(unsigned int cpu) | 158 | static struct device_node * __cpuinit find_cpu_node(unsigned int cpu) |
| 157 | { | 159 | { |
| 158 | unsigned int hw_cpuid = get_hard_smp_processor_id(cpu); | 160 | unsigned int hw_cpuid = get_hard_smp_processor_id(cpu); |
| 159 | struct device_node *cpu_node = NULL; | 161 | struct device_node *cpu_node = NULL; |
| @@ -189,23 +191,29 @@ static int *of_get_associativity(struct device_node *dev) | |||
| 189 | return (unsigned int *)get_property(dev, "ibm,associativity", NULL); | 191 | return (unsigned int *)get_property(dev, "ibm,associativity", NULL); |
| 190 | } | 192 | } |
| 191 | 193 | ||
| 192 | static int of_node_numa_domain(struct device_node *device) | 194 | /* Returns nid in the range [0..MAX_NUMNODES-1], or -1 if no useful numa |
| 195 | * info is found. | ||
| 196 | */ | ||
| 197 | static int of_node_to_nid(struct device_node *device) | ||
| 193 | { | 198 | { |
| 194 | int numa_domain; | 199 | int nid = -1; |
| 195 | unsigned int *tmp; | 200 | unsigned int *tmp; |
| 196 | 201 | ||
| 197 | if (min_common_depth == -1) | 202 | if (min_common_depth == -1) |
| 198 | return 0; | 203 | goto out; |
| 199 | 204 | ||
| 200 | tmp = of_get_associativity(device); | 205 | tmp = of_get_associativity(device); |
| 201 | if (tmp && (tmp[0] >= min_common_depth)) { | 206 | if (!tmp) |
| 202 | numa_domain = tmp[min_common_depth]; | 207 | goto out; |
| 203 | } else { | 208 | |
| 204 | dbg("WARNING: no NUMA information for %s\n", | 209 | if (tmp[0] >= min_common_depth) |
| 205 | device->full_name); | 210 | nid = tmp[min_common_depth]; |
| 206 | numa_domain = 0; | 211 | |
| 207 | } | 212 | /* POWER4 LPAR uses 0xffff as invalid node */ |
| 208 | return numa_domain; | 213 | if (nid == 0xffff || nid >= MAX_NUMNODES) |
| 214 | nid = -1; | ||
| 215 | out: | ||
| 216 | return nid; | ||
| 209 | } | 217 | } |
| 210 | 218 | ||
| 211 | /* | 219 | /* |
| @@ -246,8 +254,7 @@ static int __init find_min_common_depth(void) | |||
| 246 | if ((len >= 1) && ref_points) { | 254 | if ((len >= 1) && ref_points) { |
| 247 | depth = ref_points[1]; | 255 | depth = ref_points[1]; |
| 248 | } else { | 256 | } else { |
| 249 | dbg("WARNING: could not find NUMA " | 257 | dbg("NUMA: ibm,associativity-reference-points not found.\n"); |
| 250 | "associativity reference point\n"); | ||
| 251 | depth = -1; | 258 | depth = -1; |
| 252 | } | 259 | } |
| 253 | of_node_put(rtas_root); | 260 | of_node_put(rtas_root); |
| @@ -283,9 +290,9 @@ static unsigned long __devinit read_n_cells(int n, unsigned int **buf) | |||
| 283 | * Figure out to which domain a cpu belongs and stick it there. | 290 | * Figure out to which domain a cpu belongs and stick it there. |
| 284 | * Return the id of the domain used. | 291 | * Return the id of the domain used. |
| 285 | */ | 292 | */ |
| 286 | static int numa_setup_cpu(unsigned long lcpu) | 293 | static int __cpuinit numa_setup_cpu(unsigned long lcpu) |
| 287 | { | 294 | { |
| 288 | int numa_domain = 0; | 295 | int nid = 0; |
| 289 | struct device_node *cpu = find_cpu_node(lcpu); | 296 | struct device_node *cpu = find_cpu_node(lcpu); |
| 290 | 297 | ||
| 291 | if (!cpu) { | 298 | if (!cpu) { |
| @@ -293,27 +300,16 @@ static int numa_setup_cpu(unsigned long lcpu) | |||
| 293 | goto out; | 300 | goto out; |
| 294 | } | 301 | } |
| 295 | 302 | ||
| 296 | numa_domain = of_node_numa_domain(cpu); | 303 | nid = of_node_to_nid(cpu); |
| 297 | 304 | ||
| 298 | if (numa_domain >= num_online_nodes()) { | 305 | if (nid < 0 || !node_online(nid)) |
| 299 | /* | 306 | nid = any_online_node(NODE_MASK_ALL); |
| 300 | * POWER4 LPAR uses 0xffff as invalid node, | ||
| 301 | * dont warn in this case. | ||
| 302 | */ | ||
| 303 | if (numa_domain != 0xffff) | ||
| 304 | printk(KERN_ERR "WARNING: cpu %ld " | ||
| 305 | "maps to invalid NUMA node %d\n", | ||
| 306 | lcpu, numa_domain); | ||
| 307 | numa_domain = 0; | ||
| 308 | } | ||
| 309 | out: | 307 | out: |
| 310 | node_set_online(numa_domain); | 308 | map_cpu_to_node(lcpu, nid); |
| 311 | |||
| 312 | map_cpu_to_node(lcpu, numa_domain); | ||
| 313 | 309 | ||
| 314 | of_node_put(cpu); | 310 | of_node_put(cpu); |
| 315 | 311 | ||
| 316 | return numa_domain; | 312 | return nid; |
| 317 | } | 313 | } |
| 318 | 314 | ||
| 319 | static int cpu_numa_callback(struct notifier_block *nfb, | 315 | static int cpu_numa_callback(struct notifier_block *nfb, |
| @@ -325,10 +321,7 @@ static int cpu_numa_callback(struct notifier_block *nfb, | |||
| 325 | 321 | ||
| 326 | switch (action) { | 322 | switch (action) { |
| 327 | case CPU_UP_PREPARE: | 323 | case CPU_UP_PREPARE: |
| 328 | if (min_common_depth == -1 || !numa_enabled) | 324 | numa_setup_cpu(lcpu); |
| 329 | map_cpu_to_node(lcpu, 0); | ||
| 330 | else | ||
| 331 | numa_setup_cpu(lcpu); | ||
| 332 | ret = NOTIFY_OK; | 325 | ret = NOTIFY_OK; |
| 333 | break; | 326 | break; |
| 334 | #ifdef CONFIG_HOTPLUG_CPU | 327 | #ifdef CONFIG_HOTPLUG_CPU |
| @@ -375,7 +368,7 @@ static int __init parse_numa_properties(void) | |||
| 375 | { | 368 | { |
| 376 | struct device_node *cpu = NULL; | 369 | struct device_node *cpu = NULL; |
| 377 | struct device_node *memory = NULL; | 370 | struct device_node *memory = NULL; |
| 378 | int max_domain; | 371 | int default_nid = 0; |
| 379 | unsigned long i; | 372 | unsigned long i; |
| 380 | 373 | ||
| 381 | if (numa_enabled == 0) { | 374 | if (numa_enabled == 0) { |
| @@ -385,32 +378,32 @@ static int __init parse_numa_properties(void) | |||
| 385 | 378 | ||
| 386 | min_common_depth = find_min_common_depth(); | 379 | min_common_depth = find_min_common_depth(); |
| 387 | 380 | ||
| 388 | dbg("NUMA associativity depth for CPU/Memory: %d\n", min_common_depth); | ||
| 389 | if (min_common_depth < 0) | 381 | if (min_common_depth < 0) |
| 390 | return min_common_depth; | 382 | return min_common_depth; |
| 391 | 383 | ||
| 392 | max_domain = numa_setup_cpu(boot_cpuid); | 384 | dbg("NUMA associativity depth for CPU/Memory: %d\n", min_common_depth); |
| 393 | 385 | ||
| 394 | /* | 386 | /* |
| 395 | * Even though we connect cpus to numa domains later in SMP init, | 387 | * Even though we connect cpus to numa domains later in SMP |
| 396 | * we need to know the maximum node id now. This is because each | 388 | * init, we need to know the node ids now. This is because |
| 397 | * node id must have NODE_DATA etc backing it. | 389 | * each node to be onlined must have NODE_DATA etc backing it. |
| 398 | * As a result of hotplug we could still have cpus appear later on | ||
| 399 | * with larger node ids. In that case we force the cpu into node 0. | ||
| 400 | */ | 390 | */ |
| 401 | for_each_cpu(i) { | 391 | for_each_present_cpu(i) { |
| 402 | int numa_domain; | 392 | int nid; |
| 403 | 393 | ||
| 404 | cpu = find_cpu_node(i); | 394 | cpu = find_cpu_node(i); |
| 395 | BUG_ON(!cpu); | ||
| 396 | nid = of_node_to_nid(cpu); | ||
| 397 | of_node_put(cpu); | ||
| 405 | 398 | ||
| 406 | if (cpu) { | 399 | /* |
| 407 | numa_domain = of_node_numa_domain(cpu); | 400 | * Don't fall back to default_nid yet -- we will plug |
| 408 | of_node_put(cpu); | 401 | * cpus into nodes once the memory scan has discovered |
| 409 | 402 | * the topology. | |
| 410 | if (numa_domain < MAX_NUMNODES && | 403 | */ |
| 411 | max_domain < numa_domain) | 404 | if (nid < 0) |
| 412 | max_domain = numa_domain; | 405 | continue; |
| 413 | } | 406 | node_set_online(nid); |
| 414 | } | 407 | } |
| 415 | 408 | ||
| 416 | get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells); | 409 | get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells); |
| @@ -418,7 +411,7 @@ static int __init parse_numa_properties(void) | |||
| 418 | while ((memory = of_find_node_by_type(memory, "memory")) != NULL) { | 411 | while ((memory = of_find_node_by_type(memory, "memory")) != NULL) { |
| 419 | unsigned long start; | 412 | unsigned long start; |
| 420 | unsigned long size; | 413 | unsigned long size; |
| 421 | int numa_domain; | 414 | int nid; |
| 422 | int ranges; | 415 | int ranges; |
| 423 | unsigned int *memcell_buf; | 416 | unsigned int *memcell_buf; |
| 424 | unsigned int len; | 417 | unsigned int len; |
| @@ -439,18 +432,15 @@ new_range: | |||
| 439 | start = read_n_cells(n_mem_addr_cells, &memcell_buf); | 432 | start = read_n_cells(n_mem_addr_cells, &memcell_buf); |
| 440 | size = read_n_cells(n_mem_size_cells, &memcell_buf); | 433 | size = read_n_cells(n_mem_size_cells, &memcell_buf); |
| 441 | 434 | ||
| 442 | numa_domain = of_node_numa_domain(memory); | 435 | /* |
| 443 | 436 | * Assumption: either all memory nodes or none will | |
| 444 | if (numa_domain >= MAX_NUMNODES) { | 437 | * have associativity properties. If none, then |
| 445 | if (numa_domain != 0xffff) | 438 | * everything goes to default_nid. |
| 446 | printk(KERN_ERR "WARNING: memory at %lx maps " | 439 | */ |
| 447 | "to invalid NUMA node %d\n", start, | 440 | nid = of_node_to_nid(memory); |
| 448 | numa_domain); | 441 | if (nid < 0) |
| 449 | numa_domain = 0; | 442 | nid = default_nid; |
| 450 | } | 443 | node_set_online(nid); |
| 451 | |||
| 452 | if (max_domain < numa_domain) | ||
| 453 | max_domain = numa_domain; | ||
| 454 | 444 | ||
| 455 | if (!(size = numa_enforce_memory_limit(start, size))) { | 445 | if (!(size = numa_enforce_memory_limit(start, size))) { |
| 456 | if (--ranges) | 446 | if (--ranges) |
| @@ -459,16 +449,13 @@ new_range: | |||
| 459 | continue; | 449 | continue; |
| 460 | } | 450 | } |
| 461 | 451 | ||
| 462 | add_region(numa_domain, start >> PAGE_SHIFT, | 452 | add_region(nid, start >> PAGE_SHIFT, |
| 463 | size >> PAGE_SHIFT); | 453 | size >> PAGE_SHIFT); |
| 464 | 454 | ||
| 465 | if (--ranges) | 455 | if (--ranges) |
| 466 | goto new_range; | 456 | goto new_range; |
| 467 | } | 457 | } |
| 468 | 458 | ||
| 469 | for (i = 0; i <= max_domain; i++) | ||
| 470 | node_set_online(i); | ||
| 471 | |||
| 472 | return 0; | 459 | return 0; |
| 473 | } | 460 | } |
| 474 | 461 | ||
| @@ -483,7 +470,6 @@ static void __init setup_nonnuma(void) | |||
| 483 | printk(KERN_INFO "Memory hole size: %ldMB\n", | 470 | printk(KERN_INFO "Memory hole size: %ldMB\n", |
| 484 | (top_of_ram - total_ram) >> 20); | 471 | (top_of_ram - total_ram) >> 20); |
| 485 | 472 | ||
| 486 | map_cpu_to_node(boot_cpuid, 0); | ||
| 487 | for (i = 0; i < lmb.memory.cnt; ++i) | 473 | for (i = 0; i < lmb.memory.cnt; ++i) |
| 488 | add_region(0, lmb.memory.region[i].base >> PAGE_SHIFT, | 474 | add_region(0, lmb.memory.region[i].base >> PAGE_SHIFT, |
| 489 | lmb_size_pages(&lmb.memory, i)); | 475 | lmb_size_pages(&lmb.memory, i)); |
| @@ -570,11 +556,11 @@ static void __init *careful_allocation(int nid, unsigned long size, | |||
| 570 | unsigned long end_pfn) | 556 | unsigned long end_pfn) |
| 571 | { | 557 | { |
| 572 | int new_nid; | 558 | int new_nid; |
| 573 | unsigned long ret = lmb_alloc_base(size, align, end_pfn << PAGE_SHIFT); | 559 | unsigned long ret = __lmb_alloc_base(size, align, end_pfn << PAGE_SHIFT); |
| 574 | 560 | ||
| 575 | /* retry over all memory */ | 561 | /* retry over all memory */ |
| 576 | if (!ret) | 562 | if (!ret) |
| 577 | ret = lmb_alloc_base(size, align, lmb_end_of_DRAM()); | 563 | ret = __lmb_alloc_base(size, align, lmb_end_of_DRAM()); |
| 578 | 564 | ||
| 579 | if (!ret) | 565 | if (!ret) |
| 580 | panic("numa.c: cannot allocate %lu bytes on node %d", | 566 | panic("numa.c: cannot allocate %lu bytes on node %d", |
| @@ -620,6 +606,8 @@ void __init do_init_bootmem(void) | |||
| 620 | dump_numa_memory_topology(); | 606 | dump_numa_memory_topology(); |
| 621 | 607 | ||
| 622 | register_cpu_notifier(&ppc64_numa_nb); | 608 | register_cpu_notifier(&ppc64_numa_nb); |
| 609 | cpu_numa_callback(&ppc64_numa_nb, CPU_UP_PREPARE, | ||
| 610 | (void *)(unsigned long)boot_cpuid); | ||
| 623 | 611 | ||
| 624 | for_each_online_node(nid) { | 612 | for_each_online_node(nid) { |
| 625 | unsigned long start_pfn, end_pfn, pages_present; | 613 | unsigned long start_pfn, end_pfn, pages_present; |
| @@ -767,10 +755,10 @@ int hot_add_scn_to_nid(unsigned long scn_addr) | |||
| 767 | { | 755 | { |
| 768 | struct device_node *memory = NULL; | 756 | struct device_node *memory = NULL; |
| 769 | nodemask_t nodes; | 757 | nodemask_t nodes; |
| 770 | int numa_domain = 0; | 758 | int default_nid = any_online_node(NODE_MASK_ALL); |
| 771 | 759 | ||
| 772 | if (!numa_enabled || (min_common_depth < 0)) | 760 | if (!numa_enabled || (min_common_depth < 0)) |
| 773 | return numa_domain; | 761 | return default_nid; |
| 774 | 762 | ||
| 775 | while ((memory = of_find_node_by_type(memory, "memory")) != NULL) { | 763 | while ((memory = of_find_node_by_type(memory, "memory")) != NULL) { |
| 776 | unsigned long start, size; | 764 | unsigned long start, size; |
| @@ -787,15 +775,15 @@ int hot_add_scn_to_nid(unsigned long scn_addr) | |||
| 787 | ha_new_range: | 775 | ha_new_range: |
| 788 | start = read_n_cells(n_mem_addr_cells, &memcell_buf); | 776 | start = read_n_cells(n_mem_addr_cells, &memcell_buf); |
| 789 | size = read_n_cells(n_mem_size_cells, &memcell_buf); | 777 | size = read_n_cells(n_mem_size_cells, &memcell_buf); |
| 790 | numa_domain = of_node_numa_domain(memory); | 778 | nid = of_node_to_nid(memory); |
| 791 | 779 | ||
| 792 | /* Domains not present at boot default to 0 */ | 780 | /* Domains not present at boot default to 0 */ |
| 793 | if (!node_online(numa_domain)) | 781 | if (nid < 0 || !node_online(nid)) |
| 794 | numa_domain = any_online_node(NODE_MASK_ALL); | 782 | nid = default_nid; |
| 795 | 783 | ||
| 796 | if ((scn_addr >= start) && (scn_addr < (start + size))) { | 784 | if ((scn_addr >= start) && (scn_addr < (start + size))) { |
| 797 | of_node_put(memory); | 785 | of_node_put(memory); |
| 798 | goto got_numa_domain; | 786 | goto got_nid; |
| 799 | } | 787 | } |
| 800 | 788 | ||
| 801 | if (--ranges) /* process all ranges in cell */ | 789 | if (--ranges) /* process all ranges in cell */ |
| @@ -804,12 +792,12 @@ ha_new_range: | |||
| 804 | BUG(); /* section address should be found above */ | 792 | BUG(); /* section address should be found above */ |
| 805 | 793 | ||
| 806 | /* Temporary code to ensure that returned node is not empty */ | 794 | /* Temporary code to ensure that returned node is not empty */ |
| 807 | got_numa_domain: | 795 | got_nid: |
| 808 | nodes_setall(nodes); | 796 | nodes_setall(nodes); |
| 809 | while (NODE_DATA(numa_domain)->node_spanned_pages == 0) { | 797 | while (NODE_DATA(nid)->node_spanned_pages == 0) { |
| 810 | node_clear(numa_domain, nodes); | 798 | node_clear(nid, nodes); |
| 811 | numa_domain = any_online_node(nodes); | 799 | nid = any_online_node(nodes); |
| 812 | } | 800 | } |
| 813 | return numa_domain; | 801 | return nid; |
| 814 | } | 802 | } |
| 815 | #endif /* CONFIG_MEMORY_HOTPLUG */ | 803 | #endif /* CONFIG_MEMORY_HOTPLUG */ |
