diff options
Diffstat (limited to 'arch/powerpc/mm/numa.c')
-rw-r--r-- | arch/powerpc/mm/numa.c | 162 |
1 files changed, 76 insertions, 86 deletions
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 2863a912bcd0..0a335f34974c 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -129,10 +129,12 @@ void __init get_region(unsigned int nid, unsigned long *start_pfn, | |||
129 | *start_pfn = 0; | 129 | *start_pfn = 0; |
130 | } | 130 | } |
131 | 131 | ||
132 | static inline void map_cpu_to_node(int cpu, int node) | 132 | static void __cpuinit map_cpu_to_node(int cpu, int node) |
133 | { | 133 | { |
134 | numa_cpu_lookup_table[cpu] = node; | 134 | numa_cpu_lookup_table[cpu] = node; |
135 | 135 | ||
136 | dbg("adding cpu %d to node %d\n", cpu, node); | ||
137 | |||
136 | if (!(cpu_isset(cpu, numa_cpumask_lookup_table[node]))) | 138 | if (!(cpu_isset(cpu, numa_cpumask_lookup_table[node]))) |
137 | cpu_set(cpu, numa_cpumask_lookup_table[node]); | 139 | cpu_set(cpu, numa_cpumask_lookup_table[node]); |
138 | } | 140 | } |
@@ -153,7 +155,7 @@ static void unmap_cpu_from_node(unsigned long cpu) | |||
153 | } | 155 | } |
154 | #endif /* CONFIG_HOTPLUG_CPU */ | 156 | #endif /* CONFIG_HOTPLUG_CPU */ |
155 | 157 | ||
156 | static struct device_node *find_cpu_node(unsigned int cpu) | 158 | static struct device_node * __cpuinit find_cpu_node(unsigned int cpu) |
157 | { | 159 | { |
158 | unsigned int hw_cpuid = get_hard_smp_processor_id(cpu); | 160 | unsigned int hw_cpuid = get_hard_smp_processor_id(cpu); |
159 | struct device_node *cpu_node = NULL; | 161 | struct device_node *cpu_node = NULL; |
@@ -189,23 +191,29 @@ static int *of_get_associativity(struct device_node *dev) | |||
189 | return (unsigned int *)get_property(dev, "ibm,associativity", NULL); | 191 | return (unsigned int *)get_property(dev, "ibm,associativity", NULL); |
190 | } | 192 | } |
191 | 193 | ||
192 | static int of_node_numa_domain(struct device_node *device) | 194 | /* Returns nid in the range [0..MAX_NUMNODES-1], or -1 if no useful numa |
195 | * info is found. | ||
196 | */ | ||
197 | static int of_node_to_nid(struct device_node *device) | ||
193 | { | 198 | { |
194 | int numa_domain; | 199 | int nid = -1; |
195 | unsigned int *tmp; | 200 | unsigned int *tmp; |
196 | 201 | ||
197 | if (min_common_depth == -1) | 202 | if (min_common_depth == -1) |
198 | return 0; | 203 | goto out; |
199 | 204 | ||
200 | tmp = of_get_associativity(device); | 205 | tmp = of_get_associativity(device); |
201 | if (tmp && (tmp[0] >= min_common_depth)) { | 206 | if (!tmp) |
202 | numa_domain = tmp[min_common_depth]; | 207 | goto out; |
203 | } else { | 208 | |
204 | dbg("WARNING: no NUMA information for %s\n", | 209 | if (tmp[0] >= min_common_depth) |
205 | device->full_name); | 210 | nid = tmp[min_common_depth]; |
206 | numa_domain = 0; | 211 | |
207 | } | 212 | /* POWER4 LPAR uses 0xffff as invalid node */ |
208 | return numa_domain; | 213 | if (nid == 0xffff || nid >= MAX_NUMNODES) |
214 | nid = -1; | ||
215 | out: | ||
216 | return nid; | ||
209 | } | 217 | } |
210 | 218 | ||
211 | /* | 219 | /* |
@@ -246,8 +254,7 @@ static int __init find_min_common_depth(void) | |||
246 | if ((len >= 1) && ref_points) { | 254 | if ((len >= 1) && ref_points) { |
247 | depth = ref_points[1]; | 255 | depth = ref_points[1]; |
248 | } else { | 256 | } else { |
249 | dbg("WARNING: could not find NUMA " | 257 | dbg("NUMA: ibm,associativity-reference-points not found.\n"); |
250 | "associativity reference point\n"); | ||
251 | depth = -1; | 258 | depth = -1; |
252 | } | 259 | } |
253 | of_node_put(rtas_root); | 260 | of_node_put(rtas_root); |
@@ -283,9 +290,9 @@ static unsigned long __devinit read_n_cells(int n, unsigned int **buf) | |||
283 | * Figure out to which domain a cpu belongs and stick it there. | 290 | * Figure out to which domain a cpu belongs and stick it there. |
284 | * Return the id of the domain used. | 291 | * Return the id of the domain used. |
285 | */ | 292 | */ |
286 | static int numa_setup_cpu(unsigned long lcpu) | 293 | static int __cpuinit numa_setup_cpu(unsigned long lcpu) |
287 | { | 294 | { |
288 | int numa_domain = 0; | 295 | int nid = 0; |
289 | struct device_node *cpu = find_cpu_node(lcpu); | 296 | struct device_node *cpu = find_cpu_node(lcpu); |
290 | 297 | ||
291 | if (!cpu) { | 298 | if (!cpu) { |
@@ -293,27 +300,16 @@ static int numa_setup_cpu(unsigned long lcpu) | |||
293 | goto out; | 300 | goto out; |
294 | } | 301 | } |
295 | 302 | ||
296 | numa_domain = of_node_numa_domain(cpu); | 303 | nid = of_node_to_nid(cpu); |
297 | 304 | ||
298 | if (numa_domain >= num_online_nodes()) { | 305 | if (nid < 0 || !node_online(nid)) |
299 | /* | 306 | nid = any_online_node(NODE_MASK_ALL); |
300 | * POWER4 LPAR uses 0xffff as invalid node, | ||
301 | * dont warn in this case. | ||
302 | */ | ||
303 | if (numa_domain != 0xffff) | ||
304 | printk(KERN_ERR "WARNING: cpu %ld " | ||
305 | "maps to invalid NUMA node %d\n", | ||
306 | lcpu, numa_domain); | ||
307 | numa_domain = 0; | ||
308 | } | ||
309 | out: | 307 | out: |
310 | node_set_online(numa_domain); | 308 | map_cpu_to_node(lcpu, nid); |
311 | |||
312 | map_cpu_to_node(lcpu, numa_domain); | ||
313 | 309 | ||
314 | of_node_put(cpu); | 310 | of_node_put(cpu); |
315 | 311 | ||
316 | return numa_domain; | 312 | return nid; |
317 | } | 313 | } |
318 | 314 | ||
319 | static int cpu_numa_callback(struct notifier_block *nfb, | 315 | static int cpu_numa_callback(struct notifier_block *nfb, |
@@ -325,10 +321,7 @@ static int cpu_numa_callback(struct notifier_block *nfb, | |||
325 | 321 | ||
326 | switch (action) { | 322 | switch (action) { |
327 | case CPU_UP_PREPARE: | 323 | case CPU_UP_PREPARE: |
328 | if (min_common_depth == -1 || !numa_enabled) | 324 | numa_setup_cpu(lcpu); |
329 | map_cpu_to_node(lcpu, 0); | ||
330 | else | ||
331 | numa_setup_cpu(lcpu); | ||
332 | ret = NOTIFY_OK; | 325 | ret = NOTIFY_OK; |
333 | break; | 326 | break; |
334 | #ifdef CONFIG_HOTPLUG_CPU | 327 | #ifdef CONFIG_HOTPLUG_CPU |
@@ -375,7 +368,7 @@ static int __init parse_numa_properties(void) | |||
375 | { | 368 | { |
376 | struct device_node *cpu = NULL; | 369 | struct device_node *cpu = NULL; |
377 | struct device_node *memory = NULL; | 370 | struct device_node *memory = NULL; |
378 | int max_domain; | 371 | int default_nid = 0; |
379 | unsigned long i; | 372 | unsigned long i; |
380 | 373 | ||
381 | if (numa_enabled == 0) { | 374 | if (numa_enabled == 0) { |
@@ -385,32 +378,32 @@ static int __init parse_numa_properties(void) | |||
385 | 378 | ||
386 | min_common_depth = find_min_common_depth(); | 379 | min_common_depth = find_min_common_depth(); |
387 | 380 | ||
388 | dbg("NUMA associativity depth for CPU/Memory: %d\n", min_common_depth); | ||
389 | if (min_common_depth < 0) | 381 | if (min_common_depth < 0) |
390 | return min_common_depth; | 382 | return min_common_depth; |
391 | 383 | ||
392 | max_domain = numa_setup_cpu(boot_cpuid); | 384 | dbg("NUMA associativity depth for CPU/Memory: %d\n", min_common_depth); |
393 | 385 | ||
394 | /* | 386 | /* |
395 | * Even though we connect cpus to numa domains later in SMP init, | 387 | * Even though we connect cpus to numa domains later in SMP |
396 | * we need to know the maximum node id now. This is because each | 388 | * init, we need to know the node ids now. This is because |
397 | * node id must have NODE_DATA etc backing it. | 389 | * each node to be onlined must have NODE_DATA etc backing it. |
398 | * As a result of hotplug we could still have cpus appear later on | ||
399 | * with larger node ids. In that case we force the cpu into node 0. | ||
400 | */ | 390 | */ |
401 | for_each_cpu(i) { | 391 | for_each_present_cpu(i) { |
402 | int numa_domain; | 392 | int nid; |
403 | 393 | ||
404 | cpu = find_cpu_node(i); | 394 | cpu = find_cpu_node(i); |
395 | BUG_ON(!cpu); | ||
396 | nid = of_node_to_nid(cpu); | ||
397 | of_node_put(cpu); | ||
405 | 398 | ||
406 | if (cpu) { | 399 | /* |
407 | numa_domain = of_node_numa_domain(cpu); | 400 | * Don't fall back to default_nid yet -- we will plug |
408 | of_node_put(cpu); | 401 | * cpus into nodes once the memory scan has discovered |
409 | 402 | * the topology. | |
410 | if (numa_domain < MAX_NUMNODES && | 403 | */ |
411 | max_domain < numa_domain) | 404 | if (nid < 0) |
412 | max_domain = numa_domain; | 405 | continue; |
413 | } | 406 | node_set_online(nid); |
414 | } | 407 | } |
415 | 408 | ||
416 | get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells); | 409 | get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells); |
@@ -418,7 +411,7 @@ static int __init parse_numa_properties(void) | |||
418 | while ((memory = of_find_node_by_type(memory, "memory")) != NULL) { | 411 | while ((memory = of_find_node_by_type(memory, "memory")) != NULL) { |
419 | unsigned long start; | 412 | unsigned long start; |
420 | unsigned long size; | 413 | unsigned long size; |
421 | int numa_domain; | 414 | int nid; |
422 | int ranges; | 415 | int ranges; |
423 | unsigned int *memcell_buf; | 416 | unsigned int *memcell_buf; |
424 | unsigned int len; | 417 | unsigned int len; |
@@ -439,18 +432,15 @@ new_range: | |||
439 | start = read_n_cells(n_mem_addr_cells, &memcell_buf); | 432 | start = read_n_cells(n_mem_addr_cells, &memcell_buf); |
440 | size = read_n_cells(n_mem_size_cells, &memcell_buf); | 433 | size = read_n_cells(n_mem_size_cells, &memcell_buf); |
441 | 434 | ||
442 | numa_domain = of_node_numa_domain(memory); | 435 | /* |
443 | 436 | * Assumption: either all memory nodes or none will | |
444 | if (numa_domain >= MAX_NUMNODES) { | 437 | * have associativity properties. If none, then |
445 | if (numa_domain != 0xffff) | 438 | * everything goes to default_nid. |
446 | printk(KERN_ERR "WARNING: memory at %lx maps " | 439 | */ |
447 | "to invalid NUMA node %d\n", start, | 440 | nid = of_node_to_nid(memory); |
448 | numa_domain); | 441 | if (nid < 0) |
449 | numa_domain = 0; | 442 | nid = default_nid; |
450 | } | 443 | node_set_online(nid); |
451 | |||
452 | if (max_domain < numa_domain) | ||
453 | max_domain = numa_domain; | ||
454 | 444 | ||
455 | if (!(size = numa_enforce_memory_limit(start, size))) { | 445 | if (!(size = numa_enforce_memory_limit(start, size))) { |
456 | if (--ranges) | 446 | if (--ranges) |
@@ -459,16 +449,13 @@ new_range: | |||
459 | continue; | 449 | continue; |
460 | } | 450 | } |
461 | 451 | ||
462 | add_region(numa_domain, start >> PAGE_SHIFT, | 452 | add_region(nid, start >> PAGE_SHIFT, |
463 | size >> PAGE_SHIFT); | 453 | size >> PAGE_SHIFT); |
464 | 454 | ||
465 | if (--ranges) | 455 | if (--ranges) |
466 | goto new_range; | 456 | goto new_range; |
467 | } | 457 | } |
468 | 458 | ||
469 | for (i = 0; i <= max_domain; i++) | ||
470 | node_set_online(i); | ||
471 | |||
472 | return 0; | 459 | return 0; |
473 | } | 460 | } |
474 | 461 | ||
@@ -483,7 +470,6 @@ static void __init setup_nonnuma(void) | |||
483 | printk(KERN_INFO "Memory hole size: %ldMB\n", | 470 | printk(KERN_INFO "Memory hole size: %ldMB\n", |
484 | (top_of_ram - total_ram) >> 20); | 471 | (top_of_ram - total_ram) >> 20); |
485 | 472 | ||
486 | map_cpu_to_node(boot_cpuid, 0); | ||
487 | for (i = 0; i < lmb.memory.cnt; ++i) | 473 | for (i = 0; i < lmb.memory.cnt; ++i) |
488 | add_region(0, lmb.memory.region[i].base >> PAGE_SHIFT, | 474 | add_region(0, lmb.memory.region[i].base >> PAGE_SHIFT, |
489 | lmb_size_pages(&lmb.memory, i)); | 475 | lmb_size_pages(&lmb.memory, i)); |
@@ -570,11 +556,11 @@ static void __init *careful_allocation(int nid, unsigned long size, | |||
570 | unsigned long end_pfn) | 556 | unsigned long end_pfn) |
571 | { | 557 | { |
572 | int new_nid; | 558 | int new_nid; |
573 | unsigned long ret = lmb_alloc_base(size, align, end_pfn << PAGE_SHIFT); | 559 | unsigned long ret = __lmb_alloc_base(size, align, end_pfn << PAGE_SHIFT); |
574 | 560 | ||
575 | /* retry over all memory */ | 561 | /* retry over all memory */ |
576 | if (!ret) | 562 | if (!ret) |
577 | ret = lmb_alloc_base(size, align, lmb_end_of_DRAM()); | 563 | ret = __lmb_alloc_base(size, align, lmb_end_of_DRAM()); |
578 | 564 | ||
579 | if (!ret) | 565 | if (!ret) |
580 | panic("numa.c: cannot allocate %lu bytes on node %d", | 566 | panic("numa.c: cannot allocate %lu bytes on node %d", |
@@ -620,6 +606,8 @@ void __init do_init_bootmem(void) | |||
620 | dump_numa_memory_topology(); | 606 | dump_numa_memory_topology(); |
621 | 607 | ||
622 | register_cpu_notifier(&ppc64_numa_nb); | 608 | register_cpu_notifier(&ppc64_numa_nb); |
609 | cpu_numa_callback(&ppc64_numa_nb, CPU_UP_PREPARE, | ||
610 | (void *)(unsigned long)boot_cpuid); | ||
623 | 611 | ||
624 | for_each_online_node(nid) { | 612 | for_each_online_node(nid) { |
625 | unsigned long start_pfn, end_pfn, pages_present; | 613 | unsigned long start_pfn, end_pfn, pages_present; |
@@ -767,10 +755,11 @@ int hot_add_scn_to_nid(unsigned long scn_addr) | |||
767 | { | 755 | { |
768 | struct device_node *memory = NULL; | 756 | struct device_node *memory = NULL; |
769 | nodemask_t nodes; | 757 | nodemask_t nodes; |
770 | int numa_domain = 0; | 758 | int default_nid = any_online_node(NODE_MASK_ALL); |
759 | int nid; | ||
771 | 760 | ||
772 | if (!numa_enabled || (min_common_depth < 0)) | 761 | if (!numa_enabled || (min_common_depth < 0)) |
773 | return numa_domain; | 762 | return default_nid; |
774 | 763 | ||
775 | while ((memory = of_find_node_by_type(memory, "memory")) != NULL) { | 764 | while ((memory = of_find_node_by_type(memory, "memory")) != NULL) { |
776 | unsigned long start, size; | 765 | unsigned long start, size; |
@@ -787,29 +776,30 @@ int hot_add_scn_to_nid(unsigned long scn_addr) | |||
787 | ha_new_range: | 776 | ha_new_range: |
788 | start = read_n_cells(n_mem_addr_cells, &memcell_buf); | 777 | start = read_n_cells(n_mem_addr_cells, &memcell_buf); |
789 | size = read_n_cells(n_mem_size_cells, &memcell_buf); | 778 | size = read_n_cells(n_mem_size_cells, &memcell_buf); |
790 | numa_domain = of_node_numa_domain(memory); | 779 | nid = of_node_to_nid(memory); |
791 | 780 | ||
792 | /* Domains not present at boot default to 0 */ | 781 | /* Domains not present at boot default to 0 */ |
793 | if (!node_online(numa_domain)) | 782 | if (nid < 0 || !node_online(nid)) |
794 | numa_domain = any_online_node(NODE_MASK_ALL); | 783 | nid = default_nid; |
795 | 784 | ||
796 | if ((scn_addr >= start) && (scn_addr < (start + size))) { | 785 | if ((scn_addr >= start) && (scn_addr < (start + size))) { |
797 | of_node_put(memory); | 786 | of_node_put(memory); |
798 | goto got_numa_domain; | 787 | goto got_nid; |
799 | } | 788 | } |
800 | 789 | ||
801 | if (--ranges) /* process all ranges in cell */ | 790 | if (--ranges) /* process all ranges in cell */ |
802 | goto ha_new_range; | 791 | goto ha_new_range; |
803 | } | 792 | } |
804 | BUG(); /* section address should be found above */ | 793 | BUG(); /* section address should be found above */ |
794 | return 0; | ||
805 | 795 | ||
806 | /* Temporary code to ensure that returned node is not empty */ | 796 | /* Temporary code to ensure that returned node is not empty */ |
807 | got_numa_domain: | 797 | got_nid: |
808 | nodes_setall(nodes); | 798 | nodes_setall(nodes); |
809 | while (NODE_DATA(numa_domain)->node_spanned_pages == 0) { | 799 | while (NODE_DATA(nid)->node_spanned_pages == 0) { |
810 | node_clear(numa_domain, nodes); | 800 | node_clear(nid, nodes); |
811 | numa_domain = any_online_node(nodes); | 801 | nid = any_online_node(nodes); |
812 | } | 802 | } |
813 | return numa_domain; | 803 | return nid; |
814 | } | 804 | } |
815 | #endif /* CONFIG_MEMORY_HOTPLUG */ | 805 | #endif /* CONFIG_MEMORY_HOTPLUG */ |