about summary refs log tree commit diff stats
path: root/arch/powerpc/mm/numa.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/mm/numa.c')
-rw-r--r-- arch/powerpc/mm/numa.c 160
1 files changed, 74 insertions, 86 deletions
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 2863a912bcd0..e89b22aa539e 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -129,10 +129,12 @@ void __init get_region(unsigned int nid, unsigned long *start_pfn,
129 *start_pfn = 0; 129 *start_pfn = 0;
130} 130}
131 131
132static inline void map_cpu_to_node(int cpu, int node) 132static void __cpuinit map_cpu_to_node(int cpu, int node)
133{ 133{
134 numa_cpu_lookup_table[cpu] = node; 134 numa_cpu_lookup_table[cpu] = node;
135 135
136 dbg("adding cpu %d to node %d\n", cpu, node);
137
136 if (!(cpu_isset(cpu, numa_cpumask_lookup_table[node]))) 138 if (!(cpu_isset(cpu, numa_cpumask_lookup_table[node])))
137 cpu_set(cpu, numa_cpumask_lookup_table[node]); 139 cpu_set(cpu, numa_cpumask_lookup_table[node]);
138} 140}
@@ -153,7 +155,7 @@ static void unmap_cpu_from_node(unsigned long cpu)
153} 155}
154#endif /* CONFIG_HOTPLUG_CPU */ 156#endif /* CONFIG_HOTPLUG_CPU */
155 157
156static struct device_node *find_cpu_node(unsigned int cpu) 158static struct device_node * __cpuinit find_cpu_node(unsigned int cpu)
157{ 159{
158 unsigned int hw_cpuid = get_hard_smp_processor_id(cpu); 160 unsigned int hw_cpuid = get_hard_smp_processor_id(cpu);
159 struct device_node *cpu_node = NULL; 161 struct device_node *cpu_node = NULL;
@@ -189,23 +191,29 @@ static int *of_get_associativity(struct device_node *dev)
189 return (unsigned int *)get_property(dev, "ibm,associativity", NULL); 191 return (unsigned int *)get_property(dev, "ibm,associativity", NULL);
190} 192}
191 193
192static int of_node_numa_domain(struct device_node *device) 194/* Returns nid in the range [0..MAX_NUMNODES-1], or -1 if no useful numa
195 * info is found.
196 */
197static int of_node_to_nid(struct device_node *device)
193{ 198{
194 int numa_domain; 199 int nid = -1;
195 unsigned int *tmp; 200 unsigned int *tmp;
196 201
197 if (min_common_depth == -1) 202 if (min_common_depth == -1)
198 return 0; 203 goto out;
199 204
200 tmp = of_get_associativity(device); 205 tmp = of_get_associativity(device);
201 if (tmp && (tmp[0] >= min_common_depth)) { 206 if (!tmp)
202 numa_domain = tmp[min_common_depth]; 207 goto out;
203 } else { 208
204 dbg("WARNING: no NUMA information for %s\n", 209 if (tmp[0] >= min_common_depth)
205 device->full_name); 210 nid = tmp[min_common_depth];
206 numa_domain = 0; 211
207 } 212 /* POWER4 LPAR uses 0xffff as invalid node */
208 return numa_domain; 213 if (nid == 0xffff || nid >= MAX_NUMNODES)
214 nid = -1;
215out:
216 return nid;
209} 217}
210 218
211/* 219/*
@@ -246,8 +254,7 @@ static int __init find_min_common_depth(void)
246 if ((len >= 1) && ref_points) { 254 if ((len >= 1) && ref_points) {
247 depth = ref_points[1]; 255 depth = ref_points[1];
248 } else { 256 } else {
249 dbg("WARNING: could not find NUMA " 257 dbg("NUMA: ibm,associativity-reference-points not found.\n");
250 "associativity reference point\n");
251 depth = -1; 258 depth = -1;
252 } 259 }
253 of_node_put(rtas_root); 260 of_node_put(rtas_root);
@@ -283,9 +290,9 @@ static unsigned long __devinit read_n_cells(int n, unsigned int **buf)
283 * Figure out to which domain a cpu belongs and stick it there. 290 * Figure out to which domain a cpu belongs and stick it there.
284 * Return the id of the domain used. 291 * Return the id of the domain used.
285 */ 292 */
286static int numa_setup_cpu(unsigned long lcpu) 293static int __cpuinit numa_setup_cpu(unsigned long lcpu)
287{ 294{
288 int numa_domain = 0; 295 int nid = 0;
289 struct device_node *cpu = find_cpu_node(lcpu); 296 struct device_node *cpu = find_cpu_node(lcpu);
290 297
291 if (!cpu) { 298 if (!cpu) {
@@ -293,27 +300,16 @@ static int numa_setup_cpu(unsigned long lcpu)
293 goto out; 300 goto out;
294 } 301 }
295 302
296 numa_domain = of_node_numa_domain(cpu); 303 nid = of_node_to_nid(cpu);
297 304
298 if (numa_domain >= num_online_nodes()) { 305 if (nid < 0 || !node_online(nid))
299 /* 306 nid = any_online_node(NODE_MASK_ALL);
300 * POWER4 LPAR uses 0xffff as invalid node,
301 * dont warn in this case.
302 */
303 if (numa_domain != 0xffff)
304 printk(KERN_ERR "WARNING: cpu %ld "
305 "maps to invalid NUMA node %d\n",
306 lcpu, numa_domain);
307 numa_domain = 0;
308 }
309out: 307out:
310 node_set_online(numa_domain); 308 map_cpu_to_node(lcpu, nid);
311
312 map_cpu_to_node(lcpu, numa_domain);
313 309
314 of_node_put(cpu); 310 of_node_put(cpu);
315 311
316 return numa_domain; 312 return nid;
317} 313}
318 314
319static int cpu_numa_callback(struct notifier_block *nfb, 315static int cpu_numa_callback(struct notifier_block *nfb,
@@ -325,10 +321,7 @@ static int cpu_numa_callback(struct notifier_block *nfb,
325 321
326 switch (action) { 322 switch (action) {
327 case CPU_UP_PREPARE: 323 case CPU_UP_PREPARE:
328 if (min_common_depth == -1 || !numa_enabled) 324 numa_setup_cpu(lcpu);
329 map_cpu_to_node(lcpu, 0);
330 else
331 numa_setup_cpu(lcpu);
332 ret = NOTIFY_OK; 325 ret = NOTIFY_OK;
333 break; 326 break;
334#ifdef CONFIG_HOTPLUG_CPU 327#ifdef CONFIG_HOTPLUG_CPU
@@ -375,7 +368,7 @@ static int __init parse_numa_properties(void)
375{ 368{
376 struct device_node *cpu = NULL; 369 struct device_node *cpu = NULL;
377 struct device_node *memory = NULL; 370 struct device_node *memory = NULL;
378 int max_domain; 371 int default_nid = 0;
379 unsigned long i; 372 unsigned long i;
380 373
381 if (numa_enabled == 0) { 374 if (numa_enabled == 0) {
@@ -385,32 +378,32 @@ static int __init parse_numa_properties(void)
385 378
386 min_common_depth = find_min_common_depth(); 379 min_common_depth = find_min_common_depth();
387 380
388 dbg("NUMA associativity depth for CPU/Memory: %d\n", min_common_depth);
389 if (min_common_depth < 0) 381 if (min_common_depth < 0)
390 return min_common_depth; 382 return min_common_depth;
391 383
392 max_domain = numa_setup_cpu(boot_cpuid); 384 dbg("NUMA associativity depth for CPU/Memory: %d\n", min_common_depth);
393 385
394 /* 386 /*
395 * Even though we connect cpus to numa domains later in SMP init, 387 * Even though we connect cpus to numa domains later in SMP
396 * we need to know the maximum node id now. This is because each 388 * init, we need to know the node ids now. This is because
397 * node id must have NODE_DATA etc backing it. 389 * each node to be onlined must have NODE_DATA etc backing it.
398 * As a result of hotplug we could still have cpus appear later on
399 * with larger node ids. In that case we force the cpu into node 0.
400 */ 390 */
401 for_each_cpu(i) { 391 for_each_present_cpu(i) {
402 int numa_domain; 392 int nid;
403 393
404 cpu = find_cpu_node(i); 394 cpu = find_cpu_node(i);
395 BUG_ON(!cpu);
396 nid = of_node_to_nid(cpu);
397 of_node_put(cpu);
405 398
406 if (cpu) { 399 /*
407 numa_domain = of_node_numa_domain(cpu); 400 * Don't fall back to default_nid yet -- we will plug
408 of_node_put(cpu); 401 * cpus into nodes once the memory scan has discovered
409 402 * the topology.
410 if (numa_domain < MAX_NUMNODES && 403 */
411 max_domain < numa_domain) 404 if (nid < 0)
412 max_domain = numa_domain; 405 continue;
413 } 406 node_set_online(nid);
414 } 407 }
415 408
416 get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells); 409 get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells);
@@ -418,7 +411,7 @@ static int __init parse_numa_properties(void)
418 while ((memory = of_find_node_by_type(memory, "memory")) != NULL) { 411 while ((memory = of_find_node_by_type(memory, "memory")) != NULL) {
419 unsigned long start; 412 unsigned long start;
420 unsigned long size; 413 unsigned long size;
421 int numa_domain; 414 int nid;
422 int ranges; 415 int ranges;
423 unsigned int *memcell_buf; 416 unsigned int *memcell_buf;
424 unsigned int len; 417 unsigned int len;
@@ -439,18 +432,15 @@ new_range:
439 start = read_n_cells(n_mem_addr_cells, &memcell_buf); 432 start = read_n_cells(n_mem_addr_cells, &memcell_buf);
440 size = read_n_cells(n_mem_size_cells, &memcell_buf); 433 size = read_n_cells(n_mem_size_cells, &memcell_buf);
441 434
442 numa_domain = of_node_numa_domain(memory); 435 /*
443 436 * Assumption: either all memory nodes or none will
444 if (numa_domain >= MAX_NUMNODES) { 437 * have associativity properties. If none, then
445 if (numa_domain != 0xffff) 438 * everything goes to default_nid.
446 printk(KERN_ERR "WARNING: memory at %lx maps " 439 */
447 "to invalid NUMA node %d\n", start, 440 nid = of_node_to_nid(memory);
448 numa_domain); 441 if (nid < 0)
449 numa_domain = 0; 442 nid = default_nid;
450 } 443 node_set_online(nid);
451
452 if (max_domain < numa_domain)
453 max_domain = numa_domain;
454 444
455 if (!(size = numa_enforce_memory_limit(start, size))) { 445 if (!(size = numa_enforce_memory_limit(start, size))) {
456 if (--ranges) 446 if (--ranges)
@@ -459,16 +449,13 @@ new_range:
459 continue; 449 continue;
460 } 450 }
461 451
462 add_region(numa_domain, start >> PAGE_SHIFT, 452 add_region(nid, start >> PAGE_SHIFT,
463 size >> PAGE_SHIFT); 453 size >> PAGE_SHIFT);
464 454
465 if (--ranges) 455 if (--ranges)
466 goto new_range; 456 goto new_range;
467 } 457 }
468 458
469 for (i = 0; i <= max_domain; i++)
470 node_set_online(i);
471
472 return 0; 459 return 0;
473} 460}
474 461
@@ -483,7 +470,6 @@ static void __init setup_nonnuma(void)
483 printk(KERN_INFO "Memory hole size: %ldMB\n", 470 printk(KERN_INFO "Memory hole size: %ldMB\n",
484 (top_of_ram - total_ram) >> 20); 471 (top_of_ram - total_ram) >> 20);
485 472
486 map_cpu_to_node(boot_cpuid, 0);
487 for (i = 0; i < lmb.memory.cnt; ++i) 473 for (i = 0; i < lmb.memory.cnt; ++i)
488 add_region(0, lmb.memory.region[i].base >> PAGE_SHIFT, 474 add_region(0, lmb.memory.region[i].base >> PAGE_SHIFT,
489 lmb_size_pages(&lmb.memory, i)); 475 lmb_size_pages(&lmb.memory, i));
@@ -570,11 +556,11 @@ static void __init *careful_allocation(int nid, unsigned long size,
570 unsigned long end_pfn) 556 unsigned long end_pfn)
571{ 557{
572 int new_nid; 558 int new_nid;
573 unsigned long ret = lmb_alloc_base(size, align, end_pfn << PAGE_SHIFT); 559 unsigned long ret = __lmb_alloc_base(size, align, end_pfn << PAGE_SHIFT);
574 560
575 /* retry over all memory */ 561 /* retry over all memory */
576 if (!ret) 562 if (!ret)
577 ret = lmb_alloc_base(size, align, lmb_end_of_DRAM()); 563 ret = __lmb_alloc_base(size, align, lmb_end_of_DRAM());
578 564
579 if (!ret) 565 if (!ret)
580 panic("numa.c: cannot allocate %lu bytes on node %d", 566 panic("numa.c: cannot allocate %lu bytes on node %d",
@@ -620,6 +606,8 @@ void __init do_init_bootmem(void)
620 dump_numa_memory_topology(); 606 dump_numa_memory_topology();
621 607
622 register_cpu_notifier(&ppc64_numa_nb); 608 register_cpu_notifier(&ppc64_numa_nb);
609 cpu_numa_callback(&ppc64_numa_nb, CPU_UP_PREPARE,
610 (void *)(unsigned long)boot_cpuid);
623 611
624 for_each_online_node(nid) { 612 for_each_online_node(nid) {
625 unsigned long start_pfn, end_pfn, pages_present; 613 unsigned long start_pfn, end_pfn, pages_present;
@@ -767,10 +755,10 @@ int hot_add_scn_to_nid(unsigned long scn_addr)
767{ 755{
768 struct device_node *memory = NULL; 756 struct device_node *memory = NULL;
769 nodemask_t nodes; 757 nodemask_t nodes;
770 int numa_domain = 0; 758 int default_nid = any_online_node(NODE_MASK_ALL);
771 759
772 if (!numa_enabled || (min_common_depth < 0)) 760 if (!numa_enabled || (min_common_depth < 0))
773 return numa_domain; 761 return default_nid;
774 762
775 while ((memory = of_find_node_by_type(memory, "memory")) != NULL) { 763 while ((memory = of_find_node_by_type(memory, "memory")) != NULL) {
776 unsigned long start, size; 764 unsigned long start, size;
@@ -787,15 +775,15 @@ int hot_add_scn_to_nid(unsigned long scn_addr)
787ha_new_range: 775ha_new_range:
788 start = read_n_cells(n_mem_addr_cells, &memcell_buf); 776 start = read_n_cells(n_mem_addr_cells, &memcell_buf);
789 size = read_n_cells(n_mem_size_cells, &memcell_buf); 777 size = read_n_cells(n_mem_size_cells, &memcell_buf);
790 numa_domain = of_node_numa_domain(memory); 778 nid = of_node_to_nid(memory);
791 779
792 /* Domains not present at boot default to 0 */ 780 /* Domains not present at boot default to 0 */
793 if (!node_online(numa_domain)) 781 if (nid < 0 || !node_online(nid))
794 numa_domain = any_online_node(NODE_MASK_ALL); 782 nid = default_nid;
795 783
796 if ((scn_addr >= start) && (scn_addr < (start + size))) { 784 if ((scn_addr >= start) && (scn_addr < (start + size))) {
797 of_node_put(memory); 785 of_node_put(memory);
798 goto got_numa_domain; 786 goto got_nid;
799 } 787 }
800 788
801 if (--ranges) /* process all ranges in cell */ 789 if (--ranges) /* process all ranges in cell */
@@ -804,12 +792,12 @@ ha_new_range:
804 BUG(); /* section address should be found above */ 792 BUG(); /* section address should be found above */
805 793
806 /* Temporary code to ensure that returned node is not empty */ 794 /* Temporary code to ensure that returned node is not empty */
807got_numa_domain: 795got_nid:
808 nodes_setall(nodes); 796 nodes_setall(nodes);
809 while (NODE_DATA(numa_domain)->node_spanned_pages == 0) { 797 while (NODE_DATA(nid)->node_spanned_pages == 0) {
810 node_clear(numa_domain, nodes); 798 node_clear(nid, nodes);
811 numa_domain = any_online_node(nodes); 799 nid = any_online_node(nodes);
812 } 800 }
813 return numa_domain; 801 return nid;
814} 802}
815#endif /* CONFIG_MEMORY_HOTPLUG */ 803#endif /* CONFIG_MEMORY_HOTPLUG */