aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Nathan Lynch <nathanl@austin.ibm.com> 2006-03-20 19:36:45 -0500
committer Paul Mackerras <paulus@samba.org> 2006-03-21 23:04:01 -0500
commit 482ec7c403d239bb4f1732faf9a14988094ce08b (patch)
tree 9abc78e92b304e5ee09428db0cb8922cc86aa213
parent bc16a75926941094db6b42d76014abb5e8d3a910 (diff)
[PATCH] powerpc numa: Support sparse online node map
The powerpc numa code unconditionally onlines all nodes from 0 to the highest node id found, regardless of whether cpus or memory are present in the nodes. This wastes 8K per node and complicates some cpu and memory hotplug situations, such as adding a resource that doesn't map to one of the nodes discovered at boot.

Set nodes online as resources are scanned. Fall back to node 0 only when we're sure this isn't a NUMA machine. Instead of defaulting to node 0 for cases of hot-adding a resource which doesn't belong to any initialized node, assign it to the first online node.

Signed-off-by: Nathan Lynch <nathanl@austin.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
-rw-r--r-- arch/powerpc/mm/numa.c 95
1 file changed, 43 insertions, 52 deletions
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index dd611ef8df7a..7d6ebe3c3b9b 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -191,27 +191,28 @@ static int *of_get_associativity(struct device_node *dev)
191 return (unsigned int *)get_property(dev, "ibm,associativity", NULL); 191 return (unsigned int *)get_property(dev, "ibm,associativity", NULL);
192} 192}
193 193
194/* Returns nid in the range [0..MAX_NUMNODES-1], or -1 if no useful numa
195 * info is found.
196 */
194static int of_node_to_nid(struct device_node *device) 197static int of_node_to_nid(struct device_node *device)
195{ 198{
196 int nid; 199 int nid = -1;
197 unsigned int *tmp; 200 unsigned int *tmp;
198 201
199 if (min_common_depth == -1) 202 if (min_common_depth == -1)
200 return 0; 203 goto out;
201 204
202 tmp = of_get_associativity(device); 205 tmp = of_get_associativity(device);
203 if (tmp && (tmp[0] >= min_common_depth)) { 206 if (!tmp)
207 goto out;
208
209 if (tmp[0] >= min_common_depth)
204 nid = tmp[min_common_depth]; 210 nid = tmp[min_common_depth];
205 } else {
206 dbg("WARNING: no NUMA information for %s\n",
207 device->full_name);
208 nid = 0;
209 }
210 211
211 /* POWER4 LPAR uses 0xffff as invalid node */ 212 /* POWER4 LPAR uses 0xffff as invalid node */
212 if (nid == 0xffff) 213 if (nid == 0xffff || nid >= MAX_NUMNODES)
213 nid = 0; 214 nid = -1;
214 215out:
215 return nid; 216 return nid;
216} 217}
217 218
@@ -301,15 +302,9 @@ static int __cpuinit numa_setup_cpu(unsigned long lcpu)
301 302
302 nid = of_node_to_nid(cpu); 303 nid = of_node_to_nid(cpu);
303 304
304 if (nid >= num_online_nodes()) { 305 if (nid < 0 || !node_online(nid))
305 printk(KERN_ERR "WARNING: cpu %ld " 306 nid = any_online_node(NODE_MASK_ALL);
306 "maps to invalid NUMA node %d\n",
307 lcpu, nid);
308 nid = 0;
309 }
310out: 307out:
311 node_set_online(nid);
312
313 map_cpu_to_node(lcpu, nid); 308 map_cpu_to_node(lcpu, nid);
314 309
315 of_node_put(cpu); 310 of_node_put(cpu);
@@ -376,7 +371,7 @@ static int __init parse_numa_properties(void)
376{ 371{
377 struct device_node *cpu = NULL; 372 struct device_node *cpu = NULL;
378 struct device_node *memory = NULL; 373 struct device_node *memory = NULL;
379 int max_domain = 0; 374 int default_nid = 0;
380 unsigned long i; 375 unsigned long i;
381 376
382 if (numa_enabled == 0) { 377 if (numa_enabled == 0) {
@@ -392,25 +387,26 @@ static int __init parse_numa_properties(void)
392 dbg("NUMA associativity depth for CPU/Memory: %d\n", min_common_depth); 387 dbg("NUMA associativity depth for CPU/Memory: %d\n", min_common_depth);
393 388
394 /* 389 /*
395 * Even though we connect cpus to numa domains later in SMP init, 390 * Even though we connect cpus to numa domains later in SMP
396 * we need to know the maximum node id now. This is because each 391 * init, we need to know the node ids now. This is because
397 * node id must have NODE_DATA etc backing it. 392 * each node to be onlined must have NODE_DATA etc backing it.
398 * As a result of hotplug we could still have cpus appear later on
399 * with larger node ids. In that case we force the cpu into node 0.
400 */ 393 */
401 for_each_cpu(i) { 394 for_each_present_cpu(i) {
402 int nid; 395 int nid;
403 396
404 cpu = find_cpu_node(i); 397 cpu = find_cpu_node(i);
398 BUG_ON(!cpu);
399 nid = of_node_to_nid(cpu);
400 of_node_put(cpu);
405 401
406 if (cpu) { 402 /*
407 nid = of_node_to_nid(cpu); 403 * Don't fall back to default_nid yet -- we will plug
408 of_node_put(cpu); 404 * cpus into nodes once the memory scan has discovered
409 405 * the topology.
410 if (nid < MAX_NUMNODES && 406 */
411 max_domain < nid) 407 if (nid < 0)
412 max_domain = nid; 408 continue;
413 } 409 node_set_online(nid);
414 } 410 }
415 411
416 get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells); 412 get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells);
@@ -439,17 +435,15 @@ new_range:
439 start = read_n_cells(n_mem_addr_cells, &memcell_buf); 435 start = read_n_cells(n_mem_addr_cells, &memcell_buf);
440 size = read_n_cells(n_mem_size_cells, &memcell_buf); 436 size = read_n_cells(n_mem_size_cells, &memcell_buf);
441 437
438 /*
439 * Assumption: either all memory nodes or none will
440 * have associativity properties. If none, then
441 * everything goes to default_nid.
442 */
442 nid = of_node_to_nid(memory); 443 nid = of_node_to_nid(memory);
443 444 if (nid < 0)
444 if (nid >= MAX_NUMNODES) { 445 nid = default_nid;
445 printk(KERN_ERR "WARNING: memory at %lx maps " 446 node_set_online(nid);
446 "to invalid NUMA node %d\n", start,
447 nid);
448 nid = 0;
449 }
450
451 if (max_domain < nid)
452 max_domain = nid;
453 447
454 if (!(size = numa_enforce_memory_limit(start, size))) { 448 if (!(size = numa_enforce_memory_limit(start, size))) {
455 if (--ranges) 449 if (--ranges)
@@ -465,10 +459,7 @@ new_range:
465 goto new_range; 459 goto new_range;
466 } 460 }
467 461
468 for (i = 0; i <= max_domain; i++) 462 numa_setup_cpu(boot_cpuid);
469 node_set_online(i);
470
471 max_domain = numa_setup_cpu(boot_cpuid);
472 463
473 return 0; 464 return 0;
474} 465}
@@ -768,10 +759,10 @@ int hot_add_scn_to_nid(unsigned long scn_addr)
768{ 759{
769 struct device_node *memory = NULL; 760 struct device_node *memory = NULL;
770 nodemask_t nodes; 761 nodemask_t nodes;
771 int nid = 0; 762 int default_nid = any_online_node(NODE_MASK_ALL);
772 763
773 if (!numa_enabled || (min_common_depth < 0)) 764 if (!numa_enabled || (min_common_depth < 0))
774 return nid; 765 return default_nid;
775 766
776 while ((memory = of_find_node_by_type(memory, "memory")) != NULL) { 767 while ((memory = of_find_node_by_type(memory, "memory")) != NULL) {
777 unsigned long start, size; 768 unsigned long start, size;
@@ -791,8 +782,8 @@ ha_new_range:
791 nid = of_node_to_nid(memory); 782 nid = of_node_to_nid(memory);
792 783
793 /* Domains not present at boot default to 0 */ 784 /* Domains not present at boot default to 0 */
794 if (!node_online(nid)) 785 if (nid < 0 || !node_online(nid))
795 nid = any_online_node(NODE_MASK_ALL); 786 nid = default_nid;
796 787
797 if ((scn_addr >= start) && (scn_addr < (start + size))) { 788 if ((scn_addr >= start) && (scn_addr < (start + size))) {
798 of_node_put(memory); 789 of_node_put(memory);