aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAnton Blanchard <anton@samba.org>2010-05-16 16:22:31 -0400
committerBenjamin Herrenschmidt <benh@kernel.crashing.org>2010-07-08 21:28:35 -0400
commit41eab6f88f24124df89e38067b3766b7bef06ddb (patch)
tree41ec35970c76adbba1558b6243d80be669062136
parenta591f6b56d6fbd7d1951e352fe5b0acf6b91e497 (diff)
powerpc/numa: Use form 1 affinity to setup node distance
Form 1 affinity allows multiple entries in ibm,associativity-reference-points which represent affinity domains in decreasing order of importance. The Linux concept of a node is always the first entry, but using the other values as an input to node_distance() allows the memory allocator to make better decisions on which node to go first when local memory has been exhausted. We keep things simple and create an array indexed by NUMA node, capped at 4 entries. Each time we lookup an associativity property we initialise the array which is overkill, but since we should only hit this path during boot it didn't seem worth adding a per node valid bit. Signed-off-by: Anton Blanchard <anton@samba.org> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
-rw-r--r--arch/powerpc/include/asm/topology.h3
-rw-r--r--arch/powerpc/mm/numa.c122
2 files changed, 92 insertions, 33 deletions
diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index 32adf728072..3033c1b3074 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -87,6 +87,9 @@ static inline int pcibus_to_node(struct pci_bus *bus)
87 .balance_interval = 1, \ 87 .balance_interval = 1, \
88} 88}
89 89
90extern int __node_distance(int, int);
91#define node_distance(a, b) __node_distance(a, b)
92
90extern void __init dump_numa_cpu_topology(void); 93extern void __init dump_numa_cpu_topology(void);
91 94
92extern int sysfs_add_device_to_node(struct sys_device *dev, int nid); 95extern int sysfs_add_device_to_node(struct sys_device *dev, int nid);
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 80d110635d2..f78f19e0a2a 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -42,6 +42,12 @@ EXPORT_SYMBOL(node_data);
42 42
43static int min_common_depth; 43static int min_common_depth;
44static int n_mem_addr_cells, n_mem_size_cells; 44static int n_mem_addr_cells, n_mem_size_cells;
45static int form1_affinity;
46
47#define MAX_DISTANCE_REF_POINTS 4
48static int distance_ref_points_depth;
49static const unsigned int *distance_ref_points;
50static int distance_lookup_table[MAX_NUMNODES][MAX_DISTANCE_REF_POINTS];
45 51
46/* 52/*
47 * Allocate node_to_cpumask_map based on number of available nodes 53 * Allocate node_to_cpumask_map based on number of available nodes
@@ -204,6 +210,39 @@ static const u32 *of_get_usable_memory(struct device_node *memory)
204 return prop; 210 return prop;
205} 211}
206 212
213int __node_distance(int a, int b)
214{
215 int i;
216 int distance = LOCAL_DISTANCE;
217
218 if (!form1_affinity)
219 return distance;
220
221 for (i = 0; i < distance_ref_points_depth; i++) {
222 if (distance_lookup_table[a][i] == distance_lookup_table[b][i])
223 break;
224
225 /* Double the distance for each NUMA level */
226 distance *= 2;
227 }
228
229 return distance;
230}
231
232static void initialize_distance_lookup_table(int nid,
233 const unsigned int *associativity)
234{
235 int i;
236
237 if (!form1_affinity)
238 return;
239
240 for (i = 0; i < distance_ref_points_depth; i++) {
241 distance_lookup_table[nid][i] =
242 associativity[distance_ref_points[i]];
243 }
244}
245
207/* Returns nid in the range [0..MAX_NUMNODES-1], or -1 if no useful numa 246/* Returns nid in the range [0..MAX_NUMNODES-1], or -1 if no useful numa
208 * info is found. 247 * info is found.
209 */ 248 */
@@ -225,6 +264,10 @@ static int of_node_to_nid_single(struct device_node *device)
225 /* POWER4 LPAR uses 0xffff as invalid node */ 264 /* POWER4 LPAR uses 0xffff as invalid node */
226 if (nid == 0xffff || nid >= MAX_NUMNODES) 265 if (nid == 0xffff || nid >= MAX_NUMNODES)
227 nid = -1; 266 nid = -1;
267
268 if (nid > 0 && tmp[0] >= distance_ref_points_depth)
269 initialize_distance_lookup_table(nid, tmp);
270
228out: 271out:
229 return nid; 272 return nid;
230} 273}
@@ -251,26 +294,10 @@ int of_node_to_nid(struct device_node *device)
251} 294}
252EXPORT_SYMBOL_GPL(of_node_to_nid); 295EXPORT_SYMBOL_GPL(of_node_to_nid);
253 296
254/*
255 * In theory, the "ibm,associativity" property may contain multiple
256 * associativity lists because a resource may be multiply connected
257 * into the machine. This resource then has different associativity
258 * characteristics relative to its multiple connections. We ignore
259 * this for now. We also assume that all cpu and memory sets have
260 * their distances represented at a common level. This won't be
261 * true for hierarchical NUMA.
262 *
263 * In any case the ibm,associativity-reference-points should give
264 * the correct depth for a normal NUMA system.
265 *
266 * - Dave Hansen <haveblue@us.ibm.com>
267 */
268static int __init find_min_common_depth(void) 297static int __init find_min_common_depth(void)
269{ 298{
270 int depth, index; 299 int depth;
271 const unsigned int *ref_points;
272 struct device_node *rtas_root; 300 struct device_node *rtas_root;
273 unsigned int len;
274 struct device_node *chosen; 301 struct device_node *chosen;
275 const char *vec5; 302 const char *vec5;
276 303
@@ -280,18 +307,28 @@ static int __init find_min_common_depth(void)
280 return -1; 307 return -1;
281 308
282 /* 309 /*
283 * this property is 2 32-bit integers, each representing a level of 310 * This property is a set of 32-bit integers, each representing
284 * depth in the associativity nodes. The first is for an SMP 311 * an index into the ibm,associativity nodes.
285 * configuration (should be all 0's) and the second is for a normal 312 *
286 * NUMA configuration. 313 * With form 0 affinity the first integer is for an SMP configuration
314 * (should be all 0's) and the second is for a normal NUMA
315 * configuration. We have only one level of NUMA.
316 *
317 * With form 1 affinity the first integer is the most significant
318 * NUMA boundary and the following are progressively less significant
319 * boundaries. There can be more than one level of NUMA.
287 */ 320 */
288 index = 1; 321 distance_ref_points = of_get_property(rtas_root,
289 ref_points = of_get_property(rtas_root, 322 "ibm,associativity-reference-points",
290 "ibm,associativity-reference-points", &len); 323 &distance_ref_points_depth);
324
325 if (!distance_ref_points) {
326 dbg("NUMA: ibm,associativity-reference-points not found.\n");
327 goto err;
328 }
329
330 distance_ref_points_depth /= sizeof(int);
291 331
292 /*
293 * For form 1 affinity information we want the first field
294 */
295#define VEC5_AFFINITY_BYTE 5 332#define VEC5_AFFINITY_BYTE 5
296#define VEC5_AFFINITY 0x80 333#define VEC5_AFFINITY 0x80
297 chosen = of_find_node_by_path("/chosen"); 334 chosen = of_find_node_by_path("/chosen");
@@ -299,19 +336,38 @@ static int __init find_min_common_depth(void)
299 vec5 = of_get_property(chosen, "ibm,architecture-vec-5", NULL); 336 vec5 = of_get_property(chosen, "ibm,architecture-vec-5", NULL);
300 if (vec5 && (vec5[VEC5_AFFINITY_BYTE] & VEC5_AFFINITY)) { 337 if (vec5 && (vec5[VEC5_AFFINITY_BYTE] & VEC5_AFFINITY)) {
301 dbg("Using form 1 affinity\n"); 338 dbg("Using form 1 affinity\n");
302 index = 0; 339 form1_affinity = 1;
303 } 340 }
304 } 341 }
305 342
306 if ((len >= 2 * sizeof(unsigned int)) && ref_points) { 343 if (form1_affinity) {
307 depth = ref_points[index]; 344 depth = distance_ref_points[0];
308 } else { 345 } else {
309 dbg("NUMA: ibm,associativity-reference-points not found.\n"); 346 if (distance_ref_points_depth < 2) {
310 depth = -1; 347 printk(KERN_WARNING "NUMA: "
348 "short ibm,associativity-reference-points\n");
349 goto err;
350 }
351
352 depth = distance_ref_points[1];
311 } 353 }
312 of_node_put(rtas_root);
313 354
355 /*
356 * Warn and cap if the hardware supports more than
357 * MAX_DISTANCE_REF_POINTS domains.
358 */
359 if (distance_ref_points_depth > MAX_DISTANCE_REF_POINTS) {
360 printk(KERN_WARNING "NUMA: distance array capped at "
361 "%d entries\n", MAX_DISTANCE_REF_POINTS);
362 distance_ref_points_depth = MAX_DISTANCE_REF_POINTS;
363 }
364
365 of_node_put(rtas_root);
314 return depth; 366 return depth;
367
368err:
369 of_node_put(rtas_root);
370 return -1;
315} 371}
316 372
317static void __init get_n_mem_cells(int *n_addr_cells, int *n_size_cells) 373static void __init get_n_mem_cells(int *n_addr_cells, int *n_size_cells)