aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/mm
diff options
context:
space:
mode:
authorTejun Heo <tj@kernel.org>2011-02-16 11:11:09 -0500
committerTejun Heo <tj@kernel.org>2011-02-16 11:11:09 -0500
commitac7136b611ee8f8bd6231ce2e1dbdd31ae3d39bc (patch)
tree41f6809ba694a723b521573c2e8d34c652c31281 /arch/x86/mm
parent4697bdcc945c094d2c8a4876a24faeaf31a283e0 (diff)
x86-64, NUMA: Implement generic node distance handling
Node distance either used direct node comparison, ACPI PXM comparison or ACPI SLIT table lookup. This patch implements generic node distance handling. NUMA init methods can call numa_set_distance() to set distance between nodes and the common __node_distance() implementation will report the set distance. Due to the way NUMA emulation is implemented, the generic node distance handling is used only when emulation is not used. Later patches will update NUMA emulation to use the generic distance mechanism. Signed-off-by: Tejun Heo <tj@kernel.org> Cc: Yinghai Lu <yinghai@kernel.org> Cc: Brian Gerst <brgerst@gmail.com> Cc: Cyrill Gorcunov <gorcunov@gmail.com> Cc: Shaohui Zheng <shaohui.zheng@intel.com> Cc: David Rientjes <rientjes@google.com> Cc: Ingo Molnar <mingo@elte.hu> Cc: H. Peter Anvin <hpa@linux.intel.com>
Diffstat (limited to 'arch/x86/mm')
-rw-r--r--arch/x86/mm/numa_64.c95
-rw-r--r--arch/x86/mm/srat_64.c27
2 files changed, 106 insertions, 16 deletions
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 8b1f178a866e..a3621f2953d6 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -45,6 +45,13 @@ static unsigned long __initdata nodemap_size;
45 45
46static struct numa_meminfo numa_meminfo __initdata; 46static struct numa_meminfo numa_meminfo __initdata;
47 47
48static int numa_distance_cnt;
49static u8 *numa_distance;
50
51#ifdef CONFIG_NUMA_EMU
52static bool numa_emu_dist;
53#endif
54
48/* 55/*
49 * Given a shift value, try to populate memnodemap[] 56 * Given a shift value, try to populate memnodemap[]
50 * Returns : 57 * Returns :
@@ -357,6 +364,92 @@ static void __init numa_nodemask_from_meminfo(nodemask_t *nodemask,
357} 364}
358 365
359/* 366/*
367 * Reset distance table. The current table is freed. The next
368 * numa_set_distance() call will create a new one.
369 */
370static void __init numa_reset_distance(void)
371{
372 size_t size;
373
374 size = numa_distance_cnt * sizeof(numa_distance[0]);
375 memblock_x86_free_range(__pa(numa_distance),
376 __pa(numa_distance) + size);
377 numa_distance = NULL;
378 numa_distance_cnt = 0;
379}
380
381/*
382 * Set the distance between node @from to @to to @distance. If distance
383 * table doesn't exist, one which is large enough to accomodate all the
384 * currently known nodes will be created.
385 */
386void __init numa_set_distance(int from, int to, int distance)
387{
388 if (!numa_distance) {
389 nodemask_t nodes_parsed;
390 size_t size;
391 int i, j, cnt = 0;
392 u64 phys;
393
394 /* size the new table and allocate it */
395 nodes_parsed = numa_nodes_parsed;
396 numa_nodemask_from_meminfo(&nodes_parsed, &numa_meminfo);
397
398 for_each_node_mask(i, nodes_parsed)
399 cnt = i;
400 size = ++cnt * sizeof(numa_distance[0]);
401
402 phys = memblock_find_in_range(0,
403 (u64)max_pfn_mapped << PAGE_SHIFT,
404 size, PAGE_SIZE);
405 if (phys == MEMBLOCK_ERROR) {
406 pr_warning("NUMA: Warning: can't allocate distance table!\n");
407 /* don't retry until explicitly reset */
408 numa_distance = (void *)1LU;
409 return;
410 }
411 memblock_x86_reserve_range(phys, phys + size, "NUMA DIST");
412
413 numa_distance = __va(phys);
414 numa_distance_cnt = cnt;
415
416 /* fill with the default distances */
417 for (i = 0; i < cnt; i++)
418 for (j = 0; j < cnt; j++)
419 numa_distance[i * cnt + j] = i == j ?
420 LOCAL_DISTANCE : REMOTE_DISTANCE;
421 printk(KERN_DEBUG "NUMA: Initialized distance table, cnt=%d\n", cnt);
422 }
423
424 if (from >= numa_distance_cnt || to >= numa_distance_cnt) {
425 printk_once(KERN_DEBUG "NUMA: Debug: distance out of bound, from=%d to=%d distance=%d\n",
426 from, to, distance);
427 return;
428 }
429
430 if ((u8)distance != distance ||
431 (from == to && distance != LOCAL_DISTANCE)) {
432 pr_warn_once("NUMA: Warning: invalid distance parameter, from=%d to=%d distance=%d\n",
433 from, to, distance);
434 return;
435 }
436
437 numa_distance[from * numa_distance_cnt + to] = distance;
438}
439
440int __node_distance(int from, int to)
441{
442#if defined(CONFIG_ACPI_NUMA) && defined(CONFIG_NUMA_EMU)
443 if (numa_emu_dist)
444 return acpi_emu_node_distance(from, to);
445#endif
446 if (from >= numa_distance_cnt || to >= numa_distance_cnt)
447 return from == to ? LOCAL_DISTANCE : REMOTE_DISTANCE;
448 return numa_distance[from * numa_distance_cnt + to];
449}
450EXPORT_SYMBOL(__node_distance);
451
452/*
360 * Sanity check to catch more bad NUMA configurations (they are amazingly 453 * Sanity check to catch more bad NUMA configurations (they are amazingly
361 * common). Make sure the nodes cover all memory. 454 * common). Make sure the nodes cover all memory.
362 */ 455 */
@@ -826,6 +919,7 @@ static int __init numa_emulation(unsigned long start_pfn,
826 setup_physnodes(addr, max_addr); 919 setup_physnodes(addr, max_addr);
827 fake_physnodes(acpi, amd, num_nodes); 920 fake_physnodes(acpi, amd, num_nodes);
828 numa_init_array(); 921 numa_init_array();
922 numa_emu_dist = true;
829 return 0; 923 return 0;
830} 924}
831#endif /* CONFIG_NUMA_EMU */ 925#endif /* CONFIG_NUMA_EMU */
@@ -869,6 +963,7 @@ void __init initmem_init(void)
869 nodes_clear(node_online_map); 963 nodes_clear(node_online_map);
870 memset(&numa_meminfo, 0, sizeof(numa_meminfo)); 964 memset(&numa_meminfo, 0, sizeof(numa_meminfo));
871 remove_all_active_ranges(); 965 remove_all_active_ranges();
966 numa_reset_distance();
872 967
873 if (numa_init[i]() < 0) 968 if (numa_init[i]() < 0)
874 continue; 969 continue;
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 4f8e6cde9bf6..d2f53f35d86a 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -50,9 +50,16 @@ static __init inline int srat_disabled(void)
50/* Callback for SLIT parsing */ 50/* Callback for SLIT parsing */
51void __init acpi_numa_slit_init(struct acpi_table_slit *slit) 51void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
52{ 52{
53 int i, j;
53 unsigned length; 54 unsigned length;
54 unsigned long phys; 55 unsigned long phys;
55 56
57 for (i = 0; i < slit->locality_count; i++)
58 for (j = 0; j < slit->locality_count; j++)
59 numa_set_distance(pxm_to_node(i), pxm_to_node(j),
60 slit->entry[slit->locality_count * i + j]);
61
62 /* acpi_slit is used only by emulation */
56 length = slit->header.length; 63 length = slit->header.length;
57 phys = memblock_find_in_range(0, max_pfn_mapped<<PAGE_SHIFT, length, 64 phys = memblock_find_in_range(0, max_pfn_mapped<<PAGE_SHIFT, length,
58 PAGE_SIZE); 65 PAGE_SIZE);
@@ -313,29 +320,17 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
313 node_set(i, numa_nodes_parsed); 320 node_set(i, numa_nodes_parsed);
314} 321}
315 322
316static int null_slit_node_compare(int a, int b) 323int acpi_emu_node_distance(int a, int b)
317{
318 return node_to_pxm(a) == node_to_pxm(b);
319}
320#else
321static int null_slit_node_compare(int a, int b)
322{
323 return a == b;
324}
325#endif /* CONFIG_NUMA_EMU */
326
327int __node_distance(int a, int b)
328{ 324{
329 int index; 325 int index;
330 326
331 if (!acpi_slit) 327 if (!acpi_slit)
332 return null_slit_node_compare(a, b) ? LOCAL_DISTANCE : 328 return node_to_pxm(a) == node_to_pxm(b) ?
333 REMOTE_DISTANCE; 329 LOCAL_DISTANCE : REMOTE_DISTANCE;
334 index = acpi_slit->locality_count * node_to_pxm(a); 330 index = acpi_slit->locality_count * node_to_pxm(a);
335 return acpi_slit->entry[index + node_to_pxm(b)]; 331 return acpi_slit->entry[index + node_to_pxm(b)];
336} 332}
337 333#endif /* CONFIG_NUMA_EMU */
338EXPORT_SYMBOL(__node_distance);
339 334
340#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) || defined(CONFIG_ACPI_HOTPLUG_MEMORY) 335#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) || defined(CONFIG_ACPI_HOTPLUG_MEMORY)
341int memory_add_physaddr_to_nid(u64 start) 336int memory_add_physaddr_to_nid(u64 start)