Diffstat (limited to 'arch/powerpc/mm/numa.c')
-rw-r--r--	arch/powerpc/mm/numa.c	139
1 file changed, 94 insertions(+), 45 deletions(-)
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 64c00227b997..aa731af720c0 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -17,7 +17,7 @@
 #include <linux/nodemask.h>
 #include <linux/cpu.h>
 #include <linux/notifier.h>
-#include <linux/lmb.h>
+#include <linux/memblock.h>
 #include <linux/of.h>
 #include <linux/pfn.h>
 #include <asm/sparsemem.h>
@@ -33,16 +33,41 @@ static int numa_debug;
 #define dbg(args...) if (numa_debug) { printk(KERN_INFO args); }
 
 int numa_cpu_lookup_table[NR_CPUS];
-cpumask_t numa_cpumask_lookup_table[MAX_NUMNODES];
+cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
 struct pglist_data *node_data[MAX_NUMNODES];
 
 EXPORT_SYMBOL(numa_cpu_lookup_table);
-EXPORT_SYMBOL(numa_cpumask_lookup_table);
+EXPORT_SYMBOL(node_to_cpumask_map);
 EXPORT_SYMBOL(node_data);
 
 static int min_common_depth;
 static int n_mem_addr_cells, n_mem_size_cells;
 
+/*
+ * Allocate node_to_cpumask_map based on number of available nodes
+ * Requires node_possible_map to be valid.
+ *
+ * Note: node_to_cpumask() is not valid until after this is done.
+ */
+static void __init setup_node_to_cpumask_map(void)
+{
+	unsigned int node, num = 0;
+
+	/* setup nr_node_ids if not done yet */
+	if (nr_node_ids == MAX_NUMNODES) {
+		for_each_node_mask(node, node_possible_map)
+			num = node;
+		nr_node_ids = num + 1;
+	}
+
+	/* allocate the map */
+	for (node = 0; node < nr_node_ids; node++)
+		alloc_bootmem_cpumask_var(&node_to_cpumask_map[node]);
+
+	/* cpumask_of_node() will now work */
+	dbg("Node to cpumask map for %d nodes\n", nr_node_ids);
+}
+
 static int __cpuinit fake_numa_create_new_node(unsigned long end_pfn,
 						unsigned int *nid)
 {
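
Once setup_node_to_cpumask_map() has run, node_to_cpumask_map[nid] behaves like any other struct cpumask and can be walked with the generic helpers. A minimal sketch of a consumer, assuming it sits where the array declaration is visible (the helper name is illustrative, not part of this patch):

static unsigned int cpus_on_node(int nid)
{
	unsigned int cpu, count = 0;

	/* cpumask_var_t already acts as a struct cpumask pointer */
	for_each_cpu(cpu, node_to_cpumask_map[nid])
		count++;

	return count;
}
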
@@ -138,8 +163,8 @@ static void __cpuinit map_cpu_to_node(int cpu, int node)
 
 	dbg("adding cpu %d to node %d\n", cpu, node);
 
-	if (!(cpu_isset(cpu, numa_cpumask_lookup_table[node])))
-		cpu_set(cpu, numa_cpumask_lookup_table[node]);
+	if (!(cpumask_test_cpu(cpu, node_to_cpumask_map[node])))
+		cpumask_set_cpu(cpu, node_to_cpumask_map[node]);
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
@@ -149,8 +174,8 @@ static void unmap_cpu_from_node(unsigned long cpu)
 
 	dbg("removing cpu %lu from node %d\n", cpu, node);
 
-	if (cpu_isset(cpu, numa_cpumask_lookup_table[node])) {
-		cpu_clear(cpu, numa_cpumask_lookup_table[node]);
+	if (cpumask_test_cpu(cpu, node_to_cpumask_map[node])) {
+		cpumask_clear_cpu(cpu, node_to_cpumask_map[node]);
 	} else {
 		printk(KERN_ERR "WARNING: cpu %lu not found in node %d\n",
 		       cpu, node);
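
The two hunks above follow the standard mapping from the old cpumask macros to the struct-cpumask API; for reference (general kernel API facts, not something introduced by this patch):

/*
 *   cpu_isset(cpu, mask)   ->  cpumask_test_cpu(cpu, &mask)
 *   cpu_set(cpu, mask)     ->  cpumask_set_cpu(cpu, &mask)
 *   cpu_clear(cpu, mask)   ->  cpumask_clear_cpu(cpu, &mask)
 *
 * node_to_cpumask_map[] holds cpumask_var_t entries, which are already
 * struct cpumask pointers, so they are passed without the '&'.
 */
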
@@ -242,10 +267,12 @@ EXPORT_SYMBOL_GPL(of_node_to_nid);
  */
 static int __init find_min_common_depth(void)
 {
-	int depth;
+	int depth, index;
 	const unsigned int *ref_points;
 	struct device_node *rtas_root;
 	unsigned int len;
+	struct device_node *chosen;
+	const char *vec5;
 
 	rtas_root = of_find_node_by_path("/rtas");
 
@@ -258,11 +285,26 @@ static int __init find_min_common_depth(void)
  * configuration (should be all 0's) and the second is for a normal
  * NUMA configuration.
  */
+	index = 1;
 	ref_points = of_get_property(rtas_root,
 			"ibm,associativity-reference-points", &len);
 
+	/*
+	 * For form 1 affinity information we want the first field
+	 */
+#define VEC5_AFFINITY_BYTE	5
+#define VEC5_AFFINITY		0x80
+	chosen = of_find_node_by_path("/chosen");
+	if (chosen) {
+		vec5 = of_get_property(chosen, "ibm,architecture-vec-5", NULL);
+		if (vec5 && (vec5[VEC5_AFFINITY_BYTE] & VEC5_AFFINITY)) {
+			dbg("Using form 1 affinity\n");
+			index = 0;
+		}
+	}
+
 	if ((len >= 2 * sizeof(unsigned int)) && ref_points) {
-		depth = ref_points[1];
+		depth = ref_points[index];
 	} else {
 		dbg("NUMA: ibm,associativity-reference-points not found.\n");
 		depth = -1;
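
The new index selection reduces to: use the first reference point when firmware advertises form 1 affinity, otherwise the second. A standalone sketch of that decision (the helper name is hypothetical; the byte and bit values are the ones defined in the hunk above):

static int refpoint_index(const char *vec5)
{
	/* form 1 affinity is advertised in byte 5, bit 0x80 of the
	 * ibm,architecture-vec-5 property under /chosen */
	if (vec5 && (vec5[VEC5_AFFINITY_BYTE] & VEC5_AFFINITY))
		return 0;	/* form 1: first field */
	return 1;		/* form 0: second field */
}
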
@@ -309,7 +351,7 @@ struct of_drconf_cell {
 #define DRCONF_MEM_RESERVED	0x00000080
 
 /*
- * Read the next lmb list entry from the ibm,dynamic-memory property
+ * Read the next memblock list entry from the ibm,dynamic-memory property
  * and return the information in the provided of_drconf_cell structure.
  */
 static void read_drconf_cell(struct of_drconf_cell *drmem, const u32 **cellp)
@@ -330,8 +372,8 @@ static void read_drconf_cell(struct of_drconf_cell *drmem, const u32 **cellp)
 /*
  * Retreive and validate the ibm,dynamic-memory property of the device tree.
  *
- * The layout of the ibm,dynamic-memory property is a number N of lmb
- * list entries followed by N lmb list entries. Each lmb list entry
+ * The layout of the ibm,dynamic-memory property is a number N of memblock
+ * list entries followed by N memblock list entries. Each memblock list entry
  * contains information as layed out in the of_drconf_cell struct above.
  */
 static int of_get_drconf_memory(struct device_node *memory, const u32 **dm)
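
For orientation, the property described above is consumed roughly like this elsewhere in numa.c; a hedged sketch, assuming the of_drconf_cell struct and the two helpers shown in the previous hunks (the function name and flag handling are illustrative):

static void walk_drconf_memory(struct device_node *memory)
{
	const u32 *dm;
	int n, i;

	n = of_get_drconf_memory(memory, &dm);	/* leading cell is the count N */
	for (i = 0; i < n; i++) {
		struct of_drconf_cell drmem;

		/* decodes one entry and advances dm past it */
		read_drconf_cell(&drmem, &dm);
		if (drmem.flags & DRCONF_MEM_RESERVED)
			continue;	/* skip reserved ranges */
		/* ... use drmem.base_addr, drmem.aa_index, ... */
	}
}
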
@@ -498,19 +540,19 @@ static unsigned long __init numa_enforce_memory_limit(unsigned long start,
 					unsigned long size)
 {
 	/*
-	 * We use lmb_end_of_DRAM() in here instead of memory_limit because
+	 * We use memblock_end_of_DRAM() in here instead of memory_limit because
 	 * we've already adjusted it for the limit and it takes care of
 	 * having memory holes below the limit. Also, in the case of
 	 * iommu_is_off, memory_limit is not set but is implicitly enforced.
 	 */
 
-	if (start + size <= lmb_end_of_DRAM())
+	if (start + size <= memblock_end_of_DRAM())
 		return size;
 
-	if (start >= lmb_end_of_DRAM())
+	if (start >= memblock_end_of_DRAM())
 		return 0;
 
-	return lmb_end_of_DRAM() - start;
+	return memblock_end_of_DRAM() - start;
 }
 
 /*
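
The clamp has three outcomes; with purely illustrative numbers (assumed, not taken from the patch):

/* Suppose memblock_end_of_DRAM() == 0x40000000 (1 GB):
 *   start 0x10000000, size 0x10000000  -> fits, returns the full size
 *   start 0x3f000000, size 0x02000000  -> clamped, returns 0x01000000
 *   start 0x40000000 or higher         -> returns 0 (outside usable DRAM)
 */
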
@@ -689,7 +731,7 @@ new_range:
 	}
 
 	/*
-	 * Now do the same thing for each LMB listed in the ibm,dynamic-memory
+	 * Now do the same thing for each MEMBLOCK listed in the ibm,dynamic-memory
 	 * property in the ibm,dynamic-reconfiguration-memory node.
 	 */
 	memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
@@ -701,8 +743,8 @@ new_range:
 
 static void __init setup_nonnuma(void)
 {
-	unsigned long top_of_ram = lmb_end_of_DRAM();
-	unsigned long total_ram = lmb_phys_mem_size();
+	unsigned long top_of_ram = memblock_end_of_DRAM();
+	unsigned long total_ram = memblock_phys_mem_size();
 	unsigned long start_pfn, end_pfn;
 	unsigned int i, nid = 0;
 
@@ -711,9 +753,9 @@ static void __init setup_nonnuma(void)
 	printk(KERN_DEBUG "Memory hole size: %ldMB\n",
 		(top_of_ram - total_ram) >> 20);
 
-	for (i = 0; i < lmb.memory.cnt; ++i) {
-		start_pfn = lmb.memory.region[i].base >> PAGE_SHIFT;
-		end_pfn = start_pfn + lmb_size_pages(&lmb.memory, i);
+	for (i = 0; i < memblock.memory.cnt; ++i) {
+		start_pfn = memblock.memory.region[i].base >> PAGE_SHIFT;
+		end_pfn = start_pfn + memblock_size_pages(&memblock.memory, i);
 
 		fake_numa_create_new_node(end_pfn, &nid);
 		add_active_range(nid, start_pfn, end_pfn);
@@ -737,8 +779,9 @@ void __init dump_numa_cpu_topology(void)
 		 * If we used a CPU iterator here we would miss printing
 		 * the holes in the cpumap.
 		 */
-		for (cpu = 0; cpu < NR_CPUS; cpu++) {
-			if (cpu_isset(cpu, numa_cpumask_lookup_table[node])) {
+		for (cpu = 0; cpu < nr_cpu_ids; cpu++) {
+			if (cpumask_test_cpu(cpu,
+					node_to_cpumask_map[node])) {
 				if (count == 0)
 					printk(" %u", cpu);
 				++count;
@@ -750,7 +793,7 @@ void __init dump_numa_cpu_topology(void)
 		}
 
 		if (count > 1)
-			printk("-%u", NR_CPUS - 1);
+			printk("-%u", nr_cpu_ids - 1);
 		printk("\n");
 	}
 }
@@ -770,7 +813,7 @@ static void __init dump_numa_memory_topology(void)
 
 		count = 0;
 
-		for (i = 0; i < lmb_end_of_DRAM();
+		for (i = 0; i < memblock_end_of_DRAM();
 		     i += (1 << SECTION_SIZE_BITS)) {
 			if (early_pfn_to_nid(i >> PAGE_SHIFT) == node) {
 				if (count == 0)
@@ -790,7 +833,7 @@ static void __init dump_numa_memory_topology(void)
 }
 
 /*
- * Allocate some memory, satisfying the lmb or bootmem allocator where
+ * Allocate some memory, satisfying the memblock or bootmem allocator where
  * required. nid is the preferred node and end is the physical address of
  * the highest address in the node.
  *
@@ -804,11 +847,11 @@ static void __init *careful_zallocation(int nid, unsigned long size,
 	int new_nid;
 	unsigned long ret_paddr;
 
-	ret_paddr = __lmb_alloc_base(size, align, end_pfn << PAGE_SHIFT);
+	ret_paddr = __memblock_alloc_base(size, align, end_pfn << PAGE_SHIFT);
 
 	/* retry over all memory */
 	if (!ret_paddr)
-		ret_paddr = __lmb_alloc_base(size, align, lmb_end_of_DRAM());
+		ret_paddr = __memblock_alloc_base(size, align, memblock_end_of_DRAM());
 
 	if (!ret_paddr)
 		panic("numa.c: cannot allocate %lu bytes for node %d",
@@ -818,14 +861,14 @@ static void __init *careful_zallocation(int nid, unsigned long size,
 
 	/*
 	 * We initialize the nodes in numeric order: 0, 1, 2...
-	 * and hand over control from the LMB allocator to the
+	 * and hand over control from the MEMBLOCK allocator to the
 	 * bootmem allocator.  If this function is called for
 	 * node 5, then we know that all nodes <5 are using the
-	 * bootmem allocator instead of the LMB allocator.
+	 * bootmem allocator instead of the MEMBLOCK allocator.
 	 *
 	 * So, check the nid from which this allocation came
 	 * and double check to see if we need to use bootmem
-	 * instead of the LMB.  We don't free the LMB memory
+	 * instead of the MEMBLOCK.  We don't free the MEMBLOCK memory
 	 * since it would be useless.
 	 */
 	new_nid = early_pfn_to_nid(ret_paddr >> PAGE_SHIFT);
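
A hedged usage sketch, consistent with how do_init_bootmem() later in this file places per-node data (the surrounding variables are assumed from that caller, which is not shown in this diff):

	NODE_DATA(nid) = careful_zallocation(nid,
				sizeof(struct pglist_data),
				SMP_CACHE_BYTES, end_pfn);
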
@@ -850,9 +893,9 @@ static void mark_reserved_regions_for_nid(int nid)
 	struct pglist_data *node = NODE_DATA(nid);
 	int i;
 
-	for (i = 0; i < lmb.reserved.cnt; i++) {
-		unsigned long physbase = lmb.reserved.region[i].base;
-		unsigned long size = lmb.reserved.region[i].size;
+	for (i = 0; i < memblock.reserved.cnt; i++) {
+		unsigned long physbase = memblock.reserved.region[i].base;
+		unsigned long size = memblock.reserved.region[i].size;
 		unsigned long start_pfn = physbase >> PAGE_SHIFT;
 		unsigned long end_pfn = PFN_UP(physbase + size);
 		struct node_active_region node_ar;
@@ -860,7 +903,7 @@ static void mark_reserved_regions_for_nid(int nid)
 			node->node_spanned_pages;
 
 		/*
-		 * Check to make sure that this lmb.reserved area is
+		 * Check to make sure that this memblock.reserved area is
 		 * within the bounds of the node that we care about.
 		 * Checking the nid of the start and end points is not
 		 * sufficient because the reserved area could span the
@@ -918,7 +961,7 @@ void __init do_init_bootmem(void)
 	int nid;
 
 	min_low_pfn = 0;
-	max_low_pfn = lmb_end_of_DRAM() >> PAGE_SHIFT;
+	max_low_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
 	max_pfn = max_low_pfn;
 
 	if (parse_numa_properties())
@@ -926,10 +969,6 @@ void __init do_init_bootmem(void)
 	else
 		dump_numa_memory_topology();
 
-	register_cpu_notifier(&ppc64_numa_nb);
-	cpu_numa_callback(&ppc64_numa_nb, CPU_UP_PREPARE,
-			  (void *)(unsigned long)boot_cpuid);
-
 	for_each_online_node(nid) {
 		unsigned long start_pfn, end_pfn;
 		void *bootmem_vaddr;
@@ -983,13 +1022,23 @@ void __init do_init_bootmem(void)
 	}
 
 	init_bootmem_done = 1;
+
+	/*
+	 * Now bootmem is initialised we can create the node to cpumask
+	 * lookup tables and setup the cpu callback to populate them.
+	 */
+	setup_node_to_cpumask_map();
+
+	register_cpu_notifier(&ppc64_numa_nb);
+	cpu_numa_callback(&ppc64_numa_nb, CPU_UP_PREPARE,
+			  (void *)(unsigned long)boot_cpuid);
 }
 
 void __init paging_init(void)
 {
 	unsigned long max_zone_pfns[MAX_NR_ZONES];
 	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
-	max_zone_pfns[ZONE_DMA] = lmb_end_of_DRAM() >> PAGE_SHIFT;
+	max_zone_pfns[ZONE_DMA] = memblock_end_of_DRAM() >> PAGE_SHIFT;
 	free_area_init_nodes(max_zone_pfns);
 }
 
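
The placement of the two new calls at the end of do_init_bootmem() reflects an ordering dependency; briefly (an inference from the code above, not a statement in the patch):

/* setup_node_to_cpumask_map() allocates with alloc_bootmem_cpumask_var(),
 * so it can only run once bootmem is up (init_bootmem_done = 1 above).
 * Registering the notifier afterwards lets CPUs brought online later be
 * added to node_to_cpumask_map[] through cpu_numa_callback(). */
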
@@ -1064,7 +1113,7 @@ static int hot_add_drconf_scn_to_nid(struct device_node *memory,
 /*
  * Find the node associated with a hot added memory section for memory
  * represented in the device tree as a node (i.e. memory@XXXX) for
- * each lmb.
+ * each memblock.
  */
 int hot_add_node_scn_to_nid(unsigned long scn_addr)
 {
@@ -1105,8 +1154,8 @@ int hot_add_node_scn_to_nid(unsigned long scn_addr)
 
 /*
  * Find the node associated with a hot added memory section.  Section
- * corresponds to a SPARSEMEM section, not an LMB.  It is assumed that
- * sections are fully contained within a single LMB.
+ * corresponds to a SPARSEMEM section, not an MEMBLOCK.  It is assumed that
+ * sections are fully contained within a single MEMBLOCK.
  */
 int hot_add_scn_to_nid(unsigned long scn_addr)
 {