Diffstat (limited to 'arch/powerpc/mm/numa.c')
-rw-r--r--	arch/powerpc/mm/numa.c	224
1 file changed, 35 insertions(+), 189 deletions(-)
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 9fe6002c1d5a..0257a7d659ef 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -134,28 +134,6 @@ static int __init fake_numa_create_new_node(unsigned long end_pfn,
 	return 0;
 }
 
-/*
- * get_node_active_region - Return active region containing pfn
- * Active range returned is empty if none found.
- * @pfn: The page to return the region for
- * @node_ar: Returned set to the active region containing @pfn
- */
-static void __init get_node_active_region(unsigned long pfn,
-					  struct node_active_region *node_ar)
-{
-	unsigned long start_pfn, end_pfn;
-	int i, nid;
-
-	for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) {
-		if (pfn >= start_pfn && pfn < end_pfn) {
-			node_ar->nid = nid;
-			node_ar->start_pfn = start_pfn;
-			node_ar->end_pfn = end_pfn;
-			break;
-		}
-	}
-}
-
 static void reset_numa_cpu_lookup_table(void)
 {
 	unsigned int cpu;
@@ -928,134 +906,48 @@ static void __init dump_numa_memory_topology(void)
 	}
 }
 
-/*
- * Allocate some memory, satisfying the memblock or bootmem allocator where
- * required. nid is the preferred node and end is the physical address of
- * the highest address in the node.
- *
- * Returns the virtual address of the memory.
- */
-static void __init *careful_zallocation(int nid, unsigned long size,
-					unsigned long align,
-					unsigned long end_pfn)
-{
-	void *ret;
-	int new_nid;
-	unsigned long ret_paddr;
-
-	ret_paddr = __memblock_alloc_base(size, align, end_pfn << PAGE_SHIFT);
-
-	/* retry over all memory */
-	if (!ret_paddr)
-		ret_paddr = __memblock_alloc_base(size, align, memblock_end_of_DRAM());
-
-	if (!ret_paddr)
-		panic("numa.c: cannot allocate %lu bytes for node %d",
-		      size, nid);
-
-	ret = __va(ret_paddr);
-
-	/*
-	 * We initialize the nodes in numeric order: 0, 1, 2...
-	 * and hand over control from the MEMBLOCK allocator to the
-	 * bootmem allocator.  If this function is called for
-	 * node 5, then we know that all nodes <5 are using the
-	 * bootmem allocator instead of the MEMBLOCK allocator.
-	 *
-	 * So, check the nid from which this allocation came
-	 * and double check to see if we need to use bootmem
-	 * instead of the MEMBLOCK.  We don't free the MEMBLOCK memory
-	 * since it would be useless.
-	 */
-	new_nid = early_pfn_to_nid(ret_paddr >> PAGE_SHIFT);
-	if (new_nid < nid) {
-		ret = __alloc_bootmem_node(NODE_DATA(new_nid),
-				size, align, 0);
-
-		dbg("alloc_bootmem %p %lx\n", ret, size);
-	}
-
-	memset(ret, 0, size);
-	return ret;
-}
-
 static struct notifier_block ppc64_numa_nb = {
 	.notifier_call = cpu_numa_callback,
 	.priority = 1 /* Must run before sched domains notifier. */
 };
 
-static void __init mark_reserved_regions_for_nid(int nid)
+/* Initialize NODE_DATA for a node on the local memory */
+static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn)
 {
-	struct pglist_data *node = NODE_DATA(nid);
-	struct memblock_region *reg;
-
-	for_each_memblock(reserved, reg) {
-		unsigned long physbase = reg->base;
-		unsigned long size = reg->size;
-		unsigned long start_pfn = physbase >> PAGE_SHIFT;
-		unsigned long end_pfn = PFN_UP(physbase + size);
-		struct node_active_region node_ar;
-		unsigned long node_end_pfn = pgdat_end_pfn(node);
-
-		/*
-		 * Check to make sure that this memblock.reserved area is
-		 * within the bounds of the node that we care about.
-		 * Checking the nid of the start and end points is not
-		 * sufficient because the reserved area could span the
-		 * entire node.
-		 */
-		if (end_pfn <= node->node_start_pfn ||
-		    start_pfn >= node_end_pfn)
-			continue;
-
-		get_node_active_region(start_pfn, &node_ar);
-		while (start_pfn < end_pfn &&
-			node_ar.start_pfn < node_ar.end_pfn) {
-			unsigned long reserve_size = size;
-			/*
-			 * if reserved region extends past active region
-			 * then trim size to active region
-			 */
-			if (end_pfn > node_ar.end_pfn)
-				reserve_size = (node_ar.end_pfn << PAGE_SHIFT)
-					- physbase;
-			/*
-			 * Only worry about *this* node, others may not
-			 * yet have valid NODE_DATA().
-			 */
-			if (node_ar.nid == nid) {
-				dbg("reserve_bootmem %lx %lx nid=%d\n",
-					physbase, reserve_size, node_ar.nid);
-				reserve_bootmem_node(NODE_DATA(node_ar.nid),
-						physbase, reserve_size,
-						BOOTMEM_DEFAULT);
-			}
-			/*
-			 * if reserved region is contained in the active region
-			 * then done.
-			 */
-			if (end_pfn <= node_ar.end_pfn)
-				break;
-
-			/*
-			 * reserved region extends past the active region
-			 * get next active region that contains this
-			 * reserved region
-			 */
-			start_pfn = node_ar.end_pfn;
-			physbase = start_pfn << PAGE_SHIFT;
-			size = size - reserve_size;
-			get_node_active_region(start_pfn, &node_ar);
-		}
-	}
+	u64 spanned_pages = end_pfn - start_pfn;
+	const size_t nd_size = roundup(sizeof(pg_data_t), SMP_CACHE_BYTES);
+	u64 nd_pa;
+	void *nd;
+	int tnid;
+
+	if (spanned_pages)
+		pr_info("Initmem setup node %d [mem %#010Lx-%#010Lx]\n",
+			nid, start_pfn << PAGE_SHIFT,
+			(end_pfn << PAGE_SHIFT) - 1);
+	else
+		pr_info("Initmem setup node %d\n", nid);
+
+	nd_pa = memblock_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
+	nd = __va(nd_pa);
+
+	/* report and initialize */
+	pr_info("  NODE_DATA [mem %#010Lx-%#010Lx]\n",
+		nd_pa, nd_pa + nd_size - 1);
+	tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT);
+	if (tnid != nid)
+		pr_info("    NODE_DATA(%d) on node %d\n", nid, tnid);
+
+	node_data[nid] = nd;
+	memset(NODE_DATA(nid), 0, sizeof(pg_data_t));
+	NODE_DATA(nid)->node_id = nid;
+	NODE_DATA(nid)->node_start_pfn = start_pfn;
+	NODE_DATA(nid)->node_spanned_pages = spanned_pages;
 }
 
-
-void __init do_init_bootmem(void)
+void __init initmem_init(void)
 {
 	int nid, cpu;
 
-	min_low_pfn = 0;
 	max_low_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
 	max_pfn = max_low_pfn;
 
@@ -1064,64 +956,18 @@ void __init do_init_bootmem(void)
 	else
 		dump_numa_memory_topology();
 
+	memblock_dump_all();
+
 	for_each_online_node(nid) {
 		unsigned long start_pfn, end_pfn;
-		void *bootmem_vaddr;
-		unsigned long bootmap_pages;
 
 		get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
-
-		/*
-		 * Allocate the node structure node local if possible
-		 *
-		 * Be careful moving this around, as it relies on all
-		 * previous nodes' bootmem to be initialized and have
-		 * all reserved areas marked.
-		 */
-		NODE_DATA(nid) = careful_zallocation(nid,
-					sizeof(struct pglist_data),
-					SMP_CACHE_BYTES, end_pfn);
-
-		dbg("node %d\n", nid);
-		dbg("NODE_DATA() = %p\n", NODE_DATA(nid));
-
-		NODE_DATA(nid)->bdata = &bootmem_node_data[nid];
-		NODE_DATA(nid)->node_start_pfn = start_pfn;
-		NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;
-
-		if (NODE_DATA(nid)->node_spanned_pages == 0)
-			continue;
-
-		dbg("start_paddr = %lx\n", start_pfn << PAGE_SHIFT);
-		dbg("end_paddr = %lx\n", end_pfn << PAGE_SHIFT);
-
-		bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
-		bootmem_vaddr = careful_zallocation(nid,
-					bootmap_pages << PAGE_SHIFT,
-					PAGE_SIZE, end_pfn);
-
-		dbg("bootmap_vaddr = %p\n", bootmem_vaddr);
-
-		init_bootmem_node(NODE_DATA(nid),
-				  __pa(bootmem_vaddr) >> PAGE_SHIFT,
-				  start_pfn, end_pfn);
-
-		free_bootmem_with_active_regions(nid, end_pfn);
-		/*
-		 * Be very careful about moving this around.  Future
-		 * calls to careful_zallocation() depend on this getting
-		 * done correctly.
-		 */
-		mark_reserved_regions_for_nid(nid);
+		setup_node_data(nid, start_pfn, end_pfn);
 		sparse_memory_present_with_active_regions(nid);
 	}
 
-	init_bootmem_done = 1;
+	sparse_init();
 
-	/*
-	 * Now bootmem is initialised we can create the node to cpumask
-	 * lookup tables and setup the cpu callback to populate them.
-	 */
 	setup_node_to_cpumask_map();
 
 	reset_numa_cpu_lookup_table();