diff options
Diffstat (limited to 'arch/powerpc/mm/numa.c')
-rw-r--r-- | arch/powerpc/mm/numa.c | 224 |
1 files changed, 35 insertions, 189 deletions
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 9fe6002c1d5a..0257a7d659ef 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c | |||
@@ -134,28 +134,6 @@ static int __init fake_numa_create_new_node(unsigned long end_pfn, | |||
134 | return 0; | 134 | return 0; |
135 | } | 135 | } |
136 | 136 | ||
137 | /* | ||
138 | * get_node_active_region - Return active region containing pfn | ||
139 | * Active range returned is empty if none found. | ||
140 | * @pfn: The page to return the region for | ||
141 | * @node_ar: Returned set to the active region containing @pfn | ||
142 | */ | ||
143 | static void __init get_node_active_region(unsigned long pfn, | ||
144 | struct node_active_region *node_ar) | ||
145 | { | ||
146 | unsigned long start_pfn, end_pfn; | ||
147 | int i, nid; | ||
148 | |||
149 | for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) { | ||
150 | if (pfn >= start_pfn && pfn < end_pfn) { | ||
151 | node_ar->nid = nid; | ||
152 | node_ar->start_pfn = start_pfn; | ||
153 | node_ar->end_pfn = end_pfn; | ||
154 | break; | ||
155 | } | ||
156 | } | ||
157 | } | ||
158 | |||
159 | static void reset_numa_cpu_lookup_table(void) | 137 | static void reset_numa_cpu_lookup_table(void) |
160 | { | 138 | { |
161 | unsigned int cpu; | 139 | unsigned int cpu; |
@@ -928,134 +906,48 @@ static void __init dump_numa_memory_topology(void) | |||
928 | } | 906 | } |
929 | } | 907 | } |
930 | 908 | ||
931 | /* | ||
932 | * Allocate some memory, satisfying the memblock or bootmem allocator where | ||
933 | * required. nid is the preferred node and end is the physical address of | ||
934 | * the highest address in the node. | ||
935 | * | ||
936 | * Returns the virtual address of the memory. | ||
937 | */ | ||
938 | static void __init *careful_zallocation(int nid, unsigned long size, | ||
939 | unsigned long align, | ||
940 | unsigned long end_pfn) | ||
941 | { | ||
942 | void *ret; | ||
943 | int new_nid; | ||
944 | unsigned long ret_paddr; | ||
945 | |||
946 | ret_paddr = __memblock_alloc_base(size, align, end_pfn << PAGE_SHIFT); | ||
947 | |||
948 | /* retry over all memory */ | ||
949 | if (!ret_paddr) | ||
950 | ret_paddr = __memblock_alloc_base(size, align, memblock_end_of_DRAM()); | ||
951 | |||
952 | if (!ret_paddr) | ||
953 | panic("numa.c: cannot allocate %lu bytes for node %d", | ||
954 | size, nid); | ||
955 | |||
956 | ret = __va(ret_paddr); | ||
957 | |||
958 | /* | ||
959 | * We initialize the nodes in numeric order: 0, 1, 2... | ||
960 | * and hand over control from the MEMBLOCK allocator to the | ||
961 | * bootmem allocator. If this function is called for | ||
962 | * node 5, then we know that all nodes <5 are using the | ||
963 | * bootmem allocator instead of the MEMBLOCK allocator. | ||
964 | * | ||
965 | * So, check the nid from which this allocation came | ||
966 | * and double check to see if we need to use bootmem | ||
967 | * instead of the MEMBLOCK. We don't free the MEMBLOCK memory | ||
968 | * since it would be useless. | ||
969 | */ | ||
970 | new_nid = early_pfn_to_nid(ret_paddr >> PAGE_SHIFT); | ||
971 | if (new_nid < nid) { | ||
972 | ret = __alloc_bootmem_node(NODE_DATA(new_nid), | ||
973 | size, align, 0); | ||
974 | |||
975 | dbg("alloc_bootmem %p %lx\n", ret, size); | ||
976 | } | ||
977 | |||
978 | memset(ret, 0, size); | ||
979 | return ret; | ||
980 | } | ||
981 | |||
982 | static struct notifier_block ppc64_numa_nb = { | 909 | static struct notifier_block ppc64_numa_nb = { |
983 | .notifier_call = cpu_numa_callback, | 910 | .notifier_call = cpu_numa_callback, |
984 | .priority = 1 /* Must run before sched domains notifier. */ | 911 | .priority = 1 /* Must run before sched domains notifier. */ |
985 | }; | 912 | }; |
986 | 913 | ||
987 | static void __init mark_reserved_regions_for_nid(int nid) | 914 | /* Initialize NODE_DATA for a node on the local memory */ |
915 | static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn) | ||
988 | { | 916 | { |
989 | struct pglist_data *node = NODE_DATA(nid); | 917 | u64 spanned_pages = end_pfn - start_pfn; |
990 | struct memblock_region *reg; | 918 | const size_t nd_size = roundup(sizeof(pg_data_t), SMP_CACHE_BYTES); |
991 | 919 | u64 nd_pa; | |
992 | for_each_memblock(reserved, reg) { | 920 | void *nd; |
993 | unsigned long physbase = reg->base; | 921 | int tnid; |
994 | unsigned long size = reg->size; | 922 | |
995 | unsigned long start_pfn = physbase >> PAGE_SHIFT; | 923 | if (spanned_pages) |
996 | unsigned long end_pfn = PFN_UP(physbase + size); | 924 | pr_info("Initmem setup node %d [mem %#010Lx-%#010Lx]\n", |
997 | struct node_active_region node_ar; | 925 | nid, start_pfn << PAGE_SHIFT, |
998 | unsigned long node_end_pfn = pgdat_end_pfn(node); | 926 | (end_pfn << PAGE_SHIFT) - 1); |
999 | 927 | else | |
1000 | /* | 928 | pr_info("Initmem setup node %d\n", nid); |
1001 | * Check to make sure that this memblock.reserved area is | 929 | |
1002 | * within the bounds of the node that we care about. | 930 | nd_pa = memblock_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid); |
1003 | * Checking the nid of the start and end points is not | 931 | nd = __va(nd_pa); |
1004 | * sufficient because the reserved area could span the | 932 | |
1005 | * entire node. | 933 | /* report and initialize */ |
1006 | */ | 934 | pr_info(" NODE_DATA [mem %#010Lx-%#010Lx]\n", |
1007 | if (end_pfn <= node->node_start_pfn || | 935 | nd_pa, nd_pa + nd_size - 1); |
1008 | start_pfn >= node_end_pfn) | 936 | tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT); |
1009 | continue; | 937 | if (tnid != nid) |
1010 | 938 | pr_info(" NODE_DATA(%d) on node %d\n", nid, tnid); | |
1011 | get_node_active_region(start_pfn, &node_ar); | 939 | |
1012 | while (start_pfn < end_pfn && | 940 | node_data[nid] = nd; |
1013 | node_ar.start_pfn < node_ar.end_pfn) { | 941 | memset(NODE_DATA(nid), 0, sizeof(pg_data_t)); |
1014 | unsigned long reserve_size = size; | 942 | NODE_DATA(nid)->node_id = nid; |
1015 | /* | 943 | NODE_DATA(nid)->node_start_pfn = start_pfn; |
1016 | * if reserved region extends past active region | 944 | NODE_DATA(nid)->node_spanned_pages = spanned_pages; |
1017 | * then trim size to active region | ||
1018 | */ | ||
1019 | if (end_pfn > node_ar.end_pfn) | ||
1020 | reserve_size = (node_ar.end_pfn << PAGE_SHIFT) | ||
1021 | - physbase; | ||
1022 | /* | ||
1023 | * Only worry about *this* node, others may not | ||
1024 | * yet have valid NODE_DATA(). | ||
1025 | */ | ||
1026 | if (node_ar.nid == nid) { | ||
1027 | dbg("reserve_bootmem %lx %lx nid=%d\n", | ||
1028 | physbase, reserve_size, node_ar.nid); | ||
1029 | reserve_bootmem_node(NODE_DATA(node_ar.nid), | ||
1030 | physbase, reserve_size, | ||
1031 | BOOTMEM_DEFAULT); | ||
1032 | } | ||
1033 | /* | ||
1034 | * if reserved region is contained in the active region | ||
1035 | * then done. | ||
1036 | */ | ||
1037 | if (end_pfn <= node_ar.end_pfn) | ||
1038 | break; | ||
1039 | |||
1040 | /* | ||
1041 | * reserved region extends past the active region | ||
1042 | * get next active region that contains this | ||
1043 | * reserved region | ||
1044 | */ | ||
1045 | start_pfn = node_ar.end_pfn; | ||
1046 | physbase = start_pfn << PAGE_SHIFT; | ||
1047 | size = size - reserve_size; | ||
1048 | get_node_active_region(start_pfn, &node_ar); | ||
1049 | } | ||
1050 | } | ||
1051 | } | 945 | } |
1052 | 946 | ||
1053 | 947 | void __init initmem_init(void) | |
1054 | void __init do_init_bootmem(void) | ||
1055 | { | 948 | { |
1056 | int nid, cpu; | 949 | int nid, cpu; |
1057 | 950 | ||
1058 | min_low_pfn = 0; | ||
1059 | max_low_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT; | 951 | max_low_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT; |
1060 | max_pfn = max_low_pfn; | 952 | max_pfn = max_low_pfn; |
1061 | 953 | ||
@@ -1064,64 +956,18 @@ void __init do_init_bootmem(void) | |||
1064 | else | 956 | else |
1065 | dump_numa_memory_topology(); | 957 | dump_numa_memory_topology(); |
1066 | 958 | ||
959 | memblock_dump_all(); | ||
960 | |||
1067 | for_each_online_node(nid) { | 961 | for_each_online_node(nid) { |
1068 | unsigned long start_pfn, end_pfn; | 962 | unsigned long start_pfn, end_pfn; |
1069 | void *bootmem_vaddr; | ||
1070 | unsigned long bootmap_pages; | ||
1071 | 963 | ||
1072 | get_pfn_range_for_nid(nid, &start_pfn, &end_pfn); | 964 | get_pfn_range_for_nid(nid, &start_pfn, &end_pfn); |
1073 | 965 | setup_node_data(nid, start_pfn, end_pfn); | |
1074 | /* | ||
1075 | * Allocate the node structure node local if possible | ||
1076 | * | ||
1077 | * Be careful moving this around, as it relies on all | ||
1078 | * previous nodes' bootmem to be initialized and have | ||
1079 | * all reserved areas marked. | ||
1080 | */ | ||
1081 | NODE_DATA(nid) = careful_zallocation(nid, | ||
1082 | sizeof(struct pglist_data), | ||
1083 | SMP_CACHE_BYTES, end_pfn); | ||
1084 | |||
1085 | dbg("node %d\n", nid); | ||
1086 | dbg("NODE_DATA() = %p\n", NODE_DATA(nid)); | ||
1087 | |||
1088 | NODE_DATA(nid)->bdata = &bootmem_node_data[nid]; | ||
1089 | NODE_DATA(nid)->node_start_pfn = start_pfn; | ||
1090 | NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn; | ||
1091 | |||
1092 | if (NODE_DATA(nid)->node_spanned_pages == 0) | ||
1093 | continue; | ||
1094 | |||
1095 | dbg("start_paddr = %lx\n", start_pfn << PAGE_SHIFT); | ||
1096 | dbg("end_paddr = %lx\n", end_pfn << PAGE_SHIFT); | ||
1097 | |||
1098 | bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn); | ||
1099 | bootmem_vaddr = careful_zallocation(nid, | ||
1100 | bootmap_pages << PAGE_SHIFT, | ||
1101 | PAGE_SIZE, end_pfn); | ||
1102 | |||
1103 | dbg("bootmap_vaddr = %p\n", bootmem_vaddr); | ||
1104 | |||
1105 | init_bootmem_node(NODE_DATA(nid), | ||
1106 | __pa(bootmem_vaddr) >> PAGE_SHIFT, | ||
1107 | start_pfn, end_pfn); | ||
1108 | |||
1109 | free_bootmem_with_active_regions(nid, end_pfn); | ||
1110 | /* | ||
1111 | * Be very careful about moving this around. Future | ||
1112 | * calls to careful_zallocation() depend on this getting | ||
1113 | * done correctly. | ||
1114 | */ | ||
1115 | mark_reserved_regions_for_nid(nid); | ||
1116 | sparse_memory_present_with_active_regions(nid); | 966 | sparse_memory_present_with_active_regions(nid); |
1117 | } | 967 | } |
1118 | 968 | ||
1119 | init_bootmem_done = 1; | 969 | sparse_init(); |
1120 | 970 | ||
1121 | /* | ||
1122 | * Now bootmem is initialised we can create the node to cpumask | ||
1123 | * lookup tables and setup the cpu callback to populate them. | ||
1124 | */ | ||
1125 | setup_node_to_cpumask_map(); | 971 | setup_node_to_cpumask_map(); |
1126 | 972 | ||
1127 | reset_numa_cpu_lookup_table(); | 973 | reset_numa_cpu_lookup_table(); |