author    Linus Torvalds <torvalds@linux-foundation.org>  2014-12-11 20:48:14 -0500
committer Linus Torvalds <torvalds@linux-foundation.org>  2014-12-11 20:48:14 -0500
commit    140cd7fb04a4a2bc09a30980bc8104cc89e09330 (patch)
tree      776d57c7508f946d592de4334d4d3cb50fd36220 /arch/powerpc/mm/numa.c
parent    27afc5dbda52ee3dbcd0bda7375c917c6936b470 (diff)
parent    56548fc0e86cb9156af7a7e1f15ba78f251dafaf (diff)
Merge tag 'powerpc-3.19-1' of git://git.kernel.org/pub/scm/linux/kernel/git/mpe/linux
Pull powerpc updates from Michael Ellerman:
 "Some nice cleanups like removing bootmem, and removal of __get_cpu_var().

  There is one patch to mm/gup.c. This is the generic GUP implementation,
  but is only used by us and arm(64). We have an ack from Steve Capper,
  and although we didn't get an ack from Andrew he told us to take the
  patch through the powerpc tree.

  There's one cxl patch. This is in drivers/misc, but Greg said he was
  happy for us to manage fixes for it.

  There is an infrastructure patch to support an IPMI driver for OPAL.

  There is also an RTC driver for OPAL. We weren't able to get any
  response from the RTC maintainer, Alessandro Zummo, so in the end we
  just merged the driver.

  The usual batch of Freescale updates from Scott"

* tag 'powerpc-3.19-1' of git://git.kernel.org/pub/scm/linux/kernel/git/mpe/linux: (101 commits)
  powerpc/powernv: Return to cpu offline loop when finished in KVM guest
  powerpc/book3s: Fix partial invalidation of TLBs in MCE code.
  powerpc/mm: don't do tlbie for updatepp request with NO HPTE fault
  powerpc/xmon: Cleanup the breakpoint flags
  powerpc/xmon: Enable HW instruction breakpoint on POWER8
  powerpc/mm/thp: Use tlbiel if possible
  powerpc/mm/thp: Remove code duplication
  powerpc/mm/hugetlb: Sanity check gigantic hugepage count
  powerpc/oprofile: Disable pagefaults during user stack read
  powerpc/mm: Check for matching hpte without taking hpte lock
  powerpc: Drop useless warning in eeh_init()
  powerpc/powernv: Cleanup unused MCE definitions/declarations.
  powerpc/eeh: Dump PHB diag-data early
  powerpc/eeh: Recover EEH error on ownership change for BCM5719
  powerpc/eeh: Set EEH_PE_RESET on PE reset
  powerpc/eeh: Refactor eeh_reset_pe()
  powerpc: Remove more traces of bootmem
  powerpc/pseries: Initialise nvram_pstore_info's buf_lock
  cxl: Name interrupts in /proc/interrupt
  cxl: Return error to PSL if IRQ demultiplexing fails & print clearer warning
  ...
Diffstat (limited to 'arch/powerpc/mm/numa.c')
-rw-r--r--  arch/powerpc/mm/numa.c  224
1 file changed, 35 insertions, 189 deletions
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 9fe6002c1d5a..0257a7d659ef 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -134,28 +134,6 @@ static int __init fake_numa_create_new_node(unsigned long end_pfn,
         return 0;
 }
 
-/*
- * get_node_active_region - Return active region containing pfn
- * Active range returned is empty if none found.
- * @pfn: The page to return the region for
- * @node_ar: Returned set to the active region containing @pfn
- */
-static void __init get_node_active_region(unsigned long pfn,
-                                          struct node_active_region *node_ar)
-{
-        unsigned long start_pfn, end_pfn;
-        int i, nid;
-
-        for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) {
-                if (pfn >= start_pfn && pfn < end_pfn) {
-                        node_ar->nid = nid;
-                        node_ar->start_pfn = start_pfn;
-                        node_ar->end_pfn = end_pfn;
-                        break;
-                }
-        }
-}
-
 static void reset_numa_cpu_lookup_table(void)
 {
         unsigned int cpu;
@@ -928,134 +906,48 @@ static void __init dump_numa_memory_topology(void)
         }
 }
 
-/*
- * Allocate some memory, satisfying the memblock or bootmem allocator where
- * required. nid is the preferred node and end is the physical address of
- * the highest address in the node.
- *
- * Returns the virtual address of the memory.
- */
-static void __init *careful_zallocation(int nid, unsigned long size,
-                                        unsigned long align,
-                                        unsigned long end_pfn)
-{
-        void *ret;
-        int new_nid;
-        unsigned long ret_paddr;
-
-        ret_paddr = __memblock_alloc_base(size, align, end_pfn << PAGE_SHIFT);
-
-        /* retry over all memory */
-        if (!ret_paddr)
-                ret_paddr = __memblock_alloc_base(size, align, memblock_end_of_DRAM());
-
-        if (!ret_paddr)
-                panic("numa.c: cannot allocate %lu bytes for node %d",
-                      size, nid);
-
-        ret = __va(ret_paddr);
-
-        /*
-         * We initialize the nodes in numeric order: 0, 1, 2...
-         * and hand over control from the MEMBLOCK allocator to the
-         * bootmem allocator.  If this function is called for
-         * node 5, then we know that all nodes <5 are using the
-         * bootmem allocator instead of the MEMBLOCK allocator.
-         *
-         * So, check the nid from which this allocation came
-         * and double check to see if we need to use bootmem
-         * instead of the MEMBLOCK.  We don't free the MEMBLOCK memory
-         * since it would be useless.
-         */
-        new_nid = early_pfn_to_nid(ret_paddr >> PAGE_SHIFT);
-        if (new_nid < nid) {
-                ret = __alloc_bootmem_node(NODE_DATA(new_nid),
-                                size, align, 0);
-
-                dbg("alloc_bootmem %p %lx\n", ret, size);
-        }
-
-        memset(ret, 0, size);
-        return ret;
-}
-
 static struct notifier_block ppc64_numa_nb = {
         .notifier_call = cpu_numa_callback,
         .priority = 1 /* Must run before sched domains notifier. */
 };
 
-static void __init mark_reserved_regions_for_nid(int nid)
+/* Initialize NODE_DATA for a node on the local memory */
+static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn)
 {
-        struct pglist_data *node = NODE_DATA(nid);
-        struct memblock_region *reg;
-
-        for_each_memblock(reserved, reg) {
-                unsigned long physbase = reg->base;
-                unsigned long size = reg->size;
-                unsigned long start_pfn = physbase >> PAGE_SHIFT;
-                unsigned long end_pfn = PFN_UP(physbase + size);
-                struct node_active_region node_ar;
-                unsigned long node_end_pfn = pgdat_end_pfn(node);
-
-                /*
-                 * Check to make sure that this memblock.reserved area is
-                 * within the bounds of the node that we care about.
-                 * Checking the nid of the start and end points is not
-                 * sufficient because the reserved area could span the
-                 * entire node.
-                 */
-                if (end_pfn <= node->node_start_pfn ||
-                    start_pfn >= node_end_pfn)
-                        continue;
-
-                get_node_active_region(start_pfn, &node_ar);
-                while (start_pfn < end_pfn &&
-                        node_ar.start_pfn < node_ar.end_pfn) {
-                        unsigned long reserve_size = size;
-                        /*
-                         * if reserved region extends past active region
-                         * then trim size to active region
-                         */
-                        if (end_pfn > node_ar.end_pfn)
-                                reserve_size = (node_ar.end_pfn << PAGE_SHIFT)
-                                        - physbase;
-                        /*
-                         * Only worry about *this* node, others may not
-                         * yet have valid NODE_DATA().
-                         */
-                        if (node_ar.nid == nid) {
-                                dbg("reserve_bootmem %lx %lx nid=%d\n",
-                                        physbase, reserve_size, node_ar.nid);
-                                reserve_bootmem_node(NODE_DATA(node_ar.nid),
-                                                physbase, reserve_size,
-                                                BOOTMEM_DEFAULT);
-                        }
-                        /*
-                         * if reserved region is contained in the active region
-                         * then done.
-                         */
-                        if (end_pfn <= node_ar.end_pfn)
-                                break;
-
-                        /*
-                         * reserved region extends past the active region
-                         * get next active region that contains this
-                         * reserved region
-                         */
-                        start_pfn = node_ar.end_pfn;
-                        physbase = start_pfn << PAGE_SHIFT;
-                        size = size - reserve_size;
-                        get_node_active_region(start_pfn, &node_ar);
-                }
-        }
+        u64 spanned_pages = end_pfn - start_pfn;
+        const size_t nd_size = roundup(sizeof(pg_data_t), SMP_CACHE_BYTES);
+        u64 nd_pa;
+        void *nd;
+        int tnid;
+
+        if (spanned_pages)
+                pr_info("Initmem setup node %d [mem %#010Lx-%#010Lx]\n",
+                        nid, start_pfn << PAGE_SHIFT,
+                        (end_pfn << PAGE_SHIFT) - 1);
+        else
+                pr_info("Initmem setup node %d\n", nid);
+
+        nd_pa = memblock_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
+        nd = __va(nd_pa);
+
+        /* report and initialize */
+        pr_info("  NODE_DATA [mem %#010Lx-%#010Lx]\n",
+                nd_pa, nd_pa + nd_size - 1);
+        tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT);
+        if (tnid != nid)
+                pr_info("    NODE_DATA(%d) on node %d\n", nid, tnid);
+
+        node_data[nid] = nd;
+        memset(NODE_DATA(nid), 0, sizeof(pg_data_t));
+        NODE_DATA(nid)->node_id = nid;
+        NODE_DATA(nid)->node_start_pfn = start_pfn;
+        NODE_DATA(nid)->node_spanned_pages = spanned_pages;
 }
 
-
-void __init do_init_bootmem(void)
+void __init initmem_init(void)
 {
         int nid, cpu;
 
-        min_low_pfn = 0;
         max_low_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
         max_pfn = max_low_pfn;
 
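The core of the new code path is the allocation sequence in setup_node_data() above: each node's pg_data_t is taken directly from memblock and published through node_data[], with no hand-off to bootmem. Below is a condensed sketch of that pattern, not taken from the patch, assuming the 3.19-era memblock API in which memblock_alloc_try_nid() returns a physical address and its last-resort fallback panics on failure; the helper name alloc_node_pgdat() is hypothetical.

/*
 * Sketch only, not part of the patch: the node-local allocation pattern
 * used by setup_node_data(), assuming the 3.19-era memblock API
 * (memblock_alloc_try_nid() returns a physical address and its final
 * fallback panics on failure).  Relies on <linux/memblock.h> and
 * <linux/mmzone.h>.
 */
static void __init alloc_node_pgdat(int nid)
{
        /* Round up so each node's pg_data_t sits in its own cache lines. */
        const size_t nd_size = roundup(sizeof(pg_data_t), SMP_CACHE_BYTES);
        u64 nd_pa;
        void *nd;

        /* Prefer memory on node nid; memblock may fall back to any node. */
        nd_pa = memblock_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);

        /* Early boot: the linear mapping is up, so __va() is usable. */
        nd = __va(nd_pa);
        memset(nd, 0, nd_size);

        /* Publish the allocation so NODE_DATA(nid) resolves to it. */
        node_data[nid] = nd;
        NODE_DATA(nid)->node_id = nid;
}

Unlike the removed careful_zallocation(), there is no point where ownership passes from memblock to bootmem, which is why the reserve_bootmem_node() bookkeeping in mark_reserved_regions_for_nid() can be deleted wholesale.
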
@@ -1064,64 +956,18 @@ void __init do_init_bootmem(void)
         else
                 dump_numa_memory_topology();
 
+        memblock_dump_all();
+
         for_each_online_node(nid) {
                 unsigned long start_pfn, end_pfn;
-                void *bootmem_vaddr;
-                unsigned long bootmap_pages;
 
                 get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
-
-                /*
-                 * Allocate the node structure node local if possible
-                 *
-                 * Be careful moving this around, as it relies on all
-                 * previous nodes' bootmem to be initialized and have
-                 * all reserved areas marked.
-                 */
-                NODE_DATA(nid) = careful_zallocation(nid,
-                                        sizeof(struct pglist_data),
-                                        SMP_CACHE_BYTES, end_pfn);
-
-                dbg("node %d\n", nid);
-                dbg("NODE_DATA() = %p\n", NODE_DATA(nid));
-
-                NODE_DATA(nid)->bdata = &bootmem_node_data[nid];
-                NODE_DATA(nid)->node_start_pfn = start_pfn;
-                NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;
-
-                if (NODE_DATA(nid)->node_spanned_pages == 0)
-                        continue;
-
-                dbg("start_paddr = %lx\n", start_pfn << PAGE_SHIFT);
-                dbg("end_paddr = %lx\n", end_pfn << PAGE_SHIFT);
-
-                bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
-                bootmem_vaddr = careful_zallocation(nid,
-                                        bootmap_pages << PAGE_SHIFT,
-                                        PAGE_SIZE, end_pfn);
-
-                dbg("bootmap_vaddr = %p\n", bootmem_vaddr);
-
-                init_bootmem_node(NODE_DATA(nid),
-                                  __pa(bootmem_vaddr) >> PAGE_SHIFT,
-                                  start_pfn, end_pfn);
-
-                free_bootmem_with_active_regions(nid, end_pfn);
-                /*
-                 * Be very careful about moving this around.  Future
-                 * calls to careful_zallocation() depend on this getting
-                 * done correctly.
-                 */
-                mark_reserved_regions_for_nid(nid);
+                setup_node_data(nid, start_pfn, end_pfn);
                 sparse_memory_present_with_active_regions(nid);
         }
 
-        init_bootmem_done = 1;
+        sparse_init();
 
-        /*
-         * Now bootmem is initialised we can create the node to cpumask
-         * lookup tables and setup the cpu callback to populate them.
-         */
         setup_node_to_cpumask_map();
 
         reset_numa_cpu_lookup_table();
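
Taken together, the hunks above leave initmem_init() reading roughly as follows. This is a reassembly of the added and context lines purely for readability, not additional code from the patch; parts that fall outside the hunks shown are marked as elided.

void __init initmem_init(void)
{
        int nid, cpu;

        max_low_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
        max_pfn = max_low_pfn;

        /* ... NUMA topology parsing/dump, outside the hunks shown ... */

        memblock_dump_all();

        for_each_online_node(nid) {
                unsigned long start_pfn, end_pfn;

                get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
                setup_node_data(nid, start_pfn, end_pfn);
                sparse_memory_present_with_active_regions(nid);
        }

        sparse_init();

        setup_node_to_cpumask_map();

        reset_numa_cpu_lookup_table();

        /* ... remainder of the function, outside the hunks shown ... */
}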