aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMichael Bringmann <mwb@linux.vnet.ibm.com>2017-11-28 17:58:40 -0500
committerMichael Ellerman <mpe@ellerman.id.au>2018-01-27 04:59:02 -0500
commitea05ba7c559c8e5a5946c3a94a2a266e9a6680a6 (patch)
treeffc9480f9636c1b1d0882e9d27fe829d92e8fa87
parenta346137e9142b039fd13af2e59696e3d40c487ef (diff)
powerpc/numa: Ensure nodes initialized for hotplug
This patch fixes some problems encountered at runtime with configurations that support memory-less nodes, or that hot-add CPUs into nodes that are memoryless during system execution after boot. The problems of interest include: * Nodes known to powerpc to be memoryless at boot, but to have CPUs in them are allowed to be 'possible' and 'online'. Memory allocations for those nodes are taken from another node that does have memory until and if memory is hot-added to the node. * Nodes which have no resources assigned at boot, but which may still be referenced subsequently by affinity or associativity attributes, are kept in the list of 'possible' nodes for powerpc. Hot-add of memory or CPUs to the system can reference these nodes and bring them online instead of redirecting the references to one of the set of nodes known to have memory at boot. Note that this software operates under the context of CPU hotplug. We are not doing memory hotplug in this code, but rather updating the kernel's CPU topology (i.e. arch_update_cpu_topology / numa_update_cpu_topology). We are initializing a node that may be used by CPUs or memory before it can be referenced as invalid by a CPU hotplug operation. CPU hotplug operations are protected by a range of APIs including cpu_maps_update_begin/cpu_maps_update_done, cpus_read/write_lock / cpus_read/write_unlock, device locks, and more. Memory hotplug operations, including try_online_node, are protected by mem_hotplug_begin/mem_hotplug_done, device locks, and more. In the case of CPUs being hot-added to a previously memoryless node, the try_online_node operation occurs wholly within the CPU locks with no overlap. Using HMC hot-add/hot-remove operations, we have been able to add and remove CPUs to any possible node without failures. HMC operations involve a degree of self-serialization, though.
Signed-off-by: Michael Bringmann <mwb@linux.vnet.ibm.com>
Reviewed-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
-rw-r--r--arch/powerpc/mm/numa.c47
1 file changed, 37 insertions(+), 10 deletions(-)
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index f9cd40cd5485..1bead2c67272 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -486,7 +486,7 @@ static int numa_setup_cpu(unsigned long lcpu)
 	nid = of_node_to_nid_single(cpu);
 
 out_present:
-	if (nid < 0 || !node_online(nid))
+	if (nid < 0 || !node_possible(nid))
 		nid = first_online_node;
 
 	map_cpu_to_node(lcpu, nid);
@@ -828,10 +828,8 @@ static void __init find_possible_nodes(void)
 		goto out;
 
 	for (i = 0; i < numnodes; i++) {
-		if (!node_possible(i)) {
-			setup_node_data(i, 0, 0);
+		if (!node_possible(i))
 			node_set(i, node_possible_map);
-		}
 	}
 
 out:
@@ -1200,6 +1198,40 @@ static long vphn_get_associativity(unsigned long cpu,
 	return rc;
 }
 
1201static inline int find_and_online_cpu_nid(int cpu)
1202{
1203 __be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
1204 int new_nid;
1205
1206 /* Use associativity from first thread for all siblings */
1207 vphn_get_associativity(cpu, associativity);
1208 new_nid = associativity_to_nid(associativity);
1209 if (new_nid < 0 || !node_possible(new_nid))
1210 new_nid = first_online_node;
1211
1212 if (NODE_DATA(new_nid) == NULL) {
1213#ifdef CONFIG_MEMORY_HOTPLUG
1214 /*
1215 * Need to ensure that NODE_DATA is initialized for a node from
1216 * available memory (see memblock_alloc_try_nid). If unable to
1217 * init the node, then default to nearest node that has memory
1218 * installed.
1219 */
1220 if (try_online_node(new_nid))
1221 new_nid = first_online_node;
1222#else
1223 /*
1224 * Default to using the nearest node that has memory installed.
1225 * Otherwise, it would be necessary to patch the kernel MM code
1226 * to deal with more memoryless-node error conditions.
1227 */
1228 new_nid = first_online_node;
1229#endif
1230 }
1231
1232 return new_nid;
1233}
 
 /*
  * Update the CPU maps and sysfs entries for a single CPU when its NUMA
  * characteristics change. This function doesn't perform any locking and is
@@ -1267,7 +1299,6 @@ int numa_update_cpu_topology(bool cpus_locked)
 {
 	unsigned int cpu, sibling, changed = 0;
 	struct topology_update_data *updates, *ud;
-	__be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
 	cpumask_t updated_cpus;
 	struct device *dev;
 	int weight, new_nid, i = 0;
@@ -1305,11 +1336,7 @@ int numa_update_cpu_topology(bool cpus_locked)
 			continue;
 		}
 
-		/* Use associativity from first thread for all siblings */
-		vphn_get_associativity(cpu, associativity);
-		new_nid = associativity_to_nid(associativity);
-		if (new_nid < 0 || !node_online(new_nid))
-			new_nid = first_online_node;
+		new_nid = find_and_online_cpu_nid(cpu);
 
 		if (new_nid == numa_cpu_lookup_table[cpu]) {
 			cpumask_andnot(&cpu_associativity_changes_mask,