about summary refs log tree commit diff stats
path: root/arch/powerpc/mm/numa.c
diff options
context:
space:
mode:
authorNathan Fontenot <nfont@linux.vnet.ibm.com>2013-04-24 02:02:13 -0400
committerBenjamin Herrenschmidt <benh@kernel.crashing.org>2013-04-26 02:08:24 -0400
commit30c05350c39de6c17132cfee649518b842d89dd5 (patch)
tree542873ddbdb3e1bd9a31882e77b33c3b9766ec9a /arch/powerpc/mm/numa.c
parent5d88aa85c00bb4026dd986430dc496effc637d42 (diff)
powerpc/pseries: Use stop machine to update cpu maps
The new PRRN firmware feature allows CPU and memory resources to be transparently reassigned across NUMA boundaries. When this happens, the kernel must update the node maps to reflect the new affinity information. Although the NUMA maps can be protected by locking primitives during the update itself, this is insufficient to prevent concurrent accesses to these structures. Since cpumask_of_node() hands out a pointer to these structures, they can still be modified outside of the lock. Furthermore, tracking down each usage of these pointers and adding locks would be quite invasive and difficult to maintain. The approach used is to make a list of affected cpus and call stop_machine to have the update routine run on each of the affected cpus, allowing them to update themselves. Each cpu finds itself in the list of cpus and makes the appropriate updates. We need to have each cpu do this for itself to handle calls to vdso_getcpu_init() added in a subsequent patch. Situations like these are best handled using stop_machine(). Since the NUMA affinity updates are exceptionally rare events, this approach has the benefit of not adding any overhead while accessing the NUMA maps during normal operation. Signed-off-by: Nathan Fontenot <nfont@linux.vnet.ibm.com> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Diffstat (limited to 'arch/powerpc/mm/numa.c')
-rw-r--r--arch/powerpc/mm/numa.c84
1 file changed, 65 insertions, 19 deletions
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 42f50c352242..e8d1aeb6348c 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -22,6 +22,7 @@
22#include <linux/pfn.h> 22#include <linux/pfn.h>
23#include <linux/cpuset.h> 23#include <linux/cpuset.h>
24#include <linux/node.h> 24#include <linux/node.h>
25#include <linux/stop_machine.h>
25#include <asm/sparsemem.h> 26#include <asm/sparsemem.h>
26#include <asm/prom.h> 27#include <asm/prom.h>
27#include <asm/smp.h> 28#include <asm/smp.h>
@@ -1254,6 +1255,13 @@ u64 memory_hotplug_max(void)
1254 1255
1255/* Virtual Processor Home Node (VPHN) support */ 1256/* Virtual Processor Home Node (VPHN) support */
1256#ifdef CONFIG_PPC_SPLPAR 1257#ifdef CONFIG_PPC_SPLPAR
1258struct topology_update_data {
1259 struct topology_update_data *next;
1260 unsigned int cpu;
1261 int old_nid;
1262 int new_nid;
1263};
1264
1257static u8 vphn_cpu_change_counts[NR_CPUS][MAX_DISTANCE_REF_POINTS]; 1265static u8 vphn_cpu_change_counts[NR_CPUS][MAX_DISTANCE_REF_POINTS];
1258static cpumask_t cpu_associativity_changes_mask; 1266static cpumask_t cpu_associativity_changes_mask;
1259static int vphn_enabled; 1267static int vphn_enabled;
@@ -1405,41 +1413,79 @@ static long vphn_get_associativity(unsigned long cpu,
1405} 1413}
1406 1414
1407/* 1415/*
1416 * Update the CPU maps and sysfs entries for a single CPU when its NUMA
1417 * characteristics change. This function doesn't perform any locking and is
1418 * only safe to call from stop_machine().
1419 */
1420static int update_cpu_topology(void *data)
1421{
1422 struct topology_update_data *update;
1423 unsigned long cpu;
1424
1425 if (!data)
1426 return -EINVAL;
1427
1428 cpu = get_cpu();
1429
1430 for (update = data; update; update = update->next) {
1431 if (cpu != update->cpu)
1432 continue;
1433
1434 unregister_cpu_under_node(update->cpu, update->old_nid);
1435 unmap_cpu_from_node(update->cpu);
1436 map_cpu_to_node(update->cpu, update->new_nid);
1437 register_cpu_under_node(update->cpu, update->new_nid);
1438 }
1439
1440 return 0;
1441}
1442
1443/*
1408 * Update the node maps and sysfs entries for each cpu whose home node 1444 * Update the node maps and sysfs entries for each cpu whose home node
1409 * has changed. Returns 1 when the topology has changed, and 0 otherwise. 1445 * has changed. Returns 1 when the topology has changed, and 0 otherwise.
1410 */ 1446 */
1411int arch_update_cpu_topology(void) 1447int arch_update_cpu_topology(void)
1412{ 1448{
1413 int cpu, nid, old_nid, changed = 0; 1449 unsigned int cpu, changed = 0;
1450 struct topology_update_data *updates, *ud;
1414 unsigned int associativity[VPHN_ASSOC_BUFSIZE] = {0}; 1451 unsigned int associativity[VPHN_ASSOC_BUFSIZE] = {0};
1415 struct device *dev; 1452 struct device *dev;
1453 int weight, i = 0;
1454
1455 weight = cpumask_weight(&cpu_associativity_changes_mask);
1456 if (!weight)
1457 return 0;
1458
1459 updates = kzalloc(weight * (sizeof(*updates)), GFP_KERNEL);
1460 if (!updates)
1461 return 0;
1416 1462
1417 for_each_cpu(cpu, &cpu_associativity_changes_mask) { 1463 for_each_cpu(cpu, &cpu_associativity_changes_mask) {
1464 ud = &updates[i++];
1465 ud->cpu = cpu;
1418 vphn_get_associativity(cpu, associativity); 1466 vphn_get_associativity(cpu, associativity);
1419 nid = associativity_to_nid(associativity); 1467 ud->new_nid = associativity_to_nid(associativity);
1420 1468
1421 if (nid < 0 || !node_online(nid)) 1469 if (ud->new_nid < 0 || !node_online(ud->new_nid))
1422 nid = first_online_node; 1470 ud->new_nid = first_online_node;
1423 1471
1424 old_nid = numa_cpu_lookup_table[cpu]; 1472 ud->old_nid = numa_cpu_lookup_table[cpu];
1425 1473
1426 /* Disable hotplug while we update the cpu 1474 if (i < weight)
1427 * masks and sysfs. 1475 ud->next = &updates[i];
1428 */ 1476 }
1429 get_online_cpus(); 1477
1430 unregister_cpu_under_node(cpu, old_nid); 1478 stop_machine(update_cpu_topology, &updates[0], cpu_online_mask);
1431 unmap_cpu_from_node(cpu); 1479
1432 map_cpu_to_node(cpu, nid); 1480 for (ud = &updates[0]; ud; ud = ud->next) {
1433 register_cpu_under_node(cpu, nid); 1481 dev = get_cpu_device(ud->cpu);
1434 put_online_cpus();
1435
1436 dev = get_cpu_device(cpu);
1437 if (dev) 1482 if (dev)
1438 kobject_uevent(&dev->kobj, KOBJ_CHANGE); 1483 kobject_uevent(&dev->kobj, KOBJ_CHANGE);
1439 cpumask_clear_cpu(cpu, &cpu_associativity_changes_mask); 1484 cpumask_clear_cpu(ud->cpu, &cpu_associativity_changes_mask);
1440 changed = 1; 1485 changed = 1;
1441 } 1486 }
1442 1487
1488 kfree(updates);
1443 return changed; 1489 return changed;
1444} 1490}
1445 1491
@@ -1488,10 +1534,10 @@ static int dt_update_callback(struct notifier_block *nb,
1488 int rc = NOTIFY_DONE; 1534 int rc = NOTIFY_DONE;
1489 1535
1490 switch (action) { 1536 switch (action) {
1491 case OF_RECONFIG_ADD_PROPERTY:
1492 case OF_RECONFIG_UPDATE_PROPERTY: 1537 case OF_RECONFIG_UPDATE_PROPERTY:
1493 update = (struct of_prop_reconfig *)data; 1538 update = (struct of_prop_reconfig *)data;
1494 if (!of_prop_cmp(update->dn->type, "cpu")) { 1539 if (!of_prop_cmp(update->dn->type, "cpu") &&
1540 !of_prop_cmp(update->prop->name, "ibm,associativity")) {
1495 u32 core_id; 1541 u32 core_id;
1496 of_property_read_u32(update->dn, "reg", &core_id); 1542 of_property_read_u32(update->dn, "reg", &core_id);
1497 stage_topology_update(core_id); 1543 stage_topology_update(core_id);