aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/mm/numa.c
diff options
context:
space:
mode:
authorJesse Larrew <jlarrew@linux.vnet.ibm.com>2010-12-01 07:31:15 -0500
committerBenjamin Herrenschmidt <benh@kernel.crashing.org>2010-12-08 23:36:29 -0500
commit9eff1a38407c051273fe1a20f03f8155bd32de35 (patch)
treeebda1a3986c78cf7746676b3a8ee154301e2019d /arch/powerpc/mm/numa.c
parent4dfa9c474859629a2c4a3f8d29804d6a6c994908 (diff)
powerpc/pseries: Poll VPA for topology changes and update NUMA maps
This patch sets a timer during boot that will periodically poll the associativity change counters in the VPA. When a change in associativity is detected, it retrieves the new associativity domain information via the H_HOME_NODE_ASSOCIATIVITY hcall and updates the NUMA node maps and sysfs entries accordingly. Note that since the ibm,associativity device tree property does not exist on configurations with both NUMA and SPLPAR enabled, no device tree updates are necessary. Signed-off-by: Jesse Larrew <jlarrew@linux.vnet.ibm.com> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Diffstat (limited to 'arch/powerpc/mm/numa.c')
-rw-r--r--arch/powerpc/mm/numa.c277
1 files changed, 267 insertions, 10 deletions
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 8c0944c465f6..d644ba7e8aba 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -20,10 +20,14 @@
20#include <linux/memblock.h> 20#include <linux/memblock.h>
21#include <linux/of.h> 21#include <linux/of.h>
22#include <linux/pfn.h> 22#include <linux/pfn.h>
23#include <linux/cpuset.h>
24#include <linux/node.h>
23#include <asm/sparsemem.h> 25#include <asm/sparsemem.h>
24#include <asm/prom.h> 26#include <asm/prom.h>
25#include <asm/system.h> 27#include <asm/system.h>
26#include <asm/smp.h> 28#include <asm/smp.h>
29#include <asm/firmware.h>
30#include <asm/paca.h>
27 31
28static int numa_enabled = 1; 32static int numa_enabled = 1;
29 33
@@ -246,32 +250,41 @@ static void initialize_distance_lookup_table(int nid,
246/* Returns nid in the range [0..MAX_NUMNODES-1], or -1 if no useful numa 250/* Returns nid in the range [0..MAX_NUMNODES-1], or -1 if no useful numa
247 * info is found. 251 * info is found.
248 */ 252 */
249static int of_node_to_nid_single(struct device_node *device) 253static int associativity_to_nid(const unsigned int *associativity)
250{ 254{
251 int nid = -1; 255 int nid = -1;
252 const unsigned int *tmp;
253 256
254 if (min_common_depth == -1) 257 if (min_common_depth == -1)
255 goto out; 258 goto out;
256 259
257 tmp = of_get_associativity(device); 260 if (associativity[0] >= min_common_depth)
258 if (!tmp) 261 nid = associativity[min_common_depth];
259 goto out;
260
261 if (tmp[0] >= min_common_depth)
262 nid = tmp[min_common_depth];
263 262
264 /* POWER4 LPAR uses 0xffff as invalid node */ 263 /* POWER4 LPAR uses 0xffff as invalid node */
265 if (nid == 0xffff || nid >= MAX_NUMNODES) 264 if (nid == 0xffff || nid >= MAX_NUMNODES)
266 nid = -1; 265 nid = -1;
267 266
268 if (nid > 0 && tmp[0] >= distance_ref_points_depth) 267 if (nid > 0 && associativity[0] >= distance_ref_points_depth)
269 initialize_distance_lookup_table(nid, tmp); 268 initialize_distance_lookup_table(nid, associativity);
270 269
271out: 270out:
272 return nid; 271 return nid;
273} 272}
274 273
/*
 * Translate one device tree node into a NUMA node id.
 *
 * The associativity data that of_get_associativity() returns for @device
 * is handed to associativity_to_nid().  Returns that function's result,
 * or -1 when @device has no associativity data.
 */
static int of_node_to_nid_single(struct device_node *device)
{
	const unsigned int *assoc = of_get_associativity(device);

	if (!assoc)
		return -1;

	return associativity_to_nid(assoc);
}
287
275/* Walk the device tree upwards, looking for an associativity id */ 288/* Walk the device tree upwards, looking for an associativity id */
276int of_node_to_nid(struct device_node *device) 289int of_node_to_nid(struct device_node *device)
277{ 290{
@@ -1274,3 +1287,247 @@ u64 memory_hotplug_max(void)
1274 return max(hot_add_drconf_memory_max(), memblock_end_of_DRAM()); 1287 return max(hot_add_drconf_memory_max(), memblock_end_of_DRAM());
1275} 1288}
1276#endif /* CONFIG_MEMORY_HOTPLUG */ 1289#endif /* CONFIG_MEMORY_HOTPLUG */
1290
1291/* Virtual Processor Home Node (VPHN) support */
1292#define VPHN_NR_CHANGE_CTRS (8) /* number of change counters tracked per cpu */
1293static u8 vphn_cpu_change_counts[NR_CPUS][VPHN_NR_CHANGE_CTRS]; /* per-cpu copy of the counters last read from lppaca[cpu].vphn_assoc_counts */
1294static cpumask_t cpu_associativity_changes_mask; /* cpus whose counters were seen to change on the last poll */
1295static int vphn_enabled; /* set by start_topology_update(), cleared by stop_topology_update() */
1296static void set_topology_timer(void); /* forward declaration: topology_timer_fn() calls it before its definition */
1297int stop_topology_update(void); /* forward declaration: vphn_get_associativity() calls it before its definition */
1298
1299/*
1300 * Store the current values of the associativity change counters in the
1301 * hypervisor.
1302 */
1303static void setup_cpu_associativity_change_counters(void)
1304{
1305 int cpu = 0;
1306
1307 for_each_possible_cpu(cpu) {
1308 int i = 0;
1309 u8 *counts = vphn_cpu_change_counts[cpu];
1310 volatile u8 *hypervisor_counts = lppaca[cpu].vphn_assoc_counts;
1311
1312 for (i = 0; i < VPHN_NR_CHANGE_CTRS; i++) {
1313 counts[i] = hypervisor_counts[i];
1314 }
1315 }
1316}
1317
1318/*
1319 * The hypervisor maintains a set of 8 associativity change counters in
1320 * the VPA of each cpu that correspond to the associativity levels in the
1321 * ibm,associativity-reference-points property. When an associativity
1322 * level changes, the corresponding counter is incremented.
1323 *
1324 * Set a bit in cpu_associativity_changes_mask for each cpu whose home
1325 * node associativity levels have changed.
1326 *
1327 * Returns the number of cpus with unhandled associativity changes.
1328 */
1329static int update_cpu_associativity_changes_mask(void)
1330{
1331 int cpu = 0, nr_cpus = 0;
1332 cpumask_t *changes = &cpu_associativity_changes_mask;
1333
1334 cpumask_clear(changes);
1335
1336 for_each_possible_cpu(cpu) {
1337 int i, changed = 0;
1338 u8 *counts = vphn_cpu_change_counts[cpu];
1339 volatile u8 *hypervisor_counts = lppaca[cpu].vphn_assoc_counts;
1340
1341 for (i = 0; i < VPHN_NR_CHANGE_CTRS; i++) {
1342 if (hypervisor_counts[i] > counts[i]) {
1343 counts[i] = hypervisor_counts[i];
1344 changed = 1;
1345 }
1346 }
1347 if (changed) {
1348 cpumask_set_cpu(cpu, changes);
1349 nr_cpus++;
1350 }
1351 }
1352
1353 return nr_cpus;
1354}
1355
1356/* 6 64-bit registers unpacked into 12 32-bit associativity values */
1357#define VPHN_ASSOC_BUFSIZE (6*sizeof(u64)/sizeof(u32))
1358
1359/*
1360 * Convert the associativity domain numbers returned from the hypervisor
1361 * to the sequence they would appear in the ibm,associativity property.
1362 */
1363static int vphn_unpack_associativity(const long *packed, unsigned int *unpacked)
1364{
1365 int i = 0;
1366 int nr_assoc_doms = 0;
1367 const u16 *field = (const u16*) packed;
1368
1369#define VPHN_FIELD_UNUSED (0xffff)
1370#define VPHN_FIELD_MSB (0x8000)
1371#define VPHN_FIELD_MASK (~VPHN_FIELD_MSB)
1372
1373 for (i = 0; i < VPHN_ASSOC_BUFSIZE; i++) {
1374 if (*field == VPHN_FIELD_UNUSED) {
1375 /* All significant fields processed, and remaining
1376 * fields contain the reserved value of all 1's.
1377 * Just store them.
1378 */
1379 unpacked[i] = *((u32*)field);
1380 field += 2;
1381 }
1382 else if (*field & VPHN_FIELD_MSB) {
1383 /* Data is in the lower 15 bits of this field */
1384 unpacked[i] = *field & VPHN_FIELD_MASK;
1385 field++;
1386 nr_assoc_doms++;
1387 }
1388 else {
1389 /* Data is in the lower 15 bits of this field
1390 * concatenated with the next 16 bit field
1391 */
1392 unpacked[i] = *((u32*)field);
1393 field += 2;
1394 nr_assoc_doms++;
1395 }
1396 }
1397
1398 return nr_assoc_doms;
1399}
1400
1401/*
1402 * Retrieve the new associativity information for a virtual processor's
1403 * home node.
1404 */
1405static long hcall_vphn(unsigned long cpu, unsigned int *associativity)
1406{
1407 long rc = 0;
1408 long retbuf[PLPAR_HCALL9_BUFSIZE] = {0};
1409 u64 flags = 1;
1410 int hwcpu = get_hard_smp_processor_id(cpu);
1411
1412 rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, retbuf, flags, hwcpu);
1413 vphn_unpack_associativity(retbuf, associativity);
1414
1415 return rc;
1416}
1417
1418static long vphn_get_associativity(unsigned long cpu,
1419 unsigned int *associativity)
1420{
1421 long rc = 0;
1422
1423 rc = hcall_vphn(cpu, associativity);
1424
1425 switch (rc) {
1426 case H_FUNCTION:
1427 printk(KERN_INFO
1428 "VPHN is not supported. Disabling polling...\n");
1429 stop_topology_update();
1430 break;
1431 case H_HARDWARE:
1432 printk(KERN_ERR
1433 "hcall_vphn() experienced a hardware fault "
1434 "preventing VPHN. Disabling polling...\n");
1435 stop_topology_update();
1436 }
1437
1438 return rc;
1439}
1440
1441/*
1442 * Update the node maps and sysfs entries for each cpu whose home node
1443 * has changed.
1444 */
1445int arch_update_cpu_topology(void)
1446{
1447 int cpu = 0, nid = 0, old_nid = 0;
1448 unsigned int associativity[VPHN_ASSOC_BUFSIZE] = {0};
1449 struct sys_device *sysdev = NULL;
1450
1451 for_each_cpu_mask(cpu, cpu_associativity_changes_mask) {
1452 vphn_get_associativity(cpu, associativity);
1453 nid = associativity_to_nid(associativity);
1454
1455 if (nid < 0 || !node_online(nid))
1456 nid = first_online_node;
1457
1458 old_nid = numa_cpu_lookup_table[cpu];
1459
1460 /* Disable hotplug while we update the cpu
1461 * masks and sysfs.
1462 */
1463 get_online_cpus();
1464 unregister_cpu_under_node(cpu, old_nid);
1465 unmap_cpu_from_node(cpu);
1466 map_cpu_to_node(cpu, nid);
1467 register_cpu_under_node(cpu, nid);
1468 put_online_cpus();
1469
1470 sysdev = get_cpu_sysdev(cpu);
1471 if (sysdev)
1472 kobject_uevent(&sysdev->kobj, KOBJ_CHANGE);
1473 }
1474
1475 return 1;
1476}
1477
/*
 * Work item callback: request a rebuild of the scheduler domains.
 * @work is not used.
 */
static void topology_work_fn(struct work_struct *work)
{
	rebuild_sched_domains();
}

/* Work item that runs topology_work_fn() when scheduled */
static DECLARE_WORK(topology_work, topology_work_fn);
1483
1484void topology_schedule_update(void)
1485{
1486 schedule_work(&topology_work);
1487}
1488
1489static void topology_timer_fn(unsigned long ignored)
1490{
1491 if (!vphn_enabled)
1492 return;
1493 if (update_cpu_associativity_changes_mask() > 0)
1494 topology_schedule_update();
1495 set_topology_timer();
1496}
1497static struct timer_list topology_timer =
1498 TIMER_INITIALIZER(topology_timer_fn, 0, 0);
1499
1500static void set_topology_timer(void)
1501{
1502 topology_timer.data = 0;
1503 topology_timer.expires = jiffies + 60 * HZ;
1504 add_timer(&topology_timer);
1505}
1506
1507/*
1508 * Start polling for VPHN associativity changes.
1509 */
1510int start_topology_update(void)
1511{
1512 int rc = 0;
1513
1514 if (firmware_has_feature(FW_FEATURE_VPHN)) {
1515 vphn_enabled = 1;
1516 setup_cpu_associativity_change_counters();
1517 init_timer_deferrable(&topology_timer);
1518 set_topology_timer();
1519 rc = 1;
1520 }
1521
1522 return rc;
1523}
1524__initcall(start_topology_update);
1525
1526/*
1527 * Disable polling for VPHN associativity changes.
1528 */
1529int stop_topology_update(void)
1530{
1531 vphn_enabled = 0;
1532 return del_timer_sync(&topology_timer);
1533}