diff options
Diffstat (limited to 'arch/powerpc/mm/numa.c')
-rw-r--r-- | arch/powerpc/mm/numa.c | 277 |
1 files changed, 267 insertions, 10 deletions
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 8c0944c465f6..d644ba7e8aba 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c | |||
@@ -20,10 +20,14 @@ | |||
20 | #include <linux/memblock.h> | 20 | #include <linux/memblock.h> |
21 | #include <linux/of.h> | 21 | #include <linux/of.h> |
22 | #include <linux/pfn.h> | 22 | #include <linux/pfn.h> |
23 | #include <linux/cpuset.h> | ||
24 | #include <linux/node.h> | ||
23 | #include <asm/sparsemem.h> | 25 | #include <asm/sparsemem.h> |
24 | #include <asm/prom.h> | 26 | #include <asm/prom.h> |
25 | #include <asm/system.h> | 27 | #include <asm/system.h> |
26 | #include <asm/smp.h> | 28 | #include <asm/smp.h> |
29 | #include <asm/firmware.h> | ||
30 | #include <asm/paca.h> | ||
27 | 31 | ||
28 | static int numa_enabled = 1; | 32 | static int numa_enabled = 1; |
29 | 33 | ||
@@ -246,32 +250,41 @@ static void initialize_distance_lookup_table(int nid, | |||
246 | /* Returns nid in the range [0..MAX_NUMNODES-1], or -1 if no useful numa | 250 | /* Returns nid in the range [0..MAX_NUMNODES-1], or -1 if no useful numa |
247 | * info is found. | 251 | * info is found. |
248 | */ | 252 | */ |
249 | static int of_node_to_nid_single(struct device_node *device) | 253 | static int associativity_to_nid(const unsigned int *associativity) |
250 | { | 254 | { |
251 | int nid = -1; | 255 | int nid = -1; |
252 | const unsigned int *tmp; | ||
253 | 256 | ||
254 | if (min_common_depth == -1) | 257 | if (min_common_depth == -1) |
255 | goto out; | 258 | goto out; |
256 | 259 | ||
257 | tmp = of_get_associativity(device); | 260 | if (associativity[0] >= min_common_depth) |
258 | if (!tmp) | 261 | nid = associativity[min_common_depth]; |
259 | goto out; | ||
260 | |||
261 | if (tmp[0] >= min_common_depth) | ||
262 | nid = tmp[min_common_depth]; | ||
263 | 262 | ||
264 | /* POWER4 LPAR uses 0xffff as invalid node */ | 263 | /* POWER4 LPAR uses 0xffff as invalid node */ |
265 | if (nid == 0xffff || nid >= MAX_NUMNODES) | 264 | if (nid == 0xffff || nid >= MAX_NUMNODES) |
266 | nid = -1; | 265 | nid = -1; |
267 | 266 | ||
268 | if (nid > 0 && tmp[0] >= distance_ref_points_depth) | 267 | if (nid > 0 && associativity[0] >= distance_ref_points_depth) |
269 | initialize_distance_lookup_table(nid, tmp); | 268 | initialize_distance_lookup_table(nid, associativity); |
270 | 269 | ||
271 | out: | 270 | out: |
272 | return nid; | 271 | return nid; |
273 | } | 272 | } |
274 | 273 | ||
/*
 * Look up the associativity of a device tree node and translate it to a
 * nid.  Returns -1 when the node carries no associativity information or
 * when associativity_to_nid() cannot derive a nid from it.
 */
static int of_node_to_nid_single(struct device_node *device)
{
	const unsigned int *assoc = of_get_associativity(device);

	if (!assoc)
		return -1;

	return associativity_to_nid(assoc);
}
287 | |||
275 | /* Walk the device tree upwards, looking for an associativity id */ | 288 | /* Walk the device tree upwards, looking for an associativity id */ |
276 | int of_node_to_nid(struct device_node *device) | 289 | int of_node_to_nid(struct device_node *device) |
277 | { | 290 | { |
@@ -1274,3 +1287,247 @@ u64 memory_hotplug_max(void) | |||
1274 | return max(hot_add_drconf_memory_max(), memblock_end_of_DRAM()); | 1287 | return max(hot_add_drconf_memory_max(), memblock_end_of_DRAM()); |
1275 | } | 1288 | } |
1276 | #endif /* CONFIG_MEMORY_HOTPLUG */ | 1289 | #endif /* CONFIG_MEMORY_HOTPLUG */ |
1290 | |||
1291 | /* Vrtual Processor Home Node (VPHN) support */ | ||
1292 | #define VPHN_NR_CHANGE_CTRS (8) | ||
1293 | static u8 vphn_cpu_change_counts[NR_CPUS][VPHN_NR_CHANGE_CTRS]; | ||
1294 | static cpumask_t cpu_associativity_changes_mask; | ||
1295 | static int vphn_enabled; | ||
1296 | static void set_topology_timer(void); | ||
1297 | int stop_topology_update(void); | ||
1298 | |||
1299 | /* | ||
1300 | * Store the current values of the associativity change counters in the | ||
1301 | * hypervisor. | ||
1302 | */ | ||
1303 | static void setup_cpu_associativity_change_counters(void) | ||
1304 | { | ||
1305 | int cpu = 0; | ||
1306 | |||
1307 | for_each_possible_cpu(cpu) { | ||
1308 | int i = 0; | ||
1309 | u8 *counts = vphn_cpu_change_counts[cpu]; | ||
1310 | volatile u8 *hypervisor_counts = lppaca[cpu].vphn_assoc_counts; | ||
1311 | |||
1312 | for (i = 0; i < VPHN_NR_CHANGE_CTRS; i++) { | ||
1313 | counts[i] = hypervisor_counts[i]; | ||
1314 | } | ||
1315 | } | ||
1316 | } | ||
1317 | |||
1318 | /* | ||
1319 | * The hypervisor maintains a set of 8 associativity change counters in | ||
1320 | * the VPA of each cpu that correspond to the associativity levels in the | ||
1321 | * ibm,associativity-reference-points property. When an associativity | ||
1322 | * level changes, the corresponding counter is incremented. | ||
1323 | * | ||
1324 | * Set a bit in cpu_associativity_changes_mask for each cpu whose home | ||
1325 | * node associativity levels have changed. | ||
1326 | * | ||
1327 | * Returns the number of cpus with unhandled associativity changes. | ||
1328 | */ | ||
1329 | static int update_cpu_associativity_changes_mask(void) | ||
1330 | { | ||
1331 | int cpu = 0, nr_cpus = 0; | ||
1332 | cpumask_t *changes = &cpu_associativity_changes_mask; | ||
1333 | |||
1334 | cpumask_clear(changes); | ||
1335 | |||
1336 | for_each_possible_cpu(cpu) { | ||
1337 | int i, changed = 0; | ||
1338 | u8 *counts = vphn_cpu_change_counts[cpu]; | ||
1339 | volatile u8 *hypervisor_counts = lppaca[cpu].vphn_assoc_counts; | ||
1340 | |||
1341 | for (i = 0; i < VPHN_NR_CHANGE_CTRS; i++) { | ||
1342 | if (hypervisor_counts[i] > counts[i]) { | ||
1343 | counts[i] = hypervisor_counts[i]; | ||
1344 | changed = 1; | ||
1345 | } | ||
1346 | } | ||
1347 | if (changed) { | ||
1348 | cpumask_set_cpu(cpu, changes); | ||
1349 | nr_cpus++; | ||
1350 | } | ||
1351 | } | ||
1352 | |||
1353 | return nr_cpus; | ||
1354 | } | ||
1355 | |||
/*
 * 6 64-bit registers unpacked into 12 32-bit associativity values, plus
 * one leading cell that holds the number of domains found.
 */
#define VPHN_ASSOC_BUFSIZE (6*sizeof(u64)/sizeof(u32) + 1)

/*
 * Convert the associativity domain numbers returned from the hypervisor
 * to the sequence they would appear in the ibm,associativity property.
 *
 * @packed:   hypervisor reply, read as a stream of 16-bit fields; up to
 *            24 fields (6 64-bit registers' worth) are consumed.
 * @unpacked: output buffer of VPHN_ASSOC_BUFSIZE cells.  Cell 0 receives
 *            the number of associativity domains found, the way the first
 *            cell of an associativity array is read by
 *            associativity_to_nid(); the values start at cell 1.
 *
 * Returns the number of associativity domains found.
 */
static int vphn_unpack_associativity(const long *packed, unsigned int *unpacked)
{
	const u16 *field = (const u16 *) packed;
	int nr_assoc_doms = 0;
	int i;

#define VPHN_FIELD_UNUSED	(0xffff)
#define VPHN_FIELD_MSB		(0x8000)
#define VPHN_FIELD_MASK		(~VPHN_FIELD_MSB)

	/* Cell 0 is reserved for the length, so values start at cell 1. */
	for (i = 1; i < VPHN_ASSOC_BUFSIZE; i++) {
		if (*field == VPHN_FIELD_UNUSED) {
			/* All significant fields processed, and remaining
			 * fields contain the reserved value of all 1's.
			 * Just store them.
			 */
			unpacked[i] = ((u32)field[0] << 16) | field[1];
			field += 2;
		} else if (*field & VPHN_FIELD_MSB) {
			/* Data is in the lower 15 bits of this field */
			unpacked[i] = *field & VPHN_FIELD_MASK;
			field++;
			nr_assoc_doms++;
		} else {
			/* Data is in the lower 15 bits of this field
			 * concatenated with the next 16 bit field.  The two
			 * halves are combined explicitly, most significant
			 * first (what a 32-bit load gives on big-endian),
			 * which avoids a misaligned 32-bit access.
			 */
			unpacked[i] = ((u32)field[0] << 16) | field[1];
			field += 2;
			nr_assoc_doms++;
		}
	}

	/* The first cell contains the length of the property */
	unpacked[0] = nr_assoc_doms;

	return nr_assoc_doms;
}
1400 | |||
1401 | /* | ||
1402 | * Retrieve the new associativity information for a virtual processor's | ||
1403 | * home node. | ||
1404 | */ | ||
1405 | static long hcall_vphn(unsigned long cpu, unsigned int *associativity) | ||
1406 | { | ||
1407 | long rc = 0; | ||
1408 | long retbuf[PLPAR_HCALL9_BUFSIZE] = {0}; | ||
1409 | u64 flags = 1; | ||
1410 | int hwcpu = get_hard_smp_processor_id(cpu); | ||
1411 | |||
1412 | rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, retbuf, flags, hwcpu); | ||
1413 | vphn_unpack_associativity(retbuf, associativity); | ||
1414 | |||
1415 | return rc; | ||
1416 | } | ||
1417 | |||
1418 | static long vphn_get_associativity(unsigned long cpu, | ||
1419 | unsigned int *associativity) | ||
1420 | { | ||
1421 | long rc = 0; | ||
1422 | |||
1423 | rc = hcall_vphn(cpu, associativity); | ||
1424 | |||
1425 | switch (rc) { | ||
1426 | case H_FUNCTION: | ||
1427 | printk(KERN_INFO | ||
1428 | "VPHN is not supported. Disabling polling...\n"); | ||
1429 | stop_topology_update(); | ||
1430 | break; | ||
1431 | case H_HARDWARE: | ||
1432 | printk(KERN_ERR | ||
1433 | "hcall_vphn() experienced a hardware fault " | ||
1434 | "preventing VPHN. Disabling polling...\n"); | ||
1435 | stop_topology_update(); | ||
1436 | } | ||
1437 | |||
1438 | return rc; | ||
1439 | } | ||
1440 | |||
1441 | /* | ||
1442 | * Update the node maps and sysfs entries for each cpu whose home node | ||
1443 | * has changed. | ||
1444 | */ | ||
1445 | int arch_update_cpu_topology(void) | ||
1446 | { | ||
1447 | int cpu = 0, nid = 0, old_nid = 0; | ||
1448 | unsigned int associativity[VPHN_ASSOC_BUFSIZE] = {0}; | ||
1449 | struct sys_device *sysdev = NULL; | ||
1450 | |||
1451 | for_each_cpu_mask(cpu, cpu_associativity_changes_mask) { | ||
1452 | vphn_get_associativity(cpu, associativity); | ||
1453 | nid = associativity_to_nid(associativity); | ||
1454 | |||
1455 | if (nid < 0 || !node_online(nid)) | ||
1456 | nid = first_online_node; | ||
1457 | |||
1458 | old_nid = numa_cpu_lookup_table[cpu]; | ||
1459 | |||
1460 | /* Disable hotplug while we update the cpu | ||
1461 | * masks and sysfs. | ||
1462 | */ | ||
1463 | get_online_cpus(); | ||
1464 | unregister_cpu_under_node(cpu, old_nid); | ||
1465 | unmap_cpu_from_node(cpu); | ||
1466 | map_cpu_to_node(cpu, nid); | ||
1467 | register_cpu_under_node(cpu, nid); | ||
1468 | put_online_cpus(); | ||
1469 | |||
1470 | sysdev = get_cpu_sysdev(cpu); | ||
1471 | if (sysdev) | ||
1472 | kobject_uevent(&sysdev->kobj, KOBJ_CHANGE); | ||
1473 | } | ||
1474 | |||
1475 | return 1; | ||
1476 | } | ||
1477 | |||
/* Work handler: rebuild the scheduler domains.  @work is not used. */
static void topology_work_fn(struct work_struct *work)
{
	rebuild_sched_domains();
}

/* Work item that runs topology_work_fn() */
static DECLARE_WORK(topology_work, topology_work_fn);
1483 | |||
1484 | void topology_schedule_update(void) | ||
1485 | { | ||
1486 | schedule_work(&topology_work); | ||
1487 | } | ||
1488 | |||
1489 | static void topology_timer_fn(unsigned long ignored) | ||
1490 | { | ||
1491 | if (!vphn_enabled) | ||
1492 | return; | ||
1493 | if (update_cpu_associativity_changes_mask() > 0) | ||
1494 | topology_schedule_update(); | ||
1495 | set_topology_timer(); | ||
1496 | } | ||
1497 | static struct timer_list topology_timer = | ||
1498 | TIMER_INITIALIZER(topology_timer_fn, 0, 0); | ||
1499 | |||
1500 | static void set_topology_timer(void) | ||
1501 | { | ||
1502 | topology_timer.data = 0; | ||
1503 | topology_timer.expires = jiffies + 60 * HZ; | ||
1504 | add_timer(&topology_timer); | ||
1505 | } | ||
1506 | |||
1507 | /* | ||
1508 | * Start polling for VPHN associativity changes. | ||
1509 | */ | ||
1510 | int start_topology_update(void) | ||
1511 | { | ||
1512 | int rc = 0; | ||
1513 | |||
1514 | if (firmware_has_feature(FW_FEATURE_VPHN)) { | ||
1515 | vphn_enabled = 1; | ||
1516 | setup_cpu_associativity_change_counters(); | ||
1517 | init_timer_deferrable(&topology_timer); | ||
1518 | set_topology_timer(); | ||
1519 | rc = 1; | ||
1520 | } | ||
1521 | |||
1522 | return rc; | ||
1523 | } | ||
1524 | __initcall(start_topology_update); | ||
1525 | |||
1526 | /* | ||
1527 | * Disable polling for VPHN associativity changes. | ||
1528 | */ | ||
1529 | int stop_topology_update(void) | ||
1530 | { | ||
1531 | vphn_enabled = 0; | ||
1532 | return del_timer_sync(&topology_timer); | ||
1533 | } | ||