diff options
Diffstat (limited to 'arch/powerpc/mm/numa.c')
-rw-r--r-- | arch/powerpc/mm/numa.c | 96 |
1 files changed, 91 insertions, 5 deletions
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 5a944f25e94f..86a63de072c6 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c | |||
@@ -31,6 +31,8 @@ | |||
31 | #include <asm/sparsemem.h> | 31 | #include <asm/sparsemem.h> |
32 | #include <asm/prom.h> | 32 | #include <asm/prom.h> |
33 | #include <asm/smp.h> | 33 | #include <asm/smp.h> |
34 | #include <asm/cputhreads.h> | ||
35 | #include <asm/topology.h> | ||
34 | #include <asm/firmware.h> | 36 | #include <asm/firmware.h> |
35 | #include <asm/paca.h> | 37 | #include <asm/paca.h> |
36 | #include <asm/hvcall.h> | 38 | #include <asm/hvcall.h> |
@@ -152,9 +154,22 @@ static void __init get_node_active_region(unsigned long pfn, | |||
152 | } | 154 | } |
153 | } | 155 | } |
154 | 156 | ||
155 | static void map_cpu_to_node(int cpu, int node) | 157 | static void reset_numa_cpu_lookup_table(void) |
158 | { | ||
159 | unsigned int cpu; | ||
160 | |||
161 | for_each_possible_cpu(cpu) | ||
162 | numa_cpu_lookup_table[cpu] = -1; | ||
163 | } | ||
164 | |||
165 | static void update_numa_cpu_lookup_table(unsigned int cpu, int node) | ||
156 | { | 166 | { |
157 | numa_cpu_lookup_table[cpu] = node; | 167 | numa_cpu_lookup_table[cpu] = node; |
168 | } | ||
169 | |||
170 | static void map_cpu_to_node(int cpu, int node) | ||
171 | { | ||
172 | update_numa_cpu_lookup_table(cpu, node); | ||
158 | 173 | ||
159 | dbg("adding cpu %d to node %d\n", cpu, node); | 174 | dbg("adding cpu %d to node %d\n", cpu, node); |
160 | 175 | ||
@@ -522,11 +537,24 @@ static int of_drconf_to_nid_single(struct of_drconf_cell *drmem, | |||
522 | */ | 537 | */ |
523 | static int numa_setup_cpu(unsigned long lcpu) | 538 | static int numa_setup_cpu(unsigned long lcpu) |
524 | { | 539 | { |
525 | int nid = 0; | 540 | int nid; |
526 | struct device_node *cpu = of_get_cpu_node(lcpu, NULL); | 541 | struct device_node *cpu; |
542 | |||
543 | /* | ||
544 | * If a valid cpu-to-node mapping is already available, use it | ||
545 | * directly instead of querying the firmware, since it represents | ||
546 | * the most recent mapping notified to us by the platform (eg: VPHN). | ||
547 | */ | ||
548 | if ((nid = numa_cpu_lookup_table[lcpu]) >= 0) { | ||
549 | map_cpu_to_node(lcpu, nid); | ||
550 | return nid; | ||
551 | } | ||
552 | |||
553 | cpu = of_get_cpu_node(lcpu, NULL); | ||
527 | 554 | ||
528 | if (!cpu) { | 555 | if (!cpu) { |
529 | WARN_ON(1); | 556 | WARN_ON(1); |
557 | nid = 0; | ||
530 | goto out; | 558 | goto out; |
531 | } | 559 | } |
532 | 560 | ||
@@ -542,16 +570,38 @@ out: | |||
542 | return nid; | 570 | return nid; |
543 | } | 571 | } |
544 | 572 | ||
573 | static void verify_cpu_node_mapping(int cpu, int node) | ||
574 | { | ||
575 | int base, sibling, i; | ||
576 | |||
577 | /* Verify that all the threads in the core belong to the same node */ | ||
578 | base = cpu_first_thread_sibling(cpu); | ||
579 | |||
580 | for (i = 0; i < threads_per_core; i++) { | ||
581 | sibling = base + i; | ||
582 | |||
583 | if (sibling == cpu || cpu_is_offline(sibling)) | ||
584 | continue; | ||
585 | |||
586 | if (cpu_to_node(sibling) != node) { | ||
587 | WARN(1, "CPU thread siblings %d and %d don't belong" | ||
588 | " to the same node!\n", cpu, sibling); | ||
589 | break; | ||
590 | } | ||
591 | } | ||
592 | } | ||
593 | |||
545 | static int cpu_numa_callback(struct notifier_block *nfb, unsigned long action, | 594 | static int cpu_numa_callback(struct notifier_block *nfb, unsigned long action, |
546 | void *hcpu) | 595 | void *hcpu) |
547 | { | 596 | { |
548 | unsigned long lcpu = (unsigned long)hcpu; | 597 | unsigned long lcpu = (unsigned long)hcpu; |
549 | int ret = NOTIFY_DONE; | 598 | int ret = NOTIFY_DONE, nid; |
550 | 599 | ||
551 | switch (action) { | 600 | switch (action) { |
552 | case CPU_UP_PREPARE: | 601 | case CPU_UP_PREPARE: |
553 | case CPU_UP_PREPARE_FROZEN: | 602 | case CPU_UP_PREPARE_FROZEN: |
554 | numa_setup_cpu(lcpu); | 603 | nid = numa_setup_cpu(lcpu); |
604 | verify_cpu_node_mapping((int)lcpu, nid); | ||
555 | ret = NOTIFY_OK; | 605 | ret = NOTIFY_OK; |
556 | break; | 606 | break; |
557 | #ifdef CONFIG_HOTPLUG_CPU | 607 | #ifdef CONFIG_HOTPLUG_CPU |
@@ -1069,6 +1119,7 @@ void __init do_init_bootmem(void) | |||
1069 | */ | 1119 | */ |
1070 | setup_node_to_cpumask_map(); | 1120 | setup_node_to_cpumask_map(); |
1071 | 1121 | ||
1122 | reset_numa_cpu_lookup_table(); | ||
1072 | register_cpu_notifier(&ppc64_numa_nb); | 1123 | register_cpu_notifier(&ppc64_numa_nb); |
1073 | cpu_numa_callback(&ppc64_numa_nb, CPU_UP_PREPARE, | 1124 | cpu_numa_callback(&ppc64_numa_nb, CPU_UP_PREPARE, |
1074 | (void *)(unsigned long)boot_cpuid); | 1125 | (void *)(unsigned long)boot_cpuid); |
@@ -1447,6 +1498,33 @@ static int update_cpu_topology(void *data) | |||
1447 | return 0; | 1498 | return 0; |
1448 | } | 1499 | } |
1449 | 1500 | ||
1501 | static int update_lookup_table(void *data) | ||
1502 | { | ||
1503 | struct topology_update_data *update; | ||
1504 | |||
1505 | if (!data) | ||
1506 | return -EINVAL; | ||
1507 | |||
1508 | /* | ||
1509 | * Upon topology update, the numa-cpu lookup table needs to be updated | ||
1510 | * for all threads in the core, including offline CPUs, to ensure that | ||
1511 | * future hotplug operations respect the cpu-to-node associativity | ||
1512 | * properly. | ||
1513 | */ | ||
1514 | for (update = data; update; update = update->next) { | ||
1515 | int nid, base, j; | ||
1516 | |||
1517 | nid = update->new_nid; | ||
1518 | base = cpu_first_thread_sibling(update->cpu); | ||
1519 | |||
1520 | for (j = 0; j < threads_per_core; j++) { | ||
1521 | update_numa_cpu_lookup_table(base + j, nid); | ||
1522 | } | ||
1523 | } | ||
1524 | |||
1525 | return 0; | ||
1526 | } | ||
1527 | |||
1450 | /* | 1528 | /* |
1451 | * Update the node maps and sysfs entries for each cpu whose home node | 1529 | * Update the node maps and sysfs entries for each cpu whose home node |
1452 | * has changed. Returns 1 when the topology has changed, and 0 otherwise. | 1530 | * has changed. Returns 1 when the topology has changed, and 0 otherwise. |
@@ -1515,6 +1593,14 @@ int arch_update_cpu_topology(void) | |||
1515 | 1593 | ||
1516 | stop_machine(update_cpu_topology, &updates[0], &updated_cpus); | 1594 | stop_machine(update_cpu_topology, &updates[0], &updated_cpus); |
1517 | 1595 | ||
1596 | /* | ||
1597 | * Update the numa-cpu lookup table with the new mappings, even for | ||
1598 | * offline CPUs. It is best to perform this update from the stop- | ||
1599 | * machine context. | ||
1600 | */ | ||
1601 | stop_machine(update_lookup_table, &updates[0], | ||
1602 | cpumask_of(raw_smp_processor_id())); | ||
1603 | |||
1518 | for (ud = &updates[0]; ud; ud = ud->next) { | 1604 | for (ud = &updates[0]; ud; ud = ud->next) { |
1519 | unregister_cpu_under_node(ud->cpu, ud->old_nid); | 1605 | unregister_cpu_under_node(ud->cpu, ud->old_nid); |
1520 | register_cpu_under_node(ud->cpu, ud->new_nid); | 1606 | register_cpu_under_node(ud->cpu, ud->new_nid); |