powerpc: VPHN topology change updates all siblings

When an associativity level change is found for one thread, the sibling threads need to be updated as well. This is done today for PRRN in stage_topology_update() but is missing for VPHN in update_cpu_associativity_changes_mask(). This patch will correctly update all thread siblings during a topology change. Without this patch, a topology update can result in a CPU in init_sched_groups_power() getting stuck indefinitely in a loop. This loop is built in build_sched_groups(). As a result of the thread moving to a node separate from its siblings, the struct sched_group will have its next pointer set to point to itself rather than the sched_group struct of the next thread. This happens because we have a domain without the SD_OVERLAP flag, which is correct, and a topology that doesn't conform with reality (threads on the same core assigned to different NUMA nodes). When this list is traversed by init_sched_groups_power(), it will reach the thread's sched_group structure and loop indefinitely; the CPU will be stuck at this point. The bug was exposed when VPHN was enabled in commit b7abef0 (v3.9). Cc: <stable@vger.kernel.org> [v3.9+] Reported-by: Jan Stancek <jstancek@redhat.com> Signed-off-by: Robert Jennings <rcj@linux.vnet.ibm.com> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
author: Robert Jennings <rcj@linux.vnet.ibm.com> 2013-07-24 21:13:21 -0400
committer: Benjamin Herrenschmidt <benh@kernel.crashing.org> 2013-07-31 23:11:47 -0400
commit: 3be7db6ab45b21345386d1a466da133b19cde5e4 (patch)
tree: 55f7be0aa23b1d0647aea236abde4a854009d392 /arch/powerpc/mm
parent: 8d7c55d01e4648605fd0dacc82d8d3989ead4db7 (diff)
1 files changed, 44 insertions, 15 deletions
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 08397217e8ac..5850798826cd 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -27,6 +27,7 @@
 #include <linux/seq_file.h>
 #include <linux/uaccess.h>
 #include <linux/slab.h>
+#include <asm/cputhreads.h>
 #include <asm/sparsemem.h>
 #include <asm/prom.h>
 #include <asm/smp.h>
@@ -1318,7 +1319,8 @@ static int update_cpu_associativity_changes_mask(void)
                        }
                }
                if (changed) {
-                        cpumask_set_cpu(cpu, changes);
+                        cpumask_or(changes, changes, cpu_sibling_mask(cpu));
+                        cpu = cpu_last_thread_sibling(cpu);
                }
        }
@@ -1426,7 +1428,7 @@ static int update_cpu_topology(void *data)
        if (!data)
                return -EINVAL;
-        cpu = get_cpu();
+        cpu = smp_processor_id();
        for (update = data; update; update = update->next) {
                if (cpu != update->cpu)
@@ -1446,12 +1448,12 @@ static int update_cpu_topology(void *data)
 */
 int arch_update_cpu_topology(void)
 {
-        unsigned int cpu, changed = 0;
+        unsigned int cpu, sibling, changed = 0;
        struct topology_update_data *updates, *ud;
        unsigned int associativity[VPHN_ASSOC_BUFSIZE] = {0};
        cpumask_t updated_cpus;
        struct device *dev;
-        int weight, i = 0;
+        int weight, new_nid, i = 0;
        weight = cpumask_weight(&cpu_associativity_changes_mask);
        if (!weight)
@@ -1464,19 +1466,46 @@ int arch_update_cpu_topology(void)
        cpumask_clear(&updated_cpus);
        for_each_cpu(cpu, &cpu_associativity_changes_mask) {
-                ud = &updates[i++];
+                /*
-                ud->cpu = cpu;
+                 * If siblings aren't flagged for changes, updates list
-                vphn_get_associativity(cpu, associativity);
+                 * will be too short. Skip on this update and set for next
-                ud->new_nid = associativity_to_nid(associativity);
+                 * update.
+                 */
-                if (ud->new_nid < 0 || !node_online(ud->new_nid))
+                if (!cpumask_subset(cpu_sibling_mask(cpu),
-                        ud->new_nid = first_online_node;
+                                        &cpu_associativity_changes_mask)) {
+                        pr_info("Sibling bits not set for associativity "
+                                        "change, cpu%d\n", cpu);
+                        cpumask_or(&cpu_associativity_changes_mask,
+                                        &cpu_associativity_changes_mask,
+                                        cpu_sibling_mask(cpu));
+                        cpu = cpu_last_thread_sibling(cpu);
+                        continue;
+                }
-                ud->old_nid = numa_cpu_lookup_table[cpu];
+                /* Use associativity from first thread for all siblings */
-                cpumask_set_cpu(cpu, &updated_cpus);
+                vphn_get_associativity(cpu, associativity);
+                new_nid = associativity_to_nid(associativity);
+                if (new_nid < 0 || !node_online(new_nid))
+                        new_nid = first_online_node;
+                if (new_nid == numa_cpu_lookup_table[cpu]) {
+                        cpumask_andnot(&cpu_associativity_changes_mask,
+                                        &cpu_associativity_changes_mask,
+                                        cpu_sibling_mask(cpu));
+                        cpu = cpu_last_thread_sibling(cpu);
+                        continue;
+                }
-                if (i < weight)
+                for_each_cpu(sibling, cpu_sibling_mask(cpu)) {
-                        ud->next = &updates[i];
+                        ud = &updates[i++];
+                        ud->cpu = sibling;
+                        ud->new_nid = new_nid;
+                        ud->old_nid = numa_cpu_lookup_table[sibling];
+                        cpumask_set_cpu(sibling, &updated_cpus);
+                        if (i < weight)
+                                ud->next = &updates[i];
+                }
+                cpu = cpu_last_thread_sibling(cpu);
        }
        stop_machine(update_cpu_topology, &updates[0], &updated_cpus);
author	Robert Jennings <rcj@linux.vnet.ibm.com>	2013-07-24 21:13:21 -0400
committer	Benjamin Herrenschmidt <benh@kernel.crashing.org>	2013-07-31 23:11:47 -0400
commit	3be7db6ab45b21345386d1a466da133b19cde5e4 (patch)
tree	55f7be0aa23b1d0647aea236abde4a854009d392 /arch/powerpc/mm
parent	8d7c55d01e4648605fd0dacc82d8d3989ead4db7 (diff)

diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 08397217e8ac..5850798826cd 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -27,6 +27,7 @@
27	#include <linux/seq_file.h>	27	#include <linux/seq_file.h>
28	#include <linux/uaccess.h>	28	#include <linux/uaccess.h>
29	#include <linux/slab.h>	29	#include <linux/slab.h>
		30	#include <asm/cputhreads.h>
30	#include <asm/sparsemem.h>	31	#include <asm/sparsemem.h>
31	#include <asm/prom.h>	32	#include <asm/prom.h>
32	#include <asm/smp.h>	33	#include <asm/smp.h>
@@ -1318,7 +1319,8 @@ static int update_cpu_associativity_changes_mask(void)
1318	}	1319	}
1319	}	1320	}
1320	if (changed) {	1321	if (changed) {
1321	cpumask_set_cpu(cpu, changes);	1322	cpumask_or(changes, changes, cpu_sibling_mask(cpu));
		1323	cpu = cpu_last_thread_sibling(cpu);
1322	}	1324	}
1323	}	1325	}
1324		1326
@@ -1426,7 +1428,7 @@ static int update_cpu_topology(void *data)
1426	if (!data)	1428	if (!data)
1427	return -EINVAL;	1429	return -EINVAL;
1428		1430
1429	cpu = get_cpu();	1431	cpu = smp_processor_id();
1430		1432
1431	for (update = data; update; update = update->next) {	1433	for (update = data; update; update = update->next) {
1432	if (cpu != update->cpu)	1434	if (cpu != update->cpu)
@@ -1446,12 +1448,12 @@ static int update_cpu_topology(void *data)
1446	*/	1448	*/
1447	int arch_update_cpu_topology(void)	1449	int arch_update_cpu_topology(void)
1448	{	1450	{
1449	unsigned int cpu, changed = 0;	1451	unsigned int cpu, sibling, changed = 0;
1450	struct topology_update_data *updates, *ud;	1452	struct topology_update_data *updates, *ud;
1451	unsigned int associativity[VPHN_ASSOC_BUFSIZE] = {0};	1453	unsigned int associativity[VPHN_ASSOC_BUFSIZE] = {0};
1452	cpumask_t updated_cpus;	1454	cpumask_t updated_cpus;
1453	struct device *dev;	1455	struct device *dev;
1454	int weight, i = 0;	1456	int weight, new_nid, i = 0;
1455		1457
1456	weight = cpumask_weight(&cpu_associativity_changes_mask);	1458	weight = cpumask_weight(&cpu_associativity_changes_mask);
1457	if (!weight)	1459	if (!weight)
@@ -1464,19 +1466,46 @@ int arch_update_cpu_topology(void)
1464	cpumask_clear(&updated_cpus);	1466	cpumask_clear(&updated_cpus);
1465		1467
1466	for_each_cpu(cpu, &cpu_associativity_changes_mask) {	1468	for_each_cpu(cpu, &cpu_associativity_changes_mask) {
1467	ud = &updates[i++];	1469	/*
1468	ud->cpu = cpu;	1470	* If siblings aren't flagged for changes, updates list
1469	vphn_get_associativity(cpu, associativity);	1471	* will be too short. Skip on this update and set for next
1470	ud->new_nid = associativity_to_nid(associativity);	1472	* update.
1471		1473	*/
1472	if (ud->new_nid < 0 || !node_online(ud->new_nid))	1474	if (!cpumask_subset(cpu_sibling_mask(cpu),
1473	ud->new_nid = first_online_node;	1475	&cpu_associativity_changes_mask)) {
		1476	pr_info("Sibling bits not set for associativity "
		1477	"change, cpu%d\n", cpu);
		1478	cpumask_or(&cpu_associativity_changes_mask,
		1479	&cpu_associativity_changes_mask,
		1480	cpu_sibling_mask(cpu));
		1481	cpu = cpu_last_thread_sibling(cpu);
		1482	continue;
		1483	}
1474		1484
1475	ud->old_nid = numa_cpu_lookup_table[cpu];	1485	/* Use associativity from first thread for all siblings */
1476	cpumask_set_cpu(cpu, &updated_cpus);	1486	vphn_get_associativity(cpu, associativity);
		1487	new_nid = associativity_to_nid(associativity);
		1488	if (new_nid < 0 || !node_online(new_nid))
		1489	new_nid = first_online_node;
		1490
		1491	if (new_nid == numa_cpu_lookup_table[cpu]) {
		1492	cpumask_andnot(&cpu_associativity_changes_mask,
		1493	&cpu_associativity_changes_mask,
		1494	cpu_sibling_mask(cpu));
		1495	cpu = cpu_last_thread_sibling(cpu);
		1496	continue;
		1497	}
1477		1498
1478	if (i < weight)	1499	for_each_cpu(sibling, cpu_sibling_mask(cpu)) {
1479	ud->next = &updates[i];	1500	ud = &updates[i++];
		1501	ud->cpu = sibling;
		1502	ud->new_nid = new_nid;
		1503	ud->old_nid = numa_cpu_lookup_table[sibling];
		1504	cpumask_set_cpu(sibling, &updated_cpus);
		1505	if (i < weight)
		1506	ud->next = &updates[i];
		1507	}
		1508	cpu = cpu_last_thread_sibling(cpu);
1480	}	1509	}
1481		1510
1482	stop_machine(update_cpu_topology, &updates[0], &updated_cpus);	1511	stop_machine(update_cpu_topology, &updates[0], &updated_cpus);