author    Benjamin Herrenschmidt <benh@kernel.crashing.org>  2009-07-23 19:15:10 -0400
committer Benjamin Herrenschmidt <benh@kernel.crashing.org>  2009-08-19 20:12:37 -0400
commit    fcce810986b3f32a8322faf240f8cc5560a4c463
tree      4ee99143e18a008c52a274d74c1cc11055983bc6 /arch/powerpc/mm
parent    6c1719942e19936044c4673b18afa26e45a02320
powerpc/mm: Add HW threads support to no_hash TLB management
The current "no hash" MMU context management code is written with the assumption that one CPU == one TLB. This is not the case on implementations that support HW multithreading, where several linux CPUs can share the same TLB. This adds some basic support for this to our context management and our TLB flushing code. It also cleans up the optional debugging output a bit.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r--  arch/powerpc/mm/mmu_context_nohash.c | 93
-rw-r--r--  arch/powerpc/mm/tlb_nohash.c         | 10
2 files changed, 70 insertions, 33 deletions
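
As the commit message explains, several Linux CPUs (hardware threads) may share one TLB, so per-CPU operations on the stale map and on cpu_vm_mask have to cover every thread of a core. For orientation while reading the hunks below, here is a minimal illustrative sketch, not part of the patch, of the kind of mapping the cpu_first_thread_in_core()/cpu_last_thread_in_core() helpers provide, assuming a power-of-two threads_per_core (the value 4 is an arbitrary example):

/* Illustrative sketch only, not from the patch: map a CPU number to the
 * first/last hardware thread of its core, assuming threads_per_core is a
 * power of two. The patch relies on cpu_first_thread_in_core() and
 * cpu_last_thread_in_core() for this purpose.
 */
static int threads_per_core = 4;	/* assumed example value */

static inline int example_first_thread_in_core(int cpu)
{
	return cpu & ~(threads_per_core - 1);	/* e.g. CPU 6 -> 4 */
}

static inline int example_last_thread_in_core(int cpu)
{
	return cpu | (threads_per_core - 1);	/* e.g. CPU 6 -> 7 */
}

With threads_per_core = 4, stealing a context that was used on CPU 6 marks it stale on CPUs 4 through 7, i.e. on every thread sharing that core's TLB.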
diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c
index b1a727def15b..834436d6d6b8 100644
--- a/arch/powerpc/mm/mmu_context_nohash.c
+++ b/arch/powerpc/mm/mmu_context_nohash.c
@@ -25,10 +25,20 @@
  * also clear mm->cpu_vm_mask bits when processes are migrated
  */
 
-#undef DEBUG
-#define DEBUG_STEAL_ONLY
-#undef DEBUG_MAP_CONSISTENCY
-/*#define DEBUG_CLAMP_LAST_CONTEXT 15 */
+#define DEBUG_MAP_CONSISTENCY
+#define DEBUG_CLAMP_LAST_CONTEXT 31
+//#define DEBUG_HARDER
+
+/* We don't use DEBUG because it tends to be compiled in always nowadays
+ * and this would generate way too much output
+ */
+#ifdef DEBUG_HARDER
+#define pr_hard(args...)	printk(KERN_DEBUG args)
+#define pr_hardcont(args...)	printk(KERN_CONT args)
+#else
+#define pr_hard(args...)	do { } while(0)
+#define pr_hardcont(args...)	do { } while(0)
+#endif
 
 #include <linux/kernel.h>
 #include <linux/mm.h>
@@ -71,7 +81,7 @@ static DEFINE_SPINLOCK(context_lock);
 static unsigned int steal_context_smp(unsigned int id)
 {
 	struct mm_struct *mm;
-	unsigned int cpu, max;
+	unsigned int cpu, max, i;
 
 	max = last_context - first_context;
 
@@ -89,15 +99,22 @@ static unsigned int steal_context_smp(unsigned int id)
 			id = first_context;
 			continue;
 		}
-		pr_devel("[%d] steal context %d from mm @%p\n",
-			 smp_processor_id(), id, mm);
+		pr_hardcont(" | steal %d from 0x%p", id, mm);
 
 		/* Mark this mm has having no context anymore */
 		mm->context.id = MMU_NO_CONTEXT;
 
-		/* Mark it stale on all CPUs that used this mm */
-		for_each_cpu(cpu, mm_cpumask(mm))
-			__set_bit(id, stale_map[cpu]);
+		/* Mark it stale on all CPUs that used this mm. For threaded
+		 * implementations, we set it on all threads on each core
+		 * represented in the mask. A future implementation will use
+		 * a core map instead but this will do for now.
+		 */
+		for_each_cpu(cpu, mm_cpumask(mm)) {
+			for (i = cpu_first_thread_in_core(cpu);
+			     i <= cpu_last_thread_in_core(cpu); i++)
+				__set_bit(id, stale_map[i]);
+			cpu = i - 1;
+		}
 		return id;
 	}
 
@@ -126,7 +143,7 @@ static unsigned int steal_context_up(unsigned int id)
 	/* Pick up the victim mm */
 	mm = context_mm[id];
 
-	pr_devel("[%d] steal context %d from mm @%p\n", cpu, id, mm);
+	pr_hardcont(" | steal %d from 0x%p", id, mm);
 
 	/* Flush the TLB for that context */
 	local_flush_tlb_mm(mm);
@@ -179,19 +196,14 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next)
 	/* No lockless fast path .. yet */
 	spin_lock(&context_lock);
 
-#ifndef DEBUG_STEAL_ONLY
-	pr_devel("[%d] activating context for mm @%p, active=%d, id=%d\n",
-		 cpu, next, next->context.active, next->context.id);
-#endif
+	pr_hard("[%d] activating context for mm @%p, active=%d, id=%d",
+		cpu, next, next->context.active, next->context.id);
 
 #ifdef CONFIG_SMP
 	/* Mark us active and the previous one not anymore */
 	next->context.active++;
 	if (prev) {
-#ifndef DEBUG_STEAL_ONLY
-		pr_devel(" old context %p active was: %d\n",
-			 prev, prev->context.active);
-#endif
+		pr_hardcont(" (old=0x%p a=%d)", prev, prev->context.active);
 		WARN_ON(prev->context.active < 1);
 		prev->context.active--;
 	}
@@ -201,8 +213,14 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next)
 
 	/* If we already have a valid assigned context, skip all that */
 	id = next->context.id;
-	if (likely(id != MMU_NO_CONTEXT))
+	if (likely(id != MMU_NO_CONTEXT)) {
+#ifdef DEBUG_MAP_CONSISTENCY
+		if (context_mm[id] != next)
+			pr_err("MMU: mm 0x%p has id %d but context_mm[%d] says 0x%p\n",
+			       next, id, id, context_mm[id]);
+#endif
 		goto ctxt_ok;
+	}
 
 	/* We really don't have a context, let's try to acquire one */
 	id = next_context;
@@ -235,11 +253,7 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next)
 	next_context = id + 1;
 	context_mm[id] = next;
 	next->context.id = id;
-
-#ifndef DEBUG_STEAL_ONLY
-	pr_devel("[%d] picked up new id %d, nrf is now %d\n",
-		 cpu, id, nr_free_contexts);
-#endif
+	pr_hardcont(" | new id=%d,nrf=%d", id, nr_free_contexts);
 
 	context_check_map();
  ctxt_ok:
@@ -248,15 +262,20 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next)
 	 * local TLB for it and unmark it before we use it
 	 */
 	if (test_bit(id, stale_map[cpu])) {
-		pr_devel("[%d] flushing stale context %d for mm @%p !\n",
-			 cpu, id, next);
+		pr_hardcont(" | stale flush %d [%d..%d]",
+			    id, cpu_first_thread_in_core(cpu),
+			    cpu_last_thread_in_core(cpu));
+
 		local_flush_tlb_mm(next);
 
 		/* XXX This clear should ultimately be part of local_flush_tlb_mm */
-		__clear_bit(id, stale_map[cpu]);
+		for (cpu = cpu_first_thread_in_core(cpu);
+		     cpu <= cpu_last_thread_in_core(cpu); cpu++)
+			__clear_bit(id, stale_map[cpu]);
 	}
 
 	/* Flick the MMU and release lock */
+	pr_hardcont(" -> %d\n", id);
 	set_context(id, next->pgd);
 	spin_unlock(&context_lock);
 }
@@ -266,6 +285,8 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next)
  */
 int init_new_context(struct task_struct *t, struct mm_struct *mm)
 {
+	pr_hard("initing context for mm @%p\n", mm);
+
 	mm->context.id = MMU_NO_CONTEXT;
 	mm->context.active = 0;
 
@@ -305,7 +326,9 @@ static int __cpuinit mmu_context_cpu_notify(struct notifier_block *self,
 					    unsigned long action, void *hcpu)
 {
 	unsigned int cpu = (unsigned int)(long)hcpu;
-
+#ifdef CONFIG_HOTPLUG_CPU
+	struct task_struct *p;
+#endif
 	/* We don't touch CPU 0 map, it's allocated at aboot and kept
 	 * around forever
 	 */
@@ -324,8 +347,16 @@ static int __cpuinit mmu_context_cpu_notify(struct notifier_block *self,
 		pr_devel("MMU: Freeing stale context map for CPU %d\n", cpu);
 		kfree(stale_map[cpu]);
 		stale_map[cpu] = NULL;
-		break;
-#endif
+
+		/* We also clear the cpu_vm_mask bits of CPUs going away */
+		read_lock(&tasklist_lock);
+		for_each_process(p) {
+			if (p->mm)
+				cpumask_clear_cpu(cpu, mm_cpumask(p->mm));
+		}
+		read_unlock(&tasklist_lock);
+		break;
+#endif /* CONFIG_HOTPLUG_CPU */
 	}
 	return NOTIFY_OK;
 }
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c
index ad2eb4d34dd4..d908e75cc3b5 100644
--- a/arch/powerpc/mm/tlb_nohash.c
+++ b/arch/powerpc/mm/tlb_nohash.c
@@ -87,6 +87,12 @@ EXPORT_SYMBOL(local_flush_tlb_page);
 
 static DEFINE_SPINLOCK(tlbivax_lock);
 
+static int mm_is_core_local(struct mm_struct *mm)
+{
+	return cpumask_subset(mm_cpumask(mm),
+			      topology_thread_cpumask(smp_processor_id()));
+}
+
 struct tlb_flush_param {
 	unsigned long addr;
 	unsigned int pid;
@@ -131,7 +137,7 @@ void flush_tlb_mm(struct mm_struct *mm)
 	pid = mm->context.id;
 	if (unlikely(pid == MMU_NO_CONTEXT))
 		goto no_context;
-	if (!cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) {
+	if (!mm_is_core_local(mm)) {
 		struct tlb_flush_param p = { .pid = pid };
 		/* Ignores smp_processor_id() even if set. */
 		smp_call_function_many(mm_cpumask(mm),
@@ -153,7 +159,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
 	if (unlikely(pid == MMU_NO_CONTEXT))
 		goto bail;
 	cpu_mask = mm_cpumask(vma->vm_mm);
-	if (!cpumask_equal(cpu_mask, cpumask_of(smp_processor_id()))) {
+	if (!mm_is_core_local(mm)) {
 		/* If broadcast tlbivax is supported, use it */
 		if (mmu_has_feature(MMU_FTR_USE_TLBIVAX_BCAST)) {
 			int lock = mmu_has_feature(MMU_FTR_LOCK_BCAST_INVAL);
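
The new mm_is_core_local() test above replaces an equality check against the current CPU with a subset check against the current core's thread siblings: if every CPU the mm ever ran on shares the local TLB, a local flush suffices and no IPI or tlbivax broadcast is needed. A small standalone illustration, not from the patch, using plain bitmasks in place of struct cpumask and an assumed two threads per core:

#include <stdio.h>

/* Illustration only, not kernel code: plain bitmasks stand in for
 * struct cpumask; CPUs 2 and 3 are assumed to be the two threads of
 * one core and therefore to share a TLB.
 */
int main(void)
{
	unsigned long mm_mask   = (1UL << 2) | (1UL << 3); /* mm ran on CPUs 2 and 3 */
	unsigned long this_cpu  = 1UL << 2;                /* currently on CPU 2 */
	unsigned long core_sibs = (1UL << 2) | (1UL << 3); /* thread siblings of CPU 2 */

	/* Old check (equality with the current CPU only): would broadcast */
	printf("old check needs broadcast: %d\n", mm_mask != this_cpu);

	/* New check (subset of the core's threads): a local flush is enough
	 * because every CPU in the mask shares the same TLB.
	 */
	printf("new check needs broadcast: %d\n", (mm_mask & ~core_sibs) != 0UL);
	return 0;
}

Here the old test would have triggered an IPI broadcast even though both CPUs share one TLB, while the new test correctly reports that a local flush is sufficient.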