author		Benjamin Herrenschmidt <benh@kernel.crashing.org>	2008-12-18 14:13:48 -0500
committer	Paul Mackerras <paulus@samba.org>	2008-12-20 22:21:16 -0500
commit		77520351805cc19ba37394ae33f862ef6d3c2a23 (patch)
tree		4bb26bea674835683a2da53cd84efa782425d570
parent		760ec0e02d8a13d0ed60d99f47879d4aa8ef1910 (diff)
powerpc/mm: Runtime allocation of mmu context maps for nohash CPUs
This makes the MMU context code used for CPUs with no hash table
(except 603) dynamically allocate the various maps used to track
the state of contexts.

Only the main free map and the CPU 0 stale map are allocated at boot
time.  Other CPU maps are allocated when those CPUs are brought up
and freed if they are unplugged.

This also moves the initialization of the MMU context management
slightly later during the boot process, which should be fine as it's
really only needed when userland is first started anyway.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Acked-by: Kumar Gala <galak@kernel.crashing.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
-rw-r--r--	arch/powerpc/kernel/setup_32.c		5
-rw-r--r--	arch/powerpc/mm/init_32.c		4
-rw-r--r--	arch/powerpc/mm/mmu_context_nohash.c	161
3 files changed, 116 insertions(+), 54 deletions(-)
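For orientation before the full diff: the patch turns the compile-time FIRST_CONTEXT/LAST_CONTEXT sizing into runtime first_context/last_context values and allocates context_map, context_mm and the per-CPU stale_map[] dynamically (bootmem for CPU 0 at init, kzalloc from a CPU hotplug notifier for the other CPUs). The following is a minimal, standalone userspace sketch of that allocation scheme only; the variable names mirror the diff below, while the helper functions (mmu_context_init_demo, cpu_online_demo) and the fixed NR_CPUS value are illustrative assumptions, not kernel code.

/*
 * Illustrative userspace analogue of the scheme introduced by this patch:
 * the context maps are sized from a runtime last_context value instead of
 * a compile-time LAST_CONTEXT, and each CPU's stale map is allocated only
 * when that CPU comes online.
 */
#include <stdio.h>
#include <stdlib.h>

#define NR_CPUS        4			/* assumption for the demo */
#define BITS_PER_LONG  (8 * sizeof(unsigned long))

static unsigned int first_context, last_context;
static unsigned long *context_map;		/* free/used bitmap           */
static unsigned long *stale_map[NR_CPUS];	/* per-CPU, allocated lazily  */

#define CTX_MAP_SIZE \
	(sizeof(unsigned long) * (last_context / BITS_PER_LONG + 1))

static void mmu_context_init_demo(int is_8xx)
{
	/* 8xx has 16 contexts (0..15); other nohash cores use 1..255 */
	first_context = is_8xx ? 0 : 1;
	last_context  = is_8xx ? 15 : 255;

	/* boot-time allocations: the free map and the CPU 0 stale map only */
	context_map  = calloc(1, CTX_MAP_SIZE);
	stale_map[0] = calloc(1, CTX_MAP_SIZE);
}

static void cpu_online_demo(unsigned int cpu)
{
	/* mirrors the CPU_ONLINE notifier: allocate that CPU's stale map */
	if (cpu != 0 && !stale_map[cpu])
		stale_map[cpu] = calloc(1, CTX_MAP_SIZE);
}

int main(void)
{
	mmu_context_init_demo(0);
	cpu_online_demo(1);
	printf("contexts %u..%u, map size %zu bytes\n",
	       first_context, last_context, CTX_MAP_SIZE);
	return 0;
}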
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index b14c2a3e2185..d72ef39f2b37 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -38,6 +38,7 @@
 #include <asm/time.h>
 #include <asm/serial.h>
 #include <asm/udbg.h>
+#include <asm/mmu_context.h>
 
 #include "setup.h"
 
@@ -330,4 +331,8 @@ void __init setup_arch(char **cmdline_p)
 	if ( ppc_md.progress ) ppc_md.progress("arch: exit", 0x3eab);
 
 	paging_init();
+
+	/* Initialize the MMU context management stuff */
+	mmu_context_init();
+
 }
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index 388ceda632f3..578294c3b1ce 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -35,7 +35,6 @@
 #include <asm/pgalloc.h>
 #include <asm/prom.h>
 #include <asm/io.h>
-#include <asm/mmu_context.h>
 #include <asm/pgtable.h>
 #include <asm/mmu.h>
 #include <asm/smp.h>
@@ -180,9 +179,6 @@ void __init MMU_init(void)
 	if (ppc_md.progress)
 		ppc_md.progress("MMU:setio", 0x302);
 
-	/* Initialize the context management stuff */
-	mmu_context_init();
-
 	if (ppc_md.progress)
 		ppc_md.progress("MMU:exit", 0x211);
 
diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c
index 8b5de52de0ad..52a0cfc38b64 100644
--- a/arch/powerpc/mm/mmu_context_nohash.c
+++ b/arch/powerpc/mm/mmu_context_nohash.c
@@ -28,54 +28,30 @@
 #undef DEBUG
 #define DEBUG_STEAL_ONLY
 #undef DEBUG_MAP_CONSISTENCY
+/*#define DEBUG_CLAMP_LAST_CONTEXT	15 */
 
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/bootmem.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
 
 #include <asm/mmu_context.h>
 #include <asm/tlbflush.h>
-#include <linux/spinlock.h>
-
-/*
- * The MPC8xx has only 16 contexts.  We rotate through them on each
- * task switch.  A better way would be to keep track of tasks that
- * own contexts, and implement an LRU usage.  That way very active
- * tasks don't always have to pay the TLB reload overhead.  The
- * kernel pages are mapped shared, so the kernel can run on behalf
- * of any task that makes a kernel entry.  Shared does not mean they
- * are not protected, just that the ASID comparison is not performed.
- *      -- Dan
- *
- * The IBM4xx has 256 contexts, so we can just rotate through these
- * as a way of "switching" contexts.  If the TID of the TLB is zero,
- * the PID/TID comparison is disabled, so we can use a TID of zero
- * to represent all kernel pages as shared among all contexts.
- *      -- Dan
- */
-
-#ifdef CONFIG_8xx
-#define LAST_CONTEXT		15
-#define FIRST_CONTEXT		0
-
-#elif defined(CONFIG_4xx)
-#define LAST_CONTEXT		255
-#define FIRST_CONTEXT		1
-
-#elif defined(CONFIG_E200) || defined(CONFIG_E500)
-#define LAST_CONTEXT		255
-#define FIRST_CONTEXT		1
-
-#else
-#error Unsupported processor type
-#endif
 
+static unsigned int first_context, last_context;
 static unsigned int next_context, nr_free_contexts;
-static unsigned long context_map[LAST_CONTEXT / BITS_PER_LONG + 1];
-static unsigned long stale_map[NR_CPUS][LAST_CONTEXT / BITS_PER_LONG + 1];
-static struct mm_struct *context_mm[LAST_CONTEXT+1];
+static unsigned long *context_map;
+static unsigned long *stale_map[NR_CPUS];
+static struct mm_struct **context_mm;
 static spinlock_t context_lock = SPIN_LOCK_UNLOCKED;
 
+#define CTX_MAP_SIZE	\
+	(sizeof(unsigned long) * (last_context / BITS_PER_LONG + 1))
+
+
 /* Steal a context from a task that has one at the moment.
  *
  * This is used when we are running out of available PID numbers
@@ -98,7 +74,7 @@ static unsigned int steal_context_smp(unsigned int id)
 	unsigned int cpu, max;
 
  again:
-	max = LAST_CONTEXT - FIRST_CONTEXT;
+	max = last_context - first_context;
 
 	/* Attempt to free next_context first and then loop until we manage */
 	while (max--) {
@@ -110,8 +86,8 @@ static unsigned int steal_context_smp(unsigned int id)
 		 */
 		if (mm->context.active) {
 			id++;
-			if (id > LAST_CONTEXT)
-				id = FIRST_CONTEXT;
+			if (id > last_context)
+				id = first_context;
 			continue;
 		}
 		pr_debug("[%d] steal context %d from mm @%p\n",
@@ -169,7 +145,7 @@ static void context_check_map(void)
 	unsigned int id, nrf, nact;
 
 	nrf = nact = 0;
-	for (id = FIRST_CONTEXT; id <= LAST_CONTEXT; id++) {
+	for (id = first_context; id <= last_context; id++) {
 		int used = test_bit(id, context_map);
 		if (!used)
 			nrf++;
@@ -187,6 +163,8 @@ static void context_check_map(void)
 	if (nact > num_online_cpus())
 		pr_err("MMU: More active contexts than CPUs ! (%d vs %d)\n",
 		       nact, num_online_cpus());
+	if (first_context > 0 && !test_bit(0, context_map))
+		pr_err("MMU: Context 0 has been freed !!!\n");
 }
 #else
 static void context_check_map(void) { }
@@ -209,6 +187,10 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next)
 	/* Mark us active and the previous one not anymore */
 	next->context.active++;
 	if (prev) {
+#ifndef DEBUG_STEAL_ONLY
+		pr_debug(" old context %p active was: %d\n",
+			 prev, prev->context.active);
+#endif
 		WARN_ON(prev->context.active < 1);
 		prev->context.active--;
 	}
@@ -221,8 +203,8 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next)
 
 	/* We really don't have a context, let's try to acquire one */
 	id = next_context;
-	if (id > LAST_CONTEXT)
-		id = FIRST_CONTEXT;
+	if (id > last_context)
+		id = first_context;
 	map = context_map;
 
 	/* No more free contexts, let's try to steal one */
@@ -240,9 +222,9 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next)
 
 	/* We know there's at least one free context, try to find it */
 	while (__test_and_set_bit(id, map)) {
-		id = find_next_zero_bit(map, LAST_CONTEXT+1, id);
-		if (id > LAST_CONTEXT)
-			id = FIRST_CONTEXT;
+		id = find_next_zero_bit(map, last_context+1, id);
+		if (id > last_context)
+			id = first_context;
 	}
  stolen:
 	next_context = id + 1;
@@ -311,6 +293,42 @@ void destroy_context(struct mm_struct *mm)
 	spin_unlock(&context_lock);
 }
 
+#ifdef CONFIG_SMP
+
+static int __cpuinit mmu_context_cpu_notify(struct notifier_block *self,
+					    unsigned long action, void *hcpu)
+{
+	unsigned int cpu = (unsigned int)(long)hcpu;
+
+	/* We don't touch the CPU 0 map, it's allocated at boot and kept
+	 * around forever
+	 */
+	if (cpu == 0)
+		return NOTIFY_OK;
+
+	switch (action) {
+	case CPU_ONLINE:
+	case CPU_ONLINE_FROZEN:
+		pr_debug("MMU: Allocating stale context map for CPU %d\n", cpu);
+		stale_map[cpu] = kzalloc(CTX_MAP_SIZE, GFP_KERNEL);
+		break;
+#ifdef CONFIG_HOTPLUG_CPU
+	case CPU_DEAD:
+	case CPU_DEAD_FROZEN:
+		pr_debug("MMU: Freeing stale context map for CPU %d\n", cpu);
+		kfree(stale_map[cpu]);
+		stale_map[cpu] = NULL;
+		break;
+#endif
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block __cpuinitdata mmu_context_cpu_nb = {
+	.notifier_call	= mmu_context_cpu_notify,
+};
+
+#endif /* CONFIG_SMP */
 
 /*
  * Initialize the context management stuff.
@@ -324,13 +342,56 @@ void __init mmu_context_init(void)
 	init_mm.context.active = NR_CPUS;
 
 	/*
+	 * The MPC8xx has only 16 contexts.  We rotate through them on each
+	 * task switch.  A better way would be to keep track of tasks that
+	 * own contexts, and implement an LRU usage.  That way very active
+	 * tasks don't always have to pay the TLB reload overhead.  The
+	 * kernel pages are mapped shared, so the kernel can run on behalf
+	 * of any task that makes a kernel entry.  Shared does not mean they
+	 * are not protected, just that the ASID comparison is not performed.
+	 *      -- Dan
+	 *
+	 * The IBM4xx has 256 contexts, so we can just rotate through these
+	 * as a way of "switching" contexts.  If the TID of the TLB is zero,
+	 * the PID/TID comparison is disabled, so we can use a TID of zero
+	 * to represent all kernel pages as shared among all contexts.
+	 *      -- Dan
+	 */
+	if (mmu_has_feature(MMU_FTR_TYPE_8xx)) {
+		first_context = 0;
+		last_context = 15;
+	} else {
+		first_context = 1;
+		last_context = 255;
+	}
+
+#ifdef DEBUG_CLAMP_LAST_CONTEXT
+	last_context = DEBUG_CLAMP_LAST_CONTEXT;
+#endif
+	/*
+	 * Allocate the maps used by context management
+	 */
+	context_map = alloc_bootmem(CTX_MAP_SIZE);
+	context_mm = alloc_bootmem(sizeof(void *) * (last_context + 1));
+	stale_map[0] = alloc_bootmem(CTX_MAP_SIZE);
+
+#ifdef CONFIG_SMP
+	register_cpu_notifier(&mmu_context_cpu_nb);
+#endif
+
+	printk(KERN_INFO
+	       "MMU: Allocated %d bytes of context maps for %d contexts\n",
+	       2 * CTX_MAP_SIZE + (sizeof(void *) * (last_context + 1)),
+	       last_context - first_context + 1);
+
+	/*
 	 * Some processors have too few contexts to reserve one for
 	 * init_mm, and require using context 0 for a normal task.
 	 * Other processors reserve the use of context zero for the kernel.
-	 * This code assumes FIRST_CONTEXT < 32.
+	 * This code assumes first_context < 32.
 	 */
-	context_map[0] = (1 << FIRST_CONTEXT) - 1;
-	next_context = FIRST_CONTEXT;
-	nr_free_contexts = LAST_CONTEXT - FIRST_CONTEXT + 1;
+	context_map[0] = (1 << first_context) - 1;
+	next_context = first_context;
+	nr_free_contexts = last_context - first_context + 1;
 }
 