diff options
author | Benjamin Herrenschmidt <benh@kernel.crashing.org> | 2008-12-18 14:13:29 -0500 |
---|---|---|
committer | Paul Mackerras <paulus@samba.org> | 2008-12-20 22:21:15 -0500 |
commit | 2ca8cf738907180e7fbda90f25f32b86feda609f (patch) | |
tree | 60d8af9b53a78ae9300ef7e68f222b02fe3be542 /arch/powerpc/mm/mmu_context_nohash.c | |
parent | 5e696617c425eb97bd943d781f3941fb1e8f0e5b (diff) |
powerpc/mm: Rework context management for CPUs with no hash table
This reworks the context management code used by 4xx, 8xx and
Freescale BookE. It adds support for SMP by implementing the
concept of a stale context map to lazily flush the TLB on
processors where a context may have been invalidated. This
also contains the groundwork for generalizing such lazy TLB
flushing by just picking up a new PID and marking the old one
stale; that generalization will be implemented later.
This is a first implementation that uses a global spinlock.
Ideally, we should try to get at least the fast path (context ID
already assigned) lockless or limited to a per-context lock,
but for now this will do.
I tried to keep the UP case reasonably simple to avoid adding
too much overhead to 8xx, which does a lot of context stealing
since it effectively has only 16 PIDs available.
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Acked-by: Kumar Gala <galak@kernel.crashing.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
Diffstat (limited to 'arch/powerpc/mm/mmu_context_nohash.c')
-rw-r--r-- | arch/powerpc/mm/mmu_context_nohash.c | 268 |
1 files changed, 221 insertions, 47 deletions
diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c index 00e02150abef..8b5de52de0ad 100644 --- a/arch/powerpc/mm/mmu_context_nohash.c +++ b/arch/powerpc/mm/mmu_context_nohash.c | |||
@@ -14,13 +14,28 @@ | |||
14 | * as published by the Free Software Foundation; either version | 14 | * as published by the Free Software Foundation; either version |
15 | * 2 of the License, or (at your option) any later version. | 15 | * 2 of the License, or (at your option) any later version. |
16 | * | 16 | * |
17 | * TODO: | ||
18 | * | ||
19 | * - The global context lock will not scale very well | ||
20 | * - The maps should be dynamically allocated to allow for processors | ||
21 | * that support more PID bits at runtime | ||
22 | * - Implement flush_tlb_mm() by making the context stale and picking | ||
23 | * a new one | ||
24 | * - More aggressively clear stale map bits and maybe find some way to | ||
25 | * also clear mm->cpu_vm_mask bits when processes are migrated | ||
17 | */ | 26 | */ |
18 | 27 | ||
28 | #undef DEBUG | ||
29 | #define DEBUG_STEAL_ONLY | ||
30 | #undef DEBUG_MAP_CONSISTENCY | ||
31 | |||
32 | #include <linux/kernel.h> | ||
19 | #include <linux/mm.h> | 33 | #include <linux/mm.h> |
20 | #include <linux/init.h> | 34 | #include <linux/init.h> |
21 | 35 | ||
22 | #include <asm/mmu_context.h> | 36 | #include <asm/mmu_context.h> |
23 | #include <asm/tlbflush.h> | 37 | #include <asm/tlbflush.h> |
38 | #include <linux/spinlock.h> | ||
24 | 39 | ||
25 | /* | 40 | /* |
26 | * The MPC8xx has only 16 contexts. We rotate through them on each | 41 | * The MPC8xx has only 16 contexts. We rotate through them on each |
@@ -40,17 +55,14 @@ | |||
40 | */ | 55 | */ |
41 | 56 | ||
42 | #ifdef CONFIG_8xx | 57 | #ifdef CONFIG_8xx |
43 | #define NO_CONTEXT 16 | ||
44 | #define LAST_CONTEXT 15 | 58 | #define LAST_CONTEXT 15 |
45 | #define FIRST_CONTEXT 0 | 59 | #define FIRST_CONTEXT 0 |
46 | 60 | ||
47 | #elif defined(CONFIG_4xx) | 61 | #elif defined(CONFIG_4xx) |
48 | #define NO_CONTEXT 256 | ||
49 | #define LAST_CONTEXT 255 | 62 | #define LAST_CONTEXT 255 |
50 | #define FIRST_CONTEXT 1 | 63 | #define FIRST_CONTEXT 1 |
51 | 64 | ||
52 | #elif defined(CONFIG_E200) || defined(CONFIG_E500) | 65 | #elif defined(CONFIG_E200) || defined(CONFIG_E500) |
53 | #define NO_CONTEXT 256 | ||
54 | #define LAST_CONTEXT 255 | 66 | #define LAST_CONTEXT 255 |
55 | #define FIRST_CONTEXT 1 | 67 | #define FIRST_CONTEXT 1 |
56 | 68 | ||
@@ -58,66 +70,208 @@ | |||
58 | #error Unsupported processor type | 70 | #error Unsupported processor type |
59 | #endif | 71 | #endif |
60 | 72 | ||
61 | static unsigned long next_mmu_context; | 73 | static unsigned int next_context, nr_free_contexts; |
62 | static unsigned long context_map[LAST_CONTEXT / BITS_PER_LONG + 1]; | 74 | static unsigned long context_map[LAST_CONTEXT / BITS_PER_LONG + 1]; |
63 | static atomic_t nr_free_contexts; | 75 | static unsigned long stale_map[NR_CPUS][LAST_CONTEXT / BITS_PER_LONG + 1]; |
64 | static struct mm_struct *context_mm[LAST_CONTEXT+1]; | 76 | static struct mm_struct *context_mm[LAST_CONTEXT+1]; |
65 | static void steal_context(void); | 77 | static spinlock_t context_lock = SPIN_LOCK_UNLOCKED; |
66 | 78 | ||
67 | /* Steal a context from a task that has one at the moment. | 79 | /* Steal a context from a task that has one at the moment. |
68 | * This is only used on 8xx and 4xx and we presently assume that | 80 | * |
69 | * they don't do SMP. If they do then this will have to check | 81 | * This is used when we are running out of available PID numbers |
70 | * whether the MM we steal is in use. | 82 | * on the processors. |
71 | * We also assume that this is only used on systems that don't | 83 | * |
72 | * use an MMU hash table - this is true for 8xx and 4xx. | ||
73 | * This isn't an LRU system, it just frees up each context in | 84 | * This isn't an LRU system, it just frees up each context in |
74 | * turn (sort-of pseudo-random replacement :). This would be the | 85 | * turn (sort-of pseudo-random replacement :). This would be the |
75 | * place to implement an LRU scheme if anyone was motivated to do it. | 86 | * place to implement an LRU scheme if anyone was motivated to do it. |
76 | * -- paulus | 87 | * -- paulus |
88 | * | ||
89 | * For context stealing, we use a slightly different approach for | ||
90 | * SMP and UP. Basically, the UP one is simpler and doesn't use | ||
91 | * the stale map as we can just flush the local CPU | ||
92 | * -- benh | ||
77 | */ | 93 | */ |
78 | static void steal_context(void) | 94 | #ifdef CONFIG_SMP |
95 | static unsigned int steal_context_smp(unsigned int id) | ||
79 | { | 96 | { |
80 | struct mm_struct *mm; | 97 | struct mm_struct *mm; |
98 | unsigned int cpu, max; | ||
81 | 99 | ||
82 | /* free up context `next_mmu_context' */ | 100 | again: |
83 | /* if we shouldn't free context 0, don't... */ | 101 | max = LAST_CONTEXT - FIRST_CONTEXT; |
84 | if (next_mmu_context < FIRST_CONTEXT) | ||
85 | next_mmu_context = FIRST_CONTEXT; | ||
86 | mm = context_mm[next_mmu_context]; | ||
87 | flush_tlb_mm(mm); | ||
88 | destroy_context(mm); | ||
89 | } | ||
90 | 102 | ||
103 | /* Attempt to free next_context first and then loop until we manage */ | ||
104 | while (max--) { | ||
105 | /* Pick up the victim mm */ | ||
106 | mm = context_mm[id]; | ||
91 | 107 | ||
92 | /* | 108 | /* We have a candidate victim, check if it's active, on SMP |
93 | * Get a new mmu context for the address space described by `mm'. | 109 | * we cannot steal active contexts |
110 | */ | ||
111 | if (mm->context.active) { | ||
112 | id++; | ||
113 | if (id > LAST_CONTEXT) | ||
114 | id = FIRST_CONTEXT; | ||
115 | continue; | ||
116 | } | ||
117 | pr_debug("[%d] steal context %d from mm @%p\n", | ||
118 | smp_processor_id(), id, mm); | ||
119 | |||
120 | /* Mark this mm has having no context anymore */ | ||
121 | mm->context.id = MMU_NO_CONTEXT; | ||
122 | |||
123 | /* Mark it stale on all CPUs that used this mm */ | ||
124 | for_each_cpu_mask_nr(cpu, mm->cpu_vm_mask) | ||
125 | __set_bit(id, stale_map[cpu]); | ||
126 | return id; | ||
127 | } | ||
128 | |||
129 | /* This will happen if you have more CPUs than available contexts, | ||
130 | * all we can do here is wait a bit and try again | ||
131 | */ | ||
132 | spin_unlock(&context_lock); | ||
133 | cpu_relax(); | ||
134 | spin_lock(&context_lock); | ||
135 | goto again; | ||
136 | } | ||
137 | #endif /* CONFIG_SMP */ | ||
138 | |||
139 | /* Note that this will also be called on SMP if all other CPUs are | ||
140 | * offlined, which means that it may be called for cpu != 0. For | ||
141 | * this to work, we somewhat assume that CPUs that are onlined | ||
142 | * come up with a fully clean TLB (or are cleaned when offlined) | ||
94 | */ | 143 | */ |
95 | static inline void get_mmu_context(struct mm_struct *mm) | 144 | static unsigned int steal_context_up(unsigned int id) |
96 | { | 145 | { |
97 | unsigned long ctx; | 146 | struct mm_struct *mm; |
147 | int cpu = smp_processor_id(); | ||
98 | 148 | ||
99 | if (mm->context.id != NO_CONTEXT) | 149 | /* Pick up the victim mm */ |
100 | return; | 150 | mm = context_mm[id]; |
151 | |||
152 | pr_debug("[%d] steal context %d from mm @%p\n", cpu, id, mm); | ||
101 | 153 | ||
102 | while (atomic_dec_if_positive(&nr_free_contexts) < 0) | 154 | /* Mark this mm has having no context anymore */ |
103 | steal_context(); | 155 | mm->context.id = MMU_NO_CONTEXT; |
104 | 156 | ||
105 | ctx = next_mmu_context; | 157 | /* Flush the TLB for that context */ |
106 | while (test_and_set_bit(ctx, context_map)) { | 158 | local_flush_tlb_mm(mm); |
107 | ctx = find_next_zero_bit(context_map, LAST_CONTEXT+1, ctx); | 159 | |
108 | if (ctx > LAST_CONTEXT) | 160 | /* XXX This clear should ultimately be part of local_flush_tlb_mm */ |
109 | ctx = 0; | 161 | __clear_bit(id, stale_map[cpu]); |
162 | |||
163 | return id; | ||
164 | } | ||
165 | |||
166 | #ifdef DEBUG_MAP_CONSISTENCY | ||
167 | static void context_check_map(void) | ||
168 | { | ||
169 | unsigned int id, nrf, nact; | ||
170 | |||
171 | nrf = nact = 0; | ||
172 | for (id = FIRST_CONTEXT; id <= LAST_CONTEXT; id++) { | ||
173 | int used = test_bit(id, context_map); | ||
174 | if (!used) | ||
175 | nrf++; | ||
176 | if (used != (context_mm[id] != NULL)) | ||
177 | pr_err("MMU: Context %d is %s and MM is %p !\n", | ||
178 | id, used ? "used" : "free", context_mm[id]); | ||
179 | if (context_mm[id] != NULL) | ||
180 | nact += context_mm[id]->context.active; | ||
110 | } | 181 | } |
111 | next_mmu_context = (ctx + 1) & LAST_CONTEXT; | 182 | if (nrf != nr_free_contexts) { |
112 | mm->context.id = ctx; | 183 | pr_err("MMU: Free context count out of sync ! (%d vs %d)\n", |
113 | context_mm[ctx] = mm; | 184 | nr_free_contexts, nrf); |
185 | nr_free_contexts = nrf; | ||
186 | } | ||
187 | if (nact > num_online_cpus()) | ||
188 | pr_err("MMU: More active contexts than CPUs ! (%d vs %d)\n", | ||
189 | nact, num_online_cpus()); | ||
114 | } | 190 | } |
191 | #else | ||
192 | static void context_check_map(void) { } | ||
193 | #endif | ||
115 | 194 | ||
116 | void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next) | 195 | void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next) |
117 | { | 196 | { |
118 | get_mmu_context(next); | 197 | unsigned int id, cpu = smp_processor_id(); |
198 | unsigned long *map; | ||
119 | 199 | ||
120 | set_context(next->context.id, next->pgd); | 200 | /* No lockless fast path .. yet */ |
201 | spin_lock(&context_lock); | ||
202 | |||
203 | #ifndef DEBUG_STEAL_ONLY | ||
204 | pr_debug("[%d] activating context for mm @%p, active=%d, id=%d\n", | ||
205 | cpu, next, next->context.active, next->context.id); | ||
206 | #endif | ||
207 | |||
208 | #ifdef CONFIG_SMP | ||
209 | /* Mark us active and the previous one not anymore */ | ||
210 | next->context.active++; | ||
211 | if (prev) { | ||
212 | WARN_ON(prev->context.active < 1); | ||
213 | prev->context.active--; | ||
214 | } | ||
215 | #endif /* CONFIG_SMP */ | ||
216 | |||
217 | /* If we already have a valid assigned context, skip all that */ | ||
218 | id = next->context.id; | ||
219 | if (likely(id != MMU_NO_CONTEXT)) | ||
220 | goto ctxt_ok; | ||
221 | |||
222 | /* We really don't have a context, let's try to acquire one */ | ||
223 | id = next_context; | ||
224 | if (id > LAST_CONTEXT) | ||
225 | id = FIRST_CONTEXT; | ||
226 | map = context_map; | ||
227 | |||
228 | /* No more free contexts, let's try to steal one */ | ||
229 | if (nr_free_contexts == 0) { | ||
230 | #ifdef CONFIG_SMP | ||
231 | if (num_online_cpus() > 1) { | ||
232 | id = steal_context_smp(id); | ||
233 | goto stolen; | ||
234 | } | ||
235 | #endif /* CONFIG_SMP */ | ||
236 | id = steal_context_up(id); | ||
237 | goto stolen; | ||
238 | } | ||
239 | nr_free_contexts--; | ||
240 | |||
241 | /* We know there's at least one free context, try to find it */ | ||
242 | while (__test_and_set_bit(id, map)) { | ||
243 | id = find_next_zero_bit(map, LAST_CONTEXT+1, id); | ||
244 | if (id > LAST_CONTEXT) | ||
245 | id = FIRST_CONTEXT; | ||
246 | } | ||
247 | stolen: | ||
248 | next_context = id + 1; | ||
249 | context_mm[id] = next; | ||
250 | next->context.id = id; | ||
251 | |||
252 | #ifndef DEBUG_STEAL_ONLY | ||
253 | pr_debug("[%d] picked up new id %d, nrf is now %d\n", | ||
254 | cpu, id, nr_free_contexts); | ||
255 | #endif | ||
256 | |||
257 | context_check_map(); | ||
258 | ctxt_ok: | ||
259 | |||
260 | /* If that context got marked stale on this CPU, then flush the | ||
261 | * local TLB for it and unmark it before we use it | ||
262 | */ | ||
263 | if (test_bit(id, stale_map[cpu])) { | ||
264 | pr_debug("[%d] flushing stale context %d for mm @%p !\n", | ||
265 | cpu, id, next); | ||
266 | local_flush_tlb_mm(next); | ||
267 | |||
268 | /* XXX This clear should ultimately be part of local_flush_tlb_mm */ | ||
269 | __clear_bit(id, stale_map[cpu]); | ||
270 | } | ||
271 | |||
272 | /* Flick the MMU and release lock */ | ||
273 | set_context(id, next->pgd); | ||
274 | spin_unlock(&context_lock); | ||
121 | } | 275 | } |
122 | 276 | ||
123 | /* | 277 | /* |
@@ -125,7 +279,9 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next) | |||
125 | */ | 279 | */ |
126 | int init_new_context(struct task_struct *t, struct mm_struct *mm) | 280 | int init_new_context(struct task_struct *t, struct mm_struct *mm) |
127 | { | 281 | { |
128 | mm->context.id = NO_CONTEXT; | 282 | mm->context.id = MMU_NO_CONTEXT; |
283 | mm->context.active = 0; | ||
284 | |||
129 | return 0; | 285 | return 0; |
130 | } | 286 | } |
131 | 287 | ||
@@ -134,13 +290,25 @@ int init_new_context(struct task_struct *t, struct mm_struct *mm) | |||
134 | */ | 290 | */ |
135 | void destroy_context(struct mm_struct *mm) | 291 | void destroy_context(struct mm_struct *mm) |
136 | { | 292 | { |
137 | preempt_disable(); | 293 | unsigned int id; |
138 | if (mm->context.id != NO_CONTEXT) { | 294 | |
139 | clear_bit(mm->context.id, context_map); | 295 | if (mm->context.id == MMU_NO_CONTEXT) |
140 | mm->context.id = NO_CONTEXT; | 296 | return; |
141 | atomic_inc(&nr_free_contexts); | 297 | |
298 | WARN_ON(mm->context.active != 0); | ||
299 | |||
300 | spin_lock(&context_lock); | ||
301 | id = mm->context.id; | ||
302 | if (id != MMU_NO_CONTEXT) { | ||
303 | __clear_bit(id, context_map); | ||
304 | mm->context.id = MMU_NO_CONTEXT; | ||
305 | #ifdef DEBUG_MAP_CONSISTENCY | ||
306 | mm->context.active = 0; | ||
307 | context_mm[id] = NULL; | ||
308 | #endif | ||
309 | nr_free_contexts++; | ||
142 | } | 310 | } |
143 | preempt_enable(); | 311 | spin_unlock(&context_lock); |
144 | } | 312 | } |
145 | 313 | ||
146 | 314 | ||
@@ -149,6 +317,12 @@ void destroy_context(struct mm_struct *mm) | |||
149 | */ | 317 | */ |
150 | void __init mmu_context_init(void) | 318 | void __init mmu_context_init(void) |
151 | { | 319 | { |
320 | /* Mark init_mm as being active on all possible CPUs since | ||
321 | * we'll get called with prev == init_mm the first time | ||
322 | * we schedule on a given CPU | ||
323 | */ | ||
324 | init_mm.context.active = NR_CPUS; | ||
325 | |||
152 | /* | 326 | /* |
153 | * Some processors have too few contexts to reserve one for | 327 | * Some processors have too few contexts to reserve one for |
154 | * init_mm, and require using context 0 for a normal task. | 328 | * init_mm, and require using context 0 for a normal task. |
@@ -156,7 +330,7 @@ void __init mmu_context_init(void) | |||
156 | * This code assumes FIRST_CONTEXT < 32. | 330 | * This code assumes FIRST_CONTEXT < 32. |
157 | */ | 331 | */ |
158 | context_map[0] = (1 << FIRST_CONTEXT) - 1; | 332 | context_map[0] = (1 << FIRST_CONTEXT) - 1; |
159 | next_mmu_context = FIRST_CONTEXT; | 333 | next_context = FIRST_CONTEXT; |
160 | atomic_set(&nr_free_contexts, LAST_CONTEXT - FIRST_CONTEXT + 1); | 334 | nr_free_contexts = LAST_CONTEXT - FIRST_CONTEXT + 1; |
161 | } | 335 | } |
162 | 336 | ||