author     Linus Torvalds <torvalds@g5.osdl.org>   2006-03-20 14:57:50 -0500
committer  Linus Torvalds <torvalds@g5.osdl.org>   2006-03-20 14:57:50 -0500
commit     c4a1745aa09fc110afdefea0e5d025043e348bae (patch)
tree       6d28dc3a0c1bf18437b3d49f28e5c81b850cdb2f /arch/sparc64/mm/tsb.c
parent     88dcb91177cfa5b26143a29074389a2aa259c7cf (diff)
parent     ac0eb3eb7e54b700386068be025a43d2a3958ee5 (diff)
Merge master.kernel.org:/pub/scm/linux/kernel/git/davem/sparc-2.6
* master.kernel.org:/pub/scm/linux/kernel/git/davem/sparc-2.6: (230 commits)
[SPARC64]: Update defconfig.
[SPARC64]: Fix 2 bugs in huge page support.
[SPARC64]: CONFIG_BLK_DEV_RAM fix
[SPARC64]: Optimized TSB table initialization.
[SPARC64]: Allow CONFIG_MEMORY_HOTPLUG to build.
[SPARC64]: Use SLAB caches for TSB tables.
[SPARC64]: Don't kill the page allocator when growing a TSB.
[SPARC64]: Randomize mm->mmap_base when PF_RANDOMIZE is set.
[SPARC64]: Increase top of 32-bit process stack.
[SPARC64]: Top-down address space allocation for 32-bit tasks.
[SPARC64] bbc_i2c: Fix cpu check and add missing module license.
[SPARC64]: Fix and re-enable dynamic TSB sizing.
[SUNSU]: Fix missing spinlock initialization.
[TG3]: Do not try to access NIC_SRAM_DATA_SIG on Sun parts.
[SPARC64]: First cut at VIS simulator for Niagara.
[SPARC64]: Fix system type in /proc/cpuinfo and remove bogus OBP check.
[SPARC64]: Add SMT scheduling support for Niagara.
[SPARC64]: Fix 32-bit truncation which broke sparsemem.
[SPARC64]: Move over to sparsemem.
[SPARC64]: Fix new context version SMP handling.
...
Diffstat (limited to 'arch/sparc64/mm/tsb.c')
-rw-r--r-- | arch/sparc64/mm/tsb.c | 440
1 file changed, 440 insertions, 0 deletions
diff --git a/arch/sparc64/mm/tsb.c b/arch/sparc64/mm/tsb.c
new file mode 100644
index 000000000000..b2064e2a44d6
--- /dev/null
+++ b/arch/sparc64/mm/tsb.c
@@ -0,0 +1,440 @@
/* arch/sparc64/mm/tsb.c
 *
 * Copyright (C) 2006 David S. Miller <davem@davemloft.net>
 */

#include <linux/kernel.h>
#include <asm/system.h>
#include <asm/page.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
#include <asm/tsb.h>
#include <asm/oplib.h>

extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES];

static inline unsigned long tsb_hash(unsigned long vaddr, unsigned long nentries)
{
	vaddr >>= PAGE_SHIFT;
	return vaddr & (nentries - 1);
}

static inline int tag_compare(unsigned long tag, unsigned long vaddr)
{
	return (tag == (vaddr >> 22));
}
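
/* For illustration only (the 512-entry table size here is an arbitrary
 * example): with sparc64's 8K base pages (PAGE_SHIFT == 13),
 * tsb_hash(0xf0004000, 512) == (0xf0004000 >> 13) & 511 == 2, and the tag
 * an entry for that address carries is 0xf0004000 >> 22 == 0x3c0.
 */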

/* TSB flushes need only occur on the processor initiating the address
 * space modification, not on each cpu the address space has run on.
 * Only the TLB flush needs that treatment.
 */

void flush_tsb_kernel_range(unsigned long start, unsigned long end)
{
	unsigned long v;

	for (v = start; v < end; v += PAGE_SIZE) {
		unsigned long hash = tsb_hash(v, KERNEL_TSB_NENTRIES);
		struct tsb *ent = &swapper_tsb[hash];

		if (tag_compare(ent->tag, v)) {
			ent->tag = (1UL << TSB_TAG_INVALID_BIT);
			membar_storeload_storestore();
		}
	}
}

void flush_tsb_user(struct mmu_gather *mp)
{
	struct mm_struct *mm = mp->mm;
	unsigned long nentries, base, flags;
	struct tsb *tsb;
	int i;

	spin_lock_irqsave(&mm->context.lock, flags);

	tsb = mm->context.tsb;
	nentries = mm->context.tsb_nentries;

	if (tlb_type == cheetah_plus || tlb_type == hypervisor)
		base = __pa(tsb);
	else
		base = (unsigned long) tsb;

	for (i = 0; i < mp->tlb_nr; i++) {
		unsigned long v = mp->vaddrs[i];
		unsigned long tag, ent, hash;

		v &= ~0x1UL;

		hash = tsb_hash(v, nentries);
		ent = base + (hash * sizeof(struct tsb));
		tag = (v >> 22UL);

		tsb_flush(ent, tag);
	}

	spin_unlock_irqrestore(&mm->context.lock, flags);
}

static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_bytes)
{
	unsigned long tsb_reg, base, tsb_paddr;
	unsigned long page_sz, tte;

	mm->context.tsb_nentries = tsb_bytes / sizeof(struct tsb);

	base = TSBMAP_BASE;
	tte = pgprot_val(PAGE_KERNEL_LOCKED);
	tsb_paddr = __pa(mm->context.tsb);
	BUG_ON(tsb_paddr & (tsb_bytes - 1UL));

	/* Use the smallest page size that can map the whole TSB
	 * in one TLB entry.
	 */
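	/* For example, an 8K TSB fits in a single 8K entry, a 16K-64K TSB
	 * is mapped with one 64K entry, 128K-512K TSBs use a 512K entry,
	 * and the 1MB maximum is covered by one 4MB entry, as the cases
	 * below encode.
	 */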
	switch (tsb_bytes) {
	case 8192 << 0:
		tsb_reg = 0x0UL;
#ifdef DCACHE_ALIASING_POSSIBLE
		base += (tsb_paddr & 8192);
#endif
		page_sz = 8192;
		break;

	case 8192 << 1:
		tsb_reg = 0x1UL;
		page_sz = 64 * 1024;
		break;

	case 8192 << 2:
		tsb_reg = 0x2UL;
		page_sz = 64 * 1024;
		break;

	case 8192 << 3:
		tsb_reg = 0x3UL;
		page_sz = 64 * 1024;
		break;

	case 8192 << 4:
		tsb_reg = 0x4UL;
		page_sz = 512 * 1024;
		break;

	case 8192 << 5:
		tsb_reg = 0x5UL;
		page_sz = 512 * 1024;
		break;

	case 8192 << 6:
		tsb_reg = 0x6UL;
		page_sz = 512 * 1024;
		break;

	case 8192 << 7:
		tsb_reg = 0x7UL;
		page_sz = 4 * 1024 * 1024;
		break;

	default:
		BUG();
	};
	tte |= pte_sz_bits(page_sz);

	if (tlb_type == cheetah_plus || tlb_type == hypervisor) {
		/* Physical mapping, no locked TLB entry for TSB. */
		tsb_reg |= tsb_paddr;

		mm->context.tsb_reg_val = tsb_reg;
		mm->context.tsb_map_vaddr = 0;
		mm->context.tsb_map_pte = 0;
	} else {
		tsb_reg |= base;
		tsb_reg |= (tsb_paddr & (page_sz - 1UL));
		tte |= (tsb_paddr & ~(page_sz - 1UL));

		mm->context.tsb_reg_val = tsb_reg;
		mm->context.tsb_map_vaddr = base;
		mm->context.tsb_map_pte = tte;
	}

	/* Setup the Hypervisor TSB descriptor. */
	if (tlb_type == hypervisor) {
		struct hv_tsb_descr *hp = &mm->context.tsb_descr;

		switch (PAGE_SIZE) {
		case 8192:
		default:
			hp->pgsz_idx = HV_PGSZ_IDX_8K;
			break;

		case 64 * 1024:
			hp->pgsz_idx = HV_PGSZ_IDX_64K;
			break;

		case 512 * 1024:
			hp->pgsz_idx = HV_PGSZ_IDX_512K;
			break;

		case 4 * 1024 * 1024:
			hp->pgsz_idx = HV_PGSZ_IDX_4MB;
			break;
		};
		hp->assoc = 1;
		hp->num_ttes = tsb_bytes / 16;
		hp->ctx_idx = 0;
		switch (PAGE_SIZE) {
		case 8192:
		default:
			hp->pgsz_mask = HV_PGSZ_MASK_8K;
			break;

		case 64 * 1024:
			hp->pgsz_mask = HV_PGSZ_MASK_64K;
			break;

		case 512 * 1024:
			hp->pgsz_mask = HV_PGSZ_MASK_512K;
			break;

		case 4 * 1024 * 1024:
			hp->pgsz_mask = HV_PGSZ_MASK_4MB;
			break;
		};
		hp->tsb_base = tsb_paddr;
		hp->resv = 0;
	}
}

static kmem_cache_t *tsb_caches[8] __read_mostly;

static const char *tsb_cache_names[8] = {
	"tsb_8KB",
	"tsb_16KB",
	"tsb_32KB",
	"tsb_64KB",
	"tsb_128KB",
	"tsb_256KB",
	"tsb_512KB",
	"tsb_1MB",
};

void __init tsb_cache_init(void)
{
	unsigned long i;

	for (i = 0; i < 8; i++) {
		unsigned long size = 8192 << i;
		const char *name = tsb_cache_names[i];

		tsb_caches[i] = kmem_cache_create(name,
						  size, size,
						  SLAB_HWCACHE_ALIGN |
						  SLAB_MUST_HWCACHE_ALIGN,
						  NULL, NULL);
		if (!tsb_caches[i]) {
			prom_printf("Could not create %s cache\n", name);
			prom_halt();
		}
	}
}

/* When the RSS of an address space exceeds mm->context.tsb_rss_limit,
 * do_sparc64_fault() invokes this routine to try and grow the TSB.
 *
 * When we reach the maximum TSB size supported, we stick ~0UL into
 * mm->context.tsb_rss_limit so the grow checks in update_mmu_cache()
 * will not trigger any longer.
 *
 * The TSB can be anywhere from 8K to 1MB in size, in increasing powers
 * of two. The TSB must be aligned to its size, so e.g. a 512K TSB
 * must be 512K aligned. It also must be physically contiguous, so we
 * cannot use vmalloc().
 *
 * The idea here is to grow the TSB when the RSS of the process approaches
 * the number of entries that the current TSB can hold at once. Currently,
 * we trigger when the RSS hits 3/4 of the TSB capacity.
 */
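/* A worked example of the sizing above: each struct tsb entry is 16 bytes
 * (tag plus TTE), so an 8K TSB holds 512 translations and is grown once RSS
 * reaches 3/4 of that, i.e. 384 pages; the next size, 16K, holds 1024
 * entries with a 768-page limit, and so on up to the 1MB maximum.
 */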
void tsb_grow(struct mm_struct *mm, unsigned long rss)
{
	unsigned long max_tsb_size = 1 * 1024 * 1024;
	unsigned long new_size, old_size, flags;
	struct tsb *old_tsb, *new_tsb;
	unsigned long new_cache_index, old_cache_index;
	unsigned long new_rss_limit;
	gfp_t gfp_flags;

	if (max_tsb_size > (PAGE_SIZE << MAX_ORDER))
		max_tsb_size = (PAGE_SIZE << MAX_ORDER);

	new_cache_index = 0;
	for (new_size = 8192; new_size < max_tsb_size; new_size <<= 1UL) {
		unsigned long n_entries = new_size / sizeof(struct tsb);

		n_entries = (n_entries * 3) / 4;
		if (n_entries > rss)
			break;

		new_cache_index++;
	}

	if (new_size == max_tsb_size)
		new_rss_limit = ~0UL;
	else
		new_rss_limit = ((new_size / sizeof(struct tsb)) * 3) / 4;

retry_tsb_alloc:
	gfp_flags = GFP_KERNEL;
	if (new_size > (PAGE_SIZE * 2))
		gfp_flags = __GFP_NOWARN | __GFP_NORETRY;

	new_tsb = kmem_cache_alloc(tsb_caches[new_cache_index], gfp_flags);
	if (unlikely(!new_tsb)) {
		/* Not being able to fork due to a high-order TSB
		 * allocation failure is very bad behavior. Just back
		 * down to a 0-order allocation and force no TSB
		 * growing for this address space.
		 */
		if (mm->context.tsb == NULL && new_cache_index > 0) {
			new_cache_index = 0;
			new_size = 8192;
			new_rss_limit = ~0UL;
			goto retry_tsb_alloc;
		}

		/* If we failed on a TSB grow, we are under serious
		 * memory pressure so don't try to grow any more.
		 */
		if (mm->context.tsb != NULL)
			mm->context.tsb_rss_limit = ~0UL;
		return;
	}

	/* Mark all tags as invalid. */
	tsb_init(new_tsb, new_size);

	/* Ok, we are about to commit the changes. If we are
	 * growing an existing TSB the locking is very tricky,
	 * so WATCH OUT!
	 *
	 * We have to hold mm->context.lock while committing to the
	 * new TSB, this synchronizes us with processors in
	 * flush_tsb_user() and switch_mm() for this address space.
	 *
	 * But even with that lock held, processors run asynchronously
	 * accessing the old TSB via TLB miss handling. This is OK
	 * because those actions are just propagating state from the
	 * Linux page tables into the TSB, page table mappings are not
	 * being changed. If a real fault occurs, the processor will
	 * synchronize with us when it hits flush_tsb_user(), this is
	 * also true for the case where vmscan is modifying the page
	 * tables. The only thing we need to be careful with is to
	 * skip any locked TSB entries during copy_tsb().
	 *
	 * When we finish committing to the new TSB, we have to drop
	 * the lock and ask all other cpus running this address space
	 * to run tsb_context_switch() to see the new TSB table.
	 */
	spin_lock_irqsave(&mm->context.lock, flags);

	old_tsb = mm->context.tsb;
	old_cache_index = (mm->context.tsb_reg_val & 0x7UL);
	old_size = mm->context.tsb_nentries * sizeof(struct tsb);


	/* Handle multiple threads trying to grow the TSB at the same time.
	 * One will get in here first, and bump the size and the RSS limit.
	 * The others will get in here next and hit this check.
	 */
	if (unlikely(old_tsb && (rss < mm->context.tsb_rss_limit))) {
		spin_unlock_irqrestore(&mm->context.lock, flags);

		kmem_cache_free(tsb_caches[new_cache_index], new_tsb);
		return;
	}

	mm->context.tsb_rss_limit = new_rss_limit;

	if (old_tsb) {
		extern void copy_tsb(unsigned long old_tsb_base,
				     unsigned long old_tsb_size,
				     unsigned long new_tsb_base,
				     unsigned long new_tsb_size);
		unsigned long old_tsb_base = (unsigned long) old_tsb;
		unsigned long new_tsb_base = (unsigned long) new_tsb;

		if (tlb_type == cheetah_plus || tlb_type == hypervisor) {
			old_tsb_base = __pa(old_tsb_base);
			new_tsb_base = __pa(new_tsb_base);
		}
		copy_tsb(old_tsb_base, old_size, new_tsb_base, new_size);
	}

	mm->context.tsb = new_tsb;
	setup_tsb_params(mm, new_size);

	spin_unlock_irqrestore(&mm->context.lock, flags);

	/* If old_tsb is NULL, we're being invoked for the first time
	 * from init_new_context().
	 */
	if (old_tsb) {
		/* Reload it on the local cpu. */
		tsb_context_switch(mm);

		/* Now force other processors to do the same. */
		smp_tsb_sync(mm);

		/* Now it is safe to free the old tsb. */
		kmem_cache_free(tsb_caches[old_cache_index], old_tsb);
	}
}

int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
{
	spin_lock_init(&mm->context.lock);

	mm->context.sparc64_ctx_val = 0UL;

	/* copy_mm() copies over the parent's mm_struct before calling
	 * us, so we need to zero out the TSB pointer or else tsb_grow()
	 * will be confused and think there is an older TSB to free up.
	 */
	mm->context.tsb = NULL;

	/* If this is fork, inherit the parent's TSB size. We would
	 * grow it to that size on the first page fault anyways.
	 */
	tsb_grow(mm, get_mm_rss(mm));

	if (unlikely(!mm->context.tsb))
		return -ENOMEM;

	return 0;
}

void destroy_context(struct mm_struct *mm)
{
	unsigned long flags, cache_index;

	cache_index = (mm->context.tsb_reg_val & 0x7UL);
	kmem_cache_free(tsb_caches[cache_index], mm->context.tsb);

	/* We can remove these later, but for now it's useful
	 * to catch any bogus post-destroy_context() references
	 * to the TSB.
	 */
	mm->context.tsb = NULL;
	mm->context.tsb_reg_val = 0UL;

	spin_lock_irqsave(&ctx_alloc_lock, flags);

	if (CTX_VALID(mm->context)) {
		unsigned long nr = CTX_NRBITS(mm->context);
		mmu_context_bmap[nr>>6] &= ~(1UL << (nr & 63));
	}

	spin_unlock_irqrestore(&ctx_alloc_lock, flags);
}