-rw-r--r--   arch/sparc64/kernel/tsb.S          |  71
-rw-r--r--   arch/sparc64/mm/fault.c            |   8
-rw-r--r--   arch/sparc64/mm/init.c             |   7
-rw-r--r--   arch/sparc64/mm/tsb.c              | 185
-rw-r--r--   include/asm-sparc64/mmu_context.h  |  50
5 files changed, 203 insertions(+), 118 deletions(-)
diff --git a/arch/sparc64/kernel/tsb.S b/arch/sparc64/kernel/tsb.S
index d738910153f6..1b154c863628 100644
--- a/arch/sparc64/kernel/tsb.S
+++ b/arch/sparc64/kernel/tsb.S
@@ -34,8 +34,9 @@ tsb_miss_itlb:
 	ldxa		[%g4] ASI_IMMU, %g4
 
 	/* At this point we have:
-	 * %g4 -- missing virtual address
 	 * %g1 -- TSB entry address
+	 * %g3 -- FAULT_CODE_{D,I}TLB
+	 * %g4 -- missing virtual address
 	 * %g6 -- TAG TARGET (vaddr >> 22)
 	 */
 tsb_miss_page_table_walk:
@@ -45,6 +46,12 @@ tsb_miss_page_table_walk:
 tsb_miss_page_table_walk_sun4v_fastpath:
 	USER_PGTABLE_WALK_TL1(%g4, %g7, %g5, %g2, tsb_do_fault)
 
+	/* At this point we have:
+	 * %g1 -- TSB entry address
+	 * %g3 -- FAULT_CODE_{D,I}TLB
+	 * %g5 -- physical address of PTE in Linux page tables
+	 * %g6 -- TAG TARGET (vaddr >> 22)
+	 */
 tsb_reload:
 	TSB_LOCK_TAG(%g1, %g2, %g7)
 
@@ -199,6 +206,7 @@ __tsb_insert:
 	wrpr		%o5, %pstate
 	retl
 	 nop
+	.size		__tsb_insert, .-__tsb_insert
 
 /* Flush the given TSB entry if it has the matching
  * tag.
@@ -208,6 +216,7 @@ __tsb_insert:
  */
 	.align		32
 	.globl		tsb_flush
+	.type		tsb_flush,#function
 tsb_flush:
 	sethi		%hi(TSB_TAG_LOCK_HIGH), %g2
 1:	TSB_LOAD_TAG(%o0, %g1)
@@ -225,6 +234,7 @@ tsb_flush:
 	 nop
 2:	retl
 	 TSB_MEMBAR
+	.size		tsb_flush, .-tsb_flush
 
 /* Reload MMU related context switch state at
  * schedule() time.
@@ -241,6 +251,7 @@ tsb_flush:
  */
 	.align		32
 	.globl		__tsb_context_switch
+	.type		__tsb_context_switch,#function
 __tsb_context_switch:
 	rdpr		%pstate, %o5
 	wrpr		%o5, PSTATE_IE, %pstate
@@ -302,3 +313,61 @@ __tsb_context_switch:
 
 	retl
 	 nop
+	.size		__tsb_context_switch, .-__tsb_context_switch
+
+#define TSB_PASS_BITS	((1 << TSB_TAG_LOCK_BIT) | \
+			 (1 << TSB_TAG_INVALID_BIT))
+
+	.align		32
+	.globl		copy_tsb
+	.type		copy_tsb,#function
+copy_tsb:		/* %o0=old_tsb_base, %o1=old_tsb_size
+			 * %o2=new_tsb_base, %o3=new_tsb_size
+			 */
+	sethi		%uhi(TSB_PASS_BITS), %g7
+	srlx		%o3, 4, %o3
+	add		%o0, %o1, %g1	/* end of old tsb */
+	sllx		%g7, 32, %g7
+	sub		%o3, 1, %o3	/* %o3 == new tsb hash mask */
+
+661:	prefetcha	[%o0] ASI_N, #one_read
+	.section	.tsb_phys_patch, "ax"
+	.word		661b
+	prefetcha	[%o0] ASI_PHYS_USE_EC, #one_read
+	.previous
+
+90:	andcc		%o0, (64 - 1), %g0
+	bne		1f
+	 add		%o0, 64, %o5
+
+661:	prefetcha	[%o5] ASI_N, #one_read
+	.section	.tsb_phys_patch, "ax"
+	.word		661b
+	prefetcha	[%o5] ASI_PHYS_USE_EC, #one_read
+	.previous
+
+1:	TSB_LOAD_QUAD(%o0, %g2)		/* %g2/%g3 == TSB entry */
+	andcc		%g2, %g7, %g0	/* LOCK or INVALID set? */
+	bne,pn		%xcc, 80f	/* Skip it */
+	 sllx		%g2, 22, %o4	/* TAG --> VADDR */
+
+	/* This can definitely be computed faster... */
+	srlx		%o0, 4, %o5	/* Build index */
+	and		%o5, 511, %o5	/* Mask index */
+	sllx		%o5, PAGE_SHIFT, %o5 /* Put into vaddr position */
+	or		%o4, %o5, %o4	/* Full VADDR. */
+	srlx		%o4, PAGE_SHIFT, %o4 /* Shift down to create index */
+	and		%o4, %o3, %o4	/* Mask with new_tsb_nents-1 */
+	sllx		%o4, 4, %o4	/* Shift back up into tsb ent offset */
+	TSB_STORE(%o2 + %o4, %g2)	/* Store TAG */
+	add		%o4, 0x8, %o4	/* Advance to TTE */
+	TSB_STORE(%o2 + %o4, %g3)	/* Store TTE */
+
+80:	add		%o0, 16, %o0
+	cmp		%o0, %g1
+	bne,pt		%xcc, 90b
+	 nop
+
+	retl
+	 TSB_MEMBAR
+	.size		copy_tsb, .-copy_tsb
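
Annotation (not part of the patch): the per-entry rehash performed by the copy_tsb assembly above corresponds to the C copy_tsb() that this patch deletes from arch/sparc64/mm/tsb.c further down. A minimal C sketch of that logic, using plain loads and stores in place of the ASI_N/ASI_PHYS_USE_EC accesses and the tsb_phys_patch rewriting, and with names invented here for illustration:

/* Illustrative sketch only, not kernel code.  Assumes 8K pages
 * (PAGE_SHIFT == 13) and 16-byte TSB entries, as implied by the
 * 16-byte stride and the "add %o4, 0x8" in the assembly above.
 */
struct tsb_entry { unsigned long tag, tte; };

static void copy_tsb_sketch(const struct tsb_entry *old_tsb, unsigned long old_size,
			    struct tsb_entry *new_tsb, unsigned long new_size,
			    unsigned long pass_bits /* TSB_PASS_BITS */)
{
	unsigned long old_nents = old_size / 16;
	unsigned long new_mask = (new_size / 16) - 1;	/* new tsb hash mask */
	unsigned long i;

	for (i = 0; i < old_nents; i++) {
		unsigned long tag = old_tsb[i].tag;
		unsigned long vaddr, hash;

		if (tag & pass_bits)	/* LOCK or INVALID set?  Skip it. */
			continue;

		/* TAG --> VADDR: the tag holds vaddr >> 22, and the
		 * implied low bits come from the old entry's index.
		 */
		vaddr = tag << 22;
		vaddr |= (i & 511) << 13;

		hash = (vaddr >> 13) & new_mask;	/* index into the new TSB */
		new_tsb[hash].tag = tag;
		new_tsb[hash].tte = old_tsb[i].tte;
	}
}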
diff --git a/arch/sparc64/mm/fault.c b/arch/sparc64/mm/fault.c
index b97bd054aad3..63b6cc0cd5d5 100644
--- a/arch/sparc64/mm/fault.c
+++ b/arch/sparc64/mm/fault.c
@@ -29,6 +29,7 @@
 #include <asm/lsu.h>
 #include <asm/sections.h>
 #include <asm/kdebug.h>
+#include <asm/mmu_context.h>
 
 /*
  * To debug kernel to catch accesses to certain virtual/physical addresses.
@@ -258,7 +259,7 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs)
 	struct vm_area_struct *vma;
 	unsigned int insn = 0;
 	int si_code, fault_code;
-	unsigned long address;
+	unsigned long address, mm_rss;
 
 	fault_code = get_thread_fault_code();
 
@@ -407,6 +408,11 @@ good_area:
 	}
 
 	up_read(&mm->mmap_sem);
+
+	mm_rss = get_mm_rss(mm);
+	if (unlikely(mm_rss >= mm->context.tsb_rss_limit))
+		tsb_grow(mm, mm_rss);
+
 	return;
 
 	/*
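
Annotation (not part of the patch): the limit tested in the fault path above is set by tsb_grow() to 3/4 of the TSB's entry count (see the tsb.c hunks below). A minimal sketch of that arithmetic, assuming 16-byte TSB entries:

/* Illustrative only: the grow threshold as a function of TSB size,
 * mirroring ((size / sizeof(struct tsb)) * 3) / 4 from tsb_grow().
 */
static unsigned long tsb_rss_limit_for(unsigned long tsb_bytes)
{
	unsigned long nentries = tsb_bytes / 16;	/* sizeof(struct tsb) assumed to be 16 */

	return (nentries * 3) / 4;	/* e.g. 8192 bytes -> 512 entries -> limit of 384 pages of RSS */
}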
diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c
index b40f6477dea0..d703b67bc7b9 100644
--- a/arch/sparc64/mm/init.c
+++ b/arch/sparc64/mm/init.c
@@ -279,7 +279,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t p
 {
 	struct mm_struct *mm;
 	struct tsb *tsb;
-	unsigned long tag;
+	unsigned long tag, flags;
 
 	if (tlb_type != hypervisor) {
 		unsigned long pfn = pte_pfn(pte);
@@ -308,10 +308,15 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t p
 	}
 
 	mm = vma->vm_mm;
+
+	spin_lock_irqsave(&mm->context.lock, flags);
+
 	tsb = &mm->context.tsb[(address >> PAGE_SHIFT) &
 			       (mm->context.tsb_nentries - 1UL)];
 	tag = (address >> 22UL);
 	tsb_insert(tsb, tag, pte_val(pte));
+
+	spin_unlock_irqrestore(&mm->context.lock, flags);
 }
 
 void flush_dcache_page(struct page *page)
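
Annotation (not part of the patch): the two statements now guarded by mm->context.lock encode the TSB's direct-mapped layout, with the slot chosen by the low virtual-page-number bits and the tag holding the upper address bits. A minimal sketch, with invented names and 8K pages assumed:

/* Illustrative only: mapping a virtual address to a TSB slot and tag,
 * mirroring the indexing and tag computation in update_mmu_cache().
 */
struct tsb_slot { unsigned long tag, tte; };

static struct tsb_slot *tsb_slot_for(struct tsb_slot *tsb, unsigned long nentries,
				     unsigned long address, unsigned long *tag)
{
	*tag = address >> 22;				/* TAG TARGET (vaddr >> 22) */
	return &tsb[(address >> 13) & (nentries - 1)];	/* direct-mapped index, PAGE_SHIFT == 13 */
}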
diff --git a/arch/sparc64/mm/tsb.c b/arch/sparc64/mm/tsb.c
index f36799b7152c..7fbe1e0cd105 100644
--- a/arch/sparc64/mm/tsb.c
+++ b/arch/sparc64/mm/tsb.c
@@ -48,11 +48,15 @@ void flush_tsb_kernel_range(unsigned long start, unsigned long end)
 void flush_tsb_user(struct mmu_gather *mp)
 {
 	struct mm_struct *mm = mp->mm;
-	struct tsb *tsb = mm->context.tsb;
-	unsigned long nentries = mm->context.tsb_nentries;
-	unsigned long base;
+	unsigned long nentries, base, flags;
+	struct tsb *tsb;
 	int i;
 
+	spin_lock_irqsave(&mm->context.lock, flags);
+
+	tsb = mm->context.tsb;
+	nentries = mm->context.tsb_nentries;
+
 	if (tlb_type == cheetah_plus || tlb_type == hypervisor)
 		base = __pa(tsb);
 	else
@@ -70,6 +74,8 @@ void flush_tsb_user(struct mmu_gather *mp)
 
 		tsb_flush(ent, tag);
 	}
+
+	spin_unlock_irqrestore(&mm->context.lock, flags);
 }
 
 static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_bytes)
@@ -201,86 +207,9 @@ static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_bytes)
 	}
 }
 
-/* The page tables are locked against modifications while this
- * runs.
- *
- * XXX do some prefetching...
- */
-static void copy_tsb(struct tsb *old_tsb, unsigned long old_size,
-		     struct tsb *new_tsb, unsigned long new_size)
-{
-	unsigned long old_nentries = old_size / sizeof(struct tsb);
-	unsigned long new_nentries = new_size / sizeof(struct tsb);
-	unsigned long i;
-
-	for (i = 0; i < old_nentries; i++) {
-		register unsigned long tag asm("o4");
-		register unsigned long pte asm("o5");
-		unsigned long v, hash;
-
-		if (tlb_type == hypervisor) {
-			__asm__ __volatile__(
-				"ldda [%2] %3, %0"
-				: "=r" (tag), "=r" (pte)
-				: "r" (__pa(&old_tsb[i])),
-				  "i" (ASI_QUAD_LDD_PHYS_4V));
-		} else if (tlb_type == cheetah_plus) {
-			__asm__ __volatile__(
-				"ldda [%2] %3, %0"
-				: "=r" (tag), "=r" (pte)
-				: "r" (__pa(&old_tsb[i])),
-				  "i" (ASI_QUAD_LDD_PHYS));
-		} else {
-			__asm__ __volatile__(
-				"ldda [%2] %3, %0"
-				: "=r" (tag), "=r" (pte)
-				: "r" (&old_tsb[i]),
-				  "i" (ASI_NUCLEUS_QUAD_LDD));
-		}
-
-		if (tag & ((1UL << TSB_TAG_LOCK_BIT) |
-			   (1UL << TSB_TAG_INVALID_BIT)))
-			continue;
-
-		/* We only put base page size PTEs into the TSB,
-		 * but that might change in the future.  This code
-		 * would need to be changed if we start putting larger
-		 * page size PTEs into there.
-		 */
-		WARN_ON((pte & _PAGE_ALL_SZ_BITS) != _PAGE_SZBITS);
-
-		/* The tag holds bits 22 to 63 of the virtual address
-		 * and the context.  Clear out the context, and shift
-		 * up to make a virtual address.
-		 */
-		v = (tag & ((1UL << 42UL) - 1UL)) << 22UL;
-
-		/* The implied bits of the tag (bits 13 to 21) are
-		 * determined by the TSB entry index, so fill that in.
-		 */
-		v |= (i & (512UL - 1UL)) << 13UL;
-
-		hash = tsb_hash(v, new_nentries);
-		if (tlb_type == cheetah_plus ||
-		    tlb_type == hypervisor) {
-			__asm__ __volatile__(
-				"stxa %0, [%1] %2\n\t"
-				"stxa %3, [%4] %2"
-				: /* no outputs */
-				: "r" (tag),
-				  "r" (__pa(&new_tsb[hash].tag)),
-				  "i" (ASI_PHYS_USE_EC),
-				  "r" (pte),
-				  "r" (__pa(&new_tsb[hash].pte)));
-		} else {
-			new_tsb[hash].tag = tag;
-			new_tsb[hash].pte = pte;
-		}
-	}
-}
-
 /* When the RSS of an address space exceeds mm->context.tsb_rss_limit,
- * update_mmu_cache() invokes this routine to try and grow the TSB.
+ * do_sparc64_fault() invokes this routine to try and grow the TSB.
+ *
  * When we reach the maximum TSB size supported, we stick ~0UL into
  * mm->context.tsb_rss_limit so the grow checks in update_mmu_cache()
  * will not trigger any longer.
@@ -293,12 +222,12 @@ static void copy_tsb(struct tsb *old_tsb, unsigned long old_size,
 * the number of entries that the current TSB can hold at once.  Currently,
 * we trigger when the RSS hits 3/4 of the TSB capacity.
 */
-void tsb_grow(struct mm_struct *mm, unsigned long rss, gfp_t gfp_flags)
+void tsb_grow(struct mm_struct *mm, unsigned long rss)
 {
 	unsigned long max_tsb_size = 1 * 1024 * 1024;
-	unsigned long size, old_size;
+	unsigned long size, old_size, flags;
 	struct page *page;
-	struct tsb *old_tsb;
+	struct tsb *old_tsb, *new_tsb;
 
 	if (max_tsb_size > (PAGE_SIZE << MAX_ORDER))
 		max_tsb_size = (PAGE_SIZE << MAX_ORDER);
@@ -311,12 +240,51 @@ void tsb_grow(struct mm_struct *mm, unsigned long rss, gfp_t gfp_flags)
 			break;
 	}
 
-	page = alloc_pages(gfp_flags, get_order(size));
+	page = alloc_pages(GFP_KERNEL, get_order(size));
 	if (unlikely(!page))
 		return;
 
 	/* Mark all tags as invalid.  */
-	memset(page_address(page), 0x40, size);
+	new_tsb = page_address(page);
+	memset(new_tsb, 0x40, size);
+
+	/* Ok, we are about to commit the changes.  If we are
+	 * growing an existing TSB the locking is very tricky,
+	 * so WATCH OUT!
+	 *
+	 * We have to hold mm->context.lock while committing to the
+	 * new TSB, this synchronizes us with processors in
+	 * flush_tsb_user() and switch_mm() for this address space.
+	 *
+	 * But even with that lock held, processors run asynchronously
+	 * accessing the old TSB via TLB miss handling.  This is OK
+	 * because those actions are just propagating state from the
+	 * Linux page tables into the TSB, page table mappings are not
+	 * being changed.  If a real fault occurs, the processor will
+	 * synchronize with us when it hits flush_tsb_user(), this is
+	 * also true for the case where vmscan is modifying the page
+	 * tables.  The only thing we need to be careful with is to
+	 * skip any locked TSB entries during copy_tsb().
+	 *
+	 * When we finish committing to the new TSB, we have to drop
+	 * the lock and ask all other cpus running this address space
+	 * to run tsb_context_switch() to see the new TSB table.
+	 */
+	spin_lock_irqsave(&mm->context.lock, flags);
+
+	old_tsb = mm->context.tsb;
+	old_size = mm->context.tsb_nentries * sizeof(struct tsb);
+
+	/* Handle multiple threads trying to grow the TSB at the same time.
+	 * One will get in here first, and bump the size and the RSS limit.
+	 * The others will get in here next and hit this check.
+	 */
+	if (unlikely(old_tsb && (rss < mm->context.tsb_rss_limit))) {
+		spin_unlock_irqrestore(&mm->context.lock, flags);
+
+		free_pages((unsigned long) new_tsb, get_order(size));
+		return;
+	}
 
 	if (size == max_tsb_size)
 		mm->context.tsb_rss_limit = ~0UL;
@@ -324,30 +292,37 @@ void tsb_grow(struct mm_struct *mm, unsigned long rss, gfp_t gfp_flags)
 		mm->context.tsb_rss_limit =
 			((size / sizeof(struct tsb)) * 3) / 4;
 
-	old_tsb = mm->context.tsb;
-	old_size = mm->context.tsb_nentries * sizeof(struct tsb);
-
-	if (old_tsb)
-		copy_tsb(old_tsb, old_size, page_address(page), size);
+	if (old_tsb) {
+		extern void copy_tsb(unsigned long old_tsb_base,
+				     unsigned long old_tsb_size,
+				     unsigned long new_tsb_base,
+				     unsigned long new_tsb_size);
+		unsigned long old_tsb_base = (unsigned long) old_tsb;
+		unsigned long new_tsb_base = (unsigned long) new_tsb;
+
+		if (tlb_type == cheetah_plus || tlb_type == hypervisor) {
+			old_tsb_base = __pa(old_tsb_base);
+			new_tsb_base = __pa(new_tsb_base);
+		}
+		copy_tsb(old_tsb_base, old_size, new_tsb_base, size);
+	}
 
-	mm->context.tsb = page_address(page);
+	mm->context.tsb = new_tsb;
 	setup_tsb_params(mm, size);
 
+	spin_unlock_irqrestore(&mm->context.lock, flags);
+
 	/* If old_tsb is NULL, we're being invoked for the first time
 	 * from init_new_context().
 	 */
 	if (old_tsb) {
-		/* Now force all other processors to reload the new
-		 * TSB state.
-		 */
-		smp_tsb_sync(mm);
-
-		/* Finally reload it on the local cpu.  No further
-		 * references will remain to the old TSB and we can
-		 * thus free it up.
-		 */
+		/* Reload it on the local cpu.  */
 		tsb_context_switch(mm);
 
+		/* Now force other processors to do the same.  */
+		smp_tsb_sync(mm);
+
+		/* Now it is safe to free the old tsb.  */
 		free_pages((unsigned long) old_tsb, get_order(old_size));
 	}
 }
@@ -363,7 +338,11 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 	 * will be confused and think there is an older TSB to free up.
 	 */
 	mm->context.tsb = NULL;
-	tsb_grow(mm, 0, GFP_KERNEL);
+
+	/* If this is fork, inherit the parent's TSB size.  We would
+	 * grow it to that size on the first page fault anyways.
+	 */
+	tsb_grow(mm, get_mm_rss(mm));
 
 	if (unlikely(!mm->context.tsb))
 		return -ENOMEM;
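
Annotation (not part of the patch): condensing the rewritten tsb_grow() above into its ordering. The helper names below are invented purely for illustration and stand in for the corresponding blocks of the real function; the usual kernel headers are assumed.

/* Illustrative sketch of the ordering only, not kernel code. */
static void tsb_grow_sketch(struct mm_struct *mm, unsigned long rss)
{
	struct tsb *new_tsb, *old_tsb;
	unsigned long flags;

	new_tsb = allocate_invalidated_tsb(rss);	/* hypothetical: GFP_KERNEL alloc, tags set invalid, no locks held */
	if (!new_tsb)
		return;

	spin_lock_irqsave(&mm->context.lock, flags);	/* vs. flush_tsb_user() and switch_mm() */
	if (grown_by_another_thread(mm, rss)) {		/* hypothetical: rss < mm->context.tsb_rss_limit recheck */
		spin_unlock_irqrestore(&mm->context.lock, flags);
		release_tsb(new_tsb);			/* hypothetical: free_pages() of the unused TSB */
		return;
	}
	old_tsb = commit_new_tsb(mm, new_tsb);		/* hypothetical: copy_tsb(), install, setup_tsb_params() */
	spin_unlock_irqrestore(&mm->context.lock, flags);

	if (old_tsb) {
		tsb_context_switch(mm);		/* reload the local cpu first */
		smp_tsb_sync(mm);		/* then cross-call the other cpus */
		release_tsb(old_tsb);		/* nothing references the old TSB any more */
	}
}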
diff --git a/include/asm-sparc64/mmu_context.h b/include/asm-sparc64/mmu_context.h
index ca36ea96f64b..e7974321d052 100644
--- a/include/asm-sparc64/mmu_context.h
+++ b/include/asm-sparc64/mmu_context.h
@@ -42,7 +42,7 @@ static inline void tsb_context_switch(struct mm_struct *mm)
 			     __pa(&mm->context.tsb_descr));
 }
 
-extern void tsb_grow(struct mm_struct *mm, unsigned long mm_rss, gfp_t gfp_flags);
+extern void tsb_grow(struct mm_struct *mm, unsigned long mm_rss);
 #ifdef CONFIG_SMP
 extern void smp_tsb_sync(struct mm_struct *mm);
 #else
@@ -74,18 +74,43 @@ static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, str
 	ctx_valid = CTX_VALID(mm->context);
 	if (!ctx_valid)
 		get_new_mmu_context(mm);
-	spin_unlock_irqrestore(&mm->context.lock, flags);
 
-	if (!ctx_valid || (old_mm != mm)) {
-		load_secondary_context(mm);
-		tsb_context_switch(mm);
-	}
+	/* We have to be extremely careful here or else we will miss
+	 * a TSB grow if we switch back and forth between a kernel
+	 * thread and an address space which has it's TSB size increased
+	 * on another processor.
+	 *
+	 * It is possible to play some games in order to optimize the
+	 * switch, but the safest thing to do is to unconditionally
+	 * perform the secondary context load and the TSB context switch.
+	 *
+	 * For reference the bad case is, for address space "A":
+	 *
+	 *		CPU 0			CPU 1
+	 *	run address space A
+	 *	set cpu0's bits in cpu_vm_mask
+	 *	switch to kernel thread, borrow
+	 *	address space A via entry_lazy_tlb
+	 *					run address space A
+	 *					set cpu1's bit in cpu_vm_mask
+	 *					flush_tlb_pending()
+	 *					reset cpu_vm_mask to just cpu1
+	 *					TSB grow
+	 *	run address space A
+	 *	context was valid, so skip
+	 *	TSB context switch
+	 *
+	 * At that point cpu0 continues to use a stale TSB, the one from
+	 * before the TSB grow performed on cpu1.  cpu1 did not cross-call
+	 * cpu0 to update it's TSB because at that point the cpu_vm_mask
+	 * only had cpu1 set in it.
+	 */
+	load_secondary_context(mm);
+	tsb_context_switch(mm);
 
-	/* Even if (mm == old_mm) we _must_ check
-	 * the cpu_vm_mask.  If we do not we could
-	 * corrupt the TLB state because of how
-	 * smp_flush_tlb_{page,range,mm} on sparc64
-	 * and lazy tlb switches work.  -DaveM
+	/* Any time a processor runs a context on an address space
+	 * for the first time, we must flush that context out of the
+	 * local TLB.
 	 */
 	cpu = smp_processor_id();
 	if (!ctx_valid || !cpu_isset(cpu, mm->cpu_vm_mask)) {
@@ -93,6 +118,7 @@ static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, str
 		__flush_tlb_mm(CTX_HWBITS(mm->context),
 			       SECONDARY_CONTEXT);
 	}
+	spin_unlock_irqrestore(&mm->context.lock, flags);
 }
 
 #define deactivate_mm(tsk,mm)	do { } while (0)
@@ -109,11 +135,11 @@ static inline void activate_mm(struct mm_struct *active_mm, struct mm_struct *mm
 	cpu = smp_processor_id();
 	if (!cpu_isset(cpu, mm->cpu_vm_mask))
 		cpu_set(cpu, mm->cpu_vm_mask);
-	spin_unlock_irqrestore(&mm->context.lock, flags);
 
 	load_secondary_context(mm);
 	__flush_tlb_mm(CTX_HWBITS(mm->context), SECONDARY_CONTEXT);
 	tsb_context_switch(mm);
+	spin_unlock_irqrestore(&mm->context.lock, flags);
 }
 
 #endif /* !(__ASSEMBLY__) */