diff options
author | Benjamin Herrenschmidt <benh@kernel.crashing.org> | 2007-04-10 03:09:37 -0400 |
---|---|---|
committer | Paul Mackerras <paulus@samba.org> | 2007-04-12 14:09:38 -0400 |
commit | a741e67969577163a4cfc78d7fd2753219087ef1 (patch) | |
tree | bac4162aaf15367e896429afa60465e201c9204c /include/asm-powerpc | |
parent | e4ee3891db35aa9a069bb403c2a66a8fbfa274d6 (diff) |
[POWERPC] Make tlb flush batch use lazy MMU mode
The current tlb flush code on powerpc 64 bits has a subtle race since we
lost the page table lock due to the possible faulting in of new PTEs
after a previous one has been removed but before the corresponding hash
entry has been evicted, which can lead to all sorts of fatal problems.
This patch reworks the batch code completely. It doesn't use the mmu_gather
stuff anymore. Instead, we use the lazy mmu hooks that were added by the
paravirt code. They have the nice property that the enter/leave lazy mmu
mode pair is always fully contained by the PTE lock for a given range
of PTEs. Thus we can guarantee that all batches are flushed on a given
CPU before it drops that lock.
We also generalize batching for any PTE update that requires a flush.
Batching is now enabled on a CPU by arch_enter_lazy_mmu_mode() and
disabled by arch_leave_lazy_mmu_mode(). The code expects that this is
always contained within a PTE lock section so no preemption can happen
and no PTE insertion in that range from another CPU. When batching
is enabled on a CPU, every PTE update that needs a hash flush will
use the batch for that flush.
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
Diffstat (limited to 'include/asm-powerpc')
-rw-r--r-- | include/asm-powerpc/pgtable.h | 50 | ||||
-rw-r--r-- | include/asm-powerpc/tlb.h | 1 | ||||
-rw-r--r-- | include/asm-powerpc/tlbflush.h | 39 |
3 files changed, 41 insertions, 49 deletions
diff --git a/include/asm-powerpc/pgtable.h b/include/asm-powerpc/pgtable.h index 10f52743f4ff..c7142c7e0e05 100644 --- a/include/asm-powerpc/pgtable.h +++ b/include/asm-powerpc/pgtable.h | |||
@@ -272,7 +272,10 @@ static inline pte_t pte_mkhuge(pte_t pte) { | |||
272 | return pte; } | 272 | return pte; } |
273 | 273 | ||
274 | /* Atomic PTE updates */ | 274 | /* Atomic PTE updates */ |
275 | static inline unsigned long pte_update(pte_t *p, unsigned long clr) | 275 | static inline unsigned long pte_update(struct mm_struct *mm, |
276 | unsigned long addr, | ||
277 | pte_t *ptep, unsigned long clr, | ||
278 | int huge) | ||
276 | { | 279 | { |
277 | unsigned long old, tmp; | 280 | unsigned long old, tmp; |
278 | 281 | ||
@@ -283,20 +286,15 @@ static inline unsigned long pte_update(pte_t *p, unsigned long clr) | |||
283 | andc %1,%0,%4 \n\ | 286 | andc %1,%0,%4 \n\ |
284 | stdcx. %1,0,%3 \n\ | 287 | stdcx. %1,0,%3 \n\ |
285 | bne- 1b" | 288 | bne- 1b" |
286 | : "=&r" (old), "=&r" (tmp), "=m" (*p) | 289 | : "=&r" (old), "=&r" (tmp), "=m" (*ptep) |
287 | : "r" (p), "r" (clr), "m" (*p), "i" (_PAGE_BUSY) | 290 | : "r" (ptep), "r" (clr), "m" (*ptep), "i" (_PAGE_BUSY) |
288 | : "cc" ); | 291 | : "cc" ); |
292 | |||
293 | if (old & _PAGE_HASHPTE) | ||
294 | hpte_need_flush(mm, addr, ptep, old, huge); | ||
289 | return old; | 295 | return old; |
290 | } | 296 | } |
291 | 297 | ||
292 | /* PTE updating functions, this function puts the PTE in the | ||
293 | * batch, doesn't actually triggers the hash flush immediately, | ||
294 | * you need to call flush_tlb_pending() to do that. | ||
295 | * Pass -1 for "normal" size (4K or 64K) | ||
296 | */ | ||
297 | extern void hpte_update(struct mm_struct *mm, unsigned long addr, | ||
298 | pte_t *ptep, unsigned long pte, int huge); | ||
299 | |||
300 | static inline int __ptep_test_and_clear_young(struct mm_struct *mm, | 298 | static inline int __ptep_test_and_clear_young(struct mm_struct *mm, |
301 | unsigned long addr, pte_t *ptep) | 299 | unsigned long addr, pte_t *ptep) |
302 | { | 300 | { |
@@ -304,11 +302,7 @@ static inline int __ptep_test_and_clear_young(struct mm_struct *mm, | |||
304 | 302 | ||
305 | if ((pte_val(*ptep) & (_PAGE_ACCESSED | _PAGE_HASHPTE)) == 0) | 303 | if ((pte_val(*ptep) & (_PAGE_ACCESSED | _PAGE_HASHPTE)) == 0) |
306 | return 0; | 304 | return 0; |
307 | old = pte_update(ptep, _PAGE_ACCESSED); | 305 | old = pte_update(mm, addr, ptep, _PAGE_ACCESSED, 0); |
308 | if (old & _PAGE_HASHPTE) { | ||
309 | hpte_update(mm, addr, ptep, old, 0); | ||
310 | flush_tlb_pending(); | ||
311 | } | ||
312 | return (old & _PAGE_ACCESSED) != 0; | 306 | return (old & _PAGE_ACCESSED) != 0; |
313 | } | 307 | } |
314 | #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG | 308 | #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG |
@@ -331,9 +325,7 @@ static inline int __ptep_test_and_clear_dirty(struct mm_struct *mm, | |||
331 | 325 | ||
332 | if ((pte_val(*ptep) & _PAGE_DIRTY) == 0) | 326 | if ((pte_val(*ptep) & _PAGE_DIRTY) == 0) |
333 | return 0; | 327 | return 0; |
334 | old = pte_update(ptep, _PAGE_DIRTY); | 328 | old = pte_update(mm, addr, ptep, _PAGE_DIRTY, 0); |
335 | if (old & _PAGE_HASHPTE) | ||
336 | hpte_update(mm, addr, ptep, old, 0); | ||
337 | return (old & _PAGE_DIRTY) != 0; | 329 | return (old & _PAGE_DIRTY) != 0; |
338 | } | 330 | } |
339 | #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY | 331 | #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY |
@@ -352,9 +344,7 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, | |||
352 | 344 | ||
353 | if ((pte_val(*ptep) & _PAGE_RW) == 0) | 345 | if ((pte_val(*ptep) & _PAGE_RW) == 0) |
354 | return; | 346 | return; |
355 | old = pte_update(ptep, _PAGE_RW); | 347 | old = pte_update(mm, addr, ptep, _PAGE_RW, 0); |
356 | if (old & _PAGE_HASHPTE) | ||
357 | hpte_update(mm, addr, ptep, old, 0); | ||
358 | } | 348 | } |
359 | 349 | ||
360 | /* | 350 | /* |
@@ -378,7 +368,6 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, | |||
378 | ({ \ | 368 | ({ \ |
379 | int __dirty = __ptep_test_and_clear_dirty((__vma)->vm_mm, __address, \ | 369 | int __dirty = __ptep_test_and_clear_dirty((__vma)->vm_mm, __address, \ |
380 | __ptep); \ | 370 | __ptep); \ |
381 | flush_tlb_page(__vma, __address); \ | ||
382 | __dirty; \ | 371 | __dirty; \ |
383 | }) | 372 | }) |
384 | 373 | ||
@@ -386,20 +375,14 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, | |||
386 | static inline pte_t ptep_get_and_clear(struct mm_struct *mm, | 375 | static inline pte_t ptep_get_and_clear(struct mm_struct *mm, |
387 | unsigned long addr, pte_t *ptep) | 376 | unsigned long addr, pte_t *ptep) |
388 | { | 377 | { |
389 | unsigned long old = pte_update(ptep, ~0UL); | 378 | unsigned long old = pte_update(mm, addr, ptep, ~0UL, 0); |
390 | |||
391 | if (old & _PAGE_HASHPTE) | ||
392 | hpte_update(mm, addr, ptep, old, 0); | ||
393 | return __pte(old); | 379 | return __pte(old); |
394 | } | 380 | } |
395 | 381 | ||
396 | static inline void pte_clear(struct mm_struct *mm, unsigned long addr, | 382 | static inline void pte_clear(struct mm_struct *mm, unsigned long addr, |
397 | pte_t * ptep) | 383 | pte_t * ptep) |
398 | { | 384 | { |
399 | unsigned long old = pte_update(ptep, ~0UL); | 385 | pte_update(mm, addr, ptep, ~0UL, 0); |
400 | |||
401 | if (old & _PAGE_HASHPTE) | ||
402 | hpte_update(mm, addr, ptep, old, 0); | ||
403 | } | 386 | } |
404 | 387 | ||
405 | /* | 388 | /* |
@@ -408,10 +391,8 @@ static inline void pte_clear(struct mm_struct *mm, unsigned long addr, | |||
408 | static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, | 391 | static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, |
409 | pte_t *ptep, pte_t pte) | 392 | pte_t *ptep, pte_t pte) |
410 | { | 393 | { |
411 | if (pte_present(*ptep)) { | 394 | if (pte_present(*ptep)) |
412 | pte_clear(mm, addr, ptep); | 395 | pte_clear(mm, addr, ptep); |
413 | flush_tlb_pending(); | ||
414 | } | ||
415 | pte = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS); | 396 | pte = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS); |
416 | *ptep = pte; | 397 | *ptep = pte; |
417 | } | 398 | } |
@@ -522,6 +503,7 @@ void pgtable_cache_init(void); | |||
522 | return pt; | 503 | return pt; |
523 | } | 504 | } |
524 | 505 | ||
506 | |||
525 | #include <asm-generic/pgtable.h> | 507 | #include <asm-generic/pgtable.h> |
526 | 508 | ||
527 | #endif /* __ASSEMBLY__ */ | 509 | #endif /* __ASSEMBLY__ */ |
diff --git a/include/asm-powerpc/tlb.h b/include/asm-powerpc/tlb.h index 4e2a834683fb..0a17682663d8 100644 --- a/include/asm-powerpc/tlb.h +++ b/include/asm-powerpc/tlb.h | |||
@@ -38,7 +38,6 @@ extern void pte_free_finish(void); | |||
38 | 38 | ||
39 | static inline void tlb_flush(struct mmu_gather *tlb) | 39 | static inline void tlb_flush(struct mmu_gather *tlb) |
40 | { | 40 | { |
41 | flush_tlb_pending(); | ||
42 | pte_free_finish(); | 41 | pte_free_finish(); |
43 | } | 42 | } |
44 | 43 | ||
diff --git a/include/asm-powerpc/tlbflush.h b/include/asm-powerpc/tlbflush.h index 93c7d0c7230f..0bc5a5e506be 100644 --- a/include/asm-powerpc/tlbflush.h +++ b/include/asm-powerpc/tlbflush.h | |||
@@ -28,25 +28,41 @@ struct mm_struct; | |||
28 | #define PPC64_TLB_BATCH_NR 192 | 28 | #define PPC64_TLB_BATCH_NR 192 |
29 | 29 | ||
30 | struct ppc64_tlb_batch { | 30 | struct ppc64_tlb_batch { |
31 | unsigned long index; | 31 | int active; |
32 | struct mm_struct *mm; | 32 | unsigned long index; |
33 | real_pte_t pte[PPC64_TLB_BATCH_NR]; | 33 | struct mm_struct *mm; |
34 | unsigned long vaddr[PPC64_TLB_BATCH_NR]; | 34 | real_pte_t pte[PPC64_TLB_BATCH_NR]; |
35 | unsigned int psize; | 35 | unsigned long vaddr[PPC64_TLB_BATCH_NR]; |
36 | unsigned int psize; | ||
36 | }; | 37 | }; |
37 | DECLARE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch); | 38 | DECLARE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch); |
38 | 39 | ||
39 | extern void __flush_tlb_pending(struct ppc64_tlb_batch *batch); | 40 | extern void __flush_tlb_pending(struct ppc64_tlb_batch *batch); |
40 | 41 | ||
41 | static inline void flush_tlb_pending(void) | 42 | extern void hpte_need_flush(struct mm_struct *mm, unsigned long addr, |
43 | pte_t *ptep, unsigned long pte, int huge); | ||
44 | |||
45 | #define __HAVE_ARCH_ENTER_LAZY_MMU_MODE | ||
46 | |||
47 | static inline void arch_enter_lazy_mmu_mode(void) | ||
48 | { | ||
49 | struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); | ||
50 | |||
51 | batch->active = 1; | ||
52 | } | ||
53 | |||
54 | static inline void arch_leave_lazy_mmu_mode(void) | ||
42 | { | 55 | { |
43 | struct ppc64_tlb_batch *batch = &get_cpu_var(ppc64_tlb_batch); | 56 | struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); |
44 | 57 | ||
45 | if (batch->index) | 58 | if (batch->index) |
46 | __flush_tlb_pending(batch); | 59 | __flush_tlb_pending(batch); |
47 | put_cpu_var(ppc64_tlb_batch); | 60 | batch->active = 0; |
48 | } | 61 | } |
49 | 62 | ||
63 | #define arch_flush_lazy_mmu_mode() do {} while (0) | ||
64 | |||
65 | |||
50 | extern void flush_hash_page(unsigned long va, real_pte_t pte, int psize, | 66 | extern void flush_hash_page(unsigned long va, real_pte_t pte, int psize, |
51 | int local); | 67 | int local); |
52 | extern void flush_hash_range(unsigned long number, int local); | 68 | extern void flush_hash_range(unsigned long number, int local); |
@@ -88,15 +104,12 @@ extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t); | |||
88 | 104 | ||
89 | static inline void flush_tlb_mm(struct mm_struct *mm) | 105 | static inline void flush_tlb_mm(struct mm_struct *mm) |
90 | { | 106 | { |
91 | flush_tlb_pending(); | ||
92 | } | 107 | } |
93 | 108 | ||
94 | static inline void flush_tlb_page(struct vm_area_struct *vma, | 109 | static inline void flush_tlb_page(struct vm_area_struct *vma, |
95 | unsigned long vmaddr) | 110 | unsigned long vmaddr) |
96 | { | 111 | { |
97 | #ifdef CONFIG_PPC64 | 112 | #ifndef CONFIG_PPC64 |
98 | flush_tlb_pending(); | ||
99 | #else | ||
100 | _tlbie(vmaddr); | 113 | _tlbie(vmaddr); |
101 | #endif | 114 | #endif |
102 | } | 115 | } |
@@ -112,13 +125,11 @@ static inline void flush_tlb_page_nohash(struct vm_area_struct *vma, | |||
112 | static inline void flush_tlb_range(struct vm_area_struct *vma, | 125 | static inline void flush_tlb_range(struct vm_area_struct *vma, |
113 | unsigned long start, unsigned long end) | 126 | unsigned long start, unsigned long end) |
114 | { | 127 | { |
115 | flush_tlb_pending(); | ||
116 | } | 128 | } |
117 | 129 | ||
118 | static inline void flush_tlb_kernel_range(unsigned long start, | 130 | static inline void flush_tlb_kernel_range(unsigned long start, |
119 | unsigned long end) | 131 | unsigned long end) |
120 | { | 132 | { |
121 | flush_tlb_pending(); | ||
122 | } | 133 | } |
123 | 134 | ||
124 | #else /* 6xx, 7xx, 7xxx cpus */ | 135 | #else /* 6xx, 7xx, 7xxx cpus */ |