diff options
author | Martin Schwidefsky <schwidefsky@de.ibm.com> | 2007-10-22 06:52:44 -0400 |
---|---|---|
committer | Martin Schwidefsky <schwidefsky@de.ibm.com> | 2007-10-22 06:52:48 -0400 |
commit | ba8a9229ab9e80278c28ad68b15053f65b2b0a7c (patch) | |
tree | d73e4f7d352d3b3edf8888973528cb7dd3e953f9 /include/asm-s390/tlbflush.h | |
parent | e3d3683d1402c1737687cb698451d545f57c32a7 (diff) |
[S390] tlb flush fix.
The current tlb flushing code for page table entries violates the
s390 architecture in a small detail. The relevant section from the
principles of operation (SA22-7832-02 page 3-47):
"A valid table entry must not be changed while it is attached
to any CPU and may be used for translation by that CPU except to
(1) invalidate the entry by using INVALIDATE PAGE TABLE ENTRY or
INVALIDATE DAT TABLE ENTRY, (2) alter bits 56-63 of a page-table
entry, or (3) make a change by means of a COMPARE AND SWAP AND
PURGE instruction that purges the TLB."
That means if one thread of a multithreaded application uses a vma
while another thread does an unmap on it, the page table entries of
that vma need to get removed with IPTE, IDTE or CSP. In some strange
and rare situations a cpu could check-stop (die) because an entry has
been pushed out of the TLB that is still needed to complete a
(milli-coded) instruction. I've never seen it happen with the current
code on any of the supported machines, so right now this is a
theoretical problem. But I want to fix it nevertheless, to avoid
headaches in the future.
To get this implemented correctly without changing common code the
primitives ptep_get_and_clear, ptep_get_and_clear_full and
ptep_set_wrprotect need to use the IPTE instruction to invalidate the
pte before the new pte value gets stored. If IPTE is always used for
the three primitives, three important operations will have a performance
hit: fork, mprotect and exit_mmap. Time for some workarounds:
* 1: ptep_get_and_clear_full is used in unmap_vmas to remove page
table entries in a batched tlb gather operation. If the mmu_gather
context passed to unmap_vmas has been started with full_mm_flush==1
or if only one cpu is online or if the only user of a mm_struct is the
current process then the fullmm indication in the mmu_gather context is
set to one. All TLBs for mm_struct are flushed by the tlb_gather_mmu
call. No new TLBs can be created while the unmap is in progress. In
this case ptep_get_and_clear_full clears the ptes with a simple store.
* 2: ptep_get_and_clear is used in change_protection to clear the
ptes from the page tables before they are reentered with the new
access flags. At the end of the update flush_tlb_range clears the
remaining TLBs. In general the ptep_get_and_clear has to issue IPTE
for each pte and flush_tlb_range is a nop. But if there is only one
user of the mm_struct then ptep_get_and_clear uses simple stores
to do the update and flush_tlb_range will flush the TLBs.
* 3: Similar to 2, ptep_set_wrprotect is used in copy_page_range
for a fork to make all ptes of a cow mapping read-only. At the end
of copy_page_range dup_mmap will flush the TLBs with a call to
flush_tlb_mm. Check for mm->mm_users and if there is only one user
avoid using IPTE in ptep_set_wrprotect and let flush_tlb_mm clear the
TLBs.
Overall for single threaded programs the tlb flush code now performs
better, for multi threaded programs it is slightly worse. In particular
exit_mmap() now does a single IDTE for the mm and then just frees every
page cache reference and every page table page directly without a delay
over the mmu_gather structure.
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Diffstat (limited to 'include/asm-s390/tlbflush.h')
-rw-r--r-- | include/asm-s390/tlbflush.h | 150 |
1 files changed, 55 insertions, 95 deletions
diff --git a/include/asm-s390/tlbflush.h b/include/asm-s390/tlbflush.h index 6de2632a3e4f..3a9985fbc8af 100644 --- a/include/asm-s390/tlbflush.h +++ b/include/asm-s390/tlbflush.h | |||
@@ -6,68 +6,19 @@ | |||
6 | #include <asm/pgalloc.h> | 6 | #include <asm/pgalloc.h> |
7 | 7 | ||
8 | /* | 8 | /* |
9 | * TLB flushing: | 9 | * Flush all tlb entries on the local cpu. |
10 | * | ||
11 | * - flush_tlb() flushes the current mm struct TLBs | ||
12 | * - flush_tlb_all() flushes all processes TLBs | ||
13 | * - flush_tlb_mm(mm) flushes the specified mm context TLB's | ||
14 | * - flush_tlb_page(vma, vmaddr) flushes one page | ||
15 | * - flush_tlb_range(vma, start, end) flushes a range of pages | ||
16 | * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages | ||
17 | */ | ||
18 | |||
19 | /* | ||
20 | * S/390 has three ways of flushing TLBs | ||
21 | * 'ptlb' does a flush of the local processor | ||
22 | * 'csp' flushes the TLBs on all PUs of a SMP | ||
23 | * 'ipte' invalidates a pte in a page table and flushes that out of | ||
24 | * the TLBs of all PUs of a SMP | ||
25 | */ | ||
26 | |||
27 | #define local_flush_tlb() \ | ||
28 | do { asm volatile("ptlb": : :"memory"); } while (0) | ||
29 | |||
30 | #ifndef CONFIG_SMP | ||
31 | |||
32 | /* | ||
33 | * We always need to flush, since s390 does not flush tlb | ||
34 | * on each context switch | ||
35 | */ | 10 | */ |
36 | 11 | static inline void __tlb_flush_local(void) | |
37 | static inline void flush_tlb(void) | ||
38 | { | 12 | { |
39 | local_flush_tlb(); | 13 | asm volatile("ptlb" : : : "memory"); |
40 | } | 14 | } |
41 | static inline void flush_tlb_all(void) | ||
42 | { | ||
43 | local_flush_tlb(); | ||
44 | } | ||
45 | static inline void flush_tlb_mm(struct mm_struct *mm) | ||
46 | { | ||
47 | local_flush_tlb(); | ||
48 | } | ||
49 | static inline void flush_tlb_page(struct vm_area_struct *vma, | ||
50 | unsigned long addr) | ||
51 | { | ||
52 | local_flush_tlb(); | ||
53 | } | ||
54 | static inline void flush_tlb_range(struct vm_area_struct *vma, | ||
55 | unsigned long start, unsigned long end) | ||
56 | { | ||
57 | local_flush_tlb(); | ||
58 | } | ||
59 | |||
60 | #define flush_tlb_kernel_range(start, end) \ | ||
61 | local_flush_tlb(); | ||
62 | |||
63 | #else | ||
64 | 15 | ||
65 | #include <asm/smp.h> | 16 | /* |
66 | 17 | * Flush all tlb entries on all cpus. | |
67 | extern void smp_ptlb_all(void); | 18 | */ |
68 | 19 | static inline void __tlb_flush_global(void) | |
69 | static inline void global_flush_tlb(void) | ||
70 | { | 20 | { |
21 | extern void smp_ptlb_all(void); | ||
71 | register unsigned long reg2 asm("2"); | 22 | register unsigned long reg2 asm("2"); |
72 | register unsigned long reg3 asm("3"); | 23 | register unsigned long reg3 asm("3"); |
73 | register unsigned long reg4 asm("4"); | 24 | register unsigned long reg4 asm("4"); |
@@ -89,66 +40,75 @@ static inline void global_flush_tlb(void) | |||
89 | } | 40 | } |
90 | 41 | ||
91 | /* | 42 | /* |
92 | * We only have to do global flush of tlb if process run since last | 43 | * Flush all tlb entries of a page table on all cpus. |
93 | * flush on any other pu than current. | ||
94 | * If we have threads (mm->count > 1) we always do a global flush, | ||
95 | * since the process runs on more than one processor at the same time. | ||
96 | */ | 44 | */ |
45 | static inline void __tlb_flush_idte(pgd_t *pgd) | ||
46 | { | ||
47 | asm volatile( | ||
48 | " .insn rrf,0xb98e0000,0,%0,%1,0" | ||
49 | : : "a" (2048), "a" (__pa(pgd) & PAGE_MASK) : "cc" ); | ||
50 | } | ||
97 | 51 | ||
98 | static inline void __flush_tlb_mm(struct mm_struct * mm) | 52 | static inline void __tlb_flush_mm(struct mm_struct * mm) |
99 | { | 53 | { |
100 | cpumask_t local_cpumask; | 54 | cpumask_t local_cpumask; |
101 | 55 | ||
102 | if (unlikely(cpus_empty(mm->cpu_vm_mask))) | 56 | if (unlikely(cpus_empty(mm->cpu_vm_mask))) |
103 | return; | 57 | return; |
58 | /* | ||
59 | * If the machine has IDTE we prefer to do a per mm flush | ||
60 | * on all cpus instead of doing a local flush if the mm | ||
61 | * only ran on the local cpu. | ||
62 | */ | ||
104 | if (MACHINE_HAS_IDTE) { | 63 | if (MACHINE_HAS_IDTE) { |
105 | pgd_t *shadow_pgd = get_shadow_pgd(mm->pgd); | 64 | pgd_t *shadow_pgd = get_shadow_pgd(mm->pgd); |
106 | 65 | ||
107 | if (shadow_pgd) { | 66 | if (shadow_pgd) |
108 | asm volatile( | 67 | __tlb_flush_idte(shadow_pgd); |
109 | " .insn rrf,0xb98e0000,0,%0,%1,0" | 68 | __tlb_flush_idte(mm->pgd); |
110 | : : "a" (2048), | ||
111 | "a" (__pa(shadow_pgd) & PAGE_MASK) : "cc" ); | ||
112 | } | ||
113 | asm volatile( | ||
114 | " .insn rrf,0xb98e0000,0,%0,%1,0" | ||
115 | : : "a" (2048), "a" (__pa(mm->pgd)&PAGE_MASK) : "cc"); | ||
116 | return; | 69 | return; |
117 | } | 70 | } |
118 | preempt_disable(); | 71 | preempt_disable(); |
72 | /* | ||
73 | * If the process only ran on the local cpu, do a local flush. | ||
74 | */ | ||
119 | local_cpumask = cpumask_of_cpu(smp_processor_id()); | 75 | local_cpumask = cpumask_of_cpu(smp_processor_id()); |
120 | if (cpus_equal(mm->cpu_vm_mask, local_cpumask)) | 76 | if (cpus_equal(mm->cpu_vm_mask, local_cpumask)) |
121 | local_flush_tlb(); | 77 | __tlb_flush_local(); |
122 | else | 78 | else |
123 | global_flush_tlb(); | 79 | __tlb_flush_global(); |
124 | preempt_enable(); | 80 | preempt_enable(); |
125 | } | 81 | } |
126 | 82 | ||
127 | static inline void flush_tlb(void) | 83 | static inline void __tlb_flush_mm_cond(struct mm_struct * mm) |
128 | { | ||
129 | __flush_tlb_mm(current->mm); | ||
130 | } | ||
131 | static inline void flush_tlb_all(void) | ||
132 | { | ||
133 | global_flush_tlb(); | ||
134 | } | ||
135 | static inline void flush_tlb_mm(struct mm_struct *mm) | ||
136 | { | ||
137 | __flush_tlb_mm(mm); | ||
138 | } | ||
139 | static inline void flush_tlb_page(struct vm_area_struct *vma, | ||
140 | unsigned long addr) | ||
141 | { | ||
142 | __flush_tlb_mm(vma->vm_mm); | ||
143 | } | ||
144 | static inline void flush_tlb_range(struct vm_area_struct *vma, | ||
145 | unsigned long start, unsigned long end) | ||
146 | { | 84 | { |
147 | __flush_tlb_mm(vma->vm_mm); | 85 | if (atomic_read(&mm->mm_users) <= 1 && mm == current->active_mm) |
86 | __tlb_flush_mm(mm); | ||
148 | } | 87 | } |
149 | 88 | ||
150 | #define flush_tlb_kernel_range(start, end) global_flush_tlb() | 89 | /* |
90 | * TLB flushing: | ||
91 | * flush_tlb() - flushes the current mm struct TLBs | ||
92 | * flush_tlb_all() - flushes all processes TLBs | ||
93 | * flush_tlb_mm(mm) - flushes the specified mm context TLB's | ||
94 | * flush_tlb_page(vma, vmaddr) - flushes one page | ||
95 | * flush_tlb_range(vma, start, end) - flushes a range of pages | ||
96 | * flush_tlb_kernel_range(start, end) - flushes a range of kernel pages | ||
97 | */ | ||
151 | 98 | ||
152 | #endif | 99 | /* |
100 | * flush_tlb_mm goes together with ptep_set_wrprotect for the | ||
101 | * copy_page_range operation and flush_tlb_range is related to | ||
102 | * ptep_get_and_clear for change_protection. ptep_set_wrprotect and | ||
103 | * ptep_get_and_clear do not flush the TLBs directly if the mm has | ||
104 | * only one user. At the end of the update the flush_tlb_mm and | ||
105 | * flush_tlb_range functions need to do the flush. | ||
106 | */ | ||
107 | #define flush_tlb() do { } while (0) | ||
108 | #define flush_tlb_all() do { } while (0) | ||
109 | #define flush_tlb_mm(mm) __tlb_flush_mm_cond(mm) | ||
110 | #define flush_tlb_page(vma, addr) do { } while (0) | ||
111 | #define flush_tlb_range(vma, start, end) __tlb_flush_mm_cond(mm) | ||
112 | #define flush_tlb_kernel_range(start, end) __tlb_flush_mm(&init_mm) | ||
153 | 113 | ||
154 | #endif /* _S390_TLBFLUSH_H */ | 114 | #endif /* _S390_TLBFLUSH_H */ |