diff options
author | Tony Luck <tony.luck@intel.com> | 2005-10-28 18:27:36 -0400 |
---|---|---|
committer | Tony Luck <tony.luck@intel.com> | 2005-10-28 18:27:36 -0400 |
commit | 8496f2a4513edbc0c1d40496f1b0868dfed36eb0 (patch) | |
tree | 18a772d7a4c42ec325539b52d2053dabc99c460a /arch | |
parent | 2d8f6a521908e3563478347aecad4ab20dc48155 (diff) | |
parent | c1902aae322952f8726469a6657df7b9d5c794fe (diff) |
Pull fix-slow-tlb-purge into release branch
Diffstat (limited to 'arch')
-rw-r--r-- | arch/ia64/mm/tlb.c | 16 | ||||
-rw-r--r-- | arch/ia64/sn/kernel/sn2/sn2_smp.c | 31 |
2 files changed, 28 insertions, 19 deletions
diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c index 987fb754d6ad..c93e0f2b5fea 100644 --- a/arch/ia64/mm/tlb.c +++ b/arch/ia64/mm/tlb.c | |||
@@ -87,10 +87,15 @@ wrap_mmu_context (struct mm_struct *mm) | |||
87 | } | 87 | } |
88 | 88 | ||
89 | void | 89 | void |
90 | ia64_global_tlb_purge (unsigned long start, unsigned long end, unsigned long nbits) | 90 | ia64_global_tlb_purge (struct mm_struct *mm, unsigned long start, unsigned long end, unsigned long nbits) |
91 | { | 91 | { |
92 | static DEFINE_SPINLOCK(ptcg_lock); | 92 | static DEFINE_SPINLOCK(ptcg_lock); |
93 | 93 | ||
94 | if (mm != current->active_mm) { | ||
95 | flush_tlb_all(); | ||
96 | return; | ||
97 | } | ||
98 | |||
94 | /* HW requires global serialization of ptc.ga. */ | 99 | /* HW requires global serialization of ptc.ga. */ |
95 | spin_lock(&ptcg_lock); | 100 | spin_lock(&ptcg_lock); |
96 | { | 101 | { |
@@ -136,15 +141,12 @@ flush_tlb_range (struct vm_area_struct *vma, unsigned long start, unsigned long | |||
136 | unsigned long size = end - start; | 141 | unsigned long size = end - start; |
137 | unsigned long nbits; | 142 | unsigned long nbits; |
138 | 143 | ||
144 | #ifndef CONFIG_SMP | ||
139 | if (mm != current->active_mm) { | 145 | if (mm != current->active_mm) { |
140 | /* this does happen, but perhaps it's not worth optimizing for? */ | ||
141 | #ifdef CONFIG_SMP | ||
142 | flush_tlb_all(); | ||
143 | #else | ||
144 | mm->context = 0; | 146 | mm->context = 0; |
145 | #endif | ||
146 | return; | 147 | return; |
147 | } | 148 | } |
149 | #endif | ||
148 | 150 | ||
149 | nbits = ia64_fls(size + 0xfff); | 151 | nbits = ia64_fls(size + 0xfff); |
150 | while (unlikely (((1UL << nbits) & purge.mask) == 0) && (nbits < purge.max_bits)) | 152 | while (unlikely (((1UL << nbits) & purge.mask) == 0) && (nbits < purge.max_bits)) |
@@ -154,7 +156,7 @@ flush_tlb_range (struct vm_area_struct *vma, unsigned long start, unsigned long | |||
154 | start &= ~((1UL << nbits) - 1); | 156 | start &= ~((1UL << nbits) - 1); |
155 | 157 | ||
156 | # ifdef CONFIG_SMP | 158 | # ifdef CONFIG_SMP |
157 | platform_global_tlb_purge(start, end, nbits); | 159 | platform_global_tlb_purge(mm, start, end, nbits); |
158 | # else | 160 | # else |
159 | do { | 161 | do { |
160 | ia64_ptcl(start, (nbits<<2)); | 162 | ia64_ptcl(start, (nbits<<2)); |
diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c index 0a4ee50c302f..49b530c39a42 100644 --- a/arch/ia64/sn/kernel/sn2/sn2_smp.c +++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c | |||
@@ -177,6 +177,7 @@ void sn_tlb_migrate_finish(struct mm_struct *mm) | |||
177 | 177 | ||
178 | /** | 178 | /** |
179 | * sn2_global_tlb_purge - globally purge translation cache of virtual address range | 179 | * sn2_global_tlb_purge - globally purge translation cache of virtual address range |
180 | * @mm: mm_struct containing virtual address range | ||
180 | * @start: start of virtual address range | 181 | * @start: start of virtual address range |
181 | * @end: end of virtual address range | 182 | * @end: end of virtual address range |
182 | * @nbits: specifies number of bytes to purge per instruction (num = 1<<(nbits & 0xfc)) | 183 | * @nbits: specifies number of bytes to purge per instruction (num = 1<<(nbits & 0xfc)) |
@@ -188,21 +189,22 @@ void sn_tlb_migrate_finish(struct mm_struct *mm) | |||
188 | * - cpu_vm_mask is a bit mask that indicates which cpus have loaded the context. | 189 | * - cpu_vm_mask is a bit mask that indicates which cpus have loaded the context. |
189 | * - cpu_vm_mask is converted into a nodemask of the nodes containing the | 190 | * - cpu_vm_mask is converted into a nodemask of the nodes containing the |
190 | * cpus in cpu_vm_mask. | 191 | * cpus in cpu_vm_mask. |
191 | * - if only one bit is set in cpu_vm_mask & it is the current cpu, | 192 | * - if only one bit is set in cpu_vm_mask & it is the current cpu & the |
192 | * then only the local TLB needs to be flushed. This flushing can be done | 193 | * process is purging its own virtual address range, then only the |
193 | * using ptc.l. This is the common case & avoids the global spinlock. | 194 | * local TLB needs to be flushed. This flushing can be done using |
195 | * ptc.l. This is the common case & avoids the global spinlock. | ||
194 | * - if multiple cpus have loaded the context, then flushing has to be | 196 | * - if multiple cpus have loaded the context, then flushing has to be |
195 | * done with ptc.g/MMRs under protection of the global ptc_lock. | 197 | * done with ptc.g/MMRs under protection of the global ptc_lock. |
196 | */ | 198 | */ |
197 | 199 | ||
198 | void | 200 | void |
199 | sn2_global_tlb_purge(unsigned long start, unsigned long end, | 201 | sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start, |
200 | unsigned long nbits) | 202 | unsigned long end, unsigned long nbits) |
201 | { | 203 | { |
202 | int i, opt, shub1, cnode, mynasid, cpu, lcpu = 0, nasid, flushed = 0; | 204 | int i, opt, shub1, cnode, mynasid, cpu, lcpu = 0, nasid, flushed = 0; |
205 | int mymm = (mm == current->active_mm); | ||
203 | volatile unsigned long *ptc0, *ptc1; | 206 | volatile unsigned long *ptc0, *ptc1; |
204 | unsigned long itc, itc2, flags, data0 = 0, data1 = 0; | 207 | unsigned long itc, itc2, flags, data0 = 0, data1 = 0, rr_value; |
205 | struct mm_struct *mm = current->active_mm; | ||
206 | short nasids[MAX_NUMNODES], nix; | 208 | short nasids[MAX_NUMNODES], nix; |
207 | nodemask_t nodes_flushed; | 209 | nodemask_t nodes_flushed; |
208 | 210 | ||
@@ -216,9 +218,12 @@ sn2_global_tlb_purge(unsigned long start, unsigned long end, | |||
216 | i++; | 218 | i++; |
217 | } | 219 | } |
218 | 220 | ||
221 | if (i == 0) | ||
222 | return; | ||
223 | |||
219 | preempt_disable(); | 224 | preempt_disable(); |
220 | 225 | ||
221 | if (likely(i == 1 && lcpu == smp_processor_id())) { | 226 | if (likely(i == 1 && lcpu == smp_processor_id() && mymm)) { |
222 | do { | 227 | do { |
223 | ia64_ptcl(start, nbits << 2); | 228 | ia64_ptcl(start, nbits << 2); |
224 | start += (1UL << nbits); | 229 | start += (1UL << nbits); |
@@ -229,7 +234,7 @@ sn2_global_tlb_purge(unsigned long start, unsigned long end, | |||
229 | return; | 234 | return; |
230 | } | 235 | } |
231 | 236 | ||
232 | if (atomic_read(&mm->mm_users) == 1) { | 237 | if (atomic_read(&mm->mm_users) == 1 && mymm) { |
233 | flush_tlb_mm(mm); | 238 | flush_tlb_mm(mm); |
234 | __get_cpu_var(ptcstats).change_rid++; | 239 | __get_cpu_var(ptcstats).change_rid++; |
235 | preempt_enable(); | 240 | preempt_enable(); |
@@ -241,11 +246,13 @@ sn2_global_tlb_purge(unsigned long start, unsigned long end, | |||
241 | for_each_node_mask(cnode, nodes_flushed) | 246 | for_each_node_mask(cnode, nodes_flushed) |
242 | nasids[nix++] = cnodeid_to_nasid(cnode); | 247 | nasids[nix++] = cnodeid_to_nasid(cnode); |
243 | 248 | ||
249 | rr_value = (mm->context << 3) | REGION_NUMBER(start); | ||
250 | |||
244 | shub1 = is_shub1(); | 251 | shub1 = is_shub1(); |
245 | if (shub1) { | 252 | if (shub1) { |
246 | data0 = (1UL << SH1_PTC_0_A_SHFT) | | 253 | data0 = (1UL << SH1_PTC_0_A_SHFT) | |
247 | (nbits << SH1_PTC_0_PS_SHFT) | | 254 | (nbits << SH1_PTC_0_PS_SHFT) | |
248 | ((ia64_get_rr(start) >> 8) << SH1_PTC_0_RID_SHFT) | | 255 | (rr_value << SH1_PTC_0_RID_SHFT) | |
249 | (1UL << SH1_PTC_0_START_SHFT); | 256 | (1UL << SH1_PTC_0_START_SHFT); |
250 | ptc0 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH1_PTC_0); | 257 | ptc0 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH1_PTC_0); |
251 | ptc1 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH1_PTC_1); | 258 | ptc1 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH1_PTC_1); |
@@ -254,7 +261,7 @@ sn2_global_tlb_purge(unsigned long start, unsigned long end, | |||
254 | (nbits << SH2_PTC_PS_SHFT) | | 261 | (nbits << SH2_PTC_PS_SHFT) | |
255 | (1UL << SH2_PTC_START_SHFT); | 262 | (1UL << SH2_PTC_START_SHFT); |
256 | ptc0 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH2_PTC + | 263 | ptc0 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH2_PTC + |
257 | ((ia64_get_rr(start) >> 8) << SH2_PTC_RID_SHFT) ); | 264 | (rr_value << SH2_PTC_RID_SHFT)); |
258 | ptc1 = NULL; | 265 | ptc1 = NULL; |
259 | } | 266 | } |
260 | 267 | ||
@@ -275,7 +282,7 @@ sn2_global_tlb_purge(unsigned long start, unsigned long end, | |||
275 | data0 = (data0 & ~SH2_PTC_ADDR_MASK) | (start & SH2_PTC_ADDR_MASK); | 282 | data0 = (data0 & ~SH2_PTC_ADDR_MASK) | (start & SH2_PTC_ADDR_MASK); |
276 | for (i = 0; i < nix; i++) { | 283 | for (i = 0; i < nix; i++) { |
277 | nasid = nasids[i]; | 284 | nasid = nasids[i]; |
278 | if ((!(sn2_ptctest & 3)) && unlikely(nasid == mynasid)) { | 285 | if ((!(sn2_ptctest & 3)) && unlikely(nasid == mynasid && mymm)) { |
279 | ia64_ptcga(start, nbits << 2); | 286 | ia64_ptcga(start, nbits << 2); |
280 | ia64_srlz_i(); | 287 | ia64_srlz_i(); |
281 | } else { | 288 | } else { |