diff options
| -rw-r--r-- | arch/ia64/mm/tlb.c | 16 | ||||
| -rw-r--r-- | arch/ia64/sn/kernel/sn2/sn2_smp.c | 31 | ||||
| -rw-r--r-- | include/asm-ia64/machvec.h | 2 |
3 files changed, 29 insertions, 20 deletions
diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c index 464557e4ed82..99ea8c70f408 100644 --- a/arch/ia64/mm/tlb.c +++ b/arch/ia64/mm/tlb.c | |||
| @@ -86,10 +86,15 @@ wrap_mmu_context (struct mm_struct *mm) | |||
| 86 | } | 86 | } |
| 87 | 87 | ||
| 88 | void | 88 | void |
| 89 | ia64_global_tlb_purge (unsigned long start, unsigned long end, unsigned long nbits) | 89 | ia64_global_tlb_purge (struct mm_struct *mm, unsigned long start, unsigned long end, unsigned long nbits) |
| 90 | { | 90 | { |
| 91 | static DEFINE_SPINLOCK(ptcg_lock); | 91 | static DEFINE_SPINLOCK(ptcg_lock); |
| 92 | 92 | ||
| 93 | if (mm != current->active_mm) { | ||
| 94 | flush_tlb_all(); | ||
| 95 | return; | ||
| 96 | } | ||
| 97 | |||
| 93 | /* HW requires global serialization of ptc.ga. */ | 98 | /* HW requires global serialization of ptc.ga. */ |
| 94 | spin_lock(&ptcg_lock); | 99 | spin_lock(&ptcg_lock); |
| 95 | { | 100 | { |
| @@ -135,15 +140,12 @@ flush_tlb_range (struct vm_area_struct *vma, unsigned long start, unsigned long | |||
| 135 | unsigned long size = end - start; | 140 | unsigned long size = end - start; |
| 136 | unsigned long nbits; | 141 | unsigned long nbits; |
| 137 | 142 | ||
| 143 | #ifndef CONFIG_SMP | ||
| 138 | if (mm != current->active_mm) { | 144 | if (mm != current->active_mm) { |
| 139 | /* this does happen, but perhaps it's not worth optimizing for? */ | ||
| 140 | #ifdef CONFIG_SMP | ||
| 141 | flush_tlb_all(); | ||
| 142 | #else | ||
| 143 | mm->context = 0; | 145 | mm->context = 0; |
| 144 | #endif | ||
| 145 | return; | 146 | return; |
| 146 | } | 147 | } |
| 148 | #endif | ||
| 147 | 149 | ||
| 148 | nbits = ia64_fls(size + 0xfff); | 150 | nbits = ia64_fls(size + 0xfff); |
| 149 | while (unlikely (((1UL << nbits) & purge.mask) == 0) && (nbits < purge.max_bits)) | 151 | while (unlikely (((1UL << nbits) & purge.mask) == 0) && (nbits < purge.max_bits)) |
| @@ -153,7 +155,7 @@ flush_tlb_range (struct vm_area_struct *vma, unsigned long start, unsigned long | |||
| 153 | start &= ~((1UL << nbits) - 1); | 155 | start &= ~((1UL << nbits) - 1); |
| 154 | 156 | ||
| 155 | # ifdef CONFIG_SMP | 157 | # ifdef CONFIG_SMP |
| 156 | platform_global_tlb_purge(start, end, nbits); | 158 | platform_global_tlb_purge(mm, start, end, nbits); |
| 157 | # else | 159 | # else |
| 158 | do { | 160 | do { |
| 159 | ia64_ptcl(start, (nbits<<2)); | 161 | ia64_ptcl(start, (nbits<<2)); |
diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c index 0a4ee50c302f..49b530c39a42 100644 --- a/arch/ia64/sn/kernel/sn2/sn2_smp.c +++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c | |||
| @@ -177,6 +177,7 @@ void sn_tlb_migrate_finish(struct mm_struct *mm) | |||
| 177 | 177 | ||
| 178 | /** | 178 | /** |
| 179 | * sn2_global_tlb_purge - globally purge translation cache of virtual address range | 179 | * sn2_global_tlb_purge - globally purge translation cache of virtual address range |
| 180 | * @mm: mm_struct containing virtual address range | ||
| 180 | * @start: start of virtual address range | 181 | * @start: start of virtual address range |
| 181 | * @end: end of virtual address range | 182 | * @end: end of virtual address range |
| 182 | * @nbits: specifies number of bytes to purge per instruction (num = 1<<(nbits & 0xfc)) | 183 | * @nbits: specifies number of bytes to purge per instruction (num = 1<<(nbits & 0xfc)) |
| @@ -188,21 +189,22 @@ void sn_tlb_migrate_finish(struct mm_struct *mm) | |||
| 188 | * - cpu_vm_mask is a bit mask that indicates which cpus have loaded the context. | 189 | * - cpu_vm_mask is a bit mask that indicates which cpus have loaded the context. |
| 189 | * - cpu_vm_mask is converted into a nodemask of the nodes containing the | 190 | * - cpu_vm_mask is converted into a nodemask of the nodes containing the |
| 190 | * cpus in cpu_vm_mask. | 191 | * cpus in cpu_vm_mask. |
| 191 | * - if only one bit is set in cpu_vm_mask & it is the current cpu, | 192 | * - if only one bit is set in cpu_vm_mask & it is the current cpu & the |
| 192 | * then only the local TLB needs to be flushed. This flushing can be done | 193 | * process is purging its own virtual address range, then only the |
| 193 | * using ptc.l. This is the common case & avoids the global spinlock. | 194 | * local TLB needs to be flushed. This flushing can be done using |
| 195 | * ptc.l. This is the common case & avoids the global spinlock. | ||
| 194 | * - if multiple cpus have loaded the context, then flushing has to be | 196 | * - if multiple cpus have loaded the context, then flushing has to be |
| 195 | * done with ptc.g/MMRs under protection of the global ptc_lock. | 197 | * done with ptc.g/MMRs under protection of the global ptc_lock. |
| 196 | */ | 198 | */ |
| 197 | 199 | ||
| 198 | void | 200 | void |
| 199 | sn2_global_tlb_purge(unsigned long start, unsigned long end, | 201 | sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start, |
| 200 | unsigned long nbits) | 202 | unsigned long end, unsigned long nbits) |
| 201 | { | 203 | { |
| 202 | int i, opt, shub1, cnode, mynasid, cpu, lcpu = 0, nasid, flushed = 0; | 204 | int i, opt, shub1, cnode, mynasid, cpu, lcpu = 0, nasid, flushed = 0; |
| 205 | int mymm = (mm == current->active_mm); | ||
| 203 | volatile unsigned long *ptc0, *ptc1; | 206 | volatile unsigned long *ptc0, *ptc1; |
| 204 | unsigned long itc, itc2, flags, data0 = 0, data1 = 0; | 207 | unsigned long itc, itc2, flags, data0 = 0, data1 = 0, rr_value; |
| 205 | struct mm_struct *mm = current->active_mm; | ||
| 206 | short nasids[MAX_NUMNODES], nix; | 208 | short nasids[MAX_NUMNODES], nix; |
| 207 | nodemask_t nodes_flushed; | 209 | nodemask_t nodes_flushed; |
| 208 | 210 | ||
| @@ -216,9 +218,12 @@ sn2_global_tlb_purge(unsigned long start, unsigned long end, | |||
| 216 | i++; | 218 | i++; |
| 217 | } | 219 | } |
| 218 | 220 | ||
| 221 | if (i == 0) | ||
| 222 | return; | ||
| 223 | |||
| 219 | preempt_disable(); | 224 | preempt_disable(); |
| 220 | 225 | ||
| 221 | if (likely(i == 1 && lcpu == smp_processor_id())) { | 226 | if (likely(i == 1 && lcpu == smp_processor_id() && mymm)) { |
| 222 | do { | 227 | do { |
| 223 | ia64_ptcl(start, nbits << 2); | 228 | ia64_ptcl(start, nbits << 2); |
| 224 | start += (1UL << nbits); | 229 | start += (1UL << nbits); |
| @@ -229,7 +234,7 @@ sn2_global_tlb_purge(unsigned long start, unsigned long end, | |||
| 229 | return; | 234 | return; |
| 230 | } | 235 | } |
| 231 | 236 | ||
| 232 | if (atomic_read(&mm->mm_users) == 1) { | 237 | if (atomic_read(&mm->mm_users) == 1 && mymm) { |
| 233 | flush_tlb_mm(mm); | 238 | flush_tlb_mm(mm); |
| 234 | __get_cpu_var(ptcstats).change_rid++; | 239 | __get_cpu_var(ptcstats).change_rid++; |
| 235 | preempt_enable(); | 240 | preempt_enable(); |
| @@ -241,11 +246,13 @@ sn2_global_tlb_purge(unsigned long start, unsigned long end, | |||
| 241 | for_each_node_mask(cnode, nodes_flushed) | 246 | for_each_node_mask(cnode, nodes_flushed) |
| 242 | nasids[nix++] = cnodeid_to_nasid(cnode); | 247 | nasids[nix++] = cnodeid_to_nasid(cnode); |
| 243 | 248 | ||
| 249 | rr_value = (mm->context << 3) | REGION_NUMBER(start); | ||
| 250 | |||
| 244 | shub1 = is_shub1(); | 251 | shub1 = is_shub1(); |
| 245 | if (shub1) { | 252 | if (shub1) { |
| 246 | data0 = (1UL << SH1_PTC_0_A_SHFT) | | 253 | data0 = (1UL << SH1_PTC_0_A_SHFT) | |
| 247 | (nbits << SH1_PTC_0_PS_SHFT) | | 254 | (nbits << SH1_PTC_0_PS_SHFT) | |
| 248 | ((ia64_get_rr(start) >> 8) << SH1_PTC_0_RID_SHFT) | | 255 | (rr_value << SH1_PTC_0_RID_SHFT) | |
| 249 | (1UL << SH1_PTC_0_START_SHFT); | 256 | (1UL << SH1_PTC_0_START_SHFT); |
| 250 | ptc0 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH1_PTC_0); | 257 | ptc0 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH1_PTC_0); |
| 251 | ptc1 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH1_PTC_1); | 258 | ptc1 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH1_PTC_1); |
| @@ -254,7 +261,7 @@ sn2_global_tlb_purge(unsigned long start, unsigned long end, | |||
| 254 | (nbits << SH2_PTC_PS_SHFT) | | 261 | (nbits << SH2_PTC_PS_SHFT) | |
| 255 | (1UL << SH2_PTC_START_SHFT); | 262 | (1UL << SH2_PTC_START_SHFT); |
| 256 | ptc0 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH2_PTC + | 263 | ptc0 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH2_PTC + |
| 257 | ((ia64_get_rr(start) >> 8) << SH2_PTC_RID_SHFT) ); | 264 | (rr_value << SH2_PTC_RID_SHFT)); |
| 258 | ptc1 = NULL; | 265 | ptc1 = NULL; |
| 259 | } | 266 | } |
| 260 | 267 | ||
| @@ -275,7 +282,7 @@ sn2_global_tlb_purge(unsigned long start, unsigned long end, | |||
| 275 | data0 = (data0 & ~SH2_PTC_ADDR_MASK) | (start & SH2_PTC_ADDR_MASK); | 282 | data0 = (data0 & ~SH2_PTC_ADDR_MASK) | (start & SH2_PTC_ADDR_MASK); |
| 276 | for (i = 0; i < nix; i++) { | 283 | for (i = 0; i < nix; i++) { |
| 277 | nasid = nasids[i]; | 284 | nasid = nasids[i]; |
| 278 | if ((!(sn2_ptctest & 3)) && unlikely(nasid == mynasid)) { | 285 | if ((!(sn2_ptctest & 3)) && unlikely(nasid == mynasid && mymm)) { |
| 279 | ia64_ptcga(start, nbits << 2); | 286 | ia64_ptcga(start, nbits << 2); |
| 280 | ia64_srlz_i(); | 287 | ia64_srlz_i(); |
| 281 | } else { | 288 | } else { |
diff --git a/include/asm-ia64/machvec.h b/include/asm-ia64/machvec.h index 79e89a7db566..522c7f5ba8ce 100644 --- a/include/asm-ia64/machvec.h +++ b/include/asm-ia64/machvec.h | |||
| @@ -26,7 +26,7 @@ typedef void ia64_mv_cpu_init_t (void); | |||
| 26 | typedef void ia64_mv_irq_init_t (void); | 26 | typedef void ia64_mv_irq_init_t (void); |
| 27 | typedef void ia64_mv_send_ipi_t (int, int, int, int); | 27 | typedef void ia64_mv_send_ipi_t (int, int, int, int); |
| 28 | typedef void ia64_mv_timer_interrupt_t (int, void *, struct pt_regs *); | 28 | typedef void ia64_mv_timer_interrupt_t (int, void *, struct pt_regs *); |
| 29 | typedef void ia64_mv_global_tlb_purge_t (unsigned long, unsigned long, unsigned long); | 29 | typedef void ia64_mv_global_tlb_purge_t (struct mm_struct *, unsigned long, unsigned long, unsigned long); |
| 30 | typedef void ia64_mv_tlb_migrate_finish_t (struct mm_struct *); | 30 | typedef void ia64_mv_tlb_migrate_finish_t (struct mm_struct *); |
| 31 | typedef unsigned int ia64_mv_local_vector_to_irq (u8); | 31 | typedef unsigned int ia64_mv_local_vector_to_irq (u8); |
| 32 | typedef char *ia64_mv_pci_get_legacy_mem_t (struct pci_bus *); | 32 | typedef char *ia64_mv_pci_get_legacy_mem_t (struct pci_bus *); |
