diff options
| -rw-r--r-- | arch/ia64/kernel/setup.c | 1 | ||||
| -rw-r--r-- | arch/ia64/mm/tlb.c | 85 | ||||
| -rw-r--r-- | include/asm-ia64/mmu_context.h | 81 | ||||
| -rw-r--r-- | include/asm-ia64/tlbflush.h | 1 |
4 files changed, 92 insertions, 76 deletions
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index 3af6de36a482..5add0bcf87a7 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c | |||
| @@ -461,6 +461,7 @@ setup_arch (char **cmdline_p) | |||
| 461 | #endif | 461 | #endif |
| 462 | 462 | ||
| 463 | cpu_init(); /* initialize the bootstrap CPU */ | 463 | cpu_init(); /* initialize the bootstrap CPU */ |
| 464 | mmu_context_init(); /* initialize context_id bitmap */ | ||
| 464 | 465 | ||
| 465 | #ifdef CONFIG_ACPI | 466 | #ifdef CONFIG_ACPI |
| 466 | acpi_boot_init(); | 467 | acpi_boot_init(); |
diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c index c79a9b96d02b..41105d454423 100644 --- a/arch/ia64/mm/tlb.c +++ b/arch/ia64/mm/tlb.c | |||
| @@ -8,6 +8,8 @@ | |||
| 8 | * Modified RID allocation for SMP | 8 | * Modified RID allocation for SMP |
| 9 | * Goutham Rao <goutham.rao@intel.com> | 9 | * Goutham Rao <goutham.rao@intel.com> |
| 10 | * IPI based ptc implementation and A-step IPI implementation. | 10 | * IPI based ptc implementation and A-step IPI implementation. |
| 11 | * Rohit Seth <rohit.seth@intel.com> | ||
| 12 | * Ken Chen <kenneth.w.chen@intel.com> | ||
| 11 | */ | 13 | */ |
| 12 | #include <linux/config.h> | 14 | #include <linux/config.h> |
| 13 | #include <linux/module.h> | 15 | #include <linux/module.h> |
| @@ -16,78 +18,75 @@ | |||
| 16 | #include <linux/sched.h> | 18 | #include <linux/sched.h> |
| 17 | #include <linux/smp.h> | 19 | #include <linux/smp.h> |
| 18 | #include <linux/mm.h> | 20 | #include <linux/mm.h> |
| 21 | #include <linux/bootmem.h> | ||
| 19 | 22 | ||
| 20 | #include <asm/delay.h> | 23 | #include <asm/delay.h> |
| 21 | #include <asm/mmu_context.h> | 24 | #include <asm/mmu_context.h> |
| 22 | #include <asm/pgalloc.h> | 25 | #include <asm/pgalloc.h> |
| 23 | #include <asm/pal.h> | 26 | #include <asm/pal.h> |
| 24 | #include <asm/tlbflush.h> | 27 | #include <asm/tlbflush.h> |
| 28 | #include <asm/dma.h> | ||
| 25 | 29 | ||
| 26 | static struct { | 30 | static struct { |
| 27 | unsigned long mask; /* mask of supported purge page-sizes */ | 31 | unsigned long mask; /* mask of supported purge page-sizes */ |
| 28 | unsigned long max_bits; /* log2() of largest supported purge page-size */ | 32 | unsigned long max_bits; /* log2 of largest supported purge page-size */ |
| 29 | } purge; | 33 | } purge; |
| 30 | 34 | ||
| 31 | struct ia64_ctx ia64_ctx = { | 35 | struct ia64_ctx ia64_ctx = { |
| 32 | .lock = SPIN_LOCK_UNLOCKED, | 36 | .lock = SPIN_LOCK_UNLOCKED, |
| 33 | .next = 1, | 37 | .next = 1, |
| 34 | .limit = (1 << 15) - 1, /* start out with the safe (architected) limit */ | ||
| 35 | .max_ctx = ~0U | 38 | .max_ctx = ~0U |
| 36 | }; | 39 | }; |
| 37 | 40 | ||
| 38 | DEFINE_PER_CPU(u8, ia64_need_tlb_flush); | 41 | DEFINE_PER_CPU(u8, ia64_need_tlb_flush); |
| 39 | 42 | ||
| 40 | /* | 43 | /* |
| 44 | * Initializes the ia64_ctx.bitmap array based on max_ctx+1. | ||
| 45 | * Called after cpu_init() has set up ia64_ctx.max_ctx based on | ||
| 46 | * maximum RID that is supported by boot CPU. | ||
| 47 | */ | ||
| 48 | void __init | ||
| 49 | mmu_context_init (void) | ||
| 50 | { | ||
| 51 | ia64_ctx.bitmap = alloc_bootmem((ia64_ctx.max_ctx+1)>>3); | ||
| 52 | ia64_ctx.flushmap = alloc_bootmem((ia64_ctx.max_ctx+1)>>3); | ||
| 53 | } | ||
| 54 | |||
| 55 | /* | ||
| 41 | * Acquire the ia64_ctx.lock before calling this function! | 56 | * Acquire the ia64_ctx.lock before calling this function! |
| 42 | */ | 57 | */ |
| 43 | void | 58 | void |
| 44 | wrap_mmu_context (struct mm_struct *mm) | 59 | wrap_mmu_context (struct mm_struct *mm) |
| 45 | { | 60 | { |
| 46 | unsigned long tsk_context, max_ctx = ia64_ctx.max_ctx; | 61 | int i, cpu; |
| 47 | struct task_struct *tsk; | 62 | unsigned long flush_bit; |
| 48 | int i; | ||
| 49 | 63 | ||
| 50 | if (ia64_ctx.next > max_ctx) | 64 | for (i=0; i <= ia64_ctx.max_ctx / BITS_PER_LONG; i++) { |
| 51 | ia64_ctx.next = 300; /* skip daemons */ | 65 | flush_bit = xchg(&ia64_ctx.flushmap[i], 0); |
| 52 | ia64_ctx.limit = max_ctx + 1; | 66 | ia64_ctx.bitmap[i] ^= flush_bit; |
| 67 | } | ||
| 68 | |||
| 69 | /* use offset at 300 to skip daemons */ | ||
| 70 | ia64_ctx.next = find_next_zero_bit(ia64_ctx.bitmap, | ||
| 71 | ia64_ctx.max_ctx, 300); | ||
| 72 | ia64_ctx.limit = find_next_bit(ia64_ctx.bitmap, | ||
| 73 | ia64_ctx.max_ctx, ia64_ctx.next); | ||
| 53 | 74 | ||
| 54 | /* | 75 | /* |
| 55 | * Scan all the task's mm->context and set proper safe range | 76 | * can't call flush_tlb_all() here because of race condition |
| 77 | * with O(1) scheduler [EF] | ||
| 56 | */ | 78 | */ |
| 57 | 79 | cpu = get_cpu(); /* prevent preemption/migration */ | |
| 58 | read_lock(&tasklist_lock); | 80 | for_each_online_cpu(i) |
| 59 | repeat: | 81 | if (i != cpu) |
| 60 | for_each_process(tsk) { | 82 | per_cpu(ia64_need_tlb_flush, i) = 1; |
| 61 | if (!tsk->mm) | 83 | put_cpu(); |
| 62 | continue; | ||
| 63 | tsk_context = tsk->mm->context; | ||
| 64 | if (tsk_context == ia64_ctx.next) { | ||
| 65 | if (++ia64_ctx.next >= ia64_ctx.limit) { | ||
| 66 | /* empty range: reset the range limit and start over */ | ||
| 67 | if (ia64_ctx.next > max_ctx) | ||
| 68 | ia64_ctx.next = 300; | ||
| 69 | ia64_ctx.limit = max_ctx + 1; | ||
| 70 | goto repeat; | ||
| 71 | } | ||
| 72 | } | ||
| 73 | if ((tsk_context > ia64_ctx.next) && (tsk_context < ia64_ctx.limit)) | ||
| 74 | ia64_ctx.limit = tsk_context; | ||
| 75 | } | ||
| 76 | read_unlock(&tasklist_lock); | ||
| 77 | /* can't call flush_tlb_all() here because of race condition with O(1) scheduler [EF] */ | ||
| 78 | { | ||
| 79 | int cpu = get_cpu(); /* prevent preemption/migration */ | ||
| 80 | for_each_online_cpu(i) { | ||
| 81 | if (i != cpu) | ||
| 82 | per_cpu(ia64_need_tlb_flush, i) = 1; | ||
| 83 | } | ||
| 84 | put_cpu(); | ||
| 85 | } | ||
| 86 | local_flush_tlb_all(); | 84 | local_flush_tlb_all(); |
| 87 | } | 85 | } |
| 88 | 86 | ||
| 89 | void | 87 | void |
| 90 | ia64_global_tlb_purge (struct mm_struct *mm, unsigned long start, unsigned long end, unsigned long nbits) | 88 | ia64_global_tlb_purge (struct mm_struct *mm, unsigned long start, |
| 89 | unsigned long end, unsigned long nbits) | ||
| 91 | { | 90 | { |
| 92 | static DEFINE_SPINLOCK(ptcg_lock); | 91 | static DEFINE_SPINLOCK(ptcg_lock); |
| 93 | 92 | ||
| @@ -135,7 +134,8 @@ local_flush_tlb_all (void) | |||
| 135 | } | 134 | } |
| 136 | 135 | ||
| 137 | void | 136 | void |
| 138 | flush_tlb_range (struct vm_area_struct *vma, unsigned long start, unsigned long end) | 137 | flush_tlb_range (struct vm_area_struct *vma, unsigned long start, |
| 138 | unsigned long end) | ||
| 139 | { | 139 | { |
| 140 | struct mm_struct *mm = vma->vm_mm; | 140 | struct mm_struct *mm = vma->vm_mm; |
| 141 | unsigned long size = end - start; | 141 | unsigned long size = end - start; |
| @@ -149,7 +149,8 @@ flush_tlb_range (struct vm_area_struct *vma, unsigned long start, unsigned long | |||
| 149 | #endif | 149 | #endif |
| 150 | 150 | ||
| 151 | nbits = ia64_fls(size + 0xfff); | 151 | nbits = ia64_fls(size + 0xfff); |
| 152 | while (unlikely (((1UL << nbits) & purge.mask) == 0) && (nbits < purge.max_bits)) | 152 | while (unlikely (((1UL << nbits) & purge.mask) == 0) && |
| 153 | (nbits < purge.max_bits)) | ||
| 153 | ++nbits; | 154 | ++nbits; |
| 154 | if (nbits > purge.max_bits) | 155 | if (nbits > purge.max_bits) |
| 155 | nbits = purge.max_bits; | 156 | nbits = purge.max_bits; |
| @@ -191,5 +192,5 @@ ia64_tlb_init (void) | |||
| 191 | local_cpu_data->ptce_stride[0] = ptce_info.stride[0]; | 192 | local_cpu_data->ptce_stride[0] = ptce_info.stride[0]; |
| 192 | local_cpu_data->ptce_stride[1] = ptce_info.stride[1]; | 193 | local_cpu_data->ptce_stride[1] = ptce_info.stride[1]; |
| 193 | 194 | ||
| 194 | local_flush_tlb_all(); /* nuke left overs from bootstrapping... */ | 195 | local_flush_tlb_all(); /* nuke left overs from bootstrapping... */ |
| 195 | } | 196 | } |
diff --git a/include/asm-ia64/mmu_context.h b/include/asm-ia64/mmu_context.h index 8d6e72f7b08e..b5c65081a3aa 100644 --- a/include/asm-ia64/mmu_context.h +++ b/include/asm-ia64/mmu_context.h | |||
| @@ -7,12 +7,13 @@ | |||
| 7 | */ | 7 | */ |
| 8 | 8 | ||
| 9 | /* | 9 | /* |
| 10 | * Routines to manage the allocation of task context numbers. Task context numbers are | 10 | * Routines to manage the allocation of task context numbers. Task context |
| 11 | * used to reduce or eliminate the need to perform TLB flushes due to context switches. | 11 | * numbers are used to reduce or eliminate the need to perform TLB flushes |
| 12 | * Context numbers are implemented using ia-64 region ids. Since the IA-64 TLB does not | 12 | * due to context switches. Context numbers are implemented using ia-64 |
| 13 | * consider the region number when performing a TLB lookup, we need to assign a unique | 13 | * region ids. Since the IA-64 TLB does not consider the region number when |
| 14 | * region id to each region in a process. We use the least significant three bits in a | 14 | * performing a TLB lookup, we need to assign a unique region id to each |
| 15 | * region id for this purpose. | 15 | * region in a process. We use the least significant three bits in a region |
| 16 | * id for this purpose. | ||
| 16 | */ | 17 | */ |
| 17 | 18 | ||
| 18 | #define IA64_REGION_ID_KERNEL 0 /* the kernel's region id (tlb.c depends on this being 0) */ | 19 | #define IA64_REGION_ID_KERNEL 0 /* the kernel's region id (tlb.c depends on this being 0) */ |
| @@ -32,13 +33,17 @@ | |||
| 32 | struct ia64_ctx { | 33 | struct ia64_ctx { |
| 33 | spinlock_t lock; | 34 | spinlock_t lock; |
| 34 | unsigned int next; /* next context number to use */ | 35 | unsigned int next; /* next context number to use */ |
| 35 | unsigned int limit; /* next >= limit => must call wrap_mmu_context() */ | 36 | unsigned int limit; /* available free range */ |
| 36 | unsigned int max_ctx; /* max. context value supported by all CPUs */ | 37 | unsigned int max_ctx; /* max. context value supported by all CPUs */ |
| 38 | /* call wrap_mmu_context when next >= max_ctx */ | ||
| 39 | unsigned long *bitmap; /* bitmap size is max_ctx+1 */ | ||
| 40 | unsigned long *flushmap;/* pending rid to be flushed */ | ||
| 37 | }; | 41 | }; |
| 38 | 42 | ||
| 39 | extern struct ia64_ctx ia64_ctx; | 43 | extern struct ia64_ctx ia64_ctx; |
| 40 | DECLARE_PER_CPU(u8, ia64_need_tlb_flush); | 44 | DECLARE_PER_CPU(u8, ia64_need_tlb_flush); |
| 41 | 45 | ||
| 46 | extern void mmu_context_init (void); | ||
| 42 | extern void wrap_mmu_context (struct mm_struct *mm); | 47 | extern void wrap_mmu_context (struct mm_struct *mm); |
| 43 | 48 | ||
| 44 | static inline void | 49 | static inline void |
| @@ -47,10 +52,10 @@ enter_lazy_tlb (struct mm_struct *mm, struct task_struct *tsk) | |||
| 47 | } | 52 | } |
| 48 | 53 | ||
| 49 | /* | 54 | /* |
| 50 | * When the context counter wraps around all TLBs need to be flushed because an old | 55 | * When the context counter wraps around all TLBs need to be flushed because |
| 51 | * context number might have been reused. This is signalled by the ia64_need_tlb_flush | 56 | * an old context number might have been reused. This is signalled by the |
| 52 | * per-CPU variable, which is checked in the routine below. Called by activate_mm(). | 57 | * ia64_need_tlb_flush per-CPU variable, which is checked in the routine |
| 53 | * <efocht@ess.nec.de> | 58 | * below. Called by activate_mm(). <efocht@ess.nec.de> |
| 54 | */ | 59 | */ |
| 55 | static inline void | 60 | static inline void |
| 56 | delayed_tlb_flush (void) | 61 | delayed_tlb_flush (void) |
| @@ -60,11 +65,9 @@ delayed_tlb_flush (void) | |||
| 60 | 65 | ||
| 61 | if (unlikely(__ia64_per_cpu_var(ia64_need_tlb_flush))) { | 66 | if (unlikely(__ia64_per_cpu_var(ia64_need_tlb_flush))) { |
| 62 | spin_lock_irqsave(&ia64_ctx.lock, flags); | 67 | spin_lock_irqsave(&ia64_ctx.lock, flags); |
| 63 | { | 68 | if (__ia64_per_cpu_var(ia64_need_tlb_flush)) { |
| 64 | if (__ia64_per_cpu_var(ia64_need_tlb_flush)) { | 69 | local_flush_tlb_all(); |
| 65 | local_flush_tlb_all(); | 70 | __ia64_per_cpu_var(ia64_need_tlb_flush) = 0; |
| 66 | __ia64_per_cpu_var(ia64_need_tlb_flush) = 0; | ||
| 67 | } | ||
| 68 | } | 71 | } |
| 69 | spin_unlock_irqrestore(&ia64_ctx.lock, flags); | 72 | spin_unlock_irqrestore(&ia64_ctx.lock, flags); |
| 70 | } | 73 | } |
| @@ -76,20 +79,27 @@ get_mmu_context (struct mm_struct *mm) | |||
| 76 | unsigned long flags; | 79 | unsigned long flags; |
| 77 | nv_mm_context_t context = mm->context; | 80 | nv_mm_context_t context = mm->context; |
| 78 | 81 | ||
| 79 | if (unlikely(!context)) { | 82 | if (likely(context)) |
| 80 | spin_lock_irqsave(&ia64_ctx.lock, flags); | 83 | goto out; |
| 81 | { | 84 | |
| 82 | /* re-check, now that we've got the lock: */ | 85 | spin_lock_irqsave(&ia64_ctx.lock, flags); |
| 83 | context = mm->context; | 86 | /* re-check, now that we've got the lock: */ |
| 84 | if (context == 0) { | 87 | context = mm->context; |
| 85 | cpus_clear(mm->cpu_vm_mask); | 88 | if (context == 0) { |
| 86 | if (ia64_ctx.next >= ia64_ctx.limit) | 89 | cpus_clear(mm->cpu_vm_mask); |
| 87 | wrap_mmu_context(mm); | 90 | if (ia64_ctx.next >= ia64_ctx.limit) { |
| 88 | mm->context = context = ia64_ctx.next++; | 91 | ia64_ctx.next = find_next_zero_bit(ia64_ctx.bitmap, |
| 89 | } | 92 | ia64_ctx.max_ctx, ia64_ctx.next); |
| 93 | ia64_ctx.limit = find_next_bit(ia64_ctx.bitmap, | ||
| 94 | ia64_ctx.max_ctx, ia64_ctx.next); | ||
| 95 | if (ia64_ctx.next >= ia64_ctx.max_ctx) | ||
| 96 | wrap_mmu_context(mm); | ||
| 90 | } | 97 | } |
| 91 | spin_unlock_irqrestore(&ia64_ctx.lock, flags); | 98 | mm->context = context = ia64_ctx.next++; |
| 99 | __set_bit(context, ia64_ctx.bitmap); | ||
| 92 | } | 100 | } |
| 101 | spin_unlock_irqrestore(&ia64_ctx.lock, flags); | ||
| 102 | out: | ||
| 93 | /* | 103 | /* |
| 94 | * Ensure we're not starting to use "context" before any old | 104 | * Ensure we're not starting to use "context" before any old |
| 95 | * uses of it are gone from our TLB. | 105 | * uses of it are gone from our TLB. |
| @@ -100,8 +110,8 @@ get_mmu_context (struct mm_struct *mm) | |||
| 100 | } | 110 | } |
| 101 | 111 | ||
| 102 | /* | 112 | /* |
| 103 | * Initialize context number to some sane value. MM is guaranteed to be a brand-new | 113 | * Initialize context number to some sane value. MM is guaranteed to be a |
| 104 | * address-space, so no TLB flushing is needed, ever. | 114 | * brand-new address-space, so no TLB flushing is needed, ever. |
| 105 | */ | 115 | */ |
| 106 | static inline int | 116 | static inline int |
| 107 | init_new_context (struct task_struct *p, struct mm_struct *mm) | 117 | init_new_context (struct task_struct *p, struct mm_struct *mm) |
| @@ -162,7 +172,10 @@ activate_context (struct mm_struct *mm) | |||
| 162 | if (!cpu_isset(smp_processor_id(), mm->cpu_vm_mask)) | 172 | if (!cpu_isset(smp_processor_id(), mm->cpu_vm_mask)) |
| 163 | cpu_set(smp_processor_id(), mm->cpu_vm_mask); | 173 | cpu_set(smp_processor_id(), mm->cpu_vm_mask); |
| 164 | reload_context(context); | 174 | reload_context(context); |
| 165 | /* in the unlikely event of a TLB-flush by another thread, redo the load: */ | 175 | /* |
| 176 | * in the unlikely event of a TLB-flush by another thread, | ||
| 177 | * redo the load. | ||
| 178 | */ | ||
| 166 | } while (unlikely(context != mm->context)); | 179 | } while (unlikely(context != mm->context)); |
| 167 | } | 180 | } |
| 168 | 181 | ||
| @@ -175,8 +188,8 @@ static inline void | |||
| 175 | activate_mm (struct mm_struct *prev, struct mm_struct *next) | 188 | activate_mm (struct mm_struct *prev, struct mm_struct *next) |
| 176 | { | 189 | { |
| 177 | /* | 190 | /* |
| 178 | * We may get interrupts here, but that's OK because interrupt handlers cannot | 191 | * We may get interrupts here, but that's OK because interrupt |
| 179 | * touch user-space. | 192 | * handlers cannot touch user-space. |
| 180 | */ | 193 | */ |
| 181 | ia64_set_kr(IA64_KR_PT_BASE, __pa(next->pgd)); | 194 | ia64_set_kr(IA64_KR_PT_BASE, __pa(next->pgd)); |
| 182 | activate_context(next); | 195 | activate_context(next); |
diff --git a/include/asm-ia64/tlbflush.h b/include/asm-ia64/tlbflush.h index b65c62702724..a35b323bae4c 100644 --- a/include/asm-ia64/tlbflush.h +++ b/include/asm-ia64/tlbflush.h | |||
| @@ -51,6 +51,7 @@ flush_tlb_mm (struct mm_struct *mm) | |||
| 51 | if (!mm) | 51 | if (!mm) |
| 52 | return; | 52 | return; |
| 53 | 53 | ||
| 54 | set_bit(mm->context, ia64_ctx.flushmap); | ||
| 54 | mm->context = 0; | 55 | mm->context = 0; |
| 55 | 56 | ||
| 56 | if (atomic_read(&mm->mm_users) == 0) | 57 | if (atomic_read(&mm->mm_users) == 0) |
