diff options
Diffstat (limited to 'arch/ia64/mm/tlb.c')
-rw-r--r-- | arch/ia64/mm/tlb.c | 100 |
1 files changed, 53 insertions, 47 deletions
diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c index 464557e4ed82..41105d454423 100644 --- a/arch/ia64/mm/tlb.c +++ b/arch/ia64/mm/tlb.c | |||
@@ -8,6 +8,8 @@ | |||
8 | * Modified RID allocation for SMP | 8 | * Modified RID allocation for SMP |
9 | * Goutham Rao <goutham.rao@intel.com> | 9 | * Goutham Rao <goutham.rao@intel.com> |
10 | * IPI based ptc implementation and A-step IPI implementation. | 10 | * IPI based ptc implementation and A-step IPI implementation. |
11 | * Rohit Seth <rohit.seth@intel.com> | ||
12 | * Ken Chen <kenneth.w.chen@intel.com> | ||
11 | */ | 13 | */ |
12 | #include <linux/config.h> | 14 | #include <linux/config.h> |
13 | #include <linux/module.h> | 15 | #include <linux/module.h> |
@@ -16,80 +18,83 @@ | |||
16 | #include <linux/sched.h> | 18 | #include <linux/sched.h> |
17 | #include <linux/smp.h> | 19 | #include <linux/smp.h> |
18 | #include <linux/mm.h> | 20 | #include <linux/mm.h> |
21 | #include <linux/bootmem.h> | ||
19 | 22 | ||
20 | #include <asm/delay.h> | 23 | #include <asm/delay.h> |
21 | #include <asm/mmu_context.h> | 24 | #include <asm/mmu_context.h> |
22 | #include <asm/pgalloc.h> | 25 | #include <asm/pgalloc.h> |
23 | #include <asm/pal.h> | 26 | #include <asm/pal.h> |
24 | #include <asm/tlbflush.h> | 27 | #include <asm/tlbflush.h> |
28 | #include <asm/dma.h> | ||
25 | 29 | ||
26 | static struct { | 30 | static struct { |
27 | unsigned long mask; /* mask of supported purge page-sizes */ | 31 | unsigned long mask; /* mask of supported purge page-sizes */ |
28 | unsigned long max_bits; /* log2() of largest supported purge page-size */ | 32 | unsigned long max_bits; /* log2 of largest supported purge page-size */ |
29 | } purge; | 33 | } purge; |
30 | 34 | ||
31 | struct ia64_ctx ia64_ctx = { | 35 | struct ia64_ctx ia64_ctx = { |
32 | .lock = SPIN_LOCK_UNLOCKED, | 36 | .lock = SPIN_LOCK_UNLOCKED, |
33 | .next = 1, | 37 | .next = 1, |
34 | .limit = (1 << 15) - 1, /* start out with the safe (architected) limit */ | ||
35 | .max_ctx = ~0U | 38 | .max_ctx = ~0U |
36 | }; | 39 | }; |
37 | 40 | ||
38 | DEFINE_PER_CPU(u8, ia64_need_tlb_flush); | 41 | DEFINE_PER_CPU(u8, ia64_need_tlb_flush); |
39 | 42 | ||
40 | /* | 43 | /* |
44 | * Initializes the ia64_ctx.bitmap array based on max_ctx+1. | ||
45 | * Called after cpu_init() has setup ia64_ctx.max_ctx based on | ||
46 | * maximum RID that is supported by boot CPU. | ||
47 | */ | ||
48 | void __init | ||
49 | mmu_context_init (void) | ||
50 | { | ||
51 | ia64_ctx.bitmap = alloc_bootmem((ia64_ctx.max_ctx+1)>>3); | ||
52 | ia64_ctx.flushmap = alloc_bootmem((ia64_ctx.max_ctx+1)>>3); | ||
53 | } | ||
54 | |||
55 | /* | ||
41 | * Acquire the ia64_ctx.lock before calling this function! | 56 | * Acquire the ia64_ctx.lock before calling this function! |
42 | */ | 57 | */ |
43 | void | 58 | void |
44 | wrap_mmu_context (struct mm_struct *mm) | 59 | wrap_mmu_context (struct mm_struct *mm) |
45 | { | 60 | { |
46 | unsigned long tsk_context, max_ctx = ia64_ctx.max_ctx; | 61 | int i, cpu; |
47 | struct task_struct *tsk; | 62 | unsigned long flush_bit; |
48 | int i; | ||
49 | 63 | ||
50 | if (ia64_ctx.next > max_ctx) | 64 | for (i=0; i <= ia64_ctx.max_ctx / BITS_PER_LONG; i++) { |
51 | ia64_ctx.next = 300; /* skip daemons */ | 65 | flush_bit = xchg(&ia64_ctx.flushmap[i], 0); |
52 | ia64_ctx.limit = max_ctx + 1; | 66 | ia64_ctx.bitmap[i] ^= flush_bit; |
67 | } | ||
68 | |||
69 | /* use offset at 300 to skip daemons */ | ||
70 | ia64_ctx.next = find_next_zero_bit(ia64_ctx.bitmap, | ||
71 | ia64_ctx.max_ctx, 300); | ||
72 | ia64_ctx.limit = find_next_bit(ia64_ctx.bitmap, | ||
73 | ia64_ctx.max_ctx, ia64_ctx.next); | ||
53 | 74 | ||
54 | /* | 75 | /* |
55 | * Scan all the task's mm->context and set proper safe range | 76 | * can't call flush_tlb_all() here because of race condition |
77 | * with O(1) scheduler [EF] | ||
56 | */ | 78 | */ |
57 | 79 | cpu = get_cpu(); /* prevent preemption/migration */ | |
58 | read_lock(&tasklist_lock); | 80 | for_each_online_cpu(i) |
59 | repeat: | 81 | if (i != cpu) |
60 | for_each_process(tsk) { | 82 | per_cpu(ia64_need_tlb_flush, i) = 1; |
61 | if (!tsk->mm) | 83 | put_cpu(); |
62 | continue; | ||
63 | tsk_context = tsk->mm->context; | ||
64 | if (tsk_context == ia64_ctx.next) { | ||
65 | if (++ia64_ctx.next >= ia64_ctx.limit) { | ||
66 | /* empty range: reset the range limit and start over */ | ||
67 | if (ia64_ctx.next > max_ctx) | ||
68 | ia64_ctx.next = 300; | ||
69 | ia64_ctx.limit = max_ctx + 1; | ||
70 | goto repeat; | ||
71 | } | ||
72 | } | ||
73 | if ((tsk_context > ia64_ctx.next) && (tsk_context < ia64_ctx.limit)) | ||
74 | ia64_ctx.limit = tsk_context; | ||
75 | } | ||
76 | read_unlock(&tasklist_lock); | ||
77 | /* can't call flush_tlb_all() here because of race condition with O(1) scheduler [EF] */ | ||
78 | { | ||
79 | int cpu = get_cpu(); /* prevent preemption/migration */ | ||
80 | for (i = 0; i < NR_CPUS; ++i) | ||
81 | if (cpu_online(i) && (i != cpu)) | ||
82 | per_cpu(ia64_need_tlb_flush, i) = 1; | ||
83 | put_cpu(); | ||
84 | } | ||
85 | local_flush_tlb_all(); | 84 | local_flush_tlb_all(); |
86 | } | 85 | } |
87 | 86 | ||
88 | void | 87 | void |
89 | ia64_global_tlb_purge (unsigned long start, unsigned long end, unsigned long nbits) | 88 | ia64_global_tlb_purge (struct mm_struct *mm, unsigned long start, |
89 | unsigned long end, unsigned long nbits) | ||
90 | { | 90 | { |
91 | static DEFINE_SPINLOCK(ptcg_lock); | 91 | static DEFINE_SPINLOCK(ptcg_lock); |
92 | 92 | ||
93 | if (mm != current->active_mm) { | ||
94 | flush_tlb_all(); | ||
95 | return; | ||
96 | } | ||
97 | |||
93 | /* HW requires global serialization of ptc.ga. */ | 98 | /* HW requires global serialization of ptc.ga. */ |
94 | spin_lock(&ptcg_lock); | 99 | spin_lock(&ptcg_lock); |
95 | { | 100 | { |
@@ -129,36 +134,37 @@ local_flush_tlb_all (void) | |||
129 | } | 134 | } |
130 | 135 | ||
131 | void | 136 | void |
132 | flush_tlb_range (struct vm_area_struct *vma, unsigned long start, unsigned long end) | 137 | flush_tlb_range (struct vm_area_struct *vma, unsigned long start, |
138 | unsigned long end) | ||
133 | { | 139 | { |
134 | struct mm_struct *mm = vma->vm_mm; | 140 | struct mm_struct *mm = vma->vm_mm; |
135 | unsigned long size = end - start; | 141 | unsigned long size = end - start; |
136 | unsigned long nbits; | 142 | unsigned long nbits; |
137 | 143 | ||
144 | #ifndef CONFIG_SMP | ||
138 | if (mm != current->active_mm) { | 145 | if (mm != current->active_mm) { |
139 | /* this does happen, but perhaps it's not worth optimizing for? */ | ||
140 | #ifdef CONFIG_SMP | ||
141 | flush_tlb_all(); | ||
142 | #else | ||
143 | mm->context = 0; | 146 | mm->context = 0; |
144 | #endif | ||
145 | return; | 147 | return; |
146 | } | 148 | } |
149 | #endif | ||
147 | 150 | ||
148 | nbits = ia64_fls(size + 0xfff); | 151 | nbits = ia64_fls(size + 0xfff); |
149 | while (unlikely (((1UL << nbits) & purge.mask) == 0) && (nbits < purge.max_bits)) | 152 | while (unlikely (((1UL << nbits) & purge.mask) == 0) && |
153 | (nbits < purge.max_bits)) | ||
150 | ++nbits; | 154 | ++nbits; |
151 | if (nbits > purge.max_bits) | 155 | if (nbits > purge.max_bits) |
152 | nbits = purge.max_bits; | 156 | nbits = purge.max_bits; |
153 | start &= ~((1UL << nbits) - 1); | 157 | start &= ~((1UL << nbits) - 1); |
154 | 158 | ||
155 | # ifdef CONFIG_SMP | 159 | # ifdef CONFIG_SMP |
156 | platform_global_tlb_purge(start, end, nbits); | 160 | platform_global_tlb_purge(mm, start, end, nbits); |
157 | # else | 161 | # else |
162 | preempt_disable(); | ||
158 | do { | 163 | do { |
159 | ia64_ptcl(start, (nbits<<2)); | 164 | ia64_ptcl(start, (nbits<<2)); |
160 | start += (1UL << nbits); | 165 | start += (1UL << nbits); |
161 | } while (start < end); | 166 | } while (start < end); |
167 | preempt_enable(); | ||
162 | # endif | 168 | # endif |
163 | 169 | ||
164 | ia64_srlz_i(); /* srlz.i implies srlz.d */ | 170 | ia64_srlz_i(); /* srlz.i implies srlz.d */ |
@@ -186,5 +192,5 @@ ia64_tlb_init (void) | |||
186 | local_cpu_data->ptce_stride[0] = ptce_info.stride[0]; | 192 | local_cpu_data->ptce_stride[0] = ptce_info.stride[0]; |
187 | local_cpu_data->ptce_stride[1] = ptce_info.stride[1]; | 193 | local_cpu_data->ptce_stride[1] = ptce_info.stride[1]; |
188 | 194 | ||
189 | local_flush_tlb_all(); /* nuke left overs from bootstrapping... */ | 195 | local_flush_tlb_all(); /* nuke left overs from bootstrapping... */ |
190 | } | 196 | } |