diff options
author | Glenn Elliott <gelliott@cs.unc.edu> | 2012-03-04 19:47:13 -0500 |
---|---|---|
committer | Glenn Elliott <gelliott@cs.unc.edu> | 2012-03-04 19:47:13 -0500 |
commit | c71c03bda1e86c9d5198c5d83f712e695c4f2a1e (patch) | |
tree | ecb166cb3e2b7e2adb3b5e292245fefd23381ac8 /arch/x86/mm/tlb.c | |
parent | ea53c912f8a86a8567697115b6a0d8152beee5c8 (diff) | |
parent | 6a00f206debf8a5c8899055726ad127dbeeed098 (diff) |
Merge branch 'mpi-master' into wip-k-fmlp
Conflicts:
litmus/sched_cedf.c
Diffstat (limited to 'arch/x86/mm/tlb.c')
-rw-r--r-- | arch/x86/mm/tlb.c | 63 |
1 files changed, 53 insertions, 10 deletions
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index c03f14ab6667..d6c0418c3e47 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c | |||
@@ -5,6 +5,7 @@ | |||
5 | #include <linux/smp.h> | 5 | #include <linux/smp.h> |
6 | #include <linux/interrupt.h> | 6 | #include <linux/interrupt.h> |
7 | #include <linux/module.h> | 7 | #include <linux/module.h> |
8 | #include <linux/cpu.h> | ||
8 | 9 | ||
9 | #include <asm/tlbflush.h> | 10 | #include <asm/tlbflush.h> |
10 | #include <asm/mmu_context.h> | 11 | #include <asm/mmu_context.h> |
@@ -52,6 +53,8 @@ union smp_flush_state { | |||
52 | want false sharing in the per cpu data segment. */ | 53 | want false sharing in the per cpu data segment. */ |
53 | static union smp_flush_state flush_state[NUM_INVALIDATE_TLB_VECTORS]; | 54 | static union smp_flush_state flush_state[NUM_INVALIDATE_TLB_VECTORS]; |
54 | 55 | ||
56 | static DEFINE_PER_CPU_READ_MOSTLY(int, tlb_vector_offset); | ||
57 | |||
55 | /* | 58 | /* |
56 | * We cannot call mmdrop() because we are in interrupt context, | 59 | * We cannot call mmdrop() because we are in interrupt context, |
57 | * instead update mm->cpu_vm_mask. | 60 | * instead update mm->cpu_vm_mask. |
@@ -173,15 +176,11 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask, | |||
173 | union smp_flush_state *f; | 176 | union smp_flush_state *f; |
174 | 177 | ||
175 | /* Caller has disabled preemption */ | 178 | /* Caller has disabled preemption */ |
176 | sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS; | 179 | sender = this_cpu_read(tlb_vector_offset); |
177 | f = &flush_state[sender]; | 180 | f = &flush_state[sender]; |
178 | 181 | ||
179 | /* | 182 | if (nr_cpu_ids > NUM_INVALIDATE_TLB_VECTORS) |
180 | * Could avoid this lock when | 183 | raw_spin_lock(&f->tlbstate_lock); |
181 | * num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is | ||
182 | * probably not worth checking this for a cache-hot lock. | ||
183 | */ | ||
184 | raw_spin_lock(&f->tlbstate_lock); | ||
185 | 184 | ||
186 | f->flush_mm = mm; | 185 | f->flush_mm = mm; |
187 | f->flush_va = va; | 186 | f->flush_va = va; |
@@ -199,7 +198,8 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask, | |||
199 | 198 | ||
200 | f->flush_mm = NULL; | 199 | f->flush_mm = NULL; |
201 | f->flush_va = 0; | 200 | f->flush_va = 0; |
202 | raw_spin_unlock(&f->tlbstate_lock); | 201 | if (nr_cpu_ids > NUM_INVALIDATE_TLB_VECTORS) |
202 | raw_spin_unlock(&f->tlbstate_lock); | ||
203 | } | 203 | } |
204 | 204 | ||
205 | void native_flush_tlb_others(const struct cpumask *cpumask, | 205 | void native_flush_tlb_others(const struct cpumask *cpumask, |
@@ -208,16 +208,57 @@ void native_flush_tlb_others(const struct cpumask *cpumask, | |||
208 | if (is_uv_system()) { | 208 | if (is_uv_system()) { |
209 | unsigned int cpu; | 209 | unsigned int cpu; |
210 | 210 | ||
211 | cpu = get_cpu(); | 211 | cpu = smp_processor_id(); |
212 | cpumask = uv_flush_tlb_others(cpumask, mm, va, cpu); | 212 | cpumask = uv_flush_tlb_others(cpumask, mm, va, cpu); |
213 | if (cpumask) | 213 | if (cpumask) |
214 | flush_tlb_others_ipi(cpumask, mm, va); | 214 | flush_tlb_others_ipi(cpumask, mm, va); |
215 | put_cpu(); | ||
216 | return; | 215 | return; |
217 | } | 216 | } |
218 | flush_tlb_others_ipi(cpumask, mm, va); | 217 | flush_tlb_others_ipi(cpumask, mm, va); |
219 | } | 218 | } |
220 | 219 | ||
220 | static void __cpuinit calculate_tlb_offset(void) | ||
221 | { | ||
222 | int cpu, node, nr_node_vecs, idx = 0; | ||
223 | /* | ||
224 | * we are changing tlb_vector_offset for each CPU in runtime, but this | ||
225 | * will not cause inconsistency, as the write is atomic under X86. we | ||
226 | * might see more lock contentions in a short time, but after all CPU's | ||
227 | * tlb_vector_offset are changed, everything should go normal | ||
228 | * | ||
229 | * Note: if NUM_INVALIDATE_TLB_VECTORS % nr_online_nodes !=0, we might | ||
230 | * waste some vectors. | ||
231 | **/ | ||
232 | if (nr_online_nodes > NUM_INVALIDATE_TLB_VECTORS) | ||
233 | nr_node_vecs = 1; | ||
234 | else | ||
235 | nr_node_vecs = NUM_INVALIDATE_TLB_VECTORS/nr_online_nodes; | ||
236 | |||
237 | for_each_online_node(node) { | ||
238 | int node_offset = (idx % NUM_INVALIDATE_TLB_VECTORS) * | ||
239 | nr_node_vecs; | ||
240 | int cpu_offset = 0; | ||
241 | for_each_cpu(cpu, cpumask_of_node(node)) { | ||
242 | per_cpu(tlb_vector_offset, cpu) = node_offset + | ||
243 | cpu_offset; | ||
244 | cpu_offset++; | ||
245 | cpu_offset = cpu_offset % nr_node_vecs; | ||
246 | } | ||
247 | idx++; | ||
248 | } | ||
249 | } | ||
250 | |||
251 | static int __cpuinit tlb_cpuhp_notify(struct notifier_block *n, | ||
252 | unsigned long action, void *hcpu) | ||
253 | { | ||
254 | switch (action & 0xf) { | ||
255 | case CPU_ONLINE: | ||
256 | case CPU_DEAD: | ||
257 | calculate_tlb_offset(); | ||
258 | } | ||
259 | return NOTIFY_OK; | ||
260 | } | ||
261 | |||
221 | static int __cpuinit init_smp_flush(void) | 262 | static int __cpuinit init_smp_flush(void) |
222 | { | 263 | { |
223 | int i; | 264 | int i; |
@@ -225,6 +266,8 @@ static int __cpuinit init_smp_flush(void) | |||
225 | for (i = 0; i < ARRAY_SIZE(flush_state); i++) | 266 | for (i = 0; i < ARRAY_SIZE(flush_state); i++) |
226 | raw_spin_lock_init(&flush_state[i].tlbstate_lock); | 267 | raw_spin_lock_init(&flush_state[i].tlbstate_lock); |
227 | 268 | ||
269 | calculate_tlb_offset(); | ||
270 | hotcpu_notifier(tlb_cpuhp_notify, 0); | ||
228 | return 0; | 271 | return 0; |
229 | } | 272 | } |
230 | core_initcall(init_smp_flush); | 273 | core_initcall(init_smp_flush); |