author     Glenn Elliott <gelliott@cs.unc.edu>    2012-03-04 19:47:13 -0500
committer  Glenn Elliott <gelliott@cs.unc.edu>    2012-03-04 19:47:13 -0500
commit     c71c03bda1e86c9d5198c5d83f712e695c4f2a1e (patch)
tree       ecb166cb3e2b7e2adb3b5e292245fefd23381ac8 /arch/x86/mm/tlb.c
parent     ea53c912f8a86a8567697115b6a0d8152beee5c8 (diff)
parent     6a00f206debf8a5c8899055726ad127dbeeed098 (diff)
Merge branch 'mpi-master' into wip-k-fmlp

Conflicts:
    litmus/sched_cedf.c
Diffstat (limited to 'arch/x86/mm/tlb.c')
-rw-r--r--    arch/x86/mm/tlb.c    63
1 file changed, 53 insertions(+), 10 deletions(-)
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index c03f14ab6667..d6c0418c3e47 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -5,6 +5,7 @@
 #include <linux/smp.h>
 #include <linux/interrupt.h>
 #include <linux/module.h>
+#include <linux/cpu.h>
 
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
@@ -52,6 +53,8 @@ union smp_flush_state {
    want false sharing in the per cpu data segment. */
 static union smp_flush_state flush_state[NUM_INVALIDATE_TLB_VECTORS];
 
+static DEFINE_PER_CPU_READ_MOSTLY(int, tlb_vector_offset);
+
 /*
  * We cannot call mmdrop() because we are in interrupt context,
  * instead update mm->cpu_vm_mask.
@@ -173,15 +176,11 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask,
        union smp_flush_state *f;
 
        /* Caller has disabled preemption */
-       sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS;
+       sender = this_cpu_read(tlb_vector_offset);
        f = &flush_state[sender];
 
-       /*
-        * Could avoid this lock when
-        * num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is
-        * probably not worth checking this for a cache-hot lock.
-        */
-       raw_spin_lock(&f->tlbstate_lock);
+       if (nr_cpu_ids > NUM_INVALIDATE_TLB_VECTORS)
+               raw_spin_lock(&f->tlbstate_lock);
 
        f->flush_mm = mm;
        f->flush_va = va;
@@ -199,7 +198,8 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask,
 
        f->flush_mm = NULL;
        f->flush_va = 0;
-       raw_spin_unlock(&f->tlbstate_lock);
+       if (nr_cpu_ids > NUM_INVALIDATE_TLB_VECTORS)
+               raw_spin_unlock(&f->tlbstate_lock);
 }
 
 void native_flush_tlb_others(const struct cpumask *cpumask,
@@ -208,16 +208,57 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
        if (is_uv_system()) {
                unsigned int cpu;
 
-               cpu = get_cpu();
+               cpu = smp_processor_id();
                cpumask = uv_flush_tlb_others(cpumask, mm, va, cpu);
                if (cpumask)
                        flush_tlb_others_ipi(cpumask, mm, va);
-               put_cpu();
                return;
        }
        flush_tlb_others_ipi(cpumask, mm, va);
 }
 
+static void __cpuinit calculate_tlb_offset(void)
+{
+       int cpu, node, nr_node_vecs, idx = 0;
+       /*
+        * we are changing tlb_vector_offset for each CPU in runtime, but this
+        * will not cause inconsistency, as the write is atomic under X86. we
+        * might see more lock contentions in a short time, but after all CPU's
+        * tlb_vector_offset are changed, everything should go normal
+        *
+        * Note: if NUM_INVALIDATE_TLB_VECTORS % nr_online_nodes !=0, we might
+        * waste some vectors.
+        **/
+       if (nr_online_nodes > NUM_INVALIDATE_TLB_VECTORS)
+               nr_node_vecs = 1;
+       else
+               nr_node_vecs = NUM_INVALIDATE_TLB_VECTORS/nr_online_nodes;
+
+       for_each_online_node(node) {
+               int node_offset = (idx % NUM_INVALIDATE_TLB_VECTORS) *
+                       nr_node_vecs;
+               int cpu_offset = 0;
+               for_each_cpu(cpu, cpumask_of_node(node)) {
+                       per_cpu(tlb_vector_offset, cpu) = node_offset +
+                               cpu_offset;
+                       cpu_offset++;
+                       cpu_offset = cpu_offset % nr_node_vecs;
+               }
+               idx++;
+       }
+}
+
+static int __cpuinit tlb_cpuhp_notify(struct notifier_block *n,
+               unsigned long action, void *hcpu)
+{
+       switch (action & 0xf) {
+       case CPU_ONLINE:
+       case CPU_DEAD:
+               calculate_tlb_offset();
+       }
+       return NOTIFY_OK;
+}
+
 static int __cpuinit init_smp_flush(void)
 {
        int i;
@@ -225,6 +266,8 @@ static int __cpuinit init_smp_flush(void)
        for (i = 0; i < ARRAY_SIZE(flush_state); i++)
                raw_spin_lock_init(&flush_state[i].tlbstate_lock);
 
+       calculate_tlb_offset();
+       hotcpu_notifier(tlb_cpuhp_notify, 0);
        return 0;
 }
 core_initcall(init_smp_flush);
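
For illustration only: the stand-alone sketch below reproduces the arithmetic of the new calculate_tlb_offset() in user space, with made-up values (2 NUMA nodes, 6 CPUs per node, 8 invalidate vectors) standing in for the kernel's nr_online_nodes and cpumask_of_node() iterators. It shows how each node receives a contiguous slice of the flush vectors and how CPUs within a node are assigned round-robin inside that slice; it is not part of the patch.

/* sketch_tlb_offset.c -- user-space model of the vector distribution done
 * by calculate_tlb_offset() above.  Node/CPU counts are hypothetical; the
 * kernel derives them from the real topology.
 */
#include <stdio.h>

#define NUM_INVALIDATE_TLB_VECTORS 8

int main(void)
{
        int nr_online_nodes = 2;     /* hypothetical topology */
        int cpus_per_node = 6;
        int nr_node_vecs, node, idx = 0;

        /* each node gets an equal slice of the vectors, or a single
         * vector if there are more nodes than vectors */
        if (nr_online_nodes > NUM_INVALIDATE_TLB_VECTORS)
                nr_node_vecs = 1;
        else
                nr_node_vecs = NUM_INVALIDATE_TLB_VECTORS / nr_online_nodes;

        for (node = 0; node < nr_online_nodes; node++) {
                int node_offset = (idx % NUM_INVALIDATE_TLB_VECTORS) * nr_node_vecs;
                int cpu_offset = 0;
                int cpu;

                /* CPUs of this node share the node's slice, round-robin */
                for (cpu = 0; cpu < cpus_per_node; cpu++) {
                        printf("node %d cpu %2d -> tlb vector %d\n",
                               node, node * cpus_per_node + cpu,
                               node_offset + cpu_offset);
                        cpu_offset = (cpu_offset + 1) % nr_node_vecs;
                }
                idx++;
        }
        return 0;
}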