diff options
Diffstat (limited to 'arch/ia64/kernel/smp.c')
-rw-r--r-- | arch/ia64/kernel/smp.c | 68 |
1 files changed, 68 insertions, 0 deletions
diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c index 55ddd809b02d..221de3804560 100644 --- a/arch/ia64/kernel/smp.c +++ b/arch/ia64/kernel/smp.c | |||
@@ -50,6 +50,18 @@ | |||
50 | #include <asm/mca.h> | 50 | #include <asm/mca.h> |
51 | 51 | ||
/*
 * Per-cpu counters of local TLB flushes, incremented by
 * smp_local_flush_tlb() so that remote cpus can observe when a
 * requested flush has been picked up.
 *
 * Note: alignment of 4 entries/cacheline was empirically determined
 * to be a good tradeoff between hot cachelines & spreading the array
 * across too many cachelines.
 */
static struct local_tlb_flush_counts {
	unsigned int count;
} __attribute__((__aligned__(32))) local_tlb_flush_counts[NR_CPUS];

/*
 * Per-cpu scratch array: holds this cpu's snapshots of each target
 * cpu's local_tlb_flush_counts entry, taken before flush IPIs are
 * sent, so the sender can spin until each count advances (see
 * smp_flush_tlb_cpumask()).
 */
static DEFINE_PER_CPU(unsigned int, shadow_flush_counts[NR_CPUS]) ____cacheline_aligned;
63 | |||
64 | /* | ||
53 | * Structure and data for smp_call_function(). This is designed to minimise static memory | 65 | * Structure and data for smp_call_function(). This is designed to minimise static memory |
54 | * requirements. It also looks cleaner. | 66 | * requirements. It also looks cleaner. |
55 | */ | 67 | */ |
@@ -248,6 +260,62 @@ smp_send_reschedule (int cpu) | |||
248 | platform_send_ipi(cpu, IA64_IPI_RESCHEDULE, IA64_IPI_DM_INT, 0); | 260 | platform_send_ipi(cpu, IA64_IPI_RESCHEDULE, IA64_IPI_DM_INT, 0); |
249 | } | 261 | } |
250 | 262 | ||
263 | /* | ||
264 | * Called with preeemption disabled. | ||
265 | */ | ||
266 | static void | ||
267 | smp_send_local_flush_tlb (int cpu) | ||
268 | { | ||
269 | platform_send_ipi(cpu, IA64_IPI_LOCAL_TLB_FLUSH, IA64_IPI_DM_INT, 0); | ||
270 | } | ||
271 | |||
272 | void | ||
273 | smp_local_flush_tlb(void) | ||
274 | { | ||
275 | /* | ||
276 | * Use atomic ops. Otherwise, the load/increment/store sequence from | ||
277 | * a "++" operation can have the line stolen between the load & store. | ||
278 | * The overhead of the atomic op in negligible in this case & offers | ||
279 | * significant benefit for the brief periods where lots of cpus | ||
280 | * are simultaneously flushing TLBs. | ||
281 | */ | ||
282 | ia64_fetchadd(1, &local_tlb_flush_counts[smp_processor_id()].count, acq); | ||
283 | local_flush_tlb_all(); | ||
284 | } | ||
285 | |||
286 | #define FLUSH_DELAY 5 /* Usec backoff to eliminate excessive cacheline bouncing */ | ||
287 | |||
288 | void | ||
289 | smp_flush_tlb_cpumask(cpumask_t xcpumask) | ||
290 | { | ||
291 | unsigned int *counts = __ia64_per_cpu_var(shadow_flush_counts); | ||
292 | cpumask_t cpumask = xcpumask; | ||
293 | int mycpu, cpu, flush_mycpu = 0; | ||
294 | |||
295 | preempt_disable(); | ||
296 | mycpu = smp_processor_id(); | ||
297 | |||
298 | for_each_cpu_mask(cpu, cpumask) | ||
299 | counts[cpu] = local_tlb_flush_counts[cpu].count; | ||
300 | |||
301 | mb(); | ||
302 | for_each_cpu_mask(cpu, cpumask) { | ||
303 | if (cpu == mycpu) | ||
304 | flush_mycpu = 1; | ||
305 | else | ||
306 | smp_send_local_flush_tlb(cpu); | ||
307 | } | ||
308 | |||
309 | if (flush_mycpu) | ||
310 | smp_local_flush_tlb(); | ||
311 | |||
312 | for_each_cpu_mask(cpu, cpumask) | ||
313 | while(counts[cpu] == local_tlb_flush_counts[cpu].count) | ||
314 | udelay(FLUSH_DELAY); | ||
315 | |||
316 | preempt_enable(); | ||
317 | } | ||
318 | |||
251 | void | 319 | void |
252 | smp_flush_tlb_all (void) | 320 | smp_flush_tlb_all (void) |
253 | { | 321 | { |