diff options
author | Jack Steiner <steiner@sgi.com> | 2007-05-08 17:50:43 -0400 |
---|---|---|
committer | Tony Luck <tony.luck@intel.com> | 2007-05-08 17:50:43 -0400 |
commit | 3be44b9cc33d26930cb3bb014f35f582c6522481 (patch) | |
tree | 09225c5f0fb4c6caa81bbdff216ec83a093e4d12 /arch/ia64/kernel | |
parent | 8737d59579c5e61ea3d5da4bd63303159fd1cf7e (diff) |
[IA64] Optional method to purge the TLB on SN systems
This patch adds an optional method for purging the TLB on SN IA64 systems.
The change should not affect any non-SN system.
Signed-off-by: Jack Steiner <steiner@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Diffstat (limited to 'arch/ia64/kernel')
-rw-r--r-- | arch/ia64/kernel/irq_ia64.c | 27 | ||||
-rw-r--r-- | arch/ia64/kernel/smp.c | 68 |
2 files changed, 90 insertions, 5 deletions
diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c index 456f57b087ca..9a5f41be760b 100644 --- a/arch/ia64/kernel/irq_ia64.c +++ b/arch/ia64/kernel/irq_ia64.c | |||
@@ -39,6 +39,7 @@ | |||
39 | #include <asm/machvec.h> | 39 | #include <asm/machvec.h> |
40 | #include <asm/pgtable.h> | 40 | #include <asm/pgtable.h> |
41 | #include <asm/system.h> | 41 | #include <asm/system.h> |
42 | #include <asm/tlbflush.h> | ||
42 | 43 | ||
43 | #ifdef CONFIG_PERFMON | 44 | #ifdef CONFIG_PERFMON |
44 | # include <asm/perfmon.h> | 45 | # include <asm/perfmon.h> |
@@ -127,8 +128,10 @@ void destroy_irq(unsigned int irq) | |||
127 | 128 | ||
128 | #ifdef CONFIG_SMP | 129 | #ifdef CONFIG_SMP |
129 | # define IS_RESCHEDULE(vec) (vec == IA64_IPI_RESCHEDULE) | 130 | # define IS_RESCHEDULE(vec) (vec == IA64_IPI_RESCHEDULE) |
131 | # define IS_LOCAL_TLB_FLUSH(vec) (vec == IA64_IPI_LOCAL_TLB_FLUSH) | ||
130 | #else | 132 | #else |
131 | # define IS_RESCHEDULE(vec) (0) | 133 | # define IS_RESCHEDULE(vec) (0) |
134 | # define IS_LOCAL_TLB_FLUSH(vec) (0) | ||
132 | #endif | 135 | #endif |
133 | /* | 136 | /* |
134 | * That's where the IVT branches when we get an external | 137 | * That's where the IVT branches when we get an external |
@@ -180,8 +183,11 @@ ia64_handle_irq (ia64_vector vector, struct pt_regs *regs) | |||
180 | saved_tpr = ia64_getreg(_IA64_REG_CR_TPR); | 183 | saved_tpr = ia64_getreg(_IA64_REG_CR_TPR); |
181 | ia64_srlz_d(); | 184 | ia64_srlz_d(); |
182 | while (vector != IA64_SPURIOUS_INT_VECTOR) { | 185 | while (vector != IA64_SPURIOUS_INT_VECTOR) { |
183 | if (unlikely(IS_RESCHEDULE(vector))) | 186 | if (unlikely(IS_LOCAL_TLB_FLUSH(vector))) { |
184 | kstat_this_cpu.irqs[vector]++; | 187 | smp_local_flush_tlb(); |
188 | kstat_this_cpu.irqs[vector]++; | ||
189 | } else if (unlikely(IS_RESCHEDULE(vector))) | ||
190 | kstat_this_cpu.irqs[vector]++; | ||
185 | else { | 191 | else { |
186 | ia64_setreg(_IA64_REG_CR_TPR, vector); | 192 | ia64_setreg(_IA64_REG_CR_TPR, vector); |
187 | ia64_srlz_d(); | 193 | ia64_srlz_d(); |
@@ -227,8 +233,11 @@ void ia64_process_pending_intr(void) | |||
227 | * Perform normal interrupt style processing | 233 | * Perform normal interrupt style processing |
228 | */ | 234 | */ |
229 | while (vector != IA64_SPURIOUS_INT_VECTOR) { | 235 | while (vector != IA64_SPURIOUS_INT_VECTOR) { |
230 | if (unlikely(IS_RESCHEDULE(vector))) | 236 | if (unlikely(IS_LOCAL_TLB_FLUSH(vector))) { |
231 | kstat_this_cpu.irqs[vector]++; | 237 | smp_local_flush_tlb(); |
238 | kstat_this_cpu.irqs[vector]++; | ||
239 | } else if (unlikely(IS_RESCHEDULE(vector))) | ||
240 | kstat_this_cpu.irqs[vector]++; | ||
232 | else { | 241 | else { |
233 | struct pt_regs *old_regs = set_irq_regs(NULL); | 242 | struct pt_regs *old_regs = set_irq_regs(NULL); |
234 | 243 | ||
@@ -260,12 +269,12 @@ void ia64_process_pending_intr(void) | |||
260 | 269 | ||
261 | 270 | ||
262 | #ifdef CONFIG_SMP | 271 | #ifdef CONFIG_SMP |
263 | extern irqreturn_t handle_IPI (int irq, void *dev_id); | ||
264 | 272 | ||
265 | static irqreturn_t dummy_handler (int irq, void *dev_id) | 273 | static irqreturn_t dummy_handler (int irq, void *dev_id) |
266 | { | 274 | { |
267 | BUG(); | 275 | BUG(); |
268 | } | 276 | } |
277 | extern irqreturn_t handle_IPI (int irq, void *dev_id); | ||
269 | 278 | ||
270 | static struct irqaction ipi_irqaction = { | 279 | static struct irqaction ipi_irqaction = { |
271 | .handler = handle_IPI, | 280 | .handler = handle_IPI, |
@@ -278,6 +287,13 @@ static struct irqaction resched_irqaction = { | |||
278 | .flags = IRQF_DISABLED, | 287 | .flags = IRQF_DISABLED, |
279 | .name = "resched" | 288 | .name = "resched" |
280 | }; | 289 | }; |
290 | |||
291 | static struct irqaction tlb_irqaction = { | ||
292 | .handler = dummy_handler, | ||
293 | .flags = SA_INTERRUPT, | ||
294 | .name = "tlb_flush" | ||
295 | }; | ||
296 | |||
281 | #endif | 297 | #endif |
282 | 298 | ||
283 | void | 299 | void |
@@ -303,6 +319,7 @@ init_IRQ (void) | |||
303 | #ifdef CONFIG_SMP | 319 | #ifdef CONFIG_SMP |
304 | register_percpu_irq(IA64_IPI_VECTOR, &ipi_irqaction); | 320 | register_percpu_irq(IA64_IPI_VECTOR, &ipi_irqaction); |
305 | register_percpu_irq(IA64_IPI_RESCHEDULE, &resched_irqaction); | 321 | register_percpu_irq(IA64_IPI_RESCHEDULE, &resched_irqaction); |
322 | register_percpu_irq(IA64_IPI_LOCAL_TLB_FLUSH, &tlb_irqaction); | ||
306 | #endif | 323 | #endif |
307 | #ifdef CONFIG_PERFMON | 324 | #ifdef CONFIG_PERFMON |
308 | pfm_init_percpu(); | 325 | pfm_init_percpu(); |
diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c index 55ddd809b02d..221de3804560 100644 --- a/arch/ia64/kernel/smp.c +++ b/arch/ia64/kernel/smp.c | |||
@@ -50,6 +50,18 @@ | |||
50 | #include <asm/mca.h> | 50 | #include <asm/mca.h> |
51 | 51 | ||
52 | /* | 52 | /* |
53 | * Note: alignment of 4 entries/cacheline was empirically determined | ||
54 | * to be a good tradeoff between hot cachelines & spreading the array | ||
55 | * across too many cachelines. | ||
56 | */ | ||
57 | static struct local_tlb_flush_counts { | ||
58 | unsigned int count; | ||
59 | } __attribute__((__aligned__(32))) local_tlb_flush_counts[NR_CPUS]; | ||
60 | |||
61 | static DEFINE_PER_CPU(unsigned int, shadow_flush_counts[NR_CPUS]) ____cacheline_aligned; | ||
62 | |||
63 | |||
64 | /* | ||
53 | * Structure and data for smp_call_function(). This is designed to minimise static memory | 65 | * Structure and data for smp_call_function(). This is designed to minimise static memory |
54 | * requirements. It also looks cleaner. | 66 | * requirements. It also looks cleaner. |
55 | */ | 67 | */ |
@@ -248,6 +260,62 @@ smp_send_reschedule (int cpu) | |||
248 | platform_send_ipi(cpu, IA64_IPI_RESCHEDULE, IA64_IPI_DM_INT, 0); | 260 | platform_send_ipi(cpu, IA64_IPI_RESCHEDULE, IA64_IPI_DM_INT, 0); |
249 | } | 261 | } |
250 | 262 | ||
263 | /* | ||
264 | * Called with preemption disabled. | ||
265 | */ | ||
266 | static void | ||
267 | smp_send_local_flush_tlb (int cpu) | ||
268 | { | ||
269 | platform_send_ipi(cpu, IA64_IPI_LOCAL_TLB_FLUSH, IA64_IPI_DM_INT, 0); | ||
270 | } | ||
271 | |||
272 | void | ||
273 | smp_local_flush_tlb(void) | ||
274 | { | ||
275 | /* | ||
276 | * Use atomic ops. Otherwise, the load/increment/store sequence from | ||
277 | * a "++" operation can have the line stolen between the load & store. | ||
278 | * The overhead of the atomic op is negligible in this case & offers | ||
279 | * significant benefit for the brief periods where lots of cpus | ||
280 | * are simultaneously flushing TLBs. | ||
281 | */ | ||
282 | ia64_fetchadd(1, &local_tlb_flush_counts[smp_processor_id()].count, acq); | ||
283 | local_flush_tlb_all(); | ||
284 | } | ||
285 | |||
286 | #define FLUSH_DELAY 5 /* Usec backoff to eliminate excessive cacheline bouncing */ | ||
287 | |||
288 | void | ||
289 | smp_flush_tlb_cpumask(cpumask_t xcpumask) | ||
290 | { | ||
291 | unsigned int *counts = __ia64_per_cpu_var(shadow_flush_counts); | ||
292 | cpumask_t cpumask = xcpumask; | ||
293 | int mycpu, cpu, flush_mycpu = 0; | ||
294 | |||
295 | preempt_disable(); | ||
296 | mycpu = smp_processor_id(); | ||
297 | |||
298 | for_each_cpu_mask(cpu, cpumask) | ||
299 | counts[cpu] = local_tlb_flush_counts[cpu].count; | ||
300 | |||
301 | mb(); | ||
302 | for_each_cpu_mask(cpu, cpumask) { | ||
303 | if (cpu == mycpu) | ||
304 | flush_mycpu = 1; | ||
305 | else | ||
306 | smp_send_local_flush_tlb(cpu); | ||
307 | } | ||
308 | |||
309 | if (flush_mycpu) | ||
310 | smp_local_flush_tlb(); | ||
311 | |||
312 | for_each_cpu_mask(cpu, cpumask) | ||
313 | while(counts[cpu] == local_tlb_flush_counts[cpu].count) | ||
314 | udelay(FLUSH_DELAY); | ||
315 | |||
316 | preempt_enable(); | ||
317 | } | ||
318 | |||
251 | void | 319 | void |
252 | smp_flush_tlb_all (void) | 320 | smp_flush_tlb_all (void) |
253 | { | 321 | { |