diff options
author | Jack Steiner <steiner@sgi.com> | 2007-05-08 17:50:43 -0400 |
---|---|---|
committer | Tony Luck <tony.luck@intel.com> | 2007-05-08 17:50:43 -0400 |
commit | 3be44b9cc33d26930cb3bb014f35f582c6522481 (patch) | |
tree | 09225c5f0fb4c6caa81bbdff216ec83a093e4d12 /arch | |
parent | 8737d59579c5e61ea3d5da4bd63303159fd1cf7e (diff) |
[IA64] Optional method to purge the TLB on SN systems
This patch adds an optional method for purging the TLB on SN IA64 systems.
The change should not affect any non-SN system.
Signed-off-by: Jack Steiner <steiner@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Diffstat (limited to 'arch')
-rw-r--r-- | arch/ia64/kernel/irq_ia64.c | 27 | ||||
-rw-r--r-- | arch/ia64/kernel/smp.c | 68 | ||||
-rw-r--r-- | arch/ia64/sn/kernel/sn2/sn2_smp.c | 65 |
3 files changed, 150 insertions, 10 deletions
diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c index 456f57b087ca..9a5f41be760b 100644 --- a/arch/ia64/kernel/irq_ia64.c +++ b/arch/ia64/kernel/irq_ia64.c | |||
@@ -39,6 +39,7 @@ | |||
39 | #include <asm/machvec.h> | 39 | #include <asm/machvec.h> |
40 | #include <asm/pgtable.h> | 40 | #include <asm/pgtable.h> |
41 | #include <asm/system.h> | 41 | #include <asm/system.h> |
42 | #include <asm/tlbflush.h> | ||
42 | 43 | ||
43 | #ifdef CONFIG_PERFMON | 44 | #ifdef CONFIG_PERFMON |
44 | # include <asm/perfmon.h> | 45 | # include <asm/perfmon.h> |
@@ -127,8 +128,10 @@ void destroy_irq(unsigned int irq) | |||
127 | 128 | ||
128 | #ifdef CONFIG_SMP | 129 | #ifdef CONFIG_SMP |
129 | # define IS_RESCHEDULE(vec) (vec == IA64_IPI_RESCHEDULE) | 130 | # define IS_RESCHEDULE(vec) (vec == IA64_IPI_RESCHEDULE) |
131 | # define IS_LOCAL_TLB_FLUSH(vec) (vec == IA64_IPI_LOCAL_TLB_FLUSH) | ||
130 | #else | 132 | #else |
131 | # define IS_RESCHEDULE(vec) (0) | 133 | # define IS_RESCHEDULE(vec) (0) |
134 | # define IS_LOCAL_TLB_FLUSH(vec) (0) | ||
132 | #endif | 135 | #endif |
133 | /* | 136 | /* |
134 | * That's where the IVT branches when we get an external | 137 | * That's where the IVT branches when we get an external |
@@ -180,8 +183,11 @@ ia64_handle_irq (ia64_vector vector, struct pt_regs *regs) | |||
180 | saved_tpr = ia64_getreg(_IA64_REG_CR_TPR); | 183 | saved_tpr = ia64_getreg(_IA64_REG_CR_TPR); |
181 | ia64_srlz_d(); | 184 | ia64_srlz_d(); |
182 | while (vector != IA64_SPURIOUS_INT_VECTOR) { | 185 | while (vector != IA64_SPURIOUS_INT_VECTOR) { |
183 | if (unlikely(IS_RESCHEDULE(vector))) | 186 | if (unlikely(IS_LOCAL_TLB_FLUSH(vector))) { |
184 | kstat_this_cpu.irqs[vector]++; | 187 | smp_local_flush_tlb(); |
188 | kstat_this_cpu.irqs[vector]++; | ||
189 | } else if (unlikely(IS_RESCHEDULE(vector))) | ||
190 | kstat_this_cpu.irqs[vector]++; | ||
185 | else { | 191 | else { |
186 | ia64_setreg(_IA64_REG_CR_TPR, vector); | 192 | ia64_setreg(_IA64_REG_CR_TPR, vector); |
187 | ia64_srlz_d(); | 193 | ia64_srlz_d(); |
@@ -227,8 +233,11 @@ void ia64_process_pending_intr(void) | |||
227 | * Perform normal interrupt style processing | 233 | * Perform normal interrupt style processing |
228 | */ | 234 | */ |
229 | while (vector != IA64_SPURIOUS_INT_VECTOR) { | 235 | while (vector != IA64_SPURIOUS_INT_VECTOR) { |
230 | if (unlikely(IS_RESCHEDULE(vector))) | 236 | if (unlikely(IS_LOCAL_TLB_FLUSH(vector))) { |
231 | kstat_this_cpu.irqs[vector]++; | 237 | smp_local_flush_tlb(); |
238 | kstat_this_cpu.irqs[vector]++; | ||
239 | } else if (unlikely(IS_RESCHEDULE(vector))) | ||
240 | kstat_this_cpu.irqs[vector]++; | ||
232 | else { | 241 | else { |
233 | struct pt_regs *old_regs = set_irq_regs(NULL); | 242 | struct pt_regs *old_regs = set_irq_regs(NULL); |
234 | 243 | ||
@@ -260,12 +269,12 @@ void ia64_process_pending_intr(void) | |||
260 | 269 | ||
261 | 270 | ||
262 | #ifdef CONFIG_SMP | 271 | #ifdef CONFIG_SMP |
263 | extern irqreturn_t handle_IPI (int irq, void *dev_id); | ||
264 | 272 | ||
265 | static irqreturn_t dummy_handler (int irq, void *dev_id) | 273 | static irqreturn_t dummy_handler (int irq, void *dev_id) |
266 | { | 274 | { |
267 | BUG(); | 275 | BUG(); |
268 | } | 276 | } |
277 | extern irqreturn_t handle_IPI (int irq, void *dev_id); | ||
269 | 278 | ||
270 | static struct irqaction ipi_irqaction = { | 279 | static struct irqaction ipi_irqaction = { |
271 | .handler = handle_IPI, | 280 | .handler = handle_IPI, |
@@ -278,6 +287,13 @@ static struct irqaction resched_irqaction = { | |||
278 | .flags = IRQF_DISABLED, | 287 | .flags = IRQF_DISABLED, |
279 | .name = "resched" | 288 | .name = "resched" |
280 | }; | 289 | }; |
290 | |||
291 | static struct irqaction tlb_irqaction = { | ||
292 | .handler = dummy_handler, | ||
293 | .flags = SA_INTERRUPT, | ||
294 | .name = "tlb_flush" | ||
295 | }; | ||
296 | |||
281 | #endif | 297 | #endif |
282 | 298 | ||
283 | void | 299 | void |
@@ -303,6 +319,7 @@ init_IRQ (void) | |||
303 | #ifdef CONFIG_SMP | 319 | #ifdef CONFIG_SMP |
304 | register_percpu_irq(IA64_IPI_VECTOR, &ipi_irqaction); | 320 | register_percpu_irq(IA64_IPI_VECTOR, &ipi_irqaction); |
305 | register_percpu_irq(IA64_IPI_RESCHEDULE, &resched_irqaction); | 321 | register_percpu_irq(IA64_IPI_RESCHEDULE, &resched_irqaction); |
322 | register_percpu_irq(IA64_IPI_LOCAL_TLB_FLUSH, &tlb_irqaction); | ||
306 | #endif | 323 | #endif |
307 | #ifdef CONFIG_PERFMON | 324 | #ifdef CONFIG_PERFMON |
308 | pfm_init_percpu(); | 325 | pfm_init_percpu(); |
diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c index 55ddd809b02d..221de3804560 100644 --- a/arch/ia64/kernel/smp.c +++ b/arch/ia64/kernel/smp.c | |||
@@ -50,6 +50,18 @@ | |||
50 | #include <asm/mca.h> | 50 | #include <asm/mca.h> |
51 | 51 | ||
52 | /* | 52 | /* |
53 | * Note: alignment of 4 entries/cacheline was empirically determined | ||
54 | * to be a good tradeoff between hot cachelines & spreading the array | ||
55 | * across too many cachelines. | ||
56 | */ | ||
57 | static struct local_tlb_flush_counts { | ||
58 | unsigned int count; | ||
59 | } __attribute__((__aligned__(32))) local_tlb_flush_counts[NR_CPUS]; | ||
60 | |||
61 | static DEFINE_PER_CPU(unsigned int, shadow_flush_counts[NR_CPUS]) ____cacheline_aligned; | ||
62 | |||
63 | |||
64 | /* | ||
53 | * Structure and data for smp_call_function(). This is designed to minimise static memory | 65 | * Structure and data for smp_call_function(). This is designed to minimise static memory |
54 | * requirements. It also looks cleaner. | 66 | * requirements. It also looks cleaner. |
55 | */ | 67 | */ |
@@ -248,6 +260,62 @@ smp_send_reschedule (int cpu) | |||
248 | platform_send_ipi(cpu, IA64_IPI_RESCHEDULE, IA64_IPI_DM_INT, 0); | 260 | platform_send_ipi(cpu, IA64_IPI_RESCHEDULE, IA64_IPI_DM_INT, 0); |
249 | } | 261 | } |
250 | 262 | ||
263 | /* | ||
264 | * Called with preemption disabled. | ||
265 | */ | ||
266 | static void | ||
267 | smp_send_local_flush_tlb (int cpu) | ||
268 | { | ||
269 | platform_send_ipi(cpu, IA64_IPI_LOCAL_TLB_FLUSH, IA64_IPI_DM_INT, 0); | ||
270 | } | ||
271 | |||
272 | void | ||
273 | smp_local_flush_tlb(void) | ||
274 | { | ||
275 | /* | ||
276 | * Use atomic ops. Otherwise, the load/increment/store sequence from | ||
277 | * a "++" operation can have the line stolen between the load & store. | ||
278 | * The overhead of the atomic op is negligible in this case & offers | ||
279 | * significant benefit for the brief periods where lots of cpus | ||
280 | * are simultaneously flushing TLBs. | ||
281 | */ | ||
282 | ia64_fetchadd(1, &local_tlb_flush_counts[smp_processor_id()].count, acq); | ||
283 | local_flush_tlb_all(); | ||
284 | } | ||
285 | |||
286 | #define FLUSH_DELAY 5 /* Usec backoff to eliminate excessive cacheline bouncing */ | ||
287 | |||
288 | void | ||
289 | smp_flush_tlb_cpumask(cpumask_t xcpumask) | ||
290 | { | ||
291 | unsigned int *counts = __ia64_per_cpu_var(shadow_flush_counts); | ||
292 | cpumask_t cpumask = xcpumask; | ||
293 | int mycpu, cpu, flush_mycpu = 0; | ||
294 | |||
295 | preempt_disable(); | ||
296 | mycpu = smp_processor_id(); | ||
297 | |||
298 | for_each_cpu_mask(cpu, cpumask) | ||
299 | counts[cpu] = local_tlb_flush_counts[cpu].count; | ||
300 | |||
301 | mb(); | ||
302 | for_each_cpu_mask(cpu, cpumask) { | ||
303 | if (cpu == mycpu) | ||
304 | flush_mycpu = 1; | ||
305 | else | ||
306 | smp_send_local_flush_tlb(cpu); | ||
307 | } | ||
308 | |||
309 | if (flush_mycpu) | ||
310 | smp_local_flush_tlb(); | ||
311 | |||
312 | for_each_cpu_mask(cpu, cpumask) | ||
313 | while(counts[cpu] == local_tlb_flush_counts[cpu].count) | ||
314 | udelay(FLUSH_DELAY); | ||
315 | |||
316 | preempt_enable(); | ||
317 | } | ||
318 | |||
251 | void | 319 | void |
252 | smp_flush_tlb_all (void) | 320 | smp_flush_tlb_all (void) |
253 | { | 321 | { |
diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c index 601747b1e22a..5d318b579fb1 100644 --- a/arch/ia64/sn/kernel/sn2/sn2_smp.c +++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c | |||
@@ -46,6 +46,9 @@ DECLARE_PER_CPU(struct ptc_stats, ptcstats); | |||
46 | 46 | ||
47 | static __cacheline_aligned DEFINE_SPINLOCK(sn2_global_ptc_lock); | 47 | static __cacheline_aligned DEFINE_SPINLOCK(sn2_global_ptc_lock); |
48 | 48 | ||
49 | /* 0 = old algorithm (no IPI flushes), 1 = ipi deadlock flush, 2 = ipi instead of SHUB ptc, >2 = always ipi */ | ||
50 | static int sn2_flush_opt = 0; | ||
51 | |||
49 | extern unsigned long | 52 | extern unsigned long |
50 | sn2_ptc_deadlock_recovery_core(volatile unsigned long *, unsigned long, | 53 | sn2_ptc_deadlock_recovery_core(volatile unsigned long *, unsigned long, |
51 | volatile unsigned long *, unsigned long, | 54 | volatile unsigned long *, unsigned long, |
@@ -76,6 +79,8 @@ struct ptc_stats { | |||
76 | unsigned long shub_itc_clocks; | 79 | unsigned long shub_itc_clocks; |
77 | unsigned long shub_itc_clocks_max; | 80 | unsigned long shub_itc_clocks_max; |
78 | unsigned long shub_ptc_flushes_not_my_mm; | 81 | unsigned long shub_ptc_flushes_not_my_mm; |
82 | unsigned long shub_ipi_flushes; | ||
83 | unsigned long shub_ipi_flushes_itc_clocks; | ||
79 | }; | 84 | }; |
80 | 85 | ||
81 | #define sn2_ptctest 0 | 86 | #define sn2_ptctest 0 |
@@ -121,6 +126,18 @@ void sn_tlb_migrate_finish(struct mm_struct *mm) | |||
121 | flush_tlb_mm(mm); | 126 | flush_tlb_mm(mm); |
122 | } | 127 | } |
123 | 128 | ||
129 | static void | ||
130 | sn2_ipi_flush_all_tlb(struct mm_struct *mm) | ||
131 | { | ||
132 | unsigned long itc; | ||
133 | |||
134 | itc = ia64_get_itc(); | ||
135 | smp_flush_tlb_cpumask(mm->cpu_vm_mask); | ||
136 | itc = ia64_get_itc() - itc; | ||
137 | __get_cpu_var(ptcstats).shub_ipi_flushes_itc_clocks += itc; | ||
138 | __get_cpu_var(ptcstats).shub_ipi_flushes++; | ||
139 | } | ||
140 | |||
124 | /** | 141 | /** |
125 | * sn2_global_tlb_purge - globally purge translation cache of virtual address range | 142 | * sn2_global_tlb_purge - globally purge translation cache of virtual address range |
126 | * @mm: mm_struct containing virtual address range | 143 | * @mm: mm_struct containing virtual address range |
@@ -154,7 +171,12 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start, | |||
154 | unsigned long itc, itc2, flags, data0 = 0, data1 = 0, rr_value, old_rr = 0; | 171 | unsigned long itc, itc2, flags, data0 = 0, data1 = 0, rr_value, old_rr = 0; |
155 | short nasids[MAX_NUMNODES], nix; | 172 | short nasids[MAX_NUMNODES], nix; |
156 | nodemask_t nodes_flushed; | 173 | nodemask_t nodes_flushed; |
157 | int active, max_active, deadlock; | 174 | int active, max_active, deadlock, flush_opt = sn2_flush_opt; |
175 | |||
176 | if (flush_opt > 2) { | ||
177 | sn2_ipi_flush_all_tlb(mm); | ||
178 | return; | ||
179 | } | ||
158 | 180 | ||
159 | nodes_clear(nodes_flushed); | 181 | nodes_clear(nodes_flushed); |
160 | i = 0; | 182 | i = 0; |
@@ -189,6 +211,12 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start, | |||
189 | return; | 211 | return; |
190 | } | 212 | } |
191 | 213 | ||
214 | if (flush_opt == 2) { | ||
215 | sn2_ipi_flush_all_tlb(mm); | ||
216 | preempt_enable(); | ||
217 | return; | ||
218 | } | ||
219 | |||
192 | itc = ia64_get_itc(); | 220 | itc = ia64_get_itc(); |
193 | nix = 0; | 221 | nix = 0; |
194 | for_each_node_mask(cnode, nodes_flushed) | 222 | for_each_node_mask(cnode, nodes_flushed) |
@@ -256,6 +284,8 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start, | |||
256 | } | 284 | } |
257 | if (active >= max_active || i == (nix - 1)) { | 285 | if (active >= max_active || i == (nix - 1)) { |
258 | if ((deadlock = wait_piowc())) { | 286 | if ((deadlock = wait_piowc())) { |
287 | if (flush_opt == 1) | ||
288 | goto done; | ||
259 | sn2_ptc_deadlock_recovery(nasids, ibegin, i, mynasid, ptc0, data0, ptc1, data1); | 289 | sn2_ptc_deadlock_recovery(nasids, ibegin, i, mynasid, ptc0, data0, ptc1, data1); |
260 | if (reset_max_active_on_deadlock()) | 290 | if (reset_max_active_on_deadlock()) |
261 | max_active = 1; | 291 | max_active = 1; |
@@ -267,6 +297,7 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start, | |||
267 | start += (1UL << nbits); | 297 | start += (1UL << nbits); |
268 | } while (start < end); | 298 | } while (start < end); |
269 | 299 | ||
300 | done: | ||
270 | itc2 = ia64_get_itc() - itc2; | 301 | itc2 = ia64_get_itc() - itc2; |
271 | __get_cpu_var(ptcstats).shub_itc_clocks += itc2; | 302 | __get_cpu_var(ptcstats).shub_itc_clocks += itc2; |
272 | if (itc2 > __get_cpu_var(ptcstats).shub_itc_clocks_max) | 303 | if (itc2 > __get_cpu_var(ptcstats).shub_itc_clocks_max) |
@@ -279,6 +310,11 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start, | |||
279 | 310 | ||
280 | spin_unlock_irqrestore(PTC_LOCK(shub1), flags); | 311 | spin_unlock_irqrestore(PTC_LOCK(shub1), flags); |
281 | 312 | ||
313 | if (flush_opt == 1 && deadlock) { | ||
314 | __get_cpu_var(ptcstats).deadlocks++; | ||
315 | sn2_ipi_flush_all_tlb(mm); | ||
316 | } | ||
317 | |||
282 | preempt_enable(); | 318 | preempt_enable(); |
283 | } | 319 | } |
284 | 320 | ||
@@ -425,24 +461,42 @@ static int sn2_ptc_seq_show(struct seq_file *file, void *data) | |||
425 | 461 | ||
426 | if (!cpu) { | 462 | if (!cpu) { |
427 | seq_printf(file, | 463 | seq_printf(file, |
428 | "# cpu ptc_l newrid ptc_flushes nodes_flushed deadlocks lock_nsec shub_nsec shub_nsec_max not_my_mm deadlock2\n"); | 464 | "# cpu ptc_l newrid ptc_flushes nodes_flushed deadlocks lock_nsec shub_nsec shub_nsec_max not_my_mm deadlock2 ipi_fluches ipi_nsec\n"); |
429 | seq_printf(file, "# ptctest %d\n", sn2_ptctest); | 465 | seq_printf(file, "# ptctest %d, flushopt %d\n", sn2_ptctest, sn2_flush_opt); |
430 | } | 466 | } |
431 | 467 | ||
432 | if (cpu < NR_CPUS && cpu_online(cpu)) { | 468 | if (cpu < NR_CPUS && cpu_online(cpu)) { |
433 | stat = &per_cpu(ptcstats, cpu); | 469 | stat = &per_cpu(ptcstats, cpu); |
434 | seq_printf(file, "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld\n", cpu, stat->ptc_l, | 470 | seq_printf(file, "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld\n", cpu, stat->ptc_l, |
435 | stat->change_rid, stat->shub_ptc_flushes, stat->nodes_flushed, | 471 | stat->change_rid, stat->shub_ptc_flushes, stat->nodes_flushed, |
436 | stat->deadlocks, | 472 | stat->deadlocks, |
437 | 1000 * stat->lock_itc_clocks / per_cpu(cpu_info, cpu).cyc_per_usec, | 473 | 1000 * stat->lock_itc_clocks / per_cpu(cpu_info, cpu).cyc_per_usec, |
438 | 1000 * stat->shub_itc_clocks / per_cpu(cpu_info, cpu).cyc_per_usec, | 474 | 1000 * stat->shub_itc_clocks / per_cpu(cpu_info, cpu).cyc_per_usec, |
439 | 1000 * stat->shub_itc_clocks_max / per_cpu(cpu_info, cpu).cyc_per_usec, | 475 | 1000 * stat->shub_itc_clocks_max / per_cpu(cpu_info, cpu).cyc_per_usec, |
440 | stat->shub_ptc_flushes_not_my_mm, | 476 | stat->shub_ptc_flushes_not_my_mm, |
441 | stat->deadlocks2); | 477 | stat->deadlocks2, |
478 | stat->shub_ipi_flushes, | ||
479 | 1000 * stat->shub_ipi_flushes_itc_clocks / per_cpu(cpu_info, cpu).cyc_per_usec); | ||
442 | } | 480 | } |
443 | return 0; | 481 | return 0; |
444 | } | 482 | } |
445 | 483 | ||
484 | static ssize_t sn2_ptc_proc_write(struct file *file, const char __user *user, size_t count, loff_t *data) | ||
485 | { | ||
486 | int cpu; | ||
487 | char optstr[64]; | ||
488 | /* Reject empty or oversized writes: count both fills and indexes optstr[], so it must be 1..sizeof(optstr). */ | ||
489 | if (count == 0 || count > sizeof(optstr)) | ||
490 | return -EINVAL; | ||
491 | if (copy_from_user(optstr, user, count)) | ||
492 | return -EFAULT; | ||
493 | optstr[count - 1] = '\0'; | ||
494 | sn2_flush_opt = simple_strtoul(optstr, NULL, 0); | ||
495 | for_each_online_cpu(cpu) | ||
496 | memset(&per_cpu(ptcstats, cpu), 0, sizeof(struct ptc_stats)); | ||
497 | return count; | ||
498 | } | ||
499 | |||
446 | static struct seq_operations sn2_ptc_seq_ops = { | 500 | static struct seq_operations sn2_ptc_seq_ops = { |
447 | .start = sn2_ptc_seq_start, | 501 | .start = sn2_ptc_seq_start, |
448 | .next = sn2_ptc_seq_next, | 502 | .next = sn2_ptc_seq_next, |
@@ -458,6 +512,7 @@ static int sn2_ptc_proc_open(struct inode *inode, struct file *file) | |||
458 | static const struct file_operations proc_sn2_ptc_operations = { | 512 | static const struct file_operations proc_sn2_ptc_operations = { |
459 | .open = sn2_ptc_proc_open, | 513 | .open = sn2_ptc_proc_open, |
460 | .read = seq_read, | 514 | .read = seq_read, |
515 | .write = sn2_ptc_proc_write, | ||
461 | .llseek = seq_lseek, | 516 | .llseek = seq_lseek, |
462 | .release = seq_release, | 517 | .release = seq_release, |
463 | }; | 518 | }; |