diff options
author | Jack Steiner <steiner@sgi.com> | 2007-05-08 17:50:43 -0400 |
---|---|---|
committer | Tony Luck <tony.luck@intel.com> | 2007-05-08 17:50:43 -0400 |
commit | 3be44b9cc33d26930cb3bb014f35f582c6522481 (patch) | |
tree | 09225c5f0fb4c6caa81bbdff216ec83a093e4d12 /arch/ia64/sn | |
parent | 8737d59579c5e61ea3d5da4bd63303159fd1cf7e (diff) |
[IA64] Optional method to purge the TLB on SN systems
This patch adds an optional method for purging the TLB on SN IA64 systems.
The change should not affect any non-SN system.
Signed-off-by: Jack Steiner <steiner@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Diffstat (limited to 'arch/ia64/sn')
-rw-r--r-- | arch/ia64/sn/kernel/sn2/sn2_smp.c | 65 |
1 files changed, 60 insertions, 5 deletions
diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c index 601747b1e22a..5d318b579fb1 100644 --- a/arch/ia64/sn/kernel/sn2/sn2_smp.c +++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c | |||
@@ -46,6 +46,9 @@ DECLARE_PER_CPU(struct ptc_stats, ptcstats); | |||
46 | 46 | ||
47 | static __cacheline_aligned DEFINE_SPINLOCK(sn2_global_ptc_lock); | 47 | static __cacheline_aligned DEFINE_SPINLOCK(sn2_global_ptc_lock); |
48 | 48 | ||
49 | /* 0 = old algorithm (no IPI flushes), 1 = ipi deadlock flush, 2 = ipi instead of SHUB ptc, >2 = always ipi */ | ||
50 | static int sn2_flush_opt = 0; | ||
51 | |||
49 | extern unsigned long | 52 | extern unsigned long |
50 | sn2_ptc_deadlock_recovery_core(volatile unsigned long *, unsigned long, | 53 | sn2_ptc_deadlock_recovery_core(volatile unsigned long *, unsigned long, |
51 | volatile unsigned long *, unsigned long, | 54 | volatile unsigned long *, unsigned long, |
@@ -76,6 +79,8 @@ struct ptc_stats { | |||
76 | unsigned long shub_itc_clocks; | 79 | unsigned long shub_itc_clocks; |
77 | unsigned long shub_itc_clocks_max; | 80 | unsigned long shub_itc_clocks_max; |
78 | unsigned long shub_ptc_flushes_not_my_mm; | 81 | unsigned long shub_ptc_flushes_not_my_mm; |
82 | unsigned long shub_ipi_flushes; | ||
83 | unsigned long shub_ipi_flushes_itc_clocks; | ||
79 | }; | 84 | }; |
80 | 85 | ||
81 | #define sn2_ptctest 0 | 86 | #define sn2_ptctest 0 |
@@ -121,6 +126,18 @@ void sn_tlb_migrate_finish(struct mm_struct *mm) | |||
121 | flush_tlb_mm(mm); | 126 | flush_tlb_mm(mm); |
122 | } | 127 | } |
123 | 128 | ||
129 | static void | ||
130 | sn2_ipi_flush_all_tlb(struct mm_struct *mm) | ||
131 | { | ||
132 | unsigned long itc; | ||
133 | |||
134 | itc = ia64_get_itc(); | ||
135 | smp_flush_tlb_cpumask(mm->cpu_vm_mask); | ||
136 | itc = ia64_get_itc() - itc; | ||
137 | __get_cpu_var(ptcstats).shub_ipi_flushes_itc_clocks += itc; | ||
138 | __get_cpu_var(ptcstats).shub_ipi_flushes++; | ||
139 | } | ||
140 | |||
124 | /** | 141 | /** |
125 | * sn2_global_tlb_purge - globally purge translation cache of virtual address range | 142 | * sn2_global_tlb_purge - globally purge translation cache of virtual address range |
126 | * @mm: mm_struct containing virtual address range | 143 | * @mm: mm_struct containing virtual address range |
@@ -154,7 +171,12 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start, | |||
154 | unsigned long itc, itc2, flags, data0 = 0, data1 = 0, rr_value, old_rr = 0; | 171 | unsigned long itc, itc2, flags, data0 = 0, data1 = 0, rr_value, old_rr = 0; |
155 | short nasids[MAX_NUMNODES], nix; | 172 | short nasids[MAX_NUMNODES], nix; |
156 | nodemask_t nodes_flushed; | 173 | nodemask_t nodes_flushed; |
157 | int active, max_active, deadlock; | 174 | int active, max_active, deadlock, flush_opt = sn2_flush_opt; |
175 | |||
176 | if (flush_opt > 2) { | ||
177 | sn2_ipi_flush_all_tlb(mm); | ||
178 | return; | ||
179 | } | ||
158 | 180 | ||
159 | nodes_clear(nodes_flushed); | 181 | nodes_clear(nodes_flushed); |
160 | i = 0; | 182 | i = 0; |
@@ -189,6 +211,12 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start, | |||
189 | return; | 211 | return; |
190 | } | 212 | } |
191 | 213 | ||
214 | if (flush_opt == 2) { | ||
215 | sn2_ipi_flush_all_tlb(mm); | ||
216 | preempt_enable(); | ||
217 | return; | ||
218 | } | ||
219 | |||
192 | itc = ia64_get_itc(); | 220 | itc = ia64_get_itc(); |
193 | nix = 0; | 221 | nix = 0; |
194 | for_each_node_mask(cnode, nodes_flushed) | 222 | for_each_node_mask(cnode, nodes_flushed) |
@@ -256,6 +284,8 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start, | |||
256 | } | 284 | } |
257 | if (active >= max_active || i == (nix - 1)) { | 285 | if (active >= max_active || i == (nix - 1)) { |
258 | if ((deadlock = wait_piowc())) { | 286 | if ((deadlock = wait_piowc())) { |
287 | if (flush_opt == 1) | ||
288 | goto done; | ||
259 | sn2_ptc_deadlock_recovery(nasids, ibegin, i, mynasid, ptc0, data0, ptc1, data1); | 289 | sn2_ptc_deadlock_recovery(nasids, ibegin, i, mynasid, ptc0, data0, ptc1, data1); |
260 | if (reset_max_active_on_deadlock()) | 290 | if (reset_max_active_on_deadlock()) |
261 | max_active = 1; | 291 | max_active = 1; |
@@ -267,6 +297,7 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start, | |||
267 | start += (1UL << nbits); | 297 | start += (1UL << nbits); |
268 | } while (start < end); | 298 | } while (start < end); |
269 | 299 | ||
300 | done: | ||
270 | itc2 = ia64_get_itc() - itc2; | 301 | itc2 = ia64_get_itc() - itc2; |
271 | __get_cpu_var(ptcstats).shub_itc_clocks += itc2; | 302 | __get_cpu_var(ptcstats).shub_itc_clocks += itc2; |
272 | if (itc2 > __get_cpu_var(ptcstats).shub_itc_clocks_max) | 303 | if (itc2 > __get_cpu_var(ptcstats).shub_itc_clocks_max) |
@@ -279,6 +310,11 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start, | |||
279 | 310 | ||
280 | spin_unlock_irqrestore(PTC_LOCK(shub1), flags); | 311 | spin_unlock_irqrestore(PTC_LOCK(shub1), flags); |
281 | 312 | ||
313 | if (flush_opt == 1 && deadlock) { | ||
314 | __get_cpu_var(ptcstats).deadlocks++; | ||
315 | sn2_ipi_flush_all_tlb(mm); | ||
316 | } | ||
317 | |||
282 | preempt_enable(); | 318 | preempt_enable(); |
283 | } | 319 | } |
284 | 320 | ||
@@ -425,24 +461,42 @@ static int sn2_ptc_seq_show(struct seq_file *file, void *data) | |||
425 | 461 | ||
426 | if (!cpu) { | 462 | if (!cpu) { |
427 | seq_printf(file, | 463 | seq_printf(file, |
428 | "# cpu ptc_l newrid ptc_flushes nodes_flushed deadlocks lock_nsec shub_nsec shub_nsec_max not_my_mm deadlock2\n"); | 464 | "# cpu ptc_l newrid ptc_flushes nodes_flushed deadlocks lock_nsec shub_nsec shub_nsec_max not_my_mm deadlock2 ipi_fluches ipi_nsec\n"); |
429 | seq_printf(file, "# ptctest %d\n", sn2_ptctest); | 465 | seq_printf(file, "# ptctest %d, flushopt %d\n", sn2_ptctest, sn2_flush_opt); |
430 | } | 466 | } |
431 | 467 | ||
432 | if (cpu < NR_CPUS && cpu_online(cpu)) { | 468 | if (cpu < NR_CPUS && cpu_online(cpu)) { |
433 | stat = &per_cpu(ptcstats, cpu); | 469 | stat = &per_cpu(ptcstats, cpu); |
434 | seq_printf(file, "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld\n", cpu, stat->ptc_l, | 470 | seq_printf(file, "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld\n", cpu, stat->ptc_l, |
435 | stat->change_rid, stat->shub_ptc_flushes, stat->nodes_flushed, | 471 | stat->change_rid, stat->shub_ptc_flushes, stat->nodes_flushed, |
436 | stat->deadlocks, | 472 | stat->deadlocks, |
437 | 1000 * stat->lock_itc_clocks / per_cpu(cpu_info, cpu).cyc_per_usec, | 473 | 1000 * stat->lock_itc_clocks / per_cpu(cpu_info, cpu).cyc_per_usec, |
438 | 1000 * stat->shub_itc_clocks / per_cpu(cpu_info, cpu).cyc_per_usec, | 474 | 1000 * stat->shub_itc_clocks / per_cpu(cpu_info, cpu).cyc_per_usec, |
439 | 1000 * stat->shub_itc_clocks_max / per_cpu(cpu_info, cpu).cyc_per_usec, | 475 | 1000 * stat->shub_itc_clocks_max / per_cpu(cpu_info, cpu).cyc_per_usec, |
440 | stat->shub_ptc_flushes_not_my_mm, | 476 | stat->shub_ptc_flushes_not_my_mm, |
441 | stat->deadlocks2); | 477 | stat->deadlocks2, |
478 | stat->shub_ipi_flushes, | ||
479 | 1000 * stat->shub_ipi_flushes_itc_clocks / per_cpu(cpu_info, cpu).cyc_per_usec); | ||
442 | } | 480 | } |
443 | return 0; | 481 | return 0; |
444 | } | 482 | } |
445 | 483 | ||
484 | static ssize_t sn2_ptc_proc_write(struct file *file, const char __user *user, size_t count, loff_t *data) | ||
485 | { | ||
486 | int cpu; | ||
487 | char optstr[64]; | ||
488 | |||
489 | if (copy_from_user(optstr, user, count)) | ||
490 | return -EFAULT; | ||
491 | optstr[count - 1] = '\0'; | ||
492 | sn2_flush_opt = simple_strtoul(optstr, NULL, 0); | ||
493 | |||
494 | for_each_online_cpu(cpu) | ||
495 | memset(&per_cpu(ptcstats, cpu), 0, sizeof(struct ptc_stats)); | ||
496 | |||
497 | return count; | ||
498 | } | ||
499 | |||
446 | static struct seq_operations sn2_ptc_seq_ops = { | 500 | static struct seq_operations sn2_ptc_seq_ops = { |
447 | .start = sn2_ptc_seq_start, | 501 | .start = sn2_ptc_seq_start, |
448 | .next = sn2_ptc_seq_next, | 502 | .next = sn2_ptc_seq_next, |
@@ -458,6 +512,7 @@ static int sn2_ptc_proc_open(struct inode *inode, struct file *file) | |||
458 | static const struct file_operations proc_sn2_ptc_operations = { | 512 | static const struct file_operations proc_sn2_ptc_operations = { |
459 | .open = sn2_ptc_proc_open, | 513 | .open = sn2_ptc_proc_open, |
460 | .read = seq_read, | 514 | .read = seq_read, |
515 | .write = sn2_ptc_proc_write, | ||
461 | .llseek = seq_lseek, | 516 | .llseek = seq_lseek, |
462 | .release = seq_release, | 517 | .release = seq_release, |
463 | }; | 518 | }; |