diff options
Diffstat (limited to 'arch/ia64/sn')
-rw-r--r-- | arch/ia64/sn/kernel/irq.c | 58 | ||||
-rw-r--r-- | arch/ia64/sn/kernel/sn2/sn2_smp.c | 65 |
2 files changed, 103 insertions, 20 deletions
diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c index 8d2a1bfbfe7c..7f6d2360a262 100644 --- a/arch/ia64/sn/kernel/irq.c +++ b/arch/ia64/sn/kernel/irq.c | |||
@@ -59,6 +59,22 @@ void sn_intr_free(nasid_t local_nasid, int local_widget, | |||
59 | (u64) sn_irq_info->irq_cookie, 0, 0); | 59 | (u64) sn_irq_info->irq_cookie, 0, 0); |
60 | } | 60 | } |
61 | 61 | ||
62 | u64 sn_intr_redirect(nasid_t local_nasid, int local_widget, | ||
63 | struct sn_irq_info *sn_irq_info, | ||
64 | nasid_t req_nasid, int req_slice) | ||
65 | { | ||
66 | struct ia64_sal_retval ret_stuff; | ||
67 | ret_stuff.status = 0; | ||
68 | ret_stuff.v0 = 0; | ||
69 | |||
70 | SAL_CALL_NOLOCK(ret_stuff, (u64) SN_SAL_IOIF_INTERRUPT, | ||
71 | (u64) SAL_INTR_REDIRECT, (u64) local_nasid, | ||
72 | (u64) local_widget, __pa(sn_irq_info), | ||
73 | (u64) req_nasid, (u64) req_slice, 0); | ||
74 | |||
75 | return ret_stuff.status; | ||
76 | } | ||
77 | |||
62 | static unsigned int sn_startup_irq(unsigned int irq) | 78 | static unsigned int sn_startup_irq(unsigned int irq) |
63 | { | 79 | { |
64 | return 0; | 80 | return 0; |
@@ -127,15 +143,8 @@ struct sn_irq_info *sn_retarget_vector(struct sn_irq_info *sn_irq_info, | |||
127 | struct sn_irq_info *new_irq_info; | 143 | struct sn_irq_info *new_irq_info; |
128 | struct sn_pcibus_provider *pci_provider; | 144 | struct sn_pcibus_provider *pci_provider; |
129 | 145 | ||
130 | new_irq_info = kmalloc(sizeof(struct sn_irq_info), GFP_ATOMIC); | 146 | bridge = (u64) sn_irq_info->irq_bridge; |
131 | if (new_irq_info == NULL) | ||
132 | return NULL; | ||
133 | |||
134 | memcpy(new_irq_info, sn_irq_info, sizeof(struct sn_irq_info)); | ||
135 | |||
136 | bridge = (u64) new_irq_info->irq_bridge; | ||
137 | if (!bridge) { | 147 | if (!bridge) { |
138 | kfree(new_irq_info); | ||
139 | return NULL; /* irq is not a device interrupt */ | 148 | return NULL; /* irq is not a device interrupt */ |
140 | } | 149 | } |
141 | 150 | ||
@@ -145,8 +154,25 @@ struct sn_irq_info *sn_retarget_vector(struct sn_irq_info *sn_irq_info, | |||
145 | local_widget = TIO_SWIN_WIDGETNUM(bridge); | 154 | local_widget = TIO_SWIN_WIDGETNUM(bridge); |
146 | else | 155 | else |
147 | local_widget = SWIN_WIDGETNUM(bridge); | 156 | local_widget = SWIN_WIDGETNUM(bridge); |
148 | |||
149 | vector = sn_irq_info->irq_irq; | 157 | vector = sn_irq_info->irq_irq; |
158 | |||
159 | /* Make use of SAL_INTR_REDIRECT if PROM supports it */ | ||
160 | status = sn_intr_redirect(local_nasid, local_widget, sn_irq_info, nasid, slice); | ||
161 | if (!status) { | ||
162 | new_irq_info = sn_irq_info; | ||
163 | goto finish_up; | ||
164 | } | ||
165 | |||
166 | /* | ||
167 | * PROM does not support SAL_INTR_REDIRECT, or it failed. | ||
168 | * Revert to old method. | ||
169 | */ | ||
170 | new_irq_info = kmalloc(sizeof(struct sn_irq_info), GFP_ATOMIC); | ||
171 | if (new_irq_info == NULL) | ||
172 | return NULL; | ||
173 | |||
174 | memcpy(new_irq_info, sn_irq_info, sizeof(struct sn_irq_info)); | ||
175 | |||
150 | /* Free the old PROM new_irq_info structure */ | 176 | /* Free the old PROM new_irq_info structure */ |
151 | sn_intr_free(local_nasid, local_widget, new_irq_info); | 177 | sn_intr_free(local_nasid, local_widget, new_irq_info); |
152 | unregister_intr_pda(new_irq_info); | 178 | unregister_intr_pda(new_irq_info); |
@@ -162,11 +188,18 @@ struct sn_irq_info *sn_retarget_vector(struct sn_irq_info *sn_irq_info, | |||
162 | return NULL; | 188 | return NULL; |
163 | } | 189 | } |
164 | 190 | ||
191 | register_intr_pda(new_irq_info); | ||
192 | spin_lock(&sn_irq_info_lock); | ||
193 | list_replace_rcu(&sn_irq_info->list, &new_irq_info->list); | ||
194 | spin_unlock(&sn_irq_info_lock); | ||
195 | call_rcu(&sn_irq_info->rcu, sn_irq_info_free); | ||
196 | |||
197 | |||
198 | finish_up: | ||
165 | /* Update kernels new_irq_info with new target info */ | 199 | /* Update kernels new_irq_info with new target info */ |
166 | cpuid = nasid_slice_to_cpuid(new_irq_info->irq_nasid, | 200 | cpuid = nasid_slice_to_cpuid(new_irq_info->irq_nasid, |
167 | new_irq_info->irq_slice); | 201 | new_irq_info->irq_slice); |
168 | new_irq_info->irq_cpuid = cpuid; | 202 | new_irq_info->irq_cpuid = cpuid; |
169 | register_intr_pda(new_irq_info); | ||
170 | 203 | ||
171 | pci_provider = sn_pci_provider[new_irq_info->irq_bridge_type]; | 204 | pci_provider = sn_pci_provider[new_irq_info->irq_bridge_type]; |
172 | 205 | ||
@@ -178,11 +211,6 @@ struct sn_irq_info *sn_retarget_vector(struct sn_irq_info *sn_irq_info, | |||
178 | pci_provider && pci_provider->target_interrupt) | 211 | pci_provider && pci_provider->target_interrupt) |
179 | (pci_provider->target_interrupt)(new_irq_info); | 212 | (pci_provider->target_interrupt)(new_irq_info); |
180 | 213 | ||
181 | spin_lock(&sn_irq_info_lock); | ||
182 | list_replace_rcu(&sn_irq_info->list, &new_irq_info->list); | ||
183 | spin_unlock(&sn_irq_info_lock); | ||
184 | call_rcu(&sn_irq_info->rcu, sn_irq_info_free); | ||
185 | |||
186 | #ifdef CONFIG_SMP | 214 | #ifdef CONFIG_SMP |
187 | cpuphys = cpu_physical_id(cpuid); | 215 | cpuphys = cpu_physical_id(cpuid); |
188 | set_irq_affinity_info((vector & 0xff), cpuphys, 0); | 216 | set_irq_affinity_info((vector & 0xff), cpuphys, 0); |
diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c index 601747b1e22a..5d318b579fb1 100644 --- a/arch/ia64/sn/kernel/sn2/sn2_smp.c +++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c | |||
@@ -46,6 +46,9 @@ DECLARE_PER_CPU(struct ptc_stats, ptcstats); | |||
46 | 46 | ||
47 | static __cacheline_aligned DEFINE_SPINLOCK(sn2_global_ptc_lock); | 47 | static __cacheline_aligned DEFINE_SPINLOCK(sn2_global_ptc_lock); |
48 | 48 | ||
49 | /* 0 = old algorithm (no IPI flushes), 1 = ipi deadlock flush, 2 = ipi instead of SHUB ptc, >2 = always ipi */ | ||
50 | static int sn2_flush_opt = 0; | ||
51 | |||
49 | extern unsigned long | 52 | extern unsigned long |
50 | sn2_ptc_deadlock_recovery_core(volatile unsigned long *, unsigned long, | 53 | sn2_ptc_deadlock_recovery_core(volatile unsigned long *, unsigned long, |
51 | volatile unsigned long *, unsigned long, | 54 | volatile unsigned long *, unsigned long, |
@@ -76,6 +79,8 @@ struct ptc_stats { | |||
76 | unsigned long shub_itc_clocks; | 79 | unsigned long shub_itc_clocks; |
77 | unsigned long shub_itc_clocks_max; | 80 | unsigned long shub_itc_clocks_max; |
78 | unsigned long shub_ptc_flushes_not_my_mm; | 81 | unsigned long shub_ptc_flushes_not_my_mm; |
82 | unsigned long shub_ipi_flushes; | ||
83 | unsigned long shub_ipi_flushes_itc_clocks; | ||
79 | }; | 84 | }; |
80 | 85 | ||
81 | #define sn2_ptctest 0 | 86 | #define sn2_ptctest 0 |
@@ -121,6 +126,18 @@ void sn_tlb_migrate_finish(struct mm_struct *mm) | |||
121 | flush_tlb_mm(mm); | 126 | flush_tlb_mm(mm); |
122 | } | 127 | } |
123 | 128 | ||
129 | static void | ||
130 | sn2_ipi_flush_all_tlb(struct mm_struct *mm) | ||
131 | { | ||
132 | unsigned long itc; | ||
133 | |||
134 | itc = ia64_get_itc(); | ||
135 | smp_flush_tlb_cpumask(mm->cpu_vm_mask); | ||
136 | itc = ia64_get_itc() - itc; | ||
137 | __get_cpu_var(ptcstats).shub_ipi_flushes_itc_clocks += itc; | ||
138 | __get_cpu_var(ptcstats).shub_ipi_flushes++; | ||
139 | } | ||
140 | |||
124 | /** | 141 | /** |
125 | * sn2_global_tlb_purge - globally purge translation cache of virtual address range | 142 | * sn2_global_tlb_purge - globally purge translation cache of virtual address range |
126 | * @mm: mm_struct containing virtual address range | 143 | * @mm: mm_struct containing virtual address range |
@@ -154,7 +171,12 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start, | |||
154 | unsigned long itc, itc2, flags, data0 = 0, data1 = 0, rr_value, old_rr = 0; | 171 | unsigned long itc, itc2, flags, data0 = 0, data1 = 0, rr_value, old_rr = 0; |
155 | short nasids[MAX_NUMNODES], nix; | 172 | short nasids[MAX_NUMNODES], nix; |
156 | nodemask_t nodes_flushed; | 173 | nodemask_t nodes_flushed; |
157 | int active, max_active, deadlock; | 174 | int active, max_active, deadlock, flush_opt = sn2_flush_opt; |
175 | |||
176 | if (flush_opt > 2) { | ||
177 | sn2_ipi_flush_all_tlb(mm); | ||
178 | return; | ||
179 | } | ||
158 | 180 | ||
159 | nodes_clear(nodes_flushed); | 181 | nodes_clear(nodes_flushed); |
160 | i = 0; | 182 | i = 0; |
@@ -189,6 +211,12 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start, | |||
189 | return; | 211 | return; |
190 | } | 212 | } |
191 | 213 | ||
214 | if (flush_opt == 2) { | ||
215 | sn2_ipi_flush_all_tlb(mm); | ||
216 | preempt_enable(); | ||
217 | return; | ||
218 | } | ||
219 | |||
192 | itc = ia64_get_itc(); | 220 | itc = ia64_get_itc(); |
193 | nix = 0; | 221 | nix = 0; |
194 | for_each_node_mask(cnode, nodes_flushed) | 222 | for_each_node_mask(cnode, nodes_flushed) |
@@ -256,6 +284,8 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start, | |||
256 | } | 284 | } |
257 | if (active >= max_active || i == (nix - 1)) { | 285 | if (active >= max_active || i == (nix - 1)) { |
258 | if ((deadlock = wait_piowc())) { | 286 | if ((deadlock = wait_piowc())) { |
287 | if (flush_opt == 1) | ||
288 | goto done; | ||
259 | sn2_ptc_deadlock_recovery(nasids, ibegin, i, mynasid, ptc0, data0, ptc1, data1); | 289 | sn2_ptc_deadlock_recovery(nasids, ibegin, i, mynasid, ptc0, data0, ptc1, data1); |
260 | if (reset_max_active_on_deadlock()) | 290 | if (reset_max_active_on_deadlock()) |
261 | max_active = 1; | 291 | max_active = 1; |
@@ -267,6 +297,7 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start, | |||
267 | start += (1UL << nbits); | 297 | start += (1UL << nbits); |
268 | } while (start < end); | 298 | } while (start < end); |
269 | 299 | ||
300 | done: | ||
270 | itc2 = ia64_get_itc() - itc2; | 301 | itc2 = ia64_get_itc() - itc2; |
271 | __get_cpu_var(ptcstats).shub_itc_clocks += itc2; | 302 | __get_cpu_var(ptcstats).shub_itc_clocks += itc2; |
272 | if (itc2 > __get_cpu_var(ptcstats).shub_itc_clocks_max) | 303 | if (itc2 > __get_cpu_var(ptcstats).shub_itc_clocks_max) |
@@ -279,6 +310,11 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start, | |||
279 | 310 | ||
280 | spin_unlock_irqrestore(PTC_LOCK(shub1), flags); | 311 | spin_unlock_irqrestore(PTC_LOCK(shub1), flags); |
281 | 312 | ||
313 | if (flush_opt == 1 && deadlock) { | ||
314 | __get_cpu_var(ptcstats).deadlocks++; | ||
315 | sn2_ipi_flush_all_tlb(mm); | ||
316 | } | ||
317 | |||
282 | preempt_enable(); | 318 | preempt_enable(); |
283 | } | 319 | } |
284 | 320 | ||
@@ -425,24 +461,42 @@ static int sn2_ptc_seq_show(struct seq_file *file, void *data) | |||
425 | 461 | ||
426 | if (!cpu) { | 462 | if (!cpu) { |
427 | seq_printf(file, | 463 | seq_printf(file, |
428 | "# cpu ptc_l newrid ptc_flushes nodes_flushed deadlocks lock_nsec shub_nsec shub_nsec_max not_my_mm deadlock2\n"); | 464 | "# cpu ptc_l newrid ptc_flushes nodes_flushed deadlocks lock_nsec shub_nsec shub_nsec_max not_my_mm deadlock2 ipi_fluches ipi_nsec\n"); |
429 | seq_printf(file, "# ptctest %d\n", sn2_ptctest); | 465 | seq_printf(file, "# ptctest %d, flushopt %d\n", sn2_ptctest, sn2_flush_opt); |
430 | } | 466 | } |
431 | 467 | ||
432 | if (cpu < NR_CPUS && cpu_online(cpu)) { | 468 | if (cpu < NR_CPUS && cpu_online(cpu)) { |
433 | stat = &per_cpu(ptcstats, cpu); | 469 | stat = &per_cpu(ptcstats, cpu); |
434 | seq_printf(file, "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld\n", cpu, stat->ptc_l, | 470 | seq_printf(file, "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld\n", cpu, stat->ptc_l, |
435 | stat->change_rid, stat->shub_ptc_flushes, stat->nodes_flushed, | 471 | stat->change_rid, stat->shub_ptc_flushes, stat->nodes_flushed, |
436 | stat->deadlocks, | 472 | stat->deadlocks, |
437 | 1000 * stat->lock_itc_clocks / per_cpu(cpu_info, cpu).cyc_per_usec, | 473 | 1000 * stat->lock_itc_clocks / per_cpu(cpu_info, cpu).cyc_per_usec, |
438 | 1000 * stat->shub_itc_clocks / per_cpu(cpu_info, cpu).cyc_per_usec, | 474 | 1000 * stat->shub_itc_clocks / per_cpu(cpu_info, cpu).cyc_per_usec, |
439 | 1000 * stat->shub_itc_clocks_max / per_cpu(cpu_info, cpu).cyc_per_usec, | 475 | 1000 * stat->shub_itc_clocks_max / per_cpu(cpu_info, cpu).cyc_per_usec, |
440 | stat->shub_ptc_flushes_not_my_mm, | 476 | stat->shub_ptc_flushes_not_my_mm, |
441 | stat->deadlocks2); | 477 | stat->deadlocks2, |
478 | stat->shub_ipi_flushes, | ||
479 | 1000 * stat->shub_ipi_flushes_itc_clocks / per_cpu(cpu_info, cpu).cyc_per_usec); | ||
442 | } | 480 | } |
443 | return 0; | 481 | return 0; |
444 | } | 482 | } |
445 | 483 | ||
484 | static ssize_t sn2_ptc_proc_write(struct file *file, const char __user *user, size_t count, loff_t *data) | ||
485 | { | ||
486 | int cpu; | ||
487 | char optstr[64]; | ||
488 | |||
489 | if (copy_from_user(optstr, user, count)) | ||
490 | return -EFAULT; | ||
491 | optstr[count - 1] = '\0'; | ||
492 | sn2_flush_opt = simple_strtoul(optstr, NULL, 0); | ||
493 | |||
494 | for_each_online_cpu(cpu) | ||
495 | memset(&per_cpu(ptcstats, cpu), 0, sizeof(struct ptc_stats)); | ||
496 | |||
497 | return count; | ||
498 | } | ||
499 | |||
446 | static struct seq_operations sn2_ptc_seq_ops = { | 500 | static struct seq_operations sn2_ptc_seq_ops = { |
447 | .start = sn2_ptc_seq_start, | 501 | .start = sn2_ptc_seq_start, |
448 | .next = sn2_ptc_seq_next, | 502 | .next = sn2_ptc_seq_next, |
@@ -458,6 +512,7 @@ static int sn2_ptc_proc_open(struct inode *inode, struct file *file) | |||
458 | static const struct file_operations proc_sn2_ptc_operations = { | 512 | static const struct file_operations proc_sn2_ptc_operations = { |
459 | .open = sn2_ptc_proc_open, | 513 | .open = sn2_ptc_proc_open, |
460 | .read = seq_read, | 514 | .read = seq_read, |
515 | .write = sn2_ptc_proc_write, | ||
461 | .llseek = seq_lseek, | 516 | .llseek = seq_lseek, |
462 | .release = seq_release, | 517 | .release = seq_release, |
463 | }; | 518 | }; |