about summary refs log tree commit diff stats
path: root/arch
diff options
context:
space:
mode:
author Jack Steiner <steiner@sgi.com> 2006-01-26 18:03:41 -0500
committer Tony Luck <tony.luck@intel.com> 2006-01-26 18:03:41 -0500
commit 61a34a024fcd61ef7207405b2e4cef2c073b220c (patch)
tree bd4b43256189c4656a838311d69e49183b4d676a /arch
parent 3ee68c4af3fd7228c1be63254b9f884614f9ebb2 (diff)
[IA64-SGI] Update TLB flushing code for SN platform
This patch finishes support for SHUB2 (the new chipset). Most of the changes are performance related. A few changes are workarounds for "interesting" chipset features. Some temporary debugging code has also been deleted.

Signed-off-by: Jack Steiner <steiner@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Diffstat (limited to 'arch')
-rw-r--r-- arch/ia64/sn/kernel/sn2/sn2_smp.c 196
1 files changed, 75 insertions, 121 deletions
diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c
index 471bbaa65d1b..f153a4c35c70 100644
--- a/arch/ia64/sn/kernel/sn2/sn2_smp.c
+++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c
@@ -5,7 +5,7 @@
5 * License. See the file "COPYING" in the main directory of this archive 5 * License. See the file "COPYING" in the main directory of this archive
6 * for more details. 6 * for more details.
7 * 7 *
8 * Copyright (C) 2000-2005 Silicon Graphics, Inc. All rights reserved. 8 * Copyright (C) 2000-2006 Silicon Graphics, Inc. All rights reserved.
9 */ 9 */
10 10
11#include <linux/init.h> 11#include <linux/init.h>
@@ -46,104 +46,28 @@ DECLARE_PER_CPU(struct ptc_stats, ptcstats);
46 46
47static __cacheline_aligned DEFINE_SPINLOCK(sn2_global_ptc_lock); 47static __cacheline_aligned DEFINE_SPINLOCK(sn2_global_ptc_lock);
48 48
49void sn2_ptc_deadlock_recovery(short *, short, int, volatile unsigned long *, unsigned long data0, 49void sn2_ptc_deadlock_recovery(short *, short, short, int, volatile unsigned long *, unsigned long,
50 volatile unsigned long *, unsigned long data1); 50 volatile unsigned long *, unsigned long);
51 51
52#ifdef DEBUG_PTC
53/* 52/*
54 * ptctest: 53 * Note: some is the following is captured here to make degugging easier
55 * 54 * (the macros make more sense if you see the debug patch - not posted)
56 * xyz - 3 digit hex number:
57 * x - Force PTC purges to use shub:
58 * 0 - no force
59 * 1 - force
60 * y - interupt enable
61 * 0 - disable interrupts
62 * 1 - leave interuupts enabled
63 * z - type of lock:
64 * 0 - global lock
65 * 1 - node local lock
66 * 2 - no lock
67 *
68 * Note: on shub1, only ptctest == 0 is supported. Don't try other values!
69 */ 55 */
70
71static unsigned int sn2_ptctest = 0;
72
73static int __init ptc_test(char *str)
74{
75 get_option(&str, &sn2_ptctest);
76 return 1;
77}
78__setup("ptctest=", ptc_test);
79
80static inline int ptc_lock(unsigned long *flagp)
81{
82 unsigned long opt = sn2_ptctest & 255;
83
84 switch (opt) {
85 case 0x00:
86 spin_lock_irqsave(&sn2_global_ptc_lock, *flagp);
87 break;
88 case 0x01:
89 spin_lock_irqsave(&sn_nodepda->ptc_lock, *flagp);
90 break;
91 case 0x02:
92 local_irq_save(*flagp);
93 break;
94 case 0x10:
95 spin_lock(&sn2_global_ptc_lock);
96 break;
97 case 0x11:
98 spin_lock(&sn_nodepda->ptc_lock);
99 break;
100 case 0x12:
101 break;
102 default:
103 BUG();
104 }
105 return opt;
106}
107
108static inline void ptc_unlock(unsigned long flags, int opt)
109{
110 switch (opt) {
111 case 0x00:
112 spin_unlock_irqrestore(&sn2_global_ptc_lock, flags);
113 break;
114 case 0x01:
115 spin_unlock_irqrestore(&sn_nodepda->ptc_lock, flags);
116 break;
117 case 0x02:
118 local_irq_restore(flags);
119 break;
120 case 0x10:
121 spin_unlock(&sn2_global_ptc_lock);
122 break;
123 case 0x11:
124 spin_unlock(&sn_nodepda->ptc_lock);
125 break;
126 case 0x12:
127 break;
128 default:
129 BUG();
130 }
131}
132#else
133
134#define sn2_ptctest 0 56#define sn2_ptctest 0
57#define local_node_uses_ptc_ga(sh1) ((sh1) ? 1 : 0)
58#define max_active_pio(sh1) ((sh1) ? 32 : 7)
59#define reset_max_active_on_deadlock() 1
60#define PTC_LOCK(sh1) ((sh1) ? &sn2_global_ptc_lock : &sn_nodepda->ptc_lock)
135 61
136static inline int ptc_lock(unsigned long *flagp) 62static inline void ptc_lock(int sh1, unsigned long *flagp)
137{ 63{
138 spin_lock_irqsave(&sn2_global_ptc_lock, *flagp); 64 spin_lock_irqsave(PTC_LOCK(sh1), *flagp);
139 return 0;
140} 65}
141 66
142static inline void ptc_unlock(unsigned long flags, int opt) 67static inline void ptc_unlock(int sh1, unsigned long flags)
143{ 68{
144 spin_unlock_irqrestore(&sn2_global_ptc_lock, flags); 69 spin_unlock_irqrestore(PTC_LOCK(sh1), flags);
145} 70}
146#endif
147 71
148struct ptc_stats { 72struct ptc_stats {
149 unsigned long ptc_l; 73 unsigned long ptc_l;
@@ -151,27 +75,30 @@ struct ptc_stats {
151 unsigned long shub_ptc_flushes; 75 unsigned long shub_ptc_flushes;
152 unsigned long nodes_flushed; 76 unsigned long nodes_flushed;
153 unsigned long deadlocks; 77 unsigned long deadlocks;
78 unsigned long deadlocks2;
154 unsigned long lock_itc_clocks; 79 unsigned long lock_itc_clocks;
155 unsigned long shub_itc_clocks; 80 unsigned long shub_itc_clocks;
156 unsigned long shub_itc_clocks_max; 81 unsigned long shub_itc_clocks_max;
82 unsigned long shub_ptc_flushes_not_my_mm;
157}; 83};
158 84
159static inline unsigned long wait_piowc(void) 85static inline unsigned long wait_piowc(void)
160{ 86{
161 volatile unsigned long *piows, zeroval; 87 volatile unsigned long *piows;
162 unsigned long ws; 88 unsigned long zeroval, ws;
163 89
164 piows = pda->pio_write_status_addr; 90 piows = pda->pio_write_status_addr;
165 zeroval = pda->pio_write_status_val; 91 zeroval = pda->pio_write_status_val;
166 do { 92 do {
167 cpu_relax(); 93 cpu_relax();
168 } while (((ws = *piows) & SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK) != zeroval); 94 } while (((ws = *piows) & SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK) != zeroval);
169 return ws; 95 return (ws & SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_MASK) != 0;
170} 96}
171 97
172void sn_tlb_migrate_finish(struct mm_struct *mm) 98void sn_tlb_migrate_finish(struct mm_struct *mm)
173{ 99{
174 if (mm == current->mm) 100 /* flush_tlb_mm is inefficient if more than 1 users of mm */
101 if (mm == current->mm && mm && atomic_read(&mm->mm_users) == 1)
175 flush_tlb_mm(mm); 102 flush_tlb_mm(mm);
176} 103}
177 104
@@ -201,12 +128,14 @@ void
201sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start, 128sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start,
202 unsigned long end, unsigned long nbits) 129 unsigned long end, unsigned long nbits)
203{ 130{
204 int i, opt, shub1, cnode, mynasid, cpu, lcpu = 0, nasid, flushed = 0; 131 int i, ibegin, shub1, cnode, mynasid, cpu, lcpu = 0, nasid;
205 int mymm = (mm == current->active_mm && current->mm); 132 int mymm = (mm == current->active_mm && mm == current->mm);
133 int use_cpu_ptcga;
206 volatile unsigned long *ptc0, *ptc1; 134 volatile unsigned long *ptc0, *ptc1;
207 unsigned long itc, itc2, flags, data0 = 0, data1 = 0, rr_value; 135 unsigned long itc, itc2, flags, data0 = 0, data1 = 0, rr_value, old_rr = 0;
208 short nasids[MAX_NUMNODES], nix; 136 short nasids[MAX_NUMNODES], nix;
209 nodemask_t nodes_flushed; 137 nodemask_t nodes_flushed;
138 int active, max_active, deadlock;
210 139
211 nodes_clear(nodes_flushed); 140 nodes_clear(nodes_flushed);
212 i = 0; 141 i = 0;
@@ -267,41 +196,56 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start,
267 196
268 197
269 mynasid = get_nasid(); 198 mynasid = get_nasid();
199 use_cpu_ptcga = local_node_uses_ptc_ga(shub1);
200 max_active = max_active_pio(shub1);
270 201
271 itc = ia64_get_itc(); 202 itc = ia64_get_itc();
272 opt = ptc_lock(&flags); 203 ptc_lock(shub1, &flags);
273 itc2 = ia64_get_itc(); 204 itc2 = ia64_get_itc();
205
274 __get_cpu_var(ptcstats).lock_itc_clocks += itc2 - itc; 206 __get_cpu_var(ptcstats).lock_itc_clocks += itc2 - itc;
275 __get_cpu_var(ptcstats).shub_ptc_flushes++; 207 __get_cpu_var(ptcstats).shub_ptc_flushes++;
276 __get_cpu_var(ptcstats).nodes_flushed += nix; 208 __get_cpu_var(ptcstats).nodes_flushed += nix;
209 if (!mymm)
210 __get_cpu_var(ptcstats).shub_ptc_flushes_not_my_mm++;
277 211
212 if (use_cpu_ptcga && !mymm) {
213 old_rr = ia64_get_rr(start);
214 ia64_set_rr(start, (old_rr & 0xff) | (rr_value << 8));
215 ia64_srlz_d();
216 }
217
218 wait_piowc();
278 do { 219 do {
279 if (shub1) 220 if (shub1)
280 data1 = start | (1UL << SH1_PTC_1_START_SHFT); 221 data1 = start | (1UL << SH1_PTC_1_START_SHFT);
281 else 222 else
282 data0 = (data0 & ~SH2_PTC_ADDR_MASK) | (start & SH2_PTC_ADDR_MASK); 223 data0 = (data0 & ~SH2_PTC_ADDR_MASK) | (start & SH2_PTC_ADDR_MASK);
283 for (i = 0; i < nix; i++) { 224 deadlock = 0;
225 active = 0;
226 for (ibegin = 0, i = 0; i < nix; i++) {
284 nasid = nasids[i]; 227 nasid = nasids[i];
285 if ((!(sn2_ptctest & 3)) && unlikely(nasid == mynasid && mymm)) { 228 if (use_cpu_ptcga && unlikely(nasid == mynasid)) {
286 ia64_ptcga(start, nbits << 2); 229 ia64_ptcga(start, nbits << 2);
287 ia64_srlz_i(); 230 ia64_srlz_i();
288 } else { 231 } else {
289 ptc0 = CHANGE_NASID(nasid, ptc0); 232 ptc0 = CHANGE_NASID(nasid, ptc0);
290 if (ptc1) 233 if (ptc1)
291 ptc1 = CHANGE_NASID(nasid, ptc1); 234 ptc1 = CHANGE_NASID(nasid, ptc1);
292 pio_atomic_phys_write_mmrs(ptc0, data0, ptc1, 235 pio_atomic_phys_write_mmrs(ptc0, data0, ptc1, data1);
293 data1); 236 active++;
294 flushed = 1; 237 }
238 if (active >= max_active || i == (nix - 1)) {
239 if ((deadlock = wait_piowc())) {
240 sn2_ptc_deadlock_recovery(nasids, ibegin, i, mynasid, ptc0, data0, ptc1, data1);
241 if (reset_max_active_on_deadlock())
242 max_active = 1;
243 }
244 active = 0;
245 ibegin = i + 1;
295 } 246 }
296 } 247 }
297 if (flushed
298 && (wait_piowc() &
299 (SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_MASK))) {
300 sn2_ptc_deadlock_recovery(nasids, nix, mynasid, ptc0, data0, ptc1, data1);
301 }
302
303 start += (1UL << nbits); 248 start += (1UL << nbits);
304
305 } while (start < end); 249 } while (start < end);
306 250
307 itc2 = ia64_get_itc() - itc2; 251 itc2 = ia64_get_itc() - itc2;
@@ -309,7 +253,12 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start,
309 if (itc2 > __get_cpu_var(ptcstats).shub_itc_clocks_max) 253 if (itc2 > __get_cpu_var(ptcstats).shub_itc_clocks_max)
310 __get_cpu_var(ptcstats).shub_itc_clocks_max = itc2; 254 __get_cpu_var(ptcstats).shub_itc_clocks_max = itc2;
311 255
312 ptc_unlock(flags, opt); 256 if (old_rr) {
257 ia64_set_rr(start, old_rr);
258 ia64_srlz_d();
259 }
260
261 ptc_unlock(shub1, flags);
313 262
314 preempt_enable(); 263 preempt_enable();
315} 264}
@@ -321,27 +270,30 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start,
321 * TLB flush transaction. The recovery sequence is somewhat tricky & is 270 * TLB flush transaction. The recovery sequence is somewhat tricky & is
322 * coded in assembly language. 271 * coded in assembly language.
323 */ 272 */
324void sn2_ptc_deadlock_recovery(short *nasids, short nix, int mynasid, volatile unsigned long *ptc0, unsigned long data0, 273void sn2_ptc_deadlock_recovery(short *nasids, short ib, short ie, int mynasid, volatile unsigned long *ptc0, unsigned long data0,
325 volatile unsigned long *ptc1, unsigned long data1) 274 volatile unsigned long *ptc1, unsigned long data1)
326{ 275{
327 extern void sn2_ptc_deadlock_recovery_core(volatile unsigned long *, unsigned long, 276 extern unsigned long sn2_ptc_deadlock_recovery_core(volatile unsigned long *, unsigned long,
328 volatile unsigned long *, unsigned long, volatile unsigned long *, unsigned long); 277 volatile unsigned long *, unsigned long, volatile unsigned long *, unsigned long);
329 short nasid, i; 278 short nasid, i;
330 unsigned long *piows, zeroval; 279 unsigned long *piows, zeroval, n;
331 280
332 __get_cpu_var(ptcstats).deadlocks++; 281 __get_cpu_var(ptcstats).deadlocks++;
333 282
334 piows = (unsigned long *) pda->pio_write_status_addr; 283 piows = (unsigned long *) pda->pio_write_status_addr;
335 zeroval = pda->pio_write_status_val; 284 zeroval = pda->pio_write_status_val;
336 285
337 for (i=0; i < nix; i++) { 286
287 for (i=ib; i <= ie; i++) {
338 nasid = nasids[i]; 288 nasid = nasids[i];
339 if (!(sn2_ptctest & 3) && nasid == mynasid) 289 if (local_node_uses_ptc_ga(is_shub1()) && nasid == mynasid)
340 continue; 290 continue;
341 ptc0 = CHANGE_NASID(nasid, ptc0); 291 ptc0 = CHANGE_NASID(nasid, ptc0);
342 if (ptc1) 292 if (ptc1)
343 ptc1 = CHANGE_NASID(nasid, ptc1); 293 ptc1 = CHANGE_NASID(nasid, ptc1);
344 sn2_ptc_deadlock_recovery_core(ptc0, data0, ptc1, data1, piows, zeroval); 294
295 n = sn2_ptc_deadlock_recovery_core(ptc0, data0, ptc1, data1, piows, zeroval);
296 __get_cpu_var(ptcstats).deadlocks2 += n;
345 } 297 }
346 298
347} 299}
@@ -452,20 +404,22 @@ static int sn2_ptc_seq_show(struct seq_file *file, void *data)
452 cpu = *(loff_t *) data; 404 cpu = *(loff_t *) data;
453 405
454 if (!cpu) { 406 if (!cpu) {
455 seq_printf(file, "# ptc_l change_rid shub_ptc_flushes shub_nodes_flushed deadlocks lock_nsec shub_nsec shub_nsec_max\n"); 407 seq_printf(file,
408 "# cpu ptc_l newrid ptc_flushes nodes_flushed deadlocks lock_nsec shub_nsec shub_nsec_max not_my_mm deadlock2\n");
456 seq_printf(file, "# ptctest %d\n", sn2_ptctest); 409 seq_printf(file, "# ptctest %d\n", sn2_ptctest);
457 } 410 }
458 411
459 if (cpu < NR_CPUS && cpu_online(cpu)) { 412 if (cpu < NR_CPUS && cpu_online(cpu)) {
460 stat = &per_cpu(ptcstats, cpu); 413 stat = &per_cpu(ptcstats, cpu);
461 seq_printf(file, "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld\n", cpu, stat->ptc_l, 414 seq_printf(file, "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld\n", cpu, stat->ptc_l,
462 stat->change_rid, stat->shub_ptc_flushes, stat->nodes_flushed, 415 stat->change_rid, stat->shub_ptc_flushes, stat->nodes_flushed,
463 stat->deadlocks, 416 stat->deadlocks,
464 1000 * stat->lock_itc_clocks / per_cpu(cpu_info, cpu).cyc_per_usec, 417 1000 * stat->lock_itc_clocks / per_cpu(cpu_info, cpu).cyc_per_usec,
465 1000 * stat->shub_itc_clocks / per_cpu(cpu_info, cpu).cyc_per_usec, 418 1000 * stat->shub_itc_clocks / per_cpu(cpu_info, cpu).cyc_per_usec,
466 1000 * stat->shub_itc_clocks_max / per_cpu(cpu_info, cpu).cyc_per_usec); 419 1000 * stat->shub_itc_clocks_max / per_cpu(cpu_info, cpu).cyc_per_usec,
420 stat->shub_ptc_flushes_not_my_mm,
421 stat->deadlocks2);
467 } 422 }
468
469 return 0; 423 return 0;
470} 424}
471 425
@@ -476,7 +430,7 @@ static struct seq_operations sn2_ptc_seq_ops = {
476 .show = sn2_ptc_seq_show 430 .show = sn2_ptc_seq_show
477}; 431};
478 432
479int sn2_ptc_proc_open(struct inode *inode, struct file *file) 433static int sn2_ptc_proc_open(struct inode *inode, struct file *file)
480{ 434{
481 return seq_open(file, &sn2_ptc_seq_ops); 435 return seq_open(file, &sn2_ptc_seq_ops);
482} 436}