aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorJack Steiner <steiner@sgi.com>2006-11-22 10:55:08 -0500
committerTony Luck <tony.luck@intel.com>2006-12-12 14:47:09 -0500
commit1cf24bdbbbd2eb5439796dc399ab1649d150ed1d (patch)
treef1ef2033b15e43d01a5759f90130900455ea5957 /arch
parent8b9c106856d92c8266697328b148d115538b59ce (diff)
[IA64] - Reduce overhead of FP exception logging messages
Improve the scalability of the fpswa code that rate-limits logging of messages. There are 2 distinctly different problems in this code. 1) If prctl is used to disable logging, last_time is never updated. The result is that fpu_swa_count is zeroed out on EVERY fp fault. This causes a very very hot cache line. The fix reduces the wallclock time of a 1024p FP exception test from 28734 sec to 19 sec!!! 2) On VERY large systems, excessive messages are logged because multiple cpus can each reset or increment fpu_swa_count at about the same time. The result is that hundreds of messages are logged each second. The fix reduces the logging rate to ~1 per second. Signed-off-by: Jack Steiner <steiner@sgi.com> Signed-off-by: Tony Luck <tony.luck@intel.com>
Diffstat (limited to 'arch')
-rw-r--r--arch/ia64/kernel/traps.c50
1 files changed, 40 insertions, 10 deletions
diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c
index fffa9e0826bc..ab684747036f 100644
--- a/arch/ia64/kernel/traps.c
+++ b/arch/ia64/kernel/traps.c
@@ -307,6 +307,15 @@ fp_emulate (int fp_fault, void *bundle, long *ipsr, long *fpsr, long *isr, long
307 return ret.status; 307 return ret.status;
308} 308}
309 309
310struct fpu_swa_msg {
311 unsigned long count;
312 unsigned long time;
313};
314static DEFINE_PER_CPU(struct fpu_swa_msg, cpulast);
315DECLARE_PER_CPU(struct fpu_swa_msg, cpulast);
316static struct fpu_swa_msg last __cacheline_aligned;
317
318
310/* 319/*
311 * Handle floating-point assist faults and traps. 320 * Handle floating-point assist faults and traps.
312 */ 321 */
@@ -316,8 +325,6 @@ handle_fpu_swa (int fp_fault, struct pt_regs *regs, unsigned long isr)
316 long exception, bundle[2]; 325 long exception, bundle[2];
317 unsigned long fault_ip; 326 unsigned long fault_ip;
318 struct siginfo siginfo; 327 struct siginfo siginfo;
319 static int fpu_swa_count = 0;
320 static unsigned long last_time;
321 328
322 fault_ip = regs->cr_iip; 329 fault_ip = regs->cr_iip;
323 if (!fp_fault && (ia64_psr(regs)->ri == 0)) 330 if (!fp_fault && (ia64_psr(regs)->ri == 0))
@@ -325,14 +332,37 @@ handle_fpu_swa (int fp_fault, struct pt_regs *regs, unsigned long isr)
325 if (copy_from_user(bundle, (void __user *) fault_ip, sizeof(bundle))) 332 if (copy_from_user(bundle, (void __user *) fault_ip, sizeof(bundle)))
326 return -1; 333 return -1;
327 334
328 if (jiffies - last_time > 5*HZ) 335 if (!(current->thread.flags & IA64_THREAD_FPEMU_NOPRINT)) {
329 fpu_swa_count = 0; 336 unsigned long count, current_jiffies = jiffies;
330 if ((fpu_swa_count < 4) && !(current->thread.flags & IA64_THREAD_FPEMU_NOPRINT)) { 337 struct fpu_swa_msg *cp = &__get_cpu_var(cpulast);
331 last_time = jiffies; 338
332 ++fpu_swa_count; 339 if (unlikely(current_jiffies > cp->time))
333 printk(KERN_WARNING 340 cp->count = 0;
334 "%s(%d): floating-point assist fault at ip %016lx, isr %016lx\n", 341 if (unlikely(cp->count < 5)) {
335 current->comm, current->pid, regs->cr_iip + ia64_psr(regs)->ri, isr); 342 cp->count++;
343 cp->time = current_jiffies + 5 * HZ;
344
345 /* minimize races by grabbing a copy of count BEFORE checking last.time. */
346 count = last.count;
347 barrier();
348
349 /*
350 * Lower 4 bits are used as a count. Upper bits are a sequence
351 * number that is updated when count is reset. The cmpxchg will
352 * fail if seqno has changed. This minimizes multiple cpus
353 * resetting the count.
354 */
355 if (current_jiffies > last.time)
356 (void) cmpxchg_acq(&last.count, count, 16 + (count & ~15));
357
358 /* use fetchadd to atomically update the count */
359 if ((last.count & 15) < 5 && (ia64_fetchadd(1, &last.count, acq) & 15) < 5) {
360 last.time = current_jiffies + 5 * HZ;
361 printk(KERN_WARNING
362 "%s(%d): floating-point assist fault at ip %016lx, isr %016lx\n",
363 current->comm, current->pid, regs->cr_iip + ia64_psr(regs)->ri, isr);
364 }
365 }
336 } 366 }
337 367
338 exception = fp_emulate(fp_fault, bundle, &regs->cr_ipsr, &regs->ar_fpsr, &isr, &regs->pr, 368 exception = fp_emulate(fp_fault, bundle, &regs->cr_ipsr, &regs->ar_fpsr, &isr, &regs->pr,