aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author David Daney <ddaney@caviumnetworks.com> 2009-11-05 14:34:26 -0500
committer Ralf Baechle <ralf@linux-mips.org> 2009-12-16 20:57:08 -0500
commit b6ee75ed4fa201873d3a2b32dfce2dbd701a2de4 (patch)
tree 4574e5e523e9773fb1c8e4e579dc8f3be133daa6
parent 32028f1f7bce32e72183129dc55fc23656e7081c (diff)
MIPS: Collect FPU emulator statistics per-CPU.
On SMP systems, the collection of statistics can cause cache line bouncing in the lines associated with the counters. Also there are races incrementing the counters on multiple CPUs. To fix both problems, we collect the statistics in per-CPU variables, and add them up in the debugfs read operation.

As a test I ran the LTP float_bessel test on a 12 CPU Octeon system.

Without CONFIG_DEBUG_FS: 2602 seconds.
With CONFIG_DEBUG_FS: 2640 seconds.
With non-cpu-local atomic statistics: 14569 seconds.

Signed-off-by: David Daney <ddaney@caviumnetworks.com>
Cc: linux-mips@linux-mips.org
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
-rw-r--r-- arch/mips/include/asm/fpu_emulator.h 24
-rw-r--r-- arch/mips/math-emu/cp1emu.c 102
-rw-r--r-- arch/mips/math-emu/dsemul.c 4
3 files changed, 80 insertions, 50 deletions
diff --git a/arch/mips/include/asm/fpu_emulator.h b/arch/mips/include/asm/fpu_emulator.h
index e5189572956c..aecada6f6117 100644
--- a/arch/mips/include/asm/fpu_emulator.h
+++ b/arch/mips/include/asm/fpu_emulator.h
@@ -25,17 +25,27 @@
25 25
26#include <asm/break.h> 26#include <asm/break.h>
27#include <asm/inst.h> 27#include <asm/inst.h>
28#include <asm/local.h>
29
30#ifdef CONFIG_DEBUG_FS
28 31
29struct mips_fpu_emulator_stats { 32struct mips_fpu_emulator_stats {
30 unsigned int emulated; 33 local_t emulated;
31 unsigned int loads; 34 local_t loads;
32 unsigned int stores; 35 local_t stores;
33 unsigned int cp1ops; 36 local_t cp1ops;
34 unsigned int cp1xops; 37 local_t cp1xops;
35 unsigned int errors; 38 local_t errors;
36}; 39};
37 40
38extern struct mips_fpu_emulator_stats fpuemustats; 41DECLARE_PER_CPU(struct mips_fpu_emulator_stats, fpuemustats);
42
43#define MIPS_FPU_EMU_INC_STATS(M) \
44 cpu_local_wrap(__local_inc(&__get_cpu_var(fpuemustats).M))
45
46#else
47#define MIPS_FPU_EMU_INC_STATS(M) do { } while (0)
48#endif /* CONFIG_DEBUG_FS */
39 49
40extern int mips_dsemul(struct pt_regs *regs, mips_instruction ir, 50extern int mips_dsemul(struct pt_regs *regs, mips_instruction ir,
41 unsigned long cpc); 51 unsigned long cpc);
diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c
index 454b53924490..8f2f8e9d8b21 100644
--- a/arch/mips/math-emu/cp1emu.c
+++ b/arch/mips/math-emu/cp1emu.c
@@ -35,6 +35,7 @@
35 * better performance by compiling with -msoft-float! 35 * better performance by compiling with -msoft-float!
36 */ 36 */
37#include <linux/sched.h> 37#include <linux/sched.h>
38#include <linux/module.h>
38#include <linux/debugfs.h> 39#include <linux/debugfs.h>
39 40
40#include <asm/inst.h> 41#include <asm/inst.h>
@@ -68,7 +69,9 @@ static int fpux_emu(struct pt_regs *,
68 69
69/* Further private data for which no space exists in mips_fpu_struct */ 70/* Further private data for which no space exists in mips_fpu_struct */
70 71
71struct mips_fpu_emulator_stats fpuemustats; 72#ifdef CONFIG_DEBUG_FS
73DEFINE_PER_CPU(struct mips_fpu_emulator_stats, fpuemustats);
74#endif
72 75
73/* Control registers */ 76/* Control registers */
74 77
@@ -209,7 +212,7 @@ static int cop1Emulate(struct pt_regs *xcp, struct mips_fpu_struct *ctx)
209 unsigned int cond; 212 unsigned int cond;
210 213
211 if (get_user(ir, (mips_instruction __user *) xcp->cp0_epc)) { 214 if (get_user(ir, (mips_instruction __user *) xcp->cp0_epc)) {
212 fpuemustats.errors++; 215 MIPS_FPU_EMU_INC_STATS(errors);
213 return SIGBUS; 216 return SIGBUS;
214 } 217 }
215 218
@@ -240,7 +243,7 @@ static int cop1Emulate(struct pt_regs *xcp, struct mips_fpu_struct *ctx)
240 return SIGILL; 243 return SIGILL;
241 } 244 }
242 if (get_user(ir, (mips_instruction __user *) emulpc)) { 245 if (get_user(ir, (mips_instruction __user *) emulpc)) {
243 fpuemustats.errors++; 246 MIPS_FPU_EMU_INC_STATS(errors);
244 return SIGBUS; 247 return SIGBUS;
245 } 248 }
246 /* __compute_return_epc() will have updated cp0_epc */ 249 /* __compute_return_epc() will have updated cp0_epc */
@@ -253,16 +256,16 @@ static int cop1Emulate(struct pt_regs *xcp, struct mips_fpu_struct *ctx)
253 } 256 }
254 257
255 emul: 258 emul:
256 fpuemustats.emulated++; 259 MIPS_FPU_EMU_INC_STATS(emulated);
257 switch (MIPSInst_OPCODE(ir)) { 260 switch (MIPSInst_OPCODE(ir)) {
258 case ldc1_op:{ 261 case ldc1_op:{
259 u64 __user *va = (u64 __user *) (xcp->regs[MIPSInst_RS(ir)] + 262 u64 __user *va = (u64 __user *) (xcp->regs[MIPSInst_RS(ir)] +
260 MIPSInst_SIMM(ir)); 263 MIPSInst_SIMM(ir));
261 u64 val; 264 u64 val;
262 265
263 fpuemustats.loads++; 266 MIPS_FPU_EMU_INC_STATS(loads);
264 if (get_user(val, va)) { 267 if (get_user(val, va)) {
265 fpuemustats.errors++; 268 MIPS_FPU_EMU_INC_STATS(errors);
266 return SIGBUS; 269 return SIGBUS;
267 } 270 }
268 DITOREG(val, MIPSInst_RT(ir)); 271 DITOREG(val, MIPSInst_RT(ir));
@@ -274,10 +277,10 @@ static int cop1Emulate(struct pt_regs *xcp, struct mips_fpu_struct *ctx)
274 MIPSInst_SIMM(ir)); 277 MIPSInst_SIMM(ir));
275 u64 val; 278 u64 val;
276 279
277 fpuemustats.stores++; 280 MIPS_FPU_EMU_INC_STATS(stores);
278 DIFROMREG(val, MIPSInst_RT(ir)); 281 DIFROMREG(val, MIPSInst_RT(ir));
279 if (put_user(val, va)) { 282 if (put_user(val, va)) {
280 fpuemustats.errors++; 283 MIPS_FPU_EMU_INC_STATS(errors);
281 return SIGBUS; 284 return SIGBUS;
282 } 285 }
283 break; 286 break;
@@ -288,9 +291,9 @@ static int cop1Emulate(struct pt_regs *xcp, struct mips_fpu_struct *ctx)
288 MIPSInst_SIMM(ir)); 291 MIPSInst_SIMM(ir));
289 u32 val; 292 u32 val;
290 293
291 fpuemustats.loads++; 294 MIPS_FPU_EMU_INC_STATS(loads);
292 if (get_user(val, va)) { 295 if (get_user(val, va)) {
293 fpuemustats.errors++; 296 MIPS_FPU_EMU_INC_STATS(errors);
294 return SIGBUS; 297 return SIGBUS;
295 } 298 }
296 SITOREG(val, MIPSInst_RT(ir)); 299 SITOREG(val, MIPSInst_RT(ir));
@@ -302,10 +305,10 @@ static int cop1Emulate(struct pt_regs *xcp, struct mips_fpu_struct *ctx)
302 MIPSInst_SIMM(ir)); 305 MIPSInst_SIMM(ir));
303 u32 val; 306 u32 val;
304 307
305 fpuemustats.stores++; 308 MIPS_FPU_EMU_INC_STATS(stores);
306 SIFROMREG(val, MIPSInst_RT(ir)); 309 SIFROMREG(val, MIPSInst_RT(ir));
307 if (put_user(val, va)) { 310 if (put_user(val, va)) {
308 fpuemustats.errors++; 311 MIPS_FPU_EMU_INC_STATS(errors);
309 return SIGBUS; 312 return SIGBUS;
310 } 313 }
311 break; 314 break;
@@ -429,7 +432,7 @@ static int cop1Emulate(struct pt_regs *xcp, struct mips_fpu_struct *ctx)
429 432
430 if (get_user(ir, 433 if (get_user(ir,
431 (mips_instruction __user *) xcp->cp0_epc)) { 434 (mips_instruction __user *) xcp->cp0_epc)) {
432 fpuemustats.errors++; 435 MIPS_FPU_EMU_INC_STATS(errors);
433 return SIGBUS; 436 return SIGBUS;
434 } 437 }
435 438
@@ -595,7 +598,7 @@ static int fpux_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
595{ 598{
596 unsigned rcsr = 0; /* resulting csr */ 599 unsigned rcsr = 0; /* resulting csr */
597 600
598 fpuemustats.cp1xops++; 601 MIPS_FPU_EMU_INC_STATS(cp1xops);
599 602
600 switch (MIPSInst_FMA_FFMT(ir)) { 603 switch (MIPSInst_FMA_FFMT(ir)) {
601 case s_fmt:{ /* 0 */ 604 case s_fmt:{ /* 0 */
@@ -610,9 +613,9 @@ static int fpux_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
610 va = (void __user *) (xcp->regs[MIPSInst_FR(ir)] + 613 va = (void __user *) (xcp->regs[MIPSInst_FR(ir)] +
611 xcp->regs[MIPSInst_FT(ir)]); 614 xcp->regs[MIPSInst_FT(ir)]);
612 615
613 fpuemustats.loads++; 616 MIPS_FPU_EMU_INC_STATS(loads);
614 if (get_user(val, va)) { 617 if (get_user(val, va)) {
615 fpuemustats.errors++; 618 MIPS_FPU_EMU_INC_STATS(errors);
616 return SIGBUS; 619 return SIGBUS;
617 } 620 }
618 SITOREG(val, MIPSInst_FD(ir)); 621 SITOREG(val, MIPSInst_FD(ir));
@@ -622,11 +625,11 @@ static int fpux_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
622 va = (void __user *) (xcp->regs[MIPSInst_FR(ir)] + 625 va = (void __user *) (xcp->regs[MIPSInst_FR(ir)] +
623 xcp->regs[MIPSInst_FT(ir)]); 626 xcp->regs[MIPSInst_FT(ir)]);
624 627
625 fpuemustats.stores++; 628 MIPS_FPU_EMU_INC_STATS(stores);
626 629
627 SIFROMREG(val, MIPSInst_FS(ir)); 630 SIFROMREG(val, MIPSInst_FS(ir));
628 if (put_user(val, va)) { 631 if (put_user(val, va)) {
629 fpuemustats.errors++; 632 MIPS_FPU_EMU_INC_STATS(errors);
630 return SIGBUS; 633 return SIGBUS;
631 } 634 }
632 break; 635 break;
@@ -687,9 +690,9 @@ static int fpux_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
687 va = (void __user *) (xcp->regs[MIPSInst_FR(ir)] + 690 va = (void __user *) (xcp->regs[MIPSInst_FR(ir)] +
688 xcp->regs[MIPSInst_FT(ir)]); 691 xcp->regs[MIPSInst_FT(ir)]);
689 692
690 fpuemustats.loads++; 693 MIPS_FPU_EMU_INC_STATS(loads);
691 if (get_user(val, va)) { 694 if (get_user(val, va)) {
692 fpuemustats.errors++; 695 MIPS_FPU_EMU_INC_STATS(errors);
693 return SIGBUS; 696 return SIGBUS;
694 } 697 }
695 DITOREG(val, MIPSInst_FD(ir)); 698 DITOREG(val, MIPSInst_FD(ir));
@@ -699,10 +702,10 @@ static int fpux_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
699 va = (void __user *) (xcp->regs[MIPSInst_FR(ir)] + 702 va = (void __user *) (xcp->regs[MIPSInst_FR(ir)] +
700 xcp->regs[MIPSInst_FT(ir)]); 703 xcp->regs[MIPSInst_FT(ir)]);
701 704
702 fpuemustats.stores++; 705 MIPS_FPU_EMU_INC_STATS(stores);
703 DIFROMREG(val, MIPSInst_FS(ir)); 706 DIFROMREG(val, MIPSInst_FS(ir));
704 if (put_user(val, va)) { 707 if (put_user(val, va)) {
705 fpuemustats.errors++; 708 MIPS_FPU_EMU_INC_STATS(errors);
706 return SIGBUS; 709 return SIGBUS;
707 } 710 }
708 break; 711 break;
@@ -769,7 +772,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
769#endif 772#endif
770 } rv; /* resulting value */ 773 } rv; /* resulting value */
771 774
772 fpuemustats.cp1ops++; 775 MIPS_FPU_EMU_INC_STATS(cp1ops);
773 switch (rfmt = (MIPSInst_FFMT(ir) & 0xf)) { 776 switch (rfmt = (MIPSInst_FFMT(ir) & 0xf)) {
774 case s_fmt:{ /* 0 */ 777 case s_fmt:{ /* 0 */
775 union { 778 union {
@@ -1240,7 +1243,7 @@ int fpu_emulator_cop1Handler(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
1240 prevepc = xcp->cp0_epc; 1243 prevepc = xcp->cp0_epc;
1241 1244
1242 if (get_user(insn, (mips_instruction __user *) xcp->cp0_epc)) { 1245 if (get_user(insn, (mips_instruction __user *) xcp->cp0_epc)) {
1243 fpuemustats.errors++; 1246 MIPS_FPU_EMU_INC_STATS(errors);
1244 return SIGBUS; 1247 return SIGBUS;
1245 } 1248 }
1246 if (insn == 0) 1249 if (insn == 0)
@@ -1276,33 +1279,50 @@ int fpu_emulator_cop1Handler(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
1276} 1279}
1277 1280
1278#ifdef CONFIG_DEBUG_FS 1281#ifdef CONFIG_DEBUG_FS
1282
1283static int fpuemu_stat_get(void *data, u64 *val)
1284{
1285 int cpu;
1286 unsigned long sum = 0;
1287 for_each_online_cpu(cpu) {
1288 struct mips_fpu_emulator_stats *ps;
1289 local_t *pv;
1290 ps = &per_cpu(fpuemustats, cpu);
1291 pv = (void *)ps + (unsigned long)data;
1292 sum += local_read(pv);
1293 }
1294 *val = sum;
1295 return 0;
1296}
1297DEFINE_SIMPLE_ATTRIBUTE(fops_fpuemu_stat, fpuemu_stat_get, NULL, "%llu\n");
1298
1279extern struct dentry *mips_debugfs_dir; 1299extern struct dentry *mips_debugfs_dir;
1280static int __init debugfs_fpuemu(void) 1300static int __init debugfs_fpuemu(void)
1281{ 1301{
1282 struct dentry *d, *dir; 1302 struct dentry *d, *dir;
1283 int i;
1284 static struct {
1285 const char *name;
1286 unsigned int *v;
1287 } vars[] __initdata = {
1288 { "emulated", &fpuemustats.emulated },
1289 { "loads", &fpuemustats.loads },
1290 { "stores", &fpuemustats.stores },
1291 { "cp1ops", &fpuemustats.cp1ops },
1292 { "cp1xops", &fpuemustats.cp1xops },
1293 { "errors", &fpuemustats.errors },
1294 };
1295 1303
1296 if (!mips_debugfs_dir) 1304 if (!mips_debugfs_dir)
1297 return -ENODEV; 1305 return -ENODEV;
1298 dir = debugfs_create_dir("fpuemustats", mips_debugfs_dir); 1306 dir = debugfs_create_dir("fpuemustats", mips_debugfs_dir);
1299 if (!dir) 1307 if (!dir)
1300 return -ENOMEM; 1308 return -ENOMEM;
1301 for (i = 0; i < ARRAY_SIZE(vars); i++) { 1309
1302 d = debugfs_create_u32(vars[i].name, S_IRUGO, dir, vars[i].v); 1310#define FPU_STAT_CREATE(M) \
1303 if (!d) 1311 do { \
1304 return -ENOMEM; 1312 d = debugfs_create_file(#M , S_IRUGO, dir, \
1305 } 1313 (void *)offsetof(struct mips_fpu_emulator_stats, M), \
1314 &fops_fpuemu_stat); \
1315 if (!d) \
1316 return -ENOMEM; \
1317 } while (0)
1318
1319 FPU_STAT_CREATE(emulated);
1320 FPU_STAT_CREATE(loads);
1321 FPU_STAT_CREATE(stores);
1322 FPU_STAT_CREATE(cp1ops);
1323 FPU_STAT_CREATE(cp1xops);
1324 FPU_STAT_CREATE(errors);
1325
1306 return 0; 1326 return 0;
1307} 1327}
1308__initcall(debugfs_fpuemu); 1328__initcall(debugfs_fpuemu);
diff --git a/arch/mips/math-emu/dsemul.c b/arch/mips/math-emu/dsemul.c
index df7b9d928efc..36d975ae08f8 100644
--- a/arch/mips/math-emu/dsemul.c
+++ b/arch/mips/math-emu/dsemul.c
@@ -98,7 +98,7 @@ int mips_dsemul(struct pt_regs *regs, mips_instruction ir, unsigned long cpc)
98 err |= __put_user(cpc, &fr->epc); 98 err |= __put_user(cpc, &fr->epc);
99 99
100 if (unlikely(err)) { 100 if (unlikely(err)) {
101 fpuemustats.errors++; 101 MIPS_FPU_EMU_INC_STATS(errors);
102 return SIGBUS; 102 return SIGBUS;
103 } 103 }
104 104
@@ -136,7 +136,7 @@ int do_dsemulret(struct pt_regs *xcp)
136 err |= __get_user(cookie, &fr->cookie); 136 err |= __get_user(cookie, &fr->cookie);
137 137
138 if (unlikely(err || (insn != BREAK_MATH) || (cookie != BD_COOKIE))) { 138 if (unlikely(err || (insn != BREAK_MATH) || (cookie != BD_COOKIE))) {
139 fpuemustats.errors++; 139 MIPS_FPU_EMU_INC_STATS(errors);
140 return 0; 140 return 0;
141 } 141 }
142 142