diff options
author | David Daney <ddaney@caviumnetworks.com> | 2009-11-05 14:34:26 -0500 |
---|---|---|
committer | Ralf Baechle <ralf@linux-mips.org> | 2009-12-16 20:57:08 -0500 |
commit | b6ee75ed4fa201873d3a2b32dfce2dbd701a2de4 (patch) | |
tree | 4574e5e523e9773fb1c8e4e579dc8f3be133daa6 | |
parent | 32028f1f7bce32e72183129dc55fc23656e7081c (diff) |
MIPS: Collect FPU emulator statistics per-CPU.
On SMP systems, the collection of statistics can cause cache line
bouncing in the lines associated with the counters. There are also
races when the counters are incremented on multiple CPUs.
To fix both problems, we collect the statistics in per-CPU variables,
and add them up in the debugfs read operation.
As a test, I ran the LTP float_bessel test on a 12-CPU Octeon system:
Without CONFIG_DEBUG_FS: 2602 seconds.
With CONFIG_DEBUG_FS: 2640 seconds.
With non-cpu-local atomic statistics: 14569 seconds.
Signed-off-by: David Daney <ddaney@caviumnetworks.com>
Cc: linux-mips@linux-mips.org
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
-rw-r--r-- | arch/mips/include/asm/fpu_emulator.h | 24 | ||||
-rw-r--r-- | arch/mips/math-emu/cp1emu.c | 102 | ||||
-rw-r--r-- | arch/mips/math-emu/dsemul.c | 4 |
3 files changed, 80 insertions, 50 deletions
diff --git a/arch/mips/include/asm/fpu_emulator.h b/arch/mips/include/asm/fpu_emulator.h index e5189572956c..aecada6f6117 100644 --- a/arch/mips/include/asm/fpu_emulator.h +++ b/arch/mips/include/asm/fpu_emulator.h | |||
@@ -25,17 +25,27 @@ | |||
25 | 25 | ||
26 | #include <asm/break.h> | 26 | #include <asm/break.h> |
27 | #include <asm/inst.h> | 27 | #include <asm/inst.h> |
28 | #include <asm/local.h> | ||
29 | |||
30 | #ifdef CONFIG_DEBUG_FS | ||
28 | 31 | ||
29 | struct mips_fpu_emulator_stats { | 32 | struct mips_fpu_emulator_stats { |
30 | unsigned int emulated; | 33 | local_t emulated; |
31 | unsigned int loads; | 34 | local_t loads; |
32 | unsigned int stores; | 35 | local_t stores; |
33 | unsigned int cp1ops; | 36 | local_t cp1ops; |
34 | unsigned int cp1xops; | 37 | local_t cp1xops; |
35 | unsigned int errors; | 38 | local_t errors; |
36 | }; | 39 | }; |
37 | 40 | ||
38 | extern struct mips_fpu_emulator_stats fpuemustats; | 41 | DECLARE_PER_CPU(struct mips_fpu_emulator_stats, fpuemustats); |
42 | |||
43 | #define MIPS_FPU_EMU_INC_STATS(M) \ | ||
44 | cpu_local_wrap(__local_inc(&__get_cpu_var(fpuemustats).M)) | ||
45 | |||
46 | #else | ||
47 | #define MIPS_FPU_EMU_INC_STATS(M) do { } while (0) | ||
48 | #endif /* CONFIG_DEBUG_FS */ | ||
39 | 49 | ||
40 | extern int mips_dsemul(struct pt_regs *regs, mips_instruction ir, | 50 | extern int mips_dsemul(struct pt_regs *regs, mips_instruction ir, |
41 | unsigned long cpc); | 51 | unsigned long cpc); |
diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c index 454b53924490..8f2f8e9d8b21 100644 --- a/arch/mips/math-emu/cp1emu.c +++ b/arch/mips/math-emu/cp1emu.c | |||
@@ -35,6 +35,7 @@ | |||
35 | * better performance by compiling with -msoft-float! | 35 | * better performance by compiling with -msoft-float! |
36 | */ | 36 | */ |
37 | #include <linux/sched.h> | 37 | #include <linux/sched.h> |
38 | #include <linux/module.h> | ||
38 | #include <linux/debugfs.h> | 39 | #include <linux/debugfs.h> |
39 | 40 | ||
40 | #include <asm/inst.h> | 41 | #include <asm/inst.h> |
@@ -68,7 +69,9 @@ static int fpux_emu(struct pt_regs *, | |||
68 | 69 | ||
69 | /* Further private data for which no space exists in mips_fpu_struct */ | 70 | /* Further private data for which no space exists in mips_fpu_struct */ |
70 | 71 | ||
71 | struct mips_fpu_emulator_stats fpuemustats; | 72 | #ifdef CONFIG_DEBUG_FS |
73 | DEFINE_PER_CPU(struct mips_fpu_emulator_stats, fpuemustats); | ||
74 | #endif | ||
72 | 75 | ||
73 | /* Control registers */ | 76 | /* Control registers */ |
74 | 77 | ||
@@ -209,7 +212,7 @@ static int cop1Emulate(struct pt_regs *xcp, struct mips_fpu_struct *ctx) | |||
209 | unsigned int cond; | 212 | unsigned int cond; |
210 | 213 | ||
211 | if (get_user(ir, (mips_instruction __user *) xcp->cp0_epc)) { | 214 | if (get_user(ir, (mips_instruction __user *) xcp->cp0_epc)) { |
212 | fpuemustats.errors++; | 215 | MIPS_FPU_EMU_INC_STATS(errors); |
213 | return SIGBUS; | 216 | return SIGBUS; |
214 | } | 217 | } |
215 | 218 | ||
@@ -240,7 +243,7 @@ static int cop1Emulate(struct pt_regs *xcp, struct mips_fpu_struct *ctx) | |||
240 | return SIGILL; | 243 | return SIGILL; |
241 | } | 244 | } |
242 | if (get_user(ir, (mips_instruction __user *) emulpc)) { | 245 | if (get_user(ir, (mips_instruction __user *) emulpc)) { |
243 | fpuemustats.errors++; | 246 | MIPS_FPU_EMU_INC_STATS(errors); |
244 | return SIGBUS; | 247 | return SIGBUS; |
245 | } | 248 | } |
246 | /* __compute_return_epc() will have updated cp0_epc */ | 249 | /* __compute_return_epc() will have updated cp0_epc */ |
@@ -253,16 +256,16 @@ static int cop1Emulate(struct pt_regs *xcp, struct mips_fpu_struct *ctx) | |||
253 | } | 256 | } |
254 | 257 | ||
255 | emul: | 258 | emul: |
256 | fpuemustats.emulated++; | 259 | MIPS_FPU_EMU_INC_STATS(emulated); |
257 | switch (MIPSInst_OPCODE(ir)) { | 260 | switch (MIPSInst_OPCODE(ir)) { |
258 | case ldc1_op:{ | 261 | case ldc1_op:{ |
259 | u64 __user *va = (u64 __user *) (xcp->regs[MIPSInst_RS(ir)] + | 262 | u64 __user *va = (u64 __user *) (xcp->regs[MIPSInst_RS(ir)] + |
260 | MIPSInst_SIMM(ir)); | 263 | MIPSInst_SIMM(ir)); |
261 | u64 val; | 264 | u64 val; |
262 | 265 | ||
263 | fpuemustats.loads++; | 266 | MIPS_FPU_EMU_INC_STATS(loads); |
264 | if (get_user(val, va)) { | 267 | if (get_user(val, va)) { |
265 | fpuemustats.errors++; | 268 | MIPS_FPU_EMU_INC_STATS(errors); |
266 | return SIGBUS; | 269 | return SIGBUS; |
267 | } | 270 | } |
268 | DITOREG(val, MIPSInst_RT(ir)); | 271 | DITOREG(val, MIPSInst_RT(ir)); |
@@ -274,10 +277,10 @@ static int cop1Emulate(struct pt_regs *xcp, struct mips_fpu_struct *ctx) | |||
274 | MIPSInst_SIMM(ir)); | 277 | MIPSInst_SIMM(ir)); |
275 | u64 val; | 278 | u64 val; |
276 | 279 | ||
277 | fpuemustats.stores++; | 280 | MIPS_FPU_EMU_INC_STATS(stores); |
278 | DIFROMREG(val, MIPSInst_RT(ir)); | 281 | DIFROMREG(val, MIPSInst_RT(ir)); |
279 | if (put_user(val, va)) { | 282 | if (put_user(val, va)) { |
280 | fpuemustats.errors++; | 283 | MIPS_FPU_EMU_INC_STATS(errors); |
281 | return SIGBUS; | 284 | return SIGBUS; |
282 | } | 285 | } |
283 | break; | 286 | break; |
@@ -288,9 +291,9 @@ static int cop1Emulate(struct pt_regs *xcp, struct mips_fpu_struct *ctx) | |||
288 | MIPSInst_SIMM(ir)); | 291 | MIPSInst_SIMM(ir)); |
289 | u32 val; | 292 | u32 val; |
290 | 293 | ||
291 | fpuemustats.loads++; | 294 | MIPS_FPU_EMU_INC_STATS(loads); |
292 | if (get_user(val, va)) { | 295 | if (get_user(val, va)) { |
293 | fpuemustats.errors++; | 296 | MIPS_FPU_EMU_INC_STATS(errors); |
294 | return SIGBUS; | 297 | return SIGBUS; |
295 | } | 298 | } |
296 | SITOREG(val, MIPSInst_RT(ir)); | 299 | SITOREG(val, MIPSInst_RT(ir)); |
@@ -302,10 +305,10 @@ static int cop1Emulate(struct pt_regs *xcp, struct mips_fpu_struct *ctx) | |||
302 | MIPSInst_SIMM(ir)); | 305 | MIPSInst_SIMM(ir)); |
303 | u32 val; | 306 | u32 val; |
304 | 307 | ||
305 | fpuemustats.stores++; | 308 | MIPS_FPU_EMU_INC_STATS(stores); |
306 | SIFROMREG(val, MIPSInst_RT(ir)); | 309 | SIFROMREG(val, MIPSInst_RT(ir)); |
307 | if (put_user(val, va)) { | 310 | if (put_user(val, va)) { |
308 | fpuemustats.errors++; | 311 | MIPS_FPU_EMU_INC_STATS(errors); |
309 | return SIGBUS; | 312 | return SIGBUS; |
310 | } | 313 | } |
311 | break; | 314 | break; |
@@ -429,7 +432,7 @@ static int cop1Emulate(struct pt_regs *xcp, struct mips_fpu_struct *ctx) | |||
429 | 432 | ||
430 | if (get_user(ir, | 433 | if (get_user(ir, |
431 | (mips_instruction __user *) xcp->cp0_epc)) { | 434 | (mips_instruction __user *) xcp->cp0_epc)) { |
432 | fpuemustats.errors++; | 435 | MIPS_FPU_EMU_INC_STATS(errors); |
433 | return SIGBUS; | 436 | return SIGBUS; |
434 | } | 437 | } |
435 | 438 | ||
@@ -595,7 +598,7 @@ static int fpux_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, | |||
595 | { | 598 | { |
596 | unsigned rcsr = 0; /* resulting csr */ | 599 | unsigned rcsr = 0; /* resulting csr */ |
597 | 600 | ||
598 | fpuemustats.cp1xops++; | 601 | MIPS_FPU_EMU_INC_STATS(cp1xops); |
599 | 602 | ||
600 | switch (MIPSInst_FMA_FFMT(ir)) { | 603 | switch (MIPSInst_FMA_FFMT(ir)) { |
601 | case s_fmt:{ /* 0 */ | 604 | case s_fmt:{ /* 0 */ |
@@ -610,9 +613,9 @@ static int fpux_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, | |||
610 | va = (void __user *) (xcp->regs[MIPSInst_FR(ir)] + | 613 | va = (void __user *) (xcp->regs[MIPSInst_FR(ir)] + |
611 | xcp->regs[MIPSInst_FT(ir)]); | 614 | xcp->regs[MIPSInst_FT(ir)]); |
612 | 615 | ||
613 | fpuemustats.loads++; | 616 | MIPS_FPU_EMU_INC_STATS(loads); |
614 | if (get_user(val, va)) { | 617 | if (get_user(val, va)) { |
615 | fpuemustats.errors++; | 618 | MIPS_FPU_EMU_INC_STATS(errors); |
616 | return SIGBUS; | 619 | return SIGBUS; |
617 | } | 620 | } |
618 | SITOREG(val, MIPSInst_FD(ir)); | 621 | SITOREG(val, MIPSInst_FD(ir)); |
@@ -622,11 +625,11 @@ static int fpux_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, | |||
622 | va = (void __user *) (xcp->regs[MIPSInst_FR(ir)] + | 625 | va = (void __user *) (xcp->regs[MIPSInst_FR(ir)] + |
623 | xcp->regs[MIPSInst_FT(ir)]); | 626 | xcp->regs[MIPSInst_FT(ir)]); |
624 | 627 | ||
625 | fpuemustats.stores++; | 628 | MIPS_FPU_EMU_INC_STATS(stores); |
626 | 629 | ||
627 | SIFROMREG(val, MIPSInst_FS(ir)); | 630 | SIFROMREG(val, MIPSInst_FS(ir)); |
628 | if (put_user(val, va)) { | 631 | if (put_user(val, va)) { |
629 | fpuemustats.errors++; | 632 | MIPS_FPU_EMU_INC_STATS(errors); |
630 | return SIGBUS; | 633 | return SIGBUS; |
631 | } | 634 | } |
632 | break; | 635 | break; |
@@ -687,9 +690,9 @@ static int fpux_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, | |||
687 | va = (void __user *) (xcp->regs[MIPSInst_FR(ir)] + | 690 | va = (void __user *) (xcp->regs[MIPSInst_FR(ir)] + |
688 | xcp->regs[MIPSInst_FT(ir)]); | 691 | xcp->regs[MIPSInst_FT(ir)]); |
689 | 692 | ||
690 | fpuemustats.loads++; | 693 | MIPS_FPU_EMU_INC_STATS(loads); |
691 | if (get_user(val, va)) { | 694 | if (get_user(val, va)) { |
692 | fpuemustats.errors++; | 695 | MIPS_FPU_EMU_INC_STATS(errors); |
693 | return SIGBUS; | 696 | return SIGBUS; |
694 | } | 697 | } |
695 | DITOREG(val, MIPSInst_FD(ir)); | 698 | DITOREG(val, MIPSInst_FD(ir)); |
@@ -699,10 +702,10 @@ static int fpux_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, | |||
699 | va = (void __user *) (xcp->regs[MIPSInst_FR(ir)] + | 702 | va = (void __user *) (xcp->regs[MIPSInst_FR(ir)] + |
700 | xcp->regs[MIPSInst_FT(ir)]); | 703 | xcp->regs[MIPSInst_FT(ir)]); |
701 | 704 | ||
702 | fpuemustats.stores++; | 705 | MIPS_FPU_EMU_INC_STATS(stores); |
703 | DIFROMREG(val, MIPSInst_FS(ir)); | 706 | DIFROMREG(val, MIPSInst_FS(ir)); |
704 | if (put_user(val, va)) { | 707 | if (put_user(val, va)) { |
705 | fpuemustats.errors++; | 708 | MIPS_FPU_EMU_INC_STATS(errors); |
706 | return SIGBUS; | 709 | return SIGBUS; |
707 | } | 710 | } |
708 | break; | 711 | break; |
@@ -769,7 +772,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, | |||
769 | #endif | 772 | #endif |
770 | } rv; /* resulting value */ | 773 | } rv; /* resulting value */ |
771 | 774 | ||
772 | fpuemustats.cp1ops++; | 775 | MIPS_FPU_EMU_INC_STATS(cp1ops); |
773 | switch (rfmt = (MIPSInst_FFMT(ir) & 0xf)) { | 776 | switch (rfmt = (MIPSInst_FFMT(ir) & 0xf)) { |
774 | case s_fmt:{ /* 0 */ | 777 | case s_fmt:{ /* 0 */ |
775 | union { | 778 | union { |
@@ -1240,7 +1243,7 @@ int fpu_emulator_cop1Handler(struct pt_regs *xcp, struct mips_fpu_struct *ctx, | |||
1240 | prevepc = xcp->cp0_epc; | 1243 | prevepc = xcp->cp0_epc; |
1241 | 1244 | ||
1242 | if (get_user(insn, (mips_instruction __user *) xcp->cp0_epc)) { | 1245 | if (get_user(insn, (mips_instruction __user *) xcp->cp0_epc)) { |
1243 | fpuemustats.errors++; | 1246 | MIPS_FPU_EMU_INC_STATS(errors); |
1244 | return SIGBUS; | 1247 | return SIGBUS; |
1245 | } | 1248 | } |
1246 | if (insn == 0) | 1249 | if (insn == 0) |
@@ -1276,33 +1279,50 @@ int fpu_emulator_cop1Handler(struct pt_regs *xcp, struct mips_fpu_struct *ctx, | |||
1276 | } | 1279 | } |
1277 | 1280 | ||
1278 | #ifdef CONFIG_DEBUG_FS | 1281 | #ifdef CONFIG_DEBUG_FS |
1282 | |||
1283 | static int fpuemu_stat_get(void *data, u64 *val) | ||
1284 | { | ||
1285 | int cpu; | ||
1286 | unsigned long sum = 0; | ||
1287 | for_each_online_cpu(cpu) { | ||
1288 | struct mips_fpu_emulator_stats *ps; | ||
1289 | local_t *pv; | ||
1290 | ps = &per_cpu(fpuemustats, cpu); | ||
1291 | pv = (void *)ps + (unsigned long)data; | ||
1292 | sum += local_read(pv); | ||
1293 | } | ||
1294 | *val = sum; | ||
1295 | return 0; | ||
1296 | } | ||
1297 | DEFINE_SIMPLE_ATTRIBUTE(fops_fpuemu_stat, fpuemu_stat_get, NULL, "%llu\n"); | ||
1298 | |||
1279 | extern struct dentry *mips_debugfs_dir; | 1299 | extern struct dentry *mips_debugfs_dir; |
1280 | static int __init debugfs_fpuemu(void) | 1300 | static int __init debugfs_fpuemu(void) |
1281 | { | 1301 | { |
1282 | struct dentry *d, *dir; | 1302 | struct dentry *d, *dir; |
1283 | int i; | ||
1284 | static struct { | ||
1285 | const char *name; | ||
1286 | unsigned int *v; | ||
1287 | } vars[] __initdata = { | ||
1288 | { "emulated", &fpuemustats.emulated }, | ||
1289 | { "loads", &fpuemustats.loads }, | ||
1290 | { "stores", &fpuemustats.stores }, | ||
1291 | { "cp1ops", &fpuemustats.cp1ops }, | ||
1292 | { "cp1xops", &fpuemustats.cp1xops }, | ||
1293 | { "errors", &fpuemustats.errors }, | ||
1294 | }; | ||
1295 | 1303 | ||
1296 | if (!mips_debugfs_dir) | 1304 | if (!mips_debugfs_dir) |
1297 | return -ENODEV; | 1305 | return -ENODEV; |
1298 | dir = debugfs_create_dir("fpuemustats", mips_debugfs_dir); | 1306 | dir = debugfs_create_dir("fpuemustats", mips_debugfs_dir); |
1299 | if (!dir) | 1307 | if (!dir) |
1300 | return -ENOMEM; | 1308 | return -ENOMEM; |
1301 | for (i = 0; i < ARRAY_SIZE(vars); i++) { | 1309 | |
1302 | d = debugfs_create_u32(vars[i].name, S_IRUGO, dir, vars[i].v); | 1310 | #define FPU_STAT_CREATE(M) \ |
1303 | if (!d) | 1311 | do { \ |
1304 | return -ENOMEM; | 1312 | d = debugfs_create_file(#M , S_IRUGO, dir, \ |
1305 | } | 1313 | (void *)offsetof(struct mips_fpu_emulator_stats, M), \ |
1314 | &fops_fpuemu_stat); \ | ||
1315 | if (!d) \ | ||
1316 | return -ENOMEM; \ | ||
1317 | } while (0) | ||
1318 | |||
1319 | FPU_STAT_CREATE(emulated); | ||
1320 | FPU_STAT_CREATE(loads); | ||
1321 | FPU_STAT_CREATE(stores); | ||
1322 | FPU_STAT_CREATE(cp1ops); | ||
1323 | FPU_STAT_CREATE(cp1xops); | ||
1324 | FPU_STAT_CREATE(errors); | ||
1325 | |||
1306 | return 0; | 1326 | return 0; |
1307 | } | 1327 | } |
1308 | __initcall(debugfs_fpuemu); | 1328 | __initcall(debugfs_fpuemu); |
diff --git a/arch/mips/math-emu/dsemul.c b/arch/mips/math-emu/dsemul.c index df7b9d928efc..36d975ae08f8 100644 --- a/arch/mips/math-emu/dsemul.c +++ b/arch/mips/math-emu/dsemul.c | |||
@@ -98,7 +98,7 @@ int mips_dsemul(struct pt_regs *regs, mips_instruction ir, unsigned long cpc) | |||
98 | err |= __put_user(cpc, &fr->epc); | 98 | err |= __put_user(cpc, &fr->epc); |
99 | 99 | ||
100 | if (unlikely(err)) { | 100 | if (unlikely(err)) { |
101 | fpuemustats.errors++; | 101 | MIPS_FPU_EMU_INC_STATS(errors); |
102 | return SIGBUS; | 102 | return SIGBUS; |
103 | } | 103 | } |
104 | 104 | ||
@@ -136,7 +136,7 @@ int do_dsemulret(struct pt_regs *xcp) | |||
136 | err |= __get_user(cookie, &fr->cookie); | 136 | err |= __get_user(cookie, &fr->cookie); |
137 | 137 | ||
138 | if (unlikely(err || (insn != BREAK_MATH) || (cookie != BD_COOKIE))) { | 138 | if (unlikely(err || (insn != BREAK_MATH) || (cookie != BD_COOKIE))) { |
139 | fpuemustats.errors++; | 139 | MIPS_FPU_EMU_INC_STATS(errors); |
140 | return 0; | 140 | return 0; |
141 | } | 141 | } |
142 | 142 | ||