author	Anton Blanchard <anton@samba.org>	2009-10-26 14:50:29 -0400
committer	Paul Mackerras <paulus@samba.org>	2009-10-28 01:13:04 -0400
commit	c8cd093a6e9f96ea6b871576fd4e46d7c818bb89 (patch)
tree	2bad2c3a2cc68a35fb93d986a49bf543efcd0156 /arch/powerpc
parent	6795b85c6a4f690e61e7be31aa150d945c723fb5 (diff)
powerpc: tracing: Add hypervisor call tracepoints
Add hcall_entry and hcall_exit tracepoints. This replaces the inline
assembly HCALL_STATS code and converts it to use the new tracepoints.

To keep the disabled case as quick as possible, we embed a status word
in the TOC so we can get at it with a single load. By doing so we keep
the overhead at a minimum. Time taken for a null hcall:

No tracepoint code:	135.79 cycles
Disabled tracepoints:	137.95 cycles

For reference, before this patch enabling HCALL_STATS resulted in a
null hcall of 201.44 cycles!

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
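To make the fast path concrete, here is a minimal C sketch (not part of the
patch) of the check the hcall assembly below performs: one load of a
TOC-resident counter, one compare, and a fall-through past all tracing code
when no probe is registered.

	/* Sketch only: the real check is three instructions of assembly. */
	extern long hcall_tracepoint_refcount;	/* resides in the TOC */

	static inline int hcall_tracing_active(void)
	{
		/* single load + compare; near-free when tracing is off */
		return hcall_tracepoint_refcount != 0;
	}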
Diffstat (limited to 'arch/powerpc')
-rw-r--r--	arch/powerpc/Kconfig.debug	|   2
-rw-r--r--	arch/powerpc/include/asm/hvcall.h	|   2
-rw-r--r--	arch/powerpc/include/asm/trace.h	|  45
-rw-r--r--	arch/powerpc/platforms/pseries/hvCall.S	| 101
-rw-r--r--	arch/powerpc/platforms/pseries/hvCall_inst.c	|  37
-rw-r--r--	arch/powerpc/platforms/pseries/lpar.c	|  32
6 files changed, 175 insertions(+), 44 deletions(-)
diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index 3b1005185390..bf3382f1904d 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -46,7 +46,7 @@ config DEBUG_STACK_USAGE
 
 config HCALL_STATS
 	bool "Hypervisor call instrumentation"
-	depends on PPC_PSERIES && DEBUG_FS
+	depends on PPC_PSERIES && DEBUG_FS && TRACEPOINTS
 	help
 	  Adds code to keep track of the number of hypervisor calls made and
 	  the amount of time spent in hypervisor calls. Wall time spent in
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index 6251a4b10be7..c27caac47ad1 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -274,6 +274,8 @@ struct hcall_stats {
 	unsigned long	num_calls;	/* number of calls (on this CPU) */
 	unsigned long	tb_total;	/* total wall time (mftb) of calls. */
 	unsigned long	purr_total;	/* total cpu time (PURR) of calls. */
+	unsigned long	tb_start;
+	unsigned long	purr_start;
 };
 #define HCALL_STAT_ARRAY_SIZE	((MAX_HCALL_OPCODE >> 2) + 1)
 
diff --git a/arch/powerpc/include/asm/trace.h b/arch/powerpc/include/asm/trace.h
index b558c31d409e..9b01c0e43b55 100644
--- a/arch/powerpc/include/asm/trace.h
+++ b/arch/powerpc/include/asm/trace.h
@@ -76,6 +76,51 @@ TRACE_EVENT(timer_interrupt_exit,
 	TP_printk("pt_regs=%p", __entry->regs)
 );
 
+#ifdef CONFIG_PPC_PSERIES
+extern void hcall_tracepoint_regfunc(void);
+extern void hcall_tracepoint_unregfunc(void);
+
+TRACE_EVENT_FN(hcall_entry,
+
+	TP_PROTO(unsigned long opcode),
+
+	TP_ARGS(opcode),
+
+	TP_STRUCT__entry(
+		__field(unsigned long, opcode)
+	),
+
+	TP_fast_assign(
+		__entry->opcode = opcode;
+	),
+
+	TP_printk("opcode=%lu", __entry->opcode),
+
+	hcall_tracepoint_regfunc, hcall_tracepoint_unregfunc
+);
+
+TRACE_EVENT_FN(hcall_exit,
+
+	TP_PROTO(unsigned long opcode, unsigned long retval),
+
+	TP_ARGS(opcode, retval),
+
+	TP_STRUCT__entry(
+		__field(unsigned long, opcode)
+		__field(unsigned long, retval)
+	),
+
+	TP_fast_assign(
+		__entry->opcode = opcode;
+		__entry->retval = retval;
+	),
+
+	TP_printk("opcode=%lu retval=%lu", __entry->opcode, __entry->retval),
+
+	hcall_tracepoint_regfunc, hcall_tracepoint_unregfunc
+);
+#endif
+
 #endif /* _TRACE_POWERPC_H */
 
 #undef TRACE_INCLUDE_PATH
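TRACE_EVENT_FN differs from plain TRACE_EVENT in its two trailing arguments:
callbacks that run on each probe registration and unregistration. A short
sketch of attaching a probe to the new event, mirroring what hvCall_inst.c
does further down (the register_trace_hcall_entry() wrapper is generated by
the event definition; the probe name here is hypothetical):

	#include <linux/kernel.h>
	#include <asm/trace.h>

	static void my_probe(unsigned long opcode)
	{
		pr_debug("hcall entry, opcode=%lu\n", opcode);
	}

	static int __init my_setup(void)
	{
		/* nonzero return means registration failed */
		return register_trace_hcall_entry(my_probe) ? -EINVAL : 0;
	}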
diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S
index c1427b3634ec..01e95ab18d35 100644
--- a/arch/powerpc/platforms/pseries/hvCall.S
+++ b/arch/powerpc/platforms/pseries/hvCall.S
@@ -14,20 +14,54 @@
 
 #define STK_PARM(i)	(48 + ((i)-3)*8)
 
-#ifdef CONFIG_HCALL_STATS
+#ifdef CONFIG_TRACEPOINTS
+
+	.section	".toc","aw"
+
+	.globl hcall_tracepoint_refcount
+hcall_tracepoint_refcount:
+	.llong	0
+
+	.section	".text"
+
 /*
  * precall must preserve all registers. use unused STK_PARM()
- * areas to save snapshots and opcode.
+ * areas to save snapshots and opcode. We branch around this
+ * in early init (eg when populating the MMU hashtable) by using an
+ * unconditional cpu feature.
  */
 #define HCALL_INST_PRECALL					\
-	std	r3,STK_PARM(r3)(r1);	/* save opcode */	\
-	mftb	r0;			/* get timebase and */	\
-	std	r0,STK_PARM(r5)(r1);	/* save for later */	\
 BEGIN_FTR_SECTION;						\
-	mfspr	r0,SPRN_PURR;		/* get PURR and */	\
-	std	r0,STK_PARM(r6)(r1);	/* save for later */	\
-END_FTR_SECTION_IFSET(CPU_FTR_PURR);
-
+	b	1f;						\
+END_FTR_SECTION(0, 1);						\
+	ld	r12,hcall_tracepoint_refcount@toc(r2);		\
+	cmpdi	r12,0;						\
+	beq+	1f;						\
+	mflr	r0;						\
+	std	r3,STK_PARM(r3)(r1);				\
+	std	r4,STK_PARM(r4)(r1);				\
+	std	r5,STK_PARM(r5)(r1);				\
+	std	r6,STK_PARM(r6)(r1);				\
+	std	r7,STK_PARM(r7)(r1);				\
+	std	r8,STK_PARM(r8)(r1);				\
+	std	r9,STK_PARM(r9)(r1);				\
+	std	r10,STK_PARM(r10)(r1);				\
+	std	r0,16(r1);					\
+	stdu	r1,-STACK_FRAME_OVERHEAD(r1);			\
+	bl	.__trace_hcall_entry;				\
+	addi	r1,r1,STACK_FRAME_OVERHEAD;			\
+	ld	r0,16(r1);					\
+	ld	r3,STK_PARM(r3)(r1);				\
+	ld	r4,STK_PARM(r4)(r1);				\
+	ld	r5,STK_PARM(r5)(r1);				\
+	ld	r6,STK_PARM(r6)(r1);				\
+	ld	r7,STK_PARM(r7)(r1);				\
+	ld	r8,STK_PARM(r8)(r1);				\
+	ld	r9,STK_PARM(r9)(r1);				\
+	ld	r10,STK_PARM(r10)(r1);				\
+	mtlr	r0;						\
+1:
+
 /*
  * postcall is performed immediately before function return which
  * allows liberal use of volatile registers. We branch around this
@@ -38,40 +72,21 @@ END_FTR_SECTION_IFSET(CPU_FTR_PURR);
 BEGIN_FTR_SECTION;						\
 	b	1f;						\
 END_FTR_SECTION(0, 1);						\
-	ld	r4,STK_PARM(r3)(r1);	/* validate opcode */	\
-	cmpldi	cr7,r4,MAX_HCALL_OPCODE;			\
-	bgt-	cr7,1f;						\
-								\
-	/* get time and PURR snapshots after hcall */		\
-	mftb	r7;			/* timebase after */	\
-BEGIN_FTR_SECTION;						\
-	mfspr	r8,SPRN_PURR;		/* PURR after */	\
-	ld	r6,STK_PARM(r6)(r1);	/* PURR before */	\
-	subf	r6,r6,r8;		/* delta */		\
-END_FTR_SECTION_IFSET(CPU_FTR_PURR);				\
-	ld	r5,STK_PARM(r5)(r1);	/* timebase before */	\
-	subf	r5,r5,r7;		/* time delta */	\
-								\
-	/* calculate address of stat structure r4 = opcode */	\
-	srdi	r4,r4,2;		/* index into array */	\
-	mulli	r4,r4,HCALL_STAT_SIZE;				\
-	LOAD_REG_ADDR(r7, per_cpu__hcall_stats);		\
-	add	r4,r4,r7;					\
-	ld	r7,PACA_DATA_OFFSET(r13);	/* per cpu offset */ \
-	add	r4,r4,r7;					\
-								\
-	/* update stats */					\
-	ld	r7,HCALL_STAT_CALLS(r4); /* count */		\
-	addi	r7,r7,1;					\
-	std	r7,HCALL_STAT_CALLS(r4);			\
-	ld	r7,HCALL_STAT_TB(r4);	/* timebase */		\
-	add	r7,r7,r5;					\
-	std	r7,HCALL_STAT_TB(r4);				\
-BEGIN_FTR_SECTION;						\
-	ld	r7,HCALL_STAT_PURR(r4);	/* PURR */		\
-	add	r7,r7,r6;					\
-	std	r7,HCALL_STAT_PURR(r4);				\
-END_FTR_SECTION_IFSET(CPU_FTR_PURR);				\
+	ld	r12,hcall_tracepoint_refcount@toc(r2);		\
+	cmpdi	r12,0;						\
+	beq+	1f;						\
+	mflr	r0;						\
+	ld	r6,STK_PARM(r3)(r1);				\
+	std	r3,STK_PARM(r3)(r1);				\
+	mr	r4,r3;						\
+	mr	r3,r6;						\
+	std	r0,16(r1);					\
+	stdu	r1,-STACK_FRAME_OVERHEAD(r1);			\
+	bl	.__trace_hcall_exit;				\
+	addi	r1,r1,STACK_FRAME_OVERHEAD;			\
+	ld	r0,16(r1);					\
+	ld	r3,STK_PARM(r3)(r1);				\
+	mtlr	r0;						\
 1:
 #else
 #define HCALL_INST_PRECALL
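The reason the precall saves r3-r10: calling into C (__trace_hcall_entry) may
clobber all volatile registers, but those registers still hold the hcall
arguments, so they are parked in the caller's parameter save area first. A
small user-space demonstration (not kernel code) of the STK_PARM() offsets
used above, which follow the ppc64 ELF ABI layout of one 8-byte slot per
argument register starting 48 bytes above the stack pointer:

	#include <stdio.h>

	/* offset from r1 of the save slot for argument register r3..r10 */
	#define STK_PARM(i)	(48 + ((i) - 3) * 8)

	int main(void)
	{
		for (int reg = 3; reg <= 10; reg++)
			printf("r%d -> %d(r1)\n", reg, STK_PARM(reg));
		return 0;
	}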
diff --git a/arch/powerpc/platforms/pseries/hvCall_inst.c b/arch/powerpc/platforms/pseries/hvCall_inst.c
index 3631a4f277eb..e44e1035f133 100644
--- a/arch/powerpc/platforms/pseries/hvCall_inst.c
+++ b/arch/powerpc/platforms/pseries/hvCall_inst.c
@@ -26,6 +26,7 @@
 #include <asm/hvcall.h>
 #include <asm/firmware.h>
 #include <asm/cputable.h>
+#include <asm/trace.h>
 
 DEFINE_PER_CPU(struct hcall_stats[HCALL_STAT_ARRAY_SIZE], hcall_stats);
 
@@ -100,6 +101,34 @@ static const struct file_operations hcall_inst_seq_fops = {
 #define HCALL_ROOT_DIR		"hcall_inst"
 #define CPU_NAME_BUF_SIZE	32
 
+
+static void probe_hcall_entry(unsigned long opcode)
+{
+	struct hcall_stats *h;
+
+	if (opcode > MAX_HCALL_OPCODE)
+		return;
+
+	h = &get_cpu_var(hcall_stats)[opcode / 4];
+	h->tb_start = mftb();
+	h->purr_start = mfspr(SPRN_PURR);
+}
+
+static void probe_hcall_exit(unsigned long opcode, unsigned long retval)
+{
+	struct hcall_stats *h;
+
+	if (opcode > MAX_HCALL_OPCODE)
+		return;
+
+	h = &__get_cpu_var(hcall_stats)[opcode / 4];
+	h->num_calls++;
+	h->tb_total = mftb() - h->tb_start;
+	h->purr_total = mfspr(SPRN_PURR) - h->purr_start;
+
+	put_cpu_var(hcall_stats);
+}
+
 static int __init hcall_inst_init(void)
 {
 	struct dentry *hcall_root;
@@ -110,6 +139,14 @@ static int __init hcall_inst_init(void)
 	if (!firmware_has_feature(FW_FEATURE_LPAR))
 		return 0;
 
+	if (register_trace_hcall_entry(probe_hcall_entry))
+		return -EINVAL;
+
+	if (register_trace_hcall_exit(probe_hcall_exit)) {
+		unregister_trace_hcall_entry(probe_hcall_entry);
+		return -EINVAL;
+	}
+
 	hcall_root = debugfs_create_dir(HCALL_ROOT_DIR, NULL);
 	if (!hcall_root)
 		return -ENOMEM;
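Note the deliberately unbalanced per-CPU accessors in the probes above:
probe_hcall_entry() uses get_cpu_var(), which disables preemption and leaves
it disabled, and the matching put_cpu_var() only happens in
probe_hcall_exit(), so both snapshots land in the same CPU's stats slot. A
minimal sketch of the same pattern (assuming the 2.6-era per-CPU API; not
part of the patch):

	#include <linux/percpu.h>
	#include <linux/preempt.h>

	static DEFINE_PER_CPU(unsigned long, demo_stat);

	static void demo_entry(void)
	{
		preempt_disable();		/* first half of get_cpu_var() */
		__get_cpu_var(demo_stat)++;	/* cannot migrate CPUs now */
	}

	static void demo_exit(void)
	{
		__get_cpu_var(demo_stat)++;	/* still the same CPU's slot */
		preempt_enable();		/* what put_cpu_var() performs */
	}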
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 903eb9eec687..4b7b6e8e32de 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -39,6 +39,7 @@
 #include <asm/cputable.h>
 #include <asm/udbg.h>
 #include <asm/smp.h>
+#include <asm/trace.h>
 
 #include "plpar_wrappers.h"
 #include "pseries.h"
@@ -661,3 +662,34 @@ void arch_free_page(struct page *page, int order)
 EXPORT_SYMBOL(arch_free_page);
 
 #endif
+
+#ifdef CONFIG_TRACEPOINTS
+/*
+ * We optimise our hcall path by placing hcall_tracepoint_refcount
+ * directly in the TOC so we can check if the hcall tracepoints are
+ * enabled via a single load.
+ */
+
+/* NB: reg/unreg are called while guarded with the tracepoints_mutex */
+extern long hcall_tracepoint_refcount;
+
+void hcall_tracepoint_regfunc(void)
+{
+	hcall_tracepoint_refcount++;
+}
+
+void hcall_tracepoint_unregfunc(void)
+{
+	hcall_tracepoint_refcount--;
+}
+
+void __trace_hcall_entry(unsigned long opcode)
+{
+	trace_hcall_entry(opcode);
+}
+
+void __trace_hcall_exit(long opcode, unsigned long retval)
+{
+	trace_hcall_exit(opcode, retval);
+}
+#endif
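The refcount contract can be modelled in a few lines of plain C (a model,
not kernel code): regfunc/unregfunc run once per probe (un)registration,
serialized by tracepoints_mutex per the comment above, so the assembly fast
path only skips the trace calls when no probe at all is attached.

	#include <assert.h>

	static long refcount;		/* models hcall_tracepoint_refcount */

	static void regfunc(void)   { refcount++; }
	static void unregfunc(void) { refcount--; }

	int main(void)
	{
		regfunc();		/* hcall_entry probe attached */
		regfunc();		/* hcall_exit probe attached */
		unregfunc();		/* one probe removed */
		assert(refcount != 0);	/* fast path still traces */
		unregfunc();
		assert(refcount == 0);	/* single load now skips tracing */
		return 0;
	}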