diff options
author | Anton Blanchard <anton@samba.org> | 2009-10-26 14:50:29 -0400 |
---|---|---|
committer | Paul Mackerras <paulus@samba.org> | 2009-10-28 01:13:04 -0400 |
commit | c8cd093a6e9f96ea6b871576fd4e46d7c818bb89 (patch) | |
tree | 2bad2c3a2cc68a35fb93d986a49bf543efcd0156 | |
parent | 6795b85c6a4f690e61e7be31aa150d945c723fb5 (diff) |
powerpc: tracing: Add hypervisor call tracepoints
Add hcall_entry and hcall_exit tracepoints. This replaces the inline
assembly HCALL_STATS code and converts it to use the new tracepoints.
To keep the disabled case as quick as possible, we embed a status word
in the TOC so we can get at it with a single load. By doing so we
keep the overhead at a minimum. Time taken for a null hcall:
No tracepoint code: 135.79 cycles
Disabled tracepoints: 137.95 cycles
For reference, before this patch enabling HCALL_STATS resulted in a null
hcall of 201.44 cycles!
Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
-rw-r--r-- | arch/powerpc/Kconfig.debug | 2 | ||||
-rw-r--r-- | arch/powerpc/include/asm/hvcall.h | 2 | ||||
-rw-r--r-- | arch/powerpc/include/asm/trace.h | 45 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/hvCall.S | 101 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/hvCall_inst.c | 37 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/lpar.c | 32 |
6 files changed, 175 insertions, 44 deletions
diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index 3b1005185390..bf3382f1904d 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug | |||
@@ -46,7 +46,7 @@ config DEBUG_STACK_USAGE | |||
46 | 46 | ||
47 | config HCALL_STATS | 47 | config HCALL_STATS |
48 | bool "Hypervisor call instrumentation" | 48 | bool "Hypervisor call instrumentation" |
49 | depends on PPC_PSERIES && DEBUG_FS | 49 | depends on PPC_PSERIES && DEBUG_FS && TRACEPOINTS |
50 | help | 50 | help |
51 | Adds code to keep track of the number of hypervisor calls made and | 51 | Adds code to keep track of the number of hypervisor calls made and |
52 | the amount of time spent in hypervisor calls. Wall time spent in | 52 | the amount of time spent in hypervisor calls. Wall time spent in |
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 6251a4b10be7..c27caac47ad1 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h | |||
@@ -274,6 +274,8 @@ struct hcall_stats { | |||
274 | unsigned long num_calls; /* number of calls (on this CPU) */ | 274 | unsigned long num_calls; /* number of calls (on this CPU) */ |
275 | unsigned long tb_total; /* total wall time (mftb) of calls. */ | 275 | unsigned long tb_total; /* total wall time (mftb) of calls. */ |
276 | unsigned long purr_total; /* total cpu time (PURR) of calls. */ | 276 | unsigned long purr_total; /* total cpu time (PURR) of calls. */ |
277 | unsigned long tb_start; | ||
278 | unsigned long purr_start; | ||
277 | }; | 279 | }; |
278 | #define HCALL_STAT_ARRAY_SIZE ((MAX_HCALL_OPCODE >> 2) + 1) | 280 | #define HCALL_STAT_ARRAY_SIZE ((MAX_HCALL_OPCODE >> 2) + 1) |
279 | 281 | ||
diff --git a/arch/powerpc/include/asm/trace.h b/arch/powerpc/include/asm/trace.h index b558c31d409e..9b01c0e43b55 100644 --- a/arch/powerpc/include/asm/trace.h +++ b/arch/powerpc/include/asm/trace.h | |||
@@ -76,6 +76,51 @@ TRACE_EVENT(timer_interrupt_exit, | |||
76 | TP_printk("pt_regs=%p", __entry->regs) | 76 | TP_printk("pt_regs=%p", __entry->regs) |
77 | ); | 77 | ); |
78 | 78 | ||
79 | #ifdef CONFIG_PPC_PSERIES | ||
80 | extern void hcall_tracepoint_regfunc(void); | ||
81 | extern void hcall_tracepoint_unregfunc(void); | ||
82 | |||
83 | TRACE_EVENT_FN(hcall_entry, | ||
84 | |||
85 | TP_PROTO(unsigned long opcode), | ||
86 | |||
87 | TP_ARGS(opcode), | ||
88 | |||
89 | TP_STRUCT__entry( | ||
90 | __field(unsigned long, opcode) | ||
91 | ), | ||
92 | |||
93 | TP_fast_assign( | ||
94 | __entry->opcode = opcode; | ||
95 | ), | ||
96 | |||
97 | TP_printk("opcode=%lu", __entry->opcode), | ||
98 | |||
99 | hcall_tracepoint_regfunc, hcall_tracepoint_unregfunc | ||
100 | ); | ||
101 | |||
102 | TRACE_EVENT_FN(hcall_exit, | ||
103 | |||
104 | TP_PROTO(unsigned long opcode, unsigned long retval), | ||
105 | |||
106 | TP_ARGS(opcode, retval), | ||
107 | |||
108 | TP_STRUCT__entry( | ||
109 | __field(unsigned long, opcode) | ||
110 | __field(unsigned long, retval) | ||
111 | ), | ||
112 | |||
113 | TP_fast_assign( | ||
114 | __entry->opcode = opcode; | ||
115 | __entry->retval = retval; | ||
116 | ), | ||
117 | |||
118 | TP_printk("opcode=%lu retval=%lu", __entry->opcode, __entry->retval), | ||
119 | |||
120 | hcall_tracepoint_regfunc, hcall_tracepoint_unregfunc | ||
121 | ); | ||
122 | #endif | ||
123 | |||
79 | #endif /* _TRACE_POWERPC_H */ | 124 | #endif /* _TRACE_POWERPC_H */ |
80 | 125 | ||
81 | #undef TRACE_INCLUDE_PATH | 126 | #undef TRACE_INCLUDE_PATH |
diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S index c1427b3634ec..01e95ab18d35 100644 --- a/arch/powerpc/platforms/pseries/hvCall.S +++ b/arch/powerpc/platforms/pseries/hvCall.S | |||
@@ -14,20 +14,54 @@ | |||
14 | 14 | ||
15 | #define STK_PARM(i) (48 + ((i)-3)*8) | 15 | #define STK_PARM(i) (48 + ((i)-3)*8) |
16 | 16 | ||
17 | #ifdef CONFIG_HCALL_STATS | 17 | #ifdef CONFIG_TRACEPOINTS |
18 | |||
19 | .section ".toc","aw" | ||
20 | |||
21 | .globl hcall_tracepoint_refcount | ||
22 | hcall_tracepoint_refcount: | ||
23 | .llong 0 | ||
24 | |||
25 | .section ".text" | ||
26 | |||
18 | /* | 27 | /* |
19 | * precall must preserve all registers. use unused STK_PARM() | 28 | * precall must preserve all registers. use unused STK_PARM() |
20 | * areas to save snapshots and opcode. | 29 | * areas to save snapshots and opcode. We branch around this |
30 | * in early init (eg when populating the MMU hashtable) by using an | ||
31 | * unconditional cpu feature. | ||
21 | */ | 32 | */ |
22 | #define HCALL_INST_PRECALL \ | 33 | #define HCALL_INST_PRECALL \ |
23 | std r3,STK_PARM(r3)(r1); /* save opcode */ \ | ||
24 | mftb r0; /* get timebase and */ \ | ||
25 | std r0,STK_PARM(r5)(r1); /* save for later */ \ | ||
26 | BEGIN_FTR_SECTION; \ | 34 | BEGIN_FTR_SECTION; \ |
27 | mfspr r0,SPRN_PURR; /* get PURR and */ \ | 35 | b 1f; \ |
28 | std r0,STK_PARM(r6)(r1); /* save for later */ \ | 36 | END_FTR_SECTION(0, 1); \ |
29 | END_FTR_SECTION_IFSET(CPU_FTR_PURR); | 37 | ld r12,hcall_tracepoint_refcount@toc(r2); \ |
30 | 38 | cmpdi r12,0; \ | |
39 | beq+ 1f; \ | ||
40 | mflr r0; \ | ||
41 | std r3,STK_PARM(r3)(r1); \ | ||
42 | std r4,STK_PARM(r4)(r1); \ | ||
43 | std r5,STK_PARM(r5)(r1); \ | ||
44 | std r6,STK_PARM(r6)(r1); \ | ||
45 | std r7,STK_PARM(r7)(r1); \ | ||
46 | std r8,STK_PARM(r8)(r1); \ | ||
47 | std r9,STK_PARM(r9)(r1); \ | ||
48 | std r10,STK_PARM(r10)(r1); \ | ||
49 | std r0,16(r1); \ | ||
50 | stdu r1,-STACK_FRAME_OVERHEAD(r1); \ | ||
51 | bl .__trace_hcall_entry; \ | ||
52 | addi r1,r1,STACK_FRAME_OVERHEAD; \ | ||
53 | ld r0,16(r1); \ | ||
54 | ld r3,STK_PARM(r3)(r1); \ | ||
55 | ld r4,STK_PARM(r4)(r1); \ | ||
56 | ld r5,STK_PARM(r5)(r1); \ | ||
57 | ld r6,STK_PARM(r6)(r1); \ | ||
58 | ld r7,STK_PARM(r7)(r1); \ | ||
59 | ld r8,STK_PARM(r8)(r1); \ | ||
60 | ld r9,STK_PARM(r9)(r1); \ | ||
61 | ld r10,STK_PARM(r10)(r1); \ | ||
62 | mtlr r0; \ | ||
63 | 1: | ||
64 | |||
31 | /* | 65 | /* |
32 | * postcall is performed immediately before function return which | 66 | * postcall is performed immediately before function return which |
33 | * allows liberal use of volatile registers. We branch around this | 67 | * allows liberal use of volatile registers. We branch around this |
@@ -38,40 +72,21 @@ END_FTR_SECTION_IFSET(CPU_FTR_PURR); | |||
38 | BEGIN_FTR_SECTION; \ | 72 | BEGIN_FTR_SECTION; \ |
39 | b 1f; \ | 73 | b 1f; \ |
40 | END_FTR_SECTION(0, 1); \ | 74 | END_FTR_SECTION(0, 1); \ |
41 | ld r4,STK_PARM(r3)(r1); /* validate opcode */ \ | 75 | ld r12,hcall_tracepoint_refcount@toc(r2); \ |
42 | cmpldi cr7,r4,MAX_HCALL_OPCODE; \ | 76 | cmpdi r12,0; \ |
43 | bgt- cr7,1f; \ | 77 | beq+ 1f; \ |
44 | \ | 78 | mflr r0; \ |
45 | /* get time and PURR snapshots after hcall */ \ | 79 | ld r6,STK_PARM(r3)(r1); \ |
46 | mftb r7; /* timebase after */ \ | 80 | std r3,STK_PARM(r3)(r1); \ |
47 | BEGIN_FTR_SECTION; \ | 81 | mr r4,r3; \ |
48 | mfspr r8,SPRN_PURR; /* PURR after */ \ | 82 | mr r3,r6; \ |
49 | ld r6,STK_PARM(r6)(r1); /* PURR before */ \ | 83 | std r0,16(r1); \ |
50 | subf r6,r6,r8; /* delta */ \ | 84 | stdu r1,-STACK_FRAME_OVERHEAD(r1); \ |
51 | END_FTR_SECTION_IFSET(CPU_FTR_PURR); \ | 85 | bl .__trace_hcall_exit; \ |
52 | ld r5,STK_PARM(r5)(r1); /* timebase before */ \ | 86 | addi r1,r1,STACK_FRAME_OVERHEAD; \ |
53 | subf r5,r5,r7; /* time delta */ \ | 87 | ld r0,16(r1); \ |
54 | \ | 88 | ld r3,STK_PARM(r3)(r1); \ |
55 | /* calculate address of stat structure r4 = opcode */ \ | 89 | mtlr r0; \ |
56 | srdi r4,r4,2; /* index into array */ \ | ||
57 | mulli r4,r4,HCALL_STAT_SIZE; \ | ||
58 | LOAD_REG_ADDR(r7, per_cpu__hcall_stats); \ | ||
59 | add r4,r4,r7; \ | ||
60 | ld r7,PACA_DATA_OFFSET(r13); /* per cpu offset */ \ | ||
61 | add r4,r4,r7; \ | ||
62 | \ | ||
63 | /* update stats */ \ | ||
64 | ld r7,HCALL_STAT_CALLS(r4); /* count */ \ | ||
65 | addi r7,r7,1; \ | ||
66 | std r7,HCALL_STAT_CALLS(r4); \ | ||
67 | ld r7,HCALL_STAT_TB(r4); /* timebase */ \ | ||
68 | add r7,r7,r5; \ | ||
69 | std r7,HCALL_STAT_TB(r4); \ | ||
70 | BEGIN_FTR_SECTION; \ | ||
71 | ld r7,HCALL_STAT_PURR(r4); /* PURR */ \ | ||
72 | add r7,r7,r6; \ | ||
73 | std r7,HCALL_STAT_PURR(r4); \ | ||
74 | END_FTR_SECTION_IFSET(CPU_FTR_PURR); \ | ||
75 | 1: | 90 | 1: |
76 | #else | 91 | #else |
77 | #define HCALL_INST_PRECALL | 92 | #define HCALL_INST_PRECALL |
diff --git a/arch/powerpc/platforms/pseries/hvCall_inst.c b/arch/powerpc/platforms/pseries/hvCall_inst.c index 3631a4f277eb..e44e1035f133 100644 --- a/arch/powerpc/platforms/pseries/hvCall_inst.c +++ b/arch/powerpc/platforms/pseries/hvCall_inst.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include <asm/hvcall.h> | 26 | #include <asm/hvcall.h> |
27 | #include <asm/firmware.h> | 27 | #include <asm/firmware.h> |
28 | #include <asm/cputable.h> | 28 | #include <asm/cputable.h> |
29 | #include <asm/trace.h> | ||
29 | 30 | ||
30 | DEFINE_PER_CPU(struct hcall_stats[HCALL_STAT_ARRAY_SIZE], hcall_stats); | 31 | DEFINE_PER_CPU(struct hcall_stats[HCALL_STAT_ARRAY_SIZE], hcall_stats); |
31 | 32 | ||
@@ -100,6 +101,34 @@ static const struct file_operations hcall_inst_seq_fops = { | |||
100 | #define HCALL_ROOT_DIR "hcall_inst" | 101 | #define HCALL_ROOT_DIR "hcall_inst" |
101 | #define CPU_NAME_BUF_SIZE 32 | 102 | #define CPU_NAME_BUF_SIZE 32 |
102 | 103 | ||
104 | |||
105 | static void probe_hcall_entry(unsigned long opcode) | ||
106 | { | ||
107 | struct hcall_stats *h; | ||
108 | |||
109 | if (opcode > MAX_HCALL_OPCODE) | ||
110 | return; | ||
111 | |||
112 | h = &get_cpu_var(hcall_stats)[opcode / 4]; | ||
113 | h->tb_start = mftb(); | ||
114 | h->purr_start = mfspr(SPRN_PURR); | ||
115 | } | ||
116 | |||
117 | static void probe_hcall_exit(unsigned long opcode, unsigned long retval) | ||
118 | { | ||
119 | struct hcall_stats *h; | ||
120 | |||
121 | if (opcode > MAX_HCALL_OPCODE) | ||
122 | return; | ||
123 | |||
124 | h = &__get_cpu_var(hcall_stats)[opcode / 4]; | ||
125 | h->num_calls++; | ||
126 | h->tb_total = mftb() - h->tb_start; | ||
127 | h->purr_total = mfspr(SPRN_PURR) - h->purr_start; | ||
128 | |||
129 | put_cpu_var(hcall_stats); | ||
130 | } | ||
131 | |||
103 | static int __init hcall_inst_init(void) | 132 | static int __init hcall_inst_init(void) |
104 | { | 133 | { |
105 | struct dentry *hcall_root; | 134 | struct dentry *hcall_root; |
@@ -110,6 +139,14 @@ static int __init hcall_inst_init(void) | |||
110 | if (!firmware_has_feature(FW_FEATURE_LPAR)) | 139 | if (!firmware_has_feature(FW_FEATURE_LPAR)) |
111 | return 0; | 140 | return 0; |
112 | 141 | ||
142 | if (register_trace_hcall_entry(probe_hcall_entry)) | ||
143 | return -EINVAL; | ||
144 | |||
145 | if (register_trace_hcall_exit(probe_hcall_exit)) { | ||
146 | unregister_trace_hcall_entry(probe_hcall_entry); | ||
147 | return -EINVAL; | ||
148 | } | ||
149 | |||
113 | hcall_root = debugfs_create_dir(HCALL_ROOT_DIR, NULL); | 150 | hcall_root = debugfs_create_dir(HCALL_ROOT_DIR, NULL); |
114 | if (!hcall_root) | 151 | if (!hcall_root) |
115 | return -ENOMEM; | 152 | return -ENOMEM; |
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 903eb9eec687..4b7b6e8e32de 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c | |||
@@ -39,6 +39,7 @@ | |||
39 | #include <asm/cputable.h> | 39 | #include <asm/cputable.h> |
40 | #include <asm/udbg.h> | 40 | #include <asm/udbg.h> |
41 | #include <asm/smp.h> | 41 | #include <asm/smp.h> |
42 | #include <asm/trace.h> | ||
42 | 43 | ||
43 | #include "plpar_wrappers.h" | 44 | #include "plpar_wrappers.h" |
44 | #include "pseries.h" | 45 | #include "pseries.h" |
@@ -661,3 +662,34 @@ void arch_free_page(struct page *page, int order) | |||
661 | EXPORT_SYMBOL(arch_free_page); | 662 | EXPORT_SYMBOL(arch_free_page); |
662 | 663 | ||
663 | #endif | 664 | #endif |
665 | |||
666 | #ifdef CONFIG_TRACEPOINTS | ||
667 | /* | ||
668 | * We optimise our hcall path by placing hcall_tracepoint_refcount | ||
669 | * directly in the TOC so we can check if the hcall tracepoints are | ||
670 | * enabled via a single load. | ||
671 | */ | ||
672 | |||
673 | /* NB: reg/unreg are called while guarded with the tracepoints_mutex */ | ||
674 | extern long hcall_tracepoint_refcount; | ||
675 | |||
676 | void hcall_tracepoint_regfunc(void) | ||
677 | { | ||
678 | hcall_tracepoint_refcount++; | ||
679 | } | ||
680 | |||
681 | void hcall_tracepoint_unregfunc(void) | ||
682 | { | ||
683 | hcall_tracepoint_refcount--; | ||
684 | } | ||
685 | |||
/*
 * Out-of-line wrapper that fires the hcall_entry tracepoint for @opcode.
 * It gives the event a plain function symbol to call.
 */
void __trace_hcall_entry(unsigned long opcode)
{
	trace_hcall_entry(opcode);
}
690 | |||
/*
 * Out-of-line wrapper that fires the hcall_exit tracepoint with the
 * hcall @opcode and the value the call returned (@retval).
 */
void __trace_hcall_exit(long opcode, unsigned long retval)
{
	/* The tracepoint takes the opcode as unsigned long. */
	trace_hcall_exit((unsigned long)opcode, retval);
}
695 | #endif | ||