path: root/arch/powerpc/platforms
author	Anton Blanchard <anton@samba.org>	2009-10-26 14:50:29 -0400
committer	Paul Mackerras <paulus@samba.org>	2009-10-28 01:13:04 -0400
commit	c8cd093a6e9f96ea6b871576fd4e46d7c818bb89 (patch)
tree	2bad2c3a2cc68a35fb93d986a49bf543efcd0156 /arch/powerpc/platforms
parent	6795b85c6a4f690e61e7be31aa150d945c723fb5 (diff)
powerpc: tracing: Add hypervisor call tracepoints
Add hcall_entry and hcall_exit tracepoints.  This replaces the inline
assembly HCALL_STATS code and converts it to use the new tracepoints.

To keep the disabled case as quick as possible, we embed a status word
in the TOC so we can get at it with a single load.  By doing so we
keep the overhead at a minimum.

Time taken for a null hcall:

No tracepoint code:	135.79 cycles
Disabled tracepoints:	137.95 cycles

For reference, before this patch enabling HCALL_STATS resulted in a
null hcall of 201.44 cycles!

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
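The tracepoint definitions themselves live in arch/powerpc/include/asm/trace.h
and fall outside this view's 'arch/powerpc/platforms' path filter. As a
hedged sketch only, inferred from the reg/unreg functions and the
single-argument probe signatures visible in the diff below, the entry
event would look roughly like this (field layout illustrative, not part
of the diff shown here):

	/* Sketch of the asm/trace.h side, not shown in this path-filtered
	 * view.  TRACE_EVENT_FN is TRACE_EVENT plus register/unregister
	 * callbacks; here they maintain hcall_tracepoint_refcount so the
	 * assembly fast path can test a single TOC word. */
	TRACE_EVENT_FN(hcall_entry,

		TP_PROTO(unsigned long opcode),

		TP_ARGS(opcode),

		TP_STRUCT__entry(
			__field(unsigned long, opcode)
		),

		TP_fast_assign(
			__entry->opcode = opcode;
		),

		TP_printk("opcode=%lu", __entry->opcode),

		hcall_tracepoint_regfunc, hcall_tracepoint_unregfunc
	);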
Diffstat (limited to 'arch/powerpc/platforms')
-rw-r--r--	arch/powerpc/platforms/pseries/hvCall.S		101
-rw-r--r--	arch/powerpc/platforms/pseries/hvCall_inst.c	37
-rw-r--r--	arch/powerpc/platforms/pseries/lpar.c		32
3 files changed, 127 insertions(+), 43 deletions(-)
diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S
index c1427b3634ec..01e95ab18d35 100644
--- a/arch/powerpc/platforms/pseries/hvCall.S
+++ b/arch/powerpc/platforms/pseries/hvCall.S
@@ -14,20 +14,54 @@
 
 #define STK_PARM(i)	(48 + ((i)-3)*8)
 
-#ifdef CONFIG_HCALL_STATS
+#ifdef CONFIG_TRACEPOINTS
+
+	.section	".toc","aw"
+
+	.globl hcall_tracepoint_refcount
+hcall_tracepoint_refcount:
+	.llong	0
+
+	.section	".text"
+
 /*
  * precall must preserve all registers.  use unused STK_PARM()
- * areas to save snapshots and opcode.
+ * areas to save snapshots and opcode. We branch around this
+ * in early init (eg when populating the MMU hashtable) by using an
+ * unconditional cpu feature.
  */
 #define HCALL_INST_PRECALL					\
-	std	r3,STK_PARM(r3)(r1);	/* save opcode */	\
-	mftb	r0;			/* get timebase and */	\
-	std	r0,STK_PARM(r5)(r1);	/* save for later */	\
 BEGIN_FTR_SECTION;						\
-	mfspr	r0,SPRN_PURR;		/* get PURR and */	\
-	std	r0,STK_PARM(r6)(r1);	/* save for later */	\
-END_FTR_SECTION_IFSET(CPU_FTR_PURR);
+	b	1f;						\
+END_FTR_SECTION(0, 1);						\
+	ld	r12,hcall_tracepoint_refcount@toc(r2);		\
+	cmpdi	r12,0;						\
+	beq+	1f;						\
+	mflr	r0;						\
+	std	r3,STK_PARM(r3)(r1);				\
+	std	r4,STK_PARM(r4)(r1);				\
+	std	r5,STK_PARM(r5)(r1);				\
+	std	r6,STK_PARM(r6)(r1);				\
+	std	r7,STK_PARM(r7)(r1);				\
+	std	r8,STK_PARM(r8)(r1);				\
+	std	r9,STK_PARM(r9)(r1);				\
+	std	r10,STK_PARM(r10)(r1);				\
+	std	r0,16(r1);					\
+	stdu	r1,-STACK_FRAME_OVERHEAD(r1);			\
+	bl	.__trace_hcall_entry;				\
+	addi	r1,r1,STACK_FRAME_OVERHEAD;			\
+	ld	r0,16(r1);					\
+	ld	r3,STK_PARM(r3)(r1);				\
+	ld	r4,STK_PARM(r4)(r1);				\
+	ld	r5,STK_PARM(r5)(r1);				\
+	ld	r6,STK_PARM(r6)(r1);				\
+	ld	r7,STK_PARM(r7)(r1);				\
+	ld	r8,STK_PARM(r8)(r1);				\
+	ld	r9,STK_PARM(r9)(r1);				\
+	ld	r10,STK_PARM(r10)(r1);				\
+	mtlr	r0;						\
+1:
 
 /*
  * postcall is performed immediately before function return which
  * allows liberal use of volatile registers.  We branch around this
@@ -38,40 +72,21 @@ END_FTR_SECTION_IFSET(CPU_FTR_PURR);
 BEGIN_FTR_SECTION;						\
 	b	1f;						\
 END_FTR_SECTION(0, 1);						\
-	ld	r4,STK_PARM(r3)(r1);	/* validate opcode */	\
-	cmpldi	cr7,r4,MAX_HCALL_OPCODE;			\
-	bgt-	cr7,1f;						\
-								\
-	/* get time and PURR snapshots after hcall */		\
-	mftb	r7;			/* timebase after */	\
-BEGIN_FTR_SECTION;						\
-	mfspr	r8,SPRN_PURR;		/* PURR after */	\
-	ld	r6,STK_PARM(r6)(r1);	/* PURR before */	\
-	subf	r6,r6,r8;		/* delta */		\
-END_FTR_SECTION_IFSET(CPU_FTR_PURR);				\
-	ld	r5,STK_PARM(r5)(r1);	/* timebase before */	\
-	subf	r5,r5,r7;		/* time delta */	\
-								\
-	/* calculate address of stat structure r4 = opcode */	\
-	srdi	r4,r4,2;		/* index into array */	\
-	mulli	r4,r4,HCALL_STAT_SIZE;				\
-	LOAD_REG_ADDR(r7, per_cpu__hcall_stats);		\
-	add	r4,r4,r7;					\
-	ld	r7,PACA_DATA_OFFSET(r13); /* per cpu offset */	\
-	add	r4,r4,r7;					\
-								\
-	/* update stats	*/					\
-	ld	r7,HCALL_STAT_CALLS(r4); /* count */		\
-	addi	r7,r7,1;					\
-	std	r7,HCALL_STAT_CALLS(r4);			\
-	ld	r7,HCALL_STAT_TB(r4);	/* timebase */		\
-	add	r7,r7,r5;					\
-	std	r7,HCALL_STAT_TB(r4);				\
-BEGIN_FTR_SECTION;						\
-	ld	r7,HCALL_STAT_PURR(r4);	/* PURR */		\
-	add	r7,r7,r6;					\
-	std	r7,HCALL_STAT_PURR(r4);				\
-END_FTR_SECTION_IFSET(CPU_FTR_PURR);				\
+	ld	r12,hcall_tracepoint_refcount@toc(r2);		\
+	cmpdi	r12,0;						\
+	beq+	1f;						\
+	mflr	r0;						\
+	ld	r6,STK_PARM(r3)(r1);				\
+	std	r3,STK_PARM(r3)(r1);				\
+	mr	r4,r3;						\
+	mr	r3,r6;						\
+	std	r0,16(r1);					\
+	stdu	r1,-STACK_FRAME_OVERHEAD(r1);			\
+	bl	.__trace_hcall_exit;				\
+	addi	r1,r1,STACK_FRAME_OVERHEAD;			\
+	ld	r0,16(r1);					\
+	ld	r3,STK_PARM(r3)(r1);				\
+	mtlr	r0;						\
 1:
 #else
 #define HCALL_INST_PRECALL
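Read as C, the gate both macros above place in front of the slow path
amounts to the sketch below. Everything other than
hcall_tracepoint_refcount, __trace_hcall_entry and __trace_hcall_exit is
hypothetical (do_hcall stands in for the real HVSC sequence), and the
BEGIN_FTR_SECTION/END_FTR_SECTION(0, 1) pair additionally patches in an
unconditional branch over the whole block during early init:

	/* Illustrative C paraphrase of HCALL_INST_PRECALL/POSTCALL;
	 * do_hcall() is a hypothetical stand-in for the hypervisor
	 * call sequence itself. */
	static inline long traced_hcall(unsigned long opcode, unsigned long arg)
	{
		long ret;

		if (unlikely(hcall_tracepoint_refcount))	/* one TOC load */
			__trace_hcall_entry(opcode);

		ret = do_hcall(opcode, arg);			/* HVSC */

		if (unlikely(hcall_tracepoint_refcount))
			__trace_hcall_exit(opcode, ret);

		return ret;
	}

The disabled-case cost quoted in the commit message (137.95 vs 135.79
cycles) is exactly this refcount test.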
diff --git a/arch/powerpc/platforms/pseries/hvCall_inst.c b/arch/powerpc/platforms/pseries/hvCall_inst.c
index 3631a4f277eb..e44e1035f133 100644
--- a/arch/powerpc/platforms/pseries/hvCall_inst.c
+++ b/arch/powerpc/platforms/pseries/hvCall_inst.c
@@ -26,6 +26,7 @@
 #include <asm/hvcall.h>
 #include <asm/firmware.h>
 #include <asm/cputable.h>
+#include <asm/trace.h>
 
 DEFINE_PER_CPU(struct hcall_stats[HCALL_STAT_ARRAY_SIZE], hcall_stats);
 
@@ -100,6 +101,34 @@ static const struct file_operations hcall_inst_seq_fops = {
 #define HCALL_ROOT_DIR		"hcall_inst"
 #define CPU_NAME_BUF_SIZE	32
 
+
+static void probe_hcall_entry(unsigned long opcode)
+{
+	struct hcall_stats *h;
+
+	if (opcode > MAX_HCALL_OPCODE)
+		return;
+
+	h = &get_cpu_var(hcall_stats)[opcode / 4];
+	h->tb_start = mftb();
+	h->purr_start = mfspr(SPRN_PURR);
+}
+
+static void probe_hcall_exit(unsigned long opcode, unsigned long retval)
+{
+	struct hcall_stats *h;
+
+	if (opcode > MAX_HCALL_OPCODE)
+		return;
+
+	h = &__get_cpu_var(hcall_stats)[opcode / 4];
+	h->num_calls++;
+	h->tb_total = mftb() - h->tb_start;
+	h->purr_total = mfspr(SPRN_PURR) - h->purr_start;
+
+	put_cpu_var(hcall_stats);
+}
+
 static int __init hcall_inst_init(void)
 {
 	struct dentry *hcall_root;
@@ -110,6 +139,14 @@ static int __init hcall_inst_init(void)
 	if (!firmware_has_feature(FW_FEATURE_LPAR))
 		return 0;
 
+	if (register_trace_hcall_entry(probe_hcall_entry))
+		return -EINVAL;
+
+	if (register_trace_hcall_exit(probe_hcall_exit)) {
+		unregister_trace_hcall_entry(probe_hcall_entry);
+		return -EINVAL;
+	}
+
 	hcall_root = debugfs_create_dir(HCALL_ROOT_DIR, NULL);
 	if (!hcall_root)
 		return -ENOMEM;
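The probes above populate struct hcall_stats, whose definition lives in
arch/powerpc/include/asm/hvcall.h and is likewise outside this path
filter; the tb_start/purr_start fields the probes write would have to be
added there by the same patch. A sketch of the layout the probes assume
(field comments are guesses):

	/* Per-cpu stats record assumed by probe_hcall_entry/exit above;
	 * the authoritative definition is in asm/hvcall.h and may
	 * differ. */
	struct hcall_stats {
		unsigned long num_calls;	/* calls seen on this CPU */
		unsigned long tb_total;		/* timebase ticks */
		unsigned long purr_total;	/* PURR ticks */
		unsigned long tb_start;		/* snapshots taken at entry */
		unsigned long purr_start;
	};

Note the asymmetry in the accessors: get_cpu_var() in the entry probe
disables preemption, and the matching put_cpu_var() only comes in the
exit probe, relying on the hcall not sleeping in between.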
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 903eb9eec687..4b7b6e8e32de 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -39,6 +39,7 @@
 #include <asm/cputable.h>
 #include <asm/udbg.h>
 #include <asm/smp.h>
+#include <asm/trace.h>
 
 #include "plpar_wrappers.h"
 #include "pseries.h"
@@ -661,3 +662,34 @@ void arch_free_page(struct page *page, int order)
 EXPORT_SYMBOL(arch_free_page);
 
 #endif
+
+#ifdef CONFIG_TRACEPOINTS
+/*
+ * We optimise our hcall path by placing hcall_tracepoint_refcount
+ * directly in the TOC so we can check if the hcall tracepoints are
+ * enabled via a single load.
+ */
+
+/* NB: reg/unreg are called while guarded with the tracepoints_mutex */
+extern long hcall_tracepoint_refcount;
+
+void hcall_tracepoint_regfunc(void)
+{
+	hcall_tracepoint_refcount++;
+}
+
+void hcall_tracepoint_unregfunc(void)
+{
+	hcall_tracepoint_refcount--;
+}
+
+void __trace_hcall_entry(unsigned long opcode)
+{
+	trace_hcall_entry(opcode);
+}
+
+void __trace_hcall_exit(long opcode, unsigned long retval)
+{
+	trace_hcall_exit(opcode, retval);
+}
+#endif
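Registering the first probe against either event runs
hcall_tracepoint_regfunc(), which raises the TOC-resident refcount and
switches the assembly in hvCall.S onto its traced path; unregistering
the last probe drops it back to the two-instruction fast path. A
hypothetical consumer, using the same single-argument registration API
that hvCall_inst.c uses above (not part of this patch):

	/* Hypothetical example: count hcalls via the new tracepoints.
	 * Assumes the 2.6.32-era probe API with no data-cookie
	 * argument, as used by hvCall_inst.c above. */
	#include <linux/module.h>
	#include <linux/kernel.h>
	#include <asm/trace.h>

	static unsigned long nr_hcalls;

	static void count_hcall_entry(unsigned long opcode)
	{
		nr_hcalls++;	/* racy across CPUs; fine for a demo */
	}

	static int __init hcall_count_init(void)
	{
		return register_trace_hcall_entry(count_hcall_entry);
	}

	static void __exit hcall_count_exit(void)
	{
		unregister_trace_hcall_entry(count_hcall_entry);
		pr_info("hcall_count: saw %lu hcalls\n", nr_hcalls);
	}

	module_init(hcall_count_init);
	module_exit(hcall_count_exit);
	MODULE_LICENSE("GPL");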