diff options
| author | Anton Blanchard <anton@samba.org> | 2009-10-26 14:50:29 -0400 |
|---|---|---|
| committer | Paul Mackerras <paulus@samba.org> | 2009-10-28 01:13:04 -0400 |
| commit | c8cd093a6e9f96ea6b871576fd4e46d7c818bb89 (patch) | |
| tree | 2bad2c3a2cc68a35fb93d986a49bf543efcd0156 | |
| parent | 6795b85c6a4f690e61e7be31aa150d945c723fb5 (diff) | |
powerpc: tracing: Add hypervisor call tracepoints
Add hcall_entry and hcall_exit tracepoints. This removes the inline
assembly HCALL_STATS code and reimplements the statistics as probes
attached to the new tracepoints.
To keep the disabled case as quick as possible, we embed a status word
(hcall_tracepoint_refcount) in the TOC so we can check it with a single
load. This keeps the overhead to a minimum. Time taken for a null hcall:
No tracepoint code: 135.79 cycles
Disabled tracepoints: 137.95 cycles
For reference, before this patch, enabling HCALL_STATS resulted in a null
hcall taking 201.44 cycles!
Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
| -rw-r--r-- | arch/powerpc/Kconfig.debug | 2 | ||||
| -rw-r--r-- | arch/powerpc/include/asm/hvcall.h | 2 | ||||
| -rw-r--r-- | arch/powerpc/include/asm/trace.h | 45 | ||||
| -rw-r--r-- | arch/powerpc/platforms/pseries/hvCall.S | 101 | ||||
| -rw-r--r-- | arch/powerpc/platforms/pseries/hvCall_inst.c | 37 | ||||
| -rw-r--r-- | arch/powerpc/platforms/pseries/lpar.c | 32 |
6 files changed, 175 insertions, 44 deletions
diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index 3b1005185390..bf3382f1904d 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug | |||
| @@ -46,7 +46,7 @@ config DEBUG_STACK_USAGE | |||
| 46 | 46 | ||
| 47 | config HCALL_STATS | 47 | config HCALL_STATS |
| 48 | bool "Hypervisor call instrumentation" | 48 | bool "Hypervisor call instrumentation" |
| 49 | depends on PPC_PSERIES && DEBUG_FS | 49 | depends on PPC_PSERIES && DEBUG_FS && TRACEPOINTS |
| 50 | help | 50 | help |
| 51 | Adds code to keep track of the number of hypervisor calls made and | 51 | Adds code to keep track of the number of hypervisor calls made and |
| 52 | the amount of time spent in hypervisor calls. Wall time spent in | 52 | the amount of time spent in hypervisor calls. Wall time spent in |
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 6251a4b10be7..c27caac47ad1 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h | |||
| @@ -274,6 +274,8 @@ struct hcall_stats { | |||
| 274 | unsigned long num_calls; /* number of calls (on this CPU) */ | 274 | unsigned long num_calls; /* number of calls (on this CPU) */ |
| 275 | unsigned long tb_total; /* total wall time (mftb) of calls. */ | 275 | unsigned long tb_total; /* total wall time (mftb) of calls. */ |
| 276 | unsigned long purr_total; /* total cpu time (PURR) of calls. */ | 276 | unsigned long purr_total; /* total cpu time (PURR) of calls. */ |
| 277 | unsigned long tb_start; | ||
| 278 | unsigned long purr_start; | ||
| 277 | }; | 279 | }; |
| 278 | #define HCALL_STAT_ARRAY_SIZE ((MAX_HCALL_OPCODE >> 2) + 1) | 280 | #define HCALL_STAT_ARRAY_SIZE ((MAX_HCALL_OPCODE >> 2) + 1) |
| 279 | 281 | ||
diff --git a/arch/powerpc/include/asm/trace.h b/arch/powerpc/include/asm/trace.h index b558c31d409e..9b01c0e43b55 100644 --- a/arch/powerpc/include/asm/trace.h +++ b/arch/powerpc/include/asm/trace.h | |||
| @@ -76,6 +76,51 @@ TRACE_EVENT(timer_interrupt_exit, | |||
| 76 | TP_printk("pt_regs=%p", __entry->regs) | 76 | TP_printk("pt_regs=%p", __entry->regs) |
| 77 | ); | 77 | ); |
| 78 | 78 | ||
| 79 | #ifdef CONFIG_PPC_PSERIES | ||
| 80 | extern void hcall_tracepoint_regfunc(void); | ||
| 81 | extern void hcall_tracepoint_unregfunc(void); | ||
| 82 | |||
| 83 | TRACE_EVENT_FN(hcall_entry, | ||
| 84 | |||
| 85 | TP_PROTO(unsigned long opcode), | ||
| 86 | |||
| 87 | TP_ARGS(opcode), | ||
| 88 | |||
| 89 | TP_STRUCT__entry( | ||
| 90 | __field(unsigned long, opcode) | ||
| 91 | ), | ||
| 92 | |||
| 93 | TP_fast_assign( | ||
| 94 | __entry->opcode = opcode; | ||
| 95 | ), | ||
| 96 | |||
| 97 | TP_printk("opcode=%lu", __entry->opcode), | ||
| 98 | |||
| 99 | hcall_tracepoint_regfunc, hcall_tracepoint_unregfunc | ||
| 100 | ); | ||
| 101 | |||
| 102 | TRACE_EVENT_FN(hcall_exit, | ||
| 103 | |||
| 104 | TP_PROTO(unsigned long opcode, unsigned long retval), | ||
| 105 | |||
| 106 | TP_ARGS(opcode, retval), | ||
| 107 | |||
| 108 | TP_STRUCT__entry( | ||
| 109 | __field(unsigned long, opcode) | ||
| 110 | __field(unsigned long, retval) | ||
| 111 | ), | ||
| 112 | |||
| 113 | TP_fast_assign( | ||
| 114 | __entry->opcode = opcode; | ||
| 115 | __entry->retval = retval; | ||
| 116 | ), | ||
| 117 | |||
| 118 | TP_printk("opcode=%lu retval=%lu", __entry->opcode, __entry->retval), | ||
| 119 | |||
| 120 | hcall_tracepoint_regfunc, hcall_tracepoint_unregfunc | ||
| 121 | ); | ||
| 122 | #endif | ||
| 123 | |||
| 79 | #endif /* _TRACE_POWERPC_H */ | 124 | #endif /* _TRACE_POWERPC_H */ |
| 80 | 125 | ||
| 81 | #undef TRACE_INCLUDE_PATH | 126 | #undef TRACE_INCLUDE_PATH |
diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S index c1427b3634ec..01e95ab18d35 100644 --- a/arch/powerpc/platforms/pseries/hvCall.S +++ b/arch/powerpc/platforms/pseries/hvCall.S | |||
| @@ -14,20 +14,54 @@ | |||
| 14 | 14 | ||
| 15 | #define STK_PARM(i) (48 + ((i)-3)*8) | 15 | #define STK_PARM(i) (48 + ((i)-3)*8) |
| 16 | 16 | ||
| 17 | #ifdef CONFIG_HCALL_STATS | 17 | #ifdef CONFIG_TRACEPOINTS |
| 18 | |||
| 19 | .section ".toc","aw" | ||
| 20 | |||
| 21 | .globl hcall_tracepoint_refcount | ||
| 22 | hcall_tracepoint_refcount: | ||
| 23 | .llong 0 | ||
| 24 | |||
| 25 | .section ".text" | ||
| 26 | |||
| 18 | /* | 27 | /* |
| 19 | * precall must preserve all registers. use unused STK_PARM() | 28 | * precall must preserve all registers. use unused STK_PARM() |
| 20 | * areas to save snapshots and opcode. | 29 | * areas to save snapshots and opcode. We branch around this |
| 30 | * in early init (eg when populating the MMU hashtable) by using an | ||
| 31 | * unconditional cpu feature. | ||
| 21 | */ | 32 | */ |
| 22 | #define HCALL_INST_PRECALL \ | 33 | #define HCALL_INST_PRECALL \ |
| 23 | std r3,STK_PARM(r3)(r1); /* save opcode */ \ | ||
| 24 | mftb r0; /* get timebase and */ \ | ||
| 25 | std r0,STK_PARM(r5)(r1); /* save for later */ \ | ||
| 26 | BEGIN_FTR_SECTION; \ | 34 | BEGIN_FTR_SECTION; \ |
| 27 | mfspr r0,SPRN_PURR; /* get PURR and */ \ | 35 | b 1f; \ |
| 28 | std r0,STK_PARM(r6)(r1); /* save for later */ \ | 36 | END_FTR_SECTION(0, 1); \ |
| 29 | END_FTR_SECTION_IFSET(CPU_FTR_PURR); | 37 | ld r12,hcall_tracepoint_refcount@toc(r2); \ |
| 30 | 38 | cmpdi r12,0; \ | |
| 39 | beq+ 1f; \ | ||
| 40 | mflr r0; \ | ||
| 41 | std r3,STK_PARM(r3)(r1); \ | ||
| 42 | std r4,STK_PARM(r4)(r1); \ | ||
| 43 | std r5,STK_PARM(r5)(r1); \ | ||
| 44 | std r6,STK_PARM(r6)(r1); \ | ||
| 45 | std r7,STK_PARM(r7)(r1); \ | ||
| 46 | std r8,STK_PARM(r8)(r1); \ | ||
| 47 | std r9,STK_PARM(r9)(r1); \ | ||
| 48 | std r10,STK_PARM(r10)(r1); \ | ||
| 49 | std r0,16(r1); \ | ||
| 50 | stdu r1,-STACK_FRAME_OVERHEAD(r1); \ | ||
| 51 | bl .__trace_hcall_entry; \ | ||
| 52 | addi r1,r1,STACK_FRAME_OVERHEAD; \ | ||
| 53 | ld r0,16(r1); \ | ||
| 54 | ld r3,STK_PARM(r3)(r1); \ | ||
| 55 | ld r4,STK_PARM(r4)(r1); \ | ||
| 56 | ld r5,STK_PARM(r5)(r1); \ | ||
| 57 | ld r6,STK_PARM(r6)(r1); \ | ||
| 58 | ld r7,STK_PARM(r7)(r1); \ | ||
| 59 | ld r8,STK_PARM(r8)(r1); \ | ||
| 60 | ld r9,STK_PARM(r9)(r1); \ | ||
| 61 | ld r10,STK_PARM(r10)(r1); \ | ||
| 62 | mtlr r0; \ | ||
| 63 | 1: | ||
| 64 | |||
| 31 | /* | 65 | /* |
| 32 | * postcall is performed immediately before function return which | 66 | * postcall is performed immediately before function return which |
| 33 | * allows liberal use of volatile registers. We branch around this | 67 | * allows liberal use of volatile registers. We branch around this |
| @@ -38,40 +72,21 @@ END_FTR_SECTION_IFSET(CPU_FTR_PURR); | |||
| 38 | BEGIN_FTR_SECTION; \ | 72 | BEGIN_FTR_SECTION; \ |
| 39 | b 1f; \ | 73 | b 1f; \ |
| 40 | END_FTR_SECTION(0, 1); \ | 74 | END_FTR_SECTION(0, 1); \ |
| 41 | ld r4,STK_PARM(r3)(r1); /* validate opcode */ \ | 75 | ld r12,hcall_tracepoint_refcount@toc(r2); \ |
| 42 | cmpldi cr7,r4,MAX_HCALL_OPCODE; \ | 76 | cmpdi r12,0; \ |
| 43 | bgt- cr7,1f; \ | 77 | beq+ 1f; \ |
| 44 | \ | 78 | mflr r0; \ |
| 45 | /* get time and PURR snapshots after hcall */ \ | 79 | ld r6,STK_PARM(r3)(r1); \ |
| 46 | mftb r7; /* timebase after */ \ | 80 | std r3,STK_PARM(r3)(r1); \ |
| 47 | BEGIN_FTR_SECTION; \ | 81 | mr r4,r3; \ |
| 48 | mfspr r8,SPRN_PURR; /* PURR after */ \ | 82 | mr r3,r6; \ |
| 49 | ld r6,STK_PARM(r6)(r1); /* PURR before */ \ | 83 | std r0,16(r1); \ |
| 50 | subf r6,r6,r8; /* delta */ \ | 84 | stdu r1,-STACK_FRAME_OVERHEAD(r1); \ |
| 51 | END_FTR_SECTION_IFSET(CPU_FTR_PURR); \ | 85 | bl .__trace_hcall_exit; \ |
| 52 | ld r5,STK_PARM(r5)(r1); /* timebase before */ \ | 86 | addi r1,r1,STACK_FRAME_OVERHEAD; \ |
| 53 | subf r5,r5,r7; /* time delta */ \ | 87 | ld r0,16(r1); \ |
| 54 | \ | 88 | ld r3,STK_PARM(r3)(r1); \ |
| 55 | /* calculate address of stat structure r4 = opcode */ \ | 89 | mtlr r0; \ |
| 56 | srdi r4,r4,2; /* index into array */ \ | ||
| 57 | mulli r4,r4,HCALL_STAT_SIZE; \ | ||
| 58 | LOAD_REG_ADDR(r7, per_cpu__hcall_stats); \ | ||
| 59 | add r4,r4,r7; \ | ||
| 60 | ld r7,PACA_DATA_OFFSET(r13); /* per cpu offset */ \ | ||
| 61 | add r4,r4,r7; \ | ||
| 62 | \ | ||
| 63 | /* update stats */ \ | ||
| 64 | ld r7,HCALL_STAT_CALLS(r4); /* count */ \ | ||
| 65 | addi r7,r7,1; \ | ||
| 66 | std r7,HCALL_STAT_CALLS(r4); \ | ||
| 67 | ld r7,HCALL_STAT_TB(r4); /* timebase */ \ | ||
| 68 | add r7,r7,r5; \ | ||
| 69 | std r7,HCALL_STAT_TB(r4); \ | ||
| 70 | BEGIN_FTR_SECTION; \ | ||
| 71 | ld r7,HCALL_STAT_PURR(r4); /* PURR */ \ | ||
| 72 | add r7,r7,r6; \ | ||
| 73 | std r7,HCALL_STAT_PURR(r4); \ | ||
| 74 | END_FTR_SECTION_IFSET(CPU_FTR_PURR); \ | ||
| 75 | 1: | 90 | 1: |
| 76 | #else | 91 | #else |
| 77 | #define HCALL_INST_PRECALL | 92 | #define HCALL_INST_PRECALL |
diff --git a/arch/powerpc/platforms/pseries/hvCall_inst.c b/arch/powerpc/platforms/pseries/hvCall_inst.c index 3631a4f277eb..e44e1035f133 100644 --- a/arch/powerpc/platforms/pseries/hvCall_inst.c +++ b/arch/powerpc/platforms/pseries/hvCall_inst.c | |||
| @@ -26,6 +26,7 @@ | |||
| 26 | #include <asm/hvcall.h> | 26 | #include <asm/hvcall.h> |
| 27 | #include <asm/firmware.h> | 27 | #include <asm/firmware.h> |
| 28 | #include <asm/cputable.h> | 28 | #include <asm/cputable.h> |
| 29 | #include <asm/trace.h> | ||
| 29 | 30 | ||
| 30 | DEFINE_PER_CPU(struct hcall_stats[HCALL_STAT_ARRAY_SIZE], hcall_stats); | 31 | DEFINE_PER_CPU(struct hcall_stats[HCALL_STAT_ARRAY_SIZE], hcall_stats); |
| 31 | 32 | ||
| @@ -100,6 +101,34 @@ static const struct file_operations hcall_inst_seq_fops = { | |||
| 100 | #define HCALL_ROOT_DIR "hcall_inst" | 101 | #define HCALL_ROOT_DIR "hcall_inst" |
| 101 | #define CPU_NAME_BUF_SIZE 32 | 102 | #define CPU_NAME_BUF_SIZE 32 |
| 102 | 103 | ||
| 104 | |||
| 105 | static void probe_hcall_entry(unsigned long opcode) | ||
| 106 | { | ||
| 107 | struct hcall_stats *h; | ||
| 108 | |||
| 109 | if (opcode > MAX_HCALL_OPCODE) | ||
| 110 | return; | ||
| 111 | |||
| 112 | h = &get_cpu_var(hcall_stats)[opcode / 4]; | ||
| 113 | h->tb_start = mftb(); | ||
| 114 | h->purr_start = mfspr(SPRN_PURR); | ||
| 115 | } | ||
| 116 | |||
| 117 | static void probe_hcall_exit(unsigned long opcode, unsigned long retval) | ||
| 118 | { | ||
| 119 | struct hcall_stats *h; | ||
| 120 | |||
| 121 | if (opcode > MAX_HCALL_OPCODE) | ||
| 122 | return; | ||
| 123 | |||
| 124 | h = &__get_cpu_var(hcall_stats)[opcode / 4]; | ||
| 125 | h->num_calls++; | ||
| 126 | h->tb_total = mftb() - h->tb_start; | ||
| 127 | h->purr_total = mfspr(SPRN_PURR) - h->purr_start; | ||
| 128 | |||
| 129 | put_cpu_var(hcall_stats); | ||
| 130 | } | ||
| 131 | |||
| 103 | static int __init hcall_inst_init(void) | 132 | static int __init hcall_inst_init(void) |
| 104 | { | 133 | { |
| 105 | struct dentry *hcall_root; | 134 | struct dentry *hcall_root; |
| @@ -110,6 +139,14 @@ static int __init hcall_inst_init(void) | |||
| 110 | if (!firmware_has_feature(FW_FEATURE_LPAR)) | 139 | if (!firmware_has_feature(FW_FEATURE_LPAR)) |
| 111 | return 0; | 140 | return 0; |
| 112 | 141 | ||
| 142 | if (register_trace_hcall_entry(probe_hcall_entry)) | ||
| 143 | return -EINVAL; | ||
| 144 | |||
| 145 | if (register_trace_hcall_exit(probe_hcall_exit)) { | ||
| 146 | unregister_trace_hcall_entry(probe_hcall_entry); | ||
| 147 | return -EINVAL; | ||
| 148 | } | ||
| 149 | |||
| 113 | hcall_root = debugfs_create_dir(HCALL_ROOT_DIR, NULL); | 150 | hcall_root = debugfs_create_dir(HCALL_ROOT_DIR, NULL); |
| 114 | if (!hcall_root) | 151 | if (!hcall_root) |
| 115 | return -ENOMEM; | 152 | return -ENOMEM; |
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 903eb9eec687..4b7b6e8e32de 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c | |||
| @@ -39,6 +39,7 @@ | |||
| 39 | #include <asm/cputable.h> | 39 | #include <asm/cputable.h> |
| 40 | #include <asm/udbg.h> | 40 | #include <asm/udbg.h> |
| 41 | #include <asm/smp.h> | 41 | #include <asm/smp.h> |
| 42 | #include <asm/trace.h> | ||
| 42 | 43 | ||
| 43 | #include "plpar_wrappers.h" | 44 | #include "plpar_wrappers.h" |
| 44 | #include "pseries.h" | 45 | #include "pseries.h" |
| @@ -661,3 +662,34 @@ void arch_free_page(struct page *page, int order) | |||
| 661 | EXPORT_SYMBOL(arch_free_page); | 662 | EXPORT_SYMBOL(arch_free_page); |
| 662 | 663 | ||
| 663 | #endif | 664 | #endif |
| 665 | |||
| 666 | #ifdef CONFIG_TRACEPOINTS | ||
| 667 | /* | ||
| 668 | * We optimise our hcall path by placing hcall_tracepoint_refcount | ||
| 669 | * directly in the TOC so we can check if the hcall tracepoints are | ||
| 670 | * enabled via a single load. | ||
| 671 | */ | ||
| 672 | |||
| 673 | /* NB: reg/unreg are called while guarded with the tracepoints_mutex */ | ||
| 674 | extern long hcall_tracepoint_refcount; | ||
| 675 | |||
| 676 | void hcall_tracepoint_regfunc(void) | ||
| 677 | { | ||
| 678 | hcall_tracepoint_refcount++; | ||
| 679 | } | ||
| 680 | |||
| 681 | void hcall_tracepoint_unregfunc(void) | ||
| 682 | { | ||
| 683 | hcall_tracepoint_refcount--; | ||
| 684 | } | ||
| 685 | |||
| 686 | void __trace_hcall_entry(unsigned long opcode) | ||
| 687 | { | ||
| 688 | trace_hcall_entry(opcode); | ||
| 689 | } | ||
| 690 | |||
| 691 | void __trace_hcall_exit(long opcode, unsigned long retval) | ||
| 692 | { | ||
| 693 | trace_hcall_exit(opcode, retval); | ||
| 694 | } | ||
| 695 | #endif | ||
