diff options
author | Ingo Molnar <mingo@elte.hu> | 2009-06-14 09:04:15 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-06-14 14:34:06 -0400 |
commit | 3efa1cc99ec51bc7a7ae0011a16619fd20dbe6ea (patch) | |
tree | db39c3b638bdaaf65382147c55a6cb60b12b9cbd | |
parent | 8465b05046652cfde3d47692cab2e8ba962f140f (diff) |
perf record/report: Add call graph / call chain profiling
Add the first steps of call-graph profiling:
- add the -g (--call-graph) option to perf record
- parse the call-graph record and print it out under -D (--dump-trace)
The call-graph data is not put into the histogram yet, but it
can be seen that it's being processed correctly:
0x3ce0 [0x38]: event: 35
.
. ... raw event: size 56 bytes
. 0000: 23 00 00 00 05 00 38 00 d4 df 0e 81 ff ff ff ff #.....8........
. 0010: 60 0b 00 00 60 0b 00 00 03 00 00 00 01 00 02 00 `...`..........
. 0020: d4 df 0e 81 ff ff ff ff a0 61 ed 41 36 00 00 00 .........a.A6..
. 0030: 04 92 e6 41 36 00 00 00 .a.A6..
.
0x3ce0 [0x38]: PERF_EVENT (IP, 5): 2912: 0xffffffff810edfd4 period: 1
... chain: u:2, k:1, nr:3
..... 0: 0xffffffff810edfd4
..... 1: 0x3641ed61a0
..... 2: 0x3641e69204
... thread: perf:2912
...... dso: [kernel]
This shows a 3-entry call-graph, with one kernel-space and two user-space
entries.
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Arjan van de Ven <arjan@infradead.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- | tools/perf/builtin-record.c | 8 | ||||
-rw-r--r-- | tools/perf/builtin-report.c | 57 |
2 files changed, 53 insertions, 12 deletions
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 0f5771f615da..a177a591b52c 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c | |||
@@ -37,6 +37,7 @@ static pid_t target_pid = -1; | |||
37 | static int inherit = 1; | 37 | static int inherit = 1; |
38 | static int force = 0; | 38 | static int force = 0; |
39 | static int append_file = 0; | 39 | static int append_file = 0; |
40 | static int call_graph = 0; | ||
40 | static int verbose = 0; | 41 | static int verbose = 0; |
41 | 42 | ||
42 | static long samples; | 43 | static long samples; |
@@ -351,11 +352,16 @@ static void create_counter(int counter, int cpu, pid_t pid) | |||
351 | int track = 1; | 352 | int track = 1; |
352 | 353 | ||
353 | attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID; | 354 | attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID; |
355 | |||
354 | if (freq) { | 356 | if (freq) { |
355 | attr->sample_type |= PERF_SAMPLE_PERIOD; | 357 | attr->sample_type |= PERF_SAMPLE_PERIOD; |
356 | attr->freq = 1; | 358 | attr->freq = 1; |
357 | attr->sample_freq = freq; | 359 | attr->sample_freq = freq; |
358 | } | 360 | } |
361 | |||
362 | if (call_graph) | ||
363 | attr->sample_type |= PERF_SAMPLE_CALLCHAIN; | ||
364 | |||
359 | attr->mmap = track; | 365 | attr->mmap = track; |
360 | attr->comm = track; | 366 | attr->comm = track; |
361 | attr->inherit = (cpu < 0) && inherit; | 367 | attr->inherit = (cpu < 0) && inherit; |
@@ -555,6 +561,8 @@ static const struct option options[] = { | |||
555 | "profile at this frequency"), | 561 | "profile at this frequency"), |
556 | OPT_INTEGER('m', "mmap-pages", &mmap_pages, | 562 | OPT_INTEGER('m', "mmap-pages", &mmap_pages, |
557 | "number of mmap data pages"), | 563 | "number of mmap data pages"), |
564 | OPT_BOOLEAN('g', "call-graph", &call_graph, | ||
565 | "do call-graph (stack chain/backtrace) recording"), | ||
558 | OPT_BOOLEAN('v', "verbose", &verbose, | 566 | OPT_BOOLEAN('v', "verbose", &verbose, |
559 | "be more verbose (show counter open errors, etc)"), | 567 | "be more verbose (show counter open errors, etc)"), |
560 | OPT_END() | 568 | OPT_END() |
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 37515da637f7..aebba5659345 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c | |||
@@ -36,6 +36,7 @@ static int show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV; | |||
36 | 36 | ||
37 | static int dump_trace = 0; | 37 | static int dump_trace = 0; |
38 | #define dprintf(x...) do { if (dump_trace) printf(x); } while (0) | 38 | #define dprintf(x...) do { if (dump_trace) printf(x); } while (0) |
39 | #define cdprintf(x...) do { if (dump_trace) color_fprintf(stdout, color, x); } while (0) | ||
39 | 40 | ||
40 | static int verbose; | 41 | static int verbose; |
41 | static int full_paths; | 42 | static int full_paths; |
@@ -43,11 +44,19 @@ static int full_paths; | |||
43 | static unsigned long page_size; | 44 | static unsigned long page_size; |
44 | static unsigned long mmap_window = 32; | 45 | static unsigned long mmap_window = 32; |
45 | 46 | ||
47 | struct ip_chain_event { | ||
48 | __u16 nr; | ||
49 | __u16 hv; | ||
50 | __u16 kernel; | ||
51 | __u16 user; | ||
52 | __u64 ips[]; | ||
53 | }; | ||
54 | |||
46 | struct ip_event { | 55 | struct ip_event { |
47 | struct perf_event_header header; | 56 | struct perf_event_header header; |
48 | __u64 ip; | 57 | __u64 ip; |
49 | __u32 pid, tid; | 58 | __u32 pid, tid; |
50 | __u64 period; | 59 | unsigned char __more_data[]; |
51 | }; | 60 | }; |
52 | 61 | ||
53 | struct mmap_event { | 62 | struct mmap_event { |
@@ -944,9 +953,13 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head) | |||
944 | __u64 ip = event->ip.ip; | 953 | __u64 ip = event->ip.ip; |
945 | __u64 period = 1; | 954 | __u64 period = 1; |
946 | struct map *map = NULL; | 955 | struct map *map = NULL; |
956 | void *more_data = event->ip.__more_data; | ||
957 | struct ip_chain_event *chain; | ||
947 | 958 | ||
948 | if (event->header.type & PERF_SAMPLE_PERIOD) | 959 | if (event->header.type & PERF_SAMPLE_PERIOD) { |
949 | period = event->ip.period; | 960 | period = *(__u64 *)more_data; |
961 | more_data += sizeof(__u64); | ||
962 | } | ||
950 | 963 | ||
951 | dprintf("%p [%p]: PERF_EVENT (IP, %d): %d: %p period: %Ld\n", | 964 | dprintf("%p [%p]: PERF_EVENT (IP, %d): %d: %p period: %Ld\n", |
952 | (void *)(offset + head), | 965 | (void *)(offset + head), |
@@ -956,6 +969,22 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head) | |||
956 | (void *)(long)ip, | 969 | (void *)(long)ip, |
957 | (long long)period); | 970 | (long long)period); |
958 | 971 | ||
972 | if (event->header.type & PERF_SAMPLE_CALLCHAIN) { | ||
973 | int i; | ||
974 | |||
975 | chain = (void *)more_data; | ||
976 | |||
977 | if (dump_trace) { | ||
978 | dprintf("... chain: u:%d, k:%d, nr:%d\n", | ||
979 | chain->user, | ||
980 | chain->kernel, | ||
981 | chain->nr); | ||
982 | |||
983 | for (i = 0; i < chain->nr; i++) | ||
984 | dprintf("..... %2d: %p\n", i, (void *)chain->ips[i]); | ||
985 | } | ||
986 | } | ||
987 | |||
959 | dprintf(" ... thread: %s:%d\n", thread->comm, thread->pid); | 988 | dprintf(" ... thread: %s:%d\n", thread->comm, thread->pid); |
960 | 989 | ||
961 | if (thread == NULL) { | 990 | if (thread == NULL) { |
@@ -1098,30 +1127,34 @@ process_period_event(event_t *event, unsigned long offset, unsigned long head) | |||
1098 | static void trace_event(event_t *event) | 1127 | static void trace_event(event_t *event) |
1099 | { | 1128 | { |
1100 | unsigned char *raw_event = (void *)event; | 1129 | unsigned char *raw_event = (void *)event; |
1130 | char *color = PERF_COLOR_BLUE; | ||
1101 | int i, j; | 1131 | int i, j; |
1102 | 1132 | ||
1103 | if (!dump_trace) | 1133 | if (!dump_trace) |
1104 | return; | 1134 | return; |
1105 | 1135 | ||
1106 | dprintf(".\n. ... raw event: size %d bytes\n", event->header.size); | 1136 | dprintf("."); |
1137 | cdprintf("\n. ... raw event: size %d bytes\n", event->header.size); | ||
1107 | 1138 | ||
1108 | for (i = 0; i < event->header.size; i++) { | 1139 | for (i = 0; i < event->header.size; i++) { |
1109 | if ((i & 15) == 0) | 1140 | if ((i & 15) == 0) { |
1110 | dprintf(". %04x: ", i); | 1141 | dprintf("."); |
1142 | cdprintf(" %04x: ", i); | ||
1143 | } | ||
1111 | 1144 | ||
1112 | dprintf(" %02x", raw_event[i]); | 1145 | cdprintf(" %02x", raw_event[i]); |
1113 | 1146 | ||
1114 | if (((i & 15) == 15) || i == event->header.size-1) { | 1147 | if (((i & 15) == 15) || i == event->header.size-1) { |
1115 | dprintf(" "); | 1148 | cdprintf(" "); |
1116 | for (j = 0; j < 15-(i & 15); j++) | 1149 | for (j = 0; j < 15-(i & 15); j++) |
1117 | dprintf(" "); | 1150 | cdprintf(" "); |
1118 | for (j = 0; j < (i & 15); j++) { | 1151 | for (j = 0; j < (i & 15); j++) { |
1119 | if (isprint(raw_event[i-15+j])) | 1152 | if (isprint(raw_event[i-15+j])) |
1120 | dprintf("%c", raw_event[i-15+j]); | 1153 | cdprintf("%c", raw_event[i-15+j]); |
1121 | else | 1154 | else |
1122 | dprintf("."); | 1155 | cdprintf("."); |
1123 | } | 1156 | } |
1124 | dprintf("\n"); | 1157 | cdprintf("\n"); |
1125 | } | 1158 | } |
1126 | } | 1159 | } |
1127 | dprintf(".\n"); | 1160 | dprintf(".\n"); |