diff options
author | Paul Mackerras <paulus@samba.org> | 2009-03-24 01:52:34 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-04-06 03:30:30 -0400 |
commit | cbe46555dc4de6403cd757139d42289b5f21abb9 (patch) | |
tree | 1fd6d82af9a1dbe739d14e5440d81c5fad7ea946 /Documentation | |
parent | 81cdbe0509542324ad7d3282ab67c2b6716df663 (diff) |
perf_counter tools: remove glib dependency and fix bugs in kerneltop.c
The glib dependency in kerneltop.c is only for a little bit of list
manipulation, and I find it inconvenient. This adds a 'next' field to
struct source_line, which lets us link them together into a list. The
code to do the linking ourselves turns out to be neither longer nor
more difficult than using glib.
This also fixes a few other problems:
- We need to #include <limits.h> to get PATH_MAX on powerpc.
- We need to #include <linux/types.h> rather than have our own
definitions of __u64 and __s64; on powerpc the installed headers
define them to be unsigned long and long respectively, so having
our own, different definitions here causes a compile error.
- This takes out the x86 setting of errno from -ret in
sys_perf_counter_open. My experiments on x86 indicate that the
glibc syscall() does this for us already.
- We had two CPU migration counters in the default set, which seems
unnecessary; I changed one of them to a context switch counter.
- In perfstat mode we were printing CPU cycles and instructions as
milliseconds, and the cpu clock and task clock counters as events.
This fixes that.
- In perfstat mode we were still printing a blank line after the first
counter, which was a holdover from when a task clock counter was
automatically included as the first counter. This removes the blank
line.
- On a test machine here, parse_symbols() and parse_vmlinux() were
taking long enough (almost 0.5 seconds) for the mmap buffer to
overflow before we got to the first mmap_read() call, so this moves
them before we open all the counters.
- The error message if sys_perf_counter_open fails needs to use errno,
not -fd[i][counter].
Signed-off-by: Paul Mackerras <paulus@samba.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Mike Galbraith <efault@gmx.de>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Orig-LKML-Reference: <18888.29986.340328.540512@cargo.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'Documentation')
-rw-r--r-- | Documentation/perf_counter/Makefile | 2 | ||||
-rw-r--r-- | Documentation/perf_counter/kerneltop.c | 112 |
2 files changed, 46 insertions, 68 deletions
diff --git a/Documentation/perf_counter/Makefile b/Documentation/perf_counter/Makefile index 666da95a7877..194b66215588 100644 --- a/Documentation/perf_counter/Makefile +++ b/Documentation/perf_counter/Makefile | |||
@@ -3,7 +3,7 @@ BINS = kerneltop perfstat | |||
3 | all: $(BINS) | 3 | all: $(BINS) |
4 | 4 | ||
5 | kerneltop: kerneltop.c ../../include/linux/perf_counter.h | 5 | kerneltop: kerneltop.c ../../include/linux/perf_counter.h |
6 | cc -O6 -Wall -lrt `pkg-config --cflags --libs glib-2.0` -o $@ $< | 6 | cc -O6 -Wall -lrt -o $@ $< |
7 | 7 | ||
8 | perfstat: kerneltop | 8 | perfstat: kerneltop |
9 | ln -sf kerneltop perfstat | 9 | ln -sf kerneltop perfstat |
diff --git a/Documentation/perf_counter/kerneltop.c b/Documentation/perf_counter/kerneltop.c index 2ab29b5e32e8..ea13e4e67229 100644 --- a/Documentation/perf_counter/kerneltop.c +++ b/Documentation/perf_counter/kerneltop.c | |||
@@ -3,7 +3,7 @@ | |||
3 | 3 | ||
4 | Build with: | 4 | Build with: |
5 | 5 | ||
6 | cc -O6 -Wall -lrt `pkg-config --cflags --libs glib-2.0` -o kerneltop kerneltop.c | 6 | cc -O6 -Wall -c -o kerneltop.o kerneltop.c -lrt |
7 | 7 | ||
8 | Sample output: | 8 | Sample output: |
9 | 9 | ||
@@ -56,6 +56,7 @@ | |||
56 | * Yanmin Zhang <yanmin.zhang@intel.com> | 56 | * Yanmin Zhang <yanmin.zhang@intel.com> |
57 | * Wu Fengguang <fengguang.wu@intel.com> | 57 | * Wu Fengguang <fengguang.wu@intel.com> |
58 | * Mike Galbraith <efault@gmx.de> | 58 | * Mike Galbraith <efault@gmx.de> |
59 | * Paul Mackerras <paulus@samba.org> | ||
59 | * | 60 | * |
60 | * Released under the GPL v2. (and only v2, not any later version) | 61 | * Released under the GPL v2. (and only v2, not any later version) |
61 | */ | 62 | */ |
@@ -68,6 +69,7 @@ | |||
68 | #include <stdint.h> | 69 | #include <stdint.h> |
69 | #include <stdlib.h> | 70 | #include <stdlib.h> |
70 | #include <string.h> | 71 | #include <string.h> |
72 | #include <limits.h> | ||
71 | #include <getopt.h> | 73 | #include <getopt.h> |
72 | #include <assert.h> | 74 | #include <assert.h> |
73 | #include <fcntl.h> | 75 | #include <fcntl.h> |
@@ -76,8 +78,6 @@ | |||
76 | #include <ctype.h> | 78 | #include <ctype.h> |
77 | #include <time.h> | 79 | #include <time.h> |
78 | 80 | ||
79 | #include <glib.h> | ||
80 | |||
81 | #include <sys/syscall.h> | 81 | #include <sys/syscall.h> |
82 | #include <sys/ioctl.h> | 82 | #include <sys/ioctl.h> |
83 | #include <sys/poll.h> | 83 | #include <sys/poll.h> |
@@ -87,6 +87,7 @@ | |||
87 | #include <sys/mman.h> | 87 | #include <sys/mman.h> |
88 | 88 | ||
89 | #include <linux/unistd.h> | 89 | #include <linux/unistd.h> |
90 | #include <linux/types.h> | ||
90 | 91 | ||
91 | #include "../../include/linux/perf_counter.h" | 92 | #include "../../include/linux/perf_counter.h" |
92 | 93 | ||
@@ -114,11 +115,6 @@ | |||
114 | #define __user | 115 | #define __user |
115 | #define asmlinkage | 116 | #define asmlinkage |
116 | 117 | ||
117 | typedef unsigned int __u32; | ||
118 | typedef unsigned long long __u64; | ||
119 | typedef long long __s64; | ||
120 | |||
121 | |||
122 | #ifdef __x86_64__ | 118 | #ifdef __x86_64__ |
123 | #define __NR_perf_counter_open 295 | 119 | #define __NR_perf_counter_open 295 |
124 | #define rmb() asm volatile("lfence" ::: "memory") | 120 | #define rmb() asm volatile("lfence" ::: "memory") |
@@ -146,17 +142,8 @@ asmlinkage int sys_perf_counter_open( | |||
146 | int group_fd, | 142 | int group_fd, |
147 | unsigned long flags) | 143 | unsigned long flags) |
148 | { | 144 | { |
149 | int ret; | 145 | return syscall( |
150 | |||
151 | ret = syscall( | ||
152 | __NR_perf_counter_open, hw_event_uptr, pid, cpu, group_fd, flags); | 146 | __NR_perf_counter_open, hw_event_uptr, pid, cpu, group_fd, flags); |
153 | #if defined(__x86_64__) || defined(__i386__) | ||
154 | if (ret < 0 && ret > -4096) { | ||
155 | errno = -ret; | ||
156 | ret = -1; | ||
157 | } | ||
158 | #endif | ||
159 | return ret; | ||
160 | } | 147 | } |
161 | 148 | ||
162 | #define MAX_COUNTERS 64 | 149 | #define MAX_COUNTERS 64 |
@@ -170,7 +157,7 @@ static int system_wide = 0; | |||
170 | static int nr_counters = 0; | 157 | static int nr_counters = 0; |
171 | static __u64 event_id[MAX_COUNTERS] = { | 158 | static __u64 event_id[MAX_COUNTERS] = { |
172 | EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK), | 159 | EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK), |
173 | EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), | 160 | EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), |
174 | EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), | 161 | EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), |
175 | EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), | 162 | EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), |
176 | 163 | ||
@@ -202,14 +189,15 @@ static int delay_secs = 2; | |||
202 | static int zero; | 189 | static int zero; |
203 | static int dump_symtab; | 190 | static int dump_symtab; |
204 | 191 | ||
205 | static GList *lines; | ||
206 | |||
207 | struct source_line { | 192 | struct source_line { |
208 | uint64_t EIP; | 193 | uint64_t EIP; |
209 | unsigned long count; | 194 | unsigned long count; |
210 | char *line; | 195 | char *line; |
196 | struct source_line *next; | ||
211 | }; | 197 | }; |
212 | 198 | ||
199 | static struct source_line *lines; | ||
200 | static struct source_line **lines_tail; | ||
213 | 201 | ||
214 | const unsigned int default_count[] = { | 202 | const unsigned int default_count[] = { |
215 | 1000000, | 203 | 1000000, |
@@ -519,9 +507,8 @@ int do_perfstat(int argc, char *argv[]) | |||
519 | count += single_count; | 507 | count += single_count; |
520 | } | 508 | } |
521 | 509 | ||
522 | if (!PERF_COUNTER_RAW(event_id[counter]) && | 510 | if (event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK) || |
523 | (event_id[counter] == PERF_COUNT_CPU_CLOCK || | 511 | event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK)) { |
524 | event_id[counter] == PERF_COUNT_TASK_CLOCK)) { | ||
525 | 512 | ||
526 | double msecs = (double)count / 1000000; | 513 | double msecs = (double)count / 1000000; |
527 | 514 | ||
@@ -531,8 +518,6 @@ int do_perfstat(int argc, char *argv[]) | |||
531 | fprintf(stderr, " %14Ld %-20s (events)\n", | 518 | fprintf(stderr, " %14Ld %-20s (events)\n", |
532 | count, event_name(counter)); | 519 | count, event_name(counter)); |
533 | } | 520 | } |
534 | if (!counter) | ||
535 | fprintf(stderr, "\n"); | ||
536 | } | 521 | } |
537 | fprintf(stderr, "\n"); | 522 | fprintf(stderr, "\n"); |
538 | fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n", | 523 | fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n", |
@@ -554,7 +539,7 @@ struct sym_entry { | |||
554 | char *sym; | 539 | char *sym; |
555 | unsigned long count[MAX_COUNTERS]; | 540 | unsigned long count[MAX_COUNTERS]; |
556 | int skip; | 541 | int skip; |
557 | GList *source; | 542 | struct source_line *source; |
558 | }; | 543 | }; |
559 | 544 | ||
560 | #define MAX_SYMS 100000 | 545 | #define MAX_SYMS 100000 |
@@ -855,6 +840,7 @@ static void parse_vmlinux(char *filename) | |||
855 | if (!file) | 840 | if (!file) |
856 | return; | 841 | return; |
857 | 842 | ||
843 | lines_tail = &lines; | ||
858 | while (!feof(file)) { | 844 | while (!feof(file)) { |
859 | struct source_line *src; | 845 | struct source_line *src; |
860 | size_t dummy = 0; | 846 | size_t dummy = 0; |
@@ -873,7 +859,9 @@ static void parse_vmlinux(char *filename) | |||
873 | if (c) | 859 | if (c) |
874 | *c = 0; | 860 | *c = 0; |
875 | 861 | ||
876 | lines = g_list_prepend(lines, src); | 862 | src->next = NULL; |
863 | *lines_tail = src; | ||
864 | lines_tail = &src->next; | ||
877 | 865 | ||
878 | if (strlen(src->line)>8 && src->line[8] == ':') | 866 | if (strlen(src->line)>8 && src->line[8] == ':') |
879 | src->EIP = strtoull(src->line, NULL, 16); | 867 | src->EIP = strtoull(src->line, NULL, 16); |
@@ -881,52 +869,43 @@ static void parse_vmlinux(char *filename) | |||
881 | src->EIP = strtoull(src->line, NULL, 16); | 869 | src->EIP = strtoull(src->line, NULL, 16); |
882 | } | 870 | } |
883 | pclose(file); | 871 | pclose(file); |
884 | lines = g_list_reverse(lines); | ||
885 | } | 872 | } |
886 | 873 | ||
887 | static void record_precise_ip(uint64_t ip) | 874 | static void record_precise_ip(uint64_t ip) |
888 | { | 875 | { |
889 | struct source_line *line; | 876 | struct source_line *line; |
890 | GList *item; | ||
891 | 877 | ||
892 | item = g_list_first(lines); | 878 | for (line = lines; line; line = line->next) { |
893 | while (item) { | ||
894 | line = item->data; | ||
895 | if (line->EIP == ip) | 879 | if (line->EIP == ip) |
896 | line->count++; | 880 | line->count++; |
897 | if (line->EIP > ip) | 881 | if (line->EIP > ip) |
898 | break; | 882 | break; |
899 | item = g_list_next(item); | ||
900 | } | 883 | } |
901 | } | 884 | } |
902 | 885 | ||
903 | static void lookup_sym_in_vmlinux(struct sym_entry *sym) | 886 | static void lookup_sym_in_vmlinux(struct sym_entry *sym) |
904 | { | 887 | { |
905 | struct source_line *line; | 888 | struct source_line *line; |
906 | GList *item; | ||
907 | char pattern[PATH_MAX]; | 889 | char pattern[PATH_MAX]; |
908 | sprintf(pattern, "<%s>:", sym->sym); | 890 | sprintf(pattern, "<%s>:", sym->sym); |
909 | 891 | ||
910 | item = g_list_first(lines); | 892 | for (line = lines; line; line = line->next) { |
911 | while (item) { | ||
912 | line = item->data; | ||
913 | if (strstr(line->line, pattern)) { | 893 | if (strstr(line->line, pattern)) { |
914 | sym->source = item; | 894 | sym->source = line; |
915 | break; | 895 | break; |
916 | } | 896 | } |
917 | item = g_list_next(item); | ||
918 | } | 897 | } |
919 | } | 898 | } |
920 | 899 | ||
921 | void show_lines(GList *item_queue, int item_queue_count) | 900 | static void show_lines(struct source_line *line_queue, int line_queue_count) |
922 | { | 901 | { |
923 | int i; | 902 | int i; |
924 | struct source_line *line; | 903 | struct source_line *line; |
925 | 904 | ||
926 | for (i = 0; i < item_queue_count; i++) { | 905 | line = line_queue; |
927 | line = item_queue->data; | 906 | for (i = 0; i < line_queue_count; i++) { |
928 | printf("%8li\t%s\n", line->count, line->line); | 907 | printf("%8li\t%s\n", line->count, line->line); |
929 | item_queue = g_list_next(item_queue); | 908 | line = line->next; |
930 | } | 909 | } |
931 | } | 910 | } |
932 | 911 | ||
@@ -935,10 +914,9 @@ void show_lines(GList *item_queue, int item_queue_count) | |||
935 | static void show_details(struct sym_entry *sym) | 914 | static void show_details(struct sym_entry *sym) |
936 | { | 915 | { |
937 | struct source_line *line; | 916 | struct source_line *line; |
938 | GList *item; | 917 | struct source_line *line_queue = NULL; |
939 | int displayed = 0; | 918 | int displayed = 0; |
940 | GList *item_queue = NULL; | 919 | int line_queue_count = 0; |
941 | int item_queue_count = 0; | ||
942 | 920 | ||
943 | if (!sym->source) | 921 | if (!sym->source) |
944 | lookup_sym_in_vmlinux(sym); | 922 | lookup_sym_in_vmlinux(sym); |
@@ -947,30 +925,29 @@ static void show_details(struct sym_entry *sym) | |||
947 | 925 | ||
948 | printf("Showing details for %s\n", sym->sym); | 926 | printf("Showing details for %s\n", sym->sym); |
949 | 927 | ||
950 | item = sym->source; | 928 | line = sym->source; |
951 | while (item) { | 929 | while (line) { |
952 | line = item->data; | ||
953 | if (displayed && strstr(line->line, ">:")) | 930 | if (displayed && strstr(line->line, ">:")) |
954 | break; | 931 | break; |
955 | 932 | ||
956 | if (!item_queue_count) | 933 | if (!line_queue_count) |
957 | item_queue = item; | 934 | line_queue = line; |
958 | item_queue_count ++; | 935 | line_queue_count ++; |
959 | 936 | ||
960 | if (line->count >= count_filter) { | 937 | if (line->count >= count_filter) { |
961 | show_lines(item_queue, item_queue_count); | 938 | show_lines(line_queue, line_queue_count); |
962 | item_queue_count = 0; | 939 | line_queue_count = 0; |
963 | item_queue = NULL; | 940 | line_queue = NULL; |
964 | } else if (item_queue_count > TRACE_COUNT) { | 941 | } else if (line_queue_count > TRACE_COUNT) { |
965 | item_queue = g_list_next(item_queue); | 942 | line_queue = line_queue->next; |
966 | item_queue_count --; | 943 | line_queue_count --; |
967 | } | 944 | } |
968 | 945 | ||
969 | line->count = 0; | 946 | line->count = 0; |
970 | displayed++; | 947 | displayed++; |
971 | if (displayed > 300) | 948 | if (displayed > 300) |
972 | break; | 949 | break; |
973 | item = g_list_next(item); | 950 | line = line->next; |
974 | } | 951 | } |
975 | } | 952 | } |
976 | 953 | ||
@@ -1201,6 +1178,10 @@ int main(int argc, char *argv[]) | |||
1201 | if (tid != -1 || profile_cpu != -1) | 1178 | if (tid != -1 || profile_cpu != -1) |
1202 | nr_cpus = 1; | 1179 | nr_cpus = 1; |
1203 | 1180 | ||
1181 | parse_symbols(); | ||
1182 | if (vmlinux && sym_filter_entry) | ||
1183 | parse_vmlinux(vmlinux); | ||
1184 | |||
1204 | for (i = 0; i < nr_cpus; i++) { | 1185 | for (i = 0; i < nr_cpus; i++) { |
1205 | group_fd = -1; | 1186 | group_fd = -1; |
1206 | for (counter = 0; counter < nr_counters; counter++) { | 1187 | for (counter = 0; counter < nr_counters; counter++) { |
@@ -1216,15 +1197,16 @@ int main(int argc, char *argv[]) | |||
1216 | hw_event.nmi = nmi; | 1197 | hw_event.nmi = nmi; |
1217 | 1198 | ||
1218 | fd[i][counter] = sys_perf_counter_open(&hw_event, tid, cpu, group_fd, 0); | 1199 | fd[i][counter] = sys_perf_counter_open(&hw_event, tid, cpu, group_fd, 0); |
1219 | fcntl(fd[i][counter], F_SETFL, O_NONBLOCK); | ||
1220 | if (fd[i][counter] < 0) { | 1200 | if (fd[i][counter] < 0) { |
1201 | int err = errno; | ||
1221 | printf("kerneltop error: syscall returned with %d (%s)\n", | 1202 | printf("kerneltop error: syscall returned with %d (%s)\n", |
1222 | fd[i][counter], strerror(-fd[i][counter])); | 1203 | fd[i][counter], strerror(err)); |
1223 | if (fd[i][counter] == -1) | 1204 | if (err == EPERM) |
1224 | printf("Are you root?\n"); | 1205 | printf("Are you root?\n"); |
1225 | exit(-1); | 1206 | exit(-1); |
1226 | } | 1207 | } |
1227 | assert(fd[i][counter] >= 0); | 1208 | assert(fd[i][counter] >= 0); |
1209 | fcntl(fd[i][counter], F_SETFL, O_NONBLOCK); | ||
1228 | 1210 | ||
1229 | /* | 1211 | /* |
1230 | * First counter acts as the group leader: | 1212 | * First counter acts as the group leader: |
@@ -1248,10 +1230,6 @@ int main(int argc, char *argv[]) | |||
1248 | } | 1230 | } |
1249 | } | 1231 | } |
1250 | 1232 | ||
1251 | parse_symbols(); | ||
1252 | if (vmlinux && sym_filter_entry) | ||
1253 | parse_vmlinux(vmlinux); | ||
1254 | |||
1255 | printf("KernelTop refresh period: %d seconds\n", delay_secs); | 1233 | printf("KernelTop refresh period: %d seconds\n", delay_secs); |
1256 | last_refresh = time(NULL); | 1234 | last_refresh = time(NULL); |
1257 | 1235 | ||