diff options
Diffstat (limited to 'Documentation')
-rw-r--r-- | Documentation/perf_counter/kerneltop.c | 530 | ||||
-rw-r--r-- | Documentation/perf_counter/perfcounters.h | 132 |
2 files changed, 432 insertions, 230 deletions
diff --git a/Documentation/perf_counter/kerneltop.c b/Documentation/perf_counter/kerneltop.c index cba5cb0a97f9..9db65a4f1042 100644 --- a/Documentation/perf_counter/kerneltop.c +++ b/Documentation/perf_counter/kerneltop.c | |||
@@ -3,7 +3,7 @@ | |||
3 | 3 | ||
4 | Build with: | 4 | Build with: |
5 | 5 | ||
6 | cc -O6 -Wall `pkg-config --cflags --libs glib-2.0` -o kerneltop kerneltop.c | 6 | cc -O6 -Wall -lrt `pkg-config --cflags --libs glib-2.0` -o kerneltop kerneltop.c |
7 | 7 | ||
8 | Sample output: | 8 | Sample output: |
9 | 9 | ||
@@ -26,18 +26,40 @@ | |||
26 | 12.00 - ffffffff804ffb7f : __ip_local_out | 26 | 12.00 - ffffffff804ffb7f : __ip_local_out |
27 | 11.97 - ffffffff804fc0c8 : ip_local_deliver_finish | 27 | 11.97 - ffffffff804fc0c8 : ip_local_deliver_finish |
28 | 8.54 - ffffffff805001a3 : ip_queue_xmit | 28 | 8.54 - ffffffff805001a3 : ip_queue_xmit |
29 | */ | ||
29 | 30 | ||
30 | Started by Ingo Molnar <mingo@redhat.com> | 31 | /* |
32 | * perfstat: /usr/bin/time -alike performance counter statistics utility | ||
31 | 33 | ||
32 | Improvements and fixes by: | 34 | It summarizes the counter events of all tasks (and child tasks), |
35 | covering all CPUs that the command (or workload) executes on. | ||
36 | It only counts the per-task events of the workload started, | ||
37 | independent of how many other tasks run on those CPUs. | ||
33 | 38 | ||
34 | Arjan van de Ven <arjan@linux.intel.com> | 39 | Sample output: |
35 | Yanmin Zhang <yanmin.zhang@intel.com> | ||
36 | Mike Galbraith <efault@gmx.de> | ||
37 | 40 | ||
38 | Released under the GPL v2. (and only v2, not any later version) | 41 | $ ./perfstat -e 1 -e 3 -e 5 ls -lR /usr/include/ >/dev/null |
39 | 42 | ||
43 | Performance counter stats for 'ls': | ||
44 | |||
45 | 163516953 instructions | ||
46 | 2295 cache-misses | ||
47 | 2855182 branch-misses | ||
40 | */ | 48 | */ |
49 | |||
50 | /* | ||
51 | * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com> | ||
52 | * | ||
53 | * Improvements and fixes by: | ||
54 | * | ||
55 | * Arjan van de Ven <arjan@linux.intel.com> | ||
56 | * Yanmin Zhang <yanmin.zhang@intel.com> | ||
57 | * Wu Fengguang <fengguang.wu@intel.com> | ||
58 | * Mike Galbraith <efault@gmx.de> | ||
59 | * | ||
60 | * Released under the GPL v2. (and only v2, not any later version) | ||
61 | */ | ||
62 | |||
41 | #define _GNU_SOURCE | 63 | #define _GNU_SOURCE |
42 | #include <sys/types.h> | 64 | #include <sys/types.h> |
43 | #include <sys/stat.h> | 65 | #include <sys/stat.h> |
@@ -67,18 +89,22 @@ | |||
67 | 89 | ||
68 | #include "perfcounters.h" | 90 | #include "perfcounters.h" |
69 | 91 | ||
70 | const unsigned int default_count[] = { | ||
71 | 1000000, | ||
72 | 1000000, | ||
73 | 10000, | ||
74 | 10000, | ||
75 | 1000000, | ||
76 | 10000, | ||
77 | }; | ||
78 | 92 | ||
79 | static __u64 count_filter = 100; | 93 | #define MAX_COUNTERS 64 |
94 | #define MAX_NR_CPUS 256 | ||
95 | |||
96 | #define DEF_PERFSTAT_EVENTS { -2, -5, -4, -3, 0, 1, 2, 3} | ||
97 | |||
98 | static int run_perfstat = 0; | ||
99 | static int system_wide = 0; | ||
80 | 100 | ||
101 | static int nr_counters = 0; | ||
102 | static long event_id[MAX_COUNTERS] = DEF_PERFSTAT_EVENTS; | ||
103 | static int event_raw[MAX_COUNTERS]; | ||
81 | static int event_count[MAX_COUNTERS]; | 104 | static int event_count[MAX_COUNTERS]; |
105 | static int fd[MAX_NR_CPUS][MAX_COUNTERS]; | ||
106 | |||
107 | static __u64 count_filter = 100; | ||
82 | 108 | ||
83 | static int tid = -1; | 109 | static int tid = -1; |
84 | static int profile_cpu = -1; | 110 | static int profile_cpu = -1; |
@@ -96,125 +122,335 @@ static int delay_secs = 2; | |||
96 | static int zero; | 122 | static int zero; |
97 | static int dump_symtab; | 123 | static int dump_symtab; |
98 | 124 | ||
125 | static GList *lines; | ||
126 | |||
99 | struct source_line { | 127 | struct source_line { |
100 | uint64_t EIP; | 128 | uint64_t EIP; |
101 | unsigned long count; | 129 | unsigned long count; |
102 | char *line; | 130 | char *line; |
103 | }; | 131 | }; |
104 | 132 | ||
105 | static GList *lines; | 133 | |
134 | const unsigned int default_count[] = { | ||
135 | 1000000, | ||
136 | 1000000, | ||
137 | 10000, | ||
138 | 10000, | ||
139 | 1000000, | ||
140 | 10000, | ||
141 | }; | ||
142 | |||
143 | static char *hw_event_names[] = { | ||
144 | "CPU cycles", | ||
145 | "instructions", | ||
146 | "cache references", | ||
147 | "cache misses", | ||
148 | "branches", | ||
149 | "branch misses", | ||
150 | "bus cycles", | ||
151 | }; | ||
152 | |||
153 | static char *sw_event_names[] = { | ||
154 | "cpu clock ticks", | ||
155 | "task clock ticks", | ||
156 | "pagefaults", | ||
157 | "context switches", | ||
158 | "CPU migrations", | ||
159 | }; | ||
160 | |||
161 | struct event_symbol { | ||
162 | int event; | ||
163 | char *symbol; | ||
164 | }; | ||
165 | |||
166 | static struct event_symbol event_symbols[] = { | ||
167 | {PERF_COUNT_CPU_CYCLES, "cpu-cycles", }, | ||
168 | {PERF_COUNT_CPU_CYCLES, "cycles", }, | ||
169 | {PERF_COUNT_INSTRUCTIONS, "instructions", }, | ||
170 | {PERF_COUNT_CACHE_REFERENCES, "cache-references", }, | ||
171 | {PERF_COUNT_CACHE_MISSES, "cache-misses", }, | ||
172 | {PERF_COUNT_BRANCH_INSTRUCTIONS, "branch-instructions", }, | ||
173 | {PERF_COUNT_BRANCH_INSTRUCTIONS, "branches", }, | ||
174 | {PERF_COUNT_BRANCH_MISSES, "branch-misses", }, | ||
175 | {PERF_COUNT_BUS_CYCLES, "bus-cycles", }, | ||
176 | {PERF_COUNT_CPU_CLOCK, "cpu-ticks", }, | ||
177 | {PERF_COUNT_CPU_CLOCK, "ticks", }, | ||
178 | {PERF_COUNT_TASK_CLOCK, "task-ticks", }, | ||
179 | {PERF_COUNT_PAGE_FAULTS, "page-faults", }, | ||
180 | {PERF_COUNT_PAGE_FAULTS, "faults", }, | ||
181 | {PERF_COUNT_CONTEXT_SWITCHES, "context-switches", }, | ||
182 | {PERF_COUNT_CONTEXT_SWITCHES, "cs", }, | ||
183 | {PERF_COUNT_CPU_MIGRATIONS, "cpu-migrations", }, | ||
184 | {PERF_COUNT_CPU_MIGRATIONS, "migrations", }, | ||
185 | }; | ||
186 | |||
187 | static void display_events_help(void) | ||
188 | { | ||
189 | unsigned int i; | ||
190 | int e; | ||
191 | |||
192 | printf( | ||
193 | " -e EVENT --event=EVENT # symbolic-name abbreviations"); | ||
194 | |||
195 | for (i = 0, e = PERF_HW_EVENTS_MAX; i < ARRAY_SIZE(event_symbols); i++) { | ||
196 | if (e != event_symbols[i].event) { | ||
197 | e = event_symbols[i].event; | ||
198 | printf( | ||
199 | "\n %2d: %-20s", e, event_symbols[i].symbol); | ||
200 | } else | ||
201 | printf(" %s", event_symbols[i].symbol); | ||
202 | } | ||
203 | |||
204 | printf("\n" | ||
205 | " rNNN: raw PMU events (eventsel+umask)\n\n"); | ||
206 | } | ||
207 | |||
208 | static void display_perfstat_help(void) | ||
209 | { | ||
210 | printf( | ||
211 | "Usage: perfstat [<events...>] <cmd...>\n\n" | ||
212 | "PerfStat Options (up to %d event types can be specified):\n\n", | ||
213 | MAX_COUNTERS); | ||
214 | |||
215 | display_events_help(); | ||
216 | |||
217 | printf( | ||
218 | " -a # system-wide collection\n"); | ||
219 | exit(0); | ||
220 | } | ||
106 | 221 | ||
107 | static void display_help(void) | 222 | static void display_help(void) |
108 | { | 223 | { |
224 | if (run_perfstat) | ||
225 | return display_perfstat_help(); | ||
226 | |||
109 | printf( | 227 | printf( |
110 | "Usage: kerneltop [<options>]\n\n" | 228 | "Usage: kerneltop [<options>]\n" |
229 | " Or: kerneltop -S [<options>] COMMAND [ARGS]\n\n" | ||
111 | "KernelTop Options (up to %d event types can be specified at once):\n\n", | 230 | "KernelTop Options (up to %d event types can be specified at once):\n\n", |
112 | MAX_COUNTERS); | 231 | MAX_COUNTERS); |
232 | |||
233 | display_events_help(); | ||
234 | |||
113 | printf( | 235 | printf( |
114 | " -e EID --event=EID # event type ID [default: 0]\n" | 236 | " -S --stat # perfstat COMMAND\n" |
115 | " 0: CPU cycles\n" | 237 | " -a # system-wide collection (for perfstat)\n\n" |
116 | " 1: instructions\n" | ||
117 | " 2: cache accesses\n" | ||
118 | " 3: cache misses\n" | ||
119 | " 4: branch instructions\n" | ||
120 | " 5: branch prediction misses\n" | ||
121 | " 6: bus cycles\n\n" | ||
122 | " rNNN: raw PMU events (eventsel+umask)\n\n" | ||
123 | " -c CNT --count=CNT # event period to sample\n\n" | 238 | " -c CNT --count=CNT # event period to sample\n\n" |
124 | " -C CPU --cpu=CPU # CPU (-1 for all) [default: -1]\n" | 239 | " -C CPU --cpu=CPU # CPU (-1 for all) [default: -1]\n" |
125 | " -p PID --pid=PID # PID of sampled task (-1 for all) [default: -1]\n\n" | 240 | " -p PID --pid=PID # PID of sampled task (-1 for all) [default: -1]\n\n" |
126 | " -d delay --delay=<seconds> # sampling/display delay [default: 2]\n" | 241 | " -d delay --delay=<seconds> # sampling/display delay [default: 2]\n" |
127 | " -f CNT --filter=CNT # min-event-count filter [default: 100]\n\n" | 242 | " -f CNT --filter=CNT # min-event-count filter [default: 100]\n\n" |
128 | " -s symbol --symbol=<symbol> # function to be showed annotated one-shot\n" | 243 | " -s symbol --symbol=<symbol> # function to be showed annotated one-shot\n" |
129 | " -x path --vmlinux=<path> # the vmlinux binary, required for -s use:\n" | 244 | " -x path --vmlinux=<path> # the vmlinux binary, required for -s use\n" |
130 | " -z --zero # zero counts after display\n" | 245 | " -z --zero # zero counts after display\n" |
131 | " -D --dump_symtab # dump symbol table to stderr on startup\n" | 246 | " -D --dump_symtab # dump symbol table to stderr on startup\n" |
132 | "\n"); | 247 | ); |
133 | 248 | ||
134 | exit(0); | 249 | exit(0); |
135 | } | 250 | } |
136 | 251 | ||
137 | static void process_options(int argc, char *argv[]) | 252 | static int type_valid(int type) |
138 | { | 253 | { |
139 | int error = 0, counter; | 254 | if (type >= PERF_HW_EVENTS_MAX) |
255 | return 0; | ||
256 | if (type <= PERF_SW_EVENTS_MIN) | ||
257 | return 0; | ||
140 | 258 | ||
141 | for (;;) { | 259 | return 1; |
142 | int option_index = 0; | 260 | } |
143 | /** Options for getopt */ | ||
144 | static struct option long_options[] = { | ||
145 | {"count", required_argument, NULL, 'c'}, | ||
146 | {"cpu", required_argument, NULL, 'C'}, | ||
147 | {"delay", required_argument, NULL, 'd'}, | ||
148 | {"dump_symtab", no_argument, NULL, 'D'}, | ||
149 | {"event", required_argument, NULL, 'e'}, | ||
150 | {"filter", required_argument, NULL, 'f'}, | ||
151 | {"group", required_argument, NULL, 'g'}, | ||
152 | {"help", no_argument, NULL, 'h'}, | ||
153 | {"nmi", required_argument, NULL, 'n'}, | ||
154 | {"pid", required_argument, NULL, 'p'}, | ||
155 | {"vmlinux", required_argument, NULL, 'x'}, | ||
156 | {"symbol", required_argument, NULL, 's'}, | ||
157 | {"zero", no_argument, NULL, 'z'}, | ||
158 | {NULL, 0, NULL, 0 } | ||
159 | }; | ||
160 | int c = getopt_long(argc, argv, "c:C:d:De:f:g:hn:p:s:x:z", | ||
161 | long_options, &option_index); | ||
162 | if (c == -1) | ||
163 | break; | ||
164 | 261 | ||
165 | switch (c) { | 262 | static char *event_name(int ctr) |
166 | case 'c': | 263 | { |
167 | event_count[nr_counters] = atoi(optarg); break; | 264 | int type = event_id[ctr]; |
168 | case 'C': | 265 | static char buf[32]; |
169 | /* CPU and PID are mutually exclusive */ | ||
170 | if (tid != -1) { | ||
171 | printf("WARNING: CPU switch overriding PID\n"); | ||
172 | sleep(1); | ||
173 | tid = -1; | ||
174 | } | ||
175 | profile_cpu = atoi(optarg); break; | ||
176 | case 'd': delay_secs = atoi(optarg); break; | ||
177 | case 'D': dump_symtab = 1; break; | ||
178 | 266 | ||
179 | case 'e': error = parse_events(optarg); break; | 267 | if (event_raw[ctr]) { |
268 | sprintf(buf, "raw 0x%x", type); | ||
269 | return buf; | ||
270 | } | ||
271 | if (!type_valid(type)) | ||
272 | return "unknown"; | ||
180 | 273 | ||
181 | case 'f': count_filter = atoi(optarg); break; | 274 | if (type >= 0) |
182 | case 'g': group = atoi(optarg); break; | 275 | return hw_event_names[type]; |
183 | case 'h': display_help(); break; | 276 | |
184 | case 'n': nmi = atoi(optarg); break; | 277 | return sw_event_names[-type-1]; |
185 | case 'p': | 278 | } |
186 | /* CPU and PID are mutually exclusive */ | 279 | |
187 | if (profile_cpu != -1) { | 280 | /* |
188 | printf("WARNING: PID switch overriding CPU\n"); | 281 | * Each event can have multiple symbolic names. |
189 | sleep(1); | 282 | * Symbolic names are (almost) exactly matched. |
190 | profile_cpu = -1; | 283 | */ |
284 | static int match_event_symbols(char *str) | ||
285 | { | ||
286 | unsigned int i; | ||
287 | |||
288 | if (isdigit(str[0]) || str[0] == '-') | ||
289 | return atoi(str); | ||
290 | |||
291 | for (i = 0; i < ARRAY_SIZE(event_symbols); i++) { | ||
292 | if (!strncmp(str, event_symbols[i].symbol, | ||
293 | strlen(event_symbols[i].symbol))) | ||
294 | return event_symbols[i].event; | ||
295 | } | ||
296 | |||
297 | return PERF_HW_EVENTS_MAX; | ||
298 | } | ||
299 | |||
300 | static int parse_events(char *str) | ||
301 | { | ||
302 | int type, raw; | ||
303 | |||
304 | again: | ||
305 | if (nr_counters == MAX_COUNTERS) | ||
306 | return -1; | ||
307 | |||
308 | raw = 0; | ||
309 | if (*str == 'r') { | ||
310 | raw = 1; | ||
311 | ++str; | ||
312 | type = strtol(str, NULL, 16); | ||
313 | } else { | ||
314 | type = match_event_symbols(str); | ||
315 | if (!type_valid(type)) | ||
316 | return -1; | ||
317 | } | ||
318 | |||
319 | event_id[nr_counters] = type; | ||
320 | event_raw[nr_counters] = raw; | ||
321 | nr_counters++; | ||
322 | |||
323 | str = strstr(str, ","); | ||
324 | if (str) { | ||
325 | str++; | ||
326 | goto again; | ||
327 | } | ||
328 | |||
329 | return 0; | ||
330 | } | ||
331 | |||
332 | |||
333 | /* | ||
334 | * perfstat | ||
335 | */ | ||
336 | |||
337 | char fault_here[1000000]; | ||
338 | |||
339 | static void create_perfstat_counter(int counter) | ||
340 | { | ||
341 | struct perf_counter_hw_event hw_event; | ||
342 | |||
343 | memset(&hw_event, 0, sizeof(hw_event)); | ||
344 | hw_event.type = event_id[counter]; | ||
345 | hw_event.raw = event_raw[counter]; | ||
346 | hw_event.record_type = PERF_RECORD_SIMPLE; | ||
347 | hw_event.nmi = 0; | ||
348 | |||
349 | if (system_wide) { | ||
350 | int cpu; | ||
351 | for (cpu = 0; cpu < nr_cpus; cpu ++) { | ||
352 | fd[cpu][counter] = sys_perf_counter_open(&hw_event, -1, cpu, -1, 0); | ||
353 | if (fd[cpu][counter] < 0) { | ||
354 | printf("perfstat error: syscall returned with %d (%s)\n", | ||
355 | fd[cpu][counter], strerror(errno)); | ||
356 | exit(-1); | ||
191 | } | 357 | } |
192 | tid = atoi(optarg); break; | 358 | } |
193 | case 's': sym_filter = strdup(optarg); break; | 359 | } else { |
194 | case 'x': vmlinux = strdup(optarg); break; | 360 | hw_event.inherit = 1; |
195 | case 'z': zero = 1; break; | 361 | hw_event.disabled = 1; |
196 | default: error = 1; break; | 362 | |
363 | fd[0][counter] = sys_perf_counter_open(&hw_event, 0, -1, -1, 0); | ||
364 | if (fd[0][counter] < 0) { | ||
365 | printf("perfstat error: syscall returned with %d (%s)\n", | ||
366 | fd[0][counter], strerror(errno)); | ||
367 | exit(-1); | ||
197 | } | 368 | } |
198 | } | 369 | } |
199 | if (error) | 370 | } |
200 | display_help(); | ||
201 | 371 | ||
202 | if (!nr_counters) { | 372 | int do_perfstat(int argc, char *argv[]) |
203 | nr_counters = 1; | 373 | { |
204 | event_id[0] = 0; | 374 | unsigned long long t0, t1; |
375 | int counter; | ||
376 | ssize_t res; | ||
377 | int status; | ||
378 | int pid; | ||
379 | |||
380 | if (!system_wide) | ||
381 | nr_cpus = 1; | ||
382 | |||
383 | for (counter = 0; counter < nr_counters; counter++) | ||
384 | create_perfstat_counter(counter); | ||
385 | |||
386 | argc -= optind; | ||
387 | argv += optind; | ||
388 | |||
389 | /* | ||
390 | * Enable counters and exec the command: | ||
391 | */ | ||
392 | t0 = rdclock(); | ||
393 | prctl(PR_TASK_PERF_COUNTERS_ENABLE); | ||
394 | |||
395 | if ((pid = fork()) < 0) | ||
396 | perror("failed to fork"); | ||
397 | if (!pid) { | ||
398 | if (execvp(argv[0], argv)) { | ||
399 | perror(argv[0]); | ||
400 | exit(-1); | ||
401 | } | ||
205 | } | 402 | } |
403 | while (wait(&status) >= 0) | ||
404 | ; | ||
405 | prctl(PR_TASK_PERF_COUNTERS_DISABLE); | ||
406 | t1 = rdclock(); | ||
407 | |||
408 | fflush(stdout); | ||
409 | |||
410 | fprintf(stderr, "\n"); | ||
411 | fprintf(stderr, " Performance counter stats for \'%s\':\n", | ||
412 | argv[0]); | ||
413 | fprintf(stderr, "\n"); | ||
206 | 414 | ||
207 | for (counter = 0; counter < nr_counters; counter++) { | 415 | for (counter = 0; counter < nr_counters; counter++) { |
208 | if (event_count[counter]) | 416 | int cpu; |
209 | continue; | 417 | __u64 count, single_count; |
418 | |||
419 | count = 0; | ||
420 | for (cpu = 0; cpu < nr_cpus; cpu ++) { | ||
421 | res = read(fd[cpu][counter], | ||
422 | (char *) &single_count, sizeof(single_count)); | ||
423 | assert(res == sizeof(single_count)); | ||
424 | count += single_count; | ||
425 | } | ||
210 | 426 | ||
211 | if (event_id[counter] < PERF_HW_EVENTS_MAX) | 427 | if (!event_raw[counter] && |
212 | event_count[counter] = default_count[event_id[counter]]; | 428 | (event_id[counter] == PERF_COUNT_CPU_CLOCK || |
213 | else | 429 | event_id[counter] == PERF_COUNT_TASK_CLOCK)) { |
214 | event_count[counter] = 100000; | 430 | |
431 | double msecs = (double)count / 1000000; | ||
432 | |||
433 | fprintf(stderr, " %14.6f %-20s (msecs)\n", | ||
434 | msecs, event_name(counter)); | ||
435 | } else { | ||
436 | fprintf(stderr, " %14Ld %-20s (events)\n", | ||
437 | count, event_name(counter)); | ||
438 | } | ||
439 | if (!counter) | ||
440 | fprintf(stderr, "\n"); | ||
215 | } | 441 | } |
442 | fprintf(stderr, "\n"); | ||
443 | fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n", | ||
444 | (double)(t1-t0)/1e6); | ||
445 | fprintf(stderr, "\n"); | ||
446 | |||
447 | return 0; | ||
216 | } | 448 | } |
217 | 449 | ||
450 | /* | ||
451 | * Symbols | ||
452 | */ | ||
453 | |||
218 | static uint64_t min_ip; | 454 | static uint64_t min_ip; |
219 | static uint64_t max_ip = -1ll; | 455 | static uint64_t max_ip = -1ll; |
220 | 456 | ||
@@ -507,6 +743,9 @@ static void parse_symbols(void) | |||
507 | } | 743 | } |
508 | } | 744 | } |
509 | 745 | ||
746 | /* | ||
747 | * Source lines | ||
748 | */ | ||
510 | 749 | ||
511 | static void parse_vmlinux(char *filename) | 750 | static void parse_vmlinux(char *filename) |
512 | { | 751 | { |
@@ -527,7 +766,7 @@ static void parse_vmlinux(char *filename) | |||
527 | char *c; | 766 | char *c; |
528 | 767 | ||
529 | src = malloc(sizeof(struct source_line)); | 768 | src = malloc(sizeof(struct source_line)); |
530 | assert(src != NULL); | 769 | assert(src != NULL); |
531 | memset(src, 0, sizeof(struct source_line)); | 770 | memset(src, 0, sizeof(struct source_line)); |
532 | 771 | ||
533 | if (getline(&src->line, &dummy, file) < 0) | 772 | if (getline(&src->line, &dummy, file) < 0) |
@@ -706,11 +945,100 @@ static void process_event(uint64_t ip, int counter) | |||
706 | record_ip(ip, counter); | 945 | record_ip(ip, counter); |
707 | } | 946 | } |
708 | 947 | ||
948 | static void process_options(int argc, char *argv[]) | ||
949 | { | ||
950 | int error = 0, counter; | ||
951 | |||
952 | if (strstr(argv[0], "perfstat")) | ||
953 | run_perfstat = 1; | ||
954 | |||
955 | for (;;) { | ||
956 | int option_index = 0; | ||
957 | /** Options for getopt */ | ||
958 | static struct option long_options[] = { | ||
959 | {"count", required_argument, NULL, 'c'}, | ||
960 | {"cpu", required_argument, NULL, 'C'}, | ||
961 | {"delay", required_argument, NULL, 'd'}, | ||
962 | {"dump_symtab", no_argument, NULL, 'D'}, | ||
963 | {"event", required_argument, NULL, 'e'}, | ||
964 | {"filter", required_argument, NULL, 'f'}, | ||
965 | {"group", required_argument, NULL, 'g'}, | ||
966 | {"help", no_argument, NULL, 'h'}, | ||
967 | {"nmi", required_argument, NULL, 'n'}, | ||
968 | {"pid", required_argument, NULL, 'p'}, | ||
969 | {"vmlinux", required_argument, NULL, 'x'}, | ||
970 | {"symbol", required_argument, NULL, 's'}, | ||
971 | {"stat", no_argument, NULL, 'S'}, | ||
972 | {"zero", no_argument, NULL, 'z'}, | ||
973 | {NULL, 0, NULL, 0 } | ||
974 | }; | ||
975 | int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hn:p:s:Sx:z", | ||
976 | long_options, &option_index); | ||
977 | if (c == -1) | ||
978 | break; | ||
979 | |||
980 | switch (c) { | ||
981 | case 'a': system_wide = 1; break; | ||
982 | case 'c': event_count[nr_counters] = atoi(optarg); break; | ||
983 | case 'C': | ||
984 | /* CPU and PID are mutually exclusive */ | ||
985 | if (tid != -1) { | ||
986 | printf("WARNING: CPU switch overriding PID\n"); | ||
987 | sleep(1); | ||
988 | tid = -1; | ||
989 | } | ||
990 | profile_cpu = atoi(optarg); break; | ||
991 | case 'd': delay_secs = atoi(optarg); break; | ||
992 | case 'D': dump_symtab = 1; break; | ||
993 | |||
994 | case 'e': error = parse_events(optarg); break; | ||
995 | |||
996 | case 'f': count_filter = atoi(optarg); break; | ||
997 | case 'g': group = atoi(optarg); break; | ||
998 | case 'h': display_help(); break; | ||
999 | case 'n': nmi = atoi(optarg); break; | ||
1000 | case 'p': | ||
1001 | /* CPU and PID are mutually exclusive */ | ||
1002 | if (profile_cpu != -1) { | ||
1003 | printf("WARNING: PID switch overriding CPU\n"); | ||
1004 | sleep(1); | ||
1005 | profile_cpu = -1; | ||
1006 | } | ||
1007 | tid = atoi(optarg); break; | ||
1008 | case 's': sym_filter = strdup(optarg); break; | ||
1009 | case 'S': run_perfstat = 1; break; | ||
1010 | case 'x': vmlinux = strdup(optarg); break; | ||
1011 | case 'z': zero = 1; break; | ||
1012 | default: error = 1; break; | ||
1013 | } | ||
1014 | } | ||
1015 | if (error) | ||
1016 | display_help(); | ||
1017 | |||
1018 | if (!nr_counters) { | ||
1019 | if (run_perfstat) | ||
1020 | nr_counters = 8; | ||
1021 | else { | ||
1022 | nr_counters = 1; | ||
1023 | event_id[0] = 0; | ||
1024 | } | ||
1025 | } | ||
1026 | |||
1027 | for (counter = 0; counter < nr_counters; counter++) { | ||
1028 | if (event_count[counter]) | ||
1029 | continue; | ||
1030 | |||
1031 | if (event_id[counter] < PERF_HW_EVENTS_MAX) | ||
1032 | event_count[counter] = default_count[event_id[counter]]; | ||
1033 | else | ||
1034 | event_count[counter] = 100000; | ||
1035 | } | ||
1036 | } | ||
1037 | |||
709 | int main(int argc, char *argv[]) | 1038 | int main(int argc, char *argv[]) |
710 | { | 1039 | { |
711 | struct pollfd event_array[MAX_NR_CPUS][MAX_COUNTERS]; | 1040 | struct pollfd event_array[MAX_NR_CPUS][MAX_COUNTERS]; |
712 | struct perf_counter_hw_event hw_event; | 1041 | struct perf_counter_hw_event hw_event; |
713 | int fd[MAX_NR_CPUS][MAX_COUNTERS]; | ||
714 | int i, counter, group_fd; | 1042 | int i, counter, group_fd; |
715 | unsigned int cpu; | 1043 | unsigned int cpu; |
716 | uint64_t ip; | 1044 | uint64_t ip; |
@@ -720,11 +1048,15 @@ int main(int argc, char *argv[]) | |||
720 | process_options(argc, argv); | 1048 | process_options(argc, argv); |
721 | 1049 | ||
722 | nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); | 1050 | nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); |
1051 | assert(nr_cpus <= MAX_NR_CPUS); | ||
1052 | assert(nr_cpus >= 0); | ||
1053 | |||
1054 | if (run_perfstat) | ||
1055 | return do_perfstat(argc, argv); | ||
1056 | |||
723 | if (tid != -1 || profile_cpu != -1) | 1057 | if (tid != -1 || profile_cpu != -1) |
724 | nr_cpus = 1; | 1058 | nr_cpus = 1; |
725 | 1059 | ||
726 | assert(nr_cpus <= MAX_NR_CPUS); | ||
727 | |||
728 | for (i = 0; i < nr_cpus; i++) { | 1060 | for (i = 0; i < nr_cpus; i++) { |
729 | group_fd = -1; | 1061 | group_fd = -1; |
730 | for (counter = 0; counter < nr_counters; counter++) { | 1062 | for (counter = 0; counter < nr_counters; counter++) { |
diff --git a/Documentation/perf_counter/perfcounters.h b/Documentation/perf_counter/perfcounters.h index 99a90d833e12..32e24b9154ab 100644 --- a/Documentation/perf_counter/perfcounters.h +++ b/Documentation/perf_counter/perfcounters.h | |||
@@ -11,9 +11,6 @@ | |||
11 | #define PR_TASK_PERF_COUNTERS_DISABLE 31 | 11 | #define PR_TASK_PERF_COUNTERS_DISABLE 31 |
12 | #define PR_TASK_PERF_COUNTERS_ENABLE 32 | 12 | #define PR_TASK_PERF_COUNTERS_ENABLE 32 |
13 | 13 | ||
14 | #define MAX_COUNTERS 64 | ||
15 | #define MAX_NR_CPUS 256 | ||
16 | |||
17 | #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) | 14 | #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) |
18 | 15 | ||
19 | #define rdclock() \ | 16 | #define rdclock() \ |
@@ -110,6 +107,7 @@ struct perf_counter_hw_event { | |||
110 | __u64 __reserved_3; | 107 | __u64 __reserved_3; |
111 | }; | 108 | }; |
112 | 109 | ||
110 | |||
113 | #ifdef __x86_64__ | 111 | #ifdef __x86_64__ |
114 | # define __NR_perf_counter_open 295 | 112 | # define __NR_perf_counter_open 295 |
115 | #endif | 113 | #endif |
@@ -142,131 +140,3 @@ asmlinkage int sys_perf_counter_open( | |||
142 | #endif | 140 | #endif |
143 | return ret; | 141 | return ret; |
144 | } | 142 | } |
145 | |||
146 | static int nr_counters = 0; | ||
147 | static long event_id[MAX_COUNTERS] = { -2, -5, -4, -3, 0, 1, 2, 3}; | ||
148 | static int event_raw[MAX_COUNTERS]; | ||
149 | |||
150 | static char *hw_event_names [] = { | ||
151 | "CPU cycles", | ||
152 | "instructions", | ||
153 | "cache references", | ||
154 | "cache misses", | ||
155 | "branches", | ||
156 | "branch misses", | ||
157 | "bus cycles", | ||
158 | }; | ||
159 | |||
160 | static char *sw_event_names [] = { | ||
161 | "cpu clock ticks", | ||
162 | "task clock ticks", | ||
163 | "pagefaults", | ||
164 | "context switches", | ||
165 | "CPU migrations", | ||
166 | }; | ||
167 | |||
168 | struct event_symbol { | ||
169 | int event; | ||
170 | char *symbol; | ||
171 | }; | ||
172 | |||
173 | static struct event_symbol event_symbols [] = { | ||
174 | {PERF_COUNT_CPU_CYCLES, "cpu-cycles", }, | ||
175 | {PERF_COUNT_CPU_CYCLES, "cycles", }, | ||
176 | {PERF_COUNT_INSTRUCTIONS, "instructions", }, | ||
177 | {PERF_COUNT_CACHE_REFERENCES, "cache-references", }, | ||
178 | {PERF_COUNT_CACHE_MISSES, "cache-misses", }, | ||
179 | {PERF_COUNT_BRANCH_INSTRUCTIONS, "branch-instructions", }, | ||
180 | {PERF_COUNT_BRANCH_INSTRUCTIONS, "branches", }, | ||
181 | {PERF_COUNT_BRANCH_MISSES, "branch-misses", }, | ||
182 | {PERF_COUNT_BUS_CYCLES, "bus-cycles", }, | ||
183 | {PERF_COUNT_CPU_CLOCK, "cpu-ticks", }, | ||
184 | {PERF_COUNT_CPU_CLOCK, "ticks", }, | ||
185 | {PERF_COUNT_TASK_CLOCK, "task-ticks", }, | ||
186 | {PERF_COUNT_PAGE_FAULTS, "page-faults", }, | ||
187 | {PERF_COUNT_PAGE_FAULTS, "faults", }, | ||
188 | {PERF_COUNT_CONTEXT_SWITCHES, "context-switches", }, | ||
189 | {PERF_COUNT_CONTEXT_SWITCHES, "cs", }, | ||
190 | {PERF_COUNT_CPU_MIGRATIONS, "cpu-migrations", }, | ||
191 | {PERF_COUNT_CPU_MIGRATIONS, "migrations", }, | ||
192 | }; | ||
193 | |||
194 | static int type_valid(int type) | ||
195 | { | ||
196 | if (type >= PERF_HW_EVENTS_MAX) | ||
197 | return 0; | ||
198 | if (type <= PERF_SW_EVENTS_MIN) | ||
199 | return 0; | ||
200 | |||
201 | return 1; | ||
202 | } | ||
203 | |||
204 | static char *event_name(int ctr) | ||
205 | { | ||
206 | int type = event_id[ctr]; | ||
207 | static char buf[32]; | ||
208 | |||
209 | if (event_raw[ctr]) { | ||
210 | sprintf(buf, "raw 0x%x", type); | ||
211 | return buf; | ||
212 | } | ||
213 | if (!type_valid(type)) | ||
214 | return "unknown"; | ||
215 | |||
216 | if (type >= 0) | ||
217 | return hw_event_names[type]; | ||
218 | |||
219 | return sw_event_names[-type-1]; | ||
220 | } | ||
221 | |||
222 | /* | ||
223 | * Each event can have multiple symbolic names. | ||
224 | * Symbolic names are (almost) exactly matched. | ||
225 | */ | ||
226 | static int match_event_symbols(char *str) | ||
227 | { | ||
228 | unsigned int i; | ||
229 | |||
230 | if (isdigit(str[0]) || str[0] == '-') | ||
231 | return atoi(str); | ||
232 | |||
233 | for (i = 0; i < ARRAY_SIZE(event_symbols); i++) { | ||
234 | if (!strncmp(str, event_symbols[i].symbol, | ||
235 | strlen(event_symbols[i].symbol))) | ||
236 | return event_symbols[i].event; | ||
237 | } | ||
238 | |||
239 | return PERF_HW_EVENTS_MAX; | ||
240 | } | ||
241 | |||
242 | static int parse_events(char *str) | ||
243 | { | ||
244 | int type, raw; | ||
245 | |||
246 | again: | ||
247 | if (nr_counters == MAX_COUNTERS) | ||
248 | return -1; | ||
249 | |||
250 | raw = 0; | ||
251 | if (*str == 'r') { | ||
252 | raw = 1; | ||
253 | ++str; | ||
254 | type = strtol(str, NULL, 16); | ||
255 | } else { | ||
256 | type = match_event_symbols(str); | ||
257 | if (!type_valid(type)) | ||
258 | return -1; | ||
259 | } | ||
260 | |||
261 | event_id[nr_counters] = type; | ||
262 | event_raw[nr_counters] = raw; | ||
263 | nr_counters++; | ||
264 | |||
265 | str = strstr(str, ","); | ||
266 | if (str) { | ||
267 | str++; | ||
268 | goto again; | ||
269 | } | ||
270 | |||
271 | return 0; | ||
272 | } | ||