diff options
Diffstat (limited to 'Documentation')
-rw-r--r-- | Documentation/perf_counter/Makefile | 12 | ||||
-rw-r--r-- | Documentation/perf_counter/kerneltop.c | 956 | ||||
-rw-r--r-- | Documentation/perf_counter/perfstat.c | 521 |
3 files changed, 1489 insertions, 0 deletions
diff --git a/Documentation/perf_counter/Makefile b/Documentation/perf_counter/Makefile new file mode 100644 index 000000000000..b45749753fcb --- /dev/null +++ b/Documentation/perf_counter/Makefile | |||
@@ -0,0 +1,12 @@ | |||
# Build the perf counter example tools.
BINS = kerneltop perfstat

all: $(BINS)

# kerneltop needs glib-2.0; libraries go after the source file so that
# linkers which resolve symbols left to right pick them up.
kerneltop: kerneltop.c
	cc -O6 -Wall `pkg-config --cflags glib-2.0` -o $@ $< `pkg-config --libs glib-2.0` -lrt

# perfstat is a separate program with its own source file.
perfstat: perfstat.c
	cc -O2 -g -Wall -W -o $@ $< -lrt

clean:
	rm -f $(BINS)

.PHONY: all clean
diff --git a/Documentation/perf_counter/kerneltop.c b/Documentation/perf_counter/kerneltop.c new file mode 100644 index 000000000000..cf0e30bab5d5 --- /dev/null +++ b/Documentation/perf_counter/kerneltop.c | |||
@@ -0,0 +1,956 @@ | |||
1 | /* | ||
2 | * kerneltop.c: show top kernel functions - performance counters showcase | ||
3 | |||
4 | Build with: | ||
5 | |||
6 | cc -O6 -Wall `pkg-config --cflags --libs glib-2.0` -o kerneltop kerneltop.c | ||
7 | |||
8 | Sample output: | ||
9 | |||
10 | ------------------------------------------------------------------------------ | ||
11 | KernelTop: 2669 irqs/sec [NMI, cache-misses/cache-refs], (all, cpu: 2) | ||
12 | ------------------------------------------------------------------------------ | ||
13 | |||
14 | weight RIP kernel function | ||
15 | ______ ________________ _______________ | ||
16 | |||
17 | 35.20 - ffffffff804ce74b : skb_copy_and_csum_dev | ||
18 | 33.00 - ffffffff804cb740 : sock_alloc_send_skb | ||
19 | 31.26 - ffffffff804ce808 : skb_push | ||
20 | 22.43 - ffffffff80510004 : tcp_established_options | ||
21 | 19.00 - ffffffff8027d250 : find_get_page | ||
22 | 15.76 - ffffffff804e4fc9 : eth_type_trans | ||
23 | 15.20 - ffffffff804d8baa : dst_release | ||
24 | 14.86 - ffffffff804cf5d8 : skb_release_head_state | ||
25 | 14.00 - ffffffff802217d5 : read_hpet | ||
26 | 12.00 - ffffffff804ffb7f : __ip_local_out | ||
27 | 11.97 - ffffffff804fc0c8 : ip_local_deliver_finish | ||
28 | 8.54 - ffffffff805001a3 : ip_queue_xmit | ||
29 | |||
30 | Started by Ingo Molnar <mingo@redhat.com> | ||
31 | |||
32 | Improvements and fixes by: | ||
33 | |||
34 | Arjan van de Ven <arjan@linux.intel.com> | ||
35 | Yanmin Zhang <yanmin.zhang@intel.com> | ||
36 | Mike Galbraith <efault@gmx.de> | ||
37 | |||
38 | Released under the GPL v2. (and only v2, not any later version) | ||
39 | |||
40 | */ | ||
41 | #define _GNU_SOURCE | ||
42 | #include <sys/types.h> | ||
43 | #include <sys/stat.h> | ||
44 | #include <sys/time.h> | ||
45 | #include <unistd.h> | ||
46 | #include <stdint.h> | ||
47 | #include <stdlib.h> | ||
48 | #include <string.h> | ||
49 | #include <getopt.h> | ||
50 | #include <assert.h> | ||
51 | #include <fcntl.h> | ||
52 | #include <stdio.h> | ||
53 | #include <errno.h> | ||
54 | #include <ctype.h> | ||
55 | #include <time.h> | ||
56 | |||
57 | #include <glib.h> | ||
58 | |||
59 | #include <sys/syscall.h> | ||
60 | #include <sys/ioctl.h> | ||
61 | #include <sys/poll.h> | ||
62 | #include <sys/prctl.h> | ||
63 | #include <sys/wait.h> | ||
64 | #include <sys/uio.h> | ||
65 | |||
66 | #include <linux/unistd.h> | ||
67 | |||
68 | #ifdef __x86_64__ | ||
69 | # define __NR_perf_counter_open 295 | ||
70 | #endif | ||
71 | |||
72 | #ifdef __i386__ | ||
73 | # define __NR_perf_counter_open 333 | ||
74 | #endif | ||
75 | |||
76 | /* | ||
77 | * Pick up some kernel type conventions: | ||
78 | */ | ||
79 | #define __user | ||
80 | #define asmlinkage | ||
81 | |||
82 | typedef unsigned int __u32; | ||
83 | typedef unsigned long long __u64; | ||
84 | typedef long long __s64; | ||
85 | |||
86 | /* | ||
87 | * User-space ABI bits: | ||
88 | */ | ||
89 | |||
90 | /* | ||
91 | * Generalized performance counter event types, used by the hw_event.type | ||
92 | * parameter of the sys_perf_counter_open() syscall: | ||
93 | */ | ||
94 | enum hw_event_types { | ||
95 | /* | ||
96 | * Common hardware events, generalized by the kernel: | ||
97 | */ | ||
98 | PERF_COUNT_CPU_CYCLES = 0, | ||
99 | PERF_COUNT_INSTRUCTIONS = 1, | ||
100 | PERF_COUNT_CACHE_REFERENCES = 2, | ||
101 | PERF_COUNT_CACHE_MISSES = 3, | ||
102 | PERF_COUNT_BRANCH_INSTRUCTIONS = 4, | ||
103 | PERF_COUNT_BRANCH_MISSES = 5, | ||
104 | PERF_COUNT_BUS_CYCLES = 6, | ||
105 | |||
106 | PERF_HW_EVENTS_MAX = 7, | ||
107 | |||
108 | /* | ||
109 | * Special "software" counters provided by the kernel, even if | ||
110 | * the hardware does not support performance counters. These | ||
111 | * counters measure various physical and sw events of the | ||
112 | * kernel (and allow the profiling of them as well): | ||
113 | */ | ||
114 | PERF_COUNT_CPU_CLOCK = -1, | ||
115 | PERF_COUNT_TASK_CLOCK = -2, | ||
116 | PERF_COUNT_PAGE_FAULTS = -3, | ||
117 | PERF_COUNT_CONTEXT_SWITCHES = -4, | ||
118 | PERF_COUNT_CPU_MIGRATIONS = -5, | ||
119 | |||
120 | PERF_SW_EVENTS_MIN = -6, | ||
121 | }; | ||
122 | |||
123 | /* | ||
124 | * IRQ-notification data record type: | ||
125 | */ | ||
126 | enum perf_counter_record_type { | ||
127 | PERF_RECORD_SIMPLE = 0, | ||
128 | PERF_RECORD_IRQ = 1, | ||
129 | PERF_RECORD_GROUP = 2, | ||
130 | }; | ||
131 | |||
132 | /* | ||
133 | * Hardware event to monitor via a performance monitoring counter: | ||
134 | */ | ||
135 | struct perf_counter_hw_event { | ||
136 | __s64 type; | ||
137 | |||
138 | __u64 irq_period; | ||
139 | __u64 record_type; | ||
140 | __u64 read_format; | ||
141 | |||
142 | __u64 disabled : 1, /* off by default */ | ||
143 | nmi : 1, /* NMI sampling */ | ||
144 | raw : 1, /* raw event type */ | ||
145 | inherit : 1, /* children inherit it */ | ||
146 | pinned : 1, /* must always be on PMU */ | ||
147 | exclusive : 1, /* only group on PMU */ | ||
148 | exclude_user : 1, /* don't count user */ | ||
149 | exclude_kernel : 1, /* ditto kernel */ | ||
150 | exclude_hv : 1, /* ditto hypervisor */ | ||
151 | exclude_idle : 1, /* don't count when idle */ | ||
152 | |||
153 | __reserved_1 : 54; | ||
154 | |||
155 | __u32 extra_config_len; | ||
156 | __u32 __reserved_4; | ||
157 | |||
158 | __u64 __reserved_2; | ||
159 | __u64 __reserved_3; | ||
160 | }; | ||
161 | |||
162 | /* | ||
163 | * Ioctls that can be done on a perf counter fd: | ||
164 | */ | ||
165 | #define PERF_COUNTER_IOC_ENABLE _IO('$', 0) | ||
166 | #define PERF_COUNTER_IOC_DISABLE _IO('$', 1) | ||
167 | |||
168 | asmlinkage int sys_perf_counter_open( | ||
169 | |||
170 | struct perf_counter_hw_event *hw_event_uptr __user, | ||
171 | pid_t pid, | ||
172 | int cpu, | ||
173 | int group_fd, | ||
174 | unsigned long flags) | ||
175 | { | ||
176 | int ret; | ||
177 | |||
178 | ret = syscall( | ||
179 | __NR_perf_counter_open, hw_event_uptr, pid, cpu, group_fd, flags); | ||
180 | #if defined(__x86_64__) || defined(__i386__) | ||
181 | if (ret < 0 && ret > -4096) { | ||
182 | errno = -ret; | ||
183 | ret = -1; | ||
184 | } | ||
185 | #endif | ||
186 | return ret; | ||
187 | } | ||
188 | |||
189 | const char *event_types [] = { | ||
190 | "CPU cycles", | ||
191 | "instructions", | ||
192 | "cache-refs", | ||
193 | "cache-misses", | ||
194 | "branches", | ||
195 | "branch-misses", | ||
196 | "bus cycles" | ||
197 | }; | ||
198 | |||
/*
 * Default sampling period per generalized hardware event, indexed by the
 * event ID (0..6, same order as the event name table).  There must be one
 * entry for every hardware event ID, otherwise looking up the default for
 * the last event reads past the end of the array.
 */
const unsigned int default_count[] = {
	1000000,	/* 0: CPU cycles */
	1000000,	/* 1: instructions */
	  10000,	/* 2: cache-refs */
	  10000,	/* 3: cache-misses */
	1000000,	/* 4: branches */
	  10000,	/* 5: branch-misses */
	 100000,	/* 6: bus cycles */
};
207 | |||
208 | /* | ||
209 | * prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all | ||
210 | * counters in the current task. | ||
211 | */ | ||
212 | #define PR_TASK_PERF_COUNTERS_DISABLE 31 | ||
213 | #define PR_TASK_PERF_COUNTERS_ENABLE 32 | ||
214 | |||
215 | #define MAX_COUNTERS 8 | ||
216 | |||
217 | static int nr_counters = -1; | ||
218 | |||
219 | static __u64 count_filter = 100; | ||
220 | |||
221 | #define MAX_NR_CPUS 256 | ||
222 | |||
223 | static int event_count[MAX_COUNTERS]; | ||
224 | static unsigned long event_id[MAX_COUNTERS]; | ||
225 | static int event_raw[MAX_COUNTERS]; | ||
226 | |||
227 | static int tid = -1; | ||
228 | static int profile_cpu = -1; | ||
229 | static int nr_cpus = 0; | ||
230 | static int nmi = 1; | ||
231 | static int group = 0; | ||
232 | |||
233 | static char *vmlinux; | ||
234 | |||
235 | static char *sym_filter; | ||
236 | static unsigned long filter_start; | ||
237 | static unsigned long filter_end; | ||
238 | |||
239 | static int delay_secs = 2; | ||
240 | static int zero; | ||
241 | static int dump_symtab; | ||
242 | |||
243 | struct source_line { | ||
244 | uint64_t EIP; | ||
245 | unsigned long count; | ||
246 | char *line; | ||
247 | }; | ||
248 | |||
249 | static GList *lines; | ||
250 | |||
251 | static void display_help(void) | ||
252 | { | ||
253 | printf( | ||
254 | "Usage: kerneltop [<options>]\n\n" | ||
255 | "KernelTop Options (up to %d event types can be specified at once):\n\n", | ||
256 | MAX_COUNTERS); | ||
257 | printf( | ||
258 | " -e EID --event_id=EID # event type ID [default: 0]\n" | ||
259 | " 0: CPU cycles\n" | ||
260 | " 1: instructions\n" | ||
261 | " 2: cache accesses\n" | ||
262 | " 3: cache misses\n" | ||
263 | " 4: branch instructions\n" | ||
264 | " 5: branch prediction misses\n" | ||
265 | " 6: bus cycles\n\n" | ||
266 | " rNNN: raw PMU events (eventsel+umask)\n\n" | ||
267 | " -c CNT --count=CNT # event period to sample\n\n" | ||
268 | " -C CPU --cpu=CPU # CPU (-1 for all) [default: -1]\n" | ||
269 | " -p PID --pid=PID # PID of sampled task (-1 for all) [default: -1]\n\n" | ||
270 | " -d delay --delay=<seconds> # sampling/display delay [default: 2]\n" | ||
271 | " -f CNT --filter=CNT # min-event-count filter [default: 100]\n\n" | ||
272 | " -s symbol --symbol=<symbol> # function to be showed annotated one-shot\n" | ||
273 | " -x path --vmlinux=<path> # the vmlinux binary, required for -s use:\n" | ||
274 | " -z --zero # zero counts after display\n" | ||
275 | " -D --dump_symtab # dump symbol table to stderr on startup\n" | ||
276 | "\n"); | ||
277 | |||
278 | exit(0); | ||
279 | } | ||
280 | |||
281 | static void process_options(int argc, char *argv[]) | ||
282 | { | ||
283 | int error = 0, counter; | ||
284 | |||
285 | for (;;) { | ||
286 | int option_index = 0; | ||
287 | /** Options for getopt */ | ||
288 | static struct option long_options[] = { | ||
289 | {"count", required_argument, NULL, 'c'}, | ||
290 | {"cpu", required_argument, NULL, 'C'}, | ||
291 | {"delay", required_argument, NULL, 'd'}, | ||
292 | {"dump_symtab", no_argument, NULL, 'D'}, | ||
293 | {"event_id", required_argument, NULL, 'e'}, | ||
294 | {"filter", required_argument, NULL, 'f'}, | ||
295 | {"group", required_argument, NULL, 'g'}, | ||
296 | {"help", no_argument, NULL, 'h'}, | ||
297 | {"nmi", required_argument, NULL, 'n'}, | ||
298 | {"pid", required_argument, NULL, 'p'}, | ||
299 | {"vmlinux", required_argument, NULL, 'x'}, | ||
300 | {"symbol", required_argument, NULL, 's'}, | ||
301 | {"zero", no_argument, NULL, 'z'}, | ||
302 | {NULL, 0, NULL, 0 } | ||
303 | }; | ||
304 | int c = getopt_long(argc, argv, "c:C:d:De:f:g:hn:p:s:x:z", | ||
305 | long_options, &option_index); | ||
306 | if (c == -1) | ||
307 | break; | ||
308 | |||
309 | switch (c) { | ||
310 | case 'c': | ||
311 | if (nr_counters == -1) | ||
312 | nr_counters = 0; | ||
313 | event_count[nr_counters] = atoi(optarg); break; | ||
314 | case 'C': | ||
315 | /* CPU and PID are mutually exclusive */ | ||
316 | if (tid != -1) { | ||
317 | printf("WARNING: CPU switch overriding PID\n"); | ||
318 | sleep(1); | ||
319 | tid = -1; | ||
320 | } | ||
321 | profile_cpu = atoi(optarg); break; | ||
322 | case 'd': delay_secs = atoi(optarg); break; | ||
323 | case 'D': dump_symtab = 1; break; | ||
324 | |||
325 | case 'e': | ||
326 | nr_counters++; | ||
327 | if (nr_counters == MAX_COUNTERS) { | ||
328 | error = 1; | ||
329 | break; | ||
330 | } | ||
331 | if (*optarg == 'r') { | ||
332 | event_raw[nr_counters] = 1; | ||
333 | ++optarg; | ||
334 | } | ||
335 | event_id[nr_counters] = strtol(optarg, NULL, 16); | ||
336 | break; | ||
337 | |||
338 | case 'f': count_filter = atoi(optarg); break; | ||
339 | case 'g': group = atoi(optarg); break; | ||
340 | case 'h': display_help(); break; | ||
341 | case 'n': nmi = atoi(optarg); break; | ||
342 | case 'p': | ||
343 | /* CPU and PID are mutually exclusive */ | ||
344 | if (profile_cpu != -1) { | ||
345 | printf("WARNING: PID switch overriding CPU\n"); | ||
346 | sleep(1); | ||
347 | profile_cpu = -1; | ||
348 | } | ||
349 | tid = atoi(optarg); break; | ||
350 | case 's': sym_filter = strdup(optarg); break; | ||
351 | case 'x': vmlinux = strdup(optarg); break; | ||
352 | case 'z': zero = 1; break; | ||
353 | default: error = 1; break; | ||
354 | } | ||
355 | } | ||
356 | if (error) | ||
357 | display_help(); | ||
358 | |||
359 | nr_counters++; | ||
360 | if (nr_counters < 1) | ||
361 | nr_counters = 1; | ||
362 | |||
363 | for (counter = 0; counter < nr_counters; counter++) { | ||
364 | if (event_count[counter]) | ||
365 | continue; | ||
366 | |||
367 | if (event_id[counter] < PERF_HW_EVENTS_MAX) | ||
368 | event_count[counter] = default_count[event_id[counter]]; | ||
369 | else | ||
370 | event_count[counter] = 100000; | ||
371 | } | ||
372 | } | ||
373 | |||
374 | static uint64_t min_ip; | ||
375 | static uint64_t max_ip = -1ll; | ||
376 | |||
377 | struct sym_entry { | ||
378 | unsigned long long addr; | ||
379 | char *sym; | ||
380 | unsigned long count[MAX_COUNTERS]; | ||
381 | int skip; | ||
382 | GList *source; | ||
383 | }; | ||
384 | |||
385 | #define MAX_SYMS 100000 | ||
386 | |||
387 | static int sym_table_count; | ||
388 | |||
389 | struct sym_entry *sym_filter_entry; | ||
390 | |||
391 | static struct sym_entry sym_table[MAX_SYMS]; | ||
392 | |||
393 | static void show_details(struct sym_entry *sym); | ||
394 | |||
395 | /* | ||
396 | * Ordering weight: count-1 * count-1 * ... / count-n | ||
397 | */ | ||
398 | static double sym_weight(const struct sym_entry *sym) | ||
399 | { | ||
400 | double weight; | ||
401 | int counter; | ||
402 | |||
403 | weight = sym->count[0]; | ||
404 | |||
405 | for (counter = 1; counter < nr_counters-1; counter++) | ||
406 | weight *= sym->count[counter]; | ||
407 | |||
408 | weight /= (sym->count[counter] + 1); | ||
409 | |||
410 | return weight; | ||
411 | } | ||
412 | |||
/*
 * qsort() comparator ordering symbols by descending sym_weight().
 *
 * Returns a negative value, zero or a positive value as qsort() requires;
 * a comparator that only ever returns 0 or 1 gives an undefined ordering.
 */
static int compare(const void *__sym1, const void *__sym2)
{
	const struct sym_entry *sym1 = __sym1, *sym2 = __sym2;
	double weight1 = sym_weight(sym1);
	double weight2 = sym_weight(sym2);

	if (weight1 < weight2)
		return 1;	/* heavier symbols sort first */
	if (weight1 > weight2)
		return -1;
	return 0;
}
419 | |||
420 | static time_t last_refresh; | ||
421 | static long events; | ||
422 | static long userspace_events; | ||
/* ANSI sequence: ESC[H moves the cursor home, ESC[2J erases the screen. */
static const char CONSOLE_CLEAR[] = "\033[H\033[2J";
424 | |||
425 | static struct sym_entry tmp[MAX_SYMS]; | ||
426 | |||
427 | static void print_sym_table(void) | ||
428 | { | ||
429 | int i, printed; | ||
430 | int counter; | ||
431 | float events_per_sec = events/delay_secs; | ||
432 | float kevents_per_sec = (events-userspace_events)/delay_secs; | ||
433 | |||
434 | memcpy(tmp, sym_table, sizeof(sym_table[0])*sym_table_count); | ||
435 | qsort(tmp, sym_table_count, sizeof(tmp[0]), compare); | ||
436 | |||
437 | write(1, CONSOLE_CLEAR, strlen(CONSOLE_CLEAR)); | ||
438 | |||
439 | printf( | ||
440 | "------------------------------------------------------------------------------\n"); | ||
441 | printf( " KernelTop:%8.0f irqs/sec kernel:%3.1f%% [%s, ", | ||
442 | events_per_sec, | ||
443 | 100.0 - (100.0*((events_per_sec-kevents_per_sec)/events_per_sec)), | ||
444 | nmi ? "NMI" : "IRQ"); | ||
445 | |||
446 | if (nr_counters == 1) | ||
447 | printf("%d ", event_count[0]); | ||
448 | |||
449 | for (counter = 0; counter < nr_counters; counter++) { | ||
450 | if (counter) | ||
451 | printf("/"); | ||
452 | |||
453 | if (event_id[counter] < PERF_HW_EVENTS_MAX) | ||
454 | printf( "%s", event_types[event_id[counter]]); | ||
455 | else | ||
456 | printf( "raw:%04lx", event_id[counter]); | ||
457 | } | ||
458 | |||
459 | printf( "], "); | ||
460 | |||
461 | if (tid != -1) | ||
462 | printf(" (tid: %d", tid); | ||
463 | else | ||
464 | printf(" (all"); | ||
465 | |||
466 | if (profile_cpu != -1) | ||
467 | printf(", cpu: %d)\n", profile_cpu); | ||
468 | else { | ||
469 | if (tid != -1) | ||
470 | printf(")\n"); | ||
471 | else | ||
472 | printf(", %d CPUs)\n", nr_cpus); | ||
473 | } | ||
474 | |||
475 | printf("------------------------------------------------------------------------------\n\n"); | ||
476 | |||
477 | if (nr_counters == 1) | ||
478 | printf(" events"); | ||
479 | else | ||
480 | printf(" weight events"); | ||
481 | |||
482 | printf(" RIP kernel function\n" | ||
483 | " ______ ______ ________________ _______________\n\n" | ||
484 | ); | ||
485 | |||
486 | printed = 0; | ||
487 | for (i = 0; i < sym_table_count; i++) { | ||
488 | int count; | ||
489 | |||
490 | if (nr_counters == 1) { | ||
491 | if (printed <= 18 && | ||
492 | tmp[i].count[0] >= count_filter) { | ||
493 | printf("%19.2f - %016llx : %s\n", | ||
494 | sym_weight(tmp + i), tmp[i].addr, tmp[i].sym); | ||
495 | printed++; | ||
496 | } | ||
497 | } else { | ||
498 | if (printed <= 18 && | ||
499 | tmp[i].count[0] >= count_filter) { | ||
500 | printf("%8.1f %10ld - %016llx : %s\n", | ||
501 | sym_weight(tmp + i), | ||
502 | tmp[i].count[0], | ||
503 | tmp[i].addr, tmp[i].sym); | ||
504 | printed++; | ||
505 | } | ||
506 | } | ||
507 | /* | ||
508 | * Add decay to the counts: | ||
509 | */ | ||
510 | for (count = 0; count < nr_counters; count++) | ||
511 | sym_table[i].count[count] = zero ? 0 : sym_table[i].count[count] * 7 / 8; | ||
512 | } | ||
513 | |||
514 | if (sym_filter_entry) | ||
515 | show_details(sym_filter_entry); | ||
516 | |||
517 | last_refresh = time(NULL); | ||
518 | |||
519 | { | ||
520 | struct pollfd stdin_poll = { .fd = 0, .events = POLLIN }; | ||
521 | |||
522 | if (poll(&stdin_poll, 1, 0) == 1) { | ||
523 | printf("key pressed - exiting.\n"); | ||
524 | exit(0); | ||
525 | } | ||
526 | } | ||
527 | } | ||
528 | |||
529 | static int read_symbol(FILE *in, struct sym_entry *s) | ||
530 | { | ||
531 | static int filter_match = 0; | ||
532 | char *sym, stype; | ||
533 | char str[500]; | ||
534 | int rc, pos; | ||
535 | |||
536 | rc = fscanf(in, "%llx %c %499s", &s->addr, &stype, str); | ||
537 | if (rc == EOF) | ||
538 | return -1; | ||
539 | |||
540 | assert(rc == 3); | ||
541 | |||
542 | /* skip until end of line: */ | ||
543 | pos = strlen(str); | ||
544 | do { | ||
545 | rc = fgetc(in); | ||
546 | if (rc == '\n' || rc == EOF || pos >= 499) | ||
547 | break; | ||
548 | str[pos] = rc; | ||
549 | pos++; | ||
550 | } while (1); | ||
551 | str[pos] = 0; | ||
552 | |||
553 | sym = str; | ||
554 | |||
555 | /* Filter out known duplicates and non-text symbols. */ | ||
556 | if (!strcmp(sym, "_text")) | ||
557 | return 1; | ||
558 | if (!min_ip && !strcmp(sym, "_stext")) | ||
559 | return 1; | ||
560 | if (!strcmp(sym, "_etext") || !strcmp(sym, "_sinittext")) | ||
561 | return 1; | ||
562 | if (stype != 'T' && stype != 't') | ||
563 | return 1; | ||
564 | if (!strncmp("init_module", sym, 11) || !strncmp("cleanup_module", sym, 14)) | ||
565 | return 1; | ||
566 | if (strstr(sym, "_text_start") || strstr(sym, "_text_end")) | ||
567 | return 1; | ||
568 | |||
569 | s->sym = malloc(strlen(str)); | ||
570 | assert(s->sym); | ||
571 | |||
572 | strcpy((char *)s->sym, str); | ||
573 | s->skip = 0; | ||
574 | |||
575 | /* Tag events to be skipped. */ | ||
576 | if (!strcmp("default_idle", s->sym) || !strcmp("cpu_idle", s->sym)) | ||
577 | s->skip = 1; | ||
578 | if (!strcmp("enter_idle", s->sym) || !strcmp("exit_idle", s->sym)) | ||
579 | s->skip = 1; | ||
580 | |||
581 | if (filter_match == 1) { | ||
582 | filter_end = s->addr; | ||
583 | filter_match = -1; | ||
584 | if (filter_end - filter_start > 10000) { | ||
585 | printf("hm, too large filter symbol <%s> - skipping.\n", | ||
586 | sym_filter); | ||
587 | printf("symbol filter start: %016lx\n", filter_start); | ||
588 | printf(" end: %016lx\n", filter_end); | ||
589 | filter_end = filter_start = 0; | ||
590 | sym_filter = NULL; | ||
591 | sleep(1); | ||
592 | } | ||
593 | } | ||
594 | if (filter_match == 0 && sym_filter && !strcmp(s->sym, sym_filter)) { | ||
595 | filter_match = 1; | ||
596 | filter_start = s->addr; | ||
597 | } | ||
598 | |||
599 | return 0; | ||
600 | } | ||
601 | |||
602 | int compare_addr(const void *__sym1, const void *__sym2) | ||
603 | { | ||
604 | const struct sym_entry *sym1 = __sym1, *sym2 = __sym2; | ||
605 | |||
606 | return sym1->addr > sym2->addr; | ||
607 | } | ||
608 | |||
609 | static void sort_symbol_table(void) | ||
610 | { | ||
611 | int i, dups; | ||
612 | |||
613 | do { | ||
614 | qsort(sym_table, sym_table_count, sizeof(sym_table[0]), compare_addr); | ||
615 | for (i = 0, dups = 0; i < sym_table_count; i++) { | ||
616 | if (sym_table[i].addr == sym_table[i+1].addr) { | ||
617 | sym_table[i+1].addr = -1ll; | ||
618 | dups++; | ||
619 | } | ||
620 | } | ||
621 | sym_table_count -= dups; | ||
622 | } while(dups); | ||
623 | } | ||
624 | |||
625 | static void parse_symbols(void) | ||
626 | { | ||
627 | struct sym_entry *last; | ||
628 | |||
629 | FILE *kallsyms = fopen("/proc/kallsyms", "r"); | ||
630 | |||
631 | if (!kallsyms) { | ||
632 | printf("Could not open /proc/kallsyms - no CONFIG_KALLSYMS_ALL=y?\n"); | ||
633 | exit(-1); | ||
634 | } | ||
635 | |||
636 | while (!feof(kallsyms)) { | ||
637 | if (read_symbol(kallsyms, &sym_table[sym_table_count]) == 0) { | ||
638 | sym_table_count++; | ||
639 | assert(sym_table_count <= MAX_SYMS); | ||
640 | } | ||
641 | } | ||
642 | |||
643 | sort_symbol_table(); | ||
644 | min_ip = sym_table[0].addr; | ||
645 | max_ip = sym_table[sym_table_count-1].addr; | ||
646 | last = sym_table + sym_table_count++; | ||
647 | |||
648 | last->addr = -1ll; | ||
649 | last->sym = "<end>"; | ||
650 | |||
651 | if (filter_end) { | ||
652 | int count; | ||
653 | for (count=0; count < sym_table_count; count ++) { | ||
654 | if (!strcmp(sym_table[count].sym, sym_filter)) { | ||
655 | sym_filter_entry = &sym_table[count]; | ||
656 | break; | ||
657 | } | ||
658 | } | ||
659 | } | ||
660 | if (dump_symtab) { | ||
661 | int i; | ||
662 | |||
663 | for (i = 0; i < sym_table_count; i++) | ||
664 | fprintf(stderr, "%llx %s\n", | ||
665 | sym_table[i].addr, sym_table[i].sym); | ||
666 | } | ||
667 | } | ||
668 | |||
669 | |||
670 | static void parse_vmlinux(char *filename) | ||
671 | { | ||
672 | FILE *file; | ||
673 | char command[PATH_MAX*2]; | ||
674 | if (!filename) | ||
675 | return; | ||
676 | |||
677 | sprintf(command, "objdump --start-address=0x%016lx --stop-address=0x%016lx -dS %s", filter_start, filter_end, filename); | ||
678 | |||
679 | file = popen(command, "r"); | ||
680 | if (!file) | ||
681 | return; | ||
682 | |||
683 | while (!feof(file)) { | ||
684 | struct source_line *src; | ||
685 | size_t dummy = 0; | ||
686 | char *c; | ||
687 | |||
688 | src = malloc(sizeof(struct source_line)); | ||
689 | assert(src != NULL); | ||
690 | memset(src, 0, sizeof(struct source_line)); | ||
691 | |||
692 | if (getline(&src->line, &dummy, file) < 0) | ||
693 | break; | ||
694 | if (!src->line) | ||
695 | break; | ||
696 | |||
697 | c = strchr(src->line, '\n'); | ||
698 | if (c) | ||
699 | *c = 0; | ||
700 | |||
701 | lines = g_list_prepend(lines, src); | ||
702 | |||
703 | if (strlen(src->line)>8 && src->line[8] == ':') | ||
704 | src->EIP = strtoull(src->line, NULL, 16); | ||
705 | if (strlen(src->line)>8 && src->line[16] == ':') | ||
706 | src->EIP = strtoull(src->line, NULL, 16); | ||
707 | } | ||
708 | pclose(file); | ||
709 | lines = g_list_reverse(lines); | ||
710 | } | ||
711 | |||
712 | static void record_precise_ip(uint64_t ip) | ||
713 | { | ||
714 | struct source_line *line; | ||
715 | GList *item; | ||
716 | |||
717 | item = g_list_first(lines); | ||
718 | while (item) { | ||
719 | line = item->data; | ||
720 | if (line->EIP == ip) | ||
721 | line->count++; | ||
722 | if (line->EIP > ip) | ||
723 | break; | ||
724 | item = g_list_next(item); | ||
725 | } | ||
726 | } | ||
727 | |||
728 | static void lookup_sym_in_vmlinux(struct sym_entry *sym) | ||
729 | { | ||
730 | struct source_line *line; | ||
731 | GList *item; | ||
732 | char pattern[PATH_MAX]; | ||
733 | sprintf(pattern, "<%s>:", sym->sym); | ||
734 | |||
735 | item = g_list_first(lines); | ||
736 | while (item) { | ||
737 | line = item->data; | ||
738 | if (strstr(line->line, pattern)) { | ||
739 | sym->source = item; | ||
740 | break; | ||
741 | } | ||
742 | item = g_list_next(item); | ||
743 | } | ||
744 | } | ||
745 | |||
746 | void show_lines(GList *item_queue, int item_queue_count) | ||
747 | { | ||
748 | int i; | ||
749 | struct source_line *line; | ||
750 | |||
751 | for (i = 0; i < item_queue_count; i++) { | ||
752 | line = item_queue->data; | ||
753 | printf("%8li\t%s\n", line->count, line->line); | ||
754 | item_queue = g_list_next(item_queue); | ||
755 | } | ||
756 | } | ||
757 | |||
758 | #define TRACE_COUNT 3 | ||
759 | |||
760 | static void show_details(struct sym_entry *sym) | ||
761 | { | ||
762 | struct source_line *line; | ||
763 | GList *item; | ||
764 | int displayed = 0; | ||
765 | GList *item_queue = NULL; | ||
766 | int item_queue_count = 0; | ||
767 | |||
768 | if (!sym->source) | ||
769 | lookup_sym_in_vmlinux(sym); | ||
770 | if (!sym->source) | ||
771 | return; | ||
772 | |||
773 | printf("Showing details for %s\n", sym->sym); | ||
774 | |||
775 | item = sym->source; | ||
776 | while (item) { | ||
777 | line = item->data; | ||
778 | if (displayed && strstr(line->line, ">:")) | ||
779 | break; | ||
780 | |||
781 | if (!item_queue_count) | ||
782 | item_queue = item; | ||
783 | item_queue_count ++; | ||
784 | |||
785 | if (line->count >= count_filter) { | ||
786 | show_lines(item_queue, item_queue_count); | ||
787 | item_queue_count = 0; | ||
788 | item_queue = NULL; | ||
789 | } else if (item_queue_count > TRACE_COUNT) { | ||
790 | item_queue = g_list_next(item_queue); | ||
791 | item_queue_count --; | ||
792 | } | ||
793 | |||
794 | line->count = 0; | ||
795 | displayed++; | ||
796 | if (displayed > 300) | ||
797 | break; | ||
798 | item = g_list_next(item); | ||
799 | } | ||
800 | } | ||
801 | |||
802 | /* | ||
803 | * Binary search in the histogram table and record the hit: | ||
804 | */ | ||
805 | static void record_ip(uint64_t ip, int counter) | ||
806 | { | ||
807 | int left_idx, middle_idx, right_idx, idx; | ||
808 | unsigned long left, middle, right; | ||
809 | |||
810 | record_precise_ip(ip); | ||
811 | |||
812 | left_idx = 0; | ||
813 | right_idx = sym_table_count-1; | ||
814 | assert(ip <= max_ip && ip >= min_ip); | ||
815 | |||
816 | while (left_idx + 1 < right_idx) { | ||
817 | middle_idx = (left_idx + right_idx) / 2; | ||
818 | |||
819 | left = sym_table[ left_idx].addr; | ||
820 | middle = sym_table[middle_idx].addr; | ||
821 | right = sym_table[ right_idx].addr; | ||
822 | |||
823 | if (!(left <= middle && middle <= right)) { | ||
824 | printf("%016lx...\n%016lx...\n%016lx\n", left, middle, right); | ||
825 | printf("%d %d %d\n", left_idx, middle_idx, right_idx); | ||
826 | } | ||
827 | assert(left <= middle && middle <= right); | ||
828 | if (!(left <= ip && ip <= right)) { | ||
829 | printf(" left: %016lx\n", left); | ||
830 | printf(" ip: %016lx\n", ip); | ||
831 | printf("right: %016lx\n", right); | ||
832 | } | ||
833 | assert(left <= ip && ip <= right); | ||
834 | /* | ||
835 | * [ left .... target .... middle .... right ] | ||
836 | * => right := middle | ||
837 | */ | ||
838 | if (ip < middle) { | ||
839 | right_idx = middle_idx; | ||
840 | continue; | ||
841 | } | ||
842 | /* | ||
843 | * [ left .... middle ... target ... right ] | ||
844 | * => left := middle | ||
845 | */ | ||
846 | left_idx = middle_idx; | ||
847 | } | ||
848 | |||
849 | idx = left_idx; | ||
850 | |||
851 | if (!sym_table[idx].skip) | ||
852 | sym_table[idx].count[counter]++; | ||
853 | else events--; | ||
854 | } | ||
855 | |||
856 | static void process_event(uint64_t ip, int counter) | ||
857 | { | ||
858 | events++; | ||
859 | |||
860 | if (ip < min_ip || ip > max_ip) { | ||
861 | userspace_events++; | ||
862 | return; | ||
863 | } | ||
864 | |||
865 | record_ip(ip, counter); | ||
866 | } | ||
867 | |||
868 | int main(int argc, char *argv[]) | ||
869 | { | ||
870 | struct pollfd event_array[MAX_NR_CPUS][MAX_COUNTERS]; | ||
871 | struct perf_counter_hw_event hw_event; | ||
872 | int fd[MAX_NR_CPUS][MAX_COUNTERS]; | ||
873 | int i, counter, group_fd; | ||
874 | unsigned int cpu; | ||
875 | uint64_t ip; | ||
876 | ssize_t res; | ||
877 | int ret; | ||
878 | |||
879 | process_options(argc, argv); | ||
880 | |||
881 | nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); | ||
882 | if (tid != -1 || profile_cpu != -1) | ||
883 | nr_cpus = 1; | ||
884 | |||
885 | assert(nr_cpus <= MAX_NR_CPUS); | ||
886 | |||
887 | for (i = 0; i < nr_cpus; i++) { | ||
888 | group_fd = -1; | ||
889 | for (counter = 0; counter < nr_counters; counter++) { | ||
890 | |||
891 | cpu = profile_cpu; | ||
892 | if (tid == -1 && profile_cpu == -1) | ||
893 | cpu = i; | ||
894 | |||
895 | memset(&hw_event, 0, sizeof(hw_event)); | ||
896 | hw_event.type = event_id[counter]; | ||
897 | hw_event.raw = event_raw[counter]; | ||
898 | hw_event.irq_period = event_count[counter]; | ||
899 | hw_event.record_type = PERF_RECORD_IRQ; | ||
900 | hw_event.nmi = nmi; | ||
901 | |||
902 | fd[i][counter] = sys_perf_counter_open(&hw_event, tid, cpu, group_fd, 0); | ||
903 | fcntl(fd[i][counter], F_SETFL, O_NONBLOCK); | ||
904 | if (fd[i][counter] < 0) { | ||
905 | printf("kerneltop error: syscall returned with %d (%s)\n", | ||
906 | fd[i][counter], strerror(-fd[i][counter])); | ||
907 | if (fd[i][counter] == -1) | ||
908 | printf("Are you root?\n"); | ||
909 | exit(-1); | ||
910 | } | ||
911 | assert(fd[i][counter] >= 0); | ||
912 | |||
913 | /* | ||
914 | * First counter acts as the group leader: | ||
915 | */ | ||
916 | if (group && group_fd == -1) | ||
917 | group_fd = fd[i][counter]; | ||
918 | |||
919 | event_array[i][counter].fd = fd[i][counter]; | ||
920 | event_array[i][counter].events = POLLIN; | ||
921 | } | ||
922 | } | ||
923 | |||
924 | parse_symbols(); | ||
925 | if (vmlinux && sym_filter_entry) | ||
926 | parse_vmlinux(vmlinux); | ||
927 | |||
928 | printf("KernelTop refresh period: %d seconds\n", delay_secs); | ||
929 | last_refresh = time(NULL); | ||
930 | |||
931 | while (1) { | ||
932 | int hits = events; | ||
933 | |||
934 | for (i = 0; i < nr_cpus; i++) { | ||
935 | for (counter = 0; counter < nr_counters; counter++) { | ||
936 | res = read(fd[i][counter], (char *) &ip, sizeof(ip)); | ||
937 | if (res > 0) { | ||
938 | assert(res == sizeof(ip)); | ||
939 | |||
940 | process_event(ip, counter); | ||
941 | } | ||
942 | } | ||
943 | } | ||
944 | |||
945 | if (time(NULL) >= last_refresh + delay_secs) { | ||
946 | print_sym_table(); | ||
947 | events = userspace_events = 0; | ||
948 | } | ||
949 | |||
950 | if (hits == events) | ||
951 | ret = poll(event_array[0], nr_cpus, 1000); | ||
952 | hits = events; | ||
953 | } | ||
954 | |||
955 | return 0; | ||
956 | } | ||
diff --git a/Documentation/perf_counter/perfstat.c b/Documentation/perf_counter/perfstat.c new file mode 100644 index 000000000000..9a5808fbcf90 --- /dev/null +++ b/Documentation/perf_counter/perfstat.c | |||
@@ -0,0 +1,521 @@ | |||
1 | /* | ||
2 | * perfstat: /usr/bin/time -alike performance counter statistics utility | ||
3 | * | ||
4 | * It summarizes the counter events of all tasks (and child tasks), | ||
5 | * covering all CPUs that the command (or workload) executes on. | ||
6 | * It only counts the per-task events of the workload started, | ||
7 | * independent of how many other tasks run on those CPUs. | ||
8 | * | ||
9 | * Build with: cc -O2 -g -lrt -Wall -W -o perfstat perfstat.c | ||
10 | * | ||
11 | * Sample output: | ||
12 | * | ||
13 | |||
14 | $ ./perfstat -e 1 -e 3 -e 5 ls -lR /usr/include/ >/dev/null | ||
15 | |||
16 | Performance counter stats for 'ls': | ||
17 | |||
18 | 163516953 instructions | ||
19 | 2295 cache-misses | ||
20 | 2855182 branch-misses | ||
21 | |||
22 | * | ||
23 | * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com> | ||
24 | * | ||
25 | * Released under the GPLv2 (not later). | ||
26 | * | ||
27 | * Percpu counter support by: Yanmin Zhang <yanmin_zhang@linux.intel.com> | ||
28 | * Symbolic event options by: Wu Fengguang <fengguang.wu@intel.com> | ||
29 | */ | ||
30 | #define _GNU_SOURCE | ||
31 | |||
32 | #include <assert.h> | ||
33 | #include <getopt.h> | ||
34 | #include <stdint.h> | ||
35 | #include <stdlib.h> | ||
36 | #include <string.h> | ||
37 | #include <unistd.h> | ||
38 | #include <ctype.h> | ||
39 | #include <errno.h> | ||
40 | #include <fcntl.h> | ||
41 | #include <stdio.h> | ||
42 | #include <time.h> | ||
43 | |||
44 | #include <sys/syscall.h> | ||
45 | #include <sys/ioctl.h> | ||
46 | #include <sys/prctl.h> | ||
47 | #include <sys/types.h> | ||
48 | #include <sys/stat.h> | ||
49 | #include <sys/time.h> | ||
50 | #include <sys/wait.h> | ||
51 | #include <sys/uio.h> | ||
52 | |||
53 | #include <linux/unistd.h> | ||
54 | |||
/*
 * Syscall number of perf_counter_open for the architectures this tool
 * knows about.  On any other architecture the macro stays undefined.
 */
#if defined(__x86_64__)
# define __NR_perf_counter_open	295
#endif

#if defined(__i386__)
# define __NR_perf_counter_open	333
#endif

#if defined(__powerpc__)
# define __NR_perf_counter_open	319
#endif
66 | |||
/*
 * Kernel-style annotations and integer type names.  The two annotations
 * expand to nothing in this user-space program.
 */
#define __user
#define asmlinkage

typedef unsigned int		__u32;
typedef unsigned long long	__u64;
typedef long long		__s64;

/* Element count of an array object (must not be used on a pointer). */
#define ARRAY_SIZE(x)		(sizeof(x) / sizeof(*(x)))
78 | |||
79 | /* | ||
80 | * User-space ABI bits: | ||
81 | */ | ||
82 | |||
/*
 * Generalized event types, stored in hw_event.type and handed to
 * sys_perf_counter_open().  Hardware events are numbered from 0 upwards,
 * software events from -1 downwards.
 */
enum hw_event_types {
	/* Hardware events, generalized by the kernel. */
	PERF_COUNT_CPU_CYCLES		=  0,
	PERF_COUNT_INSTRUCTIONS		=  1,
	PERF_COUNT_CACHE_REFERENCES	=  2,
	PERF_COUNT_CACHE_MISSES		=  3,
	PERF_COUNT_BRANCH_INSTRUCTIONS	=  4,
	PERF_COUNT_BRANCH_MISSES	=  5,
	PERF_COUNT_BUS_CYCLES		=  6,

	/* First value past the valid hardware event range. */
	PERF_HW_EVENTS_MAX		=  7,

	/*
	 * "Software" counters provided by the kernel even when the
	 * hardware has no performance counters.  They measure various
	 * physical and sw events of the kernel (and allow profiling them).
	 */
	PERF_COUNT_CPU_CLOCK		= -1,
	PERF_COUNT_TASK_CLOCK		= -2,
	PERF_COUNT_PAGE_FAULTS		= -3,
	PERF_COUNT_CONTEXT_SWITCHES	= -4,
	PERF_COUNT_CPU_MIGRATIONS	= -5,

	/* First value below the valid software event range. */
	PERF_SW_EVENTS_MIN		= -6,
};
115 | |||
/*
 * Record types for IRQ-notification data, stored in hw_event.record_type:
 */
enum perf_counter_record_type {
	PERF_RECORD_SIMPLE	= 0,
	PERF_RECORD_IRQ		= 1,
	PERF_RECORD_GROUP	= 2,
};
124 | |||
125 | /* | ||
126 | * Hardware event to monitor via a performance monitoring counter: | ||
127 | */ | ||
128 | struct perf_counter_hw_event { | ||
129 | __s64 type; | ||
130 | |||
131 | __u64 irq_period; | ||
132 | __u64 record_type; | ||
133 | __u64 read_format; | ||
134 | |||
135 | __u64 disabled : 1, /* off by default */ | ||
136 | nmi : 1, /* NMI sampling */ | ||
137 | raw : 1, /* raw event type */ | ||
138 | inherit : 1, /* children inherit it */ | ||
139 | pinned : 1, /* must always be on PMU */ | ||
140 | exclusive : 1, /* only group on PMU */ | ||
141 | exclude_user : 1, /* don't count user */ | ||
142 | exclude_kernel : 1, /* ditto kernel */ | ||
143 | exclude_hv : 1, /* ditto hypervisor */ | ||
144 | exclude_idle : 1, /* don't count when idle */ | ||
145 | |||
146 | __reserved_1 : 54; | ||
147 | |||
148 | __u32 extra_config_len; | ||
149 | __u32 __reserved_4; | ||
150 | |||
151 | __u64 __reserved_2; | ||
152 | __u64 __reserved_3; | ||
153 | }; | ||
154 | |||
/*
 * ioctl() request codes defined for a perf counter file descriptor:
 */
#define PERF_COUNTER_IOC_ENABLE		_IO('$', 0)
#define PERF_COUNTER_IOC_DISABLE	_IO('$', 1)
160 | |||
161 | asmlinkage int sys_perf_counter_open( | ||
162 | |||
163 | struct perf_counter_hw_event *hw_event_uptr __user, | ||
164 | pid_t pid, | ||
165 | int cpu, | ||
166 | int group_fd, | ||
167 | unsigned long flags) | ||
168 | { | ||
169 | int ret; | ||
170 | |||
171 | ret = syscall( | ||
172 | __NR_perf_counter_open, hw_event_uptr, pid, cpu, group_fd, flags); | ||
173 | #if defined(__x86_64__) || defined(__i386__) | ||
174 | if (ret < 0 && ret > -4096) { | ||
175 | errno = -ret; | ||
176 | ret = -1; | ||
177 | } | ||
178 | #endif | ||
179 | return ret; | ||
180 | } | ||
181 | |||
182 | |||
/*
 * Printable names of the hardware events.  event_name() indexes this
 * table directly with the (non-negative) event type.
 */
static char *hw_event_names[] = {
	[0] = "CPU cycles",		/* PERF_COUNT_CPU_CYCLES */
	[1] = "instructions",		/* PERF_COUNT_INSTRUCTIONS */
	[2] = "cache references",	/* PERF_COUNT_CACHE_REFERENCES */
	[3] = "cache misses",		/* PERF_COUNT_CACHE_MISSES */
	[4] = "branches",		/* PERF_COUNT_BRANCH_INSTRUCTIONS */
	[5] = "branch misses",		/* PERF_COUNT_BRANCH_MISSES */
	[6] = "bus cycles",		/* PERF_COUNT_BUS_CYCLES */
};
192 | |||
/*
 * Printable names of the software events.  event_name() looks a
 * (negative) event type up at index -type - 1.
 */
static char *sw_event_names[] = {
	[0] = "cpu clock ticks",	/* PERF_COUNT_CPU_CLOCK */
	[1] = "task clock ticks",	/* PERF_COUNT_TASK_CLOCK */
	[2] = "pagefaults",		/* PERF_COUNT_PAGE_FAULTS */
	[3] = "context switches",	/* PERF_COUNT_CONTEXT_SWITCHES */
	[4] = "CPU migrations",		/* PERF_COUNT_CPU_MIGRATIONS */
};
200 | |||
/* Ties one event type to one of its command-line spellings. */
struct event_symbol {
	int	event;		/* event type value */
	char	*symbol;	/* name compared against the -e argument */
};
205 | |||
206 | static struct event_symbol event_symbols [] = { | ||
207 | {PERF_COUNT_CPU_CYCLES, "cpu-cycles", }, | ||
208 | {PERF_COUNT_CPU_CYCLES, "cycles", }, | ||
209 | {PERF_COUNT_INSTRUCTIONS, "instructions", }, | ||
210 | {PERF_COUNT_CACHE_REFERENCES, "cache-references", }, | ||
211 | {PERF_COUNT_CACHE_MISSES, "cache-misses", }, | ||
212 | {PERF_COUNT_BRANCH_INSTRUCTIONS, "branch-instructions", }, | ||
213 | {PERF_COUNT_BRANCH_INSTRUCTIONS, "branches", }, | ||
214 | {PERF_COUNT_BRANCH_MISSES, "branch-misses", }, | ||
215 | {PERF_COUNT_BUS_CYCLES, "bus-cycles", }, | ||
216 | {PERF_COUNT_CPU_CLOCK, "cpu-ticks", }, | ||
217 | {PERF_COUNT_CPU_CLOCK, "ticks", }, | ||
218 | {PERF_COUNT_TASK_CLOCK, "task-ticks", }, | ||
219 | {PERF_COUNT_PAGE_FAULTS, "page-faults", }, | ||
220 | {PERF_COUNT_PAGE_FAULTS, "faults", }, | ||
221 | {PERF_COUNT_CONTEXT_SWITCHES, "context-switches", }, | ||
222 | {PERF_COUNT_CONTEXT_SWITCHES, "cs", }, | ||
223 | {PERF_COUNT_CPU_MIGRATIONS, "cpu-migrations", }, | ||
224 | {PERF_COUNT_CPU_MIGRATIONS, "migrations", }, | ||
225 | }; | ||
226 | |||
/* Capacity of the per-counter and per-CPU tables. */
#define MAX_COUNTERS			64
#define MAX_NR_CPUS			256

/* File-scope statics start out as zero, so no explicit "= 0" is needed. */
static int			nr_counters;
static int			nr_cpus;
static int			system_wide;

/*
 * Event type per counter slot.  The first eight slots hold the defaults:
 * task clock, CPU migrations, context switches, page faults, CPU cycles,
 * instructions, cache references and cache misses.
 */
static int			event_id[MAX_COUNTERS] =
					{ -2, -5, -4, -3, 0, 1, 2, 3 };

/* Non-zero when the matching event_id[] slot holds a raw event type. */
static int			event_raw[MAX_COUNTERS];
239 | |||
240 | static void display_help(void) | ||
241 | { | ||
242 | unsigned int i; | ||
243 | int e; | ||
244 | |||
245 | printf( | ||
246 | "Usage: perfstat [<events...>] <cmd...>\n\n" | ||
247 | "PerfStat Options (up to %d event types can be specified):\n\n", | ||
248 | MAX_COUNTERS); | ||
249 | printf( | ||
250 | " -e EVENT --event=EVENT # symbolic-name abbreviations"); | ||
251 | |||
252 | for (i = 0, e = PERF_HW_EVENTS_MAX; i < ARRAY_SIZE(event_symbols); i++) { | ||
253 | if (e != event_symbols[i].event) { | ||
254 | e = event_symbols[i].event; | ||
255 | printf( | ||
256 | "\n %2d: %-20s", e, event_symbols[i].symbol); | ||
257 | } else | ||
258 | printf(" %s", event_symbols[i].symbol); | ||
259 | } | ||
260 | |||
261 | printf("\n" | ||
262 | " rNNN: raw event type\n\n" | ||
263 | " -s # system-wide collection\n\n" | ||
264 | " -c <cmd..> --command=<cmd..> # command+arguments to be timed.\n" | ||
265 | "\n"); | ||
266 | exit(0); | ||
267 | } | ||
268 | |||
269 | static int type_valid(int type) | ||
270 | { | ||
271 | if (type >= PERF_HW_EVENTS_MAX) | ||
272 | return 0; | ||
273 | if (type <= PERF_SW_EVENTS_MIN) | ||
274 | return 0; | ||
275 | |||
276 | return 1; | ||
277 | } | ||
278 | |||
279 | static char *event_name(int ctr) | ||
280 | { | ||
281 | int type = event_id[ctr]; | ||
282 | static char buf[32]; | ||
283 | |||
284 | if (event_raw[ctr]) { | ||
285 | sprintf(buf, "raw 0x%x", type); | ||
286 | return buf; | ||
287 | } | ||
288 | if (!type_valid(type)) | ||
289 | return "unknown"; | ||
290 | |||
291 | if (type >= 0) | ||
292 | return hw_event_names[type]; | ||
293 | |||
294 | return sw_event_names[-type-1]; | ||
295 | } | ||
296 | |||
297 | /* | ||
298 | * Each event can have multiple symbolic names. | ||
299 | * Symbolic names are (almost) exactly matched. | ||
300 | */ | ||
301 | static int match_event_symbols(char *str) | ||
302 | { | ||
303 | unsigned int i; | ||
304 | |||
305 | if (isdigit(str[0]) || str[0] == '-') | ||
306 | return atoi(str); | ||
307 | |||
308 | for (i = 0; i < ARRAY_SIZE(event_symbols); i++) { | ||
309 | if (!strncmp(str, event_symbols[i].symbol, | ||
310 | strlen(event_symbols[i].symbol))) | ||
311 | return event_symbols[i].event; | ||
312 | } | ||
313 | |||
314 | return PERF_HW_EVENTS_MAX; | ||
315 | } | ||
316 | |||
317 | static void parse_events(char *str) | ||
318 | { | ||
319 | int type, raw; | ||
320 | |||
321 | again: | ||
322 | nr_counters++; | ||
323 | if (nr_counters == MAX_COUNTERS) | ||
324 | display_help(); | ||
325 | |||
326 | raw = 0; | ||
327 | if (*str == 'r') { | ||
328 | raw = 1; | ||
329 | ++str; | ||
330 | type = strtol(str, NULL, 16); | ||
331 | } else { | ||
332 | type = match_event_symbols(str); | ||
333 | if (!type_valid(type)) | ||
334 | display_help(); | ||
335 | } | ||
336 | |||
337 | event_id[nr_counters] = type; | ||
338 | event_raw[nr_counters] = raw; | ||
339 | |||
340 | str = strstr(str, ","); | ||
341 | if (str) { | ||
342 | str++; | ||
343 | goto again; | ||
344 | } | ||
345 | } | ||
346 | |||
347 | static void process_options(int argc, char *argv[]) | ||
348 | { | ||
349 | for (;;) { | ||
350 | int option_index = 0; | ||
351 | /** Options for getopt */ | ||
352 | static struct option long_options[] = { | ||
353 | {"event", required_argument, NULL, 'e'}, | ||
354 | {"help", no_argument, NULL, 'h'}, | ||
355 | {"command", no_argument, NULL, 'c'}, | ||
356 | {NULL, 0, NULL, 0 } | ||
357 | }; | ||
358 | int c = getopt_long(argc, argv, "+:e:c:s", | ||
359 | long_options, &option_index); | ||
360 | if (c == -1) | ||
361 | break; | ||
362 | |||
363 | switch (c) { | ||
364 | case 'c': | ||
365 | break; | ||
366 | case 's': | ||
367 | system_wide = 1; | ||
368 | break; | ||
369 | case 'e': | ||
370 | parse_events(optarg); | ||
371 | break; | ||
372 | default: | ||
373 | break; | ||
374 | } | ||
375 | } | ||
376 | if (optind == argc) | ||
377 | goto err; | ||
378 | |||
379 | if (!nr_counters) | ||
380 | nr_counters = 8; | ||
381 | else | ||
382 | nr_counters++; | ||
383 | return; | ||
384 | |||
385 | err: | ||
386 | display_help(); | ||
387 | } | ||
388 | |||
389 | char fault_here[1000000]; | ||
390 | |||
391 | #define PR_TASK_PERF_COUNTERS_DISABLE 31 | ||
392 | #define PR_TASK_PERF_COUNTERS_ENABLE 32 | ||
393 | |||
394 | static int fd[MAX_NR_CPUS][MAX_COUNTERS]; | ||
395 | |||
396 | static void create_counter(int counter) | ||
397 | { | ||
398 | struct perf_counter_hw_event hw_event; | ||
399 | |||
400 | memset(&hw_event, 0, sizeof(hw_event)); | ||
401 | hw_event.type = event_id[counter]; | ||
402 | hw_event.raw = event_raw[counter]; | ||
403 | hw_event.record_type = PERF_RECORD_SIMPLE; | ||
404 | hw_event.nmi = 0; | ||
405 | |||
406 | if (system_wide) { | ||
407 | int cpu; | ||
408 | for (cpu = 0; cpu < nr_cpus; cpu ++) { | ||
409 | fd[cpu][counter] = sys_perf_counter_open(&hw_event, -1, cpu, -1, 0); | ||
410 | if (fd[cpu][counter] < 0) { | ||
411 | printf("perfstat error: syscall returned with %d (%s)\n", | ||
412 | fd[cpu][counter], strerror(errno)); | ||
413 | exit(-1); | ||
414 | } | ||
415 | |||
416 | } | ||
417 | } else { | ||
418 | hw_event.inherit = 1; | ||
419 | hw_event.disabled = 1; | ||
420 | |||
421 | fd[0][counter] = sys_perf_counter_open(&hw_event, 0, -1, -1, 0); | ||
422 | if (fd[0][counter] < 0) { | ||
423 | printf("perfstat error: syscall returned with %d (%s)\n", | ||
424 | fd[0][counter], strerror(errno)); | ||
425 | exit(-1); | ||
426 | } | ||
427 | } | ||
428 | } | ||
429 | |||
430 | |||
/*
 * Evaluates to the current CLOCK_MONOTONIC time in nanoseconds.
 * Written as a GNU statement expression so it can be used as a value;
 * the return value of clock_gettime() is not checked.
 */
#define rdclock()						\
({								\
	struct timespec now;					\
								\
	clock_gettime(CLOCK_MONOTONIC, &now);			\
	now.tv_sec * 1000000000ULL + now.tv_nsec;		\
})
438 | |||
439 | int main(int argc, char *argv[]) | ||
440 | { | ||
441 | unsigned long long t0, t1; | ||
442 | int counter; | ||
443 | ssize_t res; | ||
444 | int status; | ||
445 | int pid; | ||
446 | |||
447 | process_options(argc, argv); | ||
448 | |||
449 | if (system_wide) { | ||
450 | nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); | ||
451 | assert(nr_cpus <= MAX_NR_CPUS); | ||
452 | assert(nr_cpus >= 0); | ||
453 | } else | ||
454 | nr_cpus = 1; | ||
455 | |||
456 | for (counter = 0; counter < nr_counters; counter++) | ||
457 | create_counter(counter); | ||
458 | |||
459 | argc -= optind; | ||
460 | argv += optind; | ||
461 | |||
462 | /* | ||
463 | * Enable counters and exec the command: | ||
464 | */ | ||
465 | t0 = rdclock(); | ||
466 | prctl(PR_TASK_PERF_COUNTERS_ENABLE); | ||
467 | |||
468 | if ((pid = fork()) < 0) | ||
469 | perror("failed to fork"); | ||
470 | if (!pid) { | ||
471 | if (execvp(argv[0], argv)) { | ||
472 | perror(argv[0]); | ||
473 | exit(-1); | ||
474 | } | ||
475 | } | ||
476 | while (wait(&status) >= 0) | ||
477 | ; | ||
478 | prctl(PR_TASK_PERF_COUNTERS_DISABLE); | ||
479 | t1 = rdclock(); | ||
480 | |||
481 | fflush(stdout); | ||
482 | |||
483 | fprintf(stderr, "\n"); | ||
484 | fprintf(stderr, " Performance counter stats for \'%s\':\n", | ||
485 | argv[0]); | ||
486 | fprintf(stderr, "\n"); | ||
487 | |||
488 | for (counter = 0; counter < nr_counters; counter++) { | ||
489 | int cpu; | ||
490 | __u64 count, single_count; | ||
491 | |||
492 | count = 0; | ||
493 | for (cpu = 0; cpu < nr_cpus; cpu ++) { | ||
494 | res = read(fd[cpu][counter], | ||
495 | (char *) &single_count, sizeof(single_count)); | ||
496 | assert(res == sizeof(single_count)); | ||
497 | count += single_count; | ||
498 | } | ||
499 | |||
500 | if (!event_raw[counter] && | ||
501 | (event_id[counter] == PERF_COUNT_CPU_CLOCK || | ||
502 | event_id[counter] == PERF_COUNT_TASK_CLOCK)) { | ||
503 | |||
504 | double msecs = (double)count / 1000000; | ||
505 | |||
506 | fprintf(stderr, " %14.6f %-20s (msecs)\n", | ||
507 | msecs, event_name(counter)); | ||
508 | } else { | ||
509 | fprintf(stderr, " %14Ld %-20s (events)\n", | ||
510 | count, event_name(counter)); | ||
511 | } | ||
512 | if (!counter) | ||
513 | fprintf(stderr, "\n"); | ||
514 | } | ||
515 | fprintf(stderr, "\n"); | ||
516 | fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n", | ||
517 | (double)(t1-t0)/1e6); | ||
518 | fprintf(stderr, "\n"); | ||
519 | |||
520 | return 0; | ||
521 | } | ||