diff options
author | Zhang, Yanmin <yanmin_zhang@linux.intel.com> | 2010-03-18 10:36:05 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2010-03-18 11:21:12 -0400 |
commit | d6d901c23a9c4c7361aa901b5b2dda69703dd5e0 (patch) | |
tree | 601fc2cafac552c80b8456c8dd4b9964171552db | |
parent | 46be604b5ba738d53e5f5314813a4e7092864baf (diff) |
perf events: Change perf parameter --pid to process-wide collection instead of thread-wide
Parameter --pid (or -p) of perf currently means a thread-wide
collection. For example, if a process whose id is 8888 has 10
threads, 'perf top -p 8888' just collects the main thread
statistics. That's misleading. Users are used to attaching to a whole
process when debugging it with gdb. To follow the normal usage
style, this patch changes --pid to mean process-wide collection and adds
--tid (-t) to mean thread-wide collection.
Usage examples:
# perf top -p 8888
# perf record -p 8888 -f sleep 10
# perf stat -p 8888 -f sleep 10
The above commands collect the statistics of all threads of process
8888.
Signed-off-by: Zhang Yanmin <yanmin_zhang@linux.intel.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Avi Kivity <avi@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Sheng Yang <sheng@linux.intel.com>
Cc: Joerg Roedel <joro@8bytes.org>
Cc: Jes Sorensen <Jes.Sorensen@redhat.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Gleb Natapov <gleb@redhat.com>
Cc: zhiteng.huang@intel.com
Cc: Zachary Amsden <zamsden@redhat.com>
LKML-Reference: <1268922965-14774-3-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- | tools/perf/builtin-record.c | 260 | ||||
-rw-r--r-- | tools/perf/builtin-stat.c | 110 | ||||
-rw-r--r-- | tools/perf/builtin-top.c | 162 | ||||
-rw-r--r-- | tools/perf/util/thread.c | 32 | ||||
-rw-r--r-- | tools/perf/util/thread.h | 1 |
5 files changed, 372 insertions, 193 deletions
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index e2b35ad82a77..bb5b23db4239 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c | |||
@@ -27,7 +27,7 @@ | |||
27 | #include <unistd.h> | 27 | #include <unistd.h> |
28 | #include <sched.h> | 28 | #include <sched.h> |
29 | 29 | ||
30 | static int fd[MAX_NR_CPUS][MAX_COUNTERS]; | 30 | static int *fd[MAX_NR_CPUS][MAX_COUNTERS]; |
31 | 31 | ||
32 | static long default_interval = 0; | 32 | static long default_interval = 0; |
33 | 33 | ||
@@ -43,6 +43,9 @@ static int raw_samples = 0; | |||
43 | static int system_wide = 0; | 43 | static int system_wide = 0; |
44 | static int profile_cpu = -1; | 44 | static int profile_cpu = -1; |
45 | static pid_t target_pid = -1; | 45 | static pid_t target_pid = -1; |
46 | static pid_t target_tid = -1; | ||
47 | static pid_t *all_tids = NULL; | ||
48 | static int thread_num = 0; | ||
46 | static pid_t child_pid = -1; | 49 | static pid_t child_pid = -1; |
47 | static int inherit = 1; | 50 | static int inherit = 1; |
48 | static int force = 0; | 51 | static int force = 0; |
@@ -60,7 +63,7 @@ static struct timeval this_read; | |||
60 | 63 | ||
61 | static u64 bytes_written = 0; | 64 | static u64 bytes_written = 0; |
62 | 65 | ||
63 | static struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS]; | 66 | static struct pollfd *event_array; |
64 | 67 | ||
65 | static int nr_poll = 0; | 68 | static int nr_poll = 0; |
66 | static int nr_cpu = 0; | 69 | static int nr_cpu = 0; |
@@ -77,7 +80,7 @@ struct mmap_data { | |||
77 | unsigned int prev; | 80 | unsigned int prev; |
78 | }; | 81 | }; |
79 | 82 | ||
80 | static struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS]; | 83 | static struct mmap_data *mmap_array[MAX_NR_CPUS][MAX_COUNTERS]; |
81 | 84 | ||
82 | static unsigned long mmap_read_head(struct mmap_data *md) | 85 | static unsigned long mmap_read_head(struct mmap_data *md) |
83 | { | 86 | { |
@@ -225,12 +228,13 @@ static struct perf_header_attr *get_header_attr(struct perf_event_attr *a, int n | |||
225 | return h_attr; | 228 | return h_attr; |
226 | } | 229 | } |
227 | 230 | ||
228 | static void create_counter(int counter, int cpu, pid_t pid) | 231 | static void create_counter(int counter, int cpu) |
229 | { | 232 | { |
230 | char *filter = filters[counter]; | 233 | char *filter = filters[counter]; |
231 | struct perf_event_attr *attr = attrs + counter; | 234 | struct perf_event_attr *attr = attrs + counter; |
232 | struct perf_header_attr *h_attr; | 235 | struct perf_header_attr *h_attr; |
233 | int track = !counter; /* only the first counter needs these */ | 236 | int track = !counter; /* only the first counter needs these */ |
237 | int thread_index; | ||
234 | int ret; | 238 | int ret; |
235 | struct { | 239 | struct { |
236 | u64 count; | 240 | u64 count; |
@@ -280,115 +284,124 @@ static void create_counter(int counter, int cpu, pid_t pid) | |||
280 | attr->enable_on_exec = 1; | 284 | attr->enable_on_exec = 1; |
281 | } | 285 | } |
282 | 286 | ||
287 | for (thread_index = 0; thread_index < thread_num; thread_index++) { | ||
283 | try_again: | 288 | try_again: |
284 | fd[nr_cpu][counter] = sys_perf_event_open(attr, pid, cpu, group_fd, 0); | 289 | fd[nr_cpu][counter][thread_index] = sys_perf_event_open(attr, |
285 | 290 | all_tids[thread_index], cpu, group_fd, 0); | |
286 | if (fd[nr_cpu][counter] < 0) { | 291 | |
287 | int err = errno; | 292 | if (fd[nr_cpu][counter][thread_index] < 0) { |
288 | 293 | int err = errno; | |
289 | if (err == EPERM || err == EACCES) | 294 | |
290 | die("Permission error - are you root?\n" | 295 | if (err == EPERM || err == EACCES) |
291 | "\t Consider tweaking /proc/sys/kernel/perf_event_paranoid.\n"); | 296 | die("Permission error - are you root?\n" |
292 | else if (err == ENODEV && profile_cpu != -1) | 297 | "\t Consider tweaking" |
293 | die("No such device - did you specify an out-of-range profile CPU?\n"); | 298 | " /proc/sys/kernel/perf_event_paranoid.\n"); |
299 | else if (err == ENODEV && profile_cpu != -1) { | ||
300 | die("No such device - did you specify" | ||
301 | " an out-of-range profile CPU?\n"); | ||
302 | } | ||
294 | 303 | ||
295 | /* | 304 | /* |
296 | * If it's cycles then fall back to hrtimer | 305 | * If it's cycles then fall back to hrtimer |
297 | * based cpu-clock-tick sw counter, which | 306 | * based cpu-clock-tick sw counter, which |
298 | * is always available even if no PMU support: | 307 | * is always available even if no PMU support: |
299 | */ | 308 | */ |
300 | if (attr->type == PERF_TYPE_HARDWARE | 309 | if (attr->type == PERF_TYPE_HARDWARE |
301 | && attr->config == PERF_COUNT_HW_CPU_CYCLES) { | 310 | && attr->config == PERF_COUNT_HW_CPU_CYCLES) { |
302 | 311 | ||
303 | if (verbose) | 312 | if (verbose) |
304 | warning(" ... trying to fall back to cpu-clock-ticks\n"); | 313 | warning(" ... trying to fall back to cpu-clock-ticks\n"); |
305 | attr->type = PERF_TYPE_SOFTWARE; | 314 | attr->type = PERF_TYPE_SOFTWARE; |
306 | attr->config = PERF_COUNT_SW_CPU_CLOCK; | 315 | attr->config = PERF_COUNT_SW_CPU_CLOCK; |
307 | goto try_again; | 316 | goto try_again; |
308 | } | 317 | } |
309 | printf("\n"); | 318 | printf("\n"); |
310 | error("perfcounter syscall returned with %d (%s)\n", | 319 | error("perfcounter syscall returned with %d (%s)\n", |
311 | fd[nr_cpu][counter], strerror(err)); | 320 | fd[nr_cpu][counter][thread_index], strerror(err)); |
312 | 321 | ||
313 | #if defined(__i386__) || defined(__x86_64__) | 322 | #if defined(__i386__) || defined(__x86_64__) |
314 | if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP) | 323 | if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP) |
315 | die("No hardware sampling interrupt available. No APIC? If so then you can boot the kernel with the \"lapic\" boot parameter to force-enable it.\n"); | 324 | die("No hardware sampling interrupt available." |
325 | " No APIC? If so then you can boot the kernel" | ||
326 | " with the \"lapic\" boot parameter to" | ||
327 | " force-enable it.\n"); | ||
316 | #endif | 328 | #endif |
317 | 329 | ||
318 | die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); | 330 | die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); |
319 | exit(-1); | 331 | exit(-1); |
320 | } | 332 | } |
321 | 333 | ||
322 | h_attr = get_header_attr(attr, counter); | 334 | h_attr = get_header_attr(attr, counter); |
323 | if (h_attr == NULL) | 335 | if (h_attr == NULL) |
324 | die("nomem\n"); | 336 | die("nomem\n"); |
325 | 337 | ||
326 | if (!file_new) { | 338 | if (!file_new) { |
327 | if (memcmp(&h_attr->attr, attr, sizeof(*attr))) { | 339 | if (memcmp(&h_attr->attr, attr, sizeof(*attr))) { |
328 | fprintf(stderr, "incompatible append\n"); | 340 | fprintf(stderr, "incompatible append\n"); |
329 | exit(-1); | 341 | exit(-1); |
342 | } | ||
330 | } | 343 | } |
331 | } | ||
332 | 344 | ||
333 | if (read(fd[nr_cpu][counter], &read_data, sizeof(read_data)) == -1) { | 345 | if (read(fd[nr_cpu][counter][thread_index], &read_data, sizeof(read_data)) == -1) { |
334 | perror("Unable to read perf file descriptor\n"); | 346 | perror("Unable to read perf file descriptor\n"); |
335 | exit(-1); | 347 | exit(-1); |
336 | } | 348 | } |
337 | 349 | ||
338 | if (perf_header_attr__add_id(h_attr, read_data.id) < 0) { | 350 | if (perf_header_attr__add_id(h_attr, read_data.id) < 0) { |
339 | pr_warning("Not enough memory to add id\n"); | 351 | pr_warning("Not enough memory to add id\n"); |
340 | exit(-1); | 352 | exit(-1); |
341 | } | 353 | } |
342 | 354 | ||
343 | assert(fd[nr_cpu][counter] >= 0); | 355 | assert(fd[nr_cpu][counter][thread_index] >= 0); |
344 | fcntl(fd[nr_cpu][counter], F_SETFL, O_NONBLOCK); | 356 | fcntl(fd[nr_cpu][counter][thread_index], F_SETFL, O_NONBLOCK); |
345 | 357 | ||
346 | /* | 358 | /* |
347 | * First counter acts as the group leader: | 359 | * First counter acts as the group leader: |
348 | */ | 360 | */ |
349 | if (group && group_fd == -1) | 361 | if (group && group_fd == -1) |
350 | group_fd = fd[nr_cpu][counter]; | 362 | group_fd = fd[nr_cpu][counter][thread_index]; |
351 | if (multiplex && multiplex_fd == -1) | 363 | if (multiplex && multiplex_fd == -1) |
352 | multiplex_fd = fd[nr_cpu][counter]; | 364 | multiplex_fd = fd[nr_cpu][counter][thread_index]; |
353 | 365 | ||
354 | if (multiplex && fd[nr_cpu][counter] != multiplex_fd) { | 366 | if (multiplex && fd[nr_cpu][counter][thread_index] != multiplex_fd) { |
355 | 367 | ||
356 | ret = ioctl(fd[nr_cpu][counter], PERF_EVENT_IOC_SET_OUTPUT, multiplex_fd); | 368 | ret = ioctl(fd[nr_cpu][counter][thread_index], PERF_EVENT_IOC_SET_OUTPUT, multiplex_fd); |
357 | assert(ret != -1); | 369 | assert(ret != -1); |
358 | } else { | 370 | } else { |
359 | event_array[nr_poll].fd = fd[nr_cpu][counter]; | 371 | event_array[nr_poll].fd = fd[nr_cpu][counter][thread_index]; |
360 | event_array[nr_poll].events = POLLIN; | 372 | event_array[nr_poll].events = POLLIN; |
361 | nr_poll++; | 373 | nr_poll++; |
362 | 374 | ||
363 | mmap_array[nr_cpu][counter].counter = counter; | 375 | mmap_array[nr_cpu][counter][thread_index].counter = counter; |
364 | mmap_array[nr_cpu][counter].prev = 0; | 376 | mmap_array[nr_cpu][counter][thread_index].prev = 0; |
365 | mmap_array[nr_cpu][counter].mask = mmap_pages*page_size - 1; | 377 | mmap_array[nr_cpu][counter][thread_index].mask = mmap_pages*page_size - 1; |
366 | mmap_array[nr_cpu][counter].base = mmap(NULL, (mmap_pages+1)*page_size, | 378 | mmap_array[nr_cpu][counter][thread_index].base = mmap(NULL, (mmap_pages+1)*page_size, |
367 | PROT_READ|PROT_WRITE, MAP_SHARED, fd[nr_cpu][counter], 0); | 379 | PROT_READ|PROT_WRITE, MAP_SHARED, fd[nr_cpu][counter][thread_index], 0); |
368 | if (mmap_array[nr_cpu][counter].base == MAP_FAILED) { | 380 | if (mmap_array[nr_cpu][counter][thread_index].base == MAP_FAILED) { |
369 | error("failed to mmap with %d (%s)\n", errno, strerror(errno)); | 381 | error("failed to mmap with %d (%s)\n", errno, strerror(errno)); |
370 | exit(-1); | 382 | exit(-1); |
383 | } | ||
371 | } | 384 | } |
372 | } | ||
373 | 385 | ||
374 | if (filter != NULL) { | 386 | if (filter != NULL) { |
375 | ret = ioctl(fd[nr_cpu][counter], | 387 | ret = ioctl(fd[nr_cpu][counter][thread_index], |
376 | PERF_EVENT_IOC_SET_FILTER, filter); | 388 | PERF_EVENT_IOC_SET_FILTER, filter); |
377 | if (ret) { | 389 | if (ret) { |
378 | error("failed to set filter with %d (%s)\n", errno, | 390 | error("failed to set filter with %d (%s)\n", errno, |
379 | strerror(errno)); | 391 | strerror(errno)); |
380 | exit(-1); | 392 | exit(-1); |
393 | } | ||
381 | } | 394 | } |
382 | } | 395 | } |
383 | } | 396 | } |
384 | 397 | ||
385 | static void open_counters(int cpu, pid_t pid) | 398 | static void open_counters(int cpu) |
386 | { | 399 | { |
387 | int counter; | 400 | int counter; |
388 | 401 | ||
389 | group_fd = -1; | 402 | group_fd = -1; |
390 | for (counter = 0; counter < nr_counters; counter++) | 403 | for (counter = 0; counter < nr_counters; counter++) |
391 | create_counter(counter, cpu, pid); | 404 | create_counter(counter, cpu); |
392 | 405 | ||
393 | nr_cpu++; | 406 | nr_cpu++; |
394 | } | 407 | } |
@@ -529,6 +542,9 @@ static int __cmd_record(int argc, const char **argv) | |||
529 | exit(-1); | 542 | exit(-1); |
530 | } | 543 | } |
531 | 544 | ||
545 | if (!system_wide && target_tid == -1 && target_pid == -1) | ||
546 | all_tids[0] = child_pid; | ||
547 | |||
532 | close(child_ready_pipe[1]); | 548 | close(child_ready_pipe[1]); |
533 | close(go_pipe[0]); | 549 | close(go_pipe[0]); |
534 | /* | 550 | /* |
@@ -541,17 +557,12 @@ static int __cmd_record(int argc, const char **argv) | |||
541 | close(child_ready_pipe[0]); | 557 | close(child_ready_pipe[0]); |
542 | } | 558 | } |
543 | 559 | ||
544 | if (forks && target_pid == -1 && !system_wide) | ||
545 | pid = child_pid; | ||
546 | else | ||
547 | pid = target_pid; | ||
548 | |||
549 | if ((!system_wide && !inherit) || profile_cpu != -1) { | 560 | if ((!system_wide && !inherit) || profile_cpu != -1) { |
550 | open_counters(profile_cpu, pid); | 561 | open_counters(profile_cpu); |
551 | } else { | 562 | } else { |
552 | nr_cpus = read_cpu_map(); | 563 | nr_cpus = read_cpu_map(); |
553 | for (i = 0; i < nr_cpus; i++) | 564 | for (i = 0; i < nr_cpus; i++) |
554 | open_counters(cpumap[i], pid); | 565 | open_counters(cpumap[i]); |
555 | } | 566 | } |
556 | 567 | ||
557 | if (file_new) { | 568 | if (file_new) { |
@@ -576,7 +587,7 @@ static int __cmd_record(int argc, const char **argv) | |||
576 | } | 587 | } |
577 | 588 | ||
578 | if (!system_wide && profile_cpu == -1) | 589 | if (!system_wide && profile_cpu == -1) |
579 | event__synthesize_thread(target_pid, process_synthesized_event, | 590 | event__synthesize_thread(target_tid, process_synthesized_event, |
580 | session); | 591 | session); |
581 | else | 592 | else |
582 | event__synthesize_threads(process_synthesized_event, session); | 593 | event__synthesize_threads(process_synthesized_event, session); |
@@ -599,11 +610,16 @@ static int __cmd_record(int argc, const char **argv) | |||
599 | 610 | ||
600 | for (;;) { | 611 | for (;;) { |
601 | int hits = samples; | 612 | int hits = samples; |
613 | int thread; | ||
602 | 614 | ||
603 | for (i = 0; i < nr_cpu; i++) { | 615 | for (i = 0; i < nr_cpu; i++) { |
604 | for (counter = 0; counter < nr_counters; counter++) { | 616 | for (counter = 0; counter < nr_counters; counter++) { |
605 | if (mmap_array[i][counter].base) | 617 | for (thread = 0; |
606 | mmap_read(&mmap_array[i][counter]); | 618 | thread < thread_num; thread++) { |
619 | if (mmap_array[i][counter][thread].base) | ||
620 | mmap_read(&mmap_array[i][counter][thread]); | ||
621 | } | ||
622 | |||
607 | } | 623 | } |
608 | } | 624 | } |
609 | 625 | ||
@@ -616,8 +632,15 @@ static int __cmd_record(int argc, const char **argv) | |||
616 | 632 | ||
617 | if (done) { | 633 | if (done) { |
618 | for (i = 0; i < nr_cpu; i++) { | 634 | for (i = 0; i < nr_cpu; i++) { |
619 | for (counter = 0; counter < nr_counters; counter++) | 635 | for (counter = 0; |
620 | ioctl(fd[i][counter], PERF_EVENT_IOC_DISABLE); | 636 | counter < nr_counters; |
637 | counter++) { | ||
638 | for (thread = 0; | ||
639 | thread < thread_num; | ||
640 | thread++) | ||
641 | ioctl(fd[i][counter][thread], | ||
642 | PERF_EVENT_IOC_DISABLE); | ||
643 | } | ||
621 | } | 644 | } |
622 | } | 645 | } |
623 | } | 646 | } |
@@ -649,7 +672,9 @@ static const struct option options[] = { | |||
649 | OPT_CALLBACK(0, "filter", NULL, "filter", | 672 | OPT_CALLBACK(0, "filter", NULL, "filter", |
650 | "event filter", parse_filter), | 673 | "event filter", parse_filter), |
651 | OPT_INTEGER('p', "pid", &target_pid, | 674 | OPT_INTEGER('p', "pid", &target_pid, |
652 | "record events on existing pid"), | 675 | "record events on existing process id"), |
676 | OPT_INTEGER('t', "tid", &target_tid, | ||
677 | "record events on existing thread id"), | ||
653 | OPT_INTEGER('r', "realtime", &realtime_prio, | 678 | OPT_INTEGER('r', "realtime", &realtime_prio, |
654 | "collect data with this RT SCHED_FIFO priority"), | 679 | "collect data with this RT SCHED_FIFO priority"), |
655 | OPT_BOOLEAN('R', "raw-samples", &raw_samples, | 680 | OPT_BOOLEAN('R', "raw-samples", &raw_samples, |
@@ -690,10 +715,12 @@ static const struct option options[] = { | |||
690 | int cmd_record(int argc, const char **argv, const char *prefix __used) | 715 | int cmd_record(int argc, const char **argv, const char *prefix __used) |
691 | { | 716 | { |
692 | int counter; | 717 | int counter; |
718 | int i,j; | ||
693 | 719 | ||
694 | argc = parse_options(argc, argv, options, record_usage, | 720 | argc = parse_options(argc, argv, options, record_usage, |
695 | PARSE_OPT_STOP_AT_NON_OPTION); | 721 | PARSE_OPT_STOP_AT_NON_OPTION); |
696 | if (!argc && target_pid == -1 && !system_wide && profile_cpu == -1) | 722 | if (!argc && target_pid == -1 && target_tid == -1 && |
723 | !system_wide && profile_cpu == -1) | ||
697 | usage_with_options(record_usage, options); | 724 | usage_with_options(record_usage, options); |
698 | 725 | ||
699 | symbol__init(); | 726 | symbol__init(); |
@@ -704,6 +731,37 @@ int cmd_record(int argc, const char **argv, const char *prefix __used) | |||
704 | attrs[0].config = PERF_COUNT_HW_CPU_CYCLES; | 731 | attrs[0].config = PERF_COUNT_HW_CPU_CYCLES; |
705 | } | 732 | } |
706 | 733 | ||
734 | if (target_pid != -1) { | ||
735 | target_tid = target_pid; | ||
736 | thread_num = find_all_tid(target_pid, &all_tids); | ||
737 | if (thread_num <= 0) { | ||
738 | fprintf(stderr, "Can't find all threads of pid %d\n", | ||
739 | target_pid); | ||
740 | usage_with_options(record_usage, options); | ||
741 | } | ||
742 | } else { | ||
743 | all_tids=malloc(sizeof(pid_t)); | ||
744 | if (!all_tids) | ||
745 | return -ENOMEM; | ||
746 | |||
747 | all_tids[0] = target_tid; | ||
748 | thread_num = 1; | ||
749 | } | ||
750 | |||
751 | for (i = 0; i < MAX_NR_CPUS; i++) { | ||
752 | for (j = 0; j < MAX_COUNTERS; j++) { | ||
753 | fd[i][j] = malloc(sizeof(int)*thread_num); | ||
754 | mmap_array[i][j] = malloc( | ||
755 | sizeof(struct mmap_data)*thread_num); | ||
756 | if (!fd[i][j] || !mmap_array[i][j]) | ||
757 | return -ENOMEM; | ||
758 | } | ||
759 | } | ||
760 | event_array = malloc( | ||
761 | sizeof(struct pollfd)*MAX_NR_CPUS*MAX_COUNTERS*thread_num); | ||
762 | if (!event_array) | ||
763 | return -ENOMEM; | ||
764 | |||
707 | /* | 765 | /* |
708 | * User specified count overrides default frequency. | 766 | * User specified count overrides default frequency. |
709 | */ | 767 | */ |
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 5f41244cbbf2..c92f90ff5a9f 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c | |||
@@ -46,6 +46,7 @@ | |||
46 | #include "util/debug.h" | 46 | #include "util/debug.h" |
47 | #include "util/header.h" | 47 | #include "util/header.h" |
48 | #include "util/cpumap.h" | 48 | #include "util/cpumap.h" |
49 | #include "util/thread.h" | ||
49 | 50 | ||
50 | #include <sys/prctl.h> | 51 | #include <sys/prctl.h> |
51 | #include <math.h> | 52 | #include <math.h> |
@@ -74,10 +75,13 @@ static int run_count = 1; | |||
74 | static int inherit = 1; | 75 | static int inherit = 1; |
75 | static int scale = 1; | 76 | static int scale = 1; |
76 | static pid_t target_pid = -1; | 77 | static pid_t target_pid = -1; |
78 | static pid_t target_tid = -1; | ||
79 | static pid_t *all_tids = NULL; | ||
80 | static int thread_num = 0; | ||
77 | static pid_t child_pid = -1; | 81 | static pid_t child_pid = -1; |
78 | static int null_run = 0; | 82 | static int null_run = 0; |
79 | 83 | ||
80 | static int fd[MAX_NR_CPUS][MAX_COUNTERS]; | 84 | static int *fd[MAX_NR_CPUS][MAX_COUNTERS]; |
81 | 85 | ||
82 | static int event_scaled[MAX_COUNTERS]; | 86 | static int event_scaled[MAX_COUNTERS]; |
83 | 87 | ||
@@ -140,9 +144,10 @@ struct stats runtime_branches_stats; | |||
140 | #define ERR_PERF_OPEN \ | 144 | #define ERR_PERF_OPEN \ |
141 | "Error: counter %d, sys_perf_event_open() syscall returned with %d (%s)\n" | 145 | "Error: counter %d, sys_perf_event_open() syscall returned with %d (%s)\n" |
142 | 146 | ||
143 | static void create_perf_stat_counter(int counter, int pid) | 147 | static void create_perf_stat_counter(int counter) |
144 | { | 148 | { |
145 | struct perf_event_attr *attr = attrs + counter; | 149 | struct perf_event_attr *attr = attrs + counter; |
150 | int thread; | ||
146 | 151 | ||
147 | if (scale) | 152 | if (scale) |
148 | attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | | 153 | attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | |
@@ -152,10 +157,11 @@ static void create_perf_stat_counter(int counter, int pid) | |||
152 | unsigned int cpu; | 157 | unsigned int cpu; |
153 | 158 | ||
154 | for (cpu = 0; cpu < nr_cpus; cpu++) { | 159 | for (cpu = 0; cpu < nr_cpus; cpu++) { |
155 | fd[cpu][counter] = sys_perf_event_open(attr, -1, cpumap[cpu], -1, 0); | 160 | fd[cpu][counter][0] = sys_perf_event_open(attr, |
156 | if (fd[cpu][counter] < 0 && verbose) | 161 | -1, cpumap[cpu], -1, 0); |
162 | if (fd[cpu][counter][0] < 0 && verbose) | ||
157 | fprintf(stderr, ERR_PERF_OPEN, counter, | 163 | fprintf(stderr, ERR_PERF_OPEN, counter, |
158 | fd[cpu][counter], strerror(errno)); | 164 | fd[cpu][counter][0], strerror(errno)); |
159 | } | 165 | } |
160 | } else { | 166 | } else { |
161 | attr->inherit = inherit; | 167 | attr->inherit = inherit; |
@@ -163,11 +169,14 @@ static void create_perf_stat_counter(int counter, int pid) | |||
163 | attr->disabled = 1; | 169 | attr->disabled = 1; |
164 | attr->enable_on_exec = 1; | 170 | attr->enable_on_exec = 1; |
165 | } | 171 | } |
166 | 172 | for (thread = 0; thread < thread_num; thread++) { | |
167 | fd[0][counter] = sys_perf_event_open(attr, pid, -1, -1, 0); | 173 | fd[0][counter][thread] = sys_perf_event_open(attr, |
168 | if (fd[0][counter] < 0 && verbose) | 174 | all_tids[thread], -1, -1, 0); |
169 | fprintf(stderr, ERR_PERF_OPEN, counter, | 175 | if (fd[0][counter][thread] < 0 && verbose) |
170 | fd[0][counter], strerror(errno)); | 176 | fprintf(stderr, ERR_PERF_OPEN, counter, |
177 | fd[0][counter][thread], | ||
178 | strerror(errno)); | ||
179 | } | ||
171 | } | 180 | } |
172 | } | 181 | } |
173 | 182 | ||
@@ -192,25 +201,28 @@ static void read_counter(int counter) | |||
192 | unsigned int cpu; | 201 | unsigned int cpu; |
193 | size_t res, nv; | 202 | size_t res, nv; |
194 | int scaled; | 203 | int scaled; |
195 | int i; | 204 | int i, thread; |
196 | 205 | ||
197 | count[0] = count[1] = count[2] = 0; | 206 | count[0] = count[1] = count[2] = 0; |
198 | 207 | ||
199 | nv = scale ? 3 : 1; | 208 | nv = scale ? 3 : 1; |
200 | for (cpu = 0; cpu < nr_cpus; cpu++) { | 209 | for (cpu = 0; cpu < nr_cpus; cpu++) { |
201 | if (fd[cpu][counter] < 0) | 210 | for (thread = 0; thread < thread_num; thread++) { |
202 | continue; | 211 | if (fd[cpu][counter][thread] < 0) |
203 | 212 | continue; | |
204 | res = read(fd[cpu][counter], single_count, nv * sizeof(u64)); | 213 | |
205 | assert(res == nv * sizeof(u64)); | 214 | res = read(fd[cpu][counter][thread], |
206 | 215 | single_count, nv * sizeof(u64)); | |
207 | close(fd[cpu][counter]); | 216 | assert(res == nv * sizeof(u64)); |
208 | fd[cpu][counter] = -1; | 217 | |
209 | 218 | close(fd[cpu][counter][thread]); | |
210 | count[0] += single_count[0]; | 219 | fd[cpu][counter][thread] = -1; |
211 | if (scale) { | 220 | |
212 | count[1] += single_count[1]; | 221 | count[0] += single_count[0]; |
213 | count[2] += single_count[2]; | 222 | if (scale) { |
223 | count[1] += single_count[1]; | ||
224 | count[2] += single_count[2]; | ||
225 | } | ||
214 | } | 226 | } |
215 | } | 227 | } |
216 | 228 | ||
@@ -253,7 +265,6 @@ static int run_perf_stat(int argc __used, const char **argv) | |||
253 | unsigned long long t0, t1; | 265 | unsigned long long t0, t1; |
254 | int status = 0; | 266 | int status = 0; |
255 | int counter; | 267 | int counter; |
256 | int pid; | ||
257 | int child_ready_pipe[2], go_pipe[2]; | 268 | int child_ready_pipe[2], go_pipe[2]; |
258 | const bool forks = (argc > 0); | 269 | const bool forks = (argc > 0); |
259 | char buf; | 270 | char buf; |
@@ -299,6 +310,9 @@ static int run_perf_stat(int argc __used, const char **argv) | |||
299 | exit(-1); | 310 | exit(-1); |
300 | } | 311 | } |
301 | 312 | ||
313 | if (target_tid == -1 && target_pid == -1 && !system_wide) | ||
314 | all_tids[0] = child_pid; | ||
315 | |||
302 | /* | 316 | /* |
303 | * Wait for the child to be ready to exec. | 317 | * Wait for the child to be ready to exec. |
304 | */ | 318 | */ |
@@ -309,12 +323,8 @@ static int run_perf_stat(int argc __used, const char **argv) | |||
309 | close(child_ready_pipe[0]); | 323 | close(child_ready_pipe[0]); |
310 | } | 324 | } |
311 | 325 | ||
312 | if (target_pid == -1) | ||
313 | pid = child_pid; | ||
314 | else | ||
315 | pid = target_pid; | ||
316 | for (counter = 0; counter < nr_counters; counter++) | 326 | for (counter = 0; counter < nr_counters; counter++) |
317 | create_perf_stat_counter(counter, pid); | 327 | create_perf_stat_counter(counter); |
318 | 328 | ||
319 | /* | 329 | /* |
320 | * Enable counters and exec the command: | 330 | * Enable counters and exec the command: |
@@ -433,12 +443,14 @@ static void print_stat(int argc, const char **argv) | |||
433 | 443 | ||
434 | fprintf(stderr, "\n"); | 444 | fprintf(stderr, "\n"); |
435 | fprintf(stderr, " Performance counter stats for "); | 445 | fprintf(stderr, " Performance counter stats for "); |
436 | if(target_pid == -1) { | 446 | if(target_pid == -1 && target_tid == -1) { |
437 | fprintf(stderr, "\'%s", argv[0]); | 447 | fprintf(stderr, "\'%s", argv[0]); |
438 | for (i = 1; i < argc; i++) | 448 | for (i = 1; i < argc; i++) |
439 | fprintf(stderr, " %s", argv[i]); | 449 | fprintf(stderr, " %s", argv[i]); |
440 | }else | 450 | } else if (target_pid != -1) |
441 | fprintf(stderr, "task pid \'%d", target_pid); | 451 | fprintf(stderr, "process id \'%d", target_pid); |
452 | else | ||
453 | fprintf(stderr, "thread id \'%d", target_tid); | ||
442 | 454 | ||
443 | fprintf(stderr, "\'"); | 455 | fprintf(stderr, "\'"); |
444 | if (run_count > 1) | 456 | if (run_count > 1) |
@@ -493,7 +505,9 @@ static const struct option options[] = { | |||
493 | OPT_BOOLEAN('i', "inherit", &inherit, | 505 | OPT_BOOLEAN('i', "inherit", &inherit, |
494 | "child tasks inherit counters"), | 506 | "child tasks inherit counters"), |
495 | OPT_INTEGER('p', "pid", &target_pid, | 507 | OPT_INTEGER('p', "pid", &target_pid, |
496 | "stat events on existing pid"), | 508 | "stat events on existing process id"), |
509 | OPT_INTEGER('t', "tid", &target_tid, | ||
510 | "stat events on existing thread id"), | ||
497 | OPT_BOOLEAN('a', "all-cpus", &system_wide, | 511 | OPT_BOOLEAN('a', "all-cpus", &system_wide, |
498 | "system-wide collection from all CPUs"), | 512 | "system-wide collection from all CPUs"), |
499 | OPT_BOOLEAN('c', "scale", &scale, | 513 | OPT_BOOLEAN('c', "scale", &scale, |
@@ -510,10 +524,11 @@ static const struct option options[] = { | |||
510 | int cmd_stat(int argc, const char **argv, const char *prefix __used) | 524 | int cmd_stat(int argc, const char **argv, const char *prefix __used) |
511 | { | 525 | { |
512 | int status; | 526 | int status; |
527 | int i,j; | ||
513 | 528 | ||
514 | argc = parse_options(argc, argv, options, stat_usage, | 529 | argc = parse_options(argc, argv, options, stat_usage, |
515 | PARSE_OPT_STOP_AT_NON_OPTION); | 530 | PARSE_OPT_STOP_AT_NON_OPTION); |
516 | if (!argc && target_pid == -1) | 531 | if (!argc && target_pid == -1 && target_tid == -1) |
517 | usage_with_options(stat_usage, options); | 532 | usage_with_options(stat_usage, options); |
518 | if (run_count <= 0) | 533 | if (run_count <= 0) |
519 | usage_with_options(stat_usage, options); | 534 | usage_with_options(stat_usage, options); |
@@ -529,6 +544,31 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used) | |||
529 | else | 544 | else |
530 | nr_cpus = 1; | 545 | nr_cpus = 1; |
531 | 546 | ||
547 | if (target_pid != -1) { | ||
548 | target_tid = target_pid; | ||
549 | thread_num = find_all_tid(target_pid, &all_tids); | ||
550 | if (thread_num <= 0) { | ||
551 | fprintf(stderr, "Can't find all threads of pid %d\n", | ||
552 | target_pid); | ||
553 | usage_with_options(stat_usage, options); | ||
554 | } | ||
555 | } else { | ||
556 | all_tids=malloc(sizeof(pid_t)); | ||
557 | if (!all_tids) | ||
558 | return -ENOMEM; | ||
559 | |||
560 | all_tids[0] = target_tid; | ||
561 | thread_num = 1; | ||
562 | } | ||
563 | |||
564 | for (i = 0; i < MAX_NR_CPUS; i++) { | ||
565 | for (j = 0; j < MAX_COUNTERS; j++) { | ||
566 | fd[i][j] = malloc(sizeof(int)*thread_num); | ||
567 | if (!fd[i][j]) | ||
568 | return -ENOMEM; | ||
569 | } | ||
570 | } | ||
571 | |||
532 | /* | 572 | /* |
533 | * We dont want to block the signals - that would cause | 573 | * We dont want to block the signals - that would cause |
534 | * child tasks to inherit that and Ctrl-C would not work. | 574 | * child tasks to inherit that and Ctrl-C would not work. |
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 887ebbf5d1ff..5f3ac9ff354d 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c | |||
@@ -55,7 +55,7 @@ | |||
55 | #include <linux/unistd.h> | 55 | #include <linux/unistd.h> |
56 | #include <linux/types.h> | 56 | #include <linux/types.h> |
57 | 57 | ||
58 | static int fd[MAX_NR_CPUS][MAX_COUNTERS]; | 58 | static int *fd[MAX_NR_CPUS][MAX_COUNTERS]; |
59 | 59 | ||
60 | static int system_wide = 0; | 60 | static int system_wide = 0; |
61 | 61 | ||
@@ -65,6 +65,9 @@ static int count_filter = 5; | |||
65 | static int print_entries; | 65 | static int print_entries; |
66 | 66 | ||
67 | static int target_pid = -1; | 67 | static int target_pid = -1; |
68 | static int target_tid = -1; | ||
69 | static pid_t *all_tids = NULL; | ||
70 | static int thread_num = 0; | ||
68 | static int inherit = 0; | 71 | static int inherit = 0; |
69 | static int profile_cpu = -1; | 72 | static int profile_cpu = -1; |
70 | static int nr_cpus = 0; | 73 | static int nr_cpus = 0; |
@@ -524,13 +527,15 @@ static void print_sym_table(void) | |||
524 | 527 | ||
525 | if (target_pid != -1) | 528 | if (target_pid != -1) |
526 | printf(" (target_pid: %d", target_pid); | 529 | printf(" (target_pid: %d", target_pid); |
530 | else if (target_tid != -1) | ||
531 | printf(" (target_tid: %d", target_tid); | ||
527 | else | 532 | else |
528 | printf(" (all"); | 533 | printf(" (all"); |
529 | 534 | ||
530 | if (profile_cpu != -1) | 535 | if (profile_cpu != -1) |
531 | printf(", cpu: %d)\n", profile_cpu); | 536 | printf(", cpu: %d)\n", profile_cpu); |
532 | else { | 537 | else { |
533 | if (target_pid != -1) | 538 | if (target_tid != -1) |
534 | printf(")\n"); | 539 | printf(")\n"); |
535 | else | 540 | else |
536 | printf(", %d CPUs)\n", nr_cpus); | 541 | printf(", %d CPUs)\n", nr_cpus); |
@@ -1129,16 +1134,21 @@ static void perf_session__mmap_read_counter(struct perf_session *self, | |||
1129 | md->prev = old; | 1134 | md->prev = old; |
1130 | } | 1135 | } |
1131 | 1136 | ||
1132 | static struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS]; | 1137 | static struct pollfd *event_array; |
1133 | static struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS]; | 1138 | static struct mmap_data *mmap_array[MAX_NR_CPUS][MAX_COUNTERS]; |
1134 | 1139 | ||
1135 | static void perf_session__mmap_read(struct perf_session *self) | 1140 | static void perf_session__mmap_read(struct perf_session *self) |
1136 | { | 1141 | { |
1137 | int i, counter; | 1142 | int i, counter, thread_index; |
1138 | 1143 | ||
1139 | for (i = 0; i < nr_cpus; i++) { | 1144 | for (i = 0; i < nr_cpus; i++) { |
1140 | for (counter = 0; counter < nr_counters; counter++) | 1145 | for (counter = 0; counter < nr_counters; counter++) |
1141 | perf_session__mmap_read_counter(self, &mmap_array[i][counter]); | 1146 | for (thread_index = 0; |
1147 | thread_index < thread_num; | ||
1148 | thread_index++) { | ||
1149 | perf_session__mmap_read_counter(self, | ||
1150 | &mmap_array[i][counter][thread_index]); | ||
1151 | } | ||
1142 | } | 1152 | } |
1143 | } | 1153 | } |
1144 | 1154 | ||
@@ -1149,9 +1159,10 @@ static void start_counter(int i, int counter) | |||
1149 | { | 1159 | { |
1150 | struct perf_event_attr *attr; | 1160 | struct perf_event_attr *attr; |
1151 | int cpu; | 1161 | int cpu; |
1162 | int thread_index; | ||
1152 | 1163 | ||
1153 | cpu = profile_cpu; | 1164 | cpu = profile_cpu; |
1154 | if (target_pid == -1 && profile_cpu == -1) | 1165 | if (target_tid == -1 && profile_cpu == -1) |
1155 | cpu = cpumap[i]; | 1166 | cpu = cpumap[i]; |
1156 | 1167 | ||
1157 | attr = attrs + counter; | 1168 | attr = attrs + counter; |
@@ -1167,55 +1178,58 @@ static void start_counter(int i, int counter) | |||
1167 | attr->inherit = (cpu < 0) && inherit; | 1178 | attr->inherit = (cpu < 0) && inherit; |
1168 | attr->mmap = 1; | 1179 | attr->mmap = 1; |
1169 | 1180 | ||
1181 | for (thread_index = 0; thread_index < thread_num; thread_index++) { | ||
1170 | try_again: | 1182 | try_again: |
1171 | fd[i][counter] = sys_perf_event_open(attr, target_pid, cpu, group_fd, 0); | 1183 | fd[i][counter][thread_index] = sys_perf_event_open(attr, |
1172 | 1184 | all_tids[thread_index], cpu, group_fd, 0); | |
1173 | if (fd[i][counter] < 0) { | 1185 | |
1174 | int err = errno; | 1186 | if (fd[i][counter][thread_index] < 0) { |
1187 | int err = errno; | ||
1188 | |||
1189 | if (err == EPERM || err == EACCES) | ||
1190 | die("No permission - are you root?\n"); | ||
1191 | /* | ||
1192 | * If it's cycles then fall back to hrtimer | ||
1193 | * based cpu-clock-tick sw counter, which | ||
1194 | * is always available even if no PMU support: | ||
1195 | */ | ||
1196 | if (attr->type == PERF_TYPE_HARDWARE | ||
1197 | && attr->config == PERF_COUNT_HW_CPU_CYCLES) { | ||
1198 | |||
1199 | if (verbose) | ||
1200 | warning(" ... trying to fall back to cpu-clock-ticks\n"); | ||
1201 | |||
1202 | attr->type = PERF_TYPE_SOFTWARE; | ||
1203 | attr->config = PERF_COUNT_SW_CPU_CLOCK; | ||
1204 | goto try_again; | ||
1205 | } | ||
1206 | printf("\n"); | ||
1207 | error("perfcounter syscall returned with %d (%s)\n", | ||
1208 | fd[i][counter][thread_index], strerror(err)); | ||
1209 | die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); | ||
1210 | exit(-1); | ||
1211 | } | ||
1212 | assert(fd[i][counter][thread_index] >= 0); | ||
1213 | fcntl(fd[i][counter][thread_index], F_SETFL, O_NONBLOCK); | ||
1175 | 1214 | ||
1176 | if (err == EPERM || err == EACCES) | ||
1177 | die("No permission - are you root?\n"); | ||
1178 | /* | 1215 | /* |
1179 | * If it's cycles then fall back to hrtimer | 1216 | * First counter acts as the group leader: |
1180 | * based cpu-clock-tick sw counter, which | ||
1181 | * is always available even if no PMU support: | ||
1182 | */ | 1217 | */ |
1183 | if (attr->type == PERF_TYPE_HARDWARE | 1218 | if (group && group_fd == -1) |
1184 | && attr->config == PERF_COUNT_HW_CPU_CYCLES) { | 1219 | group_fd = fd[i][counter][thread_index]; |
1185 | 1220 | ||
1186 | if (verbose) | 1221 | event_array[nr_poll].fd = fd[i][counter][thread_index]; |
1187 | warning(" ... trying to fall back to cpu-clock-ticks\n"); | 1222 | event_array[nr_poll].events = POLLIN; |
1188 | 1223 | nr_poll++; | |
1189 | attr->type = PERF_TYPE_SOFTWARE; | 1224 | |
1190 | attr->config = PERF_COUNT_SW_CPU_CLOCK; | 1225 | mmap_array[i][counter][thread_index].counter = counter; |
1191 | goto try_again; | 1226 | mmap_array[i][counter][thread_index].prev = 0; |
1192 | } | 1227 | mmap_array[i][counter][thread_index].mask = mmap_pages*page_size - 1; |
1193 | printf("\n"); | 1228 | mmap_array[i][counter][thread_index].base = mmap(NULL, (mmap_pages+1)*page_size, |
1194 | error("perfcounter syscall returned with %d (%s)\n", | 1229 | PROT_READ, MAP_SHARED, fd[i][counter][thread_index], 0); |
1195 | fd[i][counter], strerror(err)); | 1230 | if (mmap_array[i][counter][thread_index].base == MAP_FAILED) |
1196 | die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); | 1231 | die("failed to mmap with %d (%s)\n", errno, strerror(errno)); |
1197 | exit(-1); | ||
1198 | } | 1232 | } |
1199 | assert(fd[i][counter] >= 0); | ||
1200 | fcntl(fd[i][counter], F_SETFL, O_NONBLOCK); | ||
1201 | |||
1202 | /* | ||
1203 | * First counter acts as the group leader: | ||
1204 | */ | ||
1205 | if (group && group_fd == -1) | ||
1206 | group_fd = fd[i][counter]; | ||
1207 | |||
1208 | event_array[nr_poll].fd = fd[i][counter]; | ||
1209 | event_array[nr_poll].events = POLLIN; | ||
1210 | nr_poll++; | ||
1211 | |||
1212 | mmap_array[i][counter].counter = counter; | ||
1213 | mmap_array[i][counter].prev = 0; | ||
1214 | mmap_array[i][counter].mask = mmap_pages*page_size - 1; | ||
1215 | mmap_array[i][counter].base = mmap(NULL, (mmap_pages+1)*page_size, | ||
1216 | PROT_READ, MAP_SHARED, fd[i][counter], 0); | ||
1217 | if (mmap_array[i][counter].base == MAP_FAILED) | ||
1218 | die("failed to mmap with %d (%s)\n", errno, strerror(errno)); | ||
1219 | } | 1233 | } |
1220 | 1234 | ||
1221 | static int __cmd_top(void) | 1235 | static int __cmd_top(void) |
@@ -1231,8 +1245,8 @@ static int __cmd_top(void) | |||
1231 | if (session == NULL) | 1245 | if (session == NULL) |
1232 | return -ENOMEM; | 1246 | return -ENOMEM; |
1233 | 1247 | ||
1234 | if (target_pid != -1) | 1248 | if (target_tid != -1) |
1235 | event__synthesize_thread(target_pid, event__process, session); | 1249 | event__synthesize_thread(target_tid, event__process, session); |
1236 | else | 1250 | else |
1237 | event__synthesize_threads(event__process, session); | 1251 | event__synthesize_threads(event__process, session); |
1238 | 1252 | ||
@@ -1243,7 +1257,7 @@ static int __cmd_top(void) | |||
1243 | } | 1257 | } |
1244 | 1258 | ||
1245 | /* Wait for a minimal set of events before starting the snapshot */ | 1259 | /* Wait for a minimal set of events before starting the snapshot */ |
1246 | poll(event_array, nr_poll, 100); | 1260 | poll(&event_array[0], nr_poll, 100); |
1247 | 1261 | ||
1248 | perf_session__mmap_read(session); | 1262 | perf_session__mmap_read(session); |
1249 | 1263 | ||
@@ -1286,7 +1300,9 @@ static const struct option options[] = { | |||
1286 | OPT_INTEGER('c', "count", &default_interval, | 1300 | OPT_INTEGER('c', "count", &default_interval, |
1287 | "event period to sample"), | 1301 | "event period to sample"), |
1288 | OPT_INTEGER('p', "pid", &target_pid, | 1302 | OPT_INTEGER('p', "pid", &target_pid, |
1289 | "profile events on existing pid"), | 1303 | "profile events on existing process id"), |
1304 | OPT_INTEGER('t', "tid", &target_tid, | ||
1305 | "profile events on existing thread id"), | ||
1290 | OPT_BOOLEAN('a', "all-cpus", &system_wide, | 1306 | OPT_BOOLEAN('a', "all-cpus", &system_wide, |
1291 | "system-wide collection from all CPUs"), | 1307 | "system-wide collection from all CPUs"), |
1292 | OPT_INTEGER('C', "CPU", &profile_cpu, | 1308 | OPT_INTEGER('C', "CPU", &profile_cpu, |
@@ -1327,6 +1343,7 @@ static const struct option options[] = { | |||
1327 | int cmd_top(int argc, const char **argv, const char *prefix __used) | 1343 | int cmd_top(int argc, const char **argv, const char *prefix __used) |
1328 | { | 1344 | { |
1329 | int counter; | 1345 | int counter; |
1346 | int i,j; | ||
1330 | 1347 | ||
1331 | page_size = sysconf(_SC_PAGE_SIZE); | 1348 | page_size = sysconf(_SC_PAGE_SIZE); |
1332 | 1349 | ||
@@ -1334,8 +1351,39 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) | |||
1334 | if (argc) | 1351 | if (argc) |
1335 | usage_with_options(top_usage, options); | 1352 | usage_with_options(top_usage, options); |
1336 | 1353 | ||
1354 | if (target_pid != -1) { | ||
1355 | target_tid = target_pid; | ||
1356 | thread_num = find_all_tid(target_pid, &all_tids); | ||
1357 | if (thread_num <= 0) { | ||
1358 | fprintf(stderr, "Can't find all threads of pid %d\n", | ||
1359 | target_pid); | ||
1360 | usage_with_options(top_usage, options); | ||
1361 | } | ||
1362 | } else { | ||
1363 | all_tids=malloc(sizeof(pid_t)); | ||
1364 | if (!all_tids) | ||
1365 | return -ENOMEM; | ||
1366 | |||
1367 | all_tids[0] = target_tid; | ||
1368 | thread_num = 1; | ||
1369 | } | ||
1370 | |||
1371 | for (i = 0; i < MAX_NR_CPUS; i++) { | ||
1372 | for (j = 0; j < MAX_COUNTERS; j++) { | ||
1373 | fd[i][j] = malloc(sizeof(int)*thread_num); | ||
1374 | mmap_array[i][j] = malloc( | ||
1375 | sizeof(struct mmap_data)*thread_num); | ||
1376 | if (!fd[i][j] || !mmap_array[i][j]) | ||
1377 | return -ENOMEM; | ||
1378 | } | ||
1379 | } | ||
1380 | event_array = malloc( | ||
1381 | sizeof(struct pollfd)*MAX_NR_CPUS*MAX_COUNTERS*thread_num); | ||
1382 | if (!event_array) | ||
1383 | return -ENOMEM; | ||
1384 | |||
1337 | /* CPU and PID are mutually exclusive */ | 1385 | /* CPU and PID are mutually exclusive */ |
1338 | if (target_pid != -1 && profile_cpu != -1) { | 1386 | if (target_tid > 0 && profile_cpu != -1) { |
1339 | printf("WARNING: PID switch overriding CPU\n"); | 1387 | printf("WARNING: PID switch overriding CPU\n"); |
1340 | sleep(1); | 1388 | sleep(1); |
1341 | profile_cpu = -1; | 1389 | profile_cpu = -1; |
@@ -1376,7 +1424,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) | |||
1376 | attrs[counter].sample_period = default_interval; | 1424 | attrs[counter].sample_period = default_interval; |
1377 | } | 1425 | } |
1378 | 1426 | ||
1379 | if (target_pid != -1 || profile_cpu != -1) | 1427 | if (target_tid != -1 || profile_cpu != -1) |
1380 | nr_cpus = 1; | 1428 | nr_cpus = 1; |
1381 | else | 1429 | else |
1382 | nr_cpus = read_cpu_map(); | 1430 | nr_cpus = read_cpu_map(); |
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index fa968312ee7d..ea6506234d57 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c | |||
@@ -7,6 +7,37 @@ | |||
7 | #include "util.h" | 7 | #include "util.h" |
8 | #include "debug.h" | 8 | #include "debug.h" |
9 | 9 | ||
/*
 * scandir() filter for /proc/<pid>/task: keep only entries whose name
 * starts with a digit.  Without a filter scandir() also returns "." and
 * "..", which atoi() would silently turn into tid 0.
 */
static int tid_dirent_filter(const struct dirent *entry)
{
	return entry->d_name[0] >= '0' && entry->d_name[0] <= '9';
}

/*
 * find_all_tid - collect the thread ids listed under /proc/<pid>/task.
 * @pid:     process id whose task directory is scanned
 * @all_tid: out parameter; on success points to a malloc()ed array of
 *           thread ids which the caller must free()
 *
 * Returns the number of entries stored in *all_tid (always > 0) on
 * success, -ENOENT if the task directory cannot be scanned or holds no
 * thread entries, and -ENOMEM if the result array cannot be allocated.
 * On -ENOENT *all_tid is left untouched; on -ENOMEM it is NULL.
 */
int find_all_tid(int pid, pid_t ** all_tid)
{
	char name[256];
	int items;
	struct dirent **namelist = NULL;
	int ret = 0;
	int i;

	snprintf(name, sizeof(name), "/proc/%d/task", pid);
	items = scandir(name, &namelist, tid_dirent_filter, NULL);
	if (items < 0)
		return -ENOENT;
	if (items == 0) {
		/* scandir() may still have allocated an empty list. */
		ret = -ENOENT;
		goto failure;
	}

	*all_tid = malloc(sizeof(pid_t) * items);
	if (!*all_tid) {
		ret = -ENOMEM;
		goto failure;
	}

	for (i = 0; i < items; i++)
		(*all_tid)[i] = atoi(namelist[i]->d_name);

	ret = items;

failure:
	/* The dirent list is only needed while the names are converted. */
	for (i = 0; i < items; i++)
		free(namelist[i]);
	free(namelist);

	return ret;
}

40 | |||
10 | void map_groups__init(struct map_groups *self) | 41 | void map_groups__init(struct map_groups *self) |
11 | { | 42 | { |
12 | int i; | 43 | int i; |
@@ -348,3 +379,4 @@ struct symbol *map_groups__find_symbol(struct map_groups *self, | |||
348 | 379 | ||
349 | return NULL; | 380 | return NULL; |
350 | } | 381 | } |
382 | |||
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index dcf70303e58e..a81426a891bf 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h | |||
@@ -23,6 +23,7 @@ struct thread { | |||
23 | int comm_len; | 23 | int comm_len; |
24 | }; | 24 | }; |
25 | 25 | ||
26 | int find_all_tid(int pid, pid_t ** all_tid); | ||
26 | void map_groups__init(struct map_groups *self); | 27 | void map_groups__init(struct map_groups *self); |
27 | int thread__set_comm(struct thread *self, const char *comm); | 28 | int thread__set_comm(struct thread *self, const char *comm); |
28 | int thread__comm_len(struct thread *self); | 29 | int thread__comm_len(struct thread *self); |