diff options
author | Ingo Molnar <mingo@elte.hu> | 2009-09-13 12:15:54 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-09-14 09:45:11 -0400 |
commit | ea57c4f5203d82c7844c54cdef54e972cf4e9d1f (patch) | |
tree | 13eb76a07480adbfe1bcabd25cde7cdd12907ddb /tools/perf | |
parent | aa1ab9d26ae9fe2566a9036e3cb83e7d555b3987 (diff) |
perf tools: Implement counter output multiplexing
Finish the -M/--multiplex option implementation:
- separate it out from group_fd
- correctly set it via the ioctl and don't mmap counters that
are multiplexed
- modify the perf record event loop to deal with buffer-less
counters.
- remove the -g option from perf sched record
- account for unordered events in perf sched latency
- (add -f to perf sched record to ease measurements)
- skip idle threads (pid==0) in latency output
The result is better latency output by 'perf sched latency':
-----------------------------------------------------------------------------------
Task | Runtime ms | Switches | Average delay ms | Maximum delay ms |
-----------------------------------------------------------------------------------
ksoftirqd/8 | 0.071 ms | 2 | avg: 0.458 ms | max: 0.913 ms |
at-spi-registry | 0.609 ms | 19 | avg: 0.013 ms | max: 0.023 ms |
perf | 3.316 ms | 16 | avg: 0.013 ms | max: 0.054 ms |
Xorg | 0.392 ms | 19 | avg: 0.011 ms | max: 0.018 ms |
sleep | 0.537 ms | 2 | avg: 0.009 ms | max: 0.009 ms |
-----------------------------------------------------------------------------------
TOTAL: | 4.925 ms | 58 |
---------------------------------------------
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'tools/perf')
-rw-r--r-- | tools/perf/builtin-record.c | 43 | ||||
-rw-r--r-- | tools/perf/builtin-sched.c | 25 | ||||
-rw-r--r-- | tools/perf/util/trace-event-parse.c | 6 |
3 files changed, 52 insertions, 22 deletions
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 79f99dba5be0..5f3127e7a615 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c | |||
@@ -49,6 +49,7 @@ static int inherit_stat = 0; | |||
49 | static int no_samples = 0; | 49 | static int no_samples = 0; |
50 | static int sample_address = 0; | 50 | static int sample_address = 0; |
51 | static int multiplex = 0; | 51 | static int multiplex = 0; |
52 | static int multiplex_fd = -1; | ||
52 | 53 | ||
53 | static long samples; | 54 | static long samples; |
54 | static struct timeval last_read; | 55 | static struct timeval last_read; |
@@ -471,23 +472,29 @@ try_again: | |||
471 | */ | 472 | */ |
472 | if (group && group_fd == -1) | 473 | if (group && group_fd == -1) |
473 | group_fd = fd[nr_cpu][counter]; | 474 | group_fd = fd[nr_cpu][counter]; |
475 | if (multiplex && multiplex_fd == -1) | ||
476 | multiplex_fd = fd[nr_cpu][counter]; | ||
474 | 477 | ||
475 | event_array[nr_poll].fd = fd[nr_cpu][counter]; | 478 | if (multiplex && fd[nr_cpu][counter] != multiplex_fd) { |
476 | event_array[nr_poll].events = POLLIN; | 479 | int ret; |
477 | nr_poll++; | ||
478 | |||
479 | mmap_array[nr_cpu][counter].counter = counter; | ||
480 | mmap_array[nr_cpu][counter].prev = 0; | ||
481 | mmap_array[nr_cpu][counter].mask = mmap_pages*page_size - 1; | ||
482 | mmap_array[nr_cpu][counter].base = mmap(NULL, (mmap_pages+1)*page_size, | ||
483 | PROT_READ|PROT_WRITE, MAP_SHARED, fd[nr_cpu][counter], 0); | ||
484 | if (mmap_array[nr_cpu][counter].base == MAP_FAILED) { | ||
485 | error("failed to mmap with %d (%s)\n", errno, strerror(errno)); | ||
486 | exit(-1); | ||
487 | } | ||
488 | 480 | ||
489 | if (multiplex && fd[nr_cpu][counter] != group_fd) | 481 | ret = ioctl(fd[nr_cpu][counter], PERF_COUNTER_IOC_SET_OUTPUT, multiplex_fd); |
490 | ioctl(fd[nr_cpu][counter], PERF_COUNTER_IOC_SET_OUTPUT, group_fd); | 482 | assert(ret != -1); |
483 | } else { | ||
484 | event_array[nr_poll].fd = fd[nr_cpu][counter]; | ||
485 | event_array[nr_poll].events = POLLIN; | ||
486 | nr_poll++; | ||
487 | |||
488 | mmap_array[nr_cpu][counter].counter = counter; | ||
489 | mmap_array[nr_cpu][counter].prev = 0; | ||
490 | mmap_array[nr_cpu][counter].mask = mmap_pages*page_size - 1; | ||
491 | mmap_array[nr_cpu][counter].base = mmap(NULL, (mmap_pages+1)*page_size, | ||
492 | PROT_READ|PROT_WRITE, MAP_SHARED, fd[nr_cpu][counter], 0); | ||
493 | if (mmap_array[nr_cpu][counter].base == MAP_FAILED) { | ||
494 | error("failed to mmap with %d (%s)\n", errno, strerror(errno)); | ||
495 | exit(-1); | ||
496 | } | ||
497 | } | ||
491 | 498 | ||
492 | ioctl(fd[nr_cpu][counter], PERF_COUNTER_IOC_ENABLE); | 499 | ioctl(fd[nr_cpu][counter], PERF_COUNTER_IOC_ENABLE); |
493 | } | 500 | } |
@@ -618,8 +625,10 @@ static int __cmd_record(int argc, const char **argv) | |||
618 | int hits = samples; | 625 | int hits = samples; |
619 | 626 | ||
620 | for (i = 0; i < nr_cpu; i++) { | 627 | for (i = 0; i < nr_cpu; i++) { |
621 | for (counter = 0; counter < nr_counters; counter++) | 628 | for (counter = 0; counter < nr_counters; counter++) { |
622 | mmap_read(&mmap_array[i][counter]); | 629 | if (mmap_array[i][counter].base) |
630 | mmap_read(&mmap_array[i][counter]); | ||
631 | } | ||
623 | } | 632 | } |
624 | 633 | ||
625 | if (hits == samples) { | 634 | if (hits == samples) { |
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 3e003237c42f..2ce87ef5a3e6 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c | |||
@@ -116,6 +116,8 @@ static u64 sum_fluct; | |||
116 | static u64 run_avg; | 116 | static u64 run_avg; |
117 | 117 | ||
118 | static unsigned long replay_repeat = 10; | 118 | static unsigned long replay_repeat = 10; |
119 | static unsigned long nr_timestamps; | ||
120 | static unsigned long unordered_timestamps; | ||
119 | 121 | ||
120 | #define TASK_STATE_TO_CHAR_STR "RSDTtZX" | 122 | #define TASK_STATE_TO_CHAR_STR "RSDTtZX" |
121 | 123 | ||
@@ -1109,8 +1111,11 @@ latency_wakeup_event(struct trace_wakeup_event *wakeup_event, | |||
1109 | if (atom->state != THREAD_SLEEPING) | 1111 | if (atom->state != THREAD_SLEEPING) |
1110 | return; | 1112 | return; |
1111 | 1113 | ||
1112 | if (atom->sched_out_time > timestamp) | 1114 | nr_timestamps++; |
1115 | if (atom->sched_out_time > timestamp) { | ||
1116 | unordered_timestamps++; | ||
1113 | return; | 1117 | return; |
1118 | } | ||
1114 | 1119 | ||
1115 | atom->state = THREAD_WAIT_CPU; | 1120 | atom->state = THREAD_WAIT_CPU; |
1116 | atom->wake_up_time = timestamp; | 1121 | atom->wake_up_time = timestamp; |
@@ -1130,6 +1135,11 @@ static void output_lat_thread(struct task_atoms *atom_list) | |||
1130 | 1135 | ||
1131 | if (!atom_list->nb_atoms) | 1136 | if (!atom_list->nb_atoms) |
1132 | return; | 1137 | return; |
1138 | /* | ||
1139 | * Ignore idle threads: | ||
1140 | */ | ||
1141 | if (!atom_list->thread->pid) | ||
1142 | return; | ||
1133 | 1143 | ||
1134 | all_runtime += atom_list->total_runtime; | 1144 | all_runtime += atom_list->total_runtime; |
1135 | all_count += atom_list->nb_atoms; | 1145 | all_count += atom_list->nb_atoms; |
@@ -1301,8 +1311,16 @@ static void __cmd_lat(void) | |||
1301 | } | 1311 | } |
1302 | 1312 | ||
1303 | printf("-----------------------------------------------------------------------------------\n"); | 1313 | printf("-----------------------------------------------------------------------------------\n"); |
1304 | printf(" TOTAL: |%9.3f ms |%9Ld |\n", | 1314 | printf(" TOTAL: |%9.3f ms |%9Ld |", |
1305 | (double)all_runtime/1e6, all_count); | 1315 | (double)all_runtime/1e6, all_count); |
1316 | |||
1317 | if (unordered_timestamps && nr_timestamps) { | ||
1318 | printf(" INFO: %.2f%% unordered events.\n", | ||
1319 | (double)unordered_timestamps/(double)nr_timestamps*100.0); | ||
1320 | } else { | ||
1321 | printf("\n"); | ||
1322 | } | ||
1323 | |||
1306 | printf("---------------------------------------------\n"); | 1324 | printf("---------------------------------------------\n"); |
1307 | } | 1325 | } |
1308 | 1326 | ||
@@ -1667,12 +1685,13 @@ static const char *record_args[] = { | |||
1667 | "-a", | 1685 | "-a", |
1668 | "-R", | 1686 | "-R", |
1669 | "-M", | 1687 | "-M", |
1670 | "-g", | 1688 | "-f", |
1671 | "-c", "1", | 1689 | "-c", "1", |
1672 | "-e", "sched:sched_switch:r", | 1690 | "-e", "sched:sched_switch:r", |
1673 | "-e", "sched:sched_stat_wait:r", | 1691 | "-e", "sched:sched_stat_wait:r", |
1674 | "-e", "sched:sched_stat_sleep:r", | 1692 | "-e", "sched:sched_stat_sleep:r", |
1675 | "-e", "sched:sched_stat_iowait:r", | 1693 | "-e", "sched:sched_stat_iowait:r", |
1694 | "-e", "sched:sched_stat_runtime:r", | ||
1676 | "-e", "sched:sched_process_exit:r", | 1695 | "-e", "sched:sched_process_exit:r", |
1677 | "-e", "sched:sched_process_fork:r", | 1696 | "-e", "sched:sched_process_fork:r", |
1678 | "-e", "sched:sched_wakeup:r", | 1697 | "-e", "sched:sched_wakeup:r", |
diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index 64d6e302751a..f6a8437141c8 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c | |||
@@ -2722,8 +2722,10 @@ void print_event(int cpu, void *data, int size, unsigned long long nsecs, | |||
2722 | type = trace_parse_common_type(data); | 2722 | type = trace_parse_common_type(data); |
2723 | 2723 | ||
2724 | event = trace_find_event(type); | 2724 | event = trace_find_event(type); |
2725 | if (!event) | 2725 | if (!event) { |
2726 | die("ug! no event found for type %d", type); | 2726 | printf("ug! no event found for type %d\n", type); |
2727 | return; | ||
2728 | } | ||
2727 | 2729 | ||
2728 | pid = parse_common_pid(data); | 2730 | pid = parse_common_pid(data); |
2729 | 2731 | ||