diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-08-10 14:48:51 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-08-10 14:48:51 -0400 |
| commit | d00aa6695b67a31be2ce5f7464da32c20cb50699 (patch) | |
| tree | 4e4a2bbd1ab710ddca3bd1a611a6c3e9a00f52f9 | |
| parent | cec36911b5fa4ac342f6de856b12a9f71f84e6e5 (diff) | |
| parent | 1853db0e02ae4088f102b0d8e59e83dc98f93f03 (diff) | |
Merge branch 'perfcounters-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'perfcounters-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (27 commits)
perf_counter: Zero dead bytes from ftrace raw samples size alignment
perf_counter: Subtract the buffer size field from the event record size
perf_counter: Require CAP_SYS_ADMIN for raw tracepoint data
perf_counter: Correct PERF_SAMPLE_RAW output
perf tools: callchain: Fix bad rounding of minimum rate
perf_counter tools: Fix libbfd detection for systems with libz dependency
perf: "Longum est iter per praecepta, breve et efficax per exempla"
perf_counter: Fix a race on perf_counter_ctx
perf_counter: Fix tracepoint sampling to be part of generic sampling
perf_counter: Work around gcc warning by initializing tracepoint record unconditionally
perf tools: callchain: Fix sum of percentages to be 100% by displaying amount of ignored chains in fractal mode
perf tools: callchain: Fix 'perf report' display to be callchain by default
perf tools: callchain: Fix spurious 'perf report' warnings: ignore empty callchains
perf record: Fix the -A UI for empty or non-existent perf.data
perf util: Fix do_read() to fail on EOF instead of busy-looping
perf list: Fix the output to not include tracepoints without an id
perf_counter/powerpc: Fix oops on cpus without perf_counter hardware support
perf stat: Fix tool option consistency: rename -S/--scale to -c/--scale
perf report: Add debug help for the finding of symbol bugs - show the symtab origin (DSO, build-id, kernel, etc)
perf report: Fix per task mult-counter stat reporting
...
| -rw-r--r-- | arch/powerpc/kernel/perf_counter.c | 8 | ||||
| -rw-r--r-- | include/linux/perf_counter.h | 12 | ||||
| -rw-r--r-- | include/trace/ftrace.h | 15 | ||||
| -rw-r--r-- | kernel/perf_counter.c | 239 | ||||
| -rw-r--r-- | tools/perf/Documentation/perf-examples.txt | 225 | ||||
| -rw-r--r-- | tools/perf/Documentation/perf-stat.txt | 2 | ||||
| -rw-r--r-- | tools/perf/Documentation/perf-top.txt | 112 | ||||
| -rw-r--r-- | tools/perf/Makefile | 4 | ||||
| -rw-r--r-- | tools/perf/builtin-record.c | 12 | ||||
| -rw-r--r-- | tools/perf/builtin-report.c | 99 | ||||
| -rw-r--r-- | tools/perf/builtin-stat.c | 2 | ||||
| -rw-r--r-- | tools/perf/builtin-top.c | 552 | ||||
| -rw-r--r-- | tools/perf/util/callchain.c | 32 | ||||
| -rw-r--r-- | tools/perf/util/callchain.h | 8 | ||||
| -rw-r--r-- | tools/perf/util/header.c | 5 | ||||
| -rw-r--r-- | tools/perf/util/parse-events.c | 26 | ||||
| -rw-r--r-- | tools/perf/util/parse-events.h | 1 | ||||
| -rw-r--r-- | tools/perf/util/symbol.c | 57 | ||||
| -rw-r--r-- | tools/perf/util/symbol.h | 2 |
19 files changed, 1212 insertions, 201 deletions
diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index 809fdf94b95f..70e1f57f7dd8 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c | |||
| @@ -518,6 +518,8 @@ void hw_perf_disable(void) | |||
| 518 | struct cpu_hw_counters *cpuhw; | 518 | struct cpu_hw_counters *cpuhw; |
| 519 | unsigned long flags; | 519 | unsigned long flags; |
| 520 | 520 | ||
| 521 | if (!ppmu) | ||
| 522 | return; | ||
| 521 | local_irq_save(flags); | 523 | local_irq_save(flags); |
| 522 | cpuhw = &__get_cpu_var(cpu_hw_counters); | 524 | cpuhw = &__get_cpu_var(cpu_hw_counters); |
| 523 | 525 | ||
| @@ -572,6 +574,8 @@ void hw_perf_enable(void) | |||
| 572 | int n_lim; | 574 | int n_lim; |
| 573 | int idx; | 575 | int idx; |
| 574 | 576 | ||
| 577 | if (!ppmu) | ||
| 578 | return; | ||
| 575 | local_irq_save(flags); | 579 | local_irq_save(flags); |
| 576 | cpuhw = &__get_cpu_var(cpu_hw_counters); | 580 | cpuhw = &__get_cpu_var(cpu_hw_counters); |
| 577 | if (!cpuhw->disabled) { | 581 | if (!cpuhw->disabled) { |
| @@ -737,6 +741,8 @@ int hw_perf_group_sched_in(struct perf_counter *group_leader, | |||
| 737 | long i, n, n0; | 741 | long i, n, n0; |
| 738 | struct perf_counter *sub; | 742 | struct perf_counter *sub; |
| 739 | 743 | ||
| 744 | if (!ppmu) | ||
| 745 | return 0; | ||
| 740 | cpuhw = &__get_cpu_var(cpu_hw_counters); | 746 | cpuhw = &__get_cpu_var(cpu_hw_counters); |
| 741 | n0 = cpuhw->n_counters; | 747 | n0 = cpuhw->n_counters; |
| 742 | n = collect_events(group_leader, ppmu->n_counter - n0, | 748 | n = collect_events(group_leader, ppmu->n_counter - n0, |
| @@ -1281,6 +1287,8 @@ void hw_perf_counter_setup(int cpu) | |||
| 1281 | { | 1287 | { |
| 1282 | struct cpu_hw_counters *cpuhw = &per_cpu(cpu_hw_counters, cpu); | 1288 | struct cpu_hw_counters *cpuhw = &per_cpu(cpu_hw_counters, cpu); |
| 1283 | 1289 | ||
| 1290 | if (!ppmu) | ||
| 1291 | return; | ||
| 1284 | memset(cpuhw, 0, sizeof(*cpuhw)); | 1292 | memset(cpuhw, 0, sizeof(*cpuhw)); |
| 1285 | cpuhw->mmcr[0] = MMCR0_FC; | 1293 | cpuhw->mmcr[0] = MMCR0_FC; |
| 1286 | } | 1294 | } |
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index a67dd5c5b6d3..a9d823a93fe8 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h | |||
| @@ -121,7 +121,7 @@ enum perf_counter_sample_format { | |||
| 121 | PERF_SAMPLE_CPU = 1U << 7, | 121 | PERF_SAMPLE_CPU = 1U << 7, |
| 122 | PERF_SAMPLE_PERIOD = 1U << 8, | 122 | PERF_SAMPLE_PERIOD = 1U << 8, |
| 123 | PERF_SAMPLE_STREAM_ID = 1U << 9, | 123 | PERF_SAMPLE_STREAM_ID = 1U << 9, |
| 124 | PERF_SAMPLE_TP_RECORD = 1U << 10, | 124 | PERF_SAMPLE_RAW = 1U << 10, |
| 125 | 125 | ||
| 126 | PERF_SAMPLE_MAX = 1U << 11, /* non-ABI */ | 126 | PERF_SAMPLE_MAX = 1U << 11, /* non-ABI */ |
| 127 | }; | 127 | }; |
| @@ -369,6 +369,8 @@ enum perf_event_type { | |||
| 369 | * | 369 | * |
| 370 | * { u64 nr, | 370 | * { u64 nr, |
| 371 | * u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN | 371 | * u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN |
| 372 | * { u32 size; | ||
| 373 | * char data[size];}&& PERF_SAMPLE_RAW | ||
| 372 | * }; | 374 | * }; |
| 373 | */ | 375 | */ |
| 374 | PERF_EVENT_SAMPLE = 9, | 376 | PERF_EVENT_SAMPLE = 9, |
| @@ -414,9 +416,9 @@ struct perf_callchain_entry { | |||
| 414 | __u64 ip[PERF_MAX_STACK_DEPTH]; | 416 | __u64 ip[PERF_MAX_STACK_DEPTH]; |
| 415 | }; | 417 | }; |
| 416 | 418 | ||
| 417 | struct perf_tracepoint_record { | 419 | struct perf_raw_record { |
| 418 | int size; | 420 | u32 size; |
| 419 | char *record; | 421 | void *data; |
| 420 | }; | 422 | }; |
| 421 | 423 | ||
| 422 | struct task_struct; | 424 | struct task_struct; |
| @@ -687,7 +689,7 @@ struct perf_sample_data { | |||
| 687 | struct pt_regs *regs; | 689 | struct pt_regs *regs; |
| 688 | u64 addr; | 690 | u64 addr; |
| 689 | u64 period; | 691 | u64 period; |
| 690 | void *private; | 692 | struct perf_raw_record *raw; |
| 691 | }; | 693 | }; |
| 692 | 694 | ||
| 693 | extern int perf_counter_overflow(struct perf_counter *counter, int nmi, | 695 | extern int perf_counter_overflow(struct perf_counter *counter, int nmi, |
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 7fb16d90e7b1..f64fbaae781a 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h | |||
| @@ -637,12 +637,20 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ | |||
| 637 | * pc = preempt_count(); | 637 | * pc = preempt_count(); |
| 638 | * | 638 | * |
| 639 | * __data_size = ftrace_get_offsets_<call>(&__data_offsets, args); | 639 | * __data_size = ftrace_get_offsets_<call>(&__data_offsets, args); |
| 640 | * __entry_size = __data_size + sizeof(*entry); | 640 | * |
| 641 | * // Below we want to get the aligned size by taking into account | ||
| 642 | * // the u32 field that will later store the buffer size | ||
| 643 | * __entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32), | ||
| 644 | * sizeof(u64)); | ||
| 645 | * __entry_size -= sizeof(u32); | ||
| 641 | * | 646 | * |
| 642 | * do { | 647 | * do { |
| 643 | * char raw_data[__entry_size]; <- allocate our sample in the stack | 648 | * char raw_data[__entry_size]; <- allocate our sample in the stack |
| 644 | * struct trace_entry *ent; | 649 | * struct trace_entry *ent; |
| 645 | * | 650 | * |
| 651 | * zero dead bytes from alignment to avoid stack leak to userspace: | ||
| 652 | * | ||
| 653 | * *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; | ||
| 646 | * entry = (struct ftrace_raw_<call> *)raw_data; | 654 | * entry = (struct ftrace_raw_<call> *)raw_data; |
| 647 | * ent = &entry->ent; | 655 | * ent = &entry->ent; |
| 648 | * tracing_generic_entry_update(ent, irq_flags, pc); | 656 | * tracing_generic_entry_update(ent, irq_flags, pc); |
| @@ -685,12 +693,15 @@ static void ftrace_profile_##call(proto) \ | |||
| 685 | pc = preempt_count(); \ | 693 | pc = preempt_count(); \ |
| 686 | \ | 694 | \ |
| 687 | __data_size = ftrace_get_offsets_##call(&__data_offsets, args); \ | 695 | __data_size = ftrace_get_offsets_##call(&__data_offsets, args); \ |
| 688 | __entry_size = ALIGN(__data_size + sizeof(*entry), sizeof(u64));\ | 696 | __entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32),\ |
| 697 | sizeof(u64)); \ | ||
| 698 | __entry_size -= sizeof(u32); \ | ||
| 689 | \ | 699 | \ |
| 690 | do { \ | 700 | do { \ |
| 691 | char raw_data[__entry_size]; \ | 701 | char raw_data[__entry_size]; \ |
| 692 | struct trace_entry *ent; \ | 702 | struct trace_entry *ent; \ |
| 693 | \ | 703 | \ |
| 704 | *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; \ | ||
| 694 | entry = (struct ftrace_raw_##call *)raw_data; \ | 705 | entry = (struct ftrace_raw_##call *)raw_data; \ |
| 695 | ent = &entry->ent; \ | 706 | ent = &entry->ent; \ |
| 696 | tracing_generic_entry_update(ent, irq_flags, pc); \ | 707 | tracing_generic_entry_update(ent, irq_flags, pc); \ |
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 868102172aa4..b0b20a07f394 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c | |||
| @@ -2646,7 +2646,6 @@ static void perf_counter_output(struct perf_counter *counter, int nmi, | |||
| 2646 | u64 counter; | 2646 | u64 counter; |
| 2647 | } group_entry; | 2647 | } group_entry; |
| 2648 | struct perf_callchain_entry *callchain = NULL; | 2648 | struct perf_callchain_entry *callchain = NULL; |
| 2649 | struct perf_tracepoint_record *tp; | ||
| 2650 | int callchain_size = 0; | 2649 | int callchain_size = 0; |
| 2651 | u64 time; | 2650 | u64 time; |
| 2652 | struct { | 2651 | struct { |
| @@ -2715,9 +2714,16 @@ static void perf_counter_output(struct perf_counter *counter, int nmi, | |||
| 2715 | header.size += sizeof(u64); | 2714 | header.size += sizeof(u64); |
| 2716 | } | 2715 | } |
| 2717 | 2716 | ||
| 2718 | if (sample_type & PERF_SAMPLE_TP_RECORD) { | 2717 | if (sample_type & PERF_SAMPLE_RAW) { |
| 2719 | tp = data->private; | 2718 | int size = sizeof(u32); |
| 2720 | header.size += tp->size; | 2719 | |
| 2720 | if (data->raw) | ||
| 2721 | size += data->raw->size; | ||
| 2722 | else | ||
| 2723 | size += sizeof(u32); | ||
| 2724 | |||
| 2725 | WARN_ON_ONCE(size & (sizeof(u64)-1)); | ||
| 2726 | header.size += size; | ||
| 2721 | } | 2727 | } |
| 2722 | 2728 | ||
| 2723 | ret = perf_output_begin(&handle, counter, header.size, nmi, 1); | 2729 | ret = perf_output_begin(&handle, counter, header.size, nmi, 1); |
| @@ -2783,8 +2789,21 @@ static void perf_counter_output(struct perf_counter *counter, int nmi, | |||
| 2783 | } | 2789 | } |
| 2784 | } | 2790 | } |
| 2785 | 2791 | ||
| 2786 | if (sample_type & PERF_SAMPLE_TP_RECORD) | 2792 | if (sample_type & PERF_SAMPLE_RAW) { |
| 2787 | perf_output_copy(&handle, tp->record, tp->size); | 2793 | if (data->raw) { |
| 2794 | perf_output_put(&handle, data->raw->size); | ||
| 2795 | perf_output_copy(&handle, data->raw->data, data->raw->size); | ||
| 2796 | } else { | ||
| 2797 | struct { | ||
| 2798 | u32 size; | ||
| 2799 | u32 data; | ||
| 2800 | } raw = { | ||
| 2801 | .size = sizeof(u32), | ||
| 2802 | .data = 0, | ||
| 2803 | }; | ||
| 2804 | perf_output_put(&handle, raw); | ||
| 2805 | } | ||
| 2806 | } | ||
| 2788 | 2807 | ||
| 2789 | perf_output_end(&handle); | 2808 | perf_output_end(&handle); |
| 2790 | } | 2809 | } |
| @@ -2849,7 +2868,8 @@ perf_counter_read_event(struct perf_counter *counter, | |||
| 2849 | */ | 2868 | */ |
| 2850 | 2869 | ||
| 2851 | struct perf_task_event { | 2870 | struct perf_task_event { |
| 2852 | struct task_struct *task; | 2871 | struct task_struct *task; |
| 2872 | struct perf_counter_context *task_ctx; | ||
| 2853 | 2873 | ||
| 2854 | struct { | 2874 | struct { |
| 2855 | struct perf_event_header header; | 2875 | struct perf_event_header header; |
| @@ -2909,24 +2929,23 @@ static void perf_counter_task_ctx(struct perf_counter_context *ctx, | |||
| 2909 | static void perf_counter_task_event(struct perf_task_event *task_event) | 2929 | static void perf_counter_task_event(struct perf_task_event *task_event) |
| 2910 | { | 2930 | { |
| 2911 | struct perf_cpu_context *cpuctx; | 2931 | struct perf_cpu_context *cpuctx; |
| 2912 | struct perf_counter_context *ctx; | 2932 | struct perf_counter_context *ctx = task_event->task_ctx; |
| 2913 | 2933 | ||
| 2914 | cpuctx = &get_cpu_var(perf_cpu_context); | 2934 | cpuctx = &get_cpu_var(perf_cpu_context); |
| 2915 | perf_counter_task_ctx(&cpuctx->ctx, task_event); | 2935 | perf_counter_task_ctx(&cpuctx->ctx, task_event); |
| 2916 | put_cpu_var(perf_cpu_context); | 2936 | put_cpu_var(perf_cpu_context); |
| 2917 | 2937 | ||
| 2918 | rcu_read_lock(); | 2938 | rcu_read_lock(); |
| 2919 | /* | 2939 | if (!ctx) |
| 2920 | * doesn't really matter which of the child contexts the | 2940 | ctx = rcu_dereference(task_event->task->perf_counter_ctxp); |
| 2921 | * events ends up in. | ||
| 2922 | */ | ||
| 2923 | ctx = rcu_dereference(current->perf_counter_ctxp); | ||
| 2924 | if (ctx) | 2941 | if (ctx) |
| 2925 | perf_counter_task_ctx(ctx, task_event); | 2942 | perf_counter_task_ctx(ctx, task_event); |
| 2926 | rcu_read_unlock(); | 2943 | rcu_read_unlock(); |
| 2927 | } | 2944 | } |
| 2928 | 2945 | ||
| 2929 | static void perf_counter_task(struct task_struct *task, int new) | 2946 | static void perf_counter_task(struct task_struct *task, |
| 2947 | struct perf_counter_context *task_ctx, | ||
| 2948 | int new) | ||
| 2930 | { | 2949 | { |
| 2931 | struct perf_task_event task_event; | 2950 | struct perf_task_event task_event; |
| 2932 | 2951 | ||
| @@ -2936,8 +2955,9 @@ static void perf_counter_task(struct task_struct *task, int new) | |||
| 2936 | return; | 2955 | return; |
| 2937 | 2956 | ||
| 2938 | task_event = (struct perf_task_event){ | 2957 | task_event = (struct perf_task_event){ |
| 2939 | .task = task, | 2958 | .task = task, |
| 2940 | .event = { | 2959 | .task_ctx = task_ctx, |
| 2960 | .event = { | ||
| 2941 | .header = { | 2961 | .header = { |
| 2942 | .type = new ? PERF_EVENT_FORK : PERF_EVENT_EXIT, | 2962 | .type = new ? PERF_EVENT_FORK : PERF_EVENT_EXIT, |
| 2943 | .misc = 0, | 2963 | .misc = 0, |
| @@ -2955,7 +2975,7 @@ static void perf_counter_task(struct task_struct *task, int new) | |||
| 2955 | 2975 | ||
| 2956 | void perf_counter_fork(struct task_struct *task) | 2976 | void perf_counter_fork(struct task_struct *task) |
| 2957 | { | 2977 | { |
| 2958 | perf_counter_task(task, 1); | 2978 | perf_counter_task(task, NULL, 1); |
| 2959 | } | 2979 | } |
| 2960 | 2980 | ||
| 2961 | /* | 2981 | /* |
| @@ -3344,87 +3364,81 @@ int perf_counter_overflow(struct perf_counter *counter, int nmi, | |||
| 3344 | * Generic software counter infrastructure | 3364 | * Generic software counter infrastructure |
| 3345 | */ | 3365 | */ |
| 3346 | 3366 | ||
| 3347 | static void perf_swcounter_update(struct perf_counter *counter) | 3367 | /* |
| 3368 | * We directly increment counter->count and keep a second value in | ||
| 3369 | * counter->hw.period_left to count intervals. This period counter | ||
| 3370 | * is kept in the range [-sample_period, 0] so that we can use the | ||
| 3371 | * sign as trigger. | ||
| 3372 | */ | ||
| 3373 | |||
| 3374 | static u64 perf_swcounter_set_period(struct perf_counter *counter) | ||
| 3348 | { | 3375 | { |
| 3349 | struct hw_perf_counter *hwc = &counter->hw; | 3376 | struct hw_perf_counter *hwc = &counter->hw; |
| 3350 | u64 prev, now; | 3377 | u64 period = hwc->last_period; |
| 3351 | s64 delta; | 3378 | u64 nr, offset; |
| 3379 | s64 old, val; | ||
| 3380 | |||
| 3381 | hwc->last_period = hwc->sample_period; | ||
| 3352 | 3382 | ||
| 3353 | again: | 3383 | again: |
| 3354 | prev = atomic64_read(&hwc->prev_count); | 3384 | old = val = atomic64_read(&hwc->period_left); |
| 3355 | now = atomic64_read(&hwc->count); | 3385 | if (val < 0) |
| 3356 | if (atomic64_cmpxchg(&hwc->prev_count, prev, now) != prev) | 3386 | return 0; |
| 3357 | goto again; | ||
| 3358 | 3387 | ||
| 3359 | delta = now - prev; | 3388 | nr = div64_u64(period + val, period); |
| 3389 | offset = nr * period; | ||
| 3390 | val -= offset; | ||
| 3391 | if (atomic64_cmpxchg(&hwc->period_left, old, val) != old) | ||
| 3392 | goto again; | ||
| 3360 | 3393 | ||
| 3361 | atomic64_add(delta, &counter->count); | 3394 | return nr; |
| 3362 | atomic64_sub(delta, &hwc->period_left); | ||
| 3363 | } | 3395 | } |
| 3364 | 3396 | ||
| 3365 | static void perf_swcounter_set_period(struct perf_counter *counter) | 3397 | static void perf_swcounter_overflow(struct perf_counter *counter, |
| 3398 | int nmi, struct perf_sample_data *data) | ||
| 3366 | { | 3399 | { |
| 3367 | struct hw_perf_counter *hwc = &counter->hw; | 3400 | struct hw_perf_counter *hwc = &counter->hw; |
| 3368 | s64 left = atomic64_read(&hwc->period_left); | 3401 | u64 overflow; |
| 3369 | s64 period = hwc->sample_period; | ||
| 3370 | 3402 | ||
| 3371 | if (unlikely(left <= -period)) { | 3403 | data->period = counter->hw.last_period; |
| 3372 | left = period; | 3404 | overflow = perf_swcounter_set_period(counter); |
| 3373 | atomic64_set(&hwc->period_left, left); | ||
| 3374 | hwc->last_period = period; | ||
| 3375 | } | ||
| 3376 | 3405 | ||
| 3377 | if (unlikely(left <= 0)) { | 3406 | if (hwc->interrupts == MAX_INTERRUPTS) |
| 3378 | left += period; | 3407 | return; |
| 3379 | atomic64_add(period, &hwc->period_left); | ||
| 3380 | hwc->last_period = period; | ||
| 3381 | } | ||
| 3382 | 3408 | ||
| 3383 | atomic64_set(&hwc->prev_count, -left); | 3409 | for (; overflow; overflow--) { |
| 3384 | atomic64_set(&hwc->count, -left); | 3410 | if (perf_counter_overflow(counter, nmi, data)) { |
| 3411 | /* | ||
| 3412 | * We inhibit the overflow from happening when | ||
| 3413 | * hwc->interrupts == MAX_INTERRUPTS. | ||
| 3414 | */ | ||
| 3415 | break; | ||
| 3416 | } | ||
| 3417 | } | ||
| 3385 | } | 3418 | } |
| 3386 | 3419 | ||
| 3387 | static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer) | 3420 | static void perf_swcounter_unthrottle(struct perf_counter *counter) |
| 3388 | { | 3421 | { |
| 3389 | enum hrtimer_restart ret = HRTIMER_RESTART; | ||
| 3390 | struct perf_sample_data data; | ||
| 3391 | struct perf_counter *counter; | ||
| 3392 | u64 period; | ||
| 3393 | |||
| 3394 | counter = container_of(hrtimer, struct perf_counter, hw.hrtimer); | ||
| 3395 | counter->pmu->read(counter); | ||
| 3396 | |||
| 3397 | data.addr = 0; | ||
| 3398 | data.regs = get_irq_regs(); | ||
| 3399 | /* | 3422 | /* |
| 3400 | * In case we exclude kernel IPs or are somehow not in interrupt | 3423 | * Nothing to do, we already reset hwc->interrupts. |
| 3401 | * context, provide the next best thing, the user IP. | ||
| 3402 | */ | 3424 | */ |
| 3403 | if ((counter->attr.exclude_kernel || !data.regs) && | 3425 | } |
| 3404 | !counter->attr.exclude_user) | ||
| 3405 | data.regs = task_pt_regs(current); | ||
| 3406 | 3426 | ||
| 3407 | if (data.regs) { | 3427 | static void perf_swcounter_add(struct perf_counter *counter, u64 nr, |
| 3408 | if (perf_counter_overflow(counter, 0, &data)) | 3428 | int nmi, struct perf_sample_data *data) |
| 3409 | ret = HRTIMER_NORESTART; | 3429 | { |
| 3410 | } | 3430 | struct hw_perf_counter *hwc = &counter->hw; |
| 3411 | 3431 | ||
| 3412 | period = max_t(u64, 10000, counter->hw.sample_period); | 3432 | atomic64_add(nr, &counter->count); |
| 3413 | hrtimer_forward_now(hrtimer, ns_to_ktime(period)); | ||
| 3414 | 3433 | ||
| 3415 | return ret; | 3434 | if (!hwc->sample_period) |
| 3416 | } | 3435 | return; |
| 3417 | 3436 | ||
| 3418 | static void perf_swcounter_overflow(struct perf_counter *counter, | 3437 | if (!data->regs) |
| 3419 | int nmi, struct perf_sample_data *data) | 3438 | return; |
| 3420 | { | ||
| 3421 | data->period = counter->hw.last_period; | ||
| 3422 | 3439 | ||
| 3423 | perf_swcounter_update(counter); | 3440 | if (!atomic64_add_negative(nr, &hwc->period_left)) |
| 3424 | perf_swcounter_set_period(counter); | 3441 | perf_swcounter_overflow(counter, nmi, data); |
| 3425 | if (perf_counter_overflow(counter, nmi, data)) | ||
| 3426 | /* soft-disable the counter */ | ||
| 3427 | ; | ||
| 3428 | } | 3442 | } |
| 3429 | 3443 | ||
| 3430 | static int perf_swcounter_is_counting(struct perf_counter *counter) | 3444 | static int perf_swcounter_is_counting(struct perf_counter *counter) |
| @@ -3488,15 +3502,6 @@ static int perf_swcounter_match(struct perf_counter *counter, | |||
| 3488 | return 1; | 3502 | return 1; |
| 3489 | } | 3503 | } |
| 3490 | 3504 | ||
| 3491 | static void perf_swcounter_add(struct perf_counter *counter, u64 nr, | ||
| 3492 | int nmi, struct perf_sample_data *data) | ||
| 3493 | { | ||
| 3494 | int neg = atomic64_add_negative(nr, &counter->hw.count); | ||
| 3495 | |||
| 3496 | if (counter->hw.sample_period && !neg && data->regs) | ||
| 3497 | perf_swcounter_overflow(counter, nmi, data); | ||
| 3498 | } | ||
| 3499 | |||
| 3500 | static void perf_swcounter_ctx_event(struct perf_counter_context *ctx, | 3505 | static void perf_swcounter_ctx_event(struct perf_counter_context *ctx, |
| 3501 | enum perf_type_id type, | 3506 | enum perf_type_id type, |
| 3502 | u32 event, u64 nr, int nmi, | 3507 | u32 event, u64 nr, int nmi, |
| @@ -3575,27 +3580,66 @@ void __perf_swcounter_event(u32 event, u64 nr, int nmi, | |||
| 3575 | 3580 | ||
| 3576 | static void perf_swcounter_read(struct perf_counter *counter) | 3581 | static void perf_swcounter_read(struct perf_counter *counter) |
| 3577 | { | 3582 | { |
| 3578 | perf_swcounter_update(counter); | ||
| 3579 | } | 3583 | } |
| 3580 | 3584 | ||
| 3581 | static int perf_swcounter_enable(struct perf_counter *counter) | 3585 | static int perf_swcounter_enable(struct perf_counter *counter) |
| 3582 | { | 3586 | { |
| 3583 | perf_swcounter_set_period(counter); | 3587 | struct hw_perf_counter *hwc = &counter->hw; |
| 3588 | |||
| 3589 | if (hwc->sample_period) { | ||
| 3590 | hwc->last_period = hwc->sample_period; | ||
| 3591 | perf_swcounter_set_period(counter); | ||
| 3592 | } | ||
| 3584 | return 0; | 3593 | return 0; |
| 3585 | } | 3594 | } |
| 3586 | 3595 | ||
| 3587 | static void perf_swcounter_disable(struct perf_counter *counter) | 3596 | static void perf_swcounter_disable(struct perf_counter *counter) |
| 3588 | { | 3597 | { |
| 3589 | perf_swcounter_update(counter); | ||
| 3590 | } | 3598 | } |
| 3591 | 3599 | ||
| 3592 | static const struct pmu perf_ops_generic = { | 3600 | static const struct pmu perf_ops_generic = { |
| 3593 | .enable = perf_swcounter_enable, | 3601 | .enable = perf_swcounter_enable, |
| 3594 | .disable = perf_swcounter_disable, | 3602 | .disable = perf_swcounter_disable, |
| 3595 | .read = perf_swcounter_read, | 3603 | .read = perf_swcounter_read, |
| 3604 | .unthrottle = perf_swcounter_unthrottle, | ||
| 3596 | }; | 3605 | }; |
| 3597 | 3606 | ||
| 3598 | /* | 3607 | /* |
| 3608 | * hrtimer based swcounter callback | ||
| 3609 | */ | ||
| 3610 | |||
| 3611 | static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer) | ||
| 3612 | { | ||
| 3613 | enum hrtimer_restart ret = HRTIMER_RESTART; | ||
| 3614 | struct perf_sample_data data; | ||
| 3615 | struct perf_counter *counter; | ||
| 3616 | u64 period; | ||
| 3617 | |||
| 3618 | counter = container_of(hrtimer, struct perf_counter, hw.hrtimer); | ||
| 3619 | counter->pmu->read(counter); | ||
| 3620 | |||
| 3621 | data.addr = 0; | ||
| 3622 | data.regs = get_irq_regs(); | ||
| 3623 | /* | ||
| 3624 | * In case we exclude kernel IPs or are somehow not in interrupt | ||
| 3625 | * context, provide the next best thing, the user IP. | ||
| 3626 | */ | ||
| 3627 | if ((counter->attr.exclude_kernel || !data.regs) && | ||
| 3628 | !counter->attr.exclude_user) | ||
| 3629 | data.regs = task_pt_regs(current); | ||
| 3630 | |||
| 3631 | if (data.regs) { | ||
| 3632 | if (perf_counter_overflow(counter, 0, &data)) | ||
| 3633 | ret = HRTIMER_NORESTART; | ||
| 3634 | } | ||
| 3635 | |||
| 3636 | period = max_t(u64, 10000, counter->hw.sample_period); | ||
| 3637 | hrtimer_forward_now(hrtimer, ns_to_ktime(period)); | ||
| 3638 | |||
| 3639 | return ret; | ||
| 3640 | } | ||
| 3641 | |||
| 3642 | /* | ||
| 3599 | * Software counter: cpu wall time clock | 3643 | * Software counter: cpu wall time clock |
| 3600 | */ | 3644 | */ |
| 3601 | 3645 | ||
| @@ -3715,15 +3759,15 @@ static const struct pmu perf_ops_task_clock = { | |||
| 3715 | void perf_tpcounter_event(int event_id, u64 addr, u64 count, void *record, | 3759 | void perf_tpcounter_event(int event_id, u64 addr, u64 count, void *record, |
| 3716 | int entry_size) | 3760 | int entry_size) |
| 3717 | { | 3761 | { |
| 3718 | struct perf_tracepoint_record tp = { | 3762 | struct perf_raw_record raw = { |
| 3719 | .size = entry_size, | 3763 | .size = entry_size, |
| 3720 | .record = record, | 3764 | .data = record, |
| 3721 | }; | 3765 | }; |
| 3722 | 3766 | ||
| 3723 | struct perf_sample_data data = { | 3767 | struct perf_sample_data data = { |
| 3724 | .regs = get_irq_regs(), | 3768 | .regs = get_irq_regs(), |
| 3725 | .addr = addr, | 3769 | .addr = addr, |
| 3726 | .private = &tp, | 3770 | .raw = &raw, |
| 3727 | }; | 3771 | }; |
| 3728 | 3772 | ||
| 3729 | if (!data.regs) | 3773 | if (!data.regs) |
| @@ -3743,6 +3787,14 @@ static void tp_perf_counter_destroy(struct perf_counter *counter) | |||
| 3743 | 3787 | ||
| 3744 | static const struct pmu *tp_perf_counter_init(struct perf_counter *counter) | 3788 | static const struct pmu *tp_perf_counter_init(struct perf_counter *counter) |
| 3745 | { | 3789 | { |
| 3790 | /* | ||
| 3791 | * Raw tracepoint data is a severe data leak, only allow root to | ||
| 3792 | * have these. | ||
| 3793 | */ | ||
| 3794 | if ((counter->attr.sample_type & PERF_SAMPLE_RAW) && | ||
| 3795 | !capable(CAP_SYS_ADMIN)) | ||
| 3796 | return ERR_PTR(-EPERM); | ||
| 3797 | |||
| 3746 | if (ftrace_profile_enable(counter->attr.config)) | 3798 | if (ftrace_profile_enable(counter->attr.config)) |
| 3747 | return NULL; | 3799 | return NULL; |
| 3748 | 3800 | ||
| @@ -4285,7 +4337,7 @@ void perf_counter_exit_task(struct task_struct *child) | |||
| 4285 | unsigned long flags; | 4337 | unsigned long flags; |
| 4286 | 4338 | ||
| 4287 | if (likely(!child->perf_counter_ctxp)) { | 4339 | if (likely(!child->perf_counter_ctxp)) { |
| 4288 | perf_counter_task(child, 0); | 4340 | perf_counter_task(child, NULL, 0); |
| 4289 | return; | 4341 | return; |
| 4290 | } | 4342 | } |
| 4291 | 4343 | ||
| @@ -4305,6 +4357,7 @@ void perf_counter_exit_task(struct task_struct *child) | |||
| 4305 | * incremented the context's refcount before we do put_ctx below. | 4357 | * incremented the context's refcount before we do put_ctx below. |
| 4306 | */ | 4358 | */ |
| 4307 | spin_lock(&child_ctx->lock); | 4359 | spin_lock(&child_ctx->lock); |
| 4360 | child->perf_counter_ctxp = NULL; | ||
| 4308 | /* | 4361 | /* |
| 4309 | * If this context is a clone; unclone it so it can't get | 4362 | * If this context is a clone; unclone it so it can't get |
| 4310 | * swapped to another process while we're removing all | 4363 | * swapped to another process while we're removing all |
| @@ -4318,9 +4371,7 @@ void perf_counter_exit_task(struct task_struct *child) | |||
| 4318 | * won't get any samples after PERF_EVENT_EXIT. We can however still | 4371 | * won't get any samples after PERF_EVENT_EXIT. We can however still |
| 4319 | * get a few PERF_EVENT_READ events. | 4372 | * get a few PERF_EVENT_READ events. |
| 4320 | */ | 4373 | */ |
| 4321 | perf_counter_task(child, 0); | 4374 | perf_counter_task(child, child_ctx, 0); |
| 4322 | |||
| 4323 | child->perf_counter_ctxp = NULL; | ||
| 4324 | 4375 | ||
| 4325 | /* | 4376 | /* |
| 4326 | * We can recurse on the same lock type through: | 4377 | * We can recurse on the same lock type through: |
diff --git a/tools/perf/Documentation/perf-examples.txt b/tools/perf/Documentation/perf-examples.txt new file mode 100644 index 000000000000..8eb6c489fb15 --- /dev/null +++ b/tools/perf/Documentation/perf-examples.txt | |||
| @@ -0,0 +1,225 @@ | |||
| 1 | |||
| 2 | ------------------------------ | ||
| 3 | ****** perf by examples ****** | ||
| 4 | ------------------------------ | ||
| 5 | |||
| 6 | [ From an e-mail by Ingo Molnar, http://lkml.org/lkml/2009/8/4/346 ] | ||
| 7 | |||
| 8 | |||
| 9 | First, discovery/enumeration of available counters can be done via | ||
| 10 | 'perf list': | ||
| 11 | |||
| 12 | titan:~> perf list | ||
| 13 | [...] | ||
| 14 | kmem:kmalloc [Tracepoint event] | ||
| 15 | kmem:kmem_cache_alloc [Tracepoint event] | ||
| 16 | kmem:kmalloc_node [Tracepoint event] | ||
| 17 | kmem:kmem_cache_alloc_node [Tracepoint event] | ||
| 18 | kmem:kfree [Tracepoint event] | ||
| 19 | kmem:kmem_cache_free [Tracepoint event] | ||
| 20 | kmem:mm_page_free_direct [Tracepoint event] | ||
| 21 | kmem:mm_pagevec_free [Tracepoint event] | ||
| 22 | kmem:mm_page_alloc [Tracepoint event] | ||
| 23 | kmem:mm_page_alloc_zone_locked [Tracepoint event] | ||
| 24 | kmem:mm_page_pcpu_drain [Tracepoint event] | ||
| 25 | kmem:mm_page_alloc_extfrag [Tracepoint event] | ||
| 26 | |||
| 27 | Then any (or all) of the above event sources can be activated and | ||
| 28 | measured. For example the page alloc/free properties of a 'hackbench | ||
| 29 | run' are: | ||
| 30 | |||
| 31 | titan:~> perf stat -e kmem:mm_page_pcpu_drain -e kmem:mm_page_alloc | ||
| 32 | -e kmem:mm_pagevec_free -e kmem:mm_page_free_direct ./hackbench 10 | ||
| 33 | Time: 0.575 | ||
| 34 | |||
| 35 | Performance counter stats for './hackbench 10': | ||
| 36 | |||
| 37 | 13857 kmem:mm_page_pcpu_drain | ||
| 38 | 27576 kmem:mm_page_alloc | ||
| 39 | 6025 kmem:mm_pagevec_free | ||
| 40 | 20934 kmem:mm_page_free_direct | ||
| 41 | |||
| 42 | 0.613972165 seconds time elapsed | ||
| 43 | |||
| 44 | You can observe the statistical properties as well, by using the | ||
| 45 | 'repeat the workload N times' feature of perf stat: | ||
| 46 | |||
| 47 | titan:~> perf stat --repeat 5 -e kmem:mm_page_pcpu_drain -e | ||
| 48 | kmem:mm_page_alloc -e kmem:mm_pagevec_free -e | ||
| 49 | kmem:mm_page_free_direct ./hackbench 10 | ||
| 50 | Time: 0.627 | ||
| 51 | Time: 0.644 | ||
| 52 | Time: 0.564 | ||
| 53 | Time: 0.559 | ||
| 54 | Time: 0.626 | ||
| 55 | |||
| 56 | Performance counter stats for './hackbench 10' (5 runs): | ||
| 57 | |||
| 58 | 12920 kmem:mm_page_pcpu_drain ( +- 3.359% ) | ||
| 59 | 25035 kmem:mm_page_alloc ( +- 3.783% ) | ||
| 60 | 6104 kmem:mm_pagevec_free ( +- 0.934% ) | ||
| 61 | 18376 kmem:mm_page_free_direct ( +- 4.941% ) | ||
| 62 | |||
| 63 | 0.643954516 seconds time elapsed ( +- 2.363% ) | ||
| 64 | |||
| 65 | Furthermore, these tracepoints can be used to sample the workload as | ||
| 66 | well. For example the page allocations done by a 'git gc' can be | ||
| 67 | captured the following way: | ||
| 68 | |||
| 69 | titan:~/git> perf record -f -e kmem:mm_page_alloc -c 1 ./git gc | ||
| 70 | Counting objects: 1148, done. | ||
| 71 | Delta compression using up to 2 threads. | ||
| 72 | Compressing objects: 100% (450/450), done. | ||
| 73 | Writing objects: 100% (1148/1148), done. | ||
| 74 | Total 1148 (delta 690), reused 1148 (delta 690) | ||
| 75 | [ perf record: Captured and wrote 0.267 MB perf.data (~11679 samples) ] | ||
| 76 | |||
| 77 | To check which functions generated page allocations: | ||
| 78 | |||
| 79 | titan:~/git> perf report | ||
| 80 | # Samples: 10646 | ||
| 81 | # | ||
| 82 | # Overhead Command Shared Object | ||
| 83 | # ........ ............... .......................... | ||
| 84 | # | ||
| 85 | 23.57% git-repack /lib64/libc-2.5.so | ||
| 86 | 21.81% git /lib64/libc-2.5.so | ||
| 87 | 14.59% git ./git | ||
| 88 | 11.79% git-repack ./git | ||
| 89 | 7.12% git /lib64/ld-2.5.so | ||
| 90 | 3.16% git-repack /lib64/libpthread-2.5.so | ||
| 91 | 2.09% git-repack /bin/bash | ||
| 92 | 1.97% rm /lib64/libc-2.5.so | ||
| 93 | 1.39% mv /lib64/ld-2.5.so | ||
| 94 | 1.37% mv /lib64/libc-2.5.so | ||
| 95 | 1.12% git-repack /lib64/ld-2.5.so | ||
| 96 | 0.95% rm /lib64/ld-2.5.so | ||
| 97 | 0.90% git-update-serv /lib64/libc-2.5.so | ||
| 98 | 0.73% git-update-serv /lib64/ld-2.5.so | ||
| 99 | 0.68% perf /lib64/libpthread-2.5.so | ||
| 100 | 0.64% git-repack /usr/lib64/libz.so.1.2.3 | ||
| 101 | |||
| 102 | Or to see it on a more fine-grained level: | ||
| 103 | |||
| 104 | titan:~/git> perf report --sort comm,dso,symbol | ||
| 105 | # Samples: 10646 | ||
| 106 | # | ||
| 107 | # Overhead Command Shared Object Symbol | ||
| 108 | # ........ ............... .......................... ...... | ||
| 109 | # | ||
| 110 | 9.35% git-repack ./git [.] insert_obj_hash | ||
| 111 | 9.12% git ./git [.] insert_obj_hash | ||
| 112 | 7.31% git /lib64/libc-2.5.so [.] memcpy | ||
| 113 | 6.34% git-repack /lib64/libc-2.5.so [.] _int_malloc | ||
| 114 | 6.24% git-repack /lib64/libc-2.5.so [.] memcpy | ||
| 115 | 5.82% git-repack /lib64/libc-2.5.so [.] __GI___fork | ||
| 116 | 5.47% git /lib64/libc-2.5.so [.] _int_malloc | ||
| 117 | 2.99% git /lib64/libc-2.5.so [.] memset | ||
| 118 | |||
| 119 | Furthermore, call-graph sampling can be done too, of page | ||
| 120 | allocations - to see precisely what kind of page allocations there | ||
| 121 | are: | ||
| 122 | |||
| 123 | titan:~/git> perf record -f -g -e kmem:mm_page_alloc -c 1 ./git gc | ||
| 124 | Counting objects: 1148, done. | ||
| 125 | Delta compression using up to 2 threads. | ||
| 126 | Compressing objects: 100% (450/450), done. | ||
| 127 | Writing objects: 100% (1148/1148), done. | ||
| 128 | Total 1148 (delta 690), reused 1148 (delta 690) | ||
| 129 | [ perf record: Captured and wrote 0.963 MB perf.data (~42069 samples) ] | ||
| 130 | |||
| 131 | titan:~/git> perf report -g | ||
| 132 | # Samples: 10686 | ||
| 133 | # | ||
| 134 | # Overhead Command Shared Object | ||
| 135 | # ........ ............... .......................... | ||
| 136 | # | ||
| 137 | 23.25% git-repack /lib64/libc-2.5.so | ||
| 138 | | | ||
| 139 | |--50.00%-- _int_free | ||
| 140 | | | ||
| 141 | |--37.50%-- __GI___fork | ||
| 142 | | make_child | ||
| 143 | | | ||
| 144 | |--12.50%-- ptmalloc_unlock_all2 | ||
| 145 | | make_child | ||
| 146 | | | ||
| 147 | --6.25%-- __GI_strcpy | ||
| 148 | 21.61% git /lib64/libc-2.5.so | ||
| 149 | | | ||
| 150 | |--30.00%-- __GI_read | ||
| 151 | | | | ||
| 152 | | --83.33%-- git_config_from_file | ||
| 153 | | git_config | ||
| 154 | | | | ||
| 155 | [...] | ||
| 156 | |||
| 157 | Or you can observe the whole system's page allocations for 10 | ||
| 158 | seconds: | ||
| 159 | |||
| 160 | titan:~/git> perf stat -a -e kmem:mm_page_pcpu_drain -e | ||
| 161 | kmem:mm_page_alloc -e kmem:mm_pagevec_free -e | ||
| 162 | kmem:mm_page_free_direct sleep 10 | ||
| 163 | |||
| 164 | Performance counter stats for 'sleep 10': | ||
| 165 | |||
| 166 | 171585 kmem:mm_page_pcpu_drain | ||
| 167 | 322114 kmem:mm_page_alloc | ||
| 168 | 73623 kmem:mm_pagevec_free | ||
| 169 | 254115 kmem:mm_page_free_direct | ||
| 170 | |||
| 171 | 10.000591410 seconds time elapsed | ||
| 172 | |||
| 173 | Or observe how fluctuating the page allocations are, via statistical | ||
| 174 | analysis done over ten 1-second intervals: | ||
| 175 | |||
| 176 | titan:~/git> perf stat --repeat 10 -a -e kmem:mm_page_pcpu_drain -e | ||
| 177 | kmem:mm_page_alloc -e kmem:mm_pagevec_free -e | ||
| 178 | kmem:mm_page_free_direct sleep 1 | ||
| 179 | |||
| 180 | Performance counter stats for 'sleep 1' (10 runs): | ||
| 181 | |||
| 182 | 17254 kmem:mm_page_pcpu_drain ( +- 3.709% ) | ||
| 183 | 34394 kmem:mm_page_alloc ( +- 4.617% ) | ||
| 184 | 7509 kmem:mm_pagevec_free ( +- 4.820% ) | ||
| 185 | 25653 kmem:mm_page_free_direct ( +- 3.672% ) | ||
| 186 | |||
| 187 | 1.058135029 seconds time elapsed ( +- 3.089% ) | ||
| 188 | |||
| 189 | Or you can annotate the recorded 'git gc' run on a per symbol basis | ||
| 190 | and check which instructions/source-code generated page allocations: | ||
| 191 | |||
| 192 | titan:~/git> perf annotate __GI___fork | ||
| 193 | ------------------------------------------------ | ||
| 194 | Percent | Source code & Disassembly of libc-2.5.so | ||
| 195 | ------------------------------------------------ | ||
| 196 | : | ||
| 197 | : | ||
| 198 | : Disassembly of section .plt: | ||
| 199 | : Disassembly of section .text: | ||
| 200 | : | ||
| 201 | : 00000031a2e95560 <__fork>: | ||
| 202 | [...] | ||
| 203 | 0.00 : 31a2e95602: b8 38 00 00 00 mov $0x38,%eax | ||
| 204 | 0.00 : 31a2e95607: 0f 05 syscall | ||
| 205 | 83.42 : 31a2e95609: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax | ||
| 206 | 0.00 : 31a2e9560f: 0f 87 4d 01 00 00 ja 31a2e95762 <__fork+0x202> | ||
| 207 | 0.00 : 31a2e95615: 85 c0 test %eax,%eax | ||
| 208 | |||
| 209 | ( this shows that 83.42% of __GI___fork's page allocations come from | ||
| 210 | the 0x38 system call it performs. ) | ||
| 211 | |||
| 212 | etc. etc. - a lot more is possible. I could list a dozen | ||
| 213 | other use cases straight away - none of which is | ||
| 214 | possible via /proc/vmstat. | ||
| 215 | |||
| 216 | /proc/vmstat is not in the same league really, in terms of | ||
| 217 | expressive power of system analysis and performance | ||
| 218 | analysis. | ||
| 219 | |||
| 220 | All that the above results needed were those new tracepoints | ||
| 221 | in include/tracing/events/kmem.h. | ||
| 222 | |||
| 223 | Ingo | ||
| 224 | |||
| 225 | |||
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 0d74346d21ab..484080dd5b6f 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt | |||
| @@ -40,7 +40,7 @@ OPTIONS | |||
| 40 | -a:: | 40 | -a:: |
| 41 | system-wide collection | 41 | system-wide collection |
| 42 | 42 | ||
| 43 | -S:: | 43 | -c:: |
| 44 | scale counter values | 44 | scale counter values |
| 45 | 45 | ||
| 46 | EXAMPLES | 46 | EXAMPLES |
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index 539d01289725..4a7d558dc309 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt | |||
| @@ -3,36 +3,122 @@ perf-top(1) | |||
| 3 | 3 | ||
| 4 | NAME | 4 | NAME |
| 5 | ---- | 5 | ---- |
| 6 | perf-top - Run a command and profile it | 6 | perf-top - System profiling tool. |
| 7 | 7 | ||
| 8 | SYNOPSIS | 8 | SYNOPSIS |
| 9 | -------- | 9 | -------- |
| 10 | [verse] | 10 | [verse] |
| 11 | 'perf top' [-e <EVENT> | --event=EVENT] [-l] [-a] <command> | 11 | 'perf top' [-e <EVENT> | --event=EVENT] [<options>] |
| 12 | 12 | ||
| 13 | DESCRIPTION | 13 | DESCRIPTION |
| 14 | ----------- | 14 | ----------- |
| 15 | This command runs a command and gathers a performance counter profile | 15 | This command generates and displays a performance counter profile in realtime. |
| 16 | from it. | ||
| 17 | 16 | ||
| 18 | 17 | ||
| 19 | OPTIONS | 18 | OPTIONS |
| 20 | ------- | 19 | ------- |
| 21 | <command>...:: | 20 | -a:: |
| 22 | Any command you can specify in a shell. | 21 | --all-cpus:: |
| 22 | System-wide collection. (default) | ||
| 23 | |||
| 24 | -c <count>:: | ||
| 25 | --count=<count>:: | ||
| 26 | Event period to sample. | ||
| 27 | |||
| 28 | -C <cpu>:: | ||
| 29 | --CPU=<cpu>:: | ||
| 30 | CPU to profile. | ||
| 31 | |||
| 32 | -d <seconds>:: | ||
| 33 | --delay=<seconds>:: | ||
| 34 | Number of seconds to delay between refreshes. | ||
| 23 | 35 | ||
| 24 | -e:: | 36 | -e <event>:: |
| 25 | --event=:: | 37 | --event=<event>:: |
| 26 | Select the PMU event. Selection can be a symbolic event name | 38 | Select the PMU event. Selection can be a symbolic event name |
| 27 | (use 'perf list' to list all events) or a raw PMU | 39 | (use 'perf list' to list all events) or a raw PMU |
| 28 | event (eventsel+umask) in the form of rNNN where NNN is a | 40 | event (eventsel+umask) in the form of rNNN where NNN is a |
| 29 | hexadecimal event descriptor. | 41 | hexadecimal event descriptor. |
| 30 | 42 | ||
| 31 | -a:: | 43 | -E <entries>:: |
| 32 | system-wide collection | 44 | --entries=<entries>:: |
| 45 | Display this many functions. | ||
| 46 | |||
| 47 | -f <count>:: | ||
| 48 | --count-filter=<count>:: | ||
| 49 | Only display functions with more events than this. | ||
| 50 | |||
| 51 | -F <freq>:: | ||
| 52 | --freq=<freq>:: | ||
| 53 | Profile at this frequency. | ||
| 54 | |||
| 55 | -i:: | ||
| 56 | --inherit:: | ||
| 57 | Child tasks inherit counters, only makes sense with the -p option. | ||
| 58 | |||
| 59 | -k <path>:: | ||
| 60 | --vmlinux=<path>:: | ||
| 61 | Path to vmlinux. Required for annotation functionality. | ||
| 62 | |||
| 63 | -m <pages>:: | ||
| 64 | --mmap-pages=<pages>:: | ||
| 65 | Number of mmapped data pages. | ||
| 66 | |||
| 67 | -p <pid>:: | ||
| 68 | --pid=<pid>:: | ||
| 69 | Profile events on existing pid. | ||
| 70 | |||
| 71 | -r <priority>:: | ||
| 72 | --realtime=<priority>:: | ||
| 73 | Collect data with this RT SCHED_FIFO priority. | ||
| 74 | |||
| 75 | -s <symbol>:: | ||
| 76 | --sym-annotate=<symbol>:: | ||
| 77 | Annotate this symbol. Requires -k option. | ||
| 78 | |||
| 79 | -v:: | ||
| 80 | --verbose:: | ||
| 81 | Be more verbose (show counter open errors, etc). | ||
| 82 | |||
| 83 | -z:: | ||
| 84 | --zero:: | ||
| 85 | Zero history across display updates. | ||
| 86 | |||
| 87 | INTERACTIVE PROMPTING KEYS | ||
| 88 | -------------------------- | ||
| 89 | |||
| 90 | [d]:: | ||
| 91 | Display refresh delay. | ||
| 92 | |||
| 93 | [e]:: | ||
| 94 | Number of entries to display. | ||
| 95 | |||
| 96 | [E]:: | ||
| 97 | Event to display when multiple counters are active. | ||
| 98 | |||
| 99 | [f]:: | ||
| 100 | Profile display filter (>= hit count). | ||
| 101 | |||
| 102 | [F]:: | ||
| 103 | Annotation display filter (>= % of total). | ||
| 104 | |||
| 105 | [s]:: | ||
| 106 | Annotate symbol. | ||
| 107 | |||
| 108 | [S]:: | ||
| 109 | Stop annotation, return to full profile display. | ||
| 110 | |||
| 111 | [w]:: | ||
| 112 | Toggle between weighted sum and individual count[E]r profile. | ||
| 113 | |||
| 114 | [z]:: | ||
| 115 | Toggle event count zeroing across display updates. | ||
| 116 | |||
| 117 | [qQ]:: | ||
| 118 | Quit. | ||
| 119 | |||
| 120 | Pressing any unmapped key displays a menu, and prompts for input. | ||
| 33 | 121 | ||
| 34 | -l:: | ||
| 35 | scale counter values | ||
| 36 | 122 | ||
| 37 | SEE ALSO | 123 | SEE ALSO |
| 38 | -------- | 124 | -------- |
diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 1916e44b9bb0..60411e94113b 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile | |||
| @@ -387,10 +387,14 @@ else | |||
| 387 | 387 | ||
| 388 | has_bfd_iberty := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd -liberty > /dev/null 2>&1 && echo y") | 388 | has_bfd_iberty := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd -liberty > /dev/null 2>&1 && echo y") |
| 389 | 389 | ||
| 390 | has_bfd_iberty_z := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd -liberty -lz > /dev/null 2>&1 && echo y") | ||
| 391 | |||
| 390 | ifeq ($(has_bfd),y) | 392 | ifeq ($(has_bfd),y) |
| 391 | EXTLIBS += -lbfd | 393 | EXTLIBS += -lbfd |
| 392 | else ifeq ($(has_bfd_iberty),y) | 394 | else ifeq ($(has_bfd_iberty),y) |
| 393 | EXTLIBS += -lbfd -liberty | 395 | EXTLIBS += -lbfd -liberty |
| 396 | else ifeq ($(has_bfd_iberty_z),y) | ||
| 397 | EXTLIBS += -lbfd -liberty -lz | ||
| 394 | else | 398 | else |
| 395 | msg := $(warning No bfd.h/libbfd found, install binutils-dev[el] to gain symbol demangling) | 399 | msg := $(warning No bfd.h/libbfd found, install binutils-dev[el] to gain symbol demangling) |
| 396 | BASIC_CFLAGS += -DNO_DEMANGLE | 400 | BASIC_CFLAGS += -DNO_DEMANGLE |
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 90c98082af10..0345aad8eba5 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c | |||
| @@ -525,10 +525,14 @@ static int __cmd_record(int argc, const char **argv) | |||
| 525 | signal(SIGCHLD, sig_handler); | 525 | signal(SIGCHLD, sig_handler); |
| 526 | signal(SIGINT, sig_handler); | 526 | signal(SIGINT, sig_handler); |
| 527 | 527 | ||
| 528 | if (!stat(output_name, &st) && !force && !append_file) { | 528 | if (!stat(output_name, &st) && st.st_size) { |
| 529 | fprintf(stderr, "Error, output file %s exists, use -A to append or -f to overwrite.\n", | 529 | if (!force && !append_file) { |
| 530 | output_name); | 530 | fprintf(stderr, "Error, output file %s exists, use -A to append or -f to overwrite.\n", |
| 531 | exit(-1); | 531 | output_name); |
| 532 | exit(-1); | ||
| 533 | } | ||
| 534 | } else { | ||
| 535 | append_file = 0; | ||
| 532 | } | 536 | } |
| 533 | 537 | ||
| 534 | flags = O_CREAT|O_RDWR; | 538 | flags = O_CREAT|O_RDWR; |
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 8cb58d68a006..99274cec0adb 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c | |||
| @@ -68,7 +68,7 @@ static int callchain; | |||
| 68 | 68 | ||
| 69 | static | 69 | static |
| 70 | struct callchain_param callchain_param = { | 70 | struct callchain_param callchain_param = { |
| 71 | .mode = CHAIN_GRAPH_ABS, | 71 | .mode = CHAIN_GRAPH_REL, |
| 72 | .min_percent = 0.5 | 72 | .min_percent = 0.5 |
| 73 | }; | 73 | }; |
| 74 | 74 | ||
| @@ -112,7 +112,9 @@ struct read_event { | |||
| 112 | struct perf_event_header header; | 112 | struct perf_event_header header; |
| 113 | u32 pid,tid; | 113 | u32 pid,tid; |
| 114 | u64 value; | 114 | u64 value; |
| 115 | u64 format[3]; | 115 | u64 time_enabled; |
| 116 | u64 time_running; | ||
| 117 | u64 id; | ||
| 116 | }; | 118 | }; |
| 117 | 119 | ||
| 118 | typedef union event_union { | 120 | typedef union event_union { |
| @@ -698,7 +700,8 @@ sort__sym_print(FILE *fp, struct hist_entry *self, unsigned int width __used) | |||
| 698 | size_t ret = 0; | 700 | size_t ret = 0; |
| 699 | 701 | ||
| 700 | if (verbose) | 702 | if (verbose) |
| 701 | ret += repsep_fprintf(fp, "%#018llx ", (u64)self->ip); | 703 | ret += repsep_fprintf(fp, "%#018llx %c ", (u64)self->ip, |
| 704 | dso__symtab_origin(self->dso)); | ||
| 702 | 705 | ||
| 703 | ret += repsep_fprintf(fp, "[%c] ", self->level); | 706 | ret += repsep_fprintf(fp, "[%c] ", self->level); |
| 704 | if (self->sym) { | 707 | if (self->sym) { |
| @@ -888,6 +891,21 @@ ipchain__fprintf_graph(FILE *fp, struct callchain_list *chain, int depth, | |||
| 888 | return ret; | 891 | return ret; |
| 889 | } | 892 | } |
| 890 | 893 | ||
| 894 | static struct symbol *rem_sq_bracket; | ||
| 895 | static struct callchain_list rem_hits; | ||
| 896 | |||
| 897 | static void init_rem_hits(void) | ||
| 898 | { | ||
| 899 | rem_sq_bracket = malloc(sizeof(*rem_sq_bracket) + 6); | ||
| 900 | if (!rem_sq_bracket) { | ||
| 901 | fprintf(stderr, "Not enough memory to display remaining hits\n"); | ||
| 902 | return; | ||
| 903 | } | ||
| 904 | |||
| 905 | strcpy(rem_sq_bracket->name, "[...]"); | ||
| 906 | rem_hits.sym = rem_sq_bracket; | ||
| 907 | } | ||
| 908 | |||
| 891 | static size_t | 909 | static size_t |
| 892 | callchain__fprintf_graph(FILE *fp, struct callchain_node *self, | 910 | callchain__fprintf_graph(FILE *fp, struct callchain_node *self, |
| 893 | u64 total_samples, int depth, int depth_mask) | 911 | u64 total_samples, int depth, int depth_mask) |
| @@ -897,25 +915,34 @@ callchain__fprintf_graph(FILE *fp, struct callchain_node *self, | |||
| 897 | struct callchain_list *chain; | 915 | struct callchain_list *chain; |
| 898 | int new_depth_mask = depth_mask; | 916 | int new_depth_mask = depth_mask; |
| 899 | u64 new_total; | 917 | u64 new_total; |
| 918 | u64 remaining; | ||
| 900 | size_t ret = 0; | 919 | size_t ret = 0; |
| 901 | int i; | 920 | int i; |
| 902 | 921 | ||
| 903 | if (callchain_param.mode == CHAIN_GRAPH_REL) | 922 | if (callchain_param.mode == CHAIN_GRAPH_REL) |
| 904 | new_total = self->cumul_hit; | 923 | new_total = self->children_hit; |
| 905 | else | 924 | else |
| 906 | new_total = total_samples; | 925 | new_total = total_samples; |
| 907 | 926 | ||
| 927 | remaining = new_total; | ||
| 928 | |||
| 908 | node = rb_first(&self->rb_root); | 929 | node = rb_first(&self->rb_root); |
| 909 | while (node) { | 930 | while (node) { |
| 931 | u64 cumul; | ||
| 932 | |||
| 910 | child = rb_entry(node, struct callchain_node, rb_node); | 933 | child = rb_entry(node, struct callchain_node, rb_node); |
| 934 | cumul = cumul_hits(child); | ||
| 935 | remaining -= cumul; | ||
| 911 | 936 | ||
| 912 | /* | 937 | /* |
| 913 | * The depth mask manages the output of pipes that show | 938 | * The depth mask manages the output of pipes that show |
| 914 | * the depth. We don't want to keep the pipes of the current | 939 | * the depth. We don't want to keep the pipes of the current |
| 915 | * level for the last child of this depth | 940 | * level for the last child of this depth. |
| 941 | * Except if we have remaining filtered hits. They will | ||
| 942 | * supersede the last child | ||
| 916 | */ | 943 | */ |
| 917 | next = rb_next(node); | 944 | next = rb_next(node); |
| 918 | if (!next) | 945 | if (!next && (callchain_param.mode != CHAIN_GRAPH_REL || !remaining)) |
| 919 | new_depth_mask &= ~(1 << (depth - 1)); | 946 | new_depth_mask &= ~(1 << (depth - 1)); |
| 920 | 947 | ||
| 921 | /* | 948 | /* |
| @@ -930,7 +957,7 @@ callchain__fprintf_graph(FILE *fp, struct callchain_node *self, | |||
| 930 | ret += ipchain__fprintf_graph(fp, chain, depth, | 957 | ret += ipchain__fprintf_graph(fp, chain, depth, |
| 931 | new_depth_mask, i++, | 958 | new_depth_mask, i++, |
| 932 | new_total, | 959 | new_total, |
| 933 | child->cumul_hit); | 960 | cumul); |
| 934 | } | 961 | } |
| 935 | ret += callchain__fprintf_graph(fp, child, new_total, | 962 | ret += callchain__fprintf_graph(fp, child, new_total, |
| 936 | depth + 1, | 963 | depth + 1, |
| @@ -938,6 +965,19 @@ callchain__fprintf_graph(FILE *fp, struct callchain_node *self, | |||
| 938 | node = next; | 965 | node = next; |
| 939 | } | 966 | } |
| 940 | 967 | ||
| 968 | if (callchain_param.mode == CHAIN_GRAPH_REL && | ||
| 969 | remaining && remaining != new_total) { | ||
| 970 | |||
| 971 | if (!rem_sq_bracket) | ||
| 972 | return ret; | ||
| 973 | |||
| 974 | new_depth_mask &= ~(1 << (depth - 1)); | ||
| 975 | |||
| 976 | ret += ipchain__fprintf_graph(fp, &rem_hits, depth, | ||
| 977 | new_depth_mask, 0, new_total, | ||
| 978 | remaining); | ||
| 979 | } | ||
| 980 | |||
| 941 | return ret; | 981 | return ret; |
| 942 | } | 982 | } |
| 943 | 983 | ||
| @@ -1358,6 +1398,8 @@ static size_t output__fprintf(FILE *fp, u64 total_samples) | |||
| 1358 | unsigned int width; | 1398 | unsigned int width; |
| 1359 | char *col_width = col_width_list_str; | 1399 | char *col_width = col_width_list_str; |
| 1360 | 1400 | ||
| 1401 | init_rem_hits(); | ||
| 1402 | |||
| 1361 | fprintf(fp, "# Samples: %Ld\n", (u64)total_samples); | 1403 | fprintf(fp, "# Samples: %Ld\n", (u64)total_samples); |
| 1362 | fprintf(fp, "#\n"); | 1404 | fprintf(fp, "#\n"); |
| 1363 | 1405 | ||
| @@ -1429,6 +1471,8 @@ print_entries: | |||
| 1429 | } | 1471 | } |
| 1430 | fprintf(fp, "\n"); | 1472 | fprintf(fp, "\n"); |
| 1431 | 1473 | ||
| 1474 | free(rem_sq_bracket); | ||
| 1475 | |||
| 1432 | return ret; | 1476 | return ret; |
| 1433 | } | 1477 | } |
| 1434 | 1478 | ||
| @@ -1690,14 +1734,37 @@ static void trace_event(event_t *event) | |||
| 1690 | dprintf(".\n"); | 1734 | dprintf(".\n"); |
| 1691 | } | 1735 | } |
| 1692 | 1736 | ||
| 1737 | static struct perf_header *header; | ||
| 1738 | |||
| 1739 | static struct perf_counter_attr *perf_header__find_attr(u64 id) | ||
| 1740 | { | ||
| 1741 | int i; | ||
| 1742 | |||
| 1743 | for (i = 0; i < header->attrs; i++) { | ||
| 1744 | struct perf_header_attr *attr = header->attr[i]; | ||
| 1745 | int j; | ||
| 1746 | |||
| 1747 | for (j = 0; j < attr->ids; j++) { | ||
| 1748 | if (attr->id[j] == id) | ||
| 1749 | return &attr->attr; | ||
| 1750 | } | ||
| 1751 | } | ||
| 1752 | |||
| 1753 | return NULL; | ||
| 1754 | } | ||
| 1755 | |||
| 1693 | static int | 1756 | static int |
| 1694 | process_read_event(event_t *event, unsigned long offset, unsigned long head) | 1757 | process_read_event(event_t *event, unsigned long offset, unsigned long head) |
| 1695 | { | 1758 | { |
| 1696 | dprintf("%p [%p]: PERF_EVENT_READ: %d %d %Lu\n", | 1759 | struct perf_counter_attr *attr = perf_header__find_attr(event->read.id); |
| 1760 | |||
| 1761 | dprintf("%p [%p]: PERF_EVENT_READ: %d %d %s %Lu\n", | ||
| 1697 | (void *)(offset + head), | 1762 | (void *)(offset + head), |
| 1698 | (void *)(long)(event->header.size), | 1763 | (void *)(long)(event->header.size), |
| 1699 | event->read.pid, | 1764 | event->read.pid, |
| 1700 | event->read.tid, | 1765 | event->read.tid, |
| 1766 | attr ? __event_name(attr->type, attr->config) | ||
| 1767 | : "FAIL", | ||
| 1701 | event->read.value); | 1768 | event->read.value); |
| 1702 | 1769 | ||
| 1703 | return 0; | 1770 | return 0; |
| @@ -1743,8 +1810,6 @@ process_event(event_t *event, unsigned long offset, unsigned long head) | |||
| 1743 | return 0; | 1810 | return 0; |
| 1744 | } | 1811 | } |
| 1745 | 1812 | ||
| 1746 | static struct perf_header *header; | ||
| 1747 | |||
| 1748 | static u64 perf_header__sample_type(void) | 1813 | static u64 perf_header__sample_type(void) |
| 1749 | { | 1814 | { |
| 1750 | u64 sample_type = 0; | 1815 | u64 sample_type = 0; |
| @@ -1812,6 +1877,13 @@ static int __cmd_report(void) | |||
| 1812 | " -g?\n"); | 1877 | " -g?\n"); |
| 1813 | exit(-1); | 1878 | exit(-1); |
| 1814 | } | 1879 | } |
| 1880 | } else if (callchain_param.mode != CHAIN_NONE && !callchain) { | ||
| 1881 | callchain = 1; | ||
| 1882 | if (register_callchain_param(&callchain_param) < 0) { | ||
| 1883 | fprintf(stderr, "Can't register callchain" | ||
| 1884 | " params\n"); | ||
| 1885 | exit(-1); | ||
| 1886 | } | ||
| 1815 | } | 1887 | } |
| 1816 | 1888 | ||
| 1817 | if (load_kernel() < 0) { | 1889 | if (load_kernel() < 0) { |
| @@ -1950,6 +2022,13 @@ parse_callchain_opt(const struct option *opt __used, const char *arg, | |||
| 1950 | else if (!strncmp(tok, "fractal", strlen(arg))) | 2022 | else if (!strncmp(tok, "fractal", strlen(arg))) |
| 1951 | callchain_param.mode = CHAIN_GRAPH_REL; | 2023 | callchain_param.mode = CHAIN_GRAPH_REL; |
| 1952 | 2024 | ||
| 2025 | else if (!strncmp(tok, "none", strlen(arg))) { | ||
| 2026 | callchain_param.mode = CHAIN_NONE; | ||
| 2027 | callchain = 0; | ||
| 2028 | |||
| 2029 | return 0; | ||
| 2030 | } | ||
| 2031 | |||
| 1953 | else | 2032 | else |
| 1954 | return -1; | 2033 | return -1; |
| 1955 | 2034 | ||
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index f9510eeeb6c7..b4b06c7903e1 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c | |||
| @@ -496,7 +496,7 @@ static const struct option options[] = { | |||
| 496 | "stat events on existing pid"), | 496 | "stat events on existing pid"), |
| 497 | OPT_BOOLEAN('a', "all-cpus", &system_wide, | 497 | OPT_BOOLEAN('a', "all-cpus", &system_wide, |
| 498 | "system-wide collection from all CPUs"), | 498 | "system-wide collection from all CPUs"), |
| 499 | OPT_BOOLEAN('S', "scale", &scale, | 499 | OPT_BOOLEAN('c', "scale", &scale, |
| 500 | "scale/normalize counters"), | 500 | "scale/normalize counters"), |
| 501 | OPT_BOOLEAN('v', "verbose", &verbose, | 501 | OPT_BOOLEAN('v', "verbose", &verbose, |
| 502 | "be more verbose (show counter open errors, etc)"), | 502 | "be more verbose (show counter open errors, etc)"), |
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index f139f1ab9333..7de28ce9ca26 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c | |||
| @@ -31,6 +31,8 @@ | |||
| 31 | #include <fcntl.h> | 31 | #include <fcntl.h> |
| 32 | 32 | ||
| 33 | #include <stdio.h> | 33 | #include <stdio.h> |
| 34 | #include <termios.h> | ||
| 35 | #include <unistd.h> | ||
| 34 | 36 | ||
| 35 | #include <errno.h> | 37 | #include <errno.h> |
| 36 | #include <time.h> | 38 | #include <time.h> |
| @@ -54,7 +56,7 @@ static int system_wide = 0; | |||
| 54 | 56 | ||
| 55 | static int default_interval = 100000; | 57 | static int default_interval = 100000; |
| 56 | 58 | ||
| 57 | static u64 count_filter = 5; | 59 | static int count_filter = 5; |
| 58 | static int print_entries = 15; | 60 | static int print_entries = 15; |
| 59 | 61 | ||
| 60 | static int target_pid = -1; | 62 | static int target_pid = -1; |
| @@ -69,15 +71,28 @@ static int freq = 0; | |||
| 69 | static int verbose = 0; | 71 | static int verbose = 0; |
| 70 | static char *vmlinux = NULL; | 72 | static char *vmlinux = NULL; |
| 71 | 73 | ||
| 72 | static char *sym_filter; | ||
| 73 | static unsigned long filter_start; | ||
| 74 | static unsigned long filter_end; | ||
| 75 | |||
| 76 | static int delay_secs = 2; | 74 | static int delay_secs = 2; |
| 77 | static int zero; | 75 | static int zero; |
| 78 | static int dump_symtab; | 76 | static int dump_symtab; |
| 79 | 77 | ||
| 80 | /* | 78 | /* |
| 79 | * Source | ||
| 80 | */ | ||
| 81 | |||
| 82 | struct source_line { | ||
| 83 | u64 eip; | ||
| 84 | unsigned long count[MAX_COUNTERS]; | ||
| 85 | char *line; | ||
| 86 | struct source_line *next; | ||
| 87 | }; | ||
| 88 | |||
| 89 | static char *sym_filter = NULL; | ||
| 90 | struct sym_entry *sym_filter_entry = NULL; | ||
| 91 | static int sym_pcnt_filter = 5; | ||
| 92 | static int sym_counter = 0; | ||
| 93 | static int display_weighted = -1; | ||
| 94 | |||
| 95 | /* | ||
| 81 | * Symbols | 96 | * Symbols |
| 82 | */ | 97 | */ |
| 83 | 98 | ||
| @@ -91,9 +106,237 @@ struct sym_entry { | |||
| 91 | unsigned long snap_count; | 106 | unsigned long snap_count; |
| 92 | double weight; | 107 | double weight; |
| 93 | int skip; | 108 | int skip; |
| 109 | struct source_line *source; | ||
| 110 | struct source_line *lines; | ||
| 111 | struct source_line **lines_tail; | ||
| 112 | pthread_mutex_t source_lock; | ||
| 94 | }; | 113 | }; |
| 95 | 114 | ||
| 96 | struct sym_entry *sym_filter_entry; | 115 | /* |
| 116 | * Source functions | ||
| 117 | */ | ||
| 118 | |||
| 119 | static void parse_source(struct sym_entry *syme) | ||
| 120 | { | ||
| 121 | struct symbol *sym; | ||
| 122 | struct module *module; | ||
| 123 | struct section *section = NULL; | ||
| 124 | FILE *file; | ||
| 125 | char command[PATH_MAX*2], *path = vmlinux; | ||
| 126 | u64 start, end, len; | ||
| 127 | |||
| 128 | if (!syme) | ||
| 129 | return; | ||
| 130 | |||
| 131 | if (syme->lines) { | ||
| 132 | pthread_mutex_lock(&syme->source_lock); | ||
| 133 | goto out_assign; | ||
| 134 | } | ||
| 135 | |||
| 136 | sym = (struct symbol *)(syme + 1); | ||
| 137 | module = sym->module; | ||
| 138 | |||
| 139 | if (module) | ||
| 140 | path = module->path; | ||
| 141 | if (!path) | ||
| 142 | return; | ||
| 143 | |||
| 144 | start = sym->obj_start; | ||
| 145 | if (!start) | ||
| 146 | start = sym->start; | ||
| 147 | |||
| 148 | if (module) { | ||
| 149 | section = module->sections->find_section(module->sections, ".text"); | ||
| 150 | if (section) | ||
| 151 | start -= section->vma; | ||
| 152 | } | ||
| 153 | |||
| 154 | end = start + sym->end - sym->start + 1; | ||
| 155 | len = sym->end - sym->start; | ||
| 156 | |||
| 157 | sprintf(command, "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS %s", start, end, path); | ||
| 158 | |||
| 159 | file = popen(command, "r"); | ||
| 160 | if (!file) | ||
| 161 | return; | ||
| 162 | |||
| 163 | pthread_mutex_lock(&syme->source_lock); | ||
| 164 | syme->lines_tail = &syme->lines; | ||
| 165 | while (!feof(file)) { | ||
| 166 | struct source_line *src; | ||
| 167 | size_t dummy = 0; | ||
| 168 | char *c; | ||
| 169 | |||
| 170 | src = malloc(sizeof(struct source_line)); | ||
| 171 | assert(src != NULL); | ||
| 172 | memset(src, 0, sizeof(struct source_line)); | ||
| 173 | |||
| 174 | if (getline(&src->line, &dummy, file) < 0) | ||
| 175 | break; | ||
| 176 | if (!src->line) | ||
| 177 | break; | ||
| 178 | |||
| 179 | c = strchr(src->line, '\n'); | ||
| 180 | if (c) | ||
| 181 | *c = 0; | ||
| 182 | |||
| 183 | src->next = NULL; | ||
| 184 | *syme->lines_tail = src; | ||
| 185 | syme->lines_tail = &src->next; | ||
| 186 | |||
| 187 | if (strlen(src->line)>8 && src->line[8] == ':') { | ||
| 188 | src->eip = strtoull(src->line, NULL, 16); | ||
| 189 | if (section) | ||
| 190 | src->eip += section->vma; | ||
| 191 | } | ||
| 192 | if (strlen(src->line)>8 && src->line[16] == ':') { | ||
| 193 | src->eip = strtoull(src->line, NULL, 16); | ||
| 194 | if (section) | ||
| 195 | src->eip += section->vma; | ||
| 196 | } | ||
| 197 | } | ||
| 198 | pclose(file); | ||
| 199 | out_assign: | ||
| 200 | sym_filter_entry = syme; | ||
| 201 | pthread_mutex_unlock(&syme->source_lock); | ||
| 202 | } | ||
| 203 | |||
| 204 | static void __zero_source_counters(struct sym_entry *syme) | ||
| 205 | { | ||
| 206 | int i; | ||
| 207 | struct source_line *line; | ||
| 208 | |||
| 209 | line = syme->lines; | ||
| 210 | while (line) { | ||
| 211 | for (i = 0; i < nr_counters; i++) | ||
| 212 | line->count[i] = 0; | ||
| 213 | line = line->next; | ||
| 214 | } | ||
| 215 | } | ||
| 216 | |||
| 217 | static void record_precise_ip(struct sym_entry *syme, int counter, u64 ip) | ||
| 218 | { | ||
| 219 | struct source_line *line; | ||
| 220 | |||
| 221 | if (syme != sym_filter_entry) | ||
| 222 | return; | ||
| 223 | |||
| 224 | if (pthread_mutex_trylock(&syme->source_lock)) | ||
| 225 | return; | ||
| 226 | |||
| 227 | if (!syme->source) | ||
| 228 | goto out_unlock; | ||
| 229 | |||
| 230 | for (line = syme->lines; line; line = line->next) { | ||
| 231 | if (line->eip == ip) { | ||
| 232 | line->count[counter]++; | ||
| 233 | break; | ||
| 234 | } | ||
| 235 | if (line->eip > ip) | ||
| 236 | break; | ||
| 237 | } | ||
| 238 | out_unlock: | ||
| 239 | pthread_mutex_unlock(&syme->source_lock); | ||
| 240 | } | ||
| 241 | |||
| 242 | static void lookup_sym_source(struct sym_entry *syme) | ||
| 243 | { | ||
| 244 | struct symbol *symbol = (struct symbol *)(syme + 1); | ||
| 245 | struct source_line *line; | ||
| 246 | char pattern[PATH_MAX]; | ||
| 247 | char *idx; | ||
| 248 | |||
| 249 | sprintf(pattern, "<%s>:", symbol->name); | ||
| 250 | |||
| 251 | if (symbol->module) { | ||
| 252 | idx = strstr(pattern, "\t"); | ||
| 253 | if (idx) | ||
| 254 | *idx = 0; | ||
| 255 | } | ||
| 256 | |||
| 257 | pthread_mutex_lock(&syme->source_lock); | ||
| 258 | for (line = syme->lines; line; line = line->next) { | ||
| 259 | if (strstr(line->line, pattern)) { | ||
| 260 | syme->source = line; | ||
| 261 | break; | ||
| 262 | } | ||
| 263 | } | ||
| 264 | pthread_mutex_unlock(&syme->source_lock); | ||
| 265 | } | ||
| 266 | |||
| 267 | static void show_lines(struct source_line *queue, int count, int total) | ||
| 268 | { | ||
| 269 | int i; | ||
| 270 | struct source_line *line; | ||
| 271 | |||
| 272 | line = queue; | ||
| 273 | for (i = 0; i < count; i++) { | ||
| 274 | float pcnt = 100.0*(float)line->count[sym_counter]/(float)total; | ||
| 275 | |||
| 276 | printf("%8li %4.1f%%\t%s\n", line->count[sym_counter], pcnt, line->line); | ||
| 277 | line = line->next; | ||
| 278 | } | ||
| 279 | } | ||
| 280 | |||
| 281 | #define TRACE_COUNT 3 | ||
| 282 | |||
| 283 | static void show_details(struct sym_entry *syme) | ||
| 284 | { | ||
| 285 | struct symbol *symbol; | ||
| 286 | struct source_line *line; | ||
| 287 | struct source_line *line_queue = NULL; | ||
| 288 | int displayed = 0; | ||
| 289 | int line_queue_count = 0, total = 0, more = 0; | ||
| 290 | |||
| 291 | if (!syme) | ||
| 292 | return; | ||
| 293 | |||
| 294 | if (!syme->source) | ||
| 295 | lookup_sym_source(syme); | ||
| 296 | |||
| 297 | if (!syme->source) | ||
| 298 | return; | ||
| 299 | |||
| 300 | symbol = (struct symbol *)(syme + 1); | ||
| 301 | printf("Showing %s for %s\n", event_name(sym_counter), symbol->name); | ||
| 302 | printf(" Events Pcnt (>=%d%%)\n", sym_pcnt_filter); | ||
| 303 | |||
| 304 | pthread_mutex_lock(&syme->source_lock); | ||
| 305 | line = syme->source; | ||
| 306 | while (line) { | ||
| 307 | total += line->count[sym_counter]; | ||
| 308 | line = line->next; | ||
| 309 | } | ||
| 310 | |||
| 311 | line = syme->source; | ||
| 312 | while (line) { | ||
| 313 | float pcnt = 0.0; | ||
| 314 | |||
| 315 | if (!line_queue_count) | ||
| 316 | line_queue = line; | ||
| 317 | line_queue_count++; | ||
| 318 | |||
| 319 | if (line->count[sym_counter]) | ||
| 320 | pcnt = 100.0 * line->count[sym_counter] / (float)total; | ||
| 321 | if (pcnt >= (float)sym_pcnt_filter) { | ||
| 322 | if (displayed <= print_entries) | ||
| 323 | show_lines(line_queue, line_queue_count, total); | ||
| 324 | else more++; | ||
| 325 | displayed += line_queue_count; | ||
| 326 | line_queue_count = 0; | ||
| 327 | line_queue = NULL; | ||
| 328 | } else if (line_queue_count > TRACE_COUNT) { | ||
| 329 | line_queue = line_queue->next; | ||
| 330 | line_queue_count--; | ||
| 331 | } | ||
| 332 | |||
| 333 | line->count[sym_counter] = zero ? 0 : line->count[sym_counter] * 7 / 8; | ||
| 334 | line = line->next; | ||
| 335 | } | ||
| 336 | pthread_mutex_unlock(&syme->source_lock); | ||
| 337 | if (more) | ||
| 338 | printf("%d lines not displayed, maybe increase display entries [e]\n", more); | ||
| 339 | } | ||
| 97 | 340 | ||
| 98 | struct dso *kernel_dso; | 341 | struct dso *kernel_dso; |
| 99 | 342 | ||
| @@ -112,6 +355,9 @@ static double sym_weight(const struct sym_entry *sym) | |||
| 112 | double weight = sym->snap_count; | 355 | double weight = sym->snap_count; |
| 113 | int counter; | 356 | int counter; |
| 114 | 357 | ||
| 358 | if (!display_weighted) | ||
| 359 | return weight; | ||
| 360 | |||
| 115 | for (counter = 1; counter < nr_counters-1; counter++) | 361 | for (counter = 1; counter < nr_counters-1; counter++) |
| 116 | weight *= sym->count[counter]; | 362 | weight *= sym->count[counter]; |
| 117 | 363 | ||
| @@ -159,7 +405,7 @@ static void rb_insert_active_sym(struct rb_root *tree, struct sym_entry *se) | |||
| 159 | static void print_sym_table(void) | 405 | static void print_sym_table(void) |
| 160 | { | 406 | { |
| 161 | int printed = 0, j; | 407 | int printed = 0, j; |
| 162 | int counter; | 408 | int counter, snap = !display_weighted ? sym_counter : 0; |
| 163 | float samples_per_sec = samples/delay_secs; | 409 | float samples_per_sec = samples/delay_secs; |
| 164 | float ksamples_per_sec = (samples-userspace_samples)/delay_secs; | 410 | float ksamples_per_sec = (samples-userspace_samples)/delay_secs; |
| 165 | float sum_ksamples = 0.0; | 411 | float sum_ksamples = 0.0; |
| @@ -175,7 +421,7 @@ static void print_sym_table(void) | |||
| 175 | pthread_mutex_unlock(&active_symbols_lock); | 421 | pthread_mutex_unlock(&active_symbols_lock); |
| 176 | 422 | ||
| 177 | list_for_each_entry_safe_from(syme, n, &active_symbols, node) { | 423 | list_for_each_entry_safe_from(syme, n, &active_symbols, node) { |
| 178 | syme->snap_count = syme->count[0]; | 424 | syme->snap_count = syme->count[snap]; |
| 179 | if (syme->snap_count != 0) { | 425 | if (syme->snap_count != 0) { |
| 180 | syme->weight = sym_weight(syme); | 426 | syme->weight = sym_weight(syme); |
| 181 | rb_insert_active_sym(&tmp, syme); | 427 | rb_insert_active_sym(&tmp, syme); |
| @@ -195,7 +441,7 @@ static void print_sym_table(void) | |||
| 195 | samples_per_sec, | 441 | samples_per_sec, |
| 196 | 100.0 - (100.0*((samples_per_sec-ksamples_per_sec)/samples_per_sec))); | 442 | 100.0 - (100.0*((samples_per_sec-ksamples_per_sec)/samples_per_sec))); |
| 197 | 443 | ||
| 198 | if (nr_counters == 1) { | 444 | if (nr_counters == 1 || !display_weighted) { |
| 199 | printf("%Ld", (u64)attrs[0].sample_period); | 445 | printf("%Ld", (u64)attrs[0].sample_period); |
| 200 | if (freq) | 446 | if (freq) |
| 201 | printf("Hz "); | 447 | printf("Hz "); |
| @@ -203,7 +449,9 @@ static void print_sym_table(void) | |||
| 203 | printf(" "); | 449 | printf(" "); |
| 204 | } | 450 | } |
| 205 | 451 | ||
| 206 | for (counter = 0; counter < nr_counters; counter++) { | 452 | if (!display_weighted) |
| 453 | printf("%s", event_name(sym_counter)); | ||
| 454 | else for (counter = 0; counter < nr_counters; counter++) { | ||
| 207 | if (counter) | 455 | if (counter) |
| 208 | printf("/"); | 456 | printf("/"); |
| 209 | 457 | ||
| @@ -228,6 +476,11 @@ static void print_sym_table(void) | |||
| 228 | 476 | ||
| 229 | printf("------------------------------------------------------------------------------\n\n"); | 477 | printf("------------------------------------------------------------------------------\n\n"); |
| 230 | 478 | ||
| 479 | if (sym_filter_entry) { | ||
| 480 | show_details(sym_filter_entry); | ||
| 481 | return; | ||
| 482 | } | ||
| 483 | |||
| 231 | if (nr_counters == 1) | 484 | if (nr_counters == 1) |
| 232 | printf(" samples pcnt"); | 485 | printf(" samples pcnt"); |
| 233 | else | 486 | else |
| @@ -242,13 +495,13 @@ static void print_sym_table(void) | |||
| 242 | struct symbol *sym = (struct symbol *)(syme + 1); | 495 | struct symbol *sym = (struct symbol *)(syme + 1); |
| 243 | double pcnt; | 496 | double pcnt; |
| 244 | 497 | ||
| 245 | if (++printed > print_entries || syme->snap_count < count_filter) | 498 | if (++printed > print_entries || (int)syme->snap_count < count_filter) |
| 246 | continue; | 499 | continue; |
| 247 | 500 | ||
| 248 | pcnt = 100.0 - (100.0 * ((sum_ksamples - syme->snap_count) / | 501 | pcnt = 100.0 - (100.0 * ((sum_ksamples - syme->snap_count) / |
| 249 | sum_ksamples)); | 502 | sum_ksamples)); |
| 250 | 503 | ||
| 251 | if (nr_counters == 1) | 504 | if (nr_counters == 1 || !display_weighted) |
| 252 | printf("%20.2f - ", syme->weight); | 505 | printf("%20.2f - ", syme->weight); |
| 253 | else | 506 | else |
| 254 | printf("%9.1f %10ld - ", syme->weight, syme->snap_count); | 507 | printf("%9.1f %10ld - ", syme->weight, syme->snap_count); |
| @@ -261,19 +514,250 @@ static void print_sym_table(void) | |||
| 261 | } | 514 | } |
| 262 | } | 515 | } |
| 263 | 516 | ||
/*
 * Print "@msg: " and read one line from stdin.  If the line consists
 * solely of decimal digits and the value fits in a non-negative int,
 * store it in *@target; otherwise leave *@target untouched.
 *
 * Rejected input: EOF, an empty line, any non-digit character, a line
 * too long for the local buffer (its remainder is discarded so it is not
 * mistaken for later keypresses), and values that overflow int.
 *
 * The line is read into a stack buffer, so there is nothing to free on
 * any exit path.
 */
static void prompt_integer(int *target, const char *msg)
{
	char buf[32];
	const char *p;
	unsigned long val;
	size_t len;
	int c;

	fprintf(stdout, "\n%s: ", msg);
	if (!fgets(buf, sizeof(buf), stdin))
		return;

	len = strlen(buf);
	if (len && buf[len - 1] == '\n') {
		buf[--len] = 0;
	} else if (len == sizeof(buf) - 1) {
		/* Overlong line: swallow the rest of it and reject. */
		do {
			c = getc(stdin);
		} while (c != EOF && c != '\n');
		return;
	}

	if (!len)
		return;

	for (p = buf; *p; p++) {
		if (!isdigit((unsigned char)*p))
			return;
	}

	val = strtoul(buf, NULL, 10);
	/* Accept only values that survive the round trip through int. */
	if ((int)val < 0 || (unsigned long)(int)val != val)
		return;

	*target = (int)val;
}
| 542 | |||
/*
 * Prompt for a percentage with @msg and store it in *@target when the
 * answer is an integer in [0, 100].  Any other answer leaves *@target
 * unchanged.
 */
static void prompt_percent(int *target, const char *msg)
{
	/*
	 * Start from an out-of-range sentinel: prompt_integer() does not
	 * write to its target when the input is rejected, and a rejected
	 * answer must not be mistaken for a valid "0".
	 */
	int tmp = -1;

	prompt_integer(&tmp, msg);
	if (tmp >= 0 && tmp <= 100)
		*target = tmp;
}
| 551 | |||
| 552 | static void prompt_symbol(struct sym_entry **target, const char *msg) | ||
| 553 | { | ||
| 554 | char *buf = malloc(0), *p; | ||
| 555 | struct sym_entry *syme = *target, *n, *found = NULL; | ||
| 556 | size_t dummy = 0; | ||
| 557 | |||
| 558 | /* zero counters of active symbol */ | ||
| 559 | if (syme) { | ||
| 560 | pthread_mutex_lock(&syme->source_lock); | ||
| 561 | __zero_source_counters(syme); | ||
| 562 | *target = NULL; | ||
| 563 | pthread_mutex_unlock(&syme->source_lock); | ||
| 564 | } | ||
| 565 | |||
| 566 | fprintf(stdout, "\n%s: ", msg); | ||
| 567 | if (getline(&buf, &dummy, stdin) < 0) | ||
| 568 | goto out_free; | ||
| 569 | |||
| 570 | p = strchr(buf, '\n'); | ||
| 571 | if (p) | ||
| 572 | *p = 0; | ||
| 573 | |||
| 574 | pthread_mutex_lock(&active_symbols_lock); | ||
| 575 | syme = list_entry(active_symbols.next, struct sym_entry, node); | ||
| 576 | pthread_mutex_unlock(&active_symbols_lock); | ||
| 577 | |||
| 578 | list_for_each_entry_safe_from(syme, n, &active_symbols, node) { | ||
| 579 | struct symbol *sym = (struct symbol *)(syme + 1); | ||
| 580 | |||
| 581 | if (!strcmp(buf, sym->name)) { | ||
| 582 | found = syme; | ||
| 583 | break; | ||
| 584 | } | ||
| 585 | } | ||
| 586 | |||
| 587 | if (!found) { | ||
| 588 | fprintf(stderr, "Sorry, %s is not active.\n", sym_filter); | ||
| 589 | sleep(1); | ||
| 590 | return; | ||
| 591 | } else | ||
| 592 | parse_source(found); | ||
| 593 | |||
| 594 | out_free: | ||
| 595 | free(buf); | ||
| 596 | } | ||
| 597 | |||
| 598 | static void print_mapped_keys(void) | ||
| 599 | { | ||
| 600 | char *name = NULL; | ||
| 601 | |||
| 602 | if (sym_filter_entry) { | ||
| 603 | struct symbol *sym = (struct symbol *)(sym_filter_entry+1); | ||
| 604 | name = sym->name; | ||
| 605 | } | ||
| 606 | |||
| 607 | fprintf(stdout, "\nMapped keys:\n"); | ||
| 608 | fprintf(stdout, "\t[d] display refresh delay. \t(%d)\n", delay_secs); | ||
| 609 | fprintf(stdout, "\t[e] display entries (lines). \t(%d)\n", print_entries); | ||
| 610 | |||
| 611 | if (nr_counters > 1) | ||
| 612 | fprintf(stdout, "\t[E] active event counter. \t(%s)\n", event_name(sym_counter)); | ||
| 613 | |||
| 614 | fprintf(stdout, "\t[f] profile display filter (count). \t(%d)\n", count_filter); | ||
| 615 | |||
| 616 | if (vmlinux) { | ||
| 617 | fprintf(stdout, "\t[F] annotate display filter (percent). \t(%d%%)\n", sym_pcnt_filter); | ||
| 618 | fprintf(stdout, "\t[s] annotate symbol. \t(%s)\n", name?: "NULL"); | ||
| 619 | fprintf(stdout, "\t[S] stop annotation.\n"); | ||
| 620 | } | ||
| 621 | |||
| 622 | if (nr_counters > 1) | ||
| 623 | fprintf(stdout, "\t[w] toggle display weighted/count[E]r. \t(%d)\n", display_weighted ? 1 : 0); | ||
| 624 | |||
| 625 | fprintf(stdout, "\t[z] toggle sample zeroing. \t(%d)\n", zero ? 1 : 0); | ||
| 626 | fprintf(stdout, "\t[qQ] quit.\n"); | ||
| 627 | } | ||
| 628 | |||
| 629 | static int key_mapped(int c) | ||
| 630 | { | ||
| 631 | switch (c) { | ||
| 632 | case 'd': | ||
| 633 | case 'e': | ||
| 634 | case 'f': | ||
| 635 | case 'z': | ||
| 636 | case 'q': | ||
| 637 | case 'Q': | ||
| 638 | return 1; | ||
| 639 | case 'E': | ||
| 640 | case 'w': | ||
| 641 | return nr_counters > 1 ? 1 : 0; | ||
| 642 | case 'F': | ||
| 643 | case 's': | ||
| 644 | case 'S': | ||
| 645 | return vmlinux ? 1 : 0; | ||
| 646 | } | ||
| 647 | |||
| 648 | return 0; | ||
| 649 | } | ||
| 650 | |||
| 651 | static void handle_keypress(int c) | ||
| 652 | { | ||
| 653 | if (!key_mapped(c)) { | ||
| 654 | struct pollfd stdin_poll = { .fd = 0, .events = POLLIN }; | ||
| 655 | struct termios tc, save; | ||
| 656 | |||
| 657 | print_mapped_keys(); | ||
| 658 | fprintf(stdout, "\nEnter selection, or unmapped key to continue: "); | ||
| 659 | fflush(stdout); | ||
| 660 | |||
| 661 | tcgetattr(0, &save); | ||
| 662 | tc = save; | ||
| 663 | tc.c_lflag &= ~(ICANON | ECHO); | ||
| 664 | tc.c_cc[VMIN] = 0; | ||
| 665 | tc.c_cc[VTIME] = 0; | ||
| 666 | tcsetattr(0, TCSANOW, &tc); | ||
| 667 | |||
| 668 | poll(&stdin_poll, 1, -1); | ||
| 669 | c = getc(stdin); | ||
| 670 | |||
| 671 | tcsetattr(0, TCSAFLUSH, &save); | ||
| 672 | if (!key_mapped(c)) | ||
| 673 | return; | ||
| 674 | } | ||
| 675 | |||
| 676 | switch (c) { | ||
| 677 | case 'd': | ||
| 678 | prompt_integer(&delay_secs, "Enter display delay"); | ||
| 679 | break; | ||
| 680 | case 'e': | ||
| 681 | prompt_integer(&print_entries, "Enter display entries (lines)"); | ||
| 682 | break; | ||
| 683 | case 'E': | ||
| 684 | if (nr_counters > 1) { | ||
| 685 | int i; | ||
| 686 | |||
| 687 | fprintf(stderr, "\nAvailable events:"); | ||
| 688 | for (i = 0; i < nr_counters; i++) | ||
| 689 | fprintf(stderr, "\n\t%d %s", i, event_name(i)); | ||
| 690 | |||
| 691 | prompt_integer(&sym_counter, "Enter details event counter"); | ||
| 692 | |||
| 693 | if (sym_counter >= nr_counters) { | ||
| 694 | fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(0)); | ||
| 695 | sym_counter = 0; | ||
| 696 | sleep(1); | ||
| 697 | } | ||
| 698 | } else sym_counter = 0; | ||
| 699 | break; | ||
| 700 | case 'f': | ||
| 701 | prompt_integer(&count_filter, "Enter display event count filter"); | ||
| 702 | break; | ||
| 703 | case 'F': | ||
| 704 | prompt_percent(&sym_pcnt_filter, "Enter details display event filter (percent)"); | ||
| 705 | break; | ||
| 706 | case 'q': | ||
| 707 | case 'Q': | ||
| 708 | printf("exiting.\n"); | ||
| 709 | exit(0); | ||
| 710 | case 's': | ||
| 711 | prompt_symbol(&sym_filter_entry, "Enter details symbol"); | ||
| 712 | break; | ||
| 713 | case 'S': | ||
| 714 | if (!sym_filter_entry) | ||
| 715 | break; | ||
| 716 | else { | ||
| 717 | struct sym_entry *syme = sym_filter_entry; | ||
| 718 | |||
| 719 | pthread_mutex_lock(&syme->source_lock); | ||
| 720 | sym_filter_entry = NULL; | ||
| 721 | __zero_source_counters(syme); | ||
| 722 | pthread_mutex_unlock(&syme->source_lock); | ||
| 723 | } | ||
| 724 | break; | ||
| 725 | case 'w': | ||
| 726 | display_weighted = ~display_weighted; | ||
| 727 | break; | ||
| 728 | case 'z': | ||
| 729 | zero = ~zero; | ||
| 730 | break; | ||
| 731 | } | ||
| 732 | } | ||
| 733 | |||
| 264 | static void *display_thread(void *arg __used) | 734 | static void *display_thread(void *arg __used) |
| 265 | { | 735 | { |
| 266 | struct pollfd stdin_poll = { .fd = 0, .events = POLLIN }; | 736 | struct pollfd stdin_poll = { .fd = 0, .events = POLLIN }; |
| 267 | int delay_msecs = delay_secs * 1000; | 737 | struct termios tc, save; |
| 738 | int delay_msecs, c; | ||
| 739 | |||
| 740 | tcgetattr(0, &save); | ||
| 741 | tc = save; | ||
| 742 | tc.c_lflag &= ~(ICANON | ECHO); | ||
| 743 | tc.c_cc[VMIN] = 0; | ||
| 744 | tc.c_cc[VTIME] = 0; | ||
| 268 | 745 | ||
| 269 | printf("PerfTop refresh period: %d seconds\n", delay_secs); | 746 | repeat: |
| 747 | delay_msecs = delay_secs * 1000; | ||
| 748 | tcsetattr(0, TCSANOW, &tc); | ||
| 749 | /* trash return*/ | ||
| 750 | getc(stdin); | ||
| 270 | 751 | ||
| 271 | do { | 752 | do { |
| 272 | print_sym_table(); | 753 | print_sym_table(); |
| 273 | } while (!poll(&stdin_poll, 1, delay_msecs) == 1); | 754 | } while (!poll(&stdin_poll, 1, delay_msecs) == 1); |
| 274 | 755 | ||
| 275 | printf("key pressed - exiting.\n"); | 756 | c = getc(stdin); |
| 276 | exit(0); | 757 | tcsetattr(0, TCSAFLUSH, &save); |
| 758 | |||
| 759 | handle_keypress(c); | ||
| 760 | goto repeat; | ||
| 277 | 761 | ||
| 278 | return NULL; | 762 | return NULL; |
| 279 | } | 763 | } |
| @@ -293,7 +777,6 @@ static const char *skip_symbols[] = { | |||
| 293 | 777 | ||
| 294 | static int symbol_filter(struct dso *self, struct symbol *sym) | 778 | static int symbol_filter(struct dso *self, struct symbol *sym) |
| 295 | { | 779 | { |
| 296 | static int filter_match; | ||
| 297 | struct sym_entry *syme; | 780 | struct sym_entry *syme; |
| 298 | const char *name = sym->name; | 781 | const char *name = sym->name; |
| 299 | int i; | 782 | int i; |
| @@ -315,6 +798,10 @@ static int symbol_filter(struct dso *self, struct symbol *sym) | |||
| 315 | return 1; | 798 | return 1; |
| 316 | 799 | ||
| 317 | syme = dso__sym_priv(self, sym); | 800 | syme = dso__sym_priv(self, sym); |
| 801 | pthread_mutex_init(&syme->source_lock, NULL); | ||
| 802 | if (!sym_filter_entry && sym_filter && !strcmp(name, sym_filter)) | ||
| 803 | sym_filter_entry = syme; | ||
| 804 | |||
| 318 | for (i = 0; skip_symbols[i]; i++) { | 805 | for (i = 0; skip_symbols[i]; i++) { |
| 319 | if (!strcmp(skip_symbols[i], name)) { | 806 | if (!strcmp(skip_symbols[i], name)) { |
| 320 | syme->skip = 1; | 807 | syme->skip = 1; |
| @@ -322,29 +809,6 @@ static int symbol_filter(struct dso *self, struct symbol *sym) | |||
| 322 | } | 809 | } |
| 323 | } | 810 | } |
| 324 | 811 | ||
| 325 | if (filter_match == 1) { | ||
| 326 | filter_end = sym->start; | ||
| 327 | filter_match = -1; | ||
| 328 | if (filter_end - filter_start > 10000) { | ||
| 329 | fprintf(stderr, | ||
| 330 | "hm, too large filter symbol <%s> - skipping.\n", | ||
| 331 | sym_filter); | ||
| 332 | fprintf(stderr, "symbol filter start: %016lx\n", | ||
| 333 | filter_start); | ||
| 334 | fprintf(stderr, " end: %016lx\n", | ||
| 335 | filter_end); | ||
| 336 | filter_end = filter_start = 0; | ||
| 337 | sym_filter = NULL; | ||
| 338 | sleep(1); | ||
| 339 | } | ||
| 340 | } | ||
| 341 | |||
| 342 | if (filter_match == 0 && sym_filter && !strcmp(name, sym_filter)) { | ||
| 343 | filter_match = 1; | ||
| 344 | filter_start = sym->start; | ||
| 345 | } | ||
| 346 | |||
| 347 | |||
| 348 | return 0; | 812 | return 0; |
| 349 | } | 813 | } |
| 350 | 814 | ||
| @@ -380,8 +844,6 @@ out_delete_dso: | |||
| 380 | return -1; | 844 | return -1; |
| 381 | } | 845 | } |
| 382 | 846 | ||
| 383 | #define TRACE_COUNT 3 | ||
| 384 | |||
| 385 | /* | 847 | /* |
| 386 | * Binary search in the histogram table and record the hit: | 848 | * Binary search in the histogram table and record the hit: |
| 387 | */ | 849 | */ |
| @@ -394,6 +856,7 @@ static void record_ip(u64 ip, int counter) | |||
| 394 | 856 | ||
| 395 | if (!syme->skip) { | 857 | if (!syme->skip) { |
| 396 | syme->count[counter]++; | 858 | syme->count[counter]++; |
| 859 | record_precise_ip(syme, counter, ip); | ||
| 397 | pthread_mutex_lock(&active_symbols_lock); | 860 | pthread_mutex_lock(&active_symbols_lock); |
| 398 | if (list_empty(&syme->node) || !syme->node.next) | 861 | if (list_empty(&syme->node) || !syme->node.next) |
| 399 | __list_insert_active_sym(syme); | 862 | __list_insert_active_sym(syme); |
| @@ -690,8 +1153,8 @@ static const struct option options[] = { | |||
| 690 | "put the counters into a counter group"), | 1153 | "put the counters into a counter group"), |
| 691 | OPT_BOOLEAN('i', "inherit", &inherit, | 1154 | OPT_BOOLEAN('i', "inherit", &inherit, |
| 692 | "child tasks inherit counters"), | 1155 | "child tasks inherit counters"), |
| 693 | OPT_STRING('s', "sym-filter", &sym_filter, "pattern", | 1156 | OPT_STRING('s', "sym-annotate", &sym_filter, "symbol name", |
| 694 | "only display symbols matchig this pattern"), | 1157 | "symbol to annotate - requires -k option"), |
| 695 | OPT_BOOLEAN('z', "zero", &zero, | 1158 | OPT_BOOLEAN('z', "zero", &zero, |
| 696 | "zero history across updates"), | 1159 | "zero history across updates"), |
| 697 | OPT_INTEGER('F', "freq", &freq, | 1160 | OPT_INTEGER('F', "freq", &freq, |
| @@ -734,6 +1197,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) | |||
| 734 | delay_secs = 1; | 1197 | delay_secs = 1; |
| 735 | 1198 | ||
| 736 | parse_symbols(); | 1199 | parse_symbols(); |
| 1200 | parse_source(sym_filter_entry); | ||
| 737 | 1201 | ||
| 738 | /* | 1202 | /* |
| 739 | * Fill in the ones not specifically initialized via -c: | 1203 | * Fill in the ones not specifically initialized via -c: |
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 9d3c8141b8c1..011473411642 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c | |||
| @@ -13,6 +13,7 @@ | |||
| 13 | #include <stdio.h> | 13 | #include <stdio.h> |
| 14 | #include <stdbool.h> | 14 | #include <stdbool.h> |
| 15 | #include <errno.h> | 15 | #include <errno.h> |
| 16 | #include <math.h> | ||
| 16 | 17 | ||
| 17 | #include "callchain.h" | 18 | #include "callchain.h" |
| 18 | 19 | ||
| @@ -26,10 +27,14 @@ rb_insert_callchain(struct rb_root *root, struct callchain_node *chain, | |||
| 26 | struct rb_node **p = &root->rb_node; | 27 | struct rb_node **p = &root->rb_node; |
| 27 | struct rb_node *parent = NULL; | 28 | struct rb_node *parent = NULL; |
| 28 | struct callchain_node *rnode; | 29 | struct callchain_node *rnode; |
| 30 | u64 chain_cumul = cumul_hits(chain); | ||
| 29 | 31 | ||
| 30 | while (*p) { | 32 | while (*p) { |
| 33 | u64 rnode_cumul; | ||
| 34 | |||
| 31 | parent = *p; | 35 | parent = *p; |
| 32 | rnode = rb_entry(parent, struct callchain_node, rb_node); | 36 | rnode = rb_entry(parent, struct callchain_node, rb_node); |
| 37 | rnode_cumul = cumul_hits(rnode); | ||
| 33 | 38 | ||
| 34 | switch (mode) { | 39 | switch (mode) { |
| 35 | case CHAIN_FLAT: | 40 | case CHAIN_FLAT: |
| @@ -40,7 +45,7 @@ rb_insert_callchain(struct rb_root *root, struct callchain_node *chain, | |||
| 40 | break; | 45 | break; |
| 41 | case CHAIN_GRAPH_ABS: /* Falldown */ | 46 | case CHAIN_GRAPH_ABS: /* Falldown */ |
| 42 | case CHAIN_GRAPH_REL: | 47 | case CHAIN_GRAPH_REL: |
| 43 | if (rnode->cumul_hit < chain->cumul_hit) | 48 | if (rnode_cumul < chain_cumul) |
| 44 | p = &(*p)->rb_left; | 49 | p = &(*p)->rb_left; |
| 45 | else | 50 | else |
| 46 | p = &(*p)->rb_right; | 51 | p = &(*p)->rb_right; |
| @@ -87,7 +92,7 @@ static void __sort_chain_graph_abs(struct callchain_node *node, | |||
| 87 | 92 | ||
| 88 | chain_for_each_child(child, node) { | 93 | chain_for_each_child(child, node) { |
| 89 | __sort_chain_graph_abs(child, min_hit); | 94 | __sort_chain_graph_abs(child, min_hit); |
| 90 | if (child->cumul_hit >= min_hit) | 95 | if (cumul_hits(child) >= min_hit) |
| 91 | rb_insert_callchain(&node->rb_root, child, | 96 | rb_insert_callchain(&node->rb_root, child, |
| 92 | CHAIN_GRAPH_ABS); | 97 | CHAIN_GRAPH_ABS); |
| 93 | } | 98 | } |
| @@ -108,11 +113,11 @@ static void __sort_chain_graph_rel(struct callchain_node *node, | |||
| 108 | u64 min_hit; | 113 | u64 min_hit; |
| 109 | 114 | ||
| 110 | node->rb_root = RB_ROOT; | 115 | node->rb_root = RB_ROOT; |
| 111 | min_hit = node->cumul_hit * min_percent / 100.0; | 116 | min_hit = ceil(node->children_hit * min_percent); |
| 112 | 117 | ||
| 113 | chain_for_each_child(child, node) { | 118 | chain_for_each_child(child, node) { |
| 114 | __sort_chain_graph_rel(child, min_percent); | 119 | __sort_chain_graph_rel(child, min_percent); |
| 115 | if (child->cumul_hit >= min_hit) | 120 | if (cumul_hits(child) >= min_hit) |
| 116 | rb_insert_callchain(&node->rb_root, child, | 121 | rb_insert_callchain(&node->rb_root, child, |
| 117 | CHAIN_GRAPH_REL); | 122 | CHAIN_GRAPH_REL); |
| 118 | } | 123 | } |
| @@ -122,7 +127,7 @@ static void | |||
| 122 | sort_chain_graph_rel(struct rb_root *rb_root, struct callchain_node *chain_root, | 127 | sort_chain_graph_rel(struct rb_root *rb_root, struct callchain_node *chain_root, |
| 123 | u64 min_hit __used, struct callchain_param *param) | 128 | u64 min_hit __used, struct callchain_param *param) |
| 124 | { | 129 | { |
| 125 | __sort_chain_graph_rel(chain_root, param->min_percent); | 130 | __sort_chain_graph_rel(chain_root, param->min_percent / 100.0); |
| 126 | rb_root->rb_node = chain_root->rb_root.rb_node; | 131 | rb_root->rb_node = chain_root->rb_root.rb_node; |
| 127 | } | 132 | } |
| 128 | 133 | ||
| @@ -211,7 +216,8 @@ add_child(struct callchain_node *parent, struct ip_callchain *chain, | |||
| 211 | new = create_child(parent, false); | 216 | new = create_child(parent, false); |
| 212 | fill_node(new, chain, start, syms); | 217 | fill_node(new, chain, start, syms); |
| 213 | 218 | ||
| 214 | new->cumul_hit = new->hit = 1; | 219 | new->children_hit = 0; |
| 220 | new->hit = 1; | ||
| 215 | } | 221 | } |
| 216 | 222 | ||
| 217 | /* | 223 | /* |
| @@ -241,7 +247,8 @@ split_add_child(struct callchain_node *parent, struct ip_callchain *chain, | |||
| 241 | 247 | ||
| 242 | /* split the hits */ | 248 | /* split the hits */ |
| 243 | new->hit = parent->hit; | 249 | new->hit = parent->hit; |
| 244 | new->cumul_hit = parent->cumul_hit; | 250 | new->children_hit = parent->children_hit; |
| 251 | parent->children_hit = cumul_hits(new); | ||
| 245 | new->val_nr = parent->val_nr - idx_local; | 252 | new->val_nr = parent->val_nr - idx_local; |
| 246 | parent->val_nr = idx_local; | 253 | parent->val_nr = idx_local; |
| 247 | 254 | ||
| @@ -249,6 +256,7 @@ split_add_child(struct callchain_node *parent, struct ip_callchain *chain, | |||
| 249 | if (idx_total < chain->nr) { | 256 | if (idx_total < chain->nr) { |
| 250 | parent->hit = 0; | 257 | parent->hit = 0; |
| 251 | add_child(parent, chain, idx_total, syms); | 258 | add_child(parent, chain, idx_total, syms); |
| 259 | parent->children_hit++; | ||
| 252 | } else { | 260 | } else { |
| 253 | parent->hit = 1; | 261 | parent->hit = 1; |
| 254 | } | 262 | } |
| @@ -269,13 +277,13 @@ __append_chain_children(struct callchain_node *root, struct ip_callchain *chain, | |||
| 269 | unsigned int ret = __append_chain(rnode, chain, start, syms); | 277 | unsigned int ret = __append_chain(rnode, chain, start, syms); |
| 270 | 278 | ||
| 271 | if (!ret) | 279 | if (!ret) |
| 272 | goto cumul; | 280 | goto inc_children_hit; |
| 273 | } | 281 | } |
| 274 | /* nothing in children, add to the current node */ | 282 | /* nothing in children, add to the current node */ |
| 275 | add_child(root, chain, start, syms); | 283 | add_child(root, chain, start, syms); |
| 276 | 284 | ||
| 277 | cumul: | 285 | inc_children_hit: |
| 278 | root->cumul_hit++; | 286 | root->children_hit++; |
| 279 | } | 287 | } |
| 280 | 288 | ||
| 281 | static int | 289 | static int |
| @@ -317,8 +325,6 @@ __append_chain(struct callchain_node *root, struct ip_callchain *chain, | |||
| 317 | /* we match 100% of the path, increment the hit */ | 325 | /* we match 100% of the path, increment the hit */ |
| 318 | if (i - start == root->val_nr && i == chain->nr) { | 326 | if (i - start == root->val_nr && i == chain->nr) { |
| 319 | root->hit++; | 327 | root->hit++; |
| 320 | root->cumul_hit++; | ||
| 321 | |||
| 322 | return 0; | 328 | return 0; |
| 323 | } | 329 | } |
| 324 | 330 | ||
| @@ -331,5 +337,7 @@ __append_chain(struct callchain_node *root, struct ip_callchain *chain, | |||
| 331 | void append_chain(struct callchain_node *root, struct ip_callchain *chain, | 337 | void append_chain(struct callchain_node *root, struct ip_callchain *chain, |
| 332 | struct symbol **syms) | 338 | struct symbol **syms) |
| 333 | { | 339 | { |
| 340 | if (!chain->nr) | ||
| 341 | return; | ||
| 334 | __append_chain_children(root, chain, syms, 0); | 342 | __append_chain_children(root, chain, syms, 0); |
| 335 | } | 343 | } |
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 7812122bea1d..a926ae4f5a16 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h | |||
| @@ -7,6 +7,7 @@ | |||
| 7 | #include "symbol.h" | 7 | #include "symbol.h" |
| 8 | 8 | ||
| 9 | enum chain_mode { | 9 | enum chain_mode { |
| 10 | CHAIN_NONE, | ||
| 10 | CHAIN_FLAT, | 11 | CHAIN_FLAT, |
| 11 | CHAIN_GRAPH_ABS, | 12 | CHAIN_GRAPH_ABS, |
| 12 | CHAIN_GRAPH_REL | 13 | CHAIN_GRAPH_REL |
| @@ -21,7 +22,7 @@ struct callchain_node { | |||
| 21 | struct rb_root rb_root; /* sorted tree of children */ | 22 | struct rb_root rb_root; /* sorted tree of children */ |
| 22 | unsigned int val_nr; | 23 | unsigned int val_nr; |
| 23 | u64 hit; | 24 | u64 hit; |
| 24 | u64 cumul_hit; /* hit + hits of children */ | 25 | u64 children_hit; |
| 25 | }; | 26 | }; |
| 26 | 27 | ||
| 27 | struct callchain_param; | 28 | struct callchain_param; |
| @@ -48,6 +49,11 @@ static inline void callchain_init(struct callchain_node *node) | |||
| 48 | INIT_LIST_HEAD(&node->val); | 49 | INIT_LIST_HEAD(&node->val); |
| 49 | } | 50 | } |
| 50 | 51 | ||
| 52 | static inline u64 cumul_hits(struct callchain_node *node) | ||
| 53 | { | ||
| 54 | return node->hit + node->children_hit; | ||
| 55 | } | ||
| 56 | |||
| 51 | int register_callchain_param(struct callchain_param *param); | 57 | int register_callchain_param(struct callchain_param *param); |
| 52 | void append_chain(struct callchain_node *root, struct ip_callchain *chain, | 58 | void append_chain(struct callchain_node *root, struct ip_callchain *chain, |
| 53 | struct symbol **syms); | 59 | struct symbol **syms); |
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 450384b3bbe5..b92a457ca32e 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c | |||
| @@ -185,6 +185,8 @@ static void do_read(int fd, void *buf, size_t size) | |||
| 185 | 185 | ||
| 186 | if (ret < 0) | 186 | if (ret < 0) |
| 187 | die("failed to read"); | 187 | die("failed to read"); |
| 188 | if (ret == 0) | ||
| 189 | die("failed to read: missing data"); | ||
| 188 | 190 | ||
| 189 | size -= ret; | 191 | size -= ret; |
| 190 | buf += ret; | 192 | buf += ret; |
| @@ -213,9 +215,10 @@ struct perf_header *perf_header__read(int fd) | |||
| 213 | 215 | ||
| 214 | for (i = 0; i < nr_attrs; i++) { | 216 | for (i = 0; i < nr_attrs; i++) { |
| 215 | struct perf_header_attr *attr; | 217 | struct perf_header_attr *attr; |
| 216 | off_t tmp = lseek(fd, 0, SEEK_CUR); | 218 | off_t tmp; |
| 217 | 219 | ||
| 218 | do_read(fd, &f_attr, sizeof(f_attr)); | 220 | do_read(fd, &f_attr, sizeof(f_attr)); |
| 221 | tmp = lseek(fd, 0, SEEK_CUR); | ||
| 219 | 222 | ||
| 220 | attr = perf_header_attr__new(&f_attr.attr); | 223 | attr = perf_header_attr__new(&f_attr.attr); |
| 221 | 224 | ||
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 7bdad8df22a6..4858d83b3b67 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c | |||
| @@ -121,13 +121,29 @@ static unsigned long hw_cache_stat[C(MAX)] = { | |||
| 121 | (strcmp(sys_dirent.d_name, ".")) && \ | 121 | (strcmp(sys_dirent.d_name, ".")) && \ |
| 122 | (strcmp(sys_dirent.d_name, ".."))) | 122 | (strcmp(sys_dirent.d_name, ".."))) |
| 123 | 123 | ||
| 124 | static int tp_event_has_id(struct dirent *sys_dir, struct dirent *evt_dir) | ||
| 125 | { | ||
| 126 | char evt_path[MAXPATHLEN]; | ||
| 127 | int fd; | ||
| 128 | |||
| 129 | snprintf(evt_path, MAXPATHLEN, "%s/%s/%s/id", debugfs_path, | ||
| 130 | sys_dir->d_name, evt_dir->d_name); | ||
| 131 | fd = open(evt_path, O_RDONLY); | ||
| 132 | if (fd < 0) | ||
| 133 | return -EINVAL; | ||
| 134 | close(fd); | ||
| 135 | |||
| 136 | return 0; | ||
| 137 | } | ||
| 138 | |||
| 124 | #define for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next, file, st) \ | 139 | #define for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next, file, st) \ |
| 125 | while (!readdir_r(evt_dir, &evt_dirent, &evt_next) && evt_next) \ | 140 | while (!readdir_r(evt_dir, &evt_dirent, &evt_next) && evt_next) \ |
| 126 | if (snprintf(file, MAXPATHLEN, "%s/%s/%s", debugfs_path, \ | 141 | if (snprintf(file, MAXPATHLEN, "%s/%s/%s", debugfs_path, \ |
| 127 | sys_dirent.d_name, evt_dirent.d_name) && \ | 142 | sys_dirent.d_name, evt_dirent.d_name) && \ |
| 128 | (!stat(file, &st)) && (S_ISDIR(st.st_mode)) && \ | 143 | (!stat(file, &st)) && (S_ISDIR(st.st_mode)) && \ |
| 129 | (strcmp(evt_dirent.d_name, ".")) && \ | 144 | (strcmp(evt_dirent.d_name, ".")) && \ |
| 130 | (strcmp(evt_dirent.d_name, ".."))) | 145 | (strcmp(evt_dirent.d_name, "..")) && \ |
| 146 | (!tp_event_has_id(&sys_dirent, &evt_dirent))) | ||
| 131 | 147 | ||
| 132 | #define MAX_EVENT_LENGTH 30 | 148 | #define MAX_EVENT_LENGTH 30 |
| 133 | 149 | ||
| @@ -223,9 +239,15 @@ char *event_name(int counter) | |||
| 223 | { | 239 | { |
| 224 | u64 config = attrs[counter].config; | 240 | u64 config = attrs[counter].config; |
| 225 | int type = attrs[counter].type; | 241 | int type = attrs[counter].type; |
| 242 | |||
| 243 | return __event_name(type, config); | ||
| 244 | } | ||
| 245 | |||
| 246 | char *__event_name(int type, u64 config) | ||
| 247 | { | ||
| 226 | static char buf[32]; | 248 | static char buf[32]; |
| 227 | 249 | ||
| 228 | if (attrs[counter].type == PERF_TYPE_RAW) { | 250 | if (type == PERF_TYPE_RAW) { |
| 229 | sprintf(buf, "raw 0x%llx", config); | 251 | sprintf(buf, "raw 0x%llx", config); |
| 230 | return buf; | 252 | return buf; |
| 231 | } | 253 | } |
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 1ea5d09b6eb1..192a962e3a0f 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h | |||
| @@ -10,6 +10,7 @@ extern int nr_counters; | |||
| 10 | extern struct perf_counter_attr attrs[MAX_COUNTERS]; | 10 | extern struct perf_counter_attr attrs[MAX_COUNTERS]; |
| 11 | 11 | ||
| 12 | extern char *event_name(int ctr); | 12 | extern char *event_name(int ctr); |
| 13 | extern char *__event_name(int type, u64 config); | ||
| 13 | 14 | ||
| 14 | extern int parse_events(const struct option *opt, const char *str, int unset); | 15 | extern int parse_events(const struct option *opt, const char *str, int unset); |
| 15 | 16 | ||
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 16ddca202948..f1dcede14307 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c | |||
| @@ -24,6 +24,16 @@ const char *sym_hist_filter; | |||
| 24 | #define DMGL_ANSI (1 << 1) /* Include const, volatile, etc */ | 24 | #define DMGL_ANSI (1 << 1) /* Include const, volatile, etc */ |
| 25 | #endif | 25 | #endif |
| 26 | 26 | ||
| 27 | enum dso_origin { | ||
| 28 | DSO__ORIG_KERNEL = 0, | ||
| 29 | DSO__ORIG_JAVA_JIT, | ||
| 30 | DSO__ORIG_FEDORA, | ||
| 31 | DSO__ORIG_UBUNTU, | ||
| 32 | DSO__ORIG_BUILDID, | ||
| 33 | DSO__ORIG_DSO, | ||
| 34 | DSO__ORIG_NOT_FOUND, | ||
| 35 | }; | ||
| 36 | |||
| 27 | static struct symbol *symbol__new(u64 start, u64 len, | 37 | static struct symbol *symbol__new(u64 start, u64 len, |
| 28 | const char *name, unsigned int priv_size, | 38 | const char *name, unsigned int priv_size, |
| 29 | u64 obj_start, int verbose) | 39 | u64 obj_start, int verbose) |
| @@ -81,6 +91,7 @@ struct dso *dso__new(const char *name, unsigned int sym_priv_size) | |||
| 81 | self->sym_priv_size = sym_priv_size; | 91 | self->sym_priv_size = sym_priv_size; |
| 82 | self->find_symbol = dso__find_symbol; | 92 | self->find_symbol = dso__find_symbol; |
| 83 | self->slen_calculated = 0; | 93 | self->slen_calculated = 0; |
| 94 | self->origin = DSO__ORIG_NOT_FOUND; | ||
| 84 | } | 95 | } |
| 85 | 96 | ||
| 86 | return self; | 97 | return self; |
| @@ -710,7 +721,7 @@ static char *dso__read_build_id(struct dso *self, int verbose) | |||
| 710 | ++raw; | 721 | ++raw; |
| 711 | bid += 2; | 722 | bid += 2; |
| 712 | } | 723 | } |
| 713 | if (verbose) | 724 | if (verbose >= 2) |
| 714 | printf("%s(%s): %s\n", __func__, self->name, build_id); | 725 | printf("%s(%s): %s\n", __func__, self->name, build_id); |
| 715 | out_elf_end: | 726 | out_elf_end: |
| 716 | elf_end(elf); | 727 | elf_end(elf); |
| @@ -720,11 +731,26 @@ out: | |||
| 720 | return build_id; | 731 | return build_id; |
| 721 | } | 732 | } |
| 722 | 733 | ||
| 734 | char dso__symtab_origin(const struct dso *self) | ||
| 735 | { | ||
| 736 | static const char origin[] = { | ||
| 737 | [DSO__ORIG_KERNEL] = 'k', | ||
| 738 | [DSO__ORIG_JAVA_JIT] = 'j', | ||
| 739 | [DSO__ORIG_FEDORA] = 'f', | ||
| 740 | [DSO__ORIG_UBUNTU] = 'u', | ||
| 741 | [DSO__ORIG_BUILDID] = 'b', | ||
| 742 | [DSO__ORIG_DSO] = 'd', | ||
| 743 | }; | ||
| 744 | |||
| 745 | if (self == NULL || self->origin == DSO__ORIG_NOT_FOUND) | ||
| 746 | return '!'; | ||
| 747 | return origin[self->origin]; | ||
| 748 | } | ||
| 749 | |||
| 723 | int dso__load(struct dso *self, symbol_filter_t filter, int verbose) | 750 | int dso__load(struct dso *self, symbol_filter_t filter, int verbose) |
| 724 | { | 751 | { |
| 725 | int size = PATH_MAX; | 752 | int size = PATH_MAX; |
| 726 | char *name = malloc(size), *build_id = NULL; | 753 | char *name = malloc(size), *build_id = NULL; |
| 727 | int variant = 0; | ||
| 728 | int ret = -1; | 754 | int ret = -1; |
| 729 | int fd; | 755 | int fd; |
| 730 | 756 | ||
| @@ -733,19 +759,26 @@ int dso__load(struct dso *self, symbol_filter_t filter, int verbose) | |||
| 733 | 759 | ||
| 734 | self->adjust_symbols = 0; | 760 | self->adjust_symbols = 0; |
| 735 | 761 | ||
| 736 | if (strncmp(self->name, "/tmp/perf-", 10) == 0) | 762 | if (strncmp(self->name, "/tmp/perf-", 10) == 0) { |
| 737 | return dso__load_perf_map(self, filter, verbose); | 763 | ret = dso__load_perf_map(self, filter, verbose); |
| 764 | self->origin = ret > 0 ? DSO__ORIG_JAVA_JIT : | ||
| 765 | DSO__ORIG_NOT_FOUND; | ||
| 766 | return ret; | ||
| 767 | } | ||
| 768 | |||
| 769 | self->origin = DSO__ORIG_FEDORA - 1; | ||
| 738 | 770 | ||
| 739 | more: | 771 | more: |
| 740 | do { | 772 | do { |
| 741 | switch (variant) { | 773 | self->origin++; |
| 742 | case 0: /* Fedora */ | 774 | switch (self->origin) { |
| 775 | case DSO__ORIG_FEDORA: | ||
| 743 | snprintf(name, size, "/usr/lib/debug%s.debug", self->name); | 776 | snprintf(name, size, "/usr/lib/debug%s.debug", self->name); |
| 744 | break; | 777 | break; |
| 745 | case 1: /* Ubuntu */ | 778 | case DSO__ORIG_UBUNTU: |
| 746 | snprintf(name, size, "/usr/lib/debug%s", self->name); | 779 | snprintf(name, size, "/usr/lib/debug%s", self->name); |
| 747 | break; | 780 | break; |
| 748 | case 2: | 781 | case DSO__ORIG_BUILDID: |
| 749 | build_id = dso__read_build_id(self, verbose); | 782 | build_id = dso__read_build_id(self, verbose); |
| 750 | if (build_id != NULL) { | 783 | if (build_id != NULL) { |
| 751 | snprintf(name, size, | 784 | snprintf(name, size, |
| @@ -754,16 +787,15 @@ more: | |||
| 754 | free(build_id); | 787 | free(build_id); |
| 755 | break; | 788 | break; |
| 756 | } | 789 | } |
| 757 | variant++; | 790 | self->origin++; |
| 758 | /* Fall thru */ | 791 | /* Fall thru */ |
| 759 | case 3: /* Sane people */ | 792 | case DSO__ORIG_DSO: |
| 760 | snprintf(name, size, "%s", self->name); | 793 | snprintf(name, size, "%s", self->name); |
| 761 | break; | 794 | break; |
| 762 | 795 | ||
| 763 | default: | 796 | default: |
| 764 | goto out; | 797 | goto out; |
| 765 | } | 798 | } |
| 766 | variant++; | ||
| 767 | 799 | ||
| 768 | fd = open(name, O_RDONLY); | 800 | fd = open(name, O_RDONLY); |
| 769 | } while (fd < 0); | 801 | } while (fd < 0); |
| @@ -899,6 +931,9 @@ int dso__load_kernel(struct dso *self, const char *vmlinux, | |||
| 899 | if (err <= 0) | 931 | if (err <= 0) |
| 900 | err = dso__load_kallsyms(self, filter, verbose); | 932 | err = dso__load_kallsyms(self, filter, verbose); |
| 901 | 933 | ||
| 934 | if (err > 0) | ||
| 935 | self->origin = DSO__ORIG_KERNEL; | ||
| 936 | |||
| 902 | return err; | 937 | return err; |
| 903 | } | 938 | } |
| 904 | 939 | ||
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 2f92b21c712d..1e003ec2f4b1 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h | |||
| @@ -26,6 +26,7 @@ struct dso { | |||
| 26 | unsigned int sym_priv_size; | 26 | unsigned int sym_priv_size; |
| 27 | unsigned char adjust_symbols; | 27 | unsigned char adjust_symbols; |
| 28 | unsigned char slen_calculated; | 28 | unsigned char slen_calculated; |
| 29 | unsigned char origin; | ||
| 29 | char name[0]; | 30 | char name[0]; |
| 30 | }; | 31 | }; |
| 31 | 32 | ||
| @@ -49,6 +50,7 @@ int dso__load_modules(struct dso *self, symbol_filter_t filter, int verbose); | |||
| 49 | int dso__load(struct dso *self, symbol_filter_t filter, int verbose); | 50 | int dso__load(struct dso *self, symbol_filter_t filter, int verbose); |
| 50 | 51 | ||
| 51 | size_t dso__fprintf(struct dso *self, FILE *fp); | 52 | size_t dso__fprintf(struct dso *self, FILE *fp); |
| 53 | char dso__symtab_origin(const struct dso *self); | ||
| 52 | 54 | ||
| 53 | void symbol__init(void); | 55 | void symbol__init(void); |
| 54 | #endif /* _PERF_SYMBOL_ */ | 56 | #endif /* _PERF_SYMBOL_ */ |
