diff options
32 files changed, 1604 insertions, 422 deletions
diff --git a/arch/powerpc/include/asm/perf_counter.h b/arch/powerpc/include/asm/perf_counter.h index 8ccd4e155768..0ea0639fcf75 100644 --- a/arch/powerpc/include/asm/perf_counter.h +++ b/arch/powerpc/include/asm/perf_counter.h | |||
@@ -61,6 +61,8 @@ struct pt_regs; | |||
61 | extern unsigned long perf_misc_flags(struct pt_regs *regs); | 61 | extern unsigned long perf_misc_flags(struct pt_regs *regs); |
62 | extern unsigned long perf_instruction_pointer(struct pt_regs *regs); | 62 | extern unsigned long perf_instruction_pointer(struct pt_regs *regs); |
63 | 63 | ||
64 | #define PERF_COUNTER_INDEX_OFFSET 1 | ||
65 | |||
64 | /* | 66 | /* |
65 | * Only override the default definitions in include/linux/perf_counter.h | 67 | * Only override the default definitions in include/linux/perf_counter.h |
66 | * if we have hardware PMU support. | 68 | * if we have hardware PMU support. |
diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h index 5fb33e160ea0..fa64e401589d 100644 --- a/arch/x86/include/asm/perf_counter.h +++ b/arch/x86/include/asm/perf_counter.h | |||
@@ -87,6 +87,9 @@ union cpuid10_edx { | |||
87 | #ifdef CONFIG_PERF_COUNTERS | 87 | #ifdef CONFIG_PERF_COUNTERS |
88 | extern void init_hw_perf_counters(void); | 88 | extern void init_hw_perf_counters(void); |
89 | extern void perf_counters_lapic_init(void); | 89 | extern void perf_counters_lapic_init(void); |
90 | |||
91 | #define PERF_COUNTER_INDEX_OFFSET 0 | ||
92 | |||
90 | #else | 93 | #else |
91 | static inline void init_hw_perf_counters(void) { } | 94 | static inline void init_hw_perf_counters(void) { } |
92 | static inline void perf_counters_lapic_init(void) { } | 95 | static inline void perf_counters_lapic_init(void) { } |
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 76dfef23f789..d4cf4ce19aac 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c | |||
@@ -401,7 +401,7 @@ static const u64 amd_hw_cache_event_ids | |||
401 | [ C(RESULT_MISS) ] = 0x0041, /* Data Cache Misses */ | 401 | [ C(RESULT_MISS) ] = 0x0041, /* Data Cache Misses */ |
402 | }, | 402 | }, |
403 | [ C(OP_WRITE) ] = { | 403 | [ C(OP_WRITE) ] = { |
404 | [ C(RESULT_ACCESS) ] = 0x0042, /* Data Cache Refills from L2 */ | 404 | [ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */ |
405 | [ C(RESULT_MISS) ] = 0, | 405 | [ C(RESULT_MISS) ] = 0, |
406 | }, | 406 | }, |
407 | [ C(OP_PREFETCH) ] = { | 407 | [ C(OP_PREFETCH) ] = { |
@@ -912,6 +912,8 @@ x86_perf_counter_set_period(struct perf_counter *counter, | |||
912 | err = checking_wrmsrl(hwc->counter_base + idx, | 912 | err = checking_wrmsrl(hwc->counter_base + idx, |
913 | (u64)(-left) & x86_pmu.counter_mask); | 913 | (u64)(-left) & x86_pmu.counter_mask); |
914 | 914 | ||
915 | perf_counter_update_userpage(counter); | ||
916 | |||
915 | return ret; | 917 | return ret; |
916 | } | 918 | } |
917 | 919 | ||
@@ -969,13 +971,6 @@ fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc) | |||
969 | if (!x86_pmu.num_counters_fixed) | 971 | if (!x86_pmu.num_counters_fixed) |
970 | return -1; | 972 | return -1; |
971 | 973 | ||
972 | /* | ||
973 | * Quirk, IA32_FIXED_CTRs do not work on current Atom processors: | ||
974 | */ | ||
975 | if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && | ||
976 | boot_cpu_data.x86_model == 28) | ||
977 | return -1; | ||
978 | |||
979 | event = hwc->config & ARCH_PERFMON_EVENT_MASK; | 974 | event = hwc->config & ARCH_PERFMON_EVENT_MASK; |
980 | 975 | ||
981 | if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS))) | 976 | if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS))) |
@@ -1041,6 +1036,8 @@ try_generic: | |||
1041 | x86_perf_counter_set_period(counter, hwc, idx); | 1036 | x86_perf_counter_set_period(counter, hwc, idx); |
1042 | x86_pmu.enable(hwc, idx); | 1037 | x86_pmu.enable(hwc, idx); |
1043 | 1038 | ||
1039 | perf_counter_update_userpage(counter); | ||
1040 | |||
1044 | return 0; | 1041 | return 0; |
1045 | } | 1042 | } |
1046 | 1043 | ||
@@ -1133,6 +1130,8 @@ static void x86_pmu_disable(struct perf_counter *counter) | |||
1133 | x86_perf_counter_update(counter, hwc, idx); | 1130 | x86_perf_counter_update(counter, hwc, idx); |
1134 | cpuc->counters[idx] = NULL; | 1131 | cpuc->counters[idx] = NULL; |
1135 | clear_bit(idx, cpuc->used_mask); | 1132 | clear_bit(idx, cpuc->used_mask); |
1133 | |||
1134 | perf_counter_update_userpage(counter); | ||
1136 | } | 1135 | } |
1137 | 1136 | ||
1138 | /* | 1137 | /* |
@@ -1428,8 +1427,6 @@ static int intel_pmu_init(void) | |||
1428 | */ | 1427 | */ |
1429 | x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3); | 1428 | x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3); |
1430 | 1429 | ||
1431 | rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); | ||
1432 | |||
1433 | /* | 1430 | /* |
1434 | * Install the hw-cache-events table: | 1431 | * Install the hw-cache-events table: |
1435 | */ | 1432 | */ |
@@ -1499,21 +1496,22 @@ void __init init_hw_perf_counters(void) | |||
1499 | pr_cont("%s PMU driver.\n", x86_pmu.name); | 1496 | pr_cont("%s PMU driver.\n", x86_pmu.name); |
1500 | 1497 | ||
1501 | if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) { | 1498 | if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) { |
1502 | x86_pmu.num_counters = X86_PMC_MAX_GENERIC; | ||
1503 | WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!", | 1499 | WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!", |
1504 | x86_pmu.num_counters, X86_PMC_MAX_GENERIC); | 1500 | x86_pmu.num_counters, X86_PMC_MAX_GENERIC); |
1501 | x86_pmu.num_counters = X86_PMC_MAX_GENERIC; | ||
1505 | } | 1502 | } |
1506 | perf_counter_mask = (1 << x86_pmu.num_counters) - 1; | 1503 | perf_counter_mask = (1 << x86_pmu.num_counters) - 1; |
1507 | perf_max_counters = x86_pmu.num_counters; | 1504 | perf_max_counters = x86_pmu.num_counters; |
1508 | 1505 | ||
1509 | if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) { | 1506 | if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) { |
1510 | x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED; | ||
1511 | WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!", | 1507 | WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!", |
1512 | x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED); | 1508 | x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED); |
1509 | x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED; | ||
1513 | } | 1510 | } |
1514 | 1511 | ||
1515 | perf_counter_mask |= | 1512 | perf_counter_mask |= |
1516 | ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED; | 1513 | ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED; |
1514 | x86_pmu.intel_ctrl = perf_counter_mask; | ||
1517 | 1515 | ||
1518 | perf_counters_lapic_init(); | 1516 | perf_counters_lapic_init(); |
1519 | register_die_notifier(&perf_counter_nmi_notifier); | 1517 | register_die_notifier(&perf_counter_nmi_notifier); |
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 89698d8aba5c..5e970c7d3fd5 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h | |||
@@ -178,8 +178,10 @@ struct perf_counter_attr { | |||
178 | mmap : 1, /* include mmap data */ | 178 | mmap : 1, /* include mmap data */ |
179 | comm : 1, /* include comm data */ | 179 | comm : 1, /* include comm data */ |
180 | freq : 1, /* use freq, not period */ | 180 | freq : 1, /* use freq, not period */ |
181 | inherit_stat : 1, /* per task counts */ | ||
182 | enable_on_exec : 1, /* next exec enables */ | ||
181 | 183 | ||
182 | __reserved_1 : 53; | 184 | __reserved_1 : 51; |
183 | 185 | ||
184 | __u32 wakeup_events; /* wakeup every n events */ | 186 | __u32 wakeup_events; /* wakeup every n events */ |
185 | __u32 __reserved_2; | 187 | __u32 __reserved_2; |
@@ -232,6 +234,14 @@ struct perf_counter_mmap_page { | |||
232 | __u32 lock; /* seqlock for synchronization */ | 234 | __u32 lock; /* seqlock for synchronization */ |
233 | __u32 index; /* hardware counter identifier */ | 235 | __u32 index; /* hardware counter identifier */ |
234 | __s64 offset; /* add to hardware counter value */ | 236 | __s64 offset; /* add to hardware counter value */ |
237 | __u64 time_enabled; /* time counter active */ | ||
238 | __u64 time_running; /* time counter on cpu */ | ||
239 | |||
240 | /* | ||
241 | * Hole for extension of the self monitor capabilities | ||
242 | */ | ||
243 | |||
244 | __u64 __reserved[123]; /* align to 1k */ | ||
235 | 245 | ||
236 | /* | 246 | /* |
237 | * Control data for the mmap() data buffer. | 247 | * Control data for the mmap() data buffer. |
@@ -253,7 +263,6 @@ struct perf_counter_mmap_page { | |||
253 | #define PERF_EVENT_MISC_KERNEL (1 << 0) | 263 | #define PERF_EVENT_MISC_KERNEL (1 << 0) |
254 | #define PERF_EVENT_MISC_USER (2 << 0) | 264 | #define PERF_EVENT_MISC_USER (2 << 0) |
255 | #define PERF_EVENT_MISC_HYPERVISOR (3 << 0) | 265 | #define PERF_EVENT_MISC_HYPERVISOR (3 << 0) |
256 | #define PERF_EVENT_MISC_OVERFLOW (1 << 2) | ||
257 | 266 | ||
258 | struct perf_event_header { | 267 | struct perf_event_header { |
259 | __u32 type; | 268 | __u32 type; |
@@ -327,9 +336,18 @@ enum perf_event_type { | |||
327 | PERF_EVENT_FORK = 7, | 336 | PERF_EVENT_FORK = 7, |
328 | 337 | ||
329 | /* | 338 | /* |
330 | * When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field | 339 | * struct { |
331 | * will be PERF_SAMPLE_* | 340 | * struct perf_event_header header; |
332 | * | 341 | * u32 pid, tid; |
342 | * u64 value; | ||
343 | * { u64 time_enabled; } && PERF_FORMAT_ENABLED | ||
344 | * { u64 time_running; } && PERF_FORMAT_RUNNING | ||
345 | * { u64 parent_id; } && PERF_FORMAT_ID | ||
346 | * }; | ||
347 | */ | ||
348 | PERF_EVENT_READ = 8, | ||
349 | |||
350 | /* | ||
333 | * struct { | 351 | * struct { |
334 | * struct perf_event_header header; | 352 | * struct perf_event_header header; |
335 | * | 353 | * |
@@ -337,8 +355,9 @@ enum perf_event_type { | |||
337 | * { u32 pid, tid; } && PERF_SAMPLE_TID | 355 | * { u32 pid, tid; } && PERF_SAMPLE_TID |
338 | * { u64 time; } && PERF_SAMPLE_TIME | 356 | * { u64 time; } && PERF_SAMPLE_TIME |
339 | * { u64 addr; } && PERF_SAMPLE_ADDR | 357 | * { u64 addr; } && PERF_SAMPLE_ADDR |
340 | * { u64 config; } && PERF_SAMPLE_CONFIG | 358 | * { u64 id; } && PERF_SAMPLE_ID |
341 | * { u32 cpu, res; } && PERF_SAMPLE_CPU | 359 | * { u32 cpu, res; } && PERF_SAMPLE_CPU |
360 | * { u64 period; } && PERF_SAMPLE_PERIOD | ||
342 | * | 361 | * |
343 | * { u64 nr; | 362 | * { u64 nr; |
344 | * { u64 id, val; } cnt[nr]; } && PERF_SAMPLE_GROUP | 363 | * { u64 id, val; } cnt[nr]; } && PERF_SAMPLE_GROUP |
@@ -347,6 +366,9 @@ enum perf_event_type { | |||
347 | * u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN | 366 | * u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN |
348 | * }; | 367 | * }; |
349 | */ | 368 | */ |
369 | PERF_EVENT_SAMPLE = 9, | ||
370 | |||
371 | PERF_EVENT_MAX, /* non-ABI */ | ||
350 | }; | 372 | }; |
351 | 373 | ||
352 | enum perf_callchain_context { | 374 | enum perf_callchain_context { |
@@ -582,6 +604,7 @@ struct perf_counter_context { | |||
582 | int nr_counters; | 604 | int nr_counters; |
583 | int nr_active; | 605 | int nr_active; |
584 | int is_active; | 606 | int is_active; |
607 | int nr_stat; | ||
585 | atomic_t refcount; | 608 | atomic_t refcount; |
586 | struct task_struct *task; | 609 | struct task_struct *task; |
587 | 610 | ||
@@ -669,7 +692,16 @@ static inline int is_software_counter(struct perf_counter *counter) | |||
669 | (counter->attr.type != PERF_TYPE_HW_CACHE); | 692 | (counter->attr.type != PERF_TYPE_HW_CACHE); |
670 | } | 693 | } |
671 | 694 | ||
672 | extern void perf_swcounter_event(u32, u64, int, struct pt_regs *, u64); | 695 | extern atomic_t perf_swcounter_enabled[PERF_COUNT_SW_MAX]; |
696 | |||
697 | extern void __perf_swcounter_event(u32, u64, int, struct pt_regs *, u64); | ||
698 | |||
699 | static inline void | ||
700 | perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs, u64 addr) | ||
701 | { | ||
702 | if (atomic_read(&perf_swcounter_enabled[event])) | ||
703 | __perf_swcounter_event(event, nr, nmi, regs, addr); | ||
704 | } | ||
673 | 705 | ||
674 | extern void __perf_counter_mmap(struct vm_area_struct *vma); | 706 | extern void __perf_counter_mmap(struct vm_area_struct *vma); |
675 | 707 | ||
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 1a933a221ea4..d55a50da2347 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c | |||
@@ -236,6 +236,8 @@ list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx) | |||
236 | 236 | ||
237 | list_add_rcu(&counter->event_entry, &ctx->event_list); | 237 | list_add_rcu(&counter->event_entry, &ctx->event_list); |
238 | ctx->nr_counters++; | 238 | ctx->nr_counters++; |
239 | if (counter->attr.inherit_stat) | ||
240 | ctx->nr_stat++; | ||
239 | } | 241 | } |
240 | 242 | ||
241 | /* | 243 | /* |
@@ -250,6 +252,8 @@ list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx) | |||
250 | if (list_empty(&counter->list_entry)) | 252 | if (list_empty(&counter->list_entry)) |
251 | return; | 253 | return; |
252 | ctx->nr_counters--; | 254 | ctx->nr_counters--; |
255 | if (counter->attr.inherit_stat) | ||
256 | ctx->nr_stat--; | ||
253 | 257 | ||
254 | list_del_init(&counter->list_entry); | 258 | list_del_init(&counter->list_entry); |
255 | list_del_rcu(&counter->event_entry); | 259 | list_del_rcu(&counter->event_entry); |
@@ -1006,6 +1010,81 @@ static int context_equiv(struct perf_counter_context *ctx1, | |||
1006 | && !ctx1->pin_count && !ctx2->pin_count; | 1010 | && !ctx1->pin_count && !ctx2->pin_count; |
1007 | } | 1011 | } |
1008 | 1012 | ||
1013 | static void __perf_counter_read(void *counter); | ||
1014 | |||
1015 | static void __perf_counter_sync_stat(struct perf_counter *counter, | ||
1016 | struct perf_counter *next_counter) | ||
1017 | { | ||
1018 | u64 value; | ||
1019 | |||
1020 | if (!counter->attr.inherit_stat) | ||
1021 | return; | ||
1022 | |||
1023 | /* | ||
1024 | * Update the counter value, we cannot use perf_counter_read() | ||
1025 | * because we're in the middle of a context switch and have IRQs | ||
1026 | * disabled, which upsets smp_call_function_single(), however | ||
1027 | * we know the counter must be on the current CPU, therefore we | ||
1028 | * don't need to use it. | ||
1029 | */ | ||
1030 | switch (counter->state) { | ||
1031 | case PERF_COUNTER_STATE_ACTIVE: | ||
1032 | __perf_counter_read(counter); | ||
1033 | break; | ||
1034 | |||
1035 | case PERF_COUNTER_STATE_INACTIVE: | ||
1036 | update_counter_times(counter); | ||
1037 | break; | ||
1038 | |||
1039 | default: | ||
1040 | break; | ||
1041 | } | ||
1042 | |||
1043 | /* | ||
1044 | * In order to keep per-task stats reliable we need to flip the counter | ||
1045 | * values when we flip the contexts. | ||
1046 | */ | ||
1047 | value = atomic64_read(&next_counter->count); | ||
1048 | value = atomic64_xchg(&counter->count, value); | ||
1049 | atomic64_set(&next_counter->count, value); | ||
1050 | |||
1051 | swap(counter->total_time_enabled, next_counter->total_time_enabled); | ||
1052 | swap(counter->total_time_running, next_counter->total_time_running); | ||
1053 | |||
1054 | /* | ||
1055 | * Since we swizzled the values, update the user visible data too. | ||
1056 | */ | ||
1057 | perf_counter_update_userpage(counter); | ||
1058 | perf_counter_update_userpage(next_counter); | ||
1059 | } | ||
1060 | |||
/*
 * list_next_entry - get the entry that follows @pos in its list
 * @pos:	pointer to the current entry
 * @member:	name of the list_head member within the entry's type
 *
 * @pos is parenthesized in the expansion so that any pointer-valued
 * expression may be passed, not just a plain identifier.
 */
#define list_next_entry(pos, member) \
	list_entry((pos)->member.next, typeof(*(pos)), member)
1063 | |||
1064 | static void perf_counter_sync_stat(struct perf_counter_context *ctx, | ||
1065 | struct perf_counter_context *next_ctx) | ||
1066 | { | ||
1067 | struct perf_counter *counter, *next_counter; | ||
1068 | |||
1069 | if (!ctx->nr_stat) | ||
1070 | return; | ||
1071 | |||
1072 | counter = list_first_entry(&ctx->event_list, | ||
1073 | struct perf_counter, event_entry); | ||
1074 | |||
1075 | next_counter = list_first_entry(&next_ctx->event_list, | ||
1076 | struct perf_counter, event_entry); | ||
1077 | |||
1078 | while (&counter->event_entry != &ctx->event_list && | ||
1079 | &next_counter->event_entry != &next_ctx->event_list) { | ||
1080 | |||
1081 | __perf_counter_sync_stat(counter, next_counter); | ||
1082 | |||
1083 | counter = list_next_entry(counter, event_entry); | ||
1084 | next_counter = list_next_entry(counter, event_entry); | ||
1085 | } | ||
1086 | } | ||
1087 | |||
1009 | /* | 1088 | /* |
1010 | * Called from scheduler to remove the counters of the current task, | 1089 | * Called from scheduler to remove the counters of the current task, |
1011 | * with interrupts disabled. | 1090 | * with interrupts disabled. |
@@ -1061,6 +1140,8 @@ void perf_counter_task_sched_out(struct task_struct *task, | |||
1061 | ctx->task = next; | 1140 | ctx->task = next; |
1062 | next_ctx->task = task; | 1141 | next_ctx->task = task; |
1063 | do_switch = 0; | 1142 | do_switch = 0; |
1143 | |||
1144 | perf_counter_sync_stat(ctx, next_ctx); | ||
1064 | } | 1145 | } |
1065 | spin_unlock(&next_ctx->lock); | 1146 | spin_unlock(&next_ctx->lock); |
1066 | spin_unlock(&ctx->lock); | 1147 | spin_unlock(&ctx->lock); |
@@ -1348,9 +1429,56 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu) | |||
1348 | } | 1429 | } |
1349 | 1430 | ||
1350 | /* | 1431 | /* |
1432 | * Enable all of a task's counters that have been marked enable-on-exec. | ||
1433 | * This expects task == current. | ||
1434 | */ | ||
1435 | static void perf_counter_enable_on_exec(struct task_struct *task) | ||
1436 | { | ||
1437 | struct perf_counter_context *ctx; | ||
1438 | struct perf_counter *counter; | ||
1439 | unsigned long flags; | ||
1440 | int enabled = 0; | ||
1441 | |||
1442 | local_irq_save(flags); | ||
1443 | ctx = task->perf_counter_ctxp; | ||
1444 | if (!ctx || !ctx->nr_counters) | ||
1445 | goto out; | ||
1446 | |||
1447 | __perf_counter_task_sched_out(ctx); | ||
1448 | |||
1449 | spin_lock(&ctx->lock); | ||
1450 | |||
1451 | list_for_each_entry(counter, &ctx->counter_list, list_entry) { | ||
1452 | if (!counter->attr.enable_on_exec) | ||
1453 | continue; | ||
1454 | counter->attr.enable_on_exec = 0; | ||
1455 | if (counter->state >= PERF_COUNTER_STATE_INACTIVE) | ||
1456 | continue; | ||
1457 | counter->state = PERF_COUNTER_STATE_INACTIVE; | ||
1458 | counter->tstamp_enabled = | ||
1459 | ctx->time - counter->total_time_enabled; | ||
1460 | enabled = 1; | ||
1461 | } | ||
1462 | |||
1463 | /* | ||
1464 | * Unclone this context if we enabled any counter. | ||
1465 | */ | ||
1466 | if (enabled && ctx->parent_ctx) { | ||
1467 | put_ctx(ctx->parent_ctx); | ||
1468 | ctx->parent_ctx = NULL; | ||
1469 | } | ||
1470 | |||
1471 | spin_unlock(&ctx->lock); | ||
1472 | |||
1473 | perf_counter_task_sched_in(task, smp_processor_id()); | ||
1474 | out: | ||
1475 | local_irq_restore(flags); | ||
1476 | } | ||
1477 | |||
1478 | /* | ||
1351 | * Cross CPU call to read the hardware counter | 1479 | * Cross CPU call to read the hardware counter |
1352 | */ | 1480 | */ |
1353 | static void __read(void *info) | 1481 | static void __perf_counter_read(void *info) |
1354 | { | 1482 | { |
1355 | struct perf_counter *counter = info; | 1483 | struct perf_counter *counter = info; |
1356 | struct perf_counter_context *ctx = counter->ctx; | 1484 | struct perf_counter_context *ctx = counter->ctx; |
@@ -1372,7 +1500,7 @@ static u64 perf_counter_read(struct perf_counter *counter) | |||
1372 | */ | 1500 | */ |
1373 | if (counter->state == PERF_COUNTER_STATE_ACTIVE) { | 1501 | if (counter->state == PERF_COUNTER_STATE_ACTIVE) { |
1374 | smp_call_function_single(counter->oncpu, | 1502 | smp_call_function_single(counter->oncpu, |
1375 | __read, counter, 1); | 1503 | __perf_counter_read, counter, 1); |
1376 | } else if (counter->state == PERF_COUNTER_STATE_INACTIVE) { | 1504 | } else if (counter->state == PERF_COUNTER_STATE_INACTIVE) { |
1377 | update_counter_times(counter); | 1505 | update_counter_times(counter); |
1378 | } | 1506 | } |
@@ -1508,11 +1636,13 @@ static void free_counter(struct perf_counter *counter) | |||
1508 | { | 1636 | { |
1509 | perf_pending_sync(counter); | 1637 | perf_pending_sync(counter); |
1510 | 1638 | ||
1511 | atomic_dec(&nr_counters); | 1639 | if (!counter->parent) { |
1512 | if (counter->attr.mmap) | 1640 | atomic_dec(&nr_counters); |
1513 | atomic_dec(&nr_mmap_counters); | 1641 | if (counter->attr.mmap) |
1514 | if (counter->attr.comm) | 1642 | atomic_dec(&nr_mmap_counters); |
1515 | atomic_dec(&nr_comm_counters); | 1643 | if (counter->attr.comm) |
1644 | atomic_dec(&nr_comm_counters); | ||
1645 | } | ||
1516 | 1646 | ||
1517 | if (counter->destroy) | 1647 | if (counter->destroy) |
1518 | counter->destroy(counter); | 1648 | counter->destroy(counter); |
@@ -1751,6 +1881,14 @@ int perf_counter_task_disable(void) | |||
1751 | return 0; | 1881 | return 0; |
1752 | } | 1882 | } |
1753 | 1883 | ||
1884 | static int perf_counter_index(struct perf_counter *counter) | ||
1885 | { | ||
1886 | if (counter->state != PERF_COUNTER_STATE_ACTIVE) | ||
1887 | return 0; | ||
1888 | |||
1889 | return counter->hw.idx + 1 - PERF_COUNTER_INDEX_OFFSET; | ||
1890 | } | ||
1891 | |||
1754 | /* | 1892 | /* |
1755 | * Callers need to ensure there can be no nesting of this function, otherwise | 1893 | * Callers need to ensure there can be no nesting of this function, otherwise |
1756 | * the seqlock logic goes bad. We can not serialize this because the arch | 1894 | * the seqlock logic goes bad. We can not serialize this because the arch |
@@ -1775,11 +1913,17 @@ void perf_counter_update_userpage(struct perf_counter *counter) | |||
1775 | preempt_disable(); | 1913 | preempt_disable(); |
1776 | ++userpg->lock; | 1914 | ++userpg->lock; |
1777 | barrier(); | 1915 | barrier(); |
1778 | userpg->index = counter->hw.idx; | 1916 | userpg->index = perf_counter_index(counter); |
1779 | userpg->offset = atomic64_read(&counter->count); | 1917 | userpg->offset = atomic64_read(&counter->count); |
1780 | if (counter->state == PERF_COUNTER_STATE_ACTIVE) | 1918 | if (counter->state == PERF_COUNTER_STATE_ACTIVE) |
1781 | userpg->offset -= atomic64_read(&counter->hw.prev_count); | 1919 | userpg->offset -= atomic64_read(&counter->hw.prev_count); |
1782 | 1920 | ||
1921 | userpg->time_enabled = counter->total_time_enabled + | ||
1922 | atomic64_read(&counter->child_total_time_enabled); | ||
1923 | |||
1924 | userpg->time_running = counter->total_time_running + | ||
1925 | atomic64_read(&counter->child_total_time_running); | ||
1926 | |||
1783 | barrier(); | 1927 | barrier(); |
1784 | ++userpg->lock; | 1928 | ++userpg->lock; |
1785 | preempt_enable(); | 1929 | preempt_enable(); |
@@ -2483,15 +2627,14 @@ static void perf_counter_output(struct perf_counter *counter, int nmi, | |||
2483 | u32 cpu, reserved; | 2627 | u32 cpu, reserved; |
2484 | } cpu_entry; | 2628 | } cpu_entry; |
2485 | 2629 | ||
2486 | header.type = 0; | 2630 | header.type = PERF_EVENT_SAMPLE; |
2487 | header.size = sizeof(header); | 2631 | header.size = sizeof(header); |
2488 | 2632 | ||
2489 | header.misc = PERF_EVENT_MISC_OVERFLOW; | 2633 | header.misc = 0; |
2490 | header.misc |= perf_misc_flags(data->regs); | 2634 | header.misc |= perf_misc_flags(data->regs); |
2491 | 2635 | ||
2492 | if (sample_type & PERF_SAMPLE_IP) { | 2636 | if (sample_type & PERF_SAMPLE_IP) { |
2493 | ip = perf_instruction_pointer(data->regs); | 2637 | ip = perf_instruction_pointer(data->regs); |
2494 | header.type |= PERF_SAMPLE_IP; | ||
2495 | header.size += sizeof(ip); | 2638 | header.size += sizeof(ip); |
2496 | } | 2639 | } |
2497 | 2640 | ||
@@ -2500,7 +2643,6 @@ static void perf_counter_output(struct perf_counter *counter, int nmi, | |||
2500 | tid_entry.pid = perf_counter_pid(counter, current); | 2643 | tid_entry.pid = perf_counter_pid(counter, current); |
2501 | tid_entry.tid = perf_counter_tid(counter, current); | 2644 | tid_entry.tid = perf_counter_tid(counter, current); |
2502 | 2645 | ||
2503 | header.type |= PERF_SAMPLE_TID; | ||
2504 | header.size += sizeof(tid_entry); | 2646 | header.size += sizeof(tid_entry); |
2505 | } | 2647 | } |
2506 | 2648 | ||
@@ -2510,34 +2652,25 @@ static void perf_counter_output(struct perf_counter *counter, int nmi, | |||
2510 | */ | 2652 | */ |
2511 | time = sched_clock(); | 2653 | time = sched_clock(); |
2512 | 2654 | ||
2513 | header.type |= PERF_SAMPLE_TIME; | ||
2514 | header.size += sizeof(u64); | 2655 | header.size += sizeof(u64); |
2515 | } | 2656 | } |
2516 | 2657 | ||
2517 | if (sample_type & PERF_SAMPLE_ADDR) { | 2658 | if (sample_type & PERF_SAMPLE_ADDR) |
2518 | header.type |= PERF_SAMPLE_ADDR; | ||
2519 | header.size += sizeof(u64); | 2659 | header.size += sizeof(u64); |
2520 | } | ||
2521 | 2660 | ||
2522 | if (sample_type & PERF_SAMPLE_ID) { | 2661 | if (sample_type & PERF_SAMPLE_ID) |
2523 | header.type |= PERF_SAMPLE_ID; | ||
2524 | header.size += sizeof(u64); | 2662 | header.size += sizeof(u64); |
2525 | } | ||
2526 | 2663 | ||
2527 | if (sample_type & PERF_SAMPLE_CPU) { | 2664 | if (sample_type & PERF_SAMPLE_CPU) { |
2528 | header.type |= PERF_SAMPLE_CPU; | ||
2529 | header.size += sizeof(cpu_entry); | 2665 | header.size += sizeof(cpu_entry); |
2530 | 2666 | ||
2531 | cpu_entry.cpu = raw_smp_processor_id(); | 2667 | cpu_entry.cpu = raw_smp_processor_id(); |
2532 | } | 2668 | } |
2533 | 2669 | ||
2534 | if (sample_type & PERF_SAMPLE_PERIOD) { | 2670 | if (sample_type & PERF_SAMPLE_PERIOD) |
2535 | header.type |= PERF_SAMPLE_PERIOD; | ||
2536 | header.size += sizeof(u64); | 2671 | header.size += sizeof(u64); |
2537 | } | ||
2538 | 2672 | ||
2539 | if (sample_type & PERF_SAMPLE_GROUP) { | 2673 | if (sample_type & PERF_SAMPLE_GROUP) { |
2540 | header.type |= PERF_SAMPLE_GROUP; | ||
2541 | header.size += sizeof(u64) + | 2674 | header.size += sizeof(u64) + |
2542 | counter->nr_siblings * sizeof(group_entry); | 2675 | counter->nr_siblings * sizeof(group_entry); |
2543 | } | 2676 | } |
@@ -2547,10 +2680,9 @@ static void perf_counter_output(struct perf_counter *counter, int nmi, | |||
2547 | 2680 | ||
2548 | if (callchain) { | 2681 | if (callchain) { |
2549 | callchain_size = (1 + callchain->nr) * sizeof(u64); | 2682 | callchain_size = (1 + callchain->nr) * sizeof(u64); |
2550 | |||
2551 | header.type |= PERF_SAMPLE_CALLCHAIN; | ||
2552 | header.size += callchain_size; | 2683 | header.size += callchain_size; |
2553 | } | 2684 | } else |
2685 | header.size += sizeof(u64); | ||
2554 | } | 2686 | } |
2555 | 2687 | ||
2556 | ret = perf_output_begin(&handle, counter, header.size, nmi, 1); | 2688 | ret = perf_output_begin(&handle, counter, header.size, nmi, 1); |
@@ -2601,13 +2733,79 @@ static void perf_counter_output(struct perf_counter *counter, int nmi, | |||
2601 | } | 2733 | } |
2602 | } | 2734 | } |
2603 | 2735 | ||
2604 | if (callchain) | 2736 | if (sample_type & PERF_SAMPLE_CALLCHAIN) { |
2605 | perf_output_copy(&handle, callchain, callchain_size); | 2737 | if (callchain) |
2738 | perf_output_copy(&handle, callchain, callchain_size); | ||
2739 | else { | ||
2740 | u64 nr = 0; | ||
2741 | perf_output_put(&handle, nr); | ||
2742 | } | ||
2743 | } | ||
2606 | 2744 | ||
2607 | perf_output_end(&handle); | 2745 | perf_output_end(&handle); |
2608 | } | 2746 | } |
2609 | 2747 | ||
2610 | /* | 2748 | /* |
2749 | * read event | ||
2750 | */ | ||
2751 | |||
2752 | struct perf_read_event { | ||
2753 | struct perf_event_header header; | ||
2754 | |||
2755 | u32 pid; | ||
2756 | u32 tid; | ||
2757 | u64 value; | ||
2758 | u64 format[3]; | ||
2759 | }; | ||
2760 | |||
2761 | static void | ||
2762 | perf_counter_read_event(struct perf_counter *counter, | ||
2763 | struct task_struct *task) | ||
2764 | { | ||
2765 | struct perf_output_handle handle; | ||
2766 | struct perf_read_event event = { | ||
2767 | .header = { | ||
2768 | .type = PERF_EVENT_READ, | ||
2769 | .misc = 0, | ||
2770 | .size = sizeof(event) - sizeof(event.format), | ||
2771 | }, | ||
2772 | .pid = perf_counter_pid(counter, task), | ||
2773 | .tid = perf_counter_tid(counter, task), | ||
2774 | .value = atomic64_read(&counter->count), | ||
2775 | }; | ||
2776 | int ret, i = 0; | ||
2777 | |||
2778 | if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { | ||
2779 | event.header.size += sizeof(u64); | ||
2780 | event.format[i++] = counter->total_time_enabled; | ||
2781 | } | ||
2782 | |||
2783 | if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { | ||
2784 | event.header.size += sizeof(u64); | ||
2785 | event.format[i++] = counter->total_time_running; | ||
2786 | } | ||
2787 | |||
2788 | if (counter->attr.read_format & PERF_FORMAT_ID) { | ||
2789 | u64 id; | ||
2790 | |||
2791 | event.header.size += sizeof(u64); | ||
2792 | if (counter->parent) | ||
2793 | id = counter->parent->id; | ||
2794 | else | ||
2795 | id = counter->id; | ||
2796 | |||
2797 | event.format[i++] = id; | ||
2798 | } | ||
2799 | |||
2800 | ret = perf_output_begin(&handle, counter, event.header.size, 0, 0); | ||
2801 | if (ret) | ||
2802 | return; | ||
2803 | |||
2804 | perf_output_copy(&handle, &event, event.header.size); | ||
2805 | perf_output_end(&handle); | ||
2806 | } | ||
2807 | |||
2808 | /* | ||
2611 | * fork tracking | 2809 | * fork tracking |
2612 | */ | 2810 | */ |
2613 | 2811 | ||
@@ -2798,6 +2996,9 @@ void perf_counter_comm(struct task_struct *task) | |||
2798 | { | 2996 | { |
2799 | struct perf_comm_event comm_event; | 2997 | struct perf_comm_event comm_event; |
2800 | 2998 | ||
2999 | if (task->perf_counter_ctxp) | ||
3000 | perf_counter_enable_on_exec(task); | ||
3001 | |||
2801 | if (!atomic_read(&nr_comm_counters)) | 3002 | if (!atomic_read(&nr_comm_counters)) |
2802 | return; | 3003 | return; |
2803 | 3004 | ||
@@ -3317,8 +3518,8 @@ out: | |||
3317 | put_cpu_var(perf_cpu_context); | 3518 | put_cpu_var(perf_cpu_context); |
3318 | } | 3519 | } |
3319 | 3520 | ||
3320 | void | 3521 | void __perf_swcounter_event(u32 event, u64 nr, int nmi, |
3321 | perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs, u64 addr) | 3522 | struct pt_regs *regs, u64 addr) |
3322 | { | 3523 | { |
3323 | struct perf_sample_data data = { | 3524 | struct perf_sample_data data = { |
3324 | .regs = regs, | 3525 | .regs = regs, |
@@ -3509,9 +3710,21 @@ static const struct pmu *tp_perf_counter_init(struct perf_counter *counter) | |||
3509 | } | 3710 | } |
3510 | #endif | 3711 | #endif |
3511 | 3712 | ||
3713 | atomic_t perf_swcounter_enabled[PERF_COUNT_SW_MAX]; | ||
3714 | |||
3715 | static void sw_perf_counter_destroy(struct perf_counter *counter) | ||
3716 | { | ||
3717 | u64 event = counter->attr.config; | ||
3718 | |||
3719 | WARN_ON(counter->parent); | ||
3720 | |||
3721 | atomic_dec(&perf_swcounter_enabled[event]); | ||
3722 | } | ||
3723 | |||
3512 | static const struct pmu *sw_perf_counter_init(struct perf_counter *counter) | 3724 | static const struct pmu *sw_perf_counter_init(struct perf_counter *counter) |
3513 | { | 3725 | { |
3514 | const struct pmu *pmu = NULL; | 3726 | const struct pmu *pmu = NULL; |
3727 | u64 event = counter->attr.config; | ||
3515 | 3728 | ||
3516 | /* | 3729 | /* |
3517 | * Software counters (currently) can't in general distinguish | 3730 | * Software counters (currently) can't in general distinguish |
@@ -3520,7 +3733,7 @@ static const struct pmu *sw_perf_counter_init(struct perf_counter *counter) | |||
3520 | * to be kernel events, and page faults are never hypervisor | 3733 | * to be kernel events, and page faults are never hypervisor |
3521 | * events. | 3734 | * events. |
3522 | */ | 3735 | */ |
3523 | switch (counter->attr.config) { | 3736 | switch (event) { |
3524 | case PERF_COUNT_SW_CPU_CLOCK: | 3737 | case PERF_COUNT_SW_CPU_CLOCK: |
3525 | pmu = &perf_ops_cpu_clock; | 3738 | pmu = &perf_ops_cpu_clock; |
3526 | 3739 | ||
@@ -3541,6 +3754,10 @@ static const struct pmu *sw_perf_counter_init(struct perf_counter *counter) | |||
3541 | case PERF_COUNT_SW_PAGE_FAULTS_MAJ: | 3754 | case PERF_COUNT_SW_PAGE_FAULTS_MAJ: |
3542 | case PERF_COUNT_SW_CONTEXT_SWITCHES: | 3755 | case PERF_COUNT_SW_CONTEXT_SWITCHES: |
3543 | case PERF_COUNT_SW_CPU_MIGRATIONS: | 3756 | case PERF_COUNT_SW_CPU_MIGRATIONS: |
3757 | if (!counter->parent) { | ||
3758 | atomic_inc(&perf_swcounter_enabled[event]); | ||
3759 | counter->destroy = sw_perf_counter_destroy; | ||
3760 | } | ||
3544 | pmu = &perf_ops_generic; | 3761 | pmu = &perf_ops_generic; |
3545 | break; | 3762 | break; |
3546 | } | 3763 | } |
@@ -3556,6 +3773,7 @@ perf_counter_alloc(struct perf_counter_attr *attr, | |||
3556 | int cpu, | 3773 | int cpu, |
3557 | struct perf_counter_context *ctx, | 3774 | struct perf_counter_context *ctx, |
3558 | struct perf_counter *group_leader, | 3775 | struct perf_counter *group_leader, |
3776 | struct perf_counter *parent_counter, | ||
3559 | gfp_t gfpflags) | 3777 | gfp_t gfpflags) |
3560 | { | 3778 | { |
3561 | const struct pmu *pmu; | 3779 | const struct pmu *pmu; |
@@ -3591,6 +3809,8 @@ perf_counter_alloc(struct perf_counter_attr *attr, | |||
3591 | counter->ctx = ctx; | 3809 | counter->ctx = ctx; |
3592 | counter->oncpu = -1; | 3810 | counter->oncpu = -1; |
3593 | 3811 | ||
3812 | counter->parent = parent_counter; | ||
3813 | |||
3594 | counter->ns = get_pid_ns(current->nsproxy->pid_ns); | 3814 | counter->ns = get_pid_ns(current->nsproxy->pid_ns); |
3595 | counter->id = atomic64_inc_return(&perf_counter_id); | 3815 | counter->id = atomic64_inc_return(&perf_counter_id); |
3596 | 3816 | ||
@@ -3648,11 +3868,13 @@ done: | |||
3648 | 3868 | ||
3649 | counter->pmu = pmu; | 3869 | counter->pmu = pmu; |
3650 | 3870 | ||
3651 | atomic_inc(&nr_counters); | 3871 | if (!counter->parent) { |
3652 | if (counter->attr.mmap) | 3872 | atomic_inc(&nr_counters); |
3653 | atomic_inc(&nr_mmap_counters); | 3873 | if (counter->attr.mmap) |
3654 | if (counter->attr.comm) | 3874 | atomic_inc(&nr_mmap_counters); |
3655 | atomic_inc(&nr_comm_counters); | 3875 | if (counter->attr.comm) |
3876 | atomic_inc(&nr_comm_counters); | ||
3877 | } | ||
3656 | 3878 | ||
3657 | return counter; | 3879 | return counter; |
3658 | } | 3880 | } |
@@ -3815,7 +4037,7 @@ SYSCALL_DEFINE5(perf_counter_open, | |||
3815 | } | 4037 | } |
3816 | 4038 | ||
3817 | counter = perf_counter_alloc(&attr, cpu, ctx, group_leader, | 4039 | counter = perf_counter_alloc(&attr, cpu, ctx, group_leader, |
3818 | GFP_KERNEL); | 4040 | NULL, GFP_KERNEL); |
3819 | ret = PTR_ERR(counter); | 4041 | ret = PTR_ERR(counter); |
3820 | if (IS_ERR(counter)) | 4042 | if (IS_ERR(counter)) |
3821 | goto err_put_context; | 4043 | goto err_put_context; |
@@ -3881,7 +4103,8 @@ inherit_counter(struct perf_counter *parent_counter, | |||
3881 | 4103 | ||
3882 | child_counter = perf_counter_alloc(&parent_counter->attr, | 4104 | child_counter = perf_counter_alloc(&parent_counter->attr, |
3883 | parent_counter->cpu, child_ctx, | 4105 | parent_counter->cpu, child_ctx, |
3884 | group_leader, GFP_KERNEL); | 4106 | group_leader, parent_counter, |
4107 | GFP_KERNEL); | ||
3885 | if (IS_ERR(child_counter)) | 4108 | if (IS_ERR(child_counter)) |
3886 | return child_counter; | 4109 | return child_counter; |
3887 | get_ctx(child_ctx); | 4110 | get_ctx(child_ctx); |
@@ -3904,12 +4127,6 @@ inherit_counter(struct perf_counter *parent_counter, | |||
3904 | */ | 4127 | */ |
3905 | add_counter_to_ctx(child_counter, child_ctx); | 4128 | add_counter_to_ctx(child_counter, child_ctx); |
3906 | 4129 | ||
3907 | child_counter->parent = parent_counter; | ||
3908 | /* | ||
3909 | * inherit into child's child as well: | ||
3910 | */ | ||
3911 | child_counter->attr.inherit = 1; | ||
3912 | |||
3913 | /* | 4130 | /* |
3914 | * Get a reference to the parent filp - we will fput it | 4131 | * Get a reference to the parent filp - we will fput it |
3915 | * when the child counter exits. This is safe to do because | 4132 | * when the child counter exits. This is safe to do because |
@@ -3953,10 +4170,14 @@ static int inherit_group(struct perf_counter *parent_counter, | |||
3953 | } | 4170 | } |
3954 | 4171 | ||
3955 | static void sync_child_counter(struct perf_counter *child_counter, | 4172 | static void sync_child_counter(struct perf_counter *child_counter, |
3956 | struct perf_counter *parent_counter) | 4173 | struct task_struct *child) |
3957 | { | 4174 | { |
4175 | struct perf_counter *parent_counter = child_counter->parent; | ||
3958 | u64 child_val; | 4176 | u64 child_val; |
3959 | 4177 | ||
4178 | if (child_counter->attr.inherit_stat) | ||
4179 | perf_counter_read_event(child_counter, child); | ||
4180 | |||
3960 | child_val = atomic64_read(&child_counter->count); | 4181 | child_val = atomic64_read(&child_counter->count); |
3961 | 4182 | ||
3962 | /* | 4183 | /* |
@@ -3985,7 +4206,8 @@ static void sync_child_counter(struct perf_counter *child_counter, | |||
3985 | 4206 | ||
3986 | static void | 4207 | static void |
3987 | __perf_counter_exit_task(struct perf_counter *child_counter, | 4208 | __perf_counter_exit_task(struct perf_counter *child_counter, |
3988 | struct perf_counter_context *child_ctx) | 4209 | struct perf_counter_context *child_ctx, |
4210 | struct task_struct *child) | ||
3989 | { | 4211 | { |
3990 | struct perf_counter *parent_counter; | 4212 | struct perf_counter *parent_counter; |
3991 | 4213 | ||
@@ -3999,7 +4221,7 @@ __perf_counter_exit_task(struct perf_counter *child_counter, | |||
3999 | * counters need to be zapped - but otherwise linger. | 4221 | * counters need to be zapped - but otherwise linger. |
4000 | */ | 4222 | */ |
4001 | if (parent_counter) { | 4223 | if (parent_counter) { |
4002 | sync_child_counter(child_counter, parent_counter); | 4224 | sync_child_counter(child_counter, child); |
4003 | free_counter(child_counter); | 4225 | free_counter(child_counter); |
4004 | } | 4226 | } |
4005 | } | 4227 | } |
@@ -4061,7 +4283,7 @@ void perf_counter_exit_task(struct task_struct *child) | |||
4061 | again: | 4283 | again: |
4062 | list_for_each_entry_safe(child_counter, tmp, &child_ctx->counter_list, | 4284 | list_for_each_entry_safe(child_counter, tmp, &child_ctx->counter_list, |
4063 | list_entry) | 4285 | list_entry) |
4064 | __perf_counter_exit_task(child_counter, child_ctx); | 4286 | __perf_counter_exit_task(child_counter, child_ctx, child); |
4065 | 4287 | ||
4066 | /* | 4288 | /* |
4067 | * If the last counter was a group counter, it will have appended all | 4289 | * If the last counter was a group counter, it will have appended all |
diff --git a/tools/perf/CREDITS b/tools/perf/CREDITS new file mode 100644 index 000000000000..c2ddcb3acbd0 --- /dev/null +++ b/tools/perf/CREDITS | |||
@@ -0,0 +1,30 @@ | |||
1 | Most of the infrastructure that 'perf' uses here has been reused | ||
2 | from the Git project, as of version: | ||
3 | |||
4 | 66996ec: Sync with 1.6.2.4 | ||
5 | |||
6 | Here is an (incomplete!) list of main contributors to those files | ||
7 | in util/* and elsewhere: | ||
8 | |||
9 | Alex Riesen | ||
10 | Christian Couder | ||
11 | Dmitry Potapov | ||
12 | Jeff King | ||
13 | Johannes Schindelin | ||
14 | Johannes Sixt | ||
15 | Junio C Hamano | ||
16 | Linus Torvalds | ||
17 | Matthias Kestenholz | ||
18 | Michal Ostrowski | ||
19 | Miklos Vajna | ||
20 | Petr Baudis | ||
21 | Pierre Habouzit | ||
22 | René Scharfe | ||
23 | Samuel Tardieu | ||
24 | Shawn O. Pearce | ||
25 | Steffen Prohaska | ||
26 | Steve Haslam | ||
27 | |||
28 | Thanks guys! | ||
29 | |||
30 | The full history of the files can be found in the upstream Git commits. | ||
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 52d3fc6846a9..8aa3f8c88707 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt | |||
@@ -13,13 +13,25 @@ SYNOPSIS | |||
13 | DESCRIPTION | 13 | DESCRIPTION |
14 | ----------- | 14 | ----------- |
15 | This command displays the performance counter profile information recorded | 15 | This command displays the performance counter profile information recorded |
16 | via perf report. | 16 | via perf record. |
17 | 17 | ||
18 | OPTIONS | 18 | OPTIONS |
19 | ------- | 19 | ------- |
20 | -i:: | 20 | -i:: |
21 | --input=:: | 21 | --input=:: |
22 | Input file name. (default: perf.data) | 22 | Input file name. (default: perf.data) |
23 | -d:: | ||
24 | --dsos=:: | ||
25 | Only consider symbols in these dsos. CSV that understands | ||
26 | file://filename entries. | ||
27 | -C:: | ||
28 | --comms=:: | ||
29 | Only consider symbols in these comms. CSV that understands | ||
30 | file://filename entries. | ||
31 | -S:: | ||
32 | --symbols=:: | ||
33 | Only consider these symbols. CSV that understands | ||
34 | file://filename entries. | ||
23 | 35 | ||
24 | SEE ALSO | 36 | SEE ALSO |
25 | -------- | 37 | -------- |
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index c368a72721d7..0d74346d21ab 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt | |||
@@ -8,8 +8,8 @@ perf-stat - Run a command and gather performance counter statistics | |||
8 | SYNOPSIS | 8 | SYNOPSIS |
9 | -------- | 9 | -------- |
10 | [verse] | 10 | [verse] |
11 | 'perf stat' [-e <EVENT> | --event=EVENT] [-l] [-a] <command> | 11 | 'perf stat' [-e <EVENT> | --event=EVENT] [-S] [-a] <command> |
12 | 'perf stat' [-e <EVENT> | --event=EVENT] [-l] [-a] -- <command> [<options>] | 12 | 'perf stat' [-e <EVENT> | --event=EVENT] [-S] [-a] -- <command> [<options>] |
13 | 13 | ||
14 | DESCRIPTION | 14 | DESCRIPTION |
15 | ----------- | 15 | ----------- |
@@ -40,7 +40,7 @@ OPTIONS | |||
40 | -a:: | 40 | -a:: |
41 | system-wide collection | 41 | system-wide collection |
42 | 42 | ||
43 | -l:: | 43 | -S:: |
44 | scale counter values | 44 | scale counter values |
45 | 45 | ||
46 | EXAMPLES | 46 | EXAMPLES |
diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 36d7eef49913..9c6d0ae3708e 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile | |||
@@ -290,7 +290,7 @@ LIB_FILE=libperf.a | |||
290 | 290 | ||
291 | LIB_H += ../../include/linux/perf_counter.h | 291 | LIB_H += ../../include/linux/perf_counter.h |
292 | LIB_H += perf.h | 292 | LIB_H += perf.h |
293 | LIB_H += types.h | 293 | LIB_H += util/types.h |
294 | LIB_H += util/list.h | 294 | LIB_H += util/list.h |
295 | LIB_H += util/rbtree.h | 295 | LIB_H += util/rbtree.h |
296 | LIB_H += util/levenshtein.h | 296 | LIB_H += util/levenshtein.h |
@@ -301,6 +301,7 @@ LIB_H += util/util.h | |||
301 | LIB_H += util/help.h | 301 | LIB_H += util/help.h |
302 | LIB_H += util/strbuf.h | 302 | LIB_H += util/strbuf.h |
303 | LIB_H += util/string.h | 303 | LIB_H += util/string.h |
304 | LIB_H += util/strlist.h | ||
304 | LIB_H += util/run-command.h | 305 | LIB_H += util/run-command.h |
305 | LIB_H += util/sigchain.h | 306 | LIB_H += util/sigchain.h |
306 | LIB_H += util/symbol.h | 307 | LIB_H += util/symbol.h |
@@ -322,12 +323,15 @@ LIB_OBJS += util/run-command.o | |||
322 | LIB_OBJS += util/quote.o | 323 | LIB_OBJS += util/quote.o |
323 | LIB_OBJS += util/strbuf.o | 324 | LIB_OBJS += util/strbuf.o |
324 | LIB_OBJS += util/string.o | 325 | LIB_OBJS += util/string.o |
326 | LIB_OBJS += util/strlist.o | ||
325 | LIB_OBJS += util/usage.o | 327 | LIB_OBJS += util/usage.o |
326 | LIB_OBJS += util/wrapper.o | 328 | LIB_OBJS += util/wrapper.o |
327 | LIB_OBJS += util/sigchain.o | 329 | LIB_OBJS += util/sigchain.o |
328 | LIB_OBJS += util/symbol.o | 330 | LIB_OBJS += util/symbol.o |
329 | LIB_OBJS += util/color.o | 331 | LIB_OBJS += util/color.o |
330 | LIB_OBJS += util/pager.o | 332 | LIB_OBJS += util/pager.o |
333 | LIB_OBJS += util/header.o | ||
334 | LIB_OBJS += util/callchain.o | ||
331 | 335 | ||
332 | BUILTIN_OBJS += builtin-annotate.o | 336 | BUILTIN_OBJS += builtin-annotate.o |
333 | BUILTIN_OBJS += builtin-help.o | 337 | BUILTIN_OBJS += builtin-help.o |
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 7e58e3ad1508..722c0f54e549 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c | |||
@@ -855,7 +855,7 @@ static unsigned long total = 0, | |||
855 | total_unknown = 0; | 855 | total_unknown = 0; |
856 | 856 | ||
857 | static int | 857 | static int |
858 | process_overflow_event(event_t *event, unsigned long offset, unsigned long head) | 858 | process_sample_event(event_t *event, unsigned long offset, unsigned long head) |
859 | { | 859 | { |
860 | char level; | 860 | char level; |
861 | int show = 0; | 861 | int show = 0; |
@@ -1013,10 +1013,10 @@ process_period_event(event_t *event, unsigned long offset, unsigned long head) | |||
1013 | static int | 1013 | static int |
1014 | process_event(event_t *event, unsigned long offset, unsigned long head) | 1014 | process_event(event_t *event, unsigned long offset, unsigned long head) |
1015 | { | 1015 | { |
1016 | if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) | ||
1017 | return process_overflow_event(event, offset, head); | ||
1018 | |||
1019 | switch (event->header.type) { | 1016 | switch (event->header.type) { |
1017 | case PERF_EVENT_SAMPLE: | ||
1018 | return process_sample_event(event, offset, head); | ||
1019 | |||
1020 | case PERF_EVENT_MMAP: | 1020 | case PERF_EVENT_MMAP: |
1021 | return process_mmap_event(event, offset, head); | 1021 | return process_mmap_event(event, offset, head); |
1022 | 1022 | ||
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index d7ebbd757543..d18546f37d7c 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c | |||
@@ -14,6 +14,8 @@ | |||
14 | #include "util/parse-events.h" | 14 | #include "util/parse-events.h" |
15 | #include "util/string.h" | 15 | #include "util/string.h" |
16 | 16 | ||
17 | #include "util/header.h" | ||
18 | |||
17 | #include <unistd.h> | 19 | #include <unistd.h> |
18 | #include <sched.h> | 20 | #include <sched.h> |
19 | 21 | ||
@@ -39,6 +41,8 @@ static int force = 0; | |||
39 | static int append_file = 0; | 41 | static int append_file = 0; |
40 | static int call_graph = 0; | 42 | static int call_graph = 0; |
41 | static int verbose = 0; | 43 | static int verbose = 0; |
44 | static int inherit_stat = 0; | ||
45 | static int no_samples = 0; | ||
42 | 46 | ||
43 | static long samples; | 47 | static long samples; |
44 | static struct timeval last_read; | 48 | static struct timeval last_read; |
@@ -52,7 +56,8 @@ static int nr_poll; | |||
52 | static int nr_cpu; | 56 | static int nr_cpu; |
53 | 57 | ||
54 | static int file_new = 1; | 58 | static int file_new = 1; |
55 | static struct perf_file_header file_header; | 59 | |
60 | struct perf_header *header; | ||
56 | 61 | ||
57 | struct mmap_event { | 62 | struct mmap_event { |
58 | struct perf_event_header header; | 63 | struct perf_event_header header; |
@@ -306,12 +311,11 @@ static void pid_synthesize_mmap_samples(pid_t pid) | |||
306 | continue; | 311 | continue; |
307 | pbf += n + 3; | 312 | pbf += n + 3; |
308 | if (*pbf == 'x') { /* vm_exec */ | 313 | if (*pbf == 'x') { /* vm_exec */ |
309 | char *execname = strrchr(bf, ' '); | 314 | char *execname = strchr(bf, '/'); |
310 | 315 | ||
311 | if (execname == NULL || execname[1] != '/') | 316 | if (execname == NULL) |
312 | continue; | 317 | continue; |
313 | 318 | ||
314 | execname += 1; | ||
315 | size = strlen(execname); | 319 | size = strlen(execname); |
316 | execname[size - 1] = '\0'; /* Remove \n */ | 320 | execname[size - 1] = '\0'; /* Remove \n */ |
317 | memcpy(mmap_ev.filename, execname, size); | 321 | memcpy(mmap_ev.filename, execname, size); |
@@ -329,7 +333,7 @@ static void pid_synthesize_mmap_samples(pid_t pid) | |||
329 | fclose(fp); | 333 | fclose(fp); |
330 | } | 334 | } |
331 | 335 | ||
332 | static void synthesize_samples(void) | 336 | static void synthesize_all(void) |
333 | { | 337 | { |
334 | DIR *proc; | 338 | DIR *proc; |
335 | struct dirent dirent, *next; | 339 | struct dirent dirent, *next; |
@@ -353,10 +357,35 @@ static void synthesize_samples(void) | |||
353 | 357 | ||
354 | static int group_fd; | 358 | static int group_fd; |
355 | 359 | ||
360 | static struct perf_header_attr *get_header_attr(struct perf_counter_attr *a, int nr) | ||
361 | { | ||
362 | struct perf_header_attr *h_attr; | ||
363 | |||
364 | if (nr < header->attrs) { | ||
365 | h_attr = header->attr[nr]; | ||
366 | } else { | ||
367 | h_attr = perf_header_attr__new(a); | ||
368 | perf_header__add_attr(header, h_attr); | ||
369 | } | ||
370 | |||
371 | return h_attr; | ||
372 | } | ||
373 | |||
356 | static void create_counter(int counter, int cpu, pid_t pid) | 374 | static void create_counter(int counter, int cpu, pid_t pid) |
357 | { | 375 | { |
358 | struct perf_counter_attr *attr = attrs + counter; | 376 | struct perf_counter_attr *attr = attrs + counter; |
359 | int track = 1; | 377 | struct perf_header_attr *h_attr; |
378 | int track = !counter; /* only the first counter needs these */ | ||
379 | struct { | ||
380 | u64 count; | ||
381 | u64 time_enabled; | ||
382 | u64 time_running; | ||
383 | u64 id; | ||
384 | } read_data; | ||
385 | |||
386 | attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | | ||
387 | PERF_FORMAT_TOTAL_TIME_RUNNING | | ||
388 | PERF_FORMAT_ID; | ||
360 | 389 | ||
361 | attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID; | 390 | attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID; |
362 | 391 | ||
@@ -366,25 +395,20 @@ static void create_counter(int counter, int cpu, pid_t pid) | |||
366 | attr->sample_freq = freq; | 395 | attr->sample_freq = freq; |
367 | } | 396 | } |
368 | 397 | ||
398 | if (no_samples) | ||
399 | attr->sample_freq = 0; | ||
400 | |||
401 | if (inherit_stat) | ||
402 | attr->inherit_stat = 1; | ||
403 | |||
369 | if (call_graph) | 404 | if (call_graph) |
370 | attr->sample_type |= PERF_SAMPLE_CALLCHAIN; | 405 | attr->sample_type |= PERF_SAMPLE_CALLCHAIN; |
371 | 406 | ||
372 | if (file_new) { | ||
373 | file_header.sample_type = attr->sample_type; | ||
374 | } else { | ||
375 | if (file_header.sample_type != attr->sample_type) { | ||
376 | fprintf(stderr, "incompatible append\n"); | ||
377 | exit(-1); | ||
378 | } | ||
379 | } | ||
380 | |||
381 | attr->mmap = track; | 407 | attr->mmap = track; |
382 | attr->comm = track; | 408 | attr->comm = track; |
383 | attr->inherit = (cpu < 0) && inherit; | 409 | attr->inherit = (cpu < 0) && inherit; |
384 | attr->disabled = 1; | 410 | attr->disabled = 1; |
385 | 411 | ||
386 | track = 0; /* only the first counter needs these */ | ||
387 | |||
388 | try_again: | 412 | try_again: |
389 | fd[nr_cpu][counter] = sys_perf_counter_open(attr, pid, cpu, group_fd, 0); | 413 | fd[nr_cpu][counter] = sys_perf_counter_open(attr, pid, cpu, group_fd, 0); |
390 | 414 | ||
@@ -415,6 +439,22 @@ try_again: | |||
415 | exit(-1); | 439 | exit(-1); |
416 | } | 440 | } |
417 | 441 | ||
442 | h_attr = get_header_attr(attr, counter); | ||
443 | |||
444 | if (!file_new) { | ||
445 | if (memcmp(&h_attr->attr, attr, sizeof(*attr))) { | ||
446 | fprintf(stderr, "incompatible append\n"); | ||
447 | exit(-1); | ||
448 | } | ||
449 | } | ||
450 | |||
451 | if (read(fd[nr_cpu][counter], &read_data, sizeof(read_data)) == -1) { | ||
452 | perror("Unable to read perf file descriptor\n"); | ||
453 | exit(-1); | ||
454 | } | ||
455 | |||
456 | perf_header_attr__add_id(h_attr, read_data.id); | ||
457 | |||
418 | assert(fd[nr_cpu][counter] >= 0); | 458 | assert(fd[nr_cpu][counter] >= 0); |
419 | fcntl(fd[nr_cpu][counter], F_SETFL, O_NONBLOCK); | 459 | fcntl(fd[nr_cpu][counter], F_SETFL, O_NONBLOCK); |
420 | 460 | ||
@@ -445,11 +485,6 @@ static void open_counters(int cpu, pid_t pid) | |||
445 | { | 485 | { |
446 | int counter; | 486 | int counter; |
447 | 487 | ||
448 | if (pid > 0) { | ||
449 | pid_synthesize_comm_event(pid, 0); | ||
450 | pid_synthesize_mmap_samples(pid); | ||
451 | } | ||
452 | |||
453 | group_fd = -1; | 488 | group_fd = -1; |
454 | for (counter = 0; counter < nr_counters; counter++) | 489 | for (counter = 0; counter < nr_counters; counter++) |
455 | create_counter(counter, cpu, pid); | 490 | create_counter(counter, cpu, pid); |
@@ -459,17 +494,16 @@ static void open_counters(int cpu, pid_t pid) | |||
459 | 494 | ||
460 | static void atexit_header(void) | 495 | static void atexit_header(void) |
461 | { | 496 | { |
462 | file_header.data_size += bytes_written; | 497 | header->data_size += bytes_written; |
463 | 498 | ||
464 | if (pwrite(output, &file_header, sizeof(file_header), 0) == -1) | 499 | perf_header__write(header, output); |
465 | perror("failed to write on file headers"); | ||
466 | } | 500 | } |
467 | 501 | ||
468 | static int __cmd_record(int argc, const char **argv) | 502 | static int __cmd_record(int argc, const char **argv) |
469 | { | 503 | { |
470 | int i, counter; | 504 | int i, counter; |
471 | struct stat st; | 505 | struct stat st; |
472 | pid_t pid; | 506 | pid_t pid = 0; |
473 | int flags; | 507 | int flags; |
474 | int ret; | 508 | int ret; |
475 | 509 | ||
@@ -500,22 +534,31 @@ static int __cmd_record(int argc, const char **argv) | |||
500 | exit(-1); | 534 | exit(-1); |
501 | } | 535 | } |
502 | 536 | ||
503 | if (!file_new) { | 537 | if (!file_new) |
504 | if (read(output, &file_header, sizeof(file_header)) == -1) { | 538 | header = perf_header__read(output); |
505 | perror("failed to read file headers"); | 539 | else |
506 | exit(-1); | 540 | header = perf_header__new(); |
507 | } | ||
508 | |||
509 | lseek(output, file_header.data_size, SEEK_CUR); | ||
510 | } | ||
511 | 541 | ||
512 | atexit(atexit_header); | 542 | atexit(atexit_header); |
513 | 543 | ||
514 | if (!system_wide) { | 544 | if (!system_wide) { |
515 | open_counters(-1, target_pid != -1 ? target_pid : getpid()); | 545 | pid = target_pid; |
546 | if (pid == -1) | ||
547 | pid = getpid(); | ||
548 | |||
549 | open_counters(-1, pid); | ||
516 | } else for (i = 0; i < nr_cpus; i++) | 550 | } else for (i = 0; i < nr_cpus; i++) |
517 | open_counters(i, target_pid); | 551 | open_counters(i, target_pid); |
518 | 552 | ||
553 | if (file_new) | ||
554 | perf_header__write(header, output); | ||
555 | |||
556 | if (!system_wide) { | ||
557 | pid_synthesize_comm_event(pid, 0); | ||
558 | pid_synthesize_mmap_samples(pid); | ||
559 | } else | ||
560 | synthesize_all(); | ||
561 | |||
519 | if (target_pid == -1 && argc) { | 562 | if (target_pid == -1 && argc) { |
520 | pid = fork(); | 563 | pid = fork(); |
521 | if (pid < 0) | 564 | if (pid < 0) |
@@ -539,10 +582,7 @@ static int __cmd_record(int argc, const char **argv) | |||
539 | } | 582 | } |
540 | } | 583 | } |
541 | 584 | ||
542 | if (system_wide) | 585 | for (;;) { |
543 | synthesize_samples(); | ||
544 | |||
545 | while (!done) { | ||
546 | int hits = samples; | 586 | int hits = samples; |
547 | 587 | ||
548 | for (i = 0; i < nr_cpu; i++) { | 588 | for (i = 0; i < nr_cpu; i++) { |
@@ -550,8 +590,11 @@ static int __cmd_record(int argc, const char **argv) | |||
550 | mmap_read(&mmap_array[i][counter]); | 590 | mmap_read(&mmap_array[i][counter]); |
551 | } | 591 | } |
552 | 592 | ||
553 | if (hits == samples) | 593 | if (hits == samples) { |
594 | if (done) | ||
595 | break; | ||
554 | ret = poll(event_array, nr_poll, 100); | 596 | ret = poll(event_array, nr_poll, 100); |
597 | } | ||
555 | } | 598 | } |
556 | 599 | ||
557 | /* | 600 | /* |
@@ -600,6 +643,10 @@ static const struct option options[] = { | |||
600 | "do call-graph (stack chain/backtrace) recording"), | 643 | "do call-graph (stack chain/backtrace) recording"), |
601 | OPT_BOOLEAN('v', "verbose", &verbose, | 644 | OPT_BOOLEAN('v', "verbose", &verbose, |
602 | "be more verbose (show counter open errors, etc)"), | 645 | "be more verbose (show counter open errors, etc)"), |
646 | OPT_BOOLEAN('s', "stat", &inherit_stat, | ||
647 | "per thread counts"), | ||
648 | OPT_BOOLEAN('n', "no-samples", &no_samples, | ||
649 | "don't sample"), | ||
603 | OPT_END() | 650 | OPT_END() |
604 | }; | 651 | }; |
605 | 652 | ||
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 5eb5566f0c95..135b7837e6bf 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c | |||
@@ -15,8 +15,11 @@ | |||
15 | #include "util/rbtree.h" | 15 | #include "util/rbtree.h" |
16 | #include "util/symbol.h" | 16 | #include "util/symbol.h" |
17 | #include "util/string.h" | 17 | #include "util/string.h" |
18 | #include "util/callchain.h" | ||
19 | #include "util/strlist.h" | ||
18 | 20 | ||
19 | #include "perf.h" | 21 | #include "perf.h" |
22 | #include "util/header.h" | ||
20 | 23 | ||
21 | #include "util/parse-options.h" | 24 | #include "util/parse-options.h" |
22 | #include "util/parse-events.h" | 25 | #include "util/parse-events.h" |
@@ -30,6 +33,8 @@ static char *vmlinux = NULL; | |||
30 | 33 | ||
31 | static char default_sort_order[] = "comm,dso"; | 34 | static char default_sort_order[] = "comm,dso"; |
32 | static char *sort_order = default_sort_order; | 35 | static char *sort_order = default_sort_order; |
36 | static char *dso_list_str, *comm_list_str, *sym_list_str; | ||
37 | static struct strlist *dso_list, *comm_list, *sym_list; | ||
33 | 38 | ||
34 | static int input; | 39 | static int input; |
35 | static int show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV; | 40 | static int show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV; |
@@ -51,6 +56,9 @@ static char *parent_pattern = default_parent_pattern; | |||
51 | static regex_t parent_regex; | 56 | static regex_t parent_regex; |
52 | 57 | ||
53 | static int exclude_other = 1; | 58 | static int exclude_other = 1; |
59 | static int callchain; | ||
60 | |||
61 | static u64 sample_type; | ||
54 | 62 | ||
55 | struct ip_event { | 63 | struct ip_event { |
56 | struct perf_event_header header; | 64 | struct perf_event_header header; |
@@ -59,11 +67,6 @@ struct ip_event { | |||
59 | unsigned char __more_data[]; | 67 | unsigned char __more_data[]; |
60 | }; | 68 | }; |
61 | 69 | ||
62 | struct ip_callchain { | ||
63 | u64 nr; | ||
64 | u64 ips[0]; | ||
65 | }; | ||
66 | |||
67 | struct mmap_event { | 70 | struct mmap_event { |
68 | struct perf_event_header header; | 71 | struct perf_event_header header; |
69 | u32 pid, tid; | 72 | u32 pid, tid; |
@@ -97,6 +100,13 @@ struct lost_event { | |||
97 | u64 lost; | 100 | u64 lost; |
98 | }; | 101 | }; |
99 | 102 | ||
103 | struct read_event { | ||
104 | struct perf_event_header header; | ||
105 | u32 pid,tid; | ||
106 | u64 value; | ||
107 | u64 format[3]; | ||
108 | }; | ||
109 | |||
100 | typedef union event_union { | 110 | typedef union event_union { |
101 | struct perf_event_header header; | 111 | struct perf_event_header header; |
102 | struct ip_event ip; | 112 | struct ip_event ip; |
@@ -105,6 +115,7 @@ typedef union event_union { | |||
105 | struct fork_event fork; | 115 | struct fork_event fork; |
106 | struct period_event period; | 116 | struct period_event period; |
107 | struct lost_event lost; | 117 | struct lost_event lost; |
118 | struct read_event read; | ||
108 | } event_t; | 119 | } event_t; |
109 | 120 | ||
110 | static LIST_HEAD(dsos); | 121 | static LIST_HEAD(dsos); |
@@ -229,7 +240,7 @@ static u64 vdso__map_ip(struct map *map, u64 ip) | |||
229 | 240 | ||
230 | static inline int is_anon_memory(const char *filename) | 241 | static inline int is_anon_memory(const char *filename) |
231 | { | 242 | { |
232 | return strcmp(filename, "//anon") == 0; | 243 | return strcmp(filename, "//anon") == 0; |
233 | } | 244 | } |
234 | 245 | ||
235 | static struct map *map__new(struct mmap_event *event) | 246 | static struct map *map__new(struct mmap_event *event) |
@@ -400,9 +411,27 @@ static void thread__insert_map(struct thread *self, struct map *map) | |||
400 | 411 | ||
401 | list_for_each_entry_safe(pos, tmp, &self->maps, node) { | 412 | list_for_each_entry_safe(pos, tmp, &self->maps, node) { |
402 | if (map__overlap(pos, map)) { | 413 | if (map__overlap(pos, map)) { |
403 | list_del_init(&pos->node); | 414 | if (verbose >= 2) { |
404 | /* XXX leaks dsos */ | 415 | printf("overlapping maps:\n"); |
405 | free(pos); | 416 | map__fprintf(map, stdout); |
417 | map__fprintf(pos, stdout); | ||
418 | } | ||
419 | |||
420 | if (map->start <= pos->start && map->end > pos->start) | ||
421 | pos->start = map->end; | ||
422 | |||
423 | if (map->end >= pos->end && map->start < pos->end) | ||
424 | pos->end = map->start; | ||
425 | |||
426 | if (verbose >= 2) { | ||
427 | printf("after collision:\n"); | ||
428 | map__fprintf(pos, stdout); | ||
429 | } | ||
430 | |||
431 | if (pos->start >= pos->end) { | ||
432 | list_del_init(&pos->node); | ||
433 | free(pos); | ||
434 | } | ||
406 | } | 435 | } |
407 | } | 436 | } |
408 | 437 | ||
@@ -464,17 +493,19 @@ static size_t threads__fprintf(FILE *fp) | |||
464 | static struct rb_root hist; | 493 | static struct rb_root hist; |
465 | 494 | ||
466 | struct hist_entry { | 495 | struct hist_entry { |
467 | struct rb_node rb_node; | 496 | struct rb_node rb_node; |
468 | 497 | ||
469 | struct thread *thread; | 498 | struct thread *thread; |
470 | struct map *map; | 499 | struct map *map; |
471 | struct dso *dso; | 500 | struct dso *dso; |
472 | struct symbol *sym; | 501 | struct symbol *sym; |
473 | struct symbol *parent; | 502 | struct symbol *parent; |
474 | u64 ip; | 503 | u64 ip; |
475 | char level; | 504 | char level; |
476 | 505 | struct callchain_node callchain; | |
477 | u64 count; | 506 | struct rb_root sorted_chain; |
507 | |||
508 | u64 count; | ||
478 | }; | 509 | }; |
479 | 510 | ||
480 | /* | 511 | /* |
@@ -745,6 +776,48 @@ hist_entry__collapse(struct hist_entry *left, struct hist_entry *right) | |||
745 | } | 776 | } |
746 | 777 | ||
747 | static size_t | 778 | static size_t |
779 | callchain__fprintf(FILE *fp, struct callchain_node *self, u64 total_samples) | ||
780 | { | ||
781 | struct callchain_list *chain; | ||
782 | size_t ret = 0; | ||
783 | |||
784 | if (!self) | ||
785 | return 0; | ||
786 | |||
787 | ret += callchain__fprintf(fp, self->parent, total_samples); | ||
788 | |||
789 | |||
790 | list_for_each_entry(chain, &self->val, list) | ||
791 | ret += fprintf(fp, " %p\n", (void *)chain->ip); | ||
792 | |||
793 | return ret; | ||
794 | } | ||
795 | |||
796 | static size_t | ||
797 | hist_entry_callchain__fprintf(FILE *fp, struct hist_entry *self, | ||
798 | u64 total_samples) | ||
799 | { | ||
800 | struct rb_node *rb_node; | ||
801 | struct callchain_node *chain; | ||
802 | size_t ret = 0; | ||
803 | |||
804 | rb_node = rb_first(&self->sorted_chain); | ||
805 | while (rb_node) { | ||
806 | double percent; | ||
807 | |||
808 | chain = rb_entry(rb_node, struct callchain_node, rb_node); | ||
809 | percent = chain->hit * 100.0 / total_samples; | ||
810 | ret += fprintf(fp, " %6.2f%%\n", percent); | ||
811 | ret += callchain__fprintf(fp, chain, total_samples); | ||
812 | ret += fprintf(fp, "\n"); | ||
813 | rb_node = rb_next(rb_node); | ||
814 | } | ||
815 | |||
816 | return ret; | ||
817 | } | ||
818 | |||
819 | |||
820 | static size_t | ||
748 | hist_entry__fprintf(FILE *fp, struct hist_entry *self, u64 total_samples) | 821 | hist_entry__fprintf(FILE *fp, struct hist_entry *self, u64 total_samples) |
749 | { | 822 | { |
750 | struct sort_entry *se; | 823 | struct sort_entry *se; |
@@ -784,6 +857,9 @@ hist_entry__fprintf(FILE *fp, struct hist_entry *self, u64 total_samples) | |||
784 | 857 | ||
785 | ret += fprintf(fp, "\n"); | 858 | ret += fprintf(fp, "\n"); |
786 | 859 | ||
860 | if (callchain) | ||
861 | hist_entry_callchain__fprintf(fp, self, total_samples); | ||
862 | |||
787 | return ret; | 863 | return ret; |
788 | } | 864 | } |
789 | 865 | ||
@@ -797,7 +873,7 @@ resolve_symbol(struct thread *thread, struct map **mapp, | |||
797 | { | 873 | { |
798 | struct dso *dso = dsop ? *dsop : NULL; | 874 | struct dso *dso = dsop ? *dsop : NULL; |
799 | struct map *map = mapp ? *mapp : NULL; | 875 | struct map *map = mapp ? *mapp : NULL; |
800 | uint64_t ip = *ipp; | 876 | u64 ip = *ipp; |
801 | 877 | ||
802 | if (!thread) | 878 | if (!thread) |
803 | return NULL; | 879 | return NULL; |
@@ -814,7 +890,6 @@ resolve_symbol(struct thread *thread, struct map **mapp, | |||
814 | *mapp = map; | 890 | *mapp = map; |
815 | got_map: | 891 | got_map: |
816 | ip = map->map_ip(map, ip); | 892 | ip = map->map_ip(map, ip); |
817 | *ipp = ip; | ||
818 | 893 | ||
819 | dso = map->dso; | 894 | dso = map->dso; |
820 | } else { | 895 | } else { |
@@ -828,6 +903,8 @@ got_map: | |||
828 | dso = kernel_dso; | 903 | dso = kernel_dso; |
829 | } | 904 | } |
830 | dprintf(" ...... dso: %s\n", dso ? dso->name : "<not found>"); | 905 | dprintf(" ...... dso: %s\n", dso ? dso->name : "<not found>"); |
906 | dprintf(" ...... map: %Lx -> %Lx\n", *ipp, ip); | ||
907 | *ipp = ip; | ||
831 | 908 | ||
832 | if (dsop) | 909 | if (dsop) |
833 | *dsop = dso; | 910 | *dsop = dso; |
@@ -867,6 +944,7 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, | |||
867 | .level = level, | 944 | .level = level, |
868 | .count = count, | 945 | .count = count, |
869 | .parent = NULL, | 946 | .parent = NULL, |
947 | .sorted_chain = RB_ROOT | ||
870 | }; | 948 | }; |
871 | int cmp; | 949 | int cmp; |
872 | 950 | ||
@@ -909,6 +987,8 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, | |||
909 | 987 | ||
910 | if (!cmp) { | 988 | if (!cmp) { |
911 | he->count += count; | 989 | he->count += count; |
990 | if (callchain) | ||
991 | append_chain(&he->callchain, chain); | ||
912 | return 0; | 992 | return 0; |
913 | } | 993 | } |
914 | 994 | ||
@@ -922,6 +1002,10 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, | |||
922 | if (!he) | 1002 | if (!he) |
923 | return -ENOMEM; | 1003 | return -ENOMEM; |
924 | *he = entry; | 1004 | *he = entry; |
1005 | if (callchain) { | ||
1006 | callchain_init(&he->callchain); | ||
1007 | append_chain(&he->callchain, chain); | ||
1008 | } | ||
925 | rb_link_node(&he->rb_node, parent, p); | 1009 | rb_link_node(&he->rb_node, parent, p); |
926 | rb_insert_color(&he->rb_node, &hist); | 1010 | rb_insert_color(&he->rb_node, &hist); |
927 | 1011 | ||
@@ -998,6 +1082,9 @@ static void output__insert_entry(struct hist_entry *he) | |||
998 | struct rb_node *parent = NULL; | 1082 | struct rb_node *parent = NULL; |
999 | struct hist_entry *iter; | 1083 | struct hist_entry *iter; |
1000 | 1084 | ||
1085 | if (callchain) | ||
1086 | sort_chain_to_rbtree(&he->sorted_chain, &he->callchain); | ||
1087 | |||
1001 | while (*p != NULL) { | 1088 | while (*p != NULL) { |
1002 | parent = *p; | 1089 | parent = *p; |
1003 | iter = rb_entry(parent, struct hist_entry, rb_node); | 1090 | iter = rb_entry(parent, struct hist_entry, rb_node); |
@@ -1115,7 +1202,7 @@ static int validate_chain(struct ip_callchain *chain, event_t *event) | |||
1115 | } | 1202 | } |
1116 | 1203 | ||
1117 | static int | 1204 | static int |
1118 | process_overflow_event(event_t *event, unsigned long offset, unsigned long head) | 1205 | process_sample_event(event_t *event, unsigned long offset, unsigned long head) |
1119 | { | 1206 | { |
1120 | char level; | 1207 | char level; |
1121 | int show = 0; | 1208 | int show = 0; |
@@ -1127,12 +1214,12 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head) | |||
1127 | void *more_data = event->ip.__more_data; | 1214 | void *more_data = event->ip.__more_data; |
1128 | struct ip_callchain *chain = NULL; | 1215 | struct ip_callchain *chain = NULL; |
1129 | 1216 | ||
1130 | if (event->header.type & PERF_SAMPLE_PERIOD) { | 1217 | if (sample_type & PERF_SAMPLE_PERIOD) { |
1131 | period = *(u64 *)more_data; | 1218 | period = *(u64 *)more_data; |
1132 | more_data += sizeof(u64); | 1219 | more_data += sizeof(u64); |
1133 | } | 1220 | } |
1134 | 1221 | ||
1135 | dprintf("%p [%p]: PERF_EVENT (IP, %d): %d: %p period: %Ld\n", | 1222 | dprintf("%p [%p]: PERF_EVENT_SAMPLE (IP, %d): %d: %p period: %Ld\n", |
1136 | (void *)(offset + head), | 1223 | (void *)(offset + head), |
1137 | (void *)(long)(event->header.size), | 1224 | (void *)(long)(event->header.size), |
1138 | event->header.misc, | 1225 | event->header.misc, |
@@ -1140,7 +1227,7 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head) | |||
1140 | (void *)(long)ip, | 1227 | (void *)(long)ip, |
1141 | (long long)period); | 1228 | (long long)period); |
1142 | 1229 | ||
1143 | if (event->header.type & PERF_SAMPLE_CALLCHAIN) { | 1230 | if (sample_type & PERF_SAMPLE_CALLCHAIN) { |
1144 | int i; | 1231 | int i; |
1145 | 1232 | ||
1146 | chain = (void *)more_data; | 1233 | chain = (void *)more_data; |
@@ -1166,6 +1253,9 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head) | |||
1166 | return -1; | 1253 | return -1; |
1167 | } | 1254 | } |
1168 | 1255 | ||
1256 | if (comm_list && !strlist__has_entry(comm_list, thread->comm)) | ||
1257 | return 0; | ||
1258 | |||
1169 | if (event->header.misc & PERF_EVENT_MISC_KERNEL) { | 1259 | if (event->header.misc & PERF_EVENT_MISC_KERNEL) { |
1170 | show = SHOW_KERNEL; | 1260 | show = SHOW_KERNEL; |
1171 | level = 'k'; | 1261 | level = 'k'; |
@@ -1188,6 +1278,12 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head) | |||
1188 | if (show & show_mask) { | 1278 | if (show & show_mask) { |
1189 | struct symbol *sym = resolve_symbol(thread, &map, &dso, &ip); | 1279 | struct symbol *sym = resolve_symbol(thread, &map, &dso, &ip); |
1190 | 1280 | ||
1281 | if (dso_list && dso && dso->name && !strlist__has_entry(dso_list, dso->name)) | ||
1282 | return 0; | ||
1283 | |||
1284 | if (sym_list && sym && !strlist__has_entry(sym_list, sym->name)) | ||
1285 | return 0; | ||
1286 | |||
1191 | if (hist_entry__add(thread, map, dso, sym, ip, chain, level, period)) { | 1287 | if (hist_entry__add(thread, map, dso, sym, ip, chain, level, period)) { |
1192 | eprintf("problem incrementing symbol count, skipping event\n"); | 1288 | eprintf("problem incrementing symbol count, skipping event\n"); |
1193 | return -1; | 1289 | return -1; |
@@ -1328,14 +1424,27 @@ static void trace_event(event_t *event) | |||
1328 | } | 1424 | } |
1329 | 1425 | ||
1330 | static int | 1426 | static int |
1427 | process_read_event(event_t *event, unsigned long offset, unsigned long head) | ||
1428 | { | ||
1429 | dprintf("%p [%p]: PERF_EVENT_READ: %d %d %Lu\n", | ||
1430 | (void *)(offset + head), | ||
1431 | (void *)(long)(event->header.size), | ||
1432 | event->read.pid, | ||
1433 | event->read.tid, | ||
1434 | event->read.value); | ||
1435 | |||
1436 | return 0; | ||
1437 | } | ||
1438 | |||
1439 | static int | ||
1331 | process_event(event_t *event, unsigned long offset, unsigned long head) | 1440 | process_event(event_t *event, unsigned long offset, unsigned long head) |
1332 | { | 1441 | { |
1333 | trace_event(event); | 1442 | trace_event(event); |
1334 | 1443 | ||
1335 | if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) | ||
1336 | return process_overflow_event(event, offset, head); | ||
1337 | |||
1338 | switch (event->header.type) { | 1444 | switch (event->header.type) { |
1445 | case PERF_EVENT_SAMPLE: | ||
1446 | return process_sample_event(event, offset, head); | ||
1447 | |||
1339 | case PERF_EVENT_MMAP: | 1448 | case PERF_EVENT_MMAP: |
1340 | return process_mmap_event(event, offset, head); | 1449 | return process_mmap_event(event, offset, head); |
1341 | 1450 | ||
@@ -1351,6 +1460,9 @@ process_event(event_t *event, unsigned long offset, unsigned long head) | |||
1351 | case PERF_EVENT_LOST: | 1460 | case PERF_EVENT_LOST: |
1352 | return process_lost_event(event, offset, head); | 1461 | return process_lost_event(event, offset, head); |
1353 | 1462 | ||
1463 | case PERF_EVENT_READ: | ||
1464 | return process_read_event(event, offset, head); | ||
1465 | |||
1354 | /* | 1466 | /* |
1355 | * We dont process them right now but they are fine: | 1467 | * We dont process them right now but they are fine: |
1356 | */ | 1468 | */ |
@@ -1366,13 +1478,30 @@ process_event(event_t *event, unsigned long offset, unsigned long head) | |||
1366 | return 0; | 1478 | return 0; |
1367 | } | 1479 | } |
1368 | 1480 | ||
1369 | static struct perf_file_header file_header; | 1481 | static struct perf_header *header; |
1482 | |||
1483 | static u64 perf_header__sample_type(void) | ||
1484 | { | ||
1485 | u64 sample_type = 0; | ||
1486 | int i; | ||
1487 | |||
1488 | for (i = 0; i < header->attrs; i++) { | ||
1489 | struct perf_header_attr *attr = header->attr[i]; | ||
1490 | |||
1491 | if (!sample_type) | ||
1492 | sample_type = attr->attr.sample_type; | ||
1493 | else if (sample_type != attr->attr.sample_type) | ||
1494 | die("non matching sample_type"); | ||
1495 | } | ||
1496 | |||
1497 | return sample_type; | ||
1498 | } | ||
1370 | 1499 | ||
1371 | static int __cmd_report(void) | 1500 | static int __cmd_report(void) |
1372 | { | 1501 | { |
1373 | int ret, rc = EXIT_FAILURE; | 1502 | int ret, rc = EXIT_FAILURE; |
1374 | unsigned long offset = 0; | 1503 | unsigned long offset = 0; |
1375 | unsigned long head = sizeof(file_header); | 1504 | unsigned long head, shift; |
1376 | struct stat stat; | 1505 | struct stat stat; |
1377 | event_t *event; | 1506 | event_t *event; |
1378 | uint32_t size; | 1507 | uint32_t size; |
@@ -1400,13 +1529,12 @@ static int __cmd_report(void) | |||
1400 | exit(0); | 1529 | exit(0); |
1401 | } | 1530 | } |
1402 | 1531 | ||
1403 | if (read(input, &file_header, sizeof(file_header)) == -1) { | 1532 | header = perf_header__read(input); |
1404 | perror("failed to read file headers"); | 1533 | head = header->data_offset; |
1405 | exit(-1); | ||
1406 | } | ||
1407 | 1534 | ||
1408 | if (sort__has_parent && | 1535 | sample_type = perf_header__sample_type(); |
1409 | !(file_header.sample_type & PERF_SAMPLE_CALLCHAIN)) { | 1536 | |
1537 | if (sort__has_parent && !(sample_type & PERF_SAMPLE_CALLCHAIN)) { | ||
1410 | fprintf(stderr, "selected --sort parent, but no callchain data\n"); | 1538 | fprintf(stderr, "selected --sort parent, but no callchain data\n"); |
1411 | exit(-1); | 1539 | exit(-1); |
1412 | } | 1540 | } |
@@ -1426,6 +1554,11 @@ static int __cmd_report(void) | |||
1426 | cwd = NULL; | 1554 | cwd = NULL; |
1427 | cwdlen = 0; | 1555 | cwdlen = 0; |
1428 | } | 1556 | } |
1557 | |||
1558 | shift = page_size * (head / page_size); | ||
1559 | offset += shift; | ||
1560 | head -= shift; | ||
1561 | |||
1429 | remap: | 1562 | remap: |
1430 | buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ, | 1563 | buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ, |
1431 | MAP_SHARED, input, offset); | 1564 | MAP_SHARED, input, offset); |
@@ -1442,9 +1575,10 @@ more: | |||
1442 | size = 8; | 1575 | size = 8; |
1443 | 1576 | ||
1444 | if (head + event->header.size >= page_size * mmap_window) { | 1577 | if (head + event->header.size >= page_size * mmap_window) { |
1445 | unsigned long shift = page_size * (head / page_size); | ||
1446 | int ret; | 1578 | int ret; |
1447 | 1579 | ||
1580 | shift = page_size * (head / page_size); | ||
1581 | |||
1448 | ret = munmap(buf, page_size * mmap_window); | 1582 | ret = munmap(buf, page_size * mmap_window); |
1449 | assert(ret == 0); | 1583 | assert(ret == 0); |
1450 | 1584 | ||
@@ -1482,7 +1616,7 @@ more: | |||
1482 | 1616 | ||
1483 | head += size; | 1617 | head += size; |
1484 | 1618 | ||
1485 | if (offset + head >= sizeof(file_header) + file_header.data_size) | 1619 | if (offset + head >= header->data_offset + header->data_size) |
1486 | goto done; | 1620 | goto done; |
1487 | 1621 | ||
1488 | if (offset + head < stat.st_size) | 1622 | if (offset + head < stat.st_size) |
@@ -1536,6 +1670,13 @@ static const struct option options[] = { | |||
1536 | "regex filter to identify parent, see: '--sort parent'"), | 1670 | "regex filter to identify parent, see: '--sort parent'"), |
1537 | OPT_BOOLEAN('x', "exclude-other", &exclude_other, | 1671 | OPT_BOOLEAN('x', "exclude-other", &exclude_other, |
1538 | "Only display entries with parent-match"), | 1672 | "Only display entries with parent-match"), |
1673 | OPT_BOOLEAN('c', "callchain", &callchain, "Display callchains"), | ||
1674 | OPT_STRING('d', "dsos", &dso_list_str, "dso[,dso...]", | ||
1675 | "only consider symbols in these dsos"), | ||
1676 | OPT_STRING('C', "comms", &comm_list_str, "comm[,comm...]", | ||
1677 | "only consider symbols in these comms"), | ||
1678 | OPT_STRING('S', "symbols", &sym_list_str, "symbol[,symbol...]", | ||
1679 | "only consider these symbols"), | ||
1539 | OPT_END() | 1680 | OPT_END() |
1540 | }; | 1681 | }; |
1541 | 1682 | ||
@@ -1554,6 +1695,19 @@ static void setup_sorting(void) | |||
1554 | free(str); | 1695 | free(str); |
1555 | } | 1696 | } |
1556 | 1697 | ||
1698 | static void setup_list(struct strlist **list, const char *list_str, | ||
1699 | const char *list_name) | ||
1700 | { | ||
1701 | if (list_str) { | ||
1702 | *list = strlist__new(true, list_str); | ||
1703 | if (!*list) { | ||
1704 | fprintf(stderr, "problems parsing %s list\n", | ||
1705 | list_name); | ||
1706 | exit(129); | ||
1707 | } | ||
1708 | } | ||
1709 | } | ||
1710 | |||
1557 | int cmd_report(int argc, const char **argv, const char *prefix) | 1711 | int cmd_report(int argc, const char **argv, const char *prefix) |
1558 | { | 1712 | { |
1559 | symbol__init(); | 1713 | symbol__init(); |
@@ -1575,6 +1729,10 @@ int cmd_report(int argc, const char **argv, const char *prefix) | |||
1575 | if (argc) | 1729 | if (argc) |
1576 | usage_with_options(report_usage, options); | 1730 | usage_with_options(report_usage, options); |
1577 | 1731 | ||
1732 | setup_list(&dso_list, dso_list_str, "dso"); | ||
1733 | setup_list(&comm_list, comm_list_str, "comm"); | ||
1734 | setup_list(&sym_list, sym_list_str, "symbol"); | ||
1735 | |||
1578 | setup_pager(); | 1736 | setup_pager(); |
1579 | 1737 | ||
1580 | return __cmd_report(); | 1738 | return __cmd_report(); |
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 6d3eeac1ea25..2e03524a1de0 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c | |||
@@ -32,6 +32,7 @@ | |||
32 | * Wu Fengguang <fengguang.wu@intel.com> | 32 | * Wu Fengguang <fengguang.wu@intel.com> |
33 | * Mike Galbraith <efault@gmx.de> | 33 | * Mike Galbraith <efault@gmx.de> |
34 | * Paul Mackerras <paulus@samba.org> | 34 | * Paul Mackerras <paulus@samba.org> |
35 | * Jaswinder Singh Rajput <jaswinder@kernel.org> | ||
35 | * | 36 | * |
36 | * Released under the GPL v2. (and only v2, not any later version) | 37 | * Released under the GPL v2. (and only v2, not any later version) |
37 | */ | 38 | */ |
@@ -45,7 +46,7 @@ | |||
45 | #include <sys/prctl.h> | 46 | #include <sys/prctl.h> |
46 | #include <math.h> | 47 | #include <math.h> |
47 | 48 | ||
48 | static struct perf_counter_attr default_attrs[MAX_COUNTERS] = { | 49 | static struct perf_counter_attr default_attrs[] = { |
49 | 50 | ||
50 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, | 51 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, |
51 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES}, | 52 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES}, |
@@ -59,42 +60,28 @@ static struct perf_counter_attr default_attrs[MAX_COUNTERS] = { | |||
59 | 60 | ||
60 | }; | 61 | }; |
61 | 62 | ||
63 | #define MAX_RUN 100 | ||
64 | |||
62 | static int system_wide = 0; | 65 | static int system_wide = 0; |
63 | static int inherit = 1; | ||
64 | static int verbose = 0; | 66 | static int verbose = 0; |
65 | |||
66 | static int fd[MAX_NR_CPUS][MAX_COUNTERS]; | ||
67 | |||
68 | static int target_pid = -1; | ||
69 | static int nr_cpus = 0; | 67 | static int nr_cpus = 0; |
70 | static unsigned int page_size; | 68 | static int run_idx = 0; |
71 | 69 | ||
70 | static int run_count = 1; | ||
71 | static int inherit = 1; | ||
72 | static int scale = 1; | 72 | static int scale = 1; |
73 | static int target_pid = -1; | ||
74 | static int null_run = 0; | ||
73 | 75 | ||
74 | static const unsigned int default_count[] = { | 76 | static int fd[MAX_NR_CPUS][MAX_COUNTERS]; |
75 | 1000000, | ||
76 | 1000000, | ||
77 | 10000, | ||
78 | 10000, | ||
79 | 1000000, | ||
80 | 10000, | ||
81 | }; | ||
82 | |||
83 | #define MAX_RUN 100 | ||
84 | |||
85 | static int run_count = 1; | ||
86 | static int run_idx = 0; | ||
87 | |||
88 | static u64 event_res[MAX_RUN][MAX_COUNTERS][3]; | ||
89 | static u64 event_scaled[MAX_RUN][MAX_COUNTERS]; | ||
90 | |||
91 | //static u64 event_hist[MAX_RUN][MAX_COUNTERS][3]; | ||
92 | |||
93 | 77 | ||
94 | static u64 runtime_nsecs[MAX_RUN]; | 78 | static u64 runtime_nsecs[MAX_RUN]; |
95 | static u64 walltime_nsecs[MAX_RUN]; | 79 | static u64 walltime_nsecs[MAX_RUN]; |
96 | static u64 runtime_cycles[MAX_RUN]; | 80 | static u64 runtime_cycles[MAX_RUN]; |
97 | 81 | ||
82 | static u64 event_res[MAX_RUN][MAX_COUNTERS][3]; | ||
83 | static u64 event_scaled[MAX_RUN][MAX_COUNTERS]; | ||
84 | |||
98 | static u64 event_res_avg[MAX_COUNTERS][3]; | 85 | static u64 event_res_avg[MAX_COUNTERS][3]; |
99 | static u64 event_res_noise[MAX_COUNTERS][3]; | 86 | static u64 event_res_noise[MAX_COUNTERS][3]; |
100 | 87 | ||
@@ -109,7 +96,10 @@ static u64 walltime_nsecs_noise; | |||
109 | static u64 runtime_cycles_avg; | 96 | static u64 runtime_cycles_avg; |
110 | static u64 runtime_cycles_noise; | 97 | static u64 runtime_cycles_noise; |
111 | 98 | ||
112 | static void create_perf_stat_counter(int counter) | 99 | #define ERR_PERF_OPEN \ |
100 | "Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n" | ||
101 | |||
102 | static void create_perf_stat_counter(int counter, int pid) | ||
113 | { | 103 | { |
114 | struct perf_counter_attr *attr = attrs + counter; | 104 | struct perf_counter_attr *attr = attrs + counter; |
115 | 105 | ||
@@ -119,20 +109,21 @@ static void create_perf_stat_counter(int counter) | |||
119 | 109 | ||
120 | if (system_wide) { | 110 | if (system_wide) { |
121 | int cpu; | 111 | int cpu; |
122 | for (cpu = 0; cpu < nr_cpus; cpu ++) { | 112 | for (cpu = 0; cpu < nr_cpus; cpu++) { |
123 | fd[cpu][counter] = sys_perf_counter_open(attr, -1, cpu, -1, 0); | 113 | fd[cpu][counter] = sys_perf_counter_open(attr, -1, cpu, -1, 0); |
124 | if (fd[cpu][counter] < 0 && verbose) { | 114 | if (fd[cpu][counter] < 0 && verbose) |
125 | printf("Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n", counter, fd[cpu][counter], strerror(errno)); | 115 | fprintf(stderr, ERR_PERF_OPEN, counter, |
126 | } | 116 | fd[cpu][counter], strerror(errno)); |
127 | } | 117 | } |
128 | } else { | 118 | } else { |
129 | attr->inherit = inherit; | 119 | attr->inherit = inherit; |
130 | attr->disabled = 1; | 120 | attr->disabled = 1; |
131 | 121 | attr->enable_on_exec = 1; | |
132 | fd[0][counter] = sys_perf_counter_open(attr, 0, -1, -1, 0); | 122 | |
133 | if (fd[0][counter] < 0 && verbose) { | 123 | fd[0][counter] = sys_perf_counter_open(attr, pid, -1, -1, 0); |
134 | printf("Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n", counter, fd[0][counter], strerror(errno)); | 124 | if (fd[0][counter] < 0 && verbose) |
135 | } | 125 | fprintf(stderr, ERR_PERF_OPEN, counter, |
126 | fd[0][counter], strerror(errno)); | ||
136 | } | 127 | } |
137 | } | 128 | } |
138 | 129 | ||
@@ -168,7 +159,7 @@ static void read_counter(int counter) | |||
168 | count[0] = count[1] = count[2] = 0; | 159 | count[0] = count[1] = count[2] = 0; |
169 | 160 | ||
170 | nv = scale ? 3 : 1; | 161 | nv = scale ? 3 : 1; |
171 | for (cpu = 0; cpu < nr_cpus; cpu ++) { | 162 | for (cpu = 0; cpu < nr_cpus; cpu++) { |
172 | if (fd[cpu][counter] < 0) | 163 | if (fd[cpu][counter] < 0) |
173 | continue; | 164 | continue; |
174 | 165 | ||
@@ -215,32 +206,67 @@ static int run_perf_stat(int argc, const char **argv) | |||
215 | int status = 0; | 206 | int status = 0; |
216 | int counter; | 207 | int counter; |
217 | int pid; | 208 | int pid; |
209 | int child_ready_pipe[2], go_pipe[2]; | ||
210 | char buf; | ||
218 | 211 | ||
219 | if (!system_wide) | 212 | if (!system_wide) |
220 | nr_cpus = 1; | 213 | nr_cpus = 1; |
221 | 214 | ||
222 | for (counter = 0; counter < nr_counters; counter++) | 215 | if (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0) { |
223 | create_perf_stat_counter(counter); | 216 | perror("failed to create pipes"); |
224 | 217 | exit(1); | |
225 | /* | 218 | } |
226 | * Enable counters and exec the command: | ||
227 | */ | ||
228 | t0 = rdclock(); | ||
229 | prctl(PR_TASK_PERF_COUNTERS_ENABLE); | ||
230 | 219 | ||
231 | if ((pid = fork()) < 0) | 220 | if ((pid = fork()) < 0) |
232 | perror("failed to fork"); | 221 | perror("failed to fork"); |
233 | 222 | ||
234 | if (!pid) { | 223 | if (!pid) { |
235 | if (execvp(argv[0], (char **)argv)) { | 224 | close(child_ready_pipe[0]); |
236 | perror(argv[0]); | 225 | close(go_pipe[1]); |
237 | exit(-1); | 226 | fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC); |
238 | } | 227 | |
228 | /* | ||
229 | * Do a dummy execvp to get the PLT entry resolved, | ||
230 | * so we avoid the resolver overhead on the real | ||
231 | * execvp call. | ||
232 | */ | ||
233 | execvp("", (char **)argv); | ||
234 | |||
235 | /* | ||
236 | * Tell the parent we're ready to go | ||
237 | */ | ||
238 | close(child_ready_pipe[1]); | ||
239 | |||
240 | /* | ||
241 | * Wait until the parent tells us to go. | ||
242 | */ | ||
243 | read(go_pipe[0], &buf, 1); | ||
244 | |||
245 | execvp(argv[0], (char **)argv); | ||
246 | |||
247 | perror(argv[0]); | ||
248 | exit(-1); | ||
239 | } | 249 | } |
240 | 250 | ||
251 | /* | ||
252 | * Wait for the child to be ready to exec. | ||
253 | */ | ||
254 | close(child_ready_pipe[1]); | ||
255 | close(go_pipe[0]); | ||
256 | read(child_ready_pipe[0], &buf, 1); | ||
257 | close(child_ready_pipe[0]); | ||
258 | |||
259 | for (counter = 0; counter < nr_counters; counter++) | ||
260 | create_perf_stat_counter(counter, pid); | ||
261 | |||
262 | /* | ||
263 | * Enable counters and exec the command: | ||
264 | */ | ||
265 | t0 = rdclock(); | ||
266 | |||
267 | close(go_pipe[1]); | ||
241 | wait(&status); | 268 | wait(&status); |
242 | 269 | ||
243 | prctl(PR_TASK_PERF_COUNTERS_DISABLE); | ||
244 | t1 = rdclock(); | 270 | t1 = rdclock(); |
245 | 271 | ||
246 | walltime_nsecs[run_idx] = t1 - t0; | 272 | walltime_nsecs[run_idx] = t1 - t0; |
@@ -262,7 +288,7 @@ static void nsec_printout(int counter, u64 *count, u64 *noise) | |||
262 | { | 288 | { |
263 | double msecs = (double)count[0] / 1000000; | 289 | double msecs = (double)count[0] / 1000000; |
264 | 290 | ||
265 | fprintf(stderr, " %14.6f %-20s", msecs, event_name(counter)); | 291 | fprintf(stderr, " %14.6f %-24s", msecs, event_name(counter)); |
266 | 292 | ||
267 | if (attrs[counter].type == PERF_TYPE_SOFTWARE && | 293 | if (attrs[counter].type == PERF_TYPE_SOFTWARE && |
268 | attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) { | 294 | attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) { |
@@ -276,7 +302,7 @@ static void nsec_printout(int counter, u64 *count, u64 *noise) | |||
276 | 302 | ||
277 | static void abs_printout(int counter, u64 *count, u64 *noise) | 303 | static void abs_printout(int counter, u64 *count, u64 *noise) |
278 | { | 304 | { |
279 | fprintf(stderr, " %14Ld %-20s", count[0], event_name(counter)); | 305 | fprintf(stderr, " %14Ld %-24s", count[0], event_name(counter)); |
280 | 306 | ||
281 | if (runtime_cycles_avg && | 307 | if (runtime_cycles_avg && |
282 | attrs[counter].type == PERF_TYPE_HARDWARE && | 308 | attrs[counter].type == PERF_TYPE_HARDWARE && |
@@ -306,7 +332,7 @@ static void print_counter(int counter) | |||
306 | scaled = event_scaled_avg[counter]; | 332 | scaled = event_scaled_avg[counter]; |
307 | 333 | ||
308 | if (scaled == -1) { | 334 | if (scaled == -1) { |
309 | fprintf(stderr, " %14s %-20s\n", | 335 | fprintf(stderr, " %14s %-24s\n", |
310 | "<not counted>", event_name(counter)); | 336 | "<not counted>", event_name(counter)); |
311 | return; | 337 | return; |
312 | } | 338 | } |
@@ -364,8 +390,11 @@ static void calc_avg(void) | |||
364 | event_res_avg[j]+1, event_res[i][j]+1); | 390 | event_res_avg[j]+1, event_res[i][j]+1); |
365 | update_avg("counter/2", j, | 391 | update_avg("counter/2", j, |
366 | event_res_avg[j]+2, event_res[i][j]+2); | 392 | event_res_avg[j]+2, event_res[i][j]+2); |
367 | update_avg("scaled", j, | 393 | if (event_scaled[i][j] != -1) |
368 | event_scaled_avg + j, event_scaled[i]+j); | 394 | update_avg("scaled", j, |
395 | event_scaled_avg + j, event_scaled[i]+j); | ||
396 | else | ||
397 | event_scaled_avg[j] = -1; | ||
369 | } | 398 | } |
370 | } | 399 | } |
371 | runtime_nsecs_avg /= run_count; | 400 | runtime_nsecs_avg /= run_count; |
@@ -429,11 +458,14 @@ static void print_stat(int argc, const char **argv) | |||
429 | for (counter = 0; counter < nr_counters; counter++) | 458 | for (counter = 0; counter < nr_counters; counter++) |
430 | print_counter(counter); | 459 | print_counter(counter); |
431 | 460 | ||
432 | |||
433 | fprintf(stderr, "\n"); | 461 | fprintf(stderr, "\n"); |
434 | fprintf(stderr, " %14.9f seconds time elapsed.\n", | 462 | fprintf(stderr, " %14.9f seconds time elapsed", |
435 | (double)walltime_nsecs_avg/1e9); | 463 | (double)walltime_nsecs_avg/1e9); |
436 | fprintf(stderr, "\n"); | 464 | if (run_count > 1) { |
465 | fprintf(stderr, " ( +- %7.3f%% )", | ||
466 | 100.0*(double)walltime_nsecs_noise/(double)walltime_nsecs_avg); | ||
467 | } | ||
468 | fprintf(stderr, "\n\n"); | ||
437 | } | 469 | } |
438 | 470 | ||
439 | static volatile int signr = -1; | 471 | static volatile int signr = -1; |
@@ -466,13 +498,15 @@ static const struct option options[] = { | |||
466 | OPT_INTEGER('p', "pid", &target_pid, | 498 | OPT_INTEGER('p', "pid", &target_pid, |
467 | "stat events on existing pid"), | 499 | "stat events on existing pid"), |
468 | OPT_BOOLEAN('a', "all-cpus", &system_wide, | 500 | OPT_BOOLEAN('a', "all-cpus", &system_wide, |
469 | "system-wide collection from all CPUs"), | 501 | "system-wide collection from all CPUs"), |
470 | OPT_BOOLEAN('S', "scale", &scale, | 502 | OPT_BOOLEAN('S', "scale", &scale, |
471 | "scale/normalize counters"), | 503 | "scale/normalize counters"), |
472 | OPT_BOOLEAN('v', "verbose", &verbose, | 504 | OPT_BOOLEAN('v', "verbose", &verbose, |
473 | "be more verbose (show counter open errors, etc)"), | 505 | "be more verbose (show counter open errors, etc)"), |
474 | OPT_INTEGER('r', "repeat", &run_count, | 506 | OPT_INTEGER('r', "repeat", &run_count, |
475 | "repeat command and print average + stddev (max: 100)"), | 507 | "repeat command and print average + stddev (max: 100)"), |
508 | OPT_BOOLEAN('n', "null", &null_run, | ||
509 | "null run - dont start any counters"), | ||
476 | OPT_END() | 510 | OPT_END() |
477 | }; | 511 | }; |
478 | 512 | ||
@@ -480,18 +514,17 @@ int cmd_stat(int argc, const char **argv, const char *prefix) | |||
480 | { | 514 | { |
481 | int status; | 515 | int status; |
482 | 516 | ||
483 | page_size = sysconf(_SC_PAGE_SIZE); | ||
484 | |||
485 | memcpy(attrs, default_attrs, sizeof(attrs)); | ||
486 | |||
487 | argc = parse_options(argc, argv, options, stat_usage, 0); | 517 | argc = parse_options(argc, argv, options, stat_usage, 0); |
488 | if (!argc) | 518 | if (!argc) |
489 | usage_with_options(stat_usage, options); | 519 | usage_with_options(stat_usage, options); |
490 | if (run_count <= 0 || run_count > MAX_RUN) | 520 | if (run_count <= 0 || run_count > MAX_RUN) |
491 | usage_with_options(stat_usage, options); | 521 | usage_with_options(stat_usage, options); |
492 | 522 | ||
493 | if (!nr_counters) | 523 | /* Set attrs and nr_counters if no event is selected and !null_run */ |
494 | nr_counters = 8; | 524 | if (!null_run && !nr_counters) { |
525 | memcpy(attrs, default_attrs, sizeof(default_attrs)); | ||
526 | nr_counters = ARRAY_SIZE(default_attrs); | ||
527 | } | ||
495 | 528 | ||
496 | nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); | 529 | nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); |
497 | assert(nr_cpus <= MAX_NR_CPUS); | 530 | assert(nr_cpus <= MAX_NR_CPUS); |
@@ -511,7 +544,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix) | |||
511 | status = 0; | 544 | status = 0; |
512 | for (run_idx = 0; run_idx < run_count; run_idx++) { | 545 | for (run_idx = 0; run_idx < run_count; run_idx++) { |
513 | if (run_count != 1 && verbose) | 546 | if (run_count != 1 && verbose) |
514 | fprintf(stderr, "[ perf stat: executing run #%d ... ]\n", run_idx+1); | 547 | fprintf(stderr, "[ perf stat: executing run #%d ... ]\n", run_idx + 1); |
515 | status = run_perf_stat(argc, argv); | 548 | status = run_perf_stat(argc, argv); |
516 | } | 549 | } |
517 | 550 | ||
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 5352b5e352ed..cf0d21f1ae10 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c | |||
@@ -392,11 +392,11 @@ static void record_ip(u64 ip, int counter) | |||
392 | samples--; | 392 | samples--; |
393 | } | 393 | } |
394 | 394 | ||
395 | static void process_event(u64 ip, int counter) | 395 | static void process_event(u64 ip, int counter, int user) |
396 | { | 396 | { |
397 | samples++; | 397 | samples++; |
398 | 398 | ||
399 | if (ip < min_ip || ip > max_ip) { | 399 | if (user) { |
400 | userspace_samples++; | 400 | userspace_samples++; |
401 | return; | 401 | return; |
402 | } | 402 | } |
@@ -509,9 +509,10 @@ static void mmap_read_counter(struct mmap_data *md) | |||
509 | 509 | ||
510 | old += size; | 510 | old += size; |
511 | 511 | ||
512 | if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) { | 512 | if (event->header.type == PERF_EVENT_SAMPLE) { |
513 | if (event->header.type & PERF_SAMPLE_IP) | 513 | int user = |
514 | process_event(event->ip.ip, md->counter); | 514 | (event->header.misc & PERF_EVENT_MISC_CPUMODE_MASK) == PERF_EVENT_MISC_USER; |
515 | process_event(event->ip.ip, md->counter, user); | ||
515 | } | 516 | } |
516 | } | 517 | } |
517 | 518 | ||
diff --git a/tools/perf/perf.h b/tools/perf/perf.h index ceb68aa51f7f..ce394192c85a 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h | |||
@@ -25,7 +25,7 @@ | |||
25 | #include <sys/syscall.h> | 25 | #include <sys/syscall.h> |
26 | 26 | ||
27 | #include "../../include/linux/perf_counter.h" | 27 | #include "../../include/linux/perf_counter.h" |
28 | #include "types.h" | 28 | #include "util/types.h" |
29 | 29 | ||
30 | /* | 30 | /* |
31 | * prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all | 31 | * prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all |
@@ -72,10 +72,9 @@ sys_perf_counter_open(struct perf_counter_attr *attr, | |||
72 | #define MAX_COUNTERS 256 | 72 | #define MAX_COUNTERS 256 |
73 | #define MAX_NR_CPUS 256 | 73 | #define MAX_NR_CPUS 256 |
74 | 74 | ||
75 | struct perf_file_header { | 75 | struct ip_callchain { |
76 | u64 version; | 76 | u64 nr; |
77 | u64 sample_type; | 77 | u64 ips[0]; |
78 | u64 data_size; | ||
79 | }; | 78 | }; |
80 | 79 | ||
81 | #endif | 80 | #endif |
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c new file mode 100644 index 000000000000..ad3c28578961 --- /dev/null +++ b/tools/perf/util/callchain.c | |||
@@ -0,0 +1,174 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2009, Frederic Weisbecker <fweisbec@gmail.com> | ||
3 | * | ||
4 | * Handle the callchains from the stream in an ad-hoc radix tree and then | ||
5 | * sort them in an rbtree. | ||
6 | * | ||
7 | */ | ||
8 | |||
9 | #include <stdlib.h> | ||
10 | #include <stdio.h> | ||
11 | #include <stdbool.h> | ||
12 | #include <errno.h> | ||
13 | |||
14 | #include "callchain.h" | ||
15 | |||
16 | |||
17 | static void rb_insert_callchain(struct rb_root *root, struct callchain_node *chain) | ||
18 | { | ||
19 | struct rb_node **p = &root->rb_node; | ||
20 | struct rb_node *parent = NULL; | ||
21 | struct callchain_node *rnode; | ||
22 | |||
23 | while (*p) { | ||
24 | parent = *p; | ||
25 | rnode = rb_entry(parent, struct callchain_node, rb_node); | ||
26 | |||
27 | if (rnode->hit < chain->hit) | ||
28 | p = &(*p)->rb_left; | ||
29 | else | ||
30 | p = &(*p)->rb_right; | ||
31 | } | ||
32 | |||
33 | rb_link_node(&chain->rb_node, parent, p); | ||
34 | rb_insert_color(&chain->rb_node, root); | ||
35 | } | ||
36 | |||
37 | /* | ||
38 | * Once we get every callchains from the stream, we can now | ||
39 | * sort them by hit | ||
40 | */ | ||
41 | void sort_chain_to_rbtree(struct rb_root *rb_root, struct callchain_node *node) | ||
42 | { | ||
43 | struct callchain_node *child; | ||
44 | |||
45 | list_for_each_entry(child, &node->children, brothers) | ||
46 | sort_chain_to_rbtree(rb_root, child); | ||
47 | |||
48 | if (node->hit) | ||
49 | rb_insert_callchain(rb_root, node); | ||
50 | } | ||
51 | |||
52 | static struct callchain_node *create_child(struct callchain_node *parent) | ||
53 | { | ||
54 | struct callchain_node *new; | ||
55 | |||
56 | new = malloc(sizeof(*new)); | ||
57 | if (!new) { | ||
58 | perror("not enough memory to create child for code path tree"); | ||
59 | return NULL; | ||
60 | } | ||
61 | new->parent = parent; | ||
62 | INIT_LIST_HEAD(&new->children); | ||
63 | INIT_LIST_HEAD(&new->val); | ||
64 | list_add_tail(&new->brothers, &parent->children); | ||
65 | |||
66 | return new; | ||
67 | } | ||
68 | |||
69 | static void | ||
70 | fill_node(struct callchain_node *node, struct ip_callchain *chain, int start) | ||
71 | { | ||
72 | int i; | ||
73 | |||
74 | for (i = start; i < chain->nr; i++) { | ||
75 | struct callchain_list *call; | ||
76 | |||
77 | call = malloc(sizeof(*chain)); | ||
78 | if (!call) { | ||
79 | perror("not enough memory for the code path tree"); | ||
80 | return; | ||
81 | } | ||
82 | call->ip = chain->ips[i]; | ||
83 | list_add_tail(&call->list, &node->val); | ||
84 | } | ||
85 | node->val_nr = i - start; | ||
86 | } | ||
87 | |||
88 | static void add_child(struct callchain_node *parent, struct ip_callchain *chain) | ||
89 | { | ||
90 | struct callchain_node *new; | ||
91 | |||
92 | new = create_child(parent); | ||
93 | fill_node(new, chain, parent->val_nr); | ||
94 | |||
95 | new->hit = 1; | ||
96 | } | ||
97 | |||
98 | static void | ||
99 | split_add_child(struct callchain_node *parent, struct ip_callchain *chain, | ||
100 | struct callchain_list *to_split, int idx) | ||
101 | { | ||
102 | struct callchain_node *new; | ||
103 | |||
104 | /* split */ | ||
105 | new = create_child(parent); | ||
106 | list_move_tail(&to_split->list, &new->val); | ||
107 | new->hit = parent->hit; | ||
108 | parent->hit = 0; | ||
109 | parent->val_nr = idx; | ||
110 | |||
111 | /* create the new one */ | ||
112 | add_child(parent, chain); | ||
113 | } | ||
114 | |||
115 | static int | ||
116 | __append_chain(struct callchain_node *root, struct ip_callchain *chain, | ||
117 | int start); | ||
118 | |||
119 | static int | ||
120 | __append_chain_children(struct callchain_node *root, struct ip_callchain *chain) | ||
121 | { | ||
122 | struct callchain_node *rnode; | ||
123 | |||
124 | /* lookup in childrens */ | ||
125 | list_for_each_entry(rnode, &root->children, brothers) { | ||
126 | int ret = __append_chain(rnode, chain, root->val_nr); | ||
127 | if (!ret) | ||
128 | return 0; | ||
129 | } | ||
130 | return -1; | ||
131 | } | ||
132 | |||
133 | static int | ||
134 | __append_chain(struct callchain_node *root, struct ip_callchain *chain, | ||
135 | int start) | ||
136 | { | ||
137 | struct callchain_list *cnode; | ||
138 | int i = start; | ||
139 | bool found = false; | ||
140 | |||
141 | /* lookup in the current node */ | ||
142 | list_for_each_entry(cnode, &root->val, list) { | ||
143 | if (cnode->ip != chain->ips[i++]) | ||
144 | break; | ||
145 | if (!found) | ||
146 | found = true; | ||
147 | if (i == chain->nr) | ||
148 | break; | ||
149 | } | ||
150 | |||
151 | /* matches not, relay on the parent */ | ||
152 | if (!found) | ||
153 | return -1; | ||
154 | |||
155 | /* we match only a part of the node. Split it and add the new chain */ | ||
156 | if (i < root->val_nr) { | ||
157 | split_add_child(root, chain, cnode, i); | ||
158 | return 0; | ||
159 | } | ||
160 | |||
161 | /* we match 100% of the path, increment the hit */ | ||
162 | if (i == root->val_nr) { | ||
163 | root->hit++; | ||
164 | return 0; | ||
165 | } | ||
166 | |||
167 | return __append_chain_children(root, chain); | ||
168 | } | ||
169 | |||
/*
 * Add @chain to the tree rooted at @root: try to merge it into one of the
 * existing children first, and create a fresh child when none accepts it.
 */
void append_chain(struct callchain_node *root, struct ip_callchain *chain)
{
	int ret = __append_chain_children(root, chain);

	if (ret != -1)
		return;

	add_child(root, chain);
}
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h new file mode 100644 index 000000000000..fa1cd2f71fd3 --- /dev/null +++ b/tools/perf/util/callchain.h | |||
@@ -0,0 +1,33 @@ | |||
1 | #ifndef __PERF_CALLCHAIN_H | ||
2 | #define __PERF_CALLCHAIN_H | ||
3 | |||
4 | #include "../perf.h" | ||
5 | #include "list.h" | ||
6 | #include "rbtree.h" | ||
7 | |||
8 | |||
9 | struct callchain_node { | ||
10 | struct callchain_node *parent; | ||
11 | struct list_head brothers; | ||
12 | struct list_head children; | ||
13 | struct list_head val; | ||
14 | struct rb_node rb_node; | ||
15 | int val_nr; | ||
16 | int hit; | ||
17 | }; | ||
18 | |||
19 | struct callchain_list { | ||
20 | unsigned long ip; | ||
21 | struct list_head list; | ||
22 | }; | ||
23 | |||
24 | static inline void callchain_init(struct callchain_node *node) | ||
25 | { | ||
26 | INIT_LIST_HEAD(&node->brothers); | ||
27 | INIT_LIST_HEAD(&node->children); | ||
28 | INIT_LIST_HEAD(&node->val); | ||
29 | } | ||
30 | |||
31 | void append_chain(struct callchain_node *root, struct ip_callchain *chain); | ||
32 | void sort_chain_to_rbtree(struct rb_root *rb_root, struct callchain_node *node); | ||
33 | #endif | ||
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c new file mode 100644 index 000000000000..450384b3bbe5 --- /dev/null +++ b/tools/perf/util/header.c | |||
@@ -0,0 +1,242 @@ | |||
1 | #include <sys/types.h> | ||
2 | #include <unistd.h> | ||
3 | #include <stdio.h> | ||
4 | #include <stdlib.h> | ||
5 | |||
6 | #include "util.h" | ||
7 | #include "header.h" | ||
8 | |||
9 | /* | ||
10 | * | ||
11 | */ | ||
12 | |||
13 | struct perf_header_attr *perf_header_attr__new(struct perf_counter_attr *attr) | ||
14 | { | ||
15 | struct perf_header_attr *self = malloc(sizeof(*self)); | ||
16 | |||
17 | if (!self) | ||
18 | die("nomem"); | ||
19 | |||
20 | self->attr = *attr; | ||
21 | self->ids = 0; | ||
22 | self->size = 1; | ||
23 | self->id = malloc(sizeof(u64)); | ||
24 | |||
25 | if (!self->id) | ||
26 | die("nomem"); | ||
27 | |||
28 | return self; | ||
29 | } | ||
30 | |||
31 | void perf_header_attr__add_id(struct perf_header_attr *self, u64 id) | ||
32 | { | ||
33 | int pos = self->ids; | ||
34 | |||
35 | self->ids++; | ||
36 | if (self->ids > self->size) { | ||
37 | self->size *= 2; | ||
38 | self->id = realloc(self->id, self->size * sizeof(u64)); | ||
39 | if (!self->id) | ||
40 | die("nomem"); | ||
41 | } | ||
42 | self->id[pos] = id; | ||
43 | } | ||
44 | |||
45 | /* | ||
46 | * | ||
47 | */ | ||
48 | |||
49 | struct perf_header *perf_header__new(void) | ||
50 | { | ||
51 | struct perf_header *self = malloc(sizeof(*self)); | ||
52 | |||
53 | if (!self) | ||
54 | die("nomem"); | ||
55 | |||
56 | self->frozen = 0; | ||
57 | |||
58 | self->attrs = 0; | ||
59 | self->size = 1; | ||
60 | self->attr = malloc(sizeof(void *)); | ||
61 | |||
62 | if (!self->attr) | ||
63 | die("nomem"); | ||
64 | |||
65 | self->data_offset = 0; | ||
66 | self->data_size = 0; | ||
67 | |||
68 | return self; | ||
69 | } | ||
70 | |||
71 | void perf_header__add_attr(struct perf_header *self, | ||
72 | struct perf_header_attr *attr) | ||
73 | { | ||
74 | int pos = self->attrs; | ||
75 | |||
76 | if (self->frozen) | ||
77 | die("frozen"); | ||
78 | |||
79 | self->attrs++; | ||
80 | if (self->attrs > self->size) { | ||
81 | self->size *= 2; | ||
82 | self->attr = realloc(self->attr, self->size * sizeof(void *)); | ||
83 | if (!self->attr) | ||
84 | die("nomem"); | ||
85 | } | ||
86 | self->attr[pos] = attr; | ||
87 | } | ||
88 | |||
/* File signature: the bytes of "PERFFILE" reinterpreted as a single u64. */
static const char *__perf_magic = "PERFFILE";

#define PERF_MAGIC	(*(u64 *)__perf_magic)
92 | |||
93 | struct perf_file_section { | ||
94 | u64 offset; | ||
95 | u64 size; | ||
96 | }; | ||
97 | |||
98 | struct perf_file_attr { | ||
99 | struct perf_counter_attr attr; | ||
100 | struct perf_file_section ids; | ||
101 | }; | ||
102 | |||
103 | struct perf_file_header { | ||
104 | u64 magic; | ||
105 | u64 size; | ||
106 | u64 attr_size; | ||
107 | struct perf_file_section attrs; | ||
108 | struct perf_file_section data; | ||
109 | }; | ||
110 | |||
/*
 * Write exactly @size bytes from @buf to @fd, issuing further write()
 * calls after short writes.  Calls die() if write() reports an error.
 */
static void do_write(int fd, void *buf, size_t size)
{
	/* byte cursor: pointer arithmetic on void * is not standard C */
	char *cursor = buf;

	while (size) {
		/* write() returns ssize_t; an int could truncate it */
		ssize_t ret = write(fd, cursor, size);

		if (ret < 0)
			die("failed to write");

		size -= ret;
		cursor += ret;
	}
}
123 | |||
124 | void perf_header__write(struct perf_header *self, int fd) | ||
125 | { | ||
126 | struct perf_file_header f_header; | ||
127 | struct perf_file_attr f_attr; | ||
128 | struct perf_header_attr *attr; | ||
129 | int i; | ||
130 | |||
131 | lseek(fd, sizeof(f_header), SEEK_SET); | ||
132 | |||
133 | |||
134 | for (i = 0; i < self->attrs; i++) { | ||
135 | attr = self->attr[i]; | ||
136 | |||
137 | attr->id_offset = lseek(fd, 0, SEEK_CUR); | ||
138 | do_write(fd, attr->id, attr->ids * sizeof(u64)); | ||
139 | } | ||
140 | |||
141 | |||
142 | self->attr_offset = lseek(fd, 0, SEEK_CUR); | ||
143 | |||
144 | for (i = 0; i < self->attrs; i++) { | ||
145 | attr = self->attr[i]; | ||
146 | |||
147 | f_attr = (struct perf_file_attr){ | ||
148 | .attr = attr->attr, | ||
149 | .ids = { | ||
150 | .offset = attr->id_offset, | ||
151 | .size = attr->ids * sizeof(u64), | ||
152 | } | ||
153 | }; | ||
154 | do_write(fd, &f_attr, sizeof(f_attr)); | ||
155 | } | ||
156 | |||
157 | |||
158 | self->data_offset = lseek(fd, 0, SEEK_CUR); | ||
159 | |||
160 | f_header = (struct perf_file_header){ | ||
161 | .magic = PERF_MAGIC, | ||
162 | .size = sizeof(f_header), | ||
163 | .attr_size = sizeof(f_attr), | ||
164 | .attrs = { | ||
165 | .offset = self->attr_offset, | ||
166 | .size = self->attrs * sizeof(f_attr), | ||
167 | }, | ||
168 | .data = { | ||
169 | .offset = self->data_offset, | ||
170 | .size = self->data_size, | ||
171 | }, | ||
172 | }; | ||
173 | |||
174 | lseek(fd, 0, SEEK_SET); | ||
175 | do_write(fd, &f_header, sizeof(f_header)); | ||
176 | lseek(fd, self->data_offset + self->data_size, SEEK_SET); | ||
177 | |||
178 | self->frozen = 1; | ||
179 | } | ||
180 | |||
/*
 * Read exactly @size bytes from @fd into @buf, issuing further read()
 * calls after short reads.  Calls die() if read() reports an error or if
 * end of file is reached before @size bytes have been read.
 */
static void do_read(int fd, void *buf, size_t size)
{
	/* byte cursor: pointer arithmetic on void * is not standard C */
	char *cursor = buf;

	while (size) {
		ssize_t ret = read(fd, cursor, size);

		/*
		 * read() returns 0 at end of file; without treating that as
		 * a failure a truncated file would make this loop spin
		 * forever.
		 */
		if (ret <= 0)
			die("failed to read");

		size -= ret;
		cursor += ret;
	}
}
193 | |||
194 | struct perf_header *perf_header__read(int fd) | ||
195 | { | ||
196 | struct perf_header *self = perf_header__new(); | ||
197 | struct perf_file_header f_header; | ||
198 | struct perf_file_attr f_attr; | ||
199 | u64 f_id; | ||
200 | |||
201 | int nr_attrs, nr_ids, i, j; | ||
202 | |||
203 | lseek(fd, 0, SEEK_SET); | ||
204 | do_read(fd, &f_header, sizeof(f_header)); | ||
205 | |||
206 | if (f_header.magic != PERF_MAGIC || | ||
207 | f_header.size != sizeof(f_header) || | ||
208 | f_header.attr_size != sizeof(f_attr)) | ||
209 | die("incompatible file format"); | ||
210 | |||
211 | nr_attrs = f_header.attrs.size / sizeof(f_attr); | ||
212 | lseek(fd, f_header.attrs.offset, SEEK_SET); | ||
213 | |||
214 | for (i = 0; i < nr_attrs; i++) { | ||
215 | struct perf_header_attr *attr; | ||
216 | off_t tmp = lseek(fd, 0, SEEK_CUR); | ||
217 | |||
218 | do_read(fd, &f_attr, sizeof(f_attr)); | ||
219 | |||
220 | attr = perf_header_attr__new(&f_attr.attr); | ||
221 | |||
222 | nr_ids = f_attr.ids.size / sizeof(u64); | ||
223 | lseek(fd, f_attr.ids.offset, SEEK_SET); | ||
224 | |||
225 | for (j = 0; j < nr_ids; j++) { | ||
226 | do_read(fd, &f_id, sizeof(f_id)); | ||
227 | |||
228 | perf_header_attr__add_id(attr, f_id); | ||
229 | } | ||
230 | perf_header__add_attr(self, attr); | ||
231 | lseek(fd, tmp, SEEK_SET); | ||
232 | } | ||
233 | |||
234 | self->data_offset = f_header.data.offset; | ||
235 | self->data_size = f_header.data.size; | ||
236 | |||
237 | lseek(fd, self->data_offset + self->data_size, SEEK_SET); | ||
238 | |||
239 | self->frozen = 1; | ||
240 | |||
241 | return self; | ||
242 | } | ||
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h new file mode 100644 index 000000000000..b5ef53ad4c7a --- /dev/null +++ b/tools/perf/util/header.h | |||
@@ -0,0 +1,37 @@ | |||
1 | #ifndef _PERF_HEADER_H | ||
2 | #define _PERF_HEADER_H | ||
3 | |||
4 | #include "../../../include/linux/perf_counter.h" | ||
5 | #include <sys/types.h> | ||
6 | #include "types.h" | ||
7 | |||
8 | struct perf_header_attr { | ||
9 | struct perf_counter_attr attr; | ||
10 | int ids, size; | ||
11 | u64 *id; | ||
12 | off_t id_offset; | ||
13 | }; | ||
14 | |||
15 | struct perf_header { | ||
16 | int frozen; | ||
17 | int attrs, size; | ||
18 | struct perf_header_attr **attr; | ||
19 | off_t attr_offset; | ||
20 | u64 data_offset; | ||
21 | u64 data_size; | ||
22 | }; | ||
23 | |||
24 | struct perf_header *perf_header__read(int fd); | ||
25 | void perf_header__write(struct perf_header *self, int fd); | ||
26 | |||
27 | void perf_header__add_attr(struct perf_header *self, | ||
28 | struct perf_header_attr *attr); | ||
29 | |||
30 | struct perf_header_attr * | ||
31 | perf_header_attr__new(struct perf_counter_attr *attr); | ||
32 | void perf_header_attr__add_id(struct perf_header_attr *self, u64 id); | ||
33 | |||
34 | |||
35 | struct perf_header *perf_header__new(void); | ||
36 | |||
37 | #endif /* _PERF_HEADER_H */ | ||
diff --git a/tools/perf/util/help.c b/tools/perf/util/help.c index 6653f7dd1d78..17a00e0df2c4 100644 --- a/tools/perf/util/help.c +++ b/tools/perf/util/help.c | |||
@@ -126,21 +126,6 @@ static int is_executable(const char *name) | |||
126 | !S_ISREG(st.st_mode)) | 126 | !S_ISREG(st.st_mode)) |
127 | return 0; | 127 | return 0; |
128 | 128 | ||
129 | #ifdef __MINGW32__ | ||
130 | /* cannot trust the executable bit, peek into the file instead */ | ||
131 | char buf[3] = { 0 }; | ||
132 | int n; | ||
133 | int fd = open(name, O_RDONLY); | ||
134 | st.st_mode &= ~S_IXUSR; | ||
135 | if (fd >= 0) { | ||
136 | n = read(fd, buf, 2); | ||
137 | if (n == 2) | ||
138 | /* DOS executables start with "MZ" */ | ||
139 | if (!strcmp(buf, "#!") || !strcmp(buf, "MZ")) | ||
140 | st.st_mode |= S_IXUSR; | ||
141 | close(fd); | ||
142 | } | ||
143 | #endif | ||
144 | return st.st_mode & S_IXUSR; | 129 | return st.st_mode & S_IXUSR; |
145 | } | 130 | } |
146 | 131 | ||
diff --git a/tools/perf/util/pager.c b/tools/perf/util/pager.c index a28bccae5458..1915de20dcac 100644 --- a/tools/perf/util/pager.c +++ b/tools/perf/util/pager.c | |||
@@ -9,7 +9,6 @@ | |||
9 | 9 | ||
10 | static int spawned_pager; | 10 | static int spawned_pager; |
11 | 11 | ||
12 | #ifndef __MINGW32__ | ||
13 | static void pager_preexec(void) | 12 | static void pager_preexec(void) |
14 | { | 13 | { |
15 | /* | 14 | /* |
@@ -24,7 +23,6 @@ static void pager_preexec(void) | |||
24 | 23 | ||
25 | setenv("LESS", "FRSX", 0); | 24 | setenv("LESS", "FRSX", 0); |
26 | } | 25 | } |
27 | #endif | ||
28 | 26 | ||
29 | static const char *pager_argv[] = { "sh", "-c", NULL, NULL }; | 27 | static const char *pager_argv[] = { "sh", "-c", NULL, NULL }; |
30 | static struct child_process pager_process; | 28 | static struct child_process pager_process; |
@@ -70,9 +68,8 @@ void setup_pager(void) | |||
70 | pager_argv[2] = pager; | 68 | pager_argv[2] = pager; |
71 | pager_process.argv = pager_argv; | 69 | pager_process.argv = pager_argv; |
72 | pager_process.in = -1; | 70 | pager_process.in = -1; |
73 | #ifndef __MINGW32__ | ||
74 | pager_process.preexec_cb = pager_preexec; | 71 | pager_process.preexec_cb = pager_preexec; |
75 | #endif | 72 | |
76 | if (start_command(&pager_process)) | 73 | if (start_command(&pager_process)) |
77 | return; | 74 | return; |
78 | 75 | ||
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 35d04da38d6a..4d042f104cdc 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c | |||
@@ -16,32 +16,28 @@ struct event_symbol { | |||
16 | u8 type; | 16 | u8 type; |
17 | u64 config; | 17 | u64 config; |
18 | char *symbol; | 18 | char *symbol; |
19 | char *alias; | ||
19 | }; | 20 | }; |
20 | 21 | ||
21 | #define C(x, y) .type = PERF_TYPE_##x, .config = PERF_COUNT_##y | 22 | #define CHW(x) .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_##x |
22 | #define CR(x, y) .type = PERF_TYPE_##x, .config = y | 23 | #define CSW(x) .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_##x |
23 | 24 | ||
24 | static struct event_symbol event_symbols[] = { | 25 | static struct event_symbol event_symbols[] = { |
25 | { C(HARDWARE, HW_CPU_CYCLES), "cpu-cycles", }, | 26 | { CHW(CPU_CYCLES), "cpu-cycles", "cycles" }, |
26 | { C(HARDWARE, HW_CPU_CYCLES), "cycles", }, | 27 | { CHW(INSTRUCTIONS), "instructions", "" }, |
27 | { C(HARDWARE, HW_INSTRUCTIONS), "instructions", }, | 28 | { CHW(CACHE_REFERENCES), "cache-references", "" }, |
28 | { C(HARDWARE, HW_CACHE_REFERENCES), "cache-references", }, | 29 | { CHW(CACHE_MISSES), "cache-misses", "" }, |
29 | { C(HARDWARE, HW_CACHE_MISSES), "cache-misses", }, | 30 | { CHW(BRANCH_INSTRUCTIONS), "branch-instructions", "branches" }, |
30 | { C(HARDWARE, HW_BRANCH_INSTRUCTIONS),"branch-instructions", }, | 31 | { CHW(BRANCH_MISSES), "branch-misses", "" }, |
31 | { C(HARDWARE, HW_BRANCH_INSTRUCTIONS),"branches", }, | 32 | { CHW(BUS_CYCLES), "bus-cycles", "" }, |
32 | { C(HARDWARE, HW_BRANCH_MISSES), "branch-misses", }, | 33 | |
33 | { C(HARDWARE, HW_BUS_CYCLES), "bus-cycles", }, | 34 | { CSW(CPU_CLOCK), "cpu-clock", "" }, |
34 | 35 | { CSW(TASK_CLOCK), "task-clock", "" }, | |
35 | { C(SOFTWARE, SW_CPU_CLOCK), "cpu-clock", }, | 36 | { CSW(PAGE_FAULTS), "page-faults", "faults" }, |
36 | { C(SOFTWARE, SW_TASK_CLOCK), "task-clock", }, | 37 | { CSW(PAGE_FAULTS_MIN), "minor-faults", "" }, |
37 | { C(SOFTWARE, SW_PAGE_FAULTS), "page-faults", }, | 38 | { CSW(PAGE_FAULTS_MAJ), "major-faults", "" }, |
38 | { C(SOFTWARE, SW_PAGE_FAULTS), "faults", }, | 39 | { CSW(CONTEXT_SWITCHES), "context-switches", "cs" }, |
39 | { C(SOFTWARE, SW_PAGE_FAULTS_MIN), "minor-faults", }, | 40 | { CSW(CPU_MIGRATIONS), "cpu-migrations", "migrations" }, |
40 | { C(SOFTWARE, SW_PAGE_FAULTS_MAJ), "major-faults", }, | ||
41 | { C(SOFTWARE, SW_CONTEXT_SWITCHES), "context-switches", }, | ||
42 | { C(SOFTWARE, SW_CONTEXT_SWITCHES), "cs", }, | ||
43 | { C(SOFTWARE, SW_CPU_MIGRATIONS), "cpu-migrations", }, | ||
44 | { C(SOFTWARE, SW_CPU_MIGRATIONS), "migrations", }, | ||
45 | }; | 41 | }; |
46 | 42 | ||
47 | #define __PERF_COUNTER_FIELD(config, name) \ | 43 | #define __PERF_COUNTER_FIELD(config, name) \ |
@@ -74,26 +70,70 @@ static char *sw_event_names[] = { | |||
74 | 70 | ||
75 | #define MAX_ALIASES 8 | 71 | #define MAX_ALIASES 8 |
76 | 72 | ||
77 | static char *hw_cache [][MAX_ALIASES] = { | 73 | static char *hw_cache[][MAX_ALIASES] = { |
78 | { "L1-data" , "l1-d", "l1d" }, | 74 | { "L1-d$", "l1-d", "l1d", "L1-data", }, |
79 | { "L1-instruction" , "l1-i", "l1i" }, | 75 | { "L1-i$", "l1-i", "l1i", "L1-instruction", }, |
80 | { "L2" , "l2" }, | 76 | { "LLC", "L2" }, |
81 | { "Data-TLB" , "dtlb", "d-tlb" }, | 77 | { "dTLB", "d-tlb", "Data-TLB", }, |
82 | { "Instruction-TLB" , "itlb", "i-tlb" }, | 78 | { "iTLB", "i-tlb", "Instruction-TLB", }, |
83 | { "Branch" , "bpu" , "btb", "bpc" }, | 79 | { "branch", "branches", "bpu", "btb", "bpc", }, |
84 | }; | 80 | }; |
85 | 81 | ||
86 | static char *hw_cache_op [][MAX_ALIASES] = { | 82 | static char *hw_cache_op[][MAX_ALIASES] = { |
87 | { "Load" , "read" }, | 83 | { "load", "loads", "read", }, |
88 | { "Store" , "write" }, | 84 | { "store", "stores", "write", }, |
89 | { "Prefetch" , "speculative-read", "speculative-load" }, | 85 | { "prefetch", "prefetches", "speculative-read", "speculative-load", }, |
90 | }; | 86 | }; |
91 | 87 | ||
92 | static char *hw_cache_result [][MAX_ALIASES] = { | 88 | static char *hw_cache_result[][MAX_ALIASES] = { |
93 | { "Reference" , "ops", "access" }, | 89 | { "refs", "Reference", "ops", "access", }, |
94 | { "Miss" }, | 90 | { "misses", "miss", }, |
95 | }; | 91 | }; |
96 | 92 | ||
93 | #define C(x) PERF_COUNT_HW_CACHE_##x | ||
94 | #define CACHE_READ (1 << C(OP_READ)) | ||
95 | #define CACHE_WRITE (1 << C(OP_WRITE)) | ||
96 | #define CACHE_PREFETCH (1 << C(OP_PREFETCH)) | ||
97 | #define COP(x) (1 << x) | ||
98 | |||
99 | /* | ||
100 | * cache operation stat | ||
101 | * L1I : Read and prefetch only | ||
102 | * ITLB and BPU : Read-only | ||
103 | */ | ||
104 | static unsigned long hw_cache_stat[C(MAX)] = { | ||
105 | [C(L1D)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH), | ||
106 | [C(L1I)] = (CACHE_READ | CACHE_PREFETCH), | ||
107 | [C(LL)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH), | ||
108 | [C(DTLB)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH), | ||
109 | [C(ITLB)] = (CACHE_READ), | ||
110 | [C(BPU)] = (CACHE_READ), | ||
111 | }; | ||
112 | |||
113 | static int is_cache_op_valid(u8 cache_type, u8 cache_op) | ||
114 | { | ||
115 | if (hw_cache_stat[cache_type] & COP(cache_op)) | ||
116 | return 1; /* valid */ | ||
117 | else | ||
118 | return 0; /* invalid */ | ||
119 | } | ||
120 | |||
121 | static char *event_cache_name(u8 cache_type, u8 cache_op, u8 cache_result) | ||
122 | { | ||
123 | static char name[50]; | ||
124 | |||
125 | if (cache_result) { | ||
126 | sprintf(name, "%s-%s-%s", hw_cache[cache_type][0], | ||
127 | hw_cache_op[cache_op][0], | ||
128 | hw_cache_result[cache_result][0]); | ||
129 | } else { | ||
130 | sprintf(name, "%s-%s", hw_cache[cache_type][0], | ||
131 | hw_cache_op[cache_op][1]); | ||
132 | } | ||
133 | |||
134 | return name; | ||
135 | } | ||
136 | |||
97 | char *event_name(int counter) | 137 | char *event_name(int counter) |
98 | { | 138 | { |
99 | u64 config = attrs[counter].config; | 139 | u64 config = attrs[counter].config; |
@@ -113,7 +153,6 @@ char *event_name(int counter) | |||
113 | 153 | ||
114 | case PERF_TYPE_HW_CACHE: { | 154 | case PERF_TYPE_HW_CACHE: { |
115 | u8 cache_type, cache_op, cache_result; | 155 | u8 cache_type, cache_op, cache_result; |
116 | static char name[100]; | ||
117 | 156 | ||
118 | cache_type = (config >> 0) & 0xff; | 157 | cache_type = (config >> 0) & 0xff; |
119 | if (cache_type > PERF_COUNT_HW_CACHE_MAX) | 158 | if (cache_type > PERF_COUNT_HW_CACHE_MAX) |
@@ -127,12 +166,10 @@ char *event_name(int counter) | |||
127 | if (cache_result > PERF_COUNT_HW_CACHE_RESULT_MAX) | 166 | if (cache_result > PERF_COUNT_HW_CACHE_RESULT_MAX) |
128 | return "unknown-ext-hardware-cache-result"; | 167 | return "unknown-ext-hardware-cache-result"; |
129 | 168 | ||
130 | sprintf(name, "%s-Cache-%s-%ses", | 169 | if (!is_cache_op_valid(cache_type, cache_op)) |
131 | hw_cache[cache_type][0], | 170 | return "invalid-cache"; |
132 | hw_cache_op[cache_op][0], | ||
133 | hw_cache_result[cache_result][0]); | ||
134 | 171 | ||
135 | return name; | 172 | return event_cache_name(cache_type, cache_op, cache_result); |
136 | } | 173 | } |
137 | 174 | ||
138 | case PERF_TYPE_SOFTWARE: | 175 | case PERF_TYPE_SOFTWARE: |
@@ -163,7 +200,8 @@ static int parse_aliases(const char *str, char *names[][MAX_ALIASES], int size) | |||
163 | return -1; | 200 | return -1; |
164 | } | 201 | } |
165 | 202 | ||
166 | static int parse_generic_hw_symbols(const char *str, struct perf_counter_attr *attr) | 203 | static int |
204 | parse_generic_hw_symbols(const char *str, struct perf_counter_attr *attr) | ||
167 | { | 205 | { |
168 | int cache_type = -1, cache_op = 0, cache_result = 0; | 206 | int cache_type = -1, cache_op = 0, cache_result = 0; |
169 | 207 | ||
@@ -182,6 +220,9 @@ static int parse_generic_hw_symbols(const char *str, struct perf_counter_attr *a | |||
182 | if (cache_op == -1) | 220 | if (cache_op == -1) |
183 | cache_op = PERF_COUNT_HW_CACHE_OP_READ; | 221 | cache_op = PERF_COUNT_HW_CACHE_OP_READ; |
184 | 222 | ||
223 | if (!is_cache_op_valid(cache_type, cache_op)) | ||
224 | return -EINVAL; | ||
225 | |||
185 | cache_result = parse_aliases(str, hw_cache_result, | 226 | cache_result = parse_aliases(str, hw_cache_result, |
186 | PERF_COUNT_HW_CACHE_RESULT_MAX); | 227 | PERF_COUNT_HW_CACHE_RESULT_MAX); |
187 | /* | 228 | /* |
@@ -196,6 +237,19 @@ static int parse_generic_hw_symbols(const char *str, struct perf_counter_attr *a | |||
196 | return 0; | 237 | return 0; |
197 | } | 238 | } |
198 | 239 | ||
240 | static int check_events(const char *str, unsigned int i) | ||
241 | { | ||
242 | if (!strncmp(str, event_symbols[i].symbol, | ||
243 | strlen(event_symbols[i].symbol))) | ||
244 | return 1; | ||
245 | |||
246 | if (strlen(event_symbols[i].alias)) | ||
247 | if (!strncmp(str, event_symbols[i].alias, | ||
248 | strlen(event_symbols[i].alias))) | ||
249 | return 1; | ||
250 | return 0; | ||
251 | } | ||
252 | |||
199 | /* | 253 | /* |
200 | * Each event can have multiple symbolic names. | 254 | * Each event can have multiple symbolic names. |
201 | * Symbolic names are (almost) exactly matched. | 255 | * Symbolic names are (almost) exactly matched. |
@@ -235,9 +289,7 @@ static int parse_event_symbols(const char *str, struct perf_counter_attr *attr) | |||
235 | } | 289 | } |
236 | 290 | ||
237 | for (i = 0; i < ARRAY_SIZE(event_symbols); i++) { | 291 | for (i = 0; i < ARRAY_SIZE(event_symbols); i++) { |
238 | if (!strncmp(str, event_symbols[i].symbol, | 292 | if (check_events(str, i)) { |
239 | strlen(event_symbols[i].symbol))) { | ||
240 | |||
241 | attr->type = event_symbols[i].type; | 293 | attr->type = event_symbols[i].type; |
242 | attr->config = event_symbols[i].config; | 294 | attr->config = event_symbols[i].config; |
243 | 295 | ||
@@ -289,6 +341,7 @@ void print_events(void) | |||
289 | { | 341 | { |
290 | struct event_symbol *syms = event_symbols; | 342 | struct event_symbol *syms = event_symbols; |
291 | unsigned int i, type, prev_type = -1; | 343 | unsigned int i, type, prev_type = -1; |
344 | char name[40]; | ||
292 | 345 | ||
293 | fprintf(stderr, "\n"); | 346 | fprintf(stderr, "\n"); |
294 | fprintf(stderr, "List of pre-defined events (to be used in -e):\n"); | 347 | fprintf(stderr, "List of pre-defined events (to be used in -e):\n"); |
@@ -301,14 +354,18 @@ void print_events(void) | |||
301 | if (type != prev_type) | 354 | if (type != prev_type) |
302 | fprintf(stderr, "\n"); | 355 | fprintf(stderr, "\n"); |
303 | 356 | ||
304 | fprintf(stderr, " %-30s [%s]\n", syms->symbol, | 357 | if (strlen(syms->alias)) |
358 | sprintf(name, "%s OR %s", syms->symbol, syms->alias); | ||
359 | else | ||
360 | strcpy(name, syms->symbol); | ||
361 | fprintf(stderr, " %-40s [%s]\n", name, | ||
305 | event_type_descriptors[type]); | 362 | event_type_descriptors[type]); |
306 | 363 | ||
307 | prev_type = type; | 364 | prev_type = type; |
308 | } | 365 | } |
309 | 366 | ||
310 | fprintf(stderr, "\n"); | 367 | fprintf(stderr, "\n"); |
311 | fprintf(stderr, " %-30s [raw hardware event descriptor]\n", | 368 | fprintf(stderr, " %-40s [raw hardware event descriptor]\n", |
312 | "rNNN"); | 369 | "rNNN"); |
313 | fprintf(stderr, "\n"); | 370 | fprintf(stderr, "\n"); |
314 | 371 | ||
diff --git a/tools/perf/util/run-command.c b/tools/perf/util/run-command.c index b2f5e854f40a..a3935343091a 100644 --- a/tools/perf/util/run-command.c +++ b/tools/perf/util/run-command.c | |||
@@ -65,7 +65,6 @@ int start_command(struct child_process *cmd) | |||
65 | cmd->err = fderr[0]; | 65 | cmd->err = fderr[0]; |
66 | } | 66 | } |
67 | 67 | ||
68 | #ifndef __MINGW32__ | ||
69 | fflush(NULL); | 68 | fflush(NULL); |
70 | cmd->pid = fork(); | 69 | cmd->pid = fork(); |
71 | if (!cmd->pid) { | 70 | if (!cmd->pid) { |
@@ -118,71 +117,6 @@ int start_command(struct child_process *cmd) | |||
118 | } | 117 | } |
119 | exit(127); | 118 | exit(127); |
120 | } | 119 | } |
121 | #else | ||
122 | int s0 = -1, s1 = -1, s2 = -1; /* backups of stdin, stdout, stderr */ | ||
123 | const char **sargv = cmd->argv; | ||
124 | char **env = environ; | ||
125 | |||
126 | if (cmd->no_stdin) { | ||
127 | s0 = dup(0); | ||
128 | dup_devnull(0); | ||
129 | } else if (need_in) { | ||
130 | s0 = dup(0); | ||
131 | dup2(fdin[0], 0); | ||
132 | } else if (cmd->in) { | ||
133 | s0 = dup(0); | ||
134 | dup2(cmd->in, 0); | ||
135 | } | ||
136 | |||
137 | if (cmd->no_stderr) { | ||
138 | s2 = dup(2); | ||
139 | dup_devnull(2); | ||
140 | } else if (need_err) { | ||
141 | s2 = dup(2); | ||
142 | dup2(fderr[1], 2); | ||
143 | } | ||
144 | |||
145 | if (cmd->no_stdout) { | ||
146 | s1 = dup(1); | ||
147 | dup_devnull(1); | ||
148 | } else if (cmd->stdout_to_stderr) { | ||
149 | s1 = dup(1); | ||
150 | dup2(2, 1); | ||
151 | } else if (need_out) { | ||
152 | s1 = dup(1); | ||
153 | dup2(fdout[1], 1); | ||
154 | } else if (cmd->out > 1) { | ||
155 | s1 = dup(1); | ||
156 | dup2(cmd->out, 1); | ||
157 | } | ||
158 | |||
159 | if (cmd->dir) | ||
160 | die("chdir in start_command() not implemented"); | ||
161 | if (cmd->env) { | ||
162 | env = copy_environ(); | ||
163 | for (; *cmd->env; cmd->env++) | ||
164 | env = env_setenv(env, *cmd->env); | ||
165 | } | ||
166 | |||
167 | if (cmd->perf_cmd) { | ||
168 | cmd->argv = prepare_perf_cmd(cmd->argv); | ||
169 | } | ||
170 | |||
171 | cmd->pid = mingw_spawnvpe(cmd->argv[0], cmd->argv, env); | ||
172 | |||
173 | if (cmd->env) | ||
174 | free_environ(env); | ||
175 | if (cmd->perf_cmd) | ||
176 | free(cmd->argv); | ||
177 | |||
178 | cmd->argv = sargv; | ||
179 | if (s0 >= 0) | ||
180 | dup2(s0, 0), close(s0); | ||
181 | if (s1 >= 0) | ||
182 | dup2(s1, 1), close(s1); | ||
183 | if (s2 >= 0) | ||
184 | dup2(s2, 2), close(s2); | ||
185 | #endif | ||
186 | 120 | ||
187 | if (cmd->pid < 0) { | 121 | if (cmd->pid < 0) { |
188 | int err = errno; | 122 | int err = errno; |
@@ -288,14 +222,6 @@ int run_command_v_opt_cd_env(const char **argv, int opt, const char *dir, const | |||
288 | return run_command(&cmd); | 222 | return run_command(&cmd); |
289 | } | 223 | } |
290 | 224 | ||
291 | #ifdef __MINGW32__ | ||
292 | static __stdcall unsigned run_thread(void *data) | ||
293 | { | ||
294 | struct async *async = data; | ||
295 | return async->proc(async->fd_for_proc, async->data); | ||
296 | } | ||
297 | #endif | ||
298 | |||
299 | int start_async(struct async *async) | 225 | int start_async(struct async *async) |
300 | { | 226 | { |
301 | int pipe_out[2]; | 227 | int pipe_out[2]; |
@@ -304,7 +230,6 @@ int start_async(struct async *async) | |||
304 | return error("cannot create pipe: %s", strerror(errno)); | 230 | return error("cannot create pipe: %s", strerror(errno)); |
305 | async->out = pipe_out[0]; | 231 | async->out = pipe_out[0]; |
306 | 232 | ||
307 | #ifndef __MINGW32__ | ||
308 | /* Flush stdio before fork() to avoid cloning buffers */ | 233 | /* Flush stdio before fork() to avoid cloning buffers */ |
309 | fflush(NULL); | 234 | fflush(NULL); |
310 | 235 | ||
@@ -319,33 +244,17 @@ int start_async(struct async *async) | |||
319 | exit(!!async->proc(pipe_out[1], async->data)); | 244 | exit(!!async->proc(pipe_out[1], async->data)); |
320 | } | 245 | } |
321 | close(pipe_out[1]); | 246 | close(pipe_out[1]); |
322 | #else | 247 | |
323 | async->fd_for_proc = pipe_out[1]; | ||
324 | async->tid = (HANDLE) _beginthreadex(NULL, 0, run_thread, async, 0, NULL); | ||
325 | if (!async->tid) { | ||
326 | error("cannot create thread: %s", strerror(errno)); | ||
327 | close_pair(pipe_out); | ||
328 | return -1; | ||
329 | } | ||
330 | #endif | ||
331 | return 0; | 248 | return 0; |
332 | } | 249 | } |
333 | 250 | ||
334 | int finish_async(struct async *async) | 251 | int finish_async(struct async *async) |
335 | { | 252 | { |
336 | #ifndef __MINGW32__ | ||
337 | int ret = 0; | 253 | int ret = 0; |
338 | 254 | ||
339 | if (wait_or_whine(async->pid)) | 255 | if (wait_or_whine(async->pid)) |
340 | ret = error("waitpid (async) failed"); | 256 | ret = error("waitpid (async) failed"); |
341 | #else | 257 | |
342 | DWORD ret = 0; | ||
343 | if (WaitForSingleObject(async->tid, INFINITE) != WAIT_OBJECT_0) | ||
344 | ret = error("waiting for thread failed: %lu", GetLastError()); | ||
345 | else if (!GetExitCodeThread(async->tid, &ret)) | ||
346 | ret = error("cannot get thread exit code: %lu", GetLastError()); | ||
347 | CloseHandle(async->tid); | ||
348 | #endif | ||
349 | return ret; | 258 | return ret; |
350 | } | 259 | } |
351 | 260 | ||
diff --git a/tools/perf/util/run-command.h b/tools/perf/util/run-command.h index 328289f23669..cc1837deba88 100644 --- a/tools/perf/util/run-command.h +++ b/tools/perf/util/run-command.h | |||
@@ -79,12 +79,7 @@ struct async { | |||
79 | int (*proc)(int fd, void *data); | 79 | int (*proc)(int fd, void *data); |
80 | void *data; | 80 | void *data; |
81 | int out; /* caller reads from here and closes it */ | 81 | int out; /* caller reads from here and closes it */ |
82 | #ifndef __MINGW32__ | ||
83 | pid_t pid; | 82 | pid_t pid; |
84 | #else | ||
85 | HANDLE tid; | ||
86 | int fd_for_proc; | ||
87 | #endif | ||
88 | }; | 83 | }; |
89 | 84 | ||
90 | int start_async(struct async *async); | 85 | int start_async(struct async *async); |
diff --git a/tools/perf/util/strbuf.c b/tools/perf/util/strbuf.c index eaba09306802..464e7ca898cf 100644 --- a/tools/perf/util/strbuf.c +++ b/tools/perf/util/strbuf.c | |||
@@ -259,7 +259,7 @@ size_t strbuf_fread(struct strbuf *sb, size_t size, FILE *f) | |||
259 | res = fread(sb->buf + sb->len, 1, size, f); | 259 | res = fread(sb->buf + sb->len, 1, size, f); |
260 | if (res > 0) | 260 | if (res > 0) |
261 | strbuf_setlen(sb, sb->len + res); | 261 | strbuf_setlen(sb, sb->len + res); |
262 | else if (res < 0 && oldalloc == 0) | 262 | else if (oldalloc == 0) |
263 | strbuf_release(sb); | 263 | strbuf_release(sb); |
264 | return res; | 264 | return res; |
265 | } | 265 | } |
diff --git a/tools/perf/util/string.h b/tools/perf/util/string.h index 37b03255b425..3dca2f654cd0 100644 --- a/tools/perf/util/string.h +++ b/tools/perf/util/string.h | |||
@@ -1,7 +1,7 @@ | |||
1 | #ifndef _PERF_STRING_H_ | 1 | #ifndef _PERF_STRING_H_ |
2 | #define _PERF_STRING_H_ | 2 | #define _PERF_STRING_H_ |
3 | 3 | ||
4 | #include "../types.h" | 4 | #include "types.h" |
5 | 5 | ||
6 | int hex2u64(const char *ptr, u64 *val); | 6 | int hex2u64(const char *ptr, u64 *val); |
7 | 7 | ||
diff --git a/tools/perf/util/strlist.c b/tools/perf/util/strlist.c new file mode 100644 index 000000000000..025a78edfffe --- /dev/null +++ b/tools/perf/util/strlist.c | |||
@@ -0,0 +1,184 @@ | |||
1 | /* | ||
2 | * (c) 2009 Arnaldo Carvalho de Melo <acme@redhat.com> | ||
3 | * | ||
4 | * Licensed under the GPLv2. | ||
5 | */ | ||
6 | |||
7 | #include "strlist.h" | ||
8 | #include <errno.h> | ||
9 | #include <stdio.h> | ||
10 | #include <stdlib.h> | ||
11 | #include <string.h> | ||
12 | |||
13 | static struct str_node *str_node__new(const char *s, bool dupstr) | ||
14 | { | ||
15 | struct str_node *self = malloc(sizeof(*self)); | ||
16 | |||
17 | if (self != NULL) { | ||
18 | if (dupstr) { | ||
19 | s = strdup(s); | ||
20 | if (s == NULL) | ||
21 | goto out_delete; | ||
22 | } | ||
23 | self->s = s; | ||
24 | } | ||
25 | |||
26 | return self; | ||
27 | |||
28 | out_delete: | ||
29 | free(self); | ||
30 | return NULL; | ||
31 | } | ||
32 | |||
33 | static void str_node__delete(struct str_node *self, bool dupstr) | ||
34 | { | ||
35 | if (dupstr) | ||
36 | free((void *)self->s); | ||
37 | free(self); | ||
38 | } | ||
39 | |||
40 | int strlist__add(struct strlist *self, const char *new_entry) | ||
41 | { | ||
42 | struct rb_node **p = &self->entries.rb_node; | ||
43 | struct rb_node *parent = NULL; | ||
44 | struct str_node *sn; | ||
45 | |||
46 | while (*p != NULL) { | ||
47 | int rc; | ||
48 | |||
49 | parent = *p; | ||
50 | sn = rb_entry(parent, struct str_node, rb_node); | ||
51 | rc = strcmp(sn->s, new_entry); | ||
52 | |||
53 | if (rc > 0) | ||
54 | p = &(*p)->rb_left; | ||
55 | else if (rc < 0) | ||
56 | p = &(*p)->rb_right; | ||
57 | else | ||
58 | return -EEXIST; | ||
59 | } | ||
60 | |||
61 | sn = str_node__new(new_entry, self->dupstr); | ||
62 | if (sn == NULL) | ||
63 | return -ENOMEM; | ||
64 | |||
65 | rb_link_node(&sn->rb_node, parent, p); | ||
66 | rb_insert_color(&sn->rb_node, &self->entries); | ||
67 | |||
68 | return 0; | ||
69 | } | ||
70 | |||
/*
 * Add every line of 'filename' to the list, one entry per line.
 *
 * Lines are read in chunks of at most sizeof(entry) - 1 bytes; a trailing
 * newline, when present, is stripped before the entry is added.
 *
 * Returns 0 on success, -errno if the file cannot be opened, or the
 * negative error returned by strlist__add() for the first entry that
 * fails.  Loading stops at the first failure; entries added before it
 * stay in the list.
 *
 * NOTE(review): 'entry' is a stack buffer.  If the list was created with
 * dupstr == false the nodes would keep pointing into it after this
 * function returns -- callers presumably only load files into dupstr
 * lists; confirm.
 */
int strlist__load(struct strlist *self, const char *filename)
{
	char entry[1024];
	int err = 0;
	FILE *fp = fopen(filename, "r");

	if (fp == NULL)
		return -errno;	/* negative, like -EEXIST/-ENOMEM elsewhere */

	while (fgets(entry, sizeof(entry), fp) != NULL) {
		const size_t len = strlen(entry);

		if (len == 0)
			continue;

		/*
		 * Only drop a real newline: the last line of a file may
		 * lack one, and an over-long line arrives in pieces.
		 */
		if (entry[len - 1] == '\n')
			entry[len - 1] = '\0';

		err = strlist__add(self, entry);
		if (err != 0)
			break;
	}

	fclose(fp);
	return err;
}
97 | |||
98 | void strlist__remove(struct strlist *self, struct str_node *sn) | ||
99 | { | ||
100 | rb_erase(&sn->rb_node, &self->entries); | ||
101 | str_node__delete(sn, self->dupstr); | ||
102 | } | ||
103 | |||
104 | bool strlist__has_entry(struct strlist *self, const char *entry) | ||
105 | { | ||
106 | struct rb_node **p = &self->entries.rb_node; | ||
107 | struct rb_node *parent = NULL; | ||
108 | |||
109 | while (*p != NULL) { | ||
110 | struct str_node *sn; | ||
111 | int rc; | ||
112 | |||
113 | parent = *p; | ||
114 | sn = rb_entry(parent, struct str_node, rb_node); | ||
115 | rc = strcmp(sn->s, entry); | ||
116 | |||
117 | if (rc > 0) | ||
118 | p = &(*p)->rb_left; | ||
119 | else if (rc < 0) | ||
120 | p = &(*p)->rb_right; | ||
121 | else | ||
122 | return true; | ||
123 | } | ||
124 | |||
125 | return false; | ||
126 | } | ||
127 | |||
/*
 * Handle one element of a comma separated list: an element of the form
 * "file://<path>" loads the entries from <path>, anything else is added
 * to the list as a literal string.
 *
 * Returns whatever strlist__load() or strlist__add() returns.
 */
static int strlist__parse_list_entry(struct strlist *self, const char *s)
{
	static const char file_prefix[] = "file://";
	const size_t prefix_len = sizeof(file_prefix) - 1;

	if (strncmp(s, file_prefix, prefix_len) != 0)
		return strlist__add(self, s);

	return strlist__load(self, s + prefix_len);
}
135 | |||
/*
 * Split 's' at every ',' and feed each element to
 * strlist__parse_list_entry().  An empty element after the last comma
 * (or an empty 's') is ignored.
 *
 * Returns 0 on success or the first non-zero error from an element;
 * parsing stops there.
 *
 * NOTE(review): each ',' is temporarily overwritten with '\0' and then
 * restored, so 's' must point to writable memory even though it is
 * const-qualified -- passing a string literal would presumably fault.
 */
int strlist__parse_list(struct strlist *self, const char *s)
{
	for (;;) {
		char *comma = strchr(s, ',');
		int err;

		if (comma == NULL)
			break;

		/* Terminate the current element in place while it is parsed. */
		*comma = '\0';
		err = strlist__parse_list_entry(self, s);
		*comma = ',';

		if (err != 0)
			return err;

		s = comma + 1;
	}

	if (*s == '\0')
		return 0;

	return strlist__parse_list_entry(self, s);
}
152 | |||
153 | struct strlist *strlist__new(bool dupstr, const char *slist) | ||
154 | { | ||
155 | struct strlist *self = malloc(sizeof(*self)); | ||
156 | |||
157 | if (self != NULL) { | ||
158 | self->entries = RB_ROOT; | ||
159 | self->dupstr = dupstr; | ||
160 | if (slist && strlist__parse_list(self, slist) != 0) | ||
161 | goto out_error; | ||
162 | } | ||
163 | |||
164 | return self; | ||
165 | out_error: | ||
166 | free(self); | ||
167 | return NULL; | ||
168 | } | ||
169 | |||
170 | void strlist__delete(struct strlist *self) | ||
171 | { | ||
172 | if (self != NULL) { | ||
173 | struct str_node *pos; | ||
174 | struct rb_node *next = rb_first(&self->entries); | ||
175 | |||
176 | while (next) { | ||
177 | pos = rb_entry(next, struct str_node, rb_node); | ||
178 | next = rb_next(&pos->rb_node); | ||
179 | strlist__remove(self, pos); | ||
180 | } | ||
181 | self->entries = RB_ROOT; | ||
182 | free(self); | ||
183 | } | ||
184 | } | ||
diff --git a/tools/perf/util/strlist.h b/tools/perf/util/strlist.h new file mode 100644 index 000000000000..2fb117fb4b67 --- /dev/null +++ b/tools/perf/util/strlist.h | |||
@@ -0,0 +1,32 @@ | |||
1 | #ifndef STRLIST_H_ | ||
2 | #define STRLIST_H_ | ||
3 | |||
4 | #include "rbtree.h" | ||
5 | #include <stdbool.h> | ||
6 | |||
7 | struct str_node { | ||
8 | struct rb_node rb_node; | ||
9 | const char *s; | ||
10 | }; | ||
11 | |||
12 | struct strlist { | ||
13 | struct rb_root entries; | ||
14 | bool dupstr; | ||
15 | }; | ||
16 | |||
17 | struct strlist *strlist__new(bool dupstr, const char *slist); | ||
18 | void strlist__delete(struct strlist *self); | ||
19 | |||
20 | void strlist__remove(struct strlist *self, struct str_node *sn); | ||
21 | int strlist__load(struct strlist *self, const char *filename); | ||
22 | int strlist__add(struct strlist *self, const char *str); | ||
23 | |||
24 | bool strlist__has_entry(struct strlist *self, const char *entry); | ||
25 | |||
26 | static inline bool strlist__empty(const struct strlist *self) | ||
27 | { | ||
28 | return rb_first(&self->entries) == NULL; | ||
29 | } | ||
30 | |||
31 | int strlist__parse_list(struct strlist *self, const char *s); | ||
32 | #endif /* STRLIST_H_ */ | ||
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 86e14375e74e..78c2efde01b7 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c | |||
@@ -520,7 +520,9 @@ static int dso__load_sym(struct dso *self, int fd, const char *name, | |||
520 | nr_syms = shdr.sh_size / shdr.sh_entsize; | 520 | nr_syms = shdr.sh_size / shdr.sh_entsize; |
521 | 521 | ||
522 | memset(&sym, 0, sizeof(sym)); | 522 | memset(&sym, 0, sizeof(sym)); |
523 | 523 | self->prelinked = elf_section_by_name(elf, &ehdr, &shdr, | |
524 | ".gnu.prelink_undo", | ||
525 | NULL) != NULL; | ||
524 | elf_symtab__for_each_symbol(syms, nr_syms, index, sym) { | 526 | elf_symtab__for_each_symbol(syms, nr_syms, index, sym) { |
525 | struct symbol *f; | 527 | struct symbol *f; |
526 | u64 obj_start; | 528 | u64 obj_start; |
@@ -535,7 +537,13 @@ static int dso__load_sym(struct dso *self, int fd, const char *name, | |||
535 | gelf_getshdr(sec, &shdr); | 537 | gelf_getshdr(sec, &shdr); |
536 | obj_start = sym.st_value; | 538 | obj_start = sym.st_value; |
537 | 539 | ||
538 | sym.st_value -= shdr.sh_addr - shdr.sh_offset; | 540 | if (self->prelinked) { |
541 | if (verbose >= 2) | ||
542 | printf("adjusting symbol: st_value: %Lx sh_addr: %Lx sh_offset: %Lx\n", | ||
543 | (u64)sym.st_value, (u64)shdr.sh_addr, (u64)shdr.sh_offset); | ||
544 | |||
545 | sym.st_value -= shdr.sh_addr - shdr.sh_offset; | ||
546 | } | ||
539 | 547 | ||
540 | f = symbol__new(sym.st_value, sym.st_size, | 548 | f = symbol__new(sym.st_value, sym.st_size, |
541 | elf_sym__name(&sym, symstrs), | 549 | elf_sym__name(&sym, symstrs), |
@@ -569,6 +577,8 @@ int dso__load(struct dso *self, symbol_filter_t filter, int verbose) | |||
569 | if (!name) | 577 | if (!name) |
570 | return -1; | 578 | return -1; |
571 | 579 | ||
580 | self->prelinked = 0; | ||
581 | |||
572 | if (strncmp(self->name, "/tmp/perf-", 10) == 0) | 582 | if (strncmp(self->name, "/tmp/perf-", 10) == 0) |
573 | return dso__load_perf_map(self, filter, verbose); | 583 | return dso__load_perf_map(self, filter, verbose); |
574 | 584 | ||
@@ -629,7 +639,7 @@ int dso__load_kernel(struct dso *self, const char *vmlinux, | |||
629 | if (vmlinux) | 639 | if (vmlinux) |
630 | err = dso__load_vmlinux(self, vmlinux, filter, verbose); | 640 | err = dso__load_vmlinux(self, vmlinux, filter, verbose); |
631 | 641 | ||
632 | if (err) | 642 | if (err < 0) |
633 | err = dso__load_kallsyms(self, filter, verbose); | 643 | err = dso__load_kallsyms(self, filter, verbose); |
634 | 644 | ||
635 | return err; | 645 | return err; |
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index ea332e56e458..2c48ace8203b 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h | |||
@@ -2,7 +2,7 @@ | |||
2 | #define _PERF_SYMBOL_ 1 | 2 | #define _PERF_SYMBOL_ 1 |
3 | 3 | ||
4 | #include <linux/types.h> | 4 | #include <linux/types.h> |
5 | #include "../types.h" | 5 | #include "types.h" |
6 | #include "list.h" | 6 | #include "list.h" |
7 | #include "rbtree.h" | 7 | #include "rbtree.h" |
8 | 8 | ||
@@ -20,8 +20,9 @@ struct symbol { | |||
20 | struct dso { | 20 | struct dso { |
21 | struct list_head node; | 21 | struct list_head node; |
22 | struct rb_root syms; | 22 | struct rb_root syms; |
23 | unsigned int sym_priv_size; | ||
24 | struct symbol *(*find_symbol)(struct dso *, u64 ip); | 23 | struct symbol *(*find_symbol)(struct dso *, u64 ip); |
24 | unsigned int sym_priv_size; | ||
25 | unsigned char prelinked; | ||
25 | char name[0]; | 26 | char name[0]; |
26 | }; | 27 | }; |
27 | 28 | ||
diff --git a/tools/perf/types.h b/tools/perf/util/types.h index 5e75f9005940..5e75f9005940 100644 --- a/tools/perf/types.h +++ b/tools/perf/util/types.h | |||
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index b8cfed776d81..b4be6071c105 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h | |||
@@ -67,7 +67,6 @@ | |||
67 | #include <assert.h> | 67 | #include <assert.h> |
68 | #include <regex.h> | 68 | #include <regex.h> |
69 | #include <utime.h> | 69 | #include <utime.h> |
70 | #ifndef __MINGW32__ | ||
71 | #include <sys/wait.h> | 70 | #include <sys/wait.h> |
72 | #include <sys/poll.h> | 71 | #include <sys/poll.h> |
73 | #include <sys/socket.h> | 72 | #include <sys/socket.h> |
@@ -81,20 +80,6 @@ | |||
81 | #include <netdb.h> | 80 | #include <netdb.h> |
82 | #include <pwd.h> | 81 | #include <pwd.h> |
83 | #include <inttypes.h> | 82 | #include <inttypes.h> |
84 | #if defined(__CYGWIN__) | ||
85 | #undef _XOPEN_SOURCE | ||
86 | #include <grp.h> | ||
87 | #define _XOPEN_SOURCE 600 | ||
88 | #include "compat/cygwin.h" | ||
89 | #else | ||
90 | #undef _ALL_SOURCE /* AIX 5.3L defines a struct list with _ALL_SOURCE. */ | ||
91 | #include <grp.h> | ||
92 | #define _ALL_SOURCE 1 | ||
93 | #endif | ||
94 | #else /* __MINGW32__ */ | ||
95 | /* pull in Windows compatibility stuff */ | ||
96 | #include "compat/mingw.h" | ||
97 | #endif /* __MINGW32__ */ | ||
98 | 83 | ||
99 | #ifndef NO_ICONV | 84 | #ifndef NO_ICONV |
100 | #include <iconv.h> | 85 | #include <iconv.h> |